def predict_multiple_molecules(model_name, molecule_file, charge_file, out_file, format):
    """
    Predict bond energies for a batch of molecules read from a file.

    Args:
        model_name (str): The pretrained model to use for making predictions. A model
            should be of the format `dataset/date`, e.g. `mesd/20200808`,
            `pubchem/20200531`. It is possible to provide only the `dataset` part,
            and in this case, the latest model will be used.
        molecule_file (str): path to molecule file
        charge_file (str): path to charge file, if `None` charges are set to zero
        out_file (str): path to file to write output
        format (str): format of molecules, e.g. `sdf`, `graph`, `pdb`, `smiles`,
            and `inchi`.

    Returns:
        Whatever `PredictionMultiReactant.write_results` returns for the
        predictions and `out_file`.
    """
    path = get_model_path(model_name)
    info = get_model_info(path)
    # Both settings are read up front, before any predictor is constructed.
    charges, converter = info["allowed_charge"], info["unit_conversion"]

    # Ring bonds are always excluded in batch mode.
    runner = PredictionMultiReactant(
        molecule_file,
        charge_file,
        format,
        charges,
        ring_bond=False,
    )
    mols, targets, feats = runner.prepare_data()
    energies = get_prediction(path, converter, mols, targets, feats)

    result = runner.write_results(energies, out_file)
    return result
def predict_by_reactions(model_name, molecule_file, reaction_file, charge_file, out_file, format):
    """
    Predict energies for many bonds, each bond being specified as a reaction.

    Args:
        model_name (str): The pretrained model to use for making predictions. A model
            should be of the format `dataset/date`, e.g. `mesd/20200808`,
            `pubchem/20200531`. It is possible to provide only the `dataset` part,
            and in this case, the latest model will be used.
        molecule_file (str): path to file storing all molecules
        reaction_file (str): path to file specifying reactions
        charge_file (str): path to charge file, if `None` charges are set to zero
        out_file (str): path to file to write output
        format (str): format of molecules, e.g. `sdf`, `graph`, `pdb`, `smiles`,
            and `inchi`.

    Returns:
        Whatever `PredictionByReaction.write_results` returns for the
        predictions and `out_file`.
    """
    path = get_model_path(model_name)
    converter = get_model_info(path)["unit_conversion"]

    runner = PredictionByReaction(
        molecule_file,
        reaction_file,
        charge_file,
        format=format,
    )
    mols, targets, feats = runner.prepare_data()
    energies = get_prediction(path, converter, mols, targets, feats)

    result = runner.write_results(energies, out_file)
    return result
def predict_by_struct_label_extra_feats_files(
    model_name,
    molecule_file,
    label_file,
    extra_feats_file,
    out_file="bde.yaml",
):
    """
    Make predictions from separate molecule, label, and extra-feature files.

    The three input files are handed unchanged to `PredictionStructLabelFeatFiles`,
    which prepares the data that is fed to the model.

    Args:
        model_name (str): name of the pretrained model, resolved to a path with
            `get_model_path`.
        molecule_file (str): molecule file (presumably a path; confirm the expected
            layout against `PredictionStructLabelFeatFiles`).
        label_file (str): label file (same caveat as above).
        extra_feats_file (str): extra features file (same caveat as above).
        out_file (str): passed to `write_results` as the output destination;
            defaults to `bde.yaml`.

    Returns:
        Whatever `PredictionStructLabelFeatFiles.write_results` returns for the
        predictions and `out_file`.
    """
    path = get_model_path(model_name)
    converter = get_model_info(path)["unit_conversion"]

    runner = PredictionStructLabelFeatFiles(molecule_file, label_file, extra_feats_file)
    mols, targets, feats = runner.prepare_data()
    energies = get_prediction(path, converter, mols, targets, feats)

    result = runner.write_results(energies, out_file)
    return result
def predict_single_molecule(
    model_name,
    molecule,
    charge=0,
    ring_bond=False,
    write_result=False,
    figure_name="prediction.png",
    format=None,
):
    """
    Make predictions for a single molecule.

    Breaking a bond may result in products with different combinations of product
    charges; we report the smallest charge w.r.t. the product charge assignment.

    Args:
        model_name (str): The pre-trained model to use for making predictions. A model
            should be of the format `dataset/date`, e.g. `mesd/20200808`,
            `pubchem/20200521`. It is possible to provide only the `dataset` part,
            and in this case, the latest model will be used.
        molecule (str): SMILES or InChI string, or a path to an existing file holding
            the molecule. When a file is given, its stripped content replaces
            `molecule`.
        charge (int): charge of the molecule. Must be one of the charges allowed by
            the selected model.
        ring_bond (bool): whether to make predictions for ring bond.
        write_result (bool): whether to write the returned sdf to stdout.
        figure_name (str): the name of the figure to be created showing the bond
            energy.
        format (str): format of the molecule. If not provided it is guessed: for a
            file, from the extension (only `.sdf` and `.pdb` are recognized); for a
            string, `inchi` when it starts with `InChI=` (case-insensitive) and
            `smiles` otherwise.

    Returns:
        str: sdf string representing the molecules and energies.

    Raises:
        AssertionError: if `charge` is not allowed by the model.
        RuntimeError: if `molecule` is a file, `format` is not given, and the file
            extension is neither `.sdf` nor `.pdb`.
    """
    import errno

    model_path = get_model_path(model_name)
    model_info = get_model_info(model_path)
    allowed_charge = model_info["allowed_charge"]
    unit_converter = model_info["unit_conversion"]

    # Raise explicitly instead of using `assert`, which is removed under
    # `python -O` and would let an unsupported charge through. AssertionError is
    # kept so callers see the same exception type as before.
    if charge not in allowed_charge:
        raise AssertionError(
            f"expect charge to be one of {allowed_charge}, but got {charge}"
        )

    p = to_path(molecule)
    try:
        is_file = p.is_file()
    except OSError as exc:
        # A long SMILES/InChI string can exceed the OS file-name limit, which
        # makes the existence check itself fail on some Python versions. Such a
        # string cannot name a file, so treat it as a molecule string. Any other
        # OS error is propagated unchanged.
        if exc.errno != errno.ENAMETOOLONG:
            raise
        is_file = False

    if is_file:
        if format is None:
            suffix = p.suffix.lower()
            if suffix == ".sdf":
                format = "sdf"
            elif suffix == ".pdb":
                format = "pdb"
            else:
                raise RuntimeError(
                    f"Expect file format `.sdf` or `.pdb`, but got {suffix}"
                )
        with open(p, "r") as f:
            molecule = f.read().strip()
    elif format is None:
        # Not a file: decide between the two supported string representations.
        format = "inchi" if molecule.lower().startswith("inchi=") else "smiles"

    predictor = PredictionOneReactant(molecule, charge, format, allowed_charge, ring_bond)
    molecules, labels, extra_features = predictor.prepare_data()
    predictions = get_prediction(
        model_path, unit_converter, molecules, labels, extra_features
    )

    return predictor.write_results(predictions, figure_name, write_result)