예제 #1
0
def predict_multiple_molecules(model_name, molecule_file, charge_file,
                               out_file, format):
    """
    Predict bond energies for all molecules stored in a file.

    Args:
        model_name (str): Name of the pretrained model, written as
            `dataset/date` (for example `mesd/20200808` or `pubchem/20200531`).
            Giving only the `dataset` part selects the latest model.
        molecule_file (str): Path to the file holding the molecules.
        charge_file (str): Path to the file holding the charges; with `None`,
            all charges are set to zero.
        out_file (str): Path of the file the output is written to.
        format (str): Format of the molecules, such as `sdf`, `graph`, `pdb`,
            `smiles`, or `inchi`.

    Returns:
        The value returned by the predictor's `write_results` call.
    """
    # Locate the model and pull out the two settings used below.
    path = get_model_path(model_name)
    info = get_model_info(path)
    charges_supported = info["allowed_charge"]
    converter = info["unit_conversion"]

    # `ring_bond` is fixed to False for this entry point.
    multi_predictor = PredictionMultiReactant(
        molecule_file, charge_file, format, charges_supported, ring_bond=False
    )

    mols, targets, feats = multi_predictor.prepare_data()
    result = get_prediction(path, converter, mols, targets, feats)

    return multi_predictor.write_results(result, out_file)
예제 #2
0
def predict_by_reactions(model_name, molecule_file, reaction_file, charge_file,
                         out_file, format):
    """
    Predict energies for many bonds, each bond being given as a reaction.

    Args:
        model_name (str): Name of the pretrained model, written as
            `dataset/date` (for example `mesd/20200808` or `pubchem/20200531`).
            Giving only the `dataset` part selects the latest model.
        molecule_file (str): Path to the file storing all molecules.
        reaction_file (str): Path to the file specifying the reactions.
        charge_file (str): Path to the file holding the charges; with `None`,
            all charges are set to zero.
        out_file (str): Path of the file the output is written to.
        format (str): Format of the molecules, such as `sdf`, `graph`, `pdb`,
            `smiles`, or `inchi`.

    Returns:
        The value returned by the predictor's `write_results` call.
    """
    # Locate the model and read its unit conversion setting.
    path = get_model_path(model_name)
    info = get_model_info(path)
    converter = info["unit_conversion"]

    reaction_predictor = PredictionByReaction(
        molecule_file, reaction_file, charge_file, format=format
    )

    mols, targets, feats = reaction_predictor.prepare_data()
    result = get_prediction(path, converter, mols, targets, feats)

    return reaction_predictor.write_results(result, out_file)
예제 #3
0
def predict_by_struct_label_extra_feats_files(model_name,
                                              molecule_file,
                                              label_file,
                                              extra_feats_file,
                                              out_file="bde.yaml"):
    """
    Make predictions from a molecule file, a label file and an extra-features file.

    Args:
        model_name (str): Name of the pretrained model, passed to
            `get_model_path` to locate the model.
        molecule_file (str): Path to the molecule (structure) file.
        label_file (str): Path to the label file.
        extra_feats_file (str): Path to the extra-features file.
        out_file (str): Path of the file the output is written to. Defaults to
            `bde.yaml`.

    Returns:
        The value returned by the predictor's `write_results` call.
    """
    # Locate the model and read its unit conversion setting.
    path = get_model_path(model_name)
    info = get_model_info(path)
    converter = info["unit_conversion"]

    file_predictor = PredictionStructLabelFeatFiles(
        molecule_file, label_file, extra_feats_file
    )

    mols, targets, feats = file_predictor.prepare_data()
    result = get_prediction(path, converter, mols, targets, feats)

    return file_predictor.write_results(result, out_file)
예제 #4
0
def predict_single_molecule(
    model_name,
    molecule,
    charge=0,
    ring_bond=False,
    write_result=False,
    figure_name="prediction.png",
    format=None,
):
    """
    Make predictions for a single molecule.

    Breaking a bond may result in products with different combinations of
    product charges; we report the smallest charge w.r.t. the product charge
    assignment.
    NOTE(review): "smallest charge" is the original wording; presumably the
    smallest energy over the product charge assignments is meant -- confirm.

    Args:
        model_name (str): The pre-trained model to use for making predictions. A
            model should be of the format `dataset/date`, e.g. `mesd/20200808`,
            `pubchem/20200521`. It is possible to provide only the `dataset`
            part, and in this case, the latest model will be used.
        molecule (str): A SMILES or InChI string, or a path to an existing
            molecule file. When a file is given and `format` is not, the file
            extension must be `.sdf` or `.pdb`.
        charge (int): Charge of the molecule. Must be one of the charges allowed
            by the model.
        ring_bond (bool): Whether to make predictions for ring bonds.
        write_result (bool): Whether to write the returned sdf to stdout.
        figure_name (str): Name of the figure to be created showing the bond
            energy.
        format (str): Format of the molecule. If not provided, it is guessed:
            from the file extension when `molecule` is a file, otherwise
            `inchi` when the string starts with `inchi=` (case-insensitive) and
            `smiles` in all other cases.

    Returns:
        str: sdf string representing the molecules and energies.

    Raises:
        AssertionError: If `charge` is not among the charges allowed by the
            model.
        RuntimeError: If `molecule` is a file, `format` is not given, and the
            file extension is neither `.sdf` nor `.pdb`.
    """

    model_path = get_model_path(model_name)
    model_info = get_model_info(model_path)
    allowed_charge = model_info["allowed_charge"]
    unit_converter = model_info["unit_conversion"]

    # Validate explicitly rather than with an `assert` statement: asserts are
    # removed when Python runs with `-O`, which would let an unsupported charge
    # through. AssertionError is kept so existing callers see the same
    # exception type and message.
    if charge not in allowed_charge:
        raise AssertionError(
            f"expect charge to be one of {allowed_charge}, but got {charge}")

    p = to_path(molecule)
    if p.is_file():
        # `molecule` names a file: infer the format from the extension if it
        # was not given, then replace `molecule` with the file content.
        if format is None:
            suffix = p.suffix.lower()
            if suffix == ".sdf":
                format = "sdf"
            elif suffix == ".pdb":
                format = "pdb"
            else:
                raise RuntimeError(
                    f"Expect file format `.sdf` or `.pdb`, but got {suffix}")
        with open(p, "r") as f:
            molecule = f.read().strip()
    elif format is None:
        # `molecule` is the molecule string itself: InChI strings are
        # recognized by their `inchi=` prefix, anything else is taken as SMILES.
        if molecule.lower().startswith("inchi="):
            format = "inchi"
        else:
            format = "smiles"

    predictor = PredictionOneReactant(molecule, charge, format, allowed_charge,
                                      ring_bond)

    molecules, labels, extra_features = predictor.prepare_data()
    predictions = get_prediction(model_path, unit_converter, molecules, labels,
                                 extra_features)

    return predictor.write_results(predictions, figure_name, write_result)