Ejemplo n.º 1
0
def get_prediction(model_path, unit_converter, molecules, labels,
                   extra_features):
    """Evaluate the model found at ``model_path`` and rescale its output.

    The dataset is created by ``load_dataset`` from ``model_path``,
    ``molecules``, ``labels`` and ``extra_features`` and is evaluated in
    order (no shuffling) in batches of 100.

    Returns:
        The predictions multiplied by ``unit_converter``. When the number of
        predictions equals ``len(dataset.failed)`` the result is a NumPy
        array. Otherwise it is a list with one slot per entry of
        ``dataset.failed``, holding ``None`` wherever that entry is truthy.
    """
    model = load_model(model_path)
    dataset = load_dataset(model_path, molecules, labels, extra_features)
    loader = DataLoaderReactionNetwork(dataset, batch_size=100, shuffle=False)

    raw_predictions = evaluate(model, ["atom", "bond", "global"], loader)

    # Lengths agree (presumably nothing failed): rescale everything at once.
    if len(raw_predictions) == len(dataset.failed):
        return np.asarray(raw_predictions) * unit_converter

    # Otherwise re-align the predictions with the input order, leaving a
    # ``None`` placeholder for each entry flagged as failed.
    aligned = []
    cursor = 0  # position of the next unused prediction
    for has_failed in dataset.failed:
        if has_failed:
            aligned.append(None)
            continue
        aligned.append(raw_predictions[cursor] * unit_converter)
        cursor += 1
    return aligned
Ejemplo n.º 2
0
def main(
    model_name="bdncm/20200808",
    sdf_file="~/Applications/db_access/mol_builder/struct_rxn_ntwk_rgrn_qc.sdf",
    label_file="~/Applications/db_access/mol_builder/label_rxn_ntwk_rgrn_qc.yaml",
    feature_file="~/Applications/db_access/mol_builder/feature_rxn_ntwk_rgrn_qc.yaml",
    error_file="~/Applications/db_access/mol_builder/post_analysis/evaluation_error.tsv",
    charge_file="~/Applications/db_access/mol_builder/post_analysis/charges.tsv",
):
    """Evaluate a model on the test split and dump error and charge tables.

    Two tab-separated files are written, both without the index column:

    * ``error_file``: identifier, target, prediction, error and species of
      every evaluated entry, ordered by ascending error.
    * ``charge_file``: the table returned by
      ``get_charges(label_file, feature_file)``.

    Both output paths are passed through ``to_path`` before writing.
    """
    seed_torch()

    dataset = load_dataset(model_name, sdf_file, label_file, feature_file)

    # NOTE: the plain ``train_validation_test_split`` helper, or a loader
    # built on the train/validation subset, can be swapped in here when a
    # different subset should be analysed. Only the test subset is used.
    _, _, test_subset = train_validation_test_split_selected_bond_in_train(
        dataset,
        validation=0.1,
        test=0.1,
        selected_bond_type=(("H", "H"), ("H", "F"), ("F", "F")),
    )
    loader = DataLoaderReactionNetwork(test_subset, batch_size=100, shuffle=False)

    model = load_model(model_name)

    # Run the model over the test subset.
    results = evaluate(model, ["atom", "bond", "global"], loader)
    ids, targets, predictions, errors, species = results

    # Order all five parallel sequences by the error (field 3 of each row).
    rows_by_error = sorted(
        zip(ids, targets, predictions, errors, species),
        key=lambda row: row[3],
    )
    ids, targets, predictions, errors, species = zip(*rows_by_error)

    error_table = pd.DataFrame({
        "identifier": ids,
        "target": targets,
        "prediction": predictions,
        "error": errors,
        "species": species,
    })
    error_table.to_csv(to_path(error_file), sep="\t", index=False)

    # Charges go to their own file.
    charge_table = get_charges(label_file, feature_file)
    charge_table.to_csv(to_path(charge_file), sep="\t", index=False)
Ejemplo n.º 3
0
def main(
    model_name="bdncm/20200808",
    sdf_file="~/Applications/db_access/mol_builder/struct_rxn_ntwk_rgrn_qc.sdf",
    label_file="~/Applications/db_access/mol_builder/label_rxn_ntwk_rgrn_qc.yaml",
    feature_file="~/Applications/db_access/mol_builder/feature_rxn_ntwk_rgrn_qc.yaml",
    feat_filename="~/Applications/db_access/mol_builder/post_analysis/feats.tsv",
    meta_filename="~/Applications/db_access/mol_builder/post_analysis/feats_metadata.tsv",
):
    """Dump the features computed for the test split together with metadata.

    Two tab-separated files are written, both without the index column:

    * ``feat_filename``: the ``features`` returned by ``evaluate`` (called
      with ``compute_features=True``), written without a header row.
    * ``meta_filename``: identifier, target, prediction, error, species and
      the reactant / product charges of every evaluated entry.

    Both output paths are passed through ``to_path`` before writing.

    Raises:
        KeyError: if an evaluated identifier has no row in the table returned
            by ``get_charges(label_file, feature_file)``.
    """
    seed_torch()

    dataset = load_dataset(model_name, sdf_file, label_file, feature_file)
    _, _, testset = train_validation_test_split(dataset,
                                                validation=0.1,
                                                test=0.1)
    data_loader = DataLoaderReactionNetwork(testset,
                                            batch_size=100,
                                            shuffle=False)

    model = load_model(model_name)

    # make predictions
    feature_names = ["atom", "bond", "global"]
    ids, targets, predictions, errors, species, features = evaluate(
        model, feature_names, data_loader, compute_features=True)
    df = pd.DataFrame(features)
    df.to_csv(to_path(feat_filename), sep="\t", header=False, index=False)

    # metadata
    charges = get_charges(label_file, feature_file)

    # Index the charge rows by identifier once instead of re-filtering the
    # whole DataFrame for every id. ``setdefault`` keeps the first row of a
    # repeated identifier, matching the previous "first match" behaviour.
    charges_by_id = {}
    for record in charges.to_dict("records"):
        charges_by_id.setdefault(record["identifier"], record)

    rct_charges = []
    prdt1_charges = []
    prdt2_charges = []
    for identifier in ids:
        try:
            record = charges_by_id[identifier]
        except KeyError:
            raise KeyError(
                f"no charge record found for identifier {identifier!r}"
            ) from None
        rct_charges.append(record["charge"])
        prdt1_charges.append(record["product1 charge"])
        prdt2_charges.append(record["product2 charge"])

    df = pd.DataFrame({
        "identifier": ids,
        "target": targets,
        "prediction": predictions,
        "error": errors,
        "species": species,
        "reactant charge": rct_charges,
        "product1 charge": prdt1_charges,
        "product2 charge": prdt2_charges,
    })
    df.to_csv(to_path(meta_filename), sep="\t", index=False)
Ejemplo n.º 4
0
def main(
    model_name="mesd/20200808",
    sdf_file="/Users/mjwen/Applications/db_access/mol_builder/post_analysis/lbdc/struct.sdf",
    label_file="/Users/mjwen/Applications/db_access/mol_builder/post_analysis/lbdc/label.yaml",
    feature_file="/Users/mjwen/Applications/db_access/mol_builder/post_analysis/lbdc/feature.yaml",
    feat_meta_prefix="~/Applications/db_access/mol_builder/post_analysis/lbdc",
):
    """Dump per-layer features and metadata for the whole dataset.

    The model is loaded with ``pretrained=False`` and evaluated on the full
    dataset (no train/validation/test split). The following tab-separated
    files are written inside the directory ``to_path(feat_meta_prefix)``:

    * ``feats_layer{idx}.tsv``: one file per ``(idx, ft)`` item of the
      ``features`` mapping returned by ``evaluate``, written without header
      and index.
    * ``feats_metadata.tsv``: identifier, target, prediction, broken bonds
      and species of every evaluated entry, written without the index.
    """
    seed_torch()

    dataset = load_dataset(model_name, sdf_file, label_file, feature_file)
    data_loader = DataLoaderReactionNetwork(dataset, batch_size=100, shuffle=False)

    model = load_model(model_name, pretrained=False)

    # make predictions
    feature_names = ["atom", "bond", "global"]
    ids, targets, predictions, broken_bonds, species, features = evaluate(
        model, feature_names, data_loader
    )

    # All output files live in the same directory.
    out_dir = to_path(feat_meta_prefix)

    # write the features of each layer to their own file
    for idx, ft in features.items():
        layer_fname = out_dir.joinpath(f"feats_layer{idx}.tsv")
        pd.DataFrame(ft).to_csv(layer_fname, sep="\t", header=False, index=False)

    df = pd.DataFrame(
        {
            "identifier": ids,
            "target": targets,
            "prediction": predictions,
            "broken_bonds": broken_bonds,
            "species": species,
        }
    )
    # The metadata gets its own file. Previously the computed path was
    # discarded and the table overwrote the last per-layer feature file.
    meta_fname = out_dir.joinpath("feats_metadata.tsv")
    df.to_csv(meta_fname, sep="\t", index=False)