def get_prediction(model_path, unit_converter, molecules, labels, extra_features):
    """Predict bond-dissociation values for `molecules` with the model at `model_path`.

    Predictions are multiplied by `unit_converter`. When some dataset entries
    fail to featurize, the result is a list aligned with the input order, with
    `None` at each failed position; otherwise a scaled numpy array is returned.
    """
    net = load_model(model_path)
    dset = load_dataset(model_path, molecules, labels, extra_features)
    loader = DataLoaderReactionNetwork(dset, batch_size=100, shuffle=False)

    raw = evaluate(net, ["atom", "bond", "global"], loader)

    # All entries succeeded: lengths match, scale the whole array at once.
    if len(raw) == len(dset.failed):
        return np.asarray(raw) * unit_converter

    # Some entries failed: walk the failure flags, consuming one raw
    # prediction per successful entry and emitting None for each failure.
    it = iter(raw)
    return [None if bad else next(it) * unit_converter for bad in dset.failed]
def main(
    model_name="bdncm/20200808",
    sdf_file="~/Applications/db_access/mol_builder/struct_rxn_ntwk_rgrn_qc.sdf",
    label_file="~/Applications/db_access/mol_builder/label_rxn_ntwk_rgrn_qc.yaml",
    feature_file="~/Applications/db_access/mol_builder/feature_rxn_ntwk_rgrn_qc.yaml",
    error_file="~/Applications/db_access/mol_builder/post_analysis/evaluation_error.tsv",
    charge_file="~/Applications/db_access/mol_builder/post_analysis/charges.tsv",
):
    """Evaluate the pretrained model on the test split and write two TSVs:
    per-reaction errors (sorted ascending by error) and molecular charges."""
    seed_torch()

    dataset = load_dataset(model_name, sdf_file, label_file, feature_file)
    # Split so that the selected bond types are kept in the training set.
    trainset, valset, testset = train_validation_test_split_selected_bond_in_train(
        dataset,
        validation=0.1,
        test=0.1,
        selected_bond_type=(("H", "H"), ("H", "F"), ("F", "F")),
    )
    data_loader = DataLoaderReactionNetwork(testset, batch_size=100, shuffle=False)

    model = load_model(model_name)

    # make predictions on the test set
    feature_names = ["atom", "bond", "global"]
    ids, targets, predictions, errors, species = evaluate(
        model, feature_names, data_loader
    )

    # order rows by prediction error, smallest first (stable sort)
    rows = sorted(zip(ids, targets, predictions, errors, species), key=lambda r: r[3])
    df = pd.DataFrame(
        rows, columns=["identifier", "target", "prediction", "error", "species"]
    )
    df.to_csv(to_path(error_file), sep="\t", index=False)

    # charges
    df = get_charges(label_file, feature_file)
    df.to_csv(to_path(charge_file), sep="\t", index=False)
def main(
    model_name="bdncm/20200808",
    sdf_file="~/Applications/db_access/mol_builder/struct_rxn_ntwk_rgrn_qc.sdf",
    label_file="~/Applications/db_access/mol_builder/label_rxn_ntwk_rgrn_qc.yaml",
    feature_file="~/Applications/db_access/mol_builder/feature_rxn_ntwk_rgrn_qc.yaml",
    feat_filename="~/Applications/db_access/mol_builder/post_analysis/feats.tsv",
    meta_filename="~/Applications/db_access/mol_builder/post_analysis/feats_metadata.tsv",
):
    """Evaluate the model on the test split, dumping the computed features to
    one TSV and per-reaction metadata (targets, errors, charges) to another."""
    seed_torch()

    dataset = load_dataset(model_name, sdf_file, label_file, feature_file)
    _, _, testset = train_validation_test_split(dataset, validation=0.1, test=0.1)
    data_loader = DataLoaderReactionNetwork(testset, batch_size=100, shuffle=False)

    model = load_model(model_name)

    # make predictions, asking evaluate() to also return the learned features
    feature_names = ["atom", "bond", "global"]
    ids, targets, predictions, errors, species, features = evaluate(
        model, feature_names, data_loader, compute_features=True
    )

    # raw feature matrix, no header/index so downstream tools can ingest it
    pd.DataFrame(features).to_csv(
        to_path(feat_filename), sep="\t", header=False, index=False
    )

    # metadata: look up the charge record for each evaluated reaction id
    charges = get_charges(label_file, feature_file)
    records = [
        charges[charges["identifier"] == i].to_dict("records")[0] for i in ids
    ]

    meta = pd.DataFrame(
        {
            "identifier": ids,
            "target": targets,
            "prediction": predictions,
            "error": errors,
            "species": species,
            "reactant charge": [r["charge"] for r in records],
            "product1 charge": [r["product1 charge"] for r in records],
            "product2 charge": [r["product2 charge"] for r in records],
        }
    )
    meta.to_csv(to_path(meta_filename), sep="\t", index=False)
def main(
    model_name="mesd/20200808",
    sdf_file="/Users/mjwen/Applications/db_access/mol_builder/post_analysis/lbdc/struct.sdf",
    label_file="/Users/mjwen/Applications/db_access/mol_builder/post_analysis/lbdc/label.yaml",
    feature_file="/Users/mjwen/Applications/db_access/mol_builder/post_analysis/lbdc/feature.yaml",
    feat_meta_prefix=f"~/Applications/db_access/mol_builder/post_analysis/lbdc",
):
    """Evaluate the (untrained) model on the whole dataset, writing one TSV of
    features per layer plus a metadata TSV, all under `feat_meta_prefix`."""
    seed_torch()

    dataset = load_dataset(model_name, sdf_file, label_file, feature_file)
    data_loader = DataLoaderReactionNetwork(dataset, batch_size=100, shuffle=False)
    model = load_model(model_name, pretrained=False)

    # make predictions; `features` maps layer index -> feature matrix
    feature_names = ["atom", "bond", "global"]
    ids, targets, predictions, broken_bonds, species, features = evaluate(
        model, feature_names, data_loader
    )

    # write one feature file per layer (no header/index: raw matrix)
    for idx, ft in features.items():
        fname = to_path(feat_meta_prefix).joinpath(f"feats_layer{idx}.tsv")
        df = pd.DataFrame(ft)
        df.to_csv(fname, sep="\t", header=False, index=False)

    # metadata
    df = pd.DataFrame(
        {
            "identifier": ids,
            "target": targets,
            "prediction": predictions,
            "broken_bonds": broken_bonds,
            "species": species,
        }
    )
    # BUG FIX: the metadata path was computed but never assigned, so the
    # metadata frame overwrote the last feats_layer{idx}.tsv and
    # feats_metadata.tsv was never written. Assign it to `fname`.
    fname = to_path(feat_meta_prefix).joinpath("feats_metadata.tsv")
    df.to_csv(fname, sep="\t", index=False)