def __getitem__(self, idx):
    """Return the tensorized source/target batch stored at index ``idx``.

    ``self.data[idx]`` is read as a collection of pairs. Element 0 of every
    pair forms the source side and element 1 forms the target side. Each
    side is passed to ``MolTree.tensorize`` together with ``self.vocab``,
    ``self.avocab`` and ``self.add_target``; the source side is tensorized
    with ``target=False`` and the target side with ``target=True``.

    Returns:
        A 4-tuple ``(x_batch, y_batch, tree1_batch, tree2_batch)``: the two
        ``MolTree.tensorize`` results followed by the untensorized source
        and target lists.
    """
    pairs = self.data[idx]

    # Two separate passes, one per side of the pair.
    source_trees = [pair[0] for pair in pairs]
    target_trees = [pair[1] for pair in pairs]

    source_tensors = MolTree.tensorize(
        source_trees,
        self.vocab,
        self.avocab,
        target=False,
        add_target=self.add_target,
    )
    target_tensors = MolTree.tensorize(
        target_trees,
        self.vocab,
        self.avocab,
        target=True,
        add_target=self.add_target,
    )
    return source_tensors, target_tensors, source_trees, target_trees
def predict(smiles, lr, vocab, avocab, reselect, ori_smiles, iternum, output):
    """Run one model step on ``smiles`` and log the outcome.

    Uses the module-level ``model`` object (it is not a parameter).

    Args:
        smiles: SMILES string of the molecule processed in this iteration.
        lr: Forwarded to ``model.test`` as ``lr``.
        vocab: Forwarded to ``MolTree.tensorize``.
        avocab: Forwarded to ``MolTree.tensorize``.
        reselect: Forwarded to ``model.test`` as ``reselect_num``; the local
            is then overwritten with the value ``model.test`` returns.
        ori_smiles: SMILES string the result is compared against through
            ``similarity``.
        iternum: Iteration number; only used in the log line.
        output: Object with a ``write(str)`` method that receives the log
            line (the line is also printed).

    Returns:
        ``(score1, score2, atomnum, new_smiles, sim, ori_sim, reselect)``.
        If tensorizing or ``model.test`` raises, nothing is logged and the
        fallback ``(score1, score1, atomnum, smiles, 1.0, ori_sim, 0)`` is
        returned, with ``ori_sim`` computed for the unchanged input.

    NOTE(review): if ``Chem`` is RDKit, ``MolFromSmiles`` returns ``None``
    for an unparsable SMILES and ``GetNumAtoms`` would then raise
    ``AttributeError`` -- confirm callers only pass valid SMILES.
    """
    mol = Chem.MolFromSmiles(smiles)
    atomnum = mol.GetNumAtoms()
    tree = get_tree(smiles)
    score1 = penalized_logp(smiles)

    try:
        xbatch = MolTree.tensorize([tree], vocab, avocab, target=False)
        new_smiles, sim, reselect, _score11, score2 = model.test(
            xbatch, tree, lr=lr, reselect_num=reselect)
    except Exception:
        # Best-effort fallback: report the input molecule as unchanged.
        # ``Exception`` (rather than a bare ``except``) keeps this fallback
        # for model/tensorize errors while letting KeyboardInterrupt and
        # SystemExit propagate so a long run can still be interrupted.
        ori_sim = similarity(smiles, ori_smiles)
        return score1, score1, atomnum, smiles, 1.0, ori_sim, 0

    if smiles == new_smiles:
        # The log line reports zeros here, but the returned tuple still
        # carries the ``sim`` and ``score2`` values from ``model.test``.
        s = "iter: %d sim: 0.00 ori_sim: 0.00 imp: 0.00 cannot decode\n" % (iternum)
        ori_sim = similarity(smiles, ori_smiles)
    else:
        ori_sim = similarity(new_smiles, ori_smiles)
        # NOTE(review): ``s`` is only bound for reselect values 0 and 1; any
        # other value returned by ``model.test`` makes the ``output.write``
        # below fail with UnboundLocalError -- confirm 0/1 are the only
        # possible values.
        if reselect == 0:
            s = "iter: %d sim: %.2f ori_sim: %.4f imp: %.2f decode molecule %s\n" % (
                iternum, sim, ori_sim, score2 - score1, new_smiles)
        elif reselect == 1:
            s = "iter: %d sim: %.2f ori_sim: %.4f imp: %.2f decode molecule %s reselect\n" % (
                iternum, sim, ori_sim, score2 - score1, new_smiles)

    output.write(s)
    print(s)
    return score1, score2, atomnum, new_smiles, sim, ori_sim, reselect
def predict(smiles, lr, vocab, avocab, reselect, ori_smiles, iternum, output, prop="logp", sim_type="binary"):
    """Run one model step on ``smiles``, log the outcome and return it.

    Uses the module-level ``model`` object (it is not a parameter).

    Args:
        smiles: SMILES string of the molecule processed in this iteration.
        lr: Accepted but not used by this function.
        vocab: Forwarded to ``MolTree.tensorize``.
        avocab: Forwarded to ``MolTree.tensorize``.
        reselect: Forwarded to ``model.test`` as ``reselect_num``; the local
            is then overwritten with the value ``model.test`` returns.
        ori_smiles: SMILES string the result is compared against through
            ``similarity``.
        iternum: Iteration number; only used in log messages.
        output: Object with a ``write(str)`` method that receives the log
            line (the line is also printed).
        prop: Forwarded to ``get_prop`` and ``model.test``.
        sim_type: Forwarded to ``model.test`` and ``similarity``.

    Returns:
        ``(score1, score2, atomnum, new_smiles, sim, ori_sim, reselect)``.
        If tensorizing or ``model.test`` raises an ``Exception``, the error
        and a "cannot process" message are printed (nothing is written to
        ``output``) and ``(score1, score1, atomnum, smiles, 1.0, 0.0, 0)``
        is returned.
    """
    n_atoms = Chem.MolFromSmiles(smiles).GetNumAtoms()
    mol_tree = get_tree(smiles)
    prop_before = get_prop(smiles, prop=prop)

    try:
        tensors = MolTree.tensorize([mol_tree], vocab, avocab, target=False)
        decoded = model.test(
            tensors, mol_tree, reselect_num=reselect, prop=prop, sim_type=sim_type)
        new_smiles, sim, reselect, _, prop_after = decoded
    except Exception as err:
        print(err)
        print("cannot process molecule %s at iteration %d" % (smiles, iternum))
        return prop_before, prop_before, n_atoms, smiles, 1.0, 0.0, 0

    if smiles != new_smiles:
        ori_sim = similarity(new_smiles, ori_smiles, sim_type=sim_type)
        # Only reselect values 0 and 1 select a template; any other value
        # leaves it unbound and the formatting below raises.
        if reselect == 0:
            template = "iter: %d sim: %.2f ori_sim: %.4f prop1: %.2f prop2: %.2f imp: %.2f decode molecule %s\n"
        elif reselect == 1:
            template = "iter: %d sim: %.2f ori_sim: %.4f prop1: %.2f prop2: %.2f imp: %.2f decode molecule %s reselect\n"
        log_line = template % (
            iternum, sim, ori_sim, prop_before, prop_after,
            prop_after - prop_before, new_smiles)
    else:
        # The model handed back the input unchanged: the log line reports
        # zeros, while the returned tuple keeps the model's values.
        log_line = "iter: %d sim: 0.00 ori_sim: 0.00 imp: 0.00 cannot decode\n" % iternum
        ori_sim = similarity(smiles, ori_smiles, sim_type=sim_type)

    output.write(log_line)
    print(log_line)
    return prop_before, prop_after, n_atoms, new_smiles, sim, ori_sim, reselect
def predict(smiles, model, vocab, avocab, reselect, ori_smiles, iternum, prop="logp"):
    """Run one ``model.test`` step on ``smiles`` and summarize the result.

    Args:
        smiles: SMILES string of the molecule processed in this iteration.
        model: Object whose ``test`` method is called with the tensorized
            tree, the tree itself, ``reselect_num`` and ``prop``.
        vocab: Forwarded to ``MolTree.tensorize``.
        avocab: Forwarded to ``MolTree.tensorize``.
        reselect: Forwarded to ``model.test`` as ``reselect_num``.
        ori_smiles: SMILES string the result is compared against through
            ``similarity`` (called with ``"binary"``).
        iternum: Accepted but not used by this function.
        prop: Forwarded to ``get_prop`` and ``model.test``.

    Returns:
        The list ``[smiles, new_smiles, sim, ori_sim, score1, score2,
        atomnum1, atomnum2]``. If tensorizing or ``model.test`` raises, the
        fallback ``[smiles, smiles, 1.0, ori_sim, score1, score1, atomnum1,
        atomnum1]`` is returned, describing the input as unchanged.

    NOTE(review): if ``Chem`` is RDKit, ``MolFromSmiles`` returns ``None``
    for an unparsable SMILES and ``GetNumAtoms`` would then raise
    ``AttributeError`` (for either ``smiles`` or ``new_smiles``) -- confirm
    both are always valid.
    """
    mol = Chem.MolFromSmiles(smiles)
    atomnum1 = mol.GetNumAtoms()
    tree = get_tree(smiles)
    score1 = get_prop(smiles, prop=prop)

    try:
        xbatch = MolTree.tensorize([tree], vocab, avocab, target=False)
        new_smiles, sim, _reselect, _score11, score2 = model.test(
            xbatch, tree, reselect_num=reselect, prop=prop)
    except Exception:
        # Best-effort fallback. ``Exception`` (rather than a bare
        # ``except``) keeps the fallback for model/tensorize errors while
        # letting KeyboardInterrupt and SystemExit propagate.
        ori_sim = similarity(smiles, ori_smiles, "binary")
        return [smiles, smiles, 1.0, ori_sim, score1, score1, atomnum1, atomnum1]

    # When new_smiles equals smiles this is the same call the original made
    # on ``smiles``, so a single call covers both cases.
    ori_sim = similarity(new_smiles, ori_smiles, "binary")
    atomnum2 = Chem.MolFromSmiles(new_smiles).GetNumAtoms()
    return [smiles, new_smiles, sim, ori_sim, score1, score2, atomnum1, atomnum2]