Ejemplo n.º 1
0
 def __getitem__(self, idx):
     """Return the tensorized batch stored at position ``idx``.

     Each element of ``self.data[idx]`` is indexed at ``[0]`` and ``[1]``;
     the first items form the input batch and the second items form the
     target batch.

     Returns:
         A 4-tuple ``(x_batch, y_batch, tree1_batch, tree2_batch)``: the two
         ``MolTree.tensorize`` results followed by the untensorized lists
         they were built from.
     """
     def _tensorize(trees, is_target):
         # Both sides share every argument except the ``target`` flag.
         return MolTree.tensorize(
             trees,
             self.vocab,
             self.avocab,
             target=is_target,
             add_target=self.add_target,
         )

     pairs = self.data[idx]
     sources = [pair[0] for pair in pairs]
     targets = [pair[1] for pair in pairs]

     encoded_sources = _tensorize(sources, False)
     encoded_targets = _tensorize(targets, True)
     return encoded_sources, encoded_targets, sources, targets
Ejemplo n.º 2
0
def predict(smiles, lr, vocab, avocab, reselect, ori_smiles, iternum, output):
    """Run one decoding step on ``smiles`` and log a one-line summary.

    The module-level ``model`` is used for decoding; it is not a parameter.

    Args:
        smiles: SMILES string of the molecule to modify.
        lr: Forwarded to ``model.test`` as ``lr``.
        vocab: Forwarded to ``MolTree.tensorize``.
        avocab: Forwarded to ``MolTree.tensorize``.
        reselect: Forwarded to ``model.test`` as ``reselect_num``.
        ori_smiles: SMILES string the result is compared against for
            ``ori_sim``.
        iternum: Iteration index, used only in the log line.
        output: Object with a ``write`` method that receives the log line.

    Returns:
        ``(score1, score2, atomnum, new_smiles, sim, ori_sim, reselect)``.
        If tensorizing or decoding raises, the input molecule is returned
        unchanged as ``(score1, score1, atomnum, smiles, 1.0, ori_sim, 0)``
        and nothing is logged.
    """
    mol = Chem.MolFromSmiles(smiles)
    # NOTE(review): if ``Chem`` is RDKit, MolFromSmiles returns None for
    # unparsable input and the next line raises AttributeError -- confirm
    # callers only pass valid SMILES.
    atomnum = mol.GetNumAtoms()
    tree = get_tree(smiles)
    score1 = penalized_logp(smiles)

    try:
        xbatch = MolTree.tensorize([tree], vocab, avocab, target=False)
        new_smiles, sim, reselect, _, score2 = model.test(
            xbatch, tree, lr=lr, reselect_num=reselect)
    except Exception:
        # Best-effort fallback: report "no change" for this molecule.
        # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt and
        # SystemExit still stop a long-running loop.
        ori_sim = similarity(smiles, ori_smiles)
        return score1, score1, atomnum, smiles, 1.0, ori_sim, 0

    if smiles == new_smiles:
        ori_sim = similarity(smiles, ori_smiles)
        s = "iter: %d sim: 0.00 ori_sim: 0.00 imp: 0.00 cannot decode\n" % iternum
    else:
        ori_sim = similarity(new_smiles, ori_smiles)
        # Always build the message: previously any ``reselect`` value other
        # than 0 or 1 left ``s`` unbound and crashed at ``output.write``.
        suffix = " reselect" if reselect == 1 else ""
        s = "iter: %d sim: %.2f ori_sim: %.4f imp: %.2f decode molecule %s%s\n" % (
            iternum, sim, ori_sim, score2 - score1, new_smiles, suffix)

    output.write(s)
    print(s)
    return score1, score2, atomnum, new_smiles, sim, ori_sim, reselect
Ejemplo n.º 3
0
def predict(smiles,
            lr,
            vocab,
            avocab,
            reselect,
            ori_smiles,
            iternum,
            output,
            prop="logp",
            sim_type="binary"):
    """Run one decoding step on ``smiles`` and log a one-line summary.

    The module-level ``model`` is used for decoding; it is not a parameter.

    Args:
        smiles: SMILES string of the molecule to modify.
        lr: Not read by this function; kept for signature compatibility.
        vocab: Forwarded to ``MolTree.tensorize``.
        avocab: Forwarded to ``MolTree.tensorize``.
        reselect: Forwarded to ``model.test`` as ``reselect_num``.
        ori_smiles: SMILES string the result is compared against for
            ``ori_sim``.
        iternum: Iteration index, used only in log messages.
        output: Object with a ``write`` method that receives the log line.
        prop: Property name passed to ``get_prop`` and ``model.test``.
        sim_type: Similarity type passed to ``model.test`` and
            ``similarity``.

    Returns:
        ``(score1, score2, atomnum, new_smiles, sim, ori_sim, reselect)``.
        If tensorizing or decoding raises, the error is printed and the
        input molecule is returned unchanged as
        ``(score1, score1, atomnum, smiles, 1.0, 0.0, 0)``; nothing is
        written to ``output`` in that case.
    """
    mol = Chem.MolFromSmiles(smiles)
    atomnum = mol.GetNumAtoms()
    tree = get_tree(smiles)
    score1 = get_prop(smiles, prop=prop)

    try:
        xbatch = MolTree.tensorize([tree], vocab, avocab, target=False)
        new_smiles, sim, reselect, _, score2 = model.test(
            xbatch, tree, reselect_num=reselect, prop=prop, sim_type=sim_type)
    except Exception as e:
        # Best-effort: report the failure and treat the molecule as unchanged.
        print(e)
        print("cannot process molecule %s at iteration %d" % (smiles, iternum))
        return score1, score1, atomnum, smiles, 1.0, 0.0, 0

    if smiles == new_smiles:
        ori_sim = similarity(smiles, ori_smiles, sim_type=sim_type)
        s = "iter: %d sim: 0.00 ori_sim: 0.00 imp: 0.00 cannot decode\n" % iternum
    else:
        ori_sim = similarity(new_smiles, ori_smiles, sim_type=sim_type)
        # Always build the message: previously any ``reselect`` value other
        # than 0 or 1 left ``s`` unbound and crashed at ``output.write``.
        suffix = " reselect" if reselect == 1 else ""
        s = "iter: %d sim: %.2f ori_sim: %.4f prop1: %.2f prop2: %.2f imp: %.2f decode molecule %s%s\n" % (
            iternum, sim, ori_sim, score1, score2, score2 - score1,
            new_smiles, suffix)

    output.write(s)
    print(s)
    return score1, score2, atomnum, new_smiles, sim, ori_sim, reselect
Ejemplo n.º 4
0
def predict(smiles, model, vocab, avocab, reselect, ori_smiles, iternum, prop="logp"):
    """Run one decoding step on ``smiles`` and return a result record.

    Args:
        smiles: SMILES string of the molecule to modify.
        model: Object whose ``test`` method performs the decoding.
        vocab: Forwarded to ``MolTree.tensorize``.
        avocab: Forwarded to ``MolTree.tensorize``.
        reselect: Forwarded to ``model.test`` as ``reselect_num``.
        ori_smiles: SMILES string the result is compared against for
            ``ori_sim``.
        iternum: Not read by this function; kept for signature compatibility.
        prop: Property name passed to ``get_prop`` and ``model.test``.

    Returns:
        A list ``[smiles, new_smiles, sim, ori_sim, score1, score2,
        atomnum1, atomnum2]``. If tensorizing or decoding raises, the input
        molecule is reported unchanged:
        ``[smiles, smiles, 1.0, ori_sim, score1, score1, atomnum1, atomnum1]``.
    """
    mol = Chem.MolFromSmiles(smiles)
    atomnum1 = mol.GetNumAtoms()
    tree = get_tree(smiles)
    score1 = get_prop(smiles, prop=prop)

    try:
        xbatch = MolTree.tensorize([tree], vocab, avocab, target=False)
        new_smiles, sim, _, _, score2 = model.test(
            xbatch, tree, reselect_num=reselect, prop=prop)
    except Exception:
        # Best-effort fallback: report "no change" for this molecule.
        # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt and
        # SystemExit still stop a long-running loop.
        ori_sim = similarity(smiles, ori_smiles, "binary")
        return [smiles, smiles, 1.0, ori_sim, score1, score1, atomnum1, atomnum1]

    # When decoding returns the input unchanged, ``new_smiles == smiles``,
    # so a single call covers both the changed and the unchanged case.
    ori_sim = similarity(new_smiles, ori_smiles, "binary")

    atomnum2 = Chem.MolFromSmiles(new_smiles).GetNumAtoms()
    return [smiles, new_smiles, sim, ori_sim, score1, score2, atomnum1, atomnum2]