Ejemplo n.º 1
0
    def decode_test():
        """Round-trip SMILES strings read from stdin through tree assembly.

        For every non-blank input line, the first whitespace-separated token
        is taken as a SMILES string.  A ``MolTree`` is built from it and
        recovered, the molecule is re-assembled starting from the tree's
        first node via ``dfs_assemble``, and the SMILES of the re-assembled
        molecule is compared with ``Chem.MolToSmiles`` of the input.

        Each mismatch is printed as ``<expected> <decoded>``.  After every
        processed molecule a running ``<mismatches> <processed>`` line is
        printed.  Output is formatted so that it is identical under the
        Python 2 print statement and the Python 3 print function.
        """
        wrong = 0
        total = 0
        for line in sys.stdin:
            fields = line.split()
            if not fields:
                # A blank line has no token; indexing [0] would raise IndexError.
                continue
            s = fields[0]
            total += 1

            tree = MolTree(s)
            tree.recover()

            cur_mol = copy_edit_mol(tree.nodes[0].mol)
            # One (initially empty) atom map per node plus a leading slot.
            global_amap = [{}] + [{} for _ in tree.nodes]
            # Slot 1 starts as the identity mapping over the first node's atoms.
            global_amap[1] = {
                atom.GetIdx(): atom.GetIdx()
                for atom in cur_mol.GetAtoms()
            }

            dfs_assemble(cur_mol, global_amap, [], tree.nodes[0], None)

            cur_mol = cur_mol.GetMol()
            # Re-parse through SMILES before applying set_atommap.
            cur_mol = Chem.MolFromSmiles(Chem.MolToSmiles(cur_mol))
            set_atommap(cur_mol)
            dec_smiles = Chem.MolToSmiles(cur_mol)

            gold_smiles = Chem.MolToSmiles(Chem.MolFromSmiles(s))
            if gold_smiles != dec_smiles:
                print("%s %s" % (gold_smiles, dec_smiles))
                wrong += 1
            print("%s %s" % (wrong, total))
Ejemplo n.º 2
0
 def __getitem__(self, item):
     """Return the recovered and assembled ``MolTree`` for entry *item*.

     The SMILES string is read from ``self.data[item]``.  If the resulting
     tree has no molecule (``mol`` is ``None``), ``None`` is returned and the
     tree is neither recovered nor assembled.
     """
     tree = MolTree(self.data[item])
     if tree.mol is not None:
         tree.recover()
         tree.assemble()
         return tree
     return None
Ejemplo n.º 3
0
    def __getitem__(self, idx):
        """Build, recover and assemble the ``MolTree`` for ``self.data[idx]``.

        Returns the resulting tree object.
        """
        tree = MolTree(self.data[idx])
        tree.recover()
        tree.assemble()
        return tree
Ejemplo n.º 4
0
 def count():
     """Tally assembly candidates over all SMILES strings read from stdin.

     For every non-blank input line, the first whitespace-separated token is
     used to build a ``MolTree``, which is then recovered and assembled.

     Returns:
         A ``(cnt, n)`` tuple where ``cnt`` is the sum of ``len(node.cands)``
         over every node of every tree and ``n`` is the total number of
         nodes.  ``(0, 0)`` when stdin holds no molecules.
     """
     cnt, n = 0, 0
     for line in sys.stdin:
         fields = line.split()
         if not fields:
             # A blank line has no token; indexing [0] would raise IndexError.
             continue
         tree = MolTree(fields[0])
         tree.recover()
         tree.assemble()
         cnt += sum(len(node.cands) for node in tree.nodes)
         n += len(tree.nodes)
     # Hand the totals back so callers can actually use them.
     return cnt, n
Ejemplo n.º 5
0
    def optimize(self, smiles, sim_cutoff, lr=2.0, num_iter=20):
        """Move the encoding of *smiles* along the gradient of ``self.propNN``.

        The molecule is encoded, and the concatenated tree/graph means are
        used as the starting latent vector.  ``num_iter`` gradient steps of
        size ``lr`` are taken on ``self.propNN`` and every intermediate
        vector is recorded.  A binary search over the recorded vectors then
        looks for a late step whose decoded molecule still has a Tanimoto
        similarity (on ``GetMorganFingerprint(mol, 2)`` fingerprints) of at
        least ``sim_cutoff`` to the input.

        Args:
            smiles: SMILES string of the starting molecule.
            sim_cutoff: minimum similarity the result must keep to *smiles*.
            lr: step size of each gradient update.
            num_iter: number of gradient steps to take.

        Returns:
            ``(new_smiles, sim)`` for the selected step when it decodes and
            meets the cutoff; otherwise ``(smiles, 1.0)``, i.e. the
            unchanged input.
        """
        mol_tree = MolTree(smiles)
        mol_tree.recover()
        _, tree_vec, mol_vec = self.encode([mol_tree])

        mol = Chem.MolFromSmiles(smiles)
        fp1 = AllChem.GetMorganFingerprint(mol, 2)

        # The search starts from the means only, so the log-variance heads
        # are not evaluated here.
        mean = torch.cat([self.T_mean(tree_vec), self.G_mean(mol_vec)], dim=1)
        cur_vec = create_var(mean.data, True)

        visited = []
        for _ in range(num_iter):
            prop_val = self.propNN(cur_vec).squeeze()
            grad = torch.autograd.grad(prop_val, cur_vec)[0]
            # Re-wrap the updated vector so the next step can take a gradient.
            cur_vec = create_var(cur_vec.data + lr * grad.data, True)
            visited.append(cur_vec)

        if not visited:
            # No gradient step was taken (num_iter <= 0): nothing to decode.
            return smiles, 1.0

        def decode_and_score(vec):
            """Decode *vec*; return (smiles, similarity) or (None, None)."""
            tree_part, mol_part = torch.chunk(vec, 2, dim=1)
            candidate = self.decode(tree_part, mol_part, prob_decode=False)
            if candidate is None:
                return None, None
            fp2 = AllChem.GetMorganFingerprint(Chem.MolFromSmiles(candidate), 2)
            return candidate, DataStructs.TanimotoSimilarity(fp1, fp2)

        lo, hi = 0, num_iter - 1
        while lo < hi - 1:
            # Floor division keeps the index an int on Python 3 as well.
            mid = (lo + hi) // 2
            new_smiles, sim = decode_and_score(visited[mid])
            if new_smiles is None or sim < sim_cutoff:
                hi = mid - 1
            else:
                lo = mid

        # visited[lo] may never have been checked inside the loop; verify it.
        new_smiles, sim = decode_and_score(visited[lo])
        if new_smiles is not None and sim >= sim_cutoff:
            return new_smiles, sim
        return smiles, 1.0
Ejemplo n.º 6
0
 def enum_test():
     """Report tree nodes whose label is missing from their candidate list.

     For every non-blank line on stdin, the first whitespace-separated token
     is used to build a ``MolTree``, which is recovered and assembled.  For
     each node whose ``label`` is not in ``cands`` three lines are printed:
     the tree SMILES, the node SMILES with its neighbors' SMILES, and the
     node label with the number of candidates.  Output is formatted so that
     it is identical under the Python 2 print statement and the Python 3
     print function.
     """
     for line in sys.stdin:
         fields = line.split()
         if not fields:
             # A blank line has no token; indexing [0] would raise IndexError.
             continue
         tree = MolTree(fields[0])
         tree.recover()
         tree.assemble()
         for node in tree.nodes:
             if node.label not in node.cands:
                 print(tree.smiles)
                 print("%s %s" % (node.smiles,
                                  [x.smiles for x in node.neighbors]))
                 print("%s %s" % (node.label, len(node.cands)))
Ejemplo n.º 7
0
def tensorize(smiles, assm=True):
    """Build a ``MolTree`` for *smiles* and strip its molecule objects.

    The tree is recovered; when *assm* is true it is also assembled and each
    node's ``label`` is appended to its ``cands`` list if it is not already
    there.  Finally the ``mol`` attribute is deleted from the tree and from
    every node before the tree is returned.
    """
    tree = MolTree(smiles)
    tree.recover()

    if assm:
        tree.assemble()
        for nd in tree.nodes:
            if nd.label in nd.cands:
                continue
            nd.cands.append(nd.label)

    # Drop the molecule objects from the tree itself and from each node.
    del tree.mol
    for nd in tree.nodes:
        del nd.mol
    return tree
Ejemplo n.º 8
0
    def reconstruct(self, smiles, prob_decode=False):
        """Encode *smiles*, sample one latent point and decode it.

        The tree and graph encodings are each mapped to a mean and a
        non-positive log-variance, one noise vector of width
        ``latent_size // 2`` is drawn for each half, and the sampled pair is
        handed to ``self.decode``.

        Args:
            smiles: SMILES string of the molecule to reconstruct.
            prob_decode: forwarded unchanged to ``self.decode``.

        Returns:
            Whatever ``self.decode`` returns for the sampled vectors.
        """
        mol_tree = MolTree(smiles)
        mol_tree.recover()
        _, tree_vec, mol_vec = self.encode([mol_tree])

        tree_mean = self.T_mean(tree_vec)
        tree_log_var = -torch.abs(self.T_var(tree_vec))  # Following Mueller et al.
        mol_mean = self.G_mean(mol_vec)
        mol_log_var = -torch.abs(self.G_var(mol_vec))  # Following Mueller et al.

        # Floor division: on Python 3 a plain "/" yields a float, which
        # torch.randn does not accept as a size.
        half_latent = self.latent_size // 2
        epsilon = create_var(torch.randn(1, half_latent), False)
        tree_vec = tree_mean + torch.exp(tree_log_var / 2) * epsilon
        epsilon = create_var(torch.randn(1, half_latent), False)
        mol_vec = mol_mean + torch.exp(mol_log_var / 2) * epsilon
        return self.decode(tree_vec, mol_vec, prob_decode)
Ejemplo n.º 9
0
    def reconstruct1(self, smiles, prob_decode=False):
        """Encode *smiles* and sample one latent point without decoding it.

        Returns:
            ``(tree_vec, mol_vec, prob_decode)``: the sampled tree and graph
            latent vectors, plus the *prob_decode* flag passed through
            unchanged.
        """
        tree = MolTree(smiles)
        tree.recover()
        _, tree_enc, mol_enc = self.encode([tree])

        t_mean = self.T_mean(tree_enc)
        # Following Mueller et al.
        t_log_var = -torch.abs(self.T_var(tree_enc))
        g_mean = self.G_mean(mol_enc)
        # Following Mueller et al.
        g_log_var = -torch.abs(self.G_var(mol_enc))

        def sample(mean, log_var):
            # Draw fresh noise and shift/scale it by the given statistics.
            noise = create_var(torch.randn(1, int(self.latent_size / 2)), False)
            return mean + torch.exp(log_var / 2) * noise

        # Tree half is sampled first, then the graph half.
        tree_sample = sample(t_mean, t_log_var)
        mol_sample = sample(g_mean, g_log_var)

        return tree_sample, mol_sample, prob_decode
Ejemplo n.º 10
0
 def recon_eval(self, smiles, num_samples=10, decodes_per_sample=10):
     """Decode *smiles* repeatedly from several sampled latent points.

     The molecule is encoded once.  ``num_samples`` latent points are then
     sampled, and each is decoded ``decodes_per_sample`` times with
     ``prob_decode=True``.

     Args:
         smiles: SMILES string of the molecule to evaluate.
         num_samples: number of latent points to sample (default 10).
         decodes_per_sample: decodes attempted per latent point (default 10).

     Returns:
         A list of ``num_samples * decodes_per_sample`` results of
         ``self.decode``, in sampling order.
     """
     mol_tree = MolTree(smiles)
     mol_tree.recover()
     _, tree_vec, mol_vec = self.encode([mol_tree])

     tree_mean = self.T_mean(tree_vec)
     tree_log_var = -torch.abs(self.T_var(tree_vec))  # Following Mueller et al.
     mol_mean = self.G_mean(mol_vec)
     mol_log_var = -torch.abs(self.G_var(mol_vec))  # Following Mueller et al.

     half_latent = self.latent_size // 2
     all_smiles = []
     for _ in range(num_samples):
         epsilon = create_var(torch.randn(1, half_latent), False)
         tree_vec = tree_mean + torch.exp(tree_log_var / 2) * epsilon
         epsilon = create_var(torch.randn(1, half_latent), False)
         mol_vec = mol_mean + torch.exp(mol_log_var / 2) * epsilon
         for _ in range(decodes_per_sample):
             all_smiles.append(
                 self.decode(tree_vec, mol_vec, prob_decode=True))
     return all_smiles
Ejemplo n.º 11
0
    def reconstruct(self, smiles, prob_decode=False, DataFrame=None):
        """Encode *smiles*, sample a latent point, optionally log it, decode.

        Args:
            smiles: SMILES string of the molecule to reconstruct.
            prob_decode: forwarded unchanged to ``self.decode``.
            DataFrame: optional object supporting ``.loc[key] = row``.  When
                given, the concatenated sampled latent vector is stored under
                the key *smiles*.  When ``None`` (the default) nothing is
                recorded.

        Returns:
            Whatever ``self.decode`` returns for the sampled vectors.
        """
        mol_tree = MolTree(smiles)
        mol_tree.recover()
        _, tree_vec, mol_vec = self.encode([mol_tree])

        tree_mean = self.T_mean(tree_vec)
        tree_log_var = -torch.abs(self.T_var(tree_vec))  # Following Mueller et al.
        mol_mean = self.G_mean(mol_vec)
        mol_log_var = -torch.abs(self.G_var(mol_vec))  # Following Mueller et al.

        half_latent = int(self.latent_size / 2)
        epsilon = create_var(torch.randn(1, half_latent), False)
        tree_vec = tree_mean + torch.exp(tree_log_var / 2) * epsilon
        epsilon = create_var(torch.randn(1, half_latent), False)
        mol_vec = mol_mean + torch.exp(mol_log_var / 2) * epsilon

        # The parameter defaults to None, so only record when a target exists;
        # dereferencing None here would raise AttributeError.
        if DataFrame is not None:
            latent = torch.cat((tree_vec, mol_vec), 1)
            DataFrame.loc[smiles] = latent.to('cpu').data.numpy()[0]

        return self.decode(tree_vec, mol_vec, prob_decode)
Ejemplo n.º 12
0
 def __getitem__(self, idx):
     """Return ``(mol_tree, prop)`` for entry *idx*.

     The tree is built from ``self.data[idx]``, then recovered and assembled.
     ``prop`` is the matching entry of ``self.prop_data``.
     """
     tree = MolTree(self.data[idx])
     tree.recover()
     tree.assemble()
     prop = self.prop_data[idx]
     return tree, prop