Beispiel #1
0
    def __getitem__(self, idx):
        """Tensorize the in-vocabulary rooted SMILES variants of one molecule.

        The SMILES string stored at ``self.batches[idx]`` is parsed, and for
        every value returned by ``get_leaves(mol)`` a non-isomeric SMILES
        rooted at that atom is generated. Duplicates are dropped and the
        remainder sorted. A variant is kept only when every node of its
        ``MolGraph(...).mol_tree`` has a ``'label'`` found in
        ``self.vocab.vmap``.

        Args:
            idx: Index into ``self.batches``.

        Returns:
            The result of ``MolGraph.tensorize`` over the kept SMILES, or
            ``None`` when no variant passes the vocabulary check.
        """
        mol = Chem.MolFromSmiles(self.batches[idx])
        leaves = get_leaves(mol)
        # Deduplicate via a set, then sort so the output order is reproducible.
        smiles_list = sorted({
            Chem.MolToSmiles(mol, rootedAtAtom=i, isomericSmiles=False)
            for i in leaves
        })

        vmap = self.vocab.vmap
        # all() stops at the first out-of-vocabulary label instead of
        # walking the remaining nodes of an already rejected variant.
        safe_list = [
            s for s in smiles_list
            if all(
                attr['label'] in vmap
                for _, attr in MolGraph(s).mol_tree.nodes(data=True)
            )
        ]

        if not safe_list:
            return None
        return MolGraph.tensorize(safe_list, self.vocab, self.avocab)
Beispiel #2
0
    def __init__(self, data, vocab, avocab, batch_size):
        """Filter ``data`` against the vocabulary and split it into batches.

        A molecule string is kept only if, for every node of its
        ``MolGraph(...).mol_tree``, the node's ``'label'`` is in
        ``vocab.vmap`` and every ``(node smiles, s)`` pair built from the
        node's ``'inter_label'`` entries is in ``vocab.vmap`` as well.
        The before/after counts are printed, and the kept strings are cut
        into consecutive slices of at most ``batch_size`` items.

        Args:
            data: Iterable (with ``len``) of molecule strings accepted by
                ``MolGraph``.
            vocab: Object exposing a ``vmap`` container; stored on the
                instance.
            avocab: Stored on the instance unchanged.
            batch_size: Maximum number of molecules per batch.
        """
        safe_data = []
        for candidate in data:
            graph = MolGraph(candidate)
            # Collect every membership test for this molecule; all of them
            # are evaluated before the verdict is taken.
            checks = []
            for _, node_attr in graph.mol_tree.nodes(data=True):
                node_smiles = node_attr['smiles']
                checks.append(node_attr['label'] in vocab.vmap)
                for _, inter in node_attr['inter_label']:
                    checks.append((node_smiles, inter) in vocab.vmap)
            if all(checks):
                safe_data.append(candidate)

        print(f'After pruning {len(data)} -> {len(safe_data)}')

        batch_starts = range(0, len(safe_data), batch_size)
        self.batches = [
            safe_data[start:start + batch_size] for start in batch_starts
        ]
        self.vocab = vocab
        self.avocab = avocab
Beispiel #3
0
 def __getitem__(self, idx):
     """Tensorize both sides of the paired batch at position ``idx``.

     The batch is unzipped into two sequences (presumably source/target
     molecules -- confirm against the code that builds ``self.batches``).
     Each side goes through ``MolGraph.tensorize``; the last element of the
     first side's result is dropped (the original comment calls it the
     "order", which is not needed for x) and the two results are
     concatenated.
     """
     pairs = self.batches[idx]
     sources, targets = zip(*pairs)

     source_tensors = MolGraph.tensorize(sources, self.vocab, self.avocab)
     # The trailing "order" entry is not needed for the x side.
     source_tensors = source_tensors[:-1]

     target_tensors = MolGraph.tensorize(targets, self.vocab, self.avocab)
     return source_tensors + target_tensors
Beispiel #4
0
 def __getitem__(self, idx):
     """Return ``MolGraph.tensorize`` applied to the batch at ``idx``.

     The batch is taken from ``self.batches`` and tensorized with the
     instance's ``vocab`` and ``avocab``.
     """
     batch = self.batches[idx]
     return MolGraph.tensorize(batch, self.vocab, self.avocab)