コード例 #1
0
 def process(molid, smiles):
     """Write the circular-substructure counts of one molecule as one line.

     The line is the molid followed by tab-separated features. Each feature
     is ``cansmiles count``; when ``write_centers`` (from the enclosing scope)
     is true, the ``center radius`` pairs that produced the substructure are
     appended to it. If anything goes wrong while processing the molecule,
     the failure is logged and the molid is written with a ``*FAILED*`` mark.

     Passing the ``_END_MOLID`` sentinel as *molid* closes the writer and
     does nothing else.
     """
     if molid is _END_MOLID:
         writer.close()
         return
     try:
         mol = to_rdkit_mol(smiles)
         fpsinfo = {}
         # N.B. We won't actually use rdkit hash, so we won't ask for nonzero values...
         # Is there a way of asking rdkit to give us this directly?
         # The returned fingerprint is discarded on purpose: the call is made
         # only for its side effect of filling fpsinfo.
         AllChem.GetMorganFingerprint(mol, max_radius, bitInfo=fpsinfo, useFeatures=fcfp)
         counts = defaultdict(int)
         centers = defaultdict(list)
         for bit_descs in fpsinfo.values():
             for center, radius in bit_descs:
                 # Key on the canonical smiles of the environment instead of
                 # on the hashed bit id.
                 cansmiles = explain_circular_substructure(mol, center, radius)
                 counts[cansmiles] += 1
                 centers[cansmiles].append((center, radius))
         # .items() instead of .iteritems(): the latter does not exist on
         # Python 3, where the AttributeError would be swallowed below and
         # every molecule would be reported as failed.
         if write_centers:
             features_strings = ['%s %d %s' % (cansmiles,
                                               count,
                                               ' '.join(['%d %d' % (c, r) for c, r in centers[cansmiles]]))
                                 for cansmiles, count in counts.items()]
         else:
             features_strings = ['%s %d' % (cansmiles, count) for cansmiles, count in counts.items()]
         writer.write('%s\t%s\n' % (molid, '\t'.join(features_strings)))
     except Exception:
         # Deliberately broad: any per-molecule failure is recorded and the run
         # goes on. Exception (not a bare except) lets KeyboardInterrupt and
         # SystemExit through, so the job can still be stopped.
         info('Failed molecule %s: %s' % (molid, smiles))
         writer.write('%s\t*FAILED*\n' % molid)
コード例 #2
0
 def process(molid, smiles):
     """Append one molecule's id and descriptor row to the open storage.

     A new entry is added at the end of ``molids`` and ``descs`` (both from
     the enclosing scope; presumably resizable h5py datasets -- TODO confirm).
     If the molecule cannot be parsed or its descriptors cannot be computed,
     the failure is logged and the row is filled with NaN, so that ids and
     rows stay aligned.

     Passing the ``_END_MOLID`` sentinel as *molid* closes ``h5`` and does
     nothing else.
     """
     if molid is _END_MOLID:
         h5.close()
         return
     ne = len(molids)
     # Grow both containers before doing anything that depends on the molecule.
     # Previously descs was resized inside the try block, after to_rdkit_mol;
     # a failure before that point made the NaN fallback write to row `ne`
     # before it existed.
     molids.resize((ne + 1,))
     descs.resize((ne + 1, nf))
     molids[ne] = molid
     try:
         mol = to_rdkit_mol(smiles)
         descs[ne, :] = computer.compute(mol)[0]
     except Exception:
         # Deliberately broad: any per-molecule failure becomes a NaN row.
         # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
         # through, so the job can still be stopped.
         info('Failed molecule %s: %s' % (molid, smiles))
         descs[ne, :] = [np.nan] * nf
コード例 #3
0
ファイル: molscatalog.py プロジェクト: sdvillal/ccl-malaria
 def save_from_smiles_iterator(self, it):
     """Build the catalog files from an iterator of (molid, smiles) pairs.

     Three files are (re)written:
       - ``molsdata`` under ``self._root``: the concatenated ``ToBinary()``
         blobs of every molecule that could be parsed;
       - ``self._molids_file``: one molid per line, in iteration order;
       - ``self._coords_file``: an array with one ``(offset, length)`` row per
         molid that locates its blob in ``molsdata``; molecules that could
         not be parsed get ``(-1, 0)``.
     """
     seen_ids = []
     spans = []
     offset = 0
     with open(op.join(self._root, 'molsdata'), 'wb') as blob_file:
         for molid, smiles in it:
             mol = to_rdkit_mol(smiles, molid=molid)
             if mol is None:
                 # Unparseable molecule: keep its id, point to no data.
                 span = (-1, 0)
             else:
                 blob = mol.ToBinary()
                 size = len(blob)
                 span = (offset, size)
                 offset += size
                 blob_file.write(blob)
             seen_ids.append(molid)
             spans.append(span)
     with open(self._molids_file, 'wt') as ids_file:
         ids_file.writelines(molid + '\n' for molid in seen_ids)
     np.save(self._coords_file, np.array(spans))