def testAtomMatch(self):
    """Check ChemicalFeatures.GetAtomMatch on two small aromatic molecules.

    For pyridine the acceptor and aromatic features yield an empty match;
    for aniline the same two feature definitions yield a two-entry match.
    """
    # fdef text: one acceptor definition and one six-membered aromatic ring.
    fdefBlock = """
DefineFeature HAcceptor1 [#7,#8]
    Family HBondAcceptor
    Weights 1.0
EndFeature
DefineFeature Arom1 a1aaaaa1
    Family Aromatic
    Weights 1.0,1.0,1.0,1.0,1.0,1.0
EndFeature
"""
    cfac = ChemicalFeatures.BuildFeatureFactoryFromString(fdefBlock)
    self.assertEqual(cfac.GetNumFeatureDefs(), 2)

    # Pyridine: the nitrogen belongs to the ring as well.
    mol = Chem.MolFromSmiles('n1ccccc1')
    feats = cfac.GetFeaturesForMol(mol)
    self.assertEqual(len(feats), 2)
    m = ChemicalFeatures.GetAtomMatch(feats)
    self.assertFalse(m)

    # Aniline: the nitrogen is exocyclic.
    mol = Chem.MolFromSmiles('c1ccccc1N')
    feats = cfac.GetFeaturesForMol(mol)
    self.assertEqual(len(feats), 2)
    m = ChemicalFeatures.GetAtomMatch(feats)
    self.assertEqual(len(m), 2)
def testBasic(self):
    """Exercise FreeChemicalFeature accessors and each constructor form."""

    def refPoint():
        # Build a fresh point for every use, as the original test does.
        return geom.Point3D(1.0, 2.0, 3.0)

    # Default construction followed by the individual setters.
    feature = ChemicalFeatures.FreeChemicalFeature()
    feature.SetId(123)
    self.assertEqual(feature.GetId(), 123)
    feature.SetFamily("HBondDonor")
    self.assertEqual(feature.GetFamily(), "HBondDonor")
    feature.SetPos(refPoint())
    self.assertTrue(ptFeq(feature.GetPos(), refPoint()))
    feature.SetType("HBondDonor1")
    self.assertEqual(feature.GetType(), "HBondDonor1")

    # Positional constructor without an id: the id reads back as -1.
    feature = ChemicalFeatures.FreeChemicalFeature("HBondDonor", "HBondDonor1", refPoint())
    self.assertEqual(feature.GetId(), -1)
    self.assertEqual(feature.GetFamily(), "HBondDonor")
    self.assertEqual(feature.GetType(), "HBondDonor1")

    # Positional constructor with the id given by keyword.
    feature = ChemicalFeatures.FreeChemicalFeature("HBondDonor", "HBondDonor1", refPoint(),
                                                   id=123)
    self.assertEqual(feature.GetId(), 123)
    self.assertEqual(feature.GetFamily(), "HBondDonor")
    self.assertEqual(feature.GetType(), "HBondDonor1")
    self.assertTrue(ptFeq(feature.GetPos(), refPoint()))

    # Fully keyword-based constructor, arguments in arbitrary order.
    feature = ChemicalFeatures.FreeChemicalFeature(id=123, type="HBondDonor1",
                                                   family="HBondDonor", loc=refPoint())
    self.assertEqual(feature.GetId(), 123)
    self.assertEqual(feature.GetFamily(), "HBondDonor")
    self.assertEqual(feature.GetType(), "HBondDonor1")
    self.assertTrue(ptFeq(feature.GetPos(), refPoint()))
def setUp(self):
    """Create the feature factory and a bounded 3-point pharmacophore fixture."""
    self.dataDir = os.path.join(RDConfig.RDCodeDir, 'Chem/Pharm3D/test_data')
    self.fdefBlock = """
DefineFeature HAcceptor1 [N,O;H0]
  Family HBondAcceptor
  Weights 1.0
EndFeature
DefineFeature HDonor1 [N,O;!H0]
  Family HBondDonor
  Weights 1.0
EndFeature
DefineFeature Aromatic1 c1ccccc1
  Family Aromatic
  Weights 1.,1.,1.,1.,1.,1.
EndFeature\n"""
    self.featFactory = ChemicalFeatures.BuildFeatureFactoryFromString(self.fdefBlock)

    # (family, type, location) for the three pharmacophore points.
    acceptor = ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1',
                                                    Geometry.Point3D(0.0, 0.0, 0.0))
    donor = ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1',
                                                 Geometry.Point3D(2.65, 0.0, 0.0))
    aromatic = ChemicalFeatures.FreeChemicalFeature('Aromatic', 'Aromatic1',
                                                    Geometry.Point3D(5.12, 0.908, 0.0))
    self.feats = [acceptor, donor, aromatic]
    self.pcophore = Pharmacophore.Pharmacophore(self.feats)

    # Lower/upper distance bounds for each pair of points.
    pairBounds = (
        (0, 1, 2.0, 3.3),
        (0, 2, 5.0, 5.4),
        (1, 2, 2.6, 3.0),
    )
    for i, j, lower, upper in pairBounds:
        self.pcophore.setLowerBound(i, j, lower)
        self.pcophore.setUpperBound(i, j, upper)
def setUp(self):
    """Create the feature factory and an unbounded 3-point pharmacophore fixture."""
    self.fdefBlock = """DefineFeature HAcceptor1 [N,O;H0]
  Family HBondAcceptor
  Weights 1.0
EndFeature
DefineFeature HDonor1 [N,O;!H0]
  Family HBondDonor
  Weights 1.0
EndFeature
DefineFeature Aromatic1 c1ccccc1
  Family Aromatic
  Weights 1.0,1.0,1.0,1.0,1.0,1.0
EndFeature\n"""
    self.featFactory = ChemicalFeatures.BuildFeatureFactoryFromString(self.fdefBlock)

    # One free feature per family, placed at fixed coordinates.
    acceptor = ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1',
                                                    Geometry.Point3D(0.0, 0.0, 0.0))
    donor = ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1',
                                                 Geometry.Point3D(2.65, 0.0, 0.0))
    aromatic = ChemicalFeatures.FreeChemicalFeature('Aromatic', 'Aromatic1',
                                                    Geometry.Point3D(5.12, 0.908, 0.0))
    self.feats = [acceptor, donor, aromatic]
    self.pcophore = Pharmacophore.Pharmacophore(self.feats)
def test4Search(self):
    """Screen the pickled CDK2 molecules against a three-point pharmacophore.

    Expects 100 records in the data file, 93 of which can match the
    pharmacophore's features and 67 of which pass the distance-bounds search.
    """
    featFactory = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(self.dataDir, 'BaseFeatures.fdef'))

    activeFeats = [
        ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)),
        ChemicalFeatures.FreeChemicalFeature('Donor', Geometry.Point3D(0.0, 0.0, 0.0)),
        ChemicalFeatures.FreeChemicalFeature('Aromatic', Geometry.Point3D(0.0, 0.0, 0.0)),
    ]
    pcophore = Pharmacophore.Pharmacophore(activeFeats)
    pcophore.setLowerBound(0, 1, 2.251)
    pcophore.setUpperBound(0, 1, 2.451)
    pcophore.setUpperBound2D(0, 1, 3)

    pcophore.setLowerBound(0, 2, 4.970)
    pcophore.setUpperBound(0, 2, 5.170)
    pcophore.setUpperBound2D(0, 2, 6)

    pcophore.setLowerBound(1, 2, 2.681)
    pcophore.setUpperBound(1, 2, 2.881)
    pcophore.setUpperBound2D(1, 2, 6)

    nDone = 0
    nMatches = 0
    nHits = 0
    # The context manager guarantees the gzip handle is closed even when an
    # assertion or RDKit call fails part-way through the stream.
    with gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') as inF:
        while True:
            try:
                name, molPkl, boundsMat = cPickle.load(inF, encoding='latin1')
            except EOFError:
                # End of the pickle stream is the only expected way to stop;
                # any other error should surface as a test failure.
                break
            if PY3:
                molPkl = bytes(molPkl, encoding='latin1')

            nDone += 1

            mol = Chem.Mol(molPkl)
            # The stored bounds matrix is ignored; a fresh one is computed.
            boundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol)
            DG.DoTriangleSmoothing(boundsMat)

            canMatch, matches = EmbedLib.MatchPharmacophoreToMol(mol, featFactory, pcophore)
            if canMatch:
                nMatches += 1
                r = EmbedLib.MatchPharmacophore(matches, boundsMat, pcophore,
                                                useDownsampling=True, use2DLimits=True,
                                                mol=mol)
                failed, bm, match, details = r
                if not failed:
                    nHits += 1

    self.assertEqual(nDone, 100)
    self.assertEqual(nMatches, 93)
    self.assertEqual(nHits, 67)
def setUp(self):
    """Prepare a 2D pharmacophore signature factory (2-3 points, three distance bins)."""
    pathParts = ('Chem', 'Pharm2D', 'test_data', 'BaseFeatures.fdef')
    featFactory = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDCodeDir, *pathParts))

    sigFactory = SigFactory.SigFactory(featFactory, minPointCount=2, maxPointCount=3)
    self.factory = sigFactory
    # Bins must be set before Init() is called.
    sigFactory.SetBins([(0, 2), (2, 5), (5, 8)])
    sigFactory.Init()
def _getFeatureFamily(mol):
    """Return, per atom, the feature families that atom takes part in.

    Hydrogens are added and a 3D embedding (plus UFF optimisation when the
    embedding succeeds) is attempted before features are perceived.

    Returns a list with one entry per atom of the hydrogen-added molecule;
    each entry starts with ``len(feats)`` empty strings followed by the
    distinct family names found for that atom.
    """
    fdef_path = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    factory = ChemicalFeatures.BuildFeatureFactory(fdef_path)

    hmol = rdkit.Chem.AddHs(mol)
    # NOTE(review): the return value of this first embedding is discarded and
    # the molecule is embedded again on the next line — confirm it is needed.
    AllChem.EmbedMolecule(hmol, useRandomCoords=True)
    status = rdkit.Chem.AllChem.EmbedMolecule(hmol)
    logging.debug("Getting features for mol " + mol.GetProp("_Name"))
    if status < 0:
        # Retry from random coordinates when the default embedding fails.
        status = rdkit.Chem.AllChem.EmbedMolecule(hmol, useRandomCoords=True)
    if status == 0:
        try:
            converged = rdkit.Chem.AllChem.UFFOptimizeMolecule(hmol) == 0
            if not converged:
                rdkit.Chem.AllChem.UFFOptimizeMolecule(hmol, maxIters=1000)
        except ValueError:
            logging.error("Problem with 3D version of molecule " + hmol.GetProp("_Name"))

    feats = factory.GetFeaturesForMol(hmol)
    padding = len(feats)
    atomFeatures = [[""] * padding for _ in range(hmol.GetNumAtoms())]
    for feat in feats:
        family = feat.GetFamily()
        for atomId in feat.GetAtomIds():
            perAtom = atomFeatures[atomId]
            if family not in perAtom:
                perAtom.append(family)
    return atomFeatures
def testIncludeOnly(self):
    """Check that ``includeOnly`` restricts feature queries to one family."""
    cfac = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolChemicalFeatures',
                     'test_data', 'featDef.txt'))
    self.assertEqual(cfac.GetNumFeatureDefs(), 2)

    mol = Chem.MolFromSmiles("COCN")
    rdDistGeom.EmbedMolecule(mol)

    # Counting features per family; an unknown family simply yields zero.
    self.assertEqual(cfac.GetNumMolFeatures(mol, includeOnly="HBondAcceptor"), 2)
    self.assertEqual(cfac.GetNumMolFeatures(mol, includeOnly="HBondDonor"), 1)
    self.assertEqual(cfac.GetNumMolFeatures(mol, includeOnly="Bogus"), 0)

    # Indexing past the filtered count must raise.
    with self.assertRaises(IndexError):
        cfac.GetMolFeature(mol, 1, includeOnly="HBondDonor")
    with self.assertRaises(IndexError):
        cfac.GetMolFeature(mol, 2, includeOnly="HBondAcceptor")

    f = cfac.GetMolFeature(mol, 0, includeOnly="HBondDonor")
    self.assertEqual(f.GetFamily(), 'HBondDonor')

    # The bulk accessor honours the same filter.
    feats = cfac.GetFeaturesForMol(mol, includeOnly="HBondAcceptor")
    self.assertEqual(len(feats), 2)
    feats = cfac.GetFeaturesForMol(mol, includeOnly="HBondDonor")
    self.assertEqual(len(feats), 1)
    feats = cfac.GetFeaturesForMol(mol, includeOnly="Bogus")
    self.assertEqual(len(feats), 0)
def ConstrainedEnum(matches, mol, pcophore, bounds, use2DLimits=False,
                    index=0, soFar=None):
    """ Enumerates the list of atom mappings a molecule
    has to a particular pharmacophore.
    We do check distance bounds here.

    Arguments:
      - matches: one sequence of candidate features per pharmacophore point
      - mol, pcophore, bounds, use2DLimits: passed through to _checkMatch
      - index: position in matches currently being expanded (recursion state)
      - soFar: features already chosen for earlier positions; a fresh empty
        list is used when omitted

    Yields (featureList, atomMatch) tuples. When there is nothing left to
    expand (index >= len(matches)) a single (soFar, []) tuple is yielded.
    """
    # A literal [] default would be shared between calls and is handed to the
    # caller by the first branch below, so create it per call instead.
    if soFar is None:
        soFar = []
    nMatches = len(matches)
    if index >= nMatches:
        yield soFar, []
    elif index == nMatches - 1:
        # Last position: complete the combination and report it if valid.
        for entry in matches[index]:
            nextStep = soFar + [entry]
            if index != 0:
                atomMatch = _checkMatch(nextStep, mol, bounds, pcophore, use2DLimits)
            else:
                # Single-point pharmacophore: no distances to check.
                atomMatch = ChemicalFeatures.GetAtomMatch(nextStep)
            if atomMatch:
                yield nextStep, atomMatch
    else:
        for entry in matches[index]:
            nextStep = soFar + [entry]
            # Prune partial combinations that already violate the bounds.
            if index != 0 and not _checkMatch(nextStep, mol, bounds, pcophore, use2DLimits):
                continue
            for val in ConstrainedEnum(matches, mol, pcophore, bounds,
                                       use2DLimits=use2DLimits, index=index + 1,
                                       soFar=nextStep):
                if val:
                    yield val
def testPickle(self):
    """Round-trip a FreeChemicalFeature through pickle and load stored pickles."""
    ffeat = ChemicalFeatures.FreeChemicalFeature("HBondDonor", "HBondDonor1",
                                                 geom.Point3D(1.0, 2.0, 3.0), 123)
    pkl = cPickle.dumps(ffeat)
    ffeat2 = cPickle.loads(pkl, encoding='bytes')
    self.assertTrue(ffeat2.GetId() == ffeat.GetId())
    self.assertTrue(ffeat2.GetFamily() == ffeat.GetFamily())
    self.assertTrue(ffeat2.GetType() == ffeat.GetType())
    self.assertTrue(ptFeq(ffeat2.GetPos(), ffeat.GetPos()))

    # Check that the old pickled versions have not been broken
    oldPkl = os.path.join(RDConfig.RDBaseDir, 'Code/ChemicalFeatures/Wrap/testData/feat.pkl')
    with open(oldPkl, 'rb') as inF:
        ffeat2 = cPickle.load(inF, encoding='bytes')
    # this version (1.0) does not have an id in the byte stream
    self.assertTrue(ffeat2.GetFamily() == ffeat.GetFamily())
    self.assertTrue(ffeat2.GetType() == ffeat.GetType())
    self.assertTrue(ptFeq(ffeat2.GetPos(), ffeat.GetPos()))

    # Test the new version also has the id and works as expected.
    # To regenerate (overwrite) the v2 data file, dump ``ffeat`` with
    # cPickle.dump into the featv2.pkl path below, opened in 'wb+' mode.
    newPkl = os.path.join(RDConfig.RDBaseDir,
                          'Code/ChemicalFeatures/Wrap/testData/featv2.pkl')
    with open(newPkl, 'rb') as inF:
        ffeat2 = cPickle.load(inF, encoding='bytes')
    self.assertTrue(ffeat2.GetId() == ffeat.GetId())
    self.assertTrue(ffeat2.GetFamily() == ffeat.GetFamily())
    self.assertTrue(ffeat2.GetType() == ffeat.GetType())
    self.assertTrue(ptFeq(ffeat2.GetPos(), ffeat.GetPos()))
def initFromLines(self, lines):
    """Initialise the features from text lines of ``name x y z radius``.

    Anything after a ``#`` is ignored, as are blank lines. Fields are
    separated by runs of spaces and/or tabs. If a line has fewer than five
    fields or a non-numeric coordinate/radius, an error is logged and the
    method returns without initialising anything.
    """
    import re
    # Raw string: the previous non-raw '[\ \t]+' relied on an invalid escape.
    spaces = re.compile(r'[ \t]+')
    feats = []
    rads = []
    for lineNum, line in enumerate(lines):
        txt = line.split('#')[0].strip()
        if not txt:
            continue
        splitL = spaces.split(txt)
        if len(splitL) < 5:
            logger.error('Input line %d only contains %d fields, 5 are required. Read failed.',
                         lineNum, len(splitL))
            return
        fName = splitL[0]
        try:
            xP, yP, zP, rad = (float(field) for field in splitL[1:5])
        except ValueError:
            logger.error('Error parsing a number of line %d. Read failed.', lineNum)
            return
        # The feature name doubles as both family and type.
        feats.append(
            ChemicalFeatures.FreeChemicalFeature(fName, fName, Geometry.Point3D(xP, yP, zP)))
        rads.append(rad)
    self._initializeFeats(feats, rads)
def processArgs(args, parser):
    """Print feature information for each SMILES in ``args.smilesFilename``.

    The feature factory is built from ``args.fdefFilename``; a failure is
    reported through ``parser.error``. Reading stops once line number
    ``args.maxLines + 1`` is reached. With ``args.reverseIt`` set, output is
    grouped by feature; otherwise it is grouped by atom.
    """
    try:
        factory = ChemicalFeatures.BuildFeatureFactory(args.fdefFilename)
    except Exception:
        parser.error("Could not parse Fdef file {0.fdefFilename}.".format(args))

    with open(args.smilesFilename) as smilesFile:
        for lineNo, rawLine in enumerate(smilesFile, 1):
            if lineNo == args.maxLines + 1:
                break

            # The SMILES is the first field of the line.
            smi = splitExpr.split(rawLine.strip())[0].strip()
            mol = Chem.MolFromSmiles(smi)
            if mol is None:
                logger.warning("Could not process smiles '%s' on line %d." % (smi, lineNo))
                continue

            print('Mol-%d\t%s' % (lineNo, smi))

            if not args.reverseIt:
                # One output row per atom, listing the features it belongs to.
                for atomIdx, names in enumerate(GetAtomFeatInfo(factory, mol)):
                    symbol = mol.GetAtomWithIdx(atomIdx).GetSymbol()
                    print('\t% 2s(%d)' % (symbol, atomIdx + 1), end='')
                    if names:
                        print('\t', ', '.join(names))
                    else:
                        print()
                continue

            # One output row per feature, listing its atom ids.
            for feat in factory.GetFeaturesForMol(mol):
                print('\t%s-%s: ' % (feat.GetFamily(), feat.GetType()), end='')
                print(', '.join([str(atomId) for atomId in feat.GetAtomIds()]))
def get_hydrogen_bonding(self):
    """Gets hydrogen bonding character for all atoms.

    Returns:
      A dict mapping RDKit Atom indices to a HydrogenBonding object. Atom
      indices not in the dict are neither acceptors nor donors.

    Raises:
      TypeError: if more than one atom index is associated with the same
        acceptor or donor.
    """
    self.check_indices()
    factory = ChemicalFeatures.BuildFeatureFactoryFromString(_HBOND_FEATURE_DEF)
    # Missing indices start out as "neither acceptor nor donor".
    hb = collections.defaultdict(lambda: HydrogenBonding(False, False))
    for feat in factory.GetFeaturesForMol(self.mol):
        family = feat.GetFamily().lower()
        if family not in ('acceptor', 'donor'):
            continue
        atom_ids = feat.GetAtomIds()
        if len(atom_ids) != 1:
            raise TypeError('More than one atom index for %s.' % family)
        idx = atom_ids[0]
        # The family name is also the name of the field to switch on.
        # pylint:disable=protected-access
        hb[idx] = hb[idx]._replace(**{family: True})
        # pylint:enable=protected-access
    return hb
def _align_molecules(self, molecules: List[Chem.Mol]) -> None:
    """ Align a list of molecules to a given pharmacophore.

        Each molecule is appended to ``self.molecules`` as a
        ``MolScore(score, id, mol)`` tuple. Molecules that cannot be matched
        or yield no embeddings get score 0.0 and keep their input structure;
        otherwise the score is ``1 / SSD`` of the best embedding and ``mol``
        is that embedding.

        Parameters
        ----------
        molecules : list of rdkit.Chem.Mol
            List of molecules to align.
    """
    self.n_molecules += len(molecules)

    rdkit_pharmacophore, radii = self.pharmacophore.to_rdkit()
    apply_radii_to_bounds(radii, rdkit_pharmacophore)

    fdef = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    featFactory = ChemicalFeatures.BuildFeatureFactory(fdef)

    MolScore = namedtuple("MolScore", ["score", "id", "mol"])

    for mol in tqdm(molecules):
        bounds_matrix = rdDistGeom.GetMoleculeBoundsMatrix(mol)
        can_match, all_matches = EmbedLib.MatchPharmacophoreToMol(
            mol, featFactory, rdkit_pharmacophore)

        # "No feature match" and "bounds search failed" are handled alike.
        failed = True
        if can_match:
            failed, _, matched_mols, _ = EmbedLib.MatchPharmacophore(
                all_matches, bounds_matrix, rdkit_pharmacophore, useDownsampling=True)
        if failed:
            self.molecules.append(MolScore(0.0, mol.GetProp("_Name"), mol))
            continue

        atom_match = [list(x.GetAtomIds()) for x in matched_mols]
        try:
            mol_H = Chem.AddHs(mol)
            _, embeddings, _ = EmbedLib.EmbedPharmacophore(
                mol_H, atom_match, rdkit_pharmacophore, count=10)
        except Exception:
            # Best effort: an embedding failure skips this molecule, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            # NOTE(review): the molecule is counted in n_molecules yet never
            # appended here, unlike the other failure paths — confirm intent.
            continue

        SSDs = transform_embeddings(rdkit_pharmacophore, embeddings, atom_match)
        if len(SSDs) == 0:
            self.molecules.append(MolScore(0.0, mol.GetProp("_Name"), mol))
            continue

        best_fit_index = min(enumerate(SSDs), key=itemgetter(1))[0]
        # NOTE(review): an SSD of exactly 0 would raise ZeroDivisionError.
        score = 1 / SSDs[best_fit_index]
        self.molecules.append(
            MolScore(score, mol.GetProp("_Name"), embeddings[best_fit_index]))
def testGithub2603(self):
    """Features must stay usable after the reference to their factory is dropped."""
    fdefPath = os.path.join(RDConfig.RDDataDir, "BaseFeatures.fdef")
    cfac = ChemicalFeatures.BuildFeatureFactory(fdefPath)
    m = Chem.MolFromSmiles('OCc1ccccc1CN')
    feats = cfac.GetFeaturesForMol(m)

    firstFeature = feats[0]
    self.assertEqual(firstFeature.GetFamily(), 'Donor')
    # Release the factory, then query the same feature again.
    cfac = None
    self.assertEqual(feats[0].GetFamily(), 'Donor')
def alchemy_nodes(mol):
    """Featurization for all atoms in a molecule. The atom indices
    will be preserved.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        RDKit molecule object

    Returns
    -------
    atom_feats_dict : dict
        Dictionary for atom features, with keys 'n_feat' (stacked per-atom
        feature tensors) and 'node_type' (atomic numbers).
    """
    atom_feats_dict = defaultdict(list)
    # Default of 0 means "not a donor/acceptor" for atoms never flagged.
    is_donor = defaultdict(int)
    is_acceptor = defaultdict(int)

    fdef_name = osp.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    mol_featurizer = ChemicalFeatures.BuildFeatureFactory(fdef_name)
    mol_feats = mol_featurizer.GetFeaturesForMol(mol)
    mol_conformers = mol.GetConformers()
    assert len(mol_conformers) == 1

    for feat in mol_feats:
        family = feat.GetFamily()
        if family == 'Donor':
            for atom_id in feat.GetAtomIds():
                is_donor[atom_id] = 1
        elif family == 'Acceptor':
            for atom_id in feat.GetAtomIds():
                is_acceptor[atom_id] = 1

    hybridizations = [
        Chem.rdchem.HybridizationType.SP,
        Chem.rdchem.HybridizationType.SP2,
        Chem.rdchem.HybridizationType.SP3,
    ]
    for u in range(mol.GetNumAtoms()):
        atom = mol.GetAtomWithIdx(u)
        atom_type = atom.GetAtomicNum()
        num_h = atom.GetTotalNumHs()
        atom_feats_dict['node_type'].append(atom_type)

        # Feature layout: element one-hot, atomic number, acceptor, donor,
        # aromaticity, hybridization one-hot, hydrogen count.
        h_u = list(atom_type_one_hot(atom, ['H', 'C', 'N', 'O', 'F', 'S', 'Cl']))
        h_u.extend([atom_type, is_acceptor[u], is_donor[u]])
        h_u.extend(atom_is_aromatic(atom))
        h_u.extend(atom_hybridization_one_hot(atom, hybridizations))
        h_u.append(num_h)
        atom_feats_dict['n_feat'].append(F.tensor(np.asarray(h_u, dtype=np.float32)))

    atom_feats_dict['n_feat'] = F.stack(atom_feats_dict['n_feat'], dim=0)
    node_types = np.asarray(atom_feats_dict['node_type'], dtype=np.int64)
    atom_feats_dict['node_type'] = F.tensor(node_types)

    return atom_feats_dict
def str2molgraph(rawstr, length):
    """Build a networkx graph with one node per token of ``rawstr``.

    rawstr is a tuple of tokens, e.g. ('<RX_6>', 'N', 'c', '1', 'n', 'c',
    '2', '[', 'n', 'H', ']', 'c', '(', 'C', 'C', 'C', 'c', '3', 'c', 's',
    'c', '(', 'C', '(', '=', 'O', ')', 'O', ')', 'c', '3', ')', 'c', 'c',
    '2', 'c', '(', '=', 'O', ')', '[', 'n', 'H', ']', '1'). The first
    ``length`` tokens are joined into the SMILES that is parsed.

    Tokens for which ``need_emb`` is true are treated as atoms: their nodes
    carry atom attributes and are connected by the molecule's bonds.
    """
    mol = Chem.MolFromSmiles(''.join(rawstr[:length]))
    graph = nx.Graph()

    fdef_name = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    factory = ChemicalFeatures.BuildFeatureFactory(fdef_name)
    feats = factory.GetFeaturesForMol(mol)

    # Nodes. token_of_atom maps an RDKit atom index to its token position.
    token_of_atom = {}
    for pos, token in enumerate(rawstr):
        if not need_emb(token, EMB_ATOMS):
            graph.add_node(pos)
            continue
        atom_idx = len(token_of_atom)
        token_of_atom[atom_idx] = pos
        atom = mol.GetAtomWithIdx(atom_idx)
        graph.add_node(pos, a_type=atom.GetSymbol(), a_num=atom.GetAtomicNum(), acceptor=0,
                       donor=0, aromatic=atom.GetIsAromatic(),
                       hybridization=atom.GetHybridization(), num_h=atom.GetTotalNumHs())

    # Donor and Acceptor properties
    flag_for_family = {'Donor': 'donor', 'Acceptor': 'acceptor'}
    for feat in feats:
        flag = flag_for_family.get(feat.GetFamily())
        if flag is None:
            continue
        for atom_idx in feat.GetAtomIds():
            if atom_idx in token_of_atom:
                graph.nodes[token_of_atom[atom_idx]][flag] = 1

    # Edges: every ordered atom pair is probed, as in the original.
    n_atoms = mol.GetNumAtoms()
    for a in range(n_atoms):
        for b in range(n_atoms):
            bond = mol.GetBondBetweenAtoms(a, b)
            if bond is None:
                continue
            if a not in token_of_atom or b not in token_of_atom:
                continue
            graph.add_edge(token_of_atom[a], token_of_atom[b], b_type=bond.GetBondType())
    return graph
def __config_feature_factory(self):
    """Build the RDKit feature factory from BaseFeatures.fdef and store it on the instance."""
    self.__feat_factory = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
def testParseErrorHandling(self):
    """Malformed fdef text raises ValueError; a missing fdef file raises IOError."""
    # The SMARTS in this definition is invalid ("!HQ").
    fdefBlock = """DefineFeature HDonor1 [N,O;!HQ]
  Family HBondDonor
  Weights 1.0
EndFeature
"""
    with self.assertRaises(ValueError):
        ChemicalFeatures.BuildFeatureFactoryFromString(fdefBlock)

    # This definition lacks its closing EndFeature line.
    fdefBlock = """DefineFeature HDonor1 [N,O;!H0]
  Family HBondDonor
  Weights 1.0
"""
    with self.assertRaises(ValueError):
        ChemicalFeatures.BuildFeatureFactoryFromString(fdefBlock)

    with self.assertRaises(IOError):
        ChemicalFeatures.BuildFeatureFactory('noSuchFile.txt')
def get_factory(self):
    """
    Return the Ph4 feature factory, building it on first use.

    The factory is read from ``data/RDKitPh4.fdef`` next to this module and
    cached on ``self.factory``.
    :return: the feature factory
    """
    if self.factory is not None:
        return self.factory
    module_dir = os.path.dirname(__file__)
    fdef_path = os.path.join(module_dir, "data", "RDKitPh4.fdef")
    self.factory = ChemicalFeatures.BuildFeatureFactory(fdef_path)
    return self.factory
def test4Github252(self):
    """Pharmacophore construction fails without coordinates and works once they exist."""
    fdefPath = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    factory = ChemicalFeatures.BuildFeatureFactory(fdefPath)

    m1 = Chem.MolFromSmiles('Cc1ccccc1')
    feats = factory.GetFeaturesForMol(m1)
    # No conformer yet, so the features have no positions.
    with self.assertRaises(RuntimeError):
        Pharmacophore.Pharmacophore(feats)

    AllChem.Compute2DCoords(m1)
    Pharmacophore.Pharmacophore(feats)
def _initializeFeats(self, feats):
    """Store ``feats`` on ``self._feats``, converting molecule-bound features.

    A MolChemicalFeature is replaced by a FreeChemicalFeature with the same
    family, type and position; anything else is stored unchanged.
    """
    self._feats = []
    for item in feats:
        if not isinstance(item, ChemicalFeatures.MolChemicalFeature):
            self._feats.append(item)
            continue
        loc = item.GetPos()
        self._feats.append(
            ChemicalFeatures.FreeChemicalFeature(item.GetFamily(), item.GetType(),
                                                 Geometry.Point3D(loc[0], loc[1], loc[2])))
# NOTE(review): this definition was stored on a single physical line; the
# nesting below is reconstructed. In particular, nDone/progressCallback are
# assumed to sit inside the screening `if` — confirm against upstream.
def GetAllPharmacophoreMatches(matches, bounds, pcophore, useDownsampling=0, progressCallback=None, use2DLimits=False, mol=None, verbose=False):
  """Collect every feature combination that survives all screening steps.

  Each combination produced by CombiEnum(matches) is kept when it has an atom
  match, passes the optional 2D check (only applied when both use2DLimits and
  mol are set), passes CoarseScreenPharmacophore, and its updated (optionally
  downsampled) copy of the bounds matrix can be triangle-smoothed.

  Returns the list of surviving combinations. With verbose set, intermediate
  bounds matrices are printed. progressCallback, if given, is called with the
  running count nDone.
  """
  res = []
  nDone = 0
  for match in CombiEnum(matches):
    atomMatch = ChemicalFeatures.GetAtomMatch(match)
    if atomMatch and use2DLimits and mol:
      pass2D = Check2DBounds(atomMatch, mol, pcophore)
      if verbose:
        print('..', atomMatch)
        print(' ..Pass2d:', pass2D)
    else:
      # No 2D screening requested (or nothing to screen).
      pass2D = True
    if atomMatch and pass2D and \
       CoarseScreenPharmacophore(atomMatch,bounds,pcophore,verbose=verbose):
      if verbose:
        print(' ..CoarseScreen: Pass')
      # Work on a copy so the caller's bounds matrix is left untouched.
      bm = bounds.copy()
      if verbose:
        print('pre update:')
        for row in bm:
          print(' ', ' '.join(['% 4.2f' % x for x in row]))
      bm = UpdatePharmacophoreBounds(bm, atomMatch, pcophore)
      # sz is not used anywhere below.
      sz = bm.shape[0]
      if verbose:
        print('pre downsample:')
        for row in bm:
          print(' ', ' '.join(['% 4.2f' % x for x in row]))
      if useDownsampling:
        # Flatten the per-feature atom indices into one list.
        indices = []
        for entry in atomMatch:
          indices += list(entry)
        bm = DownsampleBoundsMatrix(bm, indices)
      if verbose:
        print('post downsample:')
        for row in bm:
          print(' ', ' '.join(['% 4.2f' % x for x in row]))
      if DG.DoTriangleSmoothing(bm):
        res.append(match)
      elif verbose:
        print('cannot smooth')
      nDone += 1
      if progressCallback:
        progressCallback(nDone)
  return res
def get_instance(cls):
    """Return the shared BaseFeatures feature factory, creating it on first use.

    The factory is cached on ``cls._instance``.

    Raises:
        ValueError: if RDKit is not installed; the original
            ModuleNotFoundError is attached as ``__cause__``.
    """
    try:
        from rdkit import RDConfig
        from rdkit.Chem import ChemicalFeatures
    except ModuleNotFoundError as err:
        # Chain explicitly so the traceback shows which module was missing.
        raise ValueError("This class requires RDKit to be installed.") from err
    if not cls._instance:
        fdefName = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
        cls._instance = ChemicalFeatures.BuildFeatureFactory(fdefName)
    return cls._instance
def __init__(self, atms: typing.List[str]):
    """Set up atom-symbol and hybridization lookups plus the RDKit feature factory.

    :param atms: atom symbols; each is mapped to its position in the list.
    """
    self.atms_to_idx = {atm: idx for idx, atm in enumerate(atms)}
    self.number_atom_options = len(self.atms_to_idx)

    hyb_types = Chem.rdchem.HybridizationType
    self.hyb_mapping = {
        hyb_types.SP: 0,
        hyb_types.SP2: 1,
        hyb_types.SP3: 2,
    }
    self.number_hyb_options = len(self.hyb_mapping)

    self.fdef_name = os.path.join(RDDataDir, 'BaseFeatures.fdef')
    self.feats_factory = ChemicalFeatures.BuildFeatureFactory(self.fdef_name)
def numpy_pp_fps(mols):
    """ Calculate Gobbi and Poppinger pharmacophore fingerprints and return them as numpy.ndarrays

    :param mols: {list} list of molecules (RDKit mols); falsy entries are skipped
    :return: numpy array containing row-wise fingerprints for every molecule
    """
    # Local import keeps this fix self-contained within the function.
    from rdkit.Chem.Pharm2D import Gobbi_Pharm2D

    # BuildFeatureFactory() cannot be called without an fdef file name, so the
    # factory is built from RDKit's Gobbi/Poppinger feature definitions.
    # NOTE(review): chosen to match the docstring — confirm this is the
    # intended definition set.
    feat_fact = ChemicalFeatures.BuildFeatureFactoryFromString(Gobbi_Pharm2D.fdef)
    sig_fact = SigFactory(feat_fact, useCounts=False, minPointCount=2, maxPointCount=3)
    # Bins must be set before Init() is called.
    sig_fact.SetBins([(0, 2), (2, 4), (4, 6), (6, 8), (8, 100)])
    sig_fact.Init()
    return _rdk2numpy([Generate.Gen2DFingerprint(m, sig_fact) for m in mols if m])
def rdkit_featuredefinition() -> ChemicalFeatures.MolChemicalFeatureFactory:
    """ Build the RDKit chemical feature factory from BaseFeatures.fdef.

        Returns
        -------
        rdkit.Chem.rdMolChemicalFeatures.MolChemicalFeatureFactory
            The feature factory.
    """
    return ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
def __call__(self, mol):
    """Featurizes the input molecule.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        RDKit molecule instance.

    Returns
    -------
    dict
        Mapping atom_data_field as specified in the input argument to the atom
        features, which is a float32 tensor of shape (N, M), N is the number of
        atoms and M is the feature size.
    """
    AllChem.ComputeGasteigerCharges(mol)
    num_atoms = mol.GetNumAtoms()

    # Donor/acceptor flags come from the RDKit base feature definitions.
    factory = ChemicalFeatures.BuildFeatureFactory(
        osp.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    is_donor, is_acceptor = self.get_donor_acceptor_info(factory.GetFeaturesForMol(mol))

    # Symmetrized smallest set of smallest rings, following the practice from
    # Chainer Chemistry (https://github.com/chainer/chainer-chemistry/blob/
    # da2507b38f903a8ee333e487d422ba6dcec49b05/chainer_chemistry/dataset/
    # preprocessors/weavenet_preprocessor.py)
    sssr = Chem.GetSymmSSSR(mol)

    rows = []
    for atom_idx in range(num_atoms):
        # Per-atom features computed directly from the RDKit atom (a list).
        row = self._featurizer(mol.GetAtomWithIdx(atom_idx))
        row.append(float(is_donor[atom_idx]))
        row.append(float(is_acceptor[atom_idx]))

        # Number of rings of each size 3..8 that contain this atom.
        ring_counts = [0] * 6
        for ring in sssr:
            size = len(ring)
            if 3 <= size <= 8 and atom_idx in ring:
                ring_counts[size - 3] += 1
        row.extend(ring_counts)
        rows.append(row)

    stacked = np.stack(rows)
    return {self._atom_data_field: F.zerocopy_from_numpy(stacked.astype(np.float32))}
def extract_features(mol):
    """Return ``"id,family,x,y,z"`` strings for the molecule's selected features.

    Features come from ``./LigityFeatures.fdef``; only those whose family is
    in the module-level ``pharmacophores`` collection are reported.
    """
    factory = ChemicalFeatures.BuildFeatureFactory('./LigityFeatures.fdef')
    features = []
    for feat in factory.GetFeaturesForMol(mol):
        family = feat.GetFamily()
        if family not in pharmacophores:
            continue
        feat_id = feat.GetId()
        x, y, z = list(feat.GetPos())
        fields = (feat_id, family, x, y, z)
        features.append(','.join(str(field) for field in fields))
    return features
def construct_hydrogen_bonding(mol, num_max_atoms=WEAVE_DEFAULT_NUM_MAX_ATOMS):
    """Return a ``(num_max_atoms, 2)`` float32 array of donor/acceptor flags.

    Column 0 is set to 1.0 for the first atom of every 'Donor' feature and
    column 1 for the first atom of every 'Acceptor' feature.
    """
    factory = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    feats = factory.GetFeaturesForMol(mol)

    hydrogen_bonding_vec = numpy.zeros((num_max_atoms, 2), dtype=numpy.float32)
    column_for_family = {'Donor': 0, 'Acceptor': 1}
    for feat in feats:
        column = column_for_family.get(feat.GetFamily())
        if column is None:
            continue
        hydrogen_bonding_vec[feat.GetAtomIds()[0], column] = 1.0
    return hydrogen_bonding_vec