def testIncludeOnly(self):
    """Check that ``includeOnly`` restricts feature lookups to a single family.

    The test loads the ``featDef.txt`` definitions, embeds the molecule
    ``COCN`` and verifies counting, indexed access and bulk retrieval of
    features when a family filter is supplied.
    """
    cfac = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolChemicalFeatures',
                     'test_data', 'featDef.txt'))
    self.assertEqual(cfac.GetNumFeatureDefs(), 2)

    mol = Chem.MolFromSmiles("COCN")
    rdDistGeom.EmbedMolecule(mol)

    # Counting honours the family filter; an unknown family yields nothing.
    self.assertEqual(cfac.GetNumMolFeatures(mol, includeOnly="HBondAcceptor"), 2)
    self.assertEqual(cfac.GetNumMolFeatures(mol, includeOnly="HBondDonor"), 1)
    self.assertEqual(cfac.GetNumMolFeatures(mol, includeOnly="Bogus"), 0)

    # Indexing one past the end of the filtered set must raise.
    with self.assertRaises(IndexError):
        cfac.GetMolFeature(mol, 1, includeOnly="HBondDonor")
    with self.assertRaises(IndexError):
        cfac.GetMolFeature(mol, 2, includeOnly="HBondAcceptor")

    f = cfac.GetMolFeature(mol, 0, includeOnly="HBondDonor")
    self.assertEqual(f.GetFamily(), 'HBondDonor')

    # Bulk retrieval applies the same filter.
    feats = cfac.GetFeaturesForMol(mol, includeOnly="HBondAcceptor")
    self.assertEqual(len(feats), 2)
    feats = cfac.GetFeaturesForMol(mol, includeOnly="HBondDonor")
    self.assertEqual(len(feats), 1)
    feats = cfac.GetFeaturesForMol(mol, includeOnly="Bogus")
    self.assertEqual(len(feats), 0)
def setUp(self):
    """Create the signature factory used by the tests and store it on ``self.factory``.

    The factory is built from the Pharm2D test ``BaseFeatures.fdef`` file,
    covers 2- and 3-point pharmacophores and uses three distance bins.
    """
    fdef_path = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', 'test_data',
                             'BaseFeatures.fdef')
    distance_bins = [(0, 2), (2, 5), (5, 8)]
    self.factory = SigFactory.SigFactory(
        ChemicalFeatures.BuildFeatureFactory(fdef_path),
        minPointCount=2,
        maxPointCount=3,
    )
    self.factory.SetBins(distance_bins)
    self.factory.Init()
def _getFeatureFamily(mol):
    """Return the chemical feature families of every atom of ``mol`` (with hydrogens added).

    Hydrogens are added to a copy of ``mol``, a 3D conformer is generated
    (falling back to random starting coordinates when the default embedding
    fails) and, when embedding succeeded, the geometry is relaxed with UFF.

    Parameters
    ----------
    mol : rdkit.Chem.Mol
        Input molecule; its ``_Name`` property is read for log messages.

    Returns
    -------
    list of list of str
        One list per atom of the hydrogen-added molecule. Each list starts
        with ``len(feats)`` empty-string placeholders, followed by the
        distinct family names of the features that contain that atom.
    """
    FEATURE_DEF_FILE = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    feat_factory = ChemicalFeatures.BuildFeatureFactory(FEATURE_DEF_FILE)
    hmol = rdkit.Chem.AddHs(mol)

    # Try the default embedding once; random coordinates are only a fallback.
    rc = rdkit.Chem.AllChem.EmbedMolecule(hmol)
    logging.debug("Getting features for mol " + mol.GetProp("_Name"))
    if rc < 0:
        rc = rdkit.Chem.AllChem.EmbedMolecule(hmol, useRandomCoords=True)

    if rc == 0:
        try:
            # A non-zero return triggers a second attempt with more iterations.
            if rdkit.Chem.AllChem.UFFOptimizeMolecule(hmol) != 0:
                rdkit.Chem.AllChem.UFFOptimizeMolecule(hmol, maxIters=1000)
        except ValueError:
            # Best effort: features are still computed on the unoptimized molecule.
            logging.error("Problem with 3D version of molecule " + hmol.GetProp("_Name"))

    feats = feat_factory.GetFeaturesForMol(hmol)
    # The placeholder layout is kept as-is because callers may rely on it.
    atomFeatures = [["" for _ in range(len(feats))] for _ in range(hmol.GetNumAtoms())]
    for feature in feats:
        family = feature.GetFamily()
        for atomId in feature.GetAtomIds():
            if family not in atomFeatures[atomId]:
                atomFeatures[atomId].append(family)
    return atomFeatures
def processArgs(args, parser):
    """Print chemical feature assignments for the SMILES listed in a file.

    ``args`` supplies ``fdefFilename``, ``smilesFilename``, ``maxLines`` and
    ``reverseIt``. If the feature definition file cannot be loaded the
    problem is reported through ``parser.error``. Lines whose SMILES cannot
    be parsed are logged and skipped. With ``reverseIt`` set, output is one
    line per feature listing its atom ids; otherwise it is one line per atom
    listing that atom's feature information.
    """
    try:
        factory = ChemicalFeatures.BuildFeatureFactory(args.fdefFilename)
    except Exception:
        parser.error("Could not parse Fdef file {0.fdefFilename}.".format(args))

    with open(args.smilesFilename) as smilesFile:
        for lineNo, rawLine in enumerate(smilesFile, 1):
            # Stop once ``maxLines`` lines have been handled.
            if lineNo == args.maxLines + 1:
                break

            # The SMILES is the first field of the line.
            smi = splitExpr.split(rawLine.strip())[0].strip()
            mol = Chem.MolFromSmiles(smi)
            if mol is None:
                logger.warning("Could not process smiles '%s' on line %d." % (smi, lineNo))
                continue

            print('Mol-%d\t%s' % (lineNo, smi))

            if not args.reverseIt:
                # Atom-centric listing; atom numbers are printed 1-based.
                for atomIdx, labels in enumerate(GetAtomFeatInfo(factory, mol)):
                    symbol = mol.GetAtomWithIdx(atomIdx).GetSymbol()
                    print('\t% 2s(%d)' % (symbol, atomIdx + 1), end='')
                    if labels:
                        print('\t', ', '.join(labels))
                    else:
                        print()
                continue

            # Feature-centric listing.
            for feat in factory.GetFeaturesForMol(mol):
                print('\t%s-%s: ' % (feat.GetFamily(), feat.GetType()), end='')
                print(', '.join(map(str, feat.GetAtomIds())))
def _align_molecules(self, molecules: List[Chem.Mol]) -> None:
    """ Align a list of molecules to a given pharmacophore.

        Every molecule is matched against ``self.pharmacophore``. Molecules
        that cannot be matched, or for which no alignment score could be
        computed, are stored with a score of 0.0. Otherwise the best embedding
        is stored with a score of ``1 / SSD``. Molecules whose pharmacophore
        embedding raises an exception are skipped and not stored.

        Parameters
        ----------
        molecules : list of rdkit.Chem.Mol
            List of molecules to align. Each must carry a ``_Name`` property.

        Notes
        -----
        Results are appended to ``self.molecules`` as ``MolScore`` named
        tuples ``(score, id, mol)``. ``self.n_molecules`` is increased by
        ``len(molecules)`` regardless of how many are stored.
    """
    self.n_molecules += len(molecules)

    rdkit_pharmacophore, radii = self.pharmacophore.to_rdkit()
    apply_radii_to_bounds(radii, rdkit_pharmacophore)

    fdef = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    featFactory = ChemicalFeatures.BuildFeatureFactory(fdef)

    MolScore = namedtuple("MolScore", ["score", "id", "mol"])

    for mol in tqdm(molecules):
        bounds_matrix = rdDistGeom.GetMoleculeBoundsMatrix(mol)

        can_match, all_matches = EmbedLib.MatchPharmacophoreToMol(
            mol, featFactory, rdkit_pharmacophore)
        if not can_match:
            self.molecules.append(MolScore(0.0, mol.GetProp("_Name"), mol))
            continue

        failed, _, matched_mols, _ = EmbedLib.MatchPharmacophore(
            all_matches, bounds_matrix, rdkit_pharmacophore, useDownsampling=True)
        if failed:
            self.molecules.append(MolScore(0.0, mol.GetProp("_Name"), mol))
            continue

        atom_match = [list(x.GetAtomIds()) for x in matched_mols]
        try:
            mol_H = Chem.AddHs(mol)
            _, embeddings, _ = EmbedLib.EmbedPharmacophore(
                mol_H, atom_match, rdkit_pharmacophore, count=10)
        except Exception:
            # Best effort: a molecule that cannot be embedded is skipped.
            # Only Exception is caught so KeyboardInterrupt/SystemExit propagate.
            continue

        SSDs = transform_embeddings(rdkit_pharmacophore, embeddings, atom_match)
        if len(SSDs) == 0:
            self.molecules.append(MolScore(0.0, mol.GetProp("_Name"), mol))
            continue

        # The smallest SSD is the best fit; its inverse is the score.
        best_fit_index = min(enumerate(SSDs), key=itemgetter(1))[0]
        score = 1 / SSDs[best_fit_index]
        self.molecules.append(
            MolScore(score, mol.GetProp("_Name"), embeddings[best_fit_index]))
def testGithub2603(self):
    """Features must remain usable after the factory reference is dropped."""
    fdef_path = os.path.join(RDConfig.RDDataDir, "BaseFeatures.fdef")
    factory = ChemicalFeatures.BuildFeatureFactory(fdef_path)
    mol = Chem.MolFromSmiles('OCc1ccccc1CN')
    features = factory.GetFeaturesForMol(mol)
    self.assertEqual(features[0].GetFamily(), 'Donor')

    # Release the only local reference to the factory and query again.
    del factory
    self.assertEqual(features[0].GetFamily(), 'Donor')
def alchemy_nodes(mol):
    """Build per-atom features for a molecule, preserving atom indices.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        RDKit molecule object. It must have exactly one conformer.

    Returns
    -------
    atom_feats_dict : dict
        ``'n_feat'`` holds the stacked per-atom float32 feature tensors and
        ``'node_type'`` holds the atomic numbers as an int64 tensor.
    """
    feature_factory = ChemicalFeatures.BuildFeatureFactory(
        osp.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    chem_feats = feature_factory.GetFeaturesForMol(mol)
    conformers = mol.GetConformers()
    assert len(conformers) == 1

    # Flag the atoms that take part in a donor / acceptor feature.
    donor_flags = defaultdict(int)
    acceptor_flags = defaultdict(int)
    for chem_feat in chem_feats:
        family = chem_feat.GetFamily()
        if family == 'Donor':
            flags = donor_flags
        elif family == 'Acceptor':
            flags = acceptor_flags
        else:
            continue
        for atom_idx in chem_feat.GetAtomIds():
            flags[atom_idx] = 1

    hybridization_choices = [Chem.rdchem.HybridizationType.SP,
                             Chem.rdchem.HybridizationType.SP2,
                             Chem.rdchem.HybridizationType.SP3]
    atom_feats_dict = defaultdict(list)
    for atom_idx in range(mol.GetNumAtoms()):
        atom = mol.GetAtomWithIdx(atom_idx)
        atomic_num = atom.GetAtomicNum()
        total_hs = atom.GetTotalNumHs()
        atom_feats_dict['node_type'].append(atomic_num)

        # Order: element one-hot, atomic number, acceptor, donor,
        # aromaticity, hybridization one-hot, hydrogen count.
        row = []
        row.extend(atom_type_one_hot(atom, ['H', 'C', 'N', 'O', 'F', 'S', 'Cl']))
        row.append(atomic_num)
        row.append(acceptor_flags[atom_idx])
        row.append(donor_flags[atom_idx])
        row.extend(atom_is_aromatic(atom))
        row.extend(atom_hybridization_one_hot(atom, hybridization_choices))
        row.append(total_hs)
        atom_feats_dict['n_feat'].append(F.tensor(np.asarray(row, dtype=np.float32)))

    atom_feats_dict['n_feat'] = F.stack(atom_feats_dict['n_feat'], dim=0)
    atom_feats_dict['node_type'] = F.tensor(
        np.asarray(atom_feats_dict['node_type'], dtype=np.int64))
    return atom_feats_dict
def test4Search(self):
    """Screen pickled molecules against a three-point pharmacophore and check the counts.

    The pharmacophore has Acceptor, Donor and Aromatic points with 3D lower
    and upper distance bounds and 2D upper bounds. Records are read from
    ``cdk2-syn-clip100.pkl.gz`` until loading fails. The test expects 100
    molecules read, 93 feature-level matches and 67 hits.
    """
    featFactory = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(self.dataDir, 'BaseFeatures.fdef'))

    activeFeats = [
        ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)),
        ChemicalFeatures.FreeChemicalFeature('Donor', Geometry.Point3D(0.0, 0.0, 0.0)),
        ChemicalFeatures.FreeChemicalFeature('Aromatic', Geometry.Point3D(0.0, 0.0, 0.0)),
    ]
    pcophore = Pharmacophore.Pharmacophore(activeFeats)
    pcophore.setLowerBound(0, 1, 2.251)
    pcophore.setUpperBound(0, 1, 2.451)
    pcophore.setUpperBound2D(0, 1, 3)

    pcophore.setLowerBound(0, 2, 4.970)
    pcophore.setUpperBound(0, 2, 5.170)
    pcophore.setUpperBound2D(0, 2, 6)

    pcophore.setLowerBound(1, 2, 2.681)
    pcophore.setUpperBound(1, 2, 2.881)
    pcophore.setUpperBound2D(1, 2, 6)

    nDone = 0
    nMatches = 0
    nHits = 0
    # The context manager closes the gzip handle even if an assertion fails.
    with gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') as inF:
        while True:
            try:
                _name, molPkl, _storedBounds = cPickle.load(inF, encoding='latin1')
                if PY3:
                    molPkl = bytes(molPkl, encoding='latin1')
            except Exception:
                # Any load failure (normally end of file) ends the scan; the
                # nDone assertion below catches a premature stop.
                break

            nDone += 1

            mol = Chem.Mol(molPkl)
            # The bounds matrix is recomputed from the molecule, not taken from the pickle.
            boundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol)
            DG.DoTriangleSmoothing(boundsMat)

            canMatch, matches = EmbedLib.MatchPharmacophoreToMol(mol, featFactory, pcophore)
            if canMatch:
                nMatches += 1
                r = EmbedLib.MatchPharmacophore(matches, boundsMat, pcophore,
                                                useDownsampling=True, use2DLimits=True,
                                                mol=mol)
                failed, bm, match, details = r
                if not failed:
                    nHits += 1

    self.assertEqual(nDone, 100)
    self.assertEqual(nMatches, 93)
    self.assertEqual(nHits, 67)
def __config_feature_factory(self):
    """Create the RDKit feature factory from ``BaseFeatures.fdef`` and store it on the instance."""
    self.__feat_factory = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
def str2molgraph(rawstr, length):
    """Convert a tokenised SMILES sequence into a NetworkX graph.

    ``rawstr`` is a tuple of tokens, e.g.
    ``('<RX_6>', 'N', 'c', '1', 'n', 'c', '2', '[', 'n', 'H', ']', ...)``.
    The first ``length`` tokens are joined into the SMILES that is parsed.

    Every token position becomes a node. Positions for which
    ``need_emb(token, EMB_ATOMS)`` is true are paired, in order, with the
    atoms of the parsed molecule and receive atom attributes. Bonds become
    edges between the token positions of their two atoms.
    """
    mol = Chem.MolFromSmiles(''.join(rawstr[:length]))
    graph = nx.Graph()

    factory = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    chem_feats = factory.GetFeaturesForMol(mol)

    # Nodes. ``atom_true_index`` maps a molecule atom index to its token position.
    atom_true_index = {}
    for token_pos in range(len(rawstr)):
        if not need_emb(rawstr[token_pos], EMB_ATOMS):
            graph.add_node(token_pos)
            continue
        atom_idx = len(atom_true_index)
        atom_true_index[atom_idx] = token_pos
        atom = mol.GetAtomWithIdx(atom_idx)
        graph.add_node(token_pos,
                       a_type=atom.GetSymbol(),
                       a_num=atom.GetAtomicNum(),
                       acceptor=0,
                       donor=0,
                       aromatic=atom.GetIsAromatic(),
                       hybridization=atom.GetHybridization(),
                       num_h=atom.GetTotalNumHs())

    # Donor and acceptor flags.
    attribute_for_family = {'Donor': 'donor', 'Acceptor': 'acceptor'}
    for chem_feat in chem_feats:
        attribute = attribute_for_family.get(chem_feat.GetFamily())
        if attribute is None:
            continue
        for member_idx in chem_feat.GetAtomIds():
            if member_idx in atom_true_index:
                graph.nodes[atom_true_index[member_idx]][attribute] = 1

    # Edges.
    num_atoms = mol.GetNumAtoms()
    for begin in range(num_atoms):
        for end in range(num_atoms):
            bond = mol.GetBondBetweenAtoms(begin, end)
            if bond is None:
                continue
            if begin in atom_true_index and end in atom_true_index:
                graph.add_edge(atom_true_index[begin], atom_true_index[end],
                               b_type=bond.GetBondType())
    return graph
def get_factory(self):
    """
    Return the Ph4 feature factory, building and caching it on first use.

    The factory is loaded from ``data/RDKitPh4.fdef`` next to this module.
    :return: the cached feature factory
    """
    if self.factory is not None:
        return self.factory
    module_dir = os.path.split(__file__)[0]
    fdef_path = os.path.join(module_dir, "data", "RDKitPh4.fdef")
    self.factory = ChemicalFeatures.BuildFeatureFactory(fdef_path)
    return self.factory
def test4Github252(self):
    """Pharmacophore construction raises before coordinates exist and succeeds afterwards."""
    factory = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    mol = Chem.MolFromSmiles('Cc1ccccc1')
    features = factory.GetFeaturesForMol(mol)

    # The molecule has no coordinates yet.
    with self.assertRaises(RuntimeError):
        Pharmacophore.Pharmacophore(features)

    AllChem.Compute2DCoords(mol)
    Pharmacophore.Pharmacophore(features)
def numpy_pp_fps(mols):
    """ Calculate 2D pharmacophore fingerprints and return them as numpy.ndarrays

    Falsy entries of ``mols`` are skipped.

    :param mols: {list} list of molecules (RDKit mols)
    :return: numpy array containing row-wise fingerprints for every molecule
    """
    # NOTE(review): BuildFeatureFactory is called here without a feature
    # definition file, while other callers pass an fdef path - confirm that
    # ``ChemicalFeatures`` in this module accepts a no-argument call.
    feature_factory = ChemicalFeatures.BuildFeatureFactory()
    signature_factory = SigFactory(feature_factory, useCounts=False,
                                   minPointCount=2, maxPointCount=3)
    signature_factory.SetBins([(0, 2), (2, 4), (4, 6), (6, 8), (8, 100)])
    signature_factory.Init()

    fingerprints = []
    for mol in mols:
        if mol:
            fingerprints.append(Generate.Gen2DFingerprint(mol, signature_factory))
    return _rdk2numpy(fingerprints)
def get_instance(cls):
    """Return the shared feature factory, creating it on first use.

    Raises ``ValueError`` when RDKit cannot be imported. The factory is
    built from RDKit's ``BaseFeatures.fdef`` and cached on ``cls._instance``.
    """
    try:
        from rdkit import RDConfig
        from rdkit.Chem import ChemicalFeatures
    except ModuleNotFoundError:
        raise ValueError("This class requires RDKit to be installed.")

    if cls._instance:
        return cls._instance

    cls._instance = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    return cls._instance
def rdkit_featuredefinition() -> ChemicalFeatures.MolChemicalFeatureFactory:
    """ Build a chemical feature factory from RDKit's ``BaseFeatures.fdef``.

        Returns
        -------
        rdkit.Chem.rdMolChemicalFeatures.MolChemicalFeatureFactory
            The feature factory.
    """
    return ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
def __init__(self, atms: typing.List[str]):
    """Set up lookup tables and the feature factory.

    :param atms: atom labels; each is mapped to its position in the list.
    """
    # Label -> index lookup.
    self.atms_to_idx = {label: position for position, label in enumerate(atms)}
    self.number_atom_options = len(self.atms_to_idx)

    # Index assigned to each supported hybridization type.
    hybridization = Chem.rdchem.HybridizationType
    self.hyb_mapping = {
        hybridization.SP: 0,
        hybridization.SP2: 1,
        hybridization.SP3: 2,
    }
    self.number_hyb_options = len(self.hyb_mapping)

    self.fdef_name = os.path.join(RDDataDir, 'BaseFeatures.fdef')
    self.feats_factory = ChemicalFeatures.BuildFeatureFactory(self.fdef_name)
def __call__(self, mol):
    """Featurizes the input molecule.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        RDKit molecule instance.

    Returns
    -------
    dict
        Maps ``self._atom_data_field`` to the atom features, a float32
        tensor with one row per atom.
    """
    AllChem.ComputeGasteigerCharges(mol)
    num_atoms = mol.GetNumAtoms()

    # Donor / acceptor membership.
    feature_factory = ChemicalFeatures.BuildFeatureFactory(
        osp.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    is_donor, is_acceptor = self.get_donor_acceptor_info(
        feature_factory.GetFeaturesForMol(mol))

    # Symmetrized smallest set of smallest rings, following the practice from
    # Chainer Chemistry (https://github.com/chainer/chainer-chemistry/blob/
    # da2507b38f903a8ee333e487d422ba6dcec49b05/chainer_chemistry/dataset/
    # preprocessors/weavenet_preprocessor.py)
    sssr = Chem.GetSymmSSSR(mol)

    rows = []
    for atom_idx in range(num_atoms):
        # Features computed directly from the RDKit atom; returned as a list.
        row = self._featurizer(mol.GetAtomWithIdx(atom_idx))

        # Donor / acceptor indicators.
        row.append(float(is_donor[atom_idx]))
        row.append(float(is_acceptor[atom_idx]))

        # Number of rings of each size 3..8 that contain this atom.
        ring_counts = [0] * 6
        for ring in sssr:
            size = len(ring)
            if atom_idx in ring and 3 <= size <= 8:
                ring_counts[size - 3] += 1
        row.extend(ring_counts)

        rows.append(row)

    stacked = np.stack(rows)
    return {self._atom_data_field: F.zerocopy_from_numpy(stacked.astype(np.float32))}
def construct_hydrogen_bonding(mol, num_max_atoms=WEAVE_DEFAULT_NUM_MAX_ATOMS):
    """Return a ``(num_max_atoms, 2)`` float32 array of donor/acceptor flags.

    Column 0 marks donors and column 1 marks acceptors. Only the first atom
    id of each feature is flagged.
    """
    factory = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    chem_feats = factory.GetFeaturesForMol(mol)

    column_for_family = {'Donor': 0, 'Acceptor': 1}
    flags = numpy.zeros((num_max_atoms, 2), dtype=numpy.float32)
    for chem_feat in chem_feats:
        column = column_for_family.get(chem_feat.GetFamily())
        if column is None:
            continue
        flags[chem_feat.GetAtomIds()[0], column] = 1.0
    return flags
def load_factory(filename=None):
    """
    Build an RDKit feature factory from an fdef file.

    :param filename: path of the fdef file. If None, ``smarts_features.fdef``
                     located next to this module is used. Default: None.
    :type filename: str
    :return: object of MolChemicalFeatureFactory class
    """
    if filename is None:
        module_dir = path.abspath(path.dirname(__file__))
        filename = path.join(module_dir, 'smarts_features.fdef')
    return ChemicalFeatures.BuildFeatureFactory(filename)
def __new__(cls):
    """Return the single shared instance, creating it on first use.

    The instance carries a ``feature_factory`` attribute built from RDKit's
    ``BaseFeatures.fdef``. RDKit is imported lazily, on first construction.
    """
    if cls._instance is None:
        from rdkit import RDConfig
        from rdkit.Chem import ChemicalFeatures

        instance = super(_ChemicalFeatureGenerator, cls).__new__(cls)
        fdef_path = path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
        instance.feature_factory = ChemicalFeatures.BuildFeatureFactory(fdef_path)
        # Publish only a fully initialised instance, so a failed import or
        # factory build does not cache an object without ``feature_factory``.
        cls._instance = instance
    return cls._instance
def extract_features(mol):
    """Return ``"id,family,x,y,z"`` strings for the selected features of ``mol``.

    Features are computed with the definitions in ``./LigityFeatures.fdef``;
    only those whose family is in ``pharmacophores`` are kept.
    """
    factory = ChemicalFeatures.BuildFeatureFactory('./LigityFeatures.fdef')
    records = []
    for feat in factory.GetFeaturesForMol(mol):
        family = feat.GetFamily()
        if family not in pharmacophores:
            continue
        feat_id = feat.GetId()
        x, y, z = list(feat.GetPos())
        records.append(','.join([str(feat_id), family, str(x), str(y), str(z)]))
    return records
def BuildSigFactory(options=None, fdefFile=None, bins=None,
                    skipFeats=('LumpedHydrophobe', 'ZnBinder')):
    """Build a Pharm2D signature factory from a feature definition file.

    Parameters
    ----------
    options : object, optional
        When truthy, its ``fdefFile`` attribute overrides ``fdefFile``.
    fdefFile : str, optional
        Path of the feature definition file.
    bins : sequence of 2-tuples, optional
        Distance bins. When ``None``, the bins
        ``(2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 100)`` are used.
    skipFeats : sequence of str
        Feature families passed to ``SigFactory`` as ``skipFeats``.

    Returns
    -------
    SigFactory
        Factory with the bins set and ``trianglePruneBins=False``.

    Raises
    ------
    ValueError
        If no feature definition file is available.
    """
    if bins is None:
        # A fresh list per call avoids sharing one mutable default between calls.
        bins = [(2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 100)]
    if options:
        fdefFile = options.fdefFile
    if not fdefFile:
        raise ValueError('bad fdef file')

    from rdkit.Chem import ChemicalFeatures
    from rdkit.Chem.Pharm2D import SigFactory

    featFactory = ChemicalFeatures.BuildFeatureFactory(fdefFile)
    sigFactory = SigFactory.SigFactory(featFactory, skipFeats=skipFeats,
                                       trianglePruneBins=False)
    sigFactory.SetBins(bins)
    return sigFactory
def get_node_features(self, mol):
    """Build the per-atom feature matrix of ``mol``.

    Columns, in order: atomic number, total valence, aromatic flag, total
    hydrogen count (including neighbours), donor flag, acceptor flag, SP /
    SP2 / SP3 indicators, then one indicator column per entry of
    ``self.atomicNbrs``.

    Returns a tuple ``(x, x.sum(axis=0), len(x))`` where ``x`` has one row
    per atom.
    """
    factory = ChemicalFeatures.BuildFeatureFactory(
        osp.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))

    atoms = list(mol.GetAtoms())
    atomic_nums = [atom.GetAtomicNum() for atom in atoms]
    valences = [atom.GetTotalValence() for atom in atoms]
    aromatic = [atom.GetIsAromatic() for atom in atoms]
    total_hs = [atom.GetTotalNumHs(includeNeighbors=True) for atom in atoms]

    hybridizations = [atom.GetHybridization() for atom in atoms]
    sp = [1 if hyb == HybridizationType.SP else 0 for hyb in hybridizations]
    sp2 = [1 if hyb == HybridizationType.SP2 else 0 for hyb in hybridizations]
    sp3 = [1 if hyb == HybridizationType.SP3 else 0 for hyb in hybridizations]

    # Donor / acceptor flags.
    donor = [0] * len(atoms)
    acceptor = [0] * len(atoms)
    for chem_feat in factory.GetFeaturesForMol(mol):
        family = chem_feat.GetFamily()
        if family == 'Donor':
            for atom_idx in chem_feat.GetAtomIds():
                donor[atom_idx] = 1
        elif family == 'Acceptor':
            for atom_idx in chem_feat.GetAtomIds():
                acceptor[atom_idx] = 1

    # One-hot encoding of the element against ``self.atomicNbrs``.
    element_columns = [
        [1 if atomic_num == self.atomicNbrs[col] else 0 for atomic_num in atomic_nums]
        for col in range(len(self.atomicNbrs))
    ]

    columns = [atomic_nums, valences, aromatic, total_hs,
               donor, acceptor, sp, sp2, sp3] + element_columns
    x = np.array(columns).T
    return (x, x.sum(axis=0), len(x))
def smile_to_graph(smile):
    """Build a ``Molecule`` graph, with explicit hydrogens, from a SMILES string.

    Each atom becomes an ``Atom`` node, with ``don`` / ``acc`` set to 1 on
    atoms that belong to a Donor / Acceptor feature. ``add_bond`` is called
    for every ordered pair of bonded atoms, so each bond is added in both
    directions.
    """
    mol = Chem.AddHs(Chem.MolFromSmiles(smile))

    factory = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    chem_feats = factory.GetFeaturesForMol(mol)

    # Construction of the graph
    graph = Molecule()
    graph.atoms = []
    graph.bonds = []

    num_atoms = mol.GetNumAtoms()
    for atom_idx in range(num_atoms):
        rd_atom = mol.GetAtomWithIdx(atom_idx)
        graph.add_atom(Atom(atom_idx,
                            rd_atom.GetSymbol(),
                            rd_atom.GetAtomicNum(),
                            aromatic=rd_atom.GetIsAromatic(),
                            hybrid=rd_atom.GetHybridization(),
                            nbH=rd_atom.GetTotalNumHs()))

    for chem_feat in chem_feats:
        family = chem_feat.GetFamily()
        if family == 'Donor':
            for member_idx in chem_feat.GetAtomIds():
                graph.atoms[member_idx].don = 1
        elif family == 'Acceptor':
            for member_idx in chem_feat.GetAtomIds():
                graph.atoms[member_idx].acc = 1

    for begin in range(num_atoms):
        for end in range(num_atoms):
            rd_bond = mol.GetBondBetweenAtoms(begin, end)
            if rd_bond is None:
                continue
            graph.add_bond(begin, end, rd_bond.GetBondTypeAsDouble())

    return graph
def alchemy_nodes(self, mol):
    """Build per-atom features and positions for ``mol``.

    Positions come from the first conformer of ``mol``.

    Returns a dict with three entries:

    * ``'node_type'``: LongTensor of atomic numbers;
    * ``N_FEAT``: stacked per-atom feature tensors, in the order element
      indicators over ``ATOMS``, atomic number, aromatic flag, donor flag,
      acceptor flag, SP / SP2 / SP3 indicators, total hydrogen count;
    * ``POSITION``: stacked per-atom coordinate tensors.
    """
    atom_feats_dict = defaultdict(list)
    is_donor = defaultdict(int)
    is_acceptor = defaultdict(int)

    mol_featurizer = cf.BuildFeatureFactory(osp.join(rdf.RDDataDir, BASE_FT))
    mol_conformers = mol.GetConformers()
    mol_feats = mol_featurizer.GetFeaturesForMol(mol)
    geom = mol_conformers[0].GetPositions()

    # Flag the atoms that take part in an acceptor / donor feature.
    for feat in mol_feats:
        family = feat.GetFamily()
        if family == 'Acceptor':
            for u in feat.GetAtomIds():
                is_acceptor[u] = 1
        elif family == 'Donor':
            for u in feat.GetAtomIds():
                is_donor[u] = 1

    hybridization_types = (Chem.rdchem.HybridizationType.SP,
                           Chem.rdchem.HybridizationType.SP2,
                           Chem.rdchem.HybridizationType.SP3)
    for u in range(mol.GetNumAtoms()):
        atom = mol.GetAtomWithIdx(u)
        symbol = atom.GetSymbol()
        atom_type = atom.GetAtomicNum()
        aromatic = atom.GetIsAromatic()
        hybridization = atom.GetHybridization()
        num_h = atom.GetTotalNumHs()

        atom_feats_dict['node_type'].append(atom_type)
        atom_feats_dict[POSITION].append(torch.FloatTensor(geom[u]))

        h_u = [int(symbol == x) for x in ATOMS]
        h_u.append(atom_type)
        h_u.append(int(aromatic))
        h_u.append(is_donor[u])
        h_u.append(is_acceptor[u])
        h_u += [int(hybridization == x) for x in hybridization_types]
        h_u.append(num_h)
        atom_feats_dict[N_FEAT].append(torch.FloatTensor(h_u))

    atom_feats_dict['node_type'] = torch.LongTensor(atom_feats_dict['node_type'])
    atom_feats_dict[N_FEAT] = torch.stack(atom_feats_dict[N_FEAT], dim=0)
    # Stack from the key the positions were appended under, so this does
    # not depend on POSITION being the literal 'pos'.
    atom_feats_dict[POSITION] = torch.stack(atom_feats_dict[POSITION], dim=0)
    return atom_feats_dict
def rawsmiles2graph(smiles):
    """Convert a SMILES string into a fully connected NetworkX graph.

    Each atom becomes a node with the attributes ``a_type``, ``a_num``,
    ``acceptor``, ``donor``, ``aromatic``, ``hybridization`` and ``num_h``.
    An edge is added for every pair of atom indices, including an atom with
    itself. Its ``b_type`` is the bond type when the two atoms are bonded
    and ``None`` otherwise.
    """
    m = Chem.MolFromSmiles(smiles)
    g = nx.Graph()

    fdef_name = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    factory = ChemicalFeatures.BuildFeatureFactory(fdef_name)
    feats = factory.GetFeaturesForMol(m)

    # Nodes
    num_atoms = m.GetNumAtoms()
    for i in range(num_atoms):
        atom_i = m.GetAtomWithIdx(i)
        g.add_node(i,
                   a_type=atom_i.GetSymbol(),
                   a_num=atom_i.GetAtomicNum(),
                   acceptor=0,
                   donor=0,
                   aromatic=atom_i.GetIsAromatic(),
                   hybridization=atom_i.GetHybridization(),
                   num_h=atom_i.GetTotalNumHs())

    # Donor and Acceptor properties. ``g.nodes`` is used because the old
    # ``g.node`` attribute no longer exists in current NetworkX releases.
    for feat in feats:
        family = feat.GetFamily()
        if family == 'Donor':
            for atom_idx in feat.GetAtomIds():
                g.nodes[atom_idx]['donor'] = 1
        elif family == 'Acceptor':
            for atom_idx in feat.GetAtomIds():
                g.nodes[atom_idx]['acceptor'] = 1

    # Edges: bonded pairs carry the bond type, all other pairs carry None.
    for i in range(num_atoms):
        for j in range(num_atoms):
            e_ij = m.GetBondBetweenAtoms(i, j)
            b_type = e_ij.GetBondType() if e_ij is not None else None
            g.add_edge(i, j, b_type=b_type)
    return g
def get_atom_features(mol, dist_matrix):
    """
    Compute the following features for each atom in 'mol':
        - atom type: one-hot over C.SYMBOLS
        - degree: one-hot over C.DEGREES
        - hybridization: one-hot over C.HYBRIDIZATIONS
        - is aromatic: bool {0, 1}
        - formal charge: int
        - atomic number: float
        - average bond length: float
        - average atomic number of neighboring atoms: float
        - donor: bool {0, 1} (second-to-last column)
        - acceptor: bool {0, 1} (last column)

    Returns an array of shape (n_atoms, C.N_ATOM_FEATURES). A RuntimeError
    raised while computing donor/acceptor features is printed and otherwise
    ignored.
    """
    features = np.zeros((mol.GetNumAtoms(), C.N_ATOM_FEATURES))
    adj_matrix = rdmolops.GetAdjacencyMatrix(mol)

    for atom in mol.GetAtoms():
        idx = atom.GetIdx()
        bonded = adj_matrix[idx]
        if sum(bonded) > 0:
            ave_bond_length = np.mean(dist_matrix[idx][bonded == 1])
            ave_neighbor_wt = np.mean(
                [nbr.GetAtomicNum() for nbr in atom.GetNeighbors()])
        else:
            # Atoms with no bonds get zeros for both averages.
            ave_bond_length, ave_neighbor_wt = 0.0, 0.0

        a_feats = (
            one_hot_encoding(atom.GetSymbol(), C.SYMBOLS)
            + one_hot_encoding(atom.GetDegree(), C.DEGREES)
            + one_hot_encoding(atom.GetHybridization(), C.HYBRIDIZATIONS)
            + [atom.GetIsAromatic(), atom.GetFormalCharge(), atom.GetAtomicNum(),
               ave_bond_length, ave_neighbor_wt]
        )
        features[idx, :len(a_feats)] = np.array(a_feats)

    feat_factory = ChemicalFeatures.BuildFeatureFactory(C.FDEF)
    try:
        for chem_feat in feat_factory.GetFeaturesForMol(mol):
            family = chem_feat.GetFamily()
            if family == 'Donor':
                column = -2
            elif family == 'Acceptor':
                column = -1
            else:
                continue
            for atom_idx in chem_feat.GetAtomIds():
                features[atom_idx, column] = 1
    except RuntimeError as e:
        print(e)

    return features
def testBasic(self):
    """Exercise the basic feature factory API on the molecule ``COCN``.

    Covers the number and names of feature definitions, counting and
    indexed access of molecule features (with and without recomputation),
    out-of-range access, and the family, position, atom ids, parent
    molecule, factory and active conformer of each returned feature.
    """
    cfac = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolChemicalFeatures',
                     'test_data', 'featDef.txt'))
    self.assertEqual(cfac.GetNumFeatureDefs(), 2)

    fNames = cfac.GetFeatureFamilies()
    self.assertEqual(len(fNames), 2)
    self.assertEqual(fNames[0], 'HBondDonor')
    self.assertEqual(fNames[1], 'HBondAcceptor')

    mol = Chem.MolFromSmiles("COCN")
    rdDistGeom.EmbedMolecule(mol, 30, 100, useExpTorsionAnglePrefs=False,
                             useBasicKnowledge=False)

    self.assertEqual(cfac.GetNumMolFeatures(mol), 3)
    for i in range(cfac.GetNumMolFeatures(mol)):
        self.assertTrue(cfac.GetMolFeature(mol, i))

    # check that the recompute argument works:
    self.assertTrue(cfac.GetMolFeature(mol, 0))
    for i in range(cfac.GetNumMolFeatures(mol)):
        self.assertTrue(cfac.GetMolFeature(mol, i, "", False))
    with self.assertRaises(IndexError):
        cfac.GetMolFeature(mol, 3)

    feats = cfac.GetFeaturesForMol(mol)
    self.assertEqual(len(feats), 3)

    fTypes = ['HBondDonor', 'HBondAcceptor', 'HBondAcceptor']
    positions = [[1.3041, -0.6079, 0.0924], [-0.7066, 0.5994, 0.1824],
                 [1.3041, -0.6079, 0.0924]]
    targetAids = [[3], [1], [3]]
    for i, feat in enumerate(feats):
        self.assertEqual(feat.GetFamily(), fTypes[i])
        pos = list(feat.GetPos())
        aids = list(feat.GetAtomIds())
        self.assertEqual(aids, targetAids[i])
        self.assertTrue(lstFeq(pos, positions[i]))
        nmol = feat.GetMol()
        self.assertEqual(Chem.MolToSmiles(nmol), "COCN")
        ncfac = feat.GetFactory()
        self.assertEqual(ncfac.GetNumFeatureDefs(), 2)
        self.assertEqual(feat.GetActiveConformer(), -1)
def get_node_features(self, mol):
    """Return two per-atom feature matrices for ``mol``.

    ``x_one_hot`` has the columns atomic number and hybridization.
    ``x_normal`` has the columns total valence, implicit hydrogen count,
    formal charge, radical electron count, implicit valence, explicit
    hydrogen count, aromatic flag, isotope, chiral tag, donor flag and
    acceptor flag. Both matrices have one row per atom.
    """
    factory = ChemicalFeatures.BuildFeatureFactory(
        osp.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))

    atoms = list(mol.GetAtoms())

    x_one_hot = [
        [atom.GetAtomicNum() for atom in atoms],
        [atom.GetHybridization() for atom in atoms],
    ]

    x_normal = [
        [atom.GetTotalValence() for atom in atoms],
        [atom.GetNumImplicitHs() for atom in atoms],
        [atom.GetFormalCharge() for atom in atoms],
        [atom.GetNumRadicalElectrons() for atom in atoms],
        [atom.GetImplicitValence() for atom in atoms],
        [atom.GetNumExplicitHs() for atom in atoms],
        [atom.GetIsAromatic() for atom in atoms],
        [atom.GetIsotope() for atom in atoms],
        [atom.GetChiralTag() for atom in atoms],
    ]

    # Donor / acceptor flags.
    donor = [0] * len(atoms)
    acceptor = [0] * len(atoms)
    for chem_feat in factory.GetFeaturesForMol(mol):
        family = chem_feat.GetFamily()
        if family == 'Donor':
            for atom_idx in chem_feat.GetAtomIds():
                donor[atom_idx] = 1
        elif family == 'Acceptor':
            for atom_idx in chem_feat.GetAtomIds():
                acceptor[atom_idx] = 1
    x_normal.append(donor)
    x_normal.append(acceptor)

    return (np.array(x_one_hot).T, np.array(x_normal).T)
def DefaultSigFactory(fdefFile=None, minPointCount=2, maxPointCount=3,
                      bins=[(2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 100)]):
    """Build and initialise a Pharm2D signature factory with default settings.

    Parameters
    ----------
    fdefFile : str, optional
        Path of the feature definition file. When ``None``, RDKit's
        ``BaseFeatures.fdef`` is used.
    minPointCount, maxPointCount : int
        Passed through to ``SigFactory``.
    bins : sequence of 2-tuples
        Distance bins; converted to a tuple before use, so the default
        list is never mutated.

    Returns
    -------
    SigFactory
        An initialised factory that skips the ``ZnBinder`` and
        ``LumpedHydrophobe`` families and has ``trianglePruneBins=False``.
    """
    # Absolute import: a bare ``import SigFactory`` only works as a Python 2
    # implicit relative import from inside the Pharm2D package.
    from rdkit.Chem.Pharm2D import SigFactory
    from rdkit.Chem import ChemicalFeatures

    if fdefFile is None:
        from rdkit import RDConfig
        import os.path
        fdefFile = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')

    featFactory = ChemicalFeatures.BuildFeatureFactory(fdefFile)
    factory = SigFactory.SigFactory(featFactory,
                                    skipFeats=('ZnBinder', 'LumpedHydrophobe'),
                                    minPointCount=minPointCount,
                                    maxPointCount=maxPointCount,
                                    trianglePruneBins=False)
    factory.SetBins(tuple(bins))
    factory.Init()
    return factory