def featurize(self, x):
    """Compute a hashed topological-torsion fingerprint for one molecule.

    Args:
        x: A SMILES string or an RDKit ``Chem.rdchem.Mol``. When
            ``self.input_type == 'smiles'`` the value is always parsed as
            SMILES; when it is ``'any'`` the value is parsed only if it is
            not already a ``Mol``; for every other ``input_type`` it is
            handed to RDKit unchanged.

    Returns:
        If ``self.counting`` is truthy, whatever ``count_fp`` returns for
        the hashed count fingerprint (called with ``dim=self.n_bits``).
        Otherwise a ``list`` built by iterating the hashed bit-vector
        fingerprint.

    Raises:
        ValueError: If ``Chem.MolFromSmiles`` returns ``None`` for the input.
    """
    mode = self.input_type
    # 'smiles' always parses; 'any' parses only when x is not yet a Mol.
    must_parse = mode == 'smiles' or (
        mode == 'any' and not isinstance(x, Chem.rdchem.Mol)
    )
    if must_parse:
        smiles = x  # keep the raw input around for the error message
        x = Chem.MolFromSmiles(smiles)
        if x is None:
            raise ValueError('cannot convert Mol from SMILES %s' % smiles)

    if not self.counting:
        bit_vect = rdMol.GetHashedTopologicalTorsionFingerprintAsBitVect(
            x, nBits=self.n_bits, nBitsPerEntry=self.bit_per_entry)
        return list(bit_vect)

    counts = rdMol.GetHashedTopologicalTorsionFingerprint(x, nBits=self.n_bits)
    return count_fp(counts, dim=self.n_bits)
def testHashedTopologicalTorsions(self):
    """Hashed TT fingerprints must not depend on the SMILES starting atom.

    The two inputs spell the same six-membered aromatic ring with one
    nitrogen, starting from different atoms; their fingerprints are
    expected to have a Dice similarity of exactly 1.0.
    """
    first_fp, second_fp = (
        rdMD.GetHashedTopologicalTorsionFingerprint(Chem.MolFromSmiles(smiles))
        for smiles in ("c1ncccc1", "n1ccccc1")
    )
    similarity = DataStructs.DiceSimilarity(first_fp, second_fp)
    self.assertEqual(similarity, 1.0)
fpType -- the type of AP fingerprint ('normal', 'hashed', 'bv') nBits -- the size of the bit vector (only for fpType='bv') minLength -- the minimum path length for an atom pair maxLength -- the maxmimum path length for an atom pair nBitsPerEntry -- the number of bits available for each pair """ if fpType not in ['normal', 'hashed', 'bv']: raise ValueError("Unknown Atom pairs fingerprint type") if atomId < 0: return apDict[fpType](mol, nBits, minLength, maxLength, nBitsPerEntry, 0) if atomId >= mol.GetNumAtoms(): raise ValueError("atom index greater than number of atoms") return apDict[fpType](mol, nBits, minLength, maxLength, nBitsPerEntry, [atomId]) ttDict = {} ttDict['normal'] = lambda m, bits, ts, bpe, ia: rdMD.GetTopologicalTorsionFingerprint(m, targetSize=ts, ignoreAtoms=ia) ttDict['hashed'] = lambda m, bits, ts, bpe, ia: rdMD.GetHashedTopologicalTorsionFingerprint(m, nBits=bits, targetSize=ts, ignoreAtoms=ia) ttDict['bv'] = lambda m, bits, ts, bpe, ia: rdMD.GetHashedTopologicalTorsionFingerprintAsBitVect(m, nBits=bits, targetSize=ts, nBitsPerEntry=bpe, ignoreAtoms=ia) # usage: lambda m,i: GetTTFingerprint(m, i, fpType, nBits, targetSize) def GetTTFingerprint(mol, atomId=-1, fpType='normal', nBits=2048, targetSize=4, nBitsPerEntry=4): """ Calculates the topological torsion fingerprint with the pairs of atomId removed. Parameters: mol -- the molecule of interest atomId -- the atom to remove the torsions for (if -1, no torsion is removed) fpType -- the type of TT fingerprint ('normal', 'hashed', 'bv') nBits -- the size of the bit vector (only for fpType='bv') minLength -- the minimum path length for an atom pair maxLength -- the maxmimum path length for an atom pair nBitsPerEntry -- the number of bits available for each torsion