def GenerateTopologicalTorsionsFingerprints(Mols):
    """Generate TopologicalTorsions fingerprints for every molecule in Mols.

    The fingerprint flavour is selected by OptionsInfo["SpecifiedFingerprintsType"]:
    a case-insensitive "BitVect" yields hashed bit vectors sized by the
    FPSize and BitsPerHash settings; any other value yields the unhashed
    topological torsion fingerprint.

    Arguments:
        Mols: Iterable of molecules handed to rdMolDescriptors.

    Returns:
        list: One fingerprint per molecule, in input order.
    """
    FingerprintsType = OptionsInfo["SpecifiedFingerprintsType"]
    MiscUtil.PrintInfo(
        "\nGenerating TopologicalTorsions %s fingerprints..." % FingerprintsType
    )

    TorsionsParams = OptionsInfo["FingerprintsParams"]["TopologicalTorsions"]
    UseChirality = TorsionsParams["UseChirality"]
    FPSize = TorsionsParams["FPSize"]
    BitsPerHash = TorsionsParams["BitsPerHash"]

    if not re.match("^BitVect$", FingerprintsType, re.I):
        # Generate LongSparseIntVect fingerprint...
        SparseFingerprints = []
        for Mol in Mols:
            SparseFingerprints.append(
                rdMolDescriptors.GetTopologicalTorsionFingerprint(
                    Mol, includeChirality=UseChirality
                )
            )
        return SparseFingerprints

    # Generate ExplicitBitVect fingerprints...
    MiscUtil.PrintInfo("FPSize: %s; BitsPerHash: %s" % (FPSize, BitsPerHash))
    BitVectFingerprints = []
    for Mol in Mols:
        BitVectFingerprints.append(
            rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
                Mol,
                includeChirality=UseChirality,
                nBits=FPSize,
                nBitsPerEntry=BitsPerHash,
            )
        )
    return BitVectFingerprints
def GenerateTopologicalTorsionsFingerprints(Mols):
    """Generate TopologicalTorsions fingerprints for every molecule in Mols.

    When OptionsInfo["GenerateBitVectFingerints"] is truthy, each molecule
    gets a hashed bit vector with a fixed size of 2048 bits and 4 bits per
    entry; otherwise the unhashed topological torsion fingerprint is
    produced.

    Arguments:
        Mols: Iterable of molecules handed to rdMolDescriptors.

    Returns:
        list: One fingerprint per molecule, in input order.
    """
    MiscUtil.PrintInfo("\nGenerating TopologicalTorsions fingerprints...")

    TorsionsParams = OptionsInfo["FingerprintsParams"]["TopologicalTorsions"]
    UseChirality = TorsionsParams["UseChirality"]

    if not OptionsInfo["GenerateBitVectFingerints"]:
        # Generate LongSparseIntVect fingerprint...
        return [
            rdMolDescriptors.GetTopologicalTorsionFingerprint(
                Mol, includeChirality=UseChirality
            )
            for Mol in Mols
        ]

    # Fixed hashing parameters for the bit vector variant.
    FPSize, BitsPerHash = 2048, 4
    return [
        rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
            Mol,
            includeChirality=UseChirality,
            nBits=FPSize,
            nBitsPerEntry=BitsPerHash,
        )
        for Mol in Mols
    ]
def get_topological_torsion(molecule, length=512):
    """Return the hashed topological torsion bit vector of *molecule*.

    This is a best-effort helper: any exception raised while computing the
    fingerprint is printed, followed by an "error" line naming the
    molecule, and NaN is returned in place of a fingerprint.

    Args:
        molecule: Molecule passed straight to rdMolDescriptors.
        length: Number of bits requested for the fingerprint (default 512).

    Returns:
        The fingerprint bit vector, or ``np.nan`` when generation failed.
    """
    try:
        return rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
            molecule, nBits=length
        )
    except Exception as err:
        # Report the failure and fall through to the NaN placeholder.
        print(err)
        print("error" + str(molecule))
    return np.nan
def featurize(self, x):
    """Convert one input into a hashed topological torsion bit list.

    With ``self.input_type == 'smiles'`` the input is always parsed as a
    SMILES string. With ``'any'`` it is parsed only when it is not already
    a ``Chem.rdchem.Mol``. For every other input type ``x`` is used as
    given.

    Args:
        x: A SMILES string or a molecule, depending on ``self.input_type``.

    Returns:
        list: The fingerprint bits, ``self.n_bits`` being the requested size.

    Raises:
        ValueError: If SMILES parsing yields ``None``.
    """
    mode = self.input_type
    must_parse = mode == 'smiles' or (
        mode == 'any' and not isinstance(x, Chem.rdchem.Mol)
    )
    if must_parse:
        smiles = x  # keep the raw text for the error message
        x = Chem.MolFromSmiles(smiles)
        if x is None:
            raise ValueError('can not convert Mol from SMILES %s' % smiles)

    bit_vector = rdMol.GetHashedTopologicalTorsionFingerprintAsBitVect(
        x, nBits=self.n_bits
    )
    return list(bit_vector)
def testAtomPairOptions(self):
    """Check that atomInvariants drive atom-pair and torsion fingerprints.

    For each of the four fingerprint functions, in turn:
      * default invariants distinguish the two test molecules,
      * identical constant invariants make the fingerprints equal,
      * different constant invariants make them unequal again.
    """
    benzene = Chem.MolFromSmiles('c1ccccc1')
    pyridine = Chem.MolFromSmiles('c1ccccn1')

    fingerprint_names = (
        'GetAtomPairFingerprint',
        'GetHashedAtomPairFingerprintAsBitVect',
        'GetTopologicalTorsionFingerprint',
        'GetHashedTopologicalTorsionFingerprintAsBitVect',
    )
    for fn_name in fingerprint_names:
        # Resolve lazily so a missing function only fails at its own turn.
        fingerprint = getattr(rdMD, fn_name)

        self.assertNotEqual(fingerprint(benzene), fingerprint(pyridine))

        self.assertEqual(
            fingerprint(benzene, atomInvariants=[1] * 6),
            fingerprint(pyridine, atomInvariants=[1] * 6),
        )

        self.assertNotEqual(
            fingerprint(benzene, atomInvariants=[1] * 6),
            fingerprint(pyridine, atomInvariants=[2] * 6),
        )
nBits -- the size of the bit vector (only for fpType='bv') minLength -- the minimum path length for an atom pair maxLength -- the maximum path length for an atom pair nBitsPerEntry -- the number of bits available for each pair """ if fpType not in ['normal', 'hashed', 'bv']: raise ValueError("Unknown Atom pairs fingerprint type") if atomId < 0: return apDict[fpType](mol, nBits, minLength, maxLength, nBitsPerEntry, 0) if atomId >= mol.GetNumAtoms(): raise ValueError("atom index greater than number of atoms") return apDict[fpType](mol, nBits, minLength, maxLength, nBitsPerEntry, [atomId]) ttDict = {} ttDict['normal'] = lambda m, bits, ts, bpe, ia: rdMD.GetTopologicalTorsionFingerprint(m, targetSize=ts, ignoreAtoms=ia) ttDict['hashed'] = lambda m, bits, ts, bpe, ia: rdMD.GetHashedTopologicalTorsionFingerprint(m, nBits=bits, targetSize=ts, ignoreAtoms=ia) ttDict['bv'] = lambda m, bits, ts, bpe, ia: rdMD.GetHashedTopologicalTorsionFingerprintAsBitVect(m, nBits=bits, targetSize=ts, nBitsPerEntry=bpe, ignoreAtoms=ia) # usage: lambda m,i: GetTTFingerprint(m, i, fpType, nBits, targetSize) def GetTTFingerprint(mol, atomId=-1, fpType='normal', nBits=2048, targetSize=4, nBitsPerEntry=4): """ Calculates the topological torsion fingerprint with the torsions of atomId removed. Parameters: mol -- the molecule of interest atomId -- the atom to remove the torsions for (if -1, no torsion is removed) fpType -- the type of TT fingerprint ('normal', 'hashed', 'bv') nBits -- the size of the bit vector (only for fpType='bv') targetSize -- the number of atoms in each torsion nBitsPerEntry -- the number of bits available for each torsion """
# Register fingerprint generators by short name. Each entry is a lambda
# taking one molecule; sizes (nbits, n_cas_bits, longbits) are looked up
# when the lambda is called, not when it is registered.
fpdict.update(
    {
        # Atom-pair and topological-torsion fingerprints.
        "ap": lambda m: Pairs.GetAtomPairFingerprint(m),
        "tt": lambda m: Torsions.GetTopologicalTorsionFingerprintAsIntVect(m),
        # Hashed bit-vector variants of the two above.
        "hashap": lambda m: rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
            m, nBits=nbits
        ),
        "hashap_cas_length": lambda m: rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
            m, nBits=n_cas_bits
        ),
        "hashtt": lambda m: rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
            m, nBits=nbits
        ),
        "hashtt_cas_length": lambda m: rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
            m, nBits=n_cas_bits
        ),
        # Avalon fingerprints at three different sizes.
        "avalon": lambda m: fpAvalon.GetAvalonFP(m, nbits),
        "avalon_cas_length": lambda m: fpAvalon.GetAvalonFP(m, n_cas_bits),
        "laval": lambda m: fpAvalon.GetAvalonFP(m, longbits),
        # RDKit path-based fingerprints with maximum path length 5 and 6.
        "rdk5": lambda m: Chem.RDKFingerprint(
            m, maxPath=5, fpSize=nbits, nBitsPerHash=2
        ),
        "rdk6": lambda m: Chem.RDKFingerprint(
            m, maxPath=6, fpSize=nbits, nBitsPerHash=2
        ),
    }
)
def torsion_fingerprinter(mol):
    """Fingerprint *mol* with hashed topological torsions and serialise it.

    ``fpSize`` and ``targetSize`` are not parameters; they are read from
    the surrounding scope and forwarded as ``nBits`` and ``targetSize``.

    Returns:
        Whatever ``_fp_to_bytes`` produces for the resulting bit vector
        (presumably a bytes encoding - confirm against that helper).
    """
    bit_vector = rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
        mol,
        nBits=fpSize,
        targetSize=targetSize,
    )
    return _fp_to_bytes(bit_vector)
# Fingerprint name -> single-argument callable producing that fingerprint
# for one molecule. Sizes (nbits / longbits) and helpers are resolved when
# a callable is invoked, not when this table is built.
fpFunc_dict = {
    # Morgan fingerprints, radius 0-3.
    'ecfp0': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 0, nBits=nbits),
    'ecfp2': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 1, nBits=nbits),
    'ecfp4': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 2, nBits=nbits),
    'ecfp6': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 3, nBits=nbits),
    # Feature-based Morgan fingerprints.
    'fcfp2': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 1, useFeatures=True, nBits=nbits),
    'fcfp4': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 2, useFeatures=True, nBits=nbits),
    'fcfp6': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 3, useFeatures=True, nBits=nbits),
    # Same as above but sized with longbits.
    'lecfp4': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 2, nBits=longbits),
    'lecfp6': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 3, nBits=longbits),
    'lfcfp4': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 2, useFeatures=True, nBits=longbits),
    'lfcfp6': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 3, useFeatures=True, nBits=longbits),
    # MACCS keys, hashed atom pairs and hashed topological torsions.
    'maccs': lambda m: MACCSkeys.GenMACCSKeys(m),
    'hashap': lambda m: rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(m, nBits=nbits),
    'hashtt': lambda m: rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(m, nBits=nbits),
    # Avalon fingerprints.
    'avalon': lambda m: fpAvalon.GetAvalonFP(m, nbits),
    'laval': lambda m: fpAvalon.GetAvalonFP(m, longbits),
    # RDKit path-based fingerprints with maximum path length 5-7.
    'rdk5': lambda m: Chem.RDKFingerprint(m, maxPath=5, fpSize=nbits, nBitsPerHash=2),
    'rdk6': lambda m: Chem.RDKFingerprint(m, maxPath=6, fpSize=nbits, nBitsPerHash=2),
    'rdk7': lambda m: Chem.RDKFingerprint(m, maxPath=7, fpSize=nbits, nBitsPerHash=2),
    # Entries delegating to get_tpatf and calc.CalcDescriptors.
    'tpatf': lambda m: get_tpatf(m),
    'rdkDes': lambda m: calc.CalcDescriptors(m),
}

# Names of the entries above that are sized with longbits.
long_fps = {'laval', 'lecfp4', 'lecfp6', 'lfcfp4', 'lfcfp6'}

# Fingerprint names selected for generation; each is a key of fpFunc_dict.
fps_to_generate = [
    'fcfp4',
    'rdkDes',
    'tpatf',
    'rdk5',
    'hashap',
    'avalon',
    'laval',
    'rdk7',
]

# Starts empty; nothing in this block populates it.
ModFileName_LoadedModel_dict = {}
1, useFeatures=True, nBits=nbits) fpdict['fcfp4'] = AllChem.GetMorganFingerprintAsBitVect(smiles, 2, useFeatures=True, nBits=nbits) fpdict['fcfp6'] = AllChem.GetMorganFingerprintAsBitVect(smiles, 3, useFeatures=True, nBits=nbits) fpdict['maccs'] = MACCSkeys.GenMACCSKeys(smiles) fpdict['ap'] = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect( smiles, nBits=nbits) fpdict[ 'tt'] = rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect( smiles, nBits=nbits) fpdict['rdk5'] = Chem.RDKFingerprint(smiles, maxPath=5, fpSize=nbits, nBitsPerHash=2) fpdict['rdk6'] = Chem.RDKFingerprint(smiles, maxPath=6, fpSize=nbits, nBitsPerHash=2) fpdict['rdk7'] = Chem.RDKFingerprint(smiles, maxPath=7, fpSize=nbits, nBitsPerHash=2) #fpdict['avalon'] = fpAvalon.GetAvalonFP(smiles, nbits) #Convert to hex for space save, to go back to bin use "bin(int(x, 16))[2:]