def get_2dpharm(mols, fp_size=2000):
    """Build folded Gobbi 2D-pharmacophore fingerprints for several molecules.

    Every on-bit index of a molecule's Gobbi signature is hashed with
    ``mmh3.hash`` and reduced modulo ``fp_size``; the resulting positions are
    set to 1 in a fixed-length row.

    Args:
        mols: iterable of molecules accepted by ``Generate.Gen2DFingerprint``.
        fp_size: number of columns of the folded fingerprint.

    Returns:
        An integer ``scipy.sparse.csr_matrix`` with ``fp_size`` columns and
        one row per molecule that could be fingerprinted. Molecules that fail
        are reported on stdout and skipped, so the matrix can have fewer rows
        than there are input molecules (zero rows if none succeeded).
    """
    factory = Gobbi_Pharm2D.factory
    rows = []
    for mol in mols:
        try:
            sig = Generate.Gen2DFingerprint(mol, factory)
            positions = [mmh3.hash(str(bit)) % fp_size for bit in sig.GetOnBits()]
            row = np.zeros(fp_size, dtype=int)
            if positions:
                row[positions] = 1
            rows.append(row)
        except Exception:
            # Best effort: report the failing molecule and keep going.
            print('ERROR')
            try:
                print(Chem.MolToSmiles(mol))
            except Exception:
                # The molecule itself may be unusable (e.g. None); reporting
                # it must not abort the remaining molecules.
                print(repr(mol))
    if not rows:
        # Keep the column count even when there is nothing to stack.
        return sparse.csr_matrix((0, fp_size), dtype='int')
    return sparse.csr_matrix(np.array(rows)).astype('int')
    def testDefaultFactory(self):
        """Check signature size, on-bits and bit descriptions of the default factory."""
        from rdkit.Chem import Pharm2D
        sig_factory = Pharm2D.DefaultSigFactory()
        self.assertEqual(sig_factory.GetNumBins(), 7)

        def check_summary(bit, expected_points, expected_labels, expected_rows):
            # Compare the factory's summary of ``bit`` with the expected point
            # count, feature labels and distance-matrix rows.
            summary = sig_factory._GetBitSummaryData(bit)
            n_points, _combo, _scaffold, labels, dist_mat = summary
            self.assertEqual(n_points, expected_points)
            self.assertEqual(labels, expected_labels)
            for row_idx, expected_row in enumerate(expected_rows):
                self.assertEqual(list(dist_mat[row_idx]), expected_row)

        # Disabled debugging switch kept from the test: Generate._verbose=True
        molecule = Chem.MolFromSmiles('OCCC(=O)')
        signature = Generate.Gen2DFingerprint(molecule, sig_factory)
        self.assertEqual(len(signature), 19355)
        expected_on_bits = (2, 16, 21, 84, 1274, 4361)
        self.assertEqual(tuple(signature.GetOnBits()), expected_on_bits)

        check_summary(21, 2, ['Acceptor', 'Hydrophobe'], [[0, 0], [0, 0]])

        # The description of bit 21 is requested and checked twice.
        for _ in range(2):
            description = sig_factory.GetBitDescription(21)
            self.assertEqual(description, 'Acceptor Hydrophobe |0 0|0 0|')

        check_summary(2, 2, ['Acceptor', 'Acceptor'], [[0, 2], [2, 0]])

        check_summary(4361, 3, ['Acceptor', 'Donor', 'Hydrophobe'],
                      [[0, 2, 0], [2, 0, 0], [0, 0, 0]])
        self.assertEqual(sig_factory.GetBitDescription(4361),
                         'Acceptor Donor Hydrophobe |0 2 0|2 0 0|0 0 0|')

        with self.assertRaises(NotImplementedError):
            sig_factory.GetBitDescriptionAsText(21)
Exemple #3
0
def Fingerprints(mols, fingerprint):
    """Compute the fingerprint named ``fingerprint`` for every molecule.

    The name is checked, in this order, against the module-level collections
    ``indigofps``, ``rdkitfps``, ``rdkitnonbitfps`` and ``rdkitestatefps``.

    Args:
        mols: iterable of molecules. For an Indigo fingerprint each
            molecule's own ``fingerprint`` method is called; for every other
            type the molecule is handed to an RDKit function.
        fingerprint: name of the fingerprint type.

    Returns:
        A list holding one fingerprint per molecule, or ``None`` when the
        name is not handled.
    """
    # Indigo fingerprints
    if fingerprint in indigofps:
        return [mol.fingerprint(fingerprint) for mol in mols]

    # RDKit fingerprints
    if fingerprint in rdkitfps:
        bit_makers = {
            "atompair": lambda m: Pairs.GetAtomPairFingerprintAsBitVect(m),
            "avalon": lambda m: pyAvalonTools.GetAvalonFP(m),
            "daylight": lambda m: Chem.RDKFingerprint(m, fpSize=2048),
            "maccs": lambda m: MACCSkeys.GenMACCSKeys(m),
            "morgan": lambda m: AllChem.GetMorganFingerprintAsBitVect(
                m, 2, nBits=1024),
            "pharm2d": lambda m: Generate.Gen2DFingerprint(
                m, Gobbi_Pharm2D.factory),
            "topological": lambda m: FingerprintMols.FingerprintMol(m),
        }
        maker = bit_makers.get(fingerprint)
        if maker is not None:
            return [maker(mol) for mol in mols]

    # RDKit non-bit (integer or float) fingerprints
    if fingerprint in rdkitnonbitfps:
        nonbit_makers = {
            "sheridan": lambda m: Sheridan.GetBPFingerprint(m),
            "topotorsion": lambda m: Torsions.GetTopologicalTorsionFingerprint(m),
        }
        maker = nonbit_makers.get(fingerprint)
        if maker is not None:
            return [maker(mol) for mol in mols]

    # E-state fingerprints: FingerprintMol returns a pair, one element per type
    if fingerprint in rdkitestatefps:
        estate_slot = {"estate1": 0, "estate2": 1}.get(fingerprint)
        if estate_slot is not None:
            return [Fingerprinter.FingerprintMol(mol)[estate_slot]
                    for mol in mols]

    # unknown fingerprint
    return None
Exemple #4
0
    def get_2Dfp(self, rdmols):
        """Return the 2D pharmacophore fingerprints of ``rdmols`` as a 0/1 array.

        Args:
            rdmols: iterable of molecules to fingerprint.

        Returns:
            ``numpy`` array built from one list of 0/1 integers per molecule,
            taken from the bit string of each fingerprint.
        """
        # Pharmacophore setup: feature definitions come from a relative path.
        feature_factory = ChemicalFeatures.BuildFeatureFactory(
            r'ensemble/BaseFeatures.fdef')
        signature_factory = SigFactory(
            feature_factory, minPointCount=2, maxPointCount=3)

        # Discretise the distances between pharmacophore points.
        signature_factory.SetBins([(0, 2), (2, 4)])
        signature_factory.Init()

        bit_strings = []
        for mol in rdmols:
            fingerprint = Generate.Gen2DFingerprint(mol, signature_factory)
            bit_strings.append(fingerprint.ToBitString())

        # Turn every '0'/'1' character into an integer.
        bit_rows = [[int(char) for char in bits] for bits in bit_strings]
        return np.array(bit_rows)
    def test1_simple(self):
        """Check the on-bits generated for a small molecule with 'Donor' skipped.

        NOTE(review): the visible part of this method stops inside the final
        loop; ``bitMatches`` and ``matches`` are not used in what is shown
        here, so the remaining checks are presumably cut off.
        """
        mol = Chem.MolFromSmiles('OCC(=O)CCCN')
        # Exclude the 'Donor' feature family, then (re)initialise the factory.
        self.factory.skipFeats = ['Donor']
        self.factory.Init()
        self.assertEqual(self.factory.GetSigSize(), 510)
        # The verbosity flag is set to False both before and after generation.
        Generate._verbose = False
        sig = Generate.Gen2DFingerprint(mol, self.factory)
        Generate._verbose = False
        # Expected on-bit indices for this molecule.
        tgt = (1, 2, 11, 52, 117)
        onBits = sig.GetOnBits()
        self.assertEqual(tuple(onBits), tgt)
        self.assertEqual(len(onBits), len(tgt))

        # Reference atom matches; not consumed by the visible code below.
        bitMatches = ([((0, ), (3, ))],
                      [((0, ), (7, )), ((3, ), (7, ))],
                      [((0, ), (3, ), (7, ))], )
        for i, bit in enumerate(onBits):
            # Look up the atoms of ``mol`` that match each on-bit.
            matches = Matcher.GetAtomsMatchingBit(self.factory, bit, mol)
Exemple #6
0
 def test3Roundtrip(self):
   """Longer-running Bug 28 test.

   For the first ``nToDo`` lines of the NCI ``first_5K.smi`` file, every
   on-bit of the generated fingerprint must map back to at least one set of
   matching atoms.
   """
   import os
   from rdkit import RDConfig
   nToDo = 20
   smiles_file = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_5K.smi')
   with open(smiles_file, 'r') as inF:
     records = inF.readlines()[:nToDo]
   sig_factory = Gobbi_Pharm2D.factory
   sig_factory.SetBins([(2, 3), (3, 4), (4, 5), (5, 8), (8, 100)])
   for record in records:
     # The SMILES is the first tab-separated field of the line.
     smi = record.split('\t')[0]
     mol = Chem.MolFromSmiles(smi)
     fingerprint = Generate.Gen2DFingerprint(mol, sig_factory)
     for bit in fingerprint.GetOnBits():
       atoms = Matcher.GetAtomsMatchingBit(sig_factory, bit, mol, justOne=1)
       assert len(atoms), 'bit %d failed to match for smi %s' % (bit, smi)
def read_file(fname, fcfp4, fdef_fname):
    """Read a tab-separated molecule file and fingerprint every molecule.

    Every non-blank line of ``fname`` must consist of exactly three
    tab-separated fields: SMILES, molecule identifier and a third value that
    is read but not used.

    Args:
        fname: path of the tab-separated input file.
        fcfp4: if truthy, use ``AllChem.GetMorganFingerprint`` with radius 2
            and ``useFeatures=True``; otherwise use a 2D pharmacophore
            fingerprint (2-3 points, bins (0, 2), (2, 5), (5, 8)).
        fdef_fname: path of the feature-definition file; only read when
            ``fcfp4`` is falsy.

    Returns:
        ``defaultdict(list)`` with the parallel lists ``'mol_name'``,
        ``'smiles'`` and ``'fingerprint'``. Blank lines are ignored and
        lines whose SMILES cannot be parsed are skipped with a warning, so
        the three lists always have equal length.

    Raises:
        ValueError: if a non-blank line does not have exactly three fields.
    """
    import warnings

    sigFactory = None
    if not fcfp4:
        featFactory = ChemicalFeatures.BuildFeatureFactory(fdef_fname)
        sigFactory = SigFactory(featFactory, minPointCount=2, maxPointCount=3,
                                trianglePruneBins=False)
        sigFactory.SetBins([(0, 2), (2, 5), (5, 8)])
        sigFactory.Init()

    d = defaultdict(list)
    with open(fname) as f:
        for line_no, row in enumerate(f, start=1):
            row = row.strip()
            if not row:
                # Blank (e.g. trailing) lines carry no record.
                continue
            smiles, ids, _aff = row.split('\t')
            mol = Chem.MolFromSmiles(smiles)
            if mol is None:
                # An unparseable SMILES cannot be fingerprinted; skipping the
                # whole record keeps the three output lists aligned.
                warnings.warn('%s line %d: cannot parse SMILES %r, skipped'
                              % (fname, line_no, smiles))
                continue
            if fcfp4:
                fingerprint = AllChem.GetMorganFingerprint(mol, 2, useFeatures=True)
            else:
                fingerprint = Generate.Gen2DFingerprint(mol, sigFactory)
            # Append only after the fingerprint exists so a failure above
            # cannot leave the lists with different lengths.
            d['mol_name'].append(ids)
            d['smiles'].append(smiles)
            d['fingerprint'].append(fingerprint)
    return d
Exemple #8
0
def CalculatePharm2D3pointFingerprint(mol, featFactory=featFactory):
    """Calculate the 3-point Pharm2D fingerprint of ``mol``.

    Args:
        mol: molecule to fingerprint.
        featFactory: feature factory used to build the signature factory;
            defaults to the module-level ``featFactory``.

    Returns:
        A 3-tuple ``(BitVect, res_keys, res)``: a 2135-element tuple of 0/1
        flags, the tuple of on-bit indices, and the fingerprint object
        returned by ``Generate.Gen2DFingerprint``.
    """
    factory = SigFactory(featFactory, minPointCount=3, maxPointCount=3)
    factory.SetBins([(0, 2), (2, 4), (4, 6), (6, 10)])
    factory.Init()

    res = Generate.Gen2DFingerprint(mol, factory)
    res_keys = tuple(res.GetOnBits())

    # NOTE(review): the length 2135 is hard-coded; presumably it is the
    # signature size for the default feature factory -- confirm before
    # passing a different ``featFactory``.
    flags = [0] * 2135
    for on_bit in res_keys:
        flags[on_bit] = 1

    return tuple(flags), res_keys, res
Exemple #9
0
def CalculatePharm2D2pointFingerprint(mol, featFactory=featFactory):
    """Calculate the 2-point Pharm2D fingerprint of ``mol``.

    Args:
        mol: molecule to fingerprint.
        featFactory: feature factory used to build the signature factory;
            defaults to the module-level ``featFactory``.

    Returns:
        A 3-tuple ``(BitVect, res_keys, res)``: a 135-element tuple of 0/1
        flags, the tuple of on-bit indices, and the fingerprint object
        returned by ``Generate.Gen2DFingerprint``.
    """
    # Nine unit-width distance bins covering 0 to 9.
    distance_bins = [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6),
                     (6, 7), (7, 8), (8, 9)]
    factory = SigFactory(featFactory, minPointCount=2, maxPointCount=2)
    factory.SetBins(distance_bins)
    factory.Init()

    res = Generate.Gen2DFingerprint(mol, factory)
    res_keys = tuple(res.GetOnBits())

    # NOTE(review): the length 135 is hard-coded; presumably it is the
    # signature size for the default feature factory -- confirm before
    # passing a different ``featFactory``.
    flags = [0] * 135
    for on_bit in res_keys:
        flags[on_bit] = 1

    return tuple(flags), res_keys, res
Exemple #10
0
    def calcfp(self, fptype="rdkit", opt=None):
        """Calculate a molecular fingerprint.

        Optional parameters:
           fptype -- the fingerprint type (default is "rdkit"), matched
                     case-insensitively. See the fps variable for a list of
                     available fingerprint types.
           opt -- a dictionary of options for fingerprints. Currently only used
                  for radius (default 4) and bitInfo (default None) in Morgan
                  fingerprints.

        Returns a Fingerprint wrapper, except for "atompairs" and "torsions",
        whose RDKit result is returned as-is.

        Raises ValueError if fptype is not one of the handled names.
        """
        # Identity check: only a missing options dict is replaced.
        if opt is None:
            opt = {}
        fptype = fptype.lower()
        if fptype == "rdkit":
            fp = Fingerprint(Chem.RDKFingerprint(self.Mol))
        elif fptype == "layered":
            fp = Fingerprint(Chem.LayeredFingerprint(self.Mol))
        elif fptype == "maccs":
            fp = Fingerprint(Chem.MACCSkeys.GenMACCSKeys(self.Mol))
        elif fptype == "atompairs":
            # Going to leave as-is. See Atom Pairs documentation.
            fp = Chem.AtomPairs.Pairs.GetAtomPairFingerprintAsIntVect(self.Mol)
        elif fptype == "torsions":
            # Going to leave as-is.
            fp = Chem.AtomPairs.Torsions.GetTopologicalTorsionFingerprintAsIntVect(
                self.Mol)
        elif fptype == "morgan":
            info = opt.get('bitInfo', None)
            radius = opt.get('radius', 4)
            fp = Fingerprint(
                Chem.rdMolDescriptors.GetMorganFingerprintAsBitVect(
                    self.Mol, radius, bitInfo=info))
        elif fptype == "pharm2d":
            fp = Fingerprint(
                Generate.Gen2DFingerprint(self.Mol, Gobbi_Pharm2D.factory))
        else:
            raise ValueError("%s is not a recognised RDKit Fingerprint type" %
                             fptype)
        return fp
Exemple #11
0
def _exampleCode():
    """Demo: fingerprint one molecule and print the atoms matching each on-bit."""
    import os
    from rdkit import RDConfig
    from rdkit.Chem import ChemicalFeatures
    from rdkit.Chem.Pharm2D import SigFactory, Generate

    # Feature definitions shipped with the Pharm2D test data.
    fdef_path = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', 'test_data',
                             'BaseFeatures.fdef')
    sig_factory = SigFactory.SigFactory(
        ChemicalFeatures.BuildFeatureFactory(fdef_path))
    sig_factory.SetBins([(1, 2), (2, 5), (5, 8)])
    sig_factory.Init()

    molecule = Chem.MolFromSmiles('OCC(=O)CCCN')
    signature = Generate.Gen2DFingerprint(molecule, sig_factory)
    print('onbits:', list(signature.GetOnBits()))

    # NOTE(review): this binds a function-local name only; it does not
    # touch any module-level ``_verbose``.
    _verbose = 0
    for on_bit in signature.GetOnBits():
        matching_atoms = GetAtomsMatchingBit(sig_factory, on_bit, molecule)
        print('\tBit %d: ' % (on_bit), matching_atoms)

    print('finished')
Exemple #12
0
 def get_PHCO(self, mol: Mol):
     """Return the 2D pharmacophore fingerprint of ``mol`` from the Gobbi factory."""
     gobbi_factory = Gobbi_Pharm2D.factory
     return Generate.Gen2DFingerprint(mol, gobbi_factory)
    def __init__(self, query_smi: utils.SMILES):
        """Store the signature factory and the fingerprint of the query.

        Args:
            query_smi: SMILES of the query molecule; it is parsed and
                fingerprinted with the module-level ``factory``.
        """
        self.sigFactory = factory
        self.query_fp = Generate.Gen2DFingerprint(
            Chem.MolFromSmiles(query_smi), self.sigFactory)
    lambda s: DataStructs.FingerprintSimilarity(
        Chem.RDKFingerprint(Chem.MolFromSmiles(s)), doxrub_fingerprint))
# Per-molecule scalar descriptors, each computed from the SMILES column.
data_block_1["exact_mol_weight"] = data_block_1.SMILES.apply(
    lambda s: Descriptors.ExactMolWt(Chem.MolFromSmiles(s)))
data_block_1["ring_count"] = data_block_1.SMILES.apply(
    lambda s: Descriptors.RingCount(Chem.MolFromSmiles(s)))

# RDKit fingerprint: store the list of on-bit indices per molecule ...
data_block_1["fingerprint"] = data_block_1.SMILES.apply(
    lambda s: list(Chem.RDKFingerprint(Chem.MolFromSmiles(s)).GetOnBits()))
# ... then expand it to one row per (molecule, on-bit), mark each with
# value=1 and pivot so that every on-bit index becomes its own column.
fingerprint_block = data_block_1[[
    "fingerprint"
]].explode("fingerprint").assign(value=1).pivot(columns="fingerprint")
# Keep only the bit-index level of the pivoted column labels.
fingerprint_block.columns = fingerprint_block.columns.get_level_values(1)
# Name the columns "fp.<bit>" and fill bits a molecule lacks with 0.
fingerprint_block = fingerprint_block.add_prefix("fp.").fillna(value=0)
# Attach the indicator columns and drop the temporary list column.
data_block_1 = pd.concat([data_block_1, fingerprint_block],
                         axis="columns").drop(labels="fingerprint",
                                              axis="columns")

# Same expansion for the Gobbi 2D pharmacophore on-bits ("ph.<bit>" columns).
data_block_1["pharmacophores"] = data_block_1.SMILES.apply(lambda s: list(
    Generate.Gen2DFingerprint(Chem.MolFromSmiles(s), Gobbi_Pharm2D.factory).
    GetOnBits()))
pharmacophores_block = data_block_1[[
    "pharmacophores"
]].explode("pharmacophores").assign(value=1).pivot(columns="pharmacophores")
pharmacophores_block.columns = pharmacophores_block.columns.get_level_values(1)
pharmacophores_block = pharmacophores_block.add_prefix("ph.").fillna(value=0)
data_block_1 = pd.concat([data_block_1, pharmacophores_block],
                         axis="columns").drop(labels="pharmacophores",
                                              axis="columns")

# Write the resulting table to an Excel file without the index column.
data_block_1.to_excel("bcl-xl_1_descriptors.xlsx", index=False)
Exemple #15
0
def molfp(smile):
    """Return a fingerprint for the molecule given as the SMILES ``smile``.

    Reads the module-level ``options``: ``mw`` (optional upper limit on the
    molecular weight), ``gobbifp`` (use the Gobbi 2D pharmacophore
    fingerprint instead of a Morgan bit vector), and ``radius`` / ``bits``
    (Morgan radius and bit count).

    Raises:
        ValueError: if the SMILES cannot be parsed, or if ``options.mw`` is
            set and the molecular weight is above it.
    """
    m = Chem.MolFromSmiles(smile)
    if m is None:
        raise ValueError('None mol in function "molfp" (line 58)')
    if options.mw is not None and Descriptors.MolWt(m) > options.mw:
        # The molecule is rejected for being heavier than the limit, so the
        # message has to say "too large".
        raise ValueError('Mol too large')
    if options.gobbifp:
        return Generate.Gen2DFingerprint(m, Gobbi_Pharm2D.factory)
    return AllChem.GetMorganFingerprintAsBitVect(
        m, options.radius, nBits=options.bits)
    #make new molecules from fragments
    import random
    random.seed(127)
    fragms = [Chem.MolFromSmiles(x) for x in allfrags]
    ms = BRICS.BRICSBuild(fragms)

    prods = [ms.next() for x in range(10000)]
    #clean up generated molecules
    for prod in prods:
        prod.UpdatePropertyCache(strict=False)

    #srpin340 is a low affinity but selective SRPK1 inhibitor

    srpin340Mol = Chem.MolFromSmiles(
        'C1CCN(CC1)C2=C(C=C(C=C2)C(F)(F)F)NC(=O)C3=CC=NC=C3')
    srpin340fps = Generate.Gen2DFingerprint(srpin340Mol, sigFactory)

    #sphinx is a higher affinity but selective SRPK1 inhibitor

    sphinxMol = Chem.MolFromSmiles(
        'C1(=CC=C(C(=C1)N(C(=O)C2=CC=C(O2)C)[H])N3CCOCC3)C(F)(F)F')
    sphinxfps = Generate.Gen2DFingerprint(sphinxMol, sigFactory)

    #CHEMBL2000345 is a broad spectrum kinase inhibitor but also has highest
    #affinity to srpk1 of all ligands
    #not selective - try to do negative design

    CHEMBL2000345Mol = Chem.MolFromSmiles(
        'COc1cc(C=C(C#N)c2nc3cc(C)ccc3[nH]2)c(Br)cc1O')
    CHEMBL2000345fps = Generate.Gen2DFingerprint(CHEMBL2000345Mol, sigFactory)
Exemple #17
0
 def get_Gobbi_Pharm2D(x, from_smiles):
     """Return the Gobbi 2D pharmacophore fingerprint for ``x``.

     Args:
         x: a SMILES string when ``from_smiles`` is truthy, otherwise an
             object that is passed on unchanged as the molecule.
         from_smiles: whether ``x`` must first be parsed as SMILES.
     """
     mol = Chem.MolFromSmiles(x) if from_smiles else x
     return Generate.Gen2DFingerprint(mol, Gobbi_Pharm2D.factory)
Exemple #18
0
        if justOne: break
  return res

if __name__ == '__main__':
  from rdkit import Chem
  from rdkit.Chem.Pharm2D import SigFactory,Generate
      
  factory = SigFactory.SigFactory()
  factory.SetBins([(1,2),(2,5),(5,8)])
  factory.SetPatternsFromSmarts(['O','N'])
  factory.SetMinCount(2)
  factory.SetMaxCount(3)
  sig = factory.GetSignature()
  
  mol = Chem.MolFromSmiles('OCC(=O)CCCN')
  Generate.Gen2DFingerprint(mol,sig)
  print 'onbits:',list(sig.GetOnBits())

  _verbose=0
  for bit in sig.GetOnBits():
    atoms = GetAtomsMatchingBit(sig,bit,mol)
    print '\tBit %d: '%(bit),atoms

    
  print '--------------------------'
  sig = factory.GetSignature()
  sig.SetIncludeBondOrder(1)
  Generate.Gen2DFingerprint(mol,sig)
  print 'onbits:',list(sig.GetOnBits())

  for bit in sig.GetOnBits():
Exemple #19
0
def _2DFingerprint(molecule):
    """Return the Gobbi 2D pharmacophore fingerprint of a SMILES as a numpy array.

    Args:
        molecule: SMILES string of the molecule.
    """
    mol = Chem.MolFromSmiles(molecule)
    fingerprint = Generate.Gen2DFingerprint(mol, Gobbi_Pharm2D.factory)
    return np.array(fingerprint)
Exemple #20
0
 def get_Pharm2D(x):
     """Return the 2D pharmacophore fingerprint for the SMILES ``x``.

     When the SMILES cannot be parsed or yields a molecule without atoms, a
     list of zeros with the signature size of the module-level
     ``sigFactory`` is returned instead of a fingerprint object.
     """
     mol = Chem.MolFromSmiles(x)
     if mol is not None and len(mol.GetAtoms()) != 0:
         return Generate.Gen2DFingerprint(mol, sigFactory)
     # No usable molecule: fall back to an all-zero placeholder.
     return [0] * sigFactory.GetSigSize()
Exemple #21
0
        smi.strip()
        mols[i] = Chem.MolFromSmiles(smi)

    sig = factory.GetSignature()

    nBits = 300
    random.seed(23)
    bits = [random.randint(0, sig.GetSize() - 1) for x in range(nBits)]

    print('Using the Lazy Generator')
    t1 = time.time()
    for i in range(len(mols)):
        if not i % 10:
            print('done mol %d of %d' % (i, len(mols)))
        gen = Generator(factory, mols[i])
        for bit in bits:
            v = gen[bit]
    t2 = time.time()
    print('\tthat took %4.2f seconds' % (t2 - t1))

    print('Generating and checking signatures')
    t1 = time.time()
    for i in range(len(mols)):
        if not i % 10:
            print('done mol %d of %d' % (i, len(mols)))
        sig = Generate.Gen2DFingerprint(mols[i], factory)
        for bit in bits:
            v = sig[bit]
    t2 = time.time()
    print('\tthat took %4.2f seconds' % (t2 - t1))
# Re-initialise the factory after every parameter change.
sigFactory.Init()
# Compute the fingerprint length.
print('指纹长度=', sigFactory.GetSigSize())  # expected output: 指纹长度= 2988

# # 4.2 Generating 2D pharmacophore fingerprints
# With the factory parameters set, the 2D fingerprints can now be generated.
# Compute the 2D pharmacophore fingerprint:
# NOTE(review): the call below reads like an illustration of the argument
# list; ``mol`` is only assigned further down and ``bitinfo`` is not defined
# in the visible part of this script -- confirm before running.
Gen2DFingerprint(
    mol,  # mol object to fingerprint
    sigFactory,  # signature factory with its parameters set
    bitinfo  # receives the fingerprint bit ids and their corresponding atoms
)

mol = Chem.MolFromSmiles('OCC(=O)CCCN')
fp = Generate.Gen2DFingerprint(mol, sigFactory)
print(len(fp))  # 2988
print(fp.GetNumOnBits())  # 23

# Information about the features each bit stands for, the feature distance
# matrix, etc. can be inspected through the signature factory.
print(list(fp.GetOnBits())[:5])  #
print(sigFactory.GetBitDescription(1))  # Acceptor Acceptor |0 1|1 0|

# # 4.3 Changing the FDef settings
# If a feature is not needed it can be skipped directly through the
# signature factory, without editing the FDef file.

# List the available pharmacophore feature families: GetFeatureFamilies()

featureFamilies = featFactory.GetFeatureFamilies()
print(featureFamilies)
# ('Donor', 'Acceptor', 'NegIonizable', 'PosIonizable',
Exemple #23
0
def calc_p4_fp(mol):
    """Return the 2D pharmacophore fingerprint of ``mol`` using the module-level ``sigFact``."""
    return Generate.Gen2DFingerprint(mol, sigFact)