Esempio n. 1
0
 def testOrderBug2(self):
   """Check that the 2D fingerprint does not depend on atom ordering.

   For each probe SMILES the fingerprint of the parsed molecule is compared
   with the fingerprint of the molecule rebuilt from its own SMILES output,
   and then with ten molecules returned by ``Randomize.RandomizeMol``.
   When a mismatch is found, both fingerprints are regenerated with
   ``Generate._verbose`` switched on and the mol blocks plus the differing
   bits are printed before the final assertion fails.
   """
   from rdkit.Chem import Randomize
   from rdkit import DataStructs
   probes = ['Oc1nc(Oc2ncccc2)ccc1']
   for smi in probes:
     m1 = Chem.MolFromSmiles(smi)
     sig1 = Generate.Gen2DFingerprint(m1, self.factory)
     csmi = Chem.MolToSmiles(m1)
     m2 = Chem.MolFromSmiles(csmi)
     sig2 = Generate.Gen2DFingerprint(m2, self.factory)
     self.assertEqual(list(sig1.GetOnBits()), list(sig2.GetOnBits()), '%s %s' % (smi, csmi))
     self.assertEqual(DataStructs.DiceSimilarity(sig1, sig2), 1.0)
     self.assertEqual(sig1, sig2)
     for _ in range(10):
       m2 = Randomize.RandomizeMol(m1)
       sig2 = Generate.Gen2DFingerprint(m2, self.factory)
       if sig2 != sig1:
         # Dump diagnostics; restore the verbosity flag afterwards so the
         # extra output does not leak into whatever runs next.
         previous_verbose = getattr(Generate, '_verbose', False)
         Generate._verbose = True
         try:
           print('----------------')
           sig1 = Generate.Gen2DFingerprint(m1, self.factory)
           print('----------------')
           sig2 = Generate.Gen2DFingerprint(m2, self.factory)
           print('----------------')
           print(Chem.MolToMolBlock(m1))
           print('----------------')
           print(Chem.MolToMolBlock(m2))
           print('----------------')
           s1 = set(sig1.GetOnBits())
           s2 = set(sig2.GetOnBits())
           print(s1.difference(s2))
         finally:
           Generate._verbose = previous_verbose
       self.assertEqual(sig1, sig2)
Esempio n. 2
0
def compute_anti_fp(mols_smiles, sig_fac, antifp_old=None):
    """
    Computes an anti-fingerprint from the given molecules.

    It is possible to specify an existing anti-fingerprint
    for an update. In this case, the returned fingerprint
    will be the result of doing a bitwise :samp:`or` between
    the old fingerprint and the one generated from the
    supplied structures.

    :param mols_smiles: SMILES of the molecules to generate the anti-fingerprint from
    :param sig_fac: RDKit's signature factory used in the 2D pharmacophore fingerprint computation
    :param antifp_old: an old anti-fingerprint to update
    :return: new or updated anti-fingerprint; ``antifp_old`` unchanged
        (possibly ``None``) when ``mols_smiles`` is empty
    """

    antifp_new = antifp_old
    for smiles in mols_smiles:
        mol = Chem.MolFromSmiles(smiles)
        fp = Generate.Gen2DFingerprint(mol, sig_fac)
        # "No fingerprint yet" is signalled by None only; testing truthiness
        # would depend on how the fingerprint type implements __len__/__bool__.
        antifp_new = fp if antifp_new is None else antifp_new | fp

    return antifp_new
Esempio n. 3
0
def pharmacophore(mol, target):
    """Return the Tanimoto similarity of two 2D pharmacophore fingerprints.

    Both arguments are standardized in place via their ``standardize()``
    method and converted to text with ``str()``; three textual nitro /
    N-oxide patterns are rewritten to their charge-separated form before the
    text is parsed with ``Chem.MolFromSmiles``.

    :param mol: object providing ``standardize()`` and ``__str__``
    :param target: object providing ``standardize()`` and ``__str__``
    :return: Tanimoto similarity of the two fingerprints, or ``-100`` when
        either parsed molecule is falsy
    """
    print('mol/target', mol, target)
    mol.standardize()
    target.standardize()

    def charge_separate(text):
        # Apply the three substitutions in the same order for both inputs.
        text = text.replace('N(=O)O', '[N+](=O)[O-]')
        text = text.replace('N(O)=O', '[N+]([O-])=O')
        return text.replace('n(O)', '[n+]([O-])')

    mol_text = charge_separate(str(mol))
    target_text = charge_separate(str(target))

    sigfactory = SigFactory(load_factory(),
                            minPointCount=2,
                            maxPointCount=3,
                            trianglePruneBins=False)
    sigfactory.SetBins([(0, 2), (2, 5), (5, 8)])
    sigfactory.Init()

    parsed_mol = Chem.MolFromSmiles(mol_text)
    parsed_target = Chem.MolFromSmiles(target_text)
    if not (parsed_mol and parsed_target):
        # The counter printed here is always 1: it is local to a single call.
        failures = 1
        print('ошибка', failures, mol_text)
        return -100

    fp_mol = Generate.Gen2DFingerprint(parsed_mol, sigfactory)
    fp_target = Generate.Gen2DFingerprint(parsed_target, sigfactory)
    return DataStructs.TanimotoSimilarity(fp_mol, fp_target)
Esempio n. 4
0
 def testOrderBug(self):
     """The first two records of orderBug.sdf must give equal fingerprints."""
     sd_path = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', 'test_data', 'orderBug.sdf')
     supplier = Chem.SDMolSupplier(sd_path)
     first_mol = next(supplier)
     second_mol = next(supplier)
     first_sig = Generate.Gen2DFingerprint(first_mol, self.factory)
     second_sig = Generate.Gen2DFingerprint(second_mol, self.factory)
     self.assertEqual(first_sig, second_sig)
Esempio n. 5
0
 def testOrderBug(self):
   """The first two records of orderBug.sdf must give equal fingerprints."""
   sdFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', 'test_data', 'orderBug.sdf')
   suppl = Chem.SDMolSupplier(sdFile)
   # next(obj) works on both Python 2.6+ and Python 3; obj.next() does not.
   m1 = next(suppl)
   m2 = next(suppl)
   sig1 = Generate.Gen2DFingerprint(m1, self.factory)
   sig2 = Generate.Gen2DFingerprint(m2, self.factory)
   self.assertEqual(sig1, sig2)
Esempio n. 6
0
def calc_phore_descs(mols, significant_bits=None, testing=False):
    """Compute Gobbi 2D pharmacophore fingerprints for a list of molecules.

    :param mols: sized sequence of molecules passed to
        ``Generate.Gen2DFingerprint`` with ``Gobbi_Pharm2D.factory``
    :param significant_bits: optional sequence of bit ids; when given, the
        result is a 0/1 matrix with one column per listed bit
    :param testing: when true (and ``significant_bits`` is given) return a
        pair of summary strings instead of the matrix
    :return: the list of raw fingerprints when ``significant_bits`` is
        ``None``; otherwise a ``(len(mols), len(significant_bits))`` array,
        or the two summary strings when ``testing`` is true
    """
    fp_holding = [
        Generate.Gen2DFingerprint(mol, Gobbi_Pharm2D.factory) for mol in mols
    ]
    if significant_bits is None:
        return fp_holding

    phore_descs = np.zeros((len(mols), len(significant_bits)))
    for mol_num, fp in enumerate(fp_holding):
        # Materialise the on-bits once per molecule so each membership test
        # is O(1) instead of re-querying the fingerprint for every column.
        on_bits = set(fp.GetOnBits())
        for bit_num, bit in enumerate(significant_bits):
            if bit in on_bits:
                phore_descs[mol_num, bit_num] = 1

    if testing:
        return ("significant_bits: %d" % len(significant_bits),
                "fp_descriptors: %s" % str(phore_descs.shape))
    print("significant_bits:", len(significant_bits))
    print("fp_descriptors:", phore_descs.shape)
    return phore_descs
def make_fingerprints(data, length=512, verbose=False):
    """Build a fixed list of ``fingerprint`` objects and apply each to ``data``.

    :param data: passed unchanged to ``apply_fp`` of every entry
    :param length: bit count forwarded to the Morgan, Avalon and RDKit
        fingerprint functions
    :param verbose: when true, print the name of each entry before applying it
    :return: list of ``fingerprint`` objects, in the order defined below
    """
    # (callable, display name) pairs; labels are kept exactly as they were,
    # including the trailing space in "Torsion ".
    specs = [
        (Chem.rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect,
         "Torsion "),
        (lambda x: GetMorganFingerprintAsBitVect(x, 2, nBits=length),
         "Morgan"),
        (FingerprintMol, "Estate (1995)"),
        (lambda x: GetAvalonFP(x, nBits=length),
         "Avalon bit based (2006)"),
        (lambda x: np.append(GetAvalonFP(x, nBits=length), Descriptors.MolWt(x)),
         "Avalon+mol. weight"),
        (lambda x: GetErGFingerprint(x), "ErG fingerprint (2006)"),
        (lambda x: RDKFingerprint(x, fpSize=length),
         "RDKit fingerprint"),
        (lambda x: MACCSkeys.GenMACCSKeys(x),
         "MACCS fingerprint"),
        (lambda x: get_fingerprint(x, fp_type='pubchem'), "PubChem"),
        # NOTE(review): uses the 3D distance matrix, so molecules presumably
        # need 3D coordinates - confirm against the callers.
        (lambda x: Generate.Gen2DFingerprint(x, Gobbi_Pharm2D.factory,
                                             dMat=Chem.Get3DDistanceMatrix(x)),
         "3D pharmacophore"),
    ]
    fp_list = [fingerprint(func, label) for func, label in specs]

    for entry in fp_list:
        if verbose:
            print("doing", entry.name)
        entry.apply_fp(data)

    return fp_list
Esempio n. 8
0
def GetPharmacoPFPs(mol,
                    bins=[(i, i + 1) for i in range(20)],
                    minPointCount=2,
                    maxPointCount=2,
                    return_bitInfo=False):
    '''
    Compute a 2D pharmacophore fingerprint as a boolean numpy array.

    Note: maxPointCount=3 is slow; the defaults
    (bins = [(i, i + 1) for i in range(20)], maxPointCount=2) are meant for
    large-scale computation.

    :param mol: molecule passed to ``Generate.Gen2DFingerprint``
    :param bins: distance bins handed to ``SigFactory.SetBins`` (the default
        list is shared between calls but only read here)
    :param minPointCount: forwarded to ``SigFactory``
    :param maxPointCount: forwarded to ``SigFactory``
    :param return_bitInfo: when true, also return one bit description per bit
    :return: boolean array, or ``(array, descriptions)`` when
        ``return_bitInfo`` is true
    '''
    MysigFactory = SigFactory(featFactory,
                              trianglePruneBins=False,
                              minPointCount=minPointCount,
                              maxPointCount=maxPointCount)
    MysigFactory.SetBins(bins)
    MysigFactory.Init()

    res = Generate.Gen2DFingerprint(mol, MysigFactory)
    # The ``np.bool`` alias was removed from NumPy; the builtin ``bool``
    # selects the same dtype on every NumPy version.
    arr = np.array(list(res)).astype(bool)
    if return_bitInfo:
        description = [
            MysigFactory.GetBitDescription(i) for i in range(len(res))
        ]
        return arr, description

    return arr
Esempio n. 9
0
    def calcfp(self, fptype="rdkit", opt=None):
        """Calculate a molecular fingerprint.

        Optional parameters:
           fptype -- the fingerprint type (default is "rdkit"). See the
                     fps variable for a list of available fingerprint
                     types. Matching is case-insensitive.
           opt -- a dictionary of options for fingerprints. Currently only used
                  for radius and bitInfo in Morgan fingerprints.

        Raises ValueError when fptype is not one of the recognised types.
        """
        if opt is None:
            opt = {}
        fptype = fptype.lower()
        if fptype == "rdkit":
            fp = Fingerprint(Chem.RDKFingerprint(self.Mol))
        elif fptype == "layered":
            fp = Fingerprint(Chem.LayeredFingerprint(self.Mol))
        elif fptype == "maccs":
            fp = Fingerprint(Chem.MACCSkeys.GenMACCSKeys(self.Mol))
        elif fptype == "atompairs":
            # Going to leave as-is. See Atom Pairs documentation.
            fp = Chem.AtomPairs.Pairs.GetAtomPairFingerprintAsIntVect(self.Mol)
        elif fptype == "torsions":
            # Going to leave as-is.
            fp = Chem.AtomPairs.Torsions.GetTopologicalTorsionFingerprintAsIntVect(self.Mol)
        elif fptype == "morgan":
            info = opt.get('bitInfo', None)
            radius = opt.get('radius', 4)
            fp = Fingerprint(Chem.rdMolDescriptors.GetMorganFingerprintAsBitVect(self.Mol, radius, bitInfo=info))
        elif fptype == "pharm2d":
            fp = Fingerprint(Generate.Gen2DFingerprint(self.Mol, Gobbi_Pharm2D.factory))
        else:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise ValueError("%s is not a recognised RDKit Fingerprint type" % fptype)
        return fp
 def fingerprints_from_mols(cls, mols):
     """Fold the fingerprints of ``mols`` into a dense 4096-column matrix.

     Each non-zero element ``(index, value)`` of a fingerprint is written to
     column ``index % 4096`` of that molecule's row; a later element that
     folds onto the same column overwrites the earlier one.

     :param mols: sized sequence of molecules
     :return: float array of shape ``(len(mols), 4096)``
     """
     n_columns = 4096
     fingerprints = [Generate.Gen2DFingerprint(mol, factory) for mol in mols]
     matrix = np.zeros((len(mols), n_columns))
     for row, fingerprint in enumerate(fingerprints):
         nonzero = fingerprint.GetNonzeroElements()
         for bit_id in nonzero:
             matrix[row, bit_id % n_columns] = nonzero[bit_id]
     return matrix
Esempio n. 11
0
    def test8MultiPointMatches(self):
        """Check the on-bits for two aldehyde/phenyl probes with 2-3 point sigs."""
        factory = self.factory
        factory.SetBins([(1, 3), (3, 7), (7, 10)])
        factory.minPointCount = 2
        factory.maxPointCount = 3
        factory.Init()

        mol = Chem.MolFromSmiles('O=Cc1ccccc1')
        sig = Generate.Gen2DFingerprint(mol, factory)
        self.assertEqual(len(sig), 990)
        bs = tuple(sig.GetOnBits())
        self.assertEqual(bs, (3, ))

        # Same end groups separated by a long chain: no bit is expected.
        mol = Chem.MolFromSmiles('O=CCCCCCCCCc1ccccc1')
        sig = Generate.Gen2DFingerprint(mol, factory)
        self.assertEqual(len(sig), 990)
        bs = tuple(sig.GetOnBits())
        self.assertEqual(bs, ())
Esempio n. 12
0
    def test5SimpleSig(self):
        """Check signature length and on-bits for two carbonyl probes."""
        factory = self.factory
        factory.SetBins([(1, 3), (3, 7), (7, 10)])
        factory.minPointCount = 2
        factory.maxPointCount = 3
        factory.Init()

        mol = Chem.MolFromSmiles('O=CCC=O')
        sig = Generate.Gen2DFingerprint(mol, factory)
        self.assertEqual(len(sig), 990)
        bs = tuple(sig.GetOnBits())
        self.assertEqual(bs, (1, ))

        mol = Chem.MolFromSmiles('O=CC(CC=O)CCC=O')
        sig = Generate.Gen2DFingerprint(mol, factory)
        self.assertEqual(len(sig), 990)
        bs = tuple(sig.GetOnBits())
        self.assertEqual(bs, (1, 2, 67))
Esempio n. 13
0
def BuildPharm2DFP(mol):
    """Return the 2D pharmacophore fingerprint of ``mol``.

    Uses the module-level ``sigFactory``. If fingerprint generation raises
    ``IndexError``, the molecule's SMILES is printed with a ``FAIL:`` prefix
    and the exception is re-raised.
    """
    from rdkit.Chem.Pharm2D import Generate
    try:
        return Generate.Gen2DFingerprint(mol, sigFactory)
    except IndexError:
        print('FAIL:', Chem.MolToSmiles(mol, True))
        raise
Esempio n. 14
0
 def testBitInfo(self):
     """The bitInfo dict must have one entry per on-bit, with known contents."""
     mol = Chem.MolFromSmiles('OCC=CC(=O)O')
     bit_info = {}
     fp = Generate.Gen2DFingerprint(mol, Gobbi_Pharm2D.factory, bitInfo=bit_info)
     # The keys of bit_info and the fingerprint's on-bits must coincide.
     self.assertEqual(fp.GetNumOnBits(), len(bit_info))
     recorded_bits = sorted(bit_info.keys())
     self.assertEqual(list(fp.GetOnBits()), recorded_bits)
     self.assertEqual(recorded_bits, [23, 30, 150, 154, 157, 185, 28878, 30184])
     # Spot-check the stored matches for one 3-point and one 2-point bit.
     self.assertEqual(sorted(bit_info[28878]), [[(0, ), (5, ), (6, )]])
     self.assertEqual(sorted(bit_info[157]), [[(0, ), (6, )], [(5, ), (0, )]])
    def tanimoto(self, mol):
        """Return the Tanimoto similarity between ``mol`` and ``self.query_fp``.

        Fingerprint generation runs inside ``Timeout(seconds=1)``. If a
        ``TimeoutError`` is raised, the molecule's SMILES is logged at debug
        level and 0 is returned.
        """
        try:
            with Timeout(seconds=1):
                fp = Generate.Gen2DFingerprint(mol, self.sigFactory)
            return DataStructs.TanimotoSimilarity(fp, self.query_fp)

        except TimeoutError:
            # The message needs a %s placeholder: logging formats lazily with
            # ``msg % args``, so an extra argument without a placeholder makes
            # the record fail to format.
            logging.debug("SMILES Pharmacophore timeout: %s",
                          Chem.MolToSmiles(mol, isomericSmiles=False))
            return 0
Esempio n. 16
0
 def test2Sigs(self):
     """Check signature length and exact on-bits for three probe SMILES."""
     cases = (
         ('O=CCC=O', (149, )),
         ('OCCC=O', (149, 156)),
         ('OCCC(=O)O', (22, 29, 149, 154, 156, 184, 28822, 30134)),
     )
     for smiles, expected in cases:
         mol = Chem.MolFromSmiles(smiles)
         fp = Generate.Gen2DFingerprint(mol, self.factory)
         self.assertEqual(len(fp), 39972)
         on_bits = tuple(fp.GetOnBits())
         self.assertEqual(len(on_bits), len(expected))
         self.assertEqual(on_bits, expected)
Esempio n. 17
0
    def test9BondOrderSigs(self):
        """Signatures must change when ``includeBondOrder`` is switched on."""
        # test sigs where bond order is used
        sig_factory = self.factory
        sig_factory.SetBins([(1, 4), (4, 7), (7, 10)])
        sig_factory.minPointCount = 2
        sig_factory.maxPointCount = 3
        sig_factory.Init()

        def checked_on_bits(molecule):
            # Generate the signature, verify its length, return its on-bits.
            signature = Generate.Gen2DFingerprint(molecule, self.factory)
            self.assertEqual(len(signature), 990)
            return tuple(signature.GetOnBits())

        mol = Chem.MolFromSmiles('[O-]CCC(=O)')
        self.assertEqual(checked_on_bits(mol), (1, ))

        self.factory.includeBondOrder = True
        self.assertEqual(checked_on_bits(mol), (0, ))
Esempio n. 18
0
 def test2Bug28(self):
     """Every on-bit of the fingerprint must map back to atoms in the molecule."""
     # Raw string: the SMILES contains a backslash bond marker ("\C"), which
     # is an invalid escape sequence in a normal string literal.
     smi = r'Cc([s]1)nnc1SCC(\CS2)=C(/C([O-])=O)N3C(=O)[C@H]([C@@H]23)NC(=O)C[n]4cnnn4'
     mol = Chem.MolFromSmiles(smi)
     factory = Gobbi_Pharm2D.factory
     factory.SetBins([(2, 3), (3, 4), (4, 5), (5, 8), (8, 100)])
     sig = Generate.Gen2DFingerprint(mol, factory)
     onBits = sig.GetOnBits()
     for bit in onBits:
         atoms = Matcher.GetAtomsMatchingBit(factory, bit, mol, justOne=1)
         self.assertTrue(len(atoms))
Esempio n. 19
0
def genmol_sdf(ms):
    """Read an SD file and return one fingerprint per readable molecule.

    The fingerprint type is chosen by the module-level ``options``: a Gobbi
    2D pharmacophore fingerprint when ``options.gobbifp`` is true, otherwise
    a Morgan bit vector with ``options.radius`` and ``options.bits``.

    :param ms: path handed to ``Chem.SDMolSupplier``
    :return: list of fingerprints; records that are ``None`` or whose
        fingerprint computation fails are skipped
    """
    ret = []
    for m in Chem.SDMolSupplier(ms):
        if m is None:
            continue
        try:
            if options.gobbifp:
                fp = Generate.Gen2DFingerprint(m, Gobbi_Pharm2D.factory)
            else:
                fp = AllChem.GetMorganFingerprintAsBitVect(
                    m, options.radius, nBits=options.bits)
        except Exception:
            # Best effort: skip molecules that cannot be fingerprinted, but
            # let KeyboardInterrupt/SystemExit propagate (a bare ``except``
            # would swallow those as well).
            continue
        ret.append(fp)
    return ret
Esempio n. 20
0
def numpy_pp_fps(mols):
    """ Calculate 2D pharmacophore fingerprints and return them as numpy.ndarrays

    Falsy entries of ``mols`` are skipped, so the result can have fewer rows
    than ``mols`` has entries.

    :param mols: {list} list of molecules (RDKit mols)
    :return: result of ``_rdk2numpy`` applied to the list of fingerprints
    """
    distance_bins = [(0, 2), (2, 4), (4, 6), (6, 8), (8, 100)]
    sig_fact = SigFactory(ChemicalFeatures.BuildFeatureFactory(),
                          useCounts=False,
                          minPointCount=2,
                          maxPointCount=3)
    sig_fact.SetBins(distance_bins)
    sig_fact.Init()

    fingerprints = []
    for m in mols:
        if m:
            fingerprints.append(Generate.Gen2DFingerprint(m, sig_fact))
    return _rdk2numpy(fingerprints)
Esempio n. 21
0
def _ph_rdkit(mols_tup):
    """Build a one-row DataFrame of pharmacophore bits for one molecule.

    :param mols_tup: 4-tuple ``(mol, name, act, ignored)``
    :return: DataFrame indexed by ``name`` with one column per fingerprint
        bit (1 for on-bits), plus ``mol_id`` and ``act`` columns; remaining
        missing cells are filled with 0
    """
    mol, name, act, _ = mols_tup
    fp = Generate.Gen2DFingerprint(mol, sigFactory)
    row = pd.DataFrame(columns=range(fp.GetNumBits()))
    on_bits = list(fp.GetOnBits())
    for on_bit in on_bits:
        row.loc[name, on_bit] = 1
    row.loc[name, 'mol_id'] = name
    row.loc[name, 'act'] = act
    return row.fillna(0)
Esempio n. 22
0
def fingerprint(mol, fp_type="DL"):
    """Return the fingerprint of ``mol`` selected by ``fp_type``.

    Recognised values are "DL", "circular", "MACCS", "torsions" and "pharm";
    any other value yields ``None``.
    """
    if fp_type == "DL":
        return FingerprintMols.FingerprintMol(mol)
    if fp_type == "circular":
        return AllChem.GetMorganFingerprintAsBitVect(mol, 3, nBits=1024)
    if fp_type == "MACCS":
        return MACCSkeys.GenMACCSKeys(mol)
    if fp_type == "torsions":
        # NOTE(review): this branch calls an atom-pair fingerprint function
        # although the key says "torsions" - confirm that this is intended.
        return Pairs.GetAtomPairFingerprintAsBitVect(mol)
    if fp_type == "pharm":
        return Generate.Gen2DFingerprint(mol, Gobbi_Pharm2D.factory)
    return None
Esempio n. 23
0
def _one_cats(mol):
    """ Function to calculate the CATS pharmacophore descriptor for one molecule.
    Descriptions of the individual features can be obtained from the function ``get_cats_sigfactory``.

    Each consecutive block of 10 values is divided by that block's sum;
    positions whose block sum is 0 stay 0.

    :param mol: {RDKit molecule} molecule to calculate the descriptor for
    :return: {numpy.ndarray} calculated descriptor vector (float32)
    """
    sig_factory = get_cats_factory()
    # One-element placeholder; presumably ConvertToNumpyArray resizes it in
    # place - the slicing up to index 210 below relies on that.
    arr = np.zeros((1,))
    ConvertToNumpyArray(Generate.Gen2DFingerprint(mol, sig_factory), arr)
    block_totals = [sum(arr[start:start + 10]) for start in range(0, 210, 10)]
    # Repeat every block total 10 times so it lines up with ``arr``.
    scale = np.array([[total] * 10 for total in block_totals]).flatten()
    scaled = np.divide(arr, scale, out=np.zeros_like(arr), where=scale != 0)
    return scaled.astype('float32')
def similarityMeasure(fps, neg, mol2):
    """Score ``mol2`` against a positive and a negative reference fingerprint.

    The fingerprint of ``mol2`` is generated with the module-level
    ``sigFactory``; its SMILES and both similarities are printed.

    :param fps: positive reference fingerprint
    :param neg: negative reference fingerprint
    :param mol2: molecule to score
    :return: ``(similarityPos, similarityPos - similarityNeg)``, both based
        on Tanimoto similarity
    """
    fps2 = Generate.Gen2DFingerprint(mol2, sigFactory)

    similarityPos = DataStructs.FingerprintSimilarity(
        fps, fps2, metric=DataStructs.TanimotoSimilarity)
    similarityNeg = DataStructs.FingerprintSimilarity(
        neg, fps2, metric=DataStructs.TanimotoSimilarity)

    # A single pre-formatted argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print('%s %s %s' % (Chem.MolToSmiles(mol2), similarityPos, similarityNeg))
    return similarityPos, similarityPos - similarityNeg
Esempio n. 25
0
def get_gobbi_similarity(correct_ligand,
                         mol_to_fix,
                         type_fp='normal',
                         use_features=False):
    """Return the Tanimoto similarity of two ligands' Gobbi fingerprints.

    Bond orders of both ligands are first assigned from a hard-coded template
    SMILES; the fingerprints are then generated with ``Gobbi_Pharm2D.factory``
    using each molecule's 3D distance matrix.

    :param correct_ligand: reference ligand molecule
    :param mol_to_fix: ligand molecule to compare against the reference
    :param type_fp: currently unused
    :param use_features: currently unused
    :return: Tanimoto similarity of the two fingerprints (also printed)
    """
    ref = Chem.MolFromSmiles(
        'C1=CC(=C(C=C1C2=C(C(=O)C3=C(C=C(C=C3O2)O)O)O)O)O')
    mol1 = AllChem.AssignBondOrdersFromTemplate(ref, correct_ligand)
    mol2 = AllChem.AssignBondOrdersFromTemplate(ref, mol_to_fix)

    factory = Gobbi_Pharm2D.factory
    fp1 = Generate.Gen2DFingerprint(mol1,
                                    factory,
                                    dMat=Chem.Get3DDistanceMatrix(mol1))
    fp2 = Generate.Gen2DFingerprint(mol2,
                                    factory,
                                    dMat=Chem.Get3DDistanceMatrix(mol2))
    # Tanimoto similarity
    tani = DataStructs.TanimotoSimilarity(fp1, fp2)
    print('GOBBI similarity is ------> ', tani)
    # Hand the value back to the caller instead of only printing it.
    return tani
Esempio n. 26
0
    def test6SimpleSigCounts(self):
        """Check count-based signatures (``useCounts``) for two carbonyl probes."""
        factory = self.factory
        factory.SetBins([(1, 3), (3, 7), (7, 10)])
        factory.minPointCount = 2
        factory.maxPointCount = 3
        factory.useCounts = True
        factory.Init()

        mol = Chem.MolFromSmiles('O=CCC=O')
        sig = Generate.Gen2DFingerprint(mol, factory)
        self.assertEqual(sig.GetLength(), 990)
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        cs = tuple(sig.GetNonzeroElements().items())
        self.assertEqual(cs, ((1, 1), ))

        mol = Chem.MolFromSmiles('O=CC(CC=O)CCC=O')
        sig = Generate.Gen2DFingerprint(mol, factory)
        self.assertEqual(sig.GetLength(), 990)
        elems = sig.GetNonzeroElements()
        # Sort by bit id; dict.keys() cannot be sorted in place on Python 3.
        cs = tuple((bit, elems[bit]) for bit in sorted(elems))
        self.assertEqual(cs, ((1, 2), (2, 1), (67, 1)))
Esempio n. 27
0
    def test7SimpleSigSkip(self):
        """With ``skipFeats = 'Acceptor'`` the signature shrinks and no bit is set."""
        factory = self.factory
        factory.SetBins([(1, 3), (3, 7), (7, 10)])
        factory.minPointCount = 2
        factory.maxPointCount = 3
        factory.skipFeats = 'Acceptor'
        factory.Init()

        mol = Chem.MolFromSmiles('O=CCC=O')
        sig = Generate.Gen2DFingerprint(mol, factory)
        self.assertEqual(len(sig), 570)
        bs = tuple(sig.GetOnBits())
        self.assertEqual(bs, ())
Esempio n. 28
0
def get_distance_func(name):
    """Return ``(make_representation, distf)`` for a named similarity measure.

    ``make_representation(chem)`` builds a fingerprint from ``chem.mol`` and
    ``distf(x, y)`` is ``1.0 - DataStructs.FingerprintSimilarity(x, y)``.

    :param name: ``'RDK/T'`` (RDKit fingerprint) or ``'GOBI/T'`` (Gobbi 2D
        pharmacophore fingerprint)
    :return: tuple of two callables
    :raises ValueError: when ``name`` is not a known measure
    """
    if name == 'RDK/T':
        make_representation = (lambda chem: Chem.RDKFingerprint(chem.mol))
        distf = lambda x, y: 1.0 - DataStructs.FingerprintSimilarity(x, y)

        return (make_representation, distf)
    elif name == 'GOBI/T':
        make_representation = lambda chem: Generate.Gen2DFingerprint(
            chem.mol, Gobbi_Pharm2D.factory)
        distf = lambda x, y: 1.0 - DataStructs.FingerprintSimilarity(x, y)

        return (make_representation, distf)
    else:
        # Report the argument that was actually passed in; the previous
        # message referenced a ``job`` object that is not defined here.
        raise ValueError('Unknown similarity measure: %s' % name)
Esempio n. 29
0
def _cats_corr(mols, q):
    """ private cats descriptor function to be used in multiprocessing

    For every molecule, each consecutive block of 10 values is divided by
    that block's sum; positions whose block sum is 0 stay 0. The result is
    put on ``q`` rather than returned.

    :param mols: {list/array} molecules (RDKit mol) to calculate the descriptor for
    :param q: {queue} multiprocessing queue instance that receives a float32
        array of shape ``(len(mols), 210)``
    """
    sig_factory = get_cats_factory()
    rows = []
    for mol in mols:
        # One-element placeholder; presumably ConvertToNumpyArray resizes it
        # in place - the slicing up to index 210 below relies on that.
        arr = np.zeros((1,))
        ConvertToNumpyArray(Generate.Gen2DFingerprint(mol, sig_factory), arr)
        block_totals = [sum(arr[start:start + 10]) for start in range(0, 210, 10)]
        scale = np.array([[total] * 10 for total in block_totals]).flatten()
        rows.append(np.divide(arr, scale, out=np.zeros_like(arr), where=scale != 0))
    stacked = np.array(rows).reshape((len(mols), 210))
    q.put(stacked.astype('float32'))
Esempio n. 30
0
 def test3Roundtrip(self):
     """Every on-bit must map back to atoms for the first 20 NCI SMILES lines."""
     # longer-running Bug 28 test
     max_lines = 20
     smi_path = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_5K.smi')
     with open(smi_path, 'r') as handle:
         lines = handle.readlines()[:max_lines]
     factory = Gobbi_Pharm2D.factory
     factory.SetBins([(2, 3), (3, 4), (4, 5), (5, 8), (8, 100)])
     # The SMILES is the first tab-separated field of each line.
     for smi in (line.split('\t')[0] for line in lines):
         mol = Chem.MolFromSmiles(smi)
         sig = Generate.Gen2DFingerprint(mol, factory)
         for bit in sig.GetOnBits():
             atoms = Matcher.GetAtomsMatchingBit(factory, bit, mol, justOne=1)
             assert len(atoms), f'bit {bit} failed to match for smi {smi}'