Ejemplo n.º 1
0
def check_mol(mol):
    """Embed ``mol`` onto the module-level pharmacophore and collect the results.

    Uses the module-level ``feat_fact`` (feature factory) and ``pcophore``
    (pharmacophore) objects.

    :param mol: molecule to match against the pharmacophore
    :return: list of embedded molecules that were UFF-optimised and
        transformed; an empty list when the molecule does not match
    """
    res = []
    match, mList = EmbedLib.MatchPharmacophoreToMol(mol, feat_fact, pcophore)
    if not match:
        # Return an empty list (not None) so callers can always iterate.
        return res

    # Single-argument, parenthesised prints behave the same on Python 2 and 3.
    for feats in mList:
        for feat in feats:
            print("%s %s" % (feat.GetAtomIds(), feat.GetFamily()))

    bounds = rdDistGeom.GetMoleculeBoundsMatrix(mol)
    pList = EmbedLib.GetAllPharmacophoreMatches(mList, bounds, pcophore)
    print(pList)
    print(len(pList))

    # One list of atom-id tuples per pharmacophore match.
    phMatches = [[feat.GetAtomIds() for feat in pMatch] for pMatch in pList]

    for phMatch in phMatches:
        bm, embeds, _ = EmbedLib.EmbedPharmacophore(
            mol, phMatch, pcophore, count=20, silent=1)
        print("-----> embeds num: %s" % len(embeds))
        for embed in embeds:
            # Was misspelled ``UFFOPtimizeMolecule``.
            AllChem.UFFOptimizeMolecule(embed)
            # Was misspelled ``rdAliginment.GetAlignmetTransform``.
            # NOTE(review): assumes the module is imported as ``rdAlignment``.
            # Both arguments are bounds matrices here, not point sets -- confirm
            # that this is the intended reference/probe pair.
            align_data = rdAlignment.GetAlignmentTransform(bm, bounds)
            AllChem.TransformMol(embed, align_data[1])
            res.append(embed)
    return res
Ejemplo n.º 2
0
    def _align_molecules(self, molecules: List[Chem.Mol]) -> None:
        """ Align a list of molecules to a given pharmacophore.

        Every processed molecule is appended to ``self.molecules`` as a
        ``MolScore(score, id, mol)`` tuple. Molecules that cannot be matched,
        or that yield no alignment, are stored with a score of 0.0. Molecules
        whose embedding raises are skipped and not stored.

        Parameters
        ----------
        molecules : list of rdkit.Chem.Mol
            List of molecules to align.

        """
        self.n_molecules += len(molecules)

        rdkit_pharmacophore, radii = self.pharmacophore.to_rdkit()
        apply_radii_to_bounds(radii, rdkit_pharmacophore)

        fdef = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
        featFactory = ChemicalFeatures.BuildFeatureFactory(fdef)

        MolScore = namedtuple("MolScore", ["score", "id", "mol"])

        def record_unmatched(unmatched):
            # Store the untouched molecule with a zero score.
            self.molecules.append(
                MolScore(0.0, unmatched.GetProp("_Name"), unmatched))

        for mol in tqdm(molecules):

            bounds_matrix = rdDistGeom.GetMoleculeBoundsMatrix(mol)
            can_match, all_matches = EmbedLib.MatchPharmacophoreToMol(
                mol, featFactory, rdkit_pharmacophore)
            if not can_match:
                record_unmatched(mol)
                continue

            failed, _, matched_mols, _ = EmbedLib.MatchPharmacophore(
                all_matches,
                bounds_matrix,
                rdkit_pharmacophore,
                useDownsampling=True)
            if failed:
                record_unmatched(mol)
                continue

            atom_match = [list(x.GetAtomIds()) for x in matched_mols]

            try:
                mol_H = Chem.AddHs(mol)
                _, embeddings, _ = EmbedLib.EmbedPharmacophore(
                    mol_H, atom_match, rdkit_pharmacophore, count=10)
            except Exception:
                # ``Exception`` rather than a bare ``except`` so Ctrl-C and
                # SystemExit still stop the loop.
                # NOTE(review): unlike the other failure paths, this molecule
                # is not recorded with a zero score -- confirm that is intended.
                continue

            SSDs = transform_embeddings(rdkit_pharmacophore, embeddings,
                                        atom_match)
            if len(SSDs) == 0:
                record_unmatched(mol)
                continue
            best_fit_index = min(enumerate(SSDs), key=itemgetter(1))[0]

            # The score is the reciprocal of the smallest SSD value.
            # NOTE(review): an SSD of exactly 0 would raise ZeroDivisionError.
            score = 1 / SSDs[best_fit_index]
            matched_mol = MolScore(score, mol.GetProp("_Name"),
                                   embeddings[best_fit_index])
            self.molecules.append(matched_mol)
Ejemplo n.º 3
0
    def test3Embed(self):
        """Match the clipped cdk2 set against ``self.pcophore`` and check embed stats.

        Reads ``(name, molPkl, _)`` records from ``cdk2-syn-clip100.pkl.gz``,
        counts the molecules that match the pharmacophore, and for the
        molecules listed in ``testResults`` compares the statistics returned
        by ``EmbedLib.EmbedOne`` with the stored reference values.
        """
        testResults = {
            'mol_197':
            (218.80, 35.75, 110.33, 11.58, 109.66, 11.09, 90.35, 2.95, 0.00),
            'mol_223':
            (259.19, 6.27, 134.13, 1.12, 134.06, 1.12, 85.74, 0.61, 0.00),
            'mol_269':
            (204.51, 7.89, 103.89, 1.20, 102.66, 1.20, 88.07, 1.21, 6.00),
        }
        nDone = 0
        nHits = 0
        pklPath = os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz')
        # The context manager closes the archive even if an assertion fails.
        with gzip.open(pklPath, 'rb') as inF:
            while 1:
                try:
                    name, molPkl, _ = cPickle.load(inF, encoding='latin1')
                    if PY3:
                        molPkl = bytes(molPkl, encoding='latin1')
                except Exception:
                    # Any load failure ends the loop; the nDone assertion
                    # below detects a premature stop.
                    break

                nDone += 1

                mol = Chem.Mol(molPkl)
                nboundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol)
                DG.DoTriangleSmoothing(nboundsMat)
                matched, matches = EmbedLib.MatchPharmacophoreToMol(
                    mol, self.featFactory, self.pcophore)
                if not matched:
                    continue

                failed, _, match, _ = EmbedLib.MatchPharmacophore(
                    matches, nboundsMat, self.pcophore, useDownsampling=1)
                if failed:
                    continue
                nHits += 1

                if name not in testResults:
                    continue

                stats = EmbedLib.EmbedOne(mol,
                                          name,
                                          match,
                                          self.pcophore,
                                          count=10,
                                          silent=1,
                                          randomSeed=23)
                tgt = testResults[name]
                self.assertEqual(len(tgt), len(stats))
                print(name)
                print(','.join(['%.2f' % x for x in stats]))
                # Index 1 is not compared; every other value is checked with
                # feq(..., 5.0).
                self.assertTrue(feq(tgt[0], stats[0], 5.0),
                                (tgt[0], stats[0]))
                for i in range(2, len(tgt)):
                    self.assertTrue(feq(tgt[i], stats[i], 5.0),
                                    (tgt[i], stats[i]))

        self.assertEqual(nDone, 100)
        self.assertEqual(nHits, 50)
Ejemplo n.º 4
0
  def test4Search(self):
    """Search the clipped cdk2 set with a three-point pharmacophore using 2D limits.

    Builds an Acceptor/Donor/Aromatic pharmacophore with explicit 3D lower and
    upper bounds plus 2D upper bounds, then counts how many of the molecules
    in ``cdk2-syn-clip100.pkl.gz`` can match by features (``nMatches``) and
    how many pass ``EmbedLib.MatchPharmacophore`` (``nHits``).
    """
    featFactory = ChemicalFeatures.BuildFeatureFactory(
      os.path.join(self.dataDir, 'BaseFeatures.fdef'))

    activeFeats = [ChemicalFeatures.FreeChemicalFeature('Acceptor',
                                                        Geometry.Point3D(0.0, 0.0, 0.0)),
                   ChemicalFeatures.FreeChemicalFeature('Donor', Geometry.Point3D(0.0, 0.0, 0.0)),
                   ChemicalFeatures.FreeChemicalFeature('Aromatic',
                                                        Geometry.Point3D(0.0, 0.0, 0.0))]
    pcophore = Pharmacophore.Pharmacophore(activeFeats)
    pcophore.setLowerBound(0, 1, 2.251)
    pcophore.setUpperBound(0, 1, 2.451)
    pcophore.setUpperBound2D(0, 1, 3)

    pcophore.setLowerBound(0, 2, 4.970)
    pcophore.setUpperBound(0, 2, 5.170)
    pcophore.setUpperBound2D(0, 2, 6)

    pcophore.setLowerBound(1, 2, 2.681)
    pcophore.setUpperBound(1, 2, 2.881)
    pcophore.setUpperBound2D(1, 2, 6)

    nDone = 0
    nMatches = 0
    nHits = 0

    # The context manager closes the archive even if an assertion fails.
    with gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') as inF:
      while 1:
        try:
          # The stored name and bounds matrix are not used; the bounds are
          # recomputed from the molecule below.
          _, molPkl, _ = cPickle.load(inF, encoding='latin1')
          if PY3:
            molPkl = bytes(molPkl, encoding='latin1')
        except Exception:
          # Any load failure ends the loop; the nDone assertion below
          # detects a premature stop.
          break

        nDone += 1

        mol = Chem.Mol(molPkl)
        boundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol)
        DG.DoTriangleSmoothing(boundsMat)

        canMatch, matches = EmbedLib.MatchPharmacophoreToMol(mol, featFactory, pcophore)
        if not canMatch:
          continue
        nMatches += 1
        failed, _, _, _ = EmbedLib.MatchPharmacophore(matches, boundsMat, pcophore,
                                                      useDownsampling=True, use2DLimits=True,
                                                      mol=mol)
        if not failed:
          nHits += 1

    self.assertEqual(nDone, 100)
    self.assertEqual(nMatches, 93)
    self.assertEqual(nHits, 67)
Ejemplo n.º 5
0
 def _matchMol(self, tpl, pcophore, featFactory, downSample):
   """Return 1 when the pickled molecule in ``tpl`` matches ``pcophore``, else 0.

   ``tpl`` is a ``(name, molPkl, boundsMat)`` triple. The molecule is rebuilt
   from ``molPkl`` and must pass both the feature match and the
   bounds-matrix match.
   """
   _, pickledMol, bounds = tpl
   candidate = Chem.Mol(pickledMol)
   canMatch, featMatches = EmbedLib.MatchPharmacophoreToMol(candidate, featFactory, pcophore)
   if not canMatch:
     return 0
   outcome = EmbedLib.MatchPharmacophore(featMatches, bounds, pcophore,
                                         useDownsampling=downSample)
   # The first element of the result is the "failed" flag.
   return 0 if outcome[0] else 1
Ejemplo n.º 6
0
    def optimize(self, rdmol, boundsMatrix=None, atomMatch=None):
        """
        Optimize ``rdmol`` and find its lowest-energy conformer.

        For each conformer returned by ``rdmol.GetConformers()``: when either
        ``boundsMatrix`` or ``atomMatch`` is missing, the conformer is
        UFF-optimized and its UFF energy computed; otherwise
        ``EmbedLib.OptimizeMol`` is called with the bounds matrix and atom
        matches, and the second value it returns is used as the energy.

        :param rdmol: molecule whose conformers are optimized
        :param boundsMatrix: optional bounds matrix passed to ``EmbedLib.OptimizeMol``
        :param atomMatch: optional atom matches passed to ``EmbedLib.OptimizeMol``
        :return rdmol, minEid (id of the lowest energy conformer; 0 if there are none)
        """
        unconstrained = boundsMatrix is None or atomMatch is None

        bestId = 0
        bestEnergy = 9.999999e99  # sentinel above any energy expected in practice

        for conformer in rdmol.GetConformers():
            if unconstrained:
                confId = conformer.GetId()
                AllChem.UFFOptimizeMolecule(rdmol, confId=confId)
                forceField = AllChem.UFFGetMoleculeForceField(rdmol, confId=confId)
                currentEnergy = forceField.CalcEnergy()
            else:
                _, currentEnergy = EmbedLib.OptimizeMol(rdmol,
                                                        boundsMatrix,
                                                        atomMatches=atomMatch,
                                                        forceConstant=100000.0)

            if currentEnergy < bestEnergy:
                bestId = conformer.GetId()
                bestEnergy = currentEnergy

        return rdmol, bestId
Ejemplo n.º 7
0
def check_mol(mol):
    """Embed ``mol`` onto the module-level pharmacophore and align the results.

    Uses the module-level ``feat_fact``, ``pcophore``, ``ref_mat`` and
    ``GetFeatsPerAtoms`` names. Existing conformers on ``mol`` are removed
    first.

    :param mol: molecule to match against the pharmacophore
    :return: list of embedded molecules, each UFF-optimised and transformed
        with the result of ``rdAlignment.GetAlignmentTransform``; an empty
        list when the molecule does not match
    """
    res = []
    mol.RemoveAllConformers()
    match, mList = EmbedLib.MatchPharmacophoreToMol(mol, feat_fact, pcophore)
    if not match:
        print("no hits")
        return res

    # Single-argument, parenthesised prints behave the same on Python 2 and 3.
    for feats in mList:
        for feat in feats:
            print("%s %s" % (feat.GetAtomIds(), feat.GetFamily()))

    bounds = rdDistGeom.GetMoleculeBoundsMatrix(mol)
    pList = EmbedLib.GetAllPharmacophoreMatches(mList, bounds, pcophore)

    # One list of atom-id tuples per pharmacophore match.
    phMatches = [[feat.GetAtomIds() for feat in pMatch] for pMatch in pList]

    for phMatch in phMatches:
        _, embeds, _ = EmbedLib.EmbedPharmacophore(mol,
                                                   phMatch,
                                                   pcophore,
                                                   count=5,
                                                   silent=1)
        print("-----> embeds num: %s" % len(embeds))
        for embed in embeds:
            AllChem.UFFOptimizeMolecule(embed)
            # Look up the embedded molecule's feature for each matched atom
            # set, then use the feature positions as the probe points.
            embed_feats = feat_fact.GetFeaturesForMol(embed)
            feats_dict = GetFeatsPerAtoms(embed_feats)
            match_feats = [feats_dict[atomid] for atomid in phMatch]
            pro_mat = [list(feat.GetPos()) for feat in match_feats]
            align_data = rdAlignment.GetAlignmentTransform(
                ref_mat, pro_mat, maxIterations=200)
            AllChem.TransformMol(embed, align_data[1])
            print(align_data[0])
            print(align_data[1])
            res.append(embed)
    return res
Ejemplo n.º 8
0
  def testIssue268(self):
    """Regression test for Issue 268.

    Both test molecules must produce a four-feature match against the
    pickled pharmacophore, with and without 2D limits, both before and
    after the bounds matrices are triangle-smoothed.
    """
    from rdkit import RDLogger
    #RDLogger.EnableLog('rdApp.debug')
    featFactory = ChemicalFeatures.BuildFeatureFactory(os.path.join(self.dataDir, 'Issue268.fdef'))
    m1 = Chem.MolFromMolFile(os.path.join(self.dataDir, 'Issue268_Mol1.mol'))
    m2 = Chem.MolFromMolFile(os.path.join(self.dataDir, 'Issue268_Mol2.mol'))
    # The pickle is read as text so that '\r\n' line endings can be
    # normalised before unpickling; the ``with`` block closes the file.
    with open(os.path.join(self.dataDir, 'Issue268_Pcop.pkl'), 'r') as inTF:
      buf = inTF.read().replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(buf) as inF:
      pcop = cPickle.load(inF, encoding='latin1')
    #pcop._boundsMat=numpy.array(pcop._boundsMat)
    #pcop._boundsMat2D=numpy.array(pcop._boundsMat2D)
    #cPickle.dump(pcop,file(os.path.join(self.dataDir,
    #                                    'Issue268_Pcop.new.pkl'),'wb+'))
    _, mList1 = EmbedLib.MatchFeatsToMol(m1, featFactory, pcop.getFeatures())
    _, mList2 = EmbedLib.MatchFeatsToMol(m2, featFactory, pcop.getFeatures())
    b1 = rdDistGeom.GetMoleculeBoundsMatrix(m1)
    b2 = rdDistGeom.GetMoleculeBoundsMatrix(m2)

    cases = ((m1, mList1, b1), (m2, mList2, b2))

    def checkFourFeatureMatches():
      # Same order as the original assertions: default limits for both
      # molecules first, then 2D limits for both.
      for _, mList, bounds in cases:
        self.assertEqual(len(EmbedLib.MatchPharmacophore(mList, bounds, pcop)[2]), 4)
      for mol, mList, bounds in cases:
        self.assertEqual(
          len(EmbedLib.MatchPharmacophore(mList, bounds, pcop, mol=mol, use2DLimits=True)[2]), 4)

    checkFourFeatureMatches()

    from rdkit import DistanceGeometry as DG
    # The smoothed b1/b2 objects are the same ones referenced by ``cases``.
    for _, _, bounds in cases:
      self.assertTrue(DG.DoTriangleSmoothing(bounds))

    checkFourFeatureMatches()
Ejemplo n.º 9
0
def check_mol(mols, FeatFact, pcophore):
    """Return the molecules in ``mols`` whose features can match ``pcophore``.

    For every matching molecule the atom ids of each matched feature are
    printed; the number of matching molecules is printed at the end.

    :param mols: iterable of molecules to screen
    :param FeatFact: feature factory passed to ``EmbedLib.MatchPharmacophoreToMol``
    :param pcophore: pharmacophore to match against
    :return: list of the molecules that matched, in input order
    """
    matched_mols = []
    for mol in mols:
        match, mList = EmbedLib.MatchPharmacophoreToMol(mol, FeatFact, pcophore)
        if not match:
            continue
        matched_mols.append(mol)
        # Single-argument, parenthesised prints behave the same on
        # Python 2 and Python 3.
        for feats in mList:
            for feat in feats:
                print(feat.GetAtomIds())
    print(len(matched_mols))
    return matched_mols
Ejemplo n.º 10
0
    def rd_embed(self):
        """
        Embed the RDKit molecule and optimize the resulting conformers.

        Adapted from rmgpy so that it works without a geometry object.

        Without a bounds matrix or atom match, conformers are generated with
        ``AllChem.EmbedMultipleConfs``. Otherwise ``EmbedLib.EmbedMol`` is
        retried until it succeeds or the attempt budget is exhausted.

        Returns ``(self.rdkit_molecule, minEid)`` from
        ``self.optimize_rdkit_molecule()``, or ``(None, None)`` when every
        constrained embedding attempt failed.
        """
        maxAttempts = 10000

        if self.bm is None or self.atom_match is None:
            AllChem.EmbedMultipleConfs(self.rdkit_molecule,
                                       maxAttempts,
                                       randomSeed=1)
            self.rdkit_molecule, lowestId = self.optimize_rdkit_molecule()
            return self.rdkit_molecule, lowestId

        # Embed the molecule according to the bounds matrix. Built to handle
        # possible failures of some of the embedding attempts.
        self.rdkit_molecule.RemoveAllConformers()
        embedded = False
        for attempt in range(1, maxAttempts + 1):
            try:
                EmbedLib.EmbedMol(self.rdkit_molecule,
                                  self.bm,
                                  atomMatch=self.atom_match)
            except ValueError:
                logging.info(
                    "RDKit failed to embed on attempt {0} of {1}".format(
                        attempt, maxAttempts))
            except RuntimeError:
                logging.info("RDKit failed to embed.")
            else:
                embedded = True
                break

        if not embedded:
            logging.error("RDKit failed all attempts to embed")
            return None, None

        # RDKit currently embeds the conformers and sets the id as 0, so even
        # though multiple conformers have been generated, only 1 can be
        # called. Renumber them so that each conformer has a distinct id.
        for newId, conformer in enumerate(self.rdkit_molecule.GetConformers()):
            conformer.SetId(newId)

        self.rdkit_molecule, lowestId = self.optimize_rdkit_molecule()
        return self.rdkit_molecule, lowestId
Ejemplo n.º 11
0
    def _align_molecule(mol, pharmacophore, matches, featFactory, sort=False):
        """ Align a molecule to a given pharmacophore.
        
            Uses rdkit alignment algorithm. Nothing is returned; a successful
            alignment is reported by adding a ``Match`` to ``matches``.

            Parameters
            ----------
            mol : rdkit.Chem.Mol
                Molecule to align.

            pharmacophore : rdkit.Chem.Pharm3D.Pharmacophore
                An rdkit pharmacophore

            matches : list
                If a molecule is matched to the pharmacophore it will be appended to this list.

            featFactory : rdkit.Chem.rdMolChemicalFeatures.MolChemicalFeatureFactory
                The feature factory.
            
            sort : bool, default=False
                Whether to insert into ``matches`` in sorted position (using bisect)
                instead of appending at the end.

        """
        bounds_matrix = rdDistGeom.GetMoleculeBoundsMatrix(mol)
        # Check if the molecule features can match with the pharmacophore.
        # all_matches is a list of tuples where each tuple contains the chemical features
        can_match, all_matches = EmbedLib.MatchPharmacophoreToMol(mol, featFactory, pharmacophore)
        if not can_match:
            return

        # Match the molecule to the pharmacophore without aligning it
        failed, _, matched_mols, _ = EmbedLib.MatchPharmacophore(all_matches,
                                                                 bounds_matrix,
                                                                 pharmacophore,
                                                                 useDownsampling=True)
        if failed:
            return

        atom_match = [list(x.GetAtomIds()) for x in matched_mols]
        try:
            mol_H = Chem.AddHs(mol)
            # Embed molecule onto the pharmacophore
            # embeddings is a list of molecules with a single conformer
            _, embeddings, _ = EmbedLib.EmbedPharmacophore(mol_H, atom_match, pharmacophore, count=10)
        except Exception:
            # Embedding failures are treated as "no match". ``Exception``
            # rather than a bare ``except`` so KeyboardInterrupt/SystemExit
            # still propagate.
            return

        # Align embeddings to the pharmacophore 
        SSDs = transform_embeddings(pharmacophore, embeddings, atom_match)
        if len(SSDs) == 0:
            return
        best_fit_index = min(enumerate(SSDs), key=itemgetter(1))[0]

        try:
            mol_id = mol.GetProp("_Name")
        except KeyError:
            # The molecule has no "_Name" property.
            mol_id = None

        matched_mol = Match(SSDs[best_fit_index], mol_id, embeddings[best_fit_index])
        if sort:
            # Insert in sorted position
            try:
                bisect.insort(matches, matched_mol)
            except TypeError:
                # Case when a molecule is repeated: the comparison falls
                # through to the molecule objects, which cannot be ordered.
                return
        else:
            matches.append(matched_mol)