def get_SucosScore(ref_mol, query_mol, tani=False, ref_features=None, query_features=None,
                   score_mode=FeatMaps.FeatMapScoreMode.All):
    """
    Calculate the SuCOS score for a query molecule against a reference molecule.

    This is the key function that calculates the SuCOS scores and is expected to be called
    from other modules. The score is the unweighted mean of a feature map score and a shape
    overlap term, each clipped to the range [0, 1].

    To improve performance you can pre-calculate the features and pass them in as optional
    parameters to avoid having to recalculate them. Use the getRawFeatures function to
    pre-calculate the features.

    Note that this function itself performs no alignment: the shape term is computed from the
    molecules' coordinates as they are passed in.

    :param ref_mol: The reference molecule to compare to
    :param query_mol: The molecule to compare against the reference
    :param tani: If True the shape term is 1 minus the shape Tanimoto distance; otherwise it is
        1 minus the shape protrude distance. The flag is also forwarded to get_FeatureMapScore.
    :param ref_features: Optional pre-calculated features for the reference molecule. Only
        ``None`` triggers a recalculation; an empty collection is used as given.
    :param query_features: Optional pre-calculated features for the query molecule. Only
        ``None`` triggers a recalculation; an empty collection is used as given.
    :param score_mode: The feature map scoring mode, forwarded to get_FeatureMapScore.
    :return: A tuple of 3 values: 1 the SuCOS score, 2 the (clipped) feature map score,
        3 the shape term (1 minus the Tanimoto distance, or 1 minus the protrude distance).
    """
    # Test against None explicitly so that an empty, but genuinely pre-calculated, feature
    # collection is not thrown away and recomputed.
    if ref_features is None:
        ref_features = getRawFeatures(ref_mol)
    if query_features is None:
        query_features = getRawFeatures(query_mol)

    fm_score = get_FeatureMapScore(ref_features, query_features, tani, score_mode)
    fm_score = np.clip(fm_score, 0, 1)

    if tani:
        tanimoto_dist = float(rdShapeHelpers.ShapeTanimotoDist(ref_mol, query_mol))
        shape_term = np.clip(1 - tanimoto_dist, 0, 1)
    else:
        protrude_dist = rdShapeHelpers.ShapeProtrudeDist(ref_mol, query_mol, allowReordering=False)
        # The distance is clipped first, then converted into a similarity-like value.
        shape_term = 1.0 - np.clip(protrude_dist, 0, 1)

    # Equal weighting of the feature and shape contributions.
    SuCOS_score = 0.5 * fm_score + 0.5 * shape_term
    return SuCOS_score, fm_score, shape_term
def CalculateTanimotoShapeDistance(RefMol, ProbeMol):
    """Return the shape Tanimoto distance of a molecule pair as a two-decimal string.

    No alignment is performed here, so the molecules are expected to be aligned already.

    :param RefMol: The reference molecule
    :param ProbeMol: The probe molecule
    :return: The distance reported by rdShapeHelpers.ShapeTanimotoDist, formatted with "%.2f"
    """
    # The probe is handed to RDKit first, followed by the reference.
    shape_distance = rdShapeHelpers.ShapeTanimotoDist(ProbeMol, RefMol)
    return "%.2f" % shape_distance
def _shapeClustering(mol1, rdkit_mols):
    """
    Build one row of shape Tanimoto distances between mol1 and each molecule in a list.

    For every molecule in rdkit_mols an O3A alignment of (mol1, molecule) is computed and
    applied, after which the shape Tanimoto distance of the pair is recorded.

    NOTE(review): the alignment is applied on every iteration, so molecule coordinates are
    presumably modified as a side effect; per RDKit's GetO3A signature the first argument
    is the probe (the one that gets moved) - confirm this is intended for mol1.

    Parameters
    ----------
    mol1: rdkit.Chem.rdchem.Mol
        The molecule every entry of rdkit_mols is compared with
    rdkit_mols: list
        The list of rdkit.Chem.rdchem.Mol objects

    Returns
    -------
    tanimotorow: list
        The shape Tanimoto distances, in the same order as rdkit_mols
    """
    from rdkit.Chem import rdMolAlign, rdShapeHelpers

    def _align_and_measure(other_mol):
        # Align first, then measure: the distance depends on the current coordinates.
        alignment = rdMolAlign.GetO3A(mol1, other_mol)
        alignment.Align()
        return rdShapeHelpers.ShapeTanimotoDist(mol1, other_mol)

    return [_align_and_measure(other_mol) for other_mol in rdkit_mols]
def test1Shape(self):
    """Exercise shape encoding, conformer dimensions/boxes and the shape Tanimoto distance.

    Uses the 1oir.mol / 1oir_conf.mol test molecules shipped under RDBaseDir and compares
    against hard-coded reference values. As a side effect the test writes the file
    '1oir_shape.grd' into the current working directory.
    """
    fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'ShapeHelpers', 'test_data',
                         '1oir.mol')

    # --- Encode the shape of the canonicalized molecule on a fixed-size grid.
    m = Chem.MolFromMolFile(fileN)
    rdmt.CanonicalizeMol(m)
    dims1, offset1 = rdshp.ComputeConfDimsAndOffset(m.GetConformer())
    grd = geom.UniformGrid3D(30.0, 16.0, 10.0)
    rdshp.EncodeShape(m, grd, 0)
    ovect = grd.GetOccupancyVect()
    self.assertEqual(ovect.GetTotalVal(), 9250)

    # --- Passing the canonical transform explicitly must give the same dimensions,
    # offset and occupancy as canonicalizing the molecule itself.
    m = Chem.MolFromMolFile(fileN)
    trans = rdmt.ComputeCanonicalTransform(m.GetConformer())
    dims, offset = rdshp.ComputeConfDimsAndOffset(m.GetConformer(), trans=trans)
    dims -= dims1
    offset -= offset1
    self.assertTrue(feq(dims.Length(), 0.0))
    self.assertTrue(feq(offset.Length(), 0.0))

    grd1 = geom.UniformGrid3D(30.0, 16.0, 10.0)
    rdshp.EncodeShape(m, grd1, 0, trans)
    ovect = grd1.GetOccupancyVect()
    self.assertEqual(ovect.GetTotalVal(), 9250)

    # Encoding without the transform is only run here; its result is not checked.
    grd2 = geom.UniformGrid3D(30.0, 16.0, 10.0)
    rdshp.EncodeShape(m, grd2, 0)

    # --- Shape Tanimoto distance between the molecule and a second conformation,
    # measured after aligning the two (the RMSD returned by AlignMol is not needed).
    fileN2 = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'ShapeHelpers', 'test_data',
                          '1oir_conf.mol')
    m2 = Chem.MolFromMolFile(fileN2)
    rdMolAlign.AlignMol(m, m2)
    self.assertTrue(feq(rdshp.ShapeTanimotoDist(m, m2), 0.2813))

    dist = rdshp.ShapeTanimotoDist(mol1=m, mol2=m2, confId1=0, confId2=0, gridSpacing=0.25,
                                   stepSize=0.125)
    self.assertTrue(feq(dist, 0.3021))

    # --- Grid sized from the (non-canonicalized) conformer's own dimensions and offset.
    m = Chem.MolFromMolFile(fileN)
    # The centroid is computed but its value is not checked.
    rdmt.ComputeCentroid(m.GetConformer())
    dims, offset = rdshp.ComputeConfDimsAndOffset(m.GetConformer())
    # The grid must be created before dims/offset are turned into differences below.
    grd = geom.UniformGrid3D(dims.x, dims.y, dims.z, 0.5,
                             DataStructs.DiscreteValueType.TWOBITVALUE, offset)
    dims -= geom.Point3D(13.927, 16.97, 9.775)
    offset -= geom.Point3D(-4.353, 16.829, 2.782)
    self.assertTrue(feq(dims.Length(), 0.0))
    self.assertTrue(feq(offset.Length(), 0.0))
    rdshp.EncodeShape(m, grd, 0)
    ovect = grd.GetOccupancyVect()
    self.assertEqual(ovect.GetTotalVal(), 9275)
    geom.WriteGridToFile(grd, '1oir_shape.grd')

    # --- Bounding boxes before and after canonicalization, and their union.
    m = Chem.MolFromMolFile(fileN)
    lc, uc = rdshp.ComputeConfBox(m.GetConformer())
    rdmt.CanonicalizeMol(m)
    lc1, uc1 = rdshp.ComputeConfBox(m.GetConformer())

    # The union must be computed before the corners are turned into differences below.
    lc2, uc2 = rdshp.ComputeUnionBox((lc, uc), (lc1, uc1))
    lc -= geom.Point3D(-4.353, 16.829, 2.782)
    uc -= geom.Point3D(9.574, 33.799, 12.557)
    self.assertTrue(feq(lc.Length(), 0.0))
    self.assertTrue(feq(uc.Length(), 0.0))

    lc1 -= geom.Point3D(-10.7519, -6.0778, -3.0123)
    uc1 -= geom.Point3D(8.7163, 5.3279, 3.1621)
    self.assertTrue(feq(lc1.Length(), 0.0))
    self.assertTrue(feq(uc1.Length(), 0.0))

    lc2 -= geom.Point3D(-10.7519, -6.0778, -3.01226)
    uc2 -= geom.Point3D(9.574, 33.799, 12.557)
    self.assertTrue(feq(lc2.Length(), 0.0))
    self.assertTrue(feq(uc2.Length(), 0.0))
help='Output Tanimoto distance') args = parser.parse_args() #load reference file molecules refmols = [mol for mol in Chem.SDMolSupplier(args.ref)] collated = collections.defaultdict(list) #for each test mol compare to all refmols for mol in Chem.SDMolSupplier(args.test): try: vals = [] for r in refmols: o3a = rdMolAlign.GetO3A(r, mol) o3a.Align() if args.tanimoto: score = 1.0 - rdShapeHelpers.ShapeTanimotoDist(r, mol) else: score = o3a.Score() vals.append(score) tc = max(vals) if args.collate: collated[mol.GetProp("_Name")].append(tc) else: print mol.GetProp("_Name"), tc except: pass if args.collate: namevals = [(name, max(vals)) for (name, vals) in collated.iteritems()] namevals.sort(key=lambda (n, v): v)