def CalculateProtrudeShapeDistance(RefMol, ProbeMol):
    """Return the protrude shape distance of two pre-aligned molecules as text.

    The value comes from ``rdShapeHelpers.ShapeProtrudeDist`` called with the
    probe molecule first and the reference molecule second, and is rendered
    with two decimal places.
    """
    return "%.2f" % rdShapeHelpers.ShapeProtrudeDist(ProbeMol, RefMol)
def get_SucosScore(ref_mol, query_mol, tani=False, ref_features=None, query_features=None,
                   score_mode=FeatMaps.FeatMapScoreMode.All):
    """
    Compute the SuCOS score of a query molecule against a reference molecule.

    This is the entry point other modules are expected to call. Features can be
    pre-computed with getRawFeatures and passed in so they are not recalculated
    on every call; any feature argument that is missing or empty is computed here.

    :param ref_mol: The reference molecule to compare to
    :param query_mol: The molecule to compare with the reference
    :param tani: If true the shape term is derived from the Tanimoto shape
        distance, otherwise from the protrude distance
    :param ref_features: Optional pre-computed features of the reference molecule
    :param query_features: Optional pre-computed features of the query molecule
    :param score_mode: Passed through unchanged to get_FeatureMapScore
    :return: A tuple of 3 values: 1 the SuCOS score, 2 the feature map score
        clipped to [0, 1], 3 the shape term clipped to [0, 1]
        (1 minus the Tanimoto distance if tani, else 1 minus the protrude distance)
    """
    ref_features = ref_features or getRawFeatures(ref_mol)
    query_features = query_features or getRawFeatures(query_mol)

    feature_score = np.clip(
        get_FeatureMapScore(ref_features, query_features, tani, score_mode), 0, 1)

    if tani:
        tanimoto_dist = float(rdShapeHelpers.ShapeTanimotoDist(ref_mol, query_mol))
        shape_score = np.clip(1 - tanimoto_dist, 0, 1)
    else:
        protrusion = rdShapeHelpers.ShapeProtrudeDist(ref_mol, query_mol,
                                                      allowReordering=False)
        shape_score = 1.0 - np.clip(protrusion, 0, 1)

    return 0.5 * feature_score + 0.5 * shape_score, feature_score, shape_score
def sucos_mol_to_mol(self, mol1, mol2, score_mode=FeatMaps.FeatMapScoreMode.All):
    '''
    Compute the SuCOS score between two rdkit molecules.

    Hydrogens are added to both molecules before scoring. The result is the
    equal-weight mean of the feature map score and (1 - protrude distance),
    each clipped to [0, 1] first.

    :param mol1: rdkit mol object used as the reference (small mol)
    :param mol2: rdkit mol object used as the probe (large mol)
    :param score_mode: forwarded to self.get_fm_score
    :return: the SuCOS score
    '''
    ref_with_hs = Chem.AddHs(mol1)
    prb_with_hs = Chem.AddHs(mol2)

    feature_score = np.clip(self.get_fm_score(ref_with_hs, prb_with_hs, score_mode), 0, 1)
    protrusion = np.clip(
        rdShapeHelpers.ShapeProtrudeDist(ref_with_hs, prb_with_hs, allowReordering=False),
        0, 1)

    return 0.5 * feature_score + 0.5 * (1 - protrusion)
def calc_SC_RDKit_score(query_mol, ref_mol):
    """Return the SC_RDKit score of ``query_mol`` against ``ref_mol``.

    The score is the equal-weight mean of the feature map score and
    (1 - protrude distance). Neither term is clipped here.
    """
    feature_score = get_FeatureMapScore(query_mol, ref_mol)
    shape_score = 1 - rdShapeHelpers.ShapeProtrudeDist(query_mol, ref_mol,
                                                       allowReordering=False)
    return 0.5 * feature_score + 0.5 * shape_score
def get_SucosScore(ref_mol, query_mol, field_name):
    """Compute the SuCOS score, store it on ``query_mol`` and return it.

    The score is the equal-weight mean of the feature map score and
    (1 - protrude distance). It is written to ``query_mol`` as a double
    property named ``field_name``.
    """
    feature_score = get_FeatureMapScore(ref_mol, query_mol)
    shape_score = 1.0 - rdShapeHelpers.ShapeProtrudeDist(ref_mol, query_mol,
                                                         allowReordering=False)
    sucos = 0.5 * feature_score + 0.5 * shape_score
    query_mol.SetDoubleProp(field_name, sucos)
    return sucos
def score(reflig, prb_mols, ids, score_mode=FeatMaps.FeatMapScoreMode.All, p=False):
    """Score selected conformers of a probe molecule against a reference ligand.

    For each conformer ID in ``ids`` the conformer is extracted from
    ``prb_mols`` via a mol block round trip, hydrogens are added, and a SuCOS
    score is computed as the equal-weight mean of the clipped feature map
    score and (1 - clipped protrude distance).

    :param reflig: reference molecule; hydrogens are added before scoring
    :param prb_mols: a single molecule holding the conformers to score
    :param ids: iterable of conformer IDs of ``prb_mols``
    :param score_mode: forwarded to get_FeatureMapScore
    :param p: if true, print the SuCOS score and the Tanimoto shape distance
        of every conformer
    :return: dict mapping the position of each conformer in ``ids``
        (as a string: "0", "1", ...) to its SuCOS score
    """
    ref = Chem.AddHs(reflig)
    results_sucos = {}

    # Keys are the running position in ``ids``, not the conformer IDs themselves.
    for idx, conf_id in enumerate(ids):
        prb = Chem.AddHs(
            Chem.MolFromMolBlock(Chem.MolToMolBlock(prb_mols, confId=conf_id)))

        fm_score = np.clip(get_FeatureMapScore(ref, prb, score_mode), 0, 1)
        protrude_dist = np.clip(
            rdShapeHelpers.ShapeProtrudeDist(ref, prb, allowReordering=False), 0, 1)
        SuCOS_score = 0.5 * fm_score + 0.5 * (1 - protrude_dist)
        results_sucos[str(idx)] = SuCOS_score

        if p:
            # The Tanimoto distance is only ever reported, never returned,
            # so it is computed only when printing is requested.
            tanimoto_score = rdShapeHelpers.ShapeTanimotoDist(ref, prb)
            print("********************************")
            print("index: " + str(idx))
            print("SuCOS score:\t%f" % SuCOS_score)
            print("Tani score:\t%f" % tanimoto_score)
            print("********************************")

    return results_sucos
def get_sucos(frag_sdf_folder, docked_sdf_file):
    """Score every docked molecule against every fragment and write a CSV report.

    Every file in ``frag_sdf_folder`` is read as a mol file; files that cannot
    be parsed are skipped. Each molecule of ``docked_sdf_file`` is scored
    against each fragment with the SuCOS score (equal-weight mean of the
    clipped feature map score and 1 - clipped protrude distance).

    Each docked molecule must carry a ``fragments`` property, which is handed
    to ``get_frag_match`` together with the (fragment name, score) pairs.
    One row per docked molecule is written to
    ``sucos_scores/JC_sucos_scores.csv``; the ``sucos_scores`` directory is
    not created here.

    :param frag_sdf_folder: directory containing the fragment mol/sdf files
    :param docked_sdf_file: SD file with the docked molecules
    :return: None
    """
    frag_mol_list = [
        Chem.MolFromMolFile(os.path.join(frag_sdf_folder, sdf_file), sanitize=True)
        for sdf_file in os.listdir(frag_sdf_folder)
    ]
    # MolFromMolFile yields None for unparsable input; drop those entries the
    # same way unparsable docked molecules are dropped below.
    frag_mol_list = [mol for mol in frag_mol_list if mol is not None]
    # Fragment names do not depend on the docked molecule, so read them once.
    frag_names = [frag_mol.GetProp('_Name') for frag_mol in frag_mol_list]

    docked_mol_list = [
        mol for mol in Chem.SDMolSupplier(docked_sdf_file, sanitize=True)
        if mol is not None
    ]

    all_frags_scores = []
    for docked_mol in docked_mol_list:
        docked_name = docked_mol.GetProp('_Name')
        print('Getting values for {}'.format(docked_name))

        sucos_scores = []
        for frag_mol in frag_mol_list:
            fm_score = np.clip(get_FeatureMapScore(frag_mol, docked_mol), 0, 1)
            protrude_dist = np.clip(
                rdShapeHelpers.ShapeProtrudeDist(frag_mol, docked_mol,
                                                 allowReordering=False),
                0, 1)
            sucos_scores.append(0.5 * fm_score + 0.5 * (1 - protrude_dist))

        frag_scores = list(zip(frag_names, sucos_scores))
        insp_frags = docked_mol.GetProp('fragments')
        # NOTE(review): get_frag_match is defined elsewhere; it is used here as
        # returning (found flag, fragment name, score) - confirm against it.
        found, frag, score = get_frag_match(insp_frags, frag_scores)

        # Look the selected fragment up again by name to obtain its SMILES
        frag_mol = frag_mol_list[frag_names.index(frag)]
        frag_SMILES = Chem.MolToSmiles(frag_mol)
        docked_SMILES = Chem.MolToSmiles(docked_mol)

        avg_score = get_avg_sucos(sucos_scores)

        all_frags_scores.append((docked_name, frag, found, docked_SMILES,
                                 frag_SMILES, score, avg_score))

    # newline='' is required by the csv module; without it extra blank rows
    # appear on platforms that translate line endings.
    with open('sucos_scores/JC_sucos_scores.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([
            'Compound_name', 'Fragment', 'Insp_frag_found', 'dockedSMILES',
            'fragSMILES', 'SuCOS_score', 'Avg_SuCOS_score'
        ])
        writer.writerows(all_frags_scores)
def _strip_fragment_prefix(name, prefix='Mpro-'):
    """Return ``name`` without a leading ``prefix``; nothing else is removed."""
    return name[len(prefix):] if name.startswith(prefix) else name


def _score_bit_against_frag(bit, frag_mol, score_threshold):
    """Return (shape score, feature score) of a bit on a fragment, or (0, 0).

    (0, 0) is returned when the pair shares no common substructure, or when
    either the shape score or the feature score is not above ``score_threshold``.
    """
    # Score only if some common structure is shared between bit and fragment
    mcs_match = rdFMCS.FindMCS([bit, frag_mol],
                               ringMatchesRingOnly=True,
                               matchValences=True)
    mcs_mol = Chem.MolFromSmarts(mcs_match.smartsString)
    if not frag_mol.HasSubstructMatch(mcs_mol):
        return 0, 0

    # Reduced van der Waals radius scale for a stricter overlay
    protrude_dist = rdShapeHelpers.ShapeProtrudeDist(bit, frag_mol,
                                                     allowReordering=False,
                                                     vdwScale=0.2)
    protrude_score = 1 - np.clip(protrude_dist, 0, 1)

    # Small bits are compared against large frags (reverse SuCOS scoring).
    # The shape check comes first because the feature map is the more
    # expensive computation.
    if not (protrude_score > score_threshold):
        return 0, 0

    fm_score = np.clip(getFeatureMapScore(bit, frag_mol), 0, 1)

    # Cutoff against good overlays with a poor feature match, e.g. two carbons
    # of a three-membered ring overlaying two aromatic ring carbons.
    if not (fm_score > score_threshold):
        return 0, 0

    return protrude_score, fm_score


def getReverseScores(mols, frags, score_threshold, writer):
    """Score each molecule's bits against the fragments and write the molecules.

    For every molecule the bits from ``getBits`` are scored against every
    fragment. Bits with at most one non-wildcard atom are ignored, since they
    do not carry enough information to describe a vector. For each remaining
    bit the best fragment(s) by modified SuCOS score are kept, where::

        Modified_SuCOS_score = 0.5 * feat_score * atoms_without_wildcards
                             + 0.5 * shape_score * atoms_including_wildcards

    The molecule then receives three properties (names taken from the
    module-level ``field_XCos*`` constants): the comma-separated fragments
    with a score above 0, the number of those fragments, and the sum of the
    kept scores formatted to four decimals. A molecule with no scorable bits
    gets a score of 0 and no fragments. Every molecule is written to
    ``writer``, which is flushed after each write.

    :param mols: iterable of molecules to score
    :param frags: re-iterable collection of fragment molecules with a ``_Name`` property
    :param score_threshold: minimum shape and feature score for a bit/fragment
        pair to count; pairs at or below it score 0
    :param writer: object providing ``write(mol)`` and ``flush()``
    :return: None
    """
    # Query matching wildcard (dummy) atoms; identical for every bit
    wildcard_query = Chem.MolFromSmarts('[#0]')

    for mol in mols:
        best_per_bit = []
        for bit in getBits(mol):
            # Removing wildcard atoms does not impact the feature score but
            # lowers the shape overlay, so the feature score is weighted by
            # the non-wildcard atom count and the shape score by all atoms.
            no_bit_atoms_without_wild_card = Chem.DeleteSubstructs(
                bit, wildcard_query).GetNumAtoms()
            no_bit_atoms = bit.GetNumAtoms()

            # Only score bits with more than one real atom - this will bias
            # against cases where the frag has a long aliphatic chain
            if no_bit_atoms_without_wild_card <= 1:
                continue

            rows = []
            for frag_mol in frags:
                # Fragment name used to link the score back to the fragment.
                # Only the leading 'Mpro-' is removed; str.strip('Mpro-')
                # would also eat matching characters at the end of the name.
                frag_name = _strip_fragment_prefix(frag_mol.GetProp('_Name'))
                shape_score, feat_score = _score_bit_against_frag(
                    bit, frag_mol, score_threshold)
                rows.append((frag_name, shape_score, no_bit_atoms, feat_score,
                             no_bit_atoms_without_wild_card))

            df = pd.DataFrame(data=rows,
                              columns=[
                                  'Fragment', 'Shape_score', 'no_bit_atoms',
                                  'Feat_score', 'no_bit_atoms_without_wild_card'
                              ])
            df['Modified_SuCOS_score'] = 0.5 * (
                df.Feat_score * df.no_bit_atoms_without_wild_card
            ) + 0.5 * (df.Shape_score * df.no_bit_atoms)
            # Keep only the maximum scoring fragment(s) for this bit
            df = df[df['Modified_SuCOS_score'] == df['Modified_SuCOS_score'].max()]
            best_per_bit.append(df)

        if best_per_bit:
            final_df = pd.concat(best_per_bit)
            # Score 1: the score is scaled by the number of bit atoms
            score_1 = final_df.Modified_SuCOS_score.sum()
            # Only fragments with a score > 0 are reported
            final_df = final_df[final_df.Modified_SuCOS_score > 0]
            all_frags = pd.unique(final_df.Fragment)
        else:
            # pd.concat raises on an empty list; a molecule without any
            # scorable bit simply has no hits.
            score_1 = 0.0
            all_frags = []

        mol.SetProp(field_XCosRefMols, ','.join(all_frags))
        mol.SetIntProp(field_XCosNumHits, len(all_frags))
        mol.SetProp(field_XCosScore1, "{:.4f}".format(score_1))

        writer.write(mol)
        writer.flush()
def _load_reference_ligand(ref_file):
    """Return the reference molecule for ``ref_file``, or None if unsupported."""
    if isinstance(ref_file, str):
        extension = os.path.splitext(ref_file)[-1]
        if extension == '.sdf':
            return Chem.MolFromMolFile(ref_file, sanitize=True)
        if extension == '.mol2':
            return Chem.MolFromMol2File(ref_file, sanitize=True)
        return None
    if isinstance(ref_file, rdkit.Chem.rdchem.Mol):
        return ref_file
    return None


def _load_probe_ligands(prb_file):
    """Return the valid probe molecules of ``prb_file``, or None if unsupported."""
    if isinstance(prb_file, str):
        root, extension = os.path.splitext(prb_file)
        if extension == '.sdf':
            return [m for m in Chem.SDMolSupplier(prb_file, sanitize=True) if m]
        if extension == '.gz' and os.path.splitext(root)[-1] == '.sdf':
            # The forward supplier reads lazily, so the molecules are
            # materialised while the gzip handle is still open.
            with gzip.open(prb_file) as inf:
                return [m for m in Chem.ForwardSDMolSupplier(inf, sanitize=True) if m]
        return None
    if isinstance(prb_file, rdkit.Chem.rdchem.Mol):
        return [m for m in [prb_file] if m]
    return None


def main(ref_file, prb_file, write=True, return_all=False,
         score_mode=FeatMaps.FeatMapScoreMode.All):
    """Compute SuCOS scores of probe ligands against a reference ligand.

    Each probe molecule gets three string properties: ``SuCOS_score``,
    ``Volume_score`` (1 - clipped protrude distance) and ``Feature_score``
    (clipped feature map score). The SuCOS score of every probe is printed.

    :param ref_file: path to a ``.sdf`` or ``.mol2`` file, or an RDKit Mol
    :param prb_file: path to a ``.sdf`` or ``.sdf.gz`` file, or an RDKit Mol
    :param write: if true, write the annotated probes to
        ``<prb_file without its last extension>_SuCOS_score.sdf``;
        this requires ``prb_file`` to be a path
    :param return_all: if true return all three scores instead of only the
        SuCOS score
    :param score_mode: forwarded to get_FeatureMapScore
    :return: the scores of the LAST probe molecule: either the SuCOS score, or
        the tuple (SuCOS score, feature score, volume score) if ``return_all``
    :raises ValueError: if the reference or probe input is unsupported or
        cannot be read, or if the probe input contains no valid molecule
    :raises TypeError: if ``write`` is true but ``prb_file`` is not a path
    """
    reflig = _load_reference_ligand(ref_file)
    if reflig is None:
        raise ValueError("Incorrect file format for ref lig")

    prb_mols = _load_probe_ligands(prb_file)
    if prb_mols is None:
        raise ValueError("Incorrect file format for prb lig")
    if not prb_mols:
        # Without this check the function would fail at the return statement
        # with an unbound local variable.
        raise ValueError("No valid molecules found in prb lig")

    w = None
    if write:
        if not isinstance(prb_file, str):
            # The output name is derived from the probe file name
            raise TypeError("write=True requires prb_file to be a file path")
        w = Chem.SDWriter("%s_SuCOS_score.sdf" % os.path.splitext(prb_file)[0])

    try:
        for prb_mol in prb_mols:
            fm_score = np.clip(get_FeatureMapScore(reflig, prb_mol, score_mode), 0, 1)
            protrude_dist = np.clip(
                rdShapeHelpers.ShapeProtrudeDist(reflig, prb_mol, allowReordering=False),
                0, 1)
            SuCOS_score = 0.5 * fm_score + 0.5 * (1 - protrude_dist)

            print("********************************")
            print("SuCOS score:\t%f" % SuCOS_score)
            print("********************************")

            prb_mol.SetProp("SuCOS_score", str(SuCOS_score))
            prb_mol.SetProp("Volume_score", str(1 - protrude_dist))
            prb_mol.SetProp("Feature_score", str(fm_score))

            if w is not None:
                w.write(prb_mol)
    finally:
        # Make sure everything written so far reaches the file
        if w is not None:
            w.close()

    if return_all:
        return SuCOS_score, fm_score, (1 - protrude_dist)
    return SuCOS_score