def _similarity( self, id0, id1, **kwarg ) :
    # Retrieves the parent molecules of the MCS.
    mol0 = KBASE.ask( id0 )
    mol1 = KBASE.ask( id1 )
    if (mol0.total_charge() != mol1.total_charge()) :
        return 0.0
    return 1.0
def create(basic_graph, mcs_ids, rule, add_attr=True):
    """
    Returns a graph. Node = molecule's ID or name, edge = similarity score.

    @type  mcs_ids: C{list} of C{str}
    @param mcs_ids: A list of common substructures' IDs
    @type  rule   : C{Rule}
    @param rule   : The rule to determine the similarity score between two structures
    """
    g = copy.deepcopy(basic_graph)
    for id in mcs_ids:
        id0, id1 = mcs.get_parent_ids(id)
        simi = rule.similarity(id0, id1, mcs_id=id)
        if (simi > 0):
            if (add_attr):
                try:
                    partial_ring = int(KBASE.ask(id, "partial_ring"))
                except LookupError:
                    partial_ring = 0
                try:
                    slack_simi = KBASE.ask(id, "slack_similarity")
                except LookupError:
                    slack_simi = 0.0
                g.add_edge(id0, id1, similarity=simi, slack_similarity=slack_simi,
                           partial_ring=partial_ring, mcs_id=id)
            else:
                g.add_edge(id0, id1, similarity=simi)
    return g
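# A minimal usage sketch for create() (an assumption-laden example, not part of the pipeline):
# it presumes `basic_graph' already holds the parent molecule IDs as nodes and that the MCS
# results are in the KBASE, as gen_graph() arranges before calling it.
#
#     complete = create( basic_graph, mcs_ids, rule.Cutoff( 0 ) )
#     for id0, id1, attr in complete.edges( data = True ) :
#         print id0[:7], id1[:7], attr["similarity"]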
def _similarity( self, id0, id1, **kwarg ) :
    # Uses the first common substructure.
    num_atom_mcs  = KBASE.ask( kwarg["mcs_id"], "num_heavy_atoms" )
    num_atom_mol0 = len( KBASE.ask( id0 ).heavy_atoms() )
    num_atom_mol1 = len( KBASE.ask( id1 ).heavy_atoms() )
    return float( (num_atom_mcs  >= self._threshold    ) or
                  (num_atom_mol0 <  self._threshold + 3) or
                  (num_atom_mol1 <  self._threshold + 3) )
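# Hedged worked example of the rule above, assuming a threshold of 4 (the value used in the
# MinimumNumberOfAtom( 4 ) example elsewhere in this code): an MCS with at least 4 heavy atoms
# scores 1.0, as does any pair where either parent molecule has fewer than 4 + 3 = 7 heavy
# atoms; otherwise the score is 0.0.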
def annotate_nodes_with_smiles(g):
    """
    Annotates each node of C{g} with the SMILES string of its molecule.
    """
    for molid in g.nodes():
        try:
            smiles = KBASE.ask(molid, "SMILES")
        except LookupError:
            smiles = KBASE.ask(molid).smiles()
            KBASE.deposit_extra(molid, "SMILES", smiles)
        g.node[molid]["SMILES"] = smiles
def read_n_files( filenames ) :
    """
    `filenames' is a list of file names. Reads the files and deposits them into the `KBASE'.
    Returns a list of keys.
    """
    strucid = []
    for fn in filenames :
        strucs = read_file( fn )
        for e in strucs:
            id = KBASE.deposit( e.id(), e )
            KBASE.deposit_extra( id, "filename", (fn) )
            e.set_id( id )
            strucid.append( id )
    return strucid
def read_n_files( filenames ) :
    """
    `filenames' is a list of file names. The format of each file will be determined from the
    file's extension name. Reads the files and deposits them into the `KBASE'.
    Returns a list of keys.
    """
    strucid = []
    for fn in filenames :
        strucs = read_file( fn )
        for e in strucs :
            id = KBASE.deposit( e.id(), e )
            KBASE.deposit_extra( id, "filename", (fn) )
            e.set_id( id )
            strucid.append( id )
    return strucid
def annotate_edges_with_smiles(g):
    """
    Annotates each edge of C{g} with the SMILES string of its MCS.
    """
    for e in g.edges(data=True):
        try:
            mcs_id = e[2]["mcs_id"]
            try:
                smiles = KBASE.ask(mcs_id, "SMILES")
            except LookupError:
                smiles = mcs.get_struc(mcs_id).smiles()
                KBASE.deposit_extra(mcs_id, "SMILES", smiles)
            g[e[0]][e[1]]["SMILES"] = smiles
        except KeyError:
            pass
def annotate_nodes_with_title(g):
    """
    Annotates each node of C{g} with the molecule's title and a short (7-character) ID label.
    """
    for molid in g.nodes():
        g.node[molid]["title"] = KBASE.ask(molid).title()
        g.node[molid]["label"] = molid[:7]
def add_mcs_id( mcs_id_list, graph ):
    for edge in graph.edges(data = True):
        mol0_id = edge[0]
        mol1_id = edge[1]
        mol0 = KBASE.ask( mol0_id )
        mol1 = KBASE.ask( mol1_id )
        name0 = mol0.title()
        name1 = mol1.title()
        mcs_title = "mcs@%s..%s" % (name0, name1,)
        mcs_id = hashlib.sha1( mcs_title ).hexdigest()
        if mcs_id not in mcs_id_list:
            # The first mcs_id is not in the ID list, so generate the reverse one.
            mcs_title = "mcs@%s..%s" % (name1, name0,)
            mcs_id = hashlib.sha1( mcs_title ).hexdigest()
            if mcs_id not in mcs_id_list:
                sys.exit()
        graph.add_edge(mol0_id, mol1_id, mcs_id = mcs_id)
def get_parent_ids(mcs_id):
    """
    Returns a pair of IDs of the common substructure's parents.

    @type  mcs_id: C{str}
    @param mcs_id: ID of the common substructure
    """
    return KBASE.ask(mcs_id, "mcs-parents")
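# Typical usage (a sketch that mirrors the callers in this module):
#
#     id0, id1 = get_parent_ids( mcs_id )
#     mol0, mol1 = KBASE.ask( id0 ), KBASE.ask( id1 )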
def similarity( self, id0, id1, **kwarg ) :
    try :
        mcs_id = kwarg["mcs_id"]
        simi   = KBASE.ask( mcs_id, "similarity" )
    except KeyError :
        simi = Rule.similarity( self, id0, id1, **kwarg )
    if (simi < self._cutoff) :
        simi = 0.0
    return simi
def annotate_edges_with_matches(g):
    """
    Annotates each edge of C{g} with the original, trimmed, and layout MCS patterns.
    """
    for e in g.edges(data=True):
        try:
            mcs_id = e[2]["mcs_id"]
            mol0 = KBASE.ask(e[0])
            mol1 = KBASE.ask(e[1])
            mcs_matches = KBASE.ask(mcs_id, "mcs-matches")
            trimmed_mcs = KBASE.ask(mcs_id, "trimmed-mcs")
            layout_mcs  = KBASE.ask(mcs_id, "layout_mcs")
            g[e[0]][e[1]]["original-mcs"] = {
                e[0]: mol0.smarts(mcs_matches[e[0]]),
                e[1]: mol1.smarts(mcs_matches[e[1]]),
            }
            g[e[0]][e[1]]["trimmed-mcs"] = trimmed_mcs
            g[e[0]][e[1]]["layout_mcs"]  = layout_mcs
        except KeyError:
            pass
def _similarity( self, id0, id1, **kwarg ) :
    # Uses the first common substructure.
    mcs_id = kwarg["mcs_id"]
    mcs0   = mcs.get_struc( mcs_id )
    mol0   = KBASE.ask( id0 )
    mol1   = KBASE.ask( id1 )

    num_heavy_atoms = len( mcs0.heavy_atoms() )
    num_light_atoms = len( mcs0.atom ) - num_heavy_atoms
    KBASE.deposit_extra( mcs_id, "num_heavy_atoms", num_heavy_atoms )
    KBASE.deposit_extra( mcs_id, "num_light_atoms", num_light_atoms )
    return similarity.by_heavy_atom_count( mol0, mol1, mcs0 )
def matrix( mols, mcs_ids, rule ):
    import numpy

    id_list           = []
    id_vs_simi        = {}
    id_vs_title       = {}
    title_vs_simi     = {}
    title_list        = []
    filename_vs_title = {}

    # Generates a dictionary of ID vs. title for the given molecules.
    for mol in mols:
        title     = mol.title()
        id        = mol.id()
        file_path = KBASE.ask( id, "filename" )
        filename  = os.path.basename( file_path )
        if id not in id_list:
            id_list.append( id )
        if title not in title_list:
            title_list.append( title )
        id_vs_title[id]             = title
        filename_vs_title[filename] = title

    # Generates a dictionary of the pair's titles vs. similarity score.
    for id in mcs_ids:
        id0, id1 = mcs.get_parent_ids( id )
        simi     = rule.similarity( id0, id1, mcs_id = id )
        title0   = id_vs_title[id0]
        title1   = id_vs_title[id1]
        title_vs_simi[(title0, title1,)] = simi

    # Generates the score matrix.
    size   = len( title_list )
    scores = numpy.zeros( (size, size,) )
    for i in range( size ) :
        scores[i, i] = 1.0
        for j in range( i + 1, size ) :
            title_i = title_list[i]
            title_j = title_list[j]
            if title_vs_simi.has_key( (title_i, title_j,) ):
                simi = title_vs_simi[(title_i, title_j,)]
                scores[i, j] = simi
                scores[j, i] = simi

    return (title_list, id_list, filename_vs_title, scores)
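# A hedged usage sketch: after building the matrix, the similarity between two compounds can be
# looked up by title via `title_list' (which fixes the row/column order). "mol_a" and "mol_b"
# below are placeholder titles, not names from this code base.
#
#     title_list, id_list, filename_vs_title, scores = matrix( mols, mcs_ids, basic_rule )
#     i = title_list.index( "mol_a" )
#     j = title_list.index( "mol_b" )
#     print scores[i, j]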
def get_struc(mcs_id):
    """
    Gets the MCS structure corresponding to C{mcs_id}.
    """
    title = KBASE.ask(mcs_id)
    id0, id1 = KBASE.ask(mcs_id, "mcs-parents")
    mcs_matches = KBASE.ask(mcs_id, "mcs-matches")
    atom_match0, atom_match1 = mcs_matches[id0], mcs_matches[id1]
    mol0 = KBASE.ask(id0)
    mol1 = KBASE.ask(id1)
    mcs = KBASE.ask(id0).extract(atom_match0)
    for i, e in enumerate(atom_match1, start=1):
        mcs.atom_prop[i]["mapped_index"] = e
    mcs.set_title(title)
    mcs.set_id(mcs_id)
    return mcs
def deposit_to_kbase(id0, id1, atom_match0, atom_match1):
    """
    Deposits an MCS substructure and relevant information into the C{KBASE} and returns its ID.

    @type  id0: C{str}
    @param id0: ID of the first (reference) molecule in the C{KBASE}
    @type  id1: C{str}
    @param id1: ID of the second molecule in the C{KBASE}
    @type  atom_match0: C{list} of C{int}
    @param atom_match0: A list of indices of matched atoms in the reference molecule
    @type  atom_match1: C{list} of C{int}
    @param atom_match1: A list of indices of matched atoms in the second molecule
    """
    mol0 = KBASE.ask(id0)
    mol1 = KBASE.ask(id1)
    name0 = mol0.title()
    name1 = mol1.title()
    mcs_title = "mcs@%s..%s" % (name0, name1,)
    mcs_id = hashlib.sha1(mcs_title).hexdigest()
    mcs_id = KBASE.deposit(mcs_id, mcs_title)

    # Sorts the two lists according to the ascending order of atom indices of the first list.
    atom_match0, atom_match1 = zip(*sorted(zip(atom_match0, atom_match1),
                                           cmp=lambda x, y: x[0] - y[0]))
    atom_match0, atom_match1 = list(atom_match0), list(atom_match1)

    KBASE.deposit_extra(mcs_id, "mcs-parents", (id0, id1,))
    KBASE.deposit_extra(mcs_id, "mcs-matches", {id0: atom_match0, id1: atom_match1,})
    return mcs_id
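# Illustration of the paired sort above (a standalone sketch, not part of the pipeline): sorting
# by the reference molecule's atom indices keeps the two match lists aligned element by element.
#
#     >>> a0, a1 = [3, 1, 2], [30, 10, 20]
#     >>> a0, a1 = zip( *sorted( zip( a0, a1 ), cmp = lambda x, y: x[0] - y[0] ) )
#     >>> list( a0 ), list( a1 )
#     ([1, 2, 3], [10, 20, 30])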
def main(molid_list, opt, args):
    """
    @type  molid_list: C{list} of C{str}'s
    @param molid_list: A list of molecule IDs in the C{KBASE}
    """
    # Loads the molecule files.
    if (opt.graph):
        g = pickle.load(open(opt.graph))
    else:
        mols = []
        for id in molid_list[opt.receptor:]:
            mols.append(KBASE.ask(id))

        # Chooses the MCS search engine and the rules.
        if (struc.infrastructure == "schrodinger"):
            mcs_engine = mcs.SchrodMcs(1)
            basic_rule = rule.Mcs(rule.EqualCharge(),
                                  rule.TrimMcs(True, rule.MinimumNumberOfAtom()))
            slack_rule = rule.Mcs(rule.EqualCharge(),
                                  rule.TrimMcs(False, rule.MinimumNumberOfAtom()))
        elif (struc.infrastructure == "oechem"):
            mcs_engine = mcs.OeMcs()
            basic_rule = rule.Mcs(rule.EqualCharge(),
                                  rule.TrimMcs_oe(True, rule.MinimumNumberOfAtom()))
            slack_rule = rule.Mcs(rule.EqualCharge(),
                                  rule.TrimMcs_oe(False, rule.MinimumNumberOfAtom()))

        logging.info("MCS searching...")
        mcs_ids = mcs_engine.search_all(mols, opt)
        logging.info("MCS searching... Done")

        # Builds score matrices from the MCS search to enable Jonathan's graph-planning algorithm.
        if (opt.build):
            import build
            (title_list, id_list, filename_vs_title, strict_score) = \
                build.matrix(mols, mcs_ids, basic_rule)
            (title_list, id_list, filename_vs_title, unstrict_score) = \
                build.matrix(mols, mcs_ids, slack_rule)
            import GraphGenerator4 as gg4

            # Loads the name list of compounds with known experimental values, if there is any.
            knownCompoundsList = []
            try:
                with open(args[0] + "/knownCompounds") as kcFile:
                    knownCompoundsList = kcFile.readlines()
                knownCompoundsList = [filename_vs_title[name.strip()]
                                      for name in knownCompoundsList]
            except IOError:
                print "No Known Compounds Listed"

            gg = gg4.GraphGenerator4(strict_score, unstrict_score, 0.05, 6,
                                     title_list, id_list, knownCompoundsList)
            g = gg.getGraphObject()
            build.add_mcs_id(mcs_ids, g)
            c = networkx.connected_component_subgraphs(g)
        # Gets graph (`g') and clusters (`c') using Schrodinger's graph-planning algorithm.
        else:
            logging.info("Creating graph...")
            g, c = graph.gen_graph(mcs_ids, basic_rule, slack_rule,
                                   simi_cutoff=0.05, max_csize=100, num_c2c=1)
            graph.annotate_nodes_with_smiles(g)
            graph.annotate_nodes_with_title(g)
            graph.annotate_edges_with_smiles(g)
            graph.annotate_edges_with_hexcode(g)
            graph.annotate_edges_with_matches(g)
            logging.info("Creating graph... Done")

            logging.debug("DEBUG: %d clusters (counted as the connected components in the graph):"
                          % len(c))
            c.sort(lambda x, y: len(x) - len(y))
            for i, e in enumerate(c):
                logging.debug("DEBUG: cluster #%d, %d structures:" % (i, len(e),))
                titles = [KBASE.ask(id).title() for id in e]
                titles.sort()
                for t in titles:
                    logging.debug("DEBUG: %s" % t)

        # Stores the graph for reuse and analysis.
        pkl_fname = opt.output + ".pkl"
        pkl_fh = open(pkl_fname, "w")
        pickle.dump(g, pkl_fh)
        pkl_fh.close()

    try:
        # Uses pygraphviz for graph layout.
        import pygraphviz

        ag = networkx.to_agraph(g)
        ag.node_attr["fixedsize"] = True
        ag.edge_attr["penwidth"] = 2.0
        simi = [float(e.attr["similarity"]) for e in ag.edges()]
        scale = 1.0 / max(simi)
        for e in ag.edges_iter():
            try:
                partial_ring = int(e.attr["partial_ring"])
            except (ValueError, TypeError):
                partial_ring = 0
            saturation = float(e.attr["similarity"]) * scale
            saturation = 0.0 if (saturation < 0) else (1.0 if (saturation > 1) else saturation)
            e.attr["color"] = "0.8396,%f,0.8" % saturation
            e.attr["weight"] = saturation
            del e.attr["label"]
            if (saturation < 0.01 or partial_ring):
                e.attr["style"] = "dashed"
        ag.write(opt.output + ".dot")
    except ImportError:
        logging.warn("WARNING: pygraphviz is not installed. Cannot write a .dot output file.")
    edges = g.edges(data=True)
    logging.info("%d edges in total" % len(edges))

    # Generates Schrodinger FEP input files.
    if (opt.siminp):
        if (opt.siminp_type == "gro"):
            raise NotImplementedError("Support for writing Gromacs input files is not yet implemented.")
        if (opt.siminp_type == "mae"):
            import schrodinger.application.desmond.fep_mapping as dfm

            tmp_mae_fname = mcs.tempfile_basename + "_siminp.mae"
            receptor_mol = []
            if (opt.receptor):
                for e in range(opt.receptor):
                    mol = KBASE.ask(molid_list[e])
                    mol._struc.property["s_leadoptmap_moltype"] = "receptor"
                    receptor_mol.append(mol)
            for id0, id1, attr in edges:
                mol0 = KBASE.ask(id0)
                mol1 = KBASE.ask(id1)
                out_fname = "%s_%s_%s.mae" % (opt.siminp, id0[:7], id1[:7],)
                mol0._struc.property["s_leadoptmap_moltype"] = "ligand"
                mol1._struc.property["s_leadoptmap_moltype"] = "%s:%s" % (id0, id1,)
                mol0.write(tmp_mae_fname, mode="w")
                mol1.write(tmp_mae_fname, mode="a")
                try:
                    overwrite = True
                    data = dfm.get_atom_mapping_data(tmp_mae_fname, atomtype=3)
                    if (opt.receptor):
                        overwrite = False
                        receptor_mol[0].write(out_fname, mode="w")
                        for i in range(1, opt.receptor):
                            receptor_mol[i].write(out_fname, mode="a")
                    dfm.write_fepsubst_to_file(data, out_fname, overwrite=overwrite)
                except (RuntimeError, NameError,):
                    logging.warn("WARNING: Failed to write the input files for '%s' and '%s'."
                                 % (mol0, mol1,))

    # Removes temporary files unless the user asked to keep them.
    if (not opt.save):
        tmp_fnames = glob.glob(mcs.tempfile_basename + "*")
        for fname in tmp_fnames:
            os.remove(fname)
            for e in strucs:
                id = KBASE.deposit( e.id(), e )
                KBASE.deposit_extra( id, "filename", (fn) )
                e.set_id( id )
                strucid.append( id )
        return strucid

    infrastructure = "oechem"
except ImportError, e :
    pass

if (infrastructure is None) :
    print "ERROR: Need either Schrodinger's or OEChem's infrastructure to run, but none is found."
    import sys
    sys.exit( 1 )



if ("__main__" == __name__) :
    filenames = ["xfer3.10.mol2", "xfer3.11.mol2",]
    id_list   = read_n_files( filenames )

    mol0 = KBASE.ask( id_list[0] )
    print mol0.title(), len( mol0.heavy_atoms() )

    mol1 = KBASE.ask( id_list[1] )
    print mol1.title(), len( mol1.heavy_atoms() )
def search_all(self, mols, opt):
    if (not opt.mcs):
        mae_fname = tempfile_basename + ".mae"
        out_fname = tempfile_basename + ".csv"
        log_fname = tempfile_basename + ".log"
        log_fh = open(log_fname, "w")

        if (os.path.isfile(mae_fname)):
            os.remove(mae_fname)

        for mol in mols:
            title = mol.title()
            mol.set_title(mol.id())
            mol.write(mae_fname)
            mol.set_title(title)

        cmd = [self._cmd,
               "-imae", mae_fname,
               "-opw", out_fname,
               "-atomtype", str(self._typing),
               "-nobreakring",
               ]
        mcs_proc = subprocess.Popen(cmd, stderr=subprocess.STDOUT, stdout=log_fh)
        null, stderr = mcs_proc.communicate()
        val = mcs_proc.returncode

        if (val == 17):
            raise RuntimeError("Used a MCS feature that requires Schrodinger's CANVAS_ELEMENTS license.")
        if (val != 0):
            msg = "CanvasMCS exited prematurely. This could be because the input molecules were too dissimilar" \
                  " or too numerous, or because the chosen atom-typing scheme was too general."
            with open(out_fname) as fh:
                msg += "\n\n"
                msg += fh.read()
            raise RuntimeError(msg)
    else:
        logging.debug("DEBUG: Reuse previous MCS searching results: '%s'." % opt.mcs)
        out_fname = opt.mcs

    with open(out_fname, "r") as fh:
        import csv
        lines = fh.readlines()[1:]
        mcs_match = []
        for tokens in csv.reader(lines):
            mcs_match.append(McsMatch(tokens[1], tokens[3],
                                      tokens[11], tokens[14],
                                      tokens[9], tokens[12]))

    ret = []
    for m in mcs_match:
        id0 = m.mol0_id
        id1 = m.mol1_id
        mol0 = KBASE.ask(id0)
        mol1 = KBASE.ask(id1)
        atom_match0 = [int(i) for i in m.mcs_atom0.split(',')]
        atom_match1 = [int(i) for i in m.mcs_atom1.split(',')]
        ret.append(self.deposit_to_kbase(id0, id1, atom_match0, atom_match1))
    return ret
    mol1 = KBASE.ask(id1)
    mcs = KBASE.ask(id0).extract(atom_match0)
    for i, e in enumerate(atom_match1, start=1):
        mcs.atom_prop[i]["mapped_index"] = e
    mcs.set_title(title)
    mcs.set_id(mcs_id)
    return mcs



if ("__main__" == __name__):
    filenames = ["xfer3.11.mol2", "xfer3.12.mol2",]
    id_list = struc.read_n_files(filenames)
    mol0 = KBASE.ask(id_list[0])
    mol1 = KBASE.ask(id_list[1])

    mcs = SchrodMcs(3)
    mcs_id = mcs.search(mol0, mol1)[0]
    mol_id = KBASE.ask(mcs_id, "mcs-parents")[0]
    mcs_struc = KBASE.ask(mcs_id)[0]
    mol_struc = KBASE.ask(mol_id)

    out_fname = "out.mae"
    if (os.path.isfile(out_fname)):
        os.remove(out_fname)
    mol_struc.write(out_fname)
    mcs_struc.write(out_fname)
    num_heavy_atoms = len( mcs0.heavy_atoms() )
    num_light_atoms = len( mcs0.atom ) - num_heavy_atoms
    KBASE.deposit_extra( mcs_id, "num_heavy_atoms", num_heavy_atoms )
    KBASE.deposit_extra( mcs_id, "num_light_atoms", num_light_atoms )
    return similarity.exp_delta( 2 * (orig_num_heavy_atoms - num_heavy_atoms), 0 )



# Example of a complex rule: A combination of a few simple rules (in this case: Mcs,
# MinimumNumberOfAtom, and Cutoff).
#   cutoff_simi = Cutoff( 0.2, Mcs( MinimumNumberOfAtom( 4 ) ) )



if ("__main__" == __name__) :
    import struc
    from mcs   import SchrodMcs
    from kbase import KBASE

    filenames = ["xfer3.11.mol2", "xfer3.12.mol2",]
    id_list   = struc.read_n_files( filenames )
    mol0      = KBASE.ask( id_list[0] )
    mol1      = KBASE.ask( id_list[1] )

    mcs    = SchrodMcs( 3 )
    mcs_id = mcs.search( mol0, mol1 )[0]
    mol_id = KBASE.ask( mcs_id, "mcs-parents" )

    print MCS.similarity( mol_id[0], mol_id[1] )
def gen_graph(mcs_ids, basic_rule, slack_rule, simi_cutoff, max_csize, num_c2c):
    """
    Generates and returns a graph according to the requirements.

    @type  mcs_ids    : C{list} of C{str}
    @param mcs_ids    : A list of IDs of the maximum common substructures in C{KBASE}
    @type  basic_rule : C{rule.Rule}
    @param basic_rule : The rule to determine the similarity score between two structures
    @type  slack_rule : C{rule.Rule}
    @param slack_rule : A looser rule used to compute the slack similarity score stored on each edge
    @type  simi_cutoff: C{float}
    @param simi_cutoff: Cutoff of similarity scores. Values less than the cutoff are considered as 0.
    @type  max_csize  : C{int}
    @param max_csize  : Maximum cluster size
    @type  num_c2c    : C{int}
    @param num_c2c    : Number of cluster-to-cluster edges
    """
    basic_graph = networkx.Graph()
    all_ids = set()
    fh = open("simiscore", "w") if (logging.getLogger().getEffectiveLevel() == logging.DEBUG) else None

    logging.info(" Calculating similarity scores...")
    for id in mcs_ids:
        # Gets the IDs of mol0 and mol1.
        id0, id1 = mcs.get_parent_ids(id)
        # Calculates the similarity scores for the molecule pair.
        simi = basic_rule.similarity(id0, id1, mcs_id=id)
        slack_simi = slack_rule.similarity(id0, id1, mcs_id=id)
        KBASE.deposit_extra(id, "similarity", simi)
        KBASE.deposit_extra(id, "slack_similarity", slack_simi)
        all_ids.add(id0)
        all_ids.add(id1)
        if (fh):
            print >> fh, simi
    logging.info(" Calculating similarity scores... Done")

    basic_graph.add_nodes_from(all_ids)

    # Creates a complete graph.
    complete = create(basic_graph, mcs_ids, rule.Cutoff(0))
    # Deletes connections with scores lower than simi_cutoff.
    desired = cutoff_graph(complete, simi_cutoff)
    # Gets molecule clusters.
    clusters = sorted(networkx.connected_components(desired),
                      cmp=lambda x, y: len(y) - len(x))

    logging.info(" Original number of clusters: %d" % len(clusters))

    # Breaks down big clusters.
    num_big_clusters = 0
    for i, c in enumerate(clusters):
        logging.info(" size of cluster #%02d: %d" % (i, len(c),))
        num_big_clusters += (len(c) > max_csize)

    if (num_big_clusters and False):
        logging.info(" %d cluster(s) are too big. Break them into smaller ones. Reclustering..."
                     % num_big_clusters)
        new_clusters = []
        for c in clusters:
            if (max_csize < len(c)):
                new_clusters += break_cluster(desired.subgraph(c), simi_cutoff, max_csize)
            else:
                new_clusters.append(c)
        clusters = new_clusters
        logging.info(" Reclustering... Done")
        clusters = sorted(clusters, cmp=lambda x, y: len(y) - len(x))

    n = len(clusters)
    logging.info(" %d clusters in total" % n)
    for i, c in enumerate(clusters):
        logging.info(" size of cluster #%02d: %d" % (i, len(c),))

    # Optimizes the subgraphs.
    logging.info(" Optimizing the subgraph of each cluster...")
    new_desired = networkx.Graph()
    for e in clusters:
        sg = optimize_graph(complete.subgraph(e), desired.subgraph(e), "trim", simi_cutoff)
        new_desired = networkx.union(new_desired, sg)
    desired = new_desired
    logging.info(" Optimizing the subgraph of each cluster... Done")

    # Connects the clusters.
    unconnected_clusters = set(range(n))
    while (unconnected_clusters and n > 1):
        c2c_edges = []
        cluster_index = unconnected_clusters.pop()
        this_cluster = clusters[cluster_index]
        other_clusters = copy.copy(clusters)
        other_clusters.remove(this_cluster)
        for e in other_clusters:
            c2c_edges.extend(networkx.edge_boundary(complete, this_cluster, e))
        if (len(c2c_edges) == 0):
            logging.warn("WARNING: Cannot connect cluster #%d with others." % (cluster_index,))
            logging.warn(" If there should be connections, consider to adjust the rules to")
            logging.warn(" reduce 0-similarity assignments or loosen the MCS conditions.")
            continue

        c2c_edges.sort(lambda x, y: cmp_edge(complete, x, y))

        connected_clusters = set()
        for k in range(-1, -num_c2c - 1, -1):
            edge = c2c_edges[k]
            node0 = edge[0]
            node1 = edge[1]
            simi = complete[node0][node1]["similarity"]
            mcs_id = complete[node0][node1]["mcs_id"]
            desired.add_edge(node0, node1, similarity=simi, boundary=True, mcs_id=mcs_id)
            logging.warn(" boundary similarity = %f between '%s' and '%s'" %
                         (simi, KBASE.ask(node0), KBASE.ask(node1),))
            for e in unconnected_clusters:
                if (node0 in clusters[e] or node1 in clusters[e]):
                    connected_clusters.add(e)
        unconnected_clusters -= connected_clusters

    return desired, clusters
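# Typical invocation (a sketch; the keyword values below are the ones used by the driver script
# in this code base):
#
#     g, clusters = gen_graph( mcs_ids, basic_rule, slack_rule,
#                              simi_cutoff = 0.05, max_csize = 100, num_c2c = 1 )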
def _similarity( self, id0, id1, **kwarg ) :
    # Uses the first common substructure.
    mcs_id = kwarg["mcs_id"]
    mcs0   = mcs.get_struc( mcs_id ).copy()
    mol0   = KBASE.ask( id0 )
    mol1   = KBASE.ask( id1 )

    orig_num_heavy_atoms = len( mcs0.heavy_atoms() )

    # Deletes chiral atoms.
    chiral_atoms = mcs0.chiral_atoms()
    ring_atoms   = mcs0.ring_atoms()
    chiral_atoms.sort( reverse = True )
    for atom_index in chiral_atoms :
        if (atom_index in ring_atoms) :
            # If the chiral atom is in a ring, delete the atoms attached to it that are not in the ring.
            bonded_atoms = set( mcs0.bonded_atoms( atom_index ) ) - ring_atoms
            if (bonded_atoms) :
                i = 0
                n = 0
                for atom in bonded_atoms :
                    cp = mcs0.copy()
                    cp.delete_atom( atom )
                    m = len( cp.atom )
                    if (m > n) :
                        i = atom
                        n = m
                mcs0.delete_atom( i )
                mcs0 = mcs0.copy()
            else :
                logging.warn( "WARNING: Cannot delete chiral atom #%d in structure: %s" %
                              (atom_index, mcs0.title(),) )
        else :
            # If the chiral atom is not a ring atom, we simply delete it.
            mcs0.delete_atom( atom_index )
            mcs0 = mcs0.copy()

    # If the deletion results in multiple unconnected fragments, we keep only the biggest one.
    mcs0 = mcs0.copy()
    atoms_to_delete = []
    for e in mcs0.molecules()[1:] :
        atoms_to_delete.extend( e )
    mcs0.delete_atom( atoms_to_delete )
    mcs0 = mcs0.copy()

    partial_ring = self._delete_broken_ring( mol0, mol1, mcs0 )

    mcs0 = mcs0.copy()
    atoms_to_delete_2 = []
    for e in mcs0.molecules()[1:] :
        atoms_to_delete_2.extend( e )
    mcs0.delete_atom( atoms_to_delete_2 )

    # This differs from Schrodinger's method: OpenEye cannot save MCS search results as SMARTS
    # strings, so SMILES strings are used instead for the later layout.
    smiles0 = mcs0.smiles()
    # Arbitrarily sets smiles1 = smiles0, ignoring the difference between matching mol0 onto
    # mol1 and matching mol1 onto mol0.
    smiles1 = smiles0
    KBASE.deposit_extra( mcs_id, "trimmed-mcs", {id0: smiles0, id1: smiles1,} )
    KBASE.deposit_extra( mcs_id, "partial_ring", len( partial_ring ) )
    KBASE.deposit_extra( mcs_id, "layout_mcs", smiles0 )

    num_heavy_atoms = len( mcs0.heavy_atoms() )
    num_light_atoms = len( mcs0.atom ) - num_heavy_atoms
    KBASE.deposit_extra( mcs_id, "num_heavy_atoms", num_heavy_atoms )
    KBASE.deposit_extra( mcs_id, "num_light_atoms", num_light_atoms )
    return similarity.exp_delta( 2 * (orig_num_heavy_atoms - num_heavy_atoms), 0 )
def _similarity( self, id0, id1, **kwarg ) :
    # Uses the first common substructure.
    mcs_id = kwarg["mcs_id"]
    mcs0   = mcs.get_struc( mcs_id ).copy()
    mol0   = KBASE.ask( id0 )
    mol1   = KBASE.ask( id1 )

    orig_num_heavy_atoms = len( mcs0.heavy_atoms() )

    partial_ring = self._delete_broken_ring( mol0, mol1, mcs0 )

    # Deletes chiral atoms.
    chiral_atoms = mcs0.chiral_atoms()
    ring_atoms   = mcs0.ring_atoms()
    chiral_atoms.sort( reverse = True )
    for atom_index in chiral_atoms :
        if (atom_index in ring_atoms) :
            # If the chiral atom is in a ring, delete the atoms attached to it that are not in the ring.
            bonded_atoms = set( mcs0.bonded_atoms( atom_index ) ) - ring_atoms
            if (bonded_atoms) :
                i = 0
                n = 0
                for atom in bonded_atoms :
                    cp = mcs0.copy()
                    cp.delete_atom( atom )
                    m = len( cp.atom )
                    if (m > n) :
                        i = atom
                        n = m
                mcs0.delete_atom( i )
        else :
            # If the chiral atom is not a ring atom, we simply delete it.
            mcs0.delete_atom( atom_index )

    # If the deletion results in multiple unconnected fragments, we keep only the biggest one.
    atoms_to_delete = []
    for e in mcs0.molecules()[1:] :
        atoms_to_delete.extend( e )
    mcs0.delete_atom( atoms_to_delete )

    # Gets the SMARTS for the trimmed structure.
    atom_list0 = []
    atom_list1 = []
    for e in mcs0.atom_prop[1:] :
        atom_list0.append( e["orig_index"  ] )
        atom_list1.append( e["mapped_index"] )
    smarts0 = mol0.smarts( atom_list0 )
    try :
        smarts1 = mol1.smarts( atom_list1 )
    except ValueError :
        smarts1 = ""

    KBASE.deposit_extra( mcs_id, "trimmed-mcs", {id0: smarts0, id1: smarts1,} )
    KBASE.deposit_extra( mcs_id, "partial_ring", len( partial_ring ) )
    KBASE.deposit_extra( mcs_id, "layout_mcs", mcs0.smiles() )

    num_heavy_atoms = len( mcs0.heavy_atoms() )
    num_light_atoms = len( mcs0.atom ) - num_heavy_atoms
    KBASE.deposit_extra( mcs_id, "num_heavy_atoms", num_heavy_atoms )
    KBASE.deposit_extra( mcs_id, "num_light_atoms", num_light_atoms )
    return similarity.exp_delta( 2 * (orig_num_heavy_atoms - num_heavy_atoms), 0 )