class PathFinder:
    """Search for reaction pathways that connect substrates to products.

    The search is bidirectional: one tree of compounds is grown from the
    substrates (reactions applied forward) and one from the products
    (reactions applied backward).  Compounds are identified by the hash of
    their graph; a compound whose hash appears in both trees is a "bridge"
    through which a full pathway can be assembled.
    """

    def __init__(self, carbon_only=False, pruning_method=None,
                 ignore_chirality=True, use_antimotifs=True,
                 outstream=sys.stderr,
                 reaction_database_fname="../rec/reaction_templates.dat"):
        """Store the search options and create the Reactor.

        carbon_only, ignore_chirality, use_antimotifs and
        reaction_database_fname are forwarded unchanged to Reactor.
        pruning_method selects the test used by verify_hash ('PP' or None).
        outstream is the file-like object that receives progress messages.
        """
        self.carbon_only = carbon_only
        self.ignore_chirality = ignore_chirality
        self.use_antimotifs = use_antimotifs
        self.pruning_method = pruning_method
        self.outstream = outstream
        self.reaction_database_fname = reaction_database_fname
        self.reactor = Reactor(
            carbon_only=self.carbon_only,
            ignore_chirality=self.ignore_chirality,
            use_antimotifs=self.use_antimotifs,
            reaction_database_fname=self.reaction_database_fname)

    def _log(self, message):
        """Write one line of progress text to self.outstream."""
        self.outstream.write("%s\n" % (message,))

    def _prepare_seed(self, G, label):
        """Clone a seed graph, normalise it, log it and return (hash, clone)."""
        G_temp = G.clone()
        if self.ignore_chirality:
            G_temp.reset_chiralities()
        h = G_temp.hash(ignore_chirality=self.ignore_chirality)
        self._log(label + ": " + h)
        return (h, G_temp)

    def balance_reaction(self, substrate, product):
        """Balance the reaction (by counting atoms).

        Appends " + CO2", " + H2O" and " + PO3" terms to whichever side is
        short of them and returns the pair (substrate, product).  Raises
        Exception when an atom other than C, O, N or PO3 is unbalanced.
        """
        # NOTE(review): the substrate is parsed with compound2graph and the
        # product with compounds2graph; both names are kept exactly as found -
        # confirm that the asymmetry is intentional.
        atom_gap = (compound2graph(substrate).node_bag()
                    - compounds2graph(product).node_bag())

        extra_bag = bag.Bag()
        extra_bag['CO2'] = atom_gap['C']
        extra_bag['H2O'] = atom_gap['O'] + atom_gap['N'] - 2 * atom_gap['C']
        extra_bag['PO3'] = atom_gap['PO3']

        for (atom, count) in atom_gap.itercounts():
            if atom not in ['C', 'O', 'N', 'PO3'] and count != 0:
                raise Exception(
                    "cannot balance the number of '%s' atoms, between %s and %s"
                    % (atom, substrate, product))

        for (metabolite, count) in extra_bag.itercounts():
            if count > 0:
                product += (" + " + metabolite) * count
            if count < 0:
                substrate += (" + " + metabolite) * (-count)

        return (substrate, product)

    def verify_hash(self, hash):
        """Return True iff the hash passes the pruning test.

        With pruning_method == 'PP' (the assumptions of Melendez-Hevia's
        paper about the pentose phosphate cycle) every molecule in the hash
        must be either carbon-free or a phosphorylated compound with at
        least 3 carbons.  With any other pruning_method every hash passes.
        This method is used for pruning the search tree.
        """
        if self.pruning_method == 'PP':
            for (nodes, bonds) in parse_hash(hash):
                # count the atoms of each molecule in the hash
                node_bag = bag.Bag()
                for atom in nodes:
                    (base_atom, valence, hydrogens, charge, chirality) = \
                        parse_atom(atom)
                    node_bag[base_atom] += 1

                n_carbons = node_bag['C']
                if n_carbons in [1, 2]:
                    # a 1 or 2 carbon sugar - invalid
                    return False
                if n_carbons > 0 and node_bag['PO3'] == 0:
                    # an unphosphorylated sugar - invalid
                    return False
                # otherwise: not a sugar (e.g. PO3 or H2O), or a
                # phosphorylated sugar with at least 3 carbons - valid
        # NOTE(review): placed at method level so that disabling pruning
        # accepts every hash - confirm against the original layout.
        return True

    def prune_product_list(self, prod_list):
        """Drop products that fail verify_hash or repeat a (substrate, product) pair.

        prod_list holds (substrate hash, product graph, rid, mapping) tuples;
        the surviving tuples are returned in their original order.
        """
        unique_substrate_product_pairs = set()
        verified_list = []
        for (h_substrate, G_product, rid, mapping) in prod_list:
            h_product = G_product.hash(ignore_chirality=self.ignore_chirality)
            if not self.verify_hash(h_product):
                continue
            pair = (h_substrate, h_product)
            if pair in unique_substrate_product_pairs:
                continue
            unique_substrate_product_pairs.add(pair)
            verified_list.append((h_substrate, G_product, rid, mapping))
        return verified_list

    def generate_new_compounds(self, compounds, write_progress_bar=True,
                               backward=False):
        """Apply all reactions to every compound and return the pruned products.

        compounds maps hashes to graphs.  backward is handed to
        Reactor.apply_all_reactions.  When write_progress_bar is true an
        80-dot progress bar is written to self.outstream.  The result is a
        list of (source hash, product graph, rid, mapping) tuples filtered
        by prune_product_list.
        """
        new_product_list = []
        total_count = len(compounds)
        n_dots = 80
        n_dots_written = 0
        if write_progress_bar:
            self.outstream.write("\t\t- [")

        for (counter, (h, G)) in enumerate(compounds.items()):
            if write_progress_bar:
                # integer arithmetic: number of dots still owed at this point
                dots_to_write = (counter * n_dots // total_count) - n_dots_written
                self.outstream.write("." * dots_to_write)
                n_dots_written += dots_to_write
            for (G_product, rid, mapping) in \
                    self.reactor.apply_all_reactions(G, backward):
                new_product_list.append((h, G_product, rid, mapping))

        if write_progress_bar:
            self.outstream.write("." * (n_dots - n_dots_written) + "]\n")

        return self.prune_product_list(new_product_list)

    def expand_tree(self, compound_map, set_of_processed_compounds,
                    reaction_tree=None, backward=False):
        """Expand the tree of compounds by one level (in place).

        * reaction_tree is a multi-map, where the keys are compound hashes,
          and the values are lists of 3-tuples, containing
          (predecessor hash, reaction_id, reaction_mapping) describing the
          reaction from the predecessor to the current compound (the key).
          It is only updated when it is not None.
        * compound_map is a map from hashes to ChemGraphs, because we need
          the graph in order to apply all reactions to it.  It is cleared
          and refilled with the compounds that are new in this level.
        * set_of_processed_compounds is a set of all the hashes that have
          already entered a compound_map.  It prevents expanding the same
          compound twice and may be shared by the substrate and product
          searches.
        * backward selects the direction in which reactions are applied.
        """
        # 'backward' must be passed by keyword: the second positional
        # parameter of generate_new_compounds is write_progress_bar.
        new_compound_list = self.generate_new_compounds(compound_map,
                                                        backward=backward)
        compound_map.clear()
        for (h_predecessor, G, reaction_id, mapping) in new_compound_list:
            h_compound = G.hash(ignore_chirality=self.ignore_chirality)
            if h_compound not in set_of_processed_compounds:
                compound_map[h_compound] = G
                set_of_processed_compounds.add(h_compound)
            if reaction_tree is not None:
                # record the reaction even for already-known compounds, so
                # that a compound reached from the other tree shows up as a
                # bridge
                if h_compound not in reaction_tree:
                    reaction_tree[h_compound] = []
                reaction_tree[h_compound] += [(h_predecessor, reaction_id, mapping)]

    def reaction_DFS(self, reaction_tree, h, depth):
        """Return all the pathways that lead from a seed to the compound h.

        reaction_tree - dictionary mapping compounds to the reactions that
                        create them
        h             - hash of the compound to be created
        depth         - maximum number of reactions in the returned paths

        Each pathway is a list whose first item is the seed hash, followed
        by one (rid, mapping) pair per reaction.
        """
        if depth < 0:
            # exceeded the allowed depth without reaching a seed: dead-end
            return []

        pathways = []
        for (h_predecessor, rid, mapping) in reaction_tree[h]:
            if h_predecessor is None:
                # 'h' can be created from nothing, i.e. it is a seed
                pathways += [[h]]
            else:
                for pathway in self.reaction_DFS(reaction_tree, h_predecessor,
                                                 depth - 1):
                    pathways += [pathway + [(rid, mapping)]]
        return pathways

    def find_shortest_pathway(self, substrates, products, max_levels=4,
                              stop_after_first_solution=False):
        """Search for pathways between any substrate and any product.

        Alternately expands the substrate tree (forward) and the product
        tree (backward) for up to max_levels levels; with
        stop_after_first_solution the search stops at the first level that
        yields a bridging compound.

        Returns (original_compound_map, possible_pathways, depth) where
        possible_pathways is a list of
        (substrate_pathways, product_pathways, bridge hash) triples and
        depth is the number of levels expanded.  When nothing is found, or
        a tree cannot be expanded any further, returns
        (original_compound_map, [], -1).  Raises Exception when
        max_levels < 1.
        """
        if max_levels < 1:
            raise Exception("max_levels must be at least 1")

        # Each reaction tree maps a compound hash to a list of
        # (predecessor hash, reaction id, mapping) tuples; seeds carry the
        # single entry (None, -1, []).
        # The current maps contain only the compounds of the latest level,
        # mapping hashes to ChemGraphs; in order to save memory, only the
        # hashes of older compounds are kept.
        original_compound_map = {}
        set_of_processed_compounds = set()
        substrate_reaction_tree = {}
        product_reaction_tree = {}
        current_substrate_map = {}
        current_product_map = {}

        for G in substrates:
            (h, G_temp) = self._prepare_seed(G, "Substrate")
            substrate_reaction_tree[h] = [(None, -1, [])]
            original_compound_map[h] = G_temp
            current_substrate_map[h] = G_temp

        for G in products:
            (h, G_temp) = self._prepare_seed(G, "Product")
            product_reaction_tree[h] = [(None, -1, [])]
            original_compound_map[h] = G_temp
            current_product_map[h] = G_temp

        time_per_compound = 0
        substrate_map_depth = 0
        product_map_depth = 0
        while substrate_map_depth + product_map_depth < max_levels:
            # no newline here: the time estimate continues the same line
            self.outstream.write("\t*** Level #%d " % (
                substrate_map_depth + product_map_depth + 1))
            begin_time = time.time()
            if substrate_map_depth <= product_map_depth:
                num_current_compounds = len(current_substrate_map)
                self._log("- estimated time: %.2f sec"
                          % (time_per_compound * num_current_compounds))
                self.expand_tree(current_substrate_map,
                                 set_of_processed_compounds,
                                 reaction_tree=substrate_reaction_tree,
                                 backward=False)
                substrate_map_depth += 1
            else:
                num_current_compounds = len(current_product_map)
                self._log("- estimated time: %.2f sec"
                          % (time_per_compound * num_current_compounds))
                self.expand_tree(current_product_map,
                                 set_of_processed_compounds,
                                 reaction_tree=product_reaction_tree,
                                 backward=True)
                product_map_depth += 1

            if num_current_compounds == 0:
                self._log("Reached a dead end, no new compounds can be created...")
                return (original_compound_map, [], -1)

            elapsed_time = float(time.time() - begin_time)
            time_per_compound = elapsed_time / num_current_compounds
            self._log("\t\t- %d substrates + %d products"
                      % (len(substrate_reaction_tree), len(product_reaction_tree)))

            bridging_compounds = (set(substrate_reaction_tree)
                                  & set(product_reaction_tree))
            if stop_after_first_solution and len(bridging_compounds) > 0:
                break

        if not bridging_compounds:
            self._log("No path was found, even after %d levels" % max_levels)
            return (original_compound_map, [], -1)

        self._log("\t*** found %d bridging compounds" % len(bridging_compounds))
        possible_pathways = []
        # for each bridging compound, find the pair of pathway lists leading
        # to it: one from the substrates and one from the products
        for h_bridge in bridging_compounds:
            substrate_pathways = self.reaction_DFS(
                substrate_reaction_tree, h_bridge, substrate_map_depth)
            product_pathways = self.reaction_DFS(
                product_reaction_tree, h_bridge, product_map_depth)
            possible_pathways.append(
                (substrate_pathways, product_pathways, h_bridge))
        return (original_compound_map, possible_pathways,
                substrate_map_depth + product_map_depth)

    def find_distance(self, substrates, products, max_levels=4):
        """Return the number of levels needed to connect substrates to products.

        Odd levels expand the product side (backward), even levels the
        substrate side (forward).  Returns the first level at which the two
        sets of processed hashes intersect, or -1 when a side cannot be
        expanded or no intersection is found within max_levels.  Raises
        Exception when max_levels < 1.
        """
        if max_levels < 1:
            raise Exception("max_levels must be at least 1")

        set_of_processed_substrates = set()
        set_of_processed_products = set()
        current_substrate_map = {}
        current_product_map = {}

        for G in substrates:
            (h, G_temp) = self._prepare_seed(G, "Substrate")
            set_of_processed_substrates.add(h)
            current_substrate_map[h] = G_temp

        for G in products:
            (h, G_temp) = self._prepare_seed(G, "Product")
            set_of_processed_products.add(h)
            current_product_map[h] = G_temp

        time_per_compound = 0
        for level in range(1, max_levels + 1):
            # no newline here: the time estimate continues the same line
            self.outstream.write("\t*** Level #%d " % level)
            begin_time = time.time()
            if level % 2 == 0:
                self._log("- estimated time: %.2f sec"
                          % (time_per_compound * len(current_substrate_map)))
                self.expand_tree(current_substrate_map,
                                 set_of_processed_substrates, backward=False)
                num_current_compounds = len(current_substrate_map)
            else:
                self._log("- estimated time: %.2f sec"
                          % (time_per_compound * len(current_product_map)))
                self.expand_tree(current_product_map,
                                 set_of_processed_products, backward=True)
                num_current_compounds = len(current_product_map)

            if num_current_compounds == 0:
                self._log("Reached a dead end, no new compounds can be created...")
                return -1

            elapsed_time = float(time.time() - begin_time)
            time_per_compound = elapsed_time / num_current_compounds
            self._log("\t\t- %d substrates + %d products"
                      % (len(set_of_processed_substrates),
                         len(set_of_processed_products)))

            bridging_compounds = (set_of_processed_substrates
                                  & set_of_processed_products)
            if len(bridging_compounds) > 0:
                self._log("\t*** found %d bridging compounds"
                          % len(bridging_compounds))
                return level

        self._log("No path was found, even after %d levels" % max_levels)
        return -1

    def pathway2text(self, G_subs, expanded_reaction_list):
        """Replay a pathway on a clone of G_subs and describe every step.

        expanded_reaction_list holds (rid, mapping, reaction_list) triples
        as produced by expand_rid_list.  Returns (log text, final graph).
        """
        G = G_subs.clone()
        s = ""
        for (rid, mapping, reaction_list) in expanded_reaction_list:
            s += (str(G) + " (" + graph2compound(G, self.ignore_chirality)
                  + ") - " + str(rid) + " : " + str(mapping) + "\n")
            for reaction in reaction_list:
                s += ("\t" + str(G) + " ("
                      + graph2compound(G, self.ignore_chirality) + ") - "
                      + str(reaction.tostring(mapping)) + "\n")
                reaction.apply(G, mapping)
            # NOTE(review): attributes are refreshed once per reaction id,
            # after all of its sub-reactions - confirm against the original
            # layout.
            G.update_attributes()
            if self.ignore_chirality:
                G.reset_chiralities()
        s += str(G) + " (" + graph2compound(G, self.ignore_chirality) + ")\n"
        return (s, G)

    def pathway2svg(self, G_subs, expanded_reaction_list, size_x=300,
                    size_y=150, font_size=10):
        """Draw a pathway as a row of compounds separated by reaction arrows.

        Steps whose reaction id is 'hidden' are applied to the graph but
        neither their arrow nor the resulting compound is drawn.  Returns
        (scene, final graph).
        """
        gap_size_x = 100
        gap_size_y = 15
        scene = Scene()
        G = G_subs.clone()

        # the starting compound is always drawn
        scene.add(G.svg(Scene(size_x, size_y, font_size)),
                  offset=(0, gap_size_y))
        curr_x = size_x

        for (rid, mapping, reaction_list) in expanded_reaction_list:
            for reaction in reaction_list:
                reaction.apply(G, mapping)
            # NOTE(review): refreshed once per reaction id - confirm against
            # the original layout.
            G.update_attributes()
            if self.ignore_chirality:
                G.reset_chiralities()

            if rid != 'hidden':
                # draw the arrow for the direction of the reaction, labelled
                # with the reaction name and the mapping
                scene.add(ChemicalArrow((curr_x + 30, size_y / 2),
                                        (curr_x + 70, size_y / 2),
                                        stroke_width=2))
                scene.add(Text((curr_x, size_y / 2 - 20),
                               self.reactor.get_reaction_name(rid),
                               font_size, fill_color=red))
                scene.add(Text((curr_x, size_y / 2 + 25), str(mapping),
                               font_size, fill_color=red))
                curr_x += gap_size_x

                # then the compound that the reaction produced
                scene.add(G.svg(Scene(size_x, size_y, font_size)),
                          offset=(curr_x, gap_size_y))
                curr_x += size_x

        scene.justify()
        return (scene, G)

    def expand_rid_list(self, rid_list):
        """Attach the list of subreactions corresponding to each Reaction ID."""
        return [(rid, mapping, self.reactor.get_reaction_list(rid))
                for (rid, mapping) in rid_list]

    def apply_rid_list(self, G, rid_list):
        """Apply every (rid, mapping) of rid_list to G in place and return G."""
        for (rid, mapping) in rid_list:
            for subreaction in self.reactor.get_reaction_list(rid):
                subreaction.apply(G, mapping)
            # NOTE(review): refreshed once per reaction id - confirm against
            # the original layout.
            G.update_attributes()
        return G

    def reverse_rid_list(self, rid_list):
        """Return the reversed list, with every reaction id replaced by its reverse."""
        return [(self.reactor.reverse_reaction(rid), mapping)
                for (rid, mapping) in reversed(rid_list)]

    def get_all_possible_scenes(self, original_compound_map, possible_pathways):
        """Return a list of (cost, scene) pairs, one per drawable pathway.

        possible_pathways is the list returned by find_shortest_pathway.
        Every substrate-side pathway is combined with every product-side
        pathway of the same bridge; cost is the total number of reactions.
        Pathways that raise ReactionException are reported on
        self.outstream and skipped.  If replaying a pathway does not end at
        its bridge compound, diagnostics are printed to stdout and the
        process exits with status -1.
        """
        def reaches_bridge(G, h_bridge):
            h = G.hash(ignore_chirality=self.ignore_chirality)
            return compare_hashes(h, h_bridge, self.ignore_chirality) == 0

        def abort_on_mismatch(labelled_graphs, h_bridge):
            # the replayed pathway contradicts the DFS result: dump the
            # hashes involved and stop
            sys.stdout.write("ERROR:\n")
            for (label, G) in labelled_graphs:
                sys.stdout.write("%s:  %s\n" % (
                    label, G.hash(ignore_chirality=self.ignore_chirality)))
            sys.stdout.write("bridge:  %s\n" % (h_bridge,))
            sys.exit(-1)

        scene_list = []
        for (substrate_pathways, product_pathways, h_bridge) in possible_pathways:
            for subs_path in substrate_pathways:
                G_subs = original_compound_map[subs_path[0]]
                subs_reaction_list = self.expand_rid_list(subs_path[1:])
                try:
                    (subs_log, G_last_subs) = self.pathway2text(
                        G_subs.clone(), subs_reaction_list)
                except ReactionException as msg:
                    self._log(msg)
                    continue
                if not reaches_bridge(G_last_subs, h_bridge):
                    abort_on_mismatch([("subs", G_subs),
                                       ("last_subs", G_last_subs)], h_bridge)

                for prod_path in product_pathways:
                    G_prod = original_compound_map[prod_path[0]]
                    prod_reaction_list = self.expand_rid_list(prod_path[1:])
                    reverse_prod_reaction_list = self.expand_rid_list(
                        self.reverse_rid_list(prod_path[1:]))
                    try:
                        (prod_log, G_last_prod) = self.pathway2text(
                            G_prod.clone(), prod_reaction_list)
                    except ReactionException as msg:
                        self._log(msg)
                        continue
                    if not reaches_bridge(G_last_prod, h_bridge):
                        abort_on_mismatch([("subs", G_subs),
                                           ("prod", G_prod),
                                           ("last_prod", G_last_prod)], h_bridge)

                    # join the two halves through the permutation that maps
                    # the substrate-side bridge onto the product-side bridge
                    perm_reaction = self.reactor.get_permutation_reaction(
                        G_last_subs, G_last_prod)
                    full_reaction_list = (subs_reaction_list + [perm_reaction]
                                          + reverse_prod_reaction_list)
                    try:
                        (pathway_scene, G_last) = self.pathway2svg(
                            G_subs, full_reaction_list)
                    except ReactionException as msg:
                        self._log(msg)
                        continue

                    cost = len(subs_reaction_list) + len(reverse_prod_reaction_list)
                    scene_list.append((cost, pathway_scene))
        return scene_list
class PathFinder:
    """Search for reaction pathways that connect substrates to products.

    The search is bidirectional: one tree of compounds is grown from the
    substrates (reactions applied forward) and one from the products
    (reactions applied backward).  Compounds are identified by the hash of
    their graph; a compound whose hash appears in both trees is a "bridge"
    through which a full pathway can be assembled.
    """

    def __init__(self, carbon_only=False, pruning_method=None,
                 ignore_chirality=True, use_antimotifs=True,
                 outstream=sys.stderr,
                 reaction_database_fname="../rec/reaction_templates.dat"):
        """Store the search options and create the Reactor.

        carbon_only, ignore_chirality, use_antimotifs and
        reaction_database_fname are forwarded unchanged to Reactor.
        pruning_method selects the test used by verify_hash ('PP' or None).
        outstream is the file-like object that receives progress messages.
        """
        self.carbon_only = carbon_only
        self.ignore_chirality = ignore_chirality
        self.use_antimotifs = use_antimotifs
        self.pruning_method = pruning_method
        self.outstream = outstream
        self.reaction_database_fname = reaction_database_fname
        self.reactor = Reactor(
            carbon_only=self.carbon_only,
            ignore_chirality=self.ignore_chirality,
            use_antimotifs=self.use_antimotifs,
            reaction_database_fname=self.reaction_database_fname)

    def _log(self, message):
        """Write one line of progress text to self.outstream."""
        self.outstream.write("%s\n" % (message,))

    def _prepare_seed(self, G, label):
        """Clone a seed graph, normalise it, log it and return (hash, clone)."""
        G_temp = G.clone()
        if self.ignore_chirality:
            G_temp.reset_chiralities()
        h = G_temp.hash(ignore_chirality=self.ignore_chirality)
        self._log(label + ": " + h)
        return (h, G_temp)

    def balance_reaction(self, substrate, product):
        """Balance the reaction (by counting atoms).

        Appends " + CO2", " + H2O" and " + PO3" terms to whichever side is
        short of them and returns the pair (substrate, product).  Raises
        Exception when an atom other than C, O, N or PO3 is unbalanced.
        """
        # NOTE(review): the substrate is parsed with compound2graph and the
        # product with compounds2graph; both names are kept exactly as found -
        # confirm that the asymmetry is intentional.
        atom_gap = (compound2graph(substrate).node_bag()
                    - compounds2graph(product).node_bag())

        extra_bag = bag.Bag()
        extra_bag['CO2'] = atom_gap['C']
        extra_bag['H2O'] = atom_gap['O'] + atom_gap['N'] - 2 * atom_gap['C']
        extra_bag['PO3'] = atom_gap['PO3']

        for (atom, count) in atom_gap.itercounts():
            if atom not in ['C', 'O', 'N', 'PO3'] and count != 0:
                raise Exception(
                    "cannot balance the number of '%s' atoms, between %s and %s"
                    % (atom, substrate, product))

        for (metabolite, count) in extra_bag.itercounts():
            if count > 0:
                product += (" + " + metabolite) * count
            if count < 0:
                substrate += (" + " + metabolite) * (-count)

        return (substrate, product)

    def verify_hash(self, hash):
        """Return True iff the hash passes the pruning test.

        With pruning_method == 'PP' (the assumptions of Melendez-Hevia's
        paper about the pentose phosphate cycle) every molecule in the hash
        must be either carbon-free or a phosphorylated compound with at
        least 3 carbons.  With any other pruning_method every hash passes.
        This method is used for pruning the search tree.
        """
        if self.pruning_method == 'PP':
            for (nodes, bonds) in parse_hash(hash):
                # count the atoms of each molecule in the hash
                node_bag = bag.Bag()
                for atom in nodes:
                    (base_atom, valence, hydrogens, charge, chirality) = \
                        parse_atom(atom)
                    node_bag[base_atom] += 1

                n_carbons = node_bag['C']
                if n_carbons in [1, 2]:
                    # a 1 or 2 carbon sugar - invalid
                    return False
                if n_carbons > 0 and node_bag['PO3'] == 0:
                    # an unphosphorylated sugar - invalid
                    return False
                # otherwise: not a sugar (e.g. PO3 or H2O), or a
                # phosphorylated sugar with at least 3 carbons - valid
        # NOTE(review): placed at method level so that disabling pruning
        # accepts every hash - confirm against the original layout.
        return True

    def prune_product_list(self, prod_list):
        """Drop products that fail verify_hash or repeat a (substrate, product) pair.

        prod_list holds (substrate hash, product graph, rid, mapping) tuples;
        the surviving tuples are returned in their original order.
        """
        unique_substrate_product_pairs = set()
        verified_list = []
        for (h_substrate, G_product, rid, mapping) in prod_list:
            h_product = G_product.hash(ignore_chirality=self.ignore_chirality)
            if not self.verify_hash(h_product):
                continue
            pair = (h_substrate, h_product)
            if pair in unique_substrate_product_pairs:
                continue
            unique_substrate_product_pairs.add(pair)
            verified_list.append((h_substrate, G_product, rid, mapping))
        return verified_list

    def generate_new_compounds(self, compounds, write_progress_bar=True,
                               backward=False):
        """Apply all reactions to every compound and return the pruned products.

        compounds maps hashes to graphs.  backward is handed to
        Reactor.apply_all_reactions.  When write_progress_bar is true an
        80-dot progress bar is written to self.outstream.  The result is a
        list of (source hash, product graph, rid, mapping) tuples filtered
        by prune_product_list.
        """
        new_product_list = []
        total_count = len(compounds)
        n_dots = 80
        n_dots_written = 0
        if write_progress_bar:
            self.outstream.write("\t\t- [")

        for (counter, (h, G)) in enumerate(compounds.items()):
            if write_progress_bar:
                # integer arithmetic: number of dots still owed at this point
                dots_to_write = (counter * n_dots // total_count) - n_dots_written
                self.outstream.write("." * dots_to_write)
                n_dots_written += dots_to_write
            for (G_product, rid, mapping) in \
                    self.reactor.apply_all_reactions(G, backward):
                new_product_list.append((h, G_product, rid, mapping))

        if write_progress_bar:
            self.outstream.write("." * (n_dots - n_dots_written) + "]\n")

        return self.prune_product_list(new_product_list)

    def expand_tree(self, compound_map, set_of_processed_compounds,
                    reaction_tree=None, backward=False):
        """Expand the tree of compounds by one level (in place).

        * reaction_tree is a multi-map, where the keys are compound hashes,
          and the values are lists of 3-tuples, containing
          (predecessor hash, reaction_id, reaction_mapping) describing the
          reaction from the predecessor to the current compound (the key).
          It is only updated when it is not None.
        * compound_map is a map from hashes to ChemGraphs, because we need
          the graph in order to apply all reactions to it.  It is cleared
          and refilled with the compounds that are new in this level.
        * set_of_processed_compounds is a set of all the hashes that have
          already entered a compound_map.  It prevents expanding the same
          compound twice and may be shared by the substrate and product
          searches.
        * backward selects the direction in which reactions are applied.
        """
        # 'backward' must be passed by keyword: the second positional
        # parameter of generate_new_compounds is write_progress_bar.
        new_compound_list = self.generate_new_compounds(compound_map,
                                                        backward=backward)
        compound_map.clear()
        for (h_predecessor, G, reaction_id, mapping) in new_compound_list:
            h_compound = G.hash(ignore_chirality=self.ignore_chirality)
            if h_compound not in set_of_processed_compounds:
                compound_map[h_compound] = G
                set_of_processed_compounds.add(h_compound)
            if reaction_tree is not None:
                # record the reaction even for already-known compounds, so
                # that a compound reached from the other tree shows up as a
                # bridge
                if h_compound not in reaction_tree:
                    reaction_tree[h_compound] = []
                reaction_tree[h_compound] += [(h_predecessor, reaction_id, mapping)]

    def reaction_DFS(self, reaction_tree, h, depth):
        """Return all the pathways that lead from a seed to the compound h.

        reaction_tree - dictionary mapping compounds to the reactions that
                        create them
        h             - hash of the compound to be created
        depth         - maximum number of reactions in the returned paths

        Each pathway is a list whose first item is the seed hash, followed
        by one (rid, mapping) pair per reaction.
        """
        if depth < 0:
            # exceeded the allowed depth without reaching a seed: dead-end
            return []

        pathways = []
        for (h_predecessor, rid, mapping) in reaction_tree[h]:
            if h_predecessor is None:
                # 'h' can be created from nothing, i.e. it is a seed
                pathways += [[h]]
            else:
                for pathway in self.reaction_DFS(reaction_tree, h_predecessor,
                                                 depth - 1):
                    pathways += [pathway + [(rid, mapping)]]
        return pathways

    def find_shortest_pathway(self, substrates, products, max_levels=4,
                              stop_after_first_solution=False):
        """Search for pathways between any substrate and any product.

        Alternately expands the substrate tree (forward) and the product
        tree (backward) for up to max_levels levels; with
        stop_after_first_solution the search stops at the first level that
        yields a bridging compound.

        Returns (original_compound_map, possible_pathways, depth) where
        possible_pathways is a list of
        (substrate_pathways, product_pathways, bridge hash) triples and
        depth is the number of levels expanded.  When nothing is found, or
        a tree cannot be expanded any further, returns
        (original_compound_map, [], -1).  Raises Exception when
        max_levels < 1.
        """
        if max_levels < 1:
            raise Exception("max_levels must be at least 1")

        # Each reaction tree maps a compound hash to a list of
        # (predecessor hash, reaction id, mapping) tuples; seeds carry the
        # single entry (None, -1, []).
        # The current maps contain only the compounds of the latest level,
        # mapping hashes to ChemGraphs; in order to save memory, only the
        # hashes of older compounds are kept.
        original_compound_map = {}
        set_of_processed_compounds = set()
        substrate_reaction_tree = {}
        product_reaction_tree = {}
        current_substrate_map = {}
        current_product_map = {}

        for G in substrates:
            (h, G_temp) = self._prepare_seed(G, "Substrate")
            substrate_reaction_tree[h] = [(None, -1, [])]
            original_compound_map[h] = G_temp
            current_substrate_map[h] = G_temp

        for G in products:
            (h, G_temp) = self._prepare_seed(G, "Product")
            product_reaction_tree[h] = [(None, -1, [])]
            original_compound_map[h] = G_temp
            current_product_map[h] = G_temp

        time_per_compound = 0
        substrate_map_depth = 0
        product_map_depth = 0
        while substrate_map_depth + product_map_depth < max_levels:
            # no newline here: the time estimate continues the same line
            self.outstream.write("\t*** Level #%d " % (
                substrate_map_depth + product_map_depth + 1))
            begin_time = time.time()
            if substrate_map_depth <= product_map_depth:
                num_current_compounds = len(current_substrate_map)
                self._log("- estimated time: %.2f sec"
                          % (time_per_compound * num_current_compounds))
                self.expand_tree(current_substrate_map,
                                 set_of_processed_compounds,
                                 reaction_tree=substrate_reaction_tree,
                                 backward=False)
                substrate_map_depth += 1
            else:
                num_current_compounds = len(current_product_map)
                self._log("- estimated time: %.2f sec"
                          % (time_per_compound * num_current_compounds))
                self.expand_tree(current_product_map,
                                 set_of_processed_compounds,
                                 reaction_tree=product_reaction_tree,
                                 backward=True)
                product_map_depth += 1

            if num_current_compounds == 0:
                self._log("Reached a dead end, no new compounds can be created...")
                return (original_compound_map, [], -1)

            elapsed_time = float(time.time() - begin_time)
            time_per_compound = elapsed_time / num_current_compounds
            self._log("\t\t- %d substrates + %d products"
                      % (len(substrate_reaction_tree), len(product_reaction_tree)))

            bridging_compounds = (set(substrate_reaction_tree)
                                  & set(product_reaction_tree))
            if stop_after_first_solution and len(bridging_compounds) > 0:
                break

        if not bridging_compounds:
            self._log("No path was found, even after %d levels" % max_levels)
            return (original_compound_map, [], -1)

        self._log("\t*** found %d bridging compounds" % len(bridging_compounds))
        possible_pathways = []
        # for each bridging compound, find the pair of pathway lists leading
        # to it: one from the substrates and one from the products
        for h_bridge in bridging_compounds:
            substrate_pathways = self.reaction_DFS(
                substrate_reaction_tree, h_bridge, substrate_map_depth)
            product_pathways = self.reaction_DFS(
                product_reaction_tree, h_bridge, product_map_depth)
            possible_pathways.append(
                (substrate_pathways, product_pathways, h_bridge))
        return (original_compound_map, possible_pathways,
                substrate_map_depth + product_map_depth)

    def find_distance(self, substrates, products, max_levels=4):
        """Return the number of levels needed to connect substrates to products.

        Odd levels expand the product side (backward), even levels the
        substrate side (forward).  Returns the first level at which the two
        sets of processed hashes intersect, or -1 when a side cannot be
        expanded or no intersection is found within max_levels.  Raises
        Exception when max_levels < 1.
        """
        if max_levels < 1:
            raise Exception("max_levels must be at least 1")

        set_of_processed_substrates = set()
        set_of_processed_products = set()
        current_substrate_map = {}
        current_product_map = {}

        for G in substrates:
            (h, G_temp) = self._prepare_seed(G, "Substrate")
            set_of_processed_substrates.add(h)
            current_substrate_map[h] = G_temp

        for G in products:
            (h, G_temp) = self._prepare_seed(G, "Product")
            set_of_processed_products.add(h)
            current_product_map[h] = G_temp

        time_per_compound = 0
        for level in range(1, max_levels + 1):
            # no newline here: the time estimate continues the same line
            self.outstream.write("\t*** Level #%d " % level)
            begin_time = time.time()
            if level % 2 == 0:
                self._log("- estimated time: %.2f sec"
                          % (time_per_compound * len(current_substrate_map)))
                self.expand_tree(current_substrate_map,
                                 set_of_processed_substrates, backward=False)
                num_current_compounds = len(current_substrate_map)
            else:
                self._log("- estimated time: %.2f sec"
                          % (time_per_compound * len(current_product_map)))
                self.expand_tree(current_product_map,
                                 set_of_processed_products, backward=True)
                num_current_compounds = len(current_product_map)

            if num_current_compounds == 0:
                self._log("Reached a dead end, no new compounds can be created...")
                return -1

            elapsed_time = float(time.time() - begin_time)
            time_per_compound = elapsed_time / num_current_compounds
            self._log("\t\t- %d substrates + %d products"
                      % (len(set_of_processed_substrates),
                         len(set_of_processed_products)))

            bridging_compounds = (set_of_processed_substrates
                                  & set_of_processed_products)
            if len(bridging_compounds) > 0:
                self._log("\t*** found %d bridging compounds"
                          % len(bridging_compounds))
                return level

        self._log("No path was found, even after %d levels" % max_levels)
        return -1

    def pathway2text(self, G_subs, expanded_reaction_list):
        """Replay a pathway on a clone of G_subs and describe every step.

        expanded_reaction_list holds (rid, mapping, reaction_list) triples
        as produced by expand_rid_list.  Returns (log text, final graph).
        """
        G = G_subs.clone()
        s = ""
        for (rid, mapping, reaction_list) in expanded_reaction_list:
            s += (str(G) + " (" + graph2compound(G, self.ignore_chirality)
                  + ") - " + str(rid) + " : " + str(mapping) + "\n")
            for reaction in reaction_list:
                s += ("\t" + str(G) + " ("
                      + graph2compound(G, self.ignore_chirality) + ") - "
                      + str(reaction.tostring(mapping)) + "\n")
                reaction.apply(G, mapping)
            # NOTE(review): attributes are refreshed once per reaction id,
            # after all of its sub-reactions - confirm against the original
            # layout.
            G.update_attributes()
            if self.ignore_chirality:
                G.reset_chiralities()
        s += str(G) + " (" + graph2compound(G, self.ignore_chirality) + ")\n"
        return (s, G)

    def pathway2svg(self, G_subs, expanded_reaction_list, size_x=300,
                    size_y=150, font_size=10):
        """Draw a pathway as a row of compounds separated by reaction arrows.

        Steps whose reaction id is 'hidden' are applied to the graph but
        neither their arrow nor the resulting compound is drawn.  Returns
        (scene, final graph).
        """
        gap_size_x = 100
        gap_size_y = 15
        scene = Scene()
        G = G_subs.clone()

        # the starting compound is always drawn
        scene.add(G.svg(Scene(size_x, size_y, font_size)),
                  offset=(0, gap_size_y))
        curr_x = size_x

        for (rid, mapping, reaction_list) in expanded_reaction_list:
            for reaction in reaction_list:
                reaction.apply(G, mapping)
            # NOTE(review): refreshed once per reaction id - confirm against
            # the original layout.
            G.update_attributes()
            if self.ignore_chirality:
                G.reset_chiralities()

            if rid != 'hidden':
                # draw the arrow for the direction of the reaction, labelled
                # with the reaction name and the mapping
                scene.add(ChemicalArrow((curr_x + 30, size_y / 2),
                                        (curr_x + 70, size_y / 2),
                                        stroke_width=2))
                scene.add(Text((curr_x, size_y / 2 - 20),
                               self.reactor.get_reaction_name(rid),
                               font_size, fill_color=red))
                scene.add(Text((curr_x, size_y / 2 + 25), str(mapping),
                               font_size, fill_color=red))
                curr_x += gap_size_x

                # then the compound that the reaction produced
                scene.add(G.svg(Scene(size_x, size_y, font_size)),
                          offset=(curr_x, gap_size_y))
                curr_x += size_x

        scene.justify()
        return (scene, G)

    def expand_rid_list(self, rid_list):
        """Attach the list of subreactions corresponding to each Reaction ID."""
        return [(rid, mapping, self.reactor.get_reaction_list(rid))
                for (rid, mapping) in rid_list]

    def apply_rid_list(self, G, rid_list):
        """Apply every (rid, mapping) of rid_list to G in place and return G."""
        for (rid, mapping) in rid_list:
            for subreaction in self.reactor.get_reaction_list(rid):
                subreaction.apply(G, mapping)
            # NOTE(review): refreshed once per reaction id - confirm against
            # the original layout.
            G.update_attributes()
        return G

    def reverse_rid_list(self, rid_list):
        """Return the reversed list, with every reaction id replaced by its reverse."""
        return [(self.reactor.reverse_reaction(rid), mapping)
                for (rid, mapping) in reversed(rid_list)]

    def get_all_possible_scenes(self, original_compound_map, possible_pathways):
        """Return a list of (cost, scene) pairs, one per drawable pathway.

        possible_pathways is the list returned by find_shortest_pathway.
        Every substrate-side pathway is combined with every product-side
        pathway of the same bridge; cost is the total number of reactions.
        Pathways that raise ReactionException are reported on
        self.outstream and skipped.  If replaying a pathway does not end at
        its bridge compound, diagnostics are printed to stdout and the
        process exits with status -1.
        """
        def reaches_bridge(G, h_bridge):
            h = G.hash(ignore_chirality=self.ignore_chirality)
            return compare_hashes(h, h_bridge, self.ignore_chirality) == 0

        def abort_on_mismatch(labelled_graphs, h_bridge):
            # the replayed pathway contradicts the DFS result: dump the
            # hashes involved and stop
            sys.stdout.write("ERROR:\n")
            for (label, G) in labelled_graphs:
                sys.stdout.write("%s:  %s\n" % (
                    label, G.hash(ignore_chirality=self.ignore_chirality)))
            sys.stdout.write("bridge:  %s\n" % (h_bridge,))
            sys.exit(-1)

        scene_list = []
        for (substrate_pathways, product_pathways, h_bridge) in possible_pathways:
            for subs_path in substrate_pathways:
                G_subs = original_compound_map[subs_path[0]]
                subs_reaction_list = self.expand_rid_list(subs_path[1:])
                try:
                    (subs_log, G_last_subs) = self.pathway2text(
                        G_subs.clone(), subs_reaction_list)
                except ReactionException as msg:
                    self._log(msg)
                    continue
                if not reaches_bridge(G_last_subs, h_bridge):
                    abort_on_mismatch([("subs", G_subs),
                                       ("last_subs", G_last_subs)], h_bridge)

                for prod_path in product_pathways:
                    G_prod = original_compound_map[prod_path[0]]
                    prod_reaction_list = self.expand_rid_list(prod_path[1:])
                    reverse_prod_reaction_list = self.expand_rid_list(
                        self.reverse_rid_list(prod_path[1:]))
                    try:
                        (prod_log, G_last_prod) = self.pathway2text(
                            G_prod.clone(), prod_reaction_list)
                    except ReactionException as msg:
                        self._log(msg)
                        continue
                    if not reaches_bridge(G_last_prod, h_bridge):
                        abort_on_mismatch([("subs", G_subs),
                                           ("prod", G_prod),
                                           ("last_prod", G_last_prod)], h_bridge)

                    # join the two halves through the permutation that maps
                    # the substrate-side bridge onto the product-side bridge
                    perm_reaction = self.reactor.get_permutation_reaction(
                        G_last_subs, G_last_prod)
                    full_reaction_list = (subs_reaction_list + [perm_reaction]
                                          + reverse_prod_reaction_list)
                    try:
                        (pathway_scene, G_last) = self.pathway2svg(
                            G_subs, full_reaction_list)
                    except ReactionException as msg:
                        self._log(msg)
                        continue

                    cost = len(subs_reaction_list) + len(reverse_prod_reaction_list)
                    scene_list.append((cost, pathway_scene))
        return scene_list