def main1(): util._mkdir("../results/ec-list") counter = 1 for line in util.parse_text_file("../rec/ec-list.smiles"): print counter, ")", line smiles2svg(line, "../results/ec-list/%d.svg" % counter) counter += 1
def analyze(self, carbon_only=True, use_antimotifs=True, draw_scenes=False):
    """Run module finding on every pathway line of the modules file.

    Each line is a ';'-separated list of compounds.  A leading '@' on a
    line requests a chirality-aware search; otherwise chirality is ignored.
    """
    for line in util.parse_text_file("../rec/" + self.modules_file + ".txt"):
        chirality_aware = (line[0] == '@')
        if chirality_aware:
            line = line[1:]  # strip the '@' marker before parsing compounds
        self.pathfinder = PathFinder(carbon_only=carbon_only,
                                     pruning_method=None,
                                     ignore_chirality=not chirality_aware,
                                     use_antimotifs=use_antimotifs,
                                     outstream=self.logfile)
        self.find_modules(line.split(';'), draw_scenes=draw_scenes)
        self.line_counter += 1
def analyze_pairs(self, carbon_only=True, use_antimotifs=True, max_distance=4):
    """For each substrate/product pair in the modules file, find connecting
    pathways and report the minimal distance and number of alternatives.

    Each input line is 'substrate;product;max_steps'.  A leading '@' makes
    the search chirality-aware.  A max_steps of '-1', 'inf' or '' means
    "search for the shortest pathway up to max_distance"; otherwise all
    pathways of exactly max_steps are enumerated.  Results are written to
    the HTML report and summarized in ../results/<experiment>.txt.
    """
    distances = []     # the minimal pathway length between the substrate and the product
    alternatives = []  # each value is the number of alternative pathways with the minimal distance
    line_counter = 0
    for line in util.parse_text_file("../rec/" + self.modules_file + ".txt"):
        if (line[0] == '@'):
            # '@' prefix: run the search with chirality taken into account
            line = line[1:]
            self.pathfinder = PathFinder(carbon_only=carbon_only, pruning_method=None,
                                         ignore_chirality=False,
                                         use_antimotifs=use_antimotifs,
                                         outstream=self.logfile)
        else:
            self.pathfinder = PathFinder(carbon_only=carbon_only, pruning_method=None,
                                         ignore_chirality=True,
                                         use_antimotifs=use_antimotifs,
                                         outstream=self.logfile)
        (subs, prod, max_steps) = line.split(";", 2)
        if (max_steps in ['-1', 'inf', '']):
            # unknown distance: search for the shortest pathway
            sys.stdout.write(subs + " -(?)-> " + prod)
            sys.stdout.flush()
            (scenes, dist) = self.get_shortest_pathways(subs, prod, max_distance)
        else:
            dist = int(max_steps)
            sys.stdout.write(subs + " -(%d)-> " % dist + prod)
            sys.stdout.flush()
            scenes = self.get_all_pathways(subs, prod, dist)
        if (dist == -1):
            # no pathway was found within the allowed module size
            sys.stdout.write(", Distance(L) = inf, N = 0\n")
            sys.stdout.flush()
            distances.append("inf")
            alternatives.append(0)
            self.html_writer.write("<li><span style=color:red>%s <-> %s (distance > %d)</span></li>\n" % (subs, prod, self.max_module_size))
            self.html_writer.flush()
        else:
            sys.stdout.write(", Distance(L) = %d, N = %d\n" % (dist, len(scenes)))
            sys.stdout.flush()
            distances.append(dist)
            alternatives.append(len(scenes))
            self.html_writer.write("<li><span style=color:green>%s <-> %s (distance = %d)</span></li>\n" % (subs, prod, dist))
            for i in range(len(scenes)):
                self.html_writer.write("<li>")
                self.html_writer.write_svg(scenes[i], "pathologic_" + self.experiment_name + "/pair%d_path%d" % (line_counter, i))
                self.html_writer.write("</li>\n")
            self.html_writer.flush()
        line_counter += 1
    # use a context manager so the summary file is closed even if write fails
    with open("../results/" + self.experiment_name + ".txt", "w") as result_file:
        result_file.write(str(distances) + "\n" + str(alternatives) + "\n")
def init(): global params params = {} params[ 'MPL'] = 3 # Maximal Path Length. Set to None to restrict all options to have minimal length. params[ 'ID'] = False # Isomerase Down. True will prevent Isomerases from acting on phosphorylated compounds. params[ 'EA'] = False # Epimerase Above. True will prevent Epimerases from acting on unphosphorylated compounds. params['PECK'] = True # Pentose Epimerases Can only work on Ketoses. params['3ECK'] = False # 3-Epimerases Can only work on Ketoses. params['TL'] = 1 # Total Length. params['NE'] = None # Number of Epimerases (counts isoenzymes twice) params['NI'] = None # Number of Isomerases (counts isoenzymes twice) params['ND'] = None # Number of Dehydrogenases (counts isoenzymes twice) params['NK'] = None # Number of Kinases (counts isoenzymes twice) params[ 'NTE'] = 0 # No Two Epimerases. Number maximum number of same-product epimerases. params['MIE'] = None # Counts the number of isoenzyme pairs. params[ 'TPD'] = None # Total Phosphorylation Distance. Counts the total number of steps before phosphorylation. for p in params.keys(): print "%s = %s" % (p, str(params[p])) global enzyme_types, target, sources, pp_enzymes # this is the target if the metabolism (in our case the input to the PP cycle) target = 'D-Ribulose-5P' # sources are sugars that E. 
coli can grow on without any other carbon source sources = [ 'Ribitol', 'D-Arabitol', 'L-Xylulose', 'D-Ribose', 'D-Xylose', 'L-Arabinose' ] ketoses = ['D-Ribulose', 'L-Ribulose', 'D-Xylulose', 'L-Xylulose'] aldoses = [ 'D-Ribose', 'L-Ribose', 'D-Arabinose', 'L-Arabinose', 'D-Xylose', 'L-Xylose', 'D-Lyxose', 'L-Lyxose' ] polyols = ['Ribitol', 'Xylitol', 'D-Arabitol', 'L-Arabitol'] # these are edges that don't have a cost since they are part of the PP cycle pp_enzymes = [('D-Ribulose-5P', 'D-Ribose-5P'), ('D-Ribulose-5P', 'D-Xylulose-5P')] pp_enzymes += [(j, i) for (i, j) in pp_enzymes] global G_pentoses, G_wildtype, G_conjecture G_wildtype = SparseGraph() G_conjecture = SparseGraph() G_pentoses = SparseGraph() # Add all the known possible enzymes to G_pentoses enzyme_types = {} for line in parse_text_file("../rec/pentoses_edges.txt"): tokens = line.split() for i in [0, 1]: compound = tokens[i] neighbor = tokens[1 - i] (enzyme_type, i_carbon) = tokens[2].split('-') # add the kinases G_pentoses[compound][compound + '-5P'] = 1 enzyme_types[(compound, compound + '-5P')] = "KIN" G_pentoses[compound + '-5P'][compound] = 1 enzyme_types[(compound + '-5P', compound)] = "KIN" if (params['EA'] and (enzyme_type == "EPI")): pass # in EA mode don't use Epimerases on phosphorylated forms elif (params['PECK'] and (enzyme_type == "EPI") and (not compound in ketoses)): pass # in PECK mode don't use Epimerases on non-ketoses elif (params['3ECK'] and (enzyme_type == "EPI") and (i_carbon == 3) and (not compound in ketoses)): pass # in 3ECK mode don't use 3-Epimerases on non-ketoses else: G_pentoses[compound][neighbor] = 1 enzyme_types[(compound, neighbor)] = enzyme_type if (params['ID'] and (enzyme_type == "DHG")): pass # in ID mode don't use Dehydrogenases on phosphorylated forms elif (params['PECK'] and (enzyme_type == "EPI") and (not compound in ketoses)): pass # in PECK mode don't use Epimerases on non-ketoses elif (params['3ECK'] and (enzyme_type == "EPI") and (i_carbon == 3) and 
(not compound in ketoses)): pass # in 3ECK mode don't use 3-Epimerases on non-ketoses else: G_pentoses[compound + '-5P'][neighbor + '-5P'] = 1 enzyme_types[(compound + '-5P', neighbor + '-5P')] = enzyme_type # Change the cost of the PP enzymes to 0 for (i, j) in pp_enzymes: G_wildtype[i][j] = 0 G_pentoses[i][j] = 0 G_conjecture[i][j] = 0 # these are edges that do exist in E. coli but are not part of the Pentose Phosphate Cycle # according to KEGG and MetaCyc wildtype_enzymes = \ [('D-Arabitol', 'D-Xylulose'),\ ('D-Xylulose', 'D-Xylulose-5P'),\ ('D-Xylose', 'D-Xylulose'),\ ('L-Arabinose', 'L-Ribulose'),\ ('L-Ribulose', 'L-Ribulose-5P'),\ ('L-Xylulose', 'L-Xylulose-5P'),\ ('L-Xylulose-5P','L-Ribulose-5P'),\ ('L-Ribulose-5P','D-Xylulose-5P'),\ ('Ribitol', 'D-Ribulose'),\ ('D-Ribulose', 'D-Ribulose-5P'),\ ('D-Ribose', 'D-Ribose-5P')] for (i, j) in wildtype_enzymes: G_wildtype[i][j] = 1 G_wildtype[j][i] = 1 # these is the conjectured list of non-PP enzymes (where the L-Xylulose path is shorter) conjecture_enzymes = \ [('D-Arabitol', 'D-Xylulose'),\ ('D-Xylulose', 'D-Xylulose-5P'),\ ('D-Xylose', 'D-Xylulose'),\ ('L-Arabinose', 'L-Ribulose'),\ ('L-Ribulose', 'L-Ribulose-5P'),\ ('L-Xylulose', 'L-Xylulose-5P'),\ ('L-Xylulose-5P','D-Ribulose-5P'),\ ('L-Ribulose-5P','D-Xylulose-5P'),\ ('Ribitol', 'D-Ribulose'),\ ('D-Ribulose', 'D-Ribulose-5P'),\ ('D-Ribose', 'D-Ribose-5P')] for (i, j) in conjecture_enzymes: G_conjecture[i][j] = 1 G_conjecture[j][i] = 1
import sys import util from numpy.random import permutation ########################################################################################################## # MAIN # ########################################################################################################## modules = [] compounds = set() precursors = None for line in util.parse_text_file('../rec/p-value-modules.txt'): if (precursors == None): # first line is the set of true precursors precursors = set(line.split(';')) print "The Precursors: " + ', '.join(precursors) continue module = set(line.split(';')) modules.append(module) compounds = compounds.union(module) print "Module: " + ', '.join(module) compounds = list(compounds) overlaps = [] for i in range(len(modules)): for j in range(i): overlap = modules[i].intersection(modules[j]) if (overlap != set()): overlaps.append(overlap)
if (base_atom in group_table): (bonding_atom, valence, hydrogens, charge) = group_table[base_atom] return (base_atom, valence, hydrogens, charge, 0) valence = 0 if (base_atom == atom_wildcard): valence = 0 elif (not base_atom in valence_table): raise ChemException("Parsing %s: cannot find the atom '%s' in the valence table" % (atom, base_atom)) else: valence = valence_table[base_atom] return (base_atom, valence, hydrogens, charge, chirality) bond_energy_table = {} for line in util.parse_text_file(util.get_progdir() + "/../rec/bond_energy.txt"): (bond_type, energy) = line.split(" " ,1) if (bond_type.find("-") != -1): (atom1, atom2) = bond_type.split("-") order = 1 elif (bond_type.find("=") != -1): (atom1, atom2) = bond_type.split("=") order = 2 elif (bond_type.find("#") != -1): (atom1, atom2) = bond_type.split("#") order = 3 else: raise ChemException("unable to parse bond type: " + bond_type) bond_energy_table[(atom1, atom2, order)] = float(energy) bond_energy_table[(atom2, atom1, order)] = float(energy)
(bonding_atom, valence, hydrogens, charge) = group_table[base_atom] return (base_atom, valence, hydrogens, charge, 0) valence = 0 if base_atom == atom_wildcard: valence = 0 elif not base_atom in valence_table: raise ChemException("Parsing %s: cannot find the atom '%s' in the valence table" % (atom, base_atom)) else: valence = valence_table[base_atom] return (base_atom, valence, hydrogens, charge, chirality) bond_energy_table = {} for line in util.parse_text_file(util.get_progdir() + "/../rec/bond_energy.txt"): (bond_type, energy) = line.split(" ", 1) if bond_type.find("-") != -1: (atom1, atom2) = bond_type.split("-") order = 1 elif bond_type.find("=") != -1: (atom1, atom2) = bond_type.split("=") order = 2 elif bond_type.find("#") != -1: (atom1, atom2) = bond_type.split("#") order = 3 else: raise ChemException("unable to parse bond type: " + bond_type) bond_energy_table[(atom1, atom2, order)] = float(energy) bond_energy_table[(atom2, atom1, order)] = float(energy)
def start(self, context, return_queue):
    """Run one posting session on bakeca.com.

    Reads the bot's text/image context, obtains a temporary e-mail address,
    posts an announcement on the website, verifies it through the e-mailed
    link, and records credentials/results.  Pushes one of BAKECA_SUCCESS /
    BAKECA_ERROR / BAKECA_RETRY on return_queue and returns the same value.
    """
    # Init script variables
    start = time()
    end = time()
    website_driver = None
    email_driver = None
    logger = self.logger
    disable_logging = self.disable_logging
    exception_raised = True  # cleared only when the try body completes
    exception_type = ""
    proxy_address = ""
    if BakecaSlave.use_proxy:
        proxy_address = BakecaSlave.proxy.get_address()
        if proxy_address is None:
            raise ProxyException("No more proxies available!")
    if BakecaSlave.use_lpm:
        proxy_address = BakecaSlave.lpm_address
    # Try and read last state from file
    self.read_last_state()
    # Get city and category and increment as needed
    city_id, category_id = self.get_additional_data()
    # Get image file and text file
    self.parse_context(context)
    logger.info("Parsed context %s." % str(context))
    try:
        # Get text from file
        # Use up to 20 additional text_files with the same bot.
        # BOT_TEXT_IMAGES/BAKECA/BAKECA_TEXT_FILE.txt
        # BOT_TEXT_IMAGES/BAKECA/BAKECA_TEXT_FILE1.txt ... _FILE20.txt
        text_file_list = [BakecaSlave.text_file]
        for i in range(1, 21):
            (basename, ext) = os.path.splitext(BakecaSlave.text_file)
            i_text_file = basename + str(i) + ext
            if os.path.exists(i_text_file):
                text_file_list.append(i_text_file)
        # rotate through the available text files by city index
        text_file_id = self.city_index % len(text_file_list)
        text_file_x = text_file_list[text_file_id]
        logger.info("Getting title and content from: %s" % text_file_x)
        age, title, content = util.parse_text_file(text_file_x)
        logger.info("Got title and content.")
        # First go and get mail
        email_driver = util.get_chrome_driver(BakecaSlave.is_headless, proxy_address)
        # util.go_to_page(driver=email_driver, page_url=util.MOAKT_URL)
        # email = util.moakt_get_email_address(email_driver)
        # email = util.smail_get_email_address(email_driver)
        email = self.smailpro_man.get_email_address(self.slave_index)
        password = util.random_string(10)
        # Get images
        logger.info("Got email [%s] and password [%s]" % (email, password))
        images, out_message = util.get_images(BakecaSlave.image_dir)
        logger.info(out_message)
        # Go to Site
        logger.info("Opening website page...")
        website_driver = util.get_chrome_driver(BakecaSlave.is_headless, proxy_address)
        util.go_to_page(driver=website_driver, page_url=CONSTANTS.WEBSITE_URL)
        # Post without register
        logger.info("Make website post...")
        is_telg_auth, is_chiudi, loaded_images = self.make_website_post(
            website_driver, city_id, category_id, age, title, content, images, email)
        # Close website driver
        website_driver.quit()
        # If not TELEGRAM Auth continue with post flow
        if not is_telg_auth:
            # Sleep for mail to arrive
            sleep(5)
            # Go to mail box
            logger.info("Verify email...")
            # util.moakt_access_verify_link(email_driver, '/html/body/p[5]/a')
            # util.smail_validate_link(email_driver)
            html_file = self.smailpro_man.get_message_as_temp_file()
            email_driver.get("file://" + html_file)
            sleep(2)
            util.smailpro_validate_link(email_driver)
            os.unlink(html_file)
            # Click on accept
            util.scroll_into_view_click_xpath(email_driver, '//*[@id="accetto"]')
            # Get post link
            logger.info("Getting post url...")
            announce_link = email_driver.find_element_by_xpath(
                '//*[@id="colonna-unica"]/div[1]/p[1]/a')
            post_url = announce_link.get_attribute('href')
            # Close email driver
            email_driver.quit()
            print(post_url)
        end = time()
        exception_raised = False
    except TimeoutException as e:
        exception_type = "Timeout on page wait."
        logger.exception("Timeout on page wait.")
        raise BakecaException("Timeout on page wait.")
    except NoSuchElementException as e:
        exception_type = "Element not found."
        logger.exception("Element not found.")
        raise BakecaException("Element not found.")
    except ElementNotInteractableException as e:
        exception_type = "Element not interactable."
        logger.exception("Element not interactable.")
        raise BakecaException("Element not interactable.")
    except util.UtilParseError as e:
        exception_type = "Parse error."
        logger.exception("Parse error.")
        raise BakecaException("Parse error.")
    except util.CaptchaSolverException as e:
        exception_type = "Failed to solve captcha in time."
        logger.exception("Failed to solve captcha in time.")
        raise BakecaException("Failed to solve captcha in time.")
    except TelegramAuthException as e:
        exception_type = "TelegramAuth was required."
        logger.exception("TelegramAuth was required.")
        raise BakecaException("TelegramAuth was required.")
    except SMailProException as e:
        exception_type = "SMailPro exception occurred."
        logger.exception("SMailPro exception occurred.")
        raise BakecaException("SMailPro exception occurred.")
    except BakecaException as e:
        exception_type = "Bakeca exception occurred"
        logger.exception("Bakeca exception occurred")
        raise e
    except Exception as e:
        exception_type = "Unknown error."
        logger.exception("Unknown error.")
        raise BakecaException("Unknown error.")
    finally:
        # Close driver
        if email_driver is not None:
            email_driver.quit()
        if website_driver is not None:
            website_driver.quit()
        if BakecaSlave.use_proxy:
            BakecaSlave.proxy.set_valid(False)
            BakecaSlave.proxy.__exit__(None, None, None)
        self.write_last_state()
        if exception_raised:
            # NOTE: returning from 'finally' deliberately swallows the
            # exception raised above and reports a status code instead.
            end = time()
            logger.info(
                "Exception was raised. Writing error to credentials.")
            util.save_credentials_error(BAKECA_CREDENTIALS_PATH,
                                        exception_type, "bakeca.com",
                                        CONSTANTS.CITIES[city_id],
                                        CONSTANTS.CATEGORIES[category_id],
                                        end - start, BakecaSlave.bakeca_lock)
            announce_msg = (
                "BAKECA !!!FAILED!!! For City %s and category %s."
                % (CONSTANTS.CITIES[city_id], CONSTANTS.CATEGORIES[category_id]))
            logger.info(announce_msg)
            print(announce_msg)
            self.push_to_fail_queue(city_id, category_id)
            bot_logger.close_logger(logger, disable_logging)
            # BUGFIX: compare string contents with '==' — the previous 'is'
            # identity test relied on CPython string interning and could
            # silently report BAKECA_ERROR instead of BAKECA_RETRY.
            if exception_type == "Failed to solve captcha in time.":
                # if failed to solve captcha simply retry
                return_queue.put(BAKECA_RETRY)
                return BAKECA_RETRY
            else:
                return_queue.put(BAKECA_ERROR)
                return BAKECA_ERROR
    # Success - save credentials and post url
    website = "bakeca.com" + "\n" + "City: " + CONSTANTS.CITIES[city_id] + "\n" + "Category: " + \
        CONSTANTS.CATEGORIES[category_id] + "\n" + "Is chiudi: " + str(
            is_chiudi) + "\n" + "Images loaded: " + str(loaded_images)
    if is_telg_auth:
        util.save_credentials(BAKECA_CREDENTIALS_PATH, email, password,
                              "FAILED - TELEGRAM AUTH REQUIRED", website,
                              end - start, BakecaSlave.bakeca_lock)
        # The telegram banner blocked the posting. Leave it and switch the city_it.
        announce_msg = (
            "BAKECA !!!FAILED-TELEGRAM!!! For City %s and category %s."
            % (CONSTANTS.CITIES[city_id], CONSTANTS.CATEGORIES[category_id]))
        print(announce_msg)
        logger.info(announce_msg)
    else:
        util.save_credentials(BAKECA_CREDENTIALS_PATH, email, password,
                              post_url, website, end - start,
                              BakecaSlave.bakeca_lock)
        # Post succeeded.
        announce_msg = (
            "BAKECA Success For City %s and category %s."
            % (CONSTANTS.CITIES[city_id], CONSTANTS.CATEGORIES[category_id]))
        print(announce_msg)
        logger.info(announce_msg)
    bot_logger.close_logger(logger, disable_logging)
    return_queue.put(BAKECA_SUCCESS)
    if BakecaSlave.use_proxy:
        BakecaSlave.proxy.set_valid(True)
        BakecaSlave.proxy.__exit__(None, None, None)
    return BAKECA_SUCCESS
#!/usr/bin/python import sys import os import util from chemconvert import hash2graph from html_writer import HtmlWriter from svg import Scene html = HtmlWriter("../results/hash_list.html") util._mkdir("../results/hash_list") for line in util.parse_text_file(sys.argv[1]): print line graph = hash2graph(line) graph.initialize_pos() scene = graph.svg(Scene(200, 200, font_size=12)) html.write_svg(scene, "../results/hash_list/" + line) html.display()
def init(): global params; params = {} params['MPL'] = 3 # Maximal Path Length. Set to None to restrict all options to have minimal length. params['ID'] = False # Isomerase Down. True will prevent Isomerases from acting on phosphorylated compounds. params['EA'] = False # Epimerase Above. True will prevent Epimerases from acting on unphosphorylated compounds. params['PECK'] = True # Pentose Epimerases Can only work on Ketoses. params['3ECK'] = False # 3-Epimerases Can only work on Ketoses. params['TL'] = 1 # Total Length. params['NE'] = None # Number of Epimerases (counts isoenzymes twice) params['NI'] = None # Number of Isomerases (counts isoenzymes twice) params['ND'] = None # Number of Dehydrogenases (counts isoenzymes twice) params['NK'] = None # Number of Kinases (counts isoenzymes twice) params['NTE'] = 0 # No Two Epimerases. Number maximum number of same-product epimerases. params['MIE'] = None # Counts the number of isoenzyme pairs. params['TPD'] = None # Total Phosphorylation Distance. Counts the total number of steps before phosphorylation. for p in params.keys(): print "%s = %s" % (p, str(params[p])) global enzyme_types, target, sources, pp_enzymes # this is the target if the metabolism (in our case the input to the PP cycle) target = 'D-Ribulose-5P' # sources are sugars that E. 
coli can grow on without any other carbon source sources = ['Ribitol', 'D-Arabitol', 'L-Xylulose', 'D-Ribose', 'D-Xylose', 'L-Arabinose'] ketoses = ['D-Ribulose', 'L-Ribulose', 'D-Xylulose', 'L-Xylulose'] aldoses = ['D-Ribose', 'L-Ribose', 'D-Arabinose', 'L-Arabinose', 'D-Xylose', 'L-Xylose', 'D-Lyxose', 'L-Lyxose'] polyols = ['Ribitol', 'Xylitol', 'D-Arabitol', 'L-Arabitol'] # these are edges that don't have a cost since they are part of the PP cycle pp_enzymes = [('D-Ribulose-5P', 'D-Ribose-5P'), ('D-Ribulose-5P', 'D-Xylulose-5P')] pp_enzymes += [(j, i) for (i, j) in pp_enzymes] global G_pentoses, G_wildtype, G_conjecture G_wildtype = SparseGraph() G_conjecture = SparseGraph() G_pentoses = SparseGraph() # Add all the known possible enzymes to G_pentoses enzyme_types = {} for line in parse_text_file("../rec/pentoses_edges.txt"): tokens = line.split() for i in [0, 1]: compound = tokens[i] neighbor = tokens[1-i] (enzyme_type, i_carbon) = tokens[2].split('-') # add the kinases G_pentoses[compound][compound + '-5P'] = 1 enzyme_types[(compound, compound + '-5P')] = "KIN" G_pentoses[compound + '-5P'][compound] = 1 enzyme_types[(compound + '-5P', compound)] = "KIN" if (params['EA'] and (enzyme_type == "EPI")): pass # in EA mode don't use Epimerases on phosphorylated forms elif (params['PECK'] and (enzyme_type == "EPI") and (not compound in ketoses)): pass # in PECK mode don't use Epimerases on non-ketoses elif (params['3ECK'] and (enzyme_type == "EPI") and (i_carbon == 3) and (not compound in ketoses)): pass # in 3ECK mode don't use 3-Epimerases on non-ketoses else: G_pentoses[compound][neighbor] = 1 enzyme_types[(compound, neighbor)] = enzyme_type if (params['ID'] and (enzyme_type == "DHG")): pass # in ID mode don't use Dehydrogenases on phosphorylated forms elif (params['PECK'] and (enzyme_type =="EPI") and (not compound in ketoses)): pass # in PECK mode don't use Epimerases on non-ketoses elif (params['3ECK'] and (enzyme_type == "EPI") and (i_carbon == 3) and (not 
compound in ketoses)): pass # in 3ECK mode don't use 3-Epimerases on non-ketoses else: G_pentoses[compound + '-5P'][neighbor + '-5P'] = 1 enzyme_types[(compound + '-5P', neighbor + '-5P')] = enzyme_type # Change the cost of the PP enzymes to 0 for (i, j) in pp_enzymes: G_wildtype[i][j] = 0 G_pentoses[i][j] = 0 G_conjecture[i][j] = 0 # these are edges that do exist in E. coli but are not part of the Pentose Phosphate Cycle # according to KEGG and MetaCyc wildtype_enzymes = \ [('D-Arabitol', 'D-Xylulose'),\ ('D-Xylulose', 'D-Xylulose-5P'),\ ('D-Xylose', 'D-Xylulose'),\ ('L-Arabinose', 'L-Ribulose'),\ ('L-Ribulose', 'L-Ribulose-5P'),\ ('L-Xylulose', 'L-Xylulose-5P'),\ ('L-Xylulose-5P','L-Ribulose-5P'),\ ('L-Ribulose-5P','D-Xylulose-5P'),\ ('Ribitol', 'D-Ribulose'),\ ('D-Ribulose', 'D-Ribulose-5P'),\ ('D-Ribose', 'D-Ribose-5P')] for (i, j) in wildtype_enzymes: G_wildtype[i][j] = 1 G_wildtype[j][i] = 1 # these is the conjectured list of non-PP enzymes (where the L-Xylulose path is shorter) conjecture_enzymes = \ [('D-Arabitol', 'D-Xylulose'),\ ('D-Xylulose', 'D-Xylulose-5P'),\ ('D-Xylose', 'D-Xylulose'),\ ('L-Arabinose', 'L-Ribulose'),\ ('L-Ribulose', 'L-Ribulose-5P'),\ ('L-Xylulose', 'L-Xylulose-5P'),\ ('L-Xylulose-5P','D-Ribulose-5P'),\ ('L-Ribulose-5P','D-Xylulose-5P'),\ ('Ribitol', 'D-Ribulose'),\ ('D-Ribulose', 'D-Ribulose-5P'),\ ('D-Ribose', 'D-Ribose-5P')] for (i, j) in conjecture_enzymes: G_conjecture[i][j] = 1 G_conjecture[j][i] = 1
html_path = "../pathways/html" #pathway = "glycolysis-2" pathway = "glycolysis-1" #pathway = "pentose-phosphate" #pathway = "frucolysis" util._mkdir(html_path + "/" + pathway) html_filename = html_path + "/" + pathway + ".html" html_file = open(html_filename, "w") prev_line_bag = None reaction_titles = [] reaction_compounds = [] line_number = 0 for line in util.parse_text_file(pathway_path + "/" + pathway + ".pth"): line_number += 1 if (line[0:2] == "//"): prev_line_bag = None reaction_titles.append("*"*60 + " " + line[2:] + " " + "*"*60) reaction_compounds.append(None) elif (prev_line_bag == None): prev_line_bag = bag.Bag().from_string(line) else: curr_line_bag = bag.Bag().from_string(line) common_bag = curr_line_bag.intersection(prev_line_bag) side_bags = [prev_line_bag - common_bag, curr_line_bag - common_bag, common_bag] # left-side, right-side, common side_strings = ["", "", ""] side_graphs = (ChemGraph(), ChemGraph(), ChemGraph()) for side in range(3):
def analyze_pairs(self, carbon_only=True, use_antimotifs=True, max_distance=4):
    """For each substrate/product pair in the modules file, find connecting
    pathways and report the minimal distance and number of alternatives.

    Each input line is 'substrate;product;max_steps'.  A leading '@' makes
    the search chirality-aware.  A max_steps of '-1', 'inf' or '' means
    "search for the shortest pathway up to max_distance"; otherwise all
    pathways of exactly max_steps are enumerated.  Results are written to
    the HTML report and summarized in ../results/<experiment>.txt.
    """
    distances = []     # the minimal pathway length between the substrate and the product
    alternatives = []  # each value is the number of alternative pathways with the minimal distance
    line_counter = 0
    for line in util.parse_text_file("../rec/" + self.modules_file + ".txt"):
        if (line[0] == '@'):
            # '@' prefix: run the search with chirality taken into account
            line = line[1:]
            self.pathfinder = PathFinder(carbon_only=carbon_only, pruning_method=None,
                                         ignore_chirality=False,
                                         use_antimotifs=use_antimotifs,
                                         outstream=self.logfile)
        else:
            self.pathfinder = PathFinder(carbon_only=carbon_only, pruning_method=None,
                                         ignore_chirality=True,
                                         use_antimotifs=use_antimotifs,
                                         outstream=self.logfile)
        (subs, prod, max_steps) = line.split(";", 2)
        if (max_steps in ['-1', 'inf', '']):
            # unknown distance: search for the shortest pathway
            sys.stdout.write(subs + " -(?)-> " + prod)
            sys.stdout.flush()
            (scenes, dist) = self.get_shortest_pathways(subs, prod, max_distance)
        else:
            dist = int(max_steps)
            sys.stdout.write(subs + " -(%d)-> " % dist + prod)
            sys.stdout.flush()
            scenes = self.get_all_pathways(subs, prod, dist)
        if (dist == -1):
            # no pathway was found within the allowed module size
            sys.stdout.write(", Distance(L) = inf, N = 0\n")
            sys.stdout.flush()
            distances.append("inf")
            alternatives.append(0)
            self.html_writer.write(
                "<li><span style=color:red>%s <-> %s (distance > %d)</span></li>\n"
                % (subs, prod, self.max_module_size))
            self.html_writer.flush()
        else:
            sys.stdout.write(", Distance(L) = %d, N = %d\n" % (dist, len(scenes)))
            sys.stdout.flush()
            distances.append(dist)
            alternatives.append(len(scenes))
            self.html_writer.write(
                "<li><span style=color:green>%s <-> %s (distance = %d)</span></li>\n"
                % (subs, prod, dist))
            for i in range(len(scenes)):
                self.html_writer.write("<li>")
                self.html_writer.write_svg(
                    scenes[i],
                    "pathologic_" + self.experiment_name + "/pair%d_path%d" % (line_counter, i))
                self.html_writer.write("</li>\n")
            self.html_writer.flush()
        line_counter += 1
    # use a context manager so the summary file is closed even if write fails
    with open("../results/" + self.experiment_name + ".txt", "w") as result_file:
        result_file.write(str(distances) + "\n" + str(alternatives) + "\n")