def pathway_info(metacycdb):
    """Collect the taxonomic ranges attached to Metacyc pathways.

    Loads a padmet file and, for every "is_in_pathway" relation whose
    pathway node has a "TAXONOMIC-RANGE" entry in ``misc``, records the taxa
    of that pathway for the reaction on the input side of the relation.

    Args:
        metacycdb (str): Metacyc padmet file

    Returns:
        set,dict: every taxon found on such a pathway, and a dictionary
            mapping each reaction id to the set of taxa of its pathways
    """
    padmet = PadmetSpec(metacycdb)
    taxa_by_rxn = {}
    every_taxon = set()

    for relation in padmet.getAllRelation():
        if relation.type != "is_in_pathway":
            continue
        pathway_misc = padmet.dicOfNode[relation.id_out].misc
        if "TAXONOMIC-RANGE" not in pathway_misc.keys():
            continue

        pathway_taxa = set(pathway_misc["TAXONOMIC-RANGE"])
        every_taxon.update(pathway_taxa)
        if relation.id_in in taxa_by_rxn:
            taxa_by_rxn[relation.id_in].update(pathway_taxa)
        else:
            # First pathway seen for this reaction: keep the fresh set.
            taxa_by_rxn[relation.id_in] = pathway_taxa

    return every_taxon, taxa_by_rxn
def reaction_to_sbml(reactions, output, padmetRef, verbose = False):
    """
    Convert a collection of reactions to sbml format based on a given padmet
    of reference.

    NOTE(review): the original documentation states that ids are encoded for
    sbml with sbmlPlugin.convert_to_coded_id; that encoding is not visible
    here and presumably happens inside padmet_to_sbml - confirm.

    Parameters
    ----------
    reactions: list, set or str
        reaction ids, or the path to a file holding one reaction id per line
    output: str
        the pathname to the sbml file to create
    padmetRef: padmet.classes.PadmetRef
        padmet of reference
    verbose: bool
        if True, print each reaction id that is absent from padmetRef

    Raises
    ------
    KeyError
        if none of the requested reactions is a reaction node of padmetRef
        (this includes an empty collection of reactions)
    """
    if isinstance(reactions, (str, bytes, os.PathLike)):
        # A path: os.path.isfile() must not be called on a list/set, it
        # raises TypeError for anything that is not path-like.
        with open(reactions, 'r') as f:
            reactions = set(f.read().splitlines())
    else:
        # Work on a private set so that lists are accepted and the caller's
        # collection is never modified.
        reactions = set(reactions)

    # Check that the reaction ids are in padmetRef.
    all_rxn = {node_id for node_id, node in padmetRef.dicOfNode.items()
               if node.type == "reaction"}
    rxn_not_in_ref = reactions - all_rxn
    if len(rxn_not_in_ref) == len(reactions):
        raise KeyError("None of the reactions is in padmetRef")

    if verbose:
        for rxn_id in rxn_not_in_ref:
            print("%s not in padmetRef" % rxn_id)
    reactions -= rxn_not_in_ref

    padmet = PadmetSpec()
    for rxn_id in reactions:
        padmet.copyNode(padmetRef, rxn_id)
    padmet_to_sbml(padmet, output, sbml_lvl=2, verbose = verbose)
def padmet_medium_cli(command_args):
    """
    Command-line handler for growth medium management.

    Parses command_args with docopt against the module docstring. Without
    -r and without seeds, the growth medium returned by the padmet is
    printed. Otherwise manage_medium is called and the padmet is written to
    --output (or back to --padmetSpec when no output is given).

    Parameters
    ----------
    command_args: list
        command-line arguments handed to docopt

    Raises
    ------
    FileNotFoundError
        if --seeds is given but the file does not exist
    """
    args = docopt.docopt(__doc__, argv=command_args)

    seeds = None
    seeds_file = args["--seeds"]
    if seeds_file:
        if not os.path.exists(seeds_file):
            raise FileNotFoundError("No seeds file (--seeds) accessible at " + seeds_file)
        with open(seeds_file, 'r') as handle:
            # Only the first tab-separated column of each line is kept.
            seeds = [row.split("\t")[0] for row in handle.read().splitlines()]

    padmet = PadmetSpec(args["--padmetSpec"])
    padmetRef = PadmetRef(args["--padmetRef"]) if args["--padmetRef"] else None

    output = args["--output"]
    verbose = args["-v"]
    remove = args["-r"]
    if output is None:
        output = args["--padmetSpec"]

    if remove or seeds:
        manage_medium(padmet, seeds, padmetRef, verbose)
        padmet.generateFile(output)
        return

    g_m = padmet.get_growth_medium()
    print("List of growth medium:")
    print(list(g_m) if g_m else "[]")
def main():
    """
    Script entry point: produce the network report of a padmet.

    Parses the command line with docopt against the module docstring, loads
    the padmet given by --padmetSpec and calls its network_report method
    with the --output_dir, --padmetRef and -v values.
    """
    cli = docopt.docopt(__doc__)
    ref_path = cli["--padmetRef"]
    spec = PadmetSpec(cli["--padmetSpec"])
    spec.network_report(cli["--output_dir"], ref_path, cli["-v"])
def report_network_cli(command_args):
    """
    Command-line handler producing the network report of a padmet.

    Parameters
    ----------
    command_args: list
        command-line arguments, parsed by docopt against the module
        docstring; --padmetSpec is loaded and its network_report method is
        called with the --output_dir, --padmetRef and -v values
    """
    cli = docopt.docopt(__doc__, argv=command_args)
    ref_path = cli["--padmetRef"]
    spec = PadmetSpec(cli["--padmetSpec"])
    spec.network_report(cli["--output_dir"], ref_path, cli["-v"])
def from_init_source(padmet_file, init_source, output, verbose=False):
    """
    Export to sbml the reactions of a padmet that are NOT tagged with a
    given source.

    A reaction is left out when a "has_reconstructionData" relation links it
    to a node whose first "SOURCE" value equals init_source (compared
    case-insensitively). All the other reaction nodes are handed to
    reaction_to_sbml, with the loaded padmet itself used as reference.

    Parameters
    ----------
    padmet_file: str
        path to the padmet file to load
    init_source: str
        source name identifying the reactions to leave out
    output: str
        passed to reaction_to_sbml as the output argument
    verbose: bool
        passed to reaction_to_sbml
    """
    padmet = PadmetSpec(padmet_file)
    wanted_source = init_source.lower()

    excluded = set()
    for rlt in padmet.getAllRelation():
        if rlt.type != "has_reconstructionData":
            continue
        first_source = padmet.dicOfNode[rlt.id_out].misc.get("SOURCE", [""])[0]
        if first_source.lower() == wanted_source:
            excluded.add(rlt.id_in)

    kept = set()
    for node in padmet.dicOfNode.values():
        if node.type == "reaction" and node.id not in excluded:
            kept.add(node.id)

    reaction_to_sbml(kept, output, padmet, verbose)
def createPadmet(dict_args):
    """
    Update one padmet with manual_curation.add_delete_rxn.

    Per the original note, this function is run by each worker of the Pool
    in mp_createPadmet.

    Parameters
    ----------
    dict_args: dict
        must provide the keys "reactions_to_add_path", "padmet_to_update",
        "output", "padmetRef" and "verbose"; the two padmet entries are
        paths that are both loaded with PadmetSpec
    """
    curation_form = dict_args["reactions_to_add_path"]
    target_padmet = PadmetSpec(dict_args["padmet_to_update"])
    destination = dict_args["output"]
    reference_padmet = PadmetSpec(dict_args["padmetRef"])
    manual_curation.add_delete_rxn(
        curation_form,
        target_padmet,
        destination,
        padmetRef=reference_padmet,
        category="MANUAL",
        verbose=dict_args["verbose"],
    )
def add_spontaneous_reactions(padmet_path, padmet_ref_path, output_padmet_path, only_complete_pathways=True):
    """
    Copy spontaneous reactions from a reference padmet into a padmet.

    A reaction of the reference is spontaneous when its ``misc`` contains
    "SPONTANEOUS". Only pathways that already exist as nodes of the padmet
    are considered.

    Parameters
    ----------
    padmet_path: str
        path to the padmet to complete
    padmet_ref_path: str
        path to the reference padmet
    output_padmet_path: str
        path of the padmet file to generate
    only_complete_pathways: bool
        if True, reactions are added to a pathway only when every reaction
        missing from it (compared to the reference) is spontaneous, so that
        the pathway becomes complete; if False, each spontaneous reaction
        belonging to a pathway of the padmet is added
    """
    def _reaction_ids(padmet):
        # Ids of all the reaction nodes currently in the padmet.
        return {node.id for node in padmet.dicOfNode.values() if node.type == "reaction"}

    number_spontaneous_reactions = 0
    padmetSpec = PadmetSpec(padmet_path)
    padmetRef = PadmetRef(padmet_ref_path)

    all_spontaneous_rxns = {node.id for node in padmetRef.dicOfNode.values()
                            if node.type == "reaction" and "SPONTANEOUS" in node.misc}
    # Built once and refreshed only after a copy, instead of being rebuilt
    # for every membership test.
    spec_rxn_ids = _reaction_ids(padmetSpec)

    for spontaneous_rxn_id in all_spontaneous_rxns:
        # Default to an empty list: a reaction without any relation must not
        # crash the iteration.
        in_pwys = {rlt.id_out
                   for rlt in padmetRef.dicOfRelationIn.get(spontaneous_rxn_id, [])
                   if rlt.type == "is_in_pathway"}
        for pwy_id in in_pwys:
            if pwy_id not in padmetSpec.dicOfNode:
                continue

            if only_complete_pathways:
                padmet_ref_in_rxns = {rlt.id_in
                                      for rlt in padmetRef.dicOfRelationOut.get(pwy_id, [])
                                      if rlt.type == "is_in_pathway"}
                padmet_spec_in_rxns = {rlt.id_in
                                       for rlt in padmetSpec.dicOfRelationOut.get(pwy_id, [])
                                       if rlt.type == "is_in_pathway"}
                difference_rxns = padmet_ref_in_rxns - padmet_spec_in_rxns
                if not difference_rxns or not difference_rxns.issubset(all_spontaneous_rxns):
                    continue
                candidates = difference_rxns
            else:
                candidates = (spontaneous_rxn_id,)

            for rxn_id in candidates:
                if rxn_id in spec_rxn_ids:
                    continue
                padmetSpec.copyNode(padmetRef, rxn_id)
                number_spontaneous_reactions += 1
                # copyNode changed the padmet: refresh the cached ids.
                spec_rxn_ids = _reaction_ids(padmetSpec)

    padmetSpec.generateFile(output_padmet_path)
    print('Add {0} spontaneous reactions to {1}'.format(number_spontaneous_reactions, output_padmet_path))
def padmet_stat(padmet_file):
    """
    Count reactions/pathways/compounds/genes in a padmet file.

    Parameters
    ----------
    padmet_file: str
        path to a padmet file

    Returns
    -------
    list:
        [path to padmet, number of pathways, number of reactions,
        number of reactions having an "is_linked_to" relation,
        number of genes, number of compounds]
    """
    padmetSpec = PadmetSpec(padmet_file)
    node_table = padmetSpec.dicOfNode

    reactions = [node for node in node_table.values() if node.type == "reaction"]
    genes = [node for node in node_table.values() if node.type == "gene"]

    pathway_ids = set()
    compound_ids = set()
    linked_rxn_count = 0
    for rxn in reactions:
        relations = padmetSpec.dicOfRelationIn[rxn.id]
        compound_ids.update(
            rel.id_out for rel in relations if rel.type in ["consumes", "produces"]
        )
        pathway_ids.update(
            rel.id_out for rel in relations if rel.type == "is_in_pathway"
        )
        if any(rel for rel in relations if rel.type == "is_linked_to"):
            linked_rxn_count += 1

    # Only ids that are actual nodes of the padmet are counted.
    pathway_count = sum(1 for node_id in node_table if node_id in pathway_ids)
    compound_count = sum(1 for node_id in node_table if node_id in compound_ids)

    return [
        padmet_file,
        pathway_count,
        len(reactions),
        linked_rxn_count,
        len(genes),
        compound_count,
    ]
def get_pwy_from_rxn_cli(command_args):
    """
    Command-line handler around get_pwy_from_rxn.

    Parameters
    ----------
    command_args: list
        command-line arguments, parsed by docopt against the module
        docstring; --padmetRef is loaded with PadmetSpec and passed to
        get_pwy_from_rxn together with --reaction_file and --output
    """
    cli = docopt.docopt(__doc__, argv=command_args)
    rxn_path = cli["--reaction_file"]
    ref_path = cli["--padmetRef"]
    destination = cli["--output"]
    get_pwy_from_rxn(PadmetSpec(ref_path), rxn_path, destination)
def test_aucome():
    """
    Run the aucome steps on the 'test' run, then check that the resulting
    padmet contains the known fatty acid beta oxydation reactions.
    """
    pipeline = [
        # Set working folder.
        ['aucome', '--setWorkingFolder', os.getcwd()],
        # Check genbank and create fasta.
        ['aucome', 'check', '--run=test'],
        # Run Pathway-Tools.
        ['aucome', 'reconstruction', '--run=test'],
        # Run Orthofinder.
        ['aucome', 'orthology', '--run=test'],
        # Merge all networks.
        ['aucome', 'draft', '--run=test'],
    ]
    for command in pipeline:
        subprocess.call(command)

    padmetSpec = PadmetSpec(
        'test/networks/fatty_acid_beta_oxydation_I_1.padmet')
    found_reactions = set()
    for node in padmetSpec.dicOfNode.values():
        if node.type == "reaction":
            found_reactions.add(node.id)

    known_reactions = {
        "ACYLCOASYN-RXN", "ACYLCOADEHYDROG-RXN", "ENOYL-COA-DELTA-ISOM-RXN",
        "ENOYL-COA-HYDRAT-RXN", "OHBUTYRYL-COA-EPIM-RXN",
        "OHACYL-COA-DEHYDROG-RXN", "KETOACYLCOATHIOL-RXN"
    }
    assert known_reactions.issubset(found_reactions)
def test_m2m_recon_call():
    """
    Test m2m recon when called in terminal.

    The known reactions returned by fabo_reactions() must be present both
    in the sbml and in the padmet written by the command.
    """
    cleanup_cmd = ['mpwt', '--delete', 'fatty_acid_beta_oxydation_icyc']
    recon_cmd = [
        'm2m', 'recon', '-g', 'recon_data', '-o', 'recon_data_output', '-c',
        '1', '-p'
    ]
    subprocess.call(cleanup_cmd)
    subprocess.call(recon_cmd)

    # Reactions found in the sbml output.
    document = SBMLReader().readSBML(
        'recon_data_output/sbml/fatty_acid_beta_oxydation_I.sbml')
    sbml_rxn_ids = set()
    for reaction in document.getModel().getListOfReactions():
        sbml_rxn_ids.add(convert_from_coded_id(reaction.getId())[0])
    assert set(fabo_reactions()).issubset(sbml_rxn_ids)

    # Reactions found in the padmet output.
    padmet = PadmetSpec(
        'recon_data_output/padmet/fatty_acid_beta_oxydation_I.padmet')
    padmet_rxn_ids = {
        node.id for node in padmet.dicOfNode.values()
        if node.type == "reaction"
    }
    assert set(fabo_reactions()).issubset(padmet_rxn_ids)

    shutil.rmtree('recon_data_output')
    subprocess.call(['mpwt', '--delete', 'fatty_acid_beta_oxydation_icyc'])
def manual_curation_cli(command_args):
    """
    Command-line handler for manual curation.

    Parses command_args with docopt against the module docstring, then does
    one of three things:

    - --template_new_rxn: calls template_new_rxn with that value;
    - --template_add_delete_rxn: calls template_add_delete with that value;
    - otherwise: loads --padmetSpec (and --padmetRef when given), asks
      sniff_datafile which form --data is, and calls rxn_creator or
      add_delete_rxn accordingly. The source passed to them is the basename
      of --data without its extension; the output defaults to --padmetSpec.

    Parameters
    ----------
    command_args: list
        command-line arguments handed to docopt

    Raises
    ------
    FileNotFoundError
        if --data is given but the file does not exist
    ValueError
        if a padmet has to be curated but no --data form was given
    """
    args = docopt.docopt(__doc__, argv=command_args)
    data_file = args["--data"]
    output = args["--output"]
    verbose = args["-v"]

    source = None
    if data_file:
        if not os.path.exists(data_file):
            raise FileNotFoundError("No form curation file (--data/data_file) accessible at " + data_file)
        source = os.path.splitext(os.path.basename(data_file))[0]
    category = args["--category"]
    tool = args["--tool"]

    if args["--template_new_rxn"]:
        template_new_rxn(args["--template_new_rxn"])
        return
    if args["--template_add_delete_rxn"]:
        template_add_delete(args["--template_add_delete_rxn"])
        return

    if not data_file:
        # Without this guard `source` was unbound further down and the
        # failure surfaced as an unrelated error.
        raise ValueError("A curation form (--data) is required to update a padmet")

    padmetSpec = PadmetSpec(args["--padmetSpec"])
    if not output:
        output = args["--padmetSpec"]
    padmetRef = PadmetRef(args["--padmetRef"]) if args["--padmetRef"] else None

    to_do = sniff_datafile(data_file)
    if to_do == "rxn_creator":
        rxn_creator(data_file, padmetSpec, output, padmetRef, source, tool, category, verbose)
    elif to_do == "add_delete_rxn":
        add_delete_rxn(data_file, padmetSpec, output, padmetRef, source, tool, category, verbose)
def relation_curation_cli(command_args):
    """
    Command-line handler around get_relations.

    get_relations is first called with the --id_in / --id_out / --type
    selection. When --to-remove is given it is called a second time with the
    same selection plus to_remove and output, and with verbose forced to
    False.

    Parameters
    ----------
    command_args: list
        command-line arguments handed to docopt
    """
    args = docopt.docopt(__doc__, argv=command_args)
    padmet_file = args["--padmet"]
    selection = {
        "id_in": args["--id_in"],
        "id_out": args["--id_out"],
        "_type": args["--type"],
    }
    output = args["--output"]
    verbose = args["-v"]
    to_remove = args["--to-remove"]

    padmet = PadmetSpec(padmet_file)
    get_relations(padmet=padmet, verbose=verbose, **selection)

    if not to_remove:
        return
    if to_remove != "all":
        # Any value other than the keyword "all" is split on ";".
        to_remove = to_remove.split(";")
    get_relations(padmet=padmet, to_remove=to_remove, output=output, verbose=False, **selection)
def main():
    """
    Script entry point for manual curation.

    Parses the command line with docopt against the module docstring, then
    does one of three things:

    - --template_new_rxn: calls manual_curation.template_new_rxn;
    - --template_add_delete_rxn: calls manual_curation.template_add_delete;
    - otherwise: loads --padmetSpec (and --padmetRef when given), asks
      manual_curation.sniff_datafile which form --data is, and calls
      manual_curation.rxn_creator or manual_curation.add_delete_rxn. The
      source passed to them is the basename of --data without its
      extension; the output defaults to --padmetSpec.

    Raises
    ------
    ValueError
        if a padmet has to be curated but no --data form was given
    """
    args = docopt.docopt(__doc__)
    data_file = args["--data"]
    output = args["--output"]
    verbose = args["-v"]

    source = None
    if data_file:
        source = os.path.splitext(os.path.basename(data_file))[0]
    category = args["--category"]
    tool = args["--tool"]

    if args["--template_new_rxn"]:
        manual_curation.template_new_rxn(args["--template_new_rxn"])
        return
    if args["--template_add_delete_rxn"]:
        manual_curation.template_add_delete(args["--template_add_delete_rxn"])
        return

    if not data_file:
        # Without this guard `source` was unbound further down and the
        # failure surfaced as an unrelated error.
        raise ValueError("A curation form (--data) is required to update a padmet")

    padmetSpec = PadmetSpec(args["--padmetSpec"])
    if not output:
        output = args["--padmetSpec"]
    padmetRef = PadmetRef(args["--padmetRef"]) if args["--padmetRef"] else None

    to_do = manual_curation.sniff_datafile(data_file)
    if to_do == "rxn_creator":
        manual_curation.rxn_creator(data_file, padmetSpec, output, padmetRef, source, tool, category, verbose)
    elif to_do == "add_delete_rxn":
        manual_curation.add_delete_rxn(data_file, padmetSpec, output, padmetRef, source, tool, category, verbose)
def gene_to_targets_cli(command_args):
    """
    Command-line handler around gene_to_targets.

    Parameters
    ----------
    command_args: list
        command-line arguments, parsed by docopt against the module
        docstring; --padmetSpec is loaded and passed to gene_to_targets
        with the --genes, --output and -v values
    """
    cli = docopt.docopt(__doc__, argv=command_args)
    spec = PadmetSpec(cli["--padmetSpec"])
    gene_to_targets(spec, cli["--genes"], cli["--output"], cli["-v"])
def main():
    """
    Script entry point around gene_to_targets.gene_to_targets.

    Parses the command line with docopt against the module docstring, loads
    --padmetSpec and passes it on with the --genes, --output and -v values.
    """
    cli = docopt.docopt(__doc__)
    spec = PadmetSpec(cli["--padmetSpec"])
    gene_to_targets.gene_to_targets(spec, cli["--genes"], cli["--output"], cli["-v"])
def padmet_to_padmet(to_add, output, padmetRef=None, verbose=False):
    """
    Merge several padmet files into the first one and write the result.

    Parameters
    ----------
    to_add: str
        either a directory (every top-level ".padmet" file that is not a
        ".~lock" file is used) or padmet paths separated by ';'
    output: str
        path of the padmet file to generate
    padmetRef:
        not used by this function
    verbose: bool
        if True, print each update and the generated file

    Returns
    -------
    None
        nothing is generated when the directory holds no padmet file
    """
    if not os.path.isdir(to_add):
        padmetFiles = to_add.split(";")
    else:
        top_level_files = next(os.walk(to_add))[2]
        padmetFiles = []
        for name in top_level_files:
            if name.startswith(".~lock") or not name.endswith(".padmet"):
                continue
            padmetFiles.append(os.path.join(to_add, name))
        if not padmetFiles:
            print("No padmet found in %s" %to_add)
            return

    # The first file is the base; all the others are merged into it.
    padmet_init_file, *update_files = padmetFiles
    padmet_init = PadmetSpec(padmet_init_file)
    for padmet_update_file in update_files:
        if verbose:
            print("Updating %s from %s" %(os.path.basename(padmet_init_file),os.path.basename(padmet_update_file)))
        padmet_init.updateFromPadmet(PadmetSpec(padmet_update_file))

    if verbose:
        print("Generated file: %s" %output)
    padmet_init.generateFile(output)
def main():
    """
    Script entry point for compartment management.

    With --remove, padmet_compart.remove_compart is called (verbose forced
    to False). With both --old and --new, padmet_compart.remplace_compart is
    called. In both cases the padmet is written to --output, or back to
    --padmet when no output is given. Otherwise the compartments of the
    padmet are printed.
    """
    args = docopt.docopt(__doc__)
    padmet_file = args["--padmet"]
    old_compart = args["--old"]
    new_compart = args["--new"]
    to_remove = args["--remove"]
    verbose = args["-v"]
    new_padmet = args["--output"]
    if new_padmet is None:
        new_padmet = padmet_file

    padmet = PadmetSpec(padmet_file)
    if to_remove:
        padmet = padmet_compart.remove_compart(padmet, to_remove, verbose=False)
    elif old_compart and new_compart:
        padmet = padmet_compart.remplace_compart(padmet, old_compart, new_compart, verbose)
    else:
        print("List of compartments:")
        print(list(padmet.get_all_compart()))
        return
    padmet.generateFile(new_padmet)
def test_m2m_recon_call():
    """
    Test m2m recon when called in terminal.

    First run (-p): the known reactions returned by get_fabo_reactions()
    must be present in the sbml and in the padmet outputs.
    Second run (--pwt-xml): every known reaction id must appear inside the
    name of at least one reaction of the sbml output.
    """
    sbml_file_path = os.path.join(
        *['recon_data_output', 'sbml', 'fatty_acid_beta_oxydation_I.sbml'])
    padmet_path = os.path.join(
        *['recon_data_output', 'padmet', 'fatty_acid_beta_oxydation_I.padmet'])

    subprocess.call(['mpwt', '--delete', 'fatty_acid_beta_oxydation_icyc'])
    subprocess.call([
        'm2m', 'recon', '-g', 'recon_data', '-o', 'recon_data_output', '-c',
        '1', '-p'
    ])

    reader = SBMLReader()
    document = reader.readSBML(sbml_file_path)
    expected_fabo_reactions = [
        convert_from_coded_id(reaction.getId())[0]
        for reaction in document.getModel().getListOfReactions()
    ]
    assert set(get_fabo_reactions()).issubset(set(expected_fabo_reactions))

    padmet = PadmetSpec(padmet_path)
    fabo_rxns = [
        node.id for node in padmet.dicOfNode.values()
        if node.type == "reaction"
    ]
    assert set(get_fabo_reactions()).issubset(set(fabo_rxns))

    shutil.rmtree('recon_data_output')

    subprocess.call([
        'm2m', 'recon', '-g', 'recon_data', '-o', 'recon_data_output', '-c',
        '1', '--pwt-xml'
    ])

    reader = SBMLReader()
    document = reader.readSBML(sbml_file_path)
    # Extract reaction ID from annotation.
    fabo_reactions = [
        reaction.name for reaction in document.getModel().getListOfReactions()
    ]
    # Record a result for EVERY known reaction, including the missing ones.
    # Recording only the hits made all(results.values()) pass vacuously.
    results = {
        known_fabo_reaction: any(
            known_fabo_reaction in fabo_reaction
            for fabo_reaction in fabo_reactions
        )
        for known_fabo_reaction in get_fabo_reactions()
    }
    assert all(results.values())

    shutil.rmtree('recon_data_output')
    subprocess.call(['mpwt', '--delete', 'fatty_acid_beta_oxydation_icyc'])
def main():
    """
    Script entry point comparing an sbml file with a padmet.

    Reads --sbml with libsbml, prints the message of every error reported
    on the document, loads --padmet and calls
    compare_sbml_padmet.compare_sbml_padmet on both.
    """
    cli = docopt.docopt(__doc__)
    sbml_document = libsbml.SBMLReader().readSBML(cli["--sbml"])
    error_count = sbml_document.getNumErrors()
    for index in range(error_count):
        print(sbml_document.getError(index).getMessage())
    spec = PadmetSpec(cli["--padmet"])
    compare_sbml_padmet.compare_sbml_padmet(sbml_document, spec)
def main():
    """
    Script entry point extracting pathways from a list of reactions.

    The lines of --reaction_file form the set of reaction ids. --padmetRef
    is loaded with PadmetSpec, get_pwy_from_rxn.extract_pwys is called on
    both, and its result is passed to get_pwy_from_rxn.dict_pwys_to_file
    with --output.
    """
    cli = docopt.docopt(__doc__)
    with open(cli["--reaction_file"], 'r') as handle:
        reactions = set(handle.read().splitlines())
    ref_path = cli["--padmetRef"]
    destination = cli["--output"]
    spec = PadmetSpec(ref_path)
    pathways = get_pwy_from_rxn.extract_pwys(spec, reactions)
    get_pwy_from_rxn.dict_pwys_to_file(pathways, destination)
def create_graph_from_padmet(input_file, verbose=False):
    """
    Build the nodes and directed edges of the reaction/compound graph of a
    padmet file.

    A "consumes" relation gives an edge compound -> reaction, a "produces"
    relation an edge reaction -> compound. When the first "DIRECTION" value
    of the reaction is 'REVERSIBLE', the opposite edge is added as well.

    Parameters
    ----------
    input_file: str
        path to padmet input file
    verbose: bool
        if True print information

    Returns
    -------
    nodes_in_graph: OrderedDict
        node id (reaction or compound) -> integer index, in order of first
        appearance
    edges_data: list
        two parallel lists [sources, targets] of node indexes
    """
    nodes_in_graph = OrderedDict()

    def _index_of(node_id):
        # Give the next free index to a node seen for the first time.
        if node_id not in nodes_in_graph:
            nodes_in_graph[node_id] = len(nodes_in_graph)
        return nodes_in_graph[node_id]

    if verbose:
        print('Loading %s' %input_file)
    padmetSpec = PadmetSpec(input_file)

    sources, targets = [], []
    for node in padmetSpec.dicOfNode.values():
        if node.type != 'reaction':
            continue
        reaction_id = node.id
        reversibility = node.misc["DIRECTION"][0] == 'REVERSIBLE'
        rxn_index = _index_of(reaction_id)

        for rlt in padmetSpec.dicOfRelationIn[reaction_id]:
            if rlt.type == 'consumes':
                start, end = _index_of(rlt.id_out), rxn_index
            elif rlt.type == 'produces':
                start, end = rxn_index, _index_of(rlt.id_out)
            else:
                continue
            sources.append(start)
            targets.append(end)
            if reversibility:
                sources.append(end)
                targets.append(start)

    return nodes_in_graph, [sources, targets]
def reduce_network(padmet_file:str, empty_padmet:str, reaction_list:list, sbml_output:str, del_cof:bool=False):
    """Create a sbml starting with the desired reactions.

    The reaction ids are decoded with convert_from_coded_id, copied from the
    reference padmet into the padmet loaded from empty_padmet, and the
    result is written with padmet_to_sbml (sbml level 3, verbose).

    Args:
        padmet_file (str): path to padmet containing all reactions
        empty_padmet (str): path to empty padmet that will be filled
        reaction_list (list): list of reactions to be retrieved
        sbml_output (str): path to sbml file to be written
        del_cof (bool): if True, delete from the copied reactions every
            relation whose target is one of the decoded COFACTORS
    """
    p_ref = PadmetRef(padmet_file)
    p_spec = PadmetSpec(empty_padmet)

    decoded_rxns = []
    for coded_id in reaction_list:
        decoded_rxns.append(convert_from_coded_id(coded_id)[0])
    for rxn_id in decoded_rxns:
        p_spec.copyNode(p_ref, rxn_id)

    cofactor_list = [convert_from_coded_id(coded_id)[0] for coded_id in COFACTORS]
    if del_cof:
        for rxn_id in decoded_rxns:
            # Collect first, delete afterwards: the relations are not
            # removed while they are being iterated.
            doomed = [
                rel for rel in p_spec.dicOfRelationIn[rxn_id]
                if rel.id_out in cofactor_list
            ]
            for rel in doomed:
                p_spec._delRelation(rel)

    padmet_to_sbml(p_spec, sbml_output, sbml_lvl=3, verbose=True)
def parse_reactions_padmet(padmet_file):
    """
    Parse a padmet file to extract the reaction graph (edges and nodes) for
    igraph.

    Nodes are reactions. An edge goes from reaction A to reaction B when A
    produces a compound that B consumes; each pair of reactions gives at
    most one edge.

    Parameters
    ----------
    padmet_file: str
        pathname of the padmet file

    Returns
    -------
    edges: list
        (producer index, consumer index) tuples
    edges_label: list
        for each edge, the id of the producing reaction
    weights: list
        the weight associated to each edge (always 1)
    nodes: dict
        reaction id -> node index
    nodes_label: list
        for each node index, the reaction id
    """
    padmetSpec = PadmetSpec(padmet_file)

    edges = []
    edges_label = []
    weights = []
    nodes = {}
    nodes_label = []
    # Mirror of `edges` as a set: constant-time duplicate detection instead
    # of scanning the whole list for every candidate edge.
    seen_edges = set()

    all_rxns = [node for node in padmetSpec.dicOfNode.values() if node.type == "reaction"]
    for rxn in all_rxns:
        for rlt in padmetSpec.dicOfRelationIn[rxn.id]:
            if rlt.type != "produces":
                continue
            for sec_rlt in padmetSpec.dicOfRelationOut[rlt.id_out]:
                if sec_rlt.type != "consumes":
                    continue
                sec_rxn_id = sec_rlt.id_in

                # The consumer is registered before the producer, which
                # keeps the index assignment of the previous version.
                for rxn_id in (sec_rxn_id, rxn.id):
                    if rxn_id not in nodes:
                        nodes[rxn_id] = len(nodes_label)
                        nodes_label.append(rxn_id)

                edge = (nodes[rxn.id], nodes[sec_rxn_id])
                if edge not in seen_edges:
                    seen_edges.add(edge)
                    edges.append(edge)
                    weights.append(1)
                    edges_label.append(rxn.id)

    return edges, edges_label, weights, nodes, nodes_label
def compare_sbml_padmet_cli(command_args):
    """
    Command-line handler comparing an sbml file with a padmet.

    Reads --sbml with libsbml, prints the message of every error reported
    on the document, loads --padmet and calls compare_sbml_padmet on both.

    Parameters
    ----------
    command_args: list
        command-line arguments handed to docopt

    Raises
    ------
    FileNotFoundError
        if the --sbml file does not exist
    """
    cli = docopt.docopt(__doc__, argv=command_args)
    sbml_file = cli["--sbml"]
    if not os.path.exists(sbml_file):
        raise FileNotFoundError("No SBML file (--sbml) accessible at " + sbml_file)

    sbml_document = libsbml.SBMLReader().readSBML(sbml_file)
    error_count = sbml_document.getNumErrors()
    for index in range(error_count):
        print(sbml_document.getError(index).getMessage())

    spec = PadmetSpec(cli["--padmet"])
    compare_sbml_padmet(sbml_document, spec)
def padmet_to_padmet(to_add, output=None, verbose=False):
    """
    Create a padmet by merging multiple other padmet files.

    Parameters
    ----------
    to_add: dir or str
        padmet directory (every top-level ".padmet" file that is not a
        ".~lock" file is used) or string with multiple padmet paths
        separated by ','
    output: str
        path to the output file; nothing is written when it is empty or None
    verbose: bool
        verbose level of script

    Returns
    -------
    padmet_init: PadmetSpec
        padmet created from the merging of the other padmets, or None when
        the directory holds no padmet file
    """
    if not os.path.isdir(to_add):
        padmetFiles = to_add.split(",")
    else:
        top_level_files = next(os.walk(to_add))[2]
        padmetFiles = []
        for name in top_level_files:
            if name.startswith(".~lock") or not name.endswith(".padmet"):
                continue
            padmetFiles.append(os.path.join(to_add, name))
        if not padmetFiles:
            print("No padmet found in %s" % to_add)
            return

    # The first file is the base; all the others are merged into it.
    padmet_init_file, *update_files = padmetFiles
    padmet_init = PadmetSpec(padmet_init_file)
    for padmet_update_file in update_files:
        if verbose:
            print("Updating %s from %s" %
                  (os.path.basename(padmet_init_file),
                   os.path.basename(padmet_update_file)))
        padmet_init.updateFromPadmet(PadmetSpec(padmet_update_file))

    if output:
        if verbose:
            print("Generated file: %s" % output)
        padmet_init.generateFile(output)

    return padmet_init
def padmet_stat(padmet_file):
    """
    Count reactions/pathways/compounds/genes in a padmet file.

    Parameters
    ----------
    padmet_file: str
        path to a padmet file

    Returns
    -------
    list:
        [padmet name (file basename without '.padmet'),
        number of pathways,
        number of reactions without any class compound,
        number of reactions with at least one class compound,
        number of reactions having an "is_linked_to" relation,
        number of genes,
        number of compounds,
        number of class compounds]
    """
    padmetSpec = PadmetSpec(padmet_file)
    padmet_name = os.path.basename(padmet_file).replace('.padmet', '')

    all_rxns = [node for node in padmetSpec.dicOfNode.values() if node.type == "reaction"]
    all_genes = [node for node in padmetSpec.dicOfNode.values() if node.type == "gene"]

    total_pwy_id = set()
    total_cpd_id = set()
    nb_rxn_with_ga = 0
    # Compounds of each reaction, kept so that the relations are scanned
    # only once per reaction.
    cpds_by_rxn = []
    for rxn_node in all_rxns:
        relations = padmetSpec.dicOfRelationIn[rxn_node.id]
        rxn_cpds = {rlt.id_out for rlt in relations if rlt.type in ("consumes", "produces")}
        cpds_by_rxn.append(rxn_cpds)
        total_cpd_id.update(rxn_cpds)
        # Get all pathways having at least a reaction. Superpathways
        # containing only pathways are therefore not collected.
        total_pwy_id.update(rlt.id_out for rlt in relations if rlt.type == "is_in_pathway")
        if any(rlt.type == "is_linked_to" for rlt in relations):
            nb_rxn_with_ga += 1

    all_pwys = [node for (node_id, node) in padmetSpec.dicOfNode.items() if node_id in total_pwy_id]
    class_cpds = [node.id for (node_id, node) in padmetSpec.dicOfNode.items()
                  if node_id in total_cpd_id and node.type == "class"]
    compound_cpds = [node.id for (node_id, node) in padmetSpec.dicOfNode.items()
                     if node_id in total_cpd_id and node.type == "compound"]

    # Built once instead of once per reaction.
    class_cpd_ids = set(class_cpds)
    nb_rxn_with_class = sum(1 for rxn_cpds in cpds_by_rxn if rxn_cpds & class_cpd_ids)
    nb_rxn_without_class = len(all_rxns) - nb_rxn_with_class

    return [padmet_name, len(all_pwys), nb_rxn_without_class, nb_rxn_with_class,
            nb_rxn_with_ga, len(all_genes), len(compound_cpds), len(class_cpds)]
def orthology_result(padmet_file, padmet_names):
    """
    Count, for one padmet file, the 'suppData' nodes attributed to each
    species.

    For every node of type 'suppData', the part of its id before
    '_SuppData_OUTPUT_ORTHOFINDER_' is taken as the reaction id and the part
    after 'FROM_' as the species name.

    Parameters
    ----------
    padmet_file: str
        path to a padmet file
    padmet_names: list
        all the padmet filenames; names never seen as a species get a count
        of 0

    Returns
    -------
    pandas.DataFrame:
        one row, indexed by the upper-cased padmet_file without '.padmet',
        with one column per species and a final 'Orthology' column holding
        the number of distinct reaction ids
    """
    ortholog_species_counts = {}
    padmetSpec = PadmetSpec(padmet_file)
    ortholog_reactions = set()

    for node in padmetSpec.dicOfNode.values():
        if node.type != 'suppData':
            continue
        ortholog_reactions.add(node.id.split('_SuppData_OUTPUT_ORTHOFINDER_')[0])
        ortholog_species = node.id.split('FROM_')[1]
        ortholog_species_counts[ortholog_species] = ortholog_species_counts.get(ortholog_species, 0) + 1

    for species in padmet_names:
        ortholog_species_counts.setdefault(species, 0)

    ortholog_species_counts['Orthology'] = len(ortholog_reactions)
    # list.append() returns None, so the former
    # `list(...).append('Orthology')` always gave columns=None. The column
    # order is now stated explicitly: species first, 'Orthology' last.
    columns = list(ortholog_species_counts)

    df = pa.DataFrame([ortholog_species_counts], columns=columns,
                      index=[padmet_file.replace('.padmet', '').upper()])
    return df
def orthology_result(padmet_file, padmet_names):
    """
    Count, for one padmet file, the 'suppData' nodes attributed to each
    species.

    For every node of type 'suppData', the part of its id before
    '_SuppData_OUTPUT_ORTHOFINDER_' is taken as the reaction id and the part
    after 'FROM_' as the species name.

    Parameters
    ----------
    padmet_file: str
        path to a padmet file
    padmet_names: list
        all the padmet filenames; names never seen as a species get a count
        of 0

    Returns
    -------
    dictionary:
        species name -> number of 'suppData' nodes, plus the key 'Orthology'
        holding the number of distinct reaction ids
    """
    counts = {}
    padmetSpec = PadmetSpec(padmet_file)
    seen_reactions = set()

    supp_nodes = (
        node for node in padmetSpec.dicOfNode.values()
        if node.type == 'suppData'
    )
    for node in supp_nodes:
        seen_reactions.add(node.id.split('_SuppData_OUTPUT_ORTHOFINDER_')[0])
        species = node.id.split('FROM_')[1]
        counts[species] = counts.get(species, 0) + 1

    for species in padmet_names:
        counts.setdefault(species, 0)

    counts['Orthology'] = len(seen_reactions)
    return counts