def reduce_network(padmet_file:str, empty_padmet:str, reaction_list:list, sbml_output:str, del_cof:bool=False):
    """Create a sbml starting with the desired reactions.

    Every reaction of ``reaction_list`` is decoded with
    ``convert_from_coded_id`` and copied from the reference padmet into the
    (initially empty) padmet, which is then exported as SBML level 3.

    Args:
        padmet_file (str): path to padmet containing all reactions
        empty_padmet (str): path to empty padmet that will be filled
        reaction_list (list): list of (coded) reaction ids to be retrieved
        sbml_output (str): path to sbml file to be written
        del_cof (bool): if True, delete from the copied reactions every
            relation whose target (``id_out``) is one of ``COFACTORS``
            before writing the sbml. Default is False.
    """
    p_ref = PadmetRef(padmet_file)
    p_spec = PadmetSpec(empty_padmet)

    # Decode the ids, then copy each reaction from the reference padmet.
    reaction_list = [convert_from_coded_id(i)[0] for i in reaction_list]
    for rxn_id in reaction_list:
        p_spec.copyNode(p_ref, rxn_id)

    if del_cof:
        # A set gives O(1) membership tests; only built when actually needed.
        cofactor_ids = {convert_from_coded_id(i)[0] for i in COFACTORS}
        for rxn_id in reaction_list:
            # .get(): a reaction without any relation must not raise KeyError.
            # The relations are collected first because _delRelation is
            # expected to modify the relation containers being read here.
            cof_linked_rlt = [
                rlt
                for rlt in p_spec.dicOfRelationIn.get(rxn_id, [])
                if rlt.id_out in cofactor_ids
            ]
            for rel in cof_linked_rlt:
                p_spec._delRelation(rel)

    padmet_to_sbml(p_spec, sbml_output, sbml_lvl=3, verbose=True)
def add_spontaneous_reactions(padmet_path, padmet_ref_path, output_padmet_path, only_complete_pathways=True):
    """Add spontaneous reactions of a reference padmet to a padmet.

    A reaction of the reference is considered spontaneous when
    ``"SPONTANEOUS"`` is found in its ``misc`` attribute. Only pathways
    already present as nodes in the padmet are considered.

    Parameters
    ----------
    padmet_path: str
        path to the padmet to complete
    padmet_ref_path: str
        path to the padmet of reference
    output_padmet_path: str
        path of the padmet file to write
    only_complete_pathways: bool
        if True (default), the reactions missing from a pathway are added
        only when all of them are spontaneous (i.e. adding them makes the
        pathway complete with respect to the reference). If False, every
        spontaneous reaction belonging to a pathway of the padmet is added.
    """
    padmetSpec = PadmetSpec(padmet_path)
    padmetRef = PadmetRef(padmet_ref_path)

    def _spec_has_reaction(rxn_id):
        # O(1) lookup instead of rebuilding the set of all reaction ids at
        # every test; relies on dicOfNode being keyed by node id.
        node = padmetSpec.dicOfNode.get(rxn_id)
        return node is not None and node.type == "reaction"

    def _rxns_in_pathway(padmet, pwy_id):
        # Ids at the source side of the 'is_in_pathway' relations of pwy_id.
        return {
            rlt.id_in
            for rlt in padmet.dicOfRelationOut.get(pwy_id, [])
            if rlt.type == "is_in_pathway"
        }

    number_spontaneous_reactions = 0
    all_spontaneous_rxns = {
        node.id
        for node in padmetRef.dicOfNode.values()
        if node.type == "reaction" and "SPONTANEOUS" in node.misc
    }

    for spontaneous_rxn_id in all_spontaneous_rxns:
        # Default to an empty list: a reaction without relations used to
        # raise TypeError (iteration over None).
        in_pwys = {
            rlt.id_out
            for rlt in padmetRef.dicOfRelationIn.get(spontaneous_rxn_id, [])
            if rlt.type == "is_in_pathway"
        }
        for pwy_id in in_pwys:
            if pwy_id not in padmetSpec.dicOfNode:
                continue

            if only_complete_pathways:
                difference_rxns = (
                    _rxns_in_pathway(padmetRef, pwy_id)
                    - _rxns_in_pathway(padmetSpec, pwy_id)
                )
                # Complete the pathway only if nothing but spontaneous
                # reactions is missing.
                if difference_rxns and difference_rxns.issubset(all_spontaneous_rxns):
                    for difference_rxn in difference_rxns:
                        if not _spec_has_reaction(difference_rxn):
                            padmetSpec.copyNode(padmetRef, difference_rxn)
                            number_spontaneous_reactions += 1
            elif not _spec_has_reaction(spontaneous_rxn_id):
                padmetSpec.copyNode(padmetRef, spontaneous_rxn_id)
                number_spontaneous_reactions += 1

    padmetSpec.generateFile(output_padmet_path)
    print('Add {0} spontaneous reactions to {1}'.format(number_spontaneous_reactions, output_padmet_path))
def reaction_to_sbml(reactions, output, padmetRef, verbose = False):
    """
    convert a list of reactions to sbml format based on a given padmet of reference.
    - ids are encoded for sbml using functions sbmlPlugin.convert_to_coded_id

    Parameters
    ----------
    reactions: list or set or str
        iterable of reactions ids, or the path to a file containing one
        reaction id per line. The given object is never modified.
    output: str
        the pathname to the sbml file to create
    padmetRef: padmet.classes.PadmetRef
        padmet of reference
    verbose: bool
        if True, print the ids that are not in padmetRef

    Raises
    ------
    FileNotFoundError
        if reactions is a path that is not an existing file
    KeyError
        if none of the reactions is in padmetRef (also raised for an empty
        input)
    """
    if isinstance(reactions, (str, bytes, os.PathLike)):
        # A path: os.path.isfile must only be called on path-like values,
        # it raises TypeError on a list or a set.
        if not os.path.isfile(reactions):
            raise FileNotFoundError("No such reactions file: %s" % (reactions,))
        with open(reactions, 'r') as f:
            rxn_ids = set(f.read().splitlines())
    else:
        # Work on a private copy so the caller's collection stays untouched.
        rxn_ids = set(reactions)

    #check if all rxn id are in padmetRef.
    all_rxn = {k for k, v in padmetRef.dicOfNode.items() if v.type == "reaction"}
    rxn_not_in_ref = rxn_ids.difference(all_rxn)
    if len(rxn_not_in_ref) == len(rxn_ids):
        raise KeyError("None of the reactions is in padmetRef")

    if verbose:
        for rxn_id in rxn_not_in_ref:
            print("%s not in padmetRef" % rxn_id)
    rxn_ids.difference_update(rxn_not_in_ref)

    padmet = PadmetSpec()
    for rxn_id in rxn_ids:
        padmet.copyNode(padmetRef, rxn_id)
    padmet_to_sbml(padmet, output, sbml_lvl=2, verbose = verbose)
def from_pgdb_to_padmet(pgdb_folder, db='NA', version='NA', source='GENOME', extract_gene=False, no_orphan=False, enhanced_db=False, padmetRef_file=None, verbose=False):
    """
    Create a padmet from the .dat files of a PGDB folder.

    Two modes, selected by padmetRef_file:

    - padmetRef_file given: a PadmetSpec is created, the reactions listed in
      reactions.dat are copied from the padmetRef and each one is tagged with
      a reconstructionData node. db and version are then read from the
      padmetRef and override the given arguments.
    - padmetRef_file not given: a PadmetRef is created by parsing classes,
      compounds, reactions and pathways, and optionally enhanced from
      metabolic-reactions.xml.

    In both modes, reactions having only one of the relation types
    'consumes'/'produces' are deleted before returning.

    Parameters
    ----------
    pgdb_folder: str
        path to pgdb
    db: str
        pgdb name, default is 'NA'
    version: str
        pgdb version, default is 'NA'
    source: str
        tag reactions for traceability, default is 'GENOME'
    extract_gene: bool
        if true extract genes information
    no_orphan: bool
        if true, remove reactions without genes associated
        (only applied when extract_gene is also true)
    enhanced_db: bool
        if true, read metabolic-reactions.xml sbml file and add information in final padmet
    padmetRef_file: str
        path to padmetRef corresponding to metacyc in padmet format
    verbose: bool
        if True print information

    Returns
    -------
    padmet.padmetRef:
        padmet instance with pgdb within pgdb data
        (a PadmetSpec when padmetRef_file is given)
    """
    # Module-level state; presumably read by the *_parser helpers defined
    # elsewhere in the file, so it is (re)initialised on every call.
    global regex_purge, regex_xref, list_of_relation, def_compart_in, def_compart_out
    # Raw strings: same patterns, without invalid escape sequences.
    regex_purge = re.compile(r"<.*?>|\|")
    regex_xref = re.compile(r'^\((?P<DB>\S*)\s*"(?P<ID>\S*)"')
    list_of_relation = []
    def_compart_in = "c"
    def_compart_out = "e"

    #parsing args
    source = source.upper()
    classes_file = os.path.join(pgdb_folder, "classes.dat")
    compounds_file = os.path.join(pgdb_folder, "compounds.dat")
    proteins_file = os.path.join(pgdb_folder, "proteins.dat")
    reactions_file = os.path.join(pgdb_folder, "reactions.dat")
    enzrxns_file = os.path.join(pgdb_folder, "enzrxns.dat")
    pathways_file = os.path.join(pgdb_folder, "pathways.dat")
    if enhanced_db:
        metabolic_reactions = os.path.join(pgdb_folder, "metabolic-reactions.xml")
    else:
        metabolic_reactions = None
    if extract_gene:
        genes_file = os.path.join(pgdb_folder, "genes.dat")
    else:
        genes_file = None

    today_date = datetime.now().strftime("%Y-%m-%d")

    if padmetRef_file:
        padmet = PadmetSpec()
        padmetRef = PadmetRef(padmetRef_file)
        # The reference database is the source of truth for db/version.
        version = padmetRef.info["DB_info"]["version"]
        db = padmetRef.info["DB_info"]["DB"]
        dbNotes = {"PADMET":{"creation":today_date,"version":"2.6"},"DB_info":{"DB":db,"version":version}}
        padmet.setInfo(dbNotes)
        padmet.setPolicy(padmetRef)

        with open(reactions_file, 'r') as f:
            rxns_id = [line.split(" - ")[1] for line in f.read().splitlines()
                       if line.startswith("UNIQUE-ID")]
        for count, rxn_id in enumerate(rxns_id, 1):
            if verbose: print("%s/%s Copy %s" %(count, len(rxns_id), rxn_id))
            try:
                padmet.copyNode(padmetRef, rxn_id)
                reconstructionData_id = rxn_id+"_reconstructionData_"+source
                # Direct dict membership instead of scanning a list of keys.
                if reconstructionData_id in padmet.dicOfNode and verbose:
                    print("Warning: The reaction %s seems to be already added from the same source %s" %(rxn_id, source))
                reconstructionData = {"SOURCE":[source],"TOOL":["PATHWAYTOOLS"],"CATEGORY":["ANNOTATION"]}
                reconstructionData_rlt = Relation(rxn_id,"has_reconstructionData",reconstructionData_id)
                padmet.dicOfNode[reconstructionData_id] = Node("reconstructionData", reconstructionData_id, reconstructionData)
                padmet._addRelation(reconstructionData_rlt)
            except TypeError:
                # The TypeError is reported as the reaction being absent from
                # the reference; the reaction is skipped.
                print("%s not in padmetRef" %(rxn_id))

        if extract_gene:
            if verbose: print("parsing genes")
            map_gene_ids = genes_parser(genes_file, padmet, verbose)
            if verbose: print("parsing proteins")
            dict_protein_gene_id = proteins_parser(proteins_file, padmet, verbose)
            mapped_dict_protein_gene_id = map_gene_id(dict_protein_gene_id, map_gene_ids)
            if verbose: print("parsing association enzrxns")
            enzrxns_parser(enzrxns_file, padmet, mapped_dict_protein_gene_id, source, verbose)

    else:
        POLICY_IN_ARRAY = [['class','is_a_class','class'], ['class','has_name','name'], ['class','has_xref','xref'], ['class','has_suppData','suppData'],
                           ['compound','is_a_class','class'], ['compound','has_name','name'], ['compound','has_xref','xref'], ['compound','has_suppData','suppData'],
                           ['gene','is_a_class','class'], ['gene','has_name','name'], ['gene','has_xref','xref'], ['gene','has_suppData','suppData'], ['gene','codes_for','protein'],
                           ['pathway','is_a_class','class'], ['pathway','has_name','name'], ['pathway','has_xref','xref'], ['pathway','is_in_pathway','pathway'],
                           ['protein','is_a_class','class'], ['protein','has_name','name'], ['protein','has_xref','xref'], ['protein','has_suppData','suppData'], ['protein','catalyses','reaction'],
                           ['protein','is_in_species','class'],
                           ['reaction','is_a_class','class'], ['reaction','has_name','name'], ['reaction','has_xref','xref'], ['reaction','has_suppData','suppData'], ['reaction','has_reconstructionData','reconstructionData'], ['reaction','is_in_pathway','pathway'],
                           ['reaction','consumes','class','STOICHIOMETRY','X','COMPARTMENT','Y'], ['reaction','produces','class','STOICHIOMETRY','X','COMPARTMENT','Y'],
                           ['reaction','consumes','compound','STOICHIOMETRY','X','COMPARTMENT','Y'], ['reaction','produces','compound','STOICHIOMETRY','X','COMPARTMENT','Y'],
                           ['reaction','consumes','protein','STOICHIOMETRY','X','COMPARTMENT','Y'], ['reaction','produces','protein','STOICHIOMETRY','X','COMPARTMENT','Y'],
                           ['reaction','is_linked_to','gene','SOURCE:ASSIGNMENT','X:Y']]
        dbNotes = {"PADMET":{"creation":today_date,"version":"2.6"},"DB_info":{"DB":db,"version":version}}
        padmet = PadmetRef()
        if verbose: print("setting policy")
        padmet.setPolicy(POLICY_IN_ARRAY)
        if verbose: print("setting dbInfo")
        padmet.setInfo(dbNotes)

        if verbose: print("parsing classes")
        classes_parser(classes_file, padmet, verbose)

        if verbose: print("parsing compounds")
        compounds_parser(compounds_file, padmet, verbose)

        if verbose: print("parsing reactions")
        reactions_parser(reactions_file, padmet, extract_gene, source, verbose)

        if verbose: print("parsing pathways")
        pathways_parser(pathways_file, padmet, verbose)

        if extract_gene:
            if verbose: print("parsing genes")
            map_gene_ids = genes_parser(genes_file, padmet, verbose)
            if verbose: print("parsing proteins")
            dict_protein_gene_id = proteins_parser(proteins_file, padmet, verbose)
            mapped_dict_protein_gene_id = map_gene_id(dict_protein_gene_id, map_gene_ids)
            if verbose: print("parsing association enzrxns")
            enzrxns_parser(enzrxns_file, padmet, mapped_dict_protein_gene_id, source, verbose)

        if metabolic_reactions is not None:
            if verbose: print("enhancing db from metabolic-reactions.xml")
            padmet = enhance_db(metabolic_reactions, padmet, extract_gene, verbose)

    # Index the relations accumulated in the global list in both lookup
    # dictionaries of the padmet.
    for rlt in list_of_relation:
        padmet.dicOfRelationIn.setdefault(rlt.id_in, []).append(rlt)
        padmet.dicOfRelationOut.setdefault(rlt.id_out, []).append(rlt)

    if extract_gene and no_orphan:
        all_reactions = [node for node in padmet.dicOfNode.values() if node.type == "reaction"]
        # .get(): a reaction without any relation is an orphan, not a KeyError.
        rxn_to_del = [r for r in all_reactions
                      if not any(rlt.type == "is_linked_to" for rlt in padmet.dicOfRelationIn.get(r.id, []))]
        for rxn in rxn_to_del:
            padmet.delNode(rxn.id)
        if verbose: print("%s/%s orphan reactions (without gene association) deleted" %(len(rxn_to_del), len(all_reactions)))

        all_genes_linked = {rlt.id_out for rlt in padmet.getAllRelation() if rlt.type == "is_linked_to"}
        all_genes = {node.id for node in padmet.dicOfNode.values() if node.type == "gene"}
        orphan_genes = [g for g in all_genes if g not in all_genes_linked]
        for gene_id in orphan_genes:
            # NOTE(review): only the node is removed here, relations of the
            # gene (if any) are left in place - confirm this is intended.
            padmet.dicOfNode.pop(gene_id)
        if verbose: print("%s/%s orphan genes (not linked to any reactions) deleted" %(len(orphan_genes), len(all_genes)))

    # Delete reactions that have only one of the two relation types
    # 'consumes' / 'produces'. The ids are collected first because nodes are
    # deleted while looping.
    rxns = [node.id for node in padmet.dicOfNode.values() if node.type == "reaction"]
    for rxn_id in rxns:
        cp_rlts = {rlt.type for rlt in padmet.dicOfRelationIn.get(rxn_id, []) if rlt.type in ["consumes","produces"]}
        if len(cp_rlts) == 1:
            print("rxn only consume or produce, transport ???: %s" %rxn_id)
            padmet.delNode(rxn_id)

    return padmet
# Remaining command-line options, then parsing.
parser.add_argument("-s", "--species", help="padmet species", required=True)
parser.add_argument("-p", "--pathway", help="pathway name", required=True)
parser.add_argument("-o", "--output", help="SBML output filename", required=False)
args = parser.parse_args()

# Without an explicit output, the file is named after the lower-cased pathway.
outfile = args.output if args.output else args.pathway.lower() + ".sbml"

p_ref = PadmetRef(args.reference)   # reference padmet
p_spec = PadmetSpec(args.species)   # organism padmet

# Collect the reactions of the requested pathway in the reference padmet:
# sources of 'is_in_pathway' relations whose node is of type 'reaction'.
rxn_list = []
for rlt in p_ref.dicOfRelationOut[args.pathway]:
    if rlt.type != "is_in_pathway":
        continue
    if p_ref.dicOfNode[rlt.id_in].type == "reaction":
        rxn_list.append(rlt.id_in)

# Copy each reaction into the organism padmet, echoing its id.
for rxn_id in rxn_list:
    print(rxn_id)
    p_spec.copyNode(p_ref, rxn_id)

padmet_to_sbml(p_spec, outfile, sbml_lvl=2, verbose=True)