def extract_data_sbml(sbml_filepath):
    """
    Read a sbml file and list the genes, compounds and reactions it contains.

    Genes come from the first GENE_ASSOCIATION entry of each reaction's notes
    and are reported once each, in order of first appearance.
    Compound and reaction ids are the first item returned by
    sbmlPlugin.convert_from_coded_id for each species / reaction id.

    Parameters
    ----------
    sbml_filepath: str
        path to the sbml file

    Returns
    -------
    tuple
        (genes, id_compounds, id_reactions), three lists
    """
    sbml_reader = libsbml.SBMLReader()
    # The document is kept in a local for the whole function on purpose:
    # the model and its lists are obtained from it.
    sbml_document = sbml_reader.readSBML(sbml_filepath)
    sbml_model = sbml_document.getModel()
    species_list = sbml_model.getListOfSpecies()
    reaction_list = sbml_model.getListOfReactions()

    genes = []
    for rxn in reaction_list:
        rxn_notes = sbmlPlugin.parseNotes(rxn)
        if "GENE_ASSOCIATION" not in rxn_notes.keys():
            continue
        # Using sbmlPlugin to recover all genes associated to the reaction
        rxn_genes = sbmlPlugin.parseGeneAssoc(rxn_notes["GENE_ASSOCIATION"][0])
        for rxn_gene in rxn_genes:
            if rxn_gene in genes:
                continue
            genes.append(rxn_gene)

    id_compounds = []
    for species in species_list:
        id_compounds.append(sbmlPlugin.convert_from_coded_id(species.id)[0])

    id_reactions = []
    for rxn in reaction_list:
        id_reactions.append(sbmlPlugin.convert_from_coded_id(rxn.id)[0])

    return genes, id_compounds, id_reactions
def check_ids(model_metabolic, model_faa, cutoff, verbose=False):
    """
    Check that the gene ids of a sbml model are found in a fasta faa file.

    Gene ids of the faa file are the record ids returned by SeqIO.parse
    (first line of each sequence: >GENE_ID ....).
    Gene ids of the metabolic network are read from the notes section of each
    reaction, GENE_ASSOCIATION: gene_id-1 or gene_id-2

    The ratio of model genes missing from the faa file is compared to
    (1 - cutoff), so cutoff is used as a fraction of genes that must be found.

    Parameters
    ----------
    model_metabolic: str
        path to sbml file
    model_faa: str
        path to fasta faa file
    cutoff: int or float
        minimal fraction of the model genes ids that must be found in the faa
    verbose: bool
        if True print the outcome of the comparison

    Returns
    -------
    bool
        True if the ratio of missing genes is 0 or at most (1 - cutoff),
        False otherwise

    Raises
    ------
    SystemExit
        if no gene id is found in the metabolic model
    """
    sbml_reader = libsbml.SBMLReader()
    sbml_document = sbml_reader.readSBML(model_metabolic)
    sbml_model = sbml_document.getModel()
    sbml_document.getNumErrors()
    sbml_reactions = sbml_model.getListOfReactions()

    # Gather the distinct gene ids of all the reactions having a GENE_ASSOCIATION
    model_metabolic_ids = set()
    for rxn in sbml_reactions:
        gene_assoc = sp.parseNotes(rxn).get("GENE_ASSOCIATION", [None])[0]
        if gene_assoc is None:
            continue
        model_metabolic_ids.update(sp.parseGeneAssoc(gene_assoc))

    with open(model_faa, "r") as fasta_handle:
        model_faa_ids = set()
        for record in SeqIO.parse(fasta_handle, "fasta"):
            model_faa_ids.add(record.id)

    missing_genes = model_metabolic_ids - model_faa_ids
    if not model_metabolic_ids:
        raise SystemExit("No genes found in model metabolic")
    missing_ratio = float(len(missing_genes)) / float(len(model_metabolic_ids))

    # all model_metabolic_ids are in model_faa_ids
    if missing_ratio == 0:
        if verbose:
            print("all genes of the model_metabolic are in the model_faa")
        return True

    # some genes are missing, but few enough regarding the cutoff
    if missing_ratio <= float(1 - cutoff):
        if verbose:
            print("Only %.2f%% genes of the model_metabolic are not in the model_faa" % (missing_ratio * 100))
        return True

    if verbose:
        print("%s%% genes of the model_metabolic are not in the model_faa" % (missing_ratio * 100))
        print(";".join(missing_genes))
    return False
def create_sbml_stat(species_name, sbml_file):
    """Extract reactions/compounds/genes from a sbml file.

    The file is read as plain XML. Compounds are taken from the 'metaid'
    attribute of the children of listOfSpecies, reactions from the 'id'
    attribute of the children of listOfReactions, genes from the notes whose
    text contains 'GENE_ASSOCIATION'.

    Args:
        species_name (str): species names
        sbml_file (str): path to a sbml file

    Returns
        list: [species name, list of genes, list of reactions, list of reactions associated with genes, list of compounds]
        The list of genes is not de-duplicated: a gene is listed once per
        association in which it appears.

    Raises
        ValueError: if the root of the file has no 'model' child element.
    """
    tree = etree.parse(sbml_file)
    sbml = tree.getroot()

    genes = []
    reactions = []
    gene_associated_rxns = []
    compounds = []

    # Tags may be namespace-qualified ("{uri}model"): compare the local name.
    # As before, the last matching child wins.
    model_element = None
    for e in sbml:
        if e.tag.rpartition("}")[2] == "model":
            model_element = e
    if model_element is None:
        raise ValueError("No model element found in sbml file: %s" % sbml_file)

    for els in model_element:
        if 'listOfSpecies' in els.tag:
            for el in els:
                compounds.append(
                    sbmlPlugin.convert_from_coded_id(el.get('metaid'))[0])
        if 'listOfReactions' in els.tag:
            for el in els:
                reaction_id = sbmlPlugin.convert_from_coded_id(el.get('id'))[0]
                reactions.append(reaction_id)
                # Elements are iterated directly: Element.getchildren() is
                # deprecated and no longer exists in recent ElementTree.
                for subel in el:
                    if 'notes' not in subel.tag:
                        continue
                    for subsubel in subel:
                        for subsubsubel in subsubel:
                            note_text = subsubsubel.text
                            # An element without text has text == None.
                            if note_text is None or 'GENE_ASSOCIATION' not in note_text:
                                continue
                            for gene in sbmlPlugin.parseGeneAssoc(note_text):
                                genes.append(
                                    gene.replace('GENE_ASSOCIATION:', ''))
                            if reaction_id not in gene_associated_rxns:
                                gene_associated_rxns.append(reaction_id)

    return [species_name, genes, reactions, gene_associated_rxns, compounds]
def add_delete_rxn(data_file, padmetSpec, output, padmetRef=None, source=None, tool=None, category="MANUAL", verbose=False):
    """
    Read a data_file (form created with template_add_delete and filled),
    for each row:
    if column 'Action' == 'add':
        copy the reaction from padmetRef to padmetSpec, then create its
        reconstructionData node and, if column 'Genes' is filled, its suppData
        node, the gene nodes and the 'is_linked_to' relations.
    elif column 'Action' == 'delete':
        remove the reaction from padmetSpec.
    elif column 'Action' is empty:
        nothing is done.
    Any other value stops the program (SystemExit) before the output is written.

    Can't add a reaction without a padmetRef: such rows are skipped.
    the source ensures the traceability of the reaction, it is a simple tag
    ex 'pathway_XX_update'. If not given, the filename of data_file (without
    extension) is used. source and tool are converted to upper case.
    if a tool was used to infer the reaction, define tool='name_of_the_tool'

    Parameters
    ----------
    data_file: str
        path to a delimited file with columns 'idRef', 'Comment', 'Action'
        and optionally 'Genes'; the delimiter is sniffed from the content
    padmetSpec: padmet.classes.PadmetSpec
        padmet to update
    output: str
        path to the new padmet file
    padmetRef: padmet.classes.PadmetRef
        padmet containing the database of reference
    source: str
        tag associated to the new reactions to create and add, used for traceability
    tool: str
        The eventual tool used to infer the reactions to create and add
    category: str
        The default category of the reaction added manually is 'MANUAL'. Must not be changed.
        Only used when a tool is given, otherwise 'MANUAL' is always stored.
    verbose: bool
        if True print information
    """
    if not source:
        source = os.path.splitext(os.path.basename(data_file))[0]
    source = source.upper()
    if tool:
        tool = tool.upper()
    if not category:
        category = "MANUAL"

    # 'origin' is the suffix of the reconstructionData / suppData node ids,
    # 'origin_label' is how it is shown in the warnings.
    if tool:
        origin, origin_label = tool, tool
    else:
        origin, origin_label = "MANUAL", "'MANUAL'"
    already_added_msg = "Warning: The reaction %s seems to be already added from the same source %s"

    with open(data_file, 'r') as csvfile:
        # Guess the delimiter from the whole content, then rewind to parse it.
        dialect = csv.Sniffer().sniff(csvfile.read())
        csvfile.seek(0)
        reader = csv.DictReader(csvfile, delimiter=dialect.delimiter)
        for row in reader:
            element_id = row["idRef"]
            comment = row["Comment"]
            action = row["Action"]
            genes_assoc = row.get("Genes")
            action_key = action.upper()

            if action_key == "ADD":
                if padmetRef is None:
                    if verbose:
                        print("No given padmetRef, unable to copy %s" %element_id)
                    continue
                if verbose:
                    print("Adding: %s" %(element_id))
                padmetSpec.copyNode(padmetRef, element_id)

                #reconstructionData:
                reconstructionData_id = element_id+"_reconstructionData_"+origin
                if tool:
                    reconstructionData = {"SOURCE": [source], "CATEGORY": [category], "TOOL": [tool], "COMMENT": [comment]}
                else:
                    reconstructionData = {"SOURCE": [source], "CATEGORY": ["MANUAL"], "COMMENT": [comment]}
                if verbose and reconstructionData_id in padmetSpec.dicOfNode:
                    print(already_added_msg %(element_id, origin_label))
                reconstructionData_rlt = Relation(element_id, "has_reconstructionData", reconstructionData_id)
                padmetSpec.createNode("reconstructionData", reconstructionData_id, reconstructionData, [reconstructionData_rlt])

                if not genes_assoc:
                    continue

                #suppData:
                suppData_id = element_id+"_SuppData_"+origin
                if verbose and suppData_id in padmetSpec.dicOfNode:
                    print(already_added_msg %(element_id, origin_label))
                suppData = {"GENE_ASSOCIATION": [genes_assoc]}
                #create the node suppData and the relation has_suppData
                suppData_rlt = Relation(element_id, "has_suppData", suppData_id)
                padmetSpec.createNode("suppData", suppData_id, suppData, [suppData_rlt])

                all_genes = parseGeneAssoc(genes_assoc)
                if verbose:
                    print("%s is linked to %s genes" %(element_id, len(all_genes)))
                for gene_id in all_genes:
                    #create the gene if not already in the padmet
                    if gene_id not in padmetSpec.dicOfNode:
                        padmetSpec.createNode("gene", gene_id)
                    #check if rxn already linked to gene x
                    linked_rlt = next(
                        (rlt for rlt in padmetSpec.dicOfRelationIn[element_id]
                         if rlt.type == "is_linked_to" and rlt.id_out == gene_id),
                        None)
                    if linked_rlt is None:
                        #rxn not linked to gene x
                        linked_rlt = Relation(element_id, "is_linked_to", gene_id, {"SOURCE:ASSIGNMENT": [source]})
                        padmetSpec._addRelation(linked_rlt)
                    else:
                        #rxn already linked to gene x, update misc
                        try:
                            linked_rlt.misc["SOURCE:ASSIGNMENT"].append(source)
                        except KeyError:
                            linked_rlt.misc["SOURCE:ASSIGNMENT"] = [source]

            elif action_key == "DELETE":
                if verbose:
                    print("deleting: %s" %(element_id))
                padmetSpec.delNode(element_id)

            elif action == "":
                print("Nothing to do for: %s" %(element_id))

            else:
                print("Action: %s unknown for %s" %(action, element_id))
                print("action must be = 'add' or 'delete' or ''")
                # Same effect as the interactive helper exit(), without
                # depending on the site module providing it.
                raise SystemExit()

    padmetSpec.generateFile(output)
def rxn_creator(data_file, padmetSpec, output, padmetRef=None, source=None, tool=None, category="MANUAL", verbose=False):
    """
    Read a data_file (form created with template_new_rxn and filled),
    for each reaction to create, add the reaction in padmetSpec (only if the id of
    the reaction is not already in padmetSpec or in padmetRef if given)
    the source ensures the traceability of the reaction, it is a simple tag
    ex 'pathway_XX_update'. If not given, the filename of data_file (without
    extension) is used. source and tool are converted to upper case.
    if a tool was used to infer the reaction, define tool='name_of_the_tool'
    the Padmet of reference padmetRef can be used to check that the reaction id
    is not already in the database and to copy information from the database for
    existing compounds
    strongly recommended to give a padmetRef.

    Each reaction of the form must define the fields 'reversible', 'comment'
    and 'linked_gene' (a KeyError is raised otherwise); 'reactant', 'product'
    and 'pathway' are optional. Reactants and products are written
    'stoichiometry:compound_id:compartment'; pathways are separated by ';'.

    Parameters
    ----------
    data_file: str
        path to file based on template_new_rxn()
    padmetSpec: padmet.classes.PadmetSpec
        padmet to update
    output: str
        path to the new padmet file
    padmetRef: padmet.classes.PadmetRef
        padmet containing the database of reference
    source: str
        tag associated to the new reactions to create and add, used for traceability
    tool: str
        The eventual tool used to infer the reactions to create and add
    category: str
        The default category of the reaction added manually is 'MANUAL'. Must not be changed.
        Only used when a tool is given, otherwise 'MANUAL' is always stored.
    verbose: bool
        if True print information
    """
    if not source:
        source = os.path.splitext(os.path.basename(data_file))[0]
    source = source.upper()
    if tool:
        tool = tool.upper()
    if not category:
        category = "MANUAL"

    # 'origin' is the suffix of the reconstructionData / suppData node ids,
    # 'origin_label' is how it is shown in the warnings.
    if tool:
        origin, origin_label = tool, tool
    else:
        origin, origin_label = "MANUAL", "'MANUAL'"
    already_added_msg = "Warning: The reaction %s seems to be already added from the same source %s"
    directions = {"TRUE": "REVERSIBLE", "FALSE": "LEFT-TO-RIGHT"}

    dict_data = _rxn_creator_read_form(data_file, verbose)
    if verbose:
        print("%s reactions to add" %len(dict_data))

    for reaction_id, reaction_data in dict_data.items():
        if verbose:
            print("check if the id %s is already used" %reaction_id)
        if reaction_id in padmetSpec.dicOfNode:
            print("the id : %s is already associated to an other reaction in padmetSpec, choose an other" %reaction_id)
            continue
        if padmetRef is not None and reaction_id in padmetRef.dicOfNode:
            print("the id : %s is already associated to an other reaction in padmetRef, choose an other" %reaction_id)
            continue

        if verbose:
            print("Adding reaction %s" %reaction_id)
        reaction_rev = directions.get(reaction_data["reversible"][0].upper())
        if reaction_rev is None:
            print("Please choose a value in ['true','false'] for the reversibility of the reaction: %s" %reaction_id)
            continue
        # 'comment' is kept as the list of values read from the form.
        comment = reaction_data["comment"]
        padmetSpec.createNode("reaction", reaction_id, {"DIRECTION": [reaction_rev]})

        #reconstructionData:
        reconstructionData_id = reaction_id+"_reconstructionData_"+origin
        if tool:
            reconstructionData = {"SOURCE": [source], "CATEGORY": [category], "TOOL": [tool], "COMMENT": comment}
        else:
            reconstructionData = {"SOURCE": [source], "CATEGORY": ["MANUAL"], "COMMENT": comment}
        if verbose and reconstructionData_id in padmetSpec.dicOfNode:
            print(already_added_msg %(reaction_id, origin_label))
        reconstructionData_rlt = Relation(reaction_id, "has_reconstructionData", reconstructionData_id)
        padmetSpec.createNode("reconstructionData", reconstructionData_id, reconstructionData, [reconstructionData_rlt])

        genes_assoc = reaction_data["linked_gene"][0]
        if genes_assoc:
            #suppData:
            suppData_id = reaction_id+"_SuppData_"+origin
            if verbose and suppData_id in padmetSpec.dicOfNode:
                print(already_added_msg %(reaction_id, origin_label))
            suppData = {"GENE_ASSOCIATION": [genes_assoc]}
            #create the node suppData and the relation has_suppData
            suppData_rlt = Relation(reaction_id, "has_suppData", suppData_id)
            padmetSpec.createNode("suppData", suppData_id, suppData, [suppData_rlt])
            _rxn_creator_link_genes(padmetSpec, reaction_id, genes_assoc, source, verbose)

        if verbose:
            print("check if all metabolites are already in the network")
        # Only a missing field means "not defined": errors raised while
        # creating the compounds or relations are no longer reported as such.
        reactants = reaction_data.get("reactant")
        if reactants is None:
            if verbose:
                print("No reactants defined")
        else:
            _rxn_creator_link_metabolites(padmetSpec, padmetRef, reaction_id, reactants, "consumes", verbose)
        products = reaction_data.get("product")
        if products is None:
            if verbose:
                print("No products defined")
        else:
            _rxn_creator_link_metabolites(padmetSpec, padmetRef, reaction_id, products, "produces", verbose)

        if "pathway" in reaction_data:
            for pwy_id in reaction_data["pathway"][0].split(";"):
                # An empty field splits into [''], which would otherwise
                # create a pathway whose id is the empty string.
                if not pwy_id:
                    continue
                if pwy_id not in padmetSpec.dicOfNode:
                    if verbose:
                        print("%s not in the network" %pwy_id)
                    if padmetRef is not None:
                        if verbose:
                            print("Check if new pathway %s is in dbref" %pwy_id)
                        if pwy_id in padmetRef.dicOfNode:
                            print("Warning the new pathway %s exist in the dbref, risk of overwritting data, change pwy id" %pwy_id)
                            continue
                    padmetSpec.createNode("pathway", pwy_id)
                    if verbose:
                        print("new pathway created: id = %s" %pwy_id)
                rlt = Relation(reaction_id, "is_in_pathway", pwy_id)
                padmetSpec._addRelation(rlt)

    if verbose:
        print("Creating output: %s" % output)
    padmetSpec.generateFile(output)


def _rxn_creator_read_form(data_file, verbose=False):
    """Parse a new-reaction form into {reaction_id: {field: [values]}}.

    Empty lines and lines starting with '#' are ignored. A line must hold
    exactly one separator (sniffed from the content); other lines are skipped,
    as are fields found before the first 'reaction_id' line.
    """
    with open(data_file, 'r') as f:
        all_read = f.read()
    sep = csv.Sniffer().sniff(all_read).delimiter

    dict_data = {}
    current_id = None
    for line in all_read.splitlines():
        if not line or line.startswith("#"):
            continue
        try:
            # Unpacking fails with ValueError (not TypeError) when the line
            # does not split into exactly two parts.
            attrib, value = line.split(sep)
        except ValueError:
            if verbose:
                print("Skipping malformed line: %s" %line)
            continue
        attrib = attrib.replace(" ", "")
        if attrib == "reaction_id":
            current_id = value
            dict_data[current_id] = {}
        elif current_id is not None:
            dict_data[current_id].setdefault(attrib, []).append(value)
    return dict_data


def _rxn_creator_link_genes(padmetSpec, reaction_id, genes_assoc, source, verbose=False):
    """Create the missing gene nodes of genes_assoc and link them to reaction_id."""
    all_genes = parseGeneAssoc(genes_assoc)
    if verbose:
        print("%s is linked to %s genes" %(reaction_id, len(all_genes)))
    for gene_id in all_genes:
        #create the gene if not already in the padmet
        if gene_id not in padmetSpec.dicOfNode:
            padmetSpec.createNode("gene", gene_id)
        #check if rxn already linked to gene x
        linked_rlt = next(
            (rlt for rlt in padmetSpec.dicOfRelationIn[reaction_id]
             if rlt.type == "is_linked_to" and rlt.id_out == gene_id),
            None)
        if linked_rlt is None:
            #rxn not linked to gene x
            linked_rlt = Relation(reaction_id, "is_linked_to", gene_id, {"SOURCE:ASSIGNMENT": [source]})
            padmetSpec._addRelation(linked_rlt)
        else:
            #rxn already linked to gene x, update misc
            try:
                linked_rlt.misc["SOURCE:ASSIGNMENT"].append(source)
            except KeyError:
                linked_rlt.misc["SOURCE:ASSIGNMENT"] = [source]


def _rxn_creator_link_metabolites(padmetSpec, padmetRef, reaction_id, metabolites_data, rlt_type, verbose=False):
    """Add a rlt_type ('consumes' / 'produces') relation per 'stoichiometry:id:compartment' entry.

    A compound missing from padmetSpec is copied from padmetRef when possible,
    otherwise created as a new compound node.
    """
    for metabolite_data in metabolites_data:
        stoechio, metabo_id, compart = metabolite_data.split(":")
        stoechio = stoechio.replace(",", ".")  #in case comma for sep
        if metabo_id not in padmetSpec.dicOfNode:
            if verbose:
                print("%s not in the network" %metabo_id)
            copied = False
            if padmetRef is not None:
                if verbose:
                    print("Try to copy from dbref")
                try:
                    padmetSpec._copyNodeExtend(padmetRef, metabo_id)
                    copied = True
                except KeyError:
                    if verbose:
                        print("%s not in the padmetRef" %metabo_id)
            if not copied:
                if verbose:
                    print("creating a new compound")
                padmetSpec.createNode("compound", metabo_id)
                if verbose:
                    print("new compound created: id = %s" %metabo_id)
        rlt = Relation(reaction_id, rlt_type, metabo_id)
        rlt.misc.update({"STOICHIOMETRY": [stoechio], "COMPARTMENT": [compart]})
        padmetSpec._addRelation(rlt)
def create_sbml_stat(species_name, sbml_file):
    """Extract reactions/compounds/genes from a sbml file.

    The file is read as plain XML. Compounds are taken from the 'metaid'
    attribute of the children of listOfSpecies, reactions from the 'id'
    attribute of the children of listOfReactions. Genes are read from the notes
    whose text contains 'GENE_ASSOCIATION'; when no gene (resp. no reaction
    with genes) is found that way, the ones read from the fbc
    geneProductAssociation elements (xml from MetaFlux) are used instead.

    Args:
        species_name (str): species names
        sbml_file (str): path to a sbml file

    Returns
        list: [species name, list of genes, list of reactions, list of reactions associated with genes, list of compounds]
        Each list holds distinct values, in order of first appearance.

    Raises
        ValueError: if the root of the file has no 'model' child element.
    """
    fbc_gene_attr = '{http://www.sbml.org/sbml/level3/version1/fbc/version2}geneProduct'

    tree = etree.parse(sbml_file)
    sbml = tree.getroot()

    genes = []
    reactions = []
    gene_associated_rxns = []
    fbc_gene_associated_rxns = []
    fbc_rxn_associated_genes = []
    compounds = []

    # Tags may be namespace-qualified ("{uri}model"): compare the local name.
    # As before, the last matching child wins.
    model_element = None
    for e in sbml:
        if e.tag.rpartition("}")[2] == "model":
            model_element = e
    if model_element is None:
        raise ValueError("No model element found in sbml file: %s" % sbml_file)

    for els in model_element:
        if 'listOfSpecies' in els.tag:
            for el in els:
                compounds.append(sbmlPlugin.convert_from_coded_id(el.get('metaid'))[0])
        if 'listOfReactions' in els.tag:
            for el in els:
                reaction_id = sbmlPlugin.convert_from_coded_id(el.get('id'))[0]
                reactions.append(reaction_id)
                # Elements are iterated directly: Element.getchildren() is
                # deprecated and no longer exists in recent ElementTree.
                for subel in el:
                    if 'notes' in subel.tag:
                        for subsubel in subel:
                            for subsubsubel in subsubel:
                                note_text = subsubsubel.text
                                # An element without text has text == None.
                                if note_text is None or 'GENE_ASSOCIATION' not in note_text:
                                    continue
                                for gene in sbmlPlugin.parseGeneAssoc(note_text):
                                    # Strip the label before the duplicate
                                    # check, so the check is made on the name
                                    # that is actually stored.
                                    gene = gene.replace('GENE_ASSOCIATION:', '')
                                    if gene not in genes:
                                        genes.append(gene)
                                if reaction_id not in gene_associated_rxns:
                                    gene_associated_rxns.append(reaction_id)
                    # Use geneProductAssociation for xml from MetaFlux.
                    elif 'geneProductAssociation' in subel.tag:
                        for subsubel in subel:
                            # Either a direct geneProductRef, or a grouping
                            # element whose children carry the gene attribute.
                            if 'geneProductRef' in subsubel.tag:
                                gene_refs = [subsubel]
                            else:
                                gene_refs = list(subsubel)
                            for gene_ref in gene_refs:
                                gene = gene_ref.get(fbc_gene_attr)
                                if not gene:
                                    continue
                                # NOTE(review): this removes every 'G_' of the
                                # id, not only a leading one - confirm this is
                                # the intended decoding.
                                gene = gene.replace('G_', '')
                                if gene not in fbc_rxn_associated_genes:
                                    fbc_rxn_associated_genes.append(gene)
                                if reaction_id not in fbc_gene_associated_rxns:
                                    fbc_gene_associated_rxns.append(reaction_id)

    # For XML from MetaFlux, use genes from geneProductAssociation to get genes and reaction with genes.
    if not genes and fbc_rxn_associated_genes:
        genes = fbc_rxn_associated_genes
    if not gene_associated_rxns and fbc_gene_associated_rxns:
        gene_associated_rxns = fbc_gene_associated_rxns

    return [species_name, genes, reactions, gene_associated_rxns, compounds]
def enhance_db(metabolic_reactions, padmet, with_genes, verbose = False):
    """
    Parse sbml metabolic_reactions and add to padmet the reactions whose name
    is not the id of a reaction node of padmet.
    For each added reaction, the missing compound nodes are created and the
    'consumes' / 'produces' relations are appended to list_of_relation.
    if with_genes: also create the missing gene nodes and append the
    'is_linked_to' relations.

    The errors reported by libsbml when reading the file are printed.

    Parameters
    ----------
    metabolic_reactions: str
        path to sbml metabolic-reactions.xml
    padmet: padmet.PadmetRef
        padmet instance, its dicOfNode is updated in place
    with_genes: bool
        if true also add genes information.
    verbose: bool
        if True print the progression

    Returns
    -------
    padmet.padmetRef:
        the padmet instance given as input, with the nodes of
        metabolic-reactions.xml added
    """
    print("loading sbml file: %s" %metabolic_reactions)
    reader = libsbml.SBMLReader()
    document = reader.readSBML(metabolic_reactions)
    for i in range(document.getNumErrors()):
        print(document.getError(i).getMessage())
    model = document.getModel()
    listOfReactions = model.getListOfReactions()

    #recover the reactions that are not in the basic metacyc but in the sbml file
    #use the reactions_name instead of ids because the ids are encoded, the name is the non-encoded version of the id
    padmet_reactions_id = {node.id for node in padmet.dicOfNode.values() if node.type == "reaction"}
    reaction_to_add = [reaction for reaction in listOfReactions
                       if reaction.getName() not in padmet_reactions_id]
    nb_to_add = len(reaction_to_add)
    if verbose:
        print(str(nb_to_add)+" reactions to add")

    # NOTE(review): list_of_relation is not defined in this function; it is
    # presumably a module-level list filled here and used by the caller - confirm.
    for count, reactionSBML in enumerate(reaction_to_add, 1):
        reaction_id = reactionSBML.getName()
        if verbose:
            print(str(count)+"/"+str(nb_to_add)+"\t"+reaction_id)
        if reactionSBML.getReversible():
            reaction_dir = "REVERSIBLE"
        else:
            reaction_dir = "LEFT-TO-RIGHT"
        # The name can already be the id of a node which is not a reaction:
        # such a node is left untouched.
        if reaction_id not in padmet.dicOfNode:
            padmet.dicOfNode[reaction_id] = Node("reaction", reaction_id, {"DIRECTION": [reaction_dir]})

        participants = (
            ("consumes", reactionSBML.getListOfReactants()),
            ("produces", reactionSBML.getListOfProducts()),
        )
        for rlt_type, species_refs in participants:
            for species_ref in species_refs:
                #convert ids
                compound_id, _type, compart = sbmlPlugin.convert_from_coded_id(species_ref.getSpecies())
                if compound_id not in padmet.dicOfNode:
                    # The node is stored under the compound id; storing it
                    # under reaction_id would overwrite the reaction node.
                    padmet.dicOfNode[compound_id] = Node("compound", compound_id)
                stoich = species_ref.getStoichiometry()
                list_of_relation.append(
                    Relation(reaction_id, rlt_type, compound_id,
                             {"STOICHIOMETRY": [stoich], "COMPARTMENT": [compart]}))

        if with_genes:
            notes = sbmlPlugin.parseNotes(reactionSBML)
            if "GENE_ASSOCIATION" in notes:
                #Using sbmlPlugin to recover all genes associated to the reaction
                for gene in sbmlPlugin.parseGeneAssoc(notes["GENE_ASSOCIATION"][0]):
                    #create the gene if not already in the padmet
                    if gene not in padmet.dicOfNode:
                        padmet.dicOfNode[gene] = Node("gene", gene)
                    list_of_relation.append(Relation(reaction_id, "is_linked_to", gene))

    return padmet