def extract_rxn_with_gene_assoc(sbml, output, verbose=False):
    """
    Write a copy of an sbml file that keeps only the reactions linked to genes.

    A reaction is kept when its parsed notes contain the key
    'GENE_ASSOCIATION'; every other reaction is removed from the model
    before the document is written to ``output``.
    Errors logged by libsbml while reading the file are printed.

    Parameters
    ----------
    sbml: str
        sbml input handed to ``SBMLReader.readSBML`` (presumably a pathname)
    output: str
        pathname of the output sbml
    verbose: bool
        accepted but not used by this function
    """
    document = libsbml.SBMLReader().readSBML(sbml)
    for error_index in range(document.getNumErrors()):
        print(document.getError(error_index).getMessage())

    reactions = document.getModel().getListOfReactions()
    # Gather the ids first: the list is not modified while it is iterated.
    ids_without_genes = [
        rxn.getId()
        for rxn in reactions
        if "GENE_ASSOCIATION" not in parseNotes(rxn)
    ]
    for orphan_id in ids_without_genes:
        reactions.remove(orphan_id)

    libsbml.writeSBMLToFile(document, output)
def extract_data_sbml(sbml_filepath):
    """
    Read a sbml file and return its genes, compound ids and reaction ids.

    Genes are collected from the 'GENE_ASSOCIATION' note of each reaction
    (first value only), without duplicates and in order of first appearance.
    Compound and reaction ids are the first element returned by
    ``sbmlPlugin.convert_from_coded_id`` for each sbml id.

    Parameters
    ----------
    sbml_filepath: str
        sbml input handed to ``SBMLReader.readSBML``

    Returns
    -------
    tuple
        (genes, id_compounds, id_reactions), three lists
    """
    sbml_model = libsbml.SBMLReader().readSBML(sbml_filepath).getModel()
    species_list = sbml_model.getListOfSpecies()
    reaction_list = sbml_model.getListOfReactions()

    genes = []
    for rxn in reaction_list:
        rxn_notes = sbmlPlugin.parseNotes(rxn)
        if "GENE_ASSOCIATION" not in rxn_notes:
            continue
        # Using sbmlPlugin to recover all genes associated to the reaction
        for gene in sbmlPlugin.parseGeneAssoc(rxn_notes["GENE_ASSOCIATION"][0]):
            if gene not in genes:
                genes.append(gene)

    id_compounds = []
    for species in species_list:
        id_compounds.append(sbmlPlugin.convert_from_coded_id(species.id)[0])

    id_reactions = []
    for rxn in reaction_list:
        id_reactions.append(sbmlPlugin.convert_from_coded_id(rxn.id)[0])

    return genes, id_compounds, id_reactions
def check_ids(model_metabolic, model_faa, cutoff, verbose=False):
    """
    Check that the gene ids of a metabolic network are found in a faa file.

    faa gene ids are the record ids of the fasta file (first line of each
    sequence: >GENE_ID ....).
    Metabolic network gene ids are read from the note section of each
    reaction, GENE_ASSOCIATION: gene_id-1 or gene_id-2

    Parameters
    ----------
    model_metabolic: str
        path to sbml file
    model_faa: str
        path to fasta faa file
    cutoff: int or float
        the check passes when the ratio of model genes missing from the faa
        is lower than or equal to ``1 - cutoff``
        (presumably a fraction between 0 and 1 -- confirm with callers)
    verbose: bool
        if True print information

    Returns
    -------
    bool
        True if no gene is missing or if the missing ratio is within the
        cutoff, False otherwise

    Raises
    ------
    SystemExit
        if no gene is found in the metabolic network
    """
    document = libsbml.SBMLReader().readSBML(model_metabolic)
    model = document.getModel()
    # Kept from the original implementation; the returned count is not used.
    document.getNumErrors()

    # Gene ids of the metabolic network, gathered reaction by reaction.
    model_metabolic_ids = set()
    for rxn in model.getListOfReactions():
        gene_assoc = sp.parseNotes(rxn).get("GENE_ASSOCIATION", [None])[0]
        if gene_assoc is not None:
            model_metabolic_ids.update(sp.parseGeneAssoc(gene_assoc))

    with open(model_faa, "r") as faa_handle:
        model_faa_ids = {record.id for record in SeqIO.parse(faa_handle, "fasta")}

    diff_genes = model_metabolic_ids - model_faa_ids
    if not model_metabolic_ids:
        raise SystemExit("No genes found in model metabolic")
    diff_genes_ratio = len(diff_genes) / len(model_metabolic_ids)

    # All genes of the metabolic network are in the faa.
    if diff_genes_ratio == 0:
        if verbose:
            print("all genes of the model_metabolic are in the model_faa")
        return True

    # Some genes are missing but the ratio stays within the cutoff.
    if diff_genes_ratio <= float(1 - cutoff):
        if verbose:
            print("Only %.2f%% genes of the model_metabolic are not in the model_faa" % (diff_genes_ratio*100))
        return True

    if verbose:
        print("%s%% genes of the model_metabolic are not in the model_faa" % (diff_genes_ratio*100))
        print(";".join(diff_genes))
    return False
def dict_data_to_sbml(dict_data, dict_orthogroups=None, dict_orthologues=None, strict_match=True):
    """
    Create the sbml of a study organism from the sbml of a model organism
    and a dict of orthogroups (or a dict of orthologues).

    1./ Read dict_orthogroups (or dict_orthologues) and check if the model
    associated to dict_data and the study org share orthologues.
    2./ Read the sbml of the model, parse all reactions and get the genes
    associated to each reaction.
    3./ For each reaction:
        Parse genes associated to sub parts
        (ex: (gene-a and gene-b) or gene-c) = [(gene-a,gene-b), gene-c]
        Check if the study org has orthologues for every gene of at least
        one sub part, (gene-a, gene-b) or gene-c.
        If yes: keep the reaction and replace the genes ids by the study org
        genes ids.
    4./ Remove unused reactions and species, then write the new sbml file.

    Nothing is written (and None is returned) when the two organisms share
    no orthologue or when no reaction can be kept.

    Parameters
    ----------
    dict_data: dict
        {'study_id': study_id,
        'model_id' : model_id,
        'sbml_template': path to sbml of model,
        'output': path to the output sbml,
        'verbose': bool, if true print information (optional)
        }
    dict_orthogroups: dict
        k = orthogroup_id, v = {k = organism name, v = set of genes}
    dict_orthologues: dict
        read as dict_orthologues[model_id][gene_id][study_id] -> genes of
        the study org; only used when dict_orthogroups is empty or None
    strict_match: bool
        accepted but not used by this function

    Raises
    ------
    ValueError
        if neither dict_orthogroups nor dict_orthologues is given
    """
    study_id = dict_data['study_id']
    model_id = dict_data['model_id']
    sbml_template = dict_data['sbml_template']
    output = dict_data['output']
    verbose = dict_data.get('verbose')

    # sub_dict_orth: k = gene id of the model, v = genes ids of the study org
    sub_dict_orth = {}
    if dict_orthogroups:
        if verbose:
            print(
                "*Extracting orthogroups data to create sbml of {0} from {1}".
                format(study_id, model_id))
        for orthogroup in dict_orthogroups.values():
            try:
                all_to_compare_genes = orthogroup[model_id]
                all_study_genes = orthogroup[study_id]
            except KeyError:
                # Orthogroup not shared by both organisms.
                continue
            for to_compare_gene in all_to_compare_genes:
                sub_dict_orth.setdefault(to_compare_gene, set()).update(all_study_genes)
    elif dict_orthologues:
        if verbose:
            print(
                "*Extracting orthologues data to create sbml of {0} from {1}".
                format(study_id, model_id))
        for gene_id, gene_dict in dict_orthologues[model_id].items():
            try:
                sub_dict_orth[gene_id] = gene_dict[study_id]
            except KeyError:
                pass
    else:
        # The exception was previously built but never raised, which led to
        # a NameError further down.
        raise ValueError("Must give one dict of orthogroups or orthologue")

    if not sub_dict_orth:
        if verbose:
            print("\t{0} and {1} don't share any ortholgue".format(
                study_id, model_id))
        return

    reader = libsbml.SBMLReader()
    document_to_compare = reader.readSBML(sbml_template)
    for i in range(document_to_compare.getNumErrors()):
        print(document_to_compare.getError(i).getMessage())
    model_to_compare = document_to_compare.getModel()
    listOfReactions_with_genes = [
        rxn for rxn in model_to_compare.getListOfReactions()
        if sp.parseNotes(rxn).get("GENE_ASSOCIATION", [None])[0]
    ]
    if verbose:
        print("\tSbml of {0} contains {1}/{2} reactions with genes assocation".
              format(model_id, len(listOfReactions_with_genes),
                     len(model_to_compare.getListOfReactions())))

    # dict_rxn_ga: k = id of a reaction to keep, v = its new gene association
    dict_rxn_ga = {}
    for rxn in listOfReactions_with_genes:
        ga = sp.parseNotes(rxn)['GENE_ASSOCIATION'][0]
        # Rewrite the association with '|' and '&' operators and no blanks
        # before handing it to gbr.
        ga_for_gbr = re.sub(r" or ", "|", ga)
        ga_for_gbr = re.sub(r" and ", "&", ga_for_gbr)
        ga_for_gbr = re.sub(r"\s", "", ga_for_gbr)
        if "|" in ga_for_gbr or "&" in ga_for_gbr:
            to_compare_ga_subsets = list(gbr.compile_input(ga_for_gbr))
        else:
            # Single gene: only strip the parentheses.
            ga_for_gbr = re.sub(r"\(|\)", "", ga_for_gbr)
            to_compare_ga_subsets = [[ga_for_gbr]]

        # ex: to_compare_ga_subsets = [('a','c','d'),('c',)]
        #     sub_dict_orth = {'a':['a_a'],'c':['c_c'], 'd':['d_d']}
        study_ga_subsets = []
        for to_compare_subset in to_compare_ga_subsets:
            study_subset = set()
            for gene in to_compare_subset:
                if gene not in sub_dict_orth:
                    # One gene without orthologue invalidates the sub part.
                    study_subset = set()
                    break
                study_subset.update(sub_dict_orth[gene])
            if study_subset:
                study_ga_subsets.append(study_subset)

        if study_ga_subsets:
            # Genes are sorted so that the written association does not
            # depend on set iteration order.
            study_ga = " or ".join(
                "(" + " and ".join(sorted(subset)) + ")"
                for subset in study_ga_subsets)
            if verbose:
                print("\t\tAdding %s" % rxn.id)
                print("\t\tGENE_ASSOCIATION: %s" % (study_ga))
            dict_rxn_ga[rxn.id] = study_ga

    if not dict_rxn_ga:
        if verbose:
            print(
                "\tNo reaction added from {0} to {1} because of missing orthologues"
                .format(model_id, study_id))
        return

    rxn_id_to_remove = {
        rxn.id for rxn in model_to_compare.getListOfReactions()
    }.difference(dict_rxn_ga)
    if verbose:
        print("\tRemoving %s unused reactions" % len(rxn_id_to_remove))
    for rxn_id in rxn_id_to_remove:
        model_to_compare.removeReaction(rxn_id)

    cpd_id_to_preserve = set()
    for rxn_id, study_ga in dict_rxn_ga.items():
        rxn = model_to_compare.getElementBySId(rxn_id)
        # Update notes: same notes as the model, with the new gene association.
        notes_in_dict = sp.parseNotes(rxn)
        notes_in_dict["GENE_ASSOCIATION"] = [study_ga]
        paragraphs = "".join(
            "<p>" + k + ": " + v + "</p>"
            for k, v_list in notes_in_dict.items()
            for v in v_list)
        notes = "<body xmlns=\"http://www.w3.org/1999/xhtml\">" + paragraphs + "</body>"
        rxn.setNotes(notes)
        cpd_id_to_preserve.update(p.getSpecies() for p in rxn.getListOfProducts())
        cpd_id_to_preserve.update(r.getSpecies() for r in rxn.getListOfReactants())

    # Snapshot the ids before removing species from the model.
    all_species = [cpd.id for cpd in model_to_compare.getListOfSpecies()]
    for cpd_id in all_species:
        if cpd_id not in cpd_id_to_preserve:
            model_to_compare.removeSpecies(cpd_id)

    new_id = os.path.basename(os.path.splitext(output)[0])
    model_to_compare.setId(new_id)
    libsbml.writeSBMLToFile(document_to_compare, output)
def sbml_to_curation(sbml_file, rxn_list, output, extract_gene=False, comment="N.A", verbose=False):
    """
    Read a sbml file, check that each reaction id is in the sbml (if not,
    raise ValueError), then create the form.
    This form can then be used with manual_curation.py

    For each reaction the form contains, tab separated: the decoded
    reaction id, the comment, the reversibility, the linked genes, a header
    line, then one line per reactant and per product formatted as
    stoichio:compound_id:compart. Reactions are separated by an empty line.

    Parameters
    ----------
    sbml_file: str
        path to sbml file
    rxn_list: list
        list of reaction ids, ids must be identical to the ones in the
        sbml, be careful with encoded ids.
    output: str
        path to the form to create
    extract_gene: bool
        if true extract genes association
    comment: str
        Comment why the reaction will be added in the network for traceability.
    verbose: bool
        if True print information

    Raises
    ------
    FileNotFoundError
        if sbml_file does not exist
    ValueError
        if a reaction id of rxn_list is not in the sbml; raised before the
        output file is opened
    """
    if not os.path.exists(sbml_file):
        raise FileNotFoundError(
            "No SBML file (--sbml/sbml_file) accessible at " + sbml_file)
    reader = libsbml.SBMLReader()
    document = reader.readSBML(sbml_file)
    for i in range(document.getNumErrors()):
        print(document.getError(i).getMessage())
    model = document.getModel()
    listOfReactions = model.getListOfReactions()

    # Check if reactions ids are in model. The ids of the model are
    # collected once instead of once per requested reaction.
    if verbose:
        print("Check if reaction(s) are in sbml file")
    known_rxn_ids = {rxn.id for rxn in listOfReactions}
    for rxn_id in rxn_list:
        if rxn_id not in known_rxn_ids:
            raise ValueError("/!\\ reaction %s not found" % rxn_id)
        if verbose:
            print("reaction %s found" % rxn_id)

    # Create form output
    with open(output, 'w') as f:
        for rxn_id in rxn_list:
            rxn_sbml = listOfReactions.getElementBySId(rxn_id)
            rxn_id_decoded = convert_from_coded_id(rxn_id)[0]
            if verbose:
                print("extracting reaction %s, decoded id as %s" %
                      (rxn_id, rxn_id_decoded))
            f.write("\t".join(["reaction_id", rxn_id_decoded]) + "\n")
            f.write("\t".join(["comment", comment]) + "\n")
            reversible = "true" if rxn_sbml.reversible else "false"
            f.write("\t".join(["reversible", reversible]) + "\n")

            # Linked genes stay empty unless requested and present in notes.
            gene_assoc = ""
            if extract_gene:
                try:
                    gene_assoc = parseNotes(rxn_sbml)["GENE_ASSOCIATION"][0]
                except KeyError:
                    gene_assoc = ""
            f.write("\t".join(["linked_gene", gene_assoc]) + "\n")

            f.write("\t".join(["#reactant/product",
                               "#stoichio:compound_id:compart"]) + "\n")
            # Reactants first, then products, same line layout for both.
            sides = (("reactant", rxn_sbml.getListOfReactants()),
                     ("product", rxn_sbml.getListOfProducts()))
            for role, species_refs in sides:
                for species_ref in species_refs:
                    stoich = str(abs(species_ref.getStoichiometry()))
                    species_id, _, compart = convert_from_coded_id(
                        species_ref.getSpecies())
                    f.write(role + "\t" +
                            ":".join([stoich, species_id, compart]) + "\n")
            f.write("\n")
def enhance_db(metabolic_reactions, padmet, with_genes, verbose = False):
    """
    Parse sbml metabolic_reactions and add its reactions in padmet.
    if with_genes: also add genes information.

    Only the reactions whose name is not the id of a 'reaction' node of
    padmet are processed. For each of them, missing reaction, compound and
    (optionally) gene nodes are added to ``padmet.dicOfNode``, and the
    'consumes', 'produces' and 'is_linked_to' relations are appended to
    ``list_of_relation``.

    NOTE(review): ``list_of_relation`` is not defined in this function; it
    is assumed to be a module-level list that the caller consumes --
    confirm.

    Parameters
    ----------
    metabolic_reactions: str
        path to sbml metabolic-reactions.xml
    padmet: padmet.PadmetRef
        padmet instance
    with_genes: bool
        if true also add genes information.
    verbose: bool
        if True print progress information

    Returns
    -------
    padmet.padmetRef:
        the same padmet instance, updated with the metabolic-reactions.xml data
    """
    print("loading sbml file: %s" %metabolic_reactions)
    reader = libsbml.SBMLReader()
    document = reader.readSBML(metabolic_reactions)
    for i in range(document.getNumErrors()):
        print(document.getError(i).getMessage())
    model = document.getModel()
    listOfReactions = model.getListOfReactions()

    # Recover the reactions that are not in the basic metacyc but in the sbml file.
    # Use the reaction name instead of the id because the ids are encoded,
    # the name is the non-encoded version of the id.
    padmet_reactions_id = {node.id for node in padmet.dicOfNode.values() if node.type == "reaction"}
    reaction_to_add = [reaction for reaction in listOfReactions if reaction.getName() not in padmet_reactions_id]
    if verbose:
        print(str(len(reaction_to_add))+" reactions to add")

    for count, reactionSBML in enumerate(reaction_to_add, start=1):
        reaction_id = reactionSBML.getName()
        if verbose:
            print(str(count)+"/"+str(len(reaction_to_add))+"\t"+reaction_id)
        if reactionSBML.getReversible():
            reaction_dir = "REVERSIBLE"
        else:
            reaction_dir = "LEFT-TO-RIGHT"
        # An existing node with this id (whatever its type) is left untouched.
        if reaction_id not in padmet.dicOfNode:
            padmet.dicOfNode[reaction_id] = Node("reaction", reaction_id, {"DIRECTION": [reaction_dir]})

        for reactant in reactionSBML.getListOfReactants():
            # convert ids
            reactant_id, _type, reactant_compart = sbmlPlugin.convert_from_coded_id(reactant.getSpecies())
            if reactant_id not in padmet.dicOfNode:
                # Store the compound under its own id: it was previously
                # stored under reaction_id, overwriting the reaction node.
                padmet.dicOfNode[reactant_id] = Node("compound",reactant_id)
            reactant_stoich = reactant.getStoichiometry()
            consumes_rlt = Relation(reaction_id,"consumes",reactant_id, {"STOICHIOMETRY":[reactant_stoich], "COMPARTMENT": [reactant_compart]})
            list_of_relation.append(consumes_rlt)

        for product in reactionSBML.getListOfProducts():
            product_id, _type, product_compart = sbmlPlugin.convert_from_coded_id(product.getSpecies())
            if product_id not in padmet.dicOfNode:
                padmet.dicOfNode[product_id] = Node("compound",product_id)
            product_stoich = product.getStoichiometry()
            produces_rlt = Relation(reaction_id,"produces",product_id,{"STOICHIOMETRY": [product_stoich], "COMPARTMENT": [product_compart]})
            list_of_relation.append(produces_rlt)

        if with_genes:
            notes = sbmlPlugin.parseNotes(reactionSBML)
            if "GENE_ASSOCIATION" in notes:
                # Using sbmlPlugin to recover all genes associated to the reaction
                listOfGenes = sbmlPlugin.parseGeneAssoc(notes["GENE_ASSOCIATION"][0])
                for gene in listOfGenes:
                    # Add the gene only if not already in the padmet. The
                    # previous lookup caught TypeError, so a missing gene
                    # raised an uncaught KeyError.
                    if gene not in padmet.dicOfNode:
                        padmet.dicOfNode[gene] = Node("gene",gene)
                    is_linked_rlt = Relation(reaction_id, "is_linked_to", gene)
                    list_of_relation.append(is_linked_rlt)
    return padmet