def test_sbml_to_curation_form_cli():
    """
    End-to-end check of the `padmet sbml_to_curation_form` command line.

    Builds a padmet from the test PGDB, exports it to SBML level 3, asks the
    CLI for a curation form restricted to three reactions and verifies that
    every original reaction id is present in the generated form.
    """
    expected_reactions = ['ACYLCOADEHYDROG-RXN', 'ACYLCOASYN-RXN', 'ENOYL-COA-HYDRAT-RXN']

    # Step 1: PGDB -> padmet, keeping gene information.
    pgdb_cmd = ['padmet', 'pgdb_to_padmet',
                '--pgdb', 'test_data/pgdb',
                '--output', 'test.padmet',
                '--extract-gene']
    subprocess.call(pgdb_cmd)

    # Step 2: padmet -> SBML level 3.
    sbml_cmd = ['padmet', 'sbmlGenerator',
                '--padmet', 'test.padmet',
                '--output', 'fabo.sbml',
                '--sbml_lvl', '3']
    subprocess.call(sbml_cmd)

    # Step 3: the CLI expects SBML-encoded reaction ids, one per line.
    encoded_ids = []
    for reaction in expected_reactions:
        encoded_ids.append('R_' + sbmlPlugin.convert_to_coded_id(reaction))
    with open('reactions.txt', 'w') as tmp_file:
        tmp_file.writelines(encoded + '\n' for encoded in encoded_ids)

    # Step 4: SBML -> curation form.
    form_cmd = ['padmet', 'sbml_to_curation_form',
                '--sbml', 'fabo.sbml',
                '--output', 'form.txt',
                '--rxn_file', 'reactions.txt']
    subprocess.call(form_cmd)

    # Intermediate files are no longer needed once the form exists.
    for leftover in ('test.padmet', 'fabo.sbml', 'reactions.txt'):
        os.remove(leftover)

    with open('form.txt', 'r') as form_file:
        form_content = form_file.read()
    for reaction in expected_reactions:
        assert reaction in form_content

    os.remove('form.txt')
def add_ga(rId_encoded, all_ga_subsets):
    """
    Build the fbc <geneAssociation> XML fragments of one reaction and append
    them, as a list of strings, to the module-level list `all_ga`.

    Layout of the generated fragment:
    - exactly one subset:
        one gene   -> a single <gene/> element
        otherwise  -> an <or> block holding one <gene/> per gene
    - any other number of subsets: an <or> block holding, for each subset,
        one gene   -> a single <gene/> element
        otherwise  -> an <and> block holding one <gene/> per gene

    NOTE(review): a lone multi-gene subset is written as <or> whereas a
    multi-gene subset among several is written as <and>; confirm that this
    asymmetry is intended.

    Parameters
    ----------
    rId_encoded: str
        sbml-encoded reaction id, used as the `reaction` attribute
    all_ga_subsets: list
        list of gene collections (sets or lists of gene ids)
    """
    global all_ga

    def gene_element(raw_gene_id):
        # Gene ids are encoded for sbml before being referenced.
        return '<gene reference="' + sp.convert_to_coded_id(raw_gene_id) + '"/>'

    # Association ids are numbered from 1, following the fragments already stored.
    association_index = len(all_ga) + 1
    fragment = [' <geneAssociation id="ga_' + str(association_index) + '" reaction="' + rId_encoded + '">']

    if len(all_ga_subsets) == 1:
        only_genes = list(all_ga_subsets[0])
        if len(only_genes) == 1:
            fragment.append(gene_element(only_genes[0]))
        else:
            fragment.append('<or>')
            fragment.extend(gene_element(gene) for gene in only_genes)
            fragment.append('</or>')
    else:
        fragment.append('<or>')
        for subset in all_ga_subsets:
            subset_genes = list(subset)
            if len(subset_genes) == 1:
                fragment.append(gene_element(subset_genes[0]))
                continue
            fragment.append('<and>')
            fragment.extend(gene_element(gene) for gene in subset_genes)
            fragment.append('</and>')
        fragment.append('</or>')

    fragment.append('</geneAssociation>')
    all_ga.append(fragment)
def compound_to_sbml(species_compart, output, verbose = False):
    """
    Write a list of compounds as the species of a new sbml file
    (document created with libsbml.SBMLDocument(2, 1)).

    Each species id is encoded with sbmlPlugin.convert_to_coded_id using the
    "M" prefix and the compound compartment; the species name is the original
    compound id. A compound given without compartment is placed in "c".

    Parameters
    ----------
    species_compart: str or iterable
        pathname to the file containing the compounds ids and the compart,
        one compound per line: cpd-id<TAB>compart (the compart column is
        optional, blank lines are ignored).
        An already parsed iterable of [cpd-id] or [cpd-id, compart] sequences
        is also accepted.
    output: str
        pathname to the sbml file to create
    verbose: bool
        print informations

    Raises
    ------
    FileNotFoundError
        if species_compart is a pathname that is not an existing file
    """
    if isinstance(species_compart, (str, bytes, os.PathLike)):
        # A pathname that does not exist used to be iterated character by
        # character, silently producing one bogus species per character.
        if not os.path.isfile(species_compart):
            raise FileNotFoundError("No such compounds file: %s" % species_compart)
        with open(species_compart, 'r') as f:
            # Blank lines would otherwise create a species with an empty id.
            species_compart = [line.split("\t") for line in f.read().splitlines() if line.strip()]
    else:
        # Materialise generators so that len() below and the loop both work.
        species_compart = list(species_compart)

    document = libsbml.SBMLDocument(2, 1)
    model = document.createModel()
    if verbose:
        print("%s species" %len(species_compart))

    for data in species_compart:
        species_id = data[0]
        # Default compartment when only the compound id is provided.
        compart = "c" if len(data) == 1 else data[1]
        sId_encoded = sp.convert_to_coded_id(species_id, "M", compart)
        s = model.createSpecies()
        check(s, 'create species')
        check(s.setId(sId_encoded), 'set species id')
        check(s.setName(species_id), 'set species name')
        check(s.setCompartment(sp.convert_to_coded_id(compart)), 'set species compartment')

    libsbml.writeSBMLToFile(document, output)
def test_sbml_to_curation_form():
    """
    Check sbml_to_curation through the Python API.

    The test PGDB is converted to a padmet object, exported to sbml, and a
    curation form is generated for three reactions; each original reaction id
    must be found in the form.
    """
    expected_reactions = ['ACYLCOADEHYDROG-RXN', 'ACYLCOASYN-RXN', 'ENOYL-COA-HYDRAT-RXN']

    padmet_spec = from_pgdb_to_padmet('test_data/pgdb', extract_gene=True)
    padmet_to_sbml(padmet_spec, 'fabo.sbml')

    # sbml_to_curation works on sbml-encoded reaction ids.
    encoded_ids = []
    for reaction in expected_reactions:
        encoded_ids.append('R_' + sbmlPlugin.convert_to_coded_id(reaction))

    sbml_to_curation('fabo.sbml', encoded_ids, 'form.txt')
    os.remove('fabo.sbml')

    with open('form.txt', 'r') as form_file:
        form_content = form_file.read()
    for reaction in expected_reactions:
        assert reaction in form_content

    os.remove('form.txt')
def padmet_to_sbml(padmet, output, model_id = None, obj_fct = None, sbml_lvl = 3, mnx_chem_prop = None, mnx_chem_xref = None, verbose = False):
    """
    Convert padmet file to sbml file.

    Specificity:
    - ids are encoded for sbml using functions sbmlPlugin.convert_to_coded_id
    - sbml level 2: flux bounds and objective coefficient are stored as
      parameters of each reaction kinetic law
    - sbml level 3: flux bounds, objective and gene associations are stored
      with the fbc package (version 1)

    Parameters
    ----------
    padmet: str or padmet.classes.PadmetSpec/PadmetRef
        the pathname to the padmet file to convert, or PadmetSpec/PadmetRef object
    output: str
        the pathname to the sbml file to create
    model_id: str or None
        model id to set in sbml file, default is the output file name without
        its extension
    obj_fct: str or None
        the identifier of the objection function, the reaction to test in FBA
    sbml_lvl: int
        the sbml level, 2 or 3 (a false value means 3)
    mnx_chem_prop: str or None
        given to parse_mnx_chem_prop (presumably the pathname to the MetaNetX
        chem_prop file -- to confirm). Only used if mnx_chem_xref is also set.
    mnx_chem_xref: str or None
        given to parse_mnx_chem_xref (presumably the pathname to the MetaNetX
        chem_xref file -- to confirm). Only used if mnx_chem_prop is also set.
    verbose: bool
        print informations

    Raises
    ------
    ValueError
        if sbml_lvl is neither 2 nor 3
    """
    global all_ga
    if isinstance(padmet, str):
        padmet = PadmetSpec(padmet)

    if not model_id:
        model_id = os.path.splitext(os.path.basename(output))[0]

    sbml_lvl = int(sbml_lvl) if sbml_lvl else 3
    # Any other level used to fail later on an unbound 'model' variable.
    if sbml_lvl not in (2, 3):
        raise ValueError("Unsupported sbml level %s, expected 2 or 3" % sbml_lvl)

    # add_ga() appends to this module-level list; reset it for this conversion.
    all_ga = []

    # Species properties from MetaNetX are only used when both inputs are given.
    with_mnx = False
    if mnx_chem_prop and mnx_chem_xref:
        with_mnx = True
        dict_mnx_chem_xref = parse_mnx_chem_xref(mnx_chem_xref)
        dict_mnx_chem_prop = parse_mnx_chem_prop(mnx_chem_prop)

    #create an empty sbml model
    if sbml_lvl == 2:
        sbmlns = libsbml.SBMLNamespaces(2,1)
        document = libsbml.SBMLDocument(sbmlns)
        model = document.createModel()
        # No fbc package in level 2: gene associations are not exported.
        association = None
        # Create a unit definition
        mmol_per_gDW_per_hr = model.createUnitDefinition()
        check(mmol_per_gDW_per_hr, 'create unit definition')
        check(mmol_per_gDW_per_hr.setId('mmol_per_gDW_per_hr'), 'set unit definition id')

        unit = mmol_per_gDW_per_hr.createUnit()
        check(unit, 'create mole unit')
        check(unit.setKind(libsbml.UNIT_KIND_MOLE), 'set unit kind')
        check(unit.setScale(-3), 'set unit scale')
        check(unit.setMultiplier(1), 'set unit multiplier')
        check(unit.setOffset(0), 'set unit offset')

        unit = mmol_per_gDW_per_hr.createUnit()
        check(unit, 'create gram unit')
        check(unit.setKind(libsbml.UNIT_KIND_GRAM), 'set unit kind')
        check(unit.setExponent(-1), 'set unit exponent')
        check(unit.setMultiplier(1), 'set unit multiplier')
        check(unit.setOffset(0), 'set unit offset')

        unit = mmol_per_gDW_per_hr.createUnit()
        check(unit, 'create second unit')
        check(unit.setKind(libsbml.UNIT_KIND_SECOND), 'set unit kind')
        check(unit.setExponent(-1), 'set unit exponent')
        check(unit.setMultiplier(0.00027777), 'set unit multiplier')
        check(unit.setOffset(0), 'set unit offset')
    else:
        sbmlns = libsbml.SBMLNamespaces(3,1,"fbc",1)
        document = libsbml.SBMLDocument(sbmlns)
        document.setPackageRequired("fbc", False)
        model = document.createModel()
        mplugin = model.getPlugin("fbc")
        # Opening tags of the model annotation; completed after the reaction loop.
        association = ['<annotation>', '<listOfGeneAssociations xmlns="http://www.sbml.org/sbml/level3/version1/fbc/version1">']
        check(model, 'create model')
        check(model.setTimeUnits("second"), 'set model-wide time units')
        check(model.setExtentUnits("mole"), 'set model units of extent')
        check(model.setSubstanceUnits('mole'), 'set model substance units')

    model.setId(model_id)

    math_ast = libsbml.parseL3Formula('FLUX_VALUE')
    check(math_ast, 'create AST for rate expression')

    #list of tuple: (x,y) x=species id, y=value of compart, None if not defined
    species = [(rlt.id_out, rlt.misc.get("COMPARTMENT",[None])[0])
               for rlt in padmet.getAllRelation() if rlt.type in ["consumes","produces"]]
    if verbose:
        print("%s species" %len(species))
    #compart_dict: k = id_encoded, v = original id
    compart_dict = {}
    #species_dict: k = species_id_encoded, v = dict: k' = {species_id, compart, name}, v' = value or None
    species_dict = {}
    for species_id, compart in species:
        #encode id for sbml
        species_id_encoded = sp.convert_to_coded_id(species_id, "M", compart)
        #use the common name, default is the species id
        name = padmet.dicOfNode[species_id].misc.get("COMMON-NAME",[species_id])[0]
        #update dicts
        species_dict[species_id_encoded] = {"species_id":species_id, "compart":compart, "name":name}

    for species_id_encoded, s_dict in species_dict.items():
        compart = s_dict["compart"]
        name = s_dict["name"]
        original_id = s_dict["species_id"]
        s = model.createSpecies()
        check(s, 'create species')
        check(s.setId(species_id_encoded), 'set species id %s' %species_id_encoded)
        check(s.setMetaId(species_id_encoded), 'set species meta id %s' %species_id_encoded)
        check(s.setBoundaryCondition(False), 'set boundaryCondition to False')
        check(s.setHasOnlySubstanceUnits(False), 'set setHasOnlySubstanceUnits to False')
        check(s.setConstant(False), 'set setConstant to False')
        check(s.setInitialAmount(0.0), 'set initAmount')

        # Fall back to the original id when no name is stored (the previous
        # fallback passed None to setName and formatted a stale loop variable).
        display_name = name if name is not None else original_id
        check(s.setName(display_name), 'set species Name %s' %display_name)

        if compart is not None:
            compart_encoded = sp.convert_to_coded_id(compart)
            compart_dict[compart_encoded] = compart
            check(s.setCompartment(compart_encoded), 'set species compartment %s' %compart_encoded)
            if compart == BOUNDARY_ID:
                check(s.setBoundaryCondition(True), 'set boundaryCondition to True')

        if with_mnx:
            try:
                mnx_id = dict_mnx_chem_xref[original_id]
                species_prop = dict(dict_mnx_chem_prop[mnx_id])
            except (IndexError, KeyError):
                # Species unknown to MetaNetX: nothing to add.
                species_prop = None

            if species_prop:
                # Drop empty or "NA" properties before reading them.
                for prop, prop_v in list(species_prop.items()):
                    if not prop_v or prop_v == "NA":
                        species_prop.pop(prop)
                try:
                    charge = int(species_prop["charge"])
                except (ValueError, KeyError):
                    charge = 0
                formula = species_prop.get("formula","")
                # Formulas containing parentheses or dots are not exported.
                if re.findall(r"\(|\)|\.", formula):
                    formula = None
                inchi = species_prop.get("inchi", None)

                if sbml_lvl == 3:
                    splugin = s.getPlugin("fbc")
                    check(splugin.setCharge(charge), 'set charge')
                    if formula:
                        check(splugin.setChemicalFormula(formula), 'set Formula')
                    if inchi:
                        annot_xml = create_annotation(inchi, species_id_encoded)
                        check(s.setAnnotation(annot_xml), 'set Annotations')

                # The remaining properties are written as species notes.
                for prop, prop_v in list(species_prop.items()):
                    if prop in ["charge", "formula", "source", "description","inchi"] or prop_v in ["NA",""]:
                        species_prop.pop(prop)
                notes = create_note(species_prop)
                check(s.setNotes(notes), 'set Notes')

    for k, v in compart_dict.items():
        compart = model.createCompartment()
        check(compart,'create compartment')
        check(compart.setId(k),'set compartment id %s' %k)
        check(compart.setSize(1),'set size for compartment id %s' %k)
        check(compart.setConstant(True),'set constant for compartment id %s' %k)
        if v == "c":
            check(compart.setName("cytosol"),'set compartment name cytosol')
        elif v == "e":
            check(compart.setName("extracellular"),'set compartment name extracellular')
        elif v == "p":
            check(compart.setName("periplasm"),'set compartment name periplasm')
        elif v != k:
            check(compart.setName(v),'set compartment id %s' %v)

    if obj_fct is not None:
        obj_fct_encoded = sp.convert_to_coded_id(obj_fct)
        if verbose:
            print("the objectif reaction is: %s" %(obj_fct_encoded))

    reactions = [node for node in padmet.dicOfNode.values() if node.type == "reaction"]
    nb_reactions = str(len(reactions))
    # Create reactions
    if verbose:
        print("%s reactions" %nb_reactions)

    for rNode in reactions:
        rId = rNode.id
        rId_encoded = sp.convert_to_coded_id(rId,"R")
        rName = rNode.misc.get("COMMON-NAME",[rId])[0]
        # Relations starting from this reaction, looked up once; a reaction
        # without any relation gets an empty list instead of raising.
        rxn_relations = padmet.dicOfRelationIn.get(rId, [])

        reaction = model.createReaction()
        check(reaction, 'create reaction')
        check(reaction.setId(rId_encoded), 'set reaction id %s' %rId_encoded)
        if rName is not None:
            check(reaction.setName(rName), 'set reaction name %s' %rName)
        check(reaction.setFast(False), 'set fast')

        #list of tuple (reactant_id,stoichiometry,compart)
        consumed = [(rlt.id_out, rlt.misc["STOICHIOMETRY"][0], rlt.misc.get("COMPARTMENT",[None])[0])
                    for rlt in rxn_relations if rlt.type == "consumes"]
        #list of tuple (product_id,stoichiometry,compart)
        produced = [(rlt.id_out, rlt.misc["STOICHIOMETRY"][0], rlt.misc.get("COMPARTMENT",[None])[0])
                    for rlt in rxn_relations if rlt.type == "produces"]

        #set reversibility: every direction but LEFT-TO-RIGHT is reversible
        direction = rNode.misc["DIRECTION"][0]
        reversible = direction != "LEFT-TO-RIGHT"
        check(reaction.setReversible(reversible), 'set reaction reversibility flag %s' %reversible)

        if sbml_lvl == 3:
            bound = mplugin.createFluxBound()
            bound.setReaction(rId_encoded)
            bound.setOperation("lessEqual")
            bound.setValue(def_max_upper_bound)

            bound = mplugin.createFluxBound()
            bound.setReaction(rId_encoded)
            bound.setOperation("greaterEqual")
            if reversible:
                bound.setValue(def_max_lower_bound)
            else:
                bound.setValue(0)

            if rId == obj_fct:
                objective = mplugin.createObjective()
                objective.setId("obj1")
                objective.setType("maximize")
                mplugin.setActiveObjectiveId("obj1")
                fluxObjective = objective.createFluxObjective()
                fluxObjective.setReaction(rId_encoded)
                fluxObjective.setCoefficient(1)
        elif sbml_lvl == 2:
            kinetic_law = reaction.createKineticLaw()
            check(kinetic_law, 'create kinetic law')
            check(kinetic_law.setMath(math_ast), 'set math on kinetic law')
            #add parameter flux_value
            flux_value_k = kinetic_law.createParameter()
            check(flux_value_k, 'create parameter flux_value_k')
            check(flux_value_k.setId('FLUX_VALUE'), 'set parameter flux_value_k id')
            check(flux_value_k.setValue(0), 'set parameter flux_value_k value')
            check(flux_value_k.setUnits('mmol_per_gDW_per_hr'), 'set parameter flux_value_k units')
            #add parameter upper/lower_bound, lower value depend on reversibility
            upper_bound_k = kinetic_law.createParameter()
            check(upper_bound_k, 'create parameter upper_bound_k')
            check(upper_bound_k.setId('UPPER_BOUND'), 'set parameter upper_bound_k')
            check(upper_bound_k.setValue(def_max_upper_bound),'set parameter upper_bounp_k value')
            check(upper_bound_k.setUnits('mmol_per_gDW_per_hr'), 'set parameter uppper_bound_k units')

            lower_bound_k = kinetic_law.createParameter()
            check(lower_bound_k, 'create parameter lower_bound_k')
            check(lower_bound_k.setId('LOWER_BOUND'), 'set parameter lower_bound_k id')
            check(lower_bound_k.setValue(def_max_lower_bound if reversible else 0), 'set parameter lower_bound_k value')
            check(lower_bound_k.setUnits('mmol_per_gDW_per_hr'), 'set parameter lower_bound_k units')
            #objective_coefficient: 1 for the objective reaction, else 0
            obj_fct_k = kinetic_law.createParameter()
            check(obj_fct_k, 'create parameter obj_fct_k')
            check(obj_fct_k.setId('OBJECTIVE_COEFFICIENT'), 'set parameter obj_fct_k id')
            check(obj_fct_k.setValue(1 if rId == obj_fct else 0), 'set parameter obj_fct_k value')

        for cId, stoich, compart in consumed:
            cId_encoded = sp.convert_to_coded_id(cId,"M",compart)
            try:
                stoich = float(stoich)
            #for case stoich = n
            except ValueError:
                stoich = float(1)
            species_ref = reaction.createReactant()
            check(species_ref, 'create reactant')
            check(species_ref.setSpecies(cId_encoded), 'assign reactant species %s' %cId_encoded)
            check(species_ref.setStoichiometry(stoich), 'set stoichiometry %s' %stoich)
            if sbml_lvl == 3:
                check(species_ref.setConstant(False), 'set constant %s' %False)

        for pId, stoich, compart in produced:
            pId_encoded = sp.convert_to_coded_id(pId,"M",compart)
            try:
                stoich = float(stoich)
            except ValueError:
                stoich = float(1)
            species_ref = reaction.createProduct()
            check(species_ref, 'create product')
            check(species_ref.setSpecies(pId_encoded), 'assign product species %s' %pId_encoded)
            check(species_ref.setStoichiometry(stoich), 'set stoichiometry %s' %stoich)
            if sbml_lvl == 3:
                check(species_ref.setConstant(False), 'set constant %s' %False)

        linked_genes = set([rlt.id_out for rlt in rxn_relations if rlt.type == "is_linked_to"])
        all_suppData = [padmet.dicOfNode[rlt.id_out] for rlt in rxn_relations if rlt.type == "has_suppData"]

        #for each suppData holding a GENE_ASSOCIATION with at least one 'or'/'and':
        #rewrite the association with | and & and give it to compile_input,
        #each item returned is stored as a set of genes in all_ga_subsets.
        #all_ga_subsets is read as: (genes of subset 1 joined by and) or (genes of subset 2 ...) ...
        all_ga_subsets = list()
        for suppData in all_suppData:
            try:
                original_ga = suppData.misc["GENE_ASSOCIATION"][0]
                ga_for_gbr = re.sub(r" or " , "|", original_ga)
                ga_for_gbr = re.sub(r" and " , "&", ga_for_gbr)
                ga_for_gbr = re.sub(r"\s" , "", ga_for_gbr)
                operators = re.findall(r"\||\&", ga_for_gbr)
                # Associations with 100 operators or more are not expanded.
                if operators and len(operators) < 100:
                    ga_subsets = [set(i) for i in compile_input(ga_for_gbr)]
                    for ga in ga_subsets:
                        if ga not in all_ga_subsets:
                            all_ga_subsets.append(ga)
            # The try body is kept wide on purpose: a KeyError raised while
            # expanding the association also skips this suppData, as before.
            except KeyError:
                pass

        # Every linked gene that no expanded association mentions becomes its
        # own single-gene alternative, whether or not suppData exist.
        for gene_id in linked_genes:
            if not any(gene_id in ga for ga in all_ga_subsets):
                all_ga_subsets.append([gene_id])

        # association is None in level 2: no fbc gene association to build.
        if association and all_ga_subsets:
            add_ga(rId_encoded, all_ga_subsets)

        #set notes
        notes_dict = {}
        if linked_genes:
            notes_dict["GENE_ASSOCIATION"] = " or ".join(["("+" and ".join([i for i in g])+")" for g in all_ga_subsets])
        try:
            categories = set([padmet.dicOfNode[rlt.id_out].misc["CATEGORY"][0]
                              for rlt in rxn_relations if rlt.type == "has_reconstructionData"])
        except KeyError:
            categories = None
        if categories:
            notes_dict["CATEGORIES"] = " and ".join(categories)

        pathways = set([rlt.id_out for rlt in rxn_relations if rlt.type == "is_in_pathway"])
        if len(pathways) != 0:
            notes_dict["SUBSYSTEM"] = " , ".join(pathways)

        if notes_dict:
            notes = create_note(notes_dict)
            check(reaction.setNotes(notes), 'set notes %s' %notes)

    # all_ga is only filled through add_ga, i.e. for level 3 exports.
    if all_ga:
        for ga in all_ga:
            association.extend(ga)
        association.extend(['</listOfGeneAssociations>', '</annotation>'])
        association = " ".join(association)
        model.setAnnotation(association)

    if verbose:
        print("Done, creating sbml file: %s" %output)
    libsbml.writeSBMLToFile(document, output)