def __init__(self, id, name='', functional=True, root_dir=None, pdb_file_type='mmtf'):
    """Initialise the gene and attach a ``Protein`` built from the same identifier.

    Args:
        id: Gene identifier; forwarded to ``Gene.__init__`` and reused as the
            ``ident`` of the attached ``Protein``.
        name: Forwarded to ``Gene.__init__``.
        functional: Forwarded to ``Gene.__init__``.
        root_dir: Optional. When truthy it is assigned to ``self.root_dir``;
            otherwise only ``self._root_dir`` is set (to ``None``).
        pdb_file_type: Stored on the instance and handed to ``Protein``.
    """
    Gene.__init__(self, id=id, name=name, functional=functional)
    self.pdb_file_type = pdb_file_type

    # Directory setup: start with no root directory, then apply the caller's
    # choice if one was supplied.
    self._root_dir = None
    if root_dir:
        self.root_dir = root_dir

    # NOTE(review): ``gene_dir`` is defined elsewhere in the class and is
    # presumably derived from the root directory -- confirm.
    protein_options = {
        'ident': id,
        'root_dir': self.gene_dir,
        'pdb_file_type': self.pdb_file_type,
    }
    self.protein = Protein(**protein_options)
def create_consisten_model(model, metamodel, consistent_reactions):
    """Assemble a new model containing only the listed reactions.

    Every reaction id in ``consistent_reactions`` is copied out of
    ``metamodel``.  When the id also exists in ``model``, the copy inherits
    that reaction's genes and gene-reaction rule; otherwise it is tied to a
    placeholder gene with id ``'MODULAR_GAPFILLING'``.

    Args:
        model: Reference model; supplies the id/description and gene rules.
        metamodel: Model the reaction copies are taken from.
        consistent_reactions: Iterable of reaction ids to include.

    Returns:
        The newly built ``Model``.
    """
    result = Model()
    result.id = model.id
    result.description = model.id

    # Placeholder gene for reactions that are absent from ``model``.
    gapfill_gene = Gene('MODULAR_GAPFILLING')
    gapfill_gene._model = result
    result.genes.append(gapfill_gene)

    for rxn_id in consistent_reactions:
        rxn_copy = metamodel.reactions.get_by_id(rxn_id).copy()

        if rxn_id not in model.reactions:
            # NOTE(review): the rule is set from the placeholder's ``name``,
            # not its ``id``; whether that is non-empty depends on the Gene
            # constructor -- confirm.
            rxn_copy.gene_reaction_rule = gapfill_gene.name
            gapfill_gene._reaction.add(rxn_copy)
        else:
            source_rxn = model.reactions.get_by_id(rxn_id)

            # Reuse genes already registered in the new model, create the rest.
            linked_genes = []
            for source_gene in source_rxn.genes:
                if source_gene.id in result.genes:
                    linked_genes.append(result.genes.get_by_id(source_gene.id))
                    continue
                fresh_gene = Gene(source_gene.id)
                fresh_gene._model = result
                result.genes.append(fresh_gene)
                linked_genes.append(fresh_gene)

            for linked_gene in linked_genes:
                linked_gene._reaction.add(rxn_copy)
            rxn_copy._genes = linked_genes
            rxn_copy.gene_reaction_rule = source_rxn.gene_reaction_rule

        result.add_reaction(rxn_copy)

    return result
def rename_genes(cobra_model, rename_dict):
    """Rename genes in a model according to ``rename_dict``.

    For every ``old_name -> new_name`` pair:

    * both ids present and referring to different genes: the old gene is
      removed after the rules of its reactions have been recomputed;
    * both ids present and referring to the same gene (e.g. an identity
      mapping): nothing is removed;
    * only the old id present: the gene object is renamed in place;
    * only the new id present: nothing to do;
    * neither present: a new gene with the new id is appended.

    Afterwards ``cobra_model.repair()`` is called and the stored
    gene-reaction rule of every reaction with a compiled rule is rewritten
    with the new names.

    Args:
        cobra_model: Model whose ``genes`` list and reactions are modified
            in place.
        rename_dict: Mapping of old gene id to new gene id.  The outcome is
            undefined when a value equals a different key, because the
            result then depends on iteration order.
    """
    recompute_reactions = set()  # reactions whose gene sets must be rebuilt
    remove_genes = []
    for old_name, new_name in iteritems(rename_dict):
        try:
            gene_index = cobra_model.genes.index(old_name)
        except ValueError:
            gene_index = None
        old_gene_present = gene_index is not None
        new_gene_present = new_name in cobra_model.genes

        if old_gene_present and new_gene_present:
            old_gene = cobra_model.genes.get_by_id(old_name)
            # Guard against old and new ids resolving to the very same gene
            # (identity rename); removing it would delete a gene that is
            # still in use.
            if old_gene is not cobra_model.genes.get_by_id(new_name):
                remove_genes.append(old_gene)
                recompute_reactions.update(old_gene._reaction)
        elif old_gene_present:
            # Only the old id exists: rename the gene object in place.
            gene = cobra_model.genes[gene_index]
            # Drop the stale id from the DictList's lookup table so that the
            # re-assignment below re-indexes the gene under its new id.
            cobra_model.genes._dict.pop(gene.id)
            gene.id = new_name
            cobra_model.genes[gene_index] = gene
        elif not new_gene_present:
            # Neither id exists; the new gene's _model will be set by repair.
            cobra_model.genes.append(Gene(new_name))
        # Remaining case: only the new id exists -- nothing to do.
    cobra_model.repair()

    class Renamer(NodeTransformer):
        """Replace gene names inside a compiled gene-reaction rule."""

        def visit_Name(self, node):
            node.id = rename_dict.get(node.id, node.id)
            return node

    gene_renamer = Renamer()
    for rxn, rule in iteritems(get_compiled_gene_reaction_rules(cobra_model)):
        if rule is not None:
            rxn._gene_reaction_rule = ast2str(gene_renamer.visit(rule))

    # Re-assign through the public attribute so the affected reactions
    # rebuild their gene associations from the rewritten rule text.
    for rxn in recompute_reactions:
        rxn.gene_reaction_rule = rxn._gene_reaction_rule
    for stale_gene in remove_genes:
        cobra_model.genes.remove(stale_gene)
def create_consisten_model(model, metamodel, consistent_reactions):
    """Build a model restricted to the reactions named in ``consistent_reactions``.

    Reactions are copied from ``metamodel``.  A copy whose id is also found in
    ``model`` takes over that reaction's genes and gene-reaction rule; any
    other copy is associated with a placeholder gene whose id is
    ``'MODULAR_GAPFILLING'``.

    Args:
        model: Reference model providing id, description and gene rules.
        metamodel: Source of the reaction objects that get copied.
        consistent_reactions: Iterable of reaction ids to keep.

    Returns:
        The freshly created ``Model``.
    """
    new_model = Model()
    new_model.id = model.id
    new_model.description = model.id

    placeholder = Gene('MODULAR_GAPFILLING')
    placeholder._model = new_model
    new_model.genes.append(placeholder)

    for rid in consistent_reactions:
        copied = metamodel.reactions.get_by_id(rid).copy()

        if rid in model.reactions:
            reference = model.reactions.get_by_id(rid)

            # Collect the new model's counterpart of each referenced gene,
            # creating and registering it on first use.
            counterparts = []
            for ref_gene in reference.genes:
                if ref_gene.id not in new_model.genes:
                    created = Gene(ref_gene.id)
                    created._model = new_model
                    new_model.genes.append(created)
                    counterparts.append(created)
                else:
                    counterparts.append(new_model.genes.get_by_id(ref_gene.id))

            for counterpart in counterparts:
                counterpart._reaction.add(copied)
            copied._genes = counterparts
            copied.gene_reaction_rule = reference.gene_reaction_rule
        else:
            # NOTE(review): uses the placeholder's ``name`` rather than its
            # ``id`` as the rule text -- confirm this is intended.
            copied.gene_reaction_rule = placeholder.name
            placeholder._reaction.add(copied)

        new_model.add_reaction(copied)

    return new_model
def convert_kmodel(kmodel, media=None, exchanges=True, model_id="kbase"):
    """Convert a KBase model dictionary into a cobra model.

    Args:
        kmodel: Mapping read through the keys ``modelcompartments``,
            ``modelcompounds``, ``modelreactions`` and ``biomasses``.
        media: Optional mapping keyed by the model compound id up to its
            first ``_``; each value is indexed as ``[0]`` (lower bound) and
            ``[1]`` (upper bound).  When a media is given, drains of
            compounds missing from it get a lower bound of 0.
        exchanges: When true, one drain reaction is created for every
            compound that is listed in ``SINK`` or whose compartment id
            starts with ``"e"``.
        model_id: Identifier of the created model.

    Returns:
        The populated ``cobra.Model``.  If biomasses are present, the last
        converted biomass reaction is set as the objective.
    """
    # NOTE(review): ``bigg`` is not a parameter of this function; it is
    # presumably a module-level flag -- confirm it is defined.
    model_test = cobra.Model(model_id)

    comps = {mcomp['id']: mcomp['label']
             for mcomp in kmodel['modelcompartments']}

    mets = {}       # model compound id -> Metabolite
    sink = set()    # compound ids listed in SINK
    extra = set()   # compound ids that receive a drain reaction
    reactions = []

    for mc in kmodel["modelcompounds"]:
        formula = mc['formula'] if mc['formula'] != 'null' else None
        name = mc['name']
        charge = get_int('charge', 0, mc)
        mc_id = mc['id']
        annotation = get_cpd_annotation(mc['dblinks']) if 'dblinks' in mc else {}
        compartment = get_compartment_id(mc, simple=True)

        cpd_id = build_cpd_id(mc_id)
        if bigg and "bigg.metabolite" in annotation:
            cpd_id = annotation["bigg.metabolite"] + "_" + compartment

        if mc_id in SINK:
            logger.info('Add Sink: [%s]', mc_id)
            extra.add(mc_id)
            sink.add(mc_id)
        if compartment.startswith("e"):
            extra.add(mc_id)

        met = Metabolite(id=cpd_id, formula=formula, name=name,
                         charge=charge, compartment=compartment)
        met.annotation[SBO_ANNOTATION] = "SBO:0000247"  # simple chemical
        if cpd_id.startswith('cpd'):
            met.annotation["seed.compound"] = cpd_id.split("_")[0]
        # Applied last, so database links may override the defaults above.
        met.annotation.update(annotation)
        mets[mc_id] = met

    genes = set()
    for mr in kmodel["modelreactions"]:
        name = mr['name']
        lower_bound, upper_bound = get_reaction_constraints(mr)
        annotation = get_rxn_annotation(mr['dblinks']) if 'dblinks' in mr else {}

        rxn_id = build_rxn_id(mr['id'])
        if bigg and "bigg.reaction" in annotation:
            rxn_id = annotation["bigg.reaction"]

        reaction = Reaction(id=rxn_id, name=name,
                            lower_bound=lower_bound, upper_bound=upper_bound)
        if rxn_id.startswith('rxn'):
            reaction.annotation["seed.reaction"] = rxn_id.split("_")[0]
        reaction.annotation.update(annotation)
        # Set after the update so the SBO term always wins over db links.
        reaction.annotation[SBO_ANNOTATION] = "SBO:0000176"  # biochemical reaction

        object_stoichiometry = {}
        for mrr in mr['modelReactionReagents']:
            reagent_id = get_id_from_ref(mrr['modelcompound_ref'])
            object_stoichiometry[mets[reagent_id]] = mrr['coefficient']
        reaction.add_metabolites(object_stoichiometry)

        gpr = get_gpr(mr)
        reaction.gene_reaction_rule = get_gpr_string(gpr)
        genes |= get_genes(gpr)
        reactions.append(reaction)

    # The last biomass reaction becomes the objective.
    objective_id = None
    for biomass in kmodel['biomasses']:
        reaction = convert_biomass_to_reaction(biomass, mets)
        reactions.append(reaction)
        objective_id = reaction.id

    if exchanges:
        logger.info('Setup Drains. EX: %d SK: %d', len(extra), len(sink))
        for e in extra:
            met = mets[e]
            prefix = "DM_" if e in sink else "EX_"
            lower_bound = COBRA_DEFAULT_LB
            upper_bound = COBRA_DEFAULT_UB
            if media is not None:
                # With a media, uptake is closed unless the media lists the
                # compound with explicit bounds.
                lower_bound = 0
                media_key = e.split("_")[0]
                if media_key in media:
                    ct = media[media_key]
                    lower_bound = ct[0]
                    upper_bound = ct[1]
            reaction = Reaction(id=prefix + met.id,
                                name="Exchange for " + met.name,
                                lower_bound=lower_bound,
                                upper_bound=upper_bound)
            reaction.add_metabolites({met: -1})
            reaction.annotation[SBO_ANNOTATION] = "SBO:0000627"  # exchange reaction
            reactions.append(reaction)

    for g in genes:
        gene = Gene(id=build_gene_id(g), name=g)
        gene.annotation[SBO_ANNOTATION] = "SBO:0000243"
        model_test.genes.append(gene)
    model_test.compartments = comps

    # Best effort: a ValueError from add_reactions is reported as a warning.
    try:
        model_test.add_reactions(reactions)
    except ValueError as e:
        warn(str(e))

    if objective_id is not None:
        model_test.objective = model_test.reactions.get_by_id(id=objective_id)
        # NOTE(review): the return value is discarded; kept from the original.
        linear_reaction_coefficients(model_test)
    return model_test
def __json_decode__(self, **attrs):
    """Restore this object from decoded JSON attributes.

    ``attrs['id']`` is used to run ``Gene.__init__``; every other entry is
    then set as an instance attribute of the same name.

    Raises:
        KeyError: If ``attrs`` has no ``'id'`` entry.
    """
    Gene.__init__(self, id=attrs['id'])
    for attr_name, attr_value in attrs.items():
        if attr_name == 'id':
            # Already consumed by the initialiser above.
            continue
        setattr(self, attr_name, attr_value)
def parse_xml_into_model(xml, number=float):
    """Build a ``Model`` from a parsed SBML (fbc) XML tree.

    Parameters
    ----------
    xml : element tree
        Object whose ``find`` locates the ``sbml:model`` element.
    number : callable
        Converter applied to numeric attribute values (flux bounds,
        stoichiometries and objective coefficients).

    Returns
    -------
    Model
        The model with compartments, metabolites, genes and reactions added.
        If no ``fbc:listOfObjectives`` element exists the model is returned
        without an objective (a warning is issued).

    Raises
    ------
    CobraSBMLError
        If a reaction references a flux bound id that was not collected, or
        an objective references a reaction that is not in the model.
    """
    sbml_model = xml.find(ns("sbml:model"))
    if get_attrib(sbml_model, "fbc:strict") != "true":
        warn('loading SBML model without fbc:strict="true"')

    model = Model(get_attrib(sbml_model, "id"))
    model.name = sbml_model.get("name")

    compartments = {}
    for compartment in sbml_model.findall(COMPARTMENT_XPATH):
        compartments[compartment.get("id")] = compartment.get("name")
    model.compartments = compartments

    # Metabolites: species selected with the 'false' variant of the xpath.
    for species in sbml_model.findall(SPECIES_XPATH % 'false'):
        species_id = get_attrib(species, "id", require=True)
        met = Metabolite(clip(species_id, "M_"))
        met.name = species.get("name")
        annotate_cobra_from_sbml(met, species)
        met.compartment = species.get("compartment")
        met.charge = get_attrib(species, "fbc:charge", int)
        met.formula = get_attrib(species, "fbc:chemicalFormula")
        model.add_metabolites([met])

    # Boundary metabolites should not appear in a model; remember their ids
    # so that reactions using them by mistake can skip them.
    boundary_metabolites = set()
    for species in sbml_model.findall(SPECIES_XPATH % 'true'):
        boundary_metabolites.add(clip(species.get("id"), "M_"))

    # Genes
    for sbml_gene in sbml_model.iterfind(GENES_XPATH):
        raw_gene_id = get_attrib(sbml_gene, "fbc:id").replace(SBML_DOT, ".")
        gene = Gene(clip(raw_gene_id, "G_"))
        gene.name = get_attrib(sbml_gene, "fbc:name")
        if gene.name is None:
            gene.name = get_attrib(sbml_gene, "fbc:label")
        annotate_cobra_from_sbml(gene, sbml_gene)
        model.genes.append(gene)

    def process_gpr(sub_xml):
        """Recursively turn a gene-association element into a rule string."""
        tag = sub_xml.tag
        if tag == OR_TAG:
            return "( %s )" % ' or '.join(process_gpr(i) for i in sub_xml)
        if tag == AND_TAG:
            return "( %s )" % ' and '.join(process_gpr(i) for i in sub_xml)
        if tag == GENEREF_TAG:
            gene_ref = get_attrib(sub_xml, "fbc:geneProduct", require=True)
            return clip(gene_ref, "G_")
        raise Exception("unsupported tag " + sub_xml.tag)

    # Flux bound parameters, keyed by their id.
    bounds = {}
    for bound in sbml_model.iterfind(BOUND_XPATH):
        bounds[bound.get("id")] = get_attrib(bound, "value", type=number)

    # Reactions
    reactions = []
    for sbml_reaction in sbml_model.iterfind(
            ns("sbml:listOfReactions/sbml:reaction")):
        reaction_id = get_attrib(sbml_reaction, "id", require=True)
        reaction = Reaction(clip(reaction_id, "R_"))
        reaction.name = sbml_reaction.get("name")
        annotate_cobra_from_sbml(reaction, sbml_reaction)
        lb_id = get_attrib(sbml_reaction, "fbc:lowerFluxBound", require=True)
        ub_id = get_attrib(sbml_reaction, "fbc:upperFluxBound", require=True)
        try:
            # Upper bound is assigned before the lower bound.
            reaction.upper_bound = bounds[ub_id]
            reaction.lower_bound = bounds[lb_id]
        except KeyError as e:
            raise CobraSBMLError("No constant bound with id '%s'" % str(e))
        reactions.append(reaction)

        # Net coefficient per metabolite id: reactants subtract, products add.
        stoichiometry = defaultdict(int)
        reactant_refs = sbml_reaction.findall(
            ns("sbml:listOfReactants/sbml:speciesReference"))
        for species_reference in reactant_refs:
            met_name = clip(species_reference.get("species"), "M_")
            stoichiometry[met_name] -= number(
                species_reference.get("stoichiometry"))
        product_refs = sbml_reaction.findall(
            ns("sbml:listOfProducts/sbml:speciesReference"))
        for species_reference in product_refs:
            met_name = clip(species_reference.get("species"), "M_")
            stoichiometry[met_name] += get_attrib(
                species_reference, "stoichiometry", type=number, require=True)

        # add_metabolites needs metabolite objects as keys, not ids.
        object_stoichiometry = {}
        for met_id, coefficient in stoichiometry.items():
            if met_id in boundary_metabolites:
                warn("Boundary metabolite '%s' used in reaction '%s'" %
                     (met_id, reaction.id))
                continue
            try:
                metabolite = model.metabolites.get_by_id(met_id)
            except KeyError:
                warn("ignoring unknown metabolite '%s' in reaction %s" %
                     (met_id, reaction.id))
                continue
            object_stoichiometry[metabolite] = coefficient
        reaction.add_metabolites(object_stoichiometry)

        # Gene reaction rule: only an association with exactly one child
        # element is accepted.
        gpr = ''
        gpr_xml = sbml_reaction.find(GPR_TAG)
        if gpr_xml is not None:
            if len(gpr_xml) == 1:
                gpr = process_gpr(gpr_xml[0])
            else:
                warn("ignoring invalid geneAssociation for " + repr(reaction))
        # Strip the outermost parentheses, if any.
        if gpr.startswith("(") and gpr.endswith(")"):
            gpr = gpr[1:-1].strip()
        reaction.gene_reaction_rule = gpr.replace(SBML_DOT, ".")

    try:
        model.add_reactions(reactions)
    except ValueError as e:
        warn(str(e))

    # Objective coefficients are handled after all reactions are added.
    obj_list = sbml_model.find(ns("fbc:listOfObjectives"))
    if obj_list is None:
        warn("listOfObjectives element not found")
        return model

    active_id = get_attrib(obj_list, "fbc:activeObjective")
    active_objective = obj_list.find(
        ns("fbc:objective[@fbc:id='{}']".format(active_id)))
    direction_long = get_attrib(active_objective, "fbc:type")
    direction = LONG_SHORT_DIRECTION[direction_long]

    coefficients = {}
    for sbml_objective in obj_list.findall(OBJECTIVES_XPATH % active_id):
        rxn_id = clip(get_attrib(sbml_objective, "fbc:reaction"), "R_")
        try:
            objective_reaction = model.reactions.get_by_id(rxn_id)
        except KeyError:
            raise CobraSBMLError("Objective reaction '%s' not found" % rxn_id)
        try:
            coefficients[objective_reaction] = get_attrib(
                sbml_objective, "fbc:coefficient", type=number)
        except ValueError as e:
            warn(str(e))

    set_objective(model, coefficients)
    model.solver.objective.direction = direction
    return model
def gene_from_dict(gene):
    """Create a ``Gene`` from a mapping of attribute names to values.

    The ``"id"`` entry is passed to the constructor; afterwards every entry
    of the mapping (``"id"`` included) is set as an attribute on the gene.

    Args:
        gene: Mapping that contains at least the key ``"id"``.

    Returns:
        The populated ``Gene``.
    """
    rebuilt = Gene(gene["id"])
    for attr_name, attr_value in iteritems(gene):
        setattr(rebuilt, attr_name, attr_value)
    return rebuilt
def build(self, model_id=None):
    """Convert ``self.fbamodel`` into a cobra ``Model``.

    Resets the builder's ``metabolites``, ``reactions``, ``biomass_reactions``
    and the sink/demand/exchange compound sets, converts all compounds,
    reactions and biomasses, creates the drain reactions and finally
    assembles the model.

    Args:
        model_id: Identifier of the created model; defaults to
            ``self.fbamodel.id`` when ``None``.

    Returns:
        The assembled ``Model``.  When at least one biomass reaction was
        added, one of them is set as the objective.
    """
    if model_id is None:
        model_id = self.fbamodel.id

    self.metabolites = {}
    self.reactions = {}
    self.biomass_reactions = set()
    self.sink_compounds = set()
    self.demand_compounds = set()
    self.exchange_compounds = set()

    for modelcompound in self.fbamodel.metabolites:
        cobra_metabolite = self.convert_modelcompound(modelcompound)
        if cobra_metabolite.id not in self.metabolites:
            self.metabolites[cobra_metabolite.id] = cobra_metabolite
            self.metabolites_remap[modelcompound.id] = cobra_metabolite.id
        else:
            logger.warning('duplicate compound: %s', cobra_metabolite.id)

        # Compounds listed in ``auto_sink`` are registered as demand
        # compounds (the log message nevertheless says "Sink").
        if cobra_metabolite.id in self.auto_sink:
            logger.info('Add Sink: [%s]', cobra_metabolite.id)
            self.demand_compounds.add(cobra_metabolite.id)
        if cobra_metabolite.compartment == self.auto_exchange:
            logger.debug('Add Exchange: [%s]', cobra_metabolite.id)
            self.exchange_compounds.add(cobra_metabolite.id)

    for modelreaction in self.fbamodel.reactions:
        cobra_reaction = self.convert_modelreaction(modelreaction)
        self.add_reaction(cobra_reaction)

    for biomass in self.fbamodel.data['biomasses']:
        cobra_reaction = self.convert_biomass_to_reaction(biomass)
        # Only biomass reactions for which add_reaction returned something
        # are recorded.
        if self.add_reaction(cobra_reaction) is not None:
            self.biomass_reactions.add(cobra_reaction.id)

    for cpd_id in self.exchange_compounds:
        # Without media constraints the default lower bound is used;
        # otherwise the zero bound applies unless the media lists the
        # compound with explicit bounds.
        if not self.media_const:
            lower_bound = self.COBRA_DEFAULT_LB
        else:
            lower_bound = self.COBRA_0_BOUND
        upper_bound = self.COBRA_DEFAULT_UB
        if cpd_id in self.media_const:
            lower_bound, upper_bound = self.media_const[cpd_id]
        drain_reaction = self.build_drain_from_metabolite_id(
            cpd_id, lower_bound, upper_bound)
        self.add_reaction(drain_reaction)
        logger.debug('created exchange for [%s]: %s', cpd_id, drain_reaction)

    for cpd_id in self.demand_compounds:
        drain_reaction = self.build_drain_from_metabolite_id(
            cpd_id, self.COBRA_0_BOUND, self.COBRA_DEFAULT_UB,
            "DM_", "Demand for ")
        self.add_reaction(drain_reaction)
        logger.debug('created demand for [%s]: %s', cpd_id, drain_reaction)

    for cpd_id in self.sink_compounds:
        drain_reaction = self.build_drain_from_metabolite_id(
            cpd_id, self.COBRA_0_BOUND, self.COBRA_DEFAULT_UB,
            "SK_", "Sink for ")
        self.add_reaction(drain_reaction)
        logger.debug('created sink for [%s]: %s', cpd_id, drain_reaction)

    cobra_model = Model(model_id)
    for g in self.genes:
        gene = Gene(id=build_gene_id(g), name=g)
        gene.annotation[self.SBO_ANNOTATION] = "SBO:0000243"
        if g in self.gene_annotation:
            gene.annotation.update(self.gene_annotation[g])
        cobra_model.genes.append(gene)

    cobra_model.add_metabolites(list(self.metabolites.values()))
    cobra_model.add_reactions(list(self.reactions.values()))

    if self.biomass_reactions:
        # NOTE(review): ``biomass_reactions`` is a set, so with several
        # biomass reactions the one picked here is arbitrary.
        default_biomass = next(iter(self.biomass_reactions))
        logger.info('Default biomass: [%s]', default_biomass)
        cobra_model.objective = default_biomass
        # NOTE(review): the return value is discarded; kept from the original.
        linear_reaction_coefficients(cobra_model)

    return cobra_model