Esempio n. 1
0
def test_gpr():
    """Exercise ``gene_reaction_rule`` assignment on a reaction.

    Covers a reaction outside any model, propagation of its genes into a
    model, uppercase operators and gene extraction from malformed rules.
    """
    mdl = Model()
    rxn = Reaction("test")

    # A rule can be assigned before the reaction belongs to a model.
    rule = "(g1 or g2) and g3"
    rxn.gene_reaction_rule = rule
    assert rxn.gene_reaction_rule == rule
    assert len(rxn.genes) == 3

    # Adding the reaction carries its genes over to the model.
    mdl.add_reactions([rxn])
    assert len(mdl.genes) == 3

    # The model and the reaction must hold the very same gene object.
    some_gene = next(iter(rxn.genes))
    assert some_gene is mdl.genes.get_by_id(some_gene.id)

    # Uppercase AND/OR are accepted; the stored rule is lowercase.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        rxn.gene_reaction_rule = "(b1 AND b2) OR (b3 and b4)"
        assert rxn.gene_reaction_rule == "(b1 and b2) or (b3 and b4)"
        assert len(rxn.genes) == 4

    # Genes are still extracted from syntactically broken rules.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for broken_rule, gene_count in (("(a1 or a2", 2), ("(forT or ", 1)):
            rxn.gene_reaction_rule = broken_rule
            assert len(rxn.genes) == gene_count
Esempio n. 2
0
def test__has_gene_reaction_rule():
    """Check ``_has_gene_reaction_rule`` for unset, real and blank rules."""
    rxn = Reaction('rxn')
    # A freshly created reaction is reported as having no rule.
    assert _has_gene_reaction_rule(rxn) is False
    # A gene ID counts as a rule; a whitespace-only string does not.
    for rule, expected in (('b1779', True), (' ', False)):
        rxn.gene_reaction_rule = rule
        assert _has_gene_reaction_rule(rxn) is expected
Esempio n. 3
0
 def test_gpr(self):
     """Check gene-reaction rule handling on a reaction and its model.

     Covers setting a rule on a reaction outside any model, propagation
     of the genes when the reaction is added, uppercase operators and
     gene extraction from malformed rules.
     """
     model = Model()
     reaction = Reaction("test")
     # A GPR can be set while the reaction is not part of a model.
     reaction.gene_reaction_rule = "(g1 or g2) and g3"
     assert reaction.gene_reaction_rule == "(g1 or g2) and g3"
     assert len(reaction.genes) == 3
     # Adding the reaction propagates its genes to the model.
     # ``add_reactions`` is used instead of the deprecated ``add_reaction``.
     model.add_reactions([reaction])
     assert len(model.genes) == 3
     # The gene objects must be the same in the model and the reaction.
     reaction_gene = list(reaction.genes)[0]
     model_gene = model.genes.get_by_id(reaction_gene.id)
     assert reaction_gene is model_gene
     # Uppercase AND/OR are accepted; warnings raised while setting the
     # rule are silenced.
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         reaction.gene_reaction_rule = "(b1 AND b2) OR (b3 and b4)"
     assert reaction.gene_reaction_rule == "(b1 and b2) or (b3 and b4)"
     assert len(reaction.genes) == 4
     # Genes are still extracted from a malformed GPR string.
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         reaction.gene_reaction_rule = "(a1 or a2"
         assert len(reaction.genes) == 2
         reaction.gene_reaction_rule = "(forT or "
         assert len(reaction.genes) == 1
Esempio n. 4
0
def test__has_gene_reaction_rule():
    """Verify rule detection before and after assigning rule strings."""
    subject = Reaction('rxn')
    # No rule has been assigned yet.
    assert _has_gene_reaction_rule(subject) is False
    # Each pair is (rule to assign, expected detection result).
    cases = (
        ('b1779', True),
        (' ', False),
    )
    for assigned_rule, detected in cases:
        subject.gene_reaction_rule = assigned_rule
        assert _has_gene_reaction_rule(subject) is detected
Esempio n. 5
0
def test_gene_knockout(salmonella: Model) -> None:
    """Verify which reactions are reported as removed after gene deletions."""
    knockout_genes = ["STM1067", "STM0227"]
    expected_removed = {
        "3HAD121", "3HAD160", "3HAD80", "3HAD140", "3HAD180",
        "3HAD100", "3HAD181", "3HAD120", "3HAD60", "3HAD141",
        "3HAD161", "T2DECAI", "3HAD40",
    }
    for genes, reactions in (
        (knockout_genes, expected_removed),
        (["STM4221"], {"PGI"}),
        (["STM1746.S"], {"4PEPTabcpp"}),
    ):
        _gene_knockout_computation(salmonella, genes, reactions)

    # Cumulative deletions accumulate on top of the earlier ones.
    delete_model_genes(salmonella, knockout_genes[:1])
    for genes in (knockout_genes[1:], ["STM4221"]):
        delete_model_genes(salmonella, genes, cumulative_deletions=True)
    assert _get_removed(salmonella) == expected_removed | {"PGI"}

    # A non-cumulative deletion after cumulative ones leaves only its own.
    delete_model_genes(salmonella, ["STM4221"], cumulative_deletions=False)
    assert _get_removed(salmonella) == {"PGI"}

    # After the reset the upper bound must be back at its expected value.
    assert salmonella.reactions.get_by_id("T2DECAI").upper_bound == 1000.0

    # One gene name is a substring of another.
    toy_model = Model()
    toy_reaction = Reaction("test1")
    toy_reaction.gene_reaction_rule = "eggs or (spam and eggspam)"
    toy_model.add_reactions([toy_reaction])
    _gene_knockout_computation(toy_model, ["eggs"], set())
    _gene_knockout_computation(toy_model, ["eggs", "spam"], {"test1"})

    # Each scenario is (rule, ordered (genes, expected reactions) checks):
    # a nested boolean expression, then gene names that look like Python
    # keywords, attribute access or statements.
    scenarios = (
        (
            "g1 and g2 and (g3 or g4 or (g5 and g6))",
            (
                (["g3"], set()),
                (["g1"], {"test1"}),
                (["g5"], set()),
                (["g3", "g4", "g5"], {"test1"}),
            ),
        ),
        (
            "g1 and (for or in)",
            (
                (["for", "in"], {"test1"}),
                (["for"], set()),
            ),
        ),
        (
            "g1 and g2 and g2.conjugate",
            (
                (["g2"], {"test1"}),
                (["g2.conjugate"], {"test1"}),
            ),
        ),
        (
            "g1 and (try:' or 'except:1)",
            (
                (["try:'"], set()),
                (["try:'", "'except:1"], {"test1"}),
            ),
        ),
    )
    for rule, checks in scenarios:
        toy_reaction.gene_reaction_rule = rule
        for genes, reactions in checks:
            _gene_knockout_computation(toy_model, genes, reactions)
0
def test__normalize_pseudoreaction_biomass_has_gpr():
    """Expect ``ConflictingPseudoreaction`` for 'my_biomass_2' with a GPR."""
    rxn = Reaction('my_biomass_2')
    rxn.gene_reaction_rule = 'b1779'
    with pytest.raises(ConflictingPseudoreaction) as excinfo:
        _normalize_pseudoreaction(rxn.id, rxn)
    message = str(excinfo.value)
    assert 'has a gene_reaction_rule' in message
    # The reaction ID is unchanged after the failed call.
    assert rxn.id == 'my_biomass_2'
Esempio n. 7
0
def test_add_reaction_context(model):
    """Check that a reaction added inside ``with model:`` is rolled back.

    Inside the context the reaction, its two new metabolites and its gene
    must be present in the model; after leaving the context the model must
    be back to its previous state.
    """
    old_reaction_count = len(model.reactions)
    old_metabolite_count = len(model.metabolites)
    dummy_metabolite_1 = Metabolite("test_foo_1")
    dummy_metabolite_2 = Metabolite("test_foo_2")
    actual_metabolite = model.metabolites[0]
    copy_metabolite = model.metabolites[1].copy()
    dummy_reaction = Reaction("test_foo_reaction")
    dummy_reaction.add_metabolites({dummy_metabolite_1: -1,
                                    dummy_metabolite_2: 1,
                                    copy_metabolite: -2,
                                    actual_metabolite: 1})
    dummy_reaction.gene_reaction_rule = 'dummy_gene'

    with model:
        # ``add_reactions`` is used instead of the deprecated
        # ``add_reaction``.
        model.add_reactions([dummy_reaction])
        assert model.reactions.get_by_id(
            dummy_reaction.id) == dummy_reaction
        assert len(model.reactions) == old_reaction_count + 1
        # Only the two dummy metabolites count as new additions.
        assert len(model.metabolites) == old_metabolite_count + 2
        assert dummy_metabolite_1._model == model
        assert 'dummy_gene' in model.genes

    # Leaving the context must revert every addition.
    assert len(model.reactions) == old_reaction_count
    assert len(model.metabolites) == old_metabolite_count
    with pytest.raises(KeyError):
        model.reactions.get_by_id(dummy_reaction.id)
    assert dummy_metabolite_1._model is None
    assert 'dummy_gene' not in model.genes
Esempio n. 8
0
def test__normalize_pseudoreaction_biomass_has_gpr():
    """Normalizing 'my_biomass_2' with a gene rule must raise a conflict."""
    original_id = 'my_biomass_2'
    candidate = Reaction(original_id)
    candidate.gene_reaction_rule = 'b1779'
    with pytest.raises(ConflictingPseudoreaction) as excinfo:
        _normalize_pseudoreaction(candidate.id, candidate)
    # The error text names the offending attribute.
    assert 'has a gene_reaction_rule' in str(excinfo.value)
    assert candidate.id == original_id
Esempio n. 9
0
    def test_add_reaction_context(self, model):
        """Check that a reaction added inside ``with model:`` is rolled back.

        Inside the context the reaction, its two new metabolites and its
        gene must be present in the model; after leaving the context the
        model must be back to its previous state.
        """
        old_reaction_count = len(model.reactions)
        old_metabolite_count = len(model.metabolites)
        dummy_metabolite_1 = Metabolite("test_foo_1")
        dummy_metabolite_2 = Metabolite("test_foo_2")
        actual_metabolite = model.metabolites[0]
        copy_metabolite = model.metabolites[1].copy()
        dummy_reaction = Reaction("test_foo_reaction")
        dummy_reaction.add_metabolites({
            dummy_metabolite_1: -1,
            dummy_metabolite_2: 1,
            copy_metabolite: -2,
            actual_metabolite: 1
        })
        dummy_reaction.gene_reaction_rule = 'dummy_gene'

        with model:
            # ``add_reactions`` is used instead of the deprecated
            # ``add_reaction``.
            model.add_reactions([dummy_reaction])
            assert model.reactions.get_by_id(dummy_reaction.id) == \
                dummy_reaction
            assert len(model.reactions) == old_reaction_count + 1
            # Only the two dummy metabolites count as new additions.
            assert len(model.metabolites) == old_metabolite_count + 2
            assert dummy_metabolite_1._model == model
            assert 'dummy_gene' in model.genes

        # Leaving the context must revert every addition.
        assert len(model.reactions) == old_reaction_count
        assert len(model.metabolites) == old_metabolite_count
        with pytest.raises(KeyError):
            model.reactions.get_by_id(dummy_reaction.id)
        assert dummy_metabolite_1._model is None
        assert 'dummy_gene' not in model.genes
Esempio n. 10
0
 def convert_modelreaction(self, reaction, bigg=False):
     """Build a cobra ``Reaction`` from a model reaction object.

     Parameters
     ----------
     reaction
         Source reaction; its ``id``, ``name``, ``annotation``,
         ``gene_reaction_rule``, ``get_reaction_constraints()`` and
         ``get_gpr()`` are read.
     bigg : bool
         When true and the annotation has a "bigg.reaction" entry, that
         entry is used as the reaction ID.

     Returns
     -------
     Reaction
         The converted reaction. Genes found in the GPR are also recorded
         in ``self.genes`` as a side effect.
     """
     mr_id = reaction.id
     name = reaction.name
     annotation = reaction.annotation
     lower_bound, upper_bound = reaction.get_reaction_constraints()

     # Named ``rxn_id`` so the builtin ``id`` is not shadowed.
     rxn_id = build_rxn_id(mr_id)
     if bigg and "bigg.reaction" in annotation:
         rxn_id = annotation["bigg.reaction"]

     gpr = reaction.get_gpr()

     cobra_reaction = Reaction(rxn_id,
                               name=name,
                               lower_bound=lower_bound,
                               upper_bound=upper_bound)
     # SBO:0000176 = biochemical reaction; the source annotation is applied
     # afterwards and therefore wins on conflicting keys.
     cobra_reaction.annotation[self.SBO_ANNOTATION] = "SBO:0000176"
     cobra_reaction.annotation.update(annotation)

     if rxn_id.startswith('rxn'):
         cobra_reaction.annotation["seed.reaction"] = rxn_id.split("_")[0]

     cobra_reaction.add_metabolites(
         self.convert_modelreaction_stoichiometry(reaction))

     cobra_reaction.gene_reaction_rule = reaction.gene_reaction_rule

     # Register every gene of the GPR that has not been seen yet.
     for genes in gpr:
         for gene in genes:
             if gene not in self.genes:
                 self.genes[gene] = gene

     return cobra_reaction
Esempio n. 11
0
def test__normalize_pseudoreaction_exchange_error_has_gpr():
    """Expect ``ConflictingPseudoreaction`` for 'EX_gone' carrying a GPR."""
    rxn = Reaction('EX_gone')
    rxn.add_metabolites({Metabolite('glu__L_e'): -1})
    rxn.gene_reaction_rule = 'b1779'
    with pytest.raises(ConflictingPseudoreaction) as excinfo:
        _normalize_pseudoreaction(rxn.id, rxn)
    message = str(excinfo.value)
    assert 'has a gene_reaction_rule' in message
    # The reaction ID is unchanged after the failed call.
    assert rxn.id == 'EX_gone'
Esempio n. 12
0
def test__normalize_pseudoreaction_exchange_error_has_gpr():
    """Normalizing 'EX_gone' with a gene rule must raise a conflict."""
    original_id = 'EX_gone'
    candidate = Reaction(original_id)
    consumed = Metabolite('glu__L_e')
    candidate.add_metabolites({consumed: -1})
    candidate.gene_reaction_rule = 'b1779'
    with pytest.raises(ConflictingPseudoreaction) as excinfo:
        _normalize_pseudoreaction(candidate.id, candidate)
    # The error text names the offending attribute.
    assert 'has a gene_reaction_rule' in str(excinfo.value)
    assert candidate.id == original_id
Esempio n. 13
0
def test__normalize_pseudoreaction_atpm_has_gpr():
    """Reaction 'NPT1' with a GPR must keep its ID after normalization."""
    rxn = Reaction('NPT1')
    stoichiometry = {
        Metabolite('atp_c'): -1,
        Metabolite('h2o_c'): -1,
        Metabolite('pi_c'): 1,
        Metabolite('h_c'): 1,
        Metabolite('adp_c'): 1,
    }
    rxn.add_metabolites(stoichiometry)
    rxn.gene_reaction_rule = 'b1779'
    # NOTE(review): other tests in this file call
    # ``_normalize_pseudoreaction(reaction.id, reaction)`` with two
    # arguments; confirm which signature this test targets.
    _normalize_pseudoreaction(rxn)
    # The ID is expected to stay as it was.
    assert rxn.id == 'NPT1'
Esempio n. 14
0
 def test_gene_knock_out(self, model):
     """Check how gene knock-outs affect a reaction's ``functional`` flag.

     The rule 'A2B1 or A2B2 and A2B3' is used; knock-outs made inside
     ``with model:`` must be reverted when the context exits.
     """
     rxn = Reaction('rxn')
     rxn.add_metabolites({Metabolite('A'): -1, Metabolite('B'): 1})
     rxn.gene_reaction_rule = 'A2B1 or A2B2 and A2B3'
     assert hasattr(list(rxn.genes)[0], 'knock_out')
     # ``add_reactions`` is used instead of the deprecated ``add_reaction``.
     model.add_reactions([rxn])
     with model:
         model.genes.A2B1.knock_out()
         assert not model.genes.A2B1.functional
         model.genes.A2B3.knock_out()
         assert not rxn.functional
     # Outside the context the knock-outs have been undone.
     assert model.genes.A2B3.functional
     assert rxn.functional
     # Knocking out A2B1 alone leaves the reaction functional.
     model.genes.A2B1.knock_out()
     assert not model.genes.A2B1.functional
     assert model.reactions.rxn.functional
     # Additionally removing A2B3 disables the reaction.
     model.genes.A2B3.knock_out()
     assert not model.reactions.rxn.functional
Esempio n. 15
0
def test_gene_knock_out(model):
    """Check how gene knock-outs affect a reaction's ``functional`` flag.

    The rule 'A2B1 or A2B2 and A2B3' is used; knock-outs made inside
    ``with model:`` must be reverted when the context exits.
    """
    rxn = Reaction('rxn')
    rxn.add_metabolites({Metabolite('A'): -1, Metabolite('B'): 1})
    rxn.gene_reaction_rule = 'A2B1 or A2B2 and A2B3'
    assert hasattr(list(rxn.genes)[0], 'knock_out')
    # ``add_reactions`` is used instead of the deprecated ``add_reaction``.
    model.add_reactions([rxn])
    with model:
        model.genes.A2B1.knock_out()
        assert not model.genes.A2B1.functional
        model.genes.A2B3.knock_out()
        assert not rxn.functional
    # Outside the context the knock-outs have been undone.
    assert model.genes.A2B3.functional
    assert rxn.functional
    # Knocking out A2B1 alone leaves the reaction functional.
    model.genes.A2B1.knock_out()
    assert not model.genes.A2B1.functional
    assert model.reactions.rxn.functional
    # Additionally removing A2B3 disables the reaction.
    model.genes.A2B3.knock_out()
    assert not model.reactions.rxn.functional
Esempio n. 16
0
def test_gene_knock_out(model: Model) -> None:
    """Test gene knockout effect on reaction.

    The rule "A2B1 or A2B2 and A2B3" is used; knock-outs made inside
    ``with model:`` must be reverted when the context exits.
    """
    rxn = Reaction("rxn")
    rxn.add_metabolites({Metabolite("A"): -1, Metabolite("B"): 1})
    rxn.gene_reaction_rule = "A2B1 or A2B2 and A2B3"
    assert hasattr(list(rxn.genes)[0], "knock_out")
    # ``add_reactions`` is used instead of the deprecated ``add_reaction``.
    model.add_reactions([rxn])
    with model:
        model.genes.A2B1.knock_out()
        assert not model.genes.A2B1.functional
        model.genes.A2B3.knock_out()
        assert not rxn.functional
    # Outside the context the knock-outs have been undone.
    assert model.genes.A2B3.functional
    assert rxn.functional
    # Knocking out A2B1 alone leaves the reaction functional.
    model.genes.A2B1.knock_out()
    assert not model.genes.A2B1.functional
    assert model.reactions.rxn.functional
    # Additionally removing A2B3 disables the reaction.
    model.genes.A2B3.knock_out()
    assert not model.reactions.rxn.functional
Esempio n. 17
0
def convert_kmodel(kmodel, media=None, exchanges=True, model_id="kbase"):
    """Convert a model dictionary into a ``cobra.Model``.

    Parameters
    ----------
    kmodel : dict
        Model data. The keys "modelcompartments", "modelcompounds",
        "modelreactions" and "biomasses" are read.
    media : mapping, optional
        Maps a compound ID (the part of a model-compound ID before the
        first underscore) to a value indexable as ``[lower, upper]``.
        When given, drain reactions default to a lower bound of 0 and
        compounds found in the mapping take their bounds from it.
    exchanges : bool
        When true, add a drain reaction for every compound that is in
        ``SINK`` or whose compartment ID starts with "e".
    model_id : str
        ID passed to ``cobra.Model``.

    Returns
    -------
    cobra.Model
        The assembled model. If ``kmodel`` has biomasses, the reaction
        built from the last one is set as the objective.
    """
    model_test = cobra.Model(model_id)

    comps = {}
    mets = {}
    sink = set()
    reactions = []
    extra = set()

    for mcomp in kmodel['modelcompartments']:
        comps[mcomp['id']] = mcomp['label']

    for mc in kmodel["modelcompounds"]:
        # The string 'null' marks a missing formula.
        formula = None
        if mc['formula'] != 'null':
            formula = mc['formula']
        name = mc['name']
        charge = get_int('charge', 0, mc)
        mc_id = mc['id']
        annotation = {}
        if 'dblinks' in mc:
            annotation = get_cpd_annotation(mc['dblinks'])
        compartment = get_compartment_id(mc, simple=True)
        cpd_id = build_cpd_id(mc_id)
        # NOTE(review): ``bigg`` is not a parameter of this function;
        # presumably a module-level flag - confirm it is defined.
        if bigg and "bigg.metabolite" in annotation:
            cpd_id = annotation["bigg.metabolite"] + "_" + compartment

        # Sink compounds and compounds in an "e*" compartment get drains.
        if mc_id in SINK:
            logger.info('Add Sink: [%s]', mc_id)
            extra.add(mc_id)
            sink.add(mc_id)
        if compartment.startswith("e"):
            extra.add(mc_id)

        met = Metabolite(id=cpd_id,
                         formula=formula,
                         name=name,
                         charge=charge,
                         compartment=compartment)
        # SBO:0000247 = simple chemical.
        met.annotation[SBO_ANNOTATION] = "SBO:0000247"
        if cpd_id.startswith('cpd'):
            met.annotation["seed.compound"] = cpd_id.split("_")[0]
        met.annotation.update(annotation)
        mets[mc_id] = met

    genes = set()
    for mr in kmodel["modelreactions"]:
        mr_id = mr['id']
        name = mr['name']

        lower_bound, upper_bound = get_reaction_constraints(mr)
        annotation = {}
        if 'dblinks' in mr:
            annotation = get_rxn_annotation(mr['dblinks'])
        rxn_id = build_rxn_id(mr_id)
        if bigg and "bigg.reaction" in annotation:
            rxn_id = annotation["bigg.reaction"]
        reaction = Reaction(id=rxn_id,
                            name=name,
                            lower_bound=lower_bound,
                            upper_bound=upper_bound)
        # SBO:0000176 = biochemical reaction (set here instead of the
        # former "!!!" placeholder so the key keeps its position).
        reaction.annotation[SBO_ANNOTATION] = "SBO:0000176"
        if rxn_id.startswith('rxn'):
            reaction.annotation["seed.reaction"] = rxn_id.split("_")[0]
        reaction.annotation.update(annotation)
        # Re-assert the SBO term so the update above cannot override it.
        reaction.annotation[SBO_ANNOTATION] = "SBO:0000176"

        object_stoichiometry = {}
        for mrr in mr['modelReactionReagents']:
            modelcompound_ref = mrr['modelcompound_ref']
            coefficient = mrr['coefficient']
            mc_id = get_id_from_ref(modelcompound_ref)
            object_stoichiometry[mets[mc_id]] = coefficient
        reaction.add_metabolites(object_stoichiometry)

        gpr = get_gpr(mr)
        reaction.gene_reaction_rule = get_gpr_string(gpr)
        genes |= get_genes(gpr)
        reactions.append(reaction)

    # The reaction built from the last biomass becomes the objective.
    objective_id = None
    for biomass in kmodel['biomasses']:
        reaction = convert_biomass_to_reaction(biomass, mets)
        reactions.append(reaction)
        objective_id = reaction.id

    if exchanges:
        logger.info('Setup Drains. EX: %d SK: %d', len(extra), len(sink))
        for e in extra:
            met = mets[e]
            prefix = "DM_" if e in sink else "EX_"
            drain_id = prefix + met.id
            lower_bound = COBRA_DEFAULT_LB
            upper_bound = COBRA_DEFAULT_UB
            if media is not None:
                # With a medium, uptake is closed unless listed in it.
                lower_bound = 0
                compound_id = e.split("_")[0]
                if compound_id in media:
                    ct = media[compound_id]
                    lower_bound = ct[0]
                    upper_bound = ct[1]

            reaction = Reaction(id=drain_id,
                                name="Exchange for " + met.name,
                                lower_bound=lower_bound,
                                upper_bound=upper_bound)
            reaction.add_metabolites({met: -1})
            # SBO:0000627 = exchange reaction.
            reaction.annotation[SBO_ANNOTATION] = "SBO:0000627"
            reactions.append(reaction)

    for g in genes:
        gene = Gene(id=build_gene_id(g), name=g)
        gene.annotation[SBO_ANNOTATION] = "SBO:0000243"
        model_test.genes.append(gene)

    model_test.compartments = comps
    # A ValueError from add_reactions is downgraded to a warning.
    try:
        model_test.add_reactions(reactions)
    except ValueError as err:
        warn(str(err))

    if objective_id is not None:
        model_test.objective = model_test.reactions.get_by_id(id=objective_id)
        # NOTE(review): the return value is unused; confirm this call is
        # still needed.
        linear_reaction_coefficients(model_test)
    return model_test
Esempio n. 18
0
def from_mat_struct(mat_struct, model_id=None, inf=inf):
    """Create a model from a COBRA toolbox struct.

    Parameters
    ----------
    mat_struct : array with named fields (``dtype.names``)
        The struct as read in by scipy.io.loadmat. It must provide the
        fields "rxns", "mets", "S", "lb" and "ub".
    model_id : str, optional
        ID for the new model. When omitted, the struct's "description"
        field is used if present, otherwise "imported_model".
    inf : float
        Value that replaces infinite reaction bounds.

    Returns
    -------
    Model
        The model built from the struct.

    Raises
    ------
    ValueError
        If the struct has no named fields or lacks a required field.

    """
    m = mat_struct
    if m.dtype.names is None:
        raise ValueError("not a valid mat struct")
    if not {"rxns", "mets", "S", "lb", "ub"} <= set(m.dtype.names):
        raise ValueError("not a valid mat struct")
    # The objective vector is optional; its absence only triggers a warning.
    if "c" in m.dtype.names:
        c_vec = m["c"][0, 0]
    else:
        c_vec = None
        warn("objective vector 'c' not found")
    model = Model()
    # Model ID precedence: explicit argument, "description" field, fallback.
    if model_id is not None:
        model.id = model_id
    elif "description" in m.dtype.names:
        description = m["description"][0, 0][0]
        if not isinstance(description, string_types) and len(description) > 1:
            model.id = description[0]
            warn("Several IDs detected, only using the first.")
        else:
            model.id = description
    else:
        model.id = "imported_model"
    for i, name in enumerate(m["mets"][0, 0]):
        new_metabolite = Metabolite()
        new_metabolite.id = str(name[0][0])
        # Use explicit compartment data only when all three fields exist.
        if all(var in m.dtype.names for var in
               ['metComps', 'comps', 'compNames']):
            # Subtract 1 - presumably because the stored indices are
            # 1-based (MATLAB convention); TODO confirm.
            comp_index = m["metComps"][0, 0][i][0] - 1
            new_metabolite.compartment = m['comps'][0, 0][comp_index][0][0]
            if new_metabolite.compartment not in model.compartments:
                comp_name = m['compNames'][0, 0][comp_index][0][0]
                model.compartments[new_metabolite.compartment] = comp_name
        else:
            # Otherwise derive the compartment from the metabolite ID and
            # use the compartment ID as its own name.
            new_metabolite.compartment = _get_id_compartment(new_metabolite.id)
            if new_metabolite.compartment not in model.compartments:
                model.compartments[
                    new_metabolite.compartment] = new_metabolite.compartment
        # Name, formula and charge are optional: a failed lookup is skipped.
        try:
            new_metabolite.name = str(m["metNames"][0, 0][i][0][0])
        except (IndexError, ValueError):
            pass
        try:
            new_metabolite.formula = str(m["metFormulas"][0][0][i][0][0])
        except (IndexError, ValueError):
            pass
        try:
            new_metabolite.charge = float(m["metCharge"][0, 0][i][0])
            # Store whole-number charges as int rather than float.
            int_charge = int(new_metabolite.charge)
            if new_metabolite.charge == int_charge:
                new_metabolite.charge = int_charge
        except (IndexError, ValueError):
            pass
        model.add_metabolites([new_metabolite])
    new_reactions = []
    # Maps each reaction to its objective coefficient (filled only if "c"
    # was present).
    coefficients = {}
    for i, name in enumerate(m["rxns"][0, 0]):
        new_reaction = Reaction()
        new_reaction.id = str(name[0][0])
        new_reaction.lower_bound = float(m["lb"][0, 0][i][0])
        new_reaction.upper_bound = float(m["ub"][0, 0][i][0])
        # Replace infinite bounds with the caller-supplied ``inf`` value.
        if isinf(new_reaction.lower_bound) and new_reaction.lower_bound < 0:
            new_reaction.lower_bound = -inf
        if isinf(new_reaction.upper_bound) and new_reaction.upper_bound > 0:
            new_reaction.upper_bound = inf
        if c_vec is not None:
            coefficients[new_reaction] = float(c_vec[i][0])
        # Gene rule, name and subsystem are optional: a failed lookup is
        # skipped.
        try:
            new_reaction.gene_reaction_rule = str(m['grRules'][0, 0][i][0][0])
        except (IndexError, ValueError):
            pass
        try:
            new_reaction.name = str(m["rxnNames"][0, 0][i][0][0])
        except (IndexError, ValueError):
            pass
        try:
            new_reaction.subsystem = str(m['subSystems'][0, 0][i][0][0])
        except (IndexError, ValueError):
            pass
        new_reactions.append(new_reaction)
    model.add_reactions(new_reactions)
    set_objective(model, coefficients)
    # Fill in stoichiometry from the non-zero entries of S: rows index
    # metabolites, columns index reactions.
    coo = scipy_sparse.coo_matrix(m["S"][0, 0])
    for i, j, v in zip(coo.row, coo.col, coo.data):
        model.reactions[j].add_metabolites({model.metabolites[i]: v})
    return model
Esempio n. 19
0
    def test_gene_knockout_computation(self, salmonella):
        """Verify which reactions are reported as removed by gene deletions.

        Each scenario is checked through ``find_gene_knockout_reactions``
        (with and without precompiled rules) and ``delete_model_genes``.
        """

        def find_gene_knockout_reactions_fast(cobra_model, gene_list):
            """Run the knockout search with precompiled gene rules."""
            compiled_rules = get_compiled_gene_reaction_rules(
                cobra_model)
            return find_gene_knockout_reactions(
                cobra_model, gene_list,
                compiled_gene_reaction_rules=compiled_rules)

        def get_removed(m):
            """Return the IDs of the model's trimmed reactions."""
            return {x.id for x in m._trimmed_reactions}

        def test_computation(m, gene_ids, expected_reaction_ids):
            """Assert that deleting ``gene_ids`` removes exactly the
            expected reactions, then undo the deletion."""
            genes = [m.genes.get_by_id(i) for i in gene_ids]
            expected_reactions = {m.reactions.get_by_id(i)
                                  for i in expected_reaction_ids}
            removed1 = set(find_gene_knockout_reactions(m, genes))
            removed2 = set(find_gene_knockout_reactions_fast(m, genes))
            assert removed1 == expected_reactions
            assert removed2 == expected_reactions
            delete_model_genes(m, gene_ids, cumulative_deletions=False)
            assert get_removed(m) == expected_reaction_ids
            undelete_model_genes(m)

        gene_list = ['STM1067', 'STM0227']
        dependent_reactions = {'3HAD121', '3HAD160', '3HAD80', '3HAD140',
                               '3HAD180', '3HAD100', '3HAD181', '3HAD120',
                               '3HAD60', '3HAD141', '3HAD161', 'T2DECAI',
                               '3HAD40'}
        test_computation(salmonella, gene_list, dependent_reactions)
        test_computation(salmonella, ['STM4221'], {'PGI'})
        test_computation(salmonella, ['STM1746.S'], {'4PEPTabcpp'})
        # test cumulative behavior
        delete_model_genes(salmonella, gene_list[:1])
        delete_model_genes(salmonella, gene_list[1:],
                           cumulative_deletions=True)
        delete_model_genes(salmonella, ["STM4221"],
                           cumulative_deletions=True)
        dependent_reactions.add('PGI')
        assert get_removed(salmonella) == dependent_reactions
        # non-cumulative following cumulative
        delete_model_genes(salmonella, ["STM4221"],
                           cumulative_deletions=False)
        assert get_removed(salmonella) == {'PGI'}
        # make sure on reset that the bounds are correct
        reset_bound = salmonella.reactions.get_by_id("T2DECAI").upper_bound
        assert reset_bound == 1000.
        # test computation when gene name is a subset of another
        test_model = Model()
        test_reaction_1 = Reaction("test1")
        test_reaction_1.gene_reaction_rule = "eggs or (spam and eggspam)"
        # ``add_reactions`` is used instead of the deprecated
        # ``add_reaction``.
        test_model.add_reactions([test_reaction_1])
        test_computation(test_model, ["eggs"], set())
        test_computation(test_model, ["eggs", "spam"], {'test1'})
        # test computation with nested boolean expression
        test_reaction_1.gene_reaction_rule = \
            "g1 and g2 and (g3 or g4 or (g5 and g6))"
        test_computation(test_model, ["g3"], set())
        test_computation(test_model, ["g1"], {'test1'})
        test_computation(test_model, ["g5"], set())
        test_computation(test_model, ["g3", "g4", "g5"], {'test1'})
        # test computation when gene names are python expressions
        test_reaction_1.gene_reaction_rule = "g1 and (for or in)"
        test_computation(test_model, ["for", "in"], {'test1'})
        test_computation(test_model, ["for"], set())
        test_reaction_1.gene_reaction_rule = "g1 and g2 and g2.conjugate"
        test_computation(test_model, ["g2"], {"test1"})
        test_computation(test_model, ["g2.conjugate"], {"test1"})
        test_reaction_1.gene_reaction_rule = "g1 and (try:' or 'except:1)"
        test_computation(test_model, ["try:'"], set())
        test_computation(test_model, ["try:'", "'except:1"], {"test1"})
Esempio n. 20
0
def create_cobra_model_from_sbml_file(sbml_filename, old_sbml=False,
                                      legacy_metabolite=False,
                                      print_time=False, use_hyphens=False):
    """Convert an SBML XML file into a cobra.Model object.

    Supports SBML Level 2 Versions 1 and 4.  The function will detect if the
    SBML fbc package is used in the file and run the converter if the fbc
    package is used.

    Parameters
    ----------
    sbml_filename: string
        Path of the SBML file to read.
    old_sbml: bool
        Set to True if the XML file has metabolite formula appended to
        metabolite names. This was a poorly designed artifact that persists in
        some models.
    legacy_metabolite: bool
        If True then assume that the metabolite id has the compartment id
        appended after an underscore (e.g. _c for cytosol). The ids are then
        rewritten with ``parse_legacy_id``.
    print_time: bool
        deprecated
    use_hyphens: bool
        If True, double underscores (__) in an SBML ID will be converted to
        hyphens. Otherwise hyphens are converted to double underscores.

    Returns
    -------
    Model : The parsed cobra model

    Raises
    ------
    ImportError
        If python-libsbml is not available.
    IOError
        If ``sbml_filename`` is not an existing file.
    Exception
        If the document uses the fbc package and its conversion fails.
    """
    if not libsbml:
        raise ImportError('create_cobra_model_from_sbml_file '
                          'requires python-libsbml')

    __default_lower_bound = -1000
    __default_upper_bound = 1000
    __default_objective_coefficient = 0
    # Ensure that the file exists
    if not isfile(sbml_filename):
        raise IOError('Your SBML file is not found: %s' % sbml_filename)
    # Expressions to change SBML Ids to Palsson Lab Ids
    metabolite_re = re.compile('^M_')
    reaction_re = re.compile('^R_')
    compartment_re = re.compile('^C_')
    if print_time:
        warn("print_time is deprecated", DeprecationWarning)
    model_doc = libsbml.readSBML(sbml_filename)
    if model_doc.getPlugin("fbc") is not None:
        from libsbml import ConversionProperties, LIBSBML_OPERATION_SUCCESS
        conversion_properties = ConversionProperties()
        conversion_properties.addOption(
            "convert fbc to cobra", True, "Convert FBC model to Cobra model")
        result = model_doc.convert(conversion_properties)
        if result != LIBSBML_OPERATION_SUCCESS:
            raise Exception("Conversion of SBML+fbc to COBRA failed")
    sbml_model = model_doc.getModel()
    sbml_model_id = sbml_model.getId()
    sbml_species = sbml_model.getListOfSpecies()
    sbml_reactions = sbml_model.getListOfReactions()
    sbml_compartments = sbml_model.getListOfCompartments()
    # Map compartment id (without the C_ prefix) to compartment name.
    compartment_dict = {compartment_re.split(x.getId())[-1]: x.getName()
                        for x in sbml_compartments}
    if legacy_metabolite:
        # Deal with the palsson lab appending the compartment id to the
        # metabolite id
        new_dict = {}
        for the_id, the_name in compartment_dict.items():
            if the_name == '':
                new_dict[the_id[0].lower()] = the_id
            else:
                new_dict[the_id] = the_name
        compartment_dict = new_dict
        # Reverse lookup: compartment name -> compartment id.
        legacy_compartment_converter = {
            v: k for k, v in iteritems(compartment_dict)}

    cobra_model = Model(sbml_model_id)
    metabolites = []
    # Keyed by the original SBML species id so that the species references
    # in the reactions can be resolved below.
    metabolite_dict = {}
    # Convert sbml_metabolites to cobra.Metabolites
    for sbml_metabolite in sbml_species:
        # Skip sbml boundary species
        if sbml_metabolite.getBoundaryCondition():
            continue

        if (old_sbml or legacy_metabolite) and \
                sbml_metabolite.getId().endswith('_b'):
            # Deal with incorrect sbml from bigg.ucsd.edu
            continue
        tmp_metabolite = Metabolite()
        metabolite_id = tmp_metabolite.id = sbml_metabolite.getId()
        tmp_metabolite.compartment = compartment_re.split(
            sbml_metabolite.getCompartment())[-1]
        if legacy_metabolite:
            if tmp_metabolite.compartment not in compartment_dict:
                tmp_metabolite.compartment = legacy_compartment_converter[
                    tmp_metabolite.compartment]
            tmp_metabolite.id = parse_legacy_id(
                tmp_metabolite.id, tmp_metabolite.compartment,
                use_hyphens=use_hyphens)
        if use_hyphens:
            tmp_metabolite.id = metabolite_re.split(
                tmp_metabolite.id)[-1].replace('__', '-')
        else:
            # Just in case the SBML ids are ill-formed and use -
            tmp_metabolite.id = metabolite_re.split(
                tmp_metabolite.id)[-1].replace('-', '__')
        tmp_metabolite.name = sbml_metabolite.getName()
        tmp_formula = ''
        tmp_metabolite.notes = parse_legacy_sbml_notes(
            sbml_metabolite.getNotesString())
        if sbml_metabolite.isSetCharge():
            tmp_metabolite.charge = sbml_metabolite.getCharge()
        if "CHARGE" in tmp_metabolite.notes:
            note_charge = tmp_metabolite.notes["CHARGE"][0]
            try:
                note_charge = float(note_charge)
                if note_charge == int(note_charge):
                    note_charge = int(note_charge)
            except (TypeError, ValueError, OverflowError):
                # Only conversion failures are expected here (non-numeric
                # text, nan or inf); anything else should propagate.
                warn("charge of %s is not a number (%s)" %
                     (tmp_metabolite.id, str(note_charge)))
            else:
                if ((tmp_metabolite.charge is None) or
                        (tmp_metabolite.charge == note_charge)):
                    tmp_metabolite.notes.pop("CHARGE")
                    # Set the charge to the one from the notes if it was not
                    # assigned before or is the same.
                    tmp_metabolite.charge = note_charge
                else:  # tmp_metabolite.charge != note_charge
                    msg = "different charges specified for %s (%d and %d)"
                    msg = msg % (tmp_metabolite.id,
                                 tmp_metabolite.charge, note_charge)
                    warn(msg)
                    # Chances are a 0 note charge was written by mistake. We
                    # will default to the note_charge in this case.
                    if tmp_metabolite.charge == 0:
                        tmp_metabolite.charge = note_charge

        # Take the formula from the notes (case-insensitive key). The break
        # directly after the pop keeps the iteration over the dict safe.
        for the_key in tmp_metabolite.notes.keys():
            if the_key.lower() == 'formula':
                tmp_formula = tmp_metabolite.notes.pop(the_key)[0]
                break
        if tmp_formula == '' and old_sbml:
            # Old files carry the formula as the last "_"-separated part of
            # the name; split it off.
            tmp_formula = tmp_metabolite.name.split('_')[-1]
            tmp_metabolite.name = tmp_metabolite.name[:-len(tmp_formula) - 1]
        tmp_metabolite.formula = tmp_formula
        metabolite_dict[metabolite_id] = tmp_metabolite
        metabolites.append(tmp_metabolite)
    cobra_model.add_metabolites(metabolites)

    # Construct the vectors and matrices for holding connectivity and numerical
    # info to feed to the cobra toolbox.
    # Always assume steady state simulations so b is set to 0
    cobra_reaction_list = []
    coefficients = {}
    for sbml_reaction in sbml_reactions:
        if use_hyphens:
            # Change the ids to match conventions used by the Palsson lab.
            reaction = Reaction(reaction_re.split(
                sbml_reaction.getId())[-1].replace('__', '-'))
        else:
            # Just in case the SBML ids are ill-formed and use -
            reaction = Reaction(reaction_re.split(
                sbml_reaction.getId())[-1].replace('-', '__'))
        cobra_reaction_list.append(reaction)
        # reaction.exchange_reaction = 0
        reaction.name = sbml_reaction.getName()
        cobra_metabolites = {}
        # Use the cobra.Metabolite class here
        for sbml_metabolite in sbml_reaction.getListOfReactants():
            tmp_metabolite_id = sbml_metabolite.getSpecies()
            # This deals with boundary metabolites
            if tmp_metabolite_id in metabolite_dict:
                tmp_metabolite = metabolite_dict[tmp_metabolite_id]
                cobra_metabolites[tmp_metabolite] = - \
                    sbml_metabolite.getStoichiometry()
        for sbml_metabolite in sbml_reaction.getListOfProducts():
            tmp_metabolite_id = sbml_metabolite.getSpecies()
            # This deals with boundary metabolites
            if tmp_metabolite_id in metabolite_dict:
                tmp_metabolite = metabolite_dict[tmp_metabolite_id]
                # Handle the case where the metabolite was specified both
                # as a reactant and as a product.
                if tmp_metabolite in cobra_metabolites:
                    warn("%s appears as a reactant and product %s" %
                         (tmp_metabolite_id, reaction.id))
                    cobra_metabolites[
                        tmp_metabolite] += sbml_metabolite.getStoichiometry()
                    # if the combined stoichiometry is 0, remove the metabolite
                    if cobra_metabolites[tmp_metabolite] == 0:
                        cobra_metabolites.pop(tmp_metabolite)
                else:
                    cobra_metabolites[
                        tmp_metabolite] = sbml_metabolite.getStoichiometry()
        # check for nan
        for met, v in iteritems(cobra_metabolites):
            if isnan(v) or isinf(v):
                warn("invalid value %s for metabolite '%s' in reaction '%s'" %
                     (str(v), met.id, reaction.id))
        reaction.add_metabolites(cobra_metabolites)
        # Parse the kinetic law info here.
        parameter_dict = {}
        # If lower and upper bounds are specified in the Kinetic Law then
        # they override the sbml reversible attribute.  If they are not
        # specified then the bounds are determined by getReversible.
        if not sbml_reaction.getKineticLaw():

            if sbml_reaction.getReversible():
                parameter_dict['lower_bound'] = __default_lower_bound
                parameter_dict['upper_bound'] = __default_upper_bound
            else:
                # Assume that irreversible reactions only proceed from left to
                # right.
                parameter_dict['lower_bound'] = 0
                parameter_dict['upper_bound'] = __default_upper_bound

            parameter_dict[
                'objective_coefficient'] = __default_objective_coefficient
        else:
            # Parameter ids are lower-cased so the lookups below are
            # case-insensitive.
            for sbml_parameter in \
                    sbml_reaction.getKineticLaw().getListOfParameters():
                parameter_dict[
                    sbml_parameter.getId().lower()] = sbml_parameter.getValue()

        if 'lower_bound' in parameter_dict:
            reaction.lower_bound = parameter_dict['lower_bound']
        elif 'lower bound' in parameter_dict:
            reaction.lower_bound = parameter_dict['lower bound']
        elif sbml_reaction.getReversible():
            reaction.lower_bound = __default_lower_bound
        else:
            reaction.lower_bound = 0

        if 'upper_bound' in parameter_dict:
            reaction.upper_bound = parameter_dict['upper_bound']
        elif 'upper bound' in parameter_dict:
            reaction.upper_bound = parameter_dict['upper bound']
        else:
            reaction.upper_bound = __default_upper_bound

        # Accept both spellings of the parameter id, mirroring the bound
        # handling above.
        if 'objective_coefficient' in parameter_dict:
            objective_coefficient = parameter_dict['objective_coefficient']
        else:
            objective_coefficient = parameter_dict.get(
                'objective coefficient', __default_objective_coefficient)
        if objective_coefficient != 0:
            coefficients[reaction] = objective_coefficient

        # ensure values are not set to nan or inf
        if isnan(reaction.lower_bound) or isinf(reaction.lower_bound):
            reaction.lower_bound = __default_lower_bound
        if isnan(reaction.upper_bound) or isinf(reaction.upper_bound):
            reaction.upper_bound = __default_upper_bound

        reaction_note_dict = parse_legacy_sbml_notes(
            sbml_reaction.getNotesString())
        # Parse the reaction notes.
        # POTENTIAL BUG: DEALING WITH LEGACY 'SBML' THAT IS NOT IN A
        # STANDARD FORMAT
        # TODO: READ IN OTHER NOTES AND GIVE THEM A reaction_ prefix.
        # TODO: Make sure genes get added as objects
        if 'GENE ASSOCIATION' in reaction_note_dict:
            rule = reaction_note_dict['GENE ASSOCIATION'][0]
            try:
                rule.encode('ascii')
            except (UnicodeEncodeError, UnicodeDecodeError):
                warn("gene_reaction_rule '%s' is not ascii compliant" % rule)
            # Strip a pair of escaped double quotes wrapping the whole rule.
            if rule.startswith("&quot;") and rule.endswith("&quot;"):
                rule = rule[6:-6]
            reaction.gene_reaction_rule = rule
            if 'GENE LIST' in reaction_note_dict:
                reaction.systematic_names = reaction_note_dict['GENE LIST'][0]
            elif ('GENES' in reaction_note_dict and
                  reaction_note_dict['GENES'] != ['']):
                reaction.systematic_names = reaction_note_dict['GENES'][0]
            elif 'LOCUS' in reaction_note_dict:
                gene_id_to_object = {x.id: x for x in reaction._genes}
                for the_row in reaction_note_dict['LOCUS']:
                    # Each row is a '#'-separated list of KEY:VALUE items;
                    # the leading item is the locus id itself.
                    tmp_row_dict = {}
                    the_row = 'LOCUS:' + the_row.lstrip('_').rstrip('#')
                    for the_item in the_row.split('#'):
                        k, v = the_item.split(':')
                        tmp_row_dict[k] = v
                    tmp_locus_id = tmp_row_dict['LOCUS']
                    if 'TRANSCRIPT' in tmp_row_dict:
                        tmp_locus_id = tmp_locus_id + \
                                       '.' + tmp_row_dict['TRANSCRIPT']

                    if 'ABBREVIATION' in tmp_row_dict:
                        gene_id_to_object[tmp_locus_id].name = tmp_row_dict[
                            'ABBREVIATION']

        if 'SUBSYSTEM' in reaction_note_dict:
            reaction.subsystem = reaction_note_dict.pop('SUBSYSTEM')[0]

        reaction.notes = reaction_note_dict

    # Now, add all of the reactions to the model.
    cobra_model.id = sbml_model.getId()
    # Populate the compartment list - This will be done based on
    # cobra.Metabolites in cobra.Reactions in the future.
    cobra_model.compartments = compartment_dict

    cobra_model.add_reactions(cobra_reaction_list)
    set_objective(cobra_model, coefficients)
    return cobra_model
Esempio n. 21
0
def parse_xml_into_model(xml, number=float):
    """Build a cobra Model from a parsed SBML document using the fbc package.

    Parameters
    ----------
    xml :
        Parsed XML tree or element on which the ``sbml:model`` element can
        be found.
    number : callable
        Converter applied to the numeric attributes (flux bound values,
        stoichiometries and objective coefficients). Defaults to float.

    Returns
    -------
    Model
        The parsed model. When no ``fbc:listOfObjectives`` element is
        present a warning is issued and the model is returned without an
        objective.

    Raises
    ------
    CobraSBMLError
        If a reaction refers to a flux bound id that is not defined, or the
        objective refers to a reaction that is not in the model.
    Exception
        If a gene association contains an unsupported tag.
    """
    xml_model = xml.find(ns("sbml:model"))
    if get_attrib(xml_model, "fbc:strict") != "true":
        warn('loading SBML model without fbc:strict="true"')

    model_id = get_attrib(xml_model, "id")
    model = Model(model_id)
    model.name = xml_model.get("name")

    model.compartments = {c.get("id"): c.get("name") for c in
                          xml_model.findall(COMPARTMENT_XPATH)}
    # add metabolites
    for species in xml_model.findall(SPECIES_XPATH % 'false'):
        met = get_attrib(species, "id", require=True)
        met = Metabolite(clip(met, "M_"))
        met.name = species.get("name")
        annotate_cobra_from_sbml(met, species)
        met.compartment = species.get("compartment")
        met.charge = get_attrib(species, "fbc:charge", int)
        met.formula = get_attrib(species, "fbc:chemicalFormula")
        model.add_metabolites([met])
    # Detect boundary metabolites - In case they have been mistakenly
    # added. They should not actually appear in a model
    boundary_metabolites = {clip(i.get("id"), "M_")
                            for i in xml_model.findall(SPECIES_XPATH % 'true')}

    # add genes
    for sbml_gene in xml_model.iterfind(GENES_XPATH):
        # The id is required: it is transformed right away and used to
        # construct the Gene.
        gene_id = get_attrib(
            sbml_gene, "fbc:id", require=True).replace(SBML_DOT, ".")
        gene = Gene(clip(gene_id, "G_"))
        gene.name = get_attrib(sbml_gene, "fbc:name")
        if gene.name is None:
            gene.name = get_attrib(sbml_gene, "fbc:label")
        annotate_cobra_from_sbml(gene, sbml_gene)
        model.genes.append(gene)

    def process_gpr(sub_xml):
        """recursively convert gpr xml to a gpr string"""
        if sub_xml.tag == OR_TAG:
            return "( " + ' or '.join(process_gpr(i) for i in sub_xml) + " )"
        elif sub_xml.tag == AND_TAG:
            return "( " + ' and '.join(process_gpr(i) for i in sub_xml) + " )"
        elif sub_xml.tag == GENEREF_TAG:
            gene_id = get_attrib(sub_xml, "fbc:geneProduct", require=True)
            return clip(gene_id, "G_")
        else:
            raise Exception("unsupported tag " + sub_xml.tag)

    # Flux bounds are stored once and referenced from the reactions by id.
    bounds = {bound.get("id"): get_attrib(bound, "value", type=number)
              for bound in xml_model.iterfind(BOUND_XPATH)}
    # add reactions
    reactions = []
    for sbml_reaction in xml_model.iterfind(
            ns("sbml:listOfReactions/sbml:reaction")):
        reaction = get_attrib(sbml_reaction, "id", require=True)
        reaction = Reaction(clip(reaction, "R_"))
        reaction.name = sbml_reaction.get("name")
        annotate_cobra_from_sbml(reaction, sbml_reaction)
        lb_id = get_attrib(sbml_reaction, "fbc:lowerFluxBound", require=True)
        ub_id = get_attrib(sbml_reaction, "fbc:upperFluxBound", require=True)
        try:
            reaction.upper_bound = bounds[ub_id]
            reaction.lower_bound = bounds[lb_id]
        except KeyError as e:
            # str(KeyError) is the repr of the key, which would put a second
            # pair of quotes in the message; use the key itself.
            raise CobraSBMLError(
                "No constant bound with id '%s'" % (e.args[0],))
        reactions.append(reaction)

        # Net stoichiometry per metabolite id: reactants count negative,
        # products positive.
        stoichiometry = defaultdict(lambda: 0)
        for species_reference in sbml_reaction.findall(
                ns("sbml:listOfReactants/sbml:speciesReference")):
            met_name = clip(species_reference.get("species"), "M_")
            # Parsed the same way as for the products below, so a missing
            # attribute is reported by get_attrib instead of failing inside
            # number(None).
            stoichiometry[met_name] -= \
                get_attrib(species_reference, "stoichiometry",
                           type=number, require=True)
        for species_reference in sbml_reaction.findall(
                ns("sbml:listOfProducts/sbml:speciesReference")):
            met_name = clip(species_reference.get("species"), "M_")
            stoichiometry[met_name] += \
                get_attrib(species_reference, "stoichiometry",
                           type=number, require=True)
        # needs to have keys of metabolite objects, not ids
        object_stoichiometry = {}
        for met_id in stoichiometry:
            if met_id in boundary_metabolites:
                warn("Boundary metabolite '%s' used in reaction '%s'" %
                     (met_id, reaction.id))
                continue
            try:
                metabolite = model.metabolites.get_by_id(met_id)
            except KeyError:
                warn("ignoring unknown metabolite '%s' in reaction %s" %
                     (met_id, reaction.id))
                continue
            object_stoichiometry[metabolite] = stoichiometry[met_id]
        reaction.add_metabolites(object_stoichiometry)
        # set gene reaction rule
        gpr_xml = sbml_reaction.find(GPR_TAG)
        # A valid association has exactly one child element.
        if gpr_xml is not None and len(gpr_xml) != 1:
            warn("ignoring invalid geneAssociation for " + repr(reaction))
            gpr_xml = None
        gpr = process_gpr(gpr_xml[0]) if gpr_xml is not None else ''
        # remove outside parenthesis, if any
        if gpr.startswith("(") and gpr.endswith(")"):
            gpr = gpr[1:-1].strip()
        gpr = gpr.replace(SBML_DOT, ".")
        reaction.gene_reaction_rule = gpr
    try:
        model.add_reactions(reactions)
    except ValueError as e:
        warn(str(e))

    # objective coefficients are handled after all reactions are added
    obj_list = xml_model.find(ns("fbc:listOfObjectives"))
    if obj_list is None:
        warn("listOfObjectives element not found")
        return model
    target_objective_id = get_attrib(obj_list, "fbc:activeObjective")
    # NOTE(review): if no objective matches the active id, find() returns
    # None and the lookups below presumably fail - confirm whether that case
    # should be reported explicitly.
    target_objective = obj_list.find(
        ns("fbc:objective[@fbc:id='{}']".format(target_objective_id)))
    obj_direction_long = get_attrib(target_objective, "fbc:type")
    obj_direction = LONG_SHORT_DIRECTION[obj_direction_long]

    obj_query = OBJECTIVES_XPATH % target_objective_id
    coefficients = {}
    for sbml_objective in obj_list.findall(obj_query):
        rxn_id = clip(get_attrib(sbml_objective, "fbc:reaction"), "R_")
        try:
            objective_reaction = model.reactions.get_by_id(rxn_id)
        except KeyError:
            raise CobraSBMLError("Objective reaction '%s' not found" % rxn_id)
        try:
            coefficients[objective_reaction] = get_attrib(
                sbml_objective, "fbc:coefficient", type=number)
        except ValueError as e:
            warn(str(e))
    set_objective(model, coefficients)
    model.solver.objective.direction = obj_direction
    return model
Esempio n. 22
0
def create_cobra_model_from_sbml_file(sbml_filename, old_sbml=False,
                                      legacy_metabolite=False,
                                      print_time=False, use_hyphens=False):
    """convert an SBML XML file into a cobra.Model object.

    Supports SBML Level 2 Versions 1 and 4.  The function will detect if the
    SBML fbc package is used in the file and run the converter if the fbc
    package is used.

    Parameters
    ----------
    sbml_filename: string
    old_sbml: bool
        Set to True if the XML file has metabolite formula appended to
        metabolite names. This was a poorly designed artifact that persists in
        some models.
    legacy_metabolite: bool
        If True then assume that the metabolite id has the compartment id
         appended after an underscore (e.g. _c for cytosol). This has not been
         implemented but will be soon.
    print_time: bool
         deprecated
    use_hyphens: bool
        If True, double underscores (__) in an SBML ID will be converted to
        hyphens

    Returns
    -------
    Model : The parsed cobra model
    """
    if not libsbml:
        raise ImportError('create_cobra_model_from_sbml_file '
                          'requires python-libsbml')

    __default_lower_bound = -1000
    __default_upper_bound = 1000
    __default_objective_coefficient = 0
    # Ensure that the file exists
    if not isfile(sbml_filename):
        raise IOError('Your SBML file is not found: %s' % sbml_filename)
    # Expressions to change SBML Ids to Palsson Lab Ids
    metabolite_re = re.compile('^M_')
    reaction_re = re.compile('^R_')
    compartment_re = re.compile('^C_')
    if print_time:
        warn("print_time is deprecated", DeprecationWarning)
    model_doc = libsbml.readSBML(sbml_filename)
    if model_doc.getPlugin("fbc") is not None:
        from libsbml import ConversionProperties, LIBSBML_OPERATION_SUCCESS
        conversion_properties = ConversionProperties()
        conversion_properties.addOption(
            "convert fbc to cobra", True, "Convert FBC model to Cobra model")
        result = model_doc.convert(conversion_properties)
        if result != LIBSBML_OPERATION_SUCCESS:
            raise Exception("Conversion of SBML+fbc to COBRA failed")
    sbml_model = model_doc.getModel()
    sbml_model_id = sbml_model.getId()
    sbml_species = sbml_model.getListOfSpecies()
    sbml_reactions = sbml_model.getListOfReactions()
    sbml_compartments = sbml_model.getListOfCompartments()
    compartment_dict = dict([(compartment_re.split(x.getId())[-1], x.getName())
                             for x in sbml_compartments])
    if legacy_metabolite:
        # Deal with the palsson lab appending the compartment id to the
        # metabolite id
        new_dict = {}
        for the_id, the_name in compartment_dict.items():
            if the_name == '':
                new_dict[the_id[0].lower()] = the_id
            else:
                new_dict[the_id] = the_name
        compartment_dict = new_dict
        legacy_compartment_converter = dict(
            [(v, k) for k, v in iteritems(compartment_dict)])

    cobra_model = Model(sbml_model_id)
    metabolites = []
    metabolite_dict = {}
    # Convert sbml_metabolites to cobra.Metabolites
    for sbml_metabolite in sbml_species:
        # Skip sbml boundary species
        if sbml_metabolite.getBoundaryCondition():
            continue

        if (old_sbml or legacy_metabolite) and \
                sbml_metabolite.getId().endswith('_b'):
            # Deal with incorrect sbml from bigg.ucsd.edu
            continue
        tmp_metabolite = Metabolite()
        metabolite_id = tmp_metabolite.id = sbml_metabolite.getId()
        tmp_metabolite.compartment = compartment_re.split(
            sbml_metabolite.getCompartment())[-1]
        if legacy_metabolite:
            if tmp_metabolite.compartment not in compartment_dict:
                tmp_metabolite.compartment = legacy_compartment_converter[
                    tmp_metabolite.compartment]
            tmp_metabolite.id = parse_legacy_id(
                tmp_metabolite.id, tmp_metabolite.compartment,
                use_hyphens=use_hyphens)
        if use_hyphens:
            tmp_metabolite.id = metabolite_re.split(
                tmp_metabolite.id)[-1].replace('__', '-')
        else:
            # Just in case the SBML ids are ill-formed and use -
            tmp_metabolite.id = metabolite_re.split(
                tmp_metabolite.id)[-1].replace('-', '__')
        tmp_metabolite.name = sbml_metabolite.getName()
        tmp_formula = ''
        tmp_metabolite.notes = parse_legacy_sbml_notes(
            sbml_metabolite.getNotesString())
        if sbml_metabolite.isSetCharge():
            tmp_metabolite.charge = sbml_metabolite.getCharge()
        if "CHARGE" in tmp_metabolite.notes:
            note_charge = tmp_metabolite.notes["CHARGE"][0]
            try:
                note_charge = float(note_charge)
                if note_charge == int(note_charge):
                    note_charge = int(note_charge)
            except:
                warn("charge of %s is not a number (%s)" %
                     (tmp_metabolite.id, str(note_charge)))
            else:
                if ((tmp_metabolite.charge is None) or
                        (tmp_metabolite.charge == note_charge)):
                    tmp_metabolite.notes.pop("CHARGE")
                    # set charge to the one from notes if not assigend before
                    # the same
                    tmp_metabolite.charge = note_charge
                else:  # tmp_metabolite.charge != note_charge
                    msg = "different charges specified for %s (%d and %d)"
                    msg = msg % (tmp_metabolite.id,
                                 tmp_metabolite.charge, note_charge)
                    warn(msg)
                    # Chances are a 0 note charge was written by mistake. We
                    # will default to the note_charge in this case.
                    if tmp_metabolite.charge == 0:
                        tmp_metabolite.charge = note_charge

        for the_key in tmp_metabolite.notes.keys():
            if the_key.lower() == 'formula':
                tmp_formula = tmp_metabolite.notes.pop(the_key)[0]
                break
        if tmp_formula == '' and old_sbml:
            tmp_formula = tmp_metabolite.name.split('_')[-1]
            tmp_metabolite.name = tmp_metabolite.name[:-len(tmp_formula) - 1]
        tmp_metabolite.formula = tmp_formula
        metabolite_dict.update({metabolite_id: tmp_metabolite})
        metabolites.append(tmp_metabolite)
    cobra_model.add_metabolites(metabolites)

    # Construct the vectors and matrices for holding connectivity and numerical
    # info to feed to the cobra toolbox.
    # Always assume steady state simulations so b is set to 0
    cobra_reaction_list = []
    coefficients = {}
    for sbml_reaction in sbml_reactions:
        if use_hyphens:
            # Change the ids to match conventions used by the Palsson lab.
            reaction = Reaction(reaction_re.split(
                sbml_reaction.getId())[-1].replace('__', '-'))
        else:
            # Just in case the SBML ids are ill-formed and use -
            reaction = Reaction(reaction_re.split(
                sbml_reaction.getId())[-1].replace('-', '__'))
        cobra_reaction_list.append(reaction)
        # reaction.exchange_reaction = 0
        reaction.name = sbml_reaction.getName()
        cobra_metabolites = {}
        # Use the cobra.Metabolite class here
        for sbml_metabolite in sbml_reaction.getListOfReactants():
            tmp_metabolite_id = sbml_metabolite.getSpecies()
            # This deals with boundary metabolites
            if tmp_metabolite_id in metabolite_dict:
                tmp_metabolite = metabolite_dict[tmp_metabolite_id]
                cobra_metabolites[tmp_metabolite] = - \
                    sbml_metabolite.getStoichiometry()
        for sbml_metabolite in sbml_reaction.getListOfProducts():
            tmp_metabolite_id = sbml_metabolite.getSpecies()
            # This deals with boundary metabolites
            if tmp_metabolite_id in metabolite_dict:
                tmp_metabolite = metabolite_dict[tmp_metabolite_id]
                # Handle the case where the metabolite was specified both
                # as a reactant and as a product.
                if tmp_metabolite in cobra_metabolites:
                    warn("%s appears as a reactant and product %s" %
                         (tmp_metabolite_id, reaction.id))
                    cobra_metabolites[
                        tmp_metabolite] += sbml_metabolite.getStoichiometry()
                    # if the combined stoichiometry is 0, remove the metabolite
                    if cobra_metabolites[tmp_metabolite] == 0:
                        cobra_metabolites.pop(tmp_metabolite)
                else:
                    cobra_metabolites[
                        tmp_metabolite] = sbml_metabolite.getStoichiometry()
        # check for nan
        for met, v in iteritems(cobra_metabolites):
            if isnan(v) or isinf(v):
                warn("invalid value %s for metabolite '%s' in reaction '%s'" %
                     (str(v), met.id, reaction.id))
        reaction.add_metabolites(cobra_metabolites)
        # Parse the kinetic law info here.
        parameter_dict = {}
        # If lower and upper bounds are specified in the Kinetic Law then
        # they override the sbml reversible attribute.  If they are not
        # specified then the bounds are determined by getReversible.
        if not sbml_reaction.getKineticLaw():

            if sbml_reaction.getReversible():
                parameter_dict['lower_bound'] = __default_lower_bound
                parameter_dict['upper_bound'] = __default_upper_bound
            else:
                # Assume that irreversible reactions only proceed from left to
                # right.
                parameter_dict['lower_bound'] = 0
                parameter_dict['upper_bound'] = __default_upper_bound

            parameter_dict[
                'objective_coefficient'] = __default_objective_coefficient
        else:
            for sbml_parameter in \
                    sbml_reaction.getKineticLaw().getListOfParameters():
                parameter_dict[
                    sbml_parameter.getId().lower()] = sbml_parameter.getValue()

        if 'lower_bound' in parameter_dict:
            reaction.lower_bound = parameter_dict['lower_bound']
        elif 'lower bound' in parameter_dict:
            reaction.lower_bound = parameter_dict['lower bound']
        elif sbml_reaction.getReversible():
            reaction.lower_bound = __default_lower_bound
        else:
            reaction.lower_bound = 0

        if 'upper_bound' in parameter_dict:
            reaction.upper_bound = parameter_dict['upper_bound']
        elif 'upper bound' in parameter_dict:
            reaction.upper_bound = parameter_dict['upper bound']
        else:
            reaction.upper_bound = __default_upper_bound

        objective_coefficient = parameter_dict.get(
            'objective_coefficient', parameter_dict.get(
                'objective_coefficient', __default_objective_coefficient))
        if objective_coefficient != 0:
            coefficients[reaction] = objective_coefficient

        # ensure values are not set to nan or inf
        if isnan(reaction.lower_bound) or isinf(reaction.lower_bound):
            reaction.lower_bound = __default_lower_bound
        if isnan(reaction.upper_bound) or isinf(reaction.upper_bound):
            reaction.upper_bound = __default_upper_bound

        reaction_note_dict = parse_legacy_sbml_notes(
            sbml_reaction.getNotesString())
        # Parse the reaction notes.
        # POTENTIAL BUG: DEALING WITH LEGACY 'SBML' THAT IS NOT IN A
        # STANDARD FORMAT
        # TODO: READ IN OTHER NOTES AND GIVE THEM A reaction_ prefix.
        # TODO: Make sure genes get added as objects
        if 'GENE ASSOCIATION' in reaction_note_dict:
            rule = reaction_note_dict['GENE ASSOCIATION'][0]
            try:
                rule.encode('ascii')
            except (UnicodeEncodeError, UnicodeDecodeError):
                warn("gene_reaction_rule '%s' is not ascii compliant" % rule)
            if rule.startswith("&quot;") and rule.endswith("&quot;"):
                rule = rule[6:-6]
            reaction.gene_reaction_rule = rule
            if 'GENE LIST' in reaction_note_dict:
                reaction.systematic_names = reaction_note_dict['GENE LIST'][0]
            elif ('GENES' in reaction_note_dict and
                  reaction_note_dict['GENES'] != ['']):
                reaction.systematic_names = reaction_note_dict['GENES'][0]
            elif 'LOCUS' in reaction_note_dict:
                gene_id_to_object = dict([(x.id, x) for x in reaction._genes])
                for the_row in reaction_note_dict['LOCUS']:
                    tmp_row_dict = {}
                    the_row = 'LOCUS:' + the_row.lstrip('_').rstrip('#')
                    for the_item in the_row.split('#'):
                        k, v = the_item.split(':')
                        tmp_row_dict[k] = v
                    tmp_locus_id = tmp_row_dict['LOCUS']
                    if 'TRANSCRIPT' in tmp_row_dict:
                        tmp_locus_id = tmp_locus_id + \
                                       '.' + tmp_row_dict['TRANSCRIPT']

                    if 'ABBREVIATION' in tmp_row_dict:
                        gene_id_to_object[tmp_locus_id].name = tmp_row_dict[
                            'ABBREVIATION']

        if 'SUBSYSTEM' in reaction_note_dict:
            reaction.subsystem = reaction_note_dict.pop('SUBSYSTEM')[0]

        reaction.notes = reaction_note_dict

    # Now, add all of the reactions to the model.
    cobra_model.id = sbml_model.getId()
    # Populate the compartment list - This will be done based on
    # cobra.Metabolites in cobra.Reactions in the future.
    cobra_model.compartments = compartment_dict

    cobra_model.add_reactions(cobra_reaction_list)
    set_objective(cobra_model, coefficients)
    return cobra_model
    def test_gene_knockout_computation(self, salmonella):
        """Exercise gene knockout computation and model gene deletion.

        Each scenario compares the reactions reported by
        find_gene_knockout_reactions (called with and without precompiled
        rules) against the expected reactions, then deletes the genes from
        the model and compares the ids of the trimmed reactions before
        undoing the deletion.
        """
        def knockout_with_compiled_rules(cobra_model, gene_list):
            # Same query as the plain call, but the compiled rules are
            # handed in explicitly.
            return find_gene_knockout_reactions(
                cobra_model,
                gene_list,
                compiled_gene_reaction_rules=get_compiled_gene_reaction_rules(
                    cobra_model))

        def trimmed_ids(m):
            # Ids of the reactions currently recorded as trimmed on the model.
            return set(rxn.id for rxn in m._trimmed_reactions)

        def check_knockout(m, gene_ids, expected_reaction_ids):
            genes = list(map(m.genes.get_by_id, gene_ids))
            expected = set(map(m.reactions.get_by_id, expected_reaction_ids))
            plain_result = set(find_gene_knockout_reactions(m, genes))
            fast_result = set(knockout_with_compiled_rules(m, genes))
            assert plain_result == expected
            assert fast_result == expected
            delete_model_genes(m, gene_ids, cumulative_deletions=False)
            assert trimmed_ids(m) == expected_reaction_ids
            undelete_model_genes(m)

        gene_list = ['STM1067', 'STM0227']
        dependent_reactions = {
            '3HAD121', '3HAD160', '3HAD80', '3HAD140', '3HAD180', '3HAD100',
            '3HAD181', '3HAD120', '3HAD60', '3HAD141', '3HAD161', 'T2DECAI',
            '3HAD40'
        }
        salmonella_cases = (
            (gene_list, dependent_reactions),
            (['STM4221'], {'PGI'}),
            (['STM1746.S'], {'4PEPTabcpp'}),
        )
        for gene_ids, reaction_ids in salmonella_cases:
            check_knockout(salmonella, gene_ids, reaction_ids)

        # cumulative deletions accumulate the trimmed reactions
        delete_model_genes(salmonella, gene_list[:1])
        delete_model_genes(salmonella,
                           gene_list[1:],
                           cumulative_deletions=True)
        delete_model_genes(salmonella, ["STM4221"], cumulative_deletions=True)
        dependent_reactions.add('PGI')
        assert trimmed_ids(salmonella) == dependent_reactions

        # a non-cumulative deletion after cumulative ones starts over
        delete_model_genes(salmonella, ["STM4221"], cumulative_deletions=False)
        assert trimmed_ids(salmonella) == {'PGI'}

        # the bound of a previously trimmed reaction must be restored
        assert salmonella.reactions.get_by_id("T2DECAI").upper_bound == 1000.

        # gene name that is a substring of another gene name
        toy_model = Model()
        toy_reaction = Reaction("test1")
        toy_reaction.gene_reaction_rule = "eggs or (spam and eggspam)"
        toy_model.add_reaction(toy_reaction)
        check_knockout(toy_model, ["eggs"], set())
        check_knockout(toy_model, ["eggs", "spam"], {'test1'})

        # Each entry: a rule to install on the toy reaction, followed by the
        # (gene ids, expected reaction ids) pairs checked under that rule.
        rule_scenarios = (
            # nested boolean expression
            ("g1 and g2 and (g3 or g4 or (g5 and g6))", (
                (["g3"], set()),
                (["g1"], {'test1'}),
                (["g5"], set()),
                (["g3", "g4", "g5"], {'test1'}),
            )),
            # gene names that are python keywords or expressions
            ("g1 and (for or in)", (
                (["for", "in"], {'test1'}),
                (["for"], set()),
            )),
            ("g1 and g2 and g2.conjugate", (
                (["g2"], {"test1"}),
                (["g2.conjugate"], {"test1"}),
            )),
            ("g1 and (try:' or 'except:1)", (
                (["try:'"], set()),
                (["try:'", "'except:1"], {"test1"}),
            )),
        )
        for rule, cases in rule_scenarios:
            toy_reaction.gene_reaction_rule = rule
            for gene_ids, reaction_ids in cases:
                check_knockout(toy_model, gene_ids, reaction_ids)
Esempio n. 24
0
def from_mat_struct(mat_struct, model_id=None, inf=inf):
    """Build a model from a COBRA toolbox struct.

    The struct is the structured array produced when the MATLAB file is
    read by scipy.io.loadmat.

    Parameters
    ----------
    mat_struct
        Structured array describing the model.  The fields "rxns", "mets",
        "S", "lb" and "ub" are required; "c", "description", "metComps",
        "comps", "compNames", "metNames", "metFormulas", "metCharge",
        "grRules", "rxnNames" and "subSystems" are used when present.
    model_id
        Identifier for the new model.  When None, the id is taken from the
        "description" field if there is one, else "imported_model" is used.
    inf
        Value written in place of a positive-infinite upper bound; its
        negation replaces a negative-infinite lower bound.

    Returns
    -------
    The populated model.

    Raises
    ------
    ValueError
        If the struct has no named fields or lacks a required field.
    """
    struct = mat_struct
    field_names = struct.dtype.names
    if field_names is None:
        raise ValueError("not a valid mat struct")
    available = set(field_names)
    if not {"rxns", "mets", "S", "lb", "ub"} <= available:
        raise ValueError("not a valid mat struct")

    objective_vector = struct["c"][0, 0] if "c" in available else None
    if objective_vector is None:
        warn("objective vector 'c' not found")

    model = Model()
    if model_id is not None:
        model.id = model_id
    elif "description" not in available:
        model.id = "imported_model"
    else:
        description = struct["description"][0, 0][0]
        several_ids = (not isinstance(description, string_types)
                       and len(description) > 1)
        if several_ids:
            model.id = description[0]
            warn("Several IDs detected, only using the first.")
        else:
            model.id = description

    # Explicit compartment data is only used when all three fields exist;
    # otherwise the compartment is derived from the metabolite id.
    has_compartment_fields = {'metComps', 'comps', 'compNames'} <= available
    for index, entry in enumerate(struct["mets"][0, 0]):
        metabolite = Metabolite()
        metabolite.id = str(entry[0][0])
        if has_compartment_fields:
            # stored indices are shifted down by one before use
            comp_index = struct["metComps"][0, 0][index][0] - 1
            metabolite.compartment = struct['comps'][0, 0][comp_index][0][0]
            if metabolite.compartment not in model.compartments:
                model.compartments[metabolite.compartment] = \
                    struct['compNames'][0, 0][comp_index][0][0]
        else:
            metabolite.compartment = _get_id_compartment(metabolite.id)
            if metabolite.compartment not in model.compartments:
                model.compartments[metabolite.compartment] = \
                    metabolite.compartment
        # Optional annotations: a missing or malformed field is skipped.
        try:
            metabolite.name = str(struct["metNames"][0, 0][index][0][0])
        except (IndexError, ValueError):
            pass
        try:
            metabolite.formula = str(struct["metFormulas"][0][0][index][0][0])
        except (IndexError, ValueError):
            pass
        try:
            metabolite.charge = float(struct["metCharge"][0, 0][index][0])
            # store whole-number charges as int
            whole_charge = int(metabolite.charge)
            if metabolite.charge == whole_charge:
                metabolite.charge = whole_charge
        except (IndexError, ValueError):
            pass
        model.add_metabolites([metabolite])

    # (reaction attribute, struct field) pairs for the optional annotations
    optional_reaction_fields = (
        ("gene_reaction_rule", 'grRules'),
        ("name", "rxnNames"),
        ("subsystem", 'subSystems'),
    )
    lower_bounds = struct["lb"][0, 0]
    upper_bounds = struct["ub"][0, 0]
    reactions = []
    coefficients = {}
    for index, entry in enumerate(struct["rxns"][0, 0]):
        reaction = Reaction()
        reaction.id = str(entry[0][0])
        reaction.lower_bound = float(lower_bounds[index][0])
        reaction.upper_bound = float(upper_bounds[index][0])
        # swap infinite bounds for the caller-supplied value
        if isinf(reaction.lower_bound) and reaction.lower_bound < 0:
            reaction.lower_bound = -inf
        if isinf(reaction.upper_bound) and reaction.upper_bound > 0:
            reaction.upper_bound = inf
        if objective_vector is not None:
            coefficients[reaction] = float(objective_vector[index][0])
        for attribute, field in optional_reaction_fields:
            try:
                setattr(reaction, attribute,
                        str(struct[field][0, 0][index][0][0]))
            except (IndexError, ValueError):
                pass
        reactions.append(reaction)
    model.add_reactions(reactions)
    set_objective(model, coefficients)

    # Fill in the stoichiometry from the non-zero entries of S
    # (row -> metabolite, column -> reaction).
    stoichiometry = scipy_sparse.coo_matrix(struct["S"][0, 0])
    for row, col, value in zip(stoichiometry.row, stoichiometry.col,
                               stoichiometry.data):
        model.reactions[col].add_metabolites({model.metabolites[row]: value})
    return model