Ejemplo n.º 1
0
def fix_stoichiometry(model, term_id2clu, species_id2term_id, ub_chebi_ids, onto, r_ids_to_ignore=None):
    """Resolve stoichiometry conflicts inside term clusters using worker threads.

    For every reaction (except those in r_ids_to_ignore) the set of term ids of its
    participants is collected; any such set with more than one term marks a potential
    conflict.  For each cluster containing several terms that intersect a conflict set,
    a StoichiometryFixingThread is launched; all threads are joined before returning.
    """
    clu2term_ids = invert_map(term_id2clu)
    conflicts = []
    for reaction in model.getListOfReactions():
        if r_ids_to_ignore and reaction.getId() in r_ids_to_ignore:
            continue
        participants = chain((ref.getSpecies() for ref in reaction.getListOfReactants()),
                             (ref.getSpecies() for ref in reaction.getListOfProducts()))
        # Map each participant to its term id (species without a term keep their own id)
        t_ids = {species_id2term_id.get(s_id, s_id) for s_id in participants}
        if len(t_ids) > 1:
            conflicts.append(t_ids)
    workers = []
    for clu, term_ids in clu2term_ids.items():
        if len(term_ids) <= 1:
            continue
        # Conflict subsets restricted to this cluster, deduplicated
        clu_conflicts = [set(it) for it in {tuple(t_ids & term_ids) for t_ids in conflicts} if len(it) > 1]
        real_term_ids = {t_id for t_id in term_ids if onto.get_term(t_id)}
        unmapped_s_ids = {s_id for s_id in term_ids if not onto.get_term(s_id)}
        if clu_conflicts:
            worker = StoichiometryFixingThread(model, species_id2term_id, ub_chebi_ids, unmapped_s_ids,
                                               real_term_ids, clu_conflicts, onto, clu, term_id2clu,
                                               r_ids_to_ignore=r_ids_to_ignore)
            workers.append(worker)
            worker.start()  # launch the worker
    for worker in workers:
        worker.join()  # wait for all workers to finish
Ejemplo n.º 2
0
def select_representative_terms(term_id2clu, onto):
    """
    Replaces each cluster with a tuple containing one ChEBI term id: (term_id, )
    :param term_id2clu: dict {term_id: cluster}
    :param onto: mod_sbml.onto.obo_ontology.Ontology ChEBI ontology
    :return: updated (inplace) dict term_id2clu {term_id: (cluster_representative_term_id, )}
    """
    clu2term_ids = invert_map(term_id2clu)
    used = set()  # representatives already assigned to some cluster
    fake_term_count = 0  # counter for generated placeholder term ids
    for clu, term_ids in clu2term_ids.items():
        terms = {onto.get_term(t) for t in term_ids if onto.get_term(t)}
        # Common ancestors of all the cluster's terms via equivalence relationships
        # (plain set(...) instead of the redundant identity set comprehension)
        common_ancestors = set(onto.common_points(terms, relationships=EQUIVALENT_RELATIONSHIPS)) if terms else set()
        options = common_ancestors - used
        if options:
            common_ancestor_term = options.pop()
        else:
            # No unused common ancestor left: create a fresh placeholder term,
            # named after an (already used) ancestor when one exists
            name = common_ancestors.pop().get_name() + " (another)" if common_ancestors else 'fake term'
            common_ancestor_term = Term(onto=onto, t_id="chebi:unknown_{0}".format(fake_term_count), name=name)
            onto.add_term(common_ancestor_term)
            fake_term_count += 1
        used.add(common_ancestor_term)
        for t in term_ids:
            term_id2clu[t] = (common_ancestor_term.get_id(), )
    return term_id2clu
Ejemplo n.º 3
0
    def __init__(self, st_matrix=None, pws=None,
                 N=None, V=None, m_id2i=None, r_id2i=None, efm_id2i=None, boundary_m_ids=None,
                 r_ids=None, m_ids=None, efm_ids=None,
                 r_id2gr_id=None, gr_id2r_id2c=None, efm_id2gr_id=None, m_id2gr_id=None,):
        """
        Initializes the container either from ready-made objects (st_matrix, pws)
        or from raw data: a stoichiometric matrix N / pathway matrix V plus the
        index mappings (m_id2i, r_id2i, efm_id2i).  Raw data, when given, takes
        precedence over the pre-built objects.
        """
        # Fall back to the reaction index of the given matrix / pathway set
        if not r_id2i:
            r_id2i = st_matrix.r_id2i if st_matrix else (pws.r_id2i if pws else {})
        self.st_matrix = st_matrix
        # Raw stoichiometric data overrides a passed-in st_matrix
        if N is not None and m_id2i is not None and r_id2i is not None:
            self.st_matrix = StoichiometricMatrix(N, m_id2i, r_id2i, boundary_m_ids)
        self.pws = pws
        # Raw pathway data overrides a passed-in pathway set
        if V is not None and r_id2i is not None:
            self.pws = PathwaySet(V, r_id2i, efm_id2i)

        # NOTE(review): self.r_id2i / self.m_id2i / self.efm_id2i are not assigned in
        # this method — presumably properties delegating to st_matrix/pws; confirm.
        self.r_ids = set(r_ids) if r_ids else set(self.r_id2i.keys())
        self.m_ids = set(m_ids) if m_ids else set(self.m_id2i.keys())
        self.efm_ids = set(efm_ids) if efm_ids else set(self.efm_id2i.keys())

        # Grouping (lumping) information for reactions, EFMs and metabolites
        self.r_id2gr_id = r_id2gr_id if r_id2gr_id else {}
        self.gr_id2r_id2c = gr_id2r_id2c if gr_id2r_id2c else {}
        self.efm_id2gr_id = efm_id2gr_id if efm_id2gr_id else {}
        self.gr_id2efm_ids = invert_map(self.efm_id2gr_id, list)
        self.m_id2gr_id = m_id2gr_id if m_id2gr_id else {}

        # Caches filled by later computations
        self.coupled_rs = set()
        self.r_types = set()
        self.folded_efms = set()
        self.pathways = set()
Ejemplo n.º 4
0
def generalize_species(model, s_id2chebi_id, ub_s_ids, chebi, ub_chebi_ids, threshold=UBIQUITOUS_THRESHOLD,
                       r_ids_to_ignore=None):
    """
    Groups metabolites of the model into clusters.
    :param model: libsbml.Model model of interest
    :param s_id2chebi_id: dict {metabolite_id: ChEBI_term_id}
    :param ub_s_ids: collection of ubiquitous metabolite ids
    :param chebi: mod_sbml.onto.obo_ontology.Ontology ChEBI ontology
    :param ub_chebi_ids: collection of ubiquitous ChEBI term ids
    :param threshold: threshold for a metabolite to be considered as frequently participating in reactions
    and therefore ubiquitous
    :param r_ids_to_ignore: (optional) ids of reactions whose stoichiometry preserving constraint can be ignores
    :return: tuple (s_id2clu, ub_s_ids)
    """
    # Species without a ChEBI mapping
    unmapped_s_ids = {s.getId() for s in model.getListOfSpecies() if s.getId() not in s_id2chebi_id}
    term_id2clu = find_term_clustering(model, chebi, s_id2chebi_id, unmapped_s_ids, ub_chebi_ids,
                                       r_ids_to_ignore=r_ids_to_ignore)
    s_id2clu = {}
    if term_id2clu:
        term_id2clu = select_representative_terms(term_id2clu, chebi)
        s_id2clu = compute_s_id2clu(unmapped_s_ids, model, s_id2chebi_id, term_id2clu)
        # Drop singleton clusters: a cluster of one species is no generalization
        for s_ids in invert_map(s_id2clu).values():
            if len(s_ids) == 1:
                del s_id2clu[s_ids.pop()]
    if not ub_s_ids:
        # Infer ubiquitous species from reaction-participation frequency,
        # excluding anything that already belongs to a cluster
        frequent_ch_ids = get_frequent_term_ids(model, threshold)
        ub_s_ids = select_metabolite_ids_by_term_ids(model, frequent_ch_ids) - set(s_id2clu.keys())
    # unmapped_s_ids = {s_id for s_id in unmapped_s_ids if s_id not in s_id2clu}
    # infer_clusters(model, unmapped_s_ids, s_id2clu, species_id2chebi_id, ub_chebi_ids)
    return s_id2clu, ub_s_ids
Ejemplo n.º 5
0
def write_detailed_r_id2c(model, r_id2c, f):
    """Write reactions grouped by coefficient to the open file handle f.

    Groups are ordered by decreasing |coefficient| (negative after positive at
    equal magnitude); within a group, reaction ids are sorted alphabetically.
    """
    c2r_ids = invert_map(r_id2c)
    for coefficient, reaction_ids in sorted(c2r_ids.items(), key=lambda it: (-abs(it[0]), -it[0])):
        for reaction_id in sorted(reaction_ids):
            formula = get_sbml_r_formula(model, model.getReaction(reaction_id),
                                         show_compartments=False, show_metabolite_ids=True)
            f.write('%g\t%s:\t%s\n' % (coefficient, reaction_id, formula))
        # Blank line between coefficient groups
        f.write('\n')
Ejemplo n.º 6
0
 def in_species_conflict(term, candidate_sps, proposal_s_id2clu):
     """Return True if clustering candidate_sps under term would merge species
     that co-occur in the same reaction (a stoichiometry conflict).

     NOTE(review): relies on s_id2r_ids and clu2s_ids from an enclosing scope
     that is not visible in this fragment.
     """
     proposal_clu2s_ids = invert_map(proposal_s_id2clu)
     for s in candidate_sps:
         # Candidate cluster for this species: (compartment, term)
         s_clu = s.getCompartment(), term
         rs = {r_id for r_id in s_id2r_ids[s.getId()]}
         # Species already in this cluster: existing plus proposed members
         clu_s_ids = clu2s_ids[s_clu] | proposal_clu2s_ids[s_clu]
         for clu_s_id in clu_s_ids:
             if {r_id for r_id in s_id2r_ids[clu_s_id]} & rs:
                 return True
     return False
 def in_species_conflict(term, candidate_sps, proposal_s_id2clu):
     """Return True if clustering candidate_sps under term would merge species
     that co-occur in the same reaction (a stoichiometry conflict).

     NOTE(review): duplicate of the previous definition; relies on s_id2r_ids
     and clu2s_ids from an enclosing scope not visible in this fragment.
     """
     proposal_clu2s_ids = invert_map(proposal_s_id2clu)
     for s in candidate_sps:
         # Candidate cluster for this species: (compartment, term)
         s_clu = s.getCompartment(), term
         rs = {r_id for r_id in s_id2r_ids[s.getId()]}
         # Species already in this cluster: existing plus proposed members
         clu_s_ids = clu2s_ids[s_clu] | proposal_clu2s_ids[s_clu]
         for clu_s_id in clu_s_ids:
             if {r_id for r_id in s_id2r_ids[clu_s_id]} & rs:
                 return True
     return False
Ejemplo n.º 8
0
def serialize_generalization(r_id2clu, s_id2clu, sbml, chebi, path):
    """
    Serializes metabolite groups of a generalized model to an xlsx worksheet.
    :param r_id2clu: dict {reaction_id: cluster}
    :param s_id2clu: dict {species_id: (group_id, chebi_term)}
    :param sbml: str, path to the SBML file (with the groups plugin) to read
    :param chebi: ChEBI ontology used to annotate individual species
    :param path: str, output path
        NOTE(review): not used in the visible part of this function, and the
        workbook is never saved here — confirm against the full original.
    """
    doc = libsbml.SBMLReader().readSBML(sbml)
    model = doc.getModel()
    groups_plugin = model.getPlugin("groups")
    clu2r_ids, clu2s_ids = invert_map(r_id2clu), invert_map(s_id2clu)
    wb = openpyxl.Workbook()
    ws = wb.create_sheet(0, "Metabolite Groups")
    row = 1
    add_values(ws, row, 1, ["Group Id", "Group Name", "Group CHEBI", "Id", "Name", "Compartment", "CHEBI"], HEADER_STYLE)
    row += 1
    processed_s_ids = set()
    # Sort groups by group id. Bug fix: the original used a tuple-parameter lambda
    # `lambda ((g_id, _), s_ids): g_id`, which is Python-2-only syntax (removed by
    # PEP 3113) and a SyntaxError on Python 3 — index into the pair instead.
    for (g_id, ch_term), s_ids in sorted(clu2s_ids.items(), key=lambda clu_s_ids: clu_s_ids[0][0]):
        group = groups_plugin.getGroup(g_id)
        add_values(ws, row, 1, [g_id, group.getName(), ch_term.get_id() if ch_term else ''])
        # Sort member species by the suffix that follows '__' in their ids
        for s_id in sorted(s_ids, key=lambda s_id: s_id[s_id.find('__'):]):
            species = model.getSpecies(s_id)
            ch_term = get_chebi_term_by_annotation(species, chebi)
            add_values(ws, row, 4, [s_id, species.getName(), model.getCompartment(species.getCompartment()).getName(),
                                    ch_term.get_id() if ch_term else ''])
            row += 1
        processed_s_ids |= s_ids
Ejemplo n.º 9
0
def _log_clusters(term_id2clu, onto, model):
    """Log every non-trivial cluster (size > 1) with member names, largest first,
    followed by the sorted list of cluster sizes."""
    clu2term = invert_map(term_id2clu)
    blueprint = []
    logging.info("-------------------\nquotient species sets:\n-------------------")
    for clu in sorted(clu2term.keys(), key=lambda k: -len(clu2term[k])):
        term_ids = clu2term[clu]
        if len(term_ids) == 1:
            continue
        blueprint.append(len(term_ids))
        # Resolve each member to a readable name: ontology term, then model species,
        # then the raw id as a last resort
        names = []
        for member in term_ids:
            term = onto.get_term(member)
            if term:
                names.append(term.get_name())
            elif model.getSpecies(member):
                names.append(model.getSpecies(member).getName())
            else:
                names.append(member)
        logging.info("(%d)\t%s\n" % (len(term_ids), names))
    logging.info("Cluster sizes: %s\n-------------------\n\n" % sorted(blueprint, reverse=True))
Ejemplo n.º 10
0
def cover_with_onto_terms(model, onto, species_id2chebi_id, term_id2clu, ubiquitous_chebi_ids, r_ids_to_ignore=None):
    """Re-cover clusters with ontology terms after an ontology update.

    If update_onto changed the ontology, singleton clusters are removed from
    term_id2clu and multi-term clusters are re-covered via cover_t_ids; terms
    that receive no new cluster are dropped.  Returns whether the ontology
    was updated.
    """
    onto_updated = update_onto(onto, term_id2clu)
    if not onto_updated:
        return onto_updated
    for clu, t_ids in invert_map(term_id2clu).items():
        if len(t_ids) == 1:
            # A one-term cluster is no generalization — drop it
            del term_id2clu[t_ids.pop()]
            continue
        new_t_id2clu = cover_t_ids(model, species_id2chebi_id, ubiquitous_chebi_ids, t_ids, onto, clu,
                                   r_ids_to_ignore=r_ids_to_ignore)
        for t_id in t_ids:
            if t_id in new_t_id2clu:
                term_id2clu[t_id] = new_t_id2clu[t_id]
            else:
                # No covering term found — remove this term from the clustering
                del term_id2clu[t_id]
    return onto_updated
Ejemplo n.º 11
0
def generalize_model(in_sbml, chebi, groups_sbml, out_sbml, ub_s_ids=None, ub_chebi_ids=None, ignore_biomass=True):
    """
    Generalizes a model.
    :param in_sbml: str, path to the input SBML file
    :param chebi: mod_sbml.onto.obo_ontology.Ontology ChEBI ontology
    :param groups_sbml: str, path to the output SBML file (with groups extension)
    :param out_sbml: str, path to the output SBML file (generalized)
    :param ub_s_ids: optional, ids of ubiquitous species (will be inferred if set to None)
    :param ub_chebi_ids: optional, ids of ubiquitous ChEBI terms (will be inferred if set to None)
    :param ignore_biomass: boolean, whether to ignore the biomass reaction (and its stoichiometry preserving constraint)
    :return: tuple (r_id2g_eq, s_id2gr_id, s_id2chebi_id, ub_s_ids):
    dict {reaction_id: reaction_group_id}, dict {species_id: species_group_id}, dict {species_id: ChEBI_term_id},
    collection of ubiquitous species_ids.
    """
    # input_model
    input_doc = libsbml.SBMLReader().readSBML(in_sbml)
    input_model = input_doc.getModel()
    # Biomass reactions are exempted from the stoichiometry-preserving constraint
    r_ids_to_ignore = get_biomass_r_ids(input_model) if ignore_biomass else None

    # Model clean-up before generalization
    remove_is_a_reactions(input_model)
    annotate_metabolites(input_model, chebi)
    # TODO: fix comp separation
    # separate_boundary_metabolites(input_model)
    remove_unused_elements(input_model)

    logging.info("mapping species to ChEBI")
    s_id2chebi_id = get_species_id2chebi_id(input_model)
    ub_chebi_ids, ub_s_ids = get_ub_elements(input_model, chebi, s_id2chebi_id, ub_chebi_ids, ub_s_ids)

    # Shrink the ontology to the terms actually used by the model (plus ancestry)
    terms = (t for t in (chebi.get_term(t_id) for t_id in s_id2chebi_id.values()) if t)
    old_onto_len = len(chebi)
    filter_ontology(chebi, terms, relationships=EQUIVALENT_RELATIONSHIPS, min_deepness=3)
    logging.info('Filtered the ontology from %d terms to %d' % (old_onto_len, len(chebi)))

    # Ubiquity threshold: 10% of the reactions, clamped to [3, UBIQUITOUS_THRESHOLD]
    threshold = min(max(3, int(0.1 * input_model.getNumReactions())), UBIQUITOUS_THRESHOLD)
    s_id2clu, ub_s_ids = generalize_species(input_model, s_id2chebi_id, ub_s_ids, chebi, ub_chebi_ids, threshold,
                                            r_ids_to_ignore=r_ids_to_ignore)
    logging.info("generalized species")
    r_id2clu = generalize_reactions(input_model, s_id2clu, s_id2chebi_id, ub_chebi_ids,
                                    r_ids_to_ignore=r_ids_to_ignore)
    logging.info("generalized reactions")

    # Unwrap the 1-tuple term in the cluster key: (c_id, (term,)) -> (c_id, term)
    clu2s_ids = {(c_id, term): s_ids for ((c_id, (term, )), s_ids) in invert_map(s_id2clu).items()}
    r_id2g_eq, s_id2gr_id = save_as_comp_generalized_sbml(input_model, out_sbml, groups_sbml, r_id2clu, clu2s_ids,
                                                          ub_s_ids, chebi)
    return r_id2g_eq, s_id2gr_id, s_id2chebi_id, ub_s_ids
Ejemplo n.º 12
0
def maximize(unmapped_s_ids, model, term_id2clu, species_id2term_id, ub_chebi_ids, r_ids_to_ignore=None):
    """Run a MaximizingThread for every multi-term cluster and wait for all of
    them to complete.  Returns the (possibly updated in place) term_id2clu."""
    clu2term_ids = invert_map(term_id2clu)
    s_id2clu = compute_s_id2clu(unmapped_s_ids, model, species_id2term_id, term_id2clu)

    r_id2clu = generalize_reactions(model, s_id2clu, species_id2term_id, ub_chebi_ids, r_ids_to_ignore=r_ids_to_ignore)

    workers = []
    for clu, term_ids in clu2term_ids.items():
        # Singleton clusters need no maximization
        if len(term_ids) > 1:
            worker = MaximizingThread(model, term_ids, species_id2term_id, clu, term_id2clu,
                                      s_id2clu, ub_chebi_ids, r_id2clu, r_ids_to_ignore=r_ids_to_ignore)
            workers.append(worker)
            worker.start()  # launch the worker
    for worker in workers:
        worker.join()  # wait for the worker to finish
    return term_id2clu
Ejemplo n.º 13
0
def update_onto(onto, term_id2clu):
    """Remove from the ontology the common ancestors shared by more than one
    multi-term cluster (plus their generalized ancestors and equivalents).

    Returns True if anything was removed from the ontology.
    """
    ancestors = []
    for clu, t_ids in invert_map(term_id2clu).items():
        if len(t_ids) <= 1:
            continue
        terms = {onto.get_term(t_id) for t_id in t_ids if onto.get_term(t_id)}
        if terms:
            # Collect the common ancestors of this cluster's terms
            ancestors.extend(set(onto.common_points(terms, relationships=EQUIVALENT_RELATIONSHIPS)))
    removed_something = False
    for term, occurrences in Counter(ancestors).items():
        # Only ancestors shared by more than one cluster are removed
        if occurrences <= 1:
            continue
        # Skip if already removed as an ancestor/equivalent of another term
        if not onto.get_term(term.get_id()):
            continue
        for ancestor in onto.get_generalized_ancestors(term, relationships=EQUIVALENT_RELATIONSHIPS):
            onto.remove_term(ancestor, True)
        for equivalent in onto.get_equivalents(term, relationships=EQUIVALENT_RELATIONSHIPS):
            onto.remove_term(equivalent, True)
        onto.remove_term(term, True)
        removed_something = True
    return removed_something
Ejemplo n.º 14
0
def infer_clusters(model,
                   unmapped_s_ids,
                   s_id2clu,
                   s_id2term_id,
                   ubiquitous_chebi_ids,
                   r_ids_to_ignore=None):
    # TODO: double check it
    return
    if not unmapped_s_ids:
        return
    term_id2s_ids = invert_map(s_id2term_id)
    clu2s_ids = invert_map(s_id2clu)
    vk2r_ids = get_vk2r_ids(model,
                            s_id2clu,
                            s_id2term_id,
                            ubiquitous_chebi_ids,
                            r_ids_to_ignore=r_ids_to_ignore)
    vk2r_ids = {
        vk: r_ids
        for (vk, r_ids) in vk2r_ids.items() if len(r_ids) > 1
    }

    simplified_vk2vk_set = defaultdict(set)
    for vk in vk2r_ids.keys():
        simplified_vk2vk_set[vertical_key2simplified_vertical_key(vk)].add(vk)

    s_id2r_ids = defaultdict(list)
    for r in (r for r in model.getListOfReactions()
              if r.getNumReactants() + r.getNumProducts() > 2):
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue
        r_id = r.getId()
        for s_id in chain((species_ref.getSpecies()
                           for species_ref in r.getListOfReactants()),
                          (species_ref.getSpecies()
                           for species_ref in r.getListOfProducts())):
            s_id2r_ids[s_id].append(r_id)

    def in_species_conflict(term, candidate_sps, proposal_s_id2clu):
        proposal_clu2s_ids = invert_map(proposal_s_id2clu)
        for s in candidate_sps:
            s_clu = s.getCompartment(), term
            rs = {r_id for r_id in s_id2r_ids[s.getId()]}
            clu_s_ids = clu2s_ids[s_clu] | proposal_clu2s_ids[s_clu]
            for clu_s_id in clu_s_ids:
                if {r_id for r_id in s_id2r_ids[clu_s_id]} & rs:
                    return True
        return False

    processed_r_ids = reduce(lambda s1, s2: s1 | s2, vk2r_ids.values(), set())

    for r in model.getListOfReactions():
        if r.getId(
        ) in processed_r_ids or not unmapped_s_ids & get_metabolites(r):
            continue
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue
        ub_rs, ub_ps, rs, ps = get_vertical_key(model, r, s_id2clu,
                                                s_id2term_id,
                                                ubiquitous_chebi_ids)
        vk = ub_rs, ub_ps, rs, ps
        rs, ps = set(rs), set(ps)
        partial_rs, partial_ps = {(s_id, c_id) for (s_id, c_id) in rs if s_id not in unmapped_s_ids}, \
                                 {(s_id, c_id) for (s_id, c_id) in ps if s_id not in unmapped_s_ids}
        if len(ub_rs) + len(ub_ps) + len(partial_rs) + len(partial_ps) < 2:
            continue
        simplified_vk = vertical_key2simplified_vertical_key(vk)
        if simplified_vk in simplified_vk2vk_set:
            ub_rs, ub_ps = tuple(sorted(ub_rs)), tuple(sorted(ub_ps))
            for (vk_ub_rs, vk_ub_ps, vk_rs,
                 vk_ps) in simplified_vk2vk_set[simplified_vk]:
                vk_rs, vk_ps = {(s_id if s_id not in s_id2clu else s_id2clu[s_id], c_id) for (s_id, c_id) in vk_rs}, \
                               {(s_id if s_id not in s_id2clu else s_id2clu[s_id], c_id) for (s_id, c_id) in vk_ps}
                proposal = {}
                if vk_ub_rs == ub_ps and vk_ub_ps == ub_rs and not partial_rs - vk_ps and not partial_ps - vk_rs:
                    vk_ub_rs, vk_ub_ps, partial_rs, partial_ps = vk_ub_ps, vk_ub_rs, partial_ps, partial_rs
                if vk_ub_rs == ub_rs and vk_ub_ps == ub_ps and not partial_rs - vk_rs and not partial_ps - vk_ps:
                    r_s_ids = rs - vk_rs
                    p_s_ids = ps - vk_ps
                    if 0 < len(r_s_ids) <= 1 and 0 < len(
                            p_s_ids) <= 1 and r_s_ids or p_s_ids:
                        if r_s_ids and vk_rs - rs:
                            s_id, c_id = r_s_ids.pop()
                            # if it is not a species id but a cluster, continue
                            if not isinstance(s_id, str):
                                continue
                            clu, c_id = (vk_rs - rs).pop()
                            # if it is a species id instead of a cluster, continue
                            if not isinstance(clu, tuple):
                                continue
                            candidate_sps = {
                                model.getSpecies(it)
                                for it in (term_id2s_ids[s_id2term_id[s_id]]
                                           if s_id in s_id2term_id else {s_id})
                            }
                            comp, term = clu
                            if not in_species_conflict(term, candidate_sps,
                                                       proposal):
                                for s in candidate_sps:
                                    proposal[
                                        s.getId()] = s.getCompartment(), term
                            else:
                                continue
                        if p_s_ids and vk_ps - ps:
                            s_id, c_id = p_s_ids.pop()
                            # if it is not a species id but a cluster, continue
                            if not isinstance(s_id, str):
                                continue
                            clu, c_id = (vk_ps - ps).pop()
                            # if it is a species id instead of a cluster, continue
                            if not isinstance(clu, tuple):
                                continue
                            candidate_sps = {
                                model.getSpecies(it)
                                for it in (term_id2s_ids[s_id2term_id[s_id]]
                                           if s_id in s_id2term_id else {s_id})
                            }
                            comp, term = clu
                            if not in_species_conflict(term, candidate_sps,
                                                       proposal):
                                for s in candidate_sps:
                                    proposal[
                                        s.getId()] = s.getCompartment(), term
                            else:
                                continue
                if proposal:
                    s_id2clu.update(proposal)
                    for s_id, clu in proposal.items():
                        clu2s_ids[clu].add(s_id)
                    unmapped_s_ids -= set(proposal.keys())
def suggest_clusters(model, unmapped_s_ids, term_id2clu, s_id2term_id, ubiquitous_chebi_ids, r_ids_to_ignore=None):
    """
    Extends term_id2clu (in place) with clusters suggested for unmapped species
    by matching reaction "vertical keys" against patterns shared by several
    reactions.

    NOTE(review): this function is currently DISABLED — it returns immediately
    (see the TODO below); everything after the first ``return`` is dead code
    kept for a future re-check.
    """
    # TODO: double check it
    return
    if not unmapped_s_ids:
        return
    s_id2clu = compute_s_id2clu(set(), model, s_id2term_id, term_id2clu)
    term_id2s_ids = invert_map(s_id2term_id)
    vk2r_ids = get_vk2r_ids(model, s_id2clu, s_id2term_id, ubiquitous_chebi_ids, r_ids_to_ignore=r_ids_to_ignore)
    # Keep only vertical keys shared by more than one reaction
    vk2r_ids = {vk: r_ids for (vk, r_ids) in vk2r_ids.items() if len(r_ids) > 1}
    # Union of all reaction ids already covered by a shared vertical key
    processed_r_ids = reduce(lambda s1, s2: s1 | s2, vk2r_ids.values(), set())

    # Group full vertical keys under their simplified form
    s_vk2vk = defaultdict(set)
    for vk in vk2r_ids.keys():
        s_vk2vk[vertical_key2simplified_vertical_key(vk)].add(vk)

    # Map each species to the (non-trivial) reactions it participates in
    s_id2r_ids = defaultdict(list)
    for r in (r for r in model.getListOfReactions() if r.getNumReactants() + r.getNumProducts() > 2):
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue
        r_id = r.getId()
        for s_id in chain((species_ref.getSpecies() for species_ref in r.getListOfReactants()),
                          (species_ref.getSpecies() for species_ref in r.getListOfProducts())):
            s_id2r_ids[s_id].append(r_id)

    for r in model.getListOfReactions():
        if r.getId() in processed_r_ids or not unmapped_s_ids & get_metabolites(r):
            continue
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue
        ub_rs, ub_ps, rs, ps = get_vertical_key(model, r, s_id2clu, s_id2term_id, ubiquitous_chebi_ids)
        vk = ub_rs, ub_ps, rs, ps
        rs, ps = set(rs), set(ps)
        # Participants already mapped to a term (i.e. not in unmapped_s_ids)
        partial_rs, partial_ps = {(s_id, c_id) for (s_id, c_id) in rs if s_id not in unmapped_s_ids}, \
                                 {(s_id, c_id) for (s_id, c_id) in ps if s_id not in unmapped_s_ids}
        if vk in vk2r_ids or len(ub_rs) + len(ub_ps) + len(partial_rs) + len(partial_ps) < 2:
            continue
        s_vk = vertical_key2simplified_vertical_key(vk)
        if s_vk in s_vk2vk:
            ub_rs, ub_ps = tuple(sorted(ub_rs)), tuple(sorted(ub_ps))
            for (vk_ub_rs, vk_ub_ps, vk_rs, vk_ps) in s_vk2vk[s_vk]:
                vk_rs, vk_ps = {(s_id if s_id not in s_id2clu else s_id2clu[s_id], c_id) for (s_id, c_id) in vk_rs}, \
                               {(s_id if s_id not in s_id2clu else s_id2clu[s_id], c_id) for (s_id, c_id) in vk_ps}
                proposal = {}
                # If the keys match in the reversed direction, flip the reaction
                if vk_ub_rs == ub_ps and vk_ub_ps == ub_rs and not partial_rs - vk_ps and not partial_ps - vk_rs:
                    vk_ub_rs, vk_ub_ps, partial_rs, partial_ps = vk_ub_ps, vk_ub_rs, partial_ps, partial_rs
                if vk_ub_rs == ub_rs and vk_ub_ps == ub_ps and not partial_rs - vk_rs and not partial_ps - vk_ps:
                    r_s_ids = rs - vk_rs
                    p_s_ids = ps - vk_ps
                    if 0 < len(r_s_ids) <= 1 and 0 < len(p_s_ids) <= 1 and r_s_ids or p_s_ids:
                        if r_s_ids and vk_rs - rs:
                            s_id, c_id = r_s_ids.pop()
                            clu, c_id = (vk_rs - rs).pop()
                            # if it is a species id instead of a cluster, continue
                            if not isinstance(clu, tuple):
                                continue
                            # if it is not a species id but a cluster, continue
                            if not isinstance(s_id, str):
                                continue
                            candidate_sps = {model.getSpecies(sp_id) for sp_id in
                                             (term_id2s_ids[s_id2term_id[s_id]] if s_id in s_id2term_id else [s_id])}
                            comp, term = clu
                            for s in candidate_sps:
                                proposal[s.getId()] = term
                        if p_s_ids and vk_ps - ps:
                            s_id, c_id = p_s_ids.pop()
                            clu, c_id = (vk_ps - ps).pop()
                            # if it is a species id instead of a cluster, continue
                            if not isinstance(clu, tuple):
                                continue
                            # if it is not a species id but a cluster, continue
                            if not isinstance(s_id, str):
                                continue
                            candidate_sps = {model.getSpecies(it) for it in
                                             (term_id2s_ids[s_id2term_id[s_id]] if s_id in s_id2term_id else {s_id})}
                            comp, term = clu
                            for s in candidate_sps:
                                proposal[s.getId()] = term
                if proposal:
                    # Accept the suggested clustering; wrap bare terms in a 1-tuple
                    for s_id, clu in proposal.items():
                        term_id2clu[s_id] = (clu, ) if not (isinstance(clu, tuple)) else clu
                        unmapped_s_ids -= {s_id}
Ejemplo n.º 16
0
def save_as_comp_generalized_sbml(input_model, out_sbml, groups_sbml, r_id2clu, clu2s_ids, ub_sps, onto):
    """
    Serializes the generalization:
    (i) if groups_sbml is given, the initial model annotated with species/reaction groups;
    (ii) if out_sbml is given, the generalized model itself.
    :param input_model: libsbml.Model, the initial model
    :param out_sbml: str, path for the generalized SBML file (falsy to skip)
    :param groups_sbml: str, path for the SBML file with the groups extension (falsy to skip)
    :param r_id2clu: dict {reaction_id: cluster}
    :param clu2s_ids: dict {(compartment_id, term_id): set of species ids}
    :param ub_sps: collection of ubiquitous species ids
    :param onto: ChEBI ontology
    :return: tuple (r_id2g_eq, s_id2gr_id): dict {reaction_id: (group_id, group_name)},
        dict {species_id: (group_id, term)}
    """
    logging.info("serializing generalization")
    s_id_increment, r_id_increment = 0, 0

    if groups_sbml:
        doc = convert_to_lev3_v1(input_model)
        groups_model = doc.getModel()
        groups_plugin = groups_model.getPlugin("groups")
        if groups_plugin:
            logging.info("  saving ubiquitous species annotations")
            s_group = groups_plugin.createGroup()
            s_group.setId("g_ubiquitous_sps")
            s_group.setKind(libsbml.GROUP_KIND_COLLECTION)
            s_group.setSBOTerm(SBO_CHEMICAL_MACROMOLECULE)
            s_group.setName("ubiquitous species")
            for s_id in ub_sps:
                member = s_group.createMember()
                member.setIdRef(s_id)
            add_annotation(s_group, libsbml.BQB_IS_DESCRIBED_BY, GROUP_TYPE_UBIQUITOUS)
    if out_sbml:
        # generalized model
        generalized_doc = libsbml.SBMLDocument(input_model.getSBMLNamespaces())
        generalized_model = generalized_doc.createModel()
        copy_elements(input_model, generalized_model)

    r_id2g_eq, s_id2gr_id = {}, {}
    if not clu2s_ids:
        logging.info("  nothing to serialize")
    else:
        clu2r_ids = invert_map(r_id2clu)
        logging.info("  creating species groups")
        for ((c_id, t), s_ids) in clu2s_ids.items():
            comp = input_model.getCompartment(c_id)
            if len(s_ids) > 1:
                t = onto.get_term(t)
                # Fall back to a name built from the member species when the term is unknown
                t_name, t_id = (t.get_name(), t.get_id()) if t \
                    else (' or '.join(input_model.getSpecies(s_id).getName() for s_id in s_ids), None)
                if not t_id:
                    t = t_name

                if out_sbml:
                    new_species = create_species(model=generalized_model, compartment_id=comp.getId(), type_id=None,
                                                 name="{0} ({1}) [{2}]".format(t_name, len(s_ids), comp.getName()))
                    add_annotation(new_species, libsbml.BQB_IS, t_id, CHEBI_PREFIX)
                    new_s_id = new_species.getId()
                else:
                    s_id_increment += 1
                    new_s_id = generate_unique_id(input_model, "s_g_", s_id_increment)
                for s_id in s_ids:
                    s_id2gr_id[s_id] = new_s_id, t

                if groups_sbml and groups_plugin:
                    # save as a group
                    s_group = groups_plugin.createGroup()
                    s_group.setId(new_s_id)
                    s_group.setKind(libsbml.GROUP_KIND_CLASSIFICATION)
                    s_group.setSBOTerm(SBO_CHEMICAL_MACROMOLECULE)
                    g_name = "{0} [{1}]".format(t_name, comp.getName())
                    s_group.setName(g_name)
                    # logging.info("%s: %d" % (g_name, len(s_ids)))
                    if t_id:
                        add_annotation(s_group, libsbml.BQB_IS, t_id, CHEBI_PREFIX)
                    for s_id in s_ids:
                        member = s_group.createMember()
                        member.setIdRef(s_id)
                    add_annotation(s_group, libsbml.BQB_IS_DESCRIBED_BY, GROUP_TYPE_EQUIV)

        def _generalized_s_id(species_id):
            # Map a species id to its group id if it was generalized.
            # (Renamed from a lambda bound to 'generalize_species', which shadowed
            # the module-level generalize_species function.)
            return s_id2gr_id[species_id][0] if species_id in s_id2gr_id else species_id

        s_id_to_generalize = set(s_id2gr_id.keys())
        logging.info("  creating reaction groups")
        for clu, r_ids in clu2r_ids.items():
            representative = input_model.getReaction(list(r_ids)[0])
            r_name = "generalized %s" % representative.getName()
            if out_sbml:
                reactants = dict(get_reactants(representative, stoichiometry=True))
                products = dict(get_products(representative, stoichiometry=True))
                # A singleton group with no generalized participants is copied as is
                if (len(r_ids) == 1) and \
                        not ((set(reactants.keys()) | set(products.keys())) & s_id_to_generalize):
                    generalized_model.addReaction(representative)
                    continue
                r_id2st = {_generalized_s_id(it): st for (it, st) in reactants.items()}
                p_id2st = {_generalized_s_id(it): st for (it, st) in products.items()}
                # The generalized reaction is reversible only if all members are
                reversible = next((False for r_id in r_ids if not input_model.getReaction(r_id).getReversible()), True)
                new_r_id = create_reaction(generalized_model, r_id2st, p_id2st, name=r_name, reversible=reversible,
                                           id_=representative.getId() if len(r_ids) == 1 else None).getId()
            elif len(r_ids) > 1:
                r_id_increment += 1
                new_r_id = generate_unique_id(input_model, "r_g_", r_id_increment)
            if len(r_ids) > 1:
                for r_id in r_ids:
                    r_id2g_eq[r_id] = new_r_id, r_name
                if groups_sbml and groups_plugin:
                    # save as a group
                    r_group = groups_plugin.createGroup()
                    r_group.setId(new_r_id)
                    r_group.setKind(libsbml.GROUP_KIND_COLLECTION)
                    r_group.setSBOTerm(SBO_BIOCHEMICAL_REACTION)
                    r_group.setName(r_name)
                    for r_id in r_ids:
                        member = r_group.createMember()
                        member.setIdRef(r_id)
                    add_annotation(r_group, libsbml.BQB_IS_DESCRIBED_BY, GROUP_TYPE_EQUIV)
    if out_sbml:
        remove_unused_elements(generalized_model)
        save_as_sbml(generalized_model, out_sbml)
    if groups_sbml and groups_model:
        save_as_sbml(groups_model, groups_sbml)

    # Bug fix: the original concatenated groups_sbml into the message, which raised a
    # TypeError whenever groups_sbml was None; report whichever output was written.
    logging.info("serialized to %s" % (groups_sbml if groups_sbml else out_sbml))
    return r_id2g_eq, s_id2gr_id
def infer_clusters(model, unmapped_s_ids, s_id2clu, s_id2term_id, ubiquitous_chebi_ids, r_ids_to_ignore=None):
    """
    Tries to attach ChEBI-unmapped species to existing clusters by matching the
    reactions they participate in against reactions that already share a
    generalized "vertical key". Mutates s_id2clu and unmapped_s_ids in place;
    always returns None.

    NOTE: currently disabled — the bare ``return`` right below the TODO makes
    everything after it unreachable; the body is kept pending review.

    :param model: libsbml.Model being generalized
    :param unmapped_s_ids: set of species ids that have no ChEBI mapping
    :param s_id2clu: dict {species_id: (compartment_id, term)} cluster mapping
    :param s_id2term_id: dict {species_id: ChEBI term id}
    :param ubiquitous_chebi_ids: collection of ubiquitous ChEBI term ids
    :param r_ids_to_ignore: optional collection of reaction ids to skip
    """
    # TODO: double check it
    return
    # ---- unreachable from here on (see NOTE in the docstring) ----
    if not unmapped_s_ids:
        return
    term_id2s_ids = invert_map(s_id2term_id)
    clu2s_ids = invert_map(s_id2clu)
    # Keep only vertical keys shared by more than one reaction: those are the
    # ones that witnessed actual generalization.
    vk2r_ids = get_vk2r_ids(model, s_id2clu, s_id2term_id, ubiquitous_chebi_ids, r_ids_to_ignore=r_ids_to_ignore)
    vk2r_ids = {vk: r_ids for (vk, r_ids) in vk2r_ids.items() if len(r_ids) > 1}

    # Group full vertical keys by their simplified form to find candidate matches.
    simplified_vk2vk_set = defaultdict(set)
    for vk in vk2r_ids.keys():
        simplified_vk2vk_set[vertical_key2simplified_vertical_key(vk)].add(vk)

    # species id -> ids of the (non-trivial, > 2 participants) reactions it takes part in
    s_id2r_ids = defaultdict(list)
    for r in (r for r in model.getListOfReactions() if r.getNumReactants() + r.getNumProducts() > 2):
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue
        r_id = r.getId()
        for s_id in chain((species_ref.getSpecies() for species_ref in r.getListOfReactants()),
                          (species_ref.getSpecies() for species_ref in r.getListOfProducts())):
            s_id2r_ids[s_id].append(r_id)

    def in_species_conflict(term, candidate_sps, proposal_s_id2clu):
        # True iff clustering any candidate species under (compartment, term)
        # would put two species of a common reaction into the same cluster
        # (checked against both the accepted clusters and the pending proposal).
        proposal_clu2s_ids = invert_map(proposal_s_id2clu)
        for s in candidate_sps:
            s_clu = s.getCompartment(), term
            rs = {r_id for r_id in s_id2r_ids[s.getId()]}
            clu_s_ids = clu2s_ids[s_clu] | proposal_clu2s_ids[s_clu]
            for clu_s_id in clu_s_ids:
                if {r_id for r_id in s_id2r_ids[clu_s_id]} & rs:
                    return True
        return False

    # Reactions already covered by a shared vertical key need no inference.
    processed_r_ids = reduce(lambda s1, s2: s1 | s2, vk2r_ids.values(), set())

    for r in model.getListOfReactions():
        if r.getId() in processed_r_ids or not unmapped_s_ids & get_metabolites(r):
            continue
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue
        ub_rs, ub_ps, rs, ps = get_vertical_key(model, r, s_id2clu, s_id2term_id, ubiquitous_chebi_ids)
        vk = ub_rs, ub_ps, rs, ps
        rs, ps = set(rs), set(ps)
        # Mapped (ChEBI-known) non-ubiquitous participants only.
        partial_rs, partial_ps = {(s_id, c_id) for (s_id, c_id) in rs if s_id not in unmapped_s_ids}, \
                                 {(s_id, c_id) for (s_id, c_id) in ps if s_id not in unmapped_s_ids}
        if len(ub_rs) + len(ub_ps) + len(partial_rs) + len(partial_ps) < 2:
            continue
        simplified_vk = vertical_key2simplified_vertical_key(vk)
        if simplified_vk in simplified_vk2vk_set:
            ub_rs, ub_ps = tuple(sorted(ub_rs)), tuple(sorted(ub_ps))
            for (vk_ub_rs, vk_ub_ps, vk_rs, vk_ps) in simplified_vk2vk_set[simplified_vk]:
                vk_rs, vk_ps = {(s_id if s_id not in s_id2clu else s_id2clu[s_id], c_id) for (s_id, c_id) in vk_rs}, \
                               {(s_id if s_id not in s_id2clu else s_id2clu[s_id], c_id) for (s_id, c_id) in vk_ps}
                proposal = {}
                # If the keys match in the reversed direction, flip sides first.
                if vk_ub_rs == ub_ps and vk_ub_ps == ub_rs and not partial_rs - vk_ps and not partial_ps - vk_rs:
                    vk_ub_rs, vk_ub_ps, partial_rs, partial_ps = vk_ub_ps, vk_ub_rs, partial_ps, partial_rs
                if vk_ub_rs == ub_rs and vk_ub_ps == ub_ps and not partial_rs - vk_rs and not partial_ps - vk_ps:
                    r_s_ids = rs - vk_rs
                    p_s_ids = ps - vk_ps
                    # NOTE(review): `and` binds tighter than `or`, so this parses as
                    # ((0 < len(r_s_ids) <= 1) and (0 < len(p_s_ids) <= 1) and r_s_ids) or p_s_ids
                    # — possibly not the intended grouping; confirm before re-enabling.
                    if 0 < len(r_s_ids) <= 1 and 0 < len(p_s_ids) <= 1 and r_s_ids or p_s_ids:
                        if r_s_ids and vk_rs - rs:
                            s_id, c_id = r_s_ids.pop()
                            # if it is not a species id but a cluster, continue
                            if not isinstance(s_id, str):
                                continue
                            clu, c_id = (vk_rs - rs).pop()
                            # if it is a species id instead of a cluster, continue
                            if not isinstance(clu, tuple):
                                continue
                            candidate_sps = {model.getSpecies(it) for it in
                                             (term_id2s_ids[s_id2term_id[s_id]] if s_id in s_id2term_id else {s_id})}
                            comp, term = clu
                            if not in_species_conflict(term, candidate_sps, proposal):
                                for s in candidate_sps:
                                    proposal[s.getId()] = s.getCompartment(), term
                            else:
                                continue
                        if p_s_ids and vk_ps - ps:
                            s_id, c_id = p_s_ids.pop()
                            # if it is not a species id but a cluster, continue
                            if not isinstance(s_id, str):
                                continue
                            clu, c_id = (vk_ps - ps).pop()
                            # if it is a species id instead of a cluster, continue
                            if not isinstance(clu, tuple):
                                continue
                            candidate_sps = {model.getSpecies(it) for it in
                                             (term_id2s_ids[s_id2term_id[s_id]] if s_id in s_id2term_id else {s_id})}
                            comp, term = clu
                            if not in_species_conflict(term, candidate_sps, proposal):
                                for s in candidate_sps:
                                    proposal[s.getId()] = s.getCompartment(), term
                            else:
                                continue
                # Accept the proposal: extend the clustering and shrink the unmapped set.
                if proposal:
                    s_id2clu.update(proposal)
                    for s_id, clu in proposal.items():
                        clu2s_ids[clu].add(s_id)
                    unmapped_s_ids -= set(proposal.keys())
Ejemplo n.º 18
0
def generalize_model(in_sbml,
                     chebi,
                     groups_sbml,
                     out_sbml,
                     ub_s_ids=None,
                     ub_chebi_ids=None,
                     ignore_biomass=True):
    """
    Runs the full model-generalization pipeline: annotates species with ChEBI,
    clusters equivalent species and reactions, and serializes the result.

    :param in_sbml: str, path to the input SBML file
    :param chebi: mod_sbml.onto.obo_ontology.Ontology ChEBI ontology
    :param groups_sbml: str, path to the output SBML file (with groups extension)
    :param out_sbml: str, path to the output SBML file (generalized)
    :param ub_s_ids: optional, ids of ubiquitous species (inferred when None)
    :param ub_chebi_ids: optional, ids of ubiquitous ChEBI terms (inferred when None)
    :param ignore_biomass: bool, whether the biomass reaction (and its
        stoichiometry-preserving constraint) should be ignored
    :return: tuple (r_id2g_eq, s_id2gr_id, s_id2chebi_id, ub_s_ids):
        dict {reaction_id: reaction_group_id}, dict {species_id: species_group_id},
        dict {species_id: ChEBI_term_id}, collection of ubiquitous species ids.
    """
    # Load and pre-clean the input model.
    doc = libsbml.SBMLReader().readSBML(in_sbml)
    model = doc.getModel()
    ignored_r_ids = get_biomass_r_ids(model) if ignore_biomass else None

    remove_is_a_reactions(model)
    annotate_metabolites(model, chebi)
    # TODO: fix comp separation
    # separate_boundary_metabolites(model)
    remove_unused_elements(model)

    logging.info("mapping species to ChEBI")
    s_id2chebi_id = get_species_id2chebi_id(model)
    ub_chebi_ids, ub_s_ids = get_ub_elements(model, chebi, s_id2chebi_id, ub_chebi_ids, ub_s_ids)

    # Shrink the ontology to the terms actually used by the model.
    mapped_terms = (t for t in (chebi.get_term(t_id) for t_id in s_id2chebi_id.values()) if t)
    initial_size = len(chebi)
    filter_ontology(chebi, mapped_terms, relationships=EQUIVALENT_RELATIONSHIPS, min_deepness=3)
    logging.info('Filtered the ontology from %d terms to %d' % (initial_size, len(chebi)))

    # Ubiquity threshold scales with model size, capped by UBIQUITOUS_THRESHOLD.
    threshold = min(max(3, int(0.1 * model.getNumReactions())), UBIQUITOUS_THRESHOLD)
    s_id2clu, ub_s_ids = generalize_species(model, s_id2chebi_id, ub_s_ids, chebi, ub_chebi_ids,
                                            threshold, r_ids_to_ignore=ignored_r_ids)
    logging.info("generalized species")
    r_id2clu = generalize_reactions(model, s_id2clu, s_id2chebi_id, ub_chebi_ids,
                                    r_ids_to_ignore=ignored_r_ids)
    logging.info("generalized reactions")

    # Re-key species clusters as (compartment_id, term) and serialize.
    clu2s_ids = {(c_id, term): s_ids
                 for ((c_id, (term, )), s_ids) in invert_map(s_id2clu).items()}
    r_id2g_eq, s_id2gr_id = save_as_comp_generalized_sbml(model, out_sbml, groups_sbml,
                                                          r_id2clu, clu2s_ids, ub_s_ids, chebi)
    return r_id2g_eq, s_id2gr_id, s_id2chebi_id, ub_s_ids
Ejemplo n.º 19
0
def filter_clu_to_terms(term2clu):
    """Drop trivial clusters: remove (in place) every term that is the sole
    member of its cluster, leaving only clusters with two or more terms."""
    for members in invert_map(term2clu).values():
        if len(members) == 1:
            term2clu.pop(members.pop())
Ejemplo n.º 20
0
def suggest_clusters(model,
                     unmapped_s_ids,
                     term_id2clu,
                     s_id2term_id,
                     ubiquitous_chebi_ids,
                     r_ids_to_ignore=None):
    """
    Suggests cluster assignments for ChEBI-unmapped species by matching the
    reactions they participate in against reactions that already share a
    generalized "vertical key". Mutates term_id2clu and unmapped_s_ids in
    place; always returns None.

    NOTE: currently disabled — the bare ``return`` right below the TODO makes
    everything after it unreachable; the body is kept pending review.

    :param model: libsbml.Model being generalized
    :param unmapped_s_ids: set of species ids that have no ChEBI mapping
    :param term_id2clu: dict {term_id: cluster}
    :param s_id2term_id: dict {species_id: ChEBI term id}
    :param ubiquitous_chebi_ids: collection of ubiquitous ChEBI term ids
    :param r_ids_to_ignore: optional collection of reaction ids to skip
    """
    # TODO: double check it
    return
    # ---- unreachable from here on (see NOTE in the docstring) ----
    if not unmapped_s_ids:
        return
    s_id2clu = compute_s_id2clu(set(), model, s_id2term_id, term_id2clu)
    term_id2s_ids = invert_map(s_id2term_id)
    # Keep only vertical keys shared by more than one reaction.
    vk2r_ids = get_vk2r_ids(model,
                            s_id2clu,
                            s_id2term_id,
                            ubiquitous_chebi_ids,
                            r_ids_to_ignore=r_ids_to_ignore)
    vk2r_ids = {
        vk: r_ids
        for (vk, r_ids) in vk2r_ids.items() if len(r_ids) > 1
    }
    # Reactions already covered by a shared vertical key need no suggestion.
    processed_r_ids = reduce(lambda s1, s2: s1 | s2, vk2r_ids.values(), set())

    # Group full vertical keys by their simplified form to find candidate matches.
    s_vk2vk = defaultdict(set)
    for vk in vk2r_ids.keys():
        s_vk2vk[vertical_key2simplified_vertical_key(vk)].add(vk)

    # species id -> ids of the (non-trivial, > 2 participants) reactions it takes part in
    s_id2r_ids = defaultdict(list)
    for r in (r for r in model.getListOfReactions()
              if r.getNumReactants() + r.getNumProducts() > 2):
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue
        r_id = r.getId()
        for s_id in chain((species_ref.getSpecies()
                           for species_ref in r.getListOfReactants()),
                          (species_ref.getSpecies()
                           for species_ref in r.getListOfProducts())):
            s_id2r_ids[s_id].append(r_id)

    for r in model.getListOfReactions():
        if r.getId(
        ) in processed_r_ids or not unmapped_s_ids & get_metabolites(r):
            continue
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue
        ub_rs, ub_ps, rs, ps = get_vertical_key(model, r, s_id2clu,
                                                s_id2term_id,
                                                ubiquitous_chebi_ids)
        vk = ub_rs, ub_ps, rs, ps
        rs, ps = set(rs), set(ps)
        # Mapped (ChEBI-known) non-ubiquitous participants only.
        partial_rs, partial_ps = {(s_id, c_id) for (s_id, c_id) in rs if s_id not in unmapped_s_ids}, \
                                 {(s_id, c_id) for (s_id, c_id) in ps if s_id not in unmapped_s_ids}
        if vk in vk2r_ids or len(ub_rs) + len(ub_ps) + len(partial_rs) + len(
                partial_ps) < 2:
            continue
        s_vk = vertical_key2simplified_vertical_key(vk)
        if s_vk in s_vk2vk:
            ub_rs, ub_ps = tuple(sorted(ub_rs)), tuple(sorted(ub_ps))
            for (vk_ub_rs, vk_ub_ps, vk_rs, vk_ps) in s_vk2vk[s_vk]:
                vk_rs, vk_ps = {(s_id if s_id not in s_id2clu else s_id2clu[s_id], c_id) for (s_id, c_id) in vk_rs}, \
                               {(s_id if s_id not in s_id2clu else s_id2clu[s_id], c_id) for (s_id, c_id) in vk_ps}
                proposal = {}
                # If the keys match in the reversed direction, flip sides first.
                if vk_ub_rs == ub_ps and vk_ub_ps == ub_rs and not partial_rs - vk_ps and not partial_ps - vk_rs:
                    vk_ub_rs, vk_ub_ps, partial_rs, partial_ps = vk_ub_ps, vk_ub_rs, partial_ps, partial_rs
                if vk_ub_rs == ub_rs and vk_ub_ps == ub_ps and not partial_rs - vk_rs and not partial_ps - vk_ps:
                    r_s_ids = rs - vk_rs
                    p_s_ids = ps - vk_ps
                    # NOTE(review): `and` binds tighter than `or`, so this parses as
                    # ((...) and (...) and r_s_ids) or p_s_ids — possibly not the
                    # intended grouping; confirm before re-enabling.
                    if 0 < len(r_s_ids) <= 1 and 0 < len(
                            p_s_ids) <= 1 and r_s_ids or p_s_ids:
                        if r_s_ids and vk_rs - rs:
                            s_id, c_id = r_s_ids.pop()
                            clu, c_id = (vk_rs - rs).pop()
                            # if it is a species id instead of a cluster, continue
                            if not isinstance(clu, tuple):
                                continue
                            # if it is not a species id but a cluster, continue
                            if not isinstance(s_id, str):
                                continue
                            candidate_sps = {
                                model.getSpecies(sp_id)
                                for sp_id in (
                                    term_id2s_ids[s_id2term_id[s_id]]
                                    if s_id in s_id2term_id else [s_id])
                            }
                            comp, term = clu
                            for s in candidate_sps:
                                proposal[s.getId()] = term
                        if p_s_ids and vk_ps - ps:
                            s_id, c_id = p_s_ids.pop()
                            clu, c_id = (vk_ps - ps).pop()
                            # if it is a species id instead of a cluster, continue
                            if not isinstance(clu, tuple):
                                continue
                            # if it is not a species id but a cluster, continue
                            if not isinstance(s_id, str):
                                continue
                            candidate_sps = {
                                model.getSpecies(it)
                                for it in (term_id2s_ids[s_id2term_id[s_id]]
                                           if s_id in s_id2term_id else {s_id})
                            }
                            comp, term = clu
                            for s in candidate_sps:
                                proposal[s.getId()] = term
                # Accept the proposal: record the clusters and shrink the unmapped set.
                # NOTE(review): the proposal keys are species ids, yet they are
                # written into term_id2clu (keyed by term ids elsewhere) — verify
                # this is intended before re-enabling.
                if proposal:
                    for s_id, clu in proposal.items():
                        term_id2clu[s_id] = (
                            clu, ) if not (isinstance(clu, tuple)) else clu
                        unmapped_s_ids -= {s_id}
Ejemplo n.º 21
0
def save_as_comp_generalized_sbml(input_model, out_sbml, groups_sbml, r_id2clu, clu2s_ids, ub_sps, onto):
    """
    Serializes the generalization results: optionally a generalized model
    (out_sbml) where each species/reaction cluster is collapsed into one
    representative, and optionally a groups-annotated copy of the original
    model (groups_sbml) where clusters are stored as SBML groups.

    :param input_model: libsbml.Model, the original (annotated) model
    :param out_sbml: str or falsy, path for the generalized SBML file (skipped if falsy)
    :param groups_sbml: str or falsy, path for the groups-extension SBML file (skipped if falsy)
    :param r_id2clu: dict {reaction_id: cluster}
    :param clu2s_ids: dict {(compartment_id, term): species_ids}
    :param ub_sps: collection of ubiquitous species ids
    :param onto: mod_sbml.onto.obo_ontology.Ontology ChEBI ontology
    :return: tuple (r_id2g_eq, s_id2gr_id):
        dict {reaction_id: (group_id, group_name)}, dict {species_id: (group_id, term)}
    """
    logging.info("serializing generalization")
    s_id_increment, r_id_increment = 0, 0

    if groups_sbml:
        # Copy of the original model that will carry the group annotations.
        doc = convert_to_lev3_v1(input_model)
        groups_model = doc.getModel()
        groups_plugin = groups_model.getPlugin("groups")
        if groups_plugin:
            logging.info("  saving ubiquitous species annotations")
            s_group = groups_plugin.createGroup()
            s_group.setId("g_ubiquitous_sps")
            s_group.setKind(libsbml.GROUP_KIND_COLLECTION)
            s_group.setSBOTerm(SBO_CHEMICAL_MACROMOLECULE)
            s_group.setName("ubiquitous species")
            for s_id in ub_sps:
                member = s_group.createMember()
                member.setIdRef(s_id)
            add_annotation(s_group, libsbml.BQB_IS_DESCRIBED_BY, GROUP_TYPE_UBIQUITOUS)
    if out_sbml:
        # Generalized model: start from a copy with all reactions stripped;
        # they are re-created below with generalized participants.
        generalized_doc = convert_to_lev3_v1(input_model)
        generalized_model = generalized_doc.getModel()
        for _ in range(0, generalized_model.getNumReactions()):
            generalized_model.removeReaction(0)

    r_id2g_eq, s_id2gr_id = {}, {}
    if not clu2s_ids:
        logging.info("  nothing to serialize")
    else:
        clu2r_ids = invert_map(r_id2clu)
        logging.info("  creating species groups")
        for ((c_id, t), s_ids) in clu2s_ids.items():
            comp = input_model.getCompartment(c_id)
            # Only clusters with at least two species become groups/generalized species.
            if len(s_ids) > 1:
                t = onto.get_term(t)
                # Fall back to a name built from the member species when the
                # term is not in the (filtered) ontology.
                t_name, t_id = (t.get_name(), t.get_id()) if t \
                    else (' or '.join(input_model.getSpecies(s_id).getName() for s_id in s_ids), None)
                if not t_id:
                    t = t_name

                if out_sbml:
                    new_species = create_species(model=generalized_model, compartment_id=comp.getId(), type_id=None,
                                                 name="{0} ({1}) [{2}]".format(t_name, len(s_ids), comp.getName()))
                    add_annotation(new_species, libsbml.BQB_IS, t_id, CHEBI_PREFIX)
                    new_s_id = new_species.getId()
                else:
                    # No generalized model: mint an id that does not clash with the input model.
                    s_id_increment += 1
                    new_s_id = generate_unique_id(input_model, "s_g_", s_id_increment)
                for s_id in s_ids:
                    s_id2gr_id[s_id] = new_s_id, t

                if groups_sbml and groups_plugin:
                    # save as a group
                    s_group = groups_plugin.createGroup()
                    s_group.setId(new_s_id)
                    s_group.setKind(libsbml.GROUP_KIND_CLASSIFICATION)
                    s_group.setSBOTerm(SBO_CHEMICAL_MACROMOLECULE)
                    g_name = "{0} [{1}]".format(t_name, comp.getName())
                    s_group.setName(g_name)
                    # logging.info("%s: %d" % (g_name, len(s_ids)))
                    if t_id:
                        add_annotation(s_group, libsbml.BQB_IS, t_id, CHEBI_PREFIX)
                    for s_id in s_ids:
                        member = s_group.createMember()
                        member.setIdRef(s_id)
                    add_annotation(s_group, libsbml.BQB_IS_DESCRIBED_BY, GROUP_TYPE_EQUIV)

        # NOTE: this local lambda shadows the module-level generalize_species function.
        generalize_species = lambda species_id: s_id2gr_id[species_id][0] if (species_id in s_id2gr_id) else species_id
        s_id_to_generalize = set(s_id2gr_id.keys())
        logging.info("  creating reaction groups")
        for clu, r_ids in clu2r_ids.items():
            representative = input_model.getReaction(list(r_ids)[0])
            r_name = "generalized %s" % representative.getName()
            if out_sbml:
                reactants = dict(get_reactants(representative, stoichiometry=True))
                products = dict(get_products(representative, stoichiometry=True))
                # A singleton cluster touching no generalized species is copied as-is.
                if (len(r_ids) == 1) and \
                        not ((set(reactants.keys()) | set(products.keys())) & s_id_to_generalize):
                    create_reaction(generalized_model, reactants, products, name=representative.getName(),
                                    reversible=representative.getReversible(), id_=representative.getId())
                    continue
                r_id2st = {generalize_species(it): st for (it, st) in reactants.items()}
                p_id2st = {generalize_species(it): st for (it, st) in products.items()}
                # The generalized reaction is reversible only if every member is.
                reversible = next((False for r_id in r_ids if not input_model.getReaction(r_id).getReversible()), True)
                new_r_id = create_reaction(generalized_model, r_id2st, p_id2st, name=r_name, reversible=reversible,
                                           id_=representative.getId() if len(r_ids) == 1 else None).getId()
            elif len(r_ids) > 1:
                r_id_increment += 1
                new_r_id = generate_unique_id(input_model, "r_g_", r_id_increment)
            # new_r_id is guaranteed to be bound here: when out_sbml is falsy and
            # len(r_ids) == 1, the guard below is False and new_r_id is unused.
            if len(r_ids) > 1:
                for r_id in r_ids:
                    r_id2g_eq[r_id] = new_r_id, r_name
                if groups_sbml and groups_plugin:
                    # save as a group
                    r_group = groups_plugin.createGroup()
                    r_group.setId(new_r_id)
                    r_group.setKind(libsbml.GROUP_KIND_COLLECTION)
                    r_group.setSBOTerm(SBO_BIOCHEMICAL_REACTION)
                    r_group.setName(r_name)
                    for r_id in r_ids:
                        member = r_group.createMember()
                        member.setIdRef(r_id)
                    add_annotation(r_group, libsbml.BQB_IS_DESCRIBED_BY, GROUP_TYPE_EQUIV)
    if out_sbml:
        remove_unused_elements(generalized_model)
        save_as_sbml(generalized_model, out_sbml)
    if groups_sbml and groups_model:
        save_as_sbml(groups_model, groups_sbml)
        # FIX: only log the groups path when it exists — the unconditional
        # `"serialized to " + groups_sbml` raised TypeError for groups_sbml=None.
        logging.info("serialized to " + groups_sbml)
    return r_id2g_eq, s_id2gr_id