Example #1
def generalize_species(model, s_id2chebi_id, ub_s_ids, chebi, ub_chebi_ids, threshold=UBIQUITOUS_THRESHOLD,
                       r_ids_to_ignore=None):
    """
    Groups metabolites of the model into clusters.
    :param model: libsbml.Model model of interest
    :param s_id2chebi_id: dict {metabolite_id: ChEBI_term_id}
    :param ub_s_ids: collection of ubiquitous metabolite ids
    :param chebi: mod_sbml.onto.obo_ontology.Ontology ChEBI ontology
    :param ub_chebi_ids: collection of ubiquitous ChEBI term ids
    :param threshold: threshold for a metabolite to be considered as frequently participating in reactions
    and therefore ubiquitous
    :param r_ids_to_ignore: (optional) ids of reactions whose stoichiometry-preserving constraints can be ignored
    :return: tuple (s_id2clu, ub_s_ids): a dict {metabolite_id: cluster} and the (possibly updated) collection
    of ubiquitous metabolite ids
    """
    unmapped_s_ids = {s.getId() for s in model.getListOfSpecies() if s.getId() not in s_id2chebi_id}
    term_id2clu = find_term_clustering(model, chebi, s_id2chebi_id, unmapped_s_ids, ub_chebi_ids,
                                       r_ids_to_ignore=r_ids_to_ignore)
    if term_id2clu:
        term_id2clu = select_representative_terms(term_id2clu, chebi)
        s_id2clu = compute_s_id2clu(unmapped_s_ids, model, s_id2chebi_id, term_id2clu)
        clu2s_ids = invert_map(s_id2clu)
        for s_ids in clu2s_ids.values():
            if len(s_ids) == 1:
                del s_id2clu[s_ids.pop()]
    else:
        s_id2clu = {}
    if not ub_s_ids:
        frequent_ch_ids = get_frequent_term_ids(model, threshold)
        ub_s_ids = select_metabolite_ids_by_term_ids(model, frequent_ch_ids) - set(s_id2clu.keys())
    # unmapped_s_ids = {s_id for s_id in unmapped_s_ids if s_id not in s_id2clu}
    # infer_clusters(model, unmapped_s_ids, s_id2clu, species_id2chebi_id, ub_chebi_ids)
    return s_id2clu, ub_s_ids
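
The singleton-pruning step above (dropping clusters that contain only one metabolite) relies on the invert_map helper. A minimal self-contained sketch of the same pattern, with invert_map re-implemented only for illustration (the toy cluster data are made up and the package's own helper may differ):

from collections import defaultdict

def invert_map(key2value):
    # Sketch of the invert_map helper used above: groups keys by their value.
    value2keys = defaultdict(set)
    for key, value in key2value.items():
        value2keys[value].add(key)
    return value2keys

# toy species-to-cluster mapping; clusters with a single member get dropped
s_id2clu = {'s1': ('CHEBI:35366',), 's2': ('CHEBI:35366',), 's3': ('CHEBI:26666',)}
clu2s_ids = invert_map(s_id2clu)
for s_ids in clu2s_ids.values():
    if len(s_ids) == 1:
        del s_id2clu[s_ids.pop()]
print(s_id2clu)  # only the two-member cluster remains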
Example #2
def maximize(unmapped_s_ids, model, term_id2clu, species_id2term_id, ub_chebi_ids, r_ids_to_ignore=None):
    """
    Maximizes the term clusters: for each cluster containing more than one term,
    a MaximizingThread is started; all threads are joined before the updated mapping is returned.
    :param unmapped_s_ids: collection of ids of species that are not mapped to ontology terms
    :param model: libsbml.Model model of interest
    :param term_id2clu: dict {term_id: cluster}
    :param species_id2term_id: dict {species_id: term_id}
    :param ub_chebi_ids: collection of ubiquitous ChEBI term ids
    :param r_ids_to_ignore: (optional) ids of reactions whose stoichiometry-preserving constraints can be ignored
    :return: dict {term_id: cluster}
    """
    clu2term_ids = invert_map(term_id2clu)
    s_id2clu = compute_s_id2clu(unmapped_s_ids, model, species_id2term_id, term_id2clu)

    r_id2clu = generalize_reactions(model, s_id2clu, species_id2term_id, ub_chebi_ids, r_ids_to_ignore=r_ids_to_ignore)

    thrds = []
    for (clu, term_ids) in clu2term_ids.items():
        if len(term_ids) <= 1:
            continue

        thread = MaximizingThread(model, term_ids, species_id2term_id, clu, term_id2clu,
                                  s_id2clu, ub_chebi_ids, r_id2clu, r_ids_to_ignore=r_ids_to_ignore)
        thrds.append(thread)
        thread.start()  # This actually causes the thread to run
    for th in thrds:
        th.join()  # This waits until the thread has completed
    return term_id2clu
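
The start/join calls above follow the standard one-worker-per-cluster threading pattern. A self-contained sketch of that pattern with threading.Thread (the worker body and toy data below are placeholders, not the actual MaximizingThread logic):

import threading

def merge_cluster(clu, term_ids, results):
    # Placeholder worker body; the real MaximizingThread updates term_id2clu
    # and s_id2clu, which is not shown here.
    results[clu] = sorted(term_ids)

clu2term_ids = {('a',): {'t1', 't2'}, ('b',): {'t3', 't4', 't5'}}
results = {}
threads = []
for clu, term_ids in clu2term_ids.items():
    if len(term_ids) <= 1:
        continue
    th = threading.Thread(target=merge_cluster, args=(clu, term_ids, results))
    threads.append(th)
    th.start()   # launch one worker per multi-term cluster
for th in threads:
    th.join()    # wait until every worker has finished
print(results)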
Example #3
def cover_t_ids(model, species_id2term_id, ubiquitous_t_ids, t_ids, onto, clu=None, r_ids_to_ignore=None):
    """
    Find ancestor terms that cover (generalize) given terms.
    :param model: libsbml.Model model of interest
    :param species_id2term_id: dict {species_id: term_id}
    :param ubiquitous_t_ids: collection of ubiquitous term ids
    :param t_ids: collection of term ids to be covered
    :param onto: mod_sbml.onto.obo_ontology.Ontology ChEBI ontology
    :param clu: current cluster to which the terms belong
    :param r_ids_to_ignore: (optional) collection of reaction ids to ignore (their stoichiometry-preserving constraints are not enforced)
    :return: dictionary {term_id: cluster}
    """
    term_id2clu = {}
    real_terms = {onto.get_term(t_id) for t_id in t_ids if onto.get_term(t_id)}

    # If there is no term for t_id in the ontology, we assume it is a metabolite id instead
    unmapped_s_ids = {t_id for t_id in t_ids if not onto.get_term(t_id)}

    roots = onto.common_points(real_terms, relationships=EQUIVALENT_RELATIONSHIPS)
    if roots:
        root_id = roots[0].get_id()
        new_clu = clu + (root_id, ) if clu else (root_id, )
        return {t_id: new_clu for t_id in t_ids}

    roots = set()
    for term in real_terms:
        roots |= onto.get_generalized_ancestors_of_level(term, set(), None, 4)
    terms2root = {tuple(sorted(t.get_id() for t in onto.get_sub_tree(root))): root.get_id() for root in roots}
    for t_set, root_id in greedy({t.get_id() for t in real_terms}, terms2root, {it: 1 for it in terms2root}):
        new_clu = clu + (root_id, ) if clu else (root_id, )
        term_id2clu.update({t_id: new_clu for t_id in t_set})

    s_id2clu = compute_s_id2clu(set(), model, species_id2term_id, term_id2clu)
    infer_clusters(model, unmapped_s_ids, s_id2clu, species_id2term_id, ubiquitous_t_ids,
                   r_ids_to_ignore=r_ids_to_ignore)
    # for s_id in unmapped_s_ids:
    #     if s_id in s_id2clu:
    #         term_id2clu[s_id] = (s_id2clu[s_id][1], )
    return term_id2clu
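
The greedy(...) call above selects, among the candidate ancestor sub-trees, a small number of roots that together cover the terms to be generalized. Its implementation is not shown in this example; the sketch below is a generic greedy weighted set cover over the same kind of inputs (candidate term sets mapped to root ids, with unit costs), and its name and exact behaviour are assumptions rather than the package's own greedy():

def greedy_cover(elements, set2root, set2cost):
    # Greedy set cover: repeatedly pick the candidate set that covers the most
    # still-uncovered elements per unit cost, yielding (covered_subset, root_id).
    uncovered = set(elements)
    candidates = dict(set2root)
    while uncovered and candidates:
        best = max(candidates,
                   key=lambda t_set: len(uncovered & set(t_set)) / set2cost[t_set])
        covered = uncovered & set(best)
        if not covered:
            break
        yield covered, candidates.pop(best)
        uncovered -= covered

# toy example: cover terms t1..t4 with two candidate ancestor sub-trees
print(list(greedy_cover({'t1', 't2', 't3', 't4'},
                        {('t1', 't2', 't3'): 'root_a', ('t3', 't4'): 'root_b'},
                        {('t1', 't2', 't3'): 1, ('t3', 't4'): 1})))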