def generalize_species(model, s_id2chebi_id, ub_s_ids, chebi, ub_chebi_ids, threshold=UBIQUITOUS_THRESHOLD,
                       r_ids_to_ignore=None):
    """
    Groups metabolites of the model into clusters.

    :param model: libsbml.Model model of interest
    :param s_id2chebi_id: dict {metabolite_id: ChEBI_term_id}
    :param ub_s_ids: collection of ubiquitous metabolite ids; when it is empty or None,
    it is computed here from the frequent terms of the model (see threshold)
    :param chebi: mod_sbml.onto.obo_ontology.Ontology ChEBI ontology
    :param ub_chebi_ids: collection of ubiquitous ChEBI term ids
    :param threshold: threshold for a metabolite to be considered as frequently participating in reactions
    and therefore ubiquitous
    :param r_ids_to_ignore: (optional) ids of reactions whose stoichiometry preserving constraint
    can be ignored
    :return: tuple (s_id2clu, ub_s_ids): s_id2clu maps metabolite ids to their clusters
    (metabolites that are alone in their cluster are left out), ub_s_ids is the collection
    of ubiquitous metabolite ids (the given one, or the computed one if none was given)
    """
    # Metabolites that have no ChEBI annotation in the given mapping.
    unmapped_s_ids = {species.getId() for species in model.getListOfSpecies()
                      if species.getId() not in s_id2chebi_id}
    term_id2clu = find_term_clustering(model, chebi, s_id2chebi_id, unmapped_s_ids, ub_chebi_ids,
                                       r_ids_to_ignore=r_ids_to_ignore)

    if not term_id2clu:
        s_id2clu = {}
    else:
        term_id2clu = select_representative_terms(term_id2clu, chebi)
        s_id2clu = compute_s_id2clu(unmapped_s_ids, model, s_id2chebi_id, term_id2clu)
        # Metabolites that ended up alone in their cluster are removed from the mapping.
        for cluster_members in invert_map(s_id2clu).values():
            if len(cluster_members) == 1:
                del s_id2clu[cluster_members.pop()]

    if not ub_s_ids:
        # No ubiquitous metabolites were given: take those annotated with frequent terms,
        # except for the ones that have already been clustered.
        frequent_term_ids = get_frequent_term_ids(model, threshold)
        frequent_s_ids = select_metabolite_ids_by_term_ids(model, frequent_term_ids)
        ub_s_ids = frequent_s_ids - set(s_id2clu)

    # NOTE: clusters are not inferred for the metabolites that stay unmapped
    # (the infer_clusters step is disabled in this function).
    return s_id2clu, ub_s_ids
def maximize(unmapped_s_ids, model, term_id2clu, species_id2term_id, ub_chebi_ids, r_ids_to_ignore=None):
    """
    Runs a MaximizingThread for every cluster that contains more than one term
    and waits until all the started threads have finished.

    :param unmapped_s_ids: collection of species ids, passed on to compute_s_id2clu
    (presumably the species without an ontology term -- confirm against the caller)
    :param model: libsbml.Model model of interest
    :param term_id2clu: dict {term_id: cluster}
    :param species_id2term_id: dict {species_id: term_id}
    :param ub_chebi_ids: collection of ubiquitous ChEBI term ids
    :param r_ids_to_ignore: (optional) reaction ids, passed on to generalize_reactions
    and to every MaximizingThread
    :return: term_id2clu, the same object that was given as the argument
    (presumably updated in place by the threads -- TODO confirm against MaximizingThread)
    """
    cluster2term_ids = invert_map(term_id2clu)
    s_id2clu = compute_s_id2clu(unmapped_s_ids, model, species_id2term_id, term_id2clu)
    r_id2clu = generalize_reactions(model, s_id2clu, species_id2term_id, ub_chebi_ids,
                                    r_ids_to_ignore=r_ids_to_ignore)

    workers = []
    for cluster, cluster_term_ids in cluster2term_ids.items():
        # Clusters with at most one term get no thread.
        if len(cluster_term_ids) > 1:
            worker = MaximizingThread(model, cluster_term_ids, species_id2term_id, cluster, term_id2clu,
                                      s_id2clu, ub_chebi_ids, r_id2clu, r_ids_to_ignore=r_ids_to_ignore)
            workers.append(worker)
            # The thread is launched right away, before the next one is created.
            worker.start()

    # Block until every launched thread has completed.
    for worker in workers:
        worker.join()
    return term_id2clu
def cover_t_ids(model, species_id2term_id, ubiquitous_t_ids, t_ids, onto, clu=None, r_ids_to_ignore=None):
    """
    Find ancestor terms that cover (generalize) given terms.

    If the terms have common points with respect to EQUIVALENT_RELATIONSHIPS, all the given ids
    are assigned to one cluster based on the first of these points. Otherwise, covering ancestors
    are picked among the generalized ancestors of the terms using greedy.

    :param model: libsbml.Model model of interest
    :param species_id2term_id: dict {species_id: term_id}
    :param ubiquitous_t_ids: collection of ubiquitous term ids
    :param t_ids: collection of term ids to be covered (it is iterated more than once);
    ids that are not found in the ontology are assumed to be metabolite ids
    :param onto: mod_sbml.onto.obo_ontology.Ontology ChEBI ontology
    :param clu: current cluster (tuple) to which the terms belong; the id of the covering ancestor
    is appended to it
    :param r_ids_to_ignore: collection of reaction ids to ignore
    (don't fix their Stoichiometry preserving constraints)
    :return: dictionary {term_id: cluster}, where cluster is the tuple clu + (covering_term_id, ),
    or just (covering_term_id, ) if clu is empty or None
    """

    def extend_cluster(root_id):
        # Appends the id of the covering term to the current cluster, if there is one.
        return clu + (root_id, ) if clu else (root_id, )

    # Each id is looked up in the ontology only once.
    # If there is no term for t_id in the ontology, we assume it is a metabolite id instead.
    real_terms, unmapped_s_ids = set(), set()
    for t_id in t_ids:
        term = onto.get_term(t_id)
        if term:
            real_terms.add(term)
        else:
            unmapped_s_ids.add(t_id)

    roots = onto.common_points(real_terms, relationships=EQUIVALENT_RELATIONSHIPS)
    if roots:
        # All the given ids (including the ones unknown to the ontology) go to the same cluster.
        new_clu = extend_cluster(roots[0].get_id())
        return {t_id: new_clu for t_id in t_ids}

    # Candidate covering terms: generalized ancestors of every real term.
    # NOTE(review): the meaning of the constant 4 (presumably the ancestor level) is defined
    # by Ontology.get_generalized_ancestors_of_level -- confirm there.
    roots = set()
    for term in real_terms:
        roots |= onto.get_generalized_ancestors_of_level(term, set(), None, 4)

    # Sorted tuple of the ids in the sub-tree of a candidate -> id of the candidate.
    terms2root = {tuple(sorted(t.get_id() for t in onto.get_sub_tree(root))): root.get_id() for root in roots}
    real_term_ids = {term.get_id() for term in real_terms}

    term_id2clu = {}
    # Every candidate is given the value 1 (presumably a unit cost for greedy -- confirm).
    for t_set, root_id in greedy(real_term_ids, terms2root, {it: 1 for it in terms2root}):
        new_clu = extend_cluster(root_id)
        term_id2clu.update({t_id: new_clu for t_id in t_set})

    s_id2clu = compute_s_id2clu(set(), model, species_id2term_id, term_id2clu)
    infer_clusters(model, unmapped_s_ids, s_id2clu, species_id2term_id, ubiquitous_t_ids,
                   r_ids_to_ignore=r_ids_to_ignore)
    # NOTE(review): s_id2clu is not read after this point: the step that copied the inferred
    # clusters of the unmapped ids back into term_id2clu is disabled, so those ids are not
    # present in the result. The two calls above are kept in case they have side effects.
    return term_id2clu