def get_used_species(model, include_modifiers=True):
    """
    Collects the ids of the species that participate in at least one reaction of the model.

    :param model: libsbml.Model model of interest
    :param include_modifiers: forwarded to get_metabolites for every reaction;
    whether reaction modifiers should be counted as participants
    :return: set of used species ids (empty if the model has no reactions)
    """
    used_s_ids = set()
    for r in model.getListOfReactions():
        # In-place union: the per-reaction result is merged into the accumulated set.
        used_s_ids |= get_metabolites(r, include_modifiers=include_modifiers)
    return used_s_ids
def suggest_clusters(model, unmapped_s_ids, term_id2clu, s_id2term_id, ubiquitous_chebi_ids, r_ids_to_ignore=None):
    """
    Currently a no-op: the function returns None immediately, without reading or modifying
    any of its arguments (see the TODO right below). All the code after the first `return`
    is disabled and is kept only until it has been double-checked.

    Disabled logic, in short: for every reaction that involves an unmapped species and does not
    belong to a vertical-key group of more than one reaction, the reaction's vertical key is compared
    with the grouped vertical keys sharing the same simplified vertical key; when the keys match
    apart from the leftover reactant/product, a proposal is written into term_id2clu and
    the species is removed from unmapped_s_ids (both are updated in place).

    NOTE(review): a second definition of suggest_clusters appears further down in this file;
    being the later one, it is the definition the module-level name ends up bound to.

    :param model: presumably a libsbml.Model (getListOfReactions and getSpecies are called on it)
    :param unmapped_s_ids: set of ids of the species that are not mapped yet
    :param term_id2clu: dict that would receive the proposals
    :param s_id2term_id: dict, presumably species id -> term id
    :param ubiquitous_chebi_ids: passed through to get_vk2r_ids and get_vertical_key
    :param r_ids_to_ignore: optional collection of ids of the reactions to be skipped
    :return: None
    """
    # TODO: double check it
    return

    # ---- disabled code starts here ----
    if not unmapped_s_ids:
        return

    s_id2clu = compute_s_id2clu(set(), model, s_id2term_id, term_id2clu)
    term_id2s_ids = invert_map(s_id2term_id)

    # Only the vertical keys shared by more than one reaction are of interest.
    vk2r_ids = get_vk2r_ids(model, s_id2clu, s_id2term_id, ubiquitous_chebi_ids, r_ids_to_ignore=r_ids_to_ignore)
    vk2r_ids = {vk: r_ids for (vk, r_ids) in vk2r_ids.items() if len(r_ids) > 1}
    processed_r_ids = reduce(lambda s1, s2: s1 | s2, vk2r_ids.values(), set())

    # simplified vertical key -> full vertical keys
    s_vk2vk = defaultdict(set)
    for vk in vk2r_ids:
        s_vk2vk[vertical_key2simplified_vertical_key(vk)].add(vk)

    # NOTE(review): s_id2r_ids is filled here but never read later in this function.
    s_id2r_ids = defaultdict(list)
    for r in model.getListOfReactions():
        if r.getNumReactants() + r.getNumProducts() <= 2:
            continue
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue
        r_id = r.getId()
        for species_ref in chain(r.getListOfReactants(), r.getListOfProducts()):
            s_id2r_ids[species_ref.getSpecies()].append(r_id)

    for r in model.getListOfReactions():
        if r.getId() in processed_r_ids or not unmapped_s_ids & get_metabolites(r):
            continue
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue

        ub_rs, ub_ps, rs, ps = get_vertical_key(model, r, s_id2clu, s_id2term_id, ubiquitous_chebi_ids)
        vk = ub_rs, ub_ps, rs, ps
        rs, ps = set(rs), set(ps)
        # The already mapped part of the reactants / products.
        partial_rs = {(s_id, c_id) for (s_id, c_id) in rs if s_id not in unmapped_s_ids}
        partial_ps = {(s_id, c_id) for (s_id, c_id) in ps if s_id not in unmapped_s_ids}
        if vk in vk2r_ids or len(ub_rs) + len(ub_ps) + len(partial_rs) + len(partial_ps) < 2:
            continue

        s_vk = vertical_key2simplified_vertical_key(vk)
        if s_vk not in s_vk2vk:
            continue

        ub_rs, ub_ps = tuple(sorted(ub_rs)), tuple(sorted(ub_ps))
        for (vk_ub_rs, vk_ub_ps, vk_rs, vk_ps) in s_vk2vk[s_vk]:
            # Replace the species ids that have a cluster with that cluster.
            vk_rs = {(s_id2clu.get(s_id, s_id), c_id) for (s_id, c_id) in vk_rs}
            vk_ps = {(s_id2clu.get(s_id, s_id), c_id) for (s_id, c_id) in vk_ps}
            proposal = {}

            # The grouped key matches in the opposite direction: swap the sides and compare again.
            if vk_ub_rs == ub_ps and vk_ub_ps == ub_rs and not partial_rs - vk_ps and not partial_ps - vk_rs:
                vk_ub_rs, vk_ub_ps, partial_rs, partial_ps = vk_ub_ps, vk_ub_rs, partial_ps, partial_rs

            if vk_ub_rs == ub_rs and vk_ub_ps == ub_ps and not partial_rs - vk_rs and not partial_ps - vk_ps:
                r_s_ids = rs - vk_rs
                p_s_ids = ps - vk_ps
                # NOTE(review): `and` binds tighter than `or`, so this is true whenever p_s_ids
                # is non-empty, whatever the sizes are — confirm that this is intended.
                if 0 < len(r_s_ids) <= 1 and 0 < len(p_s_ids) <= 1 and r_s_ids or p_s_ids:
                    if r_s_ids and vk_rs - rs:
                        s_id, c_id = r_s_ids.pop()
                        clu, c_id = (vk_rs - rs).pop()
                        # if it is a species id instead of a cluster, continue
                        if not isinstance(clu, tuple):
                            continue
                        # if it is not a species id but a cluster, continue
                        if not isinstance(s_id, str):
                            continue
                        candidate_sps = {
                            model.getSpecies(sp_id) for sp_id in
                            (term_id2s_ids[s_id2term_id[s_id]] if s_id in s_id2term_id else [s_id])
                        }
                        comp, term = clu
                        for s in candidate_sps:
                            proposal[s.getId()] = term
                    if p_s_ids and vk_ps - ps:
                        s_id, c_id = p_s_ids.pop()
                        clu, c_id = (vk_ps - ps).pop()
                        # if it is a species id instead of a cluster, continue
                        if not isinstance(clu, tuple):
                            continue
                        # if it is not a species id but a cluster, continue
                        if not isinstance(s_id, str):
                            continue
                        candidate_sps = {
                            model.getSpecies(it) for it in
                            (term_id2s_ids[s_id2term_id[s_id]] if s_id in s_id2term_id else {s_id})
                        }
                        comp, term = clu
                        for s in candidate_sps:
                            proposal[s.getId()] = term

            if proposal:
                for s_id, clu in proposal.items():
                    # NOTE(review): the key written into term_id2clu is a species id — confirm.
                    term_id2clu[s_id] = clu if isinstance(clu, tuple) else (clu,)
                    unmapped_s_ids -= {s_id}
def infer_clusters(model, unmapped_s_ids, s_id2clu, s_id2term_id, ubiquitous_chebi_ids, r_ids_to_ignore=None):
    """
    Currently a no-op: the function returns None immediately, without reading or modifying
    any of its arguments (see the TODO right below). All the code after the first `return`
    is disabled and is kept only until it has been double-checked.

    Disabled logic, in short: for every reaction that involves an unmapped species and does not
    belong to a vertical-key group of more than one reaction, the reaction's vertical key is compared
    with the grouped vertical keys sharing the same simplified vertical key; when the keys match
    apart from the leftover reactant/product, the leftover species (and the species sharing its term)
    are assigned the (compartment, term) cluster of the counterpart, unless a species of that cluster
    already shares a reaction with them. s_id2clu and unmapped_s_ids are updated in place.

    NOTE(review): a second definition of infer_clusters appears further down in this file;
    being the later one, it is the definition the module-level name ends up bound to.

    :param model: presumably a libsbml.Model (getListOfReactions and getSpecies are called on it)
    :param unmapped_s_ids: set of ids of the species that are not mapped yet
    :param s_id2clu: dict species id -> cluster, would be extended with the accepted proposals
    :param s_id2term_id: dict, presumably species id -> term id
    :param ubiquitous_chebi_ids: passed through to get_vk2r_ids and get_vertical_key
    :param r_ids_to_ignore: optional collection of ids of the reactions to be skipped
    :return: None
    """
    # TODO: double check it
    return

    # ---- disabled code starts here ----
    if not unmapped_s_ids:
        return

    term_id2s_ids = invert_map(s_id2term_id)
    clu2s_ids = invert_map(s_id2clu)

    # Only the vertical keys shared by more than one reaction are of interest.
    vk2r_ids = get_vk2r_ids(model, s_id2clu, s_id2term_id, ubiquitous_chebi_ids, r_ids_to_ignore=r_ids_to_ignore)
    vk2r_ids = {vk: r_ids for (vk, r_ids) in vk2r_ids.items() if len(r_ids) > 1}

    # simplified vertical key -> full vertical keys
    simplified_vk2vk_set = defaultdict(set)
    for vk in vk2r_ids:
        simplified_vk2vk_set[vertical_key2simplified_vertical_key(vk)].add(vk)

    # species id -> ids of the reactions (with more than 2 participants) it takes part in
    s_id2r_ids = defaultdict(list)
    for r in model.getListOfReactions():
        if r.getNumReactants() + r.getNumProducts() <= 2:
            continue
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue
        r_id = r.getId()
        for species_ref in chain(r.getListOfReactants(), r.getListOfProducts()):
            s_id2r_ids[species_ref.getSpecies()].append(r_id)

    def in_species_conflict(term, candidate_sps, proposal_s_id2clu):
        # True if a candidate shares a reaction with a species that is already in
        # (or is proposed for) the cluster the candidate would be put into.
        proposal_clu2s_ids = invert_map(proposal_s_id2clu)
        for s in candidate_sps:
            s_clu = s.getCompartment(), term
            rs = set(s_id2r_ids[s.getId()])
            for clu_s_id in clu2s_ids[s_clu] | proposal_clu2s_ids[s_clu]:
                if set(s_id2r_ids[clu_s_id]) & rs:
                    return True
        return False

    processed_r_ids = reduce(lambda s1, s2: s1 | s2, vk2r_ids.values(), set())

    for r in model.getListOfReactions():
        if r.getId() in processed_r_ids or not unmapped_s_ids & get_metabolites(r):
            continue
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue

        ub_rs, ub_ps, rs, ps = get_vertical_key(model, r, s_id2clu, s_id2term_id, ubiquitous_chebi_ids)
        vk = ub_rs, ub_ps, rs, ps
        rs, ps = set(rs), set(ps)
        # The already mapped part of the reactants / products.
        partial_rs = {(s_id, c_id) for (s_id, c_id) in rs if s_id not in unmapped_s_ids}
        partial_ps = {(s_id, c_id) for (s_id, c_id) in ps if s_id not in unmapped_s_ids}
        if len(ub_rs) + len(ub_ps) + len(partial_rs) + len(partial_ps) < 2:
            continue

        simplified_vk = vertical_key2simplified_vertical_key(vk)
        if simplified_vk not in simplified_vk2vk_set:
            continue

        ub_rs, ub_ps = tuple(sorted(ub_rs)), tuple(sorted(ub_ps))
        for (vk_ub_rs, vk_ub_ps, vk_rs, vk_ps) in simplified_vk2vk_set[simplified_vk]:
            # Replace the species ids that have a cluster with that cluster.
            vk_rs = {(s_id2clu.get(s_id, s_id), c_id) for (s_id, c_id) in vk_rs}
            vk_ps = {(s_id2clu.get(s_id, s_id), c_id) for (s_id, c_id) in vk_ps}
            proposal = {}

            # The grouped key matches in the opposite direction: swap the sides and compare again.
            if vk_ub_rs == ub_ps and vk_ub_ps == ub_rs and not partial_rs - vk_ps and not partial_ps - vk_rs:
                vk_ub_rs, vk_ub_ps, partial_rs, partial_ps = vk_ub_ps, vk_ub_rs, partial_ps, partial_rs

            if vk_ub_rs == ub_rs and vk_ub_ps == ub_ps and not partial_rs - vk_rs and not partial_ps - vk_ps:
                r_s_ids = rs - vk_rs
                p_s_ids = ps - vk_ps
                # NOTE(review): `and` binds tighter than `or`, so this is true whenever p_s_ids
                # is non-empty, whatever the sizes are — confirm that this is intended.
                if 0 < len(r_s_ids) <= 1 and 0 < len(p_s_ids) <= 1 and r_s_ids or p_s_ids:
                    if r_s_ids and vk_rs - rs:
                        s_id, c_id = r_s_ids.pop()
                        # if it is not a species id but a cluster, continue
                        if not isinstance(s_id, str):
                            continue
                        clu, c_id = (vk_rs - rs).pop()
                        # if it is a species id instead of a cluster, continue
                        if not isinstance(clu, tuple):
                            continue
                        candidate_sps = {
                            model.getSpecies(it) for it in
                            (term_id2s_ids[s_id2term_id[s_id]] if s_id in s_id2term_id else {s_id})
                        }
                        comp, term = clu
                        if in_species_conflict(term, candidate_sps, proposal):
                            continue
                        for s in candidate_sps:
                            proposal[s.getId()] = s.getCompartment(), term
                    if p_s_ids and vk_ps - ps:
                        s_id, c_id = p_s_ids.pop()
                        # if it is not a species id but a cluster, continue
                        if not isinstance(s_id, str):
                            continue
                        clu, c_id = (vk_ps - ps).pop()
                        # if it is a species id instead of a cluster, continue
                        if not isinstance(clu, tuple):
                            continue
                        candidate_sps = {
                            model.getSpecies(it) for it in
                            (term_id2s_ids[s_id2term_id[s_id]] if s_id in s_id2term_id else {s_id})
                        }
                        comp, term = clu
                        if in_species_conflict(term, candidate_sps, proposal):
                            continue
                        for s in candidate_sps:
                            proposal[s.getId()] = s.getCompartment(), term

            if proposal:
                # Accept the proposal: register the new members and mark them as mapped.
                s_id2clu.update(proposal)
                for s_id, clu in proposal.items():
                    clu2s_ids[clu].add(s_id)
                unmapped_s_ids -= set(proposal.keys())
def suggest_clusters(model, unmapped_s_ids, term_id2clu, s_id2term_id, ubiquitous_chebi_ids, r_ids_to_ignore=None):
    """
    Currently a no-op: the function returns None immediately, without reading or modifying
    any of its arguments (see the TODO right below). All the code after the first `return`
    is disabled and is kept only until it has been double-checked.

    Disabled logic, in short: for every reaction that involves an unmapped species and does not
    belong to a vertical-key group of more than one reaction, the reaction's vertical key is compared
    with the grouped vertical keys sharing the same simplified vertical key; when the keys match
    apart from the leftover reactant/product, a proposal is written into term_id2clu and
    the species is removed from unmapped_s_ids (both are updated in place).

    NOTE(review): an earlier definition of suggest_clusters exists above in this file;
    this later definition is the one the module-level name ends up bound to.

    :param model: presumably a libsbml.Model (getListOfReactions and getSpecies are called on it)
    :param unmapped_s_ids: set of ids of the species that are not mapped yet
    :param term_id2clu: dict that would receive the proposals
    :param s_id2term_id: dict, presumably species id -> term id
    :param ubiquitous_chebi_ids: passed through to get_vk2r_ids and get_vertical_key
    :param r_ids_to_ignore: optional collection of ids of the reactions to be skipped
    :return: None
    """
    # TODO: double check it
    return

    # ---- disabled code starts here ----
    if not unmapped_s_ids:
        return

    s_id2clu = compute_s_id2clu(set(), model, s_id2term_id, term_id2clu)
    term_id2s_ids = invert_map(s_id2term_id)

    # Only the vertical keys shared by more than one reaction are of interest.
    vk2r_ids = get_vk2r_ids(model, s_id2clu, s_id2term_id, ubiquitous_chebi_ids, r_ids_to_ignore=r_ids_to_ignore)
    vk2r_ids = {vk: r_ids for (vk, r_ids) in vk2r_ids.items() if len(r_ids) > 1}
    processed_r_ids = reduce(lambda s1, s2: s1 | s2, vk2r_ids.values(), set())

    # simplified vertical key -> full vertical keys
    s_vk2vk = defaultdict(set)
    for vk in vk2r_ids:
        s_vk2vk[vertical_key2simplified_vertical_key(vk)].add(vk)

    # NOTE(review): s_id2r_ids is filled here but never read later in this function.
    s_id2r_ids = defaultdict(list)
    for r in model.getListOfReactions():
        if r.getNumReactants() + r.getNumProducts() <= 2:
            continue
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue
        r_id = r.getId()
        for species_ref in chain(r.getListOfReactants(), r.getListOfProducts()):
            s_id2r_ids[species_ref.getSpecies()].append(r_id)

    for r in model.getListOfReactions():
        if r.getId() in processed_r_ids or not unmapped_s_ids & get_metabolites(r):
            continue
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue

        ub_rs, ub_ps, rs, ps = get_vertical_key(model, r, s_id2clu, s_id2term_id, ubiquitous_chebi_ids)
        vk = ub_rs, ub_ps, rs, ps
        rs, ps = set(rs), set(ps)
        # The already mapped part of the reactants / products.
        partial_rs = {(s_id, c_id) for (s_id, c_id) in rs if s_id not in unmapped_s_ids}
        partial_ps = {(s_id, c_id) for (s_id, c_id) in ps if s_id not in unmapped_s_ids}
        if vk in vk2r_ids or len(ub_rs) + len(ub_ps) + len(partial_rs) + len(partial_ps) < 2:
            continue

        s_vk = vertical_key2simplified_vertical_key(vk)
        if s_vk not in s_vk2vk:
            continue

        ub_rs, ub_ps = tuple(sorted(ub_rs)), tuple(sorted(ub_ps))
        for (vk_ub_rs, vk_ub_ps, vk_rs, vk_ps) in s_vk2vk[s_vk]:
            # Replace the species ids that have a cluster with that cluster.
            vk_rs = {(s_id2clu.get(s_id, s_id), c_id) for (s_id, c_id) in vk_rs}
            vk_ps = {(s_id2clu.get(s_id, s_id), c_id) for (s_id, c_id) in vk_ps}
            proposal = {}

            # The grouped key matches in the opposite direction: swap the sides and compare again.
            if vk_ub_rs == ub_ps and vk_ub_ps == ub_rs and not partial_rs - vk_ps and not partial_ps - vk_rs:
                vk_ub_rs, vk_ub_ps, partial_rs, partial_ps = vk_ub_ps, vk_ub_rs, partial_ps, partial_rs

            if vk_ub_rs == ub_rs and vk_ub_ps == ub_ps and not partial_rs - vk_rs and not partial_ps - vk_ps:
                r_s_ids = rs - vk_rs
                p_s_ids = ps - vk_ps
                # NOTE(review): `and` binds tighter than `or`, so this is true whenever p_s_ids
                # is non-empty, whatever the sizes are — confirm that this is intended.
                if 0 < len(r_s_ids) <= 1 and 0 < len(p_s_ids) <= 1 and r_s_ids or p_s_ids:
                    if r_s_ids and vk_rs - rs:
                        s_id, c_id = r_s_ids.pop()
                        clu, c_id = (vk_rs - rs).pop()
                        # if it is a species id instead of a cluster, continue
                        if not isinstance(clu, tuple):
                            continue
                        # if it is not a species id but a cluster, continue
                        if not isinstance(s_id, str):
                            continue
                        candidate_sps = {
                            model.getSpecies(sp_id) for sp_id in
                            (term_id2s_ids[s_id2term_id[s_id]] if s_id in s_id2term_id else [s_id])
                        }
                        comp, term = clu
                        for s in candidate_sps:
                            proposal[s.getId()] = term
                    if p_s_ids and vk_ps - ps:
                        s_id, c_id = p_s_ids.pop()
                        clu, c_id = (vk_ps - ps).pop()
                        # if it is a species id instead of a cluster, continue
                        if not isinstance(clu, tuple):
                            continue
                        # if it is not a species id but a cluster, continue
                        if not isinstance(s_id, str):
                            continue
                        candidate_sps = {
                            model.getSpecies(it) for it in
                            (term_id2s_ids[s_id2term_id[s_id]] if s_id in s_id2term_id else {s_id})
                        }
                        comp, term = clu
                        for s in candidate_sps:
                            proposal[s.getId()] = term

            if proposal:
                for s_id, clu in proposal.items():
                    # NOTE(review): the key written into term_id2clu is a species id — confirm.
                    term_id2clu[s_id] = clu if isinstance(clu, tuple) else (clu,)
                    unmapped_s_ids -= {s_id}
def infer_clusters(model, unmapped_s_ids, s_id2clu, s_id2term_id, ubiquitous_chebi_ids, r_ids_to_ignore=None):
    """
    Currently a no-op: the function returns None immediately, without reading or modifying
    any of its arguments (see the TODO right below). All the code after the first `return`
    is disabled and is kept only until it has been double-checked.

    Disabled logic, in short: for every reaction that involves an unmapped species and does not
    belong to a vertical-key group of more than one reaction, the reaction's vertical key is compared
    with the grouped vertical keys sharing the same simplified vertical key; when the keys match
    apart from the leftover reactant/product, the leftover species (and the species sharing its term)
    are assigned the (compartment, term) cluster of the counterpart, unless a species of that cluster
    already shares a reaction with them. s_id2clu and unmapped_s_ids are updated in place.

    NOTE(review): an earlier definition of infer_clusters exists above in this file;
    this later definition is the one the module-level name ends up bound to.

    :param model: presumably a libsbml.Model (getListOfReactions and getSpecies are called on it)
    :param unmapped_s_ids: set of ids of the species that are not mapped yet
    :param s_id2clu: dict species id -> cluster, would be extended with the accepted proposals
    :param s_id2term_id: dict, presumably species id -> term id
    :param ubiquitous_chebi_ids: passed through to get_vk2r_ids and get_vertical_key
    :param r_ids_to_ignore: optional collection of ids of the reactions to be skipped
    :return: None
    """
    # TODO: double check it
    return

    # ---- disabled code starts here ----
    if not unmapped_s_ids:
        return

    term_id2s_ids = invert_map(s_id2term_id)
    clu2s_ids = invert_map(s_id2clu)

    # Only the vertical keys shared by more than one reaction are of interest.
    vk2r_ids = get_vk2r_ids(model, s_id2clu, s_id2term_id, ubiquitous_chebi_ids, r_ids_to_ignore=r_ids_to_ignore)
    vk2r_ids = {vk: r_ids for (vk, r_ids) in vk2r_ids.items() if len(r_ids) > 1}

    # simplified vertical key -> full vertical keys
    simplified_vk2vk_set = defaultdict(set)
    for vk in vk2r_ids:
        simplified_vk2vk_set[vertical_key2simplified_vertical_key(vk)].add(vk)

    # species id -> ids of the reactions (with more than 2 participants) it takes part in
    s_id2r_ids = defaultdict(list)
    for r in model.getListOfReactions():
        if r.getNumReactants() + r.getNumProducts() <= 2:
            continue
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue
        r_id = r.getId()
        for species_ref in chain(r.getListOfReactants(), r.getListOfProducts()):
            s_id2r_ids[species_ref.getSpecies()].append(r_id)

    def in_species_conflict(term, candidate_sps, proposal_s_id2clu):
        # True if a candidate shares a reaction with a species that is already in
        # (or is proposed for) the cluster the candidate would be put into.
        proposal_clu2s_ids = invert_map(proposal_s_id2clu)
        for s in candidate_sps:
            s_clu = s.getCompartment(), term
            rs = set(s_id2r_ids[s.getId()])
            for clu_s_id in clu2s_ids[s_clu] | proposal_clu2s_ids[s_clu]:
                if set(s_id2r_ids[clu_s_id]) & rs:
                    return True
        return False

    processed_r_ids = reduce(lambda s1, s2: s1 | s2, vk2r_ids.values(), set())

    for r in model.getListOfReactions():
        if r.getId() in processed_r_ids or not unmapped_s_ids & get_metabolites(r):
            continue
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue

        ub_rs, ub_ps, rs, ps = get_vertical_key(model, r, s_id2clu, s_id2term_id, ubiquitous_chebi_ids)
        vk = ub_rs, ub_ps, rs, ps
        rs, ps = set(rs), set(ps)
        # The already mapped part of the reactants / products.
        partial_rs = {(s_id, c_id) for (s_id, c_id) in rs if s_id not in unmapped_s_ids}
        partial_ps = {(s_id, c_id) for (s_id, c_id) in ps if s_id not in unmapped_s_ids}
        if len(ub_rs) + len(ub_ps) + len(partial_rs) + len(partial_ps) < 2:
            continue

        simplified_vk = vertical_key2simplified_vertical_key(vk)
        if simplified_vk not in simplified_vk2vk_set:
            continue

        ub_rs, ub_ps = tuple(sorted(ub_rs)), tuple(sorted(ub_ps))
        for (vk_ub_rs, vk_ub_ps, vk_rs, vk_ps) in simplified_vk2vk_set[simplified_vk]:
            # Replace the species ids that have a cluster with that cluster.
            vk_rs = {(s_id2clu.get(s_id, s_id), c_id) for (s_id, c_id) in vk_rs}
            vk_ps = {(s_id2clu.get(s_id, s_id), c_id) for (s_id, c_id) in vk_ps}
            proposal = {}

            # The grouped key matches in the opposite direction: swap the sides and compare again.
            if vk_ub_rs == ub_ps and vk_ub_ps == ub_rs and not partial_rs - vk_ps and not partial_ps - vk_rs:
                vk_ub_rs, vk_ub_ps, partial_rs, partial_ps = vk_ub_ps, vk_ub_rs, partial_ps, partial_rs

            if vk_ub_rs == ub_rs and vk_ub_ps == ub_ps and not partial_rs - vk_rs and not partial_ps - vk_ps:
                r_s_ids = rs - vk_rs
                p_s_ids = ps - vk_ps
                # NOTE(review): `and` binds tighter than `or`, so this is true whenever p_s_ids
                # is non-empty, whatever the sizes are — confirm that this is intended.
                if 0 < len(r_s_ids) <= 1 and 0 < len(p_s_ids) <= 1 and r_s_ids or p_s_ids:
                    if r_s_ids and vk_rs - rs:
                        s_id, c_id = r_s_ids.pop()
                        # if it is not a species id but a cluster, continue
                        if not isinstance(s_id, str):
                            continue
                        clu, c_id = (vk_rs - rs).pop()
                        # if it is a species id instead of a cluster, continue
                        if not isinstance(clu, tuple):
                            continue
                        candidate_sps = {
                            model.getSpecies(it) for it in
                            (term_id2s_ids[s_id2term_id[s_id]] if s_id in s_id2term_id else {s_id})
                        }
                        comp, term = clu
                        if in_species_conflict(term, candidate_sps, proposal):
                            continue
                        for s in candidate_sps:
                            proposal[s.getId()] = s.getCompartment(), term
                    if p_s_ids and vk_ps - ps:
                        s_id, c_id = p_s_ids.pop()
                        # if it is not a species id but a cluster, continue
                        if not isinstance(s_id, str):
                            continue
                        clu, c_id = (vk_ps - ps).pop()
                        # if it is a species id instead of a cluster, continue
                        if not isinstance(clu, tuple):
                            continue
                        candidate_sps = {
                            model.getSpecies(it) for it in
                            (term_id2s_ids[s_id2term_id[s_id]] if s_id in s_id2term_id else {s_id})
                        }
                        comp, term = clu
                        if in_species_conflict(term, candidate_sps, proposal):
                            continue
                        for s in candidate_sps:
                            proposal[s.getId()] = s.getCompartment(), term

            if proposal:
                # Accept the proposal: register the new members and mark them as mapped.
                s_id2clu.update(proposal)
                for s_id, clu in proposal.items():
                    clu2s_ids[clu].add(s_id)
                unmapped_s_ids -= set(proposal.keys())