예제 #1
0
def generalize_reactions(model, s_id2clu, s_id2term_id, ubiquitous_chebi_ids, r_ids_to_ignore=None):
    """Map every reaction id to the index of the vertical-key group it belongs to.

    The groups come from ``get_vk2r_ids``; they are numbered 0, 1, 2, ... in
    the iteration order of that mapping's values. If a reaction id occurs in
    several groups, the index of the last such group wins.

    :param model: model passed through to ``get_vk2r_ids``.
    :param s_id2clu: species-id -> cluster mapping, passed through.
    :param s_id2term_id: species-id -> term-id mapping, passed through.
    :param ubiquitous_chebi_ids: ubiquitous ids, passed through.
    :param r_ids_to_ignore: optional reaction ids to skip, passed through.
    :return: dict of reaction id -> integer group index.
    """
    groups = get_vk2r_ids(model, s_id2clu, s_id2term_id, ubiquitous_chebi_ids, r_ids_to_ignore=r_ids_to_ignore)
    return {
        reaction_id: group_index
        for group_index, reaction_ids in enumerate(groups.values())
        for reaction_id in reaction_ids
    }
예제 #2
0
def suggest_clusters(model,
                     unmapped_s_ids,
                     term_id2clu,
                     s_id2term_id,
                     ubiquitous_chebi_ids,
                     r_ids_to_ignore=None):
    """Suggest terms for unmapped species (currently disabled: always returns None).

    The bare ``return`` right after the TODO makes the rest of the body
    unreachable. As written, the disabled code would:

    * collect the vertical keys shared by more than one reaction;
    * for each remaining reaction that contains an unmapped species, look for
      a shared key with the same simplified form whose known participants
      match (in either direction);
    * record a proposed term for the leftover species in ``term_id2clu`` and
      remove that species from ``unmapped_s_ids``.

    :param model: object providing ``getListOfReactions()`` and
        ``getSpecies(id)`` (presumably a libSBML model -- confirm).
    :param unmapped_s_ids: set of species ids without a mapping; the disabled
        code shrinks it with ``-=``.
    :param term_id2clu: mapping the disabled code writes proposals into.
    :param s_id2term_id: species id -> term id mapping.
    :param ubiquitous_chebi_ids: passed through to ``get_vk2r_ids`` and
        ``get_vertical_key``.
    :param r_ids_to_ignore: optional collection of reaction ids to skip.
    :return: None.
    """
    # TODO: double check it
    # The unconditional return below deliberately disables the function;
    # nothing after it is executed.
    return
    if not unmapped_s_ids:
        return
    s_id2clu = compute_s_id2clu(set(), model, s_id2term_id, term_id2clu)
    term_id2s_ids = invert_map(s_id2term_id)
    vk2r_ids = get_vk2r_ids(model,
                            s_id2clu,
                            s_id2term_id,
                            ubiquitous_chebi_ids,
                            r_ids_to_ignore=r_ids_to_ignore)
    # Keep only the vertical keys that are shared by more than one reaction.
    vk2r_ids = {
        vk: r_ids
        for (vk, r_ids) in vk2r_ids.items() if len(r_ids) > 1
    }
    # Union of the reaction ids covered by those shared keys.
    processed_r_ids = reduce(lambda s1, s2: s1 | s2, vk2r_ids.values(), set())

    # Index the shared keys by their simplified form.
    s_vk2vk = defaultdict(set)
    for vk in vk2r_ids.keys():
        s_vk2vk[vertical_key2simplified_vertical_key(vk)].add(vk)

    # Species id -> ids of the non-ignored reactions with more than two
    # participants that involve it.
    # NOTE(review): s_id2r_ids is filled here but never read again in this
    # function.
    s_id2r_ids = defaultdict(list)
    for r in (r for r in model.getListOfReactions()
              if r.getNumReactants() + r.getNumProducts() > 2):
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue
        r_id = r.getId()
        for s_id in chain((species_ref.getSpecies()
                           for species_ref in r.getListOfReactants()),
                          (species_ref.getSpecies()
                           for species_ref in r.getListOfProducts())):
            s_id2r_ids[s_id].append(r_id)

    for r in model.getListOfReactions():
        # Skip reactions already covered by a shared key and reactions that
        # have no unmapped species (assumes get_metabolites returns a set of
        # species ids -- confirm).
        if r.getId(
        ) in processed_r_ids or not unmapped_s_ids & get_metabolites(r):
            continue
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue
        ub_rs, ub_ps, rs, ps = get_vertical_key(model, r, s_id2clu,
                                                s_id2term_id,
                                                ubiquitous_chebi_ids)
        vk = ub_rs, ub_ps, rs, ps
        rs, ps = set(rs), set(ps)
        # Participants whose first element is not an unmapped species id.
        partial_rs, partial_ps = {(s_id, c_id) for (s_id, c_id) in rs if s_id not in unmapped_s_ids}, \
                                 {(s_id, c_id) for (s_id, c_id) in ps if s_id not in unmapped_s_ids}
        # Skip when the key is already shared, or when fewer than two
        # ubiquitous/known participants remain to match on.
        if vk in vk2r_ids or len(ub_rs) + len(ub_ps) + len(partial_rs) + len(
                partial_ps) < 2:
            continue
        s_vk = vertical_key2simplified_vertical_key(vk)
        if s_vk in s_vk2vk:
            # Sorted tuples are used for the comparisons with the candidate
            # key's ubiquitous parts below.
            ub_rs, ub_ps = tuple(sorted(ub_rs)), tuple(sorted(ub_ps))
            for (vk_ub_rs, vk_ub_ps, vk_rs, vk_ps) in s_vk2vk[s_vk]:
                # Replace species ids in the candidate key by their cluster
                # from s_id2clu when one exists.
                vk_rs, vk_ps = {(s_id if s_id not in s_id2clu else s_id2clu[s_id], c_id) for (s_id, c_id) in vk_rs}, \
                               {(s_id if s_id not in s_id2clu else s_id2clu[s_id], c_id) for (s_id, c_id) in vk_ps}
                proposal = {}
                # The candidate matches with reactants and products exchanged:
                # swap the sides so that the forward check below applies.
                # NOTE(review): partial_rs/partial_ps stay swapped for the
                # remaining candidates of this reaction while rs/ps are not
                # swapped -- confirm this is intended.
                if vk_ub_rs == ub_ps and vk_ub_ps == ub_rs and not partial_rs - vk_ps and not partial_ps - vk_rs:
                    vk_ub_rs, vk_ub_ps, partial_rs, partial_ps = vk_ub_ps, vk_ub_rs, partial_ps, partial_rs
                if vk_ub_rs == ub_rs and vk_ub_ps == ub_ps and not partial_rs - vk_rs and not partial_ps - vk_ps:
                    # Participants of this reaction absent from the candidate.
                    r_s_ids = rs - vk_rs
                    p_s_ids = ps - vk_ps
                    # NOTE(review): `and` binds tighter than `or`, so this is
                    # evaluated as `(... and ... and r_s_ids) or p_s_ids`;
                    # any non-empty p_s_ids passes -- confirm the intent.
                    if 0 < len(r_s_ids) <= 1 and 0 < len(
                            p_s_ids) <= 1 and r_s_ids or p_s_ids:
                        # NOTE(review): each `continue` below jumps to the
                        # next candidate key and therefore also skips the
                        # `if proposal:` step for this candidate.
                        if r_s_ids and vk_rs - rs:
                            s_id, c_id = r_s_ids.pop()
                            clu, c_id = (vk_rs - rs).pop()
                            # if it is a species id instead of a cluster, continue
                            if not isinstance(clu, tuple):
                                continue
                            # if it is not a species id but a cluster, continue
                            if not isinstance(s_id, str):
                                continue
                            # Species looked up via the term of s_id, or s_id
                            # alone when it has no term (presumably
                            # term_id2s_ids maps term id -> species ids).
                            candidate_sps = {
                                model.getSpecies(sp_id)
                                for sp_id in (
                                    term_id2s_ids[s_id2term_id[s_id]]
                                    if s_id in s_id2term_id else [s_id])
                            }
                            # Only the second element of the cluster is used.
                            comp, term = clu
                            for s in candidate_sps:
                                proposal[s.getId()] = term
                        if p_s_ids and vk_ps - ps:
                            s_id, c_id = p_s_ids.pop()
                            clu, c_id = (vk_ps - ps).pop()
                            # if it is a species id instead of a cluster, continue
                            if not isinstance(clu, tuple):
                                continue
                            # if it is not a species id but a cluster, continue
                            if not isinstance(s_id, str):
                                continue
                            candidate_sps = {
                                model.getSpecies(it)
                                for it in (term_id2s_ids[s_id2term_id[s_id]]
                                           if s_id in s_id2term_id else {s_id})
                            }
                            comp, term = clu
                            for s in candidate_sps:
                                proposal[s.getId()] = term
                if proposal:
                    # Record the proposals (wrapped in a 1-tuple unless
                    # already a tuple) and drop the species from
                    # unmapped_s_ids.
                    # NOTE(review): term_id2clu is keyed by species id here,
                    # despite its name -- confirm.
                    for s_id, clu in proposal.items():
                        term_id2clu[s_id] = (
                            clu, ) if not (isinstance(clu, tuple)) else clu
                        unmapped_s_ids -= {s_id}
예제 #3
0
def infer_clusters(model,
                   unmapped_s_ids,
                   s_id2clu,
                   s_id2term_id,
                   ubiquitous_chebi_ids,
                   r_ids_to_ignore=None):
    """Infer clusters for unmapped species (currently disabled: always returns None).

    The bare ``return`` right after the TODO makes the rest of the body
    unreachable. As written, the disabled code would:

    * collect the vertical keys shared by more than one reaction;
    * for each remaining reaction that contains an unmapped species, look for
      a shared key with the same simplified form whose known participants
      match (in either direction);
    * propose ``(compartment, term)`` clusters for the leftover species,
      unless a candidate species shares a reaction with a species already in
      that cluster;
    * write accepted proposals into ``s_id2clu`` and remove the species from
      ``unmapped_s_ids``.

    :param model: object providing ``getListOfReactions()`` and
        ``getSpecies(id)`` (presumably a libSBML model -- confirm).
    :param unmapped_s_ids: set of species ids without a cluster; the disabled
        code shrinks it with ``-=``.
    :param s_id2clu: species id -> cluster mapping; the disabled code updates
        it in place.
    :param s_id2term_id: species id -> term id mapping.
    :param ubiquitous_chebi_ids: passed through to ``get_vk2r_ids`` and
        ``get_vertical_key``.
    :param r_ids_to_ignore: optional collection of reaction ids to skip.
    :return: None.
    """
    # TODO: double check it
    # The unconditional return below deliberately disables the function;
    # nothing after it is executed.
    return
    if not unmapped_s_ids:
        return
    term_id2s_ids = invert_map(s_id2term_id)
    # NOTE(review): clu2s_ids is indexed with possibly-missing keys below, so
    # invert_map presumably returns a defaultdict of sets -- confirm.
    clu2s_ids = invert_map(s_id2clu)
    vk2r_ids = get_vk2r_ids(model,
                            s_id2clu,
                            s_id2term_id,
                            ubiquitous_chebi_ids,
                            r_ids_to_ignore=r_ids_to_ignore)
    # Keep only the vertical keys that are shared by more than one reaction.
    vk2r_ids = {
        vk: r_ids
        for (vk, r_ids) in vk2r_ids.items() if len(r_ids) > 1
    }

    # Index the shared keys by their simplified form.
    simplified_vk2vk_set = defaultdict(set)
    for vk in vk2r_ids.keys():
        simplified_vk2vk_set[vertical_key2simplified_vertical_key(vk)].add(vk)

    # Species id -> ids of the non-ignored reactions with more than two
    # participants that involve it (read by in_species_conflict).
    s_id2r_ids = defaultdict(list)
    for r in (r for r in model.getListOfReactions()
              if r.getNumReactants() + r.getNumProducts() > 2):
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue
        r_id = r.getId()
        for s_id in chain((species_ref.getSpecies()
                           for species_ref in r.getListOfReactants()),
                          (species_ref.getSpecies()
                           for species_ref in r.getListOfProducts())):
            s_id2r_ids[s_id].append(r_id)

    def in_species_conflict(term, candidate_sps, proposal_s_id2clu):
        # Return True if some candidate species takes part in a reaction
        # (as recorded in s_id2r_ids) together with a species that is already
        # in, or proposed for, the cluster (candidate's compartment, term).
        proposal_clu2s_ids = invert_map(proposal_s_id2clu)
        for s in candidate_sps:
            s_clu = s.getCompartment(), term
            rs = {r_id for r_id in s_id2r_ids[s.getId()]}
            clu_s_ids = clu2s_ids[s_clu] | proposal_clu2s_ids[s_clu]
            for clu_s_id in clu_s_ids:
                if {r_id for r_id in s_id2r_ids[clu_s_id]} & rs:
                    return True
        return False

    # Union of the reaction ids covered by the shared keys.
    processed_r_ids = reduce(lambda s1, s2: s1 | s2, vk2r_ids.values(), set())

    for r in model.getListOfReactions():
        # Skip reactions already covered by a shared key and reactions that
        # have no unmapped species (assumes get_metabolites returns a set of
        # species ids -- confirm).
        if r.getId(
        ) in processed_r_ids or not unmapped_s_ids & get_metabolites(r):
            continue
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue
        ub_rs, ub_ps, rs, ps = get_vertical_key(model, r, s_id2clu,
                                                s_id2term_id,
                                                ubiquitous_chebi_ids)
        vk = ub_rs, ub_ps, rs, ps
        rs, ps = set(rs), set(ps)
        # Participants whose first element is not an unmapped species id.
        partial_rs, partial_ps = {(s_id, c_id) for (s_id, c_id) in rs if s_id not in unmapped_s_ids}, \
                                 {(s_id, c_id) for (s_id, c_id) in ps if s_id not in unmapped_s_ids}
        # Skip when fewer than two ubiquitous/known participants remain to
        # match on.
        if len(ub_rs) + len(ub_ps) + len(partial_rs) + len(partial_ps) < 2:
            continue
        simplified_vk = vertical_key2simplified_vertical_key(vk)
        if simplified_vk in simplified_vk2vk_set:
            # Sorted tuples are used for the comparisons with the candidate
            # key's ubiquitous parts below.
            ub_rs, ub_ps = tuple(sorted(ub_rs)), tuple(sorted(ub_ps))
            for (vk_ub_rs, vk_ub_ps, vk_rs,
                 vk_ps) in simplified_vk2vk_set[simplified_vk]:
                # Replace species ids in the candidate key by their cluster
                # from s_id2clu when one exists.
                vk_rs, vk_ps = {(s_id if s_id not in s_id2clu else s_id2clu[s_id], c_id) for (s_id, c_id) in vk_rs}, \
                               {(s_id if s_id not in s_id2clu else s_id2clu[s_id], c_id) for (s_id, c_id) in vk_ps}
                proposal = {}
                # The candidate matches with reactants and products exchanged:
                # swap the sides so that the forward check below applies.
                # NOTE(review): partial_rs/partial_ps stay swapped for the
                # remaining candidates of this reaction while rs/ps are not
                # swapped -- confirm this is intended.
                if vk_ub_rs == ub_ps and vk_ub_ps == ub_rs and not partial_rs - vk_ps and not partial_ps - vk_rs:
                    vk_ub_rs, vk_ub_ps, partial_rs, partial_ps = vk_ub_ps, vk_ub_rs, partial_ps, partial_rs
                if vk_ub_rs == ub_rs and vk_ub_ps == ub_ps and not partial_rs - vk_rs and not partial_ps - vk_ps:
                    # Participants of this reaction absent from the candidate.
                    r_s_ids = rs - vk_rs
                    p_s_ids = ps - vk_ps
                    # NOTE(review): `and` binds tighter than `or`, so this is
                    # evaluated as `(... and ... and r_s_ids) or p_s_ids`;
                    # any non-empty p_s_ids passes -- confirm the intent.
                    if 0 < len(r_s_ids) <= 1 and 0 < len(
                            p_s_ids) <= 1 and r_s_ids or p_s_ids:
                        # NOTE(review): each `continue` below jumps to the
                        # next candidate key and therefore also skips the
                        # `if proposal:` step for this candidate.
                        if r_s_ids and vk_rs - rs:
                            s_id, c_id = r_s_ids.pop()
                            # if it is not a species id but a cluster, continue
                            if not isinstance(s_id, str):
                                continue
                            clu, c_id = (vk_rs - rs).pop()
                            # if it is a species id instead of a cluster, continue
                            if not isinstance(clu, tuple):
                                continue
                            # Species looked up via the term of s_id, or s_id
                            # alone when it has no term (presumably
                            # term_id2s_ids maps term id -> species ids).
                            candidate_sps = {
                                model.getSpecies(it)
                                for it in (term_id2s_ids[s_id2term_id[s_id]]
                                           if s_id in s_id2term_id else {s_id})
                            }
                            # Only the second element of the cluster is used;
                            # the compartment is taken from each species.
                            comp, term = clu
                            if not in_species_conflict(term, candidate_sps,
                                                       proposal):
                                for s in candidate_sps:
                                    proposal[
                                        s.getId()] = s.getCompartment(), term
                            else:
                                continue
                        if p_s_ids and vk_ps - ps:
                            s_id, c_id = p_s_ids.pop()
                            # if it is not a species id but a cluster, continue
                            if not isinstance(s_id, str):
                                continue
                            clu, c_id = (vk_ps - ps).pop()
                            # if it is a species id instead of a cluster, continue
                            if not isinstance(clu, tuple):
                                continue
                            candidate_sps = {
                                model.getSpecies(it)
                                for it in (term_id2s_ids[s_id2term_id[s_id]]
                                           if s_id in s_id2term_id else {s_id})
                            }
                            comp, term = clu
                            if not in_species_conflict(term, candidate_sps,
                                                       proposal):
                                for s in candidate_sps:
                                    proposal[
                                        s.getId()] = s.getCompartment(), term
                            else:
                                continue
                if proposal:
                    # Commit the proposals: update the caller's s_id2clu, keep
                    # the inverse index in sync and drop the species from
                    # unmapped_s_ids.
                    s_id2clu.update(proposal)
                    for s_id, clu in proposal.items():
                        clu2s_ids[clu].add(s_id)
                    unmapped_s_ids -= set(proposal.keys())
def suggest_clusters(model, unmapped_s_ids, term_id2clu, s_id2term_id, ubiquitous_chebi_ids, r_ids_to_ignore=None):
    """Suggest terms for unmapped species (currently disabled: always returns None).

    The bare ``return`` right after the TODO makes the rest of the body
    unreachable. As written, the disabled code would:

    * collect the vertical keys shared by more than one reaction;
    * for each remaining reaction that contains an unmapped species, look for
      a shared key with the same simplified form whose known participants
      match (in either direction);
    * record a proposed term for the leftover species in ``term_id2clu`` and
      remove that species from ``unmapped_s_ids``.

    :param model: object providing ``getListOfReactions()`` and
        ``getSpecies(id)`` (presumably a libSBML model -- confirm).
    :param unmapped_s_ids: set of species ids without a mapping; the disabled
        code shrinks it with ``-=``.
    :param term_id2clu: mapping the disabled code writes proposals into.
    :param s_id2term_id: species id -> term id mapping.
    :param ubiquitous_chebi_ids: passed through to ``get_vk2r_ids`` and
        ``get_vertical_key``.
    :param r_ids_to_ignore: optional collection of reaction ids to skip.
    :return: None.
    """
    # TODO: double check it
    # The unconditional return below deliberately disables the function;
    # nothing after it is executed.
    return
    if not unmapped_s_ids:
        return
    s_id2clu = compute_s_id2clu(set(), model, s_id2term_id, term_id2clu)
    term_id2s_ids = invert_map(s_id2term_id)
    vk2r_ids = get_vk2r_ids(model, s_id2clu, s_id2term_id, ubiquitous_chebi_ids, r_ids_to_ignore=r_ids_to_ignore)
    # Keep only the vertical keys that are shared by more than one reaction.
    vk2r_ids = {vk: r_ids for (vk, r_ids) in vk2r_ids.items() if len(r_ids) > 1}
    # Union of the reaction ids covered by those shared keys.
    processed_r_ids = reduce(lambda s1, s2: s1 | s2, vk2r_ids.values(), set())

    # Index the shared keys by their simplified form.
    s_vk2vk = defaultdict(set)
    for vk in vk2r_ids.keys():
        s_vk2vk[vertical_key2simplified_vertical_key(vk)].add(vk)

    # Species id -> ids of the non-ignored reactions with more than two
    # participants that involve it.
    # NOTE(review): s_id2r_ids is filled here but never read again in this
    # function.
    s_id2r_ids = defaultdict(list)
    for r in (r for r in model.getListOfReactions() if r.getNumReactants() + r.getNumProducts() > 2):
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue
        r_id = r.getId()
        for s_id in chain((species_ref.getSpecies() for species_ref in r.getListOfReactants()),
                          (species_ref.getSpecies() for species_ref in r.getListOfProducts())):
            s_id2r_ids[s_id].append(r_id)

    for r in model.getListOfReactions():
        # Skip reactions already covered by a shared key and reactions that
        # have no unmapped species (assumes get_metabolites returns a set of
        # species ids -- confirm).
        if r.getId() in processed_r_ids or not unmapped_s_ids & get_metabolites(r):
            continue
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue
        ub_rs, ub_ps, rs, ps = get_vertical_key(model, r, s_id2clu, s_id2term_id, ubiquitous_chebi_ids)
        vk = ub_rs, ub_ps, rs, ps
        rs, ps = set(rs), set(ps)
        # Participants whose first element is not an unmapped species id.
        partial_rs, partial_ps = {(s_id, c_id) for (s_id, c_id) in rs if s_id not in unmapped_s_ids}, \
                                 {(s_id, c_id) for (s_id, c_id) in ps if s_id not in unmapped_s_ids}
        # Skip when the key is already shared, or when fewer than two
        # ubiquitous/known participants remain to match on.
        if vk in vk2r_ids or len(ub_rs) + len(ub_ps) + len(partial_rs) + len(partial_ps) < 2:
            continue
        s_vk = vertical_key2simplified_vertical_key(vk)
        if s_vk in s_vk2vk:
            # Sorted tuples are used for the comparisons with the candidate
            # key's ubiquitous parts below.
            ub_rs, ub_ps = tuple(sorted(ub_rs)), tuple(sorted(ub_ps))
            for (vk_ub_rs, vk_ub_ps, vk_rs, vk_ps) in s_vk2vk[s_vk]:
                # Replace species ids in the candidate key by their cluster
                # from s_id2clu when one exists.
                vk_rs, vk_ps = {(s_id if s_id not in s_id2clu else s_id2clu[s_id], c_id) for (s_id, c_id) in vk_rs}, \
                               {(s_id if s_id not in s_id2clu else s_id2clu[s_id], c_id) for (s_id, c_id) in vk_ps}
                proposal = {}
                # The candidate matches with reactants and products exchanged:
                # swap the sides so that the forward check below applies.
                # NOTE(review): partial_rs/partial_ps stay swapped for the
                # remaining candidates of this reaction while rs/ps are not
                # swapped -- confirm this is intended.
                if vk_ub_rs == ub_ps and vk_ub_ps == ub_rs and not partial_rs - vk_ps and not partial_ps - vk_rs:
                    vk_ub_rs, vk_ub_ps, partial_rs, partial_ps = vk_ub_ps, vk_ub_rs, partial_ps, partial_rs
                if vk_ub_rs == ub_rs and vk_ub_ps == ub_ps and not partial_rs - vk_rs and not partial_ps - vk_ps:
                    # Participants of this reaction absent from the candidate.
                    r_s_ids = rs - vk_rs
                    p_s_ids = ps - vk_ps
                    # NOTE(review): `and` binds tighter than `or`, so this is
                    # evaluated as `(... and ... and r_s_ids) or p_s_ids`;
                    # any non-empty p_s_ids passes -- confirm the intent.
                    if 0 < len(r_s_ids) <= 1 and 0 < len(p_s_ids) <= 1 and r_s_ids or p_s_ids:
                        # NOTE(review): each `continue` below jumps to the
                        # next candidate key and therefore also skips the
                        # `if proposal:` step for this candidate.
                        if r_s_ids and vk_rs - rs:
                            s_id, c_id = r_s_ids.pop()
                            clu, c_id = (vk_rs - rs).pop()
                            # if it is a species id instead of a cluster, continue
                            if not isinstance(clu, tuple):
                                continue
                            # if it is not a species id but a cluster, continue
                            if not isinstance(s_id, str):
                                continue
                            # Species looked up via the term of s_id, or s_id
                            # alone when it has no term (presumably
                            # term_id2s_ids maps term id -> species ids).
                            candidate_sps = {model.getSpecies(sp_id) for sp_id in
                                             (term_id2s_ids[s_id2term_id[s_id]] if s_id in s_id2term_id else [s_id])}
                            # Only the second element of the cluster is used.
                            comp, term = clu
                            for s in candidate_sps:
                                proposal[s.getId()] = term
                        if p_s_ids and vk_ps - ps:
                            s_id, c_id = p_s_ids.pop()
                            clu, c_id = (vk_ps - ps).pop()
                            # if it is a species id instead of a cluster, continue
                            if not isinstance(clu, tuple):
                                continue
                            # if it is not a species id but a cluster, continue
                            if not isinstance(s_id, str):
                                continue
                            candidate_sps = {model.getSpecies(it) for it in
                                             (term_id2s_ids[s_id2term_id[s_id]] if s_id in s_id2term_id else {s_id})}
                            comp, term = clu
                            for s in candidate_sps:
                                proposal[s.getId()] = term
                if proposal:
                    # Record the proposals (wrapped in a 1-tuple unless
                    # already a tuple) and drop the species from
                    # unmapped_s_ids.
                    # NOTE(review): term_id2clu is keyed by species id here,
                    # despite its name -- confirm.
                    for s_id, clu in proposal.items():
                        term_id2clu[s_id] = (clu, ) if not (isinstance(clu, tuple)) else clu
                        unmapped_s_ids -= {s_id}
def infer_clusters(model, unmapped_s_ids, s_id2clu, s_id2term_id, ubiquitous_chebi_ids, r_ids_to_ignore=None):
    """Infer clusters for unmapped species (currently disabled: always returns None).

    The bare ``return`` right after the TODO makes the rest of the body
    unreachable. As written, the disabled code would:

    * collect the vertical keys shared by more than one reaction;
    * for each remaining reaction that contains an unmapped species, look for
      a shared key with the same simplified form whose known participants
      match (in either direction);
    * propose ``(compartment, term)`` clusters for the leftover species,
      unless a candidate species shares a reaction with a species already in
      that cluster;
    * write accepted proposals into ``s_id2clu`` and remove the species from
      ``unmapped_s_ids``.

    :param model: object providing ``getListOfReactions()`` and
        ``getSpecies(id)`` (presumably a libSBML model -- confirm).
    :param unmapped_s_ids: set of species ids without a cluster; the disabled
        code shrinks it with ``-=``.
    :param s_id2clu: species id -> cluster mapping; the disabled code updates
        it in place.
    :param s_id2term_id: species id -> term id mapping.
    :param ubiquitous_chebi_ids: passed through to ``get_vk2r_ids`` and
        ``get_vertical_key``.
    :param r_ids_to_ignore: optional collection of reaction ids to skip.
    :return: None.
    """
    # TODO: double check it
    # The unconditional return below deliberately disables the function;
    # nothing after it is executed.
    return
    if not unmapped_s_ids:
        return
    term_id2s_ids = invert_map(s_id2term_id)
    # NOTE(review): clu2s_ids is indexed with possibly-missing keys below, so
    # invert_map presumably returns a defaultdict of sets -- confirm.
    clu2s_ids = invert_map(s_id2clu)
    vk2r_ids = get_vk2r_ids(model, s_id2clu, s_id2term_id, ubiquitous_chebi_ids, r_ids_to_ignore=r_ids_to_ignore)
    # Keep only the vertical keys that are shared by more than one reaction.
    vk2r_ids = {vk: r_ids for (vk, r_ids) in vk2r_ids.items() if len(r_ids) > 1}

    # Index the shared keys by their simplified form.
    simplified_vk2vk_set = defaultdict(set)
    for vk in vk2r_ids.keys():
        simplified_vk2vk_set[vertical_key2simplified_vertical_key(vk)].add(vk)

    # Species id -> ids of the non-ignored reactions with more than two
    # participants that involve it (read by in_species_conflict).
    s_id2r_ids = defaultdict(list)
    for r in (r for r in model.getListOfReactions() if r.getNumReactants() + r.getNumProducts() > 2):
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue
        r_id = r.getId()
        for s_id in chain((species_ref.getSpecies() for species_ref in r.getListOfReactants()),
                          (species_ref.getSpecies() for species_ref in r.getListOfProducts())):
            s_id2r_ids[s_id].append(r_id)

    def in_species_conflict(term, candidate_sps, proposal_s_id2clu):
        # Return True if some candidate species takes part in a reaction
        # (as recorded in s_id2r_ids) together with a species that is already
        # in, or proposed for, the cluster (candidate's compartment, term).
        proposal_clu2s_ids = invert_map(proposal_s_id2clu)
        for s in candidate_sps:
            s_clu = s.getCompartment(), term
            rs = {r_id for r_id in s_id2r_ids[s.getId()]}
            clu_s_ids = clu2s_ids[s_clu] | proposal_clu2s_ids[s_clu]
            for clu_s_id in clu_s_ids:
                if {r_id for r_id in s_id2r_ids[clu_s_id]} & rs:
                    return True
        return False

    # Union of the reaction ids covered by the shared keys.
    processed_r_ids = reduce(lambda s1, s2: s1 | s2, vk2r_ids.values(), set())

    for r in model.getListOfReactions():
        # Skip reactions already covered by a shared key and reactions that
        # have no unmapped species (assumes get_metabolites returns a set of
        # species ids -- confirm).
        if r.getId() in processed_r_ids or not unmapped_s_ids & get_metabolites(r):
            continue
        if r_ids_to_ignore and r.getId() in r_ids_to_ignore:
            continue
        ub_rs, ub_ps, rs, ps = get_vertical_key(model, r, s_id2clu, s_id2term_id, ubiquitous_chebi_ids)
        vk = ub_rs, ub_ps, rs, ps
        rs, ps = set(rs), set(ps)
        # Participants whose first element is not an unmapped species id.
        partial_rs, partial_ps = {(s_id, c_id) for (s_id, c_id) in rs if s_id not in unmapped_s_ids}, \
                                 {(s_id, c_id) for (s_id, c_id) in ps if s_id not in unmapped_s_ids}
        # Skip when fewer than two ubiquitous/known participants remain to
        # match on.
        if len(ub_rs) + len(ub_ps) + len(partial_rs) + len(partial_ps) < 2:
            continue
        simplified_vk = vertical_key2simplified_vertical_key(vk)
        if simplified_vk in simplified_vk2vk_set:
            # Sorted tuples are used for the comparisons with the candidate
            # key's ubiquitous parts below.
            ub_rs, ub_ps = tuple(sorted(ub_rs)), tuple(sorted(ub_ps))
            for (vk_ub_rs, vk_ub_ps, vk_rs, vk_ps) in simplified_vk2vk_set[simplified_vk]:
                # Replace species ids in the candidate key by their cluster
                # from s_id2clu when one exists.
                vk_rs, vk_ps = {(s_id if s_id not in s_id2clu else s_id2clu[s_id], c_id) for (s_id, c_id) in vk_rs}, \
                               {(s_id if s_id not in s_id2clu else s_id2clu[s_id], c_id) for (s_id, c_id) in vk_ps}
                proposal = {}
                # The candidate matches with reactants and products exchanged:
                # swap the sides so that the forward check below applies.
                # NOTE(review): partial_rs/partial_ps stay swapped for the
                # remaining candidates of this reaction while rs/ps are not
                # swapped -- confirm this is intended.
                if vk_ub_rs == ub_ps and vk_ub_ps == ub_rs and not partial_rs - vk_ps and not partial_ps - vk_rs:
                    vk_ub_rs, vk_ub_ps, partial_rs, partial_ps = vk_ub_ps, vk_ub_rs, partial_ps, partial_rs
                if vk_ub_rs == ub_rs and vk_ub_ps == ub_ps and not partial_rs - vk_rs and not partial_ps - vk_ps:
                    # Participants of this reaction absent from the candidate.
                    r_s_ids = rs - vk_rs
                    p_s_ids = ps - vk_ps
                    # NOTE(review): `and` binds tighter than `or`, so this is
                    # evaluated as `(... and ... and r_s_ids) or p_s_ids`;
                    # any non-empty p_s_ids passes -- confirm the intent.
                    if 0 < len(r_s_ids) <= 1 and 0 < len(p_s_ids) <= 1 and r_s_ids or p_s_ids:
                        # NOTE(review): each `continue` below jumps to the
                        # next candidate key and therefore also skips the
                        # `if proposal:` step for this candidate.
                        if r_s_ids and vk_rs - rs:
                            s_id, c_id = r_s_ids.pop()
                            # if it is not a species id but a cluster, continue
                            if not isinstance(s_id, str):
                                continue
                            clu, c_id = (vk_rs - rs).pop()
                            # if it is a species id instead of a cluster, continue
                            if not isinstance(clu, tuple):
                                continue
                            # Species looked up via the term of s_id, or s_id
                            # alone when it has no term (presumably
                            # term_id2s_ids maps term id -> species ids).
                            candidate_sps = {model.getSpecies(it) for it in
                                             (term_id2s_ids[s_id2term_id[s_id]] if s_id in s_id2term_id else {s_id})}
                            # Only the second element of the cluster is used;
                            # the compartment is taken from each species.
                            comp, term = clu
                            if not in_species_conflict(term, candidate_sps, proposal):
                                for s in candidate_sps:
                                    proposal[s.getId()] = s.getCompartment(), term
                            else:
                                continue
                        if p_s_ids and vk_ps - ps:
                            s_id, c_id = p_s_ids.pop()
                            # if it is not a species id but a cluster, continue
                            if not isinstance(s_id, str):
                                continue
                            clu, c_id = (vk_ps - ps).pop()
                            # if it is a species id instead of a cluster, continue
                            if not isinstance(clu, tuple):
                                continue
                            candidate_sps = {model.getSpecies(it) for it in
                                             (term_id2s_ids[s_id2term_id[s_id]] if s_id in s_id2term_id else {s_id})}
                            comp, term = clu
                            if not in_species_conflict(term, candidate_sps, proposal):
                                for s in candidate_sps:
                                    proposal[s.getId()] = s.getCompartment(), term
                            else:
                                continue
                if proposal:
                    # Commit the proposals: update the caller's s_id2clu, keep
                    # the inverse index in sync and drop the species from
                    # unmapped_s_ids.
                    s_id2clu.update(proposal)
                    for s_id, clu in proposal.items():
                        clu2s_ids[clu].add(s_id)
                    unmapped_s_ids -= set(proposal.keys())