예제 #1
0
def multimodel_pipeline(sbml2parameters, res_dir, treeefm_path, max_efm_number=1000, rewrite=True, org=None):
    """Run EFM analysis on one or several SBML models and serialize an HTML report.

    For every model the pipeline constrains the exchange reactions (when
    constraints are given), annotates the model, writes the constrained copy
    into its own sub-directory of ``res_dir``, performs EFM analysis and
    collects the per-model HTML description. When more than one model was
    analysed, the models are combined into a merged model and a comparison tab
    is added. Finally EFM communities are detected and everything is written
    out through ``serialize(res_dir, tab2html, title)``.

    :param sbml2parameters: mapping of SBML file path to a pair
        ``(r_id2rev, r_id2rev_banned)``; the first element is passed to
        ``constraint_exchange_reactions`` as ``forsed_r_id2rev`` and the second
        as ``prohibited_r_id2rev``.
    :param res_dir: directory where all results are stored (created with
        ``create_dirs(res_dir, rewrite)``).
    :param treeefm_path: forwarded to ``analyse_model_efm`` as ``tree_efm_path``.
    :param max_efm_number: forwarded to ``analyse_model_efm``.
    :param rewrite: forwarded to ``create_dirs`` and ``analyse_model_efm``.
    :param org: forwarded to ``annotate``.
    :raises ValueError: if ``sbml2parameters`` is empty.
    """
    if not sbml2parameters:
        # Fail before touching the file system: there is nothing to analyse.
        raise ValueError('sbml2parameters is empty: at least one SBML model is required')

    create_dirs(res_dir, rewrite)

    def get_f_path(f):
        # Express a file path relative to res_dir, prefixed with '..'; falsy paths map to None.
        return os.path.join('..', os.path.relpath(f, res_dir)) if f else None

    tab2html = {}

    model_id2sbml, model_id2S, model_id2efm_id2pws = {}, {}, {}

    name2pw = get_name2pw()
    pts = parse_simple(get_pts())
    root_ids = {t.get_id() for t in pts.get_roots()}

    chebi = parse(CHEBI)
    ub_ch_ids = get_ubiquitous_chebi_ids(add_common=True, add_cofactors=True, chebi=chebi)

    model_id2cofactors = {}
    modeld_id2m_id2chebi_id = {}

    for sbml, (r_id2rev, r_id2rev_banned) in sbml2parameters.items():
        doc = libsbml.SBMLReader().readSBML(sbml)
        model = doc.getModel()

        model_name = get_model_name(model=model)
        # Names longer than 12 characters are shortened to their first 10
        # characters (minus trailing/leading separators) for tab titles and paths.
        short_model_name = model_name
        if len(model_name) > 12:
            short_model_name = model_name[:10].strip('-_ ')
            if len(short_model_name) == 10:
                short_model_name += '...'
        # Keep only alphanumerics and underscores so the name is usable as a directory name.
        safe_m_name = ''.join(ch for ch in short_model_name.replace(' ', '_') if ch.isalnum() or '_' == ch)
        logging.info('Analysing %s...', model_name)

        # create directories to store results
        logging.info("Preparing directories...")
        m_dir = os.path.join(res_dir, safe_m_name)
        create_dirs(m_dir, rewrite)

        cofactors = select_metabolite_ids_by_term_ids(model, ub_ch_ids)

        if r_id2rev:
            # Cofactors are only passed along when no reactions are explicitly prohibited.
            constraint_exchange_reactions(model, forsed_r_id2rev=r_id2rev, prohibited_r_id2rev=r_id2rev_banned,
                                          cofactors=cofactors if not r_id2rev_banned else None)

        logging.info("Annotating the model...")
        annotate(model, org=org, reactions=False, pathways=False, chebi=chebi)
        m_id2ch_id = get_species_id2chebi_id(model)

        # copy our model in the result directory
        sbml = os.path.join(m_dir, '%s.constrained.xml' % safe_m_name)
        libsbml.SBMLWriter().writeSBMLToFile(doc, sbml)

        description = model_serializer.serialize(sbml, model, model_name, r_id2rev, m_dir, get_f_path)

        pw2rs = get_pathways(model, pts, name2pw, root_ids)

        logging.info("Performing EFMA...")
        efma_dir = os.path.join(m_dir, 'efma')
        create_dirs(efma_dir, rewrite)

        # A loop-local name is used on purpose: the merged mapping built after
        # the loop must not alias (and mutate) the last model's own mapping.
        S, model_efm_id2pws = analyse_model_efm(model, efma_dir, r_id2rev, tree_efm_path=treeefm_path,
                                                max_efm_number=max_efm_number, rewrite=rewrite, pw2rs=pw2rs)

        for serializer in (efm_serializer.serialize, coupled_reaction_group_serializer.serialize):
            description += \
                serializer(model=model, path=efma_dir, get_f_path=get_f_path, S=S, model_name=model_name)

        if S.gr_id2r_id2c:
            # The folded model replaces the constrained one as this model's SBML from here on.
            sbml = os.path.join(efma_dir, '%s.folded.xml' % safe_m_name)
            create_folded_model(S, model)
            libsbml.SBMLWriter().writeSBMLToFile(doc, sbml)

        if not S or not S.efm_id2i:
            description += describe('nothing_found.html')

        model_id2sbml[safe_m_name] = sbml
        model_id2S[safe_m_name] = S
        model_id2efm_id2pws[safe_m_name] = model_efm_id2pws
        model_id2cofactors[safe_m_name] = cofactors
        modeld_id2m_id2chebi_id[safe_m_name] = m_id2ch_id

        tab2html['Analysis of %s' % short_model_name] = description, None

    if len(model_id2sbml) > 1:
        mm_dir = os.path.join(res_dir, 'merged_model')
        # NOTE(review): the rewrite flag is not forwarded here, unlike the other create_dirs calls - confirm.
        create_dirs(mm_dir)

        sbml, S, model_id2id2id, common_ids, model_id2dfs, mappings = combine_models(model_id2sbml, model_id2S, mm_dir)

        # Translate the per-model ids into the ids of the merged model; ids
        # without a counterpart in the merged model are dropped.
        efm_id2pws, cofactors, m_id2ch_id = {}, set(), {}
        for model_id in model_id2sbml:
            id2id = model_id2id2id[model_id]
            efm_id2pws.update({id2id[efm_id]: pws
                               for (efm_id, pws) in model_id2efm_id2pws[model_id].items()
                               if efm_id in id2id})
            cofactors |= {id2id[m_id] for m_id in model_id2cofactors[model_id] if m_id in id2id}
            m_id2ch_id.update({id2id[m_id]: ch_id
                               for (m_id, ch_id) in modeld_id2m_id2chebi_id[model_id].items()
                               if m_id in id2id})

        tab2html['Model comparison'] = mapping_serializer.serialize(model_id2dfs, *mappings, mm_dir, get_f_path), None
        title = 'Combined model analysis'
    else:
        # dict.items() is a view, not an iterator, so it has to be wrapped in iter() for next().
        model_id, sbml = next(iter(model_id2sbml.items()))
        efm_id2pws = model_id2efm_id2pws[model_id]
        cofactors = model_id2cofactors[model_id]
        m_id2ch_id = modeld_id2m_id2chebi_id[model_id]
        S = model_id2S[model_id].get_main_S()
        title = 'Model analysis'

    # Communities
    logging.info("Analysing communities...")
    comm_dir = os.path.join(res_dir, 'communities')
    create_dirs(comm_dir, rewrite)

    id2cluster = detect_communities_by_boundary_metabolites(S, cofactors=cofactors, threshold=50)

    if id2cluster:
        doc = libsbml.SBMLReader().readSBML(sbml)
        model = doc.getModel()
        description = \
            community_serializer.serialize(model, S, id2cluster, comm_dir, get_f_path, m_id2ch_id, chebi)
        if len(model_id2sbml) > 1:
            # For several models the community report is appended to the comparison tab.
            tab2html['Model comparison'] = tab2html['Model comparison'][0] + description, None
        else:
            tab2html['EFM communities'] = description, None

    serialize(res_dir, tab2html, title)
예제 #2
0
def analyse_model(
    sbml,
    out_r_id,
    out_rev,
    res_dir,
    in_m_id,
    out_m_id,
    in_r_id2rev=None,
    threshold=ZERO_THRESHOLD,
    do_fva=True,
    do_fba=True,
    do_efm=True,
    max_efm_number=1000,
    mask_shift=4,
    get_f_path=None,
    tree_efm_path=TREEEFM_PATH,
    main_dir=None,
    rewrite=True,
):
    """Analyse a single SBML model with FVA, FBA and/or EFM analysis.

    The results are stored in a sub-directory of ``res_dir`` named after the
    model, and an HTML description of every performed analysis is accumulated.

    :param sbml: path to the SBML file of the model.
    :param out_r_id: id of the reaction used as the objective (``bm_r_id``) in
        FVA/FBA; also added to the reaction-direction mapping given to EFMA.
    :param out_rev: truthy if ``out_r_id`` is to be used in the reversed
        direction; selects MINIMIZE instead of MAXIMIZE as objective sense.
    :param res_dir: parent directory for the model's result directory.
    :param in_m_id: forwarded to the EFM serializers.
    :param out_m_id: forwarded to the EFM serializers.
    :param in_r_id2rev: optional mapping passed to
        ``constraint_exchange_reactions`` as ``forsed_r_id2rev``; ``None`` or
        empty means no constraints.
    :param threshold: forwarded to ``analyse_by_fva`` and ``analyse_by_fba``.
    :param do_fva: whether to perform FVA.
    :param do_fba: whether to perform FBA.
    :param do_efm: whether to perform EFM analysis.
    :param max_efm_number: forwarded to ``analyse_model_efm``.
    :param mask_shift: current shift for visualisation layer masks; updated by
        ``update_vis_layers`` and returned.
    :param get_f_path: callable converting a file path into the path used in
        the report; defaults to a path relative to the model's result directory.
    :param tree_efm_path: forwarded to ``analyse_model_efm``.
    :param main_dir: forwarded to the EFM serializers.
    :param rewrite: forwarded to the EFMA directory preparation and to
        ``analyse_model_efm``.
    :return: tuple ``(model_name, S, sbml, vis_r_ids, description, mask_shift,
        r_id2mask, layer2mask, main_layer)`` where ``sbml`` is the path of the
        last model file produced and ``S`` is ``None`` if EFMA was skipped.
    """
    model_name = get_model_name(sbml)
    logging.info("Analysing %s...", model_name)

    # create directories to store results
    logging.info("Preparing directories...")
    res_dir = os.path.join(res_dir, "".join(ch for ch in model_name.replace(" ", "_") if ch.isalnum() or "_" == ch))
    create_dirs(res_dir, False)
    if not get_f_path:

        def get_f_path(f):
            # Express a file path relative to the model's result directory, prefixed with '..'.
            return os.path.join("..", os.path.relpath(f, res_dir))

    doc = libsbml.SBMLReader().readSBML(sbml)
    model = doc.getModel()

    if in_r_id2rev:
        constraint_exchange_reactions(model, forsed_r_id2rev=in_r_id2rev)
        # NOTE(review): this overwrites the input SBML file with the constrained model - confirm intended.
        libsbml.SBMLWriter().writeSBMLToFile(doc, sbml)

    # copy our model in the result directory
    if os.path.normpath(res_dir) != os.path.normpath(os.path.dirname(sbml)):
        shutil.copy(sbml, res_dir)
        sbml = os.path.join(res_dir, os.path.basename(sbml))

    # in_r_id2rev defaults to None, and dict(None) raises TypeError, so fall back to an empty mapping.
    r_id2rev = dict(in_r_id2rev) if in_r_id2rev else {}
    r_id2rev[out_r_id] = out_rev
    description = model_serializer.serialize(sbml, model, model_name, r_id2rev, res_dir, get_f_path)

    r_id2mask, layer2mask, vis_r_ids, main_layer = defaultdict(lambda: 0), {}, set(), None

    cobra_model, opt_val, objective_sense = None, None, MINIMIZE if out_rev else MAXIMIZE

    if do_fva:
        cur_dir = _prepare_dir(res_dir, "fva", "Performing FVA...")
        cobra_model = create_cobra_model_from_sbml_file(sbml) if not cobra_model else cobra_model
        r_id2bounds, opt_val = analyse_by_fva(
            cobra_model=cobra_model, bm_r_id=out_r_id, objective_sense=objective_sense, threshold=threshold
        )
        if opt_val:
            # A reaction whose bounds have the same non-zero sign can never carry zero flux.
            mask_shift = update_vis_layers(
                (r_id for (r_id, (lb, ub)) in r_id2bounds.items() if lb * ub > 0),
                "FVA essential",
                r_id2mask,
                layer2mask,
                mask_shift,
                vis_r_ids,
            )
            main_layer = "FVA essential"
            fva_sbml = os.path.join(cur_dir, "Model_FVA.xml")
            # Subsequent steps that re-read the SBML work on the FVA model.
            sbml = create_fva_model(sbml, r_id2bounds, fva_sbml)
        description += fva_serializer.serialize(
            cobra_model, opt_val, r_id2bounds, objective_sense, out_r_id, cur_dir, get_f_path, sbml
        )
    if do_fba:
        cur_dir = _prepare_dir(res_dir, "fba", "Performing FBA...")
        # Reuse the cobra model created for FVA, if any.
        cobra_model = create_cobra_model_from_sbml_file(sbml) if not cobra_model else cobra_model
        r_id2val, opt_val = analyse_by_fba(
            cobra_model, bm_r_id=out_r_id, objective_sense=objective_sense, threshold=threshold
        )
        if opt_val:
            mask_shift = update_vis_layers(r_id2val.keys(), "FBA", r_id2mask, layer2mask, mask_shift, vis_r_ids)
            main_layer = "FBA"
        description += fba_serializer.serialize(
            cobra_model, opt_val, r_id2val, objective_sense, out_r_id, cur_dir, get_f_path
        )

    S = None
    if do_efm:
        cur_dir = _prepare_dir(res_dir, "efma", "Performing EFMA...", rewrite=rewrite)

        # Re-read the model: sbml may now point to the FVA model created above.
        doc = libsbml.SBMLReader().readSBML(sbml)
        model = doc.getModel()

        name2pw = get_name2pw()
        pts = parse_simple(get_pts())
        root_ids = {t.get_id() for t in pts.get_roots()}
        pw2rs = get_pathways(model, pts, name2pw, root_ids)
        S, efm_id2pws = analyse_model_efm(
            model,
            cur_dir,
            r_id2rev,
            tree_efm_path=tree_efm_path,
            max_efm_number=max_efm_number,
            rewrite=rewrite,
            pw2rs=pw2rs,
        )

        for serializer in (efm_serializer.serialize, coupled_reaction_group_serializer.serialize):
            description += serializer(
                model=model,
                path=cur_dir,
                get_f_path=get_f_path,
                in_m_id=in_m_id,
                out_m_id=out_m_id,
                out_r_id=out_r_id,
                S=S,
                model_name=model_name,
                main_dir=main_dir,
            )

        if S.gr_id2r_id2c:
            clique_merged_sbml = os.path.join(cur_dir, "Model_folded.xml")
            r_id2new_r_id = create_folded_model(S, model)
            libsbml.SBMLWriter().writeSBMLToFile(doc, clique_merged_sbml)
            sbml = clique_merged_sbml

            # Propagate visibility and layer masks from the original reactions to the folded ones.
            vis_r_ids |= {cl_id for (r_id, cl_id) in r_id2new_r_id.items() if r_id in vis_r_ids}
            for r_id, new_r_id in r_id2new_r_id.items():
                if r_id in r_id2mask:
                    r_id2mask[new_r_id] |= r_id2mask[r_id]

    if not opt_val and (not S or not S.efm_id2i):
        description += describe("nothing_found.html")

    return model_name, S, sbml, vis_r_ids, description, mask_shift, r_id2mask, layer2mask, main_layer