Ejemplo n.º 1
0
def maincall(inputfiles, flavor=None, init=None, mediadb=None, outputfile=None):
    """Merge several models into a single community model and save it.

    Args:
        inputfiles (list): paths of the individual model files to merge
        flavor (str): flavor used to load and save the models
            (defaults to the configured 'sbml' / 'default_flavor')
        init (str): identifier of a medium in the media library; when given,
            the corresponding environment is applied to the merged model
        mediadb (str): path to the media library (defaults to the configured
            'input' / 'media_library' under the project directory)
        outputfile (str): output file; its base name (without extension) is
            used as the community id (defaults to 'community.xml')

    Raises:
        IOError: if the media library cannot be loaded
    """
    flavor = flavor or config.get('sbml', 'default_flavor')

    if not outputfile:
        outputfile = 'community.xml'
        model_id = 'community'
    else:
        model_id, _ = os.path.splitext(os.path.basename(outputfile))

    members = []
    for path in inputfiles:
        members.append(load_cbmodel(path, flavor=flavor))

    merged_community = Community(model_id, members)
    merged = merged_community.merged_model

    if init:
        # Fall back to the bundled media library when none was supplied.
        library_path = mediadb if mediadb else project_dir + config.get('input', 'media_library')

        try:
            library = load_media_db(library_path)
        except IOError:
            raise IOError('Failed to load media library:' + library_path)

        environment = Environment.from_compounds(library[init])
        environment.apply(merged, inplace=True)

    save_cbmodel(merged, outputfile, flavor=flavor)
Ejemplo n.º 2
0
def compute_bigg_gibbs_energy(modelfile, equi_cmpds_file, outputfile=None):
    """ Calculate standard Gibbs Energy for reactions in a model (as many as possible) using eQuilibrator.

    The results are always returned; when an output file is given they are
    additionally written to it as CSV (columns 'dG0' and 'sdG0').

    Args:
        modelfile (str): SBML file
        equi_cmpds_file (str): tab-separated file containing KEGG compounds accepted by eQuilibrator
            (must have a 'compound_id' column)
        outputfile (str): output CSV file (optional)

    Returns:
        dict: standard Gibbs Energies indexed by reaction ids
        dict: estimation error indexed by reaction ids
    """

    model = load_cbmodel(modelfile)

    # Only the set of accepted compound ids is needed downstream.
    kegg_compounds = set(pd.read_csv(equi_cmpds_file, sep='\t')['compound_id'])

    dG0, sdG0 = calculate_deltaG0s(model, kegg_compounds, pH=default_pH, I=default_I, T=default_T)

    if outputfile:
        pd.DataFrame({'dG0': dG0, 'sdG0': sdG0}).to_csv(outputfile)

    # Return the values in both cases so the result matches the documented
    # contract (previously None was returned whenever a file was written).
    return dG0, sdG0
Ejemplo n.º 3
0
 def __init__(self, model):
     """Load *model*, first through ``load_cbmodel`` and otherwise through ``ProcessXML``.

     ``self.useReframed`` records which loader succeeded: ``True`` when
     ``load_cbmodel`` worked, ``False`` when the ``ProcessXML`` fallback
     was used.

     Args:
         model (str): model file name; for the ``load_cbmodel`` attempt it
             is appended to a fixed models directory, for the fallback it
             is passed to ``ProcessXML`` unchanged.
     """
     try:
         # NOTE(review): machine-specific absolute path; consider making
         # the models directory configurable.
         self.model = load_cbmodel(
             "C:\\Users\\Pedro\\OneDrive\\Documentos\\UMinho\\WholeNewProject\\PythonThings\\Models\\"
             + model)
         self.useReframed = True
     except Exception:
         # Catch only ordinary errors so KeyboardInterrupt / SystemExit
         # are not silently turned into the fallback path.
         self.model = ProcessXML(model)
         self.model.getCompartments()
         self.model.getSpecies()
         self.model.getReactions()
         self.useReframed = False
Ejemplo n.º 4
0
def curate(inputfile=None, outputfile=None, taxa=None, biomass=None, biomass_db_path=None, normalize_biomass=False):
    """Curate a draft universe model using the bundled curation tables.

    Args:
        inputfile (str): draft universe model; the model-specific data is then
            read from the same path with a '.csv' extension. Defaults to the
            configured generated files under the project directory.
        outputfile (str): output file (defaults to 'universe_{taxa}.xml.gz'
            in the configured generated folder)
        taxa (str): taxa passed on to ``curate_universe``
        biomass (str): biomass identifier; defaults to *taxa*, except that
            'cyanobacteria' uses 'gramneg'
        biomass_db_path (str): biomass library (defaults to the configured one)
        normalize_biomass (bool): passed to ``load_biomass_db`` as ``normalize_weight``

    Raises:
        IOError: if the draft model or its model-specific data cannot be read
        RuntimeError: if the biomass identifier is not in the biomass database
    """
    if not inputfile:
        draft_path = project_dir + config.get('generated', 'bigg_universe')
        specific_data_path = project_dir + config.get('generated', 'model_specific_data')
    else:
        draft_path = inputfile
        specific_data_path = os.path.splitext(inputfile)[0] + '.csv'

    if not biomass:
        biomass = taxa if taxa != 'cyanobacteria' else 'gramneg'

    if not outputfile:
        outputfile = project_dir + config.get('generated', 'folder') + f"universe_{taxa}.xml.gz"

    # Curation tables shipped with the project.
    bigg_table = pd.read_csv(project_dir + config.get('input', 'bigg_models'), sep='\t')
    curated_table = pd.read_csv(project_dir + config.get('input', 'manually_curated'), index_col=0, sep='\t')
    unbalanced_table = pd.read_csv(project_dir + config.get('input', 'unbalanced_metabolites'), header=None)
    unbalanced_ids = unbalanced_table[0].tolist()

    try:
        universe = load_cbmodel(draft_path, flavor=config.get('sbml', 'default_flavor'))
        specific_data = pd.read_csv(specific_data_path)
    except IOError:
        raise IOError('Universe draft model not found. Please run --build first to download BiGG data.')

    if biomass_db_path is None:
        biomass_db_path = project_dir + config.get('input', 'biomass_library')

    biomass_library = load_biomass_db(biomass_db_path, normalize_weight=normalize_biomass, model=universe)

    if biomass not in biomass_library:
        raise RuntimeError('Biomass identifier not in database. Currently in database: '
                           + ','.join(biomass_library.keys()))

    curate_universe(
        universe,
        outputfile=outputfile,
        taxa=taxa,
        biomass_eq=biomass_library[biomass],
        model_specific_data=specific_data,
        bigg_models=bigg_table,
        manually_curated=curated_table,
        unbalanced_metabolites=unbalanced_ids,
    )
Ejemplo n.º 5
0
def maincall(mode, noheuristics=False, nothermo=False, allow_unbalanced=False, allow_blocked=False,
         biomass=None, biomass_db_path=None, normalize_biomass=False, taxa=None, outputfile=None):
    """Run one stage of the universe-building pipeline, selected by *mode*.

    Modes:
        'draft': build the draft universe model, download the model-specific
            data and create the GPR table.
        'thermo': compute Gibbs energies for the draft universe.
        'curated': curate the draft universe into a final universe model.
        Any other value only prints an 'Unrecognized option' message.

    Args:
        mode (str): pipeline stage (see above)
        noheuristics (bool): disable heuristics in ``curate_universe``
        nothermo (bool): skip loading thermodynamics and metabolomics data
        allow_unbalanced (bool): do not remove unbalanced reactions
        allow_blocked (bool): do not remove blocked reactions
        biomass (str): biomass identifier ('curated' mode); defaults to
            'archaea' for archaea, otherwise to the configured default
        biomass_db_path (str): biomass library (defaults to the configured one)
        normalize_biomass (bool): passed to ``load_biomass_db`` as ``normalize_weight``
        taxa (str): taxa passed on to ``curate_universe``
        outputfile (str): output file for the selected stage (defaults to the
            configured generated files)

    Raises:
        IOError: in 'curated' mode, if the draft universe or the
            thermodynamic data cannot be read
        RuntimeError: in 'curated' mode, if the biomass identifier is not in
            the biomass database
    """

    if mode == 'draft':

        if outputfile:
            universe_draft = outputfile
            base = os.path.splitext(outputfile)[0]
            model_specific_data = base + '.csv'
            bigg_gprs = base + '_gprs.csv'
        else:
            universe_draft = project_dir + config.get('generated', 'universe_draft')
            model_specific_data = project_dir + config.get('generated', 'model_specific_data')
            bigg_gprs = project_dir + config.get('generated', 'bigg_gprs')

        build_bigg_universe_model(universe_draft)
        data = download_model_specific_data(model_specific_data)
        # NOTE: the follow-up download of gene sequences (download_gene_sequences)
        # is currently disabled, so the returned GPR table is not needed here.
        create_gpr_table(data, outputfile=bigg_gprs)

    elif mode == 'thermo':
        universe_draft = project_dir + config.get('generated', 'universe_draft')
        equilibrator_compounds = project_dir + config.get('input', 'equilibrator_compounds')

        if outputfile:
            bigg_gibbs = outputfile
        else:
            bigg_gibbs = project_dir + config.get('generated', 'bigg_gibbs')

        compute_bigg_gibbs_energy(universe_draft, equilibrator_compounds, bigg_gibbs)

    elif mode == 'curated':

        universe_draft = project_dir + config.get('generated', 'universe_draft')
        model_specific_data = project_dir + config.get('generated', 'model_specific_data')

        if not biomass:
            if taxa == 'archaea':
                biomass = 'archaea'
            else:
                biomass = config.get('universe', 'default_biomass')

        if outputfile:
            universe_final = outputfile
        else:
            # Bacterial universes are named after their biomass equation.
            tag = taxa if taxa != 'bacteria' else biomass
            universe_final = "{}{}universe_{}.xml.gz".format(project_dir, config.get('generated', 'folder'), tag)

        bigg_models = project_dir + config.get('input', 'bigg_models')
        bigg_models = pd.read_csv(bigg_models, index_col=0)

        manual_curation = project_dir + config.get('input', 'manually_curated')
        manually_curated = pd.read_csv(manual_curation, index_col=0)

        unbalanced = project_dir + config.get('input', 'unbalanced_metabolites')
        unbalanced = pd.read_csv(unbalanced, header=None)
        unbalanced = unbalanced[0].tolist()

        try:
            model = load_cbmodel(universe_draft, flavor=config.get('sbml', 'default_flavor'))
            model_specific_data = pd.read_csv(model_specific_data)
        except IOError as err:
            raise IOError('Universe draft not found. Please run --draft first to download BiGG data.') from err

        if biomass_db_path is None:
            biomass_db_path = project_dir + config.get('input', 'biomass_library')

        biomass_db = load_biomass_db(biomass_db_path, normalize_weight=normalize_biomass, model=model)

        if biomass not in biomass_db:
            valid_ids = ','.join(biomass_db.keys())
            raise RuntimeError('Biomass identifier not in database. Currently in database: ' + valid_ids)

        biomass_eq = biomass_db[biomass]

        if nothermo:
            thermodynamics_data = None
            metabolomics_data = None
        else:
            try:
                bigg_gibbs = project_dir + config.get('generated', 'bigg_gibbs')
                thermodynamics_data = pd.read_csv(bigg_gibbs, index_col=0)
            except IOError as err:
                raise IOError(
                    'Thermodynamic data not found. Please run --thermo first to generate thermodynamic data.'
                ) from err

            metabolomics = project_dir + config.get('input', 'metabolomics')
            metabolomics_data = pd.read_csv(metabolomics, index_col=1)

        curate_universe(model,
                        taxa=taxa,
                        outputfile=universe_final,
                        model_specific_data=model_specific_data,
                        bigg_models=bigg_models,
                        thermodynamics_data=thermodynamics_data,
                        metabolomics_data=metabolomics_data,
                        manually_curated=manually_curated,
                        unbalanced_metabolites=unbalanced,
                        biomass_eq=biomass_eq,
                        use_heuristics=(not noheuristics),
                        remove_unbalanced=(not allow_unbalanced),
                        remove_blocked=(not allow_blocked))

    else:
        print('Unrecognized option:', mode)
Ejemplo n.º 6
0
def maincall(inputfile, media, mediadb=None, universe=None, universe_file=None,
             outputfile=None, flavor=None, spent=None, verbose=False):
    """Gap-fill a model for a list of media and save the result.

    Args:
        inputfile (str): model file to gap-fill
        media (list): media identifiers passed to ``multiGapFill``
        mediadb (str): media database (defaults to the configured media library)
        universe (str): universe name, used to locate 'universe_{name}.xml'
            in the configured generated folder
        universe_file (str): explicit universe model file (takes precedence
            over *universe*)
        outputfile (str): output file (defaults to *inputfile* with a
            '_gapfill.xml' suffix)
        flavor (str): flavor used to load the input models and to save the
            result (the configured default flavor is used for saving when unset)
        spent (str): model file for the spent medium species (optional)
        verbose (bool): print progress messages

    Raises:
        IOError: if a model, the universe or the media database cannot be loaded
    """

    def announce(message):
        # Progress messages are only shown in verbose mode.
        if verbose:
            print(message)

    announce('Loading model...')
    try:
        model = load_cbmodel(inputfile, flavor=flavor)
    except IOError:
        raise IOError('Failed to load model:' + inputfile)

    spent_model = None
    if spent:
        announce('Loading model for spent medium species...')
        try:
            spent_model = load_cbmodel(spent, flavor=flavor)
        except IOError:
            raise IOError('Failed to load model:' + spent)

    announce('Loading reaction universe...')
    if not universe_file:
        if not universe:
            universe_file = project_dir + config.get('generated', 'default_universe')
        else:
            universe_file = "{}{}universe_{}.xml".format(
                project_dir, config.get('generated', 'folder'), universe)

    try:
        universe_model = load_cbmodel(universe_file, flavor='cobra')
    except IOError:
        if not universe:
            raise IOError('Failed to load universe model:' + universe_file)
        raise IOError(
            'Failed to load universe "{0}". Please run build_universe.py --{0}.'
            .format(universe))

    announce('Loading media...')
    if not mediadb:
        mediadb = project_dir + config.get('input', 'media_library')

    try:
        media_db = load_media_db(mediadb)
    except IOError:
        raise IOError('Failed to load media database:' + mediadb)

    if verbose:
        # Sizes before gap-filling, to report what was added afterwards.
        mets_before, rxns_before = len(model.metabolites), len(model.reactions)
        print('Gap filling for {}...'.format(', '.join(media)))

    uptake_limit = config.getint('gapfill', 'max_uptake')
    multiGapFill(model, universe_model, media, media_db,
                 max_uptake=uptake_limit, inplace=True, spent_model=spent_model)

    if verbose:
        mets_after, rxns_after = len(model.metabolites), len(model.reactions)
        print('Added {} reactions and {} metabolites'.format(
            (rxns_after - rxns_before), (mets_after - mets_before)))
        print('Saving SBML file...')

    if not outputfile:
        outputfile = os.path.splitext(inputfile)[0] + '_gapfill.xml'

    if not flavor:
        flavor = config.get('sbml', 'default_flavor')

    save_cbmodel(model, outputfile, flavor=flavor)

    announce('Done.')
Ejemplo n.º 7
0
def load_models():
    """Load the model file of every organism in ``organisms``.

    Returns:
        dict: models loaded from '{data_path}/models/{org_id}.xml' with the
            'bigg' flavor, indexed by organism id
    """
    return {
        org_id: load_cbmodel(f"{data_path}/models/{org_id}.xml", flavor='bigg')
        for org_id in organisms
    }
Ejemplo n.º 8
0
def maincall(inputfile, input_type='protein', outputfile=None, diamond_args=None, universe=None, universe_file=None,
         ensemble_size=None, verbose=False, debug=False, flavor=None, gapfill=None, blind_gapfill=False, init=None,
         mediadb=None, default_score=None, uptake_score=None, soft_score=None, soft=None, hard=None, reference=None,
         ref_score=None, recursive_mode=False):
    """Reconstruct a model (or an ensemble of models) from a genome or annotation file.

    The annotations are obtained from the input, reactions of the universe
    model are scored, and either a single model is carved (and optionally
    gap-filled) or an ensemble is built. The result is written to
    *outputfile*. Several failure conditions are reported with a printed
    message followed by an early return.

    Args:
        inputfile (str): input file; with ``input_type='refseq'`` it is the
            identifier handed to ``download_ncbi_genome``
        input_type (str): 'protein', 'dna', 'refseq', 'eggnog' or 'diamond'
        outputfile (str): output file (an output folder in recursive mode);
            defaults to *inputfile* with an '.xml' extension
        diamond_args: extra arguments passed to ``run_blast``
        universe (str): universe name, used to locate 'universe_{name}.xml.gz'
        universe_file (str): explicit universe file (takes precedence over *universe*)
        ensemble_size (int): build an ensemble when greater than 1
        verbose (bool): print progress messages
        debug (bool): when set, the model id is passed as ``debug_output``
        flavor (str): flavor for the saved model (defaults to the configured one)
        gapfill (str): comma-separated media identifiers to gap-fill for
        blind_gapfill (bool): gap-fill without reaction scores
        init (str): medium identifier used to build the initial environment
        mediadb (str): media library (defaults to the configured one)
        default_score, uptake_score, soft_score, ref_score: passed to ``carve_model``
        soft (str): soft-constraints file
        hard (str): hard-constraints file
        reference (str): reference model file
        recursive_mode (bool): derive the model id from *inputfile* and treat
            *outputfile* as a folder

    Raises:
        IOError: if a constraints file, the universe, the reference model or
            the media library cannot be loaded
        ValueError: if *input_type* is not recognized
    """

    if recursive_mode:
        model_id = os.path.splitext(os.path.basename(inputfile))[0]

        if outputfile:
            outputfile = f'{outputfile}/{model_id}.xml'
        else:
            outputfile = os.path.splitext(inputfile)[0] + '.xml'

    else:
        if outputfile:
            model_id = os.path.splitext(os.path.basename(outputfile))[0]
        else:
            model_id = os.path.splitext(os.path.basename(inputfile))[0]
            outputfile = os.path.splitext(inputfile)[0] + '.xml'

    model_id = build_model_id(model_id)

    outputfolder = os.path.abspath(os.path.dirname(outputfile))

    if not os.path.exists(outputfolder):
        try:
            os.makedirs(outputfolder)
        except OSError:
            print('Unable to create output folder:', outputfolder)
            return

    if soft:
        try:
            soft_constraints = load_soft_constraints(soft)
        except IOError as err:
            raise IOError('Failed to load soft-constraints file:' + soft) from err
    else:
        soft_constraints = None

    if hard:
        try:
            hard_constraints = load_hard_constraints(hard)
        except IOError as err:
            raise IOError('Failed to load hard-constraints file:' + hard) from err
    else:
        hard_constraints = None

    if input_type == 'refseq':

        if verbose:
            print(f'Downloading genome {inputfile} from NCBI...')

        ncbi_table = load_ncbi_table(project_dir + config.get('input', 'refseq'))
        inputfile = download_ncbi_genome(inputfile, ncbi_table)

        if not inputfile:
            print('Failed to download genome from NCBI.')
            return

        # The downloaded file is then processed like a regular sequence input.
        input_type = 'protein' if inputfile.endswith('.faa.gz') else 'dna'

    if input_type == 'protein' or input_type == 'dna':
        if verbose:
            print('Running diamond...')
        diamond_db = project_dir + config.get('generated', 'diamond_db')
        blast_output = os.path.splitext(inputfile)[0] + '.tsv'
        exit_code = run_blast(inputfile, input_type, blast_output, diamond_db, diamond_args, verbose)

        if exit_code is None:
            print('Unable to run diamond (make sure diamond is available in your PATH).')
            return

        if exit_code != 0:
            print('Failed to run diamond.')
            if diamond_args is not None:
                print('Incorrect diamond args? Please check documentation or use default args.')
            return

        annotations = load_diamond_results(blast_output)
    elif input_type == 'eggnog':
        annotations = load_eggnog_data(inputfile)
    elif input_type == 'diamond':
        annotations = load_diamond_results(inputfile)
    else:
        raise ValueError('Invalid input type: ' + input_type)

    if verbose:
        print('Loading universe model...')

    if not universe_file:
        if universe:
            universe_file = f"{project_dir}{config.get('generated', 'folder')}universe_{universe}.xml.gz"
        else:
            universe_file = project_dir + config.get('generated', 'default_universe')

    try:
        universe_model = load_cbmodel(universe_file, flavor='bigg')
        universe_model.id = model_id
    except IOError as err:
        available = '\n'.join(glob(f"{project_dir}{config.get('generated', 'folder')}universe_*.xml.gz"))
        raise IOError(
            f'Failed to load universe model: {universe_file}\nAvailable universe files:\n{available}'
        ) from err

    if reference:
        if verbose:
            print('Loading reference model...')

        try:
            ref_model = load_cbmodel(reference)
        except Exception as err:
            # Any loading failure is reported as IOError, but interpreter-level
            # exceptions (KeyboardInterrupt, SystemExit) are no longer swallowed.
            raise IOError('Failed to load reference model.') from err
    else:
        ref_model = None

    if gapfill or init:

        if verbose:
            print('Loading media library...')

        if not mediadb:
            mediadb = project_dir + config.get('input', 'media_library')

        try:
            media_db = load_media_db(mediadb)
        except IOError as err:
            raise IOError('Failed to load media library:' + mediadb) from err

    if verbose:
        print('Scoring reactions...')

    gene_annotations = pd.read_csv(project_dir + config.get('generated', 'gene_annotations'), sep='\t')
    bigg_gprs = project_dir + config.get('generated', 'bigg_gprs')
    gprs = pd.read_csv(bigg_gprs)
    # Only score reactions that exist in the selected universe.
    gprs = gprs[gprs.reaction.isin(universe_model.reactions)]

    debug_output = model_id if debug else None
    scores, gene2gene = reaction_scoring(annotations, gprs, debug_output=debug_output)

    if scores is None:
        print('The input genome did not match sufficient genes/reactions in the database.')
        return

    if not flavor:
        flavor = config.get('sbml', 'default_flavor')

    init_env = None

    if init:
        if init in media_db:
            init_env = Environment.from_compounds(media_db[init])
        else:
            print(f'Error: medium {init} not in media database.')

    universe_model.metadata['Description'] = 'This model was built with CarveMe version ' + version

    if ensemble_size is None or ensemble_size <= 1:
        if verbose:
            print('Reconstructing a single model')

        model = carve_model(universe_model, scores, inplace=(not gapfill), default_score=default_score,
                            uptake_score=uptake_score, soft_score=soft_score, soft_constraints=soft_constraints,
                            hard_constraints=hard_constraints, ref_model=ref_model, ref_score=ref_score,
                            init_env=init_env, debug_output=debug_output)

        # Check for failure before annotating, so a failed reconstruction is
        # reported instead of being handed to annotate_genes.
        if model is None:
            print("Failed to build model.")
            return

        annotate_genes(model, gene2gene, gene_annotations)

    else:
        if verbose:
            print('Building an ensemble of', ensemble_size, 'models')

        ensemble = build_ensemble(universe_model, scores, ensemble_size, init_env=init_env)

        annotate_genes(ensemble, gene2gene, gene_annotations)
        save_ensemble(ensemble, outputfile, flavor=flavor)

        # The ensemble has already been saved; the single-model steps below
        # use `model`, which is not defined on this path.
        if verbose:
            print('Done.')
        return

    if not gapfill:
        save_cbmodel(model, outputfile, flavor=flavor)

    else:
        media = gapfill.split(',')

        if verbose:
            m1, n1 = len(model.metabolites), len(model.reactions)
            print(f"Gap filling for {', '.join(media)}...")

        max_uptake = config.getint('gapfill', 'max_uptake')

        if blind_gapfill:
            scores = None
        else:
            scores = dict(scores[['reaction', 'normalized_score']].values)
        multiGapFill(model, universe_model, media, media_db, scores=scores, max_uptake=max_uptake, inplace=True)

        if verbose:
            m2, n2 = len(model.metabolites), len(model.reactions)
            print(f'Added {(n2 - n1)} reactions and {(m2 - m1)} metabolites')

        if init_env:  # Initializes environment again as new exchange reactions can be acquired during gap-filling
            init_env.apply(model, inplace=True, warning=False)

        save_cbmodel(model, outputfile, flavor=flavor)

    if verbose:
        print('Done.')
Ejemplo n.º 9
0
def simulate(wrappers, medium, method):
    """
    Run a SteadyCom or SteadierCom community simulation.

    Parameters
    ----------
    wrappers: list(storage.ModelWrapper)
        A list of model wrappers containing cobrapy model instances.
    medium: list(str)
        A list of compound names. Each compound is mapped to the exchange
        reaction identifier "R_EX_M_{compound}_e" of the merged community
        model.
    method: str
        The community simulation method. Currently accepted strings:
        "steadycom" or "steadiercom".

    Returns
    -------
    dict
        A dictionary with the keys "growth_rate", "abundance" (list of
        ``{"id", "value"}``), "cross_feeding" (list of ``{"from", "to",
        "metabolite_id", "metabolite_name", "value"}``) and "warnings" (list
        of messages recorded while the simulation was prepared and run).

    Raises
    ------
    ValueError
        If the method is not supported.
    """
    if method not in METHODS:
        raise ValueError(f"Unsupported community simulation method '{method}'")

    with warnings.catch_warnings(record=True) as reframed_warnings:
        logger.debug("Converting cobrapy models to reframed models")
        rf_models = []
        for wrapper in wrappers:
            # The most functional approach (albeit slow) seems to be to write and
            # reload SBML. reframed's cobrapy integration is currently pretty
            # minimal.
            with tempfile.NamedTemporaryFile() as file_:
                cobra.io.write_sbml_model(wrapper.model, file_.name)
                # TODO: Consider accepting the flavor argument as a parameter
                # instead of always assuming BiGG.
                rf_models.append(
                    reframed.load_cbmodel(file_.name, flavor="bigg"))

        logger.debug("Merging individual models to a community")
        community = reframed.Community("community", rf_models)

        logger.debug("Applying medium to the community")
        environment = reframed.Environment.from_compounds(
            medium, fmt_func=lambda x: f"R_EX_M_{x}_e")
        environment.apply(community.merged_model, inplace=True)

        if method == "steadycom":
            logger.info("Simulating community model with SteadyCom")
            solution = reframed.SteadyCom(community)
        elif method == "steadiercom":
            logger.info("Simulating community model with SteadierCom")
            solution = reframed.SteadierCom(community)
        else:
            # Guards against METHODS listing a method without a solver branch,
            # which would otherwise surface as an unbound `solution` below.
            raise ValueError(
                f"Unsupported community simulation method '{method}'")

        logger.debug("Formatting solution response")

        def model_id(original_id):
            """Map the models original name back to our platform internal DB IDs."""
            return next(wrapper.id for wrapper in wrappers
                        if wrapper.model.id == original_id)

        # Calculate transactions (cross-feeding, uptake and secretion)
        logger.debug(
            "Calculating transactions (cross-feeding, uptake and secretion)")
        ex_met_ids = solution.community.merged_model.get_external_metabolites()
        met_id2name = create_metabolite_id2name_mapping(ex_met_ids, community)
        transactions = generate_transactions(met_id2name,
                                             solution.exchange_map)

        # Convert the iterables to dictionaries for easier handling on the frontend
        abundance = [{
            "id": model_id(original_id),
            "value": value
        } for original_id, value in solution.abundance.items()]

        cross_feeding = []
        for transaction in transactions:
            donor, receiver = transaction[0], transaction[1]
            # "medium" is kept as-is; every other endpoint is a model whose
            # id is translated back to the platform id.
            if donor == "medium":
                source, target = "medium", model_id(receiver)
            elif receiver == "medium":
                source, target = model_id(donor), "medium"
            else:
                source, target = model_id(donor), model_id(receiver)
            cross_feeding.append({
                "from": source,
                "to": target,
                "metabolite_id": transaction[2],
                "metabolite_name": transaction[3],
                "value": transaction[4],
            })
    return {
        "growth_rate": solution.growth,
        "abundance": abundance,
        "cross_feeding": cross_feeding,
        # Warning arguments are not guaranteed to be strings.
        "warnings": [
            " ".join(str(arg) for arg in w.message.args)
            for w in reframed_warnings
        ],
    }