Ejemplo n.º 1
0
def maincall(inputfiles,
             flavor=None,
             init=None,
             mediadb=None,
             outputfile=None):
    """ Merge several single-species models into one community model and save it.

    Args:
        inputfiles (list): paths of the models to load (each is passed to load_cbmodel)
        flavor (str): SBML flavor used for loading and saving (default: 'default_flavor' from config)
        init (str): key of a medium in the media library used to initialize the environment (optional)
        mediadb (str): path of the media library (default: 'media_library' from config)
        outputfile (str): output file (default: 'community.xml'); its base name becomes the model id

    Raises:
        IOError: if the media library cannot be loaded
    """

    flavor = flavor or config.get('sbml', 'default_flavor')

    if not outputfile:
        model_id, outputfile = 'community', 'community.xml'
    else:
        # model id is the output file name without folder and extension
        file_name = os.path.basename(outputfile)
        model_id = os.path.splitext(file_name)[0]

    models = []
    for path in inputfiles:
        models.append(load_cbmodel(path, flavor=flavor))

    community = Community(model_id, models)
    merged = community.merged_model

    if init:
        mediadb = mediadb or (project_dir + config.get('input', 'media_library'))

        try:
            media_db = load_media_db(mediadb)
        except IOError:
            raise IOError('Failed to load media library:' + mediadb)

        environment = Environment.from_compounds(media_db[init])
        environment.apply(merged, inplace=True)

    save_cbmodel(merged, outputfile, flavor=flavor)
Ejemplo n.º 2
0
def build_bigg_universe_model(outputfile=None):
    """ Build a CBModel with every reaction listed by the BiGG reactions endpoint.

    Each entry of the 'results' list returned for `reactions_url` is added to the
    model through build_reaction, reporting progress after every entry.

    Args:
        outputfile (str): if given, the model is also saved to this file (optional)

    Returns:
        CBModel: universe model
    """

    print('Downloading universal data from BiGG...')
    universe = CBModel('bigg_universe')
    response = get_request(reactions_url)

    entries = response['results']
    total = len(entries)

    for index, entry in enumerate(entries):
        build_reaction(universe, entry['bigg_id'])
        progress(index, total)

    print('\n')

    if not outputfile:
        return universe

    save_cbmodel(universe, outputfile)
    return universe
Ejemplo n.º 3
0
def download_universal_model(outputfile, cpd_annotation):
    """ Download the BiGG universe and save it as a model file.

    Args:
        outputfile (str): file where the resulting model is saved
        cpd_annotation (str): tab-separated compound annotation table (first column is used as index)
    """
    print("Downloading BiGG universe...")

    annotation_table = pd.read_csv(cpd_annotation, sep="\t", index_col=0)
    universe_json = get_request(UNIVERSE_URL)

    # populate an empty model: compartments first, then metabolites, then reactions
    universe = CBModel("bigg_universe")
    load_compartments(universe)
    load_metabolites(universe_json, universe, annotation_table)
    load_reactions(universe_json, universe)

    save_cbmodel(universe, outputfile)
Ejemplo n.º 4
0
def curate_universe(model, outputfile, model_specific_data, bigg_models, taxa, biomass_eq,
                    manually_curated=None, unbalanced_metabolites=None):

    """ Curate universal reaction database from initial database dump.

    The model is modified in place by a fixed sequence of curation steps and then
    saved (when an output file is given).

    Args:
        model (CBModel): universal model
        outputfile (str): output SBML file (optional, nothing is saved if empty or None)
        model_specific_data (pandas.DataFrame): model specific data downloaded from BiGG
            (must have a 'reaction' column; the caller's frame is not modified)
        bigg_models (pandas.DataFrame): Additional information on BiGG models
        taxa (str): filter by taxa (optional)
        biomass_eq (str): default biomass equation
        manually_curated: manually curated data forwarded to constrain_reversibility (optional)
        unbalanced_metabolites: metabolites forwarded to create_sink_reactions (optional,
            no sink reactions are created when None)

    """

    print(f'Curating {taxa} universe...')
    print(f'Initial model size: {len(model.metabolites)} x {len(model.reactions)}')

    remove_compartments(model, taxa)

    # Prefix reaction ids on a new frame rather than overwriting the caller's column:
    # mutating the argument would stack prefixes ('R_R_...') if the same frame is reused.
    model_specific_data = model_specific_data.assign(
        reaction=model_specific_data['reaction'].apply(lambda x: 'R_' + x))
    filter_reactions_by_kingdom(model, taxa, model_specific_data, bigg_models)

    clean_up_atp_synthases(model)

    compute_missing_formulae(model)

    curate_transport_by_weight(model)

    remove_unbalanced_reactions(model, proton_relax=True)

    fix_protons_and_charge(model)

    fix_hydrogen_stoichiometry(model)

    constrain_reversibility(model, model_specific_data, manually_curated)

    reversibility_heuristics(model)

    if unbalanced_metabolites is not None:
        create_sink_reactions(model, unbalanced_metabolites)

    create_exchange_reactions(model, default_lb=-1000, default_ub=1000)

    add_biomass_equation(model, biomass_eq)

    add_maintenance_atp(model)

    print('Removing blocked reactions and dead-end metabolites...')
    simplify(model)
    print(f'Final model size: {len(model.metabolites)} x {len(model.reactions)}')

    # the output file is documented as optional, so only save when one was given
    if outputfile:
        save_cbmodel(model, outputfile)
Ejemplo n.º 5
0
def maincall(inputfile,
             media,
             mediadb=None,
             universe=None,
             universe_file=None,
             outputfile=None,
             flavor=None,
             spent=None,
             verbose=False):
    """ Gap-fill a model for a list of media and save the result.

    Args:
        inputfile (str): model file to gap-fill
        media (list): media identifiers forwarded to multiGapFill
        mediadb (str): media library file (default: 'media_library' from config)
        universe (str): name of a generated universe, used to build the universe file name (optional)
        universe_file (str): universe model file; takes precedence over `universe` (optional)
        outputfile (str): output file (default: input file name with a '_gapfill.xml' suffix)
        flavor (str): SBML flavor for loading the input/spent models; the config default is
            only applied when saving
        spent (str): model file of the species used for spent medium (optional)
        verbose (bool): print progress messages (default: False)

    Raises:
        IOError: if the model, spent model, universe or media database cannot be loaded
    """

    def report(message):
        # fixed progress messages are only shown in verbose mode
        if verbose:
            print(message)

    report('Loading model...')

    try:
        model = load_cbmodel(inputfile, flavor=flavor)
    except IOError:
        raise IOError('Failed to load model:' + inputfile)

    spent_model = None

    if spent:
        report('Loading model for spent medium species...')

        try:
            spent_model = load_cbmodel(spent, flavor=flavor)
        except IOError:
            raise IOError('Failed to load model:' + spent)

    report('Loading reaction universe...')

    if not universe_file:
        if not universe:
            universe_file = project_dir + config.get('generated',
                                                     'default_universe')
        else:
            generated_folder = config.get('generated', 'folder')
            universe_file = "{}{}universe_{}.xml".format(
                project_dir, generated_folder, universe)

    try:
        universe_model = load_cbmodel(universe_file, flavor='cobra')
    except IOError:
        if not universe:
            raise IOError('Failed to load universe model:' + universe_file)
        raise IOError(
            'Failed to load universe "{0}". Please run build_universe.py --{0}.'
            .format(universe))

    report('Loading media...')

    mediadb = mediadb or (project_dir + config.get('input', 'media_library'))

    try:
        media_db = load_media_db(mediadb)
    except IOError:
        raise IOError('Failed to load media database:' + mediadb)

    if verbose:
        # model size before gap-filling, used for the summary below
        m1 = len(model.metabolites)
        n1 = len(model.reactions)
        print('Gap filling for {}...'.format(', '.join(media)))

    multiGapFill(model,
                 universe_model,
                 media,
                 media_db,
                 max_uptake=config.getint('gapfill', 'max_uptake'),
                 inplace=True,
                 spent_model=spent_model)

    if verbose:
        m2 = len(model.metabolites)
        n2 = len(model.reactions)
        print('Added {} reactions and {} metabolites'.format((n2 - n1),
                                                             (m2 - m1)))
        print('Saving SBML file...')

    outputfile = outputfile or (os.path.splitext(inputfile)[0] + '_gapfill.xml')

    # the default flavor is resolved only now, so the models above are loaded
    # with the flavor exactly as given by the caller
    flavor = flavor or config.get('sbml', 'default_flavor')

    save_cbmodel(model, outputfile, flavor=flavor)

    report('Done.')
Ejemplo n.º 6
0
def curate_universe(model,
                    model_specific_data,
                    bigg_models,
                    biomass_eq,
                    taxa=None,
                    thermodynamics_data=None,
                    metabolomics_data=None,
                    thermodynamics_method=None,
                    manually_curated=None,
                    unbalanced_metabolites=None,
                    use_heuristics=True,
                    remove_unbalanced=True,
                    remove_blocked=True,
                    outputfile=None):
    """ Curate universal reaction database from initial database dump.

    Args:
        model (CBModel): universal model (modified in place)
        model_specific_data (pandas.DataFrame): model specific data downloaded from BiGG
        bigg_models (pandas.DataFrame): Additional information on BiGG models
            (the 'trusted' and 'domain' columns are used)
        biomass_eq (str): default biomass equation
        taxa (str): filter by taxa: 'bacteria', 'cyanobacteria' or 'archaea' (optional)
        thermodynamics_data (pandas.DataFrame): used for reversibility estimation (optional)
        metabolomics_data (pandas.DataFrame): used for reversibility estimation (optional)
        thermodynamics_method (str): thermodynamics method to use (optional)
        manually_curated (pandas.DataFrame): manually curated reaction bounds (optional)
        unbalanced_metabolites (list): unbalanced metabolites that require sink reactions (optional)
        use_heuristics (bool): apply heuristic rules (no proton pumps, no reversible ATP consumers) (default: True)
        remove_unbalanced (bool): remove unbalanced reactions from model (default: True)
        remove_blocked (bool): remove blocked reactions and dead-end metabolites (default: True)
        outputfile (str): output SBML file (optional)

    Returns:
        CBModel: curated universal model (the same object that was passed in)

    Raises:
        ValueError: if `taxa` is given but is not one of the supported values

    Notes:
        Combines thermodynamics and heuristic rules to determine reaction reversibility.
        Adds exchange reactions for all extracellular metabolites.
        Adds sink reactions for a list of known unbalanced compounds.
        Adds biomass equations from local biomass database (avoids discarding biomass precursors and other
        essential reactions/metabolites that would otherwise be structurally blocked).
    """

    print('Starting universe curation...')
    print('(initial size: {} x {})\n'.format(len(model.metabolites),
                                             len(model.reactions)))

    trusted_models = bigg_models.query('trusted == True').index.tolist()

    add_bounds_from_extracted_data(model, model_specific_data, trusted_models)

    if taxa:
        print('Filtering by taxa:', taxa)
        kingdom_map = bigg_models['domain'].to_dict()

        # unsupported taxa are rejected here, so the compartment selection
        # below always matches one of its branches
        if taxa in {'cyanobacteria', 'bacteria'}:
            kingdoms = {'Bacteria'}
        elif taxa == 'archaea':
            kingdoms = {'Archaea', 'Bacteria'}
        else:
            raise ValueError('Unsupported taxa:' + taxa)

        filter_reactions_by_kingdoms(model,
                                     kingdoms,
                                     kingdom_map,
                                     inplace=True)

        if taxa in {'bacteria', 'archaea'}:
            valid_compartments = {'C_c', 'C_p', 'C_e'}
        elif taxa == 'cyanobacteria':
            valid_compartments = {'C_c', 'C_p', 'C_e', 'C_u'}

        other_compartments = set(
            model.compartments.keys()) - valid_compartments
        model.remove_compartments(other_compartments,
                                  delete_metabolites=True,
                                  delete_reactions=True)

        print('(size: {} x {})\n'.format(len(model.metabolites),
                                         len(model.reactions)))

    if thermodynamics_data is not None:
        print('Computing thermodynamics...', end=' ')

        dG0 = thermodynamics_data['dG0'].to_dict()
        sdG0 = thermodynamics_data['sdG0'].to_dict()

        # per-row median of the metabolomics table, when available
        if metabolomics_data is not None:
            x0 = metabolomics_data.median(axis=1).to_dict()
        else:
            x0 = None

        compute_flux_bounds(model,
                            dG0,
                            sdG0,
                            x0,
                            method=thermodynamics_method,
                            inplace=True,
                            override_trusted=False)
        print('done\n')

    print('Applying manual curation rules...', end=' ')

    if use_heuristics:
        reversibility_heuristics(model,
                                 no_reverse_atp=True,
                                 no_proton_pumps=False,
                                 override_trusted=False)

    # manually curated reactions (applied last, only for reactions still in the model)
    if manually_curated is not None:
        for r_id, (lb, ub) in manually_curated.iterrows():
            if r_id in model.reactions:
                model.set_flux_bounds(r_id, lb, ub)

    print('done\n')

    if remove_unbalanced:

        # remove arbitrary 'Z' formula from photons
        if taxa == 'cyanobacteria':
            for m_id in ['M_photon_e', 'M_photon_p', 'M_photon_c']:
                model.metabolites[m_id].metadata['FORMULA'] = ''

        print('Removing unbalanced reactions...')
        remove_unbalanced_reactions(model)
        print('(size: {} x {})\n'.format(len(model.metabolites),
                                         len(model.reactions)))

    print('Creating pseudo-reactions...')

    create_exchange_reactions(model, default_lb=-1000, default_ub=1000)

    if unbalanced_metabolites:
        create_sink_reactions(model, unbalanced_metabolites)

    add_biomass_equation(model, biomass_eq)

    add_maintenance_atp(model)

    print('(size: {} x {})\n'.format(len(model.metabolites),
                                     len(model.reactions)))

    if remove_blocked:
        print('Removing blocked reactions and dead-end metabolites...')
        simplify(model)
        print('(size: {} x {})\n'.format(len(model.metabolites),
                                         len(model.reactions)))

    if outputfile:
        save_cbmodel(model, outputfile)

    print('Done.')

    # the docstring promises the curated model; previously the function returned None
    return model
Ejemplo n.º 7
0
def maincall(inputfile, input_type='protein', outputfile=None, diamond_args=None, universe=None, universe_file=None,
         ensemble_size=None, verbose=False, debug=False, flavor=None, gapfill=None, blind_gapfill=False, init=None,
         mediadb=None, default_score=None, uptake_score=None, soft_score=None, soft=None, hard=None, reference=None,
         ref_score=None, recursive_mode=False):
    """ Reconstruct a model (or an ensemble of models) from a genome and save it.

    Recoverable problems (output folder, download, diamond, scoring, model building)
    are reported with a printed message followed by an early return.

    Args:
        inputfile (str): input file, or an accession to download when input_type is 'refseq'
        input_type (str): one of 'protein', 'dna', 'refseq', 'eggnog' or 'diamond' (default: 'protein')
        outputfile (str): output file, or output folder when recursive_mode is set
            (default: input file name with an '.xml' extension)
        diamond_args: extra arguments forwarded to run_blast (optional)
        universe (str): name of a generated universe, used to build the universe file name (optional)
        universe_file (str): universe model file; takes precedence over `universe` (optional)
        ensemble_size (int): build an ensemble of this many models when greater than 1 (optional)
        verbose (bool): print progress messages (default: False)
        debug (bool): pass the model id as debug output name to scoring and carving (default: False)
        flavor (str): SBML flavor for saving (default: 'default_flavor' from config)
        gapfill (str): comma-separated list of media to gap-fill for (optional, single model only)
        blind_gapfill (bool): gap-fill without reaction scores (default: False)
        init (str): medium from the media library used to initialize the environment (optional)
        mediadb (str): media library file (default: 'media_library' from config)
        default_score, uptake_score, soft_score, ref_score: forwarded to carve_model (optional)
        soft (str): soft-constraints file (optional)
        hard (str): hard-constraints file (optional)
        reference (str): reference model file (optional)
        recursive_mode (bool): derive the model id and output file name from the input file (default: False)

    Raises:
        IOError: if a constraints file, the universe, the reference model or the media library fails to load
        ValueError: if input_type is not supported
    """

    if recursive_mode:
        model_id = os.path.splitext(os.path.basename(inputfile))[0]

        # in recursive mode the given output path is treated as a folder
        if outputfile:
            outputfile = f'{outputfile}/{model_id}.xml'
        else:
            outputfile = os.path.splitext(inputfile)[0] + '.xml'

    else:
        if outputfile:
            model_id = os.path.splitext(os.path.basename(outputfile))[0]
        else:
            model_id = os.path.splitext(os.path.basename(inputfile))[0]
            outputfile = os.path.splitext(inputfile)[0] + '.xml'

    model_id = build_model_id(model_id)

    outputfolder = os.path.abspath(os.path.dirname(outputfile))

    # exist_ok avoids a spurious failure when another run creates the same
    # folder between an existence check and the creation
    try:
        os.makedirs(outputfolder, exist_ok=True)
    except OSError:
        print('Unable to create output folder:', outputfolder)
        return

    if soft:
        try:
            soft_constraints = load_soft_constraints(soft)
        except IOError:
            raise IOError('Failed to load soft-constraints file:' + soft)
    else:
        soft_constraints = None

    if hard:
        try:
            hard_constraints = load_hard_constraints(hard)
        except IOError:
            raise IOError('Failed to load hard-constraints file:' + hard)
    else:
        hard_constraints = None

    if input_type == 'refseq':

        if verbose:
            print(f'Downloading genome {inputfile} from NCBI...')

        ncbi_table = load_ncbi_table(project_dir + config.get('input', 'refseq'))
        inputfile = download_ncbi_genome(inputfile, ncbi_table)

        if not inputfile:
            print('Failed to download genome from NCBI.')
            return

        # the downloaded file is then processed like a local protein/dna input
        input_type = 'protein' if inputfile.endswith('.faa.gz') else 'dna'

    if input_type == 'protein' or input_type == 'dna':
        if verbose:
            print('Running diamond...')
        diamond_db = project_dir + config.get('generated', 'diamond_db')
        blast_output = os.path.splitext(inputfile)[0] + '.tsv'
        exit_code = run_blast(inputfile, input_type, blast_output, diamond_db, diamond_args, verbose)

        if exit_code is None:
            print('Unable to run diamond (make sure diamond is available in your PATH).')
            return

        if exit_code != 0:
            print('Failed to run diamond.')
            if diamond_args is not None:
                print('Incorrect diamond args? Please check documentation or use default args.')
            return

        annotations = load_diamond_results(blast_output)
    elif input_type == 'eggnog':
        annotations = load_eggnog_data(inputfile)
    elif input_type == 'diamond':
        annotations = load_diamond_results(inputfile)
    else:
        raise ValueError('Invalid input type: ' + input_type)

    if verbose:
        print('Loading universe model...')

    if not universe_file:
        if universe:
            universe_file = f"{project_dir}{config.get('generated', 'folder')}universe_{universe}.xml.gz"
        else:
            universe_file = project_dir + config.get('generated', 'default_universe')

    try:
        universe_model = load_cbmodel(universe_file, flavor='bigg')
        universe_model.id = model_id
    except IOError:
        available = '\n'.join(glob(f"{project_dir}{config.get('generated', 'folder')}universe_*.xml.gz"))
        raise IOError(f'Failed to load universe model: {universe_file}\nAvailable universe files:\n{available}')

    if reference:
        if verbose:
            print('Loading reference model...')

        # any loading error is reported as IOError, but KeyboardInterrupt and
        # SystemExit are no longer swallowed as they were by a bare except
        try:
            ref_model = load_cbmodel(reference)
        except Exception as err:
            raise IOError('Failed to load reference model.') from err
    else:
        ref_model = None

    if gapfill or init:

        if verbose:
            print('Loading media library...')

        if not mediadb:
            mediadb = project_dir + config.get('input', 'media_library')

        try:
            media_db = load_media_db(mediadb)
        except IOError:
            raise IOError('Failed to load media library:' + mediadb)

    if verbose:
        print('Scoring reactions...')

    gene_annotations = pd.read_csv(project_dir + config.get('generated', 'gene_annotations'), sep='\t')
    bigg_gprs = project_dir + config.get('generated', 'bigg_gprs')
    gprs = pd.read_csv(bigg_gprs)
    gprs = gprs[gprs.reaction.isin(universe_model.reactions)]

    debug_output = model_id if debug else None
    scores, gene2gene = reaction_scoring(annotations, gprs, debug_output=debug_output)

    if scores is None:
        print('The input genome did not match sufficient genes/reactions in the database.')
        return

    if not flavor:
        flavor = config.get('sbml', 'default_flavor')

    init_env = None

    if init:
        if init in media_db:
            init_env = Environment.from_compounds(media_db[init])
        else:
            print(f'Error: medium {init} not in media database.')

    universe_model.metadata['Description'] = 'This model was built with CarveMe version ' + version

    if ensemble_size is None or ensemble_size <= 1:
        if verbose:
            print('Reconstructing a single model')

        model = carve_model(universe_model, scores, inplace=(not gapfill), default_score=default_score,
                            uptake_score=uptake_score, soft_score=soft_score, soft_constraints=soft_constraints,
                            hard_constraints=hard_constraints, ref_model=ref_model, ref_score=ref_score,
                            init_env=init_env, debug_output=debug_output)
        annotate_genes(model, gene2gene, gene_annotations)

    else:
        if verbose:
            print('Building an ensemble of', ensemble_size, 'models')

        ensemble = build_ensemble(universe_model, scores, ensemble_size, init_env=init_env)

        annotate_genes(ensemble, gene2gene, gene_annotations)
        save_ensemble(ensemble, outputfile, flavor=flavor)

        # The ensemble is already saved and no single `model` exists in this branch;
        # falling through to the single-model steps below raised UnboundLocalError.
        if verbose:
            print('Done.')
        return

    if model is None:
        print("Failed to build model.")
        return

    if not gapfill:
        save_cbmodel(model, outputfile, flavor=flavor)

    else:
        media = gapfill.split(',')

        if verbose:
            m1, n1 = len(model.metabolites), len(model.reactions)
            print(f"Gap filling for {', '.join(media)}...")

        max_uptake = config.getint('gapfill', 'max_uptake')

        if blind_gapfill:
            scores = None
        else:
            scores = dict(scores[['reaction', 'normalized_score']].values)
        multiGapFill(model, universe_model, media, media_db, scores=scores, max_uptake=max_uptake, inplace=True)

        if verbose:
            m2, n2 = len(model.metabolites), len(model.reactions)
            print(f'Added {(n2 - n1)} reactions and {(m2 - m1)} metabolites')

        if init_env:  # Initializes environment again as new exchange reactions can be acquired during gap-filling
            init_env.apply(model, inplace=True, warning=False)

        save_cbmodel(model, outputfile, flavor=flavor)

    if verbose:
        print('Done.')