Esempio n. 1
0
def maincall(inputfiles,
             flavor=None,
             init=None,
             mediadb=None,
             outputfile=None):
    """Merge several single-species models into one community model and save it.

    Args:
        inputfiles: Iterable of model files, each loaded with ``load_cbmodel``.
        flavor: SBML flavor used for loading and saving; falls back to the
            ``sbml/default_flavor`` config entry when falsy.
        init: Identifier of a medium in the media library. When given, the
            corresponding environment is applied to the merged model.
        mediadb: Media library file; falls back to the ``input/media_library``
            config entry (relative to ``project_dir``) when falsy.
        outputfile: Destination file. Its base name (without extension) becomes
            the community id. Defaults to ``community.xml`` / ``community``.

    Raises:
        IOError: If the media library cannot be loaded.
    """
    flavor = flavor or config.get('sbml', 'default_flavor')

    if not outputfile:
        model_id, outputfile = 'community', 'community.xml'
    else:
        model_id = os.path.splitext(os.path.basename(outputfile))[0]

    species_models = []
    for path in inputfiles:
        species_models.append(load_cbmodel(path, flavor=flavor))

    merged = Community(model_id, species_models).merged_model

    if init:
        library_path = mediadb or (project_dir + config.get('input', 'media_library'))

        try:
            media_db = load_media_db(library_path)
        except IOError:
            raise IOError('Failed to load media library:' + library_path)

        # Constrain the merged model to the requested medium before saving.
        Environment.from_compounds(media_db[init]).apply(merged, inplace=True)

    save_cbmodel(merged, outputfile, flavor=flavor)
Esempio n. 2
0
def first_run_check():
    """Build the internal diamond database if the configured file is missing.

    Nothing happens when the database path already exists. Otherwise
    ``diamond makedb`` is invoked on the configured fasta file; failures are
    reported with ``print`` and never raised.
    """
    db_path = project_dir + config.get('generated', 'diamond_db')
    if os.path.exists(db_path):
        return

    print("Running diamond for the first time, please wait while we build the internal database...")
    fasta_path = project_dir + config.get('generated', 'fasta_file')

    # The last five characters of the configured path are dropped for `-d`
    # (presumably the '.dmnd' extension that diamond appends itself -- confirm against the config).
    command = ['diamond', 'makedb', '--in', fasta_path, '-d', db_path[:-5]]

    try:
        status = subprocess.call(command)
    except OSError:
        print('Unable to run diamond (make sure diamond is available in your PATH).')
        return

    if status != 0:
        print('Failed to run diamond (wrong arguments).')
Esempio n. 3
0
def curate(inputfile=None, outputfile=None, taxa=None, biomass=None, biomass_db_path=None, normalize_biomass=False):
    """Load the universe draft plus its supporting tables and run ``curate_universe``.

    Args:
        inputfile: Universe draft model. The model-specific data is then read
            from the same path with a ``.csv`` extension. When falsy, both
            paths come from the ``generated`` config section.
        outputfile: Destination passed to ``curate_universe``; defaults to
            ``universe_<taxa>.xml.gz`` inside the configured generated folder.
        taxa: Passed to ``curate_universe``; also used to pick the default
            biomass identifier and the default output name.
        biomass: Biomass identifier looked up in the biomass database. When
            falsy it is ``'gramneg'`` for cyanobacteria and ``taxa`` otherwise.
        biomass_db_path: Biomass database file; defaults to the
            ``input/biomass_library`` config entry.
        normalize_biomass: Forwarded to ``load_biomass_db`` as ``normalize_weight``.

    Raises:
        IOError: If the draft model or its model-specific data cannot be read.
        RuntimeError: If the biomass identifier is not in the database.
    """
    if inputfile:
        draft_path = inputfile
        specific_data_path = os.path.splitext(inputfile)[0] + '.csv'
    else:
        draft_path = project_dir + config.get('generated', 'bigg_universe')
        specific_data_path = project_dir + config.get('generated', 'model_specific_data')

    if not biomass:
        biomass = taxa
        if taxa == 'cyanobacteria':
            biomass = 'gramneg'

    if not outputfile:
        outputfile = project_dir + config.get('generated', 'folder') + f"universe_{taxa}.xml.gz"

    models_table = pd.read_csv(project_dir + config.get('input', 'bigg_models'), sep='\t')

    curated_table = pd.read_csv(project_dir + config.get('input', 'manually_curated'), index_col=0, sep='\t')

    # Header-less file; only its first column is used, as a plain list.
    unbalanced_table = pd.read_csv(project_dir + config.get('input', 'unbalanced_metabolites'), header=None)
    unbalanced_ids = unbalanced_table[0].tolist()

    try:
        model = load_cbmodel(draft_path, flavor=config.get('sbml', 'default_flavor'))
        specific_data = pd.read_csv(specific_data_path)
    except IOError:
        raise IOError('Universe draft model not found. Please run --build first to download BiGG data.')

    if biomass_db_path is None:
        biomass_db_path = project_dir + config.get('input', 'biomass_library')

    biomass_db = load_biomass_db(biomass_db_path, normalize_weight=normalize_biomass, model=model)

    if biomass not in biomass_db:
        known_ids = ','.join(biomass_db.keys())
        raise RuntimeError('Biomass identifier not in database. Currently in database: ' + known_ids)

    equation = biomass_db[biomass]

    curate_universe(
        model,
        outputfile=outputfile,
        taxa=taxa,
        biomass_eq=equation,
        model_specific_data=specific_data,
        bigg_models=models_table,
        manually_curated=curated_table,
        unbalanced_metabolites=unbalanced_ids,
    )
Esempio n. 4
0
def main():
    """Command-line entry point that downloads the data for the universal model.

    With ``-o/--output`` every generated file is placed next to the given path
    (same stem, different extensions); otherwise the locations come from the
    ``generated`` config section.
    """
    parser = argparse.ArgumentParser(description="Generate universal model to use with CarveMe")
    parser.add_argument('-o', '--output', dest='output', help="Output file")
    args = parser.parse_args()

    if args.output:
        stem = os.path.splitext(args.output)[0]
        universe_draft = args.output
        model_specific_data = stem + '.csv'
        bigg_gprs = stem + '_gprs.csv'
        fasta_file = stem + '.faa'
        gene_annotations = stem + '.tsv'
    else:
        config_keys = ('bigg_universe', 'model_specific_data', 'bigg_gprs', 'fasta_file', 'gene_annotations')
        universe_draft, model_specific_data, bigg_gprs, fasta_file, gene_annotations = [
            project_dir + config.get('generated', key) for key in config_keys
        ]

    cpd_annotation = project_dir + config.get('input', 'mnx_compounds')

    download_universal_model(universe_draft, cpd_annotation)
    download_model_specific_data(model_specific_data, bigg_gprs, fasta_file, gene_annotations)
Esempio n. 5
0
def main(inputfiles, flavor=None, split_pool=False, no_biomass=False, init=None, mediadb=None, ext_comp_id=None, outputfile=None):
    """Merge single-species models into a community model and save it.

    Args:
        inputfiles: Iterable of model files, each loaded with ``load_cbmodel``.
        flavor: SBML flavor for loading and saving; defaults to the
            ``sbml/default_flavor`` config entry.
        split_pool: When true the extracellular compartments are not merged and
            the pool-style exchange reaction format is used for ``init``.
        no_biomass: When true ``Community`` is told not to create a biomass.
        init: Identifier of a medium in the media library to apply to the
            merged model.
        mediadb: Media library file; defaults to the ``input/media_library``
            config entry.
        ext_comp_id: Extracellular compartment id; defaults to ``'C_e'``.
        outputfile: Destination file; its base name becomes the community id.
            Defaults to ``community.xml`` / ``community``.

    Raises:
        IOError: If the media library cannot be loaded.
    """
    flavor = flavor or config.get('sbml', 'default_flavor')

    if not outputfile:
        model_id, outputfile = 'community', 'community.xml'
    else:
        model_id = os.path.splitext(os.path.basename(outputfile))[0]

    if ext_comp_id is None:
        ext_comp_id = 'C_e'

    species_models = []
    for path in inputfiles:
        species_models.append(load_cbmodel(path, flavor=flavor))

    merged = Community(
        model_id,
        species_models,
        extracellular_compartment_id=ext_comp_id,
        merge_extracellular_compartments=(not split_pool),
        create_biomass=(not no_biomass),
    ).generate_merged_model()

    if init:
        library_path = mediadb or (project_dir + config.get('input', 'media_library'))

        try:
            media_db = load_media_db(library_path)
        except IOError:
            raise IOError('Failed to load media library:' + library_path)

        # The exchange reaction naming differs depending on whether a shared pool is kept.
        exchange_format = "'R_EX_M_{}_e_pool'" if split_pool else "'R_EX_{}_e'"
        medium_env = Environment.from_compounds(media_db[init], exchange_format=exchange_format)
        medium_env.apply(merged, inplace=True)

    save_cbmodel(merged, outputfile, flavor=flavor)
Esempio n. 6
0
def main():
    """Command-line entry point for merging single-species models into a community.

    Parses the command line, refuses to continue with fewer than two input
    files, resolves the SBML flavor and forwards everything to ``maincall``.
    """
    parser = argparse.ArgumentParser(
        description="Merge single species models into a microbial community model")

    parser.add_argument('input', metavar='INPUTFILES', nargs='+', help="SBML input files (single species)")
    parser.add_argument('-o', '--output', dest='output', help="SBML output file (community)")
    parser.add_argument('-i', '--init', dest='init', help="Initialize model with given medium")
    parser.add_argument('--mediadb', help="Media database file")

    flavor_group = parser.add_mutually_exclusive_group()
    flavor_group.add_argument('--cobra', action='store_true', help="SBML input/output in old cobra format")
    flavor_group.add_argument('--fbc2', action='store_true', help="SBML input/output in sbml-fbc2 format")

    args = parser.parse_args()

    if len(args.input) < 2:
        print(args.input)
        parser.error("Please provide two or more single species models as input files.")

    # fbc2 is checked before cobra; with neither flag the configured default applies.
    requested = [name for name in ('fbc2', 'cobra') if getattr(args, name)]
    flavor = requested[0] if requested else config.get('sbml', 'default_flavor')

    maincall(
        inputfiles=args.input,
        flavor=flavor,
        init=args.init,
        mediadb=args.mediadb,
        outputfile=args.output,
    )
Esempio n. 7
0
def maincall(mode, noheuristics=False, nothermo=False, allow_unbalanced=False, allow_blocked=False,
         biomass=None, biomass_db_path=None, normalize_biomass=False, taxa=None, outputfile=None):
    """Run one stage of the universe build pipeline, selected by ``mode``.

    Modes:
        ``'draft'``: build the BiGG universe draft, download the model-specific
            data and create the GPR table.
        ``'thermo'``: compute Gibbs energies for the draft universe.
        ``'curated'``: load the draft and its supporting tables and run
            ``curate_universe``.
        Any other value only prints a message.

    Args:
        mode: Stage to run (see above).
        noheuristics: Negated and passed to ``curate_universe`` as ``use_heuristics``.
        nothermo: When true no thermodynamics/metabolomics data is loaded.
        allow_unbalanced: Negated and passed as ``remove_unbalanced``.
        allow_blocked: Negated and passed as ``remove_blocked``.
        biomass: Biomass identifier; defaults to ``'archaea'`` for archaea and
            to the ``universe/default_biomass`` config entry otherwise.
        biomass_db_path: Biomass database file; defaults to the config entry.
        normalize_biomass: Forwarded to ``load_biomass_db`` as ``normalize_weight``.
        taxa: Passed to ``curate_universe``; also used for default naming.
        outputfile: Overrides the main output location of the selected mode.

    Raises:
        IOError: In ``'curated'`` mode, if the draft or thermodynamic data is missing.
        RuntimeError: In ``'curated'`` mode, if the biomass identifier is unknown.
    """

    def generated(key):
        # Location of a file listed in the 'generated' config section.
        return project_dir + config.get('generated', key)

    def from_input(key):
        # Location of a file listed in the 'input' config section.
        return project_dir + config.get('input', key)

    if mode == 'draft':
        if outputfile:
            stem = os.path.splitext(outputfile)[0]
            draft_path = outputfile
            specific_data_path = stem + '.csv'
            gprs_path = stem + '_gprs.csv'
        else:
            draft_path = generated('universe_draft')
            specific_data_path = generated('model_specific_data')
            gprs_path = generated('bigg_gprs')

        build_bigg_universe_model(draft_path)
        downloaded = download_model_specific_data(specific_data_path)
        # Only the file written to `gprs_path` matters here; the returned table is not used.
        create_gpr_table(downloaded, outputfile=gprs_path)
        return

    if mode == 'thermo':
        draft_path = generated('universe_draft')
        compounds_path = from_input('equilibrator_compounds')
        gibbs_path = outputfile if outputfile else generated('bigg_gibbs')

        compute_bigg_gibbs_energy(draft_path, compounds_path, gibbs_path)
        return

    if mode != 'curated':
        print('Unrecognized option:', mode)
        return

    draft_path = generated('universe_draft')
    specific_data_path = generated('model_specific_data')

    if not biomass:
        biomass = 'archaea' if taxa == 'archaea' else config.get('universe', 'default_biomass')

    if outputfile:
        final_path = outputfile
    else:
        # The bacterial universe is named after its biomass, every other one after its taxa.
        tag = biomass if taxa == 'bacteria' else taxa
        final_path = "{}{}universe_{}.xml.gz".format(project_dir, config.get('generated', 'folder'), tag)

    models_table = pd.read_csv(from_input('bigg_models'), index_col=0)

    curated_table = pd.read_csv(from_input('manually_curated'), index_col=0)

    # Header-less file; only its first column is used, as a plain list.
    unbalanced_ids = pd.read_csv(from_input('unbalanced_metabolites'), header=None)[0].tolist()

    try:
        model = load_cbmodel(draft_path, flavor=config.get('sbml', 'default_flavor'))
        specific_data = pd.read_csv(specific_data_path)
    except IOError:
        raise IOError('Universe draft not found. Please run --draft first to download BiGG data.')

    if biomass_db_path is None:
        biomass_db_path = from_input('biomass_library')

    biomass_db = load_biomass_db(biomass_db_path, normalize_weight=normalize_biomass, model=model)

    if biomass not in biomass_db:
        known_ids = ','.join(biomass_db.keys())
        raise RuntimeError('Biomass identifier not in database. Currently in database: ' + known_ids)

    equation = biomass_db[biomass]

    thermo_table = None
    metabolomics_table = None
    if not nothermo:
        try:
            thermo_table = pd.read_csv(generated('bigg_gibbs'), index_col=0)
        except IOError:
            raise IOError('Thermodynamic data not found. Please run --thermo first to generate thermodynamic data.')

        metabolomics_table = pd.read_csv(from_input('metabolomics'), index_col=1)

    curate_universe(
        model,
        taxa=taxa,
        outputfile=final_path,
        model_specific_data=specific_data,
        bigg_models=models_table,
        thermodynamics_data=thermo_table,
        metabolomics_data=metabolomics_table,
        manually_curated=curated_table,
        unbalanced_metabolites=unbalanced_ids,
        biomass_eq=equation,
        use_heuristics=(not noheuristics),
        remove_unbalanced=(not allow_unbalanced),
        remove_blocked=(not allow_blocked),
    )
Esempio n. 8
0
def maincall(inputfile,
             media,
             mediadb=None,
             universe=None,
             universe_file=None,
             outputfile=None,
             flavor=None,
             spent=None,
             verbose=False):
    """Gap-fill a model for a list of media and save the result.

    Args:
        inputfile: Model to gap-fill (modified in place, then saved).
        media: List of medium identifiers passed to ``multiGapFill``.
        mediadb: Media database file; defaults to the ``input/media_library``
            config entry.
        universe: Tag of a pre-built universe, used to build the file name
            ``universe_<tag>.xml`` when ``universe_file`` is not given.
        universe_file: Explicit universe model file; takes precedence over
            ``universe``.
        outputfile: Destination; defaults to the input path with its extension
            replaced by ``_gapfill.xml``.
        flavor: SBML flavor for loading the model(s) and saving; the config
            default is only applied for saving.
        spent: Optional model file passed to ``multiGapFill`` as ``spent_model``.
        verbose: Print progress messages.

    Raises:
        IOError: If the model, spent model, universe or media database cannot
            be loaded.
    """

    def report(message):
        # Progress output is only produced in verbose mode.
        if verbose:
            print(message)

    report('Loading model...')

    try:
        model = load_cbmodel(inputfile, flavor=flavor)
    except IOError:
        raise IOError('Failed to load model:' + inputfile)

    spent_model = None
    if spent:
        report('Loading model for spent medium species...')

        try:
            spent_model = load_cbmodel(spent, flavor=flavor)
        except IOError:
            raise IOError('Failed to load model:' + spent)

    report('Loading reaction universe...')

    if not universe_file:
        if universe:
            universe_file = "{}{}universe_{}.xml".format(
                project_dir, config.get('generated', 'folder'), universe)
        else:
            universe_file = project_dir + config.get('generated', 'default_universe')

    try:
        universe_model = load_cbmodel(universe_file, flavor='cobra')
    except IOError:
        if not universe:
            raise IOError('Failed to load universe model:' + universe_file)
        raise IOError(
            'Failed to load universe "{0}". Please run build_universe.py --{0}.'.format(universe))

    report('Loading media...')

    if not mediadb:
        mediadb = project_dir + config.get('input', 'media_library')

    try:
        media_db = load_media_db(mediadb)
    except IOError:
        raise IOError('Failed to load media database:' + mediadb)

    if verbose:
        # Remember the model size so the additions can be reported afterwards.
        mets_before, rxns_before = len(model.metabolites), len(model.reactions)
        print('Gap filling for {}...'.format(', '.join(media)))

    uptake_limit = config.getint('gapfill', 'max_uptake')
    multiGapFill(
        model,
        universe_model,
        media,
        media_db,
        max_uptake=uptake_limit,
        inplace=True,
        spent_model=spent_model,
    )

    if verbose:
        mets_after, rxns_after = len(model.metabolites), len(model.reactions)
        print('Added {} reactions and {} metabolites'.format((rxns_after - rxns_before),
                                                             (mets_after - mets_before)))
        print('Saving SBML file...')

    if not outputfile:
        outputfile = os.path.splitext(inputfile)[0] + '_gapfill.xml'

    if not flavor:
        flavor = config.get('sbml', 'default_flavor')

    save_cbmodel(model, outputfile, flavor=flavor)

    report('Done.')
Esempio n. 9
0
def main():
    """Command-line entry point for gap-filling a model on a set of media.

    Parses the command line, resolves the SBML flavor, splits the
    comma-separated media list and forwards everything to ``maincall``.
    """
    parser = argparse.ArgumentParser(description="GapFill a metabolic model for a given set of media")

    parser.add_argument('input', metavar='INPUTFILE', help="SBML input file")

    parser.add_argument('-m', '--media', dest='media', required=True, help="List of media (comma-separated)")
    parser.add_argument('--mediadb', help="Media database file")

    parser.add_argument('--spent-medium', metavar='SPECIES', dest='spent',
                        help="Add spent medium compounds generated from given species (SBML model).")

    universe_group = parser.add_mutually_exclusive_group()
    universe_group.add_argument('-u', '--universe', dest='universe',
                                help="Pre-built universe model (default: bacteria)")
    universe_group.add_argument('--universe-file', dest='universe_file',
                                help="Reaction universe file (SBML format)")

    parser.add_argument('-o', '--output', dest='output', type=str, help="SBML output file")

    parser.add_argument('-v', '--verbose', action='store_true', dest='verbose', help="Switch to verbose mode")

    flavor_group = parser.add_mutually_exclusive_group()
    flavor_group.add_argument('--cobra', action='store_true', help="Input SBML in old cobra format")
    flavor_group.add_argument('--fbc2', action='store_true', help="Input SBML in sbml-fbc2 format")

    args = parser.parse_args()

    # fbc2 is checked before cobra; with neither flag the configured default applies.
    requested = [name for name in ('fbc2', 'cobra') if getattr(args, name)]
    flavor = requested[0] if requested else config.get('sbml', 'default_flavor')

    media_ids = args.media.split(',')

    maincall(
        inputfile=args.input,
        media=media_ids,
        mediadb=args.mediadb,
        universe=args.universe,
        universe_file=args.universe_file,
        outputfile=args.output,
        flavor=flavor,
        spent=args.spent,
        verbose=args.verbose,
    )
Esempio n. 10
0
def maincall(inputfile, input_type='protein', outputfile=None, diamond_args=None, universe=None, universe_file=None,
         ensemble_size=None, verbose=False, debug=False, flavor=None, gapfill=None, blind_gapfill=False, init=None,
         mediadb=None, default_score=None, uptake_score=None, soft_score=None, soft=None, hard=None, reference=None,
         ref_score=None, recursive_mode=False):
    """Reconstruct a model (or a model ensemble) from one input and save it as SBML.

    The input is turned into gene annotations (running diamond first for
    ``'protein'``/``'dna'`` input, after an NCBI download for ``'refseq'``),
    the reactions of the universe model are scored against them, and the
    result is carved into a single model or, when ``ensemble_size`` is greater
    than 1, into an ensemble. Problems the function treats as non-fatal are
    reported with ``print`` and make it return ``None`` early.

    Args:
        inputfile: Input path, or the identifier handed to
            ``download_ncbi_genome`` when ``input_type`` is ``'refseq'``.
        input_type: One of ``'protein'``, ``'dna'``, ``'refseq'``,
            ``'eggnog'`` or ``'diamond'``.
        outputfile: Output file, or output folder in recursive mode. Defaults
            to the input path with its extension replaced by ``.xml``.
        diamond_args: Forwarded to ``run_blast``.
        universe: Tag used to build the universe file name
            ``universe_<tag>.xml.gz`` when ``universe_file`` is not given.
        universe_file: Explicit universe model file; takes precedence over
            ``universe``.
        ensemble_size: Build an ensemble of this many models when greater than 1.
        verbose: Print progress messages.
        debug: When true, the model id is passed as ``debug_output`` to the
            scoring and carving steps.
        flavor: SBML flavor used for saving; defaults to the
            ``sbml/default_flavor`` config entry.
        gapfill: Comma-separated media identifiers to gap-fill the model for.
            Not applied on the ensemble path.
        blind_gapfill: Gap-fill without passing reaction scores.
        init: Medium identifier used to build the initial environment.
        mediadb: Media library file; defaults to the ``input/media_library``
            config entry. Only loaded when ``gapfill`` or ``init`` is set.
        default_score: Forwarded to ``carve_model``.
        uptake_score: Forwarded to ``carve_model``.
        soft_score: Forwarded to ``carve_model``.
        soft: Soft-constraints file loaded with ``load_soft_constraints``.
        hard: Hard-constraints file loaded with ``load_hard_constraints``.
        reference: Reference model file, loaded and passed as ``ref_model``.
        ref_score: Forwarded to ``carve_model``.
        recursive_mode: When true, ``outputfile`` is treated as a folder and
            the model id is always derived from the input file name.

    Raises:
        IOError: If a constraints file, the universe model, the reference
            model or the media library cannot be loaded.
        ValueError: If ``input_type`` is not one of the supported values.
    """

    if recursive_mode:
        model_id = os.path.splitext(os.path.basename(inputfile))[0]

        if outputfile:
            outputfile = f'{outputfile}/{model_id}.xml'
        else:
            outputfile = os.path.splitext(inputfile)[0] + '.xml'

    else:
        if outputfile:
            model_id = os.path.splitext(os.path.basename(outputfile))[0]
        else:
            model_id = os.path.splitext(os.path.basename(inputfile))[0]
            outputfile = os.path.splitext(inputfile)[0] + '.xml'

    model_id = build_model_id(model_id)

    outputfolder = os.path.abspath(os.path.dirname(outputfile))

    if not os.path.exists(outputfolder):
        try:
            os.makedirs(outputfolder)
        except OSError:
            # Only filesystem errors are reported here; anything else
            # (e.g. KeyboardInterrupt) must not be swallowed.
            print('Unable to create output folder:', outputfolder)
            return

    if soft:
        try:
            soft_constraints = load_soft_constraints(soft)
        except IOError:
            raise IOError('Failed to load soft-constraints file:' + soft)
    else:
        soft_constraints = None

    if hard:
        try:
            hard_constraints = load_hard_constraints(hard)
        except IOError:
            raise IOError('Failed to load hard-constraints file:' + hard)
    else:
        hard_constraints = None

    if input_type == 'refseq':

        if verbose:
            print(f'Downloading genome {inputfile} from NCBI...')

        ncbi_table = load_ncbi_table(project_dir + config.get('input', 'refseq'))
        inputfile = download_ncbi_genome(inputfile, ncbi_table)

        if not inputfile:
            print('Failed to download genome from NCBI.')
            return

        # The downloaded file decides how the genome is processed from here on.
        input_type = 'protein' if inputfile.endswith('.faa.gz') else 'dna'

    if input_type == 'protein' or input_type == 'dna':
        if verbose:
            print('Running diamond...')
        diamond_db = project_dir + config.get('generated', 'diamond_db')
        blast_output = os.path.splitext(inputfile)[0] + '.tsv'
        exit_code = run_blast(inputfile, input_type, blast_output, diamond_db, diamond_args, verbose)

        if exit_code is None:
            print('Unable to run diamond (make sure diamond is available in your PATH).')
            return

        if exit_code != 0:
            print('Failed to run diamond.')
            if diamond_args is not None:
                print('Incorrect diamond args? Please check documentation or use default args.')
            return

        annotations = load_diamond_results(blast_output)
    elif input_type == 'eggnog':
        annotations = load_eggnog_data(inputfile)
    elif input_type == 'diamond':
        annotations = load_diamond_results(inputfile)
    else:
        raise ValueError('Invalid input type: ' + input_type)

    if verbose:
        print('Loading universe model...')

    if not universe_file:
        if universe:
            universe_file = f"{project_dir}{config.get('generated', 'folder')}universe_{universe}.xml.gz"
        else:
            universe_file = project_dir + config.get('generated', 'default_universe')

    try:
        universe_model = load_cbmodel(universe_file, flavor='bigg')
        universe_model.id = model_id
    except IOError:
        available = '\n'.join(glob(f"{project_dir}{config.get('generated', 'folder')}universe_*.xml.gz"))
        raise IOError(f'Failed to load universe model: {universe_file}\nAvailable universe files:\n{available}')

    if reference:
        if verbose:
            print('Loading reference model...')

        try:
            ref_model = load_cbmodel(reference)
        except Exception as err:
            # Any loader failure is reported as IOError (as before), but the
            # original cause is kept and interrupts are no longer swallowed.
            raise IOError('Failed to load reference model.') from err
    else:
        ref_model = None

    if gapfill or init:

        if verbose:
            print('Loading media library...')

        if not mediadb:
            mediadb = project_dir + config.get('input', 'media_library')

        try:
            media_db = load_media_db(mediadb)
        except IOError:
            raise IOError('Failed to load media library:' + mediadb)

    if verbose:
        print('Scoring reactions...')

    gene_annotations = pd.read_csv(project_dir + config.get('generated', 'gene_annotations'), sep='\t')
    bigg_gprs = project_dir + config.get('generated', 'bigg_gprs')
    gprs = pd.read_csv(bigg_gprs)
    # Keep only the GPR rows whose reaction exists in the loaded universe.
    gprs = gprs[gprs.reaction.isin(universe_model.reactions)]

    debug_output = model_id if debug else None
    scores, gene2gene = reaction_scoring(annotations, gprs, debug_output=debug_output)

    if scores is None:
        print('The input genome did not match sufficient genes/reactions in the database.')
        return

    if not flavor:
        flavor = config.get('sbml', 'default_flavor')

    init_env = None

    if init:
        if init in media_db:
            init_env = Environment.from_compounds(media_db[init])
        else:
            # Best effort: the reconstruction continues without an initial environment.
            print(f'Error: medium {init} not in media database.')

    universe_model.metadata['Description'] = 'This model was built with CarveMe version ' + version

    if ensemble_size is None or ensemble_size <= 1:
        if verbose:
            print('Reconstructing a single model')

        model = carve_model(universe_model, scores, inplace=(not gapfill), default_score=default_score,
                            uptake_score=uptake_score, soft_score=soft_score, soft_constraints=soft_constraints,
                            hard_constraints=hard_constraints, ref_model=ref_model, ref_score=ref_score,
                            init_env=init_env, debug_output=debug_output)

        # Check for failure before touching the model: annotating a missing model cannot work.
        if model is None:
            print("Failed to build model.")
            return

        annotate_genes(model, gene2gene, gene_annotations)

    else:
        if verbose:
            print('Building an ensemble of', ensemble_size, 'models')

        ensemble = build_ensemble(universe_model, scores, ensemble_size, init_env=init_env)

        annotate_genes(ensemble, gene2gene, gene_annotations)
        save_ensemble(ensemble, outputfile, flavor=flavor)

        # The ensemble is complete at this point. No single `model` exists on
        # this path, so the single-model saving/gap-filling below must be skipped.
        if verbose:
            print('Done.')
        return

    if not gapfill:
        save_cbmodel(model, outputfile, flavor=flavor)

    else:
        media = gapfill.split(',')

        if verbose:
            m1, n1 = len(model.metabolites), len(model.reactions)
            print(f"Gap filling for {', '.join(media)}...")

        max_uptake = config.getint('gapfill', 'max_uptake')

        if blind_gapfill:
            scores = None
        else:
            scores = dict(scores[['reaction', 'normalized_score']].values)
        multiGapFill(model, universe_model, media, media_db, scores=scores, max_uptake=max_uptake, inplace=True)

        if verbose:
            m2, n2 = len(model.metabolites), len(model.reactions)
            print(f'Added {(n2 - n1)} reactions and {(m2 - m1)} metabolites')

        if init_env:  # Initializes environment again as new exchange reactions can be acquired during gap-filling
            init_env.apply(model, inplace=True, warning=False)

        save_cbmodel(model, outputfile, flavor=flavor)

    if verbose:
        print('Done.')
Esempio n. 11
0
def main():
    """Command-line entry point for reconstructing metabolic models with CarveMe.

    Parses and validates the command line, resolves the input type and SBML
    flavor, runs ``first_run_check`` and then calls ``maincall`` either once
    (single input) or once per input through a process pool (``-r``).

    Invalid option combinations are reported through ``parser.error``, which
    terminates the program. In recursive mode the ``--debug`` flag is not
    forwarded to ``maincall``.
    """
    # Local import: only needed to build the picklable bulk worker below.
    import functools

    parser = argparse.ArgumentParser(description="Reconstruct a metabolic model using CarveMe",
                                     formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('input', metavar='INPUT', nargs='+',
                        help="Input (protein fasta file by default, see other options for details).\n" +
                             "When used with -r an input pattern with wildcards can also be used.\n" +
                             "When used with --refseq an NCBI RefSeq assembly accession is expected."
                        )

    input_type_args = parser.add_mutually_exclusive_group()
    input_type_args.add_argument('--dna', action='store_true', help="Build from DNA fasta file")
    input_type_args.add_argument('--egg', action='store_true', help="Build from eggNOG-mapper output file")
    input_type_args.add_argument('--diamond', action='store_true', help=argparse.SUPPRESS)
    input_type_args.add_argument('--refseq', action='store_true', help="Download genome from NCBI RefSeq and build")

    parser.add_argument('--diamond-args', help="Additional arguments for running diamond")

    parser.add_argument('-r', '--recursive', action='store_true', dest='recursive',
                        help="Bulk reconstruction from folder with genome files")

    parser.add_argument('-o', '--output', dest='output', help="SBML output file (or output folder if -r is used)")

    univ = parser.add_mutually_exclusive_group()
    univ.add_argument('-u', '--universe', dest='universe', help="Pre-built universe model (default: bacteria)")
    univ.add_argument('--universe-file', dest='universe_file', help="Reaction universe file (SBML format)")

    sbml = parser.add_mutually_exclusive_group()
    sbml.add_argument('--cobra', action='store_true', help="Output SBML in old cobra format")
    sbml.add_argument('--fbc2', action='store_true', help="Output SBML in sbml-fbc2 format")

    parser.add_argument('-n', '--ensemble', type=int, dest='ensemble',
                        help="Build model ensemble with N models")

    parser.add_argument('-g', '--gapfill', dest='gapfill',
                        help="Gap fill model for given media")

    parser.add_argument('-i', '--init', dest='init',
                        help="Initialize model with given medium")

    parser.add_argument('--mediadb', help="Media database file")

    parser.add_argument('-v', '--verbose', action='store_true', dest='verbose', help="Switch to verbose mode")
    parser.add_argument('-d', '--debug', action='store_true', dest='debug',
                        help="Debug mode: writes intermediate results into output files")

    parser.add_argument('--soft', help="Soft constraints file")
    parser.add_argument('--hard', help="Hard constraints file")

    parser.add_argument('--reference', help="Manually curated model of a close reference species.")

    parser.add_argument('--default-score', type=float, default=-1.0, help=argparse.SUPPRESS)
    parser.add_argument('--uptake-score', type=float, default=0.0, help=argparse.SUPPRESS)
    parser.add_argument('--soft-score', type=float, default=1.0, help=argparse.SUPPRESS)
    parser.add_argument('--reference-score', type=float, default=0.0, help=argparse.SUPPRESS)

    parser.add_argument('--blind-gapfill', action='store_true', help=argparse.SUPPRESS)

    args = parser.parse_args()

    if args.gapfill and args.ensemble:
        parser.error('Gap fill and ensemble generation cannot currently be combined (not implemented yet).')

    if (args.soft or args.hard) and args.ensemble:
        parser.error('Soft/hard constraints and ensemble generation cannot currently be combined (not implemented yet).')

    # maincall loads the media library for --init as well as for --gapfill,
    # so a custom library is meaningful with either option.
    if args.mediadb and not (args.gapfill or args.init):
        parser.error('--mediadb can only be used with --gapfill or --init')

    if args.recursive and args.refseq:
        parser.error('-r cannot be combined with --refseq')

    if args.egg:
        input_type = 'eggnog'
    elif args.dna:
        input_type = 'dna'
    elif args.diamond:
        input_type = 'diamond'
    elif args.refseq:
        input_type = 'refseq'
    else:
        input_type = 'protein'

    if args.fbc2:
        flavor = 'fbc2'
    elif args.cobra:
        flavor = 'cobra'
    else:
        flavor = config.get('sbml', 'default_flavor')

    first_run_check()

    # Options shared by the single and the bulk invocation of maincall.
    shared_options = dict(
        input_type=input_type,
        outputfile=args.output,
        diamond_args=args.diamond_args,
        universe=args.universe,
        universe_file=args.universe_file,
        ensemble_size=args.ensemble,
        verbose=args.verbose,
        flavor=flavor,
        gapfill=args.gapfill,
        # Honour the (hidden) --blind-gapfill flag instead of always passing False.
        blind_gapfill=args.blind_gapfill,
        init=args.init,
        mediadb=args.mediadb,
        default_score=args.default_score,
        uptake_score=args.uptake_score,
        soft_score=args.soft_score,
        soft=args.soft,
        hard=args.hard,
        reference=args.reference,
        ref_score=args.reference_score,
    )

    if not args.recursive:
        if len(args.input) > 1:
            parser.error('Use -r when specifying more than one input file')

        maincall(inputfile=args.input[0], debug=args.debug, **shared_options)

    else:
        # A partial of the module-level maincall can be pickled and sent to the
        # pool workers; a function defined inside main() cannot be pickled by
        # the standard pickler. Each input becomes the first positional
        # argument (inputfile) of maincall.
        worker = functools.partial(maincall, recursive_mode=True, **shared_options)

        # The context manager releases the worker processes once map() has returned.
        with Pool() as pool:
            pool.map(worker, args.input)
Esempio n. 12
0
def main(inputfile, input_type='protein', outputfile=None, diamond_args=None, universe=None, universe_file=None,
         ensemble_size=None, verbose=False, debug=False, flavor=None, gapfill=None, blind_gapfill=False, init=None,
         mediadb=None, default_score=None, uptake_score=None, soft_score=None, soft=None, hard=None, reference=None,
         ref_score=None, recursive_mode=False, specified_solver=None, feas_tol=None, opt_tol=None, int_feas_tol=None):
    """Reconstruct a model (or an ensemble of models) from one input genome.

    The pipeline is: resolve output paths, load optional constraint files,
    obtain gene annotations (downloading and/or running diamond if needed),
    load the universe model, score reactions, carve the model and optionally
    gap-fill it for a list of media.

    Args:
        inputfile: Path to the input file; for the 'refseq'/'genbank' input
            types it is the identifier passed to ``download_ncbi_genome``.
        input_type: One of 'protein', 'dna', 'refseq', 'genbank', 'eggnog'
            or 'diamond'.
        outputfile: Output file path. In recursive mode it is used as the
            output *folder* and the file is named after the input file.
        diamond_args: Extra arguments forwarded to ``run_blast``.
        universe: Universe name used to build the universe file name when
            ``universe_file`` is not given.
        universe_file: Explicit path to the universe model file.
        ensemble_size: If greater than 1, an ensemble is built instead of a
            single model.
        verbose: Print progress messages.
        debug: If true, the model id is passed as ``debug_output`` to the
            scoring and carving steps.
        flavor: SBML flavor for the output; defaults to the configured one.
        gapfill: Comma-separated media identifiers to gap-fill for (single
            model only).
        blind_gapfill: If true, gap-filling ignores the reaction scores.
        init: Identifier of the medium (in the media database) used to build
            the initial environment.
        mediadb: Path to the media database; defaults to the configured one.
        default_score, uptake_score, soft_score, ref_score: Forwarded to
            ``carve_model``.
        soft: Path to a soft-constraints file.
        hard: Path to a hard-constraints file.
        reference: Path to a reference model.
        recursive_mode: Whether this call is one of several batch calls (see
            ``outputfile``).
        specified_solver: Solver name; set as default solver when it differs
            from the configured default.
        feas_tol, opt_tol, int_feas_tol: Optional solver tolerances; each one
            that is not None is set as a default solver parameter.

    Returns:
        None. Results are written to ``outputfile``; recoverable failures
        are reported on stdout and end the run early.

    Raises:
        IOError: If a constraints file, the universe model, the reference
            model or the media database cannot be loaded.
        ValueError: If ``input_type`` is not one of the supported values.
    """

    if recursive_mode:
        model_id = os.path.splitext(os.path.basename(inputfile))[0]

        if outputfile:
            # in recursive mode the output argument designates a folder
            outputfile = '{}/{}.xml'.format(outputfile, model_id)
        else:
            outputfile = os.path.splitext(inputfile)[0] + '.xml'

    else:
        if outputfile:
            model_id = os.path.splitext(os.path.basename(outputfile))[0]
        else:
            model_id = os.path.splitext(os.path.basename(inputfile))[0]
            outputfile = os.path.splitext(inputfile)[0] + '.xml'

    model_id = build_model_id(model_id)

    outputfolder = os.path.abspath(os.path.dirname(outputfile))

    if not os.path.exists(outputfolder):
        try:
            # exist_ok avoids a spurious failure when a parallel worker
            # creates the same folder between the check and this call
            os.makedirs(outputfolder, exist_ok=True)
        except OSError:
            print('Unable to create output folder:', outputfolder)
            return

    if soft:
        try:
            soft_constraints = load_soft_constraints(soft)
        except IOError as err:
            raise IOError('Failed to load soft-constraints file:' + soft) from err
    else:
        soft_constraints = None

    if hard:
        try:
            hard_constraints = load_hard_constraints(hard)
        except IOError as err:
            raise IOError('Failed to load hard-constraints file:' + hard) from err
    else:
        hard_constraints = None

    if input_type == 'refseq' or input_type == 'genbank':

        if verbose:
            print('Downloading genome {} from NCBI...'.format(inputfile))

        ncbi_table = load_ncbi_table(project_dir + config.get('ncbi', input_type))
        inputfile = download_ncbi_genome(inputfile, ncbi_table)

        if not inputfile:
            print('Failed to download genome from NCBI.')
            return

        # the downloaded file is then processed like a local protein/dna file
        input_type = 'protein' if inputfile.endswith('.faa.gz') else 'dna'

    if input_type == 'protein' or input_type == 'dna':
        if verbose:
            print('Running diamond...')
        diamond_db = project_dir + config.get('input', 'diamond_db')
        blast_output = os.path.splitext(inputfile)[0] + '.tsv'
        exit_code = run_blast(inputfile, input_type, blast_output, diamond_db, diamond_args, verbose)

        if exit_code is None:
            print('Unable to run diamond (make sure diamond is available in your PATH).')
            return

        if exit_code != 0:
            print('Failed to run diamond.')
            if diamond_args is not None:
                print('Incorrect diamond args? Please check documentation or use default args.')
            return

        annotations = load_diamond_results(blast_output)
    elif input_type == 'eggnog':
        annotations = load_eggnog_data(inputfile)
    elif input_type == 'diamond':
        annotations = load_diamond_results(inputfile)
    else:
        raise ValueError('Invalid input type: ' + input_type)

    if verbose:
        print('Loading universe model...')

    if not universe_file:
        if universe:
            universe_file = "{}{}universe_{}.xml.gz".format(project_dir, config.get('generated', 'folder'), universe)
        else:
            universe_file = project_dir + config.get('generated', 'default_universe')

    # change default solver if a solver is specified in the input
    if specified_solver is not None:

        if specified_solver != config.get('solver', 'default_solver'):
            set_default_solver(specified_solver)

    # only override the solver tolerances that were explicitly provided
    params_to_set = {'FEASIBILITY_TOL': feas_tol,
                     'OPTIMALITY_TOL': opt_tol,
                     'INT_FEASIBILITY_TOL': int_feas_tol}
    for key, value in params_to_set.items():
        if value is not None:
            set_default_parameter(getattr(Parameter, key), value)

    try:
        universe_model = load_cbmodel(universe_file, flavor=config.get('sbml', 'default_flavor'))
        universe_model.id = model_id
    except IOError as err:
        # list the universe files that do exist to help the user pick one
        available = '\n'.join(glob("{}{}universe_*.xml.gz".format(project_dir, config.get('generated', 'folder'))))
        raise IOError(
            'Failed to load universe model: {}\nAvailable universe files:\n{}'.format(universe_file, available)
        ) from err

    if reference:
        if verbose:
            print('Loading reference model...')

        try:
            ref_model = load_cbmodel(reference)
        except Exception as err:
            # any loading/parsing failure is reported as IOError, but
            # KeyboardInterrupt/SystemExit are no longer swallowed
            raise IOError('Failed to load reference model.') from err
    else:
        ref_model = None

    if gapfill or init:

        if verbose:
            print('Loading media library...')

        if not mediadb:
            mediadb = project_dir + config.get('input', 'media_library')

        try:
            media_db = load_media_db(mediadb)
        except IOError as err:
            raise IOError('Failed to load media library:' + mediadb) from err

    if verbose:
        print('Scoring reactions...')

    bigg_gprs = project_dir + config.get('generated', 'bigg_gprs')
    gprs = pd.read_csv(bigg_gprs)
    # keep only the gene-reaction rules for reactions present in the universe
    gprs = gprs[gprs.reaction.isin(universe_model.reactions)]

    debug_output = model_id if debug else None
    scores = reaction_scoring(annotations, gprs, debug_output=debug_output)

    if scores is None:
        print('The input genome did not match sufficient genes/reactions in the database.')
        return

    if not flavor:
        flavor = config.get('sbml', 'default_flavor')

    init_env = None

    if init:
        if init in media_db:
            init_env = Environment.from_compounds(media_db[init])
        else:
            print('Error: medium {} not in media database.'.format(init))

    universe_model.metadata['Description'] = 'This model was built with CarveMe version ' + version

    # Only the single-model + gapfill path yields a model to post-process;
    # binding it here keeps the gap-filling check below well defined on
    # every other path (previously an UnboundLocalError for ensembles).
    model = None

    if ensemble_size is None or ensemble_size <= 1:
        if verbose:
            print('Reconstructing a single model')

        if not gapfill:
            # no post-processing needed: carve and let carve_model save
            carve_model(universe_model, scores,
                        outputfile=outputfile,
                        flavor=flavor,
                        default_score=default_score,
                        uptake_score=uptake_score,
                        soft_score=soft_score,
                        soft_constraints=soft_constraints,
                        hard_constraints=hard_constraints,
                        ref_model=ref_model,
                        ref_score=ref_score,
                        init_env=init_env,
                        debug_output=debug_output)
        else:
            # keep the universe intact: it is needed as the reaction pool
            # for gap-filling, and the model is saved afterwards
            model = carve_model(universe_model, scores,
                                inplace=False,
                                default_score=default_score,
                                uptake_score=uptake_score,
                                soft_score=soft_score,
                                soft_constraints=soft_constraints,
                                hard_constraints=hard_constraints,
                                ref_model=ref_model,
                                ref_score=ref_score,
                                init_env=init_env,
                                debug_output=debug_output)
    else:
        if verbose:
            print('Building an ensemble of', ensemble_size, 'models')
        build_ensemble(universe_model, scores, ensemble_size, outputfile, flavor, init_env=init_env)

        if gapfill:
            print('Warning: gap-filling is not applied when building an ensemble.')

    if gapfill and model is not None:

        media = gapfill.split(',')

        if verbose:
            m1, n1 = len(model.metabolites), len(model.reactions)
            print('Gap filling for {}...'.format(', '.join(media)))

        max_uptake = config.getint('gapfill', 'max_uptake')

        if blind_gapfill:
            scores = None
        else:
            scores = dict(scores[['reaction', 'normalized_score']].values)
        multiGapFill(model, universe_model, media, media_db, scores=scores, max_uptake=max_uptake, inplace=True)

        if verbose:
            m2, n2 = len(model.metabolites), len(model.reactions)
            print('Added {} reactions and {} metabolites'.format((n2 - n1), (m2 - m1)))

        if init_env:  # Should initialize environment again as new exchange reactions can be acquired during gap-filling
            init_env.apply(model, inplace=True, warning=False)

        save_cbmodel(model, outputfile, flavor=flavor)

    if verbose:
        print('Done.')