Esempio n. 1
0
 def test_check_database(self):
     """Check that the bundled test database can be parsed.

     Locates the ``data`` directory shipped with ``shogun.tests``, reads its
     ``metadata.yaml`` with the safe YAML loader, and passes the parsed
     metadata together with the directory path to ``parse_function_db``.
     The test passes when that call returns something other than ``None``.
     """
     # A single-argument os.path.join is a no-op, so pass the name directly.
     database = pkg_resources.resource_filename('shogun.tests', 'data')
     with open(os.path.join(database, 'metadata.yaml'), 'r') as stream:
         data_files = yaml.load(stream, Loader=yaml.SafeLoader)
     results = parse_function_db(data_files, database)
     # assertIsNotNone gives a more informative failure message than
     # assertTrue(x is not None).
     self.assertIsNotNone(results)
Esempio n. 2
0
def _function(inputs, database, output, levels, save_median_taxatable=False):
    """Run functional prediction for each (input file, level) pair.

    Args:
        inputs: Iterable of input file paths, paired positionally with
            ``levels``.
        database: Database location; passed to ``_load_metadata`` and to
            ``parse_function_db``.
        output: Output directory. It is created if it does not exist.
        levels: Iterable of taxonomic level names. Only ``'genus'``,
            ``'species'`` and ``'strain'`` are processed; any other level
            is skipped without error.
        save_median_taxatable: Forwarded unchanged to
            ``function_run_and_save``.

    Note:
        Pairs are built with ``zip``, so if ``inputs`` and ``levels`` differ
        in length the surplus entries of the longer one are ignored.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output, exist_ok=True)

    # Load the datafiles to locate function db
    data_files = _load_metadata(database)

    # Load the functional db
    logger.info("Loading the functional database and converting.")
    func_db = parse_function_db(data_files, database)

    supported_levels = ('genus', 'species', 'strain')
    for input_path, level in zip(inputs, levels):
        # Functional prediction is only attempted at these levels.
        if level not in supported_levels:
            continue
        logger.info(
            "Starting functional prediction with input file %s at level %s",
            os.path.abspath(input_path), level)
        function_run_and_save(input_path,
                              func_db,
                              output,
                              TAXAMAP[level],
                              save_median_taxatable=save_median_taxatable)
Esempio n. 3
0
def summarize_functional(ctx, input, database, output):
    """Summarize a KEGG table into module and pathway tables and write them.

    Args:
        ctx: Not used by this function body.
        input: Path to a tab-separated table; read with its first column as
            the index.
        database: Database location; passed to ``_load_metadata`` and to
            ``parse_function_db``.
        output: Output directory. It is created if it does not exist.

    Four tab-separated files are written into ``output``:
    ``<prefix>.kegg.modules.txt``, ``<prefix>.kegg.modules.coverage.txt``,
    ``<prefix>.kegg.pathways.txt`` and ``<prefix>.kegg.pathways.coverage.txt``.
    Missing values are written as ``0``.
    """
    # Prefix = input file name without its final dot-separated extension and
    # with every ".kegg" occurrence removed.
    # NOTE(review): a file name containing no "." yields an empty prefix.
    prefix = ".".join(os.path.basename(input).split('.')[:-1]).replace(".kegg", "")

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output, exist_ok=True)

    # Load the datafiles to locate function db
    data_files = _load_metadata(database)

    # Load the functional db
    logger.info("Loading the functional database and converting.")
    func_db = parse_function_db(data_files, database)
    kegg_df = pd.read_csv(input, sep="\t", index_col=0)

    # Compute both summaries before writing anything, so a failure in either
    # one leaves no partial output behind.
    modules_df, modules_coverage = summarize_kegg_table(kegg_df, func_db['modules'])
    pathways_df, pathways_coverage = summarize_kegg_table(kegg_df, func_db['pathways'])

    # (table, file name, float format, index label) for each output file.
    outputs = (
        (modules_df, "%s.kegg.modules.txt" % prefix, "%d", "#MODULE ID"),
        (modules_coverage, "%s.kegg.modules.coverage.txt" % prefix, "%f", "#MODULE ID"),
        (pathways_df, "%s.kegg.pathways.txt" % prefix, "%d", "#PATHWAY ID"),
        (pathways_coverage, "%s.kegg.pathways.coverage.txt" % prefix, "%f", "#PATHWAY ID"),
    )
    for table, file_name, float_format, index_label in outputs:
        # na_rep is documented as a string; "0" produces the same text as 0.
        table.to_csv(os.path.join(output, file_name), sep='\t',
                     float_format=float_format, na_rep="0",
                     index_label=index_label)