def _redistribute(database, level, outfile, redist_inf, relative_abundance=False):
    """Redistribute a taxa table and write the result(s) as tab-separated files.

    Parameters
    ----------
    database
        Database directory. Its metadata (via ``_load_metadata``) names the
        shear file, which is resolved relative to this directory.
    level
        ``'all'`` writes one table per entry of ``TAXA``; ``'off'`` writes
        nothing; any other value is looked up in ``TAXAMAP`` and a single
        table is written to ``outfile``.
    outfile
        Output path. For ``level == 'all'`` the level name is inserted before
        the file extension (``out.txt`` -> ``out.<level>.txt``).
    redist_inf
        Input passed through unchanged to ``redistribute_taxatable``.
    relative_abundance
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    tuple
        ``(output_files, output_levels)``: parallel lists of the paths
        written and the level name each one corresponds to. Both are empty
        when ``level == 'off'``.

    Raises
    ------
    KeyError
        If ``level`` is neither ``'all'``, ``'off'`` nor a key of ``TAXAMAP``.
    """
    # Lazy %-args: the message is only formatted if DEBUG logging is enabled.
    logger.debug("Beginning redistribution for file: %s", redist_inf)

    data_files = _load_metadata(database)
    shear = os.path.join(database, data_files['general']['shear'])
    shear_df = parse_bayes(shear)

    output_files = []
    output_levels = []

    if level == 'off':
        return output_files, output_levels

    if level == 'all':
        # Split only the final extension of the basename. Splitting the whole
        # path on '.' mangles extension-less outputs located under a dotted
        # directory (e.g. './results/table' -> '.genus./results/table').
        root, ext = os.path.splitext(outfile)
        for taxon in TAXA:
            df_output = redistribute_taxatable(
                redist_inf, shear_df, level=TAXAMAP[taxon])
            level_path = "%s.%s%s" % (root, taxon, ext)
            df_output.to_csv(level_path, sep='\t', float_format="%d",
                             na_rep=0, index_label="#OTU ID")
            output_files.append(level_path)
            output_levels.append(taxon)
    else:
        df_output = redistribute_taxatable(
            redist_inf, shear_df, level=TAXAMAP[level])
        df_output.to_csv(outfile, sep='\t', float_format="%d",
                         na_rep=0, index_label="#OTU ID")
        output_files.append(outfile)
        output_levels.append(level)

    return output_files, output_levels
def test_coverage_report(self):
    """Check that a coverage report is produced at levels 6, 7 and 8.

    Uses the packaged ``sheared_bayes.32.txt`` and ``burst_results.b6``
    fixtures from ``shogun.tests``.
    """
    bayes = pkg_resources.resource_filename(
        'shogun.tests', os.path.join('data', 'sheared_bayes.32.txt'))
    df_bayes = parse_bayes(bayes)
    infile = pkg_resources.resource_filename(
        'shogun.tests', os.path.join('data', 'results', 'burst_results.b6'))

    # assertIsNotNone reports the offending value on failure, unlike
    # assertTrue(x is not None) which only says "False is not true".
    for level in (6, 7, 8):
        self.assertIsNotNone(
            get_coverage_of_microbes(infile, df_bayes, level),
            msg="no coverage report returned for level %d" % level)
def test_taxatable(self):
    """Smoke-test ``redistribute_taxatable`` at levels 5 through 8.

    No values are asserted; the test passes as long as every call, and the
    final ``head()`` on the level-8 result, completes without raising.
    """
    bayes_path = pkg_resources.resource_filename(
        'shogun.tests', os.path.join('data', 'sheared_bayes.32.txt'))
    df_bayes = parse_bayes(bayes_path)
    taxatable_path = pkg_resources.resource_filename(
        'shogun.tests', os.path.join('data', 'results', 'burst_taxatable.txt'))

    # Evaluated in ascending level order, and all results are kept alive
    # until the end of the test.
    redistributed = {
        lvl: redistribute_taxatable(taxatable_path, df_bayes, level=lvl)
        for lvl in (5, 6, 7, 8)
    }
    redistributed[8].head()
def _coverage(input, database, output, level, taxonomy):
    """Compute a coverage table and write it to ``output`` as TSV.

    The shear file is located through the database metadata and parsed with
    ``parse_bayes``. When ``taxonomy == 'mapping'`` a ``Taxonomy`` object is
    built from the database and each row's taxonomy is obtained by calling
    it with ``row[1]``; for any other value the last field of the row
    (``row[-1]``) is used directly. The table returned by
    ``get_coverage_of_microbes`` is written tab-separated with floats
    formatted as ``%.5f`` and missing values written as ``0``.
    """
    data_files = _load_metadata(database)
    general = data_files['general']
    shear = os.path.join(database, general['shear'])

    if taxonomy == 'mapping':
        taxatree = Taxonomy(os.path.join(database, general['taxonomy']))

        def parse_taxonomy_from_row(row):
            return taxatree(row[1])
    else:
        def parse_taxonomy_from_row(row):
            return row[-1]

    shear_df = parse_bayes(shear)
    coverage_df = get_coverage_of_microbes(
        input,
        shear_df,
        level,
        parse_taxonomy_from_row=parse_taxonomy_from_row,
    )
    coverage_df.to_csv(output, sep='\t', float_format="%.5f", na_rep=0)
def test_read_bayes(self):
    """Smoke-test ``parse_bayes`` on the packaged ``sheared_bayes.100.txt``.

    Nothing is asserted about the parsed result; the test passes if parsing
    completes without raising.
    """
    fixture = os.path.join('data', 'sheared_bayes.100.txt')
    bayes_path = pkg_resources.resource_filename('shogun.tests', fixture)
    parse_bayes(bayes_path)