def test_make_new_summary_file(self):
    """make_summary works on counts, relative abundances and trimming
    """
    sample_header = ['Taxon', 's1', 's2', 's3', 's4']
    actinobacteria = ('Root', 'Bacteria', 'Actinobacteria')
    firmicutes = ('Root', 'Bacteria', 'Firmicutes')
    other = ('Root', 'Bacteria', 'Other')

    # Level-3 summary of the table as stored on the fixture, no trimming.
    summary, header = make_summary(self.otu_table, 3, None, None)
    self.assertEqual(header, sample_header)
    expected_counts = [[actinobacteria, 1, 0, 2, 4],
                       [firmicutes, 1, 3, 1, 1],
                       [other, 1, 2, 1, 0]]
    self.assertEqual(summary, expected_counts)

    # Same summary after normalizing the table per sample.
    otu_table = self.otu_table.norm(axis='sample', inplace=False)
    summary, header = make_summary(otu_table, 3, None, None)
    self.assertEqual(header, sample_header)
    expected_relative = [
        (actinobacteria, [1.0 / 3, 0.0, 0.5, 0.8]),
        (firmicutes, [1.0 / 3, 0.6, 0.25, 0.2]),
        (other, [1.0 / 3, 0.4, 0.25, 0.0]),
    ]
    # Index explicitly so that a short summary still fails the test.
    for position, (taxon, abundances) in enumerate(expected_relative):
        self.assertEqual(summary[position][0], taxon)
        assert_almost_equal(summary[position][1:], abundances)

    # Lower trimming: only lower_percentage (last argument) is set.
    summary, header = make_summary(otu_table, 3, None, 0.3)
    first_row = summary[0]
    self.assertEqual(first_row[0], other)
    assert_almost_equal(first_row[1:], [1.0 / 3, 0.4, 0.25, 0.0])

    # Upper trimming: only upper_percentage (third argument) is set.
    summary, header = make_summary(otu_table, 3, 0.4, None)
    first_row = summary[0]
    self.assertEqual(first_row[0], actinobacteria)
    assert_almost_equal(first_row[1:], [1.0 / 3, 0.0, 0.5, 0.8])
Beispiel #2
0
    def test_make_new_summary_file(self):
        """make_summary works on counts, relative abundances and trimming
        """
        sample_header = ['Taxon', 's1', 's2', 's3', 's4']
        actinobacteria = ('Root', 'Bacteria', 'Actinobacteria')
        firmicutes = ('Root', 'Bacteria', 'Firmicutes')
        other = ('Root', 'Bacteria', 'Other')

        # Level-3 summary of the table as stored on the fixture, no trimming.
        summary, header = make_summary(self.otu_table, 3, None, None)
        self.assertEqual(header, sample_header)
        expected_counts = [[actinobacteria, 1, 0, 2, 4],
                           [firmicutes, 1, 3, 1, 1],
                           [other, 1, 2, 1, 0]]
        self.assertEqual(summary, expected_counts)

        # Same summary after normalizing the table per sample.
        otu_table = self.otu_table.norm(axis='sample', inplace=False)
        summary, header = make_summary(otu_table, 3, None, None)
        self.assertEqual(header, sample_header)
        expected_relative = [
            (actinobacteria, [1.0 / 3, 0.0, 0.5, 0.8]),
            (firmicutes, [1.0 / 3, 0.6, 0.25, 0.2]),
            (other, [1.0 / 3, 0.4, 0.25, 0.0]),
        ]
        # Index explicitly so that a short summary still fails the test.
        for position, (taxon, abundances) in enumerate(expected_relative):
            self.assertEqual(summary[position][0], taxon)
            assert_almost_equal(summary[position][1:], abundances)

        # Lower trimming: only lower_percentage (last argument) is set.
        summary, header = make_summary(otu_table, 3, None, 0.3)
        first_row = summary[0]
        self.assertEqual(first_row[0], other)
        assert_almost_equal(first_row[1:], [1.0 / 3, 0.4, 0.25, 0.0])

        # Upper trimming: only upper_percentage (third argument) is set.
        summary, header = make_summary(otu_table, 3, 0.4, None)
        first_row = summary[0]
        self.assertEqual(first_row[0], actinobacteria)
        assert_almost_equal(first_row[1:], [1.0 / 3, 0.0, 0.5, 0.8])
Beispiel #3
0
def main():
    """Summarize an OTU table at each requested taxonomic level.

    Parses the command-line options, loads the BIOM table, validates the
    abundance thresholds, normalizes the table unless
    ``opts.absolute_abundance`` is set, and then, for every level in the
    comma-separated ``opts.level``:

    * with a mapping file: calls ``add_summary_mapping`` and writes the
      result with ``write_add_taxa_summary_mapping`` to
      ``<mapping basename>_L<level>.txt``;
    * otherwise: calls ``make_summary`` and writes
      ``<table basename>_L<level>.txt`` and/or ``.biom`` unless the
      respective format is suppressed.

    Raises:
        ValueError: if both thresholds are given, if a threshold is
            combined with a mapping file, or if a threshold lies outside
            the range 0.0 to 1.0.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    lower_percentage = opts.lower_percentage
    upper_percentage = opts.upper_percentage
    otu_table_fp = opts.otu_table_fp
    # Close the input handle once parsing is done instead of leaking it.
    # NOTE(review): assumes parse_biom_table consumes the handle eagerly.
    with open(otu_table_fp, 'U') as otu_table_file:
        otu_table = parse_biom_table(otu_table_file)
    delimiter = opts.delimiter
    mapping_fp = opts.mapping
    md_as_string = opts.md_as_string
    md_identifier = opts.md_identifier
    levels = opts.level.split(',')
    suppress_classic_table_output = opts.suppress_classic_table_output
    suppress_biom_table_output = opts.suppress_biom_table_output

    if upper_percentage is not None and lower_percentage is not None:
        raise ValueError("upper_percentage and lower_percentage are mutually exclusive")

    # The thresholds are only forwarded to make_summary, never to
    # add_summary_mapping, so reject them up front when a mapping file is
    # in use rather than silently ignoring them.
    if mapping_fp and (upper_percentage is not None or
                       lower_percentage is not None):
        raise ValueError("upper_percentage and lower_percentage can not be using with mapping file")

    if upper_percentage is not None and \
            (upper_percentage < 0 or upper_percentage > 1.0):
        raise ValueError('max_otu_percentage should be between 0.0 and 1.0')

    if lower_percentage is not None and \
            (lower_percentage < 0 or lower_percentage > 1.0):
        raise ValueError('lower_percentage should be between 0.0 and 1.0')

    if mapping_fp:
        # NOTE(review): assumes parse_mapping_file reads the handle eagerly.
        with open(mapping_fp, 'U') as mapping_file:
            mapping, header, comments = parse_mapping_file(mapping_file)

        # use the input Mapping file for producing the output filenames
        map_basename = splitext(split(mapping_fp)[1])[0]
    else:
        if suppress_classic_table_output and suppress_biom_table_output:
            option_parser.error("Both classic and BIOM output formats were "
                                "suppressed.")

    if opts.relative_abundance != '':
        option_parser.error("Deprecated. Please use --absolute_abundances to disable relative abundance")

    if not opts.absolute_abundance:
        otu_table = otu_table.normObservationBySample()

    # introduced output directory to will allow for multiple outputs
    if opts.output_dir:
        create_dir(opts.output_dir, False)
        output_dir_path = opts.output_dir
    else:
        output_dir_path = './'

    # use the input OTU table to produce the output filenames
    basename = splitext(split(otu_table_fp)[1])[0]

    # Iterate over the levels and generate a summarized taxonomy for each
    for level in levels:
        if mapping_fp:
            # define output filename
            output_fname = join(output_dir_path,
                                map_basename + '_L%s.txt' % (level))

            summary, tax_order = add_summary_mapping(otu_table,
                                                     mapping,
                                                     int(level),
                                                     md_as_string,
                                                     md_identifier)

            write_add_taxa_summary_mapping(summary, tax_order, mapping,
                                           header, output_fname, delimiter)
        else:
            # define the output filename. The extension will be added to the
            # end depending on the output format
            output_fname = join(output_dir_path, basename + '_L%s' % level)

            summary, header = make_summary(otu_table,
                                           int(level),
                                           upper_percentage,
                                           lower_percentage,
                                           md_as_string,
                                           md_identifier)

            if not suppress_classic_table_output:
                write_summarize_taxa(summary, header, output_fname + '.txt',
                                     delimiter, opts.transposed_output,
                                     file_format='classic')
            if not suppress_biom_table_output:
                write_summarize_taxa(summary, header, output_fname + '.biom',
                                     delimiter, opts.transposed_output,
                                     file_format='biom')
def main():
    """Summarize an OTU table at each requested taxonomic level.

    Parses the command-line options, loads the table with ``load_table``,
    validates the abundance thresholds, normalizes the table per sample
    unless ``opts.absolute_abundance`` is set, and then, for every level in
    the comma-separated ``opts.level``:

    * with a mapping file: calls ``add_summary_mapping`` and writes the
      result with ``write_add_taxa_summary_mapping`` to
      ``<mapping basename>_L<level>.txt``;
    * otherwise: calls ``make_summary``, builds a ``Table`` from the rows
      and writes ``<table basename>_L<level>.txt`` and/or ``.biom`` unless
      the respective format is suppressed.

    Raises:
        ValueError: if both thresholds are given, if a threshold is
            combined with a mapping file, or if a threshold lies outside
            the range 0.0 to 1.0.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    lower_percentage = opts.lower_percentage
    upper_percentage = opts.upper_percentage
    otu_table_fp = opts.otu_table_fp
    otu_table = load_table(otu_table_fp)
    delimiter = opts.delimiter
    mapping_fp = opts.mapping
    md_as_string = opts.md_as_string
    md_identifier = opts.md_identifier
    levels = opts.level.split(',')
    suppress_classic_table_output = opts.suppress_classic_table_output
    suppress_biom_table_output = opts.suppress_biom_table_output

    if upper_percentage is not None and lower_percentage is not None:
        raise ValueError(
            "upper_percentage and lower_percentage are mutually exclusive")

    # The thresholds are only forwarded to make_summary, never to
    # add_summary_mapping, so reject them up front when a mapping file is
    # in use rather than silently ignoring them.
    if mapping_fp and (upper_percentage is not None or
                       lower_percentage is not None):
        raise ValueError("upper_percentage and lower_percentage can not be "
                         "using with mapping file")

    if upper_percentage is not None and \
            (upper_percentage < 0 or upper_percentage > 1.0):
        raise ValueError('max_otu_percentage should be between 0.0 and 1.0')

    if lower_percentage is not None and \
            (lower_percentage < 0 or lower_percentage > 1.0):
        raise ValueError('lower_percentage should be between 0.0 and 1.0')

    if mapping_fp:
        # Close the mapping file once parsed instead of leaking the handle.
        # NOTE(review): assumes parse_mapping_file reads the handle eagerly.
        with open(mapping_fp, 'U') as mapping_file:
            mapping, header, comments = parse_mapping_file(mapping_file)

        # use the input Mapping file for producing the output filenames
        map_basename = splitext(split(mapping_fp)[1])[0]
    else:
        if suppress_classic_table_output and suppress_biom_table_output:
            option_parser.error("Both classic and BIOM output formats were "
                                "suppressed.")

    if not opts.absolute_abundance:
        otu_table = otu_table.norm(axis='sample', inplace=False)

    # introduced output directory to will allow for multiple outputs
    if opts.output_dir:
        create_dir(opts.output_dir, False)
        output_dir_path = opts.output_dir
    else:
        output_dir_path = './'

    # use the input OTU table to produce the output filenames
    basename = splitext(split(otu_table_fp)[1])[0]

    # Iterate over the levels and generate a summarized taxonomy for each
    for level in levels:
        if mapping_fp:
            # define output filename
            output_fname = join(output_dir_path,
                                map_basename + '_L%s.txt' % (level))

            summary, tax_order = add_summary_mapping(otu_table, mapping,
                                                     int(level), md_as_string,
                                                     md_identifier)

            write_add_taxa_summary_mapping(summary, tax_order, mapping, header,
                                           output_fname, delimiter)
        else:
            # define the output filename. The extension will be added to the
            # end depending on the output format
            output_fname = join(output_dir_path, basename + '_L%s' % level)

            summary, header = make_summary(otu_table, int(level),
                                           upper_percentage, lower_percentage,
                                           md_as_string, md_identifier)

            # header[0] is the taxon column label; the rest are sample IDs.
            sample_ids = header[1:]

            # Each summary row is (taxonomy levels, value per sample...).
            # Join taxonomic levels to create an observation ID.
            observation_ids = [delimiter.join(row[0]) for row in summary]
            data = [row[1:] for row in summary]

            table = Table(np.asarray(data), observation_ids, sample_ids)
            if opts.transposed_output:
                table = table.transpose()

            if not suppress_classic_table_output:
                with open(output_fname + '.txt', 'w') as outfile:
                    outfile.write(table.to_tsv())

            if not suppress_biom_table_output:
                write_biom_table(table, output_fname + '.biom')
def main():
    """Summarize an OTU table at each requested taxonomic level.

    Parses the command-line options, loads the OTU table, validates the
    abundance thresholds, converts the table to relative abundance unless
    ``opts.absolute_abundance`` is set, and then, for every level in the
    comma-separated ``opts.level``:

    * with a mapping file: calls ``add_summary_mapping`` and writes the
      result with ``write_add_taxa_summary_mapping`` to
      ``<mapping basename>_L<level>.txt``;
    * otherwise: calls ``make_summary`` and writes the result with
      ``write_summarize_taxa`` to ``<table basename>_L<level>.txt``.

    Raises:
        ValueError: if both thresholds are given, if a threshold is
            combined with a mapping file, or if a threshold lies outside
            the range 0.0 to 1.0.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    lower_percentage = opts.lower_percentage
    upper_percentage = opts.upper_percentage
    otu_table_fp = opts.otu_table_fp
    # Close the input handle once parsing is done instead of leaking it.
    # NOTE(review): assumes parse_otu_table consumes the handle eagerly.
    with open(otu_table_fp, 'U') as otu_table_file:
        otu_table = parse_otu_table(otu_table_file)
    delimiter = opts.delimiter
    mapping_fp = opts.mapping
    levels = opts.level.split(',')

    if upper_percentage is not None and lower_percentage is not None:
        raise ValueError("upper_percentage and lower_percentage are mutually exclusive")

    # The thresholds are only forwarded to make_summary, never to
    # add_summary_mapping, so reject them up front when a mapping file is
    # in use rather than silently ignoring them.
    if mapping_fp and (upper_percentage is not None or
                       lower_percentage is not None):
        raise ValueError("upper_percentage and lower_percentage can not be using with mapping file")

    if upper_percentage is not None and \
            (upper_percentage < 0 or upper_percentage > 1.0):
        raise ValueError('max_otu_percentage should be between 0.0 and 1.0')

    if lower_percentage is not None and \
            (lower_percentage < 0 or lower_percentage > 1.0):
        raise ValueError('lower_percentage should be between 0.0 and 1.0')

    if mapping_fp:
        # NOTE(review): assumes parse_mapping_file reads the handle eagerly.
        with open(mapping_fp, 'U') as mapping_file:
            mapping, header, comments = parse_mapping_file(mapping_file)

        # use the input Mapping file for producing the output filenames
        map_basename = splitext(split(mapping_fp)[1])[0]

    if opts.relative_abundance != '':
        # error() reports the problem itself (optparse's implementation
        # exits the process), so there is no return value to raise.
        option_parser.error("Deprecated. Please use --absolute_abundances to disable relative abundance")

    if not opts.absolute_abundance:
        otu_table = convert_otu_table_relative(otu_table)

    # introduced output directory to will allow for multiple outputs
    if opts.output_dir:
        create_dir(opts.output_dir, False)
        output_dir_path = opts.output_dir
    else:
        output_dir_path = './'

    # use the input OTU table to produce the output filenames
    basename = splitext(split(otu_table_fp)[1])[0]

    # Iterate over the levels and generate a summarized taxonomy for each
    for level in levels:
        if mapping_fp:
            # define output filename
            output_fname = join(output_dir_path,
                                map_basename + '_L%s.txt' % (level))

            summary, tax_order = add_summary_mapping(otu_table, mapping,
                                                     int(level))
            write_add_taxa_summary_mapping(summary, tax_order, mapping,
                                           header, output_fname, delimiter)
        else:
            # define output filename
            output_fname = join(output_dir_path,
                                basename + '_L%s.txt' % (level))

            summary, header = make_summary(otu_table, int(level),
                                           upper_percentage, lower_percentage)
            write_summarize_taxa(summary, header, output_fname, delimiter)
def main():
    """Summarize an OTU table at each requested taxonomic level.

    Parses the command-line options, loads the table with ``load_table``,
    validates the abundance thresholds, normalizes the table per sample
    unless ``opts.absolute_abundance`` is set, and then, for every level in
    the comma-separated ``opts.level``:

    * with a mapping file: calls ``add_summary_mapping`` and writes the
      result with ``write_add_taxa_summary_mapping`` to
      ``<mapping basename>_L<level>.txt``;
    * otherwise: calls ``make_summary``, builds a ``Table`` from the rows
      and writes ``<table basename>_L<level>.txt`` and/or ``.biom`` unless
      the respective format is suppressed.

    Raises:
        ValueError: if both thresholds are given, if a threshold is
            combined with a mapping file, or if a threshold lies outside
            the range 0.0 to 1.0.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    lower_percentage = opts.lower_percentage
    upper_percentage = opts.upper_percentage
    otu_table_fp = opts.otu_table_fp
    otu_table = load_table(otu_table_fp)
    delimiter = opts.delimiter
    mapping_fp = opts.mapping
    md_as_string = opts.md_as_string
    md_identifier = opts.md_identifier
    levels = opts.level.split(',')
    suppress_classic_table_output = opts.suppress_classic_table_output
    suppress_biom_table_output = opts.suppress_biom_table_output

    if upper_percentage is not None and lower_percentage is not None:
        raise ValueError(
            "upper_percentage and lower_percentage are mutually exclusive")

    # The thresholds are only forwarded to make_summary, never to
    # add_summary_mapping, so reject them up front when a mapping file is
    # in use rather than silently ignoring them.
    if mapping_fp and (upper_percentage is not None or
                       lower_percentage is not None):
        raise ValueError("upper_percentage and lower_percentage can not be "
                         "using with mapping file")

    if upper_percentage is not None and \
            (upper_percentage < 0 or upper_percentage > 1.0):
        raise ValueError('max_otu_percentage should be between 0.0 and 1.0')

    if lower_percentage is not None and \
            (lower_percentage < 0 or lower_percentage > 1.0):
        raise ValueError('lower_percentage should be between 0.0 and 1.0')

    if mapping_fp:
        # Close the mapping file once parsed instead of leaking the handle.
        # NOTE(review): assumes parse_mapping_file reads the handle eagerly.
        with open(mapping_fp, 'U') as mapping_file:
            mapping, header, comments = parse_mapping_file(mapping_file)

        # use the input Mapping file for producing the output filenames
        map_basename = splitext(split(mapping_fp)[1])[0]
    else:
        if suppress_classic_table_output and suppress_biom_table_output:
            option_parser.error("Both classic and BIOM output formats were "
                                "suppressed.")

    if not opts.absolute_abundance:
        otu_table = otu_table.norm(axis='sample', inplace=False)

    # introduced output directory to will allow for multiple outputs
    if opts.output_dir:
        create_dir(opts.output_dir, False)
        output_dir_path = opts.output_dir
    else:
        output_dir_path = './'

    # use the input OTU table to produce the output filenames
    basename = splitext(split(otu_table_fp)[1])[0]

    # Iterate over the levels and generate a summarized taxonomy for each
    for level in levels:
        if mapping_fp:
            # define output filename
            output_fname = join(output_dir_path,
                                map_basename + '_L%s.txt' % (level))

            summary, tax_order = add_summary_mapping(otu_table,
                                                     mapping,
                                                     int(level),
                                                     md_as_string,
                                                     md_identifier)

            write_add_taxa_summary_mapping(summary, tax_order, mapping,
                                           header, output_fname, delimiter)
        else:
            # define the output filename. The extension will be added to the
            # end depending on the output format
            output_fname = join(output_dir_path, basename + '_L%s' % level)

            summary, header = make_summary(otu_table,
                                           int(level),
                                           upper_percentage,
                                           lower_percentage,
                                           md_as_string,
                                           md_identifier)

            # header[0] is the taxon column label; the rest are sample IDs.
            sample_ids = header[1:]

            # Each summary row is (taxonomy levels, value per sample...).
            # Join taxonomic levels to create an observation ID.
            observation_ids = [delimiter.join(row[0]) for row in summary]
            data = [row[1:] for row in summary]

            table = Table(np.asarray(data), observation_ids, sample_ids)
            if opts.transposed_output:
                table = table.transpose()

            if not suppress_classic_table_output:
                with open(output_fname + '.txt', 'w') as outfile:
                    outfile.write(table.to_tsv())

            if not suppress_biom_table_output:
                write_biom_table(table, output_fname + '.biom')