Ejemplo n.º 1
0
def main():
    """Run the script: load the tables, attach copy numbers, write the result.

    Steps, in order:

    1. Parse the command line with ``parse_command_line_parameters``.
    2. Load the OTU table from ``opts.input_otu_fp`` (classic tab-delimited
       parser when ``opts.input_format_classic`` is set, BIOM parser
       otherwise) and the count table from ``opts.input_count_fp``
       (opened with ``gzip`` when the extension is ``.gz``).
    3. Keep only the OTUs for which ``count_table.sampleExists`` is true.
    4. Attach each kept OTU's rounded copy number as observation metadata
       under ``opts.metadata_identifer`` and call
       ``normObservationByMetadata`` with that key.
    5. Write the table's BIOM JSON string to ``opts.output_otu_fp``.

    Raises:
        ValueError: if a copy number cannot be converted to an integer or
            is smaller than 1.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Warn before touching the file so the hint is shown even if the
    # subsequent load fails.
    input_ext = path.splitext(opts.input_otu_fp)[1]
    if not opts.input_format_classic and input_ext != '.biom':
        sys.stderr.write("\nOTU table does not have '.biom' extension! If loading causes error consider using '-f' option to load tab-delimited OTU table!\n\n")

    # Use a context manager so the input handle is closed once parsed.
    with open(opts.input_otu_fp, 'U') as otu_fh:
        if opts.input_format_classic:
            otu_table = parse_classic_table_to_rich_table(
                otu_fh, None, None, None, DenseOTUTable)
        else:
            otu_table = parse_biom_table(otu_fh)

    if path.splitext(opts.input_count_fp)[1] == '.gz':
        count_fh = gzip.open(opts.input_count_fp, 'rb')
    else:
        count_fh = open(opts.input_count_fp, 'U')
    try:
        count_table = parse_biom_table(count_fh)
    finally:
        count_fh.close()

    # The first observation row of the count table is the one the copy
    # numbers are read from below.
    ob_id = count_table.ObservationIds[0]

    # Need to only keep data relevant to our otu list
    filtered_otus = []
    filtered_values = []
    for observation in otu_table.iterObservations():
        otu_id = str(observation[1])
        if count_table.sampleExists(otu_id):
            filtered_otus.append(otu_id)
            filtered_values.append(otu_table.observationData(otu_id))

    filtered_otu_table = table_factory(filtered_values,
                                       otu_table.SampleIds,
                                       filtered_otus,
                                       constructor=DenseOTUTable)

    copy_numbers_filtered = {}
    for otu_id in filtered_otus:
        raw_value = count_table.getValueByIds(ob_id, otu_id)
        try:
            # data can be floats so round them and make them integers
            value = int(round(float(raw_value)))
        except (TypeError, ValueError, OverflowError):
            # Report both the offending value and the OTU it belongs to.
            raise ValueError(
                "Invalid copy number %r passed for OTU ID %s. Must be int-able."
                % (raw_value, otu_id))
        if value < 1:
            raise ValueError("Copy numbers must be greater than or equal to 1.")

        copy_numbers_filtered[otu_id] = {opts.metadata_identifer: value}

    filtered_otu_table.addObservationMetadata(copy_numbers_filtered)

    normalized_table = filtered_otu_table.normObservationByMetadata(
        opts.metadata_identifer)

    make_output_dir_for_file(opts.output_otu_fp)
    # Close (and therefore flush) the output file deterministically.
    with open(opts.output_otu_fp, 'w') as out_fh:
        out_fh.write(normalized_table.getBiomFormatJsonString('PICRUST'))
Ejemplo n.º 2
0
    def test_write_summarize_taxa(self):
        """write_summarize_taxa functions as expected

        Writes the fixture summary to ``self.tmp_fp1`` (classic format) and
        ``self.tmp_fp2`` (BIOM format) and compares what was written against
        the expected table.
        """
        # Classic format.
        write_summarize_taxa(self.taxa_summary, self.taxa_header, self.tmp_fp1)
        # Register the file before asserting so a failed assertion does not
        # leave it behind (files_to_remove is presumably cleaned in tearDown).
        self.files_to_remove.append(self.tmp_fp1)
        with open(self.tmp_fp1) as classic_fh:
            obs = classic_fh.read()
        exp = "\n".join(["Taxon\tfoo\tbar\tfoobar", "a;b;c\t0\t1\t2", "d;e;f\t3\t4\t5\n"])
        self.assertEqual(obs, exp)

        # BIOM format.
        write_summarize_taxa(self.taxa_summary, self.taxa_header, self.tmp_fp2, file_format="biom")
        self.files_to_remove.append(self.tmp_fp2)
        # The expected BIOM table is built from the classic expected text.
        exp = parse_classic_table_to_rich_table(exp.split("\n"), None, None, None, SparseTaxonTable)
        with open(self.tmp_fp2) as biom_fh:
            obs = biom_fh.read()
        obs = parse_biom_table(obs)
        self.assertEqual(obs, exp)
Ejemplo n.º 3
0
    def test_format_summarize_taxa(self):
        """Check format_summarize_taxa for classic, BIOM and invalid formats."""
        summary = self.taxa_summary
        header = self.taxa_header

        # Classic format: the chunks produced by the formatter, joined
        # together, must equal the expected tab-delimited text.
        classic_rows = ["Taxon\tfoo\tbar\tfoobar", "a;b;c\t0\t1\t2", "d;e;f\t3\t4\t5\n"]
        expected_text = "\n".join(classic_rows)
        observed_text = "".join(format_summarize_taxa(summary, header))
        self.assertEqual(observed_text, expected_text)

        # BIOM format: turn the expected classic text into a table and
        # compare it with the table parsed from the formatter's BIOM output.
        expected_table = parse_classic_table_to_rich_table(
            expected_text.split("\n"), None, None, None, SparseTaxonTable
        )
        biom_text = "".join(format_summarize_taxa(summary, header, file_format="biom"))
        observed_table = parse_biom_table(biom_text)
        self.assertEqual(observed_table, expected_table)

        # An unrecognized file_format must raise ValueError once the result
        # is consumed.
        with self.assertRaises(ValueError):
            list(format_summarize_taxa(summary, header, file_format="foo"))
Ejemplo n.º 4
0
    def test_write_summarize_taxa_transposed_output(self):
        """write_summarize_taxa_transposed_output functions as expected

        Writes the fixture summary with ``transposed_output=True`` to
        ``self.tmp_fp1`` (classic format) and ``self.tmp_fp2`` (BIOM format)
        and compares what was written against the expected table.
        """
        # Classic format.
        write_summarize_taxa(self.taxa_summary, self.taxa_header, self.tmp_fp1, transposed_output=True)
        # Register the file before asserting so a failed assertion does not
        # leave it behind (files_to_remove is presumably cleaned in tearDown).
        self.files_to_remove.append(self.tmp_fp1)
        with open(self.tmp_fp1) as classic_fh:
            obs = classic_fh.read()
        exp = "\n".join(["SampleID\ta;b;c\td;e;f", "foo\t0\t3\nbar\t1\t4", "foobar\t2\t5\n"])
        self.assertEqual(obs, exp)

        # BIOM format.
        write_summarize_taxa(
            self.taxa_summary, self.taxa_header, self.tmp_fp2, transposed_output=True, file_format="biom"
        )
        self.files_to_remove.append(self.tmp_fp2)
        # The expected BIOM table is built from the classic expected text.
        exp = parse_classic_table_to_rich_table(exp.split("\n"), None, None, None, SparseTaxonTable)
        with open(self.tmp_fp2) as biom_fh:
            obs = biom_fh.read()
        obs = parse_biom_table(obs)
        self.assertEqual(obs, exp)
Ejemplo n.º 5
0
    def test_write_summarize_taxa(self):
        """write_summarize_taxa functions as expected

        Writes the fixture summary to ``self.tmp_fp1`` (classic format) and
        ``self.tmp_fp2`` (BIOM format) and compares what was written against
        the expected table.
        """
        # Classic format.
        write_summarize_taxa(self.taxa_summary, self.taxa_header, self.tmp_fp1)
        # Register the file before asserting so a failed assertion does not
        # leave it behind (files_to_remove is presumably cleaned in tearDown).
        self.files_to_remove.append(self.tmp_fp1)
        with open(self.tmp_fp1) as classic_fh:
            obs = classic_fh.read()
        exp = '\n'.join(['Taxon\tfoo\tbar\tfoobar',
                         'a;b;c\t0\t1\t2',
                         'd;e;f\t3\t4\t5\n'])
        self.assertEqual(obs, exp)

        # BIOM format.
        write_summarize_taxa(self.taxa_summary, self.taxa_header, self.tmp_fp2,
                             file_format='biom')
        self.files_to_remove.append(self.tmp_fp2)
        # The expected BIOM table is built from the classic expected text.
        exp = parse_classic_table_to_rich_table(exp.split('\n'), None, None,
                                                None, SparseTaxonTable)
        with open(self.tmp_fp2) as biom_fh:
            obs = biom_fh.read()
        obs = parse_biom_table(obs)
        self.assertEqual(obs, exp)
Ejemplo n.º 6
0
    def test_write_summarize_taxa_transposed_output(self):
        """write_summarize_taxa_transposed_output functions as expected

        Writes the fixture summary with ``transposed_output=True`` to
        ``self.tmp_fp1`` (classic format) and ``self.tmp_fp2`` (BIOM format)
        and compares what was written against the expected table.
        """
        # Classic format.
        write_summarize_taxa(
            self.taxa_summary,
            self.taxa_header,
            self.tmp_fp1,
            transposed_output=True)
        # Register the file before asserting so a failed assertion does not
        # leave it behind (files_to_remove is presumably cleaned in tearDown).
        self.files_to_remove.append(self.tmp_fp1)
        with open(self.tmp_fp1) as classic_fh:
            obs = classic_fh.read()
        exp = '\n'.join(['SampleID\ta;b;c\td;e;f',
                         'foo\t0\t3\nbar\t1\t4',
                         'foobar\t2\t5\n'])
        self.assertEqual(obs, exp)

        # BIOM format.
        write_summarize_taxa(self.taxa_summary, self.taxa_header, self.tmp_fp2,
                             transposed_output=True, file_format='biom')
        self.files_to_remove.append(self.tmp_fp2)
        # The expected BIOM table is built from the classic expected text.
        exp = parse_classic_table_to_rich_table(exp.split('\n'), None, None,
                                                None, SparseTaxonTable)
        with open(self.tmp_fp2) as biom_fh:
            obs = biom_fh.read()
        obs = parse_biom_table(obs)
        self.assertEqual(obs, exp)
Ejemplo n.º 7
0
    def test_format_summarize_taxa(self):
        """Check format_summarize_taxa for classic, BIOM and invalid formats."""
        summary = self.taxa_summary
        header = self.taxa_header

        # Classic format: the chunks produced by the formatter, joined
        # together, must equal the expected tab-delimited text.
        classic_rows = [
            'Taxon\tfoo\tbar\tfoobar',
            'a;b;c\t0\t1\t2',
            'd;e;f\t3\t4\t5\n',
        ]
        expected_text = '\n'.join(classic_rows)
        observed_text = ''.join(format_summarize_taxa(summary, header))
        self.assertEqual(observed_text, expected_text)

        # BIOM format: turn the expected classic text into a table and
        # compare it with the table parsed from the formatter's BIOM output.
        expected_table = parse_classic_table_to_rich_table(
            expected_text.split('\n'), None, None, None, SparseTaxonTable)
        biom_chunks = format_summarize_taxa(summary, header,
                                            file_format='biom')
        observed_table = parse_biom_table(''.join(biom_chunks))
        self.assertEqual(observed_table, expected_table)

        # An unrecognized file_format must raise ValueError once the result
        # is consumed.
        with self.assertRaises(ValueError):
            list(format_summarize_taxa(summary, header, file_format='foo'))
Ejemplo n.º 8
0
 def _drop_unknown():
     """Filter the table in ``file`` down to the ids listed in ``_copy_fname``.

     Reads the first tab-separated field of every row of the gzip file
     ``_copy_fname`` into a set, loads the table stored in ``file`` (BIOM
     first, classic tab-delimited as a fallback), keeps only observations
     whose id (as a string) is in that set, and writes the result as BIOM
     JSON. The original file is renamed to ``addtag(file, "unfiltered")``
     and the filtered table takes its place.

     ``file``, ``_copy_fname`` and ``addtag`` come from the enclosing scope.
     """
     import os
     import gzip
     import json
     from biom.table import DenseOTUTable
     from biom.parse import (
         OBS_META_TYPES,
         parse_biom_table,
         parse_classic_table_to_rich_table
     )

     # Close the gzip handle once the ids have been collected.
     copy_fh = gzip.open(_copy_fname)
     try:
         idx = {row.strip().split('\t')[0] for row in copy_fh}
     finally:
         copy_fh.close()

     def filter_func(_values, otu_id, _metadata):
         # Only the observation id decides whether a row is kept.
         return str(otu_id) in idx

     with open(file) as f:
         try:
             table = parse_biom_table(f)
         except Exception:
             # The failed BIOM parse may already have consumed the handle;
             # rewind so the classic parser sees the whole file.
             f.seek(0)
             table = parse_classic_table_to_rich_table(
                 f, None, None, OBS_META_TYPES['taxonomy'], DenseOTUTable)

     table = table.filterObservations(filter_func)

     # Only create the temporary file once there is a table to write, so a
     # parse/filter failure does not leave an empty temp file behind.
     tmpfile = file+"_tmp.biom"
     with open(tmpfile, 'w') as f_out:
         json.dump( table.getBiomFormatObject("AnADAMA"), f_out )

     os.rename(file, addtag(file, "unfiltered"))
     os.rename(tmpfile, file)
Ejemplo n.º 9
0
def main():
    """Run the script: load the tables, attach copy numbers, write the result.

    Steps, in order:

    1. Parse the command line with ``parse_command_line_parameters``.
    2. Load the OTU table from ``opts.input_otu_fp`` (classic tab-delimited
       parser when ``opts.input_format_classic`` is set, BIOM parser
       otherwise).
    3. Load the count table from ``opts.input_count_fp``, or from the
       bundled ``16S_<gg_version>_precalculated.tab.gz`` file when no path
       is given. It is parsed as BIOM when
       ``opts.load_precalc_file_in_biom`` is set, otherwise through
       ``convert_precalc_to_biom``.
    4. Keep only the OTUs for which ``count_table.sampleExists`` is true,
       attach each one's rounded copy number as observation metadata under
       ``opts.metadata_identifer`` and call ``normObservationByMetadata``
       with that key.
    5. Pass the result through ``transfer_observation_metadata`` and
       ``transfer_sample_metadata`` with the input OTU table, then write
       ``format_biom_table`` of it to ``opts.output_otu_fp``.

    Raises:
        ValueError: if the OTU table cannot be parsed as BIOM, or if a copy
            number cannot be converted to an integer or is smaller than 1.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Use a context manager so the input handle is closed once parsed.
    with open(opts.input_otu_fp, 'U') as otu_fh:
        if opts.input_format_classic:
            otu_table = parse_classic_table_to_rich_table(
                otu_fh, None, None, None, DenseOTUTable)
        else:
            try:
                otu_table = parse_biom_table(otu_fh)
            except ValueError:
                raise ValueError(
                    "Error loading OTU table! If not in BIOM format use '-f' option.\n"
                )

    ids_to_load = otu_table.ObservationIds

    if opts.input_count_fp is None:
        # precalc file has specific name (e.g. 16S_13_5_precalculated.tab.gz)
        precalc_file_name = '_'.join(
            ['16S', opts.gg_version, 'precalculated.tab.gz'])
        input_count_table = join(get_picrust_project_dir(), 'picrust', 'data',
                                 precalc_file_name)
    else:
        input_count_table = opts.input_count_fp

    if opts.verbose:
        # Single-argument form prints the same text under Python 2 and 3.
        print("Loading trait table:  %s" % input_count_table)

    if path.splitext(input_count_table)[1] == '.gz':
        count_table_fh = gzip.open(input_count_table, 'rb')
    else:
        count_table_fh = open(input_count_table, 'U')
    try:
        if opts.load_precalc_file_in_biom:
            count_table = parse_biom_table(count_table_fh.read())
        else:
            count_table = convert_precalc_to_biom(count_table_fh, ids_to_load)
    finally:
        count_table_fh.close()

    # The first observation row of the count table is the one the copy
    # numbers are read from below.
    ob_id = count_table.ObservationIds[0]

    # Need to only keep data relevant to our otu list
    filtered_otus = []
    filtered_values = []
    for observation in otu_table.iterObservations():
        otu_id = str(observation[1])
        if count_table.sampleExists(otu_id):
            filtered_otus.append(otu_id)
            filtered_values.append(otu_table.observationData(otu_id))

    filtered_otu_table = table_factory(filtered_values,
                                       otu_table.SampleIds,
                                       filtered_otus,
                                       constructor=DenseOTUTable)

    copy_numbers_filtered = {}
    for otu_id in filtered_otus:
        raw_value = count_table.getValueByIds(ob_id, otu_id)
        try:
            # data can be floats so round them and make them integers
            value = int(round(float(raw_value)))
        except (TypeError, ValueError, OverflowError):
            # Report both the offending value and the OTU it belongs to.
            raise ValueError(
                "Invalid copy number %r passed for OTU ID %s. Must be int-able."
                % (raw_value, otu_id))
        if value < 1:
            raise ValueError("Copy numbers must be greater than or equal to 1.")

        copy_numbers_filtered[otu_id] = {opts.metadata_identifer: value}

    filtered_otu_table.addObservationMetadata(copy_numbers_filtered)

    normalized_table = filtered_otu_table.normObservationByMetadata(
        opts.metadata_identifer)

    # move Observation Metadata from original to filtered OTU table
    normalized_table = transfer_observation_metadata(otu_table,
                                                     normalized_table,
                                                     'ObservationMetadata')
    # Keep the returned table so the one written below is the one that went
    # through the sample-metadata transfer (the result used to be bound to
    # an unused name).
    normalized_table = transfer_sample_metadata(otu_table,
                                                normalized_table,
                                                'SampleMetadata')

    make_output_dir_for_file(opts.output_otu_fp)
    # Close (and therefore flush) the output file deterministically.
    with open(opts.output_otu_fp, 'w') as out_fh:
        out_fh.write(format_biom_table(normalized_table))
Ejemplo n.º 10
0
def main():
    """Run the script: load the tables, attach copy numbers, write the result.

    Steps, in order:

    1. Parse the command line with ``parse_command_line_parameters``.
    2. Load the OTU table from ``opts.input_otu_fp`` (classic tab-delimited
       parser when ``opts.input_format_classic`` is set, BIOM parser
       otherwise).
    3. Load the count table from ``opts.input_count_fp``, or from the
       bundled ``16S_<gg_version>_precalculated.tab.gz`` file when no path
       is given. It is parsed as BIOM when
       ``opts.load_precalc_file_in_biom`` is set, otherwise through
       ``convert_precalc_to_biom``.
    4. Keep only the OTUs for which ``count_table.sampleExists`` is true,
       attach each one's rounded copy number as observation metadata under
       ``opts.metadata_identifer`` and call ``normObservationByMetadata``
       with that key.
    5. Pass the result through ``transfer_observation_metadata`` and
       ``transfer_sample_metadata`` with the input OTU table, then write
       ``format_biom_table`` of it to ``opts.output_otu_fp``.

    Raises:
        ValueError: if the OTU table cannot be parsed as BIOM, or if a copy
            number cannot be converted to an integer or is smaller than 1.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Use a context manager so the input handle is closed once parsed.
    with open(opts.input_otu_fp, 'U') as otu_fh:
        if opts.input_format_classic:
            otu_table = parse_classic_table_to_rich_table(
                otu_fh, None, None, None, DenseOTUTable)
        else:
            try:
                otu_table = parse_biom_table(otu_fh)
            except ValueError:
                raise ValueError("Error loading OTU table! If not in BIOM format use '-f' option.\n")

    ids_to_load = otu_table.ObservationIds

    if opts.input_count_fp is None:
        # precalc file has specific name (e.g. 16S_13_5_precalculated.tab.gz)
        precalc_file_name = '_'.join(['16S', opts.gg_version, 'precalculated.tab.gz'])
        input_count_table = join(get_picrust_project_dir(), 'picrust', 'data', precalc_file_name)
    else:
        input_count_table = opts.input_count_fp

    if opts.verbose:
        # Single-argument form prints the same text under Python 2 and 3.
        print("Loading trait table:  %s" % input_count_table)

    if path.splitext(input_count_table)[1] == '.gz':
        count_table_fh = gzip.open(input_count_table, 'rb')
    else:
        count_table_fh = open(input_count_table, 'U')
    try:
        if opts.load_precalc_file_in_biom:
            count_table = parse_biom_table(count_table_fh.read())
        else:
            count_table = convert_precalc_to_biom(count_table_fh, ids_to_load)
    finally:
        count_table_fh.close()

    # The first observation row of the count table is the one the copy
    # numbers are read from below.
    ob_id = count_table.ObservationIds[0]

    # Need to only keep data relevant to our otu list
    filtered_otus = []
    filtered_values = []
    for observation in otu_table.iterObservations():
        otu_id = str(observation[1])
        if count_table.sampleExists(otu_id):
            filtered_otus.append(otu_id)
            filtered_values.append(otu_table.observationData(otu_id))

    filtered_otu_table = table_factory(filtered_values, otu_table.SampleIds,
                                       filtered_otus, constructor=DenseOTUTable)

    copy_numbers_filtered = {}
    for otu_id in filtered_otus:
        raw_value = count_table.getValueByIds(ob_id, otu_id)
        try:
            # data can be floats so round them and make them integers
            value = int(round(float(raw_value)))
        except (TypeError, ValueError, OverflowError):
            # Report both the offending value and the OTU it belongs to.
            raise ValueError(
                "Invalid copy number %r passed for OTU ID %s. Must be int-able."
                % (raw_value, otu_id))
        if value < 1:
            raise ValueError("Copy numbers must be greater than or equal to 1.")

        copy_numbers_filtered[otu_id] = {opts.metadata_identifer: value}

    filtered_otu_table.addObservationMetadata(copy_numbers_filtered)

    normalized_table = filtered_otu_table.normObservationByMetadata(opts.metadata_identifer)

    # move Observation Metadata from original to filtered OTU table
    normalized_table = transfer_observation_metadata(otu_table, normalized_table, 'ObservationMetadata')
    # Keep the returned table so the one written below is the one that went
    # through the sample-metadata transfer (the result used to be bound to
    # an unused name).
    normalized_table = transfer_sample_metadata(otu_table, normalized_table, 'SampleMetadata')

    make_output_dir_for_file(opts.output_otu_fp)
    # Close (and therefore flush) the output file deterministically.
    with open(opts.output_otu_fp, 'w') as out_fh:
        out_fh.write(format_biom_table(normalized_table))