def setUp(self):
        """Create the OTU table fixtures shared by the tests.

        Sets ``self.otu_table`` (OTU1 and OTU2 over Sample1/Sample2) and
        ``self.filt_otu_table`` (the OTU2 row only), plus ``self.output_dir``,
        ``self.num_otu_hits`` and an empty ``self._folders_to_cleanup`` list.
        """
        self.output_dir = '/tmp/'

        # Full table: one row per OTU, one column per sample.
        full_counts = array([[0, 0], [1, 5]])
        full_observation_md = [{"taxonomy": ["Bacteria"]},
                               {"taxonomy": ["Archaea"]}]
        self.otu_table = table_factory(full_counts,
                                       ['Sample1', 'Sample2'],
                                       ['OTU1', 'OTU2'],
                                       [None, None],
                                       full_observation_md)

        # Filtered table: only the OTU2 row of the full table.
        filtered_counts = array([[1, 5]])
        filtered_observation_md = [{"taxonomy": ["Archaea"]}]
        self.filt_otu_table = table_factory(filtered_counts,
                                            ['Sample1', 'Sample2'],
                                            ['OTU2'],
                                            [None, None],
                                            filtered_observation_md)

        self.num_otu_hits = 5
        self._folders_to_cleanup = []
    def setUp(self):
        """Prepare the fixtures used by the tests.

        Builds ``self.otu_table`` (two OTUs, two samples) and
        ``self.filt_otu_table`` (OTU2 only) and initialises
        ``self.output_dir``, ``self.num_otu_hits`` and
        ``self._folders_to_cleanup``.
        """
        def lineage(name):
            # observation metadata entry with a one-level taxonomy
            return {"taxonomy": [name]}

        sample_names = ('Sample1', 'Sample2')

        self.output_dir = '/tmp/'

        # each table_factory call receives its own fresh lists
        self.otu_table = table_factory(
            array([[0, 0], [1, 5]]),
            list(sample_names),
            ['OTU1', 'OTU2'],
            [None] * len(sample_names),
            [lineage("Bacteria"), lineage("Archaea")])

        self.filt_otu_table = table_factory(
            array([[1, 5]]),
            list(sample_names),
            ['OTU2'],
            [None] * len(sample_names),
            [lineage("Archaea")])

        self.num_otu_hits = 5
        self._folders_to_cleanup = []
Exemple #3
0
    def test_get_log_transform(self):
        """get_log_transform(eps=None) output matches the precomputed values"""
        sample_names = ('Sample1', 'Sample2', 'Sample3')
        otu_names = ('OTU1', 'OTU2')

        def build_table(values):
            # same ids and metadata for both tables; only the data differs
            return table_factory(array(values),
                                 list(sample_names),
                                 list(otu_names),
                                 [None] * len(sample_names),
                                 [{"taxonomy": ["Bacteria"]},
                                  {"taxonomy": ["Archaea"]}])

        input_table = build_table([[0, 1, 2], [1000, 0, 0]])
        expected_table = build_table([[0, 0.69314718, 1.38629436],
                                      [7.60090246, 0, 0]])

        observed_table = get_log_transform(input_table, eps=None)

        # comparing the table objects directly doesn't work (that would need
        # a change in the otu table object), so compare per-sample data
        observed_rows = list(observed_table.iterSampleData())
        expected_rows = list(expected_table.iterSampleData())
        self.assertFloatEqual(observed_rows, expected_rows)
Exemple #4
0
def parse_biom_taxon_table(json_table, constructor=None, data_pump=None):
    """Parse a biom taxon table

    Constructor must have a _biom_type of "taxon table"

    json_table  : mapping read with the keys 'matrix_type', 'columns',
                  'rows', 'matrix_element_type', 'shape' and, when no
                  data_pump is given, 'data'
    constructor : optional table class handed to pick_constructor together
                  with the SparseTaxonTable / DenseTaxonTable candidates
    data_pump   : optional object passed to table_factory in place of
                  json_table['data']

    Returns whatever table_factory builds.
    """
    constructor = pick_constructor(json_table['matrix_type'], 'taxon table',
                                   constructor,
                                   [SparseTaxonTable, DenseTaxonTable])

    sample_ids = [column['id'] for column in json_table['columns']]
    sample_metadata = [column['metadata'] for column in json_table['columns']]
    obs_ids = [entry['id'] for entry in json_table['rows']]
    obs_metadata = [entry['metadata'] for entry in json_table['rows']]
    dtype = MATRIX_ELEMENT_TYPE[json_table['matrix_element_type']]

    # the only difference between the two modes is where the data comes from
    if data_pump is None:
        source = json_table['data']
    else:
        source = data_pump

    return table_factory(source, sample_ids, obs_ids,
                         sample_metadata, obs_metadata,
                         constructor=constructor,
                         shape=json_table['shape'],
                         dtype=dtype)
 def test_verify_subset(self):
     """verify_subset: true for samples a,b,c and a,b; false for a,b,x"""
     metadata = [('a', 'other stuff\tfoo'),
                 ('b', 'asdasdasd'),
                 ('c', '123123123')]

     # every sample id of the table appears in the metadata
     all_samples = table_factory(array([[1, 2, 3], [4, 5, 6]]),
                                 ['a', 'b', 'c'], ['x', 'y'])
     self.assertTrue(verify_subset(all_samples, metadata))

     # the table only uses some of the metadata ids
     fewer_samples = table_factory(array([[1, 2], [3, 4]]),
                                   ['a', 'b'], ['x', 'y'])
     self.assertTrue(verify_subset(fewer_samples, metadata))

     # sample 'x' has no metadata entry
     unknown_sample = table_factory(array([[1, 2, 3], [4, 5, 6]]),
                                    ['a', 'b', 'x'], ['x', 'y'])
     self.assertFalse(verify_subset(unknown_sample, metadata))
Exemple #6
0
 def test_verify_subset(self):
     """verify_subset result for three sample-id sets against fixed metadata"""
     metadata = [('a', 'other stuff\tfoo'), ('b', 'asdasdasd'),
                 ('c', '123123123')]
     # (counts, sample ids, expected verify_subset outcome)
     cases = [
         ([[1, 2, 3], [4, 5, 6]], ['a', 'b', 'c'], True),
         ([[1, 2], [3, 4]], ['a', 'b'], True),
         ([[1, 2, 3], [4, 5, 6]], ['a', 'b', 'x'], False),
     ]
     for counts, sample_ids, expected in cases:
         table = table_factory(array(counts), sample_ids, ['x', 'y'])
         if expected:
             check = self.assertTrue
         else:
             check = self.assertFalse
         check(verify_subset(table, metadata))
    def setUp(self):
        """Define some test data.

        Builds three OTU tables with the DenseOTUTable constructor, writes
        each one (via format_biom_table) to its own temporary '.biom' file
        under self.tmp_dir, and parses two newick trees. Created file paths
        are collected in self.files_to_remove; the temp dir is added to
        self.dirs_to_remove only when this method had to create it.
        """
        self.qiime_config = load_qiime_config()
        self.dirs_to_remove = []

        self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'
        if not exists(self.tmp_dir):
            makedirs(self.tmp_dir)
            # if test creates the temp dir, also remove it
            self.dirs_to_remove.append(self.tmp_dir)

        def write_biom(table):
            """Write table to a new temp .biom file and return the path."""
            fd, fp = mkstemp(dir=self.tmp_dir,
                             prefix='alpha_diversity_tests',
                             suffix='.biom')
            # mkstemp hands back an open descriptor; release it and reopen
            # by path for writing text
            close(fd)
            # the with-block closes (and flushes) the handle deterministically
            # instead of relying on garbage collection
            with open(fp, 'w') as biom_f:
                biom_f.write(format_biom_table(table))
            return fp

        self.otu_table1 = table_factory(data=array([[2, 0, 0, 1],
                                                    [1, 1, 1, 1],
                                                    [0, 0, 0, 0]]).T,
                                        sample_ids=list('XYZ'),
                                        observation_ids=list('abcd'),
                                        constructor=DenseOTUTable)
        self.otu_table1_fp = write_biom(self.otu_table1)

        # same counts as otu_table1, but the last observation id is 'd_'
        self.otu_table2 = table_factory(data=array([[2, 0, 0, 1],
                                                    [1, 1, 1, 1],
                                                    [0, 0, 0, 0]]).T,
                                        sample_ids=list('XYZ'),
                                        observation_ids=['a', 'b', 'c', 'd_'],
                                        constructor=DenseOTUTable)
        self.otu_table2_fp = write_biom(self.otu_table2)

        self.single_sample_otu_table = table_factory(
            data=array([[2, 0, 0, 1]]).T,
            sample_ids=list('X'),
            observation_ids=list('abcd'),
            constructor=DenseOTUTable)
        self.single_sample_otu_table_fp = write_biom(
            self.single_sample_otu_table)

        self.tree1 = parse_newick('((a:2,b:3):2,(c:1,d:2):7);')
        self.tree2 = parse_newick("((a:2,'b':3):2,(c:1,'d_':2):7);")

        self.files_to_remove = [self.otu_table1_fp, self.otu_table2_fp,
                                self.single_sample_otu_table_fp]
Exemple #8
0
    def setUp(self):
        """Define some test data.

        Builds three OTU tables with the DenseOTUTable constructor and writes
        each one (via format_biom_table) to a path obtained from
        get_tmp_filename under self.tmp_dir, then parses two newick trees.
        The written paths are collected in self.files_to_remove; the temp dir
        is added to self.dirs_to_remove only when it was created here.
        """
        self.qiime_config = load_qiime_config()
        self.dirs_to_remove = []

        self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'
        if not exists(self.tmp_dir):
            makedirs(self.tmp_dir)
            # if test creates the temp dir, also remove it
            self.dirs_to_remove.append(self.tmp_dir)

        self.otu_table1 = table_factory(data=array([[2, 0, 0, 1],
                                                    [1, 1, 1, 1],
                                                    [0, 0, 0, 0]]).T,
                                        sample_ids=list('XYZ'),
                                        observation_ids=list('abcd'),
                                        constructor=DenseOTUTable)
        self.otu_table1_fp = get_tmp_filename(tmp_dir=self.tmp_dir,
                                              prefix='alpha_diversity_tests',
                                              suffix='.biom',
                                              result_constructor=str)
        # with-blocks close each handle as soon as the table is written,
        # rather than leaving it to garbage collection
        with open(self.otu_table1_fp, 'w') as biom_f:
            biom_f.write(format_biom_table(self.otu_table1))

        # same counts as otu_table1, but the last observation id is 'd_'
        self.otu_table2 = table_factory(data=array([[2, 0, 0, 1],
                                                    [1, 1, 1, 1],
                                                    [0, 0, 0, 0]]).T,
                                        sample_ids=list('XYZ'),
                                        observation_ids=['a', 'b', 'c', 'd_'],
                                        constructor=DenseOTUTable)
        self.otu_table2_fp = get_tmp_filename(tmp_dir=self.tmp_dir,
                                              prefix='alpha_diversity_tests',
                                              suffix='.biom',
                                              result_constructor=str)
        with open(self.otu_table2_fp, 'w') as biom_f:
            biom_f.write(format_biom_table(self.otu_table2))

        self.single_sample_otu_table = table_factory(
            data=array([[2, 0, 0, 1]]).T,
            sample_ids=list('X'),
            observation_ids=list('abcd'),
            constructor=DenseOTUTable)
        self.single_sample_otu_table_fp = get_tmp_filename(
            tmp_dir=self.tmp_dir,
            prefix='alpha_diversity_tests',
            suffix='.biom',
            result_constructor=str)
        with open(self.single_sample_otu_table_fp, 'w') as biom_f:
            biom_f.write(format_biom_table(self.single_sample_otu_table))

        self.tree1 = parse_newick('((a:2,b:3):2,(c:1,d:2):7);')
        self.tree2 = parse_newick("((a:2,'b':3):2,(c:1,'d_':2):7);")

        self.files_to_remove = [
            self.otu_table1_fp, self.otu_table2_fp,
            self.single_sample_otu_table_fp
        ]
Exemple #9
0
    def setUp(self):
        """define some top-level data

        Sets self.otu_table (single-level taxonomy per OTU) and
        self.otu_table_f (four-level taxonomy per OTU) over the same 3x6
        count matrix, plus self.full_lineages, self.metadata,
        self.tree_text and the path of an empty temporary '.pdf' file in
        self.tmp_heatmap_fpath.
        """
        self.otu_table_values = array([[0, 0, 9, 5, 3, 1],
                                       [1, 5, 4, 0, 3, 2],
                                       [2, 3, 1, 1, 2, 5]])

        self.otu_table = table_factory(
            self.otu_table_values,
            ['Sample1', 'Sample2', 'Sample3', 'Sample4', 'Sample5', 'Sample6'],
            ['OTU1', 'OTU2', 'OTU3'],
            [None, None, None, None, None, None],
            [{"taxonomy": ['Bacteria']},
             {"taxonomy": ['Archaea']},
             {"taxonomy": ['Streptococcus']}])
        self.otu_table_f = table_factory(
            self.otu_table_values,
            ['Sample1', 'Sample2', 'Sample3', 'Sample4', 'Sample5', 'Sample6'],
            ['OTU1', 'OTU2', 'OTU3'],
            [None, None, None, None, None, None],
            [{"taxonomy": ['1A', '1B', '1C', 'Bacteria']},
             {"taxonomy": ['2A', '2B', '2C', 'Archaea']},
             {"taxonomy": ['3A', '3B', '3C', 'Streptococcus']}])

        # same lineages as the otu_table_f metadata, kept as a separate list
        self.full_lineages = [['1A', '1B', '1C', 'Bacteria'],
                              ['2A', '2B', '2C', 'Archaea'],
                              ['3A', '3B', '3C', 'Streptococcus']]
        self.metadata = [[['Sample1', 'NA', 'A'], ['Sample2', 'NA', 'B'],
                          ['Sample3', 'NA', 'A'], ['Sample4', 'NA', 'B'],
                          ['Sample5', 'NA', 'A'], ['Sample6', 'NA', 'B']],
                         ['SampleID', 'CAT1', 'CAT2'], []]
        self.tree_text = ["('OTU3',('OTU1','OTU2'))"]

        # only the path is needed; close the descriptor mkstemp opened
        fh, self.tmp_heatmap_fpath = mkstemp(prefix='test_heatmap_',
                                             suffix='.pdf')
        close(fh)
Exemple #10
0
    def test_sample_mapping_to_biom_table(self):
        """sample_mapping_to_biom_table works"""
        # (fixture attribute holding the input lines, expected sample ids);
        # the expected counts are the same for both inputs
        cases = (
            ('SampleMapping', ['sample1', 'sample2', 'sample3']),
            ('SampleMappingNoMIENS', ['sample.1', 'sample.2', 'sample.3']),
        )
        for fixture_name, expected_sample_ids in cases:
            lines = getattr(self, fixture_name)
            actual = sample_mapping_to_biom_table(lines)
            exp = table_factory(array([[3., 0., 2.], [1., 2., 0.]]),
                                expected_sample_ids,
                                ['OTU1', 'OTU2'])
            # sort both by sample id so column order does not matter
            self.assertEqual(actual.sortBySampleId(), exp.sortBySampleId())
    def test_sample_mapping_to_biom_table(self):
        """sample_mapping_to_biom_table works"""
        def expected_table(sample_ids):
            # both inputs are expected to yield the same counts and OTU ids
            return table_factory(array([[3., 0., 2.], [1., 2., 0.]]),
                                 sample_ids,
                                 ['OTU1', 'OTU2'])

        observed = sample_mapping_to_biom_table(self.SampleMapping)
        expected = expected_table(['sample1', 'sample2', 'sample3'])
        self.assertEqual(observed.sortBySampleId(), expected.sortBySampleId())

        observed = sample_mapping_to_biom_table(self.SampleMappingNoMIENS)
        expected = expected_table(['sample.1', 'sample.2', 'sample.3'])
        self.assertEqual(observed.sortBySampleId(), expected.sortBySampleId())
Exemple #12
0
    def setUp(self):
        """Create the OTU table fixtures and temporary files for the tests.

        Builds self.otu_table and self.otu_table_meta (the same data, the
        latter with observation metadata), formats both with
        format_biom_table, writes the strings to two temporary '.biom'
        files under self.tmp_dir and creates a temporary directory
        (self.rare_dir). Created paths are recorded in
        self._paths_to_clean_up / self._dirs_to_clean_up.
        """
        self.qiime_config = load_qiime_config()
        self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'

        self.otu_table_data = numpy.array([[2, 1, 0],
                                           [0, 5, 0],
                                           [0, 3, 0],
                                           [1, 2, 0]])
        self.sample_names = list('YXZ')
        self.taxon_names = list('bacd')
        self.otu_metadata = [{'domain': 'Archaea'},
                             {'domain': 'Bacteria'},
                             {'domain': 'Bacteria'},
                             {'domain': 'Bacteria'}]

        self.otu_table = table_factory(self.otu_table_data, self.sample_names,
                                       self.taxon_names)
        self.otu_table_meta = table_factory(
            self.otu_table_data,
            self.sample_names,
            self.taxon_names,
            observation_metadata=self.otu_metadata)

        self.otu_table_str = format_biom_table(self.otu_table)
        self.otu_table_meta_str = format_biom_table(self.otu_table_meta)

        # mkstemp returns an open descriptor; only the path is needed here
        fd, self.otu_table_fp = mkstemp(dir=self.tmp_dir,
                                        prefix='test_rarefaction',
                                        suffix='.biom')
        close(fd)
        fd, self.otu_table_meta_fp = mkstemp(dir=self.tmp_dir,
                                             prefix='test_rarefaction',
                                             suffix='.biom')
        close(fd)

        self.rare_dir = mkdtemp(dir=self.tmp_dir,
                                prefix='test_rarefaction_dir',
                                suffix='')

        # with-blocks close the handles right after writing instead of
        # leaving them to garbage collection
        with open(self.otu_table_fp, 'w') as otu_f:
            otu_f.write(self.otu_table_str)
        with open(self.otu_table_meta_fp, 'w') as otu_meta_f:
            otu_meta_f.write(self.otu_table_meta_str)

        self._paths_to_clean_up = [self.otu_table_fp, self.otu_table_meta_fp]
        self._dirs_to_clean_up = [self.rare_dir]
def sample_mapping_to_biom_table(lines):
    """Converts the UniFrac sample mapping file to biom table object

    The sample mapping file is a required input for the UniFrac web interface.

    Corrects the sample ids to be MIENS compliant

    lines : iterable of strings; each non-blank line holds whitespace
            separated fields read as observation id, sample id and count.
            Blank lines are skipped.

    Ids are numbered in order of first appearance and every line
    contributes one [observation_idx, sample_idx, count] entry to the data
    handed to table_factory.
    """
    trans_table = build_sample_ids_transtable()

    data = []
    sample_ids = []
    observation_ids = []
    # id -> position maps give constant-time lookups; searching the id
    # lists for every line would make parsing quadratic
    sample_index = {}
    observation_index = {}
    for line in lines:
        fields = line.strip().split()
        if not fields:
            # blank or whitespace-only line (e.g. a trailing newline)
            continue
        observation_id = fields[0]
        sample_id = fields[1].translate(trans_table)
        count = float(fields[2])

        if sample_id not in sample_index:
            sample_index[sample_id] = len(sample_ids)
            sample_ids.append(sample_id)
        if observation_id not in observation_index:
            observation_index[observation_id] = len(observation_ids)
            observation_ids.append(observation_id)

        data.append([observation_index[observation_id],
                     sample_index[sample_id],
                     count])

    return table_factory(data, sample_ids, observation_ids)
def table_from_template(new_data,sample_ids,observation_ids,\
    sample_metadata_source=None,observation_metadata_source=None,\
    constructor=SparseGeneTable,verbose=False):
    """Build a new BIOM table from new_data, and transfer metadata from 1-2 existing tables

    new_data, sample_ids, observation_ids : passed straight to table_factory
    sample_metadata_source : optional donor for 'SampleMetadata'
    observation_metadata_source : optional donor for 'ObservationMetadata'
    constructor : table class handed to table_factory
    verbose : forwarded to transfer_metadata

    Returns the new table after any requested metadata transfer.
    """
    # Build the BIOM table with the constructor the caller asked for
    result_table = table_factory(new_data, sample_ids, observation_ids,
                                 constructor=constructor)

    # Transfer sample metadata from the OTU table
    # to the metagenome table (samples are the same)
    if sample_metadata_source:
        result_table = transfer_metadata(
            sample_metadata_source, result_table,
            donor_metadata_type='SampleMetadata',
            recipient_metadata_type='SampleMetadata', verbose=verbose)

    # Now transfer observation metadata (e.g. gene metadata)
    # from the genome table to the result table
    if observation_metadata_source:
        result_table = transfer_metadata(
            observation_metadata_source, result_table,
            donor_metadata_type='ObservationMetadata',
            recipient_metadata_type='ObservationMetadata', verbose=verbose)

    return result_table
Exemple #15
0
    def setUp(self):
        self.otu_table_vals = array([[1,0,2,4],
                               [1,2,0,1],
                               [0,1,1,0],
                               [1,2,1,0]])
        
        {(0, 0):1.0, (0, 2):2.0, (0, 3):4.0,
                               (1, 0):1.0, (1, 1):2.0, (1, 3):1.0,
                               (2, 1):1.0, (2, 2):1.0, (3, 0):1.0,
                               (3, 1): 2.0, (3, 2):1.0}

        self.otu_table = table_factory(self.otu_table_vals,
                                        ['s1', 's2', 's3', 's4'],
                                        ['0', '1', '2', '3'],
                                        None,
                                        [{"taxonomy": ["Root", "Bacteria", "Actinobacteria", "Actinobacteria", "Coriobacteridae", "Coriobacteriales", "Coriobacterineae", "Coriobacteriaceae"]},
                                         {"taxonomy": ["Root", "Bacteria", "Firmicutes", "\"Clostridia\""]},
                                         {"taxonomy": ["Root", "Bacteria", "Firmicutes", "\"Clostridia\""]},
                                         {"taxonomy": ["Root", "Bacteria"]}])

#        self.otu_table="""#Full OTU Counts
##OTU ID\ts1\ts2\ts3\ts4\tConsensus Lineage
#0\t1\t0\t2\t4\tRoot;Bacteria;Actinobacteria;Actinobacteria;Coriobacteridae;Coriobacteriales;Coriobacterineae;Coriobacteriaceae
#1\t1\t2\t0\t1\tRoot;Bacteria;Firmicutes;"Clostridia"
#2\t0\t1\t1\t0\tRoot;Bacteria;Firmicutes;"Clostridia"
#3\t1\t2\t1\t0\tRoot;Bacteria""".split('\n')

        self.mapping="""#SampleID\tBarcodeSequence\tTreatment\tDescription
#Test mapping file
s1\tAAAA\tControl\tControl mouse, I.D. 354
s2\tGGGG\tControl\tControl mouse, I.D. 355
s3\tCCCC\tExp\tDisease mouse, I.D. 356
s4\tTTTT\tExp\tDisease mouse, I.D. 357""".split('\n')
Exemple #16
0
    def setUp(self):
        """Set up files/environment that will be used by the tests."""
        # Prefix shared by every temp dir/file these tests create; it may be
        # extended, but is always present at the start of the name.
        self.prefix = 'most_wanted_otus_tests_'
        self.files_to_remove = []
        self.dirs_to_remove = []

        output_prefix = '%soutput_dir_' % self.prefix
        self.output_dir = mkdtemp(prefix=output_prefix)
        self.dirs_to_remove.append(self.output_dir)

        self.grouping_category = 'Environment'
        self.top_n = 100

        # module-level fixture strings, split into lines
        self.blast_results_lines = blast_results.split('\n')
        self.blast_results_dupes_lines = blast_results_dupes.split('\n')
        self.rep_set_lines = rep_set.split('\n')

        self.top_n_mw = [('a', 'gi|7|emb|T51700.1|', 87.0),
                         ('b', 'gi|8|emb|Z700.1|', 89.5)]
        self.mw_seqs = {'b': 'AAGGTT', 'a': 'AGT'}

        counts = array([[1.0, 2.0], [2.0, 5.0]])
        taxonomy_md = [{'taxonomy': 'foo;bar;baz'},
                       {'taxonomy': 'foo;baz;bar'}]
        self.master_otu_table_ms = table_factory(
            counts, ['Env1', 'Env2'], ['a', 'b'],
            sample_metadata=None,
            observation_metadata=taxonomy_md,
            table_id=None,
            constructor=SparseOTUTable)
Exemple #17
0
def parse_classic_table_to_rich_table(lines, sample_mapping, obs_mapping, process_func, **kwargs):
    """Parse a tab-delimited (observation x sample) table into a table object

    lines          : table lines, handed to parse_classic_table with **kwargs
    sample_mapping : can be None or {'sample_id':something}
    obs_mapping    : can be None or {'observation_id':something}
    process_func   : applied to each metadata value found in the parsed table

    Observation metadata parsed from the table is replaced by obs_mapping
    whenever obs_mapping is given.
    """
    sample_ids, obs_ids, data, t_md, t_md_name = parse_classic_table(lines, **kwargs)

    # metadata carried by the table itself, if any
    obs_metadata = None
    if t_md is not None:
        obs_metadata = [{t_md_name: process_func(value)} for value in t_md]

    sample_metadata = None
    if sample_mapping is not None:
        sample_metadata = [sample_mapping[sid] for sid in sample_ids]

    # an explicit mapping wins over whatever the table provided
    if obs_mapping is not None:
        obs_metadata = [obs_mapping[oid] for oid in obs_ids]

    sparse_data = nparray_to_sparseobj(data)

    return table_factory(sparse_data, sample_ids, obs_ids,
                         sample_metadata, obs_metadata)
Exemple #18
0
def combine_tables(tables):
    """Combines multiple biom tables into a single table, discarding any
    non-shared OTUs.

    tables : re-iterable collection of tables; each must expose SampleIds
             and iterObservations() yielding (values, otu id, metadata)

    Sample ids are concatenated in table order. When a sample id occurs
    more than once only its first occurrence (and that column of data) is
    kept. An OTU is kept only when its concatenated values cover every
    sample of every table.

    Raises ValueError('No shared OTUs') when no OTU qualifies.
    """
    samples = [sample for table in tables for sample in table.SampleIds]

    # Positions of every repeat of a sample id after its first occurrence.
    # A set keeps the membership tests below constant-time.
    seen_samples = set()
    duplicate_sample_indices = set()
    for index, sample in enumerate(samples):
        if sample in seen_samples:
            duplicate_sample_indices.add(index)
        else:
            seen_samples.add(sample)

    # Concatenate each OTU's values across the tables that contain it.
    otu_data = dict()
    for table in tables:
        for vals, otu, md in table.iterObservations():
            previous = otu_data.get(otu)
            if previous is not None:
                otu_data[otu] = append(previous, vals)
            else:
                otu_data[otu] = vals

    # An OTU missing from any table has fewer values than there are samples.
    otus = [otu for otu in otu_data if len(otu_data[otu]) == len(samples)]
    if not otus:
        raise ValueError('No shared OTUs')

    kept_indices = [index for index in range(len(samples))
                    if index not in duplicate_sample_indices]
    data = [array([otu_data[otu][index] for index in kept_indices])
            for otu in otus]
    samples = [samples[index] for index in kept_indices]

    return table_factory(data, samples, otus, constructor=SparseOTUTable)
Exemple #19
0
    def setUp(self):
        """Set up files/environment that will be used by the tests."""
        # All temp dirs and files created by the tests carry at least this
        # prefix (it may be extended).
        self.prefix = 'most_wanted_otus_tests_'
        self.files_to_remove = []
        self.dirs_to_remove = []

        self.output_dir = mkdtemp(prefix='%soutput_dir_' % self.prefix)
        self.dirs_to_remove.append(self.output_dir)

        self.grouping_category = 'Environment'
        self.top_n = 100

        self.blast_results_lines = blast_results.split('\n')
        self.blast_results_dupes_lines = blast_results_dupes.split('\n')
        self.rep_set_lines = rep_set.split('\n')
        self.top_n_mw = [
            ('a', 'gi|7|emb|T51700.1|', 87.0),
            ('b', 'gi|8|emb|Z700.1|', 89.5),
        ]
        self.mw_seqs = {'b': 'AAGGTT', 'a': 'AGT'}

        # one taxonomy entry per observation id ('a', 'b')
        lineages = ('foo;bar;baz', 'foo;baz;bar')
        self.master_otu_table_ms = table_factory(
            array([[1.0, 2.0], [2.0, 5.0]]),
            ['Env1', 'Env2'],
            ['a', 'b'],
            sample_metadata=None,
            observation_metadata=[{'taxonomy': lineage}
                                  for lineage in lineages],
            table_id=None,
            constructor=SparseOTUTable)
def make_new_otu_counts(otu_table, sample_to_subtract, samples_from_subject):
    """make the converted otu table

    otu_table            : table exposing ObservationIds, SampleIds,
                           ObservationMetadata and [row, column] indexing
    sample_to_subtract   : {sample id: id of the sample whose value is
                           subtracted from it}
    samples_from_subject : {sample id: ids of the samples checked for
                           non-zero values}

    For every OTU and every key of sample_to_subtract (in sorted order) the
    new value is the sample's value minus the value of its mapped sample.
    If the OTU is not above zero in any of the samples listed for that
    sample, 999999999 is stored instead.

    Raises ValueError when a mapped sample is missing from the otu table.
    All sample ids are resolved before any counts are computed.
    """
    # sorted() works for Python 2 key lists and Python 3 key views alike
    new_sample_ids = sorted(sample_to_subtract)
    sample_ids = otu_table.SampleIds
    num_otus = len(otu_table.ObservationIds)
    new_otu_counts = zeros([num_otus, len(new_sample_ids)])

    for index2, sample in enumerate(new_sample_ids):
        # column lookups depend only on the sample, so resolve them once
        # per sample instead of once per (otu, sample) pair
        tpz_sample = sample_to_subtract[sample]
        if tpz_sample not in sample_ids:
            raise ValueError("There are samples in the category mapping file that are not in the otu table, such as sample: " + tpz_sample + ". Removing these samples from the category mapping file will allow you to proceed.")
        tpz_sample_index = sample_ids.index(tpz_sample)
        old_sample_index = sample_ids.index(sample)
        subject_indices = [sample_ids.index(subject_sample)
                           for subject_sample in samples_from_subject[sample]]

        for index1 in range(num_otus):
            # make sure that the count is not zero across all of the
            # subject's samples
            if any(otu_table[index1, sample_index] > 0
                   for sample_index in subject_indices):
                # the new count is the value at the later timepoint minus
                # the value at timepoint zero
                new_otu_counts[index1, index2] = \
                    otu_table[index1, old_sample_index] - \
                    otu_table[index1, tpz_sample_index]
            else:
                # placeholder for an OTU with no positive value in any of
                # the subject's samples
                new_otu_counts[index1, index2] = 999999999

    return table_factory(new_otu_counts, new_sample_ids,
                         otu_table.ObservationIds,
                         observation_metadata=otu_table.ObservationMetadata)
def merge_otu_tables(vcf_fps):
    """Takes a list of multiple vcf files and returns a single biom table of all files.

    vcf_fps : list of file paths; paths ending in 'gz' are opened with
              gzip, paths ending in 'vcf' as plain text

    Returns (merged table, observation ids of the last file read), or
    (None, None) when vcf_fps is empty.

    Raises ValueError for a path with any other ending.
    """
    master_table = None
    master_observation_ids = None
    # stays None when no file is given, so the return below is always bound
    observation_ids = None
    for vcf_fp in vcf_fps:
        # open by extension; anything else is rejected before touching disk
        if vcf_fp.endswith('gz'):
            vcf_f = gzip.open(vcf_fp)
        elif vcf_fp.endswith('vcf'):
            vcf_f = open(vcf_fp, 'U')
        else:
            raise ValueError("Invalid file format or extension, only '.vcf' "
                             "or '.vcf.gz' are accepted")
        try:
            data, sample_ids, observation_ids, sample_md, observation_md = \
                create_biom_table(vcf_f)
            biom_table = table_factory(data,
                                       sample_ids,
                                       observation_ids,
                                       sample_md,
                                       observation_md,
                                       constructor=SparseOTUTable)
        finally:
            # release the handle even if parsing fails
            vcf_f.close()

        # NOTE(review): the running intersection is computed but never
        # returned; the last file's observation_ids are returned instead.
        # Confirm which one callers expect.
        if master_observation_ids is None:
            master_observation_ids = observation_ids
        else:
            master_observation_ids = set(master_observation_ids) & set(observation_ids)

        if master_table is None:
            master_table = biom_table
        else:
            # merge() hands back the merged table, so keep its result
            master_table = master_table.merge(biom_table)
    return master_table, observation_ids
Exemple #22
0
def format_summarize_taxa(summary, header, delimiter=';',
                          file_format='classic'):
    """Yield the formatted pieces of a summarized taxonomy table.

    summary     : iterable of rows; row[0] is the taxon (a sequence of
                  level names), row[1:] are the per-sample values
    header      : column labels; the first one labels the taxon column
    delimiter   : string used to join the taxon levels
    file_format : 'classic' yields a tab-separated header line followed by
                  one line per row; 'biom' yields a single string produced
                  by format_biom_table

    This is a generator, so the ValueError for any other file_format is
    raised when iteration starts, not when the function is called.
    """
    if file_format == 'biom':
        # the first header entry labels the taxon column, not a sample
        sample_ids = header[1:]

        observation_ids = []
        data = []
        for row in summary:
            taxon, counts = row[0], row[1:]
            # the joined taxonomic levels serve as the observation id
            observation_ids.append(delimiter.join(taxon))
            data.append(counts)

        table = table_factory(asarray(data),
                              sample_ids,
                              observation_ids,
                              constructor=SparseTaxonTable)
        yield format_biom_table(table)
    elif file_format == 'classic':
        yield '\t'.join(header) + "\n"
        for row in summary:
            # taxon levels joined as foo;bar;foobar, then the counts
            cells = [delimiter.join(row[0])]
            cells.extend(str(value) for value in row[1:])
            yield '\t'.join(cells) + "\n"
    else:
        raise ValueError("Invalid file format '%s'. Must be either 'classic' "
                         "or 'biom'." % file_format)
def sample_mapping_to_biom_table(lines):
    """Converts the UniFrac sample mapping file to biom table object

    The sample mapping file is a required input for the UniFrac web interface.
    Each non-blank line holds three whitespace-separated fields:
    observation id, sample id and count.

    Corrects the sample ids to be MIENS compliant (via the translation
    table returned by build_sample_ids_transtable).

    lines: iterable of strings in sample mapping format; lines containing
     only whitespace are skipped

    Returns the result of table_factory called with a list of
    [observation index, sample index, count] triples and the sample and
    observation ids in order of first appearance.
    """
    trans_table = build_sample_ids_transtable()

    data = []
    sample_ids = []
    observation_ids = []
    # id -> position maps; a dict lookup replaces a linear list.index()
    # scan on every input line
    sample_positions = {}
    observation_positions = {}
    for line in lines:
        fields = line.strip().split()
        if not fields:
            # tolerate blank (e.g. trailing) lines
            continue
        observation_id = fields[0]
        sample_id = fields[1].translate(trans_table)
        count = float(fields[2])

        if sample_id not in sample_positions:
            sample_positions[sample_id] = len(sample_ids)
            sample_ids.append(sample_id)
        if observation_id not in observation_positions:
            observation_positions[observation_id] = len(observation_ids)
            observation_ids.append(observation_id)

        data.append([observation_positions[observation_id],
                     sample_positions[sample_id],
                     count])

    return table_factory(data, sample_ids, observation_ids)
Exemple #24
0
def format_summarize_taxa(summary, header, delimiter=';',
                          file_format='classic'):
    """Generate the output form of a summarized taxonomy table.

    summary: iterable of rows, each starting with a sequence of taxonomic
     level names followed by that row's values
    header: column labels; header[0] labels the taxon column and header[1:]
     supplies the sample ids of the biom table
    delimiter: separator placed between taxonomic levels
    file_format: 'classic' (tab-separated text lines) or 'biom' (a single
     item, the output of format_biom_table)

    Because this is a generator, an unsupported file_format raises
    ValueError on the first iteration step.
    """
    if file_format == 'classic':
        yield "%s\n" % '\t'.join(header)
        for row in summary:
            # taxon levels joined as foo;bar;foobar, then the counts
            cells = [delimiter.join(row[0])] + [str(v) for v in row[1:]]
            yield "%s\n" % '\t'.join(cells)
        return

    if file_format != 'biom':
        raise ValueError("Invalid file format '%s'. Must be either 'classic' "
                         "or 'biom'." % file_format)

    # Drop the 'Taxon' / 'SampleId' label that occupies the first column.
    sample_ids = header[1:]

    observation_ids = []
    matrix_rows = []
    for row in summary:
        # An observation ID is the joined taxonomic levels of the row.
        observation_ids.append(delimiter.join(row[0]))
        matrix_rows.append(row[1:])

    biom_table = table_factory(asarray(matrix_rows), sample_ids,
                               observation_ids,
                               constructor=SparseTaxonTable)
    yield format_biom_table(biom_table)
def make_new_otu_counts(otu_table, sample_to_subtract, samples_from_subject):
    """make the converted otu table

    otu_table: table exposing ObservationIds, SampleIds and
     [observation index, sample index] item access
    sample_to_subtract: dict mapping each output sample id to the id of
     the sample whose value is subtracted from it
    samples_from_subject: dict mapping each output sample id to the ids
     of all samples that are checked for nonzero counts

    Each output cell is the value of the sample minus the value of its
    paired sample. When an OTU has no count above zero in any of the
    samples listed in samples_from_subject for that sample, the cell is
    set to the placeholder 999999999 instead.

    Raises ValueError when a paired sample is missing from the otu table.
    """
    # sorted() works whether dict.keys() returns a list or a view
    new_sample_ids = sorted(sample_to_subtract.keys())
    new_otu_counts = zeros([len(otu_table.ObservationIds),
                            len(new_sample_ids)])
    for index1, otu in enumerate(otu_table.ObservationIds):
        for index2, sample in enumerate(new_sample_ids):
            tpz_sample = sample_to_subtract[sample]
            if tpz_sample in otu_table.SampleIds:
                tpz_sample_index = otu_table.SampleIds.index(tpz_sample)
            else:
                raise ValueError(
                    "There are samples in the category mapping file that "
                    "are not in the otu table, such as sample: " +
                    tpz_sample + ". Removing these samples from the "
                    "category mapping file will allow you to proceed.")
            # get the new count as the relative abundance of the otu at
            # the later timepoint minus the relative abundance at timepoint
            # zero
            old_sample_index = otu_table.SampleIds.index(sample)
            new_count = otu_table[index1, old_sample_index] - \
                otu_table[index1, tpz_sample_index]
            # make sure that the count is not zero across all of the
            # subject's samples; every listed sample is looked up (no early
            # exit) so an unknown sample id still raises as before
            has_nonzeros = False
            subject_sample_ids = samples_from_subject[sample]
            for i in subject_sample_ids:
                sample_index = otu_table.SampleIds.index(i)
                if otu_table[index1, sample_index] > 0:
                    has_nonzeros = True
            if has_nonzeros:
                new_otu_counts[index1, index2] = new_count
            else:
                new_otu_counts[index1, index2] = 999999999
    return table_factory(new_otu_counts, new_sample_ids,
                         otu_table.ObservationIds,
                         observation_metadata=otu_table.ObservationMetadata)
Exemple #26
0
def simsam_range(table,
                 tree,
                 simulated_sample_sizes,
                 dissimilarities,
                 mapping_f=None):
    """Applies sim_otu_table over a range of parameters

     table: the input table to simulate samples from
     tree: tree relating the OTUs in the input table
     simulated_sample_sizes: a list of ints defining how many
      output samples should be created per input sample
     dissimilarities: a list of floats containing the
      dissimilarities to use in simulating tables
     mapping_f: file handle for metadata mapping file, if
      a mapping file should be created with the samples from
      each simulated table

     This function will yield tuples with the following form:
      (output table, output mapping lines, simulated_sample_size, dissimilarity)

     If the user does not provide mapping_f, the tuples will look like:
      (output table, None, simulated_sample_size, dissimilarity)

     One tuple is yielded for every combination of simulated sample size
     and dissimilarity; the mapping lines are computed once per sample size.

    """
    if mapping_f is not None:
        # if the user provided a mapping file, load it into
        # a list for repeated use, and define the function for
        # processing the mapping file
        mapping_lines = list(mapping_f)
        process_map = create_replicated_mapping_file
    else:
        # otherwise create a dummy function for processing the
        # mapping file so we don't have to check whether it
        # exists on every iteration
        mapping_lines = None

        def process_map(mapping_lines, simulated_sample_size, sample_ids):
            return None

    for simulated_sample_size in simulated_sample_sizes:
        # create the output mapping file data
        output_mapping_lines = process_map(mapping_lines,
                                           simulated_sample_size,
                                           table.SampleIds)
        for dissimilarity in dissimilarities:
            # create the simulated otu table
            output_sample_ids, output_otu_ids, output_data, output_metadata = \
                sim_otu_table(table.SampleIds,
                              table.ObservationIds,
                              table.iterSamples(),
                              table.ObservationMetadata,
                              tree,
                              simulated_sample_size,
                              dissimilarity)
            output_table = table_factory(output_data,
                                         output_sample_ids,
                                         output_otu_ids,
                                         observation_metadata=output_metadata)
            yield (output_table,
                   output_mapping_lines,
                   simulated_sample_size,
                   dissimilarity)
Exemple #27
0
def make_otu_table(otu_map_f,
                   otu_to_taxonomy=None,
                   delim='_',
                   table_id=None,
                   sample_metadata=None,
                   constructor=SparseOTUTable):
    """Build an OTU table from an OTU map

     otu_map_f: OTU map, handed to parse_otu_map together with delim
     otu_to_taxonomy: optional dict mapping OTU ids to ';'-separated
      taxonomy strings; OTUs missing from the dict get the taxonomy
      ["None"]
     delim: delimiter passed through to parse_otu_map
     table_id: passed through to table_factory
     sample_metadata: must be None; anything else raises
      NotImplementedError
     constructor: table class passed through to table_factory

     Raises ValueError if table_factory cannot build the table.

     NOTE(review): the original block built the table but never returned
     it (the function always returned None). The table object is returned
     here; confirm that callers do not expect an already formatted string.
    """
    data, sample_ids, otu_ids = parse_otu_map(otu_map_f, delim)

    if otu_to_taxonomy is not None:
        otu_metadata = []
        for o in otu_ids:
            try:
                otu_metadata.append(
                    {'taxonomy': otu_to_taxonomy[o].split(';')})
            except KeyError:
                otu_metadata.append({'taxonomy': ["None"]})
    else:
        otu_metadata = None

    if sample_metadata is not None:
        raise NotImplementedError(
            "Passing of sample metadata to make_otu_table is not currently supported.")
    try:
        otu_table = table_factory(data, sample_ids, otu_ids,
                                  sample_metadata=sample_metadata,
                                  observation_metadata=otu_metadata,
                                  table_id=table_id,
                                  constructor=constructor,
                                  dtype=int)
    except ValueError as e:
        raise ValueError(
            "Couldn't create OTU table. Is your OTU map empty?"
            " Original error message: %s" % (str(e)))
    return otu_table
def predict_metagenomes(otu_table,genome_table,verbose=False):
    """Predict metagenomes from an OTU table and a genome table.

    otu_table: table supplying the sample ids and the sample metadata
    genome_table: table supplying the observation ids and the observation
     metadata of the result
    verbose: passed through to transfer_metadata

    Returns a table built with the SparseGeneTable constructor holding the
    rounded matrix product of the two data sets, carrying the sample
    metadata of otu_table and the observation metadata of genome_table.
    """
    # The third item returned by the helper is not needed here.
    otu_data, genome_data, _overlapping_otus = \
        extract_otu_and_genome_data(otu_table, genome_table)

    # Matrix multiplication yields the predicted metagenomes; the counts
    # are then rounded to the nearest whole numbers.
    otu_matrix = array(otu_data)
    genome_matrix = array(genome_data)
    predicted_counts = around(dot(otu_matrix.T, genome_matrix).T)

    # Sample ids come from the otu table; observation ids are the
    # observations of the genome table.
    metagenome_table = table_factory(predicted_counts,
                                     otu_table.SampleIds,
                                     genome_table.ObservationIds,
                                     constructor=SparseGeneTable)

    # Samples are unchanged, so their metadata is copied from the otu table.
    with_sample_metadata = transfer_metadata(
        otu_table, metagenome_table,
        donor_metadata_type='SampleMetadata',
        recipient_metadata_type='SampleMetadata',
        verbose=verbose)

    # Observation metadata is copied from the genome table.
    return transfer_metadata(
        genome_table, with_sample_metadata,
        donor_metadata_type='ObservationMetadata',
        recipient_metadata_type='ObservationMetadata',
        verbose=verbose)
Exemple #29
0
def simsam_range(table,
                 tree,
                 simulated_sample_sizes,
                 dissimilarities,
                 mapping_f=None):
    """Applies sim_otu_table over a range of parameters

     table: the input table to simulate samples from
     tree: tree relating the OTUs in the input table
     simulated_sample_sizes: a list of ints defining how many
      output samples should be created per input sample
     dissimilarities: a list of floats containing the
      dissimilarities to use in simulating tables
     mapping_f: file handle for metadata mapping file, if
      a mapping file should be created with the samples from
      each simulated table

     This function will yield tuples with the following form:
      (output table, output mapping lines, simulated_sample_size, dissimilarity)

     If the user does not provide mapping_f, the tuples will look like:
      (output table, None, simulated_sample_size, dissimilarity)

    """
    # Identity comparison: None is a singleton, and "!= None" could be
    # redefined by an object's comparison methods.
    if mapping_f is not None:
        # if the user provided a mapping file, load it into
        # a list for repeated use, and define the function for
        # processing the mapping file
        mapping_lines = list(mapping_f)
        process_map = create_replicated_mapping_file
    else:
        # otherwise create a dummy function for processing the
        # mapping file so we don't have to check whether it
        # exists on every iteration
        mapping_lines = None

        def process_map(mapping_lines, simulated_sample_size, sample_ids):
            return None

    for simulated_sample_size in simulated_sample_sizes:
        # create the output mapping file data (once per sample size)
        output_mapping_lines = process_map(
            mapping_lines, simulated_sample_size, table.SampleIds)
        for dissimilarity in dissimilarities:
            # create the simulated otu table
            (output_sample_ids,
             output_otu_ids,
             output_data,
             output_metadata) = sim_otu_table(table.SampleIds,
                                              table.ObservationIds,
                                              table.iterSamples(),
                                              table.ObservationMetadata,
                                              tree,
                                              simulated_sample_size,
                                              dissimilarity)
            output_table = table_factory(output_data,
                                         output_sample_ids,
                                         output_otu_ids,
                                         observation_metadata=output_metadata)
            yield (output_table, output_mapping_lines, simulated_sample_size,
                   dissimilarity)
Exemple #30
0
def transpose_biom(table):
    """Return a new DenseOTUTable that is the transpose of table.

    The data matrix is transposed and the observation ids and sample ids
    swap roles. Metadata is not passed on to the new table.

    Raises ValueError if the class name of table does not start with
    "Dense" (the transpose reads the dense data matrix directly).
    """
    # files must be in dense format
    if not table.__class__.__name__.startswith("Dense"):
        raise ValueError("Only 'Dense' biom type tables can be transposed. "
                         "Please convert and try again.")
    return table_factory(table._data.T,
                         table.ObservationIds,
                         table.SampleIds,
                         constructor=DenseOTUTable)
def main():
    """Normalize an OTU table by per-OTU copy number and write it as biom.

    Reads the OTU table named by opts.input_otu_fp (classic tab-delimited
    when opts.input_format_classic is set, biom otherwise) and the count
    table named by opts.input_count_fp (read through gzip when the name
    ends in '.gz'). Only OTUs that exist as sample ids in the count table
    are kept. Each kept OTU gets its rounded copy number attached as
    observation metadata under opts.metadata_identifer, and the result of
    normObservationByMetadata is written to opts.output_otu_fp.

    Raises ValueError if a copy number cannot be converted to an integer
    or is smaller than 1.
    """
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    input_ext=path.splitext(opts.input_otu_fp)[1]
    if opts.input_format_classic:
        with open(opts.input_otu_fp,'U') as otu_f:
            otu_table=parse_classic_table_to_rich_table(otu_f,None,None,None,DenseOTUTable)
    else:
        if input_ext != '.biom':
            sys.stderr.write("\nOTU table does not have '.biom' extension! If loading causes error consider using '-f' option to load tab-delimited OTU table!\n\n")
        with open(opts.input_otu_fp,'U') as otu_f:
            otu_table = parse_biom_table(otu_f)

    ext=path.splitext(opts.input_count_fp)[1]
    if (ext == '.gz'):
        with gzip.open(opts.input_count_fp,'rb') as count_f:
            count_table = parse_biom_table(count_f)
    else:
        with open(opts.input_count_fp,'U') as count_f:
            count_table = parse_biom_table(count_f)

    #Need to only keep data relevant to our otu list
    ids=[str(x[1]) for x in otu_table.iterObservations()]

    #copy numbers are looked up in the first observation of the count table
    ob_id=count_table.ObservationIds[0]

    filtered_otus=[]
    filtered_values=[]
    for x in ids:
        if count_table.sampleExists(x):
            filtered_otus.append(x)
            filtered_values.append(otu_table.observationData(x))

    filtered_otu_table=table_factory(filtered_values,otu_table.SampleIds,filtered_otus, constructor=DenseOTUTable)

    copy_numbers_filtered={}
    for x in filtered_otus:
        value = count_table.getValueByIds(ob_id,x)
        try:
            #data can be floats so round them and make them integers
            value = int(round(float(value)))
        except (TypeError, ValueError):
            #report both the offending value and the OTU it belongs to
            raise ValueError(
                "Invalid copy number %r passed for OTU ID %s. Must be int-able." % (value, x))
        if value < 1:
            raise ValueError("Copy numbers must be greater than or equal to 1.")

        copy_numbers_filtered[x]={opts.metadata_identifer:value}

    filtered_otu_table.addObservationMetadata(copy_numbers_filtered)

    normalized_table = filtered_otu_table.normObservationByMetadata(opts.metadata_identifer)

    make_output_dir_for_file(opts.output_otu_fp)
    #the with block guarantees the output is flushed and closed
    with open(opts.output_otu_fp,'w') as out_f:
        out_f.write(normalized_table.getBiomFormatJsonString('PICRUST'))
    def setUp(self):
        """Define some test data.

        Builds three dense OTU tables, writes each one in biom format to
        its own temporary file and parses two newick trees. The paths of
        the written files are collected in self.files_to_remove; the temp
        directory is added to self.dirs_to_remove only when this method
        had to create it.
        """
        self.qiime_config = load_qiime_config()
        self.dirs_to_remove = []

        self.tmp_dir = self.qiime_config["temp_dir"] or "/tmp/"
        if not exists(self.tmp_dir):
            makedirs(self.tmp_dir)
            # if test creates the temp dir, also remove it
            self.dirs_to_remove.append(self.tmp_dir)

        def write_biom(table):
            """Write table in biom format to a new temp file; return its path."""
            fp = get_tmp_filename(
                tmp_dir=self.tmp_dir, prefix="alpha_diversity_tests", suffix=".biom", result_constructor=str
            )
            # close explicitly so the content is on disk before any test
            # reads the file
            with open(fp, "w") as biom_f:
                biom_f.write(format_biom_table(table))
            return fp

        self.otu_table1 = table_factory(
            data=array([[2, 0, 0, 1], [1, 1, 1, 1], [0, 0, 0, 0]]).T,
            sample_ids=list("XYZ"),
            observation_ids=list("abcd"),
            constructor=DenseOTUTable,
        )
        self.otu_table1_fp = write_biom(self.otu_table1)

        # same data as otu_table1, but the last observation id is "d_"
        self.otu_table2 = table_factory(
            data=array([[2, 0, 0, 1], [1, 1, 1, 1], [0, 0, 0, 0]]).T,
            sample_ids=list("XYZ"),
            observation_ids=["a", "b", "c", "d_"],
            constructor=DenseOTUTable,
        )
        self.otu_table2_fp = write_biom(self.otu_table2)

        self.single_sample_otu_table = table_factory(
            data=array([[2, 0, 0, 1]]).T, sample_ids=list("X"), observation_ids=list("abcd"), constructor=DenseOTUTable
        )
        self.single_sample_otu_table_fp = write_biom(self.single_sample_otu_table)

        self.tree1 = parse_newick("((a:2,b:3):2,(c:1,d:2):7);")
        self.tree2 = parse_newick("((a:2,'b':3):2,(c:1,'d_':2):7);")

        self.files_to_remove = [self.otu_table1_fp, self.otu_table2_fp, self.single_sample_otu_table_fp]
Exemple #33
0
def biom_table_from_predictions(predictions,trait_ids):
    """Build a DenseOTUTable from a dict of per-organism trait predictions.

    predictions: dict mapping an organism id to its sequence of values
     (one value per entry of trait_ids -- presumably; not checked here)
    trait_ids: ids used as the observation ids of the table

    The organism ids become the sample ids. The values are transposed so
    that each row of the data matrix holds one trait and each column one
    organism, and are converted to int.
    """
    # keys() and values() of an unmodified dict iterate in the same order
    organism_ids=list(predictions.keys())
    # zip(*...) transposes the matrix; explicit lists keep this working
    # when zip/map return iterators instead of lists
    data=array([list(column) for column in zip(*predictions.values())],
               dtype=int)
    return table_factory(data,organism_ids,trait_ids, constructor=DenseOTUTable)
Exemple #34
0
    def setUp(self):
        """Create OTU tables with and without metadata plus their temp files.

        Both tables are formatted as biom strings and written to temporary
        files; a temporary directory (self.rare_dir) is created as well.
        The created paths are recorded in self._paths_to_clean_up and
        self._dirs_to_clean_up.
        """

        self.qiime_config = load_qiime_config()
        self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'

        self.otu_table_data = numpy.array([[2,1,0],
                                      [0,5,0],
                                      [0,3,0],
                                      [1,2,0]])
        self.sample_names = list('YXZ')
        self.taxon_names = list('bacd')
        self.otu_metadata = [{'domain':'Archaea'},
                          {'domain':'Bacteria'},
                          {'domain':'Bacteria'},
                          {'domain':'Bacteria'}]


        self.otu_table = table_factory(self.otu_table_data,
                                        self.sample_names,
                                        self.taxon_names)
        self.otu_table_meta = table_factory(self.otu_table_data,
          self.sample_names, self.taxon_names,
          observation_metadata=self.otu_metadata)

        self.otu_table_str = format_biom_table(self.otu_table)
        self.otu_table_meta_str = format_biom_table(self.otu_table_meta)

        self.otu_table_fp = get_tmp_filename(tmp_dir=self.tmp_dir,
          prefix='test_rarefaction',suffix='.biom')
        self.otu_table_meta_fp = get_tmp_filename(tmp_dir=self.tmp_dir,
          prefix='test_rarefaction',suffix='.biom')

        self.rare_dir = get_tmp_filename(tmp_dir=self.tmp_dir,
         prefix='test_rarefaction_dir',suffix='',result_constructor=str)
        os.mkdir(self.rare_dir)

        # write inside with blocks so both files are flushed and closed
        # before any test reads them
        with open(self.otu_table_fp,'w') as otu_table_f:
            otu_table_f.write(self.otu_table_str)
        with open(self.otu_table_meta_fp,'w') as otu_table_meta_f:
            otu_table_meta_f.write(self.otu_table_meta_str)


        self._paths_to_clean_up=[self.otu_table_fp,self.otu_table_meta_fp]
        self._dirs_to_clean_up=[self.rare_dir]
Exemple #35
0
    def test_sample_rare_unique(self):
        """sample_rare_unique: results without a table and with a table"""
        tree = update_tree(None, tax_strings_by_sample)
        tax_by_sample = dict((sample_id, tax_strings_by_sample[position])
                             for position, sample_id in enumerate('abc'))

        # Taxa lists that recur in the expected results below.
        taxa_shared_by_a_and_b = [['k__1', 'p__x', 'c__'],
                                  ['k__1', 'p__y', 'c__3']]
        taxa_only_for_a = [['k__1', 'p__x', 'c__1'],
                           ['k__1', 'p__x', 'c__2']]

        # First pass: no table is supplied, so None is expected back.
        exp = [('a', None, taxa_shared_by_a_and_b, taxa_only_for_a),
               ('b', None, taxa_shared_by_a_and_b, []),
               ('c', None, [], [])]
        obs = sample_rare_unique(tree, None, tax_by_sample, 0.7)
        self.assertEqual(sorted(obs), exp)

        # Second pass: one expected table per sample.
        table_a = table_factory(array([[14, 15, 16]]),
                                list('abc'),
                                ['k__1; p__y; c__'])
        table_b = table_factory(array([[1, 2, 3],
                                       [4, 5, 6],
                                       [14, 15, 16]]),
                                list('abc'),
                                ['k__1; p__x; c__1',
                                 'k__1; p__x; c__2',
                                 'k__1; p__y; c__'])
        table_c = table_factory(array([[1, 2, 3],
                                       [4, 5, 6],
                                       [7, 8, 9],
                                       [10, 11, 12],
                                       [14, 15, 16]]),
                                list('abc'),
                                ['k__1; p__x; c__1',
                                 'k__1; p__x; c__2',
                                 'k__1; p__x; c__',
                                 'k__1; p__y; c__3',
                                 'k__1; p__y; c__'])

        exp = [('a', table_a, taxa_shared_by_a_and_b, taxa_only_for_a),
               ('b', table_b, taxa_shared_by_a_and_b, []),
               ('c', table_c, [], [])]

        # NOTE(review): `table` is not defined in this method; presumably
        # a module-level fixture -- confirm.
        obs = sample_rare_unique(tree, table, tax_by_sample, 0.7)
        for observed, expected in zip(sorted(obs), exp):
            # compare field by field
            for field in range(4):
                self.assertEqual(observed[field], expected[field])
Exemple #36
0
    def test_sample_rare_unique(self):
        """sample_rare_unique: checked once with table=None, once with a table"""
        t = update_tree(None, tax_strings_by_sample)
        tax_by_sample = {}
        for idx, sample_id in enumerate(['a', 'b', 'c']):
            tax_by_sample[sample_id] = tax_strings_by_sample[idx]

        def expected_results(tab_a, tab_b, tab_c):
            # The expected tuples differ between the two passes only in
            # their second element.
            return [('a', tab_a,
                     [['k__1', 'p__x', 'c__'], ['k__1', 'p__y', 'c__3']],
                     [['k__1', 'p__x', 'c__1'], ['k__1', 'p__x', 'c__2']]),
                    ('b', tab_b,
                     [['k__1', 'p__x', 'c__'], ['k__1', 'p__y', 'c__3']],
                     []),
                    ('c', tab_c, [], [])]

        # Pass 1: without a table
        exp = expected_results(None, None, None)
        obs = sample_rare_unique(t, None, tax_by_sample, 0.7)
        self.assertEqual(sorted(obs), exp)

        # Pass 2: with a table
        sample_ids = ['a', 'b', 'c']
        table_a = table_factory(array([[14, 15, 16]]), list(sample_ids),
                                ['k__1; p__y; c__'])
        table_b = table_factory(
            array([[1, 2, 3], [4, 5, 6], [14, 15, 16]]),
            list(sample_ids),
            ['k__1; p__x; c__1', 'k__1; p__x; c__2', 'k__1; p__y; c__'])
        table_c = table_factory(
            array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12],
                   [14, 15, 16]]),
            list(sample_ids),
            ['k__1; p__x; c__1', 'k__1; p__x; c__2', 'k__1; p__x; c__',
             'k__1; p__y; c__3', 'k__1; p__y; c__'])

        exp = expected_results(table_a, table_b, table_c)

        # NOTE(review): `table` is not bound inside this method; presumably
        # it is defined at module level -- verify.
        obs = sample_rare_unique(t, table, tax_by_sample, 0.7)
        for o, e in zip(sorted(obs), exp):
            sample_id_o, table_o, third_o, fourth_o = o[0], o[1], o[2], o[3]
            self.assertEqual(sample_id_o, e[0])
            self.assertEqual(table_o, e[1])
            self.assertEqual(third_o, e[2])
            self.assertEqual(fourth_o, e[3])
    def setUp(self):
        """define some top-level data

        Creates two OTU tables over the same 3 x 6 count matrix (one with
        single-level taxonomy, one with four-level lineages), the matching
        lineage list, mapping-file style metadata, a newick tree string
        and a temporary file name for the heatmap.
        """

        self.otu_table_values = array([[0, 0, 9, 5, 3, 1],
                                       [1, 5, 4, 0, 3, 2],
                                       [2, 3, 1, 1, 2, 5]])
        # Sparse view of the matrix above, for reference only (this used
        # to be a dict literal that was evaluated and thrown away):
        # {(0, 2): 9.0, (0, 3): 5.0, (0, 4): 3.0, (0, 5): 1.0,
        #  (1, 0): 1.0, (1, 1): 5.0, (1, 2): 4.0, (1, 4): 3.0, (1, 5): 2.0,
        #  (2, 0): 2.0, (2, 1): 3.0, (2, 2): 1.0, (2, 3): 1.0, (2, 4): 2.0,
        #  (2, 5): 5.0}
        self.otu_table = table_factory(self.otu_table_values,
                                       ['Sample1', 'Sample2', 'Sample3',
                                        'Sample4', 'Sample5', 'Sample6'],
                                       ['OTU1', 'OTU2', 'OTU3'],
                                       [None, None, None, None, None, None],
                                       [{"taxonomy": ['Bacteria']},
                                        {"taxonomy": ['Archaea']},
                                        {"taxonomy": ['Streptococcus']}])
        # same counts, but with full four-level lineages as taxonomy
        self.otu_table_f = table_factory(self.otu_table_values,
                                         ['Sample1', 'Sample2', 'Sample3',
                                          'Sample4', 'Sample5', 'Sample6'],
                                         ['OTU1', 'OTU2', 'OTU3'],
                                         [None, None, None, None, None, None],
                                         [{"taxonomy": ['1A', '1B', '1C', 'Bacteria']},
                                          {"taxonomy":
                                           ['2A', '2B', '2C', 'Archaea']},
                                          {"taxonomy": ['3A', '3B', '3C', 'Streptococcus']}])

        self.full_lineages = [['1A', '1B', '1C', 'Bacteria'],
                              ['2A', '2B', '2C', 'Archaea'],
                              ['3A', '3B', '3C', 'Streptococcus']]
        # [rows, header, third element (empty here)]
        self.metadata = [[['Sample1', 'NA', 'A'],
                          ['Sample2', 'NA', 'B'],
                          ['Sample3', 'NA', 'A'],
                          ['Sample4', 'NA', 'B'],
                          ['Sample5', 'NA', 'A'],
                          ['Sample6', 'NA', 'B']],
                         ['SampleID', 'CAT1', 'CAT2'], []]
        self.tree_text = ["('OTU3',('OTU1','OTU2'))"]
        self.tmp_heatmap_fpath = get_tmp_filename(
            prefix='test_heatmap_',
            suffix='.pdf'
        )
Exemple #38
0
def parse_biom_table_json(json_table, data_pump=None):
    """Build a table object from a parsed biom JSON document.

    json_table: dict with the keys 'columns', 'rows',
     'matrix_element_type', 'shape' and (when data_pump is None) 'data'
    data_pump: optional replacement for json_table['data']; when given it
     is handed to table_factory instead of the embedded data

    Returns the object created by table_factory.
    """
    columns = json_table['columns']
    sample_ids = [column['id'] for column in columns]
    sample_metadata = [column['metadata'] for column in columns]

    rows = json_table['rows']
    obs_ids = [row['id'] for row in rows]
    obs_metadata = [row['metadata'] for row in rows]

    dtype = MATRIX_ELEMENT_TYPE[json_table['matrix_element_type']]

    # The data source is the only thing that differs between the two
    # ways of calling table_factory.
    if data_pump is None:
        table_data = json_table['data']
    else:
        table_data = data_pump

    return table_factory(table_data, sample_ids, obs_ids,
                         sample_metadata, obs_metadata,
                         shape=json_table['shape'],
                         dtype=dtype)
Exemple #39
0
    def test_generate_heatmap_plots(self):
        """generate_heatmap_plots: create default output files"""

        # create directories and move js files to verify everything works
        # in the script file
        dir_path = join(self.output_dir, 'test')
        create_dir(dir_path)

        js_dir_path = join(dir_path, 'js')
        create_dir(js_dir_path)

        self._folders_to_cleanup.append(dir_path)

        qiime_dir = get_qiime_project_dir()

        js_path = join(qiime_dir, 'qiime/support_files/js')
        for js_file in ('overlib.js',
                        'otu_count_display.js',
                        'jquery.js',
                        'jquery.tablednd_0_5.js'):
            shutil.copyfile(join(js_path, js_file),
                            join(js_dir_path, js_file))

        # generate otu_table object
        orig_data = array([[0, 1, 2], [1000, 0, 0]])

        orig_otu_table = table_factory(orig_data,
                                       ['Sample1', 'Sample2', 'Sample3'],
                                       ['OTU1', 'OTU2'],
                                       [None, None, None],
                                       [{"taxonomy": ["Bacteria"]},
                                        {"taxonomy": ["Archaea"]}])

        # put in an OTU sort order and sample order
        otu_sort = ['OTU2', 'OTU1']
        sample_sort = ['Sample2', 'Sample1', 'Sample3']
        num_otu_hits = 3

        # generate test files
        generate_heatmap_plots(num_otu_hits,
                               orig_otu_table,
                               otu_sort,
                               sample_sort,
                               dir_path,
                               js_dir_path,
                               'test',
                               fractional_values=False)

        # read the generated file inside a with block so the handle is
        # closed before the output folder is cleaned up
        with open(join(js_dir_path, 'test.js'), 'U') as js_f:
            obs_js_output = js_f.read()
        self.assertEqual(obs_js_output, exp_js_output_file)
    def test_get_log_transform(self):
        """get_log_transform: transformed table data matches expected data"""

        def build_table(values):
            # Every table in this test uses the same ids and taxonomy
            # metadata; fresh lists and dicts are created on each call.
            return table_factory(values,
                                 ['Sample1', 'Sample2', 'Sample3'],
                                 ['OTU1', 'OTU2'],
                                 [None, None, None],
                                 [{"taxonomy": ["Bacteria"]},
                                  {"taxonomy": ["Archaea"]}])

        # nonzero entries: (0,1)=1, (0,2)=2, (1,0)=1000
        orig_otu_table = build_table(array([[0, 1, 2], [1000, 0, 0]]))

        # nonzero entries: (0,1)=0.69314718, (0,2)=1.38629436,
        # (1,0)=7.60090246
        exp_otu_table = build_table(
            array([[0, 0.69314718, 1.38629436], [7.60090246, 0, 0]]))

        log_otu_table = get_log_transform(orig_otu_table, eps=None)

        # comparing directly log_otu_table against exp_otu_table doesn't work,
        #  needs to be modified in the otu table object
        observed_items = log_otu_table._data.items()
        expected_items = exp_otu_table._data.items()
        self.assertFloatEqual(observed_items, expected_items)
Exemple #41
0
def main():
    """Convert the T-RFLP file at opts.input_path into a BIOM table file.

    The input is parsed with parse_trflp, wrapped in a table via
    table_factory and written to opts.output_path with format_biom_table.

    Raises:
        IOError: if opts.input_path is not an existing file.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    if not isfile(opts.input_path):
        raise IOError(
            "Input path (%s) not valid.  Does it exist?" % opts.input_path)

    # the input handle is released as soon as parsing has finished
    with open(opts.input_path, 'U') as input_f:
        samples, otus, data = parse_trflp(input_f)

    # the context manager closes (and flushes) the output even if building
    # or formatting the table fails
    with open(opts.output_path, 'w') as output_f:
        t = table_factory(data, samples, otus)
        output_f.write(format_biom_table(t))
    def setUp(self):
        """Create the OTU table fixtures shared by the tests"""
        self.output_dir = '/tmp/'

        # two OTUs counted in two samples
        self.otu_table = table_factory(
            array([[0, 0], [1, 5]]),
            ['Sample1', 'Sample2'],
            ['OTU1', 'OTU2'],
            [None, None],
            [{"taxonomy": ["Bacteria"]}, {"taxonomy": ["Archaea"]}])

        # same samples, only the OTU2 row
        self.filt_otu_table = table_factory(
            array([[1, 5]]),
            ['Sample1', 'Sample2'],
            ['OTU2'],
            [None, None],
            [{"taxonomy": ["Archaea"]}])

        self.num_otu_hits = 5
        self._folders_to_cleanup = []
    def test_get_log_transform(self):
        """Compare get_log_transform output with precomputed sample data"""
        built = []
        for values in ([[0, 1, 2], [1000, 0, 0]],
                       [[0, 0.69314718, 1.38629436], [7.60090246, 0, 0]]):
            # first pass builds the input table, second the expected one
            built.append(table_factory(array(values),
                                       ['Sample1', 'Sample2', 'Sample3'],
                                       ['OTU1', 'OTU2'],
                                       [None, None, None],
                                       [{"taxonomy": ["Bacteria"]},
                                        {"taxonomy": ["Archaea"]}]))
        input_table, expected_table = built

        transformed_table = get_log_transform(input_table, eps=None)

        # the tables are not compared directly; their per-sample data
        # vectors are compared instead
        observed = list(transformed_table.iterSampleData())
        expected = list(expected_table.iterSampleData())
        assert_almost_equal(observed, expected)
Exemple #44
0
    def setUp(self):
        """Create OTU table fixtures and write them to temporary BIOM files.

        Two tables are built from the same data, one without and one with
        observation metadata. Both are serialized with format_biom_table
        and written under the configured temp dir (falling back to
        "/tmp/"). The created files and the rarefaction directory are
        recorded in self._paths_to_clean_up / self._dirs_to_clean_up.
        """
        self.qiime_config = load_qiime_config()
        self.tmp_dir = self.qiime_config["temp_dir"] or "/tmp/"

        self.otu_table_data = numpy.array([[2, 1, 0], [0, 5, 0], [0, 3, 0], [1, 2, 0]])
        self.sample_names = list("YXZ")
        self.taxon_names = list("bacd")
        self.otu_metadata = [
            {"domain": "Archaea"},
            {"domain": "Bacteria"},
            {"domain": "Bacteria"},
            {"domain": "Bacteria"},
        ]

        self.otu_table = table_factory(self.otu_table_data, self.sample_names, self.taxon_names)
        self.otu_table_meta = table_factory(
            self.otu_table_data, self.sample_names, self.taxon_names, observation_metadata=self.otu_metadata
        )

        self.otu_table_str = format_biom_table(self.otu_table)
        self.otu_table_meta_str = format_biom_table(self.otu_table_meta)

        self.otu_table_fp = get_tmp_filename(tmp_dir=self.tmp_dir, prefix="test_rarefaction", suffix=".biom")
        self.otu_table_meta_fp = get_tmp_filename(tmp_dir=self.tmp_dir, prefix="test_rarefaction", suffix=".biom")

        self.rare_dir = get_tmp_filename(
            tmp_dir=self.tmp_dir, prefix="test_rarefaction_dir", suffix="", result_constructor=str
        )
        os.mkdir(self.rare_dir)

        # write through context managers so the fixture files are flushed
        # and closed before any test reads them
        with open(self.otu_table_fp, "w") as otu_table_f:
            otu_table_f.write(self.otu_table_str)
        with open(self.otu_table_meta_fp, "w") as otu_table_meta_f:
            otu_table_meta_f.write(self.otu_table_meta_str)

        self._paths_to_clean_up = [self.otu_table_fp, self.otu_table_meta_fp]
        self._dirs_to_clean_up = [self.rare_dir]
    def test_get_log_transform(self):
        """get_log_transform result equals the precomputed expected table"""

        def three_sample_table(counts):
            # both tables share the same ids and taxonomy metadata layout
            return table_factory(counts,
                                 ['Sample1', 'Sample2', 'Sample3'],
                                 ['OTU1', 'OTU2'],
                                 [None, None, None],
                                 [{"taxonomy": ["Bacteria"]},
                                  {"taxonomy": ["Archaea"]}])

        source_table = three_sample_table(array([[0, 1, 2], [1000, 0, 0]]))
        wanted_table = three_sample_table(
            array([[0, 0.69314718, 1.38629436], [7.60090246, 0, 0]]))

        result_table = get_log_transform(source_table, eps=None)

        # table objects are not compared directly; the per-sample data
        # vectors are compared instead
        self.assertFloatEqual(list(result_table.iterSampleData()),
                              list(wanted_table.iterSampleData()))
Exemple #46
0
def parse_biom_table_json(json_table, data_pump=None):
    """Build a table object from a parsed BIOM JSON document

    Sample ids/metadata are read from json_table["columns"] and observation
    ids/metadata from json_table["rows"]. The table data is
    json_table["data"] unless data_pump is given, in which case data_pump
    is handed to table_factory in its place.
    """
    columns = json_table["columns"]
    sample_ids = [column["id"] for column in columns]
    sample_metadata = [column["metadata"] for column in columns]

    rows = json_table["rows"]
    obs_ids = [row["id"] for row in rows]
    obs_metadata = [row["metadata"] for row in rows]

    dtype = MATRIX_ELEMENT_TYPE[json_table["matrix_element_type"]]

    # the only difference between the two sources is the data argument
    if data_pump is None:
        table_data = json_table["data"]
    else:
        table_data = data_pump

    return table_factory(
        table_data,
        sample_ids,
        obs_ids,
        sample_metadata,
        obs_metadata,
        shape=json_table["shape"],
        dtype=dtype,
    )
    def setUp(self):
        self.otu_table_vals = array([[1, 0, 2, 4], [1, 2, 0, 1], [0, 1, 1, 0],
                                     [1, 2, 1, 0]])

        {
            (0, 0): 1.0,
            (0, 2): 2.0,
            (0, 3): 4.0,
            (1, 0): 1.0,
            (1, 1): 2.0,
            (1, 3): 1.0,
            (2, 1): 1.0,
            (2, 2): 1.0,
            (3, 0): 1.0,
            (3, 1): 2.0,
            (3, 2): 1.0
        }

        self.otu_table = table_factory(self.otu_table_vals, [
            's1', 's2', 's3', 's4'
        ], ['0', '1', '2', '3'], None, [{
            "taxonomy": [
                "Root", "Bacteria", "Actinobacteria", "Actinobacteria",
                "Coriobacteridae", "Coriobacteriales", "Coriobacterineae",
                "Coriobacteriaceae"
            ]
        }, {
            "taxonomy": ["Root", "Bacteria", "Firmicutes", "\"Clostridia\""]
        }, {
            "taxonomy": ["Root", "Bacteria", "Firmicutes", "\"Clostridia\""]
        }, {
            "taxonomy": ["Root", "Bacteria"]
        }])

        # self.otu_table="""#Full OTU Counts
        # OTU ID\ts1\ts2\ts3\ts4\tConsensus Lineage
        # 0\t1\t0\t2\t4\tRoot;Bacteria;Actinobacteria;Actinobacteria;Coriobacteridae;Coriobacteriales;Coriobacterineae;Coriobacteriaceae
        # 1\t1\t2\t0\t1\tRoot;Bacteria;Firmicutes;"Clostridia"
        # 2\t0\t1\t1\t0\tRoot;Bacteria;Firmicutes;"Clostridia"
        # 3\t1\t2\t1\t0\tRoot;Bacteria""".split('\n')

        self.mapping = """#SampleID\tBarcodeSequence\tTreatment\tDescription
#Test mapping file
s1\tAAAA\tControl\tControl mouse, I.D. 354
s2\tGGGG\tControl\tControl mouse, I.D. 355
s3\tCCCC\tExp\tDisease mouse, I.D. 356
s4\tTTTT\tExp\tDisease mouse, I.D. 357""".split('\n')
Exemple #48
0
def main():
    """Simulate an OTU table from an input table and tree; write it as BIOM.

    Reads the BIOM table at opts.otu_table and the tree at opts.tree_file,
    passes them with opts.num and opts.dissim to sim_otu_table, and writes
    the resulting table to opts.output_file.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    # The output is opened first, as before. Context managers make sure it
    # is flushed and closed, and that the input handles are released, even
    # if parsing or simulation fails.
    with open(opts.output_file, 'w') as out_fh:
        with open(opts.otu_table, 'U') as otu_table_fh:
            otu_table = parse_biom_table(otu_table_fh)
        with open(opts.tree_file, 'U') as tree_fh:
            tree = DndParser(tree_fh)

        res_sam_names, res_otus, res_otu_mtx, res_otu_metadata = \
            sim_otu_table(otu_table.SampleIds, otu_table.ObservationIds,
                          otu_table.iterSamples(),
                          otu_table.ObservationMetadata, tree, opts.num,
                          opts.dissim)

        rich_table = table_factory(res_otu_mtx,
                                   res_sam_names,
                                   res_otus,
                                   observation_metadata=res_otu_metadata)
        out_fh.write(format_biom_table(rich_table))
Exemple #49
0
def merge_otu_tables(vcf_fps):
    """Build one BIOM table per VCF file and merge them into a single table.

    vcf_fps: iterable of file paths. Paths ending in 'gz' are opened with
        gzip, paths ending in 'vcf' as text; anything else is rejected.

    Returns a (master_table, observation_ids) tuple, where observation_ids
    holds the observation ids of the last file processed.

    Raises ValueError if a path has neither accepted extension.
    """
    master_table = None
    # Running intersection of the observation ids seen in every file.
    # NOTE(review): this is computed but never returned -- the second return
    # value is the id list of the last file. Confirm which one callers need.
    master_observation_ids = None
    for vcf_fp in vcf_fps:
        if vcf_fp.endswith('gz'):
            vcf_fh = gzip.open(vcf_fp)
        elif vcf_fp.endswith('vcf'):
            vcf_fh = open(vcf_fp, 'U')
        else:
            raise ValueError("Invalid file format or extension, only '.vcf' "
                             "or '.vcf.gz' are accepted")
        try:
            data, sample_ids, observation_ids, sample_md, observation_md =\
                create_biom_table(vcf_fh)
            if master_observation_ids is None:
                master_observation_ids = observation_ids
            else:
                master_observation_ids = set(master_observation_ids) & set(
                    observation_ids)
            biom_table = table_factory(data,
                                       sample_ids,
                                       observation_ids,
                                       sample_md,
                                       observation_md,
                                       constructor=SparseOTUTable)
        finally:
            # the handle is not needed once the table has been built
            vcf_fh.close()
        if master_table is None:
            master_table = biom_table
        else:
            # merge() returns the merged table rather than updating in
            # place, so the result has to be kept
            master_table = master_table.merge(biom_table)
    return master_table, observation_ids
def table_from_template(new_data,sample_ids,observation_ids,\
    sample_metadata_source=None,observation_metadata_source=None,\
    constructor=SparseGeneTable,verbose=False):
    """Build a new BIOM table from new_data, and transfer metadata from 1-2 existing tables

    new_data, sample_ids, observation_ids -- passed to table_factory.
    sample_metadata_source -- optional table whose SampleMetadata is
      transferred to the result.
    observation_metadata_source -- optional table whose ObservationMetadata
      is transferred to the result.
    constructor -- table class handed to table_factory.
    verbose -- forwarded to transfer_metadata.

    Returns the resulting table.
    """

    #Build the BIOM table, honouring the requested constructor
    #(it used to be hard-coded to SparseGeneTable, ignoring the parameter)
    result_table = table_factory(new_data, sample_ids, observation_ids,
                                 constructor=constructor)

    #Transfer sample metadata from the OTU table
    #to the metagenome table (samples are the same)
    if sample_metadata_source:
        result_table = transfer_metadata(
            sample_metadata_source, result_table,
            donor_metadata_type='SampleMetadata',
            recipient_metadata_type='SampleMetadata', verbose=verbose)

    #Now transfer observation metadata (e.g. gene metadata)
    #from the genome table to the result table
    if observation_metadata_source:
        result_table = transfer_metadata(
            observation_metadata_source, result_table,
            donor_metadata_type='ObservationMetadata',
            recipient_metadata_type='ObservationMetadata', verbose=verbose)

    return result_table
Exemple #51
0
def tobiom(input_fn,
           output_fn,
           tax_fn=None,
           sampledata_fn=None,
           otuids_fn=None):
    """Convert the OTU table in input_fn to a BIOM JSON file at output_fn.

    tax_fn: optional taxonomy file; observations missing from it get the
        taxonomy ["NA"].
    sampledata_fn: optional sample data table; it is re-indexed on the OTU
        table sample ids and missing values are filled with "NA".
    otuids_fn: optional tab-delimited file whose first two columns map OTU
        ids to replacement ids; ids not listed are kept unchanged.
    """

    otutable = micca.table.read(input_fn)

    data = otutable.to_numpy()
    observation_ids = otutable.index.tolist()
    sample_ids = otutable.columns.tolist()

    if tax_fn is None:
        observ_metadata = None
    else:
        tax_dict = micca.tax.read(tax_fn)
        # lookup with a default replaces the Python-2-only has_key() test
        observ_metadata = [{"taxonomy": tax_dict.get(oid, ["NA"])}
                           for oid in observation_ids]

    if sampledata_fn is None:
        sample_metadata = None
    else:
        sampledata = micca.table.read(sampledata_fn)
        # re-index with the sample IDs in the OTU table
        sampledata = sampledata.reindex(sample_ids)
        sampledata.fillna("NA", inplace=True)
        sample_metadata = [sampledata.loc[sid].to_dict() for sid in sample_ids]

    # replace the OTU ids with the original sequence ids when found in otuids
    # (done after the taxonomy lookup, which uses the OTU ids)
    if otuids_fn is not None:
        with open(otuids_fn, "rU") as otuids_handle:
            otuids_reader = csv.reader(otuids_handle, delimiter="\t")
            otuids = dict((row[0], row[1]) for row in otuids_reader)

        observation_ids = [otuids.get(oid, oid) for oid in observation_ids]

    generated_by = "micca v.{}".format(micca_version)

    if _biom_version == 2:
        table = Table(data=data,
                      sample_ids=sample_ids,
                      observation_ids=observation_ids,
                      sample_metadata=sample_metadata,
                      observation_metadata=observ_metadata,
                      type="OTU table")
        json_str = table.to_json(generated_by=generated_by)
    else:
        table = table_factory(data=data,
                              sample_ids=sample_ids,
                              observation_ids=observation_ids,
                              sample_metadata=sample_metadata,
                              observation_metadata=observ_metadata,
                              constructor=SparseOTUTable)
        json_str = table.getBiomFormatJsonString(generated_by=generated_by)

    with open(output_fn, 'wb') as output_handle:
        output_handle.write(json_str)
Exemple #52
0
def convert_precalc_to_biom(precalc_in, ids_to_load=None,transpose=True,md_prefix='metadata_'):
    """Loads PICRUSts tab-delimited version of the precalc file and outputs a BIOM object

    precalc_in -- the precalc content as a string, or an open file-like
      object yielding its lines.
    ids_to_load -- optional collection of row (OTU) ids to load; when empty
      or None every non-metadata row is loaded.
    transpose -- when True the data matrix is transposed before the table
      is built.
    md_prefix -- prefix marking metadata columns (in the header) and
      metadata rows (in the first field).

    Raises ValueError if no row is loaded, or if any requested id is not
    found in the file.
    """

    #if given a string convert to a filehandle
    if isinstance(precalc_in, (str, unicode)):
        fh = StringIO.StringIO(precalc_in)
    else:
        fh = precalc_in

    #first line has to be header
    header_ids = fh.readline().strip().split('\t')

    prefix_len = len(md_prefix)
    col_meta_locs = {}
    for idx, col_id in enumerate(header_ids):
        if col_id.startswith(md_prefix):
            col_meta_locs[col_id[prefix_len:]] = idx

    #trait columns sit between the id column and the metadata columns
    end_of_data = len(header_ids) - len(col_meta_locs)
    trait_ids = header_ids[1:end_of_data]

    col_meta = []
    row_meta = [{} for _ in trait_ids]

    if ids_to_load:
        #private copy: matched ids are removed from it as they are found
        ids_to_load = set(ids_to_load)
        load_all_ids = False
    else:
        load_all_ids = True

    matching = []
    otu_ids = []
    for line in fh:
        fields = line.strip().split('\t')
        row_id = fields[0]
        if row_id.startswith(md_prefix):
            #handle metadata

            #determine type of metadata (this may not be perfect)
            metadata_type = determine_metadata_type(line)
            md_name = row_id[prefix_len:]
            for idx, trait_meta in enumerate(row_meta):
                trait_meta[md_name] = parse_metadata_field(fields[idx+1], metadata_type)

        #ids_to_load is already a set here, no need to rebuild it per line
        elif load_all_ids or (row_id in ids_to_load):
            otu_ids.append(row_id)
            matching.append([float(value) for value in fields[1:end_of_data]])

            #add metadata
            col_meta.append(dict((meta_name, fields[loc])
                                 for meta_name, loc in col_meta_locs.items()))

            if not load_all_ids:
                ids_to_load.remove(row_id)

    if not otu_ids:
        #ids_to_load may be None/empty when every id was requested
        example_ids = list(ids_to_load or [])[:5]
        raise ValueError("No OTUs match identifiers in precalculated file. PICRUSt requires an OTU table reference/closed picked against GreenGenes.\nExample of the first 5 OTU ids from your table: {0}".format(', '.join(example_ids)))

    if ids_to_load:
        raise ValueError("One or more OTU ids were not found in the precalculated file!\nAre you using the correct --gg_version?\nExample of (the {0}) unknown OTU ids: {1}".format(len(ids_to_load),', '.join(list(ids_to_load)[:5])))

    #note that we transpose the data before making biom obj
    if transpose:
        return table_factory(asarray(matching).T,otu_ids,trait_ids,col_meta,row_meta,constructor=DenseGeneTable)
    else:
        return table_factory(asarray(matching),trait_ids,otu_ids,row_meta,col_meta,constructor=DenseGeneTable)
    def setUp(self):
        """Define mapping data, an OTU table file and expected network output.

        A BIOM-formatted OTU table is written to a temporary file whose
        path is stored in self.otu_table_fp and registered in
        self._paths_to_clean_up. The remaining attributes are literal
        fixtures (mapping text, lookup dicts, edge/node lines, counts).
        """
        self.qiime_config = load_qiime_config()
        self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'

        # tab-delimited mapping file text for samples 1-5
        self.map_file = """#SampleID	Day	time	Description
#This is some comment about the study
1	090809	1200	some description of sample1
2	090809	1800	some description of sample2
3	090909	1200	some description of sample3
4	090909	1800	some description of sample4
5	091009	1200	some description of sample5"""
        # sample id -> list of (category, value) pairs
        self.cat_by_sample = {
            "1": [("Day", "090809"), ("time", "1200")],
            "2": [("Day", "090809"), ("time", "1800")],
            "3": [("Day", "090909"), ("time", "1200")],
            "4": [("Day", "090909"), ("time", "1800")],
            "5": [("Day", "091009"), ("time", "1200")]
        }
        # (category, value) -> list of sample ids
        self.sample_by_cat = {
            ("Day", "090809"): ["1", "2"],
            ("Day", "090909"): ["3", "4"],
            ("Day", "091009"): ["5"],
            ("time", "1200"): ["1", "3", "5"],
            ("time", "1800"): ["2", "4"]
        }

        self.num_cats = 2
        self.meta_dict = {
            "1": ["090809	1200", 0],
            "2": ["090809	1800", 0],
            "3": ["090909	1200", 0],
            "4": ["090909	1800", 0],
            "5": ["091009	1200", 0]
        }
        self.labels = ["from", "to", "eweight", "consensus_lin", "Day", "time"]
        self.node_labels = [
            "node_name", "node_disp_name", "ntype", "degree",
            "weighted_degree", "consensus_lin", "Day", "time"
        ]
        self.label_list = [["090809", "090909", "091009"], ["1200", "1800"]]

        # counts for otu_1..otu_10 (rows) in samples 1..5 (columns); the
        # same values appear in self.otu_sample_file below
        self.otu_table_vals = array([[0, 1, 0, 0, 6], [2, 0, 0, 0, 0],
                                     [0, 0, 3, 1, 0], [0, 0, 0, 0, 5],
                                     [0, 4, 2, 0, 0], [3, 6, 0, 0, 0],
                                     [0, 0, 4, 2, 0], [0, 0, 0, 0, 3],
                                     [2, 0, 0, 5, 0], [0, 2, 0, 4, 0]])

        otu_table_str = format_biom_table(
            table_factory(self.otu_table_vals, ['1', '2', '3', '4', '5'], [
                'otu_1', 'otu_2', 'otu_3', 'otu_4', 'otu_5', 'otu_6', 'otu_7',
                'otu_8', 'otu_9', 'otu_10'
            ], [None, None, None, None, None], [{
                "taxonomy": ["Bacteria", "Actinobacteria", "Coriobacteridae"]
            }, {
                "taxonomy": [
                    "Bacteria", "Bacteroidetes", "Bacteroidales",
                    "Bacteroidaceae"
                ]
            }, {
                "taxonomy":
                ["Bacteria", "Firmicutes", "Clostridia", "Clostridiales"]
            }, {
                "taxonomy": [
                    "Bacteria", "Spirochaetes", "Spirochaetales",
                    "Spirochaetaceae"
                ]
            }, {
                "taxonomy": [
                    "Bacteria", "Bacteroidetes", "Bacteroidales",
                    "Rikenellaceae"
                ]
            }, {
                "taxonomy": [
                    "Bacteria", "Bacteroidetes", "Bacteroidales",
                    "Dysgonomonaceae"
                ]
            }, {
                "taxonomy": [
                    "Bacteria", "Bacteroidetes", "Bacteroidales",
                    "Odoribacteriaceae"
                ]
            }, {
                "taxonomy": [
                    "Bacteria", "Bacteroidetes", "Bacteroidales",
                    "Dysgonomonaceae", "otu_425"
                ]
            }, {
                "taxonomy": [
                    "Bacteria", "Bacteroidetes", "Bacteroidales",
                    "Dysgonomonaceae", "otu_425"
                ]
            }, {
                "taxonomy": [
                    "Bacteria", "Firmicutes", "Mollicutes",
                    "Clostridium_aff_innocuum_CM970"
                ]
            }]))

        # mkstemp returns (descriptor, path); the descriptor is passed to
        # close() right away and the file is re-opened by path for writing
        _, self.otu_table_fp = mkstemp(
            dir=self.tmp_dir,
            prefix='test_make_otu_network_otu_table',
            suffix='.biom')
        close(_)
        open(self.otu_table_fp, 'w').write(otu_table_str)

        # classic tab-delimited rendering of the same OTU table
        self.otu_sample_file = """#Full OTU Counts
#OTU ID	1	2	3	4	5	Consensus Lineage
otu_1	0	1	0	0	6	Bacteria; Actinobacteria; Coriobacteridae
otu_2	2	0	0	0	0	Bacteria; Bacteroidetes; Bacteroidales; Bacteroidaceae
otu_3	0	0	3	1	0	Bacteria; Firmicutes; Clostridia; Clostridiales
otu_4	0	0	0	0	5	Bacteria; Spirochaetes; Spirochaetales; Spirochaetaceae
otu_5	0	4	2	0	0	Bacteria; Bacteroidetes; Bacteroidales; Rikenellaceae
otu_6	3	6	0	0	0	Bacteria; Bacteroidetes; Bacteroidales; Dysgonomonaceae
otu_7	0	0	4	2	0	Bacteria; Bacteroidetes; Bacteroidales; Odoribacteriaceae
otu_8	0	0	0	0	3	Bacteria; Bacteroidetes; Bacteroidales; Dysgonomonaceae; otu_425
otu_9	2	0	0	5	0	Bacteria; Bacteroidetes; Bacteroidales; Dysgonomonaceae; otu_425
otu_10	0	2	0	4	0	Bacteria; Firmicutes; Mollicutes; Clostridium_aff_innocuum_CM970"""

        # sample id -> set of other sample ids
        # (presumably the samples sharing at least one OTU -- TODO confirm)
        self.con_by_sample = {
            '1': set(['2', '4']),
            '2': set(['5', '3', '1', '4']),
            '3': set(['4', '2']),
            '4': set(['3', '1', '2']),
            '5': set(['2'])
        }

        # edge lines; fields follow self.labels
        self.edge_file_str = [
            "2	otu_1	1.0	Bacteria:Actinobacteria:Coriobacteridae	090809	1800",
            "5	otu_1	6.0	Bacteria:Actinobacteria:Coriobacteridae	091009	1200",
            "1	otu_2	2.0	Bacteria:Bacteroidetes:Bacteroidales:Bacteroidaceae	090809	1200",
            "3	otu_3	3.0	Bacteria:Firmicutes:Clostridia:Clostridiales	090909	1200",
            "4	otu_3	1.0	Bacteria:Firmicutes:Clostridia:Clostridiales	090909	1800",
            "5	otu_4	5.0	Bacteria:Spirochaetes:Spirochaetales:Spirochaetaceae	091009	1200",
            "2	otu_5	4.0	Bacteria:Bacteroidetes:Bacteroidales:Rikenellaceae	090809	1800",
            "3	otu_5	2.0	Bacteria:Bacteroidetes:Bacteroidales:Rikenellaceae	090909	1200",
            "1	otu_6	3.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae	090809	1200",
            "2	otu_6	6.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae	090809	1800",
            "3	otu_7	4.0	Bacteria:Bacteroidetes:Bacteroidales:Odoribacteriaceae	090909	1200",
            "4	otu_7	2.0	Bacteria:Bacteroidetes:Bacteroidales:Odoribacteriaceae	090909	1800",
            "5	otu_8	3.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae:otu_425	091009	1200",
            "1	otu_9	2.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae:otu_425	090809	1200",
            "4	otu_9	5.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae:otu_425	090909	1800",
            "2	otu_10	2.0	Bacteria:Firmicutes:Mollicutes:Clostridium_aff_innocuum_CM970	090809	1800",
            "4	otu_10	4.0	Bacteria:Firmicutes:Mollicutes:Clostridium_aff_innocuum_CM970	090909	1800"
        ]

        # node lines; fields follow self.node_labels
        self.node_file_str = [
            "1	1	user_node	3	7.0	other	090809	1200",
            "2	2	user_node	4	13.0	other	090809	1800",
            "3	3	user_node	3	9.0	other	090909	1200",
            "4	4	user_node	4	12.0	other	090909	1800",
            "5	5	user_node	3	14.0	other	091009	1200",
            "otu_1		otu_node	2	7.0	Bacteria:Actinobacteria:Coriobacteridae	otu	otu",
            "otu_2		otu_node	1	2.0	Bacteria:Bacteroidetes:Bacteroidales:Bacteroidaceae	otu	otu",
            "otu_3		otu_node	2	4.0	Bacteria:Firmicutes:Clostridia:Clostridiales	otu	otu",
            "otu_4		otu_node	1	5.0	Bacteria:Spirochaetes:Spirochaetales:Spirochaetaceae	otu	otu",
            "otu_5		otu_node	2	6.0	Bacteria:Bacteroidetes:Bacteroidales:Rikenellaceae	otu	otu",
            "otu_6		otu_node	2	9.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae	otu	otu",
            "otu_7		otu_node	2	6.0	Bacteria:Bacteroidetes:Bacteroidales:Odoribacteriaceae	otu	otu",
            "otu_8		otu_node	1	3.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae:otu_425	otu	otu",
            "otu_9		otu_node	2	7.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae:otu_425	otu	otu",
            "otu_10		otu_node	2	6.0	Bacteria:Firmicutes:Mollicutes:Clostridium_aff_innocuum_CM970	otu	otu"
        ]

        # "red" variants: some OTU entries are replaced by "@<sample>"
        # entries labelled "missed" / "otu_collapsed"
        self.red_edge_file_str = [
            "2	otu_1	1.0	Bacteria:Actinobacteria:Coriobacteridae	090809	1800",
            "5	otu_1	6.0	Bacteria:Actinobacteria:Coriobacteridae	091009	1200",
            "1	@1	1.0	missed	090809	1200",
            "3	otu_3	3.0	Bacteria:Firmicutes:Clostridia:Clostridiales	090909	1200",
            "4	otu_3	1.0	Bacteria:Firmicutes:Clostridia:Clostridiales	090909	1800",
            "5	@5	1.0	missed	091009	1200",
            "2	otu_5	4.0	Bacteria:Bacteroidetes:Bacteroidales:Rikenellaceae	090809	1800",
            "3	otu_5	2.0	Bacteria:Bacteroidetes:Bacteroidales:Rikenellaceae	090909	1200",
            "1	otu_6	3.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae	090809	1200",
            "2	otu_6	6.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae	090809	1800",
            "3	otu_7	4.0	Bacteria:Bacteroidetes:Bacteroidales:Odoribacteriaceae	090909	1200",
            "4	otu_7	2.0	Bacteria:Bacteroidetes:Bacteroidales:Odoribacteriaceae	090909	1800",
            "1	otu_9	2.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae:otu_425	090809	1200",
            "4	otu_9	5.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae:otu_425	090909	1800",
            "2	otu_10	2.0	Bacteria:Firmicutes:Mollicutes:Clostridium_aff_innocuum_CM970	090809	1800",
            "4	otu_10	4.0	Bacteria:Firmicutes:Mollicutes:Clostridium_aff_innocuum_CM970	090909	1800"
        ]

        self.red_node_file_str = [
            "1	1	user_node	3	7.0	other	090809	1200",
            "2	2	user_node	4	13.0	other	090809	1800",
            "3	3	user_node	3	9.0	other	090909	1200",
            "4	4	user_node	4	12.0	other	090909	1800",
            "5	5	user_node	3	14.0	other	091009	1200",
            "otu_1		otu_node	2	7.0	Bacteria:Actinobacteria:Coriobacteridae	otu	otu",
            "@1		otu_collapsed	1	1.0	other	otu	otu",
            "otu_3		otu_node	2	4.0	Bacteria:Firmicutes:Clostridia:Clostridiales	otu	otu",
            "@5		otu_collapsed	2	2.0	other	otu	otu",
            "otu_5		otu_node	2	6.0	Bacteria:Bacteroidetes:Bacteroidales:Rikenellaceae	otu	otu",
            "otu_6		otu_node	2	9.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae	otu	otu",
            "otu_7		otu_node	2	6.0	Bacteria:Bacteroidetes:Bacteroidales:Odoribacteriaceae	otu	otu",
            "otu_9		otu_node	2	7.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae:otu_425	otu	otu",
            "otu_10		otu_node	2	6.0	Bacteria:Firmicutes:Mollicutes:Clostridium_aff_innocuum_CM970	otu	otu"
        ]

        # presumably degree -> number of nodes with that degree (OTU nodes,
        # sample nodes, and both combined) -- TODO confirm
        self.otu_dc = {1: 3, 2: 7}
        self.sample_dc = {3: 3, 4: 2}
        self.degree_counts = {1: 3, 2: 7, 3: 3, 4: 2}

        self.num_con_cat = {"Day": 2, "time": 1}
        self.num_con = 6
        self.num_cat = {"Day": 2, "time": 4}
        self.num_cat_less = {"Day": 1, "time": 3}
        self._paths_to_clean_up = [self.otu_table_fp]
        self._dir_to_clean_up = ''
Exemple #54
0
def main():
    """Normalize an OTU table by per-OTU copy numbers and write it as BIOM.

    The OTU table is read from opts.input_otu_fp (classic format when
    opts.input_format_classic is set, BIOM otherwise). Copy numbers come
    from opts.input_count_fp or, when that is None, from the precalculated
    file for opts.gg_version in the PICRUSt data directory. OTUs present in
    the count table are kept, annotated with their rounded copy number
    under opts.metadata_identifer, and normalized by it. The result is
    written to opts.output_otu_fp.

    Raises:
        ValueError: if the BIOM table cannot be parsed, a copy number is
            not convertible to a number, or a copy number is below 1.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    # the input handle is closed as soon as the table has been parsed
    with open(opts.input_otu_fp, 'U') as otu_table_fh:
        if opts.input_format_classic:
            otu_table = parse_classic_table_to_rich_table(
                otu_table_fh, None, None, None, DenseOTUTable)
        else:
            try:
                otu_table = parse_biom_table(otu_table_fh)
            except ValueError:
                raise ValueError(
                    "Error loading OTU table! If not in BIOM format use '-f' option.\n"
                )

    ids_to_load = otu_table.ObservationIds

    if opts.input_count_fp is None:
        #precalc file has specific name (e.g. 16S_13_5_precalculated.tab.gz)
        precalc_file_name = '_'.join(
            ['16S', opts.gg_version, 'precalculated.tab.gz'])
        input_count_table = join(get_picrust_project_dir(), 'picrust', 'data',
                                 precalc_file_name)
    else:
        input_count_table = opts.input_count_fp

    if opts.verbose:
        # single parenthesized argument: same output under Python 2 and 3
        print("Loading trait table:  %s" % input_count_table)

    ext = path.splitext(input_count_table)[1]

    if ext == '.gz':
        count_table_fh = gzip.open(input_count_table, 'rb')
    else:
        count_table_fh = open(input_count_table, 'U')

    try:
        if opts.load_precalc_file_in_biom:
            count_table = parse_biom_table(count_table_fh.read())
        else:
            count_table = convert_precalc_to_biom(count_table_fh, ids_to_load)
    finally:
        count_table_fh.close()

    #Need to only keep data relevant to our otu list
    ids = [str(x[1]) for x in otu_table.iterObservations()]

    # copy numbers are read from the first observation of the count table
    ob_id = count_table.ObservationIds[0]

    filtered_otus = []
    filtered_values = []
    for x in ids:
        if count_table.sampleExists(x):
            filtered_otus.append(x)
            filtered_values.append(otu_table.observationData(x))

    filtered_otu_table = table_factory(filtered_values,
                                       otu_table.SampleIds,
                                       filtered_otus,
                                       constructor=DenseOTUTable)

    copy_numbers_filtered = {}
    for x in filtered_otus:
        value = count_table.getValueByIds(ob_id, x)
        try:
            #data can be floats so round them and make them integers
            value = int(round(float(value)))

        except ValueError:
            # report the OTU id (the message used to print the bad value
            # in its place)
            raise ValueError(
                "Invalid type passed as copy number for OTU ID %s. Must be int-able." % (x))
        if value < 1:
            raise ValueError("Copy numbers must be greater than or equal to 1.")

        copy_numbers_filtered[x] = {opts.metadata_identifer: value}

    filtered_otu_table.addObservationMetadata(copy_numbers_filtered)

    normalized_table = filtered_otu_table.normObservationByMetadata(
        opts.metadata_identifer)

    #move Observation Metadata from original to filtered OTU table
    normalized_table = transfer_observation_metadata(otu_table,
                                                     normalized_table,
                                                     'ObservationMetadata')
    # NOTE(review): the result of this call is not what gets written below;
    # this only works if transfer_sample_metadata updates normalized_table
    # in place -- confirm.
    normalized_otu_table = transfer_sample_metadata(otu_table,
                                                    normalized_table,
                                                    'SampleMetadata')

    make_output_dir_for_file(opts.output_otu_fp)
    with open(opts.output_otu_fp, 'w') as output_fh:
        output_fh.write(format_biom_table(normalized_table))
Exemple #55
0
 def test_slice_mapping_file(self):
     """Check slice_mapping_file against a table with samples 'a' and 'c'"""
     _, mapping_data = parse_mapping_file(StringIO(test_mapping))
     two_sample_table = table_factory(array([[1, 2], [4, 5]]),
                                      ['a', 'c'],
                                      ['x', 'y'])
     observed = slice_mapping_file(two_sample_table, mapping_data)
     expected = ["a\t1\t123123", "c\tpoop\tdoesn't matter"]
     self.assertEqual(observed, expected)
Exemple #56
0
print('copula done')

# all tables

from biom.table import table_factory

# tables to export, in the same order as their file names below
tables = [
    copula_table2_gamma_1_0_100,
    copula_table1_lognorm_3_0,
    ga_table,
    null_table1,
    null_table2,
    eco_table1,
    eco_table2,
]

# one output file name per entry of `tables`
names = [
    'table_1.biom',
    'table_2.biom',
    'table_3.biom',
    'table_4.biom',
    'table_5.biom',
    'table_6.biom',
    'table_7.biom',
]


def make_ids(data):
    """Return (sample ids, observation ids) generated from data.shape

    Sample ids 's0', 's1', ... are numbered along data.shape[1];
    observation ids 'o0', 'o1', ... along data.shape[0].
    """
    n_samples = data.shape[1]
    n_observations = data.shape[0]
    sample_ids = ['s%i' % col for col in range(n_samples)]
    observation_ids = ['o%i' % row for row in range(n_observations)]
    return sample_ids, observation_ids


# Write every table as a BIOM JSON file on the desktop. The context manager
# closes each output file even if writing fails.
for table, name in zip(tables, names):
    sids, oids = make_ids(table)
    bt = table_factory(table, sids, oids)
    json_str = bt.getBiomFormatJsonString(generated_by='Sophie_Will')
    with open('/Users/will/Desktop/' + name, 'w') as o:
        o.write(json_str)
Exemple #57
0
nsteps = 21

# grids of feature and sample counts; the first grid point (0) is replaced
# by the configured minimum
x = linspace(0, fmax, nsteps)
x[0] = fmin
y = linspace(0, smax, nsteps)
y[0] = smin

otu_ids = ['O_%s' % i for i in range(fmax)]
sample_ids = ['S_%s' % i for i in range(smax)]

out_dir = '/Users/wdwvt1/src/correlations/tables/timings/'

# one BIOM table per (num_features, num_samples) grid point
for num_features in x:
    for num_samples in y:
        data = _generate_data(num_features, num_samples)
        # grid values are floats, so cast before slicing the id lists
        bt = table_factory(data, sample_ids[:int(num_samples)],
            otu_ids[:int(num_features)])
        out_path = os.path.join(out_dir, 'table_f_%s_s_%s.biom' %
            (num_features, num_samples))
        # write() emits the JSON string in one call (writelines() on a
        # string writes it character by character); the context manager
        # closes the file even on error
        with open(out_path, 'w') as o:
            o.write(bt.getBiomFormatJsonString('will'))

'''
import glob
os.mkdir(os.join(out_dir, 'text_tables'))
tables = glob(out_dir+'*.biom')
for t in tables:
    out_fp = os.path.join(t.split('/')[:-1], 'text_tables/') + t.split('/')[-1]
    !biom convert -i $t -o $out_fp -b
'''