def get_rare_data(otu_table,
                  seqs_per_sample,
                  include_small_samples=False,
                  subsample_f=subsample):
    """Rarefy an OTU table to an even per-sample sequencing depth.

    otu_table: table of otus (rows) by samples (cols); the returned table
     has the same orientation
    seqs_per_sample: depth that every sufficiently large sample is
     subsampled to
    include_small_samples: when False, samples with fewer than
     seqs_per_sample total sequences are not written to the output; when
     True they are kept with their counts untouched
    subsample_f: callable invoked as subsample_f(int_counts,
     seqs_per_sample) on the counts of a single sample

    No otus are removed, even if they are absent in the rarefied table.
    """
    def _rarefy_sample(counts, sample_id, sample_md):
        # Samples below the requested depth pass through unchanged; samples
        # at or above it are subsampled.
        if counts.sum() < seqs_per_sample:
            return counts
        return subsample_f(counts.astype(int), seqs_per_sample)

    # NOTE(review): presumably makes biom raise when a table ends up
    # empty -- confirm against biom's error-state documentation.
    with errstate(empty='raise'):
        table = otu_table
        if not include_small_samples:
            # Bounds passed are (seqs_per_sample, inf), i.e. no upper limit
            # on the number of sequences in a retained sample.
            table = filter_samples_from_otu_table(
                table, table.ids(), seqs_per_sample, inf)

        return table.transform(_rarefy_sample, axis='sample')
def get_rare_data(otu_table,
                  seqs_per_sample,
                  include_small_samples=False,
                  subsample_f=subsample):
    """Filter and subsample an OTU table to the desired sample size.

    - include_small_samples=False => do not write samples with fewer than
      seqs_per_sample total sequences; True => keep them as they are
    - samples with at least seqs_per_sample sequences are passed (as
      integer counts) to subsample_f together with seqs_per_sample
    - otu_table (input and output) is otus (rows) by samples (cols)
    - no otus are removed, even if they are absent in the rarefied table
    """
    with errstate(empty='raise'):
        working_table = otu_table
        if not include_small_samples:
            sample_ids = working_table.ids()
            working_table = filter_samples_from_otu_table(
                working_table, sample_ids, seqs_per_sample, inf)

        def rarefy_counts(counts, sample_id, sample_md):
            # Only samples that reach the target depth are subsampled.
            is_small = counts.sum() < seqs_per_sample
            return (counts if is_small
                    else subsample_f(counts.astype(int), seqs_per_sample))

        rarefied_table = working_table.transform(rarefy_counts,
                                                 axis='sample')
        return rarefied_table
def split_otu_table_on_sample_metadata(otu_table, mapping_f, mapping_field):
    """Yield one sub OTU table per value of mapping_field.

    otu_table: table to split; it is filtered with inplace=False, so the
     input itself is left intact
    mapping_f: iterable of mapping file lines (materialized into a list so
     it can be read once per value)
    mapping_field: mapping file column whose values define the groups

    Yields (value_str, sub_table) tuples, where value_str is the mapping
    value with spaces replaced by underscores and sub_table contains only
    the samples associated with that value. Values for which the filter
    raises TableException are skipped.

    Raises OTUTableSplitError once iteration finishes if no sub table at
    all could be produced.
    """
    with errstate(empty='raise'):
        mapping_lines = list(mapping_f)
        n_tables = 0

        for value in get_mapping_values(mapping_lines, mapping_field):
            value_label = value.replace(' ', '_')
            state_description = "%s:%s" % (mapping_field, value)
            sample_ids_to_keep = sample_ids_from_metadata_description(
                mapping_lines, valid_states_str=state_description)

            def keep_sample(values, id_, metadata):
                return id_ in sample_ids_to_keep

            try:
                # filtering cannot be inplace otherwise we lose data
                sub_table = otu_table.filter(
                    keep_sample, axis='sample', inplace=False)
            except TableException:
                # all samples are filtered out, so no otu table to write
                continue

            n_tables += 1
            yield value_label, sub_table

        if n_tables == 0:
            raise OTUTableSplitError(
                "Could not split OTU tables! There are no matches between the "
                "sample identifiers in the OTU table and the mapping file.")
def test_errstate(self):
    """errstate applies the 'call' mode for 'empty' only inside the block."""
    expected = "the callback called"

    def empty_callback(item):
        return expected

    empty_table = Table([], [], [])
    seterrcall('empty', empty_callback)

    # Before entering the context the 'empty' mode must not be 'call'.
    self.assertNotEqual(geterr()['empty'], 'call')

    with errstate(empty='call'):
        observed = errcheck(empty_table)

    self.assertEqual(observed, expected)
    # Leaving the context must restore a mode other than 'call'.
    self.assertNotEqual(geterr()['empty'], 'call')