Esempio n. 1
0
def get_rare_data(otu_table,
                  seqs_per_sample,
                  include_small_samples=False,
                  subsample_f=subsample):
    """Rarefy an OTU table to ``seqs_per_sample`` sequences per sample.

    - otu_table: OTUs (rows) by samples (cols); the returned table keeps
      the same orientation.
    - seqs_per_sample: target depth. Samples whose total count is not below
      this value are passed through ``subsample_f``.
    - include_small_samples: when False, samples with fewer than
      ``seqs_per_sample`` total sequences are filtered out before
      subsampling; when True they are kept with their counts unchanged.
    - subsample_f: callable invoked as
      ``subsample_f(int_counts, seqs_per_sample)``.

    No OTUs are removed, even if they are absent in the rarefied table.
    """

    def _rarefy_sample(counts, sample_id, sample_md):
        # Samples below the requested depth are handed back untouched.
        if counts.sum() < seqs_per_sample:
            return counts
        return subsample_f(counts.astype(int), seqs_per_sample)

    # NOTE(review): errstate(empty='raise') is active for both the filtering
    # and the transform; presumably an empty table should raise here rather
    # than pass silently -- confirm against the errstate documentation.
    with errstate(empty='raise'):
        if not include_small_samples:
            sample_ids = otu_table.ids()
            otu_table = filter_samples_from_otu_table(
                otu_table, sample_ids, seqs_per_sample, inf)

        return otu_table.transform(_rarefy_sample, axis='sample')
Esempio n. 2
0
def get_rare_data(otu_table,
                  seqs_per_sample,
                  include_small_samples=False,
                  subsample_f=subsample):
    """Return a copy of the OTU table subsampled to an even depth.

    - otu_table (input and output) is OTUs (rows) by samples (cols).
    - include_small_samples=False => samples with fewer than
      seqs_per_sample total sequences are not written.
    - include_small_samples=True => such samples are kept as they are.
    - every other sample is replaced by
      subsample_f(counts as int, seqs_per_sample).
    - no OTUs are removed, even if they are absent in the rarefied table.
    """

    with errstate(empty='raise'):
        table = otu_table
        if not include_small_samples:
            # Keep only samples whose depth lies in [seqs_per_sample, inf].
            table = filter_samples_from_otu_table(
                table, table.ids(), seqs_per_sample, inf)

        def rarefy(counts, sample_id, sample_md):
            # Shallow samples pass through; the rest are subsampled.
            return (counts if counts.sum() < seqs_per_sample
                    else subsample_f(counts.astype(int), seqs_per_sample))

        rarefied = table.transform(rarefy, axis='sample')
        return rarefied
Esempio n. 3
0
def split_otu_table_on_sample_metadata(otu_table, mapping_f, mapping_field):
    """Split an OTU table into one sub-table per value of ``mapping_field``.

    This is a generator. For each value that ``get_mapping_values`` reports
    for ``mapping_field`` it yields a ``(value_str, sub_table)`` tuple:

    - ``value_str`` is the mapping value with spaces replaced by underscores.
    - ``sub_table`` is a filtered copy of ``otu_table`` (the input table is
      not modified) containing only the samples selected by
      ``sample_ids_from_metadata_description`` for ``"<field>:<value>"``.

    Values for which the filter raises ``TableException`` are skipped.

    Raises ``OTUTableSplitError`` if no sub-table was produced for any
    value. Because this is a generator, the error surfaces only when the
    iteration is exhausted, not when the function is called.
    """
    with errstate(empty='raise'):
        # Materialize once: the mapping data is passed to several helper
        # calls below, which a one-shot iterator would not survive.
        mapping_f = list(mapping_f)
        mapping_values = get_mapping_values(mapping_f, mapping_field)
        produced_any = False

        for v in mapping_values:
            v_fp_str = v.replace(' ', '_')
            # Build a set so the per-sample predicate below is an O(1)
            # lookup instead of a linear scan for every sample in the table.
            sample_ids_to_keep = set(sample_ids_from_metadata_description(
                mapping_f, valid_states_str="%s:%s" % (mapping_field, v)))

            try:
                # filtering cannot be inplace otherwise we lose data
                filtered_otu_table = otu_table.filter(
                    lambda values, id_, metadata: id_ in sample_ids_to_keep,
                    axis='sample', inplace=False)
            except TableException:
                # all samples are filtered out, so no otu table to write
                continue

            produced_any = True
            yield v_fp_str, filtered_otu_table

        if not produced_any:
            raise OTUTableSplitError(
                "Could not split OTU tables! There are no matches between the "
                "sample identifiers in the OTU table and the mapping file.")
Esempio n. 4
0
    def test_errstate(self):
        """errstate applies 'call' handling for 'empty' only inside the block.

        Registers a callback for the 'empty' error type, checks that the
        'empty' state is not 'call' before and after the ``with`` block, and
        checks that ``errcheck`` on an empty table returns the callback's
        value while the block is active.
        """
        callback_result = "the callback called"

        def on_empty(item):
            return callback_result

        empty_table = Table([], [], [])
        seterrcall('empty', on_empty)

        # Registering the callback alone must not switch the state to 'call'.
        self.assertNotEqual(geterr()['empty'], 'call')

        with errstate(empty='call'):
            observed = errcheck(empty_table)

        self.assertEqual(observed, callback_result)
        # The previous state must be back in place once the block exits.
        self.assertNotEqual(geterr()['empty'], 'call')