Python Table.data Examples

Programming Language: Python

Namespace/Package Name: biom

Class/Type: Table

Method/Function: data

Examples at hotexamples.com: 3

Python Table.data - 3 examples found. These are the top-rated real-world Python examples of biom.Table.data, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Table(30)

ids(30)

is_empty(21)

filter(20)

norm(11)

to_dataframe(9)

collapse(8)

add_metadata(8)

copy(6)

sum(6)

merge(5)

remove_empty(5)

subsample(4)

metadata(4)

descriptive_equality(4)

data(3)

pa(3)

generated_by(3)

sort_order(3)

from_json(3)

del_metadata(2)

from_hdf5(2)

_observation_ids(1)

_sample_ids(1)

rankdata(1)

reset_index(1)

sort(1)

iter_data(1)

Example #1

Show file

File: model_base.py Project: gibsramen/BIRDMAn

 def __init__(self, table: biom.Table, feature_id: str, **kwargs):
     """Store the feature ID and register that feature's counts as ``y``.

     :param table: table forwarded to the parent initializer and queried
         for the counts of ``feature_id``
     :param feature_id: ID looked up along the ``"observation"`` axis
     :param kwargs: extra keyword arguments forwarded to the parent
         initializer
     """
     super().__init__(table=table, **kwargs)
     self.feature_id = feature_id

     # Dense vector for the requested observation, cast to integers before
     # it is handed over as the "y" parameter.
     dense_counts = table.data(id=feature_id, axis="observation", dense=True)
     self.add_parameters({"y": dense_counts.astype(int)})

Example #2

Show file

File: _method.py Project: beiko-lab/q2-ebd

def beta_phylogenetic(table: biom.Table, phylogeny: skbio.TreeNode,
                      metric: str, weighted: bool)-> skbio.DistanceMatrix:
    """Compute a beta-diversity matrix by running ExpressBetaDiversity.

    The table and the tree are written to a temporary directory, the
    external ``ExpressBetaDiversity`` executable is run inside it, and the
    ``output.diss`` file it leaves behind is parsed into a distance matrix.

    Parameters
    ----------
    table : biom.Table
        Feature table; one line per sample is written to ``otu_table.tsv``.
    phylogeny : skbio.TreeNode
        Tree written to ``tree.newick``.
    metric : str
        Metric name; must be contained in ``phylogenetic_metrics()``.
    weighted : bool
        When true, the ``-w`` flag is passed to the executable.

    Returns
    -------
    skbio.DistanceMatrix
        Matrix built from the parsed distances and sample IDs.

    Raises
    ------
    ValueError
        If ``metric`` is not a known phylogenetic metric or ``table`` is
        empty.
    KeyError
        If ``metric`` passes validation but has no calculator name in the
        mapping below.
    subprocess.CalledProcessError
        If ``ExpressBetaDiversity`` exits with a non-zero status.
    """
    if metric not in phylogenetic_metrics():
        raise ValueError("Unknown phylogenetic metric: %s" % metric)
    if table.is_empty():
        raise ValueError("The provided table object is empty")

    # Metric name -> calculator name passed to ExpressBetaDiversity via -c
    name_map = {'braycurtis': 'Bray-Curtis',
                'sorensen': 'Bray-Curtis',
                'canberra': 'Canberra',
                'chi_squared': 'Chi-squared',
                'coeff_similarity': 'CS',
                'complete_tree': 'CT',
                'euclidean': 'Euclidean',
                'f_st': 'Fst',
                'p_st': 'Fst',
                'gower': 'Gower',
                'hellinger': 'Hellinger',
                'kulczynski': 'Kulczynski',
                'lennon': 'Lennon',
                'manhattan': 'Manhattan',
                'weighted_unifrac': 'Manhattan',
                'mnnd': 'MNND',
                'mpd': 'MPD',
                'morisita_horn': 'Morisita-Horn',
                'normalized_weighted_unifrac': 'NWU',
                'pearson': 'Pearson',
                'raohp': 'RaoHp',
                'soergel': 'Soergel',
                'jaccard': 'Soergel',
                'unweighted_unifrac': 'Soergel',
                'ruzicka': 'Soergel',
                'tamas_coeff': 'TC',
                'weighted_corr': 'WC',
                'whittaker': 'Whittaker',
                'yue_clayton': 'Yue-Clayton'
               }
    calculator = name_map[metric]

    with tempfile.TemporaryDirectory() as temp_dir_name:
        # Write the table and the tree to the temporary directory
        table_fp = os.path.join(temp_dir_name, 'otu_table.tsv')
        newick_fp = os.path.join(temp_dir_name, 'tree.newick')
        with open(table_fp, 'w') as out_table, open(newick_fp, 'w') as newick:
            # This is easy, just write to newick
            phylogeny.write(newick)
            # Header line: an empty first cell followed by observation IDs
            out_table.write("\t" + "\t".join(table.ids(axis='observation')))
            # We have to iterate through each sample
            for sample_id in table.ids(axis='sample'):
                row = table.data(sample_id)
                out_table.write("\n" + str(sample_id) + "\t" +
                                "\t".join([str(x) for x in row]))

        # Run ExpressBetaDiversity on them. An argument list (no shell) avoids
        # any quoting issues, and check=True surfaces a failed run right away
        # instead of as a missing or malformed output file further down.
        cmd = ['ExpressBetaDiversity', '-t', 'tree.newick',
               '-s', 'otu_table.tsv']
        if weighted:
            cmd.append('-w')
        cmd.extend(['-c', calculator])
        subprocess.run(cmd, cwd=temp_dir_name, check=True)

        # Suck the data matrix back in
        with open(os.path.join(temp_dir_name, 'output.diss'), 'r') as dist_file:
            # First line holds the number of samples
            nsamples = int(dist_file.readline())
            dist_mat = np.zeros((nsamples, nsamples))
            ids = []
            for i, line in enumerate(dist_file):
                fields = line.split("\t")
                ids.append(fields[0].strip())
                # Every value found on row i is mirrored across the diagonal
                # so the resulting matrix is symmetric.
                for j, dist in enumerate(fields[1:]):
                    value = float(dist)
                    dist_mat[i, j] = value
                    dist_mat[j, i] = value

    # Return a DistanceMatrix object
    results = skbio.DistanceMatrix(dist_mat, ids)
    return results

Example #3

Show file

class TestSculptor(TestCase):
    """Tests for ``Sculptor`` using a small synthetic dataset.

    ``setUp`` builds a mapping file with 11 samples, a table with 7
    observations over those samples, and a tree; ``tearDown`` removes any
    directories the tests registered in ``self.to_delete``.
    """

    def setUp(self):
        """Create the mapping file, table and tree shared by every test."""

        # small synthetic dataset
        sample_ids = [
            's1', 's2', 's3', 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11'
        ]
        self.mf = pd.DataFrame(
            data=[
                ['fasting', '8', 'A'],
                ['fasting', '-1', 'A'],
                ['control', '1', 'B'],
                ['control', '2', 'B'],
                ['control', '3', 'B'],
                ['fasting', '2', 'A'],
                ['fasting', '11', 'A'],
                ['control', '4', 'B'],
                ['control', '5', 'B'],
                ['control', '90', 'B'],
                ['fasting', '19.9', 'A'],
            ],
            columns=['Treatment', 'Day', 'Host'],
            index=sample_ids)
        # 'Day' is created from strings; convert it to a numeric column
        self.mf['Day'] = pd.to_numeric(self.mf['Day'], errors='coerce')

        otu_ids = [str(i) for i in range(1, 8)]
        # one row per sample, one column per OTU
        data = np.array([[0.0, 2.0, 5.0, 5.0, 0.0, 0.0, 0.0],
                         [0.0, 0.0, 6.0, 9.0, 0.0, 4.0, 0.0],
                         [2.0, 6.0, 0.0, 0.0, 5.0, 0.0, 0.0],
                         [0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0],
                         [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 5.0],
                         [1.0, 0.0, 8.0, 9.0, 0.0, 0.0, 0.0],
                         [0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0],
                         [0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0],
                         [0.0, 3.0, 0.0, 0.0, 0.0, 4.0, 0.0],
                         [0.0, 0.0, 0.0, 0.0, 5.0, 5.0, 0.0],
                         [9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]])

        # transposed so that OTUs are rows and samples are columns
        self.bt = Table(data.T, otu_ids, sample_ids)

        tree_string = ("((1:0.2, 2:0.1)3P:0.3, (((7:0.1, 8:0.1)7P:0.8, (5:0.2,"
                       " 6:0.2)8P:0.1)5P:0.1, (3:0.2, 4:0.7)6P:0.9)4P:0.3)"
                       "root;")
        self.tree = TreeNode.read(StringIO(tree_string))

        # assumes to be only directories
        self.to_delete = []

    def tearDown(self):
        """Remove registered directories and ``roc-curves`` if it is empty."""
        for element in self.to_delete:
            shutil.rmtree(element, ignore_errors=True)

        # delete the directory only if it is empty; os.rmdir raises OSError
        # (FileNotFoundError is a subclass) when it is missing or not empty
        try:
            os.rmdir('roc-curves')
        except OSError:
            pass

    def test_constructor(self):
        """A new Sculptor keeps the original inputs and has no results yet."""
        obs = Sculptor(self.bt, self.mf, self.tree, 'Day', 'Host', 'test-name')

        self.assertIsNone(obs.mapping_file)
        self.assertIsNone(obs.biom_table)

        self.assertEqual(obs.name, 'test-name')

        self.assertIsNone(obs._alpha_diversity_values)
        self.assertIsNone(obs._beta_diversity_matrices)

        pd.testing.assert_frame_equal(self.mf, obs._original_mf)

        np.testing.assert_equal(obs._original_bt.ids(), self.bt.ids())
        np.testing.assert_equal(obs._original_bt.ids('observation'),
                                self.bt.ids('observation'))

        a = [self.bt.data(i) for i in self.bt.ids()]
        b = [obs._original_bt.data(i) for i in obs._original_bt.ids()]

        np.testing.assert_allclose(a, b)

        # needed to allow for phylogenetic metrics
        for node in obs.tree.postorder():
            self.assertIsNotNone(node.length)

    def test_constructor_errors(self):
        """Invalid categories or mismatched IDs raise ``ValueError``."""
        with self.assertRaisesRegex(ValueError, 'The gradient category'):
            Sculptor(self.bt, self.mf, self.tree, 'XXX', 'Host')

        with self.assertRaisesRegex(ValueError, 'The trajectory category'):
            Sculptor(self.bt, self.mf, self.tree, 'Day', 'XXX')

        with self.assertRaisesRegex(ValueError, 'numeric dtype'):
            Sculptor(self.bt, self.mf, self.tree, 'Treatment', 'Host')

        # rename every ID returned by self.bt.ids() so that none of them
        # matches the index of the mapping file any more
        self.bt.update_ids({i: i + 'xx' for i in self.bt.ids()}, inplace=True)
        with self.assertRaisesRegex(ValueError, 'without metadata'):
            Sculptor(self.bt, self.mf, self.tree, 'Day', 'Host')

    def test_random_select(self):
        """``randomly_select(3)`` leaves six samples in total."""
        np.random.seed(0)
        obs = Sculptor(self.bt, self.mf, self.tree, 'Day', 'Host',
                       'random-select')

        self.assertIsNone(obs.mapping_file)
        self.assertIsNone(obs.biom_table)

        obs.randomly_select(3)

        # if we randomly select three samples there should be 6 in total
        self.assertEqual(len(obs.mapping_file), 6)
        self.assertEqual(obs.biom_table.shape, (7, 6))

    def test_random_select_errors(self):
        """Table methods raise before ``randomly_select`` has been called."""
        obs = Sculptor(self.bt, self.mf, self.tree, 'Day', 'Host',
                       'random-select-errors')

        with self.assertRaisesRegex(ValueError, 'uniformly subsampled'):
            obs.alpha_table()

        with self.assertRaisesRegex(ValueError, 'uniformly subsampled'):
            obs.beta_table()

        with self.assertRaisesRegex(ValueError, 'uniformly subsampled'):
            obs.microbes_over_time()

    def test_alpha(self):
        """``alpha_table`` returns the expected per-host feature table."""
        skl = Sculptor(self.bt, self.mf, self.tree, 'Day', 'Host',
                       'test-alpha')
        np.random.seed(0)
        skl.randomly_select(5)

        obs = skl.alpha_table(['faith_pd', 'observed_otus'])

        self.assertIsNotNone(skl._alpha_diversity_values)

        columns = [
            'faith_pd_absolute_sum_of_diff', 'faith_pd_abs_mean_diff',
            'faith_pd_variance_larger_than_standard_deviation',
            'faith_pd_abs_energy', 'observed_otus_absolute_sum_of_diff',
            'observed_otus_abs_mean_diff',
            'observed_otus_variance_larger_than_standard_deviation',
            'observed_otus_abs_energy'
        ]
        data = [
            [
                2.1999999999999993, 0.5499999999999998, 0.0,
                23.919999999999995, 2, 0.5, False, 32
            ],
            [
                2.200000000000001, 0.5500000000000003, 0.0,
                6.760000000000001, 3, 0.75, False, 22
            ],
        ]

        exp = pd.DataFrame(data=data,
                           index=pd.Index(['A', 'B'], name='Host'),
                           columns=columns)
        pd.testing.assert_frame_equal(obs, exp)

    def test_alpha_errors(self):
        """An unknown alpha metric raises ``ValueError``."""
        skl = Sculptor(self.bt, self.mf, self.tree, 'Day', 'Host',
                       'random-select-errors')
        skl.randomly_select(5)
        with self.assertRaisesRegex(ValueError, 'find one or more metrics'):
            skl.alpha_table(metrics=['does_not_exist'])

    def test_beta(self):
        """``beta_table`` returns the expected table and caches matrices."""
        skl = Sculptor(self.bt, self.mf, self.tree, 'Day', 'Host',
                       'unittest-test-beta')
        path = 'roc-curves/%s/cached-matrices/' % skl.name

        # avoid any unwanted accidents
        self.to_delete.append('roc-curves/%s/' % skl.name)

        np.random.seed(0)
        skl.randomly_select(5)
        obs = skl.beta_table(['unweighted_unifrac', 'jaccard'])

        data = [
            [0.3927777777777778, 0.4126532637086283, 0.9375,
             0.12499999999999999],
            [0.6557886557886559, 0.1365522219610505, 1.0, 0.0],
        ]
        columns = [
            'unweighted_unifrac_mean', 'unweighted_unifrac_std',
            'jaccard_mean', 'jaccard_std'
        ]
        exp = pd.DataFrame(data=data,
                           columns=columns,
                           index=pd.Index(['A', 'B'], name='Host'))

        pd.testing.assert_frame_equal(obs, exp)

        # the distance matrices are expected to be written under ``path``
        self.assertTrue(os.path.exists(path))
        self.assertTrue(
            os.path.exists(os.path.join(path, 'unweighted_unifrac.full.'
                                        'txt')))
        self.assertTrue(os.path.exists(os.path.join(path, 'jaccard.full.txt')))

    def test_beta_errors(self):
        """An unknown beta metric raises ``ValueError``."""
        skl = Sculptor(self.bt, self.mf, self.tree, 'Day', 'Host',
                       'unittest-beta-errors')
        self.to_delete.append('roc-curves/%s' % skl.name)
        skl.randomly_select(5)
        with self.assertRaisesRegex(ValueError, 'find one or more metrics'):
            skl.beta_table(metrics=['does_not_exist'])

    def test_microbes_over_time(self):
        """``microbes_over_time`` yields four features per OTU for each host."""
        skl = Sculptor(self.bt, self.mf, self.tree, 'Day', 'Host',
                       'microbes-over-time')
        np.random.seed(0)
        skl.randomly_select(5)

        obs = skl.microbes_over_time()

        metrics = ['mean', 'abs_energy', 'non_zero_samples', 'abs_mean_diff']
        columns = ['%s_%s' % (a, b) for a, b in product(range(1, 8), metrics)]
        index = ['A', 'B']

        self.assertEqual(obs.columns.tolist(), columns)
        self.assertEqual(obs.index.tolist(), index)
        self.assertEqual(obs.values.shape, (2, 28))