Ejemplo n.º 1
0
    def test_frac_table(self):
        """frac_table converts counts to per-sample fractions.

        Exercised on a prep_table() structure, on a BIOM Table built from
        the same data, and on a table containing an all-zero sample.
        """
        counts = prep_table({
            'S1': {'G1': 4, 'G2': 5, 'G3': 1},
            'S2': {'G1': 2, 'G2': 0, 'G3': 8},
            'S3': {'G1': 9, 'G2': 5, 'G3': 6},
        })
        expected = prep_table({
            'S1': {'G1': 0.4, 'G2': 0.5, 'G3': 0.1},
            'S2': {'G1': 0.2, 'G2': 0.0, 'G3': 0.8},
            'S3': {'G1': 0.45, 'G2': 0.25, 'G3': 0.3},
        })

        # regular: compare the first four components one by one
        observed = frac_table(counts)
        for position in range(4):
            self.assertListEqual(observed[position], expected[position])

        # BIOM
        observed = frac_table(Table(*map(np.array, counts)))
        expected = Table(*map(np.array, expected))
        verdict = observed.descriptive_equality(expected)
        self.assertEqual(verdict, 'Tables appear equal')

        # zero column: S2 sums to zero and is expected to stay all zeros
        zero_counts = prep_table({
            'S1': {'G1': 0, 'G2': 2},
            'S2': {'G1': 0, 'G2': 0},
        })
        zero_expected = prep_table({
            'S1': {'G1': 0, 'G2': 1},
            'S2': {'G1': 0, 'G2': 0},
        })
        zero_observed = frac_table(zero_counts)
        self.assertListEqual(zero_observed[0], zero_expected[0])
Ejemplo n.º 2
0
    def test_biom_match(self):
        """match() keeps only samples shared by the table and the metadata.

        The table holds samples s2-s4 while the metadata is indexed by
        s1-s3, so s2 and s3 are the samples expected on both sides.
        """
        table = Table(
            np.array([[0, 0, 1, 1], [2, 3, 4, 4], [5, 5, 3, 3]]).T,
            ['a', 'b', 'c', 'd'], ['s2', 's3', 's4'])
        md = pd.DataFrame({
            'x1': [1, 3, 2],
            'x2': [1, 1, 0]
        },
                          columns=['s1', 's2', 's3']).T

        exp_table = Table(
            np.array([[0, 0, 1, 1], [2, 3, 4, 4]]).T, ['a', 'b', 'c', 'd'],
            ['s2', 's3'])
        exp_md = pd.DataFrame({
            'x1': [3, 2],
            'x2': [1, 0]
        },
                              columns=['s2', 's3']).T

        res_table, res_md = match(table, md)
        exp_df = pd.DataFrame(exp_table.to_dataframe())
        res_df = pd.DataFrame(res_table.to_dataframe())

        # Sort the labels so the comparison does not depend on their order.
        # DataFrame.reindex_axis was removed in pandas 1.0; reindex with an
        # explicit axis is the equivalent call.
        exp_df = exp_df.reindex(sorted(exp_df.columns), axis=1)
        res_df = res_df.reindex(sorted(res_df.columns), axis=1)

        pdt.assert_frame_equal(exp_df, res_df)

        exp_md = exp_md.reindex(sorted(exp_md.index), axis=0)
        res_md = res_md.reindex(sorted(res_md.index), axis=0)

        pdt.assert_frame_equal(res_md, exp_md)
Ejemplo n.º 3
0
def deposit_biofilms(output_dir, abs_table1, abs_table2, rel_table1,
                     rel_table2, edges, metadata, sample_id):
    """ Writes down tables, edges and metadata into files.

    Each table is read through ``.values``, ``.columns`` and ``.index``
    (i.e. it is treated like a pandas DataFrame). The values are
    transposed, with the columns used as observation ids and the index
    as sample ids of the written BIOM table.

    Parameters
    ----------
    output_dir : str
        output directory
    abs_table1 : pd.DataFrame
        Table of absolute abundances, written to
        ``table.abs.microbes.<sample_id>.biom``
    abs_table2 : pd.DataFrame
        Table of absolute abundances, written to
        ``table.abs.metabolites.<sample_id>.biom``
    rel_table1 : pd.DataFrame
        Table of relative abundances, written to
        ``table.rel.microbes.<sample_id>.biom``
    rel_table2 : pd.DataFrame
        Table of relative abundances, written to
        ``table.rel.metabolites.<sample_id>.biom``
    edges : list
        Edge list for ground truthing.
    metadata : pd.DataFrame
        Dataframe of sample metadata
    sample_id : str
        sample id, embedded in every output file name
    """
    output_abs_microbes = "%s/table.abs.microbes.%s.biom" % (output_dir,
                                                             sample_id)
    output_abs_metabolites = "%s/table.abs.metabolites.%s.biom" % (output_dir,
                                                                   sample_id)
    output_rel_microbes = "%s/table.rel.microbes.%s.biom" % (output_dir,
                                                             sample_id)
    output_rel_metabolites = "%s/table.rel.metabolites.%s.biom" % (output_dir,
                                                                   sample_id)
    output_md = "%s/metadata.%s.txt" % (output_dir, sample_id)
    output_edges = "%s/edges.%s.txt" % (output_dir, sample_id)

    # relative abundances
    table1 = Table(rel_table1.values.T, rel_table1.columns, rel_table1.index)
    table2 = Table(rel_table2.values.T, rel_table2.columns, rel_table2.index)
    with biom_open(output_rel_microbes, 'w') as f:
        table1.to_hdf5(f, generated_by='moi1')
    with biom_open(output_rel_metabolites, 'w') as f:
        table2.to_hdf5(f, generated_by='moi2')

    # absolute abundances
    table1 = Table(abs_table1.values.T, abs_table1.columns, abs_table1.index)
    table2 = Table(abs_table2.values.T, abs_table2.columns, abs_table2.index)
    with biom_open(output_abs_microbes, 'w') as f:
        table1.to_hdf5(f, generated_by='moi1')
    with biom_open(output_abs_metabolites, 'w') as f:
        table2.to_hdf5(f, generated_by='moi2')

    pd.DataFrame(edges).to_csv(output_edges, sep='\t')
    metadata.to_csv(output_md, sep='\t')
Ejemplo n.º 4
0
 def test_add_pseudocount2(self):
     """add_pseudocount(t, 2) is expected to raise every cell by two."""
     obs_ids = ['O1', 'O2']
     samp_ids = ['S1', 'S2', 'S3']
     original = Table(np.array([[0, 1, 3], [1, 1, 2]]), obs_ids, samp_ids)
     shifted = add_pseudocount(original, 2)
     expected = Table(np.array([[2, 3, 5], [3, 3, 4]]), obs_ids, samp_ids)
     self.assertEqual(shifted, expected)
Ejemplo n.º 5
0
def main(args):
    """Simulate multinomial data and write it out as train/test/valid sets.

    Creates ``args.output_dir``, seeds numpy with ``args.seed``, runs
    ``multinomial_bioms`` and splits the rows of ``Y`` in tenths: the
    first eight go to ``train.biom``, the ninth to ``test.biom`` and the
    remainder to ``valid.biom``. The tree, eigenvalues, eigenvectors and
    ``W`` returned by the simulation are written alongside.
    """
    output_dir = args.output_dir
    os.mkdir(output_dir)
    np.random.seed(args.seed)
    sims = multinomial_bioms(
        k=args.latent_dim, D=args.input_dim,
        N=args.samples, M=args.depth)

    Y = sims['Y']
    tenth = Y.shape[0] // 10
    samp_ids = [str(i) for i in range(Y.shape[0])]
    obs_ids = [str(j) for j in range(Y.shape[1])]

    # Row windows of each split; rows of Y become samples of the table.
    windows = (
        ('train', slice(None, tenth * 8)),
        ('test', slice(tenth * 8, tenth * 9)),
        ('valid', slice(tenth * 9, None)),
    )
    splits = [(name, Table(Y[rows].T, obs_ids, samp_ids[rows]))
              for name, rows in windows]
    for name, split in splits:
        with biom_open(f'{output_dir}/{name}.biom', 'w') as f:
            split.to_hdf5(f, name)

    sims['tree'].write(f'{output_dir}/basis.nwk')
    np.savetxt(f'{output_dir}/eigvals.txt', sims['eigs'])
    np.savetxt(f'{output_dir}/eigvecs.txt', sims['eigvectors'])
    np.savetxt(f'{output_dir}/W.txt', sims['W'])
Ejemplo n.º 6
0
    def setUp(self):
        """Simulate batch data and write the fixture files used by the tests.

        Files are written to the current working directory: train.biom,
        test.biom, valid.biom, metadata.txt, batch_priors.txt, basis.nwk.
        """
        np.random.seed(0)
        torch.manual_seed(0)
        self.k, self.D, self.N, self.M, self.C = 10, 50, 500, 100000, 3
        self.sims = multinomial_batch_bioms(
            k=self.k, D=self.D, N=self.N, M=self.M, C=self.C)

        Y = self.sims['Y']
        tenth = Y.shape[0] // 10
        samp_ids = [str(i) for i in range(Y.shape[0])]
        obs_ids = [str(j) for j in range(Y.shape[1])]

        # 8/10 of the rows for training, 1/10 for testing, rest to validate
        windows = (
            ('train', slice(None, tenth * 8)),
            ('test', slice(tenth * 8, tenth * 9)),
            ('valid', slice(tenth * 9, None)),
        )
        splits = [(name, Table(Y[rows].T, obs_ids, samp_ids[rows]))
                  for name, rows in windows]
        for name, split in splits:
            with biom_open(name + '.biom', 'w') as f:
                split.to_hdf5(f, name)

        sample_md = pd.DataFrame(
            {'batch_category': self.sims['batch_idx']}, index=samp_ids)
        sample_md.index.name = 'sampleid'
        sample_md.to_csv('metadata.txt', sep='\t')

        pd.Series(self.sims['alphaILR']).to_csv('batch_priors.txt', sep='\t')
        self.sims['tree'].write('basis.nwk')
Ejemplo n.º 7
0
def deposit_blocktable(output_dir, abs_table, rel_table, metadata, truth,
                       sample_id):
    """ Writes down the block tables, metadata and ground truth into files.

    Both tables are read through ``.T.values``, ``.columns.values`` and
    ``.index.values`` (i.e. treated like pandas DataFrames): the columns
    become observation ids and the index sample ids of the BIOM table.

    Parameters
    ----------
    output_dir : str
        output directory
    abs_table : pd.DataFrame
        Table of absolute abundances, written to
        ``abs_table.<sample_id>.biom``
    rel_table : pd.DataFrame
        Table of relative abundances, written to
        ``rel_table.<sample_id>.biom``
    metadata : pd.DataFrame
        Sample metadata, written tab-separated to
        ``metadata.<sample_id>.txt``
    truth : pd.DataFrame or pd.Series
        Ground truth, written tab-separated to ``truth.<sample_id>.txt``
        (only ``to_csv`` is called on it)
    sample_id : str
        sample id, embedded in every output file name
    """
    # The original swapped these two paths, so the absolute table ended up
    # in rel_table.*.biom and the relative table in abs_table.*.biom.
    output_abstable = "%s/abs_table.%s.biom" % (
        output_dir, sample_id)
    output_reltable = "%s/rel_table.%s.biom" % (
        output_dir, sample_id)
    output_metadata = "%s/metadata.%s.txt" % (
        output_dir, sample_id)
    output_truth = "%s/truth.%s.txt" % (
        output_dir, sample_id)

    abs_t = Table(abs_table.T.values,
                  abs_table.columns.values,
                  abs_table.index.values)
    with biom_open(output_abstable, 'w') as f:
        abs_t.to_hdf5(f, generated_by='moi')

    rel_t = Table(rel_table.T.values,
                  rel_table.columns.values,
                  rel_table.index.values)
    with biom_open(output_reltable, 'w') as f:
        rel_t.to_hdf5(f, generated_by='moi')

    metadata.to_csv(output_metadata, sep='\t')
    truth.to_csv(output_truth, sep='\t')
Ejemplo n.º 8
0
def deposit_biofilm(table1, table2, metadata, U, V, edges, it, rep,
                    output_dir):
    """ Writes down tables, metadata, ranks and model matrices into files.

    Columns of ``table1`` / ``table2`` that sum to zero are dropped before
    the tables are written, and the same masks are applied to the rows
    and columns of ``U @ V`` before it is written as the ranks file.

    Parameters
    ----------
    table1 : pd.DataFrame
        First table (written as ``table_microbes``); columns become
        observation ids and the index sample ids
    table2 : pd.DataFrame
        Second table (written as ``table_metabolites``); columns become
        observation ids and the index sample ids
    metadata : pd.DataFrame
        Dataframe of sample metadata
    U : array_like
        Left factor; ``U @ V`` must have one row per column of ``table1``
    V : array_like
        Right factor; ``U @ V`` must have one column per column of
        ``table2``
    edges : array_like
        Written to the ``edges`` file after masking its columns with the
        non-zero columns of ``table1``. Assumed to be 2-D with one column
        per column of ``table1`` -- TODO confirm with the caller.
    it : int
        iteration number
    rep : int
        repetition number; mapped to a lowercase letter, so it must be a
        valid index into the 26-letter alphabet
    output_dir : str
        output directory
    """
    choice = 'abcdefghijklmnopqrstuvwxyz'
    output_microbes = "%s/table_microbes.%d_%s.biom" % (output_dir, it,
                                                        choice[rep])
    output_metabolites = "%s/table_metabolites.%d_%s.biom" % (output_dir, it,
                                                              choice[rep])
    output_md = "%s/metadata.%d_%s.txt" % (output_dir, it, choice[rep])
    output_U = "%s/U.%d_%s.txt" % (output_dir, it, choice[rep])
    output_V = "%s/V.%d_%s.txt" % (output_dir, it, choice[rep])
    output_B = "%s/edges.%d_%s.txt" % (output_dir, it, choice[rep])
    output_ranks = "%s/ranks.%d_%s.txt" % (output_dir, it, choice[rep])

    # Masks of the columns that have at least one non-zero entry.
    idx1 = table1.sum(axis=0) > 0
    idx2 = table2.sum(axis=0) > 0
    table1 = table1.loc[:, idx1]
    table2 = table2.loc[:, idx2]

    table1 = Table(table1.values.T, table1.columns, table1.index)
    table2 = Table(table2.values.T, table2.columns, table2.index)

    with biom_open(output_microbes, 'w') as f:
        table1.to_hdf5(f, generated_by='moi1')
    with biom_open(output_metabolites, 'w') as f:
        table2.to_hdf5(f, generated_by='moi2')

    ranks = (U @ V)

    # Keep only the rows / columns of the features that survived above.
    ranks = ranks[idx1, :]
    ranks = ranks[:, idx2]
    ranks = pd.DataFrame(ranks,
                         index=table1.ids(axis='observation'),
                         columns=table2.ids(axis='observation'))
    ranks.to_csv(output_ranks, sep='\t')
    metadata.to_csv(output_md, sep='\t', index_label='#SampleID')

    # NOTE(review): the original read an unassigned local `B` here and so
    # always raised UnboundLocalError. `edges` is the only argument that is
    # destined for the edges file, so it is used instead -- confirm that it
    # really is the matrix the old `B` referred to.
    B = np.asarray(edges)[:, idx1]

    np.savetxt(output_U, U)
    np.savetxt(output_V, V)
    np.savetxt(output_B, B)
Ejemplo n.º 9
0
    def test_divide_table(self):
        """divide_table divides every count by the size of its feature.

        The return value is never used: the tables are checked after the
        call, so the division is expected to happen in place.
        """
        obs = prep_table({
            'S1': {
                'G1': 20,
                'G2': 36,
                'G3': 4
            },
            'S2': {
                'G1': 15,
                'G2': 24,
                'G3': 8
            },
            'S3': {
                'G1': 10,
                'G2': 18,
                'G3': 0
            }
        })
        # BIOM copy of the input, taken before `obs` is modified below
        ob2 = Table(*map(np.array, obs))
        sizes = {'G1': 5, 'G2': 6, 'G3': 2}
        exp = prep_table({
            'S1': {
                'G1': 4,
                'G2': 6,
                'G3': 2
            },
            'S2': {
                'G1': 3,
                'G2': 4,
                'G3': 4
            },
            'S3': {
                'G1': 2,
                'G2': 3,
                'G3': 0
            }
        })

        # regular
        divide_table(obs, sizes)
        for i in range(4):
            self.assertListEqual(obs[i], exp[i])

        # BIOM
        divide_table(ob2, sizes)
        ex2 = Table(*map(np.array, exp))
        self.assertEqual(ob2.descriptive_equality(ex2), 'Tables appear equal')

        # missing size
        del sizes['G3']
        with self.assertRaises(KeyError):
            divide_table(obs, sizes)
Ejemplo n.º 10
0
    def test_verify_subset(self):
        """verify_subset is true only if all table samples are in metadata."""
        metadata = [('a', 'other stuff\tfoo'), ('b', 'asdasdasd'),
                    ('c', '123123123')]
        obs_ids = ['x', 'y']

        # same samples as the metadata
        same = Table(array([[1, 2, 3], [4, 5, 6]]), obs_ids, ['a', 'b', 'c'])
        self.assertTrue(verify_subset(same, metadata))

        # strict subset of the metadata samples
        fewer = Table(array([[1, 2], [3, 4]]), obs_ids, ['a', 'b'])
        self.assertTrue(verify_subset(fewer, metadata))

        # sample 'x' has no metadata entry
        unknown = Table(array([[1, 2, 3], [4, 5, 6]]), obs_ids,
                        ['a', 'b', 'x'])
        self.assertFalse(verify_subset(unknown, metadata))
Ejemplo n.º 11
0
 def test_biom_match_tips_intersect_tips(self):
     """match_tips drops table features that are absent from the tree."""
     # there are fewer tree tips (a, b, c) than table features (a, b, c, d)
     samp_ids = ['s1', 's2', 's3', 's4']
     full_counts = np.array([[0, 0, 1, 1], [2, 3, 4, 4], [5, 5, 3, 3],
                             [0, 0, 0, 1]]).T
     table = Table(full_counts, ['a', 'b', 'c', 'd'], samp_ids)
     tree = TreeNode.read([u"((a,b)f,c)r;"])

     kept_counts = np.array(
         [[0, 0, 1], [2, 3, 4], [5, 5, 3], [0, 0, 0]]).T
     exp_table = Table(kept_counts, ['a', 'b', 'c'], samp_ids)
     exp_tree = tree

     res_table, res_tree = match_tips(table, tree)
     self.assertEqual(exp_table, res_table)
     self.assertEqual(str(exp_tree), str(res_tree))
Ejemplo n.º 12
0
 def setUp(self):
     """Build two tables that share ids and taxonomy but differ in counts."""
     obs_ids = ['O1', 'O2', 'O3', 'O4']
     samp_ids = ['S1', 'S2', 'S3']
     # one taxonomy entry per observation, alternating the second level
     obs_md = [{'taxonomy': ['foo', second]}
               for second in ('bar', 'not bar', 'bar', 'not bar')]

     counts1 = array([[0, 1, 2], [0, 0, 1], [1, 1, 0], [3, 0, 1]])
     counts2 = array([[0, 1, 2], [1, 0, 1], [0, 1, 0], [3, 0, 1]])
     self.t1 = Table(counts1, obs_ids, samp_ids, obs_md)
     self.t2 = Table(counts2, obs_ids, samp_ids, obs_md)
Ejemplo n.º 13
0
    def test_biom_match_tips_intersect_tree_immutable(self):
        """match_tips must leave its table and tree arguments untouched."""
        counts = [[0, 0, 1], [2, 3, 4], [5, 5, 3], [0, 0, 1]]
        feat_ids = ['a', 'b', 'd']
        samp_ids = ['s1', 's2', 's3', 's4']

        table = Table(np.array(counts).T, feat_ids, samp_ids)
        # independent copy of the input to compare against afterwards
        exp_table = Table(np.array(counts).T, feat_ids, samp_ids)

        tree = TreeNode.read([u"(((a,b)f, c),d)r;"])
        # the results are discarded on purpose; only the inputs are checked
        match_tips(table, tree)

        self.assertEqual(exp_table, table)
        self.assertEqual(str(tree), u"(((a,b)f,c),d)r;\n")
Ejemplo n.º 14
0
    def setUp(self):
        """Build the table and metadata fixtures.

        Sets ``self.table`` (7 observations x 6 samples), ``self.big_table``
        (same observations plus an extra sample ``s9``), ``self.metadata``
        (samples s1-s8), ``self.trimmed_metadata`` (s1-s6 only) and
        ``self.metadata_dup`` (metadata with rows s2 and s4 repeated).
        """
        X = np.array([[10, 1, 4, 1, 4, 0], [0, 0, 2, 0, 2, 8],
                      [0, 1, 2, 1, 2, 4], [0, 1, 0, 1, 0,
                                           0], [2, 0, 0, 0, 0, 0],
                      [1, 0, 0, 0, 0, 0], [7, 1, 0, 1, 0, 0]])
        oids = ['o1', 'o2', 'o3', 'o4', 'o5', 'o6', 'o7']
        sids = ['s1', 's2', 's3', 's4', 's5', 's6']

        bigX = np.array([[10, 1, 4, 1, 4, 1, 0], [0, 0, 2, 0, 2, 1, 8],
                         [0, 1, 2, 1, 2, 1, 4], [0, 1, 0, 1, 0, 1, 0],
                         [2, 0, 0, 0, 0, 1, 0], [1, 0, 0, 0, 0, 1, 0],
                         [4, 0, 0, 0, 0, 1, 0]])

        self.big_table = Table(
            bigX,
            oids,
            sids + ['s9'],
        )

        # vstack mixes numeric and string rows, so every cell starts out
        # as a string; 'continuous' is cast back to float just below.
        self.metadata = pd.DataFrame(
            np.vstack(
                (np.ones(8), np.array(['a', 'a', 'b', 'b', 'a', 'a', 'b',
                                       'a']), np.arange(8).astype(np.float64),
                 np.array([
                     'Test', 'Test', 'Train', 'Train', 'Train', 'Train',
                     'Test', 'Train'
                 ]))).T,
            columns=['intercept', 'categorical', 'continuous', 'train'],
            index=['s1', 's2', 's3', 's4', 's5', 's6', 's7', 's8'])
        self.metadata['continuous'] = self.metadata['continuous'].astype(
            np.float64)
        self.trimmed_metadata = self.metadata.loc[[
            's1', 's2', 's3', 's4', 's5', 's6'
        ]]
        df = pd.DataFrame([{
            'intercept': 1,
            'categorical': 'b',
            'continuous': 1.,
            'train': 'Train'
        }, {
            'intercept': 1,
            'categorical': 'b',
            'continuous': 1.,
            'train': 'Train'
        }],
                          index=['s2', 's4'])
        df = df.reindex(
            columns=['intercept', 'categorical', 'continuous', 'train'])
        # DataFrame.append was removed in pandas 2.0; concat is the
        # equivalent way to stack the duplicated rows under the metadata.
        self.metadata_dup = pd.concat([self.metadata, df])
        self.table = Table(X, oids, sids)
Ejemplo n.º 15
0
    def test_validate_prefix(self):
        """validate() accepts sample ids that lack the study prefix.

        The BIOM file uses S1-S3 while the mocked prep template reports
        1.S1-1.S3; the validated table is expected to carry the prefixed
        ids.
        """
        server = "https://test_server.com"
        httpretty.register_uri(
            httpretty.POST,
            server + "/qiita_db/jobs/job-id/step/")
        httpretty.register_uri(
            httpretty.GET,
            server + "/qiita_db/prep_template/1/data",
            body='{"data": {"1.S1": {"orig_name": "S1"}, "1.S2": '
            '{"orig_name": "S2"}, "1.S3": {"orig_name": "S3"}}}')

        # Write a BIOM file whose sample ids are not prefixed
        handle, biom_fp = mkstemp(suffix=".biom")
        close(handle)
        counts = np.asarray([[0, 0, 1], [1, 3, 42]])
        unprefixed = Table(counts, ['O1', 'O2'], ['S1', 'S2', 'S3'])
        with biom_open(biom_fp, 'w') as f:
            unprefixed.to_hdf5(f, "Test")

        self._clean_up_files.append(biom_fp)

        self.parameters['files'] = '{"BIOM": ["%s"]}' % biom_fp

        obs_success, obs_ainfo, obs_error = validate(
            self.qclient, 'job-id', self.parameters, self.out_dir)

        exp_biom_fp = join(self.out_dir, basename(biom_fp))
        self._clean_up_files.append(exp_biom_fp)

        self.assertTrue(obs_success)
        self.assertEqual(obs_ainfo, [[None, 'BIOM', [exp_biom_fp, 'biom']]])
        self.assertEqual(obs_error, "")

        validated = load_table(exp_biom_fp)
        self.assertItemsEqual(validated.ids(), ["1.S1", "1.S2", "1.S3"])
Ejemplo n.º 16
0
    def _create_job_and_biom(self, sample_ids, template=None, analysis=None):
        """Write a random BIOM table and register a validate job for it.

        Returns the path of the BIOM file, the id of the created job and
        the parameters dictionary that was sent with the job.
        """
        # Create the BIOM table that needs to be validated
        handle, biom_fp = mkstemp(suffix=".biom")
        close(handle)
        counts = np.random.randint(100, size=(2, len(sample_ids)))
        biom_table = Table(counts, ['O1', 'O2'], sample_ids)
        with biom_open(biom_fp, 'w') as f:
            biom_table.to_hdf5(f, "Test")
        self._clean_up_files.append(biom_fp)

        # Create a new job
        parameters = {
            'template': template,
            'files': dumps({'biom': [biom_fp]}),
            'artifact_type': 'BIOM',
            'analysis': analysis
        }
        payload = {
            'command': dumps(['BIOM type', '2.1.4', 'Validate']),
            'parameters': dumps(parameters),
            'status': 'running'
        }
        response = self.qclient.post('/apitest/processing_job/',
                                     data=payload)

        return biom_fp, response['job'], parameters
Ejemplo n.º 17
0
def test_get_stats():
    """Compare get_stats(table) with values recomputed by hand.

    ``h`` is checked against the entropy of a pooled distribution over
    observations and ``jsd`` against that entropy minus an average of the
    per-sample entropies (presumably the Jensen-Shannon divergence, going
    by the name). Checked on raw counts first, then again after
    ``table.norm()``.

    NOTE: the statement order matters. ``data`` is normalised in place
    half-way through, and the second half relies on that.
    """
    def entropy(p):
        # Zero entries are dropped so that log() is never taken of zero.
        p = p[p != 0]
        return -(p * log(p)).sum()

    data = array([[0, 0, 1], [1, 3, 42]], dtype=float)
    table = Table(data, ['O1', 'O2'], ['S1', 'S2', 'S3'])
    h, jsd = get_stats(table)

    # Pooled distribution: row (observation) totals divided by their sum.
    p = data.sum(axis=1)
    p /= p.sum()
    assert_almost_equal(h, entropy(p))
    avg_h = 0
    # Each sample is weighted by its share of the total count. The weights
    # are computed before the loop below rescales `data`.
    weights = [p.sum() / data.sum() for p in data.T]
    for w, p in zip(weights, data.T):
        # `p` is a view on one column of `data`, so this in-place division
        # normalises `data` itself, column by column.
        p /= p.sum()
        avg_h += w * entropy(p)
    assert_almost_equal(jsd, h - avg_h)

    # NOTE(review): the return value is discarded, so this presumably
    # normalises `table` in place -- confirm against the Table API.
    table.norm()
    h, jsd = get_stats(table)

    # The columns of `data` already sum to ~1 because of the loop above, so
    # this division changes little; it keeps the intent explicit.
    data /= data.sum(axis=0)
    # With normalised samples every one of the 3 samples weighs 1/3.
    p = data.sum(axis=1) / 3.
    assert_almost_equal(h, entropy(p))
    avg_h = sum(entropy(p) / 3. for p in data.T)
    assert_almost_equal(jsd, h - avg_h)
 def test_beta_rarefaction_too_many_samples_dropped(self):
     """beta_rarefaction raises ValueError mentioning the 3x3 requirement."""
     # mantel needs 3x3 or larger distance matrix
     counts = np.array([[0, 1, 3], [1, 1, 2]])
     small_table = Table(counts, ['O1', 'O2'], ['S1', 'S2', 'S3'])
     with self.assertRaisesRegex(ValueError, '3x3 in size'):
         beta_rarefaction(self.output_dir, small_table, 'braycurtis',
                          'upgma', self.md, 2)
Ejemplo n.º 19
0
    def setUp(self):
        """Build the table and metadata fixtures.

        Sets ``self.table`` (7 observations x 6 samples), ``self.metadata``
        (samples s1-s8) and ``self.trimmed_metadata`` (s1-s6 only).
        """
        counts = np.array(
            [[10, 1, 4, 1, 4, 0],
             [0, 0, 2, 0, 2, 8],
             [0, 1, 2, 1, 2, 4],
             [0, 1, 0, 1, 0, 0],
             [2, 0, 0, 0, 0, 0],
             [1, 0, 0, 0, 0, 0],
             [7, 1, 0, 1, 0, 0]]
        )
        oids = ['o1', 'o2', 'o3', 'o4', 'o5', 'o6', 'o7']
        sids = ['s1', 's2', 's3', 's4', 's5', 's6']

        # One row per metadata column; stacking mixes numbers and strings,
        # so 'continuous' is cast back to float once the frame exists.
        intercept = np.ones(8)
        categorical = np.array(['a', 'a', 'b', 'b', 'a', 'a', 'b', 'a'])
        continuous = np.arange(8).astype(np.float64)
        train = np.array(['Test', 'Test', 'Train', 'Train',
                          'Train', 'Train', 'Test', 'Train'])
        stacked = np.vstack((intercept, categorical, continuous, train))

        self.metadata = pd.DataFrame(
            stacked.T,
            columns=['intercept', 'categorical', 'continuous', 'train'],
            index=['s1', 's2', 's3', 's4', 's5', 's6', 's7', 's8']
        )
        as_float = self.metadata['continuous'].astype(np.float64)
        self.metadata['continuous'] = as_float
        self.trimmed_metadata = self.metadata.loc[
            ['s1', 's2', 's3', 's4', 's5', 's6']
        ]

        self.table = Table(counts, oids, sids)
    def test_band_table(self):
        """band_table(5, 6) reproduces the stored table and parameters.

        Checks the table, the sample metadata and the beta, theta and
        gamma arrays that band_table returns.
        """
        res_table, res_md, res_beta, res_theta, res_gamma = band_table(5, 6)
        mat = np.array(
            [[161.0, 88.0, 26.0, 4.0, 0.0],
             [185.0, 144.0, 40.0, 4.0, 4.0],
             [28.0, 39.0, 156.0, 45.0, 12.0],
             [7.0, 64.0, 50.0, 81.0, 56.0],
             [0.0, 29.0, 83.0, 217.0, 194.0],
             [0.0, 0.0, 19.0, 54.0, 127.0]]
        )

        samp_ids = ['S0', 'S1', 'S2', 'S3', 'S4']
        feat_ids = ['F0', 'F1', 'F2', 'F3', 'F4', 'F5']

        exp_table = Table(mat, feat_ids, samp_ids)
        exp_md = pd.DataFrame({'G': [2., 4., 6., 8., 10.]},
                              index=samp_ids)
        exp_beta = np.array(
            [[-0.28284271, -0.48989795, -0.69282032, -0.89442719, -1.09544512]]
        )

        exp_theta = np.array(
            [2.23148138, 3.64417845, 3.9674706, 3.32461839, 2.31151262]
        )

        exp_gamma = np.array(
            [0.79195959, 1.89427207, 3.41791359, 5.36656315, 7.74114548]
        )

        self.assertEqual(exp_table, res_table)
        pdt.assert_frame_equal(exp_md, res_md)
        npt.assert_allclose(exp_beta, res_beta)
        npt.assert_allclose(exp_theta, res_theta)
        # gamma was computed and stored above but never compared before
        npt.assert_allclose(exp_gamma, res_gamma)
Ejemplo n.º 21
0
    def setUp(self):
        """Mock the Qiita server and create the BIOM file under test.

        Sets ``self.qclient``, ``self.biom_fp``, ``self.out_dir``,
        ``self.parameters`` and ``self._clean_up_files``.
        """
        # Register the URIs for the QiitaClient
        httpretty.register_uri(
            httpretty.POST,
            "https://test_server.com/qiita_db/authenticate/",
            body='{"access_token": "token", "token_type": "Bearer", '
            '"expires_in": "3600"}')

        self.qclient = QiitaClient('https://test_server.com', 'client_id',
                                   'client_secret')

        # Create a biom table
        handle, self.biom_fp = mkstemp(suffix=".biom")
        close(handle)
        counts = np.asarray([[0, 0, 1], [1, 3, 42]])
        biom_table = Table(counts, ['O1', 'O2'], ['1.S1', '1.S2', '1.S3'])
        with biom_open(self.biom_fp, 'w') as f:
            biom_table.to_hdf5(f, "Test")

        self.out_dir = mkdtemp()
        self.parameters = {
            'template': 1,
            'files': '{"BIOM": ["%s"]}' % self.biom_fp,
            'artifact_type': 'BIOM'
        }

        # Paths recorded here are presumably removed in tearDown
        self._clean_up_files = [self.biom_fp, self.out_dir]
Ejemplo n.º 22
0
 def setUp(self):
     """Create four sequences and a 5-feature x 3-sample table."""
     raw_seqs = ('AACCGGTT', 'AACCGAGG', 'AACCTTTT', 'AACCGCTC')
     self.seqs = tuple(skbio.Sequence(raw) for raw in raw_seqs)

     counts = np.array(
         [[0, 1, 1], [0, 2, 1], [1, 0, 1], [0, 0, 1], [9, 1, 1]])
     feature_ids = ['AACCGG', 'AACCGA', 'AACCTT', 'AACCGC', 'AAAAAA']
     self.table = Table(counts, feature_ids, ['s1', 's2', 's3'])
Ejemplo n.º 23
0
    def test_slice_mapping_file(self):
        """slice_mapping_file keeps only the rows of the table's samples."""
        # the header is parsed but not needed by this test
        _header, metadata = parse_mapping_file(StringIO(test_mapping))
        two_samples = Table(array([[1, 2], [4, 5]]), ['x', 'y'], ['a', 'c'])

        expected = ["a\t1\t123123", "c\tpoop\tdoesn't matter"]
        observed = slice_mapping_file(two_samples, metadata)
        self.assertEqual(observed, expected)
Ejemplo n.º 24
0
def hashing(unhashed_otu_table_list, unhashed_rep_seqs_list,
            sample_metadata_list):
    """Merge hashed OTU tables, representative sequences and metadata.

    Writes ``otu_table.biom``, ``rep_seqs.fasta`` and
    ``sample_metadata.tsv`` into the current working directory. The ids
    of the merged OTU table must match the hashed sequence ids exactly.
    """
    # Create OTU table: outer-join the hashed tables, absent counts -> 0
    hashed_tables = [hash_otu_table(unhashed)
                     for unhashed in unhashed_otu_table_list]
    otu_df = pd.concat(hashed_tables, join="outer", axis=1)
    otu_df.fillna(0.0, inplace=True)
    otu_table = Table(otu_df.values, list(otu_df.index), list(otu_df.columns))

    # Create rep seqs; hash_rep_seqs receives the shared id set on each call
    rep_seq_ids = set()
    seqs = []
    for unhashed_rep_seqs in unhashed_rep_seqs_list:
        seqs.extend(hash_rep_seqs(unhashed_rep_seqs, rep_seq_ids))

    assert set(otu_df.index) == rep_seq_ids
    assert len(otu_df.index) == len(rep_seq_ids)

    # Merge sample metadata
    # NOTE(review): the separator below is backslash followed by "t", not
    # a tab character; presumably this code lives inside a template that
    # unescapes it -- confirm before running it as plain Python.
    metadata_frames = [pd.read_csv(path, sep="\\t")
                       for path in sample_metadata_list]
    sample_metadata = pd.concat(metadata_frames)

    # Write files
    sample_metadata.to_csv("sample_metadata.tsv", sep="\\t", index=False)
    with biom_open("otu_table.biom", "w") as fid:
        otu_table.to_hdf5(fid,
                          "Constructed by micone in dada2/deblur pipeline")
    with open("rep_seqs.fasta", "w") as fid:
        FastaIO.FastaWriter(fid, wrap=None).write_file(seqs)
Ejemplo n.º 25
0
def import_shogun_biom(f,
                       annotation_table=None,
                       annotation_type=None,
                       names_to_taxonomy=False):
    """Load a tab-separated table into a BIOM Table.

    Parameters
    ----------
    f : str or file-like
        Tab-separated table; the first column holds the observation ids
        and the header row the sample ids.
    annotation_table : optional
        If not None, it is parsed with the parser selected by
        ``annotation_type`` and added as observation metadata.
    annotation_type : str, optional
        One of 'module', 'pathway' or 'enzyme'; any other value raises
        KeyError when ``annotation_table`` is given.
    names_to_taxonomy : bool, optional
        If true, every observation id is split on ';' and stored as its
        'taxonomy' metadata.

    Returns
    -------
    biom.Table
    """
    parsers = {
        'module': shogun_parse_module_table,
        'pathway': shogun_parse_pathway_table,
        'enzyme': shogun_parse_enzyme_table
    }

    frame = pd.read_csv(f, sep='\t', index_col=0)
    obs_ids = [str(label) for label in frame.index]
    samp_ids = [str(label) for label in frame.columns]
    bt = Table(frame.values, observation_ids=obs_ids, sample_ids=samp_ids)

    if names_to_taxonomy:
        taxonomy = {}
        for name in bt.ids(axis='observation'):
            taxonomy[name] = {'taxonomy': name.split(';')}
        bt.add_metadata(taxonomy, axis='observation')

    if annotation_table is not None:
        parse = parsers[annotation_type]
        bt.add_metadata(parse(annotation_table), axis='observation')

    return bt
Ejemplo n.º 26
0
    def setUp(self):
        """Create a 3x2 table with taxonomy metadata and a metadata frame."""
        shared_ranks = ['foo', 'bar', 'baz', 'bazilus', 'foospiracea']
        # the three lineages only differ in their last two ranks
        tails = (['spam', 'egg'], ['spam', 'gleep'], ['bloop', 'Boatface'])
        om = [{'taxonomy': shared_ranks + tail} for tail in tails]

        self.test_table = Table(data=np.array([[1, 2], [3, 4], [5, 6]]),
                                observation_ids=['o1', 'o2', 'o3'],
                                sample_ids=['s1', 's2'],
                                observation_metadata=om)

        rows = [[True, '++', '00001'],
                [False, ':L', '02341'],
                [True, '8L', '00221'],
                [True, '}8L', '00501']]
        # NOTE(review): 'SampleID' 'IS_GOOD' has no comma in between, so the
        # two literals are joined into one column named 'SampleIDIS_GOOD'.
        # That looks unintended, but the rows only hold three values, so a
        # comma alone would not fix it -- confirm the intended name.
        column_names = ['SampleID' 'IS_GOOD', 'LOL', 'PLEL']
        self.md = pd.DataFrame(data=rows,
                               columns=column_names,
                               index=['s0', 's1', 's2', 's3'])
Ejemplo n.º 27
0
def noisify(table_file, metadata_file, sigma, output_file):
    """Perturb a BIOM table with noise and resample it.

    The table is loaded with samples as rows, each row is perturbed with
    compositional noise and resampled by ``multinomial_sample``. The
    features that ended up with no counts are dropped from the output.

    ``metadata_file`` is overwritten: the columns 'observed' and
    'unobserved' (number of features with / without counts) are added.
    ``sigma`` is accepted but not used by this function.
    """
    metadata = pd.read_table(metadata_file, index_col=0)
    biom_table = load_table(table_file)
    dense = pd.DataFrame(np.array(biom_table.matrix_data.todense()).T,
                         index=biom_table.ids(axis='sample'),
                         columns=biom_table.ids(axis='observation'))
    n_samples, n_features = dense.shape

    # identity covariance with one dimension less than there are features
    noise = compositional_noise(np.eye(n_features - 1), nsamp=n_samples)
    values = dense.values
    perturbed = np.vstack(
        [perturb(values[i, :], noise[i, :]) for i in range(n_samples)])

    # note that this assumes that the column is named `library_size`
    noisy = pd.DataFrame(
        multinomial_sample(perturbed, depths=metadata['library_size']))
    noisy.index = dense.index
    noisy.columns = list(dense.columns)

    feature_totals = noisy.sum(axis=0)
    metadata['observed'] = np.sum(feature_totals > 0)
    metadata['unobserved'] = np.sum(feature_totals == 0)
    metadata.to_csv(metadata_file, sep='\t')

    # drop zeros -- they are not informative
    noisy = noisy.loc[:, feature_totals > 0]
    out_table = Table(noisy.T.values, noisy.columns.values,
                      noisy.index.values)
    with biom_open(output_file, 'w') as f:
        out_table.to_hdf5(f, generated_by='moi')
Ejemplo n.º 28
0
def deposit(table, groups, truth, output_table, output_groups, output_truth):
    """Write the table as BIOM, the groups as TSV and the truth as text.

    ``table`` is read like a DataFrame: its columns become observation
    ids and its index sample ids. ``truth`` must be an iterable of
    strings; it is written on a single line, joined by commas.
    """
    biom_table = Table(table.T.values, table.columns.values,
                       table.index.values)
    with biom_open(output_table, 'w') as handle:
        biom_table.to_hdf5(handle, generated_by='moi')

    groups.to_csv(output_groups, sep='\t')

    truth_line = ','.join(truth)
    with open(output_truth, 'w') as handle:
        handle.write(truth_line)
Ejemplo n.º 29
0
    def test_faith_pd_invalid_input(self):
        """faith_pd raises IOError when either input file does not exist."""
        # These tests are based on the skbio tests. Checks for duplicate
        # ids and negative counts are not included yet but should be added.
        newick = ('((OTU1:0.1, OTU2:0.2):0.3, (OTU3:0.5, OTU4:0.7):1.1)'
                  'root;')
        tree = TreeNode.read(StringIO(newick))
        otu_ids = ['OTU%d' % i for i in range(1, 5)]
        u_counts = [1, 1, 0, 0]

        # a single sample 'u', one row per OTU
        bt = Table(np.array([u_counts]).T, otu_ids, ['u'])

        tmp = gettempdir()
        table_fp = os.path.join(tmp, 'table.biom')
        tree_fp = os.path.join(tmp, 'tree.biom')
        self.files_to_delete.append(table_fp)
        self.files_to_delete.append(tree_fp)

        with biom_open(table_fp, 'w') as fhdf5:
            bt.to_hdf5(fhdf5, 'Table for unit testing')
        tree.write(tree_fp)

        # missing table with a valid tree, then valid table with no tree
        self.assertRaises(IOError, faith_pd, 'dne.biom', tree_fp)
        self.assertRaises(IOError, faith_pd, table_fp, 'dne.tre')
Ejemplo n.º 30
0
def hash_otu_table(unhashed_otu_table, seqid_hash_dict, output_file):
    """Rewrite a BIOM table with its observation ids replaced by hashes.

    Every observation id of the input table must be a key of
    ``seqid_hash_dict`` (KeyError otherwise); the mapped value becomes
    the new id. The result is written to ``output_file`` as HDF5 BIOM.
    """
    frame = load_table(unhashed_otu_table).to_dataframe(dense=True)
    frame.index = [seqid_hash_dict[seq_id] for seq_id in frame.index]
    hashed = Table(frame.values, list(frame.index), list(frame.columns))
    with biom_open(output_file, "w") as fid:
        hashed.to_hdf5(fid, "Constructed using qiime1 clustering")