Exemple #1
0
def compositional_variable_features(max_changing, fold_change, reps, intervals,
                                    n_species, library_size, asymmetry,
                                    fold_balance, n_contaminants, lam,
                                    template_biom, template_sample_name,
                                    output_dir):
    """Generate simulated datasets and write each one into ``output_dir``.

    Every item produced by ``compositional_variable_features_generator`` is
    unpacked into a table, a groups object and a truth object, which are
    handed to ``deposit`` together with three index-numbered output paths.

    Parameters
    ----------
    max_changing, fold_change, reps, intervals, n_species, library_size,
    asymmetry, fold_balance, n_contaminants, lam
        Forwarded unchanged, by keyword, to
        ``compositional_variable_features_generator``.
    template_biom
        Argument passed to ``load_table`` to obtain a template table, or
        ``None`` to run without a template.
    template_sample_name
        Sample id whose data is pulled from the loaded table
        (``axis='sample'``); only used when ``template_biom`` is given.
    output_dir
        Directory to create; receives ``table.<i>.biom``,
        ``metadata.<i>.txt`` and ``truth.<i>.csv`` for each dataset ``i``.

    Raises
    ------
    OSError
        From ``os.mkdir`` if ``output_dir`` cannot be created, e.g. because
        it already exists.
    """
    # Start from "no template" and only replace it when a table was given.
    template = None
    if template_biom is not None:
        template = load_table(template_biom).data(
            id=template_sample_name, axis='sample')

    simulations = compositional_variable_features_generator(
        max_changing=max_changing,
        fold_change=fold_change,
        reps=reps,
        intervals=intervals,
        n_species=n_species,
        library_size=library_size,
        asymmetry=asymmetry,
        fold_balance=fold_balance,
        n_contaminants=n_contaminants,
        lam=lam,
        template=template)

    os.mkdir(output_dir)
    for idx, (table, groups, truth) in enumerate(simulations):
        deposit(table, groups, truth,
                "%s/table.%d.biom" % (output_dir, idx),
                "%s/metadata.%d.txt" % (output_dir, idx),
                "%s/truth.%d.csv" % (output_dir, idx))
Exemple #2
0
    def test_composition_variable_features_template(self):
        """Check the second generated dataset when a template is supplied.

        Compares the table values (to a 1e-3 tolerance), the metadata frame
        and the list of truly changing features against fixed expectations.
        """
        # test template
        np.random.seed(0)

        gen = compositional_variable_features_generator(
            max_changing=2, fold_change=2, reps=5,
            intervals=2, n_species=5,
            fold_balance=False,
            n_contaminants=2, lam=0.1,
            template=np.array([7.0, 3.0, 1.0, 1.0, 2.0, 4.0, 6.0, 1.0, 10.0]))

        # The assertions target the second dataset, so skip the first one.
        next(gen)
        table, metadata, truth = next(gen)

        sample_ids = ['S0', 'S1', 'S2', 'S3', 'S4',
                      'S5', 'S6', 'S7', 'S8', 'S9']

        # Each group consists of five identical samples.
        group0_row = [0.062500, 0.125000, 0.031250, 0.109375,
                      0.171875, 0.499977, 0.000023]
        group1_row = [0.022727, 0.045455, 0.022727, 0.159091,
                      0.250000, 0.499977, 0.000023]
        exp_table = pd.DataFrame(
            [group0_row] * 5 + [group1_row] * 5,
            index=sample_ids,
            columns=['F0', 'F1', 'F2', 'F3', 'F4', 'X0', 'X1'],
        )

        npt.assert_allclose(table.values, exp_table.values,
                            atol=1e-3, rtol=1e-3)

        exp_metadata = pd.DataFrame(
            {'group': [0] * 5 + [1] * 5,
             'n_diff': [4] * 10,
             'effect_size': [2] * 10,
             'library_size': [10000] * 10},
            index=sample_ids,
        )

        # Sort the columns on both sides so column order cannot fail the
        # comparison.  ``reindex(columns=...)`` replaces ``reindex_axis``,
        # which no longer exists in pandas >= 1.0.
        metadata = metadata.reindex(columns=sorted(metadata.columns))
        exp_metadata = exp_metadata.reindex(
            columns=sorted(exp_metadata.columns))
        pdt.assert_frame_equal(metadata, exp_metadata)

        exp_truth = ['F0', 'F1', 'F3', 'F4']
        self.assertListEqual(truth, exp_truth)
Exemple #3
0
    def test_composition_variable_features(self):
        """Check the second generated dataset when no template is supplied.

        Compares the table (values to a 1e-3 tolerance, plus row and column
        labels), the metadata frame and the list of truly changing features
        against fixed expectations.
        """
        gen = compositional_variable_features_generator(
            max_changing=2, fold_change=2, reps=5,
            intervals=2, n_species=5,
            fold_balance=False,
            n_contaminants=2, lam=0.1)

        # The assertions target the second dataset, so skip the first one.
        next(gen)
        table, metadata, truth = next(gen)

        sample_ids = ['S0', 'S1', 'S2', 'S3', 'S4',
                      'S5', 'S6', 'S7', 'S8', 'S9']

        # Each group consists of five identical samples; the last two
        # entries of every row belong to the X0/X1 columns.
        tail = [0.499977, 0.00002269]
        group0_row = np.array([0.142857] * 2 + [0.071429] * 3 + tail)
        group1_row = np.array([0.071429] * 3 + [0.142857] * 2 + tail)
        exp_table = pd.DataFrame(
            closure(np.vstack([group0_row] * 5 + [group1_row] * 5)),
            index=sample_ids,
            columns=['F0', 'F1', 'F2', 'F3', 'F4', 'X0', 'X1'],
        )

        # ``assert_frame_equal(check_less_precise=True)`` was removed in
        # pandas 2.0, so compare the values with an explicit tolerance and
        # check the labels separately.
        np.testing.assert_allclose(table.values, exp_table.values,
                                   atol=1e-3, rtol=1e-3)
        self.assertListEqual(list(table.index), list(exp_table.index))
        self.assertListEqual(list(table.columns), list(exp_table.columns))

        exp_metadata = pd.DataFrame(
            {'group': [0] * 5 + [1] * 5,
             'n_diff': [4] * 10,
             'effect_size': [2] * 10,
             'library_size': [10000] * 10},
            index=sample_ids,
        )

        # Sort the columns on both sides so column order cannot fail the
        # comparison.  ``reindex(columns=...)`` replaces ``reindex_axis``,
        # which no longer exists in pandas >= 1.0.
        metadata = metadata.reindex(columns=sorted(metadata.columns))
        exp_metadata = exp_metadata.reindex(
            columns=sorted(exp_metadata.columns))
        pdt.assert_frame_equal(metadata, exp_metadata)

        exp_truth = ['F0', 'F1', 'F3', 'F4']
        self.assertListEqual(truth, exp_truth)
Exemple #4
0
    def test_composition_variable_features_balanced(self):
        """Check the second generated dataset with ``fold_balance=True``.

        Compares the table values (to a 1e-3 tolerance), the metadata frame
        and the list of truly changing features against fixed expectations.
        """
        gen = compositional_variable_features_generator(
            max_changing=2, fold_change=2, reps=5,
            intervals=2, n_species=5,
            fold_balance=True,
            n_contaminants=2, lam=0.1)

        # The assertions target the second dataset, so skip the first one.
        next(gen)
        table, metadata, truth = next(gen)

        sample_ids = ['S0', 'S1', 'S2', 'S3', 'S4',
                      'S5', 'S6', 'S7', 'S8', 'S9']

        # Each group consists of five identical samples.
        group0_row = [0.100000, 0.100000, 0.100000, 0.100000,
                      0.100000, 0.499977, 0.000023]
        group1_row = [0.041667, 0.041667, 0.083333, 0.166667,
                      0.166667, 0.499977, 0.000023]
        exp_table = pd.DataFrame(
            [group0_row] * 5 + [group1_row] * 5,
            index=sample_ids,
            columns=['F0', 'F1', 'F2', 'F3', 'F4', 'X0', 'X1'],
        )

        npt.assert_allclose(table.values, exp_table.values,
                            atol=1e-3, rtol=1e-3)

        exp_metadata = pd.DataFrame(
            {'group': [0] * 5 + [1] * 5,
             'n_diff': [4] * 10,
             'effect_size': [2] * 10,
             'library_size': [10000] * 10},
            index=sample_ids,
        )

        # Sort the columns on both sides so column order cannot fail the
        # comparison.  ``reindex(columns=...)`` replaces ``reindex_axis``,
        # which no longer exists in pandas >= 1.0.
        metadata = metadata.reindex(columns=sorted(metadata.columns))
        exp_metadata = exp_metadata.reindex(
            columns=sorted(exp_metadata.columns))

        pdt.assert_frame_equal(metadata, exp_metadata)

        exp_truth = ['F0', 'F1', 'F3', 'F4']
        self.assertListEqual(truth, exp_truth)
Exemple #5
0
def compositional_variable_features(max_changing, fold_change, reps, intervals,
                                    n_species, asymmetry, n_contaminants, lam,
                                    output_dir):
    """Generate simulated datasets and write each one into ``output_dir``.

    Every item produced by ``compositional_variable_features_generator`` is
    unpacked into a table, a groups object and a truth object, which are
    handed to ``deposit`` together with three index-numbered output paths.

    Parameters
    ----------
    max_changing, fold_change, reps, intervals, n_species, asymmetry,
    n_contaminants, lam
        Forwarded unchanged, positionally and in this order, to
        ``compositional_variable_features_generator``.
    output_dir
        Directory to create; receives ``table.<i>.biom``,
        ``metadata.<i>.txt`` and ``truth.<i>.csv`` for each dataset ``i``.

    Raises
    ------
    OSError
        From ``os.mkdir`` if ``output_dir`` cannot be created, e.g. because
        it already exists.
    """
    simulations = compositional_variable_features_generator(
        max_changing, fold_change, reps, intervals,
        n_species, asymmetry, n_contaminants, lam)

    os.mkdir(output_dir)
    for idx, (table, groups, truth) in enumerate(simulations):
        deposit(table, groups, truth,
                "%s/table.%d.biom" % (output_dir, idx),
                "%s/metadata.%d.txt" % (output_dir, idx),
                "%s/truth.%d.csv" % (output_dir, idx))