Esempio n. 1
0
 def test_euclidean_perfect(self):
     # self.mpm1 is compared with itself, so every one of the six sinks
     # is expected to report a distance of zero.
     sink_ids = ['sink1', 'sink2', 'sink3', 'sink4', 'sink5', 'sink6']
     zero_distances = [0.0] * 6
     expected = pd.DataFrame(zero_distances,
                             columns=['Euclidean distance'],
                             index=sink_ids)
     observed = compare_sinks(self.mpm1, self.mpm1, 'euclidean')
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 2
0
    def test_default_valid_multi_line(self):
        # The 'blast7_default_multi_line' and 'legacy9_multi_line' data
        # files are both expected to parse into these twelve columns.
        column_names = ['qseqid', 'sseqid', 'pident', 'length', 'mismatch',
                        'gapopen', 'qstart', 'qend', 'sstart', 'send',
                        'evalue', 'bitscore']

        blast7_rows = [
            ['query1', 'subject2', 70.00, 5.0, 0.0, 0.0, 7.0, 60.0, 3.0,
             100.0, 9e-05, 10.5],
            ['query1', 'subject2', 30.00, 8.0, 0.0, 0.0, 6.0, 15.0, 1.0,
             100.0, 0.053, 12.0],
            ['query1', 'subject2', 90.00, 2.0, 0.0, 0.0, 9.0, 35.0, 2.0,
             100.0, 0.002, 8.3],
        ]
        observed = _blast7_to_data_frame(
            get_data_path('blast7_default_multi_line'))
        assert_data_frame_almost_equal(
            observed, pd.DataFrame(blast7_rows, columns=column_names))

        legacy9_rows = [
            ['query1', 'subject1', 90.00, 7.0, 1.0, 0.0, 0.0, 8.0, 4.0,
             10.0, 1e-05, 15.5],
            ['query1', 'subject1', 70.00, 8.0, 0.0, 1.0, 0.0, 9.0, 5.0,
             7.0, 0.231, 7.8],
            ['query1', 'subject1', 90.00, 5.0, 1.0, 1.0, 0.0, 0.0, 2.0,
             10.0, 0.022, 13.0],
        ]
        observed = _blast7_to_data_frame(get_data_path('legacy9_multi_line'))
        assert_data_frame_almost_equal(
            observed, pd.DataFrame(legacy9_rows, columns=column_names))
Esempio n. 3
0
    def test_ancom_percentiles(self):
        # The second element returned by ancom is compared with the listed
        # per-group percentile values for columns b1 and b2.
        sample_ids = ['s1', 's2', 's3', 's4', 's5', 's6']
        table = pd.DataFrame(
            [[12, 11], [9, 11], [1, 11], [22, 100], [20, 53], [23, 1]],
            index=sample_ids,
            columns=['b1', 'b2'])
        grouping = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'],
                             index=sample_ids)

        # Column labels are (percentile, group) pairs with the group
        # varying slowest.
        column_pairs = []
        for group in ['a', 'b']:
            for pct in [0.0, 25.0, 50.0, 75.0, 100.0]:
                column_pairs.append((pct, group))
        expected_columns = pd.MultiIndex.from_tuples(
            column_pairs, names=['Percentile', 'Group'])

        # One row per (percentile, group) pair; transposed below so that
        # b1 and b2 become the rows.
        percentile_rows = np.array([
            [1.0, 11.0], [5.0, 11.0], [9.0, 11.0], [10.5, 11.0],
            [12.0, 11.0],
            [20.0, 1.0], [21.0, 27.0], [22.0, 53.0], [22.5, 76.5],
            [23.0, 100.0]])
        expected = pd.DataFrame(percentile_rows.T,
                                columns=expected_columns,
                                index=['b1', 'b2'])

        observed = ancom(table, grouping)[1]
        assert_data_frame_almost_equal(observed, expected)
Esempio n. 4
0
    def test_init_default_parameters(self):
        # Only the sequence string is supplied; metadata is expected to be
        # an empty dict and positional metadata an empty 8-row frame.
        raw_sequence = '.-ABCXYZ'
        seq = ExampleGrammaredSequence(raw_sequence)

        expected_values = np.array(raw_sequence, dtype='c')
        npt.assert_equal(seq.values, expected_values)
        self.assertEqual(seq.metadata, {})
        empty_positional = pd.DataFrame(index=range(8))
        assert_data_frame_almost_equal(seq.positional_metadata,
                                       empty_positional)
Esempio n. 5
0
 def test_ancom_basic_counts_swapped(self):
     # ancom on table8 / cats8 is compared with the listed W statistics
     # and rejection flags.
     w_stats = np.array([5, 5, 2, 2, 2, 2, 2])
     rejected = np.array([True, True, False, False, False, False, False],
                         dtype=bool)
     expected = pd.DataFrame({'W': w_stats, 'reject': rejected})
     observed = ancom(self.table8, self.cats8)
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 6
0
    def test_default_valid_multi_line(self):
        # The 'blast7_default_multi_line' and 'legacy9_multi_line' data
        # files are both expected to parse into these twelve columns.
        column_names = ['qseqid', 'sseqid', 'pident', 'length', 'mismatch',
                        'gapopen', 'qstart', 'qend', 'sstart', 'send',
                        'evalue', 'bitscore']

        blast7_rows = [
            ['query1', 'subject2', 70.00, 5.0, 0.0, 0.0, 7.0, 60.0, 3.0,
             100.0, 9e-05, 10.5],
            ['query1', 'subject2', 30.00, 8.0, 0.0, 0.0, 6.0, 15.0, 1.0,
             100.0, 0.053, 12.0],
            ['query1', 'subject2', 90.00, 2.0, 0.0, 0.0, 9.0, 35.0, 2.0,
             100.0, 0.002, 8.3],
        ]
        observed = _blast7_to_data_frame(
            get_data_path('blast7_default_multi_line'))
        assert_data_frame_almost_equal(
            observed, pd.DataFrame(blast7_rows, columns=column_names))

        legacy9_rows = [
            ['query1', 'subject1', 90.00, 7.0, 1.0, 0.0, 0.0, 8.0, 4.0,
             10.0, 1e-05, 15.5],
            ['query1', 'subject1', 70.00, 8.0, 0.0, 1.0, 0.0, 9.0, 5.0,
             7.0, 0.231, 7.8],
            ['query1', 'subject1', 90.00, 5.0, 1.0, 1.0, 0.0, 0.0, 2.0,
             10.0, 0.022, 13.0],
        ]
        observed = _blast7_to_data_frame(get_data_path('legacy9_multi_line'))
        assert_data_frame_almost_equal(
            observed, pd.DataFrame(legacy9_rows, columns=column_names))
Esempio n. 7
0
 def test_to_data_frame_3x3(self):
     # self.dm_3x3.to_data_frame() should yield a frame labelled a, b, c
     # on both axes with the values below.
     labels = ['a', 'b', 'c']
     expected = pd.DataFrame([[0.0, 0.01, 4.2],
                              [0.01, 0.0, 12.0],
                              [4.2, 12.0, 0.0]],
                             columns=labels, index=labels)
     observed = self.dm_3x3.to_data_frame()
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 8
0
 def test_ancom_alpha(self):
     # ancom is run on table1 / cats1 with alpha=0.5 and compared with
     # the listed W statistics and rejection flags.
     w_stats = np.array([6, 6, 4, 5, 5, 4, 2])
     rejected = np.array([True, True, False, True, True, False, False],
                         dtype=bool)
     expected = pd.DataFrame({'W': w_stats, 'reject': rejected})
     observed = ancom(self.table1, self.cats1, alpha=0.5)
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 9
0
 def test_to_data_frame_3x3(self):
     # self.dm_3x3.to_data_frame() should yield a frame labelled a, b, c
     # on both axes with the values below.
     labels = ['a', 'b', 'c']
     rows = [[0.0, 0.01, 4.2],
             [0.01, 0.0, 12.0],
             [4.2, 12.0, 0.0]]
     expected = pd.DataFrame(rows, columns=labels, index=labels)
     observed = self.dm_3x3.to_data_frame()
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 10
0
    def test_id_lookup(self):
        # The matrices carry mismatched IDs, so a lookup is passed to
        # pwmantel alongside them.
        self.minx_dm_extra.ids = ['a', 'b', 'c', 'foo']
        self.minz_dm_extra.ids = ['d', 'e', 'f', 'bar']
        id_map = {'a': '0', 'b': '1', 'c': '2', 'foo': 'foo',
                  'd': '0', 'e': '1', 'f': '2', 'bar': 'bar',
                  '0': '0', '1': '1', '2': '2'}

        dm_x = self.minx_dm_extra.filter(['b', 'a', 'foo', 'c'])
        dm_y = self.miny_dm.filter(['0', '2', '1'])
        dm_z = self.minz_dm_extra.filter(['bar', 'e', 'f', 'd'])
        inputs = (dm_x, dm_y, dm_z)

        # Snapshots taken before the call, used for the no-mutation check.
        snapshots = [dm.copy() for dm in inputs]

        np.random.seed(0)

        observed = pwmantel(inputs, alternative='greater', strict=False,
                            lookup=id_map)
        assert_data_frame_almost_equal(
            observed, self.exp_results_reordered_distance_matrices)

        # The inputs must be unchanged after the call.
        for dm, snapshot in zip(inputs, snapshots):
            self.assertEqual(dm, snapshot)
Esempio n. 11
0
 def test_scale_single_column(self):
     # A one-column frame holding 1, 0, 2 is expected to come back from
     # _scale as 0.0, -1.0, 1.0 with its labels unchanged.
     row_labels = ['A', 'B', 'C']
     raw = pd.DataFrame([[1], [0], [2]],
                        columns=['foo'], index=row_labels)
     expected = pd.DataFrame([[0.0], [-1.0], [1.0]],
                             columns=['foo'], index=row_labels)
     assert_data_frame_almost_equal(_scale(raw), expected)
Esempio n. 12
0
 def test_ancom_no_signal(self):
     # With no multiple-comparisons correction, every one of the seven
     # entries is expected to have W == 0 and not be rejected.
     w_stats = np.array([0]*7)
     rejected = np.array([False]*7, dtype=bool)
     expected = pd.DataFrame({'W': w_stats, 'reject': rejected})
     observed = ancom(self.table3, self.cats3,
                      multiple_comparisons_correction=None)
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 13
0
 def test_ancom_alpha(self):
     # ancom is run on table1 / cats1 with alpha=0.5 and compared with
     # the listed W statistics and rejection flags.
     w_stats = np.array([6, 6, 4, 5, 5, 4, 2])
     rejected = np.array([True, True, False, True, True, False, False],
                         dtype=bool)
     expected = pd.DataFrame({'W': w_stats, 'reject': rejected})
     observed = ancom(self.table1, self.cats1, alpha=0.5)
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 14
0
    def test_absolute_difference(self):
        # Two single-sink tables; the source columns of the second table
        # are declared in a different order than those of the first.
        mpm1 = pd.DataFrame({'Unknown': {'sink1': 0.25},
                             'Source1': {'sink1': 0.50},
                             'Source2': {'sink1': 0.25}})
        mpm2 = pd.DataFrame({'Unknown': {'sink1': 0.1},
                             'Source2': {'sink1': 0.8},
                             'Source1': {'sink1': 0.1}})

        observed = compare_sinks(mpm1, mpm2, 'absolute_difference')

        # Expected values were computed by hand.
        expected = pd.DataFrame([(0.4, 0.55, 0.15)],
                                index=['sink1'],
                                columns=['Source1', 'Source2', 'Unknown'])

        # Both frames are column-sorted before the comparison.
        observed_sorted = observed.sort_index(axis=1)
        expected_sorted = expected.sort_index(axis=1)
        assert_data_frame_almost_equal(observed_sorted, expected_sorted)
Esempio n. 15
0
    def test_permutative_f_scaled(self):
        # Runs ancom with the 'permutative-anova' significance test on a
        # closure-transformed table, and checks both the result and that
        # the inputs were left unmodified.
        sample_ids = ['s1', 's2', 's3', 's4', 's5', 's6', 's7', 's8']
        feature_ids = ['b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7']
        counts = [[12, 11, 10, 10, 10, 10, 10],
                  [9, 11, 12, 10, 10, 10, 10],
                  [1, 11, 10, 11, 10, 5, 9],
                  [2, 11, 10, 11, 10, 5, 9],
                  [221, 210, 9, 10, 10, 10, 10],
                  [220, 210, 9, 10, 10, 10, 10],
                  [200, 220, 10, 10, 13, 10, 10],
                  [230, 210, 14, 10, 10, 10, 10]]
        test_table = pd.DataFrame(closure(counts),
                                  index=sample_ids,
                                  columns=feature_ids)
        test_cats = pd.Series([0, 0, 0, 0, 1, 1, 1, 1], index=sample_ids)

        np.random.seed(0)
        table_before = copy.deepcopy(test_table)
        cats_before = copy.deepcopy(test_cats)
        result = ancom(test_table, test_cats,
                       significance_test='permutative-anova')

        # The input table must not have been altered by the call.
        assert_data_frame_almost_equal(table_before, test_table)
        # The input grouping must not have been altered either.
        pdt.assert_series_equal(cats_before, test_cats)

        w_stats = np.array([5, 5, 2, 2, 2, 2, 2])
        rejected = np.array([True, True, False, False, False, False, False],
                            dtype=bool)
        expected = pd.DataFrame({'W': w_stats, 'reject': rejected},
                                index=feature_ids)
        assert_data_frame_almost_equal(result, expected)
Esempio n. 16
0
    def test_ancom_percentiles(self):
        # The second element returned by ancom is compared with the listed
        # per-group percentile values for columns b1 and b2.
        sample_ids = ['s1', 's2', 's3', 's4', 's5', 's6']
        table = pd.DataFrame(
            [[12, 11], [9, 11], [1, 11], [22, 100], [20, 53], [23, 1]],
            index=sample_ids,
            columns=['b1', 'b2'])
        grouping = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'],
                             index=sample_ids)

        # Column labels are (percentile, group) pairs with the group
        # varying slowest.
        column_pairs = []
        for group in ['a', 'b']:
            for pct in [0.0, 25.0, 50.0, 75.0, 100.0]:
                column_pairs.append((pct, group))
        expected_columns = pd.MultiIndex.from_tuples(
            column_pairs, names=['Percentile', 'Group'])

        # One row per (percentile, group) pair; transposed below so that
        # b1 and b2 become the rows.
        percentile_rows = np.array([
            [1.0, 11.0], [5.0, 11.0], [9.0, 11.0], [10.5, 11.0],
            [12.0, 11.0],
            [20.0, 1.0], [21.0, 27.0], [22.0, 53.0], [22.5, 76.5],
            [23.0, 100.0]])
        expected = pd.DataFrame(percentile_rows.T,
                                columns=expected_columns,
                                index=['b1', 'b2'])

        observed = ancom(table, grouping)[1]
        assert_data_frame_almost_equal(observed, expected)
Esempio n. 17
0
 def test_ancom_no_signal(self):
     # With no multiple-comparisons correction, every one of the seven
     # entries is expected to have W == 0 and not be rejected.
     w_stats = np.array([0]*7)
     rejected = np.array([False]*7, dtype=bool)
     expected = pd.DataFrame({'W': w_stats, 'reject': rejected})
     observed = ancom(self.table3, self.cats3,
                      multiple_comparisons_correction=None)
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 18
0
    def test_id_lookup(self):
        # The matrices carry mismatched IDs, so a lookup is passed to
        # pwmantel alongside them.
        self.minx_dm_extra.ids = ['a', 'b', 'c', 'foo']
        self.minz_dm_extra.ids = ['d', 'e', 'f', 'bar']
        id_map = {'a': '0', 'b': '1', 'c': '2', 'foo': 'foo',
                  'd': '0', 'e': '1', 'f': '2', 'bar': 'bar',
                  '0': '0', '1': '1', '2': '2'}

        dm_x = self.minx_dm_extra.filter(['b', 'a', 'foo', 'c'])
        dm_y = self.miny_dm.filter(['0', '2', '1'])
        dm_z = self.minz_dm_extra.filter(['bar', 'e', 'f', 'd'])
        inputs = (dm_x, dm_y, dm_z)

        # Snapshots taken before the call, used for the no-mutation check.
        snapshots = [dm.copy() for dm in inputs]

        np.random.seed(0)

        observed = pwmantel(inputs, alternative='greater', strict=False,
                            lookup=id_map)
        assert_data_frame_almost_equal(
            observed, self.exp_results_reordered_distance_matrices)

        # The inputs must be unchanged after the call.
        for dm, snapshot in zip(inputs, snapshots):
            self.assertEqual(dm, snapshot)
Esempio n. 19
0
 def test_bioenv_different_column_order(self):
     # Passing the columns in reverse reorders the column subsets, and
     # therefore the row labels of the results frame, but the results
     # themselves (e.g., correlation coefficients) should not change.
     reversed_columns = self.cols[::-1]
     observed = bioenv(self.dm, self.df, columns=reversed_columns)
     expected = self.exp_results_different_column_order
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 20
0
    def test_not_equal(self):
        # Frames that must all be mutually unequal under
        # assert_data_frame_almost_equal.
        candidates = [
            self.df,
            # floating point error too large to count as "almost equal"
            pd.DataFrame({'foo': [42, 42.001, np.nan, 0],
                          'bar': ['a', 'b', 'cd', 'e']}),
            # an additional NaN
            pd.DataFrame({'foo': [42, np.nan, np.nan, 0],
                          'bar': ['a', 'b', 'cd', 'e']}),
            # columns in a different order
            pd.DataFrame(self.df, columns=['foo', 'bar']),
            # index in a different order
            pd.DataFrame(self.df, index=np.arange(4)[::-1]),
            # index of a different type
            pd.DataFrame(self.df, index=np.arange(4).astype(float)),
            # assorted "empty" frames that are not equivalent to each other
            pd.DataFrame(),
            pd.DataFrame(index=np.arange(10)),
            pd.DataFrame(columns=np.arange(10)),
            pd.DataFrame(index=np.arange(10), columns=np.arange(10)),
            pd.DataFrame(index=np.arange(9)),
            pd.DataFrame(columns=np.arange(9)),
            pd.DataFrame(index=np.arange(9), columns=np.arange(9))
        ]

        # Every frame must compare equal to itself.
        for frame in candidates:
            assert_data_frame_almost_equal(frame, frame)

        # Permutations rather than combinations, so that each pair is
        # checked in both argument orders.
        for left, right in itertools.permutations(candidates, 2):
            with self.assertRaises(AssertionError):
                assert_data_frame_almost_equal(left, right)
Esempio n. 21
0
    def test_ancom_percentiles_alt_categories(self):
        # Three groups (a, b, c) over a single column b1; the percentiles
        # are passed to ancom explicitly.
        sample_ids = ['s1', 's2', 's3', 's4', 's5', 's6']
        table = pd.DataFrame([[12], [9], [1], [22], [20], [23]],
                             index=sample_ids,
                             columns=['b1'])
        grouping = pd.Series(['a', 'a', 'c', 'b', 'b', 'c'],
                             index=sample_ids)

        percentiles = [0.0, 25.0, 50.0, 75.0, 100.0]

        # Column labels are (percentile, group) pairs with the group
        # varying slowest.
        column_pairs = []
        for group in ['a', 'b', 'c']:
            for pct in percentiles:
                column_pairs.append((pct, group))
        expected_columns = pd.MultiIndex.from_tuples(
            column_pairs, names=['Percentile', 'Group'])

        group_a = [[9.0], [9.75], [10.5], [11.25], [12.0]]
        group_b = [[20.0], [20.5], [21.0], [21.5], [22.0]]
        group_c = [[1.0], [6.5], [12.0], [17.5], [23.0]]
        percentile_rows = np.array(group_a + group_b + group_c)
        expected = pd.DataFrame(percentile_rows.T,
                                columns=expected_columns,
                                index=['b1'])

        observed = ancom(table, grouping, percentiles=percentiles)[1]
        assert_data_frame_almost_equal(observed, expected)
    def test_init_default_parameters(self):
        # Only the sequence string is supplied; metadata is expected to be
        # an empty dict and positional metadata an empty 8-row frame.
        raw_sequence = '.-ABCXYZ'
        seq = ExampleGrammaredSequence(raw_sequence)

        expected_values = np.array(raw_sequence, dtype='c')
        npt.assert_equal(seq.values, expected_values)
        self.assertEqual(seq.metadata, {})
        empty_positional = pd.DataFrame(index=range(8))
        assert_data_frame_almost_equal(seq.positional_metadata,
                                       empty_positional)
Esempio n. 23
0
    def test_pearsonr(self):
        # Two single-sink tables compared with the 'pearson' method.
        mpm1 = pd.DataFrame({'Unknown': {'sink1': 0.25},
                             'Source1': {'sink1': 0.50},
                             'Source2': {'sink1': 0.25}})
        mpm2 = pd.DataFrame({'Unknown': {'sink1': 0.1},
                             'Source1': {'sink1': 0.1},
                             'Source2': {'sink1': 0.8}})

        observed = compare_sinks(mpm1, mpm2, 'pearson')

        # Expected values were obtained by calling scipy.stats.pearsonr
        # directly.
        r_and_p = [(-0.5, 2. / 3)]
        expected = pd.DataFrame(r_and_p,
                                columns=['Pearson r', 'p'],
                                index=['sink1'])
        assert_data_frame_almost_equal(observed, expected)
Esempio n. 24
0
    def test_default_valid_single_line(self):
        # The 'blast7_default_single_line' and 'legacy9_single_line' data
        # files are both expected to parse into these twelve columns.
        column_names = ['qseqid', 'sseqid', 'pident', 'length', 'mismatch',
                        'gapopen', 'qstart', 'qend', 'sstart', 'send',
                        'evalue', 'bitscore']

        blast7_row = ['query1', 'subject2', 100.00, 8.0, 0.0, 0.0, 1.0, 8.0,
                      3.0, 10.0, 9e-05, 16.9]
        observed = _blast7_to_data_frame(
            get_data_path('blast7_default_single_line'))
        assert_data_frame_almost_equal(
            observed, pd.DataFrame([blast7_row], columns=column_names))

        legacy9_row = ['query1', 'subject1', 90.00, 7.0, 1.0, 0.0, 0.0, 8.0,
                       4.0, 10.0, 1e-05, 15.5]
        observed = _blast7_to_data_frame(
            get_data_path('legacy9_single_line'))
        assert_data_frame_almost_equal(
            observed, pd.DataFrame([legacy9_row], columns=column_names))
Esempio n. 25
0
    def test_euclidean(self):
        # Two single-sink tables compared with the 'euclidean' method.
        mpm1 = pd.DataFrame({'Unknown': {'sink1': 0.25},
                             'Source1': {'sink1': 0.50},
                             'Source2': {'sink1': 0.25}})
        mpm2 = pd.DataFrame({'Unknown': {'sink1': 0.1},
                             'Source1': {'sink1': 0.1},
                             'Source2': {'sink1': 0.8}})

        observed = compare_sinks(mpm1, mpm2, 'euclidean')

        # Expected value was obtained by calling
        # scipy.spatial.distance.euclidean directly.
        distances = [0.6964194]
        expected = pd.DataFrame(distances,
                                columns=['Euclidean distance'],
                                index=['sink1'])
        assert_data_frame_almost_equal(observed, expected)
Esempio n. 26
0
 def test_ancom_basic_counts_swapped(self):
     # ancom on table8 / cats8 is compared with the listed W statistics
     # and rejection flags.
     w_stats = np.array([5, 5, 2, 2, 2, 2, 2])
     rejected = np.array([True, True, False, False, False, False, False],
                         dtype=bool)
     expected = pd.DataFrame({'W': w_stats, 'reject': rejected})
     observed = ancom(self.table8, self.cats8)
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 27
0
 def test_euclidean_perfect(self):
     # self.mpm1 is compared with itself, so every one of the six sinks
     # is expected to report a distance of zero.
     sink_ids = ['sink1', 'sink2', 'sink3', 'sink4', 'sink5', 'sink6']
     zero_distances = [0.0] * 6
     expected = pd.DataFrame(zero_distances,
                             columns=['Euclidean distance'],
                             index=sink_ids)
     observed = compare_sinks(self.mpm1, self.mpm1, 'euclidean')
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 28
0
 def test_ancom_theta(self):
     # ancom is run with theta=0.3; only the first element of the
     # returned result is checked.
     w_stats = np.array([5, 5, 2, 2, 2, 2, 2])
     rejected = np.array([True, True, False, False, False, False, False],
                         dtype=bool)
     expected = pd.DataFrame({'W': w_stats,
                              'Reject null hypothesis': rejected})
     result = ancom(self.table1, self.cats1, theta=0.3)
     assert_data_frame_almost_equal(result[0], expected)
Esempio n. 29
0
 def test_ancom_theta(self):
     # ancom is run with theta=0.3; only the first element of the
     # returned result is checked.
     w_stats = np.array([5, 5, 2, 2, 2, 2, 2])
     rejected = np.array([True, True, False, False, False, False, False],
                         dtype=bool)
     expected = pd.DataFrame({'W': w_stats,
                              'Reject null hypothesis': rejected})
     result = ancom(self.table1, self.cats1, theta=0.3)
     assert_data_frame_almost_equal(result[0], expected)
Esempio n. 30
0
 def test_ancom_multiple_comparisons(self):
     # With 'holm-bonferroni' correction and scipy.stats.mannwhitneyu as
     # the significance test, all seven entries are expected to have
     # W == 0 and not be rejected.
     w_stats = np.array([0]*7)
     rejected = np.array([False]*7, dtype=bool)
     expected = pd.DataFrame({'W': w_stats, 'reject': rejected})
     observed = ancom(self.table1, self.cats1,
                      significance_test=scipy.stats.mannwhitneyu,
                      multiple_comparisons_correction='holm-bonferroni')
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 31
0
 def test_custom_valid_single_line(self):
     # The single record in "blast7_custom_single_line" is expected to
     # parse into the eight columns listed below.
     column_names = ['qseqid', 'ppos', 'pident', 'length', 'sgi',
                     'bitscore', 'qend', 'qseq']
     record = ['query1', 100.00, 100.00, 8.0, 0.0, 16.9, 8.0, 'PAAWWWWW']
     expected = pd.DataFrame([record], columns=column_names)
     observed = _blast7_to_data_frame(
         get_data_path("blast7_custom_single_line"))
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 32
0
 def test_custom_valid_single_line(self):
     # The single record in "blast7_custom_single_line" is expected to
     # parse into the eight columns listed below.
     column_names = ['qseqid', 'ppos', 'pident', 'length', 'sgi',
                     'bitscore', 'qend', 'qseq']
     record = ['query1', 100.00, 100.00, 8.0, 0.0, 16.9, 8.0, 'PAAWWWWW']
     expected = pd.DataFrame([record], columns=column_names)
     observed = _blast7_to_data_frame(
         get_data_path("blast7_custom_single_line"))
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 33
0
 def test_ancom_no_percentiles(self):
     # An empty percentiles list is expected to make the second element
     # of ancom's result an empty DataFrame.
     sample_ids = ['s1', 's2', 's3', 's4', 's5', 's6']
     table = pd.DataFrame([[12], [9], [1], [22], [20], [23]],
                          columns=['b1'], index=sample_ids)
     grouping = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'], index=sample_ids)
     observed = ancom(table, grouping, percentiles=[])[1]
     expected = pd.DataFrame()
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 34
0
 def test_ancom_multiple_comparisons(self):
     # With 'holm-bonferroni' correction and scipy.stats.mannwhitneyu as
     # the significance test, all seven entries are expected to have
     # W == 0 and not be rejected.
     w_stats = np.array([0]*7)
     rejected = np.array([False]*7, dtype=bool)
     expected = pd.DataFrame({'W': w_stats, 'reject': rejected})
     observed = ancom(self.table1, self.cats1,
                      significance_test=scipy.stats.mannwhitneyu,
                      multiple_comparisons_correction='holm-bonferroni')
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 35
0
    def test_minimal_compatible_input_with_labels(self):
        # Seed the RNG so the result can be compared with the stored
        # expectation.
        np.random.seed(0)

        matrix_labels = ('minx', 'miny', 'minz')
        observed = pwmantel(self.min_dms, labels=matrix_labels,
                            alternative='greater')
        expected = self.exp_results_minimal_with_labels
        assert_data_frame_almost_equal(observed, expected)
Esempio n. 36
0
    def test_minimal_compatible_input_with_labels(self):
        # Seed the RNG so the result can be compared with the stored
        # expectation.
        np.random.seed(0)

        matrix_labels = ('minx', 'miny', 'minz')
        observed = pwmantel(self.min_dms, labels=matrix_labels,
                            alternative='greater')
        expected = self.exp_results_minimal_with_labels
        assert_data_frame_almost_equal(observed, expected)
Esempio n. 37
0
 def test_bioenv_different_column_order(self):
     # Passing the columns in reverse reorders the column subsets, and
     # therefore the row labels of the results frame, but the results
     # themselves (e.g., correlation coefficients) should not change.
     reversed_columns = self.cols[::-1]
     observed = bioenv(self.dm, self.df, columns=reversed_columns)
     expected = self.exp_results_different_column_order
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 38
0
 def test_pearson_perfect(self):
     # self.mpm1 is compared with itself, so each of the six sinks is
     # expected to report r == 1.0 with p == 0.0.
     sink_ids = ['sink1', 'sink2', 'sink3', 'sink4', 'sink5', 'sink6']
     r_and_p = [(1.0, 0.0)] * 6
     expected = pd.DataFrame(r_and_p,
                             columns=['Pearson r', 'p'],
                             index=sink_ids)
     observed = compare_sinks(self.mpm1, self.mpm1, 'pearson')
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 39
0
    def test_filepaths_as_input(self):
        # pwmantel is handed the values returned by get_data_path for
        # 'dm.txt' and 'dm2.txt'.
        paths = [get_data_path(name) for name in ('dm.txt', 'dm2.txt')]
        np.random.seed(0)

        observed = pwmantel(paths)
        assert_data_frame_almost_equal(observed, self.exp_results_dm_dm2)
Esempio n. 40
0
 def test_default_valid_single_line(self):
     # The single record in 'blast6_default_single_line', read with
     # default_columns=True, is expected to give the twelve columns below.
     column_names = ['qseqid', 'sseqid', 'pident', 'length', 'mismatch',
                     'gapopen', 'qstart', 'qend', 'sstart', 'send',
                     'evalue', 'bitscore']
     record = ['query1', 'subject2', 75.0, 8.0, 2.0, 0.0, 1.0, 8.0, 2.0,
               9.0, 0.06, 11.5]
     expected = pd.DataFrame([record], columns=column_names)
     path = get_data_path('blast6_default_single_line')
     observed = _blast6_to_data_frame(path, default_columns=True)
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 41
0
    def test_filepaths_as_input(self):
        # pwmantel is handed the values returned by get_data_path for
        # 'dm.txt' and 'dm2.txt'.
        paths = [get_data_path(name) for name in ('dm.txt', 'dm2.txt')]
        np.random.seed(0)

        observed = pwmantel(paths)
        assert_data_frame_almost_equal(observed, self.exp_results_dm_dm2)
Esempio n. 42
0
    def test_bioenv_all_columns_implicit(self):
        # No columns argument is passed, so all columns of the data frame
        # are used implicitly. The same results are expected when the
        # order of rows/cols in the distance matrix is changed, hence the
        # second pass with self.dm_reordered.
        for matrix in (self.dm, self.dm_reordered):
            observed = bioenv(matrix, self.df)
            assert_data_frame_almost_equal(observed, self.exp_results)
Esempio n. 43
0
 def test_custom_valid_single_line(self):
     # The same seven custom column names are given to the parser and
     # used for the expected frame.
     custom_columns = ['qacc', 'qseq', 'btop', 'sframe', 'ppos',
                       'positive', 'gaps']
     path = get_data_path('blast6_custom_single_line')
     # The parser receives its own copy of the list, as in the original
     # where two separate list literals were used.
     observed = _blast6_to_data_frame(path, columns=list(custom_columns))
     record = ['query1', 'PAAWWWWW', 8.0, 1.0, 100.00, 8.0, 0.0]
     expected = pd.DataFrame([record], columns=custom_columns)
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 44
0
 def test_ancom_letter_categories(self):
     # ancom on table7 / cats7 without multiple-comparisons correction is
     # compared with the listed W statistics and rejection flags.
     w_stats = np.array([5, 3, 3, 2, 2, 5, 2])
     rejected = np.array([True, False, False, False, False, True, False],
                         dtype=bool)
     expected = pd.DataFrame({'W': w_stats, 'reject': rejected})
     observed = ancom(self.table7, self.cats7,
                      multiple_comparisons_correction=None)
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 45
0
 def test_ancom_alpha(self):
     # Run without multiple-comparisons correction and with alpha=0.5;
     # only the first element of the returned result is checked.
     w_stats = np.array([6, 6, 4, 5, 5, 4, 2])
     rejected = np.array([True, True, False, True, True, False, False],
                         dtype=bool)
     expected = pd.DataFrame({'W': w_stats,
                              'Reject null hypothesis': rejected})
     result = ancom(self.table1, self.cats1, alpha=0.5,
                    multiple_comparisons_correction=None)
     assert_data_frame_almost_equal(result[0], expected)
Esempio n. 46
0
 def test_ancom_letter_categories(self):
     # ancom on table7 / cats7 without multiple-comparisons correction is
     # compared with the listed W statistics and rejection flags.
     w_stats = np.array([5, 3, 3, 2, 2, 5, 2])
     rejected = np.array([True, False, False, False, False, True, False],
                         dtype=bool)
     expected = pd.DataFrame({'W': w_stats, 'reject': rejected})
     observed = ancom(self.table7, self.cats7,
                      multiple_comparisons_correction=None)
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 47
0
 def test_ancom_alpha(self):
     # Run without multiple-comparisons correction and with alpha=0.5;
     # only the first element of the returned result is checked.
     w_stats = np.array([6, 6, 4, 5, 5, 4, 2])
     rejected = np.array([True, True, False, True, True, False, False],
                         dtype=bool)
     expected = pd.DataFrame({'W': w_stats,
                              'Reject null hypothesis': rejected})
     result = ancom(self.table1, self.cats1, alpha=0.5,
                    multiple_comparisons_correction=None)
     assert_data_frame_almost_equal(result[0], expected)
Esempio n. 48
0
 def test_ancom_noncontiguous(self):
     # ancom on table5 / cats5 without multiple-comparisons correction is
     # compared with the listed W statistics and rejection flags.
     w_stats = np.array([6, 2, 2, 2, 2, 6, 2])
     rejected = np.array([True, False, False, False, False, True, False],
                         dtype=bool)
     expected = pd.DataFrame({'W': w_stats, 'reject': rejected})
     observed = ancom(self.table5, self.cats5,
                      multiple_comparisons_correction=None)
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 49
0
    def test_bioenv_all_columns_implicit(self):
        # No columns argument is passed, so all columns of the data frame
        # are used implicitly. The same results are expected when the
        # order of rows/cols in the distance matrix is changed, hence the
        # second pass with self.dm_reordered.
        for matrix in (self.dm, self.dm_reordered):
            observed = bioenv(matrix, self.df)
            assert_data_frame_almost_equal(observed, self.exp_results)
Esempio n. 50
0
 def test_scale_single_column(self):
     # A one-column frame holding 1, 0, 2 is expected to come back from
     # _scale as 0.0, -1.0, 1.0 with its labels unchanged.
     row_labels = ['A', 'B', 'C']
     raw = pd.DataFrame([[1], [0], [2]],
                        columns=['foo'], index=row_labels)
     expected = pd.DataFrame([[0.0], [-1.0], [1.0]],
                             columns=['foo'], index=row_labels)
     assert_data_frame_almost_equal(_scale(raw), expected)
Esempio n. 51
0
 def test_ancom_noncontiguous(self):
     # Calls ancom with the table5/cats5 fixtures and correction disabled,
     # then checks the whole result against the expected frame.
     expected_columns = {
         'W': np.array([6, 2, 2, 2, 2, 6, 2]),
         'reject': np.array([True, False, False, False, False, True, False],
                            dtype=bool),
     }
     expected = pd.DataFrame(expected_columns)

     observed = ancom(self.table5, self.cats5,
                      multiple_comparisons_correction=None)
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 52
0
 def test_pearson_perfect(self):
     # Comparing mpm1 with itself under 'pearson' should give, for each of
     # the six sinks, the pair (1.0, 0.0) in the 'Pearson r' and 'p' columns.
     sink_ids = ['sink1', 'sink2', 'sink3', 'sink4', 'sink5', 'sink6']
     per_sink = [(1.0, 0.0)] * len(sink_ids)
     expected = pd.DataFrame(per_sink, index=sink_ids,
                             columns=['Pearson r', 'p'])

     observed = compare_sinks(self.mpm1, self.mpm1, 'pearson')
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 53
0
 def test_ancom_unbalanced(self):
     # ancom on the table6/cats6 fixtures with correction disabled; only the
     # first element of the result is compared with the expected frame.
     w_values = np.array([5, 3, 3, 2, 2, 5, 2])
     rejected = np.array([True, False, False, False, False, True, False],
                         dtype=bool)
     expected = pd.DataFrame({'W': w_values,
                              'Reject null hypothesis': rejected})

     observed = ancom(self.table6, self.cats6,
                      multiple_comparisons_correction=None)
     assert_data_frame_almost_equal(observed[0], expected)
Esempio n. 54
0
 def test_ancom_alternative_test(self):
     # Passes scipy.stats.ttest_ind as the significance test (correction
     # disabled) and checks the returned frame against the expected 'W' and
     # 'reject' columns.
     w_values = np.array([5, 5, 2, 2, 2, 2, 2])
     reject_flags = np.array(
         [True, True, False, False, False, False, False], dtype=bool)
     expected = pd.DataFrame({'W': w_values, 'reject': reject_flags})

     observed = ancom(self.table1, self.cats1,
                      multiple_comparisons_correction=None,
                      significance_test=scipy.stats.ttest_ind)
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 55
0
    def test_bioenv_all_columns_explicit(self):
        # Every column is named explicitly via `columns`. The first frame is
        # the plain fixture; the second carries an extra non-numeric column
        # and has some rows/columns reordered. Because the same columns are
        # requested in the same order, both must give the same results.
        for frame in (self.df, self.df_extra_column):
            observed = bioenv(self.dm, frame, columns=self.cols)
            assert_data_frame_almost_equal(observed, self.exp_results)
Esempio n. 56
0
 def test_ancom_alternative_test(self):
     # ancom on table1/cats1 using scipy.stats.ttest_ind as the significance
     # test and no multiple-comparisons correction.
     expected_columns = {
         'W': np.array([5, 5, 2, 2, 2, 2, 2]),
         'reject': np.array([True, True, False, False, False, False, False],
                            dtype=bool),
     }
     expected = pd.DataFrame(expected_columns)

     observed = ancom(self.table1, self.cats1,
                      multiple_comparisons_correction=None,
                      significance_test=scipy.stats.ttest_ind)
     assert_data_frame_almost_equal(observed, expected)
    def test_init_nondefault_parameters(self):
        # Build a sequence with explicit metadata and positional metadata,
        # then check that values, metadata and positional metadata each
        # reflect what was passed in.
        characters = '.-ABCXYZ'
        sequence = ExampleGrammaredSequence(
            characters,
            metadata={'id': 'foo'},
            positional_metadata={'quality': range(8)})

        expected_values = np.array(characters, dtype='c')
        npt.assert_equal(sequence.values, expected_values)

        self.assertEqual(sequence.metadata, {'id': 'foo'})

        expected_positional = pd.DataFrame({'quality': range(8)})
        assert_data_frame_almost_equal(sequence.positional_metadata,
                                       expected_positional)
Esempio n. 58
0
 def test_custom_valid_mixed_nans(self):
     # Read the "blast7_custom_mixed_nans" data file and check the resulting
     # frame: two rows over eight custom columns, with NaN entries mixed
     # among the numeric and string values.
     column_names = ['qgi', 'sgi', 'qlen', 'slen', 'qframe', 'sframe',
                     'qseqid', 'sseqid']
     first_row = [0.0, np.nan, 8.0, 13.0, 1.0, 1.0, np.nan, 'subject2']
     second_row = [np.nan, 0.0, 8.0, np.nan, 1.0, 1.0, 'query1', np.nan]
     expected = pd.DataFrame([first_row, second_row], columns=column_names)

     path = get_data_path("blast7_custom_mixed_nans")
     observed = _blast7_to_data_frame(path)
     assert_data_frame_almost_equal(observed, expected)
Esempio n. 59
0
 def test_ancom_unbalanced(self):
     # Calls ancom with the table6/cats6 fixtures and correction disabled,
     # then compares element 0 of the result with the expected frame.
     expected_columns = {
         'W': np.array([5, 3, 3, 2, 2, 5, 2]),
         'Reject null hypothesis': np.array(
             [True, False, False, False, False, True, False], dtype=bool),
     }
     expected = pd.DataFrame(expected_columns)

     observed = ancom(self.table6, self.cats6,
                      multiple_comparisons_correction=None)
     assert_data_frame_almost_equal(observed[0], expected)
Esempio n. 60
0
    def test_bioenv_all_columns_explicit(self):
        # All columns are specified through `columns`. The plain data frame
        # is checked first, followed by one that has an extra non-numeric
        # column and some rows/columns reordered; since the same columns are
        # given in the same order, the results should be identical.
        data_frames = (self.df, self.df_extra_column)
        for data_frame in data_frames:
            observed = bioenv(self.dm, data_frame, columns=self.cols)
            assert_data_frame_almost_equal(observed, self.exp_results)