コード例 #1
0
ファイル: test_compare.py プロジェクト: biota/sourcetracker2
 def test_euclidean_perfect(self):
     # A table compared against itself is expected to yield a distance of
     # zero for each of the six sinks.
     sink_ids = ['sink1', 'sink2', 'sink3', 'sink4', 'sink5', 'sink6']
     expected = pd.DataFrame({'Euclidean distance': [0.0] * len(sink_ids)},
                             index=sink_ids)
     observed = compare_sinks(self.mpm1, self.mpm1, 'euclidean')
     assert_data_frame_almost_equal(observed, expected)
コード例 #2
0
    def test_euclidean(self):
        # Compare two single-sink tables and check the result against a
        # precomputed Euclidean distance.
        first = pd.DataFrame({
            'Unknown': {'sink1': 0.25},
            'Source1': {'sink1': 0.50},
            'Source2': {'sink1': 0.25},
        })
        second = pd.DataFrame({
            'Unknown': {'sink1': 0.1},
            'Source1': {'sink1': 0.1},
            'Source2': {'sink1': 0.8},
        })

        observed = compare_sinks(first, second, 'euclidean')

        # Reference value obtained by calling
        # scipy.spatial.distance.euclidean directly.
        expected = pd.DataFrame([0.6964194],
                                index=['sink1'],
                                columns=['Euclidean distance'])
        assert_data_frame_almost_equal(observed, expected)
コード例 #3
0
    def test_absolute_difference(self):
        # Per-source absolute differences for a single sink. The second
        # table lists its sources in a different order than the first.
        first = pd.DataFrame({
            'Unknown': {'sink1': 0.25},
            'Source1': {'sink1': 0.50},
            'Source2': {'sink1': 0.25},
        })
        second = pd.DataFrame({
            'Unknown': {'sink1': 0.1},
            'Source2': {'sink1': 0.8},
            'Source1': {'sink1': 0.1},
        })

        observed = compare_sinks(first, second, 'absolute_difference')

        # Hand-computed: |0.50-0.1|, |0.25-0.8|, |0.25-0.1|.
        source_names = ['Source1', 'Source2', 'Unknown']
        expected = pd.DataFrame([(0.4, 0.55, 0.15)],
                                index=['sink1'],
                                columns=source_names)

        # Sort columns on both sides so column order cannot affect the check.
        observed_sorted = observed.sort_index(axis=1)
        expected_sorted = expected.sort_index(axis=1)
        assert_data_frame_almost_equal(observed_sorted, expected_sorted)
コード例 #4
0
    def test_pearsonr(self):
        # Compare two single-sink tables with the 'pearson' metric and check
        # both the correlation coefficient and the p-value.
        first = pd.DataFrame({
            'Unknown': {'sink1': 0.25},
            'Source1': {'sink1': 0.50},
            'Source2': {'sink1': 0.25},
        })
        second = pd.DataFrame({
            'Unknown': {'sink1': 0.1},
            'Source1': {'sink1': 0.1},
            'Source2': {'sink1': 0.8},
        })

        observed = compare_sinks(first, second, 'pearson')

        # Reference values obtained by calling scipy.stats.pearsonr directly.
        r_and_p = (-0.5, 2. / 3)
        expected = pd.DataFrame([r_and_p],
                                index=['sink1'],
                                columns=['Pearson r', 'p'])
        assert_data_frame_almost_equal(observed, expected)
コード例 #5
0
ファイル: test_compare.py プロジェクト: biota/sourcetracker2
 def test_pearson_perfect(self):
     # A table compared against itself is expected to give r == 1.0 and
     # p == 0.0 for each of the six sinks.
     sink_ids = ['sink1', 'sink2', 'sink3', 'sink4', 'sink5', 'sink6']
     perfect = (1.0, 0.0)
     expected = pd.DataFrame([perfect] * len(sink_ids),
                             index=sink_ids,
                             columns=['Pearson r', 'p'])
     observed = compare_sinks(self.mpm1, self.mpm1, 'pearson')
     assert_data_frame_almost_equal(observed, expected)
コード例 #6
0
 def test_euclidean_perfect(self):
     # Self-comparison: every sink should be at Euclidean distance 0.0
     # from itself.
     result = compare_sinks(self.mpm1, self.mpm1, 'euclidean')
     sinks = ['sink1', 'sink2', 'sink3', 'sink4', 'sink5', 'sink6']
     zeros = [0.0 for _ in sinks]
     wanted = pd.DataFrame(data=zeros,
                           index=sinks,
                           columns=['Euclidean distance'])
     assert_data_frame_almost_equal(result, wanted)
コード例 #7
0
 def test_pearson_perfect(self):
     # Self-comparison: each sink should correlate perfectly with itself
     # (r of 1.0, p of 0.0).
     result = compare_sinks(self.mpm1, self.mpm1, 'pearson')
     sinks = ['sink1', 'sink2', 'sink3', 'sink4', 'sink5', 'sink6']
     rows = [(1.0, 0.0) for _ in sinks]
     wanted = pd.DataFrame(data=rows,
                           index=sinks,
                           columns=['Pearson r', 'p'])
     assert_data_frame_almost_equal(result, wanted)
コード例 #8
0
ファイル: test_compare.py プロジェクト: biota/sourcetracker2
    def test_order_independence_sources(self):
        # The comparison result must not depend on the order in which the
        # source columns appear in either table.
        mpm1 = self.mpm1.sort_index(ascending=False, axis=1)
        mpm2 = self.mpm1.copy().sort_index(ascending=True, axis=1)
        # confirm that the columns are now different
        self.assertEqual(list(mpm1.columns), list(reversed(mpm2.columns)))

        # Compare the two reordered tables. Passing self.mpm1 twice here
        # would leave the column reordering above untested.
        observed = compare_sinks(mpm1, mpm2, 'spearman')
        expected_ids = ['sink1', 'sink2', 'sink3', 'sink4', 'sink5', 'sink6']
        # Both tables hold the same data, so every sink is expected to
        # correlate perfectly (rho of 1.0, p of 0.0).
        expected_values = [(1.0, 0.0)] * len(expected_ids)
        expected = pd.DataFrame(expected_values, index=expected_ids,
                                columns=['Spearman rho', 'p'])
        assert_data_frame_almost_equal(observed, expected)
コード例 #9
0
    def test_order_independence_sources(self):
        # Reordering the source columns of the inputs must not change the
        # comparison result.
        mpm1 = self.mpm1.sort_index(ascending=False, axis=1)
        mpm2 = self.mpm1.copy().sort_index(ascending=True, axis=1)
        # confirm that the columns are now different
        self.assertEqual(list(mpm1.columns), list(reversed(mpm2.columns)))

        # Feed the reordered tables to compare_sinks; comparing self.mpm1
        # with itself would not exercise the differing column orders.
        observed = compare_sinks(mpm1, mpm2, 'spearman')
        expected_ids = ['sink1', 'sink2', 'sink3', 'sink4', 'sink5', 'sink6']
        # Identical underlying data: rho of 1.0 and p of 0.0 for each sink.
        expected_values = [(1.0, 0.0), (1.0, 0.0), (1.0, 0.0), (1.0, 0.0),
                           (1.0, 0.0), (1.0, 0.0)]
        expected = pd.DataFrame(expected_values,
                                index=expected_ids,
                                columns=['Spearman rho', 'p'])
        assert_data_frame_almost_equal(observed, expected)
コード例 #10
0
ファイル: test_compare.py プロジェクト: biota/sourcetracker2
    def test_pearsonr(self):
        # Single-sink 'pearson' comparison checked against precomputed
        # coefficient and p-value.
        table_a = pd.DataFrame({'Unknown': {'sink1': 0.25},
                                'Source1': {'sink1': 0.50},
                                'Source2': {'sink1': 0.25}})
        table_b = pd.DataFrame({'Unknown': {'sink1': 0.1},
                                'Source1': {'sink1': 0.1},
                                'Source2': {'sink1': 0.8}})

        result = compare_sinks(table_a, table_b, 'pearson')

        # Reference values come from calling scipy.stats.pearsonr directly.
        wanted = pd.DataFrame(data=[(-0.5, 2./3)],
                              index=['sink1'],
                              columns=['Pearson r', 'p'])
        assert_data_frame_almost_equal(result, wanted)
コード例 #11
0
ファイル: test_compare.py プロジェクト: biota/sourcetracker2
    def test_absolute_difference(self):
        # Per-source absolute differences for a single sink. mpm2 lists its
        # sources in a different order than mpm1 on purpose.
        mpm1 = {'Unknown': {'sink1': 0.25},
                'Source1': {'sink1': 0.50},
                'Source2': {'sink1': 0.25}}
        mpm1 = pd.DataFrame(mpm1)
        mpm2 = {'Unknown': {'sink1': 0.1},
                'Source2': {'sink1': 0.8},
                'Source1': {'sink1': 0.1}}
        mpm2 = pd.DataFrame(mpm2)

        observed = compare_sinks(mpm1, mpm2, 'absolute_difference')
        expected_ids = ['sink1']
        # expected values computed by hand
        expected_values = [(0.4, 0.55, 0.15)]
        expected = pd.DataFrame(expected_values, index=expected_ids,
                                columns=['Source1', 'Source2', 'Unknown'])
        # Sort the columns on both sides before comparing: the column order
        # of a DataFrame built from a dict depends on how pandas orders the
        # dict keys, and this test is about the values, not the order.
        assert_data_frame_almost_equal(observed.sort_index(axis=1),
                                       expected.sort_index(axis=1))
コード例 #12
0
ファイル: test_compare.py プロジェクト: biota/sourcetracker2
    def test_euclidean(self):
        # Single-sink 'euclidean' comparison checked against a precomputed
        # distance.
        table_a = pd.DataFrame({'Unknown': {'sink1': 0.25},
                                'Source1': {'sink1': 0.50},
                                'Source2': {'sink1': 0.25}})
        table_b = pd.DataFrame({'Unknown': {'sink1': 0.1},
                                'Source1': {'sink1': 0.1},
                                'Source2': {'sink1': 0.8}})

        result = compare_sinks(table_a, table_b, 'euclidean')

        # Reference value comes from calling
        # scipy.spatial.distance.euclidean directly.
        wanted = pd.DataFrame(data=[0.6964194],
                              index=['sink1'],
                              columns=['Euclidean distance'])
        assert_data_frame_almost_equal(result, wanted)
コード例 #13
0
ファイル: test_compare.py プロジェクト: biota/sourcetracker2
 def test_non_overlapping_sources(self):
     # Tables whose source (column) labels differ must be rejected with a
     # ValueError whose message matches 'Sources'.
     mismatched = self.mpm1.copy()
     mismatched.columns = ['source1', 'source2', 'source4', 'Unknown']
     self.assertRaisesRegex(ValueError, 'Sources',
                            compare_sinks, self.mpm1, mismatched, 'spearman')
コード例 #14
0
ファイル: test_compare.py プロジェクト: biota/sourcetracker2
 def test_non_overlapping_sinks(self):
     # Tables whose sink (index) labels differ must be rejected with a
     # ValueError whose message matches 'Sinks'.
     mismatched = self.mpm1.copy()
     mismatched.index = ['sink1', 'sink2', 'sink3', 'sink4', 'sink5', 'sink7']
     self.assertRaisesRegex(ValueError, 'Sinks',
                            compare_sinks, self.mpm1, mismatched, 'spearman')
コード例 #15
0
 def test_non_overlapping_sinks(self):
     # Replace the sink labels of a copy so that one of them ('sink7') is
     # new, then expect compare_sinks to raise ValueError matching 'Sinks'.
     other = self.mpm1.copy()
     relabelled = ['sink1', 'sink2', 'sink3', 'sink4', 'sink5', 'sink7']
     other.index = relabelled
     expect_failure = self.assertRaisesRegex(ValueError, 'Sinks')
     with expect_failure:
         compare_sinks(self.mpm1, other, 'spearman')
コード例 #16
0
 def test_non_overlapping_sources(self):
     # Replace the source labels of a copy, then expect compare_sinks to
     # raise ValueError matching 'Sources'.
     other = self.mpm1.copy()
     relabelled = ['source1', 'source2', 'source4', 'Unknown']
     other.columns = relabelled
     expect_failure = self.assertRaisesRegex(ValueError, 'Sources')
     with expect_failure:
         compare_sinks(self.mpm1, other, 'spearman')
コード例 #17
0
ファイル: test_compare.py プロジェクト: biota/sourcetracker2
 def test_unknown_metric(self):
     # An unrecognised metric name is expected to raise KeyError.
     self.assertRaises(KeyError, compare_sinks,
                       self.mpm1, self.mpm1, 'not-a-metric')
コード例 #18
0
 def test_unknown_metric(self):
     # Requesting a metric name that does not exist should fail with
     # KeyError.
     bad_metric = 'not-a-metric'
     expect_failure = self.assertRaises(KeyError)
     with expect_failure:
         compare_sinks(self.mpm1, self.mpm1, bad_metric)