Example #1
0
    def test_index_and_column_merge_order(self):
        # Three single-column tables; 'id2' is present only in the first,
        # and the shared IDs are listed in a different order in each.
        frame_a = pd.DataFrame(
            [[1], [2], [3], [4]],
            index=pd.Index(['id1', 'id2', 'id3', 'id4'], name='id'),
            columns=['a'])
        md1 = Metadata(frame_a)

        frame_b = pd.DataFrame(
            [[5], [6], [7]],
            index=pd.Index(['id4', 'id3', 'id1'], name='id'),
            columns=['b'])
        md2 = Metadata(frame_b)

        frame_c = pd.DataFrame(
            [[8], [9], [10]],
            index=pd.Index(['id1', 'id4', 'id3'], name='id'),
            columns=['c'])
        md3 = Metadata(frame_c)

        # Merging onto md1: the expected IDs and columns are listed in
        # md1's order first.
        merged = md1.merge(md2, md3)

        expected_frame = pd.DataFrame(
            [[1, 7, 8], [3, 6, 10], [4, 5, 9]],
            index=pd.Index(['id1', 'id3', 'id4'], name='id'),
            columns=['a', 'b', 'c'])
        expected = Metadata(expected_frame)
        self.assertEqual(merged, expected)

        # Merging in different order produces different ID/column order.
        merged = md2.merge(md1, md3)

        expected_frame = pd.DataFrame(
            [[5, 4, 9], [6, 3, 10], [7, 1, 8]],
            index=pd.Index(['id4', 'id3', 'id1'], name='id'),
            columns=['b', 'a', 'c'])
        expected = Metadata(expected_frame)
        self.assertEqual(merged, expected)
Example #2
0
    def test_inner_join(self):
        # Fixture: 'id2' is the only ID present in all three tables, while
        # md1 and md3 additionally share 'id3'.
        frame1 = pd.DataFrame(
            {'a': [1, 2, 3], 'b': [4, 5, 6]},
            index=pd.Index(['id1', 'id2', 'id3'], name='id'))
        md1 = Metadata(frame1)

        frame2 = pd.DataFrame(
            {'c': [7, 8, 9], 'd': [10, 11, 12]},
            index=pd.Index(['id2', 'X', 'Y'], name='id'))
        md2 = Metadata(frame2)

        frame3 = pd.DataFrame(
            {'e': [13, 14, 15], 'f': [16, 17, 18]},
            index=pd.Index(['X', 'id3', 'id2'], name='id'))
        md3 = Metadata(frame3)

        # Single shared ID.
        merged = md1.merge(md2, md3)

        single_row = {
            'a': [2], 'b': [5], 'c': [7], 'd': [10], 'e': [15], 'f': [18]}
        expected = Metadata(
            pd.DataFrame(single_row, index=pd.Index(['id2'], name='id')))
        self.assertEqual(merged, expected)

        # Multiple shared IDs.
        merged = md1.merge(md3)

        two_rows = {'a': [2, 3], 'b': [5, 6], 'e': [15, 14], 'f': [18, 17]}
        expected = Metadata(
            pd.DataFrame(two_rows, index=pd.Index(['id2', 'id3'], name='id')))
        self.assertEqual(merged, expected)
Example #3
0
    def test_inner_join(self):
        def ids(*labels):
            # Build a fresh ID index (named 'id') for every table.
            return pd.Index(list(labels), name='id')

        md1 = Metadata(pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]},
                                    index=ids('id1', 'id2', 'id3')))
        md2 = Metadata(pd.DataFrame({'c': [7, 8, 9], 'd': [10, 11, 12]},
                                    index=ids('id2', 'X', 'Y')))
        md3 = Metadata(pd.DataFrame({'e': [13, 14, 15], 'f': [16, 17, 18]},
                                    index=ids('X', 'id3', 'id2')))

        # Single shared ID.
        result = md1.merge(md2, md3)

        wanted = Metadata(pd.DataFrame(
            {'a': [2], 'b': [5], 'c': [7], 'd': [10], 'e': [15], 'f': [18]},
            index=ids('id2')))
        self.assertEqual(result, wanted)

        # Multiple shared IDs.
        result = md1.merge(md3)

        wanted = Metadata(pd.DataFrame(
            {'a': [2, 3], 'b': [5, 6], 'e': [15, 14], 'f': [18, 17]},
            index=ids('id2', 'id3')))
        self.assertEqual(result, wanted)
Example #4
0
    def test_index_and_column_merge_order(self):
        def ids(*labels):
            # Build a fresh ID index (named 'id') for every table.
            return pd.Index(list(labels), name='id')

        # Same IDs (except 'id2', which only md1 has) in differing orders.
        md1 = Metadata(pd.DataFrame([[1], [2], [3], [4]],
                                    index=ids('id1', 'id2', 'id3', 'id4'),
                                    columns=['a']))
        md2 = Metadata(pd.DataFrame([[5], [6], [7]],
                                    index=ids('id4', 'id3', 'id1'),
                                    columns=['b']))
        md3 = Metadata(pd.DataFrame([[8], [9], [10]],
                                    index=ids('id1', 'id4', 'id3'),
                                    columns=['c']))

        # Expected rows/columns are laid out starting from md1.
        result = md1.merge(md2, md3)

        wanted = Metadata(pd.DataFrame([[1, 7, 8], [3, 6, 10], [4, 5, 9]],
                                       index=ids('id1', 'id3', 'id4'),
                                       columns=['a', 'b', 'c']))
        self.assertEqual(result, wanted)

        # Merging in different order produces different ID/column order.
        result = md2.merge(md1, md3)

        wanted = Metadata(pd.DataFrame([[5, 4, 9], [6, 3, 10], [7, 1, 8]],
                                       index=ids('id4', 'id3', 'id1'),
                                       columns=['b', 'a', 'c']))
        self.assertEqual(result, wanted)
Example #5
0
    def test_duplicate_columns_self_merge(self):
        # Merging an object with itself means every column name collides.
        frame = pd.DataFrame(
            {'a': [1, 2], 'b': [3, 4]},
            index=pd.Index(['id1', 'id2'], name='id'))
        md = Metadata(frame)

        expected_message = "columns overlap: 'a', 'b'"
        with self.assertRaisesRegex(ValueError, expected_message):
            md.merge(md)
Example #6
0
    def test_duplicate_columns_self_merge(self):
        id_index = pd.Index(['id1', 'id2'], name='id')
        md = Metadata(pd.DataFrame({'a': [1, 2], 'b': [3, 4]},
                                   index=id_index))

        # Both columns are expected to be reported as overlapping when the
        # object is merged with itself.
        overlap_error = self.assertRaisesRegex(
            ValueError, "columns overlap: 'a', 'b'")
        with overlap_error:
            md.merge(md)
Example #7
0
    def test_merging_nothing(self):
        frame = pd.DataFrame(
            {'a': [1, 2, 3], 'b': [4, 5, 6]},
            index=pd.Index(['id1', 'id2', 'id3'], name='id'))
        md = Metadata(frame)

        # Calling merge() with no arguments is expected to be rejected.
        expected_message = 'At least one Metadata.*nothing to merge'
        with self.assertRaisesRegex(ValueError, expected_message):
            md.merge()
Example #8
0
    def test_merging_nothing(self):
        id_index = pd.Index(['id1', 'id2', 'id3'], name='id')
        md = Metadata(pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]},
                                   index=id_index))

        # No other Metadata is supplied, so a ValueError is expected.
        nothing_to_merge = self.assertRaisesRegex(
            ValueError, 'At least one Metadata.*nothing to merge')
        with nothing_to_merge:
            md.merge()
Example #9
0
    def test_duplicate_columns(self):
        # Both tables carry a column named 'b'.
        left_frame = pd.DataFrame(
            {'a': [1, 2], 'b': [3, 4]},
            index=pd.Index(['id1', 'id2'], name='id'))
        md1 = Metadata(left_frame)

        right_frame = pd.DataFrame(
            {'c': [5, 6], 'b': [7, 8]},
            index=pd.Index(['id1', 'id2'], name='id'))
        md2 = Metadata(right_frame)

        expected_message = "columns overlap: 'b'"
        with self.assertRaisesRegex(ValueError, expected_message):
            md1.merge(md2)
Example #10
0
    def test_disjoint_indices(self):
        # The two tables have no ID in common.
        left_frame = pd.DataFrame(
            {'a': [1, 2, 3], 'b': [4, 5, 6]},
            index=pd.Index(['id1', 'id2', 'id3'], name='id'))
        md1 = Metadata(left_frame)

        right_frame = pd.DataFrame(
            {'c': [7, 8, 9], 'd': [10, 11, 12]},
            index=pd.Index(['X', 'Y', 'Z'], name='id'))
        md2 = Metadata(right_frame)

        expected_message = 'no IDs shared'
        with self.assertRaisesRegex(ValueError, expected_message):
            md1.merge(md2)
Example #11
0
    def test_disjoint_indices(self):
        def ids(*labels):
            # Build a fresh ID index (named 'id') for every table.
            return pd.Index(list(labels), name='id')

        md1 = Metadata(pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]},
                                    index=ids('id1', 'id2', 'id3')))
        md2 = Metadata(pd.DataFrame({'c': [7, 8, 9], 'd': [10, 11, 12]},
                                    index=ids('X', 'Y', 'Z')))

        # With no overlapping IDs the merge is expected to raise.
        no_shared_ids = self.assertRaisesRegex(ValueError, 'no IDs shared')
        with no_shared_ids:
            md1.merge(md2)
Example #12
0
    def test_duplicate_columns(self):
        def ids(*labels):
            # Build a fresh ID index (named 'id') for every table.
            return pd.Index(list(labels), name='id')

        md1 = Metadata(pd.DataFrame({'a': [1, 2], 'b': [3, 4]},
                                    index=ids('id1', 'id2')))
        md2 = Metadata(pd.DataFrame({'c': [5, 6], 'b': [7, 8]},
                                    index=ids('id1', 'id2')))

        # Column 'b' exists in both inputs, so the merge is expected to
        # raise and name that column.
        overlap_error = self.assertRaisesRegex(
            ValueError, "columns overlap: 'b'")
        with overlap_error:
            md1.merge(md2)
Example #13
0
    def test_merging_unaligned_indices(self):
        # All three tables hold the same IDs, each in a different order.
        frame1 = pd.DataFrame(
            {'a': [1, 2, 3], 'b': [4, 5, 6]},
            index=pd.Index(['id1', 'id2', 'id3'], name='id'))
        md1 = Metadata(frame1)

        frame2 = pd.DataFrame(
            {'c': [9, 8, 7], 'd': [12, 11, 10]},
            index=pd.Index(['id3', 'id2', 'id1'], name='id'))
        md2 = Metadata(frame2)

        frame3 = pd.DataFrame(
            {'e': [13, 15, 14], 'f': [16, 18, 17]},
            index=pd.Index(['id1', 'id3', 'id2'], name='id'))
        md3 = Metadata(frame3)

        merged = md1.merge(md2, md3)

        # Expected values are matched up by ID and listed in md1's order.
        expected_columns = {
            'a': [1, 2, 3],
            'b': [4, 5, 6],
            'c': [7, 8, 9],
            'd': [10, 11, 12],
            'e': [13, 14, 15],
            'f': [16, 17, 18],
        }
        expected = Metadata(pd.DataFrame(
            expected_columns,
            index=pd.Index(['id1', 'id2', 'id3'], name='id')))
        self.assertEqual(merged, expected)
Example #14
0
    def test_merging_two(self):
        # Two tables with identical, identically ordered IDs and distinct
        # column names.
        left_frame = pd.DataFrame(
            {'a': [1, 2, 3], 'b': [4, 5, 6]},
            index=pd.Index(['id1', 'id2', 'id3'], name='id'))
        md1 = Metadata(left_frame)

        right_frame = pd.DataFrame(
            {'c': [7, 8, 9], 'd': [10, 11, 12]},
            index=pd.Index(['id1', 'id2', 'id3'], name='id'))
        md2 = Metadata(right_frame)

        merged = md1.merge(md2)

        expected_columns = {
            'a': [1, 2, 3],
            'b': [4, 5, 6],
            'c': [7, 8, 9],
            'd': [10, 11, 12],
        }
        expected = Metadata(pd.DataFrame(
            expected_columns,
            index=pd.Index(['id1', 'id2', 'id3'], name='id')))
        self.assertEqual(merged, expected)
Example #15
0
    def test_no_artifacts(self):
        # Both inputs are built directly from DataFrames.
        left_frame = pd.DataFrame(
            {'a': [1, 2]}, index=pd.Index(['id1', 'id2'], name='id'))
        md1 = Metadata(left_frame)

        right_frame = pd.DataFrame(
            {'b': [3, 4]}, index=pd.Index(['id1', 'id2'], name='id'))
        md2 = Metadata(right_frame)

        merged = md1.merge(md2)

        # The merged result is expected to report an empty artifacts tuple.
        self.assertEqual(merged.artifacts, ())
Example #16
0
    def test_no_artifacts(self):
        def ids(*labels):
            # Build a fresh ID index (named 'id') for every table.
            return pd.Index(list(labels), name='id')

        md1 = Metadata(pd.DataFrame({'a': [1, 2]}, index=ids('id1', 'id2')))
        md2 = Metadata(pd.DataFrame({'b': [3, 4]}, index=ids('id1', 'id2')))

        # Expect an empty tuple of artifacts on the merged object.
        result = md1.merge(md2)
        found_artifacts = result.artifacts

        self.assertEqual(found_artifacts, ())
Example #17
0
    def test_merging_one(self):
        frame = pd.DataFrame(
            {'a': [1, 2, 3], 'b': [4, 5, 6]},
            index=pd.Index(['id1', 'id2', 'id3'], name='id'))
        md = Metadata(frame)

        merged = md.merge()

        # merge() with no arguments is expected to hand back a distinct
        # object that still compares equal to the original.
        self.assertIsNot(merged, md)
        self.assertEqual(merged, md)
Example #18
0
    def test_id_column_only(self):
        # Column-less tables: only the ID index carries information.
        frame1 = pd.DataFrame(
            {}, index=pd.Index(['id1', 'id2', 'id3'], name='id'))
        md1 = Metadata(frame1)

        frame2 = pd.DataFrame(
            {}, index=pd.Index(['id2', 'X', 'id1'], name='id'))
        md2 = Metadata(frame2)

        frame3 = pd.DataFrame(
            {}, index=pd.Index(['id1', 'id3', 'id2'], name='id'))
        md3 = Metadata(frame3)

        merged = md1.merge(md2, md3)

        # 'id1' and 'id2' are the IDs present in all three inputs.
        expected_frame = pd.DataFrame(
            {}, index=pd.Index(['id1', 'id2'], name='id'))
        expected = Metadata(expected_frame)
        self.assertEqual(merged, expected)
Example #19
0
    def test_id_column_only(self):
        def ids_only(*labels):
            # Wrap a column-less DataFrame that has just an 'id' index.
            return Metadata(
                pd.DataFrame({}, index=pd.Index(list(labels), name='id')))

        md1 = ids_only('id1', 'id2', 'id3')
        md2 = ids_only('id2', 'X', 'id1')
        md3 = ids_only('id1', 'id3', 'id2')

        result = md1.merge(md2, md3)

        # Expected: just the IDs common to every input, no columns.
        wanted = ids_only('id1', 'id2')
        self.assertEqual(result, wanted)
Example #20
0
    def test_merged_id_column_name(self):
        # The inputs label their ID index differently ('sample ID' versus
        # 'feature ID').
        sample_frame = pd.DataFrame(
            {'a': [1, 2]},
            index=pd.Index(['id1', 'id2'], name='sample ID'))
        md1 = Metadata(sample_frame)

        feature_frame = pd.DataFrame(
            {'b': [3, 4]},
            index=pd.Index(['id1', 'id2'], name='feature ID'))
        md2 = Metadata(feature_frame)

        merged = md1.merge(md2)

        # The expected merged index is named plain 'id'.
        expected_frame = pd.DataFrame(
            {'a': [1, 2], 'b': [3, 4]},
            index=pd.Index(['id1', 'id2'], name='id'))
        expected = Metadata(expected_frame)
        self.assertEqual(merged, expected)
Example #21
0
    def test_merged_id_column_name(self):
        def ids(index_name):
            # Same two IDs each time; only the index name varies.
            return pd.Index(['id1', 'id2'], name=index_name)

        md1 = Metadata(pd.DataFrame({'a': [1, 2]}, index=ids('sample ID')))
        md2 = Metadata(pd.DataFrame({'b': [3, 4]}, index=ids('feature ID')))

        result = md1.merge(md2)

        # Neither input name is expected to survive; the result uses 'id'.
        wanted = Metadata(
            pd.DataFrame({'a': [1, 2], 'b': [3, 4]}, index=ids('id')))
        self.assertEqual(result, wanted)
Example #22
0
    def test_merging_two(self):
        def ids():
            # Every table in this test uses the same three IDs in order.
            return pd.Index(['id1', 'id2', 'id3'], name='id')

        md1 = Metadata(
            pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=ids()))
        md2 = Metadata(
            pd.DataFrame({'c': [7, 8, 9], 'd': [10, 11, 12]}, index=ids()))

        result = md1.merge(md2)

        # Expected: md1's columns followed by md2's, rows unchanged.
        wanted_columns = {'a': [1, 2, 3], 'b': [4, 5, 6],
                          'c': [7, 8, 9], 'd': [10, 11, 12]}
        wanted = Metadata(pd.DataFrame(wanted_columns, index=ids()))
        self.assertEqual(result, wanted)
Example #23
0
    def test_merging_unaligned_indices(self):
        def ids(*labels):
            # Build a fresh ID index (named 'id') for every table.
            return pd.Index(list(labels), name='id')

        # Identical ID sets, but each table lists them in its own order.
        md1 = Metadata(pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]},
                                    index=ids('id1', 'id2', 'id3')))
        md2 = Metadata(pd.DataFrame({'c': [9, 8, 7], 'd': [12, 11, 10]},
                                    index=ids('id3', 'id2', 'id1')))
        md3 = Metadata(pd.DataFrame({'e': [13, 15, 14], 'f': [16, 18, 17]},
                                    index=ids('id1', 'id3', 'id2')))

        result = md1.merge(md2, md3)

        # Expected rows are aligned by ID, in md1's ID order.
        wanted_columns = {'a': [1, 2, 3], 'b': [4, 5, 6],
                          'c': [7, 8, 9], 'd': [10, 11, 12],
                          'e': [13, 14, 15], 'f': [16, 17, 18]}
        wanted = Metadata(
            pd.DataFrame(wanted_columns, index=ids('id1', 'id2', 'id3')))
        self.assertEqual(result, wanted)