def test_index_and_column_merge_order(self):
    """Check that merged ID and column order follow the merge call order."""
    frame_a = pd.DataFrame(
        [[1], [2], [3], [4]],
        index=pd.Index(['id1', 'id2', 'id3', 'id4'], name='id'),
        columns=['a'])
    frame_b = pd.DataFrame(
        [[5], [6], [7]],
        index=pd.Index(['id4', 'id3', 'id1'], name='id'),
        columns=['b'])
    frame_c = pd.DataFrame(
        [[8], [9], [10]],
        index=pd.Index(['id1', 'id4', 'id3'], name='id'),
        columns=['c'])
    md_a = Metadata(frame_a)
    md_b = Metadata(frame_b)
    md_c = Metadata(frame_c)

    # Merging onto md_a: expected IDs are listed in md_a's order and the
    # columns in call order (a, b, c).
    observed = md_a.merge(md_b, md_c)
    expected_frame = pd.DataFrame(
        [[1, 7, 8], [3, 6, 10], [4, 5, 9]],
        index=pd.Index(['id1', 'id3', 'id4'], name='id'),
        columns=['a', 'b', 'c'])
    self.assertEqual(observed, Metadata(expected_frame))

    # Merging in different order produces different ID/column order.
    observed = md_b.merge(md_a, md_c)
    expected_frame = pd.DataFrame(
        [[5, 4, 9], [6, 3, 10], [7, 1, 8]],
        index=pd.Index(['id4', 'id3', 'id1'], name='id'),
        columns=['b', 'a', 'c'])
    self.assertEqual(observed, Metadata(expected_frame))
def test_inner_join(self):
    """Check that a merge keeps only the IDs present in every Metadata."""
    frame_ab = pd.DataFrame(
        {'a': [1, 2, 3], 'b': [4, 5, 6]},
        index=pd.Index(['id1', 'id2', 'id3'], name='id'))
    frame_cd = pd.DataFrame(
        {'c': [7, 8, 9], 'd': [10, 11, 12]},
        index=pd.Index(['id2', 'X', 'Y'], name='id'))
    frame_ef = pd.DataFrame(
        {'e': [13, 14, 15], 'f': [16, 17, 18]},
        index=pd.Index(['X', 'id3', 'id2'], name='id'))
    md_ab = Metadata(frame_ab)
    md_cd = Metadata(frame_cd)
    md_ef = Metadata(frame_ef)

    # Single shared ID.
    observed = md_ab.merge(md_cd, md_ef)
    expected_frame = pd.DataFrame(
        {'a': [2], 'b': [5], 'c': [7], 'd': [10], 'e': [15], 'f': [18]},
        index=pd.Index(['id2'], name='id'))
    self.assertEqual(observed, Metadata(expected_frame))

    # Multiple shared IDs.
    observed = md_ab.merge(md_ef)
    expected_frame = pd.DataFrame(
        {'a': [2, 3], 'b': [5, 6], 'e': [15, 14], 'f': [18, 17]},
        index=pd.Index(['id2', 'id3'], name='id'))
    self.assertEqual(observed, Metadata(expected_frame))
def test_index_and_column_merge_order(self):
    """Check that the receiver of merge() sets the ID and column order."""

    def build(rows, ids, columns):
        # Wrap row data in a Metadata whose index is named 'id'.
        return Metadata(pd.DataFrame(
            rows, index=pd.Index(ids, name='id'), columns=columns))

    md_a = build([[1], [2], [3], [4]], ['id1', 'id2', 'id3', 'id4'], ['a'])
    md_b = build([[5], [6], [7]], ['id4', 'id3', 'id1'], ['b'])
    md_c = build([[8], [9], [10]], ['id1', 'id4', 'id3'], ['c'])

    merged = md_a.merge(md_b, md_c)
    wanted = build(
        [[1, 7, 8], [3, 6, 10], [4, 5, 9]],
        ['id1', 'id3', 'id4'],
        ['a', 'b', 'c'])
    self.assertEqual(merged, wanted)

    # Merging in different order produces different ID/column order.
    merged = md_b.merge(md_a, md_c)
    wanted = build(
        [[5, 4, 9], [6, 3, 10], [7, 1, 8]],
        ['id4', 'id3', 'id1'],
        ['b', 'a', 'c'])
    self.assertEqual(merged, wanted)
def test_duplicate_columns_self_merge(self):
    """Check that merging a Metadata with itself raises ValueError."""
    ids = pd.Index(['id1', 'id2'], name='id')
    frame = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}, index=ids)
    metadata = Metadata(frame)

    # Both columns are expected to be named in the error message.
    overlap_pattern = "columns overlap: 'a', 'b'"
    with self.assertRaisesRegex(ValueError, overlap_pattern):
        metadata.merge(metadata)
def test_merging_nothing(self):
    """Check that merge() with no arguments raises ValueError."""
    # NOTE(review): a test_merging_one in this view expects merge() with no
    # arguments to succeed -- confirm which contract is current.
    ids = pd.Index(['id1', 'id2', 'id3'], name='id')
    frame = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=ids)
    metadata = Metadata(frame)

    nothing_pattern = 'At least one Metadata.*nothing to merge'
    with self.assertRaisesRegex(ValueError, nothing_pattern):
        metadata.merge()
def test_duplicate_columns(self):
    """Check that merging Metadata sharing a column name raises ValueError."""
    frame_ab = pd.DataFrame(
        {'a': [1, 2], 'b': [3, 4]},
        index=pd.Index(['id1', 'id2'], name='id'))
    # Column 'b' is deliberately repeated here.
    frame_cb = pd.DataFrame(
        {'c': [5, 6], 'b': [7, 8]},
        index=pd.Index(['id1', 'id2'], name='id'))
    md_ab = Metadata(frame_ab)
    md_cb = Metadata(frame_cb)

    overlap_pattern = "columns overlap: 'b'"
    with self.assertRaisesRegex(ValueError, overlap_pattern):
        md_ab.merge(md_cb)
def test_disjoint_indices(self):
    """Check that merging Metadata with no common IDs raises ValueError."""
    frame_ab = pd.DataFrame(
        {'a': [1, 2, 3], 'b': [4, 5, 6]},
        index=pd.Index(['id1', 'id2', 'id3'], name='id'))
    frame_cd = pd.DataFrame(
        {'c': [7, 8, 9], 'd': [10, 11, 12]},
        index=pd.Index(['X', 'Y', 'Z'], name='id'))
    md_ab = Metadata(frame_ab)
    md_cd = Metadata(frame_cd)

    disjoint_pattern = 'no IDs shared'
    with self.assertRaisesRegex(ValueError, disjoint_pattern):
        md_ab.merge(md_cd)
def test_merging_unaligned_indices(self):
    """Check that rows are matched by ID, not by position, when merging."""
    frame_ab = pd.DataFrame(
        {'a': [1, 2, 3], 'b': [4, 5, 6]},
        index=pd.Index(['id1', 'id2', 'id3'], name='id'))
    # Same IDs as frame_ab, listed in reverse order.
    frame_cd = pd.DataFrame(
        {'c': [9, 8, 7], 'd': [12, 11, 10]},
        index=pd.Index(['id3', 'id2', 'id1'], name='id'))
    # Same IDs again, with the last two swapped.
    frame_ef = pd.DataFrame(
        {'e': [13, 15, 14], 'f': [16, 18, 17]},
        index=pd.Index(['id1', 'id3', 'id2'], name='id'))
    md_ab = Metadata(frame_ab)
    md_cd = Metadata(frame_cd)
    md_ef = Metadata(frame_ef)

    observed = md_ab.merge(md_cd, md_ef)

    expected_columns = {
        'a': [1, 2, 3],
        'b': [4, 5, 6],
        'c': [7, 8, 9],
        'd': [10, 11, 12],
        'e': [13, 14, 15],
        'f': [16, 17, 18],
    }
    expected = Metadata(pd.DataFrame(
        expected_columns,
        index=pd.Index(['id1', 'id2', 'id3'], name='id')))
    self.assertEqual(observed, expected)
def test_merging_two(self):
    """Check that two Metadata with identical IDs merge column-wise."""
    frame_ab = pd.DataFrame(
        {'a': [1, 2, 3], 'b': [4, 5, 6]},
        index=pd.Index(['id1', 'id2', 'id3'], name='id'))
    frame_cd = pd.DataFrame(
        {'c': [7, 8, 9], 'd': [10, 11, 12]},
        index=pd.Index(['id1', 'id2', 'id3'], name='id'))
    md_ab = Metadata(frame_ab)
    md_cd = Metadata(frame_cd)

    observed = md_ab.merge(md_cd)

    expected_columns = {
        'a': [1, 2, 3],
        'b': [4, 5, 6],
        'c': [7, 8, 9],
        'd': [10, 11, 12],
    }
    expected = Metadata(pd.DataFrame(
        expected_columns,
        index=pd.Index(['id1', 'id2', 'id3'], name='id')))
    self.assertEqual(observed, expected)
def test_no_artifacts(self):
    """Check that the merge result here has an empty ``artifacts`` tuple."""
    frame_a = pd.DataFrame(
        {'a': [1, 2]}, index=pd.Index(['id1', 'id2'], name='id'))
    frame_b = pd.DataFrame(
        {'b': [3, 4]}, index=pd.Index(['id1', 'id2'], name='id'))
    md_a = Metadata(frame_a)
    md_b = Metadata(frame_b)

    merged = md_a.merge(md_b)

    self.assertEqual(merged.artifacts, ())
def test_merging_one(self):
    """Check that merge() with no arguments gives an equal, distinct copy."""
    # NOTE(review): a test_merging_nothing in this view expects merge() with
    # no arguments to raise ValueError -- confirm which contract is current.
    ids = pd.Index(['id1', 'id2', 'id3'], name='id')
    frame = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=ids)
    original = Metadata(frame)

    merged = original.merge()

    # The result must be a different object that still compares equal.
    self.assertIsNot(merged, original)
    self.assertEqual(merged, original)
def test_id_column_only(self):
    """Check merging of Metadata that have IDs but no data columns."""
    frame_one = pd.DataFrame(
        {}, index=pd.Index(['id1', 'id2', 'id3'], name='id'))
    frame_two = pd.DataFrame(
        {}, index=pd.Index(['id2', 'X', 'id1'], name='id'))
    frame_three = pd.DataFrame(
        {}, index=pd.Index(['id1', 'id3', 'id2'], name='id'))
    md_one = Metadata(frame_one)
    md_two = Metadata(frame_two)
    md_three = Metadata(frame_three)

    observed = md_one.merge(md_two, md_three)

    # Only 'id1' and 'id2' appear in all three inputs.
    expected_frame = pd.DataFrame(
        {}, index=pd.Index(['id1', 'id2'], name='id'))
    self.assertEqual(observed, Metadata(expected_frame))
def test_id_column_only(self):
    """Check that column-less Metadata merge down to their shared IDs."""

    def ids_only(ids):
        # Build a Metadata with no columns and an index named 'id'.
        return Metadata(pd.DataFrame({}, index=pd.Index(ids, name='id')))

    md_one = ids_only(['id1', 'id2', 'id3'])
    md_two = ids_only(['id2', 'X', 'id1'])
    md_three = ids_only(['id1', 'id3', 'id2'])

    merged = md_one.merge(md_two, md_three)

    wanted = ids_only(['id1', 'id2'])
    self.assertEqual(merged, wanted)
def test_merged_id_column_name(self):
    """Check that the merged index is named 'id' when input names differ."""
    sample_frame = pd.DataFrame(
        {'a': [1, 2]},
        index=pd.Index(['id1', 'id2'], name='sample ID'))
    feature_frame = pd.DataFrame(
        {'b': [3, 4]},
        index=pd.Index(['id1', 'id2'], name='feature ID'))
    sample_md = Metadata(sample_frame)
    feature_md = Metadata(feature_frame)

    observed = sample_md.merge(feature_md)

    expected_frame = pd.DataFrame(
        {'a': [1, 2], 'b': [3, 4]},
        index=pd.Index(['id1', 'id2'], name='id'))
    self.assertEqual(observed, Metadata(expected_frame))
def test_merging_two(self):
    """Check that merging two aligned Metadata combines their columns."""

    def build(columns):
        # Each Metadata gets its own fresh index with the same three IDs.
        ids = pd.Index(['id1', 'id2', 'id3'], name='id')
        return Metadata(pd.DataFrame(columns, index=ids))

    md_ab = build({'a': [1, 2, 3], 'b': [4, 5, 6]})
    md_cd = build({'c': [7, 8, 9], 'd': [10, 11, 12]})

    merged = md_ab.merge(md_cd)

    wanted = build({
        'a': [1, 2, 3],
        'b': [4, 5, 6],
        'c': [7, 8, 9],
        'd': [10, 11, 12],
    })
    self.assertEqual(merged, wanted)
def test_merging_unaligned_indices(self):
    """Check that merging aligns values by ID when ID orders differ."""

    def build(columns, ids):
        # Wrap column data in a Metadata whose index is named 'id'.
        return Metadata(
            pd.DataFrame(columns, index=pd.Index(ids, name='id')))

    md_ab = build(
        {'a': [1, 2, 3], 'b': [4, 5, 6]}, ['id1', 'id2', 'id3'])
    md_cd = build(
        {'c': [9, 8, 7], 'd': [12, 11, 10]}, ['id3', 'id2', 'id1'])
    md_ef = build(
        {'e': [13, 15, 14], 'f': [16, 18, 17]}, ['id1', 'id3', 'id2'])

    merged = md_ab.merge(md_cd, md_ef)

    # Expected values are listed in md_ab's ID order.
    wanted = build(
        {
            'a': [1, 2, 3],
            'b': [4, 5, 6],
            'c': [7, 8, 9],
            'd': [10, 11, 12],
            'e': [13, 14, 15],
            'f': [16, 17, 18],
        },
        ['id1', 'id2', 'id3'])
    self.assertEqual(merged, wanted)