def _test_multicolumn_merge(self):
    """Merge two plain DataFrames without naming any join key.

    Both inputs carry ``x`` and ``w`` columns; the expected frame has a
    single row (x=1, w=4).

    NOTE(review): the leading underscore presumably keeps test runners
    from collecting this method -- confirm whether it is disabled on
    purpose.
    """
    left = pd.DataFrame({'x': [1, 2, 3], 'y': [10, 20, 30], 'w': [4, 5, 6]})
    right = pd.DataFrame({'x': [1, 5, 3], 'z': [20, 30, 40], 'w': [4, 6, 5]})

    # NOTE(review): 'index' is one of the data keys, so it becomes an
    # ordinary column called "index" rather than the frame's index --
    # confirm that this is what was intended.
    want_columns = {
        'x': [1],
        'y': [10],
        'z': [20],
        'w': [4],
        'index': [0],
    }
    want = pd.DataFrame(want_columns)

    got = merge(left, right)
    self.check(want, got)
def test_separate_on(self):
    """Merge with different key names per side (``left_on`` / ``right_on``).

    The left input is keyed by ``i`` and the right by ``j``; the colliding
    ``row`` name is expected to come back as ``row_a`` / ``row_b`` because
    of the ``suffixes`` argument.

    NOTE(review): a second method with this exact name appears later in
    this file; if both belong to the same class the later one replaces
    this one.
    """
    left = make(('i', 'row'), i=[1, 2], l=[2, 3])
    right = make(('j', 'row'), j=[1, 2], k=[2, 3])

    want_columns = {
        'i': [1, 2],
        'row_a': [0, 1],
        'row_b': [0, 1],
        'l': [2, 3],
        'k': [2, 3],
    }
    want = pd.DataFrame(want_columns).set_index(['i', 'row_a', 'row_b'])

    got = merge(left, right, left_on='i', right_on='j', suffixes=('_a', '_b'))
    self.check(want, got)
def test_dimension_merge(self):
    """Merge two inputs on ``row`` and expect all three columns side by side.

    ``row`` is the name passed in the first argument to ``make``
    (presumably the dimension name -- confirm against ``make``).

    NOTE(review): a second method with this exact name appears later in
    this file; if both belong to the same class the later one replaces
    this one.
    """
    left = make(('row',), x=[1, 2, 3], y=[10, 20, 30])
    right = make(('row',), z=[20, 30, 40])

    want_columns = {
        'x': [1, 2, 3],
        'y': [10, 20, 30],
        'z': [20, 30, 40],
    }
    want = pd.DataFrame(want_columns)
    # The expected frame keeps its default index, labelled 'row'.
    want.index.name = 'row'

    got = merge(left, right, on='row')
    self.check(want, got)
def test_manyone(self):
    """Merge on ``x`` when a key value repeats on the left side.

    ``x == 1`` occurs twice on the left (rows 0 and 3) and once on the
    right (row 0), so the expected result pairs both left rows with that
    single right row; ``x == 3`` matches one-to-one.
    """
    left = make(('x', 'row'), x=[1, 2, 3, 1], y=[10, 20, 30, 40])
    right = make(('x', 'row'), x=[1, 0, 3, 0], z=[20, 30, 40, 80])

    want_columns = {
        'x': [1, 1, 3],
        'y': [10, 40, 30],
        'z': [20, 20, 40],
        'row_x': [0, 3, 2],
        'row_y': [0, 0, 2],
    }
    index_columns = ['x', 'row_x', 'row_y']
    want = pd.DataFrame(want_columns).set_index(index_columns)

    got = merge(left, right, on='x')
    self.check(want, got)
def test_dimension_merge(self):
    """Check that merging on ``row`` lines up ``x``, ``y`` and ``z``.

    NOTE(review): an earlier method in this file has the same name; if
    both belong to the same class, this definition is the one that
    survives.
    """
    first = make(('row',), x=[1, 2, 3], y=[10, 20, 30])
    second = make(('row',), z=[20, 30, 40])

    reference = pd.DataFrame(
        {'x': [1, 2, 3], 'y': [10, 20, 30], 'z': [20, 30, 40]}
    )
    # Label the default index so it carries the 'row' name.
    reference.index.name = 'row'

    self.check(reference, merge(first, second, on='row'))
def test_separate_on(self):
    """Check a merge whose key is ``i`` on the left and ``j`` on the right.

    The shared ``row`` name is expected back as ``row_a`` / ``row_b``,
    matching the ``suffixes`` passed to ``merge``.

    NOTE(review): an earlier method in this file has the same name; if
    both belong to the same class, this definition is the one that
    survives.
    """
    first = make(('i', 'row'), i=[1, 2], l=[2, 3])
    second = make(('j', 'row'), j=[1, 2], k=[2, 3])

    reference = pd.DataFrame(
        {
            'i': [1, 2],
            'row_a': [0, 1],
            'row_b': [0, 1],
            'l': [2, 3],
            'k': [2, 3],
        }
    )
    reference = reference.set_index(['i', 'row_a', 'row_b'])

    merged = merge(
        first,
        second,
        left_on='i',
        right_on='j',
        suffixes=('_a', '_b'),
    )
    self.check(reference, merged)
def test_default_merge(self):
    """Call ``merge`` with no key arguments and check the full result.

    Both inputs share the ``row`` name and an ``x`` column. The expected
    frame keeps all three rows, exposes the two ``x`` columns as ``x_x`` /
    ``x_y``, and is indexed by ``row``, ``a`` and ``b``.
    """
    left = make(('row', 'a'), x=[1, 2, 3], y=[10, 20, 30], a=[0, 0, 0])
    right = make(('row', 'b'), x=[1, 5, 3], z=[20, 30, 40], b=[0, 0, 0])

    want_columns = {
        'x_x': [1, 2, 3],
        'x_y': [1, 5, 3],
        'a': [0, 0, 0],
        'b': [0, 0, 0],
        'y': [10, 20, 30],
        'row': [0, 1, 2],
        'z': [20, 30, 40],
    }
    want = pd.DataFrame(want_columns).set_index(['row', 'a', 'b'])

    got = merge(left, right)
    self.check(want, got)
def test_attribute_merge(self):
    """Merge on the ``x`` column, which is not named in the ``make`` tuple.

    Only ``x == 1`` and ``x == 3`` occur on both sides, so the expected
    frame has two rows, indexed by ``row_x``, ``x_cat`` and ``row_y``.
    """
    left = make(('row',), x=[1, 2, 3], y=[10, 20, 30])
    right = make(('row',), x=[1, 5, 3], z=[20, 30, 40])

    want_columns = {
        'x_x': [1, 3],
        'x_y': [1, 3],
        'y': [10, 30],
        'z': [20, 40],
        'x_cat': [0, 2],
        'row_y': [0, 2],
        'row_x': [0, 2],
    }
    want = pd.DataFrame(want_columns).set_index(['row_x', 'x_cat', 'row_y'])
    # NOTE(review): the index was just built from three columns, so this
    # assignment likely does not rename any of its levels -- it looks like
    # a leftover from a single-index test; confirm before removing.
    want.index.name = 'row'

    got = merge(left, right, on='x')
    self.check(want, got)
def test_string_join_extra_cells(self):
    """Merge two record arrays on a string column with unmatched rows.

    "one" and "two" appear on both sides; "three" (left only) and "five"
    (right only) are absent from the expected frame.
    """
    # str(...) is kept around the field names exactly as in the original
    # (presumably for interpreters where a bare literal is not a native
    # str -- confirm before simplifying).
    left_dtype = [(str('x'), '|S8'), (str('y'), int)]
    right_dtype = [(str('x'), '|S8'), (str('z'), int)]

    left_records = np.array(
        [("one", 10), ("two", 20), ("three", 30)],
        dtype=left_dtype,
    )
    right_records = np.array(
        [("two", 30), ("five", 50), ("one", 40)],
        dtype=right_dtype,
    )

    want_columns = {
        'x_1': ["one", "two"],
        'x_2': ["one", "two"],
        'x_cat': [0, 2],
        'y': [10, 20],
        'z': [40, 30],
        'i0_1': [0, 1],
        'i0_2': [2, 0],
    }
    want = pd.DataFrame(want_columns).set_index(['i0_1', 'x_cat', 'i0_2'])

    got = merge(
        sdb.from_array(left_records),
        sdb.from_array(right_records),
        on='x',
        suffixes=('_1', '_2'),
    )
    self.check(want, got)