Example #1
0
    def _test_multicolumn_merge(self):
        """Merge two pandas frames that share the columns ``x`` and ``w``.

        NOTE(review): the leading underscore presumably keeps this method
        out of test collection -- confirm whether it is disabled on purpose.
        """
        left = pd.DataFrame({'x': [1, 2, 3], 'y': [10, 20, 30], 'w': [4, 5, 6]})
        right = pd.DataFrame({'x': [1, 5, 3], 'z': [20, 30, 40], 'w': [4, 6, 5]})

        # NOTE(review): 'index' is a regular data column here (it sits inside
        # the column mapping), not the frame's index -- confirm this is
        # intended.
        want = pd.DataFrame(
            {'x': [1], 'y': [10], 'z': [20], 'w': [4], 'index': [0]})
        got = merge(left, right)

        self.check(want, got)
Example #2
0
    def test_separate_on(self):
        """Check merge() with ``left_on='i'``, ``right_on='j'`` and the
        custom suffixes ``('_a', '_b')``."""
        left = make(('i', 'row'), i=[1, 2], l=[2, 3])
        right = make(('j', 'row'), j=[1, 2], k=[2, 3])

        columns = {
            'i': [1, 2],
            'row_a': [0, 1],
            'row_b': [0, 1],
            'l': [2, 3],
            'k': [2, 3],
        }
        want = pd.DataFrame(columns).set_index(['i', 'row_a', 'row_b'])

        got = merge(left, right,
                    left_on='i', right_on='j', suffixes=('_a', '_b'))
        self.check(want, got)
Example #3
0
    def _test_multicolumn_merge(self):
        """Merge two pandas frames with no explicit key arguments.

        Both inputs carry the columns ``x`` and ``w``.

        NOTE(review): the underscore prefix presumably disables this test --
        confirm before relying on it.
        """
        left_cols = {'x': [1, 2, 3], 'y': [10, 20, 30], 'w': [4, 5, 6]}
        right_cols = {'x': [1, 5, 3], 'z': [20, 30, 40], 'w': [4, 6, 5]}
        left = pd.DataFrame(left_cols)
        right = pd.DataFrame(right_cols)

        # NOTE(review): 'index' ends up as an ordinary column, not as the
        # DataFrame index, because it is part of the column mapping --
        # confirm intended.
        want_cols = {'x': [1], 'y': [10], 'z': [20], 'w': [4], 'index': [0]}
        want = pd.DataFrame(want_cols)
        got = merge(left, right)

        self.check(want, got)
Example #4
0
    def test_dimension_merge(self):
        """Check merge() called with ``on='row'``."""
        left = make(('row', ), x=[1, 2, 3], y=[10, 20, 30])
        right = make(('row', ), z=[20, 30, 40])

        want = pd.DataFrame({
            'x': [1, 2, 3],
            'y': [10, 20, 30],
            'z': [20, 30, 40],
        })
        # The expected frame keeps its default index, labelled 'row'.
        want.index.name = 'row'

        got = merge(left, right, on='row')
        self.check(want, got)
Example #5
0
    def test_manyone(self):
        """Check merge() on ``x`` when the left input repeats a key value
        (``x == 1`` appears twice on the left)."""
        left = make(('x', 'row'), x=[1, 2, 3, 1], y=[10, 20, 30, 40])
        right = make(('x', 'row'), x=[1, 0, 3, 0], z=[20, 30, 40, 80])

        columns = {
            'x': [1, 1, 3],
            'y': [10, 40, 30],
            'z': [20, 20, 40],
            'row_x': [0, 3, 2],
            'row_y': [0, 0, 2],
        }
        want = pd.DataFrame(columns).set_index(['x', 'row_x', 'row_y'])

        got = merge(left, right, on='x')
        self.check(want, got)
Example #6
0
    def test_manyone(self):
        """Merge on ``x`` where one left-hand key value occurs twice."""
        left = make(('x', 'row'), x=[1, 2, 3, 1], y=[10, 20, 30, 40])
        right = make(('x', 'row'), x=[1, 0, 3, 0], z=[20, 30, 40, 80])

        index_cols = ['x', 'row_x', 'row_y']
        flat = pd.DataFrame({
            'x': [1, 1, 3],
            'y': [10, 40, 30],
            'z': [20, 20, 40],
            'row_x': [0, 3, 2],
            'row_y': [0, 0, 2],
        })
        want = flat.set_index(index_cols)

        got = merge(left, right, on='x')
        self.check(want, got)
Example #7
0
    def test_dimension_merge(self):
        """Merge two inputs with ``on='row'`` and compare with a frame whose
        index is named ``row``."""
        left = make(('row',), x=[1, 2, 3], y=[10, 20, 30])
        right = make(('row',), z=[20, 30, 40])

        columns = {'x': [1, 2, 3], 'y': [10, 20, 30], 'z': [20, 30, 40]}
        want = pd.DataFrame(columns)
        want.index.name = 'row'

        got = merge(left, right, on='row')
        self.check(want, got)
Example #8
0
    def test_separate_on(self):
        """Merge using distinct key names per side (``left_on`` /
        ``right_on``) together with explicit suffixes."""
        left = make(('i', 'row'), i=[1, 2], l=[2, 3])
        right = make(('j', 'row'), j=[1, 2], k=[2, 3])

        flat = pd.DataFrame({
            'i': [1, 2],
            'row_a': [0, 1],
            'row_b': [0, 1],
            'l': [2, 3],
            'k': [2, 3],
        })
        want = flat.set_index(['i', 'row_a', 'row_b'])

        merge_options = dict(left_on='i', right_on='j', suffixes=('_a', '_b'))
        got = merge(left, right, **merge_options)
        self.check(want, got)
Example #9
0
    def test_default_merge(self):
        """Check merge() with no ``on`` / suffix arguments.

        Both inputs have an ``x`` column; the expected frame carries them as
        ``x_x`` and ``x_y``.
        """
        left = make(('row', 'a'), x=[1, 2, 3], y=[10, 20, 30], a=[0, 0, 0])
        right = make(('row', 'b'), x=[1, 5, 3], z=[20, 30, 40], b=[0, 0, 0])

        columns = {
            'x_x': [1, 2, 3],
            'x_y': [1, 5, 3],
            'a': [0, 0, 0],
            'b': [0, 0, 0],
            'y': [10, 20, 30],
            'row': [0, 1, 2],
            'z': [20, 30, 40],
        }
        want = pd.DataFrame(columns).set_index(['row', 'a', 'b'])

        got = merge(left, right)
        self.check(want, got)
Example #10
0
    def test_default_merge(self):
        """Merge two inputs using merge()'s defaults and compare against a
        frame indexed by ``row``, ``a`` and ``b``."""
        left = make(('row', 'a'), x=[1, 2, 3], y=[10, 20, 30], a=[0, 0, 0])
        right = make(('row', 'b'), x=[1, 5, 3], z=[20, 30, 40], b=[0, 0, 0])

        index_cols = ['row', 'a', 'b']
        flat = pd.DataFrame({
            'x_x': [1, 2, 3],
            'x_y': [1, 5, 3],
            'a': [0, 0, 0],
            'b': [0, 0, 0],
            'y': [10, 20, 30],
            'row': [0, 1, 2],
            'z': [20, 30, 40],
        })
        want = flat.set_index(index_cols)

        got = merge(left, right)
        self.check(want, got)
Example #11
0
    def test_attribute_merge(self):
        """Check merge() called with ``on='x'``."""
        left = make(('row',), x=[1, 2, 3], y=[10, 20, 30])
        right = make(('row',), x=[1, 5, 3], z=[20, 30, 40])

        columns = {
            'x_x': [1, 3],
            'x_y': [1, 3],
            'y': [10, 30],
            'z': [20, 40],
            'x_cat': [0, 2],
            'row_y': [0, 2],
            'row_x': [0, 2],
        }
        want = pd.DataFrame(columns).set_index(['row_x', 'x_cat', 'row_y'])
        # NOTE(review): the index has three levels at this point, so setting
        # a single ``name`` looks like a leftover -- confirm it is needed.
        want.index.name = 'row'

        got = merge(left, right, on='x')
        self.check(want, got)
Example #12
0
    def test_attribute_merge(self):
        """Merge two inputs on ``x`` and compare with a frame indexed by
        ``row_x``, ``x_cat`` and ``row_y``."""
        left = make(('row',), x=[1, 2, 3], y=[10, 20, 30])
        right = make(('row',), x=[1, 5, 3], z=[20, 30, 40])

        index_cols = ['row_x', 'x_cat', 'row_y']
        flat = pd.DataFrame({
            'x_x': [1, 3],
            'x_y': [1, 3],
            'y': [10, 30],
            'z': [20, 40],
            'x_cat': [0, 2],
            'row_y': [0, 2],
            'row_x': [0, 2],
        })
        want = flat.set_index(index_cols)
        # NOTE(review): ``name`` is assigned on a multi-level index here;
        # possibly carried over from another test -- confirm intended.
        want.index.name = 'row'

        got = merge(left, right, on='x')
        self.check(want, got)
Example #13
0
    def test_string_join_extra_cells(self):
        """Check merge() on the string field ``x`` of two structured arrays,
        using the suffixes ``('_1', '_2')``."""
        left_dtype = [(str('x'), '|S8'), (str('y'), int)]
        right_dtype = [(str('x'), '|S8'), (str('z'), int)]
        left = np.array([("one", 10), ("two", 20), ("three", 30)],
                        dtype=left_dtype)
        right = np.array([("two", 30), ("five", 50), ("one", 40)],
                         dtype=right_dtype)

        columns = {
            'x_1': ["one", "two"],
            'x_2': ["one", "two"],
            'x_cat': [0, 2],
            'y': [10, 20],
            'z': [40, 30],
            'i0_1': [0, 1],
            'i0_2': [2, 0],
        }
        want = pd.DataFrame(columns).set_index(['i0_1', 'x_cat', 'i0_2'])

        left_arr = sdb.from_array(left)
        right_arr = sdb.from_array(right)
        got = merge(left_arr, right_arr, on='x', suffixes=('_1', '_2'))

        self.check(want, got)
Example #14
0
    def test_string_join_extra_cells(self):
        """Merge two arrays on a string key ``x``.

        Each input has a key value ("three" / "five") that does not occur in
        the other input.
        """
        left_rows = [("one", 10), ("two", 20), ("three", 30)]
        right_rows = [("two", 30), ("five", 50), ("one", 40)]
        left = np.array(left_rows,
                        dtype=[(str('x'), '|S8'), (str('y'), int)])
        right = np.array(right_rows,
                         dtype=[(str('x'), '|S8'), (str('z'), int)])

        index_cols = ['i0_1', 'x_cat', 'i0_2']
        flat = pd.DataFrame({
            'x_1': ["one", "two"],
            'x_2': ["one", "two"],
            'x_cat': [0, 2],
            'y': [10, 20],
            'z': [40, 30],
            'i0_1': [0, 1],
            'i0_2': [2, 0],
        })
        want = flat.set_index(index_cols)

        got = merge(sdb.from_array(left), sdb.from_array(right),
                    on='x', suffixes=('_1', '_2'))

        self.check(want, got)