def test_merge_3():
    """
    Merge two fully flagged tables in place on their shared column ``a``.

    Both tables hold three values per column with every index flag set.
    After ``merge`` the index of ``a`` is expected to be all ones, while
    the indices of ``b`` and ``c`` are expected to alternate in
    complementary patterns.
    """
    left = Table()
    right = Table()

    left.keys = ['a', 'b']
    left.data = [np.array([1, 2, 3]), np.array([1, 2, 3])]
    left.index = np.ones((2, 3), dtype=np.uint8)

    right.keys = ['a', 'c']
    right.data = [np.array([1, 2, 3]), np.array([1, 2, 3])]
    right.index = np.ones((2, 3), dtype=np.uint8)

    left.merge(right, 'a')

    expected_a = np.array([1, 1, 1, 1, 1, 1])
    expected_b = np.array([1, 0, 1, 0, 1, 0])
    expected_c = np.array([0, 1, 0, 1, 0, 1])

    assert np.all(left.a.index == expected_a)
    assert np.all(left.b.index == expected_b)
    assert np.all(left.c.index == expected_c)
def test_fill_table():
    """
    Check ``Table.fill_column`` on a partially flagged column.

    Column ``b`` stores two values (1 and 2) and its index row flags
    positions 2 and 7 out of ten. After ``fill_column('b', fillvalue=3)``
    the values of ``b`` are expected to be 3 at every other position.
    """
    table = Table()
    table.keys = ['a', 'b']
    table.data = [np.arange(10), np.array([1, 2])]

    # Row 0 flags every position of ``a``; row 1 flags positions 2 and 7.
    flags = np.zeros((2, 10), dtype=np.uint8)
    flags[0, :] = 1
    flags[1, [2, 7]] = 1
    table.index = flags

    table.fill_column('b', fillvalue=3)

    expected = np.array([3, 3, 1, 3, 3, 3, 3, 2, 3, 3])
    assert np.all(table.b.values == expected)
def test_fill_empty_column():
    """
    Check ``fill`` on a column that has no stored values at all.

    Column ``b`` has empty data and an all-zero index row of length ten.
    After ``fill(fillvalue=3)`` its values are expected to be ten 3s.
    """
    table = Table()
    table.keys = ['a', 'b']
    table.data = [np.arange(10), np.array([])]

    # Row 0 (column ``a``) is fully flagged; row 1 (column ``b``) is not.
    flags = np.zeros((2, 10), dtype=np.uint8)
    flags[0, :] = 1
    table.index = flags

    column_b = table.b
    column_b.fill(fillvalue=3)

    expected = np.array([3] * 10)
    assert np.all(column_b.values == expected)
def test_fill_full_column():
    """
    Check ``fill`` on a column whose index flags are all set.

    Column ``b`` holds ``0..9`` with every flag set, so after
    ``fill(fillvalue=3)`` its values are expected to be unchanged.
    """
    table = Table()
    table.keys = ['a', 'b']
    table.data = [np.arange(10), np.arange(10)]
    table.index = np.ones((2, 10), dtype=np.uint8)

    column_b = table.b
    column_b.fill(fillvalue=3)

    expected = np.arange(10)
    assert np.all(column_b.values == expected)
def test_fillna_reverse_fillvalue():
    """
    Check ``fillna`` with ``reverse=True`` and an explicit ``fillvalue``.

    Column ``b`` stores 1 and 2, flagged at positions 2 and 7 out of ten.
    The expected values are 1 up to position 2, 2 up to position 7, and
    the fill value -1 for the two trailing positions; the resulting index
    is expected to be all ones.
    """
    table = Table()
    table.keys = ['a', 'b']
    table.data = [np.arange(10), np.array([1, 2])]

    # Row 0 flags every position of ``a``; row 1 flags positions 2 and 7.
    flags = np.zeros((2, 10), dtype=np.uint8)
    flags[0, :] = 1
    flags[1, [2, 7]] = 1
    table.index = flags

    column_b = table.b
    column_b.fillna(reverse=True, fillvalue=-1)

    expected_values = np.array([1, 1, 1, 2, 2, 2, 2, 2, -1, -1])
    expected_index = np.ones(10, dtype=int)

    assert np.all(column_b.values == expected_values)
    assert np.all(column_b.index == expected_index)
def full_outer_join(table_left, table_right, column, check_sorted=True):
    """
    Full outer join of two tables on a shared column.

    The join keys are computed with ``union_sorted`` over the join column
    of both tables. Every other column found in either table is carried
    into the result: a column present in both tables is combined with
    ``reindex_join_columns``, a column present in only one table is
    reindexed against that table's positions in the joined result.

    The output column order is deterministic: the join column first, then
    the remaining columns of the left table in key order, then the columns
    found only in the right table in key order.

    :param table_left: Left table; ``column`` must be among its keys.
    :param table_right: Right table; ``column`` must be among its keys.
    :param column: Name of the column to join on.
    :param check_sorted: If True (default), verify that the join column of
        both tables is sorted and raise otherwise. Passing False skips
        this verification (and the sort it performs); the caller is then
        responsible for the join columns being sorted.
    :return: A new ``Table`` holding the joined data.
    :raises ValueError: If ``column`` is missing from either table, or if
        ``check_sorted`` is True and one of the join columns is not sorted.
    """
    left_keys = list(table_left.keys)
    right_keys = list(table_right.keys)

    if column not in left_keys:
        raise ValueError('{} not in left table'.format(column))
    if column not in right_keys:
        raise ValueError('{} not in right table'.format(column))

    left_names = set(left_keys)
    right_names = set(right_keys)

    # Ordered, de-duplicated list of the non-join columns. Iterating a plain
    # set here would make the output column order depend on string hashing,
    # which changes from one interpreter run to the next.
    joined_columns = []
    seen = {column}
    for key in chain(left_keys, right_keys):
        if key not in seen:
            seen.add(key)
            joined_columns.append(key)

    common_left = table_left.get(column)
    common_right = table_right.get(column)

    if check_sorted:
        if not np.all(common_left.values == np.sort(common_left.values)):
            raise ValueError('Trying to join with a non sorted column')
        if not np.all(common_right.values == np.sort(common_right.values)):
            raise ValueError('Trying to join with a non sorted column')

    common_rec = union_sorted(common_left.values, common_right.values)

    data_joined, global_left, global_right = join_low_level(
        common_left.values, common_left.index,
        common_right.values, common_right.index,
        common_rec)

    # The join column comes first and is flagged on every row of the result.
    keys = [column]
    data = [data_joined]
    index = [np.ones(len(data_joined), dtype=np.uint8)]

    for i_column in joined_columns:
        in_left = i_column in left_names
        in_right = i_column in right_names

        if in_left and in_right:
            # Column exists on both sides: combine both contributions.
            cl = table_left.get(i_column)
            cr = table_right.get(i_column)
            c_values, c_index = reindex_join_columns(
                cl, cr, global_left, global_right)
        elif in_left:
            c = table_left.get(i_column).reindex(global_left)
            c_values, c_index = c.values, c.index
        else:
            c = table_right.get(i_column).reindex(global_right)
            c_values, c_index = c.values, c.index

        keys.append(i_column)
        data.append(c_values)
        index.append(c_index)

    res = Table()
    res.data = data
    res.index = np.vstack(index)
    res.keys = keys

    return res
def inner_join(table_left, table_right, column, check_sorted=True):
    """
    Inner join of two tables on a shared column.

    The join keys are computed with ``intersection_sorted`` over the join
    column of both tables. If a column name is repeated in both tables,
    the left table has preference and the right one is ignored.

    The output column order is deterministic: the join column first, then
    the remaining columns of the left table in key order, then the columns
    found only in the right table in key order.

    :param table_left: Left table; ``column`` must be among its keys.
    :param table_right: Right table; ``column`` must be among its keys.
    :param column: Name of the column to join on.
    :param check_sorted: If True (default), verify that the join column of
        both tables is sorted and raise otherwise. Passing False skips
        this verification (and the sort it performs); the caller is then
        responsible for the join columns being sorted.
    :return: A new ``Table`` holding the joined data.
    :raises ValueError: If ``column`` is missing from either table, or if
        ``check_sorted`` is True and one of the join columns is not sorted.
    """
    left_keys = list(table_left.keys)
    right_keys = list(table_right.keys)

    if column not in left_keys:
        raise ValueError('{} not in left table'.format(column))
    if column not in right_keys:
        raise ValueError('{} not in right table'.format(column))

    left_names = set(left_keys)

    # Ordered, de-duplicated list of the non-join columns. Iterating a plain
    # set here would make the output column order depend on string hashing,
    # which changes from one interpreter run to the next.
    joined_columns = []
    seen = {column}
    for key in chain(left_keys, right_keys):
        if key not in seen:
            seen.add(key)
            joined_columns.append(key)

    common_left = table_left.get(column)
    common_right = table_right.get(column)

    if check_sorted:
        if not np.all(common_left.values == np.sort(common_left.values)):
            raise ValueError('Trying to join with a non sorted column')
        if not np.all(common_right.values == np.sort(common_right.values)):
            raise ValueError('Trying to join with a non sorted column')

    common_rec = intersection_sorted(common_left.values, common_right.values)

    data_joined, global_left, global_right = join_low_level(
        common_left.values, common_left.index,
        common_right.values, common_right.index,
        common_rec)

    # The join column comes first and is flagged on every row of the result.
    keys = [column]
    data = [data_joined]
    index = [np.ones(len(data_joined), dtype=np.uint8)]

    for i_column in joined_columns:
        # Left table wins when the column exists on both sides.
        if i_column in left_names:
            c = table_left.get(i_column).reindex(global_left)
        else:
            c = table_right.get(i_column).reindex(global_right)

        keys.append(i_column)
        data.append(c.values)
        index.append(c.index)

    res = Table()
    res.data = data
    res.index = np.vstack(index)
    res.keys = keys

    return res