Example #1
0
def test_merge_3():
    """Merge two three-row tables on their shared column 'a'.

    Both tables hold the same values in 'a'; the left one also carries 'b'
    and the right one 'c'. After the merge the test expects six rows where
    'a' is always present while 'b' and 'c' are present on alternating rows.
    """
    table_a = Table()
    table_b = Table()

    table_a.keys = ['a', 'b']
    table_a.data = [np.array([1, 2, 3]) for _ in range(2)]
    table_a.index = np.ones((2, 3), dtype=np.uint8)

    table_b.keys = ['a', 'c']
    table_b.data = [np.array([1, 2, 3]) for _ in range(2)]
    table_b.index = np.ones((2, 3), dtype=np.uint8)

    table_a.merge(table_b, 'a')

    # Expected presence flags per column of the merged (left) table.
    expected_flags = (
        ('a', [1, 1, 1, 1, 1, 1]),
        ('b', [1, 0, 1, 0, 1, 0]),
        ('c', [0, 1, 0, 1, 0, 1]),
    )
    for name, flags in expected_flags:
        merged_column = getattr(table_a, name)
        assert np.all(merged_column.index == np.array(flags))
Example #2
0
def test_fill_table():
    """Fill column 'b' through the table-level ``fill_column`` call.

    Column 'b' stores two values (1 and 2) flagged as present at rows 2
    and 7; after filling with ``fillvalue=3`` every other row is expected
    to read 3 while the stored values stay in place.
    """
    present_b = [0, 0, 1, 0, 0, 0, 0, 1, 0, 0]

    table = Table()
    table.keys = ['a', 'b']
    table.data = [np.arange(10), np.array([1, 2])]
    table.index = np.array([[1] * 10, present_b], dtype=np.uint8)

    table.fill_column('b', fillvalue=3)

    filled = table.b.values
    expected = np.array([3, 3, 1, 3, 3, 3, 3, 2, 3, 3])
    assert np.all(filled == expected)
Example #3
0
def test_fill_empty_column():
    """Fill a column that holds no data at all.

    Column 'b' has an empty data array and an all-zero index row; after
    ``fill(fillvalue=3)`` all ten rows are expected to read 3.
    """
    n_rows = 10

    table = Table()
    table.keys = ['a', 'b']
    table.data = [np.arange(n_rows), np.array([])]
    table.index = np.array([[1] * n_rows, [0] * n_rows], dtype=np.uint8)

    column_b = table.b
    column_b.fill(fillvalue=3)

    expected = np.full(n_rows, 3)
    assert np.all(column_b.values == expected)
Example #4
0
def test_fill_full_column():
    """Fill a column in which every row is already present.

    With an all-ones index row there is nothing to fill, so the values of
    'b' are expected to stay 0..9 after ``fill(fillvalue=3)``.
    """
    n_rows = 10

    table = Table()
    table.keys = ['a', 'b']
    table.data = [np.arange(n_rows), np.arange(n_rows)]
    table.index = np.ones((2, n_rows), dtype=np.uint8)

    column_b = table.b
    column_b.fill(fillvalue=3)

    untouched = np.arange(n_rows)
    assert np.all(column_b.values == untouched)
Example #5
0
def test_fillna_reverse_fillvalue():
    """Check ``fillna`` with ``reverse=True`` and an explicit fill value.

    Column 'b' has values 1 and 2 at rows 2 and 7. The expected result
    shows each missing row taking the next present value further down,
    and the trailing rows (with no later value) taking ``fillvalue``.
    Every row is expected to be flagged as present afterwards.
    """
    present_b = [0, 0, 1, 0, 0, 0, 0, 1, 0, 0]

    table = Table()
    table.keys = ['a', 'b']
    table.data = [np.arange(10), np.array([1, 2])]
    table.index = np.array([[1] * 10, present_b], dtype=np.uint8)

    column_b = table.b
    column_b.fillna(reverse=True, fillvalue=-1)

    expected_values = np.array([1, 1, 1, 2, 2, 2, 2, 2, -1, -1])
    expected_index = np.array([1] * 10)
    assert np.all(column_b.values == expected_values)
    assert np.all(column_b.index == expected_index)
Example #6
0
def full_outer_join(table_left, table_right, column, check_sorted=True):
    """
    Full outer join of two tables on a shared column.

    The join column of the result is built from the union of the values of
    ``column`` in both tables. Every other column of either table is carried
    over. If columns are repeated, the left table has preference (repeated
    columns are combined by ``reindex_join_columns``, which receives the
    left column first).

    The result columns come out in a deterministic order: the join column,
    then the remaining left-table columns in their original order, then the
    columns that exist only in the right table.

    :param table_left: Left table.
    :param table_right: Right table.
    :param column: Name of the column to join on. It must be a key of both
      tables.
    :param check_sorted: If True (the default), verify that the join column
      is sorted in both tables and raise if it is not. Passing False skips
      that verification (saving a sort per table); the caller is then
      responsible for the column being sorted, since the join works on
      sorted values.
    :return: A new ``Table`` with the joined data.
    :raises ValueError: If ``column`` is missing from either table, or if
      ``check_sorted`` is True and the join column is not sorted.
    """
    if column not in table_left.keys:
        raise ValueError('{} not in left table'.format(column))

    if column not in table_right.keys:
        raise ValueError('{} not in right table'.format(column))

    # Collect the non-join columns in first-seen order. Iterating a set here
    # would make the column order of the result depend on string hashing,
    # i.e. change from one interpreter run to the next.
    joined_columns = []
    seen = {column}
    for key in chain(table_left.keys, table_right.keys):
        if key not in seen:
            seen.add(key)
            joined_columns.append(key)

    common_left = table_left.get(column)
    common_right = table_right.get(column)

    if check_sorted:
        if not np.all(common_left.values == np.sort(common_left.values)):
            raise ValueError('Trying to join with a non sorted column')

        if not np.all(common_right.values == np.sort(common_right.values)):
            raise ValueError('Trying to join with a non sorted column')

    common_rec = union_sorted(common_left.values, common_right.values)

    data_joined, global_left, global_right = join_low_level(
        common_left.values, common_left.index,
        common_right.values, common_right.index, common_rec)

    # The join column always comes first and is present on every row.
    keys = [column]
    data = [data_joined]
    index = [np.ones(len(data_joined), dtype=np.uint8)]

    for i_column in joined_columns:
        in_left = i_column in table_left
        in_right = i_column in table_right

        if in_left and in_right:
            # Column exists on both sides: combine them into one.
            c_values, c_index = reindex_join_columns(
                table_left.get(i_column), table_right.get(i_column),
                global_left, global_right)
        elif in_left:
            c = table_left.get(i_column).reindex(global_left)
            c_values, c_index = c.values, c.index
        elif in_right:
            c = table_right.get(i_column).reindex(global_right)
            c_values, c_index = c.values, c.index
        else:
            continue

        keys.append(i_column)
        data.append(c_values)
        index.append(c_index)

    res = Table()
    res.data = data
    res.index = np.vstack(index)
    res.keys = keys

    return res
Example #7
0
def inner_join(table_left, table_right, column):
    """
    Inner join. If columns are repeated, the left table has preference.

    The join column of the result is built from the intersection of the
    values of ``column`` in both tables. The join column must be sorted in
    both tables.

    The result columns come out in a deterministic order: the join column,
    then the remaining left-table columns in their original order, then the
    columns that exist only in the right table.

    :param table_left: Left table.
    :param table_right: Right table.
    :param column: Name of the column to join on. It must be a key of both
      tables.
    :return: A new ``Table`` with the joined data.
    :raises ValueError: If ``column`` is missing from either table, or if
      the join column is not sorted in one of them.
    """
    if column not in table_left.keys:
        raise ValueError('{} not in left table'.format(column))

    if column not in table_right.keys:
        raise ValueError('{} not in right table'.format(column))

    # Collect the non-join columns in first-seen order. Iterating a set here
    # would make the column order of the result depend on string hashing,
    # i.e. change from one interpreter run to the next.
    joined_columns = []
    seen = {column}
    for key in chain(table_left.keys, table_right.keys):
        if key not in seen:
            seen.add(key)
            joined_columns.append(key)

    common_left = table_left.get(column)
    common_right = table_right.get(column)

    if not np.all(common_left.values == np.sort(common_left.values)):
        raise ValueError('Trying to join with a non sorted column')

    if not np.all(common_right.values == np.sort(common_right.values)):
        raise ValueError('Trying to join with a non sorted column')

    common_rec = intersection_sorted(common_left.values, common_right.values)

    data_joined, global_left, global_right = join_low_level(
        common_left.values, common_left.index,
        common_right.values, common_right.index, common_rec)

    # The join column always comes first and is present on every row.
    keys = [column]
    data = [data_joined]
    index = [np.ones(len(data_joined), dtype=np.uint8)]

    for i_column in joined_columns:
        # The left table wins when a column exists on both sides.
        if i_column in table_left:
            c = table_left.get(i_column).reindex(global_left)
        elif i_column in table_right:
            c = table_right.get(i_column).reindex(global_right)
        else:
            continue

        keys.append(i_column)
        data.append(c.values)
        index.append(c.index)

    res = Table()
    res.data = data
    res.index = np.vstack(index)
    res.keys = keys

    return res