Esempio n. 1
0
    def testFilterIndexValue(self):
        """Compare ``filter_index_value`` with the equivalent pandas masks.

        Each ``RangeIndex`` case is filtered twice, once with both bounds
        closed and once with both bounds open, and the result is compared
        with boolean-mask selection on the original pandas index. The
        ``Int64Index`` cases then cover a half-open interval, with and
        without ``store_data=True``.
        """
        def kept(idx_value, bounds, **options):
            # Materialise the filtered index value as a plain Python list.
            return filter_index_value(idx_value, bounds, **options).to_pandas().tolist()

        # (RangeIndex arguments, parse_index keyword arguments, low, high)
        range_cases = (
            ((10,), {}, 0, 9),                            # ascending, step 1
            ((1, 11, 3), {}, 2, 10),                      # ascending, step 3
            ((9, -1, -1), {}, 0, 9),                      # descending, step -1
            ((10, 0, -3), {'store_data': False}, 2, 10),  # descending, step -3
        )
        for range_args, parse_options, low, high in range_cases:
            raw = pd.RangeIndex(*range_args)
            parsed = parse_index(raw, **parse_options)

            # both bounds closed
            self.assertEqual(kept(parsed, (low, True, high, True)),
                             raw[(raw >= low) & (raw <= high)].tolist())
            # both bounds open
            self.assertEqual(kept(parsed, (low, False, high, False)),
                             raw[(raw > low) & (raw < high)].tolist())

        raw = pd.Int64Index([0, 3, 8])
        half_open = (2, True, 8, False)

        # data stored on both parsing and filtering: result matches the pandas mask
        parsed = parse_index(raw, store_data=True)
        self.assertEqual(kept(parsed, half_open, store_data=True),
                         raw[(raw >= 2) & (raw < 8)].tolist())

        # default parsing: the test expects an empty result that is still typed
        # as an Int64Index
        parsed = parse_index(raw)
        outcome = filter_index_value(parsed, half_open)
        remaining = outcome.to_pandas().tolist()
        self.assertEqual(len(remaining), 0)
        self.assertIsInstance(outcome.value, IndexValue.Int64Index)
Esempio n. 2
0
def test_filter_index_value():
    """Verify ``filter_index_value`` agrees with pandas boolean masking.

    Four ``RangeIndex`` layouts (ascending/descending, unit and non-unit
    steps) are filtered with closed and open bounds. An ``Int64Index`` is
    then filtered with a half-open interval, both with and without
    ``store_data=True``.
    """

    def check_range(raw, parsed, low, high):
        # Closed interval [low, high].
        inclusive = filter_index_value(parsed, (low, True, high, True))
        assert inclusive.to_pandas().tolist() == \
            raw[(raw >= low) & (raw <= high)].tolist()

        # Open interval (low, high).
        exclusive = filter_index_value(parsed, (low, False, high, False))
        assert exclusive.to_pandas().tolist() == \
            raw[(raw > low) & (raw < high)].tolist()

    raw = pd.RangeIndex(10)
    check_range(raw, parse_index(raw), 0, 9)

    raw = pd.RangeIndex(1, 11, 3)
    check_range(raw, parse_index(raw), 2, 10)

    raw = pd.RangeIndex(9, -1, -1)
    check_range(raw, parse_index(raw), 0, 9)

    raw = pd.RangeIndex(10, 0, -3)
    check_range(raw, parse_index(raw, store_data=False), 2, 10)

    raw = pd.Int64Index([0, 3, 8])
    half_open = (2, True, 8, False)

    # Data stored when parsing and when filtering: compare with the mask.
    parsed = parse_index(raw, store_data=True)
    stored = filter_index_value(parsed, half_open, store_data=True)
    assert stored.to_pandas().tolist() == raw[(raw >= 2) & (raw < 8)].tolist()

    # Default parsing: the result is expected to be empty yet still typed
    # as an Int64Index.
    parsed = parse_index(raw)
    outcome = filter_index_value(parsed, half_open)
    remaining = outcome.to_pandas().tolist()
    assert len(remaining) == 0
    assert isinstance(outcome.value, IndexValue.Int64Index)
Esempio n. 3
0
    def testAddWithoutShuffle(self):
        """Check the tiled graph of ``add`` when every axis is monotonic.

        Two 10x10 frames with overlapping, monotonic index and column ranges
        are added. The test first checks the metadata of the result before
        and after tiling. It then checks that every output chunk is a
        ``DataFrameAdd`` whose two inputs are ``DataFrameIndexAlignMap``
        chunks carrying the index/column bounds computed by
        ``split_monotonic_index_min_max``.
        """
        # all the axes are monotonic
        # data1 with index split into [0...4], [5...9],
        # columns [3...7], [8...12]
        data1 = pd.DataFrame(np.random.rand(10, 10), index=np.arange(10),
                             columns=np.arange(3, 13))
        df1 = from_pandas(data1, chunk_size=5)
        # data2 has a descending index, split into [11...6], [5...2],
        # columns [4...9], [10...13]
        data2 = pd.DataFrame(np.random.rand(10, 10), index=np.arange(11, 1, -1),
                             columns=np.arange(4, 14))
        df2 = from_pandas(data2, chunk_size=6)

        df3 = add(df1, df2)

        # test df3's index and columns
        pd.testing.assert_index_equal(df3.columns.to_pandas(), (data1 + data2).columns)
        self.assertTrue(df3.columns.should_be_monotonic)
        self.assertIsInstance(df3.index_value.value, IndexValue.Int64Index)
        self.assertTrue(df3.index_value.should_be_monotonic)
        pd.testing.assert_index_equal(df3.index_value.to_pandas(), pd.Int64Index([]))
        self.assertNotEqual(df3.index_value.key, df1.index_value.key)
        self.assertNotEqual(df3.index_value.key, df2.index_value.key)
        self.assertEqual(df3.shape[1], 11)  # columns is recorded, so we can get it

        df3.tiles()

        # test df3's index and columns after tiling
        pd.testing.assert_index_equal(df3.columns.to_pandas(), (data1 + data2).columns)
        self.assertTrue(df3.columns.should_be_monotonic)
        self.assertIsInstance(df3.index_value.value, IndexValue.Int64Index)
        self.assertTrue(df3.index_value.should_be_monotonic)
        pd.testing.assert_index_equal(df3.index_value.to_pandas(), pd.Int64Index([]))
        self.assertNotEqual(df3.index_value.key, df1.index_value.key)
        self.assertNotEqual(df3.index_value.key, df2.index_value.key)
        self.assertEqual(df3.shape[1], 11)  # columns is recorded, so we can get it

        # per-chunk (min, min_close, max, max_close) bounds of each input axis
        data1_index_min_max = [(0, True, 4, True), (5, True, 9, True)]
        data1_columns_min_max = [[3, True, 7, True], [8, True, 12, True]]
        data2_index_min_max = [(2, True, 5, True), (6, True, 11, True)]
        data2_columns_min_max = [(4, True, 9, True), (10, True, 13, True)]

        # data2's index is descending, hence the ``False`` flag for it here
        # and in the matching ``build_split_idx_to_origin_idx`` call below
        left_index_splits, right_index_splits = split_monotonic_index_min_max(
            data1_index_min_max, True, data2_index_min_max, False)
        left_columns_splits, right_columns_splits = split_monotonic_index_min_max(
            data1_columns_min_max, True, data2_columns_min_max, True)

        left_index_idx_to_original_idx = build_split_idx_to_origin_idx(left_index_splits)
        right_index_idx_to_original_idx = build_split_idx_to_origin_idx(right_index_splits, False)
        left_columns_idx_to_original_idx = build_split_idx_to_origin_idx(left_columns_splits)
        right_columns_idx_to_original_idx = build_split_idx_to_origin_idx(right_columns_splits)

        self.assertEqual(df3.chunk_shape, (7, 7))
        for c in df3.chunks:
            self.assertIsInstance(c.op, DataFrameAdd)
            self.assertEqual(len(c.inputs), 2)
            # idx[0] / idx[1] select the row / column split of this output chunk
            idx = c.index
            # test the left side
            self.assertIsInstance(c.inputs[0].op, DataFrameIndexAlignMap)
            left_row_idx, left_row_inner_idx = left_index_idx_to_original_idx[idx[0]]
            left_col_idx, left_col_inner_idx = left_columns_idx_to_original_idx[idx[1]]
            expect_df1_input = df1.cix[left_row_idx, left_col_idx].data
            self.assertIs(c.inputs[0].inputs[0], expect_df1_input)
            left_index_min_max = left_index_splits[left_row_idx][left_row_inner_idx]
            self.assertEqual(c.inputs[0].op.index_min, left_index_min_max[0])
            self.assertEqual(c.inputs[0].op.index_min_close, left_index_min_max[1])
            self.assertEqual(c.inputs[0].op.index_max, left_index_min_max[2])
            self.assertEqual(c.inputs[0].op.index_max_close, left_index_min_max[3])
            self.assertIsInstance(c.inputs[0].index_value.to_pandas(), type(data1.index))
            left_column_min_max = left_columns_splits[left_col_idx][left_col_inner_idx]
            self.assertEqual(c.inputs[0].op.column_min, left_column_min_max[0])
            self.assertEqual(c.inputs[0].op.column_min_close, left_column_min_max[1])
            self.assertEqual(c.inputs[0].op.column_max, left_column_min_max[2])
            self.assertEqual(c.inputs[0].op.column_max_close, left_column_min_max[3])
            expect_left_columns = filter_index_value(expect_df1_input.columns, left_column_min_max,
                                                     store_data=True)
            pd.testing.assert_index_equal(c.inputs[0].columns.to_pandas(), expect_left_columns.to_pandas())
            pd.testing.assert_index_equal(c.inputs[0].dtypes.index, expect_left_columns.to_pandas())
            # test the right side
            self.assertIsInstance(c.inputs[1].op, DataFrameIndexAlignMap)
            right_row_idx, right_row_inner_idx = right_index_idx_to_original_idx[idx[0]]
            right_col_idx, right_col_inner_idx = right_columns_idx_to_original_idx[idx[1]]
            expect_df2_input = df2.cix[right_row_idx, right_col_idx].data
            self.assertIs(c.inputs[1].inputs[0], expect_df2_input)
            right_index_min_max = right_index_splits[right_row_idx][right_row_inner_idx]
            self.assertEqual(c.inputs[1].op.index_min, right_index_min_max[0])
            self.assertEqual(c.inputs[1].op.index_min_close, right_index_min_max[1])
            self.assertEqual(c.inputs[1].op.index_max, right_index_min_max[2])
            self.assertEqual(c.inputs[1].op.index_max_close, right_index_min_max[3])
            self.assertIsInstance(c.inputs[1].index_value.to_pandas(), type(data2.index))
            right_column_min_max = right_columns_splits[right_col_idx][right_col_inner_idx]
            self.assertEqual(c.inputs[1].op.column_min, right_column_min_max[0])
            self.assertEqual(c.inputs[1].op.column_min_close, right_column_min_max[1])
            self.assertEqual(c.inputs[1].op.column_max, right_column_min_max[2])
            self.assertEqual(c.inputs[1].op.column_max_close, right_column_min_max[3])
            # filter with the right-hand bounds so this check does not depend
            # on the left input's split
            expect_right_columns = filter_index_value(expect_df2_input.columns, right_column_min_max,
                                                      store_data=True)
            pd.testing.assert_index_equal(c.inputs[1].columns.to_pandas(), expect_right_columns.to_pandas())
            pd.testing.assert_index_equal(c.inputs[1].dtypes.index, expect_right_columns.to_pandas())