Ejemplo n.º 1
0
 def test_query_table_int(self):
     # Exercise query_table with a single integer key: in-range positions
     # (positive and negative), out-of-range positions (expected to raise
     # IndexError), and lookups that go through an indices table.
     arrow_table = self._create_dummy_table()
     wrapped = InMemoryTable(arrow_table)
     row_count = arrow_table.num_rows

     # In-range keys: each integer is paired with the slice that selects the
     # same single row from the reference columns.
     for key, window in ((0, slice(None, 1)), (1, slice(1, 2)), (-1, slice(-1, None))):
         result = query_table(wrapped, key)
         expected = pa.Table.from_pydict({"a": _COL_A[window], "b": _COL_B[window], "c": _COL_C[window]})
         self.assertTableEqual(result, expected)

     # One step past either end of the table must raise an IndexError.
     for out_of_range in (row_count, -(row_count + 1)):
         with self.assertRaises(IndexError):
             query_table(wrapped, out_of_range)

     # With an indices table, key 0 is expected to yield row _INDICES[0].
     mapping = InMemoryTable(self._create_dummy_arrow_indices())
     result = query_table(wrapped, 0, indices=mapping)
     target = _INDICES[0]
     expected = pa.Table.from_pydict({"a": [_COL_A[target]], "b": [_COL_B[target]], "c": [_COL_C[target]]})
     self.assertTableEqual(result, expected)

     # Precondition: the indices table is shorter than the data table, so the
     # key used below is a valid row of the data but lies outside the indices.
     assert len(mapping) < row_count
     with self.assertRaises(IndexError):
         query_table(wrapped, len(mapping), indices=mapping)
Ejemplo n.º 2
0
 def test_query_table_str(self):
     # Exercise query_table with a string key: an existing column name, an
     # unknown name (expected to raise KeyError), and a column fetched
     # through an indices table.
     arrow_table = self._create_dummy_table()

     # An existing column name yields a table holding just that column.
     only_a = query_table(arrow_table, "a")
     expected = pa.Table.from_pydict({"a": _COL_A})
     self.assertTableEqual(only_a, expected)

     # An unknown column name is rejected.
     with self.assertRaises(KeyError):
         query_table(arrow_table, "z")

     # Through the indices, the column values are expected in _INDICES order.
     mapping = self._create_dummy_arrow_indices()
     remapped_a = query_table(arrow_table, "a", indices=mapping)
     reordered_values = [_COL_A[position] for position in _INDICES]
     self.assertTableEqual(remapped_a, pa.Table.from_pydict({"a": reordered_values}))
Ejemplo n.º 3
0
    def test_query_table_invalid_key_type(self):
        # query_table must raise TypeError for keys of unsupported types:
        # a float, a list mixing int and str, a type object, and a generator.

        def endless_counter(start=0):
            # Generator that never terminates: start, start + 1, start + 2, ...
            while True:
                yield start
                start += 1

        arrow_table = self._create_dummy_table()
        rejected_keys = (0.0, [0, "a"], int, endless_counter())
        for bad_key in rejected_keys:
            with self.assertRaises(TypeError):
                query_table(arrow_table, bad_key)
Ejemplo n.º 4
0
 def test_query_table_iterable(self):
     # Exercise query_table with iterable keys (Python lists and a numpy
     # array), using numpy indexing of the reference columns as the oracle.
     arrow_table = self._create_dummy_table()
     wrapped = InMemoryTable(arrow_table)
     row_count = arrow_table.num_rows
     col_a = np.array(_COL_A, dtype=np.int64)
     col_b = np.array(_COL_B)
     col_c = np.array(_COL_C)

     def rows_at(positions):
         # Expected table: the same positions picked from each reference
         # column; column "c" is converted back to a Python list first.
         return pa.Table.from_pydict({
             "a": col_a[positions],
             "b": col_b[positions],
             "c": col_c[positions].tolist(),
         })

     # List keys first, then the same selection given as a numpy array.
     # Each case pairs the key passed to query_table with the positions
     # used to index the reference columns.
     cases = (
         ([0], [0]),
         ([1], [1]),
         ([-1], [-1]),
         ([0, -1, 1], [0, -1, 1]),
         (np.array([0, -1, 1]), [0, -1, 1]),
     )
     for key, positions in cases:
         result = query_table(wrapped, key)
         self.assertTableEqual(result, rows_at(positions))

     # An empty key gives an empty output, not an error.
     result = query_table(wrapped, [])
     assert len(col_a[[]]) == 0
     self.assertTableEqual(
         result, pa.Table.from_batches([], schema=arrow_table.schema))

     # Out-of-range positions: numpy indexing raises IndexError, and
     # query_table is expected to raise it as well.
     for out_of_range in (row_count, -(row_count + 1)):
         with self.assertRaises(IndexError):
             col_a[[out_of_range]]
         with self.assertRaises(IndexError):
             query_table(wrapped, [out_of_range])

     # With an indices table, position 0 is expected to yield row _INDICES[0].
     mapping = InMemoryTable(self._create_dummy_arrow_indices())
     result = query_table(wrapped, [0], indices=mapping)
     target = _INDICES[0]
     self.assertTableEqual(
         result,
         pa.Table.from_pydict({
             "a": [_COL_A[target]],
             "b": [_COL_B[target]],
             "c": [_COL_C[target]],
         }),
     )

     # Precondition: the indices table is shorter than the data table, so the
     # position below is a valid row of the data but lies outside the indices.
     assert len(mapping) < row_count
     with self.assertRaises(IndexError):
         query_table(wrapped, [len(mapping)], indices=mapping)
Ejemplo n.º 5
0
 def test_query_table_range(self):
     # Exercise query_table with range keys, using numpy indexing of the
     # reference columns with the same range as the oracle.
     arrow_table = self._create_dummy_table()
     wrapped = InMemoryTable(arrow_table)
     row_count = arrow_table.num_rows
     col_a = np.array(_COL_A, dtype=np.int64)
     col_b = np.array(_COL_B)
     col_c = np.array(_COL_C)

     def rows_at(key):
         # Expected table: the reference columns indexed with the same range;
         # column "c" is converted back to a Python list first.
         return pa.Table.from_pydict({
             "a": col_a[key],
             "b": col_b[key],
             "c": col_c[key].tolist(),
         })

     selecting_ranges = (
         # classical usage
         range(0, 1),
         range(1, 2),
         range(-2, -1),
         # ranges mixing negative and non-negative positions
         range(-1, 0),
         range(-1, row_count),
         # ranges with a step
         range(0, row_count, 2),
         range(0, row_count + 1, 2 * row_count),
     )
     for key in selecting_ranges:
         result = query_table(wrapped, key)
         self.assertTableEqual(result, rows_at(key))

     # Empty ranges give an empty output, not an error.
     for empty_key in (range(2, 1), range(row_count, row_count)):
         result = query_table(wrapped, empty_key)
         assert len(col_a[empty_key]) == 0
         self.assertTableEqual(
             result, pa.Table.from_batches([], schema=arrow_table.schema))

     # Ranges containing an out-of-range position: numpy indexing raises
     # IndexError, and query_table is expected to raise it as well.
     overflowing_ranges = (
         range(0, row_count + 1),
         range(-(row_count + 1), -1),
         range(row_count, row_count + 1),
     )
     for bad_key in overflowing_ranges:
         with self.assertRaises(IndexError):
             col_a[bad_key]
         with self.assertRaises(IndexError):
             query_table(wrapped, bad_key)

     # With an indices table, range(0, 1) is expected to yield row _INDICES[0].
     mapping = InMemoryTable(self._create_dummy_arrow_indices())
     result = query_table(wrapped, range(0, 1), indices=mapping)
     target = _INDICES[0]
     self.assertTableEqual(
         result,
         pa.Table.from_pydict({
             "a": [_COL_A[target]],
             "b": [_COL_B[target]],
             "c": [_COL_C[target]],
         }),
     )

     # Precondition: the indices table is shorter than the data table, so the
     # range below is valid for the data but lies outside the indices.
     assert len(mapping) < row_count
     with self.assertRaises(IndexError):
         past_the_end = range(len(mapping), len(mapping) + 1)
         query_table(wrapped, past_the_end, indices=mapping)
Ejemplo n.º 6
0
    def test_query_table_slice(self):
        # Exercise query_table with slice keys, using the same slice applied
        # to the reference columns as the oracle.
        arrow_table = self._create_dummy_table()
        wrapped = InMemoryTable(arrow_table)
        row_count = arrow_table.num_rows

        def columns_at(window):
            # Expected table: the same slice applied to each reference column.
            return pa.Table.from_pydict({
                "a": _COL_A[window],
                "b": _COL_B[window],
                "c": _COL_C[window],
            })

        # Classical bounded slices, then a slice with an open (None) end.
        for window in (slice(0, 1), slice(1, 2), slice(-2, -1),
                       slice(-1, None)):
            self.assertTableEqual(
                query_table(wrapped, window), columns_at(window))

        # Bounds reaching past the table: the result must match both the
        # sliced reference columns and the complete columns.
        for window in (slice(None, row_count + 1),
                       slice(-(row_count + 1), None)):
            result = query_table(wrapped, window)
            self.assertTableEqual(result, columns_at(window))
            self.assertTableEqual(
                result,
                pa.Table.from_pydict({
                    "a": _COL_A,
                    "b": _COL_B,
                    "c": _COL_C,
                }))

        # Usage with a step.
        every_other = slice(None, None, 2)
        self.assertTableEqual(
            query_table(wrapped, every_other), columns_at(every_other))

        # Empty output but no errors; the first slice mixes a negative start
        # with a non-negative stop.
        empty_windows = (
            slice(-1, 0),
            slice(2, 1),
            slice(row_count, row_count),
            slice(row_count, row_count + 1),
        )
        for window in empty_windows:
            result = query_table(wrapped, window)
            assert len(_COL_A[window]) == 0
            self.assertTableEqual(result, arrow_table.slice(0, 0))
        # It is not possible to get an error with a slice.

        # With an indices table, slice(0, 1) is expected to yield row
        # _INDICES[0].
        mapping = InMemoryTable(self._create_dummy_arrow_indices())
        result = query_table(wrapped, slice(0, 1), indices=mapping)
        target = _INDICES[0]
        self.assertTableEqual(
            result,
            pa.Table.from_pydict({
                "a": [_COL_A[target]],
                "b": [_COL_B[target]],
                "c": [_COL_C[target]],
            }),
        )

        # A slice at the end of the data table selects nothing from the
        # indices (checked by the assert), so the output is empty.
        tail = slice(row_count - 1, row_count)
        result = query_table(wrapped, tail, indices=mapping)
        assert len(mapping.column(0).to_pylist()[tail]) == 0
        self.assertTableEqual(result, arrow_table.slice(0, 0))