def test_text_zero_chunks_valid(self):
    """A string column built from zero record batches should validate as Text.

    The test passes when ``validate_table_metadata`` returns without raising.
    """
    schema = pyarrow.schema([("A", pyarrow.string())])
    empty_table = pyarrow.Table.from_batches([], schema)
    metadata = TableMetadata(0, [Text("A")])

    validate_table_metadata(empty_table, metadata)
def test_duplicate_column_name(self):
    """Two columns that share the name "A" should raise DuplicateColumnName."""
    arrays = [pyarrow.array(["a"]), pyarrow.array(["b"])]
    table = pyarrow.Table.from_arrays(arrays, ["A", "A"])
    metadata = TableMetadata(1, [Text("A"), Text("A")])

    # Only the call under test lives inside the assertRaises context.
    with self.assertRaises(DuplicateColumnName):
        validate_table_metadata(table, metadata)
def test_column_name_mismatch(self):
    """A table column "B" described in metadata as "B2" should raise WrongColumnName."""
    table = pyarrow.table({"A": ["a"], "B": ["b"]})
    metadata = TableMetadata(1, [Text("A"), Text("B2")])

    with self.assertRaises(WrongColumnName):
        validate_table_metadata(table, metadata)
def test_text_dictionary_zero_chunks_is_valid(self):
    """A dictionary-encoded string column with zero batches should validate as Text.

    The test passes when ``validate_table_metadata`` returns without raising.
    """
    dictionary_type = pyarrow.dictionary(pyarrow.int32(), pyarrow.string())
    schema = pyarrow.schema([("A", dictionary_type)])
    empty_table = pyarrow.Table.from_batches([], schema)
    metadata = TableMetadata(0, [Text("A")])

    validate_table_metadata(empty_table, metadata)
def test_table_not_one_batch(self):
    """A table built from two record batches should raise TableHasTooManyRecordBatches."""
    # One single-row batch per value, each holding a lone column "A".
    batches = [
        pyarrow.RecordBatch.from_arrays([pyarrow.array([value])], ["A"])
        for value in ("a", "b")
    ]
    table = pyarrow.Table.from_batches(batches)
    metadata = TableMetadata(2, [Text("A")])

    with self.assertRaises(TableHasTooManyRecordBatches):
        validate_table_metadata(table, metadata)
def test_column_datetime_must_be_ns_resolution(self):
    """A microsecond-unit timestamp column should raise DatetimeUnitNotAllowed."""
    # [2019-09-17] Pandas only supports datetime64[ns]
    # https://github.com/pandas-dev/pandas/issues/7307#issuecomment-224180563
    microsecond_type = pyarrow.timestamp("us", tz=None)
    column = pyarrow.array([5298375234], type=microsecond_type)
    table = pyarrow.table({"A": column})
    metadata = TableMetadata(1, [Datetime("A")])

    with self.assertRaises(DatetimeUnitNotAllowed):
        validate_table_metadata(table, metadata)
def test_column_datetime_should_be_tz_naive(self):
    """A timestamp column carrying a timezone should raise DatetimeTimezoneNotAllowed."""
    zoned_type = pyarrow.timestamp("ns", "America/New_York")
    column = pyarrow.array([5298375234123], type=zoned_type)
    table = pyarrow.table({"A": column})
    metadata = TableMetadata(1, [Datetime("A")])

    with self.assertRaises(DatetimeTimezoneNotAllowed):
        validate_table_metadata(table, metadata)
def test_table_wrong_number_of_rows(self):
    """A one-row table checked against metadata declaring two rows should raise WrongRowCount."""
    table = pyarrow.Table.from_pydict({"A": ["x"]})
    metadata = TableMetadata(2, [Text("A")])

    with self.assertRaises(WrongRowCount):
        validate_table_metadata(table, metadata)
def test_table_not_none_when_should_be_none(self):
    """A table object passed with column-less metadata should raise TableShouldBeNone."""
    # NOTE(review): from_arrays([]) is called without names or schema, as in
    # the original; confirm the installed pyarrow version accepts this.
    columnless_table = pyarrow.Table.from_arrays([])
    metadata = TableMetadata(2, [])

    with self.assertRaises(TableShouldBeNone):
        validate_table_metadata(columnless_table, metadata)
def test_table_none_when_should_be_set(self):
    """A ``None`` table checked against metadata listing a column should raise WrongColumnCount."""
    metadata = TableMetadata(2, [Text("A")])

    with self.assertRaises(WrongColumnCount):
        validate_table_metadata(None, metadata)
def test_happy_path_table_is_none(self):
    """A ``None`` table with column-less metadata should validate without raising."""
    metadata = TableMetadata(2, [])

    validate_table_metadata(None, metadata)
def test_column_str_should_be_datetime(self):
    """A string column described as Datetime in metadata should raise WrongColumnType."""
    table = pyarrow.table({"A": ["x"]})
    metadata = TableMetadata(1, [Datetime("A")])

    with self.assertRaises(WrongColumnType):
        validate_table_metadata(table, metadata)
def test_column_int_should_be_text(self):
    """An integer column described as Text in metadata should raise WrongColumnType."""
    table = pyarrow.table({"A": [1]})
    metadata = TableMetadata(1, [Text("A")])

    with self.assertRaises(WrongColumnType):
        validate_table_metadata(table, metadata)