def test_to_dataframe_no_schema_set_raises_type_error(
    mut, class_under_test, mock_gapic_client, monkeypatch
):
    """A read session with no schema set cannot be converted to a DataFrame."""
    schemaless_session = types.ReadSession()
    reader = class_under_test([], mock_gapic_client, "", 0, {})

    with pytest.raises(TypeError):
        reader.to_dataframe(schemaless_session)
def test_to_dataframe_empty_w_dtypes_arrow(class_under_test, mock_gapic_client):
    """Caller-supplied dtype overrides are honored even for an empty Arrow stream."""
    columns = [
        {"name": "bigfloat", "type": "float64"},
        {"name": "lilfloat", "type": "float64"},
    ]
    arrow_schema = _bq_to_arrow_schema(columns)
    read_session = _generate_arrow_read_session(arrow_schema)
    empty_batches = _bq_to_arrow_batches([], arrow_schema)
    reader = class_under_test(empty_batches, mock_gapic_client, "", 0, {})

    got = reader.to_dataframe(read_session, dtypes={"lilfloat": "float16"})

    # "lilfloat" should carry the overridden dtype; "bigfloat" keeps the default.
    expected = pandas.DataFrame([], columns=["bigfloat", "lilfloat"])
    expected = expected.astype({"bigfloat": "float64", "lilfloat": "float16"})

    pandas.testing.assert_frame_equal(
        got.reset_index(drop=True),  # reset_index to ignore row labels
        expected.reset_index(drop=True),
    )
def test_to_dataframe_w_scalars(class_under_test, mock_gapic_client):
    """Scalar Avro rows round-trip into a DataFrame matching the source blocks.

    Timestamp columns are compared separately because fastavro supplies its
    own UTC tzinfo, which produces a different (but compatible) dtype.
    """
    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    read_session = _generate_avro_read_session(avro_schema)
    avro_blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema)

    # BUG FIX: mock_gapic_client was referenced without being requested as a
    # pytest fixture parameter, so the name resolved to the fixture function
    # (or raised NameError) instead of the mock client instance.
    reader = class_under_test(avro_blocks, mock_gapic_client, "", 0, {})

    got = reader.to_dataframe(read_session)

    expected = pandas.DataFrame(
        list(itertools.chain.from_iterable(SCALAR_BLOCKS)),
        columns=SCALAR_COLUMN_NAMES,
    )

    # fastavro provides its own UTC definition, so
    # compare the timestamp columns separately.
    got_ts = got["ts_col"]
    got = got.drop(columns=["ts_col"])
    expected_ts = expected["ts_col"]
    expected = expected.drop(columns=["ts_col"])

    pandas.testing.assert_frame_equal(
        got.reset_index(drop=True),  # reset_index to ignore row labels
        expected.reset_index(drop=True),
    )
    pandas.testing.assert_series_equal(
        got_ts.reset_index(drop=True),
        expected_ts.reset_index(drop=True),
        check_dtype=False,  # fastavro's UTC means different dtype
        check_datetimelike_compat=True,
    )
def test_to_dataframe_w_dtypes(class_under_test, mock_gapic_client):
    """Caller-supplied dtypes override the default per-column dtype.

    NOTE(review): no read_session is passed to to_dataframe() here — this
    assumes the reader can infer the schema from the Avro blocks themselves;
    confirm against the reader implementation.
    """
    avro_schema = _bq_to_avro_schema(
        [
            {"name": "bigfloat", "type": "float64"},
            {"name": "lilfloat", "type": "float64"},
        ]
    )
    blocks = [
        [{"bigfloat": 1.25, "lilfloat": 30.5}, {"bigfloat": 2.5, "lilfloat": 21.125}],
        [{"bigfloat": 3.75, "lilfloat": 11.0}],
    ]
    avro_blocks = _bq_to_avro_blocks(blocks, avro_schema)

    # BUG FIX: mock_gapic_client was used without being declared as a pytest
    # fixture parameter, so the test saw the fixture function (or raised
    # NameError) rather than the mock client instance.
    reader = class_under_test(avro_blocks, mock_gapic_client, "", 0, {})

    got = reader.to_dataframe(dtypes={"lilfloat": "float16"})

    expected = pandas.DataFrame(
        {
            "bigfloat": [1.25, 2.5, 3.75],
            "lilfloat": pandas.Series([30.5, 21.125, 11.0], dtype="float16"),
        },
        columns=["bigfloat", "lilfloat"],
    )
    pandas.testing.assert_frame_equal(
        got.reset_index(drop=True),  # reset_index to ignore row labels
        expected.reset_index(drop=True),
    )
def test_to_dataframe_no_pandas_raises_import_error(
    mut, class_under_test, mock_gapic_client, monkeypatch
):
    """Every DataFrame entry point raises ImportError when pandas is absent."""
    # Simulate pandas not being installed.
    monkeypatch.setattr(mut, "pandas", None)
    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    read_session = _generate_avro_read_session(avro_schema)
    avro_blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema)

    # CONSISTENCY FIX: use the mock_gapic_client fixture like every sibling
    # test in this module (was mock_client — verify no such fixture is needed).
    reader = class_under_test(avro_blocks, mock_gapic_client, "", 0, {})

    with pytest.raises(ImportError):
        reader.to_dataframe(read_session)

    with pytest.raises(ImportError):
        reader.rows(read_session).to_dataframe()

    with pytest.raises(ImportError):
        next(reader.rows(read_session).pages).to_dataframe()
def test_to_dataframe_w_scalars_arrow(class_under_test, mock_gapic_client):
    """Scalar Arrow rows round-trip into a DataFrame matching the source blocks."""
    arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS)
    read_session = _generate_arrow_read_session(arrow_schema)
    arrow_batches = _bq_to_arrow_batches(SCALAR_BLOCKS, arrow_schema)

    # BUG FIX: mock_gapic_client was referenced without being requested as a
    # pytest fixture parameter, so the name resolved to the fixture function
    # (or raised NameError) instead of the mock client instance.
    reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {})

    got = reader.to_dataframe(read_session)

    expected = pandas.DataFrame(
        list(itertools.chain.from_iterable(SCALAR_BLOCKS)),
        columns=SCALAR_COLUMN_NAMES,
    )
    pandas.testing.assert_frame_equal(
        got.reset_index(drop=True),  # reset_index to ignore row labels
        expected.reset_index(drop=True),
    )
def test_to_dataframe_empty_w_scalars_arrow(class_under_test, mock_gapic_client):
    """An empty Arrow stream yields an empty DataFrame with schema-derived dtypes."""
    arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS)
    read_session = _generate_arrow_read_session(arrow_schema)
    arrow_batches = _bq_to_arrow_batches([], arrow_schema)

    # BUG FIX: mock_gapic_client was used without being declared as a pytest
    # fixture parameter, so the test saw the fixture function (or raised
    # NameError) rather than the mock client instance.
    reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {})

    got = reader.to_dataframe(read_session)

    # Even with zero rows, columns should carry the dtypes implied by the schema.
    expected = pandas.DataFrame([], columns=SCALAR_COLUMN_NAMES)
    expected["int_col"] = expected["int_col"].astype("int64")
    expected["float_col"] = expected["float_col"].astype("float64")
    expected["bool_col"] = expected["bool_col"].astype("bool")
    expected["ts_col"] = expected["ts_col"].astype("datetime64[ns, UTC]")

    pandas.testing.assert_frame_equal(
        got.reset_index(drop=True),  # reset_index to ignore row labels
        expected.reset_index(drop=True),
    )
def test_to_dataframe_empty_w_scalars_avro(class_under_test, mock_gapic_client):
    """An empty Avro stream yields an empty DataFrame with schema-derived dtypes."""
    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    read_session = _generate_avro_read_session(avro_schema)
    avro_blocks = _bq_to_avro_blocks([], avro_schema)

    # BUG FIX: mock_gapic_client was used without being declared as a pytest
    # fixture parameter, so the test saw the fixture function (or raised
    # NameError) rather than the mock client instance.
    reader = class_under_test(avro_blocks, mock_gapic_client, "", 0, {})

    # Read session is needed to get a schema for empty streams.
    got = reader.to_dataframe(read_session)

    expected = pandas.DataFrame(columns=SCALAR_COLUMN_NAMES)
    expected["int_col"] = expected["int_col"].astype("int64")
    expected["float_col"] = expected["float_col"].astype("float64")
    expected["bool_col"] = expected["bool_col"].astype("bool")
    expected["ts_col"] = (
        expected["ts_col"].astype("datetime64[ns]").dt.tz_localize("UTC")
    )

    pandas.testing.assert_frame_equal(
        got.reset_index(drop=True),  # reset_index to ignore row labels
        expected.reset_index(drop=True),
    )