def test_rows_no_schema_set_raises_type_error(
    mut, class_under_test, mock_gapic_client, monkeypatch
):
    reader = class_under_test([], mock_gapic_client, "", 0, {})
    read_session = types.ReadSession()

    with pytest.raises(TypeError):
        reader.rows(read_session)

def test_pyarrow_rows_raises_import_error(
    mut, class_under_test, mock_gapic_client, monkeypatch
):
    monkeypatch.setattr(mut, "pyarrow", None)
    reader = class_under_test([], mock_gapic_client, "", 0, {})

    bq_columns = [{"name": "int_col", "type": "int64"}]
    arrow_schema = _bq_to_arrow_schema(bq_columns)
    read_session = _generate_arrow_read_session(arrow_schema)

    with pytest.raises(ImportError):
        reader.rows(read_session)

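# The Arrow tests rely on module-level helpers to fabricate sessions and
# responses. A minimal sketch of the _generate_arrow_read_session helper these
# tests assume (the body here is an assumption): it wraps the serialized
# pyarrow schema in a ReadSession so the reader knows the wire format.
def _generate_arrow_read_session(arrow_schema):
    return types.ReadSession(
        arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}
    )
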
def test_to_dataframe_by_page(class_under_test, mock_gapic_client):
    bq_columns = [
        {"name": "int_col", "type": "int64"},
        {"name": "bool_col", "type": "bool"},
    ]
    avro_schema = _bq_to_avro_schema(bq_columns)

    block_1 = [{"int_col": 123, "bool_col": True}, {"int_col": 234, "bool_col": False}]
    block_2 = [{"int_col": 345, "bool_col": True}, {"int_col": 456, "bool_col": False}]
    block_3 = [{"int_col": 567, "bool_col": True}, {"int_col": 789, "bool_col": False}]
    block_4 = [{"int_col": 890, "bool_col": True}]
    # Break blocks into two groups to test that iteration continues across
    # reconnection.
    bq_blocks_1 = [block_1, block_2]
    bq_blocks_2 = [block_3, block_4]
    avro_blocks_1 = _bq_to_avro_blocks(bq_blocks_1, avro_schema)
    avro_blocks_2 = _bq_to_avro_blocks(bq_blocks_2, avro_schema)

    mock_gapic_client.read_rows.return_value = avro_blocks_2

    reader = class_under_test(
        _pages_w_unavailable(avro_blocks_1),
        mock_gapic_client,
        "teststream",
        0,
        {"metadata": {"test-key": "test-value"}},
    )
    got = reader.rows()
    pages = iter(got.pages)

    page_1 = next(pages)
    pandas.testing.assert_frame_equal(
        page_1.to_dataframe().reset_index(drop=True),
        pandas.DataFrame(block_1, columns=["int_col", "bool_col"]).reset_index(
            drop=True
        ),
    )

    page_2 = next(pages)
    pandas.testing.assert_frame_equal(
        page_2.to_dataframe().reset_index(drop=True),
        pandas.DataFrame(block_2, columns=["int_col", "bool_col"]).reset_index(
            drop=True
        ),
    )

    page_3 = next(pages)
    pandas.testing.assert_frame_equal(
        page_3.to_dataframe().reset_index(drop=True),
        pandas.DataFrame(block_3, columns=["int_col", "bool_col"]).reset_index(
            drop=True
        ),
    )

    page_4 = next(pages)
    pandas.testing.assert_frame_equal(
        page_4.to_dataframe().reset_index(drop=True),
        pandas.DataFrame(block_4, columns=["int_col", "bool_col"]).reset_index(
            drop=True
        ),
    )

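# A minimal sketch of the _pages_w_unavailable helper the reconnection tests
# assume (the body is an assumption): it yields the given pages, then raises
# ServiceUnavailable so the reader is forced to reconnect and resume from the
# last delivered offset.
def _pages_w_unavailable(pages):
    for page in pages:
        yield page
    raise google.api_core.exceptions.ServiceUnavailable("test: please reconnect")
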
def test_to_arrow_no_pyarrow_raises_import_error(
    mut, class_under_test, mock_client, monkeypatch
):
    monkeypatch.setattr(mut, "pyarrow", None)
    arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS)
    read_session = _generate_arrow_read_session(arrow_schema)
    arrow_batches = _bq_to_arrow_batches(SCALAR_BLOCKS, arrow_schema)
    reader = class_under_test(arrow_batches, mock_client, "", 0, {})

    with pytest.raises(ImportError):
        reader.to_arrow(read_session)

    with pytest.raises(ImportError):
        reader.rows(read_session).to_arrow()

    with pytest.raises(ImportError):
        next(reader.rows(read_session).pages).to_arrow()

def test_rows_w_timeout(class_under_test, mock_gapic_client):
    bq_columns = [{"name": "int_col", "type": "int64"}]
    avro_schema = _bq_to_avro_schema(bq_columns)
    bq_blocks_1 = [
        [{"int_col": 123}, {"int_col": 234}],
        [{"int_col": 345}, {"int_col": 456}],
    ]
    avro_blocks_1 = _avro_blocks_w_deadline(
        _bq_to_avro_blocks(bq_blocks_1, avro_schema)
    )
    bq_blocks_2 = [[{"int_col": 567}, {"int_col": 789}], [{"int_col": 890}]]
    avro_blocks_2 = _bq_to_avro_blocks(bq_blocks_2, avro_schema)

    mock_gapic_client.read_rows.return_value = avro_blocks_2

    reader = class_under_test(
        avro_blocks_1,
        mock_gapic_client,
        "teststream",
        0,
        {"metadata": {"test-key": "test-value"}},
    )

    with pytest.raises(google.api_core.exceptions.DeadlineExceeded):
        list(reader.rows())

    # Don't reconnect on DeadlineExceeded. This allows user-specified timeouts
    # to be respected.
    mock_gapic_client.read_rows.assert_not_called()

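# A minimal sketch of the _avro_blocks_w_deadline helper the timeout test
# assumes (the body is an assumption): it exhausts the given blocks, then
# raises DeadlineExceeded, which the reader must surface rather than retry.
def _avro_blocks_w_deadline(avro_blocks):
    for block in avro_blocks:
        yield block
    raise google.api_core.exceptions.DeadlineExceeded("test: timeout, don't reconnect")
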
def test_rows_w_empty_stream_arrow(class_under_test, mock_gapic_client):
    bq_columns = [{"name": "int_col", "type": "int64"}]
    arrow_schema = _bq_to_arrow_schema(bq_columns)
    read_session = _generate_arrow_read_session(arrow_schema)
    reader = class_under_test([], mock_gapic_client, "", 0, {})

    got = reader.rows(read_session)
    assert tuple(got) == ()

def test_to_dataframe_no_pandas_raises_import_error(
    mut, class_under_test, mock_client, monkeypatch
):
    monkeypatch.setattr(mut, "pandas", None)
    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    read_session = _generate_avro_read_session(avro_schema)
    avro_blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema)
    reader = class_under_test(avro_blocks, mock_client, "", 0, {})

    with pytest.raises(ImportError):
        reader.to_dataframe(read_session)

    with pytest.raises(ImportError):
        reader.rows(read_session).to_dataframe()

    with pytest.raises(ImportError):
        next(reader.rows(read_session).pages).to_dataframe()

def test_rows_w_scalars(class_under_test, mock_gapic_client):
    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    avro_blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema)
    reader = class_under_test(avro_blocks, mock_gapic_client, "", 0, {})
    got = tuple(reader.rows())

    expected = tuple(itertools.chain.from_iterable(SCALAR_BLOCKS))
    assert got == expected

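# A minimal sketch of the _bq_to_avro_blocks helper these tests assume (the
# body is an assumption; io and json are assumed to be imported at module
# top): each block of rows is packed with fastavro's schemaless writer into a
# ReadRowsResponse, and the Avro schema rides on the first response so the
# reader can discover the serialization format mid-stream.
def _bq_to_avro_blocks(bq_blocks, avro_schema_json):
    avro_schema = fastavro.parse_schema(avro_schema_json)
    first_message = True
    avro_blocks = []
    for block in bq_blocks:
        blockio = io.BytesIO()
        for row in block:
            fastavro.schemaless_writer(blockio, avro_schema, row)
        response = types.ReadRowsResponse()
        response.row_count = len(block)
        response.avro_rows.serialized_binary_rows = blockio.getvalue()
        if first_message:
            # Only the first response on a stream carries the schema.
            response.avro_schema = {"schema": json.dumps(avro_schema_json)}
            first_message = False
        avro_blocks.append(response)
    return avro_blocks
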
def test_rows_w_reconnect_by_page(class_under_test, mock_gapic_client):
    bq_columns = [{"name": "int_col", "type": "int64"}]
    avro_schema = _bq_to_avro_schema(bq_columns)
    read_session = _generate_avro_read_session(avro_schema)
    bq_blocks_1 = [
        [{"int_col": 123}, {"int_col": 234}],
        [{"int_col": 345}, {"int_col": 456}],
    ]
    avro_blocks_1 = _bq_to_avro_blocks(bq_blocks_1, avro_schema)
    bq_blocks_2 = [[{"int_col": 567}, {"int_col": 789}], [{"int_col": 890}]]
    avro_blocks_2 = _bq_to_avro_blocks(bq_blocks_2, avro_schema)

    mock_gapic_client.read_rows.return_value = avro_blocks_2

    reader = class_under_test(
        _pages_w_unavailable(avro_blocks_1),
        mock_gapic_client,
        "teststream",
        0,
        {"metadata": {"test-key": "test-value"}},
    )
    got = reader.rows(read_session)
    pages = iter(got.pages)

    page_1 = next(pages)
    assert page_1.num_items == 2
    assert page_1.remaining == 2
    assert tuple(page_1) == tuple(bq_blocks_1[0])
    assert page_1.num_items == 2
    assert page_1.remaining == 0

    page_2 = next(pages)
    assert next(page_2) == bq_blocks_1[1][0]
    assert page_2.num_items == 2
    assert page_2.remaining == 1
    assert next(page_2) == bq_blocks_1[1][1]

    page_3 = next(pages)
    assert tuple(page_3) == tuple(bq_blocks_2[0])
    assert page_3.num_items == 2
    assert page_3.remaining == 0

    page_4 = next(pages)
    assert tuple(page_4) == tuple(bq_blocks_2[1])
    assert page_4.num_items == 1
    assert page_4.remaining == 0

def test_rows_w_scalars_arrow(class_under_test, mock_client):
    arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS)
    read_session = _generate_arrow_read_session(arrow_schema)
    arrow_batches = _bq_to_arrow_batches(SCALAR_BLOCKS, arrow_schema)
    reader = class_under_test(arrow_batches, mock_client, "", 0, {})
    got = tuple(reader.rows(read_session))

    expected = tuple(itertools.chain.from_iterable(SCALAR_BLOCKS))
    assert got == expected

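# A minimal sketch of the _bq_to_arrow_batches helper the Arrow tests assume
# (the body is an assumption): each block becomes one ReadRowsResponse
# carrying a serialized pyarrow RecordBatch built column-by-column from the
# row dicts.
def _bq_to_arrow_batches(bq_blocks, arrow_schema):
    arrow_batches = []
    for block in bq_blocks:
        arrays = [
            pyarrow.array(
                (row[name] for row in block),
                type=arrow_schema.field(name).type,
                size=len(block),
            )
            for name in arrow_schema.names
        ]
        record_batch = pyarrow.RecordBatch.from_arrays(arrays, schema=arrow_schema)
        response = types.ReadRowsResponse()
        response.row_count = len(block)
        response.arrow_record_batch.serialized_record_batch = (
            record_batch.serialize().to_pybytes()
        )
        arrow_batches.append(response)
    return arrow_batches
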
def test_rows_w_reconnect(class_under_test, mock_gapic_client):
    bq_columns = [{"name": "int_col", "type": "int64"}]
    avro_schema = _bq_to_avro_schema(bq_columns)
    read_session = _generate_avro_read_session(avro_schema)
    bq_blocks_1 = [
        [{"int_col": 123}, {"int_col": 234}],
        [{"int_col": 345}, {"int_col": 456}],
    ]
    avro_blocks_1 = _pages_w_unavailable(_bq_to_avro_blocks(bq_blocks_1, avro_schema))
    bq_blocks_2 = [[{"int_col": 1024}, {"int_col": 512}], [{"int_col": 256}]]
    avro_blocks_2 = _pages_w_resumable_internal_error(
        _bq_to_avro_blocks(bq_blocks_2, avro_schema)
    )
    bq_blocks_3 = [[{"int_col": 567}, {"int_col": 789}], [{"int_col": 890}]]
    avro_blocks_3 = _bq_to_avro_blocks(bq_blocks_3, avro_schema)

    mock_gapic_client.read_rows.side_effect = (avro_blocks_2, avro_blocks_3)

    reader = class_under_test(
        avro_blocks_1,
        mock_gapic_client,
        "teststream",
        0,
        {"metadata": {"test-key": "test-value"}},
    )
    got = reader.rows(read_session)

    expected = tuple(
        itertools.chain(
            itertools.chain.from_iterable(bq_blocks_1),
            itertools.chain.from_iterable(bq_blocks_2),
            itertools.chain.from_iterable(bq_blocks_3),
        )
    )

    assert tuple(got) == expected
    mock_gapic_client.read_rows.assert_any_call(
        read_stream="teststream", offset=4, metadata={"test-key": "test-value"}
    )
    mock_gapic_client.read_rows.assert_called_with(
        read_stream="teststream", offset=7, metadata={"test-key": "test-value"}
    )

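# A minimal sketch of the _pages_w_resumable_internal_error helper the
# reconnect test assumes (the body and error text are assumptions): an
# INTERNAL error about an unexpected EOS on the gRPC DATA frame is treated
# as resumable, so the reader should reconnect after it.
def _pages_w_resumable_internal_error(avro_blocks):
    for block in avro_blocks:
        yield block
    raise google.api_core.exceptions.InternalServerError(
        "INTERNAL Received unexpected EOS on DATA frame from server"
    )
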
def test_rows_no_schema_in_blocks_raises_type_error(
    mut, class_under_test, mock_gapic_client, monkeypatch
):
    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    avro_blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema)
    avro_blocks[0].avro_schema = None
    reader = class_under_test(avro_blocks, mock_gapic_client, "", 0, {})
    rows = iter(reader.rows())

    # Since the session isn't passed in, the reader doesn't know the
    # serialization type until iteration starts.
    with pytest.raises(TypeError):
        next(rows)

def test_avro_rows_raises_import_error(
    mut, class_under_test, mock_gapic_client, monkeypatch
):
    monkeypatch.setattr(mut, "fastavro", None)
    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    avro_blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema)
    reader = class_under_test(avro_blocks, mock_gapic_client, "", 0, {})
    rows = iter(reader.rows())

    # Since the session isn't passed in, the reader doesn't know the
    # serialization type until iteration starts.
    with pytest.raises(ImportError):
        next(rows)

def test_rows_w_nonresumable_internal_error(class_under_test, mock_client):
    bq_columns = [{"name": "int_col", "type": "int64"}]
    avro_schema = _bq_to_avro_schema(bq_columns)
    read_session = _generate_avro_read_session(avro_schema)
    bq_blocks = [[{"int_col": 1024}, {"int_col": 512}], [{"int_col": 256}]]
    avro_blocks = _pages_w_nonresumable_internal_error(
        _bq_to_avro_blocks(bq_blocks, avro_schema)
    )

    reader = class_under_test(avro_blocks, mock_client, "teststream", 0, {})

    with pytest.raises(
        google.api_core.exceptions.InternalServerError, match="nonresumable error"
    ):
        list(reader.rows(read_session))

    mock_client.read_rows.assert_not_called()

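# A minimal sketch of the matching _pages_w_nonresumable_internal_error helper
# (the body is an assumption): an INTERNAL error without the resumable EOS
# message must propagate to the caller with no reconnect attempt, which is
# what the assert_not_called check above verifies.
def _pages_w_nonresumable_internal_error(avro_blocks):
    for block in avro_blocks:
        yield block
    raise google.api_core.exceptions.InternalServerError("nonresumable error")
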
def test_to_dataframe_by_page_arrow(class_under_test, mock_gapic_client):
    bq_columns = [
        {"name": "int_col", "type": "int64"},
        {"name": "bool_col", "type": "bool"},
    ]
    arrow_schema = _bq_to_arrow_schema(bq_columns)
    read_session = _generate_arrow_read_session(arrow_schema)

    bq_block_1 = [
        {"int_col": 123, "bool_col": True},
        {"int_col": 234, "bool_col": False},
    ]
    bq_block_2 = [
        {"int_col": 345, "bool_col": True},
        {"int_col": 456, "bool_col": False},
    ]
    bq_block_3 = [
        {"int_col": 567, "bool_col": True},
        {"int_col": 789, "bool_col": False},
    ]
    bq_block_4 = [{"int_col": 890, "bool_col": True}]
    # Break blocks into two groups to test that iteration continues across
    # reconnection.
    bq_blocks_1 = [bq_block_1, bq_block_2]
    bq_blocks_2 = [bq_block_3, bq_block_4]
    batch_1 = _bq_to_arrow_batches(bq_blocks_1, arrow_schema)
    batch_2 = _bq_to_arrow_batches(bq_blocks_2, arrow_schema)

    mock_gapic_client.read_rows.return_value = batch_2

    reader = class_under_test(
        _pages_w_unavailable(batch_1), mock_gapic_client, "", 0, {}
    )
    got = reader.rows(read_session)
    pages = iter(got.pages)

    page_1 = next(pages)
    pandas.testing.assert_frame_equal(
        page_1.to_dataframe(
            dtypes={"int_col": "int64", "bool_col": "bool"}
        ).reset_index(drop=True),
        pandas.DataFrame(bq_block_1, columns=["int_col", "bool_col"]).reset_index(
            drop=True
        ),
    )

    page_2 = next(pages)
    pandas.testing.assert_frame_equal(
        page_2.to_dataframe().reset_index(drop=True),
        pandas.DataFrame(bq_block_2, columns=["int_col", "bool_col"]).reset_index(
            drop=True
        ),
    )

    page_3 = next(pages)
    pandas.testing.assert_frame_equal(
        page_3.to_dataframe().reset_index(drop=True),
        pandas.DataFrame(bq_block_3, columns=["int_col", "bool_col"]).reset_index(
            drop=True
        ),
    )

    page_4 = next(pages)
    pandas.testing.assert_frame_equal(
        page_4.to_dataframe().reset_index(drop=True),
        pandas.DataFrame(bq_block_4, columns=["int_col", "bool_col"]).reset_index(
            drop=True
        ),
    )

def test_rows_w_empty_stream(class_under_test, mock_gapic_client):
    reader = class_under_test([], mock_gapic_client, "", 0, {})
    got = reader.rows()
    assert tuple(got) == ()