def test_rows_no_schema_set_raises_type_error(mut, class_under_test,
                                              mock_gapic_client, monkeypatch):
    """rows() must raise TypeError for a session that carries no schema."""
    schemaless_session = types.ReadSession()
    reader = class_under_test([], mock_gapic_client, "", 0, {})

    with pytest.raises(TypeError):
        reader.rows(schemaless_session)
def test_pyarrow_rows_raises_import_error(mut, class_under_test,
                                          mock_gapic_client, monkeypatch):
    """rows() on an Arrow session must raise ImportError without pyarrow."""
    # Simulate an environment in which pyarrow is not installed.
    monkeypatch.setattr(mut, "pyarrow", None)
    reader = class_under_test([], mock_gapic_client, "", 0, {})

    schema = _bq_to_arrow_schema([{"name": "int_col", "type": "int64"}])
    session = _generate_arrow_read_session(schema)

    with pytest.raises(ImportError):
        reader.rows(session)
# Example #3
def test_to_dataframe_by_page(class_under_test, mock_gapic_client):
    """Each page converts to a DataFrame, across a reconnection boundary."""
    bq_columns = [
        {"name": "int_col", "type": "int64"},
        {"name": "bool_col", "type": "bool"},
    ]
    avro_schema = _bq_to_avro_schema(bq_columns)
    blocks = [
        [{"int_col": 123, "bool_col": True}, {"int_col": 234, "bool_col": False}],
        [{"int_col": 345, "bool_col": True}, {"int_col": 456, "bool_col": False}],
        [{"int_col": 567, "bool_col": True}, {"int_col": 789, "bool_col": False}],
        [{"int_col": 890, "bool_col": True}],
    ]
    # Break blocks into two groups to test that iteration continues across
    # reconnection.
    avro_blocks_1 = _bq_to_avro_blocks(blocks[:2], avro_schema)
    avro_blocks_2 = _bq_to_avro_blocks(blocks[2:], avro_schema)

    mock_gapic_client.read_rows.return_value = avro_blocks_2

    reader = class_under_test(
        _pages_w_unavailable(avro_blocks_1),
        mock_gapic_client,
        "teststream",
        0,
        {"metadata": {"test-key": "test-value"}},
    )
    pages = iter(reader.rows().pages)

    # Pages must come back in block order, unchanged, despite the reconnect.
    for expected_block in blocks:
        page = next(pages)
        pandas.testing.assert_frame_equal(
            page.to_dataframe().reset_index(drop=True),
            pandas.DataFrame(
                expected_block, columns=["int_col", "bool_col"]
            ).reset_index(drop=True),
        )
def test_to_arrow_no_pyarrow_raises_import_error(mut, class_under_test,
                                                 mock_client, monkeypatch):
    """Every Arrow conversion entry point must fail without pyarrow."""
    monkeypatch.setattr(mut, "pyarrow", None)
    schema = _bq_to_arrow_schema(SCALAR_COLUMNS)
    session = _generate_arrow_read_session(schema)
    batches = _bq_to_arrow_batches(SCALAR_BLOCKS, schema)
    reader = class_under_test(batches, mock_client, "", 0, {})

    # The reader, the rows iterator, and an individual page all expose
    # to_arrow(); each must raise rather than return partial data.
    with pytest.raises(ImportError):
        reader.to_arrow(session)

    with pytest.raises(ImportError):
        reader.rows(session).to_arrow()

    with pytest.raises(ImportError):
        next(reader.rows(session).pages).to_arrow()
# Example #5
def test_rows_w_timeout(class_under_test, mock_gapic_client):
    """A DeadlineExceeded during iteration propagates without reconnecting."""
    avro_schema = _bq_to_avro_schema([{"name": "int_col", "type": "int64"}])
    first_blocks = [
        [{"int_col": 123}, {"int_col": 234}],
        [{"int_col": 345}, {"int_col": 456}],
    ]
    second_blocks = [[{"int_col": 567}, {"int_col": 789}], [{"int_col": 890}]]

    deadline_blocks = _avro_blocks_w_deadline(
        _bq_to_avro_blocks(first_blocks, avro_schema)
    )
    mock_gapic_client.read_rows.return_value = _bq_to_avro_blocks(
        second_blocks, avro_schema
    )

    reader = class_under_test(
        deadline_blocks,
        mock_gapic_client,
        "teststream",
        0,
        {"metadata": {"test-key": "test-value"}},
    )

    with pytest.raises(google.api_core.exceptions.DeadlineExceeded):
        list(reader.rows())

    # Don't reconnect on DeadlineException. This allows user-specified timeouts
    # to be respected.
    mock_gapic_client.read_rows.assert_not_called()
def test_rows_w_empty_stream_arrow(class_under_test, mock_gapic_client):
    """An empty Arrow stream yields no rows."""
    schema = _bq_to_arrow_schema([{"name": "int_col", "type": "int64"}])
    session = _generate_arrow_read_session(schema)
    reader = class_under_test([], mock_gapic_client, "", 0, {})

    assert tuple(reader.rows(session)) == ()
def test_to_dataframe_no_pandas_raises_import_error(mut, class_under_test,
                                                    mock_client, monkeypatch):
    """Every DataFrame conversion entry point must fail without pandas."""
    monkeypatch.setattr(mut, "pandas", None)
    schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    session = _generate_avro_read_session(schema)
    blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, schema)
    reader = class_under_test(blocks, mock_client, "", 0, {})

    # The reader, the rows iterator, and an individual page all expose
    # to_dataframe(); each must raise rather than return partial data.
    with pytest.raises(ImportError):
        reader.to_dataframe(session)

    with pytest.raises(ImportError):
        reader.rows(session).to_dataframe()

    with pytest.raises(ImportError):
        next(reader.rows(session).pages).to_dataframe()
# Example #8
def test_rows_w_scalars(class_under_test, mock_gapic_client):
    """rows() yields every row from every Avro block, in order."""
    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema)
    reader = class_under_test(blocks, mock_gapic_client, "", 0, {})

    expected = tuple(itertools.chain.from_iterable(SCALAR_BLOCKS))
    assert tuple(reader.rows()) == expected
def test_rows_w_reconnect_by_page(class_under_test, mock_gapic_client):
    """Page bookkeeping (num_items/remaining) stays correct across reconnect."""
    avro_schema = _bq_to_avro_schema([{"name": "int_col", "type": "int64"}])
    read_session = _generate_avro_read_session(avro_schema)
    bq_blocks_1 = [
        [{"int_col": 123}, {"int_col": 234}],
        [{"int_col": 345}, {"int_col": 456}],
    ]
    bq_blocks_2 = [[{"int_col": 567}, {"int_col": 789}], [{"int_col": 890}]]
    avro_blocks_1 = _bq_to_avro_blocks(bq_blocks_1, avro_schema)
    mock_gapic_client.read_rows.return_value = _bq_to_avro_blocks(
        bq_blocks_2, avro_schema
    )

    reader = class_under_test(
        _pages_w_unavailable(avro_blocks_1),
        mock_gapic_client,
        "teststream",
        0,
        {"metadata": {"test-key": "test-value"}},
    )
    pages = iter(reader.rows(read_session).pages)

    # Counts are visible before any iteration...
    page_1 = next(pages)
    assert page_1.num_items == 2
    assert page_1.remaining == 2
    assert tuple(page_1) == tuple(bq_blocks_1[0])
    # ...and remaining drops to zero once the page is fully consumed.
    assert page_1.num_items == 2
    assert page_1.remaining == 0

    # Partial consumption decrements remaining one row at a time.
    page_2 = next(pages)
    assert next(page_2) == bq_blocks_1[1][0]
    assert page_2.num_items == 2
    assert page_2.remaining == 1
    assert next(page_2) == bq_blocks_1[1][1]

    # Pages delivered after the reconnect behave the same way.
    page_3 = next(pages)
    assert tuple(page_3) == tuple(bq_blocks_2[0])
    assert page_3.num_items == 2
    assert page_3.remaining == 0

    page_4 = next(pages)
    assert tuple(page_4) == tuple(bq_blocks_2[1])
    assert page_4.num_items == 1
    assert page_4.remaining == 0
def test_rows_w_scalars_arrow(class_under_test, mock_client):
    """rows() over Arrow batches yields every row, in order."""
    schema = _bq_to_arrow_schema(SCALAR_COLUMNS)
    session = _generate_arrow_read_session(schema)
    batches = _bq_to_arrow_batches(SCALAR_BLOCKS, schema)
    reader = class_under_test(batches, mock_client, "", 0, {})

    expected = tuple(itertools.chain.from_iterable(SCALAR_BLOCKS))
    assert tuple(reader.rows(session)) == expected
def test_rows_w_reconnect(class_under_test, mock_gapic_client):
    """Iteration resumes at the correct row offset after each resumable error.

    The stream fails twice (first Unavailable, then a resumable internal
    error); the reader must reconnect with the running offset both times
    and still yield every row exactly once, in order.
    """
    bq_columns = [{"name": "int_col", "type": "int64"}]
    avro_schema = _bq_to_avro_schema(bq_columns)
    read_session = _generate_avro_read_session(avro_schema)
    bq_blocks_1 = [
        [{"int_col": 123}, {"int_col": 234}],
        [{"int_col": 345}, {"int_col": 456}],
    ]
    avro_blocks_1 = _pages_w_unavailable(
        _bq_to_avro_blocks(bq_blocks_1, avro_schema))
    bq_blocks_2 = [[{"int_col": 1024}, {"int_col": 512}], [{"int_col": 256}]]
    # Fix: avro_blocks_2 was previously built twice — a plain
    # _bq_to_avro_blocks result that was immediately overwritten by this
    # wrapped one. Only the wrapped iterator is needed.
    avro_blocks_2 = _pages_w_resumable_internal_error(
        _bq_to_avro_blocks(bq_blocks_2, avro_schema))
    bq_blocks_3 = [[{"int_col": 567}, {"int_col": 789}], [{"int_col": 890}]]
    avro_blocks_3 = _bq_to_avro_blocks(bq_blocks_3, avro_schema)

    # Successive reconnects receive the second and then the third stream.
    mock_gapic_client.read_rows.side_effect = (avro_blocks_2, avro_blocks_3)

    reader = class_under_test(
        avro_blocks_1,
        mock_gapic_client,
        "teststream",
        0,
        {"metadata": {"test-key": "test-value"}},
    )
    got = reader.rows(read_session)

    expected = tuple(
        itertools.chain(
            itertools.chain.from_iterable(bq_blocks_1),
            itertools.chain.from_iterable(bq_blocks_2),
            itertools.chain.from_iterable(bq_blocks_3),
        ))

    assert tuple(got) == expected
    # First reconnect after the 4 rows of bq_blocks_1...
    mock_gapic_client.read_rows.assert_any_call(
        read_stream="teststream",
        offset=4,
        metadata={"test-key": "test-value"})
    # ...second reconnect after the 3 rows of bq_blocks_2 (offset 4 + 3).
    mock_gapic_client.read_rows.assert_called_with(
        read_stream="teststream",
        offset=7,
        metadata={"test-key": "test-value"})
# Example #12
def test_rows_no_schema_set_raises_type_error(mut, class_under_test,
                                              mock_gapic_client, monkeypatch):
    """Iterating fails with TypeError when the first block has no schema."""
    # NOTE(review): this redefines the function of the same name earlier in
    # the file, shadowing it at collection time — one of the two should be
    # renamed so both tests actually run.
    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema)
    blocks[0].avro_schema = None
    reader = class_under_test(blocks, mock_gapic_client, "", 0, {})

    # Since session isn't passed in, reader doesn't know serialization type
    # until you start iterating.
    row_iter = iter(reader.rows())
    with pytest.raises(TypeError):
        next(row_iter)
# Example #13
def test_avro_rows_raises_import_error(mut, class_under_test,
                                       mock_gapic_client, monkeypatch):
    """Iterating Avro rows must raise ImportError without fastavro."""
    # Simulate an environment in which fastavro is not installed.
    monkeypatch.setattr(mut, "fastavro", None)
    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema)
    reader = class_under_test(blocks, mock_gapic_client, "", 0, {})

    # Since session isn't passed in, reader doesn't know serialization type
    # until you start iterating, so the error surfaces on the first next().
    row_iter = iter(reader.rows())
    with pytest.raises(ImportError):
        next(row_iter)
def test_rows_w_nonresumable_internal_error(class_under_test, mock_client):
    """A non-resumable internal error propagates and triggers no reconnect."""
    avro_schema = _bq_to_avro_schema([{"name": "int_col", "type": "int64"}])
    read_session = _generate_avro_read_session(avro_schema)
    bq_blocks = [[{"int_col": 1024}, {"int_col": 512}], [{"int_col": 256}]]
    failing_blocks = _pages_w_nonresumable_internal_error(
        _bq_to_avro_blocks(bq_blocks, avro_schema))

    reader = class_under_test(failing_blocks, mock_client, "teststream", 0, {})

    with pytest.raises(google.api_core.exceptions.InternalServerError,
                       match="nonresumable error"):
        list(reader.rows(read_session))

    # Non-resumable errors must not cause a reconnect attempt.
    mock_client.read_rows.assert_not_called()
def test_to_dataframe_by_page_arrow(class_under_test, mock_gapic_client):
    """Arrow pages convert to DataFrames, across a reconnection boundary."""
    bq_columns = [
        {"name": "int_col", "type": "int64"},
        {"name": "bool_col", "type": "bool"},
    ]
    arrow_schema = _bq_to_arrow_schema(bq_columns)
    read_session = _generate_arrow_read_session(arrow_schema)

    blocks = [
        [{"int_col": 123, "bool_col": True}, {"int_col": 234, "bool_col": False}],
        [{"int_col": 345, "bool_col": True}, {"int_col": 456, "bool_col": False}],
        [{"int_col": 567, "bool_col": True}, {"int_col": 789, "bool_col": False}],
        [{"int_col": 890, "bool_col": True}],
    ]
    # Break blocks into two groups to test that iteration continues across
    # reconnection.
    batch_1 = _bq_to_arrow_batches(blocks[:2], arrow_schema)
    batch_2 = _bq_to_arrow_batches(blocks[2:], arrow_schema)

    mock_gapic_client.read_rows.return_value = batch_2

    reader = class_under_test(_pages_w_unavailable(batch_1), mock_gapic_client,
                              "", 0, {})
    pages = iter(reader.rows(read_session).pages)

    # Only the first page exercises the explicit dtypes argument.
    page_1 = next(pages)
    pandas.testing.assert_frame_equal(
        page_1.to_dataframe(
            dtypes={"int_col": "int64", "bool_col": "bool"}
        ).reset_index(drop=True),
        pandas.DataFrame(blocks[0], columns=["int_col", "bool_col"]).reset_index(
            drop=True
        ),
    )

    # The remaining pages convert with default dtypes, in block order.
    for expected_block in blocks[1:]:
        page = next(pages)
        pandas.testing.assert_frame_equal(
            page.to_dataframe().reset_index(drop=True),
            pandas.DataFrame(
                expected_block, columns=["int_col", "bool_col"]
            ).reset_index(drop=True),
        )
# Example #16
def test_rows_w_empty_stream(class_under_test, mock_gapic_client):
    """An empty Avro stream yields no rows."""
    reader = class_under_test([], mock_gapic_client, "", 0, {})
    assert tuple(reader.rows()) == ()