def test_create_read_session(self):
        # Setup Expected Response
        name = "name3373707"
        expected_response = {"name": name}
        expected_response = storage_pb2.ReadSession(**expected_response)

        # Mock the API response
        channel = ChannelStub(responses=[expected_response])
        patch = mock.patch("google.api_core.grpc_helpers.create_channel")
        with patch as create_channel:
            create_channel.return_value = channel
            client = big_query_storage_client.BigQueryStorageClient()

        # Setup Request
        table_reference = {}
        parent = "parent-995424086"

        response = client.create_read_session(table_reference, parent)
        assert expected_response == response

        assert len(channel.requests) == 1
        expected_request = storage_pb2.CreateReadSessionRequest(
            table_reference=table_reference, parent=parent
        )
        actual_request = channel.requests[0][1]
        assert expected_request == actual_request
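The test above passes an empty ``table_reference`` placeholder, since the mocked channel accepts anything. A minimal sketch of what a populated reference would look like against a real backend, assuming the v1beta1 ``TableReference`` fields used in the next example (the project, dataset, and table IDs are placeholders):

# Sketch: a populated table_reference for a non-mocked call (placeholder IDs).
from google.cloud.bigquery_storage_v1beta1 import types

table_reference = types.TableReference(
    project_id="my-project",
    dataset_id="my_dataset",
    table_id="my_table",
)
# The client also accepts the equivalent dict form:
# table_reference = {"project_id": "my-project", "dataset_id": "my_dataset", "table_id": "my_table"}
parent = "projects/my-project"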
Example #2
  def test_fake_server(self):
    """Fake server test."""
    channel = grpc.insecure_channel(BigqueryOpsTest.server.endpoint())
    stub = storage_pb2_grpc.BigQueryStorageStub(channel)

    create_read_session_request = storage_pb2.CreateReadSessionRequest()
    create_read_session_request.table_reference.project_id = self.GCP_PROJECT_ID
    create_read_session_request.table_reference.dataset_id = self.DATASET_ID
    create_read_session_request.table_reference.table_id = self.TABLE_ID
    create_read_session_request.requested_streams = 2

    read_session_response = stub.CreateReadSession(create_read_session_request)
    self.assertEqual(2, len(read_session_response.streams))

    read_rows_request = storage_pb2.ReadRowsRequest()
    read_rows_request.read_position.stream.name = read_session_response.streams[
        0].name
    read_rows_response = stub.ReadRows(read_rows_request)

    row = next(read_rows_response)
    self.assertEqual(
        self._serialize_to_avro(self.STREAM_1_ROWS, self.AVRO_SCHEMA),
        row.avro_rows.serialized_binary_rows)
    self.assertEqual(len(self.STREAM_1_ROWS), row.avro_rows.row_count)

    read_rows_request = storage_pb2.ReadRowsRequest()
    read_rows_request.read_position.stream.name = read_session_response.streams[
        1].name
    read_rows_response = stub.ReadRows(read_rows_request)
    row = next(read_rows_response)
    self.assertEqual(
        self._serialize_to_avro(self.STREAM_2_ROWS, self.AVRO_SCHEMA),
        row.avro_rows.serialized_binary_rows)
    self.assertEqual(len(self.STREAM_2_ROWS), row.avro_rows.row_count)
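The test above compares each ``ReadRowsResponse`` block as raw Avro bytes. A hedged sketch of how a caller might decode ``avro_rows.serialized_binary_rows`` with ``fastavro``, assuming the session's ``avro_schema.schema`` field carries the JSON schema string (as it does in v1beta1):

# Sketch: decode one ReadRowsResponse Avro block (continues from the test above).
import io
import json

import fastavro

avro_schema = fastavro.parse_schema(
    json.loads(read_session_response.avro_schema.schema))
block = io.BytesIO(row.avro_rows.serialized_binary_rows)
decoded_rows = [
    fastavro.schemaless_reader(block, avro_schema)
    for _ in range(row.avro_rows.row_count)
]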
Example #3
    def create_read_session(
        self,
        table_reference,
        parent,
        table_modifiers=None,
        requested_streams=None,
        read_options=None,
        format_=None,
        sharding_strategy=None,
        retry=google.api_core.gapic_v1.method.DEFAULT,
        timeout=google.api_core.gapic_v1.method.DEFAULT,
        metadata=None,
    ):
        """
        Creates a new read session. A read session divides the contents of a
        BigQuery table into one or more streams, which can then be used to read
        data from the table. The read session also specifies properties of the
        data to be read, such as a list of columns or a push-down filter describing
        the rows to be returned.

        A particular row can be read by at most one stream. When the caller has
        reached the end of each stream in the session, then all the data in the
        table has been read.

        Read sessions automatically expire 24 hours after they are created and do
        not require manual clean-up by the caller.

        Example:
            >>> from google.cloud import bigquery_storage_v1beta1
            >>>
            >>> client = bigquery_storage_v1beta1.BigQueryStorageClient()
            >>>
            >>> # TODO: Initialize `table_reference`:
            >>> table_reference = {}
            >>>
            >>> # TODO: Initialize `parent`:
            >>> parent = ''
            >>>
            >>> response = client.create_read_session(table_reference, parent)

        Args:
            table_reference (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.TableReference]): Required. Reference to the table to read.

                If a dict is provided, it must be of the same form as the protobuf
                message :class:`~google.cloud.bigquery_storage_v1beta1.types.TableReference`
            parent (str): Required. String of the form ``projects/{project_id}`` indicating the
                project this ReadSession is associated with. This is the project that
                will be billed for usage.
            table_modifiers (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.TableModifiers]): Any modifiers to the Table (e.g. snapshot timestamp).

                If a dict is provided, it must be of the same form as the protobuf
                message :class:`~google.cloud.bigquery_storage_v1beta1.types.TableModifiers`
            requested_streams (int): Initial number of streams. If unset or 0, the
                server will choose a number of streams so as to produce reasonable
                throughput. Must be non-negative. The number of streams may be lower
                than the requested number, depending on the amount of parallelism that
                is reasonable for the table and the maximum amount of parallelism
                allowed by the system.

                Streams must be read starting from offset 0.
            read_options (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.TableReadOptions]): Read options for this session (e.g. column selection, filters).

                If a dict is provided, it must be of the same form as the protobuf
                message :class:`~google.cloud.bigquery_storage_v1beta1.types.TableReadOptions`
            format_ (~google.cloud.bigquery_storage_v1beta1.types.DataFormat): Data output format. Currently defaults to Avro.
            sharding_strategy (~google.cloud.bigquery_storage_v1beta1.types.ShardingStrategy): The strategy to use for distributing data among multiple streams. Currently
                defaults to liquid sharding.
            retry (Optional[google.api_core.retry.Retry]):  A retry object used
                to retry requests. If ``None`` is specified, requests will
                be retried using a default configuration.
            timeout (Optional[float]): The amount of time, in seconds, to wait
                for the request to complete. Note that if ``retry`` is
                specified, the timeout applies to each individual attempt.
            metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata
                that is provided to the method.

        Returns:
            A :class:`~google.cloud.bigquery_storage_v1beta1.types.ReadSession` instance.

        Raises:
            google.api_core.exceptions.GoogleAPICallError: If the request
                    failed for any reason.
            google.api_core.exceptions.RetryError: If the request failed due
                    to a retryable error and retry attempts failed.
            ValueError: If the parameters are invalid.
        """
        # Wrap the transport method to add retry and timeout logic.
        if "create_read_session" not in self._inner_api_calls:
            self._inner_api_calls[
                "create_read_session"
            ] = google.api_core.gapic_v1.method.wrap_method(
                self.transport.create_read_session,
                default_retry=self._method_configs["CreateReadSession"].retry,
                default_timeout=self._method_configs["CreateReadSession"].timeout,
                client_info=self._client_info,
            )

        request = storage_pb2.CreateReadSessionRequest(
            table_reference=table_reference,
            parent=parent,
            table_modifiers=table_modifiers,
            requested_streams=requested_streams,
            read_options=read_options,
            format=format_,
            sharding_strategy=sharding_strategy,
        )
        if metadata is None:
            metadata = []
        metadata = list(metadata)
        try:
            routing_header = [
                ("table_reference.project_id", table_reference.project_id),
                ("table_reference.dataset_id", table_reference.dataset_id),
            ]
        except AttributeError:
            pass
        else:
            routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata(
                routing_header
            )
            metadata.append(routing_metadata)  # pragma: no cover

        return self._inner_api_calls["create_read_session"](
            request, retry=retry, timeout=timeout, metadata=metadata
        )
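The docstring above notes that once every stream in the session has been read to its end, all of the table's data has been read. A minimal end-to-end sketch built on that behavior, using the ``bigquery_storage_v1beta1`` client surface; the project/dataset/table IDs and the ``process`` callback are placeholders, not values from the examples above:

from google.cloud import bigquery_storage_v1beta1

client = bigquery_storage_v1beta1.BigQueryStorageClient()

table_reference = bigquery_storage_v1beta1.types.TableReference(
    project_id="my-project", dataset_id="my_dataset", table_id="my_table"
)
session = client.create_read_session(
    table_reference,
    "projects/my-project",   # parent: the project billed for the read
    requested_streams=2,     # the server may return fewer streams
)

# Drain every stream; together they cover the whole table exactly once.
for stream in session.streams:
    position = bigquery_storage_v1beta1.types.StreamPosition(stream=stream)
    reader = client.read_rows(position)
    for record in reader.rows(session):  # rows() decodes the Avro blocks
        process(record)                  # hypothetical per-row handler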