def test_create_read_session(self):
    # Setup Expected Response
    name = "name3373707"
    expected_response = {"name": name}
    expected_response = storage_pb2.ReadSession(**expected_response)

    # Mock the API response
    channel = ChannelStub(responses=[expected_response])
    patch = mock.patch("google.api_core.grpc_helpers.create_channel")
    with patch as create_channel:
        create_channel.return_value = channel
        client = big_query_storage_client.BigQueryStorageClient()

    # Setup Request
    table_reference = {}
    parent = "parent-995424086"

    response = client.create_read_session(table_reference, parent)
    assert expected_response == response

    assert len(channel.requests) == 1
    expected_request = storage_pb2.CreateReadSessionRequest(
        table_reference=table_reference, parent=parent
    )
    actual_request = channel.requests[0][1]
    assert expected_request == actual_request
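# The happy-path test above only exercises a successful response. A hedged
# companion sketch for the error path, assuming the same ChannelStub test
# double re-raises exception instances placed in `responses` (the convention
# in GAPIC-generated suites); `CustomException` is a hypothetical local class.
import pytest


class CustomException(Exception):
    """Hypothetical stand-in for a server-side error."""


def test_create_read_session_exception(self):
    # Mock the API response: the stub raises instead of replying.
    channel = ChannelStub(responses=[CustomException()])
    patch = mock.patch("google.api_core.grpc_helpers.create_channel")
    with patch as create_channel:
        create_channel.return_value = channel
        client = big_query_storage_client.BigQueryStorageClient()

    # The wrapped call should surface the error to the caller.
    with pytest.raises(CustomException):
        client.create_read_session({}, "parent-995424086")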
def test_fake_server(self):
    """Fake server test."""
    channel = grpc.insecure_channel(BigqueryOpsTest.server.endpoint())
    stub = storage_pb2_grpc.BigQueryStorageStub(channel)

    create_read_session_request = storage_pb2.CreateReadSessionRequest()
    create_read_session_request.table_reference.project_id = self.GCP_PROJECT_ID
    create_read_session_request.table_reference.dataset_id = self.DATASET_ID
    create_read_session_request.table_reference.table_id = self.TABLE_ID
    create_read_session_request.requested_streams = 2

    read_session_response = stub.CreateReadSession(create_read_session_request)
    self.assertEqual(2, len(read_session_response.streams))

    read_rows_request = storage_pb2.ReadRowsRequest()
    read_rows_request.read_position.stream.name = read_session_response.streams[0].name
    read_rows_response = stub.ReadRows(read_rows_request)
    row = next(read_rows_response)
    self.assertEqual(
        self._serialize_to_avro(self.STREAM_1_ROWS, self.AVRO_SCHEMA),
        row.avro_rows.serialized_binary_rows,
    )
    self.assertEqual(len(self.STREAM_1_ROWS), row.avro_rows.row_count)

    read_rows_request = storage_pb2.ReadRowsRequest()
    read_rows_request.read_position.stream.name = read_session_response.streams[1].name
    read_rows_response = stub.ReadRows(read_rows_request)
    row = next(read_rows_response)
    self.assertEqual(
        self._serialize_to_avro(self.STREAM_2_ROWS, self.AVRO_SCHEMA),
        row.avro_rows.serialized_binary_rows,
    )
    self.assertEqual(len(self.STREAM_2_ROWS), row.avro_rows.row_count)
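# The test above depends on a _serialize_to_avro helper that is not shown.
# A minimal sketch of what it might look like, assuming rows are Python dicts
# and AVRO_SCHEMA is a JSON schema string, using the third-party fastavro
# package; the actual helper in the test class may be implemented differently.
import io
import json

import fastavro


def _serialize_to_avro(self, rows, schema):
    """Concatenate the schemaless Avro encoding of each row, mirroring the
    AvroRows.serialized_binary_rows payload returned by the fake server."""
    parsed_schema = fastavro.parse_schema(json.loads(schema))
    buf = io.BytesIO()
    for row in rows:
        fastavro.schemaless_writer(buf, parsed_schema, row)
    return buf.getvalue()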
def create_read_session(
    self,
    table_reference,
    parent,
    table_modifiers=None,
    requested_streams=None,
    read_options=None,
    format_=None,
    sharding_strategy=None,
    retry=google.api_core.gapic_v1.method.DEFAULT,
    timeout=google.api_core.gapic_v1.method.DEFAULT,
    metadata=None,
):
    """
    Creates a new read session. A read session divides the contents of a
    BigQuery table into one or more streams, which can then be used to read
    data from the table. The read session also specifies properties of the
    data to be read, such as a list of columns or a push-down filter
    describing the rows to be returned.

    A particular row can be read by at most one stream. When the caller has
    reached the end of each stream in the session, then all the data in the
    table has been read.

    Read sessions automatically expire 24 hours after they are created and
    do not require manual clean-up by the caller.

    Example:
        >>> from google.cloud import bigquery_storage_v1beta1
        >>>
        >>> client = bigquery_storage_v1beta1.BigQueryStorageClient()
        >>>
        >>> # TODO: Initialize `table_reference`:
        >>> table_reference = {}
        >>>
        >>> # TODO: Initialize `parent`:
        >>> parent = ''
        >>>
        >>> response = client.create_read_session(table_reference, parent)

    Args:
        table_reference (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.TableReference]): Required.
            Reference to the table to read.

            If a dict is provided, it must be of the same form as the protobuf
            message :class:`~google.cloud.bigquery_storage_v1beta1.types.TableReference`
        parent (str): Required. String of the form ``projects/{project_id}``
            indicating the project this ReadSession is associated with. This
            is the project that will be billed for usage.
        table_modifiers (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.TableModifiers]): Any
            modifiers to the Table (e.g. snapshot timestamp).

            If a dict is provided, it must be of the same form as the protobuf
            message :class:`~google.cloud.bigquery_storage_v1beta1.types.TableModifiers`
        requested_streams (int): Initial number of streams. If unset or 0, the
            server will provide a value of streams so as to produce reasonable
            throughput. Must be non-negative. The number of streams may be
            lower than the requested number, depending on the amount of
            parallelism that is reasonable for the table and the maximum
            amount of parallelism allowed by the system.

            Streams must be read starting from offset 0.
        read_options (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.TableReadOptions]): Read
            options for this session (e.g. column selection, filters).

            If a dict is provided, it must be of the same form as the protobuf
            message :class:`~google.cloud.bigquery_storage_v1beta1.types.TableReadOptions`
        format_ (~google.cloud.bigquery_storage_v1beta1.types.DataFormat): Data
            output format. Currently defaults to Avro.
        sharding_strategy (~google.cloud.bigquery_storage_v1beta1.types.ShardingStrategy): The
            strategy to use for distributing data among multiple streams.
            Currently defaults to liquid sharding.
        retry (Optional[google.api_core.retry.Retry]): A retry object used
            to retry requests. If ``None`` is specified, requests will
            be retried using a default configuration.
        timeout (Optional[float]): The amount of time, in seconds, to wait
            for the request to complete. Note that if ``retry`` is
            specified, the timeout applies to each individual attempt.
        metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata
            that is provided to the method.

    Returns:
        A :class:`~google.cloud.bigquery_storage_v1beta1.types.ReadSession` instance.
    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request
                failed for any reason.
        google.api_core.exceptions.RetryError: If the request failed due
                to a retryable error and retry attempts failed.
        ValueError: If the parameters are invalid.
    """
    # Wrap the transport method to add retry and timeout logic.
    if "create_read_session" not in self._inner_api_calls:
        self._inner_api_calls[
            "create_read_session"
        ] = google.api_core.gapic_v1.method.wrap_method(
            self.transport.create_read_session,
            default_retry=self._method_configs["CreateReadSession"].retry,
            default_timeout=self._method_configs["CreateReadSession"].timeout,
            client_info=self._client_info,
        )

    request = storage_pb2.CreateReadSessionRequest(
        table_reference=table_reference,
        parent=parent,
        table_modifiers=table_modifiers,
        requested_streams=requested_streams,
        read_options=read_options,
        format=format_,
        sharding_strategy=sharding_strategy,
    )
    if metadata is None:
        metadata = []
    metadata = list(metadata)
    try:
        routing_header = [
            ("table_reference.project_id", table_reference.project_id),
            ("table_reference.dataset_id", table_reference.dataset_id),
        ]
    except AttributeError:
        pass
    else:
        routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata(
            routing_header
        )
        metadata.append(routing_metadata)  # pragma: no cover

    return self._inner_api_calls["create_read_session"](
        request, retry=retry, timeout=timeout, metadata=metadata
    )
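# For completeness, a hedged usage sketch of the optional arguments; the
# project/dataset/table names are placeholders, and the TableReadOptions
# fields (selected_fields, row_restriction) come from the v1beta1 protos.
from google.cloud import bigquery_storage_v1beta1
from google.cloud.bigquery_storage_v1beta1 import types

client = bigquery_storage_v1beta1.BigQueryStorageClient()

table_reference = types.TableReference(
    project_id="my-project",  # placeholder
    dataset_id="my_dataset",  # placeholder
    table_id="my_table",  # placeholder
)

# Project only the columns we need and push the row filter down to the server.
read_options = types.TableReadOptions(
    selected_fields=["name", "age"],
    row_restriction="age > 18",
)

session = client.create_read_session(
    table_reference,
    "projects/my-project",
    read_options=read_options,
    requested_streams=2,
)
for stream in session.streams:
    print(stream.name)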