def __init__(self, client, path, buffer_size):
  """Create a reader for a single GCS object.

  Fetches the object's metadata once to learn its size and generation,
  then prepares (but does not start) a chunked download pinned to that
  generation.

  Args:
    client: GCS API client used for metadata and download requests.
    path: Full GCS path of the object (``gs://bucket/name``).
    buffer_size: Chunk size, in bytes, for the underlying download.

  Raises:
    IOError: With ``errno.ENOENT`` if the object does not exist.
    HttpError: For any other HTTP failure while fetching metadata.
  """
  self._client = client
  self._path = path
  self._bucket, self._name = parse_gcs_path(path)
  self._buffer_size = buffer_size

  # Fetch object metadata so size and generation are known up front.
  self._get_request = storage.StorageObjectsGetRequest(
      bucket=self._bucket, object=self._name)
  try:
    obj = self._get_object_metadata(self._get_request)
  except HttpError as err:
    if err.status_code != 404:
      logging.error(
          'HTTP error while requesting file %s: %s', self._path, err)
      raise
    # Translate a missing object into the conventional filesystem error.
    raise IOError(errno.ENOENT, 'Not found: %s' % self._path)
  self._size = obj.size

  # Pin all subsequent reads to the generation observed above so a
  # concurrent overwrite cannot change the bytes mid-read.
  self._get_request.generation = obj.generation

  # Prepare the chunked download; transfer happens lazily on later reads.
  self._download_stream = io.BytesIO()
  self._downloader = transfer.Download(
      self._download_stream,
      auto_transfer=False,
      chunksize=self._buffer_size)
  self._client.objects.Get(self._get_request, download=self._downloader)
def __init__(self, client, path, buffer_size=DEFAULT_READ_BUFFER_SIZE):
  """Create a buffered reader for a single GCS object.

  Looks up the object's metadata to record its size and generation,
  prepares a (non-auto-transferring) download bound to that generation,
  and initializes the in-memory read-buffer bookkeeping.

  Args:
    client: GCS API client used for metadata and download requests.
    path: Full GCS path of the object (``gs://bucket/name``).
    buffer_size: Size of the in-memory read buffer, in bytes.

  Raises:
    IOError: With ``errno.ENOENT`` if the object does not exist.
    HttpError: For any other HTTP failure while fetching metadata.
  """
  self.client = client
  self.path = path
  self.bucket, self.name = parse_gcs_path(path)
  self.buffer_size = buffer_size

  # Fetch object metadata so size and generation are known up front.
  get_request = storage.StorageObjectsGetRequest(
      bucket=self.bucket, object=self.name)
  try:
    metadata = self._get_object_metadata(get_request)
  except HttpError as err:
    if err.status_code != 404:
      logging.error(
          'HTTP error while requesting file %s: %s', self.path, err)
      raise
    # Translate a missing object into the conventional filesystem error.
    raise IOError(errno.ENOENT, 'Not found: %s' % self.path)
  self.size = metadata.size

  # Pin all subsequent reads to the generation observed above so a
  # concurrent overwrite cannot change the bytes mid-read.
  get_request.generation = metadata.generation

  # Prepare the download and reset read-buffer state: the stream and
  # downloader receive bytes, while position/buffer track what callers
  # have consumed so far.
  self.download_stream = StringIO.StringIO()
  self.downloader = transfer.Download(
      self.download_stream, auto_transfer=False)
  self.client.objects.Get(get_request, download=self.downloader)
  self.position = 0
  self.buffer = ''
  self.buffer_start_position = 0
  self.closed = False
def __init__(self, client, path, buffer_size, get_project_number):
  """Create a metrics-instrumented reader for a single GCS object.

  Fetches the object's metadata once to learn its size and generation,
  then prepares a chunked download pinned to that generation.  Each API
  call is reported to a per-bucket service-call metric.

  Args:
    client: GCS API client used for metadata and download requests.
    path: Full GCS path of the object (``gs://bucket/name``).
    buffer_size: Chunk size, in bytes, for the underlying download.
    get_project_number: Callable mapping a bucket name to its project
      number, used to label the request-count metric.

  Raises:
    IOError: With ``errno.ENOENT`` if the object does not exist.
    HttpError: For any other HTTP failure while talking to GCS.
  """
  self._client = client
  self._path = path
  self._bucket, self._name = parse_gcs_path(path)
  self._buffer_size = buffer_size
  self._get_project_number = get_project_number
  project_number = self._get_project_number(self._bucket)

  # Create a request count metric labeled with the bucket and project.
  resource = resource_identifiers.GoogleCloudStorageBucket(self._bucket)
  labels = {
      monitoring_infos.SERVICE_LABEL: 'Storage',
      monitoring_infos.METHOD_LABEL: 'Objects.get',
      monitoring_infos.RESOURCE_LABEL: resource,
      monitoring_infos.GCS_BUCKET_LABEL: self._bucket,
      monitoring_infos.GCS_PROJECT_ID_LABEL: project_number
  }
  service_call_metric = ServiceCallMetric(
      request_count_urn=monitoring_infos.API_REQUEST_COUNT_URN,
      base_labels=labels)

  # Get object state.
  self._get_request = (
      storage.StorageObjectsGetRequest(
          bucket=self._bucket, object=self._name))
  try:
    metadata = self._get_object_metadata(self._get_request)
    service_call_metric.call('ok')
  except HttpError as http_error:
    service_call_metric.call(http_error)
    if http_error.status_code == 404:
      raise IOError(errno.ENOENT, 'Not found: %s' % self._path)
    else:
      _LOGGER.error(
          'HTTP error while requesting file %s: %s', self._path, http_error)
      raise
  self._size = metadata.size

  # Ensure read is from file of the correct generation.
  self._get_request.generation = metadata.generation

  # Initialize read buffer state.
  self._download_stream = io.BytesIO()
  self._downloader = transfer.Download(
      self._download_stream,
      auto_transfer=False,
      chunksize=self._buffer_size,
      num_retries=20)
  try:
    self._client.objects.Get(self._get_request, download=self._downloader)
    service_call_metric.call('ok')
  except HttpError as e:
    service_call_metric.call(e)
    # BUG FIX: the original swallowed the exception here, so __init__
    # "succeeded" with a failed download setup and later reads would
    # break far from the cause.  Record the metric, then propagate,
    # matching the metadata-fetch handler above.
    raise
def _fetch_next_if_buffer_exhausted(self):
  """Refill the read buffer from GCS when the caller has consumed it.

  A no-op while ``self.position`` still falls inside the current buffer.
  Otherwise fetches the next segment on a daemon worker thread so the
  fetch can be abandoned on timeout; on timeout the download objects are
  rebuilt and the fetch is retried, up to 10 retries.

  Raises:
    GcsIOError: If the retry limit is exhausted.
    Exception: Whatever the fetch thread reported via the queue.
  """
  # Buffer is exhausted when it is empty or the read position has moved
  # past its last byte.
  if not self.buffer or (
      self.buffer_start_position + len(self.buffer) <= self.position):
    # Never request past end-of-object; _remaining() caps the size.
    bytes_to_request = min(self._remaining(), self.buffer_size)
    self.buffer_start_position = self.position
    retry_count = 0
    while retry_count <= 10:
      # The fetch runs on a daemon thread so a hung HTTP call can be
      # abandoned via the queue timeout instead of blocking forever.
      queue = Queue.Queue()
      t = threading.Thread(
          target=self._fetch_to_queue,
          args=(queue, self._get_segment, (self.position, bytes_to_request)))
      t.daemon = True
      t.start()
      try:
        # The worker posts a (result, exception, traceback) triple.
        result, exn, tb = queue.get(timeout=self.segment_timeout)
      except Queue.Empty:
        logging.warning(
            ('Timed out fetching %d bytes from position %d of %s after %f '
             'seconds; retrying...'),
            bytes_to_request,
            self.position,
            self.path,
            self.segment_timeout)
        retry_count += 1
        # Reinitialize download objects.
        # NOTE(review): the abandoned thread may still be writing to the
        # old stream; fresh objects isolate the retry from it.
        self.download_stream = cStringIO.StringIO()
        self.downloader = transfer.Download(
            self.download_stream,
            auto_transfer=False,
            chunksize=self.buffer_size)
        self.client.objects.Get(self.get_request, download=self.downloader)
        continue
      if exn:
        # The worker failed before the timeout; surface its exception.
        logging.error(
            ('Exception while fetching %d bytes from position %d of %s: '
             '%s\n%s'),
            bytes_to_request,
            self.position,
            self.path,
            exn,
            tb)
        raise exn
      self.buffer = result
      return
    raise GcsIOError(
        'Reached retry limit for _fetch_next_if_buffer_exhausted.')