예제 #1
0
    def __init__(self, client, path, buffer_size):
        """Prepares a chunked download of the GCS object located at ``path``.

        The object's metadata is fetched first so that its size can be recorded
        and the request can be pinned to the generation that was observed. A
        manually driven downloader is then registered through the client.

        Args:
          client: Storage client whose ``objects.Get`` method is called here.
          path: GCS path; split into bucket and object name by
            ``parse_gcs_path``.
          buffer_size: Value passed as ``chunksize`` to ``transfer.Download``.

        Raises:
          IOError: With ``errno.ENOENT`` if the metadata request returns 404.
          HttpError: Re-raised, after being logged, for any other HTTP status.
        """
        self._client = client
        self._path = path
        bucket_name, object_name = parse_gcs_path(path)
        self._bucket = bucket_name
        self._name = object_name
        self._buffer_size = buffer_size

        # Look up the current state of the object.
        self._get_request = storage.StorageObjectsGetRequest(
            bucket=self._bucket, object=self._name)
        try:
            object_metadata = self._get_object_metadata(self._get_request)
        except HttpError as http_error:
            if http_error.status_code != 404:
                logging.error('HTTP error while requesting file %s: %s',
                              self._path, http_error)
                raise
            raise IOError(errno.ENOENT, 'Not found: %s' % self._path)
        self._size = object_metadata.size

        # Pin later reads to the generation whose metadata was just fetched.
        self._get_request.generation = object_metadata.generation

        # Set up the in-memory stream and the downloader that fills it.
        # auto_transfer=False: the Get call below only registers the download.
        stream = io.BytesIO()
        self._download_stream = stream
        self._downloader = transfer.Download(
            stream, auto_transfer=False, chunksize=self._buffer_size)
        self._client.objects.Get(self._get_request, download=self._downloader)
예제 #2
0
    def checksum(self, path):
        """Fetches the CRC32C checksum recorded for a GCS object.

        Args:
          path: GCS file path in the form gs://<bucket>/<name>.

        Returns: the ``crc32c`` attribute of the object returned by the
          client's ``objects.Get`` call.
        """
        bucket_name, object_name = parse_gcs_path(path)
        get_request = storage.StorageObjectsGetRequest(
            bucket=bucket_name, object=object_name)
        gcs_object = self.client.objects.Get(get_request)
        return gcs_object.crc32c
예제 #3
0
파일: gcsio.py 프로젝트: usingh83/beam
  def __init__(self, client, path, buffer_size, get_project_number):
    """Prepares a chunked, metric-instrumented download of a GCS object.

    The object's metadata is fetched first so that its size can be recorded
    and the request can be pinned to the observed generation. A manually
    driven downloader is then registered through the client. Both calls are
    reported to a ``ServiceCallMetric`` labelled as ``Objects.get``.

    Args:
      client: Storage client whose ``objects.Get`` method is called here.
      path: GCS path; split into bucket and object name by ``parse_gcs_path``.
      buffer_size: Value passed as ``chunksize`` to ``transfer.Download``.
      get_project_number: Callable invoked with the bucket name; its result
        is used as the project-id label of the request metric.

    Raises:
      IOError: With ``errno.ENOENT`` if the metadata request returns 404.
      HttpError: For any other HTTP failure of the metadata request (logged
        first), or for an HTTP failure while registering the download.
    """
    self._client = client
    self._path = path
    self._bucket, self._name = parse_gcs_path(path)
    self._buffer_size = buffer_size
    self._get_project_number = get_project_number

    project_number = self._get_project_number(self._bucket)

    # Create a request count metric
    resource = resource_identifiers.GoogleCloudStorageBucket(self._bucket)
    labels = {
        monitoring_infos.SERVICE_LABEL: 'Storage',
        monitoring_infos.METHOD_LABEL: 'Objects.get',
        monitoring_infos.RESOURCE_LABEL: resource,
        monitoring_infos.GCS_BUCKET_LABEL: self._bucket,
        monitoring_infos.GCS_PROJECT_ID_LABEL: project_number
    }
    service_call_metric = ServiceCallMetric(
        request_count_urn=monitoring_infos.API_REQUEST_COUNT_URN,
        base_labels=labels)

    # Get object state.
    self._get_request = (
        storage.StorageObjectsGetRequest(
            bucket=self._bucket, object=self._name))
    try:
      metadata = self._get_object_metadata(self._get_request)
      service_call_metric.call('ok')
    except HttpError as http_error:
      service_call_metric.call(http_error)
      if http_error.status_code == 404:
        raise IOError(errno.ENOENT, 'Not found: %s' % self._path)
      else:
        _LOGGER.error(
            'HTTP error while requesting file %s: %s', self._path, http_error)
        raise
    self._size = metadata.size

    # Ensure read is from file of the correct generation.
    self._get_request.generation = metadata.generation

    # Initialize read buffer state.
    self._download_stream = io.BytesIO()
    self._downloader = transfer.Download(
        self._download_stream,
        auto_transfer=False,
        chunksize=self._buffer_size,
        num_retries=20)

    try:
      self._client.objects.Get(self._get_request, download=self._downloader)
      service_call_metric.call('ok')
    except HttpError as e:
      service_call_metric.call(e)
      # Propagate the failure: swallowing it here would hand the caller an
      # object whose download was never registered with the client.
      raise
예제 #4
0
    def size(self, path):
        """Looks up the size of one GCS object.

        No glob expansion is performed, so ``path`` has to name a single GCS
        object.

        Returns: the ``size`` attribute of the object returned by the client's
          ``objects.Get`` call.
        """
        bucket_name, object_name = parse_gcs_path(path)
        get_request = storage.StorageObjectsGetRequest(
            bucket=bucket_name, object=object_name)
        gcs_object = self.client.objects.Get(get_request)
        return gcs_object.size
예제 #5
0
    def _gcs_object(self, path):
        """Fetches the GCS object for the given path.

        No glob expansion is performed, so ``path`` has to name a single GCS
        object.

        Returns: whatever the client's ``objects.Get`` call returns for the
          request built from ``path``.
        """
        bucket_name, object_name = parse_gcs_path(path)
        get_request = storage.StorageObjectsGetRequest(
            bucket=bucket_name, object=object_name)
        return self.client.objects.Get(get_request)
예제 #6
0
    def kms_key(self, path):
        """Looks up the KMS key of one GCS object.

        No glob expansion is performed, so ``path`` has to name a single GCS
        object.

        Returns: the ``kmsKeyName`` attribute of the object returned by the
          client's ``objects.Get`` call (per the original documentation, a
          string, or None if the object has no KMS key).
        """
        bucket_name, object_name = parse_gcs_path(path)
        get_request = storage.StorageObjectsGetRequest(
            bucket=bucket_name, object=object_name)
        gcs_object = self.client.objects.Get(get_request)
        return gcs_object.kmsKeyName
예제 #7
0
    def last_updated(self, path):
        """Returns the last updated epoch time of a single GCS object.

        This method does not perform glob expansion. Hence the given path must
        be for a single GCS object.

        Args:
          path: GCS file path in the form gs://<bucket>/<name>.

        Returns: last updated time of the GCS object, in seconds since the
          Unix epoch, as a float with microsecond resolution.
        """
        # Function-scope import so the block does not depend on the file's
        # top-level import list.
        import calendar

        bucket, object_path = parse_gcs_path(path)
        request = storage.StorageObjectsGetRequest(bucket=bucket,
                                                   object=object_path)
        updated = self.client.objects.Get(request).updated
        # Convert via UTC rather than time.mktime(), which interprets the
        # fields as local time and is off by the DST offset whenever DST
        # applies. utctimetuple() normalizes aware datetimes to UTC; naive
        # values are taken as already being UTC (assumption -- confirm against
        # what the client returns).
        return (calendar.timegm(updated.utctimetuple()) +
                updated.microsecond / 1000000.0)
예제 #8
0
    def exists(self, path):
        """Reports whether a GCS object is present at ``path``.

        Args:
          path: GCS file path in the form gs://<bucket>/<name>.

        Returns: True if the metadata request succeeds, False if it fails with
          HTTP 404.

        Raises:
          HttpError: Re-raised for any HTTP status other than 404.
        """
        bucket_name, object_name = parse_gcs_path(path)
        try:
            get_request = storage.StorageObjectsGetRequest(
                bucket=bucket_name, object=object_name)
            # Metadata-only request; the result itself is not needed.
            self.client.objects.Get(get_request)
        except HttpError as http_error:
            # Only a 404 means "object is absent"; anything else propagates.
            if http_error.status_code != 404:
                raise
            return False
        else:
            return True
예제 #9
0
    def __init__(self,
                 client,
                 path,
                 mode='r',
                 buffer_size=DEFAULT_READ_BUFFER_SIZE,
                 segment_timeout=DEFAULT_READ_SEGMENT_TIMEOUT_SECONDS):
        """Sets up a buffered reader over the GCS object located at ``path``.

        The object's metadata is fetched first so that its size can be recorded
        and the request can be pinned to the observed generation. A manually
        driven downloader is then registered through the client and the read
        cursor and buffer are reset.

        Args:
          client: Storage client whose ``objects.Get`` method is called here.
          path: GCS path; split into bucket and object name by
            ``parse_gcs_path``.
          mode: Stored on the instance as ``self.mode``; not otherwise used
            here.
          buffer_size: Value passed as ``chunksize`` to ``transfer.Download``.
          segment_timeout: Stored on the instance as ``self.segment_timeout``;
            not otherwise used here.

        Raises:
          IOError: With ``errno.ENOENT`` if the metadata request returns 404.
          HttpError: Re-raised, after being logged, for any other HTTP status.
        """
        self.client = client
        self.path = path
        bucket_name, object_name = parse_gcs_path(path)
        self.bucket = bucket_name
        self.name = object_name
        self.mode = mode
        self.buffer_size = buffer_size
        self.segment_timeout = segment_timeout

        # Look up the current state of the object.
        self.get_request = storage.StorageObjectsGetRequest(
            bucket=self.bucket, object=self.name)
        try:
            object_metadata = self._get_object_metadata(self.get_request)
        except HttpError as http_error:
            if http_error.status_code != 404:
                logging.error('HTTP error while requesting file %s: %s',
                              self.path, http_error)
                raise
            raise IOError(errno.ENOENT, 'Not found: %s' % self.path)
        self.size = object_metadata.size

        # Pin later reads to the generation whose metadata was just fetched.
        self.get_request.generation = object_metadata.generation

        # Set up the in-memory stream and the downloader that fills it.
        # auto_transfer=False: the Get call below only registers the download.
        self.download_stream = cStringIO.StringIO()
        self.downloader = transfer.Download(
            self.download_stream,
            auto_transfer=False,
            chunksize=self.buffer_size)
        self.client.objects.Get(self.get_request, download=self.downloader)

        # Start with an empty buffer positioned at the beginning of the object.
        self.position = 0
        self.buffer = ''
        self.buffer_start_position = 0
        self.closed = False