def _upload_to_gcs(self, local_file_path, gcs_location):
  """Uploads a local file to the given GCS location."""
  gcs_bucket, gcs_object = self._get_gcs_bucket_and_name(gcs_location)
  request = storage.StorageObjectsInsertRequest(
      bucket=gcs_bucket, name=gcs_object)
  _LOGGER.info('Starting GCS upload to %s...', gcs_location)
  total_size = os.path.getsize(local_file_path)
  from apitools.base.py import exceptions
  try:
    with open(local_file_path, 'rb') as stream:
      upload = storage.Upload(stream, 'application/octet-stream', total_size)
      self._storage_client.objects.Insert(request, upload=upload)
  except exceptions.HttpError as e:
    # Translate the common, actionable HTTP failures into an IOError.
    reportable_errors = {
        403: 'access denied',
        404: 'bucket not found',
    }
    if e.status_code in reportable_errors:
      raise IOError(
          ('Could not upload to GCS path %s: %s. Please verify '
           'that credentials are valid and that you have write '
           'access to the specified path.') %
          (gcs_location, reportable_errors[e.status_code]))
    raise
  _LOGGER.info('Completed GCS upload to %s.', gcs_location)
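# _upload_to_gcs relies on a _get_gcs_bucket_and_name helper that is not
# shown here. A minimal sketch, assuming gcs_location always has the form
# 'gs://<bucket>/<object>' (the same split stage_file below performs inline):
def _get_gcs_bucket_and_name(self, gcs_location):
  # Drop the 'gs://' prefix, then split into (bucket, object_name).
  return gcs_location[5:].split('/', 1)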
def __init__(self, client, path, mime_type):
  self._client = client
  self._path = path
  self._bucket, self._name = parse_gcs_path(path)
  self._mime_type = mime_type

  # Set up communication with child thread.
  parent_conn, child_conn = multiprocessing.Pipe()
  self._child_conn = child_conn
  self._conn = parent_conn

  # Set up uploader.
  self._insert_request = (
      storage.StorageObjectsInsertRequest(
          bucket=self._bucket, name=self._name))
  self._upload = transfer.Upload(
      PipeStream(self._child_conn),
      self._mime_type,
      chunksize=WRITE_CHUNK_SIZE)
  self._upload.strategy = transfer.RESUMABLE_UPLOAD

  # Start uploading thread.
  self._upload_thread = threading.Thread(target=self._start_upload)
  self._upload_thread.daemon = True
  self._upload_thread.last_error = None
  self._upload_thread.start()
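# The daemon thread started above runs _start_upload, which is not shown
# here. A minimal sketch of that target, assuming failures are meant to be
# recorded in the last_error slot initialized above (so the writing side can
# re-raise them later) rather than escape the daemon thread:
def _start_upload(self):
  # Runs the blocking apitools insert; the resumable upload pulls its data
  # from the PipeStream as the parent thread feeds bytes into the pipe.
  try:
    self._client.objects.Insert(self._insert_request, upload=self._upload)
  except Exception as e:  # pylint: disable=broad-except
    self._upload_thread.last_error = e
  finally:
    # Unblock anything still waiting on the pipe.
    self._child_conn.close()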
def stage_file(self,
               gcs_or_local_path,
               file_name,
               stream,
               mime_type='application/octet-stream'):
  """Stages a file at a GCS or local path with stream-supplied contents."""
  if not gcs_or_local_path.startswith('gs://'):
    local_path = FileSystems.join(gcs_or_local_path, file_name)
    logging.info('Staging file locally to %s', local_path)
    with open(local_path, 'wb') as f:
      f.write(stream.read())
    return
  gcs_location = FileSystems.join(gcs_or_local_path, file_name)
  bucket, name = gcs_location[5:].split('/', 1)
  request = storage.StorageObjectsInsertRequest(bucket=bucket, name=name)
  logging.info('Starting GCS upload to %s...', gcs_location)
  upload = storage.Upload(stream, mime_type)
  try:
    response = self._storage_client.objects.Insert(request, upload=upload)
  except exceptions.HttpError as e:
    reportable_errors = {
        403: 'access denied',
        404: 'bucket not found',
    }
    if e.status_code in reportable_errors:
      raise IOError(
          ('Could not upload to GCS path %s: %s. Please verify '
           'that credentials are valid and that you have write '
           'access to the specified path.') %
          (gcs_or_local_path, reportable_errors[e.status_code]))
    raise
  logging.info('Completed GCS upload to %s', gcs_location)
  return response
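# Hypothetical usage sketch for stage_file; the 'stager' object, bucket, and
# file name below are illustrative, not taken from the code above:
import io

stager.stage_file(
    'gs://my-bucket/staging',  # hypothetical destination path
    'workflow.json',  # hypothetical file name
    io.BytesIO(b'{"job": "example"}'),
    mime_type='application/json')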
def __init__(self,
             client,
             path,
             mode='w',
             mime_type='application/octet-stream'):
  self.client = client
  self.path = path
  self.mode = mode
  self.bucket, self.name = parse_gcs_path(path)

  self.closed = False
  self.position = 0

  # A small buffer to avoid CPU-heavy per-write pipe calls.
  self.write_buffer = bytearray()
  self.write_buffer_size = 128 * 1024

  # Set up communication with uploading thread.
  parent_conn, child_conn = multiprocessing.Pipe()
  self.child_conn = child_conn
  self.conn = parent_conn

  # Set up uploader.
  self.insert_request = (
      storage.StorageObjectsInsertRequest(
          bucket=self.bucket, name=self.name))
  self.upload = transfer.Upload(
      GcsBufferedWriter.PipeStream(child_conn),
      mime_type,
      chunksize=WRITE_CHUNK_SIZE)
  self.upload.strategy = transfer.RESUMABLE_UPLOAD

  # Start uploading thread.
  self.upload_thread = threading.Thread(target=self._start_upload)
  self.upload_thread.daemon = True
  self.upload_thread.last_error = None
  self.upload_thread.start()
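# A sketch of the write path this constructor prepares, assuming the buffer
# is drained into the pipe once it grows past write_buffer_size; the
# _flush_write_buffer name is illustrative:
def write(self, data):
  # Accumulate small writes so the pipe round-trip is paid once per flush,
  # not once per write() call.
  self.write_buffer.extend(data)
  self.position += len(data)
  if len(self.write_buffer) > self.write_buffer_size:
    self._flush_write_buffer()

def _flush_write_buffer(self):
  try:
    self.conn.send_bytes(bytes(self.write_buffer))
    self.write_buffer = bytearray()
  except IOError:
    # A broken pipe usually means the uploader thread died; surface its
    # recorded error instead of the secondary IOError seen here.
    if self.upload_thread.last_error:
      raise self.upload_thread.last_error  # pylint: disable=raising-bad-type
    raise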