Example #1
import os

from google.resumable_media.requests import ResumableUpload


def resumable_upload(bucket, blob, filename):
    # Assumes a module-level `transport` (an authorized session) created
    # elsewhere; a sketch of that setup follows this example.
    url_template = (
        'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?'
        'uploadType=resumable')
    upload_url = url_template.format(bucket=bucket)
    print('resumable_upload to url:', upload_url)
    chunk_size = 1024 * 1024  # 1 MB; must be a multiple of 256 KB
    upload = ResumableUpload(upload_url, chunk_size)

    stream = open(filename, 'rb')
    total_bytes = os.path.getsize(filename)
    print("file %s, size %d" % (filename, total_bytes))

    # The object name comes from the metadata, so it does not need to be
    # repeated as a `name` query parameter in the upload URL.
    metadata = {'name': filename}

    response = upload.initiate(transport, stream, metadata, 'text/plain')
    print("upload.resumable_url:", upload.resumable_url)
    print("response.headers['Location']:", response.headers['Location'])
    print("total %d, upload.total_bytes %d" %
          (total_bytes, upload.total_bytes))
    print("response.headers['X-GUploader-UploadID']:",
          response.headers['X-GUploader-UploadID'])
    while not upload.finished:
        response = upload.transmit_next_chunk(transport)
        print("total %d, upload.total_bytes %d" %
              (total_bytes, upload.total_bytes))
    print("done, json response:", response.json())
Example #2
import urllib.parse
from pathlib import Path

from google.auth.transport import requests as g_requests
from google.cloud import storage
from google.resumable_media.requests import ChunkedDownload, ResumableUpload

# DataError, confirm_prompt and process_operation are application-level
# helpers defined elsewhere in this project (a sketch of what
# process_operation might look like follows this example).


def main(args):
    file_location = Path(args.file_location)
    file_name = file_location.name
    local_file = file_location
    client = storage.Client()
    blob_folder = "word2vec_service/v2"
    bucket_name = "hutoma-datasets"
    bucket = client.get_bucket(bucket_name)
    blob_path = "{}/{}".format(blob_folder, file_name)
    blob = bucket.blob(blob_path)
    bytes_in_1MB = 1024 * 1024

    print("Operation {}: blob is {}, local file is {}".format(
        args.operation, blob_path, local_file))
    transport = g_requests.AuthorizedSession(credentials=client._credentials)

    if args.operation == "download":
        if not blob.exists():
            raise DataError("Blob {} doesn't exist".format(blob_path))
        if local_file.exists():
            confirm_prompt("File {} exists, overwrite?".format(local_file))
        url = ("https://www.googleapis.com/download/storage/v1/b/"
               "{bucket}/o/{blob_name}?alt=media").format(
                   bucket=bucket_name,
                   blob_name=urllib.parse.quote_plus(blob_path))
        chunk_size = bytes_in_1MB * 5  # 5 MB
        with local_file.open("wb") as file_stream:
            download = ChunkedDownload(url, chunk_size, file_stream)
            response = download.consume_next_chunk(transport)
            if not download.finished:
                process_operation(transport, download)

    elif args.operation == "upload":
        if not local_file.exists():
            raise DataError("File {} doesn't exist".format(local_file))
        if blob.exists():
            confirm_prompt("Blob {} exists, overwrite?".format(blob_path))

        url = ("https://www.googleapis.com/upload/storage/v1/b/{bucket}" +
               "/o?uploadType=resumable").format(bucket=bucket_name)
        chunk_size = bytes_in_1MB  # 1 MB
        upload = ResumableUpload(url, chunk_size)
        metadata = {"name": blob_path}
        content_type = "application/octet-stream"

        with local_file.open("rb") as file_stream:
            response = upload.initiate(transport, file_stream, metadata,
                                       content_type)
            if response.status_code != 200:
                raise DataError("Failed to initiate upload")
            process_operation(transport, upload)
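process_operation itself is not shown in this example. Since both ResumableUpload and ChunkedDownload expose a `finished` flag plus a step method, a plausible reconstruction (hypothetical, not the project's actual helper) might be:

def process_operation(transport, operation):
    # Hypothetical sketch: drive an upload or download to completion.
    # ResumableUpload advances with transmit_next_chunk, ChunkedDownload
    # with consume_next_chunk; both expose a `finished` flag.
    while not operation.finished:
        if hasattr(operation, "transmit_next_chunk"):
            operation.transmit_next_chunk(transport)
        else:
            operation.consume_next_chunk(transport)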
Example #3
    # `new_file` method of GCPStreamingFileUploadHandler (the full class is
    # shown in Example #4 below).
    def new_file(self, *args, **kwargs) -> None:
        super(GCPStreamingFileUploadHandler, self).new_file(*args, **kwargs)
        self.file = ResumableUpload(
            self.upload_url,
            self.chunk_size,
        )
        self.file.initiate(
            self.transport,
            self.data,
            {"name": self.file_name},
            self.content_type,
            stream_final=False,
        )
        # Stop Django's remaining upload handlers from also consuming
        # the stream.
        raise StopFutureHandlers("Continue resumable upload session")
Example #4
from typing import Any, Optional

from django.conf import settings
from django.core.files.uploadhandler import (FileUploadHandler,
                                             StopFutureHandlers)
from django.core.handlers.wsgi import WSGIRequest
from google.auth.transport import requests as tr_requests
from google.oauth2.service_account import Credentials
from google.resumable_media.requests import ResumableUpload

# AutoTruncatingChunkedStreamable is a project-specific buffer that exposes
# a file-like interface over the chunks Django hands to the handler.


class GCPStreamingFileUploadHandler(FileUploadHandler):
    upload_url = "https://www.googleapis.com/upload/storage/v1/b/" \
                 "{bucket}/o?uploadType=resumable"
    chunk_size = 256 * 1024  # Google requires a multiple of 256 KB

    def __init__(self,
                 request: WSGIRequest = None,
                 bucket: str = None) -> None:
        super(GCPStreamingFileUploadHandler, self).__init__(request)
        self.upload_url: str = self.upload_url.format(bucket=bucket)
        self.transport = tr_requests.AuthorizedSession(
            credentials=Credentials.from_service_account_file(
                settings.GCP_STORAGE_KEY,
                scopes=[
                    "https://www.googleapis.com/auth/devstorage.read_write"
                ]))
        self.data = AutoTruncatingChunkedStreamable(self.chunk_size)
        self.file = None  # type: Optional[ResumableUpload]

    def new_file(self, *args, **kwargs) -> None:
        super(GCPStreamingFileUploadHandler, self).new_file(*args, **kwargs)
        self.file = ResumableUpload(
            self.upload_url,
            self.chunk_size,
        )
        self.file.initiate(
            self.transport,
            self.data,
            {"name": self.file_name},
            self.content_type,
            stream_final=False,
        )
        raise StopFutureHandlers("Continue resumable upload session")

    def receive_data_chunk(self, raw_data: bytes, start: int) -> None:
        # Because the session was opened with stream_final=False, Google
        # treats the upload as finished as soon as it receives a chunk
        # smaller than chunk_size.
        self.data.write(raw_data)
        if self.data.read(anon=True):
            self.file.transmit_next_chunk(self.transport)

        # needed to stop django multipart skipping the upload
        return None

    def file_complete(self, file_size: int) -> Any:
        if self.file.finished:
            return object  # TODO: implement streamed blob response
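To use a handler like this, Django needs it installed before the request body is parsed. A minimal wiring sketch (the view and bucket name are placeholders, not part of the original example):

from django.http import HttpResponse
from django.views.decorators.csrf import csrf_exempt


# Hypothetical wiring; the bucket name is a placeholder. The handler must
# be installed before request.POST or request.FILES is first accessed,
# which is why CSRF checking (which reads POST) is deferred here.
@csrf_exempt
def upload_view(request):
    request.upload_handlers.insert(
        0, GCPStreamingFileUploadHandler(request, bucket="my-bucket"))
    _ = request.FILES  # parsing the body drives the streaming upload
    return HttpResponse("uploaded")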
Example #5

    # `start` method of a streaming upload client (GoogleUploadClient, shown
    # in full in Example #10 below); BytesIO and the instance attributes are
    # defined by that class.
    def start(self):
        from google.resumable_media.requests import ResumableUpload

        self._stream = BytesIO()
        self._bytes_written = 0

        url = (f"https://www.googleapis.com/upload/storage/v1/b/"
               f"{self._bucket.name}/o?uploadType=resumable")
        self._request = ResumableUpload(upload_url=url,
                                        chunk_size=self._chunk_size)
        self._request.initiate(
            transport=self._transport,
            content_type="application/octet-stream",
            stream=self._stream,
            stream_final=False,
            metadata={"name": self._blob.name},
        )
Example #6
    def _initiate_resumable_upload(self, client, stream, metadata,
                                   num_retries):
        """Initiate a resumable upload.

        :type client: :class:`~google.cloud.bigquery.client.Client`
        :param client: The client to use.

        :type stream: IO[bytes]
        :param stream: A bytes IO object open for reading.

        :type metadata: dict
        :param metadata: The metadata associated with the upload.

        :type num_retries: int
        :param num_retries: Number of upload retries. (Deprecated: This
                            argument will be removed in a future release.)

        :rtype: tuple
        :returns:
            Pair of

            * The :class:`~google.resumable_media.requests.ResumableUpload`
              that was created
            * The ``transport`` used to initiate the upload.
        """
        chunk_size = _DEFAULT_CHUNKSIZE
        transport = self._get_transport(client)
        headers = _get_upload_headers(client._connection.USER_AGENT)
        upload_url = _RESUMABLE_URL_TEMPLATE.format(project=self.project)
        upload = ResumableUpload(upload_url, chunk_size, headers=headers)

        if num_retries is not None:
            upload._retry_strategy = resumable_media.RetryStrategy(
                max_retries=num_retries)

        upload.initiate(transport,
                        stream,
                        metadata,
                        _GENERIC_CONTENT_TYPE,
                        stream_final=False)

        return upload, transport
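As the docstring says, this method only opens the session and hands back the upload and transport; the caller is expected to push chunks until the server acknowledges the final byte. A minimal sketch of that driving loop (the function name is illustrative, not the library's):

def drive_resumable_upload(upload, transport):
    # Illustrative helper, not part of the library: push chunks until the
    # upload reports completion. transmit_next_chunk raises
    # google.resumable_media.InvalidResponse on a failed request.
    response = None
    while not upload.finished:
        response = upload.transmit_next_chunk(transport)
    return response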
Example #7
    def download_audio_and_upload(self, recording_sid: str, recording_url: str) -> str:
        # Stream the recording; response.raw is only usable as a file-like
        # object when the request is made with stream=True.
        response = requests.get(url=recording_url, stream=True)
        content_type = "audio/wav"
        # The metadata "name" key is the blob name to upload to.
        metadata = {"name": recording_sid}

        self.connect(destination="storage")
        # The chunk size must be a multiple of 256 KB (256 * 1024 bytes).
        upload = ResumableUpload(self.upload_url, 1024 * 256)
        gc_resp = upload.initiate(self.transport, response.raw, metadata,
                                  content_type, stream_final=False)

        if gc_resp.status_code == 200:
            while not upload.finished:
                _ = upload.transmit_next_chunk(self.transport)
        else:
            logger.error(f"Google Cloud Response: {gc_resp.status_code}")

        return f"gs://{self.bucket_name}/{recording_sid}"
Example #8
import os

from google.auth import impersonated_credentials
from google.auth.transport.requests import AuthorizedSession
from google.cloud import storage
from google.oauth2 import service_account
from google.resumable_media.requests import ResumableUpload

# `cwd` is assumed to be a working-directory path set elsewhere in the
# module; the e-mail addresses below are redacted in the source.


def gbucket(filename):
    target_scopes = ['https://www.googleapis.com/auth/devstorage.read_only']
    source_credentials = service_account.Credentials.from_service_account_file(
        cwd + "/keelaa.json",
        scopes=target_scopes,
        subject='*****@*****.**')
    # Impersonated credentials are created here but never used below; the
    # upload runs with the default client credentials instead.
    target_credentials = impersonated_credentials.Credentials(
        source_credentials=source_credentials,
        target_principal='*****@*****.**',
        target_scopes=target_scopes,
        lifetime=500)

    client = storage.Client()
    transport = AuthorizedSession(credentials=client._credentials)
    url_template = (
        'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?'
        'uploadType=resumable')

    upload_url = url_template.format(bucket='keelaa-images')
    chunk_size = 3 * 1024 * 1024
    upload = ResumableUpload(upload_url, chunk_size)
    # Before initiate() the total size is unknown.
    print(upload.total_bytes is None)
    print(upload_url)
    stream = open(filename, 'rb')
    total_bytes = os.path.getsize(filename)
    metadata = {'name': filename}
    # With stream_final=True and an explicit total_bytes, the upload size
    # is fixed at initiation time.
    response = upload.initiate(transport,
                               stream,
                               metadata,
                               'text/plain',
                               total_bytes=total_bytes,
                               stream_final=True)
    print(upload.total_bytes == total_bytes)
    return response
Example #9
import io

from google.resumable_media.requests import ChunkedDownload, ResumableUpload

# `transport`, `bucket`, `bucket_upload`, `blob_name`, `blob_name_upload`
# and `content_type` are assumed to be defined earlier in the script.

url_template = ('https://www.googleapis.com/download/storage/v1/b/'
                '{bucket}/o/{blob_name}?alt=media')

url_template_upload = (
    'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?'
    'uploadType=resumable')

upload_url = url_template_upload.format(bucket=bucket_upload)

media_url = url_template.format(bucket=bucket, blob_name=blob_name)

chunk_size = 1 * 1024 * 1024  # 1 MB
stream = io.BytesIO()

download = ChunkedDownload(media_url, chunk_size, stream)
upload = ResumableUpload(upload_url, chunk_size)

# Download chunk by chunk, replacing commas with pipes along the way.
# Note: decoding each chunk separately assumes chunk boundaries never
# split a multi-byte character.
data = []
while not download.finished:
    response = download.consume_next_chunk(transport)
    data.append(response.content.decode("utf-8").replace(',', '|'))

new_data = ''.join(data)
stream_upload = io.BytesIO(bytes(new_data, 'UTF-8'))
metadata = {'name': blob_name_upload}
response_upload = upload.initiate(transport, stream_upload, metadata,
                                  content_type)

while not upload.finished:
    upload.transmit_next_chunk(transport)
Example #10

from io import SEEK_END, BytesIO

# QuerybookSettings and get_google_credentials are Querybook-specific
# helpers assumed to be importable in this project.


class GoogleUploadClient(object):
    def __init__(
        self,
        bucket_name: str,
        blob_name: str,
    ):
        from google.cloud import storage
        from google.auth.transport import requests

        cred = get_google_credentials()
        self._client = storage.Client(project=cred.project_id,
                                      credentials=cred)
        self._bucket = self._client.bucket(bucket_name)
        self._blob = self._bucket.blob(blob_name)

        self._chunk_size = QuerybookSettings.STORE_MIN_UPLOAD_CHUNK_SIZE

        self._transport = requests.AuthorizedSession(
            credentials=self._client._credentials)
        self._request = None  # will hold a google.resumable_media.requests.ResumableUpload

    def start(self):
        from google.resumable_media.requests import ResumableUpload

        self._stream = BytesIO()
        self._bytes_written = 0

        url = (f"https://www.googleapis.com/upload/storage/v1/b/"
               f"{self._bucket.name}/o?uploadType=resumable")
        self._request = ResumableUpload(upload_url=url,
                                        chunk_size=self._chunk_size)
        self._request.initiate(
            transport=self._transport,
            content_type="application/octet-stream",
            stream=self._stream,
            stream_final=False,
            metadata={"name": self._blob.name},
        )

    def stop(self):
        # write() keeps the buffer at or below one chunk, so a single final
        # transmit flushes it; a chunk smaller than chunk_size tells the
        # server the stream is complete.
        self._request.transmit_next_chunk(self._transport)

    def write(self, data: bytes):
        from google.resumable_media import common

        # Get the current stream pos
        cur_pos = self._stream.tell()

        # Move cursor to end for writing
        self._stream.seek(0, SEEK_END)
        data_len = len(data)
        self._stream.write(data)

        # Move it back to original position
        self._stream.seek(cur_pos)
        self._bytes_written += data_len

        bytes_in_buffer = self._bytes_written - self._stream.tell()
        while bytes_in_buffer > self._chunk_size:
            try:
                self._request.transmit_next_chunk(self._transport)
            except common.InvalidResponse:
                self._request.recover(self._transport)
            bytes_in_buffer = self._bytes_written - self._stream.tell()
        return data_len
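Taken together, the client is driven start → write → stop. A minimal usage sketch (the bucket, blob name, and data are placeholders, not part of the original example):

# Hypothetical driver for GoogleUploadClient; names are placeholders.
uploader = GoogleUploadClient(bucket_name="my-bucket",
                              blob_name="exports/report.csv")
uploader.start()                      # opens the resumable session
for row in (b"a,b,c\n", b"1,2,3\n"):  # any iterable of byte chunks
    uploader.write(row)               # buffers, transmitting full chunks
uploader.stop()                       # flushes the final partial chunk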