コード例 #1
0
ファイル: table.py プロジェクト: cksharma/gcloud-python
    def upload_from_file(self,  # pylint: disable=R0913,R0914
                         file_obj,
                         source_format,
                         rewind=False,
                         size=None,
                         num_retries=6,
                         allow_jagged_rows=None,
                         allow_quoted_newlines=None,
                         create_disposition=None,
                         encoding=None,
                         field_delimiter=None,
                         ignore_unknown_values=None,
                         max_bad_records=None,
                         quote_character=None,
                         skip_leading_rows=None,
                         write_disposition=None,
                         client=None):
        """Upload the contents of this table from a file-like object.

        The content type of the upload will either be
        - The value passed in to the function (if any)
        - ``text/csv``.

        :type file_obj: file
        :param file_obj: A file handle open for reading.

        :type source_format: str
        :param source_format: one of 'CSV' or 'NEWLINE_DELIMITED_JSON'.
                              job configuration option; see
                              :meth:`gcloud.bigquery.job.LoadJob`

        :type rewind: boolean
        :param rewind: If True, seek to the beginning of the file handle before
                       writing the file to Cloud Storage.

        :type size: int
        :param size: The number of bytes to read from the file handle.
                     If not provided, we'll try to guess the size using
                     :func:`os.fstat`. (If the file handle is not from the
                     filesystem this won't be possible.)

        :type num_retries: integer
        :param num_retries: Number of upload retries. Defaults to 6.

        :type allow_jagged_rows: boolean
        :param allow_jagged_rows: job configuration option;  see
                                  :meth:`gcloud.bigquery.job.LoadJob`

        :type allow_quoted_newlines: boolean
        :param allow_quoted_newlines: job configuration option; see
                                      :meth:`gcloud.bigquery.job.LoadJob`

        :type create_disposition: str
        :param create_disposition: job configuration option; see
                                   :meth:`gcloud.bigquery.job.LoadJob`

        :type encoding: str
        :param encoding: job configuration option; see
                         :meth:`gcloud.bigquery.job.LoadJob`

        :type field_delimiter: str
        :param field_delimiter: job configuration option; see
                                :meth:`gcloud.bigquery.job.LoadJob`

        :type ignore_unknown_values: boolean
        :param ignore_unknown_values: job configuration option; see
                                      :meth:`gcloud.bigquery.job.LoadJob`

        :type max_bad_records: integer
        :param max_bad_records: job configuration option; see
                                :meth:`gcloud.bigquery.job.LoadJob`

        :type quote_character: str
        :param quote_character: job configuration option; see
                                :meth:`gcloud.bigquery.job.LoadJob`

        :type skip_leading_rows: integer
        :param skip_leading_rows: job configuration option; see
                                  :meth:`gcloud.bigquery.job.LoadJob`

        :type write_disposition: str
        :param write_disposition: job configuration option; see
                                  :meth:`gcloud.bigquery.job.LoadJob`

        :type client: :class:`gcloud.storage.client.Client` or ``NoneType``
        :param client: Optional. The client to use.  If not passed, falls back
                       to the ``client`` stored on the current dataset.

        :rtype: :class:`gcloud.bigquery.jobs.LoadTableFromStorageJob`
        :returns: the job instance used to load the data (e.g., for
                  querying status)
        :raises: :class:`ValueError` if size is not passed in and can not be
                 determined
        """
        client = self._require_client(client)
        connection = client.connection
        content_type = 'application/octet-stream'

        # Rewind the file if desired.
        if rewind:
            file_obj.seek(0, os.SEEK_SET)

        # Get the basic stats about the file.
        total_bytes = size
        if total_bytes is None:
            if hasattr(file_obj, 'fileno'):
                total_bytes = os.fstat(file_obj.fileno()).st_size
            else:
                raise ValueError('total bytes could not be determined. Please '
                                 'pass an explicit size.')
        headers = {
            'Accept': 'application/json',
            'Accept-Encoding': 'gzip, deflate',
            'User-Agent': connection.USER_AGENT,
            'content-type': 'application/json',
        }

        metadata = {
            'configuration': {
                'load': {
                    'sourceFormat': source_format,
                    'schema': {
                        'fields': _build_schema_resource(self._schema),
                    },
                    'destinationTable': {
                        'projectId': self._dataset.project,
                        'datasetId': self._dataset.name,
                        'tableId': self.name,
                    }
                }
            }
        }

        _configure_job_metadata(metadata, allow_jagged_rows,
                                allow_quoted_newlines, create_disposition,
                                encoding, field_delimiter,
                                ignore_unknown_values, max_bad_records,
                                quote_character, skip_leading_rows,
                                write_disposition)

        upload = Upload(file_obj, content_type, total_bytes,
                        auto_transfer=False)

        url_builder = _UrlBuilder()
        upload_config = _UploadConfig()

        # Base URL may change once we know simple vs. resumable.
        base_url = connection.API_BASE_URL + '/upload'
        path = '/projects/%s/jobs' % (self._dataset.project,)
        upload_url = connection.build_api_url(api_base_url=base_url, path=path)

        # Use apitools 'Upload' facility.
        request = Request(upload_url, 'POST', headers,
                          body=json.dumps(metadata))

        upload.configure_request(upload_config, request, url_builder)
        query_params = url_builder.query_params
        base_url = connection.API_BASE_URL + '/upload'
        request.url = connection.build_api_url(api_base_url=base_url,
                                               path=path,
                                               query_params=query_params)
        upload.initialize_upload(request, connection.http)

        if upload.strategy == RESUMABLE_UPLOAD:
            http_response = upload.stream_file(use_chunks=True)
        else:
            http_response = make_api_request(connection.http, request,
                                             retries=num_retries)
        response_content = http_response.content
        if not isinstance(response_content,
                          six.string_types):  # pragma: NO COVER  Python3
            response_content = response_content.decode('utf-8')
        return client.job_from_resource(json.loads(response_content))
コード例 #2
0
    def upload_from_file(self,  # pylint: disable=R0913,R0914
                         file_obj,
                         source_format,
                         rewind=False,
                         size=None,
                         num_retries=6,
                         allow_jagged_rows=None,
                         allow_quoted_newlines=None,
                         create_disposition=None,
                         encoding=None,
                         field_delimiter=None,
                         ignore_unknown_values=None,
                         max_bad_records=None,
                         quote_character=None,
                         skip_leading_rows=None,
                         write_disposition=None,
                         client=None):
        """Upload the contents of this table from a file-like object.

        The content type of the upload will either be
        - The value passed in to the function (if any)
        - ``text/csv``.

        :type file_obj: file
        :param file_obj: A file handle open for reading.

        :type source_format: string
        :param source_format: one of 'CSV' or 'NEWLINE_DELIMITED_JSON'.
                              job configuration option; see
                              :meth:`gcloud.bigquery.job.LoadJob`

        :type rewind: boolean
        :param rewind: If True, seek to the beginning of the file handle before
                       writing the file to Cloud Storage.

        :type size: int
        :param size: The number of bytes to read from the file handle.
                     If not provided, we'll try to guess the size using
                     :func:`os.fstat`. (If the file handle is not from the
                     filesystem this won't be possible.)

        :type num_retries: integer
        :param num_retries: Number of upload retries. Defaults to 6.

        :type allow_jagged_rows: boolean
        :param allow_jagged_rows: job configuration option;  see
                                  :meth:`gcloud.bigquery.job.LoadJob`

        :type allow_quoted_newlines: boolean
        :param allow_quoted_newlines: job configuration option; see
                                      :meth:`gcloud.bigquery.job.LoadJob`

        :type create_disposition: string
        :param create_disposition: job configuration option; see
                                   :meth:`gcloud.bigquery.job.LoadJob`

        :type encoding: string
        :param encoding: job configuration option; see
                         :meth:`gcloud.bigquery.job.LoadJob`

        :type field_delimiter: string
        :param field_delimiter: job configuration option; see
                                :meth:`gcloud.bigquery.job.LoadJob`

        :type ignore_unknown_values: boolean
        :param ignore_unknown_values: job configuration option; see
                                      :meth:`gcloud.bigquery.job.LoadJob`

        :type max_bad_records: integer
        :param max_bad_records: job configuration option; see
                                :meth:`gcloud.bigquery.job.LoadJob`

        :type quote_character: string
        :param quote_character: job configuration option; see
                                :meth:`gcloud.bigquery.job.LoadJob`

        :type skip_leading_rows: integer
        :param skip_leading_rows: job configuration option; see
                                  :meth:`gcloud.bigquery.job.LoadJob`

        :type write_disposition: string
        :param write_disposition: job configuration option; see
                                  :meth:`gcloud.bigquery.job.LoadJob`

        :type client: :class:`gcloud.storage.client.Client` or ``NoneType``
        :param client: Optional. The client to use.  If not passed, falls back
                       to the ``client`` stored on the current dataset.

        :rtype: :class:`gcloud.bigquery.jobs.LoadTableFromStorageJob`
        :returns: the job instance used to load the data (e.g., for
                  querying status)
        :raises: :class:`ValueError` if size is not passed in and can not be
                 determined
        """
        client = self._require_client(client)
        connection = client.connection
        content_type = 'application/octet-stream'

        # Rewind the file if desired.
        if rewind:
            file_obj.seek(0, os.SEEK_SET)

        # Get the basic stats about the file.
        total_bytes = size
        if total_bytes is None:
            if hasattr(file_obj, 'fileno'):
                total_bytes = os.fstat(file_obj.fileno()).st_size
            else:
                raise ValueError('total bytes could not be determined. Please '
                                 'pass an explicit size.')
        headers = {
            'Accept': 'application/json',
            'Accept-Encoding': 'gzip, deflate',
            'User-Agent': connection.USER_AGENT,
            'content-type': 'application/json',
        }

        metadata = {
            'configuration': {
                'load': {
                    'sourceFormat': source_format,
                    'schema': {
                        'fields': _build_schema_resource(self._schema),
                    },
                    'destinationTable': {
                        'projectId': self._dataset.project,
                        'datasetId': self._dataset.name,
                        'tableId': self.name,
                    }
                }
            }
        }

        _configure_job_metadata(metadata, allow_jagged_rows,
                                allow_quoted_newlines, create_disposition,
                                encoding, field_delimiter,
                                ignore_unknown_values, max_bad_records,
                                quote_character, skip_leading_rows,
                                write_disposition)

        upload = Upload(file_obj, content_type, total_bytes,
                        auto_transfer=False)

        url_builder = _UrlBuilder()
        upload_config = _UploadConfig()

        # Base URL may change once we know simple vs. resumable.
        base_url = connection.API_BASE_URL + '/upload'
        path = '/projects/%s/jobs' % (self._dataset.project,)
        upload_url = connection.build_api_url(api_base_url=base_url, path=path)

        # Use apitools 'Upload' facility.
        request = Request(upload_url, 'POST', headers,
                          body=json.dumps(metadata))

        upload.configure_request(upload_config, request, url_builder)
        query_params = url_builder.query_params
        base_url = connection.API_BASE_URL + '/upload'
        request.url = connection.build_api_url(api_base_url=base_url,
                                               path=path,
                                               query_params=query_params)
        upload.initialize_upload(request, connection.http)

        if upload.strategy == RESUMABLE_UPLOAD:
            http_response = upload.stream_file(use_chunks=True)
        else:
            http_response = make_api_request(connection.http, request,
                                             retries=num_retries)
        response_content = http_response.content
        if not isinstance(response_content,
                          six.string_types):  # pragma: NO COVER  Python3
            response_content = response_content.decode('utf-8')
        return client.job_from_resource(json.loads(response_content))
コード例 #3
0
    def upload_from_file(self, file_obj, rewind=False, size=None,
                         encryption_key=None, content_type=None, num_retries=6,
                         client=None):
        """Upload the contents of this blob from a file-like object.

        The content type of the upload will either be
        - The value passed in to the function (if any)
        - The value stored on the current blob
        - The default value of 'application/octet-stream'

        .. note::
           The effect of uploading to an existing blob depends on the
           "versioning" and "lifecycle" policies defined on the blob's
           bucket.  In the absence of those policies, upload will
           overwrite any existing contents.

           See the `object versioning
           <https://cloud.google.com/storage/docs/object-versioning>`_ and
           `lifecycle <https://cloud.google.com/storage/docs/lifecycle>`_
           API documents for details.

        Uploading a file with a `customer-supplied`_ encryption key::

            >>> from gcloud import storage
            >>> from gcloud.storage import Blob

            >>> client = storage.Client(project='my-project')
            >>> bucket = client.get_bucket('my-bucket')
            >>> encryption_key = 'aa426195405adee2c8081bb9e7e74b19'
            >>> blob = Blob('secure-data', bucket)
            >>> with open('my-file', 'rb') as my_file:
            >>>     blob.upload_from_file(my_file,
            ...                           encryption_key=encryption_key)

        The ``encryption_key`` should be a str or bytes with a length of at
        least 32.

        .. _customer-supplied: https://cloud.google.com/storage/docs/\
                               encryption#customer-supplied

        :type file_obj: file
        :param file_obj: A file handle open for reading.

        :type rewind: boolean
        :param rewind: If True, seek to the beginning of the file handle before
                       writing the file to Cloud Storage.

        :type size: int
        :param size: The number of bytes to read from the file handle.
                     If not provided, we'll try to guess the size using
                     :func:`os.fstat`. (If the file handle is not from the
                     filesystem this won't be possible.)

        :type encryption_key: str or bytes
        :param encryption_key: Optional 32 byte encryption key for
                               customer-supplied encryption.

        :type content_type: string or ``NoneType``
        :param content_type: Optional type of content being uploaded.

        :type num_retries: integer
        :param num_retries: Number of upload retries. Defaults to 6.

        :type client: :class:`gcloud.storage.client.Client` or ``NoneType``
        :param client: Optional. The client to use.  If not passed, falls back
                       to the ``client`` stored on the blob's bucket.

        :raises: :class:`ValueError` if size is not passed in and can not be
                 determined; :class:`gcloud.exceptions.GCloudError` if the
                 upload response returns an error status.
        """
        client = self._require_client(client)
        # Use the private ``_connection`` rather than the public
        # ``.connection``, since the public connection may be a batch. A
        # batch wraps a client's connection, but does not store the `http`
        # object. The rest (API_BASE_URL and build_api_url) are also defined
        # on the Batch class, but we just use the wrapped connection since
        # it has all three (http, API_BASE_URL and build_api_url).
        connection = client._connection
        content_type = (content_type or self._properties.get('contentType') or
                        'application/octet-stream')

        # Rewind the file if desired.
        if rewind:
            file_obj.seek(0, os.SEEK_SET)

        # Get the basic stats about the file.
        total_bytes = size
        if total_bytes is None:
            if hasattr(file_obj, 'fileno'):
                total_bytes = os.fstat(file_obj.fileno()).st_size
            else:
                raise ValueError('total bytes could not be determined. Please '
                                 'pass an explicit size.')
        headers = {
            'Accept': 'application/json',
            'Accept-Encoding': 'gzip, deflate',
            'User-Agent': connection.USER_AGENT,
        }

        if encryption_key:
            _set_encryption_headers(encryption_key, headers)

        upload = Upload(file_obj, content_type, total_bytes,
                        auto_transfer=False)

        if self.chunk_size is not None:
            upload.chunksize = self.chunk_size

        url_builder = _UrlBuilder(bucket_name=self.bucket.name,
                                  object_name=self.name)
        upload_config = _UploadConfig()

        # Temporary URL, until we know simple vs. resumable.
        base_url = connection.API_BASE_URL + '/upload'
        upload_url = connection.build_api_url(api_base_url=base_url,
                                              path=self.bucket.path + '/o')

        # Use apitools 'Upload' facility.
        request = Request(upload_url, 'POST', headers)

        upload.configure_request(upload_config, request, url_builder)
        query_params = url_builder.query_params
        base_url = connection.API_BASE_URL + '/upload'
        request.url = connection.build_api_url(api_base_url=base_url,
                                               path=self.bucket.path + '/o',
                                               query_params=query_params)
        upload.initialize_upload(request, connection.http)

        if upload.strategy == RESUMABLE_UPLOAD:
            http_response = upload.stream_file(use_chunks=True)
        else:
            http_response = make_api_request(connection.http, request,
                                             retries=num_retries)

        self._check_response_error(request, http_response)
        response_content = http_response.content

        if not isinstance(response_content,
                          six.string_types):  # pragma: NO COVER  Python3
            response_content = response_content.decode('utf-8')
        self._set_properties(json.loads(response_content))
コード例 #4
0
    def upload_from_file(self,
                         file_obj,
                         rewind=False,
                         size=None,
                         content_type=None,
                         num_retries=6,
                         client=None):
        """Upload the contents of this blob from a file-like object.

        The content type of the upload will either be
        - The value passed in to the function (if any)
        - The value stored on the current blob
        - The default value of 'application/octet-stream'

        .. note::
           The effect of uploading to an existing blob depends on the
           "versioning" and "lifecycle" policies defined on the blob's
           bucket.  In the absence of those policies, upload will
           overwrite any existing contents.

           See the `object versioning
           <https://cloud.google.com/storage/docs/object-versioning>`_ and
           `lifecycle <https://cloud.google.com/storage/docs/lifecycle>`_
           API documents for details.

        :type file_obj: file
        :param file_obj: A file handle open for reading.

        :type rewind: boolean
        :param rewind: If True, seek to the beginning of the file handle before
                       writing the file to Cloud Storage.

        :type size: int
        :param size: The number of bytes to read from the file handle.
                     If not provided, we'll try to guess the size using
                     :func:`os.fstat`. (If the file handle is not from the
                     filesystem this won't be possible.)

        :type content_type: string or ``NoneType``
        :param content_type: Optional type of content being uploaded.

        :type num_retries: integer
        :param num_retries: Number of upload retries. Defaults to 6.

        :type client: :class:`gcloud.storage.client.Client` or ``NoneType``
        :param client: Optional. The client to use.  If not passed, falls back
                       to the ``client`` stored on the blob's bucket.

        :raises: :class:`ValueError` if size is not passed in and can not be
                 determined
        """
        client = self._require_client(client)
        # Use the private ``_connection`` rather than the public
        # ``.connection``, since the public connection may be a batch. A
        # batch wraps a client's connection, but does not store the `http`
        # object. The rest (API_BASE_URL and build_api_url) are also defined
        # on the Batch class, but we just use the wrapped connection since
        # it has all three (http, API_BASE_URL and build_api_url).
        connection = client._connection
        content_type = (content_type or self._properties.get('contentType')
                        or 'application/octet-stream')

        # Rewind the file if desired.
        if rewind:
            file_obj.seek(0, os.SEEK_SET)

        # Get the basic stats about the file.
        total_bytes = size
        if total_bytes is None:
            if hasattr(file_obj, 'fileno'):
                total_bytes = os.fstat(file_obj.fileno()).st_size
            else:
                raise ValueError('total bytes could not be determined. Please '
                                 'pass an explicit size.')
        headers = {
            'Accept': 'application/json',
            'Accept-Encoding': 'gzip, deflate',
            'User-Agent': connection.USER_AGENT,
        }

        upload = Upload(file_obj,
                        content_type,
                        total_bytes,
                        auto_transfer=False)

        if self.chunk_size is not None:
            upload.chunksize = self.chunk_size

        url_builder = _UrlBuilder(bucket_name=self.bucket.name,
                                  object_name=self.name)
        upload_config = _UploadConfig()

        # Temporary URL, until we know simple vs. resumable.
        base_url = connection.API_BASE_URL + '/upload'
        upload_url = connection.build_api_url(api_base_url=base_url,
                                              path=self.bucket.path + '/o')

        # Use apitools 'Upload' facility.
        request = Request(upload_url, 'POST', headers)

        upload.configure_request(upload_config, request, url_builder)
        query_params = url_builder.query_params
        base_url = connection.API_BASE_URL + '/upload'
        request.url = connection.build_api_url(api_base_url=base_url,
                                               path=self.bucket.path + '/o',
                                               query_params=query_params)
        upload.initialize_upload(request, connection.http)

        if upload.strategy == RESUMABLE_UPLOAD:
            http_response = upload.stream_file(use_chunks=True)
        else:
            http_response = make_api_request(connection.http,
                                             request,
                                             retries=num_retries)
        response_content = http_response.content
        if not isinstance(response_content,
                          six.string_types):  # pragma: NO COVER  Python3
            response_content = response_content.decode('utf-8')
        self._set_properties(json.loads(response_content))