Beispiel #1
0
    async def _download_stream(
            self,
            bucket: str,
            object_name: str,
            *,
            params: Optional[Dict[str, str]] = None,
            headers: Optional[Dict[str, str]] = None,
            timeout: int = 10,
            session: Optional[Session] = None) -> StreamResponse:
        """
        Issue a GET for an object and wrap the result in `StreamResponse`.

        The headers produced by `self._headers()` are merged into `headers`
        (a non-empty dict supplied by the caller is updated in place). A
        truthy `session` is wrapped in `AioSession`; otherwise
        `self.session` is used.
        """
        # The object name is percent-encoded with no safe characters, see
        # https://cloud.google.com/storage/docs/request-endpoints#encoding
        target = f"{API_ROOT}/{bucket}/o/{quote(object_name, safe='')}"

        headers = headers or {}
        headers.update(await self._headers())

        if session:
            client = AioSession(session)
        else:
            client = self.session

        request_kwargs = {
            'headers': headers,
            'params': params or {},
            'timeout': timeout,
        }

        if BUILD_GCLOUD_REST:
            # stream argument is only expected by requests.Session, and the
            # call is not awaited in this build.
            # pylint: disable=unexpected-keyword-arg
            return StreamResponse(
                client.get(target, stream=True, **request_kwargs))
        return StreamResponse(await client.get(target, **request_kwargs))
Beispiel #2
0
    async def copy(self,
                   destination_project: str,
                   destination_dataset: str,
                   destination_table: str,
                   session: Optional[Session] = None,
                   timeout: int = 60) -> Dict[str, Any]:
        """
        Copy BQ table to another table in BQ by posting a job.

        Returns an empty dict without sending a request when any of the
        three destination identifiers is falsy; otherwise returns the
        JSON-decoded response.

        https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert
        """
        destination = (destination_project, destination_dataset,
                       destination_table)
        if not all(destination):
            return {}

        project = await self.project()
        jobs_url = f'{API_ROOT}/projects/{project}/jobs'
        job_body = self._make_copy_body(project, *destination)
        payload = json.dumps(job_body).encode('utf-8')

        headers = await self.headers()
        headers['Content-Length'] = str(len(payload))
        headers['Content-Type'] = 'application/json'

        if session:
            client = AioSession(session)
        else:
            client = self.session
        response = await client.post(jobs_url,
                                     data=payload,
                                     headers=headers,
                                     params=None,
                                     timeout=timeout)
        return await response.json()
Beispiel #3
0
    async def load(self,
                   source_uris: List[str],
                   session: Optional[Session] = None,
                   timeout: int = 60) -> Dict[str, Any]:
        """
        Loads entities from storage to big query by posting a job.

        Returns an empty dict without sending a request when `source_uris`
        is empty; otherwise returns the JSON-decoded response.

        https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert
        """
        if not source_uris:
            return {}

        project = await self.project()
        jobs_url = f'{API_ROOT}/projects/{project}/jobs'
        job_body = self._make_load_body(source_uris, project)
        payload = json.dumps(job_body).encode('utf-8')

        headers = await self.headers()
        headers['Content-Length'] = str(len(payload))
        headers['Content-Type'] = 'application/json'

        if session:
            client = AioSession(session)
        else:
            client = self.session
        response = await client.post(jobs_url,
                                     data=payload,
                                     headers=headers,
                                     params=None,
                                     timeout=timeout)
        return await response.json()
Beispiel #4
0
    async def _do_upload(self,
                         session_uri: str,
                         stream: IO[AnyStr],
                         headers: Dict[str, str],
                         *,
                         retries: int = 5,
                         session: Optional[Session] = None,
                         timeout: int = 30) -> Dict[str, Any]:
        """
        PUT `stream` to `session_uri`, retrying when `ResponseError` is raised.

        After a failed attempt, `Content-Range: */*` is set on `headers` (the
        caller's dict is updated in place) and the coroutine sleeps for
        `2 ** attempt` before trying again.

        At least one attempt is always made, even when `retries` is zero or
        negative. If the final attempt raises `ResponseError`, that error
        propagates to the caller.

        Returns the JSON-decoded body of the successful response.
        """
        s = AioSession(session) if session else self.session

        # Guarantee that `resp` is bound after the loop: run at least once
        # and re-raise on the last attempt instead of falling through.
        attempts = max(retries, 1)
        for tries in range(attempts):
            try:
                resp = await s.put(session_uri,
                                   headers=headers,
                                   data=stream,
                                   timeout=timeout)
            except ResponseError:
                if tries == attempts - 1:
                    # Out of attempts: surface the real failure.
                    raise
                headers.update({'Content-Range': '*/*'})
                await sleep(2.**tries)  # type: ignore[func-returns-value]

                continue

            break

        # content_type=None is passed through to resp.json() unchanged.
        data: Dict[str, Any] = await resp.json(content_type=None)
        return data
Beispiel #5
0
    async def runQuery(self, query: BaseQuery, transaction: str = None,
                       consistency: Consistency = Consistency.EVENTUAL,
                       session: Optional[Session] = None,
                       timeout: int = 10) -> QueryResultBatch:
        """
        POST `query` to the project's `:runQuery` endpoint.

        The read options carry `transaction` when it is truthy, otherwise
        the value of `consistency`. The `batch` entry of the JSON response
        is converted with `self.query_result_batch_kind.from_repr`.
        """
        project = await self.project()
        endpoint = f'{API_ROOT}/projects/{project}:runQuery'

        read_options = ({'transaction': transaction} if transaction
                        else {'readConsistency': consistency.value})
        request_body = {
            'partitionId': {
                'projectId': project,
                'namespaceId': self.namespace,
            },
            query.json_key: query.to_repr(),
            'readOptions': read_options,
        }
        payload = json.dumps(request_body).encode('utf-8')

        headers = await self.headers()
        headers['Content-Length'] = str(len(payload))
        headers['Content-Type'] = 'application/json'

        if session:
            client = AioSession(session)
        else:
            client = self.session
        response = await client.post(endpoint,
                                     data=payload,
                                     headers=headers,
                                     timeout=timeout)

        result: dict = await response.json()
        return self.query_result_batch_kind.from_repr(result['batch'])
Beispiel #6
0
    async def lookup(self, keys: List[Key], transaction: str = None,
                     consistency: Consistency = Consistency.STRONG,
                     session: Optional[Session] = None,
                     timeout: int = 10) -> Dict[str, Union[EntityResult, Key]]:
        """
        POST `keys` to the project's `:lookup` endpoint.

        The read options carry `transaction` when it is truthy, otherwise
        the value of `consistency`. The result maps `found` and `missing`
        to lists built with `self.entity_result_kind.from_repr`, and
        `deferred` to a list built with `self.key_kind.from_repr`; entries
        absent from the response become empty lists.
        """
        project = await self.project()
        endpoint = f'{API_ROOT}/projects/{project}:lookup'

        read_options = ({'transaction': transaction} if transaction
                        else {'readConsistency': consistency.value})
        request_body = {
            'keys': [key.to_repr() for key in keys],
            'readOptions': read_options,
        }
        payload = json.dumps(request_body).encode('utf-8')

        headers = await self.headers()
        headers['Content-Length'] = str(len(payload))
        headers['Content-Type'] = 'application/json'

        if session:
            client = AioSession(session)
        else:
            client = self.session
        response = await client.post(endpoint,
                                     data=payload,
                                     headers=headers,
                                     timeout=timeout)

        result: dict = await response.json()

        found = [self.entity_result_kind.from_repr(entry)
                 for entry in result.get('found', [])]
        missing = [self.entity_result_kind.from_repr(entry)
                   for entry in result.get('missing', [])]
        deferred = [self.key_kind.from_repr(entry)
                    for entry in result.get('deferred', [])]
        return {'found': found, 'missing': missing, 'deferred': deferred}
Beispiel #7
0
    async def patch(self,
                    table: Dict[str, Any],
                    session: Optional[Session] = None,
                    timeout: int = 60) -> Dict[str, Any]:
        """
        Patch an existing table specified by tableId from the dataset.

        `table` is updated in place: its `tableReference` entry is set to
        this project, dataset and table before it is serialised. Returns
        the JSON-decoded response.
        """
        project = await self.project()
        table_url = (f'{API_ROOT}/projects/{project}/datasets/'
                     f'{self.dataset_name}/tables/{self.table_name}')

        table['tableReference'] = dict(projectId=project,
                                       datasetId=self.dataset_name,
                                       tableId=self.table_name)
        payload = json.dumps(table).encode('utf-8')

        headers = await self.headers()

        if session:
            client = AioSession(session)
        else:
            client = self.session
        response = await client.patch(table_url,
                                      data=payload,
                                      headers=headers,
                                      timeout=timeout)
        result: Dict[str, Any] = await response.json()
        return result
Beispiel #8
0
    async def _upload_simple(self,
                             url: str,
                             object_name: str,
                             stream: io.IOBase,
                             params: dict,
                             headers: dict,
                             *,
                             session: Optional[Session] = None,
                             timeout: int = 30) -> dict:
        """
        POST `stream` to `url` as a simple (`uploadType=media`) upload.

        Both `params` and `headers` are updated in place: `name` and
        `uploadType` are set on `params`, `Accept` on `headers`. Returns
        the JSON-decoded response.
        """
        # https://cloud.google.com/storage/docs/json_api/v1/how-tos/simple-upload
        params.update({'name': object_name, 'uploadType': 'media'})
        headers['Accept'] = 'application/json'

        if session:
            client = AioSession(session)
        else:
            client = self.session
        response = await client.post(url,
                                     data=stream,
                                     headers=headers,
                                     params=params,
                                     timeout=timeout)
        result: dict = await response.json()
        return result
Beispiel #9
0
    async def export(self,
                     output_bucket_prefix: str,
                     kinds: Optional[List[str]] = None,
                     namespaces: Optional[List[str]] = None,
                     labels: Optional[Dict[str, str]] = None,
                     session: Optional[Session] = None,
                     timeout: int = 10) -> DatastoreOperation:
        """
        POST an export request to the project's `:export` endpoint.

        `kinds`, `namespaces` and `labels` fall back to empty containers
        when falsy, and `output_bucket_prefix` is sent with a `gs://`
        prefix. The JSON response is converted with
        `self.datastore_operation_kind.from_repr`.
        """
        project = await self.project()
        endpoint = f'{API_ROOT}/projects/{project}:export'

        entity_filter = {
            'kinds': kinds or [],
            'namespaceIds': namespaces or [],
        }
        request_body = {
            'entityFilter': entity_filter,
            'labels': labels or {},
            'outputUrlPrefix': f'gs://{output_bucket_prefix}',
        }
        payload = json.dumps(request_body).encode('utf-8')

        headers = await self.headers()
        headers['Content-Length'] = str(len(payload))
        headers['Content-Type'] = 'application/json'

        if session:
            client = AioSession(session)
        else:
            client = self.session
        response = await client.post(endpoint,
                                     data=payload,
                                     headers=headers,
                                     timeout=timeout)
        result: Dict[str, Any] = await response.json()

        return self.datastore_operation_kind.from_repr(result)
Beispiel #10
0
    async def publish(self,
                      topic: str,
                      messages: List[PubsubMessage],
                      session: Optional[Session] = None,
                      timeout: int = 10) -> Dict[str, Any]:
        """
        POST `messages` to the `:publish` endpoint of `topic`.

        Returns an empty dict without sending a request when `messages` is
        empty; otherwise returns the JSON-decoded response.
        """
        if not messages:
            return {}

        publish_url = f'{API_ROOT}/{topic}:publish'

        serialised = [message.to_repr() for message in messages]
        payload = json.dumps({'messages': serialised}).encode('utf-8')

        headers = await self._headers()
        headers['Content-Length'] = str(len(payload))
        headers['Content-Type'] = 'application/json'

        if session:
            client = AioSession(session)
        else:
            client = self.session
        response = await client.post(publish_url,
                                     data=payload,
                                     headers=headers,
                                     timeout=timeout)
        return await response.json()
Beispiel #11
0
    async def allocateIds(self,
                          keys: List[Key],
                          session: Optional[Session] = None,
                          timeout: int = 10) -> List[Key]:
        """
        POST `keys` to the project's `:allocateIds` endpoint.

        Returns the `keys` entry of the JSON response, each element
        converted with `self.key_kind.from_repr`.
        """
        project = await self.project()
        endpoint = f'{API_ROOT}/projects/{project}:allocateIds'

        request_body = {'keys': [key.to_repr() for key in keys]}
        payload = json.dumps(request_body).encode('utf-8')

        headers = await self.headers()
        headers['Content-Length'] = str(len(payload))
        headers['Content-Type'] = 'application/json'

        if session:
            client = AioSession(session)
        else:
            client = self.session
        response = await client.post(endpoint,
                                     data=payload,
                                     headers=headers,
                                     timeout=timeout)
        result = await response.json()

        return [self.key_kind.from_repr(entry) for entry in result['keys']]
Beispiel #12
0
    async def commit(self,
                     mutations: List[Dict[str, Any]],
                     transaction: Optional[str] = None,
                     mode: Mode = Mode.TRANSACTIONAL,
                     session: Optional[Session] = None,
                     timeout: int = 10) -> Dict[str, Any]:
        """
        POST `mutations` to the project's `:commit` endpoint.

        The request body is produced by `self._make_commit_body` from
        `mutations`, `transaction` and `mode`.

        Returns a dict with two entries:

        * `mutationResults`: the response's `mutationResults`, each
          converted with `self.mutation_result_kind.from_repr` (an empty
          list when the field is absent);
        * `indexUpdates`: the response's `indexUpdates`, or `0` when the
          field is absent.
        """
        project = await self.project()
        url = f'{API_ROOT}/projects/{project}:commit'

        body = self._make_commit_body(mutations,
                                      transaction=transaction,
                                      mode=mode)
        payload = json.dumps(body).encode('utf-8')

        headers = await self.headers()
        headers.update({
            'Content-Length': str(len(payload)),
            'Content-Type': 'application/json',
        })

        s = AioSession(session) if session else self.session
        resp = await s.post(url,
                            data=payload,
                            headers=headers,
                            timeout=timeout)
        data: Dict[str, Any] = await resp.json()

        return {
            'mutationResults': [
                self.mutation_result_kind.from_repr(r)
                for r in data.get('mutationResults', [])
            ],
            # Treat a missing field like `mutationResults` above rather than
            # raising KeyError on an otherwise successful commit.
            'indexUpdates': data.get('indexUpdates', 0),
        }
Beispiel #13
0
    async def _do_upload(self,
                         session_uri: str,
                         stream: io.IOBase,
                         headers: dict,
                         *,
                         retries: int = 5,
                         session: Optional[Session] = None,
                         timeout: int = 30) -> dict:
        """
        PUT `stream` to `session_uri`, retrying when `ResponseError` is raised.

        After a failed attempt, `Content-Range: */*` is set on `headers` (the
        caller's dict is updated in place) and the coroutine sleeps for
        `2 ** attempt` before trying again.

        At least one attempt is always made, even when `retries` is zero or
        negative. If the final attempt raises `ResponseError`, that error
        propagates to the caller.

        Returns the JSON-decoded body of the successful response.
        """
        s = AioSession(session) if session else self.session

        # Guarantee that `resp` is bound after the loop: run at least once
        # and re-raise on the last attempt instead of falling through.
        attempts = max(retries, 1)
        for tries in range(attempts):
            try:
                resp = await s.put(session_uri,
                                   headers=headers,
                                   data=stream,
                                   timeout=timeout)
            except ResponseError:
                if tries == attempts - 1:
                    # Out of attempts: surface the real failure.
                    raise
                headers.update({'Content-Range': '*/*'})
                await sleep(2.**tries)

                continue

            break

        # content_type=None is passed through to resp.json() unchanged.
        data: dict = await resp.json(content_type=None)
        return data
Beispiel #14
0
    async def patch_metadata(self,
                             bucket: str,
                             object_name: str,
                             metadata: Dict[str, Any],
                             *,
                             params: Optional[Dict[str, str]] = None,
                             headers: Optional[Dict[str, str]] = None,
                             session: Optional[Session] = None,
                             timeout: int = DEFAULT_TIMEOUT) -> Dict[str, Any]:
        """
        PATCH the object's resource with `metadata` serialised as JSON.

        The headers produced by `self._headers()` and a JSON `Content-Type`
        are merged into `headers` (a non-empty dict supplied by the caller
        is updated in place). Returns the JSON-decoded response.
        """
        # https://cloud.google.com/storage/docs/json_api/v1/objects/patch
        target = f"{API_ROOT}/{bucket}/o/{quote(object_name, safe='')}"

        params = params or {}
        headers = headers or {}
        headers.update(await self._headers())
        headers['Content-Type'] = 'application/json'

        if session:
            client = AioSession(session)
        else:
            client = self.session
        payload = json.dumps(metadata).encode('utf-8')
        response = await client.patch(target,
                                      data=payload,
                                      headers=headers,
                                      params=params,
                                      timeout=timeout)
        result: Dict[str, Any] = await response.json(content_type=None)
        return result
Beispiel #15
0
    async def delete(self,
                     session: Optional[Session] = None,
                     timeout: int = 60) -> Dict[str, Any]:
        """
        Deletes the table specified by tableId from the dataset.

        Returns the JSON-decoded response; when decoding raises, returns
        `{'response': <response text>}` instead.
        """
        project = await self.project()
        table_url = (f'{API_ROOT}/projects/{project}/datasets/'
                     f'{self.dataset_name}/tables/{self.table_name}')

        headers = await self.headers()

        if session:
            wrapper = AioSession(session)
        else:
            wrapper = self.session
        # The request is sent through the wrapper's underlying `.session`
        # object, not through the wrapper's own `delete`.
        response = await wrapper.session.delete(table_url,
                                                headers=headers,
                                                params=None,
                                                timeout=timeout)
        try:
            result: Dict[str, Any] = await response.json()
        except Exception:  # pylint: disable=broad-except
            # For some reason, `gcloud-rest` seems to have intermittent issues
            # parsing this response. In that case, fall back to returning the
            # raw response body.
            try:
                raw_text = await response.text()
            except (AttributeError, TypeError):
                # `text` was not an awaitable call here; read it as a plain
                # attribute instead.
                raw_text = response.text
            result = {'response': raw_text}

        return result
Beispiel #16
0
 def __init__(self, *,
              service_file: Optional[Union[str, IO[AnyStr]]] = None,
              token: Optional[Token] = None,
              session: Optional[Session] = None) -> None:
     """
     Set up the session wrapper and the token used by this client.

     `session` is wrapped in `AioSession` with `verify_ssl=VERIFY_SSL`. A
     truthy `token` is used as given; otherwise a `Token` is built from
     `service_file` and `SCOPES` on the wrapper's underlying session.
     """
     self.session = AioSession(session, verify_ssl=VERIFY_SSL)
     if token:
         self.token = token
     else:
         self.token = Token(service_file=service_file,
                            scopes=SCOPES,
                            session=self.session.session)
Beispiel #17
0
    async def _initiate_upload(self,
                               url: str,
                               object_name: str,
                               params: dict,
                               headers: dict,
                               *,
                               metadata: dict = None,
                               session: Optional[Session] = None) -> str:
        """
        POST the object metadata to `url` and return the `Location` header.

        `params` is updated in place with `uploadType=resumable`. `headers`
        is left untouched; a copy carrying the JSON content headers plus
        `X-Upload-Content-Type` / `X-Upload-Content-Length` (taken from
        `headers['Content-Type']` / `headers['Content-Length']`) is sent.
        """
        params['uploadType'] = 'resumable'

        # Serialise a copy of the metadata with the object name set.
        body = json.dumps({**(metadata or {}), 'name': object_name})

        post_headers = headers.copy()
        post_headers['Content-Length'] = str(len(body))
        post_headers['Content-Type'] = 'application/json; charset=UTF-8'
        post_headers['X-Upload-Content-Type'] = headers['Content-Type']
        post_headers['X-Upload-Content-Length'] = headers['Content-Length']

        if session:
            client = AioSession(session)
        else:
            client = self.session
        response = await client.post(url,
                                     headers=post_headers,
                                     params=params,
                                     data=body,
                                     timeout=10)
        location: str = response.headers['Location']
        return location
Beispiel #18
0
    async def _download(self,
                        bucket: str,
                        object_name: str,
                        *,
                        params: Optional[Dict[str, str]] = None,
                        headers: Optional[Dict[str, str]] = None,
                        timeout: int = 10,
                        session: Optional[Session] = None) -> bytes:
        """
        GET an object and return the response body.

        The headers produced by `self._headers()` are merged into `headers`
        (a non-empty dict supplied by the caller is updated in place).
        """
        # The object name is percent-encoded with no safe characters, see
        # https://cloud.google.com/storage/docs/request-endpoints#encoding
        target = f"{API_ROOT}/{bucket}/o/{quote(object_name, safe='')}"

        headers = headers or {}
        headers.update(await self._headers())

        if session:
            client = AioSession(session)
        else:
            client = self.session
        response = await client.get(target,
                                    headers=headers,
                                    params=params or {},
                                    timeout=timeout)

        # N.B. the GCS API sometimes returns 'application/octet-stream' when a
        # string was uploaded. To avoid potential weirdness, always return a
        # bytes object.
        try:
            return await response.read()
        except (AttributeError, TypeError):
            # No awaitable `read()` on this response; use `.content`.
            return response.content
Beispiel #19
0
 async def pull(self,
                subscription: str,
                max_messages: int,
                *,
                session: Optional[Session] = None,
                timeout: Optional[int] = 30) -> List[SubscriberMessage]:
     """
     Pull messages from subscription.

     Sends `maxMessages` in the request body and converts each entry of
     the response's `receivedMessages` (an empty list when absent) with
     `SubscriberMessage.from_repr`.
     """
     pull_url = f'{API_ROOT}/v1/{subscription}:pull'
     headers = await self._headers()
     body = json.dumps({'maxMessages': max_messages}).encode()

     if session:
         client = AioSession(session)
     else:
         client = self.session
     response = await client.post(pull_url,
                                  data=body,
                                  headers=headers,
                                  timeout=timeout)
     reply = await response.json()

     received = reply.get('receivedMessages', [])
     return [SubscriberMessage.from_repr(message) for message in received]
Beispiel #20
0
    async def delete(self,
                     bucket: str,
                     object_name: str,
                     *,
                     timeout: int = 10,
                     params: Optional[Dict[str, str]] = None,
                     headers: Optional[Dict[str, str]] = None,
                     session: Optional[Session] = None) -> str:
        """
        Send a DELETE for an object and return the response text.

        The headers produced by `self._headers()` are merged into `headers`
        (a non-empty dict supplied by the caller is updated in place).
        """
        # The object name is percent-encoded with no safe characters, see
        # https://cloud.google.com/storage/docs/request-endpoints#encoding
        target = f"{API_ROOT}/{bucket}/o/{quote(object_name, safe='')}"

        headers = headers or {}
        headers.update(await self._headers())

        if session:
            client = AioSession(session)
        else:
            client = self.session
        response = await client.delete(target,
                                       headers=headers,
                                       params=params or {},
                                       timeout=timeout)

        try:
            return await response.text()
        except (AttributeError, TypeError):
            # `text` was not an awaitable call here; stringify the attribute.
            return str(response.text)
Beispiel #21
0
    async def _download(self,
                        bucket: str,
                        object_name: str,
                        *,
                        params: dict = None,
                        timeout: int = 10,
                        session: Optional[Session] = None) -> bytes:
        """
        GET an object with a bearer token and return the response body.

        The token comes from `self.token.get()` and is sent in the
        `Authorization` header.
        """
        token = await self.token.get()
        # The object name is percent-encoded with no safe characters, see
        # https://cloud.google.com/storage/docs/json_api/#encoding
        target = f"{API_ROOT}/{bucket}/o/{quote(object_name, safe='')}"
        auth_headers = {'Authorization': f'Bearer {token}'}

        if session:
            client = AioSession(session)
        else:
            client = self.session
        response = await client.get(target,
                                    headers=auth_headers,
                                    params=params or {},
                                    timeout=timeout)
        # N.B. the GCS API sometimes returns 'application/octet-stream' when a
        # string was uploaded. To avoid potential weirdness, always return a
        # bytes object.
        try:
            return await response.read()
        except (AttributeError, TypeError):
            # No awaitable `read()` on this response; use `.content`.
            return response.content
Beispiel #22
0
    async def delete(self,
                     bucket: str,
                     object_name: str,
                     *,
                     params: dict = None,
                     timeout: int = 10,
                     session: Optional[Session] = None) -> str:
        """
        Send a DELETE for an object with a bearer token; return the text.

        The token comes from `self.token.get()` and is sent in the
        `Authorization` header.
        """
        token = await self.token.get()
        # The object name is percent-encoded with no safe characters, see
        # https://cloud.google.com/storage/docs/json_api/#encoding
        target = f"{API_ROOT}/{bucket}/o/{quote(object_name, safe='')}"
        auth_headers = {'Authorization': f'Bearer {token}'}

        if session:
            client = AioSession(session)
        else:
            client = self.session
        response = await client.delete(target,
                                       headers=auth_headers,
                                       params=params or {},
                                       timeout=timeout)

        try:
            return await response.text()
        except (AttributeError, TypeError):
            # `text` was not an awaitable call here; stringify the attribute.
            return str(response.text)
Beispiel #23
0
    async def _upload_multipart(self,
                                url: str,
                                object_name: str,
                                stream: IO[AnyStr],
                                params: Dict[str, str],
                                headers: Dict[str, str],
                                metadata: Dict[str, Any],
                                *,
                                session: Optional[Session] = None,
                                timeout: int = 30) -> Dict[str, Any]:
        """
        POST the object metadata and the stream contents as one multipart body.

        `params` and `headers` are updated in place (`uploadType`, and the
        multipart `Content-Type` / `Content-Length` / `Accept`). The data
        part keeps the `Content-Type` that `headers` carried on entry.
        `stream` is read in full; text is encoded as UTF-8. Returns the
        JSON-decoded response.
        """
        # https://cloud.google.com/storage/docs/json_api/v1/how-tos/multipart-upload
        params['uploadType'] = 'multipart'

        resource = {
            self._format_metadata_key(key): value
            for key, value in metadata.items()
        }
        if 'metadata' in resource:
            # Keys and values of the nested mapping are stringified; None
            # values are kept as None.
            resource['metadata'] = {
                str(key): (None if value is None else str(value))
                for key, value in resource['metadata'].items()
            }
        resource['name'] = object_name

        content = stream.read()
        if isinstance(content, str):
            payload_bytes: bytes = content.encode('utf-8')
        else:
            payload_bytes = content

        resource_part = (
            {'Content-Type': 'application/json; charset=UTF-8'},
            json.dumps(resource).encode('utf-8'),
        )
        data_part = ({'Content-Type': headers['Content-Type']}, payload_bytes)
        body, content_type = encode_multipart_formdata(
            [resource_part, data_part], choose_boundary())

        headers['Content-Type'] = content_type
        headers['Content-Length'] = str(len(body))
        headers['Accept'] = 'application/json'

        if session:
            client = AioSession(session)
        else:
            client = self.session
        if not BUILD_GCLOUD_REST:
            # Wrap data in BytesIO to ensure aiohttp does not emit warning
            # when payload size > 1MB
            body = io.BytesIO(body)  # type: ignore[assignment]

        response = await client.post(url,
                                     data=body,
                                     headers=headers,
                                     params=params,
                                     timeout=timeout)
        result: Dict[str, Any] = await response.json(content_type=None)
        return result
Beispiel #24
0
    async def copy(self, bucket: str, object_name: str,
                   destination_bucket: str, *, new_name: str = None,
                   headers: dict = None, params: dict = None,
                   timeout: int = 10,
                   session: Optional[Session] = None) -> dict:

        """
        When files are too large, multiple calls to `rewriteTo` are made. We
        refer to the same copy job by using the `rewriteToken` from the
        previous return payload in subsequent `rewriteTo` calls.

        Using the `rewriteTo` GCS API is preferred in part because it is able
        to make multiple calls to fully copy an object whereas the `copyTo` GCS
        API only calls `rewriteTo` once under the hood, and thus may fail if
        files are large.

        In the rare case you need to resume a copy operation, include the
        `rewriteToken` in the `params` dictionary. Once you begin a multi-part
        copy operation, you then have 1 week to complete the copy job.

        `new_name` defaults to `object_name` when falsy. The caller's
        `headers` dict is never modified; the request uses a copy. A
        non-empty `params` dict IS updated in place with each
        `rewriteToken` received, so the latest token stays visible to the
        caller.

        Returns the JSON-decoded payload of the last `rewriteTo` response.

        https://cloud.google.com/storage/docs/json_api/v1/objects/rewrite
        """
        token = await self.token.get()

        if not new_name:
            new_name = object_name

        url = (f"{API_ROOT}/{bucket}/o/{quote(object_name, safe='')}/rewriteTo"
               f"/b/{destination_bucket}/o/{quote(new_name, safe='')}")

        # We may optionally supply metadata* to apply to the rewritten
        # object, which explains why `rewriteTo` is a POST endpoint; however,
        # we don't expose that here so we have to send an empty body. Therefore
        # the `Content-Length` and `Content-Type` indicate an empty body.
        #
        # * https://cloud.google.com/storage/docs/json_api/v1/objects#resource
        #
        # Work on a copy so the bearer token and the empty-body headers do
        # not end up in a dict the caller may reuse for other requests.
        request_headers = dict(headers or {})
        request_headers.update({
            'Authorization': f'Bearer {token}',
            'Content-Length': '0',
            'Content-Type': '',
        })

        params = params or {}

        s = AioSession(session) if session else self.session
        resp = await s.post(url, headers=request_headers, params=params,
                            timeout=timeout)

        data: dict = await resp.json()

        # Keep calling until the response reports completion or stops
        # handing out a token to continue with.
        while not data.get('done') and data.get('rewriteToken'):
            params['rewriteToken'] = data['rewriteToken']
            resp = await s.post(url, headers=request_headers, params=params,
                                timeout=timeout)
            data = await resp.json()

        return data
Beispiel #25
0
 def __init__(self,
              project: Optional[str] = None,
              service_file: Optional[Union[str, io.IOBase]] = None,
              session: Optional[Session] = None,
              token: Optional[Token] = None) -> None:
     """
     Store the project and set up the session wrapper and token.

     `session` is wrapped in `AioSession`. A truthy `token` is used as
     given; otherwise a `Token` is built from `service_file` and `SCOPES`
     on the wrapper's underlying session.
     """
     self._project = project
     self.session = AioSession(session)
     if token:
         self.token = token
     else:
         self.token = Token(service_file=service_file,
                            scopes=SCOPES,
                            session=self.session.session)
Beispiel #26
0
    async def delete(self,
                     tname: str,
                     session: Optional[Session] = None) -> Any:
        """
        Send a DELETE for `tname` under `self.base_api_root`.

        Returns the JSON-decoded response.
        """
        task_url = f'{self.base_api_root}/{tname}'
        headers = await self.headers()

        if session:
            client = AioSession(session)
        else:
            client = self.session
        response = await client.delete(task_url, headers=headers)
        return await response.json()
Beispiel #27
0
 def __init__(self, project: str, taskqueue: str,
              service_file: Optional[Union[str, io.IOBase]] = None,
              location: str = LOCATION,
              session: Optional[Session] = None,
              token: Optional[Token] = None) -> None:
     """
     Build the API roots for one queue and set up session and token.

     `base_api_root` is `API_ROOT` plus `/v2beta3`; `api_root` extends it
     with the project, location and queue path. `session` is wrapped in
     `AioSession`. A truthy `token` is used as given; otherwise a `Token`
     is built from `service_file` and `SCOPES` on the wrapper's underlying
     session.
     """
     versioned_root = f'{API_ROOT}/v2beta3'
     self.base_api_root = versioned_root
     self.api_root = (f'{versioned_root}/projects/{project}/'
                      f'locations/{location}/queues/{taskqueue}')

     self.session = AioSession(session)
     if token:
         self.token = token
     else:
         self.token = Token(service_file=service_file,
                            scopes=SCOPES,
                            session=self.session.session)
Beispiel #28
0
    async def get_bucket_metadata(self, bucket: str, *, params: dict = None,
                                  session: Optional[Session] = None,
                                  timeout: int = 10) -> dict:
        """
        GET the resource at `API_ROOT/<bucket>`.

        Returns the JSON-decoded response.
        """
        bucket_url = f'{API_ROOT}/{bucket}'
        headers = await self._headers()

        if session:
            client = AioSession(session)
        else:
            client = self.session
        response = await client.get(bucket_url,
                                    headers=headers,
                                    params=params or {},
                                    timeout=timeout)
        result: dict = await response.json(content_type=None)
        return result
Beispiel #29
0
    async def _get_url(
            self, url: str, session: Optional[Session],
            timeout: int,
            params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        GET `url` with the headers from `self.headers()`.

        A truthy `session` is wrapped in `AioSession`; otherwise
        `self.session` is used. Returns the JSON-decoded response.
        """
        headers = await self.headers()

        if session:
            client = AioSession(session)
        else:
            client = self.session
        response = await client.get(url,
                                    headers=headers,
                                    timeout=timeout,
                                    params=params or {})
        result: Dict[str, Any] = await response.json()
        return result
Beispiel #30
0
 async def delete_topic(self,
                        topic: str,
                        *,
                        session: Optional[Session] = None,
                        timeout: Optional[int] = 10) -> None:
     """
     Delete topic.

     Sends a DELETE to `API_ROOT/<topic>`; the response is not inspected.
     """
     topic_url = f'{API_ROOT}/{topic}'
     headers = await self._headers()

     if session:
         client = AioSession(session)
     else:
         client = self.session
     await client.delete(topic_url, headers=headers, timeout=timeout)