def _delete_part(conf: Config, bucket: str, name: str) -> None:
    req = Request(
        url=build_url("/storage/v1/b/{bucket}/o/{object}", bucket=bucket, object=name),
        method="DELETE",
        success_codes=(204, 404),
    )
    execute_api_request(conf, req)
def remote_copy(conf: Config, src: str, dst: str, return_md5: bool) -> Optional[str]:
    srcbucket, srcname = split_path(src)
    dstbucket, dstname = split_path(dst)
    params = {}
    while True:
        req = Request(
            url=build_url(
                "/storage/v1/b/{sourceBucket}/o/{sourceObject}/rewriteTo/b/{destinationBucket}/o/{destinationObject}",
                sourceBucket=srcbucket,
                sourceObject=srcname,
                destinationBucket=dstbucket,
                destinationObject=dstname,
            ),
            method="POST",
            params=params,
            success_codes=(200, 404),
        )
        resp = execute_api_request(conf, req)
        if resp.status == 404:
            raise FileNotFoundError(f"Source file not found: '{src}'")
        result = json.loads(resp.data)
        if result["done"]:
            if return_md5:
                return get_md5(result["resource"])
            else:
                return
        # a rewrite may not complete in a single call (e.g. for large or
        # cross-location copies); resume it by passing back the token
        params["rewriteToken"] = result["rewriteToken"]
def create_api_request(req: Request, access_token: str) -> Request:
    if req.headers is None:
        headers = {}
    else:
        headers = dict(req.headers).copy()

    if req.params is None:
        params = {}
    else:
        params = dict(req.params).copy()

    headers["Authorization"] = f"Bearer {access_token}"

    data = req.data
    if data is not None and isinstance(data, dict):
        data = json.dumps(data).encode("utf8")
        assert "Content-Type" not in headers
        headers["Content-Type"] = "application/json"

    return Request(
        method=req.method,
        url=req.url,
        params=params,
        headers=headers,
        data=data,
        preload_content=req.preload_content,
        success_codes=tuple(req.success_codes),
        retry_codes=tuple(req.retry_codes),
    )
def remove(conf: Config, path: str) -> bool:
    bucket, blob = split_path(path)
    if blob == "":
        raise FileNotFoundError(f"The system cannot find the path specified: '{path}'")
    req = Request(
        url=build_url("/storage/v1/b/{bucket}/o/{object}", bucket=bucket, object=blob),
        method="DELETE",
        success_codes=(204, 404),
    )
    resp = execute_api_request(conf, req)
    return resp.status == 204
def _upload_part(conf: Config, path: str, start: int, size: int, dst: str) -> str:
    bucket, blob = split_path(dst)
    req = Request(
        url=build_url("/upload/storage/v1/b/{bucket}/o", bucket=bucket),
        method="POST",
        params=dict(uploadType="media", name=blob),
        data=FileBody(path, start=start, end=start + size),
        success_codes=(200,),
    )
    resp = execute_api_request(conf, req)
    metadata = json.loads(resp.data)
    return metadata["generation"]
def mkdirfile(conf: Config, path: str) -> None:
    # a "directory" is represented by an empty object whose name ends with "/"
    if not path.endswith("/"):
        path += "/"
    bucket, blob = split_path(path)
    req = Request(
        url=build_url("/upload/storage/v1/b/{bucket}/o", bucket=bucket),
        method="POST",
        params=dict(uploadType="media", name=blob),
        success_codes=(200, 400),
    )
    resp = execute_api_request(conf, req)
    if resp.status == 400:
        raise Error(f"Unable to create directory, bucket does not exist: '{path}'")
def maybe_stat(conf: Config, path: str) -> Optional[Stat]:
    bucket, blob = split_path(path)
    if blob == "":
        return None
    req = Request(
        url=build_url("/storage/v1/b/{bucket}/o/{object}", bucket=bucket, object=blob),
        method="GET",
        success_codes=(200, 404),
    )
    resp = execute_api_request(conf, req)
    if resp.status != 200:
        return None
    return make_stat(json.loads(resp.data))
def maybe_update_md5(conf: Config, path: str, generation: str, hexdigest: str) -> bool:
    bucket, blob = split_path(path)
    req = Request(
        url=build_url("/storage/v1/b/{bucket}/o/{object}", bucket=bucket, object=blob),
        method="PATCH",
        params=dict(ifGenerationMatch=generation),
        # it looks like we can't set the underlying md5Hash, only the metadata fields
        data=dict(metadata={"md5": hexdigest}),
        success_codes=(200, 404, 412),
    )
    resp = execute_api_request(conf, req)
    return resp.status == 200
def _create_page_iterator(
    conf: Config, url: str, method: str, params: Mapping[str, str]
) -> Iterator[Dict[str, Any]]:
    p = dict(params).copy()
    while True:
        req = Request(url=url, method=method, params=p, success_codes=(200, 404))
        resp = execute_api_request(conf, req)
        if resp.status == 404:
            return
        result = json.loads(resp.data)
        yield result
        if "nextPageToken" not in result:
            break
        p["pageToken"] = result["nextPageToken"]
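# Illustrative sketch (not part of the original module): one way the page
# iterator above can be consumed to list object names under a prefix. The
# bucket/prefix values are hypothetical; `conf` is an already-constructed Config.
def _example_list_blob_names(conf: Config, bucket: str, prefix: str) -> Iterator[str]:
    it = _create_page_iterator(
        conf,
        url=build_url("/storage/v1/b/{bucket}/o", bucket=bucket),
        method="GET",
        params=dict(prefix=prefix),
    )
    for page in it:
        # each page is the raw JSON listing response; objects appear under "items"
        for item in page.get("items", []):
            yield item["name"]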
def _request_chunk(
    self, streaming: bool, start: int, end: Optional[int] = None
) -> urllib3.response.HTTPResponse:
    bucket, name = split_path(self._path)
    req = Request(
        url=build_url("/storage/v1/b/{bucket}/o/{name}", bucket=bucket, name=name),
        method="GET",
        params=dict(alt="media"),
        headers={"Range": common.calc_range(start=start, end=end)},
        success_codes=(206, 416),
        # if we are streaming the data, make
        # sure we don't preload it
        preload_content=not streaming,
    )
    return execute_api_request(self._conf, req)
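# Note on the success codes above: 206 ("Partial Content") is the normal
# response to a ranged GET, while 416 ("Range Not Satisfiable") indicates the
# requested start offset is at or beyond the end of the object; the caller
# presumably treats a 416 as end-of-data (for example when reading an empty blob).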
def isdir(conf: Config, path: str) -> bool:
    if not path.endswith("/"):
        path += "/"
    bucket, blob = split_path(path)
    if blob == "":
        # an empty blob means the path refers to the bucket itself
        req = Request(
            url=build_url("/storage/v1/b/{bucket}", bucket=bucket),
            method="GET",
            success_codes=(200, 404),
        )
        resp = execute_api_request(conf, req)
        return resp.status == 200
    else:
        req = Request(
            url=build_url("/storage/v1/b/{bucket}/o", bucket=bucket),
            method="GET",
            params=dict(prefix=blob, delimiter="/", maxResults="1"),
            success_codes=(200, 404),
        )
        resp = execute_api_request(conf, req)
        if resp.status == 404:
            return False
        result = json.loads(resp.data)
        return "items" in result or "prefixes" in result
def _refresh_access_token_request(
    client_id: str, client_secret: str, refresh_token: str
) -> Request:
    # https://developers.google.com/identity/protocols/OAuth2WebServer#offline
    data = {
        "grant_type": "refresh_token",
        "refresh_token": refresh_token,
        "client_id": client_id,
        "client_secret": client_secret,
    }
    return Request(
        url="https://www.googleapis.com/oauth2/v4/token",
        method="POST",
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        data=urllib.parse.urlencode(data).encode("utf8"),
    )
def __init__(self, conf: Config, path: str) -> None:
    bucket, name = split_path(path)
    req = Request(
        url=build_url(
            "/upload/storage/v1/b/{bucket}/o?uploadType=resumable", bucket=bucket
        ),
        method="POST",
        data=dict(name=name),
        success_codes=(200, 400, 404),
    )
    resp = execute_api_request(conf, req)
    if resp.status in (400, 404):
        raise FileNotFoundError(f"No such file or bucket: '{path}'")
    self._upload_url = resp.headers["Location"]

    # https://cloud.google.com/storage/docs/json_api/v1/how-tos/resumable-upload
    # every chunk except the last must be a multiple of 256 KiB
    assert conf.google_write_chunk_size % (256 * 1024) == 0

    super().__init__(conf=conf, chunk_size=conf.google_write_chunk_size)
def set_mtime(
    conf: Config, path: str, mtime: float, version: Optional[str] = None
) -> bool:
    bucket, blob = split_path(path)
    params = None
    if version is not None:
        params = dict(ifGenerationMatch=version)
    req = Request(
        url=build_url("/storage/v1/b/{bucket}/o/{object}", bucket=bucket, object=blob),
        method="PATCH",
        params=params,
        data=dict(metadata={"blobfile-mtime": str(mtime)}),
        success_codes=(200, 404, 412),
    )
    resp = execute_api_request(conf, req)
    if resp.status == 404:
        raise FileNotFoundError(f"No such file: '{path}'")
    return resp.status == 200
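# Note on the status codes above: 412 ("Precondition Failed") means the
# ifGenerationMatch condition did not hold, i.e. the object changed since the
# given version was observed; set_mtime reports that case by returning False
# rather than raising.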
def _create_token_request(
    client_email: str, private_key: str, scopes: List[str]
) -> Request:
    # https://developers.google.com/identity/protocols/OAuth2ServiceAccount
    now = time.time()
    claim_set = {
        "iss": client_email,
        "scope": " ".join(scopes),
        "aud": "https://www.googleapis.com/oauth2/v4/token",
        "exp": now + 60 * 60,
        "iat": now,
    }
    data = {
        "grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer",
        "assertion": _create_jwt(private_key, claim_set),
    }
    return Request(
        url="https://www.googleapis.com/oauth2/v4/token",
        method="POST",
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        data=urllib.parse.urlencode(data).encode("utf8"),
    )
def _upload_chunk(self, chunk: memoryview, finalize: bool) -> None:
    start = self._offset
    end = self._offset + len(chunk) - 1

    total_size = "*"
    if finalize:
        total_size = self._offset + len(chunk)

    headers = {
        "Content-Type": "application/octet-stream",
        "Content-Range": f"bytes {start}-{end}/{total_size}",
    }
    if len(chunk) == 0 and finalize:
        # this is not mentioned in the docs but appears to be allowed
        # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Range
        headers["Content-Range"] = f"bytes */{total_size}"

    req = Request(
        url=self._upload_url,
        data=chunk,
        headers=headers,
        method="PUT",
        success_codes=(200, 201) if finalize else (308,),
    )

    try:
        execute_api_request(self._conf, req)
    except RequestFailure as e:
        # https://cloud.google.com/storage/docs/resumable-uploads#practices
        if e.response_status in (404, 410):
            raise RestartableStreamingWriteFailure(
                message=e.message,
                request_string=e.request_string,
                response_status=e.response_status,
                error=e.error,
                error_description=e.error_description,
            )
        else:
            raise
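# Worked example (illustrative) of the Content-Range headers produced above for
# a 10 MiB object uploaded in 8 MiB chunks (sizes are hypothetical):
#   first chunk, not final:  "Content-Range: bytes 0-8388607/*"
#   second chunk, finalize:  "Content-Range: bytes 8388608-10485759/10485760"
# An empty finalizing chunk instead sends "Content-Range: bytes */<total_size>".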
def build_req() -> Request:
    return Request(
        method="GET",
        url="http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token",
        headers={"Metadata-Flavor": "Google"},
    )
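# Per the GCE instance metadata documentation, the request above returns a JSON
# body of the form {"access_token": "...", "expires_in": 3599, "token_type": "Bearer"}
# (values illustrative); the caller is presumably expected to parse out
# access_token and pass it to create_api_request.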
def parallel_upload(
    conf: Config,
    executor: concurrent.futures.Executor,
    src: str,
    dst: str,
    return_md5: bool,
) -> Optional[str]:
    with open(src, "rb") as f:
        md5_digest = common.block_md5(f)

    s = os.stat(src)

    dstbucket, dstname = split_path(dst)
    source_objects = []
    object_names = []
    max_workers = getattr(executor, "_max_workers", os.cpu_count() or 1)
    part_size = max(
        math.ceil(s.st_size / max_workers), common.PARALLEL_COPY_MINIMUM_PART_SIZE
    )
    # upload each part to a temporary object alongside the destination
    i = 0
    start = 0
    futures = []
    while start < s.st_size:
        suffix = f".part.{i}"
        future = executor.submit(
            _upload_part,
            conf,
            src,
            start,
            min(part_size, s.st_size - start),
            dst + suffix,
        )
        futures.append(future)
        object_names.append(dstname + suffix)
        i += 1
        start += part_size
    for name, future in zip(object_names, futures):
        generation = future.result()
        source_objects.append(
            {
                "name": name,
                "generation": generation,
                "objectPreconditions": {"ifGenerationMatch": generation},
            }
        )

    # compose the uploaded parts into the destination object
    req = Request(
        url=build_url(
            "/storage/v1/b/{destinationBucket}/o/{destinationObject}/compose",
            destinationBucket=dstbucket,
            destinationObject=dstname,
        ),
        method="POST",
        data={"sourceObjects": source_objects},
        success_codes=(200,),
    )
    resp = execute_api_request(conf, req)
    metadata = json.loads(resp.data)
    hexdigest = binascii.hexlify(md5_digest).decode("utf8")
    # composed objects do not get an md5Hash, so record the md5 in custom metadata
    maybe_update_md5(conf, dst, metadata["generation"], hexdigest)

    # delete parts in parallel
    delete_futures = []
    for name in object_names:
        future = executor.submit(_delete_part, conf, dstbucket, name)
        delete_futures.append(future)
    for future in delete_futures:
        future.result()

    return hexdigest if return_md5 else None
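# Worked example (illustrative) of the part-size math above: with an 8-worker
# executor, a 1 GiB source file, and a minimum part size of 64 MiB (the actual
# value of common.PARALLEL_COPY_MINIMUM_PART_SIZE may differ), part_size =
# max(ceil(1 GiB / 8), 64 MiB) = 128 MiB, so the file is uploaded as 8 parts
# named "<dstname>.part.0" through "<dstname>.part.7" and then composed.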