Esempio n. 1
0
def _delete_part(conf: Config, bucket: str, name: str) -> None:
    """Delete a temporary part object; a missing part (404) is treated as success."""
    part_url = build_url(
        "/storage/v1/b/{bucket}/o/{object}", bucket=bucket, object=name
    )
    execute_api_request(
        conf,
        Request(url=part_url, method="DELETE", success_codes=(204, 404)),
    )
Esempio n. 2
0
def remote_copy(conf: Config, src: str, dst: str, return_md5: bool) -> Optional[str]:
    """Copy src to dst server-side using the GCS rewrite API.

    Large objects may need several rewrite calls; the service returns a
    rewriteToken which must be echoed on the next request until it reports
    "done".

    Returns the destination's md5 hex digest when return_md5 is True,
    otherwise None.  Raises FileNotFoundError if the source is missing.
    """
    src_bucket, src_name = split_path(src)
    dst_bucket, dst_name = split_path(dst)
    rewrite_params = {}
    while True:
        resp = execute_api_request(
            conf,
            Request(
                url=build_url(
                    "/storage/v1/b/{sourceBucket}/o/{sourceObject}/rewriteTo/b/{destinationBucket}/o/{destinationObject}",
                    sourceBucket=src_bucket,
                    sourceObject=src_name,
                    destinationBucket=dst_bucket,
                    destinationObject=dst_name,
                ),
                method="POST",
                params=rewrite_params,
                success_codes=(200, 404),
            ),
        )
        if resp.status == 404:
            raise FileNotFoundError(f"Source file not found: '{src}'")
        result = json.loads(resp.data)
        if result["done"]:
            return get_md5(result["resource"]) if return_md5 else None
        # not finished yet: hand the token back on the next iteration
        rewrite_params["rewriteToken"] = result["rewriteToken"]
Esempio n. 3
0
def create_api_request(req: Request, access_token: str) -> Request:
    """Return a copy of req with the Bearer token attached.

    Headers and params are shallow-copied so the caller's Request is never
    mutated.  A dict payload is serialized to JSON and the Content-Type
    header is set accordingly.
    """
    # dict(...) already produces a fresh dict; the original's extra .copy()
    # was redundant.
    headers = {} if req.headers is None else dict(req.headers)
    params = {} if req.params is None else dict(req.params)

    headers["Authorization"] = f"Bearer {access_token}"

    data = req.data
    # isinstance(None, dict) is False, so no separate None check is needed
    if isinstance(data, dict):
        data = json.dumps(data).encode("utf8")
        # a caller-supplied Content-Type would be silently clobbered; fail loudly
        assert "Content-Type" not in headers
        headers["Content-Type"] = "application/json"
    return Request(
        method=req.method,
        url=req.url,
        params=params,
        headers=headers,
        data=data,
        preload_content=req.preload_content,
        success_codes=tuple(req.success_codes),
        retry_codes=tuple(req.retry_codes),
    )
Esempio n. 4
0
def remove(conf: Config, path: str) -> bool:
    """Delete the object at path; True if it existed (204), False on 404.

    Raises FileNotFoundError when path names only a bucket (empty blob name).
    """
    bucket, blob = split_path(path)
    if not blob:
        raise FileNotFoundError(f"The system cannot find the path specified: '{path}'")
    resp = execute_api_request(
        conf,
        Request(
            url=build_url(
                "/storage/v1/b/{bucket}/o/{object}", bucket=bucket, object=blob
            ),
            method="DELETE",
            success_codes=(204, 404),
        ),
    )
    return resp.status == 204
Esempio n. 5
0
def _upload_part(conf: Config, path: str, start: int, size: int, dst: str) -> str:
    """Upload bytes [start, start + size) of local file path to dst.

    Returns the generation of the uploaded object, for use as a compose
    precondition.
    """
    bucket, blob = split_path(dst)
    resp = execute_api_request(
        conf,
        Request(
            url=build_url("/upload/storage/v1/b/{bucket}/o", bucket=bucket),
            method="POST",
            params={"uploadType": "media", "name": blob},
            data=FileBody(path, start=start, end=start + size),
            success_codes=(200,),
        ),
    )
    return json.loads(resp.data)["generation"]
Esempio n. 6
0
def mkdirfile(conf: Config, path: str) -> None:
    """Create an empty placeholder object so that path behaves as a directory.

    Raises Error when the bucket does not exist (the API answers 400).
    """
    dirpath = path if path.endswith("/") else path + "/"
    bucket, blob = split_path(dirpath)
    resp = execute_api_request(
        conf,
        Request(
            url=build_url("/upload/storage/v1/b/{bucket}/o", bucket=bucket),
            method="POST",
            params={"uploadType": "media", "name": blob},
            success_codes=(200, 400),
        ),
    )
    if resp.status == 400:
        raise Error(f"Unable to create directory, bucket does not exist: '{dirpath}'")
Esempio n. 7
0
def maybe_stat(conf: Config, path: str) -> Optional[Stat]:
    """Stat the object at path, or None when it does not exist.

    A bare bucket path (empty blob name) also yields None.
    """
    bucket, blob = split_path(path)
    if not blob:
        return None
    resp = execute_api_request(
        conf,
        Request(
            url=build_url(
                "/storage/v1/b/{bucket}/o/{object}", bucket=bucket, object=blob
            ),
            method="GET",
            success_codes=(200, 404),
        ),
    )
    if resp.status == 200:
        return make_stat(json.loads(resp.data))
    return None
Esempio n. 8
0
def maybe_update_md5(conf: Config, path: str, generation: str, hexdigest: str) -> bool:
    """Attach hexdigest as custom metadata on path, guarded by generation.

    Returns True on success; False when the object is gone (404) or was
    replaced since generation was observed (412).
    """
    bucket, blob = split_path(path)
    # it looks like we can't set the underlying md5Hash, only the metadata fields
    resp = execute_api_request(
        conf,
        Request(
            url=build_url(
                "/storage/v1/b/{bucket}/o/{object}", bucket=bucket, object=blob
            ),
            method="PATCH",
            params={"ifGenerationMatch": generation},
            data={"metadata": {"md5": hexdigest}},
            success_codes=(200, 404, 412),
        ),
    )
    return resp.status == 200
Esempio n. 9
0
def _create_page_iterator(
    conf: Config, url: str, method: str, params: Mapping[str, str]
) -> Iterator[Dict[str, Any]]:
    """Yield successive result pages from a paginated GCS list API.

    Stops silently on 404 (nothing to list) and follows nextPageToken until
    the service stops returning one.
    """
    # dict(...) already makes a private copy of the caller's mapping; the
    # extra .copy() in the original was redundant.
    p = dict(params)

    while True:
        req = Request(url=url, method=method, params=p, success_codes=(200, 404))
        resp = execute_api_request(conf, req)
        if resp.status == 404:
            return
        result = json.loads(resp.data)
        yield result
        if "nextPageToken" not in result:
            break
        p["pageToken"] = result["nextPageToken"]
Esempio n. 10
0
 def _request_chunk(
     self, streaming: bool, start: int, end: Optional[int] = None
 ) -> urllib3.response.HTTPResponse:
     """Fetch object bytes in the given range via a ranged media download.

     416 (range not satisfiable) is an accepted status so reads past the end
     of the object can be detected by the caller instead of raising.
     """
     bucket, name = split_path(self._path)
     # when streaming, leave the body unread so the caller can consume it lazily
     return execute_api_request(
         self._conf,
         Request(
             url=build_url(
                 "/storage/v1/b/{bucket}/o/{name}", bucket=bucket, name=name
             ),
             method="GET",
             params={"alt": "media"},
             headers={"Range": common.calc_range(start=start, end=end)},
             success_codes=(206, 416),
             preload_content=not streaming,
         ),
     )
Esempio n. 11
0
def isdir(conf: Config, path: str) -> bool:
    """Return True when path refers to an existing bucket or "directory".

    A directory exists when at least one object or common prefix lives under
    the slash-terminated path.
    """
    if not path.endswith("/"):
        path += "/"
    bucket, blob = split_path(path)
    if blob == "":
        # bucket root: just check that the bucket itself exists
        bucket_req = Request(
            url=build_url("/storage/v1/b/{bucket}", bucket=bucket),
            method="GET",
            success_codes=(200, 404),
        )
        return execute_api_request(conf, bucket_req).status == 200
    # otherwise list at most one entry under the prefix
    list_req = Request(
        url=build_url("/storage/v1/b/{bucket}/o", bucket=bucket),
        method="GET",
        params={"prefix": blob, "delimiter": "/", "maxResults": "1"},
        success_codes=(200, 404),
    )
    resp = execute_api_request(conf, list_req)
    if resp.status == 404:
        return False
    listing = json.loads(resp.data)
    return "items" in listing or "prefixes" in listing
Esempio n. 12
0
def _refresh_access_token_request(
    client_id: str, client_secret: str, refresh_token: str
) -> Request:
    """Build the OAuth2 request that trades a refresh token for an access token."""
    # https://developers.google.com/identity/protocols/OAuth2WebServer#offline
    form = urllib.parse.urlencode(
        {
            "grant_type": "refresh_token",
            "refresh_token": refresh_token,
            "client_id": client_id,
            "client_secret": client_secret,
        }
    ).encode("utf8")
    return Request(
        url="https://www.googleapis.com/oauth2/v4/token",
        method="POST",
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        data=form,
    )
Esempio n. 13
0
 def __init__(self, conf: Config, path: str) -> None:
     """Open a resumable-upload session for path.

     Raises FileNotFoundError when the file/bucket does not exist (the API
     answers 400 or 404).
     """
     bucket, name = split_path(path)
     resp = execute_api_request(
         conf,
         Request(
             url=build_url(
                 "/upload/storage/v1/b/{bucket}/o?uploadType=resumable", bucket=bucket
             ),
             method="POST",
             data=dict(name=name),
             success_codes=(200, 400, 404),
         ),
     )
     if resp.status in (400, 404):
         raise FileNotFoundError(f"No such file or bucket: '{path}'")
     # the session URL that all subsequent chunk PUTs are sent to
     self._upload_url = resp.headers["Location"]
     # https://cloud.google.com/storage/docs/json_api/v1/how-tos/resumable-upload
     # resumable uploads require chunks in multiples of 256 KiB
     assert conf.google_write_chunk_size % (256 * 1024) == 0
     super().__init__(conf=conf, chunk_size=conf.google_write_chunk_size)
Esempio n. 14
0
def set_mtime(
    conf: Config, path: str, mtime: float, version: Optional[str] = None
) -> bool:
    """Record mtime as custom metadata on the object at path.

    When version is given, the update only applies while the object is still
    at that generation; a precondition failure (412) is reported as False.
    Raises FileNotFoundError when the object is missing.
    """
    bucket, blob = split_path(path)
    precondition = None if version is None else dict(ifGenerationMatch=version)
    resp = execute_api_request(
        conf,
        Request(
            url=build_url(
                "/storage/v1/b/{bucket}/o/{object}", bucket=bucket, object=blob
            ),
            method="PATCH",
            params=precondition,
            data=dict(metadata={"blobfile-mtime": str(mtime)}),
            success_codes=(200, 404, 412),
        ),
    )
    if resp.status == 404:
        raise FileNotFoundError(f"No such file: '{path}'")
    return resp.status == 200
Esempio n. 15
0
def _create_token_request(
    client_email: str, private_key: str, scopes: List[str]
) -> Request:
    """Build the JWT-bearer request for a service-account access token."""
    # https://developers.google.com/identity/protocols/OAuth2ServiceAccount
    issued_at = time.time()
    assertion = _create_jwt(
        private_key,
        {
            "iss": client_email,
            "scope": " ".join(scopes),
            "aud": "https://www.googleapis.com/oauth2/v4/token",
            "exp": issued_at + 60 * 60,  # token valid for one hour
            "iat": issued_at,
        },
    )
    body = urllib.parse.urlencode(
        {
            "grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer",
            "assertion": assertion,
        }
    ).encode("utf8")
    return Request(
        url="https://www.googleapis.com/oauth2/v4/token",
        method="POST",
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        data=body,
    )
Esempio n. 16
0
    def _upload_chunk(self, chunk: memoryview, finalize: bool) -> None:
        """PUT one chunk to the resumable-upload session URL.

        Intermediate chunks must answer 308; the final chunk answers 200/201.
        On 404/410 the session is dead and the whole upload must restart,
        which is surfaced as RestartableStreamingWriteFailure.
        """
        first = self._offset
        last = self._offset + len(chunk) - 1
        # total size stays "*" (unknown) until the final chunk
        total = self._offset + len(chunk) if finalize else "*"

        if finalize and len(chunk) == 0:
            # this is not mentioned in the docs but appears to be allowed
            # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Range
            content_range = f"bytes */{total}"
        else:
            content_range = f"bytes {first}-{last}/{total}"

        req = Request(
            url=self._upload_url,
            data=chunk,
            headers={
                "Content-Type": "application/octet-stream",
                "Content-Range": content_range,
            },
            method="PUT",
            success_codes=(200, 201) if finalize else (308,),
        )

        try:
            execute_api_request(self._conf, req)
        except RequestFailure as e:
            # https://cloud.google.com/storage/docs/resumable-uploads#practices
            if e.response_status in (404, 410):
                raise RestartableStreamingWriteFailure(
                    message=e.message,
                    request_string=e.request_string,
                    response_status=e.response_status,
                    error=e.error,
                    error_description=e.error_description,
                )
            else:
                raise
Esempio n. 17
0
 def build_req() -> Request:
     """Build the request for an access token from the GCE metadata server."""
     return Request(
         url="http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token",
         method="GET",
         headers={"Metadata-Flavor": "Google"},
     )
Esempio n. 18
0
def parallel_upload(
    conf: Config,
    executor: concurrent.futures.Executor,
    src: str,
    dst: str,
    return_md5: bool,
) -> Optional[str]:
    """Upload local file src to dst by splitting it into parts, uploading the
    parts concurrently on executor, then composing them server-side.

    The local file's md5 is recorded as custom metadata on the composed
    object.  Returns the md5 hex digest when return_md5 is True, else None.
    """
    with open(src, "rb") as f:
        local_md5 = common.block_md5(f)

    file_size = os.stat(src).st_size

    dstbucket, dstname = split_path(dst)

    # pick a part size that keeps all workers busy, but never below the minimum
    worker_count = getattr(executor, "_max_workers", os.cpu_count() or 1)
    part_size = max(
        math.ceil(file_size / worker_count), common.PARALLEL_COPY_MINIMUM_PART_SIZE
    )

    # kick off one upload per part; each part gets a unique ".part.N" suffix
    part_names = []
    upload_futures = []
    for part_index, offset in enumerate(range(0, file_size, part_size)):
        suffix = f".part.{part_index}"
        part_names.append(dstname + suffix)
        upload_futures.append(
            executor.submit(
                _upload_part,
                conf,
                src,
                offset,
                min(part_size, file_size - offset),
                dst + suffix,
            )
        )

    # collect generations; compose must only run if each part is unchanged
    source_objects = []
    for part_name, fut in zip(part_names, upload_futures):
        generation = fut.result()
        source_objects.append(
            {
                "name": part_name,
                "generation": generation,
                "objectPreconditions": {"ifGenerationMatch": generation},
            }
        )

    resp = execute_api_request(
        conf,
        Request(
            url=build_url(
                "/storage/v1/b/{destinationBucket}/o/{destinationObject}/compose",
                destinationBucket=dstbucket,
                destinationObject=dstname,
            ),
            method="POST",
            data={"sourceObjects": source_objects},
            success_codes=(200,),
        ),
    )
    metadata = json.loads(resp.data)
    hexdigest = binascii.hexlify(local_md5).decode("utf8")
    maybe_update_md5(conf, dst, metadata["generation"], hexdigest)

    # delete parts in parallel
    delete_futures = [
        executor.submit(_delete_part, conf, dstbucket, part_name)
        for part_name in part_names
    ]
    for fut in delete_futures:
        fut.result()

    return hexdigest if return_md5 else None