Example 1
def do_oneshot_copy(source_replica: Replica, dest_replica: Replica,
                    source_key: str):
    # Copy a single object between replicas in one request (no multipart);
    # only AWS -> GCP and GCP -> AWS are supported.
    gs = Config.get_native_handle(Replica.gcp)
    if source_replica == Replica.aws and dest_replica == Replica.gcp:
        s3_bucket = resources.s3.Bucket(source_replica.bucket)  # type: ignore
        gs_bucket = gs.bucket(dest_replica.bucket)
        source = BlobLocation(platform="s3",
                              bucket=s3_bucket,
                              blob=s3_bucket.Object(source_key))
        dest = BlobLocation(platform="gs",
                            bucket=gs_bucket,
                            blob=gs_bucket.blob(source_key))
        sync_s3_to_gs_oneshot(source, dest)
    elif source_replica == Replica.gcp and dest_replica == Replica.aws:
        gs_bucket = gs.bucket(source_replica.bucket)
        s3_bucket = resources.s3.Bucket(dest_replica.bucket)  # type: ignore
        source = BlobLocation(platform="gs",
                              bucket=gs_bucket,
                              blob=gs_bucket.blob(source_key))
        source.blob.reload()  # refresh blob metadata (e.g. size) from GCS before copying
        dest = BlobLocation(platform="s3",
                            bucket=s3_bucket,
                            blob=s3_bucket.Object(source_key))
        sync_gs_to_s3_oneshot(source, dest)
    else:
        raise NotImplementedError()
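BlobLocation is constructed above but not defined in this snippet. As a rough sketch of the shape the code assumes (a platform tag plus the bucket and blob handles for that platform's SDK), not the project's actual definition:

from collections import namedtuple

# Hypothetical shape of BlobLocation as used above.
BlobLocation = namedtuple("BlobLocation", ["platform", "bucket", "blob"])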
Example 2
def test_multipart_parallel_upload(self):
    data = os.urandom(7 * 1024 * 1024)  # 7 MiB payload
    metadata = {'something': "foolish"}
    part_size = 5 * 1024 * 1024  # 5 MiB parts: the payload spans multiple parts
    s3_client = Config.get_native_handle(Replica.aws)
    bucket = os.environ['DSS_S3_BUCKET_TEST']
    with self.subTest("copy multiple parts"):
        with io.BytesIO(data) as fh:
            multipart_parallel_upload(
                s3_client,
                bucket,
                "fake_key",
                fh,
                part_size=part_size,
                metadata=metadata,
                content_type="application/octet-stream",
            )
    part_size = 14 * 1024 * 1024  # larger than the payload: a single part suffices
    with self.subTest("should work with single part"):
        with io.BytesIO(data) as fh:
            multipart_parallel_upload(
                s3_client,
                bucket,
                "fake_key",
                fh,
                part_size=part_size,
            )
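The two sub-tests vary only part_size against the fixed 7 MiB payload: a 5 MiB part size forces the upload to be split into two parts, while a 14 MiB part size keeps it in a single part. A quick sketch of that arithmetic (the helper name is illustrative, not part of the test):

import math

def expected_parts(data_len: int, part_size: int) -> int:
    # Number of parts a multipart upload produces for this payload.
    return max(1, math.ceil(data_len / part_size))

assert expected_parts(7 * 1024 * 1024, 5 * 1024 * 1024) == 2   # "copy multiple parts"
assert expected_parts(7 * 1024 * 1024, 14 * 1024 * 1024) == 1  # "should work with single part"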
Example 3
def exists(replica: Replica, key: str):
    # Report whether the key is present in the given replica's bucket.
    if replica == Replica.aws:
        try:
            resources.s3.Bucket(
                replica.bucket).Object(key).load()  # type: ignore
            return True
        except clients.s3.exceptions.ClientError:  # type: ignore
            return False
    elif replica == Replica.gcp:
        gs = Config.get_native_handle(Replica.gcp)
        gs_bucket = gs.bucket(Config.get_gs_bucket())
        return gs_bucket.blob(key).exists()
    else:
        raise NotImplementedError()
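A minimal usage sketch built on exists(); the wrapper below is hypothetical and not part of the example:

def exists_in_all_replicas(key: str) -> bool:
    # Hypothetical convenience helper: True only if the key is present
    # in every supported replica.
    return all(exists(replica, key) for replica in (Replica.aws, Replica.gcp))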
Example 4
def copy_part(upload_url: str, source_url: str, dest_platform: str,
              part: dict):
    # Stream one part of the source object (fetched via an HTTP range request)
    # into the destination's part-upload URL; S3 and GS destinations are supported.
    gs = Config.get_native_handle(Replica.gcp)
    boto3_session = boto3.session.Session()
    with closing(range_request(source_url, part["start"], part["end"])) as fh:
        if dest_platform == "s3":
            chunker = S3SigningChunker(fh, part["end"] - part["start"] + 1,
                                       boto3_session.get_credentials(), "s3",
                                       boto3_session.region_name)
            res = http.request("PUT",
                               upload_url,
                               headers=chunker.get_headers("PUT", upload_url),
                               body=chunker,
                               chunked=True,
                               retries=False)
            logger.info(f"Part upload result: {res.status}")
            assert 200 <= res.status < 300
            logger.info("Part etag: {}".format(res.headers["ETag"]))
        elif dest_platform == "gs":
            logger.info(f"Uploading part {part} to gs")
            # TODO: brianh: is mypy suppression ok?
            gs_transport = google.auth.transport.requests.AuthorizedSession(
                gs._credentials)  # type: ignore
            for start in range(0, part["end"] - part["start"] + 1,
                               gs_upload_chunk_size):
                chunk = fh.read(gs_upload_chunk_size)
                headers = {
                    "content-range":
                    get_content_range(start,
                                      start + len(chunk) - 1,
                                      total_bytes=None)
                }
                res = gs_transport.request("PUT",
                                           upload_url,
                                           data=chunk,
                                           headers=headers)
                assert 200 <= res.status_code < 400
            assert res.status_code == 200
    return res
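get_content_range is not shown here. For chunked PUTs against a GCS resumable-upload URL, the Content-Range header takes the form "bytes <start>-<end>/<total>", with "*" standing in for the total while it is still unknown; a plausible sketch under that assumption:

def get_content_range(start: int, end: int, total_bytes=None) -> str:
    # e.g. "bytes 0-8388607/*" while the total size is unknown,
    #      "bytes 0-8388607/8388608" once it is known.
    total = total_bytes if total_bytes is not None else "*"
    return f"bytes {start}-{end}/{total}"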