def do_oneshot_copy(source_replica: Replica, dest_replica: Replica, source_key: str):
    gs = Config.get_native_handle(Replica.gcp)
    if source_replica == Replica.aws and dest_replica == Replica.gcp:
        # S3 -> GS: resolve both blob locations, then stream the object in one shot.
        s3_bucket = resources.s3.Bucket(source_replica.bucket)  # type: ignore
        gs_bucket = gs.bucket(dest_replica.bucket)
        source = BlobLocation(platform="s3", bucket=s3_bucket, blob=s3_bucket.Object(source_key))
        dest = BlobLocation(platform="gs", bucket=gs_bucket, blob=gs_bucket.blob(source_key))
        sync_s3_to_gs_oneshot(source, dest)
    elif source_replica == Replica.gcp and dest_replica == Replica.aws:
        # GS -> S3: reload() populates the blob's metadata (size, etc.) from GS
        # before the copy begins.
        gs_bucket = gs.bucket(source_replica.bucket)
        s3_bucket = resources.s3.Bucket(dest_replica.bucket)  # type: ignore
        source = BlobLocation(platform="gs", bucket=gs_bucket, blob=gs_bucket.blob(source_key))
        source.blob.reload()
        dest = BlobLocation(platform="s3", bucket=s3_bucket, blob=s3_bucket.Object(source_key))
        sync_gs_to_s3_oneshot(source, dest)
    else:
        raise NotImplementedError()
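# A minimal usage sketch, not from the original source: the key name is
# hypothetical, and this assumes Replica.aws / Replica.gcp are configured with
# their bucket names as the function above expects.
def _example_oneshot_copy():
    # Copy one object from the AWS replica into the GCP replica.
    do_oneshot_copy(Replica.aws, Replica.gcp, "bundles/example.json")
    # The reverse direction exercises the gs -> s3 branch.
    do_oneshot_copy(Replica.gcp, Replica.aws, "bundles/example.json")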
def test_multipart_parallel_upload(self):
    data = os.urandom(7 * 1024 * 1024)
    metadata = {'something': "foolish"}
    # 7 MiB of data with a 5 MiB part size forces a genuine multipart upload.
    part_size = 5 * 1024 * 1024
    s3_client = Config.get_native_handle(Replica.aws)
    bucket = os.environ['DSS_S3_BUCKET_TEST']
    with self.subTest("copy multiple parts"):
        with io.BytesIO(data) as fh:
            multipart_parallel_upload(
                s3_client,
                bucket,
                "fake_key",
                fh,
                part_size=part_size,
                metadata=metadata,
                content_type="application/octet-stream",
            )
    # A 14 MiB part size fits the whole payload into a single part.
    part_size = 14 * 1024 * 1024
    with self.subTest("should work with single part"):
        with io.BytesIO(data) as fh:
            multipart_parallel_upload(
                s3_client,
                bucket,
                "fake_key",
                fh,
                part_size=part_size,
            )
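# A hedged sketch (an assumption, not the project's actual implementation) of
# what a helper like multipart_parallel_upload might look like, using only
# documented boto3 client calls. Parts are read serially from the file handle
# but uploaded concurrently; S3 requires every part except the last to be at
# least 5 MiB, which the test's part sizes respect.
from concurrent.futures import ThreadPoolExecutor

def sketch_multipart_parallel_upload(s3_client, bucket: str, key: str, fh, *,
                                     part_size: int, metadata: dict = None,
                                     content_type: str = "application/octet-stream"):
    kwargs = dict(Bucket=bucket, Key=key, ContentType=content_type)
    if metadata:
        kwargs["Metadata"] = metadata
    mpu = s3_client.create_multipart_upload(**kwargs)

    def upload_part(part_number: int, data: bytes) -> dict:
        res = s3_client.upload_part(Bucket=bucket, Key=key, UploadId=mpu["UploadId"],
                                    PartNumber=part_number, Body=data)
        return {"ETag": res["ETag"], "PartNumber": part_number}

    futures = []
    with ThreadPoolExecutor(max_workers=4) as executor:
        part_number = 1
        while True:
            data = fh.read(part_size)
            if not data:
                break
            futures.append(executor.submit(upload_part, part_number, data))
            part_number += 1
    # Completion requires the parts listed in ascending part-number order.
    parts = sorted((f.result() for f in futures), key=lambda p: p["PartNumber"])
    s3_client.complete_multipart_upload(Bucket=bucket, Key=key, UploadId=mpu["UploadId"],
                                        MultipartUpload={"Parts": parts})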
def exists(replica: Replica, key: str):
    if replica == Replica.aws:
        try:
            # load() issues a HEAD request; a ClientError means the object is
            # absent (or inaccessible).
            resources.s3.Bucket(replica.bucket).Object(key).load()  # type: ignore
            return True
        except clients.s3.exceptions.ClientError:  # type: ignore
            return False
    elif replica == Replica.gcp:
        gs = Config.get_native_handle(Replica.gcp)
        gs_bucket = gs.bucket(Config.get_gs_bucket())
        return gs_bucket.blob(key).exists()
    else:
        raise NotImplementedError()
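# Illustrative usage (the key name is hypothetical): check the destination
# replica before triggering a one-shot copy with do_oneshot_copy above.
def _example_copy_if_missing(key: str = "blobs/deadbeef"):
    if not exists(Replica.gcp, key):
        do_oneshot_copy(Replica.aws, Replica.gcp, key)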
def copy_part(upload_url: str, source_url: str, dest_platform: str, part: dict):
    gs = Config.get_native_handle(Replica.gcp)
    boto3_session = boto3.session.Session()
    with closing(range_request(source_url, part["start"], part["end"])) as fh:
        if dest_platform == "s3":
            chunker = S3SigningChunker(fh,
                                       part["end"] - part["start"] + 1,
                                       boto3_session.get_credentials(),
                                       "s3",
                                       boto3_session.region_name)
            res = http.request("PUT", upload_url,
                               headers=chunker.get_headers("PUT", upload_url),
                               body=chunker,
                               chunked=True,
                               retries=False)
            logger.info(f"Part upload result: {res.status}")
            assert 200 <= res.status < 300
            logger.info("Part etag: {}".format(res.headers["ETag"]))
        elif dest_platform == "gs":
            logger.info(f"Uploading part {part} to gs")
            # TODO: brianh: is mypy suppression ok?
            gs_transport = google.auth.transport.requests.AuthorizedSession(gs._credentials)  # type: ignore
            for start in range(0, part["end"] - part["start"] + 1, gs_upload_chunk_size):
                chunk = fh.read(gs_upload_chunk_size)
                headers = {"content-range": get_content_range(start, start + len(chunk) - 1, total_bytes=None)}
                res = gs_transport.request("PUT", upload_url, data=chunk, headers=headers)
                # GS resumable uploads return 308 for intermediate chunks...
                assert 200 <= res.status_code < 400
            # ...and 200 once the final chunk lands.
            assert res.status_code == 200
    return res
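# A plausible implementation (an assumption; the real helper lives elsewhere
# in the codebase) of the get_content_range helper used above. GS resumable
# uploads take a Content-Range of the form "bytes <start>-<end>/<total>", with
# "*" standing in when the total size is not yet known, which is why the loop
# above passes total_bytes=None.
def sketch_get_content_range(start: int, end: int, total_bytes: int = None) -> str:
    total = "*" if total_bytes is None else str(total_bytes)
    return f"bytes {start}-{end}/{total}"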