def put(self, content, **blob_meta_args):
    meta = self.metadb.new(**blob_meta_args)
    check_safe_key(meta.key)
    s3_bucket = self._s3_bucket(create=True)
    if isinstance(content, BlobStream) and content.blob_db is self:
        meta.content_length = content.content_length
        meta.compressed_length = content.compressed_length
        self.metadb.put(meta)
        source = {"Bucket": self.s3_bucket_name, "Key": content.blob_key}
        with self.report_timing('put-via-copy', meta.key):
            s3_bucket.copy(source, meta.key)
    else:
        content.seek(0)
        if meta.is_compressed:
            content = GzipStream(content)
        chunk_sizes = []

        def _track_transfer(bytes_sent):
            chunk_sizes.append(bytes_sent)

        with self.report_timing('put', meta.key):
            s3_bucket.upload_fileobj(content, meta.key,
                                     Callback=_track_transfer)
        meta.content_length, meta.compressed_length = get_content_size(
            content, chunk_sizes)
        self.metadb.put(meta)
    return meta

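# Hedged usage sketch for put() above. "db", the metadata kwargs, and
# CODES.form_xml are illustrative assumptions, not guaranteed API.
with open("form.xml", "rb") as f:
    meta = db.put(f, domain="demo", parent_id="form-1",
                  type_code=CODES.form_xml)
print(meta.key, meta.content_length, meta.compressed_length)
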
def exists(self, key):
    check_safe_key(key)
    try:
        with maybe_not_found(throw=NotFound(key)), self.report_timing(
                'exists', key):
            self._s3_bucket().Object(key).load()
        return True
    except NotFound:
        return False

def size(self, identifier=None, bucket=DEFAULT_BUCKET, key=None):
    if not (identifier is None and bucket == DEFAULT_BUCKET):
        # legacy: can be removed with old API
        assert key is None, key
        key = self.get_path(identifier, bucket)
    check_safe_key(key)
    with maybe_not_found(throw=NotFound(key)), self.report_timing('size', key):
        return self._s3_bucket().Object(key).content_length

def safejoin(root, subpath): """Join root to subpath ensuring that the result is actually inside root """ check_safe_key(subpath) root = realpath(root) path = realpath(join(root, subpath)) if commonprefix([root + sep, path]) != root + sep: raise BadName("invalid relative path: %r" % subpath) return path
def get(self, identifier=None, bucket=DEFAULT_BUCKET, key=None):
    if not (identifier is None and bucket == DEFAULT_BUCKET):
        # legacy: can be removed with old API
        assert key is None, key
        key = self.get_path(identifier, bucket)
    check_safe_key(key)
    with maybe_not_found(throw=NotFound(key)), self.report_timing('get', key):
        resp = self._s3_bucket().Object(key).get()
    return BlobStream(resp["Body"], self, key)

def exists(self, identifier=None, bucket=DEFAULT_BUCKET, key=None):
    if not (identifier is None and bucket == DEFAULT_BUCKET):
        # legacy: can be removed with old API
        assert key is None, key
        key = self.get_path(identifier, bucket)
    check_safe_key(key)
    try:
        with maybe_not_found(throw=NotFound(key)), self.report_timing(
                'exists', key):
            self._s3_bucket().Object(key).load()
        return True
    except NotFound:
        return False

def delete(self, key):
    deleted_bytes = 0
    check_safe_key(key)
    success = False
    with maybe_not_found(), self.report_timing('delete', key):
        obj = self._s3_bucket().Object(key)
        # may raise a not found error -> return False
        deleted_bytes = obj.content_length
        obj.delete()
        success = True
    self.metadb.delete(key, deleted_bytes)
    return success

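# Hedged sketch of delete() semantics above: the return value says whether
# the S3 object existed; the metadb row is cleared either way. "db" and
# "key" are illustrative assumptions.
existed = db.delete(key)
if not existed:
    print("blob %s was already gone" % key)
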
def get(self, key=None, type_code=None, meta=None):
    key = self._validate_get_args(key, type_code, meta)
    check_safe_key(key)
    with maybe_not_found(throw=NotFound(key)), self.report_timing(
            'get', key):
        resp = self._s3_bucket().Object(key).get()
    reported_content_length = resp['ContentLength']
    body = resp["Body"]
    if meta and meta.is_compressed:
        content_length, compressed_length = \
            meta.content_length, meta.compressed_length
        body = GzipFile(key, mode='rb', fileobj=body)
    else:
        content_length, compressed_length = reported_content_length, None
    return BlobStream(body, self, key, content_length, compressed_length)

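# Hedged sketch of the metadata-aware get() above, assuming "db" and a
# BlobMeta "meta" returned by put(). For compressed blobs the GzipFile
# wrapper should yield the uncompressed bytes.
blob = db.get(meta=meta)
try:
    data = blob.read()
finally:
    blob.close()
assert len(data) == meta.content_length  # uncompressed size
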
def delete(self, *args, **kw):
    s3_bucket = self._s3_bucket()
    deleted_bytes = 0
    if "key" in kw:
        assert set(kw) == {"key"} and not args, (args, kw)
        key = kw["key"]
        check_safe_key(key)
        success = False
        with maybe_not_found():
            obj = s3_bucket.Object(key)
            # may raise a not found error -> return False
            deleted_bytes = obj.content_length
            obj.delete()
            success = True
        self.metadb.delete(key, deleted_bytes)
        return success
    # legacy: can be removed with old API
    identifier, bucket = self.get_args_for_delete(*args, **kw)
    key = self.get_path(identifier, bucket)
    check_safe_key(key)
    with maybe_not_found():
        success = True
        if identifier is None:
            summaries = s3_bucket.objects.filter(Prefix=key + "/")
            pages = ([{"Key": o.key} for o in page]
                     for page in summaries.pages())
            deleted_bytes = sum(o.size for page in summaries.pages()
                                for o in page)
            deleted_count = 0
            for objects in pages:
                resp = s3_bucket.delete_objects(Delete={"Objects": objects})
                deleted = set(d["Key"] for d in resp.get("Deleted", []))
                success = success and all(o["Key"] in deleted for o in objects)
                deleted_count += len(deleted)
        else:
            obj = s3_bucket.Object(key)
            # may raise a not found error -> return False
            deleted_bytes = obj.content_length
            obj.delete()
            deleted_count = 1
        datadog_counter('commcare.blobs.deleted.count', value=deleted_count)
        datadog_counter('commcare.blobs.deleted.bytes', value=deleted_bytes)
        return success
    return False

def put(self, content, **blob_meta_args):
    meta = self.metadb.new(**blob_meta_args)
    check_safe_key(meta.key)
    s3_bucket = self._s3_bucket(create=True)
    if isinstance(content, BlobStream) and content.blob_db is self:
        obj = s3_bucket.Object(content.blob_key)
        meta.content_length = obj.content_length
        self.metadb.put(meta)
        source = {"Bucket": self.s3_bucket_name, "Key": content.blob_key}
        with self.report_timing('put-via-copy', meta.key):
            s3_bucket.copy(source, meta.key)
    else:
        content.seek(0)
        meta.content_length = get_file_size(content)
        self.metadb.put(meta)
        with self.report_timing('put', meta.key):
            s3_bucket.upload_fileobj(content, meta.key)
    return meta

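# Hedged sketch: giving put() a BlobStream that originated from this same
# db takes the server-side copy path (s3_bucket.copy) instead of
# re-uploading the bytes. "db", "old_key", and the metadata kwargs are
# illustrative assumptions.
src = db.get(old_key)  # a BlobStream whose blob_db is db
copy_meta = db.put(src, domain="demo", parent_id="p1",
                   type_code=CODES.form_xml)
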
def put(self, content, identifier=None, bucket=DEFAULT_BUCKET,
        **blob_meta_args):
    if identifier is None and bucket == DEFAULT_BUCKET:
        meta = self.metadb.new(**blob_meta_args)
        key = meta.key
    else:
        # legacy: can be removed with old API
        assert set(blob_meta_args).issubset({"timeout"}), blob_meta_args
        meta = None
        key = self.get_path(identifier, bucket)
    check_safe_key(key)
    s3_bucket = self._s3_bucket(create=True)
    if isinstance(content, BlobStream) and content.blob_db is self:
        obj = s3_bucket.Object(content.blob_key)
        if meta is not None:
            meta.content_length = obj.content_length
            self.metadb.put(meta)
        else:
            # legacy: can be removed with old API
            # unfortunately cannot get content-md5 here
            meta = BlobInfo(identifier, obj.content_length, None)
        source = {"Bucket": self.s3_bucket_name, "Key": content.blob_key}
        with self.report_timing('put-via-copy', key):
            s3_bucket.copy(source, key)
    else:
        content.seek(0)
        if meta is not None:
            meta.content_length = get_file_size(content)
            self.metadb.put(meta)
        else:
            # legacy: can be removed with old API
            timeout = blob_meta_args.get("timeout")
            content_md5 = get_content_md5(content)
            content_length = get_file_size(content)
            if timeout is not None:
                set_blob_expire_object(bucket, identifier,
                                       content_length, timeout)
            datadog_counter('commcare.blobs.added.count')
            datadog_counter('commcare.blobs.added.bytes', value=content_length)
            meta = BlobInfo(identifier, content_length, "md5-" + content_md5)
        with self.report_timing('put', key):
            s3_bucket.upload_fileobj(content, key)
    return meta

def size(self, key):
    check_safe_key(key)
    with maybe_not_found(throw=NotFound(key)), self.report_timing('size', key):
        return self._s3_bucket().Object(key).content_length

def get(self, key):
    check_safe_key(key)
    with maybe_not_found(throw=NotFound(key)), self.report_timing('get', key):
        resp = self._s3_bucket().Object(key).get()
    return BlobStream(resp["Body"], self, key)

def get_path(self, identifier=None, bucket=DEFAULT_BUCKET):
    if identifier is None:
        check_safe_key(bucket)
        return bucket
    return safejoin(bucket, identifier)

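# Hedged sketch of legacy path resolution, assuming the string-joining
# safejoin below; the bucket and identifier values are illustrative.
db.get_path(bucket="form/xml")            # -> "form/xml"
db.get_path("abc123", bucket="form/xml")  # -> "form/xml/abc123"
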
def safejoin(root, subpath):
    # legacy: can be removed with old API
    check_safe_key(root)
    check_safe_key(subpath)
    return root + "/" + subpath