def put(self, content, identifier, bucket=DEFAULT_BUCKET):
    path = self.get_path(identifier, bucket)
    s3_bucket = self._s3_bucket(create=True)
    if isinstance(content, BlobStream) and content.blob_db is self:
        source = {"Bucket": self.s3_bucket_name, "Key": content.blob_path}
        s3_bucket.copy(source, path)
        obj = s3_bucket.Object(path)
        # unfortunately cannot get content-md5 here
        return BlobInfo(identifier, obj.content_length, None)
    content.seek(0)
    content_md5 = get_content_md5(content)
    content_length = get_file_size(content)
    s3_bucket.upload_fileobj(content, path)
    return BlobInfo(identifier, content_length, "md5-" + content_md5)
def put(self, content, identifier, bucket=DEFAULT_BUCKET, timeout=None):
    path = self.get_path(identifier, bucket)
    s3_bucket = self._s3_bucket(create=True)
    if isinstance(content, BlobStream) and content.blob_db is self:
        source = {"Bucket": self.s3_bucket_name, "Key": content.blob_path}
        s3_bucket.copy(source, path)
        obj = s3_bucket.Object(path)
        # unfortunately cannot get content-md5 here
        return BlobInfo(identifier, obj.content_length, None)
    content.seek(0)
    content_md5 = get_content_md5(content)
    content_length = get_file_size(content)
    s3_bucket.upload_fileobj(content, path)
    if timeout is not None:
        set_blob_expire_object(bucket, identifier, content_length, timeout)
    datadog_counter('commcare.blobs.added.count')
    datadog_counter('commcare.blobs.added.bytes', value=content_length)
    return BlobInfo(identifier, content_length, "md5-" + content_md5)
def put(self, content, basename="", bucket=DEFAULT_BUCKET): identifier = self.get_identifier(basename) path = self.get_path(identifier, bucket) self._s3_bucket(create=True) osutil = OpenFileOSUtils() transfer = S3Transfer(self.db.meta.client, osutil=osutil) transfer.upload_file(content, self.s3_bucket_name, path) content.seek(0) content_md5 = get_content_md5(content) content_length = osutil.get_file_size(content) return BlobInfo(identifier, content_length, "md5-" + content_md5)
def put(self, content, identifier=None, bucket=DEFAULT_BUCKET, **blob_meta_args):
    if identifier is None and bucket == DEFAULT_BUCKET:
        meta = self.metadb.new(**blob_meta_args)
        key = meta.key
    else:
        # legacy: can be removed with old API
        assert set(blob_meta_args).issubset({"timeout"}), blob_meta_args
        meta = None
        key = self.get_path(identifier, bucket)
    check_safe_key(key)
    s3_bucket = self._s3_bucket(create=True)
    if isinstance(content, BlobStream) and content.blob_db is self:
        obj = s3_bucket.Object(content.blob_key)
        if meta is not None:
            meta.content_length = obj.content_length
            self.metadb.put(meta)
        else:
            # legacy: can be removed with old API
            # unfortunately cannot get content-md5 here
            meta = BlobInfo(identifier, obj.content_length, None)
        source = {"Bucket": self.s3_bucket_name, "Key": content.blob_key}
        with self.report_timing('put-via-copy', key):
            s3_bucket.copy(source, key)
    else:
        content.seek(0)
        if meta is not None:
            meta.content_length = get_file_size(content)
            self.metadb.put(meta)
        else:
            # legacy: can be removed with old API
            timeout = blob_meta_args.get("timeout")
            content_md5 = get_content_md5(content)
            content_length = get_file_size(content)
            if timeout is not None:
                set_blob_expire_object(bucket, identifier, content_length, timeout)
            datadog_counter('commcare.blobs.added.count')
            datadog_counter('commcare.blobs.added.bytes', value=content_length)
            meta = BlobInfo(identifier, content_length, "md5-" + content_md5)
        with self.report_timing('put', key):
            s3_bucket.upload_fileobj(content, key)
    return meta
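# A minimal usage sketch, not taken from the source: it shows the two calling
# conventions accepted by the put() signature above. get_blob_db() and io.BytesIO
# appear elsewhere in this section; the keyword names forwarded to metadb.new()
# (domain, type_code) and all literal values here are assumptions for illustration.
import io

db = get_blob_db()

# New-style call: no identifier/bucket; extra keyword args are passed to self.metadb.new()
meta = db.put(io.BytesIO(b"new-style content"), domain="example-domain", type_code=1)

# Legacy call: explicit identifier and bucket; only "timeout" is allowed as an extra keyword
info = db.put(io.BytesIO(b"legacy content"), identifier="blob-id",
              bucket="legacy/bucket", timeout=60)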
def handle(self, zipname, **options):
    from_zip = zipfile.ZipFile(zipname)
    to_db = get_blob_db()
    for filename in from_zip.namelist():
        bucket = '/'.join(filename.split('/')[:-1])
        identifier = filename.split('/')[-1]
        blob = io.BytesIO(from_zip.read(filename))
        # copy_blob only needs the identifier
        blob_info = BlobInfo(identifier=identifier, length="", digest="")
        to_db.copy_blob(blob, blob_info, bucket)
def process_object(self, object):
    blob_id = object.restore_blob_id
    info = BlobInfo(identifier=blob_id, length=object.content_length, digest=None)
    self.total_blobs += 1
    db = get_blob_db()
    try:
        content = db.get(blob_id)
    except NotFound:
        self.not_found += 1
    else:
        with content:
            self.db.copy_blob(content, info, DEFAULT_BUCKET)
def process_object(self, attachment):
    from_db = get_blob_db()
    bucket = attachment.blobdb_bucket()
    blob_id = attachment.blob_id
    info = BlobInfo(identifier=blob_id, length=attachment.content_length,
                    digest="md5=" + attachment.md5)
    self.total_blobs += 1
    try:
        content = from_db.get(blob_id, bucket)
    except NotFound:
        self.not_found += 1
    else:
        with content:
            self.db.copy_blob(content, info, bucket)
def put(self, content, identifier, bucket=DEFAULT_BUCKET):
    path = self.get_path(identifier, bucket)
    dirpath = dirname(path)
    if not isdir(dirpath):
        os.makedirs(dirpath)
    length = 0
    digest = md5()
    with openfile(path, "xb") as fh:
        while True:
            chunk = content.read(CHUNK_SIZE)
            if not chunk:
                break
            fh.write(chunk)
            length += len(chunk)
            digest.update(chunk)
    b64digest = base64.b64encode(digest.digest())
    return BlobInfo(identifier, length, "md5-" + b64digest)
def process_doc(self, doc):
    from corehq.form_processor.backends.sql.dbaccessors import FormAccessorSQL
    from corehq.blobs import BlobInfo
    from_db = get_blob_db()
    for attachment in FormAccessorSQL.get_attachments(doc['_id']):
        bucket = attachment.blobdb_bucket()
        blob_id = attachment.blob_id
        info = BlobInfo(identifier=blob_id, length=attachment.content_length,
                        digest="md5=" + attachment.md5)
        self.total_blobs += 1
        try:
            content = from_db.get(blob_id, bucket)
        except NotFound:
            self.not_found += 1
        else:
            with content:
                self.db.copy_blob(content, info, bucket)
    return True
def put(self, content, identifier=None, bucket=DEFAULT_BUCKET, **blob_meta_args):
    if identifier is None and bucket == DEFAULT_BUCKET:
        meta = self.metadb.new(**blob_meta_args)
        path = self.get_path(key=meta.key)
    else:
        # legacy: can be removed with old API
        assert set(blob_meta_args).issubset({"timeout"}), blob_meta_args
        meta = None
        path = self.get_path(identifier, bucket)
    dirpath = dirname(path)
    if not isdir(dirpath):
        os.makedirs(dirpath)
    length = 0
    digest = md5()
    with open(path, "wb") as fh:
        while True:
            chunk = content.read(CHUNK_SIZE)
            if not chunk:
                break
            fh.write(chunk)
            length += len(chunk)
            digest.update(chunk)
    if meta is None:
        # legacy: can be removed with old API
        b64digest = base64.b64encode(digest.digest())
        timeout = blob_meta_args.get("timeout")
        if timeout is not None:
            set_blob_expire_object(bucket, identifier, length, timeout)
        datadog_counter('commcare.blobs.added.count')
        datadog_counter('commcare.blobs.added.bytes', value=length)
        return BlobInfo(identifier, length, "md5-" + b64digest.decode('utf-8'))
    meta.content_length = length
    self.metadb.put(meta)
    return meta
def put(self, content, identifier, bucket=DEFAULT_BUCKET, timeout=None):
    path = self.get_path(identifier, bucket)
    dirpath = dirname(path)
    if not isdir(dirpath):
        os.makedirs(dirpath)
    length = 0
    digest = md5()
    with openfile(path, "xb") as fh:
        while True:
            chunk = content.read(CHUNK_SIZE)
            if not chunk:
                break
            fh.write(chunk)
            length += len(chunk)
            digest.update(chunk)
    b64digest = base64.b64encode(digest.digest())
    if timeout is not None:
        set_blob_expire_object(bucket, identifier, length, timeout)
    datadog_counter('commcare.blobs.added.count')
    datadog_counter('commcare.blobs.added.bytes', value=length)
    return BlobInfo(identifier, length, "md5-" + b64digest)
def info(self):
    return BlobInfo(self.id, self.content_length, self.digest)
def info(self):
    return BlobInfo(self.key, self.content_length, "unknown-md5")
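# A minimal sketch (an assumption, not the source definition) of the BlobInfo
# value returned and constructed throughout this section; the field names and
# order match how it is used above, both positionally and with the
# identifier/length/digest keywords.
from collections import namedtuple

BlobInfo = namedtuple("BlobInfo", ["identifier", "length", "digest"])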