def setUp(self):
    super(CaseMultimediaS3DBTest, self).setUp()
    with trap_extra_setup(AttributeError, msg="S3_BLOB_DB_SETTINGS not configured"):
        config = settings.S3_BLOB_DB_SETTINGS
    self.s3db = TemporaryS3BlobDB(config)
    assert get_blob_db() is self.s3db, (get_blob_db(), self.s3db)

def forwards(apps, schema_editor):
    if settings.UNIT_TESTING:
        return
    try:
        get_blob_db()
    except Exception:
        raise MigrationError(
            "Cannot get blob db:\n{error}{message}".format(
                error=traceback.format_exc(),
                message=BLOB_DB_NOT_CONFIGURED,
            ))
    try:
        BlobMigrationState.objects.get(slug=slug)
        return  # already migrated
    except BlobMigrationState.DoesNotExist:
        pass
    migrator = MIGRATIONS[slug]
    total = 0
    for doc_type, model_class in doc_type_tuples_to_dict(migrator.doc_types).items():
        total += get_doc_count_by_type(model_class.get_db(), doc_type)
    if total > 500:
        message = MIGRATION_INSTRUCTIONS.format(slug=slug, total=total)
        raise MigrationNotComplete(message)
    # just do the migration if the number of documents is small
    migrated, skipped = migrator.migrate()
    if skipped:
        raise MigrationNotComplete(DOCS_SKIPPED_WARNING.format(skipped))

def setUp(self):
    db1 = TemporaryFilesystemBlobDB()
    assert get_blob_db() is db1, (get_blob_db(), db1)
    data = b'binary data not valid utf-8 \xe4\x94'
    self.not_founds = set()
    self.blob_metas = []
    for type_code in [CODES.form_xml, CODES.multimedia, CODES.data_export]:
        meta = db1.put(BytesIO(data), meta=new_meta(type_code=type_code))
        lost = new_meta(type_code=type_code, content_length=42)
        self.blob_metas.append(meta)
        self.blob_metas.append(lost)
        lost.save()
        self.not_founds.add((
            lost.id,
            lost.domain,
            lost.type_code,
            lost.parent_id,
            lost.key,
        ))
    self.test_size = len(self.blob_metas)
    db2 = TemporaryFilesystemBlobDB()
    self.db = TemporaryMigratingBlobDB(db2, db1)
    assert get_blob_db() is self.db, (get_blob_db(), self.db)
    discard_migration_state(self.slug)

def tearDownClass(cls):
    for group in Group.by_domain(DOMAIN):
        group.delete()
    delete_all_users()
    for _, item_list in cls.item_lists.items():
        item_list[0].delete()
        item_list[1].delete()
    get_blob_db().delete(key=FIXTURE_BUCKET + "/" + DOMAIN)
    cls.domain.delete()
    super(OtaFixtureTest, cls).tearDownClass()

def setUp(self):
    super(BaseMigrationTestCase, self).setUp()
    with trap_extra_setup(AttributeError, msg="S3_BLOB_DB_SETTINGS not configured"):
        config = settings.S3_BLOB_DB_SETTINGS
    self.s3db = TemporaryS3BlobDB(config)
    assert get_blob_db() is self.s3db, (get_blob_db(), self.s3db)

    FormProcessorTestUtils.delete_all_cases_forms_ledgers()
    self.domain_name = uuid.uuid4().hex
    self.domain = create_domain(self.domain_name)
    # all new domains are set complete when they are created
    DomainMigrationProgress.objects.filter(domain=self.domain_name).delete()
    self.assertFalse(should_use_sql_backend(self.domain_name))

def test_hard_delete_forms_and_attachments(self):
    forms = [create_form_for_test(DOMAIN) for i in range(3)]
    form_ids = sorted(form.form_id for form in forms)
    forms = FormAccessorSQL.get_forms(form_ids)
    self.assertEqual(3, len(forms))

    other_form = create_form_for_test('other_domain')
    self.addCleanup(lambda: FormAccessorSQL.hard_delete_forms('other_domain', [other_form.form_id]))

    attachments = sorted(
        get_blob_db().metadb.get_for_parents(form_ids),
        key=lambda meta: meta.parent_id
    )
    self.assertEqual(3, len(attachments))

    deleted = FormAccessorSQL.hard_delete_forms(DOMAIN, form_ids[1:] + [other_form.form_id])
    self.assertEqual(2, deleted)

    forms = FormAccessorSQL.get_forms(form_ids)
    self.assertEqual(1, len(forms))
    self.assertEqual(form_ids[0], forms[0].form_id)

    for attachment in attachments[1:]:
        with self.assertRaises(BlobNotFound):
            attachment.open()
    with attachments[0].open() as content:
        self.assertIsNotNone(content.read())
    other_form = FormAccessorSQL.get_form(other_form.form_id)
    self.assertIsNotNone(other_form.get_xml())

def bulk_atomic_blobs(docs):
    """Atomic blobs persistence to be used with ``db.bulk_save(docs)``

    Blobs may be added to or deleted from objects within the context body.
    Blobs previously added with ``DeferredBlobMixin.deferred_put_attachment``
    will be persisted automatically. NOTE this method will persist
    attachments, but it does not save the documents to couch. Call
    `db.bulk_save(docs)` within the context to do that.

    :param docs: A list of model objects.
    """
    save = lambda: None
    contexts = [d.atomic_blobs(save) for d in docs if hasattr(d, "atomic_blobs")]
    with ExitStack() as stack:
        for mgr in contexts:
            stack.enter_context(mgr)
        delete_blobs = []
        for doc in docs:
            if isinstance(doc, DeferredBlobMixin) and doc._deferred_blobs:
                for name, info in list(six.iteritems(doc._deferred_blobs)):
                    if info is not None:
                        doc.put_attachment(name=name, **info)
                    else:
                        meta = doc.external_blobs.pop(name, None)
                        if meta is not None:
                            delete_blobs.append(meta.key)
                    doc._deferred_blobs.pop(name)
                assert not doc._deferred_blobs, doc._deferred_blobs
        yield
        db = get_blob_db()
        for key in delete_blobs:
            db.delete(key=key)

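A minimal usage sketch of the context manager above, following its docstring; `docs` (a list of blob-backed documents) and `couch_db` (their couch database handle) are placeholder names, not taken from the source:

# Hypothetical usage: persist deferred blob attachments for a batch of
# documents, then bulk-save the documents themselves inside the context,
# as the docstring instructs.
with bulk_atomic_blobs(docs):
    couch_db.bulk_save(docs)
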
def fetch_attachment(self, name, stream=False):
    """Get named attachment

    :param stream: When true, return a file-like object that can be
    read at least once (streamers should not expect to seek within or
    read the contents of the returned file more than once).
    """
    db = get_blob_db()
    try:
        meta = self.external_blobs[name]
        blob = db.get(meta.id, self._blobdb_bucket())
    except (KeyError, NotFound):
        if self.migrating_blobs_from_couch:
            return super(BlobMixin, self).fetch_attachment(name, stream=stream)
        raise ResourceNotFound(u"{model} attachment: {name!r}".format(
            model=type(self).__name__,
            name=name))
    if stream:
        return blob
    with blob:
        body = blob.read()
    try:
        body = body.decode("utf-8", "strict")
    except UnicodeDecodeError:
        # Return bytes on decode failure, otherwise unicode.
        # Ugly, but consistent with restkit.wrappers.Response.body_string
        pass
    return body

def test_export_delete(self):
    blobdb = get_blob_db()
    data_files = []
    for domain_name in [self.domain.name, self.domain2.name]:
        data_files.append(DataFile.save_blob(
            BytesIO((domain_name + " csv").encode('utf-8')),
            domain=domain_name,
            filename="data.csv",
            description="data file",
            content_type="text/csv",
            delete_after=datetime.utcnow() + timedelta(minutes=10),
        ))
        EmailExportWhenDoneRequest.objects.create(domain=domain_name)
        self._assert_export_counts(domain_name, 1)

    self.domain.delete()

    with self.assertRaises(NotFound):
        blobdb.get(key=data_files[0].blob_id)
    with blobdb.get(key=data_files[1].blob_id) as f:
        self.assertEqual(f.read(), (self.domain2.name + " csv").encode('utf-8'))
    self._assert_export_counts(self.domain.name, 0)
    self._assert_export_counts(self.domain2.name, 1)

def as_file(self):
    try:
        value = self._fileobj
    except AttributeError:
        value = get_blob_db().get(key=self.name) if self.name else None
        self._fileobj = value
    return value

def delete_old_images():
    start = datetime.utcnow()
    max_age = start - timedelta(days=90)
    db = get_blob_db()

    def _get_query(db_name, max_age=max_age):
        return BlobMeta.objects.using(db_name).filter(
            content_type='image/jpeg',
            type_code=CODES.form_attachment,
            domain='icds-cas',
            created_on__lt=max_age
        )

    run_again = False
    for db_name in get_db_aliases_for_partitioned_query():
        bytes_deleted = 0
        metas = list(_get_query(db_name)[:1000])
        if metas:
            for meta in metas:
                bytes_deleted += meta.content_length or 0
            db.bulk_delete(metas=metas)
            datadog_counter('commcare.icds_images.bytes_deleted', value=bytes_deleted)
            datadog_counter('commcare.icds_images.count_deleted', value=len(metas))
            run_again = True

    if run_again:
        delete_old_images.delay()

def fetch_attachment(self, name, stream=False):
    """Get named attachment

    :param stream: When true, return a file-like object that can be
    read at least once (streamers should not expect to seek within or
    read the contents of the returned file more than once).
    """
    db = get_blob_db()
    try:
        try:
            key = self.external_blobs[name].key
        except KeyError:
            if self._migrating_blobs_from_couch:
                return super(BlobMixin, self) \
                    .fetch_attachment(name, stream=stream)
            raise NotFound(name)
        blob = db.get(key=key)
    except NotFound:
        raise ResourceNotFound(
            "{model} {model_id} attachment: {name!r}".format(
                model=type(self).__name__,
                model_id=self._id,
                name=name,
            ))
    if stream:
        return blob
    with blob:
        return blob.read()

def hard_delete_forms(domain, form_ids, delete_attachments=True):
    assert isinstance(form_ids, list)
    if delete_attachments:
        attachments = list(FormAccessorSQL.get_attachments_for_forms(form_ids))

    with get_cursor(XFormInstanceSQL) as cursor:
        cursor.execute('SELECT hard_delete_forms(%s, %s) AS deleted_count', [domain, form_ids])
        results = fetchall_as_namedtuple(cursor)
        deleted_count = sum([result.deleted_count for result in results])

    if delete_attachments:
        attachments_to_delete = attachments
        if deleted_count != len(form_ids):
            # in the unlikely event that we didn't delete all forms (because they weren't all
            # in the specified domain), only delete attachments for forms that were deleted.
            deleted_forms = set()
            for form_id in form_ids:
                if not FormAccessorSQL.form_exists(form_id):
                    deleted_forms.add(form_id)

            attachments_to_delete = []
            for attachment in attachments:
                if attachment.form_id in deleted_forms:
                    attachments_to_delete.append(attachment)

        db = get_blob_db()
        paths = [
            db.get_path(attachment.blob_id, attachment.blobdb_bucket())
            for attachment in attachments_to_delete
        ]
        db.bulk_delete(paths)

    return deleted_count

def __init__(self):
    self.db = get_blob_db()
    self.total_blobs = 0
    self.not_found = 0
    if not isinstance(self.db, MigratingBlobDB):
        raise MigrationError(
            "Expected to find migrating blob db backend (got %r)" % self.db)

def test_get_blobdb(self, msg, root=True, blob_dir=None):
    with tempdir() as tmp:
        if root == "file":
            tmp = join(tmp, "file")
            with open(tmp, "w") as fh:
                fh.write("x")
        conf = SharedDriveConfiguration(
            shared_drive_path=tmp if root else root,
            restore_dir=None,
            transfer_dir=None,
            temp_dir=None,
            blob_dir=blob_dir,
        )
        with override_settings(SHARED_DRIVE_CONF=conf, S3_BLOB_DB_SETTINGS=None):
            with assert_raises(mod.Error, msg=re.compile(msg)):
                mod.get_blob_db()

def dump_locations(domain, download_id, include_consumption, headers_only, task=None):
    exporter = LocationExporter(domain, include_consumption=include_consumption,
                                headers_only=headers_only, async_task=task)

    fd, path = tempfile.mkstemp()
    writer = Excel2007ExportWriter()
    writer.open(header_table=exporter.get_headers(), file=path)
    with writer:
        exporter.write_data(writer)

    with open(path, 'rb') as file_:
        db = get_blob_db()
        expiry_mins = 60
        db.put(
            file_,
            domain=domain,
            parent_id=domain,
            type_code=CODES.tempfile,
            key=download_id,
            timeout=expiry_mins,
        )

    file_format = Format.from_format(Excel2007ExportWriter.format)
    expose_blob_download(
        download_id,
        expiry=expiry_mins * 60,
        mimetype=file_format.mimetype,
        content_disposition=safe_filename_header('{}_locations'.format(domain), file_format.extension),
        download_id=download_id,
    )

def _get_restore_xml(self):
    db = get_blob_db()
    try:
        blob = db.get(self.restore_blob_id)
    except (KeyError, NotFound) as e:
        # Todo - custom exception
        raise e
    return blob

def __init__(self, *args, **kw):
    super(BlobDbBackendMigrator, self).__init__(*args, **kw)
    self.db = get_blob_db()
    self.total_blobs = 0
    self.not_found = 0
    if not isinstance(self.db, MigratingBlobDB):
        raise MigrationError(
            "Expected to find migrating blob db backend (got %r)" % self.db)

def setUp(self):
    with trap_extra_setup(AttributeError, msg="S3_BLOB_DB_SETTINGS not configured"):
        config = settings.S3_BLOB_DB_SETTINGS

    fsdb = TemporaryFilesystemBlobDB()
    assert get_blob_db() is fsdb, (get_blob_db(), fsdb)
    self.migrate_docs = docs = []
    for i in range(self.test_size):
        doc = SavedBasicExport(configuration=_mk_config("config-%s" % i))
        doc.save()
        doc.set_payload(("content %s" % i).encode('utf-8'))
        docs.append(doc)

    s3db = TemporaryS3BlobDB(config)
    self.db = TemporaryMigratingBlobDB(s3db, fsdb)
    assert get_blob_db() is self.db, (get_blob_db(), self.db)
    BaseMigrationTest.discard_migration_state(self.slug)

def save_for_later(cls, fileobj, timeout, domain, restore_user_id):
    """Save restore response for later

    :param fileobj: A file-like object.
    :param timeout: Minimum content expiration in seconds.
    :returns: A new `CachedResponse` pointing to the saved content.
    """
    name = 'restore-{}.xml'.format(uuid4().hex)
    get_blob_db().put(
        NoClose(fileobj),
        domain=domain,
        parent_id=restore_user_id,
        type_code=CODES.restore,
        key=name,
        timeout=max(timeout // 60, 60),
    )
    return cls(name)

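A hedged usage sketch for the classmethod above; `CachedResponse` as the owning class (per the docstring) and the literal arguments are assumptions, not taken from the source:

# Save an open restore payload for at least an hour and keep the handle.
with open("restore-payload.xml", "rb") as fileobj:
    cached = CachedResponse.save_for_later(
        fileobj,
        timeout=60 * 60,              # seconds
        domain="example-domain",
        restore_user_id="user-id-123",
    )
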
def test_get_blobdb(self, msg, root=True, blob_dir=None):
    with tempdir() as tmp:
        if (root == "file" and six.PY3) or (root == b"file" and six.PY2):
            tmp = join(tmp, "file" if six.PY3 else b"file")
            with open(tmp, "w", encoding='utf-8') as fh:
                fh.write("x")
        conf = SharedDriveConfiguration(
            shared_drive_path=tmp if root else root,
            restore_dir=None,
            transfer_dir=None,
            temp_dir=None,
            blob_dir=blob_dir,
        )
        with patch("corehq.blobs._db", new=[]):
            with override_settings(SHARED_DRIVE_CONF=conf, S3_BLOB_DB_SETTINGS=None):
                with assert_raises(mod.Error, msg=re.compile(msg)):
                    mod.get_blob_db()

def get_default_blob_size(bucket, blob_id):
    try:
        length = get_blob_db().size(blob_id, bucket)
    except NotFound:
        length = UNKNOWN
    if blob_id.startswith("restore-response-"):
        return BlobSize(UNKNOWN, "restore", length, bucket, blob_id)
    return BlobSize(UNKNOWN, bucket, length, bucket, blob_id)

def __init__(self, slug, couchdb, filename=None):
    super(BlobDbBackendMigrator, self).__init__(slug, couchdb, filename)
    self.db = get_blob_db()
    self.total_blobs = 0
    self.not_found = 0
    if not isinstance(self.db, MigratingBlobDB):
        raise MigrationError(
            "Expected to find migrating blob db backend (got %r)" % self.db)

def delete_content(self):
    db = get_blob_db()
    bucket = self._blobdb_bucket()
    deleted = db.delete(self.blob_id, bucket)
    if deleted:
        self.blob_id = None
    return deleted

def __init__(self, bucket, meta_model):
    """
    :meta_model is a django model used to store meta info
        must contain columns identifier, filename, length
    """
    self._bucket = bucket
    self._db = get_blob_db()
    self._meta_model = meta_model

def __init__(self, *args, **kw):
    super(TemporaryBlobDBMixin, self).__init__(*args, **kw)

    blobs._db.append(self)
    try:
        # verify get_blob_db() returns our new db
        assert blobs.get_blob_db() is self, 'got wrong blob db'
    except:
        self.close()
        raise

def write_content(self, content):
    if not self.name:
        raise InvalidAttachment("cannot save attachment without name")
    db = get_blob_db()
    bucket = self._blobdb_bucket()
    info = db.put(content, self.name, bucket)
    self.md5 = info.md5_hash
    self.content_length = info.length
    self.blob_id = info.identifier

def expose_zipped_blob_download(data_path, filename, format, domain):
    """Expose zipped file content as a blob download

    :param data_path: Path to data file. Will be deleted.
    :param filename: File name.
    :param format: `couchexport.models.Format` constant.
    :param domain: Domain name.
    :returns: A link to download the file.
    """
    try:
        _, zip_temp_path = tempfile.mkstemp(".zip")
        with ZipFile(zip_temp_path, 'w') as zip_file_:
            zip_file_.write(data_path, filename)
    finally:
        os.remove(data_path)

    try:
        expiry_mins = 60 * 24
        file_format = Format.from_format(format)
        file_name_header = safe_filename_header(filename, file_format.extension)
        ref = expose_blob_download(
            filename,
            expiry=expiry_mins * 60,
            mimetype=file_format.mimetype,
            content_disposition=file_name_header
        )
        with open(zip_temp_path, 'rb') as file_:
            get_blob_db().put(
                file_,
                domain=domain,
                parent_id=domain,
                type_code=CODES.tempfile,
                key=ref.download_id,
                timeout=expiry_mins
            )
    finally:
        os.remove(zip_temp_path)

    return "%s%s?%s" % (
        get_url_base(),
        reverse('retrieve_download', kwargs={'download_id': ref.download_id}),
        "get_file"  # download immediately rather than rendering page
    )

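A small sketch of how the helper above might be invoked; the file path, filename, and domain are illustrative assumptions (note the helper deletes the source file, per its docstring):

# Zip an existing data file and get back a ready-to-share download link.
link = expose_zipped_blob_download(
    "/tmp/report-data.csv",
    "report-data.csv",
    Format.CSV,
    "example-domain",
)
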
def check_blobdb():
    """Save something to the blobdb and try reading it back."""
    db = get_blob_db()
    contents = "It takes Pluto 248 Earth years to complete one orbit!"
    info = db.put(StringIO(contents))
    with db.get(info.identifier) as fh:
        res = fh.read()
    db.delete(info.identifier)
    if res == contents:
        return ServiceStatus(True, "Successfully saved a file to the blobdb")
    return ServiceStatus(False, "Failed to save a file to the blobdb")

def put_attachment(self, content, name=None, content_type=None,
                   content_length=None, domain=None, type_code=None):
    """Put attachment in blob database

    See `get_short_identifier()` for restrictions on the upper bound
    for number of attachments per object.

    :param content: String or file object.
    """
    db = get_blob_db()

    if name is None:
        name = getattr(content, "name", None)
    if name is None:
        raise InvalidAttachment("cannot save attachment without name")
    if self._id is None:
        raise ResourceNotFound("cannot put attachment on unidentified document")
    if hasattr(self, "domain"):
        if domain is not None and self.domain != domain:
            raise ValueError("domain mismatch: %s != %s" % (self.domain, domain))
        domain = self.domain
    elif domain is None:
        raise ValueError("domain attribute or argument is required")
    old_meta = self.blobs.get(name)

    if isinstance(content, six.text_type):
        content = BytesIO(content.encode("utf-8"))
    elif isinstance(content, bytes):
        content = BytesIO(content)

    # do we need to worry about BlobDB reading beyond content_length?
    meta = db.put(
        content,
        domain=domain or self.domain,
        parent_id=self._id,
        name=name,
        type_code=(self._blobdb_type_code if type_code is None else type_code),
        content_type=content_type,
    )
    self.external_blobs[name] = BlobMetaRef(
        key=meta.key,
        blobmeta_id=meta.id,
        content_type=content_type,
        content_length=meta.content_length,
    )
    if self._migrating_blobs_from_couch and self._attachments:
        self._attachments.pop(name, None)
    if self._atomic_blobs is None:
        self.save()
        if old_meta and old_meta.key:
            db.delete(key=old_meta.key)
    elif old_meta and old_meta.key:
        self._atomic_blobs[name].append(old_meta.key)
    return True

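A minimal sketch of calling the method above, assuming `doc` is a saved blob-backed document with a `domain` attribute (a hypothetical name, not from the source):

# Content may be text, bytes, or a file object; name and content_type
# describe the stored attachment.
doc.put_attachment(
    b"name,orbit_years\npluto,248\n",
    name="orbits.csv",
    content_type="text/csv",
)
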
def delete_expired_blobs():
    run_again = False
    bytes_deleted = 0
    for dbname in get_db_aliases_for_partitioned_query():
        expired = list(BlobMeta.objects.using(dbname).filter(
            expires_on__isnull=False,
            expires_on__lt=_utcnow(),
        )[:1000])
        if not expired:
            continue
        if len(expired) == 1000:
            run_again = True
        get_blob_db().bulk_delete(metas=expired)
        log.info("deleted expired blobs: %r", [m.key for m in expired])
        shard_deleted = sum(m.content_length for m in expired)
        bytes_deleted += shard_deleted
        datadog_counter('commcare.temp_blobs.bytes_deleted', value=shard_deleted)

    if run_again:
        delete_expired_blobs.delay()

    return bytes_deleted

def toHttpResponse(self):
    blob_db = get_blob_db()
    file_obj = blob_db.get(self.identifier, self.bucket)
    blob_size = blob_db.size(self.identifier, self.bucket)

    response = StreamingHttpResponse(FileWrapper(file_obj, CHUNK_SIZE),
                                     content_type=self.content_type)
    response['Content-Length'] = blob_size
    response['Content-Disposition'] = self.content_disposition
    for k, v in self.extras.items():
        response[k] = v
    return response

def write_file(self, f, filename, domain):
    identifier = random_url_id(16)
    meta = get_blob_db().put(f, domain=domain, parent_id=domain,
                             type_code=CODES.data_import, key=identifier)
    assert identifier == meta.key, (identifier, meta.key)
    file_meta = self._meta_model(identifier=identifier, filename=filename,
                                 length=meta.content_length)
    file_meta.save()
    return file_meta

def _generate_incremental_export(incremental_export, last_doc_date=None):
    export_instance = incremental_export.export_instance
    export_instance.export_format = Format.UNZIPPED_CSV  # force to unzipped CSV
    # Remove the date period from the ExportInstance, since this is added automatically by Daily Saved exports
    export_instance.filters.date_period = None
    filters = export_instance.get_filters()
    if last_doc_date:
        filters.append(ServerModifiedOnRangeFilter(gt=last_doc_date))

    class LastDocTracker:
        def __init__(self, doc_iterator):
            self.doc_iterator = doc_iterator
            self.last_doc = None
            self.doc_count = 0

        def __iter__(self):
            for doc in self.doc_iterator:
                self.last_doc = doc
                self.doc_count += 1
                yield doc

    with TransientTempfile() as temp_path, metrics_track_errors('generate_incremental_exports'):
        writer = get_export_writer([export_instance], temp_path, allow_pagination=False)
        with writer.open([export_instance]):
            query = get_export_query(export_instance, filters)
            query = query.sort('server_modified_on')  # reset sort to this instead of opened_on
            docs = LastDocTracker(query.run().hits)
            write_export_instance(writer, export_instance, docs)

        export_file = ExportFile(writer.path, writer.format)

        if docs.doc_count <= 0:
            return

        new_checkpoint = incremental_export.checkpoint(
            docs.doc_count, docs.last_doc.get('server_modified_on'))

        with export_file as file_:
            db = get_blob_db()
            db.put(
                file_,
                domain=incremental_export.domain,
                parent_id=new_checkpoint.blob_parent_id,
                type_code=CODES.data_export,
                key=str(new_checkpoint.blob_key),
                timeout=24 * 60,
            )
    return new_checkpoint

def populate_export_download_task(export_instances, filters, download_id,
                                  filename=None, expiry=10 * 60):
    """
    :param expiry: Time period for the export to be available for download in minutes
    """
    domain = export_instances[0].domain
    with TransientTempfile() as temp_path, datadog_track_errors('populate_export_download_task'):
        export_file = get_export_file(
            export_instances,
            filters,
            temp_path,
            # We don't have a great way to calculate progress if it's a bulk download,
            # so only track the progress for single instance exports.
            progress_tracker=populate_export_download_task if len(export_instances) == 1 else None
        )

        file_format = Format.from_format(export_file.format)
        filename = filename or export_instances[0].name

        with export_file as file_:
            db = get_blob_db()
            db.put(
                file_,
                domain=domain,
                parent_id=domain,
                type_code=CODES.data_export,
                key=download_id,
                timeout=expiry,
            )

            expose_blob_download(
                download_id,
                expiry=expiry * 60,
                mimetype=file_format.mimetype,
                content_disposition=safe_filename_header(filename, file_format.extension),
                download_id=download_id,
            )

    email_requests = EmailExportWhenDoneRequest.objects.filter(
        domain=domain,
        download_id=download_id
    )
    for email_request in email_requests:
        try:
            couch_user = CouchUser.get_by_user_id(email_request.user_id, domain=domain)
        except CouchUser.AccountTypeError:
            pass
        else:
            if couch_user is not None:
                process_email_request(domain, download_id, couch_user.get_email())
    email_requests.delete()

def save_dump_to_blob(data_file_path, data_file_name, result_file_format):
    expiry_mins = 60 * 24
    with open(data_file_path, 'rb') as file_:
        blob_db = get_blob_db()
        blob_db.put(file_, data_file_name, timeout=expiry_mins)
    file_format = Format.from_format(result_file_format)
    file_name_header = safe_filename_header(data_file_name, file_format.extension)
    blob_dl_object = expose_blob_download(
        data_file_name,
        expiry=expiry_mins * 60,
        mimetype=file_format.mimetype,
        content_disposition=file_name_header)
    return blob_dl_object.download_id

def setUpClass(cls):
    super().setUpClass()
    cls.db = TemporaryFilesystemBlobDB()
    assert get_blob_db() is cls.db, (get_blob_db(), cls.db)
    cls.data = data = b'binary data not valid utf-8 \xe4\x94'
    cls.blob_metas = []
    cls.not_found = set()
    cls.domain_name = str(uuid.uuid4())

    for type_code in [CODES.form_xml, CODES.multimedia, CODES.data_export]:
        for domain in (cls.domain_name, str(uuid.uuid4())):
            meta = cls.db.put(BytesIO(data), meta=new_meta(domain=domain, type_code=type_code))
            lost = new_meta(domain=domain, type_code=type_code, content_length=42)
            cls.blob_metas.append(meta)
            cls.blob_metas.append(lost)
            lost.save()
            cls.not_found.add(lost.key)

def run_data_pull(data_pull_slug, domain, month, location_id=None, email=None):
    subject = _('Custom ICDS Data Pull')
    try:
        filename = DataExporter(data_pull_slug, "icds-ucr-citus", month=month,
                                location_id=location_id).export()
    except Exception:
        if email:
            message = _("""
            Hi,
            Could not generate the requested data pull.
            The error has been notified. Please report as an issue for quicker followup
            """)
            send_html_email_async.delay(subject, [email], message,
                                        email_from=settings.DEFAULT_FROM_EMAIL)
        raise
    else:
        if email and filename:
            db = get_blob_db()
            download_id = DownloadBase.new_id_prefix + make_uuid()
            with open(filename, 'rb') as _file:
                db.put(
                    _file,
                    domain=domain,
                    parent_id=domain,
                    type_code=CODES.data_export,
                    key=download_id,
                    timeout=24 * 60,
                )
            exposed_download = expose_blob_download(
                filename,
                expiry=24 * 60 * 60,
                mimetype=Format.from_format(Format.ZIP).mimetype,
                content_disposition=safe_filename_header(filename),
                download_id=download_id)
            os.remove(filename)
            path = reverse('retrieve_download', kwargs={'download_id': exposed_download.download_id})
            link = f"{web.get_url_base()}{path}?get_file"
            message = _("""
            Hi,
            Please download the data from {link}.
            The data is available only for 24 hours.
            """).format(link=link)
            send_html_email_async.delay(subject, [email], message,
                                        email_from=settings.DEFAULT_FROM_EMAIL)

def store_file_in_blobdb(domain, export_file, expired=BLOB_EXPIRATION_TIME):
    db = get_blob_db()
    key = uuid.uuid4().hex
    try:
        kw = {"meta": db.metadb.get(parent_id='AaaFile', key=key)}
    except BlobMeta.DoesNotExist:
        kw = {
            "domain": domain,
            "parent_id": 'AaaFile',
            "type_code": CODES.tempfile,
            "key": key,
            "timeout": expired
        }
    return db.put(export_file, **kw)

def diff_form_state(form_id, *, in_couch=False):
    if form_id is None:
        old = {"form_state": "unknown"}
        new = {"form_state": "unknown"}
    else:
        in_couch = in_couch or couch_form_exists(form_id)
        in_sql = sql_form_exists(form_id)
        couch_miss = "missing"
        if not in_couch and get_blob_db().metadb.get_for_parent(form_id):
            couch_miss = MISSING_BLOB_PRESENT
            log.warning("couch form missing, blob present: %s", form_id)
        old = {"form_state": FORM_PRESENT if in_couch else couch_miss}
        new = {"form_state": FORM_PRESENT if in_sql else "missing"}
    return old, new

def process_object(self, object):
    blob_id = object.restore_blob_id
    info = BlobInfo(identifier=blob_id, length=object.content_length, digest=None)
    self.total_blobs += 1
    db = get_blob_db()
    try:
        content = db.get(blob_id)
    except NotFound:
        self.not_found += 1
    else:
        with content:
            self.db.copy_blob(content, info, DEFAULT_BUCKET)

def process_object(self, attachment):
    from_db = get_blob_db()
    bucket = attachment.blobdb_bucket()
    blob_id = attachment.blob_id
    info = BlobInfo(identifier=blob_id, length=attachment.content_length,
                    digest="md5=" + attachment.md5)
    self.total_blobs += 1
    try:
        content = from_db.get(blob_id, bucket)
    except NotFound:
        self.not_found += 1
    else:
        with content:
            self.db.copy_blob(content, info, bucket)

def process_doc(self, doc):
    obj = BlobHelper(doc, self.couchdb)
    bucket = obj._blobdb_bucket()
    assert obj.external_blobs and obj.external_blobs == obj.blobs, doc
    from_db = get_blob_db()
    for name, meta in obj.blobs.iteritems():
        self.total_blobs += 1
        try:
            content = from_db.get(meta.id, bucket)
        except NotFound:
            self.not_found += 1
        else:
            with content:
                self.db.copy_blob(content, meta.info, bucket)
    return True

def handle(self, zipname, **options):
    from_zip = zipfile.ZipFile(zipname)

    to_db = get_blob_db()
    for filename in from_zip.namelist():
        bucket = '/'.join(filename.split('/')[:-1])
        identifier = filename.split('/')[-1]
        blob = cStringIO.StringIO(from_zip.read(filename))
        # copy_blob only needs the identifier
        blob_info = BlobInfo(identifier=identifier, length="", digest="")
        try:
            to_db.copy_blob(blob, blob_info, bucket)
        except FileExists:
            continue

def store_file_in_blobdb(self, file, expired=EXPIRED):
    db = get_blob_db()
    try:
        kw = {"meta": db.metadb.get(
            parent_id='IcdsFile', key=self.blob_id
        )}
    except BlobMeta.DoesNotExist:
        kw = {
            "domain": DASHBOARD_DOMAIN,
            "parent_id": 'IcdsFile',
            "type_code": CODES.tempfile,
            "key": self.blob_id,
            "timeout": expired
        }
    db.put(file, **kw)

def create(cls, user_id, restore_content, comment=""):
    """
    The method to create a new DemoUserRestore object

    args:
        user_id: the id of the CommCareUser
        restore_content: a string or file-like object of user's restore XML
    """
    restore = cls(
        demo_user_id=user_id,
        restore_comment=comment,
    )
    with AtomicBlobs(get_blob_db()) as db:
        restore._write_restore_blob(restore_content, db)
        restore.save()
    return restore

def delete_attachment(self, name):
    if self._migrating_blobs_from_couch and self._attachments:
        deleted = bool(self._attachments.pop(name, None))
    else:
        deleted = False
    meta = self.external_blobs.pop(name, None)
    if meta is not None:
        if self._atomic_blobs is None:
            deleted = get_blob_db().delete(key=meta.key) or deleted
        else:
            self._atomic_blobs[name].append(meta.key)
            deleted = True
    if self._atomic_blobs is None:
        self.save()
    return deleted

def dump_locations(domain, download_id, include_consumption, headers_only,
                   owner_id, root_location_ids=None, task=None, **kwargs):
    exporter = LocationExporter(domain, include_consumption=include_consumption,
                                root_location_ids=root_location_ids,
                                headers_only=headers_only, async_task=task,
                                **kwargs)

    fd, path = tempfile.mkstemp()
    writer = Excel2007ExportWriter()
    writer.open(header_table=exporter.get_headers(), file=path)
    with writer:
        exporter.write_data(writer)

    with open(path, 'rb') as file_:
        db = get_blob_db()
        expiry_mins = 60
        db.put(
            file_,
            domain=domain,
            parent_id=domain,
            type_code=CODES.tempfile,
            key=download_id,
            timeout=expiry_mins,
        )

    file_format = Format.from_format(Excel2007ExportWriter.format)
    filename = '{}_locations'.format(domain)
    if len(root_location_ids) == 1:
        root_location = SQLLocation.objects.get(location_id=root_location_ids[0])
        filename += '_{}'.format(root_location.name)

    expose_blob_download(
        download_id,
        expiry=expiry_mins * 60,
        mimetype=file_format.mimetype,
        content_disposition=safe_filename_header(filename, file_format.extension),
        download_id=download_id,
        owner_ids=[owner_id],
    )

def test_cached_global_fixture_user_id(self):
    sandwich = self.make_data_type("sandwich", is_global=True)
    self.make_data_item(sandwich, "7.39")
    frank = self.user.to_ota_restore_user()
    sammy = CommCareUser.create(self.domain, 'sammy', '***', None, None).to_ota_restore_user()

    fixtures = call_fixture_generator(frank)
    self.assertEqual({item.attrib['user_id'] for item in fixtures}, {frank.user_id})
    self.assertTrue(get_blob_db().exists(key=FIXTURE_BUCKET + '/' + self.domain))
    fixtures = call_fixture_generator(sammy)
    self.assertEqual({item.attrib['user_id'] for item in fixtures}, {sammy.user_id})

def save_dump_to_blob(self, temp_path):
    with open(temp_path, 'rb') as file_:
        blob_db = get_blob_db()
        blob_db.put(file_, self.result_file_name, timeout=60 * 48)  # 48 hours

    file_format = Format.from_format(Format.CSV)
    file_name_header = safe_filename_header(self.result_file_name, file_format.extension)
    blob_dl_object = expose_blob_download(
        self.result_file_name,
        mimetype=file_format.mimetype,
        content_disposition=file_name_header
    )
    return blob_dl_object.download_id

def _store_excel_in_blobdb(report_class, file, domain):
    key = uuid.uuid4().hex
    expired = 60 * 24 * 7  # 7 days
    db = get_blob_db()

    kw = {
        "domain": domain,
        "parent_id": key,
        "type_code": CODES.tempfile,
        "key": key,
        "timeout": expired,
        "properties": {"report_class": report_class}
    }
    file.seek(0)
    db.put(file, **kw)
    return key

def check_blobdb():
    """Save something to the blobdb and try reading it back."""
    db = get_blob_db()
    contents = b"It takes Pluto 248 Earth years to complete one orbit!"
    meta = db.put(
        BytesIO(contents),
        domain="<unknown>",
        parent_id="check_blobdb",
        type_code=CODES.tempfile,
    )
    with db.get(key=meta.key) as fh:
        res = fh.read()
    db.delete(key=meta.key)
    if res == contents:
        return ServiceStatus(True, "Successfully saved a file to the blobdb")
    return ServiceStatus(False, "Failed to save a file to the blobdb")

def _migrate_form_attachments(sql_form, couch_form):
    """Copy over attachment meta - includes form.xml"""
    attachments = []
    metadb = get_blob_db().metadb

    def try_to_get_blob_meta(parent_id, type_code, name):
        try:
            meta = metadb.get(parent_id=parent_id, type_code=type_code, name=name)
            assert meta.domain == couch_form.domain, (meta.domain, couch_form.domain)
            return meta
        except BlobMeta.DoesNotExist:
            return None

    if couch_form._attachments and any(
        name not in couch_form.blobs for name in couch_form._attachments
    ):
        _migrate_couch_attachments_to_blob_db(couch_form)

    for name, blob in couch_form.blobs.items():
        type_code = CODES.form_xml if name == "form.xml" else CODES.form_attachment
        meta = try_to_get_blob_meta(sql_form.form_id, type_code, name)

        # there was a bug in a migration causing the type code for many form attachments to be set as form_xml
        # this checks the db for a meta resembling this and fixes it for postgres
        # https://github.com/dimagi/commcare-hq/blob/3788966119d1c63300279418a5bf2fc31ad37f6f/corehq/blobs/migrate.py#L371
        if not meta and name != "form.xml":
            meta = try_to_get_blob_meta(sql_form.form_id, CODES.form_xml, name)
            if meta:
                meta.type_code = CODES.form_attachment
                meta.save()

        if not meta:
            meta = metadb.new(
                domain=couch_form.domain,
                name=name,
                parent_id=sql_form.form_id,
                type_code=type_code,
                content_type=blob.content_type,
                content_length=blob.content_length,
                key=blob.key,
            )
            meta.save()

        attachments.append(meta)
    sql_form.attachments_list = attachments

def get_forms_with_attachments_meta(self, form_ids, ordered=False):
    assert isinstance(form_ids, list)
    if not form_ids:
        return []
    forms = list(self.get_forms(form_ids))

    attachments = sorted(
        get_blob_db().metadb.get_for_parents(form_ids),
        key=lambda meta: meta.parent_id
    )
    forms_by_id = {form.form_id: form for form in forms}
    attach_prefetch_models(forms_by_id, attachments, 'parent_id', 'attachments_list')

    if ordered:
        sort_with_id_list(forms, form_ids, 'form_id')

    return forms

def toHttpResponse(self):
    if self.download_id.startswith(self.new_id_prefix):
        blob_key = self.download_id
    else:
        # legacy key; remove after all legacy blob downloads have expired
        blob_key = "_default/" + self.identifier
    blob_db = get_blob_db()
    file_obj = blob_db.get(key=blob_key, type_code=CODES.tempfile)

    response = StreamingHttpResponse(FileWrapper(file_obj, CHUNK_SIZE),
                                     content_type=self.content_type)
    response['Content-Length'] = file_obj.content_length
    response['Content-Disposition'] = self.content_disposition
    for k, v in self.extras.items():
        response[k] = v
    return response

def handle(self, files, migrate=False, num_workers=10, **options):
    set_max_connections(num_workers)
    blob_db = get_blob_db()
    if not isinstance(blob_db, MigratingBlobDB):
        raise CommandError(
            "Expected to find migrating blob db backend (got %r)" % blob_db)
    old_db = blob_db.old_db
    new_db = blob_db.new_db
    ignored = 0
    try:
        pool = Pool(size=num_workers)
        for filepath in files:
            print("Processing {}".format(filepath))
            with open(filepath, encoding='utf-8') as fh:
                for line in fh:
                    if not line:
                        continue
                    try:
                        rec = json.loads(line)
                    except ValueError:
                        ignored += 1
                        print("Ignore {}".format(line))
                        continue
                    pool.spawn(process, rec, old_db, new_db, migrate)

        print("CTRL+C to abort")
        while not pool.join(timeout=10):
            print("waiting for {} workers to finish...".format(len(pool)))
    except KeyboardInterrupt:
        pass

    if ignored:
        print("Ignored {} malformed records".format(ignored))
    for type_code, stats in sorted(Stats.items.items()):
        try:
            group = BLOB_MIXIN_MODELS[type_code].__name__
        except KeyError:
            group = CODES.name_of(type_code, "type_code %s" % type_code)
        total = stats.new + stats.old + stats.noref + stats.lost
        print("{}: checked {} records".format(group, total))
        print("  Found in new db: {}".format(stats.new))
        print("  Found in old db: {}".format(stats.old))
        print("  Not referenced: {}".format(stats.noref))
        print("  Not found: {}".format(stats.lost))

def cache_fixture_items_data(io_data, domain, fixure_name, key_prefix):
    db = get_blob_db()
    try:
        kw = {"meta": db.metadb.get(
            parent_id=domain,
            type_code=CODES.fixture,
            name=fixure_name,
        )}
    except BlobMeta.DoesNotExist:
        kw = {
            "domain": domain,
            "parent_id": domain,
            "type_code": CODES.fixture,
            "name": fixure_name,
            "key": key_prefix + '/' + domain,
        }
    db.put(io_data, **kw)

def generate_toggle_csv_download(self, tag, download_id, username):
    toggles = _get_toggles_with_tag(tag)
    total = _get_toggle_item_count(toggles)
    current_progress = [0]

    def increment_progress():
        current_progress[0] += 1
        DownloadBase.set_progress(self, current_progress[0], total)

    timeout_mins = 24 * 60
    with TransientTempfile() as temp_path:
        _write_toggle_data(temp_path, toggles, increment_progress)
        with open(temp_path, 'rb') as file:
            db = get_blob_db()
            meta = db.put(
                file,
                domain="__system__",
                parent_id="__system__",
                type_code=CODES.tempfile,
                key=download_id,
                timeout=timeout_mins,
            )

    now = datetime.utcnow().strftime("%Y-%m-%d-%H-%M-%S")
    filename = f'{settings.SERVER_ENVIRONMENT}_toggle_export_{now}'
    expose_blob_download(
        download_id,
        expiry=timeout_mins * 60,
        content_disposition=safe_filename_header(filename, ".csv"),
        download_id=download_id,
    )

    user = CouchUser.get_by_username(username)
    if user:
        url = absolute_reverse("retrieve_download", args=[download_id])
        url += "?get_file"
        valid_until = meta.expires_on.replace(tzinfo=pytz.UTC).strftime(USER_DATETIME_FORMAT)
        send_HTML_email("Feature Flag download ready", user.get_email(),
                        html_content=inspect.cleandoc(f"""
                        Download URL: {url}
                        Download Valid until: {valid_until}
                        """))

def _get_blob_deletion_pillow(pillow_id, couch_db, checkpoint=None, change_feed=None):
    if checkpoint is None:
        checkpoint = PillowCheckpoint(pillow_id)
    if change_feed is None:
        change_feed = CouchChangeFeed(couch_db, include_docs=False)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=BlobDeletionProcessor(get_blob_db(), couch_db.dbname),
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=KAFKA_CHECKPOINT_FREQUENCY,
        ),
    )

def create_form_with_missing_xml(domain_name):
    form = submit_form_locally(TEST_FORM, domain_name).xform
    form = FormAccessors(domain_name).get_form(form.form_id)
    blobs = get_blob_db()
    with mock.patch.object(blobs.metadb, "delete"):
        if isinstance(form, XFormInstance):
            # couch form
            form.delete_attachment("form.xml")
            assert form.get_xml() is None, form.get_xml()
        else:
            # sql form
            blobs.delete(form.get_attachment_meta("form.xml").key)
            try:
                form.get_xml()
                assert False, "expected BlobNotFound exception"
            except BlobNotFound:
                pass
    return form