class TestMigrateBackend(TestCase):

    slug = "migrate_backend"
    test_size = 5

    def setUp(self):
        with trap_extra_setup(AttributeError, msg="S3_BLOB_DB_SETTINGS not configured"):
            config = settings.S3_BLOB_DB_SETTINGS
        fsdb = TemporaryFilesystemBlobDB()
        assert get_blob_db() is fsdb, (get_blob_db(), fsdb)
        self.migrate_docs = docs = []
        for i in range(self.test_size):
            doc = SavedBasicExport(configuration=_mk_config("config-%s" % i))
            doc.save()
            doc.set_payload(("content %s" % i).encode('utf-8'))
            docs.append(doc)
        s3db = TemporaryS3BlobDB(config)
        self.db = TemporaryMigratingBlobDB(s3db, fsdb)
        assert get_blob_db() is self.db, (get_blob_db(), self.db)
        mod.BlobMigrationState.objects.filter(slug=self.slug).delete()

    def tearDown(self):
        self.db.close()
        mod.BlobMigrationState.objects.filter(slug=self.slug).delete()

    def test_migrate_backend(self):
        # verify: attachment is in couch and migration not complete
        with maybe_not_found():
            s3_blobs = sum(1 for b in self.db.new_db._s3_bucket().objects.all())
            self.assertEqual(s3_blobs, 0)

        with tempdir() as tmp:
            filename = join(tmp, "file.txt")

            # do migration
            migrated, skipped = mod.MIGRATIONS[self.slug].migrate(filename)
            self.assertGreaterEqual(migrated, self.test_size)

            # verify: migration state recorded
            mod.BlobMigrationState.objects.get(slug=self.slug)

            # verify: migrated data was written to the file
            with open(filename) as fh:
                lines = list(fh)
            ids = {d._id for d in self.migrate_docs}
            migrated = {d["_id"] for d in (json.loads(x) for x in lines)}
            self.assertEqual(len(ids.intersection(migrated)), self.test_size)

        # verify: attachment was copied to new blob db
        for doc in self.migrate_docs:
            exp = SavedBasicExport.get(doc._id)
            self.assertEqual(exp._rev, doc._rev)  # rev should not change
            self.assertTrue(doc.blobs)
            bucket = doc._blobdb_bucket()
            for meta in doc.blobs.values():
                content = self.db.new_db.get(meta.id, bucket)
                self.assertEqual(len(content.read()), meta.content_length)
class TestMigrateBackend(TestCase):

    slug = "migrate_backend"

    def setUp(self):
        db1 = TemporaryFilesystemBlobDB()
        assert get_blob_db() is db1, (get_blob_db(), db1)
        data = b'binary data not valid utf-8 \xe4\x94'
        self.not_founds = set()
        self.blob_metas = []
        for type_code in [CODES.form_xml, CODES.multimedia, CODES.data_export]:
            meta = db1.put(BytesIO(data), meta=new_meta(type_code=type_code))
            lost = new_meta(type_code=type_code, content_length=42)
            self.blob_metas.append(meta)
            self.blob_metas.append(lost)
            lost.save()
            self.not_founds.add((
                lost.id,
                lost.domain,
                lost.type_code,
                lost.parent_id,
                lost.key,
            ))
        self.test_size = len(self.blob_metas)
        db2 = TemporaryFilesystemBlobDB()
        self.db = TemporaryMigratingBlobDB(db2, db1)
        assert get_blob_db() is self.db, (get_blob_db(), self.db)
        discard_migration_state(self.slug)

    def tearDown(self):
        self.db.close()
        discard_migration_state(self.slug)
        for doc in self.blob_metas:
            doc.delete()

    def test_migrate_backend(self):
        # verify: migration not complete
        with maybe_not_found():
            self.assertEqual(os.listdir(self.db.new_db.rootdir), [])

        with tempdir() as tmp:
            filename = join(tmp, "file.txt")

            # do migration
            migrated, skipped = mod.MIGRATIONS[self.slug].migrate(
                filename, num_workers=2)
            self.assertGreaterEqual(migrated, self.test_size)

            # verify: migration state recorded
            mod.BlobMigrationState.objects.get(slug=self.slug)

            # verify: missing blobs written to log files
            missing_log = set()
            fields = [
                "blobmeta_id",
                "domain",
                "type_code",
                "parent_id",
                "blob_key",
            ]
            with open(filename, encoding='utf-8') as fh:
                for line in fh:
                    doc = json.loads(line)
                    missing_log.add(tuple(doc[x] for x in fields))
            self.assertEqual(self.not_founds, missing_log)

        # verify: blobs were copied to new blob db
        not_found = set(t[0] for t in self.not_founds)
        for meta in self.blob_metas:
            if meta.id in not_found:
                with self.assertRaises(mod.NotFound):
                    self.db.new_db.get(key=meta.key)
                continue
            content = self.db.new_db.get(key=meta.key)
            data = content.read()
            self.assertEqual(data, b'binary data not valid utf-8 \xe4\x94')
            self.assertEqual(len(data), meta.content_length)
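# `discard_migration_state` is not defined in this module. A minimal
# sketch, assuming it only clears the recorded completion state for the
# slug (the S3 variant of this test above does the same thing inline):
def discard_migration_state(slug):
    mod.BlobMigrationState.objects.filter(slug=slug).delete()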
@classmethod
def setUpClass(cls):
    super(TestMigratingBlobDB, cls).setUpClass()
    assert isinstance(cls.db, TemporaryS3BlobDB), cls.db
    cls.s3db = cls.db
    cls.fsdb = TemporaryFilesystemBlobDB()
    cls.db = TemporaryMigratingBlobDB(cls.s3db, cls.fsdb)
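# Illustration, not the real implementation: TemporaryMigratingBlobDB is
# constructed as (new_db, old_db), and the tests here depend on writes
# landing in the new backend while reads fall back to the old one. A
# minimal sketch of that contract, assuming NotFound signals a miss:
class FallbackReadBlobDB(object):

    def __init__(self, new_db, old_db):
        self.new_db = new_db
        self.old_db = old_db

    def put(self, *args, **kw):
        # all new writes go to the new backend
        return self.new_db.put(*args, **kw)

    def get(self, *args, **kw):
        # read from the new backend first; fall back to the old backend
        # for blobs that have not been migrated yet
        try:
            return self.new_db.get(*args, **kw)
        except mod.NotFound:
            return self.old_db.get(*args, **kw)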
class TestMigrateBackend(TestCase):

    slug = "migrate_backend"

    couch_doc_types = {
        "Application": mod.apps.Application,
        "LinkedApplication": mod.apps.LinkedApplication,
        "RemoteApp": mod.apps.RemoteApp,
        "Application-Deleted": mod.apps.Application,
        "RemoteApp-Deleted": mod.apps.RemoteApp,
        "SavedBasicExport": mod.SavedBasicExport,
        "CommCareAudio": mod.hqmedia.CommCareAudio,
        "CommCareImage": mod.hqmedia.CommCareImage,
        "CommCareVideo": mod.hqmedia.CommCareVideo,
        "CommCareMultimedia": mod.hqmedia.CommCareMultimedia,
        "XFormInstance": mod.xform.XFormInstance,
        "XFormInstance-Deleted": mod.xform.XFormInstance,
        "XFormArchived": mod.xform.XFormArchived,
        "XFormDeprecated": mod.xform.XFormDeprecated,
        "XFormDuplicate": mod.xform.XFormDuplicate,
        "XFormError": mod.xform.XFormError,
        "SubmissionErrorLog": mod.xform.SubmissionErrorLog,
        "HQSubmission": mod.xform.XFormInstance,
        "CommCareCase": mod.cases.CommCareCase,
        "CommCareCase-deleted": mod.cases.CommCareCase,
        "CommCareCase-Deleted": mod.cases.CommCareCase,
        "CommCareCase-Deleted-Deleted": mod.cases.CommCareCase,
        "CaseExportInstance": mod.exports.CaseExportInstance,
        "FormExportInstance": mod.exports.FormExportInstance,
    }

    sql_reindex_accessors = [
        mod.CaseUploadFileMetaReindexAccessor,
        mod.CaseAttachmentSQLReindexAccessor,
        mod.XFormAttachmentSQLReindexAccessor,
        mod.DemoUserRestoreReindexAccessor,
    ]

    def _sql_save(self, obj, rex):
        if rex.is_sharded():
            # HACK why does it have to be so hard to use form_processor
            # even just for testing...
            obj.save(using='default')
        else:
            obj.save()

    def CaseAttachmentSQL_save(self, obj, rex):
        obj.attachment_id = uuid.uuid4()
        obj.case_id = "not-there"
        obj.name = "name"
        obj.identifier = "what is this?"
        obj.md5 = "blah"
        self._sql_save(obj, rex)

    def XFormAttachmentSQL_save(self, obj, rex):
        obj.attachment_id = uuid.uuid4()
        obj.form_id = "not-there"
        obj.name = "name"
        obj.identifier = "what is this?"
        obj.md5 = "blah"
        self._sql_save(obj, rex)

    def DemoUserRestore_save(self, obj, rex):
        obj.attachment_id = uuid.uuid4()
        obj.demo_user_id = "not-there"
        self._sql_save(obj, rex)

    def setUp(self):
        lost_db = TemporaryFilesystemBlobDB()  # must be created before other dbs
        db1 = TemporaryFilesystemBlobDB()
        assert get_blob_db() is db1, (get_blob_db(), db1)
        missing = "found.not"
        name = "blob.bin"
        data = b'binary data not valid utf-8 \xe4\x94'
        self.not_founds = set()
        self.couch_docs = []
        with lost_db:
            for doc_type, model_class in self.couch_doc_types.items():
                item = model_class()
                item.doc_type = doc_type
                item.save()
                item.put_attachment(data, name)
                with install_blob_db(lost_db):
                    # this attachment lands in lost_db, which is gone by
                    # the time the migration runs, so the migration must
                    # report it as missing
                    item.put_attachment(data, missing)
                    self.not_founds.add((
                        doc_type,
                        item._id,
                        item.external_blobs[missing].id,
                        item._blobdb_bucket(),
                    ))
                item.save()
                self.couch_docs.append(item)

        def create_obj(rex):
            ident = random_url_id(8)
            args = {rex.blob_helper.id_attr: ident}
            fields = {getattr(f, "attname", "")
                      for f in rex.model_class._meta.get_fields()}
            if "content_length" in fields:
                args["content_length"] = len(data)
            elif "length" in fields:
                args["length"] = len(data)
            item = rex.model_class(**args)
            save_attr = rex.model_class.__name__ + "_save"
            if hasattr(self, save_attr):
                getattr(self, save_attr)(item, rex)
            else:
                item.save()
            return item, ident

        self.sql_docs = []
        for rex in (x() for x in self.sql_reindex_accessors):
            item, ident = create_obj(rex)
            helper = rex.blob_helper({"_obj_not_json": item})
            db1.put(BytesIO(data), ident, helper._blobdb_bucket())
            self.sql_docs.append(item)
            lost, lost_blob_id = create_obj(rex)
            self.sql_docs.append(lost)
            self.not_founds.add((
                rex.model_class.__name__,
                lost.id,
                lost_blob_id,
                rex.blob_helper({"_obj_not_json": lost})._blobdb_bucket(),
            ))

        self.test_size = len(self.couch_docs) + len(self.sql_docs)
        db2 = TemporaryFilesystemBlobDB()
        self.db = TemporaryMigratingBlobDB(db2, db1)
        assert get_blob_db() is self.db, (get_blob_db(), self.db)
        BaseMigrationTest.discard_migration_state(self.slug)

    def tearDown(self):
        self.db.close()
        BaseMigrationTest.discard_migration_state(self.slug)
        for doc in self.couch_docs:
            doc.get_db().delete_doc(doc._id)
        for doc in self.sql_docs:
            if isinstance(doc, PartitionedModel):
                doc.delete(using='default')
            else:
                doc.delete()

    def test_migrate_backend(self):
        # verify: migration not complete
        with maybe_not_found():
            self.assertEqual(os.listdir(self.db.new_db.rootdir), [])

        with tempdir() as tmp:
            filename = join(tmp, "file.txt")

            # do migration
            migrated, skipped = mod.MIGRATIONS[self.slug].migrate(filename)
            self.assertGreaterEqual(migrated, self.test_size)

            # verify: migration state recorded
            mod.BlobMigrationState.objects.get(slug=self.slug)

            # verify: missing blobs written to log files
            missing_log = set()
            fields = ["doc_type", "doc_id", "blob_identifier", "blob_bucket"]
            for n, ignore in enumerate(mod.MIGRATIONS[self.slug].migrators):
                with open("{}.{}".format(filename, n)) as fh:
                    for line in fh:
                        doc = json.loads(line)
                        missing_log.add(tuple(doc[x] for x in fields))
            self.assertEqual(
                len(self.not_founds.intersection(missing_log)),
                len(self.not_founds),
            )

        # verify: couch attachments were copied to new blob db
        for doc in self.couch_docs:
            exp = type(doc).get(doc._id)
            self.assertEqual(exp._rev, doc._rev)  # rev should not change
            self.assertTrue(doc.blobs)
            bucket = doc._blobdb_bucket()
            for name, meta in doc.blobs.items():
                if name == "found.not":
                    continue
                content = self.db.new_db.get(meta.id, bucket)
                data = content.read()
                self.assertEqual(data, b'binary data not valid utf-8 \xe4\x94')
                self.assertEqual(len(data), meta.content_length)
class TestMigrateBackend(TestCase):

    slug = "migrate_backend"
    test_size = 5

    def setUp(self):
        with trap_extra_setup(AttributeError, msg="S3_BLOB_DB_SETTINGS not configured"):
            config = settings.S3_BLOB_DB_SETTINGS
        fsdb = TemporaryFilesystemBlobDB()
        assert get_blob_db() is fsdb, (get_blob_db(), fsdb)
        self.migrate_docs = docs = []
        for i in range(self.test_size):
            doc = SavedBasicExport(configuration=_mk_config("config-%s" % i))
            doc.save()
            doc.set_payload(("content %s" % i).encode('utf-8'))
            docs.append(doc)
        s3db = TemporaryS3BlobDB(config)
        self.db = TemporaryMigratingBlobDB(s3db, fsdb)
        assert get_blob_db() is self.db, (get_blob_db(), self.db)
        BaseMigrationTest.discard_migration_state(self.slug)

    def tearDown(self):
        self.db.close()
        BaseMigrationTest.discard_migration_state(self.slug)
        for doc in self.migrate_docs:
            doc.get_db().delete_doc(doc._id)

    def test_migrate_backend(self):
        # verify: attachment is in couch and migration not complete
        with maybe_not_found():
            s3_blobs = sum(1 for b in self.db.new_db._s3_bucket().objects.all())
            self.assertEqual(s3_blobs, 0)

        with tempdir() as tmp:
            filename = join(tmp, "file.txt")

            # do migration
            migrated, skipped = mod.MIGRATIONS[self.slug].migrate(filename)
            self.assertGreaterEqual(migrated, self.test_size)

            # verify: migration state recorded
            mod.BlobMigrationState.objects.get(slug=self.slug)

            # verify: migrated data was written to the file
            with open(filename) as fh:
                lines = list(fh)
            ids = {d._id for d in self.migrate_docs}
            migrated = {d["_id"] for d in (json.loads(x) for x in lines)}
            self.assertEqual(len(ids.intersection(migrated)), self.test_size)

        # verify: attachment was copied to new blob db
        for doc in self.migrate_docs:
            exp = SavedBasicExport.get(doc._id)
            self.assertEqual(exp._rev, doc._rev)  # rev should not change
            self.assertTrue(doc.blobs)
            bucket = doc._blobdb_bucket()
            for meta in doc.blobs.values():
                content = self.db.new_db.get(meta.id, bucket)
                self.assertEqual(len(content.read()), meta.content_length)
class TestMigrateBackend(TestCase):

    slug = "migrate_backend"

    def setUp(self):
        db1 = TemporaryFilesystemBlobDB()
        assert get_blob_db() is db1, (get_blob_db(), db1)
        data = b'binary data not valid utf-8 \xe4\x94'
        self.not_founds = set()
        self.blob_metas = []
        for type_code in [CODES.form_xml, CODES.multimedia, CODES.data_export]:
            meta = db1.put(BytesIO(data), meta=new_meta(type_code=type_code))
            lost = new_meta(type_code=type_code, content_length=42)
            self.blob_metas.append(meta)
            self.blob_metas.append(lost)
            lost.save()
            self.not_founds.add((
                lost.id,
                lost.domain,
                lost.type_code,
                lost.parent_id,
                lost.key,
            ))
        self.test_size = len(self.blob_metas)
        db2 = TemporaryFilesystemBlobDB()
        self.db = TemporaryMigratingBlobDB(db2, db1)
        assert get_blob_db() is self.db, (get_blob_db(), self.db)
        discard_migration_state(self.slug)

    def tearDown(self):
        self.db.close()
        discard_migration_state(self.slug)
        for doc in self.blob_metas:
            doc.delete()

    def test_migrate_backend(self):
        # verify: migration not complete
        with maybe_not_found():
            self.assertEqual(os.listdir(self.db.new_db.rootdir), [])

        with tempdir() as tmp:
            filename = join(tmp, "file.txt")

            # do migration
            migrated, skipped = mod.MIGRATIONS[self.slug]().migrate(
                filename, num_workers=2)
            self.assertGreaterEqual(migrated, self.test_size)
            verify_migration(self, self.slug, filename, self.not_founds)

        # verify: blobs were copied to new blob db
        not_found = set(t[0] for t in self.not_founds)
        for meta in self.blob_metas:
            if meta.id in not_found:
                with self.assertRaises(mod.NotFound):
                    meta.open(self.db.new_db)
                continue
            content = meta.open(self.db.new_db)
            data = content.read()
            self.assertEqual(data, b'binary data not valid utf-8 \xe4\x94')
            self.assertEqual(len(data), meta.content_length)
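# `verify_migration` is not defined in this module. A minimal sketch,
# assuming it performs the same checks the earlier variant of this test
# did inline (migration state recorded, missing blobs logged):
def verify_migration(test, slug, filename, not_founds):
    # verify: migration state recorded
    mod.BlobMigrationState.objects.get(slug=slug)
    # verify: missing blobs written to the log file
    missing_log = set()
    fields = ["blobmeta_id", "domain", "type_code", "parent_id", "blob_key"]
    with open(filename, encoding='utf-8') as fh:
        for line in fh:
            doc = json.loads(line)
            missing_log.add(tuple(doc[x] for x in fields))
    test.assertEqual(not_founds, missing_log)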