def test_get_by_index(self):
    index = ['some', 'index']
    saved_export = SavedBasicExport(configuration=_mk_config(index=index))
    saved_export.save()
    back = SavedBasicExport.by_index(index)
    self.assertEqual(1, len(back))
    self.assertEqual(saved_export._id, back[0]._id)
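These tests build configurations through a module-level _mk_config() helper that is not shown in this section; only an inline variant appears in testFileSaveAndLoad below. A minimal sketch consistent with both call styles seen here (name positional, index keyword); the default values are assumptions:

def _mk_config(name='some export name', index='dummy_index'):
    # mirrors the inline helper in testFileSaveAndLoad below; defaults are assumed
    return ExportConfiguration(index=index, name=name, format='xlsx')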
def rebuild_export(config, schema, output_dir, last_access_cutoff=None, filter=None):
    if output_dir == "couch":
        saved = get_saved_export_and_delete_copies(config.index)
        if last_access_cutoff and saved and saved.last_accessed and \
                saved.last_accessed < last_access_cutoff:
            # ignore exports that haven't been accessed since last_access_cutoff
            return

    try:
        files = schema.get_export_files(format=config.format, filter=filter)
    except SchemaMismatchException:
        # fire off a delayed force update to prevent this from happening again
        rebuild_schemas.delay(config.index)
        raise ExportRebuildError(u'Schema mismatch for {}. Rebuilding tables...'.format(config.filename))

    with files:
        payload = files.file.payload
        if output_dir == "couch":
            if not saved:
                saved = SavedBasicExport(configuration=config)
            else:
                saved.configuration = config

            if saved.last_accessed is None:
                saved.last_accessed = datetime.utcnow()
            saved.last_updated = datetime.utcnow()
            saved.save()
            saved.set_payload(payload)
        else:
            with open(os.path.join(output_dir, config.filename), "wb") as f:
                f.write(payload)
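A hypothetical caller sketch for rebuild_export(), assembled from pieces visible elsewhere in this section (GroupExportConfiguration and all_exports appear in export_for_group below); the wrapper name, the export id argument, and the seven-day cutoff are assumptions:

from datetime import datetime, timedelta

def rebuild_group_exports(export_id, output_dir="couch"):
    # assumed wrapper: rebuild every export in a group config, skipping stale ones
    cutoff = datetime.utcnow() - timedelta(days=7)  # assumed staleness window
    group_config = GroupExportConfiguration.get(export_id)
    for config, schema in group_config.all_exports:
        try:
            rebuild_export(config, schema, output_dir, last_access_cutoff=cutoff)
        except ExportRebuildError:
            # a schema rebuild has been queued; this export can be retried later
            continue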
def export_for_group(export_id_or_group, output_dir):
    if isinstance(export_id_or_group, basestring):
        try:
            config = GroupExportConfiguration.get(export_id_or_group)
        except ResourceNotFound:
            raise Exception("Couldn't find an export with id %s" % export_id_or_group)
    else:
        config = export_id_or_group

    for config, schema in config.all_exports:
        try:
            tmp, _ = schema.get_export_files(format=config.format)
        except SchemaMismatchException:
            # fire off a delayed force update to prevent this from happening again
            rebuild_schemas.delay(config.index)
            continue

        payload = Temp(tmp).payload
        if output_dir == "couch":
            saved = SavedBasicExport.view(
                "couchexport/saved_exports",
                key=json.dumps(config.index),
                include_docs=True,
                reduce=False,
            ).one()
            if not saved:
                saved = SavedBasicExport(configuration=config)
            else:
                saved.configuration = config
            saved.last_updated = datetime.utcnow()
            saved.save()
            saved.set_payload(payload)
        else:
            with open(os.path.join(output_dir, config.filename), "wb") as f:
                f.write(payload)
def handle(self, *args, **options):
    if len(args) < 2:
        raise CommandError('Please specify %s.' % self.label)
    export_id = args[0]
    output_dir = args[1]

    try:
        config = GroupExportConfiguration.get(export_id)
    except ResourceNotFound:
        raise CommandError("Couldn't find an export with id %s" % export_id)

    for export_config in config.full_exports:
        print "exporting %s to %s" % (export_config.name, output_dir)
        # special case couch storage
        if output_dir == "couch":
            fd, path = tempfile.mkstemp()
            with os.fdopen(fd, 'wb') as f:
                export(export_config.index, f, format=export_config.format)
            # got the file, now rewrite it to couch
            saved = SavedBasicExport.view(
                "couchexport/saved_exports",
                key=json.dumps(export_config.index),
                include_docs=True,
                reduce=False,
            ).one()
            if not saved:
                saved = SavedBasicExport(configuration=export_config)
                saved.save()
            with open(path, "rb") as f:
                saved.put_attachment(f.read(), export_config.filename)
            saved.last_updated = datetime.utcnow()
            saved.save()
            os.remove(path)
        else:
            with open(os.path.join(output_dir, export_config.filename), "wb") as f:
                export(export_config.index, f, format=export_config.format)
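Usage, assuming the command class above is registered as a Django management command; "dump_group_exports" is a placeholder name, not taken from this snippet:

# ./manage.py dump_group_exports <export_id> /path/to/output_dir
# ./manage.py dump_group_exports <export_id> couch    # special-cased: store payloads in couch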
def test_file_save_and_load(self):
    payload = 'something small and simple'
    for name in ['normal', u'हिंदी', None]:
        saved = SavedBasicExport(configuration=_mk_config(name))
        saved.save()
        saved.set_payload(payload)
        self.assertEqual(payload, saved.get_payload())
def testFileSaveAndLoad(self):
    def _mk_config(name):
        return ExportConfiguration(index="dummy_index", name=name, format="xlsx")

    payload = "something small and simple"
    for name in ["normal", u"हिंदी", None]:
        saved = SavedBasicExport(configuration=_mk_config(name))
        saved.save()
        saved.set_payload(payload)
        self.assertEqual(payload, saved.get_payload())
def test_save_basic_export_to_blobdb(self):
    index = ['single']
    saved_export = SavedBasicExport(configuration=_mk_config(index=index))
    saved_export.save()
    saved_export.set_payload("content")
    name = saved_export.get_attachment_name()
    self.assertTrue(saved_export.has_file())
    self.assertIn(name, saved_export.external_blobs)
    self.assertEqual(saved_export.size, 7)
    with saved_export.get_payload(stream=True) as fh:
        self.assertEqual(fh.read(), "content")
def test_migrate_happy_path(self):
    saved = SavedBasicExport(configuration=_mk_config())
    saved.save()
    payload = b'binary data not valid utf-8 \xe4\x94'
    name = saved.get_attachment_name()
    super(BlobMixin, saved).put_attachment(payload, name)
    saved.save()

    self.do_migration([saved])

    exp = SavedBasicExport.get(saved._id)
    self.assertEqual(exp.get_payload(), payload)
def test_get_saved_and_delete_copies_multiple(self):
    index = ['multiple']
    # make three exports with the last one being the most recently updated
    timestamp = datetime.datetime.utcnow()
    for i in range(3):
        saved_export = SavedBasicExport(
            configuration=_mk_config(index=index),
            last_updated=timestamp + datetime.timedelta(days=i))
        saved_export.save()

    self.assertEqual(3, len(SavedBasicExport.by_index(index)))
    chosen_one = get_saved_export_and_delete_copies(index)
    # this relies on the variable being set last in the loop which is a bit unintuitive
    self.assertEqual(saved_export._id, chosen_one._id)
    saved_after_deletion = SavedBasicExport.by_index(index)
    self.assertEqual(1, len(saved_after_deletion))
    self.assertEqual(chosen_one._id, saved_after_deletion[0]._id)
def setUp(self):
    with trap_extra_setup(AttributeError, msg="S3_BLOB_DB_SETTINGS not configured"):
        config = settings.S3_BLOB_DB_SETTINGS

    fsdb = TemporaryFilesystemBlobDB()
    assert get_blob_db() is fsdb, (get_blob_db(), fsdb)
    self.migrate_docs = docs = []
    for i in range(self.test_size):
        doc = SavedBasicExport(configuration=_mk_config("config-%s" % i))
        doc.save()
        doc.set_payload(("content %s" % i).encode('utf-8'))
        docs.append(doc)

    s3db = TemporaryS3BlobDB(config)
    self.db = TemporaryMigratingBlobDB(s3db, fsdb)
    assert get_blob_db() is self.db, (get_blob_db(), self.db)
    BaseMigrationTest.discard_migration_state(self.slug)
def _save_export_payload(files, saved_export, config, is_safe=False):
    payload = files.file.payload
    if not saved_export:
        saved_export = SavedBasicExport(configuration=config)
    else:
        saved_export.configuration = config
    saved_export.is_safe = is_safe

    if saved_export.last_accessed is None:
        saved_export.last_accessed = datetime.utcnow()
    saved_export.last_updated = datetime.utcnow()

    try:
        saved_export.save()
    except ResourceConflict:
        # the task was executed concurrently, so let the first to finish win and abort the rest
        pass
    else:
        saved_export.set_payload(payload)
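A hypothetical sketch of how _save_export_payload() could replace the inline couch branch of rebuild_export() above; get_saved_export_and_delete_copies() and get_export_files() come from the surrounding snippets, while the wrapper itself is assumed:

def rebuild_to_couch(config, schema):
    # assumed wrapper: fetch (and deduplicate) the saved export, then persist the new payload
    saved = get_saved_export_and_delete_copies(config.index)
    files = schema.get_export_files(format=config.format)
    with files:
        _save_export_payload(files, saved, config)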
def test_migrate_with_concurrent_modification(self):
    # setup data
    saved = SavedBasicExport(configuration=_mk_config())
    saved.save()
    name = saved.get_attachment_name()
    new_payload = 'something new'
    old_payload = 'something old'
    super(BlobMixin, saved).put_attachment(old_payload, name)
    super(BlobMixin, saved).put_attachment(old_payload, "other")
    saved.save()

    # verify: attachments are in couch
    self.assertEqual(len(saved._attachments), 2)
    self.assertEqual(len(saved.external_blobs), 0)

    modified = []
    print_status = mod.print_status

    # setup concurrent modification
    def modify_doc_and_print_status(num, total):
        if not modified:
            # do concurrent modification
            doc = SavedBasicExport.get(saved._id)
            doc.set_payload(new_payload)
            doc.save()
            modified.append(True)
        print_status(num, total)

    # hook print_status() call to simulate concurrent modification
    with replattr(mod, "print_status", modify_doc_and_print_status):
        # do migration
        migrated, skipped = mod.MIGRATIONS[self.slug].migrate()
        self.assertGreaterEqual(skipped, 1)
        # verify: migration state not set when docs are skipped
        with self.assertRaises(mod.BlobMigrationState.DoesNotExist):
            mod.BlobMigrationState.objects.get(slug=self.slug)

    # verify: attachments were not migrated
    exp = SavedBasicExport.get(saved._id)
    self.assertEqual(len(exp._attachments), 1, exp._attachments)
    self.assertEqual(len(exp.external_blobs), 1, exp.external_blobs)
    self.assertEqual(exp.get_payload(), new_payload)
    self.assertEqual(exp.fetch_attachment("other"), old_payload)
def test_migrate_with_concurrent_modification(self):
    saved = SavedBasicExport(configuration=_mk_config())
    saved.save()
    name = saved.get_attachment_name()
    new_payload = 'something new'
    old_payload = 'something old'
    super(BlobMixin, saved).put_attachment(old_payload, name)
    super(BlobMixin, saved).put_attachment(old_payload, "other")
    saved.save()
    self.assertEqual(len(saved._attachments), 2)

    def modify(doc):
        doc = SavedBasicExport.get(doc._id)
        doc.set_payload(new_payload)
        doc.save()

    self.do_failed_migration({saved: (1, 1)}, modify)

    exp = SavedBasicExport.get(saved._id)
    self.assertEqual(exp.get_payload(), new_payload)
    self.assertEqual(exp.fetch_attachment("other"), old_payload)
def test_migrate_saved_exports(self):
    # setup data
    saved = SavedBasicExport(configuration=_mk_config())
    saved.save()
    payload = 'something small and simple'
    name = saved.get_attachment_name()
    super(BlobMixin, saved).put_attachment(payload, name)
    saved.save()

    # verify: attachment is in couch and migration not complete
    self.assertEqual(len(saved._attachments), 1)
    self.assertEqual(len(saved.external_blobs), 0)

    with tempdir() as tmp, replattr(SavedBasicExport, "migrating_blobs_from_couch", True):
        filename = join(tmp, "file.txt")

        # do migration
        migrated, skipped = mod.MIGRATIONS[self.slug].migrate(filename)
        self.assertGreaterEqual(migrated, 1)

        # verify: migration state recorded
        mod.BlobMigrationState.objects.get(slug=self.slug)

        # verify: migrated data was written to the file
        with open(filename) as fh:
            lines = list(fh)
        doc = {d["_id"]: d for d in (json.loads(x) for x in lines)}[saved._id]
        self.assertEqual(doc["_rev"], saved._rev)
        self.assertEqual(len(lines), migrated, lines)

    # verify: attachment was moved to blob db
    exp = SavedBasicExport.get(saved._id)
    self.assertNotEqual(exp._rev, saved._rev)
    self.assertEqual(len(exp.blobs), 1, repr(exp.blobs))
    self.assertFalse(exp._attachments, exp._attachments)
    self.assertEqual(len(exp.external_blobs), 1)
    self.assertEqual(exp.get_payload(), payload)
def test_get_saved_and_delete_copies_single(self):
    index = ['single']
    saved_export = SavedBasicExport(configuration=_mk_config(index=index))
    saved_export.save()
    self.assertEqual(saved_export._id, get_saved_export_and_delete_copies(index)._id)