Example #1
0
    def test_migrate_backend(self):
        """Run the blob migration for ``self.slug`` and check its results.

        Checks, in order: the new blob db's bucket starts out empty, the
        migration reports at least ``self.test_size`` migrated items, a
        ``BlobMigrationState`` lookup for the slug succeeds, every test doc
        id shows up in the migration log file, and each blob can be read
        back from the new blob db with the recorded content length.
        """
        # precondition: nothing has been written to the new blob db yet
        with maybe_not_found():
            s3_objects = self.db.new_db._s3_bucket().objects.all()
            s3_blobs = len([obj for obj in s3_objects])
            self.assertEqual(s3_blobs, 0)

        with tempdir() as tmp:
            filename = join(tmp, "file.txt")

            # run the migration, logging to the temporary file
            migrator = mod.MIGRATIONS[self.slug]
            migrated, _skipped = migrator.migrate(filename)
            self.assertGreaterEqual(migrated, self.test_size)

            # the lookup itself is the check; its result is not inspected
            mod.BlobMigrationState.objects.get(slug=self.slug)

            # each line of the log is a JSON document carrying an "_id"
            with open(filename) as fh:
                logged_ids = {json.loads(line)["_id"] for line in fh}
            expected_ids = {d._id for d in self.migrate_docs}
            self.assertEqual(
                len(expected_ids.intersection(logged_ids)), self.test_size)

        # postcondition: attachments are readable from the new blob db
        for doc in self.migrate_docs:
            saved = SavedBasicExport.get(doc._id)
            self.assertEqual(saved._rev, doc._rev)  # rev should not change
            self.assertTrue(doc.blobs)
            bucket = doc._blobdb_bucket()
            for meta in doc.blobs.values():
                blob = self.db.new_db.get(meta.id, bucket)
                self.assertEqual(len(blob.read()), meta.content_length)
    def test_migrate_backend(self):
        """Exercise the ``self.slug`` migration end to end.

        The bucket returned by ``new_db._s3_bucket()`` must be empty before
        the run. Afterwards the test expects a migrated count of at least
        ``self.test_size``, a retrievable ``BlobMigrationState`` for the
        slug, all test doc ids present in the log file, unchanged doc
        revisions, and blobs in the new db whose length matches their
        metadata.
        """
        # before: no objects in the new blob db's bucket
        with maybe_not_found():
            bucket_objects = self.db.new_db._s3_bucket().objects.all()
            s3_blobs = len(list(bucket_objects))
            self.assertEqual(s3_blobs, 0)

        with tempdir() as tmp:
            filename = join(tmp, "file.txt")

            # migrate; the second element of the result is not checked here
            migration = mod.MIGRATIONS[self.slug]
            migrated, _skipped = migration.migrate(filename)
            self.assertGreaterEqual(migrated, self.test_size)

            # fetching the state row is the assertion (result is discarded)
            mod.BlobMigrationState.objects.get(slug=self.slug)

            # the log file holds one JSON record per line
            with open(filename) as fh:
                records = [json.loads(line) for line in fh]
            logged = {record["_id"] for record in records}
            wanted = {doc._id for doc in self.migrate_docs}
            self.assertEqual(len(wanted.intersection(logged)), self.test_size)

        # after: every blob of every test doc is in the new blob db
        for doc in self.migrate_docs:
            reloaded = SavedBasicExport.get(doc._id)
            self.assertEqual(reloaded._rev, doc._rev)  # rev should not change
            self.assertTrue(doc.blobs)
            bucket = doc._blobdb_bucket()
            for meta in doc.blobs.values():
                stored = self.db.new_db.get(meta.id, bucket)
                self.assertEqual(len(stored.read()), meta.content_length)
Example #3
0
    def test_migrate_backend(self):
        """Migrate blobs with two workers and check the new blob db.

        The new db's root directory must be empty beforehand. After the
        migration, blobs whose id is listed in ``self.not_founds`` must
        raise ``mod.NotFound`` when opened from the new db; all others must
        yield the expected payload with a matching content length.
        """
        # precondition: new blob db directory has no entries
        with maybe_not_found():
            self.assertEqual(os.listdir(self.db.new_db.rootdir), [])

        with tempdir() as tmp:
            filename = join(tmp, "file.txt")

            # instantiate the migration for this slug and run it
            migration = mod.MIGRATIONS[self.slug]()
            migrated, _skipped = migration.migrate(filename, num_workers=2)
            self.assertGreaterEqual(migrated, self.test_size)

            # remaining checks on the log file are delegated to the helper
            verify_migration(self, self.slug, filename, self.not_founds)

        # first element of each not-found tuple is the blob meta id
        missing_ids = {entry[0] for entry in self.not_founds}
        for meta in self.blob_metas:
            if meta.id in missing_ids:
                with self.assertRaises(mod.NotFound):
                    meta.open(self.db.new_db)
            else:
                payload = meta.open(self.db.new_db).read()
                self.assertEqual(
                    payload, b'binary data not valid utf-8 \xe4\x94')
                self.assertEqual(len(payload), meta.content_length)
Example #4
0
    def test_migrate_backend(self):
        """Migrate blobs with two workers and check log and new blob db.

        Expects an empty new-db root directory up front, a migrated count
        of at least ``self.test_size``, a retrievable ``BlobMigrationState``
        for the slug, a log file whose records equal ``self.not_founds``,
        and correct content for every blob that is not listed as missing.
        """
        # precondition: new blob db directory has no entries
        with maybe_not_found():
            self.assertEqual(os.listdir(self.db.new_db.rootdir), [])

        with tempdir() as tmp:
            filename = join(tmp, "file.txt")

            # run the migration
            migration = mod.MIGRATIONS[self.slug]
            migrated, _skipped = migration.migrate(filename, num_workers=2)
            self.assertGreaterEqual(migrated, self.test_size)

            # fetching the state row is the check; the result is unused
            mod.BlobMigrationState.objects.get(slug=self.slug)

            # each log line is a JSON record; reduce it to a tuple of these
            # fields so it can be compared against self.not_founds
            fields = (
                "blobmeta_id",
                "domain",
                "type_code",
                "parent_id",
                "blob_key",
            )
            with open(filename, encoding='utf-8') as fh:
                records = [json.loads(line) for line in fh]
            missing_log = {
                tuple(record[name] for name in fields) for record in records
            }
            self.assertEqual(self.not_founds, missing_log)

        # first element of each not-found tuple is the blob meta id
        missing_ids = {entry[0] for entry in self.not_founds}
        for meta in self.blob_metas:
            if meta.id in missing_ids:
                with self.assertRaises(mod.NotFound):
                    self.db.new_db.get(key=meta.key)
            else:
                payload = self.db.new_db.get(key=meta.key).read()
                self.assertEqual(
                    payload, b'binary data not valid utf-8 \xe4\x94')
                self.assertEqual(len(payload), meta.content_length)
Example #5
0
    def test_migrate_backend(self):
        """Migrate couch attachments and check logs and the new blob db.

        Expects an empty new-db root directory up front, a migrated count
        of at least ``self.test_size``, a retrievable ``BlobMigrationState``
        for the slug, every entry of ``self.not_founds`` present in the
        per-migrator log files, unchanged doc revisions, and correct
        content for every blob except the one named ``"found.not"``.
        """
        # precondition: new blob db directory has no entries
        with maybe_not_found():
            self.assertEqual(os.listdir(self.db.new_db.rootdir), [])

        with tempdir() as tmp:
            filename = join(tmp, "file.txt")

            # run the migration
            migration = mod.MIGRATIONS[self.slug]
            migrated, _skipped = migration.migrate(filename)
            self.assertGreaterEqual(migrated, self.test_size)

            # fetching the state row is the check; the result is unused
            mod.BlobMigrationState.objects.get(slug=self.slug)

            # one log file per migrator, named "<filename>.<index>"; gather
            # the identifying fields of every logged record
            fields = ("doc_type", "doc_id", "blob_identifier", "blob_bucket")
            missing_log = set()
            for index, _migrator in enumerate(migration.migrators):
                with open("{}.{}".format(filename, index)) as fh:
                    missing_log.update(
                        tuple(record[name] for name in fields)
                        for record in map(json.loads, fh)
                    )
            # all expected not-founds must be logged (extras are tolerated)
            logged_not_founds = self.not_founds.intersection(missing_log)
            self.assertEqual(len(logged_not_founds), len(self.not_founds))

        # postcondition: couch attachments are readable from the new blob db
        for doc in self.couch_docs:
            reloaded = type(doc).get(doc._id)
            self.assertEqual(reloaded._rev, doc._rev)  # rev should not change
            self.assertTrue(doc.blobs)
            bucket = doc._blobdb_bucket()
            for name, meta in doc.blobs.items():
                if name != "found.not":
                    payload = self.db.new_db.get(meta.id, bucket).read()
                    self.assertEqual(
                        payload, b'binary data not valid utf-8 \xe4\x94')
                    self.assertEqual(len(payload), meta.content_length)
Example #6
0
    def test_migrate_backend(self):
        """Run the two-worker migration and validate its log and output.

        The new db's root directory must be empty before the run. The test
        then requires at least ``self.test_size`` migrated items, a
        ``BlobMigrationState`` for the slug, a missing-blob log equal to
        ``self.not_founds``, ``mod.NotFound`` for blobs listed as missing,
        and the expected bytes and length for all remaining blobs.
        """
        # before: the new blob db directory is empty
        with maybe_not_found():
            self.assertEqual(os.listdir(self.db.new_db.rootdir), [])

        with tempdir() as tmp:
            filename = join(tmp, "file.txt")

            # perform the migration
            runner = mod.MIGRATIONS[self.slug]
            migrated, _skipped = runner.migrate(filename, num_workers=2)
            self.assertGreaterEqual(migrated, self.test_size)

            # the lookup is the assertion; its return value is discarded
            mod.BlobMigrationState.objects.get(slug=self.slug)

            # turn every JSON log line into a tuple of its key fields
            fields = (
                "blobmeta_id",
                "domain",
                "type_code",
                "parent_id",
                "blob_key",
            )
            with open(filename, encoding='utf-8') as fh:
                missing_log = {
                    tuple(entry[key] for key in fields)
                    for entry in map(json.loads, fh)
                }
            self.assertEqual(self.not_founds, missing_log)

        # after: blobs are in the new db unless they were logged as missing
        absent = {item[0] for item in self.not_founds}
        for meta in self.blob_metas:
            if meta.id not in absent:
                data = self.db.new_db.get(key=meta.key).read()
                self.assertEqual(
                    data, b'binary data not valid utf-8 \xe4\x94')
                self.assertEqual(len(data), meta.content_length)
            else:
                with self.assertRaises(mod.NotFound):
                    self.db.new_db.get(key=meta.key)
Example #7
0
 def listdir(self):
     """Return the keys of all bucket objects under ``self.path + "/"``.

     An empty list is returned when ``maybe_not_found`` suppresses an
     error raised while iterating (presumably a not-found error -- confirm
     against its definition).
     """
     keys = []
     prefix = self.path + "/"
     summaries = self.s3_bucket.objects.filter(Prefix=prefix)
     with maybe_not_found():
         keys = [summary.key for summary in summaries]
     return keys
Example #8
0
 def exists(self):
     """Return True if loading the object at ``self.path`` succeeds.

     False is returned when ``maybe_not_found`` suppresses an error raised
     by ``load()`` (presumably a not-found error -- confirm against its
     definition).
     """
     found = False
     with maybe_not_found():
         obj = self.s3_bucket.Object(self.path)
         obj.load()
         found = True
     return found
Example #9
0
 def listdir(self):
     """Return the keys of all bucket objects whose key starts with "/".

     An empty list is returned when ``maybe_not_found`` suppresses an
     error raised while iterating (presumably a not-found error -- confirm
     against its definition).
     """
     keys = []
     summaries = self.s3_bucket.objects.filter(Prefix="/")
     with maybe_not_found():
         keys = [summary.key for summary in summaries]
     return keys
Example #10
0
 def exists(self):
     """Return True if loading the object at ``self.key`` succeeds.

     False is returned when ``maybe_not_found`` suppresses an error raised
     by ``load()`` (presumably a not-found error -- confirm against its
     definition).
     """
     found = False
     with maybe_not_found():
         obj = self.s3_bucket.Object(self.key)
         obj.load()
         found = True
     return found