def setUp(self):
    """Populate a source filesystem blob db with stored blobs plus
    metadata-only "lost" records, then layer a migrating db over it."""
    source = TemporaryFilesystemBlobDB()
    assert get_blob_db() is source, (get_blob_db(), source)
    payload = b'binary data not valid utf-8 \xe4\x94'
    self.not_founds = set()
    self.blob_metas = []
    for code in (CODES.form_xml, CODES.multimedia, CODES.data_export):
        stored = source.put(BytesIO(payload), meta=new_meta(type_code=code))
        # "lost" meta: saved metadata with no blob data behind it
        missing = new_meta(type_code=code, content_length=42)
        missing.save()
        self.blob_metas.extend([stored, missing])
        self.not_founds.add((
            missing.id,
            missing.domain,
            missing.type_code,
            missing.parent_id,
            missing.key,
        ))
    self.test_size = len(self.blob_metas)
    target = TemporaryFilesystemBlobDB()
    self.db = TemporaryMigratingBlobDB(target, source)
    assert get_blob_db() is self.db, (get_blob_db(), self.db)
    discard_migration_state(self.slug)
def setUp(self):
    """Seed an S3 blob db with blobs and metadata-only "lost" records
    across two domains, then reset any prior migration state."""
    self.db = TemporaryS3BlobDB(settings.S3_BLOB_DB_SETTINGS)
    assert get_blob_db() is self.db, (get_blob_db(), self.db)
    payload = b'binary data not valid utf-8 \xe4\x94'
    self.blob_metas = []
    self.not_founds = set()
    pairs = [
        ('a', CODES.form_xml),
        ('a', CODES.application),
        ('b', CODES.form_xml),
    ]
    for domain, code in pairs:
        stored = self.db.put(
            BytesIO(payload),
            meta=new_meta(domain=domain, type_code=code),
        )
        self.blob_metas.append(stored)
        # metadata only; the blob itself is never written
        missing = new_meta(
            domain=domain,
            type_code=CODES.form_xml,
            content_length=42,
        )
        missing.save()
        self.blob_metas.append(missing)
        self.not_founds.add((
            missing.id,
            missing.domain,
            missing.type_code,
            missing.parent_id,
            missing.key,
        ))
    discard_migration_state(self.slug)
    discard_migration_state(self.slug, domain='a')
def test_get_for_parent_with_type_code(self):
    """get_for_parent filtered by type code returns only matching metas."""
    form_meta = self.db.put(BytesIO(b"fx"), meta=new_meta(type_code=CODES.form_xml))
    media_meta = self.db.put(BytesIO(b"cx"), meta=new_meta(type_code=CODES.multimedia))
    # both blobs share the default parent
    self.assertEqual(form_meta.parent_id, media_meta.parent_id)
    found = self.db.metadb.get_for_parent(form_meta.parent_id, CODES.form_xml)
    self.assertEqual([item.key for item in found], [form_meta.key])
def test_put_from_other_s3_db(self):
    """A blob stream read from one S3 db can be put into another."""
    # cleanup will be done by self.db
    other_db = S3BlobDB(settings.S3_BLOB_DB_SETTINGS)
    original = self.db.put(BytesIO(b"content"), meta=new_meta())
    with self.db.get(original.key) as stream:
        copied = other_db.put(stream, meta=new_meta())
    self.assertEqual(copied.content_length, original.content_length)
    with other_db.get(copied.key) as copied_stream:
        self.assertEqual(copied_stream.read(), b"content")
def test_extends(self):
    """Successive exports with ``already_exported`` contain only blobs
    not present in any earlier export.

    Refactored: the put/export/inspect sequence was written out three
    times; it is now factored into two local helpers so each stage is a
    single assertion.
    """

    def put_blobs(blobs):
        # Store each blob under the test domain; return the set of keys.
        keys = set()
        for blob in blobs:
            meta = self.db.put(BytesIO(blob), meta=new_meta(
                domain=self.domain_name,
                type_code=CODES.multimedia,
            ))
            self.blob_metas.append(meta)
            keys.add(meta.key)
        return keys

    def export(already_exported=None):
        # Run the exporter and return the set of keys in the archive.
        # Only pass already_exported when given, to preserve the
        # original first-export call signature.
        kwargs = {}
        if already_exported is not None:
            kwargs['already_exported'] = already_exported
        with NamedTemporaryFile() as out:
            exporter = EXPORTERS['all_blobs'](self.domain_name)
            exporter.migrate(out.name, force=True, **kwargs)
            with tarfile.open(out.name, 'r:gz') as tgzfile:
                return set(tgzfile.getnames())

    # First export file ...
    keys_in_file_one = put_blobs([b'ham', b'spam', b'eggs'])
    self.assertEqual(export(), keys_in_file_one)
    # Second export file extends first ...
    keys_in_file_two = put_blobs([b'foo', b'bar', b'baz'])
    self.assertEqual(export(keys_in_file_one), keys_in_file_two)
    # Third export file extends first and second ...
    keys_in_file_three = put_blobs([b'wibble', b'wobble', b'wubble'])
    self.assertEqual(
        export(keys_in_file_one | keys_in_file_two),
        keys_in_file_three,
    )
def test_reparent(self):
    """reparent moves every meta from one parent to another while
    leaving unrelated parents untouched."""
    metadb = self.db.metadb
    self.db.put(BytesIO(b"content"), meta=new_meta(parent_id="no-change"))
    moved = [
        self.db.put(BytesIO(b"content"), meta=new_meta(parent_id="old", name=name))
        for name in "abc"
    ]
    new_parent = new_id_in_same_dbalias("old")
    metadb.reparent("old", new_parent)
    self.assertEqual(metadb.get_for_parent("old"), [])
    self.assertEqual(
        [item.id for item in metadb.get_for_parent(new_parent)],
        [item.id for item in moved],
    )
    self.assertEqual(len(metadb.get_for_parent("no-change")), 1)
def test_delete_no_args(self):
    """delete() without arguments raises and deletes nothing."""
    meta = self.db.put(BytesIO(b"content"), meta=new_meta())
    with self.assertRaises(TypeError):
        self.db.delete()
    # the blob survived the failed call
    with self.db.get(key=meta.key) as blob:
        self.assertEqual(blob.read(), b"content")
    self.assertTrue(self.db.delete(key=meta.key))
def test_get_by_key(self):
    """metadb.get can look up a meta by parent id and key."""
    stored = self.db.put(BytesIO(b"cx"), meta=new_meta())
    fetched = self.db.metadb.get(parent_id=stored.parent_id, key=stored.key)
    self.assertEqual(fetched.key, stored.key)
def test_expire(self):
    """expire() gives a previously non-expiring blob a future
    expiration date."""
    meta = self.db.put(BytesIO(b"content"), meta=new_meta())
    self.assertIsNone(meta.expires_on)
    with AtomicBlobs(self.db) as db:
        db.expire(meta.parent_id, key=meta.key)
    # re-fetch the meta to observe the updated expiration
    refreshed = db.metadb.get(parent_id=meta.parent_id, key=meta.key)
    self.assertGreater(refreshed.expires_on, datetime.utcnow())
def test_get_missing_name(self):
    """metadb.get without a name argument raises TypeError."""
    meta = self.db.put(BytesIO(b"cx"), meta=new_meta())
    with self.assertRaises(TypeError):
        self.db.metadb.get(parent_id=meta.parent_id, type_code=meta.type_code)
def test_delete_from_both_fs_and_s3(self):
    """Deleting via the migrating db removes the blob from both the
    filesystem and S3 backends."""
    meta = self.fsdb.put(BytesIO(b"content"), meta=new_meta())
    with self.fsdb.get(key=meta.key) as stream:
        self.db.copy_blob(stream, key=meta.key)
    self.assertTrue(self.db.delete(key=meta.key))
    with self.assertRaises(mod.NotFound):
        self.db.get(key=meta.key)
def test_delete_failed(self):
    """A delete inside a failed atomic context is rolled back."""
    meta = self.db.put(BytesIO(b"content"), meta=new_meta())
    with self.assertRaises(Boom), AtomicBlobs(self.db) as atomic:
        atomic.delete(key=meta.key)
        raise Boom()
    # the blob is still readable after the rollback
    with self.db.get(key=meta.key) as blob:
        self.assertEqual(blob.read(), b"content")
def setUpClass(cls):
    """Create a temporary S3 blob db and one shared test blob."""
    super(TestBlobStream, cls).setUpClass()
    msg = "S3_BLOB_DB_SETTINGS not configured"
    with trap_extra_setup(AttributeError, msg=msg):
        config = settings.S3_BLOB_DB_SETTINGS
    cls.db = TemporaryS3BlobDB(config)
    cls.meta = cls.db.put(BytesIO(b"bytes"), meta=new_meta())
def test_delete_failed(self):
    """A delete inside a failed atomic context must be rolled back."""
    meta = self.db.put(BytesIO(b"content"), meta=new_meta())
    with self.assertRaises(Boom), AtomicBlobs(self.db) as atomic:
        atomic.delete(key=meta.key)
        raise Boom()
    # blob still readable after rollback (lookup by meta here)
    with self.db.get(meta=meta) as blob:
        self.assertEqual(blob.read(), b"content")
def test_get(self):
    """metadb.get can look up a meta by parent id, type code, and name."""
    stored = self.db.put(BytesIO(b"cx"), meta=new_meta())
    fetched = self.db.metadb.get(
        parent_id=stored.parent_id,
        type_code=stored.type_code,
        name="",
    )
    self.assertEqual(fetched.key, stored.key)
def test_put_and_size(self):
    """put() records datadog add counters and stores the right size."""
    payload = b"content"
    with patch_datadog() as stats:
        meta = self.db.put(BytesIO(payload), meta=new_meta())
    self.assertEqual(sum(stats["commcare.blobs.added.count"]), 1)
    self.assertEqual(sum(stats["commcare.blobs.added.bytes"]), len(payload))
    self.assertEqual(self.db.size(key=meta.key), len(payload))
def test_save_on_put(self):
    """put() assigns an id to the meta and persists a copy of it."""
    meta = new_meta()
    self.assertEqual(meta.id, None)
    self.db.put(BytesIO(b"content"), meta=meta)
    self.assertTrue(meta.id)
    persisted = get_meta(meta)
    # the saved row is a distinct object with the same key
    self.assertTrue(persisted is not meta)
    self.assertEqual(persisted.key, meta.key)
def test_save_empty_properties(self):
    """Empty properties read back as {} but are stored as NULL."""
    meta = new_meta()
    self.assertEqual(meta.properties, {})
    self.db.put(BytesIO(b"content"), meta=meta)
    self.assertEqual(get_meta(meta).properties, {})
    rows = (
        BlobMeta.objects.partitioned_query(meta.parent_id)
        .filter(id=meta.id)
        .values_list('id', 'properties')
    )
    self.assertEqual(list(rows), [(meta.id, None)])
def test_bulk_delete_temporary_metadata(self):
    """Bulk-deleting an expiring blob removes its metadata outright,
    with no DeletedBlobMeta record kept."""
    expires = datetime.utcnow() + timedelta(seconds=30)
    meta = self.db.put(BytesIO(b"content"), meta=new_meta(expires_on=expires))
    self.db.bulk_delete(metas=[meta])
    with self.assertRaises(BlobMeta.DoesNotExist):
        get_meta(meta)
    with self.assertRaises(DeletedBlobMeta.DoesNotExist):
        get_meta(meta, deleted=True)
def test_get_extra_arg(self):
    """metadb.get rejects unexpected keyword arguments."""
    meta = self.db.put(BytesIO(b"cx"), meta=new_meta())
    with self.assertRaises(TypeError):
        self.db.metadb.get(
            parent_id=meta.parent_id,
            type_code=meta.type_code,
            name="",
            domain="test",
        )
def test_bulk_delete_permanent_metadata(self):
    """Bulk-deleting a permanent blob records when it was deleted."""
    meta = self.db.put(BytesIO(b"content"), meta=new_meta())
    deleted_at = datetime.utcnow()
    # pin the clock used by the metadata layer so deleted_on is exact
    with patch('corehq.blobs.metadata._utcnow', return_value=deleted_at):
        self.db.bulk_delete(metas=[meta])
    with self.assertRaises(BlobMeta.DoesNotExist):
        get_meta(meta)
    self.assertEqual(get_meta(meta, deleted=True).deleted_on, deleted_at)
def setUpClass(cls):
    """Populate a filesystem blob db with blobs across two domains,
    including metadata-only "lost" records that have no blob data.

    Bug fix: ``str(uuid.uuid4)`` stringified the *function object*
    (e.g. "<function uuid4 at 0x...>") instead of generating a random
    domain name; it now calls ``uuid.uuid4()``.
    """
    super().setUpClass()
    cls.db = TemporaryFilesystemBlobDB()
    assert get_blob_db() is cls.db, (get_blob_db(), cls.db)
    data = b'binary data not valid utf-8 \xe4\x94'
    cls.blob_metas = []
    cls.not_found = set()
    cls.domain_name = str(uuid.uuid4())  # was: str(uuid.uuid4)
    for type_code in [CODES.form_xml, CODES.multimedia, CODES.data_export]:
        for domain in (cls.domain_name, str(uuid.uuid4())):
            meta = cls.db.put(
                BytesIO(data),
                meta=new_meta(domain=domain, type_code=type_code),
            )
            # "lost" meta: saved metadata with no corresponding blob
            lost = new_meta(domain=domain, type_code=type_code, content_length=42)
            cls.blob_metas.append(meta)
            cls.blob_metas.append(lost)
            lost.save()
            cls.not_found.add(lost.key)
def test_expose_blob_download(self):
    """Content stored under a download ref's id streams back intact."""
    ref = expose_blob_download(
        self.identifier,
        expiry=60,
        content_disposition='text/xml',
    )
    self.db.put(BytesIO(b'content'), meta=new_meta(key=ref.download_id))
    download = BlobDownload.get(ref.download_id)
    response = download.toHttpResponse()
    self.assertEqual(next(response.streaming_content), b'content')
def test_delete_permanent_metadata(self):
    """Deleting a permanent blob records a deletion timestamp bounded
    by the creation time and the current time."""
    created = datetime.utcnow() - timedelta(minutes=5)
    meta = self.db.put(BytesIO(b"content"), meta=new_meta(created_on=created))
    self.db.delete(key=meta.key)
    with self.assertRaises(BlobMeta.DoesNotExist):
        get_meta(meta)
    deleted = get_meta(meta, deleted=True)
    self.assertIsNotNone(deleted.deleted_on)
    self.assertGreaterEqual(deleted.deleted_on, meta.created_on)
    self.assertLessEqual(deleted.deleted_on, datetime.utcnow())
def test_user_auth_required_access_denied(self):
    """A download restricted to other owners returns HTTP 403."""
    ref = expose_blob_download(
        'identifier',
        expiry=60,
        content_disposition='text/xml',
        owner_ids=['foo'],  # current client is not an owner
    )
    self.db.put(BytesIO(b'content'), meta=new_meta(key=ref.download_id))
    url = reverse('retrieve_download', args=[ref.download_id]) + "?get_file"
    response = self.client.get(url)
    self.assertEqual(response.status_code, 403)
def test_copy_blob_masks_old_blob(self):
    """After copying into S3, reads succeed through the migrating db
    even when the filesystem backend is unreadable."""
    content = BytesIO(b"fs content")
    meta = self.fsdb.put(content, meta=new_meta())
    content.seek(0)
    self.db.copy_blob(content, key=meta.key)
    self.assertEndsWith(self.fsdb.get_path(key=meta.key), "/" + meta.key)
    # break the filesystem read path and verify S3 serves the blob
    with replattr(self.fsdb, "get", blow_up, sigcheck=False):
        with self.assertRaises(Boom):
            self.fsdb.get(key=meta.key)
        with self.db.get(key=meta.key) as blob:
            self.assertEqual(blob.read(), b"fs content")
def test_put_and_size(self):
    """put() emits metrics tagged with the blob type and the stored
    size matches the payload length."""
    payload = b"content"
    with capture_metrics() as metrics:
        meta = self.db.put(BytesIO(payload), meta=new_meta())
    self.assertEqual(
        metrics.sum('commcare.blobs.added.count', type='form_xml'), 1)
    self.assertEqual(
        metrics.sum('commcare.blobs.added.bytes', type='form_xml'), len(payload))
    self.assertEqual(self.db.size(key=meta.key), len(payload))
def test_save_empty_properties(self):
    """Empty properties read back as {} but are stored as SQL NULL."""
    meta = new_meta()
    self.assertEqual(meta.properties, {})
    self.db.put(BytesIO(b"content"), meta=meta)
    self.assertEqual(get_meta(meta).properties, {})
    dbname = get_db_alias_for_partitioned_doc(meta.parent_id)
    with connections[dbname].cursor() as cursor:
        # inspect the raw column value, bypassing the ORM
        cursor.execute(
            "SELECT id, properties FROM blobs_blobmeta WHERE id = %s",
            [meta.id],
        )
        rows = cursor.fetchall()
    self.assertEqual(rows, [(meta.id, None)])
def test_bulk_delete(self):
    """metadb.bulk_delete removes exactly the metas given to it."""
    metas = []
    for name in "abc":
        meta = new_meta(parent_id="parent", name=name)
        meta.content_length = 0
        self.db.metadb.put(meta)
        metas.append(meta)
    first, second, survivor = metas
    self.db.metadb.bulk_delete([first, second])
    for deleted in (first, second):
        with self.assertRaises(BlobMeta.DoesNotExist):
            get_meta(deleted)
    get_meta(survivor)  # should not have been deleted
def test_bulk_delete(self):
    """bulk_delete removes blobs and emits deletion metrics."""
    metas = [
        self.db.put(
            BytesIO("content-{}".format(name).encode('utf-8')),
            meta=new_meta(),
        )
        for name in ('test.5', 'test.6')
    ]
    with capture_metrics() as metrics:
        self.assertTrue(self.db.bulk_delete(metas=metas), 'delete failed')
    self.assertEqual(metrics.sum("commcare.blobs.deleted.count"), 2)
    # two 14-byte payloads
    self.assertEqual(metrics.sum("commcare.blobs.deleted.bytes"), 28)
    for meta in metas:
        with self.assertRaises(mod.NotFound):
            self.db.get(key=meta.key)
    return metas
def test_bulk_delete(self):
    """bulk_delete removes blobs and records datadog delete counters."""
    metas = [
        self.db.put(
            BytesIO("content-{}".format(name).encode('utf-8')),
            meta=new_meta(),
        )
        for name in ('test.5', 'test.6')
    ]
    with patch_datadog() as stats:
        self.assertTrue(self.db.bulk_delete(metas=metas), 'delete failed')
    self.assertEqual(sum(stats["commcare.blobs.deleted.count"]), 2)
    # two 14-byte payloads
    self.assertEqual(sum(stats["commcare.blobs.deleted.bytes"]), 28)
    for meta in metas:
        with self.assertRaises(mod.NotFound):
            self.db.get(key=meta.key)
    return metas
def test_1_very_big_blob(self):
    """A blob larger than available memory can still be exported,
    i.e. the exporter streams rather than buffering the whole blob."""
    # one 1MB block more than fits in memory
    block_count = ceil(self.memory / 1024 ** 2) + 1
    meta = self.db.put(
        MockBigBlobIO(self.mb_blocks(), block_count),
        meta=new_meta(domain=self.domain_name, type_code=CODES.multimedia),
    )
    self.blob_metas.append(meta)
    with NamedTemporaryFile() as out:
        exporter = EXPORTERS['all_blobs'](self.domain_name)
        exporter.migrate(out.name, force=True)
        with tarfile.open(out.name, 'r:gz') as tgzfile:
            self.assertEqual(
                set(tgzfile.getnames()),
                {m.key for m in self.blob_metas},
            )
def iter_keys(parent_id, name, code):
    # Generator driven by the enclosing scope's `action` string: builds
    # attachment metadata for a (possibly deprecated/duplicated) form and
    # yields (form_id, code, name) tuples for each form that should carry
    # the attachment. NOTE(review): closes over `action`, `attach`,
    # `get_new_id`, `RECEIVED_ON`, and `self` from the enclosing test —
    # behavior depends on those definitions, which are outside this view.
    args = {
        "parent_id": parent_id,
        # "badcode" actions force a mismatched type code on purpose
        "type_code": (CODES.form_xml if "badcode" in action else code),
        "name": name,
        "key": parent_id + "-" + name,
        "content_length": 2,
    }
    if "dup" not in action:
        # non-dup metas get a fixed created_on so timestamps are stable
        args["created_on"] = RECEIVED_ON
    meta = new_meta(**args)
    yield parent_id, code, name
    deprecated = "deprecated" in action
    if deprecated:
        # deprecated form: the live form gets a fresh id
        form_id = get_new_id(parent_id)
        yield form_id, code, name
    else:
        form_id = parent_id
    attach(meta, form_id, orig_id=(parent_id if deprecated else None))
    # meta_count tracks how many metadata rows we expect for meta.key
    meta_count = 1
    if action != "normal":
        meta = self.db.put(BytesIO(b"cx"), meta=meta)
        meta_count += 1
    if "old" in action:
        assert deprecated, action
        attach(meta, parent_id, deprecated_form_id=form_id)
        if "dup" in action:
            meta_count += 1
        else:
            assert "badcode" not in action, action
            meta.delete()
    if "x3" in action:
        # third form in the deprecation chain
        meta_count += 1
        third_id = get_new_id(form_id)
        attach(meta, third_id, deprecated_form_id=form_id)
        yield third_id, code, name
    # verify the expected number of metadata rows exist for this key,
    # counting both blob metas and form-attachment metas
    db = get_db_alias_for_partitioned_doc(parent_id)
    metas = (
        list(BlobMeta.objects.using(db).filter(key=meta.key))
        + list(get_form_attachment_blob_metas_by_key(meta.key, db))
    )
    assert len(metas) == meta_count, (metas, action, meta_count)
def test_open(self):
    """BlobMeta.open streams back the stored content."""
    meta = self.db.put(BytesIO(b"content"), meta=new_meta())
    with meta.open() as blob:
        self.assertEqual(blob.read(), b"content")
def test_is_image(self, content_type, result):
    """is_image reflects whether the content type is an image type."""
    self.assertEqual(new_meta(content_type=content_type).is_image, result)
def tearDown(self):
    """Delete all metas created under the shared default parent id."""
    # new_meta always uses the same parent_id by default
    leftover = self.db.metadb.get_for_parent(new_meta().parent_id)
    self.db.bulk_delete(metas=leftover)
    super(TestPartitionedMetaDB, self).tearDown()
def test_fall_back_to_fsdb(self):
    """A blob present only in the filesystem db is still served."""
    meta = self.fsdb.put(BytesIO(b"content"), meta=new_meta())
    with self.db.get(key=meta.key) as blob:
        self.assertEqual(blob.read(), b"content")
def test_put(self):
    """A put inside a committed atomic context is visible afterwards."""
    with AtomicBlobs(self.db) as atomic:
        meta = atomic.put(BytesIO(b"content"), meta=new_meta())
    with self.db.get(key=meta.key) as blob:
        self.assertEqual(blob.read(), b"content")
def test_save_properties(self):
    """Non-empty properties round-trip through put/get."""
    meta = new_meta(properties={"mood": "Vangelis"})
    self.db.put(BytesIO(b"content"), meta=meta)
    self.assertEqual(get_meta(meta).properties, {"mood": "Vangelis"})
def test_put_failed(self):
    """A put inside a failed atomic context is rolled back."""
    with self.assertRaises(Boom), AtomicBlobs(self.db) as atomic:
        meta = atomic.put(BytesIO(b"content"), meta=new_meta())
        raise Boom()
    # the blob written before the failure must be gone
    with self.assertRaises(NotFound):
        self.db.get(key=meta.key)
def test_delete(self):
    """Deleting a blob removes its metadata row."""
    blob_meta = new_meta()
    self.db.put(BytesIO(b"content"), meta=blob_meta)
    self.db.delete(key=blob_meta.key)
    with self.assertRaises(BlobMeta.DoesNotExist):
        get_meta(blob_meta)
def test_delete_missing_meta(self):
    """Deleting metadata for a nonexistent blob does not raise."""
    missing = new_meta()
    self.assertFalse(self.db.exists(key=missing.key))
    # delete should not raise
    self.db.metadb.delete(missing.key, 0)
def test_delete(self):
    """A delete inside a committed atomic context takes effect."""
    meta = self.db.put(BytesIO(b"content"), meta=new_meta())
    with AtomicBlobs(self.db) as atomic:
        atomic.delete(key=meta.key)
    with self.assertRaises(NotFound):
        self.db.get(key=meta.key)
def test_put_outside_context(self):
    """Using an AtomicBlobs db after its context exits is an error."""
    with AtomicBlobs(self.db) as atomic:
        pass
    with self.assertRaises(InvalidContext):
        atomic.put(BytesIO(b"content"), meta=new_meta())