def testSchema2Id(self):
    # create a v2-schema root plus one object of each collection type
    root_id = createObjId("roots")
    group_id = createObjId("groups", rootid=root_id)
    dataset_id = createObjId("datasets", rootid=root_id)
    ctype_id = createObjId("datatypes", rootid=root_id)

    self.assertEqual(getCollectionForId(root_id), "groups")
    self.assertEqual(getCollectionForId(group_id), "groups")
    self.assertEqual(getCollectionForId(dataset_id), "datasets")
    self.assertEqual(getCollectionForId(ctype_id), "datatypes")

    # chunk ids are derived from dataset ids, with the chunk coordinates
    # appended as a suffix
    chunk_id = 'c' + dataset_id[1:] + "_1_2"
    print(chunk_id)
    chunk_partition_id = 'c42-' + dataset_id[2:] + "_1_2"

    # chunk ids don't belong to any collection
    for bad_id in (chunk_id, chunk_partition_id):
        try:
            getCollectionForId(bad_id)
            self.fail("expected ValueError for chunk id")
        except ValueError:
            pass  # expected

    valid_ids = (group_id, dataset_id, ctype_id, chunk_id, chunk_partition_id, root_id)
    s3prefix = getS3Key(root_id)
    self.assertTrue(s3prefix.endswith("/.group.json"))
    # strip the object suffix to get the key prefix shared by this root's objects
    s3prefix = s3prefix[:-len(".group.json")]

    for oid in valid_ids:
        print("oid:", oid)
        self.assertTrue(len(oid) >= 38)
        parts = oid.split('-')
        self.assertEqual(len(parts), 6)
        self.assertTrue(oid[0] in ('g', 'd', 't', 'c'))
        self.assertTrue(isSchema2Id(oid))
        if oid == root_id:
            self.assertTrue(isRootObjId(oid))
        else:
            self.assertFalse(isRootObjId(oid))
        self.assertEqual(getRootObjId(oid), root_id)

        # each id should round-trip through its S3 key
        s3key = getS3Key(oid)
        print(s3key)
        self.assertTrue(s3key.startswith(s3prefix))
        self.assertEqual(getObjId(s3key), oid)
        self.assertTrue(isS3ObjKey(s3key))
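# For reference (inferred from the assertions above; the exact uuid grouping
# is illustrative): a schema-2 id is a one-char type prefix ('g', 'd', 't',
# or 'c') followed by a uuid split into five dash-separated fields, and the
# root object's S3 key has the form "db/<uuid>/.group.json", so every object
# stored under that root shares the "db/<uuid>/" key prefix.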
def testIsValidUuid(self):
    # schema-1 ids: a one-char type prefix plus a standard uuid
    group_id = "g-314d61b8-9954-11e6-a733-3c15c2da029e"
    dataset_id = "d-4c48f3ae-9954-11e6-a3cd-3c15c2da029e"
    ctype_id = "t-8c785f1c-9953-11e6-9bc2-0242ac110005"
    chunk_id = "c-8c785f1c-9953-11e6-9bc2-0242ac110005_7_2"
    domain_id = "mybucket/bob/mydata.h5"
    valid_ids = (group_id, dataset_id, ctype_id, chunk_id, domain_id)
    bad_ids = ("g-1e76d862", "/bob/mydata.h5")

    self.assertTrue(isValidUuid(group_id))
    self.assertFalse(isSchema2Id(group_id))
    # obj_class is case-insensitive and accepts singular or plural forms
    self.assertTrue(isValidUuid(group_id, obj_class="Group"))
    self.assertTrue(isValidUuid(group_id, obj_class="group"))
    self.assertTrue(isValidUuid(group_id, obj_class="groups"))
    self.assertTrue(isValidUuid(dataset_id, obj_class="datasets"))
    self.assertFalse(isSchema2Id(dataset_id))
    self.assertTrue(isValidUuid(ctype_id, obj_class="datatypes"))
    self.assertFalse(isSchema2Id(ctype_id))
    self.assertTrue(isValidUuid(chunk_id, obj_class="chunks"))
    self.assertFalse(isSchema2Id(chunk_id))
    validateUuid(group_id)

    try:
        isRootObjId(group_id)
        self.fail("expected ValueError for schema-1 id")
    except ValueError:
        # only works for v2 schema
        pass  # expected

    for item in valid_ids:
        self.assertTrue(isObjId(item))
        s3key = getS3Key(item)
        self.assertTrue(s3key[0] != '/')
        self.assertTrue(isS3ObjKey(s3key))
        if item.find('/') > 0:
            # bucket name gets lost when domain ids get converted to s3keys
            continue
        objid = getObjId(s3key)
        self.assertEqual(objid, item)

    for item in bad_ids:
        self.assertFalse(isValidUuid(item))
        self.assertFalse(isObjId(item))
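# Note the contrast with testSchema2Id above: these are schema-1 ids (a type
# prefix plus a standard 8-4-4-4-12 uuid), so isSchema2Id() is False for all
# of them; chunk ids carry a trailing "_<i>_<j>" coordinate suffix, and
# domain ids are plain "bucket/path" strings rather than uuids.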
async def getS3RootKeysCallback(app, s3keys):
    log.info(f"getS3RootKeysCallback, {len(s3keys)} items")
    if not isinstance(s3keys, list):
        log.error("expected list result for s3keys callback")
        raise ValueError("unexpected callback format")

    results = app["bucket_scan"]
    for s3key in s3keys:
        log.info(f"got key: {s3key}")
        # only top-level root prefixes of the form "db/<uuid>/" are expected
        if not s3key.startswith("db/") or s3key[-1] != '/':
            log.error(f"unexpected key for getS3RootKeysCallback: {s3key}")
            continue

        # recover the root id from the prefix's .group.json key
        root_id = getObjId(s3key + ".group.json")
        log.info(f"root_id: {root_id}")
        results["root_count"] += 1

        info_key = s3key + ".info.json"
        if app["scanRootKeys_update"]:
            log.info("updating...")
            await scanRoot(app, root_id, update=True)

        info_obj = None
        try:
            info_obj = await getStorJSONObj(app, info_key)
        except HTTPNotFound:
            pass  # info.json not created yet
        except HTTPInternalServerError as ie:
            log.warn(f"error getting s3obj: {ie}")
            continue

        if info_obj:
            # accumulate per-root stats into the scan totals
            log.info(f"got obj: {info_obj}")
            results["info_count"] += 1
            results["group_count"] += info_obj["num_groups"]
            results["dataset_count"] += len(info_obj["datasets"])
            results["datatype_count"] += info_obj["num_datatypes"]
            results["chunk_count"] += info_obj["num_chunks"]
            results["allocated_bytes"] += info_obj["allocated_bytes"]
            results["metadata_bytes"] += info_obj["metadata_bytes"]
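# A minimal sketch of how this callback could be driven (assumptions: a
# storage-listing helper named getStorKeys that invokes the callback for each
# page of keys under a prefix -- the name and keyword arguments used here are
# illustrative, not confirmed):
async def scanBucketRoots(app, update=False):
    # Hypothetical driver, not part of this module: seed the accumulator that
    # getS3RootKeysCallback expects, then list the "db/<uuid>/" root prefixes.
    app["bucket_scan"] = {
        "root_count": 0,
        "info_count": 0,
        "group_count": 0,
        "dataset_count": 0,
        "datatype_count": 0,
        "chunk_count": 0,
        "allocated_bytes": 0,
        "metadata_bytes": 0,
    }
    app["scanRootKeys_update"] = update
    await getStorKeys(app, prefix="db/", deliminator='/',
                      callback=getS3RootKeysCallback)
    return app["bucket_scan"]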