예제 #1
0
    def testSchema2Id(self):
        """Check schema v2 object ids and their round trip through s3 keys.

        Creates a root id plus group, dataset and datatype ids under that
        root, derives two chunk ids from the dataset id, and verifies the
        collection lookup, the id format, the root-id helpers and the
        id <-> s3 key conversion for each of them.
        """
        root_id = createObjId("roots")
        group_id = createObjId("groups", rootid=root_id)
        dataset_id = createObjId("datasets", rootid=root_id)
        ctype_id = createObjId("datatypes", rootid=root_id)

        # note: the root id is expected to map to the "groups" collection
        self.assertEqual(getCollectionForId(root_id), "groups")
        self.assertEqual(getCollectionForId(group_id), "groups")
        self.assertEqual(getCollectionForId(dataset_id), "datasets")
        self.assertEqual(getCollectionForId(ctype_id), "datatypes")

        # chunk ids are built from the dataset id: replace the leading
        # prefix and append a "_1_2" suffix
        chunk_id = 'c' + dataset_id[1:] + "_1_2"
        print(chunk_id)
        chunk_partition_id = 'c42-' + dataset_id[2:] + "_1_2"

        # getCollectionForId is expected to reject both chunk id forms
        for chunk_oid in (chunk_id, chunk_partition_id):
            with self.assertRaises(ValueError):
                getCollectionForId(chunk_oid)

        valid_ids = (group_id, dataset_id, ctype_id, chunk_id,
                     chunk_partition_id, root_id)

        # the root's s3 key minus the ".group.json" file name is the key
        # prefix every object under this root should share
        s3prefix = getS3Key(root_id)
        self.assertTrue(s3prefix.endswith("/.group.json"))
        s3prefix = s3prefix[:-(len(".group.json"))]

        for oid in valid_ids:
            print("oid:", oid)
            self.assertGreaterEqual(len(oid), 38)
            parts = oid.split('-')
            self.assertEqual(len(parts), 6)
            self.assertIn(oid[0], ('g', 'd', 't', 'c'))
            self.assertTrue(isSchema2Id(oid))
            if oid == root_id:
                self.assertTrue(isRootObjId(oid))
            else:
                self.assertFalse(isRootObjId(oid))
            self.assertEqual(getRootObjId(oid), root_id)

            # id -> s3 key -> id must round trip
            s3key = getS3Key(oid)
            print(s3key)
            self.assertTrue(s3key.startswith(s3prefix))
            self.assertEqual(getObjId(s3key), oid)
            self.assertTrue(isS3ObjKey(s3key))
예제 #2
0
    def testIsValidUuid(self):
        """Check id validation helpers against fixed (non schema v2) ids.

        Uses hard-coded group, dataset, datatype, chunk and domain ids to
        verify isValidUuid (with and without obj_class), isSchema2Id,
        validateUuid, isRootObjId, isObjId and the id <-> s3 key conversion,
        then confirms that malformed ids are rejected.
        """
        group_id = "g-314d61b8-9954-11e6-a733-3c15c2da029e"
        dataset_id = "d-4c48f3ae-9954-11e6-a3cd-3c15c2da029e"
        ctype_id = "t-8c785f1c-9953-11e6-9bc2-0242ac110005"
        chunk_id = "c-8c785f1c-9953-11e6-9bc2-0242ac110005_7_2"
        domain_id = "mybucket/bob/mydata.h5"
        valid_ids = (group_id, dataset_id, ctype_id, chunk_id, domain_id)
        bad_ids = ("g-1e76d862", "/bob/mydata.h5")

        self.assertTrue(isValidUuid(group_id))
        self.assertFalse(isSchema2Id(group_id))
        # obj_class is expected to be accepted in several spellings
        self.assertTrue(isValidUuid(group_id, obj_class="Group"))
        self.assertTrue(isValidUuid(group_id, obj_class="group"))
        self.assertTrue(isValidUuid(group_id, obj_class="groups"))
        self.assertTrue(isValidUuid(dataset_id, obj_class="datasets"))
        self.assertFalse(isSchema2Id(dataset_id))
        self.assertTrue(isValidUuid(ctype_id, obj_class="datatypes"))
        self.assertFalse(isSchema2Id(ctype_id))
        self.assertTrue(isValidUuid(chunk_id, obj_class="chunks"))
        self.assertFalse(isSchema2Id(chunk_id))

        # must complete without raising for a well-formed id
        validateUuid(group_id)

        # isRootObjId only works for v2 schema ids, so a ValueError is
        # expected here
        with self.assertRaises(ValueError):
            isRootObjId(group_id)

        for item in valid_ids:
            self.assertTrue(isObjId(item))
            s3key = getS3Key(item)
            self.assertNotEqual(s3key[0], '/')
            self.assertTrue(isS3ObjKey(s3key))
            if item.find('/') > 0:
                continue  # bucket name gets lost when domain ids get converted to s3keys
            objid = getObjId(s3key)
            self.assertEqual(objid, item)

        for item in bad_ids:
            self.assertFalse(isValidUuid(item))
            self.assertFalse(isObjId(item))
예제 #3
0
async def getS3RootKeysCallback(app, s3keys):
    """Accumulate bucket-scan statistics for a batch of root s3 keys.

    For each key of the form "db/.../" the root id is derived, the
    "root_count" entry of app["bucket_scan"] is incremented and, when
    app["scanRootKeys_update"] is truthy, scanRoot is awaited for that root.
    If the root's ".info.json" object can be read, its counters are added to
    the running totals in app["bucket_scan"].

    Args:
        app: mapping holding the "bucket_scan" totals dict and the
            "scanRootKeys_update" flag.
        s3keys: list of s3 key prefixes to process.

    Raises:
        ValueError: if s3keys is not a list.
    """
    # Validate before calling len(): an unsized non-list value (e.g. None)
    # would otherwise fail with TypeError in the log statement below and
    # never reach this check.
    if not isinstance(s3keys, list):
        log.error("expected list result for s3keys callback")
        raise ValueError("unexpected callback format")
    log.info(f"getS3RootKeysCallback, {len(s3keys)} items")
    results = app["bucket_scan"]

    for s3key in s3keys:
        log.info(f"got key: {s3key}")
        if not s3key.startswith("db/") or s3key[-1] != '/':
            # skip anything that doesn't look like a root prefix
            log.error(f"unexpected key for getS3RootKeysCallback: {s3key}")
            continue
        root_id = getObjId(s3key + ".group.json")
        log.info(f"root_id: {root_id}")
        results["root_count"] += 1

        info_key = s3key + ".info.json"

        if app["scanRootKeys_update"]:
            log.info("updating...")
            await scanRoot(app, root_id, update=True)

        info_obj = None
        try:
            info_obj = await getStorJSONObj(app, info_key)
        except HTTPNotFound:
            pass  # info.json not created yet
        except HTTPInternalServerError as ie:
            # the root has already been counted; only its info totals
            # are skipped
            log.warn(f"error getting s3obj: {ie}")
            continue

        if info_obj:
            log.info(f"got obj: {info_obj}")
            results["info_count"] += 1
            results["group_count"] += info_obj["num_groups"]
            # unlike the other counters, the dataset count is taken from
            # the size of the "datasets" collection
            results["dataset_count"] += len(info_obj["datasets"])
            results["datatype_count"] += info_obj["num_datatypes"]
            results["chunk_count"] += info_obj["num_chunks"]
            results["allocated_bytes"] += info_obj["allocated_bytes"]
            results["metadata_bytes"] += info_obj["metadata_bytes"]