async def bucketCheck(app):
    """ Verify that contents of bucket are self-consistent
    """
    now = int(time.time())
    log.info("bucket check {}".format(unixTimeToUTC(now)))

    # do initial listKeys
    await listKeys(app)

    # clear used flags
    clearUsedFlags(app)

    # mark objs
    await markObjs(app)

    unlinked_count = 0
    s3objs = app["s3objs"]
    for objid in s3objs:
        if isValidUuid(objid) and not isValidChunkId(objid):
            try:
                s3obj = await getS3Obj(app, objid)
                if s3obj.used is False:
                    unlinked_count += 1
            except HTTPInternalServerError as hpe:
                log.warn("got error retrieving {}: {}".format(objid, hpe.code))

    domains = app["domains"]
    for domain in domains:
        print("domain:", domain)
    roots = app["roots"]
    for root in roots:
        print("root:", root)

    top_level_domains = []
    for domain in domains:
        if domain[0] != '/':
            log.error("unexpected domain: {}".format(domain))
            continue
        if domain[1:].find('/') == -1:
            top_level_domains.append(domain)

    print("top-level-domains:")
    for domain in top_level_domains:
        print(domain)
    print("=" * 80)

    print("total storage: {}".format(app["bytes_in_bucket"]))
    print("Num objects: {}".format(len(app["s3objs"])))
    print("Num domains: {}".format(len(app["domains"])))
    print("Num root groups: {}".format(len(app["roots"])))
    print("Unlinked objects: {}".format(unlinked_count))
async def createGroup():
    """ create a new group and link it to the parent group with
    link name of group name
    """
    client = globals["client"]
    domain = globals["domain"]
    params = {"host": domain}
    base_req = getEndpoint()
    headers = getRequestHeaders()

    # create a new group
    req = base_req + "/groups"
    log.info("POST:" + req)
    globals["grp_request_count"] += 1
    group_name = globals["grp_request_count"]
    timeout = config.get("timeout")
    async with client.post(req, headers=headers, timeout=timeout, params=params) as rsp:
        if rsp.status != 201:
            log.error("POST {} failed with status: {}, rsp: {}".format(req, rsp.status, str(rsp)))
            globals["grp_failed_posts"] += 1
            raise HttpProcessingError(code=rsp.status, message="Unexpected error")
        else:
            globals["group_count"] += 1
            log.info("group_count: {}".format(globals["group_count"]))
        group_json = await rsp.json()
        group_id = group_json["id"]

    # link group to parent
    root_id = globals["root"]
    group_name = "group_{}".format(group_name)
    req = base_req + "/groups/" + root_id + "/links/" + group_name
    data = {"id": group_id}
    log.info("PUT " + req)
    globals["lnk_request_count"] += 1
    async with client.put(req, data=json.dumps(data), headers=headers, timeout=timeout, params=params) as rsp:
        if rsp.status == 409:
            # another task has created this link already
            log.warn("got 409 in request: " + req)
        elif rsp.status != 201:
            globals["lnk_failed_posts"] += 1
            log.error("got http error: {} for request: {}, rsp: {}".format(rsp.status, req, rsp))
            raise HttpProcessingError(code=rsp.status, message="Unexpected error")
        else:
            link_created = True

    return group_id
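
# Usage sketch (illustrative, not part of the original script): createGroup() is a
# coroutine, so a driver could issue several group creations concurrently, assuming
# the globals dict (client, domain, root, counters) has already been initialized by
# the script's setup code. The helper name below is hypothetical.
#
#   async def create_groups(count):
#       return await asyncio.gather(*[createGroup() for _ in range(count)])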
async def checkDataset(app, dset_key):
    log.info(f"checkDataset for key: {dset_key}")
    dset_json = await getStorJSONObj(app, dset_key)
    dset_id = dset_json["id"]
    prefix_old = app["prefix_old"]
    prefix_new = app["prefix_new"]
    do_update = app["do_update"]
    indirect_dataset_keys = app["indirect_dataset_keys"]
    app["dataset_count"] += 1
    log.info(f"checkDataset for id: {dset_id}")
    if "layout" not in dset_json:
        log.info("no layout found")
        return
    layout_json = dset_json["layout"]
    if "class" not in layout_json:
        log.warn(f"no class found in layout for id: {dset_id}")
        return
    layout_class = layout_json["class"]
    log.info(f"got layout_class: {layout_class}")
    if layout_class in ('H5D_CONTIGUOUS_REF', 'H5D_CHUNKED_REF'):
        if "file_uri" not in layout_json:
            log.warn(
                f"Expected to find key 'file_uri' in layout_json for id: {dset_id}"
            )
            return
        file_uri = layout_json["file_uri"]
        if file_uri.startswith(prefix_old):
            new_file_uri = prefix_new + file_uri[len(prefix_old):]
            log.info(f"replacing uri: {file_uri} with {new_file_uri}")
            app["matched_dset_uri"] += 1
            if do_update:
                # update the dataset json
                layout_json["file_uri"] = new_file_uri
                dset_json["layout"] = layout_json
                # write back to storage
                try:
                    await putStorJSONObj(app, dset_key, dset_json)
                    log.info(f"dataset {dset_id} updated")
                except Exception as e:
                    log.error(f"got exception writing dataset json: {e}")
    elif layout_class == 'H5D_CHUNKED_REF_INDIRECT':
        # add to list to be scanned later
        indirect_dataset_keys.append(dset_key[:-len(".dataset.json")])
    else:
        log.info(f"skipping check for layout_class: {layout_class}")
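
# Illustrative example (hypothetical values): with prefix_old="s3://old-bucket/"
# and prefix_new="s3://new-bucket/", a layout file_uri of
# "s3://old-bucket/data/sample.h5" would be rewritten by checkDataset above to
# "s3://new-bucket/data/sample.h5" when do_update is set.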
async def getS3RootKeysCallback(app, s3keys):
    log.info(f"getS3RootKeysCallback, {len(s3keys)} items")
    if not isinstance(s3keys, list):
        log.error("expected list result for s3keys callback")
        raise ValueError("unexpected callback format")
    results = app["bucket_scan"]
    for s3key in s3keys:
        log.info(f"got key: {s3key}")
        if not s3key.startswith("db/") or s3key[-1] != '/':
            log.error(f"unexpected key for getS3RootKeysCallback: {s3key}")
            continue
        root_id = getObjId(s3key + ".group.json")
        log.info(f"root_id: {root_id}")
        results["root_count"] += 1

        info_key = s3key + ".info.json"
        if app["scanRootKeys_update"]:
            log.info("updating...")
            await scanRoot(app, root_id, update=True)

        info_obj = None
        try:
            info_obj = await getStorJSONObj(app, info_key)
        except HTTPNotFound:
            pass  # info.json not created yet
        except HTTPInternalServerError as ie:
            log.warn(f"error getting s3obj: {ie}")
            continue

        if info_obj:
            log.info(f"got obj: {info_obj}")
            results["info_count"] += 1
            results["group_count"] += info_obj["num_groups"]
            results["dataset_count"] += len(info_obj["datasets"])
            results["datatype_count"] += info_obj["num_datatypes"]
            results["chunk_count"] += info_obj["num_chunks"]
            results["allocated_bytes"] += info_obj["allocated_bytes"]
            results["metadata_bytes"] += info_obj["metadata_bytes"]
async def run_scan(app, rootid, update=False):
    root_key = getS3Key(rootid)
    if not root_key.endswith("/.group.json"):
        raise ValueError("unexpected root key")
    root_prefix = root_key[:-(len(".group.json"))]
    app["root_prefix"] = root_prefix

    try:
        await getStorKeys(app, prefix=root_prefix, suffix=".dataset.json", include_stats=False, callback=getKeysCallback)
    except ClientError as ce:
        log.error(f"run_scan - getStorKeys failed: {ce}")
    except HTTPNotFound:
        log.warn(
            f"getStorKeys - HTTPNotFound error for getStorKeys with prefix: {root_prefix}"
        )
    except HTTPInternalServerError:
        log.error(
            f"getStorKeys - HTTPInternalServerError for getStorKeys with prefix: {root_prefix}"
        )
    except Exception as e:
        log.error(
            f"getStorKeys - Unexpected Exception for getStorKeys with prefix: {root_prefix}: {e}"
        )

    # update all chunks for datasets with H5D_CHUNKED_REF_INDIRECT layout
    indirect_dataset_keys = app["indirect_dataset_keys"]
    for prefix in indirect_dataset_keys:
        log.info(f"got indirect prefix: {prefix}")
        # TBD...

    await releaseStorageClient(app)
def sig_handler(sig, frame):
    log.warn("Caught signal: {}".format(str(sig)))
    print_results()
    sys.exit()
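
# Example wiring (illustrative, not from the original script): the handler above
# only takes effect once it is registered for the signals of interest, typically
# in the script's main entry point:
#
#   signal.signal(signal.SIGTERM, sig_handler)
#   signal.signal(signal.SIGINT, sig_handler)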