def fsck_missing_blobs(vol, cwd):
    '''Look for blobs in tree or snaps which are not in blobstore.'''
    trees = vol.trees()
    tree_items = concatMap(lambda t: zipFrom(t, iter(t)))
    tree_links = ffilter(uncurry(lambda snap, item: item.is_link()))
    broken_tree_links = partial(
        ifilter,
        uncurry(lambda snap, item: not vol.bs.exists(item.csum())))
    checksum_grouper = partial(
        groupby,
        uncurry(lambda snap, item: item.csum()))

    def broken_link_printr(csum, snap_items):
        print(csum)
        for (snap, item) in snap_items:
            print('', snap.name, item.to_path(vol.root).relative_to(cwd), sep='\t')

    broken_links_printr = fmap(identify(uncurry(broken_link_printr)))
    num_bad_blobs = pipeline(
        tree_items,
        tree_links,
        broken_tree_links,
        checksum_grouper,
        broken_links_printr,
        count)(trees)
    return num_bad_blobs
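# The fsck passes above are built from a small combinator vocabulary
# (pipeline, fmap, ffilter, uncurry, groupby, count) whose semantics are
# inferred from usage in this file; the real implementations live
# elsewhere in the codebase (presumably farmfs.util). A minimal runnable
# sketch with toy stand-ins, mirroring the shape of fsck_missing_blobs:
def _sketch_broken_blob_pipeline():
    from collections import defaultdict
    from functools import partial, reduce

    def pipeline(*fns):
        # Compose left-to-right: pipeline(f, g)(x) == g(f(x)).
        return lambda x: reduce(lambda acc, f: f(acc), fns, x)

    fmap = lambda f: lambda xs: map(f, xs)        # lazy map stage
    ffilter = lambda p: lambda xs: filter(p, xs)  # lazy filter stage
    uncurry = lambda f: lambda pair: f(*pair)     # adapt f(a, b) to f((a, b))
    count = lambda xs: sum(1 for _ in xs)

    def groupby(key, xs):
        # Materialize groups; yields (key, [members]) pairs.
        groups = defaultdict(list)
        for x in xs:
            groups[key(x)].append(x)
        return groups.items()

    # (snap, item) pairs; each toy item is (csum, exists_in_blobstore).
    pairs = [
        ('snap1', ('abc', False)),
        ('snap2', ('abc', False)),
        ('snap1', ('def', True)),
    ]
    num_bad = pipeline(
        ffilter(uncurry(lambda snap, item: not item[1])),       # broken links only
        partial(groupby, uncurry(lambda snap, item: item[0])),  # group by csum
        fmap(uncurry(lambda csum, items: csum)),                # one row per bad blob
        count)(pairs)
    assert num_bad == 1  # 'abc' is missing; 'def' is present
    return num_bad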
def fsck_blob_permissions(vol, cwd):
    '''Look for blobstore blobs which are not readonly.'''
    blob_permissions = pipeline(
        ffilter(vol.bs.verify_blob_permissions),
        fmap(partial(print, "writable blob:")),
        count)(vol.bs.blobs())
    return blob_permissions
def fsck_frozen_ignored(vol, cwd):
    '''Look for frozen links which are in the ignored file.'''
    # TODO Some of this logic could be moved to volume. Which files are
    # members of the volume is a function of the volume.
    ignore_mdd = partial(skip_ignored, [safetype(vol.mdd)])
    ignored_frozen = pipeline(
        ftype_selector([LINK]),
        ffilter(uncurry(vol.is_ignored)),
        fmap(first),
        fmap(lambda p: p.relative_to(cwd)),
        fmap(partial(print, "Ignored file frozen")),
        count)(vol.root.entries(ignore_mdd))
    return ignored_frozen
def fsck_checksum_mismatches(vol, cwd):
    '''Look for checksum mismatches.'''
    # TODO CORRUPTION checksum mismatch in blob <CSUM>; would be nice to know back references.
    mismatches = pipeline(
        pfmap(lambda blob: (blob, vol.bs.blob_checksum(blob))),
        ffilter(lambda blob_csum: blob_csum[0] != blob_csum[1]),
        fmap(first),
        fmap(lambda csum: print("CORRUPTION checksum mismatch in blob %s" % csum)),
        count)(vol.bs.blobs())
    return mismatches
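# pfmap appears to be a parallel variant of fmap (the s3 upload path in
# dbg_ui passes workers=2), which fits here since re-checksumming blobs is
# I/O-bound. A hedged stand-in built on a thread pool; this is an assumed
# shape, not the project's actual implementation:
def _sketch_pfmap(f, workers=4):
    from concurrent.futures import ThreadPoolExecutor

    def stage(xs):
        # Same contract as fmap(f), but f runs across worker threads.
        with ThreadPoolExecutor(max_workers=workers) as pool:
            for result in pool.map(f, xs):
                yield result
    return stage

# e.g. pipeline(_sketch_pfmap(lambda b: (b, hash(b)), workers=2), count)(blobs)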
def ftype_selector(keep_types):
    # Take p and ft since we may want to use them in entries.
    keep = lambda p, ft: ft in keep_types
    return ffilter(uncurry(keep))
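# entries() evidently yields (path, filetype) pairs, which is why keep
# takes both. An illustrative use with plain strings standing in for the
# real Path objects and filetype constants (LINK etc.):
def _sketch_ftype_selector():
    links_only = ftype_selector(['link'])
    entries = [('a.txt', 'file'), ('b', 'link'), ('c', 'dir')]
    assert list(links_only(entries)) == [('b', 'link')]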
def test_ffilter():
    even_iter = ffilter(even)
    assert list(even_iter([1, 2, 3, 4])) == [2, 4]
def inc(x):
    return x + 1

assert inc(1) == 2

def even(x):
    return x % 2 == 0

assert even(2)
assert not even(1)

even_list = ffilter(even)
assert list(even_list([1, 2, 3, 4])) == [2, 4]

def test_empty_default():
    # Test empty behavior
    assert empty_default([], [1]) == [1]
    # Test non empty behavior
    l = [1, 2, 3]
    assert empty_default(l, [4]) == l
    # Test iterators work
    assert empty_default(iter([1, 2, 3]), iter([4])) == [1, 2, 3]
    # Test output is a copy
    i = [1, 2, 3]
    d = [5, 6, 7]
    o = empty_default(i, d)
    assert o == i
    assert o is not i  # a fresh list, not the input (assumed from the comment above)
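# The asserts above pin down empty_default's contract: materialize the
# iterable, fall back to the default when it is empty, and always return
# a fresh list. A stand-in consistent with those asserts (the real
# implementation may differ):
def _sketch_empty_default(xs, default):
    out = list(xs)
    return out if out else list(default)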
def dbg_ui(argv, cwd):
    exitcode = 0
    args = docopt(DBG_USAGE, argv)
    vol = getvol(cwd)
    if args['reverse']:
        csum = args['<csum>']
        if args['--all']:
            trees = vol.trees()
        elif args['--snap']:
            trees = [vol.snapdb.read(args['--snap'])]
        else:
            trees = [vol.tree()]
        tree_items = concatMap(lambda t: zipFrom(t, iter(t)))
        tree_links = ffilter(uncurry(lambda snap, item: item.is_link()))
        matching_links = ffilter(uncurry(lambda snap, item: item.csum() == csum))

        def link_printr(snap_item):
            (snap, item) = snap_item
            print(snap.name, item.to_path(vol.root).relative_to(cwd))

        links_printr = fmap(identify(link_printr))
        pipeline(
            tree_items,
            tree_links,
            matching_links,
            links_printr,
            consume)(trees)
    elif args['key']:
        db = vol.keydb
        key = args['<key>']
        if args['read']:
            printNotNone(db.readraw(key))
        elif args['delete']:
            db.delete(key)
        elif args['list']:
            for v in db.list(key):
                print(v)
        elif args['write']:
            value = args['<value>']
            db.write(key, value)
    elif args['walk']:
        if args['root']:
            printr = json_printr if args.get('--json') else snapshot_printr
            printr(encode_snapshot(vol.tree()))
        elif args['snap']:
            # TODO could add a test for output encoding.
            # TODO could add a test for snap format. Leading '/' on paths.
            printr = json_printr if args.get('--json') else snapshot_printr
            printr(encode_snapshot(vol.snapdb.read(args['<snapshot>'])))
        elif args['userdata']:
            blobs = vol.bs.blobs()
            printr = json_printr if args.get('--json') else strs_printr
            printr(blobs)
        elif args['keys']:
            printr = json_printr if args.get('--json') else strs_printr
            printr(vol.keydb.list())
    elif args['checksum']:
        # TODO <checksum> <full path>
        paths = empty_default(map(lambda x: Path(x, cwd), args['<path>']), [vol.root])
        for p in paths:
            print(p.checksum(), p.relative_to(cwd))
    elif args['link']:
        f = Path(args['<file>'], cwd)
        b = ingest(args['<target>'])
        if not vol.bs.exists(b):
            print("blob %s doesn't exist" % b)
            if args['--remote']:
                remote = vol.remotedb.read(args['--remote'])
            else:
                raise ValueError("aborting due to missing blob")
            vol.bs.fetch_blob(remote.bs, b)
        else:
            pass  # b exists; can we check its checksum?
        vol.bs.link_to_blob(f, b)
    elif args['rewrite-links']:
        target = Path(args['<target>'], cwd)
        for item in vol.tree():
            if not item.is_link():
                continue
            path = item.to_path(vol.root)
            new = vol.repair_link(path)
            if new is not None:
                print("Relinked %s to %s" % (path.relative_to(cwd), new))
    elif args['missing']:
        tree_csums = pipeline(
            ffilter(lambda item: item.is_link()),
            fmap(lambda item: item.csum()),
            set)(iter(vol.tree()))
        snapNames = args['<snap>']

        def missing_printr(csum, pathStrs):
            paths = sorted(imap(lambda pathStr: vol.root.join(pathStr), pathStrs))
            for path in paths:
                print("%s\t%s" % (csum, path.relative_to(cwd)))

        missing_csum2pathStr = pipeline(
            fmap(vol.snapdb.read),
            concatMap(iter),
            ffilter(lambda item: item.is_link()),
            ffilter(lambda item: not vol.is_ignored(item.to_path(vol.root), None)),
            ffilter(lambda item: item.csum() not in tree_csums),
            partial(groupby, lambda item: item.csum()),
            ffilter(uncurry(lambda csum, items: every(lambda item: not item.to_path(vol.root).exists(), items))),
            fmap(uncurry(lambda csum, items: (csum, list(imap(lambda item: item.pathStr(), items))))),
            fmap(uncurry(missing_printr)),
            count)(snapNames)
    elif args['blobtype']:
        for blob in args['<blob>']:
            blob = ingest(blob)
            # TODO here csum_to_path is really needed.
            print(blob, maybe("unknown", vol.bs.csum_to_path(blob).filetype()))
    elif args['blob']:
        for csum in args['<blob>']:
            csum = ingest(csum)
            # TODO here csum_to_path is needed.
            print(csum, vol.bs.csum_to_path(csum).relative_to(cwd))
    elif args['s3']:
        bucket = args['<bucket>']
        prefix = args['<prefix>']
        access_id, secret_key = load_s3_creds(None)
        s3bs = S3Blobstore(bucket, prefix, access_id, secret_key)
        if args['list']:
            pipeline(fmap(print), consume)(s3bs.blobs()())
        elif args['upload']:
            quiet = args.get('--quiet')
            print("Fetching remote blobs")
            s3_blobs = set(tqdm(
                s3bs.blobs()(),
                disable=quiet,
                desc="Fetching remote blobs",
                smoothing=1.0,
                dynamic_ncols=True,
                maxinterval=1.0))
            print("Remote Blobs: %s" % len(s3_blobs))
            print("Fetching local blobs")
            # TODO we are looking at tree, so blobs in snaps won't be sent.
            tree_blobs = set(tqdm(
                pipeline(
                    ffilter(lambda x: x.is_link()),
                    fmap(lambda x: x.csum()),
                    uniq)(iter(vol.tree())),
                disable=quiet,
                desc="Calculating local blobs",
                smoothing=1.0,
                dynamic_ncols=True,
                maxinterval=1.0))
            print("Local Blobs: %s" % len(tree_blobs))
            upload_blobs = tree_blobs - s3_blobs
            print("Uploading %s blobs to s3" % len(upload_blobs))
            with tqdm(
                    desc="Uploading to S3",
                    disable=quiet,
                    total=len(upload_blobs),
                    smoothing=1.0,
                    dynamic_ncols=True,
                    maxinterval=1.0) as pbar:
                def update_pbar(blob):
                    pbar.update(1)
                    pbar.set_description("Uploaded %s" % blob)

                def upload(blob):
                    s3bs.upload(blob, vol.bs.csum_to_path(blob))()
                    return blob

                all_success = pipeline(
                    ffilter(lambda x: x not in s3_blobs),
                    pfmap(upload, workers=2),
                    fmap(identify(update_pbar)),
                    partial(every, identity))(upload_blobs)
            if all_success:
                print("Successfully uploaded")
            else:
                print("Failed to upload")
                exitcode = 1
        elif args['check']:
            # S3 ETags are quoted, and for single-part uploads the ETag is
            # the object's MD5; blob names are compared against that.
            num_corrupt_blobs = pipeline(
                ffilter(lambda obj: obj['ETag'][1:-1] != obj['blob']),
                fmap(identify(lambda obj: print(obj['blob'], obj['ETag'][1:-1]))),
                count)(s3bs.blob_stats()())
            if num_corrupt_blobs == 0:
                print("All S3 blobs etags match")
            else:
                exitcode = 2
    return exitcode
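# The s3 upload branch above is an incremental sync: list both sides, then
# upload only the set difference tree_blobs - s3_blobs. Because blobs are
# content-addressed, checksum equality is enough to decide that a blob is
# already remote. A minimal sketch of that planning step:
def _sketch_upload_plan(local_blobs, remote_blobs):
    # sorted() only makes the plan deterministic for display.
    return sorted(set(local_blobs) - set(remote_blobs))

assert _sketch_upload_plan(['a', 'b', 'c'], ['b']) == ['a', 'c']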
def farmfs_ui(argv, cwd):
    exitcode = 0
    args = docopt(UI_USAGE, argv)
    if args['mkfs']:
        root = userPath2Path(args['<root>'] or ".", cwd)
        data = userPath2Path(args['<data>'], cwd) if args.get('<data>') \
            else Path(".farmfs/userdata", root)
        mkfs(root, data)
        print("FileSystem Created %s using blobstore %s" % (root, data))
    else:
        vol = getvol(cwd)
        paths = empty_default(map(lambda x: userPath2Path(x, cwd), args['<path>']), [vol.root])

        def delta_printr(delta):
            deltaPath = delta.path(vol.root).relative_to(cwd)
            print("diff: %s %s %s" % (delta.mode, deltaPath, delta.csum))

        stream_delta_printr = fmap(identify(delta_printr))

        def op_printr(op):
            (blob_op, tree_op, (desc, path)) = op
            print(desc % path.relative_to(cwd))

        stream_op_printr = fmap(identify(op_printr))

        if args['status']:
            get_thawed = fmap(vol.thawed)
            pipeline(
                get_thawed,
                concat,
                fmap(lambda p: p.relative_to(cwd)),
                fmap(print),
                consume)(paths)
        elif args['freeze']:
            def printr(freeze_op):
                s = "Imported %s with checksum %s" % (
                    freeze_op['path'].relative_to(cwd),
                    freeze_op['csum'])
                if freeze_op['was_dup']:
                    print(s, "was a duplicate")
                else:
                    print(s)

            importer = fmap(vol.freeze)
            get_thawed = fmap(vol.thawed)
            print_list = fmap(printr)
            pipeline(get_thawed, concat, importer, print_list, consume)(paths)
        elif args['thaw']:
            def printr(path):
                print("Exported %s" % path.relative_to(cwd))

            exporter = fmap(vol.thaw)
            get_frozen = fmap(vol.frozen)
            print_list = fmap(printr)
            pipeline(get_frozen, concat, exporter, print_list, consume)(paths)
        elif args['fsck']:
            fsck_actions = {
                '--broken': (fsck_missing_blobs, 1),
                '--frozen-ignored': (fsck_frozen_ignored, 4),
                '--blob-permissions': (fsck_blob_permissions, 8),
                '--checksums': (fsck_checksum_mismatches, 2),
            }
            fsck_tasks = [action for (verb, action) in fsck_actions.items() if args[verb]]
            if len(fsck_tasks) == 0:
                # No options were specified; run the whole suite.
                fsck_tasks = fsck_actions.values()
            for check, fail_code in fsck_tasks:
                # Each check returns its finding count; any nonzero count
                # sets that check's bit in the exit code.
                exitcode = exitcode | (check(vol, cwd) and fail_code)
        elif args['count']:
            trees = vol.trees()
            tree_items = concatMap(lambda t: zipFrom(t, iter(t)))
            tree_links = ffilter(uncurry(lambda snap, item: item.is_link()))
            checksum_grouper = partial(
                groupby,
                uncurry(lambda snap, item: item.csum()))

            def count_printr(csum, snap_items):
                print(csum, count(snap_items))
                for (snap, item) in snap_items:
                    print(snap.name, item.to_path(vol.root).relative_to(cwd))

            counts_printr = fmap(identify(uncurry(count_printr)))
            pipeline(
                tree_items,
                tree_links,
                checksum_grouper,
                counts_printr,
                consume)(trees)
        elif args['similarity']:
            dir_a = userPath2Path(args['<dir_a>'], cwd)
            dir_b = userPath2Path(args['<dir_b>'], cwd)
            print("left", "both", "right", "jaccard_similarity", sep="\t")
            print(*vol.similarity(dir_a, dir_b), sep="\t")
        elif args['gc']:
            applyfn = fmap(identity) if args.get('--noop') else fmap(vol.bs.delete_blob)
            fns = [fmap(identify(partial(print, "Removing"))), applyfn, consume]
            pipeline(*fns)(sorted(vol.unused_blobs(vol.items())))
        elif args['snap']:
            snapdb = vol.snapdb
            if args['list']:
                # TODO have an optional argument for which remote.
                print("\n".join(snapdb.list()))
            else:
                name = args['<snap>']
                if args['delete']:
                    snapdb.delete(name)
                elif args['make']:
                    snapdb.write(name, vol.tree())
                else:
                    snap = snapdb.read(name)
                    if args['read']:
                        for i in snap:
                            print(i)
                    elif args['restore']:
                        tree = vol.tree()
                        diff = tree_diff(vol.tree(), snap)
                        pipeline(
                            stream_delta_printr,
                            tree_patcher(vol, vol),
                            stream_op_printr,
                            stream_op_doer,
                            consume)(diff)
                    elif args['diff']:
                        diff = tree_diff(vol.tree(), snap)
                        pipeline(stream_delta_printr, consume)(diff)
        elif args['remote']:
            if args["add"]:
                remote_vol = getvol(userPath2Path(args['<root>'], cwd))
                vol.remotedb.write(args['<remote>'], remote_vol)
            elif args["remove"]:
                vol.remotedb.delete(args['<remote>'])
            elif args["list"]:
                if args["<remote>"]:
                    remote_vol = vol.remotedb.read(args['<remote>'])
                    print("\n".join(remote_vol.snapdb.list()))
                else:
                    for remote_name in vol.remotedb.list():
                        remote_vol = vol.remotedb.read(remote_name)
                        print(remote_name, remote_vol.root)
        elif args['pull'] or args['diff']:
            remote_vol = vol.remotedb.read(args['<remote>'])
            snap_name = args['<snap>']
            remote_snap = remote_vol.snapdb.read(snap_name) if snap_name else remote_vol.tree()
            diff = tree_diff(vol.tree(), remote_snap)
            if args['pull']:
                patcher = tree_patcher(vol, remote_vol)
                pipeline(
                    stream_delta_printr,
                    patcher,
                    stream_op_printr,
                    stream_op_doer,
                    consume)(diff)
            else:  # diff
                pipeline(stream_delta_printr, consume)(diff)
    return exitcode