def main():
    args = _get_args()
    mkdirp(conf.get_opt("session_dir"), exit_on_err=True)

    if args.mode == "list":
        session_dir = conf.get_opt("session_dir")
    else:
        session_dir = os.path.join(conf.get_opt("session_dir"),
                                   args.session)

    if not os.path.exists(session_dir) and args.mode not in ["create", "list"]:
        fail("Invalid session %s" % args.session)

    vol_dir = os.path.join(session_dir, args.volume)
    if not os.path.exists(vol_dir) and args.mode not in ["create", "list"]:
        fail("Session %s not created with volume %s"
             % (args.session, args.volume))

    mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"),
                            args.session,
                            args.volume,
                            "cli.log")
    setup_logger(logger, log_file, args.debug)

    # globals() will have all the functions already defined.
    # mode_<args.mode> will be the function name to be called
    globals()["mode_" + args.mode](session_dir, args)

def main(): global gtmpfilename args = None try: args = _get_args() mkdirp(conf.get_opt("session_dir"), exit_on_err=True) # force the default session name if mode is "query" if args.mode == "query": args.session = "default" if args.mode == "list": session_dir = conf.get_opt("session_dir") else: session_dir = os.path.join(conf.get_opt("session_dir"), args.session) if not os.path.exists(session_dir) and \ args.mode not in ["create", "list", "query"]: fail("Invalid session %s" % args.session) # volume involved, validate the volume first if args.mode not in ["list"]: validate_volume(args.volume) # "default" is a system defined session name if args.mode in ["create", "post", "pre", "delete"] and \ args.session == "default": fail("Invalid session %s" % args.session) vol_dir = os.path.join(session_dir, args.volume) if not os.path.exists(vol_dir) and args.mode not in \ ["create", "list", "query"]: fail("Session %s not created with volume %s" % (args.session, args.volume)) mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume), exit_on_err=True) log_file = os.path.join(conf.get_opt("log_dir"), args.session, args.volume, "cli.log") setup_logger(logger, log_file, args.debug) # globals() will have all the functions already defined. # mode_<args.mode> will be the function name to be called globals()["mode_" + args.mode](session_dir, args) except KeyboardInterrupt: if args is not None: if args.mode == "pre" or args.mode == "query": # cleanup session if gtmpfilename is not None: # no more interrupts until we clean up signal.signal(signal.SIGINT, signal.SIG_IGN) run_cmd_nodes("cleanup", args, tmpfilename=gtmpfilename) # Interrupted, exit with non zero error code sys.exit(2)
def main(): global gtmpfilename args = None try: args = _get_args() mkdirp(conf.get_opt("session_dir"), exit_on_err=True) # force the default session name if mode is "query" if args.mode == "query": args.session = "default" if args.mode == "list": session_dir = conf.get_opt("session_dir") else: session_dir = os.path.join(conf.get_opt("session_dir"), args.session) if not os.path.exists(session_dir) and \ args.mode not in ["create", "list", "query"]: fail("Invalid session %s" % args.session) # "default" is a system defined session name if args.mode in ["create", "post", "pre", "delete"] and \ args.session == "default": fail("Invalid session %s" % args.session) vol_dir = os.path.join(session_dir, args.volume) if not os.path.exists(vol_dir) and args.mode not in \ ["create", "list", "query"]: fail("Session %s not created with volume %s" % (args.session, args.volume)) mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume), exit_on_err=True) log_file = os.path.join(conf.get_opt("log_dir"), args.session, args.volume, "cli.log") setup_logger(logger, log_file, args.debug) # globals() will have all the functions already defined. # mode_<args.mode> will be the function name to be called globals()["mode_" + args.mode](session_dir, args) except KeyboardInterrupt: if args is not None: if args.mode == "pre" or args.mode == "query": # cleanup session if gtmpfilename is not None: # no more interrupts until we clean up signal.signal(signal.SIGINT, signal.SIG_IGN) run_cmd_nodes("cleanup", args, tmpfilename=gtmpfilename) # Interrupted, exit with non zero error code sys.exit(2)
def node_cleanup(host, args):
    localdir = is_host_local(host)

    # CHANGE_DETECTOR <SESSION> <VOLUME> <BRICK> <OUTFILE> <START> --debug
    # --gfidpath <TYPE>
    cmd = [conf.get_opt("nodecleanup"),
           args.session,
           args.volume] + (["--debug"] if args.debug else [])

    if not localdir:
        # prefix with ssh command if not local node
        cmd = ["ssh",
               "-i", conf.get_opt("secret_pem"),
               "root@%s" % host] + cmd

    execute(cmd, exit_msg="%s - Cleanup failed" % host, logger=logger)

def ssh_setup(args):
    pem_key_path = get_pem_key_path(args.session, args.volume)

    if not os.path.exists(pem_key_path):
        # Generate ssh-key
        cmd = ["ssh-keygen",
               "-N",
               "",
               "-f",
               pem_key_path]
        execute(cmd,
                exit_msg="Unable to generate ssh key %s" % pem_key_path,
                logger=logger)

        logger.info("Ssh key generated %s" % pem_key_path)

    try:
        shutil.copyfile(pem_key_path + ".pub",
                        os.path.join(conf.get_opt("session_dir"),
                                     ".keys",
                                     "%s_%s_secret.pem.pub" % (args.session,
                                                               args.volume)))
    except (IOError, OSError) as e:
        fail("Failed to copy public key to %s: %s"
             % (os.path.join(conf.get_opt("session_dir"), ".keys"), e),
             logger=logger)

    # Copy pub file to all nodes
    cmd = ["gluster",
           "system::",
           "copy",
           "file",
           "/glusterfind/.keys/%s.pub" % os.path.basename(pem_key_path)]
    execute(cmd, exit_msg="Failed to distribute ssh keys", logger=logger)

    logger.info("Distributed ssh key to all nodes of Volume")

    # Add to authorized_keys file in each node
    cmd = ["gluster",
           "system::",
           "execute",
           "add_secret_pub",
           "root",
           "/glusterfind/.keys/%s.pub" % os.path.basename(pem_key_path)]
    execute(cmd,
            exit_msg="Failed to add ssh keys to authorized_keys file",
            logger=logger)

    logger.info("Ssh key added to authorized_keys of Volume nodes")

def node_run(volume, host, path, start, outfile, args, fallback=False):
    """
    If host is local node, execute the command locally. If not local
    execute the CHANGE_DETECTOR command via ssh and copy the output file
    from remote node using scp.
    """
    localdir = is_host_local(host)

    # If Full backup is requested or start time is zero, use brickfind
    change_detector = conf.get_change_detector(args.change_detector)
    if ((start == 0 or args.full) and args.change_detector == "changelog") or \
            fallback:
        change_detector = conf.get_change_detector("brickfind")

    # CHANGE_DETECTOR <SESSION> <VOLUME> <BRICK> <OUTFILE> <START> --debug
    # --gfidpath <TYPE>
    cmd = [change_detector,
           args.session,
           volume,
           path,
           outfile,
           str(start),
           "--output-prefix",
           args.output_prefix] + \
        (["--debug"] if args.debug else []) + \
        (["--full"] if args.full else [])

    if not localdir:
        # prefix with ssh command if not local node
        cmd = ["ssh",
               "-i", conf.get_opt("secret_pem"),
               "root@%s" % host] + cmd

    rc, out, err = execute(cmd, logger=logger)
    if rc == 2:
        # Partial History Fallback
        logger.info("%s %s Fallback to brickfind" % (host, err.strip()))
        # Exit only from process, handled in main.
        sys.exit(rc)
    elif rc != 0:
        fail("%s - Change detection failed" % host, logger=logger)

    if not localdir:
        cmd_copy = ["scp",
                    "-i", conf.get_opt("secret_pem"),
                    "root@%s:/%s" % (host, outfile),
                    os.path.dirname(outfile)]
        execute(cmd_copy, exit_msg="%s - Copy command failed" % host,
                logger=logger)

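# Illustrative sketch (not part of the tool) of the local-vs-remote dispatch
# pattern node_run uses: the same argv is either run locally or prefixed
# with ssh when the brick lives on another node. The helper name and
# key-path argument here are assumptions; glusterfind's execute() wrapper
# additionally handles logging and exit messages.
import subprocess


def run_maybe_remote(cmd, host, pem_key, local):
    # Prefix the argv with ssh for remote bricks, exactly as node_run
    # does before invoking the change detector.
    if not local:
        cmd = ["ssh", "-i", pem_key, "root@%s" % host] + cmd
    return subprocess.call(cmd)
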
def cleanup(nodes, args):
    pool = []
    for num, node in enumerate(nodes):
        host, brick = node[1].split(":")
        # temp output file
        node_outfile = os.path.join(conf.get_opt("working_dir"),
                                    args.session,
                                    args.volume,
                                    "tmp_output_%s.txt" % num)

        try:
            os.remove(node_outfile)
        except (OSError, IOError):
            # TODO: Cleanup Failure, Handle
            pass

        p = Process(target=node_cleanup,
                    args=(host, args))
        p.start()
        pool.append(p)

    exit_codes = 0
    for p in pool:
        p.join()
        exit_codes += (0 if p.exitcode == 0 else 1)

    if exit_codes != 0:
        sys.exit(1)

def brickfind_crawl(brick, args): if brick.endswith("/"): brick = brick[0:len(brick)-1] working_dir = os.path.dirname(args.outfile) mkdirp(working_dir, exit_on_err=True, logger=logger) create_file(args.outfile, exit_on_err=True, logger=logger) with open(args.outfile, "a+") as fout: brick_path_len = len(brick) def output_callback(path, filter_result): path = path.strip() path = path[brick_path_len+1:] output_write(fout, path, args.output_prefix, encode=True) ignore_dirs = [os.path.join(brick, dirname) for dirname in conf.get_opt("brick_ignore_dirs").split(",")] find(brick, callback_func=output_callback, ignore_dirs=ignore_dirs) fout.flush() os.fsync(fout.fileno())
def mode_pre(session_dir, args):
    """
    Read from Session file and write to session.pre file
    """
    endtime_to_update = int(time.time()) - int(
        conf.get_opt("changelog_rollover_time"))
    status_file = os.path.join(session_dir, args.volume, "status")
    status_file_pre = status_file + ".pre"

    mkdirp(os.path.dirname(args.outfile), exit_on_err=True, logger=logger)

    start = 0
    try:
        with open(status_file) as f:
            start = int(f.read().strip())
    except ValueError:
        pass
    except (OSError, IOError) as e:
        fail("Error Opening Session file %s: %s"
             % (status_file, e), logger=logger)

    logger.debug("Pre is called - Session: %s, Volume: %s, "
                 "Start time: %s, End time: %s"
                 % (args.session, args.volume, start, endtime_to_update))

    run_in_nodes(args.volume, start, args)

    with open(status_file_pre, "w", buffering=0) as f:
        f.write(str(endtime_to_update))

    sys.stdout.write("Generated output file %s\n" % args.outfile)

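# A minimal sketch of the pre-run time window computed above, assuming a
# 15 second changelog rollover (the real value comes from
# conf.get_opt("changelog_rollover_time")): the end time is backed off by
# one rollover period so only fully rolled-over changelogs are consumed,
# while the start time is whatever the previous run recorded in "status".
import time

rollover = 15  # assumed default; read from config in mode_pre
endtime_to_update = int(time.time()) - rollover
start = 0  # replaced by int(open(status_file).read()) when a session exists
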
def brickfind_crawl(brick, args): if brick.endswith("/"): brick = brick[0:len(brick)-1] working_dir = os.path.dirname(args.outfile) mkdirp(working_dir, exit_on_err=True, logger=logger) create_file(args.outfile, exit_on_err=True, logger=logger) with open(args.outfile, "a+") as fout: brick_path_len = len(brick) def output_callback(path, filter_result): path = path.strip() path = path[brick_path_len+1:] output_write(fout, path, args.output_prefix, encode=(not args.no_encode), tag=args.tag) ignore_dirs = [os.path.join(brick, dirname) for dirname in conf.get_opt("brick_ignore_dirs").split(",")] find(brick, callback_func=output_callback, ignore_dirs=ignore_dirs) fout.flush() os.fsync(fout.fileno())
def mode_cleanup(args): working_dir = os.path.join(conf.get_opt("working_dir"), args.session, args.volume, args.tmpfilename) mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume), exit_on_err=True) log_file = os.path.join(conf.get_opt("log_dir"), args.session, args.volume, "changelog.log") setup_logger(logger, log_file) try: shutil.rmtree(working_dir, onerror=handle_rm_error) except (OSError, IOError) as e: logger.error("Failed to delete working directory: %s" % e) sys.exit(1)
def gfid_to_path_using_batchfind(brick, gfids_file, output_file):
    """
    find -samefile gets the inode number and crawls entire namespace
    to get the list of files/dirs having same inode number.
    Do find without any option, except the ignore directory option,
    print the output in <INODE_NUM> <PATH> format, use this output
    to look into in-memory dictionary of inode numbers got from the
    list of GFIDs
    """
    with open(output_file, "a+") as fout:
        inode_dict = {}
        with open(gfids_file) as f:
            for gfid in f:
                gfid = gfid.strip()
                backend_path = os.path.join(brick, ".glusterfs",
                                            gfid[0:2], gfid[2:4], gfid)

                try:
                    inode_dict[str(os.stat(backend_path).st_ino)] = 1
                except (IOError, OSError) as e:
                    if e.errno == ENOENT:
                        continue
                    else:
                        fail("%s Failed to convert to path from "
                             "GFID %s: %s" % (brick, gfid, e), logger=logger)

        if not inode_dict:
            return

        def inode_filter(path):
            try:
                st = os.lstat(path)
            except (OSError, IOError) as e:
                if e.errno == ENOENT:
                    st = None
                else:
                    raise

            if st and inode_dict.get(str(st.st_ino), None):
                return True

            return False

        # Length of brick path, to remove from output path
        brick_path_len = len(brick)

        def output_callback(path):
            path = path.strip()
            path = path[brick_path_len+1:]
            output_write(fout, path, args.output_prefix)

        ignore_dirs = [os.path.join(brick, dirname)
                       for dirname in
                       conf.get_opt("brick_ignore_dirs").split(",")]

        find(brick, callback_func=output_callback,
             filter_func=inode_filter,
             ignore_dirs=ignore_dirs)

        fout.flush()
        os.fsync(fout.fileno())

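# Sketch of the GFID-to-backend-path mapping used above: the first two
# byte pairs of the GFID act as a two-level directory fan-out under
# .glusterfs, so GFID "c0d0..." lives at <brick>/.glusterfs/c0/d0/c0d0...
# Shown standalone; it is the same join as in gfid_to_path_using_batchfind.
import os


def gfid_backend_path(brick, gfid):
    return os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)
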
def changelog_crawl(brick, end, args):
    """
    Init function, prepares working dir and calls Changelog query
    """
    if brick.endswith("/"):
        brick = brick[0:len(brick)-1]

    # WORKING_DIR/BRICKHASH/OUTFILE
    working_dir = os.path.dirname(args.outfile)
    brickhash = hashlib.sha1(brick)
    brickhash = str(brickhash.hexdigest())
    working_dir = os.path.join(working_dir, brickhash)

    mkdirp(working_dir, exit_on_err=True, logger=logger)
    create_file(args.outfile, exit_on_err=True, logger=logger)
    create_file(args.outfile + ".gfids", exit_on_err=True, logger=logger)

    log_file = os.path.join(conf.get_opt("log_dir"),
                            args.session,
                            args.volume,
                            "changelog.%s.log" % brickhash)

    logger.info("%s Started Changelog Crawl. Start: %s, End: %s"
                % (brick, args.start, end))
    get_changes(brick, working_dir, log_file, end, args)

def gfid_to_path_using_batchfind(brick, changelog_data): # If all the GFIDs converted using gfid_to_path_using_pgfid if not changelog_data.inodegfid_exists({"converted": 0}): return def inode_filter(path): # Looks in inodegfid table, if exists returns # inode number else None try: st = os.lstat(path) except (OSError, IOError): st = None if st and changelog_data.inodegfid_exists({"inode": st.st_ino}): return st.st_ino return None # Length of brick path, to remove from output path brick_path_len = len(brick) def output_callback(path, inode): # For each path found, encodes it and updates path1 # Also updates converted flag in inodegfid table as 1 path = path.strip() path = path[brick_path_len + 1 :] path = output_path_prepare(path, args.output_prefix) changelog_data.append_path1(path, inode) ignore_dirs = [os.path.join(brick, dirname) for dirname in conf.get_opt("brick_ignore_dirs").split(",")] # Full Namespace Crawl find(brick, callback_func=output_callback, filter_func=inode_filter, ignore_dirs=ignore_dirs)
def brickfind_crawl(brick, args): if brick.endswith("/"): brick = brick[0:len(brick)-1] working_dir = os.path.dirname(args.outfile) mkdirp(working_dir, exit_on_err=True, logger=logger) create_file(args.outfile, exit_on_err=True, logger=logger) with open(args.outfile, "a+") as fout: brick_path_len = len(brick) def output_callback(path, filter_result, is_dir): path = path.strip() path = path[brick_path_len+1:] if args.type == "both": output_write(fout, path, args.output_prefix, encode=(not args.no_encode), tag=args.tag, field_separator=args.field_separator) else: if (is_dir and args.type == "d") or ( (not is_dir) and args.type == "f"): output_write(fout, path, args.output_prefix, encode=(not args.no_encode), tag=args.tag, field_separator=args.field_separator) ignore_dirs = [os.path.join(brick, dirname) for dirname in conf.get_opt("brick_ignore_dirs").split(",")] find(brick, callback_func=output_callback, ignore_dirs=ignore_dirs) fout.flush() os.fsync(fout.fileno())
def main():
    try:
        args = _get_args()
        mkdirp(conf.get_opt("session_dir"), exit_on_err=True)

        # force the default session name if mode is "query"
        if args.mode == "query":
            args.session = "default"

        if args.mode == "list":
            session_dir = conf.get_opt("session_dir")
        else:
            session_dir = os.path.join(conf.get_opt("session_dir"),
                                       args.session)

        if not os.path.exists(session_dir) and \
                args.mode not in ["create", "list", "query"]:
            fail("Invalid session %s" % args.session)

        # "default" is a system defined session name
        if args.mode in ["create", "post", "pre", "delete"] and \
                args.session == "default":
            fail("Invalid session %s" % args.session)

        vol_dir = os.path.join(session_dir, args.volume)
        if not os.path.exists(vol_dir) and args.mode not in \
                ["create", "list", "query"]:
            fail("Session %s not created with volume %s"
                 % (args.session, args.volume))

        mkdirp(os.path.join(conf.get_opt("log_dir"), args.session,
                            args.volume),
               exit_on_err=True)
        log_file = os.path.join(conf.get_opt("log_dir"),
                                args.session,
                                args.volume,
                                "cli.log")
        setup_logger(logger, log_file, args.debug)

        # globals() will have all the functions already defined.
        # mode_<args.mode> will be the function name to be called
        globals()["mode_" + args.mode](session_dir, args)
    except KeyboardInterrupt:
        # Interrupted, exit with non zero error code
        sys.exit(2)

def mode_cleanup(args): working_dir = os.path.join(conf.get_opt("working_dir"), args.session, args.volume) mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume), exit_on_err=True) log_file = os.path.join(conf.get_opt("log_dir"), args.session, args.volume, "changelog.log") setup_logger(logger, log_file) try: shutil.rmtree(working_dir, onerror=handle_rm_error) except (OSError, IOError) as e: logger.error("Failed to delete working directory: %s" % e) sys.exit(1)
def gfid_to_path_using_pgfid(brick, changelog_data, args):
    """
    For all the pgfids collected, Converts to Path and
    does readdir on those directories and looks up inodegfid
    table for matching inode number.
    """
    populate_pgfid_and_inodegfid(brick, changelog_data)

    # If no GFIDs needs conversion to Path
    if not changelog_data.inodegfid_exists({"converted": 0}):
        return

    def inode_filter(path):
        # Looks in inodegfid table, if exists returns
        # inode number else None
        try:
            st = os.lstat(path)
        except (OSError, IOError):
            st = None

        if st and changelog_data.inodegfid_exists({"inode": st.st_ino}):
            return st.st_ino

        return None

    # Length of brick path, to remove from output path
    brick_path_len = len(brick)

    def output_callback(path, inode):
        # For each path found, encodes it and updates path1
        # Also updates converted flag in inodegfid table as 1
        path = path.strip()
        path = path[brick_path_len+1:]

        path = output_path_prepare(path, args)

        changelog_data.append_path1(path, inode)
        changelog_data.inodegfid_update({"converted": 1}, {"inode": inode})

    ignore_dirs = [os.path.join(brick, dirname)
                   for dirname in
                   conf.get_opt("brick_ignore_dirs").split(",")]

    for row in changelog_data.pgfid_get():
        try:
            path = symlink_gfid_to_path(brick, row[0])
            find(os.path.join(brick, path),
                 callback_func=output_callback,
                 filter_func=inode_filter,
                 ignore_dirs=ignore_dirs,
                 subdirs_crawl=False)
        except (IOError, OSError) as e:
            logger.warn("Error converting to path: %s" % e)
            continue

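# Hedged sketch of what symlink_gfid_to_path (used above) relies on: a
# directory's .glusterfs entry is itself a symlink shaped like
# ../../aa/bb/<parent-gfid>/<basename>, so a directory GFID resolves to a
# path by following these links up to the root GFID. Simplified here,
# without the error handling the real helper needs.
import os

ROOT_GFID = "00000000-0000-0000-0000-000000000001"


def resolve_dir_gfid(brick, gfid):
    if gfid == ROOT_GFID:
        return ""
    link = os.readlink(os.path.join(brick, ".glusterfs",
                                    gfid[0:2], gfid[2:4], gfid))
    pgfid, basename = link.split("/")[-2:]
    return os.path.join(resolve_dir_gfid(brick, pgfid), basename)
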
def mode_post(args): session_dir = os.path.join(conf.get_opt("session_dir"), args.session) status_file = os.path.join(session_dir, args.volume, "%s.status" % urllib.quote_plus(args.brick)) mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True, logger=logger) status_file_pre = status_file + ".pre" if os.path.exists(status_file_pre): os.rename(status_file_pre, status_file) sys.exit(0)
def mode_post(args): session_dir = os.path.join(conf.get_opt("session_dir"), args.session) status_file = os.path.join(session_dir, args.volume, "%s.status" % urllib.parse.quote_plus(args.brick)) mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True, logger=logger) status_file_pre = status_file + ".pre" if os.path.exists(status_file_pre): os.rename(status_file_pre, status_file) sys.exit(0)
def ssh_setup(): if not os.path.exists(conf.get_opt("secret_pem")): # Generate ssh-key cmd = ["ssh-keygen", "-N", "", "-f", conf.get_opt("secret_pem")] execute(cmd, exit_msg="Unable to generate ssh key %s" % conf.get_opt("secret_pem"), logger=logger) logger.info("Ssh key generated %s" % conf.get_opt("secret_pem")) # Copy pub file to all nodes cmd = [ "gluster", "system::", "copy", "file", "/" + os.path.basename(conf.get_opt("secret_pem")) + ".pub" ] execute(cmd, exit_msg="Failed to distribute ssh keys", logger=logger) logger.info("Distributed ssh key to all nodes of Volume") # Add to authorized_keys file in each node cmd = [ "gluster", "system::", "execute", "add_secret_pub", "root", os.path.basename(conf.get_opt("secret_pem")) + ".pub" ] execute(cmd, exit_msg="Failed to add ssh keys to authorized_keys file", logger=logger) logger.info("Ssh key added to authorized_keys of Volume nodes")
def mode_delete(args): session_dir = os.path.join(conf.get_opt("session_dir"), args.session) shutil.rmtree(os.path.join(session_dir, args.volume), onerror=handle_rm_error) # If the session contains only this volume, then cleanup the # session directory. If a session contains multiple volumes # then os.rmdir will fail with ENOTEMPTY try: os.rmdir(session_dir) except OSError as e: if not e.errno == ENOTEMPTY: logger.warn("Failed to delete session directory: %s" % e)
def mode_create(args): session_dir = os.path.join(conf.get_opt("session_dir"), args.session) status_file = os.path.join(session_dir, args.volume, "%s.status" % urllib.quote_plus(args.brick)) mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True, logger=logger) if not os.path.exists(status_file) or args.reset_session_time: with open(status_file, "w", buffering=0) as f: f.write(args.time_to_update) sys.exit(0)
def brickfind_crawl(brick, args): if brick.endswith("/"): brick = brick[0:len(brick) - 1] working_dir = os.path.dirname(args.outfile) mkdirp(working_dir, exit_on_err=True, logger=logger) create_file(args.outfile, exit_on_err=True, logger=logger) with open(args.outfile, "a+") as fout: brick_path_len = len(brick) def mtime_filter(path): try: st = os.lstat(path) except (OSError, IOError) as e: if e.errno == ENOENT: st = None else: raise if st and (st.st_mtime > args.start or st.st_ctime > args.start): return True return False def output_callback(path): path = path.strip() path = path[brick_path_len + 1:] output_write(fout, path, args.output_prefix) ignore_dirs = [ os.path.join(brick, dirname) for dirname in conf.get_opt("brick_ignore_dirs").split(",") ] if args.full: find(brick, callback_func=output_callback, ignore_dirs=ignore_dirs) else: find(brick, callback_func=output_callback, filter_func=mtime_filter, ignore_dirs=ignore_dirs) fout.flush() os.fsync(fout.fileno())
def mode_create(session_dir, args): logger.debug("Init is called - Session: %s, Volume: %s" % (args.session, args.volume)) execute(["gluster", "volume", "info", args.volume], exit_msg="Unable to get volume details", logger=logger) mkdirp(session_dir, exit_on_err=True, logger=logger) mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True, logger=logger) status_file = os.path.join(session_dir, args.volume, "status") if os.path.exists(status_file) and not args.force: fail("Session %s already created" % args.session, logger=logger) if not os.path.exists(status_file) or args.force: ssh_setup() execute(["gluster", "volume", "set", args.volume, "build-pgfid", "on"], exit_msg="Failed to set volume option build-pgfid on", logger=logger) logger.info("Volume option set %s, build-pgfid on" % args.volume) execute([ "gluster", "volume", "set", args.volume, "changelog.changelog", "on" ], exit_msg="Failed to set volume option " "changelog.changelog on", logger=logger) logger.info("Volume option set %s, changelog.changelog on" % args.volume) if not os.path.exists(status_file): with open(status_file, "w", buffering=0) as f: # Add Rollover time to current time to make sure changelogs # will be available if we use this time as start time time_to_update = int(time.time()) + int( conf.get_opt("changelog_rollover_time")) f.write(str(time_to_update)) sys.exit(0)
def brickfind_crawl(brick, args): if brick.endswith("/"): brick = brick[0:len(brick)-1] working_dir = os.path.dirname(args.outfile) mkdirp(working_dir, exit_on_err=True, logger=logger) create_file(args.outfile, exit_on_err=True, logger=logger) with open(args.outfile, "a+") as fout: brick_path_len = len(brick) def mtime_filter(path): try: st = os.lstat(path) except (OSError, IOError) as e: if e.errno == ENOENT: st = None else: raise if st and (st.st_mtime > args.start or st.st_ctime > args.start): return True return False def output_callback(path): path = path.strip() path = path[brick_path_len+1:] output_write(fout, path, args.output_prefix) ignore_dirs = [os.path.join(brick, dirname) for dirname in conf.get_opt("brick_ignore_dirs").split(",")] if args.full: find(brick, callback_func=output_callback, ignore_dirs=ignore_dirs) else: find(brick, callback_func=output_callback, filter_func=mtime_filter, ignore_dirs=ignore_dirs) fout.flush() os.fsync(fout.fileno())
def run_in_nodes(volume, start, args):
    """
    Get nodes of volume using gluster volume info, spawn a process
    each for a Node. Merge the output files once all the process
    complete their tasks.
    """
    nodes = get_nodes(volume)
    pool = []
    node_outfiles = []
    for num, node in enumerate(nodes):
        host, brick = node[1].split(":")
        # temp output file
        node_outfile = os.path.join(conf.get_opt("working_dir"),
                                    args.session,
                                    volume,
                                    "tmp_output_%s.txt" % num)
        node_outfiles.append(node_outfile)
        p = Process(target=node_run,
                    args=(volume, host, brick, start, node_outfile, args))
        p.start()
        pool.append(p)

    exit_codes = 0
    for idx, p in enumerate(pool):
        p.join()
        # Handle the Changelog failure, fallback to Brickfind
        if p.exitcode == 2:
            rc = failback_node_run(nodes[idx][1], idx, volume, start,
                                   node_outfiles[idx], args)
            exit_codes += (0 if rc == 0 else 1)
        elif p.exitcode != 0:
            exit_codes += (0 if p.exitcode == 0 else 1)

    if exit_codes != 0:
        sys.exit(1)

    # Merge all output files
    cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile]
    execute(cmd,
            exit_msg="Failed to merge output files "
            "collected from nodes", logger=logger)

    cleanup(nodes, args)

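# Sketch of the fan-out/join pattern run_in_nodes follows, with a
# hypothetical worker and host list standing in for node_run and the real
# brick nodes: one Process per brick, then a join pass that folds the exit
# codes into a single failure count.
from multiprocessing import Process


def worker(host):
    pass  # stand-in for node_run(volume, host, brick, ...)


pool = [Process(target=worker, args=(h,)) for h in ["node1", "node2"]]
for p in pool:
    p.start()
for p in pool:
    p.join()
failures = sum(1 for p in pool if p.exitcode != 0)
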
def mode_create(session_dir, args): logger.debug("Init is called - Session: %s, Volume: %s" % (args.session, args.volume)) execute(["gluster", "volume", "info", args.volume], exit_msg="Unable to get volume details", logger=logger) mkdirp(session_dir, exit_on_err=True, logger=logger) mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True, logger=logger) status_file = os.path.join(session_dir, args.volume, "status") if os.path.exists(status_file) and not args.force: fail("Session %s already created" % args.session, logger=logger) if not os.path.exists(status_file) or args.force: ssh_setup(args) execute(["gluster", "volume", "set", args.volume, "build-pgfid", "on"], exit_msg="Failed to set volume option build-pgfid on", logger=logger) logger.info("Volume option set %s, build-pgfid on" % args.volume) execute(["gluster", "volume", "set", args.volume, "changelog.changelog", "on"], exit_msg="Failed to set volume option " "changelog.changelog on", logger=logger) logger.info("Volume option set %s, changelog.changelog on" % args.volume) if not os.path.exists(status_file): with open(status_file, "w", buffering=0) as f: # Add Rollover time to current time to make sure changelogs # will be available if we use this time as start time time_to_update = int(time.time()) + int( conf.get_opt("changelog_rollover_time")) f.write(str(time_to_update)) sys.exit(0)
def changelog_crawl(brick, start, end, args):
    """
    Init function, prepares working dir and calls Changelog query
    """
    if brick.endswith("/"):
        brick = brick[0:len(brick)-1]

    # WORKING_DIR/BRICKHASH/OUTFILE
    working_dir = os.path.dirname(args.outfile)
    brickhash = hashlib.sha1(brick.encode())
    brickhash = str(brickhash.hexdigest())
    working_dir = os.path.join(working_dir, brickhash)

    mkdirp(working_dir, exit_on_err=True, logger=logger)

    log_file = os.path.join(conf.get_opt("log_dir"),
                            args.session,
                            args.volume,
                            "changelog.%s.log" % brickhash)

    logger.info("%s Started Changelog Crawl. Start: %s, End: %s"
                % (brick, start, end))
    return get_changes(brick, working_dir, log_file, start, end, args)

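# Example of the per-brick working directory key computed above: the SHA1
# of the brick path (the path below is made up) keeps temporary changelog
# state separate when one node hosts several bricks of the same volume.
import hashlib

brickhash = hashlib.sha1("/bricks/b1".encode()).hexdigest()
# working_dir then becomes <dirname(outfile)>/<brickhash>/
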
def gfid_to_path_using_batchfind(brick, changelog_data): # If all the GFIDs converted using gfid_to_path_using_pgfid if not changelog_data.inodegfid_exists({"converted": 0}): return def inode_filter(path): # Looks in inodegfid table, if exists returns # inode number else None try: st = os.lstat(path) except (OSError, IOError): st = None if st and changelog_data.inodegfid_exists({"inode": st.st_ino}): return st.st_ino return None # Length of brick path, to remove from output path brick_path_len = len(brick) def output_callback(path, inode): # For each path found, encodes it and updates path1 # Also updates converted flag in inodegfid table as 1 path = path.strip() path = path[brick_path_len + 1:] path = output_path_prepare(path, args) changelog_data.append_path1(path, inode) ignore_dirs = [ os.path.join(brick, dirname) for dirname in conf.get_opt("brick_ignore_dirs").split(",") ] # Full Namespace Crawl find(brick, callback_func=output_callback, filter_func=inode_filter, ignore_dirs=ignore_dirs)
def ssh_setup(): if not os.path.exists(conf.get_opt("secret_pem")): # Generate ssh-key cmd = ["ssh-keygen", "-N", "", "-f", conf.get_opt("secret_pem")] execute(cmd, exit_msg="Unable to generate ssh key %s" % conf.get_opt("secret_pem"), logger=logger) logger.info("Ssh key generated %s" % conf.get_opt("secret_pem")) # Copy pub file to all nodes cmd = ["gluster", "system::", "copy", "file", "/" + os.path.basename(conf.get_opt("secret_pem")) + ".pub"] execute(cmd, exit_msg="Failed to distribute ssh keys", logger=logger) logger.info("Distributed ssh key to all nodes of Volume") # Add to authorized_keys file in each node cmd = ["gluster", "system::", "execute", "add_secret_pub", "root", os.path.basename(conf.get_opt("secret_pem")) + ".pub"] execute(cmd, exit_msg="Failed to add ssh keys to authorized_keys file", logger=logger) logger.info("Ssh key added to authorized_keys of Volume nodes")
def get_pem_key_path(session, volume):
    return os.path.join(conf.get_opt("session_dir"),
                        session,
                        volume,
                        "%s_%s_secret.pem" % (session, volume))

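# Usage example (assuming session_dir is /var/lib/glusterd/glusterfind,
# the usual default; session and volume names are made up):
#   get_pem_key_path("backup", "gv0")
#   -> /var/lib/glusterd/glusterfind/backup/gv0/backup_gv0_secret.pem
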
import sys
import os
import logging
from errno import ENOENT

from utils import setup_logger, mkdirp
import conf


logger = logging.getLogger()


if __name__ == "__main__":
    # Args: <SESSION> <VOLUME>
    session = sys.argv[1]
    volume = sys.argv[2]

    working_dir = os.path.join(conf.get_opt("working_dir"),
                               session,
                               volume)

    mkdirp(os.path.join(conf.get_opt("log_dir"), session, volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"),
                            session,
                            volume,
                            "changelog.log")
    setup_logger(logger, log_file)

    try:
        def handle_rm_error(func, path, exc_info):
            if exc_info[1].errno == ENOENT:
                return

            raise exc_info[1]

action="store_true") parser.add_argument("--output-prefix", help="File prefix in output", default=".") parser.add_argument("--type", default="both") parser.add_argument("-N", "--only-namespace-changes", help="List only namespace changes", action="store_true") return parser.parse_args() if __name__ == "__main__": args = _get_args() mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume), exit_on_err=True) log_file = os.path.join(conf.get_opt("log_dir"), args.session, args.volume, "changelog.log") setup_logger(logger, log_file, args.debug) session_dir = os.path.join(conf.get_opt("session_dir"), args.session) status_file = os.path.join(session_dir, args.volume, "%s.status" % urllib.quote_plus(args.brick)) status_file_pre = status_file + ".pre" mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True, logger=logger) end = -1 if args.only_query:
def run_cmd_nodes(task, args, **kwargs): global node_outfiles nodes = get_nodes(args.volume) pool = [] for num, node in enumerate(nodes): host, brick = node[1].split(":") host_uuid = node[0] cmd = [] opts = {} # tmpfilename is valid only for tasks: pre, query and cleanup tmpfilename = kwargs.get("tmpfilename", "BADNAME") node_outfile = os.path.join(conf.get_opt("working_dir"), args.session, args.volume, tmpfilename, "tmp_output_%s" % num) if task == "pre": if vol_statusStr != "Started": fail("Volume %s is not online" % args.volume, logger=logger) # If Full backup is requested or start time is zero, use brickfind change_detector = conf.get_change_detector("changelog") tag = None if args.full: change_detector = conf.get_change_detector("brickfind") tag = args.tag_for_full_find.strip() if tag == "": tag = '""' if not is_host_local(host_uuid) else "" node_outfiles.append(node_outfile) # remote file will be copied into this directory mkdirp(os.path.dirname(node_outfile), exit_on_err=True, logger=logger) FS = args.field_separator if not is_host_local(host_uuid): FS = "'" + FS + "'" cmd = [change_detector, args.session, args.volume, host, brick, node_outfile] + \ ([str(kwargs.get("start")), str(kwargs.get("end"))] if not args.full else []) + \ ([tag] if tag is not None else []) + \ ["--output-prefix", args.output_prefix] + \ (["--debug"] if args.debug else []) + \ (["--no-encode"] if args.no_encode else []) + \ (["--only-namespace-changes"] if args.only_namespace_changes else []) + \ (["--field-separator", FS] if args.full else []) opts["node_outfile"] = node_outfile opts["copy_outfile"] = True elif task == "query": # If Full backup is requested or start time is zero, use brickfind tag = None change_detector = conf.get_change_detector("changelog") if args.full: change_detector = conf.get_change_detector("brickfind") tag = args.tag_for_full_find.strip() if tag == "": tag = '""' if not is_host_local(host_uuid) else "" node_outfiles.append(node_outfile) # remote file will be copied into this directory mkdirp(os.path.dirname(node_outfile), exit_on_err=True, logger=logger) FS = args.field_separator if not is_host_local(host_uuid): FS = "'" + FS + "'" cmd = [change_detector, args.session, args.volume, host, brick, node_outfile] + \ ([str(kwargs.get("start")), str(kwargs.get("end"))] if not args.full else []) + \ ([tag] if tag is not None else []) + \ ["--only-query"] + \ ["--output-prefix", args.output_prefix] + \ (["--debug"] if args.debug else []) + \ (["--no-encode"] if args.no_encode else []) + \ (["--only-namespace-changes"] if args.only_namespace_changes else []) + \ (["--field-separator", FS] if args.full else []) opts["node_outfile"] = node_outfile opts["copy_outfile"] = True elif task == "cleanup": # After pre/query run, cleanup the working directory and other # temp files. 
Remove the directory to which node_outfile has # been copied in main node try: os.remove(node_outfile) except (OSError, IOError): logger.warn("Failed to cleanup temporary file %s" % node_outfile) pass cmd = [conf.get_opt("nodeagent"), "cleanup", args.session, args.volume, os.path.dirname(node_outfile)] + \ (["--debug"] if args.debug else []) elif task == "create": if vol_statusStr != "Started": fail("Volume %s is not online" % args.volume, logger=logger) # When glusterfind create, create session directory in # each brick nodes cmd = [conf.get_opt("nodeagent"), "create", args.session, args.volume, brick, kwargs.get("time_to_update")] + \ (["--debug"] if args.debug else []) + \ (["--reset-session-time"] if args.reset_session_time else []) elif task == "post": # Rename pre status file to actual status file in each node cmd = [conf.get_opt("nodeagent"), "post", args.session, args.volume, brick] + \ (["--debug"] if args.debug else []) elif task == "delete": # When glusterfind delete, cleanup all the session files/dirs # from each node. cmd = [conf.get_opt("nodeagent"), "delete", args.session, args.volume] + \ (["--debug"] if args.debug else []) if cmd: p = Process(target=node_cmd, args=(host, host_uuid, task, cmd, args, opts)) p.start() pool.append(p) for num, p in enumerate(pool): p.join() if p.exitcode != 0: logger.warn("Command %s failed in %s" % (task, nodes[num][1])) if task in ["create", "delete"]: fail("Command %s failed in %s" % (task, nodes[num][1])) elif task == "pre" and args.disable_partial: sys.exit(1)
def mode_delete(args): session_dir = os.path.join(conf.get_opt("session_dir"), args.session) shutil.rmtree(os.path.join(session_dir, args.volume), onerror=handle_rm_error)
def run_cmd_nodes(task, args, **kwargs): global node_outfiles nodes = get_nodes(args.volume) pool = [] for num, node in enumerate(nodes): host, brick = node[1].split(":") host_uuid = node[0] cmd = [] opts = {} node_outfile = os.path.join(conf.get_opt("working_dir"), args.session, args.volume, "tmp_output_%s" % num) if task == "pre": if vol_statusStr != "Started": fail("Volume %s is not online" % args.volume, logger=logger) # If Full backup is requested or start time is zero, use brickfind change_detector = conf.get_change_detector("changelog") if args.full: change_detector = conf.get_change_detector("brickfind") node_outfiles.append(node_outfile) cmd = [change_detector, args.session, args.volume, brick, node_outfile, str(kwargs.get("start")), "--output-prefix", args.output_prefix] + \ (["--debug"] if args.debug else []) + \ (["--only-namespace-changes"] if args.only_namespace_changes else []) opts["node_outfile"] = node_outfile opts["copy_outfile"] = True elif task == "query": # If Full backup is requested or start time is zero, use brickfind change_detector = conf.get_change_detector("changelog") node_outfiles.append(node_outfile) cmd = [change_detector, args.session, args.volume, brick, node_outfile, str(kwargs.get("start"))] + \ ["--only-query"] + \ ["--output-prefix", args.output_prefix] + \ (["--debug"] if args.debug else []) + \ (["--only-namespace-changes"] if args.only_namespace_changes else []) opts["node_outfile"] = node_outfile opts["copy_outfile"] = True elif task == "cleanup": # After pre run, cleanup the working directory and other temp files # Remove the copied node_outfile in main node try: os.remove(node_outfile) except (OSError, IOError): logger.warn("Failed to cleanup temporary file %s" % node_outfile) pass cmd = [ conf.get_opt("nodeagent"), "cleanup", args.session, args.volume ] + (["--debug"] if args.debug else []) elif task == "create": if vol_statusStr != "Started": fail("Volume %s is not online" % args.volume, logger=logger) # When glusterfind create, create session directory in # each brick nodes cmd = [conf.get_opt("nodeagent"), "create", args.session, args.volume, brick, kwargs.get("time_to_update")] + \ (["--debug"] if args.debug else []) + \ (["--reset-session-time"] if args.reset_session_time else []) elif task == "post": # Rename pre status file to actual status file in each node cmd = [conf.get_opt("nodeagent"), "post", args.session, args.volume, brick] + \ (["--debug"] if args.debug else []) elif task == "delete": # When glusterfind delete, cleanup all the session files/dirs # from each node. cmd = [conf.get_opt("nodeagent"), "delete", args.session, args.volume] + \ (["--debug"] if args.debug else []) if cmd: p = Process(target=node_cmd, args=(host, host_uuid, task, cmd, args, opts)) p.start() pool.append(p) for num, p in enumerate(pool): p.join() if p.exitcode != 0: logger.warn("Command %s failed in %s" % (task, nodes[num][1])) if task in ["create", "delete"]: fail("Command %s failed in %s" % (task, nodes[num][1])) elif task == "pre" and args.disable_partial: sys.exit(1)
def get_changes(brick, hash_dir, log_file, start, end, args):
    """
    Makes use of libgfchangelog's history API to get changelogs
    containing changes from start and end time. Further collects
    the modified gfids from the changelogs and writes the list
    of gfid to 'gfid_list' file.
    """
    session_dir = os.path.join(conf.get_opt("session_dir"),
                               args.session)
    status_file = os.path.join(session_dir, args.volume,
                               "%s.status" % urllib.quote_plus(args.brick))

    # Get previous session
    try:
        with open(status_file) as f:
            start = int(f.read().strip())
    except (ValueError, OSError, IOError):
        start = args.start

    try:
        libgfchangelog.cl_init()
        libgfchangelog.cl_register(brick, hash_dir, log_file,
                                   CHANGELOG_LOG_LEVEL,
                                   CHANGELOG_CONN_RETRIES)
    except libgfchangelog.ChangelogException as e:
        fail("%s Changelog register failed: %s" % (brick, e), logger=logger)

    # Output files to record GFIDs and GFID to Path failure GFIDs
    changelog_data = ChangelogData(args.outfile, args)

    # Changelogs path (hard coded to BRICK/.glusterfs/changelogs)
    cl_path = os.path.join(brick, ".glusterfs/changelogs")

    # Fail if History fails for requested Start and End
    try:
        actual_end = libgfchangelog.cl_history_changelog(
            cl_path, start, end, CHANGELOGAPI_NUM_WORKERS)
    except libgfchangelog.ChangelogException as e:
        fail("%s: %s Historical Changelogs not available: %s"
             % (args.node, brick, e), logger=logger)

    logger.info("[1/4] Starting changelog parsing ...")
    try:
        # scan followed by getchanges till scan returns zero.
        # history_scan() is blocking call, till it gets the number
        # of changelogs to process. Returns zero when no changelogs
        # to be processed. Returns positive value as number of changelogs
        # to be processed, which will be fetched using
        # history_getchanges()
        changes = []
        while libgfchangelog.cl_history_scan() > 0:
            changes = libgfchangelog.cl_history_getchanges()

            for change in changes:
                # Ignore if last processed changelog comes
                # again in list
                if change.endswith(".%s" % start):
                    continue
                try:
                    parse_changelog_to_db(changelog_data, change, args)
                    libgfchangelog.cl_history_done(change)
                except IOError as e:
                    logger.warn("Error parsing changelog file %s: %s"
                                % (change, e))

            changelog_data.commit()
    except libgfchangelog.ChangelogException as e:
        fail("%s Error during Changelog Crawl: %s" % (brick, e),
             logger=logger)
    logger.info("[1/4] Finished changelog parsing.")

    # Convert all pgfid available from Changelogs
    logger.info("[2/4] Starting 'pgfid to path' conversions ...")
    pgfid_to_path(brick, changelog_data)
    changelog_data.commit()
    logger.info("[2/4] Finished 'pgfid to path' conversions.")

    # Convert all GFIDs for which no other additional details available
    logger.info("[3/4] Starting 'gfid to path using pgfid' conversions ...")
    gfid_to_path_using_pgfid(brick, changelog_data, args)
    changelog_data.commit()
    logger.info("[3/4] Finished 'gfid to path using pgfid' conversions.")

    # If some GFIDs fail to get converted from previous step,
    # convert using find
    logger.info("[4/4] Starting 'gfid to path using batchfind' "
                "conversions ...")
    gfid_to_path_using_batchfind(brick, changelog_data)
    changelog_data.commit()
    logger.info("[4/4] Finished 'gfid to path using batchfind' conversions.")

    return actual_end

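# Distilled sketch of the scan/getchanges/done cycle above: history_scan()
# blocks until it knows how many changelogs fall inside the requested
# window (zero ends the loop); each returned changelog file is consumed
# and then acknowledged with history_done so it is not served again.
# Assumes libgfchangelog is already registered against the brick, as in
# get_changes.
while libgfchangelog.cl_history_scan() > 0:
    for change in libgfchangelog.cl_history_getchanges():
        parse_changelog_to_db(changelog_data, change, args)  # as above
        libgfchangelog.cl_history_done(change)
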
parser.add_argument("session", help="Session Name") parser.add_argument("volume", help="Volume Name") parser.add_argument("brick", help="Brick Name") parser.add_argument("outfile", help="Output File") parser.add_argument("start", help="Start Time", type=float) parser.add_argument("--debug", help="Debug", action="store_true") parser.add_argument("--output-prefix", help="File prefix in output", default=".") return parser.parse_args() if __name__ == "__main__": args = _get_args() session_dir = os.path.join(conf.get_opt("session_dir"), args.session) status_file = os.path.join(session_dir, args.volume, "%s.status" % urllib.quote_plus(args.brick)) status_file_pre = status_file + ".pre" mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True, logger=logger) mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume), exit_on_err=True) log_file = os.path.join(conf.get_opt("log_dir"), args.session, args.volume, "brickfind.log") setup_logger(logger, log_file, args.debug) time_to_update = int(time.time()) brickfind_crawl(args.brick, args)
parser.add_argument("session", help="Session Name") parser.add_argument("volume", help="Volume Name") parser.add_argument("brick", help="Brick Name") parser.add_argument("outfile", help="Output File") parser.add_argument("start", help="Start Time", type=int) parser.add_argument("--debug", help="Debug", action="store_true") parser.add_argument("--output-prefix", help="File prefix in output", default=".") parser.add_argument("-N", "--only-namespace-changes", help="List only namespace changes", action="store_true") return parser.parse_args() if __name__ == "__main__": args = _get_args() mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume), exit_on_err=True) log_file = os.path.join(conf.get_opt("log_dir"), args.session, args.volume, "changelog.log") setup_logger(logger, log_file, args.debug) session_dir = os.path.join(conf.get_opt("session_dir"), args.session) status_file = os.path.join(session_dir, args.volume, "%s.status" % urllib.quote_plus(args.brick)) status_file_pre = status_file + ".pre" mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True, logger=logger) try: with open(status_file) as f: start = int(f.read().strip()) except (ValueError, OSError, IOError): start = args.start end = int(time.time()) - get_changelog_rollover_time(args.volume)
def run_cmd_nodes(task, args, **kwargs): global node_outfiles nodes = get_nodes(args.volume) pool = [] for num, node in enumerate(nodes): host, brick = node[1].split(":") host_uuid = node[0] cmd = [] opts = {} # tmpfilename is valid only for tasks: pre, query and cleanup tmpfilename = kwargs.get("tmpfilename", "BADNAME") node_outfile = os.path.join(conf.get_opt("working_dir"), args.session, args.volume, tmpfilename, "tmp_output_%s" % num) if task == "pre": if vol_statusStr != "Started": fail("Volume %s is not online" % args.volume, logger=logger) # If Full backup is requested or start time is zero, use brickfind change_detector = conf.get_change_detector("changelog") tag = None if args.full: change_detector = conf.get_change_detector("brickfind") tag = args.tag_for_full_find.strip() if tag == "": tag = '""' if not is_host_local(host_uuid) else "" node_outfiles.append(node_outfile) # remote file will be copied into this directory mkdirp(os.path.dirname(node_outfile), exit_on_err=True, logger=logger) FS = args.field_separator if not is_host_local(host_uuid): FS = "'" + FS + "'" cmd = [change_detector, args.session, args.volume, host, brick, node_outfile] + \ ([str(kwargs.get("start")), str(kwargs.get("end"))] if not args.full else []) + \ ([tag] if tag is not None else []) + \ ["--output-prefix", args.output_prefix] + \ (["--debug"] if args.debug else []) + \ (["--no-encode"] if args.no_encode else []) + \ (["--only-namespace-changes"] if args.only_namespace_changes else []) + \ (["--type", args.type]) + \ (["--field-separator", FS] if args.full else []) opts["node_outfile"] = node_outfile opts["copy_outfile"] = True elif task == "query": # If Full backup is requested or start time is zero, use brickfind tag = None change_detector = conf.get_change_detector("changelog") if args.full: change_detector = conf.get_change_detector("brickfind") tag = args.tag_for_full_find.strip() if tag == "": tag = '""' if not is_host_local(host_uuid) else "" node_outfiles.append(node_outfile) # remote file will be copied into this directory mkdirp(os.path.dirname(node_outfile), exit_on_err=True, logger=logger) FS = args.field_separator if not is_host_local(host_uuid): FS = "'" + FS + "'" cmd = [change_detector, args.session, args.volume, host, brick, node_outfile] + \ ([str(kwargs.get("start")), str(kwargs.get("end"))] if not args.full else []) + \ ([tag] if tag is not None else []) + \ ["--only-query"] + \ ["--output-prefix", args.output_prefix] + \ (["--debug"] if args.debug else []) + \ (["--no-encode"] if args.no_encode else []) + \ (["--only-namespace-changes"] if args.only_namespace_changes else []) + \ (["--type", args.type]) + \ (["--field-separator", FS] if args.full else []) opts["node_outfile"] = node_outfile opts["copy_outfile"] = True elif task == "cleanup": # After pre/query run, cleanup the working directory and other # temp files. 
Remove the directory to which node_outfile has # been copied in main node try: os.remove(node_outfile) except (OSError, IOError): logger.warn("Failed to cleanup temporary file %s" % node_outfile) pass cmd = [conf.get_opt("nodeagent"), "cleanup", args.session, args.volume, os.path.dirname(node_outfile)] + \ (["--debug"] if args.debug else []) elif task == "create": if vol_statusStr != "Started": fail("Volume %s is not online" % args.volume, logger=logger) # When glusterfind create, create session directory in # each brick nodes cmd = [conf.get_opt("nodeagent"), "create", args.session, args.volume, brick, kwargs.get("time_to_update")] + \ (["--debug"] if args.debug else []) + \ (["--reset-session-time"] if args.reset_session_time else []) elif task == "post": # Rename pre status file to actual status file in each node cmd = [conf.get_opt("nodeagent"), "post", args.session, args.volume, brick] + \ (["--debug"] if args.debug else []) elif task == "delete": # When glusterfind delete, cleanup all the session files/dirs # from each node. cmd = [conf.get_opt("nodeagent"), "delete", args.session, args.volume] + \ (["--debug"] if args.debug else []) if cmd: p = Process(target=node_cmd, args=(host, host_uuid, task, cmd, args, opts)) p.start() pool.append(p) for num, p in enumerate(pool): p.join() if p.exitcode != 0: logger.warn("Command %s failed in %s" % (task, nodes[num][1])) if task in ["create", "delete"]: fail("Command %s failed in %s" % (task, nodes[num][1])) elif task == "pre" and args.disable_partial: sys.exit(1)
    get_changes(brick, working_dir, log_file, end, args)


def _get_args():
    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
                            description=PROG_DESCRIPTION)
    parser.add_argument("session", help="Session Name")
    parser.add_argument("volume", help="Volume Name")
    parser.add_argument("brick", help="Brick Name")
    parser.add_argument("outfile", help="Output File")
    parser.add_argument("start", help="Start Time", type=int)
    parser.add_argument("--debug", help="Debug", action="store_true")
    parser.add_argument("--output-prefix", help="File prefix in output",
                        default=".")

    return parser.parse_args()


if __name__ == "__main__":
    args = _get_args()
    mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"),
                            args.session,
                            args.volume,
                            "changelog.log")
    setup_logger(logger, log_file, args.debug)

    end = int(time.time()) - int(conf.get_opt("changelog_rollover_time"))
    changelog_crawl(args.brick, end, args)
    sys.exit(0)

def gfid_to_path_using_pgfid(brick, gfids_file, output_file,
                             outfile_failures):
    """
    Parent GFID is saved as xattr, collect Parent GFIDs from all
    the files from gfids_file. Convert parent GFID to path and Crawl
    each directories to get the list of files/dirs having same inode number.
    Do find with maxdepth as 1 and print the output in <INODE_NUM> <PATH>
    format, use this output to look into in memory dictionary of inode
    numbers got from the list of GFIDs
    """
    with open(output_file, "a+") as fout:
        pgfids = set()
        inode_dict = {}
        with open(gfids_file) as f:
            for gfid in f:
                gfid = gfid.strip()
                p = os.path.join(brick,
                                 ".glusterfs",
                                 gfid[0:2],
                                 gfid[2:4],
                                 gfid)
                if os.path.islink(p):
                    path = symlink_gfid_to_path(brick, gfid)
                    output_write(fout, path, args.output_prefix)
                else:
                    try:
                        inode_dict[str(os.stat(p).st_ino)] = 1
                        file_xattrs = xattr.list(p)
                        num_parent_gfid = 0
                        for x in file_xattrs:
                            if x.startswith("trusted.pgfid."):
                                num_parent_gfid += 1
                                pgfids.add(x.split(".")[-1])

                        if num_parent_gfid == 0:
                            with open(outfile_failures, "a") as f:
                                f.write("%s\n" % gfid)
                                f.flush()
                                os.fsync(f.fileno())

                    except (IOError, OSError) as e:
                        if e.errno == ENOENT:
                            continue
                        else:
                            fail("%s Failed to convert to path from "
                                 "GFID %s: %s" % (brick, gfid, e),
                                 logger=logger)

        if not inode_dict:
            return

        def inode_filter(path):
            try:
                st = os.lstat(path)
            except (OSError, IOError) as e:
                if e.errno == ENOENT:
                    st = None
                else:
                    raise

            if st and inode_dict.get(str(st.st_ino), None):
                return True

            return False

        # Length of brick path, to remove from output path
        brick_path_len = len(brick)

        def output_callback(path):
            path = path.strip()
            path = path[brick_path_len+1:]
            output_write(fout, path, args.output_prefix)

        ignore_dirs = [os.path.join(brick, dirname)
                       for dirname in
                       conf.get_opt("brick_ignore_dirs").split(",")]

        for pgfid in pgfids:
            path = symlink_gfid_to_path(brick, pgfid)
            find(os.path.join(brick, path),
                 callback_func=output_callback,
                 filter_func=inode_filter,
                 ignore_dirs=ignore_dirs,
                 subdirs_crawl=False)

        fout.flush()
        os.fsync(fout.fileno())

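# Sketch of the pgfid collection above: with build-pgfid enabled, each
# file carries one trusted.pgfid.<parent-gfid> xattr per hard link, and
# the parent GFID is recovered from the xattr *name*, not its value.
# The brick-local file path below is made up.
import xattr  # pyxattr, as used above

for name in xattr.list("/bricks/b1/dir/file"):
    if name.startswith("trusted.pgfid."):
        parent_gfid = name.split(".")[-1]
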
def run_cmd_nodes(task, args, **kwargs): global node_outfiles nodes = get_nodes(args.volume) pool = [] for num, node in enumerate(nodes): host, brick = node[1].split(":") host_uuid = node[0] cmd = [] opts = {} node_outfile = os.path.join(conf.get_opt("working_dir"), args.session, args.volume, "tmp_output_%s" % num) if task == "pre": if vol_statusStr == "Stopped": fail("Volume %s is in stopped state" % args.volume, logger=logger) # If Full backup is requested or start time is zero, use brickfind change_detector = conf.get_change_detector("changelog") if args.full: change_detector = conf.get_change_detector("brickfind") node_outfiles.append(node_outfile) cmd = [change_detector, args.session, args.volume, brick, node_outfile, str(kwargs.get("start")), "--output-prefix", args.output_prefix] + \ (["--debug"] if args.debug else []) + \ (["--only-namespace-changes"] if args.only_namespace_changes else []) opts["node_outfile"] = node_outfile opts["copy_outfile"] = True elif task == "cleanup": # After pre run, cleanup the working directory and other temp files # Remove the copied node_outfile in main node try: os.remove(node_outfile) except (OSError, IOError): logger.warn("Failed to cleanup temporary file %s" % node_outfile) pass cmd = [conf.get_opt("nodeagent"), "cleanup", args.session, args.volume] + (["--debug"] if args.debug else []) elif task == "create": if vol_statusStr == "Stopped": fail("Volume %s is in stopped state" % args.volume, logger=logger) # When glusterfind create, create session directory in # each brick nodes cmd = [conf.get_opt("nodeagent"), "create", args.session, args.volume, brick, kwargs.get("time_to_update")] + \ (["--debug"] if args.debug else []) + \ (["--reset-session-time"] if args.reset_session_time else []) elif task == "post": # Rename pre status file to actual status file in each node cmd = [conf.get_opt("nodeagent"), "post", args.session, args.volume, brick] + \ (["--debug"] if args.debug else []) elif task == "delete": # When glusterfind delete, cleanup all the session files/dirs # from each node. cmd = [conf.get_opt("nodeagent"), "delete", args.session, args.volume] + \ (["--debug"] if args.debug else []) if cmd: p = Process(target=node_cmd, args=(host, host_uuid, task, cmd, args, opts)) p.start() pool.append(p) for num, p in enumerate(pool): p.join() if p.exitcode != 0: logger.warn("Command %s failed in %s" % (task, nodes[num][1])) if task in ["create", "delete"]: fail("Command %s failed in %s" % (task, nodes[num][1])) elif task == "pre" and args.disable_partial: sys.exit(1)