Example #1
def main():
    args = _get_args()
    mkdirp(conf.get_opt("session_dir"), exit_on_err=True)

    if args.mode == "list":
        session_dir = conf.get_opt("session_dir")
    else:
        session_dir = os.path.join(conf.get_opt("session_dir"),
                                   args.session)

    if not os.path.exists(session_dir) and args.mode not in ["create", "list"]:
        fail("Invalid session %s" % args.session)

    vol_dir = os.path.join(session_dir, args.volume)
    if not os.path.exists(vol_dir) and args.mode not in ["create", "list"]:
        fail("Session %s not created with volume %s" %
             (args.session, args.volume))

    mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"),
                            args.session,
                            args.volume,
                            "cli.log")
    setup_logger(logger, log_file, args.debug)

    # globals() will have all the functions already defined.
    # mode_<args.mode> will be the function name to be called
    globals()["mode_" + args.mode](session_dir, args)
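
The last line uses a small dynamic-dispatch idiom: every subcommand is a module-level function named mode_<MODE>, looked up via globals(). A minimal, self-contained sketch of that pattern (all names here are illustrative, not taken from glusterfind):

def mode_list(session_dir, args):
    print("listing sessions under %s" % session_dir)

def mode_create(session_dir, args):
    print("creating session under %s" % session_dir)

def dispatch(mode, session_dir, args=None):
    # Resolve mode_<mode> from the module globals and call it
    func = globals().get("mode_" + mode)
    if func is None:
        raise SystemExit("Unknown mode: %s" % mode)
    func(session_dir, args)

dispatch("list", "/var/lib/glusterfind")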
Example #2
def main():
    args = _get_args()
    mkdirp(conf.get_opt("session_dir"), exit_on_err=True)

    if args.mode == "list":
        session_dir = conf.get_opt("session_dir")
    else:
        session_dir = os.path.join(conf.get_opt("session_dir"), args.session)

    if not os.path.exists(session_dir) and args.mode not in ["create", "list"]:
        fail("Invalid session %s" % args.session)

    vol_dir = os.path.join(session_dir, args.volume)
    if not os.path.exists(vol_dir) and args.mode not in ["create", "list"]:
        fail("Session %s not created with volume %s" %
             (args.session, args.volume))

    mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"), args.session, args.volume,
                            "cli.log")
    setup_logger(logger, log_file, args.debug)

    # globals() will have all the functions already defined.
    # mode_<args.mode> will be the function name to be called
    globals()["mode_" + args.mode](session_dir, args)
Example #3
def main():
    global gtmpfilename

    args = None

    try:
        args = _get_args()
        mkdirp(conf.get_opt("session_dir"), exit_on_err=True)

        # force the default session name if mode is "query"
        if args.mode == "query":
            args.session = "default"

        if args.mode == "list":
            session_dir = conf.get_opt("session_dir")
        else:
            session_dir = os.path.join(conf.get_opt("session_dir"),
                                       args.session)

        if not os.path.exists(session_dir) and \
                args.mode not in ["create", "list", "query"]:
            fail("Invalid session %s" % args.session)

        # volume involved, validate the volume first
        if args.mode not in ["list"]:
            validate_volume(args.volume)

        # "default" is a system defined session name
        if args.mode in ["create", "post", "pre", "delete"] and \
                args.session == "default":
            fail("Invalid session %s" % args.session)

        vol_dir = os.path.join(session_dir, args.volume)
        if not os.path.exists(vol_dir) and args.mode not in \
                ["create", "list", "query"]:
            fail("Session %s not created with volume %s" %
                 (args.session, args.volume))

        mkdirp(os.path.join(conf.get_opt("log_dir"), args.session,
                            args.volume),
               exit_on_err=True)
        log_file = os.path.join(conf.get_opt("log_dir"), args.session,
                                args.volume, "cli.log")
        setup_logger(logger, log_file, args.debug)

        # globals() will have all the functions already defined.
        # mode_<args.mode> will be the function name to be called
        globals()["mode_" + args.mode](session_dir, args)
    except KeyboardInterrupt:
        if args is not None:
            if args.mode == "pre" or args.mode == "query":
                # cleanup session
                if gtmpfilename is not None:
                    # no more interrupts until we clean up
                    signal.signal(signal.SIGINT, signal.SIG_IGN)
                    run_cmd_nodes("cleanup", args, tmpfilename=gtmpfilename)

        # Interrupted, exit with non zero error code
        sys.exit(2)
Example #4
def main():
    global gtmpfilename

    args = None

    try:
        args = _get_args()
        mkdirp(conf.get_opt("session_dir"), exit_on_err=True)

        # force the default session name if mode is "query"
        if args.mode == "query":
            args.session = "default"

        if args.mode == "list":
            session_dir = conf.get_opt("session_dir")
        else:
            session_dir = os.path.join(conf.get_opt("session_dir"),
                                       args.session)

        if not os.path.exists(session_dir) and \
                args.mode not in ["create", "list", "query"]:
            fail("Invalid session %s" % args.session)

        # "default" is a system defined session name
        if args.mode in ["create", "post", "pre", "delete"] and \
                args.session == "default":
            fail("Invalid session %s" % args.session)

        vol_dir = os.path.join(session_dir, args.volume)
        if not os.path.exists(vol_dir) and args.mode not in \
                ["create", "list", "query"]:
            fail("Session %s not created with volume %s" %
                 (args.session, args.volume))

        mkdirp(os.path.join(conf.get_opt("log_dir"),
                            args.session,
                            args.volume),
               exit_on_err=True)
        log_file = os.path.join(conf.get_opt("log_dir"),
                                args.session,
                                args.volume,
                                "cli.log")
        setup_logger(logger, log_file, args.debug)

        # globals() will have all the functions already defined.
        # mode_<args.mode> will be the function name to be called
        globals()["mode_" + args.mode](session_dir, args)
    except KeyboardInterrupt:
        if args is not None:
            if args.mode == "pre" or args.mode == "query":
                # cleanup session
                if gtmpfilename is not None:
                    # no more interrupts until we clean up
                    signal.signal(signal.SIGINT, signal.SIG_IGN)
                    run_cmd_nodes("cleanup", args, tmpfilename=gtmpfilename)

        # Interrupted, exit with non zero error code
        sys.exit(2)
Example #5
def node_cleanup(host, args):
    localdir = is_host_local(host)

    # nodecleanup <SESSION> <VOLUME> [--debug]
    cmd = [conf.get_opt("nodecleanup"), args.session, args.volume
           ] + (["--debug"] if args.debug else [])

    if not localdir:
        # prefix with ssh command if not local node
        cmd = ["ssh", "-i", conf.get_opt("secret_pem"), "root@%s" % host] + cmd

    execute(cmd, exit_msg="%s - Cleanup failed" % host, logger=logger)
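
The remote-execution pattern above keeps the command as an argv list and, for non-local nodes, prefixes it with an ssh invocation. A hedged sketch of just that step (host and key path are placeholders):

def build_cmd(base_cmd, host, is_local, pem="/etc/secret.pem"):
    # Run locally as-is; otherwise wrap in ssh with the session key
    if is_local:
        return base_cmd
    return ["ssh", "-i", pem, "root@%s" % host] + base_cmd

print(build_cmd(["nodecleanup", "sess1", "vol1"], "node2.example.com", False))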
Example #6
def ssh_setup(args):
    pem_key_path = get_pem_key_path(args.session, args.volume)

    if not os.path.exists(pem_key_path):
        # Generate ssh-key
        cmd = ["ssh-keygen",
               "-N",
               "",
               "-f",
               pem_key_path]
        execute(cmd,
                exit_msg="Unable to generate ssh key %s"
                % pem_key_path,
                logger=logger)

        logger.info("Ssh key generated %s" % pem_key_path)

    try:
        shutil.copyfile(pem_key_path + ".pub",
                        os.path.join(conf.get_opt("session_dir"),
                                     ".keys",
                                     "%s_%s_secret.pem.pub" % (args.session,
                                                               args.volume)))
    except (IOError, OSError) as e:
        fail("Failed to copy public key to %s: %s"
             % (os.path.join(conf.get_opt("session_dir"), ".keys"), e),
             logger=logger)

    # Copy pub file to all nodes
    cmd = ["gluster",
           "system::",
           "copy",
           "file",
           "/glusterfind/.keys/%s.pub" % os.path.basename(pem_key_path)]

    execute(cmd, exit_msg="Failed to distribute ssh keys", logger=logger)

    logger.info("Distributed ssh key to all nodes of Volume")

    # Add to authorized_keys file in each node
    cmd = ["gluster",
           "system::",
           "execute",
           "add_secret_pub",
           "root",
           "/glusterfind/.keys/%s.pub" % os.path.basename(pem_key_path)]
    execute(cmd,
            exit_msg="Failed to add ssh keys to authorized_keys file",
            logger=logger)

    logger.info("Ssh key added to authorized_keys of Volume nodes")
Example #7
def ssh_setup(args):
    pem_key_path = get_pem_key_path(args.session, args.volume)

    if not os.path.exists(pem_key_path):
        # Generate ssh-key
        cmd = ["ssh-keygen",
               "-N",
               "",
               "-f",
               pem_key_path]
        execute(cmd,
                exit_msg="Unable to generate ssh key %s"
                % pem_key_path,
                logger=logger)

        logger.info("Ssh key generated %s" % pem_key_path)

    try:
        shutil.copyfile(pem_key_path + ".pub",
                        os.path.join(conf.get_opt("session_dir"),
                                     ".keys",
                                     "%s_%s_secret.pem.pub" % (args.session,
                                                               args.volume)))
    except (IOError, OSError) as e:
        fail("Failed to copy public key to %s: %s"
             % (os.path.join(conf.get_opt("session_dir"), ".keys"), e),
             logger=logger)

    # Copy pub file to all nodes
    cmd = ["gluster",
           "system::",
           "copy",
           "file",
           "/glusterfind/.keys/%s.pub" % os.path.basename(pem_key_path)]

    execute(cmd, exit_msg="Failed to distribute ssh keys", logger=logger)

    logger.info("Distributed ssh key to all nodes of Volume")

    # Add to authorized_keys file in each node
    cmd = ["gluster",
           "system::",
           "execute",
           "add_secret_pub",
           "root",
           "/glusterfind/.keys/%s.pub" % os.path.basename(pem_key_path)]
    execute(cmd,
            exit_msg="Failed to add ssh keys to authorized_keys file",
            logger=logger)

    logger.info("Ssh key added to authorized_keys of Volume nodes")
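
Key generation above relies on ssh-keygen's -N "" (empty passphrase) and -f (output file) options, guarded by an existence check so re-runs are idempotent. A minimal sketch of that guard, with an illustrative path:

import os
import subprocess

def ensure_ssh_key(pem_key_path):
    # Generate a passwordless key pair only if it does not exist yet
    if not os.path.exists(pem_key_path):
        subprocess.check_call(["ssh-keygen", "-N", "", "-f", pem_key_path])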
Example #8
def node_run(volume, host, path, start, outfile, args, fallback=False):
    """
    If the host is the local node, execute the command locally. Otherwise,
    execute the CHANGE_DETECTOR command via ssh and copy the output file
    from the remote node using scp.
    """
    localdir = is_host_local(host)

    # If Full backup is requested or start time is zero, use brickfind
    change_detector = conf.get_change_detector(args.change_detector)
    if ((start == 0 or args.full) and args.change_detector == "changelog") or \
       fallback:
        change_detector = conf.get_change_detector("brickfind")

    # CHANGE_DETECTOR <SESSION> <VOLUME> <BRICK> <OUTFILE> <START> --debug
    # --gfidpath <TYPE>
    cmd = [change_detector,
           args.session,
           volume,
           path,
           outfile,
           str(start),
           "--output-prefix",
           args.output_prefix] + \
        (["--debug"] if args.debug else []) + \
        (["--full"] if args.full else [])

    if not localdir:
        # prefix with ssh command if not local node
        cmd = ["ssh", "-i", conf.get_opt("secret_pem"), "root@%s" % host] + cmd

    rc, out, err = execute(cmd, logger=logger)
    if rc == 2:
        # Partial History Fallback
        logger.info("%s %s Fallback to brickfind" % (host, err.strip()))
        # Exit only from process, handled in main.
        sys.exit(rc)
    elif rc != 0:
        fail("%s - Change detection failed" % host, logger=logger)

    if not localdir:
        cmd_copy = [
            "scp", "-i",
            conf.get_opt("secret_pem"),
            "root@%s:/%s" % (host, outfile),
            os.path.dirname(outfile)
        ]
        execute(cmd_copy,
                exit_msg="%s - Copy command failed" % host,
                logger=logger)
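
Note the exit-code protocol: the change detector exits with 2 to signal "partial history, retry with brickfind", and any other non-zero code is a hard failure. A small sketch of that convention (the command is a placeholder):

import subprocess
import sys

def run_detector(cmd):
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    out, err = proc.communicate()
    return proc.returncode, out, err

rc, out, err = run_detector(["true"])  # placeholder command
if rc == 2:
    print("partial history: fall back to brickfind")
elif rc != 0:
    sys.exit("change detection failed")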
Example #9
def node_run(volume, host, path, start, outfile, args, fallback=False):
    """
    If the host is the local node, execute the command locally. Otherwise,
    execute the CHANGE_DETECTOR command via ssh and copy the output file
    from the remote node using scp.
    """
    localdir = is_host_local(host)

    # If Full backup is requested or start time is zero, use brickfind
    change_detector = conf.get_change_detector(args.change_detector)
    if ((start == 0 or args.full) and args.change_detector == "changelog") or \
       fallback:
        change_detector = conf.get_change_detector("brickfind")

    # CHANGE_DETECTOR <SESSION> <VOLUME> <BRICK> <OUTFILE> <START> --debug
    # --gfidpath <TYPE>
    cmd = [change_detector,
           args.session,
           volume,
           path,
           outfile,
           str(start),
           "--output-prefix",
           args.output_prefix] + \
        (["--debug"] if args.debug else []) + \
        (["--full"] if args.full else [])

    if not localdir:
        # prefix with ssh command if not local node
        cmd = ["ssh",
               "-i", conf.get_opt("secret_pem"),
               "root@%s" % host] + cmd

    rc, out, err = execute(cmd, logger=logger)
    if rc == 2:
        # Partial History Fallback
        logger.info("%s %s Fallback to brickfind" % (host, err.strip()))
        # Exit only from process, handled in main.
        sys.exit(rc)
    elif rc != 0:
        fail("%s - Change detection failed" % host, logger=logger)

    if not localdir:
        cmd_copy = ["scp",
                    "-i", conf.get_opt("secret_pem"),
                    "root@%s:/%s" % (host, outfile),
                    os.path.dirname(outfile)]
        execute(cmd_copy, exit_msg="%s - Copy command failed" % host,
                logger=logger)
Example #10
def node_cleanup(host, args):
    localdir = is_host_local(host)

    # nodecleanup <SESSION> <VOLUME> [--debug]
    cmd = [conf.get_opt("nodecleanup"),
           args.session,
           args.volume] + (["--debug"] if args.debug else [])

    if not localdir:
        # prefix with ssh command if not local node
        cmd = ["ssh",
               "-i", conf.get_opt("secret_pem"),
               "root@%s" % host] + cmd

    execute(cmd, exit_msg="%s - Cleanup failed" % host, logger=logger)
Example #11
def cleanup(nodes, args):
    pool = []
    for num, node in enumerate(nodes):
        host, brick = node[1].split(":")
        # temp output file
        node_outfile = os.path.join(conf.get_opt("working_dir"), args.session,
                                    args.volume, "tmp_output_%s.txt" % num)

        try:
            os.remove(node_outfile)
        except (OSError, IOError):
            # TODO: Cleanup Failure, Handle
            pass

        p = Process(target=node_cleanup, args=(host, args))
        p.start()
        pool.append(p)

    exit_codes = 0
    for p in pool:
        p.join()
        exit_codes += (0 if p.exitcode == 0 else 1)

    if exit_codes != 0:
        sys.exit(1)
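
The fan-out pattern above starts one multiprocessing.Process per node, joins them all, and folds the exit codes into a single pass/fail. A self-contained sketch of that skeleton (the worker body is a stub):

from multiprocessing import Process

def worker(num):
    pass  # per-node cleanup would go here

if __name__ == "__main__":
    pool = [Process(target=worker, args=(n,)) for n in range(3)]
    for p in pool:
        p.start()
    failures = 0
    for p in pool:
        p.join()
        failures += 0 if p.exitcode == 0 else 1
    if failures:
        raise SystemExit(1)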
Example #12
def brickfind_crawl(brick, args):
    if brick.endswith("/"):
        brick = brick[0:len(brick)-1]

    working_dir = os.path.dirname(args.outfile)
    mkdirp(working_dir, exit_on_err=True, logger=logger)
    create_file(args.outfile, exit_on_err=True, logger=logger)

    with open(args.outfile, "a+") as fout:
        brick_path_len = len(brick)

        def output_callback(path, filter_result):
            path = path.strip()
            path = path[brick_path_len+1:]
            output_write(fout, path, args.output_prefix, encode=True)

        ignore_dirs = [os.path.join(brick, dirname)
                       for dirname in
                       conf.get_opt("brick_ignore_dirs").split(",")]

        find(brick, callback_func=output_callback,
             ignore_dirs=ignore_dirs)

        fout.flush()
        os.fsync(fout.fileno())
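
output_callback above makes paths volume-relative by slicing off the brick prefix plus the following "/". The slice in isolation (values are illustrative):

brick = "/bricks/b1"                 # trailing "/" already stripped
path = "/bricks/b1/dir1/file1"
print(path[len(brick) + 1:])         # -> dir1/file1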
Example #13
def mode_pre(session_dir, args):
    """
    Read from Session file and write to session.pre file
    """
    endtime_to_update = int(time.time()) - int(
        conf.get_opt("changelog_rollover_time"))
    status_file = os.path.join(session_dir, args.volume, "status")
    status_file_pre = status_file + ".pre"

    mkdirp(os.path.dirname(args.outfile), exit_on_err=True, logger=logger)

    start = 0
    try:
        with open(status_file) as f:
            start = int(f.read().strip())
    except ValueError:
        pass
    except (OSError, IOError) as e:
        fail("Error Opening Session file %s: %s"
             % (status_file, e), logger=logger)

    logger.debug("Pre is called - Session: %s, Volume: %s, "
                 "Start time: %s, End time: %s"
                 % (args.session, args.volume, start, endtime_to_update))

    run_in_nodes(args.volume, start, args)

    with open(status_file_pre, "w", buffering=0) as f:
        f.write(str(endtime_to_update))

    sys.stdout.write("Generated output file %s\n" % args.outfile)
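
The status file holds the last recorded end time as plain text; a missing or unparsable file means "no previous run", so the crawl starts from 0 (a full scan). A hedged sketch of that read path:

def read_start(status_file):
    # Fall back to 0 (full crawl) on a missing or malformed status file
    try:
        with open(status_file) as f:
            return int(f.read().strip())
    except (ValueError, OSError, IOError):
        return 0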
Example #14
def brickfind_crawl(brick, args):
    if brick.endswith("/"):
        brick = brick[0:len(brick)-1]

    working_dir = os.path.dirname(args.outfile)
    mkdirp(working_dir, exit_on_err=True, logger=logger)
    create_file(args.outfile, exit_on_err=True, logger=logger)

    with open(args.outfile, "a+") as fout:
        brick_path_len = len(brick)

        def output_callback(path, filter_result):
            path = path.strip()
            path = path[brick_path_len+1:]
            output_write(fout, path, args.output_prefix,
                         encode=(not args.no_encode), tag=args.tag)

        ignore_dirs = [os.path.join(brick, dirname)
                       for dirname in
                       conf.get_opt("brick_ignore_dirs").split(",")]

        find(brick, callback_func=output_callback,
             ignore_dirs=ignore_dirs)

        fout.flush()
        os.fsync(fout.fileno())
Example #15
def mode_cleanup(args):
    working_dir = os.path.join(conf.get_opt("working_dir"), args.session,
                               args.volume, args.tmpfilename)

    mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"), args.session, args.volume,
                            "changelog.log")

    setup_logger(logger, log_file)

    try:
        shutil.rmtree(working_dir, onerror=handle_rm_error)
    except (OSError, IOError) as e:
        logger.error("Failed to delete working directory: %s" % e)
        sys.exit(1)
Example #16
def gfid_to_path_using_batchfind(brick, gfids_file, output_file):
    """
    find -samefile gets the inode number and crawls the entire namespace
    to get the list of files/dirs having the same inode number.
    Instead, run find without any option except the ignore-directory
    option, print the output in <INODE_NUM> <PATH> format, and use this
    output to look up the in-memory dictionary of inode numbers built
    from the list of GFIDs.
    """
    with open(output_file, "a+") as fout:
        inode_dict = {}
        with open(gfids_file) as f:
            for gfid in f:
                gfid = gfid.strip()
                backend_path = os.path.join(brick, ".glusterfs",
                                            gfid[0:2], gfid[2:4], gfid)

                try:
                    inode_dict[str(os.stat(backend_path).st_ino)] = 1
                except (IOError, OSError) as e:
                    if e.errno == ENOENT:
                        continue
                    else:
                        fail("%s Failed to convert to path from "
                             "GFID %s: %s" % (brick, gfid, e), logger=logger)

        if not inode_dict:
            return

        def inode_filter(path):
            try:
                st = os.lstat(path)
            except (OSError, IOError) as e:
                if e.errno == ENOENT:
                    st = None
                else:
                    raise

            if st and inode_dict.get(str(st.st_ino), None):
                return True

            return False

        brick_path_len = len(brick)

        def output_callback(path):
            path = path.strip()
            path = path[brick_path_len+1:]
            output_write(fout, path, args.output_prefix)

        ignore_dirs = [os.path.join(brick, dirname)
                       for dirname in
                       conf.get_opt("brick_ignore_dirs").split(",")]
        # Length of brick path, to remove from output path
        find(brick, callback_func=output_callback,
             filter_func=inode_filter,
             ignore_dirs=ignore_dirs)

        fout.flush()
        os.fsync(fout.fileno())
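
The backend path built above follows the GlusterFS .glusterfs layout: each file has a hard link under .glusterfs/<first two hex chars>/<next two>/<full gfid>. The path construction in isolation (values are illustrative):

import os

gfid = "6ba7b810-9dad-11d1-80b4-00c04fd430c8"
brick = "/bricks/b1"
print(os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid))
# -> /bricks/b1/.glusterfs/6b/a7/6ba7b810-9dad-11d1-80b4-00c04fd430c8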
Example #17
def cleanup(nodes, args):
    pool = []
    for num, node in enumerate(nodes):
        host, brick = node[1].split(":")
        # temp output file
        node_outfile = os.path.join(conf.get_opt("working_dir"),
                                    args.session,
                                    args.volume,
                                    "tmp_output_%s.txt" % num)

        try:
            os.remove(node_outfile)
        except (OSError, IOError):
            # TODO: Cleanup Failure, Handle
            pass

        p = Process(target=node_cleanup,
                    args=(host, args))
        p.start()
        pool.append(p)

    exit_codes = 0
    for p in pool:
        p.join()
        exit_codes += (0 if p.exitcode == 0 else 1)

    if exit_codes != 0:
        sys.exit(1)
Example #18
def mode_pre(session_dir, args):
    """
    Read from Session file and write to session.pre file
    """
    endtime_to_update = int(time.time()) - int(
        conf.get_opt("changelog_rollover_time"))
    status_file = os.path.join(session_dir, args.volume, "status")
    status_file_pre = status_file + ".pre"

    mkdirp(os.path.dirname(args.outfile), exit_on_err=True, logger=logger)

    start = 0
    try:
        with open(status_file) as f:
            start = int(f.read().strip())
    except ValueError:
        pass
    except (OSError, IOError) as e:
        fail("Error Opening Session file %s: %s" % (status_file, e),
             logger=logger)

    logger.debug("Pre is called - Session: %s, Volume: %s, "
                 "Start time: %s, End time: %s" %
                 (args.session, args.volume, start, endtime_to_update))

    run_in_nodes(args.volume, start, args)

    with open(status_file_pre, "w", buffering=0) as f:
        f.write(str(endtime_to_update))

    sys.stdout.write("Generated output file %s\n" % args.outfile)
Example #19
def changelog_crawl(brick, end, args):
    """
    Init function, prepares working dir and calls Changelog query
    """
    if brick.endswith("/"):
        brick = brick[0:len(brick)-1]

    # WORKING_DIR/BRICKHASH/OUTFILE
    working_dir = os.path.dirname(args.outfile)
    brickhash = hashlib.sha1(brick)
    brickhash = str(brickhash.hexdigest())
    working_dir = os.path.join(working_dir, brickhash)

    mkdirp(working_dir, exit_on_err=True, logger=logger)
    create_file(args.outfile, exit_on_err=True, logger=logger)
    create_file(args.outfile + ".gfids", exit_on_err=True, logger=logger)

    log_file = os.path.join(conf.get_opt("log_dir"),
                            args.session,
                            args.volume,
                            "changelog.%s.log" % brickhash)

    logger.info("%s Started Changelog Crawl. Start: %s, End: %s"
                % (brick, args.start, end))
    get_changes(brick, working_dir, log_file, end, args)
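
Each brick gets its own working subdirectory named by the SHA-1 of the brick path, which is stable across runs and filesystem-safe. The naming step in isolation, shown in Python-3 form with a placeholder base directory:

import hashlib
import os

brick = "/bricks/b1"
brickhash = hashlib.sha1(brick.encode()).hexdigest()
print(os.path.join("/var/tmp/working_dir", brickhash))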
Example #20
def gfid_to_path_using_batchfind(brick, changelog_data):
    # If all the GFIDs converted using gfid_to_path_using_pgfid
    if not changelog_data.inodegfid_exists({"converted": 0}):
        return

    def inode_filter(path):
        # Looks in inodegfid table, if exists returns
        # inode number else None
        try:
            st = os.lstat(path)
        except (OSError, IOError):
            st = None

        if st and changelog_data.inodegfid_exists({"inode": st.st_ino}):
            return st.st_ino

        return None

    # Length of brick path, to remove from output path
    brick_path_len = len(brick)

    def output_callback(path, inode):
        # For each path found, encodes it and updates path1
        # Also updates converted flag in inodegfid table as 1
        path = path.strip()
        path = path[brick_path_len + 1:]
        path = output_path_prepare(path, args.output_prefix)

        changelog_data.append_path1(path, inode)

    ignore_dirs = [os.path.join(brick, dirname)
                   for dirname in
                   conf.get_opt("brick_ignore_dirs").split(",")]

    # Full Namespace Crawl
    find(brick, callback_func=output_callback, filter_func=inode_filter,
         ignore_dirs=ignore_dirs)
Example #21
def brickfind_crawl(brick, args):
    if brick.endswith("/"):
        brick = brick[0:len(brick)-1]

    working_dir = os.path.dirname(args.outfile)
    mkdirp(working_dir, exit_on_err=True, logger=logger)
    create_file(args.outfile, exit_on_err=True, logger=logger)

    with open(args.outfile, "a+") as fout:
        brick_path_len = len(brick)

        def output_callback(path, filter_result, is_dir):
            path = path.strip()
            path = path[brick_path_len+1:]

            if args.type == "both":
                output_write(fout, path, args.output_prefix,
                             encode=(not args.no_encode), tag=args.tag,
                             field_separator=args.field_separator)
            else:
                if (is_dir and args.type == "d") or (
                        (not is_dir) and args.type == "f"):
                    output_write(fout, path, args.output_prefix,
                                 encode=(not args.no_encode), tag=args.tag,
                                 field_separator=args.field_separator)

        ignore_dirs = [os.path.join(brick, dirname)
                       for dirname in
                       conf.get_opt("brick_ignore_dirs").split(",")]

        find(brick, callback_func=output_callback,
             ignore_dirs=ignore_dirs)

        fout.flush()
        os.fsync(fout.fileno())
Example #22
def main():
    try:
        args = _get_args()
        mkdirp(conf.get_opt("session_dir"), exit_on_err=True)

        # force the default session name if mode is "query"
        if args.mode == "query":
            args.session = "default"

        if args.mode == "list":
            session_dir = conf.get_opt("session_dir")
        else:
            session_dir = os.path.join(conf.get_opt("session_dir"),
                                       args.session)

        if not os.path.exists(session_dir) and \
                args.mode not in ["create", "list", "query"]:
            fail("Invalid session %s" % args.session)

        # "default" is a system defined session name
        if args.mode in ["create", "post", "pre", "delete"] and \
                args.session == "default":
            fail("Invalid session %s" % args.session)

        vol_dir = os.path.join(session_dir, args.volume)
        if not os.path.exists(vol_dir) and args.mode not in \
                ["create", "list", "query"]:
            fail("Session %s not created with volume %s" %
                 (args.session, args.volume))

        mkdirp(os.path.join(conf.get_opt("log_dir"), args.session,
                            args.volume),
               exit_on_err=True)
        log_file = os.path.join(conf.get_opt("log_dir"), args.session,
                                args.volume, "cli.log")
        setup_logger(logger, log_file, args.debug)

        # globals() will have all the functions already defined.
        # mode_<args.mode> will be the function name to be called
        globals()["mode_" + args.mode](session_dir, args)
    except KeyboardInterrupt:
        # Interrupted, exit with non zero error code
        sys.exit(2)
Example #23
def mode_cleanup(args):
    working_dir = os.path.join(conf.get_opt("working_dir"),
                               args.session,
                               args.volume)

    mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"),
                            args.session,
                            args.volume,
                            "changelog.log")

    setup_logger(logger, log_file)

    try:
        shutil.rmtree(working_dir, onerror=handle_rm_error)
    except (OSError, IOError) as e:
        logger.error("Failed to delete working directory: %s" % e)
        sys.exit(1)
Example #24
def gfid_to_path_using_pgfid(brick, changelog_data, args):
    """
    For all the pgfids collected, converts them to paths, does a
    readdir on those directories, and looks up the inodegfid table
    for matching inode numbers.
    """
    populate_pgfid_and_inodegfid(brick, changelog_data)

    # If no GFIDs needs conversion to Path
    if not changelog_data.inodegfid_exists({"converted": 0}):
        return

    def inode_filter(path):
        # Looks in inodegfid table, if exists returns
        # inode number else None
        try:
            st = os.lstat(path)
        except (OSError, IOError):
            st = None

        if st and changelog_data.inodegfid_exists({"inode": st.st_ino}):
            return st.st_ino

        return None

    # Length of brick path, to remove from output path
    brick_path_len = len(brick)

    def output_callback(path, inode):
        # For each path found, encodes it and updates path1
        # Also updates converted flag in inodegfid table as 1
        path = path.strip()
        path = path[brick_path_len + 1:]

        path = output_path_prepare(path, args)

        changelog_data.append_path1(path, inode)
        changelog_data.inodegfid_update({"converted": 1}, {"inode": inode})

    ignore_dirs = [
        os.path.join(brick, dirname)
        for dirname in conf.get_opt("brick_ignore_dirs").split(",")
    ]

    for row in changelog_data.pgfid_get():
        try:
            path = symlink_gfid_to_path(brick, row[0])
            find(os.path.join(brick, path),
                 callback_func=output_callback,
                 filter_func=inode_filter,
                 ignore_dirs=ignore_dirs,
                 subdirs_crawl=False)
        except (IOError, OSError) as e:
            logger.warn("Error converting to path: %s" % e)
            continue
Example #25
def gfid_to_path_using_pgfid(brick, changelog_data, args):
    """
    For all the pgfids collected, converts them to paths, does a
    readdir on those directories, and looks up the inodegfid table
    for matching inode numbers.
    """
    populate_pgfid_and_inodegfid(brick, changelog_data)

    # If no GFIDs needs conversion to Path
    if not changelog_data.inodegfid_exists({"converted": 0}):
        return

    def inode_filter(path):
        # Looks in inodegfid table, if exists returns
        # inode number else None
        try:
            st = os.lstat(path)
        except (OSError, IOError):
            st = None

        if st and changelog_data.inodegfid_exists({"inode": st.st_ino}):
            return st.st_ino

        return None

    # Length of brick path, to remove from output path
    brick_path_len = len(brick)

    def output_callback(path, inode):
        # For each path found, encodes it and updates path1
        # Also updates converted flag in inodegfid table as 1
        path = path.strip()
        path = path[brick_path_len+1:]

        path = output_path_prepare(path, args)

        changelog_data.append_path1(path, inode)
        changelog_data.inodegfid_update({"converted": 1}, {"inode": inode})

    ignore_dirs = [os.path.join(brick, dirname)
                   for dirname in
                   conf.get_opt("brick_ignore_dirs").split(",")]

    for row in changelog_data.pgfid_get():
        try:
            path = symlink_gfid_to_path(brick, row[0])
            find(os.path.join(brick, path),
                 callback_func=output_callback,
                 filter_func=inode_filter,
                 ignore_dirs=ignore_dirs,
                 subdirs_crawl=False)
        except (IOError, OSError) as e:
            logger.warn("Error converting to path: %s" % e)
            continue
Example #26
def mode_post(args):
    session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
    status_file = os.path.join(session_dir, args.volume,
                               "%s.status" % urllib.quote_plus(args.brick))

    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)
    status_file_pre = status_file + ".pre"

    if os.path.exists(status_file_pre):
        os.rename(status_file_pre, status_file)
        sys.exit(0)
Example #27
def mode_post(args):
    session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
    status_file = os.path.join(session_dir, args.volume,
                               "%s.status" % urllib.parse.quote_plus(args.brick))

    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)
    status_file_pre = status_file + ".pre"

    if os.path.exists(status_file_pre):
        os.rename(status_file_pre, status_file)
        sys.exit(0)
Example #28
def ssh_setup():
    if not os.path.exists(conf.get_opt("secret_pem")):
        # Generate ssh-key
        cmd = ["ssh-keygen", "-N", "", "-f", conf.get_opt("secret_pem")]
        execute(cmd,
                exit_msg="Unable to generate ssh key %s" %
                conf.get_opt("secret_pem"),
                logger=logger)

        logger.info("Ssh key generated %s" % conf.get_opt("secret_pem"))

    # Copy pub file to all nodes
    cmd = [
        "gluster", "system::", "copy", "file",
        "/" + os.path.basename(conf.get_opt("secret_pem")) + ".pub"
    ]
    execute(cmd, exit_msg="Failed to distribute ssh keys", logger=logger)

    logger.info("Distributed ssh key to all nodes of Volume")

    # Add to authorized_keys file in each node
    cmd = [
        "gluster", "system::", "execute", "add_secret_pub", "root",
        os.path.basename(conf.get_opt("secret_pem")) + ".pub"
    ]
    execute(cmd,
            exit_msg="Failed to add ssh keys to authorized_keys file",
            logger=logger)

    logger.info("Ssh key added to authorized_keys of Volume nodes")
Example #29
def mode_delete(args):
    session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
    shutil.rmtree(os.path.join(session_dir, args.volume),
                  onerror=handle_rm_error)

    # If the session contains only this volume, then cleanup the
    # session directory. If a session contains multiple volumes
    # then os.rmdir will fail with ENOTEMPTY
    try:
        os.rmdir(session_dir)
    except OSError as e:
        if e.errno != ENOTEMPTY:
            logger.warn("Failed to delete session directory: %s" % e)
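
The try/except above implements "remove only if empty": os.rmdir raises OSError with errno ENOTEMPTY when other volumes still live in the session directory, and that one error is deliberately tolerated. As a reusable sketch:

import os
from errno import ENOTEMPTY

def rmdir_if_empty(path):
    try:
        os.rmdir(path)
    except OSError as e:
        if e.errno != ENOTEMPTY:
            raise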
Example #30
def mode_create(args):
    session_dir = os.path.join(conf.get_opt("session_dir"),
                               args.session)
    status_file = os.path.join(session_dir, args.volume,
                               "%s.status" % urllib.quote_plus(args.brick))

    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)

    if not os.path.exists(status_file) or args.reset_session_time:
        with open(status_file, "w", buffering=0) as f:
            f.write(args.time_to_update)

    sys.exit(0)
Example #31
def mode_delete(args):
    session_dir = os.path.join(conf.get_opt("session_dir"),
                               args.session)
    shutil.rmtree(os.path.join(session_dir, args.volume),
                  onerror=handle_rm_error)

    # If the session contains only this volume, then cleanup the
    # session directory. If a session contains multiple volumes
    # then os.rmdir will fail with ENOTEMPTY
    try:
        os.rmdir(session_dir)
    except OSError as e:
        if e.errno != ENOTEMPTY:
            logger.warn("Failed to delete session directory: %s" % e)
Example #32
def mode_create(args):
    session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
    status_file = os.path.join(session_dir, args.volume,
                               "%s.status" % urllib.quote_plus(args.brick))

    mkdirp(os.path.join(session_dir, args.volume),
           exit_on_err=True,
           logger=logger)

    if not os.path.exists(status_file) or args.reset_session_time:
        with open(status_file, "w", buffering=0) as f:
            f.write(args.time_to_update)

    sys.exit(0)
Example #33
def brickfind_crawl(brick, args):
    if brick.endswith("/"):
        brick = brick[0:len(brick) - 1]

    working_dir = os.path.dirname(args.outfile)
    mkdirp(working_dir, exit_on_err=True, logger=logger)
    create_file(args.outfile, exit_on_err=True, logger=logger)

    with open(args.outfile, "a+") as fout:
        brick_path_len = len(brick)

        def mtime_filter(path):
            try:
                st = os.lstat(path)
            except (OSError, IOError) as e:
                if e.errno == ENOENT:
                    st = None
                else:
                    raise

            if st and (st.st_mtime > args.start or st.st_ctime > args.start):
                return True

            return False

        def output_callback(path):
            path = path.strip()
            path = path[brick_path_len + 1:]
            output_write(fout, path, args.output_prefix)

        ignore_dirs = [
            os.path.join(brick, dirname)
            for dirname in conf.get_opt("brick_ignore_dirs").split(",")
        ]

        if args.full:
            find(brick, callback_func=output_callback, ignore_dirs=ignore_dirs)
        else:
            find(brick,
                 callback_func=output_callback,
                 filter_func=mtime_filter,
                 ignore_dirs=ignore_dirs)

        fout.flush()
        os.fsync(fout.fileno())
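
mtime_filter treats a file as changed when either st_mtime or st_ctime is newer than the session start, and quietly skips files deleted mid-crawl (ENOENT). The predicate on its own:

import os
from errno import ENOENT

def changed_since(path, start):
    try:
        st = os.lstat(path)
    except OSError as e:
        if e.errno == ENOENT:
            return False  # vanished during the crawl
        raise
    return st.st_mtime > start or st.st_ctime > start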
Example #34
def mode_create(session_dir, args):
    logger.debug("Init is called - Session: %s, Volume: %s" %
                 (args.session, args.volume))

    execute(["gluster", "volume", "info", args.volume],
            exit_msg="Unable to get volume details",
            logger=logger)

    mkdirp(session_dir, exit_on_err=True, logger=logger)
    mkdirp(os.path.join(session_dir, args.volume),
           exit_on_err=True,
           logger=logger)
    status_file = os.path.join(session_dir, args.volume, "status")

    if os.path.exists(status_file) and not args.force:
        fail("Session %s already created" % args.session, logger=logger)

    if not os.path.exists(status_file) or args.force:
        ssh_setup()

        execute(["gluster", "volume", "set", args.volume, "build-pgfid", "on"],
                exit_msg="Failed to set volume option build-pgfid on",
                logger=logger)
        logger.info("Volume option set %s, build-pgfid on" % args.volume)

        execute([
            "gluster", "volume", "set", args.volume, "changelog.changelog",
            "on"
        ],
                exit_msg="Failed to set volume option "
                "changelog.changelog on",
                logger=logger)
        logger.info("Volume option set %s, changelog.changelog on" %
                    args.volume)

    if not os.path.exists(status_file):
        with open(status_file, "w", buffering=0) as f:
            # Add Rollover time to current time to make sure changelogs
            # will be available if we use this time as start time
            time_to_update = int(time.time()) + int(
                conf.get_opt("changelog_rollover_time"))
            f.write(str(time_to_update))

    sys.exit(0)
Example #35
def brickfind_crawl(brick, args):
    if brick.endswith("/"):
        brick = brick[0:len(brick)-1]

    working_dir = os.path.dirname(args.outfile)
    mkdirp(working_dir, exit_on_err=True, logger=logger)
    create_file(args.outfile, exit_on_err=True, logger=logger)

    with open(args.outfile, "a+") as fout:
        brick_path_len = len(brick)

        def mtime_filter(path):
            try:
                st = os.lstat(path)
            except (OSError, IOError) as e:
                if e.errno == ENOENT:
                    st = None
                else:
                    raise

            if st and (st.st_mtime > args.start or st.st_ctime > args.start):
                return True

            return False

        def output_callback(path):
            path = path.strip()
            path = path[brick_path_len+1:]
            output_write(fout, path, args.output_prefix)

        ignore_dirs = [os.path.join(brick, dirname)
                       for dirname in
                       conf.get_opt("brick_ignore_dirs").split(",")]

        if args.full:
            find(brick, callback_func=output_callback,
                 ignore_dirs=ignore_dirs)
        else:
            find(brick, callback_func=output_callback,
                 filter_func=mtime_filter,
                 ignore_dirs=ignore_dirs)

        fout.flush()
        os.fsync(fout.fileno())
Example #36
def run_in_nodes(volume, start, args):
    """
    Get the nodes of the volume using gluster volume info, spawn one
    process per node, and merge the output files once all processes
    complete their tasks.
    """
    nodes = get_nodes(volume)
    pool = []
    node_outfiles = []
    for num, node in enumerate(nodes):
        host, brick = node[1].split(":")
        # temp output file
        node_outfile = os.path.join(conf.get_opt("working_dir"),
                                    args.session,
                                    volume,
                                    "tmp_output_%s.txt" % num)
        node_outfiles.append(node_outfile)
        p = Process(target=node_run, args=(volume, host, brick, start,
                                           node_outfile, args))
        p.start()
        pool.append(p)

    exit_codes = 0
    for idx, p in enumerate(pool):
        p.join()
        # Handle the Changelog failure, fallback to Brickfind
        if p.exitcode == 2:
            rc = failback_node_run(nodes[idx][1], idx, volume, start,
                                   node_outfiles[idx], args)
            exit_codes += (0 if rc == 0 else 1)
        elif p.exitcode != 0:
            exit_codes += (0 if p.exitcode == 0 else 1)

    if exit_codes != 0:
        sys.exit(1)

    # Merge all output files
    cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile]
    execute(cmd,
            exit_msg="Failed to merge output files "
            "collected from nodes", logger=logger)

    cleanup(nodes, args)
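
The merge step shells out to sort -u, which sorts and de-duplicates all per-node files into the final outfile in one pass. The command assembly in isolation (file names are placeholders):

node_outfiles = ["/tmp/tmp_output_0.txt", "/tmp/tmp_output_1.txt"]
cmd = ["sort", "-u"] + node_outfiles + ["-o", "/tmp/out.txt"]
print(" ".join(cmd))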
Example #37
def run_in_nodes(volume, start, args):
    """
    Get the nodes of the volume using gluster volume info, spawn one
    process per node, and merge the output files once all processes
    complete their tasks.
    """
    nodes = get_nodes(volume)
    pool = []
    node_outfiles = []
    for num, node in enumerate(nodes):
        host, brick = node[1].split(":")
        # temp output file
        node_outfile = os.path.join(conf.get_opt("working_dir"), args.session,
                                    volume, "tmp_output_%s.txt" % num)
        node_outfiles.append(node_outfile)
        p = Process(target=node_run,
                    args=(volume, host, brick, start, node_outfile, args))
        p.start()
        pool.append(p)

    exit_codes = 0
    for idx, p in enumerate(pool):
        p.join()
        # Handle the Changelog failure, fallback to Brickfind
        if p.exitcode == 2:
            rc = failback_node_run(nodes[idx][1], idx, volume, start,
                                   node_outfiles[idx], args)
            exit_codes += (0 if rc == 0 else 1)
        elif p.exitcode != 0:
            exit_codes += (0 if p.exitcode == 0 else 1)

    if exit_codes != 0:
        sys.exit(1)

    # Merge all output files
    cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile]
    execute(cmd,
            exit_msg="Failed to merge output files "
            "collected from nodes",
            logger=logger)

    cleanup(nodes, args)
Example #38
def mode_create(session_dir, args):
    logger.debug("Init is called - Session: %s, Volume: %s"
                 % (args.session, args.volume))

    execute(["gluster", "volume", "info", args.volume],
            exit_msg="Unable to get volume details",
            logger=logger)

    mkdirp(session_dir, exit_on_err=True, logger=logger)
    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)
    status_file = os.path.join(session_dir, args.volume, "status")

    if os.path.exists(status_file) and not args.force:
        fail("Session %s already created" % args.session, logger=logger)

    if not os.path.exists(status_file) or args.force:
        ssh_setup(args)

        execute(["gluster", "volume", "set",
                 args.volume, "build-pgfid", "on"],
                exit_msg="Failed to set volume option build-pgfid on",
                logger=logger)
        logger.info("Volume option set %s, build-pgfid on" % args.volume)

        execute(["gluster", "volume", "set",
                 args.volume, "changelog.changelog", "on"],
                exit_msg="Failed to set volume option "
                "changelog.changelog on", logger=logger)
        logger.info("Volume option set %s, changelog.changelog on"
                    % args.volume)

    if not os.path.exists(status_file):
        with open(status_file, "w", buffering=0) as f:
            # Add Rollover time to current time to make sure changelogs
            # will be available if we use this time as start time
            time_to_update = int(time.time()) + int(
                conf.get_opt("changelog_rollover_time"))
            f.write(str(time_to_update))

    sys.exit(0)
Example #39
def changelog_crawl(brick, start, end, args):
    """
    Init function, prepares working dir and calls Changelog query
    """
    if brick.endswith("/"):
        brick = brick[0:len(brick) - 1]

    # WORKING_DIR/BRICKHASH/OUTFILE
    working_dir = os.path.dirname(args.outfile)
    brickhash = hashlib.sha1(brick.encode())
    brickhash = str(brickhash.hexdigest())
    working_dir = os.path.join(working_dir, brickhash)

    mkdirp(working_dir, exit_on_err=True, logger=logger)

    log_file = os.path.join(conf.get_opt("log_dir"), args.session, args.volume,
                            "changelog.%s.log" % brickhash)

    logger.info("%s Started Changelog Crawl. Start: %s, End: %s" %
                (brick, start, end))
    return get_changes(brick, working_dir, log_file, start, end, args)
Example #40
def gfid_to_path_using_batchfind(brick, changelog_data):
    # If all the GFIDs converted using gfid_to_path_using_pgfid
    if not changelog_data.inodegfid_exists({"converted": 0}):
        return

    def inode_filter(path):
        # Looks in inodegfid table, if exists returns
        # inode number else None
        try:
            st = os.lstat(path)
        except (OSError, IOError):
            st = None

        if st and changelog_data.inodegfid_exists({"inode": st.st_ino}):
            return st.st_ino

        return None

    # Length of brick path, to remove from output path
    brick_path_len = len(brick)

    def output_callback(path, inode):
        # For each path found, encodes it and updates path1
        # Also updates converted flag in inodegfid table as 1
        path = path.strip()
        path = path[brick_path_len + 1:]
        path = output_path_prepare(path, args)

        changelog_data.append_path1(path, inode)

    ignore_dirs = [
        os.path.join(brick, dirname)
        for dirname in conf.get_opt("brick_ignore_dirs").split(",")
    ]

    # Full Namespace Crawl
    find(brick,
         callback_func=output_callback,
         filter_func=inode_filter,
         ignore_dirs=ignore_dirs)
Example #41
def ssh_setup():
    if not os.path.exists(conf.get_opt("secret_pem")):
        # Generate ssh-key
        cmd = ["ssh-keygen",
               "-N",
               "",
               "-f",
               conf.get_opt("secret_pem")]
        execute(cmd,
                exit_msg="Unable to generate ssh key %s"
                % conf.get_opt("secret_pem"),
                logger=logger)

        logger.info("Ssh key generated %s" % conf.get_opt("secret_pem"))

    # Copy pub file to all nodes
    cmd = ["gluster",
           "system::",
           "copy",
           "file",
           "/" + os.path.basename(conf.get_opt("secret_pem")) + ".pub"]
    execute(cmd, exit_msg="Failed to distribute ssh keys", logger=logger)

    logger.info("Distributed ssh key to all nodes of Volume")

    # Add to authorized_keys file in each node
    cmd = ["gluster",
           "system::",
           "execute",
           "add_secret_pub",
           "root",
           os.path.basename(conf.get_opt("secret_pem")) + ".pub"]
    execute(cmd,
            exit_msg="Failed to add ssh keys to authorized_keys file",
            logger=logger)

    logger.info("Ssh key added to authorized_keys of Volume nodes")
Example #42
def get_pem_key_path(session, volume):
    return os.path.join(conf.get_opt("session_dir"),
                        session,
                        volume,
                        "%s_%s_secret.pem" % (session, volume))
Example #43
import sys
import os
import shutil
import logging
from errno import ENOENT

from utils import setup_logger, mkdirp
import conf

logger = logging.getLogger()

if __name__ == "__main__":
    # Args: <SESSION> <VOLUME>
    session = sys.argv[1]
    volume = sys.argv[2]

    working_dir = os.path.join(conf.get_opt("working_dir"), session, volume)

    mkdirp(os.path.join(conf.get_opt("log_dir"), session, volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"), session, volume,
                            "changelog.log")

    setup_logger(logger, log_file)

    try:

        def handle_rm_error(func, path, exc_info):
            if exc_info[1].errno == ENOENT:
                return

            raise exc_info[1]

        shutil.rmtree(working_dir, onerror=handle_rm_error)
    except (OSError, IOError) as e:
        logger.error("Failed to delete working directory: %s" % e)
        sys.exit(1)
Example #44
                        action="store_true")
    parser.add_argument("--output-prefix",
                        help="File prefix in output",
                        default=".")
    parser.add_argument("--type", default="both")
    parser.add_argument("-N",
                        "--only-namespace-changes",
                        help="List only namespace changes",
                        action="store_true")

    return parser.parse_args()


if __name__ == "__main__":
    args = _get_args()
    mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"), args.session, args.volume,
                            "changelog.log")
    setup_logger(logger, log_file, args.debug)

    session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
    status_file = os.path.join(session_dir, args.volume,
                               "%s.status" % urllib.quote_plus(args.brick))
    status_file_pre = status_file + ".pre"
    mkdirp(os.path.join(session_dir, args.volume),
           exit_on_err=True,
           logger=logger)

    end = -1
    if args.only_query:
Example #45
def get_pem_key_path(session, volume):
    return os.path.join(conf.get_opt("session_dir"), session, volume,
                        "%s_%s_secret.pem" % (session, volume))
Example #46
def run_cmd_nodes(task, args, **kwargs):
    global node_outfiles
    nodes = get_nodes(args.volume)
    pool = []
    for num, node in enumerate(nodes):
        host, brick = node[1].split(":")
        host_uuid = node[0]
        cmd = []
        opts = {}

        # tmpfilename is valid only for tasks: pre, query and cleanup
        tmpfilename = kwargs.get("tmpfilename", "BADNAME")

        node_outfile = os.path.join(conf.get_opt("working_dir"), args.session,
                                    args.volume, tmpfilename,
                                    "tmp_output_%s" % num)

        if task == "pre":
            if vol_statusStr != "Started":
                fail("Volume %s is not online" % args.volume, logger=logger)

            # If Full backup is requested or start time is zero, use brickfind
            change_detector = conf.get_change_detector("changelog")
            tag = None
            if args.full:
                change_detector = conf.get_change_detector("brickfind")
                tag = args.tag_for_full_find.strip()
                if tag == "":
                    tag = '""' if not is_host_local(host_uuid) else ""

            node_outfiles.append(node_outfile)
            # remote file will be copied into this directory
            mkdirp(os.path.dirname(node_outfile),
                   exit_on_err=True,
                   logger=logger)

            FS = args.field_separator
            if not is_host_local(host_uuid):
                FS = "'" + FS + "'"

            cmd = [change_detector,
                   args.session,
                   args.volume,
                   host,
                   brick,
                   node_outfile] + \
                ([str(kwargs.get("start")), str(kwargs.get("end"))]
                    if not args.full else []) + \
                ([tag] if tag is not None else []) + \
                ["--output-prefix", args.output_prefix] + \
                (["--debug"] if args.debug else []) + \
                (["--no-encode"] if args.no_encode else []) + \
                (["--only-namespace-changes"] if args.only_namespace_changes
                 else []) + \
                (["--field-separator", FS] if args.full else [])

            opts["node_outfile"] = node_outfile
            opts["copy_outfile"] = True
        elif task == "query":
            # If Full backup is requested or start time is zero, use brickfind
            tag = None
            change_detector = conf.get_change_detector("changelog")
            if args.full:
                change_detector = conf.get_change_detector("brickfind")
                tag = args.tag_for_full_find.strip()
                if tag == "":
                    tag = '""' if not is_host_local(host_uuid) else ""

            node_outfiles.append(node_outfile)
            # remote file will be copied into this directory
            mkdirp(os.path.dirname(node_outfile),
                   exit_on_err=True,
                   logger=logger)

            FS = args.field_separator
            if not is_host_local(host_uuid):
                FS = "'" + FS + "'"

            cmd = [change_detector,
                   args.session,
                   args.volume,
                   host,
                   brick,
                   node_outfile] + \
                ([str(kwargs.get("start")), str(kwargs.get("end"))]
                    if not args.full else []) + \
                ([tag] if tag is not None else []) + \
                ["--only-query"] + \
                ["--output-prefix", args.output_prefix] + \
                (["--debug"] if args.debug else []) + \
                (["--no-encode"] if args.no_encode else []) + \
                (["--only-namespace-changes"]
                    if args.only_namespace_changes else []) + \
                (["--field-separator", FS] if args.full else [])

            opts["node_outfile"] = node_outfile
            opts["copy_outfile"] = True
        elif task == "cleanup":
            # After pre/query run, cleanup the working directory and other
            # temp files. Remove the directory to which node_outfile has
            # been copied in main node
            try:
                os.remove(node_outfile)
            except (OSError, IOError):
                logger.warn("Failed to cleanup temporary file %s" %
                            node_outfile)

            cmd = [conf.get_opt("nodeagent"),
                   "cleanup",
                   args.session,
                   args.volume,
                   os.path.dirname(node_outfile)] + \
                (["--debug"] if args.debug else [])
        elif task == "create":
            if vol_statusStr != "Started":
                fail("Volume %s is not online" % args.volume, logger=logger)

            # On glusterfind create, create the session directory on
            # each brick node
            cmd = [conf.get_opt("nodeagent"),
                   "create",
                   args.session,
                   args.volume,
                   brick,
                   kwargs.get("time_to_update")] + \
                (["--debug"] if args.debug else []) + \
                (["--reset-session-time"] if args.reset_session_time
                 else [])
        elif task == "post":
            # Rename pre status file to actual status file in each node
            cmd = [conf.get_opt("nodeagent"),
                   "post",
                   args.session,
                   args.volume,
                   brick] + \
                (["--debug"] if args.debug else [])
        elif task == "delete":
            # On glusterfind delete, clean up all session files/dirs
            # on each node.
            cmd = [conf.get_opt("nodeagent"),
                   "delete",
                   args.session,
                   args.volume] + \
                (["--debug"] if args.debug else [])

        if cmd:
            p = Process(target=node_cmd,
                        args=(host, host_uuid, task, cmd, args, opts))
            p.start()
            pool.append(p)

    for num, p in enumerate(pool):
        p.join()
        if p.exitcode != 0:
            logger.warn("Command %s failed in %s" % (task, nodes[num][1]))
            if task in ["create", "delete"]:
                fail("Command %s failed in %s" % (task, nodes[num][1]))
            elif task == "pre" and args.disable_partial:
                sys.exit(1)
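
This dispatcher (run_cmd_nodes in the later examples) is driven by per-mode
callers that are not part of this snippet. A sketch of hypothetical call
sites, inferred from the kwargs each task reads:

import time

# Hypothetical call sites, one per task (names and arguments assumed):
run_cmd_nodes("create", args, time_to_update=str(int(time.time())))
run_cmd_nodes("pre", args, start=start, end=end)  # runs change detectors
run_cmd_nodes("post", args)     # renames the .status.pre files
run_cmd_nodes("cleanup", args)  # removes copied temp outputs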
Example #47
def mode_delete(args):
    session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
    shutil.rmtree(os.path.join(session_dir, args.volume),
                  onerror=handle_rm_error)
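
handle_rm_error is defined outside this snippet. A minimal sketch of a
compatible shutil.rmtree onerror handler, assuming it only tolerates
already-deleted entries:

from errno import ENOENT

def handle_rm_error(func, path, exc_info):
    # Ignore entries that are already gone; re-raise anything else.
    if exc_info[1].errno == ENOENT:
        return
    raise exc_info[1]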
Example #48
def run_cmd_nodes(task, args, **kwargs):
    global node_outfiles
    nodes = get_nodes(args.volume)
    pool = []
    for num, node in enumerate(nodes):
        host, brick = node[1].split(":")
        host_uuid = node[0]
        cmd = []
        opts = {}
        node_outfile = os.path.join(conf.get_opt("working_dir"), args.session,
                                    args.volume, "tmp_output_%s" % num)

        if task == "pre":
            if vol_statusStr != "Started":
                fail("Volume %s is not online" % args.volume, logger=logger)

            # If Full backup is requested or start time is zero, use brickfind
            change_detector = conf.get_change_detector("changelog")
            if args.full:
                change_detector = conf.get_change_detector("brickfind")

            node_outfiles.append(node_outfile)

            cmd = [change_detector,
                   args.session,
                   args.volume,
                   brick,
                   node_outfile,
                   str(kwargs.get("start")),
                   "--output-prefix",
                   args.output_prefix] + \
                (["--debug"] if args.debug else []) + \
                (["--only-namespace-changes"] if args.only_namespace_changes
                 else [])

            opts["node_outfile"] = node_outfile
            opts["copy_outfile"] = True
        elif task == "query":
            # If Full backup is requested or start time is zero, use brickfind
            change_detector = conf.get_change_detector("changelog")
            node_outfiles.append(node_outfile)

            cmd = [change_detector,
                   args.session,
                   args.volume,
                   brick,
                   node_outfile,
                   str(kwargs.get("start"))] + \
                ["--only-query"] + \
                ["--output-prefix", args.output_prefix] + \
                (["--debug"] if args.debug else []) + \
                (["--only-namespace-changes"]
                    if args.only_namespace_changes else [])

            opts["node_outfile"] = node_outfile
            opts["copy_outfile"] = True
        elif task == "cleanup":
            # After a pre run, clean up the working directory and other
            # temp files. Remove the copied node_outfile on the main node.
            try:
                os.remove(node_outfile)
            except (OSError, IOError):
                logger.warn("Failed to clean up temporary file %s" %
                            node_outfile)

            cmd = [
                conf.get_opt("nodeagent"), "cleanup", args.session, args.volume
            ] + (["--debug"] if args.debug else [])
        elif task == "create":
            if vol_statusStr != "Started":
                fail("Volume %s is not online" % args.volume, logger=logger)

            # On glusterfind create, create the session directory on
            # each brick node
            cmd = [conf.get_opt("nodeagent"),
                   "create",
                   args.session,
                   args.volume,
                   brick,
                   kwargs.get("time_to_update")] + \
                (["--debug"] if args.debug else []) + \
                (["--reset-session-time"] if args.reset_session_time
                 else [])
        elif task == "post":
            # Rename pre status file to actual status file in each node
            cmd = [conf.get_opt("nodeagent"),
                   "post",
                   args.session,
                   args.volume,
                   brick] + \
                (["--debug"] if args.debug else [])
        elif task == "delete":
            # On glusterfind delete, clean up all session files/dirs
            # on each node.
            cmd = [conf.get_opt("nodeagent"),
                   "delete",
                   args.session,
                   args.volume] + \
                (["--debug"] if args.debug else [])

        if cmd:
            p = Process(target=node_cmd,
                        args=(host, host_uuid, task, cmd, args, opts))
            p.start()
            pool.append(p)

    for num, p in enumerate(pool):
        p.join()
        if p.exitcode != 0:
            logger.warn("Command %s failed in %s" % (task, nodes[num][1]))
            if task in ["create", "delete"]:
                fail("Command %s failed in %s" % (task, nodes[num][1]))
            elif task == "pre" and args.disable_partial:
                sys.exit(1)
Example #49
def gfid_to_path_using_batchfind(brick, gfids_file, output_file):
    """
    find -samefile gets the inode number and crawls entire namespace
    to get the list of files/dirs having same inode number.
    Do find without any option, except the ignore directory option,
    print the output in <INODE_NUM> <PATH> format, use this output
    to look into in-memory dictionary of inode numbers got from the
    list of GFIDs
    """
    with open(output_file, "a+") as fout:
        inode_dict = {}
        with open(gfids_file) as f:
            for gfid in f:
                gfid = gfid.strip()
                backend_path = os.path.join(brick, ".glusterfs", gfid[0:2],
                                            gfid[2:4], gfid)

                try:
                    inode_dict[str(os.stat(backend_path).st_ino)] = 1
                except (IOError, OSError) as e:
                    if e.errno == ENOENT:
                        continue
                    else:
                        fail("%s Failed to convert to path from "
                             "GFID %s: %s" % (brick, gfid, e),
                             logger=logger)

        if not inode_dict:
            return

        def inode_filter(path):
            try:
                st = os.lstat(path)
            except (OSError, IOError) as e:
                if e.errno == ENOENT:
                    st = None
                else:
                    raise

            if st and inode_dict.get(str(st.st_ino), None):
                return True

            return False

        # Length of brick path, to remove from output path
        brick_path_len = len(brick)

        def output_callback(path):
            path = path.strip()
            path = path[brick_path_len + 1:]
            output_write(fout, path, args.output_prefix)

        ignore_dirs = [
            os.path.join(brick, dirname)
            for dirname in conf.get_opt("brick_ignore_dirs").split(",")
        ]
        find(brick,
             callback_func=output_callback,
             filter_func=inode_filter,
             ignore_dirs=ignore_dirs)

        fout.flush()
        os.fsync(fout.fileno())
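
The backend path built above follows the .glusterfs layout: the first two
hex characters of the GFID, then the next two, then the full GFID. As a
self-contained illustration (the helper name is ours, not the snippet's):

import os

def gfid_to_backend_path(brick, gfid):
    # e.g. GFID "a1b2c3..." maps to BRICK/.glusterfs/a1/b2/a1b2c3...
    return os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)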
Example #50
def get_changes(brick, hash_dir, log_file, start, end, args):
    """
    Uses libgfchangelog's history API to fetch the changelogs
    containing changes between the start and end times, then collects
    the modified GFIDs from those changelogs and writes the list
    to the 'gfid_list' file.
    """
    session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
    status_file = os.path.join(session_dir, args.volume,
                               "%s.status" % urllib.quote_plus(args.brick))

    # Get previous session
    try:
        with open(status_file) as f:
            start = int(f.read().strip())
    except (ValueError, OSError, IOError):
        start = args.start

    try:
        libgfchangelog.cl_init()
        libgfchangelog.cl_register(brick, hash_dir, log_file,
                                   CHANGELOG_LOG_LEVEL, CHANGELOG_CONN_RETRIES)
    except libgfchangelog.ChangelogException as e:
        fail("%s Changelog register failed: %s" % (brick, e), logger=logger)

    # Output files to record GFIDs and GFID to Path failure GFIDs
    changelog_data = ChangelogData(args.outfile, args)

    # Changelogs path (hard-coded to BRICK/.glusterfs/changelogs)
    cl_path = os.path.join(brick, ".glusterfs/changelogs")

    # Fail if History fails for requested Start and End
    try:
        actual_end = libgfchangelog.cl_history_changelog(
            cl_path, start, end, CHANGELOGAPI_NUM_WORKERS)
    except libgfchangelog.ChangelogException as e:
        fail("%s: %s Historical Changelogs not available: %s" %
             (args.node, brick, e),
             logger=logger)

    logger.info("[1/4] Starting changelog parsing ...")
    try:
        # Alternate scan and getchanges until scan returns zero.
        # history_scan() blocks until it knows the number of changelogs
        # to process: it returns zero when nothing is left, and a
        # positive count of changelogs to process, which are then
        # fetched with history_getchanges().
        changes = []
        while libgfchangelog.cl_history_scan() > 0:
            changes = libgfchangelog.cl_history_getchanges()

            for change in changes:
                # Skip the last processed changelog if it
                # appears again in the list
                if change.endswith(".%s" % start):
                    continue
                try:
                    parse_changelog_to_db(changelog_data, change, args)
                    libgfchangelog.cl_history_done(change)
                except IOError as e:
                    logger.warn("Error parsing changelog file %s: %s" %
                                (change, e))

            changelog_data.commit()
    except libgfchangelog.ChangelogException as e:
        fail("%s Error during Changelog Crawl: %s" % (brick, e), logger=logger)

    logger.info("[1/4] Finished changelog parsing.")

    # Convert all PGFIDs available from the changelogs
    logger.info("[2/4] Starting 'pgfid to path' conversions ...")
    pgfid_to_path(brick, changelog_data)
    changelog_data.commit()
    logger.info("[2/4] Finished 'pgfid to path' conversions.")

    # Convert all GFIDs for which no additional details are available
    logger.info("[3/4] Starting 'gfid to path using pgfid' conversions ...")
    gfid_to_path_using_pgfid(brick, changelog_data, args)
    changelog_data.commit()
    logger.info("[3/4] Finished 'gfid to path using pgfid' conversions.")

    # If some GFIDs fail to get converted from previous step,
    # convert using find
    logger.info("[4/4] Starting 'gfid to path using batchfind' "
                "conversions ...")
    gfid_to_path_using_batchfind(brick, changelog_data)
    changelog_data.commit()
    logger.info("[4/4] Finished 'gfid to path using batchfind' conversions.")

    return actual_end
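
The status-file read at the top of get_changes is a resume point: the
session records its time on success and the next run continues from it.
A standalone sketch of the same fallback pattern (the helper name is
illustrative):

def read_start_time(status_file, default_start):
    # Resume from the last recorded session time; fall back to the
    # caller-supplied start on a first run or an unreadable status file.
    try:
        with open(status_file) as f:
            return int(f.read().strip())
    except (ValueError, OSError, IOError):
        return default_start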
Example #51
    parser.add_argument("session", help="Session Name")
    parser.add_argument("volume", help="Volume Name")
    parser.add_argument("brick", help="Brick Name")
    parser.add_argument("outfile", help="Output File")
    parser.add_argument("start", help="Start Time", type=float)
    parser.add_argument("--debug", help="Debug", action="store_true")
    parser.add_argument("--output-prefix", help="File prefix in output",
                        default=".")

    return parser.parse_args()


if __name__ == "__main__":
    args = _get_args()
    session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
    status_file = os.path.join(session_dir, args.volume,
                               "%s.status" % urllib.quote_plus(args.brick))
    status_file_pre = status_file + ".pre"
    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)
    mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"),
                            args.session,
                            args.volume,
                            "brickfind.log")
    setup_logger(logger, log_file, args.debug)

    time_to_update = int(time.time())
    brickfind_crawl(args.brick, args)
Example #52
    parser.add_argument("session", help="Session Name")
    parser.add_argument("volume", help="Volume Name")
    parser.add_argument("brick", help="Brick Name")
    parser.add_argument("outfile", help="Output File")
    parser.add_argument("start", help="Start Time", type=int)
    parser.add_argument("--debug", help="Debug", action="store_true")
    parser.add_argument("--output-prefix", help="File prefix in output", default=".")
    parser.add_argument("-N", "--only-namespace-changes", help="List only namespace changes", action="store_true")

    return parser.parse_args()


if __name__ == "__main__":
    args = _get_args()
    mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"), args.session,
                            args.volume, "changelog.log")
    setup_logger(logger, log_file, args.debug)

    session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
    status_file = os.path.join(session_dir, args.volume,
                               "%s.status" % urllib.quote_plus(args.brick))
    status_file_pre = status_file + ".pre"
    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)

    try:
        with open(status_file) as f:
            start = int(f.read().strip())
    except (ValueError, OSError, IOError):
        start = args.start

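    # Assumption: subtracting the changelog rollover interval keeps the
    # query window clear of a changelog file that may still be open for
    # writes.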
    end = int(time.time()) - get_changelog_rollover_time(args.volume)
Example #53
def run_cmd_nodes(task, args, **kwargs):
    global node_outfiles
    nodes = get_nodes(args.volume)
    pool = []
    for num, node in enumerate(nodes):
        host, brick = node[1].split(":")
        host_uuid = node[0]
        cmd = []
        opts = {}

        # tmpfilename is valid only for tasks: pre, query and cleanup
        tmpfilename = kwargs.get("tmpfilename", "BADNAME")

        node_outfile = os.path.join(conf.get_opt("working_dir"),
                                    args.session, args.volume,
                                    tmpfilename,
                                    "tmp_output_%s" % num)

        if task == "pre":
            if vol_statusStr != "Started":
                fail("Volume %s is not online" % args.volume,
                     logger=logger)

            # If Full backup is requested or start time is zero, use brickfind
            change_detector = conf.get_change_detector("changelog")
            tag = None
            if args.full:
                change_detector = conf.get_change_detector("brickfind")
                tag = args.tag_for_full_find.strip()
                if tag == "":
                    tag = '""' if not is_host_local(host_uuid) else ""

            node_outfiles.append(node_outfile)
            # remote file will be copied into this directory
            mkdirp(os.path.dirname(node_outfile),
                   exit_on_err=True, logger=logger)

            FS = args.field_separator
            if not is_host_local(host_uuid):
                FS = "'" + FS + "'"

            cmd = [change_detector,
                   args.session,
                   args.volume,
                   host,
                   brick,
                   node_outfile] + \
                ([str(kwargs.get("start")), str(kwargs.get("end"))]
                    if not args.full else []) + \
                ([tag] if tag is not None else []) + \
                ["--output-prefix", args.output_prefix] + \
                (["--debug"] if args.debug else []) + \
                (["--no-encode"] if args.no_encode else []) + \
                (["--only-namespace-changes"] if args.only_namespace_changes
                 else []) + \
                (["--type", args.type]) + \
                (["--field-separator", FS] if args.full else [])

            opts["node_outfile"] = node_outfile
            opts["copy_outfile"] = True
        elif task == "query":
            # If Full backup is requested or start time is zero, use brickfind
            tag = None
            change_detector = conf.get_change_detector("changelog")
            if args.full:
                change_detector = conf.get_change_detector("brickfind")
                tag = args.tag_for_full_find.strip()
                if tag == "":
                    tag = '""' if not is_host_local(host_uuid) else ""

            node_outfiles.append(node_outfile)
            # remote file will be copied into this directory
            mkdirp(os.path.dirname(node_outfile),
                   exit_on_err=True, logger=logger)

            FS = args.field_separator
            if not is_host_local(host_uuid):
                FS = "'" + FS + "'"

            cmd = [change_detector,
                   args.session,
                   args.volume,
                   host,
                   brick,
                   node_outfile] + \
                ([str(kwargs.get("start")), str(kwargs.get("end"))]
                    if not args.full else []) + \
                ([tag] if tag is not None else []) + \
                ["--only-query"] + \
                ["--output-prefix", args.output_prefix] + \
                (["--debug"] if args.debug else []) + \
                (["--no-encode"] if args.no_encode else []) + \
                (["--only-namespace-changes"]
                    if args.only_namespace_changes else []) + \
                (["--type", args.type]) + \
                (["--field-separator", FS] if args.full else [])

            opts["node_outfile"] = node_outfile
            opts["copy_outfile"] = True
        elif task == "cleanup":
            # After a pre/query run, clean up the working directory and
            # other temp files. Remove the directory into which
            # node_outfile was copied on the main node.
            try:
                os.remove(node_outfile)
            except (OSError, IOError):
                logger.warn("Failed to clean up temporary file %s" %
                            node_outfile)

            cmd = [conf.get_opt("nodeagent"),
                   "cleanup",
                   args.session,
                   args.volume,
                   os.path.dirname(node_outfile)] + \
                (["--debug"] if args.debug else [])
        elif task == "create":
            if vol_statusStr != "Started":
                fail("Volume %s is not online" % args.volume,
                     logger=logger)

            # On glusterfind create, create the session directory on
            # each brick node
            cmd = [conf.get_opt("nodeagent"),
                   "create",
                   args.session,
                   args.volume,
                   brick,
                   kwargs.get("time_to_update")] + \
                (["--debug"] if args.debug else []) + \
                (["--reset-session-time"] if args.reset_session_time
                 else [])
        elif task == "post":
            # Rename pre status file to actual status file in each node
            cmd = [conf.get_opt("nodeagent"),
                   "post",
                   args.session,
                   args.volume,
                   brick] + \
                (["--debug"] if args.debug else [])
        elif task == "delete":
            # On glusterfind delete, clean up all session files/dirs
            # on each node.
            cmd = [conf.get_opt("nodeagent"),
                   "delete",
                   args.session,
                   args.volume] + \
                (["--debug"] if args.debug else [])

        if cmd:
            p = Process(target=node_cmd,
                        args=(host, host_uuid, task, cmd, args, opts))
            p.start()
            pool.append(p)

    for num, p in enumerate(pool):
        p.join()
        if p.exitcode != 0:
            logger.warn("Command %s failed in %s" % (task, nodes[num][1]))
            if task in ["create", "delete"]:
                fail("Command %s failed in %s" % (task, nodes[num][1]))
            elif task == "pre" and args.disable_partial:
                sys.exit(1)
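
This variant nests the per-node output files under a per-run tmpfilename
(see the kwargs comment near the top of the function). A hypothetical
caller, sketched under that assumption:

import os
import tempfile

# A unique per-run directory keeps concurrent pre/query runs from
# clobbering each other's tmp_output_* files (assumed call site).
tmpdir = tempfile.mkdtemp(dir=os.path.join(conf.get_opt("working_dir"),
                                           args.session, args.volume))
tmpfilename = os.path.basename(tmpdir)
run_cmd_nodes("pre", args, start=start, end=end, tmpfilename=tmpfilename)
run_cmd_nodes("cleanup", args, tmpfilename=tmpfilename)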
Example #54
def get_changes(brick, hash_dir, log_file, start, end, args):
    """
    Uses libgfchangelog's history API to fetch the changelogs
    containing changes between the start and end times, then collects
    the modified GFIDs from those changelogs and writes the list
    to the 'gfid_list' file.
    """
    session_dir = os.path.join(conf.get_opt("session_dir"),
                               args.session)
    status_file = os.path.join(session_dir, args.volume,
                               "%s.status" % urllib.quote_plus(args.brick))

    # Get previous session
    try:
        with open(status_file) as f:
            start = int(f.read().strip())
    except (ValueError, OSError, IOError):
        start = args.start

    try:
        libgfchangelog.cl_init()
        libgfchangelog.cl_register(brick, hash_dir, log_file,
                                   CHANGELOG_LOG_LEVEL, CHANGELOG_CONN_RETRIES)
    except libgfchangelog.ChangelogException as e:
        fail("%s Changelog register failed: %s" % (brick, e), logger=logger)

    # Output files to record GFIDs and GFID to Path failure GFIDs
    changelog_data = ChangelogData(args.outfile, args)

    # Changelogs path (hard-coded to BRICK/.glusterfs/changelogs)
    cl_path = os.path.join(brick, ".glusterfs/changelogs")

    # Fail if History fails for requested Start and End
    try:
        actual_end = libgfchangelog.cl_history_changelog(
            cl_path, start, end, CHANGELOGAPI_NUM_WORKERS)
    except libgfchangelog.ChangelogException as e:
        fail("%s: %s Historical Changelogs not available: %s" %
             (args.node, brick, e), logger=logger)

    logger.info("[1/4] Starting changelog parsing ...")
    try:
        # Alternate scan and getchanges until scan returns zero.
        # history_scan() blocks until it knows the number of changelogs
        # to process: it returns zero when nothing is left, and a
        # positive count of changelogs to process, which are then
        # fetched with history_getchanges().
        changes = []
        while libgfchangelog.cl_history_scan() > 0:
            changes = libgfchangelog.cl_history_getchanges()

            for change in changes:
                # Skip the last processed changelog if it
                # appears again in the list
                if change.endswith(".%s" % start):
                    continue
                try:
                    parse_changelog_to_db(changelog_data, change, args)
                    libgfchangelog.cl_history_done(change)
                except IOError as e:
                    logger.warn("Error parsing changelog file %s: %s" %
                                (change, e))

            changelog_data.commit()
    except libgfchangelog.ChangelogException as e:
        fail("%s Error during Changelog Crawl: %s" % (brick, e),
             logger=logger)

    logger.info("[1/4] Finished changelog parsing.")

    # Convert all PGFIDs available from the changelogs
    logger.info("[2/4] Starting 'pgfid to path' conversions ...")
    pgfid_to_path(brick, changelog_data)
    changelog_data.commit()
    logger.info("[2/4] Finished 'pgfid to path' conversions.")

    # Convert all GFIDs for which no additional details are available
    logger.info("[3/4] Starting 'gfid to path using pgfid' conversions ...")
    gfid_to_path_using_pgfid(brick, changelog_data, args)
    changelog_data.commit()
    logger.info("[3/4] Finished 'gfid to path using pgfid' conversions.")

    # If some GFIDs fail to get converted from previous step,
    # convert using find
    logger.info("[4/4] Starting 'gfid to path using batchfind' "
                "conversions ...")
    gfid_to_path_using_batchfind(brick, changelog_data)
    changelog_data.commit()
    logger.info("[4/4] Finished 'gfid to path using batchfind' conversions.")

    return actual_end
Example #55
    get_changes(brick, working_dir, log_file, end, args)


def _get_args():
    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
                            description=PROG_DESCRIPTION)

    parser.add_argument("session", help="Session Name")
    parser.add_argument("volume", help="Volume Name")
    parser.add_argument("brick", help="Brick Name")
    parser.add_argument("outfile", help="Output File")
    parser.add_argument("start", help="Start Time", type=int)
    parser.add_argument("--debug", help="Debug", action="store_true")
    parser.add_argument("--output-prefix",
                        help="File prefix in output",
                        default=".")

    return parser.parse_args()


if __name__ == "__main__":
    args = _get_args()
    mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"), args.session, args.volume,
                            "changelog.log")
    setup_logger(logger, log_file, args.debug)
    end = int(time.time()) - int(conf.get_opt("changelog_rollover_time"))
    changelog_crawl(args.brick, end, args)
    sys.exit(0)
Example #56
def gfid_to_path_using_pgfid(brick, gfids_file, output_file, outfile_failures):
    """
    The parent GFID is saved as an xattr; collect parent GFIDs from
    all the files listed in gfids_file. Convert each parent GFID to a
    path and crawl that directory (find with maxdepth 1), matching
    each visited path's inode number against an in-memory dictionary
    of inode numbers built from the list of GFIDs.
    """
    with open(output_file, "a+") as fout:
        pgfids = set()
        inode_dict = {}
        with open(gfids_file) as f:
            for gfid in f:
                gfid = gfid.strip()
                p = os.path.join(brick,
                                 ".glusterfs",
                                 gfid[0:2],
                                 gfid[2:4],
                                 gfid)
                if os.path.islink(p):
                    path = symlink_gfid_to_path(brick, gfid)
                    output_write(fout, path, args.output_prefix)
                else:
                    try:
                        inode_dict[str(os.stat(p).st_ino)] = 1
                        file_xattrs = xattr.list(p)
                        num_parent_gfid = 0
                        for x in file_xattrs:
                            if x.startswith("trusted.pgfid."):
                                num_parent_gfid += 1
                                pgfids.add(x.split(".")[-1])

                        if num_parent_gfid == 0:
                            with open(outfile_failures, "a") as f:
                                f.write("%s\n" % gfid)
                                f.flush()
                                os.fsync(f.fileno())

                    except (IOError, OSError) as e:
                        if e.errno == ENOENT:
                            continue
                        else:
                            fail("%s Failed to convert to path from "
                                 "GFID %s: %s" % (brick, gfid, e),
                                 logger=logger)

        if not inode_dict:
            return

        def inode_filter(path):
            try:
                st = os.lstat(path)
            except (OSError, IOError) as e:
                if e.errno == ENOENT:
                    st = None
                else:
                    raise

            if st and inode_dict.get(str(st.st_ino), None):
                return True

            return False

        # Length of brick path, to remove from output path
        brick_path_len = len(brick)

        def output_callback(path):
            path = path.strip()
            path = path[brick_path_len+1:]
            output_write(fout, path, args.output_prefix)

        ignore_dirs = [os.path.join(brick, dirname)
                       for dirname in
                       conf.get_opt("brick_ignore_dirs").split(",")]

        for pgfid in pgfids:
            path = symlink_gfid_to_path(brick, pgfid)
            find(os.path.join(brick, path),
                 callback_func=output_callback,
                 filter_func=inode_filter,
                 ignore_dirs=ignore_dirs,
                 subdirs_crawl=False)

        fout.flush()
        os.fsync(fout.fileno())
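
symlink_gfid_to_path is not part of this snippet. In the .glusterfs
backend, a directory GFID is a symlink of the form
../../xx/yy/<PARGFID>/<basename>, so a compatible sketch resolves the
chain upward until the root GFID (an assumption about the helper, not
its actual source):

import os

ROOT_GFID = "00000000-0000-0000-0000-000000000001"

def symlink_gfid_to_path(brick, gfid):
    # Walk parent-by-parent, accumulating basenames into a
    # brick-relative path.
    if gfid == ROOT_GFID:
        return ""
    out_path = ""
    while True:
        p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)
        target = os.readlink(p)  # ../../xx/yy/<PARGFID>/<basename>
        pgfid_dir = os.path.dirname(target)
        out_path = os.path.join(os.path.basename(target), out_path)
        if os.path.basename(pgfid_dir) == ROOT_GFID:
            break
        gfid = os.path.basename(pgfid_dir)
    return out_path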
Example #57
def mode_delete(args):
    session_dir = os.path.join(conf.get_opt("session_dir"),
                               args.session)
    shutil.rmtree(os.path.join(session_dir, args.volume),
                  onerror=handle_rm_error)
Example #58

def _get_args():
    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
                            description=PROG_DESCRIPTION)

    parser.add_argument("session", help="Session Name")
    parser.add_argument("volume", help="Volume Name")
    parser.add_argument("brick", help="Brick Name")
    parser.add_argument("outfile", help="Output File")
    parser.add_argument("start", help="Start Time", type=int)
    parser.add_argument("--debug", help="Debug", action="store_true")
    parser.add_argument("--output-prefix", help="File prefix in output",
                        default=".")

    return parser.parse_args()


if __name__ == "__main__":
    args = _get_args()
    mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"),
                            args.session,
                            args.volume,
                            "changelog.log")
    setup_logger(logger, log_file, args.debug)
    end = int(time.time()) - int(conf.get_opt("changelog_rollover_time"))
    changelog_crawl(args.brick, end, args)
    sys.exit(0)
Example #59
def gfid_to_path_using_pgfid(brick, gfids_file, output_file, outfile_failures):
    """
    The parent GFID is saved as an xattr; collect parent GFIDs from
    all the files listed in gfids_file. Convert each parent GFID to a
    path and crawl that directory (find with maxdepth 1), matching
    each visited path's inode number against an in-memory dictionary
    of inode numbers built from the list of GFIDs.
    """
    with open(output_file, "a+") as fout:
        pgfids = set()
        inode_dict = {}
        with open(gfids_file) as f:
            for gfid in f:
                gfid = gfid.strip()
                p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4],
                                 gfid)
                if os.path.islink(p):
                    path = symlink_gfid_to_path(brick, gfid)
                    output_write(fout, path, args.output_prefix)
                else:
                    try:
                        inode_dict[str(os.stat(p).st_ino)] = 1
                        file_xattrs = xattr.list(p)
                        num_parent_gfid = 0
                        for x in file_xattrs:
                            if x.startswith("trusted.pgfid."):
                                num_parent_gfid += 1
                                pgfids.add(x.split(".")[-1])

                        if num_parent_gfid == 0:
                            with open(outfile_failures, "a") as f:
                                f.write("%s\n" % gfid)
                                f.flush()
                                os.fsync(f.fileno())

                    except (IOError, OSError) as e:
                        if e.errno == ENOENT:
                            continue
                        else:
                            fail("%s Failed to convert to path from "
                                 "GFID %s: %s" % (brick, gfid, e),
                                 logger=logger)

        if not inode_dict:
            return

        def inode_filter(path):
            try:
                st = os.lstat(path)
            except (OSError, IOError) as e:
                if e.errno == ENOENT:
                    st = None
                else:
                    raise

            if st and inode_dict.get(str(st.st_ino), None):
                return True

            return False

        # Length of brick path, to remove from output path
        brick_path_len = len(brick)

        def output_callback(path):
            path = path.strip()
            path = path[brick_path_len + 1:]
            output_write(fout, path, args.output_prefix)

        ignore_dirs = [
            os.path.join(brick, dirname)
            for dirname in conf.get_opt("brick_ignore_dirs").split(",")
        ]

        for pgfid in pgfids:
            path = symlink_gfid_to_path(brick, pgfid)
            find(os.path.join(brick, path),
                 callback_func=output_callback,
                 filter_func=inode_filter,
                 ignore_dirs=ignore_dirs,
                 subdirs_crawl=False)

        fout.flush()
        os.fsync(fout.fileno())
Example #60
def run_cmd_nodes(task, args, **kwargs):
    global node_outfiles
    nodes = get_nodes(args.volume)
    pool = []
    for num, node in enumerate(nodes):
        host, brick = node[1].split(":")
        host_uuid = node[0]
        cmd = []
        opts = {}
        node_outfile = os.path.join(conf.get_opt("working_dir"),
                                    args.session, args.volume,
                                    "tmp_output_%s" % num)

        if task == "pre":
            if vol_statusStr == "Stopped":
                fail("Volume %s is in stopped state" % args.volume,
                    logger=logger)

            # If Full backup is requested or start time is zero, use brickfind
            change_detector = conf.get_change_detector("changelog")
            if args.full:
                change_detector = conf.get_change_detector("brickfind")

            node_outfiles.append(node_outfile)

            cmd = [change_detector,
                   args.session,
                   args.volume,
                   brick,
                   node_outfile,
                   str(kwargs.get("start")),
                   "--output-prefix",
                   args.output_prefix] + \
                (["--debug"] if args.debug else []) + \
                (["--only-namespace-changes"] if args.only_namespace_changes
                 else [])

            opts["node_outfile"] = node_outfile
            opts["copy_outfile"] = True
        elif task == "cleanup":
            # After a pre run, clean up the working directory and other
            # temp files. Remove the copied node_outfile on the main node.
            try:
                os.remove(node_outfile)
            except (OSError, IOError):
                logger.warn("Failed to clean up temporary file %s" %
                            node_outfile)

            cmd = [conf.get_opt("nodeagent"),
                   "cleanup",
                   args.session,
                   args.volume] + (["--debug"] if args.debug else [])
        elif task == "create":
            if vol_statusStr == "Stopped":
                fail("Volume %s is in stopped state" % args.volume,
                    logger=logger)

            # On glusterfind create, create the session directory on
            # each brick node
            cmd = [conf.get_opt("nodeagent"),
                   "create",
                   args.session,
                   args.volume,
                   brick,
                   kwargs.get("time_to_update")] + \
                (["--debug"] if args.debug else []) + \
                (["--reset-session-time"] if args.reset_session_time
                 else [])
        elif task == "post":
            # Rename pre status file to actual status file in each node
            cmd = [conf.get_opt("nodeagent"),
                   "post",
                   args.session,
                   args.volume,
                   brick] + \
                (["--debug"] if args.debug else [])
        elif task == "delete":
            # On glusterfind delete, clean up all session files/dirs
            # on each node.
            cmd = [conf.get_opt("nodeagent"),
                   "delete",
                   args.session,
                   args.volume] + \
                (["--debug"] if args.debug else [])

        if cmd:
            p = Process(target=node_cmd,
                        args=(host, host_uuid, task, cmd, args, opts))
            p.start()
            pool.append(p)

    for num, p in enumerate(pool):
        p.join()
        if p.exitcode != 0:
            logger.warn("Command %s failed in %s" % (task, nodes[num][1]))
            if task in ["create", "delete"]:
                fail("Command %s failed in %s" % (task, nodes[num][1]))
            elif task == "pre" and args.disable_partial:
                sys.exit(1)