Ejemplo n.º 1
0
def pgfid_to_path(brick, changelog_data):
    """
    Resolve the parent GFIDs stored in the gfidpath table into paths.

    Two passes are made over ``changelog_data``:

    * distinct ``pgfid1`` values of rows whose ``path1`` is still empty
      (CREATE/MKNOD/MKDIR/LINK/SYMLINK) are stored with
      ``gfidpath_set_path1``;
    * distinct ``pgfid2`` values of RENAME rows whose ``path2`` is still
      empty are stored with ``gfidpath_set_path2``.

    Every parent GFID is converted with ``symlink_gfid_to_path`` and then
    passed through ``output_path_prepare``.  Note that ``args`` is not a
    parameter of this function; it is resolved from the enclosing module
    scope.

    An IOError/OSError raised while handling one parent GFID is logged as
    a warning and that GFID is skipped; the remaining ones are still
    processed.

    :param brick: brick path handed to ``symlink_gfid_to_path``.
    :param changelog_data: object exposing ``gfidpath_get_distinct``,
        ``gfidpath_set_path1`` and ``gfidpath_set_path2``.
    :returns: None
    """
    passes = (
        # pgfid1 to path1 in case of CREATE/MKNOD/MKDIR/LINK/SYMLINK
        ("pgfid1", {"path1": ""}, changelog_data.gfidpath_set_path1),
        # pgfid2 to path2 in case of RENAME
        ("pgfid2", {"type": "RENAME", "path2": ""},
         changelog_data.gfidpath_set_path2),
    )

    for column, filters, store_path in passes:
        # The query is issued only when its pass starts, so the second
        # lookup still runs after the first pass has finished its updates.
        for row in changelog_data.gfidpath_get_distinct(column, filters):
            pgfid = row[0]
            # Data/Metadata-only entries have no pgfid1 and only RENAME
            # entries have a pgfid2, so there is nothing to resolve here.
            if pgfid == "":
                continue

            try:
                path = symlink_gfid_to_path(brick, pgfid)
                path = output_path_prepare(path, args)
                store_path(path, pgfid)
            except (IOError, OSError) as e:
                # Logger.warn is a deprecated alias of Logger.warning;
                # passing e as an argument defers the string formatting.
                logger.warning("Error converting to path: %s", e)
Ejemplo n.º 2
0
def pgfid_to_path(brick, changelog_data):
    """
    Convert every pending parent GFID in the gfidpath table to a path.

    ``pgfid1`` values (rows with an empty ``path1``; CREATE/MKNOD/MKDIR/
    LINK/SYMLINK) are handled first and saved with ``gfidpath_set_path1``.
    ``pgfid2`` values (RENAME rows with an empty ``path2``) are handled
    afterwards and saved with ``gfidpath_set_path2``.

    Conversion goes through ``symlink_gfid_to_path`` followed by
    ``output_path_prepare``; the latter receives ``args``, which is looked
    up in the enclosing module scope because it is not a parameter here.

    :param brick: brick path handed to ``symlink_gfid_to_path``.
    :param changelog_data: object exposing ``gfidpath_get_distinct``,
        ``gfidpath_set_path1`` and ``gfidpath_set_path2``.
    :returns: None.  IOError/OSError for a single parent GFID is logged
        as a warning and does not stop the remaining conversions.
    """
    def convert_rows(rows, save_path):
        # Resolve the parent GFID in column 0 of each row and persist it.
        for row in rows:
            parent_gfid = row[0]
            # An empty value means the entry has no such parent GFID
            # (Data/Metadata-only for pgfid1, non-RENAME for pgfid2).
            if parent_gfid == "":
                continue

            try:
                resolved = symlink_gfid_to_path(brick, parent_gfid)
                resolved = output_path_prepare(resolved, args)
                save_path(resolved, parent_gfid)
            except (IOError, OSError) as e:
                # Logger.warning replaces the deprecated Logger.warn alias.
                logger.warning("Error converting to path: %s", e)

    # pgfid1 to path1 in case of CREATE/MKNOD/MKDIR/LINK/SYMLINK
    convert_rows(
        changelog_data.gfidpath_get_distinct("pgfid1", {"path1": ""}),
        changelog_data.gfidpath_set_path1)

    # pgfid2 to path2 in case of RENAME; queried only after the first
    # pass has completed, as before.
    convert_rows(
        changelog_data.gfidpath_get_distinct("pgfid2",
                                             {"type": "RENAME",
                                              "path2": ""}),
        changelog_data.gfidpath_set_path2)
Ejemplo n.º 3
0
def populate_pgfid_and_inodegfid(brick, changelog_data):
    """
    Prepare path conversion for every MODIFY row that has no path1 yet.

    For each such GFID the backend entry
    ``<brick>/.glusterfs/<gfid[0:2]>/<gfid[2:4]>/<gfid>`` is inspected:

    * If it is a symlink, the GFID is converted directly with
      ``symlink_gfid_to_path`` + ``output_path_prepare`` and written to
      ``path1`` through ``gfidpath_update``.  IOError/OSError is logged
      as a warning and the row is skipped.
    * Otherwise its inode number is recorded with ``inodegfid_add`` and
      the suffix of every ``trusted.pgfid.*`` xattr name is recorded with
      ``pgfid_add``.  IOError/OSError is ignored silently here.

    ``args`` is not a parameter; it is resolved from the enclosing module
    scope.

    :param brick: brick root directory.
    :param changelog_data: object exposing ``gfidpath_get``,
        ``gfidpath_update``, ``inodegfid_add`` and ``pgfid_add``.
    :returns: None
    """
    for row in changelog_data.gfidpath_get({"path1": "", "type": "MODIFY"}):
        gfid = row[3].strip()
        p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)

        if os.path.islink(p):
            # It is a Directory if GFID backend path is symlink
            try:
                path = symlink_gfid_to_path(brick, gfid)
                path = output_path_prepare(path, args)
                changelog_data.gfidpath_update({"path1": path}, {"gfid": gfid})
            except (IOError, OSError) as e:
                # Logger.warning replaces the deprecated Logger.warn alias.
                logger.warning("Error converting to path: %s", e)
            continue

        try:
            # INODE and GFID to inodegfid table
            changelog_data.inodegfid_add(os.stat(p).st_ino, gfid)
            # NOTE(review): assumes xattr.list() yields text names that
            # support startswith(str) - confirm for the xattr module and
            # Python version in use.
            for x in xattr.list(p):
                if x.startswith("trusted.pgfid."):
                    # PGFID in pgfid table
                    changelog_data.pgfid_add(x.split(".")[-1])
        except (IOError, OSError):
            # All OS Errors ignored, since failures will be logged
            # in End. All GFIDs present in gfidpath table
            continue
Ejemplo n.º 4
0
def enum_hard_links_using_gfid2path(brick, gfid, args):
    """
    List the paths recorded for ``gfid`` in its gfid2path xattrs.

    The backend entry
    ``<brick>/.glusterfs/<gfid[0:2]>/<gfid[2:4]>/<gfid>`` is examined.
    When ``os.path.isdir`` reports it as a directory nothing is looked up.
    Otherwise every xattr whose name starts with ``trusted.gfid2path.`` is
    read; its value has the form ``<PGFID>/<BN>``.  The parent GFID is
    converted with ``symlink_gfid_to_path``, joined with the basename and
    passed through ``output_path_prepare``.

    :param brick: brick root directory.
    :param gfid: GFID string of the entry to inspect.
    :param args: passed unchanged to ``output_path_prepare``.
    :returns: list of prepared paths; empty when none could be found.
        A failed parent conversion is logged as a warning and skipped.
        IOError/OSError while reading the xattrs ends the scan quietly,
        returning what was collected so far.
    """
    hardlinks = []
    p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)
    if os.path.isdir(p):
        return hardlinks

    # we have a symlink or a normal file
    try:
        for x in xattr.list(p):
            x_str = bytearray_to_str(x)
            if not x_str.startswith("trusted.gfid2path."):
                continue

            # get the value for the xattr i.e. <PGFID>/<BN>
            v = xattr.getxattr(p, x_str)
            v_str = bytearray_to_str(v)
            pgfid, bn = v_str.split(os.sep)
            try:
                path = symlink_gfid_to_path(brick, pgfid)
                fullpath = os.path.join(path, bn)
                fullpath = output_path_prepare(fullpath, args)
                hardlinks.append(fullpath)
            except (IOError, OSError) as e:
                # Logger.warning replaces the deprecated Logger.warn alias.
                logger.warning("Error converting to path: %s", e)
    except (IOError, OSError):
        # Deliberately best effort: an unreadable entry simply yields
        # the hard links gathered before the failure.
        pass
    return hardlinks
Ejemplo n.º 5
0
def populate_pgfid_and_inodegfid(brick, changelog_data):
    """
    Process all DATA/METADATA modification GFIDs that lack a path1.

    The rows come from ``gfidpath_get({"path1": "", "type": "MODIFY"})``;
    the GFID is read from column 3 of each row.  Depending on the backend
    entry ``<brick>/.glusterfs/<gfid[0:2]>/<gfid[2:4]>/<gfid>``:

    * symlink: the GFID is converted to a path immediately
      (``symlink_gfid_to_path`` then ``output_path_prepare``) and stored
      with ``gfidpath_update``; IOError/OSError is logged as a warning.
    * anything else: the inode number goes to the inodegfid table via
      ``inodegfid_add`` and the parent GFIDs taken from the
      ``trusted.pgfid.*`` xattr names go to the pgfid table via
      ``pgfid_add``; IOError/OSError is ignored without logging.

    ``args`` is resolved from the enclosing module scope; it is not a
    parameter of this function.

    :param brick: brick root directory.
    :param changelog_data: object exposing ``gfidpath_get``,
        ``gfidpath_update``, ``inodegfid_add`` and ``pgfid_add``.
    :returns: None
    """
    pending = changelog_data.gfidpath_get({"path1": "", "type": "MODIFY"})
    for row in pending:
        gfid = row[3].strip()
        backend = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4],
                               gfid)

        if not os.path.islink(backend):
            try:
                # INODE and GFID to inodegfid table
                inode = os.stat(backend).st_ino
                changelog_data.inodegfid_add(inode, gfid)
                # NOTE(review): assumes xattr.list() returns text names;
                # verify for the xattr module / Python version in use.
                for name in xattr.list(backend):
                    if name.startswith("trusted.pgfid."):
                        # PGFID in pgfid table
                        changelog_data.pgfid_add(name.split(".")[-1])
            except (IOError, OSError):
                # All OS Errors ignored, since failures will be logged
                # in End. All GFIDs present in gfidpath table
                pass
            continue

        # It is a Directory if GFID backend path is symlink
        try:
            path = symlink_gfid_to_path(brick, gfid)
            path = output_path_prepare(path, args)
            changelog_data.gfidpath_update({"path1": path},
                                           {"gfid": gfid})
        except (IOError, OSError) as e:
            # Logger.warning replaces the deprecated Logger.warn alias.
            logger.warning("Error converting to path: %s", e)
Ejemplo n.º 6
0
def gfid_to_path_using_pgfid(brick, changelog_data, args):
    """
    Convert GFIDs to paths by scanning the directories of their parents.

    ``populate_pgfid_and_inodegfid`` is called first to fill the pgfid and
    inodegfid tables.  If the inodegfid table then has no row with
    ``converted == 0`` the function returns immediately.  Otherwise every
    parent GFID from ``pgfid_get()`` is converted to a directory path with
    ``symlink_gfid_to_path`` and that directory is handed to ``find`` with
    ``subdirs_crawl=False``, using:

    * ``inode_filter`` - returns the inode number of a path when that
      inode is present in the inodegfid table, else None;
    * ``output_callback`` - strips the brick prefix from a path, prepares
      it with ``output_path_prepare``, appends it with ``append_path1``
      and marks the inode as converted.

    :param brick: brick root directory.
    :param changelog_data: object exposing ``inodegfid_exists``,
        ``inodegfid_update``, ``append_path1`` and ``pgfid_get``.
    :param args: passed unchanged to ``output_path_prepare``.
    :returns: None.  IOError/OSError while handling one parent GFID is
        logged as a warning and the next parent GFID is processed.
    """
    populate_pgfid_and_inodegfid(brick, changelog_data)

    # If no GFIDs needs conversion to Path
    if not changelog_data.inodegfid_exists({"converted": 0}):
        return

    def inode_filter(path):
        # Looks in inodegfid table, if exists returns
        # inode number else None
        try:
            st = os.lstat(path)
        except (OSError, IOError):
            return None

        if changelog_data.inodegfid_exists({"inode": st.st_ino}):
            return st.st_ino

        return None

    # Length of brick path, to remove from output path
    brick_path_len = len(brick)

    def output_callback(path, inode):
        # For each path found, encodes it and updates path1
        # Also updates converted flag in inodegfid table as 1
        path = path.strip()
        # +1 also drops the character that follows the brick prefix
        path = path[brick_path_len + 1:]

        path = output_path_prepare(path, args)

        changelog_data.append_path1(path, inode)
        changelog_data.inodegfid_update({"converted": 1}, {"inode": inode})

    ignore_dirs = [
        os.path.join(brick, dirname)
        for dirname in conf.get_opt("brick_ignore_dirs").split(",")
    ]

    for row in changelog_data.pgfid_get():
        try:
            path = symlink_gfid_to_path(brick, row[0])
            find(os.path.join(brick, path),
                 callback_func=output_callback,
                 filter_func=inode_filter,
                 ignore_dirs=ignore_dirs,
                 subdirs_crawl=False)
        except (IOError, OSError) as e:
            # Logger.warning replaces the deprecated Logger.warn alias.
            logger.warning("Error converting to path: %s", e)
Ejemplo n.º 7
0
def gfid_to_path_using_pgfid(brick, changelog_data, args):
    """
    Resolve paths for GFIDs by matching inode numbers in parent dirs.

    Steps, in order:

    1. ``populate_pgfid_and_inodegfid`` fills the pgfid and inodegfid
       tables.
    2. If ``inodegfid_exists({"converted": 0})`` is false, nothing needs
       conversion and the function returns.
    3. For every parent GFID from ``pgfid_get()``, its directory path is
       obtained with ``symlink_gfid_to_path`` and ``find`` is run on it
       with ``subdirs_crawl=False``, the configured ignore directories,
       a filter that looks the entry's inode up in the inodegfid table,
       and a callback that records the brick-relative, prepared path via
       ``append_path1`` and sets ``converted`` to 1 for that inode.

    :param brick: brick root directory.
    :param changelog_data: object exposing ``inodegfid_exists``,
        ``inodegfid_update``, ``append_path1`` and ``pgfid_get``.
    :param args: passed unchanged to ``output_path_prepare``.
    :returns: None.  IOError/OSError for one parent GFID is logged as a
        warning; the loop then continues with the next one.
    """
    populate_pgfid_and_inodegfid(brick, changelog_data)

    # If no GFIDs needs conversion to Path
    if not changelog_data.inodegfid_exists({"converted": 0}):
        return

    def inode_filter(path):
        # Returns the inode number when it is listed in the inodegfid
        # table, otherwise None (also when lstat fails).
        try:
            inode = os.lstat(path).st_ino
        except (OSError, IOError):
            return None

        if not changelog_data.inodegfid_exists({"inode": inode}):
            return None

        return inode

    # Length of brick path, to remove from output path
    brick_path_len = len(brick)

    def output_callback(path, inode):
        # For each path found, encodes it and updates path1
        # Also updates converted flag in inodegfid table as 1
        relative = path.strip()[brick_path_len + 1:]
        relative = output_path_prepare(relative, args)

        changelog_data.append_path1(relative, inode)
        changelog_data.inodegfid_update({"converted": 1}, {"inode": inode})

    ignore_dirs = [os.path.join(brick, dirname)
                   for dirname in
                   conf.get_opt("brick_ignore_dirs").split(",")]

    for row in changelog_data.pgfid_get():
        try:
            parent_dir = symlink_gfid_to_path(brick, row[0])
            find(os.path.join(brick, parent_dir),
                 callback_func=output_callback,
                 filter_func=inode_filter,
                 ignore_dirs=ignore_dirs,
                 subdirs_crawl=False)
        except (IOError, OSError) as e:
            # Logger.warning replaces the deprecated Logger.warn alias.
            logger.warning("Error converting to path: %s", e)
Ejemplo n.º 8
0
def gfid_to_path_using_pgfid(brick, gfids_file, output_file, outfile_failures):
    """
    Convert the GFIDs listed in gfids_file to paths using parent GFIDs.

    Each line of ``gfids_file`` is one GFID.  Its backend entry is
    ``<brick>/.glusterfs/<gfid[0:2]>/<gfid[2:4]>/<gfid>``:

    * symlink: converted directly with ``symlink_gfid_to_path`` and
      written to ``output_file``;
    * otherwise: its inode number is remembered and the parent GFIDs are
      taken from its ``trusted.pgfid.*`` xattr names.  A GFID without any
      such xattr is appended to ``outfile_failures``.  A missing backend
      entry (ENOENT) is skipped; any other IOError/OSError is reported
      through ``fail``.

    If no inode was remembered the function returns.  Otherwise every
    collected parent GFID is converted to a directory and ``find`` is run
    on it with ``subdirs_crawl=False``; entries whose inode number was
    remembered are written, relative to the brick, to ``output_file``.

    ``args`` (for ``args.output_prefix``) is resolved from the enclosing
    module scope.

    :param brick: brick root directory.
    :param gfids_file: path of the file holding one GFID per line.
    :param output_file: path of the file that results are appended to.
    :param outfile_failures: path of the file that receives GFIDs for
        which no parent GFID xattr was found.
    :returns: None
    """
    with open(output_file, "a+") as fout:
        pgfids = set()
        inode_dict = {}
        with open(gfids_file) as gfid_lines:
            for line in gfid_lines:
                gfid = line.strip()
                backend = os.path.join(brick, ".glusterfs",
                                       gfid[0:2], gfid[2:4], gfid)

                if os.path.islink(backend):
                    path = symlink_gfid_to_path(brick, gfid)
                    output_write(fout, path, args.output_prefix)
                    continue

                try:
                    inode_dict[str(os.stat(backend).st_ino)] = 1
                    has_parent = False
                    for name in xattr.list(backend):
                        if name.startswith("trusted.pgfid."):
                            has_parent = True
                            pgfids.add(name.split(".")[-1])

                    if not has_parent:
                        # No parent GFID xattr: record the GFID as a
                        # failure and force it to disk right away.
                        with open(outfile_failures, "a") as failures:
                            failures.write("%s\n" % gfid)
                            failures.flush()
                            os.fsync(failures.fileno())

                except (IOError, OSError) as e:
                    if e.errno == ENOENT:
                        continue

                    fail("%s Failed to convert to path from "
                         "GFID %s: %s" % (brick, gfid, e),
                         logger=logger)

        if not inode_dict:
            return

        def inode_filter(path):
            # True when the inode number of path was collected above.
            # A vanished path (ENOENT) is simply not a match; any other
            # lstat error is re-raised.
            try:
                st = os.lstat(path)
            except (OSError, IOError) as e:
                if e.errno != ENOENT:
                    raise
                return False

            if inode_dict.get(str(st.st_ino), None):
                return True

            return False

        # Length of brick path, to remove from output path
        brick_path_len = len(brick)

        def output_callback(path):
            relative = path.strip()[brick_path_len + 1:]
            output_write(fout, relative, args.output_prefix)

        ignore_dirs = [
            os.path.join(brick, dirname)
            for dirname in conf.get_opt("brick_ignore_dirs").split(",")
        ]

        for pgfid in pgfids:
            parent_dir = symlink_gfid_to_path(brick, pgfid)
            find(os.path.join(brick, parent_dir),
                 callback_func=output_callback,
                 filter_func=inode_filter,
                 ignore_dirs=ignore_dirs,
                 subdirs_crawl=False)

        fout.flush()
        os.fsync(fout.fileno())
Ejemplo n.º 9
0
def gfid_to_path_using_pgfid(brick, gfids_file, output_file, outfile_failures):
    """
    Resolve paths for the GFIDs in gfids_file through their parent GFIDs.

    Parent GFIDs are stored as ``trusted.pgfid.*`` xattrs on the backend
    entry ``<brick>/.glusterfs/<gfid[0:2]>/<gfid[2:4]>/<gfid>``.  For
    every GFID read from ``gfids_file``:

    * a symlink backend entry is converted directly with
      ``symlink_gfid_to_path`` and written to ``output_file``;
    * for any other entry the inode number is collected together with
      its parent GFIDs; when it has no parent GFID xattr the GFID is
      appended to ``outfile_failures``.  ENOENT is skipped, any other
      IOError/OSError is passed to ``fail``.

    When at least one inode number was collected, each parent GFID is
    converted to a directory which is handed to ``find`` with
    ``subdirs_crawl=False``; entries whose inode number was collected
    are written (brick prefix removed) to ``output_file``, which is
    flushed and fsynced at the end.

    ``args.output_prefix`` is read from ``args`` in the enclosing module
    scope.

    :param brick: brick root directory.
    :param gfids_file: path of the file holding one GFID per line.
    :param output_file: path of the file that results are appended to.
    :param outfile_failures: path of the file collecting GFIDs without a
        parent GFID xattr.
    :returns: None
    """
    xattr_prefix = "trusted.pgfid."

    with open(output_file, "a+") as fout:
        parent_gfids = set()
        # Inode numbers (as strings) of the non-symlink GFIDs
        wanted_inodes = set()

        with open(gfids_file) as src:
            for raw_gfid in src:
                gfid = raw_gfid.strip()
                p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4],
                                 gfid)
                if os.path.islink(p):
                    path = symlink_gfid_to_path(brick, gfid)
                    output_write(fout, path, args.output_prefix)
                else:
                    try:
                        wanted_inodes.add(str(os.stat(p).st_ino))
                        file_xattrs = xattr.list(p)
                        parents_seen = 0
                        for x in file_xattrs:
                            if not x.startswith(xattr_prefix):
                                continue
                            parents_seen += 1
                            parent_gfids.add(x.split(".")[-1])

                        if parents_seen == 0:
                            with open(outfile_failures, "a") as fail_out:
                                fail_out.write("%s\n" % gfid)
                                fail_out.flush()
                                os.fsync(fail_out.fileno())

                    except (IOError, OSError) as e:
                        if e.errno != ENOENT:
                            fail("%s Failed to convert to path from "
                                 "GFID %s: %s" % (brick, gfid, e),
                                 logger=logger)

        if not wanted_inodes:
            return

        def inode_filter(path):
            # Matches paths whose inode number was collected above;
            # only a missing path (ENOENT) is tolerated on lstat.
            try:
                inode = os.lstat(path).st_ino
            except (OSError, IOError) as e:
                if e.errno == ENOENT:
                    return False
                raise

            return str(inode) in wanted_inodes

        # Length of brick path, to remove from output path
        prefix_len = len(brick) + 1

        def output_callback(path):
            path = path.strip()
            output_write(fout, path[prefix_len:], args.output_prefix)

        ignore_dirs = []
        for dirname in conf.get_opt("brick_ignore_dirs").split(","):
            ignore_dirs.append(os.path.join(brick, dirname))

        for pgfid in parent_gfids:
            path = symlink_gfid_to_path(brick, pgfid)
            find(os.path.join(brick, path),
                 callback_func=output_callback,
                 filter_func=inode_filter,
                 ignore_dirs=ignore_dirs,
                 subdirs_crawl=False)

        fout.flush()
        os.fsync(fout.fileno())