Esempio n. 1
0
def get_changes(brick, hash_dir, log_file, start, end, args):
    """
    Crawl the historical changelogs of ``brick`` through libgfchangelog's
    history API and resolve the recorded GFIDs to paths.

    Every changelog returned by the history API is parsed into a
    ChangelogData store built from ``args.outfile``. Three conversion
    passes then run over that store, each followed by a commit:
    'pgfid to path', 'gfid to path using pgfid' and
    'gfid to path using batchfind'.

    Arguments:
    brick, hash_dir, log_file -- handed to libgfchangelog.cl_register()
    start -- NOTE: the value passed by the caller is never used. It is
             replaced by the integer stored in the brick's status file
             inside the session directory, or by ``args.start`` when
             that file cannot be read or parsed.
    end -- end time handed to libgfchangelog.cl_history_changelog()
    args -- parsed options; ``session``, ``volume``, ``brick``,
            ``start``, ``outfile`` and ``node`` are read here

    Returns the value returned by libgfchangelog.cl_history_changelog().
    Changelog register/history/crawl errors are reported through fail()
    (presumably terminating the process -- confirm against fail()).
    """
    session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
    status_file = os.path.join(session_dir, args.volume,
                               "%s.status" % urllib.quote_plus(args.brick))

    # Get previous session: resume from the timestamp recorded in the
    # status file, falling back to the requested start time.
    try:
        with open(status_file) as f:
            start = int(f.read().strip())
    except (ValueError, OSError, IOError):
        start = args.start

    try:
        libgfchangelog.cl_init()
        libgfchangelog.cl_register(brick, hash_dir, log_file,
                                   CHANGELOG_LOG_LEVEL, CHANGELOG_CONN_RETRIES)
    except libgfchangelog.ChangelogException as e:
        fail("%s Changelog register failed: %s" % (brick, e), logger=logger)

    # Store that records GFIDs and the results of the path conversions
    changelog_data = ChangelogData(args.outfile, args)

    # Changelogs path (hard coded to BRICK/.glusterfs/changelogs)
    cl_path = os.path.join(brick, ".glusterfs/changelogs")

    # Fail if History fails for requested Start and End
    try:
        actual_end = libgfchangelog.cl_history_changelog(
            cl_path, start, end, CHANGELOGAPI_NUM_WORKERS)
    except libgfchangelog.ChangelogException as e:
        fail("%s: %s Historical Changelogs not available: %s" %
             (args.node, brick, e),
             logger=logger)

    logger.info("[1/4] Starting changelog parsing ...")
    # Suffix carried by the changelog that the previous session ended on
    last_processed_suffix = ".%s" % start
    try:
        # scan followed by getchanges till scan returns zero.
        # history_scan() is blocking call, till it gets the number
        # of changelogs to process. Returns zero when no changelogs
        # to be processed. returns positive value as number of changelogs
        # to be processed, which will be fetched using
        # history_getchanges()
        while libgfchangelog.cl_history_scan() > 0:
            for change in libgfchangelog.cl_history_getchanges():
                # Ignore if last processed changelog comes
                # again in list
                if change.endswith(last_processed_suffix):
                    continue
                try:
                    parse_changelog_to_db(changelog_data, change, args)
                    libgfchangelog.cl_history_done(change)
                except IOError as e:
                    # Best effort: log the failure and carry on with
                    # the remaining changelogs of this batch.
                    # Logger.warn is a deprecated alias of warning.
                    logger.warning("Error parsing changelog file %s: %s" %
                                   (change, e))

            # Commit once per scanned batch
            changelog_data.commit()
    except libgfchangelog.ChangelogException as e:
        fail("%s Error during Changelog Crawl: %s" % (brick, e), logger=logger)

    logger.info("[1/4] Finished changelog parsing.")

    # Convert all pgfid available from Changelogs
    logger.info("[2/4] Starting 'pgfid to path' conversions ...")
    pgfid_to_path(brick, changelog_data)
    changelog_data.commit()
    logger.info("[2/4] Finished 'pgfid to path' conversions.")

    # Convert all GFIDs for which no other additional details available
    logger.info("[3/4] Starting 'gfid to path using pgfid' conversions ...")
    gfid_to_path_using_pgfid(brick, changelog_data, args)
    changelog_data.commit()
    logger.info("[3/4] Finished 'gfid to path using pgfid' conversions.")

    # If some GFIDs fail to get converted from previous step,
    # convert using find
    logger.info("[4/4] Starting 'gfid to path using batchfind' "
                "conversions ...")
    gfid_to_path_using_batchfind(brick, changelog_data)
    changelog_data.commit()
    logger.info("[4/4] Finished 'gfid to path using batchfind' conversions.")

    return actual_end
Esempio n. 2
0
def get_changes(brick, hash_dir, log_file, end, args):
    """
    Crawl the historical changelogs of ``brick`` through libgfchangelog's
    history API, collect the GFIDs they mention and convert them to
    paths.

    The second whitespace-separated field of every changelog line is
    appended to ``<args.outfile>.gfids``. Once the crawl is complete the
    list is passed to sort_unique() and then converted with
    gfid_to_path_using_pgfid(); gfid_to_path_using_batchfind() is then
    run on ``<args.outfile>.gfids.failures``.

    Arguments:
    brick, hash_dir, log_file -- handed to libgfchangelog.cl_register()
    end -- end time handed to libgfchangelog.cl_history_changelog()
    args -- parsed options; ``start`` and ``outfile`` are read here

    Returns None. Changelog errors, and history that stops before
    ``end``, are reported through fail() (presumably terminating the
    process -- confirm against fail()).
    """
    try:
        libgfchangelog.cl_init()
        libgfchangelog.cl_register(brick, hash_dir, log_file,
                                   CHANGELOG_LOG_LEVEL, CHANGELOG_CONN_RETRIES)
    except libgfchangelog.ChangelogException as e:
        fail("%s Changelog register failed: %s" % (brick, e), logger=logger)

    # Output files to record GFIDs and GFID to Path failure GFIDs
    gfid_list_path = args.outfile + ".gfids"
    gfid_list_failures_file = gfid_list_path + ".failures"
    create_file(gfid_list_path, exit_on_err=True, logger=logger)
    create_file(gfid_list_failures_file, exit_on_err=True, logger=logger)

    # Changelogs path (hard coded to BRICK/.glusterfs/changelogs)
    cl_path = os.path.join(brick, ".glusterfs/changelogs")

    # Fail if History fails for requested Start and End
    try:
        actual_end = libgfchangelog.cl_history_changelog(
            cl_path, args.start, end, CHANGELOGAPI_NUM_WORKERS)
    except libgfchangelog.ChangelogException as e:
        fail("%s Historical Changelogs not available: %s" % (brick, e),
             logger=logger)

    try:
        # scan followed by getchanges till scan returns zero.
        # history_scan() is blocking call, till it gets the number
        # of changelogs to process. Returns zero when no changelogs
        # to be processed. returns positive value as number of changelogs
        # to be processed, which will be fetched using
        # history_getchanges()
        while libgfchangelog.cl_history_scan() > 0:
            # Handle only the batch fetched for this scan. Accumulating
            # batches across iterations would re-read changelogs that
            # were already written out and passed to cl_history_done().
            changes = libgfchangelog.cl_history_getchanges()
            if not changes:
                continue

            with open(gfid_list_path, 'a+') as fgfid:
                for change in changes:
                    with open(change) as f:
                        for line in f:
                            # Space delimited list, collect GFID
                            details = line.split()
                            if len(details) < 2:
                                # Blank or truncated line: no GFID field
                                continue
                            fgfid.write("%s\n" % details[1])

                    libgfchangelog.cl_history_done(change)
                # Push the batch to disk before scanning again
                fgfid.flush()
                os.fsync(fgfid.fileno())
    except libgfchangelog.ChangelogException as e:
        fail("%s Error during Changelog Crawl: %s" % (brick, e),
             logger=logger)

    # If TS returned from history_changelog is < end time
    # then FS crawl may be required, since history is only available
    # till TS returned from history_changelog
    if actual_end < end:
        fail("Partial History available with Changelog", 2, logger=logger)

    sort_unique(gfid_list_path)
    gfid_to_path_using_pgfid(brick, gfid_list_path,
                             args.outfile, gfid_list_failures_file)
    gfid_to_path_using_batchfind(brick, gfid_list_failures_file, args.outfile)
Esempio n. 3
0
def get_changes(brick, hash_dir, log_file, start, end, args):
    """
    Use libgfchangelog's history API to fetch the changelogs of
    ``brick``, load them into a ChangelogData store and convert the
    recorded GFIDs to paths.

    Steps, as logged:
      [1/4] parse each changelog with parse_changelog_to_db()
      [2/4] pgfid_to_path()
      [3/4] gfid_to_path_using_pgfid()
      [4/4] gfid_to_path_using_batchfind()
    The store is committed after every scanned batch and after each of
    the conversion steps.

    Arguments:
    brick, hash_dir, log_file -- handed to libgfchangelog.cl_register()
    start -- NOTE: overwritten before use. The effective start time is
             the integer read from the brick's status file in the
             session directory; ``args.start`` is used when the file is
             missing, unreadable or not an integer.
    end -- end time handed to libgfchangelog.cl_history_changelog()
    args -- parsed options; ``session``, ``volume``, ``brick``,
            ``start``, ``outfile`` and ``node`` are read here

    Returns the value returned by libgfchangelog.cl_history_changelog().
    ChangelogException from register/history/crawl is reported through
    fail() (presumably exits -- confirm against fail()).
    """
    session_dir = os.path.join(conf.get_opt("session_dir"),
                               args.session)
    status_file = os.path.join(session_dir, args.volume,
                               "%s.status" % urllib.quote_plus(args.brick))

    # Get previous session
    try:
        with open(status_file) as f:
            start = int(f.read().strip())
    except (ValueError, OSError, IOError):
        # No usable status file: start from the requested time
        start = args.start

    try:
        libgfchangelog.cl_init()
        libgfchangelog.cl_register(brick, hash_dir, log_file,
                                   CHANGELOG_LOG_LEVEL, CHANGELOG_CONN_RETRIES)
    except libgfchangelog.ChangelogException as e:
        fail("%s Changelog register failed: %s" % (brick, e), logger=logger)

    # Store for the GFIDs found in changelogs and their conversions
    changelog_data = ChangelogData(args.outfile, args)

    # Changelogs path (hard coded to BRICK/.glusterfs/changelogs)
    cl_path = os.path.join(brick, ".glusterfs/changelogs")

    # Fail if History fails for requested Start and End
    try:
        actual_end = libgfchangelog.cl_history_changelog(
            cl_path, start, end, CHANGELOGAPI_NUM_WORKERS)
    except libgfchangelog.ChangelogException as e:
        fail("%s: %s Historical Changelogs not available: %s" %
             (args.node, brick, e), logger=logger)

    logger.info("[1/4] Starting changelog parsing ...")
    try:
        # scan followed by getchanges till scan returns zero.
        # history_scan() is blocking call, till it gets the number
        # of changelogs to process. Returns zero when no changelogs
        # to be processed. returns positive value as number of changelogs
        # to be processed, which will be fetched using
        # history_getchanges()
        while libgfchangelog.cl_history_scan() > 0:
            batch = libgfchangelog.cl_history_getchanges()

            for change in batch:
                # Ignore if last processed changelog comes
                # again in list
                if change.endswith(".%s" % start):
                    continue
                try:
                    parse_changelog_to_db(changelog_data, change, args)
                    libgfchangelog.cl_history_done(change)
                except IOError as e:
                    # Keep crawling; an unreadable changelog is only
                    # reported. warning() replaces the deprecated
                    # warn() alias.
                    logger.warning("Error parsing changelog file %s: %s" %
                                   (change, e))

            changelog_data.commit()
    except libgfchangelog.ChangelogException as e:
        fail("%s Error during Changelog Crawl: %s" % (brick, e),
             logger=logger)

    logger.info("[1/4] Finished changelog parsing.")

    # Convert all pgfid available from Changelogs
    logger.info("[2/4] Starting 'pgfid to path' conversions ...")
    pgfid_to_path(brick, changelog_data)
    changelog_data.commit()
    logger.info("[2/4] Finished 'pgfid to path' conversions.")

    # Convert all GFIDs for which no other additional details available
    logger.info("[3/4] Starting 'gfid to path using pgfid' conversions ...")
    gfid_to_path_using_pgfid(brick, changelog_data, args)
    changelog_data.commit()
    logger.info("[3/4] Finished 'gfid to path using pgfid' conversions.")

    # If some GFIDs fail to get converted from previous step,
    # convert using find
    logger.info("[4/4] Starting 'gfid to path using batchfind' "
                "conversions ...")
    gfid_to_path_using_batchfind(brick, changelog_data)
    changelog_data.commit()
    logger.info("[4/4] Finished 'gfid to path using batchfind' conversions.")

    return actual_end
Esempio n. 4
0
def get_changes(brick, hash_dir, log_file, end, args):
    """
    Use libgfchangelog's history API to fetch the changelogs of
    ``brick``, write the GFIDs they contain to a list file and convert
    that list to paths.

    For each changelog line the second whitespace-separated field is
    appended to ``<args.outfile>.gfids``. After the crawl the file is
    passed to sort_unique(), then gfid_to_path_using_pgfid() is run on
    it and gfid_to_path_using_batchfind() is run on
    ``<args.outfile>.gfids.failures``.

    Arguments:
    brick, hash_dir, log_file -- handed to libgfchangelog.cl_register()
    end -- end time handed to libgfchangelog.cl_history_changelog()
    args -- parsed options; ``start`` and ``outfile`` are read here

    Returns None. ChangelogException, and a history end time earlier
    than ``end``, are reported through fail() (presumably exits --
    confirm against fail()).
    """
    try:
        libgfchangelog.cl_init()
        libgfchangelog.cl_register(brick, hash_dir, log_file,
                                   CHANGELOG_LOG_LEVEL, CHANGELOG_CONN_RETRIES)
    except libgfchangelog.ChangelogException as e:
        fail("%s Changelog register failed: %s" % (brick, e), logger=logger)

    # Output files to record GFIDs and GFID to Path failure GFIDs
    gfid_list_path = args.outfile + ".gfids"
    gfid_list_failures_file = gfid_list_path + ".failures"
    create_file(gfid_list_path, exit_on_err=True, logger=logger)
    create_file(gfid_list_failures_file, exit_on_err=True, logger=logger)

    # Changelogs path (hard coded to BRICK/.glusterfs/changelogs)
    cl_path = os.path.join(brick, ".glusterfs/changelogs")

    # Fail if History fails for requested Start and End
    try:
        actual_end = libgfchangelog.cl_history_changelog(
            cl_path, args.start, end, CHANGELOGAPI_NUM_WORKERS)
    except libgfchangelog.ChangelogException as e:
        fail("%s Historical Changelogs not available: %s" % (brick, e),
             logger=logger)

    try:
        # scan followed by getchanges till scan returns zero.
        # history_scan() is blocking call, till it gets the number
        # of changelogs to process. Returns zero when no changelogs
        # to be processed. returns positive value as number of changelogs
        # to be processed, which will be fetched using
        # history_getchanges()
        while libgfchangelog.cl_history_scan() > 0:
            # Take just this scan's batch. Extending one list across
            # scans would process earlier changelogs again on every
            # iteration, after cl_history_done() was called on them.
            changes = libgfchangelog.cl_history_getchanges()

            if changes:
                with open(gfid_list_path, 'a+') as fgfid:
                    for change in changes:
                        with open(change) as f:
                            for line in f:
                                # Space delimited list, collect GFID
                                details = line.split()
                                # Skip lines that carry no GFID field
                                if len(details) > 1:
                                    fgfid.write("%s\n" % details[1])

                        libgfchangelog.cl_history_done(change)
                    # Make the batch durable before the next scan
                    fgfid.flush()
                    os.fsync(fgfid.fileno())
    except libgfchangelog.ChangelogException as e:
        fail("%s Error during Changelog Crawl: %s" % (brick, e), logger=logger)

    # If TS returned from history_changelog is < end time
    # then FS crawl may be required, since history is only available
    # till TS returned from history_changelog
    if actual_end < end:
        fail("Partial History available with Changelog", 2, logger=logger)

    sort_unique(gfid_list_path)
    gfid_to_path_using_pgfid(brick, gfid_list_path, args.outfile,
                             gfid_list_failures_file)
    gfid_to_path_using_batchfind(brick, gfid_list_failures_file, args.outfile)