Beispiel #1
0
def backup_data(lb_obj, s3_obj, config, logger):
    qdir = config.QDIR

    tarlist = glob.iglob(
        os.path.join(config.ARCHIVE, '*', _linksrc, '*.tar.xz'))
    ntotal = nbackup_success = nbackup_fail = \
        ns3_success = ns3_fail = nquaran = 0

    for tb in sorted(tarlist):
        ntotal += 1
        # resolve the link
        tar = os.path.realpath(tb)

        logger.debug("Start backup of {}.".format(tar))
        # check tarball exist and it is a regular file
        if os.path.exists(tar) and os.path.isfile(tar):
            pass
        else:
            # tarball does not exist or it is not a regular file
            quarantine(qdir, logger, tb)
            nquaran += 1
            logger.error(
                "Quarantine: {}, {} does not exist or it is not a regular file"
                .format(tb, tar))
            continue

        archive_md5 = ("{}.md5".format(tar))

        # check md5 file exist and it is a regular file
        if os.path.exists(archive_md5) and os.path.isfile(archive_md5):
            pass
        else:
            # md5 file does not exist or it is not a regular file
            quarantine(qdir, logger, tb)
            nquaran += 1
            logger.error(
                "Quarantine: {}, {} does not exist or it is not a regular file"
                .format(tb, archive_md5))
            continue

        # read the md5sum from md5 file
        try:
            with open(archive_md5) as f:
                archive_md5_hex_value = f.readline().split(" ")[0]
        except Exception:
            # Could not read file.
            quarantine(qdir, logger, tb)
            nquaran += 1
            logger.exception("Quarantine: {}, Could not read {}".format(
                tb, archive_md5))
            continue

        # match md5sum of the tarball to its md5 file
        try:
            archive_tar_hex_value = md5sum(tar)
        except Exception:
            # Could not read file.
            quarantine(qdir, logger, tb)
            nquaran += 1
            logger.exception("Quarantine: {}, Could not read {}".format(
                tb, tar))
            continue

        if archive_tar_hex_value != archive_md5_hex_value:
            quarantine(qdir, logger, tb)
            nquaran += 1
            logger.error(
                "Quarantine: {}, md5sum of {} does not match with its md5 file {}"
                .format(tb, tar, archive_md5))
            continue

        resultname = os.path.basename(tar)
        controller_path = os.path.dirname(tar)
        controller = os.path.basename(controller_path)

        # This function call will handle all the local backup related
        # operations and count the number of successes and failures.
        local_backup_result = backup_to_local(lb_obj, logger, controller_path,
                                              controller, tb, tar, resultname,
                                              archive_md5,
                                              archive_md5_hex_value)

        if local_backup_result == Status.SUCCESS:
            nbackup_success += 1
        elif local_backup_result == Status.FAIL:
            nbackup_fail += 1
        else:
            assert False, "Impossible situation, local_backup_result = {!r}".format(
                local_backup_result)

        # This function call will handle all the S3 bucket related operations
        # and count the number of successes and failures.
        s3_backup_result = backup_to_s3(s3_obj, logger, controller_path,
                                        controller, tb, tar, resultname,
                                        archive_md5_hex_value)

        if s3_backup_result == Status.SUCCESS:
            ns3_success += 1
        elif s3_backup_result == Status.FAIL:
            ns3_fail += 1
        else:
            assert False, "Impossible situation, s3_backup_result = {!r}".format(
                s3_backup_result)

        if local_backup_result == Status.SUCCESS \
                and (s3_obj is None or s3_backup_result == Status.SUCCESS):
            # Move tar ball symlink to its final resting place
            rename_tb_link(tb, os.path.join(controller_path, _linkdest),
                           logger)
        else:
            # Do nothing when the backup fails, allowing us to retry on a
            # future pass.
            pass
        logger.debug("End backup of {}.".format(tar))

    return Results(ntotal=ntotal,
                   nbackup_success=nbackup_success,
                   nbackup_fail=nbackup_fail,
                   ns3_success=ns3_success,
                   ns3_fail=ns3_fail,
                   nquaran=nquaran)
Beispiel #2
0
def main(options, name):
    """Main entry point to pbench-index.

       The caller is required to pass the "options" argument with the following
       expected attributes:
           cfg_name              - Name of the configuration file to use
           dump_index_patterns   - Don't do any indexing, but just emit the
                                   list of index patterns that would be used
       All exceptions are caught and logged to syslog with the stacktrace of
       the exception in a sub-object of the logged JSON document.

       Status codes used by es_index and the error below are defined from the
       list below to maintain compatibility with the previous code base when
       pbench-index was a bash script and invoked index-pbench (yes, a bit
       confusing), the predecessor to this program.  The codes were used to
       sort the errors we encountered processing tar balls in to categories
       of retry or not:

            0 - normal, successful exit, no errors
            1 - Operational error while indexing
            2 - Configuration file not specified
            3 - Bad configuration file
            4 - Tar ball does not contain a metadata.log file
            5 - Bad start run date value encountered
            6 - File Not Found error
            7 - Bad metadata.log file encountered
            8 - Error reading a mapping file for Elasticsearch templates
            9 - Error creating one of the Elasticsearch templates
           10 - Bad hostname in a sosreport
           11 - Failure unpacking the tar ball
           12 - generic error, needs to be investigated and can be retried
                after any indexing bugs are fixed.

       Return Values (now a sub-set of the original status codes above):
         0 - Successfully processed all tar balls (errors processing tar
             balls are reported in the logs and in index status reports)
         1 - Failed to process one or more tar balls for unknown reasons
             (see logs)
         2 - Missing configuration file
         3 - Invalid configuration file
         8 - Unable to load and process expected mapping files
         9 - Unable to update index templates in configured Elasticsearch
             instance
    """
    if not options.cfg_name:
        print("{}: ERROR: No config file specified; set _PBENCH_SERVER_CONFIG env variable or"
                " use --config <file> on the command line".format(name),
                file=sys.stderr)
        return 2

    if options.index_tool_data:
        name = "{}-tool-data".format(name)

    idxctx = None
    try:
        idxctx = IdxContext(options, name, _dbg=_DEBUG)
    except (ConfigFileError, ConfigParserError):
        print("{}: {}".format(name, e), file=sys.stderr)
        return 2
    except BadConfig as e:
        print("{}: {}".format(name, e), file=sys.stderr)
        return 3
    except JsonFileError as e:
        print("{}: {}".format(name, e), file=sys.stderr)
        return 8

    if options.dump_index_patterns:
        idxctx.templates.dump_idx_patterns()
        return 0

    if options.dump_templates:
        idxctx.templates.dump_templates()
        return 0

    if options.index_tool_data:
        # The link source and destination for the operation of this script
        # when it only indexes tool data.
        linksrc = "TO-INDEX-TOOL"
        linkdest = "INDEXED"
    else:
        # The link source and destination for the operation of this script
        # when it indexes run, table-of-contents, and result data.
        linksrc = "TO-INDEX"
        linkdest = "TO-INDEX-TOOL"
    # We only ever use a symlink'd error destination for indexing
    # problems.
    linkerrdest = "WONT-INDEX"

    res = 0
    try:
        ARCHIVE_rp = os.path.realpath(idxctx.config.ARCHIVE)
        if not os.path.isdir(ARCHIVE_rp):
            idxctx.logger.error("{}: Bad ARCHIVE={}", name,
                    idxctx.config.ARCHIVE)
            res = 3
        INCOMING_rp = os.path.realpath(idxctx.config.INCOMING)
        if not os.path.isdir(INCOMING_rp):
            idxctx.logger.error("{}: Bad INCOMING={}", name,
                    idxctx.config.INCOMING)
            res = 3
        qdir = idxctx.config.get('pbench-server', 'pbench-quarantine-dir')
        if not os.path.isdir(qdir):
            idxctx.logger.error("{}: {} does not exist, or is not a directory",
                    name, qdir)
            res = 3
    except Exception:
        idxctx.logger.exception("{}: Unexpected setup error", name)
        res = 12

    if res != 0:
        # Exit early if we encounter any errors.
        return res

    idxctx.logger.info("{}.{}: starting", name, idxctx.TS)

    # find -L $ARCHIVE/*/$linksrc -name '*.tar.xz' -printf "%s\t%p\n" 2>/dev/null | sort -n > $list
    tarballs = []
    try:
        tb_glob = os.path.join(ARCHIVE_rp, "*", linksrc, "*.tar.xz")
        for tb in glob.iglob(tb_glob):
            try:
                rp = os.path.realpath(tb)
            except OSError:
                idxctx.logger.warning("{} does not resolve to a real path", tb)
                quarantine(qdir, idxctx.logger, tb)
                continue
            controller_path = os.path.dirname(rp)
            controller = os.path.basename(controller_path)
            archive_path = os.path.dirname(controller_path)
            if archive_path != ARCHIVE_rp:
                idxctx.logger.warning(
                        "For tar ball {}, original home is not {}", tb,
                        ARCHIVE_rp)
                quarantine(qdir, idxctx.logger, tb)
                continue
            if not os.path.isfile(rp + ".md5"):
                idxctx.logger.warning("Missing .md5 file for {}", tb)
                quarantine(qdir, idxctx.logger, tb)
                # Audit should pick up missing .md5 file in ARCHIVE directory.
                continue
            try:
                # get size
                size = os.path.getsize(rp)
            except OSError:
                idxctx.logger.warning(
                        "Could not fetch tar ball size for {}", tb)
                quarantine(qdir, idxctx.logger, tb)
                # Audit should pick up missing .md5 file in ARCHIVE directory.
                continue
            else:
                tarballs.append( (size, controller, tb) )
    except Exception:
        idxctx.logger.exception("Unexpected error encountered generating list"
                " of tar balls to process")
        return 12
    else:
        if not tarballs:
            idxctx.logger.info("No tar balls found that need processing")
            return 0

    # We always process the smallest tar balls first.
    tarballs = sorted(tarballs)

    # At this point, tarballs contains a list of tar balls sorted by size
    # that were available as symlinks in the various 'linksrc' directories.
    idxctx.logger.debug("Preparing to index {:d} tar balls", len(tarballs))

    try:
        # Now that we are ready to begin the actual indexing step, ensure we
        # have the proper index templates in place.
        idxctx.logger.debug("update_templates [start]")
        idxctx.templates.update_templates(idxctx.es)
    except TemplateError as e:
        idxctx.logger.error("update_templates [end], error {}", repr(e))
        res = 9
    except Exception:
        idxctx.logger.exception("update_templates [end]: Unexpected template"
                " processing error")
        res = 12
    else:
        idxctx.logger.debug("update_templates [end]")
        res = 0

    if res != 0:
        # Exit early if we encounter any errors.
        return res

    report = Report(idxctx.config, name, es=idxctx.es, pid=idxctx.getpid(),
            group_id=idxctx.getgid(), user_id=idxctx.getuid(),
            hostname=idxctx.gethostname(), version=VERSION,
            templates=idxctx.templates)
    # We use the "start" report ID as the tracking ID for all indexed
    # documents.
    try:
        tracking_id = report.post_status(tstos(idxctx.time()), "start")
    except Exception:
        idxctx.logger.error("Failed to post initial report status")
        return 12
    else:
        idxctx.set_tracking_id(tracking_id)

    with tempfile.TemporaryDirectory(prefix="{}.".format(name),
                dir=idxctx.config.TMP) as tmpdir:
        idxctx.logger.debug("start processing list of tar balls")
        tb_list = os.path.join(tmpdir, "{}.{}.list".format(name, idxctx.TS))
        try:
            with open(tb_list, "w") as lfp:
                # Write out all the tar balls we are processing so external
                # viewers can follow along from home.
                for size, controller, tb in tarballs:
                    print("{:20d} {} {}".format(size, controller, tb),
                            file=lfp)

            indexed = os.path.join(tmpdir, "{}.{}.indexed".format(
                    name, idxctx.TS))
            erred   = os.path.join(tmpdir, "{}.{}.erred"  .format(
                    name, idxctx.TS))
            skipped = os.path.join(tmpdir, "{}.{}.skipped".format(
                    name, idxctx.TS))
            ie_filename = os.path.join(tmpdir,
                    "{}.{}.indexing-errors.json".format(name, idxctx.TS))

            for size, controller, tb in tarballs:
                # Sanity check source tar ball path
                linksrc_dir = os.path.dirname(tb)
                linksrc_dirname = os.path.basename(linksrc_dir)
                assert linksrc_dirname == linksrc, "Logic bomb!  tar ball " \
                        "path {} does not contain {}".format(tb, linksrc)

                idxctx.logger.info("Starting {} (size {:d})", tb, size)

                ptb = None
                try:
                    # "Open" the tar ball represented by the tar ball object
                    idxctx.logger.debug("open tar ball")
                    ptb = PbenchTarBall(idxctx, os.path.realpath(tb), tmpdir,
                            os.path.join(INCOMING_rp, controller))

                    # Construct the generator for emitting all actions.  The
                    # `idxctx` dictionary is passed along to each generator so
                    # that it can add its context for error handling to the
                    # list.
                    idxctx.logger.debug("generator setup")
                    if options.index_tool_data:
                        actions = mk_tool_data_actions(ptb, idxctx)
                    else:
                        actions = make_all_actions(ptb, idxctx)

                    # File name for containing all indexing errors that
                    # can't/won't be retried.
                    with open(ie_filename, "w") as fp:
                        idxctx.logger.debug("begin indexing")
                        es_res = es_index(idxctx.es, actions, fp,
                                idxctx.logger, idxctx._dbg)
                except UnsupportedTarballFormat as e:
                    idxctx.logger.warning("Unsupported tar ball format: {}", e)
                    tb_res = 4
                except BadDate as e:
                    idxctx.logger.warning("Bad Date: {!r}", e)
                    tb_res = 5
                except _filenotfounderror as e:
                    idxctx.logger.warning("No such file: {}", e)
                    tb_res = 6
                except BadMDLogFormat as e:
                    idxctx.logger.warning("The metadata.log file is curdled in"
                            " tar ball: {}", e)
                    tb_res = 7
                except SosreportHostname as e:
                    idxctx.logger.warning("Bad hostname in sosreport: {}", e)
                    tb_res = 10
                except tarfile.TarError as e:
                    idxctx.logger.error("Can't unpack tar ball into {}: {}",
                            ptb.extracted_root, e)
                    tb_res = 11
                except Exception as e:
                    idxctx.logger.exception("Other indexing error: {}", e)
                    tb_res = 12
                else:
                    beg, end, successes, duplicates, failures, retries = es_res
                    idxctx.logger.info("done indexing (end ts: {}, duration:"
                            " {:.2f}s, successes: {:d}, duplicates: {:d},"
                            " failures: {:d}, retries: {:d})", tstos(end),
                            end - beg, successes, duplicates, failures,
                            retries)
                    tb_res = 1 if failures > 0 else 0
                try:
                    ie_len = os.path.getsize(ie_filename)
                except _filenotfounderror:
                    # Above operation never made it to actual indexing, ignore.
                    pass
                except Exception:
                    idxctx.logger.exception("Unexpected error handling"
                            " indexing errors file: {}", ie_filename)
                else:
                    # Success fetching indexing error file size.
                    if ie_len > len(tb) + 1:
                        try:
                            report.post_status(tstos(end), "errors", ie_filename)
                        except Exception:
                            idxctx.logger.exception("Unexpected error issuing"
                                    " report status with errors: {}",
                                     ie_filename)
                finally:
                    # Unconditionally remove the indexing errors file.
                    try:
                        os.remove(ie_filename)
                    except Exception:
                        pass
                # Distinguish failure cases, so we can retry the indexing
                # easily if possible.  Different `linkerrdest` directories for
                # different failures; the rest are going to end up in
                # `linkerrdest` for later retry.
                controller_path = os.path.dirname(linksrc_dir)

                if tb_res == 0:
                    idxctx.logger.info("{}: {}/{}: success", idxctx.TS,
                            os.path.basename(controller_path),
                            os.path.basename(tb))
                    # Success
                    with open(indexed, "a") as fp:
                        print(tb, file=fp)
                    rename_tb_link(tb, os.path.join(controller_path, linkdest),
                            idxctx.logger)
                elif tb_res == 1:
                    idxctx.logger.warning(
                            "{}: index failures encountered on {}",
                            idxctx.TS, tb)
                    with open(erred, "a") as fp:
                        print(tb, file=fp)
                    rename_tb_link(tb, os.path.join(controller_path,
                            "{}.1".format(linkerrdest)), idxctx.logger)
                elif tb_res in (2, 3):
                    assert False, "Logic Bomb!  Unexpected tar ball handling " \
                            "result status {:d} for tar ball {}".format(
                            tb_res, tb)
                elif tb_res >= 4 or res <= 11:
                    # # Quietly skip these errors
                    with open(skipped, "a") as fp:
                        print(tb, file=fp)
                    rename_tb_link(tb, os.path.join(controller_path,
                            "{}.{:d}".format(linkerrdest, tb_res)),
                            idxctx.logger)
                else:
                    idxctx.logger.error(
                            "{}: index error {:d} encountered on {}",
                            idxctx.TS, tb_res, tb)
                    with open(erred, "a") as fp:
                        print(tb, file=fp)
                    rename_tb_link(tb, os.path.join(controller_path,
                            linkerrdest), idxctx.logger)
                idxctx.logger.info("Finished {} (size {:d})", tb, size)
        except Exception as e:
            idxctx.logger.exception("Unexpected setup error")
            res = 12
        else:
            # No exceptions while processing tar ball, success.
            res = 0
        finally:
            if idxctx:
                idxctx.dump_opctx()
            idxctx.logger.debug("stopped processing list of tar balls")

            idx = _count_lines(indexed)
            skp = _count_lines(skipped)
            err = _count_lines(erred)

            idxctx.logger.info("{}.{}: indexed {:d} (skipped {:d}) results,"
                    " {:d} errors", name, idxctx.TS, idx, skp, err)

            if err > 0:
                if skp > 0:
                    subj = "{}.{} - Indexed {:d} results, skipped {:d}" \
                            " results, w/ {:d} errors".format(name, idxctx.TS,
                            idx, skp, err)
                else:
                    subj = "{}.{} - Indexed {:d} results, w/ {:d}" \
                            " errors".format(name, idxctx.TS, idx, err)
            else:
                if skp > 0:
                    subj = "{}.{} - Indexed {:d} results, skipped {:d}" \
                            " results".format(name, idxctx.TS, idx, skp)
                else:
                    subj = "{}.{} - Indexed {:d} results".format(name,
                            idxctx.TS, idx)

            report_fname = os.path.join(tmpdir, "{}.{}.report".format(
                    name, idxctx.TS))
            with open(report_fname, "w") as fp:
                print(subj, file=fp)
                if idx > 0:
                    print("\nIndexed Results\n===============", file=fp)
                    with open(indexed) as ifp:
                        for line in ifp:
                            print(line, file=fp)
                if err > 0:
                    print("\nResults producing errors"
                              "\n========================", file=fp)
                    with open(erred) as efp:
                        for line in efp:
                            print(line, file=fp)
                if skp > 0:
                    print("\nSkipped Results\n===============", file=fp)
                    with open(skipped) as sfp:
                        for line in sfp:
                            print(line, file=fp)
            try:
                report.post_status(tstos(idxctx.time()), "status", report_fname)
            except Exception:
                pass

    return res