Example #1
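This entry point comes from the pbench server codebase and relies on module-level names such as `_NAME_`, `PbenchServerConfig`, and `gen_list_unpacked_aged`. A minimal sketch of the scaffolding it needs follows; the stdlib imports are certain, while the pbench-internal names and the constant values are assumptions, not verbatim from the project:

# Assumed scaffolding for Example #1 (sketch only).
import os
import sys
import tempfile
from configparser import NoOptionError
from datetime import datetime
from pathlib import Path

import pbench.server  # assumed to provide pbench.server._time()

# Pbench-internal names used below, defined elsewhere in the codebase
# (import paths omitted because they are not shown in this example):
# PbenchServerConfig, BadConfig, get_pbench_logger, Report,
# gen_list_unpacked_aged, remove_unpacked

_NAME_ = "pbench-cull-unpacked-tarballs"  # assumed program name
_STD_DATETIME_FMT = "%Y-%m-%dT%H:%M:%S"   # assumed timestamp format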
def main(options):
    if not options.cfg_name:
        print(
            f"{_NAME_}: ERROR: No config file specified; set"
            " _PBENCH_SERVER_CONFIG env variable",
            file=sys.stderr,
        )
        return 1

    try:
        config = PbenchServerConfig(options.cfg_name)
    except BadConfig as e:
        print(f"{_NAME_}: {e}", file=sys.stderr)
        return 2

    logger = get_pbench_logger(_NAME_, config)

    archivepath = config.ARCHIVE

    incoming = config.INCOMING
    incomingpath = config.get_valid_dir_option("INCOMING", incoming, logger)
    if not incomingpath:
        return 3

    results = config.RESULTS
    resultspath = config.get_valid_dir_option("RESULTS", results, logger)
    if not resultspath:
        return 3

    users = config.USERS
    userspath = config.get_valid_dir_option("USERS", users, logger)
    if not userspath:
        return 3

    # Fetch the configured maximum number of days a tar ball can remain
    # "unpacked" in the INCOMING tree.
    try:
        max_unpacked_age = config.conf.get("pbench-server", "max-unpacked-age")
    except NoOptionError as e:
        logger.error(f"{e}")
        return 5
    try:
        max_unpacked_age = int(max_unpacked_age)
    except Exception:
        logger.error("Bad maximum unpacked age, {}", max_unpacked_age)
        return 6
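
    # For reference, the option fetched above would appear in the server
    # config file as (value assumed for illustration):
    #
    #   [pbench-server]
    #   max-unpacked-age = 30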

    # First phase is to find all the tar balls which are beyond the max
    # unpacked age, and which still have an unpacked directory in INCOMING.
    # Use the reference datetime provided by the test environment, if any;
    # otherwise use the current time.
    if config._ref_datetime is not None:
        curr_dt = config._ref_datetime
    else:
        curr_dt = datetime.utcnow()

    _msg = "Culling unpacked tar balls {} days older than {}"
    if options.dry_run:
        print(
            _msg.format(max_unpacked_age, curr_dt.strftime(_STD_DATETIME_FMT)))
    else:
        logger.debug(_msg, max_unpacked_age,
                     curr_dt.strftime(_STD_DATETIME_FMT))

    actions_taken = []
    errors = 0
    start = pbench.server._time()

    gen = gen_list_unpacked_aged(incomingpath, archivepath, curr_dt,
                                 max_unpacked_age)
    if config._unittests:
        # force the generator and sort the list
        gen = sorted(list(gen))

    for tb_incoming_dir, controller_name in gen:
        act_set = remove_unpacked(
            tb_incoming_dir,
            controller_name,
            resultspath,
            userspath,
            logger,
            options.dry_run,
        )
        unpacked_dir_name = Path(tb_incoming_dir).name
        act_path = Path(controller_name, unpacked_dir_name)
        act_set.set_name(act_path)
        actions_taken.append(act_set)
        errors += act_set.errors
        if act_set.errors > 0:
            # Stop any further unpacked tar ball removal if an error is
            # encountered.
            break
    end = pbench.server._time()

    # Generate the ${TOP}/public_html prefix so we can strip it from the
    # various targets in the report.
    public_html = os.path.realpath(os.path.join(config.TOP, "public_html"))

    # Write the actions taken into a report file.
    with tempfile.NamedTemporaryFile(mode="w+t",
                                     prefix=f"{_NAME_}.",
                                     suffix=".report",
                                     dir=config.TMP) as tfp:
        duration = end - start
        total = len(actions_taken)
        print(
            f"Culled {total:d} unpacked tar ball directories ({errors:d}"
            f" errors) in {duration:0.2f} secs",
            file=tfp,
        )
        if total > 0:
            print("\nActions Taken:", file=tfp)
        for act_set in sorted(actions_taken, key=lambda a: a.name):
            print(
                f"  - {act_set.name} ({act_set.errors:d} errors,"
                f" {act_set.duration():0.2f} secs)",
                file=tfp,
            )
            for act in act_set.actions:
                assert act.noun.startswith(
                    public_html
                ), f"Logic bomb! {act.noun} not in .../public_html/"
                tgt = Path(act.noun[len(public_html) + 1:])
                if act.verb == "mv":
                    name = tgt.name
                    controller = tgt.parent
                    ex_tgt = controller / f".delete.{name}"
                    print(f"      $ {act.verb} {tgt} {ex_tgt}  # {act.status}",
                          file=tfp)
                else:
                    print(f"      $ {act.verb} {tgt}  # {act.status}",
                          file=tfp)

        # Flush out the report ahead of posting it.
        tfp.flush()
        tfp.seek(0)

        # We need to generate a report that lists all the actions taken.
        report = Report(config, _NAME_)
        report.init_report_template()
        try:
            report.post_status(config.timestamp(),
                               "status" if errors == 0 else "errors", tfp.name)
        except Exception:
            # A failure to post the report should not change the error count
            # returned as the exit status.
            pass
    return errors
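
`gen_list_unpacked_aged` is not shown in this example. A plausible minimal sketch follows, assuming that INCOMING holds per-controller subdirectories of unpacked tar ball directories and that age is measured from each directory's modification time (both assumptions; the real generator may consult ARCHIVE metadata instead, and `archive` is accepted but unused here):

def gen_list_unpacked_aged(incoming, archive, curr_dt, max_age):
    # Sketch only: yield (unpacked-dir-path, controller-name) pairs for
    # unpacked tar ball directories in INCOMING older than max_age days.
    for controller in Path(incoming).iterdir():
        if not controller.is_dir():
            continue
        for unpacked in controller.iterdir():
            if not unpacked.is_dir():
                continue
            mtime = datetime.utcfromtimestamp(unpacked.stat().st_mtime)
            if (curr_dt - mtime).days > max_age:
                yield str(unpacked), controller.name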
Example #2
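As with the first example, this function assumes module-level scaffolding. The stdlib imports below are certain; the pbench-internal names (`init_db`, `S3Config`, `BackupObject`, `Status`, `compare_entry_lists`, `sanity_check`, `PbenchServerConfig`, `BadConfig`, `get_pbench_logger`, `Report`) are defined elsewhere in the codebase, and the program name is an assumption:

# Assumed scaffolding for Example #2 (sketch only).
import os
import sys
import tempfile
from pathlib import Path

_NAME_ = "pbench-verify-backup-tarballs"  # assumed program name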
def main():
    cfg_name = os.environ.get("_PBENCH_SERVER_CONFIG")
    if not cfg_name:
        print(
            f"{_NAME_}: ERROR: No config file specified; set _PBENCH_SERVER_CONFIG env variable or"
            f" use --config <file> on the command line",
            file=sys.stderr,
        )
        return 2

    try:
        config = PbenchServerConfig(cfg_name)
    except BadConfig as e:
        print(f"{_NAME_}: {e}", file=sys.stderr)
        return 1

    logger = get_pbench_logger(_NAME_, config)

    # We're going to need the Postgres DB to track dataset state, so set up
    # DB access.
    init_db(config, logger)

    # Add a BACKUP field to the config object.
    config.BACKUP = backup = config.conf.get("pbench-server",
                                             "pbench-backup-dir")

    if not backup:
        logger.error(
            "Unspecified backup directory: no pbench-backup-dir option in"
            " the pbench-server section")
        return 1

    backuppath = config.get_valid_dir_option("BACKUP", backup, logger)
    if not backuppath:
        return 1

    # Instantiate the S3Config object and verify the service is reachable.
    s3_config_obj = S3Config(config, logger)
    s3_config_obj = sanity_check(s3_config_obj, logger)

    logger.info("start-{}", config.TS)
    start = config.timestamp()

    prog = Path(sys.argv[0]).name

    sts = 0
    # N.B. tmpdir is the pathname of the temp directory.
    with tempfile.TemporaryDirectory() as tmpdir:

        archive_obj = BackupObject("ARCHIVE", config.ARCHIVE, tmpdir, logger)
        local_backup_obj = BackupObject("BACKUP", config.BACKUP, tmpdir,
                                        logger)
        s3_backup_obj = BackupObject("S3", s3_config_obj, tmpdir, logger)

        with tempfile.NamedTemporaryFile(mode="w+t", dir=tmpdir) as reportfp:
            reportfp.write(
                f"{prog}.{config.TS} ({config.PBENCH_ENV}) started at {start}\n"
            )
            if s3_config_obj is None:
                reportfp.write(
                    "\nNOTICE: S3 backup service is inaccessible; skipping"
                    " ARCHIVE to S3 comparison\n\n")

            # FIXME: Parallelize these three ...

            # Create entry list for archive
            logger.debug("Starting archive list creation")
            ar_start = config.timestamp()
            ret_sts = archive_obj.entry_list_creation()
            if ret_sts == Status.FAIL:
                sts += 1
            logger.debug("Finished archive list ({!r})", ret_sts)

            # Create entry list for backup
            logger.debug("Starting local backup list creation")
            lb_start = config.timestamp()
            ret_sts = local_backup_obj.entry_list_creation()
            if ret_sts == Status.FAIL:
                sts += 1
            logger.debug("Finished local backup list ({!r})", ret_sts)

            # Create entry list for S3
            if s3_config_obj is not None:
                logger.debug("Starting S3 list creation")
                s3_start = config.timestamp()
                ret_sts = s3_backup_obj.entry_list_creation()
                if ret_sts == Status.FAIL:
                    sts += 1
                logger.debug("Finished S3 list ({!r})", ret_sts)

            logger.debug("Checking MD5 signatures of archive")
            ar_md5_start = config.timestamp()
            try:
                # Check the data integrity in ARCHIVE (Question 1).
                md5_result_archive = archive_obj.checkmd5()
            except Exception as ex:
                msg = f"Failed to check data integrity of ARCHIVE ({config.ARCHIVE})"
                logger.exception(msg)
                reportfp.write(f"\n{msg} - '{ex}'\n")
                sts += 1
            else:
                if md5_result_archive > 0:
                    # Create a report for failed MD5 results from ARCHIVE (Question 1)
                    archive_obj.report_failed_md5(reportfp)
                    sts += 1
                    logger.debug(
                        "Checking MD5 signature of archive: {} errors",
                        md5_result_archive,
                    )
            logger.debug("Finished checking MD5 signatures of archive")

            logger.debug("Checking MD5 signatures of local backup")
            lb_md5_start = config.timestamp()
            try:
                # Check the data integrity in BACKUP (Question 2).
                md5_result_backup = local_backup_obj.checkmd5()
            except Exception as ex:
                msg = f"Failed to check data integrity of BACKUP ({config.BACKUP})"
                logger.exception(msg)
                reportfp.write(f"\n{msg} - '{ex}'\n")
                sts += 1
            else:
                if md5_result_backup > 0:
                    # Create a report for failed MD5 results from BACKUP (Question 2)
                    local_backup_obj.report_failed_md5(reportfp)
                    sts += 1
                    logger.debug(
                        "Checking MD5 signature of local backup: {} errors",
                        md5_result_backup,
                    )
            logger.debug("Finished checking MD5 signatures of local backup")

            # Compare ARCHIVE with BACKUP (Questions 3 and 3a).
            msg = "Comparing ARCHIVE with BACKUP"
            reportfp.write(f"\n{msg}\n{'-' * len(msg)}\n")
            logger.debug("{}: start", msg)
            compare_entry_lists(archive_obj, local_backup_obj, reportfp,
                                logger)
            logger.debug("{}: end", msg)

            if s3_config_obj is not None:
                # Compare ARCHIVE with S3 (Questions 4, 4a, and 4b).
                msg = "Comparing ARCHIVE with S3"
                reportfp.write(f"\n{msg}\n{'-' * len(msg)}\n")
                logger.debug("{}: start", msg)
                compare_entry_lists(archive_obj, s3_backup_obj, reportfp,
                                    logger)
                logger.debug("{}: end", msg)

            if s3_config_obj is None:
                s3_start = "<skipped>"
            reportfp.write(f"\n\nPhases (started):\n"
                           f"Archive List Creation:       {ar_start}\n"
                           f"Local Backup List Creation:  {lb_start}\n"
                           f"S3 List Creation:            {s3_start}\n"
                           f"Archive MD5 Checks:          {ar_md5_start}\n"
                           f"Local Backup MD5 Checks:     {lb_md5_start}\n")

            end = config.timestamp()
            reportfp.write(
                f"\n{prog}.{config.TS} ({config.PBENCH_ENV}) finished at {end}\n"
            )

            # Rewind to the beginning.
            reportfp.seek(0)

            logger.debug("Sending report: start")
            report = Report(config, _NAME_)
            report.init_report_template()
            try:
                report.post_status(config.timestamp(), "status", reportfp.name)
            except Exception:
                # A failure to post the report should not change the status
                # count computed from the checks above.
                pass
            logger.debug("Sending report: end")

    logger.info("end-{}", config.TS)

    return sts
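
`sanity_check` acts as the gate for all of the S3 work above: the code treats a `None` return as "S3 backup service inaccessible" and skips the S3 list creation and the ARCHIVE-to-S3 comparison. A minimal sketch under that contract; `connector` and `bucket_name` are hypothetical `S3Config` attributes, not taken from the project:

def sanity_check(s3_obj, logger):
    # Sketch only: return s3_obj if the S3 backup service is reachable,
    # else None so callers skip all S3 phases.
    try:
        # Hypothetical attributes: a cheap round-trip to verify access.
        s3_obj.connector.head_bucket(Bucket=s3_obj.bucket_name)
    except Exception:
        logger.warning("S3 backup service is inaccessible; skipping S3 checks")
        return None
    return s3_obj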