Example 1
    def __init__(
        self,
        config: PbenchServerConfig,
        logger: Logger,
        schema: Schema,
        *,
        role: API_OPERATION = API_OPERATION.READ,
    ):
        """
        Base class constructor.

        Args:
            config: server configuration
            logger: logger object
            schema: API schema: for example,
                    Schema(
                        Parameter("user", ParamType.USER, required=True),
                        Parameter("start", ParamType.DATE)
                    )
            role: specify the API role, defaulting to READ

        NOTE: each class currently only supports one HTTP method, so we can
        describe only one set of parameters. If we ever need to change this,
        we can add a level and describe distinct parameters for each method.
        """
        super().__init__()
        self.logger = logger
        self.prefix = config.get("Indexing", "index_prefix")
        host = config.get("elasticsearch", "host")
        port = config.get("elasticsearch", "port")
        self.es_url = f"http://{host}:{port}"
        self.schema = schema
        self.role = role
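
The constructor above is intended to be invoked from concrete API resources; a minimal sketch of a hypothetical subclass follows (the subclass name, base-class name, and query parameters are illustrative, not taken from the source):

# Hypothetical subclass of the base resource shown above (here assumed to
# be named ElasticBase); the Schema mirrors the docstring's example.
class DatasetsDateRange(ElasticBase):
    def __init__(self, config: PbenchServerConfig, logger: Logger):
        super().__init__(
            config,
            logger,
            Schema(
                Parameter("user", ParamType.USER, required=True),
                Parameter("start", ParamType.DATE),
            ),
            role=API_OPERATION.READ,
        )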
Example 2
def main(cfg_name):
    if not cfg_name:
        print(
            f"{_NAME_}: ERROR: No config file specified; set"
            " _PBENCH_SERVER_CONFIG env variable or use --config <file> on the"
            " command line",
            file=sys.stderr,
        )
        return 2

    try:
        config = PbenchServerConfig(cfg_name)
    except BadConfig as e:
        print(f"{_NAME_}: {e} (config file {cfg_name})", file=sys.stderr)
        return 1

    logger = get_pbench_logger(_NAME_, config)

    qdir, receive_dir = fetch_config_val(config, logger)

    if qdir is None and receive_dir is None:
        return 2

    qdir_md5 = qdirs_check("quarantine", Path(qdir, "md5-002"), logger)
    duplicates = qdirs_check("duplicates", Path(qdir, "duplicates-002"),
                             logger)

    # The following directory holds tarballs that are quarantined because
    # of operational errors on the server. They should be retried after
    # the problem is fixed: basically, move them back into the reception
    # area for 002 agents and wait.
    errors = qdirs_check("errors", Path(qdir, "errors-002"), logger)

    if qdir_md5 is None or duplicates is None or errors is None:
        return 1

    counts = process_tb(config, logger, receive_dir, qdir_md5, duplicates,
                        errors)

    result_string = (f"{config.TS}: Processed {counts.ntotal} entries,"
                     f" {counts.ntbs} tarballs successful,"
                     f" {counts.nquarantined} quarantined tarballs,"
                     f" {counts.ndups} duplicately-named tarballs,"
                     f" {counts.nerrs} errors.")

    logger.info(result_string)

    # prepare and send report
    with tempfile.NamedTemporaryFile(mode="w+t", dir=config.TMP) as reportfp:
        reportfp.write(f"{counts.nstatus}{result_string}\n")
        reportfp.seek(0)

        report = Report(config, _NAME_)
        report.init_report_template()
        try:
            report.post_status(config.timestamp(), "status", reportfp.name)
        except Exception as exc:
            logger.warning("Report post Unsuccesful: '{}'", exc)

    return 0
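
main() returns shell-style exit codes instead of calling sys.exit() itself; a sketch of the entry-point wiring such scripts typically use (the argparse details are assumptions, since the real option handling is not shown):

# Hypothetical entry-point wiring for the main() above.
import os
import sys
from argparse import ArgumentParser

if __name__ == "__main__":
    parser = ArgumentParser(prog=_NAME_)
    parser.add_argument(
        "-C", "--config",
        dest="cfg_name",
        default=os.environ.get("_PBENCH_SERVER_CONFIG"),
        help="Path to the pbench server configuration file",
    )
    parsed = parser.parse_args()
    sys.exit(main(parsed.cfg_name))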
Example 3
def test_pbench_logger_level():

    config = PbenchServerConfig(cfg_name)
    logger = get_pbench_logger(_NAME_, config)

    logging_level = config.get("logging", "logging_level")

    logger = mock_the_handler(logger, logging_level, log_files[logging_level])

    logger.debug(log_msgs["10"])
    logger.info(log_msgs["20"])
    logger.warning(log_msgs["30"])
    logger.error(log_msgs["40"])
    logger.critical(log_msgs["50"])
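
cfg_name, log_files, and log_msgs are module-level fixtures not shown in this excerpt, and mock_the_handler is presumably a helper that redirects the logger to a known file; a plausible sketch (an assumption, not the project's actual helper):

import logging
import os

def mock_the_handler(logger, level_name, filename, logdir="/tmp"):
    # Replace the logger's handlers with a FileHandler writing to a known
    # file so the test can assert on the emitted text afterwards.
    handler = logging.FileHandler(os.path.join(logdir, filename))
    handler.setLevel(level_name)
    inner = getattr(logger, "logger", logger)  # pbench loggers may wrap a stdlib Logger
    inner.handlers = [handler]
    inner.setLevel(level_name)
    return logger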
Example 4
def test_pbench_logger():

    config = PbenchServerConfig(cfg_name)
    logger = get_pbench_logger(_NAME_, config)

    logger_type = config.get("logging", "logger_type")

    logger = mock_the_handler(logger, logger_type, log_files[logger_type])
    logger.debug(log_msgs[logger_type])

    if os.path.isfile(os.path.join(logdir, log_files[logger_type])):
        with open(os.path.join(logdir, log_files[logger_type]), "r") as f:
            assert (
                f.read()[:-1] == log_msgs[logger_type]
            ), "Mismatch: the file did not contain the expected message."
Example 5
def create_app():
    """Create Flask app with defined resource endpoints."""

    global app

    cfg_name = os.environ.get("_PBENCH_SERVER_CONFIG")
    if not cfg_name:
        print(
            f"{__name__}: ERROR: No config file specified; set"
            " _PBENCH_SERVER_CONFIG",
            file=sys.stderr,
        )
        sys.exit(1)

    try:
        config = PbenchServerConfig(cfg_name)
    except BadConfig as e:
        print(f"{__name__}: {e} (config file {cfg_name})", file=sys.stderr)
        sys.exit(1)

    app = Flask(__name__)
    api = Api(app)

    app.logger = get_pbench_logger(__name__, config)
    app.config_server = config.conf["pbench-server"]
    app.config_elasticsearch = config.conf["elasticsearch"]
    app.config_graphql = config.conf["graphql"]

    prdp = app.config_server.get("pbench-receive-dir-prefix")
    if not prdp:
        app.logger.error(
            "Missing config variable for pbench-receive-dir-prefix")
        sys.exit(1)
    try:
        upload_directory = Path(f"{prdp}-002").resolve(strict=True)
    except FileNotFoundError:
        app.logger.exception(
            "pbench-receive-dir-prefix does not exist on the host")
        sys.exit(1)
    except Exception:
        app.logger.exception(
            "Exception occurred during setting up the upload directory on the host"
        )
        sys.exit(1)
    else:
        app.upload_directory = upload_directory

    app.config["PORT"] = app.config_server.get("rest_port")
    app.config["VERSION"] = app.config_server.get("rest_version")
    app.config["MAX_CONTENT_LENGTH"] = filesize_bytes(
        app.config_server.get("rest_max_content_length"))
    app.config["REST_URI"] = app.config_server.get("rest_uri")
    app.config["LOG"] = app.config_server.get("rest_log")
    app.config["BIND_HOST"] = app.config_server.get("bind_host")
    app.config["WORKERS"] = app.config_server.get("workers")

    register_endpoints(api, app)

    return app
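
create_app() follows the Flask application-factory pattern, so the returned app can be run directly for development or handed to a WSGI server; a minimal sketch using the config keys populated above:

# Sketch: serve the factory-produced app directly (development only).
app = create_app()
app.run(host=app.config["BIND_HOST"], port=int(app.config["PORT"]))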
Example 6
    def __init__(self, config: PbenchServerConfig, logger: Logger):
        """
        __init__ Construct the API resource

        Args:
            config: server config values
            logger: message logger

        Report the server configuration to a web client. By default, the Pbench
        server ansible script sets up a local Apache reverse proxy routing
        through the HTTP port (80); an external reverse proxy can be configured
        without the server's knowledge, and this API will use the reverse
        proxy's Forwarded or X-Forwarded-Host HTTP headers to discover the
        proxy configuration. All server endpoints are reported relative to that
        address.
        """
        self.logger = logger
        self.host = config.get("pbench-server", "host")
        self.uri_prefix = config.rest_uri
        self.prefix = config.get("Indexing", "index_prefix")
        self.commit_id = config.COMMIT_ID
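
The proxy discovery described in the docstring happens per request; a sketch of how a handler method on this class might prefer the X-Forwarded-Host header over the configured host (illustrative only; full Forwarded-header parsing is omitted):

from flask import request

# Sketch: a method of the resource class above, building the externally
# visible URI prefix used when reporting endpoints.
def _origin(self) -> str:
    host = request.headers.get("X-Forwarded-Host", self.host)
    return f"http://{host}{self.uri_prefix}"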
Example 7
def get_server_config():
    cfg_name = os.environ.get("_PBENCH_SERVER_CONFIG")
    if not cfg_name:
        raise ConfigFileNotSpecified(
            f"{__name__}: ERROR: No config file specified; set"
            " _PBENCH_SERVER_CONFIG")

    try:
        return PbenchServerConfig(cfg_name)
    except BadConfig as e:
        raise Exception(
            f"{__name__}: {e} (config file {cfg_name})").with_traceback(
                e.__traceback__)
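
Callers of get_server_config() would typically convert these exceptions into an exit status; a minimal caller-side sketch:

import sys

try:
    server_config = get_server_config()
except ConfigFileNotSpecified as exc:
    print(exc, file=sys.stderr)
    sys.exit(1)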
Example 8
    def __init__(self, config: PbenchServerConfig, logger: Logger, schema: Schema):
        """
        __init__ Construct the base class

        Args:
            config: server configuration
            logger: logger object
            schema: API schema: for example,
                    Schema(
                        Parameter("user", ParamType.USER, required=True),
                        Parameter("start", ParamType.DATE)
                    )

        NOTE: each class currently only supports one HTTP method, so we can
        describe only one set of parameters. If we ever need to change this,
        we can add a level and describe parameters by method.
        """
        super().__init__()
        self.logger = logger
        self.prefix = config.get("Indexing", "index_prefix")
        host = config.get("elasticsearch", "host")
        port = config.get("elasticsearch", "port")
        self.es_url = f"http://{host}:{port}"
        self.schema = schema
Example 9
def main(options):
    if not options.cfg_name:
        print(
            f"{_NAME_}: ERROR: No config file specified; set"
            " _PBENCH_SERVER_CONFIG env variable",
            file=sys.stderr,
        )
        return 1

    try:
        config = PbenchServerConfig(options.cfg_name)
    except BadConfig as e:
        print(f"{_NAME_}: {e}", file=sys.stderr)
        return 2

    logger = get_pbench_logger(_NAME_, config)

    # We're going to need the Postgres DB to track dataset state, so setup
    # DB access.
    init_db(config, logger)

    # NOTE: the importer will ignore datasets that already exist in the DB;
    # we do the ACTION links first to get set up, and then sweep other links
    # to record the state of remaining datasets, especially to record those
    # which are quarantined.
    #
    # FIXME: This doesn't sweep the "<root>/quarantine" directory, which might
    # have additional datasets. Are they worth importing?
    actions = {
        "TO-UNPACK": States.UPLOADED,
        "TO-INDEX": States.UNPACKED,
        "INDEXED": States.INDEXED,
        "UNPACKED": States.UNPACKED,
        "WONT-UNPACK": States.QUARANTINED,
        "WONT-INDEX*": States.QUARANTINED,
        "BAD-MD5": States.QUARANTINED,
    }

    importer = Import(logger, options, config)

    return_value = 0
    for link, state in actions.items():
        status = importer.process(link, state)
        if status != 0:
            return_value = 1
    return return_value
Example 10
def create_app():
    global app

    cfg_name = os.environ.get("_PBENCH_SERVER_CONFIG")
    if not cfg_name:
        print(
            f"{__name__}: ERROR: No config file specified; set"
            " _PBENCH_SERVER_CONFIG",
            file=sys.stderr,
        )
        sys.exit(1)

    try:
        config = PbenchServerConfig(cfg_name)
    except BadConfig as e:
        print(f"{__name__}: {e} (config file {cfg_name})", file=sys.stderr)
        sys.exit(1)

    app = Flask(__name__)
    api = Api(app)

    app.logger = get_pbench_logger(__name__, config)

    app.config_server = config.conf["pbench-server"]

    prdp = app.config_server.get("pbench-receive-dir-prefix")
    try:
        upload_directory = Path(f"{prdp}-002").resolve(strict=True)
    except Exception:
        app.logger.error(
            "Missing config variable for pbench-receive-dir-prefix")
        sys.exit(1)
    else:
        app.upload_directory = upload_directory

    app.config["PORT"] = app.config_server.get("rest_port")
    app.config["VERSION"] = app.config_server.get("rest_version")
    app.config["MAX_CONTENT_LENGTH"] = app.config_server.get(
        "rest_max_content_length")
    app.config["REST_URI"] = app.config_server.get("rest_uri")
    app.config["LOG"] = app.config_server.get("rest_log")

    register_endpoints(api, app)

    return app
Example 11
    def execute(self):
        config = PbenchServerConfig(self.context.config)

        logger = get_pbench_logger(_NAME_, config)

        # We're going to need the Postgres DB to track dataset state, so setup
        # DB access.
        Database.init_db(config, logger)

        user = User(
            username=self.context.username,
            password=self.context.password,
            first_name=self.context.first_name,
            last_name=self.context.last_name,
            email=self.context.email,
            role=self.context.role if self.context.role else "",
        )

        user.add()
        if user.is_admin():
            click.echo(f"Admin user {self.context.username} registered")
        else:
            click.echo(f"User {self.context.username} registered")
Example 12
    help="The caller's user ID (optional)",
)
parser.add_argument(
    "-T",
    "--type",
    dest="doctype",
    required=True,
    help="The type of report document to index, one of status|error",
)
parser.add_argument(
    "file_to_index", nargs=1, help="The file containing the report to index"
)
parsed = parser.parse_args()

try:
    config = PbenchServerConfig(parsed.cfg_name)
except BadConfig as e:
    print(f"{_prog}: {e}", file=sys.stderr)
    sys.exit(1)

# We're going to need the Postgres DB to track dataset state, so setup
# DB access. We don't pass a Logger here, because that introduces lots
# of spurious changes in the gold CLI test output.
init_db(config, None)

hostname = gethostname()
pid = parsed.pid
group_id = parsed.group_id
user_id = parsed.user_id

report = Report(
Example 13
    # environment variable set.  Since we really need access to the config
    # file to operate, and we know the relative location of that config file,
    # we check to see if that exists before declaring a problem.
    config_path = Path(_dir.parent, "lib", "config", "pbench-server.cfg")
    if not config_path.exists():
        print(
            f"{_prog}: No config file specified: set _PBENCH_SERVER_CONFIG env variable or use"
            f" --config <file> on the command line",
            file=sys.stderr,
        )
        sys.exit(1)
else:
    config_path = Path(parsed.cfg_name)

try:
    config = PbenchServerConfig(config_path)
except BadConfig as e:
    print(f"{_prog}: {e} (config file {config_path})", file=sys.stderr)
    sys.exit(1)

# Exclude the "files" and "conf" attributes from being exported
vars = sorted([
    key for key in config.__dict__.keys()
    if key not in ("files", "conf", "timestamp", "_unittests", "_ref_datetime",
                   "get")
])
for att in vars:
    try:
        os.environ[att] = str(getattr(config, att))
    except AttributeError:
        print(
Example 14
def main(cfg_name):
    if not cfg_name:
        print(
            f"{_NAME_}: ERROR: No config file specified; set"
            " _PBENCH_SERVER_CONFIG env variable or use --config <file> on the"
            " command line",
            file=sys.stderr,
        )
        return 2

    try:
        config = PbenchServerConfig(cfg_name)
    except BadConfig as e:
        print(f"{_NAME_}: {e}", file=sys.stderr)
        return 1

    logger = get_pbench_logger(_NAME_, config)

    # We're going to need the Postgres DB to track dataset state, so setup
    # DB access.
    init_db(config, logger)

    # Add a BACKUP and QDIR field to the config object
    config.BACKUP = config.conf.get("pbench-server", "pbench-backup-dir")
    config.QDIR = config.get("pbench-server", "pbench-quarantine-dir")

    # call the LocalBackupObject class
    lb_obj = LocalBackupObject(config)

    # call the S3Config class
    s3_obj = S3Config(config, logger)

    lb_obj, s3_obj = sanity_check(lb_obj, s3_obj, config, logger)

    if lb_obj is None and s3_obj is None:
        return 3

    logger.info("start-{}", config.TS)

    # Initiate the backup
    counts = backup_data(lb_obj, s3_obj, config, logger)

    result_string = (f"Total processed: {counts.ntotal},"
                     f" Local backup successes: {counts.nbackup_success},"
                     f" Local backup failures: {counts.nbackup_fail},"
                     f" S3 upload successes: {counts.ns3_success},"
                     f" S3 upload failures: {counts.ns3_fail},"
                     f" Quarantined: {counts.nquaran}")

    logger.info(result_string)

    prog = Path(sys.argv[0]).name

    # prepare and send report
    with tempfile.NamedTemporaryFile(mode="w+t", dir=config.TMP) as reportfp:
        reportfp.write(
            f"{prog}.{config.timestamp()}({config.PBENCH_ENV})\n{result_string}\n"
        )
        reportfp.seek(0)

        report = Report(config, _NAME_)
        report.init_report_template()
        try:
            report.post_status(config.timestamp(), "status", reportfp.name)
        except Exception:
            pass

    logger.info("end-{}", config.TS)

    return 0
Example 15
    # file to operate, and we know the relative location of that config file,
    # we check to see if that exists before declaring a problem.
    config_name = os.path.join(os.path.dirname(_dir), "lib", "config",
                               "pbench-server.cfg")
    if not os.path.exists(config_name):
        print(
            f"{_prog}: No config file specified: set _PBENCH_SERVER_CONFIG env variable or use"
            f" --config <file> on the command line",
            file=sys.stderr,
        )
        sys.exit(1)
else:
    config_name = parsed.cfg_name

try:
    config = PbenchServerConfig(config_name)
except BadConfig as e:
    print(f"{_prog}: {e} (config file {config_name})", file=sys.stderr)
    sys.exit(1)

# Exclude the "files" and "conf" attributes from being exported
vars = sorted([
    key for key in config.__dict__.keys()
    if key not in ("files", "conf", "timestamp", "_unittests", "_ref_datetime",
                   "get")
])
for att in vars:
    try:
        os.environ[att] = str(getattr(config, att))
    except AttributeError:
        print(
Example 16
def main(options):
    if not options.cfg_name:
        print(
            f"{_NAME_}: ERROR: No config file specified; set"
            " _PBENCH_SERVER_CONFIG env variable",
            file=sys.stderr,
        )
        return 1

    try:
        config = PbenchServerConfig(options.cfg_name)
    except BadConfig as e:
        print(f"{_NAME_}: {e}", file=sys.stderr)
        return 2

    try:
        archive_p = Path(config.ARCHIVE).resolve(strict=True)
    except FileNotFoundError:
        print(
            f"The configured ARCHIVE directory, {config.ARCHIVE}, does not exist",
            file=sys.stderr,
        )
        return 3

    if not archive_p.is_dir():
        print(
            f"The configured ARCHIVE directory, {config.ARCHIVE}, is not a valid directory",
            file=sys.stderr,
        )
        return 4

    try:
        incoming_p = Path(config.INCOMING).resolve(strict=True)
    except FileNotFoundError:
        print(
            f"The configured INCOMING directory, {config.INCOMING}, does not exist",
            file=sys.stderr,
        )
        return 5

    if not incoming_p.is_dir():
        print(
            f"The configured INCOMING directory, {config.INCOMING}, is not a valid directory",
            file=sys.stderr,
        )
        return 6

    _fmt = "%Y-%m-%d"
    try:
        oldest_dt = datetime.strptime(options.oldest, _fmt)
        newest_dt = datetime.strptime(options.newest, _fmt)
    except Exception as exc:
        print(
            f"Invalid time range, {options.oldest} to {options.newest}, "
            f"'{exc}', expected time range values in the form YYYY-MM-DD",
            file=sys.stderr,
        )
        return 7
    else:
        if newest_dt < oldest_dt:
            # For convenience, swap oldest and newest dates that are reversed.
            oldest_dt, newest_dt = newest_dt, oldest_dt

    print(f"Re-indexing tar balls in the range {oldest_dt} to {newest_dt}")

    actions = []
    start = pbench.server._time()
    for _val in gen_reindex_list(archive_p, oldest_dt, newest_dt):
        controller_name, tb_name = _val
        act_set = reindex(
            controller_name, tb_name, archive_p, incoming_p, options.dry_run
        )
        actions.append(act_set)
    end = pbench.server._time()

    for act_set in sorted(actions):
        print(f"{act_set!r}")

    print(f"Run-time: {start} {end} {end - start}")
    return 0
Example 17
def main(options):
    try:
        if not options.cfg_name:
            print(
                f"{_NAME_}: ERROR: No config file specified; set"
                " _PBENCH_SERVER_CONFIG env variable",
                file=sys.stderr,
            )
            return 1

        try:
            config = PbenchServerConfig(options.cfg_name)
        except BadConfig as e:
            print(f"{_NAME_}: {e}", file=sys.stderr)
            return 2

        logger = get_pbench_logger(_NAME_, config)

        # We're going to need the Postgres DB to track dataset state, so setup
        # DB access.
        init_db(config, logger)

        args = {}
        if options.create:
            args["owner"] = options.create
        if options.controller:
            args["controller"] = options.controller
        if options.path:
            args["path"] = options.path
        if options.name:
            args["name"] = options.name
        if options.md5:
            args["md5"] = options.md5
        if options.state:
            try:
                new_state = States[options.state.upper()]
            except KeyError:
                print(
                    f"{_NAME_}: Specified string '{options.state}' is not a Pbench dataset state",
                    file=sys.stderr,
                )
                return 1
            args["state"] = new_state

        if "path" not in args and ("controller" not in args or "name" not in args):
            print(
                f"{_NAME_}: Either --path or both --controller and --name must be specified",
                file=sys.stderr,
            )
            return 1

        # Either create a new dataset or attach to an existing dataset
        doit = Dataset.create if options.create else Dataset.attach

        # Find or create the specified dataset.
        doit(**args)
    except Exception as e:
        # Stringify any exception and report it; then fail
        logger.exception("Failed")
        print(f"{_NAME_}: {e}", file=sys.stderr)
        return 1
    else:
        return 0
Example 18
def main(options):
    if not options.cfg_name:
        print(
            f"{_NAME_}: ERROR: No config file specified; set"
            " _PBENCH_SERVER_CONFIG env variable",
            file=sys.stderr,
        )
        return 1

    try:
        config = PbenchServerConfig(options.cfg_name)
    except BadConfig as e:
        print(f"{_NAME_}: {e}", file=sys.stderr)
        return 2

    logger = get_pbench_logger(_NAME_, config)

    archivepath = config.ARCHIVE

    incoming = config.INCOMING
    incomingpath = config.get_valid_dir_option("INCOMING", incoming, logger)
    if not incomingpath:
        return 3

    results = config.RESULTS
    resultspath = config.get_valid_dir_option("RESULTS", results, logger)
    if not resultspath:
        return 3

    users = config.USERS
    userspath = config.get_valid_dir_option("USERS", users, logger)
    if not userspath:
        return 3

    # Fetch the configured maximum number of days a tar can remain "unpacked"
    # in the INCOMING tree.
    try:
        max_unpacked_age = config.conf.get("pbench-server", "max-unpacked-age")
    except NoOptionError as e:
        logger.error(f"{e}")
        return 5
    try:
        max_unpacked_age = int(max_unpacked_age)
    except Exception:
        logger.error("Bad maximum unpacked age, {}", max_unpacked_age)
        return 6

    # First phase is to find all the tar balls which are beyond the max
    # unpacked age, and which still have an unpacked directory in INCOMING.
    if config._ref_datetime is not None:
        try:
            curr_dt = config._ref_datetime
        except Exception:
            # Ignore bad dates from test environment.
            curr_dt = datetime.utcnow()
    else:
        curr_dt = datetime.utcnow()

    _msg = "Culling unpacked tar balls {} days older than {}"
    if options.dry_run:
        print(
            _msg.format(max_unpacked_age, curr_dt.strftime(_STD_DATETIME_FMT)))
    else:
        logger.debug(_msg, max_unpacked_age,
                     curr_dt.strftime(_STD_DATETIME_FMT))

    actions_taken = []
    errors = 0
    start = pbench.server._time()

    gen = gen_list_unpacked_aged(incomingpath, archivepath, curr_dt,
                                 max_unpacked_age)
    if config._unittests:
        # force the generator and sort the list
        gen = sorted(list(gen))

    for tb_incoming_dir, controller_name in gen:
        act_set = remove_unpacked(
            tb_incoming_dir,
            controller_name,
            resultspath,
            userspath,
            logger,
            options.dry_run,
        )
        unpacked_dir_name = Path(tb_incoming_dir).name
        act_path = Path(controller_name, unpacked_dir_name)
        act_set.set_name(act_path)
        actions_taken.append(act_set)
        if act_set.errors > 0:
            # Stop any further unpacked tar ball removal if an error is
            # encountered.
            break
    end = pbench.server._time()

    # Generate the ${TOP}/public_html prefix so we can strip it from the
    # various targets in the report.
    public_html = os.path.realpath(os.path.join(config.TOP, "public_html"))

    # Write the actions taken into a report file.
    with tempfile.NamedTemporaryFile(mode="w+t",
                                     prefix=f"{_NAME_}.",
                                     suffix=".report",
                                     dir=config.TMP) as tfp:
        duration = end - start
        total = len(actions_taken)
        print(
            f"Culled {total:d} unpacked tar ball directories ({errors:d}"
            f" errors) in {duration:0.2f} secs",
            file=tfp,
        )
        if total > 0:
            print("\nActions Taken:", file=tfp)
        for act_set in sorted(actions_taken, key=lambda a: a.name):
            print(
                f"  - {act_set.name} ({act_set.errors:d} errors,"
                f" {act_set.duration():0.2f} secs)",
                file=tfp,
            )
            for act in act_set.actions:
                assert act.noun.startswith(
                    public_html
                ), f"Logic bomb! {act.noun} not in .../public_html/"
                tgt = Path(act.noun[len(public_html) + 1:])
                if act.verb == "mv":
                    name = tgt.name
                    controller = tgt.parent
                    ex_tgt = controller / f".delete.{name}"
                    print(f"      $ {act.verb} {tgt} {ex_tgt}  # {act.status}",
                          file=tfp)
                else:
                    print(f"      $ {act.verb} {tgt}  # {act.status}",
                          file=tfp)

        # Flush out the report ahead of posting it.
        tfp.flush()
        tfp.seek(0)

        # We need to generate a report that lists all the actions taken.
        report = Report(config, _NAME_)
        report.init_report_template()
        try:
            report.post_status(config.timestamp(),
                               "status" if errors == 0 else "errors", tfp.name)
        except Exception:
            pass
    return errors
Example 19
def main(options, name):
    """Main entry point to pbench-index.

       The caller is required to pass the "options" argument with the following
       expected attributes:
           cfg_name              - Name of the configuration file to use
           dump_index_patterns   - Don't do any indexing, but just emit the
                                   list of index patterns that would be used
           dump_templates        - Dump the templates that would be used
           index_tool_data       - Index tool data only
           re_index              - Consider tar balls marked for re-indexing
       All exceptions are caught and logged to syslog with the stacktrace of
       the exception in a sub-object of the logged JSON document.

        Signal handlers are used to establish different patterns for the four
        behaviors:

        1. Gracefully stop processing tar balls
            - SIGQUIT
            - The current tar ball is indexed until completion, but no other
              tar balls are processed.
            - Handler Behavior:
                - Sets a flag that causes the code flow to break out of the
                  for loop.
                - Does not raise an exception.

        2. Interrupt the current tar ball being indexed, and proceed to the
           next one, if any
            - SIGINT
            - Handler Behavior:
                - try/except/finally placed immediately around the es_index()
                  call so that the signal handler will only be established for
                  the duration of the call.
                - Raises an exception caught by above try/except/finally.
                - The finally clause would take down the signal handler.

        3. Stop processing tar balls immediately and exit gracefully
            - SIGTERM
            - Handler Behavior:
                - Raises an exception caught by a new, outer-most try/except
                  block that does not have a finally clause (as you don't want
                  any code execution in the finally block).

        4. Re-evaluate the list of tar balls to index after indexing of the
            current tar ball has finished
            - SIGHUP
            - Report the current state of indexing
                - Current tar ball being processed
                - Count of Remaining tarballs
                - Count of Completed tarballs
                - No. of Errors encountered
            - Handler Behavior:
                - No exception raised
    """

    _name_suf = "-tool-data" if options.index_tool_data else ""
    _name_re = "-re" if options.re_index else ""
    name = f"{name}{_name_re}{_name_suf}"
    error_code = Index.error_code

    if not options.cfg_name:
        print(
            f"{name}: ERROR: No config file specified; set"
            " _PBENCH_SERVER_CONFIG env variable or"
            " use --config <file> on the command line",
            file=sys.stderr,
        )
        return error_code["CFG_ERROR"].value

    idxctx = None
    try:
        # We're going to need the Postgres DB to track dataset state, so setup
        # DB access. We need to do this before we create the IdxContext, since
        # that will need the template DB; so we create a PbenchServerConfig and
        # Logger independently.
        config = PbenchServerConfig(options.cfg_name)
        logger = get_pbench_logger(name, config)
        init_db(config, logger)

        # Now we can initialize the index context
        idxctx = IdxContext(options, name, config, logger, _dbg=_DEBUG)
    except (ConfigFileError, ConfigParserError) as e:
        print(f"{name}: {e}", file=sys.stderr)
        return error_code["CFG_ERROR"].value
    except BadConfig as e:
        print(f"{name}: {e}", file=sys.stderr)
        return error_code["BAD_CFG"].value
    except JsonFileError as e:
        print(f"{name}: {e}", file=sys.stderr)
        return error_code["MAPPING_ERROR"].value

    if options.dump_index_patterns:
        idxctx.templates.dump_idx_patterns()
        return 0

    if options.dump_templates:
        idxctx.templates.dump_templates()
        return 0

    res = error_code["OK"]

    ARCHIVE_rp = idxctx.config.ARCHIVE

    INCOMING_rp = idxctx.config.INCOMING
    INCOMING_path = idxctx.config.get_valid_dir_option("INCOMING", INCOMING_rp,
                                                       idxctx.logger)
    if not INCOMING_path:
        res = error_code["BAD_CFG"]

    qdir = idxctx.config.get_conf("QUARANTINE", "pbench-server",
                                  "pbench-quarantine-dir", idxctx.logger)
    if not qdir:
        res = error_code["BAD_CFG"]
    else:
        qdir_path = idxctx.config.get_valid_dir_option("QDIR", Path(qdir),
                                                       idxctx.logger)
        if not qdir_path:
            res = error_code["BAD_CFG"]

    if not res.success:
        # Exit early if we encounter any errors.
        return res.value

    idxctx.logger.debug("{}.{}: starting", name, idxctx.TS)

    index_obj = Index(name, options, idxctx, INCOMING_rp, ARCHIVE_rp, qdir)

    status, tarballs = index_obj.collect_tb()
    if status == 0 and tarballs:
        status = index_obj.process_tb(tarballs)

    return status
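
The graceful-stop behavior described in item 1 of the docstring can be sketched like this (the project's actual handler lives inside the Index machinery, which is not shown here):

import signal

# Sketch: SIGQUIT sets a flag and raises nothing; the indexing loop checks
# the flag after each tar ball so the one in progress completes first.
stop_requested = False

def sigquit_handler(signum, frame):
    global stop_requested
    stop_requested = True

signal.signal(signal.SIGQUIT, sigquit_handler)

# ... then, inside the tar ball loop:
# for tb in tarballs:
#     index_one(tb)          # hypothetical per-tar-ball indexing call
#     if stop_requested:
#         break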
Example 20
def main():
    cfg_name = os.environ.get("_PBENCH_SERVER_CONFIG")
    if not cfg_name:
        print(
            f"{_NAME_}: ERROR: No config file specified; set _PBENCH_SERVER_CONFIG env variable or"
            f" use --config <file> on the command line",
            file=sys.stderr,
        )
        return 2

    try:
        config = PbenchServerConfig(cfg_name)
    except BadConfig as e:
        print(f"{_NAME_}: {e}", file=sys.stderr)
        return 1

    logger = get_pbench_logger(_NAME_, config)

    # We're going to need the Postgres DB to track dataset state, so setup
    # DB access.
    init_db(config, logger)

    # add a BACKUP field to the config object
    config.BACKUP = backup = config.conf.get("pbench-server",
                                             "pbench-backup-dir")

    if not backup:
        logger.error(
            "Unspecified backup directory, no pbench-backup-dir config in"
            " pbench-server section")
        return 1

    backuppath = config.get_valid_dir_option("BACKUP", backup, logger)
    if not backuppath:
        return 1

    # instantiate the s3config class
    s3_config_obj = S3Config(config, logger)
    s3_config_obj = sanity_check(s3_config_obj, logger)

    logger.info("start-{}", config.TS)
    start = config.timestamp()

    prog = Path(sys.argv[0]).name

    sts = 0
    # N.B. tmpdir is the pathname of the temp directory.
    with tempfile.TemporaryDirectory() as tmpdir:

        archive_obj = BackupObject("ARCHIVE", config.ARCHIVE, tmpdir, logger)
        local_backup_obj = BackupObject("BACKUP", config.BACKUP, tmpdir,
                                        logger)
        s3_backup_obj = BackupObject("S3", s3_config_obj, tmpdir, logger)

        with tempfile.NamedTemporaryFile(mode="w+t", dir=tmpdir) as reportfp:
            reportfp.write(
                f"{prog}.{config.TS} ({config.PBENCH_ENV}) started at {start}\n"
            )
            if s3_config_obj is None:
                reportfp.write(
                    "\nNOTICE: S3 backup service is inaccessible; skipping"
                    " ARCHIVE to S3 comparison\n\n")

            # FIXME: Parallelize these three ...

            # Create entry list for archive
            logger.debug("Starting archive list creation")
            ar_start = config.timestamp()
            ret_sts = archive_obj.entry_list_creation()
            if ret_sts == Status.FAIL:
                sts += 1
            logger.debug("Finished archive list ({!r})", ret_sts)

            # Create entry list for backup
            logger.debug("Starting local backup list creation")
            lb_start = config.timestamp()
            ret_sts = local_backup_obj.entry_list_creation()
            if ret_sts == Status.FAIL:
                sts += 1
            logger.debug("Finished local backup list ({!r})", ret_sts)

            # Create entry list for S3
            if s3_config_obj is not None:
                logger.debug("Starting S3 list creation")
                s3_start = config.timestamp()
                ret_sts = s3_backup_obj.entry_list_creation()
                if ret_sts == Status.FAIL:
                    sts += 1
                logger.debug("Finished S3 list ({!r})", ret_sts)

            logger.debug("Checking MD5 signatures of archive")
            ar_md5_start = config.timestamp()
            try:
                # Check the data integrity in ARCHIVE (Question 1).
                md5_result_archive = archive_obj.checkmd5()
            except Exception as ex:
                msg = f"Failed to check data integrity of ARCHIVE ({config.ARCHIVE})"
                logger.exception(msg)
                reportfp.write(f"\n{msg} - '{ex}'\n")
                sts += 1
            else:
                if md5_result_archive > 0:
                    # Create a report for failed MD5 results from ARCHIVE (Question 1)
                    archive_obj.report_failed_md5(reportfp)
                    sts += 1
                    logger.debug(
                        "Checking MD5 signature of archive: {} errors",
                        md5_result_archive,
                    )
            logger.debug("Finished checking MD5 signatures of archive")

            logger.debug("Checking MD5 signatures of local backup")
            lb_md5_start = config.timestamp()
            try:
                # Check the data integrity in BACKUP (Question 2).
                md5_result_backup = local_backup_obj.checkmd5()
            except Exception as ex:
                msg = f"Failed to check data integrity of BACKUP ({config.BACKUP})"
                logger.exception(msg)
                reportfp.write(f"\n{msg} - '{ex}'\n")
            else:
                if md5_result_backup > 0:
                    # Create a report for failed MD5 results from BACKUP (Question 2)
                    local_backup_obj.report_failed_md5(reportfp)
                    sts += 1
                    logger.debug(
                        "Checking MD5 signature of local backup: {} errors",
                        md5_result_backup,
                    )
            logger.debug("Finished checking MD5 signatures of local backup")

            # Compare ARCHIVE with BACKUP (Questions 3 and 3a).
            msg = "Comparing ARCHIVE with BACKUP"
            reportfp.write(f"\n{msg}\n{'-' * len(msg)}\n")
            logger.debug("{}: start", msg)
            compare_entry_lists(archive_obj, local_backup_obj, reportfp,
                                logger)
            logger.debug("{}: end", msg)

            if s3_config_obj is not None:
                # Compare ARCHIVE with S3 (Questions 4, 4a, and 4b).
                msg = "Comparing ARCHIVE with S3"
                reportfp.write(f"\n{msg}\n{'-' * len(msg)}\n")
                logger.debug("{}: start", msg)
                compare_entry_lists(archive_obj, s3_backup_obj, reportfp,
                                    logger)
                logger.debug("{}: end", msg)

            if s3_config_obj is None:
                s3_start = "<skipped>"
            reportfp.write(f"\n\nPhases (started):\n"
                           f"Archive List Creation:       {ar_start}\n"
                           f"Local Backup List Creation:  {lb_start}\n"
                           f"S3 List Creation:            {s3_start}\n"
                           f"Archive MD5 Checks:          {ar_md5_start}\n"
                           f"Local Backup MD5 Checks:     {lb_md5_start}\n")

            end = config.timestamp()
            reportfp.write(
                f"\n{prog}.{config.TS} ({config.PBENCH_ENV}) finished at {end}\n"
            )

            # Rewind to the beginning.
            reportfp.seek(0)

            logger.debug("Sending report: start")
            report = Report(config, _NAME_)
            report.init_report_template()
            try:
                report.post_status(config.timestamp(), "status", reportfp.name)
            except Exception:
                pass
            logger.debug("Sending report: end")

    logger.info("end-{}", config.TS)

    return sts
Example 21
def config_setup(context: object) -> None:
    config = PbenchServerConfig(context.config)
    # We're going to need the Postgres DB to track dataset state, so setup
    # DB access.
    init_db(config, None)
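
config_setup() expects a context object whose config attribute names the server configuration file, matching the click-style contexts used in the other examples; a hypothetical invocation:

import os

# Hypothetical context carrier; the real one is a click context object.
class _Context:
    pass

ctx = _Context()
ctx.config = os.environ.get("_PBENCH_SERVER_CONFIG")
config_setup(ctx)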
Example 22
def main(options):
    try:
        if not options.cfg_name:
            print(
                f"{_NAME_}: ERROR: No config file specified; set"
                " _PBENCH_SERVER_CONFIG env variable",
                file=sys.stderr,
            )
            return 1

        try:
            config = PbenchServerConfig(options.cfg_name)
        except BadConfig as e:
            print(f"{_NAME_}: {e}", file=sys.stderr)
            return 2

        logger = get_pbench_logger(_NAME_, config)

        # We're going to need the Postgres DB to track dataset state, so setup
        # DB access.
        Database.init_db(config, logger)

        args = {}
        if options.create:
            user = options.create
            try:
                user = Auth.validate_user(user)
            except UnknownUser:
                # FIXME: I don't want to be creating the user here or
                # dealing with a non-existing user. The unittest setup
                # should create the test users we want ahead of time
                # using a pbench-user-manager command and we should
                # depend on having them here! The following is a hack
                # until that command exists.
                #
                # The desired behavior would be to remove this try and
                # except and allow UnknownUser to be handled below with
                # an error message and termination.
                User(
                    username=user,
                    first_name=user.capitalize(),
                    last_name="Account",
                    password=f"{user}_password",
                    email=f"{user}@example.com",
                ).add()
            args["owner"] = user
        if options.controller:
            args["controller"] = options.controller
        if options.path:
            args["path"] = options.path
        if options.name:
            args["name"] = options.name
        if options.md5:
            args["md5"] = options.md5
        if options.state:
            try:
                new_state = States[options.state.upper()]
            except KeyError:
                print(
                    f"{_NAME_}: Specified string '{options.state}' is not a Pbench dataset state",
                    file=sys.stderr,
                )
                return 1
            args["state"] = new_state

        if "path" not in args and ("controller" not in args
                                   or "name" not in args):
            print(
                f"{_NAME_}: Either --path or both --controller and --name must be specified",
                file=sys.stderr,
            )
            return 1

        # Either create a new dataset or attach to an existing dataset
        doit = Dataset.create if options.create else Dataset.attach

        # Find or create the specified dataset.
        doit(**args)
    except Exception as e:
        # Stringify any exception and report it; then fail
        logger.exception("Failed")
        print(f"{_NAME_}: {e}", file=sys.stderr)
        return 1
    else:
        return 0
Example 23
def main(options):
    if not options.tb_path:
        print(
            f"{_NAME_}: ERROR: No tar ball path specified",
            file=sys.stderr,
        )
        return 2

    try:
        tb_path = Path(options.tb_path).resolve(strict=True)
    except FileNotFoundError:
        print(
            f"The tar ball path, '{options.tb_path}', does not resolve to a real location",
            file=sys.stderr,
        )
        # Without a resolved path there is no tar ball name to check, so
        # bail out rather than fall through with tb_name unset.
        return 5
    tb_name = tb_path.name

    if not options.cfg_name:
        print(
            f"{_NAME_}: ERROR: No config file specified; set"
            " _PBENCH_SERVER_CONFIG env variable",
            file=sys.stderr,
        )
        return 3

    try:
        config = PbenchServerConfig(options.cfg_name)
    except BadConfig as e:
        print(f"{_NAME_}: {e}", file=sys.stderr)
        return 4

    archivepath = config.ARCHIVE

    incoming = config.INCOMING
    try:
        incomingpath = incoming.resolve(strict=True)
    except FileNotFoundError:
        print(
            f"The configured INCOMING directory, {incoming}, does not exist",
            file=sys.stderr,
        )
        return 7
    else:
        if not incomingpath.is_dir():
            print(
                f"The configured INCOMING directory, {incoming},"
                " is not a valid directory",
                file=sys.stderr,
            )
            return 8

    # Fetch the configured maximum number of days a tar can remain "unpacked"
    # in the INCOMING tree.
    try:
        max_unpacked_age = config.conf.get("pbench-server", "max-unpacked-age")
    except NoOptionError as e:
        print(f"{e}", file=sys.stderr)
        return 9
    try:
        max_unpacked_age = int(max_unpacked_age)
    except Exception:
        print(f"Bad maximum unpacked age, {max_unpacked_age}", file=sys.stderr)
        return 10

    # Check the unpacked directory name pattern.
    match = tb_pat.fullmatch(tb_name)
    if not match:
        print(f"Unrecognized tar ball name format, {tb_name}", file=sys.stderr)
        return 11

    if not str(tb_path).startswith(str(archivepath)):
        print(f"Given tar ball, {tb_path}, not from the ARCHIVE tree",
              file=sys.stderr)
        return 12

    if not tb_path.exists():
        print(
            f"Given tar ball, {tb_path}, does not seem to exist in the ARCHIVE tree",
            file=sys.stderr,
        )
        return 13

    # Determine the proper time to use as a reference.
    if config._ref_datetime is not None:
        try:
            curr_dt = config._ref_datetime
        except Exception:
            # Ignore bad dates from test environment.
            curr_dt = datetime.utcnow()
    else:
        curr_dt = datetime.utcnow()

    # Turn the pattern components of the match into a datetime object.
    tb_dt = datetime(
        int(match.group(1)),
        int(match.group(2)),
        int(match.group(3)),
        int(match.group(4)),
        int(match.group(5)),
        int(match.group(6)),
    )

    # See if this unpacked tar ball directory has "aged" out.
    timediff = curr_dt - tb_dt
    if timediff.days > max_unpacked_age:
        # Finally, make one last check to see if this tar ball
        # directory should be kept regardless of aging out.
        controller_p = tb_path.parent.name
        if Path(incomingpath, controller_p, tb_name,
                ".__pbench_keep__").is_file():
            ret_val = 0
        else:
            ret_val = 1
    else:
        ret_val = 0

    return ret_val
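
tb_pat is referenced above but not defined in this excerpt; given the six numeric groups fed to datetime(), it is presumably a compiled pattern of roughly this shape (an assumption, not the project's exact regex):

import re

# Sketch: tar ball names like "<prefix>_YYYY.MM.DDTHH.MM.SS.tar.xz";
# groups 1-6 map to year, month, day, hour, minute, second.
tb_pat = re.compile(
    r"\S+_(\d{4})\.(\d{2})\.(\d{2})T(\d{2})\.(\d{2})\.(\d{2})\.tar\.xz"
)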