def config_and_logger(self, valid_config):
    """Set up, then tear down, the agent configuration and logger.

    Before yielding, ``self.config`` is built from the file named by the
    ``_PBENCH_AGENT_CONFIG`` environment variable and ``self.logger`` is
    created from it under the name "pbench".  After the yield both
    attributes are reset to ``None``.

    ``valid_config`` is not referenced in the body; presumably it is
    requested only for its side effects (confirm against its definition).
    """
    cfg_path = os.environ["_PBENCH_AGENT_CONFIG"]
    self.config = PbenchAgentConfig(cfg_path)
    self.logger = get_pbench_logger("pbench", self.config)

    yield

    # Teardown: drop the references created during setup.
    self.config = None
    self.logger = None
def test_log_level(self):
    """Check the logger type and the effective log levels from log-level.cfg."""
    # Was test-26.5, test_logger_level.py
    cfg_file = Path("lib/pbench/test/unit/common/config/") / "log-level.cfg"
    config = PbenchConfig(cfg_file)

    default_logger = get_pbench_logger("test_log_level", config)
    assert (
        config.logger_type == "devlog"
    ), f"Unexpected logger type encountered, '{config.logger_type}', expected 'devlog'"

    # A logger named after this test is expected to sit at INFO.
    default_level = default_logger.logger.getEffectiveLevel()
    assert (
        default_level == logging.INFO
    ), f"Unexpected default logging level, {default_level}"

    # The logger named "other" is expected to sit at CRITICAL.
    other_logger = get_pbench_logger("other", config)
    other_level = other_logger.logger.getEffectiveLevel()
    assert (
        other_level == logging.CRITICAL
    ), f"Unexpected logging level, {other_level}"
def main(cfg_name):
    """Run ``process_tb`` over the reception area and post a status report.

    Args:
        cfg_name: path of the server configuration file; must be non-empty.

    Returns:
        0 on success; 1 when the configuration cannot be loaded or one of
        the quarantine sub-directory checks fails; 2 when no configuration
        file was named or a required directory value could not be fetched.
    """
    if not cfg_name:
        print(
            f"{_NAME_}: ERROR: No config file specified; set"
            " _PBENCH_SERVER_CONFIG env variable or use --config <file> on the"
            " command line",
            file=sys.stderr,
        )
        return 2

    try:
        config = PbenchServerConfig(cfg_name)
    except BadConfig as e:
        print(f"{_NAME_}: {e} (config file {cfg_name})", file=sys.stderr)
        return 1

    logger = get_pbench_logger(_NAME_, config)

    qdir, receive_dir = fetch_config_val(config, logger)
    # Both values are required below; bail out if either one is missing
    # rather than handing None to Path() or process_tb().
    if qdir is None or receive_dir is None:
        return 2

    qdir_md5 = qdirs_check("quarantine", Path(qdir, "md5-002"), logger)
    duplicates = qdirs_check("duplicates", Path(qdir, "duplicates-002"), logger)

    # The following directory holds tarballs that are quarantined because
    # of operational errors on the server. They should be retried after
    # the problem is fixed: basically, move them back into the reception
    # area for 002 agents and wait.
    errors = qdirs_check("errors", Path(qdir, "errors-002"), logger)

    if qdir_md5 is None or duplicates is None or errors is None:
        return 1

    counts = process_tb(config, logger, receive_dir, qdir_md5, duplicates, errors)

    result_string = (
        f"{config.TS}: Processed {counts.ntotal} entries,"
        f" {counts.ntbs} tarballs successful,"
        f" {counts.nquarantined} quarantined tarballs,"
        f" {counts.ndups} duplicately-named tarballs,"
        f" {counts.nerrs} errors."
    )
    logger.info(result_string)

    # prepare and send report
    with tempfile.NamedTemporaryFile(mode="w+t", dir=config.TMP) as reportfp:
        reportfp.write(f"{counts.nstatus}{result_string}\n")
        reportfp.seek(0)

        report = Report(config, _NAME_)
        report.init_report_template()
        try:
            report.post_status(config.timestamp(), "status", reportfp.name)
        except Exception as exc:
            # Reporting is best-effort: note the failure but still succeed.
            logger.warning("Report post Unsuccesful: '{}'", exc)

    return 0
def run_migrations_offline():
    """Run migrations in 'offline' mode.

    The context is configured with just a database URL rather than an
    Engine (an Engine would be acceptable as well), so no DBAPI needs to be
    available.  Calls to context.execute() here emit the given string to the
    script output.

    Exits the process with status 1 when the server configuration or logger
    cannot be set up, or when no engine URL is available.
    """
    try:
        cfg = get_server_config()
        log = get_pbench_logger(__name__, cfg)
    except Exception as exc:
        print(exc)
        sys.exit(1)

    engine_url = Database.get_engine_uri(cfg, log)
    if engine_url is None:
        sys.exit(1)

    offline_options = {
        "url": engine_url,
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
def create_app(server_config):
    """Build the "api-server" Flask application.

    Enables CORS for ``/api/*``, installs the pbench logger, registers the
    endpoints, initializes the database and arranges for the DB session to
    be removed when an application context is torn down.

    Exits the process with status 1 if database initialization raises.

    Returns:
        The configured Flask application.
    """
    app = Flask("api-server")

    cors_resources = {r"/api/*": {"origins": "*"}}
    CORS(app, resources=cors_resources)

    app.logger = get_pbench_logger(__name__, server_config)

    for flag in ("DEBUG", "TESTING"):
        app.config[flag] = False

    api = Api(app)
    register_endpoints(api, app, server_config)

    try:
        Database.init_db(server_config=server_config, logger=app.logger)
    except Exception:
        app.logger.exception(
            "Exception while initializing sqlalchemy database")
        sys.exit(1)

    def shutdown_session(exception=None):
        # Release the scoped DB session at the end of each app context.
        Database.db_session.remove()

    app.teardown_appcontext(shutdown_session)

    return app
def create_app():
    """Create the Flask app from the file named by _PBENCH_SERVER_CONFIG.

    The application is stored in the module-level ``app`` and returned.
    The process exits with status 1 when the environment variable is unset,
    the configuration is bad, the receive-dir prefix is missing, or the
    upload directory cannot be resolved.
    """
    global app

    cfg_name = os.environ.get("_PBENCH_SERVER_CONFIG")
    if not cfg_name:
        print(
            f"{__name__}: ERROR: No config file specified; set"
            " _PBENCH_SERVER_CONFIG",
            file=sys.stderr,
        )
        sys.exit(1)

    try:
        config = PbenchServerConfig(cfg_name)
    except BadConfig as e:
        print(f"{__name__}: {e} (config file {cfg_name})", file=sys.stderr)
        sys.exit(1)

    app = Flask(__name__)
    api = Api(app)

    app.logger = get_pbench_logger(__name__, config)

    # Keep the relevant configuration sections on the app itself.
    app.config_server = config.conf["pbench-server"]
    app.config_elasticsearch = config.conf["elasticsearch"]
    app.config_graphql = config.conf["graphql"]

    prdp = app.config_server.get("pbench-receive-dir-prefix")
    if not prdp:
        app.logger.error(
            "Missing config variable for pbench-receive-dir-prefix")
        sys.exit(1)

    try:
        upload_directory = Path(f"{prdp}-002").resolve(strict=True)
    except FileNotFoundError:
        app.logger.exception(
            "pbench-receive-dir-prefix does not exist on the host")
        sys.exit(1)
    except Exception:
        app.logger.exception(
            "Exception occurred during setting up the upload directory on the host"
        )
        sys.exit(1)
    # Every handler above exits, so reaching here means resolution worked.
    app.upload_directory = upload_directory

    # (Flask config key, pbench-server option, optional converter)
    settings = (
        ("PORT", "rest_port", None),
        ("VERSION", "rest_version", None),
        ("MAX_CONTENT_LENGTH", "rest_max_content_length", filesize_bytes),
        ("REST_URI", "rest_uri", None),
        ("LOG", "rest_log", None),
        ("BIND_HOST", "bind_host", None),
        ("WORKERS", "workers", None),
    )
    for key, option, convert in settings:
        raw = app.config_server.get(option)
        app.config[key] = raw if convert is None else convert(raw)

    register_endpoints(api, app)

    return app
def config_and_logger(self):
    """Set up a temp target directory, configuration and logger; then clean up.

    While the temporary directory exists, ``self.target_dir``,
    ``self.config`` (from the file named by ``_PBENCH_AGENT_CONFIG``) and
    ``self.logger`` are populated and control is yielded.  Afterwards the
    config and logger references are dropped and the ``.copied`` and
    ``.running`` marker files derived from ``MRT_DIR`` are removed if they
    exist.
    """
    with tempfile.TemporaryDirectory() as target_dir:
        # Setup
        self.target_dir = target_dir
        self.config = PbenchAgentConfig(os.environ["_PBENCH_AGENT_CONFIG"])
        self.logger = get_pbench_logger("unittest", self.config)

        yield

        # Teardown
        self.config = None
        self.logger = None
        leftovers = (
            f"{os.path.realpath(MRT_DIR)}.copied",
            f"{os.path.realpath(MRT_DIR)}/.running",
        )
        for marker in leftovers:
            if os.path.exists(marker):
                os.remove(marker)
def test_pbench_logger_level():
    """Emit one message at each standard level through the mocked handler.

    The handler is selected by the configured ``logging_level``; the body
    itself makes no assertions.
    """
    config = PbenchServerConfig(cfg_name)
    base_logger = get_pbench_logger(_NAME_, config)
    level_name = config.get("logging", "logging_level")
    logger = mock_the_handler(base_logger, level_name, log_files[level_name])

    # log_msgs is keyed by the numeric level rendered as a string.
    emitters = (
        (logger.debug, "10"),
        (logger.info, "20"),
        (logger.warning, "30"),
        (logger.error, "40"),
        (logger.critical, "50"),
    )
    for emit, key in emitters:
        emit(log_msgs[key])
def test_pbench_logger():
    """Log one debug message and compare it with the log file contents.

    The comparison only happens when the expected log file exists; the
    final character of the file (presumably the trailing newline) is
    ignored.
    """
    config = PbenchServerConfig(cfg_name)
    base_logger = get_pbench_logger(_NAME_, config)
    kind = config.get("logging", "logger_type")
    logger = mock_the_handler(base_logger, kind, log_files[kind])

    expected = log_msgs[kind]
    logger.debug(expected)

    log_path = os.path.join(logdir, log_files[kind])
    if not os.path.isfile(log_path):
        return

    with open(log_path, "r") as f:
        contents = f.read()
        assert (
            contents[:-1] == expected
        ), "Mismatch: the file did not contain the expected message."
def create_app(server_config):
    """Build the "api-server" Flask application and register its endpoints.

    CORS is enabled for ``/api/*``, the pbench logger is installed, and the
    DEBUG and TESTING flags are switched off.

    Returns:
        The configured Flask application.
    """
    app = Flask("api-server")
    api = Api(app)

    allowed = {r"/api/*": {"origins": "*"}}
    CORS(app, resources=allowed)

    app.logger = get_pbench_logger(__name__, server_config)
    app.config.update(DEBUG=False, TESTING=False)

    register_endpoints(api, app, server_config)
    return app
def main(options):
    """Import dataset state by processing each known link name.

    Args:
        options: parsed options; ``cfg_name`` is read here and the whole
            object is handed to ``Import``.

    Returns:
        1 when no configuration file is named or any link fails to
        process, 2 when the configuration is bad, otherwise 0.
    """
    if not options.cfg_name:
        print(
            f"{_NAME_}: ERROR: No config file specified; set"
            " _PBENCH_SERVER_CONFIG env variable",
            file=sys.stderr,
        )
        return 1

    try:
        config = PbenchServerConfig(options.cfg_name)
    except BadConfig as e:
        print(f"{_NAME_}: {e}", file=sys.stderr)
        return 2

    logger = get_pbench_logger(_NAME_, config)

    # We're going to need the Postgres DB to track dataset state, so setup
    # DB access.
    init_db(config, logger)

    # NOTE: the importer will ignore datasets that already exist in the DB;
    # we do the ACTION links first to get set up, and then sweep other links
    # to record the state of remaining datasets, especially to record those
    # which are quarantined.
    #
    # FIXME: This doesn't sweep the "<root>/quarantine" directory, which might
    # have additional datasets. Are they worth importing?
    actions = {
        "TO-UNPACK": States.UPLOADED,
        "TO-INDEX": States.UNPACKED,
        "INDEXED": States.INDEXED,
        "UNPACKED": States.UNPACKED,
        "WONT-UNPACK": States.QUARANTINED,
        "WONT-INDEX*": States.QUARANTINED,
        "BAD-MD5": States.QUARANTINED,
    }

    importer = Import(logger, options, config)

    # Every link is processed, even after an earlier one has failed.
    statuses = [importer.process(link, state) for link, state in actions.items()]
    return 1 if any(status != 0 for status in statuses) else 0
def create_app():
    """Create the Flask app from the file named by _PBENCH_SERVER_CONFIG.

    The application is stored in the module-level ``app`` and returned.
    The process exits with status 1 when the environment variable is unset,
    the configuration is bad, the ``pbench-receive-dir-prefix`` option is
    missing, or the upload directory cannot be resolved.
    """
    global app

    cfg_name = os.environ.get("_PBENCH_SERVER_CONFIG")
    if not cfg_name:
        print(
            f"{__name__}: ERROR: No config file specified; set"
            " _PBENCH_SERVER_CONFIG",
            file=sys.stderr,
        )
        sys.exit(1)

    try:
        config = PbenchServerConfig(cfg_name)
    except BadConfig as e:
        print(f"{__name__}: {e} (config file {cfg_name})", file=sys.stderr)
        sys.exit(1)

    app = Flask(__name__)
    api = Api(app)

    app.logger = get_pbench_logger(__name__, config)
    app.config_server = config.conf["pbench-server"]

    prdp = app.config_server.get("pbench-receive-dir-prefix")
    # Check for the missing option explicitly; otherwise a None value would
    # be formatted into the bogus path "None-002".
    if not prdp:
        app.logger.error(
            "Missing config variable for pbench-receive-dir-prefix")
        sys.exit(1)

    try:
        upload_directory = Path(f"{prdp}-002").resolve(strict=True)
    except FileNotFoundError:
        # The option is set but the directory is absent: say so, rather than
        # blaming the configuration.
        app.logger.exception(
            "pbench-receive-dir-prefix does not exist on the host")
        sys.exit(1)
    except Exception:
        app.logger.exception(
            "Exception occurred during setting up the upload directory on the host"
        )
        sys.exit(1)
    else:
        app.upload_directory = upload_directory

    app.config["PORT"] = app.config_server.get("rest_port")
    app.config["VERSION"] = app.config_server.get("rest_version")
    app.config["MAX_CONTENT_LENGTH"] = app.config_server.get(
        "rest_max_content_length")
    app.config["REST_URI"] = app.config_server.get("rest_uri")
    app.config["LOG"] = app.config_server.get("rest_log")

    register_endpoints(api, app)

    return app
def test_log_messages_to_devlog(self):
    """Test to log messages via /dev/log"""
    # Was test-26.2, via test_logger_type.py
    fname = "test_log_messages_to_devlog"
    self.logger = get_pbench_logger(fname, self.config)
    assert (
        self.config.logger_type == "devlog"
    ), f"Unexpected logger type encountered, '{self.config.logger_type}', expected 'devlog'"
    # The message must read ``self.config.log_dir``; the test object has no
    # ``config_log_dir`` attribute, so the old message raised AttributeError
    # instead of reporting the failure.
    assert (
        self.config.log_dir is None
    ), f"Unexpected log directory configuration found, {self.config.log_dir}"
    # The handler registered under this logger's name must be the one
    # actually attached to the underlying logger.
    assert _handlers[fname] == self.logger.logger.handlers[0]
    assert (
        _handlers[fname].__class__.__name__ == "SysLogHandler"
    ), f"Unexpected handler set for {fname}, {_handlers[fname].__class__.__name__!r}"
    assert (
        _handlers[fname].address == "/dev/log"
    ), f"Unexpected handler address set, {_handlers[fname].address!r}"
def test_log_messages_to_file(self):
    """Test to log messages to a file."""
    # Was test-26.1, via test_logger_type.py
    fname = "test_log_messages_to_file"
    # Precondition: the configuration starts out as "devlog" before this
    # test switches it to "file".  The message now names the value that is
    # actually being checked.
    assert (
        self.config.logger_type == "devlog"
    ), f"Unexpected logger type encountered, '{self.config.logger_type}', expected 'devlog'"
    self.config.logger_type = "file"
    with tempfile.TemporaryDirectory(
            suffix=".d", prefix="pbench-common-unit-tests.") as TMP:
        self.config.log_dir = str(Path(TMP) / "log-dir")
        # The log directory must not exist yet: creating the logger is what
        # is expected to create it.
        assert not Path(self.config.log_dir).exists()
        self.logger = get_pbench_logger(fname, self.config)
        assert Path(self.config.log_dir).is_dir(
        ), f"Missing logging directory, {self.config.log_dir}"
        assert _handlers[fname] == self.logger.logger.handlers[0]
        assert (
            _handlers[fname].__class__.__name__ == "FileHandler"
        ), f"Unexpected handler set, {_handlers[fname].__class__.__name__!r}"
def main():
    """Prepare the database and launch the server under gunicorn.

    Exits the process with status 1 when the server configuration cannot be
    loaded or a required option or section is missing.
    """
    try:
        server_config = get_server_config()
    except (ConfigFileNotSpecified, BadConfig) as e:
        print(e)
        sys.exit(1)

    logger = get_pbench_logger(__name__, server_config)

    def option(section, name):
        # All values end up on a command line or in a URI, so force str.
        return str(server_config.get(section, name))

    try:
        host = option("pbench-server", "bind_host")
        port = option("pbench-server", "bind_port")
        db = option("Postgres", "db_uri")
        workers = option("pbench-server", "workers")
        worker_timeout = option("pbench-server", "worker_timeout")

        # Multiple gunicorn workers will attempt to connect to the DB; rather
        # than attempt to synchronize them, detect a missing DB (from the
        # postgres URI) and create it here. It's safer to do this here,
        # where we're single-threaded.
        if not database_exists(db):
            logger.info("Postgres DB {} doesn't exist", db)
            create_database(db)
            logger.info("Created DB {}", db)
        Database.init_db(server_config, logger)
    except (NoOptionError, NoSectionError):
        logger.exception(f"{__name__}: ERROR")
        sys.exit(1)

    gunicorn_cmd = [
        "gunicorn",
        "--workers",
        workers,
        "--timeout",
        worker_timeout,
        "--pid",
        "/run/pbench-server/gunicorn.pid",
        "--bind",
        f"{host}:{port}",
        "pbench.cli.server.shell:app()",
    ]
    subprocess.run(gunicorn_cmd)
def run_migrations_online():
    """Run migrations in 'online' mode.

    An Engine is created from the configured engine URI and a connection
    from it is associated with the migration context.

    Exits the process with status 1 when the server configuration or logger
    cannot be set up.
    """
    try:
        cfg = get_server_config()
        log = get_pbench_logger(__name__, cfg)
    except Exception as exc:
        print(exc)
        sys.exit(1)

    engine_uri = Database.get_engine_uri(cfg, log)
    connectable = create_engine(engine_uri)

    with connectable.connect() as connection:
        context.configure(connection=connection,
                          target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()
def test_log_messages_to_hostport(self):
    """Test that a "hostport" logger gets a SysLogHandler bound to the
    configured logger_host and logger_port."""
    # Was test-26.3, via test_logger_type.py
    fname = "test_log_messages_to_hostport"
    self.config.logger_type = "hostport"
    self.config.logger_host = "localhost"
    self.config.logger_port = "42"
    self.logger = get_pbench_logger(fname, self.config)
    assert (
        self.config.logger_type == "hostport"
    ), f"Unexpected logger type encountered, '{self.config.logger_type}', expected 'hostport'"
    # The message must read ``self.config.log_dir``; the test object has no
    # ``config_log_dir`` attribute, so the old message raised AttributeError
    # instead of reporting the failure.
    assert (
        self.config.log_dir is None
    ), f"Unexpected log directory configuration found, {self.config.log_dir}"
    assert _handlers[fname] == self.logger.logger.handlers[0]
    assert (
        _handlers[fname].__class__.__name__ == "SysLogHandler"
    ), f"Unexpected handler set, {_handlers[fname].__class__.__name__!r}"
    # The port is configured as the string "42" but the handler address is
    # expected to carry the integer 42.
    assert (
        _handlers[fname].address[0] == "localhost"
        and _handlers[fname].address[1] == 42
    ), f"Unexpected handler address set, {_handlers[fname].address!r}"
def execute(self):
    """Register the user described by ``self.context`` and report the result.

    The server configuration named by the context is loaded and the
    database initialized before the user record is added.  A confirmation
    line is echoed, worded differently for admin users.
    """
    config = PbenchServerConfig(self.context.config)
    logger = get_pbench_logger(_NAME_, config)

    # We're going to need the Postgres DB to track dataset state, so setup
    # DB access.
    Database.init_db(config, logger)

    ctx = self.context
    user = User(
        username=ctx.username,
        password=ctx.password,
        first_name=ctx.first_name,
        last_name=ctx.last_name,
        email=ctx.email,
        # A falsy role is stored as the empty string.
        role=ctx.role or "",
    )
    user.add()

    message = (
        f"Admin user {self.context.username} registered"
        if user.is_admin()
        else f"User {self.context.username} registered"
    )
    click.echo(message)
def __init__(
    self,
    config,
    name,
    es=None,
    pid=None,
    group_id=None,
    user_id=None,
    hostname=None,
    version=None,
    templates=None,
):
    """Record the caller's identity and set up the reporting back end.

    Args:
        config: server configuration object.
        name: name of the reporting program; also used for the logger.
        es: optional Elasticsearch client; fetched via ``get_es`` when
            omitted and an index prefix is configured.
        pid, group_id, user_id, hostname: optional overrides for the
            values otherwise taken from the running process and host.
        version: optional version string; a falsy value is recorded as "".
        templates: optional templates object; a ``PbenchTemplates`` is
            built when omitted.
    """
    self.config = config
    self.name = name
    self.logger = get_pbench_logger(name, config)

    # We always create a base "tracking" document composed of parameters
    # from the caller, and other environmental data. This document is used
    # as the foundation for the first document posted to the target
    # Elasticsearch instance with the `post_status()` method.  All
    # subsequent calls to the `post_status()` method will use that first
    # document ID as their parent document ID.  This allows us to have
    # multiple status updates associated with the initial Report() caller.
    if config._unittests:
        # Fixed values when the configuration flags a unit-test run.
        _hostname, _pid, _group_id, _user_id = "example.com", 42, 43, 44
    else:
        _hostname = hostname or socket.gethostname()
        _pid = pid or os.getpid()
        _group_id = group_id or os.getgid()
        _user_id = user_id or os.getuid()

    self.generated_by = {
        "commit_id": self.config.COMMIT_ID,
        "group_id": _group_id,
        "hostname": _hostname,
        "pid": _pid,
        "user_id": _user_id,
        "version": version or "",
    }

    # The "tracking_id" is the final MD5 hash of the first document
    # indexed via the `post_status()` method.
    self.tracking_id = None

    try:
        self.idx_prefix = config.get("Indexing", "index_prefix")
    except (NoOptionError, NoSectionError):
        # No index prefix so reporting will be performed via logging.
        self.idx_prefix = None
        self.es = None
    else:
        if es is not None:
            self.es = es
        else:
            try:
                self.es = get_es(config, self.logger)
            except Exception:
                self.logger.exception("Unexpected failure fetching"
                                      " Elasticsearch configuration")
                # If we don't have an Elasticsearch configuration just use
                # None to indicate logging should be used instead.
                self.es = None

    if templates is None:
        templates = PbenchTemplates(self.config.BINDIR, self.idx_prefix,
                                    self.logger)
    self.templates = templates
def main(options):
    """Create a dataset, or attach to an existing one, and set its fields.

    Args:
        options: parsed options; ``cfg_name``, ``create``, ``controller``,
            ``path``, ``name``, ``md5`` and ``state`` are read.

    Returns:
        0 on success; 2 when the configuration is bad; 1 for a missing
        configuration file, an unknown state name, an insufficient dataset
        identification, or any unexpected exception.
    """
    # Bound up front so the catch-all handler below can tell whether a
    # logger exists yet; a failure before it is created would otherwise be
    # masked by an UnboundLocalError raised from the handler itself.
    logger = None
    try:
        if not options.cfg_name:
            print(
                f"{_NAME_}: ERROR: No config file specified; set"
                " _PBENCH_SERVER_CONFIG env variable",
                file=sys.stderr,
            )
            return 1

        try:
            config = PbenchServerConfig(options.cfg_name)
        except BadConfig as e:
            print(f"{_NAME_}: {e}", file=sys.stderr)
            return 2

        logger = get_pbench_logger(_NAME_, config)

        # We're going to need the Postgres DB to track dataset state, so setup
        # DB access.
        init_db(config, logger)

        # Only options that were actually supplied become keyword arguments.
        args = {}
        if options.create:
            args["owner"] = options.create
        if options.controller:
            args["controller"] = options.controller
        if options.path:
            args["path"] = options.path
        if options.name:
            args["name"] = options.name
        if options.md5:
            args["md5"] = options.md5
        if options.state:
            try:
                new_state = States[options.state.upper()]
            except KeyError:
                print(
                    f"{_NAME_}: Specified string '{options.state}' is not a Pbench dataset state",
                    file=sys.stderr,
                )
                return 1
            args["state"] = new_state

        if "path" not in args and ("controller" not in args
                                   or "name" not in args):
            print(
                f"{_NAME_}: Either --path or both --controller and --name must be specified",
                file=sys.stderr,
            )
            return 1

        # Either create a new dataset or attach to an existing dataset
        doit = Dataset.create if options.create else Dataset.attach

        # Find or create the specified dataset.
        doit(**args)
    except Exception as e:
        # Stringify any exception and report it; then fail
        if logger is not None:
            logger.exception("Failed")
        print(f"{_NAME_}: {e}", file=sys.stderr)
        return 1
    else:
        return 0
def main():
    """Audit the ARCHIVE tree against the local BACKUP and S3 copies.

    Entry lists are built for each location, MD5 signatures are checked for
    ARCHIVE and BACKUP, the lists are compared, and a text report is written
    and posted.

    Returns:
        2 when no configuration file is named; 1 when the configuration is
        bad or the backup directory is missing or invalid; otherwise the
        number of failed phases (0 when everything succeeded).
    """
    cfg_name = os.environ.get("_PBENCH_SERVER_CONFIG")
    if not cfg_name:
        print(
            f"{_NAME_}: ERROR: No config file specified; set _PBENCH_SERVER_CONFIG env variable or"
            f" use --config <file> on the command line",
            file=sys.stderr,
        )
        return 2

    try:
        config = PbenchServerConfig(cfg_name)
    except BadConfig as e:
        print(f"{_NAME_}: {e}", file=sys.stderr)
        return 1

    logger = get_pbench_logger(_NAME_, config)

    # We're going to need the Postgres DB to track dataset state, so setup
    # DB access.
    init_db(config, logger)

    # add a BACKUP field to the config object
    config.BACKUP = backup = config.conf.get("pbench-server", "pbench-backup-dir")

    if not backup:
        logger.error(
            "Unspecified backup directory, no pbench-backup-dir config in"
            " pbench-server section")
        return 1

    backuppath = config.get_valid_dir_option("BACKUP", backup, logger)
    if not backuppath:
        return 1

    # instantiate the s3config class
    s3_config_obj = S3Config(config, logger)
    s3_config_obj = sanity_check(s3_config_obj, logger)

    logger.info("start-{}", config.TS)
    start = config.timestamp()

    prog = Path(sys.argv[0]).name

    sts = 0
    # N.B. tmpdir is the pathname of the temp directory.
    with tempfile.TemporaryDirectory() as tmpdir:

        archive_obj = BackupObject("ARCHIVE", config.ARCHIVE, tmpdir, logger)
        local_backup_obj = BackupObject("BACKUP", config.BACKUP, tmpdir, logger)
        s3_backup_obj = BackupObject("S3", s3_config_obj, tmpdir, logger)

        with tempfile.NamedTemporaryFile(mode="w+t", dir=tmpdir) as reportfp:
            reportfp.write(
                f"{prog}.{config.TS} ({config.PBENCH_ENV}) started at {start}\n"
            )
            if s3_config_obj is None:
                reportfp.write(
                    "\nNOTICE: S3 backup service is inaccessible; skipping"
                    " ARCHIVE to S3 comparison\n\n")

            # FIXME: Parallelize these three ...

            # Create entry list for archive
            logger.debug("Starting archive list creation")
            ar_start = config.timestamp()
            ret_sts = archive_obj.entry_list_creation()
            if ret_sts == Status.FAIL:
                sts += 1
            logger.debug("Finished archive list ({!r})", ret_sts)

            # Create entry list for backup
            logger.debug("Starting local backup list creation")
            lb_start = config.timestamp()
            ret_sts = local_backup_obj.entry_list_creation()
            if ret_sts == Status.FAIL:
                sts += 1
            logger.debug("Finished local backup list ({!r})", ret_sts)

            # Create entry list for S3
            if s3_config_obj is not None:
                logger.debug("Starting S3 list creation")
                s3_start = config.timestamp()
                ret_sts = s3_backup_obj.entry_list_creation()
                if ret_sts == Status.FAIL:
                    sts += 1
                logger.debug("Finished S3 list ({!r})", ret_sts)

            logger.debug("Checking MD5 signatures of archive")
            ar_md5_start = config.timestamp()
            try:
                # Check the data integrity in ARCHIVE (Question 1).
                md5_result_archive = archive_obj.checkmd5()
            except Exception as ex:
                msg = f"Failed to check data integrity of ARCHIVE ({config.ARCHIVE})"
                logger.exception(msg)
                reportfp.write(f"\n{msg} - '{ex}'\n")
                sts += 1
            else:
                if md5_result_archive > 0:
                    # Create a report for failed MD5 results from ARCHIVE (Question 1)
                    archive_obj.report_failed_md5(reportfp)
                    sts += 1
                    logger.debug(
                        "Checking MD5 signature of archive: {} errors",
                        md5_result_archive,
                    )
            logger.debug("Finished checking MD5 signatures of archive")

            logger.debug("Checking MD5 signatures of local backup")
            lb_md5_start = config.timestamp()
            try:
                # Check the data integrity in BACKUP (Question 2).
                md5_result_backup = local_backup_obj.checkmd5()
            except Exception as ex:
                msg = f"Failed to check data integrity of BACKUP ({config.BACKUP})"
                logger.exception(msg)
                reportfp.write(f"\n{msg} - '{ex}'\n")
                # A check that could not run is a failed phase, exactly as
                # for ARCHIVE above; without this the audit could exit 0.
                sts += 1
            else:
                if md5_result_backup > 0:
                    # Create a report for failed MD5 results from BACKUP (Question 2)
                    local_backup_obj.report_failed_md5(reportfp)
                    sts += 1
                    logger.debug(
                        "Checking MD5 signature of local backup: {} errors",
                        md5_result_backup,
                    )
            logger.debug("Finished checking MD5 signatures of local backup")

            # Compare ARCHIVE with BACKUP (Questions 3 and 3a).
            msg = "Comparing ARCHIVE with BACKUP"
            reportfp.write(f"\n{msg}\n{'-' * len(msg)}\n")
            logger.debug("{}: start", msg)
            compare_entry_lists(archive_obj, local_backup_obj, reportfp, logger)
            logger.debug("{}: end", msg)

            if s3_config_obj is not None:
                # Compare ARCHIVE with S3 (Questions 4, 4a, and 4b).
                msg = "Comparing ARCHIVE with S3"
                reportfp.write(f"\n{msg}\n{'-' * len(msg)}\n")
                logger.debug("{}: start", msg)
                compare_entry_lists(archive_obj, s3_backup_obj, reportfp, logger)
                logger.debug("{}: end", msg)

            if s3_config_obj is None:
                # The S3 phase never ran, so there is no start time to show.
                s3_start = "<skipped>"
            reportfp.write(f"\n\nPhases (started):\n"
                           f"Archive List Creation:       {ar_start}\n"
                           f"Local Backup List Creation:  {lb_start}\n"
                           f"S3 List Creation:            {s3_start}\n"
                           f"Archive MD5 Checks:          {ar_md5_start}\n"
                           f"Local Backup MD5 Checks:     {lb_md5_start}\n")

            end = config.timestamp()
            reportfp.write(
                f"\n{prog}.{config.TS} ({config.PBENCH_ENV}) finished at {end}\n"
            )

            # Rewind to the beginning.
            reportfp.seek(0)

            logger.debug("Sending report: start")
            report = Report(config, _NAME_)
            report.init_report_template()
            try:
                report.post_status(config.timestamp(), "status", reportfp.name)
            except Exception as exc:
                # Posting is best-effort, but leave a trace of the failure
                # instead of discarding it silently.
                logger.warning("Failed to post the audit report: '{}'", exc)
            logger.debug("Sending report: end")

    logger.info("end-{}", config.TS)

    return sts
def execute(self) -> int:
    """Copy the result tarball named in the context; always returns 0.

    Exceptions raised while constructing ``CopyResultTb`` or while copying
    are not caught here.
    """
    agent_logger = get_pbench_logger("pbench-agent", self.config)
    ctx = self.context
    copier = CopyResultTb(
        ctx.controller,
        ctx.result_tb_name,
        self.config,
        agent_logger,
    )
    copier.copy_result_tb(ctx.token)
    return 0
def execute(self) -> int:
    """Tar up, upload and remove every result directory under pbench_run.

    Directories named ``tmp`` or starting with ``tools-`` are skipped.  For
    each remaining directory a tarball is built in a temporary directory,
    uploaded, and then both the tarball and (on success) the result
    directory are removed.  A summary line is logged at the end.

    Returns:
        Always 0; per-directory failures are only counted and logged.
    """
    logger = get_pbench_logger("pbench-agent", self.config)
    temp_dir = tempfile.mkdtemp(dir=self.config.pbench_tmp,
                                prefix="pbench-move-results.")

    runs_copied = 0
    failures = 0
    no_of_tb = 0

    for dirent in self.config.pbench_run.iterdir():
        if not dirent.is_dir():
            continue
        if dirent.name.startswith("tools-") or dirent.name == "tmp":
            continue
        no_of_tb += 1
        result_dir = dirent

        try:
            mrt = MakeResultTb(result_dir, temp_dir, self.config, logger)
        except FileNotFoundError as e:
            logger.error("File Not Found Error, {}", e)
            continue
        except NotADirectoryError as e:
            logger.error("Bad Directory, {}", e)
            continue

        try:
            result_tb_name = mrt.make_result_tb()
        except BadMDLogFormat as e:
            logger.warning("Bad Metadata.log file encountered, {}", e)
            failures += 1
            continue
        except FileNotFoundError as e:
            logger.debug("File Not Found error, {}", e)
            failures += 1
            continue
        except RuntimeError as e:
            logger.warning("Unexpected Error encountered, {}", e)
            failures += 1
            continue
        except Exception as e:
            logger.debug("Unexpected Error occurred, {}", e)
            failures += 1
            continue

        try:
            crt = CopyResultTb(self.context.controller, result_tb_name,
                               self.config, logger)
        except FileNotFoundError as e:
            logger.error("File Not Found error, {}", e)
            failures += 1
            continue

        uploaded = True
        try:
            crt.copy_result_tb(self.context.token)
        except (FileUploadError, RuntimeError) as e:
            logger.error("Error uploading a file, {}", e)
            uploaded = False

        # We always remove the constructed tar ball, regardless of success
        # or failure, since we keep the result directory below on failure.
        # (Previously an upload failure skipped this step and left the
        # tarball behind in the temporary directory.)
        removed = True
        try:
            os.remove(result_tb_name)
        except OSError:
            logger.error("Failed to remove {}", result_tb_name)
            removed = False

        if not (uploaded and removed):
            # One failure per run, whichever step(s) went wrong.
            failures += 1
            continue

        try:
            shutil.rmtree(result_dir)
        except OSError:
            logger.error("Failed to remove the {} directory hierarchy",
                         result_dir)
            failures += 1
            continue

        runs_copied += 1

    logger.info(
        "Status: Total no. of tarballs {}, Successfully moved {}, Encountered {} failures",
        no_of_tb,
        runs_copied,
        failures,
    )

    return 0
def main(cfg_name):
    """Back up tarballs locally and to S3, then post a status report.

    Args:
        cfg_name: path of the server configuration file; must be non-empty.

    Returns:
        2 when no configuration file is named; 1 when the configuration is
        bad; 3 when neither the local backup nor the S3 target passes the
        sanity check; otherwise 0.
    """
    if not cfg_name:
        print(
            f"{_NAME_}: ERROR: No config file specified; set"
            " _PBENCH_SERVER_CONFIG env variable or use --config <file> on the"
            " command line",
            file=sys.stderr,
        )
        return 2

    try:
        config = PbenchServerConfig(cfg_name)
    except BadConfig as e:
        print(f"{_NAME_}: {e}", file=sys.stderr)
        return 1

    logger = get_pbench_logger(_NAME_, config)

    # We're going to need the Postgres DB to track dataset state, so setup
    # DB access.
    init_db(config, logger)

    # Add a BACKUP and QDIR field to the config object
    config.BACKUP = config.conf.get("pbench-server", "pbench-backup-dir")
    config.QDIR = config.get("pbench-server", "pbench-quarantine-dir")

    # call the LocalBackupObject class
    lb_obj = LocalBackupObject(config)

    # call the S3Config class
    s3_obj = S3Config(config, logger)

    lb_obj, s3_obj = sanity_check(lb_obj, s3_obj, config, logger)

    # Only give up when both targets are unavailable.
    if lb_obj is None and s3_obj is None:
        return 3

    logger.info("start-{}", config.TS)

    # Initiate the backup
    counts = backup_data(lb_obj, s3_obj, config, logger)

    result_string = (
        f"Total processed: {counts.ntotal},"
        f" Local backup successes: {counts.nbackup_success},"
        f" Local backup failures: {counts.nbackup_fail},"
        f" S3 upload successes: {counts.ns3_success},"
        f" S3 upload failures: {counts.ns3_fail},"
        f" Quarantined: {counts.nquaran}"
    )

    logger.info(result_string)

    prog = Path(sys.argv[0]).name

    # prepare and send report
    with tempfile.NamedTemporaryFile(mode="w+t", dir=config.TMP) as reportfp:
        reportfp.write(
            f"{prog}.{config.timestamp()}({config.PBENCH_ENV})\n{result_string}\n"
        )
        reportfp.seek(0)

        report = Report(config, _NAME_)
        report.init_report_template()
        try:
            report.post_status(config.timestamp(), "status", reportfp.name)
        except Exception as exc:
            # Posting is best-effort, but leave a trace of the failure
            # instead of discarding it silently.
            logger.warning("Failed to post the backup report: '{}'", exc)

    logger.info("end-{}", config.TS)

    return 0
def move_results(ctx, _user, _prefix, _show_server):
    """Tar up each result directory, copy it to the server, and remove it.

    Args:
        ctx: context mapping; ``ctx["args"]["config"]`` names the agent
            configuration file.
        _user: user to record; falls back to the configured ``pbench_user``.
        _prefix: passed through to ``MakeResultTb``.
        _show_server: not referenced in this function.

    Returns:
        The number of result directories whose copy failed.  The process
        exits with status 1 on setup errors or when a tarball or result
        directory cannot be removed.
    """
    # Imported here because the directory removal below needs it and the
    # module's import block is not visible from this function.
    import shutil

    config = PbenchAgentConfig(ctx["args"]["config"])
    logger = get_pbench_logger("pbench-move-results", config)

    controller = os.environ.get("full_hostname")
    if not controller:
        logger.error("Missing controller name (should be 'hostname -f' value)")
        sys.exit(1)

    results_webserver = config.results.get("webserver")
    if not results_webserver:
        logger.error(
            "No web server host configured from which we can fetch the FQDN of the host to which we copy/move results"
        )
        logger.debug("'webserver' variable in 'results' section not set")

    if not _user:
        _user = config.agent.get("pbench_user")

    server_rest_url = config.results.get("server_rest_url")
    response = requests.get(f"{server_rest_url}/host_info")
    if response.status_code not in [200, 201]:
        logger.error("Unable to determine results host info from %s/host_info",
                     server_rest_url)
        sys.exit(1)
    if response.text.startswith("MESSAGE"):
        # The text after the first "===" is shown; if the separator is
        # absent, show the whole reply rather than fail with IndexError.
        pieces = response.text.split("===")
        message = pieces[1] if len(pieces) > 1 else response.text
        logger.info("*** Message from sysadmins of %s:", results_webserver)
        logger.info("***\n*** %s", message)
        logger.info("***\n*** No local actions taken.")
        sys.exit(1)

    # The path prefix is the second ":"-separated field; a reply without a
    # ":" is reported through the error below instead of an IndexError.
    fields = response.text.split(":")
    results_path_prefix = fields[1] if len(fields) > 1 else ""
    if not results_path_prefix:
        logger.error(
            "fetch results host info did not contain a path prefix: %s",
            response.text)
        sys.exit(1)

    runs_copied = 0
    failures = 0

    try:
        temp_dir = tempfile.mkdtemp(dir=config.pbench_tmp,
                                    prefix="pbench-move-results.")
    except Exception:
        logger.error("Failed to create temporary directory")
        sys.exit(1)

    # Only the immediate sub-directories of pbench_run are considered.
    dirs = [
        _dir for _dir in next(os.walk(config.pbench_run))[1]
        if not _dir.startswith("tools-") and not _dir.startswith("tmp")
    ]

    for _dir in dirs:
        result_dir = config.pbench_run / _dir
        mrt = MakeResultTb(result_dir, temp_dir, _user, _prefix, config, logger)
        result_tb_name = mrt.make_result_tb()
        if not result_tb_name:
            continue

        crt = CopyResultTb(controller, result_tb_name, config, logger)
        copy_result = crt.copy_result_tb()
        try:
            os.remove(result_tb_name)
            os.remove(f"{result_tb_name}.md5")
        except OSError:
            logger.error("rm failed to remove %s and its .md5 file",
                         result_tb_name)
            sys.exit(1)
        if not copy_result:
            failures += 1
            continue

        try:
            # result_dir is a directory tree: os.remove() cannot delete
            # directories, so it always failed here.
            shutil.rmtree(result_dir)
        except OSError:
            logger.error("rm failed to remove the %s directory hierarchy",
                         result_dir)
            sys.exit(1)

        runs_copied += 1

    if runs_copied + failures > 0:
        logger.debug("successfully moved %s runs, encountered %s failures",
                     runs_copied, failures)

    return failures
def main(options):
    """Re-index the tar balls whose dates fall in the requested range.

    Args:
        options: parsed options; ``cfg_name``, ``oldest``, ``newest`` and
            ``dry_run`` are read.

    Returns:
        0 on success; 1 when no configuration file is named; 2 when the
        configuration is bad; 5 when the INCOMING directory does not
        exist; 6 when it is not a directory; 7 when the date range cannot
        be parsed.
    """
    if not options.cfg_name:
        print(
            f"{_NAME_}: ERROR: No config file specified; set"
            " _PBENCH_SERVER_CONFIG env variable",
            file=sys.stderr,
        )
        return 1

    try:
        config = pbench.server.PbenchServerConfig(options.cfg_name)
    except BadConfig as e:
        print(f"{_NAME_}: {e}", file=sys.stderr)
        return 2

    logger = get_pbench_logger(_NAME_, config)

    # We're going to need the Postgres DB to track dataset state, so setup
    # DB access.
    init_db(config, logger)

    archive_p = config.ARCHIVE

    try:
        incoming_p = config.INCOMING.resolve(strict=True)
    except FileNotFoundError:
        print(
            f"The configured INCOMING directory, {config.INCOMING}, does not exist",
            file=sys.stderr,
        )
        return 5
    if not incoming_p.is_dir():
        print(
            f"The configured INCOMING directory, {config.INCOMING}, is not a valid directory",
            file=sys.stderr,
        )
        return 6

    _fmt = "%Y-%m-%d"
    try:
        oldest_dt = datetime.strptime(options.oldest, _fmt)
        newest_dt = datetime.strptime(options.newest, _fmt)
    except Exception as exc:
        print(
            f"Invalid time range, {options.oldest} to {options.newest}, "
            f"'{exc}', expected time range values in the form YYYY-MM-DD",
            file=sys.stderr,
        )
        return 7
    if oldest_dt > newest_dt:
        # For convenience, swap oldest and newest dates that are reversed.
        oldest_dt, newest_dt = newest_dt, oldest_dt

    print(f"Re-indexing tar balls in the range {oldest_dt} to {newest_dt}")

    start = pbench.server._time()
    actions = [
        reindex(controller_name, tb_name, archive_p, incoming_p,
                options.dry_run)
        for controller_name, tb_name in gen_reindex_list(
            archive_p, oldest_dt, newest_dt)
    ]
    end = pbench.server._time()

    for act_set in sorted(actions):
        print(f"{act_set!r}")

    print(f"Run-time: {start} {end} {end - start}")
    return 0
def main(options):
    """Cull unpacked tar ball directories which exceed the configured age.

    The ``options`` object is expected to provide:

        cfg_name - name of the server configuration file
        dry_run  - when set, the culling banner is printed instead of being
                   logged, and the flag is passed on to remove_unpacked()

    Every unpacked tar ball directory yielded by gen_list_unpacked_aged() is
    handed to remove_unpacked(); processing stops at the first action set
    which records an error.  A report of all actions taken is written to a
    temporary file and posted via Report.post_status().

    Returns:
        1 - no configuration file was specified
        2 - the configuration file could not be loaded (BadConfig)
        3 - the INCOMING, RESULTS, or USERS directory option is not valid
        5 - the "max-unpacked-age" option is missing
        6 - the "max-unpacked-age" option is not an integer
        otherwise the total number of errors recorded by the action sets
        processed (0 on complete success)
    """
    if not options.cfg_name:
        print(
            f"{_NAME_}: ERROR: No config file specified; set"
            " _PBENCH_SERVER_CONFIG env variable",
            file=sys.stderr,
        )
        return 1

    try:
        config = PbenchServerConfig(options.cfg_name)
    except BadConfig as e:
        print(f"{_NAME_}: {e}", file=sys.stderr)
        return 2

    logger = get_pbench_logger(_NAME_, config)

    archivepath = config.ARCHIVE

    incoming = config.INCOMING
    incomingpath = config.get_valid_dir_option("INCOMING", incoming, logger)
    if not incomingpath:
        return 3

    results = config.RESULTS
    resultspath = config.get_valid_dir_option("RESULTS", results, logger)
    if not resultspath:
        return 3

    users = config.USERS
    userspath = config.get_valid_dir_option("USERS", users, logger)
    if not userspath:
        return 3

    # Fetch the configured maximum number of days a tar can remain "unpacked"
    # in the INCOMING tree.
    try:
        max_unpacked_age = config.conf.get("pbench-server", "max-unpacked-age")
    except NoOptionError as e:
        logger.error(f"{e}")
        return 5
    try:
        max_unpacked_age = int(max_unpacked_age)
    except Exception:
        logger.error("Bad maximum unpacked age, {}", max_unpacked_age)
        return 6

    # First phase is to find all the tar balls which are beyond the max
    # unpacked age, and which still have an unpacked directory in INCOMING.
    #
    # A reference date/time supplied by the configuration takes precedence
    # over the current time.
    curr_dt = config._ref_datetime
    if curr_dt is None:
        curr_dt = datetime.utcnow()

    _msg = "Culling unpacked tar balls {} days older than {}"
    if options.dry_run:
        print(_msg.format(max_unpacked_age, curr_dt.strftime(_STD_DATETIME_FMT)))
    else:
        logger.debug(_msg, max_unpacked_age, curr_dt.strftime(_STD_DATETIME_FMT))

    actions_taken = []
    errors = 0
    start = pbench.server._time()

    gen = gen_list_unpacked_aged(incomingpath, archivepath, curr_dt, max_unpacked_age)
    if config._unittests:
        # force the generator and sort the list
        gen = sorted(gen)

    for tb_incoming_dir, controller_name in gen:
        act_set = remove_unpacked(
            tb_incoming_dir,
            controller_name,
            resultspath,
            userspath,
            logger,
            options.dry_run,
        )
        unpacked_dir_name = Path(tb_incoming_dir).name
        act_path = Path(controller_name, unpacked_dir_name)
        act_set.set_name(act_path)
        actions_taken.append(act_set)
        if act_set.errors > 0:
            # Account for the failures so that the report summary, the
            # report kind, and our return value all reflect them, then stop
            # any further unpacked tar ball removal.
            errors += act_set.errors
            break
    end = pbench.server._time()

    # Generate the ${TOP}/public_html prefix so we can strip it from the
    # various targets in the report.
    public_html = os.path.realpath(os.path.join(config.TOP, "public_html"))

    # Write the actions taken into a report file.
    with tempfile.NamedTemporaryFile(
        mode="w+t", prefix=f"{_NAME_}.", suffix=".report", dir=config.TMP
    ) as tfp:
        duration = end - start
        total = len(actions_taken)
        print(
            f"Culled {total:d} unpacked tar ball directories ({errors:d}"
            f" errors) in {duration:0.2f} secs",
            file=tfp,
        )
        if total > 0:
            print("\nActions Taken:", file=tfp)
            for act_set in sorted(actions_taken, key=lambda a: a.name):
                print(
                    f" - {act_set.name} ({act_set.errors:d} errors,"
                    f" {act_set.duration():0.2f} secs)",
                    file=tfp,
                )
                for act in act_set.actions:
                    assert act.noun.startswith(
                        public_html
                    ), f"Logic bomb! {act.noun} not in .../public_html/"
                    # Strip the public_html prefix and its trailing separator.
                    tgt = Path(act.noun[len(public_html) + 1:])
                    if act.verb == "mv":
                        name = tgt.name
                        controller = tgt.parent
                        ex_tgt = controller / f".delete.{name}"
                        print(
                            f" $ {act.verb} {tgt} {ex_tgt} # {act.status}",
                            file=tfp,
                        )
                    else:
                        print(f" $ {act.verb} {tgt} # {act.status}", file=tfp)

        # Flush out the report ahead of posting it.
        tfp.flush()
        tfp.seek(0)

        # We need to generate a report that lists all the actions taken.  The
        # post happens while the temporary file still exists.
        report = Report(config, _NAME_)
        report.init_report_template()
        try:
            report.post_status(
                config.timestamp(), "status" if errors == 0 else "errors", tfp.name
            )
        except Exception as exc:
            # Posting the report is best-effort, but leave a trace of the
            # failure instead of silently dropping it.
            logger.warning("Failed to post the status report: '{}'", exc)
    return errors
def move_results(ctx, _user, _prefix, _show_server):
    """Package each local result directory as a tar ball and move it off-host.

    Args:
        ctx: mapping whose ``ctx["args"]["config"]`` value names the agent
            configuration to load
        _user: passed through to MakeResultTb
        _prefix: passed through to MakeResultTb
        _show_server: not used by this function

    The process exits with status 1 (via sys.exit) when the controller name
    is missing, the results host information cannot be fetched or is
    malformed, the server replies with a sysadmin message, the temporary
    directory cannot be created, or a tar ball / result directory cannot be
    removed.

    Returns:
        A tuple of (runs_copied, failures).
    """
    config = PbenchAgentConfig(ctx["args"]["config"])
    logger = get_pbench_logger("pbench", config)

    controller = os.environ.get("_pbench_full_hostname")
    if not controller:
        logger.error("Missing controller name (should be 'hostname -f' value)")
        sys.exit(1)

    results_webserver = config.results.get("webserver")
    if not results_webserver:
        # Reported but not fatal: the value is only used in the sysadmin
        # message output below.
        logger.error(
            "No web server host configured from which we can fetch the FQDN of the host to which we copy/move results"
        )
        logger.debug("'webserver' variable in 'results' section not set")

    server_rest_url = config.results.get("server_rest_url")
    response = requests.get(f"{server_rest_url}/host_info")
    if response.status_code not in [200, 201]:
        logger.error(
            "Unable to determine results host info from %s/host_info", server_rest_url
        )
        sys.exit(1)

    if response.text.startswith("MESSAGE"):
        # The message body is the field following the first "===" separator;
        # fall back to the whole response if the separator is missing rather
        # than failing with an IndexError.
        message_fields = response.text.split("===")
        message = message_fields[1] if len(message_fields) > 1 else response.text
        logger.info("*** Message from sysadmins of %s:", results_webserver)
        logger.info("***\n*** %s", message)
        logger.info("***\n*** No local actions taken.")
        sys.exit(1)

    # The path prefix is the second ":"-separated field; a response without
    # any ":" is treated the same as one with an empty prefix.
    info_fields = response.text.split(":")
    results_path_prefix = info_fields[1] if len(info_fields) > 1 else ""
    if not results_path_prefix:
        logger.error(
            "fetch results host info did not contain a path prefix: %s", response.text
        )
        sys.exit(1)

    try:
        temp_dir = tempfile.mkdtemp(
            dir=config.pbench_tmp, prefix="pbench-move-results."
        )
    except Exception:
        logger.error("Failed to create temporary directory")
        sys.exit(1)

    runs_copied = 0
    # NOTE(review): nothing in this function increments `failures`, and the
    # result of copy_result_tb() is not inspected -- confirm that
    # copy_result_tb() signals failure by raising or exiting.
    failures = 0

    for dirent in config.pbench_run.iterdir():
        if not dirent.is_dir():
            continue
        if dirent.name.startswith("tools-") or dirent.name == "tmp":
            continue
        result_dir = dirent
        mrt = MakeResultTb(result_dir, temp_dir, _user, _prefix, config, logger)
        result_tb_name = mrt.make_result_tb()
        assert (
            result_tb_name
        ), "Logic bomb! make_result_tb() always returns a tar ball name"
        crt = CopyResultTb(controller, result_tb_name, config, logger)
        crt.copy_result_tb()
        try:
            # We always remove the constructed tar ball, regardless of success
            # or failure, since we keep the result directory below on failure.
            os.remove(result_tb_name)
            os.remove(f"{result_tb_name}.md5")
        except OSError:
            logger.error("rm failed to remove %s and its .md5 file", result_tb_name)
            sys.exit(1)

        try:
            shutil.rmtree(result_dir)
        except OSError:
            logger.error("rm failed to remove the %s directory hierarchy", result_dir)
            sys.exit(1)

        runs_copied += 1

    if runs_copied + failures > 0:
        logger.debug(
            "successfully moved %s runs, encountered %s failures", runs_copied, failures
        )

    return runs_copied, failures
def main(options):
    """Create a dataset, or attach to an existing one, from the CLI options.

    The ``options`` object is expected to provide:

        cfg_name   - name of the server configuration file
        create     - user name to own a newly created dataset; when not set
                     an existing dataset is attached to instead
        controller - controller name of the dataset
        path       - path of the dataset
        name       - name of the dataset
        md5        - MD5 value of the dataset
        state      - name of a dataset state (matched case-insensitively
                     against States)

    Either ``path``, or both ``controller`` and ``name``, must be given.

    Returns:
        0 - success
        1 - no configuration file specified, unknown state name, missing
            dataset identification, or any unexpected exception
        2 - the configuration file could not be loaded (BadConfig)
    """
    # The catch-all handler below logs through `logger`; it must be bound
    # even when a failure occurs before the logger has been created.
    logger = None
    try:
        if not options.cfg_name:
            print(
                f"{_NAME_}: ERROR: No config file specified; set"
                " _PBENCH_SERVER_CONFIG env variable",
                file=sys.stderr,
            )
            return 1

        try:
            config = PbenchServerConfig(options.cfg_name)
        except BadConfig as e:
            print(f"{_NAME_}: {e}", file=sys.stderr)
            return 2

        logger = get_pbench_logger(_NAME_, config)

        # We're going to need the Postgres DB to track dataset state, so setup
        # DB access.
        Database.init_db(config, logger)

        args = {}
        if options.create:
            user = options.create
            try:
                user = Auth.validate_user(user)
            except UnknownUser:
                # FIXME: I don't want to be creating the user here or
                # dealing with a non-existing user. The unittest setup
                # should create the test users we want ahead of time
                # using a pbench-user-manager command and we should
                # depend on having them here! The following is a hack
                # until that command exists.
                #
                # The desired behavior would be to remove this try and
                # except and allow UnknownUser to be handled below with
                # an error message and termination.
                User(
                    username=user,
                    first_name=user.capitalize(),
                    last_name="Account",
                    password=f"{user}_password",
                    email=f"{user}@example.com",
                ).add()
            args["owner"] = user
        if options.controller:
            args["controller"] = options.controller
        if options.path:
            args["path"] = options.path
        if options.name:
            args["name"] = options.name
        if options.md5:
            args["md5"] = options.md5
        if options.state:
            try:
                new_state = States[options.state.upper()]
            except KeyError:
                print(
                    f"{_NAME_}: Specified string '{options.state}' is not a Pbench dataset state",
                    file=sys.stderr,
                )
                return 1
            args["state"] = new_state

        if "path" not in args and ("controller" not in args or "name" not in args):
            print(
                f"{_NAME_}: Either --path or both --controller and --name must be specified",
                file=sys.stderr,
            )
            return 1

        # Either create a new dataset or attach to an existing dataset
        doit = Dataset.create if options.create else Dataset.attach

        # Find or create the specified dataset.
        doit(**args)
    except Exception as e:
        # Stringify any exception and report it; then fail.  The logger may
        # not exist yet if the failure happened during early setup.
        if logger is not None:
            logger.exception("Failed")
        print(f"{_NAME_}: {e}", file=sys.stderr)
        return 1
    else:
        return 0
def main(options, name):
    """Main entry point to pbench-index.

    The caller is required to pass the "options" argument with the following
    expected attributes:

        cfg_name            - Name of the configuration file to use
        dump_index_patterns - Don't do any indexing, but just emit the list
                              of index patterns that would be used
        dump_templates      - Dump the templates that would be used
        index_tool_data     - Index tool data only
        re_index            - Consider tar balls marked for re-indexing

    The given "name" is extended with "-re" when re-indexing and with
    "-tool-data" when indexing tool data only; the extended name is used for
    the logger, the index context, and all messages.

    All exceptions are caught and logged to syslog with the stacktrace of
    the exception in a sub-object of the logged JSON document.

    Signal handlers are used to establish the following behaviors:

        1. Gracefully stop processing tar balls
            - SIGQUIT
            - The current tar ball is indexed until completion, but no other
              tar balls are processed.
            - Handler Behavior:
                - Sets a flag that causes the code flow to break out of the
                  for loop.
                - Does not raise an exception.

        2. Interrupt the current tar ball being indexed, and proceed to the
           next one, if any
            - SIGINT
            - Handler Behavior:
                - try/except/finally placed immediately around the
                  es_index() call so that the signal handler will only be
                  established for the duration of the call.
                - Raises an exception caught by above try/except/finally.
                - The finally clause would take down the signal handler.

        3. Stop processing tar balls immediately and exit gracefully
            - SIGTERM
            - Handler Behavior:
                - Raises an exception caught by a new, outer-most,
                  try/except block that does not have a finally clause (as
                  you don't want any code execution in the finally block).

        4. Re-evaluate the list of tar balls to index after indexing of the
           current tar ball has finished
            - SIGHUP
            - Report the current state of indexing
                - Current tar ball being processed
                - Count of Remaining tarballs
                - Count of Completed tarballs
                - No. of Errors encountered
            - Handler Behavior:
                - No exception raised

    Returns the value of an Index.error_code entry when setup fails, 0 after
    dumping index patterns or templates, and otherwise the status produced
    by collecting and processing the tar balls.
    """
    tool_data_suffix = "-tool-data" if options.index_tool_data else ""
    re_index_suffix = "-re" if options.re_index else ""
    name = f"{name}{re_index_suffix}{tool_data_suffix}"

    error_code = Index.error_code

    if not options.cfg_name:
        print(
            f"{name}: ERROR: No config file specified; set"
            " _PBENCH_SERVER_CONFIG env variable or"
            " use --config <file> on the command line",
            file=sys.stderr,
        )
        return error_code["CFG_ERROR"].value

    idxctx = None
    try:
        # The Postgres DB tracks dataset state and must be ready before the
        # IdxContext is created, since that needs the template DB; hence the
        # PbenchServerConfig and logger are created independently first.
        config = PbenchServerConfig(options.cfg_name)
        logger = get_pbench_logger(name, config)
        init_db(config, logger)

        # With the DB available, the index context can be initialized.
        idxctx = IdxContext(options, name, config, logger, _dbg=_DEBUG)
    except (ConfigFileError, ConfigParserError, BadConfig, JsonFileError) as e:
        # Map the failure to its exit code, checking the exception classes
        # in the same order of precedence as separate handlers would.
        if isinstance(e, (ConfigFileError, ConfigParserError)):
            failure = "CFG_ERROR"
        elif isinstance(e, BadConfig):
            failure = "BAD_CFG"
        else:
            failure = "MAPPING_ERROR"
        print(f"{name}: {e}", file=sys.stderr)
        return error_code[failure].value

    if options.dump_index_patterns:
        idxctx.templates.dump_idx_patterns()
        return 0

    if options.dump_templates:
        idxctx.templates.dump_templates()
        return 0

    res = error_code["OK"]

    archive_rp = idxctx.config.ARCHIVE

    incoming_rp = idxctx.config.INCOMING
    incoming_ok = idxctx.config.get_valid_dir_option(
        "INCOMING", incoming_rp, idxctx.logger
    )
    if not incoming_ok:
        res = error_code["BAD_CFG"]

    # The quarantine directory is checked even when INCOMING was already
    # found to be bad, so that each configuration problem gets reported.
    qdir = idxctx.config.get_conf(
        "QUARANTINE", "pbench-server", "pbench-quarantine-dir", idxctx.logger
    )
    if not qdir or not idxctx.config.get_valid_dir_option(
        "QDIR", Path(qdir), idxctx.logger
    ):
        res = error_code["BAD_CFG"]

    if not res.success:
        # Exit early if we encounter any errors.
        return res.value

    idxctx.logger.debug("{}.{}: starting", name, idxctx.TS)

    index_obj = Index(name, options, idxctx, incoming_rp, archive_rp, qdir)

    status, tarballs = index_obj.collect_tb()
    if status != 0 or not tarballs:
        return status
    return index_obj.process_tb(tarballs)