def update_templates(self, es, target_name=None): """ Register with Elasticsearch the set of index templates used by the Pbench server. Args es: Elasticsearch object to connect to server target_name: Optional template name to update just one template Raises TemplateError Problem updating the template to server """ template_names = {t.template_name: t for t in self.templates.values()} successes = retries = 0 beg = end = None for name in sorted(template_names.keys()): template = template_names[name] if target_name is not None and target_name != template.idxname: # If we were asked to only load a given template name, skip # all non-matching templates. continue try: _beg, _end, _retries, _stat = pyesbulk.put_template( es, name, "pbench-{}".format(template.name), template.body(), ) except Exception as e: self.counters["put_template_failures"] += 1 raise TemplateError(f"Tool {name} update failed: {e}") else: successes += 1 if beg is None: beg = _beg end = _end retries += _retries log_action = self.logger.warning if retries > 0 else self.logger.debug log_action( "done templates (start ts: {}, end ts: {}, duration: {:.2f}s," " successes: {:d}, retries: {:d})", tstos(beg), tstos(end), end - beg, successes, retries, )
def update_templates(self, es, target_name=None): """Push the various Elasticsearch index templates required by pbench. """ if target_name is not None: idxname = self.index_patterns[target_name]["idxname"] else: idxname = None template_names = [name for name in self.templates] template_names.sort() successes = retries = 0 beg = end = None for name in template_names: if idxname is not None and not name.endswith(idxname): # If we were asked to only load a given template name, skip # all non-matching templates. continue try: _beg, _end, _retries, _stat = pyesbulk.put_template( es, name, "pbench-{}".format(name.split(".")[2]), self.templates[name], ) except Exception as e: self.counters["put_template_failures"] += 1 raise TemplateError(e) else: successes += 1 if beg is None: beg = _beg end = _end retries += _retries log_action = self.logger.warning if retries > 0 else self.logger.debug log_action( "done templates (start ts: {}, end ts: {}, duration: {:.2f}s," " successes: {:d}, retries: {:d})", tstos(beg), tstos(end), end - beg, successes, retries, )
def process_tb(self, tarballs): """Process Tarballs For Indexing and creates report "tarballs" - List of tarball, it is the second value of the tuple returned by collect_tb() """ res = 0 idxctx = self.idxctx error_code = self.error_code tb_deque = deque(sorted(tarballs)) # At this point, tarballs contains a list of tar balls sorted by size # that were available as symlinks in the various 'linksrc' directories. idxctx.logger.debug("Preparing to index {:d} tar balls", len(tb_deque)) try: # Now that we are ready to begin the actual indexing step, ensure we # have the proper index templates in place. idxctx.logger.debug("update_templates [start]") idxctx.templates.update_templates(idxctx.es) except TemplateError as e: res = self.emit_error(idxctx.logger.error, "TEMPLATE_CREATION_ERROR", e) except SigTermException: # Re-raise a SIGTERM to avoid it being lumped in with general # exception handling below. raise except Exception: idxctx.logger.exception( "update_templates [end]: Unexpected template" " processing error") res = error_code["GENERIC_ERROR"] else: idxctx.logger.debug("update_templates [end]") res = error_code["OK"] if not res.success: # Exit early if we encounter any errors. return res.value report = Report( idxctx.config, self.name, es=idxctx.es, pid=idxctx.getpid(), group_id=idxctx.getgid(), user_id=idxctx.getuid(), hostname=idxctx.gethostname(), version=VERSION, templates=idxctx.templates, ) # We use the "start" report ID as the tracking ID for all indexed # documents. try: tracking_id = report.post_status(tstos(idxctx.time()), "start") except SigTermException: # Re-raise a SIGTERM to avoid it being lumped in with general # exception handling below. raise except Exception: idxctx.logger.error("Failed to post initial report status") return error_code["GENERIC_ERROR"].value else: idxctx.set_tracking_id(tracking_id) with tempfile.TemporaryDirectory(prefix=f"{self.name}.", dir=idxctx.config.TMP) as tmpdir: idxctx.logger.debug("start processing list of tar balls") tb_list = Path(tmpdir, f"{self.name}.{idxctx.TS}.list") try: with tb_list.open(mode="w") as lfp: # Write out all the tar balls we are processing so external # viewers can follow along from home. for size, controller, tb in tarballs: print(f"{size:20d} {controller} {tb}", file=lfp) indexed = Path(tmpdir, f"{self.name}.{idxctx.TS}.indexed") erred = Path(tmpdir, f"{self.name}.{idxctx.TS}.erred") skipped = Path(tmpdir, f"{self.name}.{idxctx.TS}.skipped") ie_filepath = Path( tmpdir, f"{self.name}.{idxctx.TS}.indexing-errors.json") # We use a list object here so that when we close over this # variable in the handler, the list object will be closed over, # but not its contents. sigquit_interrupt = [False] def sigquit_handler(*args): sigquit_interrupt[0] = True sighup_interrupt = [False] def sighup_handler(*args): sighup_interrupt[0] = True signal.signal(signal.SIGQUIT, sigquit_handler) signal.signal(signal.SIGHUP, sighup_handler) count_processed_tb = 0 try: while len(tb_deque) > 0: size, controller, tb = tb_deque.popleft() # Sanity check source tar ball path linksrc_dir = Path(tb).parent linksrc_dirname = linksrc_dir.name count_processed_tb += 1 assert linksrc_dirname == self.linksrc, ( f"Logic bomb! tar ball " f"path {tb} does not contain {self.linksrc}") idxctx.logger.info("Starting {} (size {:d})", tb, size) dataset = None ptb = None username = None try: path = os.path.realpath(tb) try: dataset = Dataset.attach( path=path, state=States.INDEXING, ) except DatasetNotFound: idxctx.logger.warn( "Unable to locate Dataset {}", path, ) except DatasetTransitionError as e: # TODO: This means the Dataset is known, but not in a # state where we'd expect to be indexing it. So what do # we do with it? (Note: this is where an audit log will # be handy; i.e., how did we get here?) For now, just # let it go. idxctx.logger.warn( "Unable to advance dataset state: {}", str(e)) else: username = dataset.owner # "Open" the tar ball represented by the tar ball object idxctx.logger.debug("open tar ball") ptb = PbenchTarBall( idxctx, username, path, tmpdir, Path(self.incoming, controller), ) # Construct the generator for emitting all actions. The # `idxctx` dictionary is passed along to each generator so # that it can add its context for error handling to the # list. idxctx.logger.debug("generator setup") if self.options.index_tool_data: actions = ptb.mk_tool_data_actions() else: actions = ptb.make_all_actions() # File name for containing all indexing errors that # can't/won't be retried. with ie_filepath.open(mode="w") as fp: idxctx.logger.debug("begin indexing") try: signal.signal(signal.SIGINT, sigint_handler) es_res = es_index( idxctx.es, actions, fp, idxctx.logger, idxctx._dbg, ) except SigIntException: idxctx.logger.exception( "Indexing interrupted by SIGINT, continuing to next tarball" ) continue finally: # Turn off the SIGINT handler when not indexing. signal.signal(signal.SIGINT, signal.SIG_IGN) except UnsupportedTarballFormat as e: tb_res = self.emit_error(idxctx.logger.warning, "TB_META_ABSENT", e) except BadDate as e: tb_res = self.emit_error(idxctx.logger.warning, "BAD_DATE", e) except FileNotFoundError as e: tb_res = self.emit_error(idxctx.logger.warning, "FILE_NOT_FOUND_ERROR", e) except BadMDLogFormat as e: tb_res = self.emit_error(idxctx.logger.warning, "BAD_METADATA", e) except SigTermException: idxctx.logger.exception( "Indexing interrupted by SIGTERM, terminating") break except Exception as e: tb_res = self.emit_error(idxctx.logger.exception, "GENERIC_ERROR", e) else: beg, end, successes, duplicates, failures, retries = es_res idxctx.logger.info( "done indexing (start ts: {}, end ts: {}, duration:" " {:.2f}s, successes: {:d}, duplicates: {:d}," " failures: {:d}, retries: {:d})", tstos(beg), tstos(end), end - beg, successes, duplicates, failures, retries, ) tb_res = error_code[ "OP_ERROR" if failures > 0 else "OK"] finally: if dataset: try: dataset.advance( States.INDEXED if tb_res. success else States.QUARANTINED) Metadata.remove(dataset, Metadata.REINDEX) except DatasetTransitionError: idxctx.logger.exception( "Dataset state error") try: ie_len = ie_filepath.stat().st_size except FileNotFoundError: # Above operation never made it to actual indexing, ignore. pass except SigTermException: # Re-raise a SIGTERM to avoid it being lumped in with # general exception handling below. raise except Exception: idxctx.logger.exception( "Unexpected error handling" " indexing errors file: {}", ie_filepath, ) else: # Success fetching indexing error file size. if ie_len > len(tb) + 1: try: report.post_status(tstos(end), "errors", ie_filepath) except Exception: idxctx.logger.exception( "Unexpected error issuing" " report status with errors: {}", ie_filepath, ) finally: # Unconditionally remove the indexing errors file. try: os.remove(ie_filepath) except SigTermException: # Re-raise a SIGTERM to avoid it being lumped in with # general exception handling below. raise except Exception: pass # Distinguish failure cases, so we can retry the indexing # easily if possible. Different `linkerrdest` directories for # different failures; the rest are going to end up in # `linkerrdest` for later retry. controller_path = linksrc_dir.parent if tb_res is error_code["OK"]: idxctx.logger.info( "{}: {}/{}: success", idxctx.TS, controller_path.name, os.path.basename(tb), ) # Success with indexed.open(mode="a") as fp: print(tb, file=fp) rename_tb_link( tb, Path(controller_path, self.linkdest), idxctx.logger) elif tb_res is error_code["OP_ERROR"]: idxctx.logger.warning( "{}: index failures encountered on {}", idxctx.TS, tb) with erred.open(mode="a") as fp: print(tb, file=fp) rename_tb_link( tb, Path(controller_path, f"{self.linkerrdest}.1"), idxctx.logger, ) elif tb_res in (error_code["CFG_ERROR"], error_code["BAD_CFG"]): assert False, ( f"Logic Bomb! Unexpected tar ball handling " f"result status {tb_res.value:d} for tar ball {tb}" ) elif tb_res.tarball_error: # # Quietly skip these errors with skipped.open(mode="a") as fp: print(tb, file=fp) rename_tb_link( tb, Path( controller_path, f"{self.linkerrdest}.{tb_res.value:d}", ), idxctx.logger, ) else: idxctx.logger.error( "{}: index error {:d} encountered on {}", idxctx.TS, tb_res.value, tb, ) with erred.open(mode="a") as fp: print(tb, file=fp) rename_tb_link( tb, Path(controller_path, self.linkerrdest), idxctx.logger, ) idxctx.logger.info( "Finished{} {} (size {:d})", "[SIGQUIT]" if sigquit_interrupt[0] else "", tb, size, ) if sigquit_interrupt[0]: break if sighup_interrupt[0]: status, new_tb = self.collect_tb() if status == 0: if not set(new_tb).issuperset(tb_deque): idxctx.logger.info( "Tarballs supposed to be in 'TO-INDEX' are no longer present", set(tb_deque).difference(new_tb), ) tb_deque = deque(sorted(new_tb)) idxctx.logger.info( "SIGHUP status (Current tar ball indexed: ({}), Remaining: {}, Completed: {}, Errors_encountered: {}, Status: {})", Path(tb).name, len(tb_deque), count_processed_tb, _count_lines(erred), tb_res, ) sighup_interrupt[0] = False continue except SigTermException: idxctx.logger.exception( "Indexing interrupted by SIGQUIT, stop processing tarballs" ) finally: # Turn off the SIGQUIT and SIGHUP handler when not indexing. signal.signal(signal.SIGQUIT, signal.SIG_IGN) signal.signal(signal.SIGHUP, signal.SIG_IGN) except SigTermException: # Re-raise a SIGTERM to avoid it being lumped in with general # exception handling below. raise except Exception: idxctx.logger.exception(error_code["GENERIC_ERROR"].message) res = error_code["GENERIC_ERROR"] else: # No exceptions while processing tar ball, success. res = error_code["OK"] finally: if idxctx: idxctx.dump_opctx() idxctx.logger.debug("stopped processing list of tar balls") idx = _count_lines(indexed) skp = _count_lines(skipped) err = _count_lines(erred) idxctx.logger.info( "{}.{}: indexed {:d} (skipped {:d}) results," " {:d} errors", self.name, idxctx.TS, idx, skp, err, ) if err > 0: if skp > 0: subj = ( f"{self.name}.{idxctx.TS} - Indexed {idx:d} results, skipped {skp:d}" f" results, w/ {err:d} errors") else: subj = ( f"{self.name}.{idxctx.TS} - Indexed {idx:d} results, w/ {err:d}" " errors") else: if skp > 0: subj = f"{self.name}.{idxctx.TS} - Indexed {idx:d} results, skipped {skp:d} results" else: subj = f"{self.name}.{idxctx.TS} - Indexed {idx:d} results" report_fname = Path(tmpdir, f"{self.name}.{idxctx.TS}.report") with report_fname.open(mode="w") as fp: print(subj, file=fp) if idx > 0: print("\nIndexed Results\n===============", file=fp) with indexed.open() as ifp: for line in sorted(ifp): print(line.strip(), file=fp) if err > 0: print( "\nResults producing errors" "\n========================", file=fp, ) with erred.open() as efp: for line in sorted(efp): print(line.strip(), file=fp) if skp > 0: print("\nSkipped Results\n===============", file=fp) with skipped.open() as sfp: for line in sorted(sfp): print(line.strip(), file=fp) try: report.post_status(tstos(idxctx.time()), "status", report_fname) except SigTermException: # Re-raise a SIGTERM to avoid it being lumped in with general # exception handling below. raise except Exception: pass return res.value
def post_status(self, timestamp, doctype, file_to_index=None): """Post a status record, with an optional file payload to index along with the base tracking document. We return the tracking ID use for this report object. """ try: if timestamp.startswith("run-"): timestamp = timestamp[4:] # Snip off the trailing "-<TZ>" - assumes that <TZ> does not # contain a "-" timestamp_noutc = timestamp.rsplit("-", 1)[0] base_source = { "@timestamp": timestamp_noutc, "@generated-by": self.generated_by, "name": self.name, "doctype": doctype, } if file_to_index: payload_gen = self._gen_json_payload(base_source, file_to_index) else: payload_gen = self._gen_no_json_payload(base_source) if self.es is None: # We don't have an Elasticsearch configuration, use syslog # only. # Prepend the proper sequence to allow rsyslog to recognize # JSON and process it. We only use the first chunk generated. _, self.tracking_id, the_bytes = next(payload_gen) payload = "@cee:{}".format(the_bytes) if len(payload) > 4096: # Compress the full message to a file. fname = "report-status-payload.{}.{}.xz".format( self.name, timestamp_noutc) fpath = Path(self.config.log_dir) / self.name / fname with lzma.open(fpath, mode="w", preset=9) as fp: fp.write(the_bytes) for _, _, the_bytes in payload_gen: fp.write(the_bytes) # Always log the first 4,096 bytes self.logger.info("{}", payload[:4096]) else: # We have an Elasticsearch configuration. def _es_payload_gen(_payload_gen): for source, source_id, _ in _payload_gen: if self.tracking_id is None: # First generated document becomes the tracking ID. self.tracking_id = source_id idx_name = self.templates.generate_index_name( "server-reports", source) action = { "_op_type": _op_type, "_index": idx_name, "_id": source_id, "_source": source, } yield action es_res = es_index(self.es, _es_payload_gen(payload_gen), sys.stderr, self.logger) beg, end, successes, duplicates, failures, retries = es_res if failures > 0: log_action = self.logger.error elif duplicates > 0 or retries > 0: log_action = self.logger.warning else: assert (successes >= 1 and duplicates == 0 and failures == 0 and retries == 0), "Logic Bomb!" log_action = self.logger.debug log_action( "posted status (start ts: {}, end ts: {}, duration: {:.2f}s," " successes: {:d}, duplicates: {:d}, failures: {:d}," " retries: {:d})", tstos(beg), tstos(end), end - beg, successes, duplicates, failures, retries, ) except Exception: self.logger.exception( "Failed to post status, name = {}," " timestamp = {}, doctype = {}, file_to_index = {}", self.name, timestamp, doctype, file_to_index, ) raise return self.tracking_id
def main(options, name): """Main entry point to pbench-index. The caller is required to pass the "options" argument with the following expected attributes: cfg_name - Name of the configuration file to use dump_index_patterns - Don't do any indexing, but just emit the list of index patterns that would be used dump_templates - Dump the templates that would be used index_tool_data - Index tool data only re_index - Consider tar balls marked for re-indexing All exceptions are caught and logged to syslog with the stacktrace of the exception in a sub-object of the logged JSON document. Status codes used by es_index and the error below are defined from the list below to maintain compatibility with the previous code base when pbench-index was a bash script and invoked index-pbench (yes, a bit confusing), the predecessor to this program. The codes were used to sort the errors we encountered processing tar balls in to categories of retry or not: 0 - normal, successful exit, no errors 1 - Operational error while indexing 2 - Configuration file not specified 3 - Bad configuration file 4 - Tar ball does not contain a metadata.log file 5 - Bad start run date value encountered 6 - File Not Found error 7 - Bad metadata.log file encountered 8 - Error reading a mapping file for Elasticsearch templates 9 - Error creating one of the Elasticsearch templates 10 - Bad hostname in a sosreport 11 - Failure unpacking the tar ball 12 - generic error, needs to be investigated and can be retried after any indexing bugs are fixed. Return Values (now a sub-set of the original status codes above): 0 - Successfully processed all tar balls (errors processing tar balls are reported in the logs and in index status reports) 1 - Failed to process one or more tar balls for unknown reasons (see logs) 2 - Missing configuration file 3 - Invalid configuration file 8 - Unable to load and process expected mapping files 9 - Unable to update index templates in configured Elasticsearch instance Signal Handlers used to establish different patterns for the three behaviors: 1. Gracefully stop processing tar balls - SIGQUIT - The current tar ball is indexed until completion, but no other tar balls are processed. - Handler Behavior: - Sets a flag that causes the code flow to break out of the for loop. - Does not raise an exception. 2. Interrupt the current tar ball being indexed, and proceed to the next one, if any - SIGINT - Handler Behavior: - try/except/finally placed immediately around the es_index() call so that the signal handler will only be established for the duration of the call. - Raises an exception caught by above try/except/finally. - The finally clause would take down the signal handler. 3. Stop processing tar balls immediately and exit gracefully - SIGTERM - Handler Behavior: - Raises an exception caught be a new, outer-most, try/except block that does not have a finally clause (as you don't want any code execution in the finally block). """ _name_suf = "-tool-data" if options.index_tool_data else "" _name_re = "-re" if options.re_index else "" name = f"{name}{_name_re}{_name_suf}" if not options.cfg_name: print( f"{name}: ERROR: No config file specified; set" " _PBENCH_SERVER_CONFIG env variable or" " use --config <file> on the command line", file=sys.stderr, ) return 2 idxctx = None try: idxctx = IdxContext(options, name, _dbg=_DEBUG) except (ConfigFileError, ConfigParserError) as e: print(f"{name}: {e}", file=sys.stderr) return 2 except BadConfig as e: print(f"{name}: {e}", file=sys.stderr) return 3 except JsonFileError as e: print(f"{name}: {e}", file=sys.stderr) return 8 if options.dump_index_patterns: idxctx.templates.dump_idx_patterns() return 0 if options.dump_templates: idxctx.templates.dump_templates() return 0 _re_idx = "RE-" if options.re_index else "" if options.index_tool_data: # The link source and destination for the operation of this script # when it only indexes tool data. linksrc = "TO-INDEX-TOOL" linkdest = "INDEXED" else: # The link source and destination for the operation of this script # when it indexes run, table-of-contents, and result data. linksrc = f"TO-{_re_idx}INDEX" linkdest = "TO-INDEX-TOOL" # We only ever use a symlink'd error destination for indexing # problems. linkerrdest = "WONT-INDEX" res = 0 ARCHIVE_rp = idxctx.config.ARCHIVE INCOMING_rp = idxctx.config.INCOMING INCOMING_path = idxctx.config.get_valid_dir_option("INCOMING", INCOMING_rp, idxctx.logger) if not INCOMING_path: res = 3 qdir = idxctx.config.get_conf("QUARANTINE", "pbench-server", "pbench-quarantine-dir", idxctx.logger) if not qdir: res = 3 else: qdir_path = idxctx.config.get_valid_dir_option("QDIR", Path(qdir), idxctx.logger) if not qdir_path: res = 3 if res != 0: # Exit early if we encounter any errors. return res idxctx.logger.debug("{}.{}: starting", name, idxctx.TS) # find -L $ARCHIVE/*/$linksrc -name '*.tar.xz' -printf "%s\t%p\n" 2>/dev/null | sort -n > $list tarballs = [] try: tb_glob = os.path.join(ARCHIVE_rp, "*", linksrc, "*.tar.xz") for tb in glob.iglob(tb_glob): try: rp = Path(tb).resolve(strict=True) except OSError: idxctx.logger.warning("{} does not resolve to a real path", tb) quarantine(qdir, idxctx.logger, tb) continue controller_path = rp.parent controller = controller_path.name archive_path = controller_path.parent if str(archive_path) != str(ARCHIVE_rp): idxctx.logger.warning( "For tar ball {}, original home is not {}", tb, ARCHIVE_rp) quarantine(qdir, idxctx.logger, tb) continue if not Path(f"{rp}.md5").is_file(): idxctx.logger.warning("Missing .md5 file for {}", tb) quarantine(qdir, idxctx.logger, tb) # Audit should pick up missing .md5 file in ARCHIVE directory. continue try: # get size size = rp.stat().st_size except OSError: idxctx.logger.warning("Could not fetch tar ball size for {}", tb) quarantine(qdir, idxctx.logger, tb) # Audit should pick up missing .md5 file in ARCHIVE directory. continue else: tarballs.append((size, controller, tb)) except SigTermException: # Re-raise a SIGTERM to avoid it being lumped in with general # exception handling below. raise except Exception: idxctx.logger.exception("Unexpected error encountered generating list" " of tar balls to process") return 12 else: if not tarballs: idxctx.logger.info("No tar balls found that need processing") return 0 # We always process the smallest tar balls first. tarballs = sorted(tarballs) # At this point, tarballs contains a list of tar balls sorted by size # that were available as symlinks in the various 'linksrc' directories. idxctx.logger.debug("Preparing to index {:d} tar balls", len(tarballs)) try: # Now that we are ready to begin the actual indexing step, ensure we # have the proper index templates in place. idxctx.logger.debug("update_templates [start]") idxctx.templates.update_templates(idxctx.es) except TemplateError as e: idxctx.logger.error("update_templates [end], error {}", repr(e)) res = 9 except SigTermException: # Re-raise a SIGTERM to avoid it being lumped in with general # exception handling below. raise except Exception: idxctx.logger.exception("update_templates [end]: Unexpected template" " processing error") res = 12 else: idxctx.logger.debug("update_templates [end]") res = 0 if res != 0: # Exit early if we encounter any errors. return res report = Report( idxctx.config, name, es=idxctx.es, pid=idxctx.getpid(), group_id=idxctx.getgid(), user_id=idxctx.getuid(), hostname=idxctx.gethostname(), version=VERSION, templates=idxctx.templates, ) # We use the "start" report ID as the tracking ID for all indexed # documents. try: tracking_id = report.post_status(tstos(idxctx.time()), "start") except SigTermException: # Re-raise a SIGTERM to avoid it being lumped in with general # exception handling below. raise except Exception: idxctx.logger.error("Failed to post initial report status") return 12 else: idxctx.set_tracking_id(tracking_id) with tempfile.TemporaryDirectory(prefix=f"{name}.", dir=idxctx.config.TMP) as tmpdir: idxctx.logger.debug("start processing list of tar balls") tb_list = Path(tmpdir, f"{name}.{idxctx.TS}.list") try: with tb_list.open(mode="w") as lfp: # Write out all the tar balls we are processing so external # viewers can follow along from home. for size, controller, tb in tarballs: print(f"{size:20d} {controller} {tb}", file=lfp) indexed = Path(tmpdir, f"{name}.{idxctx.TS}.indexed") erred = Path(tmpdir, f"{name}.{idxctx.TS}.erred") skipped = Path(tmpdir, f"{name}.{idxctx.TS}.skipped") ie_filepath = Path(tmpdir, f"{name}.{idxctx.TS}.indexing-errors.json") # We use a list object here so that when we close over this # variable in the handler, the list object will be closed over, # but not its contents. sigquit_interrupt = [False] def sigquit_handler(*args): sigquit_interrupt[0] = True signal.signal(signal.SIGQUIT, sigquit_handler) for size, controller, tb in tarballs: # Sanity check source tar ball path linksrc_dir = Path(tb).parent linksrc_dirname = linksrc_dir.name assert linksrc_dirname == linksrc, ( f"Logic bomb! tar ball " f"path {tb} does not contain {linksrc}") idxctx.logger.info("Starting {} (size {:d})", tb, size) ptb = None try: # "Open" the tar ball represented by the tar ball object idxctx.logger.debug("open tar ball") ptb = PbenchTarBall( idxctx, os.path.realpath(tb), tmpdir, Path(INCOMING_rp, controller), ) # Construct the generator for emitting all actions. The # `idxctx` dictionary is passed along to each generator so # that it can add its context for error handling to the # list. idxctx.logger.debug("generator setup") if options.index_tool_data: actions = ptb.mk_tool_data_actions() else: actions = ptb.make_all_actions() # File name for containing all indexing errors that # can't/won't be retried. with ie_filepath.open(mode="w") as fp: idxctx.logger.debug("begin indexing") try: signal.signal(signal.SIGINT, sigint_handler) es_res = es_index(idxctx.es, actions, fp, idxctx.logger, idxctx._dbg) except SigIntException: idxctx.logger.exception( "Indexing interrupted by SIGINT, continuing to next tarball" ) continue finally: # Turn off the SIGINT handler when not indexing. signal.signal(signal.SIGINT, signal.SIG_IGN) except UnsupportedTarballFormat as e: idxctx.logger.warning("Unsupported tar ball format: {}", e) tb_res = 4 except BadDate as e: idxctx.logger.warning("Bad Date: {!r}", e) tb_res = 5 except _filenotfounderror as e: idxctx.logger.warning("No such file: {}", e) tb_res = 6 except BadMDLogFormat as e: idxctx.logger.warning( "The metadata.log file is curdled in" " tar ball: {}", e) tb_res = 7 except SosreportHostname as e: idxctx.logger.warning("Bad hostname in sosreport: {}", e) tb_res = 10 except tarfile.TarError as e: idxctx.logger.error("Can't unpack tar ball into {}: {}", ptb.extracted_root, e) tb_res = 11 except SigTermException: idxctx.logger.exception( "Indexing interrupted by SIGTERM, terminating") break except Exception as e: idxctx.logger.exception("Other indexing error: {}", e) tb_res = 12 else: beg, end, successes, duplicates, failures, retries = es_res idxctx.logger.info( "done indexing (start ts: {}, end ts: {}, duration:" " {:.2f}s, successes: {:d}, duplicates: {:d}," " failures: {:d}, retries: {:d})", tstos(beg), tstos(end), end - beg, successes, duplicates, failures, retries, ) tb_res = 1 if failures > 0 else 0 try: ie_len = ie_filepath.stat().st_size except _filenotfounderror: # Above operation never made it to actual indexing, ignore. pass except SigTermException: # Re-raise a SIGTERM to avoid it being lumped in with # general exception handling below. raise except Exception: idxctx.logger.exception( "Unexpected error handling" " indexing errors file: {}", ie_filepath, ) else: # Success fetching indexing error file size. if ie_len > len(tb) + 1: try: report.post_status(tstos(end), "errors", ie_filepath) except Exception: idxctx.logger.exception( "Unexpected error issuing" " report status with errors: {}", ie_filepath, ) finally: # Unconditionally remove the indexing errors file. try: os.remove(ie_filepath) except SigTermException: # Re-raise a SIGTERM to avoid it being lumped in with # general exception handling below. raise except Exception: pass # Distinguish failure cases, so we can retry the indexing # easily if possible. Different `linkerrdest` directories for # different failures; the rest are going to end up in # `linkerrdest` for later retry. controller_path = linksrc_dir.parent if tb_res == 0: idxctx.logger.info( "{}: {}/{}: success", idxctx.TS, controller_path.name, os.path.basename(tb), ) # Success with indexed.open(mode="a") as fp: print(tb, file=fp) rename_tb_link(tb, Path(controller_path, linkdest), idxctx.logger) elif tb_res == 1: idxctx.logger.warning( "{}: index failures encountered on {}", idxctx.TS, tb) with erred.open(mode="a") as fp: print(tb, file=fp) rename_tb_link( tb, Path(controller_path, f"{linkerrdest}.1"), idxctx.logger, ) elif tb_res in (2, 3): assert False, ( f"Logic Bomb! Unexpected tar ball handling " f"result status {tb_res:d} for tar ball {tb}") elif tb_res >= 4 or res <= 11: # # Quietly skip these errors with skipped.open(mode="a") as fp: print(tb, file=fp) rename_tb_link( tb, Path(controller_path, f"{linkerrdest}.{tb_res:d}"), idxctx.logger, ) else: idxctx.logger.error( "{}: index error {:d} encountered on {}", idxctx.TS, tb_res, tb) with erred.open(mode="a") as fp: print(tb, file=fp) rename_tb_link(tb, Path(controller_path, linkerrdest), idxctx.logger) idxctx.logger.info( "Finished{} {} (size {:d})", "[SIGQUIT]" if sigquit_interrupt[0] else "", tb, size, ) if sigquit_interrupt[0]: break except SigTermException: # Re-raise a SIGTERM to avoid it being lumped in with general # exception handling below. raise except Exception: idxctx.logger.exception("Unexpected setup error") res = 12 else: # No exceptions while processing tar ball, success. res = 0 finally: if idxctx: idxctx.dump_opctx() idxctx.logger.debug("stopped processing list of tar balls") idx = _count_lines(indexed) skp = _count_lines(skipped) err = _count_lines(erred) idxctx.logger.info( "{}.{}: indexed {:d} (skipped {:d}) results," " {:d} errors", name, idxctx.TS, idx, skp, err, ) if err > 0: if skp > 0: subj = ( f"{name}.{idxctx.TS} - Indexed {idx:d} results, skipped {skp:d}" f" results, w/ {err:d} errors") else: subj = ( f"{name}.{idxctx.TS} - Indexed {idx:d} results, w/ {err:d}" " errors") else: if skp > 0: subj = f"{name}.{idxctx.TS} - Indexed {idx:d} results, skipped {skp:d} results" else: subj = f"{name}.{idxctx.TS} - Indexed {idx:d} results" report_fname = Path(tmpdir, f"{name}.{idxctx.TS}.report") with report_fname.open(mode="w") as fp: print(subj, file=fp) if idx > 0: print("\nIndexed Results\n===============", file=fp) with indexed.open() as ifp: for line in sorted(ifp): print(line.strip(), file=fp) if err > 0: print( "\nResults producing errors" "\n========================", file=fp, ) with erred.open() as efp: for line in sorted(efp): print(line.strip(), file=fp) if skp > 0: print("\nSkipped Results\n===============", file=fp) with skipped.open() as sfp: for line in sorted(sfp): print(line.strip(), file=fp) try: report.post_status(tstos(idxctx.time()), "status", report_fname) except SigTermException: # Re-raise a SIGTERM to avoid it being lumped in with general # exception handling below. raise except Exception: pass return res