def _get_warnings_text_and_table(self):
    """
    Return the warnings text for the report.

    Builds a :py:class:`Table <lab.reports.Table>` containing one line for
    each run where an unexplained error occured, prepends information about
    output to slurm.err (if any), and appends a warning if runs from both
    infai partitions are mixed. Returns everything joined as one string
    (empty string if there is nothing to warn about).
    """
    if not self.ERROR_ATTRIBUTES:
        logging.critical("The list of error attributes must not be empty.")
    table = reports.Table(title="Unexplained errors")
    table.set_column_order(self.ERROR_ATTRIBUTES)

    # True if at least one run recorded output to slurm.err.
    wrote_to_slurm_err = any(
        "output-to-slurm.err" in run.get("unexplained_errors", [])
        for run in self.runs.values())

    for run in self.runs.values():
        error_message = tools.get_unexplained_errors_message(run)
        if error_message:
            logging.error(error_message)
            run_dir = run["run_dir"]
            # One table row per failed run, one column per error attribute.
            for attr in self.ERROR_ATTRIBUTES:
                value = run.get(attr, "?")
                if attr == "unexplained_errors":
                    value = self._format_unexplained_errors(value)
                    # Use formatted value as-is.
                    table.cell_formatters[run_dir][
                        attr] = reports.CellFormatter()
                table.add_cell(run_dir, attr, value)

    errors = []

    if wrote_to_slurm_err:
        # Derive the grid-steps dir from the eval dir name
        # ("<exp>-eval" -> "<exp>-grid-steps").
        src_dir = self.eval_dir.rstrip("/")[:-len("-eval")]
        slurm_err_file = src_dir + "-grid-steps/slurm.err"
        try:
            slurm_err_content = tools.get_slurm_err_content(src_dir)
        except FileNotFoundError:
            slurm_err_file = "*-grid-steps/slurm.err"
            errors.append(
                f"There was output to {slurm_err_file}, but the file was missing "
                f"when this report was made.")
        else:
            slurm_err_content = tools.filter_slurm_err_content(
                slurm_err_content)
            # BUGFIX: the original concatenation produced
            # 'without"memory cg"' (missing space before the quote).
            errors.append(
                f"There was output to {slurm_err_file}. Below is the output without "
                f'"memory cg" errors:\n```\n{slurm_err_content}\n```')
        logging.error(f"There was output to {slurm_err_file}.")

    if table:
        errors.append(str(table))

    # Mixing cluster partitions invalidates time comparisons.
    # NOTE(review): node name ranges are hardcoded for the infai partitions
    # of the bc2 cluster.
    infai_1_nodes = {f"ase{i:02d}.cluster.bc2.ch" for i in range(1, 25)}
    infai_2_nodes = {f"ase{i:02d}.cluster.bc2.ch" for i in range(31, 55)}
    nodes = self._get_node_names()
    if nodes & infai_1_nodes and nodes & infai_2_nodes:
        errors.append(
            "Report combines runs from infai_1 and infai_2 partitions.")

    return "\n".join(errors)
def _scan_planning_data(self):
    """Index all runs from self.props by task, domain and algorithm.

    Populates self.domains, self.problem_runs, self.domain_algorithm_runs,
    self.runs, self.algorithms and self.algorithm_info, logs how many runs
    have unexplained errors, and warns when the algorithm/task matrix is
    incomplete.
    """
    tasks = set()
    self.domains = defaultdict(list)
    self.problem_runs = defaultdict(list)
    self.domain_algorithm_runs = defaultdict(list)
    self.runs = {}
    for run in self.props.values():
        domain = run["domain"]
        problem = run["problem"]
        algo = run["algorithm"]
        tasks.add((domain, problem))
        self.problem_runs[(domain, problem)].append(run)
        self.domain_algorithm_runs[(domain, algo)].append(run)
        self.runs[(domain, problem, algo)] = run
    for domain, problem in tasks:
        self.domains[domain].append(problem)

    self.algorithms = self._get_algorithm_order()

    num_unexplained_errors = sum(
        1 for run in self.runs.values()
        if tools.get_unexplained_errors_message(run))
    # Escalate the log level when any run had an unexplained error.
    log = logging.error if num_unexplained_errors else logging.info
    log("Report contains {num_unexplained_errors} runs with unexplained"
        " errors.".format(num_unexplained_errors=num_unexplained_errors))

    if len(tasks) * len(self.algorithms) != len(self.runs):
        logging.warning(
            "Not every algorithm has been run on every task. "
            "However, if you applied a filter this is to be "
            "expected. If not, there might be old properties in the "
            "eval-dir that got included in the report. "
            "Algorithms (%d): %s, problems (%d), domains (%d): %s, runs (%d)"
            % (
                len(self.algorithms),
                self.algorithms,
                len(tasks),
                len(self.domains),
                list(self.domains.keys()),
                len(self.runs),
            ))

    # Sort each entry in problem_runs by algorithm.
    position = {algo: pos for pos, algo in enumerate(self.algorithms)}
    for runs_for_task in self.problem_runs.values():
        runs_for_task.sort(key=lambda run: position[run["algorithm"]])

    self.algorithm_info = self._scan_algorithm_info()
def __call__(self, src_dir, eval_dir=None, merge=None, filter=None, **kwargs):
    """
    This method can be used to copy properties from an exp-dir or
    eval-dir into an eval-dir. If the destination eval-dir already
    exist, the data will be merged. This means *src_dir* can either
    be an exp-dir or an eval-dir and *eval_dir* can be a new or
    existing directory.

    We recommend using lab.Experiment.add_fetcher() to add fetchers
    to an experiment. See the method's documentation for a
    description of the parameters.
    """
    if not os.path.isdir(src_dir):
        logging.critical(
            "{} is missing or not a directory".format(src_dir))
    run_filter = tools.RunFilter(filter, **kwargs)
    # Default destination: "<src_dir>-eval" next to the source directory.
    eval_dir = eval_dir or src_dir.rstrip("/") + "-eval"
    logging.info("Fetching properties from {} to {}".format(
        src_dir, eval_dir))

    # merge=None: abort if eval_dir exists; merge=True: merge into it;
    # merge=False: delete it and start fresh.
    if merge is None:
        _check_eval_dir(eval_dir)
    elif merge:
        # No action needed, data will be merged.
        pass
    else:
        tools.remove_path(eval_dir)

    # Load properties in the eval_dir if there are any already.
    combined_props = tools.Properties(os.path.join(eval_dir, "properties"))

    # Heuristic: an exp-dir contains a "runs-00001-00100" batch directory;
    # if it is absent, treat src_dir as an eval-dir. -- NOTE(review):
    # presumably every exp-dir has at least this first batch; confirm.
    fetch_from_eval_dir = not os.path.exists(
        os.path.join(src_dir, "runs-00001-00100"))
    if fetch_from_eval_dir:
        # Copy the already-parsed properties file wholesale.
        src_props = tools.Properties(
            filename=os.path.join(src_dir, "properties"))
        run_filter.apply(src_props)
        combined_props.update(src_props)
        logging.info("Fetched properties of {} runs.".format(
            len(src_props)))
    else:
        # Scan every run directory of the experiment.
        slurm_err_content = tools.get_slurm_err_content(src_dir)
        if slurm_err_content:
            logging.error("There was output to *-grid-steps/slurm.err")
        new_props = tools.Properties()
        run_dirs = sorted(glob(os.path.join(src_dir, "runs-*-*", "*")))
        total_dirs = len(run_dirs)
        logging.info(
            "Scanning properties from {:d} run directories".format(
                total_dirs))
        for index, run_dir in enumerate(run_dirs, start=1):
            # Log progress at INFO only every 100 dirs to keep output short.
            loglevel = logging.INFO if index % 100 == 0 else logging.DEBUG
            logging.log(loglevel,
                        "Scanning: {:6d}/{:d}".format(index, total_dirs))
            props = self.fetch_dir(run_dir)
            if slurm_err_content:
                # Mark every run: the slurm.err output cannot be
                # attributed to individual runs.
                props.add_unexplained_error("output-to-slurm.err")
            id_string = "-".join(props["id"])
            new_props[id_string] = props
        run_filter.apply(new_props)
        combined_props.update(new_props)

    # Count runs with unexplained errors in the merged data and log each.
    unexplained_errors = 0
    for props in combined_props.values():
        error_message = tools.get_unexplained_errors_message(props)
        if error_message:
            logging.error(error_message)
            unexplained_errors += 1

    tools.makedirs(eval_dir)
    combined_props.write()
    # format(**locals()) picks up "unexplained_errors" computed above.
    logging.info("Wrote properties file (contains {unexplained_errors} "
                 "runs with unexplained errors).".format(**locals()))
def _get_warnings_text_and_table(self):
    """
    Return a :py:class:`Table <lab.reports.Table>` containing one line
    for each run where an unexplained error occured.
    """
    if not self.ERROR_ATTRIBUTES:
        logging.critical('The list of error attributes must not be empty.')
    table = reports.Table(title='Unexplained errors')
    table.set_column_order(self.ERROR_ATTRIBUTES)

    # True if at least one run recorded output to slurm.err.
    wrote_to_slurm_err = any(
        'output-to-slurm.err' in run.get('unexplained_errors', [])
        for run in self.runs.values())

    num_unexplained_errors = 0
    for run in self.runs.values():
        error_message = tools.get_unexplained_errors_message(run)
        if error_message:
            logging.error(error_message)
            num_unexplained_errors += 1
            # One table row per failed run, one column per error attribute.
            for attr in self.ERROR_ATTRIBUTES:
                table.add_cell(run['run_dir'], attr, run.get(attr, '?'))
    if num_unexplained_errors:
        logging.error(
            'There were {num_unexplained_errors} runs with unexplained'
            ' errors.'.format(**locals()))

    errors = []

    if wrote_to_slurm_err:
        # Derive the grid-steps dir from the eval dir name
        # ("<exp>-eval" -> "<exp>-grid-steps").
        src_dir = self.eval_dir.rstrip('/')[:-len('-eval')]
        slurm_err_file = src_dir + '-grid-steps/slurm.err'
        try:
            slurm_err_content = tools.get_slurm_err_content(src_dir)
        except IOError:
            # Show a placeholder instead of the missing file's contents.
            slurm_err_content = (
                'The slurm.err file was missing while creating the report.')
        else:
            slurm_err_content = tools.filter_slurm_err_content(
                slurm_err_content)
        logging.error(
            'There was output to {slurm_err_file}.'.format(**locals()))
        errors.append(
            ' Contents of {slurm_err_file} without "memory cg"'
            ' errors:\n```\n{slurm_err_content}\n```'.format(**locals()))

    if table:
        errors.append(str(table))

    # Mixing cluster partitions invalidates time comparisons.
    # NOTE(review): node name ranges are hardcoded for the infai
    # partitions of the bc2 cluster.
    infai_1_nodes = set('ase{:02d}.cluster.bc2.ch'.format(i)
                        for i in range(1, 25))
    infai_2_nodes = set('ase{:02d}.cluster.bc2.ch'.format(i)
                        for i in range(31, 55))
    nodes = self._get_node_names()
    if nodes & infai_1_nodes and nodes & infai_2_nodes:
        errors.append(
            'Report combines runs from infai_1 and infai_2 partitions.')

    return '\n'.join(errors)