def test_print_to_buffer() -> None:
    text_buffer = StringIO()
    json_buffer = StringIO()
    structlog.configure(
        processors=[],
        logger_factory=structlog_overtime.TeeLoggerFactory(
            structlog_overtime.TeeOutput(
                processors=[structlog.dev.ConsoleRenderer(colors=False)],
                logger_factory=structlog.PrintLoggerFactory(text_buffer),
            ),
            structlog_overtime.TeeOutput(
                processors=[structlog.processors.JSONRenderer()],
                logger_factory=structlog.PrintLoggerFactory(json_buffer),
            ),
        ),
    )
    structlog.get_logger().info("hello world", foo="bar")
    compare(text_buffer.getvalue(), expected="hello world foo=bar\n")
    compare(
        json_buffer.getvalue(),
        expected="""{"foo": "bar", "event": "hello world"}\n""",
    )
def process_common_arguments(args):
    '''Return True if the caller should stop.'''
    processors = [
        slogconf.exc_info,
    ]
    logger_factory = None

    if args.logger in ['pretty', 'both']:
        slogconf.fix_logging()
        nicelogger.enable_pretty_logging(getattr(logging, args.logging.upper()))
        processors.append(slogconf.stdlib_renderer)
        if args.logger == 'pretty':
            logger_factory = structlog.PrintLoggerFactory(
                file=open(os.devnull, 'w'),
            )
            processors.append(slogconf.null_renderer)

    if args.logger in ['json', 'both']:
        processors.extend([
            structlog.processors.format_exc_info,
            slogconf.json_renderer,
        ])

    if logger_factory is None:
        logfile = args.json_log_fd or sys.stdout
        logger_factory = structlog.PrintLoggerFactory(file=logfile)

    structlog.configure(
        processors=processors,
        logger_factory=logger_factory,
    )

    if args.version:
        progname = os.path.basename(sys.argv[0])
        print('%s v%s' % (progname, __version__))
        return True
def build_structlog_configuration(
    log_level: str, log_format: str, log_stream: TextIO
) -> dict:
    log_level = _cook_log_level(log_level)
    # A bit of structlog architecture:
    # - lazy proxy: component obtained through `structlog.get_logger()`; laziness
    #   is needed given it is imported very early on (later, its bind operation
    #   will initialize the logger wrapper)
    # - logger wrapper: component responsible for all the cooking (e.g. calling processors)
    # - logger: actual component that spits out the log to stdout/file etc.
    return {
        "processors": [
            structlog.stdlib.add_log_level,
            _add_timestamp,
            # Set `exc_info=True` if method name is `exception` and `exc_info` is not set
            structlog.dev.set_exc_info,
            # Given Sentry needs the whole event context as a dictionary,
            # this processor must be kept just before we start formatting
            _structlog_to_sentry_processor,
            # Finally, flatten everything into a printable string
            _format_timestamp,
            structlog.processors.format_exc_info,
            _build_formatter_renderer(log_format),
        ],
        "wrapper_class": structlog.make_filtering_bound_logger(log_level),
        "logger_factory": structlog.PrintLoggerFactory(file=log_stream),
        "cache_logger_on_first_use": True,
    }
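A minimal usage sketch for the example above, not part of the original code: the returned dict is meant to be splatted into structlog.configure(). The "CONSOLE" format value and sys.stderr stream are illustrative assumptions.

# Hypothetical wiring of the configuration dict built above.
import sys
import structlog

config = build_structlog_configuration(
    log_level="INFO", log_format="CONSOLE", log_stream=sys.stderr
)
structlog.configure(**config)
structlog.get_logger().info("backend.started")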
def run():
    args = ap.parse_args()
    if args.ls:
        print('\n'.join(parser.listPluginNames()))
        sys.exit(0)
    if args.verbose:
        structlog.configure(
            logger_factory=structlog.PrintLoggerFactory(sys.stderr))
    infile = BytesIO(sys.stdin.read())
    try:
        parsed = parse(infile, exclude=args.exclude)
    except NoWillingParsers:
        if args.strict:
            raise
        else:
            infile.seek(0)
            sys.stdout.write(infile.read())
            sys.exit(0)
    if args.format == 'yaml':
        print(yaml.safe_dump(parsed, default_flow_style=False))
    elif args.format == 'json':
        print(json.dumps(parsed))
    elif args.format == 'grep':
        from ppo.output import giganticGrep
        giganticGrep(parsed, sys.stdout)
def configure_structlog():
    """Set up structlog with one of two predefined config schemes.

    The config scheme is selected based on ``.settings.debug``.
    """
    processors = [
        drop_debug_logs,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        unix_timestamper,
        structlog_pretty.NumericRounder(),
        structlog.processors.format_exc_info,
        structlog.processors.UnicodeDecoder(),
        structlog.processors.JSONRenderer(),
    ]
    debug_processors = [
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog_pretty.NumericRounder(),
        structlog.processors.TimeStamper('iso'),
        structlog.processors.ExceptionPrettyPrinter(),
        structlog.processors.UnicodeDecoder(),
        structlog.dev.ConsoleRenderer(pad_event=25),
    ]
    structlog.configure(
        processors=debug_processors if settings.debug else processors,
        logger_factory=structlog.PrintLoggerFactory(),
        wrapper_class=structlog.stdlib.BoundLogger,
    )
def configure_logger() -> IO:
    if config.CONFIG.foreground or not config.CONFIG.pidfile or config.CONFIG.log_file == '-':
        processors = (
            structlog.dev.ConsoleRenderer(),
        )
        log_handler = sys.stdout
    else:
        processors = (
            structlog.processors.JSONRenderer(),
        )
        log_handler = open(config.CONFIG.log_file, 'a')
    structlog.configure(
        processors=[
            structlog.processors.add_log_level,
            structlog.processors.StackInfoRenderer(),
            structlog.dev.set_exc_info,
            structlog.processors.format_exc_info,
            structlog.processors.TimeStamper("ISO"),
            *processors,
        ],
        wrapper_class=structlog.BoundLogger,
        context_class=dict,
        logger_factory=structlog.PrintLoggerFactory(file=log_handler),
        cache_logger_on_first_use=True,
    )
    return log_handler
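A sketch of how a caller might use the returned stream, assuming the surrounding config module is already populated; the handle only needs closing when it is a real log file rather than stdout.

# Hypothetical caller: keep the returned stream so it can be closed on shutdown.
import sys
import structlog

log_handler = configure_logger()
structlog.get_logger().info("daemon starting")
try:
    pass  # run the service...
finally:
    if log_handler is not sys.stdout:
        log_handler.close()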
def configure_logging(log_format, utc, endpoint):
    processors = [
        TimeStamper(
            key='@timestamp',
            utc=utc,
        ),
    ]
    if endpoint.startswith('file://'):
        path = endpoint[7:]
        if path == '/dev/stdout':
            stream = sys.stdout
        elif path == '/dev/stderr':
            stream = sys.stderr
        else:
            stream = open(path, 'w')
        logger_factory = structlog.PrintLoggerFactory(file=stream)
        if log_format == 'kv':
            processors.append(structlog.processors.KeyValueRenderer(
                sort_keys=True,
                key_order=['@timestamp', 'event'],
            ))
        else:
            processors.append(structlog.processors.JSONRenderer(
                sort_keys=True,
            ))
    elif endpoint.startswith('fluent://'):
        utc = True
        logger_factory = FluentLoggerFactory.from_url(endpoint)
    else:
        raise ValueError('Invalid logging endpoint "%s".' % endpoint)
    structlog.configure(
        processors=processors,
        logger_factory=logger_factory,
    )
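A hedged usage sketch for the function above; the endpoint URLs and event fields are illustrative, and only the file:// branch is exercised here.

# Illustrative calls (assumptions, not from the original project):
# key/value rendering to stderr, or JSON to a log file.
import structlog

configure_logging(log_format='kv', utc=True, endpoint='file:///dev/stderr')
structlog.get_logger().info('service started', port=8080)

configure_logging(log_format='json', utc=True, endpoint='file:///tmp/app.log')
structlog.get_logger().info('service started', port=8080)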
def configure_structlog():
    json_processors = [
        drop_debug_logs,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.TimeStamper("iso"),
        structlog.processors.format_exc_info,
        structlog.processors.UnicodeDecoder(),
        structlog.processors.JSONRenderer(),
    ]
    pretty_print_processors = [
        drop_debug_logs,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.ExceptionPrettyPrinter(),
        structlog.processors.TimeStamper("iso"),
        structlog.processors.UnicodeDecoder(),
        structlog.dev.ConsoleRenderer(pad_event=0),
    ]
    structlog.configure(
        processors=json_processors if json_logs else pretty_print_processors,
        logger_factory=structlog.PrintLoggerFactory(),
        wrapper_class=structlog.stdlib.BoundLogger,
        context_class=structlog.threadlocal.wrap_dict(dict),
    )
def configure_structlog(debug: bool):
    """Configure the proper log processors and settings for structlog
    with regard to the debug setting."""
    processors = DEBUG_PROCESSORS if debug else PRODUCTION_PROCESSORS

    structlog.configure_once(
        processors=processors,
        logger_factory=structlog.PrintLoggerFactory(),
        wrapper_class=structlog.stdlib.BoundLogger,
        context_class=structlog.threadlocal.wrap_dict(dict),
    )
def configure(profile, profile_read_only, docker_url):
    epicbox.configure(profiles=[profile, profile_read_only],
                      docker_url=docker_url)
    structlog.configure(
        processors=[
            structlog.processors.TimeStamper(fmt='iso'),
            structlog.processors.KeyValueRenderer(key_order=['event']),
        ],
        logger_factory=structlog.PrintLoggerFactory(),
    )
def reporting(outdir: Union[str, Path]) -> None:
    """
    Produce the LaTeX reports, and final pass/fail summary.
    """
    outdir = Path(outdir)
    log_fname = outdir.joinpath(DirectoryNames.LOGS.value, LogNames.REPORTING.value)

    if not log_fname.parent.exists():
        log_fname.parent.mkdir(parents=True)

    with open(log_fname, "w") as fobj:
        structlog.configure(logger_factory=structlog.PrintLoggerFactory(fobj),
                            processors=LOG_PROCESSORS)

        comparison_results_fname = outdir.joinpath(
            DirectoryNames.RESULTS.value, FileNames.RESULTS.value)

        _LOG.info("opening intercomparison results file",
                  fname=str(comparison_results_fname))

        with h5py.File(str(comparison_results_fname), "r") as fid:
            # read intercomparison general measurements summary
            dataset_name = PPath(DatasetGroups.SUMMARY.value,
                                 DatasetNames.GENERAL_SUMMARISED.value)
            _LOG.info("reading dataset", dataset_name=str(dataset_name))
            dataframe = read_h5_table(fid, str(dataset_name))

            n_datasets = fid[DatasetNames.QUERY.value].attrs["nrows"]

            # read and convert metadata tables
            _extract_proc_info_results(fid, outdir)

        _LOG.info(
            "creating CSV's of the general measurements intercomparison summary"
        )
        create_general_csvs(dataframe, outdir.joinpath(DirectoryNames.RESULTS.value))

        results_fname = outdir.joinpath(DirectoryNames.RESULTS.value,
                                        FileNames.GENERAL_FRAMING.value)

        _LOG.info("opening geometry framing general results file",
                  fname=str(results_fname))
        gdf = geopandas.read_file(results_fname)

        reports_outdir = outdir.joinpath(DirectoryNames.REPORT.value)

        _LOG.info("producing LaTeX documents of general results")
        latex_documents(gdf, dataframe, reports_outdir, n_datasets)

        # TODO GQA and ancillary

        _LOG.info("finished writing the LaTeX documents")
def cli():
    # See https://github.com/tqdm/tqdm/issues/313
    hostname = socket.gethostname()
    proc_id = os.getpid()

    def add_proc_info(_, logger, event_dict):
        event_dict["hostname"] = hostname
        event_dict["pid"] = proc_id
        return event_dict

    try:
        from mpi4py import MPI
        mpi_rank = MPI.COMM_WORLD.rank  # Rank of this process
        mpi_size = MPI.COMM_WORLD.size  # Total number of processes

        def add_mpi_rank(_, logger, event_dict):
            if mpi_size > 1:
                event_dict['mpi_rank'] = mpi_rank
            return event_dict
    except ImportError:
        def add_mpi_rank(_, __, event_dict):
            return event_dict

    def tqdm_logger_factory():
        return TQDMLogger()

    class TQDMLogger:
        def msg(self, message):
            tqdm.write(message)

        log = debug = info = warn = warning = msg
        fatal = failure = err = error = critical = exception = msg

    running_interactively = sys.stdout.isatty() and os.environ.get(
        'PBS_NCPUS', None) is None

    structlog.configure(
        processors=[
            structlog.stdlib.add_log_level,
            add_proc_info,
            add_mpi_rank,
            structlog.stdlib.PositionalArgumentsFormatter(),
            structlog.processors.TimeStamper(fmt="iso"),
            structlog.processors.StackInfoRenderer(),
            structlog.processors.format_exc_info,
            structlog.processors.UnicodeDecoder(),
            structlog.dev.ConsoleRenderer() if running_interactively
            else structlog.processors.JSONRenderer(),
        ],
        context_class=dict,
        logger_factory=tqdm_logger_factory if running_interactively
        else structlog.PrintLoggerFactory(),
        cache_logger_on_first_use=True,
    )
def init_logging(output_file=None, verbosity: int = 0,
                 cache_logger_on_first_use=True):
    """
    Set up structlog for structured logging output.

    This defaults to stdout as it's the parseable json output of the program.

    Libraries with "unstructured" logs (such as datacube core logging) go to stderr.
    """
    if output_file is None:
        output_file = sys.stdout

    # Note that we can't use functools.partial: JSONRenderer will pass its
    # own 'default' argument, which overrides ours.
    def lenient_json_dump(obj, *args, **kwargs):
        return rapidjson.dumps(
            obj,
            datetime_mode=rapidjson.DM_ISO8601,
            uuid_mode=rapidjson.UM_CANONICAL,
            number_mode=rapidjson.NM_NATIVE,
            sort_keys=True,
            default=lenient_json_fallback,
        )

    processors = [
        structlog.stdlib.add_log_level,
        structlog.processors.TimeStamper(fmt="ISO"),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        # Coloured output if to terminal, otherwise json
        BetterConsoleRenderer()
        if output_file.isatty()
        else structlog.processors.JSONRenderer(serializer=lenient_json_dump),
    ]

    hide_logging_levels = {
        # Default: show only warnings/critical
        0: ("info", "debug"),
        # One '-v': show info logging too.
        1: ("debug",),
        # Any more '-v's: show everything.
        2: (),
    }.get(verbosity, ())
    if hide_logging_levels:
        processors.insert(0, partial(_filter_levels, hide_levels=hide_logging_levels))

    structlog.configure(
        processors=processors,
        context_class=dict,
        cache_logger_on_first_use=cache_logger_on_first_use,
        logger_factory=structlog.PrintLoggerFactory(file=output_file),
    )
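A small usage sketch (the file name and event fields are assumptions): pointing output_file at a file keeps stdout free and selects the JSON renderer, while verbosity=1 also lets info-level events through.

# Hypothetical usage: write JSON lines to a file instead of stdout.
import structlog

with open("events.jsonl", "w") as out:
    init_logging(output_file=out, verbosity=1)
    structlog.get_logger().info("dataset.indexed", dataset_id="example-id")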
def init_logging(verbose, main_log_file=None, cmd_log_file=None):
    multi_renderer = MultiRenderer(
        journal=SystemdJournalRenderer("fc-agent", syslog.LOG_LOCAL1),
        cmd_output_file=CmdOutputFileRenderer(),
        text=ConsoleFileRenderer(min_level="debug" if verbose else "info",
                                 show_caller_info=verbose),
    )

    processors = [
        add_pid,
        structlog.processors.add_log_level,
        process_exc_info,
        format_exc_info,
        structlog.processors.StackInfoRenderer(),
        structlog.processors.TimeStamper(fmt="iso", utc=False),
        add_caller_info,
        multi_renderer,
    ]

    loggers = {}
    if cmd_log_file:
        loggers["cmd_output_file"] = structlog.PrintLoggerFactory(cmd_log_file)
    if main_log_file:
        loggers["file"] = structlog.PrintLoggerFactory(main_log_file)
    if journal:
        loggers["journal"] = JournalLoggerFactory()

    # If the journal module is available and stdout is connected to journal, we
    # shouldn't log to console because output would be duplicated in the journal.
    if journal and not os.environ.get("JOURNAL_STREAM"):
        loggers["console"] = structlog.PrintLoggerFactory()

    structlog.configure(
        processors=processors,
        wrapper_class=structlog.BoundLogger,
        logger_factory=MultiOptimisticLoggerFactory(**loggers),
    )
def query(
    outdir,
    product_name_test,
    product_name_reference,
    db_env_test,
    db_env_reference,
    time,
    lon,
    lat,
    additional_filters,
):
    """
    Database querying of test and reference products.
    """
    outdir = Path(outdir)
    log_fname = outdir.joinpath(DirectoryNames.LOGS.value, LogNames.QUERY.value)

    if not log_fname.parent.exists():
        log_fname.parent.mkdir(parents=True)

    with open(log_fname, "w") as fobj:
        structlog.configure(logger_factory=structlog.PrintLoggerFactory(fobj),
                            processors=LOG_PROCESSORS)

        results = query_products(
            product_name_test,
            product_name_reference,
            db_env_test,
            db_env_reference,
            time,
            lon,
            lat,
            additional_filters,
        )

        results_fname = outdir.joinpath(DirectoryNames.RESULTS.value,
                                        FileNames.RESULTS.value)
        dataset_name = DatasetNames.QUERY.value

        _LOG.info(
            "saving results of query",
            out_fname=str(results_fname),
            dataset_name=dataset_name,
        )

        with h5py.File(str(results_fname), "w") as fid:
            write_dataframe(results, dataset_name, fid)
def _configure_logger():
    processors = [
        structlog.stdlib.add_log_level,
        structlog.processors.TimeStamper(fmt="%Y-%m-%d %H:%M:%S"),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        structlog.dev.ConsoleRenderer(),
    ]
    structlog.configure(
        processors=processors,
        context_class=dict,
        cache_logger_on_first_use=True,
        logger_factory=structlog.PrintLoggerFactory(),
    )
def configure_logger():
    structlog.configure(
        processors=[
            structlog.processors.StackInfoRenderer(),
            structlog.dev.set_exc_info,
            structlog.processors.format_exc_info,
            structlog.processors.TimeStamper("ISO"),
            structlog.dev.ConsoleRenderer(),
        ],
        wrapper_class=structlog.BoundLogger,
        context_class=dict,
        logger_factory=structlog.PrintLoggerFactory(),
        cache_logger_on_first_use=False,
    )
    log = structlog.get_logger()
    return log
def configure_logger(debug_mode: bool = False) -> None:
    structlog.configure(
        processors=[
            structlog.processors.add_log_level,
            structlog.processors.StackInfoRenderer(),
            structlog.dev.set_exc_info,
            structlog.processors.format_exc_info,
            structlog.processors.TimeStamper("ISO"),
            structlog.dev.ConsoleRenderer(),
        ],
        wrapper_class=structlog.make_filtering_bound_logger(
            logging.DEBUG if debug_mode else logging.WARNING),
        context_class=dict,
        logger_factory=structlog.PrintLoggerFactory(),
        cache_logger_on_first_use=True,
    )
def configure(docker_url):
    epicbox.configure(
        profiles=[
            epicbox.Profile('python3', 'stepic/epicbox-python',
                            user='******', read_only=True),
            epicbox.Profile('gcc', 'stepik/epicbox-gcc:5.3.0'),
        ],
        docker_url=docker_url,
    )
    structlog.configure(
        processors=[
            structlog.processors.TimeStamper(fmt='iso'),
            structlog.processors.KeyValueRenderer(key_order=['event']),
        ],
        logger_factory=structlog.PrintLoggerFactory(),
    )
def test_can_be_set_as_processor(time, spec_validator):
    time.return_value = 1584720997.187709

    stream = StringIO()
    structlog.configure(
        processors=[ecs_logging.StructlogFormatter()],
        wrapper_class=structlog.BoundLogger,
        context_class=dict,
        logger_factory=structlog.PrintLoggerFactory(stream),
    )

    logger = structlog.get_logger("logger-name")
    logger.debug("test message", custom="key", **{"dot.ted": 1})

    assert spec_validator(stream.getvalue()) == (
        '{"@timestamp":"2020-03-20T16:16:37.187Z","log.level":"debug",'
        '"message":"test message","custom":"key","dot":{"ted":1},'
        '"ecs":{"version":"1.6.0"}}\n'
    )
def query_filesystem(
    outdir,
    product_pathname_test,
    product_pathname_reference,
    glob_pattern_test,
    glob_pattern_reference,
):
    """
    Filesystem querying of test and reference products.
    """
    outdir = Path(outdir)
    log_fname = outdir.joinpath(DirectoryNames.LOGS.value, LogNames.QUERY.value)

    if not log_fname.parent.exists():
        log_fname.parent.mkdir(parents=True)

    with open(log_fname, "w") as fobj:
        structlog.configure(logger_factory=structlog.PrintLoggerFactory(fobj))

        results = query_via_filepath(
            product_pathname_test,
            product_pathname_reference,
            glob_pattern_test,
            glob_pattern_reference,
        )

        results_fname = outdir.joinpath(DirectoryNames.RESULTS.value,
                                        FileNames.RESULTS.value)
        dataset_name = DatasetNames.QUERY.value

        _LOG.info(
            "saving results of query",
            out_fname=str(results_fname),
            dataset_name=dataset_name,
        )

        if not results_fname.parent.exists():
            results_fname.parent.mkdir(parents=True)

        with h5py.File(str(results_fname), "w") as fid:
            write_dataframe(results, dataset_name, fid)
def init_logging(output_file=None, verbose=False):
    """
    Set up structlog for structured logging output.

    This defaults to stdout as it's the parseable json output of the program.

    Libraries with "unstructured" logs (such as datacube core logging) go to stderr.
    """
    if output_file is None:
        output_file = sys.stdout

    # Note that we can't use functools.partial: JSONRenderer will pass its
    # own 'default' argument, which overrides ours.
    def lenient_json_dump(obj, *args, **kwargs):
        return rapidjson.dumps(
            obj,
            datetime_mode=rapidjson.DM_ISO8601,
            uuid_mode=rapidjson.UM_CANONICAL,
            number_mode=rapidjson.NM_NATIVE,
            sort_keys=True,
            default=lenient_json_fallback,
        )

    processors = [
        structlog.stdlib.add_log_level,
        structlog.processors.TimeStamper(fmt="ISO"),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        # Coloured output if to terminal, otherwise json
        structlog.dev.ConsoleRenderer()
        if output_file.isatty()
        else structlog.processors.JSONRenderer(serializer=lenient_json_dump),
    ]

    if not verbose:
        processors.insert(0, _filter_informational)

    structlog.configure(
        processors=processors,
        context_class=dict,
        cache_logger_on_first_use=True,
        logger_factory=structlog.PrintLoggerFactory(file=output_file),
    )
def configure_logger(log_handler: IO) -> None:
    if log_handler.name == '<stdout>':
        processors = (structlog.dev.ConsoleRenderer(),)
    else:
        processors = (structlog.processors.JSONRenderer(),)
    structlog.configure(
        processors=[
            structlog.processors.add_log_level,
            structlog.processors.StackInfoRenderer(),
            structlog.dev.set_exc_info,
            structlog.processors.format_exc_info,
            structlog.processors.TimeStamper("ISO"),
            *processors,
        ],
        wrapper_class=structlog.BoundLogger,
        context_class=dict,
        logger_factory=structlog.PrintLoggerFactory(file=log_handler),
        cache_logger_on_first_use=True,
    )
def setup_logging(log_level: str = "INFO") -> None:
    """Set up logging."""
    processors = [
        structlog.stdlib.add_log_level,
        level_filter(log_level),
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        PyrusRenderer(),
    ]

    structlog.configure(
        processors=processors,
        context_class=dict,
        logger_factory=structlog.PrintLoggerFactory(file=sys.stderr),
        wrapper_class=structlog.stdlib.BoundLogger,
        cache_logger_on_first_use=True,
    )

    root = logging.getLogger()
    root.setLevel(log_level)
    root.addHandler(StructlogHandler())

    logging.getLogger("asyncio").setLevel(logging.CRITICAL)
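A sketch of the intended effect (the event names are made up): after setup_logging() both structlog calls and plain stdlib records flow through the same processor chain, since the root logger gets a StructlogHandler.

import logging
import structlog

setup_logging("DEBUG")
structlog.get_logger("worker").info("job finished", duration_s=1.2)
logging.getLogger("some.library").warning("plain stdlib record")  # re-routed via StructlogHandler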
    backend.move(id=cell.fetch('.id'), destination=block_path)
    return


class ActionCodes(Enum):
    # Choose cells which would go into a string, but don't assemble it
    preview = auto()
    # Assemble a new string, moving the chosen cells from the pool to individual blocks
    assemble = auto()


if __name__ == "__main__":
    structlog.configure(
        wrapper_class=structlog.make_filtering_bound_logger(logging.INFO),
        logger_factory=structlog.PrintLoggerFactory(file=sys.stderr))
    load_plugins()
    parser = argparse.ArgumentParser(
        description='Select cells to build a string of blocks connected in series. '
                    'Monte-Carlo optimization.')
    parser.add_argument('--loglevel',
                        choices=LOG_LEVEL_NAMES,
                        default='INFO',
                        help='Change log level')
    add_plugin_args(parser)
    add_backend_selection_args(parser)
    add_cell_selection_args(parser)
    parser.add_argument('--cell-voltage',
from . import tasks
from . import context
from . import stack
from . import mustack

# structlog.processors.KeyValueRenderer
structlog.configure(
    processors=[
        structlog.processors.StackInfoRenderer(),
        structlog.dev.set_exc_info,
        structlog.processors.format_exc_info,
        # structlog.processors.TimeStamper(),
        # structlog.processors.KeyValueRenderer(),
        structlog.dev.ConsoleRenderer(),
    ],
    wrapper_class=structlog.BoundLogger,
    # or OrderedDict if the runtime's dict is unordered (e.g. Python <3.6)
    context_class=dict,
    logger_factory=structlog.PrintLoggerFactory(),
    cache_logger_on_first_use=False,
)

ns = Collection()
for module in [tasks, context, stack, mustack]:
    ns.add_collection(Collection.from_module(module))

program = Program(
    version="0.1.0",
    binary_names=["sk", "sanky"],
    namespace=ns,
)
import structlog
import pandas as pd

from trip_planner_api.util import get_duration_string_from_seconds
from trip_planner_api.util import get_gas_cost
from trip_planner_api.util import make_distance_matrix_request

structlog.configure(logger_factory=structlog.PrintLoggerFactory())
logger = structlog.get_logger(processors=[structlog.processors.JSONRenderer()])


def _get_driving_option(origin_lat, origin_lon, destination_lat, destination_lon):
    """
    Get the travel information for the driving option

    :param origin_lat: float: latitude of the origin
    :param origin_lon: float: longitude of the origin
    :param destination_lat: float: latitude of the destination
    :param destination_lon: float: longitude of the destination
    :return: pandas.DataFrame: one row containing the travel option of driving
    """
    driving_info = make_distance_matrix_request(
        origins="{},{}".format(origin_lat, origin_lon),
        destinations="{},{}".format(destination_lat, destination_lon),
        origin_labels=["origin"],
        destination_labels=["destination"],
        group_by_origins=True,
    )
    driving_info = driving_info["origin"]["destination"]
    duration = driving_info["duration"]["value"]
    Function intended as a processor for structlog. It adds information about
    the service environment and reasonable defaults when not running in Lambda.
    """
    event_dict['region'] = os.environ.get('REGION', os.uname().nodename)
    event_dict['service'] = os.environ.get('SERVICE', os.path.abspath(__file__))
    event_dict['stage'] = os.environ.get('STAGE', 'dev')
    return event_dict


# I couldn't get structlog and capsys to cooperate in tests,
# hence I'm using the stdlib logger in tests so that caplog can
# capture it, and logging via printing to stdout in production.
_logger_factory = structlog.stdlib.LoggerFactory() \
    if os.environ.get('STAGE') == 'localtest' \
    else structlog.PrintLoggerFactory()

structlog.configure_once(
    processors=[
        structlog.stdlib.add_log_level,
        add_service_context,
        structlog.processors.TimeStamper(fmt='iso', utc=True, key='ts'),
        structlog.processors.format_exc_info,
        structlog.processors.UnicodeDecoder(),
        structlog.processors.JSONRenderer(),
    ],
    context_class=dict,
    logger_factory=_logger_factory,
    cache_logger_on_first_use=True)

logger = structlog.get_logger()
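An example of what a call might emit outside the 'localtest' stage (the field values shown are illustrative, not captured output): PrintLoggerFactory writes one JSON document per event to stdout.

# Hypothetical call and approximate output:
logger.info("request handled", status=200)
# {"status": 200, "event": "request handled", "level": "info", "ts": "2024-01-01T00:00:00Z", ...}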
def collate(outdir: Union[str, Path]) -> None:
    """
    Collate the results of the product comparison.

    Firstly the results are merged with the framing geometry,
    and secondly they're summarised.
    """
    outdir = Path(outdir)
    log_fname = outdir.joinpath(DirectoryNames.LOGS.value, LogNames.COLLATE.value)

    if not log_fname.parent.exists():
        log_fname.parent.mkdir(parents=True)

    with open(log_fname, "w") as fobj:
        structlog.configure(logger_factory=structlog.PrintLoggerFactory(fobj),
                            processors=LOG_PROCESSORS)

        comparison_results_fname = outdir.joinpath(
            DirectoryNames.RESULTS.value, FileNames.RESULTS.value)

        _LOG.info("opening intercomparison results file",
                  fname=str(comparison_results_fname))

        with h5py.File(str(comparison_results_fname), "a") as fid:
            grp = fid[DatasetGroups.INTERCOMPARISON.value]

            for dataset_name in grp:
                _LOG.info("reading dataset", dataset_name=dataset_name)
                dataframe = read_h5_table(grp, dataset_name)

                # some important attributes
                framing = grp[dataset_name].attrs["framing"]
                thematic = grp[dataset_name].attrs["thematic"]
                proc_info = grp[dataset_name].attrs["proc-info"]

                _LOG.info(
                    "merging results with framing",
                    framing=framing,
                    dataset_name=dataset_name,
                )
                geo_dataframe = merge_framing(dataframe, framing)

                out_fname = outdir.joinpath(
                    DirectoryNames.RESULTS.value,
                    FileNames[MergeLookup[DatasetNames(dataset_name).name].value].value,
                )

                _LOG.info("saving as GeoJSON", out_fname=str(out_fname))
                geo_dataframe.to_file(str(out_fname), driver="GeoJSONSeq")

                _LOG.info("summarising")
                summary_dataframe = summarise(geo_dataframe, thematic, proc_info)

                out_dname = PPath(
                    DatasetGroups.SUMMARY.value,
                    DatasetNames[SummaryLookup[DatasetNames(dataset_name).name].value].value,
                )

                _LOG.info("saving summary table", out_dataset_name=str(out_dname))
                write_dataframe(summary_dataframe, str(out_dname), fid)