def parse_json_arguments(args):
    """Parse parameters, variables and extensions.

    Args:
        args (dict): Dictionary of command line arguments returned by
            `parse_arguments`. Is not modified.

    Returns:
        A tuple of (params, variables, extensions):
        - `params` is a dictionary of parameters (all params in args['P'])
        - `variables` is a dictionary of variables (all vars in args['V'])
        - `extensions` is a list of dictionaries (all extensions in args['E'])

    Raises:
        Exception: Re-raised if an extension in args['E'] is not JSON parsable
            (the offending extension is logged first).
    """
    # Make sure the three expected argument lists exist even when none of
    # -P/-V/-E were given on the command line.
    for param in ('P', 'V', 'E'):
        DictUtils.ensure_exists(args, param, [])
    params, variables, extensions = ({}, {}, [])
    # Items look like 'name=json_value'; every -P/-V item must match.
    DictUtils.add(params, args['P'], pattern='(.+?(?=[=]))=(.+)', must_match=True)
    DictUtils.add(variables, args['V'], pattern='(.+?(?=[=]))=(.+)', must_match=True)
    for extension in args['E']:
        try:
            extensions.append(json.loads(extension))
        except Exception:
            # FIX: logging.warn is deprecated -> logging.warning. A bare
            # `raise` re-raises the active exception with its traceback
            # intact (the old `raise err` added a redundant frame).
            logging.warning("Found non-json parsable extension: %s", extension)
            raise
    return params, variables, extensions
def parse_log_file(filename):
    """Parse a single benchmark log file.

    Parameters are stored in the file as key-value pairs. Values must be
    JSON-parsable strings, and every key is wrapped in double underscores,
    for instance:

    * __exp.device_batch__= 16
    * __results.training_time__= 33.343

    The surrounding underscores are stripped, so the parameter names from the
    example above are 'exp.device_batch' and 'results.training_time'.

    :param str filename: Name of a file to parse.
    :return: Dictionary with experiment parameters.
    :rtype: dict
    """
    # Not every line carries a parameter, hence must_match=False: lines that
    # do not match the key/value pattern are silently skipped.
    key_value_pattern = '[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)'
    exp_params = {}
    with open(filename) as logfile:
        DictUtils.add(exp_params, logfile, pattern=key_value_pattern, must_match=False)
    return exp_params
def parse(inputs, recursive=False, ignore_errors=False):
    """Parse benchmark log files (*.log).

    Args:
        inputs: Path specifiers of where to search for log files (a string or
            a list of strings; each item is a directory or a *.log file).
        recursive (bool): If true, parse directories found in `inputs` recursively.
        ignore_errors (bool): If true, ignore errors associated with parsing
            parameter values.

    Returns:
        Instance of this class (BenchData).
    """
    inputs = inputs if isinstance(inputs, list) else [inputs]
    log_files = set()
    for file_path in inputs:
        if os.path.isdir(file_path):
            # FIX: gather log files from THIS directory only. The original
            # code passed the whole `inputs` list here, re-scanning every
            # input (including plain file entries) once per directory.
            log_files.update(IOUtils.gather_files([file_path], "*.log", recursive))
        elif file_path.endswith('.log'):
            log_files.add(file_path)
    benchmarks = []
    for log_file in log_files:
        parameters = {}
        with OpenFile(log_file, 'r') as logfile:
            # Not every line in a log file must match the key-value pattern,
            # hence must_match=False.
            DictUtils.add(
                parameters,
                logfile,
                pattern='[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)',
                must_match=False,
                ignore_errors=ignore_errors
            )
        benchmarks.append(parameters)
    return BenchData(benchmarks, create_copy=False)
def load(inputs, **kwargs):
    """Load benchmark data from a JSON/CSV file, a compressed tarball, or raw logs.

    A JSON input is a file containing an object with a 'data' field. This
    field is a list of dictionaries, each dictionary holding parameters for
    one benchmark: {"data":[{...}, {...}, {...}]}

    Args:
        inputs (str): File name of a JSON (*.json) or a compressed JSON
            (*.json.gz) file, a CSV file, a compressed (*.tgz) tarball of
            *.log files, or anything `BenchData.parse` accepts (fallback).

    Returns:
        Instance of this class (BenchData).
    """
    # --- JSON input ---------------------------------------------------------
    is_json_file = IOUtils.is_json_file(inputs)
    if not is_json_file and isinstance(inputs, list) and len(inputs) == 1:
        is_json_file = IOUtils.is_json_file(inputs[0])
        inputs = inputs[0] if is_json_file else inputs
    if is_json_file:
        benchmarks = IOUtils.read_json(inputs, check_extension=True)
        if 'data' not in benchmarks:
            benchmarks = {'data': []}
            print("[WARNING]: No benchmark data found in '{}'".format(inputs))
        return BenchData(benchmarks['data'], create_copy=False)
    # --- CSV input ----------------------------------------------------------
    is_csv_file = IOUtils.is_csv_file(inputs)
    if not is_csv_file and isinstance(inputs, list) and len(inputs) == 1:
        is_csv_file = IOUtils.is_csv_file(inputs[0])
        inputs = inputs[0] if is_csv_file else inputs
    if is_csv_file:
        with OpenFile(inputs, 'r') as fobj:
            reader = csv.DictReader(fobj)
            benchmarks = list(reader)
        return BenchData(benchmarks, create_copy=False)
    # --- Compressed tarball of *.log files ----------------------------------
    is_compressed_tarball = IOUtils.is_compressed_tarball(inputs)
    if not is_compressed_tarball and isinstance(inputs, list) and len(inputs) == 1:
        # FIX: the original called IOUtils.is_json_file here (copy-paste bug),
        # so single-element lists containing a tarball were never recognized.
        is_compressed_tarball = IOUtils.is_compressed_tarball(inputs[0])
        inputs = inputs[0] if is_compressed_tarball else inputs
    if is_compressed_tarball:
        benchmarks = []
        with tarfile.open(inputs, "r:gz") as archive:
            for member in archive.getmembers():
                if member.isfile() and member.name.endswith('.log'):
                    log_file = archive.extractfile(member)
                    if log_file is not None:
                        parameters = {}
                        DictUtils.add(
                            parameters,
                            log_file,
                            pattern='[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)',
                            must_match=False,
                            ignore_errors=True)
                        benchmarks.append(parameters)
        return BenchData(benchmarks, create_copy=False)
    # --- Fallback: parse raw log files --------------------------------------
    return BenchData.parse(inputs, **kwargs)
def parse_log_file(filename, ignore_errors=False):
    """Parse one benchmark log file (possibly compressed, *.gz).

    One log file is associated with one benchmark. Parameters are defined in
    that file as key-value pairs where values are JSON-parsable strings and
    keys carry a ``__`` (double underscore) prefix and suffix, e.g.:

    * __exp.replica_batch__= 16
    * __results.training_time__= 33.343

    The underscores are stripped, so the names from the example above are
    'exp.replica_batch' and 'results.training_time'. Lines that do not match
    the key/value pattern are ignored. If a parameter occurs several times,
    only its last value is kept.

    Args:
        filename (str): Name of a file to parse.
        ignore_errors (bool): If true, ignore parsing errors associated with
            parameter values.

    Returns:
        Dictionary with experiment parameters, for instance:
        {"exp.device_batch": 16, "exp.model": "resnet50"}
    """
    key_value_pattern = '[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)'
    parameters = {}
    # must_match=False: non-matching lines in the log are simply skipped.
    with OpenFile(filename, 'r') as logfile:
        DictUtils.add(parameters, logfile,
                      pattern=key_value_pattern,
                      must_match=False,
                      ignore_errors=ignore_errors)
    return parameters
def main():
    """Post-process one benchmark log file for a given backend.

    Usage: logger.py BACKEND LOG_FILE

    Parses parameters from LOG_FILE, computes missing/derived parameters
    (common ones plus backend-specific ones) and appends them to LOG_FILE.
    """
    if len(sys.argv) != 3:
        print("Usage: logger.py BACKEND LOG_FILE")
        # FIX: use sys.exit - the builtin exit() is a site-module convenience
        # and is not guaranteed to exist in all execution environments.
        sys.exit(1)
    backend = sys.argv[1]
    log_file = sys.argv[2]
    # We may need to iterate multiple times over log records, so reading the
    # log file into a list is preferable.
    with open(log_file) as records:
        log_records = [record.strip() for record in records]
    # Parse parameters; lines not matching the key/value pattern are skipped.
    params = {}
    DictUtils.add(params, log_records,
                  pattern='[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)',
                  must_match=False, ignore_errors=True)
    updates = {}
    # Common checks that do not depend on a particular backend:
    # 1. Set a human-readable model title when a model is known but no title is set.
    model = params.get('exp.model', '')
    if model != '' and params.get('exp.model_title', '') == '':
        updates['exp.model_title'] = MODEL_TITLES.get(model, model)
    # Backend-specific checks.
    if backend == 'tf_cnn_benchmarks':
        TfCnnBenchmarksBackend.check(log_records, params, updates)
    # Append computed updates to the log file, if any.
    if updates:
        with open(log_file, "a") as file_obj:
            for param in updates:
                file_obj.write("__%s__=%s\n" % (param, json.dumps(updates[param])))
def init(self, init_logger=False, load_default_config=True, load_config=True):
    """Initializes experimenter.

    :param bool init_logger: If True, initializes loggers
    :param bool load_default_config: If false, does not load standard configuration.
    :param bool load_config: If true, loads configuration specified on a command line

    Raises:
        Exception: Re-raised if a '-E' extension is not JSON parsable
            (the offending extension is logged first).
    """
    # Parse command line arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'action', type=str,
        help='Action to perform. Valid actions: "print-config", "run", "build" and "analyze-plan".'
    )
    parser.add_argument(
        '--config', required=False, type=str,
        help='Configuration file (json) of an experiment. '
             'Will override values from default configuration.'
    )
    parser.add_argument(
        '--plan', required=False, type=str,
        help='Pre-built plan of an experiment (json). '
             'If action is "build", a file name to write plan to. '
             'If action is "run", a file name to read plan from.'
    )
    parser.add_argument(
        '--progress_file', '--progress-file', required=False, type=str, default=None,
        help='A JSON file that experimenter will be updating on its progress. '
             'If not present, no progress info will be available. '
             'Put it somewhere in /dev/shm'
    )
    parser.add_argument(
        '-P', action='append', required=False, default=[],
        help='Parameters that override parameters in configuration file. '
             'For instance, -Pexp.phase=2. Values must be json parsable (json.loads()).'
    )
    parser.add_argument(
        '-V', action='append', required=False, default=[],
        help='Variables that override variables in configuration file in section "variables". '
             'These variables are used to generate different combinations of experiments. '
             'For instance: -Vexp.framework=\'["tensorflow", "caffe2"]\'. '
             'Values must be json parsable (json.loads()).'
    )
    parser.add_argument(
        '--log_level', '--log-level', required=False, default='info',
        help='Python logging level. Valid values: "critical", "error", "warning", "info" and "debug"'
    )
    parser.add_argument(
        '--discard_default_config', '--discard-default-config', required=False,
        default=False, action='store_true',
        help='Do not load default configuration.'
    )
    parser.add_argument(
        '--no_validation', '--no-validation', required=False,
        default=False, action='store_true',
        help='Do not perform config validation before running benchmarks.'
    )
    parser.add_argument(
        '-E', action='append', required=False, default=[],
        help='Extensions to add. Can be usefull to quickly customize experiments. '
             'Must be valid json parsable array element for "extension" array.'
    )
    args = parser.parse_args()
    log_level = logging.getLevelName(args.log_level.upper())
    self.action = args.action
    self.config_file = args.config
    self.plan_file = args.plan
    self.validation = not args.no_validation
    self.__progress_file = args.progress_file
    # Initialize logger.
    if init_logger:
        logging.debug("Initializing logger to level %s", args.log_level)
        root = logging.getLogger()
        root.setLevel(log_level)
        handler = logging.StreamHandler(sys.stdout)
        handler.setLevel(log_level)
        root.addHandler(handler)
    logging.debug("Parsing parameters on a command line")
    DictUtils.add(self.params, args.P, pattern='(.+?(?=[=]))=(.+)', must_match=True)
    logging.debug("Parsing variables on a command line")
    DictUtils.add(self.variables, args.V, pattern='(.+?(?=[=]))=(.+)', must_match=True)
    # Load default configuration.
    if load_default_config and not args.discard_default_config:
        logging.debug("Loading default configuration")
        _, self.config, self.param_info = ConfigurationLoader.load(
            os.path.join(os.path.dirname(__file__), 'configs'))
    # Load configurations specified on a command line.
    if load_config:
        logging.debug("Loading user configuration")
        self.load_configuration()
    # Add extensions from command line.
    DictUtils.ensure_exists(self.config, 'extensions', [])
    if len(args.E) > 0:
        logging.debug("Parsing extensions on a command line")
        for extension in args.E:
            try:
                ext = json.loads(extension)
                logging.debug('Found extension: %s', str(ext))
                self.config['extensions'].append(ext)
            except Exception:
                # FIX: logging.warn is deprecated -> logging.warning. Bare
                # `raise` re-raises with the original traceback intact.
                logging.warning("Found non-json parsable extension: %s", extension)
                raise