def build_cache(self, summary_file, target_variable, query):
    """Build an in-memory cache of benchmark results from a JSON summary file.

    Args:
        summary_file (str): Path to a JSON file containing an object with a
            'data' field - a list of dictionaries, one per experiment.
        target_variable (str): Name of the experiment parameter whose value
            is cached (converted to float).
    query (dict): Query that experiments must match (strict match policy);
            non-matching experiments are skipped.

    Side effects:
        Populates ``self.cache`` (key '<model>_<gpus>_<batch>' -> float) and
        the sorted lists ``self.nets``, ``self.batches`` and ``self.devices``.
    """
    with OpenFile(summary_file) as file_obj:
        summary = json.load(file_obj)
    self.cache = {}
    # Built-in `set` replaces the deprecated Python-2 `sets.Set` container.
    self.nets = set()
    self.batches = set()
    self.devices = set()
    for experiment in summary['data']:
        if target_variable not in experiment:
            print("target variable not in experiment, skipping")
            continue
        if not DictUtils.match(experiment, query, policy='strict'):
            continue
        # batch is an effective batch here
        key = '{0}_{1}_{2}'.format(
            experiment['exp.model_title'],
            experiment['exp.gpus'],
            experiment['exp.effective_batch']
        )
        self.cache[key] = float(experiment[target_variable])
        self.nets.add(experiment['exp.model_title'])
        self.batches.add(int(experiment['exp.effective_batch']))
        self.devices.add(str(experiment['exp.gpus']))
    self.nets = sorted(self.nets)
    self.batches = sorted(self.batches)
    # Device specs like '0', '0,1', '0,1,2,3' sort naturally by string length.
    self.devices = sorted(self.devices, key=len)
def parse(inputs, recursive=False, ignore_errors=False):
    """Parse benchmark log files (*.log).

    Args:
        inputs: Path specifiers of where to search for log files.
        recursive (bool): If true, parse directories found in `inputs`
            recursively.
        ignore_errors (bool): If true, ignore errors associated with parsing
            parameter values.

    Returns:
        Instance of this class.
    """
    inputs = inputs if isinstance(inputs, list) else [inputs]
    log_files = set()
    for file_path in inputs:
        if os.path.isdir(file_path):
            # Gather logs only from this directory. The previous code passed
            # the whole `inputs` list here, re-scanning every input once per
            # directory encountered.
            log_files.update(IOUtils.gather_files([file_path], "*.log", recursive))
        elif file_path.endswith('.log'):
            log_files.add(file_path)
    benchmarks = []
    for log_file in log_files:
        parameters = {}
        with OpenFile(log_file, 'r') as logfile:
            # The 'must_match' must be set to false. It says that not
            # every line in a log file must match key-value pattern.
            DictUtils.add(
                parameters, logfile,
                pattern='[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)',
                must_match=False, ignore_errors=ignore_errors
            )
        benchmarks.append(parameters)
    return BenchData(benchmarks, create_copy=False)
def load(inputs, **kwargs):
    """Load benchmark data (parsed from log files) from JSON/CSV/tarball input.

    A JSON file must contain an object with a 'data' field. This field is a
    list with dictionaries, each dictionary contains parameters for one
    benchmark: {"data":[{...}, {...}, {...}]}

    Args:
        inputs (str): File name of a JSON (*.json), compressed JSON
            (*.json.gz), CSV or compressed tarball of *.log files; anything
            else is forwarded to ``parse``.

    Returns:
        Instance of this class.
    """
    # JSON / compressed JSON input.
    is_json_file = IOUtils.is_json_file(inputs)
    if not is_json_file and isinstance(inputs, list) and len(inputs) == 1:
        is_json_file = IOUtils.is_json_file(inputs[0])
        inputs = inputs[0] if is_json_file else inputs
    if is_json_file:
        benchmarks = IOUtils.read_json(inputs, check_extension=True)
        if 'data' not in benchmarks:
            benchmarks = {'data': []}
            print("[WARNING]: No benchmark data found in '{}'".format(inputs))
        return BenchData(benchmarks['data'], create_copy=False)
    # CSV input: each row becomes one benchmark dictionary.
    is_csv_file = IOUtils.is_csv_file(inputs)
    if not is_csv_file and isinstance(inputs, list) and len(inputs) == 1:
        is_csv_file = IOUtils.is_csv_file(inputs[0])
        inputs = inputs[0] if is_csv_file else inputs
    if is_csv_file:
        with OpenFile(inputs, 'r') as fobj:
            reader = csv.DictReader(fobj)
            benchmarks = list(reader)
        return BenchData(benchmarks, create_copy=False)
    # Compressed tarball of *.log files.
    is_compressed_tarball = IOUtils.is_compressed_tarball(inputs)
    if not is_compressed_tarball and isinstance(inputs, list) and len(inputs) == 1:
        # BUG FIX: this previously called IOUtils.is_json_file, so a
        # single-element list holding a tarball path was never recognized.
        is_compressed_tarball = IOUtils.is_compressed_tarball(inputs[0])
        inputs = inputs[0] if is_compressed_tarball else inputs
    if is_compressed_tarball:
        benchmarks = []
        with tarfile.open(inputs, "r:gz") as archive:
            for member in archive.getmembers():
                if member.isfile() and member.name.endswith('.log'):
                    log_file = archive.extractfile(member)
                    if log_file is not None:
                        parameters = {}
                        DictUtils.add(
                            parameters, log_file,
                            pattern='[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)',
                            must_match=False, ignore_errors=True)
                        benchmarks.append(parameters)
        return BenchData(benchmarks, create_copy=False)
    # Fall back to parsing raw log files / directories.
    return BenchData.parse(inputs, **kwargs)
def parse_log_file(filename, ignore_errors=False):
    """Parse a single benchmark log file (possibly gzip-compressed).

    One log file corresponds to one benchmark. Parameters appear in the file
    as key-value pairs whose keys are wrapped in double underscores and whose
    values are JSON-parsable strings, for instance:

    * __exp.replica_batch__= 16
    * __results.training_time__= 33.343

    The parameter names are the keys with the ``__`` prefix/suffix stripped,
    e.g. 'exp.replica_batch' and 'results.training_time' above. Lines that do
    not match the key-value pattern are skipped. If a parameter occurs more
    than once, the last occurrence wins.

    Args:
        filename (str): Name of a file to parse (*.log or *.gz).
        ignore_errors (bool): If true, ignore parsing errors associated with
            parameter values.

    Returns:
        Dictionary with experiment parameters, for instance:
        {"exp.device_batch": 16, "exp.model": "resnet50"}
    """
    key_value_pattern = '[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)'
    params = {}
    with OpenFile(filename, 'r') as log_stream:
        # must_match=False: lines that are not key-value pairs are silently
        # ignored instead of being reported as errors.
        DictUtils.add(params,
                      log_stream,
                      pattern=key_value_pattern,
                      must_match=False,
                      ignore_errors=ignore_errors)
    return params