def parse_log_file(filename):
    """ Parses one log file.

    Parameters are defined in that file as key-value pairs. Values must be
    JSON parsable strings. Every key has a prefix and a suffix equal to ``__``
    (two underscores), for instance:

    * __exp.device_batch__= 16
    * __results.training_time__= 33.343

    Parameters are keys without prefixes and suffixes i.e. 'exp.device_batch'
    and 'results.training_time' are parameter names from the above example.

    :param str filename: Name of a file to parse.
    :return: Dictionary with experiment parameters.
    :rtype: dict
    """
    # __(.+?(?=__[=]))__=(.+)
    # [ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)
    exp_params = {}
    with open(filename) as logfile:
        # The 'must_match' must be set to false. It says that not every line
        # in a log file must match the key-value pattern.
        DictUtils.add(
            exp_params, logfile,
            pattern='[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)',
            must_match=False
        )
    return exp_params

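# Usage sketch for the parser above. A benchmark log is expected to contain lines such as
#     __exp.device_batch__= 16
#     __results.training_time__= 33.343
# and parse_log_file() returns them as {'exp.device_batch': 16, 'results.training_time': 33.343}.
# The helper below applies the same regular expression manually, purely as an illustration of
# what DictUtils.add is assumed to do with each line (the JSON-parse-with-raw-string-fallback
# behavior is an assumption, not documented behavior):
import json
import re

_PARAM_LINE = re.compile(r'[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)')

def _parse_param_line(line):
    """Return (name, value) for a '__key__= value' line, or None if the line does not match."""
    match = _PARAM_LINE.match(line)
    if match is None:
        return None
    name, raw_value = match.group(1), match.group(2).strip()
    try:
        value = json.loads(raw_value)   # values are documented to be JSON parsable strings
    except ValueError:
        value = raw_value               # assumption: fall back to the raw string on parse errors
    return name, value

# Example: _parse_param_line('__exp.device_batch__= 16') -> ('exp.device_batch', 16)
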
def build_strong_scaling_report(self, jsonfile):
    """ Builds strong scaling report for multi-GPU training. """
    header = "%-20s %-10s" % ('Network', 'Batch')
    for device in self.devices:
        header = "%s %-10s" % (header, (1 + device.count(',')))
    report = []
    json_report = {'data': []}
    for net in self.nets:
        for batch in self.batches:
            profile = {
                'net': net,
                'batch': batch,
                'time': [],
                'throughput': [],
                'efficiency': [],
                'speedup': []
            }
            json_profile = SummaryBuilder.default_json_profile(net, 'strong', batch)
            profile_ok = False
            # device here is '0', '0,1', '0,1,2,3' ...
            for device in self.devices:
                key = '{0}_{1}_{2}'.format(net, device, batch)
                batch_tm = throughput = efficiency = speedup = -1
                num_devices = 1 + device.count(',')
                if key in self.cache:
                    batch_tm = self.cache[key]
                    throughput = int(batch * (1000.0 / batch_tm))
                    json_profile['perf']['data'][str(num_devices)] = batch_tm
                    if len(profile['throughput']) == 0:
                        speedup = 1
                    else:
                        speedup = 1.0 * throughput / profile['throughput'][0]
                    if len(profile['efficiency']) == 0:
                        efficiency = 100.00
                        profile_ok = True
                    elif profile['time'][0] > 0:
                        efficiency = int(10000.0 * profile['time'][0] / (num_devices * batch_tm)) / 100.0
                        profile_ok = True
                profile['time'].append(batch_tm)
                profile['throughput'].append(throughput)
                profile['efficiency'].append(efficiency)
                profile['speedup'].append(speedup)
            if profile_ok:
                report.append(profile)
                json_report['data'].append(json_profile)
    SummaryBuilder.print_report_txt(BATCH_TM_TITLE, header, report, 'net', 'batch', 'time')
    SummaryBuilder.print_report_txt(IPS_TITLE, header, report, 'net', 'batch', 'throughput')
    SummaryBuilder.print_report_txt(SPEEDUP_TITLE, header, report, 'net', 'batch', 'speedup')
    SummaryBuilder.print_report_txt("Efficiency = 100% * t1 / (N * tN)", header, report,
                                    'net', 'batch', 'efficiency')
    DictUtils.dump_json_to_file(json_report, jsonfile)

def build_exploration_report(self, report_file):
    """ Builds exploration report for inference and single device training. """
    header = "%-20s %-10s" % ('Network', 'Device')
    for batch in self.batches:
        header = "%s %-10s" % (header, batch)
    report = []
    json_report = {'data': []}
    for net in self.nets:
        for device in self.devices:
            profile = {
                'net': net,
                'device': device,
                'time': [],
                'throughput': []
            }
            profile_ok = False
            for batch in self.batches:
                key = '{0}_{1}_{2}'.format(net, device, batch)
                batch_tm = throughput = -1
                if key in self.cache and self.cache[key] > 0:
                    batch_tm = self.cache[key]
                    profile_ok = True
                    throughput = int(batch * (1000.0 / batch_tm))
                    json_profile = SummaryBuilder.default_json_profile(net, 'strong', batch)
                    json_profile['perf']['data']['1'] = batch_tm
                    json_report['data'].append(json_profile)
                profile['time'].append(round(batch_tm, 3))
                profile['throughput'].append(throughput)
            if profile_ok:
                report.append(profile)
    SummaryBuilder.print_report_txt(BATCH_TM_TITLE, header, report, 'net', 'device', 'time')
    SummaryBuilder.print_report_txt(IPS_TITLE, header, report, 'net', 'device', 'throughput')
    DictUtils.dump_json_to_file(json_report, report_file)

def filter_benchmarks(args):
    """Filter benchmarks by removing those that do not contain provided parameters.

    :param argparse args: Command line arguments. The following command line
                          arguments are used:

    * ``args.input_file``   A file with benchmark results.
    * ``args.params``       Specification of mandatory parameters. For format,
                            read comments of the ``get_params`` function.
    * ``args.output_file``  An output file with updated benchmark results.
    """
    # Load benchmarks and parameters.
    input_benchmarks = load_json_file(args.input_file)['data']
    params = get_params(args.params)
    # Filter benchmarks.
    output_benchmarks = []
    for input_benchmark in input_benchmarks:
        keep = True
        for key in params:
            if key not in input_benchmark or not input_benchmark[key]:
                keep = False
                break
        if keep:
            output_benchmarks.append(input_benchmark)
    # Report results and serialize.
    print("Number of input benchmarks: %d" % len(input_benchmarks))
    print("Number of output benchmarks: %d" % len(output_benchmarks))
    DictUtils.dump_json_to_file({"data": output_benchmarks}, args.output_file)

def update_benchmarks(args):
    """Update benchmarks by overriding parameters provided by a user.

    :param argparse args: Command line arguments. The following command line
                          arguments are used:

    * ``args.input_file``   A file with benchmark results.
    * ``args.params``       Specification of parameters to set or override. For
                            format, read comments of the ``get_params`` function.
    * ``args.output_file``  An output file with updated benchmark results.
    """
    # Load benchmarks and parameters.
    benchmarks = load_json_file(args.input_file)['data']
    prefix = '__'
    params = {prefix + k: v for k, v in get_params(args.params).items()}
    # Add prefixed parameters to all benchmarks.
    for benchmark in benchmarks:
        benchmark.update(params)
    # Process and compute variables.
    Processor().compute_variables(benchmarks)
    # Remove the prefix; in case of a conflict, the user-provided value overwrites
    # the existing variable.
    prefixed_keys = params.keys()
    prefix_len = len(prefix)
    output_benchmarks = []
    for benchmark in benchmarks:
        for k in prefixed_keys:
            benchmark[k[prefix_len:]] = benchmark[k]
            del benchmark[k]
        if benchmark['exp.model'] != '':
            output_benchmarks.append(benchmark)
    benchmarks = output_benchmarks
    # Serialize updated benchmarks.
    DictUtils.dump_json_to_file({"data": benchmarks}, args.output_file)

def build_weak_scaling_report(self, jsonfile):
    """ Builds weak scaling report for multi-GPU training. """
    header = "%-20s %-10s" % ('Network', 'Batch')
    for device in self.devices:
        header = "%s %-10d" % (header, (1 + device.count(',')))
    report = []
    json_report = {'data': []}
    for net in self.nets:
        for batch in self.batches:
            # batch is the base 'batch size' i.e. for one GPU
            profile = {
                'net': net,          # network name
                'batch': batch,      # per device batch size
                'time': [],          # batch times
                'throughput': [],    # throughput
                'efficiency': [],    # efficiency
                'speedup': []        # speedup
            }
            json_profile = SummaryBuilder.default_json_profile(net, 'weak', batch)
            profile_ok = False
            for device in self.devices:
                # Weak scaling: we want to find results for the effective batch
                # size, which is N * batch.
                num_devices = 1 + device.count(',')
                key = '{0}_{1}_{2}'.format(net, device, (batch * num_devices))
                if num_devices == 1 and key not in self.cache:
                    # If we do not have data for one device, it does not make
                    # sense to continue.
                    break
                batch_tm = throughput = efficiency = speedup = -1.0
                if key in self.cache:
                    batch_tm = self.cache[key]
                    throughput = int((num_devices * batch) * (1000.0 / batch_tm))
                    json_profile['perf']['data'][str(num_devices)] = batch_tm
                    if len(profile['throughput']) == 0:
                        speedup = 1
                    else:
                        speedup = 1.0 * throughput / profile['throughput'][0]
                    if len(profile['efficiency']) == 0:
                        efficiency = 100.00
                        profile_ok = True
                    elif profile['time'][0] > 0:
                        efficiency = int(10000.0 * profile['time'][0] / batch_tm) / 100.0
                        profile_ok = True
                profile['time'].append(batch_tm)
                profile['throughput'].append(int(throughput))
                profile['efficiency'].append(efficiency)
                profile['speedup'].append(speedup)
            if profile_ok:
                report.append(profile)
                json_report['data'].append(json_profile)
    SummaryBuilder.print_report_txt(BATCH_TM_TITLE, header, report, 'net', 'batch', 'time')
    SummaryBuilder.print_report_txt(IPS_TITLE, header, report, 'net', 'batch', 'throughput')
    SummaryBuilder.print_report_txt(SPEEDUP_TITLE, header, report, 'net', 'batch', 'speedup')
    SummaryBuilder.print_report_txt(
        "Efficiency = 100% * t1 / tN", header, report, 'net', 'batch', 'efficiency'
    )
    DictUtils.dump_json_to_file(json_report, jsonfile)

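# Quick numeric illustration of the two efficiency formulas printed by the strong and weak
# scaling reports above (all numbers are made up). For strong scaling the global batch is
# fixed, so an ideal N-device run takes t1 / N and efficiency = 100 * t1 / (N * tN). For weak
# scaling the per-device batch is fixed (the effective batch grows with N), so an ideal run
# still takes t1 and efficiency = 100 * t1 / tN.
def scaling_efficiency(t1, tn, num_devices, weak=False):
    """Return scaling efficiency in percent given 1-device and N-device batch times (ms)."""
    if weak:
        return 100.0 * t1 / tn
    return 100.0 * t1 / (num_devices * tn)

# For example, with t1 = 100 ms and t4 = 30 ms on 4 GPUs (hypothetical values):
#     strong scaling efficiency = 100 * 100 / (4 * 30) ~ 83.3%
# and with t1 = 100 ms and t4 = 110 ms at 4x the effective batch:
#     weak scaling efficiency   = 100 * 100 / 110 ~ 90.9%
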
def parse(inputs, recursive=False, ignore_errors=False):
    """Parse benchmark log files (*.log).

    Args:
        inputs: Path specifiers of where to search for log files.
        recursive (bool): If true, parse directories found in `inputs` recursively.
        ignore_errors (bool): If true, ignore errors associated with parsing parameter values.

    Returns:
        Instance of this class.
    """
    inputs = inputs if isinstance(inputs, list) else [inputs]
    log_files = set()
    for file_path in inputs:
        if os.path.isdir(file_path):
            log_files.update(IOUtils.gather_files(inputs, "*.log", recursive))
        elif file_path.endswith('.log'):
            log_files.add(file_path)
    log_files = list(log_files)

    benchmarks = []
    for log_file in log_files:
        parameters = {}
        with OpenFile(log_file, 'r') as logfile:
            # The 'must_match' must be set to false. It says that not every
            # line in a log file must match the key-value pattern.
            DictUtils.add(
                parameters, logfile,
                pattern='[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)',
                must_match=False, ignore_errors=ignore_errors
            )
        benchmarks.append(parameters)
    return BenchData(benchmarks, create_copy=False)

def build_cache(self, inputs=None, output=None, output_cols=None):
    self.input_cols = [None] * len(inputs)
    for idx, param in enumerate(inputs):
        self.input_cols[idx] = {
            "index": idx,
            "param": param,
            "width": 0,
            "title": DictUtils.get(BenchData.Reporter.TITLES, param, param),
            "vals": sorted(self.bench_data.select_values(param))
        }
    self.output_param = output
    output_cols = output_cols if output_cols else sorted(self.bench_data.select_values(output))
    self.output_cols = [None] * len(output_cols)
    for idx, param_value in enumerate(output_cols):
        self.output_cols[idx] = {
            "index": idx,
            "value": param_value,
            "title": param_value,
            "width": len(BenchData.Reporter.to_string(param_value))
        }
    self.cache = {}
    for bench in self.bench_data.benchmarks():
        if BenchData.status(bench) != "ok":
            continue
        bench_key = []
        for input_col in self.input_cols:
            param_value = DictUtils.get(bench, input_col['param'], None)
            if not param_value:
                bench_key = []
                break
            bench_key.append(str(param_value))
        if bench_key:
            output_val = DictUtils.get(bench, self.output_param, None)
            if output_val:
                bench_key = '.'.join(bench_key + [str(output_val)])
                if bench_key not in self.cache:
                    self.cache[bench_key] = bench
                else:
                    raise ValueError("Duplicate benchmark with key = {}".format(bench_key))

def run(self):
    """Runs subprocess with Popen.

    This method must not be called directly. Use the blocking
    :py:meth:`~dlbs.Worker.work` method instead.
    """
    try:
        # Dump parameters to a log file or to standard output.
        DictUtils.ensure_exists(self.params, 'exp.log_file', default_value='')
        if self.params['exp.log_file'].strip() == '':
            self.params['exp.log_file'] = '/dev/stdout'
        IOUtils.mkdirf(self.params['exp.log_file'])
        with open(self.params['exp.log_file'], 'a+') as log_file:
            self.__dump_parameters(log_file)
        # This is where we launch the process. Keep in mind that the log file
        # that's supposed to be created is exp.log_file or exp_log_file in the
        # script. Other output of the launching script will be printed by this
        # Python code to standard output.
        self.process = subprocess.Popen(self.command, universal_newlines=True,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.STDOUT,
                                        env=self.environ)
        while True:
            output = self.process.stdout.readline()
            if output == '' and self.process.poll() is not None:
                break
            if output:
                sys.stdout.write(output)
                sys.stdout.flush()
        self.ret_code = self.process.poll()
    except Exception as err:
        logging.warn('Exception has been caught for experiment %s: %s',
                     self.params.get('exp.id'), str(err))
        logging.warn(traceback.format_exc())
        self.ret_code = -1

def summary(self, params=None):
    """Return summary of benchmarks providing additional info on `params`.

    Args:
        params (list): List of parameters to provide additional info for.
            If empty, default list is used.

    Returns:
        dict: A summary of benchmarks.
    """
    if not params:
        params = ['exp.node_id', 'exp.node_title', 'exp.gpu_title', 'exp.gpu_id',
                  'exp.framework_title', 'exp.framework_id']
    summary_dict = {
        'num_benchmarks': len(self.__benchmarks),
        'num_failed_benchmarks': 0,
        'num_successful_benchmarks': 0
    }
    for param in params:
        summary_dict[param] = set()
    for bench in self.__benchmarks:
        if DictUtils.get(bench, 'results.time', -1) > 0:
            summary_dict['num_successful_benchmarks'] += 1
        else:
            summary_dict['num_failed_benchmarks'] += 1
        for param in params:
            summary_dict[param].add(DictUtils.get(bench, param, None))
    for param in params:
        summary_dict[param] = list(summary_dict[param])
    return summary_dict

def load(inputs, **kwargs):
    """Load benchmark data (parsed from log files) from a JSON file.

    The input is a JSON file that contains an object with a 'data' field. This
    field is a list of dictionaries, each containing parameters for one benchmark:
        {"data":[{...}, {...}, {...}]}
    CSV files, compressed tarballs with *.log files and raw log files are also
    accepted and handled by the fallback branches below.

    Args:
        inputs (str): File name of a JSON (*.json) or a compressed JSON (*.json.gz) file.

    Returns:
        Instance of this class.
    """
    is_json_file = IOUtils.is_json_file(inputs)
    if not is_json_file and isinstance(inputs, list) and len(inputs) == 1:
        is_json_file = IOUtils.is_json_file(inputs[0])
        inputs = inputs[0] if is_json_file else inputs
    if is_json_file:
        benchmarks = IOUtils.read_json(inputs, check_extension=True)
        if 'data' not in benchmarks:
            benchmarks = {'data': []}
            print("[WARNING]: No benchmark data found in '{}'".format(inputs))
        return BenchData(benchmarks['data'], create_copy=False)
    #
    is_csv_file = IOUtils.is_csv_file(inputs)
    if not is_csv_file and isinstance(inputs, list) and len(inputs) == 1:
        is_csv_file = IOUtils.is_csv_file(inputs[0])
        inputs = inputs[0] if is_csv_file else inputs
    if is_csv_file:
        with OpenFile(inputs, 'r') as fobj:
            reader = csv.DictReader(fobj)
            benchmarks = list(reader)
        return BenchData(benchmarks, create_copy=False)
    #
    is_compressed_tarball = IOUtils.is_compressed_tarball(inputs)
    if not is_compressed_tarball and isinstance(inputs, list) and len(inputs) == 1:
        is_compressed_tarball = IOUtils.is_compressed_tarball(inputs[0])
        inputs = inputs[0] if is_compressed_tarball else inputs
    if is_compressed_tarball:
        benchmarks = []
        with tarfile.open(inputs, "r:gz") as archive:
            for member in archive.getmembers():
                if member.isfile() and member.name.endswith('.log'):
                    log_file = archive.extractfile(member)
                    if log_file is not None:
                        parameters = {}
                        DictUtils.add(
                            parameters, log_file,
                            pattern='[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)',
                            must_match=False, ignore_errors=True
                        )
                        benchmarks.append(parameters)
        return BenchData(benchmarks, create_copy=False)
    #
    return BenchData.parse(inputs, **kwargs)

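# Usage sketch for the loader above (file names are hypothetical). The JSON input is expected
# to have the shape described in the docstring:
#     {"data": [{"exp.model": "...", "results.time": 85.4, ...}, {...}]}
#
# bench_data = BenchData.load('benchmarks.json.gz')
# print(bench_data.summary(params=['exp.framework_title', 'exp.gpu_title']))
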
def report_active(self, log_file):
    if self.__file_name:
        self.__progress['active_benchmark'] = {
            'status': 'inprogress',
            'start_time': str(datetime.datetime.now()),
            'stop_time': None,
            'log_file': log_file
        }
        DictUtils.dump_json_to_file(self.__progress, self.__file_name)

def build_plan(self, serialize=False):
    """Builds plan combining configuration, parameters and variables."""
    self.plan = Builder.build(self.config, self.params, self.variables)
    if serialize:
        if self.plan_file:
            DictUtils.dump_json_to_file(self.plan, self.plan_file)
        else:
            json.dump(self.plan, sys.stdout, indent=4)
            print('')

def report_active_completed(self):
    if self.__file_name:
        self.__progress['active_benchmark']['stop_time'] = str(datetime.datetime.now())
        self.__progress['active_benchmark']['status'] = 'completed'
        self.__progress['completed_benchmarks'].append(self.__progress['active_benchmark'])
        self.__progress['num_completed_benchmarks'] += 1
        self.__progress["active_benchmark"] = {}
        DictUtils.dump_json_to_file(self.__progress, self.__file_name)

def compute_variables(self, experiments):
    """Main entry point - compute all variables in all experiments.

    Args:
        experiments (list): A list of experiments that need to be computed.
            It's modified in place.
    """
    for experiment in experiments:
        # Convert all lists to strings.
        DictUtils.lists_to_strings(experiment)
        # Build initial version of a forward index (variables -> their dependencies).
        self.fwd_index = {}
        for variable in experiment:
            self.update_index(experiment, variable)
        # Iteratively compute variables.
        while len(self.fwd_index) > 0:
            computable_vars = self.get_computable_variables()
            # print("Computable vars: %s" % (str(computable_vars)))
            if len(computable_vars) == 0:
                self.report_unsatisfied_deps(experiment)
                exit(1)
            # Compute variables. We are either done with a variable or this
            # variable has nested references and we need to continue computing it.
            computed, partially_computed = self.compute_current_variables(experiment, computable_vars)
            # print("Computed vars: %s" % (str(computed)))
            # print("Partially computed vars: %s" % (str(partially_computed)))
            # Remove computed vars from index and update dependencies of
            # remaining variables.
            for computed_var in computed:
                self.fwd_index.pop(computed_var)
            for var in self.fwd_index:
                self.fwd_index[var]['udeps'].difference_update(set(computed))
            # Update partially computed variables - these are variables that
            # have nested references.
            for var in partially_computed:
                self.update_index(experiment, var)
                deps = self.fwd_index[var]['udeps'].copy()
                for dep in deps:
                    if dep not in self.fwd_index:
                        self.fwd_index[var]['udeps'].remove(dep)
        # exit(0)
        # We need to remove all internal temp variables. In Python 2, keys()
        # makes a copy; in Python 3 it returns an iterator, which would raise a
        # 'dictionary changed size during iteration' error. So, make a copy.
        for name in list(experiment.keys()):
            if name.startswith('__dlbs_'):
                experiment.pop(name)

def test_ensure_exists_1(self):
    """dlbs -> TestDictUtils::test_ensure_exists_1 [Testing dictionary helpers #1]"""
    DictUtils.ensure_exists(self.dictionary, 'exp.framework')
    self.assertEqual('exp.framework' in self.dictionary, True)
    self.assertEqual('exp.model' in self.dictionary, True)
    self.assertEqual('exp.device_batch' in self.dictionary, True)
    self.assertEqual(self.dictionary['exp.framework'], self.framework)
    self.assertEqual(self.dictionary['exp.model'], self.model)
    self.assertEqual(self.dictionary['exp.device_batch'], self.device_batch)
    self.assertEqual(len(self.dictionary), 3)

def report(self, inputs=None, output=None, output_cols=None, report_speedup=False,
           report_efficiency=False, **kwargs):
    DictUtils.ensure_exists(kwargs, 'report_batch_times', True)
    DictUtils.ensure_exists(kwargs, 'report_input_specs', True)
    # Build cache that will map benchmark keys to benchmark objects.
    self.build_cache(inputs, output, output_cols)
    # Iterate over column values and build tables with batch times and throughput.
    cols = []
    times = []
    throughputs = []
    benchmark_keys = [input_col['vals'] for input_col in self.input_cols]
    # Build tables for batch times and benchmark throughputs.
    # The `benchmark_key` is a tuple of column values e.g. ('ResNet50', 256).
    for benchmark_key in itertools.product(*benchmark_keys):
        cols.append(copy.deepcopy(benchmark_key))
        times.append([None] * len(self.output_cols))
        throughputs.append([None] * len(self.output_cols))
        for output_col in self.output_cols:
            benchmark_key = [str(key) for key in benchmark_key]
            key = '.'.join(benchmark_key + [str(output_col['value'])])
            if key in self.cache:
                times[-1][output_col['index']] = self.cache[key]['results.time']
                throughputs[-1][output_col['index']] = self.cache[key]['results.throughput']
    # Determine minimal widths for columns.
    self.compute_column_widths(times, throughputs)
    #
    header = self.get_header()
    if kwargs['report_batch_times']:
        self.print_table("Batch time (milliseconds)", header, cols, times)
    self.print_table("Throughput (instances per second e.g. images/sec)", header, cols, throughputs)
    if report_speedup:
        speedups = self.compute_speedups(throughputs)
        self.print_table("Speedup (based on instances per second table, "
                         "relative to first output column ({} = {}))".format(self.output_param,
                                                                              self.output_cols[0]['value']),
                         header, cols, speedups)
    if report_efficiency:
        efficiency = self.compute_efficiency(times)
        self.print_table("Efficiency (based on batch times table, "
                         "relative to first output column ({} = {}))".format(self.output_param,
                                                                              self.output_cols[0]['value']),
                         header, cols, efficiency)
    if kwargs['report_input_specs']:
        print("This report is configured with the following parameters:")
        print(" inputs = %s" % str(inputs))
        print(" output = %s" % output)
        print(" output_cols = %s" % str(output_cols))
        print(" report_speedup = %s" % str(report_speedup))
        print(" report_efficiency = %s" % str(report_efficiency))

def report(self, log_file, status, counts=True):
    if self.__file_name:
        self.__progress['completed_benchmarks'].append({
            'status': status,
            'start_time': str(datetime.datetime.now()),
            'stop_time': str(datetime.datetime.now()),
            'log_file': log_file
        })
        if counts:
            self.__progress['num_completed_benchmarks'] += 1
        DictUtils.dump_json_to_file(self.__progress, self.__file_name)

def check_variable_value(self, experiment, var):
    """ Check if variable contains correct value according to parameter info.

    Args:
        experiment (dict): A dictionary with experiment parameters.
        var (str): Name of a parameter.
    """
    if self.param_info is None or var not in self.param_info:
        return
    pi = self.param_info[var]
    ParamUtils.check_value(
        var,                                     # Parameter name
        experiment[var],                         # Parameter value
        DictUtils.get(pi, 'val_domain', None),   # Value domain constraints.
        DictUtils.get(pi, 'val_regexp', None)    # Value regexp constraints.
    )

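# Illustrative shape of a param_info entry consumed above (inferred from the two DictUtils.get
# calls; the parameter name and values are examples, not taken from the real parameter info
# files):
# self.param_info = {
#     'exp.device_type': {
#         'val_domain': ['cpu', 'gpu'],   # allowed values, or None if unconstrained
#         'val_regexp': None              # optional regular-expression constraint on the value
#     }
# }
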
def test_lists_to_strings_2(self):
    """dlbs -> TestDictUtils::test_lists_to_strings_2 [Testing lists-to-strings helpers #2]"""
    DictUtils.lists_to_strings(self.dictionary, separator=';')
    self.assertEqual('exp.framework' in self.dictionary, True)
    self.assertEqual('exp.model' in self.dictionary, True)
    self.assertEqual('exp.device_batch' in self.dictionary, True)
    self.assertEqual(self.dictionary['exp.framework'], self.framework)
    self.assertEqual(self.dictionary['exp.model'], "ResNet50;ResNet101;ResNet152")
    self.assertEqual(self.dictionary['exp.device_batch'], self.device_batch)
    self.assertEqual(len(self.dictionary), 3)

def report_active(self, log_file):
    """ Report that a new active benchmark has just started.

    Args:
        log_file (str): A log file for the currently active benchmark.
    """
    self.__progress['active_benchmark'] = {
        'exec_status': 'inprogress',
        'status': None,
        'start_time': datetime.datetime.now(),
        'end_time': None,
        'log_file': log_file
    }
    if self.__file_name:
        DictUtils.dump_json_to_file(self.__progress, self.__file_name)

def build_cache(self, summary_file, target_variable, query):
    """Loads data from json file."""
    with OpenFile(summary_file) as file_obj:
        summary = json.load(file_obj)
    self.cache = {}
    self.nets = Set()
    self.batches = Set()
    self.devices = Set()
    for experiment in summary['data']:
        if target_variable not in experiment:
            print("target variable not in experiment, skipping")
            continue
        if not DictUtils.match(experiment, query, policy='strict'):
            continue
        # batch is an effective batch here
        key = '{0}_{1}_{2}'.format(
            experiment['exp.model_title'],
            experiment['exp.gpus'],
            experiment['exp.effective_batch']
        )
        self.cache[key] = float(experiment[target_variable])
        self.nets.add(experiment['exp.model_title'])
        self.batches.add(int(experiment['exp.effective_batch']))
        self.devices.add(str(experiment['exp.gpus']))
    self.nets = sorted(list(self.nets))
    self.batches = sorted(list(self.batches))
    self.devices = sorted(list(self.devices), key=len)

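# The cache key built above has the form '<model title>_<gpu list>_<effective batch>', so a
# (hypothetical) 4-GPU ResNet50 run with an effective batch of 512 would be stored under
# 'ResNet50_0,1,2,3_512'. The strong scaling, weak scaling and exploration report builders
# look results up using the same key format.
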
def test_ensure_exists_3(self):
    """dlbs -> TestDictUtils::test_ensure_exists_3 [Testing dictionary helpers #3]"""
    DictUtils.ensure_exists(self.dictionary, 'exp.data_dir', '/nfs/imagenet')
    self.assertEqual('exp.framework' in self.dictionary, True)
    self.assertEqual('exp.model' in self.dictionary, True)
    self.assertEqual('exp.device_batch' in self.dictionary, True)
    self.assertEqual('exp.data_dir' in self.dictionary, True)
    self.assertEqual(self.dictionary['exp.framework'], self.framework)
    self.assertEqual(self.dictionary['exp.model'], self.model)
    self.assertEqual(self.dictionary['exp.device_batch'], self.device_batch)
    self.assertEqual(self.dictionary['exp.data_dir'], '/nfs/imagenet')
    self.assertEqual(len(self.dictionary), 4)

def __init__(self):
    self.__validation = True       # Validate config before running benchmarks
    self.__action = None           # Action to perform (build, run, ...)
    self.__config_file = None      # Configuration file to load
    self.__progress_file = None    # A JSON file with current progress
    self.__config = {}             # Loaded configuration
    self.__param_info = {}         # Parameter meta-info such as type and value domain
    self.__plan_file = None        # File with pre-built plan
    self.__plan = []               # Loaded or generated plan
    self.__params = {}             # Override env variables from files
    self.__variables = {}          # Override variables from files
    # Dirty hacks
    DictUtils.ensure_exists(os.environ, 'CUDA_CACHE_PATH', '')
    DictUtils.ensure_exists(
        os.environ, 'DLBS_ROOT',
        os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../')
    )

def test_match_5(self):
    """dlbs -> TestDictUtils::test_match_5 [Testing matching helpers #5]"""
    dictionary = {'exp.framework': "bvlc_caffe", 'exp.model': "ResNet150"}
    matches = {}
    self.assertEquals(
        DictUtils.match(dictionary, {'exp.framework': '([^_]+)D(.+)'}, policy='strict', matches=matches),
        False
    )
    self.assertEquals(len(matches), 0)

def test_match_6(self):
    """Test empty strings can match"""
    dictionary = {'exp.framework': "bvlc_caffe", 'exp.data_dir': ""}
    #
    matches = {}
    for val in ('', ' ', ' ', ' '):
        self.assertEquals(
            DictUtils.match(dictionary, {'exp.framework': val}, policy='strict', matches=matches),
            False
        )
        self.assertEqual(len(matches), 0)
    #
    self.assertEquals(
        DictUtils.match(dictionary, {'exp.data_dir': ''}, policy='strict', matches=matches),
        True
    )
    self.assertEqual(len(matches), 1)
    self.assertIn('exp.data_dir_0', matches)
    self.assertEqual(matches['exp.data_dir_0'], '')

def parse_log_files(filenames, opts=None):
    """ Parses files and returns their parameters.

    :param list filenames: List of file names to parse.
    :param dict opts: Dictionary of options.

    :rtype: tuple<list, list>
    :return: A tuple of two lists - succeeded and failed benchmarks.
    """
    opts = {} if opts is None else opts
    for key in ('filter_params', 'filter_query', 'output_params'):
        DictUtils.ensure_exists(opts, key)
    DictUtils.ensure_exists(opts, 'failed_benchmarks', 'discard')
    DictUtils.ensure_exists(opts, '_extended_params', {})

    succeeded_benchmarks = []
    failed_benchmarks = []
    for filename in filenames:
        # Parse log file
        params = LogParser.parse_log_file(filename)
        # Check if this benchmark does not match the filter
        if len(params) == 0 or \
           not DictUtils.contains(params, opts['filter_params']) or \
           not DictUtils.match(params, opts['filter_query']):
            continue
        # Add extended parameters and compute them
        if len(opts['_extended_params']) > 0:
            params.update(opts['_extended_params'])
            Processor().compute_variables([params])
            # params = params[0]
        # Identify whether this benchmark succeeded or failed.
        succeeded = 'results.throughput' in params and \
                    isinstance(params['results.throughput'], (int, long, float)) and \
                    params['results.throughput'] > 0
        # Get only those key/values that need to be serialized
        params = DictUtils.subdict(params, opts['output_params'])
        # Append benchmark either to succeeded or failed list
        if succeeded:
            succeeded_benchmarks.append(params)
        else:
            if opts['failed_benchmarks'] == 'keep':
                succeeded_benchmarks.append(params)
            elif opts['failed_benchmarks'] == 'keep_separately':
                failed_benchmarks.append(params)
    #
    return (succeeded_benchmarks, failed_benchmarks)

def get_header(self):
    header = ""
    for input_col in self.input_cols:
        format_str = " %-" + str(input_col['width']) + "s"
        header = header + format_str % BenchData.Reporter.to_string(input_col['title'])
    header += " "
    output_cols_title = " " * len(header) + DictUtils.get(BenchData.Reporter.TITLES,
                                                          self.output_param, self.output_param)
    for output_col in self.output_cols:
        format_str = "%+" + str(output_col['width']) + "s "
        header = header + format_str % BenchData.Reporter.to_string(output_col['title'])
    return [output_cols_title, header]

def test_match_3(self):
    """dlbs -> TestDictUtils::test_match_3 [Testing matching helpers #3]"""
    dictionary = {'exp.framework': "bvlc_caffe", 'exp.model': "ResNet150"}
    matches = {}
    self.assertEquals(
        DictUtils.match(dictionary, {'exp.framework': '([^_]+)_(.+)'}, policy='strict', matches=matches),
        True
    )
    self.assertEquals(len(matches), 3)
    self.assertEquals(matches['exp.framework_0'], 'bvlc_caffe')
    self.assertEquals(matches['exp.framework_1'], 'bvlc')
    self.assertEquals(matches['exp.framework_2'], 'caffe')

def test_match_2(self):
    """dlbs -> TestDictUtils::test_match_2 [Testing matching helpers #2]"""
    dictionary = {'exp.framework': "bvlc_caffe", 'exp.model': "ResNet150"}
    matches = {}
    self.assertEquals(
        DictUtils.match(dictionary, {'exp.model': r'([^\d]+)(\d+)'}, policy='strict', matches=matches),
        True
    )
    self.assertEquals(len(matches), 3)
    self.assertEquals(matches['exp.model_0'], 'ResNet150')
    self.assertEquals(matches['exp.model_1'], 'ResNet')
    self.assertEquals(matches['exp.model_2'], '150')
