Example #1
    def parse_log_file(filename):
        """ Parses one log file.

        Parameters are defined in that file as key-value pairs. Values must be
        JSON-parsable strings. Every key has a prefix and a suffix equal to ``__``
        (two underscores), for instance:

        * __exp.device_batch__= 16
        * __results.training_time__= 33.343

        Parameter names are the keys without the prefixes and suffixes, i.e. 'exp.device_batch'
        and 'results.training_time' are the parameter names in the example above.

        :param str filename: Name of a file to parse.
        :return: Dictionary with experiment parameters.
        :rtype: dict
        """
        #       __(.+?(?=__[=]))__=(.+)
        # [ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)
        exp_params = {}
        with open(filename) as logfile:
            # 'must_match' must be set to False: not every line in a log file
            # has to match the key-value pattern.
            DictUtils.add(exp_params,
                          logfile,
                          pattern='[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)',
                          must_match=False)
        return exp_params
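For reference, a minimal standalone sketch (not part of DLBS) of how the documented ``__name__= value`` lines can be extracted with the same pattern, assuming values are JSON-parsable:

    import json
    import re

    # Same pattern as above: the name sits between double underscores, the raw
    # value is everything after '='.
    PARAM_PATTERN = re.compile(r'[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)')

    def extract_params(lines):
        """Return a dict of parameters found in an iterable of log lines."""
        params = {}
        for line in lines:
            match = PARAM_PATTERN.match(line)
            if match:
                name, raw_value = match.group(1), match.group(2).strip()
                try:
                    params[name] = json.loads(raw_value)
                except ValueError:
                    params[name] = raw_value  # keep non-JSON values as plain strings
        return params

    print(extract_params(['__exp.device_batch__= 16',
                          '__results.training_time__= 33.343',
                          'unrelated log line']))
    # -> {'exp.device_batch': 16, 'results.training_time': 33.343}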
Example #2
 def build_strong_scaling_report(self, jsonfile):
     """ Builds strong scaling report for multi-GPU training.
     """
     header = "%-20s %-10s" % ('Network', 'Batch')
     for device in self.devices:
         header = "%s %-10s" % (header, (1 + device.count(',')))
     report = []
     json_report = {'data': []}
     for net in self.nets:
         for batch in self.batches:
             profile = {
                 'net': net,
                 'batch': batch,
                 'time': [],
                 'throughput': [],
                 'efficiency': [],
                 'speedup': []
             }
             json_profile = SummaryBuilder.default_json_profile(
                 net, 'strong', batch)
             profile_ok = False
             # device here is '0', '0,1', '0,1,2,3' ...
             for device in self.devices:
                 key = '{0}_{1}_{2}'.format(net, device, batch)
                 batch_tm = throughput = efficiency = speedup = -1
                 num_devices = 1 + device.count(',')
                 if key in self.cache:
                     batch_tm = self.cache[key]
                     throughput = int(batch * (1000.0 / batch_tm))
                      json_profile['perf']['data'][str(num_devices)] = batch_tm
                      if len(profile['throughput']) == 0:
                          speedup = 1
                      else:
                          speedup = 1.0 * throughput / profile['throughput'][0]
                 if len(profile['efficiency']) == 0:
                     efficiency = 100.00
                     profile_ok = True
                 elif profile['time'][0] > 0:
                     efficiency = int(10000.0 * profile['time'][0] /
                                      (num_devices * batch_tm)) / 100.0
                     profile_ok = True
                 profile['time'].append(batch_tm)
                 profile['throughput'].append(throughput)
                 profile['efficiency'].append(efficiency)
                 profile['speedup'].append(speedup)
             if profile_ok:
                 report.append(profile)
                 json_report['data'].append(json_profile)
     SummaryBuilder.print_report_txt(BATCH_TM_TITLE, header, report, 'net',
                                     'batch', 'time')
     SummaryBuilder.print_report_txt(IPS_TITLE, header, report, 'net',
                                     'batch', 'throughput')
     SummaryBuilder.print_report_txt(SPEEDUP_TITLE, header, report, 'net',
                                     'batch', 'speedup')
     SummaryBuilder.print_report_txt("Efficiency = 100% * t1 / (N * tN)",
                                     header, report, 'net', 'batch',
                                     'efficiency')
     DictUtils.dump_json_to_file(json_report, jsonfile)
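As a rough illustration of the arithmetic behind this report (made-up timings, not DLBS output): strong scaling keeps the effective batch fixed, so throughput is ``batch * 1000 / batch_tm``, speedup is relative to the single-device throughput, and efficiency is ``100% * t1 / (N * tN)``.

    batch = 64                                    # effective batch size (fixed)
    batch_times = {1: 100.0, 2: 55.0, 4: 30.0}    # hypothetical batch times, ms

    t1 = batch_times[1]
    for n in sorted(batch_times):
        tn = batch_times[n]
        throughput = int(batch * (1000.0 / tn))
        speedup = t1 / tn
        efficiency = 100.0 * t1 / (n * tn)        # 100% * t1 / (N * tN)
        print(n, throughput, round(speedup, 2), round(efficiency, 2))
    # 1 640 1.0 100.0
    # 2 1163 1.82 90.91
    # 4 2133 3.33 83.33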
Example #3
 def build_exploration_report(self, report_file):
     """ Builds exploration report for inference and single device training.
     """
     header = "%-20s %-10s" % ('Network', 'Device')
     for batch in self.batches:
         header = "%s %-10s" % (header, batch)
     report = []
     json_report = {'data': []}
     for net in self.nets:
         for device in self.devices:
             profile = {
                 'net': net,
                 'device': device,
                 'time': [],
                 'throughput': []
             }
             profile_ok = False
             for batch in self.batches:
                 key = '{0}_{1}_{2}'.format(net, device, batch)
                 batch_tm = throughput = -1
                 if key in self.cache and self.cache[key] > 0:
                     batch_tm = self.cache[key]
                     profile_ok = True
                     throughput = int(batch * (1000.0 / batch_tm))
                     json_profile = SummaryBuilder.default_json_profile(net, 'strong', batch)
                     json_profile['perf']['data']['1'] = batch_tm
                     json_report['data'].append(json_profile)
                 profile['time'].append(round(batch_tm, 3))
                 profile['throughput'].append(throughput)
             if profile_ok:
                 report.append(profile)
     SummaryBuilder.print_report_txt(BATCH_TM_TITLE, header, report, 'net', 'device', 'time')
     SummaryBuilder.print_report_txt(IPS_TITLE, header, report, 'net', 'device', 'throughput')
     DictUtils.dump_json_to_file(json_report, report_file)
Example #4
def filter_benchmarks(args):
    """Filter benchmarks by removing those that do not contain provided parameters.

    :param argparse args: Command line arguments.

    The following command line arguments are used:
    * ``args.input_file`` A file with benchmark results.
    * ``args.params``     Specification of mandatory parameters. For the format,
                          see the comments of the ``get_params`` function.
    * ``args.output_file`` An output file with updated benchmark results.
    """
    # Load benchmarks and parameters
    input_benchmarks = load_json_file(args.input_file)['data']
    params = get_params(args.params)
    # Filter benchmarks
    output_benchmarks = []
    for input_benchmark in input_benchmarks:
        keep = True
        for key in params:
            if key not in input_benchmark or not input_benchmark[key]:
                keep = False
                break
        if keep:
            output_benchmarks.append(input_benchmark)
    # Report results and serialize
    print("Number of input benchmarks: %d" % len(input_benchmarks))
    print("Number of output benchmarks: %d" % len(output_benchmarks))
    DictUtils.dump_json_to_file({"data": output_benchmarks}, args.output_file)
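The filtering rule above boils down to keeping only benchmarks whose mandatory parameters are present and non-empty; a standalone sketch with made-up data:

    def keep_benchmark(benchmark, mandatory_params):
        """True if every mandatory parameter is present and truthy."""
        return all(benchmark.get(key) for key in mandatory_params)

    benchmarks = [{'exp.model': 'resnet50', 'results.time': 12.5},
                  {'exp.model': 'alexnet', 'results.time': None}]
    filtered = [b for b in benchmarks if keep_benchmark(b, ['exp.model', 'results.time'])]
    print(len(filtered))  # -> 1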
Example #5
def update_benchmarks(args):
    """Update benchmarks by overriding parameters provided by a user.

    :param argparse args: Command line arguments.

    The following command line arguments are used:
    * ``args.input_file`` A file with benchmark results.
    * ``args.params``     Specification of parameters to override. For the format,
                          see the comments of the ``get_params`` function.
    * ``args.output_file`` An output file with updated benchmark results.
    """
    # Load benchmarks and parameters.
    benchmarks = load_json_file(args.input_file)['data']
    prefix = '__'
    params = {prefix + k: v for k, v in get_params(args.params).items()}
    # Add prefixed parameters to all benchmarks.
    for benchmark in benchmarks:
        benchmark.update(params)
    # Process and compute variables
    Processor().compute_variables(benchmarks)
    # Strip the prefix, overwriting existing variables in case of a conflict
    prefixed_keys = params.keys()
    prefix_len = len(prefix)

    output_benchmarks = []
    for benchmark in benchmarks:
        for k in prefixed_keys:
            benchmark[k[prefix_len:]] = benchmark[k]
            del benchmark[k]
        if benchmark['exp.model'] != '':
            output_benchmarks.append(benchmark)
    benchmarks = output_benchmarks
    # Serialize updated benchmarks.
    DictUtils.dump_json_to_file({"data": benchmarks}, args.output_file)
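A standalone sketch (made-up parameter names) of the prefix trick used above: user overrides are stored under a ``__`` prefix so they survive variable computation, then moved back, overwriting the originals:

    prefix = '__'
    overrides = {'exp.num_warmup_batches': 2}                 # hypothetical user override
    benchmark = {'exp.num_warmup_batches': 1, 'exp.model': 'resnet50'}

    benchmark.update({prefix + k: v for k, v in overrides.items()})
    # ... variables would be recomputed here ...
    for key in [k for k in benchmark if k.startswith(prefix)]:
        benchmark[key[len(prefix):]] = benchmark.pop(key)     # strip prefix, override
    print(benchmark)  # -> {'exp.num_warmup_batches': 2, 'exp.model': 'resnet50'}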
Example #6
 def build_weak_scaling_report(self, jsonfile):
     """ Builds weak scaling report for multi-GPU training.
     """
     header = "%-20s %-10s" % ('Network', 'Batch')
     for device in self.devices:
         header = "%s %-10d" % (header, (1 + device.count(',')))
     report = []
     json_report = {'data': []}
     for net in self.nets:
         for batch in self.batches:
              # batch is the base batch size, i.e. the per-GPU batch
             profile = {
                 'net': net,         # network name
                 'batch': batch,     # per device batch size
                 'time': [],         # batch times
                 'throughput': [],   # throughput
                 'efficiency': [],   # efficiency
                 'speedup': []       # speedup
             }
             json_profile = SummaryBuilder.default_json_profile(net, 'weak', batch)
             profile_ok = False
             for device in self.devices:
                 # weak scaling: we want to find results for effective batch size
                 # which is N * batch
                 num_devices = 1 + device.count(',')
                 key = '{0}_{1}_{2}'.format(net, device, (batch*num_devices))
                 if num_devices == 1 and key not in self.cache:
                      # If we do not have data for one device, it does not make
                      # sense to continue.
                     break
                 batch_tm = throughput = efficiency = speedup = -1.0
                 if key in self.cache:
                     batch_tm = self.cache[key]
                     throughput = int((num_devices*batch) * (1000.0 / batch_tm))
                     json_profile['perf']['data'][str(num_devices)] = batch_tm
                     if len(profile['throughput']) == 0:
                         speedup = 1
                     else:
                         speedup = 1.0 * throughput / profile['throughput'][0]
                 if len(profile['efficiency']) == 0:
                     efficiency = 100.00
                     profile_ok = True
                 elif profile['time'][0] > 0:
                     efficiency = int(10000.0 * profile['time'][0] / batch_tm) / 100.0
                     profile_ok = True
                 profile['time'].append(batch_tm)
                 profile['throughput'].append(int(throughput))
                 profile['efficiency'].append(efficiency)
                 profile['speedup'].append(speedup)
             if profile_ok:
                 report.append(profile)
                 json_report['data'].append(json_profile)
     SummaryBuilder.print_report_txt(BATCH_TM_TITLE, header, report, 'net', 'batch', 'time')
     SummaryBuilder.print_report_txt(IPS_TITLE, header, report, 'net', 'batch', 'throughput')
     SummaryBuilder.print_report_txt(SPEEDUP_TITLE, header, report, 'net', 'batch', 'speedup')
     SummaryBuilder.print_report_txt(
         "Efficiency  = 100% * t1 / tN",
         header, report, 'net', 'batch', 'efficiency'
     )
     DictUtils.dump_json_to_file(json_report, jsonfile)
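Note on the formulas: in the weak-scaling report the per-device batch is fixed and the effective batch grows with N (the cache key is built with ``batch * num_devices``), so under ideal scaling the batch time stays constant. That is why the efficiency title here is ``100% * t1 / tN``, while the strong-scaling report above uses ``100% * t1 / (N * tN)``.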
Example #7
    def parse(inputs, recursive=False, ignore_errors=False):
        """Parse benchmark log files (*.log).

        Args:
            inputs: Path specifiers of where to search for log files.
            recursive (bool): If true, parse directories found in `inputs` recursively.
            ignore_errors (bool): If true, ignore errors associated with parsing parameter values.

        Returns:
            Instance of this class.
        """
        inputs = inputs if isinstance(inputs, list) else [inputs]
        log_files = set()
        for file_path in inputs:
            if os.path.isdir(file_path):
                log_files.update(IOUtils.gather_files(inputs, "*.log", recursive))
            elif file_path.endswith('.log'):
                log_files.add(file_path)
        log_files = list(log_files)
        benchmarks = []
        for log_file in log_files:
            parameters = {}
            with OpenFile(log_file, 'r') as logfile:
                # 'must_match' must be set to False: not every line in a log
                # file has to match the key-value pattern.
                DictUtils.add(
                    parameters,
                    logfile,
                    pattern='[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)',
                    must_match=False,
                    ignore_errors=ignore_errors
                )
            benchmarks.append(parameters)
        return BenchData(benchmarks, create_copy=False)
Example #8
 def build_cache(self, inputs=None, output=None, output_cols=None):
     self.input_cols = [None] * len(inputs)
     for idx, param in enumerate(inputs):
         self.input_cols[idx] = {"index": idx, "param": param, "width": 0,
                                 "title": DictUtils.get(BenchData.Reporter.TITLES, param, param),
                                 "vals": sorted(self.bench_data.select_values(param))}
     self.output_param = output
     output_cols = output_cols if output_cols else sorted(self.bench_data.select_values(output))
     self.output_cols = [None] * len(output_cols)
     for idx, param_value in enumerate(output_cols):
         self.output_cols[idx] = {"index": idx, "value": param_value, "title": param_value,
                                  "width": len(BenchData.Reporter.to_string(param_value))}
     self.cache = {}
     for bench in self.bench_data.benchmarks():
         if BenchData.status(bench) != "ok":
             continue
         bench_key = []
         for input_col in self.input_cols:
             param_value = DictUtils.get(bench, input_col['param'], None)
             if not param_value:
                 bench_key = []
                 break
             bench_key.append(str(param_value))
         if bench_key:
             output_val = DictUtils.get(bench, self.output_param, None)
             if output_val:
                 bench_key = '.'.join(bench_key + [str(output_val)])
                 if bench_key not in self.cache:
                     self.cache[bench_key] = bench
                 else:
                     raise ValueError("Duplicate benchmark with key = {}".format(bench_key))
Example #9
    def run(self):
        """Runs subprocess with Popen.

        This method must not be called directly. Use blocking :py:meth:`~dlbs.Worker.work`
        method instead.
        """
        try:
            # Dump parameters to a log file or to standard output
            DictUtils.ensure_exists(self.params, 'exp.log_file', default_value='')
            if self.params['exp.log_file'].strip() == '':
                self.params['exp.log_file'] = '/dev/stdout'
            IOUtils.mkdirf(self.params['exp.log_file'])
            with open(self.params['exp.log_file'], 'a+') as log_file:
                self.__dump_parameters(log_file)
            # This is where we launch the process. Keep in mind that the log file
            # that is supposed to be created is exp.log_file (exp_log_file in the
            # script). Any other output of the launching script is echoed by this
            # Python code to standard output.
            self.process = subprocess.Popen(self.command, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=self.environ)
            while True:
                output = self.process.stdout.readline()
                if output == '' and self.process.poll() is not None:
                    break
                if output:
                    sys.stdout.write(output)
                    sys.stdout.flush()
            self.ret_code = self.process.poll()
        except Exception as err:
            logging.warn('Exception has been caught for experiment %s: %s', self.params.get('exp.id'), str(err))
            logging.warn(traceback.format_exc())
            self.ret_code = -1
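The launch loop above follows a common streaming pattern; a minimal standalone sketch with a placeholder command (not a DLBS launcher):

    import subprocess
    import sys

    # Merge stderr into stdout and echo each line as soon as it is produced.
    process = subprocess.Popen(['echo', 'hello'], universal_newlines=True,
                               stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    while True:
        line = process.stdout.readline()
        if line == '' and process.poll() is not None:
            break
        if line:
            sys.stdout.write(line)
            sys.stdout.flush()
    print('exit code:', process.poll())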
Example #10
    def summary(self, params=None):
        """Return summary of benchmarks providing additional info on `params`.

        Args:
            params (list): List of parameters to provide additional info for. If empty, default list is used.

        Returns:
            dict: A summary of benchmarks.
        """
        if not params:
            params = ['exp.node_id', 'exp.node_title', 'exp.gpu_title', 'exp.gpu_id', 'exp.framework_title',
                      'exp.framework_id']
        summary_dict = {
            'num_benchmarks': len(self.__benchmarks),
            'num_failed_benchmarks': 0,
            'num_successful_benchmarks': 0
        }
        for param in params:
            summary_dict[param] = set()

        for bench in self.__benchmarks:
            if DictUtils.get(bench, 'results.time', -1) > 0:
                summary_dict['num_successful_benchmarks'] += 1
            else:
                summary_dict['num_failed_benchmarks'] += 1
            for param in params:
                summary_dict[param].add(DictUtils.get(bench, param, None))

        for param in params:
            summary_dict[param] = list(summary_dict[param])
        return summary_dict
Example #11
    def load(inputs, **kwargs):
        """Load benchmark data (parsed from log files) from a JSON file.

        The input is a JSON file that contains an object with a 'data' field. This
        field is a list of dictionaries; each dictionary contains parameters of one
        benchmark: {"data": [{...}, {...}, {...}]}

        Args:
            inputs (str): File name of a JSON (*.json) or a compressed JSON (.json.gz) file.

        Returns:
            Instance of this class.
        """
        is_json_file = IOUtils.is_json_file(inputs)
        if not is_json_file and isinstance(inputs, list) and len(inputs) == 1:
            is_json_file = IOUtils.is_json_file(inputs[0])
            inputs = inputs[0] if is_json_file else inputs
        if is_json_file:
            benchmarks = IOUtils.read_json(inputs, check_extension=True)
            if 'data' not in benchmarks:
                benchmarks = {'data': []}
                print("[WARNING]: No benchmark data found in '{}'".format(
                    inputs))
            return BenchData(benchmarks['data'], create_copy=False)
        #
        is_csv_file = IOUtils.is_csv_file(inputs)
        if not is_csv_file and isinstance(inputs, list) and len(inputs) == 1:
            is_csv_file = IOUtils.is_csv_file(inputs[0])
            inputs = inputs[0] if is_csv_file else inputs
        if is_csv_file:
            with OpenFile(inputs, 'r') as fobj:
                reader = csv.DictReader(fobj)
                benchmarks = list(reader)
            return BenchData(benchmarks, create_copy=False)
        #
        is_compressed_tarball = IOUtils.is_compressed_tarball(inputs)
        if not is_compressed_tarball and isinstance(inputs, list) and len(inputs) == 1:
            is_compressed_tarball = IOUtils.is_compressed_tarball(inputs[0])
            inputs = inputs[0] if is_compressed_tarball else inputs
        if is_compressed_tarball:
            benchmarks = []
            with tarfile.open(inputs, "r:gz") as archive:
                for member in archive.getmembers():
                    if member.isfile() and member.name.endswith('.log'):
                        log_file = archive.extractfile(member)
                        if log_file is not None:
                            parameters = {}
                            DictUtils.add(
                                parameters,
                                log_file,
                                pattern='[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)',
                                must_match=False,
                                ignore_errors=True)
                            benchmarks.append(parameters)
            return BenchData(benchmarks, create_copy=False)
        #
        return BenchData.parse(inputs, **kwargs)
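The tarball branch above can be exercised on its own; a standalone sketch (the archive name is hypothetical) that yields the content of every ``*.log`` member of a gzip-compressed tar file:

    import tarfile

    def logs_in_tarball(path):
        """Yield (member name, text) for every *.log file in a .tar.gz archive."""
        with tarfile.open(path, 'r:gz') as archive:
            for member in archive.getmembers():
                if member.isfile() and member.name.endswith('.log'):
                    fileobj = archive.extractfile(member)
                    if fileobj is not None:
                        yield member.name, fileobj.read().decode('utf-8')

    # for name, text in logs_in_tarball('benchmarks.tar.gz'):  # hypothetical archive
    #     print(name, len(text))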
Example #12
 def report_active(self, log_file):
     if self.__file_name:
         self.__progress['active_benchmark'] = {
             'status': 'inprogress',
             'start_time': str(datetime.datetime.now()),
             'stop_time': None,
             'log_file': log_file
         }
         DictUtils.dump_json_to_file(self.__progress, self.__file_name)
Example #13
 def build_plan(self, serialize=False):
     """Builds plan combining configuration, parameters and variables."""
     self.plan = Builder.build(self.config, self.params, self.variables)
     if serialize:
         if self.plan_file:
             DictUtils.dump_json_to_file(self.plan, self.plan_file)
         else:
             json.dump(self.plan, sys.stdout, indent=4)
             print('')
Example #14
 def report_active_completed(self):
     if self.__file_name:
         self.__progress['active_benchmark']['stop_time'] = str(
             datetime.datetime.now())
         self.__progress['active_benchmark']['status'] = 'completed'
         self.__progress['completed_benchmarks'].append(
             self.__progress['active_benchmark'])
         self.__progress['num_completed_benchmarks'] += 1
         self.__progress["active_benchmark"] = {}
         DictUtils.dump_json_to_file(self.__progress, self.__file_name)
Example #15
    def compute_variables(self, experiments):
        """Main entry point - compute all variables in all experiments.

        Args:
            experiments (list): A list of experiments that need to be computed. It is modified in place.
        """
        for experiment in experiments:
            # Convert all lists to strings
            DictUtils.lists_to_strings(experiment)
            # Build initial version of a forward index (variables -> their dependencies)
            self.fwd_index = {}
            for variable in experiment:
                self.update_index(experiment, variable)
            # iteratively compute variables
            while len(self.fwd_index) > 0:
                computable_vars = self.get_computable_variables()
                # print("Computable vars: %s" % (str(computable_vars)))
                if len(computable_vars) == 0:
                    self.report_unsatisfied_deps(experiment)
                    exit(1)
                # Compute variables. We are either done with a variable or
                # this variable has nested references and we need to continue
                # computing it.
                computed, partially_computed = self.compute_current_variables(
                    experiment, computable_vars)
                # print("Computed vars: %s" % (str(computed)))
                # print("Partially computed vars: %s" % (str(partially_computed)))
                # Remove computed vars from index and update dependencies of
                # remaining variables
                for computed_var in computed:
                    self.fwd_index.pop(computed_var)
                for var in self.fwd_index:
                    self.fwd_index[var]['udeps'].difference_update(
                        set(computed))
                # Update partially computed variables - these are variables
                # that have nested references.
                for var in partially_computed:
                    self.update_index(experiment, var)
                    deps = self.fwd_index[var]['udeps'].copy()
                    for dep in deps:
                        if dep not in self.fwd_index:
                            self.fwd_index[var]['udeps'].remove(dep)
                # exit(0)

            # We need to remove all internal temp variables. In Python 2, keys()
            # returns a copy; in Python 3 it returns a view, which would raise a
            # 'dictionary changed size during iteration' error, so we make a copy
            # explicitly.
            for name in list(experiment.keys()):
                if name.startswith('__dlbs_'):
                    experiment.pop(name)
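A much-simplified standalone sketch of the idea behind ``compute_variables`` (no dependency index, no cycle detection): repeatedly substitute ``${name}`` references in string values until nothing changes.

    import re

    REF = re.compile(r'\$\{([^}]+)\}')

    def resolve(experiment):
        """Substitute ${name} references in string values until a fixed point."""
        changed = True
        while changed:
            changed = False
            for name, value in experiment.items():
                if isinstance(value, str) and REF.search(value):
                    new_value = REF.sub(lambda m: str(experiment[m.group(1)]), value)
                    if new_value != value:
                        experiment[name] = new_value
                        changed = True
        return experiment

    print(resolve({'exp.framework': 'caffe',
                   'exp.model': 'resnet50',
                   'exp.log_file': '${exp.framework}_${exp.model}.log'}))
    # -> {'exp.framework': 'caffe', 'exp.model': 'resnet50', 'exp.log_file': 'caffe_resnet50.log'}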
Example #16
    def test_ensure_exists_1(self):
        """dlbs  ->  TestDictUtils::test_ensure_exists_1                 [Testing dictionary helpers #1]"""
        DictUtils.ensure_exists(self.dictionary, 'exp.framework')

        self.assertEqual('exp.framework' in self.dictionary, True)
        self.assertEqual('exp.model' in self.dictionary, True)
        self.assertEqual('exp.device_batch' in self.dictionary, True)

        self.assertEqual(self.dictionary['exp.framework'], self.framework)
        self.assertEqual(self.dictionary['exp.model'], self.model)
        self.assertEqual(self.dictionary['exp.device_batch'],
                         self.device_batch)

        self.assertEqual(len(self.dictionary), 3)
Example #17
 def report(self, inputs=None, output=None, output_cols=None,
            report_speedup=False, report_efficiency=False, **kwargs):
     DictUtils.ensure_exists(kwargs, 'report_batch_times', True)
     DictUtils.ensure_exists(kwargs, 'report_input_specs', True)
     # Build cache that will map benchmarks keys to benchmark objects.
     self.build_cache(inputs, output, output_cols)
     # Iterate over column values and build table with batch times and throughput
     cols = []
     times = []
     throughputs = []
     benchmark_keys = [input_col['vals'] for input_col in self.input_cols]
     # Build tables for batch times and benchmarks throughputs
     # The `benchmark_key` is a tuple of column values e.g. ('ResNet50', 256)
     for benchmark_key in itertools.product(*benchmark_keys):
         cols.append(copy.deepcopy(benchmark_key))
         times.append([None] * len(self.output_cols))
         throughputs.append([None] * len(self.output_cols))
         for output_col in self.output_cols:
             benchmark_key = [str(key) for key in benchmark_key]
             key = '.'.join(benchmark_key + [str(output_col['value'])])
             if key in self.cache:
                 times[-1][output_col['index']] = self.cache[key]['results.time']
                 throughputs[-1][output_col['index']] = self.cache[key]['results.throughput']
     # Determine minimal widths for columns
     self.compute_column_widths(times, throughputs)
     #
     header = self.get_header()
     if kwargs['report_batch_times']:
         self.print_table("Batch time (milliseconds)", header, cols, times)
     self.print_table("Throughput (instances per second e.g. images/sec)", header, cols, throughputs)
     if report_speedup:
         speedups = self.compute_speedups(throughputs)
         self.print_table("Speedup (based on instances per second table, "
                          "relative to first output column ({} = {}))".format(self.output_param,
                                                                              self.output_cols[0]['value']),
                          header, cols, speedups)
     if report_efficiency:
         efficiency = self.compute_efficiency(times)
         self.print_table("Efficiency (based on batch times table, "
                          "relative to first output column ({} = {}))".format(self.output_param,
                                                                              self.output_cols[0]['value']),
                          header, cols, efficiency)
     if kwargs['report_input_specs']:
         print("This report is configured with the following parameters:")
         print(" inputs = %s" % str(inputs))
         print(" output = %s" % output)
         print(" output_cols = %s" % str(output_cols))
         print(" report_speedup = %s" % str(report_speedup))
         print(" report_efficiency = %s" % str(report_efficiency))
Example #18
 def report(self, log_file, status, counts=True):
     if self.__file_name:
          self.__progress['completed_benchmarks'].append({
              'status': status,
              'start_time': str(datetime.datetime.now()),
              'stop_time': str(datetime.datetime.now()),
              'log_file': log_file
          })
         if counts:
             self.__progress['num_completed_benchmarks'] += 1
         DictUtils.dump_json_to_file(self.__progress, self.__file_name)
Example #19
    def check_variable_value(self, experiment, var):
        """ Check if variable contains correct value according to parameter info.

        Args:
            experiment (dict): A dictionary with experiment parameters.
            var (str): Name of a parameter.
        """
        if self.param_info is None or var not in self.param_info:
            return
        pi = self.param_info[var]
        ParamUtils.check_value(
            var,  # Parameter name
            experiment[var],  # Parameter value
            DictUtils.get(pi, 'val_domain', None),  # Value domain constraints.
            DictUtils.get(pi, 'val_regexp', None))  # Value regexp constraints.
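A rough standalone sketch of the kind of checks delegated to ``ParamUtils.check_value`` above (the actual DLBS implementation may differ): a value-domain constraint and a regexp constraint.

    import re

    def check_value(name, value, val_domain=None, val_regexp=None):
        """Raise ValueError if the value violates domain or regexp constraints."""
        if val_domain is not None and value not in val_domain:
            raise ValueError("{}={!r} is not in {}".format(name, value, val_domain))
        if val_regexp is not None and not re.match(val_regexp, str(value)):
            raise ValueError("{}={!r} does not match '{}'".format(name, value, val_regexp))

    check_value('exp.phase', 'training', val_domain=['training', 'inference'])
    check_value('exp.gpus', '0,1', val_regexp=r'^\d+(,\d+)*$')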
Example #20
    def test_lists_to_strings_2(self):
        """dlbs  ->  TestDictUtils::test_lists_to_strings_2              [Testing lists-to-strings helpers #2]"""
        DictUtils.lists_to_strings(self.dictionary, separator=';')

        self.assertEqual('exp.framework' in self.dictionary, True)
        self.assertEqual('exp.model' in self.dictionary, True)
        self.assertEqual('exp.device_batch' in self.dictionary, True)

        self.assertEqual(self.dictionary['exp.framework'], self.framework)
        self.assertEqual(self.dictionary['exp.model'],
                         "ResNet50;ResNet101;ResNet152")
        self.assertEqual(self.dictionary['exp.device_batch'],
                         self.device_batch)

        self.assertEqual(len(self.dictionary), 3)
Example #21
    def report_active(self, log_file):
        """ Report that new active benchmark has just started.

        Args:
            log_file (str): A log file for a currently active benchmark.
        """
        self.__progress['active_benchmark'] = {
            'exec_status': 'inprogress',
            'status': None,
            'start_time': datetime.datetime.now(),
            'end_time': None,
            'log_file': log_file
        }
        if self.__file_name:
            DictUtils.dump_json_to_file(self.__progress, self.__file_name)
Example #22
 def build_cache(self, summary_file, target_variable, query):
     """Loads data from json file."""
     with OpenFile(summary_file) as file_obj:
         summary = json.load(file_obj)
     self.cache = {}
     self.nets = Set()
     self.batches = Set()
     self.devices = Set()
     for experiment in summary['data']:
         if target_variable not in experiment:
             print("target variable not in experiment, skipping")
             continue
         if not DictUtils.match(experiment, query, policy='strict'):
             continue
         # batch is an effective batch here
         key = '{0}_{1}_{2}'.format(
             experiment['exp.model_title'],
             experiment['exp.gpus'],
             experiment['exp.effective_batch']
         )
         self.cache[key] = float(experiment[target_variable])
         self.nets.add(experiment['exp.model_title'])
         self.batches.add(int(experiment['exp.effective_batch']))
         self.devices.add(str(experiment['exp.gpus']))
     self.nets = sorted(list(self.nets))
     self.batches = sorted(list(self.batches))
     self.devices = sorted(list(self.devices), key=len)
Example #23
    def test_ensure_exists_3(self):
        """dlbs  ->  TestDictUtils::test_ensure_exists_3                 [Testing dictionary helpers #3]"""
        DictUtils.ensure_exists(self.dictionary, 'exp.data_dir',
                                '/nfs/imagenet')

        self.assertEqual('exp.framework' in self.dictionary, True)
        self.assertEqual('exp.model' in self.dictionary, True)
        self.assertEqual('exp.device_batch' in self.dictionary, True)
        self.assertEqual('exp.data_dir' in self.dictionary, True)

        self.assertEqual(self.dictionary['exp.framework'], self.framework)
        self.assertEqual(self.dictionary['exp.model'], self.model)
        self.assertEqual(self.dictionary['exp.device_batch'],
                         self.device_batch)
        self.assertEqual(self.dictionary['exp.data_dir'], '/nfs/imagenet')

        self.assertEqual(len(self.dictionary), 4)
Example #24
 def __init__(self):
     self.__validation = True  # Validate config before running benchmarks
     self.__action = None  # Action to perform (build, run, ...)
     self.__config_file = None  # Configuration file to load
     self.__progress_file = None  # A JSON file with current progress
     self.__config = {}  # Loaded configuration
      self.__param_info = {}  # Parameter meta-info such as type and value domain
     self.__plan_file = None  # File with pre-built plan
     self.__plan = []  # Loaded or generated plan
     self.__params = {}  # Override env variables from files
     self.__variables = {}  # Override variables from files
     # Dirty hacks
     DictUtils.ensure_exists(os.environ, 'CUDA_CACHE_PATH', '')
     DictUtils.ensure_exists(
         os.environ, 'DLBS_ROOT',
         os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../'))
Example #25
 def test_match_5(self):
     """dlbs  ->  TestDictUtils::test_match_5                         [Testing matching helpers #5]"""
     dictionary = {'exp.framework': "bvlc_caffe", 'exp.model': "ResNet150"}
     matches = {}
     self.assertEquals(
         DictUtils.match(dictionary, {'exp.framework': '([^_]+)D(.+)'},
                         policy='strict',
                         matches=matches), False)
     self.assertEquals(len(matches), 0)
Example #26
 def test_match_6(self):
     """Test empty strings can match"""
     dictionary = {'exp.framework': "bvlc_caffe", 'exp.data_dir': ""}
     #
     matches = {}
     for val in ('', ' ', '  ', '    '):
         self.assertEquals(
             DictUtils.match(dictionary, {'exp.framework': val},
                             policy='strict',
                             matches=matches), False)
         self.assertEqual(len(matches), 0)
     #
     self.assertEquals(
         DictUtils.match(dictionary, {'exp.data_dir': ''},
                         policy='strict',
                         matches=matches), True)
     self.assertEqual(len(matches), 1)
     self.assertIn('exp.data_dir_0', matches)
     self.assertEqual(matches['exp.data_dir_0'], '')
Example #27
    def parse_log_files(filenames, opts=None):
        """ Parses files and returns their parameters.

        :param list filenames: List of file names to parse.
        :param dict opts:      Dictionary of options.

        :rtype:  tuple<list, list>
        :return: A tuple of two lists - succeeded and failed benchmarks
        """
        opts = {} if opts is None else opts
        for key in ('filter_params', 'filter_query', 'output_params'):
            DictUtils.ensure_exists(opts, key)
        DictUtils.ensure_exists(opts, 'failed_benchmarks', 'discard')
        DictUtils.ensure_exists(opts, '_extended_params', {})

        succeeded_benchmarks = []
        failed_benchmarks = []
        for filename in filenames:
            # Parse log file
            params = LogParser.parse_log_file(filename)
            # Skip this benchmark if it does not match the filter
            if len(params) == 0 or \
               not DictUtils.contains(params, opts['filter_params']) or \
               not DictUtils.match(params, opts['filter_query']):
                continue
            # Add extended parameters and compute them
            if len(opts['_extended_params']) > 0:
                params.update(opts['_extended_params'])
                Processor().compute_variables([params])
                #params = params[0]
            # Identify whether this benchmark succeeded or failed.
            succeeded = 'results.throughput' in params and \
                        isinstance(params['results.throughput'], (int, long, float)) and \
                        params['results.throughput'] > 0
            # Get only those key/values that need to be serialized
            params = DictUtils.subdict(params, opts['output_params'])
            # Append benchmark either to succeeded or failed list
            if succeeded:
                succeeded_benchmarks.append(params)
            else:
                if opts['failed_benchmarks'] == 'keep':
                    succeeded_benchmarks.append(params)
                elif opts['failed_benchmarks'] == 'keep_separately':
                    failed_benchmarks.append(params)
            #
        return (succeeded_benchmarks, failed_benchmarks)
Example #28
 def get_header(self):
     header = ""
     for input_col in self.input_cols:
         format_str = "  %-" + str(input_col['width']) + "s"
         header = header + format_str % BenchData.Reporter.to_string(input_col['title'])
     header += "    "
     output_cols_title = " " * len(header) + DictUtils.get(BenchData.Reporter.TITLES,
                                                           self.output_param, self.output_param)
     for output_col in self.output_cols:
         format_str = "%+" + str(output_col['width']) + "s  "
         header = header + format_str % BenchData.Reporter.to_string(output_col['title'])
     return [output_cols_title, header]
Example #29
 def test_match_3(self):
     """dlbs  ->  TestDictUtils::test_match_3                         [Testing matching helpers #3]"""
     dictionary = {'exp.framework': "bvlc_caffe", 'exp.model': "ResNet150"}
     matches = {}
     self.assertEquals(
         DictUtils.match(dictionary, {'exp.framework': '([^_]+)_(.+)'},
                         policy='strict',
                         matches=matches), True)
     self.assertEquals(len(matches), 3)
     self.assertEquals(matches['exp.framework_0'], 'bvlc_caffe')
     self.assertEquals(matches['exp.framework_1'], 'bvlc')
     self.assertEquals(matches['exp.framework_2'], 'caffe')
Example #30
 def test_match_2(self):
     """dlbs  ->  TestDictUtils::test_match_2                         [Testing matching helpers #2]"""
     dictionary = {'exp.framework': "bvlc_caffe", 'exp.model': "ResNet150"}
     matches = {}
     self.assertEquals(
         DictUtils.match(dictionary, {'exp.model': r'([^\d]+)(\d+)'},
                         policy='strict',
                         matches=matches), True)
     self.assertEquals(len(matches), 3)
     self.assertEquals(matches['exp.model_0'], 'ResNet150')
     self.assertEquals(matches['exp.model_1'], 'ResNet')
     self.assertEquals(matches['exp.model_2'], '150')