Example #1
0
 def build_cache(self, summary_file, target_variable, query):
     """Loads data from json file."""
     with OpenFile(summary_file) as file_obj:
         summary = json.load(file_obj)
     self.cache = {}
     self.nets = set()
     self.batches = set()
     self.devices = set()
     for experiment in summary['data']:
         if target_variable not in experiment:
             print("target variable not in experiment, skipping")
             continue
         if not DictUtils.match(experiment, query, policy='strict'):
             continue
         # batch is an effective batch here
         key = '{0}_{1}_{2}'.format(
             experiment['exp.model_title'],
             experiment['exp.gpus'],
             experiment['exp.effective_batch']
         )
         self.cache[key] = float(experiment[target_variable])
         self.nets.add(experiment['exp.model_title'])
         self.batches.add(int(experiment['exp.effective_batch']))
         self.devices.add(str(experiment['exp.gpus']))
     self.nets = sorted(list(self.nets))
     self.batches = sorted(list(self.batches))
     self.devices = sorted(list(self.devices), key=len)
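The JSON structure build_cache expects can be read off the keys it accesses: a top-level 'data' list of flat experiment dictionaries. Below is a minimal hedged sketch of such a summary; the concrete values and the 'results.time' target variable are illustrative assumptions, not taken from a real DLBS summary file.

# Hypothetical summary content, inferred only from the keys build_cache reads.
summary = {
    "data": [
        {"exp.model_title": "ResNet50", "exp.gpus": "0", "exp.effective_batch": 128,
         "exp.framework": "tensorflow", "results.time": 245.7},
        {"exp.model_title": "ResNet50", "exp.gpus": "0,1", "exp.effective_batch": 256,
         "exp.framework": "tensorflow", "results.time": 130.2},
    ]
}
# With target_variable='results.time' and query={'exp.framework': 'tensorflow'},
# the resulting cache keys would be 'ResNet50_0_128' and 'ResNet50_0,1_256'.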
Example #2
0
 def test_match_5(self):
     """dlbs  ->  TestDictUtils::test_match_5                         [Testing matching helpers #5]"""
     dictionary = {'exp.framework': "bvlc_caffe", 'exp.model': "ResNet150"}
     matches = {}
     self.assertEqual(
         DictUtils.match(dictionary, {'exp.framework': '([^_]+)D(.+)'},
                         policy='strict',
                         matches=matches), False)
     self.assertEqual(len(matches), 0)
Example #3
0
 def test_match_6(self):
     """Test empty strings can match"""
     dictionary = {'exp.framework': "bvlc_caffe", 'exp.data_dir': ""}
     #
     matches = {}
     for val in ('', ' ', '  ', '    '):
         self.assertEqual(
             DictUtils.match(dictionary, {'exp.framework': val},
                             policy='strict',
                             matches=matches), False)
         self.assertEqual(len(matches), 0)
     #
     self.assertEqual(
         DictUtils.match(dictionary, {'exp.data_dir': ''},
                         policy='strict',
                         matches=matches), True)
     self.assertEqual(len(matches), 1)
     self.assertIn('exp.data_dir_0', matches)
     self.assertEqual(matches['exp.data_dir_0'], '')
Example #4
0
 def test_match_3(self):
     """dlbs  ->  TestDictUtils::test_match_3                         [Testing matching helpers #3]"""
     dictionary = {'exp.framework': "bvlc_caffe", 'exp.model': "ResNet150"}
     matches = {}
     self.assertEqual(
         DictUtils.match(dictionary, {'exp.framework': '([^_]+)_(.+)'},
                         policy='strict',
                         matches=matches), True)
     self.assertEqual(len(matches), 3)
     self.assertEqual(matches['exp.framework_0'], 'bvlc_caffe')
     self.assertEqual(matches['exp.framework_1'], 'bvlc')
     self.assertEqual(matches['exp.framework_2'], 'caffe')
Example #5
0
 def test_match_2(self):
     """dlbs  ->  TestDictUtils::test_match_2                         [Testing matching helpers #2]"""
     dictionary = {'exp.framework': "bvlc_caffe", 'exp.model': "ResNet150"}
     matches = {}
     self.assertEqual(
         DictUtils.match(dictionary, {'exp.model': r'([^\d]+)(\d+)'},
                         policy='strict',
                         matches=matches), True)
     self.assertEqual(len(matches), 3)
     self.assertEqual(matches['exp.model_0'], 'ResNet150')
     self.assertEqual(matches['exp.model_1'], 'ResNet')
     self.assertEqual(matches['exp.model_2'], '150')
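The match tests above pin down how query values are interpreted: each value is treated as a regular expression that must match the whole dictionary value, the full match is recorded under '<key>_0', and capture groups under '<key>_1', '<key>_2', and so on. The function below is a minimal sketch consistent with what these tests assert; it is not the actual DictUtils.match implementation.

import re


def simple_match(dictionary, query, policy='relaxed', matches=None):
    """Sketch of a matcher that satisfies the assertions in the tests above."""
    matches = {} if matches is None else matches
    for key, patterns in query.items():
        if key not in dictionary:
            if policy == 'strict':
                return False            # strict: a missing key is a mismatch
            continue                    # relaxed: silently ignore missing keys
        alternatives = patterns if isinstance(patterns, list) else [patterns]
        for pattern in alternatives:
            match = re.fullmatch(str(pattern), str(dictionary[key]))
            if match is not None:
                matches['%s_0' % key] = match.group(0)
                for i, group in enumerate(match.groups(), start=1):
                    matches['%s_%d' % (key, i)] = group
                break
        else:
            return False                # no alternative matched this key
    return True

Under this reading, '([^_]+)_(.+)' against 'bvlc_caffe' produces three entries (the full string plus two groups), an empty pattern matches only an empty value, and the relaxed policy differs from the strict one only for keys absent from the dictionary.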
Example #6
0
    def parse_log_files(filenames, opts=None):
        """ Parses files and returns their parameters.

        :param list filenames: List of file names to parse.
        :param dict opts:      Dictionary of options.

        :rtype:  tuple<list, list>
        :return: A tuple of two lists - succeeded and failed benchmarks
        """
        opts = {} if opts is None else opts
        for key in ('filter_params', 'filter_query', 'output_params'):
            DictUtils.ensure_exists(opts, key)
        DictUtils.ensure_exists(opts, 'failed_benchmarks', 'discard')
        DictUtils.ensure_exists(opts, '_extended_params', {})
        DictUtils.ensure_exists(opts, 'ignore_errors', False)

        succeeded_benchmarks = []
        failed_benchmarks = []
        for filename in filenames:
            # Parse log file
            params = LogParser.parse_log_file(
                filename, ignore_errors=opts['ignore_errors'])
            # Skip benchmarks that do not match the filter parameters or query
            if len(params) == 0 or \
               not DictUtils.contains(params, opts['filter_params']) or \
               not DictUtils.match(params, opts['filter_query']):
                continue
            # Add extended parameters and compute them
            if len(opts['_extended_params']) > 0:
                params.update(opts['_extended_params'])
                Processor().compute_variables([params])
                # params = params[0]
            # Identify whether this benchmark succeeded or failed.
            succeeded = 'results.throughput' in params and \
                        isinstance(params['results.throughput'], (int, float)) and \
                        params['results.throughput'] > 0
            # Get only those key/values that need to be serialized
            params = DictUtils.subdict(params, opts['output_params'])
            # Append benchmark either to succeeded or failed list
            if succeeded:
                succeeded_benchmarks.append(params)
            else:
                if opts['failed_benchmarks'] == 'keep':
                    succeeded_benchmarks.append(params)
                elif opts['failed_benchmarks'] == 'keep_separately':
                    failed_benchmarks.append(params)
            #
        return succeeded_benchmarks, failed_benchmarks
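A usage sketch for parse_log_files, exercising only the option keys the method itself initializes via ensure_exists. The import path dlbs.logparser, the log file name, and the filter values are assumptions made for illustration; options that are left out fall back to the defaults set at the top of the method.

from dlbs.logparser import LogParser  # assumed module path

opts = {
    'filter_query': {'exp.framework': 'tensorflow'},   # forwarded to DictUtils.match
    'output_params': ['exp.model_title', 'exp.effective_batch',
                      'results.throughput'],           # keys to keep (shape assumed from DictUtils.subdict)
    'failed_benchmarks': 'keep_separately',            # collect failed runs in the second list
    'ignore_errors': True,
}
succeeded, failed = LogParser.parse_log_files(['logs/resnet50.log'], opts=opts)
print('%d succeeded, %d failed' % (len(succeeded), len(failed)))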
Example #7
0
 def test_match_4(self):
     """dlbs  ->  TestDictUtils::test_match_4                         [Testing matching helpers #4]"""
     dictionary = {'exp.framework': "bvlc_caffe", 'exp.model': "ResNet150"}
     self.assertEqual(
         DictUtils.match(dictionary, {'exp.framework': '([^_]+)_(.+)'},
                         policy='strict'), True)
Example #8
0
 def test_match_1(self):
     """dlbs  ->  TestDictUtils::test_match_1                         [Testing matching helpers #1]"""
     for frameworks in [
             self.framework, [self.framework], [self.framework, "Caffe2"]
     ]:
         # We can match against an existing key with strict policy
         self.assertEqual(
             DictUtils.match(self.dictionary, {'exp.framework': frameworks},
                             policy='strict'), True)
         # We cannot match against a non-existent key with strict policy
         self.assertEqual(
             DictUtils.match(self.dictionary,
                             {'exp.framework_id': self.framework},
                             policy='strict'), False)
         # We can match against a non-existent key with relaxed policy
         self.assertEqual(
             DictUtils.match(self.dictionary,
                             {'exp.framework_id': self.framework},
                             policy='relaxed'), True)
     # Key exists, different values
     self.assertEqual(
         DictUtils.match(self.dictionary, {'exp.framework': 'Caffe2'},
                         policy='strict'), False)
     # AND condition + strict policy
     self.assertEqual(
         DictUtils.match(self.dictionary, {
             'exp.framework': self.framework,
             'exp.device_batch': self.device_batch
         },
                         policy='strict'), True)
     # AND condition
     self.assertEqual(
         DictUtils.match(self.dictionary, {
             'exp.framework': [self.framework, 'Caffe2'],
             'exp.device_batch': self.device_batch
         },
                         policy='strict'), True)
     self.assertEqual(
         DictUtils.match(self.dictionary, {
             'exp.framework': self.framework,
             'exp.device_batch': 2 * self.device_batch
         },
                         policy='strict'), False)
     # AND condition relaxed policy
     self.assertEqual(
         DictUtils.match(self.dictionary, {
             'exp.framework': self.framework,
             'exp.effective_batch': 2 * self.device_batch
         },
                         policy='relaxed'), True)
     # AND condition
     self.assertEqual(
         DictUtils.match(self.dictionary, {
             'exp.framework': [self.framework, 'Caffe2'],
             'exp.effective_batch': 2 * self.device_batch
         },
                         policy='relaxed'), True)
     # Relaxed policy with multiple fields that exist and do not match
     self.assertEqual(
         DictUtils.match(self.dictionary, {
             'exp.framework': self.framework,
             'exp.device_batch': 2 * self.device_batch
         },
                         policy='relaxed'), False)
Example #9
0
    def build(benchmarks, args):
        """Creates a JSON object that can be used to plot charts.

        :param list benchmarks: An array of benchmarks
        :param obj args: A result of ArgumentParser.parse_args(). Contains
                         parameters defining the chart.
        """
        series_filters = json.loads(args.series)
        # During the pre-processing step, each series is stored as a dictionary
        # mapping an X value to the list of Y values observed for it; the set of
        # X values is converted to a list at the end.
        chart_data = {
            'ylabel': args.yparam,  # Benchmark parameter for Y-axis
            'xlabel': args.xparam,  # Benchmark parameter for X-axis
            'series': [],  # List of {'filters': dict(), 'data': dict()}
            'xvals': set()  # Possible values for X-axis
        }
        for series_filter in series_filters:
            chart_data['series'].append({
                'filters': series_filter,
                'data': defaultdict(list)
            })
        # Iterate over each benchmark and see if it needs to go into series
        for benchmark in benchmarks:
            # Without 'x' or 'y' data we cannot do anything.
            if args.xparam not in benchmark or args.yparam not in benchmark:
                continue
            # Iterate over series (their filters)
            for idx, series_filter in enumerate(series_filters):
                # If we cannot match all keys from query, ignore it
                if not DictUtils.match(
                        benchmark, series_filter, policy='strict'):
                    continue
                xval = str(benchmark[args.xparam])
                yval = benchmark[args.yparam]
                chart_data['series'][idx]['data'][xval].append(yval)
                chart_data['xvals'].add(xval)
        # Perform final aggregation
        reducers = {
            'min': min,
            'max': max,
            'avg': lambda arr: float(sum(arr)) / len(arr)
        }
        reducer = reducers[args.aggregation]
        baseline_xvalue_exists = True
        for series in chart_data['series']:
            # Reduce multiple matches
            for xval in series['data']:
                series['data'][xval] = reducer(series['data'][xval])
            # Check if normalization to a baseline X value is possible
            if args.baseline_xvalue and args.baseline_xvalue not in series[
                    'data']:
                baseline_xvalue_exists = False
        # In-series normalization with respect to baseline value. It's performed
        # only when all series can be normalized.
        if args.baseline_xvalue and baseline_xvalue_exists:
            for series in chart_data['series']:
                baseline_val = series['data'][args.baseline_xvalue]
                for xval in series['data']:
                    series['data'][xval] /= baseline_val
        # Normalization with respect to baseline series
        if args.baseline_series:
            # We will normalize only when all values from other series can be scaled
            # i.e. baseline series must contain values for x points found in all other
            # series
            baseline_series_norm_ok = True
            baseline_series = chart_data['series'][
                args.baseline_series]['data'].copy()
            for idx, series in enumerate(chart_data['series']):
                if idx == args.baseline_series:
                    continue
                if not baseline_series_norm_ok:
                    break
                for xval in series['data']:
                    if xval not in baseline_series:
                        baseline_series_norm_ok = False
                        break
            if baseline_series_norm_ok:
                for series in chart_data['series']:
                    for xval in series['data']:
                        series['data'][xval] = series['data'][
                            xval] / baseline_series[xval]

        # Return series info
        chart_data['xvals'] = list(chart_data['xvals'])
        print(json.dumps(chart_data, indent=4))
        return chart_data
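build() reads only a handful of attributes from args (series, xparam, yparam, aggregation, baseline_xvalue, baseline_series), so it can be exercised without a real command line. The sketch below constructs an equivalent argparse.Namespace by hand; the benchmark dictionaries and axis parameter names are illustrative, and the enclosing class of build is not shown in this excerpt.

import argparse

# Benchmarks are flat parameter dictionaries, e.g. as produced by LogParser.parse_log_files.
benchmarks = [
    {'exp.framework': 'tensorflow', 'exp.effective_batch': 128, 'results.throughput': 410.0},
    {'exp.framework': 'tensorflow', 'exp.effective_batch': 256, 'results.throughput': 760.0},
    {'exp.framework': 'caffe2', 'exp.effective_batch': 128, 'results.throughput': 390.0},
]
args = argparse.Namespace(
    series='[{"exp.framework": "tensorflow"}, {"exp.framework": "caffe2"}]',  # JSON string
    xparam='exp.effective_batch',
    yparam='results.throughput',
    aggregation='avg',        # one of 'min', 'max', 'avg'
    baseline_xvalue=None,     # skip in-series normalization
    baseline_series=None,     # skip normalization against a baseline series
)
# chart_data = build(benchmarks, args)  # call via its enclosing class in the real code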
Example #10
0
 def dict_matcher(bench):
     return DictUtils.match(bench, query, policy='strict')
 selector = dict_matcher
    def apply_extensions(base_experiment, config):
        """ Apply extensions in *config* to experiment *base_experiment*.

        The algorithm works as follows. We start with a list containing only
        one experiment, *base_experiment*. Then, for each extension, we try to
        extend every experiment in the list.

        :param dict base_experiment: Parameters of an experiment
        :param dict config: Configuration dictionary
        :return: List of experiments extended with extensions or list with `base_experiment`.
        """
        experiments = [copy.deepcopy(base_experiment)]
        for extension in config['extensions']:
            # Experiments produced by applying this extension (or copied over
            # unmodified when its condition does not match).
            active_experiments = []
            for experiment in experiments:
                session_id = uuid.uuid4().__str__().replace('-', '')
                # Condition matches indicate what was matched in the form "field_%d: value",
                # where %d is an integer: 0 denotes the entire match, larger numbers
                # denote regex groups if present.

                # For now, a condition may only be used when the parameter referenced in
                # the 'condition' section is a constant (does not depend on other parameters).
                Builder.assert_match_is_corrent(experiment,
                                                extension['condition'])

                matches = {}
                if not DictUtils.match(experiment,
                                       extension['condition'],
                                       policy='relaxed',
                                       matches=matches):
                    # Not a match, keep unmodified version of this experiment
                    active_experiments.append(copy.deepcopy(experiment))
                else:
                    # Create base extended version using 'parameters' section
                    # of an extension
                    extension_experiment = copy.deepcopy(experiment)
                    # Add condition matched variables in case they are referenced by parameters or cases
                    for match_key in matches:
                        session_key = '__dlbs_%s_%s' % (session_id, match_key)
                        extension_experiment[session_key] = matches[match_key]
                    # We need to update values in `extension["parameters"]` for
                    # current session id
                    extension_experiment.update(
                        Builder.correct_var_ref_in_extension(
                            session_id, extension['parameters']))
                    if len(extension['cases']) == 0:
                        active_experiments.append(extension_experiment)
                    else:
                        for case in extension['cases']:
                            case_experiment = copy.deepcopy(
                                extension_experiment)
                            # We need to update values in `case` for current session id
                            case_experiment.update(
                                Builder.correct_var_ref_in_extension(
                                    session_id, case))
                            active_experiments.append(case_experiment)

            experiments = active_experiments

        experiments = [
            experiment for experiment in experiments if len(experiment) > 0
        ]
        return experiments
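The shape of a single extension follows from the fields apply_extensions reads: a 'condition' dictionary evaluated with DictUtils.match under the relaxed policy, a 'parameters' dictionary merged into matching experiments, and a 'cases' list that forks one experiment per case. The sketch below shows that structure with illustrative values; real DLBS extensions would usually reference the matched condition groups from their parameters, which is omitted here.

# Hedged sketch of a config fragment that apply_extensions can consume; only the
# structure (condition / parameters / cases) is taken from the code above.
config = {
    'extensions': [
        {
            # Relaxed match: experiments lacking 'exp.framework' pass through unmodified.
            'condition': {'exp.framework': '([^_]+)_(.+)'},
            # Merged into every matching experiment (after session-id correction).
            'parameters': {'exp.extension_applied': 'true'},
            # Each case forks one additional experiment from the extended one.
            'cases': [
                {'exp.device_batch': 16},
                {'exp.device_batch': 32},
            ],
        }
    ]
}
base_experiment = {'exp.framework': 'bvlc_caffe', 'exp.model': 'ResNet50'}
# experiments = Builder.apply_extensions(base_experiment, config)
# -> two experiments here, one per case.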