Example #1
    def run(self):
        """Runs subprocess with Popen.

        This method must not be called directly. Use the blocking :py:meth:`~dlbs.Worker.work`
        method instead.
        """
        try:
            # Dump parameters to a log file or to standard output
            DictUtils.ensure_exists(self.params, 'exp.log_file', default_value='')
            if self.params['exp.log_file'].strip() == '':
                self.params['exp.log_file'] = '/dev/stdout'
            IOUtils.mkdirf(self.params['exp.log_file'])
            with open(self.params['exp.log_file'], 'a+') as log_file:
                self.__dump_parameters(log_file)
            # This is where we launch the process. Keep in mind that the log file that's
            # supposed to be created is exp.log_file (exp_log_file in the script).
            # Any other output of the launching script is echoed by this Python code
            # to standard output.
            self.process = subprocess.Popen(self.command, universal_newlines=True,
                                            stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                                            env=self.environ)
            while True:
                output = self.process.stdout.readline()
                if output == '' and self.process.poll() is not None:
                    break
                if output:
                    sys.stdout.write(output)
                    sys.stdout.flush()
            self.ret_code = self.process.poll()
        except Exception as err:
            logging.warning('Exception has been caught for experiment %s: %s', self.params.get('exp.id'), str(err))
            logging.warning(traceback.format_exc())
            self.ret_code = -1
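The streaming loop above is a general pattern for line-buffered capture of a child process's output. A minimal, self-contained sketch of the same idea (the command is a placeholder, not part of DLBS):

    import subprocess
    import sys

    command = ['echo', 'hello']  # placeholder; any line-oriented program works
    process = subprocess.Popen(command, universal_newlines=True,
                               stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    while True:
        line = process.stdout.readline()
        # readline() returns '' only at EOF; poll() stays None while the child runs.
        if line == '' and process.poll() is not None:
            break
        if line:
            sys.stdout.write(line)
            sys.stdout.flush()
    ret_code = process.poll()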
Example #2
def parse_json_arguments(args):
    """Parse parameters, variables and extensions.

    Args:
        args (dict): Dictionary of command line arguments returned by `parse_arguments`. Missing 'P'/'V'/'E' keys are added with empty defaults.

    Returns:
        A tuple of (params, variables, extensions):
          - `params` is a dictionary of parameters (all params in args['P'])
          - `variables` is a dictionary of variables (all vars in args['V'])
          - `extensions` is a list of dictionaries (all extensions in args['E'])
    """
    for param in ('P', 'V', 'E'):
        DictUtils.ensure_exists(args, param, [])
    params, variables, extensions = ({}, {}, [])
    DictUtils.add(params,
                  args['P'],
                  pattern='(.+?(?=[=]))=(.+)',
                  must_match=True)
    DictUtils.add(variables,
                  args['V'],
                  pattern='(.+?(?=[=]))=(.+)',
                  must_match=True)
    for extension in args['E']:
        try:
            extensions.append(json.loads(extension))
        except Exception as err:
            logging.warn("Found non-json parsable extension: %s", extension)
            raise err
    return params, variables, extensions
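A hedged usage sketch: the shape of `args` below is inferred from the docstring (the exact output of `parse_arguments` may differ), and the value types stored in `params`/`variables` depend on how `DictUtils.add` post-processes the matched strings:

    args = {
        'P': ['exp.framework="tensorflow"', 'exp.num_warmup_batches=1'],
        'V': ['exp.device_batch=[8, 16]'],
        'E': ['{"condition": {"exp.framework": "tensorflow"}}'],
    }
    params, variables, extensions = parse_json_arguments(args)
    # extensions -> [{'condition': {'exp.framework': 'tensorflow'}}]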
Example #3
    def test_ensure_exists_1(self):
        """dlbs  ->  TestDictUtils::test_ensure_exists_1                 [Testing dictionary helpers #1]"""
        DictUtils.ensure_exists(self.dictionary, 'exp.framework')

        self.assertEqual('exp.framework' in self.dictionary, True)
        self.assertEqual('exp.model' in self.dictionary, True)
        self.assertEqual('exp.device_batch' in self.dictionary, True)

        self.assertEqual(self.dictionary['exp.framework'], self.framework)
        self.assertEqual(self.dictionary['exp.model'], self.model)
        self.assertEqual(self.dictionary['exp.device_batch'],
                         self.device_batch)

        self.assertEqual(len(self.dictionary), 3)
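The behavior this test pins down amounts to `dict.setdefault`: insert a default only when the key is missing. A minimal sketch of what `ensure_exists` plausibly looks like, consistent with how it is called throughout these examples (the real DLBS implementation may differ):

    class DictUtils(object):
        @staticmethod
        def ensure_exists(dictionary, key, default_value=None):
            # Only insert default_value when key is absent;
            # existing values are never overwritten.
            if key not in dictionary:
                dictionary[key] = default_value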
Example #4
    def report(self, inputs=None, output=None, output_cols=None,
               report_speedup=False, report_efficiency=False, **kwargs):
        DictUtils.ensure_exists(kwargs, 'report_batch_times', True)
        DictUtils.ensure_exists(kwargs, 'report_input_specs', True)
        # Build a cache that maps benchmark keys to benchmark objects.
        self.build_cache(inputs, output, output_cols)
        # Iterate over column values and build tables with batch times and throughput.
        cols = []
        times = []
        throughputs = []
        benchmark_keys = [input_col['vals'] for input_col in self.input_cols]
        # Build tables for batch times and benchmark throughputs.
        # A `benchmark_key` is a tuple of column values, e.g. ('ResNet50', 256).
        for benchmark_key in itertools.product(*benchmark_keys):
            cols.append(copy.deepcopy(benchmark_key))
            times.append([None] * len(self.output_cols))
            throughputs.append([None] * len(self.output_cols))
            # Stringify once per benchmark key rather than per output column.
            benchmark_key = [str(key) for key in benchmark_key]
            for output_col in self.output_cols:
                key = '.'.join(benchmark_key + [str(output_col['value'])])
                if key in self.cache:
                    times[-1][output_col['index']] = self.cache[key]['results.time']
                    throughputs[-1][output_col['index']] = self.cache[key]['results.throughput']
        # Determine minimal widths for columns.
        self.compute_column_widths(times, throughputs)
        #
        header = self.get_header()
        if kwargs['report_batch_times']:
            self.print_table("Batch time (milliseconds)", header, cols, times)
        self.print_table("Throughput (instances per second, e.g. images/sec)", header, cols, throughputs)
        if report_speedup:
            speedups = self.compute_speedups(throughputs)
            self.print_table("Speedup (based on instances per second table, "
                             "relative to first output column ({} = {}))".format(self.output_param,
                                                                                 self.output_cols[0]['value']),
                             header, cols, speedups)
        if report_efficiency:
            efficiency = self.compute_efficiency(times)
            self.print_table("Efficiency (based on batch times table, "
                             "relative to first output column ({} = {}))".format(self.output_param,
                                                                                 self.output_cols[0]['value']),
                             header, cols, efficiency)
        if kwargs['report_input_specs']:
            print("This report is configured with the following parameters:")
            print(" inputs = %s" % str(inputs))
            print(" output = %s" % output)
            print(" output_cols = %s" % str(output_cols))
            print(" report_speedup = %s" % str(report_speedup))
            print(" report_efficiency = %s" % str(report_efficiency))
Example #5
    def test_ensure_exists_3(self):
        """dlbs  ->  TestDictUtils::test_ensure_exists_3                 [Testing dictionary helpers #3]"""
        DictUtils.ensure_exists(self.dictionary, 'exp.data_dir',
                                '/nfs/imagenet')

        self.assertEqual('exp.framework' in self.dictionary, True)
        self.assertEqual('exp.model' in self.dictionary, True)
        self.assertEqual('exp.device_batch' in self.dictionary, True)
        self.assertEqual('exp.data_dir' in self.dictionary, True)

        self.assertEqual(self.dictionary['exp.framework'], self.framework)
        self.assertEqual(self.dictionary['exp.model'], self.model)
        self.assertEqual(self.dictionary['exp.device_batch'],
                         self.device_batch)
        self.assertEqual(self.dictionary['exp.data_dir'], '/nfs/imagenet')

        self.assertEqual(len(self.dictionary), 4)
Example #6
    def __init__(self):
        self.__validation = True  # Validate config before running benchmarks
        self.__action = None  # Action to perform (build, run, ...)
        self.__config_file = None  # Configuration file to load
        self.__progress_file = None  # A JSON file with current progress
        self.__config = {}  # Loaded configuration
        self.__param_info = {}  # Parameter meta-info such as type and value domain
        self.__plan_file = None  # File with pre-built plan
        self.__plan = []  # Loaded or generated plan
        self.__params = {}  # Override env variables from files
        self.__variables = {}  # Override variables from files
        # Dirty hacks
        DictUtils.ensure_exists(os.environ, 'CUDA_CACHE_PATH', '')
        DictUtils.ensure_exists(
            os.environ, 'DLBS_ROOT',
            os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../'))
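For `os.environ` specifically, the same effect can be achieved with the built-in `dict.setdefault`; a sketch mirroring the two calls above:

    import os

    os.environ.setdefault('CUDA_CACHE_PATH', '')
    os.environ.setdefault(
        'DLBS_ROOT',
        os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../'))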
Example #7
    def init(self, **kwargs):
        """Initializes experimenter.

        Args:
            **kwargs (dict): Optional initialization parameters:
                - action (str): Action to perform.
                - config (str): A user-provided configuration file.
                - plan (str): A file for generated benchmark plan.
                - no_validation (bool): If True, do not perform validation.
                - progress_file (str): A path to progress file (if not None, enables progress reporting).
                - params (dict): User defined parameters.
                - vars (dict): User defined variables.
                - discard_default_config (bool): If True, do not load standard DLBS config.
                - extensions (dict): User provided extensions.

        User-provided parameters (`params`), variables (`vars`) and extensions (`extensions`) overwrite values defined
        in the user configuration file (`config`) if it is present.
        Information defined in a user-provided configuration file (`config`) overwrites the standard DLBS configuration.
        """
        if self.__initialized:
            raise RuntimeError("Experimenter can only be initialized once.")

        self.action = DictUtils.get(kwargs, 'action', 'run')
        self.config_file = DictUtils.get(kwargs, 'config', None)
        self.plan_file = DictUtils.get(kwargs, 'plan', None)
        self.validation = not DictUtils.get(kwargs, 'no_validation', False)
        self.__progress_file = DictUtils.get(kwargs, 'progress_file', None)
        # Get parameters and variables from a command line/user-provided
        self.params.update(DictUtils.get(kwargs, 'params', {}))
        self.variables.update(DictUtils.get(kwargs, 'vars', {}))

        # Load default configuration
        if not DictUtils.get(kwargs, 'discard_default_config', False):
            logging.debug("Loading default configuration")
            _, self.config, self.param_info = ConfigurationLoader.load(
                os.path.join(os.path.dirname(__file__), 'configs'))
        # Load configurations specified on a command line
        self.load_configuration()
        # Add extensions from command line
        DictUtils.ensure_exists(self.config, 'extensions', [])
        self.config['extensions'].extend(
            DictUtils.get(kwargs, 'extensions', []))
        # All's done
        self.__initialized = True
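A hedged usage sketch (the class name `Experimenter` and the file path are assumptions made for illustration):

    experimenter = Experimenter()
    experimenter.init(
        action='run',
        config='./config.json',  # placeholder path
        params={'exp.num_warmup_batches': 1},
        vars={'exp.device_batch': [8, 16]},
    )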
Example #8
    def parse_log_files(filenames, opts=None):
        """ Parses files and returns their parameters.

        :param list filenames: List of file names to parse.
        :param dict opts:      Dictionary of options.

        :rtype:  tuple<list, list>
        :return: A tuple of two lists - succeeded and failed benchmarks
        """
        opts = {} if opts is None else opts
        for key in ('filter_params', 'filter_query', 'output_params'):
            DictUtils.ensure_exists(opts, key)
        DictUtils.ensure_exists(opts, 'failed_benchmarks', 'discard')
        DictUtils.ensure_exists(opts, '_extended_params', {})
        DictUtils.ensure_exists(opts, 'ignore_errors', False)

        succeeded_benchmarks = []
        failed_benchmarks = []
        for filename in filenames:
            # Parse log file
            params = LogParser.parse_log_file(
                filename, ignore_errors=opts['ignore_errors'])
            # Check if this benchmark does not match filter
            if len(params) == 0 or \
               not DictUtils.contains(params, opts['filter_params']) or \
               not DictUtils.match(params, opts['filter_query']):
                continue
            # Add extended parameters and compute them
            if len(opts['_extended_params']) > 0:
                params.update(opts['_extended_params'])
                Processor().compute_variables([params])
                # params = params[0]
            # Identify whether this benchmark succeeded or failed.
            succeeded = 'results.throughput' in params and \
                        isinstance(params['results.throughput'], (int, float)) and \
                        params['results.throughput'] > 0
            # Get only those key/values that need to be serialized
            params = DictUtils.subdict(params, opts['output_params'])
            # Append benchmark either to succeeded or failed list
            if succeeded:
                succeeded_benchmarks.append(params)
            else:
                if opts['failed_benchmarks'] == 'keep':
                    succeeded_benchmarks.append(params)
                elif opts['failed_benchmarks'] == 'keep_separately':
                    failed_benchmarks.append(params)
            #
        return succeeded_benchmarks, failed_benchmarks
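A hedged invocation sketch; the log file names are placeholders, and `parse_log_files` is assumed to be exposed on `LogParser` alongside the `parse_log_file` call used above:

    opts = {
        'output_params': ['exp.framework', 'results.throughput'],
        'failed_benchmarks': 'keep_separately',
    }
    good, bad = LogParser.parse_log_files(['./bm1.log', './bm2.log'], opts=opts)
    print('%d succeeded, %d failed' % (len(good), len(bad)))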
Example #9
    def build(config, params, variables):
        """ Given an input configuration and command line parameters/variables, build experiments.

        :param dict config: Dictionary of parameters/variables/extensions
        :param dict params: Dictionary of command line parameters
        :param dict variables: Dictionary of command line variables
        :return: Array of experiments. Each experiment is defined by a set of parameters.

        A high-level overview of what the builder does:
        ::

          1. Add **variables** to the 'variables' section of the configuration ('config').
          2. Override variables in the 'parameters' section of 'config' with those specified on the command line ('params').
          3. For every combination (Cartesian product) of variables in 'config':
             a. Create a copy of the parameters.
             b. Add the combination to those parameters.
             c. Apply extensions, possibly generating multiple experiments.
             d. Add them to the list of experiments.

        If the input configuration contains extensions, the following algorithm applies:
        ::

          1. Set experiments <- [experiment]
          2. For extension in extension_list:
          3.     Set active_experiments <- []
          4.     For experiment in experiments:
          5.         If not match(experiment, extension.condition):
          6.             active_experiments.append(experiment)
          7.         Else:
          8.             active_experiments.extend(extend(experiment, extension))
          9.     experiments <- active_experiments
          10. Return experiments
        """
        # This makes parsing life easier.
        for section in ['parameters', 'variables', 'extensions']:
            DictUtils.ensure_exists(config, section, {})
        for extension in config['extensions']:
            for section in ['condition', 'parameters', 'cases']:
                DictUtils.ensure_exists(extension, section, {})
        # First, we need to update variables that contribute to creating
        # different experiments
        for var_name in variables:
            config['variables'][var_name] = copy.deepcopy(variables[var_name])
        # We also need to ensure that those values are lists
        for variable in config['variables']:
            if not isinstance(config['variables'][variable], list):
                config['variables'][variable] = [config['variables'][variable]]

        # Now, we need to override parameters with those specified on the command line.
        for param in params:
            config['parameters'][param] = copy.deepcopy(params[param])

        plan = []
        # Get order of variables in experiments
        # These are all variables that we will vary
        var_keys = config['variables'].keys()
        # Filter the order of those variables - remove ones that we do not actually have.
        if 'sys.plan_builder.var_order' in config['parameters']:
            var_order = [
                k for k in config['parameters']['sys.plan_builder.var_order']
                if k in var_keys
            ]
        else:
            var_order = []
        # Add those that are not in an order array
        for k in var_keys:
            if k not in var_order:
                var_order.append(k)

        var_values = [config['variables'][var_key] for var_key in var_order]
        # This loop will run exactly once if var_values is empty.
        for variables_combination in itertools.product(*var_values):
            # Create base set of variables.
            experiment = copy.deepcopy(config['parameters'])
            # Add current combination of variables
            experiment.update(
                dict((k, v)
                     for (k, v) in zip(var_order, variables_combination)))
            # Apply extensions possibly generating many experiment configurations
            extended_experiments = Builder.apply_extensions(experiment, config)
            # Add to plan
            plan.extend(extended_experiments)
        return plan
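An illustrative input (assuming the method is exposed as `Builder.build` and no extensions are defined): a 2x2 Cartesian product of variables should yield four experiments, each a copy of the parameters plus one variable combination:

    config = {
        'parameters': {'exp.num_warmup_batches': 1},
        'variables': {
            'exp.framework': ['tensorflow', 'caffe2'],
            'exp.device_batch': [8, 16],
        },
    }
    plan = Builder.build(config, params={}, variables={})
    assert len(plan) == 4  # 2 frameworks x 2 batch sizes, assuming no extensions apply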
Example #10
    def init(self,
             init_logger=False,
             load_default_config=True,
             load_config=True):
        """Initializes experimenter.

        :param bool init_logger: If True, initializes loggers
        :param bool load_default_config: If false, does not load standard configuration.
        :param bool load_config: If true, loads configuration specified on a command line
        """
        # Parse command line arguments
        parser = argparse.ArgumentParser()
        parser.add_argument('action', type=str,
                            help='Action to perform. Valid actions: "print-config", "run", '
                                 '"build" and "analyze-plan".')
        parser.add_argument('--config', required=False, type=str,
                            help='Configuration file (json) of an experiment. '
                                 'Will override values from the default configuration.')
        parser.add_argument('--plan', required=False, type=str,
                            help='Pre-built plan of an experiment (json). '
                                 'If action is "build", a file name to write the plan to. '
                                 'If action is "run", a file name to read the plan from.')
        parser.add_argument('--progress_file', '--progress-file', required=False, type=str, default=None,
                            help='A JSON file that the experimenter will keep updating with its progress. '
                                 'If not present, no progress info will be available. '
                                 'Put it somewhere in /dev/shm.')
        parser.add_argument('-P', action='append', required=False, default=[],
                            help='Parameters that override parameters in the configuration file. '
                                 'For instance, -Pexp.phase=2. Values must be json parsable (json.loads()).')
        parser.add_argument('-V', action='append', required=False, default=[],
                            help='Variables that override variables in the "variables" section of the configuration file. '
                                 'These variables are used to generate different combinations of experiments. '
                                 'For instance: -Vexp.framework=\'["tensorflow", "caffe2"]\'. '
                                 'Values must be json parsable (json.loads()).')
        parser.add_argument('--log_level', '--log-level', required=False, default='info',
                            help='Python logging level. Valid values: "critical", "error", '
                                 '"warning", "info" and "debug".')
        parser.add_argument('--discard_default_config',
                            '--discard-default-config',
                            required=False,
                            default=False,
                            action='store_true',
                            help='Do not load default configuration.')
        parser.add_argument(
            '--no_validation',
            '--no-validation',
            required=False,
            default=False,
            action='store_true',
            help='Do not perform config validation before running benchmarks.')
        parser.add_argument('-E', action='append', required=False, default=[],
                            help='Extensions to add. Can be useful to quickly customize experiments. '
                                 'Must be a valid json parsable array element for the "extensions" array.')
        args = parser.parse_args()

        log_level = logging.getLevelName(args.log_level.upper())
        self.action = args.action
        self.config_file = args.config
        self.plan_file = args.plan
        self.validation = not args.no_validation
        self.__progress_file = args.progress_file

        # Initialize logger
        if init_logger:
            logging.debug("Initializing logger to level %s", args.log_level)
            root = logging.getLogger()
            root.setLevel(log_level)
            handler = logging.StreamHandler(sys.stdout)
            handler.setLevel(log_level)
            root.addHandler(handler)

        logging.debug("Parsing parameters on a command line")
        DictUtils.add(self.params,
                      args.P,
                      pattern='(.+?(?=[=]))=(.+)',
                      must_match=True)
        logging.debug("Parsing variables on a command line")
        DictUtils.add(self.variables,
                      args.V,
                      pattern='(.+?(?=[=]))=(.+)',
                      must_match=True)

        # Load default configuration
        if load_default_config and not args.discard_default_config:
            logging.debug("Loading default configuration")
            _, self.config, self.param_info = ConfigurationLoader.load(
                os.path.join(os.path.dirname(__file__), 'configs'))

        # Load configurations specified on a command line
        if load_config:
            logging.debug("Loading user configuration")
            self.load_configuration()

        # Add extensions from command line
        DictUtils.ensure_exists(self.config, 'extensions', [])
        if len(args.E) > 0:
            logging.debug("Parsing extensions on a command line")
        for extension in args.E:
            try:
                ext = json.loads(extension)
                logging.debug('Found extension: %s', str(ext))
                self.config['extensions'].append(ext)
            except Exception as err:
                logging.warn("Found non-json parsable extension: %s",
                             extension)
                raise err
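The `-P`/`-V` values are split by the regular expression passed to `DictUtils.add`: a lazy group captures everything before the first '=' as the name, and the rest as the (JSON-parsable) value. A quick standalone check of that pattern:

    import re

    pattern = re.compile('(.+?(?=[=]))=(.+)')
    match = pattern.match('exp.framework="tensorflow"')
    print(match.group(1))  # exp.framework
    print(match.group(2))  # "tensorflow"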