def save(self, output_descriptor):
        """ Save contents of this instance into a (compressed) JSON file.

        Args:
            output_descriptor (str): A file name.
        """
        IOUtils.write_json(output_descriptor, {'data': self.__benchmarks})
 def build_tensorflow_human_labels(imagenet_dir, human_labels_file):
     """Builds a textual file with one synset on a line"""
     IOUtils.mkdirf(human_labels_file)
     labels = ImageNetTools.get_labels()
     with open(human_labels_file, 'w') as fobj:
         for label in labels:
             fobj.write("%s\t%s\n" % (label, labels[label]['human_labels']))
Example #3
    def run(self):
        """Runs subprocess with Popen.

        This method must not be called directly. Use blocking :py:meth:`~dlbs.Worker.work`
        method instead.
        """
        try:
            # Dump parameters to a log file or to standard output
            DictUtils.ensure_exists(self.params, 'exp.log_file', default_value='')
            if self.params['exp.log_file'].strip() == '':
                self.params['exp.log_file'] = '/dev/stdout'
            IOUtils.mkdirf(self.params['exp.log_file'])
            with open(self.params['exp.log_file'], 'a+') as log_file:
                self.__dump_parameters(log_file)
            # This is where we launch the process. Keep in mind that the log file
            # to be created is exp.log_file (exp_log_file in the script). Any other
            # output of the launching script is forwarded by this Python code to
            # standard output.
            self.process = subprocess.Popen(
                self.command, universal_newlines=True,
                stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=self.environ
            )
            while True:
                output = self.process.stdout.readline()
                if output == '' and self.process.poll() is not None:
                    break
                if output:
                    sys.stdout.write(output)
                    sys.stdout.flush()
            self.ret_code = self.process.poll()
        except Exception as err:
            logging.warning('Exception has been caught for experiment %s: %s', self.params.get('exp.id'), str(err))
            logging.warning(traceback.format_exc())
            self.ret_code = -1
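
# The readline loop over a Popen pipe above is a reusable pattern. Below is a
# minimal standalone sketch of the same idea (standard library only);
# `stream_subprocess` is a hypothetical helper, not part of the original class.
import subprocess
import sys

def stream_subprocess(command, environ=None):
    """Run `command`, forwarding its merged stdout/stderr line by line."""
    process = subprocess.Popen(
        command,
        universal_newlines=True,         # text mode: readline() returns str
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,        # merge stderr into stdout
        env=environ
    )
    while True:
        line = process.stdout.readline()
        # readline() returns '' only at EOF; poll() is None while still running.
        if line == '' and process.poll() is not None:
            break
        if line:
            sys.stdout.write(line)
            sys.stdout.flush()
    return process.poll()                # final return code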
 def build_tensorflow_synsets(imagenet_dir, synset_file):
     """Builds a textual file with one synset on a line"""
     IOUtils.mkdirf(synset_file)
     labels = ImageNetTools.get_labels()
     with open(synset_file, 'w') as fobj:
         for label in labels:
             fobj.write("%s\n" % label)
 def build_mxnet_labels(imagenet_dir, labels_file):
     """Generates a textual file with the following content:
        0   45  n02093256/n02093256_3032.JPEG
        1   45  n02093256/n02093256_3353.JPEG
        ...
        image_index   image_class_label   image_path
     """
     IOUtils.mkdirf(labels_file)
     img_files = ImageNetTools.get_image_files(imagenet_dir)
     labels = ImageNetTools.get_labels()
     with open(labels_file, 'w') as fobj:
         for img_index, img_file in enumerate(img_files):
             synset, fname, finfo = ImageNetTools.get_file_info(img_file, labels)
             fobj.write("%d\t%d\t%s/%s\n" % (img_index, finfo['label'], synset, fname))
 def build_caffe_labels(imagenet_dir, labels_file):
     """Generates a textual file with the following content:
        img_0000.jpeg 1
        img_0001.jpeg 0
        ...
        mapping image file name to its class label
     """
     IOUtils.mkdirf(labels_file)
     img_files = ImageNetTools.get_image_files(imagenet_dir)
     labels = ImageNetTools.get_labels()
     with open(labels_file, 'w') as fobj:
         for img_file in img_files:
             synset, fname, finfo = ImageNetTools.get_file_info(img_file, labels)
             fobj.write("%s/%s %d\n" % (synset, fname, finfo['label']))
def main():
    """Does all log parsing work."""
    opts = parse_args()

    files = IOUtils.gather_files(opts['inputs'], "*.log", opts['recursive'])
    succeeded, failed = LogParser.parse_log_files(files, opts)

    def _dump_data(file_name, opts, data):
        with (gzip.open(file_name, 'wt') if opts['_gz'] else open(file_name, 'w')) as file_obj:
            # 'wt' opens gzip output in text mode so json.dump can write str objects.
            json.dump({'data': data}, file_obj, indent=4)

    if opts['output_file'] is None:
        json.dump(succeeded, sys.stdout, indent=4, sort_keys=True)
        print ("")
    else:
        IOUtils.mkdirf(opts['output_file'])
        output_files = []
        if len(failed) > 0:
            _dump_data(opts['_failed_file'], opts, failed)
            output_files.append(opts['_failed_file'])

        num_benchmarks = len(succeeded)
        if opts['num_output_files'] is not None:
            opts['benchmarks_per_file'] = int(math.ceil(float(num_benchmarks) / opts['num_output_files']))

        if opts['benchmarks_per_file'] is not None:
            file_index = 0
            while True:
                start_index = file_index * opts['benchmarks_per_file']
                end_index = min(start_index + opts['benchmarks_per_file'], num_benchmarks)
                file_name = IOUtils.get_non_existing_file(
                    "%s_%d.%s" % (opts['_output_file_without_ext'], file_index, opts['_ext'])
                )
                _dump_data(
                    file_name,
                    opts,
                    succeeded[start_index:end_index]
                )
                output_files.append(file_name)
                if end_index >= num_benchmarks:
                    break
                file_index += 1
        else:
            _dump_data(opts['output_file'], opts, succeeded)
            output_files.append(opts['output_file'])
        print("Log parser summary.")
        print("Following files have been created:")
        json.dump(output_files, sys.stdout, indent=4, sort_keys=True)
        print ("")
Example #8
    def compute(log_dir, recursive):
        """ Finds files and compute experiments' statistics.

        :param std log_dir: Directory to search files for.
        :param bool recursive: If True, directory will be searched recursively.
        :return: Dictionary with experiment statistics.
        """
        files = IOUtils.find_files(log_dir, "*.log", recursive)
        exps = LogParser.parse_log_files(files)

        stats = {
            'num_log_files': len(files),
            'num_failed_exps': 0,
            'num_successful_exps': 0,
            'failed_exps': {}
        }
        for exp in exps:
            time_val = str(exp['results.time']).strip() if 'results.time' in exp else ''
            if not time_val:
                stats['num_failed_exps'] += 1
                stats['failed_exps'][exp['exp.id']] = {
                    'msg': 'No %s time found in log file.' % exp['exp.phase'],
                    'log_file': exp['exp.log_file'],
                    'phase': exp['exp.phase'],
                    'framework_title': exp['exp.framework_title']
                }
            else:
                stats['num_successful_exps'] += 1
        return stats
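
# A hedged usage sketch: './logs' is a placeholder directory, and `compute` is
# assumed to be reachable as shown (it may live on a class in the original code).
stats = compute('./logs', recursive=True)
# `stats` is a plain dict, for example:
#   {'num_log_files': 10, 'num_failed_exps': 1, 'num_successful_exps': 9,
#    'failed_exps': {'<exp.id>': {'msg': '...', 'log_file': '...', 'phase': '...',
#                                 'framework_title': '...'}}}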
    def get_selector(query):
        """Returns a callable object that returns true when `query` matches dictionary.

        Args:
            query: An object that specifies the query. It can be one of the following:
                - A string: parse it as JSON if possible; otherwise, treat it as a
                  file name and load the JSON from that file. The parsed/loaded
                  object must be either a dict or a list.
                - A list or dict: wrap it into a function that calls the `match`
                  method of the `DictUtils` class.
                - A callable object: return it as is.

        Returns:
            Callable object.
        """
        # If it's a string, assume it's a JSON-parsable string; if parsing fails, assume it's a JSON file name.
        if isinstance(query, Six.string_types):
            try:
                query = json.loads(query)
            except ValueError:
                query = IOUtils.read_json(query)

        selector = query
        # If it's a list or dict, wrap it into a function.
        if isinstance(query, (list, dict)):
            def dict_matcher(bench):
                return DictUtils.match(bench, query, policy='strict')
            selector = dict_matcher
        # Here, it must be a callable object.
        if not callable(selector):
            raise ValueError("Invalid type of object that holds parameters (%s)" % type(selector))
        return selector
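
# Illustrative calls covering the three accepted query forms; the parameter
# names in the queries and the `benchmarks` list are made up for this sketch.
benchmarks = [{'exp.framework': 'tensorflow', 'exp.status': 'ok'}]
selector = get_selector('{"exp.framework": "tensorflow"}')              # JSON string
selector = get_selector([{'exp.framework': 'tensorflow'}])              # list of dicts
selector = get_selector(lambda bench: bench.get('exp.status') == 'ok')  # callable
matching = [bench for bench in benchmarks if selector(bench)]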
    def parse(inputs, recursive=False, ignore_errors=False):
        """Parse benchmark log files (*.log).

        Args:
            inputs: Path specifiers of where to search for log files.
            recursive (bool): If true, parse directories found in `inputs` recursively.
            ignore_errors (bool): If true, ignore errors associated with parsing parameter values.

        Returns:
            Instance of this class.
        """
        inputs = inputs if isinstance(inputs, list) else [inputs]
        log_files = set()
        for file_path in inputs:
            if os.path.isdir(file_path):
                log_files.update(IOUtils.gather_files([file_path], "*.log", recursive))
            elif file_path.endswith('.log'):
                log_files.add(file_path)
        log_files = list(log_files)
        benchmarks = []
        for log_file in log_files:
            parameters = {}
            with OpenFile(log_file, 'r') as logfile:
                # 'must_match' must be False: not every line in a log file is
                # required to match the key-value pattern.
                DictUtils.add(
                    parameters,
                    logfile,
                    pattern='[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)',
                    must_match=False,
                    ignore_errors=ignore_errors
                )
            benchmarks.append(parameters)
        return BenchData(benchmarks, create_copy=False)
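
# The regular expression above extracts parameters from log lines of the form
# `__key__ = value` (format inferred from the pattern itself). A small,
# self-contained check of what it matches:
import re

pattern = re.compile(r'[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)')
match = pattern.match('__exp.framework__ = "tensorflow"')
if match:
    key, value = match.group(1), match.group(2).strip()
    print(key, value)   # -> exp.framework "tensorflow"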
    def action_benchdb(self):
        #
        print("Searching for benchmark archives ...")
        file_names = []
        for file_type in ('*.tgz', '*.tar.gz', '*.json.gz'):
            file_names.extend(
                IOUtils.find_files(self.__args['inputs'][0],
                                   file_type,
                                   recursively=True))
        print("    found {} benchmark files.".format(len(file_names)))
        #
        bench_data = {}
        print("Parsing benchmark archives ...")
        for file_name in file_names:
            BenchData.merge_benchmarks(
                bench_data,
                BenchData.load(file_name).as_dict(key_len=5))
            print("    done [{}]".format(file_name))

        print("    found {} benchmarks.".format(len(bench_data)))
        #
        print("Serializing benchmarks ...")
        with open('/dev/shm/benchmark_db.pickle', 'wb') as handle:
            pickle.dump(bench_data, handle, protocol=pickle.HIGHEST_PROTOCOL)
        #
        print("    database generation completed.")
    def load(inputs, **kwargs):
        """Load benchmark data (parsed from log files) from a JSON file.

        The input is a JSON file that contains an object with a 'data' field. This field
        is a list of dictionaries, each holding the parameters of one benchmark:
        {"data": [{...}, {...}, {...}]}

        CSV files, compressed tarballs of log files, and raw log files are also recognized.

        Args:
            inputs (str): File name of a JSON (*.json) or compressed JSON (*.json.gz) file.

        Returns:
            Instance of this class.
        """
        is_json_file = IOUtils.is_json_file(inputs)
        if not is_json_file and isinstance(inputs, list) and len(inputs) == 1:
            is_json_file = IOUtils.is_json_file(inputs[0])
            inputs = inputs[0] if is_json_file else inputs
        if is_json_file:
            benchmarks = IOUtils.read_json(inputs, check_extension=True)
            if 'data' not in benchmarks:
                benchmarks = {'data': []}
                print("[WARNING]: No benchmark data found in '{}'".format(
                    inputs))
            return BenchData(benchmarks['data'], create_copy=False)
        #
        is_csv_file = IOUtils.is_csv_file(inputs)
        if not is_csv_file and isinstance(inputs, list) and len(inputs) == 1:
            is_csv_file = IOUtils.is_csv_file(inputs[0])
            inputs = inputs[0] if is_csv_file else inputs
        if is_csv_file:
            with OpenFile(inputs, 'r') as fobj:
                reader = csv.DictReader(fobj)
                benchmarks = list(reader)
            return BenchData(benchmarks, create_copy=False)
        #
        is_compressed_tarball = IOUtils.is_compressed_tarball(inputs)
        if not is_compressed_tarball and isinstance(inputs, list) and len(inputs) == 1:
            is_compressed_tarball = IOUtils.is_compressed_tarball(inputs[0])
            inputs = inputs[0] if is_compressed_tarball else inputs
        if is_compressed_tarball:
            benchmarks = []
            with tarfile.open(inputs, "r:gz") as archive:
                for member in archive.getmembers():
                    if member.isfile() and member.name.endswith('.log'):
                        log_file = archive.extractfile(member)
                        if log_file is not None:
                            parameters = {}
                            DictUtils.add(
                                parameters,
                                log_file,
                                pattern='[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)',
                                must_match=False,
                                ignore_errors=True
                            )
                            benchmarks.append(parameters)
            return BenchData(benchmarks, create_copy=False)
        #
        return BenchData.parse(inputs, **kwargs)
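
# Hypothetical calls illustrating the dispatch above; the file names are
# placeholders. `load` recognizes JSON/compressed JSON, CSV, and compressed
# tarballs of *.log files, and otherwise falls back to `parse`.
bench_data = BenchData.load('benchmarks.json.gz')        # {'data': [...]} JSON
bench_data = BenchData.load('benchmarks.csv')            # one benchmark per CSV row
bench_data = BenchData.load('benchmarks.tar.gz')         # *.log members of a tarball
bench_data = BenchData.load(['./logs'], recursive=True)  # falls back to parse()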
    def load_data(**kwargs):
        is_dir = os.path.isdir(kwargs['input'])
        is_file = os.path.isfile(kwargs['input'])
        is_log_file = is_file and kwargs['input'].endswith('.log')
        is_json_file = is_file and kwargs['input'].endswith(('.json', '.json.gz'))

        if is_dir or is_log_file:
            files = IOUtils.find_files(kwargs['input'], "*.log",
                                       kwargs['recursive'])
            benchmarks, failed_benchmarks = LogParser.parse_log_files(files)
            benchmarks.extend(failed_benchmarks)
        elif is_json_file:
            benchmarks = IOUtils.read_json(kwargs['input'])
            benchmarks = benchmarks['data']
        else:
            raise ValueError("Invalid input descriptor: {}".format(
                kwargs['input']))
        return benchmarks
Example #14
    def compute(log_dir, recursive):
        """ Finds files and compute experiments' statistics.

        :param std log_dir: Directory to search files for.
        :param bool recursive: If True, directory will be searched recursively.
        :return: Dictionary with experiment statistics.
        """
        files = IOUtils.find_files(log_dir, "*.log", recursive)
        benchmarks, failed_benchmarks = LogParser.parse_log_files(files)

        def _get(d, key, val=''):
            return d.get(key, val)

        stats = {
            'num_log_files': len(files),
            'num_failed_exps': 0,
            'num_successful_exps': 0,
            'failed_exps': {},
            'node_ids': set(),
            'node_titles': set(),
            'gpu_titles': set()
        }
        for bench in benchmarks:
            time_val = str(bench['results.time']).strip() if 'results.time' in bench else ''
            if not time_val:
                stats['num_failed_exps'] += 1
                if 'exp.id' not in bench:
                    print("[ERROR] No exp.id found in benchmark (%s)" %
                          str(bench))
                    continue
                stats['failed_exps'][bench['exp.id']] = {
                    'msg': 'No %s time found in log file.' % _get(bench, 'exp.phase', 'PHASE_UNKNOWN'),
                    'log_file': _get(bench, 'exp.log_file', 'LOG_FILE_UNKNOWN'),
                    'phase': _get(bench, 'exp.phase', 'PHASE_UNKNOWN'),
                    'framework_title': _get(bench, 'exp.framework_title', 'FRAMEWORK_TITLE_UNKNOWN')
                }
            else:
                stats['num_successful_exps'] += 1
            #
            for key in [('exp.node_id', 'node_ids'),
                        ('exp.node_title', 'node_titles'),
                        ('exp.gpu_title', 'gpu_titles')]:
                if key[0] in bench:
                    stats[key[1]].add(bench[key[0]])

        for key in ['node_ids', 'node_titles', 'gpu_titles']:
            stats[key] = list(stats[key])
        return stats
 def get_image_files(folder, shuffle=True, num_files=-1):
     """ Get *.JPEG files in folder. Shuffle files and return at most num_files
         files.
     """
     # Scan the folder recursively and find files.
     files = IOUtils.find_files(folder, '*.JPEG', recursively=True)
     # Shuffle files and return first 'num_files' files.
     if shuffle:
         random.shuffle(files)
     if 0 < num_files < len(files):
         files = files[:num_files]
     return files
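
# Hypothetical usage: sample at most 1000 random training images
# (the directory path is a placeholder).
image_files = get_image_files('/data/imagenet/train', shuffle=True, num_files=1000)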
Example #16
def main():
    """Does all log parsing work."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--summary-file', type=str, required=False, default=None, help='Write summary of experiments into this JSON file.')
    parser.add_argument('--log-dir', type=str, required=False, default=None, help='Scan this folder for *.log files. Scan recursively if --recursive is set.')
    parser.add_argument('--recursive', required=False, default=False, action='store_true', help='Scan --log-dir folder recursively for log files.')
    parser.add_argument('--keys', nargs='*', required=False, help='Parameters to extract from log files. If not set or empty, all parameters are returned.')
    parser.add_argument('--strict', action='store_true', default=False, help='If set, serialize only those results that contain all keys specified with the --keys arg.')
    parser.add_argument('log_files', nargs='*', help='Log files to parse')
    args = parser.parse_args()

    files = []
    if len(args.log_files) > 0:
        files = args.log_files
    elif args.log_dir is not None:
        files = IOUtils.find_files(args.log_dir, "*.log", args.recursive)

    params = LogParser.parse_log_files(files, keys=args.keys)

    if args.strict and args.keys:
        filtered_params = []
        for param in params:
            param_ok = True
            for key in args.keys:
                if key not in param:
                    param_ok = False
                    #print ("skipping because missing field %s" % key)
                    break
            if param_ok:
                filtered_params.append(param)
        params = filtered_params

    summary = {"data": params}
    if args.summary_file is None:
        json.dump(summary, sys.stdout, indent=4, sort_keys=True)
        print ("")
    else:
        DictUtils.dump_json_to_file(summary, args.summary_file)
def main():
    """Entry point when invoking this scrip from a command line."""
    parser = argparse.ArgumentParser()
    parser.add_argument('inputs',
                        nargs='*',
                        help='Log directory or a JSON file')
    parser.add_argument(
        '--recursive',
        required=False,
        default=False,
        action='store_true',
        help='If input is folder, scan it recursively for log files.')
    parser.add_argument('--xparam',
                        type=str,
                        required=True,
                        default=None,
                        help='A parameter that is associated with x axis.')
    parser.add_argument('--yparam',
                        type=str,
                        required=True,
                        default=None,
                        help='A parameter that is associated with y axis.')
    parser.add_argument('--series',
                        type=str,
                        required=True,
                        default=None,
                        help='A json array with filters for series.')
    parser.add_argument(
        '--aggregation',
        type=str,
        required=True,
        default="avg",
        help='In case of multiple matches, use this to aggregate values (min, max, avg).'
    )
    parser.add_argument('--chart_file',
                        '--chart-file',
                        type=str,
                        required=False,
                        default=None,
                        help='If present, write chart into this file.')
    parser.add_argument(
        '--series_file',
        '--series-file',
        type=str,
        required=False,
        default=None,
        help='If present, write series JSON data into this file.')
    parser.add_argument(
        '--chart_opts',
        '--chart-opts',
        type=str,
        required=False,
        default=None,
        help='If present, a json object specifying chart options.')
    parser.add_argument('--chart_type',
                        '--chart-type',
                        type=str,
                        required=False,
                        default='line',
                        help='Type of a chart ("line" or "bar").')
    parser.add_argument(
        '--baseline_xvalue',
        '--baseline-xvalue',
        type=str,
        required=False,
        default=None,
        help="A value that's used to normalize one series. Useful to plot speedup charts."
    )
    parser.add_argument(
        '--baseline_series',
        '--baseline-series',
        type=int,
        required=False,
        default=None,
        help="An index of a baseline series to use to normalize all series.")
    args = parser.parse_args()

    if len(args.inputs) == 0:
        raise ValueError("Must be at least one input ('--input')")

    # Parse log files and load benchmark data
    logfiles = []  # Original raw log files with benchmark data
    benchmarks = []  # Parsed benchmarks
    for input_path in args.inputs:
        if os.path.isdir(input_path):
            logfiles.extend(
                IOUtils.find_files(input_path, "*.log", args.recursive))
        elif os.path.isfile(input_path) and input_path.endswith(('.json', '.json.gz')):
            file_benchmarks = IOUtils.read_json(input_path)
            if 'data' in file_benchmarks and isinstance(file_benchmarks['data'], list):
                benchmarks.extend(file_benchmarks['data'])
            else:
                logging.warn("Cannot parse file (%s). Invalid content.",
                             input_path)
        else:
            logging.warn("Cannot parse file (%s). Unknown extension. ",
                         input_path)
    if len(logfiles) > 0:
        benchmarks.extend(LogParser.parse_log_files(logfiles))
    else:
        logging.warn("No input log files have been found")
    if len(benchmarks) == 0:
        raise ValueError("No benchmarks have been loaded.")
    # Build data for series
    chart_data = SeriesBuilder.build(benchmarks, args)
    # Write it
    if args.series_file:
        DictUtils.dump_json_to_file(chart_data, args.series_file)
    # Plot it
    if args.chart_file:
        SeriesBuilder.plot(chart_data, args)
Example #18
    parser.add_argument('--log_file',
                        '--log-file',
                        type=str,
                        required=False,
                        default=None,
                        help="Get batch statistics from this experiment.")
    parser.add_argument(
        '--recursive',
        required=False,
        default=False,
        action='store_true',
        help='Scan --log-dir folder recursively for log files.')
    args = parser.parse_args()

    if args.log_dir is not None:
        files = IOUtils.find_files(args.log_dir, "*.log", args.recursive)
    else:
        files = []
    if args.log_file is not None:
        files.append(args.log_file)

    save_file = args.save_file

    exps = LogParser.parse_log_files(files)
    for exp in exps:
        key = 'results.time_data'
        if key not in exp:
            continue
        times = exp[key]
 def action_summary(self):
     IOUtils.write_json(self.__args['output'], self.__data.summary(), check_extension=False)