# Beispiel #1 (scraped example separator; vote count: 0)
def get_experiment_stats(results, gpu, niter):
    """Collect per-experiment, per-algorithm statistics from benchmark results.

    Args:
        results: path to a results file, or a directory of raw log files.
        gpu: if True keep only GPU tracks, otherwise only CPU tracks.
        niter: iteration count forwarded to calculate_statistics.

    Returns:
        dict of the form {experiment: {algorithm: {params: stat}}};
        experiments whose stats came out completely empty are dropped.
    """
    stats = {}

    # A directory means raw logs; a file means pre-parsed results.
    if os.path.isdir(results):
        tracks = json_from_logs(results)
    else:
        tracks = read_results(results)

    # NOTE: Python 3 iteration (`items()`) instead of the former Py2-only
    # `iterkeys()`/`iteritems()`, matching the sibling helpers in this file.
    for experiment_name, experiment_tracks in tracks.items():
        stats[experiment_name] = {}

        # Device filter: keep a track iff its name contains 'GPU' exactly
        # when gpu is True.
        experiment_tracks = {
            name: track for name, track in experiment_tracks.items()
            if gpu == ('GPU' in name)
        }

        for algorithm_name, algorithm_tracks in experiment_tracks.items():
            stats[experiment_name][algorithm_name] = {}
            table_tracks = split_tracks(algorithm_tracks)

            for params, cur_tracks in table_tracks.items():
                stat = calculate_statistics(cur_tracks, niter)
                if stat == {}:
                    continue
                stats[experiment_name][algorithm_name][params] = stat

    # Drop experiments that produced no algorithm stats at all.
    stats = {name: stat for name, stat in stats.items() if len(stat) > 0}

    return stats
def main():
    """CLI entry point: parse arguments, load results, plot every experiment."""
    # Mapping of plot mode names to their implementations; the keys double
    # as the allowed values for --type.
    plot_functions = {
        'time-per-iter': plot_time_per_iter,
        'best': plot_quality,
        'quality-vs-time': plot_quality_vs_time,
        'custom': plot_quality
    }

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--type', choices=plot_functions.keys(), required=True)
    arg_parser.add_argument('--only', nargs='+', choices=ONLY_TYPES.keys(), required=False)
    arg_parser.add_argument('-i', '--results-file', required=True)
    arg_parser.add_argument('-t', '--title')
    arg_parser.add_argument('-f', '--fig-size', nargs=2, type=int, default=FIGURE_SIZE)
    arg_parser.add_argument('-o', '--out-dir', default='plots')
    arg_parser.add_argument('--params-cases', help='draw plots only with those params (tracks filtering)'
                                                   ' path to json file, each line corresponds to learner '
                                                   'parameter (e.g. max_depth) and list of its values')
    arg_parser.add_argument('--from-iter', type=int, default=0, help='only custom, best modes')
    arg_parser.add_argument('--to-iter', type=int, default=None, help='only custom, best modes')
    arg_parser.add_argument('--low-percent', type=float, default=0.9, help='only quality-vs-time mode')
    arg_parser.add_argument('--num-bins', type=int, default=200, help='only quality-vs-time mode')
    arg_parser.add_argument('--only-min', action='store_true', help='only quality-vs-time mode')
    arg_parser.add_argument('--top', type=int, default=3, help='only best mode')
    args = arg_parser.parse_args()

    tracks = read_results(args.results_file)

    # Render one plot per experiment found in the results file.
    for name, experiment_tracks in tracks.items():
        plot_experiment(experiment_tracks, name, args)
# Beispiel #3 (scraped example separator; vote count: 0)
def print_n_experiment_duration(results, gpu, n, output):
    """Estimate total optimization time for n runs per experiment, write a table.

    Args:
        results: path to a results file, or a directory of raw log files.
        gpu: if True keep only GPU tracks, otherwise only CPU tracks.
        n: number of runs to extrapolate the per-algorithm duration to.
        output: path of the text file the table is written to.
    """
    niter = 5000  # assumed iterations per run for the time estimate

    # A directory means raw logs; a file means pre-parsed results.
    if os.path.isdir(results):
        tracks = json_from_logs(results)
    else:
        tracks = read_results(results)

    table = []
    index = []

    # NOTE: Python 3 iteration instead of the former Py2-only `iterkeys()`.
    for experiment_name, experiment_tracks in tracks.items():
        # Device filter: keep a track iff its name contains 'GPU' exactly
        # when gpu is True.
        experiment_tracks = {
            name: track for name, track in experiment_tracks.items()
            if gpu == ('GPU' in name)
        }

        row = []

        # Sorted so columns line up with the alphabetical header below.
        for algorithm_name in sorted(experiment_tracks):
            # Too few tracks to average meaningfully.
            if len(experiment_tracks[algorithm_name]) < 2:
                continue

            value = 0.
            for track in experiment_tracks[algorithm_name]:
                value += (np.median(track.time_per_iter) *
                          niter) / 60.  # minutes

            row.append(value / 60. /
                       float(len(experiment_tracks[algorithm_name])) *
                       n)  # hours

        if len(row) != 0:
            index.append(experiment_name)
            table.append(row)

    # NOTE(review): the header assumes exactly these three algorithms survive
    # filtering for every kept experiment; a row with fewer columns would make
    # the DataFrame constructor fail — confirm against the input data.
    header = ['catboost', 'lightgbm', 'xgboost']
    table = pd.DataFrame(table, index=index, columns=header)

    with open(output, 'w') as f:
        f.write('Optimization time, hours')
        f.write('\n')
        f.write(table.to_string())
        f.write('\n')
# Beispiel #4 (scraped example separator; vote count: 0)
def get_experiment_stats(results_file, gpu, niter):
    """Build nested statistics {experiment: {algorithm: {params: stat}}}.

    Tracks are read from results_file and filtered by device: a track is kept
    iff its name contains 'GPU' exactly when gpu is True. Empty per-params
    stats are skipped; niter is forwarded to calculate_statistics.
    """
    all_tracks = read_results(results_file)
    stats = {}

    for experiment, raw_tracks in all_tracks.items():
        # Device filter.
        device_tracks = {
            key: value for key, value in raw_tracks.items()
            if gpu == ('GPU' in key)
        }

        experiment_stats = {}
        for algorithm, algorithm_tracks in device_tracks.items():
            algorithm_stats = {}
            for params, grouped in split_tracks(algorithm_tracks).items():
                stat = calculate_statistics(grouped, niter)
                if stat != {}:
                    algorithm_stats[params] = stat
            experiment_stats[algorithm] = algorithm_stats

        stats[experiment] = experiment_stats

    return stats
# Beispiel #5 (scraped example separator; vote count: 0)
def get_experiment_stats(results_directory, gpu, niter):
    """Collect per-experiment statistics from result files in a directory.

    Only directory entries whose name is a key of EXPERIMENT_TYPE are
    processed; each such entry is read as a results file.

    Args:
        results_directory: directory containing one result file per experiment.
        gpu: if True keep only GPU tracks, otherwise only CPU tracks.
        niter: iteration count forwarded to calculate_statistics.

    Returns:
        dict of the form {experiment: {algorithm: {params: stat}}}.
    """
    known_experiments = EXPERIMENT_TYPE.keys()
    stats = {}

    for experiment_name in os.listdir(results_directory):
        # Ignore entries that are not known experiments.
        if experiment_name not in known_experiments:
            continue

        stats[experiment_name] = {}

        tracks = read_results(os.path.join(results_directory, experiment_name))
        # Device filter: keep a track iff its name contains 'GPU' exactly
        # when gpu is True.
        tracks = {name: track for name, track in tracks.items()
                  if gpu == ('GPU' in name)}

        # NOTE: Python 3 iteration (`items()`) instead of the former Py2-only
        # `iterkeys()`/`iteritems()`, matching the sibling helpers in this file.
        for algorithm_name, algorithm_tracks in tracks.items():
            stats[experiment_name][algorithm_name] = {}
            table_tracks = split_tracks(algorithm_tracks)

            for params, cur_tracks in table_tracks.items():
                stat = calculate_statistics(cur_tracks, niter)
                if stat == {}:
                    continue
                stats[experiment_name][algorithm_name][params] = stat

    return stats
# Beispiel #6 (scraped example separator; vote count: 0)
def main():
    """CLI entry point: load tracks, apply filters, render the requested plot."""
    # Mode names double as the allowed values for --type.
    plot_functions = {
        'time-per-iter': plot_time_per_iter,
        'best': plot_quality,
        'quality-vs-time': plot_quality_vs_time,
        'custom': plot_quality
    }

    cli = argparse.ArgumentParser()
    cli.add_argument('--type', choices=plot_functions.keys(), required=True)
    cli.add_argument('--only', nargs='+', choices=ONLY_TYPES.keys(), required=False)
    cli.add_argument('-i', '--results-dir', required=True)
    cli.add_argument('-t', '--title')
    cli.add_argument('-f', '--fig-size', nargs=2, type=int, default=FIGURE_SIZE)
    cli.add_argument('-o', '--out-dir', default='plots')
    cli.add_argument('-d', '--file-name', required=False)
    cli.add_argument('--params-cases', help='draw plots only with those params (tracks filtering)'
                                            ' path to json file, each line corresponds to learner '
                                            'parameter (e.g. max_depth) and list of its values')
    cli.add_argument('--from-iter', type=int, default=0, help='only custom, best modes')
    cli.add_argument('--to-iter', type=int, default=None, help='only custom, best modes')
    cli.add_argument('--low-percent', type=float, default=0.9, help='only quality-vs-time mode')
    cli.add_argument('--num-bins', type=int, default=200, help='only quality-vs-time mode')
    cli.add_argument('--only-min', action='store_true', help='only quality-vs-time mode')
    cli.add_argument('--top', type=int, default=3, help='only best mode')
    args = cli.parse_args()

    tracks = read_results(args.results_dir)

    # Output file: an explicit name wins, otherwise derive one from the mode.
    file_name = args.file_name if args.file_name else get_default_file_name(args.type, args.params_cases)
    save_path = os.path.join(args.out_dir, file_name)

    # Optional whitelist of algorithm families.
    if args.only:
        tracks = {alg_name: tracks[alg_name]
                  for only_type in args.only
                  for alg_name in ONLY_TYPES[only_type]}

    # Optional parameter filtering driven by a JSON spec file.
    if args.params_cases:
        with open(args.params_cases) as f:
            params_cases = json.load(f)
        tracks = filter_tracks(tracks, params_cases)

    # Dispatch on the plot mode (exactly one branch matches).
    if args.type == 'quality-vs-time':
        best_tracks = get_best(tracks)
        best_quality = min(t[0].get_best_score() for t in best_tracks.values())
        print(best_quality)

        if args.top:
            tracks = get_best(tracks, top=args.top)

        plot_quality_vs_time(tracks, best_quality=best_quality, low_percent=args.low_percent,
                             only_min=args.only_min, figsize=args.fig_size,
                             num_bins=args.num_bins, save_path=save_path)
    elif args.type == 'best':
        best_tracks = get_best(tracks, top=args.top)
        for alg in best_tracks:
            for track in best_tracks[alg]:
                print(track)
                print(track.get_best_score())

        plot_quality(best_tracks, args.from_iter, args.to_iter, figsize=args.fig_size,
                     title=args.title, save_path=save_path)
    elif args.type == 'custom':
        plot_quality(tracks, args.from_iter, args.to_iter,
                     figsize=args.fig_size, title=args.title, save_path=save_path)
    elif args.type == 'time-per-iter':
        plot_time_per_iter(tracks, figsize=args.fig_size, title=args.title, save_path=save_path)