Exemple #1
0
def load_exps_data(exp_folder_paths, disable_variant=False):
    """Load progress and parameters for every directory under the given roots.

    Each root in ``exp_folder_paths`` is walked recursively with ``os.walk``
    and every directory encountered is treated as a candidate experiment.
    For a candidate, ``progress.csv`` is read first, then the parameters:
    ``variant.json`` is preferred and ``params.json`` is used as a fallback
    when reading the variant raises ``IOError``. With ``disable_variant``
    set, only ``params.json`` is consulted.

    Candidates whose files cannot be read (``IOError``) are reported with
    ``print`` and skipped.

    Args:
        exp_folder_paths: Iterable of root directory paths to scan.
        disable_variant: If true, never read ``variant.json``.

    Returns:
        list: One ``ext.AttrDict`` per successfully loaded directory, with
        the keys ``progress``, ``params`` and ``flat_params``.
    """
    candidate_dirs = []
    for root in exp_folder_paths:
        candidate_dirs.extend(dirpath for dirpath, _, _ in os.walk(root))

    loaded = []
    for exp_dir in candidate_dirs:
        params_file = os.path.join(exp_dir, "params.json")
        variant_file = os.path.join(exp_dir, "variant.json")
        progress_file = os.path.join(exp_dir, "progress.csv")
        try:
            # Progress is loaded before the parameters, so a directory
            # without progress.csv is rejected before any JSON is touched.
            progress = load_progress(progress_file)
            if disable_variant:
                params = load_params(params_file)
            else:
                try:
                    params = load_params(variant_file)
                except IOError:
                    params = load_params(params_file)
            record = ext.AttrDict(progress=progress,
                                  params=params,
                                  flat_params=flatten_dict(params))
        except IOError as err:
            print(err)
            continue
        loaded.append(record)
    return loaded
Exemple #2
0
def _pad_progress_series(exps, plot_key):
    """Collect the ``plot_key`` series of each experiment, NaN-padded.

    An experiment whose ``progress`` has no ``plot_key`` entry contributes a
    single-NaN array. Every series is right-padded with NaN up to the length
    of the longest one so they can be stacked.

    Returns:
        tuple: ``(series, max_size)`` where ``series`` is a list of
        equal-length arrays and ``max_size`` is that common length.
    """
    series = [
        exp.progress.get(plot_key, np.array([np.nan])) for exp in exps
    ]
    max_size = max(map(len, series))
    padded = [
        np.concatenate([ps, np.ones(max_size - len(ps)) * np.nan])
        for ps in series
    ]
    return padded, max_size


def _summarize_curves(progresses, max_size, legend, use_median, smooth_curve,
                      clip_plot_value, normalize_error):
    """Reduce padded series to the curves of one plot entry.

    With ``use_median`` the entry holds the 25th/50th/75th NaN-aware
    percentiles; otherwise it holds the NaN-aware mean and standard
    deviation (the latter divided by sqrt of the per-step non-NaN count when
    ``normalize_error`` is set). Curves are optionally smoothed with
    ``sliding_mean`` and clipped to ``[-clip_plot_value, clip_plot_value]``.

    Returns:
        ext.AttrDict: The curves plus ``legend``.
    """
    # Smoothing window grows with series length: one step per 1000 points,
    # never less than 1.
    window_size = np.maximum(int(np.round(max_size / float(1000))), 1)

    if use_median:
        curves = [
            np.nanpercentile(progresses, q=q, axis=0) for q in (25, 50, 75)
        ]
        if smooth_curve:
            curves = [sliding_mean(c, window=window_size) for c in curves]
        if clip_plot_value is not None:
            curves = [
                np.clip(c, -clip_plot_value, clip_plot_value) for c in curves
            ]
        percentile25, percentile50, percentile75 = curves
        return ext.AttrDict(percentile25=percentile25,
                            percentile50=percentile50,
                            percentile75=percentile75,
                            legend=legend)

    means = np.nanmean(progresses, axis=0)
    stds = np.nanstd(progresses, axis=0)
    if normalize_error:
        # Divide by sqrt(number of non-NaN samples at each step).
        stds /= np.sqrt(np.sum((1. - np.isnan(progresses)), axis=0))
    if smooth_curve:
        means = sliding_mean(means, window=window_size)
        stds = sliding_mean(stds, window=window_size)
    if clip_plot_value is not None:
        means = np.clip(means, -clip_plot_value, clip_plot_value)
        stds = np.clip(stds, -clip_plot_value, clip_plot_value)
    return ext.AttrDict(means=means, stds=stds, legend=legend)


def _find_best_configuration(filtered_data, plot_key, use_median,
                             only_show_best_final, only_show_best_sofar):
    """Search parameter combinations for the one with the highest score.

    Every combination in the cartesian product of the distinct parameter
    values of ``filtered_data`` is looked up in the module-level
    ``exps_data``; combinations with no matching experiment are skipped.
    The score (named "regret" in the output, but larger is better) is the
    mean over time of the per-step NaN-aware mean or median. Each scored
    combination is printed as a tab-separated line.

    Returns:
        tuple: ``(best_regret, best_progress, data_best_regret,
        kv_string_best_regret)``. ``best_regret`` is ``-np.inf`` and the
        other items are ``None`` when no combination produced a comparable
        (non-NaN) score.
    """
    filtered_params = core.extract_distinct_params(
        filtered_data, l=0)  # noqa: E741
    param_keys = [p[0] for p in filtered_params]
    param_values = [p[1] for p in filtered_params]

    best_regret = -np.inf
    best_progress = None
    data_best_regret = None
    kv_string_best_regret = None

    for params in itertools.product(*param_values):
        # NOTE(review): candidates are drawn from the unfiltered exps_data,
        # so filter_nan / filters / custom_filter are not re-applied here.
        # Kept as in the original - confirm this is intended.
        candidate_selector = core.Selector(exps_data)
        for k, v in zip(param_keys, params):
            candidate_selector = candidate_selector.where(k, str(v))
        data = candidate_selector.extract()
        if not data:
            continue

        progresses, _ = _pad_progress_series(data, plot_key)
        if only_show_best_final:
            progresses = np.asarray(progresses)[:, -1]
        if only_show_best_sofar:
            progresses = np.max(np.asarray(progresses), axis=1)

        if use_median:
            regret = np.mean(np.nanmedian(progresses, axis=0))
        else:
            regret = np.mean(np.nanmean(progresses, axis=0))

        distinct_params_k = [p[0] for p in distinct_params]
        distinct_params_v = [
            v for k, v in zip(param_keys, params) if k in distinct_params_k
        ]
        distinct_params_kv = list(zip(distinct_params_k, distinct_params_v))
        distinct_params_kv_string = str(distinct_params_kv).replace(
            '), ', ')\t')
        print('{}\t{}\t{}'.format(regret, len(progresses),
                                  distinct_params_kv_string))

        # A NaN score never compares greater, so it can never win.
        if regret > best_regret:
            best_regret = regret
            best_progress = progresses
            data_best_regret = data
            kv_string_best_regret = distinct_params_kv_string

    return (best_regret, best_progress, data_best_regret,
            kv_string_best_regret)


def get_plot_instruction(plot_key,
                         split_key=None,
                         group_key=None,
                         filters=None,
                         use_median=False,
                         only_show_best=False,
                         only_show_best_final=False,
                         gen_eps=False,
                         only_show_best_sofar=False,
                         clip_plot_value=None,
                         plot_width=None,
                         plot_height=None,
                         filter_nan=False,
                         smooth_curve=False,
                         custom_filter=None,
                         legend_post_processor=None,
                         normalize_error=False,
                         custom_series_splitter=None):
    """Build the plots for one metric over the loaded experiments.

    Reads the module-level ``exps_data`` and ``distinct_params``. The
    experiments are filtered, split into one figure per value of
    ``split_key``, and within each figure grouped into one curve per group.

    Args:
        plot_key: Key of the series to read from each experiment's
            ``progress``.
        split_key: Parameter whose distinct values each get their own
            figure; ``None`` yields a single figure with legend ``"Plot"``.
        group_key: Parameter whose distinct values each get their own
            curve; falsy or ``"exp_name"`` groups by experiment name.
        filters: Mapping of parameter name to required value (compared via
            ``str(value)``).
        use_median: Plot 25/50/75 percentiles instead of mean and std.
        only_show_best: For each group, plot only the parameter combination
            with the highest time-averaged score.
        only_show_best_final: Like ``only_show_best`` but scored on the last
            step of each series.
        gen_eps: Call ``make_plot_eps`` for each figure instead of
            collecting ``make_plot`` output.
        only_show_best_sofar: Like ``only_show_best`` but scored on the
            maximum of each series.
        clip_plot_value: If not ``None``, clip curves to
            ``[-clip_plot_value, clip_plot_value]``.
        plot_width: Forwarded to ``make_plot``.
        plot_height: Forwarded to ``make_plot``.
        filter_nan: Keep only experiments accepted by ``check_nan``.
        smooth_curve: Smooth curves with ``sliding_mean``.
        custom_filter: Optional predicate handed to
            ``Selector.custom_filter``.
        legend_post_processor: Optional callable applied to each legend;
            defaults to the identity.
        normalize_error: Divide the std by sqrt of the non-NaN sample count
            (only applied on the "best" paths).
        custom_series_splitter: Optional callable mapping an experiment to a
            grouping key; overrides ``group_key``.

    Returns:
        str: The ``make_plot`` results joined by newlines (empty when
        ``gen_eps`` is set or nothing was plotted).
    """
    print(plot_key, split_key, group_key, filters)
    if filter_nan:
        nonnan_exps_data = list(filter(check_nan, exps_data))
        selector = core.Selector(nonnan_exps_data)
    else:
        selector = core.Selector(exps_data)
    if legend_post_processor is None:

        def default_legend_post_processor(x):
            return x

        legend_post_processor = default_legend_post_processor
    if filters is None:
        filters = dict()
    for k, v in filters.items():
        selector = selector.where(k, str(v))
    if custom_filter is not None:
        selector = selector.custom_filter(custom_filter)

    if split_key is not None:
        vs = [vs for k, vs in distinct_params if k == split_key][0]
        split_selectors = [selector.where(split_key, v) for v in vs]
        split_legends = list(map(str, vs))
    else:
        split_selectors = [selector]
        split_legends = ["Plot"]

    show_best = (only_show_best or only_show_best_final
                 or only_show_best_sofar)
    plots = []
    counter = 1
    for split_selector, split_legend in zip(split_selectors, split_legends):
        if custom_series_splitter is not None:
            # Group by the user-supplied key, preserving first-seen order.
            splitted_dict = dict()
            for exp in split_selector.extract():
                splitted_dict.setdefault(custom_series_splitter(exp),
                                         list()).append(exp)
            splitted = list(splitted_dict.items())
            group_selectors = [core.Selector(list(x[1])) for x in splitted]
            group_legends = [x[0] for x in splitted]
        elif group_key and group_key != "exp_name":
            vs = [vs for k, vs in distinct_params if k == group_key][0]
            group_selectors = [split_selector.where(group_key, v) for v in vs]
            group_legends = [str(x) for x in vs]
        else:
            group_key = "exp_name"
            vs = sorted(
                [x.params["exp_name"] for x in split_selector.extract()])
            group_selectors = [split_selector.where(group_key, v) for v in vs]
            group_legends = [
                summary_name(x.extract()[0], split_selector)
                for x in group_selectors
            ]

        to_plot = []
        for group_selector, group_legend in zip(group_selectors,
                                                group_legends):
            filtered_data = group_selector.extract()
            if not filtered_data:
                continue

            if show_best:
                (best_regret, best_progress, data_best_regret,
                 kv_string_best_regret) = _find_best_configuration(
                     filtered_data, plot_key, use_median,
                     only_show_best_final, only_show_best_sofar)
                print(group_selector._filters)
                print('best regret: {}'.format(best_regret))
                if best_regret == -np.inf:
                    # No combination produced a comparable score.
                    continue

                progresses, max_size = _pad_progress_series(
                    data_best_regret, plot_key)
                legend = '{} (mu: {:.3f}, std: {:.5f})'.format(
                    group_legend, best_regret, np.std(best_progress))
                curve = _summarize_curves(progresses, max_size,
                                          legend_post_processor(legend),
                                          use_median, smooth_curve,
                                          clip_plot_value, normalize_error)
                # The example experiment must come from the winning
                # combination, not from whichever combination happened to
                # be evaluated last in the search.
                curve["footnote"] = "%s; e.g. %s" % (
                    kv_string_best_regret,
                    data_best_regret[0].params.get("exp_name", "NA"))
                to_plot.append(curve)
            else:
                progresses, max_size = _pad_progress_series(
                    filtered_data, plot_key)
                # NOTE(review): normalize_error has never been applied on
                # this path; kept that way for backward compatibility.
                to_plot.append(
                    _summarize_curves(progresses, max_size,
                                      legend_post_processor(group_legend),
                                      use_median, smooth_curve,
                                      clip_plot_value, False))

        if to_plot and not gen_eps:
            fig_title = "%s: %s" % (split_key, split_legend)
            plots.append(
                make_plot(to_plot,
                          use_median=use_median,
                          title=fig_title,
                          plot_width=plot_width,
                          plot_height=plot_height))

        if gen_eps:
            make_plot_eps(to_plot, use_median=use_median, counter=counter)
        counter += 1
    return "\n".join(plots)