Esempio n. 1
0
def _get_stage_data_helper(path, stage_idx):
    """
    Return a dataframe, each row of which corresponds to the `stage_idx`-th stage
    of a different run.

    Parameters
    ----------
    path: str
        Path passed to `HyperSearch`.
    stage_idx: int
        Index of the stage to extract from each run's stage dataframe.

    Returns
    -------
    pd.DataFrame with one row per run, augmented with the sampled
    hyperparameter values, the setting index, repeat and seed.
    """
    job = HyperSearch(path)
    stage_data = job.extract_stage_data()

    dist_keys = job.dist_keys()

    records = []

    # stage_data maps parameter-setting keys to {(repeat, seed): (df, sc, md)}.
    for key, value in sorted(stage_data.items()):
        for (repeat, seed), (df, sc, md) in value.items():
            record = dict(df.iloc[stage_idx])

            # Attach the sampled hyperparameter values for this run.
            for dk in dist_keys:
                record[dk] = md[dk]

            record['idx'] = key.idx
            record['repeat'] = repeat
            record['seed'] = seed

            records.append(record)

    return pd.DataFrame.from_records(records)
Esempio n. 2
0
def get_mnist_data(path, y_keys, spread_measure):
    """
    Gather test-set curves for each key in `y_keys` from a hyper search that
    contains exactly one parameter setting.

    Parameters
    ----------
    path: str
        Path passed to `HyperSearch`.
    y_keys: str, iterable of str, or dict
        Keys to extract (prefixed internally with '_test_'). A whitespace
        separated string is split; a non-dict iterable maps each key to the
        identity function; a dict maps keys to post-processing functions.
    spread_measure: str
        Key into `spread_measures` selecting how the spread (error bars)
        is computed.

    Returns
    -------
    dict mapping each y_key to a tuple (x, y, yu, yl).
    """
    if isinstance(y_keys, str):
        y_keys = y_keys.split()

    if not isinstance(y_keys, dict):
        y_keys = {yk: lambda y: y for yk in y_keys}

    job = HyperSearch(path)
    stage_data = job.extract_stage_data()
    assert len(stage_data) == 1  # Should only be one parameter setting
    stage_data = next(iter(stage_data.values()))

    data = defaultdict(list)

    for (repeat, seed), (df, sc, md) in stage_data.items():
        for yk in y_keys:  # only the keys are needed here
            data[yk].append(df['_test_' + yk])

    # Fixed x-axis: stages 1 through 12.
    x = list(range(1, 13))

    data_stats = {}

    for yk, func in y_keys.items():
        # After transposing, rows are stages and columns are runs.
        data_yk = np.array(data[yk]).T
        data_yk = func(data_yk)

        y = data_yk.mean(axis=1)
        yu, yl = spread_measures[spread_measure](data_yk)

        data_stats[yk] = (x, y, yu, yl)

    return data_stats
Esempio n. 3
0
def get_arithmetic_data(paths,
                        x_key,
                        y_key,
                        stage_idx,
                        spread_measure,
                        y_func=None):
    """
    Collect `y_key` values at stage `stage_idx` from each search in `paths`,
    grouped by the hyperparameter `x_key`.

    Parameters
    ----------
    paths: iterable of str
        Paths passed to `HyperSearch`.
    x_key: str
        Attribute of each parameter-setting key used as the x value.
    y_key: str
        Column of the stage dataframe to extract.
    stage_idx: int
        Stage index at which to read `y_key`.
    spread_measure: str
        Key into `spread_measures` selecting how the spread is computed.
    y_func: callable, optional
        Applied to each extracted value; defaults to the identity.

    Returns
    -------
    (x, y, yu, yl): sorted x values, per-x means, and upper/lower spreads.
    """
    y_func = y_func or (lambda y: y)

    data = {}
    for path in paths:
        job = HyperSearch(path)
        stage_data = job.extract_stage_data()

        for key, value in sorted(stage_data.items()):
            _data = []

            for (repeat, seed), (df, sc, md) in value.items():
                _data.append(y_func(df[y_key][stage_idx]))

            data[getattr(key, x_key)] = _data

    x = sorted(data)
    # Rows are x values, columns are repeats/seeds.
    _data = np.array([data[key] for key in x])
    y = _data.mean(axis=1)
    yu, yl = spread_measures[spread_measure](_data)
    return x, y, yu, yl
Esempio n. 4
0
    def _checkpoint(self, i):
        """ Fetch results and archives from all hosts onto the master node,
            zip the archive, and print a results summary.

        Parameters
        ----------
        i: int
            Index of the step whose results are being fetched (used for
            logging only).
        """
        print("Fetching results of step {} at: ".format(i))
        print(datetime.datetime.now())

        # Iterate hosts directly; the previous enumerate index shadowed the
        # step-index parameter `i` and was never used.
        for host in self.hosts:
            if host == ':':
                # ':' denotes the local host: plain filesystem moves/copies.
                command = "mv {local_scratch}/experiments/* ./experiments"
                self.execute_command(command, robust=True)

                command = "rm -rf {local_scratch}/experiments"
                self.execute_command(command, robust=True)

                command = "cp -ru {local_scratch}/{archive_root} ."
                self.execute_command(command, robust=True)
            else:
                # Remote host: pull experiments back via rsync, then clean up
                # the remote scratch space.
                command = (
                    "rsync -az {rsync_verbosity} --timeout=300 -e \"ssh {ssh_options}\" "
                    "{host}:{local_scratch}/experiments/ ./experiments".format(
                        host=host, **self.__dict__))
                self.execute_command(command,
                                     frmt=False,
                                     robust=True,
                                     output="loud")

                command = "rm -rf {local_scratch}/experiments"
                self.ssh_execute(command, host, robust=True, output="loud")

                command = (
                    "rsync -az {rsync_verbosity} --timeout=300 -e \"ssh {ssh_options}\" "
                    "{host}:{local_scratch}/{archive_root} .".format(
                        host=host, **self.__dict__))
                self.execute_command(command,
                                     frmt=False,
                                     robust=True,
                                     output="loud")

        self.execute_command("zip -rq results {archive_root}", robust=True)

        # Print a summary of the results; fall back to the raw job archive if
        # the HyperSearch-based summary fails for any reason (best-effort).
        try:
            from dps.hyper import HyperSearch
            search = HyperSearch('.')
            with redirect_stream('stdout', 'results.txt', tee=False):
                search.print_summary(print_config=False, verbose=False)
            print(search.job.summary(verbose=False))
        except Exception:
            job_path = 'results.zip' if os.path.exists(
                'results.zip') else 'orig.zip'
            assert os.path.exists(job_path)
            job = ReadOnlyJob(job_path)
            print(job.summary(verbose=False))
Esempio n. 5
0
def get_transfer_baseline_data(path, x_key, y_key, spread_measure, y_func=None):
    """
    Return baseline transfer performance: the mean of `y_key` at stage 0 for
    each parameter setting of the search at `path`.

    Parameters
    ----------
    path: str
        Path passed to `HyperSearch`.
    x_key: str
        Unused; kept for interface compatibility with related functions.
    y_key: str
        Column of the stage dataframe to extract.
    spread_measure: str
        Unused; kept for interface compatibility with related functions.
    y_func: callable, optional
        Applied to the array of extracted values; defaults to the identity.

    Returns
    -------
    (x, y): x is range(1, 21), y is an array of per-setting means.
    """
    y_func = y_func or (lambda y: y)

    job = HyperSearch(path)
    stage_data = job.extract_stage_data()

    x = range(1, 21)
    y = []

    for key, value in sorted(stage_data.items()):
        data = []
        for df, _sc, _md in value.values():
            data.append(df[y_key][0])
        data = y_func(np.array(data))
        y.append(data.mean())

    return x, np.array(y)
Esempio n. 6
0
def get_shapes_data(paths, y_keys, spread_measure):
    """
    Gather test-set curves for each key in `y_keys` across the runs stored
    in `paths` for the shapes experiments.

    Parameters
    ----------
    paths: iterable of str
        Paths passed to `HyperSearch`.
    y_keys: str, iterable of str, or dict
        Keys to extract (prefixed internally with '_test_'). A whitespace
        separated string is split; a non-dict iterable maps each key to the
        identity function; a dict maps keys to post-processing functions.
    spread_measure: str
        Key into `spread_measures` selecting how the spread is computed.

    Returns
    -------
    dict mapping each y_key to a tuple (x, y, yu, yl).
    """
    # Normalize y_keys once, rather than redundantly on every path.
    if isinstance(y_keys, str):
        y_keys = y_keys.split()

    if not isinstance(y_keys, dict):
        y_keys = {yk: lambda y: y for yk in y_keys}

    data = defaultdict(list)
    x = np.arange(1, 36)
    for path in paths:
        ignore = []

        # For two of the experiments, the training stalled just before starting the 4th stage, so ignore those samples.
        if 'max-shapes=10_small=False_alg=shapes-silot_duration=long_2019_08_13_00_00_51_seed=0' in path:
            ignore = [1]
        if 'max-shapes=20_small=False_alg=shapes-silot_duration=long_2019_08_19_17_30_18_seed=0' in path:
            ignore = [2]

        job = HyperSearch(path)
        stage_data = job.extract_stage_data()

        for (idx, _), d in stage_data.items():
            if idx in ignore:
                continue

            assert len(d) == 1  # Exactly one run per parameter setting.
            df = next(iter(d.values()))[0]
            for yk in y_keys:  # only the keys are needed here
                data[yk].append(df['_test_' + yk][x-1])

    x = list(x)
    data_stats = {}
    for yk, func in y_keys.items():
        # After transposing, rows are stages and columns are runs.
        data_yk = np.array(data[yk]).T
        data_yk = func(data_yk)

        y = data_yk.mean(axis=1)
        yu, yl = spread_measures[spread_measure](data_yk)

        data_stats[yk] = (x, y, yu, yl)

    return data_stats
Esempio n. 7
0
def _print_config_cmd(path):
    """ Print the base config and the parameter distribution of the hyper
        search stored at `path`.
    """
    search = HyperSearch(path)

    base_config = search.objects.load_object('metadata', 'config')
    print("BASE CONFIG")
    print(base_config)

    dist = Config(search.objects.load_object('metadata', 'dist'))

    print('\n' + '*' * 100)
    print("PARAMETER DISTRIBUTION")
    pprint(dist)
Esempio n. 8
0
def get_transfer_data(path, x_key, y_key, spread_measure, y_func=None):
    """
    Extract transfer-performance curves (stages 1..20, the stage after the
    learning stage onward) for each parameter setting of the search at `path`.

    Parameters
    ----------
    path: str
        Path passed to `HyperSearch`.
    x_key: str
        Unused; kept for interface compatibility with related functions.
    y_key: str
        Column of the stage dataframe to extract.
    spread_measure: str
        Key into `spread_measures` selecting how the spread is computed.
    y_func: callable, optional
        Applied to the stacked data array; defaults to the identity.

    Returns
    -------
    list of ((x, y, yu, yl), key) tuples, one per parameter setting.
    """
    y_func = y_func or (lambda y: y)

    job = HyperSearch(path)
    stage_data = job.extract_stage_data()

    all_data = []

    for key, value in sorted(stage_data.items()):
        data = []

        for (repeat, seed), (df, sc, md) in value.items():
            data.append(df[y_key][1:])  # First stage is the learning stage.

        # After transposing, rows are stages and columns are runs.
        data = np.array(data).T
        data = y_func(data)

        x = range(1, 21)
        y = data.mean(axis=1)
        yu, yl = spread_measures[spread_measure](data)

        all_data.append(((x, y, yu, yl), key))

    return all_data
Esempio n. 9
0
def _resubmit_cmd(path, name=""):
    """ Resubmit the job at `path`, reusing (and optionally interactively
        editing) the run kwargs from the original submission.

        Note the resubmitting still has a limitation: experiments are not copied over
        from the previous submission. Couldn't find a clean way to do this, so just do it manually
        for now. In the future we should revamp the build/run process so that the possibility of
        multiple runs is taken into account, and the results of the runs can be easily combined.

    Parameters
    ----------
    path: str
        Path passed to `HyperSearch`.
    name: str
        Unused; kept for interface compatibility.
    """
    search = HyperSearch(path)
    archive_path = search.job.path

    # Recover the kwargs used for the original submission; newer jobs store
    # them as JSON, older ones as a pickled session object.
    try:
        with open(os.path.join(search.path, 'run_kwargs.json'), 'r') as f:
            reference = json.load(f)
    except FileNotFoundError:
        with open(os.path.join(search.path, 'session.pkl'), 'rb') as f:
            reference = dill.load(f).__dict__

    # Use ParallelSession's signature (with defaults applied) to decide which
    # of the stored kwargs are relevant to a new submission.
    sig = inspect.signature(ParallelSession.__init__)
    _run_kwargs = sig.bind_partial()
    _run_kwargs.apply_defaults()

    run_kwargs = {}
    for k in _run_kwargs.arguments:  # only the parameter names are needed
        run_kwargs[k] = reference[k]

    # Allow command-line overrides of the recovered kwargs.
    cl_run_kwargs = clify.command_line(run_kwargs).parse()
    run_kwargs.update(cl_run_kwargs)

    # Interactively confirm or edit the kwargs before resubmitting.
    done = False
    while not done:
        print("Current values for run_kwargs: ")
        pprint(run_kwargs)

        inp = ""
        while inp not in ["y", "n"]:
            inp = input("Make changes? (y/n): ")

        if inp == "y":
            inp = input("Specify changes: ")
            new = clify.command_line(run_kwargs, cl_args=inp).parse()
            run_kwargs.update(new)
        else:
            done = True

    submit_job(archive_path, "resubmit", **run_kwargs)
Esempio n. 10
0
def _value_plot_cmd(path, mode, field, stage, x_axis, ylim, style):
    """ Plot the trajectory of a single value, specified by field, for each parameter
        setting in a hyperparameter search.

    Example:
         dps-hyper value_plot . rl_val COST_negative_mAP None --x-axis=stages --ylim="-1,0"

    Parameters
    ----------
    path: str
        Path passed to `HyperSearch`.
    mode: str
        Run mode to plot from (e.g. train, val).
    field: str
        Name of value to plot.
    stage: str
        String that is eval-ed to get an object specifying the stages to plot data from.
    x_axis: str, one of {steps, experiences, stages}
        Specifiation of value to use as x-axis for plots. If `stages` is used, then only
        the value obtained by the "chosen" hypothesis for that stage is used.
    ylim: str
        String that is eval-ed to get a tuple specifying y-limits for plots.
    style: str
        Matplotlib style to use for plot.

    """
    print("Plotting {} value of field {} from experiments stored at `{}`.".format(mode, field, path))

    # NOTE: substring check — each valid option is a substring of this string.
    assert x_axis in "steps experiences stages"
    x_axis_key = dict(
        steps="global_step",
        experiences="n_global_experiences",
        stages="stage_idx")[x_axis]

    search = HyperSearch(path)

    # NOTE(review): `stage` and `ylim` come from the command line and are
    # eval-ed — assumed trusted input; do not expose this to untrusted users.
    stage = eval(stage) if stage else ""
    ylim = eval(ylim) if ylim else ""

    fields = [field, x_axis_key]

    if x_axis == "stages":
        data = search.extract_stage_data(fields, bare=True)
    else:
        data = search.extract_step_data(mode, fields, stage)

    # One subplot per parameter setting, plus one extra for the overlaid means.
    n_plots = len(data) + 1
    w = int(np.ceil(np.sqrt(n_plots)))
    h = int(np.ceil(n_plots / w))

    with plt.style.context(style):
        fig, axes = plt.subplots(h, w, sharex=True, sharey=True, figsize=(15, 10))
        fig.suptitle("{} vs {}".format(field, x_axis_key))

        # atleast_2d so indexing works even when the grid collapses to 1x1.
        axes = np.atleast_2d(axes)
        # The last subplot holds the per-setting mean curves.
        final_ax = axes[-1, -1]

        label_order = []

        for i, (key, value) in enumerate(sorted(data.items())):
            label = ",".join("{}={}".format(*kv) for kv in zip(key._fields, key))
            # Place each setting's subplot by its setting index, row-major.
            i = int(key.idx / w)
            j = key.idx % w
            ax = axes[i, j]

            if ylim:
                ax.set_ylim(ylim)

            ax.set_title(label)
            label_order.append(label)

            # One curve per (repeat, seed) run of this setting.
            for (repeat, seed), _data in value.items():
                ax.plot(_data[x_axis_key], _data[field])

            # Align runs on the x-axis value and average across them.
            to_concat = [_data.set_index(x_axis_key) for _data in value.values()]
            concat = pd.concat(to_concat, axis=1, ignore_index=True)
            mean = concat.mean(axis=1)
            final_ax.plot(mean, label=label)

        # Re-order legend handles to match the order settings were plotted in.
        legend_handles = {l: h for h, l in zip(*final_ax.get_legend_handles_labels())}
        ordered_handles = [legend_handles[l] for l in label_order]

        final_ax.legend(
            ordered_handles, label_order, loc='center left',
            bbox_to_anchor=(1.05, 0.5), ncol=1)

        if ylim:
            final_ax.set_ylim(ylim)

        plt.subplots_adjust(left=0.05, bottom=0.05, right=0.9, top=0.90, wspace=0.05, hspace=0.18)

        plt.savefig('value_plot_mode={}_field={}_stage={}'.format(mode, field, stage))
        plt.show()
Esempio n. 11
0
def _summarize_search_cmd(path, no_config, verbose, criteria, maximize):
    """ Print a summary of the hyper search stored at `path`. """
    HyperSearch(path).print_summary(
        print_config=not no_config, verbose=verbose,
        criteria=criteria, maximize=maximize)
Esempio n. 12
0
def _search_plot_cmd(
        path, y_field, x_field, groupby, spread_measure,
        style, do_legend=False, **axes_kwargs):
    """ Plot `y_field` against `x_field` for a hyper search, with one curve
        per value of `groupby` and error bars given by `spread_measure`.

    Parameters
    ----------
    path: str
        Path passed to `HyperSearch` (run through `process_path` first).
    y_field: str
        Column of the summary dataframe to plot on the y-axis.
    x_field: str
        Column of the summary dataframe to plot on the x-axis.
    groupby: str
        Column used to split the data into separate curves.
    spread_measure: str, one of {std_dev, conf_int, std_err}
        How to compute the error bars.
    style: str
        Matplotlib style to use for the plot.
    do_legend: bool
        If True, draw a legend to the right of the axes.
    axes_kwargs:
        Extra keyword arguments forwarded to `plt.axes`.
    """
    path = process_path(path)
    print("Plotting searches stored at {}.".format(path))

    search = HyperSearch(path)

    with plt.style.context(style):
        ax = plt.axes(xlabel=x_field, ylabel=y_field, **axes_kwargs)

        dist = search.objects.load_object('metadata', 'dist')
        dist = Config(dist)

        df = search.extract_summary_data()

        groups = sorted(df.groupby(groupby))

        colours = plt.rcParams['axes.prop_cycle'].by_key()['color']

        legend = []

        for i, (k, _df) in enumerate(groups):
            # For each x value, collect the series of y values across runs.
            values = list(_df.groupby(x_field))
            x = [v[0] for v in values]
            ys = [v[1][y_field] for v in values]

            y = [_y.mean() for _y in ys]

            if spread_measure == 'std_dev':
                y_upper = y_lower = [_y.std() for _y in ys]
            elif spread_measure == 'conf_int':
                conf_int = [confidence_interval(_y.values, 0.95) for _y in ys]
                y_lower = y - np.array([ci[0] for ci in conf_int])
                y_upper = np.array([ci[1] for ci in conf_int]) - y
            elif spread_measure == 'std_err':
                y_upper = y_lower = [standard_error(_y.values) for _y in ys]
            else:
                raise NotImplementedError(
                    "Unknown spread_measure: {}".format(spread_measure))

            yerr = np.vstack((y_lower, y_upper))

            c = colours[i % len(colours)]

            # NOTE(review): `basex` was renamed to `base` in matplotlib 3.3+
            # and removed in 3.5 — confirm the pinned matplotlib version.
            ax.semilogx(x, y, c=c, basex=2)
            handle = ax.errorbar(x, y, yerr=yerr, c=c)
            label = "{} = {}".format(groupby, k)

            legend.append((handle, label))

        if do_legend:
            handles, labels = zip(*legend)
            ax.legend(
                handles, labels, loc='center left',
                bbox_to_anchor=(1.05, 0.5), ncol=1)

        # plt.subplots_adjust(
        #     left=0.09, bottom=0.13, right=0.7, top=0.93, wspace=0.05, hspace=0.18)

    filename = "value_plot.pdf"
    print("Saving plot as {}".format(filename))
    plt.savefig(filename)