def plot(self):
        """Plot every output of ``self.f`` over the unit-scaled input space.

        Only 1-D and 2-D input spaces are supported (asserted).  For each key
        in ``self.models`` the function ``self.f`` is evaluated on a regular
        grid of 50 points per dimension in [0, 1]; each grid point is mapped
        through ``self.input_space.from_unit`` and ``paramify`` before being
        passed to ``self.f``.  One figure is created and shown per key: a
        gray-scale image with labelled contours in 2-D, a red line in 1-D.
        """
        num_dims = self.input_space.num_dims
        assert num_dims == 2 or num_dims == 1

        size = 50
        x = np.linspace(0, 1, size)
        y = np.linspace(0, 1, size)
        X, Y = np.meshgrid(x, y)

        def evaluate(unit_point, key):
            # Map a point of the unit hypercube to real parameters and return
            # the entry of self.f's output stored under `key`.
            params = self.input_space.from_unit(
                np.array(unit_point)).flatten()
            outputs = self.f(
                paramify_no_types(self.input_space.paramify(params)))
            return outputs[key]

        if self.input_space.num_dims == 2:
            for key in self.models:
                Z = np.zeros((size, size))
                # Row-major sweep over the grid, one evaluation per cell.
                for i, j in np.ndindex(size, size):
                    Z[i, j] = evaluate([X[i, j], Y[i, j]], key)

                plt.figure()
                plt.imshow(Z,
                           interpolation='bilinear',
                           origin='lower',
                           cmap=cm.gray,
                           extent=(0, 1, 0, 1))
                contours = plt.contour(X, Y, Z)
                plt.clabel(contours, inline=1, fontsize=10)
                plt.title(str(key))
                plt.show()
        else:
            for key in self.models:
                Z = np.zeros(size)
                for i, x_i in enumerate(x):
                    Z[i] = evaluate([x_i], key)

                plt.figure()
                plt.plot(x, Z, color='red', marker='.', markersize=1)
                plt.title(str(key))
                plt.show()
예제 #2
0
def python_launcher(job):
    """Import a job's main file and run its ``main`` function.

    Reads ``job['expt_dir']``, ``job['main-file']``, ``job['id']`` and
    ``job['params']``.  The experiment directory is appended to ``sys.path``
    and made the working directory while the job runs; the working directory
    that was current on entry is restored afterwards, even if the job raises.

    Returns whatever ``module.main(job['id'], params)`` returns.
    """
    sys.stderr.write("Running python job.\n")

    # Add directory to the system path so the main file can be imported.
    sys.path.append(os.path.realpath(job['expt_dir']))

    # Remember where we started: '..' is only the right way back when
    # expt_dir is a direct child of the starting directory.
    original_dir = os.getcwd()

    # Change into the directory.
    os.chdir(job['expt_dir'])
    try:
        sys.stderr.write("Changed into dir %s\n" % (os.getcwd()))

        # Strip off the data types and just get the raw values
        params = paramify_no_types(job['params'])

        # Load up this module and run
        main_file = job['main-file']
        if main_file.endswith('.py'):
            main_file = main_file[:-3]
        sys.stderr.write('Importing %s.py\n' % main_file)
        module = __import__(main_file)
        sys.stderr.write('Running %s.main()\n' % main_file)

        result = module.main(job['id'], params)
    finally:
        # Change back out, whether or not the job succeeded.
        os.chdir(original_dir)

    # Wrap in a 1-tuple so a tuple-valued result is formatted as one value.
    sys.stderr.write("Got result %s\n" % (result,))

    return result
		def fun(params, gradient = False):
			"""Return the negated ``task`` output of ``module.main`` at ``params``.

			``params`` is given in unit-scaled coordinates and is mapped back
			through ``input_space.from_unit`` before the call.  ``gradient`` is
			accepted but not used.  ``input_space``, ``module`` and ``task`` are
			not defined here; they are taken from the surrounding scope.
			"""

			# A 2-D input with more than one column is collapsed to 1-D first.
			has_several_columns = len(params.shape) > 1 and params.shape[ 1 ] > 1
			unit_point = params.flatten() if has_several_columns else params

			real_point = input_space.from_unit(np.array([ unit_point ])).flatten()
			job_params = paramify_no_types(input_space.paramify(real_point))

			return -1.0 * module.main(0, job_params)[ task ]
예제 #4
0
		def fun(params, gradient = False):
			"""Evaluate the ``task`` output of ``module.main`` at one or many points.

			A 2-D ``params`` with more than one column is treated as a batch:
			each row is mapped through ``input_space.from_unit`` and evaluated,
			and an array with one value per row is returned.  Any other input is
			evaluated once and the single result is returned.  ``gradient`` is
			accepted but not used.  ``input_space``, ``module`` and ``task`` are
			taken from the surrounding scope.
			"""

			is_batch = len(params.shape) > 1 and params.shape[ 1 ] > 1

			if not is_batch:
				# NOTE(review): unlike the batch path, this input is not passed
				# through from_unit before paramify -- confirm this is intended.
				return module.main(0, paramify_no_types(input_space.paramify(params)))[ task ]

			n_points = params.shape[ 0 ]
			values = np.zeros(n_points)

			for row in range(n_points):
				unit_point = params[ row, : ].flatten()
				real_point = input_space.from_unit(np.array([ unit_point ])).flatten()
				values[ row ] = module.main(0, paramify_no_types(input_space.paramify(real_point)))[ task ]

			return values
예제 #5
0
def true_func(stored,
              module,
              violation_value=np.inf,
              constraint_tol=0.0,
              objective=DEFAULTS["task_name"],
              constraints=()):
    """Score a stored recommendation against the module's true function.

    Args:
        stored: recommendation record; ``stored['params']`` and
            ``stored['objective']`` are read.
        module: the experiment's main module.  ``true_func`` and ``true_val``
            are used when the module defines them.
        violation_value: returned as soon as one constraint is violated.
        constraint_tol: a constraint counts as violated when its value is
            below ``-constraint_tol``.
        objective: key of the objective in the dict returned by
            ``module.true_func``.
        constraints: keys of the constraints in that dict.

    Returns:
        * ``stored['objective']`` (``np.nan`` if it is None) when the module
          has no ``true_func``;
        * ``violation_value`` when a constraint is violated;
        * ``abs(value - module.true_val())`` when the module defines
          ``true_val``, otherwise the true objective value itself.

    Raises:
        Exception: if constraints were given but ``module.true_func`` did not
            return a dict.
    """
    params = stored['params']

    # if we don't actually know the true objective, just return what we think is the best
    if not hasattr(module, 'true_func'):
        if stored['objective'] is None:
            return np.nan
        return stored['objective']

    # otherwise, assume we know it
    output = module.true_func(0, paramify_no_types(params))

    if not isinstance(output, dict):
        if len(constraints) != 0:
            raise Exception(
                "output is not a dict and yet you said there were constraints..?"
            )
        val = output
    else:
        val = output[objective]

    for c in constraints:
        if output[c] < -constraint_tol:
            print('Violation value of %f for constraint %s' % (output[c], c))
            return violation_value

    # if the module defines the true solution value, then use the gap. otherwise don't
    if not hasattr(module, 'true_val'):
        return val
    return np.abs(val - module.true_val())
예제 #6
0
def main(dirs,
         n_repeat=-1,
         n_iter_spec=None,
         rec_type="model",
         average="mean",
         log_scale=False,
         violation_value=1.,
         constraint_tol=0.,
         make_dist_plot=False,
         mainfile=None,
         stretch_x=False,
         task_comp_x=None,
         plot_wall_time=False,
         bin_size=1.0,
         plot_separate=False,
         labels=None,
         y_axis_label=None,
         x_axis_label=None):
    """Plot recommendation quality for one or more experiments.

    One curve is drawn per directory in ``dirs``.  The figures are saved in
    the ``plots`` sub-directory of the last directory processed.

    Args:
        dirs: experiment directories, each holding a ``config.json``.
        n_repeat: negative for a single experiment; otherwise the number of
            repeats, loaded under ``repeat_experiment_name(name, j)``.
        n_iter_spec: number of iterations to plot; None uses what is
            available (the minimum over repeats when averaging).
        rec_type: ``"model"`` (score with ``true_func``), ``"observations"``
            (``obj_o`` field) or ``"mixed"`` (``obj_om`` field).
        average: ``"mean"`` or ``"median"``, used across repeats.
        log_scale: plot ``log10`` of the values.
        violation_value: value substituted for missing, NaN or violating
            entries.
        constraint_tol: tolerance forwarded to ``true_func``.
        make_dist_plot: also plot the distance to ``module.true_sol()``.
        mainfile: overrides ``options['main_file']`` when given.
        stretch_x: rescale the x axis by ``n_iter_spec / n_iter`` in the
            averaged multi-repeat plot.
        task_comp_x: task name; only recommendations recorded when this
            task's completion count increased are kept.
        plot_wall_time: use elapsed wall time (minutes) as the x axis.
        bin_size: width of the wall-time bins, in minutes.
        plot_separate: plot every repeat as its own line instead of the
            average with error bars.
        labels: ``';'``-separated legend labels.
        y_axis_label: overrides the default y-axis label.
        x_axis_label: overrides the default x-axis label.

    Raises:
        Exception: on an unknown ``average`` or ``rec_type``, when the
            experiment is not in the database, when ``make_dist_plot`` is set
            but no ``true_sol`` is available, or when ``plot_wall_time`` is
            combined with ``task_comp_x`` for a single experiment.
    """

    # Create the figure that plots utility gap
    fig = dict()
    ax = dict()
    # averaging function
    if average == "mean":
        avg = np.mean
    elif average == "median":
        avg = np.median
    else:
        raise Exception("Unknown average %s" % average)

    fig['err'] = plt.figure()
    ax['err'] = fig['err'].add_subplot(1, 1, 1)
    if plot_wall_time:
        ax['err'].set_xlabel("wall time (min)", size=25)
    elif x_axis_label:
        ax['err'].set_xlabel(x_axis_label, size=25)
    else:
        ax['err'].set_xlabel('Number of function evaluations', size=25)
    ax['err'].tick_params(axis='both', which='major', labelsize=20)

    # Create the figure that plots L2 distance from solution
    fig['dist'] = plt.figure()
    ax['dist'] = fig['dist'].add_subplot(1, 1, 1)
    if x_axis_label:
        ax['dist'].set_xlabel(x_axis_label, size=25)
    else:
        ax['dist'].set_xlabel('Number of function evaluations', size=25)
    if y_axis_label:
        ax['dist'].set_ylabel(y_axis_label, size=25)
    elif log_scale:
        ax['dist'].set_ylabel(r'$\log_{10}\, \ell_2$-distance', size=25)
    else:
        ax['dist'].set_ylabel(r'$\ell_2$-distance', size=25)
    ax['dist'].tick_params(axis='both', which='major', labelsize=20)

    db_document_name = 'recommendations'

    acq_names = list()
    for expt_dir in dirs:
        options = parse_config_file(expt_dir, 'config.json')
        experiment_name = options["experiment_name"]
        input_space = InputSpace(options["variables"])
        chooser_module = importlib.import_module('spearmint.choosers.' +
                                                 options['chooser'])
        chooser = chooser_module.init(input_space, options)
        db = MongoDB(database_address=options['database']['address'])
        jobs = load_jobs(db, experiment_name)
        hypers = db.load(experiment_name, 'hypers')
        tasks = parse_tasks_from_jobs(jobs, experiment_name, options,
                                      input_space)

        # The main module is only imported for rec_type == "model"; keep the
        # name bound otherwise so the hasattr() checks below stay valid.
        module = None
        if rec_type == "model":
            if mainfile is None:
                main_file = options['main_file']
            else:
                main_file = mainfile

            # TODO: make this nicer with proper importing
            sys.path.append(options['main_file_path'])
            if main_file.endswith(u'.py'):
                module = importlib.import_module(main_file[:-3])
            else:
                module = importlib.import_module(main_file)
            sys.path.remove(options['main_file_path'])

        obj, con = get_objectives_and_constraints(options)  # get the names
        obj = obj[0]  # only one objective
        print('Found %d constraints' % len(con))
        plot_utility_gap = rec_type == "model" and hasattr(module, 'true_val')

        if plot_utility_gap:
            print('PLOTTING UTILITY GAP')
            if y_axis_label:
                ax['err'].set_ylabel(y_axis_label, size=25)
            elif log_scale:
                ax['err'].set_ylabel(r'$\log_{10}$ utility gap', size=25)
            else:
                ax['err'].set_ylabel('utility gap', size=25)
        else:
            if y_axis_label:
                ax['err'].set_ylabel(y_axis_label, size=25)
            elif log_scale:
                ax['err'].set_ylabel(r'$\log_{10}$ objective value', size=25)
            else:
                ax['err'].set_ylabel('objective value', size=25)

        # Make the directory for the plots
        plots_dir = os.path.join(expt_dir, 'plots')
        if not os.path.isdir(plots_dir):
            os.mkdir(plots_dir)

        # if the module provides the location of the true solution, plot the distance to this solution vs iterations
        if make_dist_plot and not hasattr(module, 'true_sol'):
            raise Exception(
                "make_dist_plot turned on but cannot find true sol in the main_file"
            )

        # If repeat >= 0, then we are averaging a number of experiments
        # We assume the experiments are stored with the original name plus a hyphen plus the number
        n_repeat = int(n_repeat)
        if n_repeat < 0:
            recs = db.load(experiment_name, db_document_name)
            if recs is None:
                raise Exception(
                    "Could not find experiment %s in database at %s" %
                    (experiment_name, options['database']['address']))

            # the x axis represents the number of evals of a particular task given by task_comp_x
            # so we only take the data where this number was incremented, i.e. when this task was evaluated
            if task_comp_x:
                # only include recommendations when you finish a particular task
                new_recs = list()
                last_complete = 0
                for rec in recs:
                    cur_complete = rec['num_complete_tasks'][task_comp_x]
                    if cur_complete > last_complete:
                        last_complete = cur_complete
                        new_recs.append(rec)
                recs = new_recs

            n_iter = len(recs) if n_iter_spec is None else n_iter_spec

            iters = range(n_iter)

            if plot_wall_time:
                if task_comp_x:
                    raise Exception("Do not use plot wall_time with task_x")
                iters = [rec['total_elapsed_time'] / 60.0 for rec in recs]
                iters = iters[:n_iter]
                iters = np.array(iters, dtype=float)

            print('Found %d iterations' % len(recs))

            if rec_type == "model":
                values = [
                    true_func(rec, module, violation_value, constraint_tol,
                              obj, con) for rec in recs
                ]

                if log_scale:
                    ax['err'].plot(iters, [np.log10(v) for v in values])
                else:
                    ax['err'].plot(iters, values)
            else:
                if rec_type == "observations":
                    observations = [x['obj_o'] for x in recs]
                elif rec_type == "mixed":
                    observations = [x['obj_om'] for x in recs]
                else:
                    raise Exception("unknown rec type")

                for i in range(len(observations)):
                    if observations[i] is None or np.isnan(observations[i]):
                        observations[i] = violation_value

                if log_scale:
                    ax['err'].plot(iters, np.log10(observations))
                else:
                    ax['err'].plot(iters, observations)

            if make_dist_plot:
                distances = [
                    params_norm(rec['params'], module.true_sol())
                    for rec in recs
                ]
                if log_scale:
                    ax['dist'].plot(iters, np.log10(distances))
                else:
                    ax['dist'].plot(iters, distances)
        else:
            # MULTIPLE REPEATS
            repeat_recs = [
                db.load(repeat_experiment_name(experiment_name, j),
                        db_document_name) for j in range(n_repeat)
            ]
            if None in repeat_recs:
                for i, repeat_rec in enumerate(repeat_recs):
                    if repeat_rec is None:
                        print('Could not load experiment %s repeat %d' % (
                            experiment_name, i))
                print('Exiting...')
                return

            if task_comp_x:
                # only include recommendations when you finish a particular task
                new_repeat_recs = list()
                for recs in repeat_recs:
                    recs = sorted(recs, key=lambda k: k['id'])  # sort by id
                    new_recs = list()
                    last_complete = 0
                    for rec in recs:
                        cur_complete = rec['num_complete_tasks'][task_comp_x]
                        if cur_complete == last_complete + 1:
                            last_complete = cur_complete
                            new_recs.append(rec)
                        elif cur_complete == last_complete:
                            pass
                        else:
                            print(
                                'WARNING: cur complete=%d, last_complete=%d' %
                                (cur_complete, last_complete))
                            break
                    new_repeat_recs.append(new_recs)

                repeat_recs = new_repeat_recs

            n_iter_each = [len(r) for r in repeat_recs]
            if plot_wall_time:
                # do everything separately from here if plotting wall time
                # here is what we do... we can't have a square array because
                # we don't want to take the minimum number of iterations...
                # we want to take ALL iterations for each repeat, and this number
                # may be different for different repeats.
                # so we store all times/values in a list of arrays
                # then we chop things up into bins
                if rec_type != "model":
                    values = list()
                    wall_times = list()
                    for j in range(n_repeat):  # loop over repeated experiments

                        wall_times.append(
                            np.array([
                                repeat_recs[j][i]['total_elapsed_time'] / 60.0
                                for i in range(n_iter_each[j])
                            ]))

                        if rec_type == "observations":
                            values.append([
                                repeat_recs[j][i]['obj_o']
                                for i in range(n_iter_each[j])
                            ])
                        elif rec_type == "mixed":
                            values.append([
                                repeat_recs[j][i]['obj_om']
                                for i in range(n_iter_each[j])
                            ])
                        else:
                            raise Exception("unknown rec type")

                        for i in range(n_iter_each[j]):
                            if values[-1][i] is None or np.isnan(
                                    values[-1][i]):
                                values[-1][i] = violation_value

                        values[-1] = np.array(values[-1])

                else:  # if plot wall time but using model
                    values = list()
                    wall_times = list()
                    for j in range(n_repeat):  # loop over repeated experiments

                        # for this repeat, get all wall times
                        wall_times.append(
                            np.array([
                                repeat_recs[j][i]['total_elapsed_time'] / 60.0
                                for i in range(n_iter_each[j])
                            ]))

                        values_j = np.zeros(n_iter_each[j])
                        for i in range(n_iter_each[j]):  # loop over iterations
                            val = true_func(repeat_recs[j][i], module, None,
                                            constraint_tol, obj, con)
                            # set to violation value here so we can print out this info...
                            if val is None or np.isnan(val):
                                values_j[i] = violation_value
                                print('Violation with params %s at repeat %d iter %d' % (
                                    paramify_no_types(
                                        repeat_recs[j][i]['params']), j, i))
                            else:
                                values_j[i] = val
                        values.append(values_j)

                # change the data structure to be time bins and include everything in
                # those time bins across repeats
                end_times = [max(t) for t in wall_times]
                for j in range(n_repeat):
                    print('end time for repeat %d: %f' % (j, end_times[j]))
                iters = np.arange(0.0, np.round(max(end_times)), bin_size)
                new_values = list()
                for i, timestep in enumerate(iters):
                    new_value = list()
                    for j in range(n_repeat):
                        new_value = np.append(
                            new_value, values[j][np.logical_and(
                                wall_times[j] >= timestep, wall_times[j] <
                                timestep + bin_size)].flatten())
                    # if a time bin is empty across all repeats:
                    if len(new_value) == 0:
                        if i == 0:
                            new_value = [violation_value]
                        else:
                            new_value = new_values[-1]
                    new_values.append(new_value)
                values = new_values

                # make the first value equal to the violation value (optional)
                iters = np.append(iters, max(iters) + bin_size)
                values.insert(0, np.array([violation_value]))

                # Average over the repeated experiments
                average_values = [avg(v) for v in values]
                errorbars = bootstrap_errorbars(values, log=log_scale, avg=avg)

                if log_scale:
                    ax['err'].errorbar(iters,
                                       np.log10(average_values),
                                       yerr=errorbars)
                else:
                    ax['err'].errorbar(iters, average_values, yerr=errorbars)

            else:
                # NOT WALL TIME

                # smallest number of iterations over the repeats
                n_iter = min(n_iter_each) if n_iter_each else np.inf
                if n_iter_spec is None:
                    print('Found %d repeats with at least %d iterations' % (
                        n_repeat, n_iter))
                    print({i: n_iter_each[i] for i in range(n_repeat)})
                elif n_iter < n_iter_spec:
                    print('You specified %d iterations but there are only %d available... so plotting %d' % (
                        n_iter_spec, n_iter, n_iter))
                else:
                    n_iter = n_iter_spec
                    print('Plotting %d iterations' % n_iter)

                iters = range(n_iter)

                if rec_type != "model":
                    values = np.zeros((n_iter, n_repeat))
                    for j in range(n_repeat):  # loop over repeated experiments
                        # Every repeat is read at the same iteration indices.
                        for i in iters:  # loop over iterations
                            if rec_type == "observations":
                                observed = repeat_recs[j][i]['obj_o']
                            elif rec_type == "mixed":
                                observed = repeat_recs[j][i]['obj_om']
                            else:
                                raise Exception("unknown rec type")
                            # Check before storing: the array holds floats only.
                            if observed is None or np.isnan(observed):
                                observed = violation_value
                            values[i, j] = observed

                    print(values)

                else:
                    values = np.zeros((n_iter, n_repeat))
                    distances = np.zeros((n_iter, n_repeat))
                    for j in range(n_repeat):  # loop over repeated experiments
                        for i in iters:  # loop over iterations
                            val = true_func(repeat_recs[j][i], module, None,
                                            constraint_tol, obj, con)
                            # set to violation value here so we can print out this info...
                            if val is None:
                                values[i, j] = violation_value
                                print('Violation with params %s at repeat %d iter %d' % (
                                    paramify_no_types(
                                        repeat_recs[j][i]['params']), j, i))
                            else:
                                values[i, j] = val

                            if make_dist_plot:
                                distances[i, j] = params_norm(
                                    repeat_recs[j][i]['params'],
                                    module.true_sol())

                if plot_separate:
                    if log_scale:
                        ax['err'].plot(iters, np.log10(values))
                    else:
                        ax['err'].plot(iters, values)

                else:
                    # Average over the repeated experiments
                    average_values = [avg(v) for v in values]
                    errorbars = bootstrap_errorbars(values,
                                                    log=log_scale,
                                                    avg=avg)

                    if stretch_x:
                        fctr = float(n_iter_spec) / float(n_iter)
                        iters = np.array(iters) * fctr
                        print('Stretching x axis by a factor of %f' % fctr)

                    if log_scale:
                        ax['err'].errorbar(iters,
                                           np.log10(average_values),
                                           yerr=errorbars)
                    else:
                        ax['err'].errorbar(iters,
                                           average_values,
                                           yerr=errorbars)

                    if make_dist_plot:
                        average_dist = [avg(d) for d in distances]
                        errorbars_dist = bootstrap_errorbars(distances,
                                                             log=log_scale,
                                                             avg=avg)
                        if log_scale:
                            ax['dist'].errorbar(iters,
                                                np.log10(average_dist),
                                                yerr=errorbars_dist)
                        else:
                            ax['dist'].errorbar(iters,
                                                average_dist,
                                                yerr=errorbars_dist)

        # Legend entry: acquisition function of the first task listed.
        acq_names.append(list(options["tasks"].values())[0]["acquisition"])
        if acq_names[-1] == 'PES':
            acq_names[-1] = 'PESC'
        if acq_names[-1] == 'ExpectedImprovement':
            acq_names[-1] = 'EIC'

    if labels:
        ax['err'].legend(labels.split(';'), fontsize=16, loc='lower left')
        ax['dist'].legend(labels.split(';'), fontsize=20)
    elif len(acq_names) > 1:
        ax['err'].legend(acq_names, fontsize=20)
        ax['dist'].legend(acq_names, fontsize=20)

    # save it in the last directory... (if there are multiple directories)
    if not plot_wall_time:
        if n_repeat >= 0:
            print('Made a plot with %d repeats and %d iterations' % (n_repeat,
                                                                     n_iter))
        else:
            print('Made a plot with %d iterations' % (n_iter))
    else:
        if n_repeat >= 0:
            print('Made a plot with %d repeats and %f minutes' % (n_repeat,
                                                                  max(iters)))
        else:
            print('Made a plot with %f minutes' % (max(iters)))

    file_prefix = '%s_' % average if n_repeat > 0 else ''
    file_postfix = '_wall_time' if plot_wall_time else ''
    fig['err'].tight_layout()
    figname = os.path.join(plots_dir,
                           '%serror%s' % (file_prefix, file_postfix))
    fig['err'].savefig(figname + '.pdf')
    fig['err'].savefig(figname + '.svg')
    print('Saved to %s' % figname)
    if make_dist_plot:
        fig['dist'].tight_layout()
        figname_dist = os.path.join(
            plots_dir, '%sl2_distance%s.pdf' % (file_prefix, file_postfix))
        fig['dist'].savefig(figname_dist)
        print('Saved to %s' % figname_dist)