Example #1
0
def main(expt_dir, repeat=-1):
    """Print the parameters and task values of every completed job.

    The experiment is described by ``config.json`` inside ``expt_dir``.
    When ``repeat`` is positive, the experiment name is mapped through
    ``repeat_experiment_name`` so that a specific repeat is inspected.

    The chooser, hypers and tasks are loaded exactly as in the other
    scripts even though only the raw jobs are printed here.
    """
    options = parse_config_file(expt_dir, 'config.json')

    experiment_name = options["experiment_name"]
    if repeat > 0:
        experiment_name = repeat_experiment_name(experiment_name, repeat)

    input_space = InputSpace(options["variables"])
    chooser_path = 'spearmint.choosers.' + options['chooser']
    chooser_module = importlib.import_module(chooser_path)
    chooser = chooser_module.init(input_space, options)

    db = MongoDB(database_address=options['database']['address'])
    jobs = load_jobs(db, experiment_name)
    hypers = db.load(experiment_name, 'hypers')
    tasks = parse_tasks_from_jobs(jobs, experiment_name, options, input_space)

    for job in jobs:
        # Only finished jobs carry values worth reporting.
        if job['status'] != 'complete':
            continue

        print('Job %d' % job['id'])
        input_space.print_params(job['params'], left_indent=0)

        for task_name, value in job['values'].iteritems():
            print('%s: %s' % (task_name, value))

        print('')
def main(expt_dir, repeat=-1):
    """Print, task by task, every evaluated input and its observed value.

    The experiment is described by ``config.json`` inside ``expt_dir``.
    When ``repeat`` is positive, the experiment name is mapped through
    ``repeat_experiment_name`` so that a specific repeat is inspected.
    """
    options = parse_config_file(expt_dir, 'config.json')

    experiment_name = options["experiment-name"]
    if repeat > 0:
        experiment_name = repeat_experiment_name(experiment_name, repeat)

    input_space = InputSpace(options["variables"])
    chooser_path = 'spearmint.choosers.' + options['chooser']
    chooser_module = importlib.import_module(chooser_path)
    chooser = chooser_module.init(input_space, options)

    db = MongoDB(database_address=options['database']['address'])
    jobs = load_jobs(db, experiment_name)
    hypers = db.load(experiment_name, 'hypers')
    tasks = parse_tasks_from_jobs(jobs, experiment_name, options, input_space)

    for task_name, task in tasks.iteritems():
        for index, value in enumerate(task.values):
            # Iterations are reported 1-based.
            print('Iteration %d' % (index + 1))
            input_space.paramify_and_print(task.inputs[index], left_indent=0)

            print('%s: %s' % (task_name, value))

            # Two blank lines separate consecutive iterations.
            print('')
            print('')
Example #3
0
def cleanup(path, repeat=-1):
    """Delete the database collections and output files of an experiment.

    ``path`` is the experiment directory containing ``config.json``.
    When ``repeat`` is non-negative (advanced use), the repeat-specific
    experiment name and output directory are cleaned instead.

    Raises:
        Exception: if ``path`` is not an existing directory.
    """
    if not os.path.isdir(path):
        raise Exception("%s is not a valid directory" % path)

    cfg = parse_config_file(path, 'config.json', verbose=False)
    db_address = cfg['database']['address']
    db = MongoDB(database_address=db_address)

    experiment_name = cfg["experiment_name"]
    targets_repeat = repeat >= 0  # only for advanced use
    if targets_repeat:
        experiment_name = repeat_experiment_name(experiment_name, repeat)

    print('Cleaning up experiment %s in database at %s' % (experiment_name,
                                                           db_address))

    # The original author noted that db.remove_experiment does not work,
    # so every collection is dropped individually.
    for collection in ('jobs', 'hypers', 'recommendations', 'start-time'):
        db.remove_collection(experiment_name, collection)

    # remove output files
    if targets_repeat:
        output_directory = repeat_output_dir(path, repeat)
    else:
        output_directory = os.path.join(path, 'output')

    if os.path.isdir(output_directory):
        shutil.rmtree(output_directory)
Example #4
0
def main(expt_dir, n_repeat):
    """Summarise where time was spent across repeated runs of an experiment.

    For each of the ``n_repeat`` repeats the jobs and the recorded start
    time are loaded from the database. Completed jobs contribute their
    run time (in minutes) to the first task they list, and the gap between
    the previous job's end and this job's start is attributed to either a
    "fast" or a "slow" optimizer update depending on the job's
    ``'fast update'`` flag. Means and standard deviations over the repeats
    are printed.
    """
    n_repeat = int(n_repeat)
    options = parse_config_file(expt_dir, 'config.json')
    tasks = options['tasks'].keys()

    jobs = {}
    start_times = {}
    for rep in xrange(n_repeat):
        experiment_name = repeat_experiment_name(options["experiment_name"],
                                                 rep)
        db = MongoDB(database_address=options['database']['address'])
        jobs[rep] = load_jobs(db, experiment_name)
        start_record = db.load(experiment_name, 'start-time')
        start_times[rep] = start_record['start-time']

    # One slot per repeat; all durations are accumulated in minutes.
    time_in_evals = defaultdict(lambda: np.zeros(n_repeat))
    time_in_fast_updates = np.zeros(n_repeat)
    time_in_slow_updates = np.zeros(n_repeat)

    for rep in xrange(n_repeat):
        previous_end = start_times[rep]

        for job in jobs[rep]:
            if job['status'] != 'complete':
                continue

            time_in_evals[job['tasks'][0]][rep] += (
                job['end time'] - job['start time']) / 60.0

            # The idle gap before this job is the optimizer's thinking time.
            if job['fast update']:
                bucket = time_in_fast_updates
            else:
                bucket = time_in_slow_updates
            bucket[rep] += (job['start time'] - previous_end) / 60.0

            previous_end = job['end time']

    for task in tasks:
        task_minutes = time_in_evals[task]
        print('Average time on task %s over %d repeats: %f +/- %f minutes (mean +/- std)' % (
            task, n_repeat, np.mean(task_minutes), np.std(task_minutes)))

    total_time_in_evals = sum(time_in_evals.values())
    print('Average time in JOBS over %d repeats: %f +/- %f minutes (mean +/- std)' % (
        n_repeat, np.mean(total_time_in_evals), np.std(total_time_in_evals)))
    print('Average time in FAST over %d repeats: %f +/- %f minutes (mean +/- std)' % (
        n_repeat, np.mean(time_in_fast_updates), np.std(time_in_fast_updates)))
    print('Average time in SLOW over %d repeats: %f +/- %f minutes (mean +/- std)' % (
        n_repeat, np.mean(time_in_slow_updates), np.std(time_in_slow_updates)))

    total_optimizer_time = time_in_fast_updates + time_in_slow_updates
    print('Average time in OPTIMIZER over %d repeats: %f +/- %f minutes (mean +/- std)' % (
        n_repeat, np.mean(total_optimizer_time), np.std(total_optimizer_time)))

    mean_components = [
        np.mean(total_time_in_evals),
        np.mean(time_in_fast_updates),
        np.mean(time_in_slow_updates),
    ]
    print('Total average time spent: %f' % np.sum(mean_components))
Example #5
0
    def _create_spearmint_parameters(self):
        """Populate the attributes needed to drive spearmint.

        Starts the Mongo instance, reads the options for ``self.work_dir``,
        parses the resources, instantiates the configured chooser and opens
        a ``MongoDB`` handle at the address given in the options.
        """
        self._create_mongo_instance()

        options, exp_dir = get_options(self.work_dir)
        self.options = options
        self.exp_dir = exp_dir
        self.resources = parse_resources_from_config(self.options)

        chooser_path = 'spearmint.choosers.' + self.options['chooser']
        self.chooser_module = importlib.import_module(chooser_path)
        self.chooser = self.chooser_module.init(self.options)

        # Fall back to a placeholder name when the config does not set one.
        self.experiment_name = self.options.get('experiment-name',
                                                'unnamed_experiment')

        address = self.options['database']['address']
        self.db_address = address
        self.db = MongoDB(database_address=address)
Example #6
0
def main():
    """Write the parameters of the last stored recommendation to disk.

    Reads ``config.json`` from the current directory, walks the
    ``recommendations`` collection by increasing ``id`` (starting at 1)
    until an id is missing, and saves the vectorised parameters of the last
    recommendation found to ``pareto_front.txt``.

    Raises:
        Exception: if the experiment has no recommendation with id 1, since
            there is then nothing to save.
    """
    options = parse_config_file('.', 'config.json')
    experiment_name = options["experiment-name"]
    input_space = InputSpace(options["variables"])
    db = MongoDB(database_address=options['database']['address'])

    params_last = None
    rec_id = 1
    recommendation = db.load(experiment_name, 'recommendations',
                             {'id': rec_id})
    while recommendation is not None:
        params_last = input_space.vectorify(recommendation['params'])
        # Advance before reloading so each id is fetched exactly once.
        rec_id += 1
        recommendation = db.load(experiment_name, 'recommendations',
                                 {'id': rec_id})

    if params_last is None:
        # Fail with a clear message instead of an unbound-variable error.
        raise Exception("No recommendations found for experiment %s" %
                        experiment_name)

    np.savetxt('pareto_front.txt', params_last, fmt='%e')
Example #7
0
def get_optimized_params(tool_name):
    """Return the parameter set with the lowest error rate for a tool.

    Jobs are loaded from the experiment named ``<tool_name>_test`` in the
    MongoDB at ``localhost``. Each job contributes the three parameters of
    the tool plus its ``branin`` value, stored as ``error_rate``.

    Args:
        tool_name: one of ``'macs2'``, ``'cisgenome'``, ``'swembl'`` or
            ``'sicer'``.

    Returns:
        A dict mapping each parameter name, and ``'error_rate'``, to the
        value of the best job. An empty dict is returned for an unknown
        tool name or when the experiment has no jobs.
    """
    # Parameter names of each supported tool.
    tool_params = {
        'macs2': ['q', 'm_s', 'm_d'],
        'cisgenome': ['b', 'e', 'w'],
        'swembl': ['x', 'm', 'f'],
        'sicer': ['fs', 'gs', 'w'],
    }

    # Validate before touching the database so a bad name costs nothing.
    if tool_name not in tool_params:
        print('incorrect tool name')
        return {}
    params = tool_params[tool_name]

    # Database connection
    db = MongoDB(database_address='localhost')
    jobs = load_jobs(db, tool_name + '_test')

    # Collect plain rows and build the frame once; appending to a DataFrame
    # row by row is quadratic and DataFrame.append no longer exists in
    # pandas >= 2.0.
    rows = []
    for job in jobs:
        row = {}
        for name in params:
            row[name] = float(job['params'][name]['values'])
        row['error_rate'] = float(job['values']['branin'])
        rows.append(row)

    if not rows:
        # Nothing to rank; sorting an empty frame by a missing column
        # would raise a KeyError.
        return {}

    df = pd.DataFrame(rows)
    df = df.sort_values('error_rate').reset_index(drop=True)

    # Row 0 is the best job after sorting by ascending error rate.
    res = {}
    for column in df.columns:
        res[column] = df.at[0, column]

    # return dictionary of params
    return res
Example #8
0
def returnBest(config_directory):
    """Fit the chooser on the stored jobs and return its best parameters.

    Changes the working directory to a fixed spearmint run directory, reads
    ``config.json`` from ``config_directory``, removes broken jobs, fits the
    chooser on the first resource's task group and returns the expected best
    position as a parameter dictionary produced by ``task_group.paramify``.
    Intermediate results are printed along the way.
    """
    # NOTE(review): hard-coded absolute path; only valid on the author's
    # machine.
    os.chdir("/home/carrknight/code/oxfish/runs/optimization/spearmint")

    options = get_options(config_directory, config_file="config.json")
    experiment_name = str(options['experiment-name'])
    db = MongoDB()
    resources = parse_resources_from_config(options)
    # Only the first resource is used.
    resource = next(resources.itervalues())

    # load hyper parameters
    chooser_path = 'spearmint.choosers.' + options['chooser']
    chooser_module = importlib.import_module(chooser_path)
    chooser = chooser_module.init(options)
    print('chooser %s' % (chooser,))

    hypers = db.load(experiment_name, "hypers")
    print('loaded hypers %s' % (hypers,))  # from GP.to_dict()

    jobs = load_jobs(db, experiment_name)
    remove_broken_jobs(db, jobs, experiment_name, resources)

    task_options = {}
    for task in resource.tasks:
        task_options[task] = options["tasks"][task]
    task_group = spearmint.main.load_task_group(db, options, resource.tasks)

    hypers = spearmint.main.load_hypers(db, experiment_name)
    print('loaded hypers %s' % (hypers,))  # from GP.to_dict()

    hypers = chooser.fit(task_group, hypers, task_options)
    print("\nfitted hypers:")
    print(hypers)

    lp, x = chooser.best()
    x = x.flatten()
    print('best %s %s' % (lp, x))

    # Map the unit-cube location back to named parameters.
    unit_point = task_group.from_unit(x)
    bestp = task_group.paramify(unit_point)
    print('expected best position %s' % (bestp,))

    return bestp
def main(expt_dir, n_repeat):
    """Plot the cumulative number of evaluations per task over iterations.

    Jobs of ``n_repeat`` repeats of the experiment are loaded; for every
    iteration (up to the shortest repeat) the running count of jobs that
    evaluated each task is computed, averaged over repeats and plotted with
    standard-deviation error bars. The figure is saved as
    ``plots/cumulative_evals.pdf`` inside ``expt_dir``.
    """
    n_repeat = int(n_repeat)
    options = parse_config_file(expt_dir, 'config.json')
    tasks = options['tasks'].keys()

    jobs = {}
    for rep in xrange(n_repeat):
        experiment_name = repeat_experiment_name(options["experiment_name"],
                                                 rep)
        db = MongoDB(database_address=options['database']['address'])
        jobs[rep] = load_jobs(db, experiment_name)

    n_iter_each = [len(rep_jobs) for rep_jobs in jobs.values()]
    print('Found %s iterations' % n_iter_each)
    # Only iterations present in every repeat can be averaged.
    n_iter = min(n_iter_each)

    # cum_evals[repeat][task][iteration]; missing entries read as 0, which
    # is what seeds the running count at iteration 0 (via index -1).
    cum_evals = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
    for rep in xrange(n_repeat):
        for it in xrange(n_iter):
            for task in tasks:
                step = 1 if task in jobs[rep][it]['tasks'] else 0
                cum_evals[rep][task][it] = cum_evals[rep][task][it - 1] + step

    # average over the repeats
    for it in xrange(n_iter):
        for task in tasks:
            per_repeat = [cum_evals[rep][task][it]
                          for rep in xrange(n_repeat)]
            cum_evals["avg"][task][it] = np.mean(per_repeat)
            cum_evals["err"][task][it] = np.std(per_repeat)

    plt.figure()
    iters = range(n_iter)
    for task in tasks:
        means = [cum_evals["avg"][task][it] for it in xrange(n_iter)]
        spreads = [cum_evals["err"][task][it] for it in xrange(n_iter)]
        plt.errorbar(iters, means, yerr=spreads, linewidth=2)
    plt.legend(tasks, loc='upper left')
    plt.xlabel('Iteration number', size=25)
    plt.ylabel('Cumulative evaluations', size=25)

    # Make the directory for the plots
    plots_dir = os.path.join(expt_dir, 'plots')
    if not os.path.isdir(plots_dir):
        os.mkdir(plots_dir)

    figname = os.path.join(plots_dir, 'cumulative_evals.pdf')
    print('Saving figure at %s' % figname)
    plt.savefig(figname)
Example #10
0
def main(dirs,
         n_repeat=-1,
         n_iter_spec=None,
         rec_type="model",
         average="mean",
         log_scale=False,
         violation_value=1.,
         constraint_tol=0.,
         make_dist_plot=False,
         mainfile=None,
         stretch_x=False,
         task_comp_x=None,
         plot_wall_time=False,
         bin_size=1.0,
         plot_separate=False,
         labels=None,
         y_axis_label=None,
         x_axis_label=None):
    """Plot the quality of the stored recommendations of one or more experiments.

    For every directory in ``dirs`` the recommendations are loaded from the
    database and one curve is added to the "error" figure (and optionally to
    the "distance" figure). The figures are saved in the ``plots``
    sub-directory of the last experiment directory.

    Args:
        dirs: iterable of experiment directories, each with a ``config.json``.
        n_repeat: if negative, plot the single experiment; otherwise average
            over this many repeats named via ``repeat_experiment_name``.
        n_iter_spec: number of iterations to plot; ``None`` uses what is
            available.
        rec_type: ``"model"`` evaluates recommendations with ``true_func`` on
            the experiment's main module; ``"observations"`` and ``"mixed"``
            plot the stored ``obj_o`` / ``obj_om`` fields.
        average: ``"mean"`` or ``"median"``, the averaging over repeats.
        log_scale: plot ``log10`` of the values.
        violation_value: value substituted for ``None``/NaN results.
        constraint_tol: forwarded to ``true_func``.
        make_dist_plot: also plot the distance to ``module.true_sol()``.
        mainfile: overrides ``options['main_file']`` when given.
        stretch_x: rescale the x axis by ``n_iter_spec / n_iter``.
        task_comp_x: if set, keep only recommendations made when this task's
            completed-evaluation count increased.
        plot_wall_time: use elapsed minutes on the x axis.
        bin_size: width (minutes) of the wall-time bins for repeats.
        plot_separate: plot every repeat instead of the average.
        labels: ``';'``-separated legend labels.
        y_axis_label: overrides the default y axis label.
        x_axis_label: overrides the default x axis label.

    Raises:
        Exception: for an unknown ``average`` or ``rec_type``, a missing
            experiment, or ``make_dist_plot`` without a ``true_sol``.
    """

    # Create the figure that plots utility gap
    fig = dict()
    ax = dict()
    # averaging function
    if average == "mean":
        avg = np.mean
    elif average == "median":
        avg = np.median
    else:
        raise Exception("Unknown average %s" % average)

    fig['err'] = plt.figure()
    ax['err'] = fig['err'].add_subplot(1, 1, 1)
    if plot_wall_time:
        ax['err'].set_xlabel("wall time (min)", size=25)
    elif x_axis_label:
        ax['err'].set_xlabel(x_axis_label, size=25)
    else:
        ax['err'].set_xlabel('Number of function evaluations', size=25)
    ax['err'].tick_params(axis='both', which='major', labelsize=20)

    # Create the figure that plots L2 distance from solution
    fig['dist'] = plt.figure()
    ax['dist'] = fig['dist'].add_subplot(1, 1, 1)
    if x_axis_label:
        ax['dist'].set_xlabel(x_axis_label, size=25)
    else:
        ax['dist'].set_xlabel('Number of function evaluations', size=25)
    # Raw strings keep the LaTeX backslashes without relying on Python
    # passing unknown escape sequences through unchanged.
    if y_axis_label:
        ax['dist'].set_ylabel(y_axis_label, size=25)
    elif log_scale:
        ax['dist'].set_ylabel(r'$\log_{10}\, \ell_2$-distance', size=25)
    else:
        ax['dist'].set_ylabel(r'$\ell_2$-distance', size=25)
    ax['dist'].tick_params(axis='both', which='major', labelsize=20)

    db_document_name = 'recommendations'

    acq_names = list()
    for expt_dir in dirs:
        options = parse_config_file(expt_dir, 'config.json')
        experiment_name = options["experiment_name"]
        input_space = InputSpace(options["variables"])
        chooser_module = importlib.import_module('spearmint.choosers.' +
                                                 options['chooser'])
        chooser = chooser_module.init(input_space, options)
        db = MongoDB(database_address=options['database']['address'])
        jobs = load_jobs(db, experiment_name)
        hypers = db.load(experiment_name, 'hypers')
        tasks = parse_tasks_from_jobs(jobs, experiment_name, options,
                                      input_space)

        # Always bind `module` so the hasattr() checks below are safe when
        # rec_type is not "model" (previously a NameError).
        module = None
        if rec_type == "model":
            if mainfile is None:
                main_file = options['main_file']
            else:
                main_file = mainfile

            # TODO: make this nicer with proper importing
            sys.path.append(options['main_file_path'])
            try:
                if main_file.endswith('.py'):
                    module = importlib.import_module(main_file[:-3])
                else:
                    module = importlib.import_module(main_file)
            finally:
                # Restore sys.path even when the import fails.
                sys.path.remove(options['main_file_path'])

        obj, con = get_objectives_and_constraints(options)  # get the names
        obj = obj[0]  # only one objective
        print('Found %d constraints' % len(con))
        plot_utility_gap = rec_type == "model" and hasattr(module, 'true_val')

        if plot_utility_gap:
            print('PLOTTING UTILITY GAP')
            if y_axis_label:
                ax['err'].set_ylabel(y_axis_label, size=25)
            elif log_scale:
                ax['err'].set_ylabel(r'$\log_{10}$ utility gap', size=25)
            else:
                ax['err'].set_ylabel('utility gap', size=25)
        else:
            if y_axis_label:
                ax['err'].set_ylabel(y_axis_label, size=25)
            elif log_scale:
                ax['err'].set_ylabel(r'$\log_{10}$ objective value', size=25)
            else:
                ax['err'].set_ylabel('objective value', size=25)

        # Make the directory for the plots
        plots_dir = os.path.join(expt_dir, 'plots')
        if not os.path.isdir(plots_dir):
            os.mkdir(plots_dir)

        # if the module provides the location of the true solution, plot the distance to this solution vs iterations
        if make_dist_plot and not hasattr(module, 'true_sol'):
            raise Exception(
                "make_dist_plot turned on but cannot find true sol in the main_file"
            )

        # If repeat >= 0, then we are averaging a number of experiments
        # We assume the experiments are stored with the original name plus a hyphen plus the number
        n_repeat = int(n_repeat)
        if n_repeat < 0:
            recs = db.load(experiment_name, db_document_name)
            if recs is None:
                raise Exception(
                    "Could not find experiment %s in database at %s" %
                    (experiment_name, options['database']['address']))

            # the x axis represents the number of evals of a particular task given by task_comp_x
            # so we only take the data where this number was incremented, i.e. when this task was evaluated
            if task_comp_x:
                # only include recommendations when you finish a particular task
                new_recs = list()
                last_complete = 0
                for rec in recs:
                    cur_complete = rec['num_complete_tasks'][task_comp_x]
                    if cur_complete > last_complete:
                        last_complete = cur_complete
                        new_recs.append(rec)
                recs = new_recs

            # NOTE(review): the values below are computed for all recs, so an
            # n_iter_spec different from len(recs) gives x/y of different
            # lengths -- confirm intended usage.
            n_iter = len(recs) if n_iter_spec is None else n_iter_spec

            iters = range(n_iter)

            if plot_wall_time:
                if task_comp_x:
                    raise Exception("Do not use plot wall_time with task_x")
                iters = [rec['total_elapsed_time'] / 60.0 for rec in recs]
                iters = iters[:n_iter]
                iters = np.array(iters, dtype=float)

            print('Found %d iterations' % len(recs))

            if rec_type == "model":
                values = [
                    true_func(rec, module, violation_value, constraint_tol,
                              obj, con) for rec in recs
                ]

                if log_scale:
                    ax['err'].plot(iters, map(np.log10, values))
                else:
                    ax['err'].plot(iters, values)
            else:
                if rec_type == "observations":
                    observations = [x['obj_o'] for x in recs]
                elif rec_type == "mixed":
                    observations = [x['obj_om'] for x in recs]
                else:
                    raise Exception("unknown rec type")

                for i in xrange(len(observations)):
                    if observations[i] is None or np.isnan(observations[i]):
                        observations[i] = violation_value

                if log_scale:
                    ax['err'].plot(iters, np.log10(observations))
                else:
                    ax['err'].plot(iters, observations)

            if make_dist_plot:
                distances = [
                    params_norm(rec['params'], module.true_sol())
                    for rec in recs
                ]
                if log_scale:
                    ax['dist'].plot(iters, np.log10(distances))
                else:
                    ax['dist'].plot(iters, distances)
        else:
            # MULTIPLE REPEATS
            repeat_recs = [
                db.load(repeat_experiment_name(experiment_name, j),
                        db_document_name) for j in xrange(n_repeat)
            ]
            if None in repeat_recs:
                for i, repeat_rec in enumerate(repeat_recs):
                    if repeat_rec is None:
                        print('Could not load experiment %s repeat %d' % (
                            experiment_name, i))
                print('Exiting...')
                return

            if task_comp_x:
                # only include recommendations when you finish a particular task
                new_repeat_recs = list()
                for recs in repeat_recs:
                    recs = sorted(recs, key=lambda k: k['id'])  # sort by id
                    new_recs = list()
                    last_complete = 0
                    for rec in recs:
                        cur_complete = rec['num_complete_tasks'][task_comp_x]
                        if cur_complete == last_complete + 1:
                            last_complete = cur_complete
                            new_recs.append(rec)
                        elif cur_complete == last_complete:
                            pass
                        else:
                            print(
                                'WARNING: cur complete=%d, last_complete=%d' %
                                (cur_complete, last_complete))
                            break
                    new_repeat_recs.append(new_recs)

                repeat_recs = new_repeat_recs

            n_iter_each = map(len, repeat_recs)
            if plot_wall_time:
                # Do everything separately from here if plotting wall time.
                # We can't have a square array because we don't want to take
                # the minimum number of iterations: we want ALL iterations
                # for each repeat, and this number may differ between
                # repeats. So we store all times/values in a list of arrays
                # and then chop things up into bins.
                if rec_type != "model":
                    values = list()
                    wall_times = list()
                    for j in xrange(
                            n_repeat):  # loop over repeated experiments

                        wall_times.append(
                            np.array([
                                repeat_recs[j][i]['total_elapsed_time'] / 60.0
                                for i in xrange(n_iter_each[j])
                            ]))

                        if rec_type == "observations":
                            values.append([
                                repeat_recs[j][i]['obj_o']
                                for i in xrange(n_iter_each[j])
                            ])
                        elif rec_type == "mixed":
                            values.append([
                                repeat_recs[j][i]['obj_om']
                                for i in xrange(n_iter_each[j])
                            ])
                        else:
                            raise Exception("unknown rec type")

                        for i in xrange(n_iter_each[j]):
                            if values[-1][i] is None or np.isnan(
                                    values[-1][i]):
                                values[-1][i] = violation_value

                        values[-1] = np.array(values[-1])

                else:  # if plot wall time but using model
                    values = list()
                    wall_times = list()
                    for j in xrange(
                            n_repeat):  # loop over repeated experiments

                        # for this repeat, get all wall times
                        wall_times.append(
                            np.array([
                                repeat_recs[j][i]['total_elapsed_time'] / 60.0
                                for i in xrange(n_iter_each[j])
                            ]))

                        values_j = np.zeros(n_iter_each[j])
                        for i in xrange(
                                n_iter_each[j]):  # loop over iterations
                            val = true_func(repeat_recs[j][i], module, None,
                                            constraint_tol, obj, con)
                            # set to violation value here so we can print out this info...
                            if val is None or np.isnan(val):
                                values_j[i] = violation_value
                                print('Violation with params %s at repeat %d iter %d' % (
                                    paramify_no_types(
                                        repeat_recs[j][i]['params']), j, i))
                            else:
                                values_j[i] = val
                        values.append(values_j)

                # change the data structure to be time bins and include everything in
                # those time bins across repeats
                end_times = map(max, wall_times)
                for j in xrange(n_repeat):
                    print('end time for repeat %d: %f' % (j, end_times[j]))
                iters = np.arange(0.0, np.round(max(end_times)), bin_size)
                new_values = list()
                for i, timestep in enumerate(iters):
                    new_value = list()
                    for j in xrange(n_repeat):
                        new_value = np.append(
                            new_value, values[j][np.logical_and(
                                wall_times[j] >= timestep, wall_times[j] <
                                timestep + bin_size)].flatten())
                    # if a time bin is empty across all repeats:
                    if len(new_value) == 0:
                        if i == 0:
                            new_value = [violation_value]
                        else:
                            new_value = new_values[-1]
                    new_values.append(new_value)
                values = new_values

                # make the first value equal to the violation value (optional)
                iters = np.append(iters, max(iters) + bin_size)
                values.insert(0, np.array([violation_value]))

                # Average over the repeated experiments
                average_values = map(avg, values)
                errorbars = bootstrap_errorbars(values, log=log_scale, avg=avg)

                if log_scale:
                    ax['err'].errorbar(iters,
                                       np.log10(average_values),
                                       yerr=errorbars)
                else:
                    ax['err'].errorbar(iters, average_values, yerr=errorbars)

            else:
                # NOT WALL TIME

                n_iter = reduce(min, n_iter_each, np.inf)
                if n_iter_spec is None:
                    print('Found %d repeats with at least %d iterations' % (
                        n_repeat, n_iter))
                    print({i: n_iter_each[i] for i in xrange(n_repeat)})
                elif n_iter < n_iter_spec:
                    print('You specified %d iterations but there are only %d available... so plotting %d' % (
                        n_iter_spec, n_iter, n_iter))
                else:
                    n_iter = n_iter_spec
                    print('Plotting %d iterations' % n_iter)

                iters = range(n_iter)

                if rec_type != "model":
                    values = np.zeros((n_iter, n_repeat))
                    for j in xrange(
                            n_repeat):  # loop over repeated experiments
                        # Iterate over all iteration indices; indexing
                        # `iters[j]` yields a single int and is not iterable.
                        for i in iters:  # loop over iterations
                            if rec_type == "observations":
                                val = repeat_recs[j][i]['obj_o']
                            elif rec_type == "mixed":
                                val = repeat_recs[j][i]['obj_om']
                            else:
                                raise Exception("unknown rec type")
                            # Check the raw record value before it is stored
                            # in the float array.
                            if val is None or np.isnan(val):
                                val = violation_value
                            values[i, j] = val

                    print(values)

                else:
                    values = np.zeros((n_iter, n_repeat))
                    distances = np.zeros((n_iter, n_repeat))
                    for j in xrange(
                            n_repeat):  # loop over repeated experiments
                        for i in iters:  # loop over iterations
                            val = true_func(repeat_recs[j][i], module, None,
                                            constraint_tol, obj, con)
                            # set to violation value here so we can print out this info...
                            if val is None:
                                values[i, j] = violation_value
                                print('Violation with params %s at repeat %d iter %d' % (
                                    paramify_no_types(
                                        repeat_recs[j][i]['params']), j, i))
                            else:
                                values[i, j] = val

                            if make_dist_plot:
                                distances[i, j] = params_norm(
                                    repeat_recs[j][i]['params'],
                                    module.true_sol())

                if plot_separate:
                    if log_scale:
                        ax['err'].plot(iters, np.log10(values))
                    else:
                        ax['err'].plot(iters, values)

                else:
                    # Average over the repeated experiments
                    average_values = map(avg, values)
                    errorbars = bootstrap_errorbars(values,
                                                    log=log_scale,
                                                    avg=avg)

                    if stretch_x:
                        fctr = float(n_iter_spec) / float(n_iter)
                        iters = np.array(iters) * fctr
                        print('Stretching x axis by a factor of %f' % fctr)

                    if log_scale:
                        ax['err'].errorbar(iters,
                                           np.log10(average_values),
                                           yerr=errorbars)
                    else:
                        ax['err'].errorbar(iters,
                                           average_values,
                                           yerr=errorbars)

                    if make_dist_plot:
                        average_dist = map(avg, distances)
                        errorbars_dist = bootstrap_errorbars(distances,
                                                             log=log_scale,
                                                             avg=avg)
                        if log_scale:
                            ax['dist'].errorbar(iters,
                                                np.log10(average_dist),
                                                yerr=errorbars_dist)
                        else:
                            ax['dist'].errorbar(iters,
                                                average_dist,
                                                yerr=errorbars_dist)

        # Legend entry: acquisition function of the first task, with the
        # constrained-variant names used in the plots.
        acq_names.append(options["tasks"].values()[0]["acquisition"])
        if acq_names[-1] == 'PES':
            acq_names[-1] = 'PESC'
        if acq_names[-1] == 'ExpectedImprovement':
            acq_names[-1] = 'EIC'

    if labels:
        ax['err'].legend(labels.split(';'), fontsize=16, loc='lower left')
        ax['dist'].legend(labels.split(';'), fontsize=20)
    elif len(acq_names) > 1:
        ax['err'].legend(acq_names, fontsize=20)
        ax['dist'].legend(acq_names, fontsize=20)

    # save it in the last directory... (if there are multiple directories)
    if not plot_wall_time:
        if n_repeat >= 0:
            print('Made a plot with %d repeats and %d iterations' % (n_repeat,
                                                                     n_iter))
        else:
            print('Made a plot with %d iterations' % (n_iter))
    else:
        if n_repeat >= 0:
            print('Made a plot with %d repeats and %f minutes' % (n_repeat,
                                                                  max(iters)))
        else:
            print('Made a plot with %f minutes' % (max(iters)))

    file_prefix = '%s_' % average if n_repeat > 0 else ''
    file_postfix = '_wall_time' if plot_wall_time else ''
    fig['err'].tight_layout()
    figname = os.path.join(plots_dir,
                           '%serror%s' % (file_prefix, file_postfix))
    fig['err'].savefig(figname + '.pdf')
    fig['err'].savefig(figname + '.svg')
    print('Saved to %s' % figname)
    if make_dist_plot:
        fig['dist'].tight_layout()
        figname_dist = os.path.join(
            plots_dir, '%sl2_distance%s.pdf' % (file_prefix, file_postfix))
        fig['dist'].savefig(figname_dist)
        print('Saved to %s' % figname_dist)
Example #11
0
def main():
    """Run the Spearmint scheduling loop for one experiment directory.

    The command line takes one positional argument, the experiment
    directory, and an optional ``--config`` file name (default
    ``config.json``) that is looked up inside that directory.  After the
    configuration is loaded and defaults are filled in, the function loops
    forever: while a resource accepts jobs it asks ``get_suggestion`` for a
    job, hands it to ``resource.attemptDispatch`` and records the job as
    ``'pending'`` or ``'broken'`` through ``save_job``.

    Raises:
        SystemExit: if no experiment directory is given on the command line.
        Exception: if the experiment directory does not exist, or the
            configuration file cannot be read or parsed as JSON.
    """
    parser = optparse.OptionParser(usage="usage: %prog [options] directory")

    parser.add_option("--config",
                      dest="config_file",
                      help="Configuration file name.",
                      type="string",
                      default="config.json")

    (commandline_kwargs, args) = parser.parse_args()

    # Without this check a missing argument surfaced as a bare IndexError.
    if not args:
        parser.error("no experiment directory given")

    # Read in the config file
    expt_dir = os.path.realpath(args[0])
    if not os.path.isdir(expt_dir):
        raise Exception("Cannot find directory %s" % expt_dir)
    expt_file = os.path.join(expt_dir, commandline_kwargs.config_file)

    # Only I/O and JSON syntax errors are translated into the friendly
    # message; anything else (e.g. KeyboardInterrupt) propagates unchanged.
    try:
        with open(expt_file, 'r') as f:
            options = json.load(f, object_pairs_hook=OrderedDict)
    except (IOError, ValueError):
        raise Exception(
            "%s did not load properly. Perhaps a spurious comma?" %
            commandline_kwargs.config_file)
    options["config"] = commandline_kwargs.config_file

    resources = parse_resources_from_config(options)

    # Set sensible defaults for options
    options['chooser'] = options.get('chooser', 'default_chooser')
    options['tasks'] = options.get(
        'tasks', {'main': {
            'type': 'OBJECTIVE',
            'likelihood': 'GAUSSIAN'
        }})
    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Set DB address (stored back into options in either case)
    db_address = parse_db_address(options)
    if 'database' not in options:
        options['database'] = {'name': 'spearmint', 'address': db_address}
    else:
        options['database']['address'] = db_address

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' +
                                             options['chooser'])
    chooser = chooser_module.init(options)

    # Connect to the database
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    while True:

        for resource_name, resource in resources.iteritems():

            jobs = load_jobs(db, experiment_name)

            # If the resource is currently accepting more jobs
            # TODO: here cost will eventually also be considered: even if the
            #       resource is not full, we might wait because of cost incurred
            # Note: one resource is filled up before moving on to the next.
            # Changing "while" to "if" here would spread jobs round-robin.
            while resource.acceptingJobs(jobs):

                # Load jobs from DB
                # (move out of one or both loops?) would need to pass into load_tasks
                jobs = load_jobs(db, experiment_name)

                # Remove any broken jobs from pending.
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Get a suggestion for the next job
                suggested_job = get_suggestion(chooser, resource.tasks, db,
                                               expt_dir, options,
                                               resource_name)

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(experiment_name,
                                                      suggested_job,
                                                      db_address, expt_dir)

                # Set the status of the job appropriately
                # (successfully submitted or not)
                if process_id is None:
                    suggested_job['status'] = 'broken'
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

        # If no resources are accepting jobs, sleep
        # (they might be accepting if suggest takes a while and so some jobs
        # already finished by the time this point is reached)
        if tired(db, experiment_name, resources):
            time.sleep(options.get('polling-time', 5))
Example #12
0
def main(expt_dir):
    """Score every stored recommendation of an experiment and log the results.

    Changes into ``expt_dir``, imports the hard-coded ``wrapper`` module from
    it, then walks the ``recommendations`` collection by increasing id
    (starting at 1) until an id is missing.  For each recommendation the
    ``params_om`` point is evaluated through ``wrapper.main``; the negated
    mean of the returned ``'score'`` is appended to ``value_solution_om.txt``
    and the parameter vector is appended to ``params_om.txt``.

    Args:
        expt_dir: experiment directory holding ``config.json`` and the
            ``wrapper`` module.
    """
    os.chdir(expt_dir)
    sys.path.append(expt_dir)

    options = parse_config_file(expt_dir, 'config.json')
    experiment_name = options["experiment-name"]

    # The objective module name is hard-coded; options['main_file'] is not read.
    main_file = 'wrapper'
    if main_file[-3:] == '.py':
        main_file = main_file[:-3]
    module = __import__(main_file)

    input_space = InputSpace(options["variables"])
    chooser_module = importlib.import_module('spearmint.choosers.' +
                                             options['chooser'])
    # chooser and hypers are not used below; the calls are kept because
    # whether they have side effects cannot be determined from here.
    chooser = chooser_module.init(input_space, options)
    db = MongoDB(database_address=options['database']['address'])
    jobs = load_jobs(db, experiment_name)
    hypers = db.load(experiment_name, 'hypers')
    objective = parse_tasks_from_jobs(jobs, experiment_name, options,
                                      input_space).values()[0]

    def create_fun(task):
        # `task` is accepted for symmetry with sibling scripts but is unused.
        def fun(params, gradient=False):
            is_batch = len(params.shape) > 1 and params.shape[1] > 1
            if not is_batch:
                return module.main(
                    0, paramify_no_types(input_space.paramify(params)))

            # NOTE(review): this branch stores the raw return of module.main
            # into a float array, while the caller below indexes the
            # single-point result with ['score'] -- confirm the batch path
            # is ever exercised.
            values = np.zeros(params.shape[0])
            for i in range(params.shape[0]):
                param = params[i, :].flatten()
                param = input_space.from_unit(np.array([param])).flatten()
                values[i] = module.main(
                    0, paramify_no_types(input_space.paramify(param)))
            return values

        return fun

    fun = create_fun(objective)

    # We iterate through each recommendation made; ids start at 1 and the
    # first missing id ends the walk.
    i = 0
    while True:

        recommendation = db.load(experiment_name, 'recommendations',
                                 {'id': i + 1})
        if recommendation is None:
            break

        solution_om = input_space.vectorify(recommendation['params_om'])

        # Average over M evaluations (M is currently 1) and flip the sign.
        M = 1
        vsom_acum = 0.0
        for _ in range(M):
            vsom_acum += fun(solution_om, gradient=False)['score']
        values_solution_om = -vsom_acum / float(M)

        # f.write emits the same bytes as the old `print >> f` statement and
        # also works on Python 3, where the chevron form writes nothing.
        with open('value_solution_om.txt', 'a') as f:
            f.write("%lf\n" % (values_solution_om))

        with open('params_om.txt', 'a') as f_handle:
            np.savetxt(f_handle,
                       np.array([solution_om]),
                       delimiter=' ',
                       newline='\n')

        i += 1
Example #13
0
def main():
    """Dispatch Spearmint jobs until the convergence criterion stops the loop.

    Options come from ``get_options()``.  While
    ``stoppingCriterion(past_best, converg_num)`` holds, every resource is
    filled with suggested jobs; after each dispatch the chooser's current
    best value is appended to a window of at most ``converg_num`` non-None
    entries.  When the loop ends, the elapsed wall time is passed to
    ``runBestParams`` together with the chooser and database.
    """
    options, expt_dir = get_options()
    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_name = 'spearmint.choosers.' + options['chooser']
    chooser = importlib.import_module(chooser_name).init(options)
    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Connect to the database
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    # Sliding window of the most recent best values, used for convergence.
    converg_num = 20
    past_best = []
    startTraining = time.time()

    while stoppingCriterion(past_best, converg_num):

        for resource_name, resource in resources.iteritems():

            jobs = load_jobs(db, experiment_name)

            # One resource is filled up completely before the next is visited.
            # TODO: here cost will eventually also be considered: even if the
            #       resource is not full, we might wait because of cost incurred
            while resource.acceptingJobs(jobs):

                # Refresh the job list and drop broken jobs from pending.
                jobs = load_jobs(db, experiment_name)
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Ask for the next job and try to submit it.
                suggested_job = get_suggestion(chooser, resource.tasks, db,
                                               expt_dir, options,
                                               resource_name)
                process_id = resource.attemptDispatch(experiment_name,
                                                      suggested_job,
                                                      db_address, expt_dir)

                # A missing process id means the submission failed.
                if process_id is not None:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                else:
                    suggested_job['status'] = 'broken'
                save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

                # Record current best, skipping None, and trim the window.
                best_val, best_input = chooser.get_best()
                past_best = [
                    val for val in past_best + [best_val] if val is not None
                ]
                if len(past_best) > converg_num:
                    del past_best[0]

        # If no resources are accepting jobs, sleep
        # (they might be accepting if suggest takes a while and so some jobs
        # already finished by the time this point is reached)
        if tired(db, experiment_name, resources):
            print("Sleeping...")
            time.sleep(options.get('polling-time', 5))

    trainingTime = time.time() - startTraining

    # After training, test best results
    runBestParams(5000, chooser, db, experiment_name, trainingTime)
Example #14
0
def launch(db_address, experiment_name, job_id):
    """
    Run the job with the given id and write its outcome back to the database.

    The job record is loaded, stamped with a start time, executed through the
    launcher matching its ``'language'`` field, and saved again as either
    ``'complete'`` (with ``'values'``) or ``'broken'``.  Any error raised
    while running the job or normalising its result is printed and turns the
    job into a broken one instead of propagating.
    """

    db = MongoDB(database_address=db_address)
    job = db.load(experiment_name, 'jobs', {'id': job_id})

    start_time = time.time()
    job['start time'] = start_time
    db.save(job, experiment_name, 'jobs', {'id': job_id})

    waited = start_time - job['submit time']
    sys.stderr.write(
        "Job launching after %0.2f seconds in submission.\n" % (waited))

    success = False

    try:
        language = job['language'].lower()

        if language == 'matlab':
            result = matlab_launcher(job)
        elif language == 'python':
            result = python_launcher(job)
        elif language == 'shell':
            result = shell_launcher(job)
        elif language == 'mcr':
            result = mcr_launcher(job)
        else:
            raise Exception("That language has not been implemented.")

        if not isinstance(result, dict):
            task_names = job['tasks']
            if np.isnan(result):
                # Returning just NaN means NaN on all tasks
                result = dict.fromkeys(task_names, np.nan)
            elif len(task_names) == 1:
                # Only one named job
                result = {task_names[0]: result}
            else:
                result = {'main': result}

        if set(result.keys()) != set(job['tasks']):
            raise Exception("Result task names %s did not match job task names %s." % (result.keys(), job['tasks']))

        success = True
    except:
        # Deliberately broad: whatever the user code does, the job must end
        # up marked as broken rather than stay pending.
        import traceback
        traceback.print_exc()
        sys.stderr.write("Problem executing the function\n")
        print(sys.exc_info())

    end_time = time.time()
    elapsed = end_time - start_time

    if not success:
        sys.stderr.write("Job failed in %0.2f seconds.\n" % (elapsed))

        # Update metadata.
        job['status'] = 'broken'
        job['end time'] = end_time
    else:
        sys.stderr.write("Completed successfully in %0.2f seconds. [%s]\n"
                         % (elapsed, result))

        job['values'] = result
        job['status'] = 'complete'
        job['end time'] = end_time

    db.save(job, experiment_name, 'jobs', {'id': job_id})
Example #15
0
def main():
    """
    Usage: python make_plots.py PATH_TO_DIRECTORY

    Fits the chooser on the stored data of the first resource, evaluates the
    objective model over the chooser's grid and shows a 3-D plot of the
    predicted mean with error bars plus the observed points.

    TODO: Some aspects of this function are specific to the simple branin
    example (two inputs, a task called 'main').  We should clean this up so
    that interpretation of plots is clearer and so that it works in more
    general cases (e.g. if objective likelihood is binomial then values
    should not be unstandardized).
    """
    options, expt_dir = get_options()
    print("options:")
    print_dict(options)

    # reduce the grid size
    options["grid_size"] = 400

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_name = 'spearmint.choosers.' + options['chooser']
    chooser = importlib.import_module(chooser_name).init(options)
    print("chooser", chooser)
    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Connect to the database
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    # testing below here
    jobs = load_jobs(db, experiment_name)
    remove_broken_jobs(db, jobs, experiment_name, resources)

    print("resources:", resources)
    print_dict(resources)
    # Only the first resource is inspected.
    resource = next(resources.itervalues())

    task_options = dict(
        (name, options["tasks"][name]) for name in resource.tasks)
    print("task_options:")
    print_dict(task_options)

    task_group = load_task_group(db, options, resource.tasks)
    print("task_group", task_group)
    print("tasks:")
    print_dict(task_group.tasks)

    hypers = load_hypers(db, experiment_name)
    print("loaded hypers", hypers)

    hypers = chooser.fit(task_group, hypers, task_options)
    print("\nfitted hypers:")
    print_dict(hypers)

    lp, best_x = chooser.best()
    best_x = best_x.flatten()
    print("best", lp, best_x)
    bestp = task_group.paramify(task_group.from_unit(best_x))
    print("expected best position", bestp)

    # get the grid of points and the model's prediction over it
    grid = chooser.grid
    print("chooser models:", chooser.models)
    print_dict(chooser.models)
    obj_model = chooser.models[chooser.objective['name']]
    obj_mean, obj_var = obj_model.function_over_hypers(obj_model.predict, grid)

    # un-normalize the function values and variances
    obj_task = task_group.tasks['main']

    def to_original_scale(value):
        return obj_task.unstandardize_mean(
            obj_task.unstandardize_variance(value))

    obj_mean = [to_original_scale(mean) for mean in obj_mean]
    obj_std = [
        obj_task.unstandardize_variance(np.sqrt(var)) for var in obj_var
    ]

    grid = [task_group.from_unit(point) for point in grid]

    # One (x, y, mean, std) row per grid point, then split into columns.
    rows = [(point[0], point[1], mean, std)
            for point, mean, std in izip(grid, obj_mean, obj_std)]
    x = [row[0] for row in rows]
    y = [row[1] for row in rows]
    m = [row[2] for row in rows]
    sig = [row[3] for row in rows]

    fig = plt.figure(dpi=100)
    ax = fig.add_subplot(111, projection='3d')

    ax.plot(x, y, m, marker='.', linestyle="None")

    # plot errorbars: a vertical segment of one std on either side of the mean
    for xi, yi, mi, si in zip(x, y, m, sig):
        ax.plot([xi, xi], [yi, yi], [mi + si, mi - si],
                marker="_",
                color='k')

    # get the observed points
    task = task_group.tasks['main']
    idata = task.valid_normalized_data_dict
    observed = np.array(
        [task_group.from_unit(point) for point in idata["inputs"]])
    vals = [to_original_scale(value) for value in idata["values"]]

    ax.plot(observed[:, 0],
            observed[:, 1],
            vals,
            marker='o',
            color="r",
            linestyle="None")

    plt.show()
Example #16
0
def main(expt_dir):
    """Compute hypervolumes for a constrained multi-objective experiment.

    Changes into ``expt_dir``, imports the hard-coded ``OSY_no_noisy`` module
    and splits the experiment's tasks into objectives (``type ==
    'objective'``) and constraints (everything else).  A reference solution
    is obtained by solving the problem on a Sobol grid and its hypervolume is
    appended to ``hypervolume_solution.txt``.  Then every stored
    recommendation (ids 1, 2, ... until one is missing) is scored: its
    hypervolume, forced to 0.0 when any constraint function is negative, is
    appended to ``hypervolumes.txt`` and its ``num_complete_tasks`` values to
    ``evaluations.txt``.

    Args:
        expt_dir: experiment directory holding ``config.json`` and the
            problem module.

    Returns:
        -1 if the experiment has fewer than two tasks or is not a PYTHON
        experiment; otherwise None.

    Raises:
        AssertionError: if there are fewer than two objectives or no
            constraint.
    """
    os.chdir(expt_dir)
    sys.path.append(expt_dir)

    options = parse_config_file(expt_dir, 'config.json')
    experiment_name = options["experiment-name"]

    # The problem module name is hard-coded; options['main_file'] is not read.
    main_file = 'OSY_no_noisy'
    if main_file[-3:] == '.py':
        main_file = main_file[:-3]
    module = __import__(main_file)

    input_space = InputSpace(options["variables"])
    chooser_module = importlib.import_module('spearmint.choosers.' +
                                             options['chooser'])
    # chooser and hypers are not used below; the calls are kept because
    # whether they have side effects cannot be determined from here.
    chooser = chooser_module.init(input_space, options)
    db = MongoDB(database_address=options['database']['address'])
    jobs = load_jobs(db, experiment_name)
    hypers = db.load(experiment_name, 'hypers')
    tasks = parse_tasks_from_jobs(jobs, experiment_name, options, input_space)

    # print(...) with one string argument prints the same text on Python 2
    # and Python 3.
    if len(tasks) < 2:
        print('Not a multi-objective problem!')
        return -1

    if options['language'] != "PYTHON":
        print('Only python programs supported!')
        return -1

    objectives = dict()
    constraints = dict()

    for name in tasks:
        if tasks[name].type == 'objective':
            objectives[name] = tasks[name]
        else:
            constraints[name] = tasks[name]

    assert len(objectives) >= 2 and len(constraints) >= 1

    def create_fun(task):
        """Return a function evaluating output `task` of the problem module."""
        def fun(params, gradient=False):
            is_batch = len(params.shape) > 1 and params.shape[1] > 1
            if not is_batch:
                return module.main(
                    0, paramify_no_types(input_space.paramify(params)))[task]

            # One evaluation per row; rows are mapped back from the unit cube.
            values = np.zeros(params.shape[0])
            for i in range(params.shape[0]):
                param = params[i, :].flatten()
                param = input_space.from_unit(np.array([param])).flatten()
                values[i] = module.main(
                    0,
                    paramify_no_types(input_space.paramify(param)))[task]
            return values

        return fun

    funs_o = [create_fun(task) for task in objectives]
    funs_c = [create_fun(task) for task in constraints]

    moop = MOOP_basis_functions(funs_o,
                                input_space.num_dims,
                                constraints=funs_c)

    grid = sobol_grid.generate(input_space.num_dims,
                               grid_size=1000 * input_space.num_dims)

    # We only retain the feasible points
    moop.solve_using_grid(grid)

    reference = np.ones(len(objectives)) * 1e3

    hyper_volume_solution = moop.get_hypervolume(reference.tolist())

    # front and pareto_set are extracted but not used further in this script.
    result = moop.compute_pareto_front_and_set()
    front = result['frontier']
    pareto_set = result['pareto_set']

    # f.write emits the same bytes as the old `print >> f` statement and
    # also works on Python 3, where the chevron form writes nothing.
    with open('hypervolume_solution.txt', 'a') as f:
        f.write("%lf\n" % (hyper_volume_solution))

    # We iterate through each recommendation made; ids start at 1 and the
    # first missing id ends the walk.
    i = 0
    while True:

        recommendation = db.load(experiment_name, 'recommendations',
                                 {'id': i + 1})
        if recommendation is None:
            break

        solution = input_space.to_unit(
            input_space.vectorify(recommendation['params']))

        if len(solution.shape) == 1:
            solution = solution.reshape((1, len(solution)))

        # We compute the objective values associated to this recommendation.
        # The values are not used below, but the evaluations are kept so the
        # problem module is called exactly as before.
        values_solution = np.zeros((solution.shape[0], len(objectives)))

        for j in range(values_solution.shape[0]):
            for k in range(values_solution.shape[1]):
                values_solution[j, k] = funs_o[k](solution[j:(j + 1), :])

        moop = MOOP_basis_functions(funs_o, input_space.num_dims)

        moop.set_population(solution)

        hyper_volume = moop.get_hypervolume(reference.tolist())

        # We make sure that there are no infeasible points recommended.
        # If there are infeasible recommendations we return 0 as the
        # hypervolume.  all() stops at the first violated constraint, just
        # like the chained `and` it replaces.
        all_feasible = all(
            not np.any(constraint(solution) < 0) for constraint in funs_c)

        if not all_feasible:
            hyper_volume = 0.0

        with open('hypervolumes.txt', 'a') as f:
            f.write("%lf\n" % (hyper_volume))

        # list(...) keeps np.array happy when .values() is a view (Python 3).
        with open('evaluations.txt', 'a') as f_handle:
            np.savetxt(
                f_handle,
                np.array([list(recommendation['num_complete_tasks'].values())]),
                delimiter=' ',
                newline='\n')

        i += 1
def main(expt_dir):
    """Compute hypervolume and distance metrics for a multi-objective run.

    Changes into ``expt_dir``, imports the hard-coded ``prog_no_noisy``
    module and requires every task to have ``type == 'objective'``.  A
    reference solution is obtained by solving the problem on a Sobol grid;
    its hypervolume is appended to ``hypervolume_solution.txt``.  Then every
    stored recommendation (ids 1, 2, ... until one is missing) is scored and
    one line per recommendation is appended to ``hypervolumes.txt``, the four
    ``mean_min_distance_*.txt`` files and ``evaluations.txt``.

    Args:
        expt_dir: experiment directory holding ``config.json`` and the
            problem module.

    Returns:
        -1 if the experiment has fewer than two tasks, is not a PYTHON
        experiment, or contains a non-objective task; otherwise None.
    """
    os.chdir(expt_dir)
    sys.path.append(expt_dir)

    options = parse_config_file(expt_dir, 'config.json')
    experiment_name = options["experiment-name"]
    # The configured main file is overridden with a fixed module name.
    options['main_file'] = 'prog_no_noisy'

    main_file = options['main_file']
    if main_file[-3:] == '.py':
        main_file = main_file[:-3]
    module = __import__(main_file)

    input_space = InputSpace(options["variables"])
    chooser_module = importlib.import_module('spearmint.choosers.' +
                                             options['chooser'])
    # chooser and hypers are not used below; the calls are kept because
    # whether they have side effects cannot be determined from here.
    chooser = chooser_module.init(input_space, options)
    db = MongoDB(database_address=options['database']['address'])
    jobs = load_jobs(db, experiment_name)
    hypers = db.load(experiment_name, 'hypers')
    tasks = parse_tasks_from_jobs(jobs, experiment_name, options, input_space)

    # print(...) with one string argument prints the same text on Python 2
    # and Python 3.
    if len(tasks) < 2:
        print('Not a multi-objective problem!')
        return -1

    if options['language'] != "PYTHON":
        print('Only python programs supported!')
        return -1

    for task in tasks:
        if tasks[task].type != 'objective':
            print('Not a multi-objective problem!')
            return -1

    def create_fun(task):
        """Return a function evaluating output `task` of the problem module."""
        def fun(params, gradient=False):

            if len(params.shape) > 1 and params.shape[1] > 1:
                params = params.flatten()

            # Map the point back from the unit cube before evaluating.
            params = input_space.from_unit(np.array([params])).flatten()

            return module.main(
                0, paramify_no_types(input_space.paramify(params)))[task]

        return fun

    funs = [create_fun(task) for task in tasks]

    moop = MOOP_basis_functions(funs, input_space.num_dims)

    grid = sobol_grid.generate(input_space.num_dims,
                               grid_size=1000 * input_space.num_dims)

    moop.solve_using_grid(grid)

    # Fixed reference point; an earlier variant derived it with
    # find_reference_point_using_direct(tasks, module, input_space) and
    # inflated it by 10%.
    reference = np.ones(len(tasks)) * 7

    hyper_volume_solution = moop.get_hypervolume(reference.tolist())

    result = moop.compute_pareto_front_and_set()
    front = result['frontier']
    pareto_set = result['pareto_set']

    # All output files are opened in append mode, so results from earlier
    # runs are kept.  f.write emits the same bytes as the old `print >> f`
    # statement and also works on Python 3.
    with open('hypervolume_solution.txt', 'a') as f:
        f.write("%lf\n" % (hyper_volume_solution))

    # We iterate through each recommendation made; ids start at 1 and the
    # first missing id ends the walk.
    i = 0
    while True:

        recommendation = db.load(experiment_name, 'recommendations',
                                 {'id': i + 1})
        if recommendation is None:
            break

        solution = input_space.to_unit(
            input_space.vectorify(recommendation['params']))

        if len(solution.shape) == 1:
            solution = solution.reshape((1, len(solution)))

        # We compute the objective values associated to this recommendation
        values_solution = np.zeros((solution.shape[0], len(tasks)))

        for j in range(values_solution.shape[0]):
            for k in range(values_solution.shape[1]):
                values_solution[j, k] = funs[k](solution[j:(j + 1), :])

        moop = MOOP_basis_functions(funs, input_space.num_dims)

        moop.set_population(solution)

        hyper_volume = moop.get_hypervolume(reference.tolist())

        with open('hypervolumes.txt', 'a') as f:
            f.write("%lf\n" % (hyper_volume))

        with open('mean_min_distance_to_frontier.txt', 'a') as f:
            f.write("%lf\n" % (average_min_distance(values_solution, front)))

        with open('mean_min_distance_from_frontier.txt', 'a') as f:
            f.write("%lf\n" % (average_min_distance(front, values_solution)))

        with open('mean_min_distance_to_pareto_set.txt', 'a') as f:
            f.write("%lf\n" % (average_min_distance(
                input_space.from_unit(solution),
                input_space.from_unit(pareto_set))))

        with open('mean_min_distance_from_pareto_set.txt', 'a') as f:
            f.write("%lf\n" % (average_min_distance(
                input_space.from_unit(pareto_set),
                input_space.from_unit(solution))))

        # list(...) keeps np.array happy when .values() is a view (Python 3).
        with open('evaluations.txt', 'a') as f_handle:
            np.savetxt(
                f_handle,
                np.array([list(recommendation['num_complete_tasks'].values())]),
                delimiter=' ',
                newline='\n')

        i += 1
Example #18
0
class Spearmint(Printer):
    """Proposes parameter sets by running Spearmint against a scratch MongoDB.

    The configuration file is parsed once at construction.  ``choose`` either
    draws a uniform random sample (no observations yet) or starts a fresh
    ``mongod`` under ``work_dir``, stores the new observations as completed
    jobs, asks Spearmint for a suggestion and shuts the database down again.
    """

    def __init__(self, config_file, work_dir):
        """Parse `config_file` and remember `work_dir` for later sampling."""
        Printer.__init__(self, 'Spearmint', color='grey')
        self.work_dir = work_dir
        print(config_file)
        self._parse_config_file(config_file)
        # Fall back to a batch of one when the config has no
        # resources/my-machine/max-concurrent entry.
        try:
            self.batch_size = self.param_dict['resources']['my-machine'][
                'max-concurrent']
        except KeyError:
            self.batch_size = 1
        # Every observation accepted so far, across calls to choose().
        self.all_params, self.all_losses = [], []

    def rand_gens(self, var_type='float', size=1):
        """Return uniform samples in [0, 1) of shape `size`.

        Raises:
            NotImplementedError: for any `var_type` other than 'float'.
        """
        if var_type == 'float':
            return np.random.uniform(low=0, high=1, size=size)
        else:
            raise NotImplementedError

    def _parse_config_file(self, config_file):
        """Load the JSON config and record each variable's name and size."""
        self.json_parser = ParserJSON(file_name=config_file)
        self.json_parser.parse()
        self.param_dict = self.json_parser.param_dict

        # now get the total number of variables
        # and create a dictionary with the size of each variable
        self.total_size = 0
        self.var_sizes = []
        self.var_names = []
        for var_name, var_dict in self.param_dict['variables'].items():
            self.total_size += var_dict['size']
            self.var_sizes.append(int(var_dict['size']))
            self.var_names.append(var_name)

    def _generate_uniform(self, num_samples=10):
        """Fill self.proposed with `num_samples` uniform random points.

        Each variable contributes `size` rows of `num_samples` draws; the
        stacked rows are transposed so that self.proposed has one row per
        sample.  The per-variable draws are also kept in self.container.
        """
        self.container, self.sampled_params = {}, {}
        values = []
        for var_name in self.var_names:
            var_dict = self.param_dict['variables'][var_name]
            sampled_values = self.rand_gens(
                var_type=var_dict['type'],
                size=(var_dict['size'], num_samples))
            values.extend(sampled_values)
            self.container[var_name] = sampled_values
        values = np.array(values)
        self.proposed = values.transpose()

    def _parse_observations(self, observations):
        """Extract the observations that have not been recorded before.

        An observation counts as new when its parameter vector is more than
        1e-6 (Euclidean norm) away from every vector already stored in
        self.all_params.  New observations are appended to self.all_params /
        self.all_losses and also returned.

        Returns:
            (all_params, all_losses): the new parameter vectors (lists) and
            their losses, in input order.
        """
        all_params, all_losses = [], []
        for observation in observations:
            params = []
            for var_name in self.var_names:
                params.extend(observation[var_name]['samples'])
            if len(self.all_params) > 0:
                # Both sides are plain lists, so they must be converted
                # before subtracting; `list - list` raises TypeError.
                candidate = np.asarray(params)
                closest = np.amin([
                    np.linalg.norm(candidate - np.asarray(old_param))
                    for old_param in self.all_params
                ])
                if closest > 1e-6:
                    all_losses.append(observation['loss'])
                    all_params.append(params)
            else:
                all_losses.append(observation['loss'])
                all_params.append(params)
        self.all_params.extend(all_params)
        self.all_losses.extend(all_losses)
        return all_params, all_losses

    def _create_mongo_instance(self):
        """Start a forked mongod on an empty directory under work_dir."""
        self.db_path = '%s/db_%s/' % (self.work_dir,
                                      self.param_dict['experiment-name'])
        print(self.db_path)
        # Best effort: the directory may simply not exist yet.
        shutil.rmtree(self.db_path, ignore_errors=True)
        os.mkdir(self.db_path)
        subprocess.call('mongod --fork --logpath %s/mongodb.log --dbpath %s' %
                        (self.db_path, self.db_path),
                        shell=True)

    def _shutdown_mongo_instance(self):
        """Ask the mongod started by _create_mongo_instance to shut down."""
        subprocess.call(
            'mongod --shutdown --logpath %s/mongodb.log --dbpath %s' %
            (self.db_path, self.db_path),
            shell=True)

    def _create_spearmint_parameters(self):
        """Start the database and build the Spearmint chooser and handles."""
        self._create_mongo_instance()
        self.options, self.exp_dir = get_options(self.work_dir)
        self.resources = parse_resources_from_config(self.options)
        self.chooser_module = importlib.import_module('spearmint.choosers.' +
                                                      self.options['chooser'])
        self.chooser = self.chooser_module.init(self.options)
        self.experiment_name = self.options.get('experiment-name',
                                                'unnamed_experiment')

        self.db_address = self.options['database']['address']
        self.db = MongoDB(database_address=self.db_address)

    def _sample_parameter_sets(self, num_samples, observations):
        """Store new observations as jobs and collect Spearmint's suggestion.

        For every resource one suggestion is requested and repeated
        `num_samples` times in self.proposed.  The mongod instance is shut
        down afterwards, also when storing or suggesting fails.
        """
        # NOTE(review): only observations not seen in earlier calls are
        # returned here, yet the database directory is recreated on every
        # call -- confirm that earlier observations are meant to be absent.
        all_params, all_losses = self._parse_observations(observations)
        self._create_spearmint_parameters()

        try:
            # dump all observations in database
            for index, param in enumerate(all_params):
                print('PARAM', param, all_losses[index])
                params = {}
                start_index = 0
                for var_name in self.var_names:
                    var_dict = self.param_dict['variables'][var_name]
                    stop_index = start_index + var_dict['size']
                    params[var_name] = {
                        'type': var_dict['type'],
                        'values': np.array(param[start_index:stop_index]),
                    }
                    start_index = stop_index
                job = {
                    'id': index + 1,
                    'expt_dir': self.work_dir,
                    'tasks': ['main'],
                    'resource': 'my-machine',
                    'main-file': 'main_file.py',
                    'language': 'PYTHON',
                    'status': 'new',
                    'submit time': time.time(),
                    'start time': time.time(),
                    'end time': None,
                    'params': params
                }
                # The pause separates the recorded start and end times.
                time.sleep(0.1)
                job['values'] = {'main': all_losses[index]}
                job['status'] = 'complete'
                job['end time'] = time.time()

                self.db.save(job, self.experiment_name, 'jobs',
                             {'id': job['id']})

            self.proposed = []
            for resource_name, resource in self.resources.items():
                print('RUNNING SPEARMINT')
                suggested_job = get_suggestion(self.chooser, resource.tasks,
                                               self.db, self.exp_dir,
                                               self.options, resource_name)
                print('DONE')
                vector = []
                for var_name in self.var_names:
                    vector.extend(suggested_job['params'][var_name]['values'])
                vector = np.array(vector)
                # The same suggestion is repeated for the whole batch.
                self.proposed.extend([vector] * num_samples)

            print('PROPOSED', self.proposed)
        finally:
            self._shutdown_mongo_instance()

    def choose(self, num_samples=None, observations=None):
        """Return proposed parameter vectors.

        Args:
            num_samples: number of copies of the suggestion to return when
                observations are given; defaults to self.batch_size.
            observations: previous evaluations; when empty or None a single
                uniform random point is generated instead.

        Returns:
            self.proposed: a list of vectors (with observations) or a 2-D
            array with one row (without).
        """
        current_dir = os.getcwd()
        os.chdir(self.work_dir)

        # Restore the caller's working directory even if sampling fails.
        try:
            if not num_samples:
                num_samples = self.batch_size

            if observations:
                self._print('proposing samples')
                self._sample_parameter_sets(num_samples, observations)
            else:
                self._print('choosing uniformly')
                # Always a single point, regardless of num_samples.
                self._generate_uniform(1)
        finally:
            os.chdir(current_dir)

        return self.proposed
Example #19
0
def launch(db_address, experiment_name, job_id):
    """
    Load job ``job_id`` from the database, run it with the launcher that
    matches its language, and write the outcome back.

    The job is stamped with its start time and saved before it runs. When the
    launcher succeeds the job is saved with its result values and status
    'complete'; on any failure it is saved with status 'broken'. The end time
    is recorded in both cases.
    """
    db = MongoDB(database_address=db_address)
    job = db.load(experiment_name, 'jobs', {'id': job_id})

    # Record when the job actually started and persist that right away.
    start_time = time.time()
    job['start time'] = start_time
    db.save(job, experiment_name, 'jobs', {'id': job_id})

    queued_for = start_time - job['submit time']
    sys.stderr.write("Job launching after %0.2f seconds in submission.\n"
                     % queued_for)

    success = False
    try:
        language = job['language'].lower()
        if language == 'matlab':
            result = matlab_launcher(job)
        elif language == 'python':
            result = python_launcher(job)
        elif language == 'shell':
            result = shell_launcher(job)
        elif language == 'mcr':
            result = mcr_launcher(job)
        else:
            raise Exception("That language has not been implemented.")

        # Normalise a bare (non-dict) return value into {task name: value}.
        if not isinstance(result, dict):
            if np.isnan(result):
                # A lone NaN stands for NaN on every task of the job.
                result = dict.fromkeys(job['tasks'], np.nan)
            elif len(job['tasks']) == 1:
                # Exactly one named task: attribute the value to it.
                result = {job['tasks'][0]: result}
            else:
                result = {'main': result}

        if set(result.keys()) != set(job['tasks']):
            raise Exception("Result task names %s did not match job task names %s." % (list(result.keys()), job['tasks']))

        success = True
    except:
        # Anything that goes wrong while running the job marks it as broken
        # below; the details are only reported on the console.
        import traceback
        traceback.print_exc()
        sys.stderr.write("Problem executing the function\n")
        print(sys.exc_info())

    end_time = time.time()
    elapsed = end_time - start_time

    if not success:
        sys.stderr.write("Job failed in %0.2f seconds.\n" % elapsed)
        job['status'] = 'broken'
    else:
        sys.stderr.write("Completed successfully in %0.2f seconds. [%s]\n"
                         % (elapsed, result))
        job['values'] = result
        job['status'] = 'complete'
    job['end time'] = end_time

    db.save(job, experiment_name, 'jobs', {'id': job_id})
Example #20
0
def main(expt_dir, repeat=None):
    """Fit the chooser on the stored jobs and write 2D diagnostic plots.

    Reads ``config.json`` from ``expt_dir``, loads the experiment's jobs and
    hyperparameters from the database, fits the chooser, and then plots the
    hyperparameters and the mean/variance of every model, the constraints
    (when the chooser reports any) and the acquisition function into
    ``<expt_dir>/plots``.

    Args:
        expt_dir: Experiment directory containing ``config.json``.
        repeat: When not ``None``, passed to ``repeat_experiment_name`` to
            derive the experiment name.

    Raises:
        Exception: if the input space does not have exactly two dimensions.
    """
    def ensure_dir(path):
        # Create one directory level unless it is already there.
        if not os.path.isdir(path):
            os.mkdir(path)

    options = parse_config_file(expt_dir, 'config.json')
    experiment_name = options["experiment-name"]
    if repeat is not None:
        experiment_name = repeat_experiment_name(experiment_name, repeat)

    input_space = InputSpace(options["variables"])

    chooser_path = 'spearmint.choosers.' + options['chooser']
    chooser = importlib.import_module(chooser_path).init(input_space, options)

    db = MongoDB(database_address=options['database']['address'])
    jobs = load_jobs(db, experiment_name)
    hypers = db.load(experiment_name, 'hypers')

    if input_space.num_dims != 2:
        raise Exception("This plotting script is only for 2D optimizations. This problem has %d dimensions." % input_space.num_dims)

    tasks = parse_tasks_from_jobs(jobs, experiment_name, options, input_space)
    hypers = chooser.fit(tasks, hypers)

    print('\nHypers:')
    print_hypers(hypers)

    recommendation = chooser.best()
    best_value = recommendation['model_model_value']
    best_location = recommendation['model_model_input']

    # Output layout: one shared directory, plus one sub-directory per task
    # when there is more than one model.
    plots_dir = os.path.join(expt_dir, 'plots')
    ensure_dir(plots_dir)
    if len(chooser.models) > 1:
        for task_name in chooser.models:
            ensure_dir(os.path.join(plots_dir, task_name))

    print('Plotting...')

    for task_name, model in chooser.models.iteritems():
        if len(chooser.models) > 1:
            target_dir = os.path.join(plots_dir, task_name)
        else:
            target_dir = plots_dir

        plot_hypers(model, target_dir, task_name)
        plot_2d_mean_and_var(model, target_dir, task_name, input_space,
                             best_location)

    if chooser.numConstraints() > 0:
        plot_2d_constraints(chooser, plots_dir, input_space, best_location)

    plot_acquisition_function(chooser, plots_dir, input_space, best_location,
                              best_value)

    print('Done plotting.')
Example #21
0
def main():
    """Keep suggesting and dispatching jobs until a stop condition is hit.

    For every resource that is accepting jobs, a suggestion is requested from
    the chooser and dispatched to that resource. The function returns when
    ``db.numJobsSinceMin`` reports more than 20 jobs, or when the suggested
    job id reaches 1000. Between sweeps over the resources it sleeps for the
    configured 'polling-time' (default 5) whenever ``tired`` says so.
    """
    options, expt_dir = get_options()
    resources = parse_resources_from_config(options)

    # Instantiate the chooser named in the configuration.
    chooser = importlib.import_module(
        'spearmint.choosers.' + options['chooser']).init(options)
    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Open the database connection.
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    while True:

        for resource_name, resource in resources.iteritems():

            jobs = load_jobs(db, experiment_name)

            # One resource is filled up completely before moving on to the
            # next one; an "if" instead of "while" would interleave them.
            # TODO: cost will eventually also be considered here: even if the
            #       resource is not full, we might wait because of cost.
            while resource.acceptingJobs(jobs):

                # Refresh the job list and drop broken jobs from pending.
                jobs = load_jobs(db, experiment_name)
                remove_broken_jobs(db, jobs, experiment_name, resources)

                suggested_job = get_suggestion(chooser, resource.tasks, db,
                                               expt_dir, options,
                                               resource_name)

                jobs_since_min = db.numJobsSinceMin(experiment_name)
                current_min = db.minBranin(experiment_name)
                sys.stderr.write("current min %d. jobs since min %d \n" % (current_min, jobs_since_min))

                # Stop conditions: stalled progress, or the iteration cap.
                if jobs_since_min > 20:
                    sys.stderr.write("more than 20 jobs since min encountered. exiting.\n")
                    return
                if suggested_job['id'] >= 1000:
                    sys.stderr.write("did 1000 iterations. dying now\n")
                    return

                process_id = resource.attemptDispatch(
                    experiment_name, suggested_job, db_address, expt_dir)

                # Record whether the submission went through.
                if process_id is None:
                    suggested_job['status'] = 'broken'
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)
                print_resources_status(resources.values(), jobs)

        # Sleep only if nothing is accepting jobs; some jobs may have
        # finished while the suggestion was being computed.
        if tired(db, experiment_name, resources):
            time.sleep(options.get('polling-time', 5))
Example #22
0
import pandas as pd
import numpy as np
import os

import config as configmod
from config import config
# get data from db

from spearmint.utils.database.mongodb import MongoDB
mdb = MongoDB(config['rnndb'])  # same db as the rnns


def get_runs(xpnm):
    """Collect parameters and results of the completed jobs of ``xpnm``.

    Loads the jobs with status 'complete' for experiment ``xpnm`` from the
    module-level ``mdb`` connection and assembles one row per job: the first
    value of every parameter, then the job's 'main' value, then its id.

    NOTE(review): as visible here the rows are never added to ``data`` and
    nothing is returned -- the rest of the function appears to be cut off.
    """
    # print mdb
    jobs = mdb.load(xpnm, 'jobs', {'status': 'complete'})
    # Presumably mdb.load hands back a single dict (not a list) when only one
    # job matches, so integer indexing raises KeyError -- verify against
    # MongoDB.load. In that case wrap it so the code below sees a list.
    try:
        jobs[0]
    except KeyError:
        jobs = [jobs]
    finally:
        # The parameter names are taken from the first job and reused for
        # every job below.
        params = jobs[0]['params'].keys()

    data = []
    for ajb in jobs:
        arow = []
        for ap in params:
            # Only the first entry of 'values' is used for each parameter.
            dd = ajb['params'][ap]['values'][0]
            # NOTE(review): if 'type' is a string such as 'float', [0] is its
            # first character, which numpy reads as a dtype code -- confirm.
            dt = ajb['params'][ap]['type'][0]
            arow.append(np.array(dd, dtype=dt))
        arow.append(ajb['values']['main'])
        arow.append((ajb['id']))
Example #23
0
def main(args=None):
    """Suggest and dispatch jobs, optionally split into consecutive phases.

    When the configuration has a non-empty ``resets`` list, the run is divided
    into phases. Phase ``k`` uses the experiment name
    ``<experiment-name>__<k>`` and is limited to ``resets[k]`` jobs. Once a
    phase has exactly ``resets[k]`` complete jobs and none pending, its
    hyperparameters are copied to the next phase's experiment and the job id
    offset is advanced by ``resets[k]``. When the last configured phase
    finishes, the function returns. (Previously it indexed ``resets`` past its
    end and died with an IndexError.)

    Without ``resets`` the loop runs until interrupted.

    Args:
        args: Forwarded unchanged to ``get_options``.
    """
    options, expt_dir = get_options(args)

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' +
                                             options['chooser'])
    chooser = chooser_module.init(options)
    base_experiment_name = options.get("experiment-name",
                                       'unnamed-experiment')
    experiment_name = base_experiment_name
    resets = options.get("resets", [])
    job_id_offset = 0
    current_phase = 0
    if resets:
        experiment_name += '__' + str(current_phase)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('STARTING PHASE ' + str(current_phase + 1) +
              ' (' + experiment_name + ')')

    # Connect to the database
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    while True:
        pause = False
        if resets:
            jobs = load_jobs(db, experiment_name)
            num_pending_jobs = sum(
                [job['status'] == 'pending' for job in jobs])
            num_finished_jobs = sum(
                [job['status'] == 'complete' for job in jobs])
            if (num_finished_jobs == resets[current_phase]
                    and num_pending_jobs == 0):
                job_id_offset += resets[current_phase]
                current_phase += 1

                if current_phase >= len(resets):
                    # Every configured phase has run; there is no further
                    # entry in ``resets`` to index, so stop here.
                    sys.stderr.write(
                        'All %d phases complete; stopping.\n' % len(resets))
                    return

                new_experiment_name = (base_experiment_name + '__' +
                                       str(current_phase))
                print('STARTING PHASE ' + str(current_phase + 1) +
                      ' (' + new_experiment_name + ')')

                # Carry the fitted hyperparameters over to the new phase.
                old_hypers = load_hypers(db, experiment_name)
                save_hypers(old_hypers, db, new_experiment_name)
                experiment_name = new_experiment_name
            # NOTE(review): right after a phase switch these counts still
            # describe the previous phase, which can cause one extra pause
            # before they are recomputed on the next pass.
            if num_finished_jobs + num_pending_jobs >= resets[current_phase]:
                pause = True

        for resource_name, resource in resources.iteritems():

            jobs = load_jobs(db, experiment_name)

            # If the resource is currently accepting more jobs
            # TODO: here cost will eventually also be considered: even if the
            #       resource is not full, we might wait because of cost
            # Note: one resource is filled up before moving on to the next;
            # changing "while" to "if" would do it the other way round.
            while resource.acceptingJobs(jobs) and not pause:
                # Load jobs from DB
                jobs = load_jobs(db, experiment_name)
                print('Found %d jobs in db' % len(jobs))

                # Remove any broken jobs from pending.
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Get a suggestion for the next job
                suggested_job = get_suggestion(chooser, resource.tasks, db,
                                               experiment_name, expt_dir,
                                               options, resource_name,
                                               job_id_offset)

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(experiment_name,
                                                      suggested_job,
                                                      db_address, expt_dir)

                # Record whether the submission succeeded.
                if process_id is None:
                    suggested_job['status'] = 'broken'
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

        # If no resources are accepting jobs (or the phase is full), sleep.
        # Resources might already be accepting again if the suggestion took
        # long enough for some jobs to finish in the meantime.
        if tired(db, experiment_name, resources) or pause:
            time.sleep(options.get('polling-time', 5))
Example #24
0
def main(expt_dir, config_file="config.json", no_output=False, repeat=-1):
    """Run the optimization loop for the experiment stored in ``expt_dir``.

    On every pass over the resources, each resource that accepts jobs gets a
    new suggestion: the chooser is fitted on the tasks parsed from the stored
    jobs, the hyperparameters are saved, and the suggested job is saved and
    dispatched. The loop ends when every resource reports
    ``maxCompleteReached``, when any task does, or when
    ``options['max_time_mins']`` has elapsed. Recommendations are then stored
    according to ``options['recommendations']`` ("during", "end-one" or
    "end-all").

    Args:
        expt_dir: Experiment directory; must exist.
        config_file: Name of the configuration file inside ``expt_dir``.
        no_output: When true, no output directory or log file is created and
            ``None`` is passed to the resource as output directory.
        repeat: When ``>= 0``, used to derive a per-repetition experiment
            name and output directory; the file log handler is only
            installed when ``repeat < 0``.

    Raises:
        Exception: if ``expt_dir`` is not a directory.
    """
    if not os.path.isdir(expt_dir):
        raise Exception("Cannot find directory %s" % expt_dir)

    options = parse_config_file(expt_dir, config_file)
    experiment_name = options["experiment_name"]

    # Special advanced feature for repeating the same experiment many times
    if repeat >= 0:
        experiment_name = repeat_experiment_name(experiment_name, repeat)

    if not no_output:  # if we want output
        if repeat >= 0:
            output_directory = repeat_output_dir(expt_dir, repeat)
        else:
            output_directory = os.path.join(expt_dir, 'output')
        if not os.path.isdir(output_directory):
            os.mkdir(output_directory)

        if repeat < 0:
            rootLogger = logging.getLogger()
            fileHandler = logging.FileHandler(
                os.path.join(output_directory, 'main.log'))
            fileHandler.setFormatter(logFormatter)
            fileHandler.setLevel(logLevel)
            rootLogger.addHandler(fileHandler)
    else:
        output_directory = None

    input_space = InputSpace(options["variables"])

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' +
                                             options['chooser'])
    chooser = chooser_module.init(input_space, options)

    # Connect to the database
    db_address = options['database']['address']
    db = MongoDB(database_address=db_address)

    overall_start_time = time.time()
    db.save({'start-time': overall_start_time}, experiment_name, 'start-time')

    # Stays None until a fit happens in this run; the "end-all" branch below
    # falls back to the stored hyperparameters in that case.
    hypers = None

    waiting_for_results = False  # for printing purposes only
    while True:

        for resource_name, resource in resources.iteritems():

            jobs = load_jobs(db, experiment_name)

            # TODO: here cost will eventually also be considered: even if the
            #       resource is not full, we might wait because of cost
            # Note: each resource gets at most one new job per pass ("if");
            # changing "if" to "while" would fill one resource up first.

            # Remove any broken jobs from pending. This must happen before
            # the acceptingJobs() condition is checked.
            remove_broken_jobs(db, jobs, experiment_name, resources)

            if resource.acceptingJobs(jobs):

                if waiting_for_results:
                    logging.info('\n')
                waiting_for_results = False

                # Load jobs from DB
                jobs = load_jobs(db, experiment_name)

                # Print out a list of broken jobs
                print_broken_jobs(jobs)

                # Get a suggestion for the next job
                tasks = parse_tasks_from_jobs(jobs, experiment_name, options,
                                              input_space)

                # Special case when coupled and there is a NaN task -- what
                # to do with the NaN task when decoupled??
                if 'NaN' in tasks and 'NaN' not in resource.tasks:
                    resource.tasks.append('NaN')

                # Load the model hypers from the database.
                hypers = db.load(experiment_name, 'hypers')

                # "Fit" the chooser - give it data and let it fit the models.
                # NOTE: even if we are only suggesting for 1 task, all of
                # them are fitted because the acquisition function for one
                # task depends on all the tasks.
                hypers = chooser.fit(tasks, hypers)

                if hypers:
                    logging.debug('GP covariance hyperparameters:')
                print_hypers(hypers, input_space, options)

                # Save the hyperparameters to the database.
                if hypers:
                    db.save(hypers, experiment_name, 'hypers')

                if options['recommendations'] == "during":
                    # Compute the best value so far, a.k.a. the
                    # "recommendation", and store it.
                    recommendation = chooser.best()
                    store_recommendation(recommendation, db, experiment_name,
                                         tasks, jobs, input_space,
                                         time.time() - overall_start_time)

                # Get the decoupling groups
                task_couplings = {
                    task_name: tasks[task_name].options["group"]
                    for task_name in resource.tasks
                }

                logging.info('\nGetting suggestion for %s...\n' %
                             (', '.join(task_couplings.keys())))

                # Get the next suggested experiment from the chooser.
                suggested_input, suggested_tasks = chooser.suggest(
                    task_couplings)
                suggested_task = suggested_tasks[0]  # hack, deal with later

                suggested_job = {
                    'id': len(jobs) + 1,
                    'params': input_space.paramify(suggested_input),
                    'expt_dir': options['main_file_path'],
                    'tasks': suggested_tasks,
                    'resource': resource_name,
                    'main-file': options['tasks'][suggested_task]['main_file'],
                    'language': options['tasks'][suggested_task]['language'],
                    'status': 'new',
                    'submit time': time.time(),
                    'start time': None,
                    'end time': None,
                    'fast update':
                    chooser.fast_update  # just for plotting - not important
                }

                save_job(suggested_job, db, experiment_name)

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(experiment_name,
                                                      suggested_job,
                                                      db_address, expt_dir,
                                                      output_directory)

                # Print the current time
                logging.info(
                    'Current time: %s' %
                    datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

                # Record whether the submission succeeded.
                if process_id is None:
                    suggested_job['status'] = 'broken'
                    # Report the id of the job that was just suggested; no
                    # other job variable exists in this scope.
                    logging.info(
                        'Job %s failed -- check output file for details.' %
                        suggested_job['id'])
                    save_job(suggested_job, db, experiment_name)
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                    save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

                if len(set(task_couplings.values())) > 1:  # if decoupled
                    print_tasks_status(tasks.values(), jobs)

                # For debug - print pending jobs
                print_pending_jobs(jobs)

        # Terminate the optimization if ALL resources are finished (ran the
        # maximum number of jobs) or ANY task is finished.
        jobs = load_jobs(db, experiment_name)
        tasks = parse_tasks_from_jobs(jobs, experiment_name, options,
                                      input_space)
        # Lists (not generators) so every maxCompleteReached call is still
        # made, as with the eager map() this replaces.
        terminate_resources = all(
            [resource.maxCompleteReached(jobs)
             for resource in resources.values()])
        terminate_tasks = any(
            [task.maxCompleteReached(jobs) for task in tasks.values()])
        terminate_maxtime = (time.time() - overall_start_time) >= (
            options['max_time_mins'] * 60.0)

        if terminate_resources or terminate_tasks or terminate_maxtime:

            if terminate_resources:
                logging.info(
                    'Maximum number of jobs completed on all resources.')
            if terminate_tasks:
                logging.info(
                    'Maximum number of jobs reached for at least one task.')
            if terminate_maxtime:
                logging.info(
                    'Maximum total experiment time of %f minutes reached.' %
                    options['max_time_mins'])

            # save rec in DB
            if options['recommendations'] in ("during", "end-one"):
                logging.info('Making final recommendation:')
                recommendation = chooser.best()
                store_recommendation(recommendation,
                                     db,
                                     experiment_name,
                                     tasks,
                                     jobs,
                                     input_space,
                                     time.time() - overall_start_time,
                                     final=True)
            elif options['recommendations'] == "end-all":
                logging.info('Making recommendations...')
                if hypers is None:
                    # No fit happened in this run (e.g. the experiment was
                    # already finished at start-up); use the stored hypers.
                    hypers = db.load(experiment_name, 'hypers')
                all_jobs = jobs
                for i in xrange(len(all_jobs)):
                    logging.info('')
                    logging.info(
                        '-------------------------------------------------')
                    logging.info(
                        '     Getting recommendations for iter %d/%d      ' %
                        (i, len(all_jobs)))
                    logging.info(
                        '-------------------------------------------------')
                    logging.info('')

                    # Replay the experiment using only the first i+1 jobs.
                    jobs = all_jobs[:i + 1]
                    tasks = parse_tasks_from_jobs(jobs, experiment_name,
                                                  options, input_space)
                    hypers = chooser.fit(tasks, hypers)
                    print_hypers(hypers, input_space, options)
                    # get the biggest end time of the jobs
                    end_time = max([job['end time'] for job in jobs])
                    elapsed_time = end_time - overall_start_time

                    recommendation = chooser.best()
                    store_recommendation(recommendation, db, experiment_name,
                                         tasks, jobs, input_space,
                                         elapsed_time)

            logging.info('Have a nice day.')
            return

        # If no resources are accepting jobs, sleep
        if no_free_resources(db, experiment_name, resources):
            # Written straight to stdout because logging always appends a
            # newline, and the progress dots must stay on one line.
            sys.stdout.write(
                'Waiting for results...' if not waiting_for_results else '.')
            sys.stdout.flush()
            waiting_for_results = True
            time.sleep(options['polling_time'])
        else:
            sys.stdout.write('\n')
def main():

    parser = optparse.OptionParser(usage="usage: %prog [options] directory")

    parser.add_option("--config", dest="config_file",
                      help="Configuration file name.",
                      type="string", default="config.json")
    parser.add_option("--no-output", action="store_true",
                      help="Do not create output files.")
    parser.add_option("--repeat", dest="repeat",
                      help="Used for repeating the same experiment many times.",
                      type="int", default="-1")

    (commandline_kwargs, args) = parser.parse_args()

    # Read in the config file
    #expt_dir = os.path.realpath('examples/cifar10')
    expt_dir  = os.path.realpath(args[0])
    if not os.path.isdir(expt_dir):
        raise Exception("Cannot find directory %s" % expt_dir)

    options = parse_config_file(expt_dir, commandline_kwargs.config_file)
    experiment_name = options["experiment-name"]

    # Special advanced feature for repeating the same experiment many times
    if commandline_kwargs.repeat >= 0:
        experiment_name = repeat_experiment_name(experiment_name, commandline_kwargs.repeat)

    if not commandline_kwargs.no_output: # if we want output
        if commandline_kwargs.repeat >= 0:
            output_directory = repeat_output_dir(expt_dir, commandline_kwargs.repeat)
        else:
            output_directory = os.path.join(expt_dir, 'output', options["experiment-name"])
        if not os.path.isdir(output_directory):
            os.mkdir(output_directory)

        if commandline_kwargs.repeat < 0:
            rootLogger = logging.getLogger()
            fileHandler = logging.FileHandler(os.path.join(output_directory, 'main.log'))
            fileHandler.setFormatter(logFormatter)
            fileHandler.setLevel(logLevel)
            rootLogger.addHandler(fileHandler)
        # consoleHandler = logging.StreamHandler()
        # consoleHandler.setFormatter(logFormatter)
        # consoleHandler.setLevel(logLevel)
        # rootLogger.addHandler(consoleHandler)
    else:
        output_directory = None

    input_space = InputSpace(options["variables"])

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser'])

    chooser = chooser_module.init(input_space, options)

    # Connect to the database

    db_address = options['database']['address']
    db         = MongoDB(database_address=db_address)

    if os.getenv('SPEARMINT_MAX_ITERATIONS') == None and 'max_iterations' not in set(options.keys()):
	maxiterations = DEFAULT_MAX_ITERATIONS
    elif os.getenv('SPEARMINT_MAX_ITERATIONS') != None:
	maxiterations = int(os.getenv('SPEARMINT_MAX_ITERATIONS'))
    else:
	maxiterations = options['max_iterations']

    # Set random seed

    if 'random_seed' in options.keys():
	    np.random.seed(int(options['random_seed']))
	    seed(int(options['random_seed']))

    waiting_for_results = False  # for printing purposes only
    while True:

        for resource_name, resource in resources.iteritems():

            jobs = load_jobs(db, experiment_name)
            # resource.printStatus(jobs)

            # If the resource is currently accepting more jobs
            # TODO: here cost will eventually also be considered: even if the
            #       resource is not full, we might wait because of cost incurred
            # Note: I could chose to fill up one resource and them move on to the next ("if")
            # You could also do it the other way, by changing "if" to "while" here

            # Remove any broken jobs from pending
            # note: make sure to do this before the acceptingJobs() condition is checked
            remove_broken_jobs(db, jobs, experiment_name, resources)

            if resource.acceptingJobs(jobs):

                if waiting_for_results:
                    logging.info('\n')
                waiting_for_results = False

                optim_start_time = time.time()

                # Load jobs from DB
                # (move out of one or both loops?) would need to pass into load_tasks
                jobs = load_jobs(db, experiment_name)

                # Print out a list of broken jobs
                print_broken_jobs(jobs)

                # Get a suggestion for the next job
                tasks = parse_tasks_from_jobs(jobs, experiment_name, options, input_space)

                # Special case when coupled and there is a NaN task-- what to do with NaN task when decoupled??
                if 'NaN' in tasks and 'NaN' not in resource.tasks:
                    resource.tasks.append('NaN')

                # Load the model hypers from the database.
                hypers = db.load(experiment_name, 'hypers')

                # "Fit" the chooser - give the chooser data and let it fit the model(s).
                # NOTE: even if we are only suggesting for 1 task, we need to fit all of them
                # because the acquisition function for one task depends on all the tasks

                hypers = chooser.fit(tasks, hypers)

                if hypers:
                    logging.debug('GP covariance hyperparameters:')
                print_hypers(hypers)

                # Save the hyperparameters to the database.
                if hypers:
                    db.save(hypers, experiment_name, 'hypers')

                # Compute the best value so far, a.k.a. the "recommendation"

                recommendation = chooser.best()

                # Save the recommendation in the DB

                numComplete_by_task = {task_name : task.numComplete(jobs) for task_name, task in tasks.iteritems()}

                db.save({'num_complete' : resource.numComplete(jobs),
                     'num_complete_tasks' : numComplete_by_task,
                     'params'   : input_space.paramify(recommendation['model_model_input']),
                     'objective': recommendation['model_model_value'],
                     'params_o' : None if recommendation['obser_obser_input'] is None else input_space.paramify(recommendation['obser_obser_input']),
                     'obj_o'    : recommendation['obser_obser_value'],
                     'params_om': None if recommendation['obser_model_input'] is None else input_space.paramify(recommendation['obser_model_input']),
                     'obj_om'   : recommendation['obser_model_value']},
                experiment_name, 'recommendations', {'id' : len(jobs)})

                # Get the decoupling groups
                task_couplings = {task_name : tasks[task_name].options["group"] for task_name in resource.tasks}

                logging.info('\nGetting suggestion for %s...\n' % (', '.join(task_couplings.keys())))

                # Get the next suggested experiment from the chooser.

                suggested_input, suggested_tasks = chooser.suggest(task_couplings, optim_start_time)
                suggested_task = suggested_tasks[0] # hack, deal with later

                suggested_job = {
                    'id'          : len(jobs) + 1,
                    'params'      : input_space.paramify(suggested_input),
                    'expt_dir'    : options['main_file_path'],
                    'tasks'       : suggested_tasks,
                    'resource'    : resource_name,
                    'main-file'   : resource.main_file,
                    'language'    : options['tasks'][suggested_task]['language'],
                    'status'      : 'new',
                    'submit time' : time.time(),
                    'start time'  : None,
                    'end time'    : None
                }

                save_job(suggested_job, db, experiment_name)

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(experiment_name, suggested_job, db_address,
                                                      expt_dir, output_directory)

                # Print the current time
                logging.info('Current time: %s' % datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

                # Set the status of the job appropriately (successfully submitted or not)
                if process_id is None:
                    suggested_job['status'] = 'broken'
                    logging.info('Job %s failed -- check output file for details.' % job['id'])
                    save_job(suggested_job, db, experiment_name)
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                    save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                # resource.printStatus(jobs)
                print_resources_status(resources.values(), jobs)

                if len(set(task_couplings.values())) > 1: # if decoupled
                    print_tasks_status(tasks.values(), jobs)

                # For debug - print pending jobs
                print_pending_jobs(jobs)


        # Terminate the optimization if all resources are finished (run max number of jobs)
        # or ANY task is finished (just my weird convention)
        if reduce(lambda x,y: x and y, map(lambda x: x.maxCompleteReached(jobs), resources.values()), True) or \
           reduce(lambda x,y: x or y,  map(lambda x: x.maxCompleteReached(jobs), tasks.values()),     False):
            # Do all this extra work just to save the final recommendation -- would be ok to delete everything
            # in here and just "return"
            sys.stdout.write('\n')
            jobs = load_jobs(db, experiment_name)
            tasks = parse_tasks_from_jobs(jobs, experiment_name, options, input_space)
            hypers = db.load(experiment_name, 'hypers')
            hypers = chooser.fit(tasks, hypers)
            if hypers:
                db.save(hypers, experiment_name, 'hypers')
            # logging.info('\n**All resources have run the maximum number of jobs.**\nFinal recommendation:')
            recommendation = chooser.best()

            # numComplete_per_task
            numComplete_by_task = {task_name : task.numComplete(jobs) for task_name, task in tasks.iteritems()}
            db.save({'num_complete'       : resource.numComplete(jobs),
                     'num_complete_tasks' : numComplete_by_task,
                     'params'   : input_space.paramify(recommendation['model_model_input']),
                     'objective': recommendation['model_model_value'],
                     'params_o' : None if recommendation['obser_obser_input'] is None else input_space.paramify(recommendation['obser_obser_input']),
                     'obj_o'    : recommendation['obser_obser_value'],
                     'params_om': None if recommendation['obser_model_input'] is None else input_space.paramify(recommendation['obser_model_input']),
                     'obj_om'   : recommendation['obser_model_value']},
                experiment_name, 'recommendations', {'id'       : len(jobs)})
            logging.info('Maximum number of jobs completed. Have a nice day.')
            return

        # If no resources are accepting jobs, sleep
        if no_free_resources(db, experiment_name, resources):
            # Don't use logging here because it's too much effort to use logging without a newline at the end
            sys.stdout.write('Waiting for results...' if not waiting_for_results else '.')
            sys.stdout.flush()
            # sys.stderr.flush()
            waiting_for_results = True
            time.sleep(options['polling_time'])
        else:
            sys.stdout.write('\n')
Example #26
0
def main(filter=None):
    """
    Usage: python make_plots.py PATH_TO_DIRECTORY
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--clean', action='store_true', help='remove broken jobs')
    parser.add_argument('--table', action='store_true', help='print table')
    parser.add_argument('--csv', action='store_true', help='save table as csv')
    parser.add_argument('--d', type=int, help='sort by distance from dth smallest result')
    parser.add_argument('--name', help='experiment name', default=None)
    args, unknown = parser.parse_known_args()

    options, expt_dir = get_options(unknown)
    # print "options:"
    # print_dict(options)

    # reduce the grid size
    options["grid_size"] = 400

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser'])
    chooser = chooser_module.init(options)
    # print "chooser", chooser
    if args.name:
        experiment_name = args.name
    else:
        experiment_name     = options.get("experiment-name", 'unnamed-experiment')

    # Connect to the database
    db_address = options['database']['address']
    # sys.stderr.write('Using database at %s.\n' % db_address)
    db         = MongoDB(database_address=db_address)

    # testing below here
    jobs = load_jobs(db, experiment_name)
    print len(jobs), 'jobs found'
    # print jobs

    # remove_broken_jobs
    if args.clean:
        for job in jobs:
            if job['status'] == 'pending':
                sys.stderr.write('Broken job %s detected.\n' % job['id'])
                job['status'] = 'broken'
                db.save(job, experiment_name, 'jobs', {'id' : job['id']})

    # print "resources:", resources
    # print_dict(resources)
    resource = resources.itervalues().next()

    task_options = {task: options["tasks"][task] for task in resource.tasks}
    # print "task_options:"
    # print_dict(task_options) # {'main': {'likelihood': u'NOISELESS', 'type': 'OBJECTIVE'}}

    task_group = load_task_group(db, options, experiment_name, resource.tasks)
    hypers = load_hypers(db, experiment_name)
    chooser.fit(task_group, hypers, task_options)
    lp, x = chooser.best()

    if args.table:
        os.chdir(unknown[0])
        out_file = open('results.csv', 'w') if args.csv else sys.stdout

        # get the observed points
        task = task_group.tasks.itervalues().next()
        idata = task.valid_normalized_data_dict
        inputs = idata["inputs"]
        inputs = map(lambda i: [paramify(task_group, task_group.from_unit(i)).values(), i], inputs)
        vals = idata["values"]
        vals = [task.unstandardize_mean(task.unstandardize_variance(v)) for v in vals]

        out_file.write('\n%10s' % 'result')
        lengths = [10]
        for name, vdict in task.variables_meta.iteritems():
            name = '%10s' % name
            out_file.write(',' + name)
            lengths.append(len(name))
        out_file.write('\n')

        line_template = '%' + str(lengths[0]) + '.4f,' + ','.join(['%' + str(l) +
            ('.4f' if 'enum' not in inputs[0][0][i]['type'] else 's') for i, l in enumerate(lengths[1:])])

        points = sorted(zip(vals, inputs), key=lambda r: r[0])
        if args.d is not None:
            target = x
            if args.d >= 0:
                target = points[args.d][1][1]
            points = sorted(points, key=lambda r: np.linalg.norm(r[1][1] - target))
        for i, point in enumerate(points):
            subs = [point[0]] + [d['values'][0] for d in point[1][0]]
            out_file.write(line_template % tuple(subs) + '\n')
        out_file.close()
Example #27
0
def launch(db_address, experiment_name, job_id):
    """
    Launches the job with the given id and stores its outcome.

    The job record is loaded from the database, stamped with a start time,
    and handed to the launcher matching its 'language' field (matlab,
    python or shell).  Whatever the launcher returns is turned into a dict
    of values per task name.  On success the job is saved as 'complete'
    together with those values; if anything raises, the traceback is
    printed and the job is saved as 'broken'.
    """
    db = MongoDB(database_address=db_address)
    job = db.load(experiment_name, 'jobs', {'id': job_id})

    start_time = time.time()
    job['start time'] = start_time
    db.save(job, experiment_name, 'jobs', {'id': job_id})

    sys.stderr.write("Job launching after %0.2f seconds in submission.\n" %
                     (start_time - job['submit time']))

    success = False

    try:
        language = job['language'].lower()
        if language == 'matlab':
            result = matlab_launcher(job)
        elif language == 'python':
            result = python_launcher(job)
        elif language == 'shell':
            result = shell_launcher(job)
        else:
            raise Exception("That language has not been implemented.")

        if isinstance(result, dict):
            # A dict carrying "objective" and "constraints" is rewritten
            # under the default task / constraint names; any other dict is
            # used as-is.
            if "objective" in result and "constraints" in result:
                constraints = result["constraints"]
                renamed = {DEFAULT_TASK_NAME: result["objective"]}
                for idx in xrange(len(constraints)):
                    renamed['%s%d' % (DEFAULT_CONSTRAINT_NAME, idx)] = constraints[idx]
                result = renamed
        elif np.isnan(result):
            # Returning just NaN means NaN on all tasks
            result = dict.fromkeys(job['tasks'], np.nan)
        elif len(job['tasks']) == 1:
            # Only one named task and a bare result: key it by that task
            result = {job['tasks'][0]: result}
        else:
            result = {DEFAULT_TASK_NAME: result}

        # Extra keys in the result are fine; every job task other than the
        # literal 'NaN' must be present among them.
        accepted_names = set(result.keys()).union(['NaN'])
        if not set(job['tasks']).issubset(accepted_names):
            raise Exception(
                "Result task names %s did not match job task names %s." %
                (result.keys(), job['tasks']))

        success = True
    except:
        # Deliberately catches everything: any failure while running the
        # job is reported here and recorded as a broken job below.
        sys.stderr.flush()
        sys.stdout.flush()
        import traceback
        traceback.print_exc()
        sys.stderr.write("Problem executing the function\n")
        print(sys.exc_info())

    end_time = time.time()
    elapsed = end_time - start_time

    if not success:
        sys.stderr.write("Job failed in %0.2f seconds.\n" % elapsed)

        # Update metadata.
        job['status'] = 'broken'
    else:
        sys.stderr.write("Completed successfully in %0.2f seconds. [%s]\n" %
                         (elapsed, result))

        job['values'] = result
        job['status'] = 'complete'

    job['end time'] = end_time

    db.save(job, experiment_name, 'jobs', {'id': job_id})
Example #28
0
def main(args):
    """
    Run a budget-limited optimization loop and report the best result.

    Options are read through get_options(args).  Jobs keep being suggested
    and dispatched while fewer than "budget" have been submitted, or while
    chooser.ei is at least the "ei" threshold and fewer than "maxbudget"
    have been submitted.  Afterwards the function waits until tired()
    returns false.

    Returns the tuple (chooser.best_jobid, chooser.best_value, count),
    where count is the starting "count" option plus the number of jobs
    submitted here.
    """
    options, expt_dir = get_options(args)
    print(options)
    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser = importlib.import_module('spearmint.choosers.' + options['chooser']).init(options)
    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Connect to the database
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    # The loop is bounded by these settings rather than running forever
    budget = options.get("budget", 20)
    count = options.get("count", 0)
    ei_threshold = options.get("ei", 0.10)
    max_budget = options.get("maxbudget", 10)

    def may_submit(submitted):
        # chooser.ei is only consulted once the base budget is used up
        return submitted < budget or (chooser.ei >= ei_threshold and submitted < max_budget)

    def pause():
        time.sleep(options.get('polling-time', 5))

    while may_submit(count):

        for resource_name, resource in resources.iteritems():

            jobs = load_jobs(db, experiment_name)

            # Fill up this resource before moving on to the next one
            while resource.acceptingJobs(jobs):
                if not may_submit(count):
                    break

                sys.stderr.write("Proceeding to next experiment\n")
                jobs = load_jobs(db, experiment_name)

                # Remove any broken jobs from pending.
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Get a suggestion for the next job
                suggested_job = get_suggestion(chooser, resource.tasks, db, expt_dir, options, resource_name)

                # Submit the job to the appropriate resource
                proc_id = resource.attemptDispatch(experiment_name, suggested_job, db_address, expt_dir)

                # Record whether the submission succeeded
                if process_id_missing(proc_id):
                    suggested_job['status'] = 'broken'
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = proc_id
                save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)
                count += 1

        # If no resources are accepting jobs, sleep
        # (some may be accepting again if suggesting took a while and jobs
        # finished in the meantime)
        if tired(db, experiment_name, resources):
            pause()

    # Budget exhausted: keep polling for as long as tired() reports true
    while tired(db, experiment_name, resources):
        pause()

    print(chooser.best_value)
    return chooser.best_jobid, chooser.best_value, count


def process_id_missing(proc_id):
    # A dispatch that produced no process id is treated as failed
    return proc_id is None
Example #29
0
def main():
    """
    Run the optimization loop indefinitely.

    For every configured resource, broken jobs are removed and new jobs are
    suggested and dispatched for as long as the resource accepts them.
    After each pass over the resources the loop sleeps for the
    'polling-time' option (default 5) if tired() reports true.  The
    function never returns on its own.
    """
    options, expt_dir = get_options()

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser = importlib.import_module('spearmint.choosers.' + options['chooser']).init(options)
    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Connect to the database
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    while True:

        for resource_name, resource in resources.iteritems():

            jobs = load_jobs(db, experiment_name)
            remove_broken_jobs(db, jobs, experiment_name, resources)

            # Fill up this resource before moving on to the next one.
            # TODO: cost is not considered yet; a resource that is not full
            #       is always given more work.
            while resource.acceptingJobs(jobs):

                # Work from a fresh view of the jobs, minus broken ones
                jobs = load_jobs(db, experiment_name)
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Get a suggestion for the next job
                suggested_job = get_suggestion(chooser, resource.tasks, db, expt_dir, options, resource_name)

                # Submit the job to the appropriate resource
                proc_id = resource.attemptDispatch(experiment_name, suggested_job, db_address, expt_dir)

                # Record whether the submission succeeded
                if proc_id is not None:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = proc_id
                else:
                    suggested_job['status'] = 'broken'
                save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

        # If no resources are accepting jobs, sleep
        # (some may be accepting again if suggesting took a while and jobs
        # finished in the meantime)
        if tired(db, experiment_name, resources):
            time.sleep(options.get('polling-time', 5))
Example #30
0
def plot(
    config_directory="/home/carrknight/code/oxfish/runs/optimization/spearmint"
):
    """
    Fit the chooser to a stored experiment and dump predictions to files.

    Reads ``config.json`` through get_options(), fits the configured
    chooser to the experiment's task group, and then, for every task,
    evaluates that task's model on a grid whose coordinates lie in [0, 1]
    and writes into *config_directory*:

    * ``<experiment>.csv``                    grid points, mean, variance
    * ``<experiment>_bounds.yaml``            type and bounds/options per variable
    * ``<experiment>_<task>_runs.csv``        the task's inputs
    * ``<experiment>_<task>_runs_values.csv`` the task's values

    :param config_directory: directory passed to get_options() and used as
                             the output directory.

    NOTE(review): the first os.chdir below uses a hard-coded path and
    ignores this parameter; they only agree for the default value.
    """
    os.chdir("/home/carrknight/code/oxfish/runs/optimization/spearmint")
    options = get_options(config_directory, config_file="config.json")
    experiment_name = str(options['experiment-name'])
    db = MongoDB()
    resources = parse_resources_from_config(options)
    # Only the first configured resource is used.
    resource = resources.itervalues().next()

    # load hyper parameters
    chooser_module = importlib.import_module('spearmint.choosers.' +
                                             options['chooser'])
    chooser = chooser_module.init(options)
    print "chooser", chooser

    # This value is only printed; it is replaced by load_hypers() below.
    hypers = db.load(experiment_name, "hypers")
    print "loaded hypers", hypers  # from GP.to_dict()

    jobs = load_jobs(db, experiment_name)
    remove_broken_jobs(db, jobs, experiment_name, resources)
    task_options = {task: options["tasks"][task] for task in resource.tasks}
    task_group = spearmint.main.load_task_group(db, options, resource.tasks)

    hypers = spearmint.main.load_hypers(db, experiment_name)
    print "loaded hypers", hypers  # from GP.to_dict()

    hypers = chooser.fit(task_group, hypers, task_options)
    print "\nfitted hypers:"
    print(hypers)

    lp, x = chooser.best()
    x = x.flatten()
    print "best", lp, x
    bestp = task_group.paramify(task_group.from_unit(x))
    print "expected best position", bestp

    print "chooser models:", chooser.models
    obj_model = chooser.models[chooser.objective['name']]
    grid = chooser.grid
    # NOTE(review): obj_mean / obj_var are not used in the code visible
    # below (only in the commented-out block) -- confirm they are needed.
    obj_mean, obj_var = obj_model.function_over_hypers(obj_model.predict, grid)

    import numpy as np

    bounds = dict()

    for task_name, task in task_group.tasks.iteritems():
        # make a grid, feed it to the predictor:

        # One axis per float/int variable and one (0, 1) axis per enum
        # option; `bounds` collects what is later dumped as YAML.
        dimensions = ()
        for key in options['variables'].keys():
            type = str(options['variables'][key]["type"]).strip().lower()
            keyname = key.encode('utf-8')
            bounds[keyname] = dict()
            bounds[keyname]["type"] = type
            if type == "float":
                dimension = np.linspace(0, 1, num=SPACING)
                dimensions = dimensions + (dimension, )
                bounds[keyname]["min"] = options['variables'][key]["min"]
                bounds[keyname]["max"] = options['variables'][key]["max"]

            elif type == "int":
                min = int(options['variables'][key]["min"])
                max = int(options['variables'][key]["max"])
                # NOTE(review): this yields max - min points, not
                # max - min + 1 as in the commented-out line below --
                # confirm which is intended.
                dimension = np.linspace(0, 1, num=max - min)
                bounds[keyname]["min"] = min
                bounds[keyname]["max"] = max
                #  dimension = np.array([x + min for x in range(max - min + 1)])
                dimensions = dimensions + (dimension, )
            else:
                bounds[keyname]["options"] = options['variables'][key][
                    "options"]
                # Anything that is neither float nor int must be an enum.
                assert type == "enum"
                dimension = tuple([
                    (0, 1)
                    for i in range(len(options['variables'][key]["options"]))
                ])
                for t in dimension:
                    dimensions = dimensions + (t, )
                    # print(dimension)

        # presumably the Cartesian product of the axes -- `cartesian` is
        # defined elsewhere; verify its output layout.
        data = cartesian(np.array(dimensions))

        mean, variance = chooser.models[task_name].predict(data)
        mean = [
            task.unstandardize_mean(task.unstandardize_variance(v))
            for v in mean
        ]
        # NOTE(review): the square root is taken before
        # unstandardize_variance, so this column may really hold a standard
        # deviation -- confirm against the task's unstandardize helpers.
        variance = [task.unstandardize_variance(np.sqrt(v)) for v in variance]

        os.chdir(config_directory)

        # unzip the data
        new_data = zip(*data.transpose().tolist())
        datum = zip(new_data, mean, variance)

        # The file name does not include task_name, so each task in the
        # loop rewrites the same <experiment>.csv.
        header = ",".join(options['variables'].keys()) + ",mean,variance"
        with open(experiment_name + ".csv", 'w') as fileout:
            fileout.write(header + "\n")
            for i in range(len(datum)):
                # Rows are the str() of a nested tuple with the parentheses
                # stripped and doubled commas collapsed.
                fileout.write(
                    str(datum[i]).replace("(", "").replace(")", "").replace(
                        ",,", ",") + "\n")

        with open(experiment_name + "_bounds.yaml", 'w') as outfile:
            outfile.write(yaml.dump(bounds, default_flow_style=False))

        # grid = cartesian(dimensions)
        # mean, variance = obj_model.function_over_hypers(obj_model.predict, grid)
        #
        # mean = [obj_task.unstandardize_mean(obj_task.unstandardize_variance(v)) for v in mean]
        # variance = [obj_task.unstandardize_variance(np.sqrt(v)) for v in variance]
        #
        # # xymv = [([x for x in xy], m, v) for xy, m, v in izip(new_grid, obj_mean, obj_std)]  # if .2 < xy[0] < .25]
        # with open(experiment_name + ".csv", 'w') as fileout:
        #     for i in range(len(mean)):
        #         fileout.write(str(([x for x in grid[i]], mean[i], variance[i])).replace("(", "").replace(")", "").
        #                       replace("[", "").replace("]", "") + "\n")

        # Observed runs for this task: inputs and values go to separate files.
        xy = np.array(task.inputs)
        # function values:
        vals = task.values
        vals = np.array(vals)
        np.savetxt(experiment_name + "_" + task_name + "_runs.csv",
                   xy,
                   delimiter=",",
                   fmt='%.3e')
        np.savetxt(experiment_name + "_" + task_name + "_runs_values.csv",
                   vals,
                   delimiter=",",
                   fmt='%.3e')