Example #1
0
def launch(db_address, experiment_name, job_id):
    """
    Launch the job with the given id.

    Loads the job from the database, runs the launcher matching the job's
    language, validates/normalizes the result into a task-name -> value dict,
    and saves the final status ('complete' or 'broken') back to the database.
    """

    db = MongoDB(database_address=db_address)
    job = db.load(experiment_name, 'jobs', {'id': job_id})

    # Record the start time immediately so observers see the job as running.
    start_time = time.time()
    job['start time'] = start_time
    db.save(job, experiment_name, 'jobs', {'id': job_id})

    sys.stderr.write("Job launching after %0.2f seconds in submission.\n" %
                     (start_time - job['submit time']))

    success = False

    try:
        language = job['language'].lower()
        if language == 'matlab':
            result = matlab_launcher(job)
        elif language == 'python':
            result = python_launcher(job)
        elif language == 'shell':
            result = shell_launcher(job)
        else:
            raise Exception("That language has not been implemented.")

        if not isinstance(result, dict):
            # Returning just NaN means NaN on all tasks.
            if np.isnan(result):
                result = {task_name: np.nan for task_name in job['tasks']}
            elif len(job['tasks']) == 1:
                # Only one named task and result is a scalar: wrap it.
                result = {job['tasks'][0]: result}
            else:
                result = {DEFAULT_TASK_NAME: result}
        else:
            if "objective" in result and "constraints" in result:
                # Flatten {"objective": ..., "constraints": [...]} into one
                # flat dict of named tasks.
                result_new = {DEFAULT_TASK_NAME: result["objective"]}
                for i, constraint in enumerate(result["constraints"]):
                    result_new['%s%d' % (DEFAULT_CONSTRAINT_NAME, i)] = constraint
                result = result_new

        # Extra keys in the result dict are fine; every job task must be
        # present, possibly excusing the special 'NaN' pseudo-task.
        if not set(job['tasks']).issubset(set(result.keys())):
            if not set(job['tasks']).issubset(
                    set(result.keys()).union(['NaN'])):
                raise Exception(
                    "Result task names %s did not match job task names %s." %
                    (list(result.keys()), job['tasks']))

        success = True
    except Exception:
        # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit
        # still propagate; any other failure marks the job broken below.
        sys.stderr.flush()
        sys.stdout.flush()
        import traceback
        traceback.print_exc()
        sys.stderr.write("Problem executing the function\n")
        print(sys.exc_info())

    end_time = time.time()

    if success:
        sys.stderr.write("Completed successfully in %0.2f seconds. [%s]\n" %
                         (end_time - start_time, result))

        job['values'] = result
        job['status'] = 'complete'
        job['end time'] = end_time

    else:
        sys.stderr.write("Job failed in %0.2f seconds.\n" %
                         (end_time - start_time))

        # Update metadata.
        job['status'] = 'broken'
        job['end time'] = end_time

    db.save(job, experiment_name, 'jobs', {'id': job_id})
Example #2
0
def launch(db_address, experiment_name, job_id):
    """
    Launch the job with the given id.

    Loads the job from the database, runs the launcher matching the job's
    language, normalizes the result into a task-name -> value dict, and
    saves the final status ('complete' or 'broken') back to the database.
    """

    db = MongoDB(database_address=db_address)
    job = db.load(experiment_name, 'jobs', {'id': job_id})

    # Record the start time immediately so observers see the job as running.
    start_time = time.time()
    job['start time'] = start_time
    db.save(job, experiment_name, 'jobs', {'id': job_id})

    sys.stderr.write("Job launching after %0.2f seconds in submission.\n"
                     % (start_time - job['submit time']))

    success = False

    try:
        language = job['language'].lower()
        if language == 'matlab':
            result = matlab_launcher(job)
        elif language == 'python':
            result = python_launcher(job)
        elif language == 'shell':
            result = shell_launcher(job)
        elif language == 'mcr':
            result = mcr_launcher(job)
        else:
            raise Exception("That language has not been implemented.")

        if not isinstance(result, dict):
            # Returning just NaN means NaN on all tasks.
            if np.isnan(result):
                # zip form kept (not a dict comprehension) for very old
                # interpreters; see the sibling launch() variants.
                result = dict(zip(job['tasks'], [np.nan] * len(job['tasks'])))
            elif len(job['tasks']) == 1:  # Only one named job
                result = {job['tasks'][0]: result}
            else:
                result = {'main': result}

        if set(result.keys()) != set(job['tasks']):
            raise Exception("Result task names %s did not match job task names %s."
                            % (list(result.keys()), job['tasks']))

        success = True
    except Exception:
        # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit
        # still propagate; any other failure marks the job broken below.
        import traceback
        traceback.print_exc()
        sys.stderr.write("Problem executing the function\n")
        print(sys.exc_info())

    end_time = time.time()

    if success:
        sys.stderr.write("Completed successfully in %0.2f seconds. [%s]\n"
                         % (end_time - start_time, result))

        job['values'] = result
        job['status'] = 'complete'
        job['end time'] = end_time

    else:
        sys.stderr.write("Job failed in %0.2f seconds.\n" % (end_time - start_time))

        # Update metadata.
        job['status'] = 'broken'
        job['end time'] = end_time

    db.save(job, experiment_name, 'jobs', {'id': job_id})
def main():
    """Run the Spearmint main optimization loop from the command line.

    Parses command-line options, loads the experiment configuration,
    connects to MongoDB, then repeatedly: fits the chooser to completed
    jobs, saves the current recommendation, suggests a new job, and
    dispatches it to an accepting resource. Terminates once every resource
    (or any single task) has completed its maximum number of jobs.
    """

    parser = optparse.OptionParser(usage="usage: %prog [options] directory")

    parser.add_option("--config", dest="config_file",
                      help="Configuration file name.",
                      type="string", default="config.json")
    parser.add_option("--no-output", action="store_true",
                      help="Do not create output files.")
    parser.add_option("--repeat", dest="repeat",
                      help="Used for repeating the same experiment many times.",
                      type="int", default="-1")

    (commandline_kwargs, args) = parser.parse_args()

    # Read in the config file
    expt_dir = os.path.realpath(args[0])
    if not os.path.isdir(expt_dir):
        raise Exception("Cannot find directory %s" % expt_dir)

    options = parse_config_file(expt_dir, commandline_kwargs.config_file)
    experiment_name = options["experiment-name"]

    # Special advanced feature for repeating the same experiment many times
    if commandline_kwargs.repeat >= 0:
        experiment_name = repeat_experiment_name(experiment_name,
                                                 commandline_kwargs.repeat)

    if not commandline_kwargs.no_output:  # if we want output
        if commandline_kwargs.repeat >= 0:
            output_directory = repeat_output_dir(expt_dir, commandline_kwargs.repeat)
        else:
            output_directory = os.path.join(expt_dir, 'output', options["experiment-name"])
        if not os.path.isdir(output_directory):
            os.mkdir(output_directory)

        if commandline_kwargs.repeat < 0:
            # Log to a file in the output directory (skipped for repeats).
            rootLogger = logging.getLogger()
            fileHandler = logging.FileHandler(os.path.join(output_directory, 'main.log'))
            fileHandler.setFormatter(logFormatter)
            fileHandler.setLevel(logLevel)
            rootLogger.addHandler(fileHandler)
    else:
        output_directory = None

    input_space = InputSpace(options["variables"])

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser'])

    chooser = chooser_module.init(input_space, options)

    # Connect to the database
    db_address = options['database']['address']
    db = MongoDB(database_address=db_address)

    # Maximum iterations: the environment variable takes precedence over the
    # config file, which takes precedence over the default.
    # (Fixed: these lines were tab-indented, which is a SyntaxError under
    # Python 3 and fragile under Python 2; also '== None' -> 'is None'.)
    # NOTE(review): maxiterations is computed but never used below - TODO confirm.
    if os.getenv('SPEARMINT_MAX_ITERATIONS') is None and 'max_iterations' not in options:
        maxiterations = DEFAULT_MAX_ITERATIONS
    elif os.getenv('SPEARMINT_MAX_ITERATIONS') is not None:
        maxiterations = int(os.getenv('SPEARMINT_MAX_ITERATIONS'))
    else:
        maxiterations = options['max_iterations']

    # Set random seed for reproducibility (both numpy's RNG and seed()).
    if 'random_seed' in options:
        np.random.seed(int(options['random_seed']))
        seed(int(options['random_seed']))

    waiting_for_results = False  # for printing purposes only
    while True:

        for resource_name, resource in resources.iteritems():

            jobs = load_jobs(db, experiment_name)
            # resource.printStatus(jobs)

            # If the resource is currently accepting more jobs
            # TODO: here cost will eventually also be considered: even if the
            #       resource is not full, we might wait because of cost incurred
            # Note: I could chose to fill up one resource and them move on to the next ("if")
            # You could also do it the other way, by changing "if" to "while" here

            # Remove any broken jobs from pending
            # note: make sure to do this before the acceptingJobs() condition is checked
            remove_broken_jobs(db, jobs, experiment_name, resources)

            if resource.acceptingJobs(jobs):

                if waiting_for_results:
                    logging.info('\n')
                waiting_for_results = False

                optim_start_time = time.time()

                # Load jobs from DB
                # (move out of one or both loops?) would need to pass into load_tasks
                jobs = load_jobs(db, experiment_name)

                # Print out a list of broken jobs
                print_broken_jobs(jobs)

                # Get a suggestion for the next job
                tasks = parse_tasks_from_jobs(jobs, experiment_name, options, input_space)

                # Special case when coupled and there is a NaN task-- what to do with NaN task when decoupled??
                if 'NaN' in tasks and 'NaN' not in resource.tasks:
                    resource.tasks.append('NaN')

                # Load the model hypers from the database.
                hypers = db.load(experiment_name, 'hypers')

                # "Fit" the chooser - give the chooser data and let it fit the model(s).
                # NOTE: even if we are only suggesting for 1 task, we need to fit all of them
                # because the acquisition function for one task depends on all the tasks
                hypers = chooser.fit(tasks, hypers)

                if hypers:
                    logging.debug('GP covariance hyperparameters:')
                print_hypers(hypers)

                # Save the hyperparameters to the database.
                if hypers:
                    db.save(hypers, experiment_name, 'hypers')

                # Compute the best value so far, a.k.a. the "recommendation"
                recommendation = chooser.best()

                # Save the recommendation in the DB
                numComplete_by_task = {task_name: task.numComplete(jobs)
                                       for task_name, task in tasks.iteritems()}

                db.save({'num_complete': resource.numComplete(jobs),
                         'num_complete_tasks': numComplete_by_task,
                         'params': input_space.paramify(recommendation['model_model_input']),
                         'objective': recommendation['model_model_value'],
                         'params_o': None if recommendation['obser_obser_input'] is None else input_space.paramify(recommendation['obser_obser_input']),
                         'obj_o': recommendation['obser_obser_value'],
                         'params_om': None if recommendation['obser_model_input'] is None else input_space.paramify(recommendation['obser_model_input']),
                         'obj_om': recommendation['obser_model_value']},
                        experiment_name, 'recommendations', {'id': len(jobs)})

                # Get the decoupling groups
                task_couplings = {task_name: tasks[task_name].options["group"]
                                  for task_name in resource.tasks}

                logging.info('\nGetting suggestion for %s...\n' % (', '.join(task_couplings.keys())))

                # Get the next suggested experiment from the chooser.
                suggested_input, suggested_tasks = chooser.suggest(task_couplings, optim_start_time)
                suggested_task = suggested_tasks[0]  # hack, deal with later

                suggested_job = {
                    'id'          : len(jobs) + 1,
                    'params'      : input_space.paramify(suggested_input),
                    'expt_dir'    : options['main_file_path'],
                    'tasks'       : suggested_tasks,
                    'resource'    : resource_name,
                    'main-file'   : resource.main_file,
                    'language'    : options['tasks'][suggested_task]['language'],
                    'status'      : 'new',
                    'submit time' : time.time(),
                    'start time'  : None,
                    'end time'    : None
                }

                save_job(suggested_job, db, experiment_name)

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(experiment_name, suggested_job, db_address,
                                                      expt_dir, output_directory)

                # Print the current time
                logging.info('Current time: %s' % datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

                # Set the status of the job appropriately (successfully submitted or not)
                if process_id is None:
                    suggested_job['status'] = 'broken'
                    # Fixed: this previously read job['id'] where 'job' is an
                    # undefined name, raising NameError on dispatch failure.
                    logging.info('Job %s failed -- check output file for details.' % suggested_job['id'])
                    save_job(suggested_job, db, experiment_name)
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                    save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                # resource.printStatus(jobs)
                print_resources_status(resources.values(), jobs)

                if len(set(task_couplings.values())) > 1:  # if decoupled
                    print_tasks_status(tasks.values(), jobs)

                # For debug - print pending jobs
                print_pending_jobs(jobs)

        # Terminate the optimization if all resources are finished (run max number of jobs)
        # or ANY task is finished (just my weird convention)
        # (all()/any() replace the equivalent reduce(lambda ...) forms.)
        if all(r.maxCompleteReached(jobs) for r in resources.values()) or \
           any(t.maxCompleteReached(jobs) for t in tasks.values()):
            # Do all this extra work just to save the final recommendation -- would be ok to delete everything
            # in here and just "return"
            sys.stdout.write('\n')
            jobs = load_jobs(db, experiment_name)
            tasks = parse_tasks_from_jobs(jobs, experiment_name, options, input_space)
            hypers = db.load(experiment_name, 'hypers')
            hypers = chooser.fit(tasks, hypers)
            if hypers:
                db.save(hypers, experiment_name, 'hypers')
            recommendation = chooser.best()

            numComplete_by_task = {task_name: task.numComplete(jobs)
                                   for task_name, task in tasks.iteritems()}
            db.save({'num_complete': resource.numComplete(jobs),
                     'num_complete_tasks': numComplete_by_task,
                     'params': input_space.paramify(recommendation['model_model_input']),
                     'objective': recommendation['model_model_value'],
                     'params_o': None if recommendation['obser_obser_input'] is None else input_space.paramify(recommendation['obser_obser_input']),
                     'obj_o': recommendation['obser_obser_value'],
                     'params_om': None if recommendation['obser_model_input'] is None else input_space.paramify(recommendation['obser_model_input']),
                     'obj_om': recommendation['obser_model_value']},
                    experiment_name, 'recommendations', {'id': len(jobs)})
            logging.info('Maximum number of jobs completed. Have a nice day.')
            return

        # If no resources are accepting jobs, sleep
        if no_free_resources(db, experiment_name, resources):
            # Don't use logging here because it's too much effort to use logging without a newline at the end
            sys.stdout.write('Waiting for results...' if not waiting_for_results else '.')
            sys.stdout.flush()
            waiting_for_results = True
            time.sleep(options['polling_time'])
        else:
            sys.stdout.write('\n')
Example #4
0
def launch(db_address, experiment_name, job_id):
    """
    Launch the job with the given id.

    Loads the job from the database, runs the launcher matching the job's
    language, normalizes the result into a task-name -> value dict, and
    saves the final status ('complete' or 'broken') back to the database.
    """

    db = MongoDB(database_address=db_address)
    job = db.load(experiment_name, 'jobs', {'id': job_id})

    # Record the start time immediately so observers see the job as running.
    start_time = time.time()
    job['start time'] = start_time
    db.save(job, experiment_name, 'jobs', {'id': job_id})

    sys.stderr.write("Job launching after %0.2f seconds in submission.\n"
                     % (start_time - job['submit time']))

    success = False

    try:
        language = job['language'].lower()
        if language == 'matlab':
            result = matlab_launcher(job)
        elif language == 'python':
            result = python_launcher(job)
        elif language == 'shell':
            result = shell_launcher(job)
        elif language == 'mcr':
            result = mcr_launcher(job)
        else:
            raise Exception("That language has not been implemented.")

        if not isinstance(result, dict):
            # Returning just NaN means NaN on all tasks.
            if np.isnan(result):
                # zip form kept (not a dict comprehension) for very old
                # interpreters; see the sibling launch() variants.
                result = dict(zip(job['tasks'], [np.nan] * len(job['tasks'])))
            elif len(job['tasks']) == 1:  # Only one named job
                result = {job['tasks'][0]: result}
            else:
                result = {'main': result}

        if set(result.keys()) != set(job['tasks']):
            # list() so the py3 dict_keys view renders readably in the message.
            raise Exception("Result task names %s did not match job task names %s."
                            % (list(result.keys()), job['tasks']))

        success = True
    except Exception:
        # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit
        # still propagate; any other failure marks the job broken below.
        import traceback
        traceback.print_exc()
        sys.stderr.write("Problem executing the function\n")
        print(sys.exc_info())

    end_time = time.time()

    if success:
        sys.stderr.write("Completed successfully in %0.2f seconds. [%s]\n"
                         % (end_time - start_time, result))

        job['values'] = result
        job['status'] = 'complete'
        job['end time'] = end_time

    else:
        sys.stderr.write("Job failed in %0.2f seconds.\n" % (end_time - start_time))

        # Update metadata.
        job['status'] = 'broken'
        job['end time'] = end_time

    db.save(job, experiment_name, 'jobs', {'id': job_id})
Example #5
0
def main(filter=None):
    """
    Usage: python make_plots.py PATH_TO_DIRECTORY

    Loads an experiment's jobs from MongoDB, fits the chooser, and
    optionally (--clean) marks pending jobs as broken and (--table)
    prints or (--csv) saves a table of observed results.

    NOTE(review): the 'filter' parameter shadows the builtin and is never
    used in this function body - TODO confirm whether callers rely on it.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--clean', action='store_true', help='remove broken jobs')
    parser.add_argument('--table', action='store_true', help='print table')
    parser.add_argument('--csv', action='store_true', help='save table as csv')
    parser.add_argument('--d', type=int, help='sort by distance from dth smallest result')
    parser.add_argument('--name', help='experiment name', default=None)
    # parse_known_args: unrecognized args are forwarded to get_options below.
    args, unknown = parser.parse_known_args()

    options, expt_dir = get_options(unknown)
    # print "options:"
    # print_dict(options)

    # reduce the grid size
    options["grid_size"] = 400

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser'])
    chooser = chooser_module.init(options)
    # print "chooser", chooser
    # --name overrides the config-file experiment name.
    if args.name:
        experiment_name = args.name
    else:
        experiment_name     = options.get("experiment-name", 'unnamed-experiment')

    # Connect to the database
    db_address = options['database']['address']
    # sys.stderr.write('Using database at %s.\n' % db_address)
    db         = MongoDB(database_address=db_address)

    # testing below here
    jobs = load_jobs(db, experiment_name)
    print len(jobs), 'jobs found'
    # print jobs

    # remove_broken_jobs: mark any still-'pending' jobs as broken in the DB.
    if args.clean:
        for job in jobs:
            if job['status'] == 'pending':
                sys.stderr.write('Broken job %s detected.\n' % job['id'])
                job['status'] = 'broken'
                db.save(job, experiment_name, 'jobs', {'id' : job['id']})

    # print "resources:", resources
    # print_dict(resources)
    # Python-2 idiom: take an arbitrary (first) resource from the dict.
    resource = resources.itervalues().next()

    task_options = {task: options["tasks"][task] for task in resource.tasks}
    # print "task_options:"
    # print_dict(task_options) # {'main': {'likelihood': u'NOISELESS', 'type': 'OBJECTIVE'}}

    task_group = load_task_group(db, options, experiment_name, resource.tasks)
    hypers = load_hypers(db, experiment_name)
    chooser.fit(task_group, hypers, task_options)
    # lp is unused below; x (the best input, in unit space) is used as the
    # default distance target for --d.
    lp, x = chooser.best()

    if args.table:
        os.chdir(unknown[0])
        out_file = open('results.csv', 'w') if args.csv else sys.stdout

        # get the observed points
        task = task_group.tasks.itervalues().next()
        idata = task.valid_normalized_data_dict
        inputs = idata["inputs"]
        # Pair each input with [paramified values, unit-space vector].
        inputs = map(lambda i: [paramify(task_group, task_group.from_unit(i)).values(), i], inputs)
        vals = idata["values"]
        # Undo standardization so the table shows original-scale objectives.
        vals = [task.unstandardize_mean(task.unstandardize_variance(v)) for v in vals]

        out_file.write('\n%10s' % 'result')
        lengths = [10]
        for name, vdict in task.variables_meta.iteritems():
            name = '%10s' % name
            out_file.write(',' + name)
            lengths.append(len(name))
        out_file.write('\n')

        # One %-format field per column, %.4f for numeric, %s for enums.
        line_template = '%' + str(lengths[0]) + '.4f,' + ','.join(['%' + str(l) +
            ('.4f' if 'enum' not in inputs[0][0][i]['type'] else 's') for i, l in enumerate(lengths[1:])])

        # Sort rows by result; with --d, re-sort by distance from the dth
        # smallest result (or from the best input x when d < 0).
        points = sorted(zip(vals, inputs), key=lambda r: r[0])
        if args.d is not None:
            target = x
            if args.d >= 0:
                target = points[args.d][1][1]
            points = sorted(points, key=lambda r: np.linalg.norm(r[1][1] - target))
        for i, point in enumerate(points):
            subs = [point[0]] + [d['values'][0] for d in point[1][0]]
            out_file.write(line_template % tuple(subs) + '\n')
        # NOTE(review): when --csv is not given this closes sys.stdout -
        # presumably acceptable at end of script; verify against callers.
        out_file.close()
Example #6
0
def main(expt_dir, config_file="config.json", no_output=False, repeat=-1):
    """Run the Spearmint main optimization loop for one experiment.

    Parameters
    ----------
    expt_dir : str
        Directory containing the experiment configuration.
    config_file : str
        Configuration file name inside ``expt_dir``.
    no_output : bool
        If True, create no output files or file logging.
    repeat : int
        If >= 0, run as one of several repeats of the same experiment.

    Repeatedly fits the chooser, stores recommendations, suggests and
    dispatches jobs, and terminates when all resources are finished, any
    task is finished, or the maximum experiment time is reached.
    """
    if not os.path.isdir(expt_dir):
        raise Exception("Cannot find directory %s" % expt_dir)

    options = parse_config_file(expt_dir, config_file)
    experiment_name = options["experiment_name"]

    # Special advanced feature for repeating the same experiment many times
    if repeat >= 0:
        experiment_name = repeat_experiment_name(experiment_name, repeat)

    if not no_output:  # if we want output
        if repeat >= 0:
            output_directory = repeat_output_dir(expt_dir, repeat)
        else:
            output_directory = os.path.join(expt_dir, 'output')
        if not os.path.isdir(output_directory):
            os.mkdir(output_directory)

        if repeat < 0:
            # Log to a file in the output directory (skipped for repeats).
            rootLogger = logging.getLogger()
            fileHandler = logging.FileHandler(
                os.path.join(output_directory, 'main.log'))
            fileHandler.setFormatter(logFormatter)
            fileHandler.setLevel(logLevel)
            rootLogger.addHandler(fileHandler)
    else:
        output_directory = None

    input_space = InputSpace(options["variables"])

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' +
                                             options['chooser'])
    chooser = chooser_module.init(input_space, options)

    # Connect to the database
    db_address = options['database']['address']
    db = MongoDB(database_address=db_address)

    overall_start_time = time.time()
    db.save({'start-time': overall_start_time}, experiment_name, 'start-time')

    # Initialized here so the "end-all" recommendation pass below cannot hit
    # an unbound 'hypers' if termination fires before any job was suggested.
    hypers = None

    waiting_for_results = False  # for printing purposes only
    while True:

        for resource_name, resource in resources.iteritems():

            jobs = load_jobs(db, experiment_name)
            # resource.printStatus(jobs)

            # If the resource is currently accepting more jobs
            # TODO: here cost will eventually also be considered: even if the
            #       resource is not full, we might wait because of cost incurred
            # Note: I could chose to fill up one resource and them move on to the next ("if")
            # You could also do it the other way, by changing "if" to "while" here

            # Remove any broken jobs from pending
            # note: make sure to do this before the acceptingJobs() condition is checked
            remove_broken_jobs(db, jobs, experiment_name, resources)

            if resource.acceptingJobs(jobs):

                if waiting_for_results:
                    logging.info('\n')
                waiting_for_results = False

                # Load jobs from DB
                # (move out of one or both loops?) would need to pass into load_tasks
                jobs = load_jobs(db, experiment_name)

                # Print out a list of broken jobs
                print_broken_jobs(jobs)

                # Get a suggestion for the next job
                tasks = parse_tasks_from_jobs(jobs, experiment_name, options,
                                              input_space)

                # Special case when coupled and there is a NaN task-- what to do with NaN task when decoupled??
                if 'NaN' in tasks and 'NaN' not in resource.tasks:
                    resource.tasks.append('NaN')

                # Load the model hypers from the database.
                hypers = db.load(experiment_name, 'hypers')

                # "Fit" the chooser - give the chooser data and let it fit the model(s).
                # NOTE: even if we are only suggesting for 1 task, we need to fit all of them
                # because the acquisition function for one task depends on all the tasks
                hypers = chooser.fit(tasks, hypers)

                if hypers:
                    logging.debug('GP covariance hyperparameters:')
                print_hypers(hypers, input_space, options)

                # Save the hyperparameters to the database.
                if hypers:
                    db.save(hypers, experiment_name, 'hypers')

                if options['recommendations'] == "during":
                    # Compute the best value so far, a.k.a. the "recommendation"
                    recommendation = chooser.best()

                    # Save the recommendation in the DB if there are more complete jobs than last time
                    store_recommendation(recommendation, db, experiment_name,
                                         tasks, jobs, input_space,
                                         time.time() - overall_start_time)

                # Get the decoupling groups
                task_couplings = {
                    task_name: tasks[task_name].options["group"]
                    for task_name in resource.tasks
                }

                logging.info('\nGetting suggestion for %s...\n' %
                             (', '.join(task_couplings.keys())))

                # Get the next suggested experiment from the chooser.
                suggested_input, suggested_tasks = chooser.suggest(
                    task_couplings)
                suggested_task = suggested_tasks[0]  # hack, deal with later

                suggested_job = {
                    'id': len(jobs) + 1,
                    'params': input_space.paramify(suggested_input),
                    'expt_dir': options['main_file_path'],
                    'tasks': suggested_tasks,
                    'resource': resource_name,
                    'main-file': options['tasks'][suggested_task]['main_file'],
                    'language': options['tasks'][suggested_task]['language'],
                    'status': 'new',
                    'submit time': time.time(),
                    'start time': None,
                    'end time': None,
                    'fast update':
                    chooser.fast_update  # just for plotting - not important
                }

                save_job(suggested_job, db, experiment_name)

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(experiment_name,
                                                      suggested_job,
                                                      db_address, expt_dir,
                                                      output_directory)

                # Print the current time
                logging.info(
                    'Current time: %s' %
                    datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

                # Set the status of the job appropriately (successfully submitted or not)
                if process_id is None:
                    suggested_job['status'] = 'broken'
                    # Fixed: this previously read job['id'] where 'job' is an
                    # undefined name, raising NameError on dispatch failure.
                    logging.info(
                        'Job %s failed -- check output file for details.' %
                        suggested_job['id'])
                    save_job(suggested_job, db, experiment_name)
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                    save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                # resource.printStatus(jobs)
                print_resources_status(resources.values(), jobs)

                if len(set(task_couplings.values())) > 1:  # if decoupled
                    print_tasks_status(tasks.values(), jobs)

                # For debug - print pending jobs
                print_pending_jobs(jobs)

        # Terminate the optimization if all resources are finished (run max number of jobs)
        # or ANY task is finished (just my weird convention)
        jobs = load_jobs(db, experiment_name)
        tasks = parse_tasks_from_jobs(jobs, experiment_name, options,
                                      input_space)
        # all()/any() replace the equivalent reduce(lambda ...) forms.
        terminate_resources = all(
            r.maxCompleteReached(jobs) for r in resources.values())
        terminate_tasks = any(
            t.maxCompleteReached(jobs) for t in tasks.values())
        terminate_maxtime = (time.time() - overall_start_time) >= (
            options['max_time_mins'] * 60.0)

        if terminate_resources or terminate_tasks or terminate_maxtime:

            if terminate_resources:
                logging.info(
                    'Maximum number of jobs completed on all resources.')
            if terminate_tasks:
                logging.info(
                    'Maximum number of jobs reached for at least one task.')
            if terminate_maxtime:
                logging.info(
                    'Maximum total experiment time of %f minutes reached.' %
                    options['max_time_mins'])

            # save rec in DB
            if options['recommendations'] in ("during", "end-one"):
                logging.info('Making final recommendation:')
                recommendation = chooser.best()
                store_recommendation(recommendation,
                                     db,
                                     experiment_name,
                                     tasks,
                                     jobs,
                                     input_space,
                                     time.time() - overall_start_time,
                                     final=True)
            elif options['recommendations'] == "end-all":
                # Re-fit on each prefix of the job history so a
                # recommendation trajectory can be plotted afterwards.
                logging.info('Making recommendations...')
                all_jobs = jobs
                for i in xrange(len(all_jobs)):
                    logging.info('')
                    logging.info(
                        '-------------------------------------------------')
                    logging.info(
                        '     Getting recommendations for iter %d/%d      ' %
                        (i, len(all_jobs)))
                    logging.info(
                        '-------------------------------------------------')
                    logging.info('')

                    jobs = all_jobs[:i + 1]
                    tasks = parse_tasks_from_jobs(jobs, experiment_name,
                                                  options, input_space)
                    hypers = chooser.fit(tasks, hypers)
                    print_hypers(hypers, input_space, options)
                    # get the biggest end time of the jobs
                    end_time = max([job['end time'] for job in jobs])
                    elapsed_time = end_time - overall_start_time

                    recommendation = chooser.best()
                    store_recommendation(recommendation, db, experiment_name,
                                         tasks, jobs, input_space,
                                         elapsed_time)

            logging.info('Have a nice day.')
            return

        # If no resources are accepting jobs, sleep
        if no_free_resources(db, experiment_name, resources):
            # Don't use logging here because it's too much effort to use logging without a newline at the end
            sys.stdout.write(
                'Waiting for results...' if not waiting_for_results else '.')
            sys.stdout.flush()
            # sys.stderr.flush()
            waiting_for_results = True
            time.sleep(options['polling_time'])
        else:
            sys.stdout.write('\n')
Example #7
0
class Spearmint(Printer):
    """Wrapper exposing the Spearmint Bayesian-optimization backend."""

    def __init__(self, config_file, work_dir):
        # Initialize the Printer base with this optimizer's label/color.
        Printer.__init__(self, 'Spearmint', color='grey')
        self.work_dir = work_dir
        print(config_file)  # echo the config path (debug aid)
        self._parse_config_file(config_file)
        # Batch size falls back to 1 when the resource entry is missing.
        try:
            resources = self.param_dict['resources']
            self.batch_size = resources['my-machine']['max-concurrent']
        except KeyError:
            self.batch_size = 1
        # Running history of evaluated parameter vectors and their losses.
        self.all_params = []
        self.all_losses = []

    def rand_gens(self, var_type='float', size=1):
        """Draw ``size`` random samples for the given variable type.

        Only 'float' is supported: uniform draws on [0, 1).

        Raises:
            NotImplementedError: for any other ``var_type``.
        """
        if var_type != 'float':
            raise NotImplementedError
        return np.random.uniform(low=0, high=1, size=size)

    def _parse_config_file(self, config_file):
        """Load the JSON config and record per-variable bookkeeping.

        Populates ``param_dict`` plus ``total_size``, ``var_sizes`` and
        ``var_names`` describing the optimization variables.
        """
        self.json_parser = ParserJSON(file_name=config_file)
        self.json_parser.parse()
        self.param_dict = self.json_parser.param_dict

        # Tally the dimensionality contributed by each variable.
        self.total_size = 0
        self.var_sizes = []
        self.var_names = []
        for name, spec in self.param_dict['variables'].items():
            self.total_size += spec['size']
            self.var_sizes.append(int(spec['size']))
            self.var_names.append(name)

    def _generate_uniform(self, num_samples=10):
        """Sample every variable uniformly and stash the proposals.

        Fills ``self.container`` with per-variable draws and sets
        ``self.proposed`` to an array with one row per sample.
        """
        self.container, self.sampled_params = {}, {}
        rows = []
        for name in self.var_names:
            spec = self.param_dict['variables'][name]
            draws = self.rand_gens(var_type=spec['type'],
                                   size=(spec['size'], num_samples))
            rows.extend(draws)
            self.container[name] = draws
        # Transpose so rows index samples and columns index dimensions.
        self.proposed = np.array(rows).transpose()

    def _parse_observations(self, observations):
        """Convert raw observations into parameter vectors and losses.

        Deduplicates against previously seen parameter vectors (anything
        within an L2 distance of 1e-6 of an old vector is dropped) and
        appends the accepted entries to the running ``all_params`` /
        ``all_losses`` history.

        Args:
            observations: iterable of dicts with per-variable 'samples'
                entries and a 'loss' value.

        Returns:
            (all_params, all_losses): the newly accepted parameter lists
            and their corresponding losses.
        """
        all_params, all_losses = [], []
        for observation in observations:
            params = []
            for var_name in self.var_names:
                params.extend(observation[var_name]['samples'])
            if len(self.all_params) > 0:
                # BUG FIX: `params` and the stored vectors are plain Python
                # lists; subtracting two lists raises TypeError. Convert to
                # numpy arrays so the distance computation is valid.
                new_vec = np.asarray(params)
                min_dist = np.amin([
                    np.linalg.norm(new_vec - np.asarray(old_param))
                    for old_param in self.all_params
                ])
                if min_dist > 1e-6:
                    all_losses.append(observation['loss'])
                    all_params.append(params)
            else:
                # First-ever observation: accept unconditionally.
                all_losses.append(observation['loss'])
                all_params.append(params)
        # Fold the accepted observations into the running history.
        for index, element in enumerate(all_params):
            self.all_params.append(element)
            self.all_losses.append(all_losses[index])
        return all_params, all_losses

    def _create_mongo_instance(self):
        """Start a forked mongod instance on a freshly-created db directory.

        Any previous database directory for this experiment is removed
        first so each run starts from an empty database.
        """
        self.db_path = '%s/db_%s/' % (self.work_dir,
                                      self.param_dict['experiment-name'])
        print(self.db_path)
        # Remove a stale db dir if present. A bare `except:` here would
        # also swallow KeyboardInterrupt/SystemExit; shutil.rmtree raises
        # OSError (incl. FileNotFoundError), so catch exactly that.
        try:
            shutil.rmtree(self.db_path)
        except OSError:
            pass
        os.mkdir(self.db_path)
        # --fork makes the call return while the server keeps running.
        subprocess.call('mongod --fork --logpath %s/mongodb.log --dbpath %s' %
                        (self.db_path, self.db_path),
                        shell=True)

    def _create_spearmint_parameters(self):
        """Build the Spearmint options, chooser, resources and DB handle."""
        self._create_mongo_instance()

        # Load the experiment configuration and the available resources.
        self.options, self.exp_dir = get_options(self.work_dir)
        self.resources = parse_resources_from_config(self.options)

        # Instantiate the chooser module named in the options.
        module_name = 'spearmint.choosers.' + self.options['chooser']
        self.chooser_module = importlib.import_module(module_name)
        self.chooser = self.chooser_module.init(self.options)

        self.experiment_name = self.options.get('experiment-name',
                                                'unnamed_experiment')
        self.db_address = self.options['database']['address']
        self.db = MongoDB(database_address=self.db_address)

    def _sample_parameter_sets(self, num_samples, observations):
        """Seed Spearmint's DB with past observations, then fetch proposals.

        Stores every (deduplicated) observation as a completed job record
        in the forked MongoDB instance, asks the chooser for a suggestion
        per configured resource, and accumulates the suggested vectors in
        ``self.proposed``. Finally shuts the mongod instance down.

        Args:
            num_samples: number of copies of each suggested vector to
                append to ``self.proposed``.
            observations: iterable of observation dicts with per-variable
                'samples' entries and a 'loss' value.
        """
        all_params, all_losses = self._parse_observations(observations)
        self._create_spearmint_parameters()

        # dump all observations in database
        for index, param in enumerate(all_params):
            print('PARAM', param, all_losses[index])
            params = {}
            start_index = 0
            # Slice the flat parameter vector back into per-variable blocks.
            for var_index, var_name in enumerate(self.var_names):
                var_dict = self.param_dict['variables'][var_name]
                params[var_name] = {
                    'type':
                    var_dict['type'],
                    'values':
                    np.array(param[start_index:start_index + var_dict['size']])
                }
                start_index += var_dict['size']
            # Fabricate a job record in Spearmint's schema so the chooser
            # treats the observation as an already-evaluated suggestion.
            job = {
                'id': index + 1,
                'expt_dir': self.work_dir,
                'tasks': ['main'],
                'resource': 'my-machine',
                'main-file': 'main_file.py',
                'language': 'PYTHON',
                'status': 'new',
                'submit time': time.time(),
                'start time': time.time(),
                'end time': None,
                'params': params
            }
            # NOTE(review): presumably this delay keeps 'end time' distinct
            # from 'start time' — confirm whether it is actually required.
            time.sleep(0.1)
            job['values'] = {'main': all_losses[index]}
            job['status'] = 'complete'
            job['end time'] = time.time()

            self.db.save(job, self.experiment_name, 'jobs', {'id': job['id']})

        self.proposed = []
        # Ask the chooser for one suggestion per configured resource.
        for resource_name, resource in self.resources.items():
            print('RUNNING SPEARMINT')
            suggested_job = get_suggestion(self.chooser, resource.tasks,
                                           self.db, self.exp_dir, self.options,
                                           resource_name)
            print('DONE')
            # Flatten the suggested per-variable values into one vector.
            vector = []
            for var_name in self.var_names:
                vector.extend(suggested_job['params'][var_name]['values'])
            vector = np.array(vector)
            # The same suggested vector is repeated num_samples times.
            for index in range(num_samples):
                self.proposed.append(vector)

        print('PROPOSED', self.proposed)
        # Shut the forked mongod instance back down.
        subprocess.call(
            'mongod --shutdown --logpath %s/mongodb.log --dbpath %s' %
            (self.db_path, self.db_path),
            shell=True)

    def choose(self, num_samples=None, observations=None):
        """Propose the next parameter set(s) to evaluate.

        With observations, runs Spearmint to suggest new points; without
        them, draws a single uniform random sample.

        Args:
            num_samples: how many proposals to produce; defaults to
                ``self.batch_size`` when falsy.
            observations: past evaluations to condition on, or None/empty
                for the initial uniform draw.

        Returns:
            ``self.proposed`` — the proposed parameter vectors.
        """
        current_dir = os.getcwd()
        os.chdir(self.work_dir)
        # BUG FIX: restore the working directory even if sampling raises;
        # previously an exception left the process chdir'd into work_dir.
        try:
            if not num_samples:
                num_samples = self.batch_size

            if observations:
                self._print('proposing samples')
                self._sample_parameter_sets(num_samples, observations)
            else:
                self._print('choosing uniformly')
                self._generate_uniform(1)
        finally:
            os.chdir(current_dir)

        return self.proposed