def launch(db_address, experiment_name, job_id):
    """Launch the job with the given id and record its outcome in the DB.

    Loads the job document from MongoDB, dispatches it to the launcher for
    its declared language, normalizes the returned value into a
    {task_name: value} dict, and saves the job back with status
    'complete' (values attached) or 'broken' on any failure.

    Parameters
    ----------
    db_address : str, MongoDB connection address.
    experiment_name : str, name of the experiment collection.
    job_id : id of the job document to run.
    """
    db = MongoDB(database_address=db_address)
    job = db.load(experiment_name, 'jobs', {'id': job_id})

    # Record launch time before running so a crash still leaves a start time.
    start_time = time.time()
    job['start time'] = start_time
    db.save(job, experiment_name, 'jobs', {'id': job_id})

    sys.stderr.write("Job launching after %0.2f seconds in submission.\n"
                     % (start_time - job['submit time']))

    success = False
    try:
        # Hoisted: the language string is invariant across the dispatch chain.
        language = job['language'].lower()
        if language == 'matlab':
            result = matlab_launcher(job)
        elif language == 'python':
            result = python_launcher(job)
        elif language == 'shell':
            result = shell_launcher(job)
        else:
            raise Exception("That language has not been implemented.")

        if not isinstance(result, dict):
            # Returning just NaN means NaN on all tasks.
            if np.isnan(result):
                result = {task_name: np.nan for task_name in job['tasks']}
            elif len(job['tasks']) == 1:
                # Only one named task and result is not a dict: wrap it.
                result = {job['tasks'][0]: result}
            else:
                result = {DEFAULT_TASK_NAME: result}
        else:
            if "objective" in result and "constraints" in result:
                # Translate the {objective, constraints} convention into
                # one entry per task/constraint.
                result_new = dict()
                result_new[DEFAULT_TASK_NAME] = result["objective"]
                # was xrange: range is equivalent here on both Python 2 and 3
                for i in range(len(result["constraints"])):
                    result_new['%s%d' % (DEFAULT_CONSTRAINT_NAME, i)] = \
                        result["constraints"][i]
                result = result_new

            # Extra keys in the result dict are fine; every job task must be
            # present, though 'NaN' is also tolerated as a task name.
            if not set(job['tasks']).issubset(set(result.keys())):
                if not set(job['tasks']).issubset(
                        set(result.keys()).union(['NaN'])):
                    raise Exception(
                        "Result task names %s did not match job task names %s."
                        % (result.keys(), job['tasks']))

        success = True
    # BUG FIX: was a bare `except:`, which also swallowed SystemExit and
    # KeyboardInterrupt; narrow to Exception and still log the traceback.
    except Exception:
        sys.stderr.flush()
        sys.stdout.flush()
        import traceback
        traceback.print_exc()
        sys.stderr.write("Problem executing the function\n")
        print(sys.exc_info())

    end_time = time.time()

    if success:
        sys.stderr.write("Completed successfully in %0.2f seconds. [%s]\n"
                         % (end_time - start_time, result))
        job['values'] = result
        job['status'] = 'complete'
        job['end time'] = end_time
    else:
        sys.stderr.write("Job failed in %0.2f seconds.\n"
                         % (end_time - start_time))
        # Update metadata so the scheduler can detect the broken job.
        job['status'] = 'broken'
        job['end time'] = end_time

    db.save(job, experiment_name, 'jobs', {'id': job_id})
def launch(db_address, experiment_name, job_id):
    """Launch the job with the given id and record its outcome in the DB.

    Variant of the launcher that also supports compiled MATLAB ('mcr') jobs
    and requires the result's task names to match the job's task names
    exactly. Saves the job back with status 'complete' or 'broken'.
    """
    db = MongoDB(database_address=db_address)
    job = db.load(experiment_name, 'jobs', {'id' : job_id})

    start_time = time.time()
    job['start time'] = start_time
    db.save(job, experiment_name, 'jobs', {'id' : job_id})

    sys.stderr.write("Job launching after %0.2f seconds in submission.\n"
                     % (start_time - job['submit time']))

    success = False
    try:
        # Hoisted: the language string is invariant across the dispatch chain.
        language = job['language'].lower()
        if language == 'matlab':
            result = matlab_launcher(job)
        elif language == 'python':
            result = python_launcher(job)
        elif language == 'shell':
            result = shell_launcher(job)
        elif language == 'mcr':
            result = mcr_launcher(job)
        else:
            raise Exception("That language has not been implemented.")

        if not isinstance(result, dict):
            # Returning just NaN means NaN on all tasks.
            if np.isnan(result):
                result = dict(list(zip(job['tasks'],
                                       [np.nan] * len(job['tasks']))))
            elif len(job['tasks']) == 1:
                # Only one named job
                result = {job['tasks'][0] : result}
            else:
                result = {'main' : result}

        # This variant is strict: the result keys must equal the task names.
        if set(result.keys()) != set(job['tasks']):
            raise Exception("Result task names %s did not match job task names %s."
                            % (list(result.keys()), job['tasks']))

        success = True
    # BUG FIX: was a bare `except:`, which also swallowed SystemExit and
    # KeyboardInterrupt; narrow to Exception and still log the traceback.
    except Exception:
        import traceback
        traceback.print_exc()
        sys.stderr.write("Problem executing the function\n")
        print(sys.exc_info())

    end_time = time.time()

    if success:
        sys.stderr.write("Completed successfully in %0.2f seconds. [%s]\n"
                         % (end_time - start_time, result))
        job['values'] = result
        job['status'] = 'complete'
        job['end time'] = end_time
    else:
        sys.stderr.write("Job failed in %0.2f seconds.\n"
                         % (end_time - start_time))
        # Update metadata so the scheduler can detect the broken job.
        job['status'] = 'broken'
        job['end time'] = end_time

    db.save(job, experiment_name, 'jobs', {'id' : job_id})
def main():
    """Command-line entry point for the experiment loop (optparse variant).

    Parses options, sets up output/logging, connects to MongoDB, then loops:
    for each resource that is accepting jobs, fits the chooser, stores the
    current recommendation, gets a suggestion, dispatches it, and terminates
    when every resource (or any task) has reached its completion limit.
    """
    parser = optparse.OptionParser(usage="usage: %prog [options] directory")
    parser.add_option("--config", dest="config_file",
                      help="Configuration file name.",
                      type="string", default="config.json")
    parser.add_option("--no-output", action="store_true",
                      help="Do not create output files.")
    parser.add_option("--repeat", dest="repeat",
                      help="Used for repeating the same experiment many times.",
                      type="int", default="-1")
    (commandline_kwargs, args) = parser.parse_args()

    # Read in the config file
    expt_dir = os.path.realpath(args[0])
    if not os.path.isdir(expt_dir):
        raise Exception("Cannot find directory %s" % expt_dir)
    options = parse_config_file(expt_dir, commandline_kwargs.config_file)
    experiment_name = options["experiment-name"]

    # Special advanced feature for repeating the same experiment many times
    if commandline_kwargs.repeat >= 0:
        experiment_name = repeat_experiment_name(experiment_name,
                                                 commandline_kwargs.repeat)

    if not commandline_kwargs.no_output:  # if we want output
        if commandline_kwargs.repeat >= 0:
            output_directory = repeat_output_dir(expt_dir,
                                                 commandline_kwargs.repeat)
        else:
            output_directory = os.path.join(expt_dir, 'output',
                                            options["experiment-name"])
        if not os.path.isdir(output_directory):
            os.mkdir(output_directory)
        if commandline_kwargs.repeat < 0:
            # Only attach a file handler for non-repeat runs.
            rootLogger = logging.getLogger()
            fileHandler = logging.FileHandler(
                os.path.join(output_directory, 'main.log'))
            fileHandler.setFormatter(logFormatter)
            fileHandler.setLevel(logLevel)
            rootLogger.addHandler(fileHandler)
    else:
        output_directory = None

    input_space = InputSpace(options["variables"])
    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.'
                                             + options['chooser'])
    chooser = chooser_module.init(input_space, options)

    # Connect to the database
    db_address = options['database']['address']
    db = MongoDB(database_address=db_address)

    # BUG FIX: compared to None with ==/!=; use identity checks, and read the
    # environment variable only once.
    # NOTE(review): maxiterations is computed but not referenced in this
    # function body -- confirm whether it is consumed elsewhere.
    env_max_iterations = os.getenv('SPEARMINT_MAX_ITERATIONS')
    if env_max_iterations is None and 'max_iterations' not in options:
        maxiterations = DEFAULT_MAX_ITERATIONS
    elif env_max_iterations is not None:
        maxiterations = int(env_max_iterations)
    else:
        maxiterations = options['max_iterations']

    # Set random seed
    if 'random_seed' in options:
        np.random.seed(int(options['random_seed']))
        seed(int(options['random_seed']))

    waiting_for_results = False  # for printing purposes only
    while True:
        for resource_name, resource in resources.items():
            jobs = load_jobs(db, experiment_name)

            # Remove any broken jobs from pending.
            # note: make sure to do this before acceptingJobs() is checked
            remove_broken_jobs(db, jobs, experiment_name, resources)

            # If the resource is currently accepting more jobs
            # TODO: here cost will eventually also be considered: even if the
            # resource is not full, we might wait because of cost incurred
            if resource.acceptingJobs(jobs):
                if waiting_for_results:
                    logging.info('\n')
                waiting_for_results = False

                optim_start_time = time.time()

                # Load jobs from DB
                jobs = load_jobs(db, experiment_name)

                # Print out a list of broken jobs
                print_broken_jobs(jobs)

                # Get a suggestion for the next job
                tasks = parse_tasks_from_jobs(jobs, experiment_name, options,
                                              input_space)

                # Special case when coupled and there is a NaN task
                if 'NaN' in tasks and 'NaN' not in resource.tasks:
                    resource.tasks.append('NaN')

                # Load the model hypers from the database.
                hypers = db.load(experiment_name, 'hypers')

                # "Fit" the chooser: even when suggesting for one task we fit
                # all of them, because the acquisition function for one task
                # depends on all the tasks.
                hypers = chooser.fit(tasks, hypers)

                if hypers:
                    logging.debug('GP covariance hyperparameters:')
                    print_hypers(hypers)

                # Save the hyperparameters to the database.
                if hypers:
                    db.save(hypers, experiment_name, 'hypers')

                # Compute the best value so far, a.k.a. the "recommendation"
                recommendation = chooser.best()

                # Save the recommendation in the DB
                numComplete_by_task = {task_name: task.numComplete(jobs)
                                       for task_name, task in tasks.items()}
                db.save({'num_complete': resource.numComplete(jobs),
                         'num_complete_tasks': numComplete_by_task,
                         'params': input_space.paramify(
                             recommendation['model_model_input']),
                         'objective': recommendation['model_model_value'],
                         'params_o': None
                             if recommendation['obser_obser_input'] is None
                             else input_space.paramify(
                                 recommendation['obser_obser_input']),
                         'obj_o': recommendation['obser_obser_value'],
                         'params_om': None
                             if recommendation['obser_model_input'] is None
                             else input_space.paramify(
                                 recommendation['obser_model_input']),
                         'obj_om': recommendation['obser_model_value']},
                        experiment_name, 'recommendations',
                        {'id': len(jobs)})

                # Get the decoupling groups
                task_couplings = {task_name: tasks[task_name].options["group"]
                                  for task_name in resource.tasks}

                logging.info('\nGetting suggestion for %s...\n'
                             % (', '.join(task_couplings.keys())))

                # Get the next suggested experiment from the chooser.
                suggested_input, suggested_tasks = chooser.suggest(
                    task_couplings, optim_start_time)
                suggested_task = suggested_tasks[0]  # hack, deal with later

                suggested_job = {
                    'id': len(jobs) + 1,
                    'params': input_space.paramify(suggested_input),
                    'expt_dir': options['main_file_path'],
                    'tasks': suggested_tasks,
                    'resource': resource_name,
                    'main-file': resource.main_file,
                    'language': options['tasks'][suggested_task]['language'],
                    'status': 'new',
                    'submit time': time.time(),
                    'start time': None,
                    'end time': None
                }

                save_job(suggested_job, db, experiment_name)

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(
                    experiment_name, suggested_job, db_address, expt_dir,
                    output_directory)

                # Print the current time
                logging.info('Current time: %s'
                             % datetime.datetime.now().strftime(
                                 '%Y-%m-%d %H:%M:%S'))

                # Set the status of the job (successfully submitted or not)
                if process_id is None:
                    suggested_job['status'] = 'broken'
                    # BUG FIX: previously formatted with job['id'], but `job`
                    # is undefined here (NameError); the new job is
                    # suggested_job.
                    logging.info('Job %s failed -- check output file for details.'
                                 % suggested_job['id'])
                    save_job(suggested_job, db, experiment_name)
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                    save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

                if len(set(task_couplings.values())) > 1:  # if decoupled
                    print_tasks_status(tasks.values(), jobs)

                # For debug - print pending jobs
                print_pending_jobs(jobs)

            # Terminate when all resources are finished (max number of jobs)
            # or ANY task is finished.  (all()/any() replace the old
            # reduce(and/or, map(...)) calls -- same truth value, and they
            # short-circuit.)
            if (all(r.maxCompleteReached(jobs) for r in resources.values()) or
                    any(t.maxCompleteReached(jobs) for t in tasks.values())):
                # Extra work just to save the final recommendation.
                sys.stdout.write('\n')
                jobs = load_jobs(db, experiment_name)
                tasks = parse_tasks_from_jobs(jobs, experiment_name, options,
                                              input_space)
                hypers = db.load(experiment_name, 'hypers')
                hypers = chooser.fit(tasks, hypers)
                if hypers:
                    db.save(hypers, experiment_name, 'hypers')
                recommendation = chooser.best()
                numComplete_by_task = {task_name: task.numComplete(jobs)
                                       for task_name, task in tasks.items()}
                db.save({'num_complete': resource.numComplete(jobs),
                         'num_complete_tasks': numComplete_by_task,
                         'params': input_space.paramify(
                             recommendation['model_model_input']),
                         'objective': recommendation['model_model_value'],
                         'params_o': None
                             if recommendation['obser_obser_input'] is None
                             else input_space.paramify(
                                 recommendation['obser_obser_input']),
                         'obj_o': recommendation['obser_obser_value'],
                         'params_om': None
                             if recommendation['obser_model_input'] is None
                             else input_space.paramify(
                                 recommendation['obser_model_input']),
                         'obj_om': recommendation['obser_model_value']},
                        experiment_name, 'recommendations',
                        {'id': len(jobs)})
                logging.info('Maximum number of jobs completed. '
                             'Have a nice day.')
                return

        # If no resources are accepting jobs, sleep
        if no_free_resources(db, experiment_name, resources):
            # Don't use logging here: we want to print without a newline.
            sys.stdout.write('Waiting for results...'
                             if not waiting_for_results else '.')
            sys.stdout.flush()
            waiting_for_results = True
            time.sleep(options['polling_time'])
        else:
            sys.stdout.write('\n')
def launch(db_address, experiment_name, job_id):
    """Launch the job with the given id and record its outcome in the DB.

    Variant with 'mcr' support that requires the result's task names to
    match the job's task names exactly; an unnamed scalar result falls back
    to the 'main' task. Saves the job back with status 'complete' or
    'broken'.
    """
    db = MongoDB(database_address=db_address)
    job = db.load(experiment_name, 'jobs', {'id': job_id})

    start_time = time.time()
    job['start time'] = start_time
    db.save(job, experiment_name, 'jobs', {'id': job_id})

    sys.stderr.write("Job launching after %0.2f seconds in submission.\n"
                     % (start_time - job['submit time']))

    success = False
    try:
        # Hoisted: the language string is invariant across the dispatch chain.
        language = job['language'].lower()
        if language == 'matlab':
            result = matlab_launcher(job)
        elif language == 'python':
            result = python_launcher(job)
        elif language == 'shell':
            result = shell_launcher(job)
        elif language == 'mcr':
            result = mcr_launcher(job)
        else:
            raise Exception("That language has not been implemented.")

        if not isinstance(result, dict):
            # Returning just NaN means NaN on all tasks.
            if np.isnan(result):
                result = dict(zip(job['tasks'],
                                  [np.nan] * len(job['tasks'])))
            elif len(job['tasks']) == 1:
                # Only one named job
                result = {job['tasks'][0]: result}
            else:
                result = {'main': result}

        # This variant is strict: the result keys must equal the task names.
        # (list() so the message renders the same on Python 3's dict_keys.)
        if set(result.keys()) != set(job['tasks']):
            raise Exception("Result task names %s did not match job task names %s."
                            % (list(result.keys()), job['tasks']))

        success = True
    # BUG FIX: was a bare `except:`, which also swallowed SystemExit and
    # KeyboardInterrupt; narrow to Exception and still log the traceback.
    except Exception:
        import traceback
        traceback.print_exc()
        sys.stderr.write("Problem executing the function\n")
        print(sys.exc_info())

    end_time = time.time()

    if success:
        sys.stderr.write("Completed successfully in %0.2f seconds. [%s]\n"
                         % (end_time - start_time, result))
        job['values'] = result
        job['status'] = 'complete'
        job['end time'] = end_time
    else:
        sys.stderr.write("Job failed in %0.2f seconds.\n"
                         % (end_time - start_time))
        # Update metadata so the scheduler can detect the broken job.
        job['status'] = 'broken'
        job['end time'] = end_time

    db.save(job, experiment_name, 'jobs', {'id': job_id})
def main(filter=None): """ Usage: python make_plots.py PATH_TO_DIRECTORY """ parser = argparse.ArgumentParser() parser.add_argument('--clean', action='store_true', help='remove broken jobs') parser.add_argument('--table', action='store_true', help='print table') parser.add_argument('--csv', action='store_true', help='save table as csv') parser.add_argument('--d', type=int, help='sort by distance from dth smallest result') parser.add_argument('--name', help='experiment name', default=None) args, unknown = parser.parse_known_args() options, expt_dir = get_options(unknown) # print "options:" # print_dict(options) # reduce the grid size options["grid_size"] = 400 resources = parse_resources_from_config(options) # Load up the chooser. chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser']) chooser = chooser_module.init(options) # print "chooser", chooser if args.name: experiment_name = args.name else: experiment_name = options.get("experiment-name", 'unnamed-experiment') # Connect to the database db_address = options['database']['address'] # sys.stderr.write('Using database at %s.\n' % db_address) db = MongoDB(database_address=db_address) # testing below here jobs = load_jobs(db, experiment_name) print len(jobs), 'jobs found' # print jobs # remove_broken_jobs if args.clean: for job in jobs: if job['status'] == 'pending': sys.stderr.write('Broken job %s detected.\n' % job['id']) job['status'] = 'broken' db.save(job, experiment_name, 'jobs', {'id' : job['id']}) # print "resources:", resources # print_dict(resources) resource = resources.itervalues().next() task_options = {task: options["tasks"][task] for task in resource.tasks} # print "task_options:" # print_dict(task_options) # {'main': {'likelihood': u'NOISELESS', 'type': 'OBJECTIVE'}} task_group = load_task_group(db, options, experiment_name, resource.tasks) hypers = load_hypers(db, experiment_name) chooser.fit(task_group, hypers, task_options) lp, x = chooser.best() if args.table: 
os.chdir(unknown[0]) out_file = open('results.csv', 'w') if args.csv else sys.stdout # get the observed points task = task_group.tasks.itervalues().next() idata = task.valid_normalized_data_dict inputs = idata["inputs"] inputs = map(lambda i: [paramify(task_group, task_group.from_unit(i)).values(), i], inputs) vals = idata["values"] vals = [task.unstandardize_mean(task.unstandardize_variance(v)) for v in vals] out_file.write('\n%10s' % 'result') lengths = [10] for name, vdict in task.variables_meta.iteritems(): name = '%10s' % name out_file.write(',' + name) lengths.append(len(name)) out_file.write('\n') line_template = '%' + str(lengths[0]) + '.4f,' + ','.join(['%' + str(l) + ('.4f' if 'enum' not in inputs[0][0][i]['type'] else 's') for i, l in enumerate(lengths[1:])]) points = sorted(zip(vals, inputs), key=lambda r: r[0]) if args.d is not None: target = x if args.d >= 0: target = points[args.d][1][1] points = sorted(points, key=lambda r: np.linalg.norm(r[1][1] - target)) for i, point in enumerate(points): subs = [point[0]] + [d['values'][0] for d in point[1][0]] out_file.write(line_template % tuple(subs) + '\n') out_file.close()
def main(expt_dir, config_file="config.json", no_output=False, repeat=-1):
    """Run the experiment loop (keyword-argument variant).

    Sets up output/logging, connects to MongoDB, records the overall start
    time, then loops: for each resource accepting jobs, fits the chooser,
    stores recommendations per the 'recommendations' option ("during",
    "end-one", "end-all"), dispatches a suggested job, and terminates on
    resource/task completion limits or 'max_time_mins'.

    Parameters
    ----------
    expt_dir : str, experiment directory containing the config file.
    config_file : str, config file name inside expt_dir.
    no_output : bool, suppress output files/logging when True.
    repeat : int, repeat index for repeated experiments (-1 = not repeated).
    """
    if not os.path.isdir(expt_dir):
        raise Exception("Cannot find directory %s" % expt_dir)

    options = parse_config_file(expt_dir, config_file)
    experiment_name = options["experiment_name"]

    # Special advanced feature for repeating the same experiment many times
    if repeat >= 0:
        experiment_name = repeat_experiment_name(experiment_name, repeat)

    if not no_output:  # if we want output
        if repeat >= 0:
            output_directory = repeat_output_dir(expt_dir, repeat)
        else:
            output_directory = os.path.join(expt_dir, 'output')
        if not os.path.isdir(output_directory):
            os.mkdir(output_directory)
        if repeat < 0:
            # Only attach a file handler for non-repeat runs.
            rootLogger = logging.getLogger()
            fileHandler = logging.FileHandler(
                os.path.join(output_directory, 'main.log'))
            fileHandler.setFormatter(logFormatter)
            fileHandler.setLevel(logLevel)
            rootLogger.addHandler(fileHandler)
    else:
        output_directory = None

    input_space = InputSpace(options["variables"])
    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.'
                                             + options['chooser'])
    chooser = chooser_module.init(input_space, options)

    # Connect to the database
    db_address = options['database']['address']
    db = MongoDB(database_address=db_address)

    overall_start_time = time.time()
    db.save({'start-time': overall_start_time}, experiment_name, 'start-time')

    waiting_for_results = False  # for printing purposes only
    while True:
        for resource_name, resource in resources.items():
            jobs = load_jobs(db, experiment_name)

            # Remove any broken jobs from pending.
            # note: make sure to do this before acceptingJobs() is checked
            remove_broken_jobs(db, jobs, experiment_name, resources)

            # If the resource is currently accepting more jobs
            # TODO: here cost will eventually also be considered: even if the
            # resource is not full, we might wait because of cost incurred
            if resource.acceptingJobs(jobs):
                if waiting_for_results:
                    logging.info('\n')
                waiting_for_results = False

                # Load jobs from DB
                jobs = load_jobs(db, experiment_name)

                # Print out a list of broken jobs
                print_broken_jobs(jobs)

                # Get a suggestion for the next job
                tasks = parse_tasks_from_jobs(jobs, experiment_name, options,
                                              input_space)

                # Special case when coupled and there is a NaN task
                if 'NaN' in tasks and 'NaN' not in resource.tasks:
                    resource.tasks.append('NaN')

                # Load the model hypers from the database.
                hypers = db.load(experiment_name, 'hypers')

                # "Fit" the chooser: even when suggesting for one task we fit
                # all of them, because the acquisition function for one task
                # depends on all the tasks.
                hypers = chooser.fit(tasks, hypers)

                if hypers:
                    logging.debug('GP covariance hyperparameters:')
                    print_hypers(hypers, input_space, options)

                # Save the hyperparameters to the database.
                if hypers:
                    db.save(hypers, experiment_name, 'hypers')

                if options['recommendations'] == "during":
                    # Compute the best value so far, a.k.a. the
                    # "recommendation", and store it in the DB.
                    recommendation = chooser.best()
                    store_recommendation(recommendation, db, experiment_name,
                                         tasks, jobs, input_space,
                                         time.time() - overall_start_time)

                # Get the decoupling groups
                task_couplings = {
                    task_name: tasks[task_name].options["group"]
                    for task_name in resource.tasks
                }

                logging.info('\nGetting suggestion for %s...\n'
                             % (', '.join(task_couplings.keys())))

                # Get the next suggested experiment from the chooser.
                suggested_input, suggested_tasks = chooser.suggest(
                    task_couplings)
                suggested_task = suggested_tasks[0]  # hack, deal with later

                suggested_job = {
                    'id': len(jobs) + 1,
                    'params': input_space.paramify(suggested_input),
                    'expt_dir': options['main_file_path'],
                    'tasks': suggested_tasks,
                    'resource': resource_name,
                    'main-file': options['tasks'][suggested_task]['main_file'],
                    'language': options['tasks'][suggested_task]['language'],
                    'status': 'new',
                    'submit time': time.time(),
                    'start time': None,
                    'end time': None,
                    # just for plotting - not important
                    'fast update': chooser.fast_update
                }

                save_job(suggested_job, db, experiment_name)

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(
                    experiment_name, suggested_job, db_address, expt_dir,
                    output_directory)

                # Print the current time
                logging.info('Current time: %s'
                             % datetime.datetime.now().strftime(
                                 '%Y-%m-%d %H:%M:%S'))

                # Set the status of the job (successfully submitted or not)
                if process_id is None:
                    suggested_job['status'] = 'broken'
                    # BUG FIX: previously formatted with job['id'], but `job`
                    # is undefined here (NameError); the new job is
                    # suggested_job.
                    logging.info('Job %s failed -- check output file for details.'
                                 % suggested_job['id'])
                    save_job(suggested_job, db, experiment_name)
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                    save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

                if len(set(task_couplings.values())) > 1:  # if decoupled
                    print_tasks_status(tasks.values(), jobs)

                # For debug - print pending jobs
                print_pending_jobs(jobs)

            # Terminate when all resources are finished (max number of jobs),
            # ANY task is finished, or the time budget is exhausted.
            jobs = load_jobs(db, experiment_name)
            tasks = parse_tasks_from_jobs(jobs, experiment_name, options,
                                          input_space)
            # all()/any() replace the old reduce(and/or, map(...)) calls --
            # same truth value, and they short-circuit.
            terminate_resources = all(r.maxCompleteReached(jobs)
                                      for r in resources.values())
            terminate_tasks = any(t.maxCompleteReached(jobs)
                                  for t in tasks.values())
            terminate_maxtime = ((time.time() - overall_start_time)
                                 >= (options['max_time_mins'] * 60.0))

            if terminate_resources or terminate_tasks or terminate_maxtime:
                if terminate_resources:
                    logging.info(
                        'Maximum number of jobs completed on all resources.')
                if terminate_tasks:
                    logging.info(
                        'Maximum number of jobs reached for at least one task.')
                if terminate_maxtime:
                    logging.info(
                        'Maximum total experiment time of %f minutes reached.'
                        % options['max_time_mins'])

                # save rec in DB
                if options['recommendations'] in ("during", "end-one"):
                    logging.info('Making final recommendation:')
                    recommendation = chooser.best()
                    store_recommendation(recommendation, db, experiment_name,
                                         tasks, jobs, input_space,
                                         time.time() - overall_start_time,
                                         final=True)
                elif options['recommendations'] == "end-all":
                    # Re-fit on each prefix of jobs to reconstruct the
                    # recommendation trajectory over time.
                    logging.info('Making recommendations...')
                    all_jobs = jobs
                    for i in range(len(all_jobs)):
                        logging.info('')
                        logging.info(
                            '-------------------------------------------------')
                        logging.info(
                            ' Getting recommendations for iter %d/%d '
                            % (i, len(all_jobs)))
                        logging.info(
                            '-------------------------------------------------')
                        logging.info('')
                        jobs = all_jobs[:i + 1]
                        tasks = parse_tasks_from_jobs(jobs, experiment_name,
                                                      options, input_space)
                        hypers = chooser.fit(tasks, hypers)
                        print_hypers(hypers, input_space, options)
                        # get the biggest end time of the jobs
                        end_time = max([job['end time'] for job in jobs])
                        elapsed_time = end_time - overall_start_time
                        recommendation = chooser.best()
                        store_recommendation(recommendation, db,
                                             experiment_name, tasks, jobs,
                                             input_space, elapsed_time)

                logging.info('Have a nice day.')
                return

        # If no resources are accepting jobs, sleep
        if no_free_resources(db, experiment_name, resources):
            # Don't use logging here: we want to print without a newline.
            sys.stdout.write('Waiting for results...'
                             if not waiting_for_results else '.')
            sys.stdout.flush()
            waiting_for_results = True
            time.sleep(options['polling_time'])
        else:
            sys.stdout.write('\n')
class Spearmint(Printer):
    """Adapter that drives a Spearmint backend from parsed observations.

    Parses a JSON config describing the variables, spins up a dedicated
    MongoDB instance per sampling round, replays past observations into the
    database, asks the Spearmint chooser for the next parameter set, and
    shuts the database down again.
    """

    def __init__(self, config_file, work_dir):
        Printer.__init__(self, 'Spearmint', color='grey')
        self.work_dir = work_dir
        print(config_file)
        self._parse_config_file(config_file)
        try:
            self.batch_size = self.param_dict['resources']['my-machine'][
                'max-concurrent']
        except KeyError:
            # Config does not specify concurrency; fall back to serial.
            self.batch_size = 1
        # Running history of every accepted parameter vector and its loss.
        self.all_params, self.all_losses = [], []

    def rand_gens(self, var_type='float', size=1):
        # Uniform samples in [0, 1); only 'float' variables are supported.
        if var_type == 'float':
            return np.random.uniform(low=0, high=1, size=size)
        else:
            raise NotImplementedError

    def _parse_config_file(self, config_file):
        # Parse the JSON config and record each variable's name and size.
        self.json_parser = ParserJSON(file_name=config_file)
        self.json_parser.parse()
        self.param_dict = self.json_parser.param_dict

        # now get the total number of variables
        # and create a dictionary with the size of each variable
        self.total_size = 0
        self.var_sizes = []
        self.var_names = []
        for var_name, var_dict in self.param_dict['variables'].items():
            self.total_size += var_dict['size']
            self.var_sizes.append(int(var_dict['size']))
            self.var_names.append(var_name)

    def _generate_uniform(self, num_samples=10):
        # Propose num_samples uniformly random points (used before any
        # observations exist).
        self.container, self.sampled_params = {}, {}
        values = []
        for var_index, var_name in enumerate(self.var_names):
            sampled_values = self.rand_gens(
                var_type=self.param_dict['variables'][var_name]['type'],
                size=(self.param_dict['variables'][var_name]['size'],
                      num_samples))
            values.extend(sampled_values)
            self.container[var_name] = sampled_values
        values = np.array(values)
        # One proposed row per sample.
        self.proposed = values.transpose()

    def _parse_observations(self, observations):
        # Flatten each observation into a parameter vector + loss, dropping
        # near-duplicates (within 1e-6 of any previously seen vector).
        all_params, all_losses = [], []
        for observation in observations:
            params = []
            for var_name in self.var_names:
                params.extend(observation[var_name]['samples'])
            if len(self.all_params) > 0:
                # BUG FIX: params/old_param are plain Python lists, and
                # list - list raises TypeError; convert to arrays before
                # taking the norm.
                if np.amin([np.linalg.norm(np.asarray(params)
                                           - np.asarray(old_param))
                            for old_param in self.all_params]) > 1e-6:
                    all_losses.append(observation['loss'])
                    all_params.append(params)
            else:
                all_losses.append(observation['loss'])
                all_params.append(params)
        # Fold the new points into the running history.
        for index, element in enumerate(all_params):
            self.all_params.append(element)
            self.all_losses.append(all_losses[index])
        return all_params, all_losses

    def _create_mongo_instance(self):
        # Start a fresh forked mongod with its own db directory.
        self.db_path = '%s/db_%s/' % (self.work_dir,
                                      self.param_dict['experiment-name'])
        print(self.db_path)
        # Best-effort cleanup of a previous run's directory; narrowed from a
        # bare except so real bugs are not silently swallowed.
        try:
            shutil.rmtree(self.db_path)
        except OSError:
            pass
        os.mkdir(self.db_path)
        # NOTE(review): shell=True with interpolated paths is shell-injection
        # prone if work_dir/experiment-name are untrusted; prefer
        # subprocess.call([...], shell=False) -- left as-is to preserve
        # behavior.
        subprocess.call('mongod --fork --logpath %s/mongodb.log --dbpath %s'
                        % (self.db_path, self.db_path),
                        shell=True)

    def _create_spearmint_parameters(self):
        # Build the chooser, resources, and DB connection from the config.
        self._create_mongo_instance()
        self.options, self.exp_dir = get_options(self.work_dir)
        self.resources = parse_resources_from_config(self.options)
        self.chooser_module = importlib.import_module(
            'spearmint.choosers.' + self.options['chooser'])
        self.chooser = self.chooser_module.init(self.options)
        self.experiment_name = self.options.get('experiment-name',
                                                'unnamed_experiment')
        self.db_address = self.options['database']['address']
        self.db = MongoDB(database_address=self.db_address)

    def _sample_parameter_sets(self, num_samples, observations):
        # Replay observations into a fresh DB, ask the chooser for one
        # suggestion, and propose it num_samples times.
        all_params, all_losses = self._parse_observations(observations)
        self._create_spearmint_parameters()

        # dump all observations in database
        for index, param in enumerate(all_params):
            print('PARAM', param, all_losses[index])
            params = {}
            start_index = 0
            for var_index, var_name in enumerate(self.var_names):
                var_dict = self.param_dict['variables'][var_name]
                params[var_name] = {
                    'type': var_dict['type'],
                    'values': np.array(
                        param[start_index:start_index + var_dict['size']])
                }
                start_index += var_dict['size']
            job = {
                'id': index + 1,
                'expt_dir': self.work_dir,
                'tasks': ['main'],
                'resource': 'my-machine',
                'main-file': 'main_file.py',
                'language': 'PYTHON',
                'status': 'new',
                'submit time': time.time(),
                'start time': time.time(),
                'end time': None,
                'params': params
            }
            time.sleep(0.1)
            job['values'] = {'main': all_losses[index]}
            job['status'] = 'complete'
            job['end time'] = time.time()
            self.db.save(job, self.experiment_name, 'jobs',
                         {'id': job['id']})

        self.proposed = []
        for resource_name, resource in self.resources.items():
            print('RUNNING SPEARMINT')
            suggested_job = get_suggestion(self.chooser, resource.tasks,
                                           self.db, self.exp_dir,
                                           self.options, resource_name)
            print('DONE')
            vector = []
            for var_name in self.var_names:
                vector.extend(suggested_job['params'][var_name]['values'])
            vector = np.array(vector)
            # The same suggestion is replicated num_samples times.
            for index in range(num_samples):
                self.proposed.append(vector)
        print('PROPOSED', self.proposed)

        subprocess.call(
            'mongod --shutdown --logpath %s/mongodb.log --dbpath %s'
            % (self.db_path, self.db_path),
            shell=True)

    def choose(self, num_samples=None, observations=None):
        """Return proposed parameter vectors.

        Uses the Spearmint chooser when observations exist, otherwise a
        single uniformly random sample. Runs with work_dir as CWD and
        restores the previous CWD afterwards.
        """
        current_dir = os.getcwd()
        os.chdir(self.work_dir)
        if not num_samples:
            num_samples = self.batch_size
        if observations:
            self._print('proposing samples')
            self._sample_parameter_sets(num_samples, observations)
        else:
            self._print('choosing uniformly')
            self._generate_uniform(1)
        os.chdir(current_dir)
        return self.proposed