def main():
    """Run the Spearmint scheduling loop for one experiment directory.

    Command line: a positional experiment directory and an optional
    ``--config`` file name (default ``config.json``) inside that directory.

    The JSON configuration is loaded, defaults for the chooser, the task list
    and the database address are filled in, and the function then loops
    forever: every resource that accepts jobs is asked to run the chooser's
    next suggestion, and the outcome of each dispatch is stored in the DB.

    Raises:
        Exception: if the experiment directory does not exist, or if the
            configuration file cannot be read or parsed.
    """
    parser = optparse.OptionParser(usage="usage: %prog [options] directory")
    parser.add_option("--config", dest="config_file",
                      help="Configuration file name.",
                      type="string", default="config.json")
    (commandline_kwargs, args) = parser.parse_args()

    # Without this guard a missing argument surfaces as a bare IndexError.
    if not args:
        parser.error("an experiment directory is required")

    # Read in the config file
    expt_dir = os.path.realpath(args[0])
    if not os.path.isdir(expt_dir):
        raise Exception("Cannot find directory %s" % expt_dir)

    expt_file = os.path.join(expt_dir, commandline_kwargs.config_file)
    try:
        with open(expt_file, 'r') as f:
            options = json.load(f, object_pairs_hook=OrderedDict)
    except (IOError, OSError, ValueError) as err:
        # IOError/OSError: the file is missing or unreadable.
        # ValueError: the file is not valid JSON.
        # The underlying error is appended so the real cause is not hidden.
        raise Exception("config.json did not load properly. "
                        "Perhaps a spurious comma? (%s: %s)" % (expt_file, err))
    options["config"] = commandline_kwargs.config_file

    # NOTE: resources are parsed before the defaults below are applied.
    resources = parse_resources_from_config(options)

    # Set sensible defaults for options
    options['chooser'] = options.get('chooser', 'default_chooser')
    if 'tasks' not in options:
        options['tasks'] = {
            'main': {
                'type': 'OBJECTIVE',
                'likelihood': options.get('likelihood', 'GAUSSIAN'),
            },
        }
    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Set DB address
    db_address = parse_db_address(options)
    if 'database' not in options:
        options['database'] = {'name': 'spearmint', 'address': db_address}
    else:
        options['database']['address'] = db_address

    # Load up the chooser.
    chooser_module = importlib.import_module(
        'spearmint.choosers.' + options['chooser'])
    chooser = chooser_module.init(options)

    # Connect to the database
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    while True:
        for resource_name, resource in resources.iteritems():
            jobs = load_jobs(db, experiment_name)

            # Fill up one resource completely before moving on to the next;
            # changing "while" to "if" would hand out one job per sweep.
            # TODO: cost will eventually also be considered: even if the
            # resource is not full, we might wait because of cost incurred.
            while resource.acceptingJobs(jobs):
                # Reload so the suggestion is based on the current DB state.
                jobs = load_jobs(db, experiment_name)

                # Remove any broken jobs from pending.
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Get a suggestion for the next job
                suggested_job = get_suggestion(chooser, resource.tasks, db,
                                               expt_dir, options,
                                               resource_name)

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(experiment_name,
                                                      suggested_job,
                                                      db_address, expt_dir)

                # Record whether the submission succeeded.
                if process_id is None:
                    suggested_job['status'] = 'broken'
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

        # If no resources are accepting jobs, sleep. (Some may be accepting
        # again if the suggestion took long enough for jobs to finish.)
        if tired(db, experiment_name, resources):
            time.sleep(options.get('polling-time', 5))
def main():
    """Drive the Spearmint suggest/dispatch loop until the process is killed.

    Options and the experiment directory come from ``get_options()``. Each
    sweep visits every resource, clears broken jobs, and keeps dispatching
    chooser suggestions to the resource while it accepts jobs. The loop sleeps
    between sweeps when ``tired()`` reports that nothing can be scheduled.
    """
    options, expt_dir = get_options()
    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_path = 'spearmint.choosers.' + options['chooser']
    chooser = importlib.import_module(chooser_path).init(options)

    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Connect to the database
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    while True:
        for resource_name, resource in resources.iteritems():
            jobs = load_jobs(db, experiment_name)

            # Clear broken jobs before the acceptance test below is evaluated.
            remove_broken_jobs(db, jobs, experiment_name, resources)

            # One resource is filled completely before the next is visited
            # ("if" instead of "while" would give one job per sweep).
            # TODO: cost will eventually also be considered: even if the
            # resource is not full, we might wait because of cost incurred.
            while resource.acceptingJobs(jobs):
                # Reload jobs and clear broken ones again: earlier iterations
                # of this loop have changed the DB.
                jobs = load_jobs(db, experiment_name)
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Ask the chooser what to run next, then try to launch it.
                suggested_job = get_suggestion(
                    chooser, resource.tasks, db, expt_dir, options,
                    resource_name)
                pid = resource.attemptDispatch(
                    experiment_name, suggested_job, db_address, expt_dir)

                # A missing process id means the submission failed.
                if pid is not None:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = pid
                else:
                    suggested_job['status'] = 'broken'
                save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

        # If no resources are accepting jobs, sleep. (They might be accepting
        # if suggesting took a while and some jobs finished in the meantime.)
        if tired(db, experiment_name, resources):
            time.sleep(options.get('polling-time', 5))
def _save_recommendation(db, chooser, resource, tasks, jobs, input_space,
                         experiment_name):
    """Store the chooser's current best point in the 'recommendations' table.

    The record is keyed by ``{'id': len(jobs)}`` and holds the number of
    completed jobs (for ``resource`` and per task) plus the inputs and values
    that ``chooser.best()`` returns.
    """
    recommendation = chooser.best()

    def paramify_or_none(point):
        # Two of the recommended inputs may be None; keep None in that case.
        return None if point is None else input_space.paramify(point)

    num_complete_by_task = {task_name: task.numComplete(jobs)
                            for task_name, task in tasks.iteritems()}

    db.save({'num_complete': resource.numComplete(jobs),
             'num_complete_tasks': num_complete_by_task,
             'params': input_space.paramify(recommendation['model_model_input']),
             'objective': recommendation['model_model_value'],
             'params_o': paramify_or_none(recommendation['obser_obser_input']),
             'obj_o': recommendation['obser_obser_value'],
             'params_om': paramify_or_none(recommendation['obser_model_input']),
             'obj_om': recommendation['obser_model_value']},
            experiment_name, 'recommendations', {'id': len(jobs)})


def main():
    """Run the Spearmint optimization loop for one experiment directory.

    Command line: a positional experiment directory, ``--config`` (config file
    name), ``--no-output`` (do not create output files) and ``--repeat``
    (index used to repeat the same experiment many times).

    On each sweep, every resource that accepts jobs gets one new job: the
    chooser is fitted on the tasks parsed from the jobs in the DB, the current
    recommendation is saved, and the suggested job is dispatched. The function
    returns once all resources, or any single task, report that their maximum
    number of completed jobs has been reached.

    Raises:
        Exception: if the experiment directory does not exist.
    """
    parser = optparse.OptionParser(usage="usage: %prog [options] directory")
    parser.add_option("--config", dest="config_file",
                      help="Configuration file name.",
                      type="string", default="config.json")
    parser.add_option("--no-output", action="store_true",
                      help="Do not create output files.")
    parser.add_option("--repeat", dest="repeat",
                      help="Used for repeating the same experiment many times.",
                      type="int", default="-1")
    (commandline_kwargs, args) = parser.parse_args()

    # Without this guard a missing argument surfaces as a bare IndexError.
    if not args:
        parser.error("an experiment directory is required")

    # Read in the config file
    expt_dir = os.path.realpath(args[0])
    if not os.path.isdir(expt_dir):
        raise Exception("Cannot find directory %s" % expt_dir)

    options = parse_config_file(expt_dir, commandline_kwargs.config_file)
    experiment_name = options["experiment-name"]

    # Special advanced feature for repeating the same experiment many times
    repeating = commandline_kwargs.repeat >= 0
    if repeating:
        experiment_name = repeat_experiment_name(experiment_name,
                                                 commandline_kwargs.repeat)

    if not commandline_kwargs.no_output:  # if we want output
        if repeating:
            output_directory = repeat_output_dir(expt_dir,
                                                 commandline_kwargs.repeat)
        else:
            output_directory = os.path.join(expt_dir, 'output',
                                            options["experiment-name"])
        if not os.path.isdir(output_directory):
            os.mkdir(output_directory)

        # The log file is only attached for non-repeated runs.
        if not repeating:
            rootLogger = logging.getLogger()
            fileHandler = logging.FileHandler(
                os.path.join(output_directory, 'main.log'))
            fileHandler.setFormatter(logFormatter)
            fileHandler.setLevel(logLevel)
            rootLogger.addHandler(fileHandler)
    else:
        output_directory = None

    input_space = InputSpace(options["variables"])
    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module(
        'spearmint.choosers.' + options['chooser'])
    chooser = chooser_module.init(input_space, options)

    # Connect to the database
    db_address = options['database']['address']
    db = MongoDB(database_address=db_address)

    # The environment variable wins over the config file, which wins over
    # the built-in default.
    # NOTE(review): maxiterations is not read again in this function.
    env_max_iterations = os.getenv('SPEARMINT_MAX_ITERATIONS')
    if env_max_iterations is not None:
        maxiterations = int(env_max_iterations)
    elif 'max_iterations' in options:
        maxiterations = options['max_iterations']
    else:
        maxiterations = DEFAULT_MAX_ITERATIONS

    # Set random seed
    if 'random_seed' in options:
        np.random.seed(int(options['random_seed']))
        seed(int(options['random_seed']))

    waiting_for_results = False  # for printing purposes only

    # Bound up front so the termination test below cannot hit an unbound
    # name when no resource accepted a job during the first sweep.
    tasks = {}

    while True:
        for resource_name, resource in resources.iteritems():
            jobs = load_jobs(db, experiment_name)

            # Remove any broken jobs from pending. This must happen before
            # the acceptingJobs() condition is checked.
            remove_broken_jobs(db, jobs, experiment_name, resources)

            # "if" gives each resource one job per sweep; "while" would fill
            # one resource up before moving on to the next.
            # TODO: cost will eventually also be considered: even if the
            # resource is not full, we might wait because of cost incurred.
            if resource.acceptingJobs(jobs):
                if waiting_for_results:
                    logging.info('\n')
                waiting_for_results = False

                optim_start_time = time.time()

                # Load jobs from DB
                jobs = load_jobs(db, experiment_name)

                # Print out a list of broken jobs
                print_broken_jobs(jobs)

                # Build the tasks the chooser is fitted on.
                tasks = parse_tasks_from_jobs(jobs, experiment_name, options,
                                              input_space)

                # Special case when coupled and there is a NaN task.
                if 'NaN' in tasks and 'NaN' not in resource.tasks:
                    resource.tasks.append('NaN')

                # Load the model hypers from the database.
                hypers = db.load(experiment_name, 'hypers')

                # "Fit" the chooser. Even when suggesting for one task, all of
                # them are fitted, because the acquisition function for one
                # task depends on all the tasks.
                hypers = chooser.fit(tasks, hypers)

                if hypers:
                    logging.debug('GP covariance hyperparameters:')
                    print_hypers(hypers)
                    # Save the hyperparameters to the database.
                    db.save(hypers, experiment_name, 'hypers')

                # Compute the best value so far (the "recommendation") and
                # save it in the DB.
                _save_recommendation(db, chooser, resource, tasks, jobs,
                                     input_space, experiment_name)

                # Get the decoupling groups
                task_couplings = {
                    task_name: tasks[task_name].options["group"]
                    for task_name in resource.tasks
                }

                logging.info('\nGetting suggestion for %s...\n'
                             % (', '.join(task_couplings.keys())))

                # Get the next suggested experiment from the chooser.
                suggested_input, suggested_tasks = chooser.suggest(
                    task_couplings, optim_start_time)
                suggested_task = suggested_tasks[0]  # hack, deal with later

                suggested_job = {
                    'id': len(jobs) + 1,
                    'params': input_space.paramify(suggested_input),
                    'expt_dir': options['main_file_path'],
                    'tasks': suggested_tasks,
                    'resource': resource_name,
                    'main-file': resource.main_file,
                    'language': options['tasks'][suggested_task]['language'],
                    'status': 'new',
                    'submit time': time.time(),
                    'start time': None,
                    'end time': None,
                }

                save_job(suggested_job, db, experiment_name)

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(
                    experiment_name, suggested_job, db_address, expt_dir,
                    output_directory)

                # Print the current time
                logging.info('Current time: %s'
                             % datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

                # Record whether the submission succeeded.
                if process_id is None:
                    suggested_job['status'] = 'broken'
                    # Fixed: this used the undefined name `job`, which raised
                    # NameError instead of logging the failure.
                    logging.info('Job %s failed -- check output file for details.'
                                 % suggested_job['id'])
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

                if len(set(task_couplings.values())) > 1:  # if decoupled
                    print_tasks_status(tasks.values(), jobs)

                # For debug - print pending jobs
                print_pending_jobs(jobs)

        # Terminate the optimization if ALL resources are finished (ran their
        # max number of jobs) or ANY task is finished.
        all_resources_done = all(res.maxCompleteReached(jobs)
                                 for res in resources.values())
        if all_resources_done or any(task.maxCompleteReached(jobs)
                                     for task in tasks.values()):
            # The extra work below only saves the final recommendation.
            sys.stdout.write('\n')
            jobs = load_jobs(db, experiment_name)
            tasks = parse_tasks_from_jobs(jobs, experiment_name, options,
                                          input_space)
            hypers = db.load(experiment_name, 'hypers')
            hypers = chooser.fit(tasks, hypers)
            if hypers:
                db.save(hypers, experiment_name, 'hypers')
            _save_recommendation(db, chooser, resource, tasks, jobs,
                                 input_space, experiment_name)
            logging.info('Maximum number of jobs completed. Have a nice day.')
            return

        # If no resources are accepting jobs, sleep
        if no_free_resources(db, experiment_name, resources):
            # Plain stdout instead of logging, so that progress dots can be
            # written without a trailing newline.
            sys.stdout.write('Waiting for results...'
                             if not waiting_for_results else '.')
            sys.stdout.flush()
            waiting_for_results = True
            time.sleep(options['polling_time'])
        else:
            sys.stdout.write('\n')
def main():
    """Endless Spearmint scheduler: suggest, dispatch, record, repeat.

    Reads the options through ``get_options()``, builds the resources and the
    chooser named in the options, connects to MongoDB, and then keeps handing
    chooser suggestions to every resource that accepts jobs.
    """
    options, expt_dir = get_options()
    resources = parse_resources_from_config(options)

    # Load up the chooser.
    module_name = 'spearmint.choosers.' + options['chooser']
    chooser = importlib.import_module(module_name).init(options)

    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Connect to the database
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    while True:
        for resource_name, resource in resources.iteritems():
            jobs = load_jobs(db, experiment_name)

            # Keep feeding this resource until it is full, then move on to
            # the next one ("if" here would give one job per sweep).
            # TODO: cost will eventually also be considered: even if the
            # resource is not full, we might wait because of cost incurred.
            while resource.acceptingJobs(jobs):
                # Work from a fresh view of the DB.
                jobs = load_jobs(db, experiment_name)

                # Remove any broken jobs from pending.
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Get a suggestion for the next job
                suggested_job = get_suggestion(chooser, resource.tasks, db,
                                               expt_dir, options,
                                               resource_name)

                # Submit the job to the appropriate resource
                launched_pid = resource.attemptDispatch(experiment_name,
                                                        suggested_job,
                                                        db_address, expt_dir)

                # No process id means the submission failed.
                dispatched = launched_pid is not None
                suggested_job['status'] = 'pending' if dispatched else 'broken'
                if dispatched:
                    suggested_job['proc_id'] = launched_pid
                save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

        # Sleep only when tired() reports nothing is accepting jobs; a
        # resource may have freed up while the suggestion was computed.
        if tired(db, experiment_name, resources):
            time.sleep(options.get('polling-time', 5))
def main(args=None):
    """Run the scheduling loop, optionally split into phases by "resets".

    Args:
        args: forwarded unchanged to ``get_options``.

    When the ``"resets"`` option is a non-empty list, the experiment runs in
    phases. Phase ``k`` uses the experiment name ``<name>__k``. Once the
    number of complete jobs equals ``resets[k]`` and nothing is pending, the
    hypers are copied to the next phase's name and the job id offset grows by
    ``resets[k]``. Dispatching is paused while complete plus pending jobs
    already reach the current phase's quota.
    """
    options, expt_dir = get_options(args)
    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module(
        'spearmint.choosers.' + options['chooser'])
    chooser = chooser_module.init(options)

    base_name = options.get("experiment-name", 'unnamed-experiment')
    experiment_name = base_name
    resets = options.get("resets", [])
    job_id_offset = 0
    current_phase = 0

    if resets:
        experiment_name += '__' + str(current_phase)
        print('STARTING PHASE ' + str(current_phase + 1)
              + ' (' + experiment_name + ')')

    # Connect to the database
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    while True:
        pause = False

        if resets:
            jobs = load_jobs(db, experiment_name)
            num_pending_jobs = sum(
                1 for job in jobs if job['status'] == 'pending')
            num_finished_jobs = sum(
                1 for job in jobs if job['status'] == 'complete')

            phase_quota = resets[current_phase]
            if num_finished_jobs == phase_quota and num_pending_jobs == 0:
                # Phase finished: carry the hypers over to the next phase.
                job_id_offset += phase_quota
                current_phase += 1
                new_experiment_name = options.get(
                    "experiment-name",
                    'unnamed-experiment') + '__' + str(current_phase)
                print('STARTING PHASE ' + str(current_phase + 1)
                      + ' (' + new_experiment_name + ')')
                old_hypers = load_hypers(db, experiment_name)
                save_hypers(old_hypers, db, new_experiment_name)
                experiment_name = new_experiment_name

            # NOTE(review): after the last phase this index is past the end
            # of `resets`; the counts also still describe the previous phase
            # right after a switch. Confirm this is intended.
            if num_finished_jobs + num_pending_jobs >= resets[current_phase]:
                pause = True

        for resource_name, resource in resources.iteritems():
            jobs = load_jobs(db, experiment_name)

            # Fill up one resource before moving to the next ("if" instead of
            # "while" would give one job per sweep).
            # TODO: cost will eventually also be considered: even if the
            # resource is not full, we might wait because of cost incurred.
            while resource.acceptingJobs(jobs) and not pause:
                # Load jobs from DB
                jobs = load_jobs(db, experiment_name)
                print('Found %d jobs in db' % len(jobs))

                # Remove any broken jobs from pending.
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Get a suggestion for the next job
                suggested_job = get_suggestion(
                    chooser, resource.tasks, db, experiment_name, expt_dir,
                    options, resource_name, job_id_offset)

                # Submit the job to the appropriate resource
                pid = resource.attemptDispatch(
                    experiment_name, suggested_job, db_address, expt_dir)

                # Record whether the submission succeeded.
                if pid is not None:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = pid
                else:
                    suggested_job['status'] = 'broken'
                save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

        # Sleep when nothing is accepting jobs or the phase quota is reached.
        if tired(db, experiment_name, resources) or pause:
            time.sleep(options.get('polling-time', 5))
def runSpearmint(self, name): options, expt_dir = self.get_options([os.path.abspath(os.path.join(self.scratchPath,name))]) resources = main.parse_resources_from_config(options) # Load up the chooser. chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser']) chooser = chooser_module.init(options) experiment_name = options.get("experiment-name", 'unnamed-experiment') # Connect to the database db_address = options['database']['address'] sys.stderr.write('Using database at %s.\n' % db_address) db = MongoDB(database_address=db_address) threshold = 1e-2 look_back = 3 stopping = False while not stopping: for resource_name, resource in resources.iteritems(): jobs = main.load_jobs(db, experiment_name) # resource.printStatus(jobs) # If the resource is currently accepting more jobs # TODO: here cost will eventually also be considered: even if the # resource is not full, we might wait because of cost incurred # Note: I chose to fill up one resource and them move on to the next # You could also do it the other way, by changing "while" to "if" here while resource.acceptingJobs(jobs): # Load jobs from DB # (move out of one or both loops?) would need to pass into load_tasks jobs = main.load_jobs(db, experiment_name) #pprint.pprint(main.load_hypers(db, experiment_name)) # Remove any broken jobs from pending. 
main.remove_broken_jobs(db, jobs, experiment_name, resources) # Get a suggestion for the next job suggested_job = main.get_suggestion(chooser, resource.tasks, db, expt_dir, options, resource_name) # Submit the job to the appropriate resource process_id = resource.attemptDispatch(experiment_name, suggested_job, db_address, expt_dir) # Set the status of the job appropriately (successfully submitted or not) if process_id is None: suggested_job['status'] = 'broken' main.save_job(suggested_job, db, experiment_name) else: suggested_job['status'] = 'pending' suggested_job['proc_id'] = process_id main.save_job(suggested_job, db, experiment_name) jobs = main.load_jobs(db, experiment_name) # Print out the status of the resources # resource.printStatus(jobs) print_resources_status(resources.values(), jobs) stalled = [] for task in main.load_task_group(db, options, resource.tasks).tasks.values(): performance = task.valid_normalized_data_dict["values"][::-1] stalled.append(0) if len(performance) > look_back: print performance[0:look_back] print "Diffs: ", within_thresh = True for i,run in enumerate(performance[0:look_back]): diff = abs(run - performance[i+1]) print str(round(diff,2))+", ", if diff > threshold: within_thresh = False print "...No stall" break if within_thresh: stalled[len(stalled)-1] = 1 if all(stalled): sys.exit("Stalled!") # If no resources are accepting jobs, sleep # (they might be accepting if suggest takes a while and so some jobs already finished by the time this point is reached) if main.tired(db, experiment_name, resources): time.sleep(options.get('polling-time', 5))
def main():
    """Run the Spearmint loop until ``stoppingCriterion`` ends it, then test.

    A sliding window of the last ``converg_num`` non-None best values from
    ``chooser.get_best()`` is kept and passed to
    ``stoppingCriterion(past_best, converg_num)`` before every sweep. When the
    loop ends, the elapsed wall-clock time is handed to ``runBestParams``.
    """
    options, expt_dir = get_options()
    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module(
        'spearmint.choosers.' + options['chooser'])
    chooser = chooser_module.init(options)

    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Connect to the database
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    # Setting up record for convergence
    past_best = []
    converg_num = 20

    startTraining = time.time()
    while stoppingCriterion(past_best, converg_num):
        for resource_name, resource in resources.iteritems():
            jobs = load_jobs(db, experiment_name)

            # Fill up one resource before moving to the next ("if" instead of
            # "while" would give one job per sweep).
            # TODO: cost will eventually also be considered: even if the
            # resource is not full, we might wait because of cost incurred.
            while resource.acceptingJobs(jobs):
                # Load jobs from DB
                jobs = load_jobs(db, experiment_name)

                # Remove any broken jobs from pending.
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Get a suggestion for the next job
                suggested_job = get_suggestion(
                    chooser, resource.tasks, db, expt_dir, options,
                    resource_name)

                # Submit the job to the appropriate resource
                pid = resource.attemptDispatch(
                    experiment_name, suggested_job, db_address, expt_dir)

                # Record whether the submission succeeded.
                if pid is not None:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = pid
                else:
                    suggested_job['status'] = 'broken'
                save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

                # Record the current best. None values are dropped and only
                # the newest `converg_num` entries are kept.
                best_val, best_input = chooser.get_best()
                past_best = [val for val in past_best + [best_val]
                             if val is not None]
                if len(past_best) > converg_num:
                    del past_best[0]

        # If no resources are accepting jobs, sleep. (They might be accepting
        # if suggesting took a while and some jobs finished in the meantime.)
        if tired(db, experiment_name, resources):
            print("Sleeping...")
            time.sleep(options.get('polling-time', 5))

    endTraining = time.time()
    trainingTime = endTraining - startTraining

    # After training, test best results
    runBestParams(5000, chooser, db, experiment_name, trainingTime)
def main():
    """Entry point: load the experiment config and schedule jobs forever.

    Command line: a positional experiment directory and an optional
    ``--config`` option naming the JSON file inside it (default
    ``config.json``).

    After the configuration is loaded, defaults for ``chooser``, ``tasks``
    and the database address are applied, the chooser module is imported, and
    the function enters an endless loop that dispatches chooser suggestions
    to every resource that accepts jobs.

    Raises:
        Exception: if the experiment directory does not exist, or if the
            configuration file cannot be read or parsed.
    """
    parser = optparse.OptionParser(usage="usage: %prog [options] directory")
    parser.add_option("--config", dest="config_file",
                      help="Configuration file name.",
                      type="string", default="config.json")
    (commandline_kwargs, args) = parser.parse_args()

    # A missing positional argument used to surface as a bare IndexError.
    if not args:
        parser.error("an experiment directory is required")

    # Read in the config file
    expt_dir = os.path.realpath(args[0])
    if not os.path.isdir(expt_dir):
        raise Exception("Cannot find directory %s" % expt_dir)

    expt_file = os.path.join(expt_dir, commandline_kwargs.config_file)
    try:
        with open(expt_file, 'r') as f:
            options = json.load(f, object_pairs_hook=OrderedDict)
    except (IOError, OSError, ValueError) as err:
        # Only I/O and JSON-syntax failures are wrapped here. The cause is
        # appended so a missing file is not mistaken for a syntax error.
        raise Exception(
            "config.json did not load properly. Perhaps a spurious comma? "
            "(%s: %s)" % (expt_file, err))
    options["config"] = commandline_kwargs.config_file

    # NOTE: resources are parsed before the defaults below are applied.
    resources = parse_resources_from_config(options)

    # Set sensible defaults for options
    options['chooser'] = options.get('chooser', 'default_chooser')
    options['tasks'] = options.get(
        'tasks', {'main': {
            'type': 'OBJECTIVE',
            'likelihood': 'GAUSSIAN'
        }})
    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Set DB address
    db_address = parse_db_address(options)
    if 'database' in options:
        options['database']['address'] = db_address
    else:
        options['database'] = {'name': 'spearmint', 'address': db_address}

    # Load up the chooser.
    chooser_module = importlib.import_module(
        'spearmint.choosers.' + options['chooser'])
    chooser = chooser_module.init(options)

    # Connect to the database
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    while True:
        for resource_name, resource in resources.iteritems():
            jobs = load_jobs(db, experiment_name)

            # Fill up one resource and then move on to the next; changing
            # "while" to "if" would hand out one job per resource per sweep.
            # TODO: cost will eventually also be considered: even if the
            # resource is not full, we might wait because of cost incurred.
            while resource.acceptingJobs(jobs):
                # Load jobs from DB
                jobs = load_jobs(db, experiment_name)

                # Remove any broken jobs from pending.
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Get a suggestion for the next job
                suggested_job = get_suggestion(chooser, resource.tasks, db,
                                               expt_dir, options,
                                               resource_name)

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(experiment_name,
                                                      suggested_job,
                                                      db_address, expt_dir)

                # Record whether the submission succeeded.
                if process_id is None:
                    suggested_job['status'] = 'broken'
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

        # If no resources are accepting jobs, sleep. (They might be accepting
        # if suggesting took a while and some jobs finished in the meantime.)
        if tired(db, experiment_name, resources):
            time.sleep(options.get('polling-time', 5))
def main(args=None):
    """Spearmint scheduling loop with optional phase resets.

    Args:
        args: passed straight through to ``get_options``.

    With a non-empty ``"resets"`` list in the options, jobs are stored under
    ``<experiment-name>__<phase>``. A phase ends when exactly
    ``resets[phase]`` jobs are complete and none are pending. The hypers are
    then copied to the next phase's experiment name and ``job_id_offset`` is
    increased by the finished phase's quota. No new jobs are dispatched on a
    sweep where complete plus pending jobs already meet the phase quota.
    """
    options, expt_dir = get_options(args)
    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser = importlib.import_module(
        'spearmint.choosers.' + options['chooser']).init(options)

    experiment_name = options.get("experiment-name", 'unnamed-experiment')
    resets = options.get("resets", [])
    job_id_offset = 0
    current_phase = 0

    if resets:
        experiment_name = experiment_name + '__' + str(current_phase)
        print('STARTING PHASE ' + str(current_phase + 1)
              + ' (' + experiment_name + ')')

    # Connect to the database
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    while True:
        pause = False

        if resets:
            jobs = load_jobs(db, experiment_name)
            statuses = [job['status'] for job in jobs]
            num_pending_jobs = statuses.count('pending')
            num_finished_jobs = statuses.count('complete')

            phase_done = (num_pending_jobs == 0
                          and num_finished_jobs == resets[current_phase])
            if phase_done:
                # Move to the next phase, seeding it with the current hypers.
                job_id_offset += resets[current_phase]
                current_phase += 1
                new_experiment_name = options.get(
                    "experiment-name",
                    'unnamed-experiment') + '__' + str(current_phase)
                print('STARTING PHASE ' + str(current_phase + 1)
                      + ' (' + new_experiment_name + ')')
                old_hypers = load_hypers(db, experiment_name)
                save_hypers(old_hypers, db, new_experiment_name)
                experiment_name = new_experiment_name

            # NOTE(review): `current_phase` may now be past the end of
            # `resets` (after the final phase). Confirm the intended ending.
            pause = (num_finished_jobs + num_pending_jobs
                     >= resets[current_phase])

        for resource_name, resource in resources.iteritems():
            jobs = load_jobs(db, experiment_name)

            # Fill up one resource before moving to the next ("if" instead of
            # "while" would give one job per sweep).
            # TODO: cost will eventually also be considered: even if the
            # resource is not full, we might wait because of cost incurred.
            while resource.acceptingJobs(jobs) and not pause:
                # Load jobs from DB
                jobs = load_jobs(db, experiment_name)
                print('Found %d jobs in db' % len(jobs))

                # Remove any broken jobs from pending.
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Get a suggestion for the next job
                suggested_job = get_suggestion(chooser, resource.tasks, db,
                                               experiment_name, expt_dir,
                                               options, resource_name,
                                               job_id_offset)

                # Submit the job to the appropriate resource
                launched_pid = resource.attemptDispatch(experiment_name,
                                                        suggested_job,
                                                        db_address, expt_dir)

                # Record whether the submission succeeded.
                if launched_pid is None:
                    suggested_job['status'] = 'broken'
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = launched_pid
                save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

        # Sleep when nothing is accepting jobs or dispatching is paused.
        if tired(db, experiment_name, resources) or pause:
            time.sleep(options.get('polling-time', 5))
def main():
    """Run the suggestion/dispatch loop with externally supplied points injected.

    Before every suggestion, ``em_hack.add_historical_points_to_db`` is called
    with the database, the experiment name and the experiment directory, so
    that points produced outside the optimizer (e.g. by EM) are present in the
    database when the next suggestion is computed.

    ``hack_iter`` counts suggestion rounds.  The limit of 50 is only checked
    between passes over the resources, so a pass that is already under way is
    not interrupted when the counter reaches 50.
    """
    options, expt_dir = get_options()
    resources = parse_resources_from_config(options)

    # Import the configured chooser module and let it build the chooser.
    chooser = importlib.import_module(
        'spearmint.choosers.' + options['chooser']).init(options)

    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Open the database connection.
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    hack_iter = 0
    while hack_iter < 50:
        for resource_name, resource in resources.iteritems():
            # TODO (moonkey): possibly redundant, the jobs are loaded again
            # inside the inner loop.
            jobs = load_jobs(db, experiment_name)

            # Fill this resource completely before moving on to the next one
            # (use "if" instead of "while" for the other scheduling order).
            # TODO: cost is not considered yet when deciding whether to submit.
            while resource.acceptingJobs(jobs):
                # ---- HACK: start of a round ----
                # Extra points and objective values written as json next to
                # 'config.json' are stored in the database here, so they are
                # treated as if they had been sampled by BO when the GP and
                # the acquisition functions are computed.
                hack_iter += 1
                sys.stderr.write('###########hack_iter:' + str(hack_iter) + "###########\n")
                em_hack.add_historical_points_to_db(db, experiment_name, expt_dir)
                # ---- HACK: end ----

                # Refresh the job list and drop broken jobs from pending.
                jobs = load_jobs(db, experiment_name)
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Ask the chooser for the next job and try to dispatch it.
                suggested_job = get_suggestion(chooser, resource.tasks, db,
                                               expt_dir, options, resource_name)
                process_id = resource.attemptDispatch(experiment_name, suggested_job,
                                                      db_address, expt_dir)

                # Record whether the submission succeeded.
                if process_id is None:
                    suggested_job['status'] = 'broken'
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                save_job(suggested_job, db, experiment_name)

                # Report the resource status with the up-to-date job list.
                jobs = load_jobs(db, experiment_name)
                print_resources_status(resources.values(), jobs)

        # Sleep when no resource accepts jobs.  (Some may accept again already
        # if the suggestion took long enough for jobs to finish meanwhile.)
        if tired(db, experiment_name, resources):
            time.sleep(options.get('polling-time', 0))
def main(args):
    """Run a bounded number of experiments and report the best result.

    Options read (with defaults): "budget" (20), "count" (0, the starting
    value of the experiment counter), "ei" (0.10) and "maxbudget" (10).  A new
    experiment is submitted while ``count < budget``, or while
    ``chooser.ei >= ei`` and ``count < maxbudget``.

    After the submission loop ends, the function waits until
    ``tired(...)`` is false, prints ``chooser.best_value`` and returns the
    tuple ``(chooser.best_jobid, chooser.best_value, count)``.
    """
    options, expt_dir = get_options(args)
    print(options)

    resources = parse_resources_from_config(options)

    # Import the configured chooser module and let it build the chooser.
    chooser = importlib.import_module(
        'spearmint.choosers.' + options['chooser']).init(options)

    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Open the database connection.
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    # The loop is bounded instead of running forever.
    budget = options.get("budget", 20)
    count = options.get("count", 0)
    ei_threshold = options.get("ei", 0.10)
    max_budget = options.get("maxbudget", 10)

    def may_continue():
        # chooser.ei is only consulted once the plain budget is used up.
        return count < budget or (chooser.ei >= ei_threshold and count < max_budget)

    while may_continue():
        for resource_name, resource in resources.iteritems():
            jobs = load_jobs(db, experiment_name)

            # Fill this resource completely before moving on to the next one
            # (use "if" instead of "while" for the other scheduling order).
            # TODO: cost is not considered yet when deciding whether to submit.
            while resource.acceptingJobs(jobs):
                if not may_continue():
                    break

                sys.stderr.write("Proceeding to next experiment\n")

                # Refresh the job list and drop broken jobs from pending.
                jobs = load_jobs(db, experiment_name)
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Ask the chooser for the next job and try to dispatch it.
                suggested_job = get_suggestion(chooser, resource.tasks, db,
                                               expt_dir, options, resource_name)
                process_id = resource.attemptDispatch(experiment_name, suggested_job,
                                                      db_address, expt_dir)

                # Record whether the submission succeeded.
                if process_id is None:
                    suggested_job['status'] = 'broken'
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                save_job(suggested_job, db, experiment_name)

                # Report the resource status with the up-to-date job list.
                jobs = load_jobs(db, experiment_name)
                print_resources_status(resources.values(), jobs)
                count += 1

        # Sleep when no resource accepts jobs.  (Some may accept again already
        # if the suggestion took long enough for jobs to finish meanwhile.)
        if tired(db, experiment_name, resources):
            time.sleep(options.get('polling-time', 5))

    # Keep polling until tired() reports false before reading the result.
    while tired(db, experiment_name, resources):
        time.sleep(options.get('polling-time', 5))

    print(chooser.best_value)
    return chooser.best_jobid, chooser.best_value, count
def main(expt_dir, config_file="config.json", no_output=False, repeat=-1):
    """Run one experiment until a termination condition is met.

    Args:
        expt_dir: Experiment directory; must exist.
        config_file: Name of the configuration file, handed to
            ``parse_config_file`` together with ``expt_dir``.
        no_output: When true, no output directory is created and ``None`` is
            passed to ``attemptDispatch`` as the output directory.
        repeat: When >= 0, the experiment name and output directory are
            derived via ``repeat_experiment_name`` / ``repeat_output_dir`` and
            no 'main.log' file handler is installed.

    Returns:
        None, once all resources report ``maxCompleteReached``, any task does,
        or ``options['max_time_mins']`` minutes have elapsed.

    Raises:
        Exception: If ``expt_dir`` is not a directory.
    """
    if not os.path.isdir(expt_dir):
        raise Exception("Cannot find directory %s" % expt_dir)

    options = parse_config_file(expt_dir, config_file)
    experiment_name = options["experiment_name"]

    # Special advanced feature for repeating the same experiment many times
    if repeat >= 0:
        experiment_name = repeat_experiment_name(experiment_name, repeat)

    if not no_output:  # if we want output
        if repeat >= 0:
            output_directory = repeat_output_dir(expt_dir, repeat)
        else:
            output_directory = os.path.join(expt_dir, 'output')
        if not os.path.isdir(output_directory):
            os.mkdir(output_directory)

        # Only a non-repeated run logs to <output>/main.log.
        if repeat < 0:
            rootLogger = logging.getLogger()
            fileHandler = logging.FileHandler(
                os.path.join(output_directory, 'main.log'))
            fileHandler.setFormatter(logFormatter)
            fileHandler.setLevel(logLevel)
            rootLogger.addHandler(fileHandler)
    else:
        output_directory = None

    input_space = InputSpace(options["variables"])

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser'])
    chooser = chooser_module.init(input_space, options)

    # Connect to the database
    db_address = options['database']['address']
    db = MongoDB(database_address=db_address)

    overall_start_time = time.time()
    db.save({'start-time': overall_start_time}, experiment_name, 'start-time')

    waiting_for_results = False  # for printing purposes only
    while True:
        for resource_name, resource in resources.iteritems():
            jobs = load_jobs(db, experiment_name)

            # TODO: here cost will eventually also be considered: even if the
            #       resource is not full, we might wait because of cost incurred
            # Note: at most one job is submitted per resource per pass ("if");
            #       changing "if" to "while" would fill up one resource first.

            # Remove any broken jobs from pending
            # note: make sure to do this before the acceptingJobs() condition is checked
            remove_broken_jobs(db, jobs, experiment_name, resources)

            if resource.acceptingJobs(jobs):
                if waiting_for_results:
                    logging.info('\n')
                waiting_for_results = False

                # Load jobs from DB
                jobs = load_jobs(db, experiment_name)

                # Print out a list of broken jobs
                print_broken_jobs(jobs)

                # Get a suggestion for the next job
                tasks = parse_tasks_from_jobs(jobs, experiment_name, options, input_space)

                # Special case when coupled and there is a NaN task-- what to
                # do with NaN task when decoupled??
                if 'NaN' in tasks and 'NaN' not in resource.tasks:
                    resource.tasks.append('NaN')

                # Load the model hypers from the database.
                hypers = db.load(experiment_name, 'hypers')

                # "Fit" the chooser - give the chooser data and let it fit the model(s).
                # NOTE: even if we are only suggesting for 1 task, we need to fit all of them
                #       because the acquisition function for one task depends on all the tasks
                hypers = chooser.fit(tasks, hypers)

                if hypers:
                    logging.debug('GP covariance hyperparameters:')
                    # NOTE(review): the original indentation was lost; printing
                    # is kept under the same guard as the heading.
                    print_hypers(hypers, input_space, options)

                    # Save the hyperparameters to the database.
                    db.save(hypers, experiment_name, 'hypers')

                if options['recommendations'] == "during":
                    # Compute the best value so far, a.k.a. the "recommendation"
                    recommendation = chooser.best()

                    # Save the recommendation in the DB if there are more
                    # complete jobs than last time
                    store_recommendation(recommendation, db, experiment_name,
                                         tasks, jobs, input_space,
                                         time.time() - overall_start_time)

                # Get the decoupling groups
                task_couplings = {
                    task_name: tasks[task_name].options["group"]
                    for task_name in resource.tasks
                }

                logging.info('\nGetting suggestion for %s...\n'
                             % (', '.join(task_couplings.keys())))

                # Get the next suggested experiment from the chooser.
                suggested_input, suggested_tasks = chooser.suggest(task_couplings)
                suggested_task = suggested_tasks[0]  # hack, deal with later

                suggested_job = {
                    'id': len(jobs) + 1,
                    'params': input_space.paramify(suggested_input),
                    'expt_dir': options['main_file_path'],
                    'tasks': suggested_tasks,
                    'resource': resource_name,
                    'main-file': options['tasks'][suggested_task]['main_file'],
                    'language': options['tasks'][suggested_task]['language'],
                    'status': 'new',
                    'submit time': time.time(),
                    'start time': None,
                    'end time': None,
                    'fast update': chooser.fast_update,  # just for plotting - not important
                }

                save_job(suggested_job, db, experiment_name)

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(experiment_name, suggested_job,
                                                      db_address, expt_dir,
                                                      output_directory)

                # Print the current time
                logging.info('Current time: %s'
                             % datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

                # Set the status of the job appropriately (successfully submitted or not)
                if process_id is None:
                    suggested_job['status'] = 'broken'
                    # The job that failed is `suggested_job`; the previously
                    # used name `job` is not defined in this function.
                    logging.info('Job %s failed -- check output file for details.'
                                 % suggested_job['id'])
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

                if len(set(task_couplings.values())) > 1:  # if decoupled
                    print_tasks_status(tasks.values(), jobs)

                # For debug - print pending jobs
                print_pending_jobs(jobs)

        # Terminate the optimization if all resources are finished (run max
        # number of jobs) or ANY task is finished (just my weird convention)
        jobs = load_jobs(db, experiment_name)
        tasks = parse_tasks_from_jobs(jobs, experiment_name, options, input_space)
        # Lists (not generators) so every maxCompleteReached() call is made,
        # as with the former reduce/map formulation.
        terminate_resources = all(
            [res.maxCompleteReached(jobs) for res in resources.values()])
        terminate_tasks = any(
            [task.maxCompleteReached(jobs) for task in tasks.values()])
        terminate_maxtime = (time.time() - overall_start_time) >= (
            options['max_time_mins'] * 60.0)

        if terminate_resources or terminate_tasks or terminate_maxtime:
            if terminate_resources:
                logging.info('Maximum number of jobs completed on all resources.')
            if terminate_tasks:
                logging.info('Maximum number of jobs reached for at least one task.')
            if terminate_maxtime:
                logging.info('Maximum total experiment time of %f minutes reached.'
                             % options['max_time_mins'])

            # save rec in DB
            if options['recommendations'] in ("during", "end-one"):
                logging.info('Making final recommendation:')
                recommendation = chooser.best()
                store_recommendation(recommendation, db, experiment_name,
                                     tasks, jobs, input_space,
                                     time.time() - overall_start_time,
                                     final=True)
            elif options['recommendations'] == "end-all":
                # Refit on each prefix of the job list and store one
                # recommendation per prefix.
                logging.info('Making recommendations...')
                all_jobs = jobs
                for i in xrange(len(all_jobs)):
                    logging.info('')
                    logging.info('-------------------------------------------------')
                    logging.info(' Getting recommendations for iter %d/%d '
                                 % (i, len(all_jobs)))
                    logging.info('-------------------------------------------------')
                    logging.info('')

                    jobs = all_jobs[:i + 1]
                    tasks = parse_tasks_from_jobs(jobs, experiment_name,
                                                  options, input_space)
                    hypers = chooser.fit(tasks, hypers)
                    print_hypers(hypers, input_space, options)

                    # get the biggest end time of the jobs
                    end_time = max([job['end time'] for job in jobs])
                    elapsed_time = end_time - overall_start_time

                    recommendation = chooser.best()
                    store_recommendation(recommendation, db, experiment_name,
                                         tasks, jobs, input_space, elapsed_time)

            logging.info('Have a nice day.')
            return

        # If no resources are accepting jobs, sleep
        if no_free_resources(db, experiment_name, resources):
            # Don't use logging here because it's too much effort to use
            # logging without a newline at the end
            sys.stdout.write(
                'Waiting for results...' if not waiting_for_results else '.')
            sys.stdout.flush()
            waiting_for_results = True
            time.sleep(options['polling_time'])
        else:
            sys.stdout.write('\n')
def main():
    """Run the suggestion/dispatch loop, honouring manual suggestions from a CSV.

    The file ``<expt_dir>/<experiment-name>.suggest`` is read with
    ``csv.DictReader`` before every dispatch.  Row number ``suggest_idx``
    (counted from the first data row), if present, overrides the chooser's
    suggestion: for each column, the stripped column name selects the entry in
    ``suggested_job['params']`` whose first value is replaced, and the job is
    tagged with ``'manual': 1``.  Each row is consumed once.

    Any problem while applying a manual row is reported and the job is
    dispatched anyway.  The loop never returns; it sleeps for "polling-time"
    seconds (default 5) whenever ``tired(...)`` is true.
    """
    options, expt_dir = get_options()

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser'])
    chooser = chooser_module.init(options)
    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Connect to the database
    db_address = options['database']['address']
    db_name = options['database']['name']
    sys.stderr.write('Using database %s at %s.\n' % (db_name, db_address))
    db = MongoDB(database_address=db_address, database_name=db_name)

    suggest_file = os.path.join(expt_dir, experiment_name + ".suggest")
    suggest_idx = 0  # index of the next unused row of the suggestion file

    while True:
        for resource_name, resource in resources.iteritems():
            jobs = load_jobs(db, experiment_name)

            # If the resource is currently accepting more jobs
            # TODO: here cost will eventually also be considered: even if the
            #       resource is not full, we might wait because of cost incurred
            # Note: one resource is filled up before moving on to the next.
            #       Changing "while" to "if" would do it the other way.
            while resource.acceptingJobs(jobs):
                # Load jobs from DB
                jobs = load_jobs(db, experiment_name)

                # Remove any broken jobs from pending.
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Get a suggestion for the next job
                suggested_job = get_suggestion(chooser, resource.tasks, db,
                                               expt_dir, options, resource_name)

                try:
                    # Check if the file for the manual suggestions exists
                    if os.path.isfile(suggest_file):
                        with open(suggest_file, 'r') as csvfile:
                            # There should not be a blank line in the
                            # beginning of the file!
                            suggest_params = list(csv.DictReader(csvfile))

                        # If a new line was added to the file, overwrite the
                        # suggested job with these values
                        if suggest_idx < len(suggest_params):
                            print("--- Using manual suggestion instead of the one coming from the GP! ---")
                            next_suggestion = suggest_params[suggest_idx]
                            for key, value in next_suggestion.iteritems():
                                if isinstance(value, str):
                                    value = value.strip()
                                suggested_job['params'][key.strip()]['values'][0] = value
                                suggested_job['manual'] = 1
                                print("%s: %s" % (key.strip(), value))
                            suggest_idx = suggest_idx + 1
                except Exception as err:
                    # Best effort: a bad suggestion file must not stop the
                    # optimization.  KeyboardInterrupt/SystemExit are no longer
                    # swallowed here, and the cause is reported.
                    # NOTE(review): values written before the failure stay in
                    # suggested_job.
                    print("--- Problem using the manual suggestion file! Back to the GP suggestion.. ---")
                    sys.stderr.write('Manual suggestion error: %r\n' % (err,))

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(experiment_name, suggested_job,
                                                      db_address, db_name, expt_dir)

                # Set the status of the job appropriately (successfully submitted or not)
                if process_id is None:
                    suggested_job['status'] = 'broken'
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

        # If no resources are accepting jobs, sleep
        # (they might be accepting if suggest takes a while and so some jobs
        # already finished by the time this point is reached)
        if tired(db, experiment_name, resources):
            time.sleep(options.get('polling-time', 5))