Exemple #1
0
def main():
    """Run the scheduling loop for the experiment directory given on the command line.

    The function parses the command line, loads the JSON configuration file
    from the experiment directory, fills in default options, initialises the
    chooser module and the database connection, and then loops forever:
    for every resource that accepts jobs it asks for a suggestion, dispatches
    it and records the job as 'pending' or 'broken'.

    Raises:
        SystemExit: via ``parser.error`` when no directory argument is given.
        Exception: when the directory does not exist or the configuration
            file cannot be read or parsed.
    """
    parser = optparse.OptionParser(usage="usage: %prog [options] directory")

    parser.add_option("--config", dest="config_file",
                      help="Configuration file name.",
                      type="string", default="config.json")

    (commandline_kwargs, args) = parser.parse_args()

    # Report a usage error instead of failing with an IndexError on args[0].
    if not args:
        parser.error("an experiment directory is required")

    # Read in the config file
    expt_dir = os.path.realpath(args[0])
    if not os.path.isdir(expt_dir):
        raise Exception("Cannot find directory %s" % expt_dir)
    expt_file = os.path.join(expt_dir, commandline_kwargs.config_file)

    try:
        with open(expt_file, 'r') as f:
            options = json.load(f, object_pairs_hook=OrderedDict)
    except (IOError, ValueError) as err:
        # IOError: the file is missing or unreadable; ValueError: invalid JSON.
        # Name the file that was actually opened and keep the underlying cause.
        raise Exception("%s did not load properly. Perhaps a spurious comma? (%s)"
                        % (expt_file, err))
    options["config"] = commandline_kwargs.config_file

    resources = parse_resources_from_config(options)

    # Set sensible defaults for options
    options['chooser'] = options.get('chooser', 'default_chooser')
    if 'tasks' not in options:
        options['tasks'] = {'main': {'type': 'OBJECTIVE',
                                     'likelihood': options.get('likelihood', 'GAUSSIAN')}}
    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Set DB address
    db_address = parse_db_address(options)
    if 'database' not in options:
        options['database'] = {'name': 'spearmint', 'address': db_address}
    else:
        options['database']['address'] = db_address

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser'])
    chooser = chooser_module.init(options)

    # Connect to the database
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    while True:

        for resource_name, resource in resources.iteritems():

            jobs = load_jobs(db, experiment_name)

            # Fill up one resource before moving on to the next one.
            # TODO: cost will eventually also be considered here: even if the
            #       resource is not full, we might wait because of cost incurred.
            while resource.acceptingJobs(jobs):

                # Reload the jobs so the broken-job check sees fresh state.
                jobs = load_jobs(db, experiment_name)

                # Remove any broken jobs from pending.
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Get a suggestion for the next job
                suggested_job = get_suggestion(chooser, resource.tasks, db, expt_dir,
                                               options, resource_name)

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(experiment_name, suggested_job,
                                                      db_address, expt_dir)

                # A missing process id means the dispatch failed.
                if process_id is None:
                    suggested_job['status'] = 'broken'
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

        # If no resources are accepting jobs, sleep
        # (they might be accepting if suggest takes a while and so some jobs
        # already finished by the time this point is reached)
        if tired(db, experiment_name, resources):
            time.sleep(options.get('polling-time', 5))
Exemple #2
0
def main():
    """Drive the suggest/dispatch loop for the configured experiment.

    Options and the experiment directory come from ``get_options()``. The
    loop has no exit condition: on each pass every resource is filled with
    newly suggested jobs while it accepts them, and the function sleeps for
    the polling interval whenever ``tired`` reports true.
    """
    options, expt_dir = get_options()
    resources = parse_resources_from_config(options)

    # Import the configured chooser module and build the chooser from it.
    chooser = importlib.import_module(
        'spearmint.choosers.' + options['chooser']).init(options)
    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Open the database connection.
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    while True:
        for resource_name, resource in resources.iteritems():
            jobs = load_jobs(db, experiment_name)

            # Clear broken jobs before the accepting-jobs condition is evaluated.
            remove_broken_jobs(db, jobs, experiment_name, resources)

            # One resource is filled completely before the next is considered.
            # TODO: cost may eventually be taken into account here as well.
            while resource.acceptingJobs(jobs):
                # Refresh the job list and clear broken jobs again.
                jobs = load_jobs(db, experiment_name)
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Ask the chooser for the next job and try to launch it.
                suggested_job = get_suggestion(
                    chooser, resource.tasks, db, expt_dir, options, resource_name)
                process_id = resource.attemptDispatch(
                    experiment_name, suggested_job, db_address, expt_dir)

                # Record whether the submission succeeded.
                dispatched = process_id is not None
                if dispatched:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                else:
                    suggested_job['status'] = 'broken'
                save_job(suggested_job, db, experiment_name)

                # Reload so the loop condition and the report use current data.
                jobs = load_jobs(db, experiment_name)
                print_resources_status(resources.values(), jobs)

        # Nothing is accepting jobs: wait before polling again. Resources may
        # already be free if the suggestion step took long enough for jobs to
        # finish, in which case no sleep happens.
        if not tired(db, experiment_name, resources):
            continue
        time.sleep(options.get('polling-time', 5))
def _save_recommendation(db, experiment_name, input_space, recommendation,
                         resource, tasks, jobs):
    """Save the chooser's recommendation under 'recommendations', keyed by len(jobs)."""
    def paramify_or_none(point):
        # Observation-based inputs may be None; only real points are paramified.
        return None if point is None else input_space.paramify(point)

    numComplete_by_task = {task_name: task.numComplete(jobs)
                           for task_name, task in tasks.iteritems()}

    db.save({'num_complete'       : resource.numComplete(jobs),
             'num_complete_tasks' : numComplete_by_task,
             'params'   : input_space.paramify(recommendation['model_model_input']),
             'objective': recommendation['model_model_value'],
             'params_o' : paramify_or_none(recommendation['obser_obser_input']),
             'obj_o'    : recommendation['obser_obser_value'],
             'params_om': paramify_or_none(recommendation['obser_model_input']),
             'obj_om'   : recommendation['obser_model_value']},
            experiment_name, 'recommendations', {'id': len(jobs)})


def main():
    """Run the optimisation loop for the experiment directory given on the command line.

    Command-line options:
        --config     configuration file name inside the directory.
        --no-output  do not create an output directory or log file.
        --repeat N   repeat index; when >= 0 the experiment name and output
                     directory are derived through the ``repeat_*`` helpers.

    On each pass, every resource that accepts jobs gets one new suggestion:
    the chooser is fitted, the current recommendation is saved, and the
    suggested job is dispatched. The function returns once all resources, or
    any single task, report ``maxCompleteReached``.

    Raises:
        SystemExit: via ``parser.error`` when no directory argument is given.
        Exception: when the directory does not exist.
    """
    parser = optparse.OptionParser(usage="usage: %prog [options] directory")

    parser.add_option("--config", dest="config_file",
                      help="Configuration file name.",
                      type="string", default="config.json")
    parser.add_option("--no-output", action="store_true",
                      help="Do not create output files.")
    parser.add_option("--repeat", dest="repeat",
                      help="Used for repeating the same experiment many times.",
                      type="int", default=-1)

    (commandline_kwargs, args) = parser.parse_args()

    # Report a usage error instead of failing with an IndexError on args[0].
    if not args:
        parser.error("an experiment directory is required")

    # Read in the config file
    expt_dir = os.path.realpath(args[0])
    if not os.path.isdir(expt_dir):
        raise Exception("Cannot find directory %s" % expt_dir)

    options = parse_config_file(expt_dir, commandline_kwargs.config_file)
    experiment_name = options["experiment-name"]

    # Special advanced feature for repeating the same experiment many times
    if commandline_kwargs.repeat >= 0:
        experiment_name = repeat_experiment_name(experiment_name, commandline_kwargs.repeat)

    if not commandline_kwargs.no_output:  # if we want output
        if commandline_kwargs.repeat >= 0:
            output_directory = repeat_output_dir(expt_dir, commandline_kwargs.repeat)
        else:
            output_directory = os.path.join(expt_dir, 'output', options["experiment-name"])
        if not os.path.isdir(output_directory):
            # makedirs also creates a missing intermediate 'output' directory,
            # where a plain mkdir would raise OSError.
            os.makedirs(output_directory)

        # A log file is only attached for non-repeated runs.
        if commandline_kwargs.repeat < 0:
            rootLogger = logging.getLogger()
            fileHandler = logging.FileHandler(os.path.join(output_directory, 'main.log'))
            fileHandler.setFormatter(logFormatter)
            fileHandler.setLevel(logLevel)
            rootLogger.addHandler(fileHandler)
    else:
        output_directory = None

    input_space = InputSpace(options["variables"])

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser'])
    chooser = chooser_module.init(input_space, options)

    # Connect to the database
    db_address = options['database']['address']
    db = MongoDB(database_address=db_address)

    # The environment variable takes precedence over the config entry.
    # NOTE(review): maxiterations is not read again inside this function.
    env_max_iterations = os.getenv('SPEARMINT_MAX_ITERATIONS')
    if env_max_iterations is not None:
        maxiterations = int(env_max_iterations)
    elif 'max_iterations' in options:
        maxiterations = options['max_iterations']
    else:
        maxiterations = DEFAULT_MAX_ITERATIONS

    # Set random seed
    if 'random_seed' in options:
        np.random.seed(int(options['random_seed']))
        seed(int(options['random_seed']))

    # Initialised up front so the termination check below is well defined even
    # when no resource accepted a job during the first pass.
    tasks = {}

    waiting_for_results = False  # for printing purposes only
    while True:

        for resource_name, resource in resources.iteritems():

            jobs = load_jobs(db, experiment_name)

            # TODO: here cost will eventually also be considered: even if the
            #       resource is not full, we might wait because of cost incurred
            # Note: each resource gets at most one new job per pass ("if").
            # Changing "if" to "while" would fill one resource before moving on.

            # Remove any broken jobs from pending
            # note: make sure to do this before the acceptingJobs() condition is checked
            remove_broken_jobs(db, jobs, experiment_name, resources)

            if resource.acceptingJobs(jobs):

                if waiting_for_results:
                    logging.info('\n')
                waiting_for_results = False

                optim_start_time = time.time()

                # Load jobs from DB
                jobs = load_jobs(db, experiment_name)

                # Print out a list of broken jobs
                print_broken_jobs(jobs)

                # Build the task objects from the current jobs
                tasks = parse_tasks_from_jobs(jobs, experiment_name, options, input_space)

                # Special case when coupled and there is a NaN task-- what to do with NaN task when decoupled??
                if 'NaN' in tasks and 'NaN' not in resource.tasks:
                    resource.tasks.append('NaN')

                # Load the model hypers from the database.
                hypers = db.load(experiment_name, 'hypers')

                # "Fit" the chooser - give the chooser data and let it fit the model(s).
                # NOTE: even if we are only suggesting for 1 task, we need to fit all of them
                # because the acquisition function for one task depends on all the tasks
                hypers = chooser.fit(tasks, hypers)

                if hypers:
                    logging.debug('GP covariance hyperparameters:')
                print_hypers(hypers)

                # Save the hyperparameters to the database.
                if hypers:
                    db.save(hypers, experiment_name, 'hypers')

                # Compute the best value so far, a.k.a. the "recommendation",
                # and save it in the DB
                recommendation = chooser.best()
                _save_recommendation(db, experiment_name, input_space, recommendation,
                                     resource, tasks, jobs)

                # Get the decoupling groups
                task_couplings = {task_name: tasks[task_name].options["group"]
                                  for task_name in resource.tasks}

                logging.info('\nGetting suggestion for %s...\n' % (', '.join(task_couplings.keys())))

                # Get the next suggested experiment from the chooser.
                suggested_input, suggested_tasks = chooser.suggest(task_couplings, optim_start_time)
                suggested_task = suggested_tasks[0]  # hack, deal with later

                suggested_job = {
                    'id'          : len(jobs) + 1,
                    'params'      : input_space.paramify(suggested_input),
                    'expt_dir'    : options['main_file_path'],
                    'tasks'       : suggested_tasks,
                    'resource'    : resource_name,
                    'main-file'   : resource.main_file,
                    'language'    : options['tasks'][suggested_task]['language'],
                    'status'      : 'new',
                    'submit time' : time.time(),
                    'start time'  : None,
                    'end time'    : None
                }

                save_job(suggested_job, db, experiment_name)

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(experiment_name, suggested_job, db_address,
                                                      expt_dir, output_directory)

                # Print the current time
                logging.info('Current time: %s' % datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

                # Set the status of the job appropriately (successfully submitted or not)
                if process_id is None:
                    suggested_job['status'] = 'broken'
                    # Log the id of the job that was just suggested.
                    logging.info('Job %s failed -- check output file for details.' % suggested_job['id'])
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

                if len(set(task_couplings.values())) > 1:  # if decoupled
                    print_tasks_status(tasks.values(), jobs)

                # For debug - print pending jobs
                print_pending_jobs(jobs)

        # Terminate the optimization if all resources are finished (run max number of jobs)
        # or ANY task is finished (just my weird convention)
        if all([r.maxCompleteReached(jobs) for r in resources.values()]) or \
           any([t.maxCompleteReached(jobs) for t in tasks.values()]):
            # Do all this extra work just to save the final recommendation -- would be ok to delete everything
            # in here and just "return"
            sys.stdout.write('\n')
            jobs = load_jobs(db, experiment_name)
            tasks = parse_tasks_from_jobs(jobs, experiment_name, options, input_space)
            hypers = db.load(experiment_name, 'hypers')
            hypers = chooser.fit(tasks, hypers)
            if hypers:
                db.save(hypers, experiment_name, 'hypers')
            recommendation = chooser.best()

            # `resource` is the last resource visited by the loop above.
            _save_recommendation(db, experiment_name, input_space, recommendation,
                                 resource, tasks, jobs)
            logging.info('Maximum number of jobs completed. Have a nice day.')
            return

        # If no resources are accepting jobs, sleep
        if no_free_resources(db, experiment_name, resources):
            # Don't use logging here because it's too much effort to use logging without a newline at the end
            sys.stdout.write('Waiting for results...' if not waiting_for_results else '.')
            sys.stdout.flush()
            waiting_for_results = True
            time.sleep(options['polling_time'])
        else:
            sys.stdout.write('\n')
Exemple #4
0
def main():
    """Keep every resource busy with suggested jobs for the configured experiment.

    Options and the experiment directory are obtained from ``get_options()``.
    The loop has no exit condition: resources are filled one after another,
    and the function sleeps for the polling interval whenever ``tired``
    reports true.
    """
    options, expt_dir = get_options()
    resources = parse_resources_from_config(options)

    # Import the configured chooser module and build the chooser from it.
    chooser = importlib.import_module(
        'spearmint.choosers.' + options['chooser']).init(options)
    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Open the database connection.
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    while True:
        for resource_name, resource in resources.iteritems():
            jobs = load_jobs(db, experiment_name)

            # One resource is filled completely before the next is considered.
            # TODO: cost may eventually be taken into account here as well.
            while resource.acceptingJobs(jobs):
                # Refresh the job list, then drop broken jobs from pending.
                jobs = load_jobs(db, experiment_name)
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Ask the chooser for the next job and try to launch it.
                suggested_job = get_suggestion(
                    chooser, resource.tasks, db, expt_dir, options, resource_name)
                process_id = resource.attemptDispatch(
                    experiment_name, suggested_job, db_address, expt_dir)

                # Record whether the submission succeeded.
                dispatched = process_id is not None
                if dispatched:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                else:
                    suggested_job['status'] = 'broken'
                save_job(suggested_job, db, experiment_name)

                # Reload so the loop condition and the report use current data.
                jobs = load_jobs(db, experiment_name)
                print_resources_status(resources.values(), jobs)

        # Nothing is accepting jobs: wait before polling again. Resources may
        # already be free if the suggestion step took long enough for jobs to
        # finish, in which case no sleep happens.
        if not tired(db, experiment_name, resources):
            continue
        time.sleep(options.get('polling-time', 5))
Exemple #5
0
def main(args=None):
    """Run the scheduling loop, optionally split into phases by ``resets``.

    Args:
        args: Passed through unchanged to ``get_options``.

    When the ``resets`` option is a non-empty list, the experiment name gets a
    ``__<phase>`` suffix. Phase ``i`` is limited to ``resets[i]`` jobs: once
    that many jobs are finished or pending, dispatching pauses, and once all
    of them are complete the hypers are copied to the next phase's experiment
    name and the job id offset is advanced.

    The phase that follows the last entry of ``resets`` has no job limit.
    NOTE(review): this assumes the final phase is meant to run unbounded;
    previously the loop failed with an IndexError at that point.
    """
    options, expt_dir = get_options(args)

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' +
                                             options['chooser'])
    chooser = chooser_module.init(options)
    base_experiment_name = options.get("experiment-name", 'unnamed-experiment')
    experiment_name = base_experiment_name
    resets = options.get("resets", [])
    job_id_offset = 0
    current_phase = 0
    if resets:
        experiment_name += '__' + str(current_phase)
        print('STARTING PHASE %d (%s)' % (current_phase + 1, experiment_name))

    # Connect to the database
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    while True:
        pause = False
        # Only phases that still have an entry in `resets` are budgeted.
        if resets and current_phase < len(resets):
            jobs = load_jobs(db, experiment_name)
            num_pending_jobs = sum(
                1 for job in jobs if job['status'] == 'pending')
            num_finished_jobs = sum(
                1 for job in jobs if job['status'] == 'complete')

            phase_complete = (num_finished_jobs == resets[current_phase]
                              and num_pending_jobs == 0)
            if phase_complete:
                # Move on to the next phase, carrying the hypers over.
                job_id_offset += resets[current_phase]
                current_phase += 1
                new_experiment_name = (base_experiment_name + '__' +
                                       str(current_phase))
                print('STARTING PHASE %d (%s)' % (current_phase + 1,
                                                  new_experiment_name))

                old_hypers = load_hypers(db, experiment_name)
                save_hypers(old_hypers, db, new_experiment_name)
                experiment_name = new_experiment_name

            # Guard the lookup: current_phase may just have moved past the
            # last configured reset.
            if (current_phase < len(resets) and
                    num_finished_jobs + num_pending_jobs >= resets[current_phase]):
                pause = True

        for resource_name, resource in resources.iteritems():

            jobs = load_jobs(db, experiment_name)

            # Fill up one resource before moving on to the next one.
            # TODO: here cost will eventually also be considered: even if the
            #       resource is not full, we might wait because of cost incurred
            while resource.acceptingJobs(jobs) and not pause:
                # Load jobs from DB
                jobs = load_jobs(db, experiment_name)
                print('Found %d jobs in db' % len(jobs))

                # Remove any broken jobs from pending.
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Get a suggestion for the next job
                suggested_job = get_suggestion(chooser, resource.tasks, db,
                                               experiment_name, expt_dir,
                                               options, resource_name,
                                               job_id_offset)

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(experiment_name,
                                                      suggested_job,
                                                      db_address, expt_dir)

                # A missing process id means the dispatch failed.
                if process_id is None:
                    suggested_job['status'] = 'broken'
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

        # If no resources are accepting jobs, or the phase is paused, sleep
        # (resources might be accepting if suggest takes a while and so some
        # jobs already finished by the time this point is reached)
        if tired(db, experiment_name, resources) or pause:
            time.sleep(options.get('polling-time', 5))
Exemple #6
0
	def runSpearmint(self, name):
		"""Run the scheduling loop for experiment `name` until every task stalls.

		Options are read through `self.get_options` for the directory
		`self.scratchPath/name`. After each dispatched job, every task's
		values are inspected: a task counts as stalled when the absolute
		differences between consecutive entries among its `look_back + 1`
		leading (reversed-order) values are all within `threshold`. When all
		tasks are stalled the process exits through `sys.exit("Stalled!")`.

		NOTE(review): `stopping` is never set to True in this method, so
		`sys.exit` (or an exception) is the only way out of the loop.
		"""
		options, expt_dir = self.get_options([os.path.abspath(os.path.join(self.scratchPath,name))])

		resources = main.parse_resources_from_config(options)

		# Load up the chooser.
		chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser'])
		chooser = chooser_module.init(options)
		experiment_name = options.get("experiment-name", 'unnamed-experiment')

		# Connect to the database
		db_address = options['database']['address']
		sys.stderr.write('Using database at %s.\n' % db_address)
		db = MongoDB(database_address=db_address)

		# Stall detection settings: maximum allowed difference between
		# consecutive values, and how many leading values are compared.
		threshold = 1e-2
		look_back = 3
		stopping = False
		while not stopping:
			for resource_name, resource in resources.iteritems():
				jobs = main.load_jobs(db, experiment_name)
				# resource.printStatus(jobs)
				# If the resource is currently accepting more jobs
				# TODO: here cost will eventually also be considered: even if the
				#       resource is not full, we might wait because of cost incurred
				# Note: I chose to fill up one resource and then move on to the next
				# You could also do it the other way, by changing "while" to "if" here

				while resource.acceptingJobs(jobs):
					# Load jobs from DB
					# (move out of one or both loops?) would need to pass into load_tasks
					jobs = main.load_jobs(db, experiment_name)
					#pprint.pprint(main.load_hypers(db, experiment_name))

					# Remove any broken jobs from pending.
					main.remove_broken_jobs(db, jobs, experiment_name, resources)

					# Get a suggestion for the next job
					suggested_job = main.get_suggestion(chooser, resource.tasks, db, expt_dir, options, resource_name)

					# Submit the job to the appropriate resource
					process_id = resource.attemptDispatch(experiment_name, suggested_job, db_address, expt_dir)

					# Set the status of the job appropriately (successfully submitted or not)
					if process_id is None:
						suggested_job['status'] = 'broken'
						main.save_job(suggested_job, db, experiment_name)
					else:
						suggested_job['status'] = 'pending'
						suggested_job['proc_id'] = process_id
						main.save_job(suggested_job, db, experiment_name)

					jobs = main.load_jobs(db, experiment_name)

					# Print out the status of the resources
					# resource.printStatus(jobs)
					# NOTE(review): unlike the other helpers, this one is called
					# without the `main.` prefix -- confirm it is in scope here.
					print_resources_status(resources.values(), jobs)

					# One flag per task: 1 if the task looks stalled, otherwise 0.
					stalled = []
					for task in main.load_task_group(db, options, resource.tasks).tasks.values():
						# Reverse the values; presumably this puts the most
						# recent result first -- TODO confirm the stored order.
						performance = task.valid_normalized_data_dict["values"][::-1]
						stalled.append(0)
						# More than look_back values are needed so that
						# performance[i+1] exists for every compared index.
						if len(performance) > look_back:
							print performance[0:look_back]
							print "Diffs: ",
							within_thresh = True
							for i,run in enumerate(performance[0:look_back]):
								diff = abs(run - performance[i+1])
								print str(round(diff,2))+", ",
								if diff > threshold:
									# One large change is enough to rule out a stall.
									within_thresh = False
									print "...No stall"
									break
							if within_thresh:
								stalled[len(stalled)-1] = 1
					# NOTE(review): all([]) is True, so an empty task group
					# also triggers this exit.
					if all(stalled):
						sys.exit("Stalled!")
			# If no resources are accepting jobs, sleep
			# (they might be accepting if suggest takes a while and so some jobs already finished by the time this point is reached)
			if main.tired(db, experiment_name, resources):
				time.sleep(options.get('polling-time', 5))
Exemple #7
0
def main():
    """Run the scheduling loop until ``stoppingCriterion`` returns false, then test the best result.

    After every dispatched job, the chooser's current best value is added to
    a window of at most ``converg_num`` non-None values. That window is
    passed to ``stoppingCriterion`` at the start of each outer pass. When the
    loop ends, the elapsed training time is handed to ``runBestParams``.
    """
    options, expt_dir = get_options()
    resources = parse_resources_from_config(options)

    # Import the configured chooser module and build the chooser from it.
    chooser = importlib.import_module(
        'spearmint.choosers.' + options['chooser']).init(options)
    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Open the database connection.
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    # Window of recent best values used for the convergence decision.
    past_best = []
    converg_num = 20
    startTraining = time.time()

    while stoppingCriterion(past_best, converg_num):
        for resource_name, resource in resources.iteritems():
            jobs = load_jobs(db, experiment_name)

            # One resource is filled completely before the next is considered.
            # TODO: cost may eventually be taken into account here as well.
            while resource.acceptingJobs(jobs):
                # Refresh the job list, then drop broken jobs from pending.
                jobs = load_jobs(db, experiment_name)
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Ask the chooser for the next job and try to launch it.
                suggested_job = get_suggestion(
                    chooser, resource.tasks, db, expt_dir, options, resource_name)
                process_id = resource.attemptDispatch(
                    experiment_name, suggested_job, db_address, expt_dir)

                # Record whether the submission succeeded.
                dispatched = process_id is not None
                if dispatched:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                else:
                    suggested_job['status'] = 'broken'
                save_job(suggested_job, db, experiment_name)

                # Reload so the loop condition and the report use current data.
                jobs = load_jobs(db, experiment_name)
                print_resources_status(resources.values(), jobs)

                # Track the current best, discarding None entries and keeping
                # only the newest converg_num values.
                best_val, best_input = chooser.get_best()
                past_best.append(best_val)
                past_best = [value for value in past_best if value is not None]
                if len(past_best) > converg_num:
                    del past_best[0]

        # Nothing is accepting jobs: wait before polling again. Resources may
        # already be free if the suggestion step took long enough for jobs to
        # finish, in which case no sleep happens.
        if tired(db, experiment_name, resources):
            print("Sleeping...")
            time.sleep(options.get('polling-time', 5))

    trainingTime = time.time() - startTraining

    # After training, test best results
    runBestParams(5000, chooser, db, experiment_name, trainingTime)
Exemple #8
0
def main():
    """Run the job dispatch loop for an experiment directory.

    Parses the command line (``--config`` plus one positional experiment
    directory), loads the JSON configuration from that directory, fills in
    defaults for the chooser, the tasks and the database address, and then
    loops forever: for every resource that accepts jobs it obtains a
    suggestion, dispatches it and stores the job with status 'pending'
    (dispatch returned a process id) or 'broken' (dispatch returned None).

    Raises:
        Exception: if the experiment directory does not exist, or if the
            configuration file cannot be read or parsed.
        SystemExit: (via ``parser.error``) if no directory is given.
    """
    parser = optparse.OptionParser(usage="usage: %prog [options] directory")

    parser.add_option("--config",
                      dest="config_file",
                      help="Configuration file name.",
                      type="string",
                      default="config.json")

    (commandline_kwargs, args) = parser.parse_args()

    # Show the usage message instead of failing with an IndexError when
    # the positional experiment directory is missing.
    if not args:
        parser.error("an experiment directory is required")

    # Read in the config file
    expt_dir = os.path.realpath(args[0])
    if not os.path.isdir(expt_dir):
        raise Exception("Cannot find directory %s" % expt_dir)
    expt_file = os.path.join(expt_dir, commandline_kwargs.config_file)

    try:
        with open(expt_file, 'r') as f:
            options = json.load(f, object_pairs_hook=OrderedDict)
    except Exception as err:
        # `except Exception` lets KeyboardInterrupt/SystemExit propagate.
        # Name the file that was actually opened (it may not be
        # config.json) and keep the underlying cause in the message.
        raise Exception(
            "%s did not load properly (%s). Perhaps a spurious comma?" %
            (expt_file, err))
    options["config"] = commandline_kwargs.config_file

    resources = parse_resources_from_config(options)

    # Set sensible defaults for options
    options['chooser'] = options.get('chooser', 'default_chooser')
    options['tasks'] = options.get(
        'tasks', {'main': {
            'type': 'OBJECTIVE',
            'likelihood': 'GAUSSIAN'
        }})
    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Set DB address; after this block options['database']['address'] is
    # always db_address, so db_address is used directly below.
    db_address = parse_db_address(options)
    if 'database' not in options:
        options['database'] = {'name': 'spearmint', 'address': db_address}
    else:
        options['database']['address'] = db_address

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' +
                                             options['chooser'])
    chooser = chooser_module.init(options)

    # Connect to the database
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    while True:

        for resource_name, resource in resources.iteritems():

            jobs = load_jobs(db, experiment_name)

            # If the resource is currently accepting more jobs
            # TODO: here cost will eventually also be considered: even if the
            #       resource is not full, we might wait because of cost incurred
            # Note: this fills up one resource and then moves on to the next.
            # You could also do it the other way, by changing "while" to "if" here

            while resource.acceptingJobs(jobs):

                # Load jobs from DB
                # (move out of one or both loops?) would need to pass into load_tasks
                jobs = load_jobs(db, experiment_name)

                # Remove any broken jobs from pending.
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Get a suggestion for the next job
                suggested_job = get_suggestion(chooser, resource.tasks, db,
                                               expt_dir, options,
                                               resource_name)

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(experiment_name,
                                                      suggested_job,
                                                      db_address, expt_dir)

                # Set the status of the job appropriately (successfully submitted or not)
                if process_id is None:
                    suggested_job['status'] = 'broken'
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

        # If no resources are accepting jobs, sleep
        # (they might be accepting if suggest takes a while and so some jobs
        # already finished by the time this point is reached)
        if tired(db, experiment_name, resources):
            time.sleep(options.get('polling-time', 5))
Exemple #9
0
def main(args=None):
    """Run the job dispatch loop, optionally split into phases.

    ``args`` is forwarded to ``get_options``. If the options contain a
    non-empty ``resets`` list, the run is divided into phases: phase ``k``
    uses the experiment name ``<experiment-name>__<k>`` and ends once
    exactly ``resets[k]`` jobs are 'complete' and none are 'pending'. On a
    phase change the hyperparameters stored under the old experiment name
    are copied to the new one and ``job_id_offset`` grows by the size of
    the finished phase. While the number of complete plus pending jobs has
    reached ``resets[k]``, no new jobs are dispatched.

    Returns:
        None, once the last phase listed in ``resets`` has completed.
        Without ``resets`` the loop never returns.
    """
    options, expt_dir = get_options(args)

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser'])
    chooser = chooser_module.init(options)
    base_experiment_name = options.get("experiment-name", 'unnamed-experiment')
    experiment_name = base_experiment_name
    resets = options.get("resets", [])
    job_id_offset = 0
    current_phase = 0
    if resets:
        experiment_name += '__' + str(current_phase)
        print('STARTING PHASE ' + str(current_phase + 1) + ' (' + experiment_name + ')')

    # Connect to the database
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    while True:
        pause = False
        if resets:
            jobs = load_jobs(db, experiment_name)
            num_pending_jobs = sum(job['status'] == 'pending' for job in jobs)
            num_finished_jobs = sum(job['status'] == 'complete' for job in jobs)
            if num_finished_jobs == resets[current_phase] and num_pending_jobs == 0:
                # The final phase is done. Stop here rather than announcing
                # a phase that does not exist and then failing with an
                # IndexError on resets[current_phase].
                if current_phase + 1 >= len(resets):
                    return

                job_id_offset += resets[current_phase]

                current_phase += 1
                new_experiment_name = base_experiment_name + '__' + str(current_phase)
                print('STARTING PHASE ' + str(current_phase + 1) + ' (' + new_experiment_name + ')')

                # Seed the new phase with the previous phase's hypers.
                old_hypers = load_hypers(db, experiment_name)
                save_hypers(old_hypers, db, new_experiment_name)
                experiment_name = new_experiment_name
            # Phase quota reached (counting jobs still running): hold off
            # on dispatching until the pending jobs finish.
            if num_finished_jobs + num_pending_jobs >= resets[current_phase]:
                pause = True

        for resource_name, resource in resources.iteritems():

            jobs = load_jobs(db, experiment_name)

            # If the resource is currently accepting more jobs
            # TODO: here cost will eventually also be considered: even if the
            #       resource is not full, we might wait because of cost incurred
            # Note: this fills up one resource and then moves on to the next.
            # You could also do it the other way, by changing "while" to "if" here

            while resource.acceptingJobs(jobs) and not pause:
                # Load jobs from DB
                # (move out of one or both loops?) would need to pass into load_tasks
                jobs = load_jobs(db, experiment_name)
                print('Found %d jobs in db' % len(jobs))

                # Remove any broken jobs from pending.
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Get a suggestion for the next job
                suggested_job = get_suggestion(chooser, resource.tasks, db, experiment_name, expt_dir, options,
                                               resource_name, job_id_offset)

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(experiment_name, suggested_job, db_address, expt_dir)

                # Set the status of the job appropriately (successfully submitted or not)
                if process_id is None:
                    suggested_job['status'] = 'broken'
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

        # If no resources are accepting jobs, sleep
        # (they might be accepting if suggest takes a while and so some jobs
        # already finished by the time this point is reached)
        if tired(db, experiment_name, resources) or pause:
            time.sleep(options.get('polling-time', 5))
Exemple #10
0
def main():
    """Dispatch suggested jobs until the hack iteration counter reaches 50.

    Before every suggestion, externally produced points are pushed into
    the database through ``em_hack.add_historical_points_to_db`` so that
    they are available when the next suggestion is computed. The counter
    is only tested by the outer loop, so it can pass 50 while a resource
    keeps accepting jobs.
    """
    options, expt_dir = get_options()

    resources = parse_resources_from_config(options)

    # Instantiate the chooser named in the options.
    chooser = importlib.import_module(
        'spearmint.choosers.' + options['chooser']).init(options)
    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Open the database connection.
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    hack_iter = 0
    while hack_iter < 50:
        for res_name, res in resources.iteritems():

            # TODO:: (moonkey) possibly redundant, the inner loop reloads the jobs
            current_jobs = load_jobs(db, experiment_name)

            # Keep feeding this resource until it is full, then move on.
            # TODO: cost should eventually be considered here as well: even
            # if the resource is not full, we might wait because of cost.
            # (Change "while" to "if" to hand out one job per resource.)
            while res.acceptingJobs(current_jobs):

                ################ HACK BEGIN ################
                # Start of a round. Extra points and objective values
                # (e.g. from EM) are stored in the database here so the
                # algorithm treats them like points sampled by BO when it
                # computes the GP and the acquisition functions. The data
                # is assumed to be JSON in the folder of 'config.json'.
                hack_iter += 1
                sys.stderr.write('###########hack_iter:' + str(hack_iter) + "###########\n")
                em_hack.add_historical_points_to_db(db, experiment_name, expt_dir)
                ################ HACK END ################

                # Refresh the job list and drop broken jobs from pending.
                current_jobs = load_jobs(db, experiment_name)
                remove_broken_jobs(db, current_jobs, experiment_name, resources)

                # Ask for the next job and try to submit it.
                suggested_job = get_suggestion(chooser, res.tasks, db,
                                               expt_dir, options, res_name)
                process_id = res.attemptDispatch(experiment_name,
                                                 suggested_job,
                                                 db_address, expt_dir)

                # Record whether the submission succeeded.
                if process_id is not None:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                else:
                    suggested_job['status'] = 'broken'
                save_job(suggested_job, db, experiment_name)

                # Report the state of all resources.
                current_jobs = load_jobs(db, experiment_name)
                print_resources_status(resources.values(), current_jobs)

        # If no resources are accepting jobs, sleep
        # (they might be accepting if suggest takes a while and so
        # some jobs already finished by the time this point is reached)
        if tired(db, experiment_name, resources):
            time.sleep(options.get('polling-time', 0))
Exemple #11
0
def main(args):
    """Dispatch a bounded number of jobs and report the chooser's best.

    Jobs are dispatched while ``count`` is below ``budget``, or while the
    chooser's ``ei`` attribute is at least the ``ei`` option and ``count``
    is below ``maxbudget``. Afterwards the function polls until ``tired``
    reports false, prints the chooser's best value and returns.

    Returns:
        Tuple ``(chooser.best_jobid, chooser.best_value, count)``.
    """
    options, expt_dir = get_options(args)
    print(options)
    resources = parse_resources_from_config(options)

    # Instantiate the chooser named in the options.
    chooser = importlib.import_module(
        'spearmint.choosers.' + options['chooser']).init(options)
    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Open the database connection.
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    # Limits that keep the loop from running forever.
    budget = options.get("budget", 20)
    count = options.get("count", 0)
    ei_threshold = options.get("ei", 0.10)
    max_budget = options.get("maxbudget", 10)

    def within_budget():
        # Same test for the outer loop and for every individual dispatch.
        return count < budget or (chooser.ei >= ei_threshold and count < max_budget)

    while within_budget():

        for res_name, res in resources.iteritems():

            current_jobs = load_jobs(db, experiment_name)

            # Keep feeding this resource until it is full, then move on.
            # TODO: cost should eventually be considered here as well: even
            #       if the resource is not full, we might wait because of cost.
            # (Change "while" to "if" to hand out one job per resource.)
            while res.acceptingJobs(current_jobs):
                if not within_budget():
                    break

                sys.stderr.write("Proceeding to next experiment\n")

                # Refresh the job list and drop broken jobs from pending.
                current_jobs = load_jobs(db, experiment_name)
                remove_broken_jobs(db, current_jobs, experiment_name, resources)

                # Ask for the next job and try to submit it.
                suggested_job = get_suggestion(chooser, res.tasks, db,
                                               expt_dir, options, res_name)
                process_id = res.attemptDispatch(experiment_name,
                                                 suggested_job,
                                                 db_address, expt_dir)

                # Record whether the submission succeeded.
                if process_id is not None:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                else:
                    suggested_job['status'] = 'broken'
                save_job(suggested_job, db, experiment_name)

                # Report the state of all resources.
                current_jobs = load_jobs(db, experiment_name)
                print_resources_status(resources.values(), current_jobs)
                count += 1

        # If no resources are accepting jobs, sleep
        # (they might be accepting if suggest takes a while and so some jobs
        # already finished by the time this point is reached)
        if tired(db, experiment_name, resources):
            time.sleep(options.get('polling-time', 5))

    # Budget used up: keep polling for as long as tired() reports true.
    while tired(db, experiment_name, resources):
        time.sleep(options.get('polling-time', 5))

    print(chooser.best_value)
    return chooser.best_jobid, chooser.best_value, count
Exemple #12
0
def main(expt_dir, config_file="config.json", no_output=False, repeat=-1):
    """Run the optimization loop for the experiment in ``expt_dir``.

    Each pass over the resources removes broken jobs, and for every
    resource that accepts a job: fits the chooser, stores the resulting
    hyperparameters, asks the chooser for a suggestion, saves the new job
    and dispatches it. After each pass the termination conditions are
    checked (all resources finished, any task finished, or
    ``options['max_time_mins']`` exceeded); when one holds, the final
    recommendation(s) are stored and the function returns.

    Args:
        expt_dir: Experiment directory; must exist.
        config_file: Name of the configuration file passed to
            ``parse_config_file``.
        no_output: If true, no output directory is created and
            ``output_directory`` is passed to the dispatcher as None.
        repeat: If >= 0, the experiment name and output directory are
            derived via ``repeat_experiment_name`` / ``repeat_output_dir``
            and no ``main.log`` file handler is attached.

    Returns:
        None, once a termination condition is met.

    Raises:
        Exception: if ``expt_dir`` is not a directory.
    """
    if not os.path.isdir(expt_dir):
        raise Exception("Cannot find directory %s" % expt_dir)

    options = parse_config_file(expt_dir, config_file)
    experiment_name = options["experiment_name"]

    # Special advanced feature for repeating the same experiment many times
    if repeat >= 0:
        experiment_name = repeat_experiment_name(experiment_name, repeat)

    if not no_output:  # if we want output
        if repeat >= 0:
            output_directory = repeat_output_dir(expt_dir, repeat)
        else:
            output_directory = os.path.join(expt_dir, 'output')
        if not os.path.isdir(output_directory):
            os.mkdir(output_directory)

        # Only a non-repeated run logs to <output>/main.log.
        if repeat < 0:
            rootLogger = logging.getLogger()
            fileHandler = logging.FileHandler(
                os.path.join(output_directory, 'main.log'))
            fileHandler.setFormatter(logFormatter)
            fileHandler.setLevel(logLevel)
            rootLogger.addHandler(fileHandler)
    else:
        output_directory = None

    input_space = InputSpace(options["variables"])

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' +
                                             options['chooser'])
    chooser = chooser_module.init(input_space, options)

    # Connect to the database
    db_address = options['database']['address']
    db = MongoDB(database_address=db_address)

    overall_start_time = time.time()
    db.save({'start-time': overall_start_time}, experiment_name, 'start-time')

    waiting_for_results = False  # for printing purposes only
    while True:

        for resource_name, resource in resources.iteritems():

            jobs = load_jobs(db, experiment_name)

            # TODO: here cost will eventually also be considered: even if the
            #       resource is not full, we might wait because of cost incurred
            # Note: one job is handed to each resource per pass ("if");
            # change "if" to "while" to fill up one resource before moving on.

            # Remove any broken jobs from pending
            # note: make sure to do this before the acceptingJobs() condition is checked
            remove_broken_jobs(db, jobs, experiment_name, resources)

            if resource.acceptingJobs(jobs):

                if waiting_for_results:
                    logging.info('\n')
                waiting_for_results = False

                # Load jobs from DB
                # (move out of one or both loops?) would need to pass into load_tasks
                jobs = load_jobs(db, experiment_name)

                # Print out a list of broken jobs
                print_broken_jobs(jobs)

                # Get a suggestion for the next job
                tasks = parse_tasks_from_jobs(jobs, experiment_name, options,
                                              input_space)

                # Special case when coupled and there is a NaN task-- what to do with NaN task when decoupled??
                if 'NaN' in tasks and 'NaN' not in resource.tasks:
                    resource.tasks.append('NaN')

                # Load the model hypers from the database.
                hypers = db.load(experiment_name, 'hypers')

                # "Fit" the chooser - give the chooser data and let it fit the model(s).
                # NOTE: even if we are only suggesting for 1 task, we need to fit all of them
                # because the acquisition function for one task depends on all the tasks
                hypers = chooser.fit(tasks, hypers)

                if hypers:
                    logging.debug('GP covariance hyperparameters:')
                print_hypers(hypers, input_space, options)

                # Save the hyperparameters to the database.
                if hypers:
                    db.save(hypers, experiment_name, 'hypers')

                if options['recommendations'] == "during":
                    # Compute the best value so far, a.k.a. the "recommendation"
                    recommendation = chooser.best()

                    # Save the recommendation in the DB if there are more complete jobs than last time
                    store_recommendation(recommendation, db, experiment_name,
                                         tasks, jobs, input_space,
                                         time.time() - overall_start_time)

                # Get the decoupling groups
                task_couplings = {
                    task_name: tasks[task_name].options["group"]
                    for task_name in resource.tasks
                }

                logging.info('\nGetting suggestion for %s...\n' %
                             (', '.join(task_couplings.keys())))

                # Get the next suggested experiment from the chooser.
                suggested_input, suggested_tasks = chooser.suggest(
                    task_couplings)
                suggested_task = suggested_tasks[0]  # hack, deal with later

                suggested_job = {
                    'id': len(jobs) + 1,
                    'params': input_space.paramify(suggested_input),
                    'expt_dir': options['main_file_path'],
                    'tasks': suggested_tasks,
                    'resource': resource_name,
                    'main-file': options['tasks'][suggested_task]['main_file'],
                    'language': options['tasks'][suggested_task]['language'],
                    'status': 'new',
                    'submit time': time.time(),
                    'start time': None,
                    'end time': None,
                    'fast update':
                    chooser.fast_update  # just for plotting - not important
                }

                save_job(suggested_job, db, experiment_name)

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(experiment_name,
                                                      suggested_job,
                                                      db_address, expt_dir,
                                                      output_directory)

                # Print the current time
                logging.info(
                    'Current time: %s' %
                    datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

                # Set the status of the job appropriately (successfully submitted or not)
                if process_id is None:
                    suggested_job['status'] = 'broken'
                    # Report the id of the job that was just dispatched;
                    # no `job` variable is bound at this point.
                    logging.info(
                        'Job %s failed -- check output file for details.' %
                        suggested_job['id'])
                    save_job(suggested_job, db, experiment_name)
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                    save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

                if len(set(task_couplings.values())) > 1:  # if decoupled
                    print_tasks_status(tasks.values(), jobs)

                # For debug - print pending jobs
                print_pending_jobs(jobs)

        # Terminate the optimization if all resources are finished (run max number of jobs)
        # or ANY task is finished (just my weird convention)
        jobs = load_jobs(db, experiment_name)
        tasks = parse_tasks_from_jobs(jobs, experiment_name, options,
                                      input_space)
        # Lists (not generators) so that maxCompleteReached() is still
        # called on every resource and task, as before.
        terminate_resources = all(
            [res.maxCompleteReached(jobs) for res in resources.values()])
        terminate_tasks = any(
            [task.maxCompleteReached(jobs) for task in tasks.values()])
        terminate_maxtime = (time.time() - overall_start_time) >= (
            options['max_time_mins'] * 60.0)

        if terminate_resources or terminate_tasks or terminate_maxtime:

            if terminate_resources:
                logging.info(
                    'Maximum number of jobs completed on all resources.')
            if terminate_tasks:
                logging.info(
                    'Maximum number of jobs reached for at least one task.')
            if terminate_maxtime:
                logging.info(
                    'Maximum total experiment time of %f minutes reached.' %
                    options['max_time_mins'])

            # save rec in DB
            if options['recommendations'] in ("during", "end-one"):
                logging.info('Making final recommendation:')
                recommendation = chooser.best()
                store_recommendation(recommendation,
                                     db,
                                     experiment_name,
                                     tasks,
                                     jobs,
                                     input_space,
                                     time.time() - overall_start_time,
                                     final=True)
            elif options['recommendations'] == "end-all":
                logging.info('Making recommendations...')
                all_jobs = jobs
                # NOTE(review): `hypers` below is the value left over from
                # the last fit in the dispatch loop; if no job was ever
                # suggested in this process it is unbound -- confirm.
                for i in xrange(len(all_jobs)):
                    logging.info('')
                    logging.info(
                        '-------------------------------------------------')
                    logging.info(
                        '     Getting recommendations for iter %d/%d      ' %
                        (i, len(all_jobs)))
                    logging.info(
                        '-------------------------------------------------')
                    logging.info('')

                    # Refit on the first i+1 jobs only.
                    jobs = all_jobs[:i + 1]
                    tasks = parse_tasks_from_jobs(jobs, experiment_name,
                                                  options, input_space)
                    hypers = chooser.fit(tasks, hypers)
                    print_hypers(hypers, input_space, options)
                    # get the biggest end time of the jobs
                    end_time = max([job['end time'] for job in jobs])
                    elapsed_time = end_time - overall_start_time

                    recommendation = chooser.best()
                    store_recommendation(recommendation, db, experiment_name,
                                         tasks, jobs, input_space,
                                         elapsed_time)

            logging.info('Have a nice day.')
            return

        # If no resources are accepting jobs, sleep
        if no_free_resources(db, experiment_name, resources):
            # Don't use logging here because it's too much effort to use logging without a newline at the end
            sys.stdout.write(
                'Waiting for results...' if not waiting_for_results else '.')
            sys.stdout.flush()
            waiting_for_results = True
            time.sleep(options['polling_time'])
        else:
            sys.stdout.write('\n')
Exemple #13
0
def _load_manual_suggestion(suggest_file, suggest_idx):
    """Return row number ``suggest_idx`` of the manual-suggestion CSV file.

    Args:
        suggest_file: Path of the CSV file holding manual suggestions. Its
            first line is the header; there should not be a blank line at
            the beginning of the file.
        suggest_idx: Zero-based index of the next row that has not been
            consumed yet.

    Returns:
        The row as produced by ``csv.DictReader`` (column name -> raw
        string), or ``None`` when the file does not exist or does not (yet)
        contain a row at that index.
    """
    if not os.path.isfile(suggest_file):
        return None
    with open(suggest_file, 'r') as csvfile:
        rows = list(csv.DictReader(csvfile))
    if suggest_idx < len(rows):
        return rows[suggest_idx]
    return None


def main():
    """Run the suggest/dispatch loop for one experiment.

    Loads the options and resources, initialises the chooser and the
    database connection, then loops forever: for every resource that is
    accepting jobs it obtains a suggestion, optionally overrides it with the
    next unread row of ``<expt_dir>/<experiment-name>.suggest`` (a CSV file
    of manual suggestions), dispatches the job and records its status in the
    database. When ``tired(...)`` reports true, it sleeps for the
    ``polling-time`` option (default 5) before the next pass.

    This function contains an unconditional ``while True`` loop and does not
    return normally.
    """
    # Local import: only needed to apply manual suggestions atomically.
    import copy

    options, expt_dir = get_options()

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser'])
    chooser = chooser_module.init(options)
    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Connect to the database
    db_address = options['database']['address']
    db_name = options['database']['name']
    sys.stderr.write('Using database %s at %s.\n' % (db_name, db_address))
    db = MongoDB(database_address=db_address, database_name=db_name)

    suggest_file = os.path.join(expt_dir, experiment_name + ".suggest")
    # Index of the next manual-suggestion row that has not been used yet.
    suggest_idx = 0
    while True:
        for resource_name, resource in resources.iteritems():

            jobs = load_jobs(db, experiment_name)

            # If the resource is currently accepting more jobs
            # TODO: here cost will eventually also be considered: even if the
            #       resource is not full, we might wait because of cost incurred
            # Note: I chose to fill up one resource and then move on to the next
            # You could also do it the other way, by changing "while" to "if" here
            while resource.acceptingJobs(jobs):

                # Load jobs from DB
                # (move out of one or both loops?) would need to pass into load_tasks
                jobs = load_jobs(db, experiment_name)

                # Remove any broken jobs from pending.
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Get a suggestion for the next job
                suggested_job = get_suggestion(chooser, resource.tasks, db, expt_dir, options, resource_name)

                try:
                    # If a new line was added to the manual-suggestion file,
                    # overwrite the suggested job with these values.
                    manual_row = _load_manual_suggestion(suggest_file, suggest_idx)
                    if manual_row is not None:
                        print("--- Using manual suggestion instead of the one coming from the GP! ---")
                        # Work on a copy so that a failure half-way through the
                        # row leaves the GP suggestion completely untouched.
                        manual_params = copy.deepcopy(suggested_job['params'])
                        for key, value in manual_row.iteritems():
                            if isinstance(value, str):
                                value = value.strip()
                            manual_params[key.strip()]['values'][0] = value
                            print("%s: %s" % (key.strip(), value))
                        # Every column applied cleanly: commit the override.
                        suggested_job['params'] = manual_params
                        suggested_job['manual'] = 1
                        suggest_idx = suggest_idx + 1
                except Exception as exc:
                    # Deliberately best-effort: a broken suggestion file must
                    # not stop the experiment. ``Exception`` (rather than a
                    # bare ``except``) lets KeyboardInterrupt/SystemExit
                    # propagate. The row index is not advanced, so the same
                    # row is retried on the next suggestion.
                    print("--- Problem using the manual suggestion file! Back to the GP suggestion.. ---")
                    sys.stderr.write('Manual suggestion error: %r\n' % (exc,))

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(experiment_name, suggested_job, db_address, db_name, expt_dir)

                # Set the status of the job appropriately (successfully submitted or not)
                if process_id is None:
                    suggested_job['status'] = 'broken'
                    save_job(suggested_job, db, experiment_name)
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                    save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                print_resources_status(resources.values(), jobs)

        # If no resources are accepting jobs, sleep
        # (they might be accepting if suggest takes a while and so some jobs already finished by the time this point is reached)
        if tired(db, experiment_name, resources):
            time.sleep(options.get('polling-time', 5))