def attempt_dispatch(expt_config, expt_dir, chooser, driver, options):
    """Dispatch a job containing `num_jobs` jobs; if the number of jobs is
    greater than 1 they will all have the same proc_id.

    Args:
        expt_config: path to an experiment config file, or an already-loaded
            experiment object (anything that is not a ``str``).
        expt_dir: experiment working directory.
        chooser: candidate-selection strategy with a ``next(...)`` method.
        driver: job-submission backend with a ``submit_job(...)`` method.
        options: parsed options (grid_size, grid_seed, max_finished_jobs,
            max_concurrent, nb_mini_batches).

    Returns:
        (status, pid) where status is 0 when the optimization is finished
        (budget exhausted or no candidates left), 1 when too many jobs are
        pending and nothing was submitted, and 2 when job(s) were submitted
        (pid may be None if submission failed).
    """
    log("\n" + "-" * 40)

    # Accept either a config-file path or an already-loaded experiment.
    if isinstance(expt_config, str):
        expt = load_experiment(expt_config)
    else:
        expt = expt_config

    # Build the experiment grid.
    expt_grid = ExperimentGrid(expt_dir, expt.variable,
                               options.grid_size, options.grid_seed)

    jobs = []
    num_jobs = 1
    for n in range(num_jobs):
        # Print out the current best function value.
        best_val, best_job = expt_grid.get_best()
        if best_job >= 0:
            log("Current best: %f (job %d)" % (best_val, best_job))
        else:
            log("Current best: No results returned yet.")

        # Gets you everything - NaN for unknown values & durations.
        grid, values, durations = expt_grid.get_grid()

        # Returns lists of indices.
        candidates = expt_grid.get_candidates()
        pending = expt_grid.get_pending()
        complete = expt_grid.get_complete()

        n_candidates = candidates.shape[0]
        n_pending = pending.shape[0]
        n_complete = complete.shape[0]
        log("%d candidates %d pending %d complete" %
            (n_candidates, n_pending, n_complete))

        # Verify that pending jobs are actually running, and add them back to
        # the candidate set if they have crashed or gotten lost.
        # TODO(review): this check is currently disabled — revisit.
        #for job_id in pending:
        #    proc_id = expt_grid.get_proc_id(job_id)
        #    if proc_id != -1 and not driver.is_proc_alive(job_id, proc_id):
        #        log("Set job %d back to candidate status." % (job_id))
        #        expt_grid.set_candidate(job_id)

        # Track the time series of optimization.
        write_trace(expt_dir, best_val, best_job, n_candidates, n_pending,
                    n_complete)

        # Print out the best job results
        write_best_job(expt_dir, best_val, best_job, expt_grid)

        if n_complete >= options.max_finished_jobs:
            # BUGFIX: the two string fragments previously concatenated with no
            # separating space ("reached.Exiting").
            log("Maximum number of finished jobs (%d) reached. "
                "Exiting" % options.max_finished_jobs)
            return 0, None

        if n_candidates == 0:
            log("There are no candidates left. Exiting.")
            return 0, None

        # Don't launch unless we can launch the complete bundle.
        if n_pending >= options.max_concurrent or \
                (n == 0 and n_pending + num_jobs > options.max_concurrent):
            log("Maximum number of jobs (%d) pending." %
                (options.max_concurrent))
            return 1, None
        else:
            # Ask the chooser to pick the next candidate
            log("Choosing next candidate... ")
            time_cand_start = time.time()
            job_id = chooser.next(grid, values, durations,
                                  candidates, pending, complete)
            time_cand = time.time() - time_cand_start
            log("Chose a candidate (took %i secs)." % (time_cand))

            # If the job_id is a tuple, then the chooser picked a new job.
            # We have to add this to our grid
            if isinstance(job_id, tuple):
                (job_id, candidate) = job_id
                job_id = expt_grid.add_to_grid(candidate)

            log("selected job %d from the grid." % (job_id))

            # Convert this back into an interpretable job and add metadata.
            job = Job()
            job.id = job_id
            job.expt_dir = expt_dir
            job.name = expt.name
            job.language = expt.language
            job.status = 'submitted'
            job.submit_t = int(time.time())
            job.param.extend(expt_grid.get_params(job_id))

            if options.nb_mini_batches > 0:
                # Round-robin over mini-batches; the counter lives on the
                # grid object so it persists across dispatch attempts.
                batch_i = expt_grid.mini_batch_i
                expt_grid.mini_batch_i = (batch_i + 1) % options.nb_mini_batches
                batch_param = Parameter()
                batch_param.name = 'batch_i'
                batch_param.int_val.append(batch_i)
                job.param.extend([batch_param])

            save_job(job)

            if num_jobs == 1:
                pid = driver.submit_job(job)
                if pid is not None:
                    log("submitted - pid = %s" % (pid))
                    expt_grid.set_submitted(job_id, pid)
                else:
                    log("Failed to submit job!")
                    log("Deleting job file.")
                    os.unlink(job_file_for(job))
            else:
                jobs.append(job)
                # Temporary, we don't have a proc id yet.
                expt_grid.set_submitted(job_id, -1)

    # Delayed submit when there is more than one job bundled.
    if num_jobs > 1:
        pid = driver.submit_job(jobs)
        if pid is not None:
            log("Submitted %i jobs with pid = %s" % (num_jobs, pid))
            for j in jobs:
                expt_grid.set_submitted(j.id, pid)
        else:
            log("Failed to submit job!")
            log("Deleting job files.")
            for j in jobs:
                os.unlink(job_file_for(j))

    return 2, pid
def attempt_dispatch(expt_config, expt_dir, chooser, driver, options):
    """Attempt to dispatch a single job for the experiment.

    NOTE(review): this redefines ``attempt_dispatch`` from earlier in the
    file with a different return contract — the later definition wins at
    import time; confirm which variant is intended.

    Args:
        expt_config: path to the experiment config file.
        expt_dir: experiment working directory.
        chooser: candidate-selection strategy with a ``next(...)`` method.
        driver: job-submission backend with ``submit_job`` / ``is_proc_alive``.
        options: parsed options (grid_size, grid_seed, max_finished_jobs,
            max_concurrent).

    Returns:
        False when the optimization should stop (budget exhausted or no
        candidates left); True otherwise (a job was submitted, submission
        failed, or the concurrency limit was hit).
    """
    log("\n" + "-" * 40)
    expt = load_experiment(expt_config)

    # Build the experiment grid.
    expt_grid = ExperimentGrid(expt_dir, expt.variable,
                               options.grid_size, options.grid_seed)

    # Print out the current best function value.
    best_val, best_job = expt_grid.get_best()
    if best_job >= 0:
        log("Current best: %f (job %d)" % (best_val, best_job))
    else:
        log("Current best: No results returned yet.")

    # Gets you everything - NaN for unknown values & durations.
    grid, values, durations = expt_grid.get_grid()

    # Returns lists of indices.
    candidates = expt_grid.get_candidates()
    pending = expt_grid.get_pending()
    complete = expt_grid.get_complete()
    n_candidates = candidates.shape[0]
    n_pending = pending.shape[0]
    n_complete = complete.shape[0]
    log("%d candidates %d pending %d complete" %
        (n_candidates, n_pending, n_complete))

    # Verify that pending jobs are actually running, and add them back to the
    # candidate set if they have crashed or gotten lost.
    for job_id in pending:
        proc_id = expt_grid.get_proc_id(job_id)
        if not driver.is_proc_alive(job_id, proc_id):
            # BUGFIX: the job is reset to *candidate* status here
            # (set_candidate below), but the message said "pending".
            log("Set job %d back to candidate status." % (job_id))
            expt_grid.set_candidate(job_id)

    # Track the time series of optimization.
    write_trace(expt_dir, best_val, best_job, n_candidates, n_pending,
                n_complete)

    # Print out the best job results
    write_best_job(expt_dir, best_val, best_job, expt_grid)

    if n_complete >= options.max_finished_jobs:
        # BUGFIX: the two string fragments previously concatenated with no
        # separating space ("reached.Exiting").
        log("Maximum number of finished jobs (%d) reached. "
            "Exiting" % options.max_finished_jobs)
        return False

    if n_candidates == 0:
        log("There are no candidates left. Exiting.")
        return False

    if n_pending >= options.max_concurrent:
        log("Maximum number of jobs (%d) pending." %
            (options.max_concurrent))
        return True
    else:
        # start a bunch of candidate jobs if possible
        #to_start = min(options.max_concurrent - n_pending, n_candidates)
        #log("Trying to start %d jobs" % (to_start))
        #for i in xrange(to_start):

        # Ask the chooser to pick the next candidate
        log("Choosing next candidate... ")
        job_id = chooser.next(grid, values, durations,
                              candidates, pending, complete)

        # If the job_id is a tuple, then the chooser picked a new job.
        # We have to add this to our grid
        if isinstance(job_id, tuple):
            (job_id, candidate) = job_id
            job_id = expt_grid.add_to_grid(candidate)

        log("selected job %d from the grid." % (job_id))

        # Convert this back into an interpretable job and add metadata.
        job = Job()
        job.id = job_id
        job.expt_dir = expt_dir
        job.name = expt.name
        job.language = expt.language
        job.status = 'submitted'
        job.submit_t = int(time.time())
        job.param.extend(expt_grid.get_params(job_id))

        save_job(job)

        pid = driver.submit_job(job)
        if pid is not None:
            # BUGFIX: was "%d"; pids may be non-integer identifiers and the
            # bundled dispatcher logs the same value with "%s".
            log("submitted - pid = %s" % (pid))
            expt_grid.set_submitted(job_id, pid)
        else:
            log("Failed to submit job!")
            log("Deleting job file.")
            os.unlink(job_file_for(job))

        return True
def explore_space_of_candidates(experiment, objective_function,
                                working_directory, chooser,
                                grid_size=1000, grid_seed=1,
                                max_finished_jobs=100):
    """Drive an in-process optimization loop as a generator.

    Each iteration asks `chooser` for the next candidate, yields the
    current state, then evaluates the candidate synchronously with
    `objective_function` through a PythonRunner and records the outcome
    on the experiment grid.

    Yields:
        (best_val, best_job, best_params, chosen_job_id) before every
        evaluation.  NOTE(review): best_job may be -1 before any result
        is recorded — confirm get_params handles that.

    Stops when `max_finished_jobs` non-memoized evaluations have run
    (memoized results extend the budget by one) or when no candidates
    remain.
    """
    # Build the experiment grid.
    grid_mgr = ExperimentGrid(working_directory, experiment.variables,
                              grid_size, grid_seed)
    runner = PythonRunner()
    jobs_done = 0

    while jobs_done < max_finished_jobs:
        best_val, best_job = grid_mgr.get_best()

        # Gets you everything - NaN for unknown values & durations.
        grid, values, durations = grid_mgr.get_grid()

        # Returns lists of indices.
        candidates = grid_mgr.get_candidates()
        pending = grid_mgr.get_pending()
        complete = grid_mgr.get_complete()
        n_candidates, n_pending, n_complete = (candidates.shape[0],
                                               pending.shape[0],
                                               complete.shape[0])
        logging.info("%d candidates %d pending %d complete",
                     n_candidates, n_pending, n_complete)

        if n_candidates == 0:
            logging.info("There are no candidates left. Exiting.")
            return

        # Ask the chooser to pick the next candidate
        logging.info("Choosing next candidate... ")
        job_id = chooser.next(grid, values, durations,
                              candidates, pending, complete)
        yield best_val, best_job, grid_mgr.get_params(best_job), job_id

        # If the job_id is a tuple, then the chooser picked a new job.
        # We have to add this to our grid
        if isinstance(job_id, tuple):
            job_id, candidate = job_id
            job_id = grid_mgr.add_to_grid(candidate)

        logging.info("selected job %d from the grid", job_id)

        grid_mgr.set_submitted(job_id, jobs_done)
        grid_mgr.set_running(job_id)

        started = time.time()
        result, memoized = runner(job_id, objective_function,
                                  grid_mgr.get_params(job_id),
                                  working_directory)
        grid_mgr.set_complete(job_id, result, time.time() - started)

        jobs_done += 1
        if memoized:
            # Memoized evaluations don't consume the evaluation budget.
            max_finished_jobs += 1