Code example #1
0
def attempt_dispatch(expt_config, expt_dir, chooser, driver, options):
    """Run one dispatch cycle: refresh the experiment grid, reclaim
    pending jobs whose worker process has died, record progress, and
    submit the next candidate chosen by `chooser` if limits allow.

    Returns False when the optimization should stop (finished-job budget
    reached or no candidates remain), True otherwise.
    """
    log("\n" + "-" * 40)
    expt = load_experiment(expt_config)

    # Build the experiment grid.
    expt_grid = ExperimentGrid(expt_dir, expt.variable, options.grid_size,
                               options.grid_seed)

    # Print out the current best function value.
    best_val, best_job = expt_grid.get_best()
    if best_job >= 0:
        log("Current best: %f (job %d)" % (best_val, best_job))
    else:
        log("Current best: No results returned yet.")

    # Gets you everything - NaN for unknown values & durations.
    grid, values, durations = expt_grid.get_grid()

    # Returns lists of indices.
    candidates = expt_grid.get_candidates()
    pending = expt_grid.get_pending()
    complete = expt_grid.get_complete()

    n_candidates = candidates.shape[0]
    n_pending = pending.shape[0]
    n_complete = complete.shape[0]
    log("%d candidates   %d pending   %d complete" %
        (n_candidates, n_pending, n_complete))

    # Verify that pending jobs are actually running, and add them back to the
    # candidate set if they have crashed or gotten lost.
    for job_id in pending:
        proc_id = expt_grid.get_proc_id(job_id)
        if not driver.is_proc_alive(job_id, proc_id):
            # NOTE(review): the message says "pending" but the job is
            # actually returned to the candidate set.
            log("Set job %d back to pending status." % (job_id))
            expt_grid.set_candidate(job_id)

    # Track the time series of optimization.
    write_trace(expt_dir, best_val, best_job, n_candidates, n_pending,
                n_complete)

    # Print out the best job results.
    write_best_job(expt_dir, best_val, best_job, expt_grid)

    if n_complete >= options.max_finished_jobs:
        # Fixed missing space between the two halves of this message
        # (previously rendered as "reached.Exiting").
        log("Maximum number of finished jobs (%d) reached. "
            "Exiting" % options.max_finished_jobs)
        return False

    if n_candidates == 0:
        log("There are no candidates left.  Exiting.")
        return False

    if n_pending >= options.max_concurrent:
        log("Maximum number of jobs (%d) pending." % (options.max_concurrent))
        return True

    # Ask the chooser to pick the next candidate.
    log("Choosing next candidate... ")
    job_id = chooser.next(grid, values, durations, candidates, pending,
                          complete)

    # If the job_id is a tuple, then the chooser picked a brand-new point
    # that is not on the grid yet; add it and use the resulting index.
    if isinstance(job_id, tuple):
        (job_id, candidate) = job_id
        job_id = expt_grid.add_to_grid(candidate)

    log("selected job %d from the grid." % (job_id))

    # Convert this back into an interpretable job and add metadata.
    job = Job()
    job.id = job_id
    job.expt_dir = expt_dir
    job.name = expt.name
    job.language = expt.language
    job.status = 'submitted'
    job.submit_t = int(time.time())
    job.param.extend(expt_grid.get_params(job_id))

    save_job(job)
    pid = driver.submit_job(job)
    if pid is not None:
        log("submitted - pid = %d" % (pid))
        expt_grid.set_submitted(job_id, pid)
    else:
        # Submission failed; remove the stale job file so the job can be
        # retried on a later cycle.
        log("Failed to submit job!")
        log("Deleting job file.")
        os.unlink(job_file_for(job))

    return True
Code example #2
0
File: spearmint-plot.py  Project: jucor/spearmint
def attempt_dispatch(expt_name, expt_dir, work_dir, chooser, options):
    """Plotting variant of dispatch: fit the chooser's GP once to the
    completed evaluations, then render 1-D marginal slices (and 2-D slices
    for every unordered pair of dimensions) through the best completed
    point, saving PNG plots and/or CSV dumps under `options.plot_dir`.
    """
    #import drmaa

    sys.stderr.write("\n")

    expt_file = os.path.join(expt_dir, options.config_file)
    expt      = load_expt(expt_file)

    # Build the experiment grid.
    expt_grid = ExperimentGrid(expt_dir,
                               expt.variable,
                               options.grid_size,
                               options.grid_seed)

    # Print out the current best function value.
    best_val, best_job = expt_grid.get_best()
    sys.stderr.write("Current best: %f (job %d)\n" % (best_val, best_job))

    # Gets you everything - NaN for unknown values & durations.
    grid, values, durations = expt_grid.get_grid()

    # Returns lists of indices.
    candidates = expt_grid.get_candidates()
    pending    = expt_grid.get_pending()
    complete   = expt_grid.get_complete()
    sys.stderr.write("%d candidates   %d pending   %d complete\n" % 
                     (candidates.shape[0], pending.shape[0], complete.shape[0]))

    ################# START ### ANDREI ########################
    # Lay out the output tree on first use: PNGs under 1D/ and 2D/,
    # CSV dumps under CSV/1D and CSV/2D.
    plot_dir = os.path.join(expt_dir, options.plot_dir)
    if not os.path.exists(plot_dir):
        sys.stderr.write("Creating plot directories '%s'.\n" % (plot_dir))
        mkdirp(plot_dir)
        mkdirp(os.path.join(plot_dir, '1D'))
        mkdirp(os.path.join(plot_dir, '2D'))
        mkdirp(os.path.join(plot_dir, 'CSV'))
        mkdirp(os.path.join(plot_dir, 'CSV', '1D'))
        mkdirp(os.path.join(plot_dir, 'CSV', '2D'))

    gmap = expt_grid.vmap

    # Without at least one finished job there is no best point to slice
    # through (get_best() returned NaN for best_job).
    if np.isnan(best_job):
        # TODO: deal with plotting the prior GP with no evaluated points
        sys.stderr.write("Need at least one complete evaluation to plot\n")
        sys.exit(-1)
    best_complete = grid[best_job, :].reshape((1,gmap.cardinality))

    #print('Best complete is ' + str(best_complete))
    #print('best_complete.shape is ' + str(best_complete.shape))

    # Avoid MCMC if not needed
    if options.no_mcmc:
        chooser.mcmc_iters = 0

    # Fit the hyperparameters only once for the whole plotting
    chooser.prepare_evaluation(grid, values, complete)

    # Loop on first dimension.  grid_i is the flat column index into the
    # grid; vector-valued variables contribute one column per component.
    grid_i = 0
    for v1 in gmap.variables:
        v1_dim = v1['size']
        for i in range(0,v1_dim):
            v1_name = str(v1['name'])
            if v1_dim > 1:
                # Suffix the (1-based) component index for vector variables.
                v1_name = v1_name + "_" + str(i+1)

            print('PLOT1D:',v1_name, 'Min:', v1['min'], 'Max:', v1['max'])

            # Evaluate on the marginal slice containing the best fit
            print('slicing along dim ' + str(grid_i))
            x, candidates = slice_1d(best_complete, grid_i, options.grid_size)
            mean, variance, ei = evaluate_gp(chooser, 
                    candidates, grid[complete, :], values[complete],
                    durations[complete])
            if not options.no_plot:
                plot_1d(x, v1['min'], v1['max'], mean, variance, ei, best_complete, v1_name)
                # Clamp the y-axis to the user-requested range, if any.
                if options.plot_max < float("+inf"):
                    pplt.ylim(ymax=options.plot_max)
                if options.plot_min > float("-inf"):
                    pplt.ylim(ymin=options.plot_min)


                # If the space is entirely 1D, plot the evaluation points
                if gmap.cardinality == 1:
                    # Save and restore the limits so the scatter overlay
                    # does not rescale the axes.
                    ylim = pplt.ylim()
                    pplt.scatter(np.asarray(complete).squeeze(),
                               np.asarray(values).squeeze(), 
                                 c='lime', marker='o', s=dot_size)
                    pplt.ylim(ylim)

                pplt.savefig(os.path.join(plot_dir, '1D',  v1_name + '.png'))

            if not options.no_csv:
                out_file = os.path.join(plot_dir, 'CSV',  '1D', 
                        v1_name + '.csv')
                save_to_csv(out_file, gmap, candidates, mean, variance, ei)

            # Loop on second dimension
            grid_j = 0
            for v2 in gmap.variables:
                v2_dim = v2['size']
                for j in range(0,v2_dim):
                    # Sub-diagonal is skipped
                    # (each unordered pair of dimensions is plotted once).
                    if grid_j <= grid_i:
                        grid_j = grid_j + 1
                        continue

                    v2_name = str(v2['name'])

                    if v2_dim > 1:
                        v2_name = v2_name + "_" + str(j+1)

                    print('PLOT2D:',v1_name, ' vs ', v2_name)

                    # Now let's evaluate the GP on a grid
                    x, y, candidates = slice_2d(best_complete, v1['min'], v1['max'], v2['min'], v2['max'], grid_i, grid_j, options.grid_size)
                    mean, variance, ei = evaluate_gp(chooser,
                            candidates, grid[complete, :], values[complete],
                            durations[complete])                    
                    if not options.no_plot:
                        h, h_mean, h_var, h_ei = plot_2d(x, y, mean, variance, ei, best_complete, v1_name,
                                v2_name)
                        # Clamp the colour scale of all three panels to the
                        # user-requested range, if any.
                        if options.plot_max < float("+inf"):
                            h_mean.set_clim(vmax=options.plot_max)
                            h_var.set_clim(vmax=options.plot_max)
                            h_ei.set_clim(vmax=options.plot_max)
                        if options.plot_min > float("-inf"):
                            h_mean.set_clim(vmin=options.plot_min)
                            h_var.set_clim(vmin=options.plot_min)
                            h_ei.set_clim(vmin=options.plot_min)
                        # If the space is entirely 2D, plot the evaluation points
                        # NOTE(review): this inner `i` shadows the outer
                        # component index; harmless here because the outer `i`
                        # is not read again afterwards, but worth renaming.
                        if gmap.cardinality == 2:
                            for i in (131,132,133):
                                pplt.subplot(i)
                                xlim = pplt.xlim()
                                ylim = pplt.ylim()
                                pplt.scatter(np.asarray(complete[:,0]).squeeze(),
                                             np.asarray(complete[:,1]).squeeze(),
                                             c='lime', marker='o', s=dot_size)
                                pplt.xlim(xlim)
                                pplt.ylim(ylim)

                        pplt.savefig(os.path.join(plot_dir, '2D',  
                                                  v1_name + "_" + v2_name + ".png"))

                    if not options.no_csv:
                        out_file = os.path.join(plot_dir,  'CSV', '2D',  
                                                  v1_name + "_" + v2_name + ".csv")
                        save_to_csv(out_file, gmap, candidates, mean, variance, ei)
                    grid_j = grid_j + 1

            grid_i = grid_i + 1
Code example #3
0
def attempt_dispatch(expt_name, expt_dir, work_dir, chooser, options):
    """Run one dispatch cycle against locally spawned worker processes:
    refresh the experiment grid, reclaim pending jobs whose process has
    died, record progress to the trace/best-job files, and submit the
    next candidate unless finished/concurrency limits are hit.

    Exits the process (sys.exit) when the finished-job budget is reached
    or no candidates remain; returns None otherwise.
    """
    sys.stderr.write("\n")

    expt_file = os.path.join(expt_dir, options.config_file)
    expt      = load_expt(expt_file)

    # Build the experiment grid.
    expt_grid = ExperimentGrid(expt_dir,
                               expt.variable,
                               options.grid_size,
                               options.grid_seed)

    # Print out the current best function value.
    best_val, best_job = expt_grid.get_best()
    if best_job >= 0:
        sys.stderr.write("Current best: %f (job %d)\n" % (best_val, best_job))
    else:
        sys.stderr.write("Current best: No results returned yet.\n")

    # Gets you everything - NaN for unknown values & durations.
    grid, values, durations = expt_grid.get_grid()

    # Returns lists of indices.
    candidates = expt_grid.get_candidates()
    pending    = expt_grid.get_pending()
    complete   = expt_grid.get_complete()
    sys.stderr.write("%d candidates   %d pending   %d complete\n" %
                     (candidates.shape[0], pending.shape[0], complete.shape[0]))

    # Verify that pending jobs are actually running.
    for job_id in pending:
        sgeid = expt_grid.get_sgeid(job_id)
        try:
            # Signal 0 probes for process existence without delivering a
            # signal (note this could kill the process on Windows).
            os.kill(sgeid, 0)
        except OSError:
            # The process is gone but the job is still marked pending.
            # Assume it crashed out and make it a candidate again.
            expt_grid.set_candidate(job_id)

    # Track the time series of optimization.
    with open(os.path.join(expt_dir, 'trace.csv'), 'a') as trace_fh:
        trace_fh.write("%d,%f,%d,%d,%d,%d\n"
                       % (time.time(), best_val, best_job,
                          candidates.shape[0], pending.shape[0],
                          complete.shape[0]))

    # Print out the best job results (file is rewritten each cycle).
    with open(os.path.join(expt_dir, 'best_job_and_result.txt'), 'w') as best_job_fh:
        best_job_fh.write("Best result: %f\nJob-id: %d\nParameters: \n" %
                          (best_val, best_job))
        for best_params in expt_grid.get_params(best_job):
            best_job_fh.write(str(best_params) + '\n')

    if complete.shape[0] >= options.max_finished_jobs:
        # Fixed missing space between the two halves of this message.
        sys.stderr.write("Maximum number of finished jobs (%d) reached. "
                         "Exiting\n" % options.max_finished_jobs)
        sys.exit(0)

    if candidates.shape[0] == 0:
        sys.stderr.write("There are no candidates left.  Exiting.\n")
        sys.exit(0)

    if pending.shape[0] >= options.max_concurrent:
        sys.stderr.write("Maximum number of jobs (%d) pending.\n"
                         % (options.max_concurrent))
        return

    # Ask the chooser to actually pick one.
    job_id = chooser.next(grid, values, durations, candidates, pending,
                          complete)

    # If the job_id is a tuple, then the chooser picked a brand-new point
    # that is not on the grid yet; add it and use the resulting index.
    if isinstance(job_id, tuple):
        (job_id, candidate) = job_id
        job_id = expt_grid.add_to_grid(candidate)

    sys.stderr.write("Selected job %d from the grid.\n" % (job_id))

    # Convert this back into an interpretable job and add metadata.
    job = Job()
    job.id        = job_id
    job.expt_dir  = expt_dir
    job.name      = expt.name
    job.language  = expt.language
    job.status    = 'submitted'
    job.submit_t  = int(time.time())
    job.param.extend(expt_grid.get_params(job_id))

    # Make sure we have a job subdirectory.
    job_subdir = os.path.join(expt_dir, 'jobs')
    if not os.path.exists(job_subdir):
        os.mkdir(job_subdir)

    # Name this job file.
    job_file = os.path.join(job_subdir,
                            '%08d.pb' % (job_id))

    # Store the job file.
    save_job(job_file, job)

    # Make sure there is a directory for output.
    output_subdir = os.path.join(expt_dir, 'output')
    if not os.path.exists(output_subdir):
        os.mkdir(output_subdir)
    output_file = os.path.join(output_subdir,
                               '%08d.out' % (job_id))

    process = job_submit("%s-%08d" % (expt_name, job_id),
                         output_file,
                         job_file, work_dir)
    process.poll()
    # A negative return code means the child was killed by a signal; a
    # still-running child has returncode None and counts as success.
    if process.returncode is not None and process.returncode < 0:
        sys.stderr.write("Failed to submit job or job crashed "
                         "with return code %d !\n" % process.returncode)
        sys.stderr.write("Deleting job file.\n")
        os.unlink(job_file)
        return
    else:
        sys.stderr.write("Submitted job as process: %d\n" % process.pid)

    # Now, update the experiment status to submitted.
    expt_grid.set_submitted(job_id, process.pid)

    return
Code example #4
0
File: spearmint.py  Project: andreirusu/spearmint
def attempt_dispatch(expt_name, expt_dir, work_dir, chooser, options):
    """Run one dispatch cycle against a Sun Grid Engine cluster via DRMAA:
    refresh the experiment grid, reconcile pending jobs with their SGE
    state (re-queueing lost, held, or failed ones), record progress, and
    submit the next candidate unless finished/concurrency limits are hit.

    Exits the process (sys.exit) when the finished-job budget is reached
    or no candidates remain; returns None otherwise.
    """
    import drmaa

    sys.stderr.write("\n")

    expt_file = os.path.join(expt_dir, options.config_file)
    expt      = load_expt(expt_file)

    # Build the experiment grid.
    expt_grid = ExperimentGrid(expt_dir,
                               expt.variable,
                               options.grid_size,
                               options.grid_seed)

    # Print out the current best function value.
    best_val, best_job = expt_grid.get_best()
    sys.stderr.write("Current best: %f (job %d)\n" % (best_val, best_job))

    # Gets you everything - NaN for unknown values & durations.
    grid, values, durations = expt_grid.get_grid()

    # Returns lists of indices.
    candidates = expt_grid.get_candidates()
    pending    = expt_grid.get_pending()
    complete   = expt_grid.get_complete()
    sys.stderr.write("%d candidates   %d pending   %d complete\n" %
                     (candidates.shape[0], pending.shape[0], complete.shape[0]))

    # Verify that pending jobs are actually running on the cluster.
    s = drmaa.Session()
    s.initialize()
    for job_id in pending:
        sgeid = expt_grid.get_sgeid(job_id)
        reset_job = False

        try:
            status = s.jobStatus(str(sgeid))
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit still propagate.
            sys.stderr.write("EXC: %s\n" % (str(sys.exc_info()[0])))
            sys.stderr.write("Could not find SGE id for job %d (%d)\n" %
                             (job_id, sgeid))
            status = -1
            reset_job = True

        if status == drmaa.JobState.UNDETERMINED:
            sys.stderr.write("Job %d (%d) in undetermined state.\n" %
                             (job_id, sgeid))
            reset_job = True

        elif status in [drmaa.JobState.QUEUED_ACTIVE, drmaa.JobState.RUNNING]:
            pass # Good shape.

        elif status in [drmaa.JobState.SYSTEM_ON_HOLD,
                        drmaa.JobState.USER_ON_HOLD,
                        drmaa.JobState.USER_SYSTEM_ON_HOLD,
                        drmaa.JobState.SYSTEM_SUSPENDED,
                        drmaa.JobState.USER_SUSPENDED]:
            sys.stderr.write("Job %d (%d) is held or suspended.\n" %
                             (job_id, sgeid))
            reset_job = True

        elif status == drmaa.JobState.DONE:
            # Finished on the cluster but the grid has not caught up yet;
            # leave it alone.
            sys.stderr.write("Job %d (%d) complete but not yet updated.\n" %
                             (job_id, sgeid))

        elif status == drmaa.JobState.FAILED:
            sys.stderr.write("Job %d (%d) failed.\n" % (job_id, sgeid))
            reset_job = True

        if reset_job:
            try:
                # Kill the job in case it is still holding a slot.
                s.control(str(sgeid), drmaa.JobControlAction.TERMINATE)
                sys.stderr.write("Killed SGE job %d.\n" % (sgeid))
            except Exception:
                # Best-effort kill; narrowed from a bare `except:`.
                sys.stderr.write("Failed to kill SGE job %d.\n" % (sgeid))

            # Set back to being a candidate state.
            # NOTE(review): the message says "pending" but the job is
            # actually returned to the candidate set.
            expt_grid.set_candidate(job_id)
            sys.stderr.write("Set job %d back to pending status.\n" % (job_id))

    s.exit()

    # Track the time series of optimization.
    with open(os.path.join(expt_dir, 'trace.csv'), 'a') as trace_fh:
        trace_fh.write("%d,%f,%d,%d,%d,%d\n"
                       % (time.time(), best_val, best_job,
                          candidates.shape[0], pending.shape[0],
                          complete.shape[0]))

    # Append the best job results so far.
    with open(os.path.join(expt_dir, 'best_job_and_result.txt'), 'a') as best_job_fh:
        best_job_fh.write("Best result: %f\n Job-id: %d\n Parameters: %s\n"
                          % (best_val, best_job, expt_grid.get_params(best_job)))

    if complete.shape[0] >= options.max_finished_jobs:
        sys.stderr.write("Maximum number of finished jobs (%d) reached. "
                         "Exiting\n" % options.max_finished_jobs)
        sys.exit(0)

    if candidates.shape[0] == 0:
        sys.stderr.write("There are no candidates left.  Exiting.\n")
        sys.exit(0)

    if pending.shape[0] >= options.max_concurrent:
        sys.stderr.write("Maximum number of jobs (%d) pending.\n"
                         % (options.max_concurrent))
        return

    # Ask the chooser to actually pick one.
    job_id = chooser.next(grid, values, durations, candidates, pending,
                          complete)

    # If the job_id is a tuple, then the chooser picked a brand-new point
    # that is not on the grid yet; add it and use the resulting index.
    if isinstance(job_id, tuple):
        (job_id, candidate) = job_id
        job_id = expt_grid.add_to_grid(candidate)

    sys.stderr.write("Selected job %d from the grid.\n" % (job_id))

    # Convert this back into an interpretable job and add metadata.
    job = Job()
    job.id        = job_id
    job.expt_dir  = expt_dir
    job.name      = expt.name
    job.language  = expt.language
    job.status    = 'submitted'
    job.submit_t  = int(time.time())
    job.param.extend(expt_grid.get_params(job_id))

    # Make sure we have a job subdirectory.
    job_subdir = os.path.join(expt_dir, 'jobs')
    if not os.path.exists(job_subdir):
        os.mkdir(job_subdir)

    # Name this job file.
    job_file = os.path.join(job_subdir,
                            '%08d.pb' % (job_id))

    # Store the job file.
    save_job(job_file, job)

    # Make sure there is a directory for output.
    output_subdir = os.path.join(expt_dir, 'output')
    if not os.path.exists(output_subdir):
        os.mkdir(output_subdir)
    output_file = os.path.join(output_subdir,
                               '%08d.out' % (job_id))

    queue_id, msg = sge_submit("%s-%08d" % (expt_name, job_id),
                             output_file,
                             DEFAULT_MODULES,
                             job_file, work_dir)
    if queue_id is None:
        # Submission failed; remove the stale job file so the job can be
        # retried on a later cycle.
        sys.stderr.write("Failed to submit job: %s" % (msg))
        sys.stderr.write("Deleting job file.\n")
        os.unlink(job_file)
        return
    else:
        sys.stderr.write("Submitted as job %d\n" % (queue_id))

    # Now, update the experiment status to submitted.
    expt_grid.set_submitted(job_id, queue_id)

    return
Code example #5
0
File: main.py  Project: aurora1625/spearmint
def attempt_dispatch(expt_config, expt_dir, chooser, driver, options):
    """Run one dispatch cycle: refresh the experiment grid, reclaim dead
    pending jobs, record progress, and submit the next candidate chosen
    by `chooser` when limits allow.

    Returns False when the optimization should stop (finished-job budget
    reached or no candidates remain), True otherwise.
    """
    log("\n" + "-" * 40)
    expt = load_experiment(expt_config)

    # Build the experiment grid.
    expt_grid = ExperimentGrid(expt_dir,
                               expt.variable,
                               options.grid_size,
                               options.grid_seed)

    # Print out the current best function value.
    best_val, best_job = expt_grid.get_best()
    if best_job >= 0:
        log("Current best: %f (job %d)" % (best_val, best_job))
    else:
        log("Current best: No results returned yet.")

    # Gets you everything - NaN for unknown values & durations.
    grid, values, durations = expt_grid.get_grid()

    # Returns lists of indices.
    candidates = expt_grid.get_candidates()
    pending    = expt_grid.get_pending()
    complete   = expt_grid.get_complete()

    n_candidates = candidates.shape[0]
    n_pending    = pending.shape[0]
    n_complete   = complete.shape[0]
    log("%d candidates   %d pending   %d complete" %
        (n_candidates, n_pending, n_complete))

    # Verify that pending jobs are actually running, and add them back to the
    # candidate set if they have crashed or gotten lost.
    for job_id in pending:
        proc_id = expt_grid.get_proc_id(job_id)
        if not driver.is_proc_alive(job_id, proc_id):
            # NOTE(review): the message says "pending" but the job is
            # actually returned to the candidate set.
            log("Set job %d back to pending status." % (job_id))
            expt_grid.set_candidate(job_id)

    # Track the time series of optimization.
    write_trace(expt_dir, best_val, best_job, n_candidates, n_pending, n_complete)

    # Print out the best job results.
    write_best_job(expt_dir, best_val, best_job, expt_grid)

    if n_complete >= options.max_finished_jobs:
        # Fixed missing space between the two halves of this message
        # (previously rendered as "reached.Exiting").
        log("Maximum number of finished jobs (%d) reached. "
            "Exiting" % options.max_finished_jobs)
        return False

    if n_candidates == 0:
        log("There are no candidates left.  Exiting.")
        return False

    if n_pending >= options.max_concurrent:
        log("Maximum number of jobs (%d) pending." % (options.max_concurrent))
        return True

    # Ask the chooser to pick the next candidate.
    log("Choosing next candidate... ")
    job_id = chooser.next(grid, values, durations, candidates, pending, complete)

    # If the job_id is a tuple, then the chooser picked a brand-new point
    # that is not on the grid yet; add it and use the resulting index.
    if isinstance(job_id, tuple):
        (job_id, candidate) = job_id
        job_id = expt_grid.add_to_grid(candidate)

    log("selected job %d from the grid." % (job_id))

    # Convert this back into an interpretable job and add metadata.
    job = Job()
    job.id        = job_id
    job.expt_dir  = expt_dir
    job.name      = expt.name
    job.language  = expt.language
    job.status    = 'submitted'
    job.submit_t  = int(time.time())
    job.param.extend(expt_grid.get_params(job_id))

    save_job(job)
    pid = driver.submit_job(job)
    if pid is not None:
        log("submitted - pid = %d" % (pid))
        expt_grid.set_submitted(job_id, pid)
    else:
        # Submission failed; remove the stale job file so the job can be
        # retried on a later cycle.
        log("Failed to submit job!")
        log("Deleting job file.")
        os.unlink(job_file_for(job))

    return True
Code example #6
0
def attempt_dispatch(expt_config, expt_dir, chooser, driver, options):
    """Dispatch cycle with expected-improvement (EI) based early stopping
    and caching of results for previously executed parameter settings.

    Returns False to stop the optimization (budget exhausted, no
    candidates, or EI fell below config.EI after enough accepted runs),
    True otherwise.  Relies on module-level state defined elsewhere:
    `config` (EI threshold and strike counter), `jobs_executed`
    (parameter-set -> job id cache) and `job_params`.  Written for
    Python 2 (note the print statements below).
    """
    log("\n" + "-" * 40)
    expt = load_experiment(expt_config)
    print(options)
    # Build the experiment grid.
    expt_grid = ExperimentGrid(expt_dir, expt.variable, options.grid_size,
                               options.grid_seed)

    # Print out the current best function value.
    best_val, best_job = expt_grid.get_best()
    if best_job >= 0:
        log("Current best: %f (job %d)" % (best_val, best_job))
    else:
        log("Current best: No results returned yet.")

    # Gets you everything - NaN for unknown values & durations.
    grid, values, durations = expt_grid.get_grid()

    # Returns lists of indices.
    candidates = expt_grid.get_candidates()
    pending = expt_grid.get_pending()
    complete = expt_grid.get_complete()
    executed = expt_grid.get_executed()

    n_candidates = candidates.shape[0]
    n_pending = pending.shape[0]
    n_complete = complete.shape[0]
    n_executed = executed.shape[0]

    log("%d candidates   %d pending   %d complete   %d executed" %
        (n_candidates, n_pending, n_complete, n_executed))

    # Verify that pending jobs are actually running, and add them back to the
    # candidate set if they have crashed or gotten lost.
    for job_id in pending:
        proc_id = expt_grid.get_proc_id(job_id)
        if not driver.is_proc_alive(job_id, proc_id):
            log("Set job %d back to pending status." % (job_id))
            expt_grid.set_candidate(job_id)

    # Track the time series of optimization.
    write_trace(expt_dir, best_val, best_job, n_candidates, n_pending,
                n_complete, n_executed)

    # Print out the best job results
    write_best_job(expt_dir, best_val, best_job, expt_grid)

    if n_complete >= options.max_finished_jobs:
        log("Maximum number of finished jobs (%d) reached."
            "Exiting" % options.max_finished_jobs)
        return False

    if n_candidates == 0:
        log("There are no candidates left.  Exiting.")
        return False

    if n_pending >= options.max_concurrent:
        log("Maximum number of jobs (%d) pending." % (options.max_concurrent))
        return True

    else:

        # start a bunch of candidate jobs if possible
        #to_start = min(options.max_concurrent - n_pending, n_candidates)
        #log("Trying to start %d jobs" % (to_start))
        #for i in xrange(to_start):

        # Ask the chooser to pick the next candidate
        log("Choosing next candidate... ")
        job_id, ei = chooser.next(grid, values, durations, candidates, pending,
                                  complete)
        log("Expected improvement: %.6f" % ei)

        print ">>>>>>>", n_executed, ei
        # Early stopping: once enough runs have been accepted, a low EI
        # suggests little is left to gain.
        # NOTE(review): after the increment, `config.strikes > 0` is always
        # true, so a single low-EI cycle terminates the search; presumably a
        # larger threshold was intended -- verify before relying on it.
        if ei < config.EI and n_executed >= config.MIN_ACCEPTED_RUNS:
            config.strikes += 1
            if config.strikes > 0:
                return False
        else:
            config.strikes = 0

        # If the job_id is a tuple, then the chooser picked a new job.
        # We have to add this to our grid
        if isinstance(job_id, tuple):
            (job_id, candidate) = job_id
            job_id = expt_grid.add_to_grid(candidate)

        log("selected job %d from the grid." % (job_id))

        # Convert this back into an interpretable job and add metadata.
        job = Job()
        job.id = job_id
        job.expt_dir = expt_dir
        job.name = expt.name
        job.language = expt.language
        job.status = 'submitted'
        job.submit_t = int(time.time())
        job.param.extend(expt_grid.get_params(job_id))

        #TODO: (@omid) check if the job has been previously completed; if so
        #      mark the job as completed and use the cached value
        # Normalize parameter values into hashable form so the whole
        # parameter setting can serve as a dictionary key.
        params = job_params(job)
        for key, val in params.items():
            if isinstance(val, np.ndarray):
                val = val.tolist()
            if isinstance(val, list):
                val = frozenset(val)
            params[key] = val
        params = frozenset(params.items())
        if params in jobs_executed:
            # Same parameters were run before: copy the cached status,
            # value and duration instead of re-executing the job.
            jid = jobs_executed[params]
            print ">>>> Bypassing job execution."
            for stat in ['status', 'values', 'durs']:
                dic = getattr(expt_grid, stat)
                dic[job_id] = dic[jid]
            expt_grid._save_jobs()
            return True
        jobs_executed[params] = job_id

        save_job(job)
        pid = driver.submit_job(job)
        if pid != None:
            log("submitted - pid = %d" % (pid))
            expt_grid.set_submitted(job_id, pid)
        else:
            # Submission failed; remove the stale job file.
            log("Failed to submit job!")
            log("Deleting job file.")
            os.unlink(job_file_for(job))

    return True
Code example #7
0
def attempt_dispatch(expt_name, expt_dir, work_dir, chooser, options):
    """Run one dispatch cycle against an SGE cluster via DRMAA.

    Loads the experiment config, reconciles the grid's pending jobs with
    the live SGE queue state (resetting crashed/held jobs to candidates),
    appends to the trace and best-result files, and — if limits allow —
    asks the chooser for the next point and submits it to SGE.

    Args:
        expt_name: Experiment name; used to label the submitted SGE job.
        expt_dir:  Directory holding the experiment config and state.
        work_dir:  Working directory handed to sge_submit.
        chooser:   Object whose next() selects the next grid point.
        options:   Parsed options providing config_file, grid_size,
                   grid_seed, max_finished_jobs and max_concurrent.

    Calls sys.exit(0) when the experiment is finished; otherwise returns
    None after at most one submission attempt.
    """
    # Imported lazily so environments without DRMAA can still import the
    # enclosing module.
    import drmaa

    sys.stderr.write("\n")

    expt_file = os.path.join(expt_dir, options.config_file)
    expt      = load_expt(expt_file)

    # Build the experiment grid.
    expt_grid = ExperimentGrid(expt_dir,
                               expt.variable,
                               options.grid_size,
                               options.grid_seed)

    # Print out the current best function value.
    best_val, best_job = expt_grid.get_best()
    sys.stderr.write("Current best: %f (job %d)\n" % (best_val, best_job))

    # Gets you everything - NaN for unknown values & durations.
    grid, values, durations = expt_grid.get_grid()

    # Returns lists of indices.
    candidates = expt_grid.get_candidates()
    pending    = expt_grid.get_pending()
    complete   = expt_grid.get_complete()
    sys.stderr.write("%d candidates   %d pending   %d complete\n" %
                     (candidates.shape[0], pending.shape[0], complete.shape[0]))

    # Verify that pending jobs are actually running.  The try/finally
    # guarantees the DRMAA session is released even if a queue query or
    # control call raises mid-loop (the original leaked it on error).
    s = drmaa.Session()
    s.initialize()
    try:
        for job_id in pending:
            sgeid = expt_grid.get_sgeid(job_id)
            reset_job = False

            try:
                status = s.jobStatus(str(sgeid))
            except Exception:
                # Narrowed from a bare except so SystemExit and
                # KeyboardInterrupt still propagate.
                sys.stderr.write("EXC: %s\n" % (str(sys.exc_info()[0])))
                sys.stderr.write("Could not find SGE id for job %d (%d)\n" %
                                 (job_id, sgeid))
                status = -1
                reset_job = True

            if status == drmaa.JobState.UNDETERMINED:
                sys.stderr.write("Job %d (%d) in undetermined state.\n" %
                                 (job_id, sgeid))
                reset_job = True

            elif status in [drmaa.JobState.QUEUED_ACTIVE,
                            drmaa.JobState.RUNNING]:
                pass # Good shape.

            elif status in [drmaa.JobState.SYSTEM_ON_HOLD,
                            drmaa.JobState.USER_ON_HOLD,
                            drmaa.JobState.USER_SYSTEM_ON_HOLD,
                            drmaa.JobState.SYSTEM_SUSPENDED,
                            drmaa.JobState.USER_SUSPENDED]:
                sys.stderr.write("Job %d (%d) is held or suspended.\n" %
                                 (job_id, sgeid))
                reset_job = True

            elif status == drmaa.JobState.DONE:
                # Finished on the cluster; the worker will update the grid.
                sys.stderr.write("Job %d (%d) complete but not yet updated.\n" %
                                 (job_id, sgeid))

            elif status == drmaa.JobState.FAILED:
                sys.stderr.write("Job %d (%d) failed.\n" % (job_id, sgeid))
                reset_job = True

            if reset_job:

                try:
                    # Kill the job in case it is wedged rather than dead.
                    s.control(str(sgeid), drmaa.JobControlAction.TERMINATE)
                    sys.stderr.write("Killed SGE job %d.\n" % (sgeid))
                except Exception:
                    # Best-effort kill; narrowed from a bare except.
                    sys.stderr.write("Failed to kill SGE job %d.\n" % (sgeid))

                # Set back to being a candidate state.
                expt_grid.set_candidate(job_id)
                sys.stderr.write("Set job %d back to pending status.\n" % (job_id))
    finally:
        s.exit()

    # Track the time series of optimization.  Context managers guarantee
    # the handles are closed even on a write error.
    with open(os.path.join(expt_dir, 'trace.csv'), 'a') as trace_fh:
        trace_fh.write("%d,%f,%d,%d,%d,%d\n"
                       % (time.time(), best_val, best_job,
                          candidates.shape[0], pending.shape[0],
                          complete.shape[0]))

    # Print out the best job results
    with open(os.path.join(expt_dir, 'best_job_and_result.txt'),
              'a') as best_job_fh:
        best_job_fh.write("Best result: %f\n Job-id: %d\n Parameters: %s\n"
                          % (best_val, best_job,
                             expt_grid.get_params(best_job)))

    if complete.shape[0] >= options.max_finished_jobs:
        sys.stderr.write("Maximum number of finished jobs (%d) reached. "
                         "Exiting\n" % options.max_finished_jobs)
        sys.exit(0)

    if candidates.shape[0] == 0:
        sys.stderr.write("There are no candidates left.  Exiting.\n")
        sys.exit(0)

    if pending.shape[0] >= options.max_concurrent:
        sys.stderr.write("Maximum number of jobs (%d) pending.\n"
                         % (options.max_concurrent))
        return

    # Ask the chooser to actually pick one.
    job_id = chooser.next(grid, values, durations, candidates, pending,
                          complete)

    # If the job_id is a tuple, then the chooser picked a new job.
    # We have to add this to our grid
    if isinstance(job_id, tuple):
        (job_id, candidate) = job_id
        job_id = expt_grid.add_to_grid(candidate)

    sys.stderr.write("Selected job %d from the grid.\n" % (job_id))

    # Convert this back into an interpretable job and add metadata.
    job = Job()
    job.id        = job_id
    job.expt_dir  = expt_dir
    job.name      = expt.name
    job.language  = expt.language
    job.status    = 'submitted'
    job.submit_t  = int(time.time())
    job.param.extend(expt_grid.get_params(job_id))

    # Make sure we have a job subdirectory.
    job_subdir = os.path.join(expt_dir, 'jobs')
    if not os.path.exists(job_subdir):
        os.mkdir(job_subdir)

    # Name this job file.
    job_file = os.path.join(job_subdir,
                            '%08d.pb' % (job_id))

    # Store the job file.
    save_job(job_file, job)

    # Make sure there is a directory for output.
    output_subdir = os.path.join(expt_dir, 'output')
    if not os.path.exists(output_subdir):
        os.mkdir(output_subdir)
    output_file = os.path.join(output_subdir,
                               '%08d.out' % (job_id))

    queue_id, msg = sge_submit("%s-%08d" % (expt_name, job_id),
                             output_file,
                             DEFAULT_MODULES,
                             job_file, work_dir)
    if queue_id is None:
        # Submission failed; remove the orphaned job file so the grid
        # state and the filesystem stay consistent.
        sys.stderr.write("Failed to submit job: %s" % (msg))
        sys.stderr.write("Deleting job file.\n")
        os.unlink(job_file)
        return
    else:
        sys.stderr.write("Submitted as job %d\n" % (queue_id))

    # Now, update the experiment status to submitted.
    expt_grid.set_submitted(job_id, queue_id)

    return
コード例 #8
0
def attempt_dispatch(expt_name, expt_dir, work_dir, chooser, options):
    """Run one dispatch cycle using local processes (checked via psutil).

    Loads the experiment config, resets pending jobs whose process has
    died back to candidates, writes the trace and best-result files, and
    — if limits allow — asks the chooser for the next point and submits
    it as a local process via job_submit.

    Args:
        expt_name: Experiment name; used to label the submitted job.
        expt_dir:  Directory holding the experiment config and state.
        work_dir:  Working directory handed to job_submit.
        chooser:   Object whose next() selects the next grid point.
        options:   Parsed options providing config_file, grid_size,
                   grid_seed, max_finished_jobs and max_concurrent.

    Calls sys.exit(0) when the experiment is finished; otherwise returns
    None after at most one submission attempt.
    """
    sys.stderr.write("\n")

    expt_file = os.path.join(expt_dir, options.config_file)
    expt = load_expt(expt_file)

    # Build the experiment grid.  Locking is enabled because several
    # dispatchers may share this directory.
    expt_grid = ExperimentGrid(expt_dir,
                               expt.variable,
                               options.grid_size,
                               options.grid_seed,
                               locking=True)

    # Print out the current best function value.
    best_val, best_job = expt_grid.get_best()
    if best_job >= 0:
        sys.stderr.write("Current best: %f (job %d)\n" % (best_val, best_job))
    else:
        sys.stderr.write("Current best: No results returned yet.\n")

    # Gets you everything - NaN for unknown values & durations.
    grid, values, durations = expt_grid.get_grid()

    # Returns lists of indices.
    candidates = expt_grid.get_candidates()
    pending = expt_grid.get_pending()
    complete = expt_grid.get_complete()
    sys.stderr.write(
        "%d candidates   %d pending   %d complete\n" %
        (candidates.shape[0], pending.shape[0], complete.shape[0]))

    # Verify that pending jobs are actually running.
    for job_id in pending:
        sgeid = expt_grid.get_sgeid(job_id)
        if sgeid not in psutil.pids():
            # The process is gone but the job is still marked pending.
            # Assume it crashed and make it a candidate again.
            expt_grid.set_candidate(job_id)

    # Track the time series of optimization.  Context managers guarantee
    # the handles are closed even on a write error.
    with open(os.path.join(expt_dir, 'trace.csv'), 'a') as trace_fh:
        trace_fh.write("%d,%f,%d,%d,%d,%d\n" %
                       (time.time(), best_val, best_job, candidates.shape[0],
                        pending.shape[0], complete.shape[0]))

    # Print out the best job results (overwritten each cycle).
    with open(os.path.join(expt_dir, 'best_job_and_result.txt'),
              'w') as best_job_fh:
        best_job_fh.write("Best result: %f\nJob-id: %d\nParameters: \n" %
                          (best_val, best_job))
        for best_params in expt_grid.get_params(best_job):
            best_job_fh.write(str(best_params) + '\n')

    if complete.shape[0] >= options.max_finished_jobs:
        sys.stderr.write("Maximum number of finished jobs (%d) reached.\n"
                         "Exiting\n" % options.max_finished_jobs)
        sys.exit(0)

    if candidates.shape[0] == 0 and pending.shape[0] > 0:
        sys.stderr.write(
            "There are no candidates left. Waiting for job completion.\n")
        return

    if candidates.shape[0] == 0 and pending.shape[0] == 0:
        sys.stderr.write("There are no candidates left. Exiting.\n")
        sys.exit(0)

    if pending.shape[0] >= options.max_concurrent:
        sys.stderr.write("Maximum number of jobs (%d) pending.\n" %
                         (options.max_concurrent))
        return

    # Dont submit if pending + finished > max_finished_jobs.
    if pending.shape[0] + complete.shape[0] >= options.max_finished_jobs:
        sys.stderr.write("Full number of jobs (%d) submitted. Waiting for "
                         "completion.\n" % (options.max_finished_jobs))
        return

    # Ask the chooser to actually pick one.
    job_id = chooser.next(grid, values, durations, candidates, pending,
                          complete)

    # If the job_id is a tuple, then the chooser picked a new job.
    # We have to add this to our grid
    if isinstance(job_id, tuple):
        (job_id, candidate) = job_id
        job_id = expt_grid.add_to_grid(candidate)

    sys.stderr.write("Selected job %d from the grid.\n" % (job_id))

    # Convert this back into an interpretable job and add metadata.
    job = Job()
    job.id = job_id
    job.expt_dir = expt_dir
    job.name = expt.name
    job.language = expt.language
    job.status = 'submitted'
    job.submit_t = int(time.time())
    job.param.extend(expt_grid.get_params(job_id))

    # Make sure we have a job subdirectory.
    job_subdir = os.path.join(expt_dir, 'jobs')
    if not os.path.exists(job_subdir):
        os.mkdir(job_subdir)

    # Name this job file.
    job_file = os.path.join(job_subdir, '%08d.pb' % (job_id))

    # Store the job file.
    save_job(job_file, job)

    # Make sure there is a directory for output.
    output_subdir = os.path.join(expt_dir, 'output')
    if not os.path.exists(output_subdir):
        os.mkdir(output_subdir)
    output_file = os.path.join(output_subdir, '%08d.out' % (job_id))

    process = job_submit("%s-%08d" % (expt_name, job_id), output_file,
                         job_file, work_dir)
    # poll() refreshes returncode; a negative code means the process was
    # killed by a signal immediately after spawning.
    process.poll()
    if process.returncode is not None and process.returncode < 0:
        # Submission failed; remove the orphaned job file so the grid
        # state and the filesystem stay consistent.
        sys.stderr.write("Failed to submit job or job crashed "
                         "with return code %d !\n" % process.returncode)
        sys.stderr.write("Deleting job file.\n")
        os.unlink(job_file)
        return
    else:
        sys.stderr.write("Submitted job as process: %d\n" % process.pid)

    # Now, update the experiment status to submitted.
    expt_grid.set_submitted(job_id, process.pid)

    return