Example #1
0
def attempt_dispatch(expt_config, expt_dir, chooser, driver, options):
    '''
    Dispatches a job containing `num_jobs` jobs, if the number of jobs is greater
    than 1 they will all have the same proc_id.

    Returns a (status, pid) pair:
      status 0 -> finished budget reached or no candidates left (stop)
      status 1 -> too many jobs already pending (retry later)
      status 2 -> job(s) submitted; pid is the driver-assigned process id
                  (None if submission failed)
    '''
    log("\n" + "-" * 40)
    # `expt_config` may be either a path to a config file or an already
    # loaded experiment object.
    if isinstance(expt_config, str):
        expt = load_experiment(expt_config)
    else:
        expt = expt_config

    # Build the experiment grid.
    expt_grid = ExperimentGrid(expt_dir,
                               expt.variable,
                               options.grid_size,
                               options.grid_seed)

    jobs = []
    num_jobs = 1
    for n in range(num_jobs):
        # Print out the current best function value.
        best_val, best_job = expt_grid.get_best()
        if best_job >= 0:
            log("Current best: %f (job %d)" % (best_val, best_job))
        else:
            log("Current best: No results returned yet.")

        # Gets you everything - NaN for unknown values & durations.
        grid, values, durations = expt_grid.get_grid()

        # Returns lists of indices.
        candidates = expt_grid.get_candidates()
        pending    = expt_grid.get_pending()
        complete   = expt_grid.get_complete()

        n_candidates = candidates.shape[0]
        n_pending    = pending.shape[0]
        n_complete   = complete.shape[0]
        log("%d candidates   %d pending   %d complete" %
            (n_candidates, n_pending, n_complete))

        # TODO(revisit): verify that pending jobs are actually running, and
        # add them back to the candidate set if they have crashed or gotten
        # lost.
        #for job_id in pending:
        #    proc_id = expt_grid.get_proc_id(job_id)
        #    if proc_id != -1 and not driver.is_proc_alive(job_id, proc_id):
        #        log("Set job %d back to candidate status." % (job_id))
        #        expt_grid.set_candidate(job_id)

        # Track the time series of optimization.
        write_trace(expt_dir, best_val, best_job, n_candidates, n_pending, n_complete)

        # Print out the best job results
        write_best_job(expt_dir, best_val, best_job, expt_grid)

        if n_complete >= options.max_finished_jobs:
            # Fixed: the two adjacent literals previously concatenated
            # without a separating space ("reached.Exiting").
            log("Maximum number of finished jobs (%d) reached. "
                "Exiting" % options.max_finished_jobs)
            return 0, None

        if n_candidates == 0:
            log("There are no candidates left. Exiting.")
            return 0, None

        # Don't launch unless we can launch the complete bundle.
        if n_pending >= options.max_concurrent or (n == 0 and n_pending + num_jobs > options.max_concurrent):
            log("Maximum number of jobs (%d) pending." % (options.max_concurrent))
            return 1, None
        else:
            # Ask the chooser to pick the next candidate.
            log("Choosing next candidate... ")
            time_cand_start = time.time()
            job_id = chooser.next(grid, values, durations, candidates, pending, complete)
            time_cand = time.time() - time_cand_start
            log("Chose a candidate (took %i secs)." % (time_cand))

            # If the job_id is a tuple, then the chooser picked a new job.
            # We have to add this to our grid.
            if isinstance(job_id, tuple):
                (job_id, candidate) = job_id
                job_id = expt_grid.add_to_grid(candidate)

            log("selected job %d from the grid." % (job_id))

            # Convert this back into an interpretable job and add metadata.
            job = Job()
            job.id        = job_id
            job.expt_dir  = expt_dir
            job.name      = expt.name
            job.language  = expt.language
            job.status    = 'submitted'
            job.submit_t  = int(time.time())
            job.param.extend(expt_grid.get_params(job_id))
            if options.nb_mini_batches > 0:
                # Round-robin over mini-batches: attach the current batch
                # index as an extra parameter and advance the counter.
                batch_i = expt_grid.mini_batch_i
                expt_grid.mini_batch_i = (batch_i + 1) % options.nb_mini_batches
                batch_param = Parameter()
                batch_param.name = 'batch_i'
                batch_param.int_val.append(batch_i)
                job.param.extend([batch_param])

            save_job(job)
            if num_jobs == 1:
                pid = driver.submit_job(job)
                if pid is not None:
                    log("submitted - pid = %s" % (pid))
                    expt_grid.set_submitted(job_id, pid)
                else:
                    log("Failed to submit job!")
                    log("Deleting job file.")
                    os.unlink(job_file_for(job))
            else:
                jobs.append(job)
                # Temporary: we don't have a proc id yet.
                expt_grid.set_submitted(job_id, -1)

    # Delayed submit when there is more than one job bundled.
    if num_jobs > 1:
        pid = driver.submit_job(jobs)
        if pid is not None:
            log("Submitted %i jobs with pid = %s" % (num_jobs, pid))
            for j in jobs:
                expt_grid.set_submitted(j.id, pid)
        else:
            log("Failed to submit job!")
            log("Deleting job files.")
            for j in jobs:
                os.unlink(job_file_for(j))

    return 2, pid
Example #2
0
def attempt_dispatch(expt_config, expt_dir, chooser, driver, options):
    '''
    Load the experiment, refresh the grid state, and try to submit one new
    candidate job through `driver`.

    Returns False when polling should stop (job budget reached or no
    candidates left) and True otherwise.
    '''
    log("\n" + "-" * 40)
    expt = load_experiment(expt_config)

    # Build the experiment grid.
    expt_grid = ExperimentGrid(expt_dir,
                               expt.variable,
                               options.grid_size,
                               options.grid_seed)

    # Print out the current best function value.
    best_val, best_job = expt_grid.get_best()
    if best_job >= 0:
        log("Current best: %f (job %d)" % (best_val, best_job))
    else:
        log("Current best: No results returned yet.")

    # Gets you everything - NaN for unknown values & durations.
    grid, values, durations = expt_grid.get_grid()

    # Returns lists of indices.
    candidates = expt_grid.get_candidates()
    pending    = expt_grid.get_pending()
    complete   = expt_grid.get_complete()

    n_candidates = candidates.shape[0]
    n_pending    = pending.shape[0]
    n_complete   = complete.shape[0]
    log("%d candidates   %d pending   %d complete" %
        (n_candidates, n_pending, n_complete))

    # Verify that pending jobs are actually running, and add them back to the
    # candidate set if they have crashed or gotten lost.
    for job_id in pending:
        proc_id = expt_grid.get_proc_id(job_id)
        if not driver.is_proc_alive(job_id, proc_id):
            # Fixed: message previously said "pending status" although the
            # job is returned to *candidate* status.
            log("Set job %d back to candidate status." % (job_id))
            expt_grid.set_candidate(job_id)

    # Track the time series of optimization.
    write_trace(expt_dir, best_val, best_job, n_candidates, n_pending, n_complete)

    # Print out the best job results
    write_best_job(expt_dir, best_val, best_job, expt_grid)

    if n_complete >= options.max_finished_jobs:
        # Fixed: the two adjacent literals previously concatenated without
        # a separating space ("reached.Exiting").
        log("Maximum number of finished jobs (%d) reached. "
            "Exiting" % options.max_finished_jobs)
        return False

    if n_candidates == 0:
        log("There are no candidates left.  Exiting.")
        return False

    if n_pending >= options.max_concurrent:
        log("Maximum number of jobs (%d) pending." % (options.max_concurrent))
        return True

    else:
        # Ask the chooser to pick the next candidate.
        log("Choosing next candidate... ")
        job_id = chooser.next(grid, values, durations, candidates, pending, complete)

        # If the job_id is a tuple, then the chooser picked a new job.
        # We have to add this to our grid.
        if isinstance(job_id, tuple):
            (job_id, candidate) = job_id
            job_id = expt_grid.add_to_grid(candidate)

        log("selected job %d from the grid." % (job_id))

        # Convert this back into an interpretable job and add metadata.
        job = Job()
        job.id        = job_id
        job.expt_dir  = expt_dir
        job.name      = expt.name
        job.language  = expt.language
        job.status    = 'submitted'
        job.submit_t  = int(time.time())
        job.param.extend(expt_grid.get_params(job_id))

        save_job(job)
        pid = driver.submit_job(job)
        if pid is not None:
            log("submitted - pid = %d" % (pid))
            expt_grid.set_submitted(job_id, pid)
        else:
            log("Failed to submit job!")
            log("Deleting job file.")
            os.unlink(job_file_for(job))

    return True
Example #3
0
def attempt_dispatch(expt_config, expt_dir, chooser, driver, options):
    '''
    Load the experiment, refresh the grid state, and try to submit one new
    candidate job through `driver`.

    Returns False when polling should stop (job budget reached or no
    candidates left) and True otherwise.
    '''
    log("\n" + "-" * 40)
    expt = load_experiment(expt_config)

    # Build the experiment grid.
    expt_grid = ExperimentGrid(expt_dir, expt.variable, options.grid_size,
                               options.grid_seed)

    # Print out the current best function value.
    best_val, best_job = expt_grid.get_best()
    if best_job >= 0:
        log("Current best: %f (job %d)" % (best_val, best_job))
    else:
        log("Current best: No results returned yet.")

    # Gets you everything - NaN for unknown values & durations.
    grid, values, durations = expt_grid.get_grid()

    # Returns lists of indices.
    candidates = expt_grid.get_candidates()
    pending = expt_grid.get_pending()
    complete = expt_grid.get_complete()

    n_candidates = candidates.shape[0]
    n_pending = pending.shape[0]
    n_complete = complete.shape[0]
    log("%d candidates   %d pending   %d complete" %
        (n_candidates, n_pending, n_complete))

    # Verify that pending jobs are actually running, and add them back to the
    # candidate set if they have crashed or gotten lost.
    for job_id in pending:
        proc_id = expt_grid.get_proc_id(job_id)
        if not driver.is_proc_alive(job_id, proc_id):
            # Fixed: message previously said "pending status" although the
            # job is returned to *candidate* status.
            log("Set job %d back to candidate status." % (job_id))
            expt_grid.set_candidate(job_id)

    # Track the time series of optimization.
    write_trace(expt_dir, best_val, best_job, n_candidates, n_pending,
                n_complete)

    # Print out the best job results
    write_best_job(expt_dir, best_val, best_job, expt_grid)

    if n_complete >= options.max_finished_jobs:
        # Fixed: the two adjacent literals previously concatenated without
        # a separating space ("reached.Exiting").
        log("Maximum number of finished jobs (%d) reached. "
            "Exiting" % options.max_finished_jobs)
        return False

    if n_candidates == 0:
        log("There are no candidates left.  Exiting.")
        return False

    if n_pending >= options.max_concurrent:
        log("Maximum number of jobs (%d) pending." % (options.max_concurrent))
        return True

    else:
        # Ask the chooser to pick the next candidate.
        log("Choosing next candidate... ")
        job_id = chooser.next(grid, values, durations, candidates, pending,
                              complete)

        # If the job_id is a tuple, then the chooser picked a new job.
        # We have to add this to our grid.
        if isinstance(job_id, tuple):
            (job_id, candidate) = job_id
            job_id = expt_grid.add_to_grid(candidate)

        log("selected job %d from the grid." % (job_id))

        # Convert this back into an interpretable job and add metadata.
        job = Job()
        job.id = job_id
        job.expt_dir = expt_dir
        job.name = expt.name
        job.language = expt.language
        job.status = 'submitted'
        job.submit_t = int(time.time())
        job.param.extend(expt_grid.get_params(job_id))

        save_job(job)
        pid = driver.submit_job(job)
        if pid is not None:
            log("submitted - pid = %d" % (pid))
            expt_grid.set_submitted(job_id, pid)
        else:
            log("Failed to submit job!")
            log("Deleting job file.")
            os.unlink(job_file_for(job))

    return True
Example #4
0
def explore_space_of_candidates(experiment, objective_function,
        working_directory, chooser,
        grid_size=1000,
        grid_seed=1,
        max_finished_jobs=100):
    """Generator that drives the optimization loop in-process.

    Each iteration asks `chooser` for the next candidate, yields
    (best_val, best_job, params_of_best_job, chosen_job_id) before running
    it, then evaluates the candidate with `objective_function` and records
    the result on the grid.  Memoized (cached) evaluations extend the
    `max_finished_jobs` budget by one, so they are effectively free.
    Terminates when the budget is exhausted or no candidates remain.
    """
    # Build the experiment grid.
    expt_grid = ExperimentGrid(working_directory,
                               experiment.variables, grid_size, grid_seed)

    runner = PythonRunner()
    finished = 0

    while finished < max_finished_jobs:
        best_val, best_job = expt_grid.get_best()

        # Full snapshot of the grid - NaN marks unknown values & durations.
        grid, values, durations = expt_grid.get_grid()

        # Index lists, one per job state.
        candidates = expt_grid.get_candidates()
        pending = expt_grid.get_pending()
        complete = expt_grid.get_complete()

        n_candidates = candidates.shape[0]
        logging.info("%d candidates   %d pending   %d complete",
                     n_candidates, pending.shape[0], complete.shape[0])

        if n_candidates == 0:
            logging.info("There are no candidates left.  Exiting.")
            return

        # Ask the chooser to pick the next candidate.
        logging.info("Choosing next candidate... ")
        job_id = chooser.next(grid, values, durations, candidates, pending,
                              complete)

        # Report progress to the caller before executing the choice.
        yield best_val, best_job, expt_grid.get_params(best_job), job_id

        # A tuple means the chooser proposed a brand-new point that must
        # first be registered on the grid.
        if isinstance(job_id, tuple):
            job_id, candidate = job_id
            job_id = expt_grid.add_to_grid(candidate)

        logging.info("selected job %d from the grid", job_id)

        expt_grid.set_submitted(job_id, finished)
        expt_grid.set_running(job_id)

        t0 = time.time()
        result, memoized = runner(job_id, objective_function,
                                  expt_grid.get_params(job_id),
                                  working_directory)
        expt_grid.set_complete(job_id, result, time.time() - t0)

        finished += 1

        if memoized:
            # Cached hit: don't let it consume part of the budget.
            max_finished_jobs += 1