예제 #1
0
def main():
    """Command-line entry point: run one job, or dispatch jobs until done.

    Options and positional arguments come from ``parse_args()``.

    * If ``options.job`` is set, that job is loaded with ``load_job`` and
      executed with ``job_runner``; the process then exits with status 0.
    * Otherwise ``args[0]`` is taken as the experiment configuration file.
      The experiment directory is the directory containing the resolved
      (real) path of that file.  The chooser and driver modules named in
      the options are imported and initialised, and ``attempt_dispatch``
      is called repeatedly, sleeping ``options.polling_time`` seconds
      between calls, until it returns a false value.

    Exits with status -1 if the experiment directory does not exist.
    """
    (options, args) = parse_args()

    # Single-job mode: run the requested job and stop.
    if options.job:
        job_runner(load_job(options.job))
        # Use sys.exit rather than the bare ``exit`` helper: ``exit`` is
        # injected by the ``site`` module and is not guaranteed to exist
        # (e.g. ``python -S`` or embedded interpreters).
        sys.exit(0)

    experiment_config = args[0]
    expt_dir = os.path.dirname(os.path.realpath(experiment_config))
    log("Using experiment configuration: " + experiment_config)
    log("experiment dir: " + expt_dir)

    if not os.path.exists(expt_dir):
        log("Cannot find experiment directory '%s'. " "Aborting." % (expt_dir))
        sys.exit(-1)

    check_experiment_dirs(expt_dir)

    # Load up the chooser module.
    module = importlib.import_module('spearmint.chooser.' +
                                     options.chooser_module)
    chooser = module.init(expt_dir, options.chooser_args)

    if options.web_status:
        # The returned handle is kept but not otherwise used in this function.
        web_proc = start_web_view(options, experiment_config, chooser)

    # Load up the job execution driver.
    module = importlib.import_module('spearmint.driver.' + options.driver)
    driver = module.init()

    # Loop until we run out of jobs.
    while attempt_dispatch(experiment_config, expt_dir, chooser, driver,
                           options):
        # This is polling frequency. A higher frequency means that the algorithm
        # picks up results more quickly after they finish, but also significantly
        # increases overhead.
        time.sleep(options.polling_time)
예제 #2
0
def main():
    """Command-line entry point: run one job, or dispatch jobs until done.

    Options and positional arguments are obtained from ``parse_args()``.

    When ``options.job`` is set, the job is loaded via ``load_job``, run
    via ``job_runner``, and the process exits with status 0.

    Otherwise ``args[0]`` names the experiment configuration file and the
    experiment directory is the directory of that file's resolved path.
    The chooser (``spearmint.chooser.<options.chooser_module>``) and the
    driver (``spearmint.driver.<options.driver>``) are imported and
    initialised, then ``attempt_dispatch`` is polled every
    ``options.polling_time`` seconds until it returns a false value.

    Exits with status -1 if the experiment directory does not exist.
    """
    (options, args) = parse_args()

    # Single-job mode: execute the requested job, then terminate.
    if options.job:
        job_runner(load_job(options.job))
        # ``exit`` is a convenience name installed by the ``site`` module and
        # may be absent; ``sys.exit`` is always available and is what the
        # rest of this function already uses.
        sys.exit(0)

    experiment_config = args[0]
    expt_dir = os.path.dirname(os.path.realpath(experiment_config))
    log("Using experiment configuration: " + experiment_config)
    log("experiment dir: " + expt_dir)

    if not os.path.exists(expt_dir):
        log("Cannot find experiment directory '%s'. "
            "Aborting." % (expt_dir))
        sys.exit(-1)

    check_experiment_dirs(expt_dir)

    # Load up the chooser module.
    module = importlib.import_module('spearmint.chooser.' + options.chooser_module)
    chooser = module.init(expt_dir, options.chooser_args)

    if options.web_status:
        # The returned handle is kept but not otherwise used in this function.
        web_proc = start_web_view(options, experiment_config, chooser)

    # Load up the job execution driver.
    module = importlib.import_module('spearmint.driver.' + options.driver)
    driver = module.init()

    # Loop until we run out of jobs.
    while attempt_dispatch(experiment_config, expt_dir, chooser, driver, options):
        # This is polling frequency. A higher frequency means that the algorithm
        # picks up results more quickly after they finish, but also significantly
        # increases overhead.
        time.sleep(options.polling_time)
예제 #3
0
def _run_hybrid_batches(options, experiment_config, expt_dir, chooser, driver,
                        distant_driver):
    """Run local jobs in batches, handing off to distant nodes when required.

    Each iteration either:

    * launches ``options.nb_dist_nodes`` empty jobs through
      ``distant_driver`` and returns (experiments are then selected on the
      distant node), or
    * dispatches up to ``options.jobs_per_node`` local jobs with
      ``attempt_dispatch`` and waits for all of them to finish.

    The hand-off happens when ``options.nb_dist_nodes != 1`` or when the
    elapsed time plus 1.5 times the duration of the last batch exceeds the
    time budget below.  The function also returns when a batch launches no
    process at all (no more work).

    Raises:
        Exception: if ``dispatch_empty_job`` returns 0.
    """
    # Hand-off threshold in seconds (20 hours).
    # NOTE(review): presumably the walltime budget of a node -- confirm.
    time_budget = 20 * 60 * 60

    start_time = time.time()
    total_time = 0
    last_exp_time = 0
    loops = 0
    while True:
        if (options.nb_dist_nodes != 1
                or total_time + 1.5 * last_exp_time > time_budget):
            # Launch new distant jobs without selecting any experiment, they
            # will be selected on the distant node.  Only the first execution
            # should launch more than one distributed job.
            log("Launching on new distant nodes.")
            for _ in range(options.nb_dist_nodes):
                out = dispatch_empty_job(expt_dir, distant_driver, options)
                if out == 0:
                    raise Exception("Error trying to dispatch empty job with distant driver.")
            return

        pids = []
        for _ in range(options.jobs_per_node):
            out, pid = attempt_dispatch(experiment_config, expt_dir, chooser,
                                        driver, options)
            if out == 0:
                # Stop the local dispatch loop.
                break
            pids.append(pid)
        if not pids:
            # We are done, no more processes launched.
            return

        # Wait for all local jobs.
        log("Waiting for local processes.")
        for pid in pids:
            try:
                os.waitpid(pid, 0)
            except OSError as err:
                # Best effort: waiting can fail (e.g. the child was already
                # reaped).  Only OSError is tolerated so that Ctrl-C and
                # SystemExit still interrupt the wait.
                log("Could not wait for process %s: %s" % (pid, err))

        loops += 1
        # Read the clock once so both figures are mutually consistent.
        elapsed = time.time() - start_time
        last_exp_time = elapsed - total_time
        total_time = elapsed
        log("All processes done executing %i times (this batch took %f mins, "
            "total time: %f mins)." % (loops, last_exp_time / 60, total_time / 60))


def _poll_dispatch(options, experiment_config, expt_dir, chooser, driver):
    """Call ``attempt_dispatch`` repeatedly until it reports 0.

    Sleeps ``options.polling_time`` seconds between calls.
    """
    while True:
        out, _ = attempt_dispatch(experiment_config, expt_dir, chooser, driver,
                                  options)
        if out == 0:
            return

        # This is polling frequency. A higher frequency means that the algorithm
        # picks up results more quickly after they finish, but also significantly
        # increases overhead.
        time.sleep(options.polling_time)


def main(options=None, experiment_config=None, expt_dir=None):
    """Run one job, or dispatch jobs for an experiment until done.

    Args:
        options: Parsed options object.  When ``None``, options and
            positional arguments are read with ``parse_args()``, and
            ``experiment_config`` / ``expt_dir`` are derived from the first
            positional argument (any values passed in are overwritten).
        experiment_config: Experiment configuration, used only when
            ``options`` is supplied by the caller.
        expt_dir: Experiment directory, used only when ``options`` is
            supplied by the caller.  It must be a string in that case: it is
            concatenated into a log message and checked with
            ``os.path.exists``.

    Returns:
        0 after running a single job (``options.job`` set on the command
        line); ``None`` otherwise.

    Exits with status -1 if the experiment directory does not exist.

    Raises:
        Exception: if an empty job cannot be dispatched with the distant
            driver.
    """
    # If nothing given, get arguments from sys.argv. Otherwise they are
    # provided by the external caller.
    if options is None:
        (options, args) = parse_args()

        if options.job:
            job_runner(load_job(options.job))
            return 0

        experiment_config = args[0]
        expt_dir = os.path.dirname(os.path.realpath(experiment_config))
    log("Using experiment configuration: " + str(experiment_config))
    log("experiment dir: " + expt_dir)

    if not os.path.exists(expt_dir):
        log("Cannot find experiment directory '%s'. "
            "Aborting." % (expt_dir))
        sys.exit(-1)

    check_experiment_dirs(expt_dir)

    # Load up the chooser module.
    module = load_module('chooser', options.chooser_module)
    chooser = module.init(expt_dir, options.chooser_args)

    if options.web_status:
        # The returned handle is kept but not otherwise used in this function.
        web_proc = start_web_view(options, experiment_config, chooser)

    # Load up the job execution driver.
    module = load_module('driver', options.driver)
    driver = module.init(run_func=options.run_func)

    # Jobs per node is used for hybrid jobs; -1 selects plain polling.
    if options.jobs_per_node != -1:
        module = load_module('driver', options.distant_driver)
        distant_driver = module.init(**options.distant_driver_params)
        _run_hybrid_batches(options, experiment_config, expt_dir, chooser,
                            driver, distant_driver)
    else:
        # This process won't end until we run out of jobs or time.
        _poll_dispatch(options, experiment_config, expt_dir, chooser, driver)