def list_jobs(table_name):
    """Print a per-status summary of the jobs stored in ``table_name``.

    The jobs are loaded with ``job_scheduler.load_jobs`` and sorted into four
    buckets (waiting, running, completed, broken) using the scheduler's
    ``is_pending``, ``is_running``, ``is_completed`` and ``is_broken``
    predicates, tried in that order. A job matching none of them only shows
    up in the total. Each status line prints the bucket size followed by the
    output of ``_format_ids`` for that bucket. Nothing is returned.
    """
    all_jobs = job_scheduler.load_jobs(table_name)

    pending, active, done, failed = [], [], [], []

    # Order matters: the first predicate that accepts a job claims it,
    # exactly like an if/elif chain would.
    classifiers = (
        (job_scheduler.is_pending, pending),
        (job_scheduler.is_running, active),
        (job_scheduler.is_completed, done),
        (job_scheduler.is_broken, failed),
    )
    for entry in all_jobs:
        for accepts, bucket in classifiers:
            if accepts(entry):
                bucket.append(entry)
                break

    print(" # of jobs in total = % 3d" % len(all_jobs))

    report_lines = (
        (" waiting = % 3d {%s}", pending),
        (" running = % 3d {%s}", active),
        (" completed = % 3d {%s}", done),
        (" broken = % 3d {%s}", failed),
    )
    for template, bucket in report_lines:
        print(template % (len(bucket), _format_ids(bucket)))
def plot_experiment(experiment_name):
    """Plot the jobs belonging to the experiment named ``experiment_name``.

    The experiment is looked up by name through
    ``experiment_scheduler.load_experiments``; when several match, the first
    one is used. Its jobs are loaded from the table stored under the
    experiment's ``'table'`` key and handed to ``plot``.

    Args:
        experiment_name: Name used as the equality filter for the lookup.

    Returns:
        Whatever ``plot`` returns for the loaded jobs, or ``None`` when no
        experiment with that name exists (a message is printed instead).
    """
    experiments = experiment_scheduler.load_experiments(
        cluster=None, filter_eq_dct=dict(name=experiment_name))

    if not experiments:
        print("No experiments in database %s" % get_db_string("experiments"))
        # Stop here: indexing the empty result below would raise IndexError
        # right after telling the user that nothing was found.
        return None

    experiment = experiments[0]
    jobs = job_scheduler.load_jobs(experiment['table'])
    return plot(jobs)
def run(function_path, experiment_config_file, job_config_file, force):
    """Register a job in the database (or reload it) and execute it.

    The experiment described by ``experiment_config_file`` is saved through
    ``experiment_scheduler.save_experiment`` and the job state read from
    ``job_config_file`` is looked up in the experiment's job table, once with
    a START status and once with a RUNNING status. If a matching job exists,
    the stored version replaces the freshly loaded state. The job is then
    marked RUNNING, saved, executed, and saved again.

    Args:
        function_path: Path identifying the object to run. It is cleaned with
            ``function_path_re.sub('', ...)`` and passed to ``resolve``; the
            resolved object's ``jobman_main(state, channel)`` is called.
        experiment_config_file: JSON file from which ``"experiment_name"``
            and ``"clusters"`` are read.
        job_config_file: JSON file holding the job state. Its directory is
            the one handed to ``ChangeDir`` for the duration of the run.
        force: When true, run the job even if its stored status is not START.

    Raises:
        RuntimeError: If the job's status is not START and ``force`` is false.
    """
    # dirname() yields "" for a bare file name; fall back to the current
    # directory. NOTE(review): presumably ChangeDir chdirs into this path,
    # for which "" would not be a valid target -- confirm.
    job_dir = os.path.dirname(job_config_file) or os.curdir

    experiment_config = load_json(experiment_config_file)
    state = load_json(job_config_file)

    with ChangeDir(job_dir):
        experiment_name = experiment_config["experiment_name"]
        experiment = experiment_scheduler.save_experiment(
            name=experiment_name,
            table_name=experiment_name + "_jobs",
            clusters=experiment_config["clusters"],
            duree="", mem="", env="", gpu="")
        table_name = experiment["table"]

        channel = Channel()

        state["jobman"] = dict(status=channel.START)

        # The same job may already be stored as START or as RUNNING, so query
        # with both statuses. The copy is shallow, but only the "jobman" key
        # is rebound on it, so ``state`` itself is left untouched.
        state_to_hash = copy.copy(state)
        jobs = job_scheduler.load_jobs(table_name, hash_of=state_to_hash)
        state_to_hash["jobman"] = dict(status=channel.RUNNING)
        jobs += job_scheduler.load_jobs(table_name, hash_of=state_to_hash)

        if jobs:
            logger.warning("Job already registered, loading from database")
            state = jobs[0]

        # Only a job reloaded from the database can fail this check; a fresh
        # state was set to START just above.
        if state["jobman"]["status"] != channel.START:
            if not force:
                raise RuntimeError(
                    "Job (%d) is not available" % state["id"])

            # Use the module logger (not the root ``logging`` functions) so
            # this message is routed like the other warning in this function.
            logger.warning(
                "Job (%d) is not available. Forcing it to run", state["id"])

        state["jobman"]["status"] = channel.RUNNING
        state = job_scheduler.save_job(table_name, state)

        resolve(function_path_re.sub('', function_path)).jobman_main(
            state, channel)

        # Save the state again once jobman_main has returned.
        job_scheduler.save_job(table_name, state)
def remove_experiment(name):
    """Interactively delete the experiment ``name`` and, optionally, its jobs.

    The experiment is looked up by name with
    ``experiment_scheduler.load_experiments``; the first match is used. If
    none is found a message is printed and the function returns. Otherwise
    the user is asked to confirm the deletion through ``query_yes_no`` and,
    once the experiment is deleted, asked again whether the jobs stored in
    the experiment's table should be deleted too. Nothing is returned.
    """
    matches = experiment_scheduler.load_experiments(
        cluster=None, filter_eq_dct=dict(name=name))
    if len(matches) == 0:
        print("No experiments in database %s" % get_db_string("experiments"))
        return

    target = matches[0]
    # Read the table name up front; it is needed after the experiment record
    # has been deleted.
    jobs_table = target["table"]

    confirmation = "Do you really want to delete experiment %s?" % bold(name)
    if not query_yes_no(confirmation):
        return

    print("Deleting %s..." % name)
    experiment_scheduler.delete_experiments([target])

    # NOTE(review): the jobs prompt is only reached once the experiment has
    # been deleted -- confirm this is the intended flow.
    if not query_yes_no("Do you want to delete corresponding jobs?"):
        return

    doomed_jobs = job_scheduler.load_jobs(jobs_table)
    print("Deleting %d jobs..." % len(doomed_jobs))
    job_scheduler.delete_jobs(jobs_table, doomed_jobs)