def _launch_pending_evaluations(wait_starts, eval_freq, simulate):
    """Start one evaluation per free GPU for the experiments that are due.

    Must be called while the evaluation lock is held. Returns as soon as
    ``gpu.get_free_gpu()`` reports a negative id (no free GPU) or when all
    experiments found by ``search_for_experiments_to_evaluate`` were handled.
    """
    # NOTE(review): `interval_h` presumably is an interval in hours - confirm
    # against search_for_experiments_to_evaluate.
    todo = search_for_experiments_to_evaluate(EVAL_EXP_DIR, interval_h=eval_freq)
    for index, exp in enumerate(todo):
        if index > 0:
            # Pause between consecutive launches; the first one starts at once.
            for _ in tqdm.trange(wait_starts, 0, -1, leave=False, desc="Waiting"):
                time.sleep(1)

        free_gpu = gpu.get_free_gpu()
        if free_gpu < 0:  # no free gpus, stop
            return

        task = dict(CONFIG=exp, GPU=free_gpu)
        print("Starting eval %s in %s as %s" % (task, EVAL_EXP_DIR, EVAL_COMMAND))
        if not simulate:
            create_window_and_run(EVAL_COMMAND, "Eval-" + task["CONFIG"], task, EVAL_EXP_DIR)

        # Keep a persistent record of every (real or simulated) launch.
        with open("StartedEvals.txt", 'a') as f:
            if simulate:
                f.write(dict2str(task) + " - SIMULATED \n")
            else:
                f.write(dict2str(task) + "\n")


def start_evaluations(wait_starts, wait_rerun=120, eval_freq=20, simulate=False):
    """Periodically launch evaluation jobs on free GPUs; never returns.

    Each round tries to acquire the evaluation lock. If ``acquire`` returns 0
    the pending evaluations are launched and the lock is released again;
    any other return value skips the round. Afterwards the function runs a
    garbage collection and counts down ``wait_rerun`` seconds before retrying.

    Args:
        wait_starts: Seconds to wait between two consecutive launches
            within one round.
        wait_rerun: Seconds to wait between two rounds.
        eval_freq: Forwarded as ``interval_h`` to
            ``search_for_experiments_to_evaluate``.
        simulate: If true, nothing is started; launches are only printed and
            logged to ``StartedEvals.txt`` with a ``SIMULATED`` marker.
    """
    while True:
        lock = FSLock(os.path.join(TASK_DIR, EVAL_LOCK))
        # acquire(no_raise=True) reports its outcome via the return value;
        # 0 is the value this code treats as "lock obtained".
        if lock.acquire(no_raise=True) == 0:
            try:
                _launch_pending_evaluations(wait_starts, eval_freq, simulate)
            finally:
                # Release even when launching fails, so that an exception
                # does not leave the lock held.
                lock.release()

        gc.collect()
        for _ in tqdm.trange(wait_rerun, 0, -1, ncols=0, leave=False, desc="Waiting for Rerun"):
            time.sleep(1)
def start_chunky_jobs(script, wait_rerun=30):
    """Launch one chunk job per free GPU until no GPU is free.

    The script is run from its own directory as
    ``python <script name> run --gpu=GPU``; the GPU id is handed to
    ``create_window_and_run`` via ``replacements``.
    (NOTE(review): presumably the literal ``GPU`` in the command is
    substituted there - confirm against create_window_and_run.)

    Args:
        script: Path to the job script; made absolute before it is split
            into directory and file name.
        wait_rerun: Seconds to count down after each launch before looking
            for the next free GPU.
    """
    script_dir, script_name = os.path.split(os.path.abspath(script))
    command = "python %s run --gpu=GPU" % script_name

    while True:
        gpu_id = gpu.get_free_gpu()
        if gpu_id < 0:
            # A negative id means no GPU is free: nothing more to start.
            return

        print("Starting Chunk Job on gpu%i" % gpu_id)
        create_window_and_run(command, "ChunkJob", replacements={"GPU": gpu_id}, exec_dir=script_dir)

        # One-second ticks so the progress bar shows the remaining time.
        for _ in tqdm.trange(wait_rerun, 0, -1, ncols=0, leave=False, desc="Waiting for Rerun"):
            time.sleep(1)
def start_tasks(self, wait, simulate=False):
    """Start queued tasks on free GPUs and persist what is left over.

    Tasks are popped from ``self.my_tasks`` one at a time, tagged with the
    GPU id and ``self.host_num``, started through ``create_window_and_run``
    and logged to ``StartedTasks.txt``. The loop ends when the queue is empty
    or no GPU is free. Unless simulating, the task file at ``self.path`` is
    then rewritten with the command, the exec dir, ``self.skip_tasks`` and
    the tasks still in ``self.my_tasks``.

    Args:
        wait: Seconds to wait between two consecutive task starts.
        simulate: If true, no task is started and the task file is left
            untouched; the log entries carry a ``SIMULATED`` marker.
    """
    log_suffix = " - SIMULATED \n" if simulate else "\n"
    pause_before_start = False

    while len(self.my_tasks) > 0:
        if pause_before_start:
            for _ in tqdm.trange(wait, 0, -1, leave=False, desc="Waiting"):
                time.sleep(1)
        # Only the very first start happens without a preceding pause.
        pause_before_start = True

        gpu_id = gpu.get_free_gpu()
        if gpu_id < 0:
            # no free gpus, stop
            break

        current = self.my_tasks.pop()
        assert 'GPU' not in current
        current['GPU'] = str(gpu_id)
        current['HOST'] = str(self.host_num)
        print("Starting task %s in %s as %s" % (current, self.exec_dir, self.command))
        if not simulate:
            create_window_and_run(self.command, current["CONFIG"], current, self.exec_dir)

        # The GPU assignment is only valid for this run; don't write it back.
        del current['GPU']
        with open("StartedTasks.txt", 'a') as log_file:
            log_file.write(dict2str(current) + log_suffix)

    if simulate:
        return

    # Write tasks which are for other hosts and remaining back to file
    with open(self.path, 'w') as task_file:
        task_file.write("command = %s\n" % self.command)
        task_file.write("exec_dir = %s\n" % self.exec_dir)
        for group in (self.skip_tasks, self.my_tasks):
            for pending in group:
                task_file.write(dict2str(pending) + "\n")