Example #1
0
def start_evaluations(wait_starts,
                      wait_rerun=120,
                      eval_freq=20,
                      simulate=False):
    """Poll forever for experiments to evaluate and launch them on free GPUs.

    Each pass tries to take the file-system lock ``EVAL_LOCK`` inside
    ``TASK_DIR``. When the lock is obtained, every experiment returned by
    ``search_for_experiments_to_evaluate`` is launched through
    ``create_window_and_run`` on a GPU reported by ``gpu.get_free_gpu`` and a
    line is appended to ``StartedEvals.txt``. A pass ends early as soon as no
    GPU is free. The lock is released even when a pass fails with an
    exception. This function never returns on its own.

    Args:
        wait_starts: Seconds to wait between two consecutive launches within
            one pass (no wait before the first launch).
        wait_rerun: Seconds to wait between two passes.
        eval_freq: Forwarded as ``interval_h`` to
            ``search_for_experiments_to_evaluate`` (presumably an interval in
            hours -- confirm against that helper).
        simulate: If true, nothing is launched and the log lines are marked
            with `` - SIMULATED``.
    """
    while True:
        lock = FSLock(os.path.join(TASK_DIR, EVAL_LOCK))
        # A result of 0 is treated as "lock obtained"; any other result means
        # this pass is skipped and we simply wait for the next one.
        if lock.acquire(no_raise=True) == 0:
            try:
                todo = search_for_experiments_to_evaluate(
                    EVAL_EXP_DIR, interval_h=eval_freq)
                for index, exp in enumerate(todo):
                    if index:
                        # Space out consecutive launches.
                        for _ in tqdm.trange(wait_starts,
                                             0,
                                             -1,
                                             leave=False,
                                             desc="Waiting"):
                            time.sleep(1)

                    free_gpu = gpu.get_free_gpu()
                    if free_gpu < 0:  # no free gpus, stop
                        break

                    task = dict(CONFIG=exp, GPU=free_gpu)
                    print("Starting eval %s in %s as %s" %
                          (task, EVAL_EXP_DIR, EVAL_COMMAND))
                    if not simulate:
                        create_window_and_run(EVAL_COMMAND,
                                              "Eval-" + task["CONFIG"], task,
                                              EVAL_EXP_DIR)

                    with open("StartedEvals.txt", 'a') as f:
                        if simulate:
                            f.write(dict2str(task) + " - SIMULATED \n")
                        else:
                            f.write(dict2str(task) + "\n")
            finally:
                # Always give the lock back, otherwise one failing pass would
                # leave it held.
                lock.release()

        gc.collect()

        for _ in tqdm.trange(wait_rerun,
                             0,
                             -1,
                             ncols=0,
                             leave=False,
                             desc="Waiting for Rerun"):
            time.sleep(1)
Example #2
0
def start_chunky_jobs(script, wait_rerun=30):
    """Launch one chunk job per free GPU until no GPU is free.

    The script path is made absolute and split into its directory and file
    name. The job is started from that directory with the command
    ``python <file name> run --gpu=GPU``, and the dict ``{"GPU": <gpu id>}``
    is handed to ``create_window_and_run`` as ``replacements``.

    Args:
        script: Path to the script to run.
        wait_rerun: Seconds to wait after each launch before asking for the
            next free GPU.
    """
    script_dir, script_name = os.path.split(os.path.abspath(script))
    command = "python %s run --gpu=GPU" % script_name

    gpu_id = gpu.get_free_gpu()
    while not gpu_id < 0:  # a negative id means no GPU is free
        print("Starting Chunk Job on gpu%i" % gpu_id)
        create_window_and_run(command,
                              "ChunkJob",
                              replacements={"GPU": gpu_id},
                              exec_dir=script_dir)

        countdown = tqdm.trange(wait_rerun,
                                0,
                                -1,
                                ncols=0,
                                leave=False,
                                desc="Waiting for Rerun")
        for _ in countdown:
            time.sleep(1)

        gpu_id = gpu.get_free_gpu()
Example #3
0
    def start_tasks(self, wait, simulate=False):
        """Start queued tasks on free GPUs and persist what is left over.

        Tasks are taken from the end of ``self.my_tasks`` one at a time. Each
        is launched with ``create_window_and_run`` using ``self.command`` in
        ``self.exec_dir``, with ``GPU`` and ``HOST`` entries added to the
        launch arguments. A line per task is appended to ``StartedTasks.txt``
        (without the ``GPU`` entry). The loop stops when the queue is empty
        or no GPU is free.

        Unless simulating, the task file at ``self.path`` is rewritten with
        ``self.skip_tasks`` followed by the tasks still queued. This also
        happens when the loop is aborted by an exception, so tasks that were
        already started are not left in the file. A task is only removed
        from the queue after its launch step, so a failed launch does not
        drop it.

        Args:
            wait: Seconds to wait between two consecutive launches (no wait
                before the first one).
            simulate: If true, nothing is launched, the log lines are marked
                with `` - SIMULATED`` and the task file is left untouched.
        """
        first = True
        try:
            while self.my_tasks:
                if not first:
                    for _ in tqdm.trange(wait, 0, -1, leave=False,
                                         desc="Waiting"):
                        time.sleep(1)
                first = False

                free_gpu = gpu.get_free_gpu()
                if free_gpu < 0:  # no free gpus, stop
                    break

                # Peek only: the task stays queued until it has been launched.
                task = self.my_tasks[-1]
                assert 'GPU' not in task
                # Work on a copy so the queued task is not modified if the
                # launch fails.
                launch_args = task.copy()
                launch_args['GPU'] = str(free_gpu)
                launch_args['HOST'] = str(self.host_num)
                print("Starting task %s in %s as %s" %
                      (launch_args, self.exec_dir, self.command))

                if not simulate:
                    create_window_and_run(self.command, launch_args["CONFIG"],
                                          launch_args, self.exec_dir)
                self.my_tasks.pop()

                launch_args.pop('GPU')  # don't write this back
                with open("StartedTasks.txt", 'a') as f:
                    if simulate:
                        f.write(dict2str(launch_args) + " - SIMULATED \n")
                    else:
                        f.write(dict2str(launch_args) + "\n")
        finally:
            # Write tasks which are for other hosts and remaining back to file
            if not simulate:
                with open(self.path, 'w') as f:
                    f.write("command = %s\n" % self.command)
                    f.write("exec_dir = %s\n" % self.exec_dir)
                    for task in self.skip_tasks:
                        f.write(dict2str(task) + "\n")
                    for task in self.my_tasks:
                        f.write(dict2str(task) + "\n")