def run_job_cluster(args_file, seed, nb_seeds, job_class, timestamp, gridargs=None):
    """Read an argument file, finalize the seed/timestamp arguments and
    submit one job to the cluster.

    Parameters
    ----------
    args_file : path to the experiment argument file given to read_args
    seed : seed value for this particular run
    nb_seeds : total number of seeds in the (possible) seed gridsearch
    job_class : job class to instantiate, e.g. a JobCPU subclass
    timestamp : value forwarded to the executable via --timestamp
    gridargs : optional gridsearch arguments forwarded to read_args

    Raises
    ------
    ValueError
        If the argument file already fixes a seed while a gridsearch over
        several seeds was requested (the fixed seed would silently win).
    """
    args, exp_name, _ = read_args(args_file, gridargs)
    # log dir creation
    args = create_log_dir(args, exp_name, seed)
    # adding the seed to arguments and exp_name
    if '--seed=' not in args:
        args += ' --seed=%d' % seed
        exp_name += '-s%d' % seed
    elif nb_seeds > 1:
        # (fix) original message was missing the space between the two
        # concatenated literals: "...alreadyspecified..."
        raise ValueError(
            'gridsearch over seeds is launched while a seed is already '
            'specified in the argument file')
    if '--timestamp=' not in args:
        args += ' --timestamp={}'.format(timestamp)
    # running the job
    manage([job_class([exp_name, args])],
           only_initialization=False,
           sleep_duration=3)
    print('...\n...\n...')
def double_run(argv):
    """Launch two chained jobs: 'child' runs only after 'root' completes."""
    parent = JobExample(argv)
    parent.job_name = 'root'
    dependent = JobExample(argv)
    dependent.job_name = 'child'
    # declare the dependency: 'child' waits for 'root' to finish
    dependent.add_previous_job(parent)
    manage([parent, dependent])
def send_job(job, seed_path, timestamp, config, args_steps):
    """Send a job to the cluster.

    Builds the training command line from the seed directory path and
    submits a single job instance through manage().
    """
    exp_path, seed_dir = os.path.split(os.path.normpath(seed_path))
    exp_name = os.path.basename(exp_path)
    # e.g. a 'seed3' directory yields a '<exp_name>-s3' job name
    seed_id = seed_dir.replace('seed', '')
    exp_name_seed = '{}-s{}'.format(exp_name, seed_id)
    train_args = '--logdir={} --timestamp={} --config={}'.format(
        seed_path, timestamp, config)
    if args_steps is not None:
        train_args += ' --steps={}'.format(args_steps)
    manage([job([exp_name_seed, train_args])],
           only_initialization=False,
           sleep_duration=1)
def fork_run(argv):
    """Launch one root job and two children that both depend on it."""
    parent = JobExample(argv)
    parent.job_name = 'root'
    children = []
    for child_name in ('child1', 'child2'):
        child = JobExample(argv)
        child.job_name = child_name
        # each child is scheduled only once the root job has finished
        child.add_previous_job(parent)
        children.append(child)
    manage([parent] + children, only_initialization=False)
from job.job_machine import JobCPU
import os
from settings import HOME
from job.job_manager import manage

EXAMPLE_PATH = os.path.join(HOME, 'src/tools/run/example')


class JobExample(JobCPU):
    """Minimal CPU job example that runs path_exe_example.py."""

    def __init__(self, run_argv):
        super().__init__(run_argv)
        self.global_path_project = EXAMPLE_PATH
        self.local_path_exe = 'path_exe_example.py'
        self.job_name = 'new_example'

    @property
    def oarsub_l_options(self):
        # Request one core on one node for one hour.
        # NOTE(review): `JobCPU(self)` builds a throwaway base job just to
        # read the base options — presumably equivalent to using super();
        # kept as-is to preserve behavior, confirm before changing.
        return JobCPU(self).oarsub_l_options + ['nodes=1/core=1,walltime=1:0:0']


if __name__ == '__main__':
    manage([JobExample([])], only_initialization=False)
from job.job_machine import JobCPU
import os
from job.job_manager import manage
from job.skeleton_sequences.job_deep_learning import SKELETON_SEQUENCES_PATH

WEBPAGE_DIR = 'skeleton_webpage/dataset_webpage'


class RunDataset(JobCPU):
    """CPU job generating the skeleton-dataset HTML webpage."""

    def __init__(self, run_argv):
        super().__init__(run_argv)
        self.global_path_project = SKELETON_SEQUENCES_PATH
        self.local_path_exe = os.path.join(WEBPAGE_DIR, 'dataset_html.py')
        self.job_name = 'webpage_skeleton'
        self.interpreter = 'python3'

    @property
    def oarsub_options(self):
        # Request 8 cores on one node for 12 hours.
        # NOTE(review): `JobCPU(self)` creates a throwaway base job just to
        # read the base options — presumably equivalent to super(); kept
        # as-is to preserve behavior.
        return JobCPU(self).oarsub_options + ' -l "nodes=1/core=8,walltime=12:0:0"'


if __name__ == '__main__':
    manage([RunDataset([])])
return args_to_add_list def get_gridsearch_jobs(args, exp_name, overwrite, args_to_add_list): jobs_list = [] for args_to_add in args_to_add_list: name_spec = args_to_add.replace(' ', '').replace('.', '-').replace( '--', '-').replace('=', '-').replace('_', '-') new_exp_name = exp_name + name_spec args_full = args + ' ' + args_to_add args_full = args_full.replace('--exp=' + exp_name, '--exp=' + new_exp_name) args_full, _ = create_temp_dir(args_full, new_exp_name, overwrite) args_full = create_outworlds_dir(args_full, new_exp_name) print('running {} with args: {}'.format(new_exp_name, args_full)) jobs_list.append(JobQprop([new_exp_name, args_full])) return jobs_list if __name__ == "__main__": if len(sys.argv) != 3: print( "Usage: python3 script.py <args_file_fixed> <args_file_gridsearch>" ) args, exp_name, overwrite = read_args(sys.argv[1]) args_to_add_list = parse_grid_args(sys.argv[2]) jobs_list = get_gridsearch_jobs(args, exp_name, overwrite, args_to_add_list) manage(jobs_list, only_initialization=False)
def parse_args_file(args_file):
    """Read an experiment argument file and prepare its run directories.

    Returns
    -------
    (args, exp_name): the finalized argument string and experiment name.
    """
    args, exp_name, overwrite = read_args(args_file)
    args, overwrite = create_temp_dir(args, exp_name, overwrite)
    args = create_outworlds_dir(args, exp_name)
    return args, exp_name


class JobQprop(JobCPU):
    """CPU job that runs the qprop_mini.sh script directly."""

    def __init__(self, run_argv):
        JobCPU.__init__(self, run_argv)
        self.global_path_project = SCRIPTS_PATH
        self.local_path_exe = 'qprop_mini.sh'
        self.job_name = run_argv[0]
        # the shell script is executed directly, no interpreter prefix
        self.interpreter = ''

    @property
    def oarsub_l_options(self):
        # Request 8 cores on one node for 72 hours.
        return JobCPU(self).oarsub_l_options + [
            'nodes=1/core=8,walltime=72:0:0'
        ]


if __name__ == '__main__':
    if len(sys.argv) != 2:
        print('Usage: python3 script.py <args_file>')
        # (fix) previously fell through after printing usage and crashed
        # with an IndexError on sys.argv[1]
        sys.exit(1)
    args, exp_name = parse_args_file(sys.argv[1])
    manage([JobQprop([exp_name, args])],
           only_initialization=False,
           sleep_duration=3)
from job.job_machine import JobCPU
import os
from job.skeleton_sequences.job_deep_learning import SKELETON_SEQUENCES_PATH
from job.job_manager import manage

DATASET_WRITER_DIR = 'tensorflow_datasets'


class JobDataset(JobCPU):
    """CPU job generating the rotation dataset via dataset_generator.py."""

    def __init__(self, run_argv):
        super().__init__(run_argv)
        self.global_path_project = SKELETON_SEQUENCES_PATH
        self.local_path_exe = os.path.join(DATASET_WRITER_DIR,
                                           'dataset_generator.py')
        self.job_name = 'rotation_dataset'
        self.interpreter = 'python3'
        # attribute name spelled 'librairies' in the base class API —
        # kept verbatim so the job machinery still picks it up
        self.librairies_to_install = ['python3-scipy']

    @property
    def oarsub_l_options(self):
        # Request 32 cores on one node for 20 hours.
        # NOTE(review): `JobCPU(self)` creates a throwaway base job just to
        # read the base options — presumably equivalent to super(); kept
        # as-is to preserve behavior.
        return JobCPU(self).oarsub_l_options + ['nodes=1/core=32,walltime=20:0:0']


if __name__ == '__main__':
    manage([JobDataset([])], only_initialization=False)
# argv2 += ['prediction_type=inattention', 'worst_prediction_nb=' + str(worst_prediction_nb)] # Model hyperparameters for rnn_units, rnn_layers, rnn_type in product([100], [3], ['lstm']): argv3 = argv2[:] argv3.extend([ 'rnn_units=' + str(rnn_units), 'rnn_layers=' + str(rnn_layers), 'rnn_type=' + rnn_type ]) """ END COPY AREA """ # Extend the list of runs # Train argv train_run_argv = argv3 + ['run_prefix=' + run_prefix] if restore: train_run_argv.append('restore_run_dir=' + restore_run_dir) train_run_argv.append('restore_checkpoint_filename=' + restore_checkpoint_filename) # Evaluation argv job_name = run_prefix + run_prefix_suffix(argv3) evaluation_run_argv = argv3 + ['run_prefix=' + job_name] jobs.extend( train_val_test_runs(train_run_argv, evaluation_run_argv, job_name, machine='gpu', only_evaluating=only_evaluating)) manage(jobs, only_initialization=only_initialization)