import copy

# `jobs`, `commands`, `keys_for_dir_name`, and `get_slurm_script_gpu` are
# assumed to be defined at module level in the original script.
def launch_grid(grid):
    # Sweep 1: InnerProduct model with linear learning-rate decay.
    grid = copy.deepcopy(grid)
    grid['model'] = 'InnerProduct'
    grid['lr_decay'] = 'linear'
    grid['emb_size'] = 9
    for cfg in jobs.param_grid(grid):
        cfg['train_dir'] = jobs.make_train_dir(cfg, keys_for_dir_name)
        jobs.submit(commands, cfg, get_slurm_script_gpu)

    # Sweep 2: deeper models with plateau decay. Every key set in sweep 1 is
    # overwritten below, so copying the mutated grid does not leak settings.
    grid = copy.deepcopy(grid)
    grid['model'] = ['Deep', 'ResidualInnerProduct']
    grid['emb_size'] = 8
    grid['eval_batch_size'] = 1024
    grid['lr_decay'] = 'plateau'
    grid['hidden_size'] = 128
    grid['learning_rate'] = 0.01
    for cfg in jobs.param_grid(grid):
        cfg['train_dir'] = jobs.make_train_dir(cfg, keys_for_dir_name)
        jobs.submit(commands, cfg, get_slurm_script_gpu)
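
# A minimal sketch of what `jobs.param_grid` might do, assuming the convention
# used above: list-valued entries are swept, scalar entries are held fixed.
# The real `jobs` module is not shown here, so this is illustrative only.
import itertools

def param_grid(grid):
    """Yield one flat config dict per point in the Cartesian product of all
    list-valued entries in `grid`."""
    sweep_keys = [k for k, v in grid.items() if isinstance(v, list)]
    fixed = {k: v for k, v in grid.items() if not isinstance(v, list)}
    for values in itertools.product(*(grid[k] for k in sweep_keys)):
        cfg = dict(fixed)
        cfg.update(zip(sweep_keys, values))
        yield cfg

# For example, {'model': ['Deep', 'ResidualInnerProduct'], 'emb_size': 8}
# yields two configs, one per model, each with emb_size=8.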
import copy
import pathlib

# `base_grid`, `jobs`, `commands`, `log_dir`, `experiment_name`, and
# `get_slurm_script_gpu` are assumed to be defined earlier in the script.
base_grid.map_items = False
base_grid.eval_recall_max = 100
base_grid.test_recall_max = 1000
base_grid.tokenize = False
base_grid.target_publication = 0
base_grid.batch_size = 32
base_grid.learning_rate = [2e-5, 3e-5, 4e-5]
base_grid.use_gpu = True
base_grid.frequency = 200
base_grid.eval_batch_size = 500
base_grid.dict_dir = pathlib.Path(
    "/scratch/network/altosaar/dat/longform-data/main/dictionaries")
base_grid.tokenizer_file = (
    "/scratch/network/altosaar/dat/longform-data/main/bert-base-uncased.txt"
)
base_grid.model_path = "/scratch/network/altosaar/dat/longform-data/BERT/model"
base_grid.index_file_path = (
    "/scratch/network/altosaar/dat/longform-data/BERT/eval_indices_list.txt"
)

# Sweep training steps with 1,000 warmup steps.
grid = copy.deepcopy(base_grid)
grid.warmup_steps = 1000
grid.training_steps = [5000, 50000, 100000]
keys_for_dir_name = jobs.get_keys_for_dir_name(grid)
keys_for_dir_name.insert(0, "warmup_steps")
for cfg in jobs.param_grid(grid):
    cfg["output_dir"] = jobs.make_output_dir(
        log_dir, experiment_name, cfg, keys_for_dir_name)
    jobs.submit(commands, cfg, get_slurm_script_gpu)
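
# A minimal sketch of what `get_slurm_script_gpu` might return, assuming it
# renders an sbatch script that requests one GPU and runs the training
# command. The template and resource limits here are assumptions for
# illustration, not the original implementation.
def get_slurm_script_gpu(output_dir, command):
    """Return a SLURM batch script string for a single-GPU job."""
    return f"""#!/bin/bash
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --gres=gpu:1
#SBATCH --mem=16G
#SBATCH --time=24:00:00
#SBATCH --output={output_dir}/slurm_%j.out

{command}
"""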