if arguments['--dry_run']:
    dry_run = True
else:
    dry_run = False

cartesian_product = []
for data_type, vals in runs_dic.items():
    task_format = vals['task_format']
    for split in vals['splits']:
        input_file = vals['input_file'].format(split)
        for encoding_format in vals['encode_format']:
            for task in vals['tasks']:
                for control in ['false', 'true']:
                    if control == 'true':
                        out_dir = vals['out_dir'].format(
                            encoding_format, task + '_control', split)
                    else:
                        out_dir = vals['out_dir'].format(
                            encoding_format, task, split)
                    cartesian_product.append([
                        input_file,
                        f'models/lm/{task}/{encoding_format}/',
                        out_dir,
                        task_format,
                        encoding_format,
                        control
                    ])

parallelize(nodes, cartesian_product,
            'amnesic_probing/runs/encode/run_layer_encode.sh',
            on_gpu=True, dry_run=dry_run)
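# Illustrative sketch (not from the repo): a runs_dic entry with the keys the
# encode loop above reads. All names, paths and values below are hypothetical
# placeholders; the '{}' slots are what the .format() calls fill in.
example_encode_entry = {
    'task_format': 'conll',                    # passed through to the script
    'splits': ['train', 'dev'],                # fills '{}' in input_file
    'input_file': 'data/ontonotes/{}.conll',
    'encode_format': ['normal', 'masked'],
    'tasks': ['pos', 'dep'],
    'out_dir': 'data/encoded/{0}/{1}/{2}/',    # encoding_format, task(+'_control'), split
}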
if arguments['--dry_run']:
    dry_run = True
else:
    dry_run = False

cartesian_product = []
for data_type, vals in runs_dic.items():
    # base_dir/vecs/text depend on the masking mode, so they are computed
    # inside the masking loop (a duplicate, dead pre-loop copy was removed)
    for masking in ['normal', 'masked']:
        base_dir = vals['base_dir'].format(masking)
        vecs = base_dir + '/' + vals['vecs']
        text = base_dir + '/' + vals['text']
        task_type = vals['task_type']
        for label in vals['labels']:
            data_label = label
            if task_type != 'task':
                data_label = 'np_start'
            cartesian_product.append([
                vecs,
                f'{base_dir}/{data_label}.pickle',
                text,
                f'models/lm/{label}/{masking}/layer:last/',
                task_type
            ])

parallelize(nodes, cartesian_product,
            'amnesic_probing/runs/evaluate/run_eval_per_dim.sh',
            on_gpu=True, dry_run=dry_run)
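# Illustrative sketch (hypothetical values): the entry shape this loop expects.
# 'base_dir' carries one '{}' slot for the masking mode; 'vecs' and 'text' are
# plain file names that get joined onto the formatted base_dir.
example_eval_entry = {
    'base_dir': 'data/encoded/{}/last',   # '{}' <- 'normal' or 'masked'
    'vecs': 'vecs.npy',
    'text': 'tokens.pickle',
    'task_type': 'task',
    'labels': ['pos', 'dep'],
}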
arguments = docopt(__doc__)
if arguments['--dry_run']:
    dry_run = True
else:
    dry_run = False

cartesian_product = []
for data_type, vals in runs_dic.items():
    for masking in ['normal', 'masked']:
        layer_dir = vals['layers_dir'].format(masking)
        for task in vals['labels']:
            base_dir = vals['base_dir'].format(masking, task)
            text_file = f'{base_dir}/tokens.pickle'
            task_type = vals['task_type']
            # using the task labels in the regular case, or random labels from
            # the same data when the task is generated on the fly (e.g. word_len)
            data_label = task
            if task_type != 'task':
                data_label = 'np_start'
            cartesian_product.append([
                layer_dir,
                base_dir,
                f'{base_dir}/{data_label}.pickle',
                text_file,
                task_type
            ])

parallelize(nodes, cartesian_product,
            'amnesic_probing/runs/evaluate/run_layer_wise_deprobe.sh',
            on_gpu=False, dry_run=dry_run)
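# Illustrative sketch (hypothetical values): here 'layers_dir' takes only the
# masking mode, while 'base_dir' takes both the masking mode and the task name,
# matching the two .format() arguments above.
example_layer_wise_entry = {
    'layers_dir': 'data/encoded/{}/layers/',   # '{}' <- masking
    'base_dir': 'data/encoded/{0}/{1}',        # masking, task
    'labels': ['pos', 'word_len'],
    'task_type': 'task',
}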
base_dir = vals['base_dir'].format(masking)
vecs = base_dir + '/' + vals['vecs']
for task in vals['task']:
    for label in vals['labels']:
        output_dir = 'models/lm/{0}/{1}/layer:{2}/'
        if task == 'task':
            task_name = label
        else:
            task_name = task
        # running over all the model layers
        if 'layers' in vals:
            for layer in vals['layers']:
                cartesian_product.append([
                    vecs.format(layer),
                    f'{base_dir}/{label}.pickle',
                    output_dir.format(task_name, masking, layer),
                    task,
                    balanced
                ])
        else:
            cartesian_product.append([
                vecs,
                f'{base_dir}/{label}.pickle',
                output_dir.format(task_name, masking, 'last'),
                task,
                balanced
            ])

parallelize(nodes, cartesian_product,
            'amnesic_probing/runs/core/run_deprobe.sh',
            on_gpu=False, dry_run=dry_run)
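# Illustrative sketch (hypothetical values): when an entry lists 'layers', the
# '{}' slot in 'vecs' is filled with each layer index and one job is queued per
# layer; without 'layers', 'vecs' is a plain file name and a single job runs on
# the last-layer vectors.
example_deprobe_entry = {
    'base_dir': 'data/encoded/{}',       # '{}' <- masking
    'vecs': 'layer:{}/vecs.npy',         # '{}' <- layer index (layered case only)
    'task': ['task'],
    'labels': ['pos'],
    'layers': list(range(13)),           # e.g. embeddings + 12 bert-base layers
}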
            'dep_p', 'dep_poss', 'dep_adp', 'dep_amod', 'dep_nsubj',
            'dep_dep', 'dep_dobj', 'dep_cc', 'dep_conj', 'dep_advmod',
            'dep_ROOT', 'dep_ccomp', 'dep_aux', 'dep_xcomp', 'dep_neg'
        ],
    },
}

if __name__ == '__main__':
    arguments = docopt(__doc__)
    if arguments['--dry_run']:
        dry_run = True
    else:
        dry_run = False

    cartesian_product = []
    for data_type, vals in runs_dic.items():
        for label in vals['labels']:
            for masking in ['normal', 'masked']:
                train_dir = vals['train_dir'].format(masking)
                cartesian_product.append([
                    train_dir,
                    f'models/lm/{label}/{masking}/layer:last/',
                    f'models/lm/{label}/{masking}/layer:last/P.npy'
                ])

    parallelize(nodes, cartesian_product,
                'amnesic_probing/runs/core/run_ft_reg.sh',
                on_gpu=True, dry_run=dry_run)
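# All of the scripts above funnel into parallelize(). Its real implementation
# is not shown here; the stand-in below is only a minimal sketch of the assumed
# contract -- one shell invocation per argument list, printed instead of run in
# dry-run mode -- and it ignores the nodes/on_gpu scheduling the real helper
# presumably does.
import subprocess


def parallelize_sketch(nodes, jobs, script, on_gpu=False, dry_run=False):
    """Hypothetical stand-in for parallelize(): run `script` once per job."""
    for args in jobs:
        cmd = ['bash', script] + [str(a) for a in args]
        if dry_run:
            print(' '.join(cmd))       # show what would run, submit nothing
        else:
            subprocess.run(cmd, check=True)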