dry_run = True
    else:
        dry_run = False

    # Collect one argument list per (dataset, split, encoding format, task,
    # control flag) combination. Each entry holds, in order:
    #   [input file, model directory, output directory,
    #    task format, encoding format, control flag]
    cartesian_product = []
    for vals in runs_dic.values():
        task_format = vals['task_format']
        for split in vals['splits']:
            # The input path template takes the split name as its only field.
            input_file = vals['input_file'].format(split)
            for encoding_format in vals['encode_format']:
                for task in vals['tasks']:
                    model_dir = f'models/lm/{task}/{encoding_format}/'
                    for control in ('false', 'true'):
                        # Control runs are written under "<task>_control" so
                        # they do not share an output directory with the
                        # regular run of the same task.
                        out_task = task + '_control' if control == 'true' else task
                        out_dir = vals['out_dir'].format(
                            encoding_format, out_task, split)
                        cartesian_product.append([
                            input_file, model_dir, out_dir,
                            task_format, encoding_format, control
                        ])

    # Hand the argument lists to the encode script (GPU requested).
    parallelize(
        nodes,
        cartesian_product,
        'amnesic_probing/runs/encode/run_layer_encode.sh',
        on_gpu=True,
        dry_run=dry_run,
    )
Esempio n. 2
0
    # Normalise the command-line flag to a plain bool.
    dry_run = bool(arguments['--dry_run'])

    # Collect one argument list per (dataset, masking mode, label). Each entry
    # holds, in order:
    #   [vectors path, labels pickle path, text path, model directory, task type]
    cartesian_product = []
    for vals in runs_dic.values():
        # Constant for the whole dataset entry, so read it once.
        task_type = vals['task_type']
        for masking in ['normal', 'masked']:
            # 'base_dir' is a template whose single field is the masking mode;
            # every derived path must be built from the formatted value.
            base_dir = vals['base_dir'].format(masking)
            vecs = base_dir + '/' + vals['vecs']
            text = base_dir + '/' + vals['text']
            for label in vals['labels']:
                # Only the 'task' task type loads the label's own pickle; any
                # other task type falls back to the 'np_start' label file.
                # NOTE(review): presumably those tasks derive their labels on
                # the fly and only need some label file from the same data.
                data_label = label if task_type == 'task' else 'np_start'
                cartesian_product.append([
                    vecs, f'{base_dir}/{data_label}.pickle', text,
                    f'models/lm/{label}/{masking}/layer:last/', task_type
                ])

    # Hand the argument lists to the per-dimension evaluation script (GPU
    # requested).
    parallelize(nodes,
                cartesian_product,
                'amnesic_probing/runs/evaluate/run_eval_per_dim.sh',
                on_gpu=True,
                dry_run=dry_run)
Esempio n. 3
0
    # Parse the command line from the module docstring.
    arguments = docopt(__doc__)
    dry_run = bool(arguments['--dry_run'])

    # Collect one argument list per (dataset, masking mode, task). Each entry
    # holds, in order:
    #   [layers directory, base directory, labels pickle path,
    #    tokens pickle path, task type]
    cartesian_product = []
    for vals in runs_dic.values():
        for masking in ('normal', 'masked'):
            layer_dir = vals['layers_dir'].format(masking)
            for task in vals['labels']:
                # The base directory template takes the masking mode and task.
                base_dir = vals['base_dir'].format(masking, task)
                task_type = vals['task_type']

                # Regular tasks use their own labels; tasks whose labels are
                # generated on the fly (e.g. word_len) use random labels from
                # the same data, stored under 'np_start'.
                data_label = task if task_type == 'task' else 'np_start'

                cartesian_product.append([
                    layer_dir,
                    base_dir,
                    f'{base_dir}/{data_label}.pickle',
                    f'{base_dir}/tokens.pickle',
                    task_type,
                ])

    # Hand the argument lists to the layer-wise deprobe script (no GPU).
    parallelize(
        nodes,
        cartesian_product,
        'amnesic_probing/runs/evaluate/run_layer_wise_deprobe.sh',
        on_gpu=False,
        dry_run=dry_run,
    )
Esempio n. 4
0
            # Data directory for the current masking mode. The vectors path is
            # itself a template when per-layer runs are configured: the layer
            # is substituted into it below.
            base_dir = vals['base_dir'].format(masking)
            vecs = '/'.join([base_dir, vals['vecs']])
            # Model directory template: fields are task name, masking, layer.
            out_template = 'models/lm/{0}/{1}/layer:{2}/'

            for task in vals['task']:
                for label in vals['labels']:
                    # For the generic 'task' entry the model directory is named
                    # after the label; otherwise it is named after the task.
                    task_name = label if task == 'task' else task
                    labels_path = f'{base_dir}/{label}.pickle'

                    if 'layers' not in vals:
                        # No per-layer configuration: a single run whose model
                        # directory is filed under layer 'last'.
                        cartesian_product.append([
                            vecs, labels_path,
                            out_template.format(task_name, masking, 'last'),
                            task, balanced
                        ])
                        continue

                    # One run per configured model layer.
                    for layer in vals['layers']:
                        cartesian_product.append([
                            vecs.format(layer), labels_path,
                            out_template.format(task_name, masking, layer),
                            task, balanced
                        ])

    # Hand the collected argument lists to the deprobe script (no GPU);
    # dry_run is forwarded unchanged.
    parallelize(
        nodes,
        cartesian_product,
        'amnesic_probing/runs/core/run_deprobe.sh',
        on_gpu=False,
        dry_run=dry_run,
    )
Esempio n. 5
0
            'dep_p', 'dep_poss', 'dep_adp', 'dep_amod', 'dep_nsubj', 'dep_dep',
            'dep_dobj', 'dep_cc', 'dep_conj', 'dep_advmod', 'dep_ROOT',
            'dep_ccomp', 'dep_aux', 'dep_xcomp', 'dep_neg'
        ],
    },
}

if __name__ == '__main__':
    # Parse the command line from the module docstring.
    arguments = docopt(__doc__)
    dry_run = bool(arguments['--dry_run'])

    # One argument list per (dataset, label, masking mode). Each entry holds,
    # in order: [training directory, model directory, path to P.npy inside
    # that model directory].
    cartesian_product = [
        [
            vals['train_dir'].format(masking),
            f'models/lm/{label}/{masking}/layer:last/',
            f'models/lm/{label}/{masking}/layer:last/P.npy',
        ]
        for vals in runs_dic.values()
        for label in vals['labels']
        for masking in ('normal', 'masked')
    ]

    # Hand the argument lists to the ft_reg script (GPU requested).
    parallelize(
        nodes,
        cartesian_product,
        'amnesic_probing/runs/core/run_ft_reg.sh',
        on_gpu=True,
        dry_run=dry_run,
    )