Example #1
# Shared imports for the snippets below. Model, get_interventions_winogender,
# and perform_interventions are project-specific helpers assumed to be
# importable from the repository's own modules; the tokenizer classes come
# from the Hugging Face transformers library.
import json
import pickle

import numpy as np
import pandas as pd
from pandas import DataFrame
from transformers import (GPT2Tokenizer, TransfoXLTokenizer, XLNetTokenizer,
                          BertTokenizer, DistilBertTokenizer, RobertaTokenizer)


def intervene_attention(gpt2_version,
                        do_filter,
                        stat,
                        device='cuda',
                        filter_quantile=0.25,
                        random_weights=False):
    # Load the attention-instrumented GPT-2 model and its tokenizer.
    model = Model(output_attentions=True,
                  gpt2_version=gpt2_version,
                  device=device,
                  random_weights=random_weights)
    tokenizer = GPT2Tokenizer.from_pretrained(gpt2_version)

    interventions, json_data = get_interventions_winogender(
        gpt2_version, do_filter, stat, model, tokenizer, device,
        filter_quantile)
    # Run the attention interventions and summarize the mean total,
    # indirect, and direct effects over all examples.
    results = perform_interventions(interventions, model)
    json_data['mean_total_effect'] = DataFrame(results).total_effect.mean()
    json_data['mean_model_indirect_effect'] = DataFrame(
        results).indirect_effect_model.mean()
    json_data['mean_model_direct_effect'] = DataFrame(
        results).direct_effect_model.mean()
    filter_name = 'filtered' if do_filter else 'unfiltered'
    if random_weights:
        gpt2_version += '_random'
    # The output filename encodes the statistic, model version, and filter setting.
    fname = f"winogender_data/attention_intervention_{stat}_{gpt2_version}_{filter_name}.json"
    json_data['results'] = results
    with open(fname, 'w') as f:
        json.dump(json_data, f)


def get_all_contrib(model_type, model, tokenizer, interventions, data,
                    out_dir):
    # Compute mean indirect effects at the model, layer, and head level
    # for a set of attention interventions and cache them to disk.
    json_data = {}

    results = perform_interventions(interventions, model)
    df = pd.DataFrame(results)
    # Model-level indirect effect: one value per example -> shape (num_examples,)
    effect = np.stack(df['indirect_effect_model'].to_numpy())
    mean_effect = effect.mean(axis=0)
    json_data['mean_effect_model'] = mean_effect

    # Layer-level indirect effect: shape (num_examples, num_layers)
    effect = np.stack(df['indirect_effect_layer'].to_numpy())
    mean_effect = effect.mean(axis=0)
    json_data['mean_effect_layer'] = mean_effect

    # Head-level indirect effect: shape (num_examples, num_layers, num_heads)
    effect = np.stack(df['indirect_effect_head'].to_numpy())
    mean_effect = effect.mean(axis=0)
    json_data['mean_effect_head'] = mean_effect

    # Cache all three mean-effect arrays; the head-level means are returned.
    out_path = out_dir + "/mean_effect_" + model_type + "_" + data + ".pickle"
    with open(out_path, "wb") as f:
        pickle.dump(json_data, f)
    return mean_effect
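
A minimal usage sketch of get_all_contrib; the model, tokenizer, and interventions are assumed to have been built with the project's own helpers (as in intervene_attention), and the 'bert' / 'winogender' / 'results' argument values are hypothetical:

# Hypothetical call: caches the mean effects to disk and returns the
# head-level means.
mean_head_effect = get_all_contrib(model_type='bert', model=model,
                                   tokenizer=tokenizer,
                                   interventions=interventions,
                                   data='winogender', out_dir='results')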


# Extended variant of intervene_attention that also supports Transformer-XL,
# XLNet, BERT, DistilBERT, and RoBERTa, plus a configurable masking approach.
def intervene_attention(gpt2_version, do_filter, stat, device='cuda',
                        filter_quantile=0.25, random_weights=False,
                        masking_approach=1):
    model = Model(output_attentions=True, gpt2_version=gpt2_version,
                  device=device, random_weights=random_weights,
                  masking_approach=masking_approach)
    # Pick the tokenizer class that matches the loaded model family.
    tokenizer = (GPT2Tokenizer if model.is_gpt2 else
                 TransfoXLTokenizer if model.is_txl else
                 XLNetTokenizer if model.is_xlnet else
                 BertTokenizer if model.is_bert else
                 DistilBertTokenizer if model.is_distilbert else
                 RobertaTokenizer).from_pretrained(gpt2_version)

    interventions, json_data = get_interventions_winogender(gpt2_version, do_filter, stat, model, tokenizer,
                                                            device, filter_quantile)
    results = perform_interventions(interventions, model)
    json_data['mean_total_effect'] = DataFrame(results).total_effect.mean()
    json_data['mean_model_indirect_effect'] = DataFrame(results).indirect_effect_model.mean()
    json_data['mean_model_direct_effect'] = DataFrame(results).direct_effect_model.mean()
    filter_name = 'filtered' if do_filter else 'unfiltered'
    if random_weights:
        gpt2_version += '_random'
    # Masked language models additionally encode the masking approach in the filename.
    if model.is_gpt2 or model.is_txl or model.is_xlnet:
        fname = f"winogender_data/attention_intervention_{stat}_{gpt2_version}_{filter_name}.json"
    else:
        fname = f"winogender_data/attention_intervention_{stat}_{gpt2_version}_{filter_name}_{masking_approach}.json"
    json_data['results'] = results
    with open(fname, 'w') as f:
        json.dump(json_data, f)
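
A minimal sketch of how this variant might be invoked; the 'bergsma' statistic name is an assumption for illustration, and the winogender_data/ output directory implied by the filename pattern above must already exist:

# Hypothetical invocation: writes a JSON report of attention intervention
# effects under winogender_data/.
intervene_attention('gpt2', do_filter=True, stat='bergsma', device='cuda',
                    filter_quantile=0.25, random_weights=False,
                    masking_approach=1)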