Example #1
def predict_and_fit_locality(args, batch, model, tokenizer, batch_features,
                             batch_examples):
    model.eval()
    batch = tuple(t.to(args.device) for t in batch)
    # only allow batch size 1
    assert batch[0].size(0) == 1
    # run predictions
    with torch.no_grad():
        inputs = {
            "input_ids": batch[0],
            "attention_mask": batch[1],
            "token_type_ids": batch[2],
        }

        if args.model_type in ["roberta", "distilbert", "camembert", "bart"]:
            del inputs["token_type_ids"]
        feature_indices = batch[3]
        outputs = model.restricted_forward(**inputs)

    batch_start_logits, batch_end_logits = outputs
    batch_results = []
    for i, feature_index in enumerate(feature_indices):
        eval_feature = batch_features[i]
        unique_id = int(eval_feature.unique_id)

        start_logits, end_logits = [to_list(o[i]) for o in outputs]
        result = SquadResult(unique_id, start_logits, end_logits)
        batch_results.append(result)

    batch_prelim_results, batch_predictions = compute_predictions_index_and_logits(
        batch_examples, batch_features, batch_results, args.n_best_size,
        args.max_answer_length, args.do_lower_case, tokenizer, args.dataset)

    # run attributions
    batch_start_indexes = torch.LongTensor(
        [x.start_index for x in batch_prelim_results]).to(args.device)
    batch_end_indexes = torch.LongTensor(
        [x.end_index for x in batch_prelim_results]).to(args.device)

    # for data parallel
    inputs = {
        "input_ids": batch[0],
        "attention_mask": batch[1],
        "token_type_ids": batch[2],
        "start_indexes": batch_start_indexes,
        "end_indexes": batch_end_indexes,
        "final_start_logits": batch_start_logits,
        "final_end_logits": batch_end_logits,
    }
    if args.model_type in ["roberta", "distilbert", "camembert", "bart"]:
        del inputs["token_type_ids"]

    with torch.no_grad():
        importances = fit_locality(args, tokenizer, model, inputs,
                                   batch_features[0])

    return batch_predictions, batch_prelim_results, importances
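
This function (and the driver below) relies on a small to_list helper to move tensors off the GPU before building SquadResult objects; it is not shown here, but a minimal sketch matching the usual Hugging Face run_squad.py definition would be:

import torch

def to_list(tensor: torch.Tensor) -> list:
    # detach from the autograd graph, move to CPU, and convert to a plain Python list
    return tensor.detach().cpu().tolist()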
Example #2
def perturb_interp(args, model, tokenizer, prefix=""):
    if not os.path.exists(args.interp_dir):
        os.makedirs(args.interp_dir)

    # freeze the model parameters
    model.requires_grad_(False)

    dataset, examples, features = load_and_cache_examples(args,
                                                          tokenizer,
                                                          evaluate=True,
                                                          output_examples=True)

    # assume a one-to-one mapping between examples and features
    assert len(examples) == len(features)

    if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(args.output_dir)

    args.eval_batch_size = 1
    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(dataset,
                                 sampler=eval_sampler,
                                 batch_size=args.eval_batch_size)

    # Eval!
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info("  Num examples = %d", len(dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)

    all_predictions = []
    start_time = timeit.default_timer()

    for batch in tqdm(eval_dataloader, desc="Evaluating"):

        feature_indices = to_list(batch[3])
        batch_features = [features[i] for i in feature_indices]
        batch_examples = [examples[i] for i in feature_indices]
        # get preliminary results and predictions for this batch
        batch = remove_padding(batch, batch_features[0])
        batch_predictions, batch_prelim_results, batch_attributions = predict_and_fit_locality(
            args, batch, model, tokenizer, batch_features, batch_examples)
        dump_token_interp_info(args, batch_examples, batch_features, tokenizer,
                               batch_predictions, batch_prelim_results,
                               batch_attributions)
        # lots of info, dump to files immediately
        all_predictions.append(batch_predictions)

    evalTime = timeit.default_timer() - start_time
    logger.info("  Evaluation done in total %f secs (%f sec per example)",
                evalTime, evalTime / len(dataset))

    all_predictions = merge_predictions(all_predictions)
    results = hotpot_evaluate(examples[:len(all_predictions)], all_predictions)
    return results
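
The driver above calls remove_padding before handing the batch to predict_and_fit_locality, but that helper is not included here. A hypothetical sketch, assuming it simply trims every sequence-shaped tensor in the single-example batch down to the number of non-padding tokens recorded in the feature's attention_mask:

import torch

def remove_padding(batch, feature):
    # number of real (non-padding) tokens for this single feature
    seq_len = int(sum(feature.attention_mask))
    trimmed = []
    for t in batch:
        # only trim tensors that carry a sequence dimension (input ids, masks, type ids)
        if t.dim() == 2 and t.size(1) >= seq_len:
            trimmed.append(t[:, :seq_len])
        else:
            trimmed.append(t)
    return tuple(trimmed)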
Example #3
def predict_and_layerwise_attribute(args, batch, model, tokenizer, batch_features, batch_examples):
    model.eval()
    batch = tuple(t.to(args.device) for t in batch)

    num_layers = model.num_hidden_layers

    # run predictions
    with torch.no_grad():
        inputs = {
            "input_ids": batch[0],
            "attention_mask": batch[1],
            "token_type_ids": batch[2],
            "output_attentions": True,
        }

        if args.model_type in ["roberta", "distilbert", "camembert", "bart"]:
            del inputs["token_type_ids"]

        feature_indices = batch[3]
        outputs = model.restricted_forward(**inputs)

    batch_start_logits, batch_end_logits, batch_attentions = outputs
    # drop the attentions so the per-feature loop below only unpacks start/end logits
    outputs = outputs[:-1]

    batch_results = []
    for i, feature_index in enumerate(feature_indices):
        eval_feature = batch_features[i]
        unique_id = int(eval_feature.unique_id)

        start_logits, end_logits = [to_list(o[i]) for o in outputs]
        result = SquadResult(unique_id, start_logits, end_logits)

        batch_results.append(result)
    
    batch_prelim_results, batch_predictions = compute_predictions_index_and_logits(
        batch_examples,
        batch_features,
        batch_results,
        args.n_best_size,
        args.max_answer_length,
        args.do_lower_case,
        tokenizer,
        args.dataset,
    )

    # run attributions
    batch_start_indexes = torch.LongTensor([x.start_index for x in batch_prelim_results]).to(args.device)
    batch_end_indexes = torch.LongTensor([x.end_index for x in batch_prelim_results]).to(args.device)
    batch_attentions = torch.stack(batch_attentions)
    
    # mark every layer as active (1) for layer-wise attribution
    active_layers = [1 for _ in range(num_layers)]

    # for data parallel 
    inputs = {
        "input_ids": batch[0],
        "attention_mask": batch[1],
        "token_type_ids": batch[2],
        "active_layers": active_layers,
        "input_attentions": batch_attentions,
        "start_indexes": batch_start_indexes,
        "end_indexes": batch_end_indexes,
        "final_start_logits": batch_start_logits,
        "final_end_logits": batch_end_logits,
        "num_steps": args.ig_steps,
    }
    if args.model_type in ["roberta", "distilbert", "camembert", "bart"]:
        del inputs["token_type_ids"]
    
    batch_attributions = model.layer_attribute(**inputs)
    # attributions are returned in logit space
    return batch_predictions, batch_prelim_results, batch_attentions, batch_attributions
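
As a point of orientation: with output_attentions=True a standard Hugging Face encoder returns one attention tensor per layer, each of shape (batch, num_heads, seq_len, seq_len), so the torch.stack call above yields a (num_layers, batch, num_heads, seq_len, seq_len) tensor. A small, self-contained sanity check under that assumption:

import torch

def check_stacked_attentions(stacked: torch.Tensor) -> None:
    # expected layout: (num_layers, batch, num_heads, seq_len, seq_len)
    num_layers, batch_size, num_heads, q_len, k_len = stacked.shape
    assert batch_size == 1, "this interpretation pipeline only supports batch size 1"
    assert q_len == k_len, "self-attention maps should be square"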
Example #4
def attention_interp(args, model, tokenizer, prefix=""):

    if not os.path.exists(args.interp_dir):
        os.makedirs(args.interp_dir)
    dataset, examples, features = load_and_cache_examples(args, tokenizer, evaluate=True, output_examples=True)
    
    # freeze the model parameters
    model.requires_grad_(False)
    # assume a one-to-one mapping between examples and features
    assert len(examples) == len(features)

    if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(args.output_dir)

    # restrict eval batch size to a single example on a single GPU
    assert args.per_gpu_eval_batch_size == 1 and args.n_gpu <= 1
    args.eval_batch_size = 1

    # evaluate sequentially (a DistributedSampler would sample randomly)
    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)


    # Eval!
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info("  Num examples = %d", len(dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)

    all_predictions = []
    start_time = timeit.default_timer()

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
       
        feature_indices = to_list(batch[3])
        batch_features = [features[i] for i in feature_indices]
        batch_examples = [examples[i] for i in feature_indices]
        # get preliminary results and predictions for this batch
        batch = remove_padding(batch, batch_features[0])
        batch_predictions, batch_prelim_results, batch_attentions, batch_attributions = predict_and_layerwise_attribute(
            args,
            batch,
            model,
            tokenizer,
            batch_features,
            batch_examples
        )

        # lots of info, dump to files immediately
        dump_attention_interp_info(args, batch_examples, batch_features, tokenizer, batch_predictions, batch_prelim_results, batch_attentions, batch_attributions)
        all_predictions.append(batch_predictions)

    evalTime = timeit.default_timer() - start_time
    logger.info("  Evaluation done in total %f secs (%f sec per example)", evalTime, evalTime / len(dataset))

    # Compute predictions
    # output_prediction_file =  os.path.join(args.output_dir, "predictions_{}.json".format(prefix))
    # output_nbest_file = os.path.join(args.output_dir, "nbest_predictions_{}.json".format(prefix))
    # Compute the F1 and exact scores.
    all_predictions = merge_predictions(all_predictions)
    results = hotpot_evaluate(examples[:len(all_predictions)], all_predictions)
    return results
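
Both drivers collect one prediction dict per batch and then call merge_predictions before scoring with hotpot_evaluate. That helper is not shown; a minimal sketch, assuming each element maps question ids to answer strings:

import collections

def merge_predictions(prediction_dicts):
    # flatten a list of per-batch {qas_id: answer_text} dicts into a single mapping
    merged = collections.OrderedDict()
    for preds in prediction_dicts:
        merged.update(preds)
    return merged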