Example #1
from typing import List

import numpy as np

# Project-level helpers (Vocabulary, PointSuggestionEvaluator,
# clean_target_from_padding, token_precision_recall, unk_acc) are assumed to
# be importable from the surrounding code base.


def _evaluate_f1(best_predictions: List[List[np.ndarray]],
                 best_predictions_probs: List[np.ndarray], vocab: Vocabulary,
                 true_labels: np.ndarray):
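    """Accumulate per-path prediction statistics and return the evaluator."""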
    true_labels = clean_target_from_padding(true_labels)
    result_accumulator = PointSuggestionEvaluator()
    unk_id = vocab.get_id_or_unk(vocab.get_unk())

    for x_pred, x_prob, y_target in zip(best_predictions,
                                        best_predictions_probs, true_labels):
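        # Per-path confidences reported by the beam search.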
        confidences = x_prob.tolist()
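        # A path is an exact match when all of its tokens equal the target.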
        is_exact_prediction = [np.all(pred == y_target) for pred in x_pred]
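        # Token-level precision/recall of each path against the target.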
        precision_recall = [
            token_precision_recall(pred.T, y_target) for pred in x_pred
        ]
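        # Flag paths that predict only the unknown-word id.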
        is_unknown_word_predicted = [
            np.all(suggestion == unk_id) for suggestion in x_pred
        ]
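        # Per-path unknown-word accuracy, as computed by unk_acc.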
        unk_word_accuracy = [
            unk_acc(suggestion.T, y_target, unk_id) for suggestion in x_pred
        ]
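        # Fold this example's per-path statistics into the accumulator.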
        result_accumulator.add_result(confidences, is_exact_prediction,
                                      is_unknown_word_predicted,
                                      precision_recall, unk_word_accuracy)

    return result_accumulator
Example #2
from typing import Any, Dict

import keras
import numpy as np

# As above, project-level names (Vocabulary, SENTENCE_START_TOKEN,
# SENTENCE_END_TOKEN, beam_search, visualise_beam_predictions_to_targets)
# are assumed to be in scope.


def evaluate_f1(model: keras.Model,
                vocab: Vocabulary,
                input_method_body_subtokens: np.ndarray,
                target_method_names: np.ndarray,
                hyperparameters: Dict[str, Any],
                visualise_prediction: bool = True):
    padding_id = vocab.get_id_or_unk(vocab.get_pad())
    begin_of_sentence_id = vocab.get_id_or_unk(SENTENCE_START_TOKEN)
    end_of_sentence_id = vocab.get_id_or_unk(SENTENCE_END_TOKEN)

    if input_method_body_subtokens.ndim != 3:
        # model.predict expects 3-D input; a single example lacks the batch
        # dimension, so add it manually.
        input_method_body_subtokens = np.expand_dims(
            input_method_body_subtokens, 0)

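    # Run the model to get per-position token probability distributions.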
    predictions = model.predict(input_method_body_subtokens, batch_size=1)

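    # Beam-search decode the top-scoring method-name paths.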
    best_predictions, best_predictions_probs = beam_search(
        predictions,
        padding_id,
        begin_of_sentence_id,
        end_of_sentence_id,
        hyperparameters['beam_width'],
        hyperparameters['beam_top_paths'],
    )
    f1_evaluation = _evaluate_f1(best_predictions, best_predictions_probs,
                                 vocab, target_method_names)
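    # Optionally render the top beams alongside their targets.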
    if visualise_prediction:
        max_results = 10
        visualised_input = visualise_beam_predictions_to_targets(
            vocab, best_predictions[:max_results],
            best_predictions_probs[:max_results],
            input_method_body_subtokens[:max_results],
            target_method_names[:max_results])

        return f1_evaluation, visualised_input
    return f1_evaluation
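
# A hedged usage sketch (illustrative names, not from the source): assumes a
# trained model, an id-encoded evaluation set, and the two beam-search
# hyperparameters read above.
#
#   hyperparameters = {'beam_width': 5, 'beam_top_paths': 3}
#   f1_evaluation, visualised = evaluate_f1(model, vocab,
#                                           method_body_subtokens,
#                                           method_names, hyperparameters)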


def translate_tokenized_array_to_list_words(vocab: Vocabulary,
                                            token: np.ndarray) -> List[str]:
    """Helper function to translate numpy array tokens back to words."""
    pad_id = vocab.get_id_or_unk(vocab.get_pad())
    return [vocab.get_name_for_id(n) for n in token[token != pad_id]]
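
A minimal usage sketch for the helper above. ToyVocab is a hypothetical
stand-in exposing the same get_pad/get_id_or_unk/get_name_for_id interface
as the project's Vocabulary; the vocabulary contents are illustrative.

import numpy as np


class ToyVocab:
    """Hypothetical vocabulary: an id is a position in a fixed name list."""
    _names = ['%PAD%', 'get', 'user', 'name']

    def get_pad(self) -> str:
        return '%PAD%'

    def get_id_or_unk(self, name: str) -> int:
        return self._names.index(name)

    def get_name_for_id(self, i: int) -> str:
        return self._names[i]


tokens = np.array([1, 2, 3, 0, 0])  # ids for ['get', 'user', 'name'] + padding
print(translate_tokenized_array_to_list_words(ToyVocab(), tokens))
# -> ['get', 'user', 'name']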