def _evaluate_f1(best_predictions: List[List[np.ndarray]], best_predictions_probs: List[np.ndarray], vocab: Vocabulary, true_labels: np.ndarray):
    """Accumulate suggestion-quality metrics for a batch of beam-search outputs.

    For each sample, every beam candidate is compared against the
    padding-stripped target name, and the per-candidate confidence,
    exact-match flag, all-UNK flag, token-level precision/recall and
    UNK-accuracy are recorded in a PointSuggestionEvaluator.

    :param best_predictions: per-sample list of beam candidates (token-id arrays)
    :param best_predictions_probs: per-sample array of candidate probabilities
    :param vocab: vocabulary used to resolve the UNK token id
    :param true_labels: padded ground-truth target tokens for the batch
    :return: the populated PointSuggestionEvaluator
    """
    targets = clean_target_from_padding(true_labels)
    evaluator = PointSuggestionEvaluator()
    unknown_id = vocab.get_id_or_unk(vocab.get_unk())

    for candidates, candidate_probs, target in zip(best_predictions, best_predictions_probs, targets):
        confidences = candidate_probs.tolist()
        # NOTE(review): exact-match compares the untransposed candidate, while
        # precision/recall and unk-accuracy pass candidate.T — looks deliberate
        # given the beam output layout, but worth confirming against beam_search.
        exact_hits = [np.all(candidate == target) for candidate in candidates]
        precision_recall = [
            token_precision_recall(candidate.T, target) for candidate in candidates
        ]
        predicted_all_unk = [
            np.all(candidate == unknown_id) for candidate in candidates
        ]
        unk_word_accuracy = [
            unk_acc(candidate.T, target, unknown_id) for candidate in candidates
        ]
        evaluator.add_result(confidences, exact_hits, predicted_all_unk,
                             precision_recall, unk_word_accuracy)

    return evaluator
def evaluate_f1(model: keras.Model, vocab: Vocabulary, input_method_body_subtokens: np.ndarray, target_method_names: np.ndarray, hyperparameters: Dict[str, any], visualise_prediction=True):
    """Predict method names for the given bodies, decode with beam search, and score.

    Runs the model over ``input_method_body_subtokens``, decodes the output
    distributions with beam search (width and path count taken from
    ``hyperparameters['beam_width']`` / ``hyperparameters['beam_top_paths']``),
    and accumulates F1-style metrics against ``target_method_names``.

    NOTE(review): the ``Dict[str, any]`` annotation uses the builtin ``any``;
    ``typing.Any`` was presumably intended — left untouched to keep the
    signature identical.

    :param model: trained keras model whose predict() yields token distributions
    :param vocab: vocabulary for padding / sentence-boundary token ids
    :param input_method_body_subtokens: encoded method bodies; a batch axis is
        added automatically when a single (2-D) sample is passed
    :param target_method_names: ground-truth target name tokens
    :param hyperparameters: must contain 'beam_width' and 'beam_top_paths'
    :param visualise_prediction: when True, also build a visualisation of the
        first few predictions
    :return: the evaluator, or (evaluator, visualisation) when
        ``visualise_prediction`` is True
    """
    pad_id = vocab.get_id_or_unk(vocab.get_pad())
    bos_id = vocab.get_id_or_unk(SENTENCE_START_TOKEN)
    eos_id = vocab.get_id_or_unk(SENTENCE_END_TOKEN)

    # model.predict expects a (batch, ...) 3-D input; a lone sample arrives
    # without the batch axis, so add it manually.
    if input_method_body_subtokens.ndim != 3:
        input_method_body_subtokens = np.expand_dims(input_method_body_subtokens, 0)

    predictions = model.predict(input_method_body_subtokens, batch_size=1)
    best_predictions, best_predictions_probs = beam_search(
        predictions,
        pad_id,
        bos_id,
        eos_id,
        hyperparameters['beam_width'],
        hyperparameters['beam_top_paths'],
    )
    f1_evaluation = _evaluate_f1(best_predictions, best_predictions_probs,
                                 vocab, target_method_names)

    if not visualise_prediction:
        return f1_evaluation

    # Only visualise a small prefix of the batch to keep the output readable.
    max_results = 10
    visualised_input = visualise_beam_predictions_to_targets(
        vocab,
        best_predictions[:max_results],
        best_predictions_probs[:max_results],
        input_method_body_subtokens[:max_results],
        target_method_names[:max_results])
    return f1_evaluation, visualised_input
def translate_tokenized_array_to_list_words(vocab: Vocabulary, token: np.ndarray) -> List[str]:
    """Translate a numpy array of token ids back into words, dropping padding.

    :param vocab: vocabulary used to map ids to names and resolve the pad id
    :param token: array of token ids, possibly containing padding entries
    :return: the non-padding tokens as a list of words, in original order
    """
    pad_id = vocab.get_id_or_unk(vocab.get_pad())
    # np.nonzero on the boolean mask keeps only the non-padding positions.
    non_pad_tokens = token[np.nonzero(token != pad_id)]
    return [vocab.get_name_for_id(token_id) for token_id in non_pad_tokens]