Example #1
            validation_losses.append(np.mean(loss_validation))
            f1 = 0
            em = 0
            for i in range(len(estimated_end_index)):
                if answer_start_batch_actual[i] == -1:
                    if (estimated_start_index[i] == 0
                            or estimated_end_index[i] == 0):
                        f1 += 1.0
                        em += 1.0
                else:
                    estimated_start_index[i] -= 1
                    estimated_end_index[i] -= 1
                    f1 += get_f1_from_tokens(answer_start_batch_actual[i],
                                             answer_end_batch_actual[i],
                                             estimated_start_index[i],
                                             estimated_end_index[i],
                                             context_batch_validation[i], D)
                    em += get_exact_match_from_tokens(
                        answer_start_batch_actual[i],
                        answer_end_batch_actual[i], estimated_start_index[i],
                        estimated_end_index[i], context_batch_validation[i], D)

            f1score.append(f1 / len(estimated_end_index))
            emscore.append(em / len(estimated_end_index))
            #print("f1 score: ", f1/len(estimated_end_index))

        print("F1 mean on validation: ", np.mean(f1score))
        print("EM mean on validation: ", np.mean(emscore))
        print("Mean validation loss on epoch: ", np.mean(validation_losses))
        val_loss_means.append(np.mean(validation_losses))
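Both examples call helper functions that the listings do not show: get_f1_from_tokens and get_exact_match_from_tokens, invoked with a gold span, a predicted span, the tokenised context batch entry, and a dictionary D. A minimal sketch of what such helpers could look like is given below, assuming that the context holds token ids and that D maps ids back to words; the actual helpers in the source project may tokenise or normalise differently, and span_to_words is a hypothetical name introduced only for this sketch.

from collections import Counter

def span_to_words(start, end, context, D):
    # Assumed helper (not from the listings): map the token ids of a span
    # back to words via the index-to-word dictionary D.
    return [D[token_id] for token_id in context[start:end + 1]]

def get_f1_from_tokens(true_start, true_end, est_start, est_end, context, D):
    # Token-level F1 between the gold span and the predicted span.
    gold = span_to_words(true_start, true_end, context, D)
    pred = span_to_words(est_start, est_end, context, D)
    common = Counter(gold) & Counter(pred)
    num_same = sum(common.values())
    if num_same == 0:
        return 0.0
    precision = num_same / len(pred)
    recall = num_same / len(gold)
    return 2 * precision * recall / (precision + recall)

def get_exact_match_from_tokens(true_start, true_end, est_start, est_end, context, D):
    # Exact match: 1.0 only when the predicted span reproduces the gold span.
    gold = span_to_words(true_start, true_end, context, D)
    pred = span_to_words(est_start, est_end, context, D)
    return 1.0 if gold == pred else 0.0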
Example #2
                    dropout_keep_rate: 1
                })

            all_answers = np.array(
                list(map(lambda qas: qas["all_answers"],
                         batch))).reshape(CONFIG.BATCH_SIZE)

            f1 = 0
            # Calculate the F1 score across the batch, taking the best
            # match over all reference answers for each question
            for i in range(CONFIG.BATCH_SIZE):
                f1_score_answers = []
                for true_answer in all_answers[i]:
                    f1_score_answers.append(
                        get_f1_from_tokens(true_answer["answer_start"],
                                           true_answer["answer_end"],
                                           estimated_start_index[i],
                                           estimated_end_index[i],
                                           context_batch[i], D))
                f1 += max(f1_score_answers)

            f1score_curr = f1 / CONFIG.BATCH_SIZE

            print("Current f1 score: ", f1score_curr)
            f1score.append(f1score_curr)

            print("Tested (", iteration, "/", len(padded_data), ")")

        if len(f1score) != 0:
            results = dict()
            results["average"] = np.mean(f1score)
            results["max"] = np.max(f1score)