Example #1
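# NOTE: these snippets are not self-contained. They assume the surrounding project
# provides DataConnector, BeamDecoded, Evaluate and Plotting, plus module-level
# imports roughly like the following (the exact modules and the stemmer setup are
# assumptions for illustration, not taken from the original source):
#   import sys
#   import numpy as np
#   from nltk.stem.snowball import SnowballStemmer
#   sno = SnowballStemmer('english')   # `sno` is used below for stemming
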
def evaluator(params):

    title = params['title']
    data_path = params['data_path']
    idx_words = params['idx_words']
    words_idx = params['words_idx']
    preprocessed_v2 = params['preprocessed_v2']
    preprocessed_data = params['preprocessed_data']
    decode_path = params['decode_path']
    result_path = params['result_path']
    decoded = params['decoded_files']
    y_true = params['y_true']
    '''
    Reading vocabulary dictionaries

    '''

    indices_words_connector = DataConnector(preprocessed_v2,
                                            idx_words,
                                            data=None)
    indices_words_connector.read_pickle()
    indices_words = indices_words_connector.read_file

    words_indices_connector = DataConnector(preprocessed_v2,
                                            words_idx,
                                            data=None)
    words_indices_connector.read_pickle()
    words_indices = words_indices_connector.read_file
    '''

    indices_words_connector = DataConnector(preprocessed_v2, 'all_indices_words_fsoftmax.pkl', data=None)
    indices_words_connector.read_pickle()
    indices_words = indices_words_connector.read_file

    words_indices_connector = DataConnector(preprocessed_v2, 'all_words_indices_fsoftmax.pkl', data=None)
    words_indices_connector.read_pickle()
    words_indices = words_indices_connector.read_file

    '''

    # y_true (true keyphrases) from test set

    y_test_true_connector = DataConnector(data_path, y_true, data=None)
    y_test_true_connector.read_numpys()
    y_test_true = y_test_true_connector.read_file
    '''
    Reading generated keyphrases

    '''
    # read the generated keyphrases (one pickle of beam hypotheses per document)
    #kp_paths = ['keyphrases-beam-sts-kp20k-v1-%s' % (i) for i in range(500)]
    #kp_paths = ['keyphrases-beam-sts-kp20k-att-v1-%s' % (i) for i in range(500)]
    #kp_paths = ['keyphrases-beam-sts-kp20k-fsoftmax-v1-%s' % (i) for i in range(500)]
    kp_paths = ['%s-%s' % (decoded, i) for i in range(500)]

    dataconn = DataConnector(filepath=None, filename=None)

    # Alternative: read all generated hypotheses into one flat list
    # hypotheses = dataconn.read_pickles_all(result_path, kp_paths)

    # Read the generated hypotheses as a list of per-document arrays
    hypotheses = dataconn.read_pickles_doc(decode_path, kp_paths)
    '''
    print("length of hypotheses: %s" % (len(hypotheses)))
    print("len hypotheses[0]: %s" % len(hypotheses[0]))
    print("len hypotheses[1]: %s" % len(hypotheses[1]))
    print("len hypotheses[2]: %s" % len(hypotheses[2]))
    print("len hypotheses[3]: %s" % len(hypotheses[3]))
    '''

    # top-N cutoffs at which the predictions are evaluated
    n_rank = [1, 5, 10, 15, 20]
    all_rank_prediction = []
    for n in range(len(n_rank)):
        beam_predicted_keyphrases = []
        for keyphrase_list in hypotheses:
            stemmed_kps = []
            for keyphrases in keyphrase_list:

                beam_decoded = BeamDecoded(keyphrases, words_indices,
                                           indices_words, result_path)
                keyphrase = beam_decoded.decript_hypotheses()

                tokenized_keyphrase = keyphrase.split()
                #stemmed_kps = set()
                for kp in tokenized_keyphrase:
                    stemmed = sno.stem(kp)
                    if stemmed not in stemmed_kps:
                        stemmed_kps.append(stemmed)

            decoded_kps = stemmed_kps[:n_rank[n]]

            beam_predicted_keyphrases.append(decoded_kps)
            #beam_predicted_keyphrases.extend(decoded_kps)
        all_rank_prediction.append(beam_predicted_keyphrases)
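
    # all_rank_prediction: one list per cutoff in n_rank, each holding the
    # top-N stemmed unigram predictions for every document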
    '''
    Evaluating keyphrases generated by the sampled-softmax model with beam-search decoding
    '''

    all_rank_acc = []
    all_rank_precision = []
    all_rank_recall = []
    all_rank_fscore = []

    all_rank_tps = []
    all_rank_fns = []
    all_rank_fps = []

    print("******************")
    print("Model: %s" % (title))
    print("******************")

    for i, beam_predicted in enumerate(all_rank_prediction):
        evaluate_beam = Evaluate(beam_predicted, y_test_true, result_path)

        evaluate_beam.get_true_label_list()

        y_true = evaluate_beam.y_true

        y_pred = evaluate_beam.y_pred
        '''
        # optional debug output: compare the first few reference / predicted keyphrase lists
        print("length of y_true: %s" % (len(y_true)))
        for j in range(8):
            print("y_true[%s]: %s" % (j, str(y_true[j])))
            print("y_pred[%s]: %s" % (j, str(y_pred[j])))
            print("=============================")
        '''

        evaluate_beam.compute_true_positive()
        evaluate_beam.compute_false_negative()
        evaluate_beam.compute_false_positive()

        evaluate_beam.compute_accuracy()
        evaluate_beam.compute_precision()
        evaluate_beam.compute_recall()
        evaluate_beam.compute_fscore()

        mean_acc, mean_precision, mean_recall, mean_fscore = evaluate_beam.compute_mean_evals()
        all_rank_acc.append(mean_acc)
        all_rank_precision.append(mean_precision)
        all_rank_recall.append(mean_recall)
        all_rank_fscore.append(mean_fscore)

        mean_tps, mean_fns, mean_fps = evaluate_beam.compute_mean_cm()
        all_rank_tps.append(mean_tps)
        all_rank_fns.append(mean_fns)
        all_rank_fps.append(mean_fps)

        print("===================")

        print("N-Rank: %s" % (n_rank[i]))

        evaluate_beam.print_mean_evals()
        evaluate_beam.print_mean_cm()
        '''

        
        tps, tp_list = evaluate_beam.compute_true_positive_all()
        fns, fn_list = evaluate_beam.compute_false_negative_all()
        fps, fp_list = evaluate_beam.compute_false_positive_all()

        all_rank_tps.append(tps)
        all_rank_fns.append(fns)
        all_rank_fps.append(fps)

        print("===================")
        print("N-Rank: %s"%(n_rank[i]))

        acc = evaluate_beam.compute_accuracy_all()
        precision = evaluate_beam.compute_precision_all()
        recall = evaluate_beam.compute_recall_all()
        fscore = evaluate_beam.compute_fscore_all()

        all_rank_acc.append(acc)
        all_rank_precision.append(precision)
        all_rank_recall.append(recall)
        all_rank_fscore.append(fscore)
        
        '''

    all_rank_acc_conn = DataConnector(result_path, 'all_rank_acc_unigrams',
                                      all_rank_acc)
    all_rank_acc_conn.save_pickle()

    all_rank_precision_conn = DataConnector(result_path,
                                            'all_rank_precision_unigrams',
                                            all_rank_precision)
    all_rank_precision_conn.save_pickle()

    all_rank_recall_conn = DataConnector(result_path,
                                         'all_rank_recall_unigrams',
                                         all_rank_recall)
    all_rank_recall_conn.save_pickle()

    all_rank_fscore_conn = DataConnector(result_path,
                                         'all_rank_fscore_unigrams',
                                         all_rank_fscore)
    all_rank_fscore_conn.save_pickle()

    all_rank_tps_conn = DataConnector(result_path, 'all_rank_tps_unigrams',
                                      all_rank_tps)
    all_rank_tps_conn.save_pickle()

    all_rank_fns_conn = DataConnector(result_path, 'all_rank_fns_unigrams',
                                      all_rank_fns)
    all_rank_fns_conn.save_pickle()

    all_rank_fps_conn = DataConnector(result_path, 'all_rank_fps_unigrams',
                                      all_rank_fps)
    all_rank_fps_conn.save_pickle()

    plot_metrics = Plotting(all_rank_acc, all_rank_precision, all_rank_recall,
                            all_rank_fscore, all_rank_tps, all_rank_fps,
                            all_rank_fns, result_path)
    plot_metrics.plot_acc_fscore('plot_acc_fscore_unigrams.png')
    plot_metrics.plot_metrics('plot_metrics_unigrams.png')
    plot_metrics.plot_confusion_matrix('plot_confusion_matrix_unigrams.png')
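
# A minimal, hypothetical call sketch (not part of the original project): every
# path, file name and value below is a placeholder chosen only for illustration.
if __name__ == '__main__':
    params = {
        'title': 'sampled-softmax + beam search',
        'data_path': 'data/test/',
        'preprocessed_v2': 'data/preprocessed_v2/',
        'preprocessed_data': 'data/preprocessed/',
        'decode_path': 'results/decoded/',
        'result_path': 'results/eval/',
        'idx_words': 'all_indices_words.pkl',
        'words_idx': 'all_words_indices.pkl',
        'decoded_files': 'keyphrases-beam',
        'y_true': 'y_test_true.npy',
    }
    evaluator(params)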
Example #2
def evaluator(params):
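    # Unlike Example #1 (which scores stemmed unigrams), this variant keeps the
    # full decoded keyphrase strings as candidates and merges the train,
    # validation and test references into one ground-truth set. Note that
    # model_path, file_name, weights and preprocessed_data are read from params
    # but not used in this snippet.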

    data_path = params['data_path']
    preprocessed_v2 = params['preprocessed_v2']
    preprocessed_data = params['preprocessed_data']
    decode_path = params['decode_path']
    model_path = params['model_path']
    result_path = params['result_path']
    file_name = params['file_name']
    weights = params['weights']

    idx_words = params['idx_words']
    words_idx = params['words_idx']
    decoded = params['decoded_files']
    y_1 = params['y1']
    y_2 = params['y2']
    y_3 = params['y3']
    '''
    Reading vocabulary dictionaries

    '''
    indices_words_connector = DataConnector(preprocessed_v2,
                                            idx_words,
                                            data=None)
    indices_words_connector.read_pickle()
    indices_words = indices_words_connector.read_file

    words_indices_connector = DataConnector(preprocessed_v2,
                                            words_idx,
                                            data=None)
    words_indices_connector.read_pickle()
    words_indices = words_indices_connector.read_file

    # merge the train, validation, and test outputs into one ground-truth set

    train_outputs_conn = DataConnector(data_path, y_1, data=None)
    train_outputs_conn.read_numpys()
    train_outputs = train_outputs_conn.read_file

    valid_outputs_conn = DataConnector(data_path, y_2, data=None)
    valid_outputs_conn.read_numpys()
    valid_outputs = valid_outputs_conn.read_file

    test_outputs_conn = DataConnector(data_path, y_3, data=None)
    test_outputs_conn.read_numpys()
    test_outputs = test_outputs_conn.read_file

    y_test_true = np.concatenate((train_outputs, valid_outputs, test_outputs))

    print("Ground truth of keyphrases shape: %s" %
          str(y_test_true.shape))  # reference keyphrases for evaluation
    sys.stdout.flush()
    '''
    Reading generated keyphrases

    '''

    # number of documents in the Inspec test set
    kp_paths = ['%s-%s' % (decoded, i) for i in range(500)]

    #kp_paths = ['keyphrases-beam-decode-inspec-%s'%(i) for i in range(2000)]
    dataconn = DataConnector(filepath=None, filename=None)
    # Alternative: read all generated hypotheses into one flat list
    #hypotheses = dataconn.read_pickles_all(result_path, kp_paths)

    # Read the generated hypotheses as a list of per-document arrays
    hypotheses = dataconn.read_pickles_doc(decode_path, kp_paths)

    print("length of hypotheses: %s" % (len(hypotheses)))
    print("len hypotheses[0]: %s" % len(hypotheses[0]))
    print("len hypotheses[1]: %s" % len(hypotheses[1]))
    print("len hypotheses[2]: %s" % len(hypotheses[2]))
    print("len hypotheses[3]: %s" % len(hypotheses[3]))

    # top-N cutoffs at which the predictions are evaluated
    n_rank = [1, 5, 10, 15, 20]

    all_rank_prediction = []
    for n in range(len(n_rank)):

        beam_predicted_keyphrases = []

        for keyphrase_list in hypotheses:

            i = 0

            decoded_kps = []
            for keyphrases in keyphrase_list:

                beam_decoded = BeamDecoded(keyphrases, words_indices,
                                           indices_words, result_path)
                keyphrase = beam_decoded.decript_hypotheses()
                if i < n_rank[n]:
                    decoded_kps.append(keyphrase)
                    i += 1

            # print("len(decoded_kps): %s"%len(decoded_kps))

            beam_predicted_keyphrases.append(decoded_kps)
            #beam_predicted_keyphrases.extend(decoded_kps)
        all_rank_prediction.append(beam_predicted_keyphrases)
    '''
    Evaluating keyphrases generated by the sampled-softmax model with beam-search decoding
    '''

    all_rank_acc = []
    all_rank_precision = []
    all_rank_recall = []
    all_rank_fscore = []

    all_rank_tps = []
    all_rank_fns = []
    all_rank_fps = []

    for i, beam_predicted in enumerate(all_rank_prediction):
        evaluate_beam = Evaluate(beam_predicted, y_test_true, result_path)

        evaluate_beam.get_true_keyphrases()
        #evaluate_beam.get_true_keyphrases_all()
        y_true = evaluate_beam.y_true
        print("length of y_true: %s" % (len(y_true)))
        '''
        # optional debug output: inspect the first few reference keyphrase lists
        print("length of y_true: %s" % (len(y_true)))
        for j in range(8):
            print("y_true[%s]: %s" % (j, str(y_true[j])))
        '''

        evaluate_beam.compute_true_positive()
        evaluate_beam.compute_false_negative()
        evaluate_beam.compute_false_positive()

        evaluate_beam.compute_accuracy()
        evaluate_beam.compute_precision()
        evaluate_beam.compute_recall()
        evaluate_beam.compute_fscore()

        mean_acc, mean_precision, mean_recall, mean_fscore = evaluate_beam.compute_mean_evals()
        all_rank_acc.append(mean_acc)
        all_rank_precision.append(mean_precision)
        all_rank_recall.append(mean_recall)
        all_rank_fscore.append(mean_fscore)

        mean_tps, mean_fns, mean_fps = evaluate_beam.compute_mean_cm()
        all_rank_tps.append(mean_tps)
        all_rank_fns.append(mean_fns)
        all_rank_fps.append(mean_fps)

        print("===================")
        print("N-Rank: %s" % (n_rank[i]))

        evaluate_beam.print_mean_evals()
        evaluate_beam.print_mean_cm()
        '''

        
        tps, tp_list = evaluate_beam.compute_true_positive_all()
        fns, fn_list = evaluate_beam.compute_false_negative_all()
        fps, fp_list = evaluate_beam.compute_false_positive_all()

        all_rank_tps.append(tps)
        all_rank_fns.append(fns)
        all_rank_fps.append(fps)

        print("===================")
        print("N-Rank: %s"%(n_rank[i]))

        acc = evaluate_beam.compute_accuracy_all()
        precision = evaluate_beam.compute_precision_all()
        recall = evaluate_beam.compute_recall_all()
        fscore = evaluate_beam.compute_fscore_all()

        all_rank_acc.append(acc)
        all_rank_precision.append(precision)
        all_rank_recall.append(recall)
        all_rank_fscore.append(fscore)
        
        '''

    all_rank_acc_conn = DataConnector(result_path, 'all_rank_acc',
                                      all_rank_acc)
    all_rank_acc_conn.save_pickle()

    all_rank_precision_conn = DataConnector(result_path, 'all_rank_precision',
                                            all_rank_precision)
    all_rank_precision_conn.save_pickle()

    all_rank_recall_conn = DataConnector(result_path, 'all_rank_recall',
                                         all_rank_recall)
    all_rank_recall_conn.save_pickle()

    all_rank_fscore_conn = DataConnector(result_path, 'all_rank_fscore',
                                         all_rank_fscore)
    all_rank_fscore_conn.save_pickle()

    all_rank_tps_conn = DataConnector(result_path, 'all_rank_tps',
                                      all_rank_tps)
    all_rank_tps_conn.save_pickle()

    all_rank_fns_conn = DataConnector(result_path, 'all_rank_fns',
                                      all_rank_fns)
    all_rank_fns_conn.save_pickle()

    all_rank_fps_conn = DataConnector(result_path, 'all_rank_fps',
                                      all_rank_fps)
    all_rank_fps_conn.save_pickle()

    plot_metrics = Plotting(all_rank_acc, all_rank_precision, all_rank_recall,
                            all_rank_fscore, all_rank_tps, all_rank_fps,
                            all_rank_fns, result_path)
    plot_metrics.plot_acc_fscore()
    plot_metrics.plot_metrics()
    plot_metrics.plot_confusion_matrix()