Example #1
import copy

def analyze_results(predictions):
    # Relies on module-level helpers bcubed, hash_d and key_with_max_value, and
    # on a GOLD key under which the gold clustering is stored in `predictions`.
    # recalls = {key:value for key,value in recalls.items() if key[2] == 'cosine'}
    # precisions = {key:value for key,value in precisions.items() if key[2] == 'cosine'}
    recalls, precisions, fscores = {}, {}, {}
    for key in predictions:
        # if key[3] != ('lemma',):
        #     continue
        r, p, f1 = bcubed(predictions[GOLD], predictions[key])
        recalls[key] = r
        precisions[key] = p
        fscores[key] = f1
        print key, r, p, f1

    list_names = {
        hash_d(recalls): 'Best Recall Score',
        hash_d(precisions): 'Best Precision Score',
        hash_d(fscores): 'Best F1 Score'
    }

    for d in (recalls, precisions, fscores):
        di = copy.deepcopy(d)
        print '\nTop 25 param sets for %s:' % list_names[hash_d(d)]
        for i in range(25):
            params = key_with_max_value(di)
            print 'Number %d: Params = %s Scores = %0.5f %0.5f %0.5f'\
                  %(i+1, str(params), recalls[params], precisions[params], fscores[params])
            del di[params]
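Note: hash_d and key_with_max_value are repo helpers not shown on this page; judging by how they are used above, a minimal sketch (an assumption, not the repo's code) is:

def hash_d(d):
    # Hypothetical: a stable per-object handle, which is enough for indexing
    # list_names above since the very same dict objects are reused.
    return id(d)

def key_with_max_value(d):
    # Hypothetical: key of the largest value; ties broken arbitrarily.
    return max(d, key=d.get)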
Example #2
from sklearn.metrics import f1_score, precision_score, recall_score  # assumed source of the metric helpers

def analyze_singleton_accuracy(gold, predictions):
    assert len(gold) == len(predictions)
    ### comparing singleton detection ###
    gold_sing_idxs = set()
    pred_sing_idxs = set()
    for which, lst in enumerate([gold, predictions]):
        sing_idxs = set()
        nonsing_idxs = set()
        # O(n^2) scan: a mention is a singleton iff no other mention shares its label
        for i in xrange(len(lst)):
            for j in xrange(len(lst)):
                if i != j:
                    if lst[i] == lst[j]:  # shares a cluster -> not singleton
                        nonsing_idxs.add(i)
                        nonsing_idxs.add(j)
            if i not in nonsing_idxs:
                sing_idxs.add(i)

        # index explicitly rather than "first empty set wins", which silently
        # misassigned the sets whenever gold contained no singletons at all
        if which == 0:
            gold_sing_idxs = sing_idxs
        else:
            pred_sing_idxs = sing_idxs

    print "PERCENT OF SINGLETONS: %0.3f"%(len(gold_sing_idxs) / float(len(gold)))

    gold_sing = [1 if i in gold_sing_idxs else 0 for i in xrange(len(gold))]
    pred_sing = [1 if i in pred_sing_idxs else 0 for i in xrange(len(predictions))]

    print 'Accuracy with respect to the correct identification of mentions as singletons:'
    print 'R: %0.4f,   P: %0.4f,   F1: %0.4f' % (
        recall_score(gold_sing, pred_sing),
        precision_score(gold_sing, pred_sing),
        f1_score(gold_sing, pred_sing))

    gold_non_sing = [0 if i in gold_sing_idxs else 1 for i in xrange(len(gold))]
    pred_non_sing = [0 if i in pred_sing_idxs else 1 for i in xrange(len(predictions))]

    print 'Accuracy with respect to the correct identification of mentions as NOT being singletons:'
    print 'R: %0.4f,   P: %0.4f,   F1: %0.4f' % (
        recall_score(gold_non_sing, pred_non_sing),
        precision_score(gold_non_sing, pred_non_sing),
        f1_score(gold_non_sing, pred_non_sing))

    # make coref chains without singletons
    non_sing_gold = []
    non_sing_pred = []
    for i,val in enumerate(gold_non_sing):
        if val == 1:
            non_sing_gold.append(gold[i])
            non_sing_pred.append(predictions[i])
    results = bcubed(non_sing_gold, non_sing_pred)
    print 'B3 results obtained after removing all GOLD singletons:'
    print results
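Note: every example on this page scores clusterings with bcubed(gold, pred), whose implementation is not shown. A minimal O(n^2) B-cubed sketch over flat, parallel label lists, assuming the (recall, precision, f1) return order the callers unpack:

def bcubed(gold, pred):
    # gold, pred: parallel lists of cluster labels, one entry per mention.
    n = len(gold)
    r_credit = p_credit = 0.0
    for i in xrange(n):
        gold_chain = set(j for j in xrange(n) if gold[j] == gold[i])
        pred_chain = set(j for j in xrange(n) if pred[j] == pred[i])
        overlap = len(gold_chain & pred_chain)
        r_credit += overlap / float(len(gold_chain))  # mention-level recall
        p_credit += overlap / float(len(pred_chain))  # mention-level precision
    r, p = r_credit / n, p_credit / n
    f1 = 2 * p * r / (p + r) if p + r > 0 else 0.0
    return r, p, f1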
Example #3
    def neural_predict(self, x, y, threshold_range, metric='cosine',
                       link_function='single', rand_score=False, train_data=None,
                       delta_filter=False, lemma_init=False, lemma_predictor=None):
        c = AgglomerativeClusterer(x, distance_metric=metric, train_data=train_data)
        best_score = (0., 0., 0.,)
        best_thresh = 0.
        best_delta = 0
        best_clusters = None
        all_scores = {}
        if not isinstance(threshold_range, float):
            # an int threshold_range means: sweep that many grid points over
            # [0.65, 1.0], dropping both endpoints
            for threshold in np.linspace(0.65, 1.0, threshold_range)[1:-1]:
                clust_idxs = c.cluster(threshold, linktype=link_function)
                clusters = np.zeros(len(x))
                for i,cluster in enumerate(clust_idxs):
                    clusters[cluster] = i

                # recall, precision, f1
                rpf1 = bcubed(y, clusters)
                # NOTE: in this sweep branch best_score/best_thresh are only
                # updated under delta_filter; otherwise callers must read all_scores.
                all_scores[threshold] = rpf1

                # delta filtering at each threshold (slow: 101 deltas per threshold)
                print "delta filter for threshold", threshold
                print "result with no delta", rpf1
                if delta_filter:
                    for delta in np.linspace(0, 1, 101):
                        _, new_clusters = self.delta_filter(clusters, delta)
                        drpf1 = bcubed(y, new_clusters)
                        print delta, drpf1
                        if drpf1[2] > best_score[2]:
                            best_score = drpf1
                            best_thresh = threshold
                            best_delta = delta
                    print "best delta & thresh so far",best_delta,best_thresh
                    print "best result so far",best_score

        else:
            if not lemma_init:
                clust_idxs = c.cluster(threshold_range, linktype=link_function)
                clusters = np.zeros(len(x))
                for i,cluster in enumerate(clust_idxs):
                    clusters[cluster] = i
                best_score = bcubed(y, clusters)
                best_clusters = clusters

            # use lemma initialization and tune to a certain value for delta!
            if lemma_init:
                print('Doing lemma initialization tests!')
                best_score = 0  # scalar best F1 here, unlike the (r, p, f1) tuple used elsewhere
                best_params = None
                for delta in np.linspace(0.5, 1, 11):
                    tmp, lemma_preds = lemma_predictor.predict(build_test_comparison=False, delta=delta)
                    for thresh in np.linspace(0.6, 1, 21):
                        for mkt in np.linspace(0, 0, 1):  # sweep disabled; was (0.5, 1, 26) to tune the minimum keeping threshold
                            clust_idxs = c.cluster(thresh, linktype=link_function, preset_predictions=lemma_preds, minimum_keeping_threshold=mkt)
                            clusters = np.zeros(len(x))
                            for i,cluster in enumerate(clust_idxs):
                                clusters[cluster] = i

                            # recall,precision,f1
                            score = bcubed(y, clusters)
                            print delta, thresh, mkt, score
                            if score[2] > best_score:
                                best_score = score[2]
                                best_params = (thresh, delta, mkt,)
                
                print 'Best score and best params:'
                print best_score, ': with - d=%0.2f, t=%0.2f'%(best_params[1], best_params[0])
                print 'with minimum keeping threshold: %0.2f'%best_params[2]
                best_thresh = best_params # return best_thresh as a tuple

            # delta filtering (assumes the non-lemma_init branch above ran, so
            # best_clusters is set and best_score is an (r, p, f1) tuple)
            if delta_filter:
                new_best_score = best_score
                best_delta = 0
                for delta in np.linspace(0, 1, 101):
                    _, new_clusters = self.delta_filter(best_clusters, delta)
                    rpf1 = bcubed(y, new_clusters)
                    print delta,rpf1
                    if rpf1[2] > new_best_score[2]:
                        new_best_score = rpf1
                        best_delta = delta

                print "BEST DELTA ",best_delta
                print "NEW BEST SCORE ",new_best_score


        # print "OLD BEST SCORE ",best_score
        return best_score, best_thresh, all_scores
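Note: for orientation, a hypothetical call that sweeps a 20-point threshold grid with delta filtering enabled (model, x_val and y_val are placeholder names, not from this page):

best_score, best_thresh, all_scores = model.neural_predict(
    x_val, y_val,
    threshold_range=20,       # an int triggers the grid sweep over [0.65, 1.0]
    metric='cosine',
    link_function='single',
    delta_filter=True)
print 'best (r, p, f1):', best_score, 'at threshold', best_thresh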
Example #4
    # cd = load(data_set='all')

    # pred = BaselineLemmaPredictor(cd.gold_mention_clusters,
    #                               cd.get_all_tokens(topics=helpers.TEST),
    #                               topics=helpers.TEST,
    #                               events_only=True,
    #                               data_set='test',
    #                               with_topics=False)
    # predicted = pred.predict()
    # pred.save_predictions_mention_based(predicted)
    cd = load(events_only=False)
    gold = None
    for met in ['cosine']:
        for thresh in [0.7]:
            pred = ClusteringPredictor(cd.gold_mention_clusters,
                                       cd.get_all_tokens(topics=helpers.VAL),
                                       events_only=True,
                                       data_set='val',
                                       topics=helpers.VAL)
            print 'Predicting...'
            g, predicted = pred.predict(threshold=thresh,
                                        metric=met,
                                        link_function='single',
                                        build_test_comparison=gold is None,
                                        split_into_topics=False)
            if gold is None:
                gold = g

            print thresh, ':', bcubed(gold, predicted)

            # pred.save_predictions_mention_based(predicted, gold_list=gold)

    print
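Note: the single-element met and thresh lists make the loops above a degenerate grid search; a real sweep just widens them (illustrative values, not from the repo) and keeps the loop body, including the cached gold list, exactly as above:

for met in ['cosine', 'euclidean']:
    for thresh in np.linspace(0.55, 0.85, 7):
        # same ClusteringPredictor construction and predict() call as above
        pass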
Example #5
    test_set = helpers.VAL
    split_into_topics = False

    train_pred, x_train, y_train, train_mentions, val_pred, x_val, y_val, val_mentions = \
        initialize_data_sets(events_only, test_set, split_into_topics,
                             cluster_singletons=PUT_SINGLETONS, remove_train_singletons=REMOVE_TRAIN_SINGLETONS)

    if LEMMA_PRED:
        lpred = get_lemma_predictor(helpers.VAL) # start with validation one
        test_comp = None
        best_score = 0
        best_delta = 0
        for delta in np.linspace(0,1,101):
            tmp, preds = lpred.predict(build_test_comparison=test_comp is None, delta=delta)
            test_comp = tmp if test_comp is None else test_comp
            score = bcubed(test_comp, preds)[2] # returns r,p,f1
            if score > best_score:
                best_score = score
                best_delta = delta
            print 'Delta %0.2f gets us B3 F1 %0.5f!' % (delta, score)

        exit(0)  # early exit: everything below is unreachable until this line is removed
        # probably can be optimized, but just need tfidf to be built

        initialize_data_sets(events_only, helpers.TEST, split_into_topics,
                             cluster_singletons=PUT_SINGLETONS, remove_train_singletons=REMOVE_TRAIN_SINGLETONS)

        ltestpred = get_lemma_predictor(helpers.TEST)
        gold, preds = ltestpred.predict(delta=best_delta)
        analyze_singleton_accuracy(gold, preds)
        ltestpred.save_predictions_mention_based(preds, 'HEAD_LEMMA_DELTA')
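Note: the delta sweep above (predict at each delta, cache the gold comparison once, keep the best B3 F1) recurs across these examples; a hypothetical helper that factors it out, not from the repo:

import numpy as np

def sweep_delta(predictor, deltas=None):
    # Hypothetical: returns (best_f1, best_delta); builds the gold comparison
    # list on the first predict() call only, then reuses it.
    if deltas is None:
        deltas = np.linspace(0, 1, 101)
    test_comp = None
    best_f1, best_delta = 0.0, 0.0
    for delta in deltas:
        tmp, preds = predictor.predict(build_test_comparison=test_comp is None,
                                       delta=delta)
        test_comp = tmp if test_comp is None else test_comp
        f1 = bcubed(test_comp, preds)[2]
        if f1 > best_f1:
            best_f1, best_delta = f1, delta
    return best_f1, best_delta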