# Esempio n. 1 (Example 1)
# 0
def main():
    """Reproduce the paper's entity rankings (Tables 5-7) and figure data (Figures 2-4)."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--embedding_cache")
    arg_parser.add_argument("--print_statistics", action='store_true')
    opts = arg_parser.parse_args()

    token_embeds = get_token_embeddings("", [], [], cache_name=opts.embedding_cache)
    mean_embeds = token_embeds.make_average()

    # Table 5: per-entity sentiment (perspective) ranking
    sent_score, sent_count, sent_values = most_positive_entities(
        token_embeds, mean_embeds,
        subject_header="Perspective(ws)", object_header="Perspective(wo)")
    print_top100(sent_count, sent_score, print_header="Sentiment")

    # Table 6: per-entity power ranking
    power_score, power_count, power_values = entity_power_agency(
        token_embeds, mean_embeds, power=True,
        filter_set=None, article_filter_set=None, entity_map=None)
    print_top100(power_count, power_score, print_header="Power")

    # Table 7: per-entity agency ranking
    agency_score, agency_count, _agency_values = entity_power_agency(
        token_embeds, mean_embeds, power=False,
        filter_set=None, article_filter_set=None, entity_map=None)
    print_top100(agency_count, agency_score, print_header="Agency")

    print("######################################  Figures 2 and 3 #####################################################")
    target_entities = ["Donald Trump", "Hillary Clinton", "Al Franken", "Roy Moore", "Rose McGowan", "Leeann Tweeden", "Harvey Weinstein", "Bill Cosby"]
    for name in target_entities:
        print(name, sent_score[name], power_score[name])
    if opts.print_statistics:
        print_statistics(sent_values, target_entities, "Sentiment")
        print_statistics(power_values, target_entities, "Power")

    # Figure 4: outlet-level sentiment toward Franken vs. Moore
    print("######################################  Outlet Franken Moore (Figure 4) #####################################################")
    franken_score, franken_values = most_positive_entities(
        token_embeds, mean_embeds,
        subject_header="Perspective(ws)", object_header="Perspective(wo)",
        by_outlet=True, filter_set=['Al Franken'])
    moore_score, moore_values = most_positive_entities(
        token_embeds, mean_embeds,
        subject_header="Perspective(ws)", object_header="Perspective(wo)",
        by_outlet=True, filter_set=['Roy Moore'])

    print("Al Franken", "Roy Moore")
    outlet_key_to_values = {}
    # Only report outlets that covered both entities.
    for outlet in franken_score:
        if outlet in moore_score:
            print(outlet, franken_score[outlet], moore_score[outlet])
            outlet_key_to_values[("Franken", outlet)] = franken_values[outlet]
            outlet_key_to_values[("Moore", outlet)] = moore_values[outlet]
    if opts.print_statistics:
        print_statistics(outlet_key_to_values, list(outlet_key_to_values.keys()), "Sentiment")

    aziz_analysis(token_embeds, mean_embeds, opts.print_statistics)
    return
# Esempio n. 2 (Example 2)
# 0
def main():
    """Load cached embeddings, then either recompute connotation frames or rerun the paper experiments."""
    cli = argparse.ArgumentParser()
    cli.add_argument("--cache")
    cli.add_argument("--from_scratch", action='store_true')
    options = cli.parse_args()

    # The other construction parameters are irrelevant when loading from the cache.
    token_embeds = get_token_embeddings(
        "", [], weights=[0, 1, 0], cache_name=options.cache)
    token_embeds.normalize()
    averaged = token_embeds.make_average()

    if options.from_scratch:
        run_connotation_frames(token_embeds, averaged)
    else:
        paper_runs(token_embeds, averaged)
def main():
    """Evaluate automated power scores against frequency, raw-frame, hand, and gold annotations.

    Loads cached token embeddings, builds per-entity power scores at several
    article-count budgets (optionally cached via --score_cache), then runs
    pairwise comparisons of the automated scores against baseline scorers and
    annotator judgments, printing all results to stdout.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--embedding_cache")
    parser.add_argument("--score_cache",
                        help="optionally cache scores for faster running")
    args = parser.parse_args()

    # Article-count budgets to score/evaluate at; AGREE_ONLY toggles whether
    # pairwise comparisons are restricted to annotator-agreed pairs.
    ARTICLE_COUNTS = [5, 10, 15, 20, 25, 30]
    AGREE_ONLY = False

    embeddings = get_token_embeddings("", [], [],
                                      cache_name=args.embedding_cache)
    embeddings.normalize()
    avg_embeddings = embeddings.make_average()

    _, keys, _, _ = get_annotations(restrict_match=True)
    # Assumes each key's second element is an entity name -- TODO confirm
    # against get_annotations.
    names = set([key[1] for key in keys])

    # Build an alias map: every comma-separated alias on a line of
    # cfg.AZIZ_ENTITIES maps to the canonical name in the line's first column.
    AZIZ_MAP = {}
    for line in open(cfg.AZIZ_ENTITIES).readlines():
        parts = line.split(",")
        for p in parts:
            AZIZ_MAP[p.strip()] = parts[0].strip()
    # Annotated names map to themselves (added last, so they win over aliases).
    for n in names:
        AZIZ_MAP[n] = n

    # NOTE(review): pickle.load on a user-supplied path runs arbitrary code if
    # the cache file is untrusted -- only point --score_cache at files this
    # script wrote.
    if args.score_cache and os.path.exists(args.score_cache):
        entity_to_our_score = pickle.load(open(args.score_cache, "rb"))
    else:
        # Score entities at each article-count budget; keep only entities with
        # at least MIN_MENTIONS mentions. Keys are (article_count, entity).
        entity_to_our_score = {}
        for c in ARTICLE_COUNTS:
            article_ids, qq = get_articles(c)
            print(c, article_ids, qq)
            entity_to_score, entity_to_count, _ = entity_power_agency(
                embeddings,
                avg_embeddings,
                power=True,
                filter_set=None,
                article_filter_set=article_ids,
                entity_map=AZIZ_MAP)
            for entity, score in entity_to_score.items():
                if entity_to_count[entity] >= MIN_MENTIONS:
                    entity_to_our_score[(c, entity)] = score
        if args.score_cache:
            pickle.dump(entity_to_our_score, open(args.score_cache, "wb"))

    # Gather the baseline scores
    freq_scores = score_ents_by_frequency(embeddings, AZIZ_MAP)
    raw_scores = score_ents_raw_frames(embeddings, AZIZ_MAP)

    # We ultimately want evaluation over the same set of pairs, but these metrics
    # work over different pairs sets. Run the evaluation once over everyone to get
    # the pairs. Then run actual evaluations only over pairs we have all scores for

    auto_pairs = pairwise_compare(entity_to_our_score,
                                  ARTICLE_COUNTS,
                                  agree_only=AGREE_ONLY,
                                  verbose=False)
    freq_pairs = pairwise_compare(freq_scores,
                                  ARTICLE_COUNTS,
                                  agree_only=AGREE_ONLY,
                                  verbose=False)
    raw_pairs = pairwise_compare(raw_scores,
                                 ARTICLE_COUNTS,
                                 agree_only=AGREE_ONLY,
                                 verbose=False)
    # Pairs for which every scorer produced a score.
    merged_pairs = auto_pairs.intersection(freq_pairs, raw_pairs)

    # Now run the actual evaluations over the shared pair set
    print("COMPLETE AUTOMATED")
    pairwise_compare(entity_to_our_score,
                     ARTICLE_COUNTS,
                     agree_only=AGREE_ONLY,
                     pairs_to_keep=merged_pairs,
                     verbose=True)
    print("FREQ SCORES")
    pairwise_compare(freq_scores,
                     ARTICLE_COUNTS,
                     agree_only=AGREE_ONLY,
                     pairs_to_keep=merged_pairs,
                     verbose=True)
    print("RAW SCORES")
    pairwise_compare(raw_scores,
                     ARTICLE_COUNTS,
                     agree_only=AGREE_ONLY,
                     pairs_to_keep=merged_pairs,
                     verbose=True)
    print("ANNOTATOR SCORES")
    gold_pairwise_compare(ARTICLE_COUNTS, pairs_to_keep=merged_pairs)
    print(
        "#######################################################################################"
    )

    # Run evaluations over all the pairs for the automated approach that we can
    # Compare that to the frequency mentions, since we should have frequency scores for everybody
    merged_pairs = auto_pairs.intersection(freq_pairs)
    print("COMPLETE AUTOMATED")
    pairwise_compare(entity_to_our_score,
                     ARTICLE_COUNTS,
                     agree_only=AGREE_ONLY,
                     pairs_to_keep=merged_pairs,
                     verbose=True)
    print("FREQ SCORES")
    pairwise_compare(freq_scores,
                     ARTICLE_COUNTS,
                     agree_only=AGREE_ONLY,
                     pairs_to_keep=merged_pairs,
                     verbose=True)

    # Now run the hand annotations. We only annotated the first 10 articles
    # This is independent of others
    print(
        "##################### HAND ANNOTATIONS ####################################"
    )
    key_to_embeds, key_to_signs, m = process_idx_files(cfg.AZIZ_HAND_PARSED)
    hand_to_score = score_keyed_embeddings(key_to_embeds, key_to_signs, m,
                                           avg_embeddings)
    # Hand annotations cover only the 5- and 10-article budgets.
    hand_pairs = pairwise_compare(hand_to_score, [5, 10],
                                  agree_only=False,
                                  verbose=True)
    print("COMPLETE AUTOMATED")
    pairwise_compare(entity_to_our_score, [5, 10],
                     agree_only=AGREE_ONLY,
                     pairs_to_keep=hand_pairs,
                     verbose=True)
    print("FREQ SCORES")
    pairwise_compare(freq_scores, [5, 10],
                     agree_only=AGREE_ONLY,
                     pairs_to_keep=hand_pairs,
                     verbose=True)
    print("ANNOTATOR SCORES")
    gold_pairwise_compare(ARTICLE_COUNTS, pairs_to_keep=hand_pairs)

    print(
        "###################################### CORRELATIONS #############################"
    )
    compare_corr(entity_to_our_score)
    print(
        "###################################### Inter-annotator correlations #############"
    )
    # verbose=True makes get_annotations print the inter-annotator agreement.
    get_annotations(restrict_match=True, verbose=True)