def main(): parser = argparse.ArgumentParser() parser.add_argument("--embedding_cache") parser.add_argument("--print_statistics", action='store_true') args = parser.parse_args() embeddings = get_token_embeddings("", [], [], cache_name=args.embedding_cache) avg_embeddings = embeddings.make_average() # Table 5 entity_to_sent_score, entity_to_sent_count, entity_to_values = most_positive_entities(embeddings, avg_embeddings, subject_header="Perspective(ws)", object_header="Perspective(wo)") print_top100(entity_to_sent_count, entity_to_sent_score, print_header="Sentiment") # # Table 6 entity_to_power_score, entity_to_power_count, entity_to_power_values = entity_power_agency(embeddings, avg_embeddings, power=True, filter_set=None, article_filter_set=None, entity_map=None) print_top100(entity_to_power_count, entity_to_power_score, print_header="Power") # # Table 7 entity_to_agency_score, entity_to_agency_count, entity_to_agency_values = entity_power_agency(embeddings, avg_embeddings, power=False, filter_set=None, article_filter_set=None, entity_map=None) print_top100(entity_to_agency_count, entity_to_agency_score, print_header="Agency") print("###################################### Figures 2 and 3 #####################################################") target_entities = ["Donald Trump", "Hillary Clinton", "Al Franken", "Roy Moore", "Rose McGowan", "Leeann Tweeden", "Harvey Weinstein", "Bill Cosby"] for e in target_entities: print(e, entity_to_sent_score[e], entity_to_power_score[e]) if args.print_statistics: print_statistics(entity_to_values, target_entities, "Sentiment") print_statistics(entity_to_power_values, target_entities, "Power") # Figure 4 print("###################################### Outlet Franken Moore (Figure 4) #####################################################") franken_to_score, franken_to_values = most_positive_entities(embeddings, avg_embeddings, subject_header="Perspective(ws)", object_header="Perspective(wo)", by_outlet=True, filter_set=['Al Franken']) moore_to_score, moore_to_values = most_positive_entities(embeddings, avg_embeddings, subject_header="Perspective(ws)", object_header="Perspective(wo)", by_outlet=True, filter_set=['Roy Moore']) print("Al Franken", "Roy Moore") outlet_key_to_values = {} for o in franken_to_score: if o in moore_to_score: print(o, franken_to_score[o], moore_to_score[o]) outlet_key_to_values[("Franken", o)] = franken_to_values[o] outlet_key_to_values[("Moore", o)] = moore_to_values[o] if args.print_statistics: print_statistics(outlet_key_to_values, list(outlet_key_to_values.keys()), "Sentiment") aziz_analysis(embeddings, avg_embeddings, args.print_statistics) return
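# Hedged usage sketch (not part of the repo): the score and count dictionaries
# returned above map an entity name to a scalar, so individual entities can be
# inspected directly once this main() has run, e.g. in a notebook or debugger:
#
#   score = entity_to_sent_score["Al Franken"]
#   mentions = entity_to_sent_count["Al Franken"]
#   print("Al Franken: sentiment=%.3f over %d mentions" % (score, mentions))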
def main(): parser = argparse.ArgumentParser() parser.add_argument("--cache") parser.add_argument("--from_scratch", action='store_true') args = parser.parse_args() # other parameters don't matter when we read from the cache embeddings = get_token_embeddings("", [], weights=[0, 1, 0], cache_name=args.cache) embeddings.normalize() avg_embeddings = embeddings.make_average() if args.from_scratch: run_connotation_frames(embeddings, avg_embeddings) else: paper_runs(embeddings, avg_embeddings)
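# A minimal sketch of how this entry point is typically invoked; the script
# name "run_analysis.py" is an assumption for illustration, not part of the repo:
#
#   if __name__ == "__main__":
#       main()
#
#   # Presumably reproduces the paper's tables and figures from a cached embedding file:
#   #   python run_analysis.py --cache embeddings.pkl
#   # Recomputes the connotation-frame scores from scratch instead of using cached runs:
#   #   python run_analysis.py --cache embeddings.pkl --from_scratch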
def main(): parser = argparse.ArgumentParser() parser.add_argument("--embedding_cache") parser.add_argument("--score_cache", help="optionally cache scores for faster running") args = parser.parse_args() ARTICLE_COUNTS = [5, 10, 15, 20, 25, 30] AGREE_ONLY = False embeddings = get_token_embeddings("", [], [], cache_name=args.embedding_cache) embeddings.normalize() avg_embeddings = embeddings.make_average() _, keys, _, _ = get_annotations(restrict_match=True) names = set([key[1] for key in keys]) AZIZ_MAP = {} for line in open(cfg.AZIZ_ENTITIES).readlines(): parts = line.split(",") for p in parts: AZIZ_MAP[p.strip()] = parts[0].strip() for n in names: AZIZ_MAP[n] = n if args.score_cache and os.path.exists(args.score_cache): entity_to_our_score = pickle.load(open(args.score_cache, "rb")) else: entity_to_our_score = {} for c in ARTICLE_COUNTS: article_ids, qq = get_articles(c) print(c, article_ids, qq) entity_to_score, entity_to_count, _ = entity_power_agency( embeddings, avg_embeddings, power=True, filter_set=None, article_filter_set=article_ids, entity_map=AZIZ_MAP) for entity, score in entity_to_score.items(): if entity_to_count[entity] >= MIN_MENTIONS: entity_to_our_score[(c, entity)] = score if args.score_cache: pickle.dump(entity_to_our_score, open(args.score_cache, "wb")) # Gather the scores freq_scores = score_ents_by_frequency(embeddings, AZIZ_MAP) raw_scores = score_ents_raw_frames(embeddings, AZIZ_MAP) # We ultiamtely want evaluation over the same set of pairs, but these metrics # work over different pairs sets. Run the evaluation once over everyone to get # the pairs. Then run actual evaluations only over pairs we have all scores for auto_pairs = pairwise_compare(entity_to_our_score, ARTICLE_COUNTS, agree_only=AGREE_ONLY, verbose=False) freq_pairs = pairwise_compare(freq_scores, ARTICLE_COUNTS, agree_only=AGREE_ONLY, verbose=False) raw_pairs = pairwise_compare(raw_scores, ARTICLE_COUNTS, agree_only=AGREE_ONLY, verbose=False) merged_pairs = auto_pairs.intersection(freq_pairs, raw_pairs) # Now run the actual evaluations print("COMPLETE AUTOMATED") pairwise_compare(entity_to_our_score, ARTICLE_COUNTS, agree_only=AGREE_ONLY, pairs_to_keep=merged_pairs, verbose=True) print("FREQ SCORES") pairwise_compare(freq_scores, ARTICLE_COUNTS, agree_only=AGREE_ONLY, pairs_to_keep=merged_pairs, verbose=True) print("RAW SCORES") pairwise_compare(raw_scores, ARTICLE_COUNTS, agree_only=AGREE_ONLY, pairs_to_keep=merged_pairs, verbose=True) print("ANNOTATOR SCORES") gold_pairwise_compare(ARTICLE_COUNTS, pairs_to_keep=merged_pairs) print( "#######################################################################################" ) # Run evaluations over all the pairs for the automated approach that we can # Compare that to the frequency mentions, since we should have frequency scores for everybody merged_pairs = auto_pairs.intersection(freq_pairs) print("COMPLETE AUTOMATED") pairwise_compare(entity_to_our_score, ARTICLE_COUNTS, agree_only=AGREE_ONLY, pairs_to_keep=merged_pairs, verbose=True) print("FREQ SCORES") pairwise_compare(freq_scores, ARTICLE_COUNTS, agree_only=AGREE_ONLY, pairs_to_keep=merged_pairs, verbose=True) # Now run the hand annotations. 
We only annotated the first 10 articles # This is independent of others print( "##################### HAND ANNOTATIONS ####################################" ) key_to_embeds, key_to_signs, m = process_idx_files(cfg.AZIZ_HAND_PARSED) hand_to_score = score_keyed_embeddings(key_to_embeds, key_to_signs, m, avg_embeddings) hand_pairs = pairwise_compare(hand_to_score, [5, 10], agree_only=False, verbose=True) print("COMPLETE AUTOMATED") pairwise_compare(entity_to_our_score, [5, 10], agree_only=AGREE_ONLY, pairs_to_keep=hand_pairs, verbose=True) print("FREQ SCORES") pairwise_compare(freq_scores, [5, 10], agree_only=AGREE_ONLY, pairs_to_keep=hand_pairs, verbose=True) print("ANNOTATOR SCORES") gold_pairwise_compare(ARTICLE_COUNTS, pairs_to_keep=hand_pairs) print( "###################################### CORRELATIONS #############################" ) compare_corr(entity_to_our_score) print( "###################################### Inter-annotator correlations #############" ) get_annotations(restrict_match=True, verbose=True)
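# Hedged illustration (not the repo's pairwise_compare): the evaluations above
# are pairwise ranking comparisons. Given scores keyed by (article_count, entity)
# and a gold ordering over entity pairs, accuracy is the fraction of pairs whose
# relative order the scores reproduce. The gold_order argument and this helper
# are assumptions used only to illustrate the metric, not code from this repo.
def _sketch_pairwise_accuracy(scores, gold_order, pairs_to_keep):
    correct, total = 0, 0
    for key_a, key_b in pairs_to_keep:
        if key_a not in scores or key_b not in scores:
            continue  # only score pairs covered by this metric
        predicted_a_above_b = scores[key_a] > scores[key_b]
        total += 1
        if predicted_a_above_b == gold_order[(key_a, key_b)]:
            correct += 1
    return correct / total if total else 0.0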