def main():
    """Print a gold and a system timeline side by side, one date at a time.

    ``sys.argv[1]`` is the gold timeline file and ``sys.argv[2]`` the system
    timeline file. For every date that occurs in either timeline the date is
    printed, followed by both summaries rendered via ``print_vertical_split``
    and a blank line. A timeline that has no entry for the date contributes
    the placeholder ``-----``.
    """
    gold_path, system_path = sys.argv[1], sys.argv[2]

    loaded = []
    for path in (gold_path, system_path):
        with open(path, errors="ignore") as handle:
            loaded.append(Timeline.from_file(handle))
    gold_tl, sys_tl = loaded

    every_date = sorted(set(gold_tl.get_dates()) | set(sys_tl.get_dates()))
    for date in every_date:
        print(date)

        columns = []
        for timeline in (gold_tl, sys_tl):
            summaries = timeline.dates_to_summaries
            if date in summaries:
                columns.append("\n".join(summaries[date]))
            else:
                columns.append("-----")

        print_vertical_split(columns[0], columns[1])
        print()
def generate_question_html(sys_tl_dir, tl_questions):
    """Yield a rendered question template for each system timeline.

    Args:
        sys_tl_dir: Directory holding the system timelines, laid out as
            ``<sys_tl_dir>/<topic>/<timeline name>``.
        tl_questions: Mapping from ``(topic, timeline name)`` to the
            questions passed on to ``generate_template``.

    Yields:
        ``(topic, timeline name, template)`` triples, where ``template`` is
        whatever ``generate_template`` returns for the loaded timeline.
    """
    for key, questions in tl_questions.items():
        tl_topic, tl_name = key
        timeline_path = os.path.join(sys_tl_dir, tl_topic, tl_name)
        with open(timeline_path, errors="ignore") as f_tl:
            timeline = Timeline.from_file(f_tl)
        yield tl_topic, tl_name, generate_template(timeline, questions)
def read_gold_tl_dir(gold_dir):
    """Parse every entry of *gold_dir* as a timeline.

    Args:
        gold_dir: Directory whose entries are all timeline files.

    Returns:
        Dict mapping each entry name (as listed by ``os.listdir``) to the
        ``Timeline`` read from it.
    """

    def _load(path):
        # Undecodable bytes are dropped instead of raising.
        with open(path, errors="ignore") as handle:
            return Timeline.from_file(handle)

    return {
        entry: _load(os.path.join(gold_dir, entry))
        for entry in os.listdir(gold_dir)
    }
def load_all_gold_timelines():
    """Load all ``.txt`` gold timelines below the ``gold-timelines`` directory.

    Returns:
        Dict keyed by ``(topic directory name, timeline file name)`` whose
        values are the parsed ``Timeline`` objects.
    """
    loaded = {}
    for topic_dir in iter_dirs("gold-timelines"):
        topic_name = os.path.basename(topic_dir)
        for timeline_path in iter_files(topic_dir, ".txt"):
            with open(timeline_path, errors="ignore") as handle:
                timeline = Timeline.from_file(handle)
            loaded[(topic_name, os.path.basename(timeline_path))] = timeline
    return loaded
def compute_reliability(date_scores):
    """Print inter-annotator agreement on date rankings for every timeline.

    Args:
        date_scores: Mapping ``annotator -> {(topic, tl_name) -> {date: score}}``.

    For each ``(topic, tl_name)`` found in any annotator's scores, the gold
    timeline ``./gold-timelines/<topic>/<tl_name>.txt`` is loaded and every
    annotator's scores are converted to dense ranks (rank 1 = highest score,
    tied scores share a rank, unscored dates count as score 0). The ranks
    form one matrix row per annotator. ``k.alpha`` (presumably Krippendorff's
    alpha -- confirm against the module imported as ``k``) is printed for the
    whole matrix at the ordinal level and for every annotator pair at the
    interval level.
    """
    gold_root = Path("./gold-timelines")
    annotators = sorted(date_scores)

    timeline_keys = {
        (topic, tl_name)
        for per_annotator in date_scores.values()
        for topic, tl_name in per_annotator
    }

    for topic, tl_name in timeline_keys:
        gold_path = gold_root / topic / (tl_name + ".txt")
        with open(gold_path, errors="ignore") as f:
            tl = Timeline.from_file(f)

        score_matrix = np.zeros((len(date_scores), len(tl.get_dates())))
        dates = sorted(tl.get_dates())

        for row, annotator in enumerate(annotators):
            raw_scores = date_scores[annotator][(topic, tl_name)]

            # Walk the dates from best to worst score and bump the rank only
            # when the score changes, so ties share one rank.
            by_score = sorted(
                dates, key=lambda d: raw_scores.get(d, 0), reverse=True)
            ranks = {}
            rank = 0
            last_score = None
            for date in by_score:
                current = raw_scores.get(date, 0)
                if last_score is None or last_score != current:
                    rank += 1
                    last_score = current
                ranks[date] = rank

            for col, date in enumerate(dates):
                score_matrix[row, col] = ranks.get(date, 0)

        print(topic, tl_name,
              k.alpha(score_matrix, level_of_measurement="ordinal"))

        for first_idx, second_idx in it.combinations(
                range(len(annotators)), 2):
            first_name = ANNOTATORS[annotators[first_idx]]
            second_name = ANNOTATORS[annotators[second_idx]]
            pair_rows = score_matrix[[first_idx, second_idx]]
            print(first_name, second_name,
                  k.alpha(pair_rows, level_of_measurement="interval"))
def main():
    """Write an empty annotation sheet listing the dates of each timeline.

    ``sys.argv[1]`` is a directory of topic directories that contain timeline
    files; ``sys.argv[2]`` is the output file. For every timeline a header
    line ``== <topic> <file name>`` is written, followed by one line per date
    (ascending) consisting of the date and two tab characters.
    """
    timelines_root, out_path = sys.argv[1], sys.argv[2]

    with open(out_path, "w") as f_out:
        for topic_dirname in os.listdir(timelines_root):
            topic_dir = os.path.join(timelines_root, topic_dirname)
            for tl_fname in os.listdir(topic_dir):
                with open(os.path.join(topic_dir, tl_fname),
                          errors="ignore") as f_tl:
                    tl = Timeline.from_file(f_tl)

                f_out.write("== {} {}\n".format(topic_dirname, tl_fname))
                f_out.writelines(
                    str(date) + "\t\t\n" for date in sorted(tl.get_dates()))
def main():
    """Estimate reading and annotation cost for a fixed set of gold timelines.

    Walks all ``.txt`` timelines below ``gold-timelines``, keeps the four
    hard-coded (topic, file) pairs, drops timelines longer than 50 entries
    and prints tuple counts, reading time/cost and annotation time/cost.
    Prices come from ``STUDENT_PRICE_PER_TUPLE`` and
    ``STUDENT_PRICE_PER_READING_MINUTE``; three annotations per item are
    assumed.
    """
    wanted = {
        ("tl17-bpoil", "bbc.txt"),
        ("crisis-syria", "bbc.txt"),
        ("tl17-mj", "bbc.txt"),
        ("crisis-libya", "xinhua.txt"),
    }

    all_timelines = []
    for topic_dir in iter_dirs("gold-timelines"):
        for tl_path in iter_files(topic_dir, ".txt"):
            with open(tl_path, errors="ignore") as f:
                tl = Timeline.from_file(f)
            print(topic_dir, tl_path)

            key = (os.path.split(topic_dir)[-1], os.path.split(tl_path)[-1])
            if key in wanted:
                all_timelines.append(tl)
                print(len(tl))

    print("Original TL-Count", len(all_timelines))
    all_timelines = [tl for tl in all_timelines if len(tl) <= 50]
    print("New TL-Count", len(all_timelines))

    # Two tuples are budgeted per timeline date.
    tl_tuple_counts = [len(tl.get_dates()) * 2 for tl in all_timelines]
    total_tuples = sum(tl_tuple_counts)
    print(total_tuples)

    price_per_tuple = STUDENT_PRICE_PER_TUPLE
    num_annotations = 3

    tl_words = [
        tokenize(" ".join(summary_sents))
        for tl in all_timelines
        for summary_sents in tl.dates_to_summaries.values()
    ]
    # Reading time in minutes at 200 tokens per minute.
    total_reading_time = sum([len(words) / 200 for words in tl_words])

    print("Reading time", total_reading_time)
    print(
        "Reading cost",
        total_reading_time * STUDENT_PRICE_PER_READING_MINUTE *
        num_annotations)

    tl_tuple_price = total_tuples * price_per_tuple * num_annotations

    print("Number of Tuples", total_tuples)
    print("Annotation Time", total_tuples / 1.5)
    print("Annotation Cost", tl_tuple_price)
def main():
    """Rank the dates of a timeline from pairwise comparison results.

    Command line: ``timeline replay_file [--replay]``.

    Without ``--replay`` the comparisons produced by
    ``play_comparison_game`` are consumed and logged to ``replay_file`` as
    tab-separated ``date_1 date_2 result`` lines. With ``--replay`` the
    comparisons are read back from that file instead.

    Scoring: result ``0`` gives both dates 0.5, ``-1`` gives the first date
    1 and ``1`` gives the second date 1. Dates are then printed with their
    summaries in descending score order.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("timeline")
    parser.add_argument("replay_file")
    parser.add_argument("--replay", action="store_true", default=False)
    args = parser.parse_args()

    with open(args.timeline, errors="ignore") as f:
        tl = Timeline.from_file(f)

    comparisons = play_comparison_game(tl)

    def _parse_date(text):
        # Replay files store dates as YYYY-MM-DD.
        return datetime.date(*map(int, text.split("-")))

    all_comparisons = []
    if args.replay:
        with open(args.replay_file) as f:
            for line in f:
                first, second, outcome = line.split()
                all_comparisons.append(
                    (_parse_date(first), _parse_date(second), int(outcome)))
    else:
        with open(args.replay_file, "w") as f_out:
            for first, second, outcome in comparisons:
                f_out.write("{}\t{}\t{}\n".format(first, second, outcome))
                all_comparisons.append((first, second, outcome))

    scores = dict.fromkeys(tl.get_dates(), 0)
    for first, second, outcome in all_comparisons:
        if outcome == 0:
            scores[first] += 0.5
            scores[second] += 0.5
        elif outcome == -1:
            scores[first] += 1
        elif outcome == 1:
            scores[second] += 1

    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    for date, _score in ranked:
        print(date, "\n", "\n".join(tl.dates_to_summaries[date]))
        print()
def retrieve_hit_ranking(hit_task_name):
    """Collect best/worst vote balances per timeline date from MTurk answers.

    Args:
        hit_task_name: Name of the MTurk task; used to build the
            ``MTurkClient``. When it does not contain ``"K"``, every answer
            is additionally echoed to stdout.

    Returns:
        ``defaultdict(Counter)`` mapping ``(topic, timeline name)`` to a
        counter ``date -> score``. A date gains 1 for every answer whose key
        ends in ``best`` and loses 1 for every other answer.

    Raises:
        ValueError: If a HIT annotation is not of the form
            ``<task>:<topic>__<name>`` or an answer key does not consist of
            exactly three ``-``-separated parts.
    """
    client = MTurkClient(hit_task_name)
    reviewable_hits = client.list_reviewable_hit_ids_and_annotations()

    date_scores = defaultdict(Counter)
    for hit_id, info in reviewable_hits:
        results = client.load_answers_for_hit(hit_id)

        # The unpacking also validates the annotation format.
        _task_name, hit_info = info.split(":")
        tl_topic, tl_name = hit_info.split("__")

        for assignment in results:
            for _worker_id, assignment_answer in assignment:
                for key, answer in assignment_answer.items():
                    _, _row_idx, answer_type = key.split("-")
                    vote = 1 if answer_type == "best" else -1
                    date_scores[tl_topic, tl_name][str_to_tl_date(
                        answer)] += vote

                    if "K" not in hit_task_name:
                        print(answer, answer_type)

    # The original had code after this return that paired the scores with
    # the loaded gold timelines; it could never run and has been removed.
    return date_scores
def compute_stats_for_tl(tlfname):
    """Compute sentence-splitting statistics for one timeline file.

    Every summary sentence of the timeline is run through ``nlp``
    (presumably a spaCy pipeline -- confirm at the definition site) and
    re-segmented.

    Args:
        tlfname: Path of the timeline file.

    Returns:
        Tuple ``(num_splitted_sents, root_counts, num_sents)``: how many
        timeline sentences were segmented into more than one sentence, a
        list with the number of tokens that are their own head for each
        segmented sentence, and the total number of timeline sentences.
    """
    with open(tlfname, errors="ignore") as f:
        timeline = Timeline.from_file(f)

    num_sents = 0
    num_splitted_sents = 0
    root_counts = []

    for date in timeline:
        for text in timeline[date]:
            num_sents += 1
            segments = list(nlp(text).sents)
            if len(segments) > 1:
                num_splitted_sents += 1
            # A token whose head is itself is a root of the parse.
            root_counts.extend(
                sum(1 for tok in segment if tok.head == tok)
                for segment in segments)

    return num_splitted_sents, root_counts, num_sents
def cross_eval_main():
    """Fit scoring parameters by cross-validation and pickle them.

    Command line: ``corpus_def config param_file``.

    ``corpus_def`` is a JSON file whose ``"corpora"`` list holds objects
    with a ``"tl_dir"`` (directory of ``.txt`` timelines, read as latin-1)
    and a ``"corpus_pkl"`` (corpus pickle). ``config`` is the JSON
    configuration for the timeline generator. The parameters returned by
    ``run_scoring_cv_train_mode`` are pickled to ``param_file``.
    """
    parser = argparse.ArgumentParser()
    for positional in ("corpus_def", "config", "param_file"):
        parser.add_argument(positional)
    args = parser.parse_args()

    with open(args.corpus_def) as f:
        corpus_defs = json.load(f)

    corpora_and_timelines = []
    for corpus_def in corpus_defs["corpora"]:
        timeline_dir = corpus_def["tl_dir"]
        corpus = load_corpus(corpus_def["corpus_pkl"])

        named_timelines = []
        for tl_fname in iter_files(timeline_dir, ".txt"):
            with open(tl_fname, encoding="latin-1") as f_tl:
                timeline = Timeline.from_file(f_tl)
            named_timelines.append((os.path.basename(tl_fname), timeline))

        corpora_and_timelines.append((corpus, named_timelines))

    with open(args.config) as f:
        config = json.load(f)

    tl_gen = GloballyClusteredSentenceCompressionTimelineGenerator(config)
    parameters = tl_gen.run_scoring_cv_train_mode(corpora_and_timelines)

    with open(args.param_file, "wb") as f_out:
        pickle.dump(parameters, f_out)
def evaluate_tl_main():
    """Generate system timelines for a corpus and evaluate them.

    Command line options:
        -f            filter the corpus with the blacklist
        -c            length constraint, ``sent`` (default) or ``tok``
        -t            gold timeline files
        -m            number of selection runs (enables multi-run mode)
        --queryfile   file with one query word per line
        corpus_pickle, config   positional arguments

    In single-run mode the generated timelines and the evaluation results
    are written below ``system_timelines/`` and ``evaluation_results/`` via
    ``write_results_file``. In multi-run mode one line of six ROUGE F-scores
    (concat, agreement, align; ROUGE-1 and ROUGE-2 each) per timeline is
    appended to ``multirun-results+<config>.txt``, with ``--------`` after
    each run and ``========`` after all runs.

    Raises:
        ValueError: If the constraint is neither ``sent`` nor ``tok``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-f",
                        dest="filter_corpus",
                        default=False,
                        action="store_true")
    parser.add_argument("-c", dest="constraint", default="sent")
    parser.add_argument("-t", dest="timelines", nargs="+")
    parser.add_argument("-m",
                        dest="num_multi_selection_runs",
                        type=int,
                        default=None)
    parser.add_argument("--queryfile")
    parser.add_argument("corpus_pickle")
    parser.add_argument("config")
    args = parser.parse_args()

    token_count_by_constraint = {"sent": False, "tok": True}
    if args.constraint not in token_count_by_constraint:
        raise ValueError("Unknown constraint {}".format(args.constraint))
    use_token_count = token_count_by_constraint[args.constraint]

    corpus = load_corpus(args.corpus_pickle,
                         filter_blacklist=args.filter_corpus)

    timelines = []
    for tl_fname in args.timelines:
        with open(tl_fname, errors="ignore") as f:
            timeline = Timeline.from_file(f)
        timelines.append((os.path.basename(tl_fname), timeline))

    with open(args.config) as f:
        config = json.load(f)

    tl_gen = GloballyClusteredSentenceCompressionTimelineGenerator(config)

    corpus_basename = os.path.basename(corpus.name).split(".")[0]
    print(corpus_basename)

    config_basename = os.path.basename(args.config)
    results_basename = config_basename
    if args.queryfile:
        results_basename += "+queryfilter"

    run_label = results_basename + "+" + args.constraint
    out_timelines_dir = os.path.join("system_timelines", run_label,
                                     corpus_basename)
    results_dir = os.path.join("evaluation_results", run_label)
    for directory in (out_timelines_dir, results_dir):
        if not os.path.isdir(directory):
            os.makedirs(directory)

    query_words = None
    if args.queryfile is not None:
        with open(args.queryfile) as f:
            query_words = [line.strip() for line in f]

    debug_identifier = results_basename + "+" + corpus_basename

    if use_token_count:
        # Mutates the dict that was already handed to the generator.
        config["scoring"]["use_length"] = True

    def _generate(**extra):
        # Both modes share every generator argument except the run count.
        return tl_gen.generate_timelines(
            corpus,
            [
                determine_tl_parameters(gold,
                                        use_token_count=use_token_count)
                for _, gold in timelines
            ],
            reference_timelines=[gold for _, gold in timelines],
            query_words=query_words,
            debug_identifier=debug_identifier,
            **extra)

    if args.num_multi_selection_runs is None:
        sys_timelines = _generate()
        write_results_file(
            os.path.join(results_dir, corpus_basename + ".txt"),
            out_timelines_dir, timelines, sys_timelines)
        return

    with open("multirun-results+{}.txt".format(config_basename),
              "a") as f_out:
        print(timelines)
        evaluator = rouge.TimelineRougeEvaluator(
            measures=["rouge_1", "rouge_2"])
        all_run_timelines = _generate(
            num_selection_runs=args.num_multi_selection_runs)

        for sys_timelines in all_run_timelines:
            for (_timeline_name, gold_timeline), sys_timeline in zip(
                    timelines, sys_timelines):
                reference_timeline = GroundTruth([gold_timeline])
                per_metric_results = [
                    evaluator.evaluate_concat("TL", sys_timeline,
                                              reference_timeline),
                    evaluator.evaluate_agreement("TL", sys_timeline,
                                                 reference_timeline),
                    evaluator.evaluate_align_date_content_costs_many_to_one(
                        "TL", sys_timeline, reference_timeline),
                ]
                f_out.write(" ".join(
                    str(result[measure]["f_score"])
                    for result in per_metric_results
                    for measure in ("rouge_1", "rouge_2")))
                f_out.write("\n")
            f_out.write("--------\n")
        f_out.write("========\n")
def main():
    """Upload date tuples of a timeline to MTurk in batches of 20.

    Command line: ``timeline event_info [--tuple-file FILE]``.

    The topic is the timeline's parent directory name and the timeline name
    is the file name up to its first dot. ``event_info`` is a JSON file keyed
    by topic. Tuple lines come from ``generate_date_tuples`` or, if
    ``--tuple-file`` is given, from that file (its first line is skipped).
    Each line holds tab-separated dates; every date is paired with its
    summary and the pairs of a line are sorted.

    For every uploaded batch a file ``./hitinfo/<hit id>`` is written: topic
    and timeline name on the first line, then one line of tab-separated
    dates per tuple.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("timeline")
    parser.add_argument("event_info")
    parser.add_argument("--tuple-file")
    args = parser.parse_args()

    tl_path = PurePath(args.timeline)
    tl_topic = tl_path.parts[-2]
    tl_name = tl_path.parts[-1].split(".")[0]

    with open(args.event_info) as f:
        all_event_info = json.load(f)
    event_info = all_event_info[tl_topic]

    with open(args.timeline, errors="ignore") as f:
        tl = Timeline.from_file(f)

    if args.tuple_file is not None:
        with open(args.tuple_file) as f:
            # The first line of the tuple file is skipped.
            result_tuples = f.read().strip().split("\n")[1:]
    else:
        result_tuples = generate_date_tuples(tl)

    tuples = [
        tuple(
            sorted((str_to_tl_date(date_text),
                    tl.dates_to_summaries[str_to_tl_date(date_text)])
                   for date_text in tuple_line.split("\t")))
        for tuple_line in result_tuples
    ]

    batch_size = 20
    batches = [
        tuples[start:start + batch_size]
        for start in range(0, len(tuples), batch_size)
    ]

    print("Uploading", len(tuples), "tuples")

    client = MTurkClient("Timeline Importance Annotation VIII (K)")
    hit_info_path = Path("./hitinfo")
    hit_info_path.mkdir(parents=True, exist_ok=True)

    hit_label = "importance:" + tl_topic + "__" + tl_name
    for batch in batches:
        hit_ids = client.upload_html(
            [(hit_label, generate_template(tl, batch, event_info))])
        hit_id = hit_ids[0]

        with open(hit_info_path / hit_id, "w") as f:
            f.write(tl_topic + "\t" + tl_name + "\n")
            for candidate_tuple in batch:
                f.write("\t".join(str(item[0]) for item in candidate_tuple))
                f.write("\n")
def evaluate(tls_model,
             dataset,
             result_path,
             trunc_timelines=False,
             time_span_extension=0,
             word_mover_stop_words='nltk'):
    """Evaluate a timeline summarization model on every reference timeline.

    For each collection of ``dataset`` and each of its reference timelines,
    the model is loaded with the current topic ignored, a timeline with as
    many dates and the same average summary length as the reference is
    predicted, and alignment-based ROUGE, date selection, word-mover and
    date-distance scores are computed and printed together with a running
    average.

    Args:
        tls_model: Model offering ``load(ignored_topics=...)`` and
            ``predict(collection, max_dates=..., max_summary_sents=...,
            ref_tl=...)``.
        dataset: Object with a ``collections`` sequence.
        result_path: Where the JSON with all and averaged results is written.
        trunc_timelines: Truncate the reference timelines via
            ``data.truncate_timelines`` first.
        time_span_extension: Passed to ``data.get_input_time_span``.
        word_mover_stop_words: Passed to ``get_wordmover_score``.
    """
    metric = 'align_date_content_costs_many_to_one'
    evaluator = tilse_rouge.TimelineRougeEvaluator(
        measures=["rouge_1", "rouge_2"])
    n_topics = len(dataset.collections)

    results = []
    for topic_no, collection in enumerate(dataset.collections, start=1):
        ref_timelines = [
            TilseTimeline(tl.date_to_summaries)
            for tl in collection.timelines
        ]
        topic = collection.name
        # Counted before truncation.
        n_ref = len(ref_timelines)

        if trunc_timelines:
            ref_timelines = data.truncate_timelines(ref_timelines,
                                                    collection)

        for ref_no, ref_timeline in enumerate(ref_timelines, start=1):
            print(
                f'topic {topic_no}/{n_topics}: {topic}, ref timeline {ref_no}/{n_ref}'
            )

            tls_model.load(ignored_topics=[collection.name])

            ref_dates = sorted(ref_timeline.dates_to_summaries)
            collection.start, collection.end = data.get_input_time_span(
                ref_dates, time_span_extension)

            gold_len = len(ref_dates)
            gold_k = data.get_average_summary_length(ref_timeline)

            pred_timeline_ = tls_model.predict(
                collection,
                max_dates=gold_len,
                max_summary_sents=gold_k,
                ref_tl=ref_timeline  # only oracles need this
            )

            print('*** PREDICTED ***')
            utils.print_tl(pred_timeline_)
            print('timeline done')

            pred_timeline = TilseTimeline(pred_timeline_.date_to_summaries)
            sys_len = len(pred_timeline.get_dates())
            ground_truth = TilseGroundTruth([ref_timeline])

            rouge_scores = get_scores(metric, pred_timeline, ground_truth,
                                      evaluator)
            date_scores = evaluate_dates(pred_timeline, ground_truth)
            wm_scores = get_wordmover_score(pred_timeline,
                                            ground_truth,
                                            word_mover_stop_words,
                                            device='cpu')
            dd_scores = date_dist_scores(pred_timeline, ground_truth)

            print('sys-len:', sys_len, 'gold-len:', gold_len, 'gold-k:',
                  gold_k)

            print('Alignment-based ROUGE:')
            pprint(rouge_scores)
            print('Date selection:')
            pprint(date_scores)
            pprint(dd_scores)
            print('WordMover scores:')
            pprint(wm_scores)
            print('-' * 100)

            results.append((rouge_scores, date_scores, wm_scores, dd_scores,
                            pred_timeline_.to_dict()))

            print("Running average:")
            print(get_average_results(results))
            print()

    avg_results = get_average_results(results)
    print('Average results:')
    pprint(avg_results)

    utils.write_json({
        'average': avg_results,
        'results': results,
    }, result_path)
import sys

from tilse.data.timelines import Timeline

if __name__ == "__main__":
    # Print a LaTeX table body comparing the first dates of several
    # timelines. Every command line argument is a timeline file (read as
    # latin-1) and becomes one column. For each row index two table rows are
    # emitted: the bold dates, then the matching summaries joined with
    # "\newline".
    timelines = []
    for tl_fname in sys.argv[1:]:
        with open(tl_fname, encoding="latin-1") as f:
            timelines.append(Timeline.from_file(f))

    timeline_dates = [sorted(tl.get_dates()) for tl in timelines]

    # At most five dates are shown. The count is capped by the shortest
    # timeline so that a timeline with fewer than five dates no longer
    # raises IndexError.
    num_date_rows = min([5] + [len(dates) for dates in timeline_dates])

    rows = []
    for date_idx in range(num_date_rows):
        date_row = []
        sum_row = []
        for tl, dates in zip(timelines, timeline_dates):
            tl_date = dates[date_idx]
            date_row.append("\\textbf{" + str(tl_date) + "}")
            sum_row.append(" \\newline ".join(tl[tl_date]))
        rows.append(date_row)
        rows.append(sum_row)

    for row in rows:
        print(" & ".join(row), "\\\\\\hline")
args = parser.parse_args() relevant_systems = set(args.relevant_systems) all_relevant_timelines = defaultdict(lambda: defaultdict(dict)) for directory in iter_dirs(args.system_tl_dir): system_name = os.path.basename(directory) for tl_dir in iter_dirs(directory): for tlfilename in iter_files(tl_dir, ".txt"): #print(system_name, relevant_systems) if system_name in relevant_systems: with open(tlfilename) as tlfile: all_relevant_timelines[system_name][os.path.basename( tl_dir)][os.path.basename( tlfilename)] = Timeline.from_file(tlfile) #for directory in iter_dirs(args.human_tl_dir): # source_name = os.path.basename(directory) # for tlfilename in iter_files(directory, ".txt"): # with open(tlfilename, errors='ignore') as tlfile: # all_relevant_timelines["human"][source_name][os.path.basename(tlfilename)] = Timeline.from_file(tlfile) vectorized_timelines = vectorize_timelines(all_relevant_timelines) num_samples_per_tl = 5 all_samples = [] for system, timelines in vectorized_timelines.items(): system_samples = set() for topic_name, tl_name, timeline in timelines:
def main():
    """Mark readability samples whose summary copies a corpus sentence.

    Reads the CSV named by ``sys.argv[1]`` and writes
    ``readability-samples-fixed-withcopy.csv``: the same rows in their
    original order with an extra ``Copied?`` column (``y``/``n``).

    Columns used: 0 = system name, 1 = corpus name, 2 = timeline file name,
    3 = date as ``YYYY-MM-DD``. Rows of system ``gold`` are always marked
    ``n``. For every other row the system timeline below
    ``filtered/system_timelines`` is loaded, and the row is marked ``y``
    when any summary line for that date, lowercased and whitespace-split,
    equals the lowercased token sequence of a corpus sentence.
    """
    corpus_basepath = pathlib.Path("./corpora")

    # newline="" is the documented way to open files for the csv module.
    with open(sys.argv[1], newline="") as f_in, \
            open("readability-samples-fixed-withcopy.csv", "w",
                 newline="") as f_out:
        reader = csv.reader(f_in)
        writer = csv.writer(f_out)

        header = next(reader)
        writer.writerow(header + ["Copied?"])

        # Group rows by corpus so each corpus is loaded and indexed once.
        # The original row index is kept to restore the input order.
        lines = sorted(enumerate(reader), key=lambda x: x[1][1], reverse=True)

        # The original compared against a `prev_corpus` that was never
        # updated, so the sentence set was rebuilt for every row.
        prev_corpus_path = None
        corpus_sentences = set()

        new_lines = []
        for l_idx, line in lines:
            did_copy = False
            if line[0] != "gold":
                corpus_path = corpus_basepath / (line[1] + ".pkl")
                print(corpus_path)
                if corpus_path != prev_corpus_path:
                    corpus = CachedCorpusReader.load_corpus(str(corpus_path))
                    corpus_sentences = {
                        tuple(s.as_token_attr_sequence("form_lowercase"))
                        for s in corpus.sentences
                    }
                    prev_corpus_path = corpus_path

                tl_name = line[2]
                # Non-"nn" systems store the tl17-mj BBC timeline under a
                # different file name.
                if ("nn" not in line[0] and line[1] == "tl17-mj"
                        and line[2] == "bbc.txt"):
                    tl_name = "bbc.co.uk.txt"

                tl_path = os.path.join("filtered", "system_timelines",
                                       line[0], line[1], tl_name)
                print(tl_path)

                with open(tl_path, errors="ignore") as f_tl:
                    tl = Timeline.from_file(f_tl)

                summary = tl.dates_to_summaries[datetime.date(
                    *map(int, line[3].split("-")))]
                for summary_line in summary:
                    tokens = tuple(summary_line.strip().lower().split())
                    if tokens in corpus_sentences:
                        print("COPY!")
                        did_copy = True

            new_line = list(line)
            new_line.append("y" if did_copy else "n")
            new_lines.append((l_idx, new_line))

        for _idx, new_line in sorted(new_lines):
            writer.writerow(new_line)
def main():
    """Print a baseline timeline built from the most frequent SVO triples.

    Command line: ``config corpus gold_timeline``.

    The corpus is clustered with the configured generator. For every dated
    cluster the lower-cased lemmas of all (predicate, subject, object)
    triples are counted per date. Triples are then visited from most to
    least frequent and kept when the date lies inside the gold timeline's
    time span, the number of dates stays within ``max_date_count``, the
    date has fewer than ``max_date_sent_count`` triples so far, and the
    predicate is neither "happen" nor "say".
    """
    base_config_path = sys.argv[1]
    corpus_fname = sys.argv[2]
    gold_tl_fname = sys.argv[3]

    with open(gold_tl_fname, errors="ignore") as f:
        gold_tl = Timeline.from_file(f)

    with open(base_config_path) as f:
        config = json.load(f)

    corpus = reader.load_corpus(corpus_fname)

    generator = GloballyClusteredSentenceCompressionTimelineGenerator(config)
    # Only the dated clusters are needed here.
    _, _, dated_cluster_promise, _ = generator.get_promises(corpus)
    dated_clusters = dated_cluster_promise.get()

    def _lemma(token):
        # Missing triple slots stay None.
        return None if token is None else token.lemma.lower()

    all_svo_tuples_per_date = defaultdict(Counter)
    for cluster, date in dated_clusters:
        for sentence in cluster:
            for pred, subj, obj in \
                    sentence.dependency_tree.extract_svo_tuples():
                triple = (_lemma(pred), _lemma(subj), _lemma(obj))
                all_svo_tuples_per_date[date][triple] += 1

    triples_by_frequency = [
        (count, date, triple)
        for date, counter in sorted(all_svo_tuples_per_date.items())
        for triple, count in counter.items()
    ]

    params = determine_tl_parameters(gold_tl)

    selected_triples = dict()
    for _count, date, triple in sorted(triples_by_frequency,
                                       reverse=True,
                                       key=lambda x: x[0]):
        if date < params.first_date or date > params.last_date:
            continue

        if date not in selected_triples:
            if len(selected_triples) < params.max_date_count:
                selected_triples[date] = []
            else:
                continue

        # `>=` fixes an off-by-one: the original `>` let every date collect
        # max_date_sent_count + 1 entries.
        if len(selected_triples[date]) >= params.max_date_sent_count:
            continue

        if triple[0] in ("happen", "say"):
            continue

        selected_triples[date].append(str(triple))

    print(Timeline(selected_triples))
def read_tl(fname):
    """Return the ``Timeline`` parsed from the file at *fname*.

    The file is opened in text mode; undecodable bytes are ignored.
    """
    with open(fname, errors="ignore") as handle:
        timeline = Timeline.from_file(handle)
    return timeline
def main():
    """Correlate ROUGE differences of two systems with timeline properties.

    Command line: ``sys_1_results_dir sys_2_results_dir``.

    For every corpus present in both result sets and every timeline of the
    first system, the difference of the two systems' aligned ROUGE-2 F1 is
    paired with (a) the number of corpus documents between the first and
    last gold date, (b) the compression rate ``1 - timeline sentences /
    corpus sentences`` and (c) the spread of the gold timeline. Spearman
    correlations are printed and one scatter plot per property is shown
    (red: first system not better, blue: first system better).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("sys_1_results_dir")
    parser.add_argument("sys_2_results_dir")
    args = parser.parse_args()

    results_1 = read_results_dir(args.sys_1_results_dir)
    results_2 = read_results_dir(args.sys_2_results_dir)

    score_diffs, available_sents = [], []
    compression_rates, spreads = [], []

    for corpus_name in results_1:
        if corpus_name not in results_2:
            continue

        corpus = load_corpus("corpora/" + corpus_name.rsplit(".")[0] +
                             ".pkl")

        for tl_name, result_1 in results_1[corpus_name].items():
            result_2 = results_2[corpus_name][tl_name]

            gold_path = ("gold-timelines/" + corpus_name.split(".")[0] +
                         "/" + tl_name)
            with open(gold_path, errors="ignore") as f:
                print(gold_path)
                gold_tl = Timeline.from_file(f)

            total_tl_length = sum(
                len(sents) for sents in gold_tl.dates_to_summaries.values())
            total_corpus_length = len(corpus.sentences)

            score_diffs.append(result_1.rouge_2_align.f1 -
                               result_2.rouge_2_align.f1)
            docs_in_span = corpus.docs_between_dates(
                min(gold_tl.get_dates()), max(gold_tl.get_dates()))
            available_sents.append(len(docs_in_span))
            compression_rates.append(1.0 -
                                     (total_tl_length / total_corpus_length))
            spreads.append(compute_spread(gold_tl))

    print("Sents", scipy.stats.spearmanr(available_sents, score_diffs))
    print("Compression", scipy.stats.spearmanr(compression_rates,
                                               score_diffs))
    print("Spread", scipy.stats.spearmanr(spreads, score_diffs))

    point_colors = [
        "r" if score_diff <= 0.0 else "b" for score_diff in score_diffs
    ]
    for plot_idx, x_values in enumerate(
            (available_sents, compression_rates, spreads)):
        # The first plot uses the implicit figure; later ones open a new one.
        if plot_idx > 0:
            plt.figure()
        plt.axhline(color="b")
        plt.scatter(x_values, score_diffs, c=point_colors)

    plt.show()
def main():
    """Report how often annotated events are present in timelines.

    ``sys.argv[1]`` is an annotation file. Lines starting with ``==`` have
    the form ``== <corpus> <timeline>`` and open a new timeline; lines with
    a single field are skipped; all other lines are ``<YYYY-MM-DD>
    <response>``. A response other than ``-`` counts as "event present".

    If a system name is given as ``sys.argv[2]``, only dates shared by the
    gold and that system's timeline are considered (when the intersection is
    non-empty). The averages over all timelines of the presence ratio among
    the last three dates, the first three dates and all considered dates are
    printed.
    """
    event_annotations = defaultdict(dict)
    current_tl_key = None
    system_name = sys.argv[2] if len(sys.argv) == 3 else None

    with open(sys.argv[1]) as f:
        for line in f:
            if line.startswith("=="):
                _, corpus, tl_name = line.split()
                current_tl_key = (corpus, tl_name)
                continue

            elems = line.split()
            if len(elems) == 1:
                continue

            date_text, response = elems
            year, month, day = map(int, date_text.split("-"))
            event_annotations[current_tl_key][datetime.date(
                year, month, day)] = response

    event_present_in_first_3_ratios = {}
    event_present_in_last_3_ratios = {}
    event_present_ratios = {}

    for tl_key, annotations in event_annotations.items():
        dates_to_consider = None
        if system_name:
            with open(os.path.join("gold-timelines", tl_key[0], tl_key[1]),
                      errors="ignore") as f:
                gold_tl = Timeline.from_file(f)
            with open(os.path.join("system_timelines", system_name,
                                   tl_key[0], tl_key[1]),
                      errors="ignore") as f:
                sys_tl = Timeline.from_file(f)
            dates_to_consider = set(gold_tl.get_dates()) & set(
                sys_tl.get_dates())

        num_correct_first_3 = 0
        num_correct_last_3 = 0
        num_correct = 0
        num_total = 0

        ordered = sorted(annotations.items())
        last_3_start = len(ordered) - 3
        for idx, (date, annotation) in enumerate(ordered):
            # An empty or missing filter set means "consider every date".
            if dates_to_consider and date not in dates_to_consider:
                continue

            if annotation != "-":
                num_correct += 1
                # Positions refer to the full, unfiltered date list.
                if idx < 3:
                    num_correct_first_3 += 1
                elif idx >= last_3_start:
                    num_correct_last_3 += 1

            num_total += 1

        event_present_ratios[tl_key] = (num_correct /
                                        num_total if num_total else 0)
        event_present_in_first_3_ratios[tl_key] = num_correct_first_3 / 3
        event_present_in_last_3_ratios[tl_key] = num_correct_last_3 / 3

    def _mean(ratios):
        return sum(ratios.values()) / len(ratios)

    print("Last", _mean(event_present_in_last_3_ratios))
    print("First", _mean(event_present_in_first_3_ratios))
    print("Total", _mean(event_present_ratios))
def main_twotasks():
    """Compare the date rankings obtained from two MTurk annotation tasks.

    Retrieves the per-date scores of the tasks "... V (J)" and "... VI (K)",
    which must cover the same timelines in the same order. For every
    timeline it prints the ten highest-scored dates of each task and their
    overlap ratio, the rank mapping of task J, per-date ranks, scores and
    summaries, and finally ``k.alpha`` at the interval level plus Kendall's
    tau between the two score vectors. Dates without votes count as
    score 0.
    """
    annotations_j = retrieve_hit_ranking(
        "Timeline Importance Annotation V (J)")
    annotations_k = retrieve_hit_ranking(
        "Timeline Importance Annotation VI (K)")

    assert list(annotations_j) == list(annotations_k)

    tl_base_path = Path("./gold-timelines")
    num_top_dates = 10

    def _by_score_desc(scores):
        # Dates ordered from highest to lowest score (stable for ties).
        return [
            date for date, _ in sorted(
                scores.items(), key=lambda item: item[1], reverse=True)
        ]

    for topic, tl_name in annotations_j:
        with open(tl_base_path / topic / (tl_name + ".txt"),
                  errors="ignore") as f:
            tl = Timeline.from_file(f)

        all_dates = tl.get_dates()

        scores_j = annotations_j[topic, tl_name]
        scores_k = annotations_k[topic, tl_name]

        # Give every timeline date an explicit score in both tasks.
        for date in all_dates:
            if date not in scores_j:
                scores_j[date] = 0
            if date not in scores_k:
                scores_k[date] = 0

        obs_j = np.zeros(len(tl.get_dates()))
        obs_k = np.zeros(len(tl.get_dates()))

        ranking_j = _by_score_desc(scores_j)
        ranking_k = _by_score_desc(scores_k)

        # The original sorted ascending here and therefore compared the
        # lowest-scored dates although they were meant to be the top ones.
        top_dates_j = ranking_j[:num_top_dates]
        top_dates_k = ranking_k[:num_top_dates]

        print(top_dates_j)
        print(top_dates_k)
        print(
            len(set(top_dates_j).intersection(set(top_dates_k))) /
            len(top_dates_j))

        ranks_j = {date: rank for rank, date in enumerate(ranking_j)}
        ranks_k = {date: rank for rank, date in enumerate(ranking_k)}
        print(ranks_j)

        for idx, date in enumerate(all_dates):
            score_j = scores_j[date]
            score_k = scores_k[date]
            obs_j[idx] = score_j
            obs_k[idx] = score_k

            print(ranks_j[date] + 1, ranks_k[date] + 1)
            print(score_j, score_k)
            print(date)
            print(tl.dates_to_summaries[date])

        print(
            k.alpha(np.stack([obs_j, obs_k]),
                    level_of_measurement="interval"))
        print(scipy.stats.kendalltau(obs_j, obs_k))
def main():
    """Evaluate existing system timelines, optionally after a length cutoff.

    Command line:
        -e  evaluation results directory (default ``evaluation_results``)
        -s  system timelines directory   (default ``system_timelines``)
        -g  gold timelines directory     (default ``gold-timelines``)
        -c  cutoff constraint            (default ``none``)
        system_name, system_tl_files...  positional arguments

    System timelines are grouped by the corpus name that ``parse_tl_name``
    derives from their path. For every corpus the system and gold timelines
    are paired in name order; unless the constraint is ``none`` each system
    timeline is cut to the parameters of its gold counterpart. Results are
    written via ``write_results_file`` below directories named
    ``<system_name>+<constraint>``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-e",
                        dest="evaluation_results_dir",
                        default="evaluation_results")
    parser.add_argument("-s",
                        dest="system_timelines_dir",
                        default="system_timelines")
    parser.add_argument("-g",
                        dest="gold_timelines_dir",
                        default="gold-timelines")
    parser.add_argument("-c", dest="cutoff_constraint", default="none")
    parser.add_argument("system_name")
    parser.add_argument("system_tl_files", nargs="+")
    args = parser.parse_args()

    per_corpus_timelines = defaultdict(dict)
    for tl_fname in args.system_tl_files:
        corpus_name, tl_name = parse_tl_name(tl_fname)
        with open(tl_fname) as f:
            per_corpus_timelines[corpus_name][tl_name] = Timeline.from_file(
                f)

    system_name = args.system_name + "+" + args.cutoff_constraint
    system_sys_tl_dir = os.path.join(args.system_timelines_dir, system_name)
    system_evaluation_dir = os.path.join(args.evaluation_results_dir,
                                         system_name)
    ensure_is_dir(system_sys_tl_dir)
    ensure_is_dir(system_evaluation_dir)

    apply_cutoff = args.cutoff_constraint != "none"
    for corpus_name, tls in per_corpus_timelines.items():
        corpus_sys_tl_dir = os.path.join(system_sys_tl_dir, corpus_name)
        ensure_is_dir(corpus_sys_tl_dir)

        # Drops the last four characters, i.e. a trailing ".pkl".
        corpus_stem = corpus_name[:-len(".pkl")]
        gold_tl_dir = os.path.join(args.gold_timelines_dir, corpus_stem)

        gold_tls = sorted(read_gold_tl_dir(gold_tl_dir).items())
        sys_tls = [timeline for _, timeline in sorted(tls.items())]

        if apply_cutoff:
            sys_tls = [
                timeline_by_applying_constraints(
                    sys_tl,
                    determine_tl_parameters(gold_tl),
                    constraint_type=args.cutoff_constraint)
                for sys_tl, (_, gold_tl) in zip(sys_tls, gold_tls)
            ]

        write_results_file(
            os.path.join(system_evaluation_dir, corpus_stem + ".txt"),
            corpus_sys_tl_dir, gold_tls, sys_tls)
db_file.write( "\t".join([ os.path.basename(topic_gold_tl_dir), os.path.basename(gold_tl_fname), "detail", str(date) ]) ) db_file.write("\n") annotator_file.write("# Ask about a detail of what happened on that day.\n\n") if __name__ == "__main__": gold_tl_dir = sys.argv[1] out_file_prefix = sys.argv[2] db_file = open(out_file_prefix + ".db", "w") annotator_file = open(out_file_prefix + ".txt", "w") for topic_gold_tl_dir in iter_dirs(gold_tl_dir): for gold_tl_fname in iter_files(topic_gold_tl_dir, ".txt"): with open(gold_tl_fname, errors="ignore") as f: tl = Timeline.from_file(f) #write_entity_questions(topic_gold_tl_dir, gold_tl_fname, tl, db_file, annotator_file) #write_date_questions(topic_gold_tl_dir, gold_tl_fname, tl, db_file, annotator_file) write_entity_detail_question(topic_gold_tl_dir, gold_tl_fname, tl, db_file, annotator_file) db_file.close() annotator_file.close()
def evaluate(tls_model,
             dataset,
             result_path,
             trunc_timelines=False,
             time_span_extension=0):
    """Evaluate a timeline summarization model and record cluster counts.

    For each collection of ``dataset`` and each of its reference timelines,
    the model is loaded with the current topic ignored and asked to predict
    a timeline with as many dates and the same average summary length as
    the reference. Alignment-based ROUGE and date selection scores are
    computed and printed.

    Args:
        tls_model: Model offering ``load(ignored_topics=...)`` and a
            ``predict`` that returns ``(timeline, n_clusters)``.
        dataset: Object with a ``collections`` sequence.
        result_path: Where the JSON output is written. It holds the summed
            cluster counts divided by the number of collections, the
            averaged results and the per-timeline results.
        trunc_timelines: Truncate the reference timelines via
            ``data.truncate_timelines`` first.
        time_span_extension: Passed to ``data.get_input_time_span``.
    """
    metric = 'align_date_content_costs_many_to_one'
    evaluator = rouge.TimelineRougeEvaluator(measures=["rouge_1", "rouge_2"])
    n_topics = len(dataset.collections)

    results = []
    ave_cluster = 0
    for topic_no, collection in enumerate(dataset.collections, start=1):
        ref_timelines = [
            TilseTimeline(tl.date_to_summaries)
            for tl in collection.timelines
        ]
        topic = collection.name
        # Counted before truncation.
        n_ref = len(ref_timelines)

        # only for entity
        if trunc_timelines:
            ref_timelines = data.truncate_timelines(ref_timelines,
                                                    collection)

        for ref_no, ref_timeline in enumerate(ref_timelines, start=1):
            print(
                f'topic {topic_no}/{n_topics}: {topic}, ref timeline {ref_no}/{n_ref}'
            )

            tls_model.load(ignored_topics=[collection.name])

            ref_dates = sorted(ref_timeline.dates_to_summaries)
            start, end = data.get_input_time_span(ref_dates,
                                                  time_span_extension)
            collection.start = start
            collection.end = end
            print("name = {} start = {} end = {}".format(topic, start, end))

            gold_len = len(ref_dates)
            gold_k = data.get_average_summary_length(ref_timeline)

            pred_timeline_, n_clusters = tls_model.predict(
                collection,
                max_dates=gold_len,
                max_summary_sents=gold_k,
                ref_tl=ref_timeline  # only oracles need this
            )
            ave_cluster += n_clusters

            print('timeline done')
            pred_timeline = TilseTimeline(pred_timeline_.date_to_summaries)
            sys_len = len(pred_timeline.get_dates())
            ground_truth = TilseGroundTruth([ref_timeline])

            rouge_scores = get_scores(metric, pred_timeline, ground_truth,
                                      evaluator)
            date_scores = evaluate_dates(pred_timeline, ground_truth)

            print('sys-len:', sys_len, 'gold-len:', gold_len, 'gold-k:',
                  gold_k)

            print('Alignment-based ROUGE:')
            pprint(rouge_scores)
            print('Date selection:')
            pprint(date_scores)
            print('-' * 100)

            results.append(
                (rouge_scores, date_scores, pred_timeline_.to_dict()))

    avg_results = get_average_results(results)
    print('Average results:')
    pprint(avg_results)

    utils.write_json({
        'average_clusters': ave_cluster / len(dataset.collections),
        'average': avg_results,
        'results': results,
    }, result_path)