def main(args):
    """Shift annotation spans by an offset and print the marked-up article.

    Attributes read from ``args``: ``spans_file``, ``article_file``,
    ``add_line_numbers``, ``fix_from_char_index``, ``offset`` and
    ``propaganda_techniques_file``.

    The spans are loaded from the spans file, shifted via ``shift_spans``,
    and used to mark the article text.  The marked text, the legend and the
    footnotes are printed in that order.  When the offset is non-zero the
    annotations are additionally saved to ``<spans_file>.fix``.
    """
    spans_path = args.spans_file
    article_path = args.article_file
    show_line_numbers = bool(args.add_line_numbers)
    shift_start = int(args.fix_from_char_index)
    shift_amount = int(args.offset)
    techniques_path = args.propaganda_techniques_file

    annotations = aa.Articles_annotations()
    technique_list = pt.Propaganda_Techniques(filename=techniques_path)
    an.Annotation.set_propaganda_technique_list_obj(technique_list)
    annotations.load_article_annotations_from_csv_file(spans_path)
    annotations.shift_spans(shift_start, shift_amount)

    # codecs.open reads the file in binary mode underneath, so line endings
    # are not translated (presumably so character offsets stay aligned with
    # the file on disk -- confirm).
    with codecs.open(article_path, "r", encoding="utf8") as article_handle:
        article_text = article_handle.read()

    marked_text, footnotes, legend = annotations.mark_text(
        article_text, show_line_numbers)
    for section in (marked_text, legend, footnotes):
        print(section)

    # Nothing was moved, so there is nothing new to persist.
    if shift_amount == 0:
        return
    annotations.save_annotations_to_file(spans_path + ".fix")
    print("Fixed annotations saved to file %s.fix" % (spans_path))
def main(args):
    """Print an article with its annotations tagged, then the footnotes.

    Attributes read from ``args``: ``spans_file``, ``article_file``,
    ``propaganda_techniques_list_file`` and ``debug_on_std``.

    Unless ``debug_on_std`` is truthy, the "propaganda_scorer" logger is
    restricted to ERROR level.  The technique list is stored on the
    ``aa.Articles_annotations`` class before the spans are loaded.
    """
    spans_path = args.spans_file
    article_path = args.article_file
    techniques_path = args.propaganda_techniques_list_file
    verbose = bool(args.debug_on_std)

    if not verbose:
        scorer_logger = logging.getLogger("propaganda_scorer")
        scorer_logger.setLevel(logging.ERROR)

    technique_list = pt.Propaganda_Techniques(techniques_path)
    annotations = aa.Articles_annotations()
    aa.Articles_annotations.techniques = technique_list
    annotations.load_article_annotations_from_csv_file(spans_path)

    with codecs.open(article_path, "r", encoding="utf8") as article_handle:
        article_text = article_handle.read()

    # Per the original author's notes, tag_text_with_annotations adds HTML
    # tags; annotations.mark_text(article_text) is the terminal-oriented
    # alternative and additionally returns a legend.
    tagged_text, footnotes = annotations.tag_text_with_annotations(
        article_text)
    for section in (tagged_text, footnotes):
        print(section)
def add_annotation(self, annotation: an.Annotation, article_id: str):
    """
    Attach a single annotation to the article identified by article_id.

    When has_article reports that the article is not present yet, a new
    aa.Articles_annotations object is stored for it first; the annotation
    is then added to the object registered under article_id.
    """
    article_known = self.has_article(article_id)
    if not article_known:
        new_container = aa.Articles_annotations(article_id=article_id)
        self.annotations[article_id] = new_container
    # Look the container up again so the stored object is the one updated.
    self.annotations[article_id].add_annotation(annotation)
def main(args):
    """Load a spans file, run the overlap check, and save the result.

    Attributes read from ``args``: ``spans_file``,
    ``write_technique_on_output``, ``fragments_only`` and ``output_file``.
    When ``output_file`` is empty or None the output path defaults to
    ``<spans_file>.merged``.
    """
    spans_path = args.spans_file
    include_technique = bool(args.write_technique_on_output)
    fragments_only = bool(args.fragments_only)
    # A falsy output path falls back to the default next to the input.
    destination = args.output_file or spans_path + ".merged"

    article_annotations = an.Articles_annotations()
    article_annotations.load_article_annotations_from_csv_file(spans_path)
    # NOTE(review): the second argument (True) presumably asks for the
    # overlapping spans to be merged, given the ".merged" suffix -- confirm
    # against has_overlapping_spans.
    article_annotations.has_overlapping_spans(fragments_only, True)
    # The meaning of the first two flags is not visible here; only the
    # third one is driven by the command line.
    article_annotations.set_output_format(True, True, include_technique)
    article_annotations.save_annotations_to_file(destination)
def main(args):
    """Print an article marked with its annotations, the legend and footnotes.

    Attributes read from ``args``: ``spans_file``, ``article_file`` and
    ``add_line_numbers``.  The technique list is always loaded from the
    fixed path below and stored as ``an.techniques``.
    """
    spans_path = args.spans_file
    article_path = args.article_file
    show_line_numbers = bool(args.add_line_numbers)

    # The path is relative, so it is resolved against the current working
    # directory.
    technique_list = pt.Propaganda_Techniques(
        filename="data/propaganda-techniques-names-semeval2020task11.txt")
    an.techniques = technique_list

    annotations = an.Articles_annotations()
    annotations.load_article_annotations_from_csv_file(spans_path)

    with codecs.open(article_path, "r", encoding="utf8") as article_handle:
        article_text = article_handle.read()

    marked_text, footnotes, legend = annotations.mark_text(
        article_text, show_line_numbers)
    # Output order: text, then legend, then footnotes.
    for section in (marked_text, legend, footnotes):
        print(section)
def main(args):
    """Print an article marked with its annotations, the legend and footnotes.

    Attributes read from ``args``: ``spans_file``, ``article_file`` and
    ``add_line_numbers`` (converted with ``bool``).
    """
    spans_path = args.spans_file
    article_path = args.article_file
    show_line_numbers = bool(args.add_line_numbers)

    annotations = an.Articles_annotations()
    annotations.load_article_annotations_from_csv_file(spans_path)

    with codecs.open(article_path, "r", encoding="utf8") as article_handle:
        article_text = article_handle.read()

    marked_text, footnotes, legend = annotations.mark_text(
        article_text, show_line_numbers)
    # Output order: text, then legend, then footnotes.
    for section in (marked_text, legend, footnotes):
        print(section)
def main(args):
    """Print an article marked with its annotations, followed by the footnotes.

    Attributes read from ``args``: ``spans_file``, ``article_file`` and
    ``propaganda_techniques_list_file``.  The technique list is stored on
    the ``aa.Articles_annotations`` class before the spans are loaded.
    """
    spans_path = args.spans_file
    article_path = args.article_file
    techniques_path = args.propaganda_techniques_list_file

    technique_list = pt.Propaganda_Techniques(techniques_path)
    annotations = aa.Articles_annotations()
    aa.Articles_annotations.techniques = technique_list
    annotations.load_article_annotations_from_csv_file(spans_path)

    with codecs.open(article_path, "r", encoding="utf8") as article_handle:
        article_text = article_handle.read()

    # mark_text also returns a legend; this entry point does not print it.
    # annotations.tag_text_with_annotations(article_text) is the alternative
    # renderer noted by the original author.
    marked_text, footnotes, _legend = annotations.mark_text(article_text)
    for section in (marked_text, footnotes):
        print(section)
def create_article_annotations_object(self, article_id: str) -> None:
    """Store a new aa.Articles_annotations object under article_id.

    The assignment is unconditional, so any object already registered for
    article_id is replaced.
    """
    new_container = aa.Articles_annotations(article_id=article_id)
    self.annotations[article_id] = new_container
def add_annotation(self, annotation, article_id):
    """Add one annotation to the article identified by article_id.

    When has_article reports that the article is not present yet, a new
    an.Articles_annotations object is stored for it first; the annotation
    is then added to the object registered under article_id.
    """
    article_known = self.has_article(article_id)
    if not article_known:
        new_container = an.Articles_annotations(article_id=article_id)
        self.annotations[article_id] = new_container
    # Look the container up again so the stored object is the one updated.
    self.annotations[article_id].add_annotation(annotation)
def main(args):
    """Align span annotations made on a converted text to the original text.

    Attributes read from ``args``: ``spans_file``, ``article_file`` (the
    converted text the spans refer to), ``original_text_file``,
    ``gold_spans`` (optional path, may be None), ``debug`` and
    ``output_file_name`` (defaults to ``<spans_file>.aligned.txt`` when
    None).

    The spans are loaded as ``an.AnnotationWithOutLabel`` objects, sorted,
    passed through ``has_overlapping_spans(True, True)``, aligned to the
    original text with ``align_annotation_to_new_text`` and saved.  When a
    gold spans file is given, the aligned spans are compared with it and
    either a diagnostic report or an ``OK`` line is printed.

    Changes from the previous version: ``debug`` is tested for truthiness
    instead of ``== True``; the debug dump of the gold spans is skipped when
    no gold file was supplied (it used to try loading from ``None``); stray
    trailing commas after ``print`` calls were removed.
    """
    span_file = args.spans_file
    article_file = args.article_file
    original_text_file = args.original_text_file
    gold_spans = args.gold_spans
    debug = args.debug
    output_file_name = args.output_file_name
    if output_file_name is None:
        output_file_name = span_file + ".aligned.txt"

    annotations = aa.Articles_annotations()
    annotations.load_article_annotations_from_csv_file(
        span_file, an.AnnotationWithOutLabel)
    annotations.sort_spans()
    annotations.has_overlapping_spans(True, True)

    # codecs.open does not translate line endings, so the text is read
    # exactly as stored on disk.
    with codecs.open(article_file, "r", encoding="utf8") as f:
        article_content = f.read()
    with codecs.open(original_text_file, "r", encoding="utf8") as f:
        original_text = f.read()

    if debug:
        # The gold dump is only possible when a gold file was provided.
        if gold_spans is not None:
            gold_annotations = aa.Articles_annotations()
            gold_annotations.load_article_annotations_from_csv_file(
                gold_spans, an.AnnotationWithOutLabel)
            gold_annotations.sort_spans()
            gold_annotations_content = gold_annotations.get_spans_content(
                original_text)
            print(gold_annotations)
            print("gold annotations content (no spaces):\n%s\n---"
                  % (gold_annotations_content))
        before = annotations.get_spans_content(article_content)
        print(annotations)
        print("content of annotations of converted text:\n%s\n---" % (before))

    # Keep the pre-alignment spans for the diagnostic report below.
    original_annotations = copy.deepcopy(annotations)
    annotations.align_annotation_to_new_text(original_text, article_content)
    # NOTE(review): the result was never used; the call is retained in case
    # get_spans_content validates the aligned spans -- confirm and remove.
    annotations.get_spans_content(original_text)
    annotations.set_output_format(True, True, False)
    annotations.save_annotations_to_file(output_file_name)

    if gold_spans is None:
        return

    gold_annotations = aa.Articles_annotations()
    gold_annotations.load_article_annotations_from_csv_file(
        gold_spans, an.AnnotationWithOutLabel)
    gold_annotations.sort_spans()
    gold_annotations.has_overlapping_spans(True, True)
    annotations.sort_spans()

    # `not a == b` is kept (rather than `a != b`) so only __eq__ is relied on.
    if not gold_annotations == annotations:
        print("%s: The following annotations are different: "
              % (article_file), end="")
        # Iterating the difference yields pairs of annotations.
        for p in annotations - gold_annotations:
            print("1: %s; 2: %s -- %s -- %s"
                  % (p[0], p[1],
                     p[0].get_span_content(article_content),
                     p[1].get_span_content(original_text)))
        print(
            "%s: The following annotations are problematic:\n(USER ANNOTATION) %s\n(GOLD ANNOTATION) %s\n"
            % (article_file, annotations, gold_annotations))
        gold_annotations_content = gold_annotations.get_spans_content(
            original_text)
        print("gold annotations content (no spaces):\n%s\n---"
              % (gold_annotations_content))
        before = original_annotations.get_spans_content(article_content)
        print(
            "content of annotations of converted text before alignment:\n%s\n---"
            % (before))
    else:
        print("OK: %s -> %s" % (article_file, output_file_name))
import sys
sys.path.append("../")
import src.propaganda_techniques as pt
import src.annotation as an
import src.article_annotations as aa


def test_remove_annotation(artannotations):
    """Remove the first annotation and check the string form shrinks by it.

    The expected text is the original string representation with the first
    occurrence of the "[0, 59] -> Exaggeration,Minimisation" entry removed,
    so the check is tied to the label file loaded by the script below.
    """
    rendered_before = str(artannotations)
    print("removing annotation: " + str(artannotations[0]))
    artannotations.remove_annotation(artannotations[0])
    rendered_after = str(artannotations)
    expected = rendered_before.replace(
        "\n\t[0, 59] -> Exaggeration,Minimisation", "", 1)
    assert rendered_after == expected


if __name__ == "__main__":
    # Paths are relative: the script is meant to be run from its own folder.
    propaganda_techniques = pt.Propaganda_Techniques(
        filename="../data/propaganda-techniques-names.txt")
    an.Annotation.set_propaganda_technique_list_obj(propaganda_techniques)

    artannotations = aa.Articles_annotations()
    artannotations.load_article_annotations_from_csv_file(
        "../data/article736757214.task-FLC.labels")

    test_remove_annotation(artannotations)