def tag_text_with_annotations(self, original_text, print_line_numbers=False):
    """
    Mark the string original_text with the object's annotations, using the
    object's start/end marker functions, and list the techniques found.

    original_text: string with the text to be marked
    print_line_numbers: accepted for signature compatibility but not read
        by this method; every line is passed to add_sentence_marker()
        together with its 1-based line number regardless of this flag.

    :return a 2-tuple (final_text, footnotes)
    final_text  the marked text: original_text with the strings returned by
                start_annotation_marker_function() /
                end_annotation_marker_function() inserted at the marker
                positions, then split on newlines and re-assembled from the
                per-line results of add_sentence_marker() (concatenated
                with no separator)
    footnotes   <div> lines listing index and name of each technique found,
                sorted by technique index
    """
    # Lazily initialise the shared techniques table the first time it is needed.
    if Articles_annotations.techniques is None:
        if ans.Annotation.propaganda_techniques is None:
            Articles_annotations.techniques = Propaganda_Techniques()
        else:
            Articles_annotations.techniques = ans.Annotation.propaganda_techniques

    self.get_markers_from_spans()
    self.curr_marker = 0
    curr_output_text_index = 0
    text_length = len(original_text)
    # Pieces of the marked text; joined once at the end instead of growing
    # a string with += inside the loop.
    chunks = []
    techniques_found = set()
    row_counter = 1  # 1-based line number of the current position in the text
    annotations_stack = []  # to handle overlapping annotations when assigning color background

    # NOTE(review): the loop stops as soon as all of original_text has been
    # consumed, so a marker positioned at or after len(original_text) is never
    # emitted -- confirm whether an annotation ending exactly at end-of-text
    # should still receive its closing marker.
    while curr_output_text_index < text_length:
        if self.curr_marker >= len(self.markers):
            # done marking text: flush the remaining content of original_text
            chunks.append(original_text[curr_output_text_index:])
            curr_output_text_index = text_length
        elif self.marker_position() <= curr_output_text_index:
            # it is time to add a marker
            techniques_index = self.techniques.indexOf(self.marker_label())
            techniques_found.add(techniques_index)
            if self.is_starting_marker():
                chunks.append(
                    self.start_annotation_marker_function(
                        annotations_stack, self.curr_marker, row_counter
                    )
                )
                annotations_stack.append(self.marker_annotation())
            else:
                chunks.append(
                    self.end_annotation_marker_function(
                        annotations_stack, self.curr_marker, row_counter
                    )
                )
                annotations_stack.remove(self.marker_annotation())
            self.curr_marker += 1
        else:
            # flush string content up to the next marker
            next_position = self.marker_position()
            text_to_be_added = original_text[curr_output_text_index:next_position]
            row_counter += text_to_be_added.count("\n")
            chunks.append(text_to_be_added)
            curr_output_text_index = next_position

    output_text = "".join(chunks)
    final_text = "".join(
        self.add_sentence_marker(line, line_number)
        for line_number, line in enumerate(output_text.split("\n"), 1)
    )

    footnote_parts = ["\n<div>List of techniques found in the article</div>\n\n"]
    for technique_index in sorted(techniques_found):
        footnote_parts.append(
            "<div>%d: %s</div>\n"
            % (technique_index, self.techniques.get_technique(technique_index))
        )
    footnotes = "".join(footnote_parts)

    return final_text, footnotes
# NOTE(review): the methods visible in this file refer to `ans.Annotation`,
# while this import block only binds `anwol` -- confirm where `ans` comes from.
import src.annotation_w_o_label as anwol
from src.propaganda_techniques import Propaganda_Techniques
import logging.handlers

__author__ = "Giovanni Da San Martino"
__copyright__ = "Copyright 2019"
__credits__ = ["Giovanni Da San Martino"]
__license__ = "GPL"
__version__ = "0.1"
__maintainer__ = "Giovanni Da San Martino"
__email__ = "*****@*****.**"
__status__ = "Beta"

# Logger shared under the "propaganda_scorer" name.
logger = logging.getLogger("propaganda_scorer")
# Module-level techniques table.
# NOTE(review): the marking methods read `Articles_annotations.techniques`
# (a class attribute), not this module-level name -- confirm that the class
# attribute is defined elsewhere.
techniques = Propaganda_Techniques()


class Articles_annotations(object):
    """
    Class for handling annotations for one article.
    Articles_annotations is composed of an article id
    and a list of Annotation objects.
    """

    # Strings that mark_text() wraps around the technique markers; both are
    # empty by default, so no extra effect is emitted.
    start_annotation_effect = ""
    end_annotation_effect = ""
    # Literal text mark_text() inserts at the start / end of an annotated
    # fragment (the technique index is written next to it).
    start_annotation_str = "<span -"
    end_annotation_str = "- span>"
    # String mark_text() emits after a marker while an annotation is still
    # open; empty by default.
    annotation_background_color = ""
def mark_text(self, original_text, print_line_numbers=False):
    """
    Insert textual markers for the object's annotations into original_text.

    original_text: string with the text to be marked
    print_line_numbers: when true, prefix every line of the marked text
        with its 1-based line number

    :return a 3-tuple (output_text, footnotes, legend)
    output_text  original_text with start/end markers (and the index of the
                 technique) inserted at the marker positions
    footnotes    index and name of every technique found, sorted by index
    legend       description of the marks added
    """
    self.get_markers_from_spans()

    # Lazily initialise the shared techniques table on first use.
    if Articles_annotations.techniques is None:
        shared_techniques = ans.Annotation.propaganda_techniques
        if shared_techniques is None:
            Articles_annotations.techniques = Propaganda_Techniques()
        else:
            Articles_annotations.techniques = shared_techniques

    self.curr_marker = 0
    position = 0
    total_length = len(original_text)
    pieces = []  # fragments of the marked text, joined after the loop
    seen_techniques = set()
    open_annotations = []  # to handle overlapping annotations when assigning color background

    while position < total_length:
        if self.curr_marker >= len(self.markers):
            # no markers left: copy the rest of the text verbatim
            pieces.append(original_text[position:])
            position = total_length
        elif self.marker_position() <= position:
            # a marker is due at the current position
            if self.is_starting_marker():
                pieces.append(self.start_annotation_effect + self.start_annotation_str)
                open_annotations.append(self.marker_annotation())
            else:
                separator = "" if len(open_annotations) > 1 else " "
                pieces.append(
                    "%s%s%s"
                    % (
                        self.end_annotation_effect,
                        separator,
                        self.start_annotation_effect,
                    )
                )

            technique_id = Articles_annotations.techniques.indexOf(self.marker_label())
            pieces.append(str(technique_id))
            seen_techniques.add(technique_id)

            if self.is_ending_marker():
                pieces.append(self.end_annotation_str + self.end_annotation_effect)
                open_annotations.remove(self.marker_annotation())
                # still inside another annotation: restore the background
                if len(open_annotations) > 0:
                    pieces.append(self.annotation_background_color)
            else:
                pieces.append(
                    self.end_annotation_effect + " " + self.annotation_background_color
                )
            self.curr_marker += 1
        else:
            # copy the text up to the next marker
            upcoming = self.marker_position()
            pieces.append(original_text[position:upcoming])
            position = upcoming

    output_text = "".join(pieces)

    if print_line_numbers:
        # Pairing with the lines of original_text caps the numbered output at
        # the number of lines of the original text.
        source_lines = original_text.split("\n")
        marked_lines = output_text.split("\n")
        numbered = []
        for line_number, (_, marked_line) in enumerate(zip(source_lines, marked_lines), 1):
            numbered.append("%d %s" % (line_number, marked_line))
        output_text = "\n".join(numbered)

    legend_template = (
        "---\n%sHighlighted text%s: any propagandistic fragment\n%s%si%s: start of the i-th technique"
        "\n%si%s%s: end of the i-th technque\n---"
    )
    legend = legend_template % (
        self.annotation_background_color,
        self.end_annotation_effect,
        self.start_annotation_effect,
        self.start_annotation_str,
        self.end_annotation_effect,
        self.start_annotation_effect,
        self.end_annotation_str,
        self.end_annotation_effect,
    )

    footnote_lines = ["List of techniques found in the article\n\n"]
    for technique_id in sorted(seen_techniques):
        technique_name = Articles_annotations.techniques.get_technique(technique_id)
        footnote_lines.append("%d: %s\n" % (technique_id, technique_name))
    footnotes = "".join(footnote_lines)

    return output_text, footnotes, legend