예제 #1
0
    def tag_text_with_annotations(self, original_text, print_line_numbers=False):
        """
        Mark the string original_text with the object's annotations.

        The markers returned by start_annotation_marker_function and
        end_annotation_marker_function are interleaved with the text at the
        positions reported by marker_position(); every line of the result is
        then passed through add_sentence_marker together with its 1-based
        line number.

        original_text: string with the text to be marked
        print_line_numbers: not used by this method; kept so that callers
                            passing it keep working

        :return final_text the text in string original_text with added marks
                footnotes the list of techniques found in the text
        """

        if Articles_annotations.techniques is None:
            if ans.Annotation.propaganda_techniques is None:
                Articles_annotations.techniques = Propaganda_Techniques()
            else:
                Articles_annotations.techniques = ans.Annotation.propaganda_techniques

        self.get_markers_from_spans()

        self.curr_marker = 0
        chunks = []  # pieces of the marked text, joined once at the end
        text_index = 0  # how much of original_text has been copied so far
        techniques_found = set()
        row_counter = 1
        annotations_stack = []  # to handle overlapping annotations when assigning color background

        # Iterate over the markers rather than over the text: a marker placed
        # at (or after) the end of original_text must still be emitted,
        # otherwise an annotation reaching the end of the text is never closed.
        while self.curr_marker < len(self.markers):
            position = self.marker_position()
            if position > text_index:  # flush string content up to the marker
                text_to_be_added = original_text[text_index:position]
                row_counter += text_to_be_added.count('\n')
                chunks.append(text_to_be_added)
                text_index = position

            techniques_index = self.techniques.indexOf(self.marker_label())
            techniques_found.add(techniques_index)
            if self.is_starting_marker():
                chunks.append(self.start_annotation_marker_function(
                    annotations_stack, self.curr_marker, row_counter))
                annotations_stack.append(self.marker_annotation())
            else:
                chunks.append(self.end_annotation_marker_function(
                    annotations_stack, self.curr_marker, row_counter))
                annotations_stack.remove(self.marker_annotation())
            self.curr_marker += 1

        # done marking text: flush the remaining content of original_text
        chunks.append(original_text[text_index:])
        output_text = "".join(chunks)

        final_text = "".join(
            self.add_sentence_marker(line, line_number)
            for line_number, line in enumerate(output_text.split("\n"), 1)
        )

        footnotes = "\n<div>List of techniques found in the article</div>\n\n"
        footnotes += "".join(
            "<div>%d: %s</div>\n" % (technique_index, self.techniques.get_technique(technique_index))
            for technique_index in sorted(techniques_found)
        )

        return final_text, footnotes
import src.annotation_w_o_label as anwol
from src.propaganda_techniques import Propaganda_Techniques
import logging.handlers

# Module metadata (authorship, licensing and release status).
__author__ = "Giovanni Da San Martino"
__copyright__ = "Copyright 2019"
__credits__ = ["Giovanni Da San Martino"]
__license__ = "GPL"
__version__ = "0.1"
__maintainer__ = "Giovanni Da San Martino"
__email__ = "*****@*****.**"
__status__ = "Beta"

# Named logger obtained once at import time.
logger = logging.getLogger("propaganda_scorer")

# Module-level Propaganda_Techniques instance, created when the module is imported.
techniques = Propaganda_Techniques()

class Articles_annotations(object):

    """
    Class for handling the annotations of one article.

    Articles_annotations is composed of an article id
    and a list of Annotation objects.
    """

    # Class-level string defaults describing how annotated text is decorated.
    # NOTE(review): presumably read by the text-marking methods of this class
    # and overridable per instance or subclass -- confirm against the callers.

    # Strings for switching a text effect on/off; empty by default.
    start_annotation_effect = ""
    end_annotation_effect = ""
    # Literal delimiters for the opening and closing annotation markers.
    start_annotation_str = "<span -"
    end_annotation_str = "- span>"
    # String for the background highlight of annotated text; empty by default.
    annotation_background_color = ""
예제 #3
0
    def mark_text(self, original_text, print_line_numbers=False):
        """
        Mark the string original_text with the object's annotations.

        At every marker position the index of the technique is inserted,
        wrapped in start_annotation_str / end_annotation_str and in the
        configured effect strings.

        original_text: string with the text to be marked
        print_line_numbers: if True, prefix every line of the marked text
                            with its 1-based line number

        :return output_text the text in string original_text with added marks
                footnotes the list of techniques in the text
                legend description of the marks added
        """

        self.get_markers_from_spans()
        if Articles_annotations.techniques is None:
            if ans.Annotation.propaganda_techniques is None:
                Articles_annotations.techniques = Propaganda_Techniques()
            else:
                Articles_annotations.techniques = ans.Annotation.propaganda_techniques

        self.curr_marker = 0
        chunks = []  # pieces of the marked text, joined once at the end
        text_index = 0  # how much of original_text has been copied so far
        techniques_found = set()
        annotations_stack = []  # to handle overlapping annotations when assigning color background

        # Iterate over the markers rather than over the text: a marker placed
        # at (or after) the end of original_text must still be emitted,
        # otherwise the closing marker and end effect of an annotation that
        # reaches the end of the text are lost.
        while self.curr_marker < len(self.markers):
            position = self.marker_position()
            if position > text_index:  # flush string content up to the marker
                chunks.append(original_text[text_index:position])
                text_index = position

            # Opening part of the marker.
            if self.is_starting_marker():
                chunks.append(self.start_annotation_effect + self.start_annotation_str)
                annotations_stack.append(self.marker_annotation())
            else:
                chunks.append("%s%s%s" % (
                    self.end_annotation_effect,
                    "" if len(annotations_stack) > 1 else " ",
                    self.start_annotation_effect,
                ))

            # The marker itself is the index of the technique.
            techniques_index = Articles_annotations.techniques.indexOf(
                self.marker_label())
            chunks.append(str(techniques_index))
            techniques_found.add(techniques_index)

            # Closing part of the marker.
            if self.is_ending_marker():
                chunks.append(self.end_annotation_str + self.end_annotation_effect)
                annotations_stack.remove(self.marker_annotation())
                if annotations_stack:
                    # still inside another annotation: restore the background
                    chunks.append(self.annotation_background_color)
            else:
                chunks.append(self.end_annotation_effect + " " +
                              self.annotation_background_color)
            self.curr_marker += 1

        # done marking text: flush the remaining content of original_text
        chunks.append(original_text[text_index:])
        output_text = "".join(chunks)

        if print_line_numbers:
            # Number at most as many lines as original_text has, as the
            # previous zip()-based implementation did.
            original_line_count = original_text.count("\n") + 1
            numbered_lines = output_text.split("\n")[:original_line_count]
            output_text = "\n".join(
                "%d %s" % (line_number, line)
                for line_number, line in enumerate(numbered_lines, 1)
            )

        legend = (
            "---\n%sHighlighted text%s: any propagandistic fragment\n%s%si%s: start of the i-th technique"
            "\n%si%s%s: end of the i-th technque\n---" % (
                self.annotation_background_color,
                self.end_annotation_effect,
                self.start_annotation_effect,
                self.start_annotation_str,
                self.end_annotation_effect,
                self.start_annotation_effect,
                self.end_annotation_str,
                self.end_annotation_effect,
            ))

        footnotes = "List of techniques found in the article\n\n"
        footnotes += "".join(
            "%d: %s\n" % (
                technique_index,
                Articles_annotations.techniques.get_technique(technique_index),
            )
            for technique_index in sorted(techniques_found)
        )

        return output_text, footnotes, legend