Ejemplo n.º 1
0
    def setUp(self):
        """Create two facts, one message per fact, and a node holding both."""
        # Apart from the corpus name, both facts are built from the same
        # placeholder strings, so those are listed once and unpacked twice.
        shared_fact_fields = (
            "corpus_type",
            "timestamp_from",
            "timestamp_to",
            "timestamp_type",
            "analysis_type",
            "result_key",
            "result_value",
            "outlierness",
        )
        # Both messages receive the same three numeric arguments.
        message_numbers = (0.1, 0.2, 0.3)

        self.fact1 = Fact("corpus1", *shared_fact_fields)
        self.message1 = Message(self.fact1, *message_numbers)

        self.fact2 = Fact("corpus2", *shared_fact_fields)
        self.message2 = Message(self.fact2, *message_numbers)

        # The node under test wraps both messages in an ELABORATION relation.
        both_messages = [self.message1, self.message2]
        self.document_plan_node = DocumentPlanNode(
            both_messages, Relation.ELABORATION)
Ejemplo n.º 2
0
    def run(
        self, registry: Registry, random: Generator, language: str,
        scored_messages: List[Message]
    ) -> Tuple[DocumentPlanNode, List[Message]]:
        """
        Assemble the body document plan as a sequence of nucleus-led groups.

        Each pass picks a nucleus with ``select_next_nucleus``, collects
        satellites for it with ``select_satellites_for_nucleus`` and appends
        the nucleus followed by its satellites as one child of the root node.
        Planning stops when the picked nucleus scores below the absolute or
        the relative new-paragraph threshold, provided that at least one
        nucleus has already been placed.

        Returns the root node together with the same ``scored_messages``
        object that was passed in.
        """
        log.debug("Creating body document plan")

        # The root is a sequence node; one child is appended per nucleus.
        root = DocumentPlanNode(children=[], relation=Relation.SEQUENCE)

        # Work on a copy so candidates can be dropped without touching the
        # caller's list.
        remaining = scored_messages[:]
        chosen_nuclei: List[Message] = []

        while True:
            nucleus, score = self.select_next_nucleus(remaining, chosen_nuclei)

            below_threshold = (
                score < self.new_paragraph_absolute_threshold
                or score < self.new_paragraph_relative_threshold(chosen_nuclei)
            )
            # A low score only ends planning once something has been placed;
            # the very first nucleus is accepted regardless of its score.
            if below_threshold and chosen_nuclei:
                return root, scored_messages

            chosen_nuclei.append(nucleus)

            # A message may appear in the plan only once: retire the nucleus
            # before looking for its satellites ...
            remaining = [m for m in remaining if m != nucleus]

            satellites = self.select_satellites_for_nucleus(nucleus, remaining)

            # ... and then retire the satellites that were just claimed.
            remaining = [m for m in remaining if m not in satellites]

            group = [nucleus] + satellites
            root.children.append(DocumentPlanNode(group, Relation.SEQUENCE))
Ejemplo n.º 3
0
    def run(self, registry: Registry, random: Generator, language: str,
            scored_messages) -> Tuple[DocumentPlanNode, List[Message]]:
        """
        Build the headline document plan.

        The plan is a sequence root with a single child, itself a sequence
        node wrapping the one message returned by ``select_next_nucleus``.
        Returns the root together with ``scored_messages`` exactly as it was
        passed in.
        """
        log.debug("Creating headline document plan")

        # The root is a sequence node that will hold the headline child.
        root = DocumentPlanNode(children=[], relation=Relation.SEQUENCE)

        # Only the selected message is used; the second element of the
        # returned pair is discarded.
        headline, _ = self.select_next_nucleus(scored_messages, [])

        headline_node = DocumentPlanNode(children=[headline],
                                         relation=Relation.SEQUENCE)
        root.children.append(headline_node)

        return root, scored_messages