Ejemplo n.º 1
0
    def _parse_pack(self, data_source: str) -> Iterator[MultiPack]:
        """
        Wrap a raw query string into a MultiPack.

        The most recent user and bot utterance packs stored in
        ``self.resource`` (when present) are attached first, then a new
        data pack holding ``data_source`` is added under
        ``self.config.pack_name``.

        Args:
            data_source: str that contains text of a document

        Returns: An iterator yielding a single MultiPack that contains a
            datapack for the current query

        """
        query_multi_pack = MultiPack()

        # Use the conversation context to build the query: only the latest
        # entry of each history is attached.
        for context_key in ("user_utterance", "bot_utterance"):
            history = self.resource.get(context_key)
            if history:
                query_multi_pack.update_pack({context_key: history[-1]})

        # The utterance annotation spans the whole query text.
        query_pack = DataPack()
        query_pack.add_entry(Utterance(query_pack, 0, len(data_source)))
        query_pack.set_text(
            data_source, replace_func=self.text_replace_operation)

        query_multi_pack.update_pack({self.config.pack_name: query_pack})

        yield query_multi_pack
Ejemplo n.º 2
0
    def _parse_pack(self, data_source: str) -> Iterator[MultiPack]:
        r"""Build a MultiPack around a raw query string.

        When ``self.resources`` is available, the latest user and bot
        utterance packs are attached before the query pack is created.

        Args:
            data_source: str that contains text of a document.

        Returns: An iterator yielding one MultiPack containing a datapack
            for the current query.
        """
        query_multi_pack = MultiPack()

        # Use the conversation context to build the query.
        for context_key in ("user_utterance", "bot_utterance"):
            if self.resources is None:
                continue
            history = self.resources.get(context_key)
            if history:
                # Only the most recent pack of each history is attached.
                query_multi_pack.add_pack_(history[-1], context_key)

        query_pack = query_multi_pack.add_pack(self.configs.pack_name)
        query_pack.set_text(
            data_source, replace_func=self.text_replace_operation)

        # Annotate the whole query text as one utterance.
        Utterance(query_pack, 0, len(data_source))

        yield query_multi_pack
Ejemplo n.º 3
0
    def _parse_pack(self, file_path: str) -> Iterator[MultiPack]:
        """
        Read a text file into a MultiPack with an input and an output pack.

        Every non-blank line (after stripping surrounding whitespace)
        becomes one ``Sentence`` in the input pack; the input text is those
        lines, each followed by a newline. The output pack is created
        without text.

        Args:
            file_path: Path of the UTF-8 text file to read.

        Returns: An iterator yielding a single MultiPack holding the input
            pack (``self.config.input_pack_name``) and the output pack
            (``self.config.output_pack_name``).

        """
        m_pack: MultiPack = MultiPack()

        input_pack_name = self.config.input_pack_name
        output_pack_name = self.config.output_pack_name

        kept_lines = []
        offset = 0
        with open(file_path, "r", encoding="utf8") as doc:
            input_pack = DataPack(doc_id=file_path)

            for raw_line in doc:
                line = raw_line.strip()

                if not line:
                    continue

                # add sentence
                sent = Sentence(input_pack, offset, offset + len(line))
                input_pack.add_entry(sent)
                kept_lines.append(line)
                # +1 accounts for the newline appended after each line.
                offset += len(line) + 1

        # The file is closed at this point, so no handle is held open while
        # the consumer processes the yielded pack.
        text = "".join(line + "\n" for line in kept_lines)
        input_pack.set_text(text, replace_func=self.text_replace_operation)

        output_pack = DataPack()

        m_pack.update_pack({
            input_pack_name: input_pack,
            output_pack_name: output_pack
        })

        yield m_pack
Ejemplo n.º 4
0
    def _parse_pack(
            self, base_and_path: Tuple[str, str]) -> Iterator[MultiPack]:
        """
        Read a text file into a MultiPack with an input and an output pack.

        Every non-blank line (after stripping surrounding whitespace)
        becomes one ``Sentence`` in the input pack; the input text is those
        lines, each followed by a newline. The output pack is added without
        text.

        Args:
            base_and_path: A ``(base_dir, file_path)`` pair. ``base_dir`` is
                removed from the start of ``file_path`` (when it is a
                prefix) to form the input pack's ``doc_id``.

        Returns: An iterator yielding a single MultiPack holding the input
            pack (``self.config.input_pack_name``) and the output pack
            (``self.config.output_pack_name``).

        """
        base_dir, file_path = base_and_path

        m_pack: MultiPack = MultiPack()

        input_pack_name = self.config.input_pack_name
        output_pack_name = self.config.output_pack_name

        # Remove long path from the beginning.
        if file_path.startswith(base_dir):
            doc_id = file_path[len(base_dir):]
        else:
            doc_id = file_path
        doc_id = doc_id.strip(os.path.sep)

        kept_lines = []
        offset = 0
        with open(file_path, "r", encoding="utf8") as doc:
            input_pack = m_pack.add_pack(input_pack_name)
            input_pack.doc_id = doc_id

            for raw_line in doc:
                line = raw_line.strip()

                if not line:
                    continue

                # add sentence
                Sentence(input_pack, offset, offset + len(line))
                kept_lines.append(line)
                # +1 accounts for the newline appended after each line.
                offset += len(line) + 1

        # The file is closed at this point, so no handle is held open while
        # the consumer processes the yielded pack.
        text = "".join(line + "\n" for line in kept_lines)
        input_pack.set_text(text, replace_func=self.text_replace_operation)
        # Create a output pack without text.
        m_pack.add_pack(output_pack_name)
        yield m_pack
Ejemplo n.º 5
0
 def _parse_pack(self, data_source: str) -> Iterator[MultiPack]:
     """
     Turn a tab-separated record into a MultiPack with one data pack.

     The first field is used as the document id and the second field as
     the text; a ``Document`` entry spans the whole text.

     Args:
         data_source: Tab-separated string with at least two fields.

     Returns: An iterator yielding one MultiPack holding the data pack
         under ``self.config.pack_name``.
     """
     columns = data_source.split("\t")
     doc_pack = DataPack(doc_id=columns[0])
     wrapper = MultiPack()

     body = columns[1]
     doc_pack.add_entry(Document(pack=doc_pack, begin=0, end=len(body)))
     doc_pack.set_text(body)

     wrapper.update_pack({self.config.pack_name: doc_pack})
     yield wrapper
Ejemplo n.º 6
0
    def test_wrong_attribute(self):
        """Assigning a ``DifferentEntry`` to ``refer_entry`` raises TypeError."""
        multi_pack = MultiPack()
        multi_entry = ExampleMPEntry(multi_pack)
        member_pack = multi_pack.add_pack('pack1')
        wrong_entry: DifferentEntry = member_pack.add_entry(
            DifferentEntry(member_pack))

        # The assignment itself is expected to be rejected.
        with self.assertRaises(TypeError):
            multi_entry.refer_entry = wrong_entry

        multi_entry.regret_creation()
Ejemplo n.º 7
0
    def setUp(self) -> None:
        """Create a multi pack with three member packs and name each one."""
        self.multi_pack = MultiPack()

        # First register all packs under their reference names ...
        members = [
            self.multi_pack.add_pack(ref_name=ref_name)
            for ref_name in ("pack1", "pack2", "pack_three")
        ]

        # ... then give each data pack its own pack name.
        for member, pack_name in zip(members, ("1", "2", "Three")):
            member.pack_name = pack_name
Ejemplo n.º 8
0
    def _parse_pack(self, collection: Any) -> Iterator[MultiPack]:
        """
        Yield one multi pack containing three named member packs.

        Args:
            collection: Not used by this method.

        Returns: An iterator yielding a single MultiPack.
        """
        container: MultiPack = MultiPack()

        # First register all packs under their reference names ...
        members = [
            container.add_pack(ref_name=ref_name)
            for ref_name in ("pack1", "pack2", "pack_three")
        ]

        # ... then give each data pack its own pack name.
        for member, pack_name in zip(members, ("1", "2", "Three")):
            member.pack_name = pack_name

        yield container
Ejemplo n.º 9
0
    def cast(self, pack: DataPack) -> MultiPack:
        """
        Box a data pack into a newly created multi pack.

        Args:
            pack: The data pack to be boxed

        Returns: The new multi pack, with ``pack`` added using
            ``self.configs.pack_name``.

        """
        boxed = MultiPack()
        boxed.add_pack_(pack, self.configs.pack_name)
        return boxed
Ejemplo n.º 10
0
    def test_wrong_attribute(self):
        """Assigning a ``DifferentEntry`` to ``refer_entry`` emits a UserWarning."""
        import warnings

        multi_pack = MultiPack()
        multi_entry = ExampleMPEntry(multi_pack)
        member_pack = multi_pack.add_pack("pack1")
        wrong_entry: DifferentEntry = member_pack.add_entry(
            DifferentEntry(member_pack))

        with warnings.catch_warnings(record=True) as caught:
            # Record every warning, even ones that were already triggered.
            warnings.simplefilter("always")
            multi_entry.refer_entry = wrong_entry
            multi_entry.regret_creation()
            latest = caught[-1]
            assert issubclass(latest.category, UserWarning)
Ejemplo n.º 11
0
    def _parse_pack(self, data_source: str) -> Iterator[MultiPack]:
        """
        Turn a tab-separated record into a MultiPack with one data pack.

        The first field becomes the pack's ``doc_id`` and the second field
        its text; a ``Document`` annotation spans the whole text.

        Args:
            data_source: Tab-separated string with at least two fields.

        Returns: An iterator yielding one MultiPack whose data pack is
            added under ``self.config.pack_name``.
        """
        columns = data_source.split("\t")
        wrapper = MultiPack()

        doc_pack = wrapper.add_pack(self.config.pack_name)
        doc_pack.doc_id = columns[0]

        body = columns[1]
        doc_pack.set_text(body)
        Document(pack=doc_pack, begin=0, end=len(body))

        yield wrapper
Ejemplo n.º 12
0
    def setUp(self) -> None:
        """Create a multi pack with a left and a right pack holding sample text."""
        # Note: input source is created automatically by the system, but we
        #  can also set it manually at test cases.
        self.multi_pack = MultiPack()
        self.data_pack1 = self.multi_pack.add_pack(ref_name="left pack")
        self.data_pack2 = self.multi_pack.add_pack(ref_name="right pack")

        # (pack, pack name, text) for each member pack.
        fixtures = (
            (self.data_pack1, "some pack",
             "This pack contains some sample data."),
            (self.data_pack2, "another pack",
             "This pack contains some other sample data."),
        )
        for member, pack_name, sample_text in fixtures:
            member.pack_name = pack_name
            member.set_text(sample_text)
Ejemplo n.º 13
0
    def new_pack(self, pack_name: Optional[str] = None) -> MultiPack:
        """
        Create a new multi pack using the current pack manager.

        Args:
            pack_name (str, Optional): The name to be used for the pack. It
              is passed to the ``MultiPack`` constructor as given (``None``
              when omitted).

        Returns:
            The newly created multi pack.

        """
        manager = self._pack_manager
        return MultiPack(manager, pack_name)
Ejemplo n.º 14
0
    def cast(self, pack: DataPack) -> MultiPack:
        """
        Auto-box the data pack into a multi pack by simple wrapping.

        Args:
            pack: The data pack to be boxed

        Returns: A new multi pack to which ``pack`` was added using
            ``self.configs.pack_name``.

        """
        wrapped = MultiPack()
        wrapped.add_pack_(pack, self.configs.pack_name)
        return wrapped
Ejemplo n.º 15
0
    def _parse_pack(self, file_path: str) -> Iterator[DataPack]:  # type: ignore
        """
        Yield one multi pack per non-blank line of a text file.

        Each yielded multi pack has a single member pack named ``"pack"``
        whose text is the stripped line, annotated with one ``Sentence``
        covering the whole line. ``self.count`` is incremented once per
        yielded pack.

        Args:
            file_path: Path of the UTF-8 text file to read.
        """
        with open(file_path, "r", encoding="utf8") as doc:
            # Lazily strip each line and drop the ones that end up empty.
            for sentence_text in filter(None, map(str.strip, doc)):
                line_multi_pack = MultiPack()
                line_pack = line_multi_pack.add_pack("pack")
                line_pack.set_text(sentence_text)

                Sentence(line_pack, 0, len(sentence_text))
                self.count += 1

                yield line_multi_pack  # type: ignore
Ejemplo n.º 16
0
 def _parse_pack(self,
                 file_path: str) -> Iterator[DataPack]:  # type: ignore
     """
     Yield one multi pack per non-blank line of a text file.

     Each yielded multi pack holds a single data pack under the name
     ``"pack"``; its text is the stripped line and it carries one
     ``Sentence`` covering the whole line. ``self.count`` is incremented
     once per yielded pack.

     Args:
         file_path: Path of the UTF-8 text file to read; also used as the
             ``doc_id`` of every data pack.
     """
     with open(file_path, "r", encoding="utf8") as doc:
         for raw_line in doc:
             line = raw_line.strip()
             # Skip blank lines before building any pack, so nothing is
             # constructed only to be thrown away.
             if not line:
                 continue
             m_pack = MultiPack()
             pack = DataPack(doc_id=file_path)
             sent = Sentence(pack, 0, len(line))
             pack.add_entry(sent)
             pack.set_text(line)
             self.count += 1
             m_pack.update_pack({"pack": pack})
             yield m_pack  # type: ignore
    def test_multi_pack_copy_link_or_group(self):
        """A MultiPackLink is not copied when its children were not copied."""
        augmenter = ReplacementDataAugmentProcessor()
        multi_pack = MultiPack()
        source = multi_pack.add_pack("src")
        target = multi_pack.add_pack("tgt")

        source.set_text("input")
        target.set_text("output")
        # One token spanning the full text of each pack.
        source_token = source.add_entry(
            Token(source, 0, len(source.text)))
        target_token = target.add_entry(
            Token(target, 0, len(target.text)))

        link = multi_pack.add_entry(
            MultiPackLink(multi_pack, source_token, target_token))

        # The MultiPackLink should not be copied, because its children are
        # not copied.
        was_copied = augmenter._copy_multi_pack_link_or_group(
            link, multi_pack)
        self.assertEqual(was_copied, False)

        aligned_source = augmenter._auto_align_annotations(source, [])
        token_count = len(list(aligned_source.get(Token)))
        self.assertEqual(token_count, 1)
Ejemplo n.º 18
0
    def _parse_pack(self, file_path: str) -> Iterator[MultiPack]:
        """
        Read at most the first 20 non-blank lines of a file into a MultiPack.

        Each kept line (stripped) becomes one ``Sentence`` in the
        ``"input_src"`` pack, whose text is the kept lines, each followed by
        a newline. An ``"output_tgt"`` pack is added without text.

        Args:
            file_path: Path of the UTF-8 text file to read; also used as
                the ``doc_id`` of the input pack.

        Returns: An iterator yielding a single MultiPack.
        """
        multi_pack: MultiPack = MultiPack()

        with open(file_path, "r", encoding="utf8") as doc:
            kept = []
            cursor = 0

            source_pack = DataPack(doc_id=file_path)

            for raw in doc:
                stripped = raw.strip()
                if not stripped:
                    # skip empty lines
                    continue

                # add sentence
                sentence_end = cursor + len(stripped)
                source_pack.add_entry(
                    Sentence(source_pack, cursor, sentence_end))
                kept.append(stripped + '\n')
                # Step over the newline that follows the sentence.
                cursor = sentence_end + 1

                # Stop once 20 sentences have been collected.
                if len(kept) >= 20:
                    break

            source_pack.set_text(
                "".join(kept), replace_func=self.text_replace_operation)

        target_pack = DataPack()

        multi_pack.update_pack({
            "input_src": source_pack,
            "output_tgt": target_pack
        })

        yield multi_pack
Ejemplo n.º 19
0
 def new_pack(self, pack_name: Optional[str] = None) -> MultiPack:
     """Create a new multi pack from ``self._pack_manager`` and ``pack_name``."""
     manager = self._pack_manager
     return MultiPack(manager, pack_name)
Ejemplo n.º 20
0
 def _parse_pack(self, name: str) -> Iterator[MultiPack]:
     """Yield a single new multi pack whose ``pack_name`` is set to ``name``."""
     named_pack = MultiPack()
     named_pack.pack_name = name
     yield named_pack