Esempio n. 1
0
    def test_unknown_tokens_dont_show_up(self):
        """Serialized output must contain no unknown-token markers.

        The input carries the marker twice: once directly after the prompt
        text and once at the very end.
        """
        text = "the beginning of the prompt"
        serialized = serialize_text_algo_individual_values(
            f"{text}{TransformerXLNetTokenTypes.UNKNOWN_TOKEN} tell me a story {TransformerXLNetTokenTypes.UNKNOWN_TOKEN}"
        )

        leftover_markers = serialized.count(
            TransformerXLNetTokenTypes.UNKNOWN_TOKEN)
        self.assertEqual(leftover_markers, 0)
Esempio n. 2
0
    def test_end_of_prompts_removes_after(self):
        """The end-of-prompt token and all text after it are dropped.

        Only the text preceding the token is expected back.
        """
        start = "the beginning of the prompt"
        prompt_with_trailing_text = f"{start}{TransformerXLNetTokenTypes.ENDING_OF_PROMPT} nothing should show up"

        serialized = serialize_text_algo_individual_values(prompt_with_trailing_text)
        self.assertEqual(serialized, start)
Esempio n. 3
0
    def test_end_of_paragraph_returns_double_space(self):
        """Two end-of-paragraph tokens must produce four newlines in total."""
        # Two end-of-paragraph tokens in the input -> four newlines expected.
        expected_newlines = 4

        text = "the beginning of the prompt"
        serialized = serialize_text_algo_individual_values(
            f"{text}{TransformerXLNetTokenTypes.ENDING_OF_PARAGRAPH}should have multiple newlines {TransformerXLNetTokenTypes.ENDING_OF_PARAGRAPH}"
        )

        self.assertEqual(serialized.count("\n"), expected_newlines)
Esempio n. 4
0
    def test_gpt2_text_cleanup(self):
        """Serializing drops GPT2_END_TEXT_STRING and the text following it.

        The text ahead of the marker, including its inner blank line, must
        come back unchanged.
        """
        kept_lyrics = "Now this is a story all about how\n\nMy life got flipped upside down"
        dropped_lyrics = "Just the two of us, building castles in the sky, Just the two of us, you and I"

        serialized = serialize_text_algo_individual_values(
            kept_lyrics + GPT2_END_TEXT_STRING + dropped_lyrics
        )
        self.assertEqual(kept_lyrics, serialized)
Esempio n. 5
0
    def test_gpt2_text_cleanup_remove_new_lines(self):
        """Newlines wrapped around the text are removed by serialization."""
        padded_word = "\n\nCat\n\n"
        self.assertEqual(
            "Cat", serialize_text_algo_individual_values(padded_word))