def test_add_unknown_word_to_utterances_with_zero_max_unknownword(self):
        """Augmenting utterances with ``max_unknown_words=0`` must not raise."""
        # Given
        utterances = [text_to_utterance("yo")]
        replacement_string = "yo"
        unknown_word_prob = 1
        max_unknown_words = 0
        # Pass a seeded RandomState *instance*: calling rand()/randint() on
        # the bare class would fail because there is no bound generator.
        random_state = np.random.RandomState(1)

        # When / Then
        # The message names the parameter actually under test (the
        # probability is 1 here, it is max_unknown_words that is zero).
        with self.fail_if_exception("Failed to augment utterances with "
                                    "max_unknown_words=0"):
            add_unknown_word_to_utterances(utterances, replacement_string,
                                           unknown_word_prob,
                                           max_unknown_words, random_state)
Ejemplo n.º 2
0
    def test_add_unknown_words_to_utterances(self):
        """Check the augmentation output against a fully mocked RNG.

        The mocked ``rand`` alternates between 0 and 1 and the mocked
        ``randint`` cycles through 1..max_unknown_words. The expected result
        is the six input utterances where every other one (starting with the
        first) gains a trailing chunk of 1, 2 and then 3 replacement words.
        """
        # Given
        max_unknown_words = 3
        unknownword_prob = .5
        replacement_string = "unknownword"

        template = {
            "data": [
                {"text": "hello "},
                {"text": " you ", "entity": "you"},
                {"text": " how are you "},
                {"text": "cat", "entity": "cat"},
            ]
        }
        utterances = [deepcopy(template) for _ in range(6)]

        rand_values = cycle([0, 1])
        randint_values = cycle(range(1, max_unknown_words + 1))

        def fake_rand():
            return next(rand_values)

        def fake_randint(a, b):  # pylint: disable=unused-argument
            return next(randint_values)

        random_state = MagicMock()
        random_state.rand = MagicMock(side_effect=fake_rand)
        random_state.randint = MagicMock(side_effect=fake_randint)

        # When
        noisy_utterances = add_unknown_word_to_utterances(
            utterances,
            replacement_string=replacement_string,
            unknown_word_prob=unknownword_prob,
            max_unknown_words=max_unknown_words,
            random_state=random_state
        )

        # Then
        # One entry per input utterance: the text of the chunk expected to be
        # appended, or None when the utterance must come back unchanged.
        appended_texts = [
            " unknownword",
            None,
            " unknownword unknownword",
            None,
            " unknownword unknownword unknownword",
            None,
        ]
        expected_utterances = []
        for extra_text in appended_texts:
            expected = deepcopy(template)
            if extra_text is not None:
                expected["data"].append({"text": extra_text})
            expected_utterances.append(expected)

        self.assertEqual(expected_utterances, noisy_utterances)
    def test_add_unknown_words_to_utterances(self):
        """Check the augmentation output for a seeded ``RandomState(1)``.

        The expected result keeps every chunk in place and swaps the text of
        three of the four "you" entity chunks for the replacement string,
        with the surrounding whitespace of the original text preserved.
        """
        # Given
        def plain(text):
            # Chunk without any entity annotation.
            return {"text": text}

        def you(text):
            # Chunk annotated with the "you" entity.
            return {"text": text, "entity": "you"}

        replacement_string = "unknownword"
        unknownword_prob = .5
        random_state = np.random.RandomState(1)

        first_utterance = {
            "data": [
                plain("hello "),
                you(" you "),
                plain(" how are you "),
                you("dude"),
            ]
        }
        second_utterance = {
            "data": [
                plain("hello "),
                you("dude"),
                plain(" how are you "),
                you(" you "),
            ]
        }
        utterances = [first_utterance, second_utterance]

        # When
        noisy_utterances = add_unknown_word_to_utterances(
            utterances,
            replacement_string=replacement_string,
            unknown_word_prob=unknownword_prob,
            random_state=random_state)

        # Then
        expected_first = {
            "data": [
                plain("hello "),
                you(" unknownword "),
                plain(" how are you "),
                you("dude"),
            ]
        }
        expected_second = {
            "data": [
                plain("hello "),
                you("unknownword"),
                plain(" how are you "),
                you(" unknownword "),
            ]
        }
        expected_utterances = [expected_first, expected_second]
        self.assertEqual(expected_utterances, noisy_utterances)
    def test_add_unknown_words_to_utterances(self):
        """Check the augmentation output for a seeded ``RandomState(1)``.

        Two four-chunk utterances go in; the expected result has the same
        chunks with the text of some "you" entity chunks replaced by the
        replacement string (leading/trailing spaces of the text are kept).
        """
        # Given
        replacement_string = "unknownword"
        unknownword_prob = .5
        random_state = np.random.RandomState(1)
        utterances = [
            {"data": [
                {"text": "hello "},
                {"text": " you ", "entity": "you"},
                {"text": " how are you "},
                {"text": "dude", "entity": "you"},
            ]},
            {"data": [
                {"text": "hello "},
                {"text": "dude", "entity": "you"},
                {"text": " how are you "},
                {"text": " you ", "entity": "you"},
            ]},
        ]

        # When
        noisy_utterances = add_unknown_word_to_utterances(
            utterances,
            replacement_string=replacement_string,
            unknown_word_prob=unknownword_prob,
            random_state=random_state
        )

        # Then
        expected_utterances = [
            {"data": [
                {"text": "hello "},
                {"text": " unknownword ", "entity": "you"},
                {"text": " how are you "},
                {"text": "dude", "entity": "you"},
            ]},
            {"data": [
                {"text": "hello "},
                {"text": "unknownword", "entity": "you"},
                {"text": " how are you "},
                {"text": " unknownword ", "entity": "you"},
            ]},
        ]
        self.assertEqual(expected_utterances, noisy_utterances)