    def test_should_build_training_data_with_noise(
            self, mocked_augment_utterances, mocked_get_noise):
        # Given
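        # get_noise is patched to return a fixed noise vocabulary and
        # augment_utterances is stubbed with get_mocked_augment_utterances
        # (assumed to return each intent's utterances unchanged), so the
        # training data built below is fully deterministic.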
        mocked_noises = ["mocked_noise_%s" % i for i in range(100)]
        mocked_get_noise.return_value = mocked_noises
        mocked_augment_utterances.side_effect = get_mocked_augment_utterances

        num_intents = 3
        utterances_length = 5
        num_queries_per_intent = 3
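        # Build a dataset of num_intents intents, each holding
        # num_queries_per_intent copies of a single-chunk utterance made of
        # utterances_length tokens.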
        fake_utterance = {
            "data": [{
                "text": " ".join("1" for _ in range(utterances_length))
            }]
        }
        dataset = {
            "intents": {
                str(i): {
                    "utterances": [fake_utterance] * num_queries_per_intent
                }
                for i in range(num_intents)
            }
        }
        random_state = np.random.RandomState(1)

        # When
        np.random.seed(42)
        noise_factor = 2
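        # Augmentation config that adds noise utterances but performs no
        # unknown-word replacement.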
        data_augmentation_config = IntentClassifierDataAugmentationConfig(
            noise_factor=noise_factor,
            unknown_word_prob=0,
            unknown_words_replacement_string=None)
        utterances, _, intent_mapping = build_training_data(
            dataset, LANGUAGE_EN, data_augmentation_config, random_state)

        # Then
        expected_utterances = [
            utterance for intent in itervalues(dataset[INTENTS])
            for utterance in intent[UTTERANCES]
        ]
        np.random.seed(42)
        noise = list(mocked_noises)
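        # Expected number of noisy utterances: noise_factor times the number
        # of utterances per intent, capped by the size of the noise
        # vocabulary.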
        noise_size = int(min(noise_factor * num_queries_per_intent,
                             len(noise)))
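        # Re-derive the expected noise texts with get_noise_it, using the
        # same noise list and random state as above (utterances_length is
        # assumed to be the mean noise length, with zero spread).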
        noise_it = get_noise_it(mocked_noises, utterances_length, 0,
                                random_state)
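        # Wrap each expected noise text into an utterance dict so it can be
        # compared with the utterances returned by build_training_data.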
        noisy_utterances = [
            text_to_utterance(next(noise_it)) for _ in range(noise_size)
        ]
        expected_utterances += noisy_utterances
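        # Intents should appear in the mapping in sorted order, followed by
        # a final None entry for the added noise.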
        expected_intent_mapping = sorted(dataset["intents"])
        expected_intent_mapping.append(None)
        self.assertListEqual(expected_utterances, utterances)
        self.assertListEqual(expected_intent_mapping, intent_mapping)