Esempio n. 1
0
    def _prepare_data(self, data, save_preprocessed=False):
        """Preprocess every phrase in ``data`` and rebuild the frequency tables.

        Each phrase is passed through ``self.preprocess``. One entry per
        phrase is appended to ``self._data``: the preprocessed result when
        ``save_preprocessed`` is true, otherwise the phrase as given.

        Two ``Freq`` objects are then built, one over the collected words
        and one over the preprocessed phrases, and stored on
        ``self._words_freq`` / ``self._phrases_freq``. A ``.copy()`` of each
        is kept in a private "recovery" attribute.

        Args:
            data: Iterable of phrases to ingest.
            save_preprocessed: Store the preprocessed phrase in
                ``self._data`` instead of the original one.
        """
        collected_words = []
        collected_phrases = []

        for raw in data:
            cleaned = self.preprocess(raw)
            self._data.append(cleaned if save_preprocessed else raw)
            collected_phrases.append(cleaned)

            if isinstance(cleaned, str):
                # String phrases contribute their whitespace-separated tokens.
                collected_words.extend(cleaned.split())
            else:
                # A non-string result is counted as a single "word".
                collected_words.append(cleaned)

        word_table = Freq(collected_words)
        self._words_freq = word_table
        # NOTE(review): presumably kept so the table can be restored after
        # later mutation -- confirm against the rest of the class.
        self.__recovery_word_freq = word_table.copy()

        phrase_table = Freq(collected_phrases)
        self._phrases_freq = phrase_table
        self.__recovery_phrase_freq = phrase_table.copy()
Esempio n. 2
0
    def test_sanity(self):
        """Smoke-test ``preprocess.separate`` and a few ``Freq`` queries."""
        # --- preprocess.separate -------------------------------------------
        separated = preprocess.separate('how are you?', sep='?')
        self.assertEqual(separated, 'how are you ?')

        separated = preprocess.separate(
            'how are you,man?',
            sep=('?', ','),
            between_char=True,
        )
        self.assertEqual(separated, 'how are you , man ?')

        separated = preprocess.separate('how are! you?')
        self.assertEqual(separated, 'how are ! you ?')

        # --- Freq.sample ---------------------------------------------------
        freq = Freq([1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 'hi', 'o', 'a'])

        seen_once = freq.sample(max_freq=1)
        self.assertEqual(
            seen_once,
            {4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 'hi': 1, 'o': 1, 'a': 1},
        )

        seen_twice = freq.sample(freq=2)
        self.assertEqual(seen_twice, {1: 2, 3: 2, 2: 2})

        # An exact ``freq`` together with a min/max bound is expected to be
        # rejected with an AssertionError.
        for conflicting in ({'freq': 1, 'max_freq': 2},
                            {'freq': 1, 'min_freq': 2}):
            with self.assertRaises(AssertionError):
                freq.sample(**conflicting)

        # --- Freq.least_freq -----------------------------------------------
        freq = Freq([1, 2, 3, 3, 4, 5, 6, 7, 6, 7, 12, 31, 123, 5, 3])
        rarest_first = freq.least_freq()
        # Expected mapping is written out from the lowest count to the highest.
        self.assertEqual(
            rarest_first,
            {
                123: 1,
                31: 1,
                12: 1,
                4: 1,
                2: 1,
                1: 1,
                7: 2,
                6: 2,
                5: 2,
                3: 3,
            },
        )
Esempio n. 3
0
 def setUp(self) -> None:
     """Store a small ``Freq`` sample on ``self.freq``.

     The sample contains 1, 2 and 3 once each and 4 twice.
     """
     # NOTE(review): presumably the unittest per-test fixture hook; the
     # enclosing class is not visible here -- confirm.
     self.freq = Freq([1, 2, 3, 4, 4])