def _prepare_data(self, data, save_preprocessed=False):
    """
    Preprocess every phrase in `data` and rebuild the word/phrase frequency tables.

    Each phrase is passed through `self.preprocess`. One entry per phrase is
    appended to `self._data`: the preprocessed form when `save_preprocessed`
    is true, otherwise the phrase exactly as received. The frequency tables
    are built only from the phrases handled in this call, and a copy of each
    table is stored in the private `__recovery_*` attributes.

    :param data: iterable of phrases to ingest.
    :param save_preprocessed: store the preprocessed phrase in `self._data`
        instead of the raw one.
    """
    tokens = []
    cleaned_phrases = []
    for raw in data:
        cleaned = self.preprocess(raw)
        self._data.append(cleaned if save_preprocessed else raw)
        cleaned_phrases.append(cleaned)
        if isinstance(cleaned, str):
            # String results are tokenised on whitespace.
            tokens.extend(cleaned.split())
        else:
            # Anything that is not a string is counted as one single token.
            tokens.append(cleaned)

    word_freq = Freq(tokens)
    self._words_freq = word_freq
    self.__recovery_word_freq = word_freq.copy()

    phrase_freq = Freq(cleaned_phrases)
    self._phrases_freq = phrase_freq
    self.__recovery_phrase_freq = phrase_freq.copy()
def test_sanity(self):
    """Smoke-check `preprocess.separate` and the `Freq` sampling helpers."""
    # (positional args, keyword args, expected output) for preprocess.separate.
    separate_cases = (
        (('how are you?',), {'sep': '?'}, 'how are you ?'),
        (('how are you,man?',), {'sep': ('?', ','), 'between_char': True},
         'how are you , man ?'),
        (('how are! you?',), {}, 'how are ! you ?'),
    )
    for args, kwargs, expected in separate_cases:
        self.assertEqual(preprocess.separate(*args, **kwargs), expected)

    counter = Freq([1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 'hi', 'o', 'a'])

    # max_freq=1 is expected to keep only the items that occur once.
    at_most_once = counter.sample(max_freq=1)
    self.assertEqual(
        at_most_once,
        {4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 'hi': 1, 'o': 1, 'a': 1},
    )

    # freq=2 is expected to keep only the items that occur exactly twice.
    exactly_twice = counter.sample(freq=2)
    self.assertEqual(exactly_twice, {1: 2, 3: 2, 2: 2})

    # Combining `freq` with either bound must be rejected.
    with self.assertRaises(AssertionError):
        counter.sample(freq=1, max_freq=2)
    with self.assertRaises(AssertionError):
        counter.sample(freq=1, min_freq=2)

    counter = Freq([1, 2, 3, 3, 4, 5, 6, 7, 6, 7, 12, 31, 123, 5, 3])
    # Dict equality ignores ordering, so this pins the counts only.
    expected_counts = {
        123: 1, 31: 1, 12: 1, 4: 1, 2: 1, 1: 1, 7: 2, 6: 2, 5: 2, 3: 3,
    }
    self.assertEqual(counter.least_freq(), expected_counts)
def setUp(self) -> None:
    """Create the `Freq` fixture used by the tests: four distinct values, with 4 repeated."""
    sample_items = [1, 2, 3, 4, 4]
    self.freq = Freq(sample_items)