예제 #1
0
    def test_evalutate_decoding_complete_eng(self):
        # The language model is built from the very text being evaluated,
        # so the evaluation is expected to come out as exactly 1.0.
        sample = 'This is a sample text.'

        char_freqs = textstatistics.get_char_frequencies(sample)
        word_freqs = textstatistics.get_word_frequencies(sample)
        model = textstatistics.Languauge(char_freqs, word_freqs)

        score = decode.evalutate_decoding(sample, model)
        self.assertEqual(score, 1.0)
예제 #2
0
    def test_evalutate_decoding_complete_eng(self):
        # Reference text and decoded text are one and the same string; the
        # test pins the evaluation result for that case to 1.0.
        reference = 'This is a sample text.'
        candidate = reference

        char_freqs = textstatistics.get_char_frequencies(reference)
        word_freqs = textstatistics.get_word_frequencies(reference)
        model = textstatistics.Languauge(char_freqs, word_freqs)

        self.assertEqual(decode.evalutate_decoding(candidate, model), 1.0)
예제 #3
0
    def test_evalutate_decoding_almost_eng(self):
        # The candidate differs from the reference text in a few letters;
        # its evaluation must land strictly between 0.5 and 1.0.
        reference = 'This is a sample text.'
        candidate = 'Thas as i simple text.'

        char_freqs = textstatistics.get_char_frequencies(reference)
        word_freqs = textstatistics.get_word_frequencies(reference)
        model = textstatistics.Languauge(char_freqs, word_freqs)

        score = decode.evalutate_decoding(candidate, model)
        self.assertTrue(0.5 < score < 1.0)
예제 #4
0
    def test_evalutate_decoding_almost_eng(self):
        # Language statistics come from the reference text only; the
        # slightly garbled candidate is then evaluated against them and
        # must fall strictly inside the (0.5, 1.0) interval.
        reference = 'This is a sample text.'
        candidate = 'Thas as i simple text.'

        model = textstatistics.Languauge(
            textstatistics.get_char_frequencies(reference),
            textstatistics.get_word_frequencies(reference),
        )

        score = decode.evalutate_decoding(candidate, model)
        self.assertTrue(0.5 < score < 1.0)
예제 #5
0
    def test_evalutate_decoding_subset_eng(self):
        # The candidate is the first half of the reference text's words,
        # re-joined with single spaces; the test expects it to evaluate
        # to exactly 1.0 against the reference-derived language.
        reference = 'This is a sample text.'
        words = textstatistics.split_to_words(reference)
        half = len(words) // 2
        candidate = ' '.join(words[:half])

        char_freqs = textstatistics.get_char_frequencies(reference)
        word_freqs = textstatistics.get_word_frequencies(reference)
        model = textstatistics.Languauge(char_freqs, word_freqs)

        score = decode.evalutate_decoding(candidate, model)
        self.assertEqual(score, 1.0)
예제 #6
0
    def test_evalutate_decoding_subset_eng(self):
        # The decoded text is the first half of the base text's words,
        # re-joined with single spaces; the test expects it to evaluate
        # to exactly 1.0 against the language built from the base text.
        base_text = 'This is a sample text.'
        base_text_words = textstatistics.split_to_words(base_text)
        # Floor division keeps the slice bound an int on Python 3 as well;
        # true division would yield a float, which is not a valid index.
        half = len(base_text_words) // 2
        decoded_text = ' '.join(base_text_words[:half])

        alphabet = textstatistics.get_char_frequencies(base_text)
        dictionary = textstatistics.get_word_frequencies(base_text)
        language = textstatistics.Languauge(alphabet, dictionary)

        result = decode.evalutate_decoding(decoded_text, language)
        self.assertEqual(result, 1.0)
예제 #7
0
    def setUp(self):
        """Build the language model and a substitution code for the tests.

        Sets ``self.original_text``, ``self.language`` (built from the
        character and word frequencies of the original text) and
        ``self.code`` (a dict mapping every retained character to a
        character from a shuffled copy of the same character set).
        """
        original_alphabet = list(u'абвгдеёжзийклмнопрстуфхцчшщъыьэюя')

        self.original_text = data.QUOTE_FROM_ILF_AND_PETROV
        alphabet = textstatistics.get_char_frequencies(self.original_text)
        # Keep only characters listed in original_alphabet. ``items()``
        # is used rather than ``iteritems()`` because the latter does not
        # exist on Python 3 dicts, while ``items()`` works on both.
        alphabet = {
            char: frequency
            for (char, frequency) in alphabet.items()
            if char in original_alphabet
        }
        dictionary = textstatistics.get_word_frequencies(self.original_text)
        self.language = textstatistics.Languauge(alphabet, dictionary)

        actual_original_alphabet = alphabet.keys()
        shuffled_alphabet = list(actual_original_alphabet)
        # Fixed seed so the shuffle is repeatable from run to run.
        random.seed(1001)
        random.shuffle(shuffled_alphabet)
        self.code = dict(zip(actual_original_alphabet, shuffled_alphabet))
예제 #8
0
 def test_get_word_frequencies_russian(self):
     # Expected counts for the Tolstoy quote: every listed word appears
     # once, except u'о', which appears twice.
     word_counts = textstatistics.get_word_frequencies(QUOTE_FROM_TOLSTOY)
     self.assertDictEqual(word_counts, {
         u'Не': 1,
         u'слушайте': 1,
         u'тех': 1,
         u'кто': 1,
         u'говорит': 1,
         u'дурно': 1,
         u'о': 2,
         u'других': 1,
         u'и': 1,
         u'хорошо': 1,
         u'вас': 1,
     })
예제 #9
0
 def test_get_word_frequencies_russian(self):
     # The Tolstoy quote is expected to yield a count of one for every
     # listed word, apart from u'о', which is counted twice.
     expected_counts = {
         u'Не': 1,
         u'слушайте': 1,
         u'тех': 1,
         u'кто': 1,
         u'говорит': 1,
         u'дурно': 1,
         u'о': 2,
         u'других': 1,
         u'и': 1,
         u'хорошо': 1,
         u'вас': 1,
     }
     actual_counts = textstatistics.get_word_frequencies(QUOTE_FROM_TOLSTOY)
     self.assertDictEqual(actual_counts, expected_counts)
예제 #10
0
    def setUp(self):
        """Prepare the shared fixtures: text, language model and code.

        After this runs, ``self.original_text`` holds the quote,
        ``self.language`` is built from its character and word
        frequencies, and ``self.code`` maps each retained character to a
        character taken from a shuffled copy of the same characters.
        """
        original_alphabet = list(u'абвгдеёжзийклмнопрстуфхцчшщъыьэюя')

        self.original_text = data.QUOTE_FROM_ILF_AND_PETROV
        all_char_freqs = textstatistics.get_char_frequencies(
            self.original_text)

        # Drop every character that is not listed in original_alphabet.
        alphabet = {}
        for char, frequency in all_char_freqs.items():
            if char in original_alphabet:
                alphabet[char] = frequency

        word_freqs = textstatistics.get_word_frequencies(self.original_text)
        self.language = textstatistics.Languauge(alphabet, word_freqs)

        source_chars = alphabet.keys()
        target_chars = list(source_chars)
        # Seed first so the shuffle gives the same permutation every run.
        random.seed(1001)
        random.shuffle(target_chars)
        self.code = dict(zip(source_chars, target_chars))
예제 #11
0
 def test_get_word_frequencies_english(self):
     # The expected mapping keeps 'To' and 'to' as separate keys and
     # counts 'be' twice.
     word_counts = textstatistics.get_word_frequencies(QUOTE_FROM_SHAKESPEARE)
     self.assertDictEqual(
         word_counts,
         {'To': 1, 'be': 2, 'or': 1, 'not': 1, 'to': 1},
     )
예제 #12
0
 def test_get_word_frequencies_empty(self):
     # An empty string is expected to produce an empty frequency mapping.
     word_counts = textstatistics.get_word_frequencies('')
     self.assertEqual(word_counts, {})
예제 #13
0
 def test_get_word_frequencies_english(self):
     # 'be' is expected twice; 'To' and 'to' are expected as distinct
     # entries with one occurrence each.
     expected_counts = {'To': 1, 'be': 2, 'or': 1, 'not': 1, 'to': 1}
     actual_counts = textstatistics.get_word_frequencies(
         QUOTE_FROM_SHAKESPEARE)
     self.assertDictEqual(actual_counts, expected_counts)
예제 #14
0
 def test_get_word_frequencies_empty(self):
     # With no input text the expected result is an empty mapping.
     self.assertEqual(textstatistics.get_word_frequencies(''), {})