Esempio n. 1
0
 def test_split_to_words_russian(self):
     # split_to_words must return exactly this list of unicode words, in
     # this order, for the Russian QUOTE_FROM_TOLSTOY fixture.
     expected_words = [
         u'Не',
         u'слушайте',
         u'тех',
         u'кто',
         u'говорит',
         u'дурно',
         u'о',
         u'других',
         u'и',
         u'хорошо',
         u'о',
         u'вас',
     ]
     actual_words = textstatistics.split_to_words(QUOTE_FROM_TOLSTOY)
     self.assertEqual(actual_words, expected_words)
Esempio n. 2
0
    def test_evalutate_decoding_subset_eng(self):
        # A decoded text built only from words of the base text must score
        # 1.0 against a language derived from that same base text.
        source_text = 'This is a sample text.'

        # Keep only the first half of the base text's words.
        source_words = textstatistics.split_to_words(source_text)
        half_count = len(source_words) // 2
        partial_text = ' '.join(source_words[:half_count])

        # The language model is derived from the full base text.
        language = textstatistics.Languauge(
            textstatistics.get_char_frequencies(source_text),
            textstatistics.get_word_frequencies(source_text),
        )

        fitness = decode.evalutate_decoding(partial_text, language)
        self.assertEqual(fitness, 1.0)
Esempio n. 3
0
    def test_evalutate_decoding_subset_eng(self):
        # A decoded text built only from words of the base text must score
        # 1.0 against a language derived from that same base text.
        base_text = 'This is a sample text.'
        base_text_words = textstatistics.split_to_words(base_text)

        # Floor division keeps the slice bound an int. True division (`/`)
        # yields a float on Python 3, and slicing with a float raises
        # TypeError; `//` behaves the same on Python 2 and Python 3.
        half = len(base_text_words) // 2
        decoded_text = ' '.join(base_text_words[:half])

        alphabet = textstatistics.get_char_frequencies(base_text)
        dictionary = textstatistics.get_word_frequencies(base_text)
        language = textstatistics.Languauge(alphabet, dictionary)

        result = decode.evalutate_decoding(decoded_text, language)
        self.assertEqual(result, 1.0)
Esempio n. 4
0
def evalutate_decoding(text, language):
    '''
    Evaluates how the decoded text corresponds to the language.

    The text is split with textstatistics.split_to_words, every word is
    scored with language.word_fitness, and the mean score is returned.

    Returns estimated fitness as a float value from the range [0; 1], where
    0 means doesn't correspond at all,
    1 means all words are correct.
    (The range relies on word_fitness itself returning values in [0; 1].)

    If the text contains no words, 0.0 is returned instead of raising
    ZeroDivisionError.
    '''
    words = textstatistics.split_to_words(text)
    if not words:
        # Nothing to score: text without words matches nothing.
        return 0.0

    fitness_sum = 0.0
    for word in words:
        fitness_sum += language.word_fitness(word)
    return fitness_sum / len(words)
Esempio n. 5
0
def evalutate_decoding(text, language):
    '''
    Evaluates how the decoded text corresponds to the language.

    Computes the mean of language.word_fitness over the words produced by
    textstatistics.split_to_words(text).

    Returns estimated fitness as a float value from the range [0; 1], where
    0 means doesn't correspond at all,
    1 means all words are correct.
    (The range relies on word_fitness itself returning values in [0; 1].)

    Text that contains no words yields 0.0 rather than a
    ZeroDivisionError.
    '''
    words = textstatistics.split_to_words(text)
    word_count = len(words)
    if word_count == 0:
        # Guard the division below: there is nothing to average.
        return 0.0

    # Start from 0.0 so the total (and the division) is always a float.
    total_fitness = sum(
        (language.word_fitness(word) for word in words), 0.0)
    return total_fitness / word_count
Esempio n. 6
0
 def test_split_to_words_russian(self):
     # Expected output of split_to_words for the Russian
     # QUOTE_FROM_TOLSTOY fixture: unicode words in their original order.
     wanted = [
         u'Не', u'слушайте', u'тех', u'кто',
         u'говорит', u'дурно', u'о', u'других',
         u'и', u'хорошо', u'о', u'вас',
     ]
     got = textstatistics.split_to_words(QUOTE_FROM_TOLSTOY)
     self.assertEqual(got, wanted)
Esempio n. 7
0
 def test_split_to_words_english(self):
     # split_to_words must return exactly these six words, in this order
     # and with this casing, for the QUOTE_FROM_SHAKESPEARE fixture.
     actual_words = textstatistics.split_to_words(QUOTE_FROM_SHAKESPEARE)
     self.assertEqual(
         actual_words,
         ['To', 'be', 'or', 'not', 'to', 'be'],
     )
Esempio n. 8
0
 def test_split_to_words_english(self):
     # Expected word list for the English QUOTE_FROM_SHAKESPEARE fixture;
     # casing of the first word is preserved.
     wanted = [
         'To',
         'be',
         'or',
         'not',
         'to',
         'be',
     ]
     got = textstatistics.split_to_words(QUOTE_FROM_SHAKESPEARE)
     self.assertEqual(got, wanted)