def test_count_fail(self):
        """Expect ``truncate_by_min_freq(2)`` to raise ``ValueError``.

        A fresh ``Vocabulary`` is filled with every sentence of
        ``TOKENIZED_CORPUS`` before the truncation is attempted.
        """
        vocab = Vocabulary()
        for tokens in TOKENIZED_CORPUS:
            vocab.add_tokenized_text(tokens)

        # Callable form of assertRaises: it invokes
        # vocab.truncate_by_min_freq(2) and fails unless ValueError is raised.
        self.assertRaises(ValueError, vocab.truncate_by_min_freq, 2)
Ejemplo n.º 2
0
    def test_min_freq(self):
        """Check vocabulary membership after ``truncate_by_min_freq(2)``.

        A fresh ``Vocabulary`` is filled with every sentence of
        ``TOKENIZED_CORPUS`` and then truncated with a minimum frequency
        of 2. Afterwards "walrus" must still be a member and "colorless"
        must not be.
        """
        vocabulary = Vocabulary()

        for sentence in TOKENIZED_CORPUS:
            vocabulary.add_tokenized_text(sentence)

        vocabulary.truncate_by_min_freq(2)

        # assertIn/assertNotIn name the offending word and the container in
        # the failure message, unlike assertTrue/assertFalse on an `in` test.
        self.assertIn("walrus", vocabulary)
        self.assertNotIn("colorless", vocabulary)
Ejemplo n.º 3
0
from neuralmonkey.vocabulary import Vocabulary

# Raw test sentences; words are separated by single spaces.
CORPUS = [
    "the colorless ideas slept furiously", "pooh slept all night",
    "working class hero is something to be", "I am the working class walrus",
    "walrus for president",
]

# Each sentence split on single spaces into a list of word tokens.
TOKENIZED_CORPUS = [sentence.split(" ") for sentence in CORPUS]

# Module-level vocabulary shared by the tests; it is fed every tokenized
# sentence once, at import time.
VOCABULARY = Vocabulary()
for s in TOKENIZED_CORPUS:
    VOCABULARY.add_tokenized_text(s)

class TestVacabulary(unittest.TestCase):
    def test_all_words_in(self):
        for sentence in TOKENIZED_CORPUS:
            for word in sentence:
                self.assertTrue(word in VOCABULARY)

    def test_unknown_word(self):
        self.assertFalse("jindrisek" in VOCABULARY)

    def test_padding(self):
        pass

    def test_weights(self):
        pass
Ejemplo n.º 4
0
import unittest

from neuralmonkey.vocabulary import Vocabulary

# Raw test sentences; words are separated by single spaces.
CORPUS = [
    "the colorless ideas slept furiously",
    "pooh slept all night",
    "working class hero is something to be",
    "I am the working class walrus",
    "walrus for president",
]

# Each sentence split on single spaces into a list of word tokens.
TOKENIZED_CORPUS = [sentence.split(" ") for sentence in CORPUS]

# Module-level vocabulary shared by the tests; it is fed every tokenized
# sentence once, at import time.
VOCABULARY = Vocabulary()
for s in TOKENIZED_CORPUS:
    VOCABULARY.add_tokenized_text(s)


class TestVocabulary(unittest.TestCase):
    def test_all_words_in(self):
        """Every word of every tokenized sentence must be in ``VOCABULARY``."""
        for sentence in TOKENIZED_CORPUS:
            for word in sentence:
                # assertIn names the missing word in the failure message,
                # unlike assertTrue on an `in` expression.
                self.assertIn(word, VOCABULARY)

    def test_unknown_word(self):
        """A word that is not in the corpus must not be in ``VOCABULARY``."""
        # assertNotIn reports the unexpected member and the container on
        # failure, unlike assertFalse on an `in` expression.
        self.assertNotIn("jindrisek", VOCABULARY)

    def test_padding(self):
        """Placeholder: no assertions yet, so this test always passes."""
        pass

    def test_weights(self):