Ejemplo n.º 1
0
 def setup_method(self, test_method):
     """Create ``Model('1')`` and train it on two identical sentences.

     The model is stored on ``self.m`` before the training data is built,
     then trained on two separately constructed ``<s> a b </s>`` sentences.
     """
     self.m = Model('1')
     # Each iteration builds a fresh token list and a fresh Sentence.
     training_data = [
         Sentence(['<s>', 'a', 'b', '</s>']) for _ in range(2)
     ]
     self.m.train(training_data)
Ejemplo n.º 2
0
class TestSentenceClass:
    """Tests for Sentence equality comparisons and string rendering."""

    def setup_method(self, m):
        """Build the Sentence fixture stored on ``self.s`` for each test."""
        fixture_tokens = ['<s>', 'a', 'b', ',', 'b', 'b', 'a', 'c', '</s>']
        self.s = Sentence(fixture_tokens)

    def teardown_method(self, m):
        """Reset ``Sentence.word_counts`` to an empty dict and drop the fixture."""
        Sentence.word_counts = {}
        self.s = None

    def test_check_for_equality_with_other_sentence(self):
        """The fixture equals a Sentence with the same tokens, not a shorter one."""
        same = Sentence(['<s>', 'a', 'b', ',', 'b', 'b', 'a', 'c', '</s>'])
        assert self.s == same

        different = Sentence(['<s>', 'a', 'b', ',', 'c', '</s>'])
        assert self.s != different

    def test_check_for_equality_with_list(self):
        """The fixture compares equal to a plain list of its tokens."""
        expected_tokens = ['<s>', 'a', 'b', ',', 'b', 'b', 'a', 'c', '</s>']
        assert self.s == expected_tokens

    def test_check_for_equality_with_string(self):
        """The fixture compares equal to its rendered text form."""
        expected_text = 'a b, b b a c.'
        assert self.s == expected_text

    def test_representation_as_string(self):
        """``str()`` of the fixture yields the rendered text form."""
        rendered = str(self.s)
        assert rendered == 'a b, b b a c.'

    def test_error_when_replacing_without_training(self):
        """``replace_words()`` must raise RuntimeError; this class never trains."""
        with pytest.raises(RuntimeError):
            self.s.replace_words()
class TestSentenceClass:
    """Checks how a Sentence compares to other values and renders as text."""

    def setup_method(self, m):
        """Create the shared Sentence under test and keep it on ``self.s``."""
        tokens = ['<s>', 'a', 'b', ',', 'b', 'b', 'a', 'c', '</s>']
        self.s = Sentence(tokens)

    def teardown_method(self, m):
        """Replace ``Sentence.word_counts`` with an empty dict; clear ``self.s``."""
        Sentence.word_counts = {}
        self.s = None

    def test_check_for_equality_with_other_sentence(self):
        """Equal token sequences compare equal; differing ones do not."""
        matching = Sentence(['<s>', 'a', 'b', ',', 'b', 'b', 'a', 'c', '</s>'])
        assert self.s == matching

        mismatching = Sentence(['<s>', 'a', 'b', ',', 'c', '</s>'])
        assert self.s != mismatching

    def test_check_for_equality_with_list(self):
        """A Sentence can be compared directly against a token list."""
        token_list = ['<s>', 'a', 'b', ',', 'b', 'b', 'a', 'c', '</s>']
        assert self.s == token_list

    def test_check_for_equality_with_string(self):
        """A Sentence can be compared directly against its text form."""
        text_form = 'a b, b b a c.'
        assert self.s == text_form

    def test_representation_as_string(self):
        """Converting the Sentence with ``str()`` gives its text form."""
        as_text = str(self.s)
        assert as_text == 'a b, b b a c.'

    def test_error_when_replacing_without_training(self):
        """Calling ``replace_words()`` here is expected to raise RuntimeError."""
        with pytest.raises(RuntimeError):
            self.s.replace_words()
def make_sentences(text, full_tokenizer=False):
    """
    Turn input text into a list of Sentence objects

    Text that starts with '<s>' is treated as already preprocessed and is
    handed to split_preprocessed_text. Any other text is tokenized, with
    tokenize_real_text when full_tokenizer is true and with tokenize_text
    otherwise.

    Tokenized sentences of length zero are dropped before the Sentence
    objects are built.

    Arguments:
        text (str): the text to process
        full_tokenizer (bool): use tokenize_real_text instead of
            tokenize_text for text that is not preprocessed

    Returns:
        (list(Sentence)): the processed Sentences
    """
    # Pick the one splitter that applies; only that name is looked up.
    if text.startswith('<s>'):
        splitter = split_preprocessed_text
    elif full_tokenizer:
        splitter = tokenize_real_text
    else:
        splitter = tokenize_text

    return [
        Sentence(tokens)
        for tokens in splitter(text)
        if len(tokens) != 0
    ]
Ejemplo n.º 5
0
 def setup_method(self, m):
     """Build the Sentence fixture and store it on ``self.s``."""
     fixture_tokens = ['<s>', 'a', 'b', ',', 'b', 'b', 'a', 'c', '</s>']
     self.s = Sentence(fixture_tokens)
Ejemplo n.º 6
0
 def test_replacing_unseen_word_in_test_data(self):
     """A Sentence built from ``<s> a j </s>`` should equal ``'a <unk>.'``.

     NOTE(review): presumably relies on state prepared by a fixture outside
     this snippet, so that 'j' counts as unseen. Confirm against the class.
     """
     sentence = Sentence(['<s>', 'a', 'j', '</s>'])
     expected = 'a <unk>.'
     assert sentence == expected
Ejemplo n.º 7
0
 def test_one_training_method(self):
     """A Sentence built from ``<s> a b </s>`` should equal ``'a <unk>.'``.

     NOTE(review): presumably depends on training or word-count state set up
     outside this snippet. Confirm against the enclosing class.
     """
     sentence = Sentence(['<s>', 'a', 'b', '</s>'])
     expected = 'a <unk>.'
     assert sentence == expected
Ejemplo n.º 8
0
 def teardown_method(self, m):
     """Release the per-test sentence and reset Sentence's class-level state."""
     # Drop the fixture first, then call Sentence._reset().
     self.test_sentence = None
     Sentence._reset()
Ejemplo n.º 9
0
 def setup_method(self, m):
     """Reset Sentence, build the fixture, then flag words as replaced.

     The order matters: ``Sentence._reset()`` runs first, the fixture is
     constructed next, and ``Sentence.replaced_words`` is set to True last.
     """
     Sentence._reset()
     fixture_tokens = ['<s>', 'a', 'a', 'b', '</s>']
     self.test_sentence = Sentence(fixture_tokens)
     Sentence.replaced_words = True
Ejemplo n.º 10
0
    def test_check_for_equality_with_other_sentence(self):
        """``self.s`` equals a Sentence with the same tokens, not a shorter one."""
        same = Sentence(['<s>', 'a', 'b', ',', 'b', 'b', 'a', 'c', '</s>'])
        assert self.s == same

        different = Sentence(['<s>', 'a', 'b', ',', 'c', '</s>'])
        assert self.s != different
 def setup_method(self, m):
     """Create the Sentence under test and keep it on ``self.s``."""
     tokens = ['<s>', 'a', 'b', ',', 'b', 'b', 'a', 'c', '</s>']
     self.s = Sentence(tokens)
 def teardown_method(self, m):
     """Clear ``self.test_sentence`` and call ``Sentence._reset()`` after a test."""
     # The fixture reference is dropped before Sentence._reset() is called.
     self.test_sentence = None
     Sentence._reset()
 def setup_method(self, m):
     """Prepare a fresh ``<s> a a b </s>`` Sentence with replacement flagged on.

     ``Sentence._reset()`` is called before the fixture is built, and
     ``Sentence.replaced_words`` is set to True only after construction.
     """
     Sentence._reset()
     tokens = ['<s>', 'a', 'a', 'b', '</s>']
     self.test_sentence = Sentence(tokens)
     Sentence.replaced_words = True