예제 #1
0
    def test_sent_prob_3and4gram(self):
        # A trigram and a 4-gram model built from the same corpus
        # (self.sents3) are checked against the same expected sentence
        # probabilities.
        models = (NGram(3, self.sents3), NGram(4, self.sents3))

        expected = {
            'el gato come pescado y ronca .': 0.0,  # 'ronca' unseen
            'la la la': 0.0,  # 'la' after 'la' unseen
            # the probability is decided by the sentence start:
            # whether it begins with 'la' or 'el'
            'el gato come pescado y duerme . ': 0.5,
            'la gata come pescado y duerme . ': 0.5
        }

        for text, want in expected.items():
            # Check the lower-order model first, then the higher-order one.
            for model in models:
                got = model.sent_prob(text.split())
                self.assertAlmostEqual(got, want, msg=text)
예제 #2
0
    def test_sent_prob_1gram(self):
        # Unigram model: each expected value is a product of per-token
        # probabilities, with the closing '</s>' marker counted as a token.
        unigram = NGram(1, self.sents)

        # 'come', '.' and '</s>' have prob 1/6, the rest have 1/12.
        p_common = 1 / 6.0
        p_rare = 1 / 12.0

        expected = {
            'el gato come pescado .': p_common**3 * p_rare**3,
            'la gata come salmón .': p_common**3 * p_rare**3,
            'el gato come salame .': 0.0,  # 'salame' unseen
            'la la la': p_common**1 * p_rare**3,
        }
        for text in expected:
            tokens = text.split()
            self.assertAlmostEqual(unigram.sent_prob(tokens),
                                   expected[text],
                                   msg=text)
예제 #3
0
    def test_sent_prob_1gram(self):
        # Checks sentence probabilities of a 1-gram model built from
        # self.sents against hand-computed products.
        model = NGram(1, self.sents)

        # 'come', '.' and '</s>' have prob 1/6, the rest have 1/12.
        sixth = 1 / 6.0
        twelfth = 1 / 12.0
        three_and_three = sixth**3 * twelfth**3

        cases = {
            'el gato come pescado .': three_and_three,
            'la gata come salmón .': three_and_three,
            'el gato come salame .': 0.0,  # 'salame' unseen
            'la la la': sixth**1 * twelfth**3,
        }
        for sentence, probability in cases.items():
            actual = model.sent_prob(sentence.split())
            self.assertAlmostEqual(actual, probability, msg=sentence)
예제 #4
0
    def test_sent_prob_3gram(self):
        # Checks sentence probabilities of a 3-gram model built from
        # self.sents; each expected value is written as a product of
        # per-token factors.
        trigram = NGram(3, self.sents)

        # after '<s>, <s>': 'el' and 'la' have prob 0.5.
        p_start = 0.5

        expected = {
            'el gato come pescado .': p_start * 1 * 1 * 1 * 1,
            'la gata come salmón .': p_start * 1 * 1 * 1 * 1,
            # prob('gato come salmon') = 0
            'el gato come salmón .': p_start * 1 * 1 * 0 * 1 * 1,
            'el gato come salame .': 0.0,  # 'salame' unseen
            'la la la': 0.0,  # 'la' after 'la' unseen
        }
        for text, want in expected.items():
            got = trigram.sent_prob(text.split())
            self.assertAlmostEqual(got, want, msg=text)
예제 #5
0
    def test_sent_prob_2gram(self):
        # Checks sentence probabilities of a 2-gram model built from
        # self.sents. Only two positions contribute a factor other than 1
        # in the seen sentences: the first word and the word after 'come'.
        bigram = NGram(2, self.sents)

        # after '<s>': 'el' and 'la' have prob 0.5.
        p_first_word = 0.5
        # after 'come': 'pescado' and 'salmón' have prob 0.5.
        p_after_come = 0.5

        expected = {
            'el gato come pescado .': p_first_word * p_after_come,
            'la gata come salmón .': p_first_word * p_after_come,
            'el gato come salmón .': p_first_word * p_after_come,
            'la gata come pescado .': p_first_word * p_after_come,
            'el gato come salame .': 0.0,  # 'salame' unseen
            'la la la': 0.0,  # 'la' after 'la' unseen
        }
        for text in expected:
            self.assertAlmostEqual(bigram.sent_prob(text.split()),
                                   expected[text],
                                   msg=text)
예제 #6
0
    def test_sent_prob_2gram(self):
        # Bigram model over self.sents: every seen sentence below is
        # expected to score 0.25, and sentences with an unseen bigram 0.
        model = NGram(2, self.sents)

        # after '<s>': 'el' and 'la' have prob 0.5.
        # after 'come': 'pescado' and 'salmón' have prob 0.5.
        seen = 0.5 * 0.5

        cases = {
            'el gato come pescado .': seen,
            'la gata come salmón .': seen,
            'el gato come salmón .': seen,
            'la gata come pescado .': seen,
            'el gato come salame .': 0.0,  # 'salame' unseen
            'la la la': 0.0,  # 'la' after 'la' unseen
        }
        for sentence, probability in cases.items():
            actual = model.sent_prob(sentence.split())
            self.assertAlmostEqual(actual, probability, msg=sentence)
예제 #7
0
    def test_sent_prob_3gram(self):
        # Trigram model over self.sents: sentences that follow a training
        # sentence exactly are expected to score 0.5; any unseen trigram
        # drives the product to 0.
        model = NGram(3, self.sents)

        # after '<s>, <s>': 'el' and 'la' have prob 0.5.
        half = 0.5
        seen = half * 1 * 1 * 1 * 1
        # prob('gato come salmon') = 0
        mixed = half * 1 * 1 * 0 * 1 * 1

        cases = {
            'el gato come pescado .': seen,
            'la gata come salmón .': seen,
            'el gato come salmón .': mixed,
            'el gato come salame .': 0.0,  # 'salame' unseen
            'la la la': 0.0,  # 'la' after 'la' unseen
        }
        for sentence in cases:
            words = sentence.split()
            self.assertAlmostEqual(model.sent_prob(words), cases[sentence],
                                   msg=sentence)