コード例 #1
0
    def test_eq(self):
        # Two histories built from equal components must compare equal.
        start = ('<s>', '<s>')
        first = History('el gato come pescado .'.split(), start, 0)
        twin = History('el gato come pescado .'.split(), start, 0)
        self.assertEqual(first, twin)

        # A history over a different sentence must compare unequal.
        other = History('la gata come salmón .'.split(), start, 0)
        self.assertNotEqual(first, other)
コード例 #2
0
    def tag(self, sent):
        """Tag a sentence.

        Positions are tagged left to right. Each position is tagged by
        calling self.tag_history on a History holding the sentence, the
        previously chosen tags and the position.

        sent -- the sentence.

        Returns the list of chosen tags, one per position of sent (an empty
        list for an empty sentence).
        """
        prev_tags = ('<s>', ) * (self.n - 1)
        tags = []
        # Every position, including the first, goes through the same loop so
        # that an empty sentence produces no history and no spurious tag.
        for i in range(len(sent)):
            tag = self.tag_history(History(sent, prev_tags, i))
            tags.append(tag)
            # Append first, then drop the oldest entry: this keeps the
            # context at length n - 1, including the n == 1 case where the
            # context must stay empty.
            prev_tags = (prev_tags + (tag, ))[1:]

        return tags
コード例 #3
0
    def test_sent_histories_1gram(self):
        # For the 1-gram model every expected history carries an empty
        # prev_tags tuple; only the position changes.
        tagger = MEMM(1, self.tagged_sents)
        actual = list(tagger.sent_histories(self.tagged_sents[0]))

        words = 'el gato come pescado .'.split()
        expected = [History(words, (), pos) for pos in range(5)]

        self.assertEqual(actual, expected)
コード例 #4
0
    def test_sent_histories_3gram(self):
        # For the 3-gram model each expected history carries the two
        # preceding tags, padded with '<s>' at the start of the sentence.
        tagger = MEMM(3, self.tagged_sents)
        actual = list(tagger.sent_histories(self.tagged_sents[0]))

        words = 'el gato come pescado .'.split()
        contexts = [
            ('<s>', '<s>'),
            ('<s>', 'D'),
            ('D', 'N'),
            ('N', 'V'),
            ('V', 'N'),
        ]
        expected = [
            History(words, context, pos)
            for pos, context in enumerate(contexts)
        ]

        self.assertEqual(actual, expected)
コード例 #5
0
ファイル: memm.py プロジェクト: famaf/PLN_2017
    def sent_histories(self, tagged_sent):
        """
        Build the histories of a tagged sentence, one per word position.

        tagged_sent -- the tagged sentence (a list of pairs (word, tag)).

        Returns a list of History(sent, prev_tags, i) in position order,
        where sent is the list of words, prev_tags is a tuple with the
        n - 1 tags preceding position i, and i is the position.
        """
        width = self.n - 1  # number of previous tags kept as context
        words = [word for word, _ in tagged_sent]
        # Left-pad the tags with start markers so that the slice
        # padded[i:i + width] is exactly the context of position i,
        # including at the beginning of the sentence.
        padded = ["<s>"] * width + [tag for _, tag in tagged_sent]

        return [
            History(words, tuple(padded[pos:pos + width]), pos)
            for pos in range(len(words))
        ]
コード例 #6
0
ファイル: memm.py プロジェクト: jonathanmutal/PLN-2017
    def tag(self, sent):
        """Tag a sentence.

        Positions are tagged left to right. Each position is tagged by
        calling self.tag_history on a History holding the sentence, the
        previously chosen tags and the position.

        sent -- the sentence.

        Returns the list of chosen tags, one per position of sent (an empty
        list for an empty sentence).
        """
        # The context is a tuple rather than a list: a tuple keeps the
        # History hashable, and a list context would never compare equal to
        # a tuple-based one.
        prev_tags = ('<s>', ) * (self.n - 1)
        tags = []
        # Position 0 is handled inside the loop so that an empty sentence
        # produces no history and no spurious tag.
        for index in range(len(sent)):
            h = History(sent, prev_tags, index)
            tag = self.tag_history(h)
            tags.append(tag)
            # Append first, then drop the oldest entry, so the context keeps
            # length n - 1 (and stays empty when n == 1).
            prev_tags = (prev_tags + (tag, ))[1:]

        return tags
コード例 #7
0
 def test_prev_tags(self):
     # The prev_tags feature is expected to return the history's tag
     # context unchanged, for both sentences and every position.
     sent0 = 'El gato come pescado .'.split()
     sent1 = 'La gata come salmón .'.split()
     contexts = [
         ('<s>', '<s>'),
         ('<s>', 'D'),
         ('D', 'N'),
         ('N', 'V'),
         ('V', 'N'),
     ]
     for words in (sent0, sent1):
         for pos, context in enumerate(contexts):
             h = History(words, context, pos)
             self.assertEqual(prev_tags(h), context)
コード例 #8
0
 def test_word_isupper(self):
     # Only the fully upper-case words ('EL' and 'SALMÓN') are expected to
     # make the word_isupper feature return True.
     sent0 = 'EL gato come pescado .'.split()
     sent1 = 'La gata come SALMÓN .'.split()
     contexts = [
         ('<s>', '<s>'),
         ('<s>', 'D'),
         ('D', 'N'),
         ('N', 'V'),
         ('V', 'N'),
     ]
     cases = [
         (sent0, (True, False, False, False, False)),
         (sent1, (False, False, False, True, False)),
     ]
     for words, flags in cases:
         for pos, (context, flag) in enumerate(zip(contexts, flags)):
             h = History(words, context, pos)
             # The history is passed as the failure message.
             self.assertEqual(word_isupper(h), flag, h)
コード例 #9
0
 def test_word_lower(self):
     # The word_lower feature is expected to return the lower-cased word
     # at the history's position.
     sent0 = 'El gato come pescado .'.split()
     sent1 = 'La gata come salmón .'.split()
     contexts = [
         ('<s>', '<s>'),
         ('<s>', 'D'),
         ('D', 'N'),
         ('N', 'V'),
         ('V', 'N'),
     ]
     cases = [
         (sent0, ('el', 'gato', 'come', 'pescado', '.')),
         (sent1, ('la', 'gata', 'come', 'salmón', '.')),
     ]
     for words, lowered in cases:
         for pos, (context, expected) in enumerate(zip(contexts, lowered)):
             h = History(words, context, pos)
             self.assertEqual(word_lower(h), expected)
コード例 #10
0
ファイル: memm.py プロジェクト: famaf/PLN_2017
    def tag(self, sent):
        """
        Tag a sentence.

        Positions are tagged left to right. Each position is tagged by
        calling self.tag_history on a History holding the sentence, the
        previously chosen tags and the position.

        sent -- the sentence.

        Returns the list of chosen tags, one per position of sent (an empty
        list for an empty sentence).
        """
        prev_tags = ("<s>", ) * (self.n - 1)
        my_tagging = []

        # The first position is tagged inside the loop as well, so an empty
        # sentence never reaches tag_history and yields an empty tagging.
        for i in range(len(sent)):
            tag = self.tag_history(History(sent, prev_tags, i))
            my_tagging.append(tag)
            # Append first, then drop the oldest entry, so the context keeps
            # length n - 1 (and stays empty when n == 1).
            prev_tags = (prev_tags + (tag, ))[1:]

        return my_tagging
コード例 #11
0
    def test_prev_word_istitle(self):
        # PrevWord(word_istitle) is expected to return 'BOS' at position 0
        # and otherwise the string form of word_istitle for the previous
        # word.
        prev_word_istitle = PrevWord(word_istitle)

        sent0 = 'EL gato come pescado .'.split()
        sent1 = 'La gata come SALMÓN .'.split()
        contexts = [
            ('<s>', '<s>'),
            ('<s>', 'D'),
            ('D', 'N'),
            ('N', 'V'),
            ('V', 'N'),
        ]
        cases = [
            (sent0, ('BOS', 'False', 'False', 'False', 'False')),
            (sent1, ('BOS', 'True', 'False', 'False', 'False')),
        ]
        for words, values in cases:
            for pos, (context, expected) in enumerate(zip(contexts, values)):
                h = History(words, context, pos)
                self.assertEqual(prev_word_istitle(h), expected)
コード例 #12
0
    def test_prev_word_lower(self):
        # PrevWord(word_lower) is expected to return 'BOS' (beginning of
        # sentence) at position 0 and otherwise the lower-cased previous
        # word.
        prev_word_lower = PrevWord(word_lower)

        sent0 = 'El gato come pescado .'.split()
        sent1 = 'La gata come salmón .'.split()
        contexts = [
            ('<s>', '<s>'),
            ('<s>', 'D'),
            ('D', 'N'),
            ('N', 'V'),
            ('V', 'N'),
        ]
        cases = [
            (sent0, ('BOS', 'el', 'gato', 'come', 'pescado')),
            (sent1, ('BOS', 'la', 'gata', 'come', 'salmón')),
        ]
        for words, values in cases:
            for pos, (context, expected) in enumerate(zip(contexts, values)):
                h = History(words, context, pos)
                self.assertEqual(prev_word_lower(h), expected)
コード例 #13
0
ファイル: memm.py プロジェクト: macfernandez/PLN-UBA2018
    def sent_histories(self, tagged_sent):
        """
        Lazily yield the histories of a tagged sentence, one per position.

        tagged_sent -- the tagged sentence (a list of pairs (word, tag)).

        Each yielded History holds the list of words, the tuple of the
        n - 1 tags preceding the position ('<s>' before the sentence start)
        and the position itself.
        """
        context = ('<s>', ) * (self.n - 1)
        words = [word for word, _ in tagged_sent]
        for pos, (_, gold_tag) in enumerate(tagged_sent):
            yield History(words, context, pos)
            # Append first, then drop the oldest entry, so the context keeps
            # length n - 1 (and stays empty when n == 1).
            context = (context + (gold_tag, ))[1:]
コード例 #14
0
    def sent_histories(self, tagged_sent):
        """
        Build the histories of a tagged sentence, one per word position.

        tagged_sent -- the tagged sentence (a list of pairs (word, tag)).

        Returns a list of History(sent, prev_tags, i) in position order.
        """
        n = self.n
        # zip(*...) cannot be unpacked into two names for an empty sentence,
        # so that case is handled separately.
        if len(tagged_sent) == 0:
            words, gold_tags = (), ()
        else:
            words, gold_tags = zip(*tagged_sent)
        words = list(words)

        # Left-pad the tags with START so that padded[i:i + n - 1] is the
        # context of position i.
        padded = (START, ) * (n - 1) + gold_tags

        histories = []
        for pos in range(len(words)):
            histories.append(History(words, padded[pos:pos + n - 1], pos))
        return histories
コード例 #15
0
    def tag(self, sent):
        """Tag a sentence using beam inference with beam of size 1.

        Each position is tagged with self.tag_history, and the chosen tag
        becomes part of the context for the next position.

        sent -- the sentence.

        Returns the list of chosen tags, one per position.
        """
        context = ('<s>',) * (self.n - 1)
        chosen = []

        for pos, _ in enumerate(sent):
            best = self.tag_history(History(sent, context, pos))
            chosen.append(best)
            # Append first, then drop the oldest entry, so the context keeps
            # length n - 1 (and stays empty when n == 1).
            context = (context + (best,))[1:]

        return chosen
コード例 #16
0
    def tag(self, sent):
        """Tag a sentence.

        Each position is tagged with self.tag_history, and the chosen tag
        becomes part of the context for the next position.

        sent -- the sentence.

        Returns the list of chosen tags, one per position.
        """
        context = (START, ) * (self.n - 1)
        chosen = []
        for pos in range(len(sent)):
            best = self.tag_history(
                History(sent=sent, prev_tags=context, i=pos))
            chosen.append(best)
            # Append first, then drop the oldest entry, so the context keeps
            # length n - 1 (and stays empty when n == 1).
            context = (context + (best, ))[1:]

        return chosen
コード例 #17
0
    def sent_histories(self, tagged_sent):
        """
        Build the histories of a tagged sentence, one per word position.

        tagged_sent -- the tagged sentence (a list of pairs (word, tag)).

        Returns a list of History(sent, prev_tags, i) in position order.
        """
        width = self.n - 1  # number of previous tags kept as context
        words = [word for word, _ in tagged_sent]
        # Left-pad the tags with '<s>' so that padded[i:i + width] is the
        # context of position i.
        padded = ['<s>'] * width + [tag for _, tag in tagged_sent]
        return [
            History(words, tuple(padded[pos:pos + width]), pos)
            for pos in range(len(tagged_sent))
        ]
コード例 #18
0
ファイル: memm.py プロジェクト: jonathanmutal/PLN-2017
    def sent_histories(self, tagged_sent):
        """
        Build the histories of a tagged sentence, one per word position.

        tagged_sent -- the tagged sentence (a list of pairs (word, tag)).

        Returns a list of History(sent, prev_tags, i) in position order
        (an empty list for an empty sentence).
        """
        n = self.n

        # zip(*...) cannot be unpacked into two names for an empty sentence,
        # so return early in that case.
        if not tagged_sent:
            return []

        words, gold_tags = zip(*tagged_sent)
        # Left-pad the tags with '<s>' so that padded[i:i + n - 1] is the
        # context of position i.
        padded = ('<s>', ) * (n - 1) + gold_tags
        sent = list(words)

        histories = []
        for pos in range(len(words)):
            histories.append(History(sent, padded[pos:pos + n - 1], pos))
        return histories
コード例 #19
0
 def test_word_isdigit(self):
     # Only the all-digit words ('3' and '10') are expected to make the
     # word_isdigit feature return True; the mixed token 'c0m3n' is not.
     sent0 = 'El gato come 3 pescados .'.split()
     sent1 = 'Las 10 gatas c0m3n salmón .'.split()
     cases = [
         (sent0, [
             (('<s>', '<s>'), False),
             (('<s>', 'D'), False),
             (('D', 'N'), False),
             (('N', 'V'), True),
             (('V', 'C'), False),
             (('C', 'N'), False),
         ]),
         (sent1, [
             (('<s>', '<s>'), False),
             (('<s>', 'D'), True),
             (('D', 'C'), False),
             (('C', 'N'), False),
             (('N', 'V'), False),
             (('V', 'N'), False),
         ]),
     ]
     for words, rows in cases:
         for pos, (context, expected) in enumerate(rows):
             h = History(words, context, pos)
             # The history is passed as the failure message.
             self.assertEqual(word_isdigit(h), expected, h)