Esempio n. 1
0
    def test_dictionary_matcher(self):
        """ Check DictionaryMatch on lemmas against every fixture sentence """
        def matched_indices(matcher):
            # Keep only the first element of each pair yielded by apply().
            return [[idxs for idxs, _ in matcher.apply(sent)]
                    for sent in self.sents]

        boat_matcher = ddlite_matcher.DictionaryMatch(
            dictionary=self.d1, label='Boat', match_attrib='lemmas')
        boat_hits = matched_indices(boat_matcher)
        for position, hits in enumerate(boat_hits):
            if position != 1:
                # Every sentence other than sentence 1 must yield nothing.
                self.assertEqual(len(hits), 0)
                continue
            self.assertEqual(hits, [[4], [9], [21], [24]])

        people_matcher = ddlite_matcher.DictionaryMatch(
            dictionary=self.d2, label='PPL', match_attrib='lemmas')
        people_hits = matched_indices(people_matcher)
        # Match order is not part of the contract, so compare sorted lists.
        self.assertEqual(sorted(people_hits[64]),
                         sorted([[3, 4, 5], [0, 1]]))
        self.assertEqual(sorted(people_hits[1]),
                         sorted([[30, 31], [6], [28]]))
Esempio n. 2
0
    def test_composition(self):
        """ Test a regex matcher composed on top of a dictionary matcher """
        sentence = self.sents[1]

        boat_matcher = ddlite_matcher.DictionaryMatch(
            dictionary=self.d1, label='Boat', match_attrib='lemmas')
        # The dictionary matcher is passed positionally as the inner matcher.
        root_matcher = ddlite_matcher.RegexNgramMatch(
            boat_matcher,
            label='root',
            regex_pattern=r'ROO+',
            match_attrib='dep_labels',
            ignore_case=False)

        actual = list(root_matcher.apply(sentence))
        expected = [([4], 'root : Boat')]
        self.assertEqual(actual, expected)
Esempio n. 3
0
    def test_union(self):
        """ Union of a dictionary matcher and a regex n-gram matcher """
        sentence = self.sents[1]

        people_matcher = ddlite_matcher.DictionaryMatch(
            dictionary=self.d2, label='PPL', match_attrib='lemmas')
        adjective_noun_matcher = ddlite_matcher.RegexNgramMatch(
            label='adj_nn',
            regex_pattern=r'jj[0-9]nn+',
            match_attrib='poses',
            ignore_case=True,
            sep='8')
        combined = ddlite_matcher.Union(people_matcher,
                                        adjective_noun_matcher)

        # Compare in sorted order so the yield order does not matter.
        actual = list(combined.apply(sentence))
        actual.sort()
        expected = [
            ([30, 31], 'PPL'),
            ([6], 'PPL'),
            ([28], 'PPL'),
            ([8, 9], 'adj_nn'),
        ]
        expected.sort()
        self.assertEqual(actual, expected)