Beispiel #1
0
    def test_tag(self):
        """Check tag() on a Latin word and on a mixed Arabic sentence."""
        latin_result = tag(u'install')
        self.assertEqual([[u'install', u'NN']], latin_result)

        tokens = tag(u'1- بسمك اللهم، :)')

        # The digit and the dash are expected as plain [text, tag] pairs.
        self.assertEqual(tokens[0], [u'1', u'CD'])
        self.assertEqual(tokens[1], [u'-', u'PUNC'])

        # Each Arabic word is expected as a list of candidate
        # (Prefix, text, Suffix) tuples.
        first_word_candidates = [
            (Prefix(u'', classe=u'pC1'),
             u'بسمك',
             Suffix(u'', classe=u'sC1')),
            (Prefix(u'', classe=u'pC1'),
             u'بسم',
             Suffix(u'ك', classe=u'sC4', desc="PRP|OBJP")),
            (Prefix(u'ب', classe=u'pN25', desc="IN"),
             u'سمك',
             Suffix(u'', classe=u'sC1')),
            (Prefix(u'ب', classe=u'pN25', desc="IN"),
             u'سم',
             Suffix(u'ك', classe=u'sC4', desc="PRP|OBJP")),
        ]
        self.assertEqual(tokens[2], first_word_candidates)

        second_word_candidates = [
            (Prefix(u'', classe=u'pC1', desc=u''),
             u'اللهم',
             Suffix(u'', classe=u'sC1', desc=u'')),
            (Prefix(u'', classe=u'pC1', desc=u''),
             u'الل',
             Suffix(u'هم', classe=u'sC13', desc=u"PRP|OBJP")),
            (Prefix(u'ال', classe=u'pN1', desc=u"DT"),
             u'لهم',
             Suffix(u'', classe=u'sC1', desc=u'')),
        ]
        self.assertEqual(tokens[3], second_word_candidates)

        # Trailing Arabic comma and emoticon are plain pairs again.
        self.assertEqual(tokens[4], [u'،', u'PUNC'])
        self.assertEqual(tokens[5], [u':)', u'EMO'])
Beispiel #2
0
    def test_tag(self):
        """Verify the output of tag() for a Latin word and an Arabic phrase."""
        self.assertEqual([[u'install', u'NN']], tag(u'install'))

        result = tag(u'1- بسمك اللهم، :)')

        self.assertEqual(result[0], [u'1', u'CD'])
        self.assertEqual(result[1], [u'-', u'PUNC'])

        # Affix objects that occur more than once in the expected
        # candidates of the first Arabic word.
        no_prefix = Prefix(u'', classe=u'pC1')
        no_suffix = Suffix(u'', classe=u'sC1')
        ba_prefix = Prefix(u'ب', classe=u'pN25', desc="IN")
        kaf_suffix = Suffix(u'ك', classe=u'sC4', desc="PRP|OBJP")
        self.assertEqual(result[2], [
            (no_prefix, u'بسمك', no_suffix),
            (no_prefix, u'بسم', kaf_suffix),
            (ba_prefix, u'سمك', no_suffix),
            (ba_prefix, u'سم', kaf_suffix),
        ])

        # The second word spells out an empty ``desc`` explicitly.
        blank_prefix = Prefix(u'', classe=u'pC1', desc=u'')
        blank_suffix = Suffix(u'', classe=u'sC1', desc=u'')
        self.assertEqual(result[3], [
            (blank_prefix, u'اللهم', blank_suffix),
            (blank_prefix, u'الل',
             Suffix(u'هم', classe=u'sC13', desc=u"PRP|OBJP")),
            (Prefix(u'ال', classe=u'pN1', desc=u"DT"), u'لهم', blank_suffix),
        ])

        self.assertEqual(result[4], [u'،', u'PUNC'])
        self.assertEqual(result[5], [u':)', u'EMO'])
Beispiel #3
0
 def test_cvs_file(self):
     """Compare tag() output with the expected tokens of every test case.

     Each entry of ``self.test_cases`` is a ``(raw, expected)`` pair.
     ``expected`` is split on whitespace and its tokens are consumed in
     step with the items returned by ``tag(raw)``.  Mismatches are only
     printed; this method makes no assertions.
     """
     for raw, expected in self.test_cases:
         sol = tag(raw)
         synt = iter(expected.split())
         for item in sol:
             if isinstance(item[0], tuple):
                 # ``item`` is a list of candidate
                 # ((prefix, type), word, (suffix, type)) analyses.
                 word = next(synt)
                 suffix = u""
                 matched = False
                 for ((ex_prefix, ex_prefix_type),
                      ex_word,
                      (ex_suffix, ex_suffix_type)) in item:
                     if word == ex_prefix:
                         # The current token is this candidate's prefix,
                         # so the word itself is the next token.
                         word = next(synt)
                     if word == ex_word:
                         if not ex_suffix:
                             matched = True
                             break
                         # Read the suffix token at most once, even when
                         # several candidates carry a suffix.
                         if not suffix:
                             suffix = next(synt)
                         if suffix == ex_suffix:
                             matched = True
                             break
                 if word[-1] in [u'ى', u'ة']:
                     # Words ending in one of these letters are never
                     # reported; one more expected token is consumed when
                     # any candidate has a suffix.
                     # NOTE(review): this also runs after a successful
                     # match -- confirm the extra read is intended.
                     for ((ex_prefix, ex_prefix_type),
                          ex_word,
                          (ex_suffix, ex_suffix_type)) in item:
                         if ex_suffix:
                             next(synt)
                             break
                     continue
                 if not matched:
                     print("Bad match: ")
                     print("%s <=> %s" % (item[0][1], word))
                     print(80 * "=")
             else:
                 # Read the expected token once.  Calling next() again for
                 # the report would print the wrong token and desynchronise
                 # the iterator for the rest of the case.
                 token = next(synt)
                 if token != item[0]:
                     print("Bad match: ")
                     print("%s <=> %s" % (token, item[0]))
                     print(80 * "=")
Beispiel #4
0
 def test_cvs_file(self):
     """Check tag() against the expected segmentation of each test case.

     ``self.test_cases`` yields ``(raw, expected)`` pairs.  The
     whitespace-separated tokens of ``expected`` are walked in parallel
     with the items returned by ``tag(raw)``.  Differences are printed
     rather than asserted.
     """
     for raw, expected in self.test_cases:
         sol = tag(raw)
         synt = iter(expected.split())
         for item in sol:
             if isinstance(item[0], tuple):
                 # ``item`` holds the candidate
                 # ((prefix, type), word, (suffix, type)) analyses.
                 word = next(synt)
                 suffix = u""
                 matched = False
                 for ((ex_prefix, ex_prefix_type), ex_word,
                      (ex_suffix, ex_suffix_type)) in item:
                     if word == ex_prefix:
                         # Current token is the candidate's prefix; the
                         # word is expected in the following token.
                         word = next(synt)
                     if word == ex_word:
                         if not ex_suffix:
                             matched = True
                             break
                         # The suffix token is fetched only once and then
                         # reused for the remaining candidates.
                         if not suffix:
                             suffix = next(synt)
                         if suffix == ex_suffix:
                             matched = True
                             break
                 if word[-1] in [u'ى', u'ة']:
                     # No report is printed for words with these endings;
                     # an extra expected token is skipped when some
                     # candidate has a suffix.
                     # NOTE(review): this happens even after a match --
                     # verify that the extra skip is deliberate.
                     for ((ex_prefix, ex_prefix_type), ex_word,
                          (ex_suffix, ex_suffix_type)) in item:
                         if ex_suffix:
                             next(synt)
                             break
                     continue
                 if not matched:
                     print("Bad match: ")
                     print("%s <=> %s" % (item[0][1], word))
                     print(80 * "=")
             else:
                 # Fetch the expected token a single time so the report
                 # shows the token that was compared and the iterator
                 # stays aligned with the tagger output.
                 token = next(synt)
                 if token != item[0]:
                     print("Bad match: ")
                     print("%s <=> %s" % (token, item[0]))
                     print(80 * "=")