Esempio n. 1
0
class PosTaggerTestCase(unittest.TestCase):
    """Checks the (word, tag) pairs yielded by ``PosTagger.pos_as_iter``."""

    def setUp(self):
        # Build the path to pos.crf.model relative to this file's directory
        # rather than the current working directory.
        base_dir = os.path.dirname(__file__)
        self.pos_tagger = PosTagger(SegTagger(), os.path.join(base_dir, "./model/pos.crf.model"))

    def tearDown(self):
        pass

    def testPosTagger(self):
        # A single sentence: one (word, tag) pair is expected per word.
        res = self.pos_tagger.pos_as_iter(u"据以色列电台昨天报道")
        # assertEqual replaces the deprecated alias assertEquals.
        self.assertEqual(list(res), [(u"据", "p"), (u"以色列", "nr"), (u"电台", "nn"), (u"昨天", "nt"), (u"报道", "vv")])

    def testPosTaggerWithMultipleLines(self):
        # Input with blank lines and a leading space: each newline and the
        # space are expected back as their own items tagged "pu".
        res = self.pos_tagger.pos_as_iter(u"据以色列电台昨天报道\n\n 巴勒斯坦和以色列")
        self.assertEqual(list(res),
                         [(u"据", "p"), (u"以色列", "nr"), (u"电台", "nn"), (u"昨天", "nt"), (u"报道", "vv"), ("\n", "pu"),
                          ("\n", "pu"), (" ", "pu"), (u"巴勒斯坦", "nr"), (u"和", "cc"), (u"以色列", "nr")])
Esempio n. 2
0
class PosTaggerTestCase(unittest.TestCase):
    """Checks the (word, tag) pairs yielded by ``PosTagger.pos_as_iter``."""

    def setUp(self):
        # Build the path to pos.crf.model relative to this file's directory
        # rather than the current working directory.
        base_dir = os.path.dirname(__file__)
        self.pos_tagger = PosTagger(
            SegTagger(), os.path.join(base_dir, "./model/pos.crf.model"))

    def tearDown(self):
        pass

    def testPosTagger(self):
        # A single sentence: one (word, tag) pair is expected per word.
        res = self.pos_tagger.pos_as_iter(u"据以色列电台昨天报道")
        # assertEqual replaces the deprecated alias assertEquals.
        self.assertEqual(list(res), [(u"据", "p"), (u"以色列", "nr"),
                                     (u"电台", "nn"), (u"昨天", "nt"),
                                     (u"报道", "vv")])

    def testPosTaggerWithMultipleLines(self):
        # Input with blank lines and a leading space: each newline and the
        # space are expected back as their own items tagged "pu".
        res = self.pos_tagger.pos_as_iter(u"据以色列电台昨天报道\n\n 巴勒斯坦和以色列")
        self.assertEqual(list(res),
                         [(u"据", "p"), (u"以色列", "nr"), (u"电台", "nn"),
                          (u"昨天", "nt"), (u"报道", "vv"), ("\n", "pu"),
                          ("\n", "pu"), (" ", "pu"), (u"巴勒斯坦", "nr"),
                          (u"和", "cc"), (u"以色列", "nr")])
Esempio n. 3
0
 def setUp(self):
     """Create the PosTagger fixture from pos.crf.model next to this file."""
     # Path is anchored on this file's directory, not the working directory.
     model_path = os.path.join(os.path.dirname(__file__), "./model/pos.crf.model")
     self.pos_tagger = PosTagger(SegTagger(), model_path)
Esempio n. 4
0
 def setUp(self):
     """Create the ImpressionExtractor fixture used by the tests."""
     # The extractor wraps a PosTagger, which in turn wraps a SegTagger.
     tagger = PosTagger(SegTagger())
     self.extractor = ImpressionExtractor(tagger)
Esempio n. 5
0
                                    | select(lambda nums: [words[num] for num in nums]) \
                                    | select(lambda words: ''.join(words)) \
                                    | as_list
                    short_phrase = ''.join(short_matches)
                    first_index = matches | select(extract_nums) | chain | min
                    last_index = matches | select(extract_nums) | chain | max
                    complete_phrase = ''.join(words[first_index:last_index +
                                                    1])
                    index = cur_index + (
                        [len(words[i]) for i in range(first_index)] | add)
                    yield (short_phrase, complete_phrase, index)

            cur_index += len(line) + 1


if __name__ == '__main__':
    # Manual run: print what ImpressionExtractor.extract yields for the
    # first lines of the sample comments file.
    from pynlpini import PosTagger
    from pynlpini import SegTagger

    with open("../../data/app/travel_comments/mafengwo_comments_raw.txt"
              ) as comment_file:
        index = 0
        ie = ImpressionExtractor(PosTagger(SegTagger()))
        for line in comment_file:
            line = line.decode("utf-8")
            # Extraction, the counter and the limit check all belong inside
            # the per-line loop. Previously they ran once after the loop, so
            # only the last line was processed and the limit never fired.
            for item in ie.extract(line):
                # Parenthesised form prints the same for a single argument
                # under Python 2 and also parses under Python 3.
                print(item)
            index += 1
            if index > 10:
                exit()
Esempio n. 6
0
def pos(txt):
    """Return the tagging of ``txt`` from ``pos_as_iter`` as a JSON string.

    The module-level ``pos_tagger`` is created on first use (when it is still
    ``None``) and reused on later calls. Non-ASCII characters are written to
    the JSON output unescaped.
    """
    global pos_tagger
    if pos_tagger is None:
        # First call: build the tagger from the module-level seg_tagger.
        pos_tagger = PosTagger(seg_tagger)
    tagged = pos_tagger.pos_as_iter(txt) | as_list
    return json.dumps(tagged, ensure_ascii=False)
Esempio n. 7
0
 def setUp(self):
     """Create the PosTagger fixture from pos.crf.model next to this file."""
     # Path is anchored on this file's directory, not the working directory.
     here = os.path.dirname(__file__)
     model_path = os.path.join(here, "./model/pos.crf.model")
     self.pos_tagger = PosTagger(SegTagger(), model_path)