예제 #1
0
                                    | select(lambda nums: [words[num] for num in nums]) \
                                    | select(lambda words: ''.join(words)) \
                                    | as_list
                    short_phrase = ''.join(short_matches)
                    first_index = matches | select(extract_nums) | chain | min
                    last_index = matches | select(extract_nums) | chain | max
                    complete_phrase = ''.join(words[first_index:last_index +
                                                    1])
                    index = cur_index + (
                        [len(words[i]) for i in range(first_index)] | add)
                    yield (short_phrase, complete_phrase, index)

            cur_index += len(line) + 1


if __name__ == '__main__':
    # Demo driver: run the impression extractor over the leading lines of the
    # raw comments file and print every item it yields.
    from pynlpini import PosTagger
    from pynlpini import SegTagger

    with open("../../data/app/travel_comments/mafengwo_comments_raw.txt"
              ) as comment_file:
        ie = ImpressionExtractor(PosTagger(SegTagger()))
        # Extraction and the line limit must live inside the per-line loop;
        # otherwise the whole file is consumed and only its last line is
        # processed (and an empty file leaves `line` undefined).
        for index, line in enumerate(comment_file, 1):
            # Lines are read as byte strings here; decode before extraction.
            line = line.decode("utf-8")
            for item in ie.extract(line):
                # Parenthesised form prints the same under Python 2 and is
                # also valid Python 3 syntax.
                print(item)
            # Same threshold as the original counter: stop once more than
            # 10 lines have been handled.
            if index > 10:
                break
예제 #2
0
 def setUp(self):
     """Build the ImpressionExtractor fixture used by the tests.

     The extractor wraps a PosTagger, which in turn wraps a SegTagger
     constructed with no arguments.
     """
     segmenter = SegTagger()
     pos_tagger = PosTagger(segmenter)
     self.extractor = ImpressionExtractor(pos_tagger)
예제 #3
0
파일: test_pos.py 프로젝트: 52nlp/pynlpini
 def setUp(self):
     """Build the PosTagger fixture used by the tests.

     The tagger is given a SegTagger constructed with no arguments and the
     path ``./model/pos.crf.model`` resolved against this module's directory.
     """
     here = os.path.dirname(__file__)
     segmenter = SegTagger()
     model_path = os.path.join(here, "./model/pos.crf.model")
     self.pos_tagger = PosTagger(segmenter, model_path)