Esempio n. 1
0
 def segment_merge_newword_single(self, text):
     """Concatenate ``get_single_cns`` output with merge-newword segments.

     The returned list holds every item yielded by ``get_single_cns(text)``
     followed by every item yielded by
     ``seg.Segment(text, libsegment.SEG_MERGE_NEWWORD)``. Duplicates are
     kept and each source keeps its own order.
     """
     tokens = list(get_single_cns(text))
     tokens.extend(seg.Segment(text, libsegment.SEG_MERGE_NEWWORD))
     return tokens
Esempio n. 2
0
 def segment_nodupe(self, text):
     """Collect tokens from every segmentation mode and de-duplicate them.

     Tokens are gathered in this order: default ``seg.Segment(text)``,
     ``SEG_NEWWORD`` segmentation, ``SEG_BASIC`` segmentation, and finally
     the items of ``get_single_cns(text)``. The combined list is then
     passed through ``gezi.dedupe_list`` and its result is returned.

     NOTE(review): ``gezi.dedupe_list`` presumably drops repeated entries
     while keeping first-seen order -- confirm against its implementation.
     """
     tokens = list(seg.Segment(text))
     # Extend with the loop items themselves; referencing any other name
     # here would raise NameError as soon as a segmentation is non-empty.
     tokens.extend(seg.Segment(text, libsegment.SEG_NEWWORD))
     tokens.extend(seg.Segment(text, libsegment.SEG_BASIC))
     tokens.extend(get_single_cns(text))
     return gezi.dedupe_list(tokens)
Esempio n. 3
0
 def segment_nodupe_noseq(self, text):
     """Return the distinct tokens from all segmentation modes, unordered.

     The default, ``SEG_NEWWORD`` and ``SEG_BASIC`` segmentations plus the
     items of ``get_single_cns(text)`` are merged into one set, which is
     then converted to a list. Because the result comes from a set, the
     order of the returned list is unspecified.
     """
     unique = set()
     unique.update(seg.Segment(text))
     unique.update(seg.Segment(text, libsegment.SEG_NEWWORD))
     unique.update(seg.Segment(text, libsegment.SEG_BASIC))
     unique.update(get_single_cns(text))
     return list(unique)
Esempio n. 4
0
        def segment_seq_all(self, text):
            """Build one sequence holding every segmentation, with separators.

            Layout of the returned list:
            ``get_single_cns`` items, ``'<SEP0>'``, ``SEG_BASIC`` tokens,
            ``'<SEP1>'``, default-mode tokens, ``'<SEP2>'``, ``SEG_NEWWORD``
            tokens. Nothing is de-duplicated.
            """
            sequence = list(get_single_cns(text))

            sequence.append('<SEP0>')
            sequence.extend(seg.Segment(text, libsegment.SEG_BASIC))

            sequence.append('<SEP1>')
            sequence.extend(seg.Segment(text))

            sequence.append('<SEP2>')
            sequence.extend(seg.Segment(text, libsegment.SEG_NEWWORD))

            return sequence
Esempio n. 5
0
    def segment(self, text):
        """Merge all segmentation modes, skipping tokens already collected.

        The result starts with every item of ``get_single_cns(text)`` (kept
        as-is, including any repeats among them). Tokens from the default,
        ``SEG_NEWWORD`` and ``SEG_BASIC`` segmentations are then appended in
        that order, each only if it has not been collected before.
        """
        merged = list(get_single_cns(text))
        seen = set(merged)

        def absorb(tokens):
            # Append each token the first time it is encountered.
            for token in tokens:
                if token in seen:
                    continue
                merged.append(token)
                seen.add(token)

        # Each segmentation is consumed before the next one is requested.
        absorb(seg.Segment(text))
        absorb(seg.Segment(text, libsegment.SEG_NEWWORD))
        absorb(seg.Segment(text, libsegment.SEG_BASIC))

        return merged
Esempio n. 6
0
 def segment_basic_single(self, text):
     """Concatenate ``get_single_cns`` output with ``SEG_BASIC`` segments.

     Returns a list of every item yielded by ``get_single_cns(text)``
     followed by every item yielded by
     ``seg.Segment(text, libsegment.SEG_BASIC)``; duplicates are kept.
     """
     tokens = list(get_single_cns(text))
     tokens.extend(seg.Segment(text, libsegment.SEG_BASIC))
     return tokens
Esempio n. 7
0
 def segment_phrase_single(self, text):
     """Concatenate ``get_single_cns`` output with default-mode segments.

     Returns a list of every item yielded by ``get_single_cns(text)``
     followed by every item yielded by ``seg.Segment(text)``; duplicates
     are kept.
     """
     singles = list(get_single_cns(text))
     phrases = list(seg.Segment(text))
     return singles + phrases