def segment_merge_newword_single(self, text):
    """Return the ``get_single_cns`` items followed by the merged-new-word segmentation.

    Args:
        text: The input handed unchanged to ``get_single_cns`` and to
            ``seg.Segment``.

    Returns:
        A new list: first everything produced by ``get_single_cns(text)``,
        then everything produced by
        ``seg.Segment(text, libsegment.SEG_MERGE_NEWWORD)``. Duplicates are
        kept.
    """
    # NOTE(review): get_single_cns presumably yields the single characters of
    # the text -- confirm against its definition.
    single_units = list(get_single_cns(text))
    merged_words = list(seg.Segment(text, libsegment.SEG_MERGE_NEWWORD))
    return single_units + merged_words
def segment_nodupe(self, text):
    """Collect the words of several segmentations and de-duplicate them.

    The candidates are gathered in this order: the default
    ``seg.Segment(text)`` result, the ``SEG_NEWWORD`` result, the
    ``SEG_BASIC`` result, and finally the items from ``get_single_cns(text)``.

    Args:
        text: The input handed unchanged to ``seg.Segment`` and to
            ``get_single_cns``.

    Returns:
        Whatever ``gezi.dedupe_list`` returns for the combined candidate list.
    """
    results = list(seg.Segment(text))
    # The earlier version yielded a misspelled name (``wrod``) in the next two
    # steps, so the segmented words were never actually collected (NameError
    # on any non-empty segmentation). Extend with the real words instead.
    results.extend(seg.Segment(text, libsegment.SEG_NEWWORD))
    results.extend(seg.Segment(text, libsegment.SEG_BASIC))
    results.extend(get_single_cns(text))
    # NOTE(review): gezi.dedupe_list presumably keeps the first occurrence of
    # each item and preserves order -- confirm against its implementation.
    return gezi.dedupe_list(results)
def segment_nodupe_noseq(self, text):
    """Return the distinct words of all segmentations, in no particular order.

    Args:
        text: The input handed unchanged to ``seg.Segment`` and to
            ``get_single_cns``.

    Returns:
        A list built from a set, so each word appears once and the order is
        whatever set iteration yields. The set is filled from the default,
        ``SEG_NEWWORD`` and ``SEG_BASIC`` segmentations plus the items from
        ``get_single_cns(text)``.
    """
    unique_words = set(seg.Segment(text))
    unique_words.update(seg.Segment(text, libsegment.SEG_NEWWORD))
    unique_words.update(seg.Segment(text, libsegment.SEG_BASIC))
    unique_words.update(get_single_cns(text))
    return list(unique_words)
def segment_seq_all(self, text):
    """Concatenate every segmentation into one sequence with separator tokens.

    Layout of the returned list::

        get_single_cns items, '<SEP0>',
        SEG_BASIC words,      '<SEP1>',
        default words,        '<SEP2>',
        SEG_NEWWORD words

    Args:
        text: The input handed unchanged to ``get_single_cns`` and to
            ``seg.Segment``.

    Returns:
        A new list with the four groups in the order above. Duplicates are
        kept, and the separators are always present even when a group is
        empty.
    """
    sequence = list(get_single_cns(text))
    sequence.append('<SEP0>')
    sequence.extend(seg.Segment(text, libsegment.SEG_BASIC))
    sequence.append('<SEP1>')
    sequence.extend(seg.Segment(text))
    sequence.append('<SEP2>')
    sequence.extend(seg.Segment(text, libsegment.SEG_NEWWORD))
    return sequence
def segment(self, text):
    """Return the ``get_single_cns`` items plus every not-yet-seen segmented word.

    The list starts with the items from ``get_single_cns(text)`` exactly as
    produced (duplicates among them are not removed). Words from the default,
    ``SEG_NEWWORD`` and ``SEG_BASIC`` segmentations are then appended in that
    order, skipping any word already present.

    Args:
        text: The input handed unchanged to ``get_single_cns`` and to
            ``seg.Segment``.

    Returns:
        The ordered list described above.
    """
    ordered = list(get_single_cns(text))
    seen = set(ordered)

    def _append_unseen(candidates):
        # Keep only the first occurrence of each word, preserving order.
        for token in candidates:
            if token in seen:
                continue
            ordered.append(token)
            seen.add(token)

    # Called one after another so each segmentation is consumed before the
    # next segmenter call is made.
    _append_unseen(seg.Segment(text))
    _append_unseen(seg.Segment(text, libsegment.SEG_NEWWORD))
    _append_unseen(seg.Segment(text, libsegment.SEG_BASIC))
    return ordered
def segment_basic_single(self, text):
    """Return the ``get_single_cns`` items followed by the basic segmentation.

    Args:
        text: The input handed unchanged to ``get_single_cns`` and to
            ``seg.Segment``.

    Returns:
        A new list: the items from ``get_single_cns(text)``, then the words
        from ``seg.Segment(text, libsegment.SEG_BASIC)``. Duplicates are kept.
    """
    combined = list(get_single_cns(text))
    combined.extend(seg.Segment(text, libsegment.SEG_BASIC))
    return combined
def segment_phrase_single(self, text):
    """Return the ``get_single_cns`` items followed by the default segmentation.

    Args:
        text: The input handed unchanged to ``get_single_cns`` and to
            ``seg.Segment``.

    Returns:
        A new list: the items from ``get_single_cns(text)``, then the words
        from ``seg.Segment(text)`` called without a mode flag. Duplicates are
        kept.
    """
    combined = list(get_single_cns(text))
    combined.extend(seg.Segment(text))
    return combined