Python seg 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: nlpir

메소드/함수: seg

hotexamples.com에서의 예제들: 6

Python seg - 6개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 nlpir.seg에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

예제 #1

파일 보기

파일: FontCN_NLPtools.py 프로젝트: yyHaker/NLP_tools

    def NLPIRCutWithPos(self, isClearText=False):
        """Segment the stored text with ``nlpir.seg`` and cache the result.

        If ``isClearText`` compares equal to ``False`` the raw ``self.__text``
        is segmented; for any other value the output of ``self.clearText()``
        is segmented instead.

        Returns the segmentation materialized as a list; the same list is
        kept on ``self.__strWithPos``.
        """
        # The explicit ``== False`` comparison is kept on purpose: values such
        # as ``None`` do not compare equal to False and must take the
        # ``clearText()`` path, which plain truthiness would not do.
        source = self.__text if isClearText == False else self.clearText()
        self.__strWithPos = list(nlpir.seg(source))
        return self.__strWithPos

예제 #2

파일 보기

파일: myword.py 프로젝트: summerspringwei/weibo_tiopic_clasification_v1

 def parse_sentence(self):
     """Accumulate sentiment counts for ``self.sentence`` and pick its feature words.

     Every token produced by ``nlp.seg`` whose word (``token[0]``) is a key of
     ``self.mdictionary`` adds that entry's positive / negative / zero counts
     to the sentence totals.  Up to three entries whose first attribute
     character is one of ``a, v, z, d, e`` are appended to ``self.word_list``.
     If fewer than three such entries were found, the list is padded with the
     dictionary entries of the leading tokens.

     Raises:
         KeyError: if a token used for padding has no entry in
             ``self.mdictionary``.
     """
     feature_attr_list = ["a", "v", "z", "d", "e"]
     feature_word_count = 0
     feature_word_num = 3
     # Materialize the tokens: they are iterated once below and afterwards
     # measured with len() and indexed, which a one-shot iterator cannot do.
     token_list = list(nlp.seg(self.sentence))
     for token in token_list:
         # Sum the positive, zero and negative counts of every known word.
         if token[0] in self.mdictionary:
             mword = self.mdictionary.get(token[0])
             self.positive_word_count += mword.positive_count
             # NOTE(review): "netative_count" looks like a typo, but the
             # attribute is declared outside this block -- confirm before renaming.
             self.negative_word_count += mword.netative_count
             self.zero_word_count += mword.zero_count
             # Keep the word if it carries a feature attribute and there is room.
             if mword.attr[0] in feature_attr_list and feature_word_count < feature_word_num:
                 self.word_list.append(mword)
                 feature_word_count += 1
     # Fewer than three feature words: pad from the start of the sentence.
     # An empty token list has nothing to pad with, so it is skipped.
     if feature_word_count < feature_word_num and token_list:
         if len(token_list) > 2:
             for i in range(feature_word_num - len(self.word_list)):
                 # The dictionary is keyed by the word (token[0]), not the token.
                 self.word_list.append(self.mdictionary[token_list[i][0]])
         else:
             # Very short sentence: repeat the first word.
             for _ in range(feature_word_num - len(self.word_list)):
                 self.word_list.append(self.mdictionary[token_list[0][0]])

예제 #3

파일 보기

def ChineseWordsSegmentationByNLPIR2016(text):
    """Return the segmented words of *text* as UTF-8 encoded strings.

    ``nlpir.seg`` is run over *text*; the first element of every item it
    produces is encoded with UTF-8 and collected, in order, into a list.
    """
    return [token[0].encode('utf-8') for token in nlpir.seg(text)]

예제 #4

파일 보기

파일: FontCN_NLPtools.py 프로젝트: yyHaker/NLP_tools

    def NLPIRCutText(self, isAddWord=False):
        """Segment ``self.__text`` and return the words joined by single spaces.

        Before segmenting, every entry of ``self.__userWords`` is registered
        through ``nlpir.AddUserWord``; the entries of ``self.__newWords`` are
        registered first, but only when ``isAddWord`` compares equal to
        ``True``.  The UTF-8 encoded words are also kept on
        ``self.__seg_list``.
        """
        if isAddWord == True:
            for new_word in self.__newWords:
                nlpir.AddUserWord(new_word)

        for user_word in self.__userWords:
            nlpir.AddUserWord(user_word)

        # Segment first, then reset the cache, so a failing seg() call leaves
        # the previously stored list untouched.
        tokens = nlpir.seg(self.__text)
        self.__seg_list = []
        self.__seg_list.extend(token[0].encode('utf-8') for token in tokens)

        return ' '.join(self.__seg_list)

예제 #5

파일 보기

파일: myword.py 프로젝트: summerspringwei/weibo_tiopic_clasification_v1

 def parse_file(self):
     """
     Split a sentence of the training file into words and record its label.

     The first line of ``self.file_path`` is read; its label and text are
     extracted with ``self.get_label`` / ``self.get_text``.  Each token of the
     text whose second element is not rejected by ``self.word_filter`` is
     looked up in ``self.mdict`` by its word (``token[0]``): a known word has
     the counter matching the label ("+1", "0" or "-1") incremented, an
     unknown word is added to ``self.mdict`` as a new ``MyWord``.

     :return: None
     :raises TypeError: if the label is not one of "+1", "0", "-1" and a
         known word is encountered (the dispatch lookup yields ``None``).
     """
     # The context manager closes the handle even if reading raises.
     with open(self.file_path, "r") as f:
         # NOTE(review): only the first line is consumed -- confirm whether the
         # whole file was meant to be processed.
         line = f.readline()
     label = self.get_label(line)
     text = self.get_text(line)
     for token in nlp.seg(text):
         if self.word_filter(token[1]):
             continue
         if token[0] in self.mdict:
             mword = self.mdict.get(token[0])
             # Dispatch on the label to the matching counter increment.
             inc_operator = {
                 "+1": mword.inc_positive_count,
                 "0": mword.inc_zero_count,
                 "-1": mword.inc_negative_count,
             }
             inc_operator.get(label)()
         else:
             # First occurrence: register the word; no counter is incremented here.
             mword = MyWord(token[0], token[1])
             self.mdict[token[0]] = mword

예제 #6

파일 보기

# Script fragment (Python 2 print statements) that dumps several NLPIR
# segmentation results for inspection.  `cutstrpos`, `filestr` and `filestr2`
# are defined outside this fragment.
posstr = cutstrpos(filestr2)

print type(posstr)

# print filestr

print '**** show is end ****'

print ' '
print 'This is posster'
print posstr

# Split the string on whitespace and convert every piece with
# nltk.tag.str2tuple; the loop at the bottom unpacks each result as (word, tag).
strtag = [nltk.tag.str2tuple(word) for word in posstr.split()]
# for item in strtag:
#     print item
# The four results below are computed but not printed within this fragment.
strsBySeg = nlpir.seg(filestr)
strsBySeg2 = nlpir.seg(filestr2)
# NOTE(review): the trailing 1 is presumably a POS-tagging switch -- confirm
# against the nlpir wrapper.
strsByParagraphProcess = nlpir.ParagraphProcess(filestr, 1)
strsByParagraphProcessA = nlpir.ParagraphProcessA(
    filestr,
    ChineseWordsSegmentationByNLPIR2016(filestr)[0], 1)

print ' '
print ' '
print '**** strtag ****'

# The trailing comma keeps all "word / tag |" items on a single output line.
for word, tag in strtag:
    print word, "/", tag, "|",

print ' '
print ' '