Example #1
# Single-sentence test
# li1 = ['让无数玩家心痛不已岩田聪的离去让。','岩田聪的离去让无数玩家心痛不已。']



# Feature extraction: re is imported and the helper functions (ltp, n_gram, biaodian, fdc,
# zzqgc, tz), the emotion word lists (list_ai ... list_nu) and the containers/counter
# (list_for_cv, list_num_tezheng, jishu) are defined earlier in the source
for i in li1:
    # strip the leading @username: mention, drop the literal 'http://', then keep only
    # English letters and Chinese characters
    data1 = re.sub('@[\s\S]*?:', '', i)
    data1 = re.sub('http://', '', data1)
    data_1 = re.sub("[^a-zA-Z\u4e00-\u9fa5]", "", data1)
    # word segmentation; an empty cleaned string yields an empty word list
    if data_1 == '':
        wordList_2 = []
        jishu += 1
    else:
        jishu += 1
        wordList_2 = ltp(data_1)
    list_for_cv.append(n_gram(wordList_2))
    print(jishu)
    # punctuation feature (raw text) and negation-word feature (cleaned text)
    t_bd_p = biaodian(i)
    t_fdc_p = fdc(data_1)
    # 0/1 emotion-lexicon features, one per emotion word list
    tmp1 = zzqgc(list_ai, data_1)
    tmp2 = zzqgc(list_e, data_1)
    tmp3 = zzqgc(list_hao, data_1)
    tmp4 = zzqgc(list_jing, data_1)
    tmp5 = zzqgc(list_ju, data_1)
    tmp6 = zzqgc(list_le, data_1)
    tmp7 = zzqgc(list_nu, data_1)
    numtezheng = [tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, len(wordList_2), t_fdc_p, t_bd_p]
    list_num_tezheng.append(numtezheng)

# combine the n-gram and numeric features for the 'sadness' category (tz is defined elsewhere in the source)
tezheng_sadness = tz('sadness', list_for_cv, list_num_tezheng)
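
The helpers called in these examples (ltp, n_gram, biaodian, fdc, zzqgc, tz and the emotion word lists) are defined elsewhere in the source and are not shown here. As a rough illustration of two of them, the sketch below assumes that zzqgc returns a 0/1 flag for whether any word of a given emotion lexicon occurs in the cleaned text, and that fdc counts hits against a negation-word list; the function bodies and the NEGATION_WORDS list are assumptions for illustration, not the original implementation.

# Minimal sketch under the assumptions stated above; not the original code.
def zzqgc(word_list, text):
    """Return 1 if any word from the emotion lexicon word_list appears in text, else 0."""
    return 1 if any(w in text for w in word_list) else 0

# Hypothetical negation lexicon; the real list is loaded elsewhere in the source.
NEGATION_WORDS = ['不', '没', '别', '未', '勿']

def fdc(text):
    """Count occurrences of negation words in text (assumed meaning of the fdc feature)."""
    return sum(text.count(w) for w in NEGATION_WORDS)

Under this reading, zzqgc(list_ai, data_1) above is 1 whenever the cleaned sentence contains at least one word from the list_ai lexicon, and the seven tmp values together form the 0/1 emotion-lexicon part of numtezheng.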
Example #2
        # First strip the Weibo @username mention, then keep only Chinese and English letters; discard everything else

        data = re.sub('@[\s\S]*?:','',node.firstChild.data)
        data = re.sub("[^a-zA-Z\u4e00-\u9fa5]","",data)
        list_qgqd.append(qgqd(data))
        list_qgjx.append(qgjx(data))
        # word segmentation (ltp is defined elsewhere in the source)
        if data=='':
            wordList=[]
            jishu+=1
        else:
            jishu+=1
            wordList = ltp(data)
        list_ngram.append(n_gram(wordList))
        print(jishu)
        # negation-word feature
        t_fdc = fdc(data)
        # punctuation feature (computed on the raw text, before cleaning)
        t_bd = biaodian(node.firstChild.data)
        # 0/1 emotion-lexicon features, one per emotion word list
        tmp1 = zzqgc(list_ai,data)
        tmp2 = zzqgc(list_e,data)