Exemple #1
0
def LabelDicts(labelNum,fileNum,featureNum):
    t=util.Counter()
    for i in range(fileNum):
        t+=featureSelected.featureDict(labelNum,i)
        print "%d.txt" %i
    print "legth of dict label",len(t)
    result=sorted(t.items(),key=lambda e:e[1],reverse=True)
    featureDict={}
    for i in range(featureNum):
        featureDict[result[i][0]]=result[i][1]
    print "features are as follows:"
    print json.dumps(featureDict,encoding='UTF-8',ensure_ascii=False)
    return featureDict
Exemple #2
0
def fileToDict(labelNum=2,num=765):
    result=featureSelected.featureDict(labelNum,num)
    sort=sorted(result.items(),key=lambda e:e[1],reverse=True)
    print "文本单词提取"
    print json.dumps(sort,encoding='UTF-8',ensure_ascii=False)
    
    str=('../data/trainData/Dict/%d.txt')   % labelNum
    f=open(str,'rb')
    DICT=pickle.load(f)
    f.close()
    print "字典:"
    print json.dumps(DICT,encoding='UTF-8',ensure_ascii=False)

    return sort
Exemple #3
0
def fileToVector(labelNum=2,num=765):
    result=featureSelected.featureDict(labelNum,num)
    vector=[]
    str=('../data/trainData/Dict/%d.txt') % labelNum
    f=open(str,'rb')
    loadDict=pickle.load(f)
    f.close()
    
    loadDict=sorted(loadDict.items(),key=lambda e:e[1],reverse=True)
    for item in loadDict:
        if item[0] in result:
            vector.append(result[item[0]])
        else:
            vector.append(0)
    print "文本向量化已经完成"
    print vector
Exemple #4
0
def saveWordTextDict(label,fileNum):
    str1=('../data/trainData/Dict/%d.txt') % label
    str2=('../data/trainData/Dict/words%d.txt') %label
    f1=open(str1,'rb')
    Dict=pickle.load(f1)
    f1.close()
    wordDict=util.Counter()
    for item in Dict.items():
        wordDict[item]=0
    for item in Dict.items():
        print 'item',item
        for i in range(fileNum):
            tmp=featureSelected.featureDict(label,i)
            if item in tmp:
                wordDict[item]+=1
    print "saveWordTextDict has finished!"
    f2=open(str2,'wb')
    pickle.dump(wordDict,f2)
    f2.close()
Exemple #5
0
# -*- coding: utf-8 -*-
#HuangYao THU
#12-9-2013

import featureSelected
import json


labelNum=2
num=450
result=featureSelected.featureDict(labelNum,num)
sort=sorted(result.items(),key=lambda e:e[1],reverse=True)
res=[]
length=200
if len(sort)<200:
    length=len(sort)
for i in range(length):
    res.append((sort[i][0],sort[i][1]))
print 'txt features are as follows:'
print json.dumps(res,encoding='UTF-8',ensure_ascii=False)