def addExternDic(sentimentDic, filename, anls):
    """Merge the entries listed in *filename* into *sentimentDic*.

    Each item obtained from ``fileutil.readFile(filename)`` has newline
    characters stripped from both ends and is used as a key. Keys that are
    already present keep their current value; new keys are mapped to *anls*.

    Args:
        sentimentDic: Mapping to extend; it is modified in place.
        filename: Path handed to ``fileutil.readFile``.
        anls: Value stored for every key that was not present before.

    Returns:
        The same *sentimentDic* object, after the merge.
    """
    for raw_entry in fileutil.readFile(filename):
        # setdefault only writes when the key is absent, so existing
        # entries are never overwritten.
        sentimentDic.setdefault(raw_entry.strip("\n"), anls)
    return sentimentDic
def getThemeDic():
    """Build the set of theme entries stored in ``./data/themedic.in``.

    Returns:
        A set with one element per item returned by ``fileutil.readFile``,
        each with newline characters stripped from both ends.
    """
    theme_path = "./data/themedic.in"
    return {entry.strip("\n") for entry in fileutil.readFile(theme_path)}
def getTestData(filename):
    """Read *filename* and tokenize every row on single spaces.

    Args:
        filename: Path handed to ``fileutil.readFile``.

    Returns:
        A list with one entry per row; each entry is the list produced by
        stripping newline characters from the row and splitting it on
        ``" "`` (consecutive spaces therefore yield empty strings).
    """
    return [row.strip("\n").split(" ") for row in fileutil.readFile(filename)]
def getDegreeDic():
    """Build the set of degree entries stored in ``./data/degree.in``.

    The file is optional: when ``fileutil.checkFileIfExist`` reports that it
    is missing, an empty set is returned.

    Returns:
        A set of the file's rows with newline characters stripped from both
        ends, or an empty set when the file does not exist.
    """
    degree_path = "./data/degree.in"
    if not fileutil.checkFileIfExist(degree_path):
        return set()
    return {entry.strip("\n") for entry in fileutil.readFile(degree_path)}
def getPreDic():
    """Build the set of prefix words used by the analysis.

    The result is the union of the rows of the optional file
    ``./data/predic.in`` (newline characters stripped from both ends) and a
    built-in word list that is always included.

    Returns:
        A set of strings containing the built-in words plus, when the file
        exists, every row read from it.
    """
    predic_path = "./data/predic.in"
    # Built-in entries (they look like mostly negation words -- confirm with
    # the consumer of this set before relying on that).
    builtin_words = (
        '没有', '不是', '别', '不', '不能', '不如', '不想', '没', '不敢',
        '本来', '不大', '不要', '没什么', '无法', '不用', '不然', '非',
        '不会', '无', '未', '不怎么', '不够', '不算', '减少', '从不',
        '不再', '不让', '不见得', '省了', '不服', '不正', '不可', '没法',
        '不比',
    )

    words = set()
    if fileutil.checkFileIfExist(predic_path):
        words.update(
            entry.strip("\n") for entry in fileutil.readFile(predic_path)
        )
    words.update(builtin_words)
    return words
def getSentimentDic():
    """Load the sentiment dictionary from ``./data/sentimentdic.in``.

    Every row is stripped of newline characters and split on single spaces;
    the first field is the word and the second field its sentiment value
    (kept as the string read from the file). When a word appears more than
    once, the first occurrence wins.

    Rows with fewer than two fields (for example a blank trailing line) are
    skipped instead of raising ``IndexError``.

    Returns:
        A dict mapping each word to its sentiment string.
    """
    filename = "./data/sentimentdic.in"
    sentimentDic = {}
    for row in fileutil.readFile(filename):
        fields = row.strip("\n").split(" ")
        if len(fields) < 2:
            # Blank or malformed row: there is no sentiment value to store.
            continue
        # setdefault keeps the first value seen for a repeated word.
        sentimentDic.setdefault(fields[0], fields[1])
    # NOTE: merging the external dictionaries ./data/ne.in (value "-1") and
    # ./data/po.in (value "1") through addExternDic is currently disabled.
    return sentimentDic
def getSqlFormat(filename):
    """Return the contents of *filename* as produced by ``fileutil.readFile``.

    Args:
        filename: Path handed to ``fileutil.readFile``.

    Returns:
        Whatever ``fileutil.readFile`` returns for that path, unmodified.
    """
    contents = fileutil.readFile(filename)
    return contents