def get_cut_words2(comment):
    """Segment one comment and filter out plain stop words.

    The comment is segmented with ``cut_word.CutWord(comment).cut()``.  A
    token is kept when it is found in the positive word list, or in the
    negative word list, or is absent from the stop-word list.  A stop word
    therefore survives only if it is also listed as a sentiment word.

    Returns:
        A list of the kept tokens, in the order ``cut()`` produced them.
    """
    # The three word collections are loaded on every call via get_stop_list.
    stop_words = get_stop_list("stop_words")
    positive_words = get_stop_list("positive_dict")
    negative_words = get_stop_list("negative_dict")

    kept = []
    for token in cut_word.CutWord(comment).cut():
        is_sentiment = token in positive_words or token in negative_words
        if is_sentiment or token not in stop_words:
            kept.append(token)
    return kept
def __init__(self):
    """Set up the main window: base classes, UI, button handlers and title."""
    # Both base initialisers are called explicitly, then the generated UI
    # is built on this instance before any widget is touched.
    QtGui.QMainWindow.__init__(self)
    Ui_MainWindow.__init__(self)
    self.setupUi(self)

    # Hook each button's clicked signal to its handler method.
    bindings = (
        (self.RMMButton, self.rmm),
        (self.MMButton, self.mm),
    )
    for button, handler in bindings:
        button.clicked.connect(handler)

    # Segmenter instance kept on the window as self.CW.
    self.CW = cut_word.CutWord()

    window_title = QtCore.QString(u"Darlin 陈文达 吴嘉琳 翁靖达")
    self.setWindowTitle(window_title)
def get_data_from_sql():
    """Fetch labelled rows from MySQL and return them as segmented samples.

    Runs a fixed query that selects ``content`` and ``sort`` from
    ``language_filterdata`` for rows whose ``sort`` is neither 0 nor 3.
    Each ``content`` value is decoded from UTF-8, has every span matched by
    the cleanup pattern removed, and is then segmented with
    ``cut_word.CutWord(text, 'zh').cut()``.

    Returns:
        A list of ``(cut_result, sort)`` tuples, one per fetched row.

    Raises:
        Whatever ``MySQLdb`` raises on connection or query failure; the
        cursor and connection are closed before the error propagates.
    """
    # NOTE(review): host and credentials are hard-coded here; they are left
    # unchanged to preserve behaviour, but should move to configuration.
    mysql_cn = MySQLdb.connect(host='10.250.30.158', port=3306, user='******',
                               passwd='88888888', db='zjy_test')
    sql = "SELECT content,sort FROM language_filterdata where sort!=0 and sort!=3"
    try:
        cursor = mysql_cn.cursor()
        try:
            cursor.execute(sql)
            rows = cursor.fetchall()
        finally:
            # Release the cursor even when execute/fetchall fails.
            cursor.close()
    finally:
        # Always release the connection, success or failure.
        mysql_cn.close()

    # The pattern is an explicit unicode literal because it is applied to
    # decoded (unicode) text: on Python 2 a byte-string pattern containing
    # non-ASCII characters does not match the corresponding unicode
    # characters.  Backslashes are doubled so the regex text is unchanged.
    # Compiled once instead of being looked up for every row.
    cleanup = re.compile(u'{机器型号:[\\s\\S]*?}|回复:[\\s\\S]*')

    samples = []
    for row in rows:
        text = cleanup.sub(u'', row[0].decode("utf-8"))
        samples.append((cut_word.CutWord(text, 'zh').cut(), row[1]))
    return samples
def get_cut_words(comments): stop_words = get_stop_list("stop_words") positive_words = get_stop_list("positive_dict") negative_words = get_stop_list("negative_dict") words = [] for comment in comments: print comment cut_words = cut_word.CutWord(comment, 'zh').cut() new_cut_words = [ word for word in cut_words if word in positive_words or word in negative_words or word not in stop_words ] words += new_cut_words print "-" * 50 print "cut words:", "/".join(new_cut_words) print "=" * 100 return words