Esempio n. 1
0
def get_cut_words2(comment):
    """Segment one comment and filter the resulting tokens.

    The comment is tokenised with ``cut_word.CutWord(comment).cut()``.  A
    token is kept when it is found in the "positive_dict" or "negative_dict"
    collection, or when it is absent from the "stop_words" collection (all
    three are loaded through ``get_stop_list``).  Positive/negative words
    therefore survive even if they are also listed as stop words.

    Returns the kept tokens as a list, in the order the segmenter produced
    them.
    """
    stop_list = get_stop_list("stop_words")
    positive_list = get_stop_list("positive_dict")
    negative_list = get_stop_list("negative_dict")

    kept = []
    for token in cut_word.CutWord(comment).cut():
        # Sentiment membership is checked first so it overrides the stop list.
        is_sentiment = token in positive_list or token in negative_list
        if is_sentiment or token not in stop_list:
            kept.append(token)
    return kept
 def __init__(self):
     """Initialise the main window.

     Calls the ``QtGui.QMainWindow`` and ``Ui_MainWindow`` initialisers
     explicitly, builds the widgets with ``setupUi``, connects the
     ``RMMButton`` and ``MMButton`` click signals to ``self.rmm`` and
     ``self.mm``, stores a ``cut_word.CutWord`` instance in ``self.CW`` and
     sets the window title.
     """
     QtGui.QMainWindow.__init__(self)
     Ui_MainWindow.__init__(self)
     self.setupUi(self)

     # Wire each button's click signal to its handler, in the original order.
     button_handlers = (
         (self.RMMButton, self.rmm),
         (self.MMButton, self.mm),
     )
     for button, handler in button_handlers:
         button.clicked.connect(handler)

     self.CW = cut_word.CutWord()

     window_title = QtCore.QString(u"Darlin 陈文达 吴嘉琳 翁靖达")
     self.setWindowTitle(window_title)
def get_data_from_sql():
    """Fetch labelled comments from MySQL and segment their text.

    Runs a fixed ``SELECT content,sort`` on ``language_filterdata`` for rows
    whose ``sort`` is neither 0 nor 3.  Each ``content`` value is decoded as
    UTF-8, stripped of any ``{机器型号:...}`` block and of everything from
    ``回复:`` to the end, then segmented with
    ``cut_word.CutWord(text, 'zh').cut()``.

    Returns:
        A list of ``(segmented_content, sort)`` tuples, one per fetched row.
    """
    # NOTE(review): the connection settings, including the password, are
    # hard-coded; consider moving them to configuration.
    mysql_cn = MySQLdb.connect(host='10.250.30.158',
                               port=3306,
                               user='******',
                               passwd='88888888',
                               db='zjy_test')
    sql = "SELECT content,sort FROM language_filterdata where sort!=0 and sort!=3"
    # Release the cursor and the connection even if the query raises.
    try:
        cursor = mysql_cn.cursor()
        try:
            cursor.execute(sql)
            alldata = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        mysql_cn.close()

    # The pattern must be a unicode string: the text it is applied to has
    # been decoded, and a byte-string pattern containing non-ASCII characters
    # cannot match decoded text on Python 2.  Backslashes are doubled because
    # the literal is not raw.
    noise = re.compile(u'{机器型号:[\\s\\S]*?}|回复:[\\s\\S]*')
    return [
        (cut_word.CutWord(noise.sub(u'', content.decode("utf-8")),
                          'zh').cut(), sort)
        for content, sort in alldata
    ]
Esempio n. 4
0
def get_cut_words(comments):
    """Segment every comment and collect the tokens that pass the filter.

    Each comment is tokenised with ``cut_word.CutWord(comment, 'zh').cut()``.
    A token is kept when it is found in the "positive_dict" or
    "negative_dict" collection, or when it is absent from the "stop_words"
    collection (all three are loaded through ``get_stop_list``).  The
    comment and its kept tokens are printed as a progress trace.

    Returns a single flat list with the kept tokens of all comments, in
    input order.
    """
    stop_words = get_stop_list("stop_words")
    positive_words = get_stop_list("positive_dict")
    negative_words = get_stop_list("negative_dict")
    words = []
    for comment in comments:
        # Single-argument print(...) calls give the same output under the
        # Python 2 print statement and the Python 3 print function.
        print(comment)
        cut_words = cut_word.CutWord(comment, 'zh').cut()
        new_cut_words = [
            word for word in cut_words
            if word in positive_words
            or word in negative_words
            or word not in stop_words
        ]
        words.extend(new_cut_words)
        print("-" * 50)
        # Concatenate instead of passing two arguments, which Python 2 would
        # print as a tuple.
        print("cut words: " + "/".join(new_cut_words))
        print("=" * 100)
    return words