Example #1
0
def wechat0327():
    """Append the first message time of every talker to ``firstTime.txt``.

    Opens one connection on ``wechat_message`` and one on ``wechat_contact``.
    For each value returned by ``select_distinct()`` it reads the first row
    of ``createTime`` (query ordered by ``createTime``), converts it from
    epoch milliseconds to a local ``YYYY-mm-dd HH:MM:SS`` string, looks up
    the matching ``nickname`` and appends one tab-separated line to the file.

    :return: None
    """
    message_conn = cd.MySQLCommand()
    message_conn.connectMysql(table="wechat_message")

    contact_conn = cd.MySQLCommand()
    contact_conn.connectMysql(table="wechat_contact")

    try:
        chatroomNums = message_conn.select_distinct()
        with open("firstTime.txt", "a", encoding="utf-8") as f:
            for chatroomNum in chatroomNums:
                situation = "WHERE talker = '%s'" % chatroomNum
                cursor = message_conn.select_order(["createTime"],
                                                   situation,
                                                   order_title="createTime")
                informationTime = cursor.fetchone()[0]
                # createTime is divided by 1000 before being handed to
                # time.localtime, i.e. it is treated as milliseconds.
                firstTime = time.strftime(
                    "%Y-%m-%d %H:%M:%S",
                    time.localtime(int(informationTime) / 1000))
                # The condition needs a %s placeholder; without one the
                # %-formatting raises TypeError.
                name_situation = "WHERE username = '%s'" % chatroomNum
                chatroomName = contact_conn.select_order(
                    ["nickname"], name_situation).fetchone()[0]
                f.write(chatroomName + "\t第一次发言:\t" + firstTime + "\n")
    finally:
        # Release both connections even when a query or the file write fails.
        contact_conn.closeMysql()
        message_conn.closeMysql()
Example #2
0
def getFile(chatroomNum):
    """
    Fetch the chat history of one chat room by its identifier.

    Opens a connection on ``wechat_message`` and one on ``wechat_contact``,
    hands both to ``getChatroomContent`` and closes them afterwards.

    :param chatroomNum: chat room identifier passed through to
        ``getChatroomContent``
    :return: None
    """
    # Connection to the message table.
    message_conn = cd.MySQLCommand()
    message_conn.connectMysql(table="wechat_message")
    # Connection to the contact table.
    contact_conn = cd.MySQLCommand()
    contact_conn.connectMysql(table="wechat_contact")
    try:
        getChatroomContent(message_conn, contact_conn, chatroomNum)
    finally:
        # Close both connections even if the export raises, so a failing
        # worker does not leave connections open.
        message_conn.closeMysql()
        contact_conn.closeMysql()
Example #3
0
def saveResult(chatroomId, chatroomName):
    """
    Process every untagged ``wechat_word`` row of one chat room.

    Selects ``msgId``, ``context`` and ``atList`` from ``wechat_word`` where
    ``tag = 0`` and the message belongs to ``chatroomId``. Rows with a
    non-empty ``atList`` are passed to ``dealByNickName`` together with the
    "/"-separated names; all other rows are passed to
    ``getInformationBymsgId`` with empty name lists.

    :param chatroomId: value matched against ``wechat_message.talker``
    :param chatroomName: chat room name forwarded to ``dealByNickName``
    :return: None
    """
    # Open the database connection.
    word_conn = cd.MySQLCommand()
    word_conn.connectMysql(table="wechat_word")

    try:
        # Cross-table query so that rows are handled one chat room at a time.
        sql = "SELECT msgId, context, atList From wechat_word  where tag = 0 and msgId in (SELECT msgId from  wechat_message where talker = '%s')" % chatroomId
        word_cursor = word_conn.cursor
        res = word_cursor.execute(sql)
        print("结果:", res)

        while True:
            res = word_cursor.fetchone()
            # fetchone() returns None once the result set is exhausted.
            if res is None:
                break
            msgId = res[0]
            atList = res[2]
            nicknames = []
            print("第%s条信息正在处理" % msgId)
            # Truthiness covers both None and "". The previous `is not ""`
            # compared identity with a literal, which is not guaranteed to
            # hold for equal strings.
            if atList:
                name_list = atList.split("/")
                dealByNickName(chatroomId, chatroomName, res, name_list)
                continue
            # Nobody was mentioned: store the row with empty name lists.
            # update_conn is not defined here; it is expected to exist at
            # module level.
            getInformationBymsgId(update_conn, msgId, [], nicknames)
    finally:
        # Close the connection even when processing fails.
        word_conn.closeMysql()
Example #4
0
def speed_word_vector(coreNum):
    """
    Run ``get_word_vector`` over ``wechat_word`` rows in batches of threads.

    Reads ``msgId`` and ``jieba_word`` for rows with ``msgId > 90909``, opens
    ``coreNum`` pairs of connections (``tc_word_vec`` in database
    ``tencent_word_vec`` and ``wechat_vector``) and, per batch, starts one
    thread per connection pair. After all rows are handled, the collected
    ``ignore_word`` set is written to ``data/ignore_word.txt``.

    :param coreNum: number of worker threads / connection pairs; must be >= 1
    :return: None
    :raises ValueError: if ``coreNum`` is smaller than 1
    """
    # With no workers the batch loop would never fetch a row and therefore
    # never terminate, so reject that up front.
    if coreNum < 1:
        raise ValueError("coreNum must be at least 1")

    word_conn = cd.MySQLCommand()
    word_conn.connectMysql(table="wechat_word")
    conn_pairs = []
    try:
        # NOTE(review): the lower bound 90909 is hard-coded; confirm whether
        # it should be configurable.
        situation = "where msgId > %s" % str(90909)
        word_cursor = word_conn.select_order(["msgId", "jieba_word"], situation=situation)
        ignore_word = set()

        # One (vector lookup, result storage) connection pair per worker.
        for _ in range(coreNum):
            vector_conn = cd.MySQLCommand()
            vector_conn.db = "tencent_word_vec"
            vector_conn.connectMysql(table="tc_word_vec")
            new_conn = cd.MySQLCommand()
            new_conn.connectMysql(table="wechat_vector")
            conn_pairs.append((vector_conn, new_conn))

        has_more = True
        while has_more:
            batch = []
            try:
                for vector_conn, new_conn in conn_pairs:
                    row = word_cursor.fetchone()
                    # None marks the end of the result set.
                    if row is None:
                        has_more = False
                        break
                    (msgId, words) = row
                    th = threading.Thread(target=get_word_vector,
                                          args=(vector_conn, new_conn, words, ignore_word,))
                    th.start()
                    print("第%s条信息开始处理!" % str(msgId))
                    batch.append(th)
            finally:
                # Wait for the whole batch before its connections are reused
                # (or closed after an error).
                for th in batch:
                    th.join()

        with open("data/ignore_word.txt", "w", encoding="utf-8") as f:
            for word in ignore_word:
                f.write(str(word) + "\n")
    finally:
        for vector_conn, new_conn in conn_pairs:
            vector_conn.closeMysql()
            new_conn.closeMysql()
        word_conn.closeMysql()
Example #5
0
def multi_run(coreNum, targetTable, targetFunction):
    """
    Multi-threaded driver over all rows of ``wechat_message``.

    Reads ``msgId``, ``type``, ``talker`` and ``content`` row by row and
    calls ``targetFunction(connection, row)`` in worker threads. Rows are
    handled in batches of up to ``coreNum`` threads, each thread using its
    own connection on ``targetTable``; a batch is joined before the next one
    starts, so a connection is never used by two threads at once.

    :param targetFunction: callable run in each thread as
        ``targetFunction(connection, message_row)``
    :param targetTable: table name the worker connections are opened on
    :param coreNum: number of worker threads / connections; must be >= 1
    :return: None
    :raises ValueError: if ``coreNum`` is smaller than 1
    """
    # With no workers the batch loop would never fetch a row and therefore
    # never terminate, so reject that up front.
    if coreNum < 1:
        raise ValueError("coreNum must be at least 1")

    # Connection used to read the message table.
    message_conn = cd.MySQLCommand()
    message_conn.connectMysql(table="wechat_message")
    worker_conns = []
    try:
        message_cursor = message_conn.select_order(["msgId", "type", "talker", "content"])

        for _ in range(coreNum):
            multi_conn = cd.MySQLCommand()
            multi_conn.connectMysql(table=targetTable)
            worker_conns.append(multi_conn)

        has_more = True
        while has_more:
            batch = []
            try:
                for multi_conn in worker_conns:
                    message = message_cursor.fetchone()
                    # None marks the end of the result set.
                    if message is None:
                        has_more = False
                        break
                    th = threading.Thread(target=targetFunction, args=(multi_conn, message, ))
                    th.start()
                    batch.append(th)
            finally:
                # Join after the whole batch has been started; joining right
                # after start() would run the workers one at a time.
                for th in batch:
                    th.join()
    finally:
        # Close every connection, also when a query or worker setup fails.
        for multi_conn in worker_conns:
            multi_conn.closeMysql()
        message_conn.closeMysql()
Example #6
0
def multiThread():
    """
    Start one ``getFile`` thread for every chat room talker.

    Reads the distinct talkers through a ``wechat_message`` connection and
    launches a thread for each one whose identifier contains "chatroom".
    The threads are started but not joined here.

    :return: None
    """
    source = cd.MySQLCommand()
    source.connectMysql(table="wechat_message")
    for row in source.select_distinct():
        talker = row[0]
        # Only group identifiers are exported; talkers such as "weixin"
        # (private messages) do not contain "chatroom" and are skipped.
        if "chatroom" in talker:
            threading.Thread(target=getFile, args=(talker, )).start()
    source.closeMysql()
Example #7
0
def getStopWords():
    """
    Append every ``wechat_vector`` word whose vector is '0' to
    ``data/ignore_word.txt``.

    Each word is printed and written on its own line. Rows whose ``word``
    column is NULL are skipped.

    :return: None
    """
    contact_conn = cd.MySQLCommand()
    contact_conn.connectMysql(table="wechat_vector")

    try:
        with open("data/ignore_word.txt", "a", encoding="utf-8") as f:
            contact_cursor = contact_conn.cursor
            sql = "SELECT word FROM wechat_vector WHERE vector = '0'"
            contact_cursor.execute(sql)

            while True:
                row = contact_cursor.fetchone()
                # Check the row itself before indexing: fetchone() returns
                # None at the end of the result set, and None[0] raises
                # TypeError.
                if row is None:
                    break
                word = row[0]
                if word is None:
                    continue
                print(word)
                f.write(word + '\n')
    finally:
        # The file is closed by the with-block; release the connection too.
        contact_conn.closeMysql()
Example #8
0
def clear_wechat_message():
    """
    Delete the chat history of talkers with fewer than 20 messages.

    For every distinct talker the value returned by ``cursor.execute`` on a
    ``select`` over ``wechat_message`` is used as the message count. When it
    is below 20, the talker's rows are deleted from ``wechat_message`` and
    ``wechat_sender``, the change is committed, and the talker's nickname
    (or the raw identifier if no contact row exists) is printed.

    :return: None
    """
    message_conn = cd.MySQLCommand()
    message_conn.connectMysql(table="wechat_message")
    try:
        for chatroom in message_conn.select_distinct():
            chatroom = chatroom[0]
            res = message_conn.cursor.execute("select talker from wechat_message where talker = '%s'" % chatroom)
            print("*****", res)
            if res < 20:
                message_conn.cursor.execute("delete from wechat_message where talker = '%s'" % chatroom)
                message_conn.cursor.execute("delete from wechat_sender where chatroom = '%s'" % chatroom)
                message_conn.conn.commit()
                # The lookup needs a %s placeholder; without one the
                # %-formatting raises TypeError after the deletes are
                # already committed.
                message_conn.cursor.execute("select nickname from wechat_contact where username = '%s'" % chatroom)
                row = message_conn.cursor.fetchone()
                # A talker without a contact row must not abort the cleanup;
                # fall back to its identifier for the log line.
                deleted_name = row[0] if row is not None else chatroom
                print(deleted_name, "*****已删除")
    finally:
        message_conn.closeMysql()
Example #9
0
# -*- coding: utf-8 -*-
"""
Utility module for modifying remarks.
"""
from commonTools import ConnectDatabase as cd
from commonTools import wechatContent as wc

# Global variables: module-level database connections, one per table.
# They are opened as a side effect of importing this module.
message_conn = cd.MySQLCommand()
message_conn.connectMysql(table="wechat_message")
contact_conn = cd.MySQLCommand()
contact_conn.connectMysql(table="wechat_contact")
update_conn = cd.MySQLCommand()
update_conn.connectMysql(table="wechat_word")


def getInformationBymsgId(word_conn, msgId, name_list, nicknames):
    """
    根据msgId获取消息的相关信息
    :param name_list: @的备注列表
    :param word_conn: 数据库连接
    :param msgId: 信息ID
    :param nicknames: 被@的用户名昵称列表
    :return:
    """
    title_list = ["content"]
    situation = "WHERE msgId = '%s'" % msgId
    message_conn.select_order(title_list=title_list, situation=situation)
    content = message_conn.cursor.fetchone()[0]
    wechat_content = wc.WechatContent(content)
    result = wechat_content.splitContent()