def WordcloudAll(chatrooms, filename="wc_all", maxwords=50, Des=2, skip_useless=0, start_time="1970-01-01", end_time="", title=""):
    '''
    Build one word cloud from the messages of several chatrooms.

    chatrooms: iterable of chat table names, each passed to basicTool.GetData
    filename: str, output file name forwarded to Normal
              (original note: stored under the output folder)
    maxwords: int, maximum number of words in the cloud, forwarded to Normal
    Des: 0: sent, 1: received, 2: all (forwarded to basicTool.GetData)
    skip_useless: forwarded to Normal unchanged
    start_time, end_time: time bounds forwarded to basicTool.GetData
    title: forwarded to Normal
    '''
    message_list = []
    for chatroom in chatrooms:
        rows = basicTool.GetData(chatroom=chatroom,
                                 columns=["Message", "Type"],
                                 Des=Des,
                                 start_time=start_time,
                                 end_time=end_time)
        # Keep only the Message column of rows whose Type column equals 1.
        message_list.extend(row[0] for row in rows if row[1] == 1)
    # The caller's title is forwarded; previously an empty string was always
    # passed, so the ``title`` argument had no effect.
    Normal(message_list,
           filename=filename,
           maxwords=maxwords,
           skip_useless=skip_useless,
           title=title)
def TimeSlice(chatrooms_single, start=1, end=6, filename="Time_slice", start_time="1970-01-02", end_time=""):
    '''
    Write every message whose local hour of day lies in [start, end] to
    ``filename + ".txt"``.

    Each output line is ``name,Des,YYYY-mm-dd HH:MM:SS,Message`` where the
    name comes from basicTool.GetName. Nothing is returned.

    chatrooms_single: iterable of chat table names
    start: first hour of day (inclusive) to keep
    end: last hour of day (inclusive) to keep
    filename: output path without the ".txt" suffix
    start_time, end_time: time bounds forwarded to basicTool.GetData
    '''
    with open(filename + ".txt", "w+", encoding="utf-8") as f:
        for chatroom in chatrooms_single:
            rows = basicTool.GetData(
                chatroom, ["CreateTime", "Message", "Des", "Type"],
                start_time=start_time,
                end_time=end_time,
                Type=2)
            for row in rows:
                time_array = time.localtime(row[0])
                # Index 3 of a struct_time is tm_hour.
                if not start <= time_array[3] <= end:
                    continue
                created = time.strftime("%Y-%m-%d %H:%M:%S", time_array)
                f.write(
                    basicTool.GetName(chatroom) + "," + str(row[2]) + "," +
                    created + "," + row[1] + "\n")
def WordCloudSingle(chatroom, filename="wc_single", maxwords=200, Des=2, from_user="", start_time="1970-01-01", end_time="", title=""):
    '''
    Build a word cloud from one chatroom, optionally for a single sender.

    chatroom: chat table name passed to basicTool.GetData
    filename: str, output file name forwarded to Normal
    maxwords: int, maximum number of words in the cloud, forwarded to Normal
    Des: 0: sent, 1: received, 2: all (forwarded to basicTool.GetData)
    from_user: str, when non-empty only rows whose SentFrom column equals it
               are kept
    start_time, end_time: time bounds forwarded to basicTool.GetData
    title: forwarded to Normal
    '''
    rows = basicTool.GetData(chatroom=chatroom,
                             columns=["Message", "Type", "SentFrom"],
                             Des=Des,
                             start_time=start_time,
                             end_time=end_time)
    # Keep rows with Type == 1; the sender check applies only when a
    # from_user filter was given.
    selected = [
        record[0] for record in rows
        if record[1] == 1 and (from_user == "" or record[2] == from_user)
    ]
    Normal(selected, filename=filename, maxwords=maxwords, title=title)
def UsageSingle(chatroom, chartname="", filename="usage_ana_single", Des=2, start_time="1970-01-02", end_time=""):
    '''
    Count the messages of one chatroom per local calendar day and pass the
    date-ordered (day, count) pairs to Normal.

    chatroom: str, chat table name
    chartname: str, chart name forwarded to Normal
    filename: str, output file name forwarded to Normal
              (original note: stored under the output folder)
    Des: 0: sent, 1: received, 2: all (forwarded to basicTool.GetData)
    start_time, end_time: time bounds forwarded to basicTool.GetData
    '''
    per_day = {}
    for record in basicTool.GetData(chatroom=chatroom,
                                    columns=["CreateTime"],
                                    Des=Des,
                                    start_time=start_time,
                                    end_time=end_time):
        day = time.strftime("%Y-%m-%d", time.localtime(record[0]))
        per_day[day] = per_day.get(day, 0) + 1
    # ISO-formatted dates sort chronologically as plain strings.
    ordered = list(per_day.items())
    ordered.sort(key=operator.itemgetter(0))
    Normal(ordered, chartname=chartname, filename=filename)
def UsageAll(chatrooms, chartname="", filename="usage_ana_all", Des=0, start_time="1970-01-02", end_time=""):
    '''
    Count the messages of several chatrooms per local calendar day and pass
    the date-ordered (day, count) pairs to Normal.

    chatrooms: iterable of chat table names
    chartname: str, chart name forwarded to Normal
    filename: str, output file name forwarded to Normal
              (original note: stored under the output folder)
    Des: 0: sent, 1: received, 2: all (forwarded to basicTool.GetData)
    start_time, end_time: time bounds forwarded to basicTool.GetData
    '''
    tally = {}
    for room in chatrooms:
        rows = basicTool.GetData(room, ["CreateTime"],
                                 Des=Des,
                                 start_time=start_time,
                                 end_time=end_time)
        for record in rows:
            stamp = time.strftime("%Y-%m-%d", time.localtime(record[0]))
            tally[stamp] = tally.get(stamp, 0) + 1
    by_date = sorted(tally.items(), key=operator.itemgetter(0))
    Normal(by_date, chartname=chartname, filename=filename)
def MostDay(chatrooms_group, chatrooms_single, filename="mostday_to", Des=0, start_time="1970-01-02", end_time=""):
    '''
    Find the day with the most messages in direction ``Des`` and dump that
    day's messages to ``filename + ".txt"``.

    The busiest day is determined from ``chatrooms_single`` only; the dump
    covers both group and single chatrooms. The first output line is the day
    (YYYY-mm-dd); every following line is ``name,Message``.

    If no row matches ``Des`` in the requested range there is no busiest day;
    the function then returns without writing a file (it used to raise
    IndexError).

    chatrooms_group, chatrooms_single: lists of chat table names
    filename: output path without the ".txt" suffix
    Des: direction value compared against the Des column (0: sent, 1: received)
    start_time, end_time: time bounds forwarded to basicTool.GetData
    '''
    # This table is excluded from the dump; the id is hard-coded in the
    # original code and the reason is not visible here.
    excluded_chatroom = "Chat_b7ebbe67d8f64c77cda5415f4d749cc6"

    per_day = {}
    for chatroom in chatrooms_single:
        for row in basicTool.GetData(chatroom, ["CreateTime", "Des"],
                                     start_time=start_time,
                                     end_time=end_time,
                                     Type=2):
            if row[1] == Des:
                day = time.strftime("%Y-%m-%d", time.localtime(row[0]))
                per_day[day] = per_day.get(day, 0) + 1

    if not per_day:
        return

    # max() returns the first maximal item, the same winner as a stable
    # descending sort followed by [0].
    busiest_day = max(per_day.items(), key=operator.itemgetter(1))[0]
    day_start = int(time.mktime(
        time.strptime(busiest_day + ' 00:00:00', "%Y-%m-%d %H:%M:%S")))
    day_end = int(time.mktime(
        time.strptime(busiest_day + ' 23:59:59', "%Y-%m-%d %H:%M:%S")))

    chat_with = {}
    with basicTool.SqliteInit() as sqlite_cur:
        for chatroom in chatrooms_group + chatrooms_single:
            if chatroom == excluded_chatroom:
                continue
            # Table names cannot be bound as parameters; the time bounds are
            # integers computed above.
            sql = "select Message,Des,Type from " + chatroom + " where CreateTime>=" + str(
                day_start) + " and CreateTime<=" + str(day_end)
            sqlite_cur.execute(sql)
            texts = [
                row[0] for row in sqlite_cur.fetchall()
                if row[1] == Des and row[2] == 1
            ]
            if texts:
                chat_with[chatroom] = texts

    with open(filename + ".txt", "w+", encoding="utf-8") as f:
        f.write(busiest_day + "\n")
        for chatroom, texts in chat_with.items():
            name = basicTool.GetName(chatroom)
            for text in texts:
                f.write(name + "," + text + "\n")
def RowLine(chatrooms, filename, limit=10, start_time="1970-01-02", end_time=""):
    '''
    Plot the growth of the message count over time for the busiest chatrooms
    and save the figure as ``filename + ".pdf"``.

    The ``limit`` chatrooms with the highest basicTool.GetRowNum value are
    plotted. For each one the (id, CreateTime) rows are thinned to those with
    ``id % 20 == 1`` plus the final row; id is drawn against the creation date.
    Chatrooms that return no rows in the range are skipped.

    chatrooms: list of chat table names,
               e.g. ["Chat_67183be064c8c3ef11df9bb7a53014c8"]
    filename: output path without the ".pdf" suffix
    limit: maximum number of chatrooms to plot
    start_time, end_time: time bounds forwarded to basicTool
    '''
    ranked = sorted(
        ((chatroom,
          basicTool.GetRowNum(chatroom, start_time=start_time, end_time=end_time))
         for chatroom in chatrooms),
        key=operator.itemgetter(1),
        reverse=True)
    # Slicing already copes with lists shorter than ``limit``.
    busiest = [entry[0] for entry in ranked[:limit]]

    id_time_dict = {}
    for chatroom in busiest:
        samples = np.array(basicTool.GetData(chatroom, ["id", "CreateTime"],
                                             start_time=start_time,
                                             end_time=end_time),
                           dtype="int")
        if samples.size == 0:
            # An empty result is a 1-D array, so the 2-D indexing below would
            # raise IndexError.
            continue
        id_time_dict[chatroom] = np.append(
            samples[samples[:, 0] % 20 == 1], [samples[-1, :]], axis=0)

    f = plt.figure(figsize=(16, 9))
    try:
        plt.grid(True)
        plt.rc('font', family='DengXian')
        ax = plt.gca()
        ax.xaxis.set_major_formatter(md.DateFormatter('%Y-%m-%d'))
        for chatroom, samples in id_time_dict.items():
            dates = [datetime.fromtimestamp(stamp) for stamp in samples[:, 1]]
            plt.plot(md.date2num(dates),
                     samples[:, 0],
                     label=basicTool.GetName(chatroom))
        plt.legend(loc='upper left')
        f.savefig(filename + ".pdf", bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(f)
def TimeSingle(chatroom, chartname="", filename="time_ana_single", Des=2, start_time="1970-01-02", end_time=""):
    '''
    Fetch the (id, CreateTime) rows of one chatroom and pass them to Normal.

    chatroom: str, chat table name,
              e.g. "Chat_67183be064c8c3ef11df9bb7a53014c8"
    chartname: str, chart name forwarded to Normal
    filename: str, output file name forwarded to Normal
              (original note: stored under the output folder)
    Des: 0: sent, 1: received, 2: all (forwarded to basicTool.GetData)
    start_time, end_time: time bounds forwarded to basicTool.GetData
    '''
    fetched = basicTool.GetData(chatroom=chatroom,
                                columns=["id", "CreateTime"],
                                Des=Des,
                                start_time=start_time,
                                end_time=end_time)
    # Materialise the rows into a fresh list before handing them on.
    Normal(list(fetched), chartname=chartname, filename=filename)
def GroupRankingSingle(chatroom, filename="group_ranking_single", num=10, Des=2, start_time="1970-01-01", end_time="", title=""):
    '''
    Collect the (SentFrom, Message) rows of one group chat, drop rows sent by
    "system", and pass the rest to Normal (original note: ranks the people who
    speak most in the group).

    chatroom: str, chat table name
    filename: str, output file name forwarded to Normal
              (original note: stored under the output folder)
    num: int, number of entries on the x axis, i.e. users shown
         (forwarded to Normal)
    Des: 0: sent, 1: received, 2: all (forwarded to basicTool.GetData)
    start_time, end_time: time bounds forwarded to basicTool.GetData
    title: forwarded to Normal
    '''
    # Des is now forwarded; previously it was accepted but never used. Other
    # functions in this file call GetData without Des and receive rows of
    # both directions, so the default Des=2 keeps the old result.
    rows = basicTool.GetData(chatroom=chatroom,
                             columns=["SentFrom", "Message"],
                             Des=Des,
                             start_time=start_time,
                             end_time=end_time,
                             Type=1)
    message_list = [row for row in rows if row[0] != "system"]
    Normal(message_list, filename=filename, num=num, title=title)
def _merge_type_count(counter, source, target):
    '''Add the count stored under ``source`` to ``target`` and remove ``source``.'''
    if source in counter:
        counter[target] = counter.get(target, 0) + counter.pop(source)


def TypeAnalyse(chatrooms_single, filename="Type_ana", start_time="1970-01-02", end_time=""):
    '''
    Count messages per message type, separately for sent (Des == 0) and
    received (any other Des), and render two pie charts side by side into
    ``filename + ".html"``.

    Type 10002 is folded into 10000 and type 62 into 43 before charting.
    Types outside the charted set are counted but not shown.

    chatrooms_single: iterable of chat table names
    filename: output path without the ".html" suffix
    start_time, end_time: time bounds forwarded to basicTool.GetData
    '''
    charted_types = (1, 3, 34, 42, 43, 47, 48, 49, 50, 10000)
    sent_counter = dict.fromkeys(charted_types, 0)
    received_counter = dict.fromkeys(charted_types, 0)

    for chatroom in chatrooms_single:
        for row in basicTool.GetData(chatroom, ["Type", "Des"],
                                     start_time=start_time,
                                     end_time=end_time,
                                     Type=2):
            counter = sent_counter if row[1] == 0 else received_counter
            # Each row is one message. The received side used to add the Des
            # value itself, which is only correct when Des is exactly 1.
            counter[row[0]] = counter.get(row[0], 0) + 1

    for counter in (sent_counter, received_counter):
        _merge_type_count(counter, 10002, 10000)
        _merge_type_count(counter, 62, 43)

    define_dict = {
        "文字": 1,
        "图片": 3,
        "语音": 34,
        "名片": 42,
        "视频": 43,
        "表情": 47,
        "定位": 48,
        "链接": 49,
        "微信电话": 50,
        "系统消息": 10000
    }
    attr = ["文字", "表情", "图片", "视频", "语音", "名片", "定位", "链接", "微信电话", "系统消息"]
    v1 = [sent_counter[define_dict[label]] for label in attr]
    v2 = [received_counter[define_dict[label]] for label in attr]

    pie = Pie("", width=1000, height=400)
    pie.add("发出",
            attr,
            v1,
            center=[25, 50],
            is_random=True,
            radius=[30, 75],
            is_legend_show=True,
            is_label_show=True,
            legend_top="bottom")
    pie.add("接收",
            attr,
            v2,
            center=[75, 50],
            is_random=True,
            radius=[30, 75],
            is_legend_show=True,
            is_label_show=True,
            legend_top="bottom")
    pie.render(path=filename + ".html")
def BaseData(chatrooms_group, chatrooms_single, filename="basic_ana", start_time="1970-01-02", end_time=""):
    '''
    Write basic statistics to ``filename + ".txt"``:

    - number of friends (single chatrooms) and of group chats
    - messages sent: total, friends only, groups only
    - messages received: total, friends only, groups only
    - mean over friends of the per-chat average message length, sent and
      received (0.00 when no chat supplies an average)
    - number of "撤回消息" rows in friend chats, sent and received

    chatrooms_group, chatrooms_single: lists of chat table names
    filename: output path without the ".txt" suffix
    start_time, end_time: time bounds forwarded to basicTool
    '''
    avg_len_sent = []
    avg_len_received = []
    for chatroom in chatrooms_single:
        for des, bucket in ((0, avg_len_sent), (1, avg_len_received)):
            result = basicTool.getAvgLen(chatroom,
                                         Des=des,
                                         start_time=start_time,
                                         end_time=end_time)
            if result[0] is not None:
                bucket.append(float(result[0]))

    def row_total(rooms, des):
        # Sum of basicTool.GetRowNum over ``rooms`` for one direction.
        return sum(
            basicTool.GetRowNum(room,
                                Des=des,
                                start_time=start_time,
                                end_time=end_time) for room in rooms)

    sent_single = row_total(chatrooms_single, 0)
    received_single = row_total(chatrooms_single, 1)
    sent_group = row_total(chatrooms_group, 0)
    received_group = row_total(chatrooms_group, 1)
    # The overall totals cover group + single chatrooms, so they are the sum
    # of the two partial totals; no second round of queries is needed.
    sent_all = sent_group + sent_single
    received_all = received_group + received_single

    recalled_by_me = 0
    recalled_by_others = 0
    for chatroom in chatrooms_single:
        for row in basicTool.GetData(chatroom,
                                     columns=["Message", "Des"],
                                     start_time=start_time,
                                     end_time=end_time,
                                     Type=2):
            if row[0] == "撤回消息":
                if row[1] == 0:
                    recalled_by_me += 1
                else:
                    recalled_by_others += 1

    # np.mean of an empty list is nan and emits a RuntimeWarning; report 0.
    mean_sent = np.mean(avg_len_sent) if avg_len_sent else 0.0
    mean_received = np.mean(avg_len_received) if avg_len_received else 0.0

    with open(filename + ".txt", "w+", encoding="utf-8") as f:
        f.write("好友总数:" + str(len(chatrooms_single)) + "个\n")
        f.write("群聊总数:" + str(len(chatrooms_group)) + "个\n")
        f.write("总共发出:" + str(sent_all) + "条\n")
        f.write("总共发出(好友):" + str(sent_single) + "条\n")
        f.write("总共发出(群聊):" + str(sent_group) + "条\n")
        f.write("总共接收:" + str(received_all) + "条\n")
        f.write("总共接收(好友):" + str(received_single) + "条\n")
        f.write("总共接收(群聊):" + str(received_group) + "条\n")
        f.write("平均发出消息长度为(好友):" + "%.2f" % mean_sent + "个字\n")
        f.write("平均接收消息长度为(好友):" + "%.2f" % mean_received + "个字\n")
        f.write("我总共撤回(好友):" + str(recalled_by_me) + "次\n")
        f.write("总共被撤回(好友):" + str(recalled_by_others) + "次\n")
def _lonely_messages(chatrooms, days, des):
    '''Return {chatroom: [Message, ...]} for rows with Des == des created on any of ``days``.'''
    collected = {}
    if not days:
        return collected
    # One database context serves every day instead of one per day.
    with basicTool.SqliteInit() as cursor:
        for day in days:
            day_start = int(time.mktime(
                time.strptime(day + ' 00:00:00', "%Y-%m-%d %H:%M:%S")))
            day_end = int(time.mktime(
                time.strptime(day + ' 23:59:59', "%Y-%m-%d %H:%M:%S")))
            for chatroom in chatrooms:
                # Table names cannot be bound as parameters; the remaining
                # values are integers computed locally.
                sql = "select Message from " + chatroom + " where CreateTime>=" + str(
                    day_start) + " and CreateTime<=" + str(
                        day_end) + " and Des=" + str(des)
                cursor.execute(sql)
                texts = [row[0] for row in cursor.fetchall()]
                if texts:
                    # Accumulate across days. Plain assignment used to discard
                    # the messages of every earlier day for the same chatroom.
                    collected.setdefault(chatroom, []).extend(texts)
    return collected


def Lonelydude(chatrooms, filename="lonelydude", start_time="1970-01-02", end_time=""):
    '''
    List the days on which messages were sent but none received, and the days
    on which messages were received but none sent, then dump the messages of
    those days to ``filename + ".txt"``.

    Days are compared across all given chatrooms together, not per chatroom.

    chatrooms: iterable of chat table names
    filename: str, output path without the ".txt" suffix
              (original note: stored under the output folder)
    start_time, end_time: time bounds forwarded to basicTool.GetData
    '''
    # Dicts serve as insertion-ordered sets so the day lists keep the order
    # in which days were first seen.
    sent_days = {}
    received_days = {}
    for chatroom in chatrooms:
        for row in basicTool.GetData(chatroom, ["CreateTime", "Des"],
                                     start_time=start_time,
                                     end_time=end_time):
            day = time.strftime("%Y-%m-%d", time.localtime(row[0]))
            if row[1] == 0:
                sent_days[day] = True
            elif row[1] == 1:
                received_days[day] = True

    no_response = [day for day in sent_days if day not in received_days]
    no_reply = [day for day in received_days if day not in sent_days]

    no_response_with = _lonely_messages(chatrooms, no_response, 0)
    no_reply_with = _lonely_messages(chatrooms, no_reply, 1)

    with open(filename + ".txt", "w+", encoding="utf-8") as f:
        f.write("未收到回复:" + ",".join(no_response) + "\n")
        f.write("未回复对方:" + ",".join(no_reply) + "\n")
        f.write("未获得回复:\n")
        for chatroom, texts in no_response_with.items():
            for text in texts:
                f.write(basicTool.GetName(chatroom) + ": " + text + "\n")
        f.write("\n未回复对方:\n")
        for chatroom, texts in no_reply_with.items():
            for text in texts:
                f.write(basicTool.GetName(chatroom) + ": " + text + "\n")