def TimeSlice(chatrooms_single, start=1, end=6, filename="Time_slice", start_time="1970-01-02", end_time=""):
    '''
    Write every message whose local hour-of-day falls inside a window.

    One line per message is written to ``filename + ".txt"`` in the form
    ``name,Des,YYYY-mm-dd HH:MM:SS,message``.

    chatrooms_single: iterable of chat table names to scan
    start: first hour of the window (inclusive)
    end: last hour of the window (inclusive); when start > end the window
         wraps past midnight (e.g. start=22, end=3 covers 22:00-03:59)
    filename: output path without the ".txt" extension
    start_time, end_time: date bounds forwarded unchanged to basicTool.GetData
    '''
    columns = ["CreateTime", "Message", "Des", "Type"]
    with open(filename + ".txt", "w+", encoding="utf-8") as f:
        for chatroom in chatrooms_single:
            rows = basicTool.GetData(chatroom,
                                     columns,
                                     start_time=start_time,
                                     end_time=end_time,
                                     Type=2)
            for row in rows:
                time_array = time.localtime(row[0])
                hour = time_array[3]
                if start <= end:
                    in_window = start <= hour <= end
                else:
                    # Overnight window: late evening OR early morning.
                    in_window = hour >= start or hour <= end
                if not in_window:
                    continue
                stamp = time.strftime("%Y-%m-%d %H:%M:%S", time_array)
                f.write(
                    basicTool.GetName(chatroom) + "," + str(row[2]) + "," +
                    stamp + "," + row[1] + "\n")
def MostDay(chatrooms_group, chatrooms_single, filename="mostday_to", Des=0, start_time="1970-01-02", end_time="",
            exclude_chatrooms=("Chat_b7ebbe67d8f64c77cda5415f4d749cc6",)):
    '''
    Find the day with the most sent/received messages and dump that day's texts.

    The busiest day is chosen by counting rows of chatrooms_single whose Des
    column equals ``Des``. The report (``filename + ".txt"``) then lists, for
    every chat in chatrooms_group + chatrooms_single, the rows of that day
    with the same Des and Type == 1, one ``name,message`` line each, preceded
    by a first line holding the date.

    chatrooms_group, chatrooms_single: lists of chat table names
    Des: value compared against the Des column of each row
    start_time, end_time: date bounds forwarded unchanged to basicTool.GetData
    exclude_chatrooms: chat table names left out of the message dump

    If no row matches, an empty report file is written instead of raising.
    '''
    chatrooms_all = chatrooms_group + chatrooms_single

    # Count matching messages per local calendar day.
    per_day = {}
    for chatroom in chatrooms_single:
        for row in basicTool.GetData(chatroom, ["CreateTime", "Des"],
                                     start_time=start_time,
                                     end_time=end_time,
                                     Type=2):
            if row[1] == Des:
                day = time.strftime("%Y-%m-%d", time.localtime(row[0]))
                per_day[day] = per_day.get(day, 0) + 1

    if not per_day:
        # Nothing to rank: leave an empty report rather than fail on [0].
        with open(filename + ".txt", "w+", encoding="utf-8"):
            pass
        return

    # max() keeps the first day on ties, like a stable descending sort would.
    busiest_day = max(per_day.items(), key=operator.itemgetter(1))[0]
    day_start = int(time.mktime(time.strptime(busiest_day + ' 00:00:00', "%Y-%m-%d %H:%M:%S")))
    day_end = int(time.mktime(time.strptime(busiest_day + ' 23:59:59', "%Y-%m-%d %H:%M:%S")))

    excluded = set(exclude_chatrooms)
    chat_with = {}
    with basicTool.SqliteInit() as sqlite_cur:
        for chatroom in chatrooms_all:
            if chatroom in excluded:
                continue
            # Table names cannot be bound as parameters; the time bounds can.
            sql = ("select Message,Des,Type from " + chatroom +
                   " where CreateTime>=? and CreateTime<=?")
            sqlite_cur.execute(sql, (day_start, day_end))
            texts = [
                row[0] for row in sqlite_cur.fetchall()
                if row[1] == Des and row[2] == 1
            ]
            if texts:
                chat_with[chatroom] = texts

    with open(filename + ".txt", "w+", encoding="utf-8") as f:
        f.write(busiest_day + "\n")
        for chatroom, texts in chat_with.items():
            for text in texts:
                f.write(basicTool.GetName(chatroom) + "," + text + "\n")
def RowLine(chatrooms, filename, limit=10, start_time="1970-01-02", end_time=""):
    '''
    Plot how the row id of each of the busiest chats grows over time.

    chatrooms: list of chat table names, e.g. ["Chat_67183be064c8c3ef11df9bb7a53014c8"]
    filename: output path without the ".pdf" extension
    limit: number of chats (ranked by basicTool.GetRowNum, descending) to plot
    start_time, end_time: date bounds forwarded unchanged to basicTool

    Chats with no rows in the requested range are skipped. The figure is
    closed after saving so repeated calls do not accumulate open figures.
    '''
    ranked = [
        (chatroom, basicTool.GetRowNum(chatroom, start_time=start_time, end_time=end_time))
        for chatroom in chatrooms
    ]
    ranked.sort(key=operator.itemgetter(1), reverse=True)
    # Slicing already copes with fewer than `limit` entries.
    chatrooms_inuse = [entry[0] for entry in ranked[:limit]]

    id_time_dict = {}
    for chatroom in chatrooms_inuse:
        samples = np.array(basicTool.GetData(chatroom, ["id", "CreateTime"],
                                             start_time=start_time,
                                             end_time=end_time),
                           dtype="int")
        if samples.size == 0:
            # An empty result is 1-D and cannot be indexed by column.
            continue
        # Keep one row in twenty (id % 20 == 1) plus the final row so the
        # line always reaches the last message.
        id_time_dict[chatroom] = np.append(
            samples[samples[:, 0] % 20 == 1], [samples[-1, :]], axis=0)

    f = plt.figure(figsize=(16, 9))
    try:
        plt.grid(True)
        font = {'family': 'DengXian'}
        plt.rc('font', **font)
        ax = plt.gca()
        if id_time_dict:
            # Only install the date formatter when there is date data to show.
            ax.xaxis.set_major_formatter(md.DateFormatter('%Y-%m-%d'))
        for chatroom, samples in id_time_dict.items():
            moments = [datetime.fromtimestamp(stamp) for stamp in samples[:, 1]]
            plt.plot(md.date2num(moments), samples[:, 0],
                     label=basicTool.GetName(chatroom))
        if id_time_dict:
            plt.legend(loc='upper left')
        f.savefig(filename + ".pdf", bbox_inches='tight')
    finally:
        plt.close(f)
def Normal(params, filename="group_ranking", num=10, title=""):
    '''
    Render a bar chart of how often each name occurs in params.

    Every record contributes one count to the id in its first element; ids
    are then mapped through basicTool.GetName and ids sharing a name are
    merged. The chart is written to ``filename + ".html"``.

    params: sequence of records whose first element is an id
    num: int, number of bars on the x axis, i.e. how many names are shown
    title: chart title
    '''
    # Occurrences per id, in order of first appearance.
    occurrences = {}
    for record in params:
        occurrences[record[0]] = occurrences.get(record[0], 0) + 1

    # Merge ids that resolve to the same display name.
    totals_by_name = {}
    for ident, count in occurrences.items():
        label = basicTool.GetName(ident)
        totals_by_name[label] = totals_by_name.get(label, 0) + count

    ranking = sorted(totals_by_name.items(),
                     key=lambda x: x[1],
                     reverse=True)
    # A slice covers both "fewer than num" and "num or more" names.
    shown = ranking[:num]
    names = [entry[0] for entry in shown]
    totals = [entry[1] for entry in shown]

    grid = Grid()
    bar = Bar(title=title, title_pos="40%")
    bar.add(
        "",
        names,
        totals,
        is_label_show=True,
        xaxis_interval=0,
        is_xaxislabel_align=True,
        xaxis_rotate=30,
        is_xaxis_show=True,
        is_yaxis_show=True,
        is_splitline_show=False)
    grid.add(bar, grid_bottom="30%")
    grid.render(path=filename + ".html")
def RowAnalyse(chatrooms_single, filename="Row_ana", start_time="1970-01-02", end_time=""):
    '''
    Chart the distribution of row counts across one-to-one chats.

    Each chat's row count comes from basicTool.GetRowNum; chats are ranked in
    descending order and drawn as a bar chart with a data-zoom control,
    rendered to ``filename + ".html"``.

    chatrooms_single: iterable of chat table names
    start_time, end_time: date bounds forwarded unchanged to basicTool.GetRowNum
    '''
    row_totals = {
        chatroom: basicTool.GetRowNum(chatroom, start_time=start_time, end_time=end_time)
        for chatroom in chatrooms_single
    }
    ranking = sorted(row_totals.items(),
                     key=operator.itemgetter(1),
                     reverse=True)

    # Category labels are "<rank>,<name>"; label_formatter below trims them
    # back to the rank for display on the axis.
    x_axis = []
    y_axis = []
    for rank, (chatroom, total) in enumerate(ranking, start=1):
        x_axis.append(str(rank) + "," + basicTool.GetName(chatroom))
        y_axis.append(total)

    bar_top = Bar("条数统计", title_pos="10%")

    # NOTE(review): kept as a plain named function because it is handed to
    # the chart library as xaxis_formatter - confirm how the library consumes
    # it before changing its form.
    def label_formatter(params):
        return params.split(",")[0]

    bar_top.add(
        "",
        x_axis,
        y_axis,
        xaxis_formatter=label_formatter,
        yaxis_name="条数",
        is_xaxislabel_align=True,
        is_datazoom_show=True,
        datazoom_range=[0, 100])
    bar_top.render(path=filename + ".html")
def _top_emoji(chatrooms, des, md5_pattern, start_stamp, end_stamp):
    '''
    Return ``(md5, [count, chat name, CreateTime])`` for the most frequent
    emoji among Type=47 rows with the given Des, or None if there is none.
    '''
    stats = {}
    for chatroom in chatrooms:
        # Table names cannot be bound as parameters; the values can.
        sql = ("SELECT Message,CreateTime as num FROM " + chatroom +
               " WHERE Type=47 and Des=? and CreateTime>=? and CreateTime<=?")
        name = basicTool.GetName(chatroom)
        with basicTool.SqliteInit() as sqlite_cur:
            sqlite_cur.execute(sql, (des, str(start_stamp), str(end_stamp)))
            rows = sqlite_cur.fetchall()
        for row in rows:
            found = md5_pattern.search(row[0])
            if found is None or not found.group(1):
                # No md5 attribute (or an empty one): nothing to count.
                continue
            md5 = found.group(1)
            entry = stats.get(md5)
            if entry is None:
                stats[md5] = [1, name, row[1]]
                continue
            entry[0] += 1
            if name != "" and entry[1] == "":
                # Prefer a location whose chat has a non-empty name.
                entry[1] = name
                entry[2] = row[1]
    if not stats:
        return None
    # max() keeps the first entry on ties, like a stable descending sort.
    return max(stats.items(), key=lambda item: item[1][0])


def MostEmoji(chatrooms_group, chatrooms_single, filename="emoji_ranking", start_time="1970-01-02", end_time=""):
    '''
    Report the most sent and the most received emoji (Type=47 rows).

    Sent emoji (Des=0) are counted over chatrooms_group + chatrooms_single,
    received emoji (Des=1) over chatrooms_single only. For each direction
    the report (``filename + ".txt"``) gives the emoji md5, its count, and
    one chat name and time where it occurs. A direction with no emoji is
    omitted from the report instead of raising.

    start_time: "YYYY-mm-dd" lower bound on CreateTime
    end_time: "YYYY-mm-dd" upper bound; "" means the current time
    '''
    chatrooms_all = chatrooms_group + chatrooms_single
    pattern = re.compile(' md5="(.*?)"')

    # Convert the requested period to epoch seconds.
    start_time_stamp = int(time.mktime(time.strptime(start_time, "%Y-%m-%d")))
    if end_time == "":
        end_time_stamp = int(time.time())
    else:
        end_time_stamp = int(time.mktime(time.strptime(end_time, "%Y-%m-%d")))

    top_sent = _top_emoji(chatrooms_all, 0, pattern, start_time_stamp, end_time_stamp)
    top_received = _top_emoji(chatrooms_single, 1, pattern, start_time_stamp, end_time_stamp)

    with open(filename + ".txt", "w+", encoding="utf-8") as f:
        if top_sent is not None:
            md5, (count, name, created) = top_sent
            f.write("发出最多的表情包:" + md5 + "\n")
            f.write("共" + str(count) + "次" + "\n")
            f.write("聊天记录定位:微信名:" + name + ",时间:" +
                    str(datetime.fromtimestamp(created)) + "\n\n")
        if top_received is not None:
            md5, (count, name, created) = top_received
            f.write("接收最多的表情包:" + md5 + "\n")
            f.write("共" + str(count) + "次" + "\n")
            f.write("聊天记录定位:微信名:" + name + ",时间:" +
                    str(datetime.fromtimestamp(created)) + "\n")
def AutoRun(outputdir="./outputs", start_time="2019-01-01", end_time="2019-12-31"):
    '''
    Run every analysis in sequence and write all reports into outputdir.

    outputdir: directory for the generated files; created (including any
               missing parent directories) when it does not exist
    start_time, end_time: "YYYY-mm-dd" period passed to every analysis
    '''
    period = {"start_time": start_time, "end_time": end_time}
    os.makedirs(outputdir, exist_ok=True)

    print("正在获取聊天数据表...")
    chatrooms_group = basicTool.GetChatrooms(typename=1)
    chatrooms_single = basicTool.GetChatrooms(typename=2)
    chatrooms_all = chatrooms_group + chatrooms_single

    print("正在生成基本数据...")
    basicData.BaseData(chatrooms_group, chatrooms_single, filename=outputdir+"/基本数据", **period)

    print("正在生成表情包数据...")
    basicData.MostEmoji(chatrooms_group, chatrooms_single, filename=outputdir+"/表情包数据", **period)

    print("正在生成消息类型图...")
    basicData.TypeAnalyse(chatrooms_single, filename=outputdir+"/消息类型(仅好友)(饼图)", **period)

    print("正在生成群聊数据...")
    groupAnalyse.GroupRankingAll(chatrooms_group, filename=outputdir+"/所有群聊发出消息排名(柱状图)", **period)

    # Per-group ranking for the ten groups holding the most rows.
    group_rows = [
        (chatroom, basicTool.GetRowNum(chatroom, **period))
        for chatroom in chatrooms_group
    ]
    chatrooms_top_group = sorted(group_rows, key=lambda x: x[1], reverse=True)[:10]
    for counter, entry in enumerate(chatrooms_top_group, start=1):
        groupAnalyse.GroupRankingSingle(entry[0],
                                        filename=outputdir+"/"+str(counter)+".排名前十群聊中的发出消息排名(柱状图)",
                                        num=25,
                                        Des=2,
                                        title=basicTool.GetName(entry[0]),
                                        **period)

    print("正在生成消息总量图...")
    basicData.RowAnalyse(chatrooms_single, filename=outputdir+"/消息总量(柱状图)", **period)

    print("正在获取深夜消息...")
    basicData.TimeSlice(chatrooms_single, 1, 6, filename=outputdir+"/深夜消息(1-6点)", **period)

    print("正在寻找产生消息最多的日期...")
    basicData.MostDay(chatrooms_group, chatrooms_single, filename=outputdir+"/消息最多的一天(接收)", Des=1, **period)
    basicData.MostDay(chatrooms_group, chatrooms_single, filename=outputdir+"/消息最多的一天(发送)", Des=0, **period)

    print("正在寻找未回复的消息...")
    usageAnalyse.Lonelydude(chatrooms_single, filename=outputdir+"/未回复的消息", **period)

    print("正在生成使用情况日历图...")
    usageAnalyse.UsageAll(chatrooms_all, chartname="使用日历-发出(全部)", filename=outputdir+"/使用日历-发出(全部)(日历图)", Des=0, **period)
    usageAnalyse.UsageAll(chatrooms_single, chartname="使用日历-发出(个人)", filename=outputdir+"/使用日历-发出(个人)(日历图)", Des=0, **period)
    usageAnalyse.UsageAll(chatrooms_single, chartname="使用日历-接收(个人)", filename=outputdir+"/使用日历-接收(个人)(日历图)", Des=1, **period)

    print("正在生成消息时频分布图...")
    timeAnalyse.TimeAll(chatrooms_single, chartname="时频分布-接收(个人)", filename=outputdir+"/时频分布-接收(个人)(柱状图)", Des=1, **period)
    timeAnalyse.TimeAll(chatrooms_all, chartname="时频分布-发出(全部)", filename=outputdir+"/时频分布-发出(全部)(柱状图)", Des=0, **period)

    # Row-count trend for the busiest chats. The chart is saved under a
    # temporary ASCII name first and then moved onto the final name;
    # os.replace overwrites an existing report in one step.
    print("正在生成消息总量走势图...")
    timeAnalyse.RowLine(chatrooms_single, filename=outputdir+"/temp", **period)
    os.replace(outputdir+"/temp.pdf", outputdir+"/总量走势(折线图).pdf")

    print("正在生成词云...")
    wordcloudAnalyse.WordcloudAll(chatrooms_single, filename=outputdir+"/接收词频(个人)(词云图)", maxwords=100, Des=1, **period)
    wordcloudAnalyse.WordcloudAll(chatrooms_group, filename=outputdir+"/发送词频(群组)(词云图)", maxwords=50, Des=0, **period)
    wordcloudAnalyse.WordcloudAll(chatrooms_single, filename=outputdir+"/发送词频(个人)(词云图)", maxwords=100, Des=0, **period)
    wordcloudAnalyse.WordcloudAll(chatrooms_all, filename=outputdir+"/发送词频(全部)(词云图)", maxwords=100, Des=0, **period)

    print("已完成!")
def _messages_on_days(chatrooms, days, des):
    '''
    Collect, per chat table, the Message values with the given Des whose
    CreateTime falls on any of the given "YYYY-mm-dd" days.
    '''
    collected = {}
    if not days:
        return collected
    with basicTool.SqliteInit() as sqlite_cur:
        for day in days:
            day_start = int(
                time.mktime(time.strptime(day + ' 00:00:00', "%Y-%m-%d %H:%M:%S")))
            day_end = int(
                time.mktime(time.strptime(day + ' 23:59:59', "%Y-%m-%d %H:%M:%S")))
            for chatroom in chatrooms:
                # Table names cannot be bound as parameters; the values can.
                sql = ("select Message from " + chatroom +
                       " where CreateTime>=? and CreateTime<=? and Des=?")
                sqlite_cur.execute(sql, (day_start, day_end, des))
                messages = [row[0] for row in sqlite_cur.fetchall()]
                if messages:
                    # Accumulate across days; plain assignment would keep
                    # only the last matching day for each chat.
                    collected.setdefault(chatroom, []).extend(messages)
    return collected


def Lonelydude(chatrooms, filename="lonelydude", start_time="1970-01-02", end_time=""):
    '''
    List messages sent on days with no incoming message, and messages
    received on days with no outgoing message.

    A day counts as "no response" when at least one Des == 0 row and no
    Des == 1 row exists on it across all given chats; "no reply" is the
    reverse. The report (``filename + ".txt"``) lists both sets of days and
    then the corresponding messages as ``name: message`` lines.

    chatrooms: iterable of chat table names
    filename: str, output path without the ".txt" extension
    start_time, end_time: date bounds forwarded unchanged to basicTool.GetData
    '''
    # Dicts serve as insertion-ordered sets of local calendar days.
    sent_days = {}
    received_days = {}
    for chatroom in chatrooms:
        for row in basicTool.GetData(chatroom, ["CreateTime", "Des"],
                                     start_time=start_time,
                                     end_time=end_time):
            day = time.strftime("%Y-%m-%d", time.localtime(row[0]))
            if row[1] == 0:
                sent_days[day] = True
            elif row[1] == 1:
                received_days[day] = True

    no_response = [day for day in sent_days if day not in received_days]
    no_reply = [day for day in received_days if day not in sent_days]

    no_response_with = _messages_on_days(chatrooms, no_response, 0)
    no_reply_with = _messages_on_days(chatrooms, no_reply, 1)

    with open(filename + ".txt", "w+", encoding="utf-8") as f:
        f.write("未收到回复:" + ",".join(no_response) + "\n")
        f.write("未回复对方:" + ",".join(no_reply) + "\n")
        f.write("未获得回复:\n")
        for chatroom, messages in no_response_with.items():
            for message in messages:
                f.write(basicTool.GetName(chatroom) + ": " + message + "\n")
        f.write("\n未回复对方:\n")
        for chatroom, messages in no_reply_with.items():
            for message in messages:
                f.write(basicTool.GetName(chatroom) + ": " + message + "\n")