Ejemplo n.º 1
0
def TimeSlice(chatrooms_single,
              start=1,
              end=6,
              filename="Time_slice",
              start_time="1970-01-02",
              end_time=""):
    '''
    Dump every message whose local hour-of-day falls inside a window.

    Rows are read through basicTool.GetData (called with Type=2 and the
    given start_time / end_time) for each table in chatrooms_single.  A row
    is kept when the local-time hour of its CreateTime lies in the window,
    and is written to "<filename>.txt" as one line:
    "<name>,<Des>,<YYYY-mm-dd HH:MM:SS>,<Message>".

    chatrooms_single: iterable of chat table names
    start: first hour of the window, inclusive (0-23)
    end: last hour of the window, inclusive (0-23); when start > end the
         window wraps past midnight (e.g. start=22, end=3)
    filename: output path without the ".txt" extension
    start_time, end_time: forwarded unchanged to basicTool.GetData
    '''
    with open(filename + ".txt", "w", encoding="utf-8") as f:
        for chatroom in chatrooms_single:
            # The display name is the same for every row of this chatroom.
            name = basicTool.GetName(chatroom)
            for row in basicTool.GetData(
                    chatroom, ["CreateTime", "Message", "Des", "Type"],
                    start_time=start_time,
                    end_time=end_time,
                    Type=2):
                time_array = time.localtime(row[0])
                hour = time_array[3]
                if start <= end:
                    in_window = start <= hour <= end
                else:
                    # Window spans midnight: late evening OR early morning.
                    in_window = hour >= start or hour <= end
                if not in_window:
                    continue
                created = time.strftime("%Y-%m-%d %H:%M:%S", time_array)
                f.write(name + "," + str(row[2]) + "," + created + "," +
                        row[1] + "\n")
Ejemplo n.º 2
0
def MostDay(chatrooms_group,
            chatrooms_single,
            filename="mostday_to",
            Des=0,
            start_time="1970-01-02",
            end_time="",
            excluded_chatrooms=("Chat_b7ebbe67d8f64c77cda5415f4d749cc6",)):
    '''
    Find the day with the most sent/received messages and dump its messages.

    The busiest day is determined from chatrooms_single only (rows returned
    by basicTool.GetData with Type=2 whose Des equals the Des argument).
    The messages written out for that day come from group and single
    chatrooms together, restricted to rows with the same Des and Type == 1.

    Output "<filename>.txt": first line is the day (YYYY-mm-dd), followed by
    one "<name>,<Message>" line per message.  When no row matches at all, an
    empty file is written instead of raising.

    chatrooms_group, chatrooms_single: lists of chat table names
    filename: output path without the ".txt" extension
    Des: direction flag to count (other functions in this file treat 0 as
         sent and 1 as received)
    start_time, end_time: forwarded unchanged to basicTool.GetData
    excluded_chatrooms: table names whose messages are left out of the dump;
         the default keeps the table that was previously hard-coded here
    '''
    day_counts = {}
    for chatroom in chatrooms_single:
        for row in basicTool.GetData(chatroom, ["CreateTime", "Des"],
                                     start_time=start_time,
                                     end_time=end_time,
                                     Type=2):
            if row[1] == Des:
                day = time.strftime("%Y-%m-%d", time.localtime(row[0]))
                day_counts[day] = day_counts.get(day, 0) + 1

    if not day_counts:
        # Nothing matched: leave an empty report rather than fail on [0].
        with open(filename + ".txt", "w", encoding="utf-8"):
            pass
        return

    # max() returns the first day reaching the top count, which is the same
    # entry a stable descending sort would put first.
    busiest_day = max(day_counts, key=day_counts.get)
    day_start = int(time.mktime(
        time.strptime(busiest_day + ' 00:00:00', "%Y-%m-%d %H:%M:%S")))
    day_end = int(time.mktime(
        time.strptime(busiest_day + ' 23:59:59', "%Y-%m-%d %H:%M:%S")))

    chat_with = {}
    with basicTool.SqliteInit() as sqlite_cur:
        for chatroom in chatrooms_group + chatrooms_single:
            if chatroom in excluded_chatrooms:
                continue
            # Table names cannot be bound as parameters; the values can.
            sql = ("select Message,Des,Type from " + chatroom +
                   " where CreateTime>=? and CreateTime<=?")
            sqlite_cur.execute(sql, (day_start, day_end))
            messages = [
                row[0] for row in sqlite_cur.fetchall()
                if row[1] == Des and row[2] == 1
            ]
            if messages:
                chat_with[chatroom] = messages

    with open(filename + ".txt", "w", encoding="utf-8") as f:
        f.write(busiest_day + "\n")
        for chatroom, messages in chat_with.items():
            name = basicTool.GetName(chatroom)
            for message in messages:
                f.write(name + "," + message + "\n")
Ejemplo n.º 3
0
def RowLine(chatrooms,
            filename,
            limit=10,
            start_time="1970-01-02",
            end_time=""):
    '''
    Plot the message-count trend of the busiest chatrooms to a PDF.

    Chatrooms are ranked by basicTool.GetRowNum and the top `limit` are
    kept.  For each of them the (id, CreateTime) rows are thinned to those
    with id % 20 == 1 plus the final row, then drawn as one line with
    CreateTime on the x axis (formatted as a date) and id on the y axis.
    The figure is saved as "<filename>.pdf" and then closed.

    chatrooms: list of chat table names,
               e.g. ["Chat_67183be064c8c3ef11df9bb7a53014c8"]
    filename: output path without the ".pdf" extension
    limit: maximum number of chatrooms to plot
    start_time, end_time: forwarded unchanged to basicTool
    '''
    row_counts = [(chatroom,
                   basicTool.GetRowNum(chatroom,
                                       start_time=start_time,
                                       end_time=end_time))
                  for chatroom in chatrooms]
    row_counts.sort(key=operator.itemgetter(1), reverse=True)
    # Slicing already copes with fewer than `limit` entries.
    chatrooms_inuse = [entry[0] for entry in row_counts[:limit]]

    id_time_dict = {}
    for chatroom in chatrooms_inuse:
        samples = np.array(basicTool.GetData(chatroom,
                                             ["id", "CreateTime"],
                                             start_time=start_time,
                                             end_time=end_time),
                           dtype="int")
        if samples.size == 0:
            # No rows in range: the array is 1-D, so the column indexing
            # below would raise, and there is nothing to draw anyway.
            continue
        id_time_dict[chatroom] = np.append(
            samples[samples[:, 0] % 20 == 1], [samples[-1, :]], axis=0)

    f = plt.figure(figsize=(16, 9))
    try:
        plt.grid(True)
        font = {'family': 'DengXian'}
        plt.rc('font', **font)
        ax = plt.gca()
        if id_time_dict:
            # Only install the date formatter when real dates are plotted.
            ax.xaxis.set_major_formatter(md.DateFormatter('%Y-%m-%d'))
        for key, value in id_time_dict.items():
            dateframe_x = [datetime.fromtimestamp(i) for i in value[:, 1]]
            x = md.date2num(dateframe_x)
            y = value[:, 0]
            plt.plot(x, y, label=basicTool.GetName(key))
        if id_time_dict:
            plt.legend(loc='upper left')

        f.savefig(filename + ".pdf", bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(f)
Ejemplo n.º 4
0
def Normal(params, filename="group_ranking", num=10, title=""):
    '''
    Render a bar chart ranking names by how often they occur in params.

    The first element of every item in params is counted as an id; ids are
    mapped to names with basicTool.GetName and counts of ids sharing a name
    are merged.  Names are sorted by count in descending order and at most
    `num` of them are plotted.  The chart is written to "<filename>.html".

    params: iterable of indexable items whose element [0] is an id
    filename: output path without the ".html" extension
    num: int, number of x-axis entries, i.e. how many users are shown
    title: chart title
    '''
    # Single pass, so a one-shot iterator is counted correctly as well.
    id_counter_dict = {}
    for item in params:
        id_counter_dict[item[0]] = id_counter_dict.get(item[0], 0) + 1

    # Resolve each distinct id once; different ids may share a name.
    name_counter_dict = {}
    for key, value in id_counter_dict.items():
        name = basicTool.GetName(key)
        name_counter_dict[name] = name_counter_dict.get(name, 0) + value

    name_counter_sorted_list = sorted(name_counter_dict.items(),
                                      key=lambda x: x[1],
                                      reverse=True)
    x_list = [i[0] for i in name_counter_sorted_list]
    y_list = [i[1] for i in name_counter_sorted_list]
    if len(x_list) >= num:
        x_list = x_list[:num]
        y_list = y_list[:num]

    grid = Grid()
    bar = Bar(title=title, title_pos="40%")
    bar.add(
        "",
        x_list,
        y_list,
        is_label_show=True,
        xaxis_interval=0,
        is_xaxislabel_align=True,
        xaxis_rotate=30,
        is_xaxis_show=True,
        is_yaxis_show=True,
        is_splitline_show=False)
    grid.add(bar, grid_bottom="30%")
    grid.render(path=filename + ".html")
Ejemplo n.º 5
0
def RowAnalyse(chatrooms_single,
               filename="Row_ana",
               start_time="1970-01-02",
               end_time=""):
    '''
    Render the distribution of message counts over one-to-one chats.

    Each chatroom's row count comes from basicTool.GetRowNum; chatrooms are
    ranked by that count in descending order and drawn as a bar chart that
    is written to "<filename>.html".

    chatrooms_single: iterable of chat table names
    filename: output path without the ".html" extension
    start_time, end_time: forwarded unchanged to basicTool.GetRowNum
    '''
    row_totals = {
        room: basicTool.GetRowNum(room,
                                  start_time=start_time,
                                  end_time=end_time)
        for room in chatrooms_single
    }
    ranked = sorted(row_totals.items(),
                    key=operator.itemgetter(1),
                    reverse=True)

    # Category labels are "<rank>,<name>"; the formatter below keeps only
    # the part before the first comma.
    x_axis = []
    y_axis = []
    for rank, (room, total) in enumerate(ranked, start=1):
        x_axis.append(str(rank) + "," + basicTool.GetName(room))
        y_axis.append(total)

    bar_top = Bar("条数统计", title_pos="10%")

    # NOTE(review): left token-for-token as it was, in case the charting
    # library inspects this callback rather than just calling it - confirm.
    def label_formatter(params):
        return params.split(",")[0]

    bar_top.add(
        "",
        x_axis,
        y_axis,
        xaxis_formatter=label_formatter,
        yaxis_name="条数",
        is_xaxislabel_align=True,
        is_datazoom_show=True,
        datazoom_range=[0, 100])

    bar_top.render(path=filename + ".html")
Ejemplo n.º 6
0
def _collect_emoji_usage(chatrooms, des, start_stamp, end_stamp):
    '''
    Count emoji md5 values in Type=47 rows with the given Des.

    Returns {md5: [count, name, create_time]}, where name / create_time
    point at one chat message in which that emoji occurs.
    '''
    pattern = re.compile(' md5="(.*?)"')
    usage = {}
    for chatroom in chatrooms:
        # Table names cannot be bound as parameters; the values can.
        sql = ("SELECT Message,CreateTime as num FROM " + chatroom +
               " WHERE Type=47 and Des=? and CreateTime>=? and CreateTime<=?")
        Name = basicTool.GetName(chatroom)
        with basicTool.SqliteInit() as sqlite_cur:
            sqlite_cur.execute(sql, (des, start_stamp, end_stamp))
            result = sqlite_cur.fetchall()
        for row in result:
            found = pattern.search(row[0])
            if found is None or not found.group(1):
                # Message carries no usable md5 attribute: skip it.
                continue
            emoji_md5 = found.group(1)
            entry = usage.get(emoji_md5)
            if entry is None:
                usage[emoji_md5] = [1, Name, row[1]]
                continue
            entry[0] += 1
            if Name != "" and entry[1] == "":
                # Prefer a location whose chat has a non-empty name.
                entry[1] = Name
                entry[2] = row[1]
    return usage


def _format_top_emoji(heading, usage):
    '''Return the report section for the most frequent emoji in usage.'''
    if not usage:
        return heading + "无\n"
    # max() picks the first entry with the top count, the same one a stable
    # descending sort would put first.
    emoji_md5, (count, name, stamp) = max(usage.items(),
                                          key=lambda item: item[1][0])
    return (heading + emoji_md5 + "\n" +
            "共" + str(count) + "次" + "\n" +
            "聊天记录定位:微信名:" + name + ",时间:" +
            str(datetime.fromtimestamp(stamp)) + "\n")


def MostEmoji(chatrooms_group,
              chatrooms_single,
              filename="emoji_ranking",
              start_time="1970-01-02",
              end_time=""):
    '''
    Report the most frequently sent and most frequently received emoji.

    Sent emoji (Des=0) are counted over group and single chatrooms;
    received emoji (Des=1) over single chatrooms only.  Emoji are
    identified by the md5="..." attribute found in Type=47 messages.  The
    report is written to "<filename>.txt"; a direction with no emoji at all
    is reported as "无" instead of raising.

    chatrooms_group, chatrooms_single: lists of chat table names
    filename: output path without the ".txt" extension
    start_time: "YYYY-mm-dd", lower bound of CreateTime
    end_time: "YYYY-mm-dd" upper bound, or "" for the current time
    '''
    start_time_stamp = int(time.mktime(time.strptime(start_time, "%Y-%m-%d")))
    if end_time == "":
        end_time_stamp = int(time.time())
    else:
        # NOTE(review): this is midnight at the start of end_time, so rows
        # created during that day are excluded - confirm this is intended.
        end_time_stamp = int(time.mktime(time.strptime(end_time, "%Y-%m-%d")))

    emoji_dict_to = _collect_emoji_usage(chatrooms_group + chatrooms_single,
                                         0, start_time_stamp, end_time_stamp)
    emoji_dict_from = _collect_emoji_usage(chatrooms_single, 1,
                                           start_time_stamp, end_time_stamp)

    with open(filename + ".txt", "w", encoding="utf-8") as f:
        f.write(_format_top_emoji("发出最多的表情包:", emoji_dict_to) + "\n")
        f.write(_format_top_emoji("接收最多的表情包:", emoji_dict_from))
Ejemplo n.º 7
0
def AutoRun(outputdir="./outputs",
            start_time="2019-01-01",
            end_time="2019-12-31"):
    '''
    Generate every report of the analysis suite into one directory.

    outputdir: directory receiving all output files; created (including
               missing parents) when it does not exist
    start_time, end_time: "YYYY-mm-dd" bounds handed to every report;
               the defaults keep the previously hard-coded year 2019
    '''
    os.makedirs(outputdir, exist_ok=True)
    # Every report below is restricted to the same date window.
    period = {"start_time": start_time, "end_time": end_time}

    print("正在获取聊天数据表...")
    chatrooms_group = basicTool.GetChatrooms(typename=1)
    chatrooms_single = basicTool.GetChatrooms(typename=2)
    chatrooms_all = chatrooms_group + chatrooms_single

    print("正在生成基本数据...")
    # Number of friends, number of group chats, total messages sent
    basicData.BaseData(chatrooms_group, chatrooms_single, filename=outputdir+"/基本数据", **period)

    print("正在生成表情包数据...")
    basicData.MostEmoji(chatrooms_group, chatrooms_single, filename=outputdir+"/表情包数据", **period)

    print("正在生成消息类型图...")
    basicData.TypeAnalyse(chatrooms_single, filename=outputdir+"/消息类型(仅好友)(饼图)", **period)

    print("正在生成群聊数据...")
    groupAnalyse.GroupRankingAll(chatrooms_group, filename=outputdir+"/所有群聊发出消息排名(柱状图)", **period)

    # Per-member ranking inside each of the ten busiest group chats
    group_sizes = [(chatroom, basicTool.GetRowNum(chatroom, **period))
                   for chatroom in chatrooms_group]
    chatrooms_top_group = sorted(group_sizes, key=lambda x: x[1], reverse=True)[:10]
    for rank, entry in enumerate(chatrooms_top_group, start=1):
        groupAnalyse.GroupRankingSingle(entry[0], filename=outputdir+"/"+str(rank)+".排名前十群聊中的发出消息排名(柱状图)", num=25, Des=2, title=basicTool.GetName(entry[0]), **period)

    print("正在生成消息总量图...")
    basicData.RowAnalyse(chatrooms_single, filename=outputdir+"/消息总量(柱状图)", **period)

    print("正在获取深夜消息...")
    basicData.TimeSlice(chatrooms_single, 1, 6, filename=outputdir+"/深夜消息(1-6点)", **period)

    print("正在寻找产生消息最多的日期...")
    basicData.MostDay(chatrooms_group, chatrooms_single, filename=outputdir+"/消息最多的一天(接收)", Des=1, **period)
    basicData.MostDay(chatrooms_group, chatrooms_single, filename=outputdir+"/消息最多的一天(发送)", Des=0, **period)

    print("正在寻找未回复的消息...")
    usageAnalyse.Lonelydude(chatrooms_single, filename=outputdir+"/未回复的消息", **period)

    print("正在生成使用情况日历图...")
    usageAnalyse.UsageAll(chatrooms_all, chartname="使用日历-发出(全部)", filename=outputdir+"/使用日历-发出(全部)(日历图)", Des=0, **period)
    usageAnalyse.UsageAll(chatrooms_single, chartname="使用日历-发出(个人)", filename=outputdir+"/使用日历-发出(个人)(日历图)", Des=0, **period)
    usageAnalyse.UsageAll(chatrooms_single, chartname="使用日历-接收(个人)", filename=outputdir+"/使用日历-接收(个人)(日历图)", Des=1, **period)

    print("正在生成消息时频分布图...")
    timeAnalyse.TimeAll(chatrooms_single, chartname="时频分布-接收(个人)", filename=outputdir+"/时频分布-接收(个人)(柱状图)", Des=1, **period)
    timeAnalyse.TimeAll(chatrooms_all, chartname="时频分布-发出(全部)", filename=outputdir+"/时频分布-发出(全部)(柱状图)", Des=0, **period)

    # Message-total trend chart (ten busiest chats).  It is rendered under a
    # temporary name and then moved to its final name.
    print("正在生成消息总量走势图...")
    timeAnalyse.RowLine(chatrooms_single, filename=outputdir+"/temp", **period)
    # os.replace overwrites an existing target, so no prior remove is needed.
    os.replace(outputdir+"/temp.pdf", outputdir+"/总量走势(折线图).pdf")

    print("正在生成词云...")
    wordcloudAnalyse.WordcloudAll(chatrooms_single, filename=outputdir+"/接收词频(个人)(词云图)", maxwords=100, Des=1, **period)
    wordcloudAnalyse.WordcloudAll(chatrooms_group, filename=outputdir+"/发送词频(群组)(词云图)", maxwords=50, Des=0, **period)
    wordcloudAnalyse.WordcloudAll(chatrooms_single, filename=outputdir+"/发送词频(个人)(词云图)", maxwords=100, Des=0, **period)
    wordcloudAnalyse.WordcloudAll(chatrooms_all, filename=outputdir+"/发送词频(全部)(词云图)", maxwords=100, Des=0, **period)
    print("已完成!")
Ejemplo n.º 8
0
def _messages_on_days(chatrooms, days, des):
    '''
    Collect the messages with the given Des that were created on `days`.

    Returns {chatroom: [Message, ...]}; messages from several days are
    accumulated for the same chatroom.
    '''
    collected = {}
    if not days:
        return collected
    with basicTool.SqliteInit() as cursor:
        for day in days:
            day_start = int(
                time.mktime(time.strptime(day + ' 00:00:00',
                                          "%Y-%m-%d %H:%M:%S")))
            day_end = int(
                time.mktime(time.strptime(day + ' 23:59:59',
                                          "%Y-%m-%d %H:%M:%S")))
            for chatroom in chatrooms:
                # Table names cannot be bound as parameters; values can.
                sql = ("select Message from " + chatroom +
                       " where CreateTime>=? and CreateTime<=? and Des=?")
                cursor.execute(sql, (day_start, day_end, des))
                messages = [row[0] for row in cursor.fetchall()]
                if messages:
                    # Extend rather than assign, so an earlier day's
                    # messages for this chatroom are not overwritten.
                    collected.setdefault(chatroom, []).extend(messages)
    return collected


def Lonelydude(chatrooms,
               filename="lonelydude",
               start_time="1970-01-02",
               end_time=""):
    '''
    List messages sent on days without any incoming message, and vice versa.

    Days are compared across all given chatrooms together: a day counts as
    "no response" when at least one row has Des == 0 and none has Des == 1,
    and as "no reply" in the opposite case.  The report written to
    "<filename>.txt" lists both sets of days, followed by the messages of
    those days as "<name>: <Message>" lines.

    chatrooms: list of chat table names
    filename: str, output path without the ".txt" extension
    start_time, end_time: forwarded unchanged to basicTool.GetData
    '''
    # Dicts serve as insertion-ordered sets of "YYYY-mm-dd" strings.
    sent_days = {}
    received_days = {}
    for chatroom in chatrooms:
        for row in basicTool.GetData(chatroom, ["CreateTime", "Des"],
                                     start_time=start_time,
                                     end_time=end_time):
            day = time.strftime("%Y-%m-%d", time.localtime(row[0]))
            if row[1] == 0:
                sent_days[day] = None
            elif row[1] == 1:
                received_days[day] = None

    no_response = [day for day in sent_days if day not in received_days]
    no_reply = [day for day in received_days if day not in sent_days]

    no_response_with = _messages_on_days(chatrooms, no_response, 0)
    no_reply_with = _messages_on_days(chatrooms, no_reply, 1)

    with open(filename + ".txt", "w", encoding="utf-8") as f:
        f.write("未收到回复:" + ",".join(no_response) + "\n")
        f.write("未回复对方:" + ",".join(no_reply) + "\n")
        f.write("未获得回复:\n")
        for chatroom, messages in no_response_with.items():
            name = basicTool.GetName(chatroom)
            for message in messages:
                f.write(name + ": " + message + "\n")
        f.write("\n未回复对方:\n")
        for chatroom, messages in no_reply_with.items():
            name = basicTool.GetName(chatroom)
            for message in messages:
                f.write(name + ": " + message + "\n")