Example #1
0
def WordcloudAll(chatrooms,
                 filename="wc_all",
                 maxwords=50,
                 Des=2,
                 skip_useless=0,
                 start_time="1970-01-01",
                 end_time="",
                 title=""):
    '''
    Build a single word cloud from the messages of several chatrooms.

    chatrooms: iterable of chatroom identifiers, each passed to
               basicTool.GetData
    filename: str, output file name, forwarded to Normal
    maxwords: int, maximum number of words in the cloud
    Des: 0: sent, 1: received, 2: all
    skip_useless: forwarded unchanged to Normal
    start_time, end_time: time bounds forwarded to basicTool.GetData
    title: chart title, forwarded to Normal
    '''
    message_list = []
    for chatroom in chatrooms:
        rows = basicTool.GetData(chatroom=chatroom,
                                 columns=["Message", "Type"],
                                 Des=Des,
                                 start_time=start_time,
                                 end_time=end_time)
        # Keep only rows whose Type column is 1 (presumably plain text).
        message_list.extend(row[0] for row in rows if row[1] == 1)

    # Forward the caller's title; it used to be replaced by a hard-coded "".
    Normal(message_list,
           filename=filename,
           maxwords=maxwords,
           skip_useless=skip_useless,
           title=title)
Example #2
0
def TimeSlice(chatrooms_single,
              start=1,
              end=6,
              filename="Time_slice",
              start_time="1970-01-02",
              end_time=""):
    '''
    Write every message sent or received within an hour-of-day window to
    a text file; nothing is returned.

    chatrooms_single: iterable of chatroom identifiers
    start: first local hour (inclusive) of the window
    end: last local hour (inclusive) of the window
    filename: output path without extension; ".txt" is appended
    start_time, end_time: time bounds forwarded to basicTool.GetData

    Each output line is "<name>,<Des>,<YYYY-mm-dd HH:MM:SS>,<message>".
    '''
    with open(filename + ".txt", "w+", encoding="utf-8") as f:
        for chatroom in chatrooms_single:
            # The column list is left as it was so the query is unchanged,
            # although the Type column is no longer read here.
            for row in basicTool.GetData(
                    chatroom, ["CreateTime", "Message", "Des", "Type"],
                    start_time=start_time,
                    end_time=end_time,
                    Type=2):
                time_array = time.localtime(row[0])
                # Index 3 of struct_time is tm_hour (local time).
                if not start <= time_array[3] <= end:
                    continue
                created = time.strftime("%Y-%m-%d %H:%M:%S", time_array)
                f.write(
                    basicTool.GetName(chatroom) + "," + str(row[2]) + "," +
                    created + "," + row[1] + "\n")
Example #3
0
def WordCloudSingle(chatroom,
                    filename="wc_single",
                    maxwords=200,
                    Des=2,
                    from_user="",
                    start_time="1970-01-01",
                    end_time="",
                    title=""):
    '''
    Build a word cloud from the messages of one chatroom.

    chatroom: chatroom identifier passed to basicTool.GetData
    filename: str, output file name
    maxwords: int, maximum number of words in the cloud
    Des: 0: sent, 1: received, 2: all
    from_user: str, keep only rows whose SentFrom equals this value;
               the empty string keeps every sender
    start_time, end_time: time bounds forwarded to basicTool.GetData
    title: chart title, forwarded to Normal
    '''
    rows = basicTool.GetData(chatroom=chatroom,
                             columns=["Message", "Type", "SentFrom"],
                             Des=Des,
                             start_time=start_time,
                             end_time=end_time)
    # Only Type == 1 rows are used; the sender filter is optional.
    message_list = [
        row[0] for row in rows
        if row[1] == 1 and (from_user == "" or row[2] == from_user)
    ]
    Normal(message_list, filename=filename, maxwords=maxwords, title=title)
Example #4
0
def UsageSingle(chatroom,
                chartname="",
                filename="usage_ana_single",
                Des=2,
                start_time="1970-01-02",
                end_time=""):
    '''
    Count the messages of one chatroom per calendar day and hand the
    date-ordered (day, count) pairs to Normal.

    chatroom: str, chatroom identifier
    chartname: str, chart name
    filename: str, output file name
    Des: 0: sent, 1: received, 2: all
    start_time, end_time: time bounds forwarded to basicTool.GetData
    '''
    per_day = {}
    rows = basicTool.GetData(chatroom=chatroom,
                             columns=["CreateTime"],
                             Des=Des,
                             start_time=start_time,
                             end_time=end_time)
    for row in rows:
        # Bucket by local date, formatted as YYYY-mm-dd.
        day = time.strftime("%Y-%m-%d", time.localtime(row[0]))
        per_day[day] = per_day.get(day, 0) + 1

    # ISO-style date strings sort chronologically.
    ordered = list(per_day.items())
    ordered.sort(key=operator.itemgetter(0))
    Normal(ordered, chartname=chartname, filename=filename)
Example #5
0
def UsageAll(chatrooms,
             chartname="",
             filename="usage_ana_all",
             Des=0,
             start_time="1970-01-02",
             end_time=""):
    '''
    Count the messages of several chatrooms per calendar day and hand
    the date-ordered (day, count) pairs to Normal.

    chatrooms: iterable of chatroom identifiers
    chartname: str, chart name
    filename: str, output file name
    Des: 0: sent, 1: received, 2: all (defaults to sent only)
    start_time, end_time: time bounds forwarded to basicTool.GetData
    '''
    # Lazily walk the CreateTime of every row of every chatroom, in order.
    timestamps = (row[0]
                  for room in chatrooms
                  for row in basicTool.GetData(room, ["CreateTime"],
                                               Des=Des,
                                               start_time=start_time,
                                               end_time=end_time))
    per_day = {}
    for stamp in timestamps:
        day = time.strftime("%Y-%m-%d", time.localtime(stamp))
        per_day[day] = per_day.get(day, 0) + 1

    ordered = list(per_day.items())
    ordered.sort(key=operator.itemgetter(0))
    Normal(ordered, chartname=chartname, filename=filename)
Example #6
0
def MostDay(chatrooms_group,
            chatrooms_single,
            filename="mostday_to",
            Des=0,
            start_time="1970-01-02",
            end_time=""):
    '''
    Find the day with the most sent/received messages and dump the text
    messages of that day to "<filename>.txt".

    The busiest day is chosen from chatrooms_single only; the messages
    written for that day come from chatrooms_group + chatrooms_single.

    chatrooms_group, chatrooms_single: lists of chatroom table names
    filename: output path without extension
    Des: direction to count (0: sent, 1: received)
    start_time, end_time: time bounds forwarded to basicTool.GetData

    The first output line is the day; every following line is
    "<name>,<message>". If no row matches, nothing is written and the
    function returns (it used to raise IndexError).
    '''
    chatrooms_all = chatrooms_group + chatrooms_single
    day_counter = {}
    for chatroom in chatrooms_single:
        for row in basicTool.GetData(chatroom, ["CreateTime", "Des"],
                                     start_time=start_time,
                                     end_time=end_time,
                                     Type=2):
            if row[1] == Des:
                day = time.strftime("%Y-%m-%d", time.localtime(row[0]))
                day_counter[day] = day_counter.get(day, 0) + 1

    if not day_counter:
        # No matching message at all, so there is no busiest day to report.
        return

    # max() returns the first-seen day among ties, which is the entry the
    # previous stable descending sort put first.
    busiest_day = max(day_counter.items(), key=operator.itemgetter(1))[0]
    time1 = int(time.mktime(time.strptime(busiest_day + ' 00:00:00',
                                          "%Y-%m-%d %H:%M:%S")))
    time2 = int(time.mktime(time.strptime(busiest_day + ' 23:59:59',
                                          "%Y-%m-%d %H:%M:%S")))

    chat_with = {}
    with basicTool.SqliteInit() as sqlite_cur:
        for chatroom in chatrooms_all:
            # NOTE(review): this one table is hard-coded as excluded from
            # the output; the reason is not visible here.
            if chatroom == "Chat_b7ebbe67d8f64c77cda5415f4d749cc6":
                continue
            # A table name cannot be bound as a query parameter; the time
            # bounds are integers computed above.
            sql = "select Message,Des,Type from " + chatroom + \
                " where CreateTime>=" + str(time1) + \
                " and CreateTime<=" + str(time2)
            sqlite_cur.execute(sql)
            texts = [row[0] for row in sqlite_cur.fetchall()
                     if row[1] == Des and row[2] == 1]
            if texts:
                chat_with[chatroom] = texts

    with open(filename + ".txt", "w+", encoding="utf-8") as f:
        f.write(busiest_day + "\n")
        for chatroom, texts in chat_with.items():
            # Resolve the display name once per chatroom, not per message.
            name = basicTool.GetName(chatroom)
            for text in texts:
                f.write(name + "," + text + "\n")
Example #7
0
def RowLine(chatrooms,
            filename,
            limit=10,
            start_time="1970-01-02",
            end_time=""):
    '''
    Plot how the message count of the busiest chatrooms grows over time
    and save the figure as "<filename>.pdf".

    chatrooms: list of chatroom table names, e.g.
               ["Chat_67183be064c8c3ef11df9bb7a53014c8"]
    filename: output path without extension
    limit: number of chatrooms (those with the most rows) to plot
    start_time, end_time: time bounds forwarded to basicTool

    Chatrooms that return no rows for the period are skipped.
    '''
    counted = [(chatroom,
                basicTool.GetRowNum(chatroom,
                                    start_time=start_time,
                                    end_time=end_time))
               for chatroom in chatrooms]
    counted.sort(key=operator.itemgetter(1), reverse=True)
    # Slicing already copes with fewer than `limit` entries.
    chatrooms_inuse = [entry[0] for entry in counted[:limit]]

    id_time_dict = {}
    for chatroom in chatrooms_inuse:
        temp_arr = np.array(basicTool.GetData(chatroom,
                                              ["id", "CreateTime"],
                                              start_time=start_time,
                                              end_time=end_time),
                            dtype="int")
        if temp_arr.size == 0:
            # An empty result is not 2-D, so the column indexing below
            # would raise; there is nothing to plot for it anyway.
            continue
        # Keep one row in twenty (id % 20 == 1) plus the final row so the
        # line reaches the last message.
        id_time_dict[chatroom] = np.append(
            temp_arr[temp_arr[:, 0] % 20 == 1], [temp_arr[-1, :]], axis=0)

    f = plt.figure(figsize=(16, 9))
    try:
        plt.grid(True)
        font = {'family': 'DengXian'}
        plt.rc('font', **font)
        ax = plt.gca()
        if id_time_dict:
            # Install the date formatter only when there are date values.
            ax.xaxis.set_major_formatter(md.DateFormatter('%Y-%m-%d'))
        for key, value in id_time_dict.items():
            dateframe_x = [datetime.fromtimestamp(i) for i in value[:, 1]]
            x = md.date2num(dateframe_x)
            y = value[:, 0]
            plt.plot(x, y, label=basicTool.GetName(key))
        if id_time_dict:
            plt.legend(loc='upper left')
        f.savefig(filename + ".pdf", bbox_inches='tight')
    finally:
        # Release the figure so repeated calls do not pile up open figures.
        plt.close(f)
Example #8
0
def TimeSingle(chatroom,
               chartname="",
               filename="time_ana_single",
               Des=2,
               start_time="1970-01-02",
               end_time=""):
    '''
    Fetch the (id, CreateTime) rows of one chatroom and pass them to
    Normal for charting.

    chatroom: str, chatroom table name, e.g.
              "Chat_67183be064c8c3ef11df9bb7a53014c8"
    chartname: str, chart name
    filename: str, output file name
    Des: 0: sent, 1: received, 2: all
    start_time, end_time: time bounds forwarded to basicTool.GetData
    '''
    rows = basicTool.GetData(chatroom=chatroom,
                             columns=["id", "CreateTime"],
                             Des=Des,
                             start_time=start_time,
                             end_time=end_time)
    # Materialise the rows into a fresh list before handing them over.
    message_list = list(rows)
    Normal(message_list, chartname=chartname, filename=filename)
Example #9
0
def GroupRankingSingle(chatroom,
                       filename="group_ranking_single",
                       num=10,
                       Des=2,
                       start_time="1970-01-01",
                       end_time="",
                       title=""):
    '''
    Rank the members of one group chat by how much they have spoken.

    chatroom: chatroom table name
    filename: str, output file name
    num: int, number of users shown on the x axis
    Des: 0: sent, 1: received, 2: all
    start_time, end_time: time bounds forwarded to basicTool.GetData
    title: chart title, forwarded to Normal
    '''
    # Des was documented but never passed on, so the filter had no effect.
    # NOTE(review): assumes GetData's own default for Des is "all" (2), as
    # the other callers that omit it suggest - confirm.
    rows = basicTool.GetData(chatroom=chatroom,
                             columns=["SentFrom", "Message"],
                             Des=Des,
                             start_time=start_time,
                             end_time=end_time,
                             Type=1)
    # Rows whose sender is "system" are not counted for any member.
    message_list = [row for row in rows if row[0] != "system"]
    Normal(message_list, filename=filename, num=num, title=title)
Example #10
0
def _merge_type_count(counter, src, dst):
    '''Fold counter[src] into counter[dst] and drop src; no-op if absent.'''
    if src in counter:
        counter[dst] = counter.get(dst, 0) + counter.pop(src)


def TypeAnalyse(chatrooms_single,
                filename="Type_ana",
                start_time="1970-01-02",
                end_time=""):
    '''
    Count messages per message type, separately for sent and received,
    and render two pie charts to "<filename>.html".

    chatrooms_single: iterable of chatroom identifiers
    filename: output path without extension
    start_time, end_time: time bounds forwarded to basicTool.GetData

    Type 10002 is folded into 10000 and type 62 into 43 before charting.
    Types outside the charted set are counted but not displayed.
    '''
    charted_types = (1, 3, 34, 42, 43, 47, 48, 49, 50, 10000)
    single_type_counter_to = dict.fromkeys(charted_types, 0)
    single_type_counter_from = dict.fromkeys(charted_types, 0)

    for chatroom in chatrooms_single:
        for row in basicTool.GetData(chatroom, ["Type", "Des"],
                                     start_time=start_time,
                                     end_time=end_time,
                                     Type=2):
            # Des == 0 means sent; anything else is treated as received.
            if row[1] == 0:
                counter = single_type_counter_to
            else:
                counter = single_type_counter_from
            # Each row counts as one message. The received side used to add
            # the Des value itself, which is only right when Des == 1.
            counter[row[0]] = counter.get(row[0], 0) + 1

    for counter in (single_type_counter_to, single_type_counter_from):
        _merge_type_count(counter, 10002, 10000)
        _merge_type_count(counter, 62, 43)

    define_dict = {
        "文字": 1,
        "图片": 3,
        "语音": 34,
        "名片": 42,
        "视频": 43,
        "表情": 47,
        "定位": 48,
        "链接": 49,
        "微信电话": 50,
        "系统消息": 10000
    }
    attr = ["文字", "表情", "图片", "视频", "语音", "名片", "定位", "链接", "微信电话", "系统消息"]
    v1 = [single_type_counter_to[define_dict[i]] for i in attr]
    v2 = [single_type_counter_from[define_dict[i]] for i in attr]
    pie = Pie("", width=1000, height=400)
    # Left pie: sent messages.
    pie.add("发出",
            attr,
            v1,
            center=[25, 50],
            is_random=True,
            radius=[30, 75],
            is_legend_show=True,
            is_label_show=True,
            legend_top="bottom")
    # Right pie: received messages.
    pie.add("接收",
            attr,
            v2,
            center=[75, 50],
            is_random=True,
            radius=[30, 75],
            is_legend_show=True,
            is_label_show=True,
            legend_top="bottom")
    pie.render(path=filename + ".html")
Example #11
0
def BaseData(chatrooms_group,
             chatrooms_single,
             filename="basic_ana",
             start_time="1970-01-02",
             end_time=""):
    '''
    Write overall statistics to "<filename>.txt".

    The report contains, in order:
      number of friends (single chatrooms) and of group chats,
      messages sent: total / friends / groups,
      messages received: total / friends / groups,
      average sent and received message length (friends),
      recalled messages: by me / by the other side (friends).

    chatrooms_group, chatrooms_single: lists of chatroom identifiers
    filename: output path without extension
    start_time, end_time: time bounds forwarded to basicTool

    NOTE(review): the average lengths are the unweighted mean of the
    per-chatroom values from basicTool.getAvgLen - confirm this is wanted.
    '''

    def _row_total(rooms, des):
        # Sum of the row counts of `rooms` for one direction.
        return sum(basicTool.GetRowNum(room,
                                       Des=des,
                                       start_time=start_time,
                                       end_time=end_time)
                   for room in rooms)

    message_length_to = []
    message_length_from = []
    for chatroom in chatrooms_single:
        for des, bucket in ((0, message_length_to), (1, message_length_from)):
            result = basicTool.getAvgLen(chatroom,
                                         Des=des,
                                         start_time=start_time,
                                         end_time=end_time)
            if result[0] is not None:
                bucket.append(float(result[0]))

    sent_single = _row_total(chatrooms_single, 0)
    received_single = _row_total(chatrooms_single, 1)
    sent_group = _row_total(chatrooms_group, 0)
    received_group = _row_total(chatrooms_group, 1)
    # "All" is group + single, so the totals need no second round of queries.
    sent_all = sent_group + sent_single
    received_all = received_group + received_single

    recall_to = 0
    recall_from = 0
    for chatroom in chatrooms_single:
        for row in basicTool.GetData(chatroom,
                                     columns=["Message", "Des"],
                                     start_time=start_time,
                                     end_time=end_time,
                                     Type=2):
            if row[0] == "撤回消息":
                if row[1] == 0:
                    recall_to += 1
                else:
                    recall_from += 1

    # np.mean([]) warns and yields nan; report 0.00 when there is no data.
    avg_to = np.mean(message_length_to) if message_length_to else 0.0
    avg_from = np.mean(message_length_from) if message_length_from else 0.0

    with open(filename + ".txt", "w+", encoding="utf-8") as f:
        f.write("好友总数:" + str(len(chatrooms_single)) + "个\n")
        f.write("群聊总数:" + str(len(chatrooms_group)) + "个\n")
        f.write("总共发出:" + str(sent_all) + "条\n")
        f.write("总共发出(好友):" + str(sent_single) + "条\n")
        f.write("总共发出(群聊):" + str(sent_group) + "条\n")
        f.write("总共接收:" + str(received_all) + "条\n")
        f.write("总共接收(好友):" + str(received_single) + "条\n")
        f.write("总共接收(群聊):" + str(received_group) + "条\n")
        f.write("平均发出消息长度为(好友):" + "%.2f" % avg_to + "个字\n")
        f.write("平均接收消息长度为(好友):" + "%.2f" % avg_from + "个字\n")
        f.write("我总共撤回(好友):" + str(recall_to) + "次\n")
        f.write("总共被撤回(好友):" + str(recall_from) + "次\n")
Example #12
0
def _messages_on_days(chatrooms, days, des):
    '''
    Collect, per chatroom, the messages with direction `des` created on
    any of `days` ("YYYY-mm-dd" strings). Returns {chatroom: [message]}.
    '''
    collected = {}
    if not days:
        return collected
    with basicTool.SqliteInit() as cursor:
        for day in days:
            time1 = int(
                time.mktime(
                    time.strptime(day + ' 00:00:00', "%Y-%m-%d %H:%M:%S")))
            time2 = int(
                time.mktime(
                    time.strptime(day + ' 23:59:59', "%Y-%m-%d %H:%M:%S")))
            for chatroom in chatrooms:
                # A table name cannot be bound as a query parameter; the
                # other values are integers computed locally.
                sql = "select Message from " + chatroom + \
                    " where CreateTime>=" + str(time1) + \
                    " and CreateTime<=" + str(time2) + \
                    " and Des=" + str(des)
                cursor.execute(sql)
                texts = [row[0] for row in cursor.fetchall()]
                if texts:
                    # Accumulate across days; plain assignment used to keep
                    # only the last matching day for each chatroom.
                    collected.setdefault(chatroom, []).extend(texts)
    return collected


def Lonelydude(chatrooms,
               filename="lonelydude",
               start_time="1970-01-02",
               end_time=""):
    '''
    Report days on which messages were sent but none received (no
    response) and days on which messages were received but none sent
    (no reply), together with the messages of those days.

    chatrooms: iterable of chatroom table names
    filename: str, output path without extension; ".txt" is appended
    start_time, end_time: time bounds forwarded to basicTool.GetData
    '''
    # Dicts serve as insertion-ordered sets of "YYYY-mm-dd" days.
    sent_days = {}
    received_days = {}
    for chatroom in chatrooms:
        for row in basicTool.GetData(chatroom, ["CreateTime", "Des"],
                                     start_time=start_time,
                                     end_time=end_time):
            day = time.strftime("%Y-%m-%d", time.localtime(row[0]))
            if row[1] == 0:
                sent_days[day] = True
            elif row[1] == 1:
                received_days[day] = True

    no_response = [day for day in sent_days if day not in received_days]
    no_reply = [day for day in received_days if day not in sent_days]

    no_response_with = _messages_on_days(chatrooms, no_response, 0)
    no_reply_with = _messages_on_days(chatrooms, no_reply, 1)

    with open(filename + ".txt", "w+", encoding="utf-8") as f:
        f.write("未收到回复:" + ",".join(no_response) + "\n")
        f.write("未回复对方:" + ",".join(no_reply) + "\n")
        f.write("未获得回复:\n")
        for key, value in no_response_with.items():
            for i in value:
                f.write(basicTool.GetName(key) + ": " + i + "\n")
        f.write("\n未回复对方:\n")
        for key, value in no_reply_with.items():
            for i in value:
                f.write(basicTool.GetName(key) + ": " + i + "\n")