예제 #1
0
 def basedonday(data):
     basedonDay = dict.my_dictionary()
     datauser = data.groupby(["USERNAME", "DAY"], as_index=False)["MESSAGE"]
     dataall = data.groupby(['DAY'], as_index=False)['MESSAGE']
     data = datauser.count()
     dataall = dataall.count()
     dataall.sort_values(by=['DAY', 'MESSAGE'],
                         ascending=False,
                         inplace=True,
                         ignore_index=True)
     data.sort_values(by='MESSAGE', ascending=False, inplace=True)
     for i in data['USERNAME'].unique():
         basedonDay.add(i, [
             data[data['USERNAME'] == i][['DAY', 'MESSAGE']].to_dict(
                 orient='records'), {
                     "mostActiveDay":
                     configvars.userdata.get(i)['mostActiveDay'],
                     "averageTexts":
                     configvars.userdata.get(i)['totalMessages'] /
                     configvars.no_of_days,
                     "leastActiveDay":
                     configvars.userdata.get(i)['leastActiveDay']
                 }
         ])
     basedonDay.add("All", [
         dataall.to_dict(orient='records'), {
             "averageTexts":
             sum(dataall['MESSAGE']) / configvars.no_of_days,
             "mostActiveDay": dataall['DAY'][dataall['MESSAGE'].idxmax()],
             "leastActiveDay": dataall['DAY'][dataall['MESSAGE'].idxmin()]
         }
     ])
     return basedonDay
예제 #2
0
 def radarmap(data):
     radarmap = dict.my_dictionary()
     radarmapuser = data.groupby(["USERNAME", "HOURS"],
                                 as_index=False)['MESSAGE']
     radarmapall = data.groupby(["HOURS"], as_index=False)['MESSAGE']
     radarmapuserdf = radarmapuser.count()
     radarmapalldf = radarmapall.count()
     radarmapuserdf.columns = ['USERNAME', 'time', 'count']
     radarmapalldf.columns = ['time', 'count']
     for i in radarmapuserdf['USERNAME'].unique():
         user = radarmapuserdf[radarmapuserdf['USERNAME'] == i][[
             'time', 'count'
         ]]
         Radarmap_stats = {
             "radarmapStat": {
                 "mostActiveHour":
                 str(user.sort_values("count").iloc[-1]['time']),
                 "leastActiveHour":
                 str(user.sort_values("count").iloc[0]['time']),
                 "averageTextsPerHour":
                 sum(user['count']) / (configvars.no_of_days * 24)
             }
         }
         lefthours = list(
             set([*range(0, 23, 1)]) - set(
                 list(radarmapuserdf[radarmapuserdf['USERNAME'] == i]
                      ['time'])))
         if lefthours:
             d = {'time': lefthours}
             df = pd.DataFrame(data=d)
             df['count'] = 0
             user = user.append(df).sort_values("time", ignore_index=True)
         Radarmap_Usage = {"radarmapUsage": user.to_dict(orient="records")}
         Radarmap_Usage.update(Radarmap_stats)
         radarmap.add(i, Radarmap_Usage)
     lefthoursall = list(
         set([*range(0, 23, 1)]) - set(list(radarmapalldf['time'])))
     if lefthoursall:
         d = {'time': lefthoursall}
         df = pd.DataFrame(data=d)
         df['count'] = 0
         radarmapalldf = radarmapalldf.append(df).sort_values(
             "time", ignore_index=True)
     Radarmap_statsall = {
         "radarmapStat": {
             "mostActiveHour":
             str(radarmapalldf.sort_values("count").iloc[-1]['time']),
             "leastActiveHour":
             str(radarmapalldf.sort_values("count").iloc[0]['time']),
             "averageTextsPerHour":
             sum(radarmapalldf['count']) / (configvars.no_of_days * 24)
         }
     }
     Radarmap_Usageall = {
         "radarmapUsage": radarmapalldf.to_dict(orient="records")
     }
     Radarmap_Usageall.update(Radarmap_statsall)
     radarmap.add("All", Radarmap_Usageall)
     return radarmap
예제 #3
0
 def heatmap(data):
     heatmap = dict.my_dictionary()
     heatmapuser = data.groupby(["USERNAME", "DATE"],
                                as_index=False)['MESSAGE']
     heatmapall = data.groupby(["DATE"], as_index=False)['MESSAGE']
     heatmapuser = heatmapuser.count().sort_values(by=['MESSAGE'],
                                                   ascending=False)
     heatmapuser.columns = ['USERNAME', 'date', 'count']
     configvars.no_of_days = len(heatmapall)
     heatmapall = heatmapall.count().sort_values(by='MESSAGE',
                                                 ascending=False)
     heatmapall.columns = ['date', 'count']
     for i in heatmapuser['USERNAME'].unique():
         heatmap.add(
             i, heatmapuser[heatmapuser['USERNAME'] == i][[
                 'date', 'count'
             ]].to_dict(orient='records'))
     heatmap.add("All", heatmapall.to_dict(orient='records'))
     return heatmap
예제 #4
0
 def wordCountUser(data, username):
     wordcloud = dict.my_dictionary()
     word, link = getData.wordsListNestedUser(data, username)
     wordcounter = Counter(word).most_common()
     if wordcounter:
         word_usage = {
             "wordUsage":
             pd.DataFrame(wordcounter[:50],
                          columns=['text',
                                   'value']).to_dict(orient='records')
         }
         word_stat = {
             "wordStat": {
                 'mostUsedWord': wordcounter[0][0],
                 'leastUsedWord': wordcounter[-1][0]
             }
         }
         word_usage.update(word_stat)
         wordcloud.add(username, word_usage)
     return word, link, wordcloud
예제 #5
0
    def timeline(data):
        timeline = dict.my_dictionary()
        timelineuser = data.groupby(["USERNAME", "DATETIME"],
                                    as_index=False)['MESSAGE']
        timelineall = data.groupby(["DATETIME"], as_index=False)['MESSAGE']
        timelineuserdf = timelineuser.count()
        timelinealldf = timelineall.count()
        timelineuserdf.columns = ['USERNAME', 'date', 'count']
        configvars.no_of_days = len(timelineall)
        timelinealldf.columns = ['date', 'count']
        for i in timelineuserdf['USERNAME'].unique():
            Timeline_stats = {
                "timelineStat": {
                    "mostActiveDate":
                    configvars.userdata.get(i)['mostActiveDate'],
                    "value":
                    str(data[data['USERNAME'] == i]['DATE'].value_counts()[0])
                }
            }
            Timeline_data = {
                "timelineUsage":
                timelineuserdf[timelineuserdf['USERNAME'] == i][[
                    'date', 'count'
                ]].to_dict(orient='records')
            }
            Timeline_stats.update(Timeline_data)
            timeline.add(i, Timeline_stats)
        Timeline_statsall = {
            "timelineStat": {
                "mostActiveDate": data['DATE'].value_counts().idxmax(),
                "value": str(data['DATE'].value_counts()[0])
            }
        }
        Timeline_dataall = {
            "timelineUsage": timelinealldf.to_dict(orient='records')
        }
        Timeline_statsall.update(Timeline_dataall)
        timeline.add("All", Timeline_statsall)

        return timeline
예제 #6
0
 def emojiall(data, name):
     emojidict = dict.my_dictionary()
     emoji_list = []
     for data1 in data['MESSAGE']:
         for word in data1:
             if word in emoji.UNICODE_EMOJI:  # emoji search
                 emoji_list.append(word)
     Emoji_stats = {
         "emojiStat": {
             'totalUniqueEmojis': len(Counter(emoji_list).most_common()),
             'totalEmojis': len(emoji_list),
             "emojiPerText": len(emoji_list) / len(data)
         }
     }
     Emoji_data = {
         "emojiUsage":
         pd.DataFrame((Counter(emoji_list).most_common()[:20]),
                      columns=['emoji', 'value']).to_dict(orient='records')
     }
     Emoji_stats.update(Emoji_data)
     emojidict.add(name, Emoji_stats)
     configvars.emojidata.update(emojidict)
예제 #7
0
 def wordcloudall(data):
     wordcloud = dict.my_dictionary()
     word = []
     for i in data['MESSAGE']:
         for j in i.split():
             if j != "<Media" and j != "omitted>":
                 word.append(j)
     wordcounter = Counter(word).most_common()
     if wordcounter:
         word_usage = {
             "wordUsage":
             pd.DataFrame(wordcounter[:50],
                          columns=['text',
                                   'value']).to_dict(orient='records')
         }
         word_stat = {
             "wordStat": {
                 'mostUsedWord': wordcounter[0][0],
                 'leastUsedWord': wordcounter[-1][0]
             }
         }
         word_usage.update(word_stat)
         wordcloud.add("All", word_usage)
     return wordcloud
예제 #8
0
 def emojidata(data):
     userspecificemoji = dict.my_dictionary()
     for i in getData.usernameonly(data):
         userspecificemoji.add(
             i, getData.emojiall(data[data["USERNAME"] == i], i))
예제 #9
0
 def userspecific(data):
     userspecific = dict.my_dictionary()
     for i in getData.usernameonly(data):
         userspecific.add(i, getData.userSpecificInfo(data, i))
     return userspecific