Example #1
0
    def generateDataFrameSingleWordCountBy(self, mFun, word):
        """Build a data frame with per-sender occurrence counts of ``word``.

        The conversation's messages are grouped through
        ``ConvStats._getMessagesBy(mFun, ...)``. For every group, the messages
        of each sender are passed to ``statsUtil.getWordsCount`` and the count
        stored under ``word`` is collected.

        :param mFun: grouping function forwarded to ``_getMessagesBy`` and
            ``_generateDataFrameAgglomeratedStatsBy``.
        :param word: key looked up in each group's word counts.
        :return: the frame returned by
            ``_generateDataFrameAgglomeratedStatsBy`` with three added
            columns: ``<sender1>_count``, ``<sender2>_count`` and
            ``totCount`` (the sum of the two).
        """
        agglomeratedMessages = ConvStats._getMessagesBy(
            mFun, self.conversation.messages)
        df = self._generateDataFrameAgglomeratedStatsBy(
            mFun, agglomeratedMessages)

        def countsFor(sender):
            # One entry per group, in the iteration order of
            # agglomeratedMessages. A group in which `sender` never used
            # `word` contributes a plain 0 (not a tuple), so the resulting
            # array stays numeric and the columns can be summed.
            return np.array([
                statsUtil.getWordsCount(
                    [m for m in messages if m.sender == sender]
                ).get(word, 0)
                for messages in agglomeratedMessages.values()
            ])

        s1Column = self.conversation.sender1 + '_count'
        s2Column = self.conversation.sender2 + '_count'

        df[s1Column] = countsFor(self.conversation.sender1)
        df[s2Column] = countsFor(self.conversation.sender2)
        df['totCount'] = df[s1Column] + df[s2Column]
        return df
 def _getWordsUsedJustByStats(sender1Messages, sender2Messages):
     """Partition the word-count keys of two senders.

     The keys of ``statsUtil.getWordsCount`` for each message collection are
     turned into sets and compared.

     Returns a 3-tuple of sets: keys present for both senders, keys present
     only for sender 1, keys present only for sender 2.
     """
     s1Words = set(statsUtil.getWordsCount(sender1Messages).keys())
     s2Words = set(statsUtil.getWordsCount(sender2Messages).keys())
     return s1Words & s2Words, s1Words - s2Words, s2Words - s1Words
Example #3
0
 def _getWordsUsedJustByStats(sender1Messages, sender2Messages):
     """Compare the word-count keys obtained for two message collections.

     Each collection is run through ``statsUtil.getWordsCount`` and only the
     keys of the result are kept.

     Returns ``(shared, onlySender1, onlySender2)``, each a ``set``.
     """
     vocabulary1 = set(statsUtil.getWordsCount(sender1Messages).keys())
     vocabulary2 = set(statsUtil.getWordsCount(sender2Messages).keys())

     shared = vocabulary1 & vocabulary2
     onlySender1 = vocabulary1 - vocabulary2
     onlySender2 = vocabulary2 - vocabulary1
     return shared, onlySender1, onlySender2
Example #4
0
    def _getWordsCountStats(messages, limit=0):
        """Return the ``most_common`` ranking of the word counts of ``messages``.

        :param messages: forwarded unchanged to ``statsUtil.getWordsCount``.
        :param limit: number of entries requested from ``most_common``; the
            default ``0`` requests the full ranking.
        """
        counts = statsUtil.getWordsCount(messages)

        # A non-zero limit truncates the ranking; 0 means "no limit".
        if limit != 0:
            return counts.most_common(limit)
        return counts.most_common()
    def _getWordsCountStats(messages, limit=0):
        """Rank the word counts of ``messages`` via ``most_common``.

        :param messages: passed as-is to ``statsUtil.getWordsCount``.
        :param limit: how many entries to ask ``most_common`` for; ``0``
            (the default) asks for all of them.
        """
        ranking = statsUtil.getWordsCount(messages)

        # 0 is the "unlimited" marker, so only forward a real limit.
        if limit != 0:
            return ranking.most_common(limit)
        return ranking.most_common()
Example #6
0
 def _generateWordCountStatsBy(self, groupByColumns=[], word=None):
     fun = lambda x: tuple(sorted(
         statsUtil.getWordsCount(" ".join(x)).items(), key=lambda y: y[1], reverse=True))
     label = 'wordCount'
     countId = 'word'
     results = self._generateCountStatsBy(fun, label, countId, groupByColumns, word)
     return results
    def generateDataFrameSingleWordCountBy(self, mFun, word):
        """Build a data frame with per-sender occurrence counts of ``word``.

        The conversation's messages are grouped through
        ``ConvStats._getMessagesBy(mFun, ...)``. For every group, the messages
        of each sender are passed to ``statsUtil.getWordsCount`` and the count
        stored under ``word`` is collected.

        :param mFun: grouping function forwarded to ``_getMessagesBy`` and
            ``_generateDataFrameAgglomeratedStatsBy``.
        :param word: key looked up in each group's word counts.
        :return: the frame returned by
            ``_generateDataFrameAgglomeratedStatsBy`` with three added
            columns: ``<sender1>_count``, ``<sender2>_count`` and
            ``totCount`` (the sum of the two).
        """
        agglomeratedMessages = ConvStats._getMessagesBy(mFun, self.conversation.messages)
        df = self._generateDataFrameAgglomeratedStatsBy(mFun, agglomeratedMessages)

        def countsFor(sender):
            # One entry per group, in the iteration order of
            # agglomeratedMessages. A group in which `sender` never used
            # `word` contributes a plain 0 (not a tuple), so the resulting
            # array stays numeric and the columns can be summed.
            return np.array([
                statsUtil.getWordsCount([m for m in messages if m.sender == sender]).get(word, 0)
                for messages in agglomeratedMessages.values()
            ])

        s1Column = self.conversation.sender1 + "_count"
        s2Column = self.conversation.sender2 + "_count"

        df[s1Column] = countsFor(self.conversation.sender1)
        df[s2Column] = countsFor(self.conversation.sender2)
        df["totCount"] = df[s1Column] + df[s2Column]
        return df