예제 #1
0
 def summarize(self, campaign, start, end, interval, tweetlist=None):
     """Compute the summarized intervals of a campaign and store each one.

     The intervals come from ``self.calculateSummarizedIntervals`` and are
     saved in the ``summarized_tweets_<campaign id>`` collection.  When a
     document with the same bounds is already stored, its ``_id`` is copied
     onto the new interval before saving (presumably so the save replaces
     the stored document instead of adding a duplicate -- confirm against
     ``MongoManager.saveDocument``).

     :param campaign: object exposing ``getId()``.
     :param start: beginning of the range to summarize.
     :param end: end of the range to summarize.
     :param interval: interval size, forwarded unchanged.
     :param tweetlist: optional tweets, forwarded unchanged.
     """
     collection_name = 'summarized_tweets_%s' % campaign.getId()
     timerange = self.calculateSummarizedIntervals(campaign, start, end,
                                                   interval, tweetlist)
     for interv in timerange:
         # Look up the stored summary of *this* interval.  Filtering on the
         # overall start/end (as was done before) cannot match individual
         # intervals once the range is split in more than one piece.  The
         # overall bounds remain the fallback if an interval carries no
         # 'start'/'end' keys of its own.
         filters = {
             'start': interv.get('start', start),
             'end': interv.get('end', end),
         }
         res = MongoManager.findOne(collection_name, filters=filters)
         if res:
             interv['_id'] = res['_id']
         MongoManager.saveDocument(collection_name, interv)
예제 #2
0
    def processItem(self, tweet):
        """Annotate a tweet for every matching campaign and store it.

        For each campaign id returned by ``getBrandClassifiersByCampaign``
        the tweet receives its topics, brand matches, gender (derived from
        the display name) and the mentions of followed accounts belonging to
        that campaign; it is then saved in the ``tweets_<cid>`` collection.

        Always returns None so processed tweets do not accumulate in the
        final list and exhaust memory.
        """
        # NOTE: a MongoManager.getActiveAccounts(max_age=10s) lookup used to
        # live here; it is disabled for now until it is clear it is needed.
        followed = MongoManager.getFollowAccountsbyCampaign(
            max_age=timedelta(seconds=10))
        # TODO: this should also be cached inside ClassifierManager.
        brand_classifiers = ClassifierManager.getBrandClassifiers()
        # TODO: also include tweets that match nothing but were written by a
        # user followed by the brand.
        matches_by_campaign = self.getBrandClassifiersByCampaign(
            tweet, brand_classifiers, followed)

        topic_classifiers = None
        for cid, brand_matches in matches_by_campaign.items():
            # Topic classifiers are only fetched once a campaign matched.
            if topic_classifiers is None:
                topic_classifiers = ClassifierManager.getTopicClassifiers()

            topic_matches = self.getTopicClassifiers(tweet, cid,
                                                     topic_classifiers)
            tweet.setExtractedTopics(topic_matches)
            tweet.setExtractedInfo(brand_matches)

            display_name = tweet.getDisplayName()
            tweet.setGender(GenderClassifier.extractGender(display_name))

            # Mention counters are rebuilt from scratch for each campaign.
            tweet.resetFollowAccountsMentionCount()
            mentioned = tweet.getUserMentions()
            for account in followed:
                if account not in mentioned:
                    continue
                for account_info in followed[account]:
                    if account_info['cid'] == cid:
                        tweet.setFollowAccountsMentionCount(account, 1)

            collection_name = "tweets_%s" % cid
            MongoManager.saveDocument(collection_name, tweet.getDictionary())

        return None
예제 #3
0
 def processItem(self, entry):
     """Store an entry in the Facebook posts collection of each campaign.

     The ``'campaigns'`` key is removed from ``entry`` before saving, so the
     stored document does not carry the campaign objects.  The entry is
     saved once per campaign, in ``fb_posts_<campaign id>``.
     """
     # Detach the campaign list from the document that gets persisted.
     target_campaigns = entry.pop('campaigns')
     for target in target_campaigns:
         MongoManager.saveDocument("fb_posts_%s" % target.getId(), entry)
예제 #4
0
 def processItem(self, item):
     """Build a Tweet from ``item`` and store it for every matching poll.

     Polls are looked up by hashtag (with a 10 second ``max_age``); for each
     hashtag of the tweet that has polls, the tweet dictionary is saved in
     the ``polls_<poll id>`` collection of every such poll.

     Returns the Tweet built from ``item``.
     """
     polls_by_hashtag = MongoManager.getPollsByHashtag(
         max_age=timedelta(seconds=10))
     tweet = Tweet.createFromUnknownSource(item)
     for hashtag in tweet.getHashtags():
         # Hashtags without any poll attached are ignored.
         if hashtag not in polls_by_hashtag:
             continue
         for poll in polls_by_hashtag[hashtag]:
             collection_name = "polls_" + poll.getId()
             MongoManager.saveDocument(collection_name, tweet.getDictionary())
     return tweet
예제 #5
0
 def summarize(self, campaign, start, end, interval, tweetlist=None):
     """Compute the summarized intervals of a campaign and store each one.

     The intervals come from ``self.calculateSummarizedIntervals`` and are
     saved in the ``summarized_tweets_<campaign id>`` collection.  When a
     document with the same bounds is already stored, its ``_id`` is copied
     onto the new interval before saving (presumably so the save replaces
     the stored document instead of adding a duplicate -- confirm against
     ``MongoManager.saveDocument``).

     :param campaign: object exposing ``getId()``.
     :param start: beginning of the range to summarize.
     :param end: end of the range to summarize.
     :param interval: interval size, forwarded unchanged.
     :param tweetlist: optional tweets, forwarded unchanged.
     """
     collection_name = 'summarized_tweets_%s' % campaign.getId()
     timerange = self.calculateSummarizedIntervals(campaign, start, end,
                                                   interval, tweetlist)
     for interv in timerange:
         # Look up the stored summary of *this* interval.  Filtering on the
         # overall start/end (as was done before) cannot match individual
         # intervals once the range is split in more than one piece.  The
         # overall bounds remain the fallback if an interval carries no
         # 'start'/'end' keys of its own.
         filters = {
             'start': interv.get('start', start),
             'end': interv.get('end', end),
         }
         res = MongoManager.findOne(collection_name, filters=filters)
         if res:
             interv['_id'] = res['_id']
         MongoManager.saveDocument(collection_name, interv)
예제 #6
0
    def processItem(self, feed):
        """Classify a feed against its campaign and store it when accepted.

        Brand classifiers of the feed's account/campaign are run over the
        feed text; for every campaign id that matched, topics and brand
        matches are set on the feed.  The feed is saved in
        ``feeds_<campaign id>`` when ``APPLY_BRAND_FILTERS`` is off or when
        the feed carries extracted info.

        Always returns None so processed feeds do not accumulate in the
        final list and exhaust memory.
        """
        # TODO: this should also be cached inside ClassifierManager.
        brand_classifiers = ClassifierManager.getCampaignBrandClassifiers(
            feed.account, feed.campaign)
        # TODO: also include items that match nothing but were written by a
        # user followed by the brand.
        matches_by_campaign = self.getBrandClassifiersByCampaign(
            feed.getText(), brand_classifiers)

        topic_classifiers = None
        for cid, brand_matches in matches_by_campaign.items():
            # Topic classifiers are only fetched once a campaign matched.
            if topic_classifiers is None:
                topic_classifiers = (
                    ClassifierManager.getCampaignTopicClassifiers(
                        feed.campaign))
            topic_matches = self.getTopicClassifiers(feed.getText(), cid,
                                                     topic_classifiers)
            feed.setExtractedTopics(topic_matches)
            feed.setExtractedInfo(brand_matches)

        # With filtering disabled every feed is stored; otherwise only feeds
        # that ended up with extracted info are kept.
        accepted = (not self.APPLY_BRAND_FILTERS) or feed.getExtractedInfo()
        if accepted:
            collection_name = "feeds_%s" % feed.campaign.getId()
            MongoManager.saveDocument(collection_name, feed.getDictionary())

        return None