def summarize(self, campaign, start, end, interval, tweetlist=None):
    """Compute the summarized intervals of a campaign and persist each one.

    The intervals are produced by ``self.calculateSummarizedIntervals`` and
    saved into the ``summarized_tweets_<campaign id>`` collection. Before each
    save the collection is queried for a document whose ``start``/``end``
    equal the requested range; when one is found its ``_id`` is copied onto
    the interval being saved.

    Args:
        campaign: Object exposing ``getId()``; selects the target collection.
        start: Start of the requested range (forwarded as-is).
        end: End of the requested range (forwarded as-is).
        interval: Interval size, forwarded to the interval calculation.
        tweetlist: Optional tweets, forwarded to the interval calculation.

    Returns:
        None.
    """
    target_collection = 'summarized_tweets_%s' % campaign.getId()
    summaries = self.calculateSummarizedIntervals(
        campaign, start, end, interval, tweetlist)

    for summary in summaries:
        # NOTE(review): the filter uses the overall start/end on every
        # iteration, not the bounds of the current interval. If a document
        # matches, every interval is saved with that same _id -- confirm
        # this is intended.
        existing = MongoManager.findOne(
            target_collection,
            filters={'start': start, 'end': end},
        )
        if existing:
            summary['_id'] = existing['_id']
        MongoManager.saveDocument(target_collection, summary)
def processItem(self, tweet):
    """Classify a tweet per campaign and store it in each matching campaign.

    For every campaign id returned by ``self.getBrandClassifiersByCampaign``
    the tweet is annotated with its topic matches, brand matches, gender and
    follow-account mention counts, then saved into ``tweets_<cid>``.

    Args:
        tweet: Tweet object to annotate and persist (mutated in place).

    Returns:
        None, always.
    """
    follow_accounts = MongoManager.getFollowAccountsbyCampaign(
        max_age=timedelta(seconds=10))
    # NOTE: this lookup should also be cached inside ClassifierManager.
    brand_classifiers = ClassifierManager.getBrandClassifiers()
    # TODO: also include tweets that do not match but were written by a user
    # followed by the brand.
    matches_by_campaign = self.getBrandClassifiersByCampaign(
        tweet, brand_classifiers, follow_accounts)

    # Topic classifiers are fetched at most once, and only if some campaign
    # matched.
    topic_classifiers = None
    for cid, brand_matches in matches_by_campaign.items():
        if topic_classifiers is None:
            topic_classifiers = ClassifierManager.getTopicClassifiers()

        topic_matches = self.getTopicClassifiers(tweet, cid, topic_classifiers)
        tweet.setExtractedTopics(topic_matches)
        tweet.setExtractedInfo(brand_matches)

        display_name = tweet.getDisplayName()
        tweet.setGender(GenderClassifier.extractGender(display_name))

        # Mention counts are rebuilt from scratch for each campaign.
        tweet.resetFollowAccountsMentionCount()
        mentioned = tweet.getUserMentions()
        for account in follow_accounts:
            if account not in mentioned:
                continue
            for account_info in follow_accounts[account]:
                if account_info['cid'] == cid:
                    tweet.setFollowAccountsMentionCount(account, 1)

        MongoManager.saveDocument("tweets_%s" % cid, tweet.getDictionary())

    # Nothing is returned on purpose: per the original author's note, this
    # keeps tweets from accumulating in the final list and exhausting memory.
    return None
def processItem(self, entry):
    """Save an entry into the Facebook posts collection of each campaign.

    The ``'campaigns'`` key is read and then removed from ``entry`` (the
    caller's object is mutated) before the entry is saved once per campaign
    into ``fb_posts_<campaign id>``.

    Args:
        entry: Mapping holding the post data plus a ``'campaigns'`` iterable
            of objects exposing ``getId()``.

    Returns:
        None.

    Raises:
        KeyError: If ``entry`` has no ``'campaigns'`` key.
    """
    target_campaigns = entry['campaigns']
    # Strip the campaign list so it is not part of the saved document.
    del entry['campaigns']

    for target in target_campaigns:
        MongoManager.saveDocument("fb_posts_%s" % target.getId(), entry)
def processItem(self, item):
    """Build a tweet from ``item`` and store it for every poll it hashtags.

    Polls are looked up by hashtag through ``MongoManager.getPollsByHashtag``.
    For each hashtag of the tweet present in that mapping, the tweet
    dictionary is saved into ``polls_<poll id>`` for every associated poll.

    Args:
        item: Raw item passed to ``Tweet.createFromUnknownSource``.

    Returns:
        The tweet built from ``item``, whether or not it matched a poll.
    """
    polls_by_hashtag = MongoManager.getPollsByHashtag(
        max_age=timedelta(seconds=10))
    tweet = Tweet.createFromUnknownSource(item)

    for hashtag in tweet.getHashtags():
        if hashtag not in polls_by_hashtag:
            continue
        for poll in polls_by_hashtag[hashtag]:
            MongoManager.saveDocument(
                "polls_" + poll.getId(), tweet.getDictionary())

    return tweet
def summarize(self, campaign, start, end, interval, tweetlist=None):
    """Persist the summarized intervals calculated for a campaign.

    Each interval returned by ``self.calculateSummarizedIntervals`` is saved
    into the ``summarized_tweets_<campaign id>`` collection. If the collection
    already holds a document whose ``start``/``end`` match the requested
    range, the interval takes over that document's ``_id`` before the save.

    Args:
        campaign: Object exposing ``getId()``; determines the collection name.
        start: Start of the requested range.
        end: End of the requested range.
        interval: Interval size passed through to the calculation.
        tweetlist: Optional tweets passed through to the calculation.

    Returns:
        None.
    """
    storage_name = 'summarized_tweets_%s' % campaign.getId()
    computed = self.calculateSummarizedIntervals(
        campaign, start, end, interval, tweetlist)

    for bucket in computed:
        # NOTE(review): the query is keyed on the overall start/end, not on
        # the current bucket; when it matches, every bucket is saved under
        # the same _id. Confirm this is the intended behavior.
        stored = MongoManager.findOne(
            storage_name, filters={'start': start, 'end': end})
        if stored:
            bucket['_id'] = stored['_id']
        MongoManager.saveDocument(storage_name, bucket)
def processItem(self, feed):
    """Classify a feed against its campaign's classifiers and store it.

    The feed text is matched against the brand classifiers of the feed's
    account and campaign; for every campaign id matched, topic matches and
    brand matches are recorded on the feed. The feed is then saved into
    ``feeds_<campaign id>`` unless ``self.APPLY_BRAND_FILTERS`` is truthy and
    the feed carries no extracted info.

    Args:
        feed: Feed object exposing ``account``, ``campaign``, ``getText()``
            and the extracted-info accessors (mutated in place).

    Returns:
        None, always.
    """
    # NOTE: this lookup should also be cached inside ClassifierManager.
    brand_classifiers = ClassifierManager.getCampaignBrandClassifiers(
        feed.account, feed.campaign)
    # TODO: also include items that do not match but were written by a user
    # followed by the brand.
    matches_by_campaign = self.getBrandClassifiersByCampaign(
        feed.getText(), brand_classifiers)

    # Topic classifiers are fetched at most once, and only if something
    # matched.
    topic_classifiers = None
    for cid, brand_matches in matches_by_campaign.items():
        if topic_classifiers is None:
            topic_classifiers = ClassifierManager.getCampaignTopicClassifiers(
                feed.campaign)
        topic_matches = self.getTopicClassifiers(
            feed.getText(), cid, topic_classifiers)
        feed.setExtractedTopics(topic_matches)
        feed.setExtractedInfo(brand_matches)

    # NOTE(review): the save is placed after the loop because its condition
    # does not depend on the loop variables -- confirm against the original
    # indentation.
    if not self.APPLY_BRAND_FILTERS or feed.getExtractedInfo():
        MongoManager.saveDocument(
            "feeds_%s" % feed.campaign.getId(), feed.getDictionary())

    # Nothing is returned on purpose: per the original author's note, this
    # keeps feeds from accumulating in the final list and exhausting memory.
    return None