Example #1
0
    def processItem(self, tweet):
        """Classify *tweet* and store one copy per campaign it matches.

        Brand matches are computed once through
        ``self.getBrandClassifiersByCampaign``. For every campaign id in
        that result the tweet is annotated with its topic matches, its
        brand matches, a gender derived from the display name, and a
        mention count for the followed accounts tied to that campaign.
        The tweet's dictionary is then saved to the ``tweets_<cid>``
        collection through ``MongoManager.saveDocument``.

        The same ``tweet`` object is mutated and re-saved on each pass.

        Returns:
            Always ``None``, so that processed tweets do not pile up in
            the final list and exhaust memory.
        """
        # NOTE: a MongoManager.getActiveAccounts(max_age=timedelta(seconds=10))
        # call used to sit here; it is disabled until someone confirms
        # whether it is actually needed.
        followed_accounts = MongoManager.getFollowAccountsbyCampaign(
            max_age=timedelta(seconds=10))

        # TODO: this lookup should also be cached inside ClassifierManager.
        brand_classifiers = ClassifierManager.getBrandClassifiers()

        # TODO: tweets that match no brand but were written by a user the
        # brand follows still need to be included.
        matches_by_campaign = self.getBrandClassifiersByCampaign(
            tweet, brand_classifiers, followed_accounts)

        # Topic classifiers are only fetched once a campaign has matched.
        topic_classifiers = None

        for campaign_id, brand_matches in matches_by_campaign.items():
            if topic_classifiers is None:
                topic_classifiers = ClassifierManager.getTopicClassifiers()

            topic_matches = self.getTopicClassifiers(
                tweet, campaign_id, topic_classifiers)
            tweet.setExtractedTopics(topic_matches)
            tweet.setExtractedInfo(brand_matches)

            # NOTE(review): gender and user mentions look independent of
            # the campaign; they could be hoisted if these calls are
            # side-effect free -- confirm before changing.
            display_name = tweet.getDisplayName()
            tweet.setGender(GenderClassifier.extractGender(display_name))

            tweet.resetFollowAccountsMentionCount()
            mentioned_users = tweet.getUserMentions()
            for account in followed_accounts:
                if account not in mentioned_users:
                    continue
                # Only count the mention for entries tied to this campaign.
                for account_info in followed_accounts[account]:
                    if account_info['cid'] == campaign_id:
                        tweet.setFollowAccountsMentionCount(account, 1)

            collection_name = "tweets_%s" % campaign_id
            MongoManager.saveDocument(collection_name, tweet.getDictionary())

        return None