Beispiel #1
0
        def onInstanceLoadFunc(instanceKey,
                               oauthToken,
                               oauthSecret,
                               geographicSetupString,
                               keywords,
                               instanceSetupCode,
                               startTime,
                               temporalLastTimeId,
                               count=count):
            """Recreate one Twitter instance from its stored settings.

            ``temporalLastTimeId`` is walked as a two-level mapping
            (provider id -> place id -> time id); every id is converted with
            ``int()`` and the time id is stored under
            ``GeocodeResultAbstract.buildCacheIdTuple(providerId, placeId)``.
            The resulting dict is handed to a newly constructed
            ``TwitterInstance`` (the constructed object is not kept here).

            When ``args.rebuild_instance_indexes`` is truthy the indexes of the
            instance's user and tweet collections are dropped. Finally
            ``count[0]`` is incremented; ``count`` is bound as a default
            argument, i.e. to whatever ``count`` named when this function was
            defined.
            """
            lastTimeIdBySource = {}
            for rawProviderId, placeMapping in temporalLastTimeId.iteritems():
                sourceProviderId = int(rawProviderId)
                for rawPlaceId, rawTimeId in placeMapping.iteritems():
                    sourcePlaceId = int(rawPlaceId)
                    sourceTimeId = int(rawTimeId)
                    cacheKey = GeocodeResultAbstract.buildCacheIdTuple(
                        sourceProviderId, sourcePlaceId)
                    lastTimeIdBySource[cacheKey] = sourceTimeId
                    logger.debug(
                        'Loaded instance %s last temporal change source %d/%d -> %d'
                        % (instanceKey, sourceProviderId, sourcePlaceId, sourceTimeId))

            instanceRegistry = webApplication.twitter_instances
            authentication = TwitterAuthentication(Configuration.CONSUMER_TOKEN,
                                                   Configuration.CONSUMER_SECRET,
                                                   oauthToken,
                                                   oauthSecret)
            TwitterInstance(instanceKey,
                            instanceRegistry,
                            authentication,
                            unicode(geographicSetupString),
                            keywords,
                            instanceSetupCode,
                            startTime,
                            lastTimeIdBySource)

            # can delete, was debugging indexes.
            if args.rebuild_instance_indexes:
                logger.info('Dropping indexes of instance %s' % instanceKey)
                for getCollection in (getUserCollection, getTweetCollection):
                    getCollection(instanceKey).drop_indexes()

            count[0] += 1
    def addTemporalEntry(self, temporalCollection, timeId, userProviderId, userPlaceId, followerProviderId, followerPlaceId, followerPlaceType):
        """Add a temporal entry for a user source and remember its time id.

        Does nothing (returns ``None``) while ``self.is_shutdown`` is truthy.
        Otherwise the time id previously remembered for the
        (userProviderId, userPlaceId) source - or ``None`` if there is none -
        is passed together with ``timeId`` to the free ``addTemporalEntry``
        function. Afterwards ``timeId`` becomes the remembered value for that
        source and is forwarded to ``setInstanceTemporalSourceLastTime``.
        """
        if not self.is_shutdown:
            sourceKey = GeocodeResultAbstract.buildCacheIdTuple(userProviderId, userPlaceId)
            sourceCacheId = GeocodeResultAbstract.buildCacheId(userProviderId, userPlaceId)
            previousTimeId = self.last_temporal_time_id_by_source.get(sourceKey)

            # The bare name below is looked up outside the class body, so this
            # calls the free function of the same name, not this method.
            addTemporalEntry(temporalCollection,
                             previousTimeId,
                             timeId,
                             sourceCacheId,
                             '%s_%s' % (followerPlaceType, followerPlaceId),
                             followerProviderId)

            self.last_temporal_time_id_by_source[sourceKey] = timeId
            setInstanceTemporalSourceLastTime(self.instance_key, userProviderId, userPlaceId, timeId)
    def addTemporalEntry(self, temporalCollection, timeId, userProviderId, userPlaceId, followerProviderId, followerPlaceId, followerPlaceType):
        """Add a temporal entry for a user source and remember its time id.

        Does nothing (returns ``None``) while ``self.is_shutdown`` is truthy.
        Otherwise the time id previously remembered for the
        (userProviderId, userPlaceId) source - or ``None`` if there is none -
        is passed together with ``timeId`` to the free ``addTemporalEntry``
        function. Afterwards ``timeId`` becomes the remembered value for that
        source and is forwarded to ``setInstanceTemporalSourceLastTime``.
        """
        if not self.is_shutdown:
            sourceKey = GeocodeResultAbstract.buildCacheIdTuple(userProviderId, userPlaceId)
            sourceCacheId = GeocodeResultAbstract.buildCacheId(userProviderId, userPlaceId)
            previousTimeId = self.last_temporal_time_id_by_source.get(sourceKey)

            # The bare name below is looked up outside the class body, so this
            # calls the free function of the same name, not this method.
            addTemporalEntry(temporalCollection,
                             previousTimeId,
                             timeId,
                             sourceCacheId,
                             '%s_%s' % (followerPlaceType, followerPlaceId),
                             followerProviderId)

            self.last_temporal_time_id_by_source[sourceKey] = timeId
            setInstanceTemporalSourceLastTime(self.instance_key, userProviderId, userPlaceId, timeId)
Beispiel #4
0
        def onInstanceLoadFunc(instanceKey,
                               oauthToken,
                               oauthSecret,
                               geographicSetupString,
                               keywords,
                               instanceSetupCode,
                               startTime,
                               temporalLastTimeId,
                               count=count):
            """Recreate one Twitter instance from its stored settings.

            ``temporalLastTimeId`` is walked as a two-level mapping
            (provider id -> place id -> time id); every id is converted with
            ``int()`` and the time id is stored under
            ``GeocodeResultAbstract.buildCacheIdTuple(providerId, placeId)``.
            The resulting dict is handed to a newly constructed
            ``TwitterInstance`` with ``isCritical=True`` (the constructed
            object is not kept here).

            When ``args.rebuild_instance_indexes`` is truthy the indexes of the
            instance's user and tweet collections are dropped. Finally
            ``count[0]`` is incremented; ``count`` is bound as a default
            argument, i.e. to whatever ``count`` named when this function was
            defined.
            """
            lastTimeIdBySource = {}
            for rawProviderId, placeMapping in temporalLastTimeId.iteritems():
                sourceProviderId = int(rawProviderId)
                for rawPlaceId, rawTimeId in placeMapping.iteritems():
                    sourcePlaceId = int(rawPlaceId)
                    sourceTimeId = int(rawTimeId)
                    cacheKey = GeocodeResultAbstract.buildCacheIdTuple(sourceProviderId, sourcePlaceId)
                    lastTimeIdBySource[cacheKey] = sourceTimeId
                    logger.debug('Loaded instance %s last temporal change source %d/%d -> %d' % (instanceKey, sourceProviderId, sourcePlaceId, sourceTimeId))

            instanceRegistry = webApplication.twitter_instances
            authentication = TwitterAuthentication(Configuration.CONSUMER_TOKEN,
                                                   Configuration.CONSUMER_SECRET,
                                                   oauthToken,
                                                   oauthSecret)

            # Marked critical: this instance worked before the restart, so a
            # failure now may mean the server has lost network connectivity.
            TwitterInstance(instanceKey,
                            instanceRegistry,
                            authentication,
                            unicode(geographicSetupString),
                            keywords,
                            instanceSetupCode,
                            startTime,
                            lastTimeIdBySource,
                            isCritical=True)

            # can delete, was debugging indexes.
            if args.rebuild_instance_indexes:
                logger.info('Dropping indexes of instance %s' % instanceKey)
                for getCollection in (getUserCollection, getTweetCollection):
                    getCollection(instanceKey).drop_indexes()

            count[0] += 1
    def extractItemFromData(self, data, signalerKey):
        """Build the tweet-rate payload for the instance or location in signalerKey.

        When both ``signalerKey.provider_id`` and ``signalerKey.place_id`` are
        set, the 'success' rates are read from ``data['location_tweets']``
        under the location's cache id tuple; otherwise they are read from
        ``data['instance_tweets']``. The 'geocode_fail' rates are always read
        from the instance-level entry of the selected mapping.

        Returns:
            ``None`` when there is no entry for ``signalerKey.instance`` (or
            for the location inside it). Otherwise a tuple
            ``(newData, payload)``: ``newData`` is ``False`` only when every
            "updated" flag is the ``False`` object, and ``payload`` is a JSON
            string with two rows of ``[perSecond, perMinute, perHour, perDay]``
            - successful tweets first, geocode failures second. Missing rates
            are ``null``; successful rates below one are shown as ``'< 1'``
            when the per-day rate is positive.
        """
        if signalerKey.provider_id is not None and signalerKey.place_id is not None:
            data = data['location_tweets']
            locationCacheId = GeocodeResultAbstract.buildCacheIdTuple(signalerKey.provider_id, signalerKey.place_id)
        else:
            data = data['instance_tweets']
            locationCacheId = None

        instanceEntry = data.get(signalerKey.instance)
        if instanceEntry is None:
            return None

        successEntry = instanceEntry
        if locationCacheId is not None:
            successEntry = instanceEntry.get(locationCacheId)
            if successEntry is None:
                return None

        def readRates(performance):
            # Returns (perDay, perHour, perMinute, updatedFlags). With no
            # performance object recorded, all rates are None and all flags
            # are False.
            if performance is None:
                return None, None, None, (False, False, False)

            assert isinstance(performance, RealtimePerformance)
            perDay, dayUpdated = performance.tweets_per_day.time_period_count_updated(True, True, True)
            perHour, hourUpdated = performance.tweets_per_hour.time_period_count_updated(True, True, True)
            perMinute, minuteUpdated = performance.tweets_per_minute.time_period_count_updated(True, True, True)
            return perDay, perHour, perMinute, (dayUpdated, hourUpdated, minuteUpdated)

        def completeRates(perDay, perHour, perMinute):
            # Derives the per-second rate and fills a missing coarser rate by
            # scaling up the next finer one.
            if perMinute is None:
                perSecond = None
            else:
                perSecond = int(perMinute / 60)
                if perHour is None:
                    perHour = perMinute * 60

            if perDay is None and perHour is not None:
                perDay = perHour * 24

            return perSecond, perMinute, perHour, perDay

        def belowOne(rate):
            # A missing rate counts as below one. This is spelled out instead
            # of evaluating ``None < 1``, which only works on Python 2 (where
            # None sorts before every number) and raises TypeError on Python 3.
            return rate is None or rate < 1

        # Read order matters if time_period_count_updated has side effects:
        # success first, then geocode failures.
        tweetsPerDay, tweetsPerHour, tweetsPerMinute, successUpdated = \
            readRates(successEntry.get('success', None))
        failGeocodeTweetsPerDay, failGeocodeTweetsPerHour, failGeocodeTweetsPerMinute, failUpdated = \
            readRates(instanceEntry.get('geocode_fail'))

        # Identity check on purpose: any flag that is not the False object
        # (including other falsy values) counts as new data.
        newData = not all(flag is False for flag in successUpdated + failUpdated)

        tweetsPerSecond, tweetsPerMinute, tweetsPerHour, tweetsPerDay = \
            completeRates(tweetsPerDay, tweetsPerHour, tweetsPerMinute)
        failGeocodeTweetsPerSecond, failGeocodeTweetsPerMinute, failGeocodeTweetsPerHour, failGeocodeTweetsPerDay = \
            completeRates(failGeocodeTweetsPerDay, failGeocodeTweetsPerHour, failGeocodeTweetsPerMinute)

        # Only the successful-tweet row gets the '< 1' display treatment.
        if tweetsPerDay is not None and tweetsPerDay > 0:
            if belowOne(tweetsPerSecond):
                tweetsPerSecond = '< 1'

            if belowOne(tweetsPerMinute):
                tweetsPerMinute = '< 1'

            if belowOne(tweetsPerHour):
                tweetsPerHour = '< 1'

        return newData, json.dumps([[tweetsPerSecond, tweetsPerMinute, tweetsPerHour, tweetsPerDay],
                                    [failGeocodeTweetsPerSecond, failGeocodeTweetsPerMinute, failGeocodeTweetsPerHour, failGeocodeTweetsPerDay]])
    def continueExtractItemFromData(self, data, instanceData, signalerKey):
        """Return the set of keys stored for the signaler's location.

        The lookup key is
        ``GeocodeResultAbstract.buildCacheIdTuple(signalerKey.provider,
        signalerKey.location)``. If ``instanceData`` has no entry under that
        key, an empty set is returned. ``data`` is not used here.
        """
        placeId = signalerKey.location
        sourceProviderId = signalerKey.provider
        cacheKey = GeocodeResultAbstract.buildCacheIdTuple(sourceProviderId, placeId)

        entries = instanceData.get(cacheKey, {})
        return set(entries.keys())