Ejemplo n.º 1
0
        def func(templateArguments, instance, location, provider):
            """Fill in the template arguments for one location and render 'location.tpl'.

            templateArguments is updated in place. `instance` is the key used
            to look up the search stream; `provider` and `location` are
            combined into the geocode cache ID.

            Calls abort(404, ...) when either the instance or the geocode
            cannot be found. The rest of the function relies on abort() not
            returning.
            """
            twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(
                instance)
            if twitterInstance is None:
                abort(404, "No active search stream found at this address")

            geocode = geocodeFromCacheById(
                GeocodeResultAbstract.buildCacheId(provider, location))
            if geocode is None:
                # A cache miss is reported explicitly rather than asserted:
                # `assert` is stripped under -O, which would let a None geocode
                # fail later with an unrelated AttributeError.
                abort(404, "No location found at this address")

            instanceDescription = getInstanceDescription(twitterInstance)
            instanceLink = getInstanceLink(twitterInstance)

            homeLink = getHomeLink(Configuration.PROJECT_NAME)

            templateArguments.update({
                'home_link': homeLink,
                'instance': instance,
                'location': location,
                'provider': provider,
                'instance_link': instanceLink,
                'instance_description': instanceDescription,
                'place': geocode.display_name_short,
                'place_coord': geocode.coordinate,
                'startEpoch': twitterInstance.constructed_at,
                # Client needs to offset to this epoch in case its clock is wrong.
                'server_current_epoch': getEpochMs(),
                'max_tweets': Configuration.MAX_CLIENT_LIVE_TWEETS,
            })

            if geocode.has_bounding_box:
                templateArguments['place_bounding_box'] = geocode.bounding_box

            if geocode.has_country:
                country = geocode.country
                templateArguments.update({
                    'place_country_link': LocationsPage.link_info.getPageLink(
                        instance, country.place_id, country.provider_id),
                    'place_country': country.display_name_short,
                })

            if geocode.has_continent:
                continent = geocode.continent
                templateArguments.update({
                    'place_continent_link': LocationsPage.link_info.getPageLink(
                        instance, continent.place_id, continent.provider_id),
                    'place_continent': continent.display_name_short,
                })

            return template('location.tpl', templateArguments)
Ejemplo n.º 2
0
def _constructTweet(instanceId, twitterSession, cacheData, retrieveUserData, userProjection):
    if cacheData is not None:
        geocode = geocodeFromCacheById(cacheData['geocode'])
        item = Tweet.FromCache(cacheData['data'], twitterSession, cacheData['timestamp'], geocode)

        if retrieveUserData:
            item.user = readUserFromCache(item.user.id, twitterSession, instanceId, False, userProjection)

        return item
    else:
        return None
Ejemplo n.º 3
0
def _constructTweet(instanceId, twitterSession, cacheData, retrieveUserData,
                    userProjection):
    if cacheData is not None:
        geocode = geocodeFromCacheById(cacheData['geocode'])
        item = Tweet.FromCache(cacheData['data'], twitterSession,
                               cacheData['timestamp'], geocode)

        if retrieveUserData:
            item.user = readUserFromCache(item.user.id, twitterSession,
                                          instanceId, False, userProjection)

        return item
    else:
        return None
Ejemplo n.º 4
0
    def from_cache(self, data):
        """Restore follower counts from a cached followers analysis result.

        data must provide:
          - 'geocoded_followers_by_location': an iterable of items, each with
            an '_id' (geocode cache ID) and a 'quantity';
          - 'num_followers', 'num_non_geocoded_followers' and
            'num_geocoded_followers'.

        Each geocode found in the cache is passed to onGeocodedFollower and
        its quantity is stored in num_geocoded_followers_by_location. Entries
        whose geocode is missing from the cache are logged and skipped.
        """
        for item in data['geocoded_followers_by_location']:
            theId = item['_id']
            quantity = item['quantity']

            geocodeResult = geocodeFromCacheById(theId)
            if geocodeResult is None:
                logger.error('Failed to find geocode data in cache with ID: %s, while processing followers analysis result' % str(theId))
                # Skip the entry: otherwise None would be handed to
                # onGeocodedFollower and used as a dictionary key.
                continue

            self.onGeocodedFollower(geocodeResult)
            self.num_geocoded_followers_by_location[geocodeResult] = quantity

        # The stored totals are assigned after the per-location entries have
        # been processed.
        self.num_followers = data['num_followers']
        self.num_non_geocoded_followers = data['num_non_geocoded_followers']
        self.num_geocoded_followers = data['num_geocoded_followers']
Ejemplo n.º 5
0
    def from_cache(self, data):
        """Load this object's follower statistics from a cached analysis document.

        Reads data['geocoded_followers_by_location'] (items carrying '_id' and
        'quantity'), resolves every '_id' through geocodeFromCacheById, and
        records the quantity per resolved geocode. An item whose geocode is
        not in the cache is reported via logger.error and ignored.

        Finally copies 'num_followers', 'num_non_geocoded_followers' and
        'num_geocoded_followers' from data onto the instance.
        """
        geocoded = data['geocoded_followers_by_location']
        for item in geocoded:
            theId = item['_id']
            quantity = item['quantity']

            geocodeResult = geocodeFromCacheById(theId)
            if geocodeResult is None:
                logger.error('Failed to find geocode data in cache with ID: %s, while processing followers analysis result' % str(theId))
                # Without this, the missing geocode (None) would still be
                # passed on and stored as a key below.
                continue

            self.onGeocodedFollower(geocodeResult)
            self.num_geocoded_followers_by_location[geocodeResult] = quantity

        # Assigned last, after every per-location entry has been handled.
        self.num_followers = data['num_followers']
        self.num_non_geocoded_followers = data['num_non_geocoded_followers']
        self.num_geocoded_followers = data['num_geocoded_followers']
Ejemplo n.º 6
0
        def func(templateArguments, instance, location, provider):
            """Populate templateArguments for a location and render 'location.tpl'.

            `instance` identifies the search stream; `provider` and `location`
            together form the geocode cache ID. templateArguments is mutated
            in place and then passed to template().

            abort(404, ...) is called when the instance or the geocode is not
            found; the code after each call relies on abort() not returning.
            """
            twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instance)
            if twitterInstance is None:
                abort(404, "No active search stream found at this address")

            geocode = geocodeFromCacheById(GeocodeResultAbstract.buildCacheId(provider, location))
            if geocode is None:
                # Explicit 404 instead of `assert`: assertions are removed
                # under -O, and an unknown location would then fail further
                # down with an AttributeError on None.
                abort(404, "No location found at this address")

            instanceDescription = getInstanceDescription(twitterInstance)
            instanceLink = getInstanceLink(twitterInstance)

            homeLink = getHomeLink(Configuration.PROJECT_NAME)

            templateArguments.update({'home_link': homeLink,
                                      'instance': instance,
                                      'location': location,
                                      'provider': provider,
                                      'instance_link': instanceLink,
                                      'instance_description': instanceDescription,
                                      'place': geocode.display_name_short,
                                      'place_coord': geocode.coordinate,
                                      'startEpoch': twitterInstance.constructed_at,
                                      # Client needs to offset to this epoch in case its clock is wrong.
                                      'server_current_epoch': getEpochMs(),
                                      'max_tweets': Configuration.MAX_CLIENT_LIVE_TWEETS})

            if geocode.has_bounding_box:
                templateArguments['place_bounding_box'] = geocode.bounding_box

            if geocode.has_country:
                country = geocode.country
                templateArguments.update({'place_country_link': LocationsPage.link_info.getPageLink(instance, country.place_id, country.provider_id),
                                          'place_country': country.display_name_short})

            if geocode.has_continent:
                continent = geocode.continent
                templateArguments.update({'place_continent_link': LocationsPage.link_info.getPageLink(instance, continent.place_id, continent.provider_id),
                                          'place_continent': continent.display_name_short})

            return template('location.tpl', templateArguments)
Ejemplo n.º 7
0
        def func(templateArguments, instance):
            """Build the influence data for one source place as a JSON payload.

            Reads start_epoch, end_epoch, source_place_id and
            source_provider_id from request.GET, fetches the matching temporal
            range, and groups the destinations by place type.

            Returns an empty dict when the instance is unknown or no source
            place ID was parsed; otherwise returns {'json': {...}} with the
            keys 'city', 'country' and 'continent', each holding a
            'geocode_list' and a 'total'.

            Each record in a geocode_list is
            [placeId, providerId, display_name, coordinate, count, bounding_box].
            """
            twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(
                instance)
            if twitterInstance is None:
                return {}

            baseEpoch = twitterInstance.constructed_at

            start_epoch = parseInteger(request.GET.start_epoch, default=None)
            end_epoch = parseInteger(request.GET.end_epoch, default=None)
            source_place_id = parseInteger(request.GET.source_place_id)
            source_provider_id = parseInteger(request.GET.source_provider_id)

            if source_place_id is None:
                # The logged value is always None on this path.
                logger.error(
                    'Invalid place ID specified while providing influence data: %s'
                    % unicode(source_place_id))
                return {}

            source_cache_id = GeocodeResultAbstract.buildCacheId(
                source_provider_id, source_place_id)

            temporalCollection = getTemporalInfluenceCollection(instance)

            def toTimeId(epoch):
                # A missing epoch leaves that end of the range unset (None).
                if epoch is None:
                    return None
                return getTimeIdFromTimestamp(
                    baseEpoch, Configuration.TEMPORAL_STEP, epoch)

            start_time_id = toTimeId(start_epoch)
            end_time_id = toTimeId(end_epoch)

            readStartedMs = getEpochMs()
            cacheData = getTemporalRange(temporalCollection,
                                         start_time_id,
                                         end_time_id,
                                         source_cache_id,
                                         preciseFromBack=True,
                                         preciseFromFront=True)
            logger.info('Took %dms to read temporal range data' %
                        (getEpochMs() - readStartedMs))

            buildStartedMs = getEpochMs()

            recordsByPlaceType = {}
            totalsByPlaceType = {}

            if cacheData is not None:
                # Destination keys have the form '<placeType>_<placeId>'.
                for providerKey, destinations in cacheData.iteritems():
                    providerId = int(providerKey)

                    for destination, count in destinations.iteritems():
                        parts = destination.split('_')
                        placeType = int(parts[0])
                        placeId = int(parts[1])

                        if placeType not in recordsByPlaceType:
                            recordsByPlaceType[placeType] = []
                        recordsByPlaceType[placeType].append(
                            [placeId, providerId, None, None, count, None])

                # Process only the records we are going to display.
                for placeType, records in recordsByPlaceType.iteritems():
                    byCountDescending = sorted(
                        records, key=lambda rec: rec[4], reverse=True)
                    displayed = byCountDescending[
                        :Configuration.
                        DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE]
                    recordsByPlaceType[placeType] = displayed

                    for rec in displayed:
                        geocode = geocodeFromCacheById(
                            GeocodeResultAbstract.buildCacheId(rec[1], rec[0]))

                        rec[2] = geocode.display_name
                        rec[3] = geocode.coordinate
                        rec[5] = geocode.bounding_box

                        totalsByPlaceType[placeType] = (
                            totalsByPlaceType.get(placeType, 0) + rec[4])

            placeTypes = GeocodeResultAbstract.PlaceTypes
            resultData = {}
            for label, placeType in (('city', placeTypes.CITY),
                                     ('country', placeTypes.COUNTRY),
                                     ('continent', placeTypes.CONTINENT)):
                resultData[label] = {
                    'geocode_list': recordsByPlaceType.get(placeType, []),
                    'total': totalsByPlaceType.get(placeType, 0)
                }

            logger.info('Took %dms to build temporal range result data' %
                        (getEpochMs() - buildStartedMs))

            return {'json': resultData}
Ejemplo n.º 8
0
        def func(templateArguments, instance):
            """Return influence records for a source place, grouped by place type.

            Query parameters are read from request.GET: start_epoch and
            end_epoch (both optional), source_place_id and source_provider_id.

            Returns dict() if the instance cannot be found or source_place_id
            did not parse. Otherwise returns {'json': resultData}, where
            resultData has 'city', 'country' and 'continent' entries of the
            form {'geocode_list': [...], 'total': ...}. Every list item is
            [placeId, providerId, display_name, coordinate, count, bounding_box].
            """
            twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instance)
            if twitterInstance is None:
                return {}

            baseEpoch = twitterInstance.constructed_at

            start_epoch = parseInteger(request.GET.start_epoch, default=None)
            end_epoch = parseInteger(request.GET.end_epoch, default=None)
            source_place_id = parseInteger(request.GET.source_place_id)
            source_provider_id = parseInteger(request.GET.source_provider_id)

            if source_place_id is None:
                # source_place_id is None here, so the message always ends in 'None'.
                logger.error('Invalid place ID specified while providing influence data: %s' % unicode(source_place_id))
                return {}

            source_cache_id = GeocodeResultAbstract.buildCacheId(source_provider_id, source_place_id)

            temporalCollection = getTemporalInfluenceCollection(instance)

            # An absent epoch leaves the corresponding range bound as None.
            start_time_id = None
            if start_epoch is not None:
                start_time_id = getTimeIdFromTimestamp(baseEpoch, Configuration.TEMPORAL_STEP, start_epoch)

            end_time_id = None
            if end_epoch is not None:
                end_time_id = getTimeIdFromTimestamp(baseEpoch, Configuration.TEMPORAL_STEP, end_epoch)

            readTimerMs = getEpochMs()
            cacheData = getTemporalRange(temporalCollection, start_time_id, end_time_id, source_cache_id, preciseFromBack=True, preciseFromFront=True)
            logger.info('Took %dms to read temporal range data' % (getEpochMs() - readTimerMs))

            buildTimerMs = getEpochMs()

            geocodeByPlaceType = {}
            totalsByPlaceType = {}

            if cacheData is not None:
                # Keys of each provider's data look like '<placeType>_<placeId>'.
                for rawProviderId, providerIdData in cacheData.iteritems():
                    providerId = int(rawProviderId)

                    for destination, count in providerIdData.iteritems():
                        pieces = destination.split('_')
                        placeType = int(pieces[0])
                        placeId = int(pieces[1])

                        bucket = geocodeByPlaceType.setdefault(placeType, [])
                        bucket.append([placeId, providerId, None, None, count, None])

                # Process only the records we are going to display.
                for placeType, records in geocodeByPlaceType.iteritems():
                    ranked = sorted(records, key=lambda entry: entry[4], reverse=True)
                    shown = ranked[:Configuration.DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE]
                    geocodeByPlaceType[placeType] = shown

                    for entry in shown:
                        cacheId = GeocodeResultAbstract.buildCacheId(entry[1], entry[0])
                        geocode = geocodeFromCacheById(cacheId)

                        entry[2] = geocode.display_name
                        entry[3] = geocode.coordinate
                        entry[5] = geocode.bounding_box

                        runningTotal = totalsByPlaceType.get(placeType, 0)
                        totalsByPlaceType[placeType] = runningTotal + entry[4]

            def getResultPart(placeType):
                # Place types without records get an empty list and a total of 0.
                part = {}
                part['geocode_list'] = geocodeByPlaceType.get(placeType, [])
                part['total'] = totalsByPlaceType.get(placeType, 0)
                return part

            resultData = {}
            resultData['city'] = getResultPart(GeocodeResultAbstract.PlaceTypes.CITY)
            resultData['country'] = getResultPart(GeocodeResultAbstract.PlaceTypes.COUNTRY)
            resultData['continent'] = getResultPart(GeocodeResultAbstract.PlaceTypes.CONTINENT)

            logger.info('Took %dms to build temporal range result data' % (getEpochMs() - buildTimerMs))

            return {'json': resultData}
Ejemplo n.º 9
0
def _constructUser(instanceId,
                   twitterSession,
                   cacheData,
                   recursive=True,
                   userProjection=None,
                   onIterationFunc=None):
    if cacheData is None:
        return None

    geocodePlaceId = cacheData.get('geocode', None)
    if geocodePlaceId is None:
        geocode = None
    else:
        geocode = geocodeFromCacheById(geocodePlaceId)

    userId = cacheData.get('_id')
    geocode_bias = cacheData.get('geocode_bias', None)
    geocoded_from = cacheData.get('geocoded_from', None)
    is_followers_loaded = cacheData.get('is_followers_loaded', None)
    is_followee = cacheData.get('is_followee', None)
    is_associated_with_tweet = cacheData.get('is_associated_with_tweet', None)
    last_follower_enrichment_error = cacheData.get(
        'last_follower_enrichment_error', None)
    queued_for_follower_enrichment = cacheData.get(
        'queued_for_follower_enrichment', None)

    known_followees_id = cacheData.get('known_followees', None)

    user_data = cacheData.get('data', None)
    num_followers = None
    if user_data is not None:
        num_followers = user_data.get('followers_count', None)

    totalSubUsers = 0
    subUserIndex = [0]

    # First calculate total size.
    if recursive:
        followees_count = 0
        followers_count = 0

        if known_followees_id is not None:
            followees_count = len(known_followees_id)
            totalSubUsers += followees_count

        if is_followers_loaded and num_followers is not None:
            followers_count = num_followers
            totalSubUsers += followers_count

        if followees_count > 0 or followers_count > 0:
            logger.info(
                'User %s has %d followees and %d followers which will be recursed'
                % (userId, followees_count, followers_count))

    # Process followees.
    if known_followees_id is None:
        known_followees = None
    elif not recursive:
        known_followees = set()
        for followeeId in known_followees_id:
            followee = User.Id(followeeId, twitterSession)
            known_followees.add(followee)
    else:
        known_followees = set()
        followeeProjection = None
        if userProjection is not None:
            followeeProjection = userProjection.followee_projection

        for followeeId in known_followees_id:
            subUserIndex[0] += 1

            followee = readUserFromCache(followeeId,
                                         twitterSession,
                                         instanceId,
                                         recursive=False,
                                         userProjection=followeeProjection)
            if followee is not None:
                known_followees.add(followee)
            else:
                logger.error(
                    'Followee could not be found in database (followee id: %s)'
                    % followeeId)

            if onIterationFunc is not None:
                if not onIterationFunc(subUserIndex[0], totalSubUsers, False,
                                       followee, 'followee'):
                    return

    # Process followers
    followers = None
    followerIds = None
    if is_followers_loaded:
        followerIds = []
        followers = []

        followerProjection = None
        if userProjection is not None:
            followerProjection = userProjection.follower_projection

        if recursive and is_followers_loaded:

            def onFollowerLoadFunc(iteration,
                                   total,
                                   isFinished,
                                   data,
                                   iteratorId,
                                   subUserIndex=subUserIndex):
                if data is None:
                    return True

                assert isinstance(data, User)
                followerIds.append(data.id)
                followers.append(data)

                if onIterationFunc is not None:
                    if subUserIndex[0] < totalSubUsers:
                        subUserIndex[0] += 1

                    return onIterationFunc(subUserIndex[0], totalSubUsers,
                                           isFinished, data, 'follower')
                else:
                    return True

            followersCursor = cursorUsersFromCache(
                instanceId,
                pageNum=0,
                pageSize=200,
                followeeOfRequirement=userId,
                userProjection=followerProjection,
                sortByTimestamp=False)

            assert num_followers is not None
            processCursor(
                followersCursor,
                buildConstructUserFromCacheFunc(
                    twitterSession,
                    instanceId,
                    recursive=False,
                    userProjection=followerProjection,
                    onIterationFunc=onFollowerLoadFunc), onFollowerLoadFunc,
                num_followers, None)

    tup = cacheData.get('follower_enrichment_progress', None)
    if tup is not None:
        queue_progress, user_progress, user_id_progress, enrichment_progress_description, queue_waiting_for_user = tup

        if queue_waiting_for_user is not None:
            queue_waiting_for_user = readUserFromCache(
                queue_waiting_for_user,
                twitterSession,
                instanceId,
                recursive=False,
                userProjection=UserProjection.IdName())

        tup = (queue_progress, user_progress, user_id_progress,
               enrichment_progress_description, queue_waiting_for_user)

    place = None
    twitter_place_data = cacheData.get('twitter_place', None)
    if twitter_place_data is not None:
        place = Place.FromCache(twitter_place_data)

    item = User.FromCache(cacheData.get('data', None), twitterSession,
                          cacheData.get('timestamp',
                                        None), geocode, tup, geocode_bias,
                          geocoded_from, is_followee, is_associated_with_tweet,
                          last_follower_enrichment_error, known_followees,
                          place, queued_for_follower_enrichment)

    if followers is not None:
        item.loadFollowers(followerIds, followers)

    analysis = cacheData.get('analysis', None)
    if analysis is not None:
        for analysisSub in analysis:
            for analysisName, analysisData in analysisSub.iteritems():
                analyser = buildAnalyserFromName(analysisName)
                analyser.from_cache(analysisData)
                item.addAnalyser(analyser)

    return item
Ejemplo n.º 10
0
def _constructUser(instanceId, twitterSession, cacheData, recursive=True, userProjection=None, onIterationFunc=None):
    if cacheData is None:
        return None

    geocodePlaceId = cacheData.get('geocode',None)
    if geocodePlaceId is None:
        geocode = None
    else:
        geocode = geocodeFromCacheById(geocodePlaceId)

    userId = cacheData.get('_id')
    geocode_bias = cacheData.get('geocode_bias',None)
    geocoded_from = cacheData.get('geocoded_from',None)
    is_followers_loaded = cacheData.get('is_followers_loaded',None)
    is_followee = cacheData.get('is_followee',None)
    is_associated_with_tweet = cacheData.get('is_associated_with_tweet',None)
    last_follower_enrichment_error = cacheData.get('last_follower_enrichment_error',None)
    queued_for_follower_enrichment = cacheData.get('queued_for_follower_enrichment',None)

    known_followees_id = cacheData.get('known_followees',None)

    user_data = cacheData.get('data',None)
    num_followers = None
    if user_data is not None:
        num_followers = user_data.get('followers_count',None)

    totalSubUsers = 0
    subUserIndex = [0]

    # First calculate total size.
    if recursive:
        followees_count = 0
        followers_count = 0

        if known_followees_id is not None:
            followees_count = len(known_followees_id)
            totalSubUsers += followees_count

        if is_followers_loaded and num_followers is not None:
            followers_count = num_followers
            totalSubUsers += followers_count

        if followees_count > 0 or followers_count > 0:
            logger.info('User %s has %d followees and %d followers which will be recursed' % (userId, followees_count, followers_count))

    # Process followees.
    if known_followees_id is None:
        known_followees = None
    elif not recursive:
        known_followees = set()
        for followeeId in known_followees_id:
            followee = User.Id(followeeId,twitterSession)
            known_followees.add(followee)
    else:
        known_followees = set()
        followeeProjection = None
        if userProjection is not None:
            followeeProjection = userProjection.followee_projection

        for followeeId in known_followees_id:
            subUserIndex[0] += 1

            followee = readUserFromCache(followeeId, twitterSession, instanceId, recursive=False, userProjection=followeeProjection)
            if followee is not None:
                known_followees.add(followee)
            else:
                logger.error('Followee could not be found in database (followee id: %s)' % followeeId)

            if onIterationFunc is not None:
                if not onIterationFunc(subUserIndex[0], totalSubUsers, False, followee, 'followee'):
                    return

    # Process followers
    followers = None
    followerIds = None
    if is_followers_loaded:
        followerIds = []
        followers = []

        followerProjection = None
        if userProjection is not None:
            followerProjection = userProjection.follower_projection

        if recursive and is_followers_loaded:
            def onFollowerLoadFunc(iteration, total, isFinished, data, iteratorId, subUserIndex=subUserIndex):
                if data is None:
                    return True

                assert isinstance(data, User)
                followerIds.append(data.id)
                followers.append(data)

                if onIterationFunc is not None:
                    if subUserIndex[0] < totalSubUsers:
                        subUserIndex[0] += 1

                    return onIterationFunc(subUserIndex[0], totalSubUsers, isFinished, data, 'follower')
                else:
                    return True

            followersCursor = cursorUsersFromCache(instanceId, pageNum=0, pageSize=200, followeeOfRequirement=userId, userProjection=followerProjection, sortByTimestamp=False)

            assert num_followers is not None
            processCursor(followersCursor, buildConstructUserFromCacheFunc(twitterSession, instanceId, recursive=False, userProjection=followerProjection, onIterationFunc=onFollowerLoadFunc), onFollowerLoadFunc, num_followers, None)

    tup = cacheData.get('follower_enrichment_progress',None)
    if tup is not None:
        queue_progress, user_progress, user_id_progress, enrichment_progress_description, queue_waiting_for_user = tup

        if queue_waiting_for_user is not None:
            queue_waiting_for_user = readUserFromCache(queue_waiting_for_user, twitterSession, instanceId, recursive=False, userProjection=UserProjection.IdName())

        tup = (queue_progress,
               user_progress,
               user_id_progress,
               enrichment_progress_description,
               queue_waiting_for_user)

    place = None
    twitter_place_data = cacheData.get('twitter_place',None)
    if twitter_place_data is not None:
        place = Place.FromCache(twitter_place_data)

    item = User.FromCache(cacheData.get('data',None),
                          twitterSession,
                          cacheData.get('timestamp',None),
                          geocode,
                          tup,
                          geocode_bias,
                          geocoded_from,
                          is_followee,
                          is_associated_with_tweet,
                          last_follower_enrichment_error,
                          known_followees,
                          place,
                          queued_for_follower_enrichment)

    if followers is not None:
        item.loadFollowers(followerIds, followers)

    analysis = cacheData.get('analysis',None)
    if analysis is not None:
        for analysisSub in analysis:
            for analysisName, analysisData in analysisSub.iteritems():
                analyser = buildAnalyserFromName(analysisName)
                analyser.from_cache(analysisData)
                item.addAnalyser(analyser)

    return item