def func(templateArguments, instance, location, provider):
    """Render the location page for one place within a search stream.

    Looks up the Twitter instance registered under ``instance`` and the
    geocode cached for the ``provider``/``location`` pair, adds the values
    needed by the template to ``templateArguments`` and renders
    ``location.tpl``.

    ``self`` is not a parameter of this function; it is resolved from the
    enclosing scope.

    Args:
        templateArguments: Mapping of template variables; updated in place.
        instance: Instance key identifying the search stream.
        location: Place ID of the requested location.
        provider: Provider ID of the requested location.

    Returns:
        The result of ``template('location.tpl', templateArguments)``.

    Aborts with HTTP 404 when no instance exists for ``instance`` or when
    the requested place is not present in the geocode cache.
    """
    twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instance)
    if twitterInstance is None:
        abort(404, "No active search stream found at this address")

    geocode = geocodeFromCacheById(GeocodeResultAbstract.buildCacheId(provider, location))
    if geocode is None:
        # location/provider come from the request, so an unknown place is a
        # client error rather than a broken invariant. An assert here would
        # be stripped under -O and otherwise surface as a server error.
        abort(404, "No location found at this address")

    instanceDescription = getInstanceDescription(twitterInstance)
    instanceLink = getInstanceLink(twitterInstance)
    homeLink = getHomeLink(Configuration.PROJECT_NAME)

    templateArguments.update({
        'home_link': homeLink,
        'instance': instance,
        'location': location,
        'provider': provider,
        'instance_link': instanceLink,
        'instance_description': instanceDescription,
        'place': geocode.display_name_short,
        'place_coord': geocode.coordinate,
        'startEpoch': twitterInstance.constructed_at,
        # Client needs to offset to this epoch in case its clock is wrong.
        'server_current_epoch': getEpochMs(),
        'max_tweets': Configuration.MAX_CLIENT_LIVE_TWEETS,
    })

    if geocode.has_bounding_box:
        templateArguments.update({'place_bounding_box': geocode.bounding_box})

    if geocode.has_country:
        country = geocode.country
        templateArguments.update({
            'place_country_link': LocationsPage.link_info.getPageLink(
                instance, country.place_id, country.provider_id),
            'place_country': country.display_name_short,
        })

    if geocode.has_continent:
        continent = geocode.continent
        templateArguments.update({
            'place_continent_link': LocationsPage.link_info.getPageLink(
                instance, continent.place_id, continent.provider_id),
            'place_continent': continent.display_name_short,
        })

    return template('location.tpl', templateArguments)
def _constructTweet(instanceId, twitterSession, cacheData, retrieveUserData, userProjection):
    """Rebuild a Tweet from its cached record.

    Args:
        instanceId: Passed through to ``readUserFromCache`` when the user is
            reloaded.
        twitterSession: Passed to ``Tweet.FromCache`` and
            ``readUserFromCache``.
        cacheData: Cached record with ``'geocode'``, ``'data'`` and
            ``'timestamp'`` entries, or None.
        retrieveUserData: When true, the tweet's ``user`` is replaced with
            the result of ``readUserFromCache`` for that user's id.
        userProjection: Passed through to ``readUserFromCache``.

    Returns:
        The reconstructed tweet, or None when ``cacheData`` is None.
    """
    if cacheData is None:
        return None

    cachedGeocode = geocodeFromCacheById(cacheData['geocode'])
    tweet = Tweet.FromCache(cacheData['data'],
                            twitterSession,
                            cacheData['timestamp'],
                            cachedGeocode)

    if not retrieveUserData:
        return tweet

    tweet.user = readUserFromCache(tweet.user.id,
                                   twitterSession,
                                   instanceId,
                                   False,
                                   userProjection)
    return tweet
def from_cache(self, data):
    """Restore the follower analysis counters from a cached record.

    For every entry of ``data['geocoded_followers_by_location']`` the
    geocode is looked up in the cache by the entry's ``'_id'``, reported via
    ``self.onGeocodedFollower`` and its ``'quantity'`` is stored in
    ``self.num_geocoded_followers_by_location``. The three follower totals
    are then copied from ``data``.

    Entries whose geocode is no longer in the cache are logged and skipped,
    so ``None`` is never passed to ``onGeocodedFollower`` nor used as a key
    of ``num_geocoded_followers_by_location``.

    Args:
        data: Mapping with the keys ``'geocoded_followers_by_location'``,
            ``'num_followers'``, ``'num_non_geocoded_followers'`` and
            ``'num_geocoded_followers'``.
    """
    for entry in data['geocoded_followers_by_location']:
        theId = entry['_id']
        quantity = entry['quantity']

        geocodeResult = geocodeFromCacheById(theId)
        if geocodeResult is None:
            logger.error('Failed to find geocode data in cache with ID: %s, while processing followers analysis result' % str(theId))
            # Nothing meaningful can be recorded for a missing geocode.
            continue

        self.onGeocodedFollower(geocodeResult)
        self.num_geocoded_followers_by_location[geocodeResult] = quantity

    self.num_followers = data['num_followers']
    self.num_non_geocoded_followers = data['num_non_geocoded_followers']
    self.num_geocoded_followers = data['num_geocoded_followers']
def func(templateArguments, instance, location, provider):
    """Render the location page for one place within a search stream.

    Looks up the Twitter instance registered under ``instance`` and the
    geocode cached for the ``provider``/``location`` pair, adds the values
    needed by the template to ``templateArguments`` and renders
    ``location.tpl``.

    ``self`` is not a parameter of this function; it is resolved from the
    enclosing scope.

    Args:
        templateArguments: Mapping of template variables; updated in place.
        instance: Instance key identifying the search stream.
        location: Place ID of the requested location.
        provider: Provider ID of the requested location.

    Returns:
        The result of ``template('location.tpl', templateArguments)``.

    Aborts with HTTP 404 when no instance exists for ``instance`` or when
    the requested place is not present in the geocode cache.
    """
    twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instance)
    if twitterInstance is None:
        abort(404, "No active search stream found at this address")

    geocode = geocodeFromCacheById(GeocodeResultAbstract.buildCacheId(provider, location))
    if geocode is None:
        # location/provider come from the request, so an unknown place is a
        # client error rather than a broken invariant. An assert here would
        # be stripped under -O and otherwise surface as a server error.
        abort(404, "No location found at this address")

    instanceDescription = getInstanceDescription(twitterInstance)
    instanceLink = getInstanceLink(twitterInstance)
    homeLink = getHomeLink(Configuration.PROJECT_NAME)

    templateArguments.update({
        'home_link': homeLink,
        'instance': instance,
        'location': location,
        'provider': provider,
        'instance_link': instanceLink,
        'instance_description': instanceDescription,
        'place': geocode.display_name_short,
        'place_coord': geocode.coordinate,
        'startEpoch': twitterInstance.constructed_at,
        # Client needs to offset to this epoch in case its clock is wrong.
        'server_current_epoch': getEpochMs(),
        'max_tweets': Configuration.MAX_CLIENT_LIVE_TWEETS,
    })

    if geocode.has_bounding_box:
        templateArguments.update({'place_bounding_box': geocode.bounding_box})

    if geocode.has_country:
        country = geocode.country
        templateArguments.update({
            'place_country_link': LocationsPage.link_info.getPageLink(
                instance, country.place_id, country.provider_id),
            'place_country': country.display_name_short,
        })

    if geocode.has_continent:
        continent = geocode.continent
        templateArguments.update({
            'place_continent_link': LocationsPage.link_info.getPageLink(
                instance, continent.place_id, continent.provider_id),
            'place_continent': continent.display_name_short,
        })

    return template('location.tpl', templateArguments)
def func(templateArguments, instance):
    """Serve influence data for one source place as a JSON-ready dict.

    Reads ``start_epoch``, ``end_epoch``, ``source_place_id`` and
    ``source_provider_id`` from ``request.GET``, loads the matching temporal
    range for ``instance`` and groups the destinations by place type. Only
    the highest-count records of each place type (at most
    ``Configuration.DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE``) are
    resolved against the geocode cache and returned. Records whose geocode
    is missing from the cache are logged and left out.

    ``self`` is not a parameter of this function; it is resolved from the
    enclosing scope. ``templateArguments`` is accepted but not used here.

    Returns:
        An empty dict when the instance is unknown or the place ID is
        invalid. Otherwise ``{'json': result}`` where ``result`` has the
        keys ``'city'``, ``'country'`` and ``'continent'``, each holding
        ``'geocode_list'`` (records of ``[placeId, providerId, display_name,
        coordinate, count, bounding_box]``) and ``'total'`` (the sum of the
        listed counts).
    """
    twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instance)
    if twitterInstance is None:
        return dict()

    baseEpoch = twitterInstance.constructed_at
    start_epoch = parseInteger(request.GET.start_epoch, default=None)
    end_epoch = parseInteger(request.GET.end_epoch, default=None)
    source_place_id = parseInteger(request.GET.source_place_id)
    source_provider_id = parseInteger(request.GET.source_provider_id)

    if source_place_id is None:
        # Log what the client actually sent; the parsed value is always
        # None on this branch and carries no information.
        logger.error('Invalid place ID specified while providing influence data: %s' % unicode(request.GET.source_place_id))
        return dict()

    source_cache_id = GeocodeResultAbstract.buildCacheId(source_provider_id, source_place_id)
    temporalCollection = getTemporalInfluenceCollection(instance)

    if start_epoch is not None:
        start_time_id = getTimeIdFromTimestamp(baseEpoch, Configuration.TEMPORAL_STEP, start_epoch)
    else:
        start_time_id = None

    if end_epoch is not None:
        end_time_id = getTimeIdFromTimestamp(baseEpoch, Configuration.TEMPORAL_STEP, end_epoch)
    else:
        end_time_id = None

    timerMs = getEpochMs()
    cacheData = getTemporalRange(temporalCollection,
                                 start_time_id,
                                 end_time_id,
                                 source_cache_id,
                                 preciseFromBack=True,
                                 preciseFromFront=True)
    logger.info('Took %dms to read temporal range data' % (getEpochMs() - timerMs))

    timerMs = getEpochMs()
    geocodeByPlaceType = dict()
    totalsByPlaceType = dict()

    if cacheData is not None:
        for providerId, providerIdData in cacheData.iteritems():
            providerId = int(providerId)

            for destination, count in providerIdData.iteritems():
                # Destination keys look like '<placeType>_<placeId>'.
                split = destination.split('_')
                placeType = int(split[0])
                placeId = int(split[1])

                # Slots 2, 3 and 5 (name, coordinate, bounding box) are
                # filled in below for the records that get displayed.
                record = [placeId, providerId, None, None, count, None]
                geocodeByPlaceType.setdefault(placeType, list()).append(record)

        # Process only the records we are going to display.
        # Rebinding the value of an existing key is safe while iterating.
        for placeType, records in geocodeByPlaceType.iteritems():
            aux = sorted(records, key=lambda x: x[4], reverse=True)
            aux = aux[:Configuration.DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE]

            displayed = []
            total = 0
            for record in aux:
                cacheId = GeocodeResultAbstract.buildCacheId(record[1], record[0])
                geocode = geocodeFromCacheById(cacheId)
                if geocode is None:
                    # A cache miss must not fail the whole response.
                    logger.error('Failed to find geocode data in cache with ID: %s, while providing influence data' % str(cacheId))
                    continue

                record[2] = geocode.display_name
                record[3] = geocode.coordinate
                record[5] = geocode.bounding_box

                displayed.append(record)
                total += record[4]

            geocodeByPlaceType[placeType] = displayed
            totalsByPlaceType[placeType] = total

    def getResultPart(placeType):
        """Return the record list and count total for one place type."""
        return {'geocode_list': geocodeByPlaceType.get(placeType, list()),
                'total': totalsByPlaceType.get(placeType, 0)}

    resultData = dict()
    resultData['city'] = getResultPart(GeocodeResultAbstract.PlaceTypes.CITY)
    resultData['country'] = getResultPart(GeocodeResultAbstract.PlaceTypes.COUNTRY)
    resultData['continent'] = getResultPart(GeocodeResultAbstract.PlaceTypes.CONTINENT)
    logger.info('Took %dms to build temporal range result data' % (getEpochMs() - timerMs))

    return {'json': resultData}
def func(templateArguments, instance):
    """Serve influence data for one source place as a JSON-ready dict.

    Reads ``start_epoch``, ``end_epoch``, ``source_place_id`` and
    ``source_provider_id`` from ``request.GET``, loads the matching temporal
    range for ``instance`` and groups the destinations by place type. Only
    the highest-count records of each place type (at most
    ``Configuration.DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE``) are
    resolved against the geocode cache and returned. Records whose geocode
    is missing from the cache are logged and left out.

    ``self`` is not a parameter of this function; it is resolved from the
    enclosing scope. ``templateArguments`` is accepted but not used here.

    Returns:
        An empty dict when the instance is unknown or the place ID is
        invalid. Otherwise ``{'json': result}`` where ``result`` has the
        keys ``'city'``, ``'country'`` and ``'continent'``, each holding
        ``'geocode_list'`` (records of ``[placeId, providerId, display_name,
        coordinate, count, bounding_box]``) and ``'total'`` (the sum of the
        listed counts).
    """
    twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instance)
    if twitterInstance is None:
        return dict()

    baseEpoch = twitterInstance.constructed_at
    start_epoch = parseInteger(request.GET.start_epoch, default=None)
    end_epoch = parseInteger(request.GET.end_epoch, default=None)
    source_place_id = parseInteger(request.GET.source_place_id)
    source_provider_id = parseInteger(request.GET.source_provider_id)

    if source_place_id is None:
        # Log what the client actually sent; the parsed value is always
        # None on this branch and carries no information.
        logger.error('Invalid place ID specified while providing influence data: %s' % unicode(request.GET.source_place_id))
        return dict()

    source_cache_id = GeocodeResultAbstract.buildCacheId(source_provider_id, source_place_id)
    temporalCollection = getTemporalInfluenceCollection(instance)

    if start_epoch is not None:
        start_time_id = getTimeIdFromTimestamp(baseEpoch, Configuration.TEMPORAL_STEP, start_epoch)
    else:
        start_time_id = None

    if end_epoch is not None:
        end_time_id = getTimeIdFromTimestamp(baseEpoch, Configuration.TEMPORAL_STEP, end_epoch)
    else:
        end_time_id = None

    timerMs = getEpochMs()
    cacheData = getTemporalRange(temporalCollection,
                                 start_time_id,
                                 end_time_id,
                                 source_cache_id,
                                 preciseFromBack=True,
                                 preciseFromFront=True)
    logger.info('Took %dms to read temporal range data' % (getEpochMs() - timerMs))

    timerMs = getEpochMs()
    geocodeByPlaceType = dict()
    totalsByPlaceType = dict()

    if cacheData is not None:
        for providerId, providerIdData in cacheData.iteritems():
            providerId = int(providerId)

            for destination, count in providerIdData.iteritems():
                # Destination keys look like '<placeType>_<placeId>'.
                split = destination.split('_')
                placeType = int(split[0])
                placeId = int(split[1])

                # Slots 2, 3 and 5 (name, coordinate, bounding box) are
                # filled in below for the records that get displayed.
                record = [placeId, providerId, None, None, count, None]
                geocodeByPlaceType.setdefault(placeType, list()).append(record)

        # Process only the records we are going to display.
        # Rebinding the value of an existing key is safe while iterating.
        for placeType, records in geocodeByPlaceType.iteritems():
            aux = sorted(records, key=lambda x: x[4], reverse=True)
            aux = aux[:Configuration.DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE]

            displayed = []
            total = 0
            for record in aux:
                cacheId = GeocodeResultAbstract.buildCacheId(record[1], record[0])
                geocode = geocodeFromCacheById(cacheId)
                if geocode is None:
                    # A cache miss must not fail the whole response.
                    logger.error('Failed to find geocode data in cache with ID: %s, while providing influence data' % str(cacheId))
                    continue

                record[2] = geocode.display_name
                record[3] = geocode.coordinate
                record[5] = geocode.bounding_box

                displayed.append(record)
                total += record[4]

            geocodeByPlaceType[placeType] = displayed
            totalsByPlaceType[placeType] = total

    def getResultPart(placeType):
        """Return the record list and count total for one place type."""
        return {'geocode_list': geocodeByPlaceType.get(placeType, list()),
                'total': totalsByPlaceType.get(placeType, 0)}

    resultData = dict()
    resultData['city'] = getResultPart(GeocodeResultAbstract.PlaceTypes.CITY)
    resultData['country'] = getResultPart(GeocodeResultAbstract.PlaceTypes.COUNTRY)
    resultData['continent'] = getResultPart(GeocodeResultAbstract.PlaceTypes.CONTINENT)
    logger.info('Took %dms to build temporal range result data' % (getEpochMs() - timerMs))

    return {'json': resultData}
def _constructUser(instanceId, twitterSession, cacheData, recursive=True,
                   userProjection=None, onIterationFunc=None):
    """Build a User object from a cached user record.

    Args:
        instanceId: Passed through to the cache readers used for followees
            and followers.
        twitterSession: Passed to ``User.FromCache``, ``User.Id`` and the
            cache readers.
        cacheData: Cached user record (read with ``.get``), or None.
        recursive: When true, followees are loaded with
            ``readUserFromCache`` and followers with ``cursorUsersFromCache``
            / ``processCursor``. When false, followees are created with
            ``User.Id`` and no followers are read from the cache.
        userProjection: Optional; when given, its ``followee_projection``
            and ``follower_projection`` attributes are used for sub-users.
        onIterationFunc: Optional progress callback, called as
            ``onIterationFunc(index, totalSubUsers, isFinished, user, kind)``
            with ``kind`` being ``'followee'`` or ``'follower'``.

    Returns:
        The user built by ``User.FromCache``. None when ``cacheData`` is
        None, or when ``onIterationFunc`` returns a false value while
        followees are being loaded.
    """
    if cacheData is None:
        return None

    geocodePlaceId = cacheData.get('geocode', None)
    if geocodePlaceId is None:
        geocode = None
    else:
        geocode = geocodeFromCacheById(geocodePlaceId)

    userId = cacheData.get('_id')
    geocode_bias = cacheData.get('geocode_bias', None)
    geocoded_from = cacheData.get('geocoded_from', None)
    is_followers_loaded = cacheData.get('is_followers_loaded', None)
    is_followee = cacheData.get('is_followee', None)
    is_associated_with_tweet = cacheData.get('is_associated_with_tweet', None)
    last_follower_enrichment_error = cacheData.get(
        'last_follower_enrichment_error', None)
    queued_for_follower_enrichment = cacheData.get(
        'queued_for_follower_enrichment', None)
    known_followees_id = cacheData.get('known_followees', None)

    user_data = cacheData.get('data', None)
    num_followers = None
    if user_data is not None:
        num_followers = user_data.get('followers_count', None)

    totalSubUsers = 0
    # One-element list so the nested follower callback can update the shared
    # progress counter in place.
    subUserIndex = [0]

    # First calculate total size.
    if recursive:
        followees_count = 0
        followers_count = 0

        if known_followees_id is not None:
            followees_count = len(known_followees_id)
            totalSubUsers += followees_count

        if is_followers_loaded and num_followers is not None:
            followers_count = num_followers
            totalSubUsers += followers_count

        if followees_count > 0 or followers_count > 0:
            logger.info(
                'User %s has %d followees and %d followers which will be recursed'
                % (userId, followees_count, followers_count))

    # Process followees.
    if known_followees_id is None:
        known_followees = None
    elif not recursive:
        # Non-recursive: build followees from their IDs via User.Id only.
        known_followees = set()
        for followeeId in known_followees_id:
            followee = User.Id(followeeId, twitterSession)
            known_followees.add(followee)
    else:
        known_followees = set()

        followeeProjection = None
        if userProjection is not None:
            followeeProjection = userProjection.followee_projection

        for followeeId in known_followees_id:
            subUserIndex[0] += 1
            followee = readUserFromCache(followeeId,
                                         twitterSession,
                                         instanceId,
                                         recursive=False,
                                         userProjection=followeeProjection)
            if followee is not None:
                known_followees.add(followee)
            else:
                logger.error(
                    'Followee could not be found in database (followee id: %s)'
                    % followeeId)

            # The callback is invoked for every followee ID, including those
            # that could not be loaded (followee is None then). A false
            # result abandons construction and returns None.
            if onIterationFunc is not None:
                if not onIterationFunc(subUserIndex[0], totalSubUsers, False, followee, 'followee'):
                    return

    # Process followers
    followers = None
    followerIds = None
    if is_followers_loaded:
        followerIds = []
        followers = []

    followerProjection = None
    if userProjection is not None:
        followerProjection = userProjection.follower_projection

    if recursive and is_followers_loaded:
        def onFollowerLoadFunc(iteration, total, isFinished, data, iteratorId, subUserIndex=subUserIndex):
            # Collects each loaded follower and forwards progress to
            # onIterationFunc, when one was supplied.
            if data is None:
                return True

            assert isinstance(data, User)
            followerIds.append(data.id)
            followers.append(data)

            # NOTE(review): nesting reconstructed from flattened source; the
            # counter appears to be capped at totalSubUsers while the
            # callback is still invoked for every follower. Confirm.
            if onIterationFunc is not None:
                if subUserIndex[0] < totalSubUsers:
                    subUserIndex[0] += 1
                return onIterationFunc(subUserIndex[0], totalSubUsers, isFinished, data, 'follower')
            else:
                return True

        followersCursor = cursorUsersFromCache(instanceId,
                                               pageNum=0,
                                               pageSize=200,
                                               followeeOfRequirement=userId,
                                               userProjection=followerProjection,
                                               sortByTimestamp=False)
        # Raises AssertionError when the record has followers loaded but no
        # 'followers_count' in its 'data'.
        assert num_followers is not None
        processCursor(followersCursor,
                      buildConstructUserFromCacheFunc(twitterSession,
                                                      instanceId,
                                                      recursive=False,
                                                      userProjection=followerProjection,
                                                      onIterationFunc=onFollowerLoadFunc),
                      onFollowerLoadFunc,
                      num_followers,
                      None)

    # The progress record is a 5-item sequence; its last item is a user ID
    # that gets replaced by the user read from the cache.
    tup = cacheData.get('follower_enrichment_progress', None)
    if tup is not None:
        queue_progress, user_progress, user_id_progress, enrichment_progress_description, queue_waiting_for_user = tup
        if queue_waiting_for_user is not None:
            queue_waiting_for_user = readUserFromCache(queue_waiting_for_user,
                                                       twitterSession,
                                                       instanceId,
                                                       recursive=False,
                                                       userProjection=UserProjection.IdName())
        # NOTE(review): nesting reconstructed from flattened source; the
        # tuple is assumed to be rebuilt even when no user is waiting.
        tup = (queue_progress, user_progress, user_id_progress, enrichment_progress_description, queue_waiting_for_user)

    place = None
    twitter_place_data = cacheData.get('twitter_place', None)
    if twitter_place_data is not None:
        place = Place.FromCache(twitter_place_data)

    item = User.FromCache(cacheData.get('data', None),
                          twitterSession,
                          cacheData.get('timestamp', None),
                          geocode,
                          tup,
                          geocode_bias,
                          geocoded_from,
                          is_followee,
                          is_associated_with_tweet,
                          last_follower_enrichment_error,
                          known_followees,
                          place,
                          queued_for_follower_enrichment)
    if followers is not None:
        item.loadFollowers(followerIds, followers)

    # Each element of 'analysis' maps an analyser name to its cached data.
    analysis = cacheData.get('analysis', None)
    if analysis is not None:
        for analysisSub in analysis:
            for analysisName, analysisData in analysisSub.iteritems():
                analyser = buildAnalyserFromName(analysisName)
                analyser.from_cache(analysisData)
                item.addAnalyser(analyser)

    return item
def _constructUser(instanceId, twitterSession, cacheData, recursive=True, userProjection=None, onIterationFunc=None):
    """Build a User object from a cached user record.

    Args:
        instanceId: Passed through to the cache readers used for followees
            and followers.
        twitterSession: Passed to ``User.FromCache``, ``User.Id`` and the
            cache readers.
        cacheData: Cached user record (read with ``.get``), or None.
        recursive: When true, followees are loaded with
            ``readUserFromCache`` and followers with ``cursorUsersFromCache``
            / ``processCursor``. When false, followees are created with
            ``User.Id`` and no followers are read from the cache.
        userProjection: Optional; when given, its ``followee_projection``
            and ``follower_projection`` attributes are used for sub-users.
        onIterationFunc: Optional progress callback, called as
            ``onIterationFunc(index, totalSubUsers, isFinished, user, kind)``
            with ``kind`` being ``'followee'`` or ``'follower'``.

    Returns:
        The user built by ``User.FromCache``. None when ``cacheData`` is
        None, or when ``onIterationFunc`` returns a false value while
        followees are being loaded.
    """
    if cacheData is None:
        return None

    geocodePlaceId = cacheData.get('geocode',None)
    if geocodePlaceId is None:
        geocode = None
    else:
        geocode = geocodeFromCacheById(geocodePlaceId)

    userId = cacheData.get('_id')
    geocode_bias = cacheData.get('geocode_bias',None)
    geocoded_from = cacheData.get('geocoded_from',None)
    is_followers_loaded = cacheData.get('is_followers_loaded',None)
    is_followee = cacheData.get('is_followee',None)
    is_associated_with_tweet = cacheData.get('is_associated_with_tweet',None)
    last_follower_enrichment_error = cacheData.get('last_follower_enrichment_error',None)
    queued_for_follower_enrichment = cacheData.get('queued_for_follower_enrichment',None)
    known_followees_id = cacheData.get('known_followees',None)

    user_data = cacheData.get('data',None)
    num_followers = None
    if user_data is not None:
        num_followers = user_data.get('followers_count',None)

    totalSubUsers = 0
    # One-element list so the nested follower callback can update the shared
    # progress counter in place.
    subUserIndex = [0]

    # First calculate total size.
    if recursive:
        followees_count = 0
        followers_count = 0

        if known_followees_id is not None:
            followees_count = len(known_followees_id)
            totalSubUsers += followees_count

        if is_followers_loaded and num_followers is not None:
            followers_count = num_followers
            totalSubUsers += followers_count

        if followees_count > 0 or followers_count > 0:
            logger.info('User %s has %d followees and %d followers which will be recursed' % (userId, followees_count, followers_count))

    # Process followees.
    if known_followees_id is None:
        known_followees = None
    elif not recursive:
        # Non-recursive: build followees from their IDs via User.Id only.
        known_followees = set()
        for followeeId in known_followees_id:
            followee = User.Id(followeeId,twitterSession)
            known_followees.add(followee)
    else:
        known_followees = set()

        followeeProjection = None
        if userProjection is not None:
            followeeProjection = userProjection.followee_projection

        for followeeId in known_followees_id:
            subUserIndex[0] += 1
            followee = readUserFromCache(followeeId, twitterSession, instanceId, recursive=False, userProjection=followeeProjection)
            if followee is not None:
                known_followees.add(followee)
            else:
                logger.error('Followee could not be found in database (followee id: %s)' % followeeId)

            # The callback is invoked for every followee ID, including those
            # that could not be loaded (followee is None then). A false
            # result abandons construction and returns None.
            if onIterationFunc is not None:
                if not onIterationFunc(subUserIndex[0], totalSubUsers, False, followee, 'followee'):
                    return

    # Process followers
    followers = None
    followerIds = None
    if is_followers_loaded:
        followerIds = []
        followers = []

    followerProjection = None
    if userProjection is not None:
        followerProjection = userProjection.follower_projection

    if recursive and is_followers_loaded:
        def onFollowerLoadFunc(iteration, total, isFinished, data, iteratorId, subUserIndex=subUserIndex):
            # Collects each loaded follower and forwards progress to
            # onIterationFunc, when one was supplied.
            if data is None:
                return True

            assert isinstance(data, User)
            followerIds.append(data.id)
            followers.append(data)

            # NOTE(review): nesting reconstructed from flattened source; the
            # counter appears to be capped at totalSubUsers while the
            # callback is still invoked for every follower. Confirm.
            if onIterationFunc is not None:
                if subUserIndex[0] < totalSubUsers:
                    subUserIndex[0] += 1
                return onIterationFunc(subUserIndex[0], totalSubUsers, isFinished, data, 'follower')
            else:
                return True

        followersCursor = cursorUsersFromCache(instanceId, pageNum=0, pageSize=200, followeeOfRequirement=userId, userProjection=followerProjection, sortByTimestamp=False)
        # Raises AssertionError when the record has followers loaded but no
        # 'followers_count' in its 'data'.
        assert num_followers is not None
        processCursor(followersCursor, buildConstructUserFromCacheFunc(twitterSession, instanceId, recursive=False, userProjection=followerProjection, onIterationFunc=onFollowerLoadFunc), onFollowerLoadFunc, num_followers, None)

    # The progress record is a 5-item sequence; its last item is a user ID
    # that gets replaced by the user read from the cache.
    tup = cacheData.get('follower_enrichment_progress',None)
    if tup is not None:
        queue_progress, user_progress, user_id_progress, enrichment_progress_description, queue_waiting_for_user = tup
        if queue_waiting_for_user is not None:
            queue_waiting_for_user = readUserFromCache(queue_waiting_for_user, twitterSession, instanceId, recursive=False, userProjection=UserProjection.IdName())
        # NOTE(review): nesting reconstructed from flattened source; the
        # tuple is assumed to be rebuilt even when no user is waiting.
        tup = (queue_progress, user_progress, user_id_progress, enrichment_progress_description, queue_waiting_for_user)

    place = None
    twitter_place_data = cacheData.get('twitter_place',None)
    if twitter_place_data is not None:
        place = Place.FromCache(twitter_place_data)

    item = User.FromCache(cacheData.get('data',None), twitterSession, cacheData.get('timestamp',None), geocode, tup, geocode_bias, geocoded_from, is_followee, is_associated_with_tweet, last_follower_enrichment_error, known_followees, place, queued_for_follower_enrichment)
    if followers is not None:
        item.loadFollowers(followerIds, followers)

    # Each element of 'analysis' maps an analyser name to its cached data.
    analysis = cacheData.get('analysis',None)
    if analysis is not None:
        for analysisSub in analysis:
            for analysisName, analysisData in analysisSub.iteritems():
                analyser = buildAnalyserFromName(analysisName)
                analyser.from_cache(analysisData)
                item.addAnalyser(analyser)

    return item