def geocodeFromCacheById(cacheId, inMemoryOnly=None):
    """Resolve a place cache ID to a geocode result.

    cacheId may be a single cache ID or a list of cache IDs; for a list the
    first entry whose provider passes isIntendedForDirectUse() is used. A
    cache ID carrying an 'importance_rating' entry has that entry stripped
    (on a copy) and forwarded to buildGeocodeResult().

    When inMemoryOnly is true (None is treated as False) the database is
    never consulted; only the in-memory cache is.

    Returns the geocode result, or None when the list holds no usable ID,
    when the place is in neither cache nor database, or when the GNS lookup
    finds nothing.
    """
    if inMemoryOnly is None:
        inMemoryOnly = False

    importanceRating = None
    if 'importance_rating' in cacheId:
        importanceRating = cacheId['importance_rating']
        # Copy first so the caller's object is left untouched, then drop the
        # rating so it does not conflict with the mongoDB query below.
        cacheId = dict(cacheId)
        del cacheId['importance_rating']

    if isinstance(cacheId, list):
        for candidate in cacheId:
            if isIntendedForDirectUse(GeocodeResultAbstract.getProviderIdFromCacheId(candidate)):
                cacheId = candidate
                break
        else:
            # No entry was usable; cacheId is still the full list here.
            logger.error('Could not find useful ID from ID list %s' % (unicode(cacheId)))
            return None

    if not isIntendedForDirectUse(GeocodeResultAbstract.getProviderIdFromCacheId(cacheId)):
        gnsGeocode = GeocodeResultAbstract.getGnsByPlaceId(GeocodeResultAbstract.getPlaceIdFromCacheId(cacheId))
        if gnsGeocode is None:
            logger.error('Failed to retrieve GNS data with cache ID: %s' % unicode(cacheId))
        return gnsGeocode

    memoryKey = GeocodeResultAbstract.buildTupleFromCacheId(cacheId)
    geocodeResult = inMemoryCacheGeocodeData.get(memoryKey, None)

    if geocodeResult is None:
        if inMemoryOnly:
            return None

        db = getDatabase()
        assert isinstance(db, Database)
        placeDocument = db.place.find_one({'_id' : cacheId})
        if placeDocument is None:
            logger.warn('Could not find place cache ID in database: %s' % unicode(cacheId))
            return None

        geocodeResult = buildGeocodeResult(placeDocument['place_data'], cacheId['providerId'], importanceRating)

    # Always re-insert, even on a cache hit. Per the original author's note
    # this moves the item to the back of the ordered dict, giving a
    # 'least recently used' cache.
    if geocodeResult is not None:
        inMemoryCacheGeocodeData[memoryKey] = geocodeResult

    return geocodeResult
def addTemporalEntry(self, temporalCollection, timeId, userProviderId, userPlaceId, followerProviderId, followerPlaceId, followerPlaceType):
    """Record one temporal entry from a user's place to a follower's place.

    Does nothing once the instance is shut down. Otherwise the entry is
    written, linked to the last time ID previously seen for the same source
    place, and that last time ID is then updated both in memory and through
    setInstanceTemporalSourceLastTime().
    """
    if self.is_shutdown:
        return

    sourceKey = GeocodeResultAbstract.buildCacheIdTuple(userProviderId, userPlaceId)
    sourceCacheId = GeocodeResultAbstract.buildCacheId(userProviderId, userPlaceId)
    previousTimeId = self.last_temporal_time_id_by_source.get(sourceKey, None)

    # NOTE(review): presumably this bare name resolves to a module-level
    # addTemporalEntry function rather than this method - confirm.
    addTemporalEntry(temporalCollection,
                     previousTimeId,
                     timeId,
                     sourceCacheId,
                     '%s_%s' % (followerPlaceType, followerPlaceId),
                     followerProviderId)

    self.last_temporal_time_id_by_source[sourceKey] = timeId
    setInstanceTemporalSourceLastTime(self.instance_key, userProviderId, userPlaceId, timeId)
def func(templateArguments):
    """Search for places matching the 'place_name' query parameter.

    Up to 3 GNS matches are taken first; whatever remains of a budget of 10
    is requested from geocodeSearch(). Returns {'json': [...]} where each
    entry is a (cache_id, bounding_box, coordinate, display_name) tuple, or
    an empty list when no place name was supplied.
    """
    placeName = parseString(request.GET.place_name)
    data = list()
    if placeName is None:
        return {'json' : data}

    locations = list()
    locations += GeocodeResultAbstract.searchGnsByName(placeName,3)

    remaining = 10 - len(locations)
    if remaining > 0:
        providerLocations = geocodeSearch(self.provider_id,placeName,remaining)
        # The budget is not read again below; the bookkeeping is kept so the
        # statement sequence matches the original.
        remaining -= len(providerLocations)
        locations += providerLocations

    for match in locations:
        assert isinstance(match,GeocodeResultAbstract)
        entry = (match.cache_id,match.bounding_box,match.coordinate,match.display_name)
        data.append(entry)

    return {'json' : data}
def func(templateArguments):
    """Return place suggestions for the 'place_name' query parameter.

    GNS results (at most 3 requested) come first, followed by results from
    geocodeSearch() for whatever is left of an overall budget of 10. The
    response is {'json': data} with one (cache_id, bounding_box, coordinate,
    display_name) tuple per location; data is empty when the parameter
    could not be parsed.
    """
    data = list()
    placeName = parseString(request.GET.place_name)
    if placeName is None:
        return {'json': data}

    budget = 10
    locations = list()

    locations += GeocodeResultAbstract.searchGnsByName(placeName, 3)
    budget -= len(locations)

    if budget > 0:
        extraLocations = geocodeSearch(self.provider_id, placeName, budget)
        # Not consulted afterwards; retained to mirror the original steps.
        budget -= len(extraLocations)
        locations += extraLocations

    for found in locations:
        assert isinstance(found, GeocodeResultAbstract)
        data.append((found.cache_id,
                     found.bounding_box,
                     found.coordinate,
                     found.display_name))

    return {'json': data}
def onInstanceLoadFunc(instanceKey, oauthToken, oauthSecret, geographicSetupString, keywords, instanceSetupCode, startTime, temporalLastTimeId, count=count):
    """Recreate one stored Twitter instance.

    temporalLastTimeId is a two-level mapping (provider ID -> place ID ->
    time ID) whose keys and values are converted to int and flattened into
    a dict keyed by cache ID tuple, which is handed to TwitterInstance.

    count is bound at definition time to the enclosing `count` object;
    count[0] is incremented once per loaded instance.
    """
    lastTimeIdBySource = dict()
    for rawProviderId, timeIdByPlace in temporalLastTimeId.iteritems():
        sourceProviderId = int(rawProviderId)
        for rawPlaceId, rawTimeId in timeIdByPlace.iteritems():
            sourcePlaceId = int(rawPlaceId)
            lastTimeId = int(rawTimeId)
            sourceKey = GeocodeResultAbstract.buildCacheIdTuple(
                sourceProviderId, sourcePlaceId)
            lastTimeIdBySource[sourceKey] = lastTimeId
            logger.debug(
                'Loaded instance %s last temporal change source %d/%d -> %d'
                % (instanceKey, sourceProviderId, sourcePlaceId, lastTimeId))

    # The constructed object is not kept here; it receives the
    # twitter_instances container as its second argument.
    TwitterInstance(
        instanceKey,
        webApplication.twitter_instances,
        TwitterAuthentication(Configuration.CONSUMER_TOKEN,
                              Configuration.CONSUMER_SECRET,
                              oauthToken,
                              oauthSecret),
        unicode(geographicSetupString),
        keywords,
        instanceSetupCode,
        startTime,
        lastTimeIdBySource)

    # can delete, was debugging indexes.
    if args.rebuild_instance_indexes:
        logger.info('Dropping indexes of instance %s' % instanceKey)
        for collection in (getUserCollection(instanceKey),):
            collection.drop_indexes()
        getTweetCollection(instanceKey).drop_indexes()

    count[0] += 1
def func(templateArguments, instance, location, provider):
    """Render 'location.tpl' for one place of a running instance.

    Aborts with HTTP 404 when no instance exists for `instance`, and also
    when the (provider, location) pair cannot be resolved to a geocode.
    The geocode lookup returns None on a miss, so this is reported as a
    client error instead of relying on an assert, which is stripped under
    -O and otherwise surfaces as an AssertionError.

    templateArguments is updated in place with the page data and passed to
    the template. `self` is not defined in this block; presumably it comes
    from the enclosing scope.
    """
    twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(
        instance)
    if twitterInstance is None:
        abort(404, "No active search stream found at this address")

    geocode = geocodeFromCacheById(
        GeocodeResultAbstract.buildCacheId(provider, location))
    if geocode is None:
        abort(404, "No location found at this address")

    instanceDescription = getInstanceDescription(twitterInstance)
    instanceLink = getInstanceLink(twitterInstance)
    homeLink = getHomeLink(Configuration.PROJECT_NAME)

    templateArguments.update(
        {
            'home_link': homeLink,
            'instance': instance,
            'location': location,
            'provider': provider,
            'instance_link': instanceLink,
            'instance_description': instanceDescription,
            'place': geocode.display_name_short,
            'place_coord': geocode.coordinate,
            'startEpoch': twitterInstance.constructed_at,
            # Client needs to offset to this epoch in case its clock is wrong.
            'server_current_epoch': getEpochMs(),
            'max_tweets': Configuration.MAX_CLIENT_LIVE_TWEETS
        }
    )

    if geocode.has_bounding_box:
        templateArguments.update(
            {'place_bounding_box': geocode.bounding_box})

    if geocode.has_country:
        country = geocode.country
        templateArguments.update({
            'place_country_link':
            LocationsPage.link_info.getPageLink(
                instance, country.place_id, country.provider_id),
            'place_country': country.display_name_short
        })

    if geocode.has_continent:
        continent = geocode.continent
        templateArguments.update({
            'place_continent_link':
            LocationsPage.link_info.getPageLink(
                instance, continent.place_id, continent.provider_id),
            'place_continent': continent.display_name_short
        })

    return template('location.tpl', templateArguments)
def func(templateArguments, instance):
    """Return one page of cached tweets or users for a place as JSON data.

    Query parameters: type ('tweet' or 'user', required), start_epoch,
    end_epoch, page (defaults to 0), place_id, provider_id,
    projection_type (empty/absent or 'name-only', users only) and followee
    (required for users).

    Returns {'json': rows}, or the result of redirect_problem() when a
    required argument is missing or the projection type is unsupported.
    """
    dataType = parseString(request.GET.type,['tweet','user'])
    start_epoch = parseInteger(request.GET.start_epoch)
    end_epoch = parseInteger(request.GET.end_epoch)
    page_num = parseInteger(request.GET.page)
    place_id = parseInteger(request.GET.place_id)
    provider_id = parseInteger(request.GET.provider_id)
    projection_type = parseString(request.GET.projection_type)
    followee = parseInteger(request.GET.followee)

    cache_id = GeocodeResultAbstract.buildCacheId(provider_id, place_id)

    if dataType is None:
        return redirect_problem('type is a required argument')

    if page_num is None:
        page_num = 0

    data = []
    if dataType == 'tweet':
        tweets = readTweetsFromCache(None, instance, cache_id, start_epoch, end_epoch, page_num, TwitterCachePage.PAGE_SIZE_FULL_DATA)
        if tweets is not None:
            for tweet in tweets:
                assert isinstance(tweet, Tweet)
                userHtml = UserInformationPage.getPageLinkImage(instance, tweet.user, target='_self')
                data.append([tweet.created_at,
                             userHtml,
                             tweet.user.location_text,
                             tweet.text])
    elif dataType == 'user':
        # parseString may hand back None as well as an empty string, so test
        # truthiness; len(None) used to raise TypeError here.
        if not projection_type:
            projection = None
            pageSize = TwitterCachePage.PAGE_SIZE_FULL_DATA
        elif projection_type == 'name-only':
            projection = UserProjection.IdNameImage()
            pageSize = TwitterCachePage.PAGE_SIZE_ID_NAME_DATA
        else:
            return redirect_problem('Unsupported projection type: %s' % projection_type)

        if followee is None:
            return redirect_problem('Followee is required')

        users = readUsersFromCache(None, instance, cache_id, start_epoch, end_epoch, page_num, pageSize, followee, userProjection=projection)
        if users is not None:
            for user in users:
                assert isinstance(user, User)
                data.append([user.id,
                             user.name,
                             user.profile_image_url,
                             UserInformationPage.link_info.getPageLink(instance, user.id)])

    return {'json' : data}
def addTemporalEntryForCurrentUser(follower):
    """Add a temporal entry for every (user place, follower place) pair.

    The time ID is computed once, from the current epoch, and shared by all
    entries. `user`, `instance`, `startTime` and `temporalCollection` are
    not defined in this block; presumably they come from the enclosing
    scope.
    """
    timeId = getTimeIdFromTimestamp(startTime, Configuration.TEMPORAL_STEP, getEpochMs())

    sourceCacheIds = user.location_geocode.all_geocode_results_cache_id
    followerPlaces = follower.location_geocode.all_geocode_results

    for sourceCacheId in sourceCacheIds:
        sourcePlaceId = GeocodeResultAbstract.getPlaceIdFromCacheId(sourceCacheId)
        sourceProviderId = GeocodeResultAbstract.getProviderIdFromCacheId(sourceCacheId)

        for followerPlace in followerPlaces:
            destinationPlaceId = followerPlace.place_id
            destinationProviderId = followerPlace.provider_id
            destinationPlaceType = followerPlace.place_type

            instance.addTemporalEntry(
                temporalCollection,
                timeId,
                sourceProviderId,
                sourcePlaceId,
                destinationProviderId,
                destinationPlaceId,
                destinationPlaceType)
def func(templateArguments, instance, location, provider):
    """Render the location page ('location.tpl') for a place of an instance.

    Responds with HTTP 404 when the instance key is unknown and, likewise,
    when the (provider, location) pair does not resolve to a geocode. The
    lookup returns None on a miss, which was previously only guarded by an
    assert (removed under -O, AssertionError otherwise).

    templateArguments is filled in place and handed to the template.
    `self` is not defined in this block; presumably it comes from the
    enclosing scope.
    """
    twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instance)
    if twitterInstance is None:
        abort(404, "No active search stream found at this address")

    geocode = geocodeFromCacheById(GeocodeResultAbstract.buildCacheId(provider,location))
    if geocode is None:
        abort(404, "No location found at this address")

    instanceDescription = getInstanceDescription(twitterInstance)
    instanceLink = getInstanceLink(twitterInstance)
    homeLink = getHomeLink(Configuration.PROJECT_NAME)

    templateArguments.update({'home_link' : homeLink,
                              'instance' : instance,
                              'location' : location,
                              'provider' : provider,
                              'instance_link' : instanceLink,
                              'instance_description' : instanceDescription,
                              'place' : geocode.display_name_short,
                              'place_coord' : geocode.coordinate,
                              'startEpoch' : twitterInstance.constructed_at,
                              # Client needs to offset to this epoch in case its clock is wrong.
                              'server_current_epoch' : getEpochMs(),
                              'max_tweets' : Configuration.MAX_CLIENT_LIVE_TWEETS})

    if geocode.has_bounding_box:
        templateArguments.update({'place_bounding_box' : geocode.bounding_box})

    if geocode.has_country:
        templateArguments.update({'place_country_link' : LocationsPage.link_info.getPageLink(instance, geocode.country.place_id, geocode.country.provider_id)})
        templateArguments.update({'place_country' : geocode.country.display_name_short})

    if geocode.has_continent:
        templateArguments.update({'place_continent_link' : LocationsPage.link_info.getPageLink(instance, geocode.continent.place_id, geocode.continent.provider_id)})
        templateArguments.update({'place_continent' : geocode.continent.display_name_short})

    return template('location.tpl', templateArguments)
def onInstanceLoadFunc(instanceKey, oauthToken, oauthSecret, geographicSetupString, keywords, instanceSetupCode, startTime, temporalLastTimeId, count = count):
    """Rebuild a stored Twitter instance and count it.

    The nested temporalLastTimeId mapping (provider ID -> place ID -> time
    ID) is converted to ints and flattened into a dict keyed by cache ID
    tuple before being passed to TwitterInstance together with the OAuth
    credentials. The instance is created with isCritical=True.

    count defaults to the enclosing `count` object captured at definition
    time; count[0] is incremented after each load.
    """
    temporal = dict()
    for providerKey, placeTimes in temporalLastTimeId.iteritems():
        providerNumber = int(providerKey)
        for placeKey, timeValue in placeTimes.iteritems():
            placeNumber = int(placeKey)
            timeNumber = int(timeValue)
            cacheIdTuple = GeocodeResultAbstract.buildCacheIdTuple(providerNumber, placeNumber)
            temporal[cacheIdTuple] = timeNumber
            logger.debug('Loaded instance %s last temporal change source %d/%d -> %d' % (instanceKey, providerNumber, placeNumber, timeNumber))

    TwitterInstance(instanceKey,
                    webApplication.twitter_instances,
                    TwitterAuthentication(Configuration.CONSUMER_TOKEN, Configuration.CONSUMER_SECRET, oauthToken, oauthSecret),
                    unicode(geographicSetupString),
                    keywords,
                    instanceSetupCode,
                    startTime,
                    temporal,
                    # Critical because it once worked, if it fails when we restarted
                    # then maybe our server lost network connectivity.
                    isCritical = True)

    # can delete, was debugging indexes.
    rebuildIndexes = args.rebuild_instance_indexes
    if rebuildIndexes:
        logger.info('Dropping indexes of instance %s' % instanceKey)
        getUserCollection(instanceKey).drop_indexes()
        getTweetCollection(instanceKey).drop_indexes()

    count[0] += 1
def continueExtractItemFromData(self, data, instanceData, signalerKey):
    """Return the set of keys held in instanceData for the signaler's place.

    The place is identified by the cache ID tuple built from
    signalerKey.provider and signalerKey.location; a place with no entry
    yields an empty set. `data` is not used.
    """
    placeId = signalerKey.location
    provider = signalerKey.provider
    placeKey = GeocodeResultAbstract.buildCacheIdTuple(provider,placeId)
    placeEntries = instanceData.get(placeKey, dict())
    return set(placeEntries.keys())
def extractItemFromData(self, data, signalerKey):
    """Build the tweet-rate payload for an instance or one of its places.

    When signalerKey carries both provider_id and place_id the rates are
    read from data['location_tweets'] for that place, otherwise from
    data['instance_tweets'].

    Returns None when there is no entry for the instance (or place).
    Otherwise returns (newData, payload): newData is False only when every
    'updated' flag is exactly False, and payload is a JSON string holding
    two [perSecond, perMinute, perHour, perDay] lists - successful tweets
    first, geocode failures second. Missing hour/day figures are
    extrapolated from the finer period, and success rates below one are
    reported as the string '< 1' when the day total is positive.
    """
    def readCounts(performance):
        # Returns ((day, hour, minute), (dayUpdated, hourUpdated,
        # minuteUpdated)); a missing performance object reads as unknown
        # counts with nothing updated.
        if performance is None:
            return (None, None, None), (False, False, False)

        assert isinstance(performance, RealtimePerformance)
        day, dayUpdated = performance.tweets_per_day.time_period_count_updated(True,True,True)
        hour, hourUpdated = performance.tweets_per_hour.time_period_count_updated(True,True,True)
        minute, minuteUpdated = performance.tweets_per_minute.time_period_count_updated(True,True,True)
        return (day, hour, minute), (dayUpdated, hourUpdated, minuteUpdated)

    def deriveRates(day, hour, minute):
        # Returns (second, minute, hour, day), filling gaps from the next
        # finer period where one is known.
        if minute is None:
            second = None
        else:
            second = int(minute / 60)
            if hour is None:
                hour = minute * 60

        if day is None and hour is not None:
            day = hour * 24

        return second, minute, hour, day

    def isBelowOne(rate):
        # An unknown rate counts as below one. This reproduces the Python 2
        # ordering (None sorts below every number) that the bare `rate < 1`
        # comparison relied on, without raising TypeError on Python 3.
        return rate is None or rate < 1

    if signalerKey.provider_id is not None and signalerKey.place_id is not None:
        data = data['location_tweets']
        locationCacheId = GeocodeResultAbstract.buildCacheIdTuple(signalerKey.provider_id, signalerKey.place_id)
    else:
        data = data['instance_tweets']
        locationCacheId = None

    instanceEntry = data.get(signalerKey.instance)
    if instanceEntry is None:
        return None

    successEntry = instanceEntry
    if locationCacheId is not None:
        successEntry = instanceEntry.get(locationCacheId)
        if successEntry is None:
            return None

    successCounts, successUpdated = readCounts(successEntry.get('success',None))

    # Geocode failures are always looked up on the instance-level entry,
    # even when the success figures are place specific.
    failCounts, failUpdated = readCounts(instanceEntry.get('geocode_fail'))

    # Identity test on purpose: only flags that are exactly False count as
    # 'not updated', as in the original chained `is` comparison.
    newData = not all(flag is False for flag in successUpdated + failUpdated)

    tweetsPerSecond, tweetsPerMinute, tweetsPerHour, tweetsPerDay = deriveRates(*successCounts)
    failGeocodeTweetsPerSecond, failGeocodeTweetsPerMinute, failGeocodeTweetsPerHour, failGeocodeTweetsPerDay = deriveRates(*failCounts)

    # Only the success figures get the '< 1' label; the failure figures are
    # passed through as numbers (or None).
    hasTraffic = tweetsPerDay is not None and tweetsPerDay > 0
    if hasTraffic:
        if isBelowOne(tweetsPerSecond):
            tweetsPerSecond = '< 1'
        if isBelowOne(tweetsPerMinute):
            tweetsPerMinute = '< 1'
        if isBelowOne(tweetsPerHour):
            tweetsPerHour = '< 1'

    return newData, json.dumps([[tweetsPerSecond, tweetsPerMinute, tweetsPerHour, tweetsPerDay],
                                [failGeocodeTweetsPerSecond, failGeocodeTweetsPerMinute, failGeocodeTweetsPerHour, failGeocodeTweetsPerDay]])
def func(templateArguments, instance):
    """Return influence data for a source place, grouped by place type.

    Query parameters: start_epoch and end_epoch (optional), source_place_id
    (required) and source_provider_id.

    Returns an empty dict when the instance is unknown or source_place_id
    is missing. Otherwise returns {'json': result} where result has the
    keys 'city', 'country' and 'continent', each holding
    {'geocode_list': records, 'total': sum of the listed counts}. A record
    is [placeId, providerId, display_name, coordinate, count,
    bounding_box]. Only the highest-count records per place type are kept,
    up to Configuration.DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE.

    Records whose geocode cannot be resolved are left out of the list and
    the total; previously a single unresolved place raised AttributeError
    and failed the whole request.
    """
    twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(
        instance)
    if twitterInstance is None:
        return dict()

    baseEpoch = twitterInstance.constructed_at

    start_epoch = parseInteger(request.GET.start_epoch, default=None)
    end_epoch = parseInteger(request.GET.end_epoch, default=None)
    source_place_id = parseInteger(request.GET.source_place_id)
    source_provider_id = parseInteger(request.GET.source_provider_id)

    if source_place_id is None:
        logger.error(
            'Invalid place ID specified while providing influence data: %s'
            % unicode(source_place_id))
        return dict()

    source_cache_id = GeocodeResultAbstract.buildCacheId(
        source_provider_id, source_place_id)

    temporalCollection = getTemporalInfluenceCollection(instance)

    def toTimeId(epoch):
        # An absent bound stays None, leaving that side of the range open.
        if epoch is None:
            return None
        return getTimeIdFromTimestamp(
            baseEpoch, Configuration.TEMPORAL_STEP, epoch)

    start_time_id = toTimeId(start_epoch)
    end_time_id = toTimeId(end_epoch)

    timerMs = getEpochMs()
    cacheData = getTemporalRange(temporalCollection,
                                 start_time_id,
                                 end_time_id,
                                 source_cache_id,
                                 preciseFromBack=True,
                                 preciseFromFront=True)
    logger.info('Took %dms to read temporal range data' %
                (getEpochMs() - timerMs))

    timerMs = getEpochMs()

    geocodeByPlaceType = dict()
    totalsByPlaceType = dict()

    if cacheData is not None:
        for providerId, providerIdData in cacheData.iteritems():
            providerId = int(providerId)

            for destination, count in providerIdData.iteritems():
                # Destinations are '<placeType>_<placeId>' strings.
                split = destination.split('_')
                placeType = int(split[0])
                placeId = int(split[1])

                record = [placeId, providerId, None, None, count, None]
                geocodeByPlaceType.setdefault(placeType,
                                              list()).append(record)

        # Process only the records we are going to display.
        maxRecords = Configuration.DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE
        for placeType, records in geocodeByPlaceType.iteritems():
            topRecords = sorted(records, key=lambda x: x[4], reverse=True)
            topRecords = topRecords[:maxRecords]

            displayed = list()
            total = 0
            for record in topRecords:
                cacheId = GeocodeResultAbstract.buildCacheId(
                    record[1], record[0])
                geocode = geocodeFromCacheById(cacheId)
                if geocode is None:
                    # The lookup can miss; skip the record rather than
                    # failing the request.
                    logger.warning(
                        'Skipping influence record with unresolved geocode: %s'
                        % unicode(cacheId))
                    continue

                record[2] = geocode.display_name
                record[3] = geocode.coordinate
                record[5] = geocode.bounding_box

                displayed.append(record)
                total += record[4]

            # Replacing the value of an existing key is safe while
            # iterating; no keys are added or removed.
            geocodeByPlaceType[placeType] = displayed
            totalsByPlaceType[placeType] = total

    def getResultPart(placeType):
        return {
            'geocode_list': geocodeByPlaceType.get(placeType, list()),
            'total': totalsByPlaceType.get(placeType, 0)
        }

    resultData = dict()
    resultData['city'] = getResultPart(
        GeocodeResultAbstract.PlaceTypes.CITY)
    resultData['country'] = getResultPart(
        GeocodeResultAbstract.PlaceTypes.COUNTRY)
    resultData['continent'] = getResultPart(
        GeocodeResultAbstract.PlaceTypes.CONTINENT)
    logger.info('Took %dms to build temporal range result data' %
                (getEpochMs() - timerMs))

    return {'json': resultData}
def func(templateArguments, instance):
    """Serve one page of cached tweets or users for a place as JSON rows.

    Reads type ('tweet' or 'user', required), start_epoch, end_epoch, page
    (0 when absent), place_id, provider_id, projection_type and followee
    from the query string. For users, projection_type must be empty/absent
    (full data) or 'name-only', and followee is required.

    Returns {'json': rows}; validation failures return whatever
    redirect_problem() produces.
    """
    dataType = parseString(request.GET.type, ['tweet', 'user'])
    start_epoch = parseInteger(request.GET.start_epoch)
    end_epoch = parseInteger(request.GET.end_epoch)
    page_num = parseInteger(request.GET.page)
    place_id = parseInteger(request.GET.place_id)
    provider_id = parseInteger(request.GET.provider_id)
    projection_type = parseString(request.GET.projection_type)
    followee = parseInteger(request.GET.followee)

    cache_id = GeocodeResultAbstract.buildCacheId(
        provider_id, place_id)

    if dataType is None:
        return redirect_problem('type is a required argument')

    if page_num is None:
        page_num = 0

    data = []
    if dataType == 'tweet':
        tweets = readTweetsFromCache(
            None, instance, cache_id, start_epoch, end_epoch, page_num,
            TwitterCachePage.PAGE_SIZE_FULL_DATA)
        if tweets is not None:
            for tweet in tweets:
                assert isinstance(tweet, Tweet)
                userHtml = UserInformationPage.getPageLinkImage(
                    instance, tweet.user, target='_self')
                data.append([
                    tweet.created_at, userHtml,
                    tweet.user.location_text, tweet.text
                ])
    elif dataType == 'user':
        # Truthiness covers both '' and None; calling len() on a None
        # projection_type used to raise TypeError.
        if not projection_type:
            projection = None
            pageSize = TwitterCachePage.PAGE_SIZE_FULL_DATA
        elif projection_type == 'name-only':
            projection = UserProjection.IdNameImage()
            pageSize = TwitterCachePage.PAGE_SIZE_ID_NAME_DATA
        else:
            return redirect_problem('Unsupported projection type: %s' %
                                    projection_type)

        if followee is None:
            return redirect_problem('Followee is required')

        users = readUsersFromCache(None,
                                   instance,
                                   cache_id,
                                   start_epoch,
                                   end_epoch,
                                   page_num,
                                   pageSize,
                                   followee,
                                   userProjection=projection)
        if users is not None:
            for user in users:
                assert isinstance(user, User)
                data.append([
                    user.id, user.name, user.profile_image_url,
                    UserInformationPage.link_info.getPageLink(
                        instance, user.id)
                ])

    return {'json': data}
def manageSocket(self, webSocket, tupleArguments, socketId):
    """Stream cached users or tweets of an instance to a web socket in batches.

    tupleArguments[0] is the instance key. The request's query string
    selects the export mode and range: batchSizeBytes, tweet_info,
    follower_info_full, follower_info_short, provider_id, place_id,
    start_epoch and end_epoch.

    Exactly one mode is active: follower_info_full wins over tweet_info,
    which wins over follower_info_short; with none set, full follower info
    is used. Data is written through tunnels on the socket; after each
    batch (more than bytesPerBatch characters sent, or the final item) the
    tunnels are closed and, unless finished, reopened with a fresh header.
    Progress is reported by executing JavaScript on the main control.

    Every visible return path returns False. socketId is not used.
    """
    instanceId = tupleArguments[0]

    mainControl = webSocket.controls[self.key]
    assert isinstance(mainControl, DocumentControl)

    bytesPerBatch = parseInteger(request.GET.batchSizeBytes, maximum=1024 * 1024 * 256, default=1024 * 1024 * 1)
    tweetInfo = parseBoolean(request.GET.tweet_info, False)
    followerInfo = parseBoolean(request.GET.follower_info_full, False)
    followerInfoShort = parseBoolean(request.GET.follower_info_short, False)
    providerId = parseInteger(request.GET.provider_id)
    placeId = parseInteger(request.GET.place_id)
    startEpoch = parseInteger(request.GET.start_epoch)
    endEpoch = parseInteger(request.GET.end_epoch)

    # A place filter is only applied when both halves of the cache ID are given.
    if placeId is not None and providerId is not None:
        placeCacheId = GeocodeResultAbstract.buildCacheId(providerId, placeId)
    else:
        placeCacheId = None

    # Normalise the three flags so that exactly one of them is True.
    if followerInfo:
        tweetInfo = False
        followerInfoShort = False
    elif tweetInfo:
        followerInfo = False
        followerInfoShort = False
    elif followerInfoShort:
        followerInfo = False
        tweetInfo = False
    else:
        followerInfo = True

    userTunnelId = 'user_tunnel'
    tweetTunnelId = None

    if tweetInfo:
        tweetTunnelId = 'tweet_tunnel'

    def openRequiredTunnels():
        # Tweet mode opens tunnels via openTunnels(); the other modes open
        # only the user tunnel.
        if tweetInfo:
            return self.openTunnels(webSocket)
        else:
            return self.openTunnel(userTunnelId, webSocket)

    if not openRequiredTunnels():
        logger.error('Failed to open initial tunnels')
        return False

    # Per-mode settings for the cache read and the output format.
    if tweetInfo:
        followerIdsFlag = False
        followeeIdsFlag = False
        analysisFlag = False
        isFollowersLoadedRequirement = None
        associatedWithTweetRequirement = True
        recursiveCacheFlag = False
        followerIdsProjection = None
        outputType = 1 # for csv.
    elif followerInfo:
        followerIdsFlag = True
        followeeIdsFlag = True
        analysisFlag = True
        isFollowersLoadedRequirement = True
        associatedWithTweetRequirement = None
        recursiveCacheFlag = True
        followerIdsProjection = None # this gives us all data on each follower.
        outputType = 2
    elif followerInfoShort:
        followerIdsFlag = True
        followeeIdsFlag = True
        followerIdsProjection = NoQueryProjection()
        analysisFlag = True
        isFollowersLoadedRequirement = True
        associatedWithTweetRequirement = None
        recursiveCacheFlag = True
        outputType = 3
    else:
        # Not reachable after the flag normalisation above, which always
        # leaves one flag set.
        raise NotImplementedError()

    userProjection = UserProjection(True, True, None, True, followerIdsFlag, followerIdsProjection, followeeIdsFlag, UserProjection.Id(), True, False, False, True, True, False, False, False, False, analysisFlag)

    # Single-element lists act as mutable cells shared with the closures below.
    isFirstIteration = [True]

    twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instanceId)
    if twitterInstance is None:
        # NOTE(review): the tunnels opened above are not closed on this path;
        # presumably returning False triggers cleanup elsewhere - confirm.
        return False

    twitterSession = twitterInstance.twitter_thread.twitter_session
    progressBarTotalId = 'progress-bar-total'
    progressBarCurrentBatchId = 'progress-bar-current-batch'

    signaler = EventSignaler(self.key, [webSocket])

    updateProgressBarFreq = Timer(400,True)

    def sendData(tunnelId, data):
        # Every payload is sent as one CRLF-terminated line.
        self.sendDataOnTunnel(webSocket, tunnelId, (unicode(data) + '\r\n'))

    def sendHeader():
        sendData(userTunnelId, getUserHeader(outputType))

        if tweetTunnelId is not None:
            sendData(tweetTunnelId, getTweetHeader())

    def doProgressBarChange(percentage, progressBarId):
        mainControl.executeJavascript('$("#%s").width("%.3f%%");' % (progressBarId, percentage))

    sendHeader()

    counter = [0]
    previousCounter = [0]
    def updateSocket(controls, data, bytesCounter=counter, bytesPerBatch=bytesPerBatch, previousCounter=previousCounter, isFirstIteration=isFirstIteration):
        # Handler passed to the signaler for each cache item: sends the item,
        # tracks the batch size and rolls the tunnels over at batch end.
        user = data['user_data']
        tweet = data['tweet_data']
        percentage = data['percentage']
        isFinished = data['isFinished']

        # NOTE(review): `control` is only type-checked; all JavaScript is
        # executed on mainControl.
        control = controls[self.key]
        assert isinstance(control, DocumentControl)

        def updateProgressBars():
            previousCounter[0] = thisCounter = bytesCounter[0]

            percentageCurrentBatch = float(thisCounter) / float(bytesPerBatch) * 100
            percentageTotal = percentage

            if percentageTotal >= 100:
                percentageCurrentBatch = 100

            # During the first batch the batch bar never trails the total bar.
            if isFirstIteration[0] and percentageCurrentBatch < percentageTotal:
                percentageCurrentBatch = percentageTotal

            doProgressBarChange(percentageTotal, progressBarTotalId)
            doProgressBarChange(percentageCurrentBatch, progressBarCurrentBatchId)

        # Only refresh when the counter moved and the timer has ticked.
        if previousCounter[0] != bytesCounter[0] and updateProgressBarFreq.ticked():
            updateProgressBars()

        dataToSendToClient = ''
        if user is not None:
            assert isinstance(user,User)
            dataToSendToClient = getUserRepresentation(user, outputType)
            sendData(userTunnelId, dataToSendToClient)

        if tweet is not None:
            assert isinstance(tweet, Tweet)
            dataToSendToClient = getTweetRepresentation(tweet)
            sendData(tweetTunnelId, dataToSendToClient)

        # NOTE(review): when both a user and a tweet were sent, only the
        # tweet's length is counted here (dataToSendToClient was overwritten);
        # the count is also a character count, not bytes - confirm intent.
        dataLength = len(dataToSendToClient)
        bytesCounter[0] += dataLength

        if bytesCounter[0] > bytesPerBatch or isFinished:
            updateProgressBars()
            isFirstIteration[0] = False

            bytesCounter[0] = 0
            mainControl.executeJavascript('onBatchEnd();')
            self.closeTunnels(webSocket)

            if not isFinished:
                logger.debug('Waiting to receive next data provider')
                if not openRequiredTunnels():
                    logger.warning('Failed to reinitialize tunnel slots')
                    webSocket.cleanup()
                    return

                sendHeader()
            else:
                mainControl.executeJavascript('onFinished();')
                webSocket.cleanup()

    def onCacheIteration(iteration, total, isFinished, data, iteratorId):
        # Per-item callback for the cache readers. Returns False once the
        # socket has been cleaned up.
        # Don't write followee data to output as it would duplicate alot of data.
        if iteratorId == 'followee':
            data = None

        running = not webSocket.is_cleaned_up
        if running:
            # We need to do this so that if the client closes the socket we are notified.
            webSocket.pingFreqLimited()

            percentage = getPercentage(iteration, total)
            # dataId is only referenced by the commented-out log line below.
            dataId = None
            if data is not None:
                dataId = data.id
            #logger.info('iteration %.2f of %.2f (%.1f%%) - it: %s, userId: %s' % (iteration, total, percentage,iteratorId,dataId))

            user = None
            tweet = None
            if data is None:
                pass
            elif isinstance(data, User):
                user = data
            elif isinstance(data, Tweet):
                tweet = data
                if tweet.has_user:
                    user = tweet.user
            else:
                logger.error('Invalid data from cache, type: %s' % type(data))
                return running

            signaler.signalEvent({SignalActions.SOCKET: updateSocket, 'percentage' : percentage, 'user_data' : user, 'tweet_data' : tweet, 'isFinished' : isFinished})
            gevent.sleep(0)
        else:
            logger.debug('Ending cache download prematurely')

        return running

    logger.debug('Starting to read data from cache...')

    # This makes sure the search is finite.
    epochNow = getEpochMs()
    if endEpoch is None or endEpoch > epochNow:
        endEpoch = epochNow

    if followerInfo or followerInfoShort:
        readUsersFromCache(twitterSession,
                           instanceId,
                           placeId = placeCacheId,
                           epochMsStartRange=startEpoch,
                           epochMsEndRange=endEpoch,
                           isFollowersLoadedRequirement=isFollowersLoadedRequirement,
                           associatedWithTweetRequirement=associatedWithTweetRequirement,
                           onIterationFunc=onCacheIteration,
                           recursive=recursiveCacheFlag,
                           userProjection=userProjection)
    else:
        readTweetsFromCache(twitterSession,
                            instanceId,
                            placeId = placeCacheId,
                            epochMsStartRange=startEpoch,
                            epochMsEndRange=endEpoch,
                            onIterationFunc=onCacheIteration,
                            retrieveUserData=True,
                            userProjection=userProjection)

    # We want to cleanup everything now since we are done.
    return False
def func(templateArguments, instance):
    """Return influence data for a source place, grouped by place type.

    Reads start_epoch, end_epoch (both optional), source_place_id
    (required) and source_provider_id from the query string.

    Returns an empty dict when the instance is unknown or source_place_id
    is missing; otherwise {'json': result} with the keys 'city', 'country'
    and 'continent'. Each value is {'geocode_list': records, 'total': sum
    of the listed counts}, a record being [placeId, providerId,
    display_name, coordinate, count, bounding_box]. Per place type only the
    highest-count records are kept, up to
    Configuration.DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE.

    A record whose geocode cannot be resolved is omitted from both the list
    and the total. Before, one unresolved place raised AttributeError and
    took down the whole response.
    """
    twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instance)
    if twitterInstance is None:
        return dict()

    baseEpoch = twitterInstance.constructed_at

    start_epoch = parseInteger(request.GET.start_epoch, default=None)
    end_epoch = parseInteger(request.GET.end_epoch, default=None)
    source_place_id = parseInteger(request.GET.source_place_id)
    source_provider_id = parseInteger(request.GET.source_provider_id)

    if source_place_id is None:
        logger.error('Invalid place ID specified while providing influence data: %s' % unicode(source_place_id))
        return dict()

    source_cache_id = GeocodeResultAbstract.buildCacheId(source_provider_id, source_place_id)

    temporalCollection = getTemporalInfluenceCollection(instance)

    # A missing bound is passed on as None, leaving that end of the range open.
    start_time_id = None
    if start_epoch is not None:
        start_time_id = getTimeIdFromTimestamp(baseEpoch, Configuration.TEMPORAL_STEP, start_epoch)

    end_time_id = None
    if end_epoch is not None:
        end_time_id = getTimeIdFromTimestamp(baseEpoch, Configuration.TEMPORAL_STEP, end_epoch)

    timerMs = getEpochMs()
    cacheData = getTemporalRange(temporalCollection, start_time_id, end_time_id, source_cache_id, preciseFromBack=True, preciseFromFront=True)
    logger.info('Took %dms to read temporal range data' % (getEpochMs() - timerMs))

    timerMs = getEpochMs()

    geocodeByPlaceType = dict()
    totalsByPlaceType = dict()

    if cacheData is not None:
        for providerId, providerIdData in cacheData.iteritems():
            providerId = int(providerId)

            for destination, count in providerIdData.iteritems():
                # Destinations are '<placeType>_<placeId>' strings.
                split = destination.split('_')
                placeType = int(split[0])
                placeId = int(split[1])

                record = [placeId, providerId, None, None, count, None]
                geocodeByPlaceType.setdefault(placeType,list()).append(record)

        # Process only the records we are going to display.
        for placeType, records in geocodeByPlaceType.iteritems():
            aux = sorted(records, key=lambda x: x[4], reverse=True)
            aux = aux[:Configuration.DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE]

            resolvedRecords = list()
            placeTypeTotal = 0
            for record in aux:
                cacheId = GeocodeResultAbstract.buildCacheId(record[1], record[0])
                geocode = geocodeFromCacheById(cacheId)
                if geocode is None:
                    # The lookup can miss; drop this record instead of
                    # failing the whole request.
                    logger.warning('Skipping influence record with unresolved geocode: %s' % unicode(cacheId))
                    continue

                record[2] = geocode.display_name
                record[3] = geocode.coordinate
                record[5] = geocode.bounding_box

                resolvedRecords.append(record)
                placeTypeTotal += record[4]

            # Re-assigning an existing key does not resize the dict, so this
            # is safe during iteration.
            geocodeByPlaceType[placeType] = resolvedRecords
            totalsByPlaceType[placeType] = placeTypeTotal

    def getResultPart(placeType):
        return {'geocode_list' : geocodeByPlaceType.get(placeType,list()),
                'total' : totalsByPlaceType.get(placeType, 0)}

    resultData = dict()
    resultData['city'] = getResultPart(GeocodeResultAbstract.PlaceTypes.CITY)
    resultData['country'] = getResultPart(GeocodeResultAbstract.PlaceTypes.COUNTRY)
    resultData['continent'] = getResultPart(GeocodeResultAbstract.PlaceTypes.CONTINENT)
    logger.info('Took %dms to build temporal range result data' % (getEpochMs() - timerMs))

    return {'json' : resultData}
# Drop the stored instance codes.
# NOTE(review): this statement may belong to a conditional that starts
# before this fragment - confirm against the full file.
getInstanceCodeCollection().drop()

if Configuration.PROXIES_ENABLED:
    httpProxy = Configuration.PROXIES.get('http')
    httpsProxy = Configuration.PROXIES.get('https')

    # Requests API will use these environment variables.
    if httpProxy is not None:
        os.environ['HTTP_PROXY'] = httpProxy
    if httpsProxy is not None:
        os.environ['HTTPS_PROXY'] = httpsProxy

bottle.debug(Configuration.BOTTLE_DEBUG)

GeocodeResultAbstract.initializeCountryContinentDataFromCsv()

# Shared application state, handed to every page and web socket group below.
dataCollection = DataCollection()
webApplication = WebApplicationTwitter(
    None,
    Configuration.MAX_INSTANCE_INACTIVE_TIME_MS,
    dataCollection)

# Pages and OAuth endpoints.
landingPage = LandingPage(webApplication)
oauthSignIn = OAuthSignIn(
    webApplication,
    Configuration.CONSUMER_TOKEN,
    Configuration.CONSUMER_SECRET)
oauthCallback = OAuthCallback(
    webApplication,
    Configuration.CONSUMER_TOKEN,
    Configuration.CONSUMER_SECRET,
    GateInstance.link_info.getPageLink())

# Web socket groups.
mapWebSocketGroup = LocationMapWsg(
    application=webApplication,
    locations=dataCollection.tweets_by_location)
tweetsByLocationWebSocketGroup = TweetsByLocationWsg(
    application=webApplication,
    signaler=dataCollection.tweets_by_location)
userInformationWebSocketGroup = UserWsg(
    application=webApplication,
    dataCollection=dataCollection,
    signaler=dataCollection.all_users)

instanceGate = GateInstance(webApplication)
startInstance = StartInstancePost(
    webApplication,
    Configuration.CONSUMER_TOKEN,
    Configuration.CONSUMER_SECRET,
    None)