def __init__(self,
             geocodeUserConfig,
             inputQueue=None,
             outputQueue=None,
             dataCollection=None,
             userAnalysisList=None):
    """ Stores the queues, configuration and counters used by the gate thread.

    @param geocodeUserConfig must be a UserGeocodeConfig (asserted).
    @param inputQueue stored as input_queue; a new QueueEx is created when None.
    @param outputQueue stored as output_queue; a new QueueEx is created when None.
    @param dataCollection must be a DataCollection (asserted, so None is rejected).
    @param userAnalysisList stored unchanged; may be None.
    """
    threadName = self.__class__.__name__
    super(FollowerExtractorGateThread, self).__init__(threadName,
                                                      criticalThread=True)

    # Each missing queue gets its own private QueueEx.
    inputQueue = QueueEx() if inputQueue is None else inputQueue
    outputQueue = QueueEx() if outputQueue is None else outputQueue

    assert dataCollection is not None
    assert isinstance(dataCollection, DataCollection)
    assert isinstance(geocodeUserConfig, UserGeocodeConfig)

    self.input_queue = inputQueue
    self.output_queue = outputQueue
    self.geocode_user_config = geocodeUserConfig
    self.user_analysis_list = userAnalysisList

    # The data collection is used to check for users which already have
    # followers.
    self.data_collection = dataCollection

    # Counters start at zero; the timer is built from the configured
    # drop-logging frequency.
    self.num_dropped = 0
    self.num_processed = 0
    self.log_num_dropped_timer = Timer(Configuration.LOG_DROP_AMOUNT_FREQ_MS,
                                       False)
def __init__(self, webSocket, onRegisteredFunc=None):
    """ Wraps a raw web socket and registers onUpdate as the signal handler.

    @param webSocket the underlying socket object; must not be None.
    @param onRegisteredFunc passed straight through to the base class.
    """
    super(WebSocket, self).__init__(processSignalFunc=self.onUpdate,
                                    onRegisteredFunc=onRegisteredFunc)

    assert webSocket is not None
    self.web_socket = webSocket

    # Nothing has been torn down yet and no controls or cleanup hooks are
    # attached.
    self.is_cleaned_up = False
    self.controls = {}
    self.cleanup_funcs = list()

    # Timer consulted by pingFreqLimited (4000 is presumably milliseconds,
    # as at the other Timer call sites -- confirm against Timer).
    self.pingTimer = Timer(4000, False)
def getCursorSizeSlow(cursor):
    """ Counts the documents behind a cursor, retrying on AutoReconnect.

    At most 5 attempts are made in total. After each failed attempt (except
    the last) the failure is logged and the cursor is rewound before trying
    again; the final failure is re-raised to the caller.

    @param cursor the cursor to count, or None.
    @return 0 when cursor is None, otherwise the value of cursor.count(True).
    @raise AutoReconnect when every attempt fails.
    """
    # Nothing to count, so nothing is timed or logged either.
    if cursor is None:
        return 0

    timer = Timer()
    logger.info('Retrieving cursor size...')

    maxAttempts = 5
    attempt = 1
    while True:
        try:
            # NOTE(review): True is presumably PyMongo's with_limit_and_skip
            # flag -- confirm against the driver version in use.
            sizeOfCursor = cursor.count(True)
            break
        except AutoReconnect as e:
            # The budget is the total number of tries, so the attempt that
            # logs "N of N" must be the last one rather than being followed
            # by a further silent retry.
            if attempt >= maxAttempts:
                raise

            logger.error(
                'Failed to retrieve cursor size, AutoReconnect exception: %s (%d of %d attempts), errors: %s'
                % (e.message, attempt, maxAttempts, unicode(e.errors)))
            attempt += 1
            cursor.rewind()

    logger.info('Successfully retrieved size of cursor: %d in %dms' %
                (sizeOfCursor, timer.time_since_constructed))
    return sizeOfCursor
def __init__(self,
             geocodeUserConfig,
             outputQueue=None,
             twitterSession=None,
             onTerminateFunc=None,
             userAnalysisList=None):
    """ Builds the follower extractor thread and its notifying input queue.

    @param geocodeUserConfig must be a UserGeocodeConfig (asserted).
    @param outputQueue stored as output_queue; a new QueueEx is created when None.
    @param twitterSession must be a TwitterSession (asserted, so None is rejected).
    @param onTerminateFunc passed straight through to the base class.
    @param userAnalysisList stored as user_analysis_list; a new list when None.
    """
    threadName = self.__class__.__name__ + "_" + str(getUniqueId())
    super(FollowerExtractorThread, self).__init__(threadName,
                                                  onTerminateFunc,
                                                  criticalThread=False)

    outputQueue = QueueEx() if outputQueue is None else outputQueue
    userAnalysisList = [] if userAnalysisList is None else userAnalysisList

    assert isinstance(twitterSession, TwitterSession)
    assert isinstance(geocodeUserConfig, UserGeocodeConfig)

    def isSessionActive():
        # Handed to QueueNotify as its "continue running" check.
        return twitterSession.is_session_active

    def onQueuePositionChanged(item, position, lastPoppedItem):
        # Records the new queue position on the user's follower enrichment
        # progress, then pushes the user to the output queue.
        queuedUser = getUser(item)
        if queuedUser is not None:
            assert isinstance(queuedUser, User)
            queuedUser.follower_enrichment_progress.onQueuePositionChange(
                queuedUser, position, lastPoppedItem)
            self.output_queue.put(queuedUser)

    self.input_queue = QueueNotify(isSessionActive, 2,
                                   onQueuePositionChanged)
    self.output_queue = outputQueue

    self.twitter_session = twitterSession
    self.geocode_user_config = geocodeUserConfig
    self.user_analysis_list = userAnalysisList

    # Performance counters and the timer for the performance log.
    self.num_followers_processed = 0
    self.num_followers_geocoded = 0
    self.num_followees_processed = 0
    self.log_performance_timer = Timer(60000, False)
def __init__(self, feed, outputQueue=None, initialData=None):
    """ Wires a Twitter feed to an output queue and seeds it with initial data.

    @param feed TwitterFeed to read from; a feed over a DummyIterable is built when None.
    @param outputQueue stored as output_queue; a new QueueEx is created when None.
    @param initialData optional iterable of items; a deep copy of each one is
                       bound to the feed's session and put on the output queue.
    """
    threadName = self.__class__.__name__ + "_" + str(getUniqueId())
    super(TwitterThread, self).__init__(threadName, criticalThread=False)

    feed = TwitterFeed([], [], [], DummyIterable(),
                       None) if feed is None else feed
    outputQueue = QueueEx() if outputQueue is None else outputQueue

    assert isinstance(feed, TwitterFeed)
    assert isinstance(feed.twitter_session, TwitterSession)

    # The feed is stored both as the input queue and as twitter_feed.
    self.input_queue = feed
    self.twitter_feed = feed
    self.twitter_session = feed.twitter_session
    self.output_queue = outputQueue

    # Seed the pipeline with copies, leaving the caller's objects untouched.
    for original in (initialData if initialData is not None else ()):
        seededItem = copy.deepcopy(original)
        assert isinstance(getUser(seededItem), User)

        logger.info('Retrieved tweet/user from file: %s' % seededItem)
        seededItem.setTwitterSession(self.twitter_session)
        self.output_queue.put(seededItem)

    # Throughput counters.
    self.num_dropped = 0
    self.num_processed = 0

    # Counters describing the geocode state of incoming tweets.
    self.num_twitter_geocoded_place = 0
    self.num_twitter_geocoded_coordinate = 0
    self.num_twitter_geocoded_both = 0
    self.num_not_twitter_geocoded = 0
    self.num_no_location = 0
    self.num_geocodeable = 0

    self.log_num_dropped_timer = Timer(Configuration.LOG_DROP_AMOUNT_FREQ_MS,
                                       False)
def __init__(self,
             geocodeConfig,
             inputQueue=None,
             successOutputQueue=None,
             primaryFailureOutputQueue=None,
             highLoadFailureOutputQueue=None,
             inMemoryOnly=None):
    """ Stores the queues, configuration and counters of the cache geocoder.

    @param geocodeConfig must be a UserGeocodeConfig (asserted).
    @param inputQueue stored as input_queue; a new QueueEx when None.
    @param successOutputQueue stored as success_output_queue; a new QueueEx when None.
    @param primaryFailureOutputQueue stored as primary_failure_output_queue;
                                     a new QueueEx when None.
    @param highLoadFailureOutputQueue stored unchanged; stays None when not given.
    @param inMemoryOnly stored as in_memory_only; when truthy the thread name
                        gets a '_MEMORY_ONLY' suffix.
    """
    nameSuffix = '_MEMORY_ONLY' if inMemoryOnly else ''
    super(GeocodeFromCacheThread, self).__init__(
        '%s%s' % (self.__class__.__name__, nameSuffix), criticalThread=True)

    assert isinstance(geocodeConfig, UserGeocodeConfig)

    inputQueue = QueueEx() if inputQueue is None else inputQueue
    successOutputQueue = (QueueEx() if successOutputQueue is None else
                          successOutputQueue)
    primaryFailureOutputQueue = (QueueEx()
                                 if primaryFailureOutputQueue is None else
                                 primaryFailureOutputQueue)

    self.input_queue = inputQueue
    self.success_output_queue = successOutputQueue
    self.primary_failure_output_queue = primaryFailureOutputQueue
    # Deliberately not defaulted: None is stored as-is.
    self.high_load_failure_output_queue = highLoadFailureOutputQueue

    self.geocode_config = geocodeConfig
    self.in_memory_only = inMemoryOnly

    # Statistics counters.
    self.num_processed = 0
    self.num_failed_over = 0
    self.num_dropped_from_success = 0
    self.num_dropped_from_primary_failure = 0
    self.log_timer = Timer(Configuration.LOG_DROP_AMOUNT_FREQ_MS, False)

    # The configured wait is in milliseconds; sleep_time holds seconds.
    self.sleep_time = float(
        Configuration.GEOCODE_FROM_CACHE_THREAD_WAIT_TIME_MS) / 1000.0
def __init__(self, webSocket, onRegisteredFunc=None):
    """ Wraps a raw web socket and registers onUpdate as the signal handler.

    @param webSocket the underlying socket object; must not be None.
    @param onRegisteredFunc passed straight through to the base class.
    """
    super(WebSocket, self).__init__(processSignalFunc=self.onUpdate,
                                    onRegisteredFunc=onRegisteredFunc)

    assert webSocket is not None
    self.web_socket = webSocket

    # Nothing has been torn down yet and no controls or cleanup hooks are
    # attached.
    self.is_cleaned_up = False
    self.controls = {}
    self.cleanup_funcs = list()

    # Timer consulted by pingFreqLimited (4000 is presumably milliseconds,
    # as at the other Timer call sites -- confirm against Timer).
    self.pingTimer = Timer(4000, False)
def func(templateArguments, *args, **kwargs):
    """ Attaches a bulk download worker to a waiting tunnel slot.

    Reads 'tunnel_id' and 'socket_id' from kwargs, looks up the socket and
    its tunnel event, stores a new AsyncWorker under the tunnel ID and sets
    the event. Whenever the tunnel cannot be opened (unknown socket, unknown
    tunnel ID, or a tunnel whose event is already set) the problem is
    logged and the worker is finished immediately.

    @param templateArguments not used by this function.
    @return the worker's queue, in both the success and the failure case.
    @raise KeyError if 'tunnel_id' or 'socket_id' is missing from kwargs.
    """
    throttledDisplayUsage = None
    if self.on_display_usage_func is not None:
        displayUsageFuncCallTimer = Timer(1000, True)

        def throttledDisplayUsage():
            # Only forwards to the usage callback when the timer has ticked.
            if displayUsageFuncCallTimer.ticked():
                self.on_display_usage_func(self,
                                           packArguments(*args, **kwargs))

    # > 1 for buffering, ensure we are always sending not
    # send -> wait for database -> send.
    worker = AsyncWorker(2, throttledDisplayUsage)

    tunnelId = kwargs['tunnel_id']
    socketId = kwargs['socket_id']

    def abort(message):
        # Shared failure path: log, finish the worker, hand back its queue.
        logger.error(message)
        worker.on_finish()
        return worker.queue

    self.setResponseHeaders(tunnelId)
    logger.info('Tunnel %s on socket %s has started opening' %
                (tunnelId, socketId))

    # The IDs are formatted with %s throughout (as in the info log above):
    # %d raises TypeError for a non-integer ID, which would replace the
    # intended error log with a crash.
    socket = self.sockets.get(socketId, None)
    if socket is None:
        return abort(
            'Bulk download attempted but no matching socket with ID: %s found'
            % socketId)

    tunnelEvent = socket.tunnel_events.get(tunnelId, None)
    if tunnelEvent is None:
        return abort('Invalid tunnel ID received: %s' % tunnelId)

    if tunnelEvent.is_set():
        return abort(
            'Attempted to assign two bulk download providers to one socket with ID: %s, and tunnel ID: %s'
            % (socketId, tunnelId))

    socket.tunnels[tunnelId] = worker
    tunnelEvent.set()
    return worker.queue
class GeocodeFromCacheThread(BaseThread):
    """ Geocodes queued items from the cache and routes them by outcome.

    Successfully geocoded items go to the success queue, failures go to the
    primary failure queue or, failing that, to the optional high load
    failure queue.
    """

    def __init__(self,
                 geocodeConfig,
                 inputQueue=None,
                 successOutputQueue=None,
                 primaryFailureOutputQueue=None,
                 highLoadFailureOutputQueue=None,
                 inMemoryOnly=None):
        """ Stores the queues, configuration and counters.

        @param geocodeConfig must be a UserGeocodeConfig (asserted).
        @param inputQueue a new QueueEx is created when None.
        @param successOutputQueue a new QueueEx is created when None.
        @param primaryFailureOutputQueue a new QueueEx is created when None.
        @param highLoadFailureOutputQueue stored unchanged; may stay None.
        @param inMemoryOnly when truthy the thread name gets a '_MEMORY_ONLY'
                            suffix and _run does not sleep between items.
        """
        nameSuffix = '_MEMORY_ONLY' if inMemoryOnly else ''
        super(GeocodeFromCacheThread, self).__init__(
            '%s%s' % (self.__class__.__name__, nameSuffix),
            criticalThread=True)

        assert isinstance(geocodeConfig, UserGeocodeConfig)

        inputQueue = QueueEx() if inputQueue is None else inputQueue
        successOutputQueue = (QueueEx() if successOutputQueue is None else
                              successOutputQueue)
        primaryFailureOutputQueue = (QueueEx()
                                     if primaryFailureOutputQueue is None else
                                     primaryFailureOutputQueue)

        self.input_queue = inputQueue
        self.success_output_queue = successOutputQueue
        self.primary_failure_output_queue = primaryFailureOutputQueue
        self.high_load_failure_output_queue = highLoadFailureOutputQueue

        self.geocode_config = geocodeConfig
        self.in_memory_only = inMemoryOnly

        self.num_processed = 0
        self.num_failed_over = 0
        self.num_dropped_from_success = 0
        self.num_dropped_from_primary_failure = 0
        self.log_timer = Timer(Configuration.LOG_DROP_AMOUNT_FREQ_MS, False)

        # The configured wait is in milliseconds; sleep_time holds seconds.
        self.sleep_time = float(
            Configuration.GEOCODE_FROM_CACHE_THREAD_WAIT_TIME_MS) / 1000.0

    def _logAndResetStatistics(self):
        """ Logs the counters gathered since the last report, then zeroes them. """
        numProcessed = self.num_processed
        numFailedOver = self.num_failed_over
        numDroppedFromSuccess = self.num_dropped_from_success
        numDroppedFromPrimaryFailure = self.num_dropped_from_primary_failure
        total = (numProcessed + numDroppedFromSuccess +
                 numDroppedFromPrimaryFailure + numFailedOver)

        def shareOfTotal(count):
            # An empty reporting window reports 0% everywhere.
            if total == 0:
                return 0
            return float(count) / float(total) * 100.0

        percentageDroppedFromSuccess = shareOfTotal(numDroppedFromSuccess)
        percentageDroppedFromPrimaryFailure = shareOfTotal(
            numDroppedFromPrimaryFailure)
        percentageFailedOver = shareOfTotal(numFailedOver)
        percentageSuccess = shareOfTotal(numProcessed)

        outputQueueSize = self.success_output_queue.qsize()
        failOverOutputQueueSize = self.primary_failure_output_queue.qsize()
        geocodeDataInMemoryCacheSize = getGeocodeDataInMemoryCacheSize()
        geocodeQueryInMemoryCacheSize = getGeocodeQueryInMemoryCacheSize()

        self.num_dropped_from_success = 0
        self.num_dropped_from_primary_failure = 0
        self.num_processed = 0
        self.num_failed_over = 0

        logger.info(
            'Geocoded %d items (%.2f%%), failed over %d items (%.2f%%), dropped successful geocode items %d items (%.2f%%), dropped failed geocode items %d items (%.2f%%) - success output queue size: %d, fail over output queue size: %d - geocode cache size: %d, place cache size %d'
            % (numProcessed, percentageSuccess, numFailedOver,
               percentageFailedOver, numDroppedFromSuccess,
               percentageDroppedFromSuccess, numDroppedFromPrimaryFailure,
               percentageDroppedFromPrimaryFailure, outputQueueSize,
               failOverOutputQueueSize, geocodeDataInMemoryCacheSize,
               geocodeQueryInMemoryCacheSize))

    def _run(self):
        """ Consumes the input queue, geocoding and routing each item. """
        for item in self.input_queue:
            # Only the non in-memory variant is throttled.
            if not self.in_memory_only:
                time.sleep(self.sleep_time)

            user = getUser(item)
            assert user is not None

            # In case previously geocoded by in memory.
            user.clearGeocode(True)
            success = user.is_geocoded or user.geocodeLocationFromCache(
                self.geocode_config, self.in_memory_only)

            if self.log_timer.ticked():
                self._logAndResetStatistics()

            if success:
                if (self.success_output_queue.qsize() <
                        Configuration.ANALYSIS_INPUT_THREAD_SIZE_CAP):
                    self.num_processed += 1
                    self.success_output_queue.put(item)
                else:
                    self.num_dropped_from_success += 1
                continue

            # Make sure the queue doesn't get too full, we only geocode once
            # a second. Items without a location or twitter place are not
            # failed over to the primary failure queue.
            if (self.primary_failure_output_queue.qsize() <= Configuration.
                    GEOCODE_FROM_CACHE_PRIMARY_FAILURE_OUTPUT_QUEUE_SIZE
                    and (user.has_location or user.has_twitter_place)):
                self.num_failed_over += 1
                self.primary_failure_output_queue.put(item)
            elif self.high_load_failure_output_queue is not None:
                self.num_dropped_from_primary_failure += 1
                self.high_load_failure_output_queue.put(item)
import unittest import requests from api.config import Configuration, GE_MAP_QUEST, GE_GOOGLE from api.core.utility import Timer from api.geocode.geocode_shared import GeocodeResult, GeocodeResultGoogle, BadGeocodeException import logging import itertools logger = logging.getLogger(__name__) __author__ = 'Michael Pryor' # 1 every two seconds. # Confirmed with open map quest that 1 per second is okay, but set to every two seconds to be nice. geocode_from_external_timer_omq = Timer.rate_limited(60,120*1000) # 2500 requests per day (24 hours). # This works out as once every 35 seconds. geocode_from_external_timer_google = Timer.rate_limited(2500,24*60*60*1000) def _geocodeFromExternalOMQ(query, countryCode=None, acceptableTypes=None): """ Uses open map quest to do a location search, e.g. if query is London then information about London city will be returned. Note this method restricts itself to 1 call per second.""" if query is None: return None geocode_from_external_timer_omq.waitForTick() try: url = "http://open.mapquestapi.com/nominatim/v1/search"
def processCursor(cursor,
                  constructObjectFunc,
                  onIterationFunc=None,
                  cursorSize=None,
                  getCurrentIterationFunc=None):
    """ Walks a cursor, building an object per item, then closes the cursor.

    Without onIterationFunc the non-None objects are collected and returned.
    With onIterationFunc each object is handed to the callback instead and
    nothing is collected, which matters when processing more rows than fit
    in memory.

    @param cursor iterable with a close() method, or None.
    @param constructObjectFunc called with each raw item.
    @param onIterationFunc optional callback invoked as
           (iteration, endIteration, isFinished, object, 'base'); returning
           False stops the walk early.
    @param cursorSize optional upper bound used to derive endIteration.
    @param getCurrentIterationFunc optional function mapping an object to its
           iteration value; defaults to a 1-based counter. Objects for which
           it returns None are skipped.
    @return the list of collected objects, or None when there are none
            (always None when onIterationFunc is given or cursor is None).
    """
    if cursor is None:
        return None

    try:
        stopwatch = Timer()
        collected = []
        hasCallback = onIterationFunc is not None

        if getCurrentIterationFunc is None:
            tally = [0]

            def countIterations(obj, tally=tally):
                tally[0] += 1
                return tally[0]

            getCurrentIterationFunc = countIterations

        brokeOut = False
        boundsKnown = False
        iteration = 0
        offset = 0
        endIteration = cursorSize

        # Initial progress report, before anything has been read.
        if hasCallback:
            onIterationFunc(offset, endIteration, False, None, 'base')

        for rawItem in cursor:
            currentObject = constructObjectFunc(rawItem)

            if not hasCallback:
                if currentObject is not None:
                    collected.append(currentObject)
                continue

            iteration = getCurrentIterationFunc(currentObject)
            if iteration is None:
                continue

            # Iterations don't have to be 0 indexed: the first value seen
            # fixes the offset, and the end bound is shifted to match.
            if cursorSize is not None and not boundsKnown:
                offset = iteration - 1
                endIteration = cursorSize - offset
                boundsKnown = True

            if boundsKnown:
                iteration -= offset

            if onIterationFunc(iteration, endIteration, False, currentObject,
                               'base') is False:
                brokeOut = True
                break

        # Signal that we're finished; a walk that ran to completion reports
        # the end bound, an interrupted one reports where it stopped.
        if hasCallback:
            if not brokeOut:
                iteration = endIteration
            onIterationFunc(iteration, endIteration, True, None, 'base')
            logger.info('Successfully processed cursor in %dms' %
                        stopwatch.time_since_constructed)

        elapsed = stopwatch.time_since_constructed
        logger.info('Successfully read %d items from cache (%d) in %dms' %
                    (len(collected), stopwatch.__hash__(), elapsed))

        return collected or None
    finally:
        cursor.close()
def cursorItemsFromCache(instanceId,
                         getCollectionFunc,
                         placeId=None,
                         epochMsStartRange=None,
                         epochMsEndRange=None,
                         pageNum=None,
                         pageSize=None,
                         typeSpecificQuery=None,
                         projection=None,
                         sortByTimestamp=None,
                         typeSpecificHint=None):
    """ Builds a cursor over an instance's cached items.

    @param instanceId passed to getCollectionFunc; must not be None.
    @param getCollectionFunc returns the collection to query; must not be None.
    @param placeId optional dict with 'placeId' and 'providerId' keys to filter on.
    @param epochMsStartRange optional inclusive lower timestamp bound ($gte).
    @param epochMsEndRange optional exclusive upper timestamp bound ($lt).
    @param pageNum page index; paging is applied only together with pageSize.
    @param pageSize page length; paging is applied only together with pageNum.
    @param typeSpecificQuery optional dict merged into the query last.
    @param projection optional projection object; when its do_query is False
           no query is made.
    @param sortByTimestamp sort ascending by timestamp; defaults to True.
    @param typeSpecificHint optional index hint overriding the derived one.
    @return the cursor, with an upper_bound_timestamp attribute attached, or
            None when the projection disables the query.
    """
    sortByTimestamp = True if sortByTimestamp is None else sortByTimestamp

    epochMsStartRange, epochMsEndRange = fixEpochMsRange(
        epochMsStartRange, epochMsEndRange)
    upperBoundTimestamp = (getEpochMs()
                           if epochMsEndRange is None else epochMsEndRange)

    if projection is not None and projection.do_query is False:
        return None

    assert instanceId is not None
    assert getCollectionFunc is not None
    collection = getCollectionFunc(instanceId)

    logFormatting = 'IN:%s, P:%s, ES:%s, EE:%s, PN:%s, PS:%s, T:%s, P:%s' % (
        instanceId, placeId, epochMsStartRange, epochMsEndRange, pageNum,
        pageSize, typeSpecificQuery, projection)

    stopwatch = Timer()
    logger.info('Attempting to read items from cache (%d) -- %s' %
                (stopwatch.__hash__(), logFormatting))

    # Timestamp window: only the bounds that were actually supplied.
    timestampRange = {}
    if epochMsEndRange is not None:
        timestampRange['$lt'] = epochMsEndRange
    if epochMsStartRange is not None:
        timestampRange['$gte'] = epochMsStartRange
    hasTimestampFilter = bool(timestampRange)

    query = {}
    if hasTimestampFilter:
        query['timestamp'] = timestampRange
    if placeId is not None:
        query['geocode.placeId'] = placeId['placeId']
        query['geocode.providerId'] = placeId['providerId']

    # MongoDB sometimes gets it wrong, particularly with geocode.placeId, so
    # the hint names the filtered fields unless the caller supplied one.
    if typeSpecificHint is not None:
        hint = typeSpecificHint
    else:
        hintFields = []
        if placeId is not None:
            hintFields.append(('geocode.placeId', pymongo.ASCENDING))
        if hasTimestampFilter:
            hintFields.append(('timestamp', pymongo.ASCENDING))
        hint = hintFields or None

    if typeSpecificQuery is not None:
        query.update(typeSpecificQuery)

    findArguments = [query]
    if projection is not None:
        findArguments.append(projection.projection)
    cursor = collection.find(*findArguments, timeout=False).hint(hint)

    if sortByTimestamp:
        cursor = cursor.sort([('timestamp', pymongo.ASCENDING)])

    if pageSize is not None and pageNum is not None:
        cursor = cursor.skip(pageSize * pageNum).limit(pageSize)

    # We use this to calculate progress through the cursor,
    # it is more efficient than using cursor.count.
    cursor.upper_bound_timestamp = upperBoundTimestamp

    elapsed = stopwatch.time_since_constructed
    logger.info('Successfully setup cursor in %dms -- %s' %
                (elapsed, logFormatting))

    if Configuration.MONGO_EXPLAINS_ENABLED:
        logger.critical('Tweet/User Explain: %s' % unicode(cursor.explain()))

    return cursor
except KeyboardInterrupt: pass print 'Finished!' sys.exit(0) if args.show_database_storage_usage: f = open('db_results.txt', 'w') sys.stdout = f theStep = 1000 * 60 * 15 print 'Running in show database storage mode, update every %dms' % theStep print f.flush() updateTimer = Timer(theStep,True) try: while True: updateTimer.waitForTick() collections = getCollections() print 'The time: %s'% unicode(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')) print 'Available collections: %s' % unicode(collections) print print print 'Database statistics: %s' % unicode(getDatabase().command({'dbStats' : 1})) for collection in collections: print '%*s collection statistics: %s' % (20, collection, unicode(getDatabase().command({'collStats' : collection}))) print
def manageSocket(self, webSocket, tupleArguments, socketId):
    """ Streams cached tweets or users to the client over download tunnels.

    The request's GET parameters select one of three output modes (tweet
    info, full follower info, short follower info), an optional place
    filter and an optional time range. Data read from the cache is sent in
    batches: once more than batchSizeBytes have been counted the tunnels
    are closed and re-opened for the next batch.

    @param webSocket the socket whose controls and tunnels are used.
    @param tupleArguments its first element is the instance key.
    @param socketId not used by this method.
    @return always False (every return path visible here returns False).
    """
    instanceId = tupleArguments[0]

    mainControl = webSocket.controls[self.key]
    assert isinstance(mainControl, DocumentControl)

    # Batch size is capped at 256 * 1024 * 1024 and defaults to 1024 * 1024.
    bytesPerBatch = parseInteger(request.GET.batchSizeBytes, maximum=1024 * 1024 * 256, default=1024 * 1024 * 1)
    tweetInfo = parseBoolean(request.GET.tweet_info, False)
    followerInfo = parseBoolean(request.GET.follower_info_full, False)
    followerInfoShort = parseBoolean(request.GET.follower_info_short, False)
    providerId = parseInteger(request.GET.provider_id)
    placeId = parseInteger(request.GET.place_id)
    startEpoch = parseInteger(request.GET.start_epoch)
    endEpoch = parseInteger(request.GET.end_epoch)

    # The place filter is only applied when both halves of the ID are given.
    if placeId is not None and providerId is not None:
        placeCacheId = GeocodeResultAbstract.buildCacheId(providerId, placeId)
    else:
        placeCacheId = None

    # Normalise the flags so that exactly one mode is active. Precedence is
    # full follower info, then tweet info, then short follower info; with
    # no flag set, full follower info is the default.
    if followerInfo:
        tweetInfo = False
        followerInfoShort = False
    elif tweetInfo:
        followerInfo = False
        followerInfoShort = False
    elif followerInfoShort:
        followerInfo = False
        tweetInfo = False
    else:
        followerInfo = True

    userTunnelId = 'user_tunnel'
    tweetTunnelId = None

    if tweetInfo:
        tweetTunnelId = 'tweet_tunnel'

    def openRequiredTunnels():
        # Tweet mode goes through openTunnels; the other modes open only
        # the user tunnel.
        if tweetInfo:
            return self.openTunnels(webSocket)
        else:
            return self.openTunnel(userTunnelId, webSocket)

    if not openRequiredTunnels():
        logger.error('Failed to open initial tunnels')
        return False

    # Per-mode cache read settings and output format.
    if tweetInfo:
        followerIdsFlag = False
        followeeIdsFlag = False
        analysisFlag = False
        isFollowersLoadedRequirement = None
        associatedWithTweetRequirement = True
        recursiveCacheFlag = False
        followerIdsProjection = None
        outputType = 1 # for csv.
    elif followerInfo:
        followerIdsFlag = True
        followeeIdsFlag = True
        analysisFlag = True
        isFollowersLoadedRequirement = True
        associatedWithTweetRequirement = None
        recursiveCacheFlag = True
        followerIdsProjection = None # this gives us all data on each follower.
        outputType = 2
    elif followerInfoShort:
        followerIdsFlag = True
        followeeIdsFlag = True
        followerIdsProjection = NoQueryProjection()
        analysisFlag = True
        isFollowersLoadedRequirement = True
        associatedWithTweetRequirement = None
        recursiveCacheFlag = True
        outputType = 3
    else:
        # Unreachable after the normalisation above, which always leaves
        # one of the three flags set.
        raise NotImplementedError()

    userProjection = UserProjection(True, True, None, True, followerIdsFlag, followerIdsProjection, followeeIdsFlag, UserProjection.Id(), True, False, False, True, True, False, False, False, False, analysisFlag)

    # Single-element lists are used as mutable cells so the nested
    # functions below can update them.
    isFirstIteration = [True]

    twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instanceId)
    if twitterInstance is None:
        return False

    twitterSession = twitterInstance.twitter_thread.twitter_session

    progressBarTotalId = 'progress-bar-total'
    progressBarCurrentBatchId = 'progress-bar-current-batch'

    signaler = EventSignaler(self.key, [webSocket])

    # Limits how often the progress bars are refreshed between batch ends.
    updateProgressBarFreq = Timer(400,True)

    def sendData(tunnelId, data):
        # Each record is written as one line terminated by CRLF.
        self.sendDataOnTunnel(webSocket, tunnelId, (unicode(data) + '\r\n'))

    def sendHeader():
        # The user header is always sent; the tweet header only in tweet mode.
        sendData(userTunnelId, getUserHeader(outputType))

        if tweetTunnelId is not None:
            sendData(tweetTunnelId, getTweetHeader())

    def doProgressBarChange(percentage, progressBarId):
        mainControl.executeJavascript('$("#%s").width("%.3f%%");' % (progressBarId, percentage))

    sendHeader()

    counter = [0]
    previousCounter = [0]

    def updateSocket(controls, data, bytesCounter=counter, bytesPerBatch=bytesPerBatch, previousCounter=previousCounter, isFirstIteration=isFirstIteration):
        # Signal handler run for each event raised by onCacheIteration:
        # sends the record(s), counts the bytes and ends the batch when the
        # limit is exceeded or the read has finished.
        user = data['user_data']
        tweet = data['tweet_data']
        percentage = data['percentage']
        isFinished = data['isFinished']

        control = controls[self.key]
        assert isinstance(control, DocumentControl)

        def updateProgressBars():
            previousCounter[0] = thisCounter = bytesCounter[0]

            percentageCurrentBatch = float(thisCounter) / float(bytesPerBatch) * 100
            percentageTotal = percentage

            if percentageTotal >= 100:
                percentageCurrentBatch = 100

            # During the first batch the batch bar never trails the total bar.
            if isFirstIteration[0] and percentageCurrentBatch < percentageTotal:
                percentageCurrentBatch = percentageTotal

            doProgressBarChange(percentageTotal, progressBarTotalId)
            doProgressBarChange(percentageCurrentBatch, progressBarCurrentBatchId)

        # Refresh only when the byte count has moved and the timer allows it.
        if previousCounter[0] != bytesCounter[0] and updateProgressBarFreq.ticked():
            updateProgressBars()

        dataToSendToClient = ''
        if user is not None:
            assert isinstance(user,User)
            dataToSendToClient = getUserRepresentation(user, outputType)
            sendData(userTunnelId, dataToSendToClient)

        if tweet is not None:
            assert isinstance(tweet, Tweet)
            dataToSendToClient = getTweetRepresentation(tweet)
            sendData(tweetTunnelId, dataToSendToClient)

        # NOTE(review): when both a user and a tweet were sent only the
        # tweet's length is counted here, and the CRLF added by sendData is
        # never counted -- confirm the batch size is meant to be approximate.
        dataLength = len(dataToSendToClient)
        bytesCounter[0] += dataLength

        if bytesCounter[0] > bytesPerBatch or isFinished:
            updateProgressBars()
            isFirstIteration[0] = False

            bytesCounter[0] = 0
            mainControl.executeJavascript('onBatchEnd();')
            self.closeTunnels(webSocket)

            if not isFinished:
                # Re-open the tunnels for the next batch and start it with
                # fresh headers.
                logger.debug('Waiting to receive next data provider')
                if not openRequiredTunnels():
                    logger.warning('Failed to reinitialize tunnel slots')
                    webSocket.cleanup()
                    return

                sendHeader()
            else:
                mainControl.executeJavascript('onFinished();')
                webSocket.cleanup()

    def onCacheIteration(iteration, total, isFinished, data, iteratorId):
        # Callback handed to the cache readers. Returns whether the socket
        # is still in use, i.e. False once it has been cleaned up.

        # Don't write followee data to output as it would duplicate alot of data.
        if iteratorId == 'followee':
            data = None

        running = not webSocket.is_cleaned_up
        if running:
            # We need to do this so that if the client closes the socket we are notified.
            webSocket.pingFreqLimited()

            percentage = getPercentage(iteration, total)

            # dataId is only referenced by the disabled debug log below.
            dataId = None
            if data is not None:
                dataId = data.id
            #logger.info('iteration %.2f of %.2f (%.1f%%) - it: %s, userId: %s' % (iteration, total, percentage,iteratorId,dataId))

            user = None
            tweet = None
            if data is None:
                pass
            elif isinstance(data, User):
                user = data
            elif isinstance(data, Tweet):
                tweet = data
                if tweet.has_user:
                    user = tweet.user
            else:
                logger.error('Invalid data from cache, type: %s' % type(data))
                return running

            signaler.signalEvent({SignalActions.SOCKET: updateSocket, 'percentage' : percentage, 'user_data' : user, 'tweet_data' : tweet, 'isFinished' : isFinished})
            gevent.sleep(0)
        else:
            logger.debug('Ending cache download prematurely')

        return running

    logger.debug('Starting to read data from cache...')

    # This makes sure the search is finite.
    epochNow = getEpochMs()
    if endEpoch is None or endEpoch > epochNow:
        endEpoch = epochNow

    if followerInfo or followerInfoShort:
        readUsersFromCache(twitterSession, instanceId, placeId = placeCacheId, epochMsStartRange=startEpoch, epochMsEndRange=endEpoch, isFollowersLoadedRequirement=isFollowersLoadedRequirement, associatedWithTweetRequirement=associatedWithTweetRequirement, onIterationFunc=onCacheIteration, recursive=recursiveCacheFlag, userProjection=userProjection)
    else:
        readTweetsFromCache(twitterSession, instanceId, placeId = placeCacheId, epochMsStartRange=startEpoch, epochMsEndRange=endEpoch, onIterationFunc=onCacheIteration, retrieveUserData=True, userProjection=userProjection)

    # We want to cleanup everything now since we are done.
    return False
class WebSocket(EventHandler):
    """ Base class for all web socket interactions. """

    class OP:
        """ Contains all operation codes, indicating what message is for.
            These codes are passed to the client directly via templating,
            so no need to modify elsewhere. """
        ADD_MARKER = 1
        ADD_LINE = 2
        REMOVE_ITEM = 3
        ADD_ROW = 4
        UPDATE_ROW = 6
        SET_HEADER = 7
        SET_ELEMENT_INNER_HTML = 8
        EXECUTE_JAVASCRIPT = 9
        PING = 0

    def __init__(self, webSocket, onRegisteredFunc=None):
        """ Wraps a raw web socket and registers onUpdate as the signal handler.

        @param webSocket the underlying socket object; must not be None.
        @param onRegisteredFunc passed straight through to the base class.
        """
        super(WebSocket, self).__init__(processSignalFunc=self.onUpdate,
                                        onRegisteredFunc=onRegisteredFunc)

        assert webSocket is not None

        self.web_socket = webSocket
        self.is_cleaned_up = False
        self.controls = dict()
        self.pingTimer = Timer(4000, False)
        self.cleanup_funcs = []

    def ping(self):
        """ Sends a ping and cleans up unless the client answers 'PING_BACK'. """
        self.send({'static_op' : WebSocket.OP.PING})

        # If the send failed the socket has already been cleaned up; waiting
        # for a reply on it would only produce a second failure.
        if self.is_cleaned_up:
            return

        pingBack = self.receive()
        if pingBack != 'PING_BACK':
            self.cleanup()

    def pingFreqLimited(self):
        """ Pings only when the ping timer has ticked. """
        if self.pingTimer.ticked():
            self.ping()

    def send(self, data):
        """ Sends a dictionary to the client in json form.

        A failure to send is not raised; the socket is cleaned up instead.

        @param data a dictionary to be sent to the client. """
        dataToSend = json.dumps(data)

        try:
            self.web_socket.send(dataToSend)
        except Exception as e:
            self.cleanup()
            logger.debug('Web socket connection terminated while sending, reason: %s, exception type %s' % (e, type(e)))

    def receive(self):
        """ Receives one message from the client.

        @return the received message, or None if receiving failed (in which
                case the socket is cleaned up). """
        try:
            return self.web_socket.receive()
        except Exception as e:
            self.cleanup()
            logger.debug('Web socket connection terminated while receiving, reason: %s, exception type %s' % (e, type(e)))
            return None

    def onUpdate(self, signaler, data):
        """ Dispatches a signal to the handler stored under SignalActions.SOCKET.

        @param signaler not used here.
        @param data the signal dictionary, or None (ignored). """
        if data is None:
            return

        if SignalActions.SOCKET in data:
            data[SignalActions.SOCKET](self.controls, data)

    def cleanup(self):
        """ Marks the socket as cleaned up and runs the cleanup callbacks once.

        Repeated calls are no-ops. Without this a single failed ping could
        trigger cleanup several times (from send, from receive and from the
        reply check), running every callback each time. """
        if self.is_cleaned_up:
            return

        self.is_cleaned_up = True

        # Do not unregister from all here, the thread managing the socket does this
        # See WebSocketGroup.processWebSocket. This is important; we want the unregistering
        # to be done from a different thread to the one which the send operation originated from.
        # This avoids a problem where we might be iterating through event signalers, signal an event
        # but then the event signaler collection decreases in size as one is cleaned up. If from a different thread
        # it will change size after we have finished iterating through it.
        for item in self.cleanup_funcs:
            item(self)

    def addControls(self, controls):
        """ Adds each control in an iterable of controls. """
        for control in controls:
            self.addControl(control)

    def addControl(self, control):
        """ Registers a control under its control_name and links it back to this socket.

        @param control must be a Control (asserted). """
        assert isinstance(control, Control)

        self.controls[control.control_name] = control
        control.web_socket = self
class TwitterThread(BaseThread):
    """ Reads tweets from a Twitter feed and forwards them to an output queue. """

    def __init__(self, feed, outputQueue=None, initialData=None):
        """ Wires a feed to an output queue and seeds it with initial data.

        @param feed TwitterFeed to read from; a feed over a DummyIterable is built when None.
        @param outputQueue a new QueueEx is created when None.
        @param initialData optional iterable of items; a deep copy of each one
                           is bound to the feed's session and put on the
                           output queue.
        """
        threadName = self.__class__.__name__ + "_" + str(getUniqueId())
        super(TwitterThread, self).__init__(threadName, criticalThread=False)

        feed = TwitterFeed([], [], [], DummyIterable(),
                           None) if feed is None else feed
        outputQueue = QueueEx() if outputQueue is None else outputQueue

        assert isinstance(feed, TwitterFeed)
        assert isinstance(feed.twitter_session, TwitterSession)

        self.input_queue = feed
        self.twitter_feed = feed
        self.twitter_session = feed.twitter_session
        self.output_queue = outputQueue

        for original in (initialData if initialData is not None else ()):
            seededItem = copy.deepcopy(original)
            assert isinstance(getUser(seededItem), User)

            logger.info('Retrieved tweet/user from file: %s' % seededItem)
            seededItem.setTwitterSession(self.twitter_session)
            self.output_queue.put(seededItem)

        self.num_dropped = 0
        self.num_processed = 0

        self.num_twitter_geocoded_place = 0
        self.num_twitter_geocoded_coordinate = 0
        self.num_twitter_geocoded_both = 0
        self.num_not_twitter_geocoded = 0
        self.num_no_location = 0
        self.num_geocodeable = 0

        self.log_num_dropped_timer = Timer(
            Configuration.LOG_DROP_AMOUNT_FREQ_MS, False)

    def _onFailure(self, e):
        """ Shuts the owning instance down, unless it must be kept alive. """
        parentInstance = self.twitter_session.parent_instance
        if not parentInstance.enable_shutdown_after_no_usage:
            logger.error(
                "Failure limit reached on instance, but not shutting it down because it is a core instance"
            )
            return

        logger.error(
            'Twitter stream thread has failed for instance %s, shutting down instance'
            % self.twitter_session.instance_key)
        self.twitter_session.parent_instance.shutdownInstance()
        super(TwitterThread, self)._onFailure(e)

    def _onRestart(self, e):
        """ Logs the failure and restarts the feed's connection. """
        logger.error(
            'Twitter stream thread has failed for instance %s, restarting stream'
            % self.twitter_session.instance_key)
        self.twitter_feed.restartConnection()

    def _logAndResetStatistics(self):
        """ Logs the counters gathered since the last report, then zeroes them. """
        numProcessed = self.num_processed
        numDropped = self.num_dropped
        total = numProcessed + numDropped
        percentageDropped = (0 if total == 0 else
                             float(numDropped) / float(total) * 100.0)

        outputQueueSize = self.output_queue.qsize()

        numNoLocation = self.num_no_location
        numTwitterGeocodedPlace = self.num_twitter_geocoded_place
        numTwitterGeocodedCoordinate = self.num_twitter_geocoded_coordinate
        numTwitterGeocodedBoth = self.num_twitter_geocoded_both
        numNotTwitterGeocoded = self.num_not_twitter_geocoded
        numGeocodeable = self.num_geocodeable

        self.num_no_location = 0
        self.num_twitter_geocoded_place = 0
        self.num_twitter_geocoded_coordinate = 0
        self.num_twitter_geocoded_both = 0
        self.num_not_twitter_geocoded = 0
        self.num_geocodeable = 0
        self.num_dropped = 0
        self.num_processed = 0

        # GCQ = geocode cache queue.
        logger.info(
            'Processed %d items, dropped %d items (%.2f%%) from GCQ (queue size: %d)'
            % (numProcessed, numDropped, percentageDropped, outputQueueSize))
        logger.info(
            'Initial tweet state: no twitter geocode %d, twitter place %d, twitter coord %d, twitter place and coord %d, num geocodeable %d, num not geocodeable %d'
            % (numNotTwitterGeocoded, numTwitterGeocodedPlace,
               numTwitterGeocodedCoordinate, numTwitterGeocodedBoth,
               numGeocodeable, numNoLocation))

    def _run(self):
        """ Consumes the feed, classifying and forwarding each tweet.

        @return 0 when the thread was stopped mid-feed, otherwise None once
                the feed is exhausted.
        """
        for tweet in self.input_queue:
            if self.stopped:
                return 0

            if tweet is None:
                continue

            if self.log_num_dropped_timer.ticked():
                self._logAndResetStatistics()

            assert isinstance(tweet, Tweet)

            hasPlace = tweet.has_twitter_place
            hasCoordinate = tweet.coordinate is not None

            # How, if at all, Twitter itself geocoded the tweet.
            if hasPlace and not hasCoordinate:
                self.num_twitter_geocoded_place += 1
            elif hasCoordinate and not hasPlace:
                self.num_twitter_geocoded_coordinate += 1
            elif hasCoordinate and hasPlace:
                self.num_twitter_geocoded_both += 1
            else:
                self.num_not_twitter_geocoded += 1

            if not tweet.has_user:
                continue

            # A tweet is geocodeable when the user, the coordinate or the
            # twitter place supplies some location.
            if tweet.user.has_location or hasCoordinate or hasPlace:
                self.num_geocodeable += 1
            else:
                self.num_no_location += 1

            # NOTE(review): forwarding is kept inside the has_user guard
            # because the geocode-from-cache stage asserts that every item
            # has a user -- confirm this matches the intended layout.
            if (self.output_queue.qsize() <
                    Configuration.GEOCODE_FROM_CACHE_INPUT_THREAD_SIZE_CAP):
                self.num_processed += 1
                self.output_queue.put(tweet)
            else:
                self.num_dropped += 1

    def stop(self):
        """ Stops the thread and closes the Twitter session, if there is one. """
        super(TwitterThread, self).stop()

        session = self.twitter_session
        if session is not None:
            session.close()
def processCursor(cursor, constructObjectFunc, onIterationFunc=None, cursorSize=None, getCurrentIterationFunc=None):
    """Read every item from a cursor, then close the cursor.

    Each raw item is converted with constructObjectFunc. What happens to the
    converted object depends on whether onIterationFunc was supplied:

    - Without onIterationFunc, non-None objects are accumulated and returned.
    - With onIterationFunc, objects are handed to the callback instead and
      nothing is accumulated, so arbitrarily large cursors can be streamed
      without holding them in memory.

    onIterationFunc is called as
    onIterationFunc(iteration, endIteration, isFinished, currentObject, 'base'):
    once before reading starts (iteration 0, object None), once per item, and
    once after the loop with isFinished=True. Returning False (exactly) from a
    per-item call stops the read early.

    @param cursor iterable with a close() method; None is tolerated.
    @param constructObjectFunc callable turning a raw cursor item into an object.
    @param onIterationFunc optional progress / streaming callback, see above.
    @param cursorSize optional total used to derive endIteration for progress.
    @param getCurrentIterationFunc optional callable mapping an object to its
           iteration number; when it returns None the object is skipped.
           Defaults to a simple 1, 2, 3, ... counter.
    @return list of constructed objects, or None when the cursor is None or
            nothing was accumulated (always the case with onIterationFunc).
    """
    if cursor is None:
        return None

    try:
        stopwatch = Timer()
        streaming = onIterationFunc is not None

        if getCurrentIterationFunc is None:
            # Default numbering: count the calls, starting from 1.
            callCount = [0]

            def countingIterationFunc(obj, callCount=callCount):
                callCount[0] += 1
                return callCount[0]

            getCurrentIterationFunc = countingIterationFunc

        collected = []
        stoppedEarly = False
        iteration = 0
        iterationOffset = 0
        endIteration = cursorSize
        boundsKnown = False

        if streaming:
            onIterationFunc(iterationOffset, endIteration, False, None, 'base')

        for rawItem in cursor:
            currentObject = constructObjectFunc(rawItem)

            if not streaming:
                # Only accumulate when nobody is consuming items as they
                # arrive; otherwise millions of rows could end up in memory.
                if currentObject is not None:
                    collected.append(currentObject)
                continue

            iteration = getCurrentIterationFunc(currentObject)
            if iteration is None:
                continue

            if cursorSize is not None and not boundsKnown:
                # Iterations don't have to be 0 indexed, so rebase on the
                # first iteration number we actually see.
                iterationOffset = iteration - 1
                endIteration = cursorSize - iterationOffset
                boundsKnown = True

            if boundsKnown:
                iteration -= iterationOffset

            keepGoing = onIterationFunc(iteration, endIteration, False, currentObject, 'base')
            if keepGoing is False:
                stoppedEarly = True
                break

        if streaming:
            # Signal that we're finished; report full progress unless the
            # callback asked us to stop part way through.
            if not stoppedEarly:
                iteration = endIteration
            onIterationFunc(iteration, endIteration, True, None, 'base')

            logger.info('Successfully processed cursor in %dms' % stopwatch.time_since_constructed)

        elapsedMs = stopwatch.time_since_constructed
        logger.info('Successfully read %d items from cache (%d) in %dms' % (len(collected), stopwatch.__hash__(), elapsedMs))

        if collected:
            return collected
        return None
    finally:
        cursor.close()
class FollowerExtractorGateThread(BaseThread):
    """Gate in front of the follower extractor threads.

    Items arriving on the input queue are forwarded to the output (analysis)
    queue and, where eligible, the item's user is handed to the
    FollowerExtractorThread that belongs to the user's twitter session.
    Extractor threads are created lazily, one per twitter session, and are
    shared by every gate instance through the class level registry below.
    """

    # We have a follower extractor per twitter thread so that we can do more
    # than one user at a time.
    follower_extractor_threads = dict()
    _follower_extractor_threads_lock = RLock()

    def __init__(self, geocodeUserConfig, inputQueue=None, outputQueue=None, dataCollection=None, userAnalysisList=None):
        """
        @param geocodeUserConfig UserGeocodeConfig passed on to extractor threads.
        @param inputQueue queue of items to gate; a new QueueEx when omitted.
        @param outputQueue queue receiving forwarded items; a new QueueEx when omitted.
        @param dataCollection DataCollection, required despite the default.
        @param userAnalysisList passed on to extractor threads unchanged.
        """
        super(FollowerExtractorGateThread, self).__init__(self.__class__.__name__, criticalThread=True)

        if inputQueue is None:
            inputQueue = QueueEx()
        if outputQueue is None:
            outputQueue = QueueEx()

        assert dataCollection is not None
        assert isinstance(dataCollection, DataCollection)
        assert isinstance(geocodeUserConfig, UserGeocodeConfig)

        self.input_queue = inputQueue
        self.output_queue = outputQueue
        self.geocode_user_config = geocodeUserConfig

        # We use the data collection to check for users which already have followers.
        self.data_collection = dataCollection
        self.user_analysis_list = userAnalysisList

        self.num_dropped = 0
        self.num_processed = 0
        self.log_num_dropped_timer = Timer(Configuration.LOG_DROP_AMOUNT_FREQ_MS, False)

    def getExtractorThreadByTwitterSession(self, twitterSession):
        """Return the extractor thread for a session, creating it if needed.

        The lookup and the registration happen inside a single critical
        section. Doing them under two separate lock acquisitions would let two
        callers both miss, both create a thread for the same session, and have
        the second registration silently replace the first.

        @param twitterSession session the extractor thread belongs to.
        @return the extractor thread, or None if the session is not active.
        """
        if not twitterSession.is_session_active:
            return None

        registry = FollowerExtractorGateThread.follower_extractor_threads
        registryLock = FollowerExtractorGateThread._follower_extractor_threads_lock

        def onTerminateFunc():
            # pop() rather than del, so that a termination callback firing for
            # an entry that is already gone cannot raise KeyError.
            criticalSection(registryLock, lambda: registry.pop(twitterSession, None))

        # Filled in by getOrCreate() only when this call created the thread,
        # so that exactly one caller goes on to start it.
        createdThreads = []

        def getOrCreate():
            existingThread = registry.get(twitterSession, None)
            if existingThread is not None:
                return existingThread

            newThread = FollowerExtractorThread(self.geocode_user_config,
                                                outputQueue=self.output_queue,
                                                twitterSession=twitterSession,
                                                onTerminateFunc=onTerminateFunc,
                                                userAnalysisList=self.user_analysis_list)
            registry[twitterSession] = newThread
            createdThreads.append(newThread)
            return newThread

        extractorThread = criticalSection(registryLock, getOrCreate)

        # Start outside the lock, after registration, as before.
        if createdThreads:
            extractorThread.start()

        return extractorThread

    def shouldProcessUser(self, user):
        """Return True unless user is None or already in the data collection
        as a deep user object."""
        if user is None:
            return False

        return not self.data_collection.isDeepUserObjectIn(user)

    @staticmethod
    def _isInInfluenceArea(user, twitterSession):
        """Return True if the user's location, country or continent matches an
        influence source of the session's parent instance, either by geocode
        id or by lying strictly inside one of the influence rectangles."""
        userGeocode = user.location_geocode

        locations = [userGeocode]
        if userGeocode.country is not None:
            locations.append(userGeocode.country)
        if userGeocode.continent is not None:
            locations.append(userGeocode.continent)

        parentInstance = twitterSession.parent_instance
        influenceSourceGeocodeIds = parentInstance.influence_source_cache_ids
        influenceSourceRectangles = parentInstance.influence_source_rectangles

        if influenceSourceGeocodeIds is not None:
            for geocodeCacheId in influenceSourceGeocodeIds:
                for userLocation in locations:
                    if userLocation == geocodeCacheId:
                        return True

        if influenceSourceRectangles is not None:
            for rectangle in influenceSourceRectangles:
                # NOTE(review): the rectangle is read as (south, east, north,
                # west) and the test below requires east < [1] < west; confirm
                # this ordering against whatever produces the rectangles.
                south = rectangle[0]
                east = rectangle[1]
                north = rectangle[2]
                west = rectangle[3]

                for userLocation in locations:
                    userCoord = userLocation.coordinate
                    if south < userCoord[0] < north and east < userCoord[1] < west:
                        return True

        return False

    def addUser(self, user, maxQueueSize=None, restrictInfluenceArea=True):
        """Queue a user for follower enrichment on its session's extractor thread.

        @param user the User to enrich.
        @param maxQueueSize when not None, refuse if the extractor's input
               queue already holds more than this many items.
        @param restrictInfluenceArea when True (exactly), only accept users
               located in one of the instance's influence sources.
        @return True if the user was queued, False if it was rejected.
        """
        assert isinstance(user, User)

        if user.id is None:
            return False

        if not self.shouldProcessUser(user):
            return False

        extractorThread = self.getExtractorThreadByTwitterSession(user.twitter_session)
        if extractorThread is None:
            return False

        extractorThreadQueue = extractorThread.input_queue
        extractorThreadSession = extractorThread.twitter_session

        if maxQueueSize is not None and extractorThreadQueue.qsize() > maxQueueSize:
            return False

        if not user.is_geocoded:
            return False

        if restrictInfluenceArea is True:
            if not self._isInInfluenceArea(user, extractorThreadSession):
                return False

        user.queued_for_follower_enrichment = True
        FollowerExtractorGateThread.lastPlace = user.location_geocode.place_id
        extractorThreadQueue.put(user)
        return True

    def _logAndResetDropStats(self):
        """Log how many items were processed and dropped since the last report,
        then reset both counters."""
        numDropped = self.num_dropped
        numProcessed = self.num_processed
        total = numDropped + numProcessed

        if total == 0:
            percentageDropped = 0
        else:
            percentageDropped = float(numDropped) / float(total) * 100.0

        outputQueueSize = self.output_queue.qsize()

        self.num_dropped = 0
        self.num_processed = 0

        # AQ = analysis queue
        logger.info('Processed %d items, dropped %d items (%.2f%%) from AQ (queue size %d)' % (numProcessed, numDropped, percentageDropped, outputQueueSize))

    def _run(self):
        """Forward incoming items and feed eligible users to the extractors."""
        for item in self.input_queue:
            user = getUser(item)
            assert user is not None
            assert isinstance(user, User)

            if self.log_num_dropped_timer.ticked():
                self._logAndResetDropStats()

            # Always add tweets, even if we don't extract the followers of the user.
            if isinstance(item, Tweet):
                if self.output_queue.qsize() < Configuration.ANALYSIS_INPUT_THREAD_SIZE_CAP:
                    self.num_processed += 1
                    self.output_queue.put(item)
                else:
                    self.num_dropped += 1

            # Already has followers loaded, maybe this came back from
            # geocoder, so we can put it in our output queue safely.
            # NOTE(review): a Tweet that passed the size cap above and whose
            # user also satisfies this condition is put on the output queue
            # twice - confirm that is intended.
            if user.is_followers_loaded or user.is_followee:
                # Don't drop follower information, since that is so valuable.
                self.output_queue.put(item)
                continue

            # Make sure the queue doesn't get too full.
            if Configuration.AUTO_ENRICH_FOLLOWER_INFO_ENABLED:
                # Skip users with too few or too many followers; a limit of 0
                # disables that bound.
                minimumFollowers = Configuration.FOLLOWER_ENRICHMENT_GATE_THREAD_MINIMUM_FOLLOWERS
                maximumFollowers = Configuration.FOLLOWER_ENRICHMENT_GATE_THREAD_MAXIMUM_FOLLOWERS

                hasTooFew = minimumFollowers != 0 and user.num_followers < minimumFollowers
                hasTooMany = maximumFollowers != 0 and user.num_followers > maximumFollowers
                if hasTooFew or hasTooMany:
                    continue

                self.addUser(user, Configuration.FOLLOWER_ENRICHMENT_QUEUE_SIZE)
@classmethod def GeocodeFollowers(cls, includeFolloweeData, followeeDataProjection, includeFollowersData, followersDataProjection): geocodeProjection = UserProjection.Geocode(includeFolloweeData, followeeDataProjection) return cls(False, includeFollowersData, followersDataProjection, True, geocodeProjection, False, None, False, False, False, False, False, False, False, False, False, True) @classmethod def ExcludeRecursiveData(cls, dataProjection=None): return cls(True, True, dataProjection, True, False, None, False, None, True, True, True, True, True, True, True, True, True, True) logUserWritePerformanceTimer = Timer(Configuration.LOG_DROP_AMOUNT_FREQ_MS, False) def writeUserToCache(user, doUpdate): assert isinstance(user, User) # Used with $set operation. setFields = dict() # Used $addToSet operation. addToSetFields = dict() if user.is_followers_loaded: setFields.update({'is_followers_loaded': True}) if user.is_followee:
class FollowerExtractorThread(BaseThread):
    """Worker that retrieves the followers of queued users for one twitter
    session and pushes progress, followers and finished users to the output
    queue."""

    def __init__(self, geocodeUserConfig, outputQueue=None, twitterSession=None, onTerminateFunc=None, userAnalysisList=None):
        """
        @param geocodeUserConfig UserGeocodeConfig used when geocoding followers.
        @param outputQueue queue receiving users and followers; a new QueueEx
               when omitted.
        @param twitterSession TwitterSession this thread works for (required).
        @param onTerminateFunc forwarded to the base thread.
        @param userAnalysisList callables, each invoked with a user and
               returning a UserAnalysis or None; empty when omitted.
        """
        threadName = self.__class__.__name__ + "_" + str(getUniqueId())
        super(FollowerExtractorThread, self).__init__(threadName, onTerminateFunc, criticalThread=False)

        if outputQueue is None:
            outputQueue = QueueEx()
        if userAnalysisList is None:
            userAnalysisList = list()

        assert isinstance(twitterSession, TwitterSession)
        assert isinstance(geocodeUserConfig, UserGeocodeConfig)

        def continueRunningCheck():
            return twitterSession.is_session_active

        def notifyPositionFunc(item, position, lastPoppedItem):
            # Report the new queue position on the user and republish the user.
            queuedUser = getUser(item)
            if queuedUser is None:
                return

            assert isinstance(queuedUser, User)
            queuedUser.follower_enrichment_progress.onQueuePositionChange(queuedUser, position, lastPoppedItem)
            self.output_queue.put(queuedUser)

        self.input_queue = QueueNotify(continueRunningCheck, 2, notifyPositionFunc)
        self.output_queue = outputQueue
        self.twitter_session = twitterSession
        self.user_analysis_list = userAnalysisList
        self.geocode_user_config = geocodeUserConfig

        self.num_followers_processed = 0
        self.num_followers_geocoded = 0
        self.num_followees_processed = 0
        self.log_performance_timer = Timer(60000, False)

    def _onFailure(self, e):
        """Shut down the owning instance before delegating to the base handler."""
        logger.error('Follower extractor thread has failed for instance %s, shutting down instance' % self.twitter_session.instance_key)
        self.twitter_session.parent_instance.shutdownInstance()
        super(FollowerExtractorThread, self)._onFailure(e)

    def _logAndResetPerformanceStats(self):
        """Log the counters accumulated since the last report and zero them."""
        numFolloweesProcessed = self.num_followees_processed
        numFollowersProcessed = self.num_followers_processed
        numFollowersGeocoded = self.num_followers_geocoded

        self.num_followees_processed = 0
        self.num_followers_processed = 0
        self.num_followers_geocoded = 0

        logger.info('Num followees processed %d, num followers processed %d, num followers geocoded %d' % (numFolloweesProcessed, numFollowersProcessed, numFollowersGeocoded))

    def _enrichUser(self, user):
        """Retrieve follower ids and followers for one user, publishing the
        user and each follower to the output queue as work progresses."""
        assert user is not None

        if user.is_followers_loaded:
            return

        if user.twitter_session is None:
            logger.error('User reached enrichment thread with no twitter session')
            return

        instance = user.twitter_session.parent_instance
        instance_key = user.instance_key
        startTime = instance.constructed_at
        temporalCollection = getTemporalInfluenceCollection(instance_key)

        # Build this user's analysers; factories may decline by returning None.
        analysers = list()
        for analysisFactory in self.user_analysis_list:
            analyser = analysisFactory(user)
            if analyser is None:
                continue
            assert isinstance(analyser, UserAnalysis)
            analysers.append(analyser)

        def idsIterationFunc(userId, iteration, totalIterations):
            if not self.twitter_session.is_session_active:
                return False

            self.output_queue.put(user)
            return True

        def addTemporalEntryForCurrentUser(follower):
            timeId = getTimeIdFromTimestamp(startTime, Configuration.TEMPORAL_STEP, getEpochMs())

            userCacheIds = user.location_geocode.all_geocode_results_cache_id
            followerGeocodeResults = follower.location_geocode.all_geocode_results

            # One entry for every (user location, follower location) pair.
            for userCacheId in userCacheIds:
                userPlaceId = GeocodeResultAbstract.getPlaceIdFromCacheId(userCacheId)
                userProviderId = GeocodeResultAbstract.getProviderIdFromCacheId(userCacheId)

                for followerGeocodeResult in followerGeocodeResults:
                    instance.addTemporalEntry(temporalCollection,
                                              timeId,
                                              userProviderId,
                                              userPlaceId,
                                              followerGeocodeResult.provider_id,
                                              followerGeocodeResult.place_id,
                                              followerGeocodeResult.place_type)

        def handleFollower(follower):
            assert isinstance(follower, User)
            follower.is_followee = True
            follower.geocodeLocationFromCache(self.geocode_user_config, False)
            self.output_queue.put(follower)

            # Follower is now ready to be analysed.
            for analyser in analysers:
                analyser.onFollower(follower)

            self.num_followers_processed += 1

            if user.is_geocoded and follower.is_geocoded:
                self.num_followers_geocoded += 1
                addTemporalEntryForCurrentUser(follower)

        def iterationFunc(userId, iteration, totalIterations, followersFromLastIteration):
            if followersFromLastIteration is not None:
                for follower in followersFromLastIteration:
                    # Abandon the batch as soon as the session goes away.
                    if not self.twitter_session.is_session_active:
                        return False
                    handleFollower(follower)

            self.output_queue.put(user)
            return True

        # Retrieve followers.
        user.getFollowerIds(idsIterationFunc)
        result = user.getFollowers(iterationFunc)

        for analyser in analysers:
            user.addAnalyser(analyser)

        user.queued_for_follower_enrichment = False

        if result is None:
            logger.error('Failed to retrieve followers for user: %s - explanation: %s, %s, %s' % (user.last_follower_enrichment_error, user.is_followers_loaded, user.is_follower_ids_loaded, user))

        # Push update.
        self.num_followees_processed += 1
        self.output_queue.put(user)

        if self.log_performance_timer.ticked():
            self._logAndResetPerformanceStats()

    def _run(self):
        """Enrich queued users until the input queue stops yielding items."""
        for queuedItem in self.input_queue:
            self._enrichUser(getUser(queuedItem))

        # Prevent this thread from being restarted.
        self.stop()
def cursorItemsFromCache(instanceId, getCollectionFunc, placeId=None, epochMsStartRange=None, epochMsEndRange=None, pageNum=None, pageSize=None, typeSpecificQuery=None, projection=None, sortByTimestamp=None, typeSpecificHint=None):
    """Build a MongoDB cursor over cached items of one instance.

    @param instanceId identifies the instance; passed to getCollectionFunc.
    @param getCollectionFunc callable returning the collection for instanceId.
    @param placeId optional dict with 'providerId' and 'placeId' keys to
           restrict results to one place.
    @param epochMsStartRange optional inclusive lower timestamp bound.
    @param epochMsEndRange optional exclusive upper timestamp bound.
    @param pageNum page index; only used together with pageSize.
    @param pageSize page length; only used together with pageNum.
    @param typeSpecificQuery optional extra filter merged into the query last.
    @param projection optional projection object; when its do_query attribute
           is False no query is made at all.
    @param sortByTimestamp sort ascending by timestamp; defaults to True.
    @param typeSpecificHint optional index hint overriding the derived one.
    @return the cursor, carrying an extra upper_bound_timestamp attribute, or
            None when the projection disables the query.
    """
    if sortByTimestamp is None:
        sortByTimestamp = True

    epochMsStartRange, epochMsEndRange = fixEpochMsRange(epochMsStartRange, epochMsEndRange)

    upperBoundTimestamp = getEpochMs() if epochMsEndRange is None else epochMsEndRange

    if projection is not None and projection.do_query is False:
        return None

    assert instanceId is not None
    assert getCollectionFunc is not None

    collection = getCollectionFunc(instanceId)

    logFormatting = 'IN:%s, P:%s, ES:%s, EE:%s, PN:%s, PS:%s, T:%s, P:%s' % (instanceId, placeId, epochMsStartRange, epochMsEndRange, pageNum, pageSize, typeSpecificQuery, projection)

    timer = Timer()
    logger.info('Attempting to read items from cache (%d) -- %s' % (timer.__hash__(), logFormatting))

    # Timestamp bounds: [start, end).
    timestampFilter = dict()
    if epochMsEndRange is not None:
        timestampFilter['$lt'] = epochMsEndRange
    if epochMsStartRange is not None:
        timestampFilter['$gte'] = epochMsStartRange
    filtersOnTimestamp = len(timestampFilter) > 0

    query = dict()
    if filtersOnTimestamp:
        query['timestamp'] = timestampFilter
    if placeId is not None:
        providerIdValue = placeId['providerId']
        placeIdValue = placeId['placeId']
        query['geocode.providerId'] = providerIdValue
        query['geocode.placeId'] = placeIdValue

    # MongoDB sometimes gets it wrong, particularly with geocode.placeId,
    # so name the index to use explicitly whenever we filter on something.
    if typeSpecificHint is not None:
        hint = typeSpecificHint
    else:
        hintFields = []
        if placeId is not None:
            hintFields.append(('geocode.placeId', pymongo.ASCENDING))
        if filtersOnTimestamp:
            hintFields.append(('timestamp', pymongo.ASCENDING))
        hint = hintFields if hintFields else None

    if typeSpecificQuery is not None:
        query.update(typeSpecificQuery)

    if projection is None:
        cursor = collection.find(query)
    else:
        cursor = collection.find(query, projection.projection)
    cursor = cursor.hint(hint)

    if sortByTimestamp:
        cursor = cursor.sort([('timestamp', pymongo.ASCENDING)])

    if pageSize is not None and pageNum is not None:
        cursor = cursor.skip(pageSize * pageNum).limit(pageSize)

    # We use this to calculate progress through the cursor,
    # It is more efficient than using cursor.count.
    cursor.upper_bound_timestamp = upperBoundTimestamp

    logger.info('Successfully setup cursor in %dms -- %s' % (timer.time_since_constructed, logFormatting))

    if Configuration.MONGO_EXPLAINS_ENABLED:
        logger.critical('Tweet/User Explain: %s' % unicode(cursor.explain()))

    return cursor
import unittest import requests from api.config import Configuration, GE_MAP_QUEST, GE_GOOGLE from api.core.utility import Timer from api.geocode.geocode_shared import GeocodeResult, GeocodeResultGoogle, BadGeocodeException import logging import itertools logger = logging.getLogger(__name__) __author__ = 'Michael Pryor' # 1 every two seconds. # Confirmed with open map quest that 1 per second is okay, but set to every two seconds to be nice. geocode_from_external_timer_omq = Timer.rate_limited(60, 120 * 1000) # 2500 requests per day (24 hours). # This works out as once every 35 seconds. geocode_from_external_timer_google = Timer.rate_limited( 2500, 24 * 60 * 60 * 1000) def _geocodeFromExternalOMQ(query, countryCode=None, acceptableTypes=None): """ Uses open map quest to do a location search, e.g. if query is London then information about London city will be returned. Note this method restricts itself to 1 call per second.""" if query is None: return None geocode_from_external_timer_omq.waitForTick()
class WebSocket(EventHandler):
    """ Base class for all web socket interactions. """

    class OP:
        """ Contains all operation codes, indicating what message is for.
            These codes are passed to the client directly via templating, so
            no need to modify elsewhere. """
        ADD_MARKER = 1
        ADD_LINE = 2
        REMOVE_ITEM = 3
        ADD_ROW = 4
        UPDATE_ROW = 6
        SET_HEADER = 7
        SET_ELEMENT_INNER_HTML = 8
        EXECUTE_JAVASCRIPT = 9
        PING = 0

    def __init__(self, webSocket, onRegisteredFunc=None):
        """
        @param webSocket underlying socket object offering send() and receive().
        @param onRegisteredFunc forwarded to the EventHandler base class.
        """
        super(WebSocket, self).__init__(processSignalFunc=self.onUpdate,
                                        onRegisteredFunc=onRegisteredFunc)
        assert webSocket is not None

        self.web_socket = webSocket
        self.is_cleaned_up = False
        # Tracks whether cleanup_funcs have been invoked, so that repeated
        # cleanup() calls do not run them again.
        self._cleanup_funcs_ran = False
        self.controls = dict()
        self.pingTimer = Timer(4000, False)
        self.cleanup_funcs = []

    def ping(self):
        """ Sends a ping to the client and cleans up the socket unless the
            client answers with 'PING_BACK'. """
        self.send({'static_op': WebSocket.OP.PING})

        if self.is_cleaned_up:
            # The send failed (or the socket was already torn down), so there
            # is no reply to wait for.
            return

        pingBack = self.receive()
        if pingBack != 'PING_BACK':
            self.cleanup()

    def pingFreqLimited(self):
        """ Pings the client, but no more often than the ping timer allows. """
        if self.pingTimer.ticked():
            self.ping()

    def send(self, data):
        """ Sends a dictionary to the client in json form.
            A failure to send cleans up the socket instead of raising.

            @param data a dictionary to be sent to the client. """
        dataToSend = json.dumps(data)

        try:
            self.web_socket.send(dataToSend)
        except Exception as e:
            self.cleanup()
            logger.debug('Web socket connection terminated while sending, reason: %s, exception type %s' % (e, type(e)))

    def receive(self):
        """ Receives the next message from the client.

            @return the received message, or None if receiving failed, in
                    which case the socket has been cleaned up. """
        try:
            return self.web_socket.receive()
        except Exception as e:
            self.cleanup()
            logger.debug('Web socket connection terminated while receiving, reason: %s, exception type %s' % (e, type(e)))
            return None

    def onUpdate(self, signaler, data):
        """ Signal handler: if the data carries a socket action, invokes it
            with this socket's controls and the data. """
        if data is None:
            return

        if SignalActions.SOCKET in data:
            data[SignalActions.SOCKET](self.controls, data)

    def cleanup(self):
        """ Marks the socket as cleaned up and runs the cleanup functions.

            The cleanup functions run on the first call only. A single dead
            connection can reach this method several times (a failed send, a
            failed receive and the failed ping check all call it), and the
            registered functions should not be invoked once per failure. """
        self.is_cleaned_up = True

        if self._cleanup_funcs_ran:
            return
        self._cleanup_funcs_ran = True

        # Do not unregister from all here, the thread managing the socket does this
        # See WebSocketGroup.processWebSocket. This is important; we want the unregistering
        # to be done from a different thread to the one which the send operation originated from.
        # This avoids a problem where we might be iterating through event signalers, signal an event
        # but then the event signaler collection decreases in size as one is cleaned up. If from a different thread
        # it will change size after we have finished iterating through it.
        for cleanupFunc in self.cleanup_funcs:
            cleanupFunc(self)

    def addControls(self, controls):
        """ Registers every control in the given iterable, see addControl. """
        for control in controls:
            self.addControl(control)

    def addControl(self, control):
        """ Registers a control under its control_name and points the
            control back at this socket. """
        assert isinstance(control, Control)

        self.controls[control.control_name] = control
        control.web_socket = self