def __init__(self, geocodeUserConfig, inputQueue=None, outputQueue=None, dataCollection=None, userAnalysisList=None):
    """Initialise the gate thread's queues, configuration and counters.

    Args:
        geocodeUserConfig: must be a UserGeocodeConfig instance.
        inputQueue: stored as ``input_queue``; a fresh QueueEx is created
            when omitted.
        outputQueue: stored as ``output_queue``; a fresh QueueEx is created
            when omitted.
        dataCollection: must be a DataCollection instance, even though the
            signature defaults it to None.
        userAnalysisList: stored unchanged as ``user_analysis_list``.

    Raises:
        AssertionError: (when assertions are enabled) if dataCollection is
            None or not a DataCollection, or geocodeUserConfig is not a
            UserGeocodeConfig.
    """
    thread_name = self.__class__.__name__
    super(FollowerExtractorGateThread, self).__init__(
        thread_name, criticalThread=True)

    # Fall back to private queues when the caller did not supply any.
    in_q = QueueEx() if inputQueue is None else inputQueue
    out_q = QueueEx() if outputQueue is None else outputQueue

    assert dataCollection is not None
    assert isinstance(dataCollection, DataCollection)
    assert isinstance(geocodeUserConfig, UserGeocodeConfig)

    self.input_queue, self.output_queue = in_q, out_q
    self.geocode_user_config = geocodeUserConfig

    # We use the data collection to check for users which already have
    # followers.
    self.data_collection = dataCollection
    self.user_analysis_list = userAnalysisList

    # Counters start at zero.
    self.num_dropped = self.num_processed = 0
    self.log_num_dropped_timer = Timer(
        Configuration.LOG_DROP_AMOUNT_FREQ_MS, False)
def __init__(self, data, inputQueue=None):
    """Initialise the analysis thread with its data collection and queue.

    Args:
        data: must be a DataCollection instance; stored as ``data``.
        inputQueue: stored as ``input_queue``; a fresh QueueEx is created
            when omitted.

    Raises:
        AssertionError: (when assertions are enabled) if data is not a
            DataCollection.
    """
    thread_name = self.__class__.__name__
    super(AnalysisThread, self).__init__(thread_name, criticalThread=True)

    # Resolve the queue before validating, as the default is created
    # regardless of whether the type check below passes.
    queue = QueueEx() if inputQueue is None else inputQueue

    assert isinstance(data, DataCollection)

    self.input_queue = queue
    self.data = data
def __init__(self, geocodeConfig, inputQueue=None, successOutputQueue=None, failureOutputQueue=None):
    """Initialise the external-geocoding thread's configuration and queues.

    Args:
        geocodeConfig: must be a UserGeocodeConfig instance.
        inputQueue: stored as ``input_queue``; a fresh QueueEx is created
            when omitted.
        successOutputQueue: stored as ``success_output_queue``; a fresh
            QueueEx is created when omitted.
        failureOutputQueue: stored as ``failure_output_queue`` exactly as
            given; no default queue is created, so it may remain None.

    Raises:
        AssertionError: (when assertions are enabled) if geocodeConfig is
            not a UserGeocodeConfig.
    """
    thread_name = self.__class__.__name__
    super(GeocodeFromExternalThread, self).__init__(
        thread_name, criticalThread=True)

    assert isinstance(geocodeConfig, UserGeocodeConfig)

    source_q = QueueEx() if inputQueue is None else inputQueue
    success_q = QueueEx() if successOutputQueue is None else successOutputQueue

    self.input_queue = source_q
    self.success_output_queue = success_q
    self.geocode_config = geocodeConfig
    self.failure_output_queue = failureOutputQueue
def __init__(self, geocodeConfig, inputQueue=None, successOutputQueue=None, primaryFailureOutputQueue=None, highLoadFailureOutputQueue=None, inMemoryOnly=None):
    """Initialise the cache-geocoding thread's queues, flags and counters.

    The thread name is the class name, with '_MEMORY_ONLY' appended when
    inMemoryOnly is truthy.

    Args:
        geocodeConfig: must be a UserGeocodeConfig instance.
        inputQueue: stored as ``input_queue``; a fresh QueueEx is created
            when omitted.
        successOutputQueue: stored as ``success_output_queue``; a fresh
            QueueEx is created when omitted.
        primaryFailureOutputQueue: stored as
            ``primary_failure_output_queue``; a fresh QueueEx is created
            when omitted.
        highLoadFailureOutputQueue: stored as
            ``high_load_failure_output_queue`` exactly as given; no default
            queue is created.
        inMemoryOnly: stored unchanged as ``in_memory_only``.

    Raises:
        AssertionError: (when assertions are enabled) if geocodeConfig is
            not a UserGeocodeConfig.
    """
    name_suffix = '_MEMORY_ONLY' if inMemoryOnly else ''
    thread_name = '%s%s' % (self.__class__.__name__, name_suffix)
    super(GeocodeFromCacheThread, self).__init__(
        thread_name, criticalThread=True)

    assert isinstance(geocodeConfig, UserGeocodeConfig)

    # Only these three queues receive a default; the high-load failure
    # queue is passed through untouched.
    source_q = QueueEx() if inputQueue is None else inputQueue
    success_q = QueueEx() if successOutputQueue is None else successOutputQueue
    failure_q = (QueueEx() if primaryFailureOutputQueue is None
                 else primaryFailureOutputQueue)

    self.input_queue = source_q
    self.success_output_queue = success_q
    self.primary_failure_output_queue = failure_q
    self.high_load_failure_output_queue = highLoadFailureOutputQueue
    self.geocode_config = geocodeConfig

    self.num_dropped_from_success = 0
    self.num_dropped_from_primary_failure = 0
    self.num_failed_over = 0
    self.log_timer = Timer(Configuration.LOG_DROP_AMOUNT_FREQ_MS, False)
    self.num_processed = 0
    self.in_memory_only = inMemoryOnly

    # The configured wait time is in milliseconds; keep it in seconds.
    wait_ms = float(Configuration.GEOCODE_FROM_CACHE_THREAD_WAIT_TIME_MS)
    self.sleep_time = wait_ms / 1000.0
def __init__(self, geocodeUserConfig, outputQueue=None, twitterSession=None, onTerminateFunc=None, userAnalysisList=None):
    """Initialise the follower extractor thread and its notifying input queue.

    The thread name is the class name followed by "_" and a value from
    getUniqueId(). The input queue is a QueueNotify built from two local
    callbacks: one reporting whether the Twitter session is still active
    and one invoked with queue-position updates for an item.

    Args:
        geocodeUserConfig: must be a UserGeocodeConfig instance.
        outputQueue: stored as ``output_queue``; a fresh QueueEx is created
            when omitted.
        twitterSession: must be a TwitterSession instance, even though the
            signature defaults it to None.
        onTerminateFunc: forwarded positionally to the base class
            constructor.
        userAnalysisList: stored as ``user_analysis_list``; a new empty list
            is used when omitted.

    Raises:
        AssertionError: (when assertions are enabled) if twitterSession is
            not a TwitterSession or geocodeUserConfig is not a
            UserGeocodeConfig.
    """
    super(FollowerExtractorThread, self).__init__(
        self.__class__.__name__ + "_" + str(getUniqueId()),
        onTerminateFunc,
        criticalThread=False)

    if outputQueue is None:
        outputQueue = QueueEx()
    if userAnalysisList is None:
        userAnalysisList = []

    assert isinstance(twitterSession, TwitterSession)
    assert isinstance(geocodeUserConfig, UserGeocodeConfig)

    # Assign the output queue BEFORE constructing the QueueNotify below:
    # notifyPositionFunc looks up self.output_queue at call time, so the
    # attribute must already exist by the time the callback can first run.
    self.output_queue = outputQueue

    def continueRunningCheck():
        # NOTE(review): the attribute is returned without being called;
        # confirm is_session_active is a property/flag rather than a method.
        return twitterSession.is_session_active

    def notifyPositionFunc(item, position, lastPoppedItem):
        user = getUser(item)
        if user is None:
            return

        assert isinstance(user, User)
        user.follower_enrichment_progress.onQueuePositionChange(
            user, position, lastPoppedItem)
        # Forward the user to the output queue after updating its progress.
        self.output_queue.put(user)

    # NOTE(review): the meaning of the literal 2 is defined by QueueNotify
    # and is not visible here.
    self.input_queue = QueueNotify(continueRunningCheck, 2, notifyPositionFunc)

    self.twitter_session = twitterSession
    self.user_analysis_list = userAnalysisList
    self.geocode_user_config = geocodeUserConfig

    self.num_followers_processed = 0
    self.num_followers_geocoded = 0
    self.num_followees_processed = 0
    # Presumably 60000 is milliseconds (one minute) - confirm against Timer.
    self.log_performance_timer = Timer(60000, False)
def __init__(self, feed, outputQueue=None, initialData=None):
    """Initialise the Twitter thread, its feed and its output queue.

    The thread name is the class name followed by "_" and a value from
    getUniqueId(). Any initialData items are deep-copied, bound to this
    thread's Twitter session and placed on the output queue during
    construction.

    Args:
        feed: a TwitterFeed; when None, a TwitterFeed built from empty
            lists, a DummyIterable and None is used instead. The feed is
            stored as both ``input_queue`` and ``twitter_feed``.
        outputQueue: stored as ``output_queue``; a fresh QueueEx is created
            when omitted.
        initialData: optional iterable of items to enqueue up front.

    Raises:
        AssertionError: (when assertions are enabled) if feed is not a
            TwitterFeed, its twitter_session is not a TwitterSession, or
            getUser() of an initial item is not a User.
    """
    thread_name = self.__class__.__name__ + "_" + str(getUniqueId())
    super(TwitterThread, self).__init__(thread_name, criticalThread=False)

    if feed is None:
        feed = TwitterFeed([], [], [], DummyIterable(), None)
    if outputQueue is None:
        outputQueue = QueueEx()

    assert isinstance(feed, TwitterFeed)
    session = feed.twitter_session
    assert isinstance(session, TwitterSession)

    self.input_queue = feed
    self.twitter_session = session
    self.twitter_feed = feed
    self.output_queue = outputQueue

    # Work on copies so the caller's objects are left untouched.
    for original in (initialData if initialData is not None else ()):
        entry = copy.deepcopy(original)
        assert isinstance(getUser(entry), User)

        logger.info('Retrieved tweet/user from file: %s' % entry)

        entry.setTwitterSession(self.twitter_session)
        self.output_queue.put(entry)

    # All statistics start at zero.
    self.num_dropped = self.num_processed = 0
    self.num_twitter_geocoded_place = 0
    self.num_twitter_geocoded_coordinate = 0
    self.num_twitter_geocoded_both = 0
    self.num_not_twitter_geocoded = 0
    self.num_no_location = self.num_geocodeable = 0
    self.log_num_dropped_timer = Timer(
        Configuration.LOG_DROP_AMOUNT_FREQ_MS, False)
def startThreads(data, display, userAnalysers):
    """Create the processing threads, wire their queues together and start them.

    Queue wiring as established by the constructor arguments below:
      * ``tweetQueue`` is the input of the memory-only cache geocoder (gcm).
      * gcm's primary failures go to the cache geocoder (gc); gc's primary
        failures go to the external geocoder (ge).
      * Successes from gcm, gc and ge all go to the follower extractor
        gate's input queue (feb).
      * feb's output queue is the analysis thread's input queue (an); the
        high-load failures of gcm/gc and the failures of ge are also sent
        to that same queue.

    Args:
        data: passed as the data collection to the gate and analysis threads.
        display: passed to DisplayThread.
        userAnalysers: passed to the gate thread as its user analysis list.

    Returns:
        dict with 'tweet_queue' (the entry queue) and
        'follower_extractor_gate_thread' (the primary gate thread).
    """
    tweetQueue = QueueEx()
    userGeocodeConfig = UserGeocodeConfig(
        Configuration.GEOCODE_EXTERNAL_PROVIDER)

    # Primary instance of each stage; built back-to-front so each stage can
    # be handed the input queue of the stage it feeds.
    feb = FollowerExtractorGateThread(userGeocodeConfig,
                                      dataCollection=data,
                                      userAnalysisList=userAnalysers)
    an = AnalysisThread(inputQueue=feb.output_queue, data=data)
    ge = GeocodeFromExternalThread(geocodeConfig=userGeocodeConfig,
                                   failureOutputQueue=an.input_queue,
                                   successOutputQueue=feb.input_queue)
    di = DisplayThread(display=display)
    gc = GeocodeFromCacheThread(geocodeConfig=userGeocodeConfig,
                                primaryFailureOutputQueue=ge.input_queue,
                                highLoadFailureOutputQueue=an.input_queue,
                                successOutputQueue=feb.input_queue,
                                inMemoryOnly=False)
    gcm = GeocodeFromCacheThread(geocodeConfig=userGeocodeConfig,
                                 inputQueue=tweetQueue,
                                 primaryFailureOutputQueue=gc.input_queue,
                                 highLoadFailureOutputQueue=an.input_queue,
                                 successOutputQueue=feb.input_queue,
                                 inMemoryOnly=True)

    # range(1, N) yields N - 1 iterations: the primary thread above counts
    # as the first worker, and each extra worker shares its queues/config.
    for n in range(1, Configuration.NUM_GEOCODE_FROM_CACHE_WORKERS_MEMORY_ONLY):
        aux = GeocodeFromCacheThread(
            geocodeConfig=gcm.geocode_config,
            inputQueue=gcm.input_queue,
            primaryFailureOutputQueue=gcm.primary_failure_output_queue,
            highLoadFailureOutputQueue=gcm.high_load_failure_output_queue,
            successOutputQueue=gcm.success_output_queue,
            inMemoryOnly=gcm.in_memory_only)
        aux.start()

    for n in range(1, Configuration.NUM_GEOCODE_FROM_CACHE_WORKERS):
        aux = GeocodeFromCacheThread(
            geocodeConfig=gc.geocode_config,
            inputQueue=gc.input_queue,
            primaryFailureOutputQueue=gc.primary_failure_output_queue,
            highLoadFailureOutputQueue=gc.high_load_failure_output_queue,
            successOutputQueue=gc.success_output_queue,
            inMemoryOnly=gc.in_memory_only)
        aux.start()

    for n in range(1, Configuration.NUM_ANALYSIS_WORKERS):
        aux = AnalysisThread(inputQueue=an.input_queue, data=data)
        aux.start()

        # NOTE(review): the source this was recovered from had lost its
        # indentation, so it is an assumption that the extra gate thread is
        # created once per extra analysis worker (inside this loop) rather
        # than once after it - confirm against the intended worker counts.
        aux = FollowerExtractorGateThread(
            inputQueue=feb.input_queue,
            outputQueue=feb.output_queue,
            geocodeUserConfig=feb.geocode_user_config,
            dataCollection=feb.data_collection,
            userAnalysisList=feb.user_analysis_list)
        aux.start()

    # Start the primary threads only after all extra workers are running.
    gc.start()
    gcm.start()
    ge.start()
    an.start()
    feb.start()
    di.start()

    return {'tweet_queue': tweetQueue, 'follower_extractor_gate_thread': feb}