Ejemplo n.º 1
0
    def __init__(self,
                 geocodeUserConfig,
                 inputQueue=None,
                 outputQueue=None,
                 dataCollection=None,
                 userAnalysisList=None):
        """Set up the gate thread's queues, configuration and counters.

        Args:
            geocodeUserConfig: Must be a UserGeocodeConfig instance.
            inputQueue: Stored as ``input_queue``; a new QueueEx is created
                when omitted.
            outputQueue: Stored as ``output_queue``; a new QueueEx is created
                when omitted.
            dataCollection: Required DataCollection instance, despite the
                ``None`` default in the signature.
            userAnalysisList: Stored unchanged as ``user_analysis_list``.

        Raises:
            AssertionError: If ``dataCollection`` or ``geocodeUserConfig``
                fails its type check.
        """
        thread_name = self.__class__.__name__
        super(FollowerExtractorGateThread, self).__init__(
            thread_name, criticalThread=True)

        # Fall back to private queues when the caller does not share any.
        inputQueue = QueueEx() if inputQueue is None else inputQueue
        outputQueue = QueueEx() if outputQueue is None else outputQueue

        assert dataCollection is not None
        assert isinstance(dataCollection, DataCollection)
        assert isinstance(geocodeUserConfig, UserGeocodeConfig)

        self.geocode_user_config = geocodeUserConfig
        self.input_queue = inputQueue
        self.output_queue = outputQueue
        self.user_analysis_list = userAnalysisList

        # The data collection is used to check for users which already have
        # followers.
        self.data_collection = dataCollection

        # Counters, plus the timer created from the configured
        # LOG_DROP_AMOUNT_FREQ_MS value.
        self.num_processed = 0
        self.num_dropped = 0
        self.log_num_dropped_timer = Timer(
            Configuration.LOG_DROP_AMOUNT_FREQ_MS, False)
Ejemplo n.º 2
0
    def __init__(self, data, inputQueue=None):
        """Set up the analysis thread with its data collection and queue.

        Args:
            data: Must be a DataCollection instance; stored as ``data``.
            inputQueue: Stored as ``input_queue``; a new QueueEx is created
                when omitted.

        Raises:
            AssertionError: If ``data`` is not a DataCollection.
        """
        thread_name = self.__class__.__name__
        super(AnalysisThread, self).__init__(thread_name, criticalThread=True)

        # Use a private queue unless the caller supplied a shared one.
        inputQueue = QueueEx() if inputQueue is None else inputQueue

        assert isinstance(data, DataCollection)
        self.data = data
        self.input_queue = inputQueue
Ejemplo n.º 3
0
    def __init__(self,
                 geocodeConfig,
                 inputQueue=None,
                 successOutputQueue=None,
                 failureOutputQueue=None):
        """Set up the external geocoding thread's configuration and queues.

        Args:
            geocodeConfig: Must be a UserGeocodeConfig instance.
            inputQueue: Stored as ``input_queue``; a new QueueEx is created
                when omitted.
            successOutputQueue: Stored as ``success_output_queue``; a new
                QueueEx is created when omitted.
            failureOutputQueue: Stored as given (may remain ``None``); no
                default queue is created for it.

        Raises:
            AssertionError: If ``geocodeConfig`` is not a UserGeocodeConfig.
        """
        thread_name = self.__class__.__name__
        super(GeocodeFromExternalThread, self).__init__(
            thread_name, criticalThread=True)

        assert isinstance(geocodeConfig, UserGeocodeConfig)

        # Only the input and success queues receive a default instance.
        inputQueue = QueueEx() if inputQueue is None else inputQueue
        successOutputQueue = (QueueEx() if successOutputQueue is None
                              else successOutputQueue)

        self.geocode_config = geocodeConfig
        self.input_queue = inputQueue
        self.success_output_queue = successOutputQueue
        self.failure_output_queue = failureOutputQueue
Ejemplo n.º 4
0
    def __init__(self,
                 geocodeConfig,
                 inputQueue=None,
                 successOutputQueue=None,
                 primaryFailureOutputQueue=None,
                 highLoadFailureOutputQueue=None,
                 inMemoryOnly=None):
        """Set up the cache geocoding thread's queues, counters and timing.

        The thread name is the class name, with ``_MEMORY_ONLY`` appended
        when ``inMemoryOnly`` is truthy.

        Args:
            geocodeConfig: Must be a UserGeocodeConfig instance.
            inputQueue: Stored as ``input_queue``; a new QueueEx is created
                when omitted.
            successOutputQueue: Stored as ``success_output_queue``; a new
                QueueEx is created when omitted.
            primaryFailureOutputQueue: Stored as
                ``primary_failure_output_queue``; a new QueueEx is created
                when omitted.
            highLoadFailureOutputQueue: Stored as given (may remain
                ``None``); no default queue is created for it.
            inMemoryOnly: Stored unchanged as ``in_memory_only``.

        Raises:
            AssertionError: If ``geocodeConfig`` is not a UserGeocodeConfig.
        """
        name_suffix = '_MEMORY_ONLY' if inMemoryOnly else ''
        thread_name = '%s%s' % (self.__class__.__name__, name_suffix)
        super(GeocodeFromCacheThread, self).__init__(thread_name,
                                                     criticalThread=True)

        assert isinstance(geocodeConfig, UserGeocodeConfig)

        # Create private queues for every slot except the high-load one.
        inputQueue = QueueEx() if inputQueue is None else inputQueue
        successOutputQueue = (QueueEx() if successOutputQueue is None
                              else successOutputQueue)
        primaryFailureOutputQueue = (QueueEx()
                                     if primaryFailureOutputQueue is None
                                     else primaryFailureOutputQueue)

        self.geocode_config = geocodeConfig
        self.input_queue = inputQueue
        self.success_output_queue = successOutputQueue
        self.primary_failure_output_queue = primaryFailureOutputQueue
        self.high_load_failure_output_queue = highLoadFailureOutputQueue

        # Counters, plus the timer created from the configured
        # LOG_DROP_AMOUNT_FREQ_MS value.
        self.num_failed_over = 0
        self.num_dropped_from_primary_failure = 0
        self.num_dropped_from_success = 0
        self.log_timer = Timer(Configuration.LOG_DROP_AMOUNT_FREQ_MS, False)

        self.in_memory_only = inMemoryOnly
        self.num_processed = 0

        # The configured wait time is in milliseconds; keep it in seconds.
        wait_ms = float(Configuration.GEOCODE_FROM_CACHE_THREAD_WAIT_TIME_MS)
        self.sleep_time = wait_ms / 1000.0
Ejemplo n.º 5
0
    def __init__(self,
                 geocodeUserConfig,
                 outputQueue=None,
                 twitterSession=None,
                 onTerminateFunc=None,
                 userAnalysisList=None):
        """Set up the follower extractor thread for one Twitter session.

        The thread name is the class name followed by ``_`` and a value from
        ``getUniqueId()``. The input queue is a QueueNotify built here rather
        than supplied by the caller.

        Args:
            geocodeUserConfig: Must be a UserGeocodeConfig instance.
            outputQueue: Stored as ``output_queue``; a new QueueEx is created
                when omitted.
            twitterSession: Required TwitterSession instance, despite the
                ``None`` default in the signature.
            onTerminateFunc: Forwarded positionally to the base class
                constructor.
            userAnalysisList: Stored as ``user_analysis_list``; an empty list
                is used when omitted.

        Raises:
            AssertionError: If ``twitterSession`` or ``geocodeUserConfig``
                fails its type check.
        """
        thread_name = "_".join((self.__class__.__name__, str(getUniqueId())))
        super(FollowerExtractorThread, self).__init__(thread_name,
                                                      onTerminateFunc,
                                                      criticalThread=False)

        outputQueue = QueueEx() if outputQueue is None else outputQueue
        userAnalysisList = [] if userAnalysisList is None else userAnalysisList

        assert isinstance(twitterSession, TwitterSession)
        assert isinstance(geocodeUserConfig, UserGeocodeConfig)

        def _session_is_active():
            # Reports the session's current is_session_active value.
            return twitterSession.is_session_active

        def _on_position_change(item, position, lastPoppedItem):
            # Report the new queue position to the item's user, then push
            # that user onto the output queue. Items without a user are
            # ignored.
            user = getUser(item)
            if user is not None:
                assert isinstance(user, User)
                progress = user.follower_enrichment_progress
                progress.onQueuePositionChange(user, position, lastPoppedItem)
                self.output_queue.put(user)

        self.input_queue = QueueNotify(_session_is_active, 2,
                                       _on_position_change)
        self.output_queue = outputQueue
        self.geocode_user_config = geocodeUserConfig
        self.twitter_session = twitterSession
        self.user_analysis_list = userAnalysisList

        # Counters, plus a timer created with a 60000 interval.
        self.num_followees_processed = 0
        self.num_followers_geocoded = 0
        self.num_followers_processed = 0
        self.log_performance_timer = Timer(60000, False)
Ejemplo n.º 6
0
    def __init__(self, feed, outputQueue=None, initialData=None):
        """Set up the Twitter thread around a feed and preload initial items.

        The thread name is the class name followed by ``_`` and a value from
        ``getUniqueId()``.

        Args:
            feed: TwitterFeed used as the input queue. When ``None`` a
                placeholder ``TwitterFeed([], [], [], DummyIterable(), None)``
                is built instead. Its ``twitter_session`` must be a
                TwitterSession.
            outputQueue: Stored as ``output_queue``; a new QueueEx is created
                when omitted.
            initialData: Optional iterable of items. Each one is deep-copied,
                bound to this thread's Twitter session and put on the output
                queue.

        Raises:
            AssertionError: If the feed, its session, or the user obtained
                from an initial item fails its type check.
        """
        thread_name = "_".join((self.__class__.__name__, str(getUniqueId())))
        super(TwitterThread, self).__init__(thread_name, criticalThread=False)

        if feed is None:
            feed = TwitterFeed([], [], [], DummyIterable(), None)

        outputQueue = QueueEx() if outputQueue is None else outputQueue

        assert isinstance(feed, TwitterFeed)
        assert isinstance(feed.twitter_session, TwitterSession)

        # The feed doubles as the input queue of this thread.
        self.twitter_feed = self.input_queue = feed
        self.twitter_session = feed.twitter_session
        self.output_queue = outputQueue

        if initialData is not None:
            for original in initialData:
                # Work on a copy so the caller's objects are left untouched.
                restored = copy.deepcopy(original)

                assert isinstance(getUser(restored), User)

                logger.info('Retrieved tweet/user from file: %s' % restored)

                restored.setTwitterSession(self.twitter_session)
                self.output_queue.put(restored)

        # Counters, all starting at zero.
        self.num_dropped = self.num_processed = 0
        self.num_twitter_geocoded_place = 0
        self.num_twitter_geocoded_coordinate = 0
        self.num_twitter_geocoded_both = 0
        self.num_not_twitter_geocoded = 0
        self.num_no_location = self.num_geocodeable = 0
        self.log_num_dropped_timer = Timer(
            Configuration.LOG_DROP_AMOUNT_FREQ_MS, False)
Ejemplo n.º 7
0
def startThreads(data, display, userAnalysers):
    """Build the worker threads, wire their queues together and start them.

    Queue wiring, as established by the constructor arguments below:
      * the memory-only cache thread reads the tweet queue and sends its
        primary failures to the regular cache thread;
      * the regular cache thread sends its primary failures to the external
        geocoder;
      * every geocoding success goes to the follower extractor gate, whose
        output feeds the analysis thread;
      * high-load failures and external geocoder failures go straight to the
        analysis thread.

    Args:
        data: Passed to the gate threads as ``dataCollection`` and to the
            analysis threads as ``data``.
        display: Passed to the DisplayThread.
        userAnalysers: Passed to the gate thread as ``userAnalysisList``.

    Returns:
        A dict with ``'tweet_queue'`` (the input queue of the memory-only
        cache thread) and ``'follower_extractor_gate_thread'`` (the primary
        gate thread).
    """
    tweetQueue = QueueEx()

    geocodeConfig = UserGeocodeConfig(Configuration.GEOCODE_EXTERNAL_PROVIDER)

    gateThread = FollowerExtractorGateThread(geocodeConfig,
                                             dataCollection=data,
                                             userAnalysisList=userAnalysers)

    analysisThread = AnalysisThread(inputQueue=gateThread.output_queue,
                                    data=data)

    externalThread = GeocodeFromExternalThread(
        geocodeConfig=geocodeConfig,
        failureOutputQueue=analysisThread.input_queue,
        successOutputQueue=gateThread.input_queue)

    displayThread = DisplayThread(display=display)

    cacheThread = GeocodeFromCacheThread(
        geocodeConfig=geocodeConfig,
        primaryFailureOutputQueue=externalThread.input_queue,
        highLoadFailureOutputQueue=analysisThread.input_queue,
        successOutputQueue=gateThread.input_queue,
        inMemoryOnly=False)

    memoryCacheThread = GeocodeFromCacheThread(
        geocodeConfig=geocodeConfig,
        inputQueue=tweetQueue,
        primaryFailureOutputQueue=cacheThread.input_queue,
        highLoadFailureOutputQueue=analysisThread.input_queue,
        successOutputQueue=gateThread.input_queue,
        inMemoryOnly=True)

    def spawnCacheWorker(template):
        # Create and start an extra cache worker sharing the template
        # thread's configuration and queues.
        GeocodeFromCacheThread(
            geocodeConfig=template.geocode_config,
            inputQueue=template.input_queue,
            primaryFailureOutputQueue=template.primary_failure_output_queue,
            highLoadFailureOutputQueue=template.high_load_failure_output_queue,
            successOutputQueue=template.success_output_queue,
            inMemoryOnly=template.in_memory_only).start()

    # The primary threads built above are started at the end of this
    # function, so each worker loop begins counting at 1.
    for _ in range(1,
                   Configuration.NUM_GEOCODE_FROM_CACHE_WORKERS_MEMORY_ONLY):
        spawnCacheWorker(memoryCacheThread)

    for _ in range(1, Configuration.NUM_GEOCODE_FROM_CACHE_WORKERS):
        spawnCacheWorker(cacheThread)

    # Each additional analysis worker is paired with an additional gate
    # worker; both share the queues of their primary counterparts.
    for _ in range(1, Configuration.NUM_ANALYSIS_WORKERS):
        AnalysisThread(inputQueue=analysisThread.input_queue,
                       data=data).start()

        FollowerExtractorGateThread(
            inputQueue=gateThread.input_queue,
            outputQueue=gateThread.output_queue,
            geocodeUserConfig=gateThread.geocode_user_config,
            dataCollection=gateThread.data_collection,
            userAnalysisList=gateThread.user_analysis_list).start()

    for primary in (cacheThread, memoryCacheThread, externalThread,
                    analysisThread, gateThread, displayThread):
        primary.start()

    return {
        'tweet_queue': tweetQueue,
        'follower_extractor_gate_thread': gateThread,
    }