Beispiel #1
0
    def __init__(self):
        """Create two empty performance dicts and the event signaler.

        The signaler is an EventSignaler constructed with the fixed argument
        'realtime_performance'.
        """
        super(RealtimePerformanceContainer, self).__init__()

        # Both tables start out empty.
        self.instance_performance, self.location_performance = {}, {}

        signaler = EventSignaler('realtime_performance')
        self.event_signaler = signaler
Beispiel #2
0
class RealtimePerformanceContainer(object):
    """Keeps RealtimePerformance trackers per instance and per location.

    Trackers are fed through onTweet() and published through the
    'realtime_performance' EventSignaler by signalUpdate().
    """

    def __init__(self):
        """Start with empty tracker tables and create the event signaler."""
        super(RealtimePerformanceContainer, self).__init__()

        self.instance_performance = dict()
        self.location_performance = dict()

        self.event_signaler = EventSignaler('realtime_performance')

    def onTweet(self, tweet):
        """Record ``tweet`` on the trackers of its instance.

        When the tweet has a user and that user is geocoded, the tweet is
        recorded on the 'success' tracker of every location in
        ``tweet.user.location_geocode.all_geocode_results`` and on the
        instance's 'success' tracker. Otherwise it is recorded on the
        instance's 'geocode_fail' tracker.

        Trackers are created on first use only: the default passed to
        dict.setdefault is evaluated on every call, so it is only built here
        after a lookup has missed.
        """
        assert isinstance(tweet, Tweet)

        instanceKey = tweet.instance_key

        instancePerformance = self.instance_performance.get(instanceKey)
        if instancePerformance is None:
            # setdefault keeps the insertion a single dict operation.
            instancePerformance = self.instance_performance.setdefault(
                instanceKey,
                {'success': RealtimePerformance(), 'geocode_fail': RealtimePerformance()})

        if tweet.has_user and tweet.user.is_geocoded:
            for location in tweet.user.location_geocode.all_geocode_results:
                # Looked up inside the loop so that an instance with no geocode
                # results never gets an (empty) entry in location_performance.
                instanceLocations = self.location_performance.setdefault(instanceKey, {})

                locationPerformance = instanceLocations.get(location)
                if locationPerformance is None:
                    locationPerformance = instanceLocations.setdefault(
                        location, {'success': RealtimePerformance()})

                locationPerformance['success'].onTweet(tweet)

            instancePerformance['success'].onTweet(tweet)
        else:
            instancePerformance['geocode_fail'].onTweet(tweet)

    def signalUpdate(self):
        """Signal both tracker tables under 'instance_tweets' and 'location_tweets'."""
        self.event_signaler.signalEvent({'instance_tweets': self.instance_performance,
                                         'location_tweets': self.location_performance})
Beispiel #3
0
class DataSignaler(object):
    """Lock-guarded wrapper around a TreeFunctioned structure.

    Every add() is followed by an event carrying the whole structure, sent
    through an EventSignaler keyed by the name given at construction.
    """

    def __init__(self, name, pruneFunc, data):
        """Keep ``data`` and ``pruneFunc`` and create the signaler and lock.

        name: passed to EventSignaler as ``key``.
        pruneFunc: None, or a callable (asserted); stored as ``prune_func``.
        data: asserted to be a TreeFunctioned instance.
        """
        super(DataSignaler, self).__init__()

        assert isinstance(data, TreeFunctioned)

        self.data = data
        self.event_signaler = EventSignaler(key=name)

        # A missing prune function is fine; anything else must be callable.
        assert pruneFunc is None or callable(pruneFunc)

        self.prune_func = pruneFunc
        self._lock = RLock()

    def add(self, value):
        """Insert ``value`` under the lock, then signal the structure.

        The event is sent after the lock has been released.
        """
        with self._lock:
            self.data.addToTreeByFunction(value)

        signaler = self.event_signaler
        signaler.signalEvent({signaler.key: {'data': self.data}})

    def prune(self):
        """Run the prune function on the structure via criticalSection.

        Returns None when no prune function was supplied, otherwise whatever
        criticalSection returns.
        """
        if self.prune_func is None:
            return None

        def runPrune():
            return self.prune_func(dataStructure=self.data)

        return criticalSection(self._lock, runPrune)

    def inByFunction(self, value, hashFuncList=None, depth=0):
        """Forward to ``self.data.inByFunction`` through criticalSection."""
        def runQuery():
            return self.data.inByFunction(value, hashFuncList, depth)

        return criticalSection(self._lock, runQuery)

    def getOriginalByFunction(self, value, hashFuncList=None, depth=0):
        """Forward to ``self.data.getOriginalByFunction`` through criticalSection."""
        def runQuery():
            return self.data.getOriginalByFunction(value, hashFuncList, depth)

        return criticalSection(self._lock, runQuery)
Beispiel #4
0
class DataSignalerStateless(object):
    """Signals each added value through an EventSignaler without keeping it."""

    def __init__(self, name):
        """Create the EventSignaler, passing ``name`` as its ``key``."""
        super(DataSignalerStateless, self).__init__()

        self.event_signaler = EventSignaler(key=name)

    def add(self, value):
        """Signal ``value`` wrapped as ``{<signaler key>: {'data': value}}``."""
        signaler = self.event_signaler
        payload = {'data': value}
        signaler.signalEvent({signaler.key: payload})
Beispiel #5
0
class DataSignalerStateless(object):
    """Forwards added values to an EventSignaler; no value is stored here."""

    def __init__(self, name):
        """Build the EventSignaler with ``key=name``."""
        super(DataSignalerStateless, self).__init__()

        signaler = EventSignaler(key=name)
        self.event_signaler = signaler

    def add(self, value):
        """Signal a one-entry dict mapping the signaler key to ``{'data': value}``."""
        eventKey = self.event_signaler.key
        event = {eventKey: {'data': value}}
        self.event_signaler.signalEvent(event)
Beispiel #6
0
    def __init__(self, name, pruneFunc, data):
        """Keep ``data`` and ``pruneFunc`` and create the signaler and lock.

        name: passed to EventSignaler as ``key``.
        pruneFunc: None, or a callable (asserted); stored as ``prune_func``.
        data: asserted to be a TreeFunctioned instance; stored as ``data``.
        """
        super(DataSignaler, self).__init__()

        assert isinstance(data, TreeFunctioned)

        self.data = data
        self.event_signaler = EventSignaler(key=name)

        # A missing prune function is allowed; anything else must be callable.
        assert pruneFunc is None or callable(pruneFunc)

        self.prune_func = pruneFunc
        self._lock = RLock()
Beispiel #7
0
    def __init__(self):
        """Initialise empty performance dicts and the event signaler.

        The EventSignaler is constructed with 'realtime_performance'.
        """
        super(RealtimePerformanceContainer, self).__init__()

        # Nothing has been recorded yet, so both tables are empty.
        self.instance_performance = {}
        self.location_performance = {}

        self.event_signaler = EventSignaler('realtime_performance')
Beispiel #8
0
class DataSignaler(object):
    """Wraps a TreeFunctioned structure behind a lock and signals on add.

    After each add() the whole structure is sent through an EventSignaler
    whose key is the name given at construction.
    """

    def __init__(self, name, pruneFunc, data):
        """Store the structure and prune callback; create signaler and lock.

        name: passed to EventSignaler as ``key``.
        pruneFunc: None, or a callable (asserted); stored as ``prune_func``.
        data: asserted to be a TreeFunctioned instance.
        """
        super(DataSignaler, self).__init__()

        assert isinstance(data, TreeFunctioned)

        self.data = data
        self.event_signaler = EventSignaler(key=name)

        # None means "no pruning"; any other value has to be callable.
        assert pruneFunc is None or callable(pruneFunc)

        self.prune_func = pruneFunc
        self._lock = RLock()

    def add(self, value):
        """Add ``value`` to the structure while holding the lock, then signal.

        The lock is released before the event is signalled.
        """
        with self._lock:
            self.data.addToTreeByFunction(value)

        eventKey = self.event_signaler.key
        self.event_signaler.signalEvent({eventKey: {'data': self.data}})

    def prune(self):
        """Call the prune function with the structure, via criticalSection.

        Returns None if there is no prune function; otherwise returns the
        result of criticalSection.
        """
        pruneFunc = self.prune_func
        if pruneFunc is None:
            return None

        def pruneLocked():
            # Re-read the attribute at call time, as the call happens later.
            return self.prune_func(dataStructure=self.data)

        return criticalSection(self._lock, pruneLocked)

    def inByFunction(self, value, hashFuncList=None, depth=0):
        """Delegate to ``self.data.inByFunction`` through criticalSection."""
        def lookup():
            return self.data.inByFunction(value, hashFuncList, depth)

        return criticalSection(self._lock, lookup)

    def getOriginalByFunction(self, value, hashFuncList=None, depth=0):
        """Delegate to ``self.data.getOriginalByFunction`` through criticalSection."""
        def lookup():
            return self.data.getOriginalByFunction(value, hashFuncList, depth)

        return criticalSection(self._lock, lookup)
Beispiel #9
0
    def __init__(self, name, pruneFunc, data):
        """Store the structure and prune callback; create signaler and lock.

        name: passed to EventSignaler as ``key``.
        pruneFunc: None, or a callable (asserted); stored as ``prune_func``.
        data: asserted to be a TreeFunctioned instance; stored as ``data``.
        """
        super(DataSignaler, self).__init__()

        assert isinstance(data, TreeFunctioned)

        self.data = data
        signaler = EventSignaler(key=name)
        self.event_signaler = signaler

        # None means "no pruning"; any other value has to be callable.
        assert pruneFunc is None or callable(pruneFunc)

        self.prune_func = pruneFunc
        self._lock = RLock()
Beispiel #10
0
class RealtimePerformanceContainer(object):
    """Holds RealtimePerformance trackers keyed by instance and by location.

    onTweet() updates the trackers; signalUpdate() sends both tables through
    the 'realtime_performance' EventSignaler.
    """

    def __init__(self):
        """Create empty tracker tables and the event signaler."""
        super(RealtimePerformanceContainer, self).__init__()

        self.instance_performance = dict()
        self.location_performance = dict()

        self.event_signaler = EventSignaler('realtime_performance')

    def onTweet(self, tweet):
        """Record ``tweet`` on the matching trackers.

        A tweet whose user exists and is geocoded is recorded on the
        'success' tracker of each location in
        ``tweet.user.location_geocode.all_geocode_results`` and on the
        instance's 'success' tracker. Any other tweet is recorded on the
        instance's 'geocode_fail' tracker.

        The default argument of dict.setdefault is always evaluated, so the
        tracker dicts are only constructed after a plain lookup has missed;
        this avoids building throwaway RealtimePerformance objects per tweet.
        """
        assert isinstance(tweet, Tweet)

        instanceKey = tweet.instance_key

        instancePerformance = self.instance_performance.get(instanceKey)
        if instancePerformance is None:
            # Insert through setdefault so the insertion stays one dict call.
            instancePerformance = self.instance_performance.setdefault(
                instanceKey, {
                    'success': RealtimePerformance(),
                    'geocode_fail': RealtimePerformance()
                })

        if tweet.has_user and tweet.user.is_geocoded:
            for location in tweet.user.location_geocode.all_geocode_results:
                # Kept inside the loop: with no geocode results, no entry is
                # created for this instance in location_performance.
                instanceLocations = self.location_performance.setdefault(
                    instanceKey, {})

                locationPerformance = instanceLocations.get(location)
                if locationPerformance is None:
                    locationPerformance = instanceLocations.setdefault(
                        location, {'success': RealtimePerformance()})

                locationPerformance['success'].onTweet(tweet)

            instancePerformance['success'].onTweet(tweet)
        else:
            instancePerformance['geocode_fail'].onTweet(tweet)

    def signalUpdate(self):
        """Signal both tables under 'instance_tweets' and 'location_tweets'."""
        self.event_signaler.signalEvent({
            'instance_tweets': self.instance_performance,
            'location_tweets': self.location_performance
        })
    def manageSocket(self, webSocket, tupleArguments, socketId):
        """Stream cached user/tweet data for one instance to ``webSocket`` in batches.

        ``tupleArguments[0]`` is used as the instance key. The remaining options
        (batch size, output mode, provider/place ids, epoch range) are read from
        ``request.GET``. Exactly one output mode ends up active: full follower
        info (the default when none is requested), tweet info, or short follower
        info.

        Rows are sent on the 'user_tunnel' tunnel and, in tweet mode, on
        'tweet_tunnel'. When the counted size exceeds the batch size, or the
        cache reports it is finished, the tunnels are closed and the client-side
        ``onBatchEnd()`` is invoked; then either the tunnels are reopened for the
        next batch or ``onFinished()`` is invoked and the socket is cleaned up.

        Returns False from every return statement in this method (tunnel-open
        failure, unknown instance, and after the cache read has completed).
        ``socketId`` is not used in this method.
        """
        instanceId = tupleArguments[0]

        mainControl = webSocket.controls[self.key]
        assert isinstance(mainControl, DocumentControl)

        # Query-string options; parseInteger/parseBoolean are defined outside this method.
        # Batch size defaults to 1 MiB and is capped at 256 MiB.
        bytesPerBatch       =        parseInteger(request.GET.batchSizeBytes, maximum=1024 * 1024 * 256, default=1024 * 1024 * 1)
        tweetInfo           =        parseBoolean(request.GET.tweet_info, False)
        followerInfo        =        parseBoolean(request.GET.follower_info_full, False)
        followerInfoShort   =        parseBoolean(request.GET.follower_info_short, False)
        providerId          =        parseInteger(request.GET.provider_id)
        placeId             =        parseInteger(request.GET.place_id)
        startEpoch          =        parseInteger(request.GET.start_epoch)
        endEpoch            =        parseInteger(request.GET.end_epoch)

        # A place filter is only applied when both ids were supplied.
        if placeId is not None and providerId is not None:
            placeCacheId = GeocodeResultAbstract.buildCacheId(providerId, placeId)
        else:
            placeCacheId = None

        # Collapse the three flags to a single active mode. Priority is
        # followerInfo > tweetInfo > followerInfoShort; with none set, fall back
        # to followerInfo.
        if followerInfo:
            tweetInfo = False
            followerInfoShort = False
        elif tweetInfo:
            followerInfo = False
            followerInfoShort = False
        elif followerInfoShort:
            followerInfo = False
            tweetInfo = False
        else:
            followerInfo = True


        userTunnelId = 'user_tunnel'
        tweetTunnelId = None

        if tweetInfo:
            tweetTunnelId = 'tweet_tunnel'

        def openRequiredTunnels():
            # Tweet mode uses openTunnels(); the other modes open only the user tunnel.
            if tweetInfo:
                return self.openTunnels(webSocket)
            else:
                return self.openTunnel(userTunnelId, webSocket)

        if not openRequiredTunnels():
            logger.error('Failed to open initial tunnels')
            return False

        # Per-mode settings for the user projection, the cache query and the
        # output format selector (outputType).
        if tweetInfo:
            followerIdsFlag = False
            followeeIdsFlag = False
            analysisFlag = False
            isFollowersLoadedRequirement = None
            associatedWithTweetRequirement = True
            recursiveCacheFlag = False
            followerIdsProjection = None
            outputType = 1 # for csv.
        elif followerInfo:
            followerIdsFlag = True
            followeeIdsFlag = True
            analysisFlag = True
            isFollowersLoadedRequirement = True
            associatedWithTweetRequirement = None
            recursiveCacheFlag = True
            followerIdsProjection = None # this gives us all data on each follower.
            outputType = 2
        elif followerInfoShort:
            followerIdsFlag = True
            followeeIdsFlag = True
            followerIdsProjection = NoQueryProjection()
            analysisFlag = True
            isFollowersLoadedRequirement = True
            associatedWithTweetRequirement = None
            recursiveCacheFlag = True
            outputType = 3
        else:
            raise NotImplementedError()

        # NOTE(review): the arguments are positional; their meaning is defined by
        # UserProjection, which is not visible here.
        userProjection = UserProjection(True,
                                        True,
                                        None,
                                        True,
                                        followerIdsFlag,
                                        followerIdsProjection,
                                        followeeIdsFlag,
                                        UserProjection.Id(),
                                        True,
                                        False,
                                        False,
                                        True,
                                        True,
                                        False,
                                        False,
                                        False,
                                        False,
                                        analysisFlag)

        # One-element lists are used as mutable cells so the nested functions
        # below can update shared state.
        isFirstIteration = [True]

        twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instanceId)
        if twitterInstance is None:
            return False

        twitterSession = twitterInstance.twitter_thread.twitter_session
        progressBarTotalId = 'progress-bar-total'
        progressBarCurrentBatchId = 'progress-bar-current-batch'

        signaler = EventSignaler(self.key, [webSocket])

        # Gates how often the progress bars are refreshed (see ticked() below).
        updateProgressBarFreq = Timer(400,True)

        def sendData(tunnelId, data):
            # Each item is sent as one CRLF-terminated unicode line.
            self.sendDataOnTunnel(webSocket, tunnelId, (unicode(data) + '\r\n'))

        def sendHeader():
            sendData(userTunnelId, getUserHeader(outputType))

            if tweetTunnelId is not None:
                sendData(tweetTunnelId, getTweetHeader())

        def doProgressBarChange(percentage, progressBarId):
            # Sets the width of the element with the given id on the client.
            mainControl.executeJavascript('$("#%s").width("%.3f%%");' % (progressBarId, percentage))

        sendHeader()

        counter = [0]
        previousCounter = [0]
        def updateSocket(controls,
                         data,
                         bytesCounter=counter,
                         bytesPerBatch=bytesPerBatch,
                         previousCounter=previousCounter,
                         isFirstIteration=isFirstIteration):
            # Handler passed to signaler.signalEvent() below; sends one item to
            # the client and closes the batch when it is full or the read is done.
            user = data['user_data']
            tweet = data['tweet_data']
            percentage = data['percentage']
            isFinished = data['isFinished']

            control = controls[self.key]
            assert isinstance(control, DocumentControl)

            def updateProgressBars():
                # Remember the counter value the bars were last drawn for.
                previousCounter[0] = thisCounter = bytesCounter[0]

                percentageCurrentBatch = float(thisCounter) / float(bytesPerBatch) * 100
                percentageTotal = percentage

                if percentageTotal >= 100:
                    percentageCurrentBatch = 100

                # During the first batch the batch bar never shows less than the total bar.
                if isFirstIteration[0] and percentageCurrentBatch < percentageTotal:
                    percentageCurrentBatch = percentageTotal

                doProgressBarChange(percentageTotal, progressBarTotalId)
                doProgressBarChange(percentageCurrentBatch, progressBarCurrentBatchId)

            # Only redraw when the counter changed and the timer allows it.
            if previousCounter[0] != bytesCounter[0] and updateProgressBarFreq.ticked():
                updateProgressBars()

            dataToSendToClient = ''
            if user is not None:
                assert isinstance(user,User)
                dataToSendToClient = getUserRepresentation(user, outputType)
                sendData(userTunnelId, dataToSendToClient)

            if tweet is not None:
                assert isinstance(tweet, Tweet)
                dataToSendToClient = getTweetRepresentation(tweet)
                sendData(tweetTunnelId, dataToSendToClient)

            # NOTE(review): when both a user and a tweet were sent, only the tweet
            # representation is counted (dataToSendToClient was overwritten), and
            # the appended '\r\n' is never counted - confirm this is intended.
            dataLength = len(dataToSendToClient)
            bytesCounter[0] += dataLength

            if bytesCounter[0] > bytesPerBatch or isFinished:
                # End of batch: final bar update, reset the counter, close tunnels.
                updateProgressBars()
                isFirstIteration[0] = False

                bytesCounter[0] = 0
                mainControl.executeJavascript('onBatchEnd();')

                self.closeTunnels(webSocket)

                if not isFinished:
                    logger.debug('Waiting to receive next data provider')
                    if not openRequiredTunnels():
                        logger.warning('Failed to reinitialize tunnel slots')
                        webSocket.cleanup()
                        return

                    sendHeader()
                else:
                    mainControl.executeJavascript('onFinished();')

                    webSocket.cleanup()

        def onCacheIteration(iteration, total, isFinished, data, iteratorId):
            # Passed as onIterationFunc to the cache readers below. Returns
            # whether the socket is still open (False once it was cleaned up).
            # Don't write followee data to output as it would duplicate a lot of data.
            if iteratorId == 'followee':
                data = None

            running = not webSocket.is_cleaned_up
            if running:
                # We need to do this so that if the client closes the socket we are notified.
                webSocket.pingFreqLimited()

                percentage = getPercentage(iteration, total)
                # dataId is only used by the commented-out log line below.
                dataId = None
                if data is not None:
                    dataId = data.id
                #logger.info('iteration %.2f of %.2f (%.1f%%) - it: %s, userId: %s' % (iteration, total, percentage,iteratorId,dataId))

                user = None
                tweet = None
                if data is None:
                    pass
                elif isinstance(data, User):
                    user = data
                elif isinstance(data, Tweet):
                    tweet = data
                    if tweet.has_user:
                        user = tweet.user
                else:
                    logger.error('Invalid data from cache, type: %s' % type(data))
                    return running

                signaler.signalEvent({SignalActions.SOCKET: updateSocket, 'percentage' : percentage, 'user_data' : user, 'tweet_data' : tweet, 'isFinished' : isFinished})
                # Yield to other greenlets between items.
                gevent.sleep(0)
            else:
                logger.debug('Ending cache download prematurely')

            return running

        logger.debug('Starting to read data from cache...')

        # This makes sure the search is finite.
        epochNow = getEpochMs()
        if endEpoch is None or endEpoch > epochNow:
            endEpoch = epochNow

        # Follower modes read users from the cache; tweet mode reads tweets.
        if followerInfo or followerInfoShort:
            readUsersFromCache(twitterSession,
                               instanceId,
                               placeId = placeCacheId,
                               epochMsStartRange=startEpoch,
                               epochMsEndRange=endEpoch,
                               isFollowersLoadedRequirement=isFollowersLoadedRequirement,
                               associatedWithTweetRequirement=associatedWithTweetRequirement,
                               onIterationFunc=onCacheIteration,
                               recursive=recursiveCacheFlag,
                               userProjection=userProjection)
        else:
            readTweetsFromCache(twitterSession,
                                instanceId,
                                placeId = placeCacheId,
                                epochMsStartRange=startEpoch,
                                epochMsEndRange=endEpoch,
                                onIterationFunc=onCacheIteration,
                                retrieveUserData=True,
                                userProjection=userProjection)

        # We want to cleanup everything now since we are done.
        return False
Beispiel #12
0
    def __init__(self, name):
        """Create the EventSignaler, passing ``name`` as its ``key``."""
        super(DataSignalerStateless, self).__init__()

        signaler = EventSignaler(key=name)
        self.event_signaler = signaler
Beispiel #13
0
    def __init__(self, name):
        """Build the event signaler; ``name`` is handed over as ``key``."""
        super(DataSignalerStateless, self).__init__()

        # The signaler is the only attribute set here.
        eventSignaler = EventSignaler(key=name)
        self.event_signaler = eventSignaler