Beispiel #1
0
    def shutdownInstance(self, removeFromTwitterInstancesParent=True):
        """Stop this instance and delete the data stored for it.

        Does nothing when the instance is already flagged as shut down.

        If removeFromTwitterInstancesParent is true and a parent instance
        collection is attached, the shutdown is delegated to the parent's
        removeTwitterInstanceByAuth(); the parent is expected to call this
        method again with the flag set to False. Otherwise the cleanup is
        performed here: the twitter thread is asked to stop, the user, tweet
        and temporal influence collections are dropped, the setup code (if
        any) is handed back through unconsumeCode() and the instance is
        removed through removeInstance().
        """
        if self.is_shutdown:
            return

        if removeFromTwitterInstancesParent and self.parent_twitter_instances is not None:
            # Will call this method with removeFromTwitterInstancesParent set to False.
            self.parent_twitter_instances.removeTwitterInstanceByAuth(
                self.oauth)
            return

        # Flag first so that a repeated call returns immediately.
        self.is_shutdown = True

        instanceKey = unicode(self.instance_key)
        logger.info('Shutdown instance called on instance %s' % instanceKey)

        logger.info('Shutting down twitter thread on instance %s..' %
                    instanceKey)
        try:
            self.twitter_thread.stop()
        except AttributeError:
            # The thread might not have been initialized yet.
            pass

        # Don't wait on thread, cannot find a way to terminate post request in requests API so have
        # to wait for next tweet or keep alive request to come from twitter before terminating.

        # Wait for current write to finish, avoid dropping collection and then when write completes
        # collection is made again.
        time.sleep(1.5)

        logger.info('Dropping twitter user data on instance %s..' %
                    instanceKey)
        getUserCollection(instanceKey).drop()

        logger.info('Dropping twitter tweet data on instance %s..' %
                    instanceKey)
        getTweetCollection(instanceKey).drop()

        logger.info(
            'Dropping twitter temporal influence data on instance %s..' %
            instanceKey)
        getTemporalInfluenceCollection(instanceKey).drop()

        if self.instance_setup_code is not None:
            # Argument order must follow the message: setup code first,
            # then the instance key.
            logger.info(
                'Returning instance setup code %s on instance %s..' %
                (self.instance_setup_code, instanceKey))
            unconsumeCode(self.instance_setup_code)

        logger.info(
            'Removing instance from instance %s lifetime collection..' %
            instanceKey)
        removeInstance(instanceKey)

        logger.info('Instance %s cleaned up successfully' % instanceKey)
    def shutdownInstance(self, removeFromTwitterInstancesParent=True):
        """Shut this instance down and remove everything stored for it.

        Returns immediately if the instance was already shut down.

        With removeFromTwitterInstancesParent set and a parent instance
        collection present, the call is forwarded to the parent's
        removeTwitterInstanceByAuth(), which is expected to re-enter this
        method with the flag set to False. In every other case this method
        marks the instance as shut down, asks the twitter thread to stop,
        drops the user / tweet / temporal influence collections, returns the
        setup code via unconsumeCode() when one is set, and finally calls
        removeInstance().
        """
        if self.is_shutdown:
            return

        if removeFromTwitterInstancesParent and self.parent_twitter_instances is not None:
            # Will call this method with removeFromTwitterInstancesParent set to False.
            self.parent_twitter_instances.removeTwitterInstanceByAuth(self.oauth)
        else:
            # Set the flag before doing any work so re-entrant calls are no-ops.
            self.is_shutdown = True

            instanceKey = unicode(self.instance_key)
            logger.info('Shutdown instance called on instance %s' % instanceKey)

            logger.info('Shutting down twitter thread on instance %s..' % instanceKey)
            try:
                self.twitter_thread.stop()
            except AttributeError:
                # The thread might not have been initialized yet.
                pass

            # Don't wait on thread, cannot find a way to terminate post request in requests API so have
            # to wait for next tweet or keep alive request to come from twitter before terminating.

            # Wait for current write to finish, avoid dropping collection and then when write completes
            # collection is made again.
            time.sleep(1.5)

            logger.info('Dropping twitter user data on instance %s..' % instanceKey)
            getUserCollection(instanceKey).drop()

            logger.info('Dropping twitter tweet data on instance %s..' % instanceKey)
            getTweetCollection(instanceKey).drop()

            logger.info('Dropping twitter temporal influence data on instance %s..' % instanceKey)
            getTemporalInfluenceCollection(instanceKey).drop()

            setupCode = self.instance_setup_code
            if setupCode is not None:
                # The message names the setup code first and the instance
                # second, so the arguments are supplied in that order.
                logger.info('Returning instance setup code %s on instance %s..' % (setupCode, instanceKey))
                unconsumeCode(setupCode)

            logger.info('Removing instance from instance %s lifetime collection..' % instanceKey)
            removeInstance(instanceKey)

            logger.info('Instance %s cleaned up successfully' % instanceKey)
Beispiel #3
0
    def _run(self):
        """Enrich every user taken from the input queue with follower data.

        For each queued item the user is looked up via getUser(). Users whose
        followers are already loaded, or that have no twitter session, are
        skipped. For the remaining users the follower ids and followers are
        retrieved through callbacks that push updated users/followers onto
        the output queue, feed each follower to the per-user analysers and
        record temporal influence entries when both sides are geocoded.
        Processing counters are logged and reset whenever the performance
        timer ticks. Once the input queue is exhausted the thread stops
        itself.
        """
        for queuedItem in self.input_queue:
            user = getUser(queuedItem)
            assert user is not None

            # Nothing to do when the followers were loaded earlier.
            if user.is_followers_loaded:
                continue

            if user.twitter_session is None:
                logger.error(
                    'User reached enrichment thread with no twitter session')
                continue

            owningInstance = user.twitter_session.parent_instance
            ownerInstanceKey = user.instance_key
            baselineEpoch = owningInstance.constructed_at
            influenceCollection = getTemporalInfluenceCollection(
                ownerInstanceKey)

            # Build the analysers for this user; a factory may decline by
            # returning None.
            analysers = []
            for buildAnalyser in self.user_analysis_list:
                analyser = buildAnalyser(user)
                if analyser is None:
                    continue
                assert isinstance(analyser, UserAnalysis)
                analysers.append(analyser)

            def onFollowerIdsPage(userId, iteration, totalIterations):
                # Returns False once the session is no longer active,
                # otherwise pushes the user update and returns True.
                if self.twitter_session.is_session_active:
                    self.output_queue.put(user)
                    return True
                return False

            def recordTemporalInfluence(follower):
                # One temporal entry per (user geocode, follower geocode)
                # pair, all stamped with the current time id.
                timeId = getTimeIdFromTimestamp(baselineEpoch,
                                                Configuration.TEMPORAL_STEP,
                                                getEpochMs())

                followeeCacheIds = user.location_geocode.all_geocode_results_cache_id
                followerResults = follower.location_geocode.all_geocode_results

                for followeeCacheId in followeeCacheIds:
                    followeePlaceId = GeocodeResultAbstract.getPlaceIdFromCacheId(
                        followeeCacheId)
                    followeeProviderId = GeocodeResultAbstract.getProviderIdFromCacheId(
                        followeeCacheId)

                    for followerResult in followerResults:
                        destPlaceId = followerResult.place_id
                        destProviderId = followerResult.provider_id
                        destPlaceType = followerResult.place_type

                        owningInstance.addTemporalEntry(
                            influenceCollection, timeId,
                            followeeProviderId, followeePlaceId,
                            destProviderId, destPlaceId, destPlaceType)

            def onFollowersPage(userId, iteration, totalIterations,
                                followersFromLastIteration):
                # A None batch carries no followers; only the user update is
                # pushed in that case.
                if followersFromLastIteration is None:
                    batch = ()
                else:
                    batch = followersFromLastIteration

                for follower in batch:
                    if not self.twitter_session.is_session_active:
                        return False

                    assert isinstance(follower, User)
                    follower.is_followee = True

                    follower.geocodeLocationFromCache(
                        self.geocode_user_config, False)
                    self.output_queue.put(follower)

                    # Follower is now ready to be analysed.
                    for analyser in analysers:
                        analyser.onFollower(follower)

                    self.num_followers_processed += 1

                    if user.is_geocoded and follower.is_geocoded:
                        self.num_followers_geocoded += 1
                        recordTemporalInfluence(follower)

                self.output_queue.put(user)
                return True

            # Retrieve followers.
            user.getFollowerIds(onFollowerIdsPage)
            followers = user.getFollowers(onFollowersPage)

            for analyser in analysers:
                user.addAnalyser(analyser)

            user.queued_for_follower_enrichment = False

            if followers is None:
                logger.error(
                    'Failed to retrieve followers for user: %s - explanation: %s, %s, %s'
                    % (user.last_follower_enrichment_error,
                       user.is_followers_loaded, user.is_follower_ids_loaded,
                       user))

            # Push update.
            self.num_followees_processed += 1
            self.output_queue.put(user)

            if self.log_performance_timer.ticked():
                # Snapshot the counters, reset them, then report the snapshot.
                counters = (self.num_followees_processed,
                            self.num_followers_processed,
                            self.num_followers_geocoded)
                self.num_followees_processed = 0
                self.num_followers_processed = 0
                self.num_followers_geocoded = 0

                logger.info(
                    'Num followees processed %d, num followers processed %d, num followers geocoded %d'
                    % counters)

        # Prevent this thread from being restarted.
        self.stop()
Beispiel #4
0
        def func(templateArguments, instance):
            """Build the temporal influence payload for one source place.

            Reads start_epoch, end_epoch, source_place_id and
            source_provider_id from request.GET, fetches the matching
            temporal range for the given instance key and groups the
            destinations by place type.

            Returns {'json': data} where data has 'city', 'country' and
            'continent' entries, each a dict with 'geocode_list' (records of
            the form [placeId, providerId, display_name, coordinate, count,
            bounding_box], highest count first, capped at
            Configuration.DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE)
            and 'total' (sum of the counts of the listed records). Returns an
            empty dict when the instance is unknown or when the source place
            or provider id cannot be parsed.
            """
            twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(
                instance)
            if twitterInstance is None:
                return dict()

            baseEpoch = twitterInstance.constructed_at

            start_epoch = parseInteger(request.GET.start_epoch, default=None)
            end_epoch = parseInteger(request.GET.end_epoch, default=None)
            source_place_id = parseInteger(request.GET.source_place_id)
            source_provider_id = parseInteger(request.GET.source_provider_id)

            if source_place_id is None:
                # Log the raw request value: the parsed value is always None
                # here and would tell the reader nothing.
                logger.error(
                    'Invalid place ID specified while providing influence data: %r'
                    % (request.GET.source_place_id,))
                return dict()

            if source_provider_id is None:
                # A cache id cannot be built without a provider id either.
                logger.error(
                    'Invalid provider ID specified while providing influence data: %r'
                    % (request.GET.source_provider_id,))
                return dict()

            source_cache_id = GeocodeResultAbstract.buildCacheId(
                source_provider_id, source_place_id)

            temporalCollection = getTemporalInfluenceCollection(instance)

            # A missing epoch leaves the corresponding time id as None.
            start_time_id = None
            if start_epoch is not None:
                start_time_id = getTimeIdFromTimestamp(
                    baseEpoch, Configuration.TEMPORAL_STEP, start_epoch)

            end_time_id = None
            if end_epoch is not None:
                end_time_id = getTimeIdFromTimestamp(
                    baseEpoch, Configuration.TEMPORAL_STEP, end_epoch)

            timerMs = getEpochMs()
            cacheData = getTemporalRange(temporalCollection,
                                         start_time_id,
                                         end_time_id,
                                         source_cache_id,
                                         preciseFromBack=True,
                                         preciseFromFront=True)
            logger.info('Took %dms to read temporal range data' %
                        (getEpochMs() - timerMs))

            timerMs = getEpochMs()

            geocodeByPlaceType = dict()
            totalsByPlaceType = dict()

            if cacheData is not None:
                # cacheData maps provider id -> {'<placeType>_<placeId>': count}.
                for providerId, providerIdData in cacheData.iteritems():
                    providerId = int(providerId)

                    for destination, count in providerIdData.iteritems():
                        split = destination.split('_')
                        placeType = int(split[0])
                        placeId = int(split[1])

                        # Name, coordinate and bounding box are filled in
                        # below, only for the records that get displayed.
                        record = [placeId, providerId, None, None, count, None]

                        geocodeByPlaceType.setdefault(placeType,
                                                      list()).append(record)

                # Process only the records we are going to display.
                maxRecords = Configuration.DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE
                for placeType, records in geocodeByPlaceType.iteritems():
                    aux = sorted(records, key=lambda x: x[4], reverse=True)
                    aux = aux[:maxRecords]
                    # Rebinding an existing key does not resize the dict, so
                    # this is safe while iterating over it.
                    geocodeByPlaceType[placeType] = aux

                    for record in aux:
                        cacheId = GeocodeResultAbstract.buildCacheId(
                            record[1], record[0])
                        geocode = geocodeFromCacheById(cacheId)

                        record[2] = geocode.display_name
                        record[3] = geocode.coordinate
                        record[5] = geocode.bounding_box

                        totalsByPlaceType[placeType] = totalsByPlaceType.get(
                            placeType, 0) + record[4]

            def getResultPart(placeType):
                # Place types without data yield an empty list and a zero total.
                return {
                    'geocode_list': geocodeByPlaceType.get(placeType, list()),
                    'total': totalsByPlaceType.get(placeType, 0)
                }

            resultData = dict()
            resultData['city'] = getResultPart(
                GeocodeResultAbstract.PlaceTypes.CITY)
            resultData['country'] = getResultPart(
                GeocodeResultAbstract.PlaceTypes.COUNTRY)
            resultData['continent'] = getResultPart(
                GeocodeResultAbstract.PlaceTypes.CONTINENT)

            logger.info('Took %dms to build temporal range result data' %
                        (getEpochMs() - timerMs))

            return {'json': resultData}
Beispiel #5
0
        def func(templateArguments, instance):
            """Return the temporal influence data of one source place as JSON.

            The query parameters start_epoch, end_epoch, source_place_id and
            source_provider_id are read from request.GET. The temporal range
            stored for the given instance key is fetched and its destinations
            are grouped by place type.

            The result is {'json': data}; data holds 'city', 'country' and
            'continent', each with a 'geocode_list' of records
            [placeId, providerId, display_name, coordinate, count,
            bounding_box] (sorted by count, descending, and limited to
            Configuration.DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE)
            and a 'total' summing the counts of those listed records. An
            empty dict is returned when the instance cannot be found or the
            source place / provider id is not a valid integer.
            """
            twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instance)
            if twitterInstance is None:
                return dict()

            baseEpoch = twitterInstance.constructed_at

            start_epoch = parseInteger(request.GET.start_epoch, default=None)
            end_epoch = parseInteger(request.GET.end_epoch, default=None)
            source_place_id = parseInteger(request.GET.source_place_id)
            source_provider_id = parseInteger(request.GET.source_provider_id)

            if source_place_id is None:
                # The parsed id is always None in this branch, so report the
                # raw value that was received instead.
                logger.error('Invalid place ID specified while providing influence data: %r' % (request.GET.source_place_id,))
                return dict()

            if source_provider_id is None:
                # The provider id is required to build the cache id as well.
                logger.error('Invalid provider ID specified while providing influence data: %r' % (request.GET.source_provider_id,))
                return dict()

            source_cache_id = GeocodeResultAbstract.buildCacheId(source_provider_id, source_place_id)

            temporalCollection = getTemporalInfluenceCollection(instance)

            # Epochs that were not supplied translate to a time id of None.
            if start_epoch is not None:
                start_time_id = getTimeIdFromTimestamp(baseEpoch, Configuration.TEMPORAL_STEP, start_epoch)
            else:
                start_time_id = None

            if end_epoch is not None:
                end_time_id = getTimeIdFromTimestamp(baseEpoch, Configuration.TEMPORAL_STEP, end_epoch)
            else:
                end_time_id = None

            timerMs = getEpochMs()
            cacheData = getTemporalRange(temporalCollection, start_time_id, end_time_id, source_cache_id, preciseFromBack=True, preciseFromFront=True)
            logger.info('Took %dms to read temporal range data' % (getEpochMs() - timerMs))

            timerMs = getEpochMs()

            geocodeByPlaceType = dict()
            totalsByPlaceType = dict()

            if cacheData is not None:
                # Keys of cacheData are provider ids; each value maps
                # '<placeType>_<placeId>' to a count.
                for providerId, providerIdData in cacheData.iteritems():
                    providerId = int(providerId)

                    for destination, count in providerIdData.iteritems():
                        split = destination.split('_')
                        placeType = int(split[0])
                        placeId = int(split[1])

                        # Slots 2, 3 and 5 (display name, coordinate,
                        # bounding box) are resolved later for displayed
                        # records only.
                        record = [placeId,
                                  providerId,
                                  None,
                                  None,
                                  count,
                                  None]

                        geocodeByPlaceType.setdefault(placeType, list()).append(record)

                # Process only the records we are going to display.
                for placeType, records in geocodeByPlaceType.iteritems():
                    aux = sorted(records, key=lambda x: x[4], reverse=True)
                    aux = aux[:Configuration.DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE]
                    # Replacing the value of an existing key keeps the dict
                    # size unchanged, which is allowed during iteration.
                    geocodeByPlaceType[placeType] = aux

                    for record in aux:
                        cacheId = GeocodeResultAbstract.buildCacheId(record[1], record[0])
                        geocode = geocodeFromCacheById(cacheId)

                        record[2] = geocode.display_name
                        record[3] = geocode.coordinate
                        record[5] = geocode.bounding_box

                        totalsByPlaceType[placeType] = totalsByPlaceType.get(placeType, 0) + record[4]

            def getResultPart(placeType):
                # Falls back to an empty list / zero total for absent place types.
                return {'geocode_list': geocodeByPlaceType.get(placeType, list()), 'total': totalsByPlaceType.get(placeType, 0)}

            resultData = dict()
            resultData['city'] = getResultPart(GeocodeResultAbstract.PlaceTypes.CITY)
            resultData['country'] = getResultPart(GeocodeResultAbstract.PlaceTypes.COUNTRY)
            resultData['continent'] = getResultPart(GeocodeResultAbstract.PlaceTypes.CONTINENT)

            logger.info('Took %dms to build temporal range result data' % (getEpochMs() - timerMs))

            return {'json': resultData}