def shutdownInstance(self, removeFromTwitterInstancesParent=True):
    """Shut this instance down and drop the data stored for it.

    If the instance is already marked as shut down this is a no-op.

    When removeFromTwitterInstancesParent is true and a parent container is
    set, the request is only forwarded to the parent's
    removeTwitterInstanceByAuth(); per the original author's note the parent
    then calls this method again with the flag set to False, and that second
    call performs the actual cleanup below.
    """
    if self.is_shutdown:
        return

    if removeFromTwitterInstancesParent and self.parent_twitter_instances is not None:
        # Will call this method with removeFromTwitterInstancesParent set to False.
        self.parent_twitter_instances.removeTwitterInstanceByAuth(self.oauth)
        return

    self.is_shutdown = True

    instanceKey = unicode(self.instance_key)
    logger.info('Shutdown instance called on instance %s' % instanceKey)

    logger.info('Shutting down twitter thread on instance %s..' % instanceKey)
    try:
        self.twitter_thread.stop()
    except AttributeError:
        # The thread might not have been initialized yet.
        pass

    # Don't wait on (join) the thread: there is no known way to terminate the
    # post request in the requests API, so the thread only terminates once the
    # next tweet or keep alive request arrives from twitter.

    # Wait for the current write to finish, to avoid dropping a collection and
    # then having it re-created when the write completes.
    time.sleep(1.5)

    logger.info('Dropping twitter user data on instance %s..' % instanceKey)
    getUserCollection(instanceKey).drop()

    logger.info('Dropping twitter tweet data on instance %s..' % instanceKey)
    getTweetCollection(instanceKey).drop()

    logger.info('Dropping twitter temporal influence data on instance %s..' % instanceKey)
    getTemporalInfluenceCollection(instanceKey).drop()

    if self.instance_setup_code is not None:
        # Arguments follow the order of the placeholders: setup code first,
        # then the instance key (they were previously swapped).
        logger.info('Returning instance setup code %s on instance %s..'
                    % (self.instance_setup_code, instanceKey))
        unconsumeCode(self.instance_setup_code)

    logger.info('Removing instance from instance %s lifetime collection..' % instanceKey)
    removeInstance(instanceKey)

    logger.info('Instance %s cleaned up successfully' % instanceKey)
def shutdownInstance(self, removeFromTwitterInstancesParent=True):
    """Shut this instance down and drop the data stored for it.

    If the instance is already marked as shut down this is a no-op.

    When removeFromTwitterInstancesParent is true and a parent container is
    set, the request is only forwarded to the parent's
    removeTwitterInstanceByAuth(); per the original author's note the parent
    then calls this method again with the flag set to False, and that second
    call performs the actual cleanup below.
    """
    if self.is_shutdown:
        return

    if removeFromTwitterInstancesParent and self.parent_twitter_instances is not None:
        # Will call this method with removeFromTwitterInstancesParent set to False.
        self.parent_twitter_instances.removeTwitterInstanceByAuth(self.oauth)
        return

    self.is_shutdown = True

    instanceKey = unicode(self.instance_key)
    logger.info('Shutdown instance called on instance %s' % instanceKey)

    logger.info('Shutting down twitter thread on instance %s..' % instanceKey)
    try:
        self.twitter_thread.stop()
    except AttributeError:
        # The thread might not have been initialized yet.
        pass

    # Don't wait on (join) the thread: there is no known way to terminate the
    # post request in the requests API, so the thread only terminates once the
    # next tweet or keep alive request arrives from twitter.

    # Wait for the current write to finish, to avoid dropping a collection and
    # then having it re-created when the write completes.
    time.sleep(1.5)

    logger.info('Dropping twitter user data on instance %s..' % instanceKey)
    getUserCollection(instanceKey).drop()

    logger.info('Dropping twitter tweet data on instance %s..' % instanceKey)
    getTweetCollection(instanceKey).drop()

    logger.info('Dropping twitter temporal influence data on instance %s..' % instanceKey)
    getTemporalInfluenceCollection(instanceKey).drop()

    if self.instance_setup_code is not None:
        # Arguments follow the order of the placeholders: setup code first,
        # then the instance key (they were previously swapped).
        logger.info('Returning instance setup code %s on instance %s..'
                    % (self.instance_setup_code, instanceKey))
        unconsumeCode(self.instance_setup_code)

    logger.info('Removing instance from instance %s lifetime collection..' % instanceKey)
    removeInstance(instanceKey)

    logger.info('Instance %s cleaned up successfully' % instanceKey)
def _run(self): for item in self.input_queue: user = getUser(item) assert user is not None if user.is_followers_loaded: continue if user.twitter_session is None: logger.error( 'User reached enrichment thread with no twitter session') continue instance = user.twitter_session.parent_instance instance_key = user.instance_key startTime = instance.constructed_at temporalCollection = getTemporalInfluenceCollection(instance_key) analysis_list = list() for item in self.user_analysis_list: analysisObj = item(user) if analysisObj is not None: assert isinstance(analysisObj, UserAnalysis) analysis_list.append(analysisObj) def idsIterationFunc(userId, iteration, totalIterations): if not self.twitter_session.is_session_active: return False #logger.info('Retrieved ids of user %d/%d' % (iteration, totalIterations)) self.output_queue.put(user) return True def addTemporalEntryForCurrentUser(follower): timeId = getTimeIdFromTimestamp(startTime, Configuration.TEMPORAL_STEP, getEpochMs()) userCacheIds = user.location_geocode.all_geocode_results_cache_id followerGeocodeResults = follower.location_geocode.all_geocode_results for userCacheId in userCacheIds: userPlaceId = GeocodeResultAbstract.getPlaceIdFromCacheId( userCacheId) userProviderId = GeocodeResultAbstract.getProviderIdFromCacheId( userCacheId) for followerGeocodeResult in followerGeocodeResults: followerPlaceId = followerGeocodeResult.place_id followerProviderId = followerGeocodeResult.provider_id followerPlaceType = followerGeocodeResult.place_type instance.addTemporalEntry(temporalCollection, timeId, userProviderId, userPlaceId, followerProviderId, followerPlaceId, followerPlaceType) def iterationFunc(userId, iteration, totalIterations, followersFromLastIteration): if followersFromLastIteration is not None: for follower in followersFromLastIteration: if not self.twitter_session.is_session_active: return False assert isinstance(follower, User) follower.is_followee = True follower.geocodeLocationFromCache( 
self.geocode_user_config, False) self.output_queue.put(follower) # Follower is now ready to be analysed. for item in analysis_list: item.onFollower(follower) self.num_followers_processed += 1 if user.is_geocoded and follower.is_geocoded: self.num_followers_geocoded += 1 addTemporalEntryForCurrentUser(follower) self.output_queue.put(user) return True # Retrieve followers. #logger.info('Attempting to retrieve followers for user: %s' % user) user.getFollowerIds(idsIterationFunc) result = user.getFollowers(iterationFunc) for item in analysis_list: user.addAnalyser(item) user.queued_for_follower_enrichment = False if result is None: logger.error( 'Failed to retrieve followers for user: %s - explanation: %s, %s, %s' % (user.last_follower_enrichment_error, user.is_followers_loaded, user.is_follower_ids_loaded, user)) #else: #logger.info('Retrieved %d followers for user %s' % (len(result), user)) # Push update. self.num_followees_processed += 1 self.output_queue.put(user) if self.log_performance_timer.ticked(): numFolloweesProcessed = self.num_followees_processed numFollowersProcessed = self.num_followers_processed numFollowersGeocoded = self.num_followers_geocoded self.num_followees_processed = 0 self.num_followers_processed = 0 self.num_followers_geocoded = 0 logger.info( 'Num followees processed %d, num followers processed %d, num followers geocoded %d' % (numFolloweesProcessed, numFollowersProcessed, numFollowersGeocoded)) # Prevent this thread from being restarted. self.stop()
def func(templateArguments, instance):
    """Build the influence data of one source place for the given instance.

    Reads start_epoch, end_epoch, source_place_id and source_provider_id from
    request.GET, fetches the matching temporal range and groups the
    destinations by place type.

    Returns an empty dict when the instance is unknown or source_place_id
    does not parse. Otherwise returns {'json': resultData}, where resultData
    has the keys 'city', 'country' and 'continent', each mapping to
    {'geocode_list': [...], 'total': <sum of the listed counts>}. Each list
    entry is [placeId, providerId, display_name, coordinate, count,
    bounding_box].
    """
    twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instance)
    if twitterInstance is None:
        return dict()

    baseEpoch = twitterInstance.constructed_at
    start_epoch = parseInteger(request.GET.start_epoch, default=None)
    end_epoch = parseInteger(request.GET.end_epoch, default=None)
    rawSourcePlaceId = request.GET.source_place_id
    source_place_id = parseInteger(rawSourcePlaceId)
    source_provider_id = parseInteger(request.GET.source_provider_id)

    if source_place_id is None:
        # Log the value that was actually received; the parsed value is
        # always None on this path and carries no diagnostic information.
        logger.error('Invalid place ID specified while providing influence data: %s'
                     % repr(rawSourcePlaceId))
        return dict()

    # NOTE(review): source_provider_id is not validated the way
    # source_place_id is - confirm buildCacheId copes with a failed parse.
    source_cache_id = GeocodeResultAbstract.buildCacheId(source_provider_id, source_place_id)

    temporalCollection = getTemporalInfluenceCollection(instance)

    if start_epoch is not None:
        start_time_id = getTimeIdFromTimestamp(baseEpoch, Configuration.TEMPORAL_STEP, start_epoch)
    else:
        start_time_id = None

    if end_epoch is not None:
        end_time_id = getTimeIdFromTimestamp(baseEpoch, Configuration.TEMPORAL_STEP, end_epoch)
    else:
        end_time_id = None

    timerMs = getEpochMs()
    cacheData = getTemporalRange(temporalCollection,
                                 start_time_id,
                                 end_time_id,
                                 source_cache_id,
                                 preciseFromBack=True,
                                 preciseFromFront=True)
    logger.info('Took %dms to read temporal range data' % (getEpochMs() - timerMs))

    timerMs = getEpochMs()

    geocodeByPlaceType = dict()
    totalsByPlaceType = dict()

    if cacheData is not None:
        for providerId, providerIdData in cacheData.iteritems():
            providerId = int(providerId)

            for destination, count in providerIdData.iteritems():
                # Destination keys have the form '<placeType>_<placeId>'.
                split = destination.split('_')
                placeType = int(split[0])
                placeId = int(split[1])

                # display_name, coordinate and bounding_box are filled in
                # below, only for the records that end up being displayed.
                record = [placeId, providerId, None, None, count, None]
                geocodeByPlaceType.setdefault(placeType, list()).append(record)

        # Process only the records we are going to display.
        for placeType, records in geocodeByPlaceType.iteritems():
            aux = sorted(records, key=lambda x: x[4], reverse=True)
            aux = aux[:Configuration.DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE]
            # Replaces the value of an existing key only, so the dict does
            # not change size while it is being iterated.
            geocodeByPlaceType[placeType] = aux

            for record in aux:
                cacheId = GeocodeResultAbstract.buildCacheId(record[1], record[0])
                geocode = geocodeFromCacheById(cacheId)

                record[2] = geocode.display_name
                record[3] = geocode.coordinate
                count = record[4]
                record[5] = geocode.bounding_box

                totalsByPlaceType[placeType] = totalsByPlaceType.get(placeType, 0) + count

    def getResultPart(placeType):
        """Return the list and total for one place type (empty if absent)."""
        return {
            'geocode_list': geocodeByPlaceType.get(placeType, list()),
            'total': totalsByPlaceType.get(placeType, 0)
        }

    resultData = dict()
    resultData['city'] = getResultPart(GeocodeResultAbstract.PlaceTypes.CITY)
    resultData['country'] = getResultPart(GeocodeResultAbstract.PlaceTypes.COUNTRY)
    resultData['continent'] = getResultPart(GeocodeResultAbstract.PlaceTypes.CONTINENT)

    logger.info('Took %dms to build temporal range result data' % (getEpochMs() - timerMs))

    return {'json': resultData}
def func(templateArguments, instance):
    """Build the influence data of one source place for the given instance.

    Reads start_epoch, end_epoch, source_place_id and source_provider_id from
    request.GET, fetches the matching temporal range and groups the
    destinations by place type.

    Returns an empty dict when the instance is unknown or source_place_id
    does not parse. Otherwise returns {'json': resultData}, where resultData
    has the keys 'city', 'country' and 'continent', each mapping to
    {'geocode_list': [...], 'total': <sum of the listed counts>}. Each list
    entry is [placeId, providerId, display_name, coordinate, count,
    bounding_box].
    """
    twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instance)
    if twitterInstance is None:
        return dict()

    baseEpoch = twitterInstance.constructed_at
    start_epoch = parseInteger(request.GET.start_epoch, default=None)
    end_epoch = parseInteger(request.GET.end_epoch, default=None)
    rawSourcePlaceId = request.GET.source_place_id
    source_place_id = parseInteger(rawSourcePlaceId)
    source_provider_id = parseInteger(request.GET.source_provider_id)

    if source_place_id is None:
        # Log the value that was actually received; the parsed value is
        # always None on this path and carries no diagnostic information.
        logger.error('Invalid place ID specified while providing influence data: %s'
                     % repr(rawSourcePlaceId))
        return dict()

    # NOTE(review): source_provider_id is not validated the way
    # source_place_id is - confirm buildCacheId copes with a failed parse.
    source_cache_id = GeocodeResultAbstract.buildCacheId(source_provider_id, source_place_id)

    temporalCollection = getTemporalInfluenceCollection(instance)

    if start_epoch is not None:
        start_time_id = getTimeIdFromTimestamp(baseEpoch, Configuration.TEMPORAL_STEP, start_epoch)
    else:
        start_time_id = None

    if end_epoch is not None:
        end_time_id = getTimeIdFromTimestamp(baseEpoch, Configuration.TEMPORAL_STEP, end_epoch)
    else:
        end_time_id = None

    timerMs = getEpochMs()
    cacheData = getTemporalRange(temporalCollection,
                                 start_time_id,
                                 end_time_id,
                                 source_cache_id,
                                 preciseFromBack=True,
                                 preciseFromFront=True)
    logger.info('Took %dms to read temporal range data' % (getEpochMs() - timerMs))

    timerMs = getEpochMs()

    geocodeByPlaceType = dict()
    totalsByPlaceType = dict()

    if cacheData is not None:
        for providerId, providerIdData in cacheData.iteritems():
            providerId = int(providerId)

            for destination, count in providerIdData.iteritems():
                # Destination keys have the form '<placeType>_<placeId>'.
                split = destination.split('_')
                placeType = int(split[0])
                placeId = int(split[1])

                # display_name, coordinate and bounding_box are filled in
                # below, only for the records that end up being displayed.
                record = [placeId, providerId, None, None, count, None]
                geocodeByPlaceType.setdefault(placeType, list()).append(record)

        # Process only the records we are going to display.
        for placeType, records in geocodeByPlaceType.iteritems():
            aux = sorted(records, key=lambda x: x[4], reverse=True)
            aux = aux[:Configuration.DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE]
            # Replaces the value of an existing key only, so the dict does
            # not change size while it is being iterated.
            geocodeByPlaceType[placeType] = aux

            for record in aux:
                cacheId = GeocodeResultAbstract.buildCacheId(record[1], record[0])
                geocode = geocodeFromCacheById(cacheId)

                record[2] = geocode.display_name
                record[3] = geocode.coordinate
                count = record[4]
                record[5] = geocode.bounding_box

                totalsByPlaceType[placeType] = totalsByPlaceType.get(placeType, 0) + count

    def getResultPart(placeType):
        """Return the list and total for one place type (empty if absent)."""
        return {
            'geocode_list': geocodeByPlaceType.get(placeType, list()),
            'total': totalsByPlaceType.get(placeType, 0)
        }

    resultData = dict()
    resultData['city'] = getResultPart(GeocodeResultAbstract.PlaceTypes.CITY)
    resultData['country'] = getResultPart(GeocodeResultAbstract.PlaceTypes.COUNTRY)
    resultData['continent'] = getResultPart(GeocodeResultAbstract.PlaceTypes.CONTINENT)

    logger.info('Took %dms to build temporal range result data' % (getEpochMs() - timerMs))

    return {'json': resultData}