def start_harvester(args, logger=None):
    """Create a Harvester from the parsed CLI arguments and start its stream.

    ``args.keywords`` is passed through ``format_keywords`` only when
    ``args.filter_type`` equals ``'keyword'``; for any other filter type the
    harvester is created with ``keywords=None``.
    """
    filter_by_keyword = args.filter_type == 'keyword'
    keywords = format_keywords(args.keywords) if filter_by_keyword else None

    log_if_exists(logger, 'Creating harvester.', 'DEBUG')
    harvester = Harvester(logger=logger, keywords=keywords, args=args)

    log_if_exists(logger, 'Starting the stream...', 'INFO')
    harvester.stream()
def on_status(self, status):
    """Flatten an incoming status into a dict and publish it to Redis.

    Selected attributes of the status, its user and its place are copied
    into a flat dict.  When ``self.redis`` is truthy the dict is serialised
    with ``json.dumps``, pickled, and published on
    ``self.cli_args.redis_channel``.  If ``self.location_only`` is truthy,
    only statuses that carry ``Point`` coordinates are published.

    A status with no attached place (``status.place is None``) gets ``None``
    for every ``location_place_*`` field instead of raising
    ``AttributeError``.
    """
    log_if_exists(self.logger,
                  'Status Text: {}'.format(status.text), 'DEBUG')

    user = status.user
    place = status.place

    data = dict()
    data['text'] = status.text
    data['user_location_str'] = user.location

    # Not every status has a place attached; fall back to None rather than
    # crashing on attribute access of a missing place.
    place_fields = (
        ('location_place_name', 'name'),
        ('location_place_full_name', 'full_name'),
        ('location_place_country', 'country'),
        ('location_place_country_code', 'country_code'),
        ('location_place_type', 'place_type'),
    )
    for key, attr in place_fields:
        data[key] = getattr(place, attr) if place is not None else None

    data['user_time_zone'] = user.time_zone
    data['source'] = status.source
    data['source_url'] = status.source_url
    data['id_str'] = status.id_str
    data['user_profile_image_url'] = user.profile_image_url
    data['user_profile_background_image_url'] = \
        user.profile_background_image_url
    data['user_screen_name'] = user.screen_name
    data['user_status_count'] = user.statuses_count
    data['user_followers_count'] = user.followers_count
    data['user_friends_count'] = user.friends_count
    data['user_favourites_count'] = user.favourites_count
    data['user_url'] = user.url
    data['user_description'] = user.description
    data['user_id_str'] = user.id_str
    data['language_code'] = status.lang
    data['created_at'] = \
        status.created_at.strftime("%a, %d %b %Y %H:%M:%S +0000")

    location_exists = False
    coordinates = status.coordinates
    if coordinates is not None and coordinates['type'] == 'Point':
        # The first element is stored as longitude, the second as latitude.
        data['lon'] = coordinates['coordinates'][0]
        data['lat'] = coordinates['coordinates'][1]
        location_exists = True

    # Publish everything unless restricted to geo-located statuses.
    should_publish = not self.location_only or location_exists
    if should_publish and self.redis:
        # NOTE(review): the payload is a *pickled* JSON string, kept as-is
        # for compatibility with existing subscribers; they should only
        # unpickle data coming from a trusted Redis channel.
        json_data = json.dumps(data)
        self.r.publish(self.cli_args.redis_channel,
                       pickle.dumps(json_data))
def __init__(self, args, logger=None, keywords=None):
    """Store the CLI options and build the tweepy auth, API and stream.

    ``args.location`` is split on commas and each piece is converted to a
    float; the resulting list is kept on ``self.location`` and logged at
    DEBUG level.  Credentials are read from a fresh ``Settings`` instance.
    """
    self.cli_args = args
    self.logger = logger
    self.keywords = keywords
    self.filter_type = args.filter_type

    raw_coordinates = args.location.split(',')
    self.location = list(map(float, raw_coordinates))
    log_if_exists(logger, self.location, 'DEBUG')

    # Authenticate against the API with the configured credentials.
    self.settings = settings = Settings()
    self.auth = auth = tweepy.OAuthHandler(settings.CONSUMER_KEY,
                                           settings.CONSUMER_SECRET)
    auth.set_access_token(settings.ACCESS_TOKEN,
                          settings.ACCESS_TOKEN_SECRET)
    self.api = api = tweepy.API(auth)

    # Attach the listener to a streaming connection using the same auth.
    self.stream_listener = listener = LokiStreamListener(api, args, logger)
    self.streaming_api = tweepy.streaming.Stream(auth, listener)
def on_timeout(self):
    """Log a warning about a stream timeout.

    Returns ``True`` so that the stream is not killed.
    """
    message = 'Timeout occurred on stream.'
    log_if_exists(self.logger, message, 'WARNING')
    keep_stream_alive = True
    return keep_stream_alive
def on_error(self, status_code):
    """Log the given error status code at ERROR level.

    Returns ``True`` so that the stream is not killed.
    """
    message = 'Encountered error with status code: {}'.format(status_code)
    log_if_exists(self.logger, message, 'ERROR')
    keep_stream_alive = True
    return keep_stream_alive