Example #1
0
def start_harvester(args, logger=None):
    """Create a ``Harvester`` from the given arguments and start its stream.

    Keywords are only prepared (via ``format_keywords``) when
    ``args.filter_type`` is ``'keyword'``; otherwise the harvester is
    created with ``keywords=None``.

    :param args: object exposing ``filter_type`` and, for keyword
        filtering, ``keywords``; it is also handed on to ``Harvester``.
    :param logger: optional logger forwarded to ``log_if_exists`` and to
        the harvester.
    """
    use_keywords = args.filter_type == 'keyword'
    keywords = format_keywords(args.keywords) if use_keywords else None

    log_if_exists(logger, 'Creating harvester.', 'DEBUG')
    harvester = Harvester(args=args, logger=logger, keywords=keywords)

    log_if_exists(logger, 'Starting the stream...', 'INFO')
    harvester.stream()
    def on_status(self, status):
        """Flatten an incoming status into a dict and publish it to Redis.

        Selected fields of ``status`` (text, user details, place details,
        source, language, creation time and, when present, point
        coordinates) are copied into a flat dict.  The dict is serialised
        with ``json.dumps`` and the resulting string is pickled and
        published on ``self.cli_args.redis_channel``.

        Nothing is published unless ``self.redis`` is truthy.  When
        ``self.location_only`` is truthy, statuses without point
        coordinates are skipped.

        A status without an attached place (``status.place`` is ``None``)
        is published with every ``location_place_*`` field set to ``None``
        instead of raising ``AttributeError``.

        :param status: status object delivered by the stream; presumably a
            tweepy ``Status`` -- confirm against the enclosing listener.
        :returns: ``None``.
        """
        log_if_exists(self.logger, 'Status Text: {}'.format(status.text),
                      'DEBUG')

        user = status.user
        # Statuses without an attached place carry ``place = None``; reading
        # through getattr() with a default yields None for every place field
        # in that case rather than failing on the attribute access.
        place = getattr(status, 'place', None)

        data = dict()
        data['text'] = status.text
        data['user_location_str'] = user.location
        data['location_place_name'] = getattr(place, 'name', None)
        data['location_place_full_name'] = getattr(place, 'full_name', None)
        data['location_place_country'] = getattr(place, 'country', None)
        data['location_place_country_code'] = \
            getattr(place, 'country_code', None)
        data['location_place_type'] = getattr(place, 'place_type', None)
        data['user_time_zone'] = user.time_zone
        data['source'] = status.source
        data['source_url'] = status.source_url
        data['id_str'] = status.id_str
        data['user_profile_image_url'] = user.profile_image_url
        data['user_profile_background_image_url'] = \
            user.profile_background_image_url
        data['user_screen_name'] = user.screen_name
        data['user_status_count'] = user.statuses_count
        data['user_followers_count'] = user.followers_count
        data['user_friends_count'] = user.friends_count
        data['user_favourites_count'] = user.favourites_count
        data['user_url'] = user.url
        data['user_description'] = user.description
        data['user_id_str'] = user.id_str
        data['language_code'] = status.lang
        data['created_at'] = \
            status.created_at.strftime("%a, %d %b %Y %H:%M:%S +0000")

        # Only point geometries provide a single lon/lat pair; the
        # coordinates list is read as [lon, lat].
        coordinates = status.coordinates
        location_exists = (coordinates is not None
                           and coordinates['type'] == 'Point')
        if location_exists:
            data['lon'] = coordinates['coordinates'][0]
            data['lat'] = coordinates['coordinates'][1]

        # In location-only mode, drop statuses that carry no point location.
        should_publish = not self.location_only or location_exists
        if self.redis and should_publish:
            # NOTE(review): the payload is a pickled JSON *string*, so
            # subscribers must unpickle and then JSON-decode it; unpickling
            # is only safe on a trusted channel.
            json_data = json.dumps(data)
            self.r.publish(self.cli_args.redis_channel,
                           pickle.dumps(json_data))
Example #3
0
    def __init__(self, args, logger=None, keywords=None):
        """Store the CLI arguments and set up the tweepy streaming objects.

        :param args: parsed CLI arguments; ``filter_type`` and ``location``
            (a comma-separated string of numbers) are read here, and the
            whole object is passed on to the stream listener.
        :param logger: optional logger used through ``log_if_exists`` and
            passed on to the stream listener.
        :param keywords: optional keywords kept on the instance.
        """
        self.cli_args = args
        self.logger = logger
        self.keywords = keywords

        self.filter_type = args.filter_type
        # "a,b,c,d" -> [a, b, c, d] as floats.
        self.location = [float(part) for part in args.location.split(',')]

        log_if_exists(self.logger, self.location, 'DEBUG')

        # Authenticate with the credentials held by Settings.
        settings = Settings()
        self.settings = settings
        auth = tweepy.OAuthHandler(settings.CONSUMER_KEY,
                                   settings.CONSUMER_SECRET)
        self.auth = auth
        auth.set_access_token(settings.ACCESS_TOKEN,
                              settings.ACCESS_TOKEN_SECRET)

        # The REST API handle, the listener and the stream share one auth.
        self.api = tweepy.API(auth)
        self.stream_listener = LokiStreamListener(
            self.api, args, logger)
        self.streaming_api = tweepy.streaming.Stream(
            auth, self.stream_listener)
 def on_timeout(self):
     """Log a warning about the stream timeout and return ``True``."""
     message = 'Timeout occurred on stream.'
     log_if_exists(self.logger, message, 'WARNING')
     # True is returned so that the stream is not killed.
     return True
 def on_error(self, status_code):
     """Log the received error status code and return ``True``.

     :param status_code: error code reported for the stream; it is only
         interpolated into the log message.
     """
     message = 'Encountered error with status code: {}'.format(status_code)
     log_if_exists(self.logger, message, 'ERROR')
     # True is returned so that the stream is not killed.
     return True