def twitter_stream(cls, session, args, max_retries=5, retry_stall=60):
    """Run the Twitter streaming process with retries and a stop notice.

    Tracked keywords and the Twitter platform id are read through
    `session`, which is then closed. A `QueueParser` consumer is started
    and a `TwitterStream` is kept running; whenever streaming raises, the
    error is logged and a new stream is started after a stall. When the
    loop ends the consumer is stopped and, if mail settings are present
    in `args`, a notification mail is sent.

    Parameters
    ----------
    cls : object
        Owner providing the `conf` mapping (`window_size` and the
        `sns.twitter` settings are read from it).
    session : object
        Database session handed to the site/platform look-ups; closed
        before streaming starts.
    args : dict
        Command line arguments; `--mail-server`, `--mail-from` and
        `--mail-to` are read.
    max_retries : int
        Number of consecutive failed attempts after which the loop gives
        up.
    retry_stall : int
        Initial stall in seconds between attempts; doubled after every
        consecutive failure.
    """
    sites = get_site_tuples(session)
    keywords = get_track_keywords(sites)
    platform_id = get_platform_id(session, name=N_PLATFORM_TWITTER)
    session.close()
    w_size = cls.conf['window_size']
    c = cls.conf['sns']['twitter']['app_credentials']
    snut = cls.conf['sns']['twitter']['save_none_url_tweet']
    retries = 0
    q = Queue.Queue()
    consumer = QueueParser(q, platform_id, w_size, save_none_url_tweet=snut)
    qhandler = QueueHandler(q)
    consumer.start()
    stall_time = retry_stall
    try:
        while True:
            # Rebind on every attempt so the handler below never reads an
            # unbound name (constructor failed on the first attempt) or a
            # stale instance left over from a previous attempt.
            streamer = None
            try:
                streamer = TwitterStream(c, [qhandler],
                                         dict(track=keywords), w_size)
                streamer.stream()
            except Exception as e:
                logger.exception(e)
                time.sleep(stall_time)
                # NOTE(review): `_counter` presumably counts received
                # tweets; a stream that got past 100 is treated as having
                # been healthy -- confirm against TwitterStream.
                if getattr(streamer, '_counter', 0) > 100:
                    # reset retry counter and stall time
                    retries = 0
                    stall_time = retry_stall
                else:
                    # increase retry counter and stall time
                    retries += 1
                    stall_time = 2 * stall_time
                if retries >= max_retries:
                    logger.error('Reached max retries!')
                    break
    except (KeyboardInterrupt, SystemExit):
        # Deliberate stop request. Catching it around the whole loop also
        # covers an interrupt arriving during the back-off sleep above.
        pass
    finally:
        consumer.stop()
    s = args['--mail-server']
    f = args['--mail-from']
    t = args['--mail-to']
    if s and f and t:
        logger.info('server %r, from %r, to %r', s, f, t)
        # Best effort only: a mail failure is logged, never raised.
        try:
            server = smtplib.SMTP(s)
            try:
                msg = 'Twitter streaming is stopped!'
                server.sendmail(f, t, msg)
            finally:
                # Always release the SMTP connection.
                server.quit()
        except Exception as e:
            logger.error(e)
    logger.info('Exit')
def twitter_stream(cls, session, args):
    """Run the Twitter streaming process, dumping through a queue handler.

    The dump directory is taken from `args['--dump-dir']` (user-expanded
    and made absolute) or defaults to `<HOAXY_HOME>/dumps`, and is created
    when missing. Tracked keywords are read through `session`, which is
    then closed. A `QueueHandler` consumer is started and passed to a
    `TwitterStream` as its handler. The consumer is always stopped when
    streaming ends, whether normally, by interrupt, or by an error.

    Parameters
    ----------
    cls : object
        Owner providing the `conf` mapping (`window_size` and the
        `sns.twitter` settings are read from it).
    session : object
        Database session handed to `get_site_tuples`; closed before
        streaming starts.
    args : dict
        Command line arguments; only `--dump-dir` is read.
    """
    # Resolve the dump folder and create it if needed.
    if args['--dump-dir'] is not None:
        dump_dir = os.path.expanduser(args['--dump-dir'])
        dump_dir = os.path.abspath(dump_dir)
    else:
        dump_dir = os.path.join(HOAXY_HOME, 'dumps')
    if not os.path.exists(dump_dir):
        # Clear the umask so the requested 0o755 mode is applied as-is.
        # It is saved before the `try` so the `finally` can always
        # restore it.
        org_umask = os.umask(0)
        try:
            os.makedirs(dump_dir, 0o755)
        finally:
            os.umask(org_umask)
    sites = get_site_tuples(session)
    keywords = get_track_keywords(sites)
    session.close()
    window_size = cls.conf['window_size']
    credentials = cls.conf['sns']['twitter']['app_credentials']
    save_none_url_tweet = cls.conf['sns']['twitter']['save_none_url_tweet']

    tw_queue = Queue()
    consumer = QueueHandler(
        tw_queue,
        bucket_size=window_size,
        dump_dir=dump_dir,
        parser_kwargs=dict(save_none_url_tweet=save_none_url_tweet))
    consumer.start()
    logger.debug('Consumer thread started.')

    # KeyboardInterrupt signal handler
    def clean_up(signal_n, c_frame):
        """Turn SIGINT into KeyboardInterrupt so the block below runs."""
        raise KeyboardInterrupt

    signal.signal(signal.SIGINT, clean_up)
    try:
        streamer = TwitterStream(
            credentials=credentials,
            handlers=[consumer],
            params=dict(track=keywords),
            window_size=window_size)
        streamer.stream()
        logger.info('Twitter steaming exits.')
    except (KeyboardInterrupt, SystemExit):
        logger.info('KeyboardInterruption recevied, cleaning up ...')
    finally:
        # Stop the consumer on every exit path, including unexpected
        # errors, so the started consumer is never left running.
        consumer.stop()
        logger.info('Clean up done, exit!')
def open_spider(self, spider):
    """Fetch the site tuples when the spider is opened.

    The session carried by `spider` is passed to `get_site_tuples` and
    the result is stored on `self.site_tuples`.
    """
    session = spider.session
    self.site_tuples = get_site_tuples(session)