예제 #1
0
    def twitter_stream(cls, session, args, max_retries=5, retry_stall=60):
        """Run the Twitter streaming process with retry and back-off.

        Collects the tracking keywords and platform id through `session`,
        closes the session, starts a `QueueParser` consumer and then runs a
        `TwitterStream` in a loop. When the stream raises, the error is
        logged, the process sleeps for the current stall time and the stream
        is rebuilt. After the loop ends (max retries reached, or
        KeyboardInterrupt/SystemExit), the consumer is stopped and, if mail
        settings are provided, a notification mail is sent.

        Parameters
        ----------
        session : object
            Database session handed to the `get_*` helpers; it is closed
            once the initial lookups are done.
        args : dict
            Command line arguments; the keys '--mail-server', '--mail-from'
            and '--mail-to' are read to decide whether to send a mail.
        max_retries : int
            Number of consecutive failed attempts after which the loop
            gives up.
        retry_stall : int
            Initial number of seconds to sleep after a failure; doubled
            after every failure that does not reset the counter.
        """
        sites = get_site_tuples(session)
        keywords = get_track_keywords(sites)
        platform_id = get_platform_id(session, name=N_PLATFORM_TWITTER)
        session.close()
        w_size = cls.conf['window_size']
        c = cls.conf['sns']['twitter']['app_credentials']
        snut = cls.conf['sns']['twitter']['save_none_url_tweet']

        retries = 0
        q = Queue.Queue()
        consumer = QueueParser(q,
                               platform_id,
                               w_size,
                               save_none_url_tweet=snut)
        qhandler = QueueHandler(q)
        consumer.start()
        stall_time = retry_stall

        # The interrupt handler wraps the whole loop (not only the stream
        # call) so that an interrupt arriving while we sleep inside the
        # error handler still reaches the clean-up code below.
        try:
            while True:
                # Rebind on every pass: the error handler must never look at
                # an unbound name or at the counter of a previous streamer.
                streamer = None
                try:
                    streamer = TwitterStream(c, [qhandler],
                                             dict(track=keywords), w_size)
                    streamer.stream()
                except Exception as e:
                    logger.exception(e)
                    time.sleep(stall_time)
                    # NOTE(review): `_counter` presumably counts items
                    # received by this streamer -- confirm against
                    # TwitterStream. More than 100 is treated as "the
                    # connection was healthy before it failed".
                    if streamer is not None and streamer._counter > 100:
                        # reset retry counter and stall time
                        retries = 0
                        stall_time = retry_stall
                    else:
                        # increase retry counter and stall time
                        retries += 1
                        stall_time = 2 * stall_time
                        if retries >= max_retries:
                            logger.error('Reached max retries!')
                            break
        except (KeyboardInterrupt, SystemExit):
            # Deliberate stop request: fall through to clean-up and mail.
            pass
        finally:
            consumer.stop()

        s = args['--mail-server']
        f = args['--mail-from']
        t = args['--mail-to']

        if s and f and t:
            logger.info('server %r, from %r, to %r', s, f, t)
            try:
                server = smtplib.SMTP(s)
                try:
                    msg = 'Twitter streaming is stopped!'
                    server.sendmail(f, t, msg)
                finally:
                    # Always close the SMTP connection, even if sending fails.
                    server.quit()
            except Exception as e:
                # Notification is best-effort; never let it mask the exit.
                logger.error(e)
        logger.info('Exit')
예제 #2
0
파일: sns.py 프로젝트: zjcom/hoaxy-backend
    def twitter_stream(cls, session, args):
        """Run the Twitter streaming process, dumping through a consumer.

        Resolves (and creates if needed) the dump directory, collects the
        tracking keywords through `session`, closes the session, starts a
        `QueueHandler` consumer and then runs a `TwitterStream` that feeds
        it. The consumer is stopped whenever the stream ends, whether it
        returned, was interrupted or raised.

        Parameters
        ----------
        session : object
            Database session handed to `get_site_tuples`; closed once the
            keywords have been collected.
        args : dict
            Command line arguments; '--dump-dir' selects the dump folder,
            falling back to `<HOAXY_HOME>/dumps` when it is None.
        """
        # create a dump folder
        if args['--dump-dir'] is not None:
            dump_dir = os.path.expanduser(args['--dump-dir'])
            dump_dir = os.path.abspath(dump_dir)
        else:
            dump_dir = os.path.join(HOAXY_HOME, 'dumps')
        if not os.path.exists(dump_dir):
            # Clear the umask so the requested 0o755 mode is applied as-is.
            # It is saved before entering `try` so that `finally` can always
            # restore it.
            org_umask = os.umask(0)
            try:
                os.makedirs(dump_dir, 0o755)
            except OSError:
                # Tolerate the directory having been created concurrently
                # between the existence check and makedirs.
                if not os.path.isdir(dump_dir):
                    raise
            finally:
                os.umask(org_umask)
        sites = get_site_tuples(session)
        keywords = get_track_keywords(sites)
        session.close()
        window_size = cls.conf['window_size']
        credentials = cls.conf['sns']['twitter']['app_credentials']
        save_none_url_tweet = cls.conf['sns']['twitter']['save_none_url_tweet']
        tw_queue = Queue()
        consumer = QueueHandler(
            tw_queue,
            bucket_size=window_size,
            dump_dir=dump_dir,
            parser_kwargs=dict(save_none_url_tweet=save_none_url_tweet))
        consumer.start()
        logger.debug('Consumer thread started.')

        # KeyboardInterrupt signal handler: turn SIGINT into an exception so
        # the clean-up below runs.
        def clean_up(signal_n, c_frame):
            raise KeyboardInterrupt

        signal.signal(signal.SIGINT, clean_up)

        try:
            streamer = TwitterStream(credentials=credentials,
                                     handlers=[consumer],
                                     params=dict(track=keywords),
                                     window_size=window_size)
            streamer.stream()
            logger.info('Twitter steaming exits.')
        except (KeyboardInterrupt, SystemExit):
            logger.info('KeyboardInterruption recevied, cleaning up ...')
        finally:
            # Stop the consumer on every exit path; previously it was only
            # stopped on interrupt and kept running otherwise.
            consumer.stop()
            logger.info('Clean up done, exit!')
예제 #3
0
 def open_spider(self, spider):
     """Get sites when opening the spider.

     Calls `get_site_tuples` with `spider.session` and stores the result
     on `self.site_tuples`.

     NOTE(review): presumably invoked by the crawling framework when the
     spider is opened -- confirm against the enclosing class.
     """
     self.site_tuples = get_site_tuples(spider.session)