Example #1
    def _monitor(self):
        """Monitor the queue for tweet, and use function parse to parse it.

        This method runs on a separate, internal thread.
        The thread will terminate if it sees a sentinel object in the queue.
        """
        # The Session itself is not thread safe, so use a scoped_session:
        # each thread uses only one scoped_session object.
        # We never delete anything from the database, and we rely heavily
        # on the `id` of existing objects to build relationships, so set
        # expire_on_commit=False to avoid re-fetching those existing objects.
        session = Session(expire_on_commit=False)
        parser = Parser(session, self.platform_id, **self.p_kwargs)
        q = self.queue
        has_task_done = hasattr(q, 'task_done')
        while not self._stop.isSet():
            # Server down, hold on
            if self._hold_on is True:
                logger.info('qsize is %s', q.qsize())
                time.sleep(self._hold_on_unit)
                self._hold_on_counter += self._hold_on_unit
                if self._hold_on_counter >= self._hold_on_max:
                    return
                logger.info('Hold on, keep trying to connect to the SQL server...')
                logger.info('Elapsed %s seconds, since recent server down',
                            self._hold_on_counter)
                if self._test_connection(session):
                    self._hold_on = False
                    self._hold_on_counter = 0
                continue
            try:
                jd = self.dequeue(True)
                if jd is self._sentinel:
                    break
                self._counter += 1
                if self._counter % self._window_size == 0:
                    logger.info('qsize is %s', q.qsize())
                parser.parse(jd)
                if has_task_done:
                    q.task_done()
            except Queue.Empty:
                break
            except Exception as e:
                logger.error('Exception %s when parsing %s', e, jd)
                if isinstance(e, SQLAlchemyError):
                    session.rollback()
                    if isinstance(e, OperationalError):
                        # if 'could not connect to server' in str(e):
                        logger.error('Hold on until SQL service back! %s', e)
                        self._hold_on = True
        # There might still be records in the queue.
        while True:
            try:
                jd = self.dequeue(False)
                if jd is self._sentinel:
                    break
                parser.parse(jd)
                if has_task_done:
                    q.task_done()
            except Queue.Empty:
                break
            except Exception as e:
                logger.error('Exception %s when parsing %s', e, jd)
                if isinstance(e, SQLAlchemyError):
                    session.rollback()
                    if isinstance(e, OperationalError):
                        return
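
The comments in this example lean on two SQLAlchemy features: a scoped_session, so each thread gets its own Session, and expire_on_commit=False, so committed objects keep their loaded attributes. The sketch below shows that setup in isolation; the SQLite in-memory engine, the toy Tweet model, and configuring expire_on_commit on the sessionmaker (rather than at call time, as the example does) are assumptions made to keep the snippet self-contained, not part of the original project.

from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import declarative_base, scoped_session, sessionmaker

Base = declarative_base()


class Tweet(Base):
    __tablename__ = 'tweet'
    id = Column(Integer, primary_key=True)
    text = Column(String)


# Placeholder engine; the original code talks to a real SQL server.
engine = create_engine('sqlite:///:memory:')
Base.metadata.create_all(engine)

# scoped_session gives each thread its own Session; expire_on_commit=False
# keeps attributes of committed objects loaded, so their ids can be reused
# to build relationships without a re-fetch.
Session = scoped_session(sessionmaker(bind=engine, expire_on_commit=False))

session = Session()
tweet = Tweet(text='hello')
session.add(tweet)
session.commit()
print(tweet.id, tweet.text)  # still accessible after commit, no re-fetch issued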
Example #2
    def _monitor(self):
        """Monitor the queue for tweet incoming and then parse and save it into
        the database.

        This method runs on a separate, internal thread.
        The thread will terminate if it sees a sentinel object in the queue.
        """
        # The Session itself is not thread safe, so we use a scoped_session:
        # each thread uses only one scoped_session object.
        # We never delete anything from the database in this function, so we
        # set expire_on_commit=False to avoid re-fetching existing objects.
        session = Session(expire_on_commit=False)
        parser = Parser(**self.parser_kwargs)
        platform_id = get_platform_id(session, name=N_PLATFORM_TWITTER)
        has_task_done = hasattr(self.queue, 'task_done')
        while not self._stop.isSet():
            if self.is_connection_failed is True:
                self.on_db_server_down(session)
                continue
            # normal bulk insert process
            try:
                # fill the bucket
                for i in range(self.bucket_size):
                    # dequeue with block=True
                    jd = self.queue.get(True)
                    if has_task_done is True:
                        self.queue.task_done()
                    if jd is not self._sentinel:
                        self.global_counter += 1
                        self.bucket.append(jd)
                    else:
                        break
                # consume this bucket
                self.consume_this_bucket(parser, session, platform_id)
                self.bucket = []
            # database is shutdown unexpectedly
            except OperationalError as err:
                session.rollback()
                if 'server closed the connection unexpectedly' in repr(
                        err) or 'could not connect to server' in repr(err):
                    logger.critical('Caution: database server is down!')
                    self.is_connection_failed = True
                else:
                    logger.error(err)
                    self.on_db_bulk_save_error()
            except SQLAlchemyError as err:
                session.rollback()
                logger.exception(err)
                self.on_db_bulk_save_error()
            except BaseException as err:
                # unexpected exception: log it and re-raise (the thread will exit)
                logger.exception(err)
                raise
        # There might still be records in the queue.
        while True:
            try:
                jd = self.queue.get(False)
                if has_task_done:
                    self.queue.task_done()
                if jd is self._sentinel:
                    break
                self.bucket.append(jd)
            except queue.Empty:
                break
        if self.bucket:
            try:
                self.consume_this_bucket(parser, session, platform_id)
                self.bucket = []
            except SQLAlchemyError as err:
                session.rollback()
                logger.exception('Consumer thread: %s', err)
                self.on_db_bulk_save_error()
        if self._fp_db_down is not None:
            self._fp_db_down.close()
        if self._fp_db_bulk_save is not None:
            self._fp_db_bulk_save.close()
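
Stripped of the database specifics, the core of this second example is a sentinel-terminated consumer thread that fills a fixed-size bucket from a queue and flushes it in one batch, with a final flush of whatever remains once the sentinel arrives. The sketch below shows just that pattern; SENTINEL, BUCKET_SIZE, and flush() are illustrative stand-ins (flush() plays the role of consume_this_bucket()) and are not taken from the original code.

import queue
import threading

SENTINEL = object()
BUCKET_SIZE = 3


def flush(bucket):
    # Stand-in for consume_this_bucket(): one bulk operation per bucket.
    print('flushing %d items: %s' % (len(bucket), bucket))


def monitor(q):
    bucket = []
    while True:
        item = q.get(True)   # block until an item (or the sentinel) arrives
        q.task_done()
        if item is SENTINEL:
            break
        bucket.append(item)
        if len(bucket) >= BUCKET_SIZE:
            flush(bucket)    # consume a full bucket in one batch
            bucket = []
    if bucket:               # flush whatever is left after the sentinel
        flush(bucket)


q = queue.Queue()
worker = threading.Thread(target=monitor, args=(q,))
worker.start()
for i in range(7):
    q.put(i)
q.put(SENTINEL)
worker.join()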