def _monitor(self): """Monitor the queue for tweet, and use function parse to parse it. This method runs on a separate, internal thread. The thread will terminate if it sees a sentinel object in the queue. """ # scoped_session # Session itself is not thread safe, use scoped_session # each thread use only one scoped_session object # We never delete anything from database, and we rely much # on the `id` of existed object to build relaship # set expire_on_commit=False # to avoid re-fetch of these existed objects session = Session(expire_on_commit=False) parser = Parser(session, self.platform_id, **self.p_kwargs) q = self.queue has_task_done = hasattr(q, 'task_done') while not self._stop.isSet(): # Server down, hold on if self._hold_on is True: logger.info('qsize is %s', q.qsize()) time.sleep(self._hold_on_unit) self._hold_on_counter += self._hold_on_unit if self._hold_on_counter >= self._hold_on_max: return logger.info('Hold on, keep tring to connect SQL server...') logger.info('Elapsed %s seconds, since recent server down', self._hold_on_counter) if self._test_connection(session): self._hold_on = False self._hold_on_counter = 0 continue try: jd = self.dequeue(True) if jd is self._sentinel: break self._counter += 1 if self._counter % self._window_size == 0: logger.info('qsize is %s', q.qsize()) parser.parse(jd) if has_task_done: q.task_done() except Queue.Empty: break except Exception as e: logger.error('Exception %s when parsing %s', e, jd) if isinstance(e, SQLAlchemyError): session.rollback() if isinstance(e, OperationalError): # if 'could not connect to server' in str(e): logger.error('Hold on until SQL service back! %s', e) self._hold_on = True # There might still be records in the queue. while True: try: jd = self.dequeue(False) if jd is self._sentinel: break parser.parse(jd) if has_task_done: q.task_done() except Queue.Empty: break except Exception as e: logger.error('Exception %s when parsing %s', e, jd) if isinstance(e, SQLAlchemyError): session.rollback() if isinstance(e, OperationalError): return
def _monitor(self): """Monitor the queue for tweet incoming and then parse and save it into the database. This method runs on a separate, internal thread. The thread will terminate if it sees a sentinel object in the queue. """ # scoped_session # Session itself is not thread safe, we use scoped_session. # Each thread uses only one scoped_session object # We never delete anything from database in this function. # set expire_on_commit=False to avoid re-fetch of these existed objects session = Session(expire_on_commit=False) parser = Parser(**self.parser_kwargs) platform_id = get_platform_id(session, name=N_PLATFORM_TWITTER) has_task_done = hasattr(self.queue, 'task_done') while not self._stop.isSet(): if self.is_connection_failed is True: self.on_db_server_down(session) continue # normal bulk insert process try: # fill the bucket for i in range(self.bucket_size): # dequeue with block=True jd = self.queue.get(True) if has_task_done is True: self.queue.task_done() if jd is not self._sentinel: self.global_counter += 1 self.bucket.append(jd) else: break # consume this bucket self.consume_this_bucket(parser, session, platform_id) self.bucket = [] # database is shutdown unexpectedly except OperationalError as err: session.rollback() if 'server closed the connection unexpectedly' in repr( err) or 'could not connect to server' in repr(err): logger.critical('Causion: database server is down!') self.is_connection_failed = True else: logger.error(err) self.on_db_bulk_save_error() except SQLAlchemyError as err: session.rollback() logger.exception(err) self.on_db_bulk_save_error() except BaseException as err: # unexpected exception, logging (will exit) logger.exception(err) raise # There might still be records in the queue. while True: try: jd = self.queue.get(False) if has_task_done: self.queue.task_done() if jd is self._sentinel: break self.bucket.append(jd) except queue.Empty: break if self.bucket: try: self.consume_this_bucket(parser, session, platform_id) self.bucket = [] except SQLAlchemyError as err: session.rollback() logger.exception('Consumer thread: %s', err) self.on_db_bulk_save_error() if self._fp_db_down is not None: self._fp_db_down.close() if self._fp_db_bulk_save is not None: self._fp_db_bulk_save.close()