def run(self):
    """Run the keyword-extraction polling loop.

    Connects to the database via ``self._conn_db()``, calls
    ``self._construct_unnecessary_words()``, then polls the ``raw_text``
    table forever for rows whose status is ``UN_PUBLISHED``. For every row
    the key words extracted from its text are stored and the row is then
    marked ``CONSUMED``.

    Returns:
        None. Returns early when no database connection is available;
        otherwise the loop never terminates normally.
    """
    self.logger.info('Connect to the database...')
    self._conn_db()
    if not self._db_conn:
        # Logged at error level: the worker cannot do anything without a
        # connection, and a debug record is hidden whenever the logger runs
        # at the info level used by the progress messages in this method.
        self.logger.error(
            'No database connection, cannot perform keyword extraction')
        return

    self.logger.info('Construct unnecessary words...')
    self._construct_unnecessary_words()

    # The statement never changes, so build it once outside the loop.
    query = 'SELECT id,text FROM raw_text WHERE status=%s'
    while True:
        # NOTE(review): an exception raised by the query (or by the
        # extraction below) is not caught here and ends the loop.
        results = MySQLDBUtil.fetch_multiple_rows(
            query, (UN_PUBLISHED, ), self._db_conn)
        if not results:
            # Nothing pending: wait before polling again.
            time.sleep(SLEEP_TIME)
            continue

        for record_id, text in results:
            key_words = self._extrack_key_words(text)
            try:
                self._store_key_words(key_words)
            except Exception:
                self.logger.exception('Store key words error')
            # NOTE(review): the row is marked CONSUMED even when storing its
            # key words failed above -- confirm this is intended.
            try:
                self._update_status(record_id, CONSUMED)
            except Exception:
                self.logger.exception('Update raw_text status error')
def _fetch_category_list(self):
    """Return the ``category`` values of rows whose status is ``UN_PUBLISHED``.

    At most ``QUEUE_LIMIT`` rows are requested from the ``category`` table.
    Any exception raised while querying or unpacking rows is logged, and the
    values collected up to that point (possibly none) are returned.
    """
    self.logger.info('Get un-published category list...')
    sql = 'SELECT category FROM category WHERE status=%s LIMIT %s'
    categories = []
    try:
        rows = MySQLDBUtil.fetch_multiple_rows(
            sql, (UN_PUBLISHED, QUEUE_LIMIT), self._db_conn)
        # Each row must unpack to exactly one value.
        for (category,) in rows:
            categories.append(category)
    except Exception:
        self.logger.exception('Query un-published category error')
    return categories
def _fetch_package_list(self):
    """Return the ``package_name`` values of rows whose status is ``UN_PUBLISHED``.

    At most ``QUEUE_LIMIT`` rows are requested from the ``package_name``
    table. Any exception raised while querying or unpacking rows is logged,
    and the values collected up to that point (possibly none) are returned.
    """
    self.logger.info('Get un-published package list...')
    sql = 'SELECT package_name FROM package_name WHERE status=%s LIMIT %s'
    packages = []
    try:
        rows = MySQLDBUtil.fetch_multiple_rows(
            sql, (UN_PUBLISHED, QUEUE_LIMIT), self._db_conn)
        # Each row must unpack to exactly one value.
        for (package_name,) in rows:
            packages.append(package_name)
    except Exception:
        self.logger.exception('Query un-used package name error')
    return packages
def _fetch_developer_list(self):
    """Return the ``name`` values of developer rows whose status is ``UN_PUBLISHED``.

    At most ``QUEUE_LIMIT`` rows are requested from the ``developer`` table.
    Any exception raised while querying or unpacking rows is logged, and the
    values collected up to that point (possibly none) are returned.
    """
    self.logger.info('Get un-published developer list...')
    sql = 'SELECT name FROM developer WHERE status=%s LIMIT %s'
    developers = []
    try:
        rows = MySQLDBUtil.fetch_multiple_rows(
            sql, (UN_PUBLISHED, QUEUE_LIMIT), self._db_conn)
        # Each row must unpack to exactly one value.
        for (developer,) in rows:
            developers.append(developer)
    except Exception:
        self.logger.exception('Query un-published developer error')
    return developers
def _fetch_key_words(self):
    """Return the ``key_word`` values of rows whose status is ``UN_PUBLISHED``.

    At most ``QUEUE_LIMIT`` rows are requested from the ``key_word`` table.
    Any exception raised while querying or unpacking rows is logged, and the
    values collected up to that point (possibly none) are returned.
    """
    self.logger.info('Get un-published keywords...')
    sql = 'SELECT key_word FROM key_word WHERE status=%s LIMIT %s'
    collected = []
    try:
        rows = MySQLDBUtil.fetch_multiple_rows(
            sql, (UN_PUBLISHED, QUEUE_LIMIT), self._db_conn)
        # Each row must unpack to exactly one value.
        for (word,) in rows:
            collected.append(word)
    except Exception:
        self.logger.exception('Query un-published keywords error')
    return collected