class GooglePlayCrawler:
    """Launcher for the key word extractor and all feeder/crawler scripts."""

    def __init__(self):
        """Create the launcher's logger inside the configured log directory."""
        log_dir = AppConfig.get_log_dir()
        self.logger = Logger(
            os.path.join(log_dir, 'google_play_crawler.log'),
            'google_play_crawler', 10 * 1024 * 1024, 2)

    def start(self):
        """Start the key word extractor, then every producer/consumer pair.

        Script paths are resolved relative to the current working directory.
        For each stage the producer script is launched before the consumer.
        """
        self.logger.info('Start key word extractor thread....')
        self._start_key_word_extractor()

        # Each entry: (log message, directories below the cwd, script prefix).
        launch_plan = (
            ('Start category feeder...', ('feed', 'category'), 'category'),
            ('Start developer feeder...', ('feed', 'developer'), 'developer'),
            ('Start search feeder...', ('feed', 'search'), 'search'),
            ('Start similar feeder...', ('feed', 'similar'), 'similar'),
            ('Start app detail crawler...', ('crawler',), 'app'),
        )
        for message, sub_dirs, prefix in launch_plan:
            self.logger.info(message)
            # Resolve both paths before launching anything for this stage.
            scripts = []
            for role in ('producer', 'consumer'):
                parts = sub_dirs + ('%s_%s.py' % (prefix, role),)
                scripts.append(os.path.join(os.getcwd(), *parts))
            for script in scripts:
                self._start_process(script)

    def _start_key_word_extractor(self):
        """Build a ``KeyWordExtractor`` sharing this logger and start it."""
        KeyWordExtractor(self.logger).start()

    def _start_process(self, file_path):
        """Run ``python <file_path>`` without waiting; log any failure.

        :param file_path: path of the script to launch.
        """
        try:
            Command.run_without_wait(['python', file_path])
        except Exception:
            self.logger.exception('Run script %s error' % file_path)
class KeyWordConsumer:
    """RabbitMQ consumer that runs a Google Play search for each key word."""

    def __init__(self, log_file, log_name):
        """Create the logger; the database connection is opened by ``start``.

        :param log_file: log file path handed to ``Logger``.
        :param log_name: logger name handed to ``Logger``.
        """
        self.logger = Logger(log_file, log_name, 10*1024*1024, 2)
        self._db_conn = None

    def start(self):
        """Consume key words until the broker or database cannot be reached.

        Returns (after logging) when the consumer cannot be constructed, when
        the database connection fails, or when a re-connect attempt after a
        closed connection fails. Otherwise it loops forever.
        """
        rabbit_topic = RabbitTopic.init_rabbitmq_consumer(EXCHANGE_NAME, QUEUE_NAME, QUEUE_LIMIT,
                                                          [ROUTING_KEY], self.logger)
        if not rabbit_topic:
            self.logger.debug('Construct key word consumer error')
            return

        self._conn_db()
        if not self._db_conn:
            self.logger.exception('Connect to database error')
            return

        while 1:
            try:
                rabbit_topic.start_consuming(self._callback, QUEUE_NAME)
            except ConnectionClosed:
                self.logger.debug('Connection to rabbitmq server closed, re-connecting...')
                rabbit_topic = RabbitTopic.init_rabbitmq_consumer(EXCHANGE_NAME, QUEUE_NAME, QUEUE_LIMIT,
                                                                  [ROUTING_KEY], self.logger)
                if not rabbit_topic:
                    # A falsy result means the re-connect failed; looping on
                    # would call start_consuming on it and raise an unlogged
                    # AttributeError.
                    self.logger.debug('Re-connect key word consumer error, stop consuming')
                    return

    def _callback(self, channel, method, properties, key_word):
        """Search one key word, acknowledge the message and mark it consumed.

        The message is acknowledged and the key word marked consumed even when
        the search raises; the failure is only logged.

        :param channel: channel used to acknowledge the delivery.
        :param method: delivery metadata; ``delivery_tag`` is read from it.
        :param properties: message properties (unused).
        :param key_word: message body, the key word to search for.
        """
        self.logger.info(os.linesep)
        self.logger.info('----> Get body message %s and start searching this key word...<----' % key_word)
        try:
            url = 'https://play.google.com/store/search?q=%s&c=apps' % key_word
            search_web_driver = SearchWebDriver(url, self._db_conn, self.logger)
            search_web_driver.search()
        except Exception:
            self.logger.exception('Search key word %s error' % key_word)

        channel.basic_ack(delivery_tag=method.delivery_tag)

        self.logger.info('Set key word %s as consumed' % key_word)
        self._set_key_word_consumed(key_word)

    def _conn_db(self):
        """Open the MySQL connection; on failure log and leave it as ``None``."""
        try:
            self._db_conn = util.conn_mysql_db()
        except Exception:
            self.logger.exception('Connect to database error')

    def _set_key_word_consumed(self, key_word):
        """Set the ``key_word`` row's status to ``CONSUMED``; log any failure."""
        query = 'UPDATE key_word SET status=%s WHERE key_word=%s'
        try:
            MySQLDBUtil.update(query, (CONSUMED, key_word), self._db_conn)
        except Exception:
            self.logger.exception('Set key word %s as consumed error' % key_word)
Example #3
0
class CategoryProducer:
    """Publishes un-published rows of the ``category`` table to RabbitMQ."""

    def __init__(self):
        """Create the logger; the database connection is opened by ``start``."""
        self.logger = Logger(LOG_FILE, LOG_NAME, 10*1024*1024, 2)
        self._db_conn = None

    def start(self):
        """Publish categories in an endless loop.

        Returns when the producer cannot be constructed, when the database
        connection fails, or when a re-connect after a closed connection
        fails. When no un-published rows remain after one wait period, all
        rows are reset to un-published so the cycle starts again.
        """
        rabbit_topic = RabbitTopic.init_rabbitmq_producer(EXCHANGE_NAME, self.logger)
        if not rabbit_topic:
            return

        self._conn_db()
        if not self._db_conn:
            self.logger.exception('Connect database error')
            return

        while 1:
            try:
                if self._is_no_more_records():
                    self.logger.info('There are no more records, wait...')
                    time.sleep(PRODUCE_WAIT_TIME)
                    # Still no more available records, then reset...
                    if self._is_no_more_records():
                        self.logger.info('Still there are no more records, reset all records...')
                        self._reset_category()

                category_list = self._fetch_category_list()
                for category in category_list:
                    if RabbitTopic.is_queue_full(QUEUE_NAME, QUEUE_LIMIT, self.logger):
                        self.logger.info('Queue %s is full, wait...' % QUEUE_NAME)
                        time.sleep(PRODUCE_WAIT_TIME)
                        # The skipped row stays un-published and is fetched again.
                        continue
                    try:
                        rabbit_topic.publish(ROUTING_KEY, category)
                        self.logger.info('Publish category %s and update the status' % category)
                        self._update_status(PUBLISHED, category)
                    except ConnectionClosed:
                        self.logger.debug('Connection to rabbitmq server closed, re-connecting...')
                        rabbit_topic = RabbitTopic.init_rabbitmq_producer(EXCHANGE_NAME, self.logger)
                        if not rabbit_topic:
                            # Continuing with a falsy producer would make every
                            # publish fail in a loop that never sleeps.
                            self.logger.debug('Re-connect to rabbitmq server failed, stop producing')
                            return
            except Exception:
                self.logger.exception('Publish category error')

    def _conn_db(self):
        """Open the MySQL connection; on failure log and leave it as ``None``."""
        try:
            self._db_conn = util.conn_mysql_db()
        except Exception:
            self.logger.exception('Connect database error')

    def _fetch_category_list(self):
        """Return up to ``QUEUE_LIMIT`` un-published categories.

        :return: list of category values; empty when the query fails.
        """
        category_list = []

        self.logger.info('Get un-published category list...')
        query = 'SELECT category FROM category WHERE status=%s LIMIT %s'
        try:
            results = MySQLDBUtil.fetch_multiple_rows(query, (UN_PUBLISHED, QUEUE_LIMIT), self._db_conn)
            for result in results:
                (category,) = result
                category_list.append(category)
        except Exception:
            self.logger.exception('Query un-published category error')
        return category_list

    def _is_no_more_records(self):
        """Return True only when the count of un-published rows is exactly 0.

        A failed query or an empty result yields False.
        """
        query = 'SELECT COUNT(*) FROM category WHERE status=%s'
        try:
            result = MySQLDBUtil.fetch_single_row(query, (UN_PUBLISHED,), self._db_conn)
            if result:
                (count,) = result
                if count == 0:
                    return True
        except Exception:
            self.logger.exception('Check available records error')
        return False

    def _reset_category(self):
        """Put every category back into the un-published state."""
        try:
            self.logger.info('All category have been published, reset all to un-published status')
            self._update_status(UN_PUBLISHED)
        except Exception:
            self.logger.exception('Reset category table error')
            return

    def _update_status(self, status, category=None):
        """Set ``status`` on one category, or on all rows when none is given.

        Only ``category=None`` triggers the table-wide update. An empty string
        is treated as a real category value, so publishing a blank row can no
        longer flip the status of every row.

        :param status: status value to store.
        :param category: category to update, or ``None`` for all rows.
        """
        if category is None:
            query = 'UPDATE category SET status=%s'
            args = (status,)
        else:
            query = 'UPDATE category SET status=%s WHERE category=%s'
            args = (status, category)
        try:
            MySQLDBUtil.update(query, args, self._db_conn)
        except Exception:
            self.logger.exception('Update record status')
Example #4
0
class AppProducer:
    """Publishes un-published rows of the ``package_name`` table to RabbitMQ."""

    def __init__(self):
        """Create the logger; the database connection is opened by ``start``."""
        self.logger = Logger(LOG_FILE, LOG_NAME, 10*1024*1024, 2)
        self._db_conn = None

    def start(self):
        """Publish package names in an endless loop.

        Returns when the producer cannot be constructed, when the database
        connection fails, or when a re-connect after a closed connection
        fails.
        """
        rabbit_topic = RabbitTopic.init_rabbitmq_producer(EXCHANGE_NAME, self.logger)
        if not rabbit_topic:
            return

        self._conn_db()
        if not self._db_conn:
            self.logger.exception('Connect database error')
            return

        while 1:
            try:
                if self._is_no_more_records():
                    self.logger.info('There are no more available records, wait...')
                    time.sleep(PRODUCE_WAIT_TIME)

                package_list = self._fetch_package_list()
                for package_name in package_list:
                    if RabbitTopic.is_queue_full(QUEUE_NAME, QUEUE_LIMIT, self.logger):
                        self.logger.info('Queue %s is full, wait to consume...' % QUEUE_NAME)
                        time.sleep(PRODUCE_WAIT_TIME)
                        # The skipped row stays un-published and is fetched again.
                        continue
                    try:
                        rabbit_topic.publish(ROUTING_KEY, package_name)
                        self.logger.info('Publish package %s and update status' % package_name)
                        self._update_status(PUBLISHED, package_name)
                    except ConnectionClosed:
                        self.logger.debug('Connection to rabbitmq server closed, re-connecting...')
                        rabbit_topic = RabbitTopic.init_rabbitmq_producer(EXCHANGE_NAME, self.logger)
                        if not rabbit_topic:
                            # Continuing with a falsy producer would make every
                            # publish fail in a loop that never sleeps.
                            self.logger.debug('Re-connect to rabbitmq server failed, stop producing')
                            return
            except Exception:
                # This producer publishes app package names; the message used
                # to say "similar app", copied from another producer.
                self.logger.exception('Publish app package name error')

    def _init_rabbitmq(self):
        """Build a ``RabbitTopic`` producer directly; return ``None`` on error.

        ``start`` does not call this; it uses
        ``RabbitTopic.init_rabbitmq_producer`` instead.
        """
        try:
            rabbit_topic = RabbitTopic(EXCHANGE_NAME)
            rabbit_topic.construct_producer()
        except Exception:
            self.logger.exception('Construct app producer error')
            return None
        return rabbit_topic

    def _conn_db(self):
        """Open the MySQL connection; on failure log and leave it as ``None``."""
        try:
            self._db_conn = util.conn_mysql_db()
        except Exception:
            self.logger.exception('Connect database error')

    def _fetch_package_list(self):
        """Return up to ``QUEUE_LIMIT`` un-published package names.

        :return: list of package names; empty when the query fails.
        """
        package_list = []

        self.logger.info('Get un-published package list...')
        query = 'SELECT package_name FROM package_name WHERE status=%s LIMIT %s'
        try:
            results = MySQLDBUtil.fetch_multiple_rows(query, (UN_PUBLISHED, QUEUE_LIMIT), self._db_conn)
            for result in results:
                (package_name,) = result
                package_list.append(package_name)
        except Exception:
            self.logger.exception('Query un-used package name error')
        return package_list

    def _is_no_more_records(self):
        """Return True only when the count of un-published rows is exactly 0.

        A failed query or an empty result yields False.
        """
        query = 'SELECT COUNT(*) FROM package_name WHERE status=%s'
        try:
            result = MySQLDBUtil.fetch_single_row(query, (UN_PUBLISHED,), self._db_conn)
            if result:
                (count, ) = result
                if count == 0:
                    return True
        except Exception:
            self.logger.exception('Check if there is no more packages error')
        return False

    def _update_status(self, status, package_name=None):
        """Set ``status`` on one package, or on all rows when none is given.

        Only ``package_name=None`` triggers the table-wide update. An empty
        string is treated as a real value, so publishing a blank row can no
        longer flip the status of every row.

        :param status: status value to store.
        :param package_name: package to update, or ``None`` for all rows.
        """
        if package_name is None:
            query = 'UPDATE package_name SET status=%s'
            args = (status,)
        else:
            query = 'UPDATE package_name SET status=%s WHERE package_name=%s'
            args = (status, package_name)
        try:
            MySQLDBUtil.update(query, args, self._db_conn)
        except Exception:
            self.logger.exception('Update record status')
Example #5
0
class DeveloperProducer:
    """Publishes un-published rows of the ``developer`` table to RabbitMQ."""

    def __init__(self):
        """Create the logger; the database connection is opened by ``start``."""
        self.logger = Logger(LOG_FILE, LOG_NAME, 10 * 1024 * 1024, 2)
        self._db_conn = None

    def start(self):
        """Publish developers in an endless loop.

        Returns when the producer cannot be constructed, when the database
        connection fails, or when a re-connect after a closed connection
        fails.
        """
        rabbit_topic = RabbitTopic.init_rabbitmq_producer(
            EXCHANGE_NAME, self.logger)
        if not rabbit_topic:
            return

        self._conn_db()
        if not self._db_conn:
            self.logger.exception('Connect database error')
            return

        while 1:
            try:
                if self._is_no_more_records():
                    self.logger.info('There are no more records, wait...')
                    time.sleep(PRODUCE_WAIT_TIME)

                developer_list = self._fetch_developer_list()
                for developer in developer_list:
                    if RabbitTopic.is_queue_full(QUEUE_NAME, QUEUE_LIMIT,
                                                 self.logger):
                        self.logger.info('Queue %s is full, wait...' %
                                         QUEUE_NAME)
                        time.sleep(PRODUCE_WAIT_TIME)
                        # The skipped row stays un-published and is fetched again.
                        continue
                    try:
                        rabbit_topic.publish(ROUTING_KEY, developer)
                        self.logger.info(
                            'Publish developer %s and update status' %
                            developer)
                        self._update_status(PUBLISHED, developer)
                    except ConnectionClosed:
                        self.logger.debug(
                            'Connection to rabbitmq server closed, re-connecting...'
                        )
                        rabbit_topic = RabbitTopic.init_rabbitmq_producer(
                            EXCHANGE_NAME, self.logger)
                        if not rabbit_topic:
                            # Continuing with a falsy producer would make every
                            # publish fail in a loop that never sleeps.
                            self.logger.debug(
                                'Re-connect to rabbitmq server failed, stop producing'
                            )
                            return
            except Exception:
                self.logger.exception('Publish developer error')

    def _conn_db(self):
        """Open the MySQL connection; on failure log and leave it as ``None``."""
        try:
            self._db_conn = util.conn_mysql_db()
        except Exception:
            self.logger.exception('Connect database error')

    def _fetch_developer_list(self):
        """Return up to ``QUEUE_LIMIT`` un-published developer names.

        :return: list of developer names; empty when the query fails.
        """
        developer_list = []

        self.logger.info('Get un-published developer list...')
        query = 'SELECT name FROM developer WHERE status=%s LIMIT %s'
        try:
            results = MySQLDBUtil.fetch_multiple_rows(
                query, (UN_PUBLISHED, QUEUE_LIMIT), self._db_conn)
            for result in results:
                (developer, ) = result
                developer_list.append(developer)
        except Exception:
            self.logger.exception('Query un-published developer error')
        return developer_list

    def _is_no_more_records(self):
        """Return True only when the count of un-published rows is exactly 0.

        A failed query or an empty result yields False.
        """
        query = 'SELECT COUNT(*) FROM developer WHERE status=%s'
        try:
            result = MySQLDBUtil.fetch_single_row(query, (UN_PUBLISHED, ),
                                                  self._db_conn)
            if result:
                (count, ) = result
                if count == 0:
                    return True
        except Exception:
            self.logger.exception('Check if there is no more developers error')
        return False

    def _update_status(self, status, developer=None):
        """Set ``status`` on one developer, or on all rows when none is given.

        Only ``developer=None`` triggers the table-wide update. An empty
        string is treated as a real name, so publishing a blank developer row
        can no longer flip the status of every row.

        :param status: status value to store.
        :param developer: developer name to update, or ``None`` for all rows.
        """
        if developer is None:
            query = 'UPDATE developer SET status=%s'
            args = (status, )
        else:
            query = 'UPDATE developer SET status=%s WHERE name=%s'
            args = (status, developer)
        try:
            MySQLDBUtil.update(query, args, self._db_conn)
        except Exception:
            self.logger.exception('Update record status')
class DeveloperConsumer:
    """RabbitMQ consumer that queries the apps of each developer received."""

    def __init__(self, log_file, log_name):
        """Create the logger; the database connection is opened by ``start``.

        :param log_file: log file path handed to ``Logger``.
        :param log_name: logger name handed to ``Logger``.
        """
        self.logger = Logger(log_file, log_name, 10 * 1024 * 1024, 2)
        self._db_conn = None

    def start(self):
        """Consume developers until the broker or database cannot be reached.

        Returns (after logging) when the consumer cannot be constructed, when
        the database connection fails, or when a re-connect attempt after a
        closed connection fails. Otherwise it loops forever.
        """
        rabbit_topic = RabbitTopic.init_rabbitmq_consumer(
            EXCHANGE_NAME, QUEUE_NAME, QUEUE_LIMIT, [ROUTING_KEY], self.logger)
        if not rabbit_topic:
            self.logger.debug('Construct developer consumer error')
            return

        self._conn_db()
        if not self._db_conn:
            self.logger.exception('Connect to database error')
            return

        while 1:
            try:
                rabbit_topic.start_consuming(self._callback, QUEUE_NAME)
            except ConnectionClosed:
                self.logger.debug(
                    'Connection to rabbitmq server closed, re-connecting...')
                rabbit_topic = RabbitTopic.init_rabbitmq_consumer(
                    EXCHANGE_NAME, QUEUE_NAME, QUEUE_LIMIT, [ROUTING_KEY],
                    self.logger)
                if not rabbit_topic:
                    # A falsy result means the re-connect failed; looping on
                    # would call start_consuming on it and raise an unlogged
                    # AttributeError.
                    self.logger.debug(
                        'Re-connect developer consumer error, stop consuming')
                    return

    def _callback(self, channel, method, properties, developer):
        """Query one developer, acknowledge the message and mark it consumed.

        An all-digit value is queried through ``DEVELOPER_ID_HOST_URL``, any
        other value through ``DEVELOPER_NAME_HOST_URL``. The message is
        acknowledged and the developer marked consumed even when the query
        raises; the failure is only logged.

        :param channel: channel used to acknowledge the delivery.
        :param method: delivery metadata; ``delivery_tag`` is read from it.
        :param properties: message properties (unused).
        :param developer: message body, a developer id or name.
        """
        self.logger.info(os.linesep)
        self.logger.info(
            '----> Get body message %s and start query this developer... <----'
            % developer)
        try:
            if developer.isdigit():
                self.logger.info('Developer info is all digit numbers')
                url = '%s=%s' % (DEVELOPER_ID_HOST_URL, developer)
            else:
                self.logger.info('Developer info is non digit numbers')
                url = '%s=%s' % (DEVELOPER_NAME_HOST_URL, developer)
            self.logger.info('Query developer apps with url %s' % url)
            developer_web_driver = DeveloperWebDriver(url, self._db_conn,
                                                      self.logger)
            developer_web_driver.query()
        except Exception:
            self.logger.exception('Query developer %s error' % developer)

        channel.basic_ack(delivery_tag=method.delivery_tag)

        self.logger.info('Set developer %s as consumed' % developer)
        self._set_developer_consumed(developer)

    def _conn_db(self):
        """Open the MySQL connection; on failure log and leave it as ``None``."""
        try:
            self._db_conn = util.conn_mysql_db()
        except Exception:
            self.logger.exception('Connect database error')

    def _set_developer_consumed(self, developer):
        """Set the ``developer`` row's status to ``CONSUMED``; log any failure."""
        query = 'UPDATE developer SET status=%s WHERE name=%s'
        try:
            MySQLDBUtil.update(query, (CONSUMED, developer), self._db_conn)
        except Exception:
            self.logger.exception('Set devloper %s as consumed error' %
                                  developer)
class AppConsumer:
    """RabbitMQ consumer that downloads, parses and stores app detail pages."""

    def __init__(self, log_file, log_name):
        """Create the logger; database connections are opened by ``start``.

        :param log_file: log file path handed to ``Logger``.
        :param log_name: logger name handed to ``Logger``.
        """
        self.logger = Logger(log_file, log_name, 10*1024*1024, 2)
        self._mysql_db_conn = None
        self._mongo_db_conn = None

    def start(self):
        """Consume package names until broker or databases cannot be reached.

        Returns (after logging) when the consumer cannot be constructed, when
        either database connection is missing, or when a re-connect attempt
        after a closed connection fails. Otherwise it loops forever.
        """
        rabbit_topic = RabbitTopic.init_rabbitmq_consumer(EXCHANGE_NAME, QUEUE_NAME, QUEUE_LIMIT,
                                                          [ROUTING_KEY], self.logger)
        if not rabbit_topic:
            self.logger.debug('Construct app consumer error')
            return

        self._conn_db()
        if not self._mysql_db_conn or not self._mongo_db_conn:
            self.logger.exception('Connect to database error')
            return

        while 1:
            try:
                rabbit_topic.start_consuming(self._callback, QUEUE_NAME)
            except ConnectionClosed:
                self.logger.debug('Connection to rabbitmq server closed, re-connecting...')
                rabbit_topic = RabbitTopic.init_rabbitmq_consumer(EXCHANGE_NAME, QUEUE_NAME, QUEUE_LIMIT,
                                                                  [ROUTING_KEY], self.logger)
                if not rabbit_topic:
                    # A falsy result means the re-connect failed; looping on
                    # would call start_consuming on it and raise an unlogged
                    # AttributeError.
                    self.logger.debug('Re-connect app consumer error, stop consuming')
                    return

    def _callback(self, channel, method, properties, package_name):
        """Fetch and store one app's detail, then acknowledge the message.

        The package name is inserted into ``similar_app`` whether or not the
        detail page could be parsed. The message is acknowledged and the
        package marked consumed even when processing raises.

        :param channel: channel used to acknowledge the delivery.
        :param method: delivery metadata; ``delivery_tag`` is read from it.
        :param properties: message properties (unused).
        :param package_name: message body, the app package name.
        """
        self.logger.info(os.linesep)
        self.logger.info('----> Get body message %s and start get app detail... <-----' % package_name)
        try:
            url = '%s=%s' % (APP_HOST_URL, package_name)
            self.logger.info('Query app detail with url %s' % url)
            app_detail = self._parse_web_content(url)

            if not app_detail:
                self.logger.info('App detail extraction fail')
            else:
                self.logger.info('Store app detail...')
                app_detail.package_name = package_name
                self._store_app_detail(app_detail)

            self.logger.info('Insert package name %s into similar app table...' % package_name)
            self._store_package_name_similar(package_name)

            self.logger.info('Store app description...')
            self._store_app_description(app_detail)

            self.logger.info('Store app developer...')
            self._store_app_developer(app_detail)

        except Exception:
            self.logger.exception('Query app detail %s error' % package_name)

        channel.basic_ack(delivery_tag=method.delivery_tag)

        self.logger.info('Set package name %s as consumed' % package_name)
        self._set_package_consumed(package_name)

    def _conn_db(self):
        """Open the MySQL and Mongo connections; log any failure."""
        try:
            self._mysql_db_conn = util.conn_mysql_db()
            self._mongo_db_conn = util.conn_mongo_db()
        except Exception:
            self.logger.exception('Connect database error')

    def _parse_web_content(self, url):
        """Download ``url`` and parse it into an app detail object.

        The lxml-based parser is tried first; if it raises, the Beautiful
        Soup based parser is tried on the raw response content.

        :return: the parsed app detail, or ``None`` when the download,
            decoding or both parsers fail, or the page is empty.
        """
        app_detail = None
        try:
            response = requests.get(url)
        except Exception:
            self.logger.exception('Get web content from url %s error' % url)
            return app_detail
        try:
            web_content = util.decode_utf8(response.content)
        except Exception:
            self.logger.exception('Decode web content error')
            return app_detail
        if not web_content:
            self.logger.debug('Web content is empty, no need to parse')
            return app_detail
        else:
            self.logger.info('Get web content successfully,try to parse it...')

        app_detail_lxml_parser = AppDetailLxmlParser(web_content, self.logger)
        try:
            app_detail_lxml_parser.parse()
            app_detail = app_detail_lxml_parser.app_detail
        except Exception:
            self.logger.exception('Use lxml to parse the web content error, try to use the backup one beautiful soup...')
            app_detail_b4_parser = AppDetailB4Parser(response.content, self.logger)
            try:
                app_detail_b4_parser.parse()
                app_detail = app_detail_b4_parser.app_detail
            except Exception:
                self.logger.exception('Use beautiful soup to parse the web content error')

        return app_detail

    def _store_app_detail(self, app_detail):
        """Insert the app detail's JSON form into the ``app_detail`` collection."""
        if not app_detail:
            self.logger.debug('No app detail content, cannot store into database')
            return
        app_detail_json = app_detail.to_json()
        try:
            MongoDBUtil.insert(app_detail_json, self._mongo_db_conn, 'app_detail')
        except Exception:
            self.logger.exception('Store app detail content into mongo db error')

    def _store_package_name_similar(self, package_name):
        """Insert ``package_name`` into ``similar_app`` (duplicates ignored)."""
        # The value is passed as a bound parameter instead of being formatted
        # into the SQL text, so quotes in it cannot alter the statement.
        query = 'INSERT IGNORE INTO similar_app (package_name) VALUES (%s)'
        try:
            MySQLDBUtil.insert(query, (package_name,), self._mysql_db_conn)
        except Exception:
            self.logger.exception('Store package name into similar app database fail')

    def _store_app_description(self, app_detail):
        """Insert the app's description text into ``raw_text``.

        Nothing is stored when there is no app detail or the description is
        empty after quote removal.
        """
        if not app_detail:
            return
        description = ' '.join(app_detail.description)
        # Quotes are still stripped so the stored text keeps the same form as
        # before the query was parameterized.
        description = description.replace('"', '').replace('\'', '')
        if not description:
            return
        query = 'INSERT INTO raw_text (text) VALUES (%s)'
        try:
            MySQLDBUtil.insert(query, (description,), self._mysql_db_conn)
        except Exception:
            self.logger.exception('Store app description error')

    def _store_app_developer(self, app_detail):
        """Insert the developer taken from the app's developer link.

        The developer is the text after ``id=`` in ``developer_link``. Nothing
        is stored unless the link contains ``id=`` exactly once.
        """
        if not app_detail:
            return
        developer_link = app_detail.developer_link
        if not developer_link:
            return
        items = developer_link.split('id=')
        if len(items) != 2:
            return
        developer_name = items[-1]
        # Bound parameter: the name comes from scraped page content.
        query = 'INSERT IGNORE INTO developer (name) VALUES (%s)'
        try:
            MySQLDBUtil.insert(query, (developer_name,), self._mysql_db_conn)
            self.logger.info('Stored app developer %s' % developer_name)
        except Exception:
            self.logger.exception('Store app developer error')

    def _set_package_consumed(self, package_name):
        """Set the ``package_name`` row's status to ``CONSUMED``; log any failure."""
        query = 'UPDATE package_name SET status=%s WHERE package_name=%s'
        try:
            MySQLDBUtil.update(query, (CONSUMED, package_name), self._mysql_db_conn)
        except Exception:
            self.logger.exception('Set package name %s as consumed error' % package_name)
class SimilarConsumer:
    """RabbitMQ consumer that collects package names of similar apps."""

    def __init__(self, log_file, log_name):
        """Create the logger; the database connection is opened by ``start``.

        :param log_file: log file path handed to ``Logger``.
        :param log_name: logger name handed to ``Logger``.
        """
        self.logger = Logger(log_file, log_name, 10 * 1024 * 1024, 2)
        self._db_conn = None

    def start(self):
        """Consume package names until broker or database cannot be reached.

        Returns (after logging) when the consumer cannot be constructed, when
        the database connection fails, or when a re-connect attempt after a
        closed connection fails. Otherwise it loops forever.
        """
        rabbit_topic = RabbitTopic.init_rabbitmq_consumer(
            EXCHANGE_NAME, QUEUE_NAME, QUEUE_LIMIT, [ROUTING_KEY], self.logger)
        if not rabbit_topic:
            self.logger.debug('Construct similar consumer error')
            return

        self._conn_db()
        if not self._db_conn:
            self.logger.exception('Connect to database error')
            return

        while 1:
            try:
                rabbit_topic.start_consuming(self._callback, QUEUE_NAME)
            except ConnectionClosed:
                self.logger.debug(
                    'Connection to rabbitmq server closed, re-connecting...')
                rabbit_topic = RabbitTopic.init_rabbitmq_consumer(
                    EXCHANGE_NAME, QUEUE_NAME, QUEUE_LIMIT, [ROUTING_KEY],
                    self.logger)
                if not rabbit_topic:
                    # A falsy result means the re-connect failed; looping on
                    # would call start_consuming on it and raise an unlogged
                    # AttributeError.
                    self.logger.debug(
                        'Re-connect similar consumer error, stop consuming')
                    return

    def _callback(self, channel, method, properties, package_name):
        """Store the apps similar to one package, then acknowledge the message.

        The message is acknowledged and the package marked consumed even when
        the lookup raises; the failure is only logged.

        :param channel: channel used to acknowledge the delivery.
        :param method: delivery metadata; ``delivery_tag`` is read from it.
        :param properties: message properties (unused).
        :param package_name: message body, the app package name.
        """
        self.logger.info(os.linesep)
        self.logger.info(
            '----> Get body message %s and start query apps similar to this... <----'
            % package_name)
        try:
            url = '%s=%s' % (SIMILAR_HOST_URL, package_name)
            self.logger.info('Query similar apps with url %s' % url)
            package_names = self._extract_package_names(url)
            self.logger.info('Store package names...')
            self._store_package_names(package_names)
        except Exception:
            self.logger.exception('Query similar apps %s error' % package_name)

        channel.basic_ack(delivery_tag=method.delivery_tag)

        self.logger.info('Set package name %s as consumed' % package_name)
        self._set_package_consumed(package_name)

    def _conn_db(self):
        """Open the MySQL connection; on failure log and leave it as ``None``."""
        try:
            self._db_conn = util.conn_mysql_db()
        except Exception:
            self.logger.exception('Connect database error')

    def _extract_package_names(self, url):
        """Download ``url`` and collect package names from its title links.

        Each name is the text after the last ``id=`` in the ``href`` of an
        ``<a class="title">`` element.

        :return: set of package names; empty when the request fails or the
            response status is not OK.
        """
        package_names = set()

        try:
            response = requests.get(url)
        except Exception:
            self.logger.exception('Get content with url %s error' % url)
            return package_names

        if response.status_code == requests.codes.ok:
            html_tree = html.fromstring(response.content)
            app_links = html_tree.xpath('//a[@class="title"]/@href')
            for app_link in app_links:
                try:
                    package_names.add(app_link.split('id=')[-1])
                except Exception:
                    self.logger.exception(
                        'Extract package from link %s error' % app_link)
        else:
            self.logger.debug(
                'Access similar app url %s and returns wrong response code %d'
                % (url, response.status_code))
        return package_names

    def _store_package_names(self, package_names):
        """Insert all ``package_names`` into ``package_name`` in one statement.

        The names come from scraped links, so they are passed as bound
        parameters (one ``(%s)`` group per name) rather than formatted into
        the SQL text. Nothing is executed for an empty collection.

        :param package_names: iterable of package name strings.
        """
        names = tuple(package_names)
        if not names:
            return
        placeholders = ','.join(['(%s)'] * len(names))
        query = ('INSERT IGNORE INTO package_name (package_name) VALUES ' +
                 placeholders)
        try:
            MySQLDBUtil.insert(query, names, self._db_conn)
        except Exception:
            self.logger.exception('Store package names into database fail')

    def _set_package_consumed(self, package_name):
        """Set the ``similar_app`` row's status to ``CONSUMED``; log any failure."""
        query = 'UPDATE similar_app SET status=%s WHERE package_name=%s'
        try:
            MySQLDBUtil.update(query, (CONSUMED, package_name), self._db_conn)
        except Exception:
            self.logger.exception('Set package name %s as consumed error' %
                                  package_name)
class CategoryConsumer:
    """RabbitMQ consumer that crawls the detail pages of each category."""

    def __init__(self, log_file, log_name):
        """Create the logger; the database connection is opened by ``start``.

        :param log_file: log file path handed to ``Logger``.
        :param log_name: logger name handed to ``Logger``.
        """
        self.logger = Logger(log_file, log_name, 10 * 1024 * 1024, 2)
        self._db_conn = None

    def start(self):
        """Consume categories until the broker or database cannot be reached.

        Returns (after logging) when the consumer cannot be constructed, when
        the database connection fails, or when a re-connect attempt after a
        closed connection fails. Otherwise it loops forever.
        """
        rabbit_topic = RabbitTopic.init_rabbitmq_consumer(
            EXCHANGE_NAME, QUEUE_NAME, QUEUE_LIMIT, [ROUTING_KEY], self.logger)
        if not rabbit_topic:
            self.logger.debug('Construct category consumer error')
            return

        self._conn_db()
        if not self._db_conn:
            self.logger.exception('Connect to database error')
            return

        while 1:
            try:
                rabbit_topic.start_consuming(self._callback, QUEUE_NAME)
            except ConnectionClosed:
                self.logger.debug(
                    'Connection to rabbitmq server closed, re-connecting...')
                rabbit_topic = RabbitTopic.init_rabbitmq_consumer(
                    EXCHANGE_NAME, QUEUE_NAME, QUEUE_LIMIT, [ROUTING_KEY],
                    self.logger)
                if not rabbit_topic:
                    # A falsy result means the re-connect failed; looping on
                    # would call start_consuming on it and raise an unlogged
                    # AttributeError.
                    self.logger.debug(
                        'Re-connect category consumer error, stop consuming')
                    return

    def _callback(self, channel, method, properties, category):
        """Crawl one category, acknowledge the message and mark it consumed.

        The message is always acknowledged and the category always marked
        consumed: after a successful crawl, after an exception, and also when
        no detail URLs were found. That last case used to return before the
        acknowledgement, leaving the delivery un-acked.

        :param channel: channel used to acknowledge the delivery.
        :param method: delivery metadata; ``delivery_tag`` is read from it.
        :param properties: message properties (unused).
        :param category: message body, the category to crawl.
        """
        self.logger.info(os.linesep)
        self.logger.info(
            '----> Get body message %s and start query this category... <----'
            % category)
        try:
            detail_urls = self._parse_detail_urls(category)
            if not detail_urls:
                self.logger.debug('No detail category urls got')
            for detail_url in detail_urls or ():
                self.logger.info('Query detail category url %s' % detail_url)
                category_web_driver = CategoryWebDriver(
                    detail_url, self._db_conn, self.logger)
                category_web_driver.query()
                # Pause between detail pages.
                time.sleep(10)
        except Exception:
            self.logger.exception('Query category %s error' % category)

        channel.basic_ack(delivery_tag=method.delivery_tag)

        self.logger.info('Set category %s as consumed' % category)
        self._set_category_consumed(category)

    def _conn_db(self):
        """Open the MySQL connection; on failure log and leave it as ``None``."""
        try:
            self._db_conn = util.conn_mysql_db()
        except Exception:
            self.logger.exception('Connect database error')

    # see more
    def _parse_detail_urls(self, category):
        """Collect the "see more" detail URLs from a category page.

        The page at ``CATEGORY_HOST_URL/<category>`` is fetched and the
        ``href`` values under its "see-more" anchors are prefixed with
        ``https://play.google.com``.

        :return: set of absolute detail URLs; empty when the request fails or
            the response status is not OK.
        """
        detail_urls = set()
        category_url = '%s/%s' % (CATEGORY_HOST_URL, category)
        try:
            response = requests.get(category_url)
            if response.status_code == requests.codes.ok:
                # html is from lxml to parse html page
                html_tree = html.fromstring(response.content)
                category_detail_links = html_tree.xpath(
                    '//a[@class="see-more play-button small id-track-click apps id-responsive-see-more"]//@href'
                )
                for category_detail_link in category_detail_links:
                    category_detail_link = category_detail_link.strip()
                    detail_urls.add('https://play.google.com%s' %
                                    category_detail_link)
            else:
                self.logger.debug(
                    'Access category url %s and returns wrong response code %d'
                    % (category_url, response.status_code))
        except Exception:
            self.logger.exception('Get category web page error')
        return detail_urls

    def _set_category_consumed(self, category):
        """Set the ``category`` row's status to ``CONSUMED``; log any failure."""
        query = 'UPDATE category SET status=%s WHERE category=%s'
        try:
            MySQLDBUtil.update(query, (CONSUMED, category), self._db_conn)
        except Exception:
            self.logger.exception('Set category %s as consumed error' %
                                  category)
Example #10
0
from common.logger import Logger
log = Logger()

# Detect which host application manager can be imported. ``app`` starts as
# None so that, when neither import succeeds, the check below raises the
# intended ImportError instead of a NameError on an unbound name.
# If both managers import, the Houdini assignment runs last and wins.
app = None
try:
    from apps.mayaManager import MayaManager
    app = 'maya'
except ImportError:
    pass
try:
    from apps.houdiniManager import HoudiniManager
    app = 'houdini'
except ImportError:
    pass

if not app:
    msg = "Failed to load manager! Make sure the tool and its dependencies are in PYTHONPATH"
    log.exception(msg)
    raise ImportError(msg)


def run(lights_count, modes=None, radius=1000, blend=25):
    """Extract lights from the current selection with the detected manager.

    A ``MayaManager`` is used when the module-level ``app`` is ``'maya'``,
    otherwise a ``HoudiniManager``. The manager loads the lights from its own
    selection and then ``extractLights`` is called with the given arguments.

    :param lights_count: forwarded to ``extractLights``.
    :param modes: forwarded to ``extractLights``; a fresh empty list is used
        when omitted, so calls never share one default list.
    :param radius: forwarded to ``extractLights``.
    :param blend: forwarded to ``extractLights``.
    :return: the manager instance that performed the extraction.
    """
    if modes is None:
        modes = []

    manager_cls = MayaManager if app == 'maya' else HoudiniManager
    manager = manager_cls()
    manager.getLights(manager_cls.getSelection())
    manager.extractLights(lights_count, modes, radius, blend)

    return manager