def check_condition_to_start(version_id):
    """Decide whether text-model processing may start for *version_id*.

    Returns False when crawling is unfinished, the text-model queue still
    holds entries, or every product already carries a text-class flag.
    On any error the failure is logged and True is returned (best-effort).
    """
    global product_api
    product_api = Products()
    crawl_api = Crawls()
    try:
        log.info("check_condition_to_start")
        # Crawling must be fully completed first.
        crawls_total = crawl_api.get_size_crawls(version_id)
        crawls_done = crawl_api.get_size_crawls(version_id, status='done')
        if crawls_done != crawls_total:
            return False
        # A non-empty queue means workers are still consuming items.
        if rconn.llen(REDIS_PRODUCT_TEXT_MODEL_PROCESS_QUEUE) > 0:
            return False
        # When every product is already flagged (either way) for the text
        # classification model, there is nothing left to kick off.
        products_total = product_api.get_size_products(version_id)
        flagged = product_api.get_size_products(
            version_id, is_processed_for_text_class_model=True)
        unflagged = product_api.get_size_products(
            version_id, is_processed_for_text_class_model=False)
        if flagged + unflagged == products_total:
            return False
    except Exception as e:
        log.error(str(e))
    return True
def check_condition_to_start(version_id):
    """Decide whether text-classification model building may start.

    Vetoes the start (returns False) when a model already exists for the
    version, crawling has not finished, or all products are already flagged
    for the text-class model. Errors are logged and treated as startable.
    """
    global product_api
    product_api = Products()
    crawl_api = Crawls()
    model_api = Models()
    try:
        # An existing model for this version means the work is done.
        existing = model_api.get_model(
            TEXT_CLASSIFICATION_MODEL_TYPE, version_id=version_id)
        if existing is not None:
            return False
        log.info("check_condition_to_start")
        # Crawling must be fully completed first.
        crawls_total = crawl_api.get_size_crawls(version_id)
        crawls_done = crawl_api.get_size_crawls(version_id, status='done')
        if crawls_done != crawls_total:
            return False
        # When every product is already flagged (either way) for the text
        # classification model, there is nothing left to kick off.
        products_total = product_api.get_size_products(version_id)
        flagged = product_api.get_size_products(
            version_id, is_processed_for_text_class_model=True)
        unflagged = product_api.get_size_products(
            version_id, is_processed_for_text_class_model=False)
        if flagged + unflagged == products_total:
            return False
    except Exception as e:
        log.error(str(e))
    return True
def check_condition_to_start(version_id):
    """Decide whether the classify-processing stage may start.

    Returns False when the classify queue is non-empty, crawling is
    unfinished, image processing has not covered every product, or every
    product is already tagged classified/not-classified. Errors are logged
    and treated as startable (best-effort).
    """
    global product_api
    product_api = Products()
    crawl_api = Crawls()
    try:
        # Object classification workers must have drained their queue.
        if rconn.llen(REDIS_PRODUCT_CLASSIFY_QUEUE) != 0:
            return False
        # Crawling must be fully completed.
        crawls_total = crawl_api.get_size_crawls(version_id)
        crawls_done = crawl_api.get_size_crawls(version_id, status='done')
        if crawls_done != crawls_total:
            return False
        # Image processing is done only once every product has been marked
        # available or unavailable; any gap means work is still pending.
        products_total = product_api.get_size_products(version_id)
        available = product_api.get_size_products(version_id, is_available=True)
        unavailable = product_api.get_size_products(version_id, is_available=False)
        if available + unavailable != products_total:
            return False
        # If every product already has a classification verdict, there is
        # nothing left to kick off.
        classified = product_api.get_size_products(version_id, is_classified=True)
        unclassified = product_api.get_size_products(version_id, is_classified=False)
        if classified + unclassified == products_total:
            return False
    except Exception as e:
        log.error(str(e))
    return True
def check_condition_to_start(version_id):
    """Decide whether image processing may start for *version_id*.

    Returns False when the image-process queue is non-empty, crawling is
    unfinished, or every product has already been marked available or
    unavailable. Errors are logged and treated as startable.
    """
    global product_api
    product_api = Products()
    crawl_api = Crawls()
    try:
        log.info("check_condition_to_start")
        # Image-processing workers must have drained their queue.
        if rconn.llen(REDIS_PRODUCT_IMAGE_PROCESS_QUEUE) != 0:
            return False
        # Crawling must be fully completed.
        crawls_total = crawl_api.get_size_crawls(version_id)
        crawls_done = crawl_api.get_size_crawls(version_id, status='done')
        if crawls_done != crawls_total:
            return False
        # When every product already carries an availability verdict,
        # image processing has nothing left to do.
        products_total = product_api.get_size_products(version_id)
        available = product_api.get_size_products(version_id, is_available=True)
        unavailable = product_api.get_size_products(version_id, is_available=False)
        if available + unavailable == products_total:
            return False
    except Exception as e:
        log.error(str(e))
    return True
def start(rconn):
    """Main polling loop of the bl-object-index worker.

    Every ten minutes, fetches the latest crawl version and, when the
    pipeline preconditions hold, resets the index and loads objects from
    the configured data source (queue or DB).

    :param rconn: Redis connection, currently only forwarded to the
        commented-out dispatch helpers.
    """
    global object_api
    global feature_api
    global product_api
    global version_id
    try:
        log.info("Start bl-object-index:1")
        object_api = Objects()
        feature_api = Features()
        product_api = Products()
        crawl_api = Crawls()
        # Path of the on-disk index file; loading it is currently disabled.
        file = os.path.join(os.getcwd(), INDEX_FILE)
        # index_file = load_index_file(file)
        while True:
            version_id = get_latest_crawl_version()
            if version_id is not None:
                log.info("check_condition_to_start")
                ok = check_condition_to_start(version_id)
                log.info("check_condition_to_start: " + str(ok))
                if ok is True:
                    # index_file intentionally None: build from scratch
                    # rather than from the persisted file.
                    index_file = None
                    reset_index(version_id)
                    # dispatch(rconn)
                    # prepare_objects_to_index(rconn, version_id)
                    if DATA_SOURCE == DATA_SOURCE_QUEUE:
                        load_from_queue(index_file)
                    elif DATA_SOURCE == DATA_SOURCE_DB:
                        load_from_db(index_file, version_id)
            # Poll interval: 10 minutes between condition checks.
            time.sleep(60 * 10)
    except Exception as e:
        log.error(str(e))
from __future__ import print_function import time import stylelens_product from stylelens_product.crawls import Crawls from pprint import pprint # create an instance of the API class api_instance = Crawls() version_id = "11111j" try: api_response = api_instance.get_crawls(version_id=version_id, status='todo') pprint(api_response) except Exception as e: print("Exception when calling get_crawls: %s\n" % e)
from __future__ import print_function from stylelens_product.crawls import Crawls from pprint import pprint # create an instance of the API class api_instance = Crawls() version_id = "5a47ccfe4dfd7d90b84eb710" try: api_response = api_instance.get_size_crawls(version_id=version_id) pprint(api_response) except Exception as e: print("Exception when calling get_size_crawls: %s\n" % e)
from __future__ import print_function import time import stylelens_product from stylelens_product.crawls import Crawls from pprint import pprint # create an instance of the API class api_instance = Crawls() crawl = {} crawl['host_code'] = "HCBOK1" crawl['version_id'] = "11111j" try: api_response = api_instance.add_crawl(crawl) pprint(api_response) except Exception as e: print("Exception when calling add_crawl: %s\n" % e)
# Runtime configuration pulled from the environment; a missing variable
# raises KeyError at import time (fail-fast on misconfiguration).
REDIS_PASSWORD = os.environ['REDIS_PASSWORD']
RELEASE_MODE = os.environ['RELEASE_MODE']
DB_PRODUCT_HOST = os.environ['DB_PRODUCT_HOST']
DB_PRODUCT_PORT = os.environ['DB_PRODUCT_PORT']
DB_PRODUCT_USER = os.environ['DB_PRODUCT_USER']
DB_PRODUCT_PASSWORD = os.environ['DB_PRODUCT_PASSWORD']
DB_PRODUCT_NAME = os.environ['DB_PRODUCT_NAME']

# Shared Redis connection for queues and the spawning pool.
rconn = redis.StrictRedis(REDIS_SERVER, port=6379, password=REDIS_PASSWORD)

options = {
    'REDIS_SERVER': REDIS_SERVER,
    'REDIS_PASSWORD': REDIS_PASSWORD
}
log = Logging(options, tag='bl-crawl')
crawl_api = Crawls()


def spawn_crawler(host_code, version_id):
    """Configure a Kubernetes Pod spec for a per-host crawler.

    Builds a SpawningPool pod named ``bl-crawler-<host_code>`` in the
    namespace given by RELEASE_MODE.

    NOTE(review): this chunk appears truncated — the pod spec setup likely
    continues (and is submitted) beyond the visible lines; confirm against
    the full file.
    """
    pool = spawning_pool.SpawningPool()
    # id shadows the builtin; kept as-is to avoid behavior-affecting edits.
    id = host_code.lower()
    project_name = 'bl-crawler-' + id
    log.debug('spawn_crawler: ' + project_name)
    pool.setServerUrl(REDIS_SERVER)
    pool.setServerPassword(REDIS_PASSWORD)
    pool.setApiVersion('v1')
    pool.setKind('Pod')
    pool.setMetadataName(project_name)
    pool.setMetadataNamespace(RELEASE_MODE)
    pool.addMetadataLabel('name', project_name)