Example #1
0
def check_condition_to_start(version_id):
  """Report whether the job guarded by this check may start for `version_id`.

  Returns True only when all of the following hold:
    * the number of crawls with status 'done' equals the total number of
      crawls for the version,
    * REDIS_PRODUCT_TEXT_MODEL_PROCESS_QUEUE is empty,
    * the products flagged is_processed_for_text_class_model=True plus those
      flagged False do NOT add up to the total product count (presumably some
      products carry no flag yet -- confirm against the Products API).

  Returns False otherwise, including when any lookup raises: a failed check
  must not be reported as "ready".

  Side effect: rebinds the module-level `product_api` to a new Products().
  """
  global product_api

  product_api = Products()
  crawl_api = Crawls()

  try:
    log.info("check_condition_to_start")

    # Crawling must be finished: every crawl of this version is 'done'.
    total_crawl_size = crawl_api.get_size_crawls(version_id)
    crawled_size = crawl_api.get_size_crawls(version_id, status='done')
    if total_crawl_size != crawled_size:
      return False

    # Work is still queued; do not start while the queue is draining.
    queue_size = rconn.llen(REDIS_PRODUCT_TEXT_MODEL_PROCESS_QUEUE)
    if queue_size > 0:
      return False

    total_product_size = product_api.get_size_products(version_id)
    processed_product_size = product_api.get_size_products(
        version_id, is_processed_for_text_class_model=True)
    not_processed_product_size = product_api.get_size_products(
        version_id, is_processed_for_text_class_model=False)

    # Every product already carries the flag (True or False): nothing to do.
    if (processed_product_size + not_processed_product_size) == total_product_size:
      return False

  except Exception as e:
    # Fail closed: previously the error was logged and execution fell through
    # to `return True`, so an API/Redis outage looked like "ready to start".
    log.error(str(e))
    return False

  return True
Example #2
0
def check_condition_to_start(version_id):
    """Report whether the job guarded by this check may start for `version_id`.

    Returns True only when all of the following hold:
      * model_api.get_model(TEXT_CLASSIFICATION_MODEL_TYPE, version_id=...)
        returns None, i.e. no such model is recorded for the version yet,
      * the number of crawls with status 'done' equals the total number of
        crawls for the version,
      * the products flagged is_processed_for_text_class_model=True plus
        those flagged False do NOT add up to the total product count
        (presumably some products carry no flag yet -- confirm against the
        Products API).

    Returns False otherwise, including when any lookup raises: a failed
    check must not be reported as "ready".

    Side effect: rebinds the module-level `product_api` to a new Products().
    """
    global product_api

    product_api = Products()
    crawl_api = Crawls()
    model_api = Models()

    try:
        # A model already exists for this version: nothing to start.
        model = model_api.get_model(TEXT_CLASSIFICATION_MODEL_TYPE,
                                    version_id=version_id)
        if model is not None:
            return False

        log.info("check_condition_to_start")

        # Crawling must be finished: every crawl of this version is 'done'.
        total_crawl_size = crawl_api.get_size_crawls(version_id)
        crawled_size = crawl_api.get_size_crawls(version_id, status='done')
        if total_crawl_size != crawled_size:
            return False

        total_product_size = product_api.get_size_products(version_id)
        processed_product_size = product_api.get_size_products(
            version_id, is_processed_for_text_class_model=True)
        not_processed_product_size = product_api.get_size_products(
            version_id, is_processed_for_text_class_model=False)

        # Every product already carries the flag (True or False): skip.
        if (processed_product_size +
                not_processed_product_size) == total_product_size:
            return False

    except Exception as e:
        # Fail closed: previously the error was logged and execution fell
        # through to `return True`, so an outage looked like "ready to start".
        log.error(str(e))
        return False

    return True
Example #3
0
def check_condition_to_start(version_id):
  """Report whether the job guarded by this check may start for `version_id`.

  Returns True only when all of the following hold:
    * REDIS_PRODUCT_CLASSIFY_QUEUE is empty,
    * the number of crawls with status 'done' equals the total number of
      crawls for the version,
    * products flagged is_available=True plus those flagged False add up to
      the total product count,
    * products flagged is_classified=True plus those flagged False do NOT add
      up to the total product count (presumably some products carry no
      is_classified flag yet -- confirm against the Products API).

  Returns False otherwise, including when any lookup raises: a failed check
  must not be reported as "ready".

  Side effect: rebinds the module-level `product_api` to a new Products().
  """
  global product_api

  product_api = Products()
  crawl_api = Crawls()

  try:
    # Object classifying must be idle: its queue has to be empty.
    queue_size = rconn.llen(REDIS_PRODUCT_CLASSIFY_QUEUE)
    if queue_size != 0:
      return False

    # Crawling must be finished: every crawl of this version is 'done'.
    total_crawl_size = crawl_api.get_size_crawls(version_id)
    crawled_size = crawl_api.get_size_crawls(version_id, status='done')
    if total_crawl_size != crawled_size:
      return False

    # Every product must already carry an is_available flag (True or False).
    total_product_size = product_api.get_size_products(version_id)
    available_product_size = product_api.get_size_products(
        version_id, is_available=True)
    unavailable_product_size = product_api.get_size_products(
        version_id, is_available=False)

    if (available_product_size + unavailable_product_size) != total_product_size:
      return False

    # Every product already carries an is_classified flag: nothing left to do.
    classified_size = product_api.get_size_products(version_id, is_classified=True)
    not_classified_size = product_api.get_size_products(version_id, is_classified=False)
    if (classified_size + not_classified_size) == total_product_size:
      return False

  except Exception as e:
    # Fail closed: previously the error was logged and execution fell through
    # to `return True`, so an API/Redis outage looked like "ready to start".
    log.error(str(e))
    return False

  return True
Example #4
0
def check_condition_to_start(version_id):
    """Report whether the job guarded by this check may start for `version_id`.

    Returns True only when all of the following hold:
      * REDIS_PRODUCT_IMAGE_PROCESS_QUEUE is empty,
      * the number of crawls with status 'done' equals the total number of
        crawls for the version,
      * products flagged is_available=True plus those flagged False do NOT
        add up to the total product count (presumably some products carry
        no is_available flag yet -- confirm against the Products API).

    Returns False otherwise, including when any lookup raises: a failed
    check must not be reported as "ready".

    Side effect: rebinds the module-level `product_api` to a new Products().
    """
    global product_api

    product_api = Products()
    crawl_api = Crawls()

    try:
        log.info("check_condition_to_start")

        # The image processing queue has to be empty.
        queue_size = rconn.llen(REDIS_PRODUCT_IMAGE_PROCESS_QUEUE)
        if queue_size != 0:
            return False

        # Crawling must be finished: every crawl of this version is 'done'.
        total_crawl_size = crawl_api.get_size_crawls(version_id)
        crawled_size = crawl_api.get_size_crawls(version_id, status='done')
        if total_crawl_size != crawled_size:
            return False

        total_product_size = product_api.get_size_products(version_id)
        available_product_size = product_api.get_size_products(
            version_id, is_available=True)
        unavailable_product_size = product_api.get_size_products(
            version_id, is_available=False)

        # Every product already carries an is_available flag: nothing to do.
        if (available_product_size +
                unavailable_product_size) == total_product_size:
            return False

    except Exception as e:
        # Fail closed: previously the error was logged and execution fell
        # through to `return True`, so an outage looked like "ready to start".
        log.error(str(e))
        return False

    return True
Example #5
0
def start(rconn):
    """Poll for the latest crawl version and rebuild its index when ready.

    Binds the module-level `object_api`, `feature_api`, `product_api` and
    `version_id`, then loops forever. Each cycle fetches the latest crawl
    version; if one exists and `check_condition_to_start` returns True for
    it, the index is reset and data is loaded from the source selected by
    DATA_SOURCE. The loop sleeps 10 minutes between cycles.

    Any exception is logged and ends the loop (the function then returns).
    `rconn` is accepted but not used in this body.
    """
    global object_api, feature_api, product_api, version_id

    try:
        log.info("Start bl-object-index:1")

        object_api = Objects()
        feature_api = Features()
        product_api = Products()
        # Constructed but not used further in this function.
        crawls_client = Crawls()
        # Computed but not used further in this function.
        index_path = os.path.join(os.getcwd(), INDEX_FILE)

        poll_interval_seconds = 60 * 10

        while True:
            version_id = get_latest_crawl_version()

            # Without a known version there is nothing to check this cycle.
            ready = False
            if version_id is not None:
                log.info("check_condition_to_start")
                ready = check_condition_to_start(version_id)
                log.info("check_condition_to_start: " + str(ready))

            if ready is True:
                # The loaders are always handed None as the index file.
                loaded_index = None
                reset_index(version_id)

                if DATA_SOURCE == DATA_SOURCE_QUEUE:
                    load_from_queue(loaded_index)
                elif DATA_SOURCE == DATA_SOURCE_DB:
                    load_from_db(loaded_index, version_id)

            time.sleep(poll_interval_seconds)
    except Exception as e:
        log.error(str(e))
Example #6
0
from __future__ import print_function
import time
import stylelens_product
from stylelens_product.crawls import Crawls
from pprint import pprint
# Version id passed to the query below.
version_id = "11111j"

# Client object for the crawls API.
api_instance = Crawls()

try:
    # Query crawls for this version, passing status='todo', and dump the reply.
    api_response = api_instance.get_crawls(status='todo',
                                           version_id=version_id)
    pprint(api_response)
except Exception as e:
    print("Exception when calling get_crawls: %s\n" % e)
Example #7
0
from __future__ import print_function
from stylelens_product.crawls import Crawls
from pprint import pprint
# Version id passed to the query below.
version_id = "5a47ccfe4dfd7d90b84eb710"

# Client object for the crawls API.
api_instance = Crawls()

try:
    # Call get_size_crawls for this version (presumably a crawl count) and
    # dump whatever comes back.
    api_response = api_instance.get_size_crawls(
        version_id=version_id)
    pprint(api_response)
except Exception as e:
    print("Exception when calling get_size_crawls: %s\n" % e)
Example #8
0
from __future__ import print_function
import time
import stylelens_product
from stylelens_product.crawls import Crawls
from pprint import pprint
# Client object for the crawls API.
api_instance = Crawls()

# Crawl record handed to add_crawl: a host code and the version id.
crawl = {
    'host_code': "HCBOK1",
    'version_id': "11111j",
}

try:
    # Submit the record and dump the API's reply.
    api_response = api_instance.add_crawl(crawl)
    pprint(api_response)
except Exception as e:
    print("Exception when calling add_crawl: %s\n" % e)
Example #9
0
# Runtime configuration, read from the process environment at import time.
# Each lookup uses os.environ[...], so a missing variable raises KeyError
# and the module fails to import.
REDIS_PASSWORD = os.environ['REDIS_PASSWORD']
RELEASE_MODE = os.environ['RELEASE_MODE']
DB_PRODUCT_HOST = os.environ['DB_PRODUCT_HOST']
DB_PRODUCT_PORT = os.environ['DB_PRODUCT_PORT']
DB_PRODUCT_USER = os.environ['DB_PRODUCT_USER']
DB_PRODUCT_PASSWORD = os.environ['DB_PRODUCT_PASSWORD']
DB_PRODUCT_NAME = os.environ['DB_PRODUCT_NAME']
# Shared Redis client. REDIS_SERVER is defined above this point in the file;
# the port is fixed at 6379.
rconn = redis.StrictRedis(REDIS_SERVER, port=6379, password=REDIS_PASSWORD)

# Settings handed to the logger below (the Redis server and its password).
options = {
  'REDIS_SERVER': REDIS_SERVER,
  'REDIS_PASSWORD': REDIS_PASSWORD
}
# Module-wide logger tagged 'bl-crawl'.
# NOTE(review): how Logging uses the Redis settings is not visible here.
log = Logging(options, tag='bl-crawl')

# Module-wide client for the crawls API.
crawl_api = Crawls()

def spawn_crawler(host_code, version_id):
  pool = spawning_pool.SpawningPool()
  id = host_code.lower()

  project_name = 'bl-crawler-' + id
  log.debug('spawn_crawler: ' + project_name)

  pool.setServerUrl(REDIS_SERVER)
  pool.setServerPassword(REDIS_PASSWORD)
  pool.setApiVersion('v1')
  pool.setKind('Pod')
  pool.setMetadataName(project_name)
  pool.setMetadataNamespace(RELEASE_MODE)
  pool.addMetadataLabel('name', project_name)