Example #1
0
def check_condition_to_start(version_id):
    """Return True when the text-classification model build for
    *version_id* may start; False while prerequisites are unmet."""
    global product_api

    product_api = Products()
    crawl_api = Crawls()
    model_api = Models()

    try:
        # A model already existing for this version means there is nothing to do.
        model = model_api.get_model(TEXT_CLASSIFICATION_MODEL_TYPE,
                                    version_id=version_id)
        if model is not None:
            return False

        log.info("check_condition_to_start")
        # Crawling must be finished: every crawl entry must be 'done'.
        done_crawls = crawl_api.get_size_crawls(version_id, status='done')
        if crawl_api.get_size_crawls(version_id) != done_crawls:
            return False

        total_products = product_api.get_size_products(version_id)
        flagged_products = sum(
            product_api.get_size_products(
                version_id, is_processed_for_text_class_model=state)
            for state in (True, False))

        # Every product already carries the processed flag either way.
        if flagged_products == total_products:
            return False

    except Exception as e:
        # Best-effort check: log and fall through to the permissive default.
        log.error(str(e))

    return True
Example #2
0
def check_condition_to_start(version_id):
    """Return True when the text-model processing for *version_id*
    may start; False while crawling, the queue, or flagging is pending."""
    global product_api

    product_api = Products()
    crawl_api = Crawls()

    try:
        log.info("check_condition_to_start")
        # Check if crawling process is done
        done_crawls = crawl_api.get_size_crawls(version_id, status='done')
        if crawl_api.get_size_crawls(version_id) != done_crawls:
            return False

        # Items still waiting in the text-model queue -> not yet.
        if rconn.llen(REDIS_PRODUCT_TEXT_MODEL_PROCESS_QUEUE) > 0:
            return False

        total_count = product_api.get_size_products(version_id)
        processed_count = product_api.get_size_products(
            version_id, is_processed_for_text_class_model=True)
        unprocessed_count = product_api.get_size_products(
            version_id, is_processed_for_text_class_model=False)

        # All products already flagged one way or the other.
        if processed_count + unprocessed_count == total_count:
            return False

    except Exception as e:
        # Best-effort check: log and fall through to the permissive default.
        log.error(str(e))

    return True
Example #3
0
    def get_products_by_keyword(self, keyword, offset=0, limit=100):
        """Return ``(total_count, products)`` for *keyword*.

        total_count: result of ``get_products_count_by_keyword``, or None
            if that API call failed (the error is logged).
        products: page of matching products (``offset``/``limit``), or None
            if that API call failed (the error is logged).
        """
        self.log.debug('get_products_by_keyword')
        product_api = Products()

        # Pre-initialize both results: in the original code a failing first
        # API call left `total_count` unbound and the final return raised
        # UnboundLocalError instead of the intended log-and-continue.
        total_count = None
        products = None

        try:
            total_count = product_api.get_products_count_by_keyword(keyword)
        except Exception as e:
            self.log.error(
                "Exception when calling get_products_count_by_keyword: %s\n" %
                e)

        try:
            products = product_api.get_products_by_keyword(keyword,
                                                           only_text=False,
                                                           offset=offset,
                                                           limit=limit)
        except Exception as e:
            self.log.error(
                "Exception when calling get_products_by_keyword: %s\n" % e)

        return total_count, products
Example #4
0
def check_condition_to_start(version_id):
    """Return True once classification for *version_id* is complete and
    the object-index queue has drained; otherwise False."""
    global product_api

    product_api = Products()

    try:
        # Check Classifying processing process is done
        total_count = product_api.get_size_products(version_id)
        classified_count = product_api.get_size_products(version_id,
                                                         is_classified=True)
        if classified_count != total_count:
            return False

        # Check Object classifying process is done
        if rconn.llen(REDIS_OBJECT_INDEX_QUEUE) != 0:
            return False

    except Exception as e:
        # Best-effort check: log and fall through to the permissive default.
        log.error(str(e))

    return True
Example #5
0
def check_condition_to_start(version_id):
    """Return True when *version_id* is ready for the next classify stage:
    queue drained, crawling done, images processed, classification pending."""
    global product_api

    product_api = Products()
    crawl_api = Crawls()

    try:
        # Check Object classifying process is done
        if rconn.llen(REDIS_PRODUCT_CLASSIFY_QUEUE) != 0:
            return False

        # Check Crawling process is done
        done_crawls = crawl_api.get_size_crawls(version_id, status='done')
        if crawl_api.get_size_crawls(version_id) != done_crawls:
            return False

        # Check if all images are processed
        total_count = product_api.get_size_products(version_id)
        availability_total = sum(
            product_api.get_size_products(version_id, is_available=flag)
            for flag in (True, False))
        # processed_size = product_api.get_size_products(version_id, is_processed=True)

        if availability_total != total_count:
            return False

        # Check Classifying processing process is done
        classified_total = sum(
            product_api.get_size_products(version_id, is_classified=flag)
            for flag in (True, False))
        if classified_total == total_count:
            return False

    except Exception as e:
        # Best-effort check: log and fall through to the permissive default.
        log.error(str(e))

    return True
Example #6
0
def start(rconn):
    """Daemon entry point for the object-index worker.

    Polls every ten minutes for the latest crawl version; when
    check_condition_to_start() approves, resets the index for that
    version and loads objects from the configured data source.
    Runs forever; any exception that escapes the loop is logged and
    ends the daemon.

    rconn: Redis connection; appears unused directly in this body —
        presumably consumed by the commented-out dispatch helpers
        (TODO confirm).
    """
    global object_api
    global feature_api
    global product_api
    global version_id

    try:
        log.info("Start bl-object-index:1")

        # API clients are stored in the globals above so helper
        # functions elsewhere in the module can share them.
        object_api = Objects()
        feature_api = Features()
        product_api = Products()
        crawl_api = Crawls()
        file = os.path.join(os.getcwd(), INDEX_FILE)
        # index_file = load_index_file(file)

        while True:
            version_id = get_latest_crawl_version()
            if version_id is not None:
                log.info("check_condition_to_start")
                ok = check_condition_to_start(version_id)
                log.info("check_condition_to_start: " + str(ok))

                if ok is True:
                    # Rebuild from scratch for this version (no index file).
                    index_file = None
                    reset_index(version_id)
                    # dispatch(rconn)
                    # prepare_objects_to_index(rconn, version_id)

                    # DATA_SOURCE selects where indexable objects come from.
                    if DATA_SOURCE == DATA_SOURCE_QUEUE:
                        load_from_queue(index_file)
                    elif DATA_SOURCE == DATA_SOURCE_DB:
                        load_from_db(index_file, version_id)

            # Poll again in ten minutes.
            time.sleep(60 * 10)
    except Exception as e:
        log.error(str(e))
Example #7
0
def check_condition_to_start(version_id):
    """Return True when *version_id* still has images left to process:
    queue empty, crawling done, and not every product flagged yet."""
    global product_api

    product_api = Products()
    crawl_api = Crawls()

    try:
        log.info("check_condition_to_start")

        # Check if image processing queue is empty
        if rconn.llen(REDIS_PRODUCT_IMAGE_PROCESS_QUEUE) != 0:
            return False

        # Check if crawling process is done
        done_crawls = crawl_api.get_size_crawls(version_id, status='done')
        if crawl_api.get_size_crawls(version_id) != done_crawls:
            return False

        # Check if all images are processed
        total_count = product_api.get_size_products(version_id)
        availability_total = sum(
            product_api.get_size_products(version_id, is_available=flag)
            for flag in (True, False))
        # processed_size = product_api.get_size_products(version_id, is_processed=True)

        if availability_total == total_count:
            return False

    except Exception as e:
        # Best-effort check: log and fall through to the permissive default.
        log.error(str(e))

    return True
Example #8
0
# Redis key / queue names used by this worker.
REDIS_PRODUCT_IMAGE_PROCESS_QUEUE = 'bl:product:image:process:queue'
REDIS_CRAWL_VERSION = 'bl:crawl:version'
REDIS_CRAWL_VERSION_LATEST = 'latest'

options = {
  'REDIS_SERVER': REDIS_SERVER,
  'REDIS_PASSWORD': REDIS_PASSWORD
}
log = Logging(options, tag='bl-object-classifier')
# decode_responses=False: queue payloads are raw bytes (pickled products,
# see analyze_product below), so they must not be decoded to str.
rconn = redis.StrictRedis(REDIS_SERVER, decode_responses=False, port=6379, password=REDIS_PASSWORD)

storage = s3.S3(AWS_ACCESS_KEY, AWS_SECRET_ACCESS_KEY)

heart_bit = True

# Module-level API clients and state shared across this module's functions.
product_api = Products()
object_api = Objects()
image_api = Images()
version_id = None

def analyze_product(p_data):
  log.info('analyze_product')
  product = pickle.loads(p_data)

  try:
    main_class_code, main_objects = analyze_main_image(product)
  except Exception as e:
    log.error('analyze_product:' + str(e))
    delete_product_from_db(str(product['_id']))
    return
Example #9
0
from __future__ import print_function
from stylelens_product.products import Products
from pprint import pprint
api_instance = Products()

try:
    # api_response = api_instance.get_products_by_keyword('Coat', only_text=True, is_processed_for_text_class_model=False, offset=0, limit=100)
    # pprint(api_response)

    keyword = 'coat'
    test_str = 'coating'

    offset = 0
    limit = 100

    while True:
        api_response = api_instance.get_products_by_keyword(
            keyword,
            only_text=True,
            is_processed_for_text_class_model=False,
            offset=offset,
            limit=limit)

        # pprint(api_response)

        for res in api_response:
            name = res.get('name')
            if test_str in name:
                pprint(test_str + ' in keyword: ' + keyword)
                pprint(name)
                pprint(res.get('cate'))
Example #10
0
def crawl(host_code, version_id):
    """Crawl one host, upsert each item into the product store, then
    notify the classifier and tear down this pod."""
    global product_api
    product_api = Products()
    options = {}
    log.setTag('bl-crawler-' + SPAWN_ID)
    log.debug('start crawl')
    options['host_code'] = host_code

    crawler = StylensCrawler(options)

    try:
        if crawler.start() == True:
            for item in crawler.get_items():
                # Identity fields sent with the initial upsert request.
                product = {
                    key: item[key]
                    for key in ('name', 'host_url', 'host_code', 'host_name',
                                'product_no', 'main_image', 'sub_images')
                }

                try:
                    res = product_api.update_product_by_hostcode_and_productno(
                        product)

                    # Enrich with the remaining crawl data before the
                    # follow-up update below.
                    product['version_id'] = version_id
                    for key in ('product_url', 'tags', 'price',
                                'currency_unit', 'nation', 'cate',
                                'sale_price', 'related_product', 'thumbnail'):
                        product[key] = item[key]

                    if 'upserted' in res:
                        product_id = str(res['upserted'])
                        log.debug("Created a product: " + product_id)
                        product['is_processed'] = False
                        update_product_by_id(product_id, product)
                    elif res['nModified'] > 0:
                        log.debug("Existing product is updated: product_no:" +
                                  product['product_no'])
                        product['is_processed'] = False
                        update_product_by_hostcode_and_productno(product)
                    else:
                        log.debug("The product is same")
                        product['is_processed'] = True
                        update_product_by_hostcode_and_productno(product)
                except Exception as e:
                    log.error(
                        "Exception when calling ProductApi->update_product_by_hostcode_and_productno: %s\n"
                        % e)
                    # delete_pod()

    except Exception as e:
        log.error("host_code:" + host_code + 'error: ' + str(e))
        delete_pod()

    notify_to_classify(host_code)
    delete_pod()