Ejemplo n.º 1
0
def check_condition_to_start(version_id):
  """Decide whether text-model processing may start for ``version_id``.

  Returns ``False`` as soon as one of the following holds:

  * the number of crawls with ``status='done'`` differs from the total
    number of crawls of the version;
  * the Redis list ``REDIS_PRODUCT_TEXT_MODEL_PROCESS_QUEUE`` is not empty;
  * the products flagged ``is_processed_for_text_class_model=True`` plus
    those flagged ``False`` add up to the total product count
    (presumably: no product is left without the flag -- TODO confirm
    against the Products API);
  * any of the lookups raises.

  Otherwise returns ``True``.

  Side effect: the module-level ``product_api`` is rebound to a fresh
  ``Products`` instance on every call.
  """
  global product_api

  product_api = Products()
  crawl_api = Crawls()

  try:
    log.info("check_condition_to_start")

    # Check if crawling process is done
    total_crawl_size = crawl_api.get_size_crawls(version_id)
    crawled_size = crawl_api.get_size_crawls(version_id, status='done')
    if total_crawl_size != crawled_size:
      return False

    # Work is still queued for the text model
    queue_size = rconn.llen(REDIS_PRODUCT_TEXT_MODEL_PROCESS_QUEUE)
    if queue_size > 0:
      return False

    total_product_size = product_api.get_size_products(version_id)
    processed_product_size = product_api.get_size_products(
        version_id, is_processed_for_text_class_model=True)
    not_processed_product_size = product_api.get_size_products(
        version_id, is_processed_for_text_class_model=False)

    if (processed_product_size + not_processed_product_size) == total_product_size:
      return False

  except Exception as e:
    # Fail closed: when a precondition cannot be verified the job must not
    # be started (previously the error was logged and True was returned).
    log.error(str(e))
    return False

  return True
Ejemplo n.º 2
0
def check_condition_to_start(version_id):
    """Decide whether text-classification model work may start.

    Returns ``False`` as soon as one of the following holds:

    * ``Models.get_model(TEXT_CLASSIFICATION_MODEL_TYPE, version_id=...)``
      returns something other than ``None``;
    * the number of crawls with ``status='done'`` differs from the total
      number of crawls of the version;
    * the products flagged ``is_processed_for_text_class_model=True`` plus
      those flagged ``False`` add up to the total product count
      (presumably: no product is left without the flag -- TODO confirm);
    * any of the lookups raises.

    Otherwise returns ``True``.

    Side effect: the module-level ``product_api`` is rebound to a fresh
    ``Products`` instance on every call.
    """
    global product_api

    product_api = Products()
    crawl_api = Crawls()
    model_api = Models()

    try:
        model = model_api.get_model(TEXT_CLASSIFICATION_MODEL_TYPE,
                                    version_id=version_id)
        if model is not None:
            return False

        log.info("check_condition_to_start")

        # Check if crawling process is done
        total_crawl_size = crawl_api.get_size_crawls(version_id)
        crawled_size = crawl_api.get_size_crawls(version_id, status='done')
        if total_crawl_size != crawled_size:
            return False

        total_product_size = product_api.get_size_products(version_id)
        processed_product_size = product_api.get_size_products(
            version_id, is_processed_for_text_class_model=True)
        not_processed_product_size = product_api.get_size_products(
            version_id, is_processed_for_text_class_model=False)

        if (processed_product_size +
                not_processed_product_size) == total_product_size:
            return False

    except Exception as e:
        # Fail closed: when a precondition cannot be verified the job must
        # not be started (previously the error was logged and True was
        # returned).
        log.error(str(e))
        return False

    return True
Ejemplo n.º 3
0
    def get_products_by_keyword(self, keyword, offset=0, limit=100):
        """Return ``(total_count, products)`` for ``keyword``.

        ``total_count`` comes from ``Products.get_products_count_by_keyword``
        and ``products`` from ``Products.get_products_by_keyword`` called
        with ``only_text=False`` and the given ``offset`` / ``limit``.

        Each lookup is best-effort: a failure is logged and the matching
        element falls back to ``0`` / ``[]``.  Previously a failing lookup
        left the name unbound and the ``return`` raised
        ``UnboundLocalError``.
        """
        self.log.debug('get_products_by_keyword')
        product_api = Products()

        # Defaults returned when the corresponding API call fails.
        total_count = 0
        products = []

        try:
            total_count = product_api.get_products_count_by_keyword(keyword)
        except Exception as e:
            self.log.error(
                "Exception when calling get_products_count_by_keyword: %s\n" %
                e)

        try:
            products = product_api.get_products_by_keyword(keyword,
                                                           only_text=False,
                                                           offset=offset,
                                                           limit=limit)
        except Exception as e:
            self.log.error(
                "Exception when calling get_products_by_keyword: %s\n" % e)

        return total_count, products
Ejemplo n.º 4
0
def check_condition_to_start(version_id):
    """Decide whether object indexing may start for ``version_id``.

    Returns ``False`` as soon as one of the following holds:

    * the number of products with ``is_classified=True`` differs from the
      total product count of the version;
    * the Redis list ``REDIS_OBJECT_INDEX_QUEUE`` is not empty;
    * any of the lookups raises.

    Otherwise returns ``True``.

    Side effect: the module-level ``product_api`` is rebound to a fresh
    ``Products`` instance on every call.
    """
    global product_api

    product_api = Products()

    try:
        # Check Classifying processing process is done
        total_product_size = product_api.get_size_products(version_id)
        classified_size = product_api.get_size_products(version_id,
                                                        is_classified=True)
        if total_product_size != classified_size:
            return False

        # Check that the object index queue has been drained
        queue_size = rconn.llen(REDIS_OBJECT_INDEX_QUEUE)
        if queue_size != 0:
            return False

    except Exception as e:
        # Fail closed: when a precondition cannot be verified the job must
        # not be started (previously the error was logged and True was
        # returned).
        log.error(str(e))
        return False

    return True
Ejemplo n.º 5
0
def check_condition_to_start(version_id):
    """Decide whether image processing may start for ``version_id``.

    Returns ``False`` as soon as one of the following holds:

    * the Redis list ``REDIS_PRODUCT_IMAGE_PROCESS_QUEUE`` is not empty;
    * the number of crawls with ``status='done'`` differs from the total
      number of crawls of the version;
    * the products flagged ``is_available=True`` plus those flagged
      ``False`` add up to the total product count (presumably: no product
      is left without the flag -- TODO confirm);
    * any of the lookups raises.

    Otherwise returns ``True``.

    Side effect: the module-level ``product_api`` is rebound to a fresh
    ``Products`` instance on every call.
    """
    global product_api

    product_api = Products()
    crawl_api = Crawls()

    try:
        log.info("check_condition_to_start")

        # Check if image processing queue is empty
        queue_size = rconn.llen(REDIS_PRODUCT_IMAGE_PROCESS_QUEUE)
        if queue_size != 0:
            return False

        # Check if crawling process is done
        total_crawl_size = crawl_api.get_size_crawls(version_id)
        crawled_size = crawl_api.get_size_crawls(version_id, status='done')
        if total_crawl_size != crawled_size:
            return False

        # Check if all images are processed
        total_product_size = product_api.get_size_products(version_id)
        available_product_size = product_api.get_size_products(
            version_id, is_available=True)
        unavailable_product_size = product_api.get_size_products(
            version_id, is_available=False)

        if (available_product_size +
                unavailable_product_size) == total_product_size:
            return False

    except Exception as e:
        # Fail closed: when a precondition cannot be verified the job must
        # not be started (previously the error was logged and True was
        # returned).
        log.error(str(e))
        return False

    return True
Ejemplo n.º 6
0
def start(rconn):
    """Run the object-index polling loop.

    Rebinds the module-level ``object_api``, ``feature_api``,
    ``product_api`` and ``version_id``, then every ten minutes asks
    ``get_latest_crawl_version`` for a version and, when
    ``check_condition_to_start`` returns ``True`` for it, resets the index
    and loads data from the source selected by ``DATA_SOURCE``.

    Any exception is logged and ends the loop.
    """
    global object_api
    global feature_api
    global product_api
    global version_id

    poll_interval_sec = 60 * 10

    try:
        log.info("Start bl-object-index:1")

        object_api = Objects()
        feature_api = Features()
        product_api = Products()
        crawl_api = Crawls()
        index_path = os.path.join(os.getcwd(), INDEX_FILE)

        while True:
            ready = False
            version_id = get_latest_crawl_version()

            if version_id is not None:
                log.info("check_condition_to_start")
                ready = check_condition_to_start(version_id)
                log.info("check_condition_to_start: " + str(ready))

            if ready is True:
                # No pre-loaded index file is handed to the loaders.
                index_file = None
                reset_index(version_id)

                if DATA_SOURCE == DATA_SOURCE_QUEUE:
                    load_from_queue(index_file)
                elif DATA_SOURCE == DATA_SOURCE_DB:
                    load_from_db(index_file, version_id)

            time.sleep(poll_interval_sec)
    except Exception as err:
        log.error(str(err))
Ejemplo n.º 7
0
def check_condition_to_start(version_id):
  """Decide whether product classification may start for ``version_id``.

  Returns ``False`` as soon as one of the following holds:

  * the Redis list ``REDIS_PRODUCT_CLASSIFY_QUEUE`` is not empty;
  * the number of crawls with ``status='done'`` differs from the total
    number of crawls of the version;
  * the products flagged ``is_available=True`` plus those flagged
    ``False`` do NOT add up to the total product count;
  * the products flagged ``is_classified=True`` plus those flagged
    ``False`` add up to the total product count (presumably: no product
    is left without the flag -- TODO confirm);
  * any of the lookups raises.

  Otherwise returns ``True``.

  Side effect: the module-level ``product_api`` is rebound to a fresh
  ``Products`` instance on every call.
  """
  global product_api

  product_api = Products()
  crawl_api = Crawls()

  try:
    # Check that the classify queue has been drained
    queue_size = rconn.llen(REDIS_PRODUCT_CLASSIFY_QUEUE)
    if queue_size != 0:
      return False

    # Check Crawling process is done
    total_crawl_size = crawl_api.get_size_crawls(version_id)
    crawled_size = crawl_api.get_size_crawls(version_id, status='done')
    if total_crawl_size != crawled_size:
      return False

    # Check if all images are processed
    total_product_size = product_api.get_size_products(version_id)
    available_product_size = product_api.get_size_products(version_id, is_available=True)
    unavailable_product_size = product_api.get_size_products(version_id, is_available=False)

    if (available_product_size + unavailable_product_size) != total_product_size:
      return False

    # Check Classifying processing process is done
    classified_size = product_api.get_size_products(version_id, is_classified=True)
    not_classified_size = product_api.get_size_products(version_id, is_classified=False)
    if (classified_size + not_classified_size) == total_product_size:
      return False

  except Exception as e:
    # Fail closed: when a precondition cannot be verified the job must not
    # be started (previously the error was logged and True was returned).
    log.error(str(e))
    return False

  return True
Ejemplo n.º 8
0
from __future__ import print_function
from stylelens_product.products import Products
from bson import ObjectId
from pprint import pprint
api_instance = Products()

# Two partial product documents: each carries the _id to match and the
# single flag to set.
product1 = {
    '_id': ObjectId("5a6715e936b7a0be34e7d728"),
    'is_processed_for_text_class_model': True,
}
product2 = {
    '_id': ObjectId("5a6715e936b7a0be34e7d72b"),
    'is_processed_for_text_class_model': True,
}
products = [product1, product2]

try:
    # Send both documents in one update_products call
    api_response = api_instance.update_products(products)
    pprint(api_response)
except Exception as err:
    print("Exception when calling update_products: %s\n" % err)
Ejemplo n.º 9
0
from __future__ import print_function
import time
from stylelens_product.products import Products
from pprint import pprint
# Products API client used for the lookup below
api_instance = Products()

product_id = "5a4e3f914dfd7d90b888598b"

try:
    # Fetch a single product by its id and dump it
    res = api_instance.get_product_by_id(product_id)
    pprint(res)
except Exception as err:
    print("Exception when calling get_product_by_id: %s\n" % err)
Ejemplo n.º 10
0
from __future__ import print_function
import time
from stylelens_product.products import Products
from pprint import pprint
# Products API client used for the lookup below
api_instance = Products()

keyword = "민소매"

try:
    # Look up the text stored for the keyword and dump it
    api_response = api_instance.get_text_by_keyword(keyword)
    pprint(api_response)
except Exception as err:
    print("Exception when calling get_text_by_keyword: %s\n" % err)
from __future__ import print_function
from stylelens_product.products import Products
from pprint import pprint
# create an instance of the API class
api_instance = Products()

try:
    # Call reset_product_as_available and dump whatever it returns
    api_response = api_instance.reset_product_as_available()
    pprint(api_response)
except Exception as e:
    # The message previously named add_product, which is not the call made.
    print("Exception when calling reset_product_as_available: %s\n" % e)
Ejemplo n.º 12
0
from __future__ import print_function
from stylelens_product.products import Products
from pprint import pprint
# create an instance of the API class
api_instance = Products()

try:
    # Delete one product by id and dump the API response
    api_response = api_instance.delete_product('5a4dadfd4dfd7d90b8838fbf')
    pprint(api_response)
except Exception as e:
    # The message previously named add_product, which is not the call made.
    print("Exception when calling delete_product: %s\n" % e)
Ejemplo n.º 13
0
from __future__ import print_function
from stylelens_product.products import Products
from pprint import pprint
api_instance = Products()

keyword = '재킷'

try:
    # Count the products matching the keyword and dump the result
    api_response = api_instance.get_products_count_by_keyword(keyword)
    pprint(api_response)
except Exception as err:
    print("Exception when calling ProductApi->get_products_count_by_keyword: %s\n" % err)
Ejemplo n.º 14
0
from __future__ import print_function
from stylelens_product.products import Products
from pprint import pprint
# create an instance of the API class
api_instance = Products()

# Fields to write onto the product
product = {}
product['host_code'] = 'HC1'
product['product_no'] = 'lll2'
product['version_id'] = '1'

try:
    api_response = api_instance.update_product_by_id(
        "5a3a32a94dfd7d90b88e2a84", product)
    pprint(api_response)
except Exception as e:
    # The message previously named add_product, which is not the call made.
    print("Exception when calling update_product_by_id: %s\n" % e)
Ejemplo n.º 15
0
from __future__ import print_function
from stylelens_product.products import Products
from pprint import pprint
# create an instance of the API class
api_instance = Products()

try:
    # Call reset_product_as_not_object_classified and dump its response
    api_response = api_instance.reset_product_as_not_object_classified()
    pprint(api_response)
except Exception as e:
    # The message previously named add_product, which is not the call made.
    print(
        "Exception when calling reset_product_as_not_object_classified: %s\n"
        % e)
Ejemplo n.º 16
0
from __future__ import print_function
from stylelens_product.products import Products
from pprint import pprint
# Scans products matching a keyword and prints those whose name also
# contains a second test string.
# NOTE(review): this snippet is cut off in the visible source -- the `try`
# below has no `except`/`finally`, and nothing visible advances `offset` or
# leaves the `while True` loop. Confirm against the complete file.
api_instance = Products()

try:
    # api_response = api_instance.get_products_by_keyword('Coat', only_text=True, is_processed_for_text_class_model=False, offset=0, limit=100)
    # pprint(api_response)

    # Keyword sent to the API, and the substring searched for in each name
    keyword = 'coat'
    test_str = 'coating'

    # Page window passed to every request
    offset = 0
    limit = 100

    while True:
        # Text-only products not yet processed for the text-class model
        api_response = api_instance.get_products_by_keyword(
            keyword,
            only_text=True,
            is_processed_for_text_class_model=False,
            offset=offset,
            limit=limit)

        # pprint(api_response)

        for res in api_response:
            name = res.get('name')
            # NOTE(review): `name` is None when the key is missing, which
            # makes the `in` test below raise TypeError.
            if test_str in name:
                pprint(test_str + ' in keyword: ' + keyword)
                pprint(name)
                pprint(res.get('cate'))
Ejemplo n.º 17
0
from __future__ import print_function
import time
import stylelens_product
from stylelens_product.products import Products
from pprint import pprint
# Products API client used for the query below
api_instance = Products()

# First ten products of host "HC0" in version '1'
query = {'host_code': "HC0", 'version_id': '1', 'offset': 0, 'limit': 10}

try:
    api_response = api_instance.get_products_by_hostcode_and_version_id(**query)
    pprint(api_response)
except Exception as err:
    print(
        "Exception when calling ProductApi->get_products_by_hostcode_and_version_id: %s\n"
        % err)
Ejemplo n.º 18
0
from __future__ import print_function
import time
from stylelens_product.products import Products
from pprint import pprint
# Products API client used for the queries below
api_instance = Products()

version_id = '5a47ccfe4dfd7d90b84eb710'

# First query has no extra filters; the second narrows to products that are
# processed but not classified.
filter_sets = (
    {},
    {'is_classified': False, 'is_processed': True},
)

try:
    for filters in filter_sets:
        api_response = api_instance.get_products_by_version_id(
            version_id, **filters)
        pprint(api_response)
except Exception as err:
    print("Exception when calling get_products_by_version_id: %s\n" % err)
Ejemplo n.º 19
0
from __future__ import print_function
from stylelens_product.products import Products
from pprint import pprint

api_instance = Products()

# Product document passed to the API; it carries host_code and product_no
# alongside the new price.
product = {}
product['host_code'] = 'HC1'
product['product_no'] = 'lll4'
product['price'] = '1'

try:
    api_response = api_instance.update_product_by_hostcode_and_productno(product)
    pprint(api_response)
except Exception as e:
    # The message previously named update_product_by_id, which is not the
    # call made.
    print(
        "Exception when calling ProductApi->update_product_by_hostcode_and_productno: %s\n"
        % e)
Ejemplo n.º 20
0
from __future__ import print_function
import time
import stylelens_product
from stylelens_product.products import Products
from pprint import pprint
# Products API client used for the call below
api_instance = Products()

version_id = "5a7809f1567d6814379f9203"

try:
    # Call delete_old_products for this version and dump the response
    api_response = api_instance.delete_old_products(version_id=version_id)
    pprint(api_response)
except Exception as err:
    print("Exception when calling delete_old_products: %s\n" % err)
Ejemplo n.º 21
0
from __future__ import print_function
from stylelens_product.products import Products
from pprint import pprint
# Products API client used for the call below
api_instance = Products()

try:
    # Call reset_product_is_classified_for_text and dump its response
    reset = api_instance.reset_product_is_classified_for_text
    api_response = reset()
    pprint(api_response)
except Exception as err:
    print("Exception when calling reset_product_is_classified_for_text: %s\n" %
          err)
Ejemplo n.º 22
0
from __future__ import print_function
from stylelens_product.hosts import Hosts
from stylelens_product.products import Products
from pprint import pprint
# API clients: one for hosts, one for products
host_api = Hosts()
product_api = Products()

# (label, product key, wrap value in str()) for each printed line, in order
printed_fields = (
    ('host_code: ', 'host_code', False),
    ('name     : ', 'name', False),
    ('cate     : ', 'cate', True),
    ('tags     : ', 'tags', True),
)

try:
    hosts = host_api.get_hosts()
    for host in hosts:
        # One product per host, taken at offset 100
        products = product_api.get_products_by_hostcode_and_version_id(
            host['host_code'], offset=100, limit=1)
        for p in products:
            for label, key, as_text in printed_fields:
                value = p[key]
                print(label + (str(value) if as_text else value))
            print('_________')

except Exception as err:
    print("Exception when calling get_hosts: %s\n" % err)
Ejemplo n.º 23
0
from __future__ import print_function
from stylelens_product.products import Products
from pprint import pprint
# Products API client used for the call below
api_instance = Products()

# Minimal product document to insert
product = {
  'host_code': 'HC1',
  'product_no': 'sss2',
  'version_id': '1',
}

try:
  api_response = api_instance.add_product(product)
  pprint(api_response)
except Exception as err:
  print("Exception when calling add_product: %s\n" % err)
Ejemplo n.º 24
0
from __future__ import print_function
import time
import stylelens_product
from stylelens_product.products import Products
from pprint import pprint
# Products API client used for the counts below
api_instance = Products()

version_id = "5a4dacce4dfd7d90b8837719"

# (printed label, extra filter kwargs) for each count, in print order
size_queries = (
    ('total products: ', {}),
    ('Processed products: ', {'is_processed': True}),
    ('Classified products: ', {'is_classified': True}),
)

try:
    for label, filters in size_queries:
        api_response = api_instance.get_size_products(version_id=version_id,
                                                      **filters)
        pprint(label + str(api_response))
except Exception as err:
    print("Exception when calling get_size_products: %s\n" % err)
Ejemplo n.º 25
0
def crawl(host_code, version_id):
    """Crawl one host and push every scraped item to the Products API.

    For each item a product dict with the identifying fields is first sent
    through ``product_api.update_product_by_hostcode_and_productno``; the
    response decides which follow-up update runs and whether the product
    is marked ``is_processed``.  Per-item API errors are logged and the
    loop continues; any other error is logged and ``delete_pod`` is called.
    ``notify_to_classify`` and ``delete_pod`` are always called at the end.

    Side effect: the module-level ``product_api`` is rebound.
    """
    global product_api
    product_api = Products()
    log.setTag('bl-crawler-' + SPAWN_ID)
    log.debug('start crawl')

    options = {'host_code': host_code}
    crawler = StylensCrawler(options)

    # Fields copied before the first API call, in copy order.
    identity_fields = ('name', 'host_url', 'host_code', 'host_name',
                       'product_no', 'main_image', 'sub_images')
    # Fields copied after the first API call, in copy order.
    detail_fields = ('product_url', 'tags', 'price', 'currency_unit',
                     'nation', 'cate', 'sale_price', 'related_product',
                     'thumbnail')

    try:
        scraped = crawler.get_items() if crawler.start() == True else []

        for item in scraped:
            product = {field: item[field] for field in identity_fields}

            try:
                res = product_api.update_product_by_hostcode_and_productno(
                    product)

                product['version_id'] = version_id
                for field in detail_fields:
                    product[field] = item[field]

                if 'upserted' in res:
                    # The response carries an 'upserted' id
                    product_id = str(res['upserted'])
                    log.debug("Created a product: " + product_id)
                    product['is_processed'] = False
                    update_product_by_id(product_id, product)
                    continue

                changed = res['nModified'] > 0
                if changed:
                    log.debug("Existing product is updated: product_no:" +
                              product['product_no'])
                else:
                    log.debug("The product is same")
                # Unchanged products are marked as already processed.
                product['is_processed'] = not changed
                update_product_by_hostcode_and_productno(product)
            except Exception as err:
                log.error(
                    "Exception when calling ProductApi->update_product_by_hostcode_and_productno: %s\n"
                    % err)

    except Exception as err:
        log.error("host_code:" + host_code + 'error: ' + str(err))
        delete_pod()

    notify_to_classify(host_code)
    delete_pod()
Ejemplo n.º 26
0
from __future__ import print_function
import time
import stylelens_product
from stylelens_product.products import Products
from pprint import pprint
# Products API client used for the count below
api_instance = Products()

version_id = "5a47a2214dfd7d90b8355169"

try:
    # Call get_size_not_classified for this version and dump the result
    size_query = {'version_id': version_id}
    api_response = api_instance.get_size_not_classified(**size_query)
    pprint(api_response)
except Exception as err:
    print("Exception when calling get_size_not_classified: %s\n" % err)
Ejemplo n.º 27
0
# Redis key names used by this service.
REDIS_PRODUCT_IMAGE_PROCESS_QUEUE = 'bl:product:image:process:queue'
REDIS_CRAWL_VERSION = 'bl:crawl:version'
REDIS_CRAWL_VERSION_LATEST = 'latest'

# Connection settings handed to the Logging helper.
options = {
  'REDIS_SERVER': REDIS_SERVER,
  'REDIS_PASSWORD': REDIS_PASSWORD
}
log = Logging(options, tag='bl-object-classifier')
# Shared Redis connection; decode_responses=False is passed explicitly
# (analyze_product below unpickles its payload -- presumably raw bytes read
# through this connection; verify against the caller).
rconn = redis.StrictRedis(REDIS_SERVER, decode_responses=False, port=6379, password=REDIS_PASSWORD)

# S3 client built from the AWS credentials.
storage = s3.S3(AWS_ACCESS_KEY, AWS_SECRET_ACCESS_KEY)

# NOTE(review): presumably a "heartbeat" liveness flag; its consumer is not
# visible in this chunk.
heart_bit = True

# Module-level API clients and the crawl version currently being handled
# (unset until something assigns it).
product_api = Products()
object_api = Objects()
image_api = Images()
version_id = None

def analyze_product(p_data):
  """Unpickle a product record and run main-image analysis on it.

  ``p_data`` is a pickled product; the unpickled object is indexed with
  ``'_id'`` on the error path.  If ``analyze_main_image`` raises, the
  error is logged, the product id is handed to ``delete_product_from_db``
  and the function returns ``None``.

  NOTE(review): the function appears to be cut off in the visible source
  (``main_class_code`` / ``main_objects`` are never used); confirm against
  the complete file.
  """
  log.info('analyze_product')
  # NOTE(review): pickle.loads can execute arbitrary code when fed crafted
  # input; make sure p_data only ever comes from a trusted producer.
  product = pickle.loads(p_data)

  try:
    main_class_code, main_objects = analyze_main_image(product)
  except Exception as e:
    # Analysis failed: log it, pass the product id to
    # delete_product_from_db and stop.
    log.error('analyze_product:' + str(e))
    delete_product_from_db(str(product['_id']))
    return
from __future__ import print_function
from stylelens_product.products import Products
from pprint import pprint
# create an instance of the API class
api_instance = Products()

try:
    # Call reset_product_is_processed_for_text_class_model and dump its
    # response
    api_response = api_instance.reset_product_is_processed_for_text_class_model(
    )
    pprint(api_response)
except Exception as e:
    # The message previously named add_product, which is not the call made.
    print(
        "Exception when calling reset_product_is_processed_for_text_class_model: %s\n"
        % e)