def clear_dbs(version_id):
    """Run every store-reset call for ``version_id`` on a best-effort basis.

    Calls, in order: the product API's
    ``reset_product_as_not_object_classified``, the object API's
    ``delete_all`` / ``reset_index`` / ``reset_image_index``, the feature
    API's ``delete_all`` and the image API's ``delete_all``.  A failure in
    any single call is logged through ``log.error`` and does not stop the
    remaining calls.

    Args:
        version_id: Passed to the product reset and to both index resets.
    """
    global product_api

    def _best_effort(step):
        # Attribute lookup and the call itself both happen inside ``step`` so
        # that any failure is caught here, exactly like an inline try/except.
        try:
            step()
        except Exception as e:
            log.error(str(e))

    _best_effort(lambda: product_api.reset_product_as_not_object_classified(
        version_id=version_id))

    # API clients are constructed outside the guarded calls: a constructor
    # failure propagates to the caller.
    object_api = Objects()
    _best_effort(lambda: object_api.delete_all())
    _best_effort(lambda: object_api.reset_index(version_id))
    _best_effort(lambda: object_api.reset_image_index(version_id))

    feature_api = Features()
    _best_effort(lambda: feature_api.delete_all())

    image_api = Images()
    _best_effort(lambda: image_api.delete_all())
def __init__(self, log):
    """Build the collaborators this object works with.

    Args:
        log: Logger-like object; ``info`` is called once here and the
            object is kept on ``self.log``.
    """
    log.info('init')

    # Feature extraction is requested with GPU support enabled.
    extractor = feature_extract.ExtractFeature(use_gpu=True)
    self.image_feature = extractor
    self.log = log

    # Search / detection back ends.
    self.vector_search = VectorSearch()
    self.object_detector = ObjectDetector()

    # Storage API clients.
    self.object_api = Objects()
    self.image_api = Images()
    self.index_image_api = IndexImages()
def start(rconn):
    """Run the object-index service loop.

    Rebinds the module-level API clients, then loops forever: fetch the
    latest crawl version, check whether indexing may start, reset the index
    and load objects from the configured data source, then sleep ten
    minutes.  Any exception escaping the loop is logged and ends the
    function.

    Args:
        rconn: Accepted for the caller's benefit; not used in this body.
    """
    global object_api
    global feature_api
    global product_api
    global version_id

    try:
        log.info("Start bl-object-index:1")

        object_api = Objects()
        feature_api = Features()
        product_api = Products()
        crawl_api = Crawls()

        file = os.path.join(os.getcwd(), INDEX_FILE)
        # index_file = load_index_file(file)

        while True:
            version_id = get_latest_crawl_version()

            ready = False
            if version_id is not None:
                log.info("check_condition_to_start")
                ready = check_condition_to_start(version_id)
                log.info("check_condition_to_start: " + str(ready))

            # Only an exact ``True`` starts an indexing pass.
            if ready is True:
                index_file = None
                reset_index(version_id)
                # dispatch(rconn)
                # prepare_objects_to_index(rconn, version_id)
                if DATA_SOURCE == DATA_SOURCE_QUEUE:
                    load_from_queue(index_file)
                elif DATA_SOURCE == DATA_SOURCE_DB:
                    load_from_db(index_file, version_id)

            # Wait ten minutes before polling for a new crawl version.
            time.sleep(60 * 10)
    except Exception as e:
        log.error(str(e))
from __future__ import print_function from stylelens_object.objects import Objects from pprint import pprint # create an instance of the API class api_instance = Objects() try: api_response = api_instance.get_object_ids("5a4e3dc74dfd7d90b8885411", is_indexed=True, image_indexed=False) pprint(api_response) except Exception as e: print("Exception when calling get_object_ids: %s\n" % e)
from __future__ import print_function from stylelens_object.objects import Objects from pprint import pprint # create an instance of the API class api_instance = Objects() version_id = "5a49b8e54dfd7d90b8786df8" try: api_response = api_instance.get_objects_with_null_feature( version_id=version_id) pprint(api_response) except Exception as e: print("Exception when calling get_objects_with_null_feature: %s\n" % e)
from __future__ import print_function from stylelens_object.objects import Objects from pprint import pprint # create an instance of the API class api_instance = Objects() try: api_response = api_instance.reset_index() pprint(api_response) except Exception as e: print("Exception when calling ProductApi->update_objects: %s\n" % e)
class Search:
    """Image search facade over the vector index, object detector and stores.

    Relies on module-level names defined elsewhere in the file (``rconn``,
    the ``REDIS_*`` keys, ``VECTOR_SIMILARITY_THRESHHOLD``,
    ``ALLOWED_EXTENSIONS``, ``Box``/``BoxObject`` and the API client
    classes).
    """

    def __init__(self, log):
        """Create the feature extractor, search/detector clients and API clients.

        Args:
            log: Logger-like object exposing ``info``, ``debug`` and ``error``.
        """
        log.info('init')
        self.image_feature = feature_extract.ExtractFeature(use_gpu=True)
        self.log = log
        self.vector_search = VectorSearch()
        self.object_detector = ObjectDetector()
        self.object_api = Objects()
        self.image_api = Images()
        self.index_image_api = IndexImages()

    def get_images_by_object_vector(self, vector, offset=0, limit=10):
        """Alias of :meth:`get_images_by_vector` with a default limit of 10."""
        return self.get_images_by_vector(vector, offset=offset, limit=limit)

    def search_image_file(self, file, offset=0, limit=5):
        """Extract a feature vector from ``file`` and search images by it."""
        feature = self.extract_feature(file)
        return self.get_images_by_vector(feature, offset=offset, limit=limit)

    def search_image_data(self, image_data, offset=0, limit=10):
        """Search images similar to an encoded image given as bytes.

        The image is shrunk to fit 380x380, written to a uniquely named
        temporary ``.jpg`` in the working directory for feature extraction,
        and the temporary file is removed afterwards.

        Args:
            image_data: Encoded image bytes readable by ``Image.open``.
            offset: Forwarded to :meth:`get_images_by_vector`.
            limit: Forwarded to :meth:`get_images_by_vector`.

        Returns:
            Whatever :meth:`get_images_by_vector` returns.
        """
        # Local import: this file's import header is not visible from here.
        import os

        start_time = time.time()
        im = Image.open(io.BytesIO(image_data))
        im.thumbnail((380, 380), Image.ANTIALIAS)

        file_name = str(uuid.uuid4()) + '.jpg'
        try:
            im.save(file_name)
            feature = self.extract_feature(file_name)
        finally:
            # The temp file used to be left behind on every request.
            try:
                os.remove(file_name)
            except OSError as e:
                self.log.debug('could not remove temp image: ' + str(e))

        self.log.debug('feature dtype: ' + str(feature.dtype))
        elapsed_time = time.time() - start_time
        self.log.info('search_image time: ' + str(elapsed_time))
        return self.get_images_by_vector(feature, offset, limit)

    def get_images_by_vector(self, vector, offset=0, limit=5):
        """Return images whose objects are close to ``vector``.

        Args:
            vector: Feature vector handed to the vector-search client.
            offset: Accepted for interface compatibility; not used.
            limit: Number of neighbours requested and upper bound on the
                result positions considered.

        Returns:
            A list of image dicts, or ``None`` when the vector search fails,
            nothing is within ``VECTOR_SIMILARITY_THRESHHOLD``, or the
            object/image lookup fails.
        """
        try:
            # Query to search vector
            start_time = time.time()
            vector_d, vector_i = self.vector_search.search(vector, limit)
            # assumes both results are raw byte buffers -- TODO confirm.
            # ``np.frombuffer`` replaces deprecated binary-mode
            # ``np.fromstring``; ``int`` is what the removed ``np.int`` alias
            # referred to.
            distances = np.frombuffer(vector_d, dtype=np.float32)
            ids = np.frombuffer(vector_i, dtype=int)
            elapsed_time = time.time() - start_time
            self.log.debug('vector search time: ' + str(elapsed_time))
        except Exception as e:
            self.log.error(
                "Exception when calling SearchApi->search_vector: %s\n" % e)
            # Without a search result there is nothing to filter; previously
            # execution fell through and crashed on the unbound ``distances``.
            return None

        matched = []
        for position, distance in enumerate(distances):
            self.log.debug('distance: ' + str(distance))
            if distance <= VECTOR_SIMILARITY_THRESHHOLD:
                if position < limit:
                    matched.append(int(ids[position]))
                else:
                    break

        if not matched:
            return None

        try:
            start_time = time.time()
            objects = self.object_api.get_objects_by_indexes(matched)
            elapsed_time = time.time() - start_time
            self.log.debug('get_objects_by_indexes time: ' + str(elapsed_time))
            return self.get_images_from_objects(objects)
        except Exception as e:
            self.log.error('Trying Objects.get_objects_by_indexes():' + str(e))
            return None

    def get_images_from_objects(self, objects):
        """Fetch the images referenced by ``objects``.

        Args:
            objects: Iterable of mappings that each carry an ``'image_id'``.

        Returns:
            A list of image dicts with ``'_id'`` renamed to a string ``'id'``
            and the ``'images'`` key dropped, or ``None`` if the lookup fails.
        """
        # De-duplicate while keeping first-seen order, so the request is
        # deterministic (a plain ``set`` ordered it arbitrarily).
        ids = list(dict.fromkeys(obj['image_id'] for obj in objects))

        try:
            start_time = time.time()
            fetched = self.image_api.get_images_by_ids(ids)
            elapsed_time = time.time() - start_time
            self.log.debug('get_images_by_ids time: ' + str(elapsed_time))

            images = []
            for image in fetched:
                image['id'] = str(image.pop('_id'))
                image.pop('images', None)
                images.append(image)
            return images
        except Exception as e:
            self.log.error(str(e))
            return None

    def get_products_from_db(self, ids, offset=0, limit=5):
        """Look up products by id and return them as dicts, in ``ids`` order.

        NOTE(review): the product API client is currently stubbed out
        (``None``), so the lookup always fails and ``[]`` is returned.

        Args:
            ids: Iterable of UTF-8 encoded id byte strings.
            offset: Not used.
            limit: Not used.

        Returns:
            List of product dicts; empty when the lookup fails.
        """
        self.log.debug('get_products_from_db')
        start_time = time.time()
        product_api = None
        # product_api = stylelens_product.ProductApi()
        try:
            api_response = product_api.get_products_by_ids(ids)
        except Exception as e:
            self.log.error(
                "Exception when calling ProductApi->get_products_by_ids: %s\n" % e)
            # Previously fell through and crashed on the unbound response.
            return []

        elapsed_time = time.time() - start_time
        self.log.debug('get_products_from_db time: ' + str(elapsed_time))

        # First product wins for each id, matching the former scan-and-break.
        by_id = {}
        for product in api_response.data:
            by_id.setdefault(product.id, product)

        products_info = []
        for raw_id in ids:
            product = by_id.get(raw_id.decode('utf-8'))
            if product is not None:
                products_info.append(product.to_dict())
        return products_info

    def get_object_ids(self, ids):
        """Map 1-based positions to the ids stored in the indexed-object list."""
        obj_ids = []
        for position in ids:
            raw_id = rconn.lindex(REDIS_INDEXED_OBJECT_LIST, position - 1)
            obj_ids.append(raw_id.decode('utf-8'))
        self.log.debug(obj_ids)
        return obj_ids

    def get_image_ids(self, ids):
        """Return the values stored for ``ids`` in the indexed-image hash."""
        self.log.debug('get_product_ids' + str(ids))
        product_ids = rconn.hmget(REDIS_INDEXED_IMAGE_HASH, ids)
        return product_ids

    def get_image_info(self, ids, offset=0, limit=5):
        """Load at most ``limit`` pickled products for ``ids`` from Redis.

        The ``'sub_images'`` and ``'sub_images_mobile'`` entries of every
        returned product are blanked.  ``offset`` is not used.
        """
        self.log.debug('get_product_info' + str(ids))
        products = []
        products_info = rconn.hmget(REDIS_PRODUCT_HASH, ids)
        for position, raw in enumerate(products_info):
            if position >= limit:
                break
            # NOTE(security): pickle.loads must only see trusted cache data.
            product = pickle.loads(raw)
            product['sub_images'] = None
            product['sub_images_mobile'] = None
            products.append(product)
        return products

    def allowed_file(self, filename):
        """Return True if ``filename`` has an extension in ``ALLOWED_EXTENSIONS``."""
        return '.' in filename and \
            filename.rsplit('.', 1)[1] in ALLOWED_EXTENSIONS

    def extract_feature(self, file):
        """Return the feature vector the extractor computes for ``file``."""
        feature_vector = self.image_feature.extract_feature(file)
        return feature_vector

    def get_product_info(self, index):
        """Resolve a 1-based object index to its unpickled product via Redis.

        Follows object list -> object hash -> product hash.
        """
        obj_id = rconn.lindex(REDIS_KEY_OBJECT_LIST, index - 1)
        obj_id = obj_id.decode('utf-8')
        product_id = rconn.hget(REDIS_OBJECT_HASH, obj_id)
        product_id = product_id.decode('utf-8')
        product = rconn.hget(REDIS_PRODUCT_HASH, product_id)
        # NOTE(security): pickle.loads must only see trusted cache data.
        product = pickle.loads(product)
        return product

    def get_objects(self, image_file, products_offset=0, products_limit=5):
        """Detect objects in ``image_file``.

        Args:
            image_file: Passed to the detector as ``file``.
            products_offset: Not used.
            products_limit: Not used.

        Returns:
            ``(boxes_array, objects_array)``: ``BoxObject`` instances and
            plain dicts (``class_name``, ``class_code``, ``score``, ``box``,
            ``feature``) for each detection.  When no detection raises the
            best score above ``-1``, a placeholder ``BoxObject`` with
            ``'na'`` / ``-1`` values is appended to ``boxes_array`` only.
        """
        start_time = time.time()
        detections = self.object_detector.getObjects(file=image_file)

        boxes_array = []
        objects_array = []
        best_score = -1
        best_score_index = 0

        for position, detected in enumerate(detections):
            box_object = BoxObject()
            box_object.class_name = detected.class_name
            box_object.class_code = detected.class_code
            box_object.score = detected.score

            # Compare against the best *score*; the index used to be compared
            # by mistake.
            if best_score < detected.score:
                best_score = detected.score
                best_score_index = position

            location = detected.location
            box = Box()
            box.left = location.left
            box.right = location.right
            box.top = location.top
            box.bottom = location.bottom
            box_object.box = box

            boxes_array.append(box_object)
            objects_array.append({
                'class_name': detected.class_name,
                'class_code': detected.class_code,
                'score': detected.score,
                'box': {
                    'left': location.left,
                    'right': location.right,
                    'top': location.top,
                    'bottom': location.bottom,
                },
                'feature': detected.feature,
            })

        if best_score == -1:
            # Nothing usable detected: emit a sentinel box.
            box_object = BoxObject()
            box_object.class_name = 'na'
            box_object.class_code = 'na'
            box_object.score = '-1'
            box = Box()
            box.left = -1
            box.right = -1
            box.top = -1
            box.bottom = -1
            box_object.box = box
            boxes_array.append(box_object)

        elapsed_time = time.time() - start_time
        self.log.info('get_objects time: ' + str(elapsed_time))
        return boxes_array, objects_array

    def get_indexed_image(self, image_id):
        """Return the indexed image for ``image_id``, or ``None`` on failure."""
        try:
            image = self.index_image_api.get_image(image_id)
        except Exception as e:
            self.log.error(str(e))
            return None
        return image

    def get_products_by_product_id(self, product_id, offset=0, limit=5):
        """Return products related to ``product_id``, using a Redis cache.

        On a cache hit, a ``limit``-sized page starting at ``offset`` is
        returned.  On a miss, the product's main mobile image is fetched and
        run through :meth:`get_objects`; the first box exposing a non-empty
        ``products`` attribute is cached and returned.

        Returns:
            A list of products, or ``{}`` when nothing could be resolved.
        """
        if rconn.hexists(REDIS_PRODUCTS_BY_PRODUCT_HASH, product_id):
            cached = rconn.hget(REDIS_PRODUCTS_BY_PRODUCT_HASH, product_id)
            # NOTE(security): pickle.loads must only see trusted cache data.
            products = pickle.loads(cached)
            # Page is [offset, offset + limit); the former [offset:limit]
            # returned nothing once offset reached limit.
            return products[offset:offset + limit]

        raw_product = rconn.hget(REDIS_PRODUCT_HASH, product_id)
        if raw_product is None:
            self.log.error('product not found: ' + str(product_id))
            return {}
        product = pickle.loads(raw_product)

        try:
            response = urllib.request.urlopen(product['main_image_mobile_full'])
        except Exception as e:
            self.log.error(str(e))
            # No image to analyse; previously crashed on the unbound handle.
            return {}

        with response:
            # get_objects returns (boxes, dicts); only the boxes matter here.
            boxes, _ = self.get_objects(response, products_limit=limit)

        for box in boxes:
            # NOTE(review): nothing in this class sets ``products`` on a box;
            # read defensively.
            products = getattr(box, 'products', None)
            if products:
                rconn.hset(REDIS_PRODUCTS_BY_PRODUCT_HASH, product_id,
                           pickle.dumps(products))
                return products
        return {}

    def get_products_by_keyword(self, keyword, offset=0, limit=100):
        """Search products by keyword.

        Returns:
            ``(total_count, products)``.  Either element is ``None`` when its
            API call failed (the failure is logged).
        """
        self.log.debug('get_products_by_keyword')
        product_api = Products()

        # Defaults keep the return statement valid after a logged failure.
        total_count = None
        products = None

        try:
            total_count = product_api.get_products_count_by_keyword(keyword)
        except Exception as e:
            self.log.error(
                "Exception when calling get_products_count_by_keyword: %s\n" % e)

        try:
            products = product_api.get_products_by_keyword(
                keyword, only_text=False, offset=offset, limit=limit)
        except Exception as e:
            self.log.error(
                "Exception when calling get_products_by_keyword: %s\n" % e)

        return total_count, products
from __future__ import print_function from stylelens_object.objects import Objects from pprint import pprint # create an instance of the API class api_instance = Objects() try: api_response = api_instance.get_objects("5a50d4ba4dfd7d90b8b9369a", # is_indexed=True, sort_key='index', sort_order=1, is_main=True ) pprint(api_response) except Exception as e: print("Exception when calling get_objects: %s\n" % e)
from __future__ import print_function from stylelens_object.objects import Objects from pprint import pprint # create an instance of the API class api_instance = Objects() version_id = "5a70779890e0881036849e95" try: api_response = api_instance.get_size_objects(version_id=version_id) pprint('total objects: ' + str(api_response)) api_response = api_instance.get_size_objects(version_id=version_id, is_indexed=True) pprint('indexed objects: ' + str(api_response)) api_response = api_instance.get_size_objects(version_id=version_id, is_indexed=False) pprint('Not indexed objects: ' + str(api_response)) api_response = api_instance.get_size_objects(version_id=version_id, is_indexed=True, image_indexed=True) pprint('Not indexed objects: ' + str(api_response)) api_response = api_instance.get_size_objects(version_id=version_id, is_indexed=True, image_indexed=False) pprint('Not indexed objects: ' + str(api_response)) except Exception as e: print("Exception when calling get_objects_with_null_index: %s\n" % e)
from __future__ import print_function from stylelens_object.objects import Objects from pprint import pprint # create an instance of the API class api_instance = Objects() try: api_response = api_instance.get_object_by_index( 1, '5a50d4ba4dfd7d90b8b9369a') pprint(api_response) except Exception as e: print("Exception when calling ProductApi->add_object: %s\n" % e)
from __future__ import print_function from stylelens_object.objects import Objects from pprint import pprint # create an instance of the API class api_instance = Objects() try: api_response = api_instance.delete_all() pprint(api_response) except Exception as e: print("Exception when calling ProductApi->update_objects: %s\n" % e)
from __future__ import print_function from stylelens_object.objects import Objects from pprint import pprint # create an instance of the API class api_instance = Objects() version_id = "5a50d4ba4dfd7d90b8b9369a" ids = ["5a5aebb1ca4ad59194b698b2", "5a5aebb2ca4ad59194b698e2"] try: api_response = api_instance.get_objects_by_ids(ids=ids, version_id=version_id) pprint(api_response) except Exception as e: print("Exception when calling get_objects_by_ids: %s\n" % e)
from __future__ import print_function from stylelens_object.objects import Objects from pprint import pprint # create an instance of the API class api_instance = Objects() try: api_response = api_instance.get_object('5a5ac4dfca4ad59194b67c26') pprint(api_response) except Exception as e: print("Exception when calling ProductApi->get_object: %s\n" % e)
from __future__ import print_function from stylelens_object.objects import Objects from pprint import pprint # create an instance of the API class api_instance = Objects() object = {} object['name'] = 'a92' try: # Added a new Object api_response = api_instance.add_object(object) pprint(api_response) except Exception as e: print("Exception when calling ProductApi->add_object: %s\n" % e)
from __future__ import print_function from stylelens_object.objects import Objects from pprint import pprint # create an instance of the API class api_instance = Objects() objects = [] object = {} object['name'] = 'a1' object['index'] = 1 object2 = {} object2['name'] = 'a2' object2['index'] = 2 object3 = {} object3['name'] = 'a3' object3['index'] = 3 objects.append(object) objects.append(object2) objects.append(object3) try: api_response = api_instance.update_objects(objects) pprint(api_response) except Exception as e: print("Exception when calling ProductApi->update_objects: %s\n" % e)
# Redis key / field used to look up the latest crawl version.
REDIS_CRAWL_VERSION = 'bl:crawl:version'
REDIS_CRAWL_VERSION_LATEST = 'latest'

# Connection settings handed to the logger.
options = {
    'REDIS_SERVER': REDIS_SERVER,
    'REDIS_PASSWORD': REDIS_PASSWORD
}
log = Logging(options, tag='bl-object-classifier')

# Raw-bytes Redis connection (responses are not decoded).
rconn = redis.StrictRedis(REDIS_SERVER,
                          decode_responses=False,
                          port=6379,
                          password=REDIS_PASSWORD)
storage = s3.S3(AWS_ACCESS_KEY, AWS_SECRET_ACCESS_KEY)

heart_bit = True

# Shared API clients and the crawl version currently being processed.
product_api = Products()
object_api = Objects()
image_api = Images()
version_id = None


def analyze_product(p_data):
    """Analyze the main image and the mobile sub-images of a pickled product.

    If analysing the main image raises, the error is logged, the product is
    removed via ``delete_product_from_db`` and the function returns early.

    Args:
        p_data: Pickled product mapping; ``'_id'`` and
            ``'sub_images_mobile'`` are read from it.
    """
    log.info('analyze_product')
    # NOTE(security): pickle.loads must only be fed trusted data.
    product = pickle.loads(p_data)

    try:
        main_result = analyze_main_image(product)
        main_class_code, main_objects = main_result
    except Exception as e:
        log.error('analyze_product:' + str(e))
        delete_product_from_db(str(product['_id']))
        return

    # NOTE(review): neither result is consumed in the visible part of this
    # function.
    sub_result = analyze_sub_images(product['sub_images_mobile'])
    sub_class_code, sub_objects = sub_result
from __future__ import print_function from stylelens_object.objects import Objects from pprint import pprint # create an instance of the API class api_instance = Objects() id = "5a4df4c299866d4ecfd0091d" object = {} object['image_id'] = 'bok' try: api_response = api_instance.update_object_by_id(id, object) pprint(api_response) except Exception as e: print("Exception when calling ProductApi->update_object_by_id: %s\n" % e)