Beispiel #1
0
def clear_dbs(version_id):
  """Reset classification/index state and empty the object, feature and image stores.

  Every step is best-effort: a failing call is reported through the
  module-level ``log`` and the remaining steps are still attempted.

  Args:
    version_id: identifier forwarded to the product reset and to both
      object index resets.
  """
  def attempt(step):
    # Run one clean-up step; log and swallow whatever it raises.
    try:
      step()
    except Exception as err:
      log.error(str(err))

  attempt(lambda: product_api.reset_product_as_not_object_classified(version_id=version_id))

  # Clients are created outside the guarded calls, so a constructor failure
  # still propagates to the caller.
  objects_client = Objects()
  attempt(lambda: objects_client.delete_all())
  attempt(lambda: objects_client.reset_index(version_id))
  attempt(lambda: objects_client.reset_image_index(version_id))

  features_client = Features()
  attempt(lambda: features_client.delete_all())

  images_client = Images()
  attempt(lambda: images_client.delete_all())
Beispiel #2
0
 def __init__(self, log):
     """Keep the logger and create the helper clients used by this object.

     Args:
         log: logger-like object; ``info`` is called once with 'init' and
             the object is stored as ``self.log``.
     """
     self.log = log
     self.log.info('init')

     # Feature extraction is requested with GPU support enabled.
     self.image_feature = feature_extract.ExtractFeature(use_gpu=True)

     # Search / detection helpers.
     self.vector_search = VectorSearch()
     self.object_detector = ObjectDetector()

     # Data-access clients.
     self.object_api = Objects()
     self.image_api = Images()
     self.index_image_api = IndexImages()
Beispiel #3
0
def start(rconn):
    """Poll for the latest crawl version and (re)build its index when allowed.

    The module-level ``object_api``, ``feature_api``, ``product_api`` and
    ``version_id`` are rebound here. The loop never ends on its own; any
    exception that escapes it is logged and the function returns.

    Args:
        rconn: not used by the statements in this function.
    """
    global object_api
    global feature_api
    global product_api
    global version_id

    poll_interval_seconds = 60 * 10

    try:
        log.info("Start bl-object-index:1")

        object_api = Objects()
        feature_api = Features()
        product_api = Products()
        crawls_client = Crawls()
        index_path = os.path.join(os.getcwd(), INDEX_FILE)

        while True:
            version_id = get_latest_crawl_version()

            if version_id is not None:
                log.info("check_condition_to_start")
                ready = check_condition_to_start(version_id)
                log.info("check_condition_to_start: " + str(ready))

                # Only the literal True starts a run; other truthy values do not.
                if ready is True:
                    index_file = None
                    reset_index(version_id)

                    if DATA_SOURCE == DATA_SOURCE_QUEUE:
                        load_from_queue(index_file)
                    elif DATA_SOURCE == DATA_SOURCE_DB:
                        load_from_db(index_file, version_id)

            time.sleep(poll_interval_seconds)
    except Exception as err:
        log.error(str(err))
from __future__ import print_function
from stylelens_object.objects import Objects
from pprint import pprint
# Build the Objects API client used by this example.
api_instance = Objects()

# Positional id argument (presumably a crawl version id - confirm) and the
# two index-state filters forwarded as keyword arguments.
query_id = "5a4e3dc74dfd7d90b8885411"
index_filters = {"is_indexed": True, "image_indexed": False}

try:
    object_ids = api_instance.get_object_ids(query_id, **index_filters)
    pprint(object_ids)
except Exception as err:
    print("Exception when calling get_object_ids: {}\n".format(err))
Beispiel #5
0
from __future__ import print_function
from stylelens_object.objects import Objects
from pprint import pprint
# Build the Objects API client used by this example.
api_instance = Objects()

version_id = "5a49b8e54dfd7d90b8786df8"

# Fetch and dump the objects reported as having no feature for this version;
# any failure is printed instead of raised.
try:
    featureless_objects = api_instance.get_objects_with_null_feature(version_id=version_id)
    pprint(featureless_objects)
except Exception as err:
    print("Exception when calling get_objects_with_null_feature: {}\n".format(err))
from __future__ import print_function
from stylelens_object.objects import Objects
from pprint import pprint
# create an instance of the API class
api_instance = Objects()

# Reset the object index and print the API's answer.
# NOTE(review): reset_index is called without arguments here - confirm that
# the client accepts the no-argument form.
try:
    api_response = api_instance.reset_index()
    pprint(api_response)
except Exception as e:
    # The message names the call that was actually made (it previously
    # referred to an unrelated update_objects call).
    print("Exception when calling reset_index: %s\n" % e)
Beispiel #7
0
class Search:
    """Similarity search over indexed images.

    Combines the feature extractor, vector index client, object detector and
    object/image API clients created in ``__init__``. Several helpers also
    read from the module-level Redis connection ``rconn``.
    """

    def __init__(self, log):
        """Store the logger and create the helper clients.

        Args:
            log: logger-like object exposing ``info``/``debug``/``error``.
        """
        log.info('init')
        self.image_feature = feature_extract.ExtractFeature(use_gpu=True)
        self.log = log
        self.vector_search = VectorSearch()
        self.object_detector = ObjectDetector()
        self.object_api = Objects()
        self.image_api = Images()
        self.index_image_api = IndexImages()

    def get_images_by_object_vector(self, vector, offset=0, limit=10):
        """Alias of ``get_images_by_vector`` with a default limit of 10."""
        return self.get_images_by_vector(vector, offset=offset, limit=limit)

    def search_image_file(self, file, offset=0, limit=5):
        """Extract a feature from ``file`` and search images similar to it."""
        feature = self.extract_feature(file)
        return self.get_images_by_vector(feature, offset=offset, limit=limit)

    def search_image_data(self, image_data, offset=0, limit=10):
        """Search images similar to an encoded image held in memory.

        The image is shrunk to fit 380x380, written to a uniquely named
        temporary JPEG in the working directory for feature extraction, and
        the temporary file is removed afterwards.

        Args:
            image_data: bytes of an encoded image.
            offset: forwarded to ``get_images_by_vector``.
            limit: forwarded to ``get_images_by_vector``.

        Returns:
            Whatever ``get_images_by_vector`` returns for the feature.
        """
        import os  # local import: the file's import block is not visible here

        start_time = time.time()
        im = Image.open(io.BytesIO(image_data))
        # LANCZOS is the filter ANTIALIAS used to alias; ANTIALIAS itself was
        # removed in Pillow 10.
        im.thumbnail((380, 380), Image.LANCZOS)
        file_name = str(uuid.uuid4()) + '.jpg'
        try:
            im.save(file_name)
            feature = self.extract_feature(file_name)
        finally:
            # Do not let one temp file per request pile up on disk.
            try:
                os.remove(file_name)
            except OSError:
                pass
        self.log.debug('feature dtype: ' + str(feature.dtype))
        elapsed_time = time.time() - start_time
        self.log.info('search_image time: ' + str(elapsed_time))
        return self.get_images_by_vector(feature, offset, limit)

    def get_images_by_vector(self, vector, offset=0, limit=5):
        """Return image records for the nearest neighbours of ``vector``.

        Args:
            vector: query feature, passed to the vector search unchanged.
            offset: accepted for interface compatibility; not used.
            limit: number of neighbours requested and maximum ids kept.

        Returns:
            A list of image dicts, or ``None`` when the vector search fails,
            no neighbour is within ``VECTOR_SIMILARITY_THRESHHOLD``, or the
            object lookup fails.
        """
        try:
            start_time = time.time()

            vector_d, vector_i = self.vector_search.search(vector, limit)
            # The search answers with raw buffers: float32 distances and
            # platform-int ids. frombuffer replaces the deprecated binary
            # mode of fromstring; ``int`` is what ``np.int`` aliased.
            distances = np.frombuffer(vector_d, dtype=np.float32)
            ids = np.frombuffer(vector_i, dtype=int)

            elapsed_time = time.time() - start_time
            self.log.debug('vector search time: ' + str(elapsed_time))
        except Exception as e:
            self.log.error(
                "Exception when calling SearchApi->search_vector: %s\n" % e)
            # Without distances there is nothing to filter.
            return None

        # Keep each accepted distance paired with its own id.
        matched_ids = []
        for distance, index in zip(distances, ids):
            if len(matched_ids) >= limit:
                break
            if distance <= VECTOR_SIMILARITY_THRESHHOLD:
                matched_ids.append(int(index))

        if not matched_ids:
            return None

        try:
            start_time = time.time()
            objects = self.object_api.get_objects_by_indexes(matched_ids)
            elapsed_time = time.time() - start_time
            self.log.debug('get_objects_by_indexes time: ' + str(elapsed_time))
            return self.get_images_from_objects(objects)
        except Exception as e:
            self.log.error('Trying Objects.get_objects_by_indexes():' +
                           str(e))
            return None

    def get_images_from_objects(self, objects):
        """Fetch the images referenced by ``objects``.

        Args:
            objects: iterable of mappings that each carry an ``'image_id'``.

        Returns:
            The fetched image dicts with ``'_id'`` replaced by a string
            ``'id'`` and any ``'images'`` entry dropped, or ``None`` when the
            lookup fails.
        """
        # De-duplicate so each image is requested once.
        image_ids = list({obj['image_id'] for obj in objects})
        try:
            start_time = time.time()
            fetched = self.image_api.get_images_by_ids(image_ids)
            elapsed_time = time.time() - start_time
            self.log.debug('get_images_by_ids time: ' + str(elapsed_time))
            images = []
            for image in fetched:
                image['id'] = str(image.pop('_id'))
                image.pop('images', None)
                images.append(image)
            return images
        except Exception as e:
            self.log.error(str(e))
            return None

    def get_products_from_db(self, ids, offset=0, limit=5):
        """Look products up by id and return them as dicts, in ``ids`` order.

        The product client is currently disabled (``None``), so the lookup
        fails, the failure is logged and an empty list is returned.

        Args:
            ids: iterable of UTF-8 encoded ids (``bytes``).
            offset: accepted for interface compatibility; not used.
            limit: accepted for interface compatibility; not used.

        Returns:
            List of product dicts; empty when the lookup fails.
        """
        self.log.debug('get_products_from_db')
        start_time = time.time()
        product_api = None
        # product_api = stylelens_product.ProductApi()
        api_response = None
        try:
            api_response = product_api.get_products_by_ids(ids)
        except Exception as e:
            self.log.error(
                "Exception when calling ProductApi->get_products_by_ids: %s\n"
                % e)
        elapsed_time = time.time() - start_time
        self.log.debug('get_products_from_db time: ' + str(elapsed_time))

        if api_response is None:
            return []

        products_info = []
        for raw_id in ids:
            wanted = raw_id.decode('utf-8')
            for p in api_response.data:
                if wanted == p.id:
                    products_info.append(p.to_dict())
                    break

        return products_info

    def get_object_ids(self, ids):
        """Translate 1-based list positions into object ids stored in Redis."""
        obj_ids = []
        for position in ids:
            raw_id = rconn.lindex(REDIS_INDEXED_OBJECT_LIST, position - 1)
            obj_ids.append(raw_id.decode('utf-8'))
        self.log.debug(obj_ids)
        return obj_ids

    def get_image_ids(self, ids):
        """Return the values stored under ``ids`` in the indexed-image hash."""
        self.log.debug('get_product_ids' + str(ids))
        product_ids = rconn.hmget(REDIS_INDEXED_IMAGE_HASH, ids)
        return product_ids

    def get_image_info(self, ids, offset=0, limit=5):
        """Load up to ``limit`` pickled products from the Redis product hash.

        Only the first ``limit`` positions of ``ids`` are considered; ids with
        no stored value are skipped. ``sub_images`` and ``sub_images_mobile``
        are blanked on every returned product.

        Args:
            ids: hash fields to read.
            offset: accepted for interface compatibility; not used.
            limit: number of leading entries to consider.
        """
        self.log.debug('get_product_info' + str(ids))
        products = []
        for position, pickled in enumerate(rconn.hmget(REDIS_PRODUCT_HASH, ids)):
            if position >= limit:
                break
            if pickled is None:
                # Field missing from the hash: nothing to unpickle.
                continue
            # NOTE(review): pickle.loads is only safe if the Redis contents
            # are trusted.
            product = pickle.loads(pickled)
            product['sub_images'] = None
            product['sub_images_mobile'] = None
            products.append(product)
        return products

    def allowed_file(self, filename):
        """Tell whether ``filename`` has an extension in ``ALLOWED_EXTENSIONS``."""
        return '.' in filename and \
               filename.rsplit('.', 1)[1] in ALLOWED_EXTENSIONS

    def extract_feature(self, file):
        """Return the feature vector the extractor computes for ``file``."""
        feature_vector = self.image_feature.extract_feature(file)
        return feature_vector

    def get_product_info(self, index):
        """Resolve a 1-based object position to its unpickled product.

        Follows object list -> object hash -> product hash in Redis.
        """
        obj_id = rconn.lindex(REDIS_KEY_OBJECT_LIST, index - 1)
        obj_id = obj_id.decode('utf-8')

        product_id = rconn.hget(REDIS_OBJECT_HASH, obj_id)
        product_id = product_id.decode('utf-8')

        # NOTE(review): pickle.loads is only safe if the Redis contents are
        # trusted.
        product = pickle.loads(rconn.hget(REDIS_PRODUCT_HASH, product_id))
        return product

    def get_objects(self, image_file, products_offset=0, products_limit=5):
        """Detect objects in ``image_file`` and describe them two ways.

        Args:
            image_file: passed to the detector as ``file``.
            products_offset: accepted for interface compatibility; not used.
            products_limit: accepted for interface compatibility; not used.

        Returns:
            ``(boxes_array, objects_array)``: ``BoxObject`` instances and
            plain dicts (class name/code, score, box, feature) for the same
            detections. When no detection scores above -1, ``boxes_array``
            gets one placeholder entry with class 'na' and a -1 box.
        """
        start_time = time.time()

        detections = self.object_detector.getObjects(file=image_file)

        boxes_array = []
        objects_array = []
        best_score = -1
        for detected in detections:
            location = detected.location

            box = Box()
            box.left = location.left
            box.right = location.right
            box.top = location.top
            box.bottom = location.bottom

            box_object = BoxObject()
            box_object.class_name = detected.class_name
            box_object.class_code = detected.class_code
            box_object.score = detected.score
            box_object.box = box
            boxes_array.append(box_object)

            objects_array.append({
                'class_name': detected.class_name,
                'class_code': detected.class_code,
                'score': detected.score,
                'box': {
                    'left': location.left,
                    'right': location.right,
                    'top': location.top,
                    'bottom': location.bottom,
                },
                'feature': detected.feature,
            })

            # Track the highest score seen (compare score with score, not
            # with a list position).
            if best_score < detected.score:
                best_score = detected.score

        if best_score == -1:
            box_object = BoxObject()
            box_object.class_name = 'na'
            box_object.class_code = 'na'
            box_object.score = '-1'

            box = Box()
            box.left = -1
            box.right = -1
            box.top = -1
            box.bottom = -1
            box_object.box = box
            boxes_array.append(box_object)

        # The feature query this used to time is disabled, so the reported
        # duration is effectively zero; the log line is kept as-is.
        local_start_time = time.time()
        elapsed_time = time.time() - local_start_time
        self.log.info('query_feature time: ' + str(elapsed_time))

        elapsed_time = time.time() - start_time
        self.log.info('get_objects time: ' + str(elapsed_time))
        return boxes_array, objects_array

    def get_indexed_image(self, image_id):
        """Return the indexed image for ``image_id``, or ``None`` on failure."""
        try:
            image = self.index_image_api.get_image(image_id)
        except Exception as e:
            self.log.error(str(e))
            return None

        return image

    def get_products_by_product_id(self, product_id, offset=0, limit=5):
        """Return products related to ``product_id``, using a Redis cache.

        On a cache hit the cached list is sliced to ``limit`` items starting
        at ``offset``. On a miss the product's main mobile image is
        downloaded and run through ``get_objects``; the first box carrying
        products is cached and returned.

        Returns:
            A list of products, or ``{}`` when the image cannot be fetched or
            no box carries products.
        """
        if rconn.hexists(REDIS_PRODUCTS_BY_PRODUCT_HASH, product_id):
            # NOTE(review): pickle.loads is only safe if the Redis contents
            # are trusted.
            products = pickle.loads(
                rconn.hget(REDIS_PRODUCTS_BY_PRODUCT_HASH, product_id))
            return products[offset:offset + limit]

        product = pickle.loads(rconn.hget(REDIS_PRODUCT_HASH, product_id))
        try:
            f = urllib.request.urlopen(product['main_image_mobile_full'])
        except Exception as e:
            self.log.error(str(e))
            # No image, nothing to analyse.
            return {}

        try:
            # get_objects returns (boxes, dicts); only the boxes matter here.
            boxes, _ = self.get_objects(f, products_limit=limit)
        finally:
            f.close()

        for box in boxes:
            # Boxes built by get_objects do not necessarily carry products.
            products = getattr(box, 'products', None)
            if products:
                rconn.hset(REDIS_PRODUCTS_BY_PRODUCT_HASH, product_id,
                           pickle.dumps(products))
                return products
        return {}

    def get_products_by_keyword(self, keyword, offset=0, limit=100):
        """Return ``(total_count, products)`` for a keyword search.

        Either part falls back to an empty value (``0`` / ``[]``) when its
        API call fails; the failure is logged.
        """
        self.log.debug('get_products_by_keyword')
        product_api = Products()

        total_count = 0
        try:
            total_count = product_api.get_products_count_by_keyword(keyword)
        except Exception as e:
            self.log.error(
                "Exception when calling get_products_count_by_keyword: %s\n" %
                e)

        products = []
        try:
            products = product_api.get_products_by_keyword(keyword,
                                                           only_text=False,
                                                           offset=offset,
                                                           limit=limit)
        except Exception as e:
            self.log.error(
                "Exception when calling get_products_by_keyword: %s\n" % e)

        return total_count, products
Beispiel #8
0
from __future__ import print_function
from stylelens_object.objects import Objects
from pprint import pprint
# Build the Objects API client used by this example.
api_instance = Objects()

# Sorting / filtering options forwarded as keyword arguments.
# (An is_indexed=True filter is intentionally left out.)
query_options = {
  'sort_key': 'index',
  'sort_order': 1,
  'is_main': True,
}

try:
  matching_objects = api_instance.get_objects("5a50d4ba4dfd7d90b8b9369a", **query_options)
  pprint(matching_objects)
except Exception as err:
  print("Exception when calling get_objects: {}\n".format(err))
from __future__ import print_function
from stylelens_object.objects import Objects
from pprint import pprint
# create an instance of the API class
api_instance = Objects()

version_id = "5a70779890e0881036849e95"

# Each report line pairs a label with the extra filters handed to
# get_size_objects, so the printed label always matches the query made.
size_queries = [
  ('total objects: ', {}),
  ('indexed objects: ', {'is_indexed': True}),
  ('Not indexed objects: ', {'is_indexed': False}),
  ('indexed objects (image_indexed=True): ', {'is_indexed': True, 'image_indexed': True}),
  ('indexed objects (image_indexed=False): ', {'is_indexed': True, 'image_indexed': False}),
]

try:
  # All queries share one try block: the first failure stops the report.
  for label, filters in size_queries:
    api_response = api_instance.get_size_objects(version_id=version_id, **filters)
    pprint(label + str(api_response))
except Exception as e:
  print("Exception when calling get_size_objects: %s\n" % e)
Beispiel #10
0
from __future__ import print_function
from stylelens_object.objects import Objects
from pprint import pprint
# create an instance of the API class
api_instance = Objects()

# Look one object up by its index (first argument) and the id given as the
# second argument, then print it.
try:
    api_response = api_instance.get_object_by_index(
        1, '5a50d4ba4dfd7d90b8b9369a')
    pprint(api_response)
except Exception as e:
    # The message names the call that was actually made (it previously
    # referred to add_object).
    print("Exception when calling get_object_by_index: %s\n" % e)
Beispiel #11
0
from __future__ import print_function
from stylelens_object.objects import Objects
from pprint import pprint
# create an instance of the API class
api_instance = Objects()

# Call delete_all on the Objects client and print what the API answers.
try:
    api_response = api_instance.delete_all()
    pprint(api_response)
except Exception as e:
    # The message names the call that was actually made (it previously
    # referred to update_objects).
    print("Exception when calling delete_all: %s\n" % e)
Beispiel #12
0
from __future__ import print_function
from stylelens_object.objects import Objects
from pprint import pprint
# Build the Objects API client used by this example.
api_instance = Objects()

version_id = "5a50d4ba4dfd7d90b8b9369a"
ids = [
    "5a5aebb1ca4ad59194b698b2",
    "5a5aebb2ca4ad59194b698e2",
]

# Fetch the listed objects for the given version and dump them; a failure is
# printed instead of raised.
try:
    found_objects = api_instance.get_objects_by_ids(ids=ids, version_id=version_id)
    pprint(found_objects)
except Exception as err:
    print("Exception when calling get_objects_by_ids: {}\n".format(err))
Beispiel #13
0
from __future__ import print_function
from stylelens_object.objects import Objects
from pprint import pprint
# create an instance of the API class
api_instance = Objects()

# Fetch a single object by id and print it.
try:
    api_response = api_instance.get_object('5a5ac4dfca4ad59194b67c26')
    pprint(api_response)
except Exception as e:
    # The client in use is Objects, so the message no longer attributes the
    # call to ProductApi.
    print("Exception when calling get_object: %s\n" % e)
Beispiel #14
0
from __future__ import print_function
from stylelens_object.objects import Objects
from pprint import pprint
# create an instance of the API class
api_instance = Objects()

# Payload for the new object (named so it does not shadow the builtin
# ``object``).
new_object = {'name': 'a92'}

try:
    # Added a new Object
    api_response = api_instance.add_object(new_object)
    pprint(api_response)
except Exception as e:
    # The client in use is Objects, so the message no longer attributes the
    # call to ProductApi.
    print("Exception when calling add_object: %s\n" % e)
Beispiel #15
0
from __future__ import print_function
from stylelens_object.objects import Objects
from pprint import pprint
# create an instance of the API class
api_instance = Objects()

# Three sample objects, each with a name and an index, sent in one call.
objects = [
    {'name': 'a1', 'index': 1},
    {'name': 'a2', 'index': 2},
    {'name': 'a3', 'index': 3},
]

try:
    api_response = api_instance.update_objects(objects)
    pprint(api_response)
except Exception as e:
    # The client in use is Objects, so the message no longer attributes the
    # call to ProductApi.
    print("Exception when calling update_objects: %s\n" % e)
Beispiel #16
0
# Redis identifiers for the crawl version; presumably the key and the field
# holding the latest version id - verify against the code that reads them.
REDIS_CRAWL_VERSION = 'bl:crawl:version'
REDIS_CRAWL_VERSION_LATEST = 'latest'

# Redis connection settings handed to the logger.
options = {
  'REDIS_SERVER': REDIS_SERVER,
  'REDIS_PASSWORD': REDIS_PASSWORD
}
log = Logging(options, tag='bl-object-classifier')
# decode_responses=False: values read through rconn come back as raw bytes.
rconn = redis.StrictRedis(REDIS_SERVER, decode_responses=False, port=6379, password=REDIS_PASSWORD)

# S3 client built from the AWS credentials defined elsewhere in this module.
storage = s3.S3(AWS_ACCESS_KEY, AWS_SECRET_ACCESS_KEY)

# NOTE(review): presumably a "heartbeat" flag; its readers are not visible here.
heart_bit = True

# Shared API clients and the current crawl version (unset until assigned).
product_api = Products()
object_api = Objects()
image_api = Images()
version_id = None

def analyze_product(p_data):
  """Unpickle a product and analyze its main image and its sub images.

  If analyzing the main image raises, the error is logged, the product is
  removed through delete_product_from_db and the function returns early.

  Args:
    p_data: pickled product; the unpickled value is indexed with '_id' and
      'sub_images_mobile'.

  NOTE(review): pickle.loads can execute arbitrary code - confirm p_data only
  ever comes from a trusted producer.
  NOTE(review): the analysis results are not used within the visible body;
  the function may continue beyond this excerpt.
  """
  log.info('analyze_product')
  product = pickle.loads(p_data)

  try:
    # Unpacking stays inside the try so a malformed result is handled like
    # any other main-image failure.
    main_class_code, main_objects = analyze_main_image(product)
  except Exception as e:
    log.error('analyze_product:' + str(e))
    delete_product_from_db(str(product['_id']))
    return

  sub_class_code, sub_objects = analyze_sub_images(product['sub_images_mobile'])
Beispiel #17
0
from __future__ import print_function
from stylelens_object.objects import Objects
from pprint import pprint
# create an instance of the API class
api_instance = Objects()

# Target object and the fields to change (named so they do not shadow the
# builtins ``id`` and ``object``).
object_id = "5a4df4c299866d4ecfd0091d"
changes = {'image_id': 'bok'}

try:
    api_response = api_instance.update_object_by_id(object_id, changes)
    pprint(api_response)
except Exception as e:
    # The client in use is Objects, so the message no longer attributes the
    # call to ProductApi.
    print("Exception when calling update_object_by_id: %s\n" % e)