Beispiel #1
0
def gc2mask_test(image, bb):
    """Show the intermediate grab-cut masking stages for ``image``, one by one.

    The image is resized with ``background_removal.standard_resize(image, 400)``
    and ``bb`` is divided by the returned ratio so it refers to the resized
    image. Four windows are then displayed, each blocking until a key is
    pressed:

    * ``'1'`` - resized image with the bounding box drawn on it
    * ``'2'`` - image masked with the grab-cut foreground mask
    * ``'3'`` - image masked with the binary bounding-box mask
    * ``'4'`` - image masked with the AND of both masks

    :param image: cv2 image array
    :param bb: bounding box as (x, y, w, h) in original-image coordinates
    :return: None
    """

    def show_and_wait(window_name, picture):
        # Display one stage and block until the user presses a key.
        cv2.imshow(window_name, picture)
        cv2.waitKey(0)

    # shrink image for faster process, and shrink bb in the same ratio
    small_image, resize_ratio = background_removal.standard_resize(image, 400)
    scaled_bb = (np.array(bb) / resize_ratio).astype(np.uint16)
    x, y, w, h = scaled_bb

    cv2.rectangle(small_image, (x, y), (x + w, y + h), [0, 255, 0], 2)
    show_and_wait('1', small_image)

    # grab-cut mask (if bb => PFG-PBG gc, if !bb => face gc)
    fg_mask = background_removal.get_fg_mask(small_image, scaled_bb)
    show_and_wait('2',
                  background_removal.get_masked_image(small_image, fg_mask))

    # plain bounding-box mask
    bb_mask = background_removal.get_binary_bb_mask(small_image, scaled_bb)
    show_and_wait('3',
                  background_removal.get_masked_image(small_image, bb_mask))

    # intersection of both masks - the mask that would be sent to the fp
    combined_mask = cv2.bitwise_and(fg_mask, bb_mask)
    show_and_wait(
        '4', background_removal.get_masked_image(small_image, combined_mask))
    return
Beispiel #2
0
def from_image_url_to_categorization_task(image_url):
    """Register ``image_url`` and queue a categorization task per detected face.

    If a document containing ``image_url`` already exists in ``images`` it is
    returned after logging whether it is marked relevant.

    Otherwise the image is downloaded, resized and checked with
    ``background_removal.image_is_relevant``. For a relevant image, every face
    gets a person entry: a copy of the image with the face rectangle drawn on
    it is uploaded, and ``send_image_to_qc_categorization`` is enqueued on
    ``q2`` with the uploaded url and the person id. The new document is
    inserted into ``images`` whether or not the image is relevant.

    :param image_url: url of the image to process
    :return: the existing image document if one was found, otherwise None
    """
    image_obj = images.find_one({"image_urls": image_url})
    if image_obj:
        # NOTE(review): this append only changes the in-memory copy; nothing in
        # this function writes it back to the collection.
        if image_url not in image_obj['image_urls']:
            image_obj['image_urls'].append(image_url)
        if image_obj['relevant']:
            logging.warning("Image is in the DB and relevant!")
        else:
            logging.warning("Image is in the DB and not relevant!")
        return image_obj

    # new image: validate the download BEFORE resizing, so a failed fetch is
    # reported here instead of being handed to standard_resize as None.
    raw_image = Utils.get_cv2_img_array(image_url)
    if raw_image is None:
        logging.warning("There's no image in the url!")
        return None
    image = background_removal.standard_resize(raw_image, 400)[0]
    if image is None:
        logging.warning("There's no image in the url!")
        return None

    relevance = background_removal.image_is_relevant(image)
    image_dict = {'image_urls': [image_url],
                  'relevant': relevance.is_relevant,
                  '_id': bson.ObjectId()}
    if relevance.is_relevant:
        image_dict['people'] = []
        for face in relevance.faces:
            x, y, w, h = face
            person = {'face': face.tolist(), 'person_id': str(bson.ObjectId())}
            # Draw on a copy so every person gets a picture with only their
            # own face marked.
            image_copy = image.copy()
            cv2.rectangle(image_copy, (x, y), (x + w, y + h), [0, 255, 0], 2)
            person['url'] = upload_image(image_copy, str(person['person_id']))
            image_dict['people'].append(person)
            q2.enqueue(send_image_to_qc_categorization, person['url'], person['person_id'])
    else:
        logging.warning('image is not relevant, but stored anyway..')
    images.insert(image_dict)
    return None
Beispiel #3
0
def make_mask_test(image_url, bb=None):
    """Build an item mask for an image via grab-cut, skin and clutter removal.

    The image is fetched with ``Utils.get_cv2_img_array`` and resized with
    ``background_removal.standard_resize(image, 400)``. A grab-cut foreground
    mask is computed (using ``bb`` when one is given) and applied. If a face
    is found in the resized image, skin and clutter are removed before the
    final mask is taken; otherwise the mask comes straight from the grab-cut
    image.

    :param image_url: passed to ``Utils.get_cv2_img_array``
    :param bb: optional bounding box (x, y, w, h) in original-image
        coordinates; when None no box is passed on to the grab-cut
    :return: result of ``kassper.get_mask``
    """
    image = Utils.get_cv2_img_array(image_url)  # turn the URL into a cv2 image
    # shrink image for faster process
    small_image, resize_ratio = background_removal.standard_resize(image, 400)
    if bb is not None:
        # shrink bb in the same ratio; skipped for the default bb=None, which
        # used to fail on np.array(None) / resize_ratio
        bb = [int(b) for b in (np.array(bb) / resize_ratio)]
    # returns the grab-cut mask (if bb => PFG-PBG gc, if !bb => face gc)
    fg_mask = background_removal.get_fg_mask(small_image, bb)
    gc_image = background_removal.get_masked_image(small_image, fg_mask)

    face_rect = background_removal.find_face(small_image)
    if len(face_rect) > 0:
        x, y, w, h = face_rect[0]
        # The face was located in small_image, so it must be cropped from
        # small_image too; cropping the full-size image with these coordinates
        # selects the wrong region whenever the image was actually resized.
        face_image = small_image[y:y + h, x:x + w, :]
        # NOTE(review): argument order here is (face, image) while
        # skin_removal_test calls kassper.skin_removal(gc_image, face_image) -
        # confirm which order kassper expects.
        without_skin = kassper.skin_removal(face_image, gc_image)
        crawl_mask = kassper.clutter_removal(without_skin, 200)
        without_clutter = background_removal.get_masked_image(
            without_skin, crawl_mask)
        mask = kassper.get_mask(without_clutter)
    else:
        mask = kassper.get_mask(gc_image)
    return mask
Beispiel #4
0
def check_if_relevant(image_url, page_url, products_collection, method):
    """Check whether the image at ``image_url`` is relevant and record the result.

    The image is fetched, a copy resized with
    ``background_removal.standard_resize(image, 600)`` is passed to
    ``background_removal.image_is_relevant``, and then:

    * not relevant - a record holding the image hash, urls, an optional label
      and bookkeeping fields is inserted into ``db.irrelevant_images`` and
      ``db.labeled_irrelevant`` and returned;
    * relevant - a record with one person entry per detected face (including
      the gender reported by ``classifier_client``) is inserted into
      ``db.iip`` and a job is enqueued on ``start_pipeline``.

    :param image_url: url of the image to check
    :param page_url: url of the page the image was found on
    :param products_collection: forwarded to the enqueued job
    :param method: forwarded to the enqueued job
    :return: the irrelevant-image record, or None (image could not be fetched,
        or the image is relevant)
    """
    image = Utils.get_cv2_img_array(image_url)
    if image is None:
        return
    small_img, _ = background_removal.standard_resize(image, 600)
    relevance = background_removal.image_is_relevant(small_img, use_caffe=False, image_url=image_url)

    if not relevance.is_relevant:
        hashed = get_hash(image)
        # Labelling is best-effort: any ordinary failure just leaves the label
        # empty. `except Exception` (instead of a bare except) lets
        # KeyboardInterrupt / SystemExit propagate.
        try:
            label = labelize(image).replace('.', '')
        except Exception:
            label = None
        image_obj = {'image_hash': hashed, 'image_urls': [image_url], 'page_urls': [page_url], 'people': [],
                     'relevant': False, 'saved_date': str(datetime.datetime.utcnow()), 'views': 1,
                     'labels': label}
        db.irrelevant_images.insert_one(image_obj)
        db.labeled_irrelevant.insert_one(image_obj)
        return image_obj

    people = [{'person_id': str(bson.ObjectId()), 'face': face.tolist(),
               'gender': classifier_client.get('gender', image, face=face.tolist())['gender']}
              for face in relevance.faces]
    # NOTE(review): 'image_urls' is a plain string here but a list in the
    # irrelevant record above - confirm which shape consumers expect.
    image_obj = {'people': people,
                 'image_urls': image_url, 'page_url': page_url, 'insert_time': datetime.datetime.now()}
    db.iip.insert_one(image_obj)
    # NOTE(review): func is an empty string here - confirm the queue resolves
    # the job target some other way.
    start_pipeline.enqueue_call(func="", args=(page_url, image_url, products_collection, method),
                                ttl=2000, result_ttl=2000, timeout=2000)
Beispiel #5
0
def run_fp(image_url, bb=None):
    """Fingerprint the image found at ``image_url``.

    The image is fetched, resized with
    ``background_removal.standard_resize(image, 400)``, masked with
    ``get_mask`` and handed to ``yuli_fp``. The mask shape is printed along
    the way.

    :param image_url: passed to ``Utils.get_cv2_img_array``
    :param bb: accepted but not used by this function
    :return: whatever ``yuli_fp`` returns
    """
    full_image = Utils.get_cv2_img_array(image_url)
    small_image, _ratio = background_removal.standard_resize(full_image, 400)
    mask = get_mask(small_image)
    print(mask.shape)
    return yuli_fp(small_image, mask, whaterver=63)
Beispiel #6
0
def dl_keyword_images(
        category_id,
        total=2000,
        keyword=None,
        dir='images',
        show_visual_output=False):
    """Download product images of a category (optionally filtered by keyword).

    Products whose categories include ``category_id`` or one of its
    subcategories (per ``get_all_subcategories``) are read from
    ``db.products``. When ``keyword`` is given, only products whose
    description matches it as a regex are considered. Each product's XLarge
    image is downloaded and, if ``background_removal.image_is_relevant``
    accepts a resized copy, written to ``<dir>/<category_id>[/<keyword>]/<id>.jpg``.

    :param category_id: category whose products (incl. subcategories) to fetch
    :param total: maximum number of products to process
    :param keyword: optional regex matched against the product description
    :param dir: base output directory
    :param show_visual_output: when truthy, show each downloaded image briefly
    :return: None
    """
    query = {
        "categories": {
            "$elemMatch": {
                "id": {
                    "$in": get_all_subcategories(db.categories, category_id)
                }
            }
        }
    }
    path = os.path.join(dir, category_id)
    if keyword is None:
        cursor = db.products.find(query)
    else:
        path = os.path.join(path, keyword)
        cursor = db.products.find(
            {'$and': [{
                "description": {
                    '$regex': keyword
                }
            }, query]})
    print('path:' + path)
    if not os.path.exists(path):
        print('creating dir')
        os.makedirs(path)

    for processed, item in enumerate(cursor):
        # Stop after exactly `total` products (the previous `i > total` check
        # let one extra product through).
        if processed >= total:
            break
        url = item['image']['sizes']['XLarge']['url']
        print('url:' + url)
        item_image = Utils.get_cv2_img_array(url)
        if item_image is None:
            # One broken image should not abort the whole batch.
            continue
        if show_visual_output:
            cv2.imshow('im1', item_image)
            cv2.waitKey(200)

        # NOTE(review): other callers read `.is_relevant` from this result;
        # confirm that the bare truth value is what is wanted here.
        if background_removal.image_is_relevant(
                background_removal.standard_resize(item_image, 400)[0]):
            name = os.path.join(path, str(item['id']) + '.jpg')
            try:
                print('writing ' + name)
                cv2.imwrite(name, item_image)
            except Exception:
                print('couldnt write file:' + name)
Beispiel #7
0
def skin_removal_test():
    """Visual check of ``kassper.skin_removal``.

    Takes the image from ``background_removal.get_image()``, resizes it,
    applies the grab-cut foreground mask, crops the first detected face and
    runs skin removal. The original, the grab-cut image and the result are
    shown until a key is pressed.
    """
    image, _ratio = background_removal.standard_resize(
        background_removal.get_image(), 400)
    gc_image = background_removal.get_masked_image(
        image, background_removal.get_fg_mask(image))

    # Only the first detected face is used as the skin reference.
    x, y, w, h = background_removal.find_face(image)[0]
    face_image = image[y:y + h, x:x + w, :]
    without_skin = kassper.skin_removal(gc_image, face_image)

    for window_name, picture in (('original', image),
                                 ('gc', gc_image),
                                 ('after skin', without_skin)):
        cv2.imshow(window_name, picture)
    cv2.waitKey(0)
Beispiel #8
0
def find_images(description):
    """Save product images whose description contains ``description``.

    Documents are requested one index at a time from
    ``dbUtils.lookfor_next_unbounded_feature_from_db_category`` until it stops
    returning a document (or 10500 have been tried). Each XLarge image is
    downloaded and checked with ``background_removal.image_is_relevant``:

    * ``is_relevant`` is False -> ``netanel/<description>/noface/<n>.jpg``
    * otherwise                -> ``netanel/<description>/<n>.jpg``

    :param description: word to look for in the product description; also
        used as the output sub-directory name
    :return: None
    """
    print('starting to find ' + str(description))
    no_face_count = 0
    face_count = 0
    show_visual_output = False
    for i in range(0, 10500):
        mdoc = dbUtils.lookfor_next_unbounded_feature_from_db_category(current_item=i, skip_if_marked_to_skip=True,
                                                                       which_to_show='showAll',
                                                                       filter_type='byWordInDescription',
                                                                       category_id=None,
                                                                       word_in_description=description,
                                                                       db=db)

        if mdoc is None or 'doc' not in mdoc:
            break

        print('not none')
        doc = mdoc['doc']
        print(doc)
        xlarge_url = doc['image']['sizes']['XLarge']['url']
        print('large img url:' + str(xlarge_url))
        img_arr = Utils.get_cv2_img_array(xlarge_url)
        if img_arr is None:
            # A single failed download should not end the whole crawl.
            continue
        small_img = background_removal.standard_resize(img_arr, 400)[0]
        if show_visual_output:
            cv2.imshow('im1', img_arr)
        relevance = background_removal.image_is_relevant(small_img)
        print('relevance:' + str(relevance))

        out_dir = os.path.join('netanel', description)
        if not relevance.is_relevant:
            no_face_count += 1
            print('not relevant')
            out_dir = os.path.join(out_dir, 'noface')
            file_name = str(no_face_count) + '.jpg'
        else:
            face_count += 1
            print('relevant')
            file_name = str(face_count) + '.jpg'
        Utils.ensure_dir(out_dir)
        # Write inside the directory that was just ensured (the relevant
        # branch used to target '<description>/<n>.jpg', dropping 'netanel').
        fname = os.path.join(out_dir, file_name)
        print('writing ' + str(fname))
        cv2.imwrite(fname, img_arr)
def test_classifier(classifier,
                    imagesDir,
                    max_files_to_try=10000,
                    use_visual_output=False):
    '''
    Run classifier on all images in a directory - assume only one or no
    target items per image, so any match beyond the first counts as extra.

    :param classifier: the classifier xml
    :param imagesDir: directory containing images
    :param max_files_to_try: stop after this many readable images
    :param use_visual_output: when True, show each image and forward the flag
        to ``detect_no_bb``
    :return: tuple (totTargets, totMatches, totExtra) - images tested, images
        with at least one match, and total surplus matches
    '''
    n_files = 0
    totTargets = 0
    totMatches = 0
    totExtra = 0
    files = Utils.files_in_directory(imagesDir)
    print('testing ' + str(len(files)) + ' files in directory ' +
          str(imagesDir))
    for image_path in files:
        img_array = cv2.imread(image_path)
        if img_array is None:
            print('file:' + image_path + ' read error')
            continue
        img_array, ratio = background_removal.standard_resize(img_array, 400)
        n_files += 1
        if use_visual_output is True:
            cv2.imshow('input', img_array)
            cv2.waitKey(10)

        nMatches = detect_no_bb(classifier,
                                img_array,
                                use_visual_output=use_visual_output)
        # A falsy result (0, or presumably None - the guard below predates this
        # change) counts as "no match"; testing truthiness avoids comparing
        # None with 0, which raises on Python 3.
        matched = 1 if nMatches else 0
        n_extra = 0
        if nMatches:
            n_extra = nMatches - 1  # any more than 1 match is assumed wrong here
        totTargets += 1
        totMatches += matched
        totExtra += n_extra
        print('totTargets:' + str(totTargets) + ' nMatches:' + str(nMatches) +
              ' totMatches:' + str(totMatches) + ' nExtra:' + str(n_extra) +
              ' totextra:' + str(totExtra),
              end='\r')
        if n_files == max_files_to_try:
            print('reached max of ' + str(max_files_to_try) +
                  ' files to check')
            break
    return (totTargets, totMatches, totExtra)
Beispiel #10
0
def find_images(description):
    """Run pose-estimation bounding boxes on products matching ``description``.

    For indices 0..499 a document is requested from
    ``dbUtils.lookfor_next_unbounded_feature_from_db_category``. For every
    document returned, the XLarge image is downloaded and checked with
    ``background_removal.image_is_relevant``. Images that pass are handed to
    ``get_pose_est_bbs``, together with the first detected face as ``bb``
    when ``background_removal.find_face`` finds one.

    :param description: word to look for in the product description
    :return: None
    """
    print('starting to find ' + str(description))
    show_visual_output = False
    for i in range(0, 500):
        mdoc = dbUtils.lookfor_next_unbounded_feature_from_db_category(
            current_item=i,
            skip_if_marked_to_skip=True,
            which_to_show='showAll',
            filter_type='byWordInDescription',
            category_id=None,
            word_in_description=description,
            db=None)

        if mdoc is None or 'doc' not in mdoc:
            continue
        doc = mdoc['doc']

        xlarge_url = doc['image']['sizes']['XLarge']['url']
        img_arr = Utils.get_cv2_img_array(xlarge_url)
        if img_arr is None:
            # Skip this product only; a failed download used to end the
            # whole run.
            continue
        small_img = background_removal.standard_resize(img_arr, 400)[0]
        if show_visual_output:
            cv2.imshow('im1', img_arr)
            cv2.waitKey(200)
            cv2.imshow('smallim1', small_img)
            cv2.waitKey(200)

        # NOTE(review): other callers read `.is_relevant` from this result;
        # confirm that the bare truth value is what is wanted here.
        relevance = background_removal.image_is_relevant(small_img)
        if not relevance:
            print('image is not relevant:' + str(description))
            continue
        print('image is relevant:' + str(description))

        face1 = background_removal.find_face(img_arr)
        if face1 is not None and len(face1) != 0:
            print('face1:' + str(face1))
            get_pose_est_bbs(xlarge_url,
                             description,
                             n=i,
                             show_visual_output=show_visual_output,
                             bb=face1[0])
        else:
            get_pose_est_bbs(xlarge_url,
                             description,
                             n=i,
                             show_visual_output=show_visual_output)
Beispiel #11
0
def from_svg_to_similar_results(svg_url, image_url, fp_length=fingerprint_length, bins=histograms_length,
                                collection_name="products",
                                fp_category="fingerprint",
                                distance_func=None):
    """Compute and store similar results for the item identified by ``svg_url``.

    The image document containing ``image_url`` is looked up in ``db.images``.
    For the first of its items whose ``svg_url`` matches, the stored mask file
    is read, the document's first image url is downloaded and resized, and
    ``find_similar_mongo.find_top_n_results`` is asked for the top 30 results.
    The item's ``fp`` and ``similar_results`` are then written back to the
    database.

    :param svg_url: svg url identifying the item inside the image document
    :param image_url: url used to find the image document
    :param fp_length: forwarded to ``find_top_n_results``
    :param bins: forwarded to ``find_top_n_results``
    :param collection_name: forwarded to ``find_top_n_results``
    :param fp_category: forwarded to ``find_top_n_results``
    :param distance_func: forwarded to ``find_top_n_results``
    :return: the updated image document, or None when the urls are missing,
        the document or item is not found, or the mask file cannot be read
    """
    if svg_url is None or image_url is None:
        logging.warning("Bad urls!")
        return None
    image_dict = db.images.find_one({'image_urls': image_url})
    if image_dict is None:
        logging.warning("item wasn't found for some reason")
        return None

    # An image document may exist without any items yet; treat that the same
    # as "no matching item" instead of raising KeyError.
    for curr_item in image_dict.get("items", []):
        if curr_item["svg_url"] != svg_url:
            continue

        mask_image = cv2.imread(curr_item['mask_name'])
        if mask_image is None:
            # cv2.imread signals an unreadable/missing file by returning None.
            logging.warning("couldn't read mask file: %s", curr_item['mask_name'])
            return None
        item_mask = mask_image[:, :, 0]
        image = background_removal.standard_resize(utils_tg.get_cv2_img_array(image_dict['image_urls'][0]), 400)[0]

        curr_item['fp'], curr_item['similar_results'] = \
            find_similar_mongo.find_top_n_results(image, item_mask, 30, curr_item['category'], collection_name,
                                                  fp_category, fp_length, distance_func, bins)

        return db.images.find_one_and_update({'items.svg_url': curr_item["svg_url"]},
                                             {'$set': {'items.$': curr_item}},
                                             return_document=pymongo.ReturnDocument.AFTER)
    return None
Beispiel #12
0
def add_new_fp(doc, x):
    """Fingerprint ``doc``'s XLarge image and store the result in ``db.fp_testing``.

    The image is downloaded and resized, a mask is obtained from
    ``fp.generate_mask_and_insert`` (mask only, nothing saved), and the
    fingerprint is stored on the document as ``new_fp`` with ``fp_version``
    999 before the document is inserted into ``db.fp_testing``. Failures while
    fingerprinting or inserting are logged, not raised.

    :param doc: product document with ``image.sizes.XLarge.url``
    :param x: index of the element, used in log messages and returned
    :return: ``x``, or None when the image is not valid
    """
    image_url = doc["image"]["sizes"]["XLarge"]["url"]

    image = utils_tg.get_cv2_img_array(image_url)
    if not utils_tg.is_valid_image(image):
        logging.warning("image is None. url: {url}".format(url=image_url))
        return

    small_image, _resize_ratio = background_removal.standard_resize(image, 400)
    del image  # the full-size image is no longer needed

    mask = fp.generate_mask_and_insert(image_url=None, db_doc=doc, save_to_db=False, mask_only=True)
    try:
        fingerprint = fp.fp(small_image, mask=mask)
        doc["new_fp"] = fingerprint.tolist()
        doc["fp_version"] = 999
        db.fp_testing.insert(doc)
    except Exception as ex:
        # Include the exception itself: the old message applied .format(ex) to
        # a literal without a placeholder, so the cause was never logged.
        logging.warning("Exception caught while inserting element #%s to the collection: %s", x, ex)

    return x
Beispiel #13
0
def find_or_create_image(image_url):
    """
    Search in db.images for the image by image url, if not exists - create one and start the process.

    A document that exists but has no 'items' yet is processed as well: the
    svgs are generated with ``from_image_to_svgs`` and stored on it.

    :param image_url: this is coming directly from the web interface so it's all we'll ever get.
    :return: image dictionary with svgs, or None when the url yields no image
    """
    # Validate the download before resizing so a bad url is reported here
    # instead of being passed to standard_resize as None.
    raw_image = utils_tg.get_cv2_img_array(image_url)
    if raw_image is None:
        logging.warning("Bad url!")
        return None
    image = background_removal.standard_resize(raw_image, 400)[0]
    if image is None:
        logging.warning("Bad url!")
        return None

    image_dict = db.images.find_one({"image_urls": image_url})
    if image_dict is None or 'items' not in image_dict:
        if image_dict is None:
            image_id = db.images.insert({"image_urls": [image_url]})
            # TODO - where is the case which we append other url on the same image
        else:
            image_id = image_dict['_id']
        items_dict = from_image_to_svgs(image, image_id)
        image_dict = db.images.find_one_and_update({'image_urls': image_url}, {'$set': items_dict},
                                                   return_document=pymongo.ReturnDocument.AFTER)
    return image_dict
Beispiel #14
0
def lomshane_test():
    """Compare fingerprints taken from four fixed bounding boxes of one image.

    The image from ``get_image()`` is resized, a mask is built for each of
    the hard-coded boxes with ``make_mask_test`` and each mask is
    fingerprinted. The pairwise fingerprint differences are printed and the
    four masks are shown until a key is pressed.
    """
    bbs = [[9, 135, 97, 87], [200, 299, 98, 67], [9, 272, 120, 95],
           [316, 13, 83, 138]]
    image, _ratio = background_removal.standard_resize(get_image(), 400)

    masks = []
    for bb in bbs:
        masks.append(make_mask_test(image, bb))
    fingers = []
    for mask in masks:
        fingers.append(fingerprint.fp(image, mask))

    # difference between every pair of fingerprints
    differences = [first - second
                   for first, second in itertools.combinations(fingers, 2)]
    print(np.array(differences))

    for number, mask in enumerate(masks, 1):
        cv2.imshow('mask%d' % number, mask)
    cv2.waitKey(0)
    return
Beispiel #15
0
def generate_mask_and_insert(doc,
                             image_url=None,
                             fp_date=None,
                             coll="products",
                             img=None,
                             neuro=False):
    """
    Takes an image + whatever else you give it, builds a mask, fingerprints
    the image and inserts the document into ``db[coll]``.
    Work in progress...

    The image is ``img`` when it is given (or when ``neuro`` is set),
    otherwise it is downloaded from ``image_url`` / the doc's XLarge url.
    The mask comes from ``neurodoll`` when ``neuro`` is set, else from
    ``background_removal.get_fg_mask``.

    :param doc: ShopStyle DB doc; updated in place with 'fingerprint' and
        'download_data' fields
    :param image_url: optional url overriding the doc's XLarge image url
    :param fp_date: stored as 'first_dl' and 'dl_version' in 'download_data'
    :param coll: name of the collection the doc is inserted into
    :param img: optional already-loaded image
    :param neuro: use neurodoll for the mask instead of grab-cut
    :return: ``fingerprint['color']``; None when the image is invalid; ``[]``
        when neurodoll fails
    """
    image_url = image_url or doc["image"]["sizes"]["XLarge"]["url"]
    if neuro or img is not None:
        image = img
    else:
        image = Utils.get_cv2_img_array(image_url)
    if not Utils.is_valid_image(image):
        logging.warning("image is None. url: {url}".format(url=image_url))
        return
    small_image, resize_ratio = background_removal.standard_resize(image, 400)

    if not Utils.is_valid_image(small_image):
        logging.warning("small_image is Bad. {img}".format(img=small_image))
        return
    category = doc['categories']
    print(category)
    if neuro:
        category_idx = recruit2category_idx[category]
        success, neuro_mask = neurodoll(image, category_idx)
        if not success:
            print("error neurodolling")
            return []
        # NOTE(review): the mask is forced to 400x400 regardless of
        # small_image's shape - confirm dict_fp accepts that.
        small_mask = cv2.resize(neuro_mask, (400, 400))
    else:
        small_mask = background_removal.get_fg_mask(small_image)

    fingerprint = dict_fp(small_image, small_mask, category)
    print('fingerprint done')
    doc["fingerprint"] = fingerprint
    doc["download_data"]["first_dl"] = fp_date
    doc["download_data"]["dl_version"] = fp_date
    doc["download_data"]["fp_version"] = constants.fingerprint_version
    print("prod insert ...")
    try:
        db[coll].insert_one(doc)
        print("successfull")
    except Exception as ex:
        # Insertion stays best-effort, but the reason is now logged, and
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        logging.warning("insert into %s failed: %s", coll, ex)
        print("failed")

    return fingerprint['color']
Beispiel #16
0
def selectBest(bblist, imgurl):
    """Merge three bounding-box selections of one image into a single box.

    Each of the pairs (0, 1), (0, 2) and (1, 2) of ``bblist`` is scored. A
    pair gets a non-zero weight only when both boxes cover more than 1% of
    the resized image, their centres are close enough and the resulting
    overlap score exceeds 0.2. The returned box is the weight-normalised
    combination of the pairs' averaged boxes, floored to integers.

    :param bblist: three boxes, each indexable as (x, y, w, h)
    :param imgurl: url of the image the boxes refer to
    :return: [x, y, w, h] of the merged box, or None if no pair is similar
    """
    resized, _ratio = background_removal.standard_resize(
        utils_tg.get_cv2_img_array(imgurl), 400)
    rows, cols = resized.shape[0:2]
    img_size = rows * cols

    # centre of mass of every selection
    centers = [(box[0] + box[2] / 2, box[1] + box[3] / 2) for box in bblist]

    def minmax(xlist, ylist, wlist, hlist):
        # Despite the name, this averages each coordinate of the pair.
        return [int(np.mean(values))
                for values in (xlist, ylist, wlist, hlist)]

    def is_similar(com1, com2, x, y, wid, hig):
        """
        Score how similar two boxes are.

        :param com1: centre of the first box
        :param com2: centre of the second box
        :param x: x of both boxes
        :param y: y of both boxes
        :param wid: widths of both boxes
        :param hig: heights of both boxes
        :return: 2 * overlap / (area1 + area2) when the centres are closer
            than half the diagonal of the smallest width/height, else 0
        """
        dist_threshold = math.sqrt(pow(min(wid), 2) + pow(min(hig), 2))
        center_dist = math.sqrt(
            pow(com1[0] - com2[0], 2) + pow(com1[1] - com2[1], 2))

        # intersection rectangle of the two boxes
        left = max(x[0], x[1])
        top = max(y[0], y[1])
        right = min(x[0] + wid[0], x[1] + wid[1])
        bottom = min(y[0] + hig[0], y[1] + hig[1])
        overlap = (right - left) * (bottom - top)
        area_first = wid[0] * hig[0]
        area_second = wid[1] * hig[1]

        if center_dist < 0.5 * dist_threshold:
            # TODO : decide the dist percent
            return 2.0 * overlap / (area_first + area_second)
        return 0

    weights = [0, 0, 0]
    new_points = []
    total_weight = 0
    simcount = 0

    for idx, (a, b) in enumerate(((0, 1), (0, 2), (1, 2))):
        first, second = bblist[a], bblist[b]
        x = [first[0], second[0]]
        y = [first[1], second[1]]
        wid = [first[2], second[2]]
        hig = [first[3], second[3]]

        # size threshold check - selection bigger than 1% of original img size
        if min(wid[0] * hig[0], wid[1] * hig[1]) > 0.01 * img_size:
            score = is_similar(centers[a], centers[b], x, y, wid, hig)
            if score > 0.2:
                weights[idx] = score
                simcount += 1
                total_weight += score
            else:
                weights[idx] = 0

        new_points.append(minmax(x, y, wid, hig))

    if simcount == 0:
        print('WARNING : three different entries')
        return None

    print({1: 'one similar', 2: 'two similar', 3: 'three similar'}[simcount])

    # para = [x, y, w, h]; the terms are added in the fixed order 0, 1, 2
    para = [
        int(math.floor(weights[0] / total_weight * new_points[0][p] +
                       weights[1] / total_weight * new_points[1][p] +
                       weights[2] / total_weight * new_points[2][p]))
        for p in range(4)
    ]
    return para