def gc2mask_test(image, bb):
    small_image, resize_ratio = background_removal.standard_resize(image, 400)  # shrink image for faster processing
    bb = np.array(bb) / resize_ratio
    bb = bb.astype(np.uint16)  # shrink bb by the same ratio
    # bb = [int(b) for b in (np.array(bb) / resize_ratio)]
    x, y, w, h = bb
    cv2.rectangle(small_image, (x, y), (x + w, y + h), [0, 255, 0], 2)
    cv2.imshow('1', small_image)
    cv2.waitKey(0)
    # returns the grab-cut mask (if bb => PFG-PBG gc, if !bb => face gc)
    fg_mask = background_removal.get_fg_mask(small_image, bb)
    cv2.imshow('2', background_removal.get_masked_image(small_image, fg_mask))
    cv2.waitKey(0)
    bb_mask = background_removal.get_binary_bb_mask(small_image, bb)  # bounding box mask
    cv2.imshow('3', background_removal.get_masked_image(small_image, bb_mask))
    cv2.waitKey(0)
    combined_mask = cv2.bitwise_and(fg_mask, bb_mask)  # for sending the right mask to the fp
    cv2.imshow('4', background_removal.get_masked_image(small_image, combined_mask))
    cv2.waitKey(0)
    return
def from_image_url_to_categorization_task(image_url):
    image_obj = images.find_one({"image_urls": image_url})
    if not image_obj:  # new image
        image = Utils.get_cv2_img_array(image_url)
        if image is None:
            logging.warning("There's no image in the url!")
            return None
        image = background_removal.standard_resize(image, 400)[0]
        relevance = background_removal.image_is_relevant(image)
        image_dict = {'image_urls': [image_url], 'relevant': relevance.is_relevant, '_id': bson.ObjectId()}
        if relevance.is_relevant:
            image_dict['people'] = []
            for face in relevance.faces:
                x, y, w, h = face
                person = {'face': face.tolist(), 'person_id': str(bson.ObjectId())}
                image_copy = image.copy()
                cv2.rectangle(image_copy, (x, y), (x + w, y + h), [0, 255, 0], 2)
                person['url'] = upload_image(image_copy, str(person['person_id']))
                image_dict['people'].append(person)
                q2.enqueue(send_image_to_qc_categorization, person['url'], person['person_id'])
        else:
            logging.warning('image is not relevant, but stored anyway..')
        images.insert(image_dict)
    else:
        if image_url not in image_obj['image_urls']:
            # NOTE: this only updates the local dict; the new url is not persisted to the DB here
            image_obj['image_urls'].append(image_url)
        if image_obj['relevant']:
            logging.warning("Image is in the DB and relevant!")
        else:
            logging.warning("Image is in the DB and not relevant!")
        return image_obj
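# Usage sketch (the url is a made-up example): fetch an image, let the
# function store the doc and enqueue QC categorization tasks for each face.
# Assumes the module-level `images` collection and `q2` queue are configured.
#   doc = from_image_url_to_categorization_task('http://example.com/street_look.jpg')
#   if doc is not None:  # returned only when the image was already in the DB
#       print('relevant: ' + str(doc['relevant']))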
def make_mask_test(image_url, bb=None):
    svg_address = constants.svg_folder  # NOTE: currently unused
    image = Utils.get_cv2_img_array(image_url)  # turn the URL into a cv2 image
    small_image, resize_ratio = background_removal.standard_resize(image, 400)  # shrink image for faster processing
    if bb is not None:
        bb = [int(b) for b in (np.array(bb) / resize_ratio)]  # shrink bb by the same ratio
    # returns the grab-cut mask (if bb => PFG-PBG gc, if !bb => face gc)
    fg_mask = background_removal.get_fg_mask(small_image, bb)
    # bb_mask = background_removal.get_binary_bb_mask(small_image, bb)  # bounding box mask
    # combined_mask = cv2.bitwise_and(fg_mask, bb_mask)  # for sending the right mask to the fp
    gc_image = background_removal.get_masked_image(small_image, fg_mask)
    face_rect = background_removal.find_face(small_image)
    if len(face_rect) > 0:
        x, y, w, h = face_rect[0]
        face_image = small_image[y:y + h, x:x + w, :]  # face_rect is in small_image coordinates
        without_skin = kassper.skin_removal(face_image, gc_image)
        crawl_mask = kassper.clutter_removal(without_skin, 200)
        without_clutter = background_removal.get_masked_image(without_skin, crawl_mask)
        mask = kassper.get_mask(without_clutter)
    else:
        mask = kassper.get_mask(gc_image)
    return mask
def check_if_relevant(image_url, page_url, products_collection, method):
    image = Utils.get_cv2_img_array(image_url)
    if image is None:
        return
    small_img, _ = background_removal.standard_resize(image, 600)
    relevance = background_removal.image_is_relevant(small_img, use_caffe=False, image_url=image_url)
    if not relevance.is_relevant:
        hashed = get_hash(image)
        try:
            label = labelize(image).replace('.', '')
        except Exception:
            label = None
        image_obj = {'image_hash': hashed,
                     'image_urls': [image_url],
                     'page_urls': [page_url],
                     'people': [],
                     'relevant': False,
                     'saved_date': str(datetime.datetime.utcnow()),
                     'views': 1,
                     'labels': label}
        db.irrelevant_images.insert_one(image_obj)
        db.labeled_irrelevant.insert_one(image_obj)
        return image_obj
    image_obj = {'people': [{'person_id': str(bson.ObjectId()),
                             'face': face.tolist(),
                             'gender': classifier_client.get('gender', image, face=face.tolist())['gender']}
                            for face in relevance.faces],
                 'image_urls': image_url,
                 'page_url': page_url,
                 'insert_time': datetime.datetime.now()}
    db.iip.insert_one(image_obj)
    start_pipeline.enqueue_call(func="", args=(page_url, image_url, products_collection, method),
                                ttl=2000, result_ttl=2000, timeout=2000)
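# Usage sketch (all arguments are placeholder values): gate an incoming page
# image before enqueueing the full pipeline.
#   obj = check_if_relevant('http://example.com/photo.jpg',
#                           'http://example.com/article.html',
#                           'products', 'pd')
#   if obj is not None:  # returned only for irrelevant images
#       print('stored as irrelevant, labels: ' + str(obj['labels']))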
def run_fp(image_url, bb=None):
    image = Utils.get_cv2_img_array(image_url)
    small_image, resize_ratio = background_removal.standard_resize(image, 400)
    mask = get_mask(small_image)
    print(mask.shape)
    # cv2.imshow('mask', mask)
    # cv2.waitKey(0)
    # NOTE: 'whaterver' is kept as-is to match yuli_fp's signature; bb is currently unused
    fp_vector = yuli_fp(small_image, mask, whaterver=63)
    return fp_vector
def dl_keyword_images(category_id,
                      total=2000,
                      keyword=None,
                      # dir='/home/jeremy/[email protected]/TrendiGuru/techdev/trendi_guru_modules/images',
                      # dir='/home/ubuntu/Dev/trendi_guru_modules/images',
                      dir='images',
                      show_visual_output=False):
    query = {"categories": {"$elemMatch": {"id": {"$in": get_all_subcategories(db.categories, category_id)}}}}
    if keyword is None:
        path = os.path.join(dir, category_id)
        cursor = db.products.find(query)
    else:
        path = os.path.join(dir, category_id, keyword)
        cursor = db.products.find({'$and': [{"description": {'$regex': keyword}}, query]})
    print('path:' + path)
    if not os.path.exists(path):
        print('creating dir')
        os.makedirs(path)
    i = 0
    for item in cursor:
        if i >= total:
            break
        i += 1
        url = item['image']['sizes']['XLarge']['url']
        print('url:' + url)
        item_image = Utils.get_cv2_img_array(url)
        if item_image is None:
            continue  # skip unfetchable images instead of aborting the whole download
        if show_visual_output:
            cv2.imshow('im1', item_image)
            k = cv2.waitKey(200)
        if background_removal.image_is_relevant(background_removal.standard_resize(item_image, 400)[0]).is_relevant:
            name = os.path.join(path, str(item['id']) + '.jpg')
            try:
                print('writing ' + name)
                cv2.imwrite(name, item_image)
            except Exception:
                print("couldn't write file:" + name)
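# Usage sketch: download up to 50 relevant images from the 'dress' category
# whose description mentions 'lace' into images/dress/lace/ ('dress' is a
# made-up category id; real ids live in db.categories).
#   dl_keyword_images('dress', total=50, keyword='lace', show_visual_output=False)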
def skin_removal_test():
    image, ratio = background_removal.standard_resize(background_removal.get_image(), 400)
    fg_mask = background_removal.get_fg_mask(image)
    gc_image = background_removal.get_masked_image(image, fg_mask)
    face_rect = background_removal.find_face(image)
    if len(face_rect) == 0:
        print('no face found')
        return
    x, y, w, h = face_rect[0]
    face_image = image[y:y + h, x:x + w, :]
    without_skin = kassper.skin_removal(gc_image, face_image)
    cv2.imshow('original', image)
    cv2.imshow('gc', gc_image)
    cv2.imshow('after skin', without_skin)
    cv2.waitKey(0)
def find_images(description):
    print('starting to find ' + str(description))
    no_face_count = 0
    face_count = 0
    for i in range(0, 10500):
        mdoc = dbUtils.lookfor_next_unbounded_feature_from_db_category(current_item=i,
                                                                       skip_if_marked_to_skip=True,
                                                                       which_to_show='showAll',
                                                                       filter_type='byWordInDescription',
                                                                       category_id=None,
                                                                       word_in_description=description,
                                                                       db=db)
        if mdoc is not None and 'doc' in mdoc:
            print('not none')
            doc = mdoc['doc']
            print(doc)
            xlarge_url = doc['image']['sizes']['XLarge']['url']
            print('large img url:' + str(xlarge_url))
            img_arr = Utils.get_cv2_img_array(xlarge_url)
            if img_arr is None:
                continue  # skip unfetchable images instead of aborting the search
            small_img = background_removal.standard_resize(img_arr, 400)[0]
            show_visual_output = False
            if show_visual_output:
                cv2.imshow('im1', img_arr)
                # k = cv2.waitKey(200)
                # cv2.imshow('smallim1', small_img)
                # k = cv2.waitKey(200)
            relevance = background_removal.image_is_relevant(small_img)
            print('relevance:' + str(relevance))
            if not relevance.is_relevant:
                no_face_count += 1
                print('not relevant')
                fname = os.path.join('netanel', description, 'noface')
                Utils.ensure_dir(fname)
                fname = os.path.join(fname, str(no_face_count) + '.jpg')
            else:
                face_count += 1
                print('relevant')
                fname = os.path.join('netanel', description)
                Utils.ensure_dir(fname)
                fname = os.path.join(fname, str(face_count) + '.jpg')
            print('writing ' + str(fname))
            cv2.imwrite(fname, img_arr)
        else:
            break
def test_classifier(classifier, imagesDir, max_files_to_try=10000, use_visual_output=False):
    '''
    run classifier on all images in dir - assume only one or no target items per image
    :param classifier: the classifier xml
    :param imagesDir: directory containing images
    :return: (totTargets, totMatches, totExtra)
    '''
    n_files = 0
    totTargets = 0
    totMatches = 0
    totExtra = 0
    files = Utils.files_in_directory(imagesDir)
    print('testing ' + str(len(files)) + ' files in directory ' + str(imagesDir))
    for file in files:
        img_array = cv2.imread(file)
        if img_array is None:
            print('file:' + file + ' read error')
            continue
        img_array, ratio = background_removal.standard_resize(img_array, 400)
        h, w, d = img_array.shape
        n_files = n_files + 1
        if use_visual_output:
            cv2.imshow('input', img_array)
            k = 0xFF & cv2.waitKey(10)
        nMatches = detect_no_bb(classifier, img_array, use_visual_output=use_visual_output)
        n_extra = 0
        if nMatches:
            n_extra = nMatches - 1  # any more than 1 match is assumed wrong here
        totTargets = totTargets + 1
        totMatches = totMatches + (nMatches > 0)
        totExtra = totExtra + n_extra
        print('totTargets:' + str(totTargets) + ' nMatches:' + str(nMatches) + ' totMatches:' +
              str(totMatches) + ' nExtra:' + str(n_extra) + ' totextra:' + str(totExtra), end='\r')
        if n_files == max_files_to_try:
            print('reached max of ' + str(max_files_to_try) + ' files to check')
            break
    return (totTargets, totMatches, totExtra)
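# Usage sketch (paths are hypothetical; whether detect_no_bb expects a loaded
# cascade or an xml path depends on its implementation - shown here loaded):
#   cascade = cv2.CascadeClassifier('cascades/dress_cascade.xml')
#   targets, matches, extra = test_classifier(cascade, 'images/dress', max_files_to_try=500)
#   print('hit rate: ' + str(float(matches) / max(targets, 1)))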
def find_images(description):
    print('starting to find ' + str(description))
    for i in range(0, 500):
        mdoc = dbUtils.lookfor_next_unbounded_feature_from_db_category(
            current_item=i,
            skip_if_marked_to_skip=True,
            which_to_show='showAll',
            filter_type='byWordInDescription',
            category_id=None,
            word_in_description=description,
            db=None)
        if mdoc is not None and 'doc' in mdoc:
            doc = mdoc['doc']
            # print(doc)
            xlarge_url = doc['image']['sizes']['XLarge']['url']
            # print('large img url:' + str(xlarge_url))
            img_arr = Utils.get_cv2_img_array(xlarge_url)
            if img_arr is None:
                continue  # skip unfetchable images instead of aborting the search
            small_img = background_removal.standard_resize(img_arr, 400)[0]
            show_visual_output = False
            if show_visual_output:
                cv2.imshow('im1', img_arr)
                k = cv2.waitKey(200)
                cv2.imshow('smallim1', small_img)
                k = cv2.waitKey(200)
            relevance = background_removal.image_is_relevant(small_img)
            if not relevance.is_relevant:
                print('image is not relevant:' + str(description))
                continue
            print('image is relevant:' + str(description))
            face1 = background_removal.find_face(img_arr)
            if face1 is not None and len(face1) != 0:
                print('face1:' + str(face1))
                bb1 = face1[0]
                get_pose_est_bbs(xlarge_url, description, n=i, show_visual_output=show_visual_output, bb=bb1)
            else:
                get_pose_est_bbs(xlarge_url, description, n=i, show_visual_output=show_visual_output)
def from_svg_to_similar_results(svg_url, image_url, fp_length=fingerprint_length, bins=histograms_length,
                                collection_name="products", fp_category="fingerprint", distance_func=None):
    projection_dict = {'seeMoreUrl': 1, 'image': 1, 'clickUrl': 1, 'retailer': 1, 'currency': 1, 'brand': 1,
                       'description': 1, 'price': 1, 'categories': 1, 'name': 1, 'sizes': 1, 'pageUrl': 1,
                       '_id': 0, 'priceLabel': 1}  # NOTE: currently unused
    if svg_url is None or image_url is None:
        logging.warning("Bad urls!")
        return None
    image_dict = db.images.find_one({'image_urls': image_url})
    if image_dict is None:
        logging.warning("item wasn't found for some reason")
        return None
    curr_item = None
    for item in image_dict["items"]:
        if item["svg_url"] == svg_url:
            curr_item = item
            break
    if curr_item is None:
        logging.warning("no item matches the given svg_url")
        return None
    item_mask = cv2.imread(curr_item['mask_name'])[:, :, 0]
    image = background_removal.standard_resize(utils_tg.get_cv2_img_array(image_dict['image_urls'][0]), 400)[0]
    curr_item['fp'], curr_item['similar_results'] = \
        find_similar_mongo.find_top_n_results(image, item_mask, 30, curr_item['category'],
                                              collection_name, fp_category, fp_length,
                                              distance_func, bins)
    # top_matches = {"similar_results": [db.products.find_one({"_id": result["_id"]})
    #                                    for result in curr_item['similar_results']]}
    # return top_matches
    return db.images.find_one_and_update({'items.svg_url': curr_item["svg_url"]},
                                         {'$set': {'items.$': curr_item}},
                                         return_document=pymongo.ReturnDocument.AFTER)
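# Usage sketch (URLs are placeholders): recompute and persist the top-30
# similar results for one segmented item of an already-processed image.
#   updated = from_svg_to_similar_results('http://example.com/svgs/item_1.svg',
#                                         'http://example.com/look.jpg')
#   if updated is not None:
#       print([it['category'] for it in updated['items']])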
def add_new_fp(doc, x):
    image_url = doc["image"]["sizes"]["XLarge"]["url"]
    image = utils_tg.get_cv2_img_array(image_url)
    if not utils_tg.is_valid_image(image):
        logging.warning("image is None. url: {url}".format(url=image_url))
        return
    small_image, resize_ratio = background_removal.standard_resize(image, 400)
    del image
    mask = fp.generate_mask_and_insert(image_url=None, db_doc=doc, save_to_db=False, mask_only=True)
    try:
        fingerprint = fp.fp(small_image, mask=mask)
        doc["new_fp"] = fingerprint.tolist()
        doc["fp_version"] = 999
        db.fp_testing.insert(doc)
    except Exception as ex:
        logging.warning("Exception caught while inserting element #{0} to the collection: {1}".format(x, ex))
    return x
def find_or_create_image(image_url):
    """
    Search in db.images for the image by image url, if not exists - create one and start the process.
    :param image_url: this is coming directly from the web interface so it's all we'll ever get.
    :return: image dictionary with svgs
    """
    image = utils_tg.get_cv2_img_array(image_url)
    if image is None:
        logging.warning("Bad url!")
        return None
    image = background_removal.standard_resize(image, 400)[0]
    image_dict = db.images.find_one({"image_urls": image_url})
    if image_dict is None or 'items' not in image_dict.keys():
        if image_dict is None:
            image_id = db.images.insert({"image_urls": [image_url]})
            # TODO - where is the case in which we append another url for the same image
        else:
            image_id = image_dict['_id']
        items_dict = from_image_to_svgs(image, image_id)
        image_dict = db.images.find_one_and_update({'image_urls': image_url}, {'$set': items_dict},
                                                   return_document=pymongo.ReturnDocument.AFTER)
    return image_dict
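# Usage sketch: the web layer calls this with whatever url the user supplies;
# a second call with the same url returns the stored doc instead of re-running
# from_image_to_svgs.
#   doc = find_or_create_image('http://example.com/outfit.jpg')
#   if doc is not None and 'items' in doc:
#       print([it['svg_url'] for it in doc['items']])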
def lomshane_test():
    """inputs = [json_util.loads(
        '{"url":"http://msc.wcdn.co.il/w/w-635/1684386-5.jpg","bb":"[137.2972972972973,188.80597014925374,356.97297297297297,319.2537313432836]","keyword":"mens-outerwear","post_id":"552a79359e31f134f0f9c401"}'),
        json_util.loads(
        '{"url":"http://msc.wcdn.co.il/w/w-635/1684386-5.jpg","bb":"[50.2972972972973,50.80597014925374,70.97297297297297,70.2537313432836]","keyword":"mens-outerwear","post_id":"552a79359e31f134f0f9c401"}'),
        json_util.loads(
        '{"url":"http://msc.wcdn.co.il/w/w-635/1684386-5.jpg","bb":"[0, 0, 100.97297297297297, 100.2537313432836]","keyword":"mens-outerwear","post_id":"552a79359e31f134f0f9c401"}')]"""
    bbs = [[9, 135, 97, 87], [200, 299, 98, 67], [9, 272, 120, 95], [316, 13, 83, 138]]
    image, ratio = background_removal.standard_resize(get_image(), 400)
    masks = [make_mask_test(image, bb) for bb in bbs]
    fingers = [fingerprint.fp(image, mask) for mask in masks]
    print(np.array([(pair[0] - pair[1]) for pair in itertools.combinations(fingers, 2)]))
    cv2.imshow('mask1', masks[0])
    cv2.imshow('mask2', masks[1])
    cv2.imshow('mask3', masks[2])
    cv2.imshow('mask4', masks[3])
    cv2.waitKey(0)
    return
def generate_mask_and_insert(doc, image_url=None, fp_date=None, coll="products", img=None, neuro=False):
    """
    Takes an image + whatever else you give it, and handles all the logic
    (using/finding/creating a bb, then a mask). Work in progress...
    :param image_url:
    :param doc: ShopStyle DB doc
    :return:
    """
    image_url = image_url or doc["image"]["sizes"]["XLarge"]["url"]
    collection = coll
    if neuro or img is not None:
        image = img
    else:
        image = Utils.get_cv2_img_array(image_url)
    if not Utils.is_valid_image(image):
        logging.warning("image is None. url: {url}".format(url=image_url))
        return
    # img_hash = get_hash(image)
    # if db[coll].find_one({'img_hash': img_hash}):
    #     return
    small_image, resize_ratio = background_removal.standard_resize(image, 400)
    # del image
    if not Utils.is_valid_image(small_image):
        logging.warning("small_image is Bad. {img}".format(img=small_image))
        return
    category = doc['categories']
    print(category)
    if neuro:
        category_idx = recruit2category_idx[category]
        success, neuro_mask = neurodoll(image, category_idx)
        if not success:
            print("error neurodolling")
            return []
        # resize the mask to match small_image (cv2.resize takes (width, height))
        small_mask = cv2.resize(neuro_mask, (small_image.shape[1], small_image.shape[0]))
    else:
        small_mask = background_removal.get_fg_mask(small_image)
    fingerprint = dict_fp(small_image, small_mask, category)
    print('fingerprint done')
    doc["fingerprint"] = fingerprint
    doc["download_data"]["first_dl"] = fp_date
    doc["download_data"]["dl_version"] = fp_date
    doc["download_data"]["fp_version"] = constants.fingerprint_version
    print("prod insert ...")
    try:
        db[collection].insert_one(doc)
        print("successful")
        # db.fp_in_process.delete_one({"id": doc["id"]})
    except Exception:
        # db.download_data.find_one_and_update({"criteria": collection},
        #                                      {'$inc': {"errors": 1}})
        print("failed")
    return fingerprint['color']
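# Usage sketch (collection and date are hypothetical): fingerprint a ShopStyle
# product doc and insert it into the target collection. The doc is assumed to
# already carry 'categories' and a 'download_data' dict.
#   doc = db.products_raw.find_one({'categories': 'dress'})
#   color_fp = generate_mask_and_insert(doc, fp_date='2016-01-01', coll='fp_testing')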
def selectBest(bblist, imgurl):
    large_img = utils_tg.get_cv2_img_array(imgurl)
    img, ratio = background_removal.standard_resize(large_img, 400)
    height, width = img.shape[0:2]
    img_size = height * width
    # calculate center of mass per selection
    COM = []
    for i in bblist:
        COM.append((i[0] + i[2] / 2, i[1] + i[3] / 2))
    weights = [0, 0, 0]
    new_points = []
    total_weight = 0
    simcount = 0

    def minmax(xlist, ylist, wlist, hlist):
        x = int(np.mean(xlist))
        y = int(np.mean(ylist))
        w = int(np.mean(wlist))
        h = int(np.mean(hlist))
        return [x, y, w, h]

    def is_similar(com1, com2, x, y, wid, hig):
        """
        Scores how well two selections agree: if their centers of mass are
        closer than half the diagonal built from the smaller width and height,
        returns the overlap-to-total-area ratio; otherwise returns 0.
        :param com1: center of mass of the first bb
        :param com2: center of mass of the second bb
        :param x, y: lists of the two top-left corners
        :param wid: widths of the two bbs
        :param hig: heights of the two bbs
        :return: similarity weight in [0, 1]
        """
        euclid_dis_thres = math.sqrt(pow(min(wid), 2) + pow(min(hig), 2))
        euclid_dis_com = math.sqrt(pow(com1[0] - com2[0], 2) + pow(com1[1] - com2[1], 2))
        # overlap (clamped to 0 so disjoint boxes don't produce a positive product)
        larger_x_rightcorner = max(x[0], x[1])
        larger_y_rightcorner = max(y[0], y[1])
        smaller_x_leftcorner = min(x[0] + wid[0], x[1] + wid[1])
        smaller_y_leftcorner = min(y[0] + hig[0], y[1] + hig[1])
        overlap = max(0, smaller_x_leftcorner - larger_x_rightcorner) * \
                  max(0, smaller_y_leftcorner - larger_y_rightcorner)
        size1 = wid[0] * hig[0]
        size2 = wid[1] * hig[1]
        if euclid_dis_com < 0.5 * euclid_dis_thres:  # TODO : decide the dist percent
            # check overlap
            weight = 2.0 * overlap / (size1 + size2)
            return weight
        return 0

    couples = [(0, 1), (0, 2), (1, 2)]
    for idx, pair in enumerate(couples):
        x = [bblist[pair[0]][0], bblist[pair[1]][0]]
        y = [bblist[pair[0]][1], bblist[pair[1]][1]]
        wid = [bblist[pair[0]][2], bblist[pair[1]][2]]
        hig = [bblist[pair[0]][3], bblist[pair[1]][3]]
        # size threshold check - selection bigger than 1% of original img size
        if min(wid[0] * hig[0], wid[1] * hig[1]) > 0.01 * img_size:
            weights[idx] = is_similar(COM[pair[0]], COM[pair[1]], x, y, wid, hig)
            if weights[idx] > 0.2:
                simcount += 1
                total_weight += weights[idx]
            else:
                weights[idx] = 0
        else:
            weights[idx] = 0
        new_points.append(minmax(x, y, wid, hig))
    if simcount == 0:
        print('WARNING : three different entries')
        return None
    if simcount == 1:
        print('one similar')
    if simcount == 2:
        print('two similar')
    if simcount == 3:
        print('three similar')
    para = []
    for p in range(4):
        para.append(int(math.floor(weights[0] / total_weight * new_points[0][p] +
                                   weights[1] / total_weight * new_points[1][p] +
                                   weights[2] / total_weight * new_points[2][p])))
    # para = [x, y, w, h]
    return para
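# Usage sketch: merge three QC workers' selections for the same item into one
# consensus box. Values are made up; the boxes appear to be expected in the
# 400-wide resized image space, since bblist is never rescaled.
#   bbs = [[50, 60, 100, 180], [55, 58, 95, 185], [300, 10, 40, 40]]
#   print(selectBest(bbs, 'http://example.com/look.jpg'))  # two of three agree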