def create_ann_file_for_dataset(dataset_path, ann_file_path, params=None):
    """
    Create YAML file with annotations for given dataset.
    For each image in the dataset, position and size
    of the found face is stored.

    The dataset is expected to contain one sub-directory per subject,
    each holding the images of that subject. Entries of the dataset
    directory that are not directories are ignored.
    Every image gets an entry in the annotation file, keyed by its path
    relative to the dataset; the entry is empty when no face is found.

    :type dataset_path: string
    :param dataset_path: path of dataset

    :type ann_file_path: string
    :param ann_file_path: path of file that will contain annotations

    :type params: dictionary
    :param params: configuration parameters (see table)

    ========================== ====================================== =============================
    Key (params)               Value                                  Default value
    ========================== ====================================== =============================
    aligned_faces_path         Path of directory for aligned faces
    check_eye_positions        If True, check eye positions           True
    classifiers_dir_path       Path of directory with OpenCV
                               cascade classifiers
    eye_detection_classifier   Classifier for eye detection           'haarcascade_mcs_lefteye.xml'
    face_detection_algorithm   Classifier for face detection          'HaarCascadeFrontalFaceAlt2'
                               ('HaarCascadeFrontalFaceAlt',
                               'HaarCascadeFrontalFaceAltTree',
                               'HaarCascadeFrontalFaceAlt2',
                               'HaarCascadeFrontalFaceDefault',
                               'HaarCascadeProfileFace',
                               'HaarCascadeFrontalAndProfileFaces',
                               'HaarCascadeFrontalAndProfileFaces2',
                               'LBPCascadeFrontalface',
                               'LBPCascadeProfileFace' or
                               'LBPCascadeFrontalAndProfileFaces')
    flags                      Flags used in face detection           'DoCannyPruning'
                               ('DoCannyPruning', 'ScaleImage',
                               'FindBiggestObject', 'DoRoughSearch').
                               If 'DoCannyPruning' is used, regions
                               that do not contain lines are
                               discarded.
                               If 'ScaleImage' is used, image
                               instead of the detector is scaled
                               (it can be advantageous in terms of
                               memory and cache use).
                               If 'FindBiggestObject' is used, only
                               the biggest object is returned by
                               the detector.
                               'DoRoughSearch', used together with
                               'FindBiggestObject', terminates the
                               search as soon as the first
                               candidate object is found
    min_neighbors              Minimum number of neighbor bounding    5
                               boxes for retaining face detection
    min_size_height            Minimum height of face detection       20
                               bounding box (in pixels)
    min_size_width             Minimum width of face detection        20
                               bounding box (in pixels)
    scale_factor               Scale factor between two scans         1.1
                               in face detection
    max_eye_angle              Maximum inclination of the line        0.125
                               connecting the eyes
                               (in % of pi radians)
    min_eye_distance           Minimum distance between eyes          0.25
                               (in % of the width of the face
                               bounding box)
    nose_detection_classifier  Classifier for nose detection          'haarcascade_mcs_nose.xml'
    software_test_file         Path of image to be used for
                               software test
    use_nose_pos_in_detection  If True, detections with no good       False
                               nose position are discarded
    ========================== ====================================== =============================
    """

    # Set parameters
    align_path = c.ALIGNED_FACES_PATH
    if params and c.ALIGNED_FACES_PATH_KEY in params:
        align_path = params[c.ALIGNED_FACES_PATH_KEY]

    ann_dict = {}

    for subject_dir in os.listdir(dataset_path):

        subject_path = os.path.join(dataset_path, subject_dir)

        # Stray files in the dataset root (OS metadata, readme files, ...)
        # are not subjects: listing them would raise and abort the whole run
        if not os.path.isdir(subject_path):
            continue

        for im_name in os.listdir(subject_path):

            # Path of image relative to dataset path
            rel_im_path = os.path.join(subject_dir, im_name)

            # Every image gets an entry, even when no face is found
            ann_dict[rel_im_path] = {}

            print('rel_im_path', rel_im_path)

            # Full path of image
            im_path = os.path.join(subject_path, im_name)

            # Detect faces in image and take first result
            result_dict = detect_faces_in_image(
                im_path, align_path, params, False)

            if c.FACES_KEY in result_dict:
                faces = result_dict[c.FACES_KEY]
                if len(faces) > 0:
                    ann_dict[rel_im_path][c.BBOX_KEY] = faces[0][c.BBOX_KEY]

    save_YAML_file(ann_file_path, ann_dict)
def _bbox_strictly_inside(inner, outer):
    """
    Check if bounding box `inner` lies strictly inside bounding box `outer`.
    Both bounding boxes are given as (x, y, width, height).
    """
    in_x1 = inner[0]
    in_y1 = inner[1]
    in_x2 = in_x1 + inner[2]
    in_y2 = in_y1 + inner[3]

    out_x1 = outer[0]
    out_y1 = outer[1]
    out_x2 = out_x1 + outer[2]
    out_y2 = out_y1 + outer[3]

    return ((in_x1 > out_x1) and (in_y1 > out_y1) and
            (in_x2 < out_x2) and (in_y2 < out_y2))


def _group_letter_bboxs_by_row(all_letters, ord_bboxs):
    """
    Group bounding boxes of found letters by text row.

    A row is started by each not yet assigned letter with a non-empty
    recognition result. Following letters join the row when their top or
    bottom edge falls within the vertical extent of the starting letter
    (with a tolerance of c.MAX_BBOX_DIFF). A letter that joins the row
    but lies strictly inside the last kept letter is dropped.

    :rtype: list
    :returns: list of rows, each row being a list of bounding boxes
    """
    rows_bboxs = []

    # Indexes of letters that must not be considered anymore
    assigned_idxs = set()

    for lett_idx, lett in enumerate(all_letters):

        if (lett_idx in assigned_idxs) or (len(lett) == 0):
            continue

        assigned_idxs.add(lett_idx)

        bbox = ord_bboxs[lett_idx]
        y1 = bbox[1]
        y2 = y1 + bbox[3]

        big_bbox = bbox
        row_bboxs = [bbox]

        for idx2 in range(lett_idx + 1, len(all_letters)):

            if idx2 in assigned_idxs:
                continue

            bbox2 = ord_bboxs[idx2]
            y12 = bbox2[1]
            y22 = y12 + bbox2[3]

            top_in_row = (y12 > (y1 - c.MAX_BBOX_DIFF)) and (y12 < y2)
            bottom_in_row = (y22 > y1) and (y22 < (y2 + c.MAX_BBOX_DIFF))

            if not (top_in_row or bottom_in_row):
                continue

            # Letter belongs to this row, whether it is kept or not
            assigned_idxs.add(idx2)

            # Discard letter if it is inside previous letter
            if not _bbox_strictly_inside(bbox2, big_bbox):
                row_bboxs.append(bbox2)
                big_bbox = bbox2

        rows_bboxs.append(row_bboxs)

    return rows_bboxs


def _read_words_in_block(block_im, api):
    """
    Redraw the letters found in a block image as filled black shapes
    on a white background and read them with Tesseract.
    The content of `block_im` is overwritten.

    :rtype: list
    :returns: words recognized in the block
    """
    block_result_dict = find_letters_in_image(block_im, api, False, False)

    block_contours = block_result_dict[c.CONTOURS_KEY]
    block_hierarchy = block_result_dict[c.HIERARCHY_KEY]
    block_all_letters = block_result_dict[c.ALL_LETTERS_KEY]
    block_ord_bboxs = block_result_dict[c.ORD_BBOXS_KEY]
    block_ord_contour_idxs = block_result_dict[c.ORD_CONTOUR_IDXS_KEY]

    # Start from a blank (white) image
    block_im[:, :] = 255

    big_bbox = None

    for i, lett in enumerate(block_all_letters):

        if len(lett) == 0:
            continue

        bbox = block_ord_bboxs[i]

        if big_bbox is None:
            # NOTE(review): the first letter only initializes the reference
            # bounding box and is not drawn - confirm that this is intended
            big_bbox = bbox
            continue

        # Discard letter if it is inside previous letter
        if not _bbox_strictly_inside(bbox, big_bbox):
            contour_idx = block_ord_contour_idxs[i]
            cv2.drawContours(block_im, block_contours, contour_idx, 0, -1,
                             cv2.CV_AA, block_hierarchy, 1)
            big_bbox = bbox

    # Transform image into the format accepted by Tesseract
    shape_1 = block_im.shape[1]
    shape_0 = block_im.shape[0]
    depth = cv.IPL_DEPTH_8U
    bitmap = cv.CreateImageHeader((shape_1, shape_0), depth, 1)
    cv.SetData(bitmap, block_im.tostring(),
               block_im.dtype.itemsize * 1 * shape_1)

    api.SetPageSegMode(tesseract.PSM_SINGLE_BLOCK)
    tesseract.SetCvImage(bitmap, api)

    # split() returns an empty list when no text is found
    return api.GetUTF8Text().split()


def get_tag_from_image(im_path, params=None, api=None):
    """
    Find tag in image captions.

    Letters are located in the image and grouped by row; each row is
    then read with Tesseract and the words found are compared with the
    known tags (and, optionally, with the blacklist).

    :type im_path: string
    :param im_path: path of image to be analyzed

    :type params: dictionary
    :param params: configuration parameters to be used
                   for the caption recognition (see table)

    :type api: Tesseract TessBaseAPI
    :param api: api to be used for the Optical Character Recognition.
                If None, a new api is created and initialized

    :rtype: dictionary
    :returns: dictionary with results (see table)

    =========================== ======================================== ==============
    Key (params)                Value                                    Default value
    =========================== ======================================== ==============
    caption_results_file_path   Path of file where caption recognition
                                results will be saved
    lev_ratio_pct_threshold     Minimum threshold for considering        0.8
                                captions in frame
    min_tag_length              Minimum length of tags considered        10
                                in caption recognition
    tags_file_path              Path of text file containing
                                list of tags
    tesseract_parent_dir_path   Path of directory containing
                                'tesseract' directory
    use_blacklist               If True, use blacklist of items          True
                                that make the results of the
                                caption recognition on a frame
                                rejected
    use_levenshtein             If True, words found in image            True
                                by caption recognition and tags
                                are compared by using
                                the Levenshtein distance
    =========================== ======================================== ==============

    ================ ==================================================
    Key (results)    Value
    ================ ==================================================
    assigned_tag     Predicted tag (most similar tag)
    eq_letters_nr    Similarity value for most similar tag
                     (not normalized)
    tot_letters_nr   Maximum possible similarity value
    confidence       Confidence associated to prediction
                     (normalized similarity value)
    tags             Set of tags in dictionary
    ================ ==================================================
    """

    # NOTE: cv2.imread returns None when the image cannot be read
    gray_im = cv2.imread(im_path, cv2.IMREAD_GRAYSCALE)

    if api is None:

        # Tesseract init
        api = tesseract.TessBaseAPI()

        # Set parent directory of "tessdata" directory
        tesseract_parent_dir_path = c.TESSERACT_PARENT_DIR_PATH
        if ((params is not None) and
                (c.TESSERACT_PARENT_DIR_PATH_KEY in params)):
            tesseract_parent_dir_path = params[c.TESSERACT_PARENT_DIR_PATH_KEY]

        api.Init(tesseract_parent_dir_path, "eng", tesseract.OEM_DEFAULT)
        api.SetVariable(
            "tessedit_char_whitelist",
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")

    # Selected on every call: reading a row below leaves the api in
    # PSM_SINGLE_BLOCK mode, so a reused api must be switched back
    api.SetPageSegMode(tesseract.PSM_SINGLE_CHAR)

    result_dict = find_letters_in_image(gray_im, api, True, False)

    all_letters = result_dict[c.ALL_LETTERS_KEY]
    ord_bboxs = result_dict[c.ORD_BBOXS_KEY]

    # Divide letters by row
    rows_bboxs = _group_letter_bboxs_by_row(all_letters, ord_bboxs)

    im_height, im_width = gray_im.shape

    words = []

    for row_bboxs in rows_bboxs:

        # Bounding box enclosing all letters in row
        x1_min = im_width
        y1_min = im_height
        x2_max = 0
        y2_max = 0

        for bbox in row_bboxs:
            x1_min = min(x1_min, bbox[0])
            y1_min = min(y1_min, bbox[1])
            x2_max = max(x2_max, bbox[0] + bbox[2])
            y2_max = max(y2_max, bbox[1] + bbox[3])

        # Start of region must not be negative: a negative index would be
        # counted from the end of the image, giving a wrong or empty region.
        # An end beyond the image size is truncated by the slicing itself
        y_start = max(y1_min - c.LETT_MARGIN, 0)
        x_start = max(x1_min - c.LETT_MARGIN, 0)

        # Cut block region from original image and add a white border
        block_im = cv2.copyMakeBorder(
            gray_im[y_start: y2_max + c.LETT_MARGIN,
                    x_start: x2_max + c.LETT_MARGIN],
            c.LETT_MARGIN, c.LETT_MARGIN, c.LETT_MARGIN, c.LETT_MARGIN,
            cv2.BORDER_CONSTANT, value=255)

        words.extend(_read_words_in_block(block_im, api))

    use_blacklist = c.USE_BLACKLIST
    tags_file_path = None
    if params is not None:
        use_blacklist = params.get(c.USE_BLACKLIST_KEY, use_blacklist)
        tags_file_path = params.get(c.TAGS_FILE_PATH_KEY)

    fm = None
    if tags_file_path:
        # Load tags from file
        tags = get_tags_from_file(tags_file_path)
    else:
        fm = FaceModels(params)
        tags = fm.get_tags()

    # Result returned when no tag can be assigned
    result_dict = {c.ASSIGNED_TAG_KEY: c.UNDEFINED_TAG,
                   c.EQ_LETTERS_NR_KEY: 0,
                   c.TOT_LETTERS_NR_KEY: 0,
                   c.CONFIDENCE_KEY: 0,
                   c.TAGS_KEY: tags}

    blacklist_results = None
    if use_blacklist:
        # Blacklist comes from face models, that have not been loaded yet
        # if tags have been read from file
        if fm is None:
            fm = FaceModels(params)

        # Check if one blacklist item is found in image
        blacklist = fm.get_blacklist()
        if len(blacklist) > 0:
            blacklist_results = find_most_similar_tag(
                blacklist, words, params)

    # Tags are searched only if no blacklist item has been found
    if ((blacklist_results is None) or
            (blacklist_results[c.ASSIGNED_TAG_KEY] == c.UNDEFINED_TAG)):
        if len(tags) > 0:
            result_dict = find_most_similar_tag(tags, words, params)

    # Save file with results
    if params is not None and c.CAPTION_RESULTS_FILE_PATH_KEY in params:
        caption_results_file_path = params[c.CAPTION_RESULTS_FILE_PATH_KEY]
        save_YAML_file(caption_results_file_path, result_dict)

    return result_dict