def test_get_tag_from_image_with_threshold(self):
    """Caption recognition with an extreme Levenshtein threshold.

    With the ratio threshold set to 0.99 no tag in the file should be
    close enough to the recognized caption, so the assigned tag must be
    the 'undefined' sentinel.
    """
    # Build test-fixture paths portably instead of concatenating os.sep
    image_path = os.path.join(
        '..', 'test_files', 'caption_recognition', 'Test.jpg')
    tags_file_path = os.path.join(
        '..', 'test_files', 'caption_recognition', 'Tags.txt')

    params = {c.USE_LEVENSHTEIN_KEY: True,
              c.LEV_RATIO_PCT_THRESH_KEY: 0.99,
              c.MIN_TAG_LENGTH_KEY: 0,
              c.USE_BLACKLIST_KEY: False,
              c.TAGS_FILE_PATH_KEY: tags_file_path}

    result_dict = get_tag_from_image(image_path, params)

    assigned_tag = result_dict[c.ASSIGNED_TAG_KEY]
    # Assigned tag for this image must be undefined
    # (assertEqual: assertEquals is a deprecated alias removed in 3.12)
    self.assertEqual(assigned_tag, c.UNDEFINED_TAG)

    # The number of matching letters can never exceed the total count
    eq_letters_nr = result_dict[c.EQ_LETTERS_NR_KEY]
    tot_letters_nr = result_dict[c.TOT_LETTERS_NR_KEY]
    self.assertTrue(eq_letters_nr <= tot_letters_nr)
def test_get_tag_from_image(self):
    """Caption recognition with threshold and blacklist disabled.

    With a zero Levenshtein threshold and no blacklist, the recognizer
    must always pick some tag from the tags file.
    """
    # Build test-fixture paths portably instead of concatenating os.sep
    image_path = os.path.join(
        '..', 'test_files', 'caption_recognition', 'Test.jpg')
    tags_file_path = os.path.join(
        '..', 'test_files', 'caption_recognition', 'Tags.txt')

    tags = get_tags_from_file(tags_file_path)

    params = {c.USE_LEVENSHTEIN_KEY: True,
              c.LEV_RATIO_PCT_THRESH_KEY: 0,
              c.MIN_TAG_LENGTH_KEY: 0,
              c.USE_BLACKLIST_KEY: False,
              c.TAGS_FILE_PATH_KEY: tags_file_path}

    result_dict = get_tag_from_image(image_path, params)

    assigned_tag = result_dict[c.ASSIGNED_TAG_KEY]
    # Threshold and blacklist are not used,
    # so assigned tag must be in tags file
    self.assertIn(assigned_tag, tags)

    # The number of matching letters can never exceed the total count
    eq_letters_nr = result_dict[c.EQ_LETTERS_NR_KEY]
    tot_letters_nr = result_dict[c.TOT_LETTERS_NR_KEY]
    self.assertTrue(eq_letters_nr <= tot_letters_nr)
def analyze_image(image_path, params=None):
    """Run face detection and caption recognition on a single image.

    :type image_path: string
    :param image_path: path of image to be analyzed

    :type params: dictionary
    :param params: configuration parameters. Recognized keys (with
        defaults in parentheses) include:

        - align_path: directory where aligned faces are saved
        - check_eye_positions: check eye positions (True)
        - classifiers_dir_path: directory with OpenCV cascade classifiers
        - eye_detection_classifier: classifier for eye detection
          ('haarcascade_mcs_lefteye.xml')
        - face_detection_algorithm: classifier for face detection
          ('HaarCascadeFrontalFaceAlt2'; other Haar/LBP cascade names
          are accepted)
        - flags: face-detection flags ('DoCannyPruning'; also
          'ScaleImage', 'FindBiggestObject', 'DoRoughSearch')
        - min_neighbors: minimum neighbor bounding boxes for retaining
          a face detection (5)
        - min_size_height / min_size_width: minimum face bounding-box
          size in pixels (20 / 20)
        - scale_factor: scale factor between two detection scans (1.1)
        - max_eye_angle: maximum inclination of the eye line, in % of
          pi radians (0.125)
        - min_eye_distance: minimum eye distance, in % of the face
          bounding-box width (0.25)
        - nose_detection_classifier: classifier for nose detection
          ('haarcascade_mcs_nose.xml')
        - software_test_file: image used for software test
        - use_nose_pos_in_detection: discard detections with a bad
          nose position (False)
        - lev_ratio_pct_threshold: minimum threshold for considering
          captions in frame (0.8)
        - min_tag_length: minimum tag length in caption recognition (10)
        - tags_file_path: text file containing the list of tags
        - tesseract_parent_dir_path: directory containing the
          'tesseract' directory
        - use_blacklist: use blacklist of items that reject a frame's
          caption-recognition result (True)
        - use_levenshtein: compare recognized words and tags with the
          Levenshtein distance (True)

    :rtype: tuple
    :returns: a (label, tag, face) tuple, where label is the assigned
        label, tag the assigned tag and face the detected face in image
        (as an OpenCV image); label and tag are -1 when no face is found
    """
    # Failure sentinels: overwritten only when a face is detected
    label, tag = -1, -1

    # Directory for aligned faces: caller-supplied value wins over default
    if params is not None:
        align_path = params.get(c.ALIGNED_FACES_PATH_KEY,
                                c.ALIGNED_FACES_PATH)
    else:
        align_path = c.ALIGNED_FACES_PATH

    # Face detection; caption recognition only runs when a face is found
    face = get_detected_cropped_face(image_path, align_path, params)
    if face is not None:
        cap_rec_res = get_tag_from_image(image_path, params)
        label = cap_rec_res[c.ASSIGNED_LABEL_KEY]
        tag = cap_rec_res[c.ASSIGNED_TAG_KEY]

    return label, tag, face
def caption_experiments(params=None):
    """Execute caption recognition experiments.

    Iterates over the per-person directories of the test set (the
    directory name is the annotated tag), runs caption recognition on
    every image, and prints the mean/std recognition rate, the mean
    confidence of true positives and the mean analysis time.

    :type params: dictionary
    :param params: configuration parameters to be used for the
        experiment. Recognized keys (defaults in parentheses):

        - test_set_path: directory containing the test set
        - lev_ratio_pct_threshold: minimum threshold for considering
          captions in frame (0.8)
        - min_tag_length: minimum tag length in caption recognition (10)
        - tags_file_path: text file containing the list of tags
        - tesseract_parent_dir_path: directory containing the
          'tesseract' directory
        - use_blacklist: use blacklist of items that reject a frame's
          caption-recognition result (True)
        - use_levenshtein: compare recognized words and tags with the
          Levenshtein distance (True)
    """
    test_set_path = ce.CAPTION_RECOGNITION_TEST_SET_PATH
    if params is not None and ce.TEST_SET_PATH_KEY in params:
        test_set_path = params[ce.TEST_SET_PATH_KEY]

    # Iterate over all directories with images
    rec_rate_list = []
    conf_list = []
    time_list = []
    for images_dir in os.listdir(test_set_path):
        # Directory name is the annotated tag for all images inside it
        ann_tag = images_dir
        images_dir_complete_path = os.path.join(test_set_path, images_dir)

        # Skip stray files (e.g. .DS_Store, README) in the test set root;
        # os.listdir on them would raise NotADirectoryError
        if not os.path.isdir(images_dir_complete_path):
            continue

        true_pos_nr = 0.0
        person_test_images_nr = 0.0
        for image in os.listdir(images_dir_complete_path):
            image_complete_path = os.path.join(
                images_dir_complete_path, image)

            # Time each analysis with OpenCV's high-resolution clock
            start_time = cv2.getTickCount()
            result_dict = get_tag_from_image(image_complete_path, params)
            time_in_clocks = cv2.getTickCount() - start_time
            time_in_seconds = time_in_clocks / cv2.getTickFrequency()
            time_list.append(time_in_seconds)

            assigned_tag = result_dict[c.ASSIGNED_TAG_KEY]
            conf = result_dict[c.CONFIDENCE_KEY]
            if assigned_tag == ann_tag:
                true_pos_nr += 1
                # Confidence is collected only for true positives
                conf_list.append(conf)
            person_test_images_nr += 1

        # Guard against empty person directories (ZeroDivisionError)
        if person_test_images_nr > 0:
            rec_rate = true_pos_nr / person_test_images_nr
            rec_rate_list.append(rec_rate)

    # Guard against an empty test set: numpy.mean([]) yields NaN
    # with a RuntimeWarning
    if not rec_rate_list:
        print('\n ### RESULTS ###\n')
        print('No test images found in ' + str(test_set_path))
        return

    mean_rec_rate = float(numpy.mean(rec_rate_list))
    std_rec_rate = float(numpy.std(rec_rate_list))
    # conf_list may be empty if there were no true positives
    mean_conf = float(numpy.mean(conf_list)) if conf_list else 0.0
    mean_time = float(numpy.mean(time_list))

    print("\n ### RESULTS ###\n")
    print('Mean of recognition rate: ' + str(mean_rec_rate))
    print('Standard deviation of recognition rate: ' + str(std_rec_rate))
    print('Mean of confidence in true positives: ' + str(mean_conf))
    print('Mean analysis time: ' + str(mean_time) + ' s')