Example #1
def create_ann_file_for_dataset(dataset_path, ann_file_path, params=None):
    """
    Create a YAML file with annotations for the given dataset.
    For each image in the dataset, the position and size
    of the detected face are stored.

    :type dataset_path: string
    :param dataset_path: path of dataset

    :type ann_file_path: string
    :param ann_file_path: path of file that will contain annotations

    :type params: dictionary
    :param params: configuration parameters (see table)

    ============================================  ========================================  =============================
    Key (params)                                  Value                                     Default value
    ============================================  ========================================  =============================
    aligned_faces_path                            Path of directory for aligned faces
    check_eye_positions                           If True, check eye positions              True
    classifiers_dir_path                          Path of directory with OpenCV
                                                  cascade classifiers
    eye_detection_classifier                      Classifier for eye detection              'haarcascade_mcs_lefteye.xml'
    face_detection_algorithm                      Classifier for face detection             'HaarCascadeFrontalFaceAlt2'
                                                  ('HaarCascadeFrontalFaceAlt',
                                                  'HaarCascadeFrontalFaceAltTree',
                                                  'HaarCascadeFrontalFaceAlt2',
                                                  'HaarCascadeFrontalFaceDefault',
                                                  'HaarCascadeProfileFace',
                                                  'HaarCascadeFrontalAndProfileFaces',
                                                  'HaarCascadeFrontalAndProfileFaces2',
                                                  'LBPCascadeFrontalface',
                                                  'LBPCascadeProfileFace' or
                                                  'LBPCascadeFrontalAndProfileFaces')
    flags                                         Flags used in face detection              'DoCannyPruning'
                                                  ('DoCannyPruning', 'ScaleImage',
                                                  'FindBiggestObject', 'DoRoughSearch').
                                                  If 'DoCannyPruning' is used, regions
                                                  that do not contain lines are discarded.
                                                  If 'ScaleImage' is used, the image
                                                  rather than the detector is scaled
                                                  (it can be advantageous in terms of
                                                  memory and cache use).
                                                  If 'FindBiggestObject' is used,
                                                  only the biggest object is returned
                                                  by the detector.
                                                  'DoRoughSearch', used together with
                                                  'FindBiggestObject',
                                                  terminates the search as soon as
                                                  the first candidate object is found
    min_neighbors                                 Minimum number of neighbor bounding       5
                                                  boxes for retaining face detection
    min_size_height                               Minimum height of face detection          20
                                                  bounding box (in pixels)
    min_size_width                                Minimum width of face detection           20
                                                  bounding box (in pixels)
    scale_factor                                  Scale factor between two scans            1.1
                                                  in face detection
    max_eye_angle                                 Maximum inclination of the line           0.125
                                                  connecting the eyes
                                                  (in % of pi radians)
    min_eye_distance                              Minimum distance between eyes             0.25
                                                  (in % of the width of the face
                                                  bounding box)
    nose_detection_classifier                     Classifier for nose detection             'haarcascade_mcs_nose.xml'
    software_test_file                            Path of image to be used for
                                                  software test
    use_nose_pos_in_detection                     If True, detections with no good          False
                                                  nose position are discarded
    ============================================  ========================================  =============================
    """

    # Set parameters
    align_path = c.ALIGNED_FACES_PATH
    if params:
        if c.ALIGNED_FACES_PATH_KEY in params:
            align_path = params[c.ALIGNED_FACES_PATH_KEY]

    ann_dict = {}
    for subject_dir in os.listdir(dataset_path):
        subject_path = os.path.join(dataset_path, subject_dir)
        for im_name in os.listdir(subject_path):
            # Path of image relative to dataset path
            rel_im_path = os.path.join(subject_dir, im_name)
            ann_dict[rel_im_path] = {}
            print('rel_im_path', rel_im_path)

            # Full path of image
            im_path = os.path.join(subject_path, im_name)

            # Detect faces in the image and take the first result
            result_dict = detect_faces_in_image(
                im_path, align_path, params, False)

            if c.FACES_KEY in result_dict:
                faces = result_dict[c.FACES_KEY]
                if len(faces) > 0:
                    face_dict = faces[0]
                    bbox = face_dict[c.BBOX_KEY]
                    ann_dict[rel_im_path][c.BBOX_KEY] = bbox

    save_YAML_file(ann_file_path, ann_dict)
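
# --- Usage sketch (not from the original source) ---
# A minimal, hypothetical invocation; the dictionary keys follow the
# docstring table and are assumed to match the string constants in
# module 'c' (e.g. c.ALIGNED_FACES_PATH_KEY == 'aligned_faces_path').
example_params = {
    'aligned_faces_path': '/tmp/aligned_faces',
    'check_eye_positions': True,
    'face_detection_algorithm': 'HaarCascadeFrontalFaceAlt2',
    'flags': 'DoCannyPruning',
    'min_neighbors': 5,
    'min_size_width': 20,
    'min_size_height': 20,
    'scale_factor': 1.1
}
create_ann_file_for_dataset('/data/face_dataset',     # one directory per subject
                            '/data/annotations.yml',  # YAML file to be written
                            params=example_params)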
Example #2
def get_tag_from_image(im_path, params=None, api=None):
    """
    Find a tag in the image captions.

    :type im_path: string
    :param im_path: path of image to be analyzed

    :type params: dictionary
    :param params: configuration parameters to be used for
                   the caption recognition

    :type api: Tesseract TessBaseAPI
    :param api: api to be used for the Optical Character Recognition

    :rtype: dictionary
    :returns: dictionary with results (see table)

    ============================================  ========================================  ==============
    Key (params)                                  Value                                     Default value
    ============================================  ========================================  ==============
    caption_results_file_path                     Path of file where caption recognition
                                                  results will be saved
    lev_ratio_pct_threshold                       Minimum threshold for considering         0.8
                                                  captions in frame
    min_tag_length                                Minimum length of tags considered         10
                                                  in caption recognition
    tags_file_path                                Path of text file containing
                                                  list of tags
    tesseract_parent_dir_path                     Path of directory containing
                                                  'tesseract' directory
    use_blacklist                                 If True, use a blacklist of items         True
                                                  that cause the caption recognition
                                                  results for a frame to be rejected
    use_levenshtein                               If True, words found in image             True
                                                  by caption recognition and tags
                                                  are compared by using
                                                  the Levenshtein distance
    ============================================  ========================================  ==============

    =====================================  =====================================
    Key (results)                          Value
    =====================================  =====================================
    assigned_tag                           Predicted tag (most similar tag)
    eq_letters_nr                          Similarity value for most similar tag
                                           (not normalized)
    tot_letters_nr                         Maximum possible similarity value
    confidence                             Confidence associated with prediction
                                           (normalized similarity value)
    tags                                   Set of tags in dictionary
    =====================================  =====================================
    """

    gray_im = cv2.imread(im_path, cv2.IMREAD_GRAYSCALE)

    if api is None:
        # Tesseract init
        api = tesseract.TessBaseAPI()

        # Set parent directory of "tessdata" directory
        tesseract_parent_dir_path = c.TESSERACT_PARENT_DIR_PATH
        if (params is not None) and (c.TESSERACT_PARENT_DIR_PATH_KEY in params):
            tesseract_parent_dir_path = params[c.TESSERACT_PARENT_DIR_PATH_KEY]

        api.Init(tesseract_parent_dir_path, "eng", tesseract.OEM_DEFAULT)

        api.SetVariable("tessedit_char_whitelist",
                        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")
        api.SetPageSegMode(tesseract.PSM_SINGLE_CHAR)

    result_dict = find_letters_in_image(gray_im, api, True, False)

    contours = result_dict[c.CONTOURS_KEY]
    hierarchy = result_dict[c.HIERARCHY_KEY]
    all_letters = result_dict[c.ALL_LETTERS_KEY]
    ord_bboxs = result_dict[c.ORD_BBOXS_KEY]
    ord_contour_idxs = result_dict[c.ORD_CONTOUR_IDXS_KEY]

    # Divide letters by row
    rows = []
    rows_bboxs = []
    rows_contour_idxs = []
    # Indexes of letters that have already been assigned to a row
    idx_black_list = []

    for lett_idx, lett in enumerate(all_letters):
        if (lett_idx not in idx_black_list) and (len(lett) > 0):

            idx_black_list.append(lett_idx)
            bbox = ord_bboxs[lett_idx]
            x1 = bbox[0]
            y1 = bbox[1]
            w = bbox[2]
            h = bbox[3]
            y2 = y1 + h

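            # big_bbox tracks the last letter accepted into the row; letters
            # fully contained in it are treated as nested contours and skipped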
            big_bbox = bbox

            row = [lett]
            row_bboxs = [bbox]

            contour_idx = ord_contour_idxs[lett_idx]
            row_contour_idxs = [contour_idx]

            for idx2 in range((lett_idx + 1), len(all_letters)):

                if idx2 not in idx_black_list:

                    lett2 = all_letters[idx2]
                    bbox2 = ord_bboxs[idx2]
                    x12 = bbox2[0]
                    y12 = bbox2[1]
                    w2 = bbox2[2]
                    h2 = bbox2[3]
                    x22 = x12 + w2
                    y22 = y12 + h2

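                    # Assign lett2 to the current row if its bounding box
                    # vertically overlaps the row, within a tolerance of
                    # c.MAX_BBOX_DIFF pixels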
                    if (((y12 > (y1 - c.MAX_BBOX_DIFF)) and (y12 < y2)) or
                            ((y22 > y1) and (y22 < (y2 + c.MAX_BBOX_DIFF)))):
                        idx_black_list.append(idx2)

                        # Discard letter if it is inside previous letter
                        big_x = big_bbox[0]
                        big_y = big_bbox[1]
                        big_w = big_bbox[2]
                        big_h = big_bbox[3]
                        big_x2 = big_x + big_w
                        big_y2 = big_y + big_h

                        if (not ((x12 > big_x) and (y12 > big_y)
                                 and (x22 < big_x2) and (y22 < big_y2))):
                            row.append(lett2)

                            row_bboxs.append(bbox2)

                            contour_idx = ord_contour_idxs[idx2]

                            row_contour_idxs.append(contour_idx)

                            big_bbox = bbox2

            rows.append(row)
            rows_bboxs.append(row_bboxs)
            rows_contour_idxs.append(row_contour_idxs)


    im_height, im_width = gray_im.shape

    words = []
    for row_idx, row in enumerate(rows):

        x1_min = im_width
        y1_min = im_height
        x2_max = 0
        y2_max = 0

        for i in range(0, len(row)):

            contour_bbox = rows_bboxs[row_idx][i]
            x1 = contour_bbox[0]
            y1 = contour_bbox[1]
            w = contour_bbox[2]
            h = contour_bbox[3]
            x2 = x1 + w
            y2 = y1 + h

            if x1 < x1_min:
                x1_min = x1
            if y1 < y1_min:
                y1_min = y1
            if x2 > x2_max:
                x2_max = x2
            if y2 > y2_max:
                y2_max = y2

        # Extract the row region from the grayscale image,
        # adding a white margin around it

        block_im = cv2.copyMakeBorder(
            gray_im[y1_min - c.LETT_MARGIN: y2_max + c.LETT_MARGIN,
                    x1_min - c.LETT_MARGIN: x2_max + c.LETT_MARGIN],
            c.LETT_MARGIN, c.LETT_MARGIN, c.LETT_MARGIN, c.LETT_MARGIN,
            cv2.BORDER_CONSTANT, value=255)

        block_result_dict = find_letters_in_image(block_im, api, False, False)
        block_contours = block_result_dict[c.CONTOURS_KEY]
        block_hierarchy = block_result_dict[c.HIERARCHY_KEY]
        block_all_letters = block_result_dict[c.ALL_LETTERS_KEY]
        block_ord_bboxs = block_result_dict[c.ORD_BBOXS_KEY]
        block_ord_contour_idxs = block_result_dict[c.ORD_CONTOUR_IDXS_KEY]

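        # Reset the block to white; the letters accepted below are redrawn
        # in black from their contours before OCR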
        block_im[:, :] = 255

        is_first_lett = True

        big_bbox = None

        for i in range(0, len(block_all_letters)):

            lett = block_all_letters[i]

            if len(lett) > 0:

                if is_first_lett:
                    big_bbox = block_ord_bboxs[i]
                    is_first_lett = False

                else:

                    big_x = big_bbox[0]
                    big_y = big_bbox[1]
                    big_w = big_bbox[2]
                    big_h = big_bbox[3]
                    big_x2 = big_x + big_w
                    big_y2 = big_y + big_h

                    bbox = block_ord_bboxs[i]
                    x1 = bbox[0]
                    y1 = bbox[1]
                    w = bbox[2]
                    h = bbox[3]
                    x2 = x1 + w
                    y2 = y1 + h

                    # Discard letter if it is inside previous letter

                    if (not ((x1 > big_x) and (y1 > big_y)
                             and (x2 < big_x2) and (y2 < big_y2))):
                        contour_idx = block_ord_contour_idxs[i]

                        cv2.drawContours(block_im, block_contours,
                                         contour_idx, 0, -1, cv2.CV_AA,
                                         block_hierarchy, 1)

                        big_bbox = bbox

        # Wrap the image in a legacy IplImage header, as required by the
        # old python-tesseract binding
        shape_1 = block_im.shape[1]
        shape_0 = block_im.shape[0]
        depth = cv.IPL_DEPTH_8U
        bitmap = cv.CreateImageHeader((shape_1, shape_0), depth, 1)
        cv.SetData(bitmap, block_im.tostring(),
                   block_im.dtype.itemsize * 1 * shape_1)

        api.SetPageSegMode(tesseract.PSM_SINGLE_BLOCK)
        tesseract.SetCvImage(bitmap, api)
        text = api.GetUTF8Text().rstrip()

        if len(text) > 0:
            words.extend(text.split())


    tags = []
    use_blacklist = c.USE_BLACKLIST
    tags_file_path = None
    fm = None

    if params is not None:
        if c.USE_BLACKLIST_KEY in params:
            use_blacklist = params[c.USE_BLACKLIST_KEY]
        if c.TAGS_FILE_PATH_KEY in params:
            tags_file_path = params[c.TAGS_FILE_PATH_KEY]

    if tags_file_path:
        # Load tags from file
        tags = get_tags_from_file(tags_file_path)
    else:
        fm = FaceModels(params)
        tags = fm.get_tags()

    assigned_tag = c.UNDEFINED_TAG
    eq_letters_nr = 0
    tot_letters_nr = 0
    lev_ratio_pct = 0

    result_dict = {c.ASSIGNED_TAG_KEY: assigned_tag,
                   c.EQ_LETTERS_NR_KEY: eq_letters_nr,
                   c.TOT_LETTERS_NR_KEY: tot_letters_nr,
                   c.CONFIDENCE_KEY: lev_ratio_pct, c.TAGS_KEY: tags}

    blacklist_results = None
    if use_blacklist:
        # Check whether a blacklist item is found in the image
        if fm is None:
            # fm does not exist yet when tags were loaded from file
            fm = FaceModels(params)
        blacklist = fm.get_blacklist()

        if len(blacklist) > 0:
            blacklist_results = find_most_similar_tag(blacklist, words, params)

    if ((blacklist_results is None) or
            (blacklist_results[c.ASSIGNED_TAG_KEY] == c.UNDEFINED_TAG)):
        if len(tags) > 0:
            result_dict = find_most_similar_tag(tags, words, params)

    # Save file with results
    if params is not None and c.CAPTION_RESULTS_FILE_PATH_KEY in params:
        caption_results_file_path = params[c.CAPTION_RESULTS_FILE_PATH_KEY]
        save_YAML_file(caption_results_file_path, result_dict)

    return result_dict
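
# --- Usage sketch (not from the original source) ---
# Hypothetical paths; the parameter keys follow the docstring table and
# are assumed to match the string constants in module 'c'.
example_params = {
    'tesseract_parent_dir_path': '/opt/ocr',   # contains the 'tesseract' directory
    'tags_file_path': '/data/tags.txt',        # text file with the list of tags
    'min_tag_length': 10,
    'lev_ratio_pct_threshold': 0.8,
    'use_levenshtein': True,
    'use_blacklist': True
}
result = get_tag_from_image('/data/frame_0042.png', params=example_params)
print(result[c.ASSIGNED_TAG_KEY], result[c.CONFIDENCE_KEY])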