Code example #1
File: imgprocess.py  Project: shaficse/P2PaLA
def _processData(params):
    """
    Resize image and extract mask from PAGE file 
    """
    (
        img_path,
        out_size,
        out_folder,
        classes,
        line_width,
        line_color,
        build_labels,
        ext_mode,
        node_types,
    ) = params
    img_id = os.path.splitext(os.path.basename(img_path))[0]
    img_dir = os.path.dirname(img_path)

    img_data = cv2.imread(img_path)
    # --- resize image
    res_img = cv2.resize(img_data, (out_size[1], out_size[0]),
                         interpolation=cv2.INTER_CUBIC)
    new_img_path = os.path.join(out_folder, img_id + ".png")
    cv2.imwrite(new_img_path, res_img)
    # --- get label
    if build_labels:
        xml_path = os.path.join(img_dir, "page", img_id + ".xml")
        if not os.path.isfile(xml_path):
            # logger.critical('No xml found for file {}'.format(img_path))
            # --- TODO move to logger
            print("No xml found for file {}".format(img_path))
            raise Exception("Execution stopped due to critical errors")
        gt_data = pageData(xml_path)
        gt_data.parse()
        # --- build lines mask
        if ext_mode != "R":
            lin_mask = gt_data.build_baseline_mask(out_size, line_color,
                                                   line_width)
        # --- build regions mask
        if ext_mode == "LR":
            reg_mask = gt_data.build_mask(out_size, node_types, classes)
            label = np.array((lin_mask, reg_mask))
        elif ext_mode == "R":
            label = gt_data.build_mask(out_size, node_types, classes)
        else:
            label = lin_mask

        new_label_path = os.path.join(out_folder, img_id + ".pickle")
        with open(new_label_path, "wb") as fh:
            pickle.dump(label, fh, -1)
        return (new_img_path, new_label_path, xml_path)
    return (new_img_path, None, None)
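Because _processData unpacks a single params tuple, it lends itself to being mapped over a worker pool. Below is a minimal driver sketch, assuming the function (and its pageData dependency) is importable from imgprocess; all paths, sizes, and class values are placeholders, not values from the project.

# Hypothetical driver for _processData; paths and settings are illustrative only.
import glob
import multiprocessing as mp

from imgprocess import _processData  # assumed import location

out_size = (1024, 768)                      # target (rows, cols)
classes = {"paragraph": 1, "marginalia": 2} # placeholder class-to-color map
params_list = [
    (img_path,         # img_path
     out_size,         # out_size
     "./work",         # out_folder
     classes,          # classes
     8,                # line_width
     128,              # line_color
     True,             # build_labels
     "LR",             # ext_mode
     "TextRegion")     # node_types
    for img_path in glob.glob("./data/*.jpg")
]

if __name__ == "__main__":
    with mp.Pool(processes=4) as pool:
        results = pool.map(_processData, params_list)
    for new_img_path, new_label_path, xml_path in results:
        print(new_img_path, new_label_path, xml_path)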
Code example #2
def main():
    """Build a PAGE-XML file from img encoded data"""
    in_path = sys.argv[1]
    out_path = sys.argv[2]
    nm = re.split(r"-", sys.argv[3])
    name_map = {}
    for k in nm:
        z, c = re.split(r":", k)
        name_map[z] = tuple(int(v) for v in c.split(","))

    name_map = {"marginalia": [0, 0, 255]}
    formats = ["tif", "tiff", "png", "jpg", "jpeg", "JPG", "bmp"]
    img_paths = []
    for ext in formats:
        img_paths.extend(glob.glob(in_path + "/*." + ext))
    img_ids = [os.path.splitext(os.path.basename(x))[0] for x in img_paths]
    img_data = dict(zip(img_ids, img_paths))

    for img_id, img_p in img_data.items():
        page = pageData(os.path.join(out_path, img_id + ".xml"), logger=None)
        img_name = os.path.basename(img_p)
        # --- open file
        img = cv2.imread(img_p)
        rows, cols, _ = img.shape
        page.new_page(img_name, str(rows), str(cols))
        r_id = 0
        for zone_name, color in name_map.items():
            zones_img = np.all(img == color, axis=-1).astype(np.uint8)
            _, contours, hierarchy = cv2.findContours(zones_img,
                                                      cv2.RETR_EXTERNAL,
                                                      cv2.CHAIN_APPROX_SIMPLE)
            for cnt in contours:
                reg_coords = ""
                for x in cnt.reshape(-1, 2):
                    reg_coords = reg_coords + " {},{}".format(x[0], x[1])
                text_reg = page.add_element("TextRegion", str(r_id), zone_name,
                                            reg_coords.strip())
                r_id += 1
        page.save_xml()
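The zone-to-color mapping consumed by main is packed into the third command-line argument. A small, self-contained sketch of the expected format follows; the zone names and BGR values are made up, and the colors are cast to integers so they can be compared against image pixels.

import re

# Hypothetical argument value: zone entries separated by "-",
# each entry written as "<zone_name>:<B>,<G>,<R>" (OpenCV BGR order).
arg = "marginalia:0,0,255-paragraph:255,0,0"

name_map = {}
for entry in re.split(r"-", arg):
    zone, color = re.split(r":", entry)
    name_map[zone] = tuple(int(v) for v in color.split(","))

print(name_map)  # {'marginalia': (0, 0, 255), 'paragraph': (255, 0, 0)}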
Code example #3
File: imgprocess.py  Project: fendaq/P2PaLA
    def gen_page(self,img_id,data, reg_list=None, out_folder='./',
                 approx_alg=None, num_segments=None):
        """
        """
        self.approx_alg = self.approx_alg if approx_alg==None else approx_alg
        self.num_segments = self.num_segments if num_segments==None else num_segments
        self.logger.debug('Gen PAGE for image: {}'.format(img_id))
        #--- sym link to original image 
        #--- TODO: check if orig image exist
        img_name = os.path.basename(self.img_data[img_id])
        symlink_force(os.path.realpath(self.img_data[img_id]),
                      os.path.join(out_folder,img_name))
        o_img = cv2.imread(self.img_data[img_id])
        (o_rows, o_cols, _) = o_img.shape
        o_max = max(o_rows,o_cols)
        o_min = min(o_rows,o_cols)
        cScale = np.array([o_cols/self.out_size[1],
                           o_rows/self.out_size[0]])
        
        page = pageData(os.path.join(out_folder, 'page', img_id + '.xml'),
                        logger=self.logger)
        self.hyp_xml_list.append(page.filepath)
        self.hyp_xml_list.sort()
        page.new_page(img_name, str(o_rows), str(o_cols)) 
        ####
        if self.out_type == 'C':
            if self.ext_mode == 'L':
                lines = data[0].astype(np.uint8) 
                reg_list= ['full_page']
                colors = {'full_page':0}
                r_data = np.zeros(lines.shape, dtype=np.uint8)
            elif self.ext_mode == 'R':
                r_data = data[0]
                lines = np.zeros(r_data.shape, dtype=np.uint8)
                colors = self.classes
            elif self.ext_mode == 'LR':
                lines = data[0].astype(np.uint8) 
                r_data = data[1]
                colors = self.classes
            else:
                pass
        elif self.out_type == 'R':
            if self.ext_mode == 'L':
                l_color = (-1 - ((self.line_color*(2/255))-1))/2
                lines = np.zeros(data[0].shape, dtype = np.uint8)
                lines[data[0] >= l_color] = 1 
                reg_list= ['full_page']
                colors = {'full_page':128}
                r_data = np.zeros(lines.shape, dtype=np.uint8)
            elif self.ext_mode == 'R':
                r_data = data[1]
                colors = self.classes
                lines = np.zeros(r_data.shape, dtype=np.uint8)
            elif self.ext_mode == 'LR':
                l_color = (-1 - ((self.line_color*(2/255))-1))/2
                lines = np.zeros(data[0].shape, dtype = np.uint8)
                lines[data[0] >= l_color] = 1 
                r_data = data[1]
                colors = self.classes
            else:
                pass   
        else:
            pass
        reg_mask = np.zeros(r_data.shape,dtype='uint8')
        lin_mask = np.zeros(lines.shape,dtype='uint8')
        r_id = 0
        kernel = np.ones((5,5),np.uint8)

        #--- get regions and lines for each class
        for reg in reg_list:
            r_color = colors[reg]
            #--- filling the array is faster than creating a new one or multiplying by 0
            reg_mask.fill(0)
            if self.out_type == 'R':
                lim_inf = ((r_color - self.th_span)*(2/255)) - 1
                lim_sup = ((r_color + self.th_span)*(2/255)) - 1
                reg_mask[np.where((r_data > lim_inf) & (r_data < lim_sup))] = 1
            elif self.out_type == 'C':
                reg_mask[r_data == r_color] = 1
            else:
                pass

            _ , contours, hierarchy = cv2.findContours(reg_mask,
                                                   cv2.RETR_EXTERNAL,
                                                   cv2.CHAIN_APPROX_SIMPLE)
            for cnt in contours:
                #--- remove small objects
                if(cnt.shape[0] < 4):
                    continue
                if(cv2.contourArea(cnt) < 0.01*self.out_size[0]):
                    continue
                #--- get lines inside the region
                lin_mask.fill(0)
                rect = cv2.minAreaRect(cnt)
                #--- smooth the region a bit to prevent spikes
                epsilon = 0.005*cv2.arcLength(cnt,True)
                approx = cv2.approxPolyDP(cnt,epsilon,True)
                #box = np.array((rect[0][0], rect[0][1], rect[1][0], rect[1][1])).astype(int)
                r_id = r_id + 1
                approx= (approx*cScale).astype('int32')
                reg_coords = ''
                for x in approx.reshape(-1,2):
                    reg_coords = reg_coords + " {},{}".format(x[0],x[1])

                if not self.ext_mode == 'R':
                    cv2.fillConvexPoly(lin_mask,points=cnt, color=(1,1,1))
                    lin_mask = cv2.erode(lin_mask,kernel,iterations = 1)
                    lin_mask = cv2.dilate(lin_mask,kernel,iterations = 1)
                    reg_lines = lines * lin_mask
                    #--- search for the lines
                    _, l_cont, l_hier = cv2.findContours(reg_lines,
                                                  cv2.RETR_EXTERNAL,
                                                  cv2.CHAIN_APPROX_SIMPLE)
                    if (len(l_cont) == 0):
                        continue
                    #--- Add region to XML only if there is some line
                    text_reg = page.add_element('TextRegion',
                                            str(r_id),
                                            reg,
                                            reg_coords.strip())
                    n_lines = 0
                    for l_id,l_cnt in enumerate(l_cont):
                        if(l_cnt.shape[0] < 4):
                            continue
                        if (cv2.contourArea(l_cnt) < 0.01*self.out_size[0]):
                            continue
                        #--- convert to convexHull if poly is not convex
                        if (not cv2.isContourConvex(l_cnt)):
                            l_cnt = cv2.convexHull(l_cnt)
                        lin_coords = ''
                        l_cnt = (l_cnt*cScale).astype('int32')
                        for l_x in l_cnt.reshape(-1,2): 
                            lin_coords = lin_coords + " {},{}".format(l_x[0],l_x[1])
                        (is_line, approx_lin) = self._get_baseline(o_img, l_cnt)
                        if is_line == False:
                            continue
                        text_line = page.add_element('TextLine',
                                                 str(l_id) + '_' + str(r_id),
                                                 reg,
                                                 lin_coords.strip(),
                                                 parent=text_reg)
                        baseline = pa.points_to_str(approx_lin)
                        page.add_baseline(baseline, text_line)
                        n_lines += 1
                    #--- remove regions without text lines
                    if n_lines == 0:
                        page.remove_element(text_reg)
                else:
                    text_reg = page.add_element('TextRegion',
                                            str(r_id),
                                            reg,
                                            reg_coords.strip())

        page.save_xml()
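The gen_page variants all rescale every contour from the network's working resolution back to the original image size before writing it into the PAGE-XML Coords attribute. Below is a self-contained sketch of that rescaling and formatting step; the image sizes and the toy contour are made up.

import numpy as np

# Toy values: original image 3000x2000 (rows x cols), network output 1024x768.
o_rows, o_cols = 3000, 2000
out_size = (1024, 768)
cScale = np.array([o_cols / out_size[1], o_rows / out_size[0]])  # (x_scale, y_scale)

# A toy contour in network-resolution coordinates, shaped like OpenCV output (N, 1, 2).
approx = np.array([[[10, 20]], [[700, 20]], [[700, 1000]], [[10, 1000]]])

approx = (approx * cScale).astype("int32")
reg_coords = " ".join("{},{}".format(x, y) for x, y in approx.reshape(-1, 2))
print(reg_coords)  # "26,58 1822,58 1822,2929 26,2929"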
Code example #4
    def _extract_images_from_page(self, page_name):
        """
        Args:
            page_name (string): The name of the page from which to extract all the patches
            
        Returns:
        
            pandas.DataFrame: the coordinates of each patch together with the
                filename used to store it
        """
        logging.debug(f'page : {page_name}')

        x_arr_min = []
        x_arr_max = []
        y_arr_min = []
        y_arr_max = []
        filename_arr = []
        page_name_arr = []
        collumn_number = []
        patch_number = []
        MIN_LENGTH_OF_PATH = 50

        xml_path = os.path.join(self.xml_directory, page_name + ".xml")
        image_path = os.path.join(self.image_directory, page_name + ".jpg")

        #line detection

        def load_image(path: str):
            return cv2.imread(path)

        def show_image(img, title="") -> None:
            # visualization is disabled (early return); the plotting code below is kept commented out
            return
            #plt.figure(figsize=(10,15))
            #plt.imshow(img, cmap='Greys_r')
            #plt.title(title)
            #plt.show()

        def remove_connected_components(img, min_size=50):
            number_of_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(
                img, 8, cv2.CV_32S)
            # the first row of stats is the background; the last column is the total area in pixels
            sizes = stats[1:, -1]
            number_of_labels = number_of_labels - 1

            img_cleaned = np.full(img.shape, 0)
            for i in range(0, number_of_labels):
                if sizes[i] >= min_size:
                    img_cleaned[labels == i + 1] = 255
                else:
                    number_of_labels = number_of_labels - 1

            return img_cleaned

        def binarize_image(img, otsu=True):
            img_binarized = None
            if otsu:
                threshold, img_binarized = cv2.threshold(
                    img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            else:
                threshold, img_binarized = cv2.threshold(
                    img, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
            return img_binarized

        def _get_kernel(theta) -> np.ndarray:
            ksize = 31
            return cv2.getGaborKernel((ksize, ksize),
                                      4.0,
                                      theta,
                                      10.0,
                                      0.5,
                                      0,
                                      ktype=cv2.CV_32F)

        def filter_image(img, theta=np.pi):
            kernel = _get_kernel(theta)
            return cv2.filter2D(img, -1, kernel)

        def invert(img):
            return cv2.bitwise_not(img)

        #LOADING IMG
        connected_components_threshold = 600
        img_og = load_image(image_path)
        image = mpimg.imread(image_path)
        #LOADING PAGE
        page = xmlPAGE.pageData(xml_path)
        page.parse()
        imgBoundingBox = page.build_baseline_coordinates()

        img = cv2.cvtColor(img_og, cv2.COLOR_BGR2GRAY)
        show_image(img, "Original")
        #Finding Region of Interest (ROI):
        x_min_global = 6000
        x_max_global = -1
        y_min_global = 6000
        y_max_global = -1
        for i in imgBoundingBox:
            for j in i[0]:
                x_max_global = max(j[0][0], x_max_global)
                x_min_global = min(j[0][0], x_min_global)
                y_max_global = max(j[0][1], y_max_global)
                y_min_global = min(j[0][1], y_min_global)

        x_min_global = max(300, x_min_global)
        x_max_global = min(4000, x_max_global)

        def ROILineMask(img):
            connected_components_threshold = 500
            img_filtered = filter_image(img, theta=np.pi)
            img_vertical_binarized = binarize_image(img_filtered.copy())
            img_vertical_binarized = invert(img_vertical_binarized)
            img_cleaned_cc = remove_connected_components(
                img_vertical_binarized.copy(), connected_components_threshold)
            #show_image(img_cleaned_cc, "adapt threshold cc")
            img_gg_blur = cv2.blur(img_cleaned_cc, (20, 20))
            _, img_bin = cv2.threshold(np.float32(img_gg_blur), 20, 255,
                                       cv2.THRESH_BINARY)
            rows, cols = img_bin.shape
            M = np.float32([[1, 0, 0], [0, 1, -150]])
            dst = cv2.warpAffine(img_bin, M, (cols, rows))
            res = cv2.bitwise_or(img_bin, dst)
            #show_image(res, "translationOR")
            connected_components_threshold = 50000
            img_col = remove_connected_components(
                np.uint8(res).copy(), connected_components_threshold)
            show_image(img_col, "cleanup")
            return img_col

        y_min_global_margin = -100
        y_max_global_margin = 150

        logging.debug(
            f'ROILineMask input coordinates {y_min_global+y_min_global_margin}:{y_max_global+y_max_global_margin}, {x_min_global}:{x_max_global}'
        )

        img_col_ROI = ROILineMask(
            img[y_min_global + y_min_global_margin:y_max_global +
                y_max_global_margin, x_min_global:x_max_global])

        #img = img[y_min_global-100:y_max_global+150, x_min_global:x_max_global]
        #show_image(img, "ROI")

        def x_ogToROICoordinates(og_x):
            return max(0, og_x - x_min_global)

        def y_ogToROICoordinates(og_y):
            return og_y - (y_min_global + y_min_global_margin)

        #img_filtered = filter_image(img, theta=np.pi)
        #img_vertical_binarized = binarize_image(img_filtered.copy())
        #img_vertical_binarized = invert(img_vertical_binarized)
        #img_cleaned_cc = remove_connected_components(img_vertical_binarized.copy(), connected_components_threshold)
        #show_image(img_cleaned_cc, "cleaned")

        if not (os.path.isdir(self.save_directory)):
            os.mkdir(self.save_directory)

        patch_height_above = -100
        patch_height_below = 20

        for i in imgBoundingBox:
            x_min = 6000
            x_max = -1
            y_min = 6000
            y_max = -1
            for j in i[0]:
                x_max = max(j[0][0], x_max)
                x_min = min(j[0][0], x_min)
                y_max = max(j[0][1], y_max)
                y_min = min(j[0][1], y_min)
            og_patch_img = image[y_min + patch_height_above:y_max +
                                 patch_height_below, x_min:x_max]

            #show_image(og_patch_img, f'patch_{x_min}_{x_max}_{y_min}_{y_max}')
            #show_image(img_cleaned_cc[y_min+patch_height_above:y_max+patch_height_below, x_min:x_max], "vert lines")

            show_image(
                np.uint8(img_col_ROI)
                [y_ogToROICoordinates(y_min + patch_height_above):
                 y_ogToROICoordinates(y_max + patch_height_below), :], "Line")
            number_of_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(
                np.uint8(img_col_ROI)
                [y_ogToROICoordinates(y_min + patch_height_above):
                 y_ogToROICoordinates(y_max + patch_height_below), :], 8,
                cv2.CV_32S)
            centroid_x_locations = centroids[1:, 0]
            centroid_x_locations.sort()
            #print(f"centroid x location : {centroid_x_locations}")

            #Locations of lines x-coord
            number_of_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(
                np.uint8(img_col_ROI)
                [y_ogToROICoordinates(y_min + patch_height_above):
                 y_ogToROICoordinates(y_max + patch_height_below),
                 x_ogToROICoordinates(x_min):x_ogToROICoordinates(x_max)], 8,
                cv2.CV_32S)
            #print(f"centroidis raw location  = {centroids[:,0].astype(int)}")
            cut_locations = np.append(centroids[1:, 0].astype(int),
                                      [0, x_max - x_min],
                                      axis=0) + x_min - x_min_global
            cut_locations.sort()

            show_image(
                img_col_ROI[
                    y_ogToROICoordinates(y_min + patch_height_above):
                    y_ogToROICoordinates(y_max + patch_height_below),
                    x_ogToROICoordinates(x_min):x_ogToROICoordinates(x_max)],
                "SIMPLE_PATH")

            #print(f"{x_min_global=}, {x_min=}")
            #print(f"{cut_locations=}")
            for c in range(1, len(cut_locations[1:]) + 1):
                #print("patches")
                if (cut_locations[c] - cut_locations[c - 1] >
                        MIN_LENGTH_OF_PATH):
                    locationOfPatch = cut_locations[
                        c - 1] + (cut_locations[c] - cut_locations[c - 1]) / 2
                    #print(f"location of patch={locationOfPatch}")
                    col_location = np.searchsorted(centroid_x_locations,
                                                   locationOfPatch)

                    x_min_temp = cut_locations[c - 1] + x_min_global
                    x_max_temp = cut_locations[c] + x_min_global
                    y_min_temp = y_min + patch_height_above
                    y_max_temp = y_max + patch_height_below

                    output_img = image[y_min_temp:y_max_temp,
                                       x_min_temp:x_max_temp]

                    x_arr_max.append(x_max_temp)
                    x_arr_min.append(x_min_temp)
                    y_arr_min.append(y_min_temp)
                    y_arr_max.append(y_max_temp)
                    collumn_number.append(col_location)
                    patch_number.append(c)
                    filename = f'{page_name}_{x_min_temp}_{x_max_temp}_{y_min_temp}_{y_max_temp}_{c}_{col_location}.jpg'
                    filename_arr.append(filename)
                    page_name_arr.append(page_name)

                    plt.imsave(os.path.join(self.save_directory, filename),
                               output_img)

        return pd.DataFrame({
            "x_min": x_arr_min,
            "x_max": x_arr_max,
            "y_min": y_arr_min,
            "y_max": y_arr_max,
            "collumn_number": collumn_number,
            "patch_number": patch_number,
            "filename": filename_arr,
            "page_name": page_name_arr
        })
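The patch extraction leans on cv2.connectedComponentsWithStats in two places: the first row of stats is the background component, and the last column holds each component's area in pixels, which is what remove_connected_components filters on. A minimal, self-contained illustration on a toy binary image (the sizes and threshold are arbitrary):

import cv2
import numpy as np

# Toy binary image: one 3x3 blob and one isolated pixel.
img = np.zeros((10, 10), dtype=np.uint8)
img[1:4, 1:4] = 255
img[7, 7] = 255

n, labels, stats, centroids = cv2.connectedComponentsWithStats(img, 8, cv2.CV_32S)
areas = stats[1:, -1]               # skip the background row; last column = area
print(n - 1, areas.tolist())        # 2 foreground components with areas 9 and 1 pixels
print(centroids[1:].round(1))       # centroids of the two foreground components

# Keep only components with at least 5 pixels (mirrors remove_connected_components).
cleaned = np.zeros_like(img)
for i, area in enumerate(areas, start=1):
    if area >= 5:
        cleaned[labels == i] = 255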
Code example #5
File: imgprocess.py  Project: rvankoert/P2PaLA
    def gen_page(
        self,
        img_id,
        data,
        reg_list=None,
        out_folder="./",
        approx_alg=None,
        num_segments=None,
    ):
        """
        """
        self.approx_alg = self.opts.approx_alg if approx_alg == None else approx_alg
        self.num_segments = (
            self.opts.num_segments if num_segments == None else num_segments
        )
        self.logger.debug("Gen PAGE for image: {}".format(img_id))
        # --- sym link to original image
        # --- TODO: check if orig image exist
        img_name = os.path.basename(self.img_data[img_id])
        symlink_force(
            os.path.realpath(self.img_data[img_id]), os.path.join(out_folder, img_name)
        )
        o_img = cv2.imread(self.img_data[img_id])
        (o_rows, o_cols, _) = o_img.shape
        o_max = max(o_rows, o_cols)
        o_min = min(o_rows, o_cols)
        cScale = np.array(
            [o_cols / self.opts.img_size[1], o_rows / self.opts.img_size[0]]
        )

        page = pageData(
            os.path.join(out_folder, "page", img_id + ".xml"), logger=self.logger
        )
        self.hyp_xml_list.append(page.filepath)
        self.hyp_xml_list.sort()
        page.new_page(img_name, str(o_rows), str(o_cols))
        ####
        if self.opts.net_out_type == "C":
            if self.opts.out_mode == "L":
                lines = data[0].astype(np.uint8)
                reg_list = ["full_page"]
                colors = {"full_page": 0}
                r_data = np.zeros(lines.shape, dtype=np.uint8)
            elif self.opts.out_mode == "R":
                r_data = data[0]
                lines = np.zeros(r_data.shape, dtype=np.uint8)
                colors = self.opts.regions_colors
            elif self.opts.out_mode == "LR":
                lines = data[0].astype(np.uint8)
                #lines = data[0]
                r_data = data[1]
                colors = self.opts.regions_colors
            else:
                pass
        elif self.opts.net_out_type == "R":
            if self.opts.out_mode == "L":
                l_color = (-1 - ((self.line_color * (2 / 255)) - 1)) / 2
                lines = np.zeros(data[0].shape, dtype=np.uint8)
                lines[data[0] >= l_color] = 1
                reg_list = ["full_page"]
                colors = {"full_page": 128}
                r_data = np.zeros(lines.shape, dtype=np.uint8)
            elif self.opts.out_mode == "R":
                r_data = data[1]
                colors = self.opts.regions_colors
                lines = np.zeros(r_data.shape, dtype=np.uint8)
            elif self.opts.out_mode == "LR":
                l_color = (-1 - ((self.line_color * (2 / 255)) - 1)) / 2
                lines = np.zeros(data[0].shape, dtype=np.uint8)
                lines[data[0] >= l_color] = 1
                r_data = data[1]
                colors = self.opts.regions_colors
            else:
                pass
        else:
            pass
        reg_mask = np.zeros(r_data.shape, dtype="uint8")
        lin_mask = np.zeros(lines.shape, dtype="uint8")
        r_id = 0
        kernel = np.ones((5, 5), np.uint8)
        if self.opts.line_alg == 'external' and self.opts.net_out_type != "R":
            cv2.imwrite(os.path.join(out_folder, "page", img_id + ".png"),
                cv2.resize(np.abs(lines-1).astype(np.uint8)*255,
                (o_cols,o_rows),
                interpolation = cv2.INTER_NEAREST) )

        # --- get regions and lines for each class
        for reg in reg_list:
            r_color = colors[reg]
            r_type = self.opts.region_types[reg]

            # --- filling the array is faster than creating a new one or multiplying by 0
            reg_mask.fill(0)
            if self.opts.net_out_type == "R":
                lim_inf = ((r_color - self.th_span) * (2 / 255)) - 1
                lim_sup = ((r_color + self.th_span) * (2 / 255)) - 1
                reg_mask[np.where((r_data > lim_inf) & (r_data < lim_sup))] = 1
            elif self.opts.net_out_type == "C":
                reg_mask[r_data == r_color] = 1
            else:
                pass

            #_, contours, hierarchy = cv2.findContours(
            #    reg_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
            #)
            
            res_ = cv2.findContours(
                reg_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
            )
            
            if len(res_) == 2:
                contours, hierarchy = res_
            else:
                _, contours, hierarchy = res_
            
            for cnt in contours:
                # --- remove small objects
                if cnt.shape[0] < 4:
                    continue
                if cv2.contourArea(cnt) < self.opts.min_area * self.opts.img_size[0]:
                    continue

                rect = cv2.minAreaRect(cnt)
                # --- smooth the region a bit to prevent spikes
                epsilon = 0.005 * cv2.arcLength(cnt, True)
                approx = cv2.approxPolyDP(cnt, epsilon, True)
                # box = np.array((rect[0][0], rect[0][1], rect[1][0], rect[1][1])).astype(int)
                r_id = r_id + 1
                approx = (approx * cScale).astype("int32")
                reg_coords = ""
                for x in approx.reshape(-1, 2):
                    reg_coords = reg_coords + " {},{}".format(x[0], x[1])

                if (
                    self.opts.nontext_regions == None
                    or reg not in self.opts.nontext_regions
                ):
                    # --- get lines inside the region
                    lin_mask.fill(0)

                    if not self.opts.out_mode == "R" and self.opts.line_alg != "external":
                        cv2.fillConvexPoly(lin_mask, points=cnt, color=(1, 1, 1))
                        lin_mask = cv2.erode(lin_mask, kernel, iterations=1)
                        lin_mask = cv2.dilate(lin_mask, kernel, iterations=1)
                        reg_lines = lines * lin_mask
                        # --- search for the lines
                        #_, l_cont, l_hier = cv2.findContours(
                        #    reg_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
                        #)
                        
                        resl_ = cv2.findContours(
                            reg_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
                        )
                        
                        if len(resl_) == 2:
                            l_cont, l_hier = resl_
                        else:
                            _, l_cont, l_hier = resl_
                        
                        
                        if len(l_cont) == 0:
                            continue
                        # --- Add region to XML only if there is some line
                        uuid = ''.join(random.choice(self.validValues) for _ in range(4))
                        text_reg = page.add_element(
                            r_type, "r" + uuid + "_" +str(r_id), reg, reg_coords.strip()
                        )
                        n_lines = 0
                        for l_id, l_cnt in enumerate(l_cont):
                            if l_cnt.shape[0] < 4:
                                continue
                            if cv2.contourArea(l_cnt) < 0.01 * self.opts.img_size[0]:
                                continue
                            # --- convert to convexHull if poly is not convex
                            if not cv2.isContourConvex(l_cnt):
                                l_cnt = cv2.convexHull(l_cnt)
                            lin_coords = ""
                            l_cnt = (l_cnt * cScale).astype("int32")
                            (is_line, approx_lin) = self._get_baseline(o_img, l_cnt)
                            if is_line == False:
                                continue
                            is_line, l_cnt = build_baseline_offset(
                                approx_lin, offset=self.opts.line_offset
                            )
                            if is_line == False:
                                continue
                            for l_x in l_cnt.reshape(-1, 2):
                                lin_coords = lin_coords + " {},{}".format(
                                    l_x[0], l_x[1]
                                )
                            uuid = ''.join(random.choice(self.validValues) for _ in range(4))
                            text_line = page.add_element(
                                "TextLine",
                                "l" + uuid + "_" + str(l_id),
                                reg,
                                lin_coords.strip(),
                                parent=text_reg,
                            )
                            baseline = pa.points_to_str(approx_lin)
                            page.add_baseline(baseline, text_line)
                            n_lines += 1
                        # --- remove regions without text lines
                        if n_lines == 0:
                            page.remove_element(text_reg)
                    else:
                        uuid = ''.join(random.choice(self.validValues) for _ in range(4))
                        text_reg = page.add_element(
                            r_type, "r" + uuid + "_" + str(r_id), reg, reg_coords.strip()
                        )
                else:
                    uuid = ''.join(random.choice(self.validValues) for _ in range(4))
                    text_reg = page.add_element(
                        r_type, "r" + uuid + "_" + str(r_id), reg, reg_coords.strip()
                    )

        page.save_xml()
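This variant guards every cv2.findContours call with a length check because OpenCV 3.x returns (image, contours, hierarchy) while OpenCV 4.x returns (contours, hierarchy). The same version-agnostic pattern can be factored into a small helper; the function name below is just illustrative.

import cv2
import numpy as np

def find_contours_compat(mask, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_SIMPLE):
    """Return (contours, hierarchy) regardless of the installed OpenCV major version."""
    res = cv2.findContours(mask, mode, method)
    if len(res) == 2:       # OpenCV 4.x: (contours, hierarchy)
        contours, hierarchy = res
    else:                   # OpenCV 3.x: (image, contours, hierarchy)
        _, contours, hierarchy = res
    return contours, hierarchy

# Usage on a toy mask containing a single filled square.
mask = np.zeros((50, 50), dtype=np.uint8)
mask[10:40, 10:40] = 1
contours, hierarchy = find_contours_compat(mask)
print(len(contours))  # 1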
Code example #6
    def gen_page(self, in_img_path, line_mask, id):
        in_img = cv2.imread(in_img_path)
        (in_img_rows, in_img_cols, _) = in_img.shape
        # print('line_mask.shape:', line_mask.shape)

        cScale = np.array([
            in_img_cols / line_mask.shape[1], in_img_rows / line_mask.shape[0]
        ])

        page = pageData(os.path.join('out', id + ".xml"))
        page.new_page(os.path.basename(in_img_path), str(in_img_rows),
                      str(in_img_cols))

        kernel = np.ones((5, 5), np.uint8)
        validValues = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'

        lines = line_mask.copy()
        lines[line_mask > 0.1] = 1
        lines = lines.astype(np.uint8)

        # plt.axis("off")
        # plt.imshow(lines, cmap='gray')
        # plt.show()

        r_id = 0
        lin_mask = np.zeros(line_mask.shape, dtype="uint8")

        reg_mask = np.ones(line_mask.shape, dtype="uint8")
        res_ = cv2.findContours(reg_mask, cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)

        if len(res_) == 2:
            contours, hierarchy = res_
        else:
            _, contours, hierarchy = res_

        for cnt in contours:
            min_area = 0.01
            # --- remove small objects
            if cnt.shape[0] < 4:
                continue
            if cv2.contourArea(cnt) < min_area * line_mask.shape[0]:
                continue

            rect = cv2.minAreaRect(cnt)
            # --- smooth the region a bit to prevent spikes
            epsilon = 0.005 * cv2.arcLength(cnt, True)
            approx = cv2.approxPolyDP(cnt, epsilon, True)
            # box = np.array((rect[0][0], rect[0][1], rect[1][0], rect[1][1])).astype(int)
            r_id = r_id + 1
            approx = (approx * cScale).astype("int32")
            reg_coords = ""
            for x in approx.reshape(-1, 2):
                reg_coords = reg_coords + " {},{}".format(x[0], x[1])

            cv2.fillConvexPoly(lin_mask, points=cnt, color=(1, 1, 1))
            lin_mask = cv2.erode(lin_mask, kernel, iterations=1)
            lin_mask = cv2.dilate(lin_mask, kernel, iterations=1)
            reg_lines = lines * lin_mask

            resl_ = cv2.findContours(reg_lines, cv2.RETR_EXTERNAL,
                                     cv2.CHAIN_APPROX_SIMPLE)

            if len(resl_) == 2:
                l_cont, l_hier = resl_
            else:
                _, l_cont, l_hier = resl_

            # IMPORTANT l_cont, l_hier
            if len(l_cont) == 0:
                continue

            # --- Add region to XML only if there is some line
            uuid = ''.join(random.choice(validValues) for _ in range(4))
            text_reg = page.add_element('TextRegion',
                                        "r" + uuid + "_" + str(r_id),
                                        'full_page', reg_coords.strip())
            n_lines = 0
            for l_id, l_cnt in enumerate(l_cont):
                if l_cnt.shape[0] < 4:
                    continue
                if cv2.contourArea(l_cnt) < 0.01 * line_mask.shape[0]:
                    continue
                # --- convert to convexHull if poly is not convex
                if not cv2.isContourConvex(l_cnt):
                    l_cnt = cv2.convexHull(l_cnt)
                lin_coords = ""
                l_cnt = (l_cnt * cScale).astype("int32")
                # IMPORTANT
                (is_line, approx_lin) = self._get_baseline(in_img, l_cnt)

                if is_line == False:
                    continue

                is_line, l_cnt = build_baseline_offset(approx_lin, offset=50)
                if is_line == False:
                    continue
                for l_x in l_cnt.reshape(-1, 2):
                    lin_coords = lin_coords + " {},{}".format(l_x[0], l_x[1])
                uuid = ''.join(random.choice(validValues) for _ in range(4))
                text_line = page.add_element(
                    "TextLine",
                    "l" + uuid + "_" + str(l_id),
                    'full_page',
                    lin_coords.strip(),
                    parent=text_reg,
                )
                # IMPORTANT
                baseline = pa.points_to_str(approx_lin)
                page.add_baseline(baseline, text_line)
                n_lines += 1
        page.save_xml()
Code example #7
def zone_map(hyp, target, img, alpha=0, dist=lambda r,h:int(r==h)):
    """
    Computes ZoneMap metric from:
    O. Galibert, J. Kahn and I. Oparin, The zonemap metric for page 
    segmentation and area classification in scanned documents, 2014 
    IEEE International Conference on Image Processing (ICIP), Paris, 
    2014, pp. 2594-2598.
    Inputs:
        hyp: hypothesis data (path to a PAGE-XML file)
        target: reference data (path to a PAGE-XML file)
        img: path to the image file; if the image is not binary it is binarized with Otsu
        alpha: classification error weight in [0,1]
        dist: difference function between zone types, in [0,1]; default:
            dist(r,h) = 0 if r == h, else 1
    """
    if os.path.isfile(img):
        img = cv2.imread(img,0)
    if np.max(img) > 1:
        _, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # --- invert: zero-valued pixels become 1 and are the only ones counted by the == 1 tests below
    img = np.abs(1-img).astype(np.uint8)

    hyp_data = pageData(hyp)
    hyp_data.parse()
    tar_data = pageData(target)
    tar_data.parse()
    Hzones = hyp_data.get_zones(["TextRegion"])
    Rzones = tar_data.get_zones(["TextRegion"])
    #--- Mapping
    tmp_H = np.zeros(img.shape,dtype=np.uint8)
    tmp_R = np.zeros(img.shape,dtype=np.uint8)
    force = np.zeros((len(Rzones),len(Hzones)))
    area_R = 0
    for i,R in enumerate(sorted(Rzones.keys())):
        tmp_R.fill(0)
        cv2.fillConvexPoly(tmp_R, Rzones[R]['coords'].astype(int), 1)
        Rzones[R]['area'] = np.logical_and(img==1, img==tmp_R).sum()
        area_R += Rzones[R]['area']
        for j,H in enumerate(sorted(Hzones.keys())):
            tmp_H.fill(0)
            cv2.fillConvexPoly(tmp_H, Hzones[H]['coords'].astype(int), 1)
            #--- Intersection 
            I = img[np.where(np.logical_and(tmp_R == tmp_H,tmp_R ==1))].sum()
            #I = img[np.where(tmp_R == tmp_H)].sum()
            Hzones[H]['area'] = np.logical_and(tmp_H==1, img==1).sum()
            force[i][j] = I*((1/Rzones[R]['area'])+(1/Hzones[H]['area']))

    print(force)
    #--- get and sort non-zero links
    nz = force != 0
    s_index = np.unravel_index(np.where(nz, force, np.nan).argsort(axis=None)[:nz.sum()],force.shape)
    s_index= (s_index[0][::-1], s_index[1][::-1])
    #--- make groups
    #--- Manage False Alarm and Miss groups
    #---    False Alarm:
    fa = np.where(force.sum(axis=0)==0)
    g_id = 1
    G = {}
    for f in fa[0]:
        G[g_id] = ([],[f])
        #G[g_id] = ([],[Hzones[f]['id']])
        g_id += 1
    #---    Miss
    mi = np.where(force.sum(axis=1)==0)
    L = (np.zeros(len(Rzones), dtype=np.uint8),np.zeros(len(Hzones),dtype=np.uint8))
    for m in mi[0]:
        G[g_id] = ([m],[])
        #G[g_id] = ([Rzones[m]['id']],[])
        g_id += 1
    for r_ix,h_ix in zip(s_index[0],s_index[1]):
        print("H:{} R:{} L:{}".format(Hzones[h_ix]['id'],Rzones[r_ix]['id'],force[r_ix,h_ix]))
        if L[0][r_ix] > 0 and L[1][h_ix] > 0:
            #--- do not add to any group
            pass
        elif L[0][r_ix] == 0 and L[1][h_ix] ==0:
            #--- create new group
            G[g_id] = ([r_ix],([h_ix]))
            #G[g_id] = ([Rzones[r_ix]['id']],[Hzones[h_ix]['id']])
            L[0][r_ix] = g_id
            L[1][h_ix] = g_id
            g_id += 1
        elif L[0][r_ix] == 0:
            #--- only ref is not assigned
            p_id = L[1][h_ix]
            G[p_id][0].append(r_ix)
            #G[p_id][0].append(Rzones[r_ix]['id'])
            L[0][r_ix] = p_id
            
        else:
            #--- only hyp is not assigned
            p_id = L[0][r_ix]
            G[p_id][1].append(h_ix)
            #G[p_id][1].append(Hzones[h_ix]['id'])
            L[1][h_ix] = p_id
    print(G)
    #--- Second stage, error calculation
    Ezm = 0
    for gr_key in G.keys():
        #--- handle false alarm:
        if len(G[gr_key][0]) == 0:
            Es = Hzones[G[gr_key][1][0]]['area']
            #--- Ec = Es, E=(1-a)Es + aEc == Ec 
            Ezm += Es
        #--- handle miss:
        elif len(G[gr_key][1]) == 0:
            Es = Rzones[G[gr_key][0][0]]['area']
            Ezm += Es
        #--- handle match
        elif len(G[gr_key][0]) == 1 and len(G[gr_key][1]) == 1:
            tmp_R.fill(0)
            cv2.fillConvexPoly(tmp_R, Rzones[G[gr_key][0][0]]['coords'].astype(int), 1)
            tmp_H.fill(0)
            cv2.fillConvexPoly(tmp_H, Hzones[G[gr_key][1][0]]['coords'].astype(int), 1)
            reg_intersection = (img*tmp_R)[np.where(tmp_R == tmp_H)].sum()
            Es = Rzones[G[gr_key][0][0]]['area'] + Hzones[G[gr_key][1][0]]['area'] - \
                    (2* reg_intersection)
            Ec = (dist(Rzones[G[gr_key][0][0]]['type'],Hzones[G[gr_key][1][0]]['type']) * reg_intersection) + Es
            Ezm += ((1-alpha)*Es)+(alpha*Ec)
        #--- handle split
        elif len(G[gr_key][0]) == 1 and len(G[gr_key][1]) > 1:
            tmp_R.fill(0)
            cv2.fillConvexPoly(tmp_R, Rzones[G[gr_key][0][0]]['coords'].astype(int), 1)
            tmp_H.fill(0)
            tmp_H_o = tmp_H.copy()
            for i,h in enumerate(G[gr_key][1]):
                print(h)
                cv2.fillConvexPoly(tmp_H, Hzones[h]['coords'].astype(int), 1 + i)
                cv2.fillConvexPoly(tmp_H_o, Hzones[h]['coords'].astype(int), 1)
            #--- handle sub-zones by configuration
            #--- Miss Error i.e. in R but not in any H
            #--- E = Area(z)
            Er = (img*tmp_R)[np.where(tmp_R != tmp_H_o)].sum()
            #--- False detection error i.e. H but not in R
            Er += (img*tmp_H_o)[np.where(tmp_R != tmp_H_o)].sum()
            #--- Segmentation error and Correct parts
            Ec = 0 



        #--- handle merge
        elif len(G[gr_key][0]) > 1 and len(G[gr_key][1]) == 1:
            pass


    print(Ezm/area_R)
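A minimal sketch of how zone_map might be invoked, assuming the function and its pageData dependency are in scope and that the hypothesis and reference PAGE-XML files plus the page image exist on disk; the paths and the zone-type distance below are placeholders.

# Hypothetical invocation; all paths are placeholders.
hyp_xml = "./hyp/page/scan_001.xml"
ref_xml = "./ref/page/scan_001.xml"
page_img = "./imgs/scan_001.jpg"   # binarized internally with Otsu if not already binary

# Count a zone-type mismatch as a full classification error (as described in the docstring).
def type_dist(r, h):
    return 0 if r == h else 1

zone_map(hyp_xml, ref_xml, page_img, alpha=0.5, dist=type_dist)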
Code example #8
def compute_metrics(hyp, target, opts, logger=None):
    """
    """
    #logger = logging.getLogger(__name__) if logger==None else logger
    if logger == None:
        logger = logging.getLogger(__name__)
        logger.setLevel(logging.DEBUG)
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        formatter = logging.Formatter(
            '%(asctime)s - %(module)s - %(levelname)s - %(message)s')
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    num_samples = len(target)
    metrics = {}
    #--- force dict to appear in the same order always
    summary = OrderedDict()
    if opts.out_mode == 'L' or opts.out_mode == 'LR':
        metrics.update({
            'p_bl': np.empty(num_samples, dtype=float),
            'r_bl': np.empty(num_samples, dtype=float),
            'f1_bl': np.empty(num_samples, dtype=float)
        })
        t_file = '/tmp/'

        #--- the suffix must be added because, for some strange reason,
        #--- the Transkribus tool needs it
        t_fd, t_path = tempfile.mkstemp(suffix='.lst')
        h_fd, h_path = tempfile.mkstemp(suffix='.lst')
        try:
            with os.fdopen(t_fd, 'w') as tmp:
                tmp.write('\n'.join(target))
            with os.fdopen(h_fd, 'w') as tmp:
                tmp.write('\n'.join(hyp))
            evaltool = os.path.dirname(__file__) + '/baselineEvaluator.jar'
            cmd = subprocess.Popen(
                ['java', '-jar', evaltool, '-no_s', t_path, h_path],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)
            bl_results, _err = cmd.communicate()
            logger.debug(_err)
        finally:
            os.remove(t_path)
            os.remove(h_path)

        logger.info("-" * 10 + "BASELINE EVALUATION RESULTS" + "-" * 10)
        logger.debug(bl_results)
        bl_results = bl_results.decode('utf-8').split('\n')
        for i in range(num_samples):
            res = bl_results[17 + i].split(',')
            metrics['p_bl'][i] = float(res[0])
            metrics['r_bl'][i] = float(res[1])
            metrics['f1_bl'][i] = float(res[2])
        logger.info(bl_results[-6])
        logger.info(bl_results[-5])
        logger.info(bl_results[-4])
        summary['p_bl'] = bl_results[-6]
        summary['r_bl'] = bl_results[-5]
        summary['f1_bl'] = bl_results[-4]

    if opts.out_mode == 'R' or opts.out_mode == 'LR':
        metrics.update({
            'p_acc': np.empty(num_samples, dtype=float),
            'm_acc': np.empty(num_samples, dtype=float),
            'm_iu': np.empty(num_samples, dtype=float),
            'f_iu': np.empty(num_samples, dtype=float)
        })
        per_class_m = np.zeros(
            (num_samples, np.unique(list(opts.regions_colors.values())).size + 1),
            dtype=float)
        logger.info("-" * 10 + "REGIONS EVALUATION RESULTS" + "-" * 10)
        logger.debug("p_cc,m_acc,m_iu,f_iu,target_file,hyp_file")
        hyp = np.sort(hyp)
        target = np.sort(target)
        for i, (h, t) in enumerate(zip(hyp, target)):
            target_data = pageData(t)
            target_data.parse()
            img_size = np.array(target_data.get_size())
            target_mask = target_data.build_mask(img_size, 'TextRegion',
                                                 opts.regions_colors)
            hyp_data = pageData(h)
            hyp_data.parse()
            hyp_mask = hyp_data.build_mask(img_size, 'TextRegion',
                                           opts.regions_colors)

            metrics['p_acc'][i] = ev.pixel_accuraccy(hyp_mask, target_mask)
            metrics['m_acc'][i] = ev.mean_accuraccy(hyp_mask, target_mask)
            metrics['m_iu'][i] = ev.mean_IU(hyp_mask, target_mask)
            metrics['f_iu'][i] = ev.freq_weighted_IU(hyp_mask, target_mask)
            tmp = ev.per_class_accuraccy(hyp_mask, target_mask)
            for m, c in zip(tmp[0], tmp[1]):
                per_class_m[i, c] = m

            #per_class_m['pc_p_acc'][i] = ev.per_class_accuraccy(hyp_mask,target_mask)
            #per_class_m += ev.per_class_accuraccy(hyp_mask,target_mask)[0]
            logger.debug('{:.4f},{:.4f},{:.4f} {:.4f},{},{}'.format(
                metrics['p_acc'][i], metrics['m_acc'][i], metrics['m_iu'][i],
                metrics['f_iu'][i], t, h))

        #t_polygons = target_data.get_polygons('TextRegion')
        #h_polygons = hyp_data.get_polygons('TextRegion')
        #metrics['m_struct'][i] = ev.matching_structure(h_polygons,t_polygons)
        #ev.matching_structure(h_polygons,t_polygons)

        summary.update({m: metrics[m].sum() / num_samples for m in metrics})
        logger.info("Pixel accuracy:  {}".format(summary['p_acc']))
        logger.info("Mean accuracy:   {}".format(summary['m_acc']))
        logger.info("Mean IU:          {}".format(summary['m_iu']))
        logger.info("freq weighted IU: {}".format(summary['f_iu']))
        logger.info("Per_class Pixel accuracy: {}".format(
            per_class_m.sum(axis=0) / num_samples))
        mm = per_class_m.sum(axis=0) / num_samples
        #print("BG:{}".format(mm[0]))
        #for n,c in opts.regions_colors.items():
        #    print("{}:{}".format(n,mm[c]))
    #--- return averages only
    return summary
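compute_metrics expects parallel lists of hypothesis and reference PAGE-XML paths plus an options object exposing at least out_mode and regions_colors (the baseline branch additionally shells out to baselineEvaluator.jar via java). A minimal call sketch, assuming compute_metrics and its ev / pageData dependencies are importable; the paths and the options namespace are placeholders.

import glob
from types import SimpleNamespace

# Hypothetical options; only the attributes read by compute_metrics are set.
opts = SimpleNamespace(
    out_mode="R",   # "L" or "LR" would additionally run the baseline evaluator jar
    regions_colors={"paragraph": 1, "marginalia": 2},
)

target = sorted(glob.glob("./ref/page/*.xml"))
hyp = sorted(glob.glob("./hyp/page/*.xml"))

summary = compute_metrics(hyp, target, opts)
print(summary)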