Ejemplo n.º 1
0
def main():
    """Run the early preprocessing stages on one training image.

    The image returned by ``dataset_manager.get_training_image()`` is passed
    through HSV noise removal, the neighbor-based noise filter (wrapped in
    ``repeat(..., 2)``) and skeletonization. Each stage is run through
    ``time_func`` with a label, and its output is saved via ``c.temp_path``.
    """
    c.clear_temp()

    source_img = dataset_manager.get_training_image()
    rec = captcha_recognizer.CaptchaRecognizer()
    mpimg.imsave(c.temp_path('00.origin.png'), source_img)

    # Stage 1: HSV-based noise removal on the untouched input.
    hsv_img = time_func('remove_noise_with_hsv',
                        lambda: rec.remove_noise_with_hsv(source_img))
    mpimg.imsave(c.temp_path('01.hsv.png'), hsv_img, cmap=cm_greys)

    # Stage 2: neighbor-based noise removal applied to the stage 1 output.
    neighbor_img = time_func(
        'remove_noise_with_neighbors',
        lambda: repeat(rec.remove_noise_with_neighbors, 2)(hsv_img))
    mpimg.imsave(c.temp_path('02.neighbor.png'), neighbor_img, cmap=cm_greys)

    # Stage 3a: skeletonize the denoised image.
    skeleton_img = time_func('skeletonize',
                             lambda: morph.skeletonize(neighbor_img))
    mpimg.imsave(c.temp_path('03a.skeleton.png'), skeleton_img, cmap=cm_greys)
    def partition(self, img, save_intermediate=False, verbose=False, force_partition=True):
        """Split a captcha image into one sub-image per character.

        The image is denoised (HSV filter, then the neighbor filter wrapped in
        ``repeat(..., 2)``), segmented with ``self.segment_with_label``, and the
        resulting components are ordered by the start of their second-axis
        slice (left to right).

        Args:
            img: Image accepted by ``self.remove_noise_with_hsv``.
            save_intermediate: When true, save the output of every stage via
                ``c.temp_path(...)``.
            verbose: When true, pass a label for each stage to ``time_func``
                (``None`` is passed otherwise) and print diagnostics.
            force_partition: When true and exactly ``self.character_num - 1``
                components were found, pass them to ``self.force_partition``.

        Returns:
            A ``(char_images, weak_confidence)`` tuple. ``char_images`` is the
            list of per-character arrays in left-to-right order, or ``None``
            when the piece count or any piece's height/width is outside the
            configured limits. ``weak_confidence`` is 1 when
            ``self.force_partition`` was used and 0 otherwise.
        """
        weak_confidence = 0
        if save_intermediate:
            mpimg.imsave(c.temp_path("00.origin.png"), img)

        # step 1
        # remove noise with hsv
        img_01 = time_func("remove_noise_with_hsv" if verbose else None, lambda: self.remove_noise_with_hsv(img))
        if save_intermediate:
            mpimg.imsave(c.temp_path("01.hsv.png"), img_01, cmap=cm_greys)

        # step 2
        # remove noise with neighbors
        img_02 = time_func(
            "remove_noise_with_neighbors" if verbose else None,
            lambda: repeat(self.remove_noise_with_neighbors, 2)(img_01),
        )
        if save_intermediate:
            mpimg.imsave(c.temp_path("02.neighbor.png"), img_02, cmap=cm_greys)

        # step 3
        # partition stage 1
        labels, object_slices = time_func(
            "segment_with_label" if verbose else None, lambda: self.segment_with_label(img_02)
        )
        if verbose:
            print("{} connected components found".format(len(object_slices)))
        if save_intermediate:
            mpimg.imsave(c.temp_path("03.00000.png"), labels)

        # step 4
        # Arrange the segments from left to right and probably partition stage 2
        xmin_arr = np.array([s[1].start for s in object_slices])
        char_images = []
        for i in xmin_arr.argsort():
            region = object_slices[i]
            # Crop before copying: only the bounding box is duplicated, and the
            # result does not keep a full-size copy of img_02 alive.
            char_image = img_02[region].copy()
            # Component i carries label i + 1; blank out pixels of any other
            # component that fall inside this bounding box.
            char_image[labels[region] != i + 1] = 0
            char_images.append(char_image)
        if force_partition and len(char_images) == self.character_num - 1:
            weak_confidence = 1
            char_images = self.force_partition(char_images)

        # step 5
        # Check if segmentation was successful and get characters
        if len(char_images) == self.character_num:
            shapes = np.array([np.shape(char_image) for char_image in char_images])
            heights, widths = shapes[:, 0], shapes[:, 1]
            if verbose:
                print("Heights {}".format(heights))
                print("Widths {}".format(widths))
            # noinspection PyTypeChecker
            if (
                np.all(heights >= self.char_height_min)
                and np.all(heights <= self.char_height_max)
                and np.all(widths >= self.char_width_min)
                and np.all(widths <= self.char_width_max)
            ):

                if save_intermediate:
                    # File names are numbered from 1.
                    for number, char_image in enumerate(char_images, start=1):
                        mpimg.imsave(c.temp_path("03.char.{}.png".format(number)), char_image, cmap=cm_greys)
                return char_images, weak_confidence

        if verbose:
            print("Warning: partition failed!")
        return None, weak_confidence
Ejemplo n.º 3
0
    def partition(self, img, save_intermediate=False, verbose=False,
                  force_partition=True):
        """Cut a captcha image into per-character sub-images.

        Stages: HSV noise removal, the neighbor noise filter wrapped in
        ``repeat(..., 2)``, segmentation via ``self.segment_with_label``,
        left-to-right ordering of the components, an optional call to
        ``self.force_partition``, and a final size check.

        Args:
            img: Image accepted by ``self.remove_noise_with_hsv``.
            save_intermediate: When true, save every stage's output via
                ``c.temp_path(...)``.
            verbose: When true, hand a stage label to ``time_func`` (``None``
                otherwise) and print diagnostics.
            force_partition: When true and exactly
                ``self.character_num - 1`` components were found, pass them
                to ``self.force_partition``.

        Returns:
            ``(char_images, weak_confidence)``. ``char_images`` is a list of
            arrays ordered left to right, or ``None`` if the number of pieces
            or any piece's height/width falls outside the configured limits.
            ``weak_confidence`` is 1 if ``self.force_partition`` was used,
            otherwise 0.
        """
        weak_confidence = 0
        if save_intermediate:
            mpimg.imsave(c.temp_path('00.origin.png'), img)

        # step 1
        # remove noise with hsv
        img_01 = time_func(
            'remove_noise_with_hsv' if verbose else None,
            lambda: self.remove_noise_with_hsv(img)
        )
        if save_intermediate:
            mpimg.imsave(c.temp_path('01.hsv.png'), img_01, cmap=cm_greys)

        # step 2
        # remove noise with neighbors
        img_02 = time_func(
            'remove_noise_with_neighbors' if verbose else None,
            lambda: repeat(self.remove_noise_with_neighbors, 2)(img_01)
        )
        if save_intermediate:
            mpimg.imsave(c.temp_path('02.neighbor.png'), img_02, cmap=cm_greys)

        # step 3
        # partition stage 1
        labels, object_slices = time_func(
            'segment_with_label' if verbose else None,
            lambda: self.segment_with_label(img_02)
        )
        if verbose:
            print('{} connected components found'.format(len(object_slices)))
        if save_intermediate:
            mpimg.imsave(c.temp_path('03.00000.png'), labels)

        # step 4
        # Arrange the segments from left to right and probably partition stage 2
        xmin_arr = np.array([s[1].start for s in object_slices])
        char_images = []
        for i in xmin_arr.argsort():
            box = object_slices[i]
            # Copy only the bounding box instead of the whole image; the
            # pixel values are the same as masking first and cropping after,
            # but no full-size buffer is allocated or retained per component.
            char_image = img_02[box].copy()
            # Component i carries label i + 1; zero everything else in the box.
            char_image[labels[box] != i + 1] = 0
            char_images.append(char_image)
        if force_partition and len(char_images) == self.character_num - 1:
            weak_confidence = 1
            char_images = self.force_partition(char_images)

        # step 5
        # Check if segmentation was successful and get characters
        if len(char_images) == self.character_num:
            shapes = np.array([np.shape(piece) for piece in char_images])
            heights, widths = shapes[:, 0], shapes[:, 1]
            if verbose:
                print('Heights {}'.format(heights))
                print('Widths {}'.format(widths))
            # noinspection PyTypeChecker
            if (np.all(heights >= self.char_height_min) and
                    np.all(heights <= self.char_height_max) and
                    np.all(widths >= self.char_width_min) and
                    np.all(widths <= self.char_width_max)):

                if save_intermediate:
                    # File names are numbered from 1.
                    for number, piece in enumerate(char_images, start=1):
                        mpimg.imsave(
                            c.temp_path('03.char.{}.png'.format(number)),
                            piece, cmap=cm_greys)
                return char_images, weak_confidence

        if verbose:
            print('Warning: partition failed!')
        return None, weak_confidence
    def partition(self, img, save_intermediate=False, verbose=False):
        """Cut a captcha image into per-character sub-images.

        The image goes through HSV noise removal, the neighbor noise filter
        wrapped in ``repeat(..., 2)``, and ``self.segment_with_label``. The
        resulting components are ordered left to right and size-checked.

        Args:
            img: Image accepted by ``self.remove_noise_with_hsv``.
            save_intermediate: When true, save every stage's output via
                ``c.temp_path(...)``.
            verbose: When true, hand a stage label to ``time_func`` (``None``
                otherwise) and print diagnostics.

        Returns:
            A list of per-character arrays ordered left to right, or ``None``
            when the number of components differs from
            ``self.character_num`` or any component's height/width is outside
            the configured limits.
        """
        if save_intermediate:
            mpimg.imsave(c.temp_path('00.origin.png'), img)

        # Stage 1: HSV-based noise removal.
        img_01 = time_func(
            'remove_noise_with_hsv' if verbose else None,
            lambda: self.remove_noise_with_hsv(img)
        )
        if save_intermediate:
            mpimg.imsave(c.temp_path('01.hsv.png'), img_01, cmap=cm_greys)

        # Stage 2: neighbor-based noise removal.
        img_02 = time_func(
            'remove_noise_with_neighbors' if verbose else None,
            lambda: repeat(self.remove_noise_with_neighbors, 2)(img_01)
        )
        if save_intermediate:
            mpimg.imsave(c.temp_path('02.neighbor.png'), img_02, cmap=cm_greys)

        # Stage 3: label the components and get one slice tuple per component.
        labels, object_slices = time_func(
            'segment_with_label' if verbose else None,
            lambda: self.segment_with_label(img_02)
        )
        if verbose:
            print('{} connected components found'.format(len(object_slices)))
        if save_intermediate:
            mpimg.imsave(c.temp_path('03.00000.png'), labels)

        # Arrange the segments from left to right
        xmin_arr = np.array([s[1].start for s in object_slices])
        char_images = []
        # noinspection PyTypeChecker
        for i in xmin_arr.argsort():
            box = object_slices[i]
            # Copy only the bounding box rather than the whole image; the
            # pixel values match mask-then-crop, but no full-size buffer is
            # allocated or kept alive for each component.
            char_image = img_02[box].copy()
            # Component i carries label i + 1; zero everything else in the box.
            char_image[labels[box] != i + 1] = 0
            char_images.append(char_image)

        # Check if segmentation was successful
        if len(char_images) == self.character_num:
            shapes = np.array([np.shape(piece) for piece in char_images])
            heights, widths = shapes[:, 0], shapes[:, 1]
            if verbose:
                print('Heights {}'.format(heights))
                print('Widths {}'.format(widths))
            # noinspection PyTypeChecker
            if (np.all(heights >= self.char_height_min) and
                    np.all(heights <= self.char_height_max) and
                    np.all(widths >= self.char_width_min) and
                    np.all(widths <= self.char_width_max)):

                if save_intermediate:
                    # File names are numbered from 1.
                    for number, piece in enumerate(char_images, start=1):
                        mpimg.imsave(
                            c.temp_path('03.char.{}.png'.format(number)),
                            piece, cmap=cm_greys)
                return char_images
        if verbose:
            print('Warning: partition failed!')
        return None