# Example #1
    def readtext(self, image, decoder='greedy', beamWidth=5, batch_size=1,
                 workers=0, allowlist=None, blocklist=None, detail=1,
                 paragraph=False, min_size=20,
                 contrast_ths=0.1, adjust_contrast=0.5, filter_ths=0.003,
                 text_threshold=0.7, low_text=0.4, link_threshold=0.4,
                 canvas_size=2560, mag_ratio=1.,
                 slope_ths=0.1, ycenter_ths=0.5, height_ths=0.5,
                 width_ths=0.5, add_margin=0.1):
        """Detect text regions in *image* and recognize them in one call.

        Parameters:
        image: file path or numpy-array or a byte stream object

        Returns whatever ``self.recognize`` produces for the detected boxes.
        """
        # Normalize the input once here; both downstream calls receive a
        # trailing False so they skip their own reformat pass.
        img, img_cv_grey = reformat_input(image)

        horizontal_list, free_list = self.detect(
            img, min_size, text_threshold, low_text, link_threshold,
            canvas_size, mag_ratio, slope_ths, ycenter_ths, height_ths,
            width_ths, add_margin, False)

        return self.recognize(
            img_cv_grey, horizontal_list, free_list, decoder, beamWidth,
            batch_size, workers, allowlist, blocklist, detail, paragraph,
            contrast_ths, adjust_contrast, filter_ths, False)
# Example #2
    def detect(self, img, min_size=20, text_threshold=0.7, low_text=0.4,
               link_threshold=0.4, canvas_size=2560, mag_ratio=1.,
               slope_ths=0.1, ycenter_ths=0.5, height_ths=0.5,
               width_ths=0.5, add_margin=0.1, reformat=True):
        """Run the text detector on *img* and return grouped box lists.

        Returns a (horizontal_list, free_list) pair as produced by
        ``group_text_box``, optionally filtered so that only boxes whose
        larger extent exceeds ``min_size`` pixels survive.
        """
        if reformat:
            img, img_cv_grey = reformat_input(img)

        text_box = get_textbox(self.detector, img, canvas_size, mag_ratio,
                               text_threshold, link_threshold, low_text,
                               False, self.device)
        horizontal_list, free_list = group_text_box(text_box, slope_ths,
                                                    ycenter_ths, height_ths,
                                                    width_ths, add_margin)

        if min_size:
            # Keep a horizontal box only when its bigger side (difference of
            # either coordinate pair) is larger than min_size.
            kept_horizontal = []
            for box in horizontal_list:
                if max(box[1] - box[0], box[3] - box[2]) > min_size:
                    kept_horizontal.append(box)
            horizontal_list = kept_horizontal

            # Same size filter for free-form polygons, measured via diff()
            # over each coordinate axis of the polygon's points.
            kept_free = []
            for poly in free_list:
                xs = [pt[0] for pt in poly]
                ys = [pt[1] for pt in poly]
                if max(diff(xs), diff(ys)) > min_size:
                    kept_free.append(poly)
            free_list = kept_free

        return horizontal_list, free_list
    def recognize(self,
                  img,
                  horizontal_list=None,
                  free_list=None,
                  reformat=True,
                  imgH=32):
        """Recognize text inside pre-detected regions of *img*.

        Parameters:
        img: input image (file path / array / byte stream when
            ``reformat`` is True, otherwise an already-loaded image).
        horizontal_list, free_list: box lists from ``detect``; when both
            are None the whole image is recognized as a single region.
        reformat: normalize the input through ``reformat_input`` first.
        imgH: model input height the crops are scaled to.

        Returns the output of ``get_text`` for the prepared crop list.
        """
        if reformat:
            # BUG FIX: the original called reformat_input(img_cv_grey),
            # but img_cv_grey is undefined at this point (the parameter is
            # named img), so reformat=True always raised NameError.
            img, img_cv_grey = reformat_input(img)

        if (horizontal_list is None) and (free_list is None):
            # No boxes supplied: treat the entire image as one text region.
            b, y_max, x_max = img.shape  # assumes a 3-D array — TODO confirm
            ratio = x_max / y_max
            max_width = int(imgH * ratio)
            crop_img = cv2.resize(img, (max_width, imgH),
                                  interpolation=Image.ANTIALIAS)
            image_list = [([[0, 0], [x_max, 0], [x_max, y_max],
                            [0, y_max]], crop_img)]
        else:
            image_list, max_width = get_image_list(horizontal_list,
                                                   free_list,
                                                   img,
                                                   model_height=imgH)
        result = get_text(self.recognizer, image_list)
        return result
def train_model(model_builder,
                images,
                labels,
                fold_pairs,
                epochs=10,
                batch_size=32,
                callbacks=None,
                verbose=0,
                fold_verbose=0):
    """Train and evaluate a freshly built model on each cross-validation fold.

    Parameters:
    model_builder: zero-argument callable returning a new, compiled model.
    images, labels: full dataset, split per fold by ``reformat_input``.
    fold_pairs: sequence of fold descriptors consumed by ``reformat_input``.
    epochs, batch_size, callbacks: forwarded to ``model.fit``.
    verbose: 0 = silent, 1 = mean scores, 2 = per-fold accuracy,
        3 = additionally precision/recall/F1 per fold.
    fold_verbose: verbosity passed through to ``model.fit``.

    Returns (mean_val_accuracy, mean_test_accuracy); (0.0, 0.0) when
    ``fold_pairs`` is empty.
    """
    # BUG FIX: `callbacks` previously defaulted to a mutable list ([]),
    # which is shared across calls; use the None sentinel instead.
    if callbacks is None:
        callbacks = []

    mean_val_score = 0
    mean_test_score = 0
    n_folds = len(fold_pairs)

    for i, fold in enumerate(fold_pairs):
        # Fresh model and preprocessor per fold so state never leaks.
        model = model_builder()
        image_preprocessor = ImagePreprocessor()

        (X_train, y_train), (X_val, y_val), (X_test, y_test) = reformat_input(
            images, labels, fold)

        # Fit preprocessing on training data only; apply to val/test.
        X_train = image_preprocessor.fit_transform(X_train)
        X_val = image_preprocessor.transform(X_val)
        X_test = image_preprocessor.transform(X_test)

        model.fit(X_train,
                  y_train,
                  verbose=fold_verbose,
                  epochs=epochs,
                  batch_size=batch_size,
                  validation_data=(X_val, y_val),
                  callbacks=callbacks)
        y_test_predict = np.argmax(model.predict(X_test), axis=1)
        y_val_predict = np.argmax(model.predict(X_val), axis=1)

        val_accuracy_score = accuracy_score(y_val, y_val_predict)
        test_accuracy_score = accuracy_score(y_test, y_test_predict)

        mean_val_score += val_accuracy_score
        mean_test_score += test_accuracy_score

        if verbose >= 2:
            print('===========Fold {}/{}==========='.format(i + 1, n_folds))
            print('Validation Accuracy score: ', val_accuracy_score)
            print('Test Accuracy score: ', test_accuracy_score)
        if verbose == 3:
            print('Test Precision score: ',
                  precision_score(y_test, y_test_predict, average=None))
            print('Test Recall score: ',
                  recall_score(y_test, y_test_predict, average=None))
            print('Test F1 score: ',
                  f1_score(y_test, y_test_predict, average=None))

    # Guard against division by zero for an empty fold list.
    mean_val = mean_val_score / n_folds if n_folds else 0.0
    mean_test = mean_test_score / n_folds if n_folds else 0.0

    if verbose >= 1:
        print('\nMean validation accuracy score: ', mean_val)
        print('Mean test accuracy score: ', mean_test)

    # IMPROVEMENT: the original computed the means but discarded them;
    # returning them is backward-compatible (callers ignoring the return
    # value are unaffected).
    return mean_val, mean_test
# Example #5
    def recognize(self, img_cv_grey, horizontal_list=None, free_list=None,
                  decoder='greedy', beamWidth=5, batch_size=1,
                  workers=0, allowlist=None, blocklist=None, detail=1,
                  paragraph=False,
                  contrast_ths=0.1, adjust_contrast=0.5, filter_ths=0.003,
                  reformat=True):
        """Recognize text inside the supplied box lists of a grayscale image.

        Parameters:
        img_cv_grey: grayscale image (or raw input when ``reformat``).
        horizontal_list, free_list: box lists from ``detect``; when both
            are None the whole image is recognized as one region.
        decoder / beamWidth: CTC decoding strategy; forced to 'greedy'
            for the CJK model languages below.
        allowlist / blocklist: restrict or exclude recognizable characters.
        detail: 0 returns only the text strings, otherwise full tuples.
        paragraph: merge results into paragraphs via ``get_paragraph``.

        Returns the (possibly paragraph-merged) recognition result list.
        """
        # NOTE(review): imgH is not defined in this method; presumably a
        # module-level constant (model input height) — verify.
        if reformat:
            img, img_cv_grey = reformat_input(img_cv_grey)

        # BUG FIX: compare with `is None`, not `== None` (PEP 8); `==`
        # also misbehaves if an array-like is ever passed for these args.
        if (horizontal_list is None) and (free_list is None):
            y_max, x_max = img_cv_grey.shape
            ratio = x_max / y_max
            max_width = int(imgH * ratio)
            crop_img = cv2.resize(img_cv_grey, (max_width, imgH),
                                  interpolation=Image.ANTIALIAS)
            image_list = [([[0, 0], [x_max, 0], [x_max, y_max],
                            [0, y_max]], crop_img)]
        else:
            image_list, max_width = get_image_list(horizontal_list,
                                                   free_list,
                                                   img_cv_grey,
                                                   model_height=imgH)

        # Build the set of characters the recognizer must ignore:
        # everything outside the allowlist, the blocklist itself, or
        # everything outside this model's language characters.
        if allowlist:
            ignore_char = ''.join(set(self.character) - set(allowlist))
        elif blocklist:
            ignore_char = ''.join(set(blocklist))
        else:
            ignore_char = ''.join(set(self.character) - set(self.lang_char))

        # Beam search is not supported for these languages — force greedy.
        if self.model_lang in [
                'chinese_tra', 'chinese_sim', 'japanese', 'korean'
        ]:
            decoder = 'greedy'
        result = get_text(self.character, imgH, int(max_width),
                          self.recognizer, self.converter, image_list,
                          ignore_char, decoder, beamWidth, batch_size,
                          contrast_ths, adjust_contrast, filter_ths,
                          workers, self.device)

        # Arabic text is right-to-left: reshape each string for display.
        if self.model_lang == 'arabic':
            direction_mode = 'rtl'
            result = [list(item) for item in result]
            for item in result:
                item[1] = get_display(item[1])
        else:
            direction_mode = 'ltr'

        if paragraph:
            result = get_paragraph(result, mode=direction_mode)

        if detail == 0:
            return [item[1] for item in result]
        else:
            return result
    def readtext(self, image, min_size=20,
                 text_threshold=0.7, low_text=0.4, link_threshold=0.4,
                 canvas_size=2560, mag_ratio=1.,
                 slope_ths=0.1, ycenter_ths=0.5, height_ths=0.5,
                 width_ths=0.5, add_margin=0.1):
        """Detect and recognize text, returning results plus the image.

        Parameters:
        image: file path or numpy-array or a byte stream object

        Returns a (result, img) pair where img is the reformatted input.
        """
        # Normalize the input once; the trailing False on both calls below
        # skips their internal reformat pass.
        img, img_cv_grey = reformat_input(image)

        horizontal_list, free_list = self.detect(
            img, min_size, text_threshold, low_text, link_threshold,
            canvas_size, mag_ratio, slope_ths, ycenter_ths,
            height_ths, width_ths, add_margin, False)

        recognition = self.recognize(img, horizontal_list, free_list, False)
        return recognition, img
    def recognize(self, img_cv_grey, horizontal_list=None, free_list=None,
                  decoder='greedy', beamWidth=5, batch_size=1,
                  workers=0, allowlist=None, blocklist=None, detail=1,
                  rotation_info=None,
                  paragraph=False,
                  contrast_ths=0.1, adjust_contrast=0.5, filter_ths=0.003,
                  reformat=True):
        """Recognize text inside the supplied box lists of a grayscale image.

        Parameters:
        img_cv_grey: grayscale image (or raw input when ``reformat``).
        horizontal_list, free_list: box lists from ``detect``; when both
            are None the full image is treated as one horizontal box.
        rotation_info: optional rotations to try per crop; the best-scoring
            orientation is kept via ``set_result_with_confidence``.
        detail: 0 returns only the text strings, otherwise full tuples.
        paragraph: merge results into paragraphs via ``get_paragraph``.

        Returns the (possibly paragraph-merged) recognition result list.
        """
        # NOTE(review): imgH is not defined in this method; presumably a
        # module-level constant (model input height) — verify.
        if reformat:
            img, img_cv_grey = reformat_input(img_cv_grey)

        # Characters the recognizer must ignore: everything outside the
        # allowlist, the blocklist itself, or non-language characters.
        if allowlist:
            ignore_char = ''.join(set(self.character) - set(allowlist))
        elif blocklist:
            ignore_char = ''.join(set(blocklist))
        else:
            ignore_char = ''.join(set(self.character) - set(self.lang_char))

        # Beam search is not supported for these languages — force greedy.
        if self.model_lang in ['chinese_tra', 'chinese_sim']:
            decoder = 'greedy'

        # BUG FIX: compare with `is None`, not `== None` (PEP 8); `==`
        # also misbehaves if an array-like is ever passed for these args.
        if (horizontal_list is None) and (free_list is None):
            y_max, x_max = img_cv_grey.shape
            horizontal_list = [[0, x_max, 0, y_max]]
            free_list = []

        # without gpu/parallelization, it is faster to process image one by one
        if ((batch_size == 1) or (self.device == 'cpu')) and not rotation_info:
            result = []
            for bbox in horizontal_list:
                h_list = [bbox]
                f_list = []
                image_list, max_width = get_image_list(h_list,
                                                       f_list,
                                                       img_cv_grey,
                                                       model_height=imgH)
                result0 = get_text(self.character, imgH, int(max_width),
                                   self.recognizer, self.converter,
                                   image_list, ignore_char, decoder,
                                   beamWidth, batch_size, contrast_ths,
                                   adjust_contrast, filter_ths,
                                   workers, self.device)
                result += result0
            for bbox in free_list:
                h_list = []
                f_list = [bbox]
                image_list, max_width = get_image_list(h_list,
                                                       f_list,
                                                       img_cv_grey,
                                                       model_height=imgH)
                result0 = get_text(self.character, imgH, int(max_width),
                                   self.recognizer, self.converter,
                                   image_list, ignore_char, decoder,
                                   beamWidth, batch_size, contrast_ths,
                                   adjust_contrast, filter_ths,
                                   workers, self.device)
                result += result0
        # default mode will try to process multiple boxes at the same time
        else:
            image_list, max_width = get_image_list(horizontal_list,
                                                   free_list,
                                                   img_cv_grey,
                                                   model_height=imgH)
            image_len = len(image_list)
            if rotation_info and image_list:
                # Expand the crop list with rotated copies of each image.
                image_list = make_rotated_img_list(rotation_info, image_list)
                max_width = max(max_width, imgH)

            result = get_text(self.character, imgH, int(max_width),
                              self.recognizer, self.converter, image_list,
                              ignore_char, decoder, beamWidth, batch_size,
                              contrast_ths, adjust_contrast, filter_ths,
                              workers, self.device)

            if rotation_info and (horizontal_list + free_list):
                # Collapse rotated variants back to one result per box.
                result = set_result_with_confidence(result, image_len)

        # Arabic text is right-to-left: reshape each string for display.
        if self.model_lang == 'arabic':
            direction_mode = 'rtl'
            result = [list(item) for item in result]
            for item in result:
                item[1] = get_display(item[1])
        else:
            direction_mode = 'ltr'

        if paragraph:
            result = get_paragraph(result, mode=direction_mode)

        if detail == 0:
            return [item[1] for item in result]
        else:
            return result