def readtext(self, image, decoder = 'greedy', beamWidth= 5, batch_size = 1,
             workers = 0, allowlist = None, blocklist = None, detail = 1,
             paragraph = False, min_size = 20,
             contrast_ths = 0.1, adjust_contrast = 0.5, filter_ths = 0.003,
             text_threshold = 0.7, low_text = 0.4, link_threshold = 0.4,
             canvas_size = 2560, mag_ratio = 1.,
             slope_ths = 0.1, ycenter_ths = 0.5, height_ths = 0.5,
             width_ths = 0.5, add_margin = 0.1):
    '''
    Run text detection followed by recognition on one image.

    Parameters:
    image: file path or numpy-array or a byte stream object
    The remaining keyword arguments are forwarded to detect() and
    recognize() unchanged.
    '''
    # Normalise the input into a colour image plus its grayscale twin.
    img, img_cv_grey = reformat_input(image)

    # Detection stage; final False disables re-running reformat_input.
    horizontal_list, free_list = self.detect(
        img, min_size, text_threshold, low_text, link_threshold,
        canvas_size, mag_ratio, slope_ths, ycenter_ths,
        height_ths, width_ths, add_margin, False)

    # Recognition stage on the grayscale image, again skipping reformat.
    return self.recognize(
        img_cv_grey, horizontal_list, free_list,
        decoder, beamWidth, batch_size, workers, allowlist, blocklist,
        detail, paragraph, contrast_ths, adjust_contrast, filter_ths, False)
def detect(self, img, min_size = 20, text_threshold = 0.7, low_text = 0.4,
           link_threshold = 0.4, canvas_size = 2560, mag_ratio = 1.,
           slope_ths = 0.1, ycenter_ths = 0.5, height_ths = 0.5,
           width_ths = 0.5, add_margin = 0.1, reformat=True):
    '''
    Locate text regions in an image.

    Returns a pair (horizontal_list, free_list): axis-aligned boxes as
    [x_min, x_max, y_min, y_max] and free-form quadrilaterals as point
    lists. Boxes whose larger side is not above min_size are dropped.
    '''
    if reformat:
        img, img_cv_grey = reformat_input(img)

    text_box = get_textbox(self.detector, img, canvas_size, mag_ratio,
                           text_threshold, link_threshold, low_text,
                           False, self.device)
    horizontal_list, free_list = group_text_box(text_box, slope_ths,
                                                ycenter_ths, height_ths,
                                                width_ths, add_margin)

    if min_size:
        # Discard regions that are too small on both axes.
        kept_horizontal = []
        for box in horizontal_list:
            if max(box[1] - box[0], box[3] - box[2]) > min_size:
                kept_horizontal.append(box)
        horizontal_list = kept_horizontal

        kept_free = []
        for poly in free_list:
            xs = [pt[0] for pt in poly]
            ys = [pt[1] for pt in poly]
            if max(diff(xs), diff(ys)) > min_size:
                kept_free.append(poly)
        free_list = kept_free

    return horizontal_list, free_list
def recognize(self, img, horizontal_list=None, free_list=None, reformat=True, imgH=32):
    '''
    Recognize text inside the given regions of an image.

    Parameters:
    img: input image (path/array/stream when reformat is True).
    horizontal_list / free_list: detection boxes; when BOTH are None the
        whole image is treated as a single text line.
    reformat: run reformat_input() on img first.
    imgH: model input height the crops are resized to.

    Returns whatever get_text() produces for the built image list.
    '''
    if reformat:
        # BUG FIX: the original called reformat_input(img_cv_grey), but
        # img_cv_grey is not defined at this point (NameError on this
        # path). The incoming image is `img`.
        img, img_cv_grey = reformat_input(img)

    if (horizontal_list is None) and (free_list is None):
        # No boxes supplied: use the full image as one region.
        # NOTE(review): 3-way unpack implies a 3-D (e.g. channel-first)
        # array here — confirm against reformat_input's output layout.
        b, y_max, x_max = img.shape
        ratio = x_max / y_max
        max_width = int(imgH * ratio)
        # NOTE(review): Image.ANTIALIAS is a PIL constant handed to
        # cv2.resize, which expects a cv2.INTER_* flag; ANTIALIAS is also
        # removed in Pillow >= 10 — verify the intended interpolation.
        crop_img = cv2.resize(img, (max_width, imgH),
                              interpolation=Image.ANTIALIAS)
        image_list = [([[0, 0], [x_max, 0], [x_max, y_max], [0, y_max]],
                       crop_img)]
    else:
        image_list, max_width = get_image_list(horizontal_list, free_list,
                                               img, model_height=imgH)

    result = get_text(self.recognizer, image_list)
    return result
def train_model(model_builder, images, labels, fold_pairs, epochs=10,
                batch_size=32, callbacks=None, verbose=0, fold_verbose=0):
    '''
    Train and evaluate a model with cross-validation.

    Parameters:
    model_builder: zero-argument callable returning a fresh model per fold.
    images, labels: full dataset, split per fold by reformat_input().
    fold_pairs: one train/val/test split specification per fold.
    epochs, batch_size: passed to model.fit().
    callbacks: optional list of callbacks for model.fit().
    verbose: 0 silent; >=1 mean scores; >=2 per-fold scores; ==3 adds
        per-class precision/recall/F1 on the test split.
    fold_verbose: verbosity forwarded to model.fit().

    Returns (mean_val_score, mean_test_score) averaged over the folds.
    '''
    # BUG FIX: the original used a mutable default argument
    # (callbacks=[]), which is shared across all calls.
    if callbacks is None:
        callbacks = []

    mean_val_score = 0
    mean_test_score = 0
    n_folds = len(fold_pairs)

    for i, fold in enumerate(fold_pairs):
        # Fresh model and preprocessor per fold so folds stay independent.
        model = model_builder()
        image_preprocessor = ImagePreprocessor()
        (X_train, y_train), (X_val, y_val), (X_test, y_test) = \
            reformat_input(images, labels, fold)

        # Fit the preprocessor on training data only to avoid leakage.
        X_train = image_preprocessor.fit_transform(X_train)
        X_val = image_preprocessor.transform(X_val)
        X_test = image_preprocessor.transform(X_test)

        model.fit(X_train, y_train, verbose=fold_verbose, epochs=epochs,
                  batch_size=batch_size, validation_data=(X_val, y_val),
                  callbacks=callbacks)

        # Class predictions from per-class scores.
        y_test_predict = np.argmax(model.predict(X_test), axis=1)
        y_val_predict = np.argmax(model.predict(X_val), axis=1)

        val_accuracy_score = accuracy_score(y_val, y_val_predict)
        test_accuracy_score = accuracy_score(y_test, y_test_predict)
        mean_val_score += val_accuracy_score
        mean_test_score += test_accuracy_score

        if verbose >= 2:
            print('===========Fold {}/{}==========='.format(i + 1, n_folds))
            print('Validation Accuracy score: ', val_accuracy_score)
            print('Test Accuracy score: ', test_accuracy_score)
            if verbose == 3:
                print('Test Precision score: ',
                      precision_score(y_test, y_test_predict, average=None))
                print('Test Recall score: ',
                      recall_score(y_test, y_test_predict, average=None))
                print('Test F1 score: ',
                      f1_score(y_test, y_test_predict, average=None))

    mean_val_score /= n_folds
    mean_test_score /= n_folds
    if verbose >= 1:
        print('\nMean validation accuracy score: ', mean_val_score)
        print('Mean test accuracy score: ', mean_test_score)

    # Improvement: expose the averages instead of only printing them
    # (backward-compatible — previous callers ignored the None return).
    return mean_val_score, mean_test_score
def recognize(self, img_cv_grey, horizontal_list=None, free_list=None,
              decoder = 'greedy', beamWidth= 5, batch_size = 1,
              workers = 0, allowlist = None, blocklist = None, detail = 1,
              paragraph = False,
              contrast_ths = 0.1, adjust_contrast = 0.5, filter_ths = 0.003,
              reformat=True):
    '''
    Recognize text in a grayscale image.

    Parameters:
    img_cv_grey: grayscale image (or raw input when reformat is True).
    horizontal_list / free_list: detection boxes; when BOTH are None the
        whole image is processed as a single line.
    allowlist / blocklist: restrict the recognizable character set;
        allowlist wins when both are given.
    detail: 0 returns text strings only; otherwise full result tuples.
    paragraph: merge results into paragraphs via get_paragraph().

    Returns the recognition results from get_text(), post-processed for
    RTL display when the model language is Arabic.
    '''
    if reformat:
        img, img_cv_grey = reformat_input(img_cv_grey)

    # Idiom fix: compare to None with `is`, not `==`.
    if (horizontal_list is None) and (free_list is None):
        # Whole image as one region, resized to the model height.
        y_max, x_max = img_cv_grey.shape
        ratio = x_max / y_max
        # NOTE(review): imgH is not a parameter here — presumably a
        # module-level constant; confirm it is defined in this module.
        max_width = int(imgH * ratio)
        # NOTE(review): Image.ANTIALIAS is a PIL constant handed to
        # cv2.resize (expects cv2.INTER_*) and is removed in Pillow >= 10.
        crop_img = cv2.resize(img_cv_grey, (max_width, imgH),
                              interpolation=Image.ANTIALIAS)
        image_list = [([[0, 0], [x_max, 0], [x_max, y_max], [0, y_max]],
                       crop_img)]
    else:
        image_list, max_width = get_image_list(horizontal_list, free_list,
                                               img_cv_grey, model_height=imgH)

    # Characters outside the requested set are ignored during decoding.
    if allowlist:
        ignore_char = ''.join(set(self.character) - set(allowlist))
    elif blocklist:
        ignore_char = ''.join(set(blocklist))
    else:
        ignore_char = ''.join(set(self.character) - set(self.lang_char))

    # These languages are forced to greedy decoding.
    if self.model_lang in ['chinese_tra', 'chinese_sim', 'japanese',
                           'korean']:
        decoder = 'greedy'

    result = get_text(self.character, imgH, int(max_width),
                      self.recognizer, self.converter, image_list,
                      ignore_char, decoder, beamWidth, batch_size,
                      contrast_ths, adjust_contrast, filter_ths,
                      workers, self.device)

    if self.model_lang == 'arabic':
        direction_mode = 'rtl'
        # Convert logical order to display order for RTL text.
        result = [list(item) for item in result]
        for item in result:
            item[1] = get_display(item[1])
    else:
        direction_mode = 'ltr'

    if paragraph:
        result = get_paragraph(result, mode=direction_mode)

    if detail == 0:
        return [item[1] for item in result]
    else:
        return result
def readtext(self, image, min_size = 20,
             text_threshold = 0.7, low_text = 0.4, link_threshold = 0.4,
             canvas_size = 2560, mag_ratio = 1.,
             slope_ths = 0.1, ycenter_ths = 0.5, height_ths = 0.5,
             width_ths = 0.5, add_margin = 0.1):
    '''
    Detect and recognize text in one image.

    Parameters:
    image: file path or numpy-array or a byte stream object

    Returns a pair (result, img): the recognition results and the
    reformatted input image.
    '''
    img, img_cv_grey = reformat_input(image)

    # Detection; trailing False skips a second reformat_input call.
    detected = self.detect(img, min_size, text_threshold, low_text,
                           link_threshold, canvas_size, mag_ratio,
                           slope_ths, ycenter_ths, height_ths, width_ths,
                           add_margin, False)
    horizontal_list, free_list = detected

    # Recognition on the detected regions, also skipping reformat.
    result = self.recognize(img, horizontal_list, free_list, False)
    return result, img
def recognize(self, img_cv_grey, horizontal_list=None, free_list=None,
              decoder = 'greedy', beamWidth= 5, batch_size = 1,
              workers = 0, allowlist = None, blocklist = None, detail = 1,
              rotation_info = None,
              paragraph = False,
              contrast_ths = 0.1, adjust_contrast = 0.5, filter_ths = 0.003,
              reformat=True):
    '''
    Recognize text in a grayscale image, optionally trying rotated copies.

    Parameters:
    img_cv_grey: grayscale image (or raw input when reformat is True).
    horizontal_list / free_list: detection boxes; when BOTH are None the
        whole image is used as a single horizontal box.
    rotation_info: angles to additionally try via make_rotated_img_list();
        the best-confidence variant is kept per box.
    detail: 0 returns text strings only; otherwise full result tuples.

    Returns the recognition results, post-processed for RTL display when
    the model language is Arabic.
    '''
    if reformat:
        img, img_cv_grey = reformat_input(img_cv_grey)

    # Characters outside the requested set are ignored during decoding.
    if allowlist:
        ignore_char = ''.join(set(self.character) - set(allowlist))
    elif blocklist:
        ignore_char = ''.join(set(blocklist))
    else:
        ignore_char = ''.join(set(self.character) - set(self.lang_char))

    # These languages are forced to greedy decoding.
    if self.model_lang in ['chinese_tra', 'chinese_sim']:
        decoder = 'greedy'

    # Idiom fix: compare to None with `is`, not `==`.
    if (horizontal_list is None) and (free_list is None):
        y_max, x_max = img_cv_grey.shape
        horizontal_list = [[0, x_max, 0, y_max]]
        free_list = []

    # NOTE(review): imgH below is not a parameter — presumably a
    # module-level constant; confirm it is defined in this module.
    # Without gpu/parallelization, it is faster to process images one by one.
    if ((batch_size == 1) or (self.device == 'cpu')) and not rotation_info:
        result = []
        for bbox in horizontal_list:
            h_list = [bbox]
            f_list = []
            image_list, max_width = get_image_list(h_list, f_list,
                                                   img_cv_grey,
                                                   model_height=imgH)
            result0 = get_text(self.character, imgH, int(max_width),
                               self.recognizer, self.converter, image_list,
                               ignore_char, decoder, beamWidth, batch_size,
                               contrast_ths, adjust_contrast, filter_ths,
                               workers, self.device)
            result += result0
        for bbox in free_list:
            h_list = []
            f_list = [bbox]
            image_list, max_width = get_image_list(h_list, f_list,
                                                   img_cv_grey,
                                                   model_height=imgH)
            result0 = get_text(self.character, imgH, int(max_width),
                               self.recognizer, self.converter, image_list,
                               ignore_char, decoder, beamWidth, batch_size,
                               contrast_ths, adjust_contrast, filter_ths,
                               workers, self.device)
            result += result0
    # Default mode processes multiple boxes at the same time.
    else:
        image_list, max_width = get_image_list(horizontal_list, free_list,
                                               img_cv_grey,
                                               model_height=imgH)
        image_len = len(image_list)
        if rotation_info and image_list:
            # Append rotated variants of every crop; widths may change.
            image_list = make_rotated_img_list(rotation_info, image_list)
            max_width = max(max_width, imgH)

        result = get_text(self.character, imgH, int(max_width),
                          self.recognizer, self.converter, image_list,
                          ignore_char, decoder, beamWidth, batch_size,
                          contrast_ths, adjust_contrast, filter_ths,
                          workers, self.device)

        if rotation_info and (horizontal_list + free_list):
            # Keep the highest-confidence rotation variant per original box.
            result = set_result_with_confidence(result, image_len)

    if self.model_lang == 'arabic':
        direction_mode = 'rtl'
        # Convert logical order to display order for RTL text.
        result = [list(item) for item in result]
        for item in result:
            item[1] = get_display(item[1])
    else:
        direction_mode = 'ltr'

    if paragraph:
        result = get_paragraph(result, mode=direction_mode)

    if detail == 0:
        return [item[1] for item in result]
    else:
        return result