def __init__(self, weights):
    """Prepare the province lookup tables and the text recognizer.

    :param weights: value stored under the ``weights`` key of the
        ``vgg_transformer`` config before the predictor is created
    """
    # Provinces and cities, written with diacritics.
    self.tinh_list = [
        'An Giang', 'Bà Rịa - Vũng Tàu', 'Bắc Giang', 'Bắc Kạn',
        'Bạc Liêu', 'Bắc Ninh', 'Bến Tre', 'Bình Định', 'Bình Dương',
        'Bình Phước', 'Bình Thuận', 'Cà Mau', 'Cao Bằng', 'Đắk Lắk',
        'Đắk Nông', 'Điện Biên', 'Đồng Nai', 'Đồng Tháp', 'Gia Lai',
        'Hà Giang', 'Hà Nam', 'Hà Tĩnh', 'Hải Dương', 'Hậu Giang',
        'Hòa Bình', 'Hưng Yên', 'Khánh Hòa', 'Kiên Giang', 'Kon Tum',
        'Lai Châu', 'Lâm Đồng', 'Lạng Sơn', 'Lào Cai', 'Long An',
        'Nam Định', 'Nghệ An', 'Ninh Bình', 'Ninh Thuận', 'Phú Thọ',
        'Quảng Bình', 'Quảng Nam', 'Quảng Ngãi', 'Quảng Ninh', 'Quảng Trị',
        'Sóc Trăng', 'Sơn La', 'Tây Ninh', 'Thái Bình', 'Thái Nguyên',
        'Thanh Hóa', 'Thừa Thiên Huế', 'Tiền Giang', 'Trà Vinh',
        'Tuyên Quang', 'Vĩnh Long', 'Vĩnh Phúc', 'Yên Bái', 'Phú Yên',
        'Cần Thơ', 'Đà Nẵng', 'Hải Phòng', 'Hà Nội', 'TP Hồ Chí Minh',
    ]

    # The same names with accents removed and lower-cased, index-aligned
    # with ``tinh_list``.
    plain_names = []
    for name in self.tinh_list:
        plain_names.append(self.remove_accent(name).lower())
    self.provinces = plain_names

    cfg = Cfg.load_config_from_name('vgg_transformer')
    cfg['weights'] = weights
    cfg['cnn']['pretrained'] = False
    cfg['device'] = 'cpu'
    cfg['predictor']['beamsearch'] = False
    self.config = cfg
    self.reader = Predictor(cfg)
def __init__(self):
    """Create the ``vgg_transformer`` predictor from local weights on CUDA."""
    settings = Cfg.load_config_from_name('vgg_transformer')
    settings['device'] = 'cuda'
    settings['weights'] = './model/transformerocr.pth'
    settings['predictor']['beamsearch'] = False
    self.detector = Predictor(settings)
def __init__(self,
             cmnd_detect_config_path='./center/config/cmnd.yml',
             line_detect_weight_path='weights/line_detect_weight.pth',
             reg_model='vgg_seq2seq',
             ocr_weight_path='weights/vgg-seq2seq.pth'):
    """Load the card detector, the line detector and the text recognizer.

    :param cmnd_detect_config_path: config file passed to ``Cfg`` for the
        card detection model
    :param line_detect_weight_path: weights handed to ``LineDetection``
    :param reg_model: named recognizer config loaded through ``Cfg_reg``
    :param ocr_weight_path: stored as the recognizer's ``weights`` entry
    """
    print("Loading TEXT_MODEL...")

    # Detection stages.
    self.cmnd_detect_module = CENTER_MODEL(
        Cfg.load_config_from_file(cmnd_detect_config_path))
    self.line_detect_module = LineDetection(line_detect_weight_path)

    # Recognition stage, pinned to CPU.
    reg_config = Cfg_reg.load_config_from_name(reg_model)
    reg_config['weights'] = ocr_weight_path
    reg_config['device'] = 'cpu'
    reg_config['predictor']['beamsearch'] = False
    self.recognition_text_module = Predictor(reg_config)
def __init__(self, config) -> None:
    """Build the recognizer from ``config/base.yml`` plus an override file.

    :param config: path of the override config file; also kept on
        ``self.config``
    """
    super(ocr, self).__init__()
    self.config = config

    # Entries of the override file replace those of the base file.
    merged = Cfg.load_config_from_file("config/base.yml")
    merged.update(Cfg.load_config_from_file(config))
    merged['vocab'] = character

    self.text_r = Predictor(merged)
def __init__(self, reg_model='seq2seq'):
    """Load the box model, the text recognizer and the CRAFT detector.

    :param reg_model: recognizer variant; only ``"seq2seq"`` is handled
    :raises ValueError: if ``reg_model`` names an unsupported variant
    """
    print("Loading TEXT_MODEL...")

    # Fail early and explicitly: without this check an unknown variant
    # left ``config`` unbound and crashed further down with a confusing
    # unbound-name error.
    if reg_model != "seq2seq":
        raise ValueError(
            "Unsupported reg_model {!r}; expected 'seq2seq'".format(reg_model))

    config = Cfg.load_config_from_name('vgg_seq2seq')
    config['weights'] = 'weights/vgg-seq2seq.pth'
    self.model_box = BOX_MODEL()
    config['device'] = 'cpu'
    config['predictor']['beamsearch'] = False
    self.model_reg = Predictor(config)
    self.craft_model = CraftDetection()
def __init__(self):
    """Load the YOLOv4 field detector and the text recognizer."""
    # Field detector.
    detector_net = YOLOv4()
    self.yolo = detector_net
    detector_net.classes = './coco.names'
    detector_net.make_model()
    detector_net.load_weights("./model/yolov4-custom_last.weights",
                              weights_type="yolo")

    # Text recognizer, run on CPU.
    cfg = Cfg.load_config()
    self.config = cfg
    cfg['weights'] = './model/transformerocr.pth'
    cfg['predictor']['beamsearch'] = False
    cfg['device'] = 'cpu'
    self.detector = Predictor(cfg)

    # One accumulated text slot per field label.
    self.classes = ['id', 'name', 'dmy', 'add1', 'add2']
    self.res = {label: '' for label in self.classes}
def __init__(self, ckpt_path=None, gpu='0'):
    """Load the classifier config and build the predictor.

    :param ckpt_path: optional checkpoint stored as the ``weights`` entry
    :param gpu: CUDA device index; ``None`` selects the CPU
    """
    print('Classifier_Vietocr. Init')
    cfg = Cfg.load_config(cls_base_config_path, cls_config_path)
    self.config = cfg

    if ckpt_path is not None:
        cfg['weights'] = ckpt_path
    cfg['cnn']['pretrained'] = False
    cfg['device'] = 'cpu' if gpu is None else 'cuda:' + str(gpu)
    cfg['predictor']['beamsearch'] = False

    self.model = Predictor(cfg)
def predict_file():
    """Run the fine-tuned word model over the verifier label file.

    Loads the training config, points it at the fine-tuned checkpoint,
    prints the resulting config and asks the predictor to generate the
    prediction annotations.
    """
    data_root = './DATA/data_verifier'
    labels_in = './DATA/data_verifier/hw_word_15k_labels.txt'
    preds_out = './DATA/data_verifier/hw_word_15k_labels_preds.txt'

    config = Cfg.load_config_from_file('./logs/hw_word_seq2seq/config.yml',
                                       download_base=False)
    config['weights'] = './logs/hw_word_seq2seq_finetuning/best.pt'
    print(config.pretty_text())

    Predictor(config).gen_annotations(labels_in, preds_out,
                                      data_root=data_root)
def create_text_annotation_ocr(imgs, dest):
    """Recognize every image and append the results to ``annotation.txt``.

    One ``crop_img/<index>.jpg<TAB><text>`` record is written per image,
    with a 1-based, zero-padded index. Records are separated by newlines
    and the last record has no trailing newline.

    :param imgs: sequence of images accepted by ``Predictor.predict``
    :param dest: directory containing (or receiving) ``annotation.txt``
    """
    config = Cfg.load_config_from_name('vgg_transformer')
    config['export'] = 'transformerocr_checkpoint.pth'
    config['device'] = 'cuda'
    config['predictor']['beamsearch'] = False
    detector = Predictor(config)

    total = len(imgs)
    # The context manager closes the file even when prediction raises.
    # NOTE(review): the file is opened in append mode and the last record
    # has no newline, so a second run continues on the same line -
    # confirm this is intended.
    with io.open(os.path.join(dest, "annotation.txt"), "a",
                 encoding="utf-8") as f:
        for number, image in enumerate(imgs, start=1):
            text = detector.predict(image)
            record = 'crop_img/{:06d}.jpg\t{}'.format(number, text)
            if number != total:
                record += '\n'
            f.write(record)
def main():
    """Recognize the text of one image given on the command line.

    Requires ``--img`` (image path) and ``--config`` (config file path)
    and prints the predicted string.
    """
    cli = argparse.ArgumentParser()
    for flag in ('--img', '--config'):
        cli.add_argument(flag, required=True, help='foo help')
    options = cli.parse_args()

    detector = Predictor(Cfg.load_config_from_file(options.config))
    print(detector.predict(Image.open(options.img)))
class Classifier_Vietocr:
    """Text recognizer wrapper around a ``Predictor`` instance."""

    def __init__(self, ckpt_path=None, gpu='0'):
        """Load the classifier config and build the predictor.

        :param ckpt_path: optional checkpoint stored as the ``weights`` entry
        :param gpu: CUDA device index; ``None`` selects the CPU
        """
        print('Classifier_Vietocr. Init')
        cfg = Cfg.load_config(cls_base_config_path, cls_config_path)
        self.config = cfg

        if ckpt_path is not None:
            cfg['weights'] = ckpt_path
        cfg['cnn']['pretrained'] = False
        cfg['device'] = 'cpu' if gpu is None else 'cuda:' + str(gpu)
        cfg['predictor']['beamsearch'] = False

        self.model = Predictor(cfg)

    def inference(self, numpy_list, debug=False):
        """Recognize the text in each image array.

        :param numpy_list: sequence of arrays accepted by ``Image.fromarray``
        :param debug: when true, print each result and show the crop in a
            window until a key is pressed
        :return: ``(texts, probabilities)``, two lists aligned with the input
        """
        print('Classifier_Vietocr. Inference', len(numpy_list), 'boxes')
        texts, probs = [], []
        for crop in numpy_list:
            text, prob = self.model.predict(Image.fromarray(crop), True)
            if debug:
                print(round(prob, 3), text)
                cv2.imshow('sample', crop)
                cv2.waitKey(0)
            texts.append(text)
            probs.append(prob)
        return texts, probs
class TEXT_IMAGES(object):
    """Pipeline: card detection, then line detection, then recognition."""

    def __init__(self,
                 cmnd_detect_config_path='./center/config/cmnd.yml',
                 line_detect_weight_path='weights/line_detect_weight.pth',
                 reg_model='vgg_seq2seq',
                 ocr_weight_path='weights/vgg-seq2seq.pth'):
        """Load the three models used by the pipeline.

        :param cmnd_detect_config_path: config file for the card detector
        :param line_detect_weight_path: weights handed to ``LineDetection``
        :param reg_model: named recognizer config loaded via ``Cfg_reg``
        :param ocr_weight_path: stored as the recognizer's ``weights`` entry
        """
        print("Loading TEXT_MODEL...")

        self.cmnd_detect_module = CENTER_MODEL(
            Cfg.load_config_from_file(cmnd_detect_config_path))
        self.line_detect_module = LineDetection(line_detect_weight_path)

        reg_config = Cfg_reg.load_config_from_name(reg_model)
        reg_config['weights'] = ocr_weight_path
        reg_config['device'] = 'cpu'
        reg_config['predictor']['beamsearch'] = False
        self.recognition_text_module = Predictor(reg_config)

    def get_content_image(self, image, show_line=False):
        """Recognize every detected text line of a card image.

        :param image: input image handed to the card detector
        :param show_line: forwarded to the line detector's ``predict_box``
        :return: ``(texts_by_label, drawn_image)``, or ``(None, None)``
            when no card is detected
        """
        card_img, have_cmnd = self.cmnd_detect_module.detect_obj(image)
        if not have_cmnd:
            print("Không phát hiện CMND!!!")
            return None, None

        lines_by_label, img_draw_box = self.line_detect_module.predict_box(
            card_img, show_line)

        recognize = self.recognition_text_module.predict
        result_ocr = {}
        for label, line_imgs in lines_by_label.items():
            result_ocr[label] = [recognize(line) for line in line_imgs]

        print(result_ocr)
        return result_ocr, img_draw_box
class Classifier_Vietocr:
    """Text recognizer wrapper around a named VietOCR config."""

    def __init__(self, ckpt_path=None, gpu='0', config_name='vgg_seq2seq',
                 write_file=False, debug=False):
        """Load the named config and build the predictor.

        :param ckpt_path: optional checkpoint stored as the ``weights`` entry
        :param gpu: CUDA device index; when ``None`` the device entry of
            the loaded config is left untouched
        :param config_name: name passed to ``Cfg.load_config_from_name``
        :param write_file: accepted but not used here
        :param debug: accepted but not used here
        """
        print('Classifier_Vietocr. Init')
        cfg = Cfg.load_config_from_name(config_name)
        self.config = cfg

        if ckpt_path is not None:
            cfg['weights'] = ckpt_path
        cfg['cnn']['pretrained'] = False
        if gpu is not None:
            cfg['device'] = 'cuda:' + str(gpu)
        cfg['predictor']['beamsearch'] = False

        self.model = Predictor(cfg)

    def inference(self, numpy_list, debug=False):
        """Recognize the text in each image array.

        :param numpy_list: sequence of arrays accepted by ``Image.fromarray``
        :param debug: when true, print each result and show the crop in a
            window until a key is pressed
        :return: ``(texts, probabilities)``, two lists aligned with the input
        """
        print('Classifier_Vietocr. Inference', len(numpy_list), 'boxes')
        texts, probs = [], []
        for crop in numpy_list:
            text, prob = self.model.predict(Image.fromarray(crop), True)
            if debug:
                print(round(prob, 3), text)
                cv2.imshow('sample', crop)
                cv2.waitKey(0)
            texts.append(text)
            probs.append(prob)
        return texts, probs
def __init__(self, ckpt_path=None, gpu='0', config_name='vgg_seq2seq',
             write_file=False, debug=False):
    """Load the named config and build the predictor.

    :param ckpt_path: optional checkpoint stored as the ``weights`` entry
    :param gpu: CUDA device index; when ``None`` the device entry of the
        loaded config is left untouched
    :param config_name: name passed to ``Cfg.load_config_from_name``
    :param write_file: accepted but not used here
    :param debug: accepted but not used here
    """
    print('Classifier_Vietocr. Init')
    cfg = Cfg.load_config_from_name(config_name)
    self.config = cfg

    if ckpt_path is not None:
        cfg['weights'] = ckpt_path
    cfg['cnn']['pretrained'] = False
    if gpu is not None:
        cfg['device'] = 'cuda:' + str(gpu)
    cfg['predictor']['beamsearch'] = False

    self.model = Predictor(cfg)
def load_recognition_model():
    """Build and return the OCR predictor from the local config and weights.

    :return: a ``Predictor`` configured for ``cuda:0``
    """
    ocr_config = Cfg.load_config_from_file('./vietocr/config.yml')
    ocr_config['weights'] = "./models/transformerocr.pth"
    ocr_config['device'] = 'cuda:0'
    ocr_config['cnn']['pretrained'] = False
    ocr_config['predictor']['beamsearch'] = False
    return Predictor(ocr_config)
def main():
    """Recognize one image using ``config/base.yml`` plus an override file.

    Requires ``--img`` (image path) and ``--config`` (override config
    path) and prints the predicted string.
    """
    cli = argparse.ArgumentParser()
    for flag in ('--img', '--config'):
        cli.add_argument(flag, required=True, help='foo help')
    options = cli.parse_args()

    # Entries of the override file replace those of the base file.
    merged = Cfg.load_config_from_file("config/base.yml")
    merged.update(Cfg.load_config_from_file(options.config))
    merged['vocab'] = character

    detector = Predictor(merged)
    print(detector.predict(Image.open(options.img)))
def img_to_text(list_img):
    """Recognize the text of every image and return the non-empty results.

    :param list_img: iterable of arrays exposing ``astype`` (they are cast
        to ``uint8`` before being turned into PIL images)
    :return: list of recognized strings, empty predictions dropped
    """
    results = []
    detector = None
    for raw in list_img:
        if detector is None:
            # Build the predictor once, on first use. It was previously
            # rebuilt for every image; building lazily keeps an empty
            # input free of any model loading.
            config = Cfg.load_config_from_name('vgg_transformer')
            # Path to the trained weights.
            config['weights'] = 'checkpoints/transformerocr.pth'
            config['device'] = 'cpu'  # e.g. 'cuda:0', 'cuda:1', 'cpu'
            detector = Predictor(config)

        pil_img = Image.fromarray(raw.astype(np.uint8))
        text = detector.predict(pil_img)
        if text:
            results.append(text)
    return results
def main():
    """Recognize one image, or every readable image of a directory.

    Command-line options: ``--config`` and ``--weight`` (both defaulted)
    and the required ``--img``, which is either a file or a directory.
    For each image the text, its probability and the elapsed time are
    printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str,
                        default='./logs/hw_word_seq2seq/config.yml')
    parser.add_argument('--weight', type=str,
                        default='./logs/hw_word_seq2seq/best.pt')
    parser.add_argument('--img', type=str, default=None, required=True)
    args = parser.parse_args()

    config = Cfg.load_config_from_file(args.config, download_base=False)
    config['weights'] = args.weight
    print(config.pretty_text())
    detector = Predictor(config)

    def report(label, img, started):
        # Predict one image and print the result line.
        s, prob = detector.predict(img, return_prob=True)
        print('Text in {} is:\t {} | prob: {:.2f} | times: {:.2f}'.format(
            label, s, prob, time.time() - started))

    if os.path.isdir(args.img):
        for img_path in os.listdir(args.img):
            try:
                img = Image.open(os.path.join(args.img, img_path))
            except (OSError, ValueError):
                # Skip entries that are not readable images (directories,
                # unrecognized formats). A bare ``except`` would also hide
                # KeyboardInterrupt and SystemExit.
                continue
            report(img_path, img, time.time())
    else:
        t1 = time.time()
        img = Image.open(args.img)
        report(args.img, img, t1)
class OCR():
    """Recognizer for OpenCV images, backed by a ``Predictor``."""

    def __init__(self):
        """Create the ``vgg_transformer`` predictor from local weights."""
        settings = Cfg.load_config_from_name('vgg_transformer')
        settings['device'] = 'cuda'
        settings['weights'] = './model/transformerocr.pth'
        settings['predictor']['beamsearch'] = False
        self.detector = Predictor(settings)

    def recognize(self, img):
        """Return the text predicted for a BGR image array.

        :param img: image array in BGR channel order
        :return: whatever ``Predictor.predict`` returns for the image
        """
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        return self.detector.predict(Image.fromarray(rgb))
def img_to_text(list_img):
    """Recognize every image except the first and keep non-empty results.

    :param list_img: indexable sequence of arrays exposing ``astype``
    :return: list of recognized strings for items 1..n-1, empty
        predictions dropped
    """
    # Default model config; weights are fetched from the given URL.
    config = Cfg.load_config_from_name("vgg_transformer")
    config["weights"] = (
        "https://drive.google.com/uc?id=13327Y1tz1ohsm5YZMyXVMPIOjoOA0OaA")
    config["device"] = "cpu"  # e.g. 'cuda:0', 'cuda:1', 'cpu'
    detector = Predictor(config)

    results = []
    # Start at 1: the item at index 0 is never recognized.
    for position in range(1, len(list_img)):
        pil_img = Image.fromarray(list_img[position].astype(np.uint8))
        text = detector.predict(pil_img)
        if len(text) > 0:
            results.append(text)
    return results
class ocr:
    """Recognizer built from a base config plus an override file."""

    def __init__(self, config) -> None:
        """Build the predictor.

        :param config: path of the override config file; also kept on
            ``self.config``
        """
        super(ocr, self).__init__()
        self.config = config

        # Entries of the override file replace those of the base file.
        merged = Cfg.load_config_from_file("config/base.yml")
        merged.update(Cfg.load_config_from_file(config))
        merged['vocab'] = character

        self.text_r = Predictor(merged)

    def run(self, im: Image):
        """Predict on ``im`` and pass the result through ``index_decode``."""
        raw_prediction = self.text_r.predict(im)
        return index_decode(raw_prediction)
def load_model():
    """Load the Paddle detection program and the VietOCR recognizer.

    :return: ``(detector, exe, config, eval_prog, eval_fetch_list)`` where
        ``detector`` is the VietOCR ``Predictor`` and the rest belong to
        the Paddle detection model
    :raises Exception: if the config has no ``Global.checkpoints`` value
    """
    config = program.load_config('./configs/det/det_r18_vd_db_v1.1.yml')

    # Check whether use_gpu=True is set on a CPU-only paddlepaddle build.
    use_gpu = config['Global']['use_gpu']
    program.check_gpu(use_gpu)
    if use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    build_det_model = create_module(config['Architecture']['function'])
    det_model = build_det_model(params=config)

    startup_prog = fluid.Program()
    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            _, eval_outputs = det_model(mode="test")
            eval_fetch_list = [
                eval_outputs[key].name for key in list(eval_outputs.keys())
            ]
    eval_prog = eval_prog.clone(for_test=True)
    exe.run(startup_prog)

    # Load checkpoints; a missing entry is fatal.
    checkpoints = config['Global'].get('checkpoints')
    if not checkpoints:
        raise Exception("{} not exists!".format(checkpoints))
    fluid.load(eval_prog, checkpoints, exe)
    logger.info("Finish initing model from {}".format(checkpoints))

    # Text recognizer, run on CPU.
    config_ocr = Cfg.load_config_from_name('vgg_seq2seq')
    config_ocr['weights'] = './my_weights/transformer.pth'
    config_ocr['cnn']['pretrained'] = False
    config_ocr['device'] = 'cpu'
    config_ocr['predictor']['beamsearch'] = False
    detector = Predictor(config_ocr)

    return detector, exe, config, eval_prog, eval_fetch_list
def __init__(self, model_path):
    """Load the PhoBERT classifier, its BPE/vocabulary and the recognizer.

    :param model_path: prefix prepended verbatim to the PhoBERT file names
        (no separator is added here)
    """
    device = "cuda:0"
    phobert_dir = model_path + 'PhoBERT/'

    # --- PhoBERT sequence classifier ---
    print("Loading Classification...")
    self.config = RobertaConfig.from_pretrained(
        phobert_dir + 'config.json',
        from_tf=False,
        num_labels=5,
        output_hidden_states=False,
    )
    classifier = RobertaForSequenceClassification.from_pretrained(
        phobert_dir + 'model.bin', config=self.config)
    self.phoBERT_cls = classifier.to(device)
    self.phoBERT_cls.eval()

    print("Loading pre-trained model...")
    state_dict = torch.load(
        model_path
        + 'roberta_state_dict_9bfb8319-01b2-4301-aa5a-756d390a98e1.pth')
    self.phoBERT_cls.load_state_dict(state_dict)
    print("Finished loading PhoBERT Classification model.")

    # --- BPE codes and vocabulary dictionary ---
    print("Loading BPE and vocab dict ...")

    class BPE():
        # Argument holder handed to fastBPE.
        bpe_codes = phobert_dir + 'bpe.codes'

    self.bpe = fastBPE(BPE())
    self.vocab = Dictionary()
    self.vocab.add_from_file(phobert_dir + "dict.txt")
    print("Finished loading BPE and vocab dict.")

    # --- Text recognizer ---
    ocr_config = Cfg.load_config_from_name('vgg_transformer')
    ocr_config['weights'] = 'weights/transformerocr.pth'
    ocr_config['cnn']['pretrained'] = False
    ocr_config['device'] = 'cuda:0'
    ocr_config['predictor']['beamsearch'] = False
    self.text_recognizer = Predictor(ocr_config)
class PredictorImage(object):
    """Detect ID-card fields with YOLOv4 and read them with the recognizer."""

    def __init__(self):
        """Load the YOLOv4 field detector and the text recognizer."""
        self.yolo = YOLOv4()
        self.yolo.classes = './coco.names'
        self.yolo.make_model()
        self.yolo.load_weights("./model/yolov4-custom_last.weights",
                               weights_type="yolo")

        self.config = Cfg.load_config()
        self.config['weights'] = './model/transformerocr.pth'
        self.config['predictor']['beamsearch'] = False
        self.config['device'] = 'cpu'
        self.detector = Predictor(self.config)

        # One accumulated text slot per field label.
        self.classes = ['id', 'name', 'dmy', 'add1', 'add2']
        self.res = dict.fromkeys(self.classes, '')

    def predict(self, img):
        """Return the raw YOLO detections for ``img``."""
        return self.yolo.predict(img)

    def inference(self, img):
        """Detect the fields of ``img``, recognize them and show the result.

        Each detection is read as ``(x_center, y_center, width, height,
        class, score)`` with the box values scaled by the image size.
        The recognized text is accumulated per label in ``self.res``,
        which is printed, and the annotated image is shown until a key
        is pressed.

        :param img: image array with three dimensions; rectangles are
            drawn on it in place
        """
        # Reset so text from a previous image does not leak into this one.
        self.res = dict.fromkeys(self.classes, '')

        img_h, img_w = img.shape[:2]
        for r in self.predict(img):
            x_center_p, y_center_p, w_p, h_p, cl = r[0], r[1], r[2], r[3], r[4]
            w = int(img_w * w_p)
            h = int(img_h * h_p)
            x = int(x_center_p * img_w - w / 2)
            y = int(y_center_p * img_h - h / 2)

            # Clamp the crop to the image: a negative start index would
            # wrap around and yield a wrong or empty slice.
            left, top = max(x, 0), max(y, 0)
            right, bottom = min(x + w, img_w), min(y + h, img_h)
            if right <= left or bottom <= top:
                continue  # nothing to recognize

            crop = img[top:bottom, left:right, :]
            self.predict_ocr(crop, self.classes[int(cl)])
            cv2.rectangle(img, (int(x), int(y)), (int(x + w), int(y + h)),
                          (0, 255, 0), 1)

        print(self.res)
        cv2.imshow('predict', img)
        cv2.waitKey(0)

    def predict_ocr(self, img, label):
        """Recognize ``img`` and append the text to ``self.res[label]``."""
        img = Image.fromarray(img)
        s = self.detector.predict(img)
        self.res[label] += s + ' '
y_dist_limit: 10 (Maximum distance by y coordinate to merge two boxes) x_dist_limit: 40 (Maximum distance by x coordinate to merge two boxes) iou_limit = 0.001 ''' need_merging = True while need_merging: need_merging, texts, bboxes_xxyy = merge_box_by_iou( texts, bboxes_xxyy) need_merging = True while need_merging: need_merging, texts, bboxes_xxyy = merge_box_by_distance( texts, bboxes_xxyy) return texts if __name__ == "__main__": config = Cfg.load_config_from_name('vgg_transformer') config['weights'] = 'weights/transformerocr.pth' config['cnn']['pretrained'] = False config['device'] = 'cuda:0' config['predictor']['beamsearch'] = False text_recognizer = Predictor(config) test_image_path = "test_data/Công văn 641_UBND-NC PDF.pdf.jpg" image = cv2.imread(test_image_path) detected_texts = export_text(image, text_recognizer) print(detected_texts)
from PIL import Image import time from vietocr.tool.predictor import Predictor from vietocr.tool.config import Cfg config = Cfg.load_config_from_name('vgg_seq2seq') # config['weights'] = './transformerocr.pth' # config['weights'] = 'https://drive.google.com/uc?id=13327Y1tz1ohsm5YZMyXVMPIOjoOA0OaA' config['device'] = 'cuda:0' config['predictor']['beamsearch'] = False # config['trainer']['checkpoint'] = '/dataset/Students/thuyentd/VietOcr/vgg_seq2seq_receipt_31122020checkpoint.pth' detector = Predictor(config) def predict_this_box(config, img): s, pros = detector.predict(img, return_prob=True) return s, pros def rotate_text(img, pts): height, width, _ = img.shape pts = pts.reshape(-1, 2) centroid = np.mean(pts, axis=0) left = pts[pts[:, 0] < centroid[0]] topleft = left[np.argmin(left, axis=0)[1]] botleft = left[np.argmax(left, axis=0)[1]]
from vietocr.tool.config import Cfg
from vietocr.tool.predictor import Predictor
import cv2
from PIL import Image

config = Cfg.load_config_from_name('vgg_transformer')
# Load the pretrained weights.
config['weights'] = './transformerocr.pth'
# Run on the CPU.
config['device'] = 'cpu'
config['cnn']['pretrained'] = False
config['predictor']['beamsearch'] = False
detector = Predictor(config)

img_path = 'img_check.png'
img = cv2.imread(img_path)
if img is None:
    # cv2.imread signals an unreadable file by returning None.
    raise FileNotFoundError('Cannot read image: {}'.format(img_path))

# The predictor works on PIL images, while OpenCV returns a BGR ndarray:
# convert the channel order and wrap it before predicting.
pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
result = detector.predict(pil_img)
print(result)
if (max(len(sim_pred), len(label)) > 0): loss = Levenshtein.distance(sim_pred, label) * 1.0 / max(len(sim_pred), len(label)) else: return 0 return loss debug = False config = Cfg.load_config_from_name('vgg_transformer') # config['weights'] = './weights/transformerocr.pth' # config['weights'] = 'https://drive.google.com/uc?id=13327Y1tz1ohsm5YZMyXVMPIOjoOA0OaA' config['cnn']['pretrained'] = False config['device'] = 'cuda:0' config['predictor']['beamsearch'] = False detector = Predictor(config) src_dir = '/data20.04/data/aicr/funsd_extra/dataset/testing_data/crnn_extend_True_y_ratio_0.05_min_y_4_min_x_2' img_path = '/home/duycuong/PycharmProjects/dataset/ocr/train_data_29Feb_update_30Mar_13May_refined_23July/handwriting/' \ 'cleaned_data_02Mar/test/AICR_test1/AICR_P0000005/0005_1.jpg' img_path = '' if img_path == '': list_files = get_list_file_in_dir_and_subdirs(src_dir) else: list_files = [img_path] total_cer = 0 total_inference_time = 0 print('Total files:', len(list_files)) for idx, f in enumerate(list_files): img_path = os.path.join(src_dir, f)
from PIL import Image
from vietocr.tool.predictor import Predictor
from vietocr.tool.config import Cfg
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import cv2
import time

config = Cfg.load_config_from_name('vgg_transformer')
config['export'] = 'transformerocr_checkpoint.pth'
config['device'] = 'cpu'
config['predictor']['beamsearch'] = False

# Time the predictor construction.
start1 = time.time()
detector = Predictor(config)
end1 = time.time()
# This interval covers building the predictor, not reading the image,
# so label it accordingly.
print("Load model: ", end1 - start1)

img = Image.open('./ANH_1321.jpeg')

# Time a single prediction.
start = time.time()
print(detector.predict(img))
end = time.time()
print('Required time: ', end - start)

# OpenCV displays arrays as BGR while PIL images are RGB: convert before
# showing so the colours are not swapped.
cv2.imshow('image',
           cv2.cvtColor(np.array(img.convert('RGB')), cv2.COLOR_RGB2BGR))
cv2.waitKey(0)
class Reader:
    """Text recognizer with province-name correction for addresses."""

    def __init__(self, weights):
        """Prepare the province lookup tables and the text recognizer.

        :param weights: value stored under the ``weights`` key of the
            ``vgg_transformer`` config before the predictor is created
        """
        # Provinces and cities, written with diacritics.
        self.tinh_list = [
            'An Giang', 'Bà Rịa - Vũng Tàu', 'Bắc Giang', 'Bắc Kạn',
            'Bạc Liêu', 'Bắc Ninh', 'Bến Tre', 'Bình Định', 'Bình Dương',
            'Bình Phước', 'Bình Thuận', 'Cà Mau', 'Cao Bằng', 'Đắk Lắk',
            'Đắk Nông', 'Điện Biên', 'Đồng Nai', 'Đồng Tháp', 'Gia Lai',
            'Hà Giang', 'Hà Nam', 'Hà Tĩnh', 'Hải Dương', 'Hậu Giang',
            'Hòa Bình', 'Hưng Yên', 'Khánh Hòa', 'Kiên Giang', 'Kon Tum',
            'Lai Châu', 'Lâm Đồng', 'Lạng Sơn', 'Lào Cai', 'Long An',
            'Nam Định', 'Nghệ An', 'Ninh Bình', 'Ninh Thuận', 'Phú Thọ',
            'Quảng Bình', 'Quảng Nam', 'Quảng Ngãi', 'Quảng Ninh',
            'Quảng Trị', 'Sóc Trăng', 'Sơn La', 'Tây Ninh', 'Thái Bình',
            'Thái Nguyên', 'Thanh Hóa', 'Thừa Thiên Huế', 'Tiền Giang',
            'Trà Vinh', 'Tuyên Quang', 'Vĩnh Long', 'Vĩnh Phúc', 'Yên Bái',
            'Phú Yên', 'Cần Thơ', 'Đà Nẵng', 'Hải Phòng', 'Hà Nội',
            'TP Hồ Chí Minh',
        ]

        # The same names with accents removed and lower-cased,
        # index-aligned with ``tinh_list``.
        plain_names = []
        for name in self.tinh_list:
            plain_names.append(self.remove_accent(name).lower())
        self.provinces = plain_names

        cfg = Cfg.load_config_from_name('vgg_transformer')
        cfg['weights'] = weights
        cfg['cnn']['pretrained'] = False
        cfg['device'] = 'cpu'
        cfg['predictor']['beamsearch'] = False
        self.config = cfg
        self.reader = Predictor(cfg)

    def read(self, image):
        """Recognize the text of an image.

        :param image: image handed unchanged to the predictor
        :return: the predicted text
        """
        return self.reader.predict(image)

    def remove_accent(self, text):
        """Return ``text`` transliterated by ``unidecode``."""
        return unidecode.unidecode(text)

    def postprocess_address(self, original_text, thresold):
        """Replace a text by the closest province name when close enough.

        The text is accent-stripped and lower-cased, then compared by
        Levenshtein distance with every normalized province name.

        :param original_text: recognized text to correct
        :param thresold: the smallest distance must be strictly below
            this value for the replacement to happen
        :return: the accented province name, or ``original_text``
        """
        normalized = self.remove_accent(original_text).lower()

        distances = np.array([
            levenshtein_distance(normalized, province)
            for province in self.provinces
        ])
        closest = np.argmin(distances)

        if distances[closest] < thresold:
            return self.tinh_list[closest]
        return original_text