def dataloader(self, alphabet):
    """Build the training DataLoader and, if a validation directory exists,
    a validation DataLoader as well (otherwise None for the latter)."""
    train_loader = DataLoader(
        dataset=NumDataset(args.train_dir, alphabet,
                           transform=resizeNormalize(args.imgH)),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
        pin_memory=True)
    val_loader = None
    if os.path.exists(args.val_dir):
        val_loader = DataLoader(
            dataset=NumDataset(args.val_dir, alphabet, mode='test',
                               transform=resizeNormalize(args.imgH)),
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.num_workers,
            pin_memory=True)
    return train_loader, val_loader
def predict(image):
    """Run the CRNN model over a PIL image and return the decoded OCR text."""
    # Resize to height 32, keeping the aspect ratio.
    target_w = int(image.size[0] / (image.size[1] * 1.0 / 32))
    transformer = dataset.resizeNormalize((target_w, 32))
    if torch.cuda.is_available() and GPU:
        image = transformer(image).cuda()
    else:
        image = transformer(image).cpu()
    image = Variable(image.view(1, *image.size()))
    model.eval()
    preds = model(image)
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    # Strip a leading '-' from the decoded string if present.
    if len(sim_pred) > 0 and sim_pred[0] == u'-':
        sim_pred = sim_pred[1:]
    return sim_pred
def batch_test(dirpath):
    """Decode every .jpg/.png image under *dirpath* with a pretrained CRNN
    and print each path followed by its decoded text."""
    alphabet = keys_crnn.alphabet
    converter = util.strLabelConverter(alphabet)
    model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1)
    model.load_state_dict(torch.load('./samples/model_acc97.pth'))
    for img_path in glob.glob(os.path.join(dirpath, '*.[jp][pn]g')):
        print(img_path)
        image = Image.open(img_path).convert('L')
        # Resize to height 32, keeping the aspect ratio.
        new_w = int(image.size[0] / (image.size[1] * 1.0 / 32))
        transformer = dataset.resizeNormalize((new_w, 32))
        tensor = transformer(image)
        tensor = Variable(tensor.view(1, *tensor.size()))
        model.eval()
        preds = model(tensor)
        _, preds = preds.max(2)
        preds = preds.squeeze(1)
        preds = preds.transpose(-1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
        sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
        print(sim_pred)
def data_loader():
    """Return (train_loader, val_loader) over the lmdb datasets.

    Training uses augmentation (ImgAug + grid distortion); validation uses a
    deterministic resize/normalize only.
    """
    train_transform = torchvision.transforms.Compose(
        [ImgAugTransform(), GridDistortion(prob=0.65)])
    train_dataset = dataset.lmdbDataset(root=args.trainroot,
                                        transform=train_transform)
    assert train_dataset
    sampler = None
    if not params.random_sample:
        sampler = dataset.randomSequentialSampler(train_dataset,
                                                  params.batchSize)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=params.batchSize,
        shuffle=True,
        sampler=sampler,
        num_workers=int(params.workers),
        collate_fn=dataset.alignCollate(imgH=params.imgH, imgW=params.imgW,
                                        keep_ratio=params.keep_ratio))
    val_transform = torchvision.transforms.Compose(
        [dataset.resizeNormalize((params.imgW, params.imgH))])
    val_dataset = dataset.lmdbDataset(root=args.valroot,
                                      transform=val_transform)
    assert val_dataset
    val_loader = torch.utils.data.DataLoader(
        val_dataset, shuffle=True, batch_size=params.batchSize,
        num_workers=int(params.workers))
    return train_loader, val_loader
def crnn_recognition(imgpth, model, tesing_dataset, total_correct_num, total_string_length):
    """Recognize the text in the image at *imgpth* and score it.

    Returns (correct_num, string_length): the number of ground-truth
    characters credited via normalized Levenshtein similarity, and the
    ground-truth length.
    NOTE(review): total_correct_num / total_string_length are accepted but
    never used inside this function.
    """
    cropped_image = Image.open(imgpth)
    converter = utils.strLabelConverter(alphabet)
    image = cropped_image.convert('L')
    ##
    # Rescale width by the training-time resize factor (280 -> 180).
    # NOTE(review): alternative width formulas were left commented below —
    # confirm which one the deployed model was trained with.
    w = int(image.size[0] / (280 * 1.0 / 180))
    # w = image.size[0]
    # w = int(image.size[0] / (32 * 1.0 / image.size[1]))
    transformer = dataset.resizeNormalize((w, 32))
    image = transformer(image)
    if torch.cuda.is_available():
        image = image.cuda()
    image = image.view(1, *image.size())
    image = Variable(image)
    model.eval()
    preds = model(image)
    _, preds = preds.max(2)  # best class index per timestep
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    ground_truth = tesing_dataset.get(imgpth)
    # Credit characters proportionally to edit-distance similarity.
    correct_num = int(
        len(ground_truth)
        * textdistance.levenshtein.normalized_similarity(ground_truth, sim_pred))
    string_length = len(ground_truth)
    # check = ground_truth == sim_pred
    print('results: {0}, gt: {1}'.format(sim_pred, ground_truth))
    return correct_num, string_length
def model_predict(img_path, loadmodel):
    """OCR the image at *img_path* with *loadmodel*; return the decoded text.

    Fix: the original body ran inference through the global ``model`` and
    ignored the ``loadmodel`` argument entirely; the passed-in model is now
    actually used for eval() and the forward pass.
    """
    converter = utils.strLabelConverter(alphabet)
    transformer = dataset.resizeNormalize((100, 32))
    image = Image.open(img_path).convert('L')
    image = transformer(image)
    if torch.cuda.is_available():
        image = image.cuda()
    image = image.view(1, *image.size())
    image = Variable(image)
    loadmodel.eval()
    preds = loadmodel(image)
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    print('%-20s => %-20s' % (raw_pred, sim_pred))
    return sim_pred
def crnn_single_test(cropped_image, model):
    """Recognize text in *cropped_image* (PIL) with *model* and print it."""
    converter = utils.strLabelConverter(alphabet)
    image = cropped_image.convert('L')
    ##
    # Rescale width by the training-time resize factor (280 -> 180).
    # NOTE(review): alternative width formulas were left commented below.
    w = int(image.size[0] / (280 * 1.0 / 180))
    # w = image.size[0]
    # w = int(image.size[0] / (32 * 1.0 / image.size[1]))
    transformer = dataset.resizeNormalize((w, 32))
    image = transformer(image)
    if torch.cuda.is_available():
        image = image.cuda()
    image = image.view(1, *image.size())
    image = Variable(image)
    model.eval()
    preds = model(image)
    _, preds = preds.max(2)  # best class index per timestep
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    print('results: {0}'.format(sim_pred))
def data_loader():
    """Build the train/val lmdb DataLoaders (custom resize for training)."""
    train_dataset = dataset.lmdbDataset(root=args.trainroot,
                                        transform=dataset.customResize())
    assert train_dataset
    if params.random_sample:
        sampler = None
    else:
        sampler = dataset.randomSequentialSampler(train_dataset,
                                                  params.batchSize)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=params.batchSize,
        shuffle=True,
        sampler=sampler,
        num_workers=int(params.workers),
        collate_fn=dataset.alignCollate(imgH=params.imgH, imgW=params.imgW))
    val_dataset = dataset.lmdbDataset(
        root=args.valroot,
        transform=dataset.resizeNormalize((params.imgW, params.imgH)))
    assert val_dataset
    val_loader = torch.utils.data.DataLoader(
        val_dataset, shuffle=True, batch_size=params.batchSize,
        num_workers=int(params.workers))
    return train_loader, val_loader
def crnn_recognition(cropped_image, model):
    """Recognize *cropped_image* with *model* and print the raw label string.

    Fix: ``preds.data[i] is not 0`` compared by object identity, which is
    semantically wrong (and a SyntaxWarning on Python >= 3.8); it only ever
    worked by accident of CPython small-int caching. Use ``!= 0`` to skip
    the CTC blank class. The while loop is also replaced by a for loop.
    """
    converter = utils.strLabelConverter(alphabet)
    image = cropped_image.convert('L')
    ##
    # Rescale width by the training-time resize factor (280 -> 160).
    w = int(image.size[0] / (280 * 1.0 / 160))
    transformer = dataset.resizeNormalize((w, 32))
    image = transformer(image)
    if torch.cuda.is_available():
        image = image.cuda()
    image = image.view(1, *image.size())
    image = Variable(image)
    model.eval()
    preds = model(image)
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    # Manual decode: map every non-blank class index to its character.
    # (Note: unlike converter.decode, this does not collapse repeats.)
    out = ''
    for i in range(preds_size.data[0]):
        if preds.data[i] != 0:  # 0 is the CTC blank
            out += alphabet[preds.data[i] - 1]
    print(out)
def recognize(image_path, alphabet, snapshot, gpu):
    """Load a 37-class CRNN from *snapshot* and decode the text in the image
    at *image_path*; prints raw => simplified and returns the latter."""
    model = crnn.CRNN(32, 1, 37, 256)
    if torch.cuda.is_available():
        model = model.cuda()
    print('loading pretrained model from %s' % snapshot)
    model.load_state_dict(torch.load(snapshot))
    converter = utils.strLabelConverter(alphabet)
    transformer = dataset.resizeNormalize((100, 32))
    tensor = transformer(Image.open(image_path).convert('L'))
    if torch.cuda.is_available():
        tensor = tensor.cuda()
    tensor = Variable(tensor.view(1, *tensor.size()))
    model.eval()
    preds = model(tensor)
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    print('%-20s => %-20s' % (raw_pred, sim_pred))
    return sim_pred
def crnn_recognition(cropped_image, model):
    """Decode *cropped_image* with *model* on CPU and print the result.

    Debug prints of intermediate tensor sizes are left in place.
    """
    converter = utils.strLabelConverter(alphabet)
    image = cropped_image.convert('L')
    ##
    # Rescale width by the training-time resize factor (280 -> params.imgW).
    w = int(image.size[0] / (280 * 1.0 / params.imgW))
    transformer = dataset.resizeNormalize((w, 32))
    image = transformer(image)
    # if torch.cuda.is_available():
    #     image = image.cuda()
    image = image.view(1, *image.size())
    # image = Variable(image)
    model.eval()
    preds = model(image)
    print("preds first=", preds.size())
    _, preds = preds.max(2)  # best class index per timestep
    print("preds pre=", preds.size())
    preds = preds.transpose(1, 0).contiguous().view(-1)
    print("preds size=", preds.size())
    # preds_size = Variable(torch.IntTensor([preds.size(0)]))
    preds_size = torch.IntTensor([preds.size(0)])
    # raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    # print('%-20s => %-20s' % (raw_pred, sim_pred))
    print('results: {0}'.format(sim_pred))
def predict(self, image): img_w = 32 * image.size[0] // image.size[1] #维持固定宽高比 transformer = dataset.resizeNormalize((img_w, 32)) image = transformer(image) if torch.cuda.is_available(): image = image.cuda() image = image.view(1, *image.size()) image = Variable(image) if image.size()[-1] < 8: return '' preds = self(image) max_val, preds = preds.max(2) preds = preds.view(-1) preds_size = Variable(torch.IntTensor([preds.size(0)])) raw_pred = self.converter.decode(preds.data, preds_size.data, raw=True) sim_pred = self.converter.decode(preds.data, preds_size.data, raw=False) #sim_pred = converter.decode(preds.data, preds_size.data, raw=False) return preds, raw_pred, sim_pred
def __init__(self, args):
    """Build the CRNN recognizer: network, label converter, transform, weights.

    Fix: ``torch.nn.DataParallel`` has no ``output_dim`` keyword — the
    parameter is named ``dim`` — so the original call raised a TypeError as
    soon as the CUDA branch ran. CRNN emits (T, B, C), so scatter/gather on
    the batch dimension 1.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus
    self.args = args
    self.alphabet = alphabetChinese
    nclass = len(self.alphabet) + 1  # +1 for the CTC blank class
    nc = 1  # grayscale input
    self.net = CRNN(args.imgH, nc, args.nh, nclass)
    self.converter = utils.strLabelConverter(self.alphabet, ignore_case=False)
    self.transformer = resizeNormalize(args.imgH)
    print('loading pretrained model from %s' % args.model_path)
    checkpoint = torch.load(args.model_path)
    if 'model_state_dict' in checkpoint.keys():
        checkpoint = checkpoint['model_state_dict']
    from collections import OrderedDict
    model_dict = OrderedDict()
    # Strip the 'module.' prefix that DataParallel checkpoints carry.
    for k, v in checkpoint.items():
        if 'module' in k:
            model_dict[k[7:]] = v
        else:
            model_dict[k] = v
    self.net.load_state_dict(model_dict)
    if args.cuda and torch.cuda.is_available():
        print('available gpus is,', torch.cuda.device_count())
        self.net = torch.nn.DataParallel(self.net, dim=1).cuda()
    self.net.eval()
def crnn_recognition(cropped_image, model):
    """Recognize *cropped_image* with *model*; write the result to test.txt
    and print it.

    Fix: the result file is now written through a ``with`` block so the
    handle is closed even if the write raises.
    """
    converter = utils.strLabelConverter(alphabet)
    image = cropped_image.convert('L')
    ##
    # Rescale width by the training-time resize factor (280 -> 160).
    w = int(image.size[0] / (280 * 1.0 / 160))
    transformer = dataset.resizeNormalize((w, 32))
    image = transformer(image)
    if torch.cuda.is_available():
        image = image.cuda()
    image = image.view(1, *image.size())
    image = Variable(image)
    model.eval()
    preds = model(image)
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    # 'w' = text mode ('wb' would be binary mode).
    with open('test.txt', 'w') as f:
        f.write('results: {0}'.format(sim_pred))
    print('results: {0}'.format(sim_pred))
def crnnOcr(image):
    """Run CRNN OCR on a PIL image and return the decoded string.

    Uses module globals: model, converter, GPU.

    Fix: the leading-'-' strip indexed ``sim_pred[0]`` without checking that
    the decode produced any text, raising IndexError on an empty result; the
    sibling predict() already guards with a length check — do the same here.
    """
    # Resize to height 32, keeping the aspect ratio.
    scale = image.size[1] * 1.0 / 32
    w = image.size[0] / scale
    w = int(w)
    transformer = dataset.resizeNormalize((w, 32))
    if torch.cuda.is_available() and GPU:
        image = transformer(image).cuda()
    else:
        image = transformer(image).cpu()
    image = image.view(1, *image.size())
    image = Variable(image)
    model.eval()
    preds = model(image)
    _, preds = preds.max(2)
    # preds = preds.squeeze(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    # raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    if len(sim_pred) > 0 and sim_pred[0] == u'-':
        sim_pred = sim_pred[1:]
    return sim_pred
def crnn_recognition(cropped_image, model):
    """Recognize *cropped_image* (PIL) with *model* on CPU; print and return
    the decoded string."""
    converter = utils.strLabelConverter(alphabet)
    image = cropped_image.convert('L')
    # print("image size=", image.size[0])  # PIL .size is (w, h)
    ##
    # Rescale width by the training-time resize factor (280 -> params.imgW).
    w = int(image.size[0] / (280 * 1.0 / params.imgW))  # image.size[0] is W
    # print("w=", w)
    transformer = dataset.resizeNormalize((w, 32))  # output is a CHW tensor
    image = transformer(image)  # image is now a CHW tensor
    # print("image resize=", image.shape)
    # if torch.cuda.is_available():
    #     image = image.cuda()
    image = image.view(1, *image.size())  # add batch dimension -> 1CHW
    # print("image=", image.shape)
    # image = Variable(image)
    # print("model:", model)
    model.eval()
    preds = model(image)
    # print("preds:", preds)
    _, preds = preds.max(2)  # best class index per timestep
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = torch.IntTensor([preds.size(0)])
    # preds_size = torch.IntTensor([preds.size(0)])
    # raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    # print('%-20s => %-20s' % (raw_pred, sim_pred))
    print('result:{0}'.format(sim_pred))
    return ('{0}'.format(sim_pred))
def crnn_recognition(cropped_image, model):
    """Decode the text in *cropped_image* with the CRNN *model* and print it.

    Training images were 280 px wide and resized to 160 before entering the
    network, so test images are scaled by that same 280/160 factor here.
    """
    converter = utils.strLabelConverter(alphabet)
    gray = cropped_image.convert('L')
    scaled_w = int(gray.size[0] / (280 * 1.0 / 160))
    tensor = dataset.resizeNormalize((scaled_w, 32))(gray)
    if torch.cuda.is_available():
        tensor = tensor.cuda()
    tensor = Variable(tensor.view(1, *tensor.size()))
    model.eval()
    preds = model(tensor)
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    print('results: {0}'.format(sim_pred))
def show_predict(image_directory, filename):
    """Detect text regions with CRAFT, OCR each crop with the CRNN, and
    return (predicted_text, output_dir) — one decoded line per crop."""
    output_dir = 'static/images/cropped_craft'
    prediction_result = craft.detect_text(image_directory, output_dir, crop_type='polly', export_extra=True, refiner=False, cuda=True)
    # CRAFT writes the crops to <output_dir>/<name>_crops.
    cropped_dir = output_dir + "/" + filename[:-4] + "_crops"
    transformer = dataset.resizeNormalize((100, 32))
    predicted_text = ""
    for cropped in listdir(cropped_dir):
        img = cropped_dir + "/" + cropped
        image = Image.open(img).convert("L")
        image = transformer(image)
        if torch.cuda.is_available():
            image = image.cuda()
        image = image.view(1, *image.size())
        image = Variable(image)
        model.eval()
        preds = model(image)
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
        # NOTE(review): slicing by len(predicted_text) — the accumulator —
        # looks wrong: the cut point grows every iteration. This was likely
        # meant to slice by len(sim_pred); confirm before changing.
        predicted_text = predicted_text + sim_pred[2:len(predicted_text) - 1] + "\n"
    return predicted_text, output_dir
def test(model, data_path, max_iter=100):
    """Evaluate *model* on up to *max_iter* batches from *data_path* and
    print per-sample predictions plus an overall accuracy.

    Fixes: ``test_iter.next()`` is Python-2-only iterator syntax — replaced
    with the builtin ``next(test_iter)``; the ``i += 1`` that pointlessly
    rebound the for-loop variable is dropped.
    """
    test_dataset = dataset.listDataset(
        list_file=data_path,
        transform=dataset.resizeNormalize((100, 32)))
    image = torch.FloatTensor(64, 3, 32, 32)
    text = torch.LongTensor(64 * 5)
    length = torch.IntTensor(64)
    image = Variable(image)
    text = Variable(text)
    length = Variable(length)
    print('Start test')
    # for p in crnn.parameters():
    #     p.requires_grad = False
    # Fixed target length of 7 used by the decoder.
    length = torch.IntTensor(1)
    length[0] = 7
    length = Variable(length)
    model.eval()
    data_loader = torch.utils.data.DataLoader(
        test_dataset, shuffle=True, batch_size=64, num_workers=int(2))
    test_iter = iter(data_loader)
    n_correct = 0
    loss_avg = utils.averager()
    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = next(test_iter)
        cpu_images, cpu_texts = data
        print('cpu_image:', cpu_images.size())
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)
        preds = model(image, length)
        _, preds = preds.max(1)
        preds = preds.view(-1)
        sim_preds = converter.decode(preds.data, length.data)
        # Count exact matches (ground truth has ':' separators stripped).
        for pred, target in zip(sim_preds, cpu_texts):
            target = ''.join(target.split(':'))
            if pred == target:
                n_correct += 1
        for pred, gt in zip(sim_preds, cpu_texts):
            gt = ''.join(gt.split(':'))
            print('%-20s, gt: %-20s' % (pred, gt))
    # NOTE: denominator assumes every batch is full (64 samples).
    accuracy = n_correct / float(max_iter * 64)
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
def read_image(path):
    """Load *path* as a normalized (1, C, 32, 100) tensor for the CRNN."""
    transform = dataset.resizeNormalize((100, 32))
    tensor = transform(Image.open(path).convert('L'))
    if torch.cuda.is_available():
        tensor = tensor.cuda()
    return Variable(tensor.view(1, *tensor.size()))
def initValDataSets():
    """Populate val_data_list with lmdb validation datasets found under
    val_path.

    Accepts either a single lmdb directory (val_path/data.mdb) or a folder
    of per-dataset subdirectories with an lmdb in <name>/val, falling back
    to <name> itself.
    """
    index = 0
    list_name = []
    if os.path.exists(val_path + "/data.mdb"):
        # val_path itself is one lmdb dataset.
        one_dataset = dataset.lmdbDataset(
            root=val_path, transform=dataset.resizeNormalize((100, 32)))
        val_data_list.append({
            "dir": val_path,
            "dataset": one_dataset,
            "index": index,
        })
        list_name.append(val_path)
    else:
        for one in os.listdir(val_path):
            root_path = val_path + "/" + one + "/val"
            if not os.path.exists(root_path) or not os.path.exists(
                    val_path + "/" + one + "/val/data.mdb"):
                # Fall back to the dataset folder itself.
                if os.path.exists(val_path + "/" + one + "/data.mdb"):
                    root_path = val_path + "/" + one
                else:
                    continue
            one_dataset = dataset.lmdbDataset(
                root=root_path,
                transform=dataset.resizeNormalize((100, 32)))
            val_data_list.append({
                "dir": one,
                "dataset": one_dataset,
                "index": index,
            })
            index += 1
            list_name.append(one)
    print_msg("加载了{}个验证集:{}".format(len(list_name), list_name))
def process(self, im, text_recs):
    """OCR every detected text box of *im* and return the decoded strings.

    im: image as a numpy-style array indexed [row, col].
    text_recs: iterable of boxes; rows longer than 8 carry polygon
    coordinates plus a score, otherwise (top, left, bottom, right, score).
    """
    index = 0
    sim_preds = []
    for rec in text_recs:
        if len(rec) > 8:
            top, left, bottom, right, score = rec[0], rec[1], rec[6], rec[
                7], rec[8]
        else:
            top, left, bottom, right, score = rec
        # NOTE(review): the slice im[left:right, top:bottom] treats
        # 'left/right' as rows and 'top/bottom' as columns — the names look
        # swapped relative to the indexing; confirm against the detector's
        # box layout before relying on them.
        crop_img = im[int(left):int(right), int(top):int(bottom)]
        image = Image.fromarray(crop_img).convert('L')
        # Resize to height 32, keeping the aspect ratio.
        scale = image.size[1] * 1.0 / 32
        w = image.size[0] / scale
        w = int(w)
        transformer = dataset.resizeNormalize((w, 32))
        # gpuid == '-1' means CPU-only inference.
        if self.gpuid == '-1':
            image = transformer(image)
        else:
            image = transformer(image).cuda()
        image = image.view(1, *image.size())
        image = Variable(image)
        self.model.eval()
        preds = self.model(image)
        _, preds = preds.max(2)  # best class index per timestep
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        raw_pred = self.converter.decode(preds.data, preds_size.data,
                                         raw=True)
        sim_pred = self.converter.decode(preds.data, preds_size.data,
                                         raw=False)
        sim_preds.append(sim_pred)
    return sim_preds
def image_pil_to_logits(oracle, pil_im):
    """Normalize *pil_im* to (imgW, imgH), batch it, and return the raw
    predictions of *oracle*."""
    tensor = dataset.resizeNormalize((imgW, imgH))(pil_im)
    if torch.cuda.is_available():
        tensor = tensor.cuda()
    batch = Variable(tensor.view(1, *tensor.size()))
    return oracle(batch)
def data_loader():
    """Return a validation DataLoader over the lmdb dataset at args.valroot."""
    val_dataset = dataset.lmdbDataset(
        root=args.valroot,
        transform=dataset.resizeNormalize((params.imgW, params.imgH)))
    assert val_dataset
    return torch.utils.data.DataLoader(
        val_dataset, shuffle=True, batch_size=params.batchSize,
        num_workers=int(params.workers))
def crnnRec(self, im, text_recs, use_gpu=True):
    """Rotate-crop each detected quad out of *im*, OCR it, return the texts."""
    texts = []
    index = 0
    for rec in text_recs:
        # Quad corners: rec holds four (x, y) pairs.
        pt1 = (rec[0], rec[1])
        pt2 = (rec[2], rec[3])
        pt3 = (rec[6], rec[7])
        pt4 = (rec[4], rec[5])
        # Deskew by the angle of the top edge (pt1 -> pt2).
        partImg = self.dumpRotateImage(
            im, degrees(atan2(pt2[1] - pt1[1], pt2[0] - pt1[0])), pt1, pt2,
            pt3, pt4)
        image = Image.fromarray(partImg).convert('L')
        # Resize to height 32, keeping the aspect ratio.
        scale = image.size[1] * 1.0 / 32
        w = image.size[0] / scale
        w = int(w)
        transformer = dataset.resizeNormalize((w, 32))
        image = transformer(image)
        # Pick the CPU model by default; switch to the GPU copy if allowed.
        model = self.cpu_model
        if use_gpu and torch.cuda.is_available():
            image = image.cuda()
            model = self.model
        image = image.view(1, *image.size())
        image = Variable(image)
        model.eval()
        print(type(model), type(image))
        preds = model(image)
        _, preds = preds.max(2)
        # NOTE(review): squeeze(0) then transpose(1, 0) assumes a tensor that
        # is still 2-D after the squeeze; on modern torch preds is (T, B)
        # here and transpose(1, 0) on the squeezed 1-D tensor raises.
        # Verify against the torch version this was written for.
        preds = preds.squeeze(0)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        raw_pred = self.converter.decode(preds.data, preds_size.data,
                                         raw=True)
        sim_pred = self.converter.decode(preds.data, preds_size.data,
                                         raw=False)
        print('%-20s => %-20s' % (raw_pred, sim_pred))
        index = index + 1
        texts.append(sim_pred)
    return texts
def load_img(path):
    """Load each image path in the iterable *path* as a normalized tensor
    and return them stacked into one batch tensor."""
    transform = dataset.resizeNormalize((100, 32))
    tensors = [transform(Image.open(p).convert('L')) for p in path]
    return torch.stack(tensors)
def normalize_image(image):
    """Resize/normalize a PIL *image* to (100, 32) and prepend a batch
    dimension, yielding a (1, C, H, W) tensor."""
    transform = dataset.resizeNormalize((100, 32))
    tensor = transform(image)
    return tensor.view(1, *tensor.size())
def __init__(self, weightfile, gpu_id=0):
    """Build the recognizer: CRNN network, pretrained weights from
    *weightfile*, input transform, and label converter."""
    alphabet = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-()图'
    print(alphabet)
    print(len(alphabet))
    num_classes = len(alphabet) + 1  # +1 for the CTC blank class
    self.__net = crnn.CRNN(32, 1, num_classes, 256)
    if torch.cuda.is_available():
        self.__net.cuda(device=gpu_id)
    self.__gpu_id = gpu_id
    self.__net.load_state_dict(torch.load(weightfile))
    self.__transformer = dataset.resizeNormalize((160, 32))
    self.__converter = utils.strLabelConverter(alphabet)
def load_model(model_path):
    """Initialize the global transformer/model/converter from *model_path*.

    Fix: the original only called ``load_state_dict`` in the CPU (else)
    branch, so on a CUDA machine the network kept its random initial
    weights. The checkpoint is now loaded on both paths; on the CUDA path
    it is applied to the DataParallel wrapper (matching checkpoints saved
    from a DataParallel model — confirm key prefixes against your
    checkpoints).
    """
    global transformer, model, converter
    print('loading pretrained model from %s' % model_path)
    nclass = len(params.alphabet) + 1  # +1 for the CTC blank class
    model = crnn.CRNN(params.imgH, params.nc, nclass, params.nh)
    if torch.cuda.is_available():
        model = model.cuda()
        model = torch.nn.DataParallel(model)
        model.load_state_dict(torch.load(model_path))
    else:
        model.load_state_dict(torch.load(model_path, map_location='cpu'))
    model.eval()
    converter = utils.strLabelConverter(params.alphabet)
    transformer = dataset.resizeNormalize((100, 32))
def crnnRec(model, converter, im, text_recs):
    """Deskew-crop each quad box of *im*, OCR it, and print the results.

    Returns None early on the first degenerate (zero-size) crop.
    """
    index = 0
    for rec in text_recs:
        # Quad corners: rec holds four (x, y) pairs.
        pt1 = (rec[0], rec[1])
        pt2 = (rec[2], rec[3])
        pt3 = (rec[6], rec[7])
        pt4 = (rec[4], rec[5])
        # Deskew by the angle of the top edge (pt1 -> pt2).
        partImg = dumpRotateImage(
            im, degrees(atan2(pt2[1] - pt1[1], pt2[0] - pt1[0])), pt1, pt2,
            pt3, pt4)
        if partImg.shape[0] == 0 or partImg.shape[1] == 0:
            # NOTE(review): returning here aborts ALL remaining boxes on the
            # first empty crop — confirm a `continue` was not intended.
            return
        image = Image.fromarray(partImg).convert('L')
        # Resize to height 32, keeping the aspect ratio.
        scale = image.size[1] * 1.0 / 32
        w = image.size[0] / scale
        w = int(w)
        transformer = dataset.resizeNormalize((w, 32))
        # image = transformer(image).cuda()
        image = transformer(image)
        image = image.view(1, *image.size())
        # volatile=True is the legacy (pre-0.4 torch) no-grad flag.
        image = Variable(image, volatile=True)
        model.eval()
        preds = model(image)
        _, preds = preds.max(2)
        # NOTE(review): the double squeeze then transpose(1, 0) assumes the
        # legacy keepdim behaviour of max(); on modern torch this raises for
        # the resulting 1-D tensor. Verify against the torch version in use.
        preds = preds.squeeze(0).squeeze(0)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
        sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
        print(index)
        print(sim_pred)
        index = index + 1
def crnnRec(model, converter, im, text_recs):
    """Deskew-crop each quad box of *im*, OCR it on the GPU, and print the
    index plus decoded text for every box."""
    index = 0
    for rec in text_recs:
        # Quad corners: rec holds four (x, y) pairs.
        pt1 = (rec[0], rec[1])
        pt2 = (rec[2], rec[3])
        pt3 = (rec[6], rec[7])
        pt4 = (rec[4], rec[5])
        # Deskew by the angle of the top edge (pt1 -> pt2).
        partImg = dumpRotateImage(im, degrees(atan2(pt2[1] - pt1[1], pt2[0] - pt1[0])), pt1, pt2, pt3, pt4)
        image = Image.fromarray(partImg).convert('L')
        # Resize to height 32, keeping the aspect ratio.
        scale = image.size[1] * 1.0 / 32
        w = image.size[0] / scale
        w = int(w)
        transformer = dataset.resizeNormalize((w, 32))
        image = transformer(image).cuda()
        image = image.view(1, *image.size())
        image = Variable(image)
        model.eval()
        preds = model(image)
        _, preds = preds.max(2)
        # NOTE(review): squeeze(2) assumes the legacy torch max() that kept
        # the reduced dimension; on modern torch preds is already (T, B).
        preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
        sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
        print(index)
        print(sim_pred)
        index = index + 1
if torch.cuda.is_available() and not opt.cuda: print("WARNING: You have a CUDA device, so you should probably run with --cuda") train_dataset = dataset.lmdbDataset(root=opt.trainroot) assert train_dataset if not opt.random_sample: sampler = dataset.randomSequentialSampler(train_dataset, opt.batchSize) else: sampler = None train_loader = torch.utils.data.DataLoader( train_dataset, batch_size=opt.batchSize, shuffle=True, sampler=sampler, num_workers=int(opt.workers), collate_fn=dataset.alignCollate(imgH=opt.imgH, keep_ratio=opt.keep_ratio)) test_dataset = dataset.lmdbDataset( root=opt.valroot, transform=dataset.resizeNormalize((100, 32))) ngpu = int(opt.ngpu) nh = int(opt.nh) alphabet = opt.alphabet nclass = len(alphabet) + 1 nc = 1 converter = utils.strLabelConverter(alphabet) criterion = CTCLoss() # custom weights initialization called on crnn def weights_init(m): classname = m.__class__.__name__ if classname.find('Conv') != -1:
from PIL import Image

import models.crnn as crnn

# Demo: load a pretrained 37-class CRNN (36 chars + CTC blank) and decode
# the sample image ./data/demo.png.
model_path = './data/crnn.pth'
img_path = './data/demo.png'
alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
model = crnn.CRNN(32, 1, 37, 256).cuda()
print('loading pretrained model from %s' % model_path)
model.load_state_dict(torch.load(model_path))
converter = utils.strLabelConverter(alphabet)
transformer = dataset.resizeNormalize((100, 32))
image = Image.open(img_path).convert('L')
image = transformer(image).cuda()
image = image.view(1, *image.size())  # add batch dimension -> 1CHW
image = Variable(image)
model.eval()
preds = model(image)
_, preds = preds.max(2)  # best class index per timestep
# NOTE(review): squeeze(2) assumes the legacy torch max() that kept the
# reduced dimension; on modern torch preds is already (T, B).
preds = preds.squeeze(2)
preds = preds.transpose(1, 0).contiguous().view(-1)
preds_size = Variable(torch.IntTensor([preds.size(0)]))
raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
sim_pred = converter.decode(preds.data, preds_size.data, raw=False)