def pseudo_label(model, input_dir, output_file=None):
    """Recognise every JPEG in a folder and write one pseudo-label line per image.

    Each line has the form ``<image_path>||||<predicted_text>||||<probability>``.

    :param model: callable network; its output is exponentiated and reduced
        over dim 2, i.e. it is treated as SxBxC log-probabilities
        (NOTE(review): confirm the model really emits log-softmax).
    :param input_dir: directory that is globbed for ``*.jpg`` files.
    :param output_file: path of the text file to (over)write.
    :raises ValueError: if ``output_file`` is not provided.
    """
    if output_file is None:
        raise ValueError("output_file is required to store the pseudo-labels")

    image_paths = glob.glob(input_dir + "/*.jpg")
    converter = utils.strLabelConverter(params.alphabet)
    transformer = dataset.processing_image((params.imgW, params.imgH))

    with torch.no_grad(), open(output_file, 'w') as f:
        for image_path in tqdm.tqdm(image_paths):
            image = Image.open(image_path).convert('L')
            image = transformer(image)
            if args.check:
                # Out-of-place ops: the dump must not modify the tensor that is
                # fed to the model afterwards.
                check_img = image.mul(0.5).add(0.5).permute(1, 2, 0).numpy() * 255.0
                cv2.imwrite('DATA/img_check/' + os.path.basename(image_path) + ".jpg",
                            check_img)
            if torch.cuda.is_available():
                image = image.cuda()
            image = image.view(1, *image.size())
            image = Variable(image)

            preds = model(image)  # SxBxC
            max_probs, preds = torch.exp(preds).max(2)
            # Product of the per-step maxima = score of the greedy path.
            prob = max_probs.cumprod(0)[-1]

            preds = preds.transpose(1, 0).contiguous().view(-1)
            # A single image is decoded at a time, so exactly one length entry.
            preds_size = Variable(torch.LongTensor([preds.size(0)]))
            sim_pred = converter.decode(preds.data, preds_size.data, raw=False)

            f.write("||||".join([image_path, sim_pred, str(float(prob))]) + "\n")
def crnn_recognition(cropped_image, model):
    """Recognise the text in a cropped BGR image and print the decoded string.

    :param cropped_image: array accepted by ``cv2.cvtColor(..., COLOR_BGR2GRAY)``.
    :param model: recognition network; it is switched to eval mode here.
    """
    converter = utils.strLabelConverter(alphabet)
    image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)

    # Width ratio: 280 is the image width in the Chinese training set, 160 is
    # the width after the original image was shrunk.
    w_now = int(image.shape[1] / (280 * 1.0 / params.imgW))

    # Pass the target size explicitly instead of fx/fy scale factors, so the
    # output is exactly (params.imgH, w_now) and cannot drift through rounding.
    image = cv2.resize(image, (w_now, params.imgH), interpolation=cv2.INTER_CUBIC)
    image = image[np.newaxis, :, :]  # HxW -> 1xHxW (channel first)
    image = image.astype(np.float32) / 255.
    image = torch.from_numpy(image).type(torch.FloatTensor)
    image.sub_(params.mean).div_(params.std)

    if torch.cuda.is_available():
        image = image.cuda()
    image = image.view(1, *image.size())
    image = Variable(image)

    model.eval()
    preds = model(image)

    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    print('results: {0}'.format(sim_pred))
def crnn_recognition(cropped_image, model):
    """Recognise the text in a PIL image on the CPU, print it and return it.

    :param cropped_image: object exposing ``convert('L')`` (a PIL image).
    :param model: recognition network; it is switched to eval mode here.
    :return: the decoded prediction formatted as a string.
    """
    converter = utils.strLabelConverter(alphabet)
    gray = cropped_image.convert('L')

    # gray.size is (w, h); the width is rescaled by the 280 -> params.imgW ratio.
    width_ratio = 280 * 1.0 / params.imgW
    w = int(gray.size[0] / width_ratio)

    # The transformer yields a CHW tensor; prepend the batch dimension.
    tensor = dataset.resizeNormalize((w, 32))(gray)
    batch = tensor.view((1,) + tuple(tensor.size()))

    model.eval()
    scores = model(batch)
    labels = scores.max(2)[1]
    labels = labels.transpose(1, 0).contiguous().view(-1)
    lengths = torch.IntTensor([labels.size(0)])

    sim_pred = converter.decode(labels.data, lengths.data, raw=False)
    print('result:{0}'.format(sim_pred))
    return '{0}'.format(sim_pred)
def crnn_recognition(image, model):
    """Recognise the text in a 3-channel image array and return the prediction.

    The raw (uncollapsed) decoding is printed for debugging.

    :param image: HxWx3 array accepted by ``cv2.resize``.
    :param model: recognition network; it is switched to eval mode here.
    :return: the collapsed prediction from ``converter.decode(..., raw=False)``.
    """
    imgH = 32
    imgW = 100
    converter = utils.strLabelConverter(alphabet)

    # Give cv2.resize the target size directly instead of fx/fy factors, so the
    # result is exactly imgH x imgW and the reshape below cannot mismatch.
    image = cv2.resize(image, (imgW, imgH), interpolation=cv2.INTER_CUBIC)
    # The reshape also rejects inputs that are not 3-channel.
    image = (np.reshape(image, (imgH, imgW, 3))).transpose(2, 0, 1)
    image = image.astype(np.float32) / 255.
    image = torch.from_numpy(image).type(torch.FloatTensor)

    if torch.cuda.is_available():
        image = image.cuda(device)
    image = image.view(1, *image.size())
    image = Variable(image)

    model.eval()
    preds = model(image)

    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    print(preds.shape)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    log = converter.decode(preds.data, preds_size.data, raw=True)
    print("raw_data: ", log)
    return sim_pred
def __init__(self, model_path, gpu_id=None):
    '''
    Initialise the PyTorch model from a checkpoint.

    :param model_path: path of a checkpoint holding both a ``config`` entry
        and a ``state_dict`` entry
    :param gpu_id: index of the GPU to run on; CPU is used when this is not an
        int or when CUDA is unavailable
    '''
    self.gpu_id = gpu_id

    if self.gpu_id is not None and isinstance(
            self.gpu_id, int) and torch.cuda.is_available():
        self.device = torch.device("cuda:%s" % self.gpu_id)
    else:
        self.device = torch.device("cpu")
    print('device:', self.device)

    # Choose the device first and remap storages onto it, so a checkpoint
    # saved on a GPU can still be opened on a CPU-only host.
    checkpoint = torch.load(model_path, map_location=self.device)

    config = checkpoint['config']
    self.net = get_model(config)

    loader_args = config['data_loader']['args']
    self.img_w = loader_args['dataset']['img_w']
    self.img_h = loader_args['dataset']['img_h']
    self.img_channel = loader_args['dataset']['img_channel']
    self.converter = utils.strLabelConverter(loader_args['alphabet'])

    self.net.load_state_dict(checkpoint['state_dict'])
    self.net.to(self.device)
    self.net.eval()
def crnn_recognition(cropped_image, model):
    """Recognise the text in a PIL image on the CPU, tracing tensor sizes.

    The size of the prediction tensor is printed after each processing step,
    followed by the decoded result. Nothing is returned.

    :param cropped_image: object exposing ``convert('L')`` (a PIL image).
    :param model: recognition network; it is switched to eval mode here.
    """
    converter = utils.strLabelConverter(alphabet)
    gray = cropped_image.convert('L')

    # Rescale the width by the 280 -> params.imgW ratio; the height is fixed at 32.
    w = int(gray.size[0] / (280 * 1.0 / params.imgW))
    resize = dataset.resizeNormalize((w, 32))
    tensor = resize(gray)
    batch = tensor.view((1,) + tuple(tensor.size()))

    model.eval()
    scores = model(batch)
    print("preds first=", scores.size())

    labels = scores.max(2)[1]
    print("preds pre=", labels.size())

    labels = labels.transpose(1, 0).contiguous().view(-1)
    print("preds size=", labels.size())

    lengths = torch.IntTensor([labels.size(0)])
    sim_pred = converter.decode(labels.data, lengths.data, raw=False)
    print('results: {0}'.format(sim_pred))
def checkConverter(self):
    """Check ``strLabelConverter`` encode/decode on a lowercase alphabet."""
    conv = utils.strLabelConverter('abcdefghijklmnopqrstuvwxyz')

    # ---- encode: single string
    expected = (torch.IntTensor([5, 6, 1]), torch.IntTensor([3]))
    self.assertTrue(equal(conv.encode('efa'), expected))

    # ---- encode: list of strings
    expected = (torch.IntTensor([5, 6, 1, 1, 2]), torch.IntTensor([3, 2]))
    self.assertTrue(equal(conv.encode(['efa', 'ab']), expected))

    # ---- decode: single sequence
    decoded = conv.decode(torch.IntTensor([5, 6, 1]), torch.IntTensor([3]))
    self.assertTrue(equal(decoded, 'efa'))

    # ---- decode: repeated labels and index 0 are dropped
    decoded = conv.decode(torch.IntTensor([5, 5, 0, 1, 0]),
                          torch.IntTensor([4]))
    self.assertTrue(equal(decoded, 'ea'))

    # ---- decode: batch of two sequences
    decoded = conv.decode(torch.IntTensor([5, 6, 1, 1, 2]),
                          torch.IntTensor([3, 2]))
    self.assertTrue(equal(decoded, ['efa', 'ab']))
def checkConverter(self):
    """Table-driven check of ``strLabelConverter.encode`` and ``.decode``."""
    encoder = utils.strLabelConverter('abcdefghijklmnopqrstuvwxyz')

    # (text, expected (labels, lengths)) -- trivial mode, then batch mode.
    encode_cases = [
        ('efa',
         (torch.IntTensor([5, 6, 1]), torch.IntTensor([3]))),
        (['efa', 'ab'],
         (torch.IntTensor([5, 6, 1, 1, 2]), torch.IntTensor([3, 2]))),
    ]
    for text, target in encode_cases:
        self.assertTrue(equal(encoder.encode(text), target))

    # (labels, lengths, expected text) -- trivial, replicate, then batch mode.
    decode_cases = [
        (torch.IntTensor([5, 6, 1]), torch.IntTensor([3]), 'efa'),
        (torch.IntTensor([5, 5, 0, 1, 0]), torch.IntTensor([4]), 'ea'),
        (torch.IntTensor([5, 6, 1, 1, 2]), torch.IntTensor([3, 2]),
         ['efa', 'ab']),
    ]
    for labels, lengths, target in decode_cases:
        self.assertTrue(equal(encoder.decode(labels, lengths), target))
def crnn_predict(crnn, img, transformer, num_top_results=5):
    """
    Run the recogniser on one image and decode its top candidate sequences.

    Parameters
    ------
    crnn: the recognition network (called on a 1-image batch)
    img: input image; a copy is passed to ``transformer``
    transformer: callable turning the image into a tensor
    num_top_results: number of candidates requested from ``utils.pred_conf``

    Returns
    ------
    out: a list of tuples (predicted alphanumeric sequence, confidence level)
    """
    alphabet = string.ascii_uppercase + string.digits

    image = transformer(img.copy())

    # The original called ``image.cuda()`` and discarded the result, so the
    # tensor never moved. Put the input on the device the network lives on,
    # which works for both CPU and GPU models.
    get_params = getattr(crnn, "parameters", None)
    first_param = next(get_params(), None) if callable(get_params) else None
    if first_param is not None:
        image = image.to(first_param.device)
    image = image.view(1, *image.size())

    # forward pass (convert to numpy array)
    preds_np = crnn(image).data.cpu().numpy().squeeze().transpose()

    # top predictions and their corresponding confidence
    seq_conf = utils.pred_conf(preds_np, num_top_results=num_top_results)

    converter = utils.strLabelConverter(alphabet)
    output = []
    for seq, conf in seq_conf:
        # Decoding only reads label indices, so the tensor can stay on the CPU;
        # an unconditional ``.cuda()`` crashes on hosts without CUDA.
        seq_tensor = torch.from_numpy(seq)
        seq_tensor_size = torch.IntTensor([seq_tensor.size(0)])
        sim_pred = converter.decode(seq_tensor, seq_tensor_size, raw=False).upper()
        output.append((sim_pred, conf))
    return output
def model_predict(img_path, loadmodel):
    """Recognise the text in an image file and return the collapsed prediction.

    Both the raw and the collapsed decodings are printed.

    :param img_path: path of the image; it is opened and converted to grayscale.
    :param loadmodel: the loaded recognition network. The original body ignored
        this argument and used a module-level ``model`` instead
        (NOTE(review): confirm callers pass the network here).
    :return: prediction from ``converter.decode(..., raw=False)``.
    """
    converter = utils.strLabelConverter(alphabet)
    transformer = dataset.resizeNormalize((100, 32))

    image = Image.open(img_path).convert('L')
    image = transformer(image)
    if torch.cuda.is_available():
        image = image.cuda()
    image = image.view(1, *image.size())
    image = Variable(image)

    loadmodel.eval()
    preds = loadmodel(image)

    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))

    raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    print('%-20s => %-20s' % (raw_pred, sim_pred))
    return sim_pred
def crnn_recognition(image, model):
    """Recognise the text in an HxWxC image array, keeping its aspect ratio.

    The image is scaled uniformly so that its height becomes the module-level
    ``imgH``, then passed through the module-level ``transformer``.

    :param image: 3-dimensional image array accepted by ``cv2.resize``.
    :param model: recognition network (its train/eval mode is left untouched).
    :return: prediction from ``converter.decode(..., raw=False)``.
    """
    converter = utils.strLabelConverter(alphabet)

    height, _, channels = image.shape
    scale = imgH / float(height)
    resized = cv2.resize(image, (0, 0), fx=scale, fy=scale,
                         interpolation=cv2.INTER_CUBIC)

    # HWC -> CHW, scaled into [0, 1].
    chw = np.reshape(resized, (imgH, -1, channels)).transpose(2, 0, 1)
    chw = chw.astype(np.float32) / 255.
    tensor = transformer(torch.from_numpy(chw).type(torch.FloatTensor))

    if torch.cuda.is_available():
        tensor = tensor.cuda()
    batch = Variable(tensor.view((1,) + tuple(tensor.size())))

    scores = model(batch)
    labels = scores.max(2)[1].transpose(1, 0).contiguous().view(-1)
    lengths = Variable(torch.IntTensor([labels.size(0)]))
    return converter.decode(labels.data, lengths.data, raw=False)
def crnn_recognition(cropped_image, model):
    """Recognise the text in a PIL image and print every non-blank step label.

    Repeated labels are NOT collapsed: each step whose label index is non-zero
    contributes one character.

    :param cropped_image: object exposing ``convert('L')`` (a PIL image).
    :param model: recognition network; it is switched to eval mode here.
    """
    image = cropped_image.convert('L')

    # Rescale the width by the 280 -> 160 ratio; the height is fixed at 32.
    w = int(image.size[0] / (280 * 1.0 / 160))
    transformer = dataset.resizeNormalize((w, 32))
    image = transformer(image)
    if torch.cuda.is_available():
        image = image.cuda()
    image = image.view(1, *image.size())
    image = Variable(image)

    model.eval()
    preds = model(image)

    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)

    # Compare by value: the former ``is not 0`` identity test was always true
    # for tensor elements, so index 0 was emitted as ``alphabet[-1]``.
    out = ''.join(alphabet[idx - 1] for idx in preds.data.tolist() if idx != 0)
    print(out)
def crnn_recognition(cropped_image, model):
    """Recognise the text in a PIL image, save it to ``test.txt`` and print it.

    :param cropped_image: object exposing ``convert('L')`` (a PIL image).
    :param model: recognition network; it is switched to eval mode here.
    """
    converter = utils.strLabelConverter(alphabet)
    image = cropped_image.convert('L')

    # Rescale the width by the 280 -> 160 ratio; the height is fixed at 32.
    w = int(image.size[0] / (280 * 1.0 / 160))
    transformer = dataset.resizeNormalize((w, 32))
    image = transformer(image)
    if torch.cuda.is_available():
        image = image.cuda()
    image = image.view(1, *image.size())
    image = Variable(image)

    model.eval()
    preds = model(image)

    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)

    # Text mode ('wb' would mean writing a binary file). The context manager
    # closes the handle even if the write fails.
    with open('test.txt', 'w') as f:
        f.write('results: {0}'.format(sim_pred))
    print('results: {0}'.format(sim_pred))
def recognize(image_path, alphabet, snapshot, gpu):
    """Load a CRNN snapshot and recognise the text in one image file.

    :param image_path: path of the image; it is opened and converted to grayscale.
    :param alphabet: characters the model can emit; the class count is derived
        from it as ``len(alphabet) + 1`` (37 for the stock 36-character set).
    :param snapshot: path of the saved ``state_dict``.
    :param gpu: currently unused; CUDA is used whenever it is available.
    :return: prediction from ``converter.decode(..., raw=False)``.
    """
    # Derive the class count from the alphabet instead of hard-coding 37.
    model = crnn.CRNN(32, 1, len(alphabet) + 1, 256)
    if torch.cuda.is_available():
        model = model.cuda()
    print('loading pretrained model from %s' % snapshot)
    # Stage the weights on the CPU so a CUDA-saved snapshot also loads on a
    # CPU-only host; load_state_dict copies them onto the model's device.
    model.load_state_dict(torch.load(snapshot, map_location='cpu'))

    converter = utils.strLabelConverter(alphabet)
    transformer = dataset.resizeNormalize((100, 32))

    image = Image.open(image_path).convert('L')
    image = transformer(image)
    if torch.cuda.is_available():
        image = image.cuda()
    image = image.view(1, *image.size())
    image = Variable(image)

    model.eval()
    preds = model(image)

    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))

    raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    print('%-20s => %-20s' % (raw_pred, sim_pred))
    return sim_pred
def __init__(self, model_path, gpu_id=None):
    '''
    Initialise the PyTorch text-recognition model.

    :param model_path: path of a checkpoint containing a ``state_dict`` entry
    :param gpu_id: index of the GPU to run on; CPU is used when this is not an
        int or when CUDA is unavailable
    '''
    self.gpu_id = gpu_id
    self.converter = utils.strLabelConverter(alphabet)

    if self.gpu_id is not None and isinstance(
            self.gpu_id, int) and torch.cuda.is_available():
        self.device = torch.device("cuda:%s" % self.gpu_id)
    else:
        self.device = torch.device("cpu")

    # Remap storages onto the selected device in both cases. Previously the
    # GPU branch restored tensors onto whichever device they were saved from.
    checkpoint = torch.load(model_path, map_location=self.device)
    print('text recognition running on device:', self.device)

    self.net = crnn.CRNN(CNN=CNN,
                         RNN=RNN,
                         nIn=n_in,
                         n_class=new_class,
                         nHidden=n_hidden,
                         nLayer=n_layer,
                         dropout=dp)
    self.net.load_state_dict(checkpoint['state_dict'])
    self.net.to(self.device)
    self.net.eval()
    self.transform = transforms.Compose([transforms.ToTensor()])
def testing(model, input_dir=None, pseudo_label=None):
    """Recognise every JPEG in a folder and print the text with its probability.

    :param model: callable network; its output is exponentiated and reduced
        over dim 2, i.e. it is treated as SxBxC log-probabilities
        (NOTE(review): confirm the model really emits log-softmax).
    :param input_dir: directory that is globbed for ``*.jpg`` files.
    :param pseudo_label: not used by this function.
    """
    image_paths = glob.glob(input_dir + "/*.jpg")
    converter = utils.strLabelConverter(params.alphabet)
    transformer = dataset.processing_image((params.imgW, params.imgH))

    with torch.no_grad():
        for image_path in image_paths:
            image = Image.open(image_path).convert('L')
            image = transformer(image)
            if args.check:
                # Out-of-place ops: the dump must not modify the tensor that is
                # fed to the model afterwards.
                check_img = image.mul(0.5).add(0.5).permute(1, 2, 0).numpy() * 255.0
                cv2.imwrite('DATA/img_check/' + os.path.basename(image_path) + ".jpg",
                            check_img)
            if torch.cuda.is_available():
                image = image.cuda()
            image = image.view(1, *image.size())
            image = Variable(image)

            preds = model(image)  # SxBxC
            probs = torch.exp(preds)
            max_probs, preds = probs.max(2)
            # Product of the per-step maxima = score of the greedy path.
            prob = max_probs.cumprod(0)[-1]

            preds = preds.transpose(1, 0).contiguous().view(-1)
            preds_size = Variable(torch.LongTensor([preds.size(0)]))
            sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
            print('Text in %-65s is: %-12s - prob: %-6f' % (image_path, sim_pred, prob))
def __init__(self, model_path, alphabet, img_shape, net, img_channel=3, gpu_id=None):
    '''
    Initialise the PyTorch model.

    :param model_path: model location (either a parameters-only file, or a file
        storing the parameters together with the computation graph)
    :param alphabet: the alphabet
    :param img_shape: image size as (w, h)
    :param net: network graph; must be given when ``model_path`` holds
        parameters only
    :param img_channel: number of image channels: 1 or 3
    :param gpu_id: which GPU to run on
    '''
    self.gpu_id = gpu_id
    self.img_w, self.img_h = img_shape[0], img_shape[1]
    self.img_channel = img_channel
    self.converter = utils.strLabelConverter(alphabet)

    on_gpu = (self.gpu_id is not None
              and isinstance(self.gpu_id, int)
              and torch.cuda.is_available())
    if on_gpu:
        self.device = torch.device("cuda:%s" % self.gpu_id)

        def remap(storage, loc):
            return storage.cuda(gpu_id)
    else:
        self.device = torch.device("cpu")

        def remap(storage, loc):
            return storage.cpu()

    self.net = torch.load(model_path, map_location=remap)
    print('device:', self.device)

    if net is not None:
        # Graph and parameters were saved separately: what was just loaded is
        # the parameter dict, so copy it into the supplied network.
        net = net.to(self.device)
        net.load_state_dict(self.net)
        self.net = net
    self.net.eval()
def __init__(self, args):
    """Build the Chinese CRNN recogniser and load its pretrained weights.

    :param args: options object; this method reads ``gpus``, ``imgH``, ``nh``,
        ``model_path`` and ``cuda`` from it.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus
    self.args = args
    self.alphabet = alphabetChinese
    nclass = len(self.alphabet) + 1
    nc = 1
    self.net = CRNN(args.imgH, nc, args.nh, nclass)
    self.converter = utils.strLabelConverter(self.alphabet, ignore_case=False)
    self.transformer = resizeNormalize(args.imgH)

    print('loading pretrained model from %s' % args.model_path)
    # Stage the weights on the CPU so a CUDA-saved checkpoint also opens on a
    # CPU-only host; the network is moved to the GPU afterwards if requested.
    checkpoint = torch.load(args.model_path, map_location='cpu')
    if 'model_state_dict' in checkpoint.keys():
        checkpoint = checkpoint['model_state_dict']

    from collections import OrderedDict
    # Strip the prefix only when it really is a leading "module." -- the old
    # substring test chopped 7 characters off any key containing "module".
    prefix = 'module.'
    model_dict = OrderedDict(
        (k[len(prefix):] if k.startswith(prefix) else k, v)
        for k, v in checkpoint.items())
    self.net.load_state_dict(model_dict)

    if args.cuda and torch.cuda.is_available():
        print('available gpus is,', torch.cuda.device_count())
        # NOTE(review): stock torch.nn.DataParallel documents ``dim`` and
        # ``output_device`` but no ``output_dim`` keyword -- confirm this call
        # is accepted by the installed version before relying on the GPU path.
        self.net = torch.nn.DataParallel(self.net, output_dim=1).cuda()
    self.net.eval()
def crnn_single_test(cropped_image, model):
    """Recognise the text in one PIL image and print the decoded result.

    :param cropped_image: object exposing ``convert('L')`` (a PIL image).
    :param model: recognition network; it is switched to eval mode here.
    """
    converter = utils.strLabelConverter(alphabet)
    gray = cropped_image.convert('L')

    # Rescale the width by the 280 -> 180 ratio; the height is fixed at 32.
    width_ratio = 280 * 1.0 / 180
    w = int(gray.size[0] / width_ratio)
    tensor = dataset.resizeNormalize((w, 32))(gray)

    if torch.cuda.is_available():
        tensor = tensor.cuda()
    batch = Variable(tensor.view((1,) + tuple(tensor.size())))

    model.eval()
    scores = model(batch)

    labels = scores.max(2)[1]
    labels = labels.transpose(1, 0).contiguous().view(-1)
    lengths = Variable(torch.IntTensor([labels.size(0)]))
    sim_pred = converter.decode(labels.data, lengths.data, raw=False)
    print('results: {0}'.format(sim_pred))
def crnn_recognition(imgpth, model, tesing_dataset, total_correct_num, total_string_length):
    """Recognise one image file and score the result against its ground truth.

    :param imgpth: image path; also the key looked up in ``tesing_dataset``.
    :param model: recognition network; it is switched to eval mode here.
    :param tesing_dataset: mapping queried with ``.get(imgpth)`` for the label.
    :param total_correct_num: not used by this function.
    :param total_string_length: not used by this function.
    :return: ``(correct_num, string_length)`` where ``correct_num`` is the label
        length scaled by the normalised Levenshtein similarity, truncated to
        an int, and ``string_length`` is the label length.
    """
    source = Image.open(imgpth)
    converter = utils.strLabelConverter(alphabet)
    gray = source.convert('L')

    # Rescale the width by the 280 -> 180 ratio; the height is fixed at 32.
    w = int(gray.size[0] / (280 * 1.0 / 180))
    resize = dataset.resizeNormalize((w, 32))
    tensor = resize(gray)
    if torch.cuda.is_available():
        tensor = tensor.cuda()
    batch = Variable(tensor.view((1,) + tuple(tensor.size())))

    model.eval()
    scores = model(batch)

    labels = scores.max(2)[1].transpose(1, 0).contiguous().view(-1)
    lengths = Variable(torch.IntTensor([labels.size(0)]))
    sim_pred = converter.decode(labels.data, lengths.data, raw=False)

    ground_truth = tesing_dataset.get(imgpth)
    similarity = textdistance.levenshtein.normalized_similarity(ground_truth, sim_pred)
    correct_num = int(len(ground_truth) * similarity)
    string_length = len(ground_truth)

    print('results: {0}, gt: {1}'.format(sim_pred, ground_truth))
    return correct_num, string_length
def crnn_recognition(cropped_image, model):
    """Recognise the text in one PIL image and print the decoded result.

    :param cropped_image: object exposing ``convert('L')`` (a PIL image).
    :param model: recognition network; it is switched to eval mode here.
    """
    converter = utils.strLabelConverter(alphabet)
    gray = cropped_image.convert('L')

    # Training images were 280 px wide and were resized to 160 px before being
    # fed to the network, so test images are scaled by the same factor.
    train_scale = 280 * 1.0 / 160
    w = int(gray.size[0] / train_scale)

    to_tensor = dataset.resizeNormalize((w, 32))
    tensor = to_tensor(gray)
    if torch.cuda.is_available():
        tensor = tensor.cuda()
    tensor = tensor.view((1,) + tuple(tensor.size()))
    batch = Variable(tensor)

    model.eval()
    scores = model(batch)

    labels = scores.max(2)[1]
    flat = labels.transpose(1, 0).contiguous().view(-1)
    lengths = Variable(torch.IntTensor([flat.size(0)]))
    sim_pred = converter.decode(flat.data, lengths.data, raw=False)
    print('results: {0}'.format(sim_pred))
def __init__(self, oracle, alphabet, image_shape, target, file_weights):
    """Set up the perturbation, optimiser and CTC criterion.

    :param oracle: stored on the instance as ``self.oracle``.
    :param alphabet: characters handed to ``utils.strLabelConverter``.
    :param image_shape: ``(width, height)`` of the original image.
    :param target: target phrase; its length is stored as ``phrase_length``.
    :param file_weights: stored on the instance as ``self.weights``.
    """
    # Optimisation hyper-parameters.
    self.learning_rate = 0.001
    self.num_iterations = 5000
    self.batch_size = 1

    self.phrase_length = len(target)
    self.o_imW, self.o_imH = image_shape
    self.i_imW, self.i_imH = imgW, imgH
    self.oracle = oracle
    self.weights = file_weights

    # Adversarial noise that is added to the image to perturb it; it is the
    # only tensor handed to the optimiser.
    noise = torch.rand((1, self.o_imH, self.o_imW))
    if torch.cuda.is_available():
        noise = noise.cuda()
    self.delta = Variable(noise, requires_grad=True)

    # Optimise delta with CTC as the criterion (one shared instance).
    criterion = CTCLoss()
    self.optimizer = optim.Adam([self.delta],
                                lr=self.learning_rate,
                                betas=(0.9, 0.999))
    self.loss = criterion
    self.ctcloss = criterion

    self.target = target
    self.converter = utils.strLabelConverter(alphabet, attention=False)
def main():
    """Run the ResNet-CRNN over the LMDB test set and append predictions to a file.

    One line is appended to ``test_out/test_out.txt`` per prediction, in the
    form ``<6-digit index>.jpg <text>``.
    """
    resnet_crnn = ResNetCRNN(rc_params.imgH,
                             1,
                             len(rc_params.alphabet) + 1,
                             rc_params.nh,
                             resnet_type=rc_params.resnet_type,
                             feat_size=rc_params.feat_size)
    resnet_crnn = torch.nn.DataParallel(resnet_crnn)
    state_dict = torch.load(
        './work_dirs/resnet18_rcnn_sgd_imgh128_rgb_512x1x16_lr_0.00100_batchSize_8_time_0319110013_/crnn_Rec_done_epoch_7.pth'
    )
    resnet_crnn.load_state_dict(state_dict)

    test_dataset = dataset.lmdbDataset(root='to_lmdb/test_index', rgb=True)
    converter = utils.strLabelConverter(rc_params.alphabet)

    resnet_crnn.eval()
    resnet_crnn.cuda()

    data_loader = torch.utils.data.DataLoader(
        test_dataset,
        shuffle=False,
        batch_size=1,
        num_workers=int(rc_params.workers),
        collate_fn=alignCollate(imgH=rc_params.imgH,
                                imgW=rc_params.imgW,
                                keep_ratio=rc_params.keep_ratio,
                                rgb=True))

    record_dir = 'test_out/test_out.txt'
    r = 1  # running index used to build the output file names

    # Buffer that utils.loadData fills with each batch.
    image = torch.FloatTensor(rc_params.batchSize, 3, rc_params.imgH,
                              rc_params.imgH)
    prog_bar = mmcv.ProgressBar(len(data_loader))

    # Iterate the loader directly (``iterator.next()`` is Python 2 only) and
    # let the context manager close the file even if inference fails.
    with open(record_dir, "a") as f:
        for cpu_images, _ in data_loader:
            batch_size = cpu_images.size(0)
            utils.loadData(image, cpu_images)

            with torch.no_grad():
                preds = resnet_crnn(image)

            preds_size = torch.IntTensor([preds.size(0)] * batch_size)
            _, preds = preds.max(2)
            preds = preds.transpose(1, 0).contiguous().view(-1)
            sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
            if not isinstance(sim_preds, list):
                sim_preds = [sim_preds]

            for pred in sim_preds:
                f.write(str(r).zfill(6) + ".jpg " + pred + "\n")
                r += 1
            prog_bar.update()
    print("")
def __init__(self, model_path='./crnn.pytorch-master/data/crnn.pth'):
    """Create the CRNN, load its pretrained weights and build the label converter.

    :param model_path: path of the saved ``state_dict``.
    """
    self.model = crnn.CRNN(32, 1, 37, 256)
    if torch.cuda.is_available():
        self.model = self.model.cuda()
    print('loading pretrained self.model from %s' % model_path)
    # Stage the weights on the CPU so a CUDA-saved file also loads on a
    # CPU-only host; load_state_dict copies them onto the model's device.
    self.model.load_state_dict(torch.load(model_path, map_location='cpu'))

    self.alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    self.converter = utils.strLabelConverter(self.alphabet)
def __init__(self, model_path):
    """Create the CRNN in eval mode with pretrained weights and a label converter.

    :param model_path: path of the saved ``state_dict``.
    """
    self.model = crnn.CRNN(32, 1, 37, 256)
    if torch.cuda.is_available():
        self.model = self.model.cuda()
    # Stage the weights on the CPU so a CUDA-saved file also loads on a
    # CPU-only host; load_state_dict copies them onto the model's device.
    self.model.load_state_dict(torch.load(model_path, map_location='cpu'))
    self.converter = utils.strLabelConverter(
        '0123456789abcdefghijklmnopqrstuvwxyz')
    self.model.eval()
def main():
    """Detect a licence plate in every JPEG under ``images/`` and run CRNN on it.

    Each image is shown and the function waits for a key press. Images with no
    detected plate, or whose plate cannot be cropped, are skipped.
    """
    model_path = './data/crnn.pth'
    img_path = './data/demo.png'
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'

    model = crnn.CRNN(32, 1, 37, 256)
    if torch.cuda.is_available():
        model = model.cuda()
    print('loading pretrained model from %s' % model_path)
    # Stage the weights on the CPU so a CUDA-saved file also loads on a
    # CPU-only host; load_state_dict copies them onto the model's device.
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    model.eval()

    converter = utils.strLabelConverter(alphabet)

    # Modifiable. Takes all .jpg files in the given directory.
    # (Renamed from ``dir`` to avoid shadowing the builtin.)
    image_files = Path("images/").glob("**/*.jpg")

    for image_path in image_files:
        image = cv2.imread(str(image_path))
        cv2.imshow('Original Image', image)
        cv2.waitKey(0)

        plate_contour = plate_detection(image)
        if plate_contour is None:
            print("No license plates detected")
            continue

        img = image.copy()
        try:
            plate = crop_contour(plate_contour, image, img)
        except Exception:
            # Narrowed from a bare ``except`` so Ctrl-C / SystemExit still work.
            print("Plate could not be processed")
            continue
        cv2.destroyAllWindows()

        plate_recognition(plate)

        image = plate.convert('L')
        # NOTE(review): the result of this resize is discarded and ``image`` is
        # then used as a tensor (``.cuda()``, ``.view()``) without any
        # conversion -- this looks like the commented-out
        # ``dataset.resizeNormalize((100, 32))`` step is missing. Left as-is
        # because the type returned by ``crop_contour`` is not visible here.
        cv2.resize(image, (100, 32))
        if torch.cuda.is_available():
            image = image.cuda()
        image = image.view(1, *image.size())
        image = Variable(image)

        preds = model(image)

        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))

        raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
        sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
        print('%-20s => %-20s' % (raw_pred, sim_pred))

    # NOTE(review): the original layout was lost; this is assumed to run once
    # after all images have been processed -- confirm.
    exit()
def __init__(self, train_path, test_path, model_file, model, img_h=32, img_w=110, batch_size=64, lr=1e-3,
             use_unicode=True, best_loss=0.2, use_gpu=True, workers=1):
    """Prepare the model, data loaders, CTC criterion and Adam optimiser.

    :param train_path: training root; ``labels_normal.txt`` is read from it.
    :param test_path: test root; ``labels_normal.txt`` is read from it.
    :param model_file: checkpoint path, loaded via ``self.load`` if it exists.
    :param model: the network to train.
    :param img_h: image height passed to the resize transform and the datasets.
    :param img_w: image width passed to the resize transform and the datasets.
    :param batch_size: batch size of both data loaders.
    :param lr: learning rate for Adam.
    :param use_unicode: stored on the instance.
    :param best_loss: stored on the instance.
    :param use_gpu: move the model and the criterion to CUDA when true.
    :param workers: number of data-loader worker processes.
    """
    self.model = model
    self.model_file = model_file
    self.use_unicode = use_unicode
    self.img_h, self.img_w = img_h, img_w
    self.batch_size = batch_size
    self.lr = lr
    self.best_loss = best_loss
    self.best_acc = 0.95
    self.use_gpu = use_gpu
    self.workers = workers

    self.converter = utils.strLabelConverter(alphabet)
    self.criterion = CTCLoss()

    if self.use_gpu:
        print("[use gpu] ...")
        self.model = self.model.cuda()
        self.criterion = self.criterion.cuda()
    if torch.cuda.is_available() and not self.use_gpu:
        print("[WARNING] You have a CUDA device, so you should probably run with --cuda")

    # Load the model.
    if os.path.exists(self.model_file):
        self.load(self.model_file)
    else:
        print('[Load model] error !!!')

    self.transform = T.Compose([
        T.Resize((self.img_h, self.img_w)),
        T.ToTensor(),
    ])

    def build_loader(root, is_train):
        # Dataset + loader for one split; only the training split is shuffled.
        label_file = os.path.join(root, 'labels_normal.txt')
        split = my_dataset.MyDataset(root=root, label_file=label_file,
                                     transform=self.transform, is_train=is_train,
                                     img_h=self.img_h, img_w=self.img_w)
        return torch.utils.data.DataLoader(dataset=split, batch_size=self.batch_size,
                                           shuffle=is_train, num_workers=int(self.workers))

    self.train_loader = build_loader(train_path, True)
    self.test_loader = build_loader(test_path, False)

    # Set up the optimiser.
    self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr, weight_decay=1e-5)
def crnnSource():
    """Build the CRNN with pretrained weights plus its label converter.

    The model is placed on CUDA when it is available and the module-level
    ``GPU`` flag is truthy, otherwise on the CPU.

    :return: ``(model, converter)``; the model is in eval mode.
    """
    alphabet = keys.alphabet
    converter = utils.strLabelConverter(alphabet)

    model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1)
    if torch.cuda.is_available() and GPU:
        model = model.cuda()
    else:
        model = model.cpu()

    path = './crnn/pytorch/models/model_acc97.pth'
    model.eval()
    # Stage the weights on the CPU so a CUDA-saved file also loads on the CPU
    # branch; load_state_dict copies them onto the model's device.
    model.load_state_dict(torch.load(path, map_location='cpu'))
    return model, converter
def __init__(self, crnn_model_path):
    """Create the CRNN in eval mode with pretrained weights and a label converter.

    :param crnn_model_path: path of the saved ``state_dict``.
    """
    super(CRNN_MODEL, self).__init__()
    self.converter = strLabelConverter(alphabet)

    nclass = len(alphabet) + 1
    self.model = crnn.CRNN(32, 1, nclass, 256)
    if torch.cuda.is_available():
        self.model = self.model.cuda()

    # Stage the weights on the CPU so a CUDA-saved file also loads on a
    # CPU-only host; load_state_dict copies them onto the model's device.
    self.model.load_state_dict(torch.load(crnn_model_path, map_location='cpu'))
    self.model.eval()
def crnnSource(self, dir_model):
    """Build the bib-number CRNN with pretrained weights plus its label converter.

    The CPU is used when ``self.gpuid == '-1'``, CUDA otherwise.

    :param dir_model: root directory that contains ``bib_number/CRNN/crnn.pth``.
    :return: ``(model, converter)``.
    """
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'  # keys.alphabet
    converter = utils.strLabelConverter(alphabet)

    model = crnn.CRNN(32, 1, len(alphabet) + 1, 256)
    if self.gpuid == '-1':
        print('cpu-version')
    else:
        print('gpu-version')
        model = model.cuda()

    path = dir_model + '/bib_number/CRNN/crnn.pth'  # './crnn/samples/netCRNN63.pth'
    # Stage the weights on the CPU so the 'cpu-version' path can open a
    # CUDA-saved file; load_state_dict copies them onto the model's device.
    model.load_state_dict(torch.load(path, map_location='cpu'))
    print('Loaded network {:s}'.format(path))
    return model, converter
def load_model(model_path):
    """Initialise the module-level ``model``, ``converter`` and ``transformer``.

    :param model_path: path of the saved ``state_dict`` (keys without a
        ``module.`` prefix, as the plain CPU load already required).
    """
    # net init
    global transformer, model, converter

    print('loading pretrained model from %s' % model_path)
    nclass = len(params.alphabet) + 1
    model = crnn.CRNN(params.imgH, params.nc, nclass, params.nh)

    # Load the weights into the bare network BEFORE any device move or
    # DataParallel wrapping: previously only the CPU branch loaded them, so
    # the CUDA branch ran with randomly initialised parameters.
    model.load_state_dict(torch.load(model_path, map_location='cpu'))

    if torch.cuda.is_available():
        model = model.cuda()
        model = torch.nn.DataParallel(model)
    model.eval()

    converter = utils.strLabelConverter(params.alphabet)
    transformer = dataset.resizeNormalize((100, 32))
import utils
import dataset
from PIL import Image

import models.crnn as crnn

# Demo: run the pretrained CRNN on a single image (GPU only).
model_path = './data/crnn.pth'
img_path = './data/demo.png'
alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'

model = crnn.CRNN(32, 1, 37, 256).cuda()
print('loading pretrained model from %s' % model_path)
model.load_state_dict(torch.load(model_path))

converter = utils.strLabelConverter(alphabet)

transformer = dataset.resizeNormalize((100, 32))
image = Image.open(img_path).convert('L')
image = transformer(image).cuda()
image = image.view(1, *image.size())
image = Variable(image)

model.eval()
preds = model(image)

_, preds = preds.max(2)
# No ``squeeze(2)`` here: on current PyTorch ``max(2)`` already drops the
# reduced dimension, so squeezing dim 2 raises. The flatten below gives the
# same result whether or not a trailing singleton dimension is present.
preds = preds.transpose(1, 0).contiguous().view(-1)
preds_size = Variable(torch.IntTensor([preds.size(0)]))