def load_model(abc, seq_proj=[0, 0], backend='resnet18', snapshot=None, cuda=False):
    """Build a CRNN, optionally restore weights from a snapshot, and move it to GPU."""
    net = CRNN(abc=abc, seq_proj=seq_proj, backend=backend)
    net = nn.DataParallel(net)
    if snapshot is not None:
        load_weights(net, torch.load(snapshot))
    if cuda:
        net = net.cuda()
    return net
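
# --- Hedged usage sketch (not from the original repo): the alphabet string and
# snapshot path below are placeholders; it assumes CRNN and load_weights are
# importable exactly as used by load_model above.
import torch

abc = '0123456789abcdefghijklmnopqrstuvwxyz'   # placeholder alphabet
net = load_model(abc, backend='resnet18',
                 snapshot='crnn_checkpoint.pth',  # placeholder path
                 cuda=torch.cuda.is_available())
net.eval()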
class PytorchOcr:
    def __init__(self, model_path):
        alphabet_unicode = config.alphabet_v2
        self.alphabet = ''.join([chr(uni) for uni in alphabet_unicode])
        self.nclass = len(self.alphabet) + 1  # +1 for the CTC blank
        self.model = CRNN(config.imgH, 1, self.nclass, 256)
        self.cuda = False
        if torch.cuda.is_available():
            self.cuda = True
            self.model.cuda()
            # Strip the 'module.' prefix left behind by nn.DataParallel checkpoints
            self.model.load_state_dict({
                k.replace('module.', ''): v
                for k, v in torch.load(model_path).items()
            })
        else:
            self.model.load_state_dict(
                torch.load(model_path, map_location='cpu'))
        self.model.eval()
        self.converter = strLabelConverter(self.alphabet)

    def recognize(self, img):
        h, w = img.shape[:2]
        if len(img.shape) == 3:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        image = Image.fromarray(img)
        # Resize to height 32 while preserving the aspect ratio, then normalize
        transformer = resizeNormalize((int(w / h * 32), 32))
        image = transformer(image)
        image = image.view(1, *image.size())
        if self.cuda:
            image = image.cuda()
        with torch.no_grad():
            preds = self.model(image)
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = torch.IntTensor([preds.size(0)])
        txt = self.converter.decode(preds.data, preds_size.data, raw=False)
        return txt
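
# --- Hedged usage sketch (not from the original repo): the checkpoint and image
# paths are placeholders; it assumes config, CRNN, strLabelConverter and
# resizeNormalize are importable exactly as used by PytorchOcr above.
import cv2

ocr_engine = PytorchOcr('weights/crnn_ocr.pth')   # placeholder checkpoint path
frame = cv2.imread('text_line.jpg')               # placeholder image (BGR or grayscale)
print(ocr_engine.recognize(frame))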
def load_model_from_checkpoint(checkpoint_file_name, use_gpu=False):
    """Load a pretrained CRNN model."""
    model = CRNN(line_size, 1, len(vocab), 256)
    checkpoint = torch.load(checkpoint_file_name,
                            map_location='cpu' if not use_gpu else None)
    model.load_state_dict(checkpoint['state_dict'])
    model.float()
    model.eval()
    model = model.cuda() if use_gpu else model.cpu()
    return model
def ocr(orig_img, lines, checkpoint_file_name, use_gpu=False):
    """OCR on segmented lines."""
    model = CRNN(line_size, 1, len(vocab), 256)
    checkpoint = torch.load(checkpoint_file_name,
                            map_location='cpu' if not use_gpu else None)
    model.load_state_dict(checkpoint['state_dict'])
    model.float()
    model.eval()
    model = model.cuda() if use_gpu else model.cpu()
    torch.set_grad_enabled(False)

    def to_text(tensor, max_length=None, remove_repetitions=False):
        """Greedy CTC decode: drop blanks ('B') and optionally collapse repeats."""
        sentence = ''
        sequence = tensor.cpu().detach().numpy()
        for i in range(len(sequence)):
            if max_length is not None and i >= max_length:
                continue
            char = idx2char[sequence[i]]
            if char != 'B':  # ignore the blank symbol
                if remove_repetitions and i != 0 and char == idx2char[sequence[i - 1]]:
                    pass
                else:
                    sentence = sentence + char
        return sentence

    result = []
    for line in lines:
        (x1, y1), (x2, y2) = line
        # Crop the line, rotate it upright, and resize to the model's input height
        line_img = image_resize(np.array(np.rot90(orig_img[y1:y2, x1:x2])),
                                height=line_size)
        inputs = torch.from_numpy(line_img / 255).float().unsqueeze(0).unsqueeze(0)
        if use_gpu:
            inputs = inputs.cuda()  # keep inputs on the same device as the model
        outputs = model(inputs)
        prediction = outputs.softmax(2).max(2)[1]
        predicted_text = to_text(prediction[:, 0], remove_repetitions=True)
        result.append((line_img, predicted_text))
    return result
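
# --- Hedged usage sketch (not from the original repo): paths and line coordinates
# are placeholders; it assumes the surrounding module defines line_size, vocab,
# idx2char, CRNN and image_resize as used above.
import cv2

page = cv2.imread('scanned_page.png', cv2.IMREAD_GRAYSCALE)   # placeholder image
segmented_lines = [((0, 0), (page.shape[1], 64))]             # placeholder line box
for line_img, text in ocr(page, segmented_lines, 'crnn_checkpoint.pth', use_gpu=False):
    print(text)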
    # accumulate the flattened element count of each parameter tensor
    params_shape.append(reduce(mul, v.numpy().shape))

params_total = sum(params_shape)
print('params_total:', params_total)

# either resume from a saved model or initialize a fresh one
if opt.finetune:
    print('Loading model from', opt.modeldir + opt.modelname)
    net.load_state_dict(torch.load(opt.modeldir + opt.modelname))
else:
    print('create new model')
    net.apply(weights_init)

if opt.ngpu > 1:
    net = nn.DataParallel(net, device_ids=range(opt.ngpu))
net.cuda()

criterion = CTCLoss().cuda()

# optimizer is selected from the command-line flags
if opt.adadelta:
    optimizer = optim.Adadelta(net.parameters(), lr=opt.lr)
elif opt.rms:
    optimizer = optim.RMSprop(net.parameters(), lr=opt.lr)
else:
    optimizer = optim.Adam(net.parameters(), lr=opt.lr,
                           betas=(0.5, 0.999), weight_decay=0.003)


def val_test():
                                 batch_size=option.batch_size,
                                 shuffle=True)

validationset = LMDBDataset(option.validationset_path,
                            transform=transforms.Compose([
                                transforms.Resize((option.image_h, option.image_w)),
                                transforms.ToTensor()
                            ]))
validationset_dataloader = DataLoader(validationset,
                                      batch_size=option.batch_size,
                                      shuffle=True)

nc = 1                               # grayscale input, single channel
nclass = len(option.alphabet) + 1    # +1 for the CTC blank
crnn = CRNN(nc, nclass, option.nh)
crnn = crnn.cuda()


def weight_init(module):
    # DCGAN-style initialization for Conv and BatchNorm layers
    class_name = module.__class__.__name__
    if class_name.find('Conv') != -1:
        module.weight.data.normal_(0, 0.02)
    if class_name.find('BatchNorm') != -1:
        module.weight.data.normal_(1, 0.02)
        module.bias.data.fill_(0)


crnn.apply(weight_init)

loss_function = CTCLoss(zero_infinity=True)
loss_function = loss_function.cuda()
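
# --- Hedged single-training-step sketch (not from the original script): the
# optimizer, the loader variable and the already-encoded targets it yields are
# assumptions; it assumes crnn returns (seq_len, batch, nclass) logits, which is
# what torch.nn.CTCLoss expects after log_softmax.
import torch

optimizer = torch.optim.Adam(crnn.parameters(), lr=1e-3)   # illustrative optimizer
for images, encoded_targets, target_lengths in trainset_dataloader:  # assumed loader
    images = images.cuda()
    preds = crnn(images)                      # (T, N, nclass)
    log_probs = preds.log_softmax(2)
    input_lengths = torch.full((preds.size(1),), preds.size(0), dtype=torch.long)
    loss = loss_function(log_probs, encoded_targets, input_lengths, target_lengths)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()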
parser.add_argument('--savedmodel', type=str, default="save", help="directory to saved model") parser.add_argument('--batchsize', type=int, default=64) parser.add_argument('--alphabet', type=str, default='0123456789abcdefghijklmnopqrstuvwxyz') opt = parser.parse_args() cuda = torch.cuda.is_available() device = torch.device('cuda') #intialize model model = CRNN() model.cuda() def weights_init(m): classname = m.__class__.__name__ if classname.find("Conv") != -1: torch.nn.init.normal_(m.weight.data, 0.0, 0.02).cuda() if hasattr(m, "bias") and m.bias is not None: torch.nn.init.constant_(m.bias.data, 0.0).cuda() elif classname.find("BatchNorm2d") != -1: torch.nn.init.normal_(m.weight.data, 1.0, 0.02).cuda() torch.nn.init.constant_(m.bias.data, 0.0).cuda() # weitghts initalize weights_init(model)