def train(trainloader, crnn, converter, criterion, optimizer):
    running_loss = 0.0
    started = time.time()
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs = inputs.to(device)

        optimizer.zero_grad()

        outputs = crnn(inputs)  # (seq_len, batch, nclass)
        log_probs = torch.nn.functional.log_softmax(outputs, dim=2)
        # CTC loss needs per-sample input lengths; every sample spans the full sequence
        input_lengths = torch.full(size=(inputs.size(0),),
                                   fill_value=outputs.size(0),
                                   dtype=torch.long).to(device)
        target, target_lengths = converter.encode(labels)
        loss = criterion(log_probs, target, input_lengths, target_lengths)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:  # report every 100 mini-batches; `epoch` is a module-level global
            finished = time.time()
            print('[{}, {:5d}] loss: {:.8f} time: {:.2f}s'.format(
                epoch + 1, i + 1, running_loss / 100, finished - started))
            running_loss = 0.0
            started = finished
def validate(validloader, crnn, converter):
    correct = 0
    total = 0
    with torch.no_grad():
        for data in validloader:
            inputs, labels = data
            inputs = inputs.to(device)
            outputs = crnn(inputs)
            _, predicted = outputs.max(2)
            predicted = predicted.transpose(1, 0).contiguous().view(-1)
            input_lengths = torch.full(size=(inputs.size(0),),
                                       fill_value=outputs.size(0),
                                       dtype=torch.long).to(device)
            sim_preds = converter.decode(predicted.data, input_lengths.data, raw=False)
            targets = [i.decode('utf-8', 'strict') for i in labels]
            for sim_pred, target in zip(sim_preds, targets):
                total += 1
                if sim_pred == target:
                    correct += 1
    print('[{}] Accuracy of the network on the {} validation images: {:.2%}'.format(
        epoch + 1, total, correct / total))
    with open('{}/train.log'.format(args.save_path), 'a') as f:
        f.write('[{}] Accuracy of the network on the {} validation images: {:.2%}\n'.format(
            epoch + 1, total, correct / total))
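# Usage sketch for train()/validate() above, hedged: the CRNN model, alphabet,
# utils.strLabelConverter, the data loaders and args come from the surrounding
# project and are assumed here. torch.nn.CTCLoss matches the
# (log_probs, targets, input_lengths, target_lengths) call in train();
# the optimizer and epoch count are illustrative, not the author's.
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
converter = utils.strLabelConverter(alphabet)
criterion = torch.nn.CTCLoss(zero_infinity=True)           # blank index 0 by default
optimizer = torch.optim.Adam(crnn.parameters(), lr=1e-4)   # hypothetical choice
for epoch in range(25):  # both functions read `epoch` as a module-level global
    train(trainloader, crnn, converter, criterion, optimizer)
    validate(validloader, crnn, converter)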
def val(net, val_loader, criterion, epoch, max_i=1000):
    print('================Start val=================')
    for p in net.parameters():  # freeze the network during validation
        p.requires_grad = False
    net.eval()
    n_correct = 0
    n_all = 0
    loss_avg = utils.averager()
    for i_batch, (image, index) in enumerate(val_loader):
        image = image.to(device)
        # print('image.shape:', image.shape)  # debug
        label = utils.get_batch_label(val_dataset, index)
        preds = net(image)  # (seq_len, batch, nclass), e.g. (41, batch, nclass)
        batch_size = image.size(0)
        label_text, label_length = converter.encode(label)
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        cost = criterion(preds, label_text, preds_size, label_length) / batch_size
        loss_avg.add(cost)
        _, preds = preds.max(2)  # (seq_len, batch)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        print('label:', label[:2])
        print('sim_preds:', sim_preds[:2])
        n_all += len(label)
        for pred, target in zip(sim_preds, label):
            if pred == target:
                n_correct += 1
        if (i_batch + 1) % params.displayInterval == 0:
            print('[%d/%d][%d/%d]' % (epoch, params.epochs, i_batch, len(val_loader)))
        if i_batch == max_i:
            break
    # show a few raw (per-timestep) vs. decoded predictions from the last batch
    raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:params.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, label):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))
    accuracy = n_correct / n_all
    print('Test loss: %f, accuracy: %f' % (loss_avg.val(), accuracy))
    return accuracy
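# Sketch (assumption, not the project's implementation): the best-path decoding
# that strLabelConverter.decode is understood to perform with raw=False is to
# collapse consecutive repeats, then drop the CTC blank (index 0); raw=True
# returns the per-timestep labels verbatim.
def greedy_ctc_decode(indices, alphabet, blank=0):
    """Decode one sequence of argmax class indices into a string."""
    out = []
    prev = blank
    for idx in indices:
        if idx != blank and idx != prev:    # skip blanks and repeated labels
            out.append(alphabet[idx - 1])   # alphabet holds the non-blank classes
        prev = idx
    return ''.join(out)

# e.g. greedy_ctc_decode([0, 1, 1, 0, 2], 'ab') == 'ab'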
def val(net, criterion, max_iter=3):
    # relies on the module-level test_loader, image/text/length buffers,
    # converter, opt and ifUnicode
    for p in net.parameters():  # freeze the network during validation
        p.requires_grad = False
    net.eval()

    val_iter = iter(test_loader)
    n_correct = 0
    loss_avg = utils.averager()

    max_iter = min(max_iter, len(test_loader))
    for i in range(max_iter):
        cpu_images, cpu_texts = next(val_iter)
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        if ifUnicode:
            cpu_texts = [clean_txt(tx.decode('utf-8')) for tx in cpu_texts]
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)

        preds = net(image)
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        _, preds = preds.max(2)  # max() over the class dim already drops it
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred.strip() == target.strip():
                n_correct += 1

    accuracy = n_correct / float(max_iter * opt.batchSize)
    testLoss = loss_avg.val()
    return testLoss, accuracy
def crnn_predict(crnn, img, transformer, decoder='bestPath', normalise=False):
    """
    Run CRNN inference on a single image.

    Params
    ------
    crnn: torch.nn.Module
        Neural network architecture
    img: input image
    transformer: torchvision.transform
        Image transformer
    decoder: string, 'bestPath' or 'beamSearch'
        CTC decoder method
    normalise: bool
        Divide the softmax output by the class prior
        (only suitable for best-path decoding)

    Returns
    ------
    out: a list of tuples (predicted alphanumeric sequence, confidence level)
    """
    classes = string.ascii_uppercase + string.digits
    image = img.copy()
    image = transformer(image).to(device)
    image = image.view(1, *image.size())

    # forward pass (convert to numpy array)
    preds_np = crnn(image).data.cpu().numpy().squeeze()

    # move the first column (the CTC blank) to the last position,
    # so that we can use CTCDecoder as it is
    preds_np = np.hstack([preds_np[:, 1:], preds_np[:, [0]]])
    preds_sm = softmax(preds_np, axis=1)

    # normalising by the prior is only suitable for best path
    if normalise:
        preds_sm = np.divide(preds_sm, prior)

    if decoder == 'bestPath':
        output = ctcBestPath(preds_sm, classes)
    elif decoder == 'beamSearch':
        output = ctcBeamSearch(preds_sm, classes, None)
    else:
        raise ValueError("Invalid decoder method. "
                         "Choose either 'bestPath' or 'beamSearch'")
    return output
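# Usage sketch for crnn_predict(), hedged: the transform pipeline and input
# size below are illustrative assumptions; the real preprocessing must match
# whatever the CRNN was trained with.
from PIL import Image
import torchvision.transforms as transforms

transformer = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize((32, 100)),             # hypothetical input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])
img = Image.open('plate.png')                 # hypothetical path
output = crnn_predict(crnn, img, transformer, decoder='bestPath')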
def model_infer(crnn, converter, cvImg):
    # normalise the single-channel image and shape it to (1, 1, H, W)
    image = torch.from_numpy(cvImg).type(torch.FloatTensor)
    image.sub_(params_test.mean).div_(params_test.std)
    image = image.unsqueeze(0).unsqueeze(0)
    image = image.to(device)

    preds_tabel = crnn(image)
    preds_tabel = preds_tabel.permute(1, 0, 2)  # (batch, seq_len, nclass)
    pro, preds = preds_tabel.max(2)

    preds = preds.transpose(1, 0).contiguous().view(-1)
    batch_size = image.size(0)
    preds_size = torch.IntTensor([preds.size(0)] * batch_size)
    reg = converter.decode(preds.data, preds_size.data, raw=False)
    return reg
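# Usage sketch for model_infer(), hedged: the path, crop size and [0, 1]
# scaling are assumptions; params_test.mean/std must match how the pixel
# range was normalised during training.
import cv2

cv_img = cv2.imread('cell.png', cv2.IMREAD_GRAYSCALE)             # hypothetical path
cv_img = cv2.resize(cv_img, (160, 32)).astype('float32') / 255.0  # hypothetical size
print(model_infer(crnn, converter, cv_img))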
def trainBatch(net, criterion, optimizer, flag=False):
    # relies on the module-level train_iter, image/text/length buffers and converter
    cpu_images, cpu_texts = next(train_iter)
    # decode utf-8 to unicode
    if ifUnicode:
        cpu_texts = [clean_txt(tx.decode('utf-8')) for tx in cpu_texts]
    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)

    preds = net(image)
    preds_size = torch.IntTensor([preds.size(0)] * batch_size)
    cost = criterion(preds, text, preds_size, length) / batch_size
    net.zero_grad()
    cost.backward()
    if flag:
        # drop to a lower learning rate; note this rebuilds the optimizer
        # and discards its accumulated state
        lr = 0.0001
        optimizer = optim.Adadelta(net.parameters(), lr=lr)
    optimizer.step()
    return cost
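# Usage sketch for trainBatch(), hedged: train_loader and opt.niter are assumed
# project names, and the flag schedule (switch to the lower learning rate after
# epoch 10) is purely hypothetical.
for epoch in range(opt.niter):
    train_iter = iter(train_loader)
    for _ in range(len(train_loader)):
        cost = trainBatch(crnn, criterion, optimizer, flag=(epoch > 10))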
def train(crnn, train_loader, criterion, epoch):
    for p in crnn.parameters():
        p.requires_grad = True
    crnn.train()

    # loss averager
    loss_avg = utils.averager()

    for i_batch, (image, index) in enumerate(train_loader):
        # image: (batch, channel, height, width), e.g. (32, 1, 32, 160)
        image = image.to(device)
        # print('image.shape:', image.shape)  # debug
        batch_size = image.size(0)
        # label: list of strings, one per sample, e.g. ['xxx', 'xxxx', ...]
        label = utils.get_batch_label(dataset, index)
        # preds: (seq_len, batch, nclass), e.g. (41, batch, nclass)
        preds = crnn(image)
        # label_text: concatenated label indices; label_length: length of each label
        label_text, label_length = converter.encode(label)
        # every sample uses the full output sequence length, e.g. [41] * batch
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        # shapes, e.g.: preds (41, 32, 6736), label_text (320), preds_size (32), label_length (32)
        cost = criterion(preds, label_text, preds_size, label_length) / batch_size
        crnn.zero_grad()
        cost.backward()
        optimizer.step()
        loss_avg.add(cost)

        if (i_batch + 1) % params.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, params.epochs, i_batch, len(train_loader), loss_avg.val()))
            loss_avg.reset()
preds_size = torch.tensor([33])  # the CRNN emits 33 timesteps per image
converter = utils.strLabelConverter(alphabet)
crnn = crnn.CRNN(imgH, nc, nclass, nh).to(device)  # rebinds the module name to the model instance
crnn.load_state_dict(torch.load('/content/drive/My Drive/WeightNet/OCR(3.0)'))

tp_1, fp_1, fn_1 = 0, 0, 0  # true positives / false positives / false negatives, first head
tp_2, fp_2, fn_2 = 0, 0, 0  # the same for the double head
result = ['', '']
with torch.no_grad():
    for x, y in test_loader:
        cpu_images, cpu_texts = x.to(device), y
        preds = crnn(cpu_images)
        # split the output into the two recognition heads: (heads, seq_len, batch=1, nclass)
        preds = preds.view(-1, preds_size.item(), 1, 23)
        for idx, head in enumerate(preds):
            _, preds1 = head.max(2)
            preds1 = preds1.transpose(1, 0).contiguous().view(-1)
            raw_pred = converter.decode(preds1.data, preds_size.data, raw=False)
            # keep only matches of the plate pattern `temp`
            postpro = re.findall(temp, raw_pred)
            sim_pred = postpro[0] if postpro else 'Unknown'
            result[idx] = sim_pred
def detect(path_name_img):
    image, image_orig, img_shape = prepare_image(path_name_img)
    test_output = model(image.to(device))
    w, h, _ = img_shape
    # threshold the segmentation output into a binary plate mask
    mask_gray = cv2.resize(
        (test_output[0].cpu().detach().numpy().squeeze() > 0.6).astype(np.uint8),
        dsize=(h, w))
    mask_rgb = cv2.cvtColor(mask_gray, cv2.COLOR_GRAY2RGB)
    contours, hierarchy = cv2.findContours(mask_gray.copy(), cv2.RETR_EXTERNAL,
                                           cv2.CHAIN_APPROX_SIMPLE)
    count_contours = len(contours)
    outputs = []
    mask_list = []
    nomer_list = []
    for cont in range(count_contours):
        # build a separate mask for each contour
        mask = np.zeros_like(mask_gray)
        mask = cv2.drawContours(mask, [contours[cont]], -1, (255, 0, 0), 3,
                                cv2.LINE_AA,
                                np.expand_dims(hierarchy[:, cont], axis=1), 1)
        mask = convex_hull_image(mask).astype(np.uint8)
        if np.sum(mask) < 1500:  # skip tiny regions
            continue
        mask_list.append(mask)
        a3 = cv2.cvtColor(mask, cv2.COLOR_GRAY2RGB)
        cord = rectDetect.detect([a3 * 255])
        zones = rectDetect.get_cv_zonesBGR(image_orig.copy(), cord)
        outputs.append(zones[0])

    for nomer in outputs:
        image = nomer_aug(image=nomer)['image'].unsqueeze(0)
        preds1 = crnn(image.to(device))
        # split the output into the two recognition heads
        preds1 = preds1.view(-1, 33, 1, 23)
        conf = [0, 0]
        for idx, head in enumerate(preds1):
            _, preds = head.max(2)
            preds = preds.transpose(1, 0).contiguous().view(-1)
            sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
            postpro = re.findall(temp, sim_pred)
            sim_pred = postpro[0] if postpro else 'Unknown'
            conf[idx] = sim_pred
        nomer_list.append(conf)
    return nomer_list, outputs, mask_rgb, image_orig
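# Usage sketch for detect(), hedged: the image path is illustrative, and the
# segmentation model, rectDetect, crnn, converter and nomer_aug are globals
# initialised elsewhere in the project.
nomer_list, crops, mask_rgb, image_orig = detect('car.jpg')  # hypothetical path
for head_preds in nomer_list:
    print(head_preds)  # per-plate predictions from both heads, e.g. ['A123BC77', 'A123BC77']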