def val(net, val_loader, criterion, iteration, max_i=1000):
    """Validate the CRNN on `val_loader` and return word-level accuracy.

    NOTE(review): despite taking `net`, the body runs the module-level
    `crnn` for both the grad-freeze and the forward pass — confirm `net`
    and `crnn` are the same object at every call site.

    Args:
        net: network to switch into eval mode.
        val_loader: DataLoader yielding (image, index) batches.
        criterion: CTC-style loss taking (preds, text, preds_size, length).
        iteration: current training iteration (used only for progress logs).
        max_i: stop after this many batches.

    Returns:
        float: fraction of exactly-matching predictions. The denominator is
        always max_i * params.val_batchSize, even when the loop breaks
        earlier because the loader is shorter — accuracy is understated in
        that case.
    """
    print('Start val')
    # Freeze gradients for validation (module-level crnn, not `net`).
    for p in crnn.parameters():
        p.requires_grad = False
    net.eval()
    i = 0
    n_correct = 0
    loss_avg = utils.averager()  # running mean of per-batch CTC loss
    for i_batch, (image, index) in enumerate(val_loader):
        if args.cuda:
            image = image.cuda()
            criterion = criterion.cuda()
        # Look up the ground-truth strings for this batch by sample index.
        label = utils.get_batch_label(val_dataset, index)
        preds = crnn(image)
        batch_size = image.size(0)
        index = np.array(index.data.numpy())
        text, length = converter.encode(label)
        # Every sample in the batch shares the same time-step count preds.size(0).
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)
        # Greedy decode: argmax over the class axis, then collapse via converter.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, label):
            if pred == target:
                n_correct += 1
        if (i_batch + 1) % params.displayInterval == 0:
            print('[%d/%d][%d/%d]' %
                  (iteration, params.niter, i_batch, len(val_loader)))
        if i_batch == max_i:
            break
    # Show a few raw (uncollapsed) decodings from the last batch only.
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:params.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, label):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))
    print(n_correct)
    print(max_i * params.val_batchSize)
    accuracy = n_correct / float(max_i * params.val_batchSize)
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
    return accuracy
def val(net, dataset, criterion, max_iter=100):
    """Validate on up to `max_iter` batches of `dataset`; prints loss/accuracy.

    NOTE(review): gradients are frozen on the module-level `crnn` and the
    forward pass also uses `crnn`, while only `.eval()` is called on `net`
    — confirm both names refer to the same model. Relies on module-level
    buffers `image`, `text`, `length` filled via `utils.loadData`, and on
    `opt`, `converter`, `Variable`. `val_iter.next()` is Python-2 style.
    """
    print('Start val')
    for p in crnn.parameters():
        p.requires_grad = False
    net.eval()
    data_loader = torch.utils.data.DataLoader(
        dataset, shuffle=True, batch_size=opt.batchSize,
        num_workers=int(opt.workers))
    val_iter = iter(data_loader)
    i = 0
    n_correct = 0
    loss_avg = utils.averager()  # running mean of per-batch loss
    # Never request more batches than the loader can supply.
    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = val_iter.next()
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)
        preds = crnn(image)
        # All samples share the same sequence length preds.size(0).
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)
        # Greedy decode followed by CTC collapse in converter.decode.
        _, preds = preds.max(2)
        preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            # Case-insensitive match against the ground truth.
            if pred == target.lower():
                n_correct += 1
    # Display a sample of raw decodings from the final batch.
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))
    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
def val(net, dataset, criterion, max_iter=100):
    """Validate on up to `max_iter` batches; prints loss and accuracy.

    Near-duplicate of the previous `val`: this variant clamps the batch
    count inline in the `for` header and prints ALL raw predictions of
    the last batch (no `[:opt.n_test_disp]` cap). Same caveats apply:
    forward pass uses module-level `crnn`, buffers `image`/`text`/`length`
    are module-level, `val_iter.next()` is Python-2 style.
    """
    print('Start val')
    for p in crnn.parameters():
        p.requires_grad = False
    net.eval()
    data_loader = torch.utils.data.DataLoader(
        dataset, shuffle=True, batch_size=opt.batchSize,
        num_workers=int(opt.workers))
    val_iter = iter(data_loader)
    i = 0
    n_correct = 0
    loss_avg = utils.averager()
    # Clamp to the loader length so next() cannot run past the data.
    for i in range(min(max_iter, len(data_loader))):
        data = val_iter.next()
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)
        preds = crnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)
        # Greedy argmax decode, then CTC collapse in the converter.
        _, preds = preds.max(2)
        preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred == target.lower():
                n_correct += 1
    raw_preds = converter.decode(preds.data, preds_size.data, raw=True)
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))
    # NOTE(review): denominator uses the original `max_iter`, which may
    # exceed the number of batches actually processed.
    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
def val(net, dataset, criterion, model_path, max_iter=np.inf):
    """Evaluate `net` with greedy CTC decoding; prints loss and accuracy.

    Unlike the sibling `val` functions, this one actually forwards through
    the `net` argument. `model_path` is only echoed in the final report so
    runs over multiple checkpoints can be told apart.

    Relies on module-level buffers `image`, `text`, `length` and on `opt`,
    `converter`, `utils`, `Variable`. `val_iter.next()` is Python-2 style.
    """
    for p in net.parameters():
        p.requires_grad = False
    net.eval()
    data_loader = torch.utils.data.DataLoader(
        dataset, shuffle=True, batch_size=opt.batchSize,
        num_workers=int(opt.workers))
    val_iter = iter(data_loader)
    i = 0
    n_correct = 0
    n_correct_greed = 0  # exact matches under greedy decoding
    # loss averager
    loss_avg = utils.averager()
    loss_avg.reset()
    # max_iter defaults to inf, so this normally evaluates the full set.
    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = val_iter.next()
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)
        preds = net(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)
        # Greedy decode: per-timestep argmax, CTC-collapsed by the converter.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds_greed = converter.decode(preds.data, preds_size.data,
                                           raw=False)
        for idx, (pred_greed, target) in enumerate(
                zip(sim_preds_greed, cpu_texts)):
            # Case-insensitive exact match.
            if pred_greed == target.lower():
                n_correct_greed += 1
    # Raw decodings are computed (last batch) but not printed here.
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:opt.n_test_disp]
    accuracy_greed = n_correct_greed / float(max_iter * opt.batchSize)
    print('test loss: %f, accuray_greed: %f, model: %s' %
          (loss_avg.val(), accuracy_greed, model_path))
def train(self, max_iter=np.inf):
    """Train for `self.opt.nepoch` epochs with periodic validation.

    Every `opt.valInterval` iterations the model is evaluated on
    `self.test_data`; the latest weights are always saved, and separate
    "best" checkpoints are kept for the lowest CER and lowest WER seen.
    Training/validation metrics are streamed through `self.writer`.

    Note: `max_iter` is accepted but never read in this body.
    """
    loss_avg = utils.averager()  # mean train loss since last display
    prev_cer = 100  # best character error rate so far (lower is better)
    prev_wer = 100  # best word error rate so far
    write_info(self.model, self.opt)
    self.writer = Writer(self.opt.lr, self.opt.nepoch, self.opt.node_dir,
                         use_tb=self.opt.use_tb)
    self.iterations = 0
    for epoch in range(self.opt.nepoch):
        self.writer.epoch = epoch
        self.writer.nbatches = len(self.train_loader)
        self.train_iter = iter(self.train_loader)
        i = 0
        while i < len(self.train_loader):
            # Periodic validation (also runs at iteration 0, i.e. before
            # any training, giving a baseline checkpoint).
            if self.iterations % self.opt.valInterval == 0:
                valloss, val_CER, val_WER = self.eval(
                    self.test_data, max_iter=self.val2_iter)
                self.writer.update_valloss(valloss.val().item(), val_CER)
                # trloss, trER = self.eval(self.train_data, max_iter=self.val1_iter)
                # self.writer.update_trloss2(trloss.val().item(), trER)
                # Always refresh the rolling "latest" checkpoint.
                torch.save(
                    self.model.state_dict(),
                    '{0}/{1}.pth'.format(self.opt.node_dir, 'latest'))
                if val_CER < prev_cer:
                    torch.save(
                        self.model.state_dict(),
                        '{0}/{1}.pth'.format(self.opt.node_dir, 'best_cer'))
                    prev_cer = val_CER
                    self.writer.update_best_er(val_CER, self.iterations)
                if val_WER < prev_wer:
                    torch.save(
                        self.model.state_dict(),
                        '{0}/{1}.pth'.format(self.opt.node_dir, 'best_wer'))
                    prev_wer = val_WER
                    # self.writer.update_best_er(val_WER, self.iterations)
            cost = self.trainBatch()  # one optimization step
            loss_avg.add(cost)
            self.iterations += 1
            i += 1
            self.writer.iterations = self.iterations
            self.writer.batch = i
            if self.iterations % self.opt.displayInterval == 0:
                self.writer.update_trloss(loss_avg.val().item())
                loss_avg.reset()
    self.writer.end()
    return
def train_fn(model, data_loader, optimizer):
    """Run one training epoch over `data_loader` with CTC-style loss.

    Relies on module-level buffers `image`, `text`, `length` (filled in
    place via `utils.loadData`) and on module-level `converter` and
    `criterion`.

    Args:
        model: network producing (T, batch, nclass) predictions.
        data_loader: iterable of dicts whose values are (images, texts).
        optimizer: optimizer stepping `model`'s parameters.

    Returns:
        float: mean loss over all batches of the epoch.
    """
    model.train()
    tk = tqdm(data_loader, total=len(data_loader))
    fin_loss = 0
    # Removed: `loss_avg = utils.averager()` — it was created but never
    # read or updated; `fin_loss` already accumulates the epoch loss.
    for data in tk:
        imgs, texts = data.values()
        utils.loadData(image, imgs)
        batch_size = imgs.size(0)
        t, l = converter.encode(texts)
        utils.loadData(text, t)
        utils.loadData(length, l)
        optimizer.zero_grad()
        preds = model(image)
        # Every sample shares the full sequence length preds.size(0).
        preds_length = torch.full(size=(batch_size, ),
                                  fill_value=preds.size(0),
                                  dtype=torch.int32)
        loss = criterion(preds, text, preds_length, length)
        loss.backward()
        optimizer.step()
        fin_loss += loss.item()
    return fin_loss / len(data_loader)
def train(crnn, train_loader, criterion, epoch):
    """Train `crnn` for one epoch; prints the averaged loss periodically.

    Uses module-level `device`, `dataset`, `converter`, `optimizer` and
    `params`. Labels are fetched by sample index via
    `utils.get_batch_label`, encoded by `converter`, and scored with a
    CTC-style `criterion`.
    """
    for p in crnn.parameters():
        p.requires_grad = True
    crnn.train()
    # loss averager
    loss_avg = utils.averager()
    for i_batch, (image, index) in enumerate(train_loader):
        # [b,c,h,w] e.g. [32,1,32,160]
        image = image.to(device)
        print('image.shape:', image.shape)  # debug output every batch
        batch_size = image.size(0)
        # ['xxx','xxxx',...] one string per sample in the batch
        label = utils.get_batch_label(dataset, index)
        # [T, batch, nclass]
        preds = crnn(image)
        # print('preds.shape',preds.shape)
        # index = np.array(index.data.numpy())
        # label_length: [len(label[0]), len(label[1]), ...]
        label_text, label_length = converter.encode(label)
        # print('label_text:', len(label_text))
        # print('label_length:', label_length)
        # [T, T, ...] repeated batch times
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        # print('preds.shape, label_text.shape, preds_size.shape, label_length.shape',preds.shape, label_text.shape, preds_size.shape, label_length.shape)
        # torch.Size([41, 32, 6736]) torch.Size([320]) torch.Size([320]) torch.Size([320])
        cost = criterion(preds, label_text, preds_size, label_length) / batch_size
        # print('cost:',cost)
        crnn.zero_grad()
        cost.backward()
        optimizer.step()  # module-level optimizer
        loss_avg.add(cost)
        if (i_batch + 1) % params.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, params.epochs, i_batch, len(train_loader),
                   loss_avg.val()))
            loss_avg.reset()
def val(crnn, valid_loader, criterion, max_iter=1000):
    """Validate `crnn` on up to `max_iter` batches; returns accuracy.

    Batches are (names, images, texts) triples; sample names are printed
    alongside the decodings for easier debugging. Uses module-level
    `converter`, `opt` and `Variable`; `val_iter.next()` is Python-2 style.

    Returns:
        float: exact-match accuracy over max_iter * opt.batchSize samples.
    """
    print('Start val')
    for p in crnn.parameters():
        p.requires_grad = False
    crnn.eval()
    val_iter = iter(valid_loader)
    i = 0
    n_correct = 0
    loss_avg = utils.averager()
    # Never request more batches than the loader holds.
    max_iter = min(max_iter, len(valid_loader))
    for i in range(max_iter):
        names, images, texts = val_iter.next()
        batch_size = images.size(0)
        t, l = converter.encode(texts)
        images = images.cuda()
        preds = crnn(images)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, t, preds_size, l) / batch_size
        loss_avg.add(cost)
        # Greedy decode then CTC-collapse via the converter.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, texts):
            # Case-sensitive exact match (siblings use target.lower()).
            if pred == target:
                n_correct += 1
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:opt.n_test_disp]
    for name, raw_pred, pred, gt in zip(names, raw_preds, sim_preds, texts):
        print('%-20s:%-20s => %-20s, gt: %-20s' % (name, raw_pred, pred, gt))
    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
    return accuracy
def eval_fn(model, data_loader):
    """Evaluate `model` over the whole loader; prints loss and accuracy.

    Runs under torch.no_grad(). Relies on module-level buffers `image`,
    `text`, `length`, plus `converter`, `criterion`, `f` (functional) and
    `config`. Accuracy denominator assumes every batch is full
    (len(data_loader) * config.BATCH_SIZE).
    """
    model.eval()
    tk = tqdm(data_loader, total=len(data_loader))
    n_correct = 0
    loss_avg = utils.averager()
    with torch.no_grad():
        for data in tk:
            imgs, texts = data.values()
            utils.loadData(image, imgs)
            batch_size = imgs.size(0)
            t, l = converter.encode(texts)
            utils.loadData(text, t)
            utils.loadData(length, l)
            preds = model(image)
            # print(preds.size())
            # Each sample uses the full sequence length preds.size(0).
            preds_length = torch.full(size=(batch_size, ),
                                      fill_value=preds.size(0),
                                      dtype=torch.int32)
            loss = criterion(preds, text, preds_length, length)
            loss_avg.add(loss)
            # Softmax is monotonic, so argmax is unchanged; applied anyway.
            preds = f.softmax(preds, dim=2)
            _, preds = preds.max(2)
            preds = preds.transpose(1, 0).contiguous().view(-1)
            sim_preds = converter.decode(preds, preds_length)
            # Copy the batch's ground-truth strings into a plain list.
            cpu_texts_decode = []
            for i in texts:
                cpu_texts_decode.append(i)
            for pred, target in zip(sim_preds, cpu_texts_decode):
                if pred == target:
                    n_correct += 1
            # Show up to 10 raw decodings per batch.
            raw_preds = converter.decode(preds, preds_length, raw=True)[:10]
            for raw_pred, pred, gt in zip(raw_preds, sim_preds,
                                          cpu_texts_decode):
                print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))
    accuracy = n_correct / float(len(data_loader) * config.BATCH_SIZE)
    print('Val.loss: %f, accuracy: %f' % (loss_avg.val(), accuracy))
def train(crnn, train_loader, criterion, epoch):
    """Run one training epoch of `crnn` over `train_loader`.

    Encodes each batch's labels with the module-level `converter`, scores
    the network output with the CTC-style `criterion` (normalized by batch
    size), and steps the module-level `optimizer`. Every
    `arg.displayInterval` batches the running mean loss is printed and the
    averager is reset.
    """
    # Enable gradients and switch to training mode.
    for param in crnn.parameters():
        param.requires_grad = True
    crnn.train()

    running_loss = utils.averager()

    for step, (batch_images, batch_labels) in enumerate(train_loader):
        batch_images = batch_images.to(device)
        net_out = crnn(batch_images)
        n_samples = batch_images.size(0)

        targets, target_lens = converter.encode(batch_labels)
        # print(converter.decode(text,length))

        # One identical sequence length (T) per sample in the batch.
        out_lens = torch.IntTensor([net_out.size(0)] * n_samples)
        batch_cost = criterion(net_out, targets, out_lens, target_lens) / n_samples

        crnn.zero_grad()
        batch_cost.backward()
        optimizer.step()

        running_loss.add(batch_cost)
        if (step + 1) % arg.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, arg.nepoch, step, len(train_loader),
                   running_loss.val()))
            running_loss.reset()
def val(net, data_loader, criterion, max_iter=100):
    """Validate on up to `max_iter` batches; prints loss and accuracy %.

    Batches are (images, encoded_text, lengths, raw_texts) tuples, so no
    per-batch encoding is needed. The forward pass runs under
    torch.no_grad().

    NOTE(review): `net` only receives `.eval()`; the forward pass uses the
    module-level `crnn` — confirm they are the same object.
    """
    print('Start val')
    net.eval()
    val_iter = iter(data_loader)
    n_correct = 0
    loss_avg = utils.averager()
    # Never iterate past the loader's length.
    max_iter = min(max_iter, len(data_loader))
    for _ in range(max_iter):
        data = val_iter.next()  # Python-2-style iterator call
        cpu_images, text, length, cpu_texts = data
        image = cpu_images.to(device)
        batch_size = cpu_images.size(0)
        with torch.no_grad():
            preds = crnn(image)
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        # NOTE(review): unlike sibling vals, the loss is NOT divided by
        # batch_size here — confirm whether that is intentional.
        cost = criterion(preds, text, preds_size, length)
        loss_avg.add(cost)
        # Greedy decode, CTC-collapsed by the converter.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            # Case-insensitive exact match.
            if pred == target.lower():
                n_correct += 1
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))
    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuray: %.2f%%' % (loss_avg.val(), accuracy * 100))
def evaluate(net, dataset, criterion, max_iter=100):
    """Evaluate a classifier over the FULL dataset; returns the loss averager.

    Reports two accuracies:
      * cell accuracy  — fraction of individual labels predicted correctly;
      * table accuracy — fraction of batches where EVERY label is correct
        (one batch is treated as one "table").

    Note: `max_iter` is accepted but overridden with len(data_loader), so
    the whole dataset is always evaluated.
    """
    for p in net.parameters():
        p.requires_grad = False
    net.eval()
    data_loader = DataLoader(dataset, batch_size=config.batch_size)
    val_iter = iter(data_loader)
    i = 0
    n_correct = 0
    n_total = 0
    table_correct = 0
    table_total = 0
    loss_avg = utils.averager()
    # max_iter = min(max_iter, len(data_loader))
    max_iter = len(data_loader)  # always evaluate everything
    for i in range(max_iter):
        data = val_iter.next()  # Python-2-style iterator call
        i += 1
        out_pred = net(data)
        loss = criterion(out_pred, data.y.cuda())
        loss_avg.add(loss)
        # Class prediction per element.
        _, out_pred = out_pred.max(1)
        label = data.y.detach().cpu().numpy()
        out_pred = out_pred.detach().cpu().numpy()
        # Whole batch correct => one correct "table".
        if (label == out_pred).all():
            table_correct = table_correct + 1
        table_total = table_total + 1
        n_correct = n_correct + (label == out_pred).sum()
        n_total = n_total + label.shape[0]
        # print("correct:",n_correct,label.shape[0])
    accuracy = n_correct / float(n_total)
    table_accuracy = table_correct / float(table_total)
    logging.info('Test cell loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
    logging.info('Test one table loss: %f, accuray: %f' %
                 (loss_avg.val(), table_accuracy))
    return loss_avg
def test(model, crit, dataset, vocab, opt, writer):
    """Score a captioning model on `dataset` with the COCO scorer.

    Generates captions in inference mode, matches them to ground-truth
    sentences loaded from opt["input_json"], and prints each batch's
    COCO metrics. `samples` accumulates ACROSS batches, so later scores
    cover all videos seen so far.

    Note: the `writer` parameter is immediately shadowed by a fresh
    SummaryWriter; `crit` and the averager are set up but never used to
    compute a loss in this body.
    """
    model.eval()
    loss_avg = averager()
    writer = SummaryWriter()  # shadows the writer argument
    loader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=True)
    scorer = COCOScorer()
    # Ground-truth captions in COCO-scorer format, keyed by video id.
    gt_dataframe = json_normalize(
        json.load(open(opt["input_json"]))['sentences'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)
    results = []
    samples = {}
    for data in loader:
        # forward the model to get loss
        fc_feats = data['fc_feats'].cuda()
        labels = data['labels'].cuda()
        masks = data['masks'].cuda()
        video_ids = data['video_ids']
        # clip_nums = data['clip_num']
        # sorted_clip_nums, indices = torch.sort(clip_nums, descending=True)
        # _, desorted_indices = torch.sort(indices, descending=False)
        # fc_feats = fc_feats[indices]
        # pack = rnn.pack_padded_sequence(fc_feats, sorted_clip_nums, batch_first=True)
        # forward the model to also get generated samples for each image
        with torch.no_grad():
            seq_probs, seq_preds = model(fc_feats, mode='inference', opt=opt)
        sents = utils.decode_sequence(vocab, seq_preds)
        # One single-caption entry per video id.
        for k, sent in enumerate(sents):
            video_id = video_ids[k]
            samples[video_id] = [{'image_id': video_id, 'caption': sent}]
        # Silence the scorer's internal prints.
        with suppress_stdout_stderr():
            valid_score = scorer.score(gts, samples, samples.keys())
        results.append(valid_score)
        print(valid_score)
def train(crnn, train_loader, criterion, optimizer, valid_loader):
    """Train `crnn` for one epoch on (names, images, texts) batches.

    NOTE(review): the progress print references `epoch`, which is not a
    parameter or local — it must exist as a global at call time or this
    raises NameError. `valid_loader` is accepted but unused here.
    Uses module-level `converter` and `opt`; `train_iter.next()` is
    Python-2 style.
    """
    for p in crnn.parameters():
        p.requires_grad = True
    crnn.train()
    train_iter = iter(train_loader)
    # loss averager
    loss_avg = utils.averager()
    for i in range(len(train_loader)):
        data = train_iter.next()
        _, images, texts = data  # sample names are discarded
        batch_size = images.size(0)
        t, l = converter.encode(texts)
        images = images.cuda()
        preds = crnn(images)
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        cost = criterion(preds, t, preds_size, l) / batch_size
        crnn.zero_grad()
        cost.backward()
        optimizer.step()
        loss_avg.add(cost)
        if (i + 1) % opt.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, opt.nepoch, i, len(train_loader), loss_avg.val()))
            loss_avg.reset()
# ---- Script-level setup: move model/buffers to GPU, wrap in Variables,
# ---- create loss averagers and the optimizer.
crnn.cuda()
crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu))
image = image.cuda()
textAttention = textAttention.cuda()
criterionAttention = criterionAttention.cuda()
criterionCTC = criterionCTC.cuda()
# Legacy-PyTorch Variable wrappers around the shared batch buffers.
image = Variable(image)
textAttention = Variable(textAttention)
lengthAttention = Variable(lengthAttention)
textCTC = Variable(textCTC)
lengthCTC = Variable(lengthCTC)
# loss averager
loss_avg = utils.averager()
loss_CTC = utils.averager()
loss_Attention = utils.averager()
# setup optimizer — exactly one of adam / adadelta / RMSprop (default).
if opt.adam:
    optimizer = optim.Adam(crnn.parameters(), lr=opt.lr,
                           betas=(opt.beta1, 0.999))
elif opt.adadelta:
    optimizer = optim.Adadelta(crnn.parameters(), lr=opt.lr)
else:
    optimizer = optim.RMSprop(crnn.parameters(), lr=opt.lr)


# NOTE(review): this one-line `val` appears to be a truncated duplicate of
# the full CTC+Attention `val` defined later in the file; its body is only
# the banner print.
def val(net, valdataset, criterionAttention, criterionCTC, max_iter=100):
    print('Start val')
def val(net, valdataset, criterionAttention, criterionCTC, max_iter=100):
    """Validate a joint CTC + Attention model; prints accuracy and CER.

    Both decoder heads are scored: exact-match accuracy and Levenshtein
    character error rate (CER) are reported separately for the CTC and the
    Attention branch.

    NOTE(review): the forward pass uses the module-level `crnn` (not
    `net`), and the `else` branch references `imageAttention`, which is
    not defined in this body — confirm it exists as a global, otherwise
    that path raises NameError. `cpu_text.decode('utf-8')` implies the
    labels are byte strings (Python-2-era code).
    """
    print('Start val')
    for p in crnn.parameters():
        p.requires_grad = False
    net.eval()
    val_sampler = dataset.randomSequentialSampler(valdataset, opt.batchSize)
    data_loader = torch.utils.data.DataLoader(
        valdataset, batch_size=opt.batchSize, shuffle=False,
        sampler=val_sampler, num_workers=int(opt.workers),
        collate_fn=dataset.alignCollate(
            imgH=opt.imgH, imgW=opt.imgW, keep_ratio=opt.keep_ratio))
    # data_loader = torch.utils.data.DataLoader(
    #     dataset, shuffle=True, batch_size=opt.batchSize, num_workers=int(opt.workers))
    val_iter = iter(data_loader)
    i = 0
    n_correct = 0
    n_correctCTC = 0
    n_correctAttention = 0
    distanceCTC = 0  # summed Levenshtein distance, CTC head
    distanceAttention = 0  # summed Levenshtein distance, attention head
    sum_charNum = 0  # total ground-truth characters (CER denominator)
    loss_avg = utils.averager()
    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = val_iter.next()
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        # Encode targets once per head (different alphabets/converters).
        tAttention, lAttention = converterAttention.encode(cpu_texts)
        utils.loadData(textAttention, tAttention)
        utils.loadData(lengthAttention, lAttention)
        tCTC, lCTC = converterCTC.encode(cpu_texts)
        utils.loadData(textCTC, tCTC)
        utils.loadData(lengthCTC, lCTC)
        # print (image)
        if opt.lang:
            predsCTC, predsAttention = crnn(image, lengthAttention,
                                            textAttention)
        else:
            predsCTC, predsAttention = crnn(imageAttention, lengthAttention)
        costAttention = criterionAttention(predsAttention, textAttention)
        preds_size = Variable(torch.IntTensor([predsCTC.size(0)] * batch_size))
        costCTC = criterionCTC(predsCTC, textCTC, preds_size,
                               lengthCTC) / batch_size
        # Both heads' losses go into the same averager.
        loss_avg.add(costAttention)
        loss_avg.add(costCTC.cuda())
        # Attention head: greedy decode over the class axis.
        _, predsAttention = predsAttention.max(1)
        predsAttention = predsAttention.view(-1)
        sim_predsAttention = converterAttention.decode(predsAttention.data,
                                                       lengthAttention.data)
        # CTC head: greedy decode then CTC collapse.
        _, predsCTC = predsCTC.max(2)
        predsCTC = predsCTC.transpose(1, 0).contiguous().view(-1)
        sim_predsCTC = converterCTC.decode(predsCTC.data, preds_size.data,
                                           raw=False)
        for i, cpu_text in enumerate(cpu_texts):
            gtText = cpu_text.decode('utf-8')
            CTCText = sim_predsCTC[i]
            if isinstance(CTCText, str):
                CTCText = CTCText.decode('utf-8')
            AttentionText = sim_predsAttention[i]
            print('gtText: %s' % gtText)
            print('CTCText: %s' % CTCText)
            print('AttentionText: %s' % AttentionText)
            if gtText == CTCText:
                n_correctCTC += 1
            if gtText == AttentionText:
                n_correctAttention += 1
            distanceCTC += Levenshtein.distance(CTCText, gtText)
            distanceAttention += Levenshtein.distance(AttentionText, gtText)
            sum_charNum = sum_charNum + len(gtText)
    correctCTC_accuracy = n_correctCTC / float(max_iter * batch_size)
    cerCTC = distanceCTC / float(sum_charNum)
    print('Test CERCTC: %f, accuracyCTC: %f' % (cerCTC, correctCTC_accuracy))
    correctAttention_accuracy = n_correctAttention / float(
        max_iter * batch_size)
    cerAttention = distanceAttention / float(sum_charNum)
    print('Test CERAttention: %f, accuricyAttention: %f' %
          (cerAttention, correctAttention_accuracy))
def val(net, val_dataset, criterion, max_iter=100):
    """Validate on up to `max_iter` batches; prints loss, accuracy, and the
    mean edit distance of the wrong predictions.

    Relies on module-level `model` (used for both the grad-freeze and the
    forward pass — `net` only receives `.eval()`), plus the shared buffers
    `image`, `text`, `length`, and `opt`, `dataset`, `converter`,
    `editdistance`, `Variable`. `val_iter.next()` is Python-2 style.
    """
    print('Start val')
    for p in model.parameters():
        p.requires_grad = False
    net.eval()
    # data_loader = torch.utils.data.DataLoader(
    #     dataset, shuffle=False, batch_size=opt.batchSize, num_workers=int(opt.workers))
    # val_iter = iter(data_loader)
    data_loader = torch.utils.data.DataLoader(
        val_dataset, shuffle=False, batch_size=opt.batchSize,
        num_workers=int(opt.workers),
        collate_fn=dataset.alignCollate(
            imgH=opt.imgH, imgW=opt.imgW, keep_ratio=opt.keep_ratio))
    val_iter = iter(data_loader)
    i = 0
    n_correct = 0
    loss_avg = utils.averager()
    edt_dst = 0  # summed edit distance over mispredicted samples
    # BUG FIX: was `max(max_iter, len(data_loader))`, which asks for MORE
    # batches than the loader holds whenever len(data_loader) < max_iter
    # and makes val_iter.next() raise StopIteration. Clamp with min(), as
    # every sibling val() in this file does.
    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = val_iter.next()
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)
        preds = model(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)
        # Greedy decode followed by CTC collapse in the converter.
        _, preds = preds.max(2)
        # preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred == target:
                n_correct += 1
            else:
                # Only wrong predictions contribute edit distance.
                dst = editdistance.eval(pred, target)
                # print('pred=', pred, ' target=', target, ' dst=', dst)
                edt_dst += dst
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))
    accuracy = n_correct / float(max_iter * opt.batchSize)
    avg_edt_dst = edt_dst / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuray: %f, avg_edt_dst: %f' %
          (loss_avg.val(), accuracy, avg_edt_dst))
def val(net, dataset, criterion, best_accuracy, epoch, i, best_i,
        best_epoch=None, max_iter=100, **_ignored):
    """Validate and track the best accuracy across calls.

    Runs up to `max_iter` batches, prints loss/accuracy, and returns the
    (possibly updated) best-accuracy bookkeeping.

    NOTE(review): forward and grad-freeze use the module-level `crnn`;
    `net` only receives `.eval()` — confirm both are the same model.
    Relies on module-level buffers `image`, `text`, `length` and on `opt`,
    `converter`, `utils`, `Variable`.

    Returns:
        (best_accuracy, best_epoch, best_i) tuple.
    """
    print('Validating...')
    for para in crnn.parameters():
        para.requires_grad = False
    net.eval()
    data_loader = torch.utils.data.DataLoader(
        dataset, shuffle=True, batch_size=opt.batch_size,
        num_workers=int(opt.workers))
    val_iter = iter(data_loader)
    n_correct = 0
    loss_avg_ = utils.averager()
    max_iter = min(max_iter, len(data_loader))
    for j in range(max_iter):
        data = val_iter.next()
        j += 1
        cpu_images, cpu_texts, _ = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, length_ = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, length_)
        preds = crnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost_ = criterion(preds, text, preds_size, length) / batch_size
        loss_avg_.add(cost_)
        # Greedy decode then CTC collapse via the converter.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred == target.lower():
                n_correct += 1
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:opt.test_display_number]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))
    accuracy = n_correct / float(max_iter * opt.batch_size)
    # BUG FIX: the original printed `loss_avg.val()`, but this function's
    # averager is named `loss_avg_` — `loss_avg` is either undefined
    # (NameError) or an unrelated global.
    print('Test loss: %f, accuray: %f' % (loss_avg_.val(), accuracy))
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_epoch = epoch
        best_i = i
    print('Best accuracy: ', best_accuracy, ' from ecpoch ', best_epoch,
          ', iteration ', best_i)
    return best_accuracy, best_epoch, best_i
def train(loader, model, crit, optimizer, lr_scheduler, opt, rl_crit=None,
          converter=None):
    """Full training loop for a sign-language CTC recognizer.

    For each epoch: steps the LR scheduler, iterates the loader training
    with a CTC-style `crit`, accumulates word error rate (WER) on the
    training data, periodically evaluates via the module-level `val`, logs
    to TensorBoard, and checkpoints the model.

    Note: the `converter` parameter is immediately overwritten with a new
    strLabelConverter built from loader.dataset; `rl_crit` is unused.
    """
    model.cuda()
    # crit.cuda()
    # optimizer.cuda()
    # lr_scheduler.cuda()
    # video = torch.FloatTensor(params.batchSize, 3, params.imgH, params.imgH)
    # TODO: this was originally 30 for the Chinese sign-language dataset.
    text = torch.LongTensor(opt['batch_size'] * opt['max_len'])
    # text = torch.IntTensor(opt['batch_size'] * 30)
    length = torch.LongTensor(opt['batch_size'])
    converter = strLabelConverter(loader.dataset)
    # model = nn.DataParallel(model)
    writer = SummaryWriter("two_lstm_exp_German")
    loss_avg = averager()
    wer_val = 1.0  # fed to ReduceLROnPlateau as the monitored metric
    for epoch in range(opt["epochs"]):
        n_correct = 0
        model.train()
        if opt['lr_schluder'] == 'StepLR':
            lr_scheduler.step()
        elif opt['lr_schluder'] == 'ReduceLROnPlateau':
            lr_scheduler.step(wer_val)
        iteration = 0
        f_wer = 0.0  # summed WER over the epoch's samples
        for data in loader:
            torch.cuda.synchronize()
            for p in model.parameters():
                p.requires_grad = True
            fc_feats = data['fc_feats'].cuda()  # (batch_size, 80, 512)
            # 1. slice 10 * (batch_size, 8, 512)
            # 2. send each slice to LSTM 10 * (batch_size, 1024)
            # 3. set another mask M2(batch_size, 10)
            # 4. if a slice is full of Zero, set the corresponding index of M2 zero
            # 5. LSTM2
            # 6. obtain final result bt *
            labels = data['labels'].cuda()
            # masks = data['masks'].cuda()
            # clip_nums = data['clip_num']
            # sorted_clip_nums,indices = torch.sort(clip_nums,descending=True)
            # _, desorted_indices = torch.sort(indices, descending=False)
            # fc_feats=fc_feats[indices]
            # pack = rnn.pack_padded_sequence(fc_feats,sorted_clip_nums,batch_first=True)
            # TODO
            optimizer.zero_grad()
            output = model(fc_feats)
            # desorted_res = output[desorted_indices]
            output = output.log_softmax(2).requires_grad_()
            _, preds = output.max(2)
            output = output.transpose(0, 1).contiguous()
            # Flatten the padded label matrix into a 1-D CTC target,
            # dropping the -1 padding entries.
            labels_ctc = []
            ys = []
            for i in labels:
                for j in i:
                    if not j == -1:
                        labels_ctc.append(j)
            # Per-sample target length: index of the first -1, or max_len
            # when the row has no padding.
            for i in labels:
                non_zero = (i == -1).nonzero()
                if not non_zero.numel():
                    ys.append(opt['max_len'])
                else:
                    ys.append(non_zero[0][0])
            loadData(text, torch.LongTensor(labels_ctc))
            loadData(length, torch.LongTensor(ys))
            preds_size = Variable(
                torch.LongTensor([output.size(0)] * output.size(1)))
            loss = crit(output, text.cuda(), preds_size.cuda(), length.cuda())
            # loss= crit(output,text,preds_size,length)/opt['batch_size']
            # Greedy decode for train-time WER tracking.
            preds = preds.contiguous().view(-1)
            sim_preds = converter.decode(preds.data, preds_size.data,
                                         raw=False)
            list_1 = []
            for pred, target in zip(sim_preds, labels):
                # Rebuild the ground-truth sentence, skipping -1 padding.
                ts = target.squeeze().cpu().numpy().tolist()
                res = []
                for i in ts:
                    if i == -1:
                        continue
                    res.append(loader.dataset.ix_to_word[str(i)])
                target = ' '.join(res)
                tmp_wer = wer(target, pred)
                f_wer += tmp_wer
                if pred == target:
                    n_correct += 1
            loss_avg.add(loss)
            loss.backward()
            optimizer.step()
            torch.cuda.synchronize()
            iteration += 1
        acc = n_correct / float(len(loader))
        # print(len(loader)*opt['batch_size'])
        f_wer = f_wer / float(len(loader) * opt['batch_size'])
        print("[epoch %d]->train_loss = %.6f , wer = %.6f" %
              (epoch, loss_avg.val(), f_wer))
        if epoch % opt["eval_every"] == 0:
            # Freeze the model during evaluation.
            for p in model.parameters():
                p.requires_grad = False
            loss_eval, wer_val = val(model, crit, opt, writer, epoch)
            writer.add_scalars('loss_epcho', {
                'train_loss': loss_avg.val(),
                'val_loss': loss_eval
            }, epoch)
            writer.add_scalars('wer_epcho', {
                'train_wer': f_wer,
                'eval_wer': wer_val
            }, epoch)
        if epoch % opt["save_checkpoint_every"] == 0:
            path = opt['root_model_path']
            # if not os.path.exists(path):
            #     os.mkdir(path)
            # else:
            #     shutil.rmtree(path)
            #     os.mkdir(path)
            model_path = os.path.join(path, 'model_%d.pth' % (epoch))
            model_info_path = os.path.join(path, 'model_score.txt')
            torch.save(model.state_dict(), model_path)
            print("model saved to %s" % (model_path))
            with open(model_info_path, 'a') as f:
                f.write(
                    "model_%d, loss: %.6f train wer: %.6f val wer: %.6f\n" %
                    (epoch, loss_avg.val(), f_wer, wer_val))
        loss_avg.reset()
def val(encoder, decoder, criterion, batchsize, dataset, teach_forcing=False,
        max_iter=100):
    """Validate a CNN-encoder / attention-decoder pair; prints accuracy.

    Decoding is autoregressive: each step's argmax is fed back as the next
    input until EOS_TOKEN is produced. Character-level accuracy counts the
    EOS position too, so a perfect sample scores len(text) + 1.

    Note: the `teach_forcing` flag only guards the non-forced branch; no
    teacher-forcing path is implemented in this body.
    """
    print('Start val')
    for e, d in zip(encoder.parameters(), decoder.parameters()):
        e.requires_grad = False
        d.requires_grad = False
    encoder.eval()
    decoder.eval()
    data_loader = torch.utils.data.DataLoader(
        dataset, shuffle=False, batch_size=batchsize,
        num_workers=int(opt.workers))
    val_iter = iter(data_loader)
    n_correct = 0
    n_total = 0
    loss_avg = utils.averager()
    max_iter = min(max_iter, len(data_loader))
    # max_iter = len(data_loader) - 1
    for i in range(max_iter):
        data = val_iter.next()
        i += 1
        cpu_images, cpu_texts = data
        b = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        target_variable = converter.encode(cpu_texts)
        # The EOS stop symbol must also be predicted correctly, hence +1.
        n_total += len(cpu_texts[0]) + 1
        decoded_words = []
        decoded_label = []
        decoder_attentions = torch.zeros(len(cpu_texts[0]) + 1, opt.max_width)
        # CNN + bidirectional LSTM feature extraction.
        encoder_outputs = encoder(image)
        target_variable = target_variable.cuda()
        # Initialize the decoder with the start-of-sequence token (index 0).
        decoder_input = target_variable[0].cuda()
        decoder_hidden = decoder.initHidden(b).cuda()
        loss = 0.0
        if not teach_forcing:
            # Inference strategy: feed the previous prediction back as the
            # next input until the EOS token is emitted.
            for di in range(1, target_variable.shape[0]):  # max target length
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                # Accumulate loss one character at a time.
                loss += criterion(decoder_output, target_variable[di])
                loss_avg.add(loss)
                decoder_attentions[di - 1] = decoder_attention.data
                topv, topi = decoder_output.data.topk(1)
                ni = topi.squeeze(1)
                decoder_input = ni
                if ni == EOS_TOKEN:
                    decoded_words.append('<EOS>')
                    decoded_label.append(EOS_TOKEN)
                    break
                else:
                    decoded_words.append(converter.decode(ni))
                    decoded_label.append(ni)
        # Count correct characters (including the EOS position).
        for pred, target in zip(decoded_label, target_variable[1:, :]):
            if pred == target:
                n_correct += 1
        if i % 100 == 0:  # show one sample every 100 batches
            texts = cpu_texts[0]
            print('pred:%-20s, gt: %-20s' % (decoded_words, texts))
    accuracy = n_correct / float(n_total)
    print('Test loss: %f, accuray: %f' %
          (loss_avg.val(), accuracy))
def val(encoder, decoder, criterion, val_loader, device):
    """Validate an encoder/attention-decoder pair; returns (loss, accuracy).

    Autoregressive greedy decoding: each step's argmax is fed back as the
    next decoder input until EOS. Character accuracy includes the EOS
    position (n_total adds len(text) + 1 per batch). Runs entirely under
    torch.no_grad(). Uses module-level `converter`, `opt`, `EOS`.
    """
    print('Start val')
    with torch.no_grad():
        encoder.eval()
        decoder.eval()
        i = 0
        n_correct = 0
        n_total = 0
        loss_avg = utils.averager()
        for data in val_loader:
            i += 1
            cpu_images, cpu_texts = data
            b = cpu_images.size(0)
            image = cpu_images.to(device)
            target_variable = converter.encode(cpu_texts).to(device)
            # +1: the EOS stop symbol must also be predicted correctly.
            n_total += len(cpu_texts[0]) + 1
            #print(cpu_images.size(), target_variable.size())
            decoded_words = []
            decoded_label = []
            decoder_attentions = torch.zeros(
                len(cpu_texts[0]) + 1, opt.max_width)
            #print(decoder_attentions.size())
            encoder_outputs = encoder(image)
            # Seed the decoder with the start token (row 0 of the targets).
            decoder_input = target_variable[0].to(device)
            decoder_hidden = decoder.initHidden(b).to(device)
            #print(encoder_outputs.size(), decoder_input.size(), decoder_hidden.size())
            loss = 0.0
            for di in range(1, target_variable.shape[0]):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                loss += criterion(decoder_output, target_variable[di])
                # NOTE(review): the running loss is added every step, so
                # partial sums are averaged in repeatedly — confirm intent.
                loss_avg.add(loss)
                #print(decoder_attention.data.size())
                decoder_attentions[di - 1] = decoder_attention.data
                topv, topi = decoder_output.data.topk(1)
                ni = topi.squeeze(1)
                decoder_input = ni  # feed prediction back in
                if ni == EOS:
                    decoded_words.append('<EOS>')
                    decoded_label.append(EOS)
                    break
                else:
                    decoded_words.append(converter.decode(ni))
                    decoded_label.append(ni)
            # Per-character correctness, including the EOS slot.
            for pred, target in zip(decoded_label, target_variable[1:, :]):
                if pred == target:
                    n_correct += 1
            if i % 1000 == 0:  # show one sample every 1000 batches
                print(i)
                texts = cpu_texts[0]
                print('pred:%-20s, gt: %-20s' % (decoded_words, texts))
        accuracy = n_correct / float(n_total)
        print('Test loss: %f, accuracy: %f' % (loss_avg.val(), accuracy))
        return loss_avg.val(), accuracy
def checkAverager(self):
    """Sanity-check utils.averager: mean of added tensor elements.

    Adds [1, 2] and [[5, 6]] (four values total, sum 14) and expects the
    running average 14 / 4 == 3.5.
    """
    acc = utils.averager()
    acc.add(Variable(torch.Tensor([1, 2])))
    acc.add(Variable(torch.Tensor([[5, 6]])))
    assert acc.val() == 3.5
def val(net, valdataset, criterionAttention, criterionCTC, max_iter=100):
    """Validate a hybrid CTC + attention CRNN on ``valdataset``.

    Evaluates at most ``max_iter`` batches, accumulating both the attention
    loss and the (batch-normalized) CTC loss into one averager, and counts
    exact-match predictions from the attention head.

    Relies on module-level globals: ``crnn``, ``dataset``, ``opt``, ``utils``,
    ``image``/``textAttention``/``lengthAttention``/``textCTC``/``lengthCTC``
    buffers, ``converterAttention`` and ``converterCTC``.

    NOTE(review): ``target.decode('utf-8')`` implies Python-2 ``str`` labels
    (or Py3 ``bytes``) — confirm against the dataset's label type.
    """
    print('Start val')
    for p in crnn.parameters():
        p.requires_grad = False
    net.eval()
    val_sampler = dataset.randomSequentialSampler(valdataset, opt.batchSize)
    data_loader = torch.utils.data.DataLoader(
        valdataset, batch_size=opt.batchSize, shuffle=False, sampler=val_sampler,
        num_workers=int(opt.workers),
        collate_fn=dataset.alignCollate(imgH=opt.imgH, imgW=opt.imgW,
                                        keep_ratio=opt.keep_ratio))
    val_iter = iter(data_loader)
    i = 0
    n_correct = 0
    loss_avg = utils.averager()
    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = val_iter.next()
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        tAttention, lAttention = converterAttention.encode(cpu_texts)
        utils.loadData(textAttention, tAttention)
        utils.loadData(lengthAttention, lAttention)
        tCTC, lCTC = converterCTC.encode(cpu_texts)
        utils.loadData(textCTC, tCTC)
        utils.loadData(lengthCTC, lCTC)
        if opt.lang:
            predsCTC, predsAttention = crnn(image, lengthAttention, textAttention)
        else:
            # BUG FIX: the original referenced the undefined name
            # `imageAttention`, raising NameError on this branch.  The shared
            # input buffer is `image`, exactly as used in the branch above.
            predsCTC, predsAttention = crnn(image, lengthAttention)
        costAttention = criterionAttention(predsAttention, textAttention)
        preds_size = Variable(torch.IntTensor([predsCTC.size(0)] * batch_size))
        costCTC = criterionCTC(predsCTC, textCTC, preds_size, lengthCTC) / batch_size
        loss_avg.add(costAttention)
        loss_avg.add(costCTC.cuda())
        _, predsAttention = predsAttention.max(1)
        predsAttention = predsAttention.view(-1)
        sim_predsAttention = converterAttention.decode(predsAttention.data,
                                                       lengthAttention.data)
        for pred, target in zip(sim_predsAttention, cpu_texts):
            #regText = pred.decode('utf-8')
            regText = pred  # pred is already unicode, no conversion needed
            gtText = target.decode('utf-8')  # convert str label to unicode
            print (regText,gtText)
            if regText == gtText:
                print("correct")
                print (regText,gtText)
                n_correct += 1
    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
def val(net, dataset, criterion, max_iter=100): print('Start val') for p in crnn.parameters(): p.requires_grad = False net.eval() data_loader = torch.utils.data.DataLoader(dataset, shuffle=True, batch_size=opt.batchSize, num_workers=int(opt.workers)) val_iter = iter(data_loader) epoch_iter = 0 n_correct = 0 loss_avg = utils.averager() edit_distance = 0 max_iter = min(max_iter, len(data_loader)) for epoch_iter in range(max_iter): data = val_iter.next() epoch_iter += 1 cpu_images, cpu_texts = data # print (cpu_texts) # from matplotlib import pyplot as plt # import numpy as np # for i in range(cpu_images.shape[0]): # tmp = cpu_images[i].numpy() # # tmp = np.squeeze(tmp, axis=0) # tmp = tmp.transpose(1, 2, 0) # plt.imshow(tmp) # plt.show() batch_size = cpu_images.size(0) utils.loadData(image, cpu_images) t, l = converter.encode(cpu_texts) utils.loadData(text, t) utils.loadData(length, l) preds = crnn(image) preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size)) cost = criterion(preds, text, preds_size, length) / batch_size loss_avg.add(cost) _, preds = preds.max(2) # preds = preds.squeeze(2) preds = preds.transpose(1, 0).contiguous().view(-1) sim_preds = converter.decode(preds.data, preds_size.data, raw=False) for pred, target in zip(sim_preds, cpu_texts): if len(opt.alphabet) == 36: if pred == target.lower(): n_correct += 1 else: if pred == target: n_correct += 1 # add edit distance. if len(opt.alphabet) == 36: edit_distance += Lev.distance(pred, target.lower()) else: edit_distance += Lev.distance(pred, target) raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:opt.n_test_disp] for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts): print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt)) accuracy = n_correct / float(max_iter * opt.batchSize) print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy)) print('Total distance: ', edit_distance) return accuracy, edit_distance
def val(model, val_loader, criterion, iteration, max_i=1000):
    """Validate a CTC model whose loader yields flattened, packed labels.

    ``label`` is a single concatenated tensor of all target indices in the
    batch and ``length`` the per-sample target lengths, so targets are sliced
    out by a running offset.  Returns accuracy over all compared samples.
    Relies on module-level globals: ``device``, ``utils``, ``converter`` and
    ``params``.
    """
    print('Start val')
    # for p in model.parameters():
    #     p.requires_grad = False
    model.eval()
    i = 0
    n_correct = 0
    total_images_count = 0
    loss_avg = utils.averager()
    for i_batch, (image, label, length) in enumerate(val_loader):
        image = image.to(device)
        label = label.to(device)
        length = length.to(device)
        preds = model(image)  # preds ends up on the same device as the model
        batch_size = preds.size(1)
        max_seq = torch.IntTensor([preds.size(0)] * batch_size)  # T, B, C
        max_seq = max_seq.to(device)
        cost = criterion(preds, label, max_seq, length) / batch_size  # per-sample cost
        loss_avg.add(cost)
        _, paths = preds.max(2)
        # paths, scores = beam_decode(paths)
        paths = paths.transpose(1, 0).contiguous().view(-1)
        pred_labels = converter.decode(paths.data, max_seq.data, raw=False)
        # Walk the flattened label tensor, slicing one target per sample.
        label_split_start_idx = 0
        for pred, target_len in zip(pred_labels, length):
            target = label[label_split_start_idx:label_split_start_idx + target_len]
            label_split_start_idx += target_len.tolist(
            )  # tolist() on a 0-dim tensor actually returns a plain int
            # Alphabet indices are 1-based (0 is the CTC blank), hence t - 1.
            target = "".join(
                [converter.alphabet[t - 1] for t in target.cpu().numpy()])
            # target = converter.decode(target, torch.IntTensor([len(target)]))
            if pred == target:
                n_correct += 1
            total_images_count += 1
        if (i_batch + 1) % params.displayInterval == 0:
            print('[%d/%d][%d/%d]' % (iteration, params.epoch, i_batch, len(val_loader)))
        if i_batch == max_i:
            break
    # Show a few sample predictions (last batch) to monitor training progress.
    raw_preds = converter.decode(paths.data, max_seq.data, raw=True)[:params.n_test_disp]
    label_split_start_idx = 0
    for raw_pred, pred, target_len in zip(raw_preds, pred_labels, length):
        target = label[label_split_start_idx:label_split_start_idx + target_len]
        label_split_start_idx += target_len.tolist(
        )  # tolist() on a 0-dim tensor actually returns a plain int
        target = "".join(
            [converter.alphabet[t - 1] for t in target.cpu().numpy()])
        # target = converter.decode(target, torch.IntTensor([len(target)]))
        # That decode collapses a CTC *path*, not an int->char mapping of the
        # label; e.g. label=1111 would collapse to 'a' — don't confuse the two.
        print('%-20s => %-20s, tg: %-20s' % (raw_pred, pred,
                                             target))
    # print(n_correct)
    # print(max_i * params.val_batchSize)
    accuracy = n_correct / float(total_images_count)
    print('Val loss: %f, accuray: %d/%d=%f' % (loss_avg.val(), n_correct, total_images_count, accuracy))
    return accuracy
def val(model, crit, opt, writer=None, epoch=0):
    """Validate a video-to-sequence CTC model and report loss and WER.

    Builds its own test DataLoader from ``VideoDataset``, treats label value
    -1 as padding, and returns ``(avg_loss, word_error_rate)``.  Relies on
    module-level globals: ``VideoDataset``, ``DataLoader``, ``averager``,
    ``strLabelConverter``, ``loadData``, ``wer``, ``Variable``.
    """
    dataset = VideoDataset(opt, 'test')
    dataloader = DataLoader(dataset, batch_size=opt['batch_size'], shuffle=True)
    opt["vocab_size"] = dataset.get_vocab_size()
    model.eval()
    # TODO: originally 30 for the Chinese sign-language setup.
    text = torch.LongTensor(opt['batch_size'] * opt['max_len'])
    # text = torch.IntTensor(opt['batch_size'] * 30)
    length = torch.LongTensor(opt['batch_size'])
    loss_avg = averager()
    n_correct = 0
    f_wer = 0.0
    # converter = strLabelConverter(dataset)
    converter = strLabelConverter(dataloader.dataset)
    for data in dataloader:
        fc_feats = data['fc_feats'].cuda()
        labels = data['labels'].cuda()
        # masks = data['masks'].cuda()
        # clip_nums = data['clip_num']
        # sorted_clip_nums, indices = torch.sort(clip_nums, descending=True)
        # _, desorted_indices = torch.sort(indices, descending=False)
        # fc_feats = fc_feats[indices]
        # pack = rnn.pack_padded_sequence(fc_feats, sorted_clip_nums, batch_first=True)
        with torch.no_grad():
            output = model(fc_feats)
        # desorted_res = output[desorted_indices]
        # NOTE(review): requires_grad_() during validation looks unnecessary —
        # confirm it isn't needed by a downstream consumer.
        output = output.log_softmax(2).requires_grad_()
        _, preds = output.max(2)
        output = output.transpose(0, 1).contiguous()
        # Flatten labels (dropping -1 padding) for CTC, and compute per-sample
        # target lengths from the first -1 position.
        labels_ctc = []
        ys = []
        for i in labels:
            for j in i:
                if not j == -1:
                    labels_ctc.append(j)
        for i in labels:
            non_zero = (i == -1).nonzero()
            if not non_zero.numel():
                ys.append(opt['max_len'])
            else:
                ys.append(non_zero[0][0])
        loadData(text, torch.LongTensor(labels_ctc))
        loadData(length, torch.LongTensor(ys))
        preds_size = Variable(
            torch.LongTensor([output.size(0)] * output.size(1)))
        loss = crit(output.cuda(), text.cuda(), preds_size.cuda(), length.cuda())
        preds = preds.contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, labels):
            # Rebuild the ground-truth sentence, skipping -1 padding.
            ts = target.squeeze().cpu().numpy().tolist()
            res = []
            for i in ts:
                if i == -1:
                    continue
                res.append(dataloader.dataset.ix_to_word[str(i)])
            target = ' '.join(res)
            tmp_wer = wer(target, pred)
            f_wer += tmp_wer
            if pred == target:
                n_correct += 1
        loss_avg.add(loss)
    # NOTE(review): `acc` divides by batch count, not sample count, and is
    # never used/returned — presumably vestigial.
    acc = n_correct / float(len(dataloader))
    f_wer = f_wer / float(len(dataloader) * opt['batch_size'])
    print("[epoch %d]->val_loss = %.6f , wer = %.6f" % (epoch, loss_avg.val(), f_wer))
    # writer.add_scalar('scalar/val_loss_epcho', loss_avg.val())
    return loss_avg.val(), f_wer
def val(net, val_dataset, criterion, max_iter=100): print('Start val') for p in model.parameters(): p.requires_grad = False net.eval() data_loader = torch.utils.data.DataLoader( val_dataset, shuffle=False, batch_size=opt.batchSize, num_workers=int(opt.workers),collate_fn=dataset.alignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio=opt.keep_ratio)) val_iter = iter(data_loader) i = 0 n_correct = 0 loss_avg = utils.averager() max_iter = max(max_iter, len(data_loader)) img_num=1 max_sub=-1; str_line='' dst_file=open('./tes_hori_sub_rst.txt','w+') gt = [] for i in range(max_iter): data = val_iter.next() i += 1 cpu_images, cpu_texts = data batch_size = cpu_images.size(0) utils.loadData(image, cpu_images) t, l = converter.encode(cpu_texts) utils.loadData(text, t) utils.loadData(length, l) preds = model(image) preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size)) cost = criterion(preds, text, preds_size, length) / batch_size loss_avg.add(cost) _, preds = preds.max(2) preds = preds.transpose(1, 0).contiguous().view(-1) sim_preds = converter.decode(preds.data, preds_size.data, raw=False) for pred, gt in zip(sim_preds, cpu_texts): # # if gt=='line_2135.jpg' # # with subcribe gt=gt.split('_') cur_num=int(gt[1]) if cur_num==img_num: max_sub+=1 str_line=pred+str_line else: # dst_ite_pth=dst_root+str(img_num).zfill(7)+'.txt' # f=open(dst_ite_pth,'w+') # f.write(str_line) # f.close() dst_file.write(gt[0]+'_'+str(img_num)+'.jpg'+' '+str_line+'\n') str_line=pred img_num=cur_num #without subcribe # dst_file.write(gt + ' ' + pred + '\n') print('pred:%-20s, gt: %-20s' % ( pred, gt)) dst_file.write(gt[0] + '_' + str(img_num) + '.jpg' + ' ' + str_line + '\n') dst_file.close()
def val(net, net2, net3, _dataset, epoch, step, criterion, max_iter=100):
    """Validate a three-model ensemble by averaging time-aligned logits.

    For samples where two (or all three) nets predict the same number of
    non-blank frames, the non-blank frame logits are averaged before CTC
    decoding.  Per-prediction confidences are appended to a per-epoch record
    file.  Reports accuracy, recall and F1 via ``logger``.

    Relies on module-level globals: ``logger``, ``params``, ``dataset``,
    ``utils``, ``converter``, ``log_dir``, ``np``, ``time`` and the
    ``image``/``text``/``length`` buffers.

    NOTE(review): ``f1`` and ``f2`` are opened but never written to or
    closed — presumably vestigial; confirm before removing.
    NOTE(review): the hard-coded 9116 is presumably the alphabet size
    (num classes) — verify against the converter.
    """
    logger.info('Start val')
    # for p in crnn.parameters():
    #     p.requires_grad = False
    net.eval()
    net2.eval()
    net3.eval()
    data_loader = torch.utils.data.DataLoader(
        _dataset, shuffle=False, batch_size=params.batchSize,
        num_workers=int(params.workers),
        collate_fn=dataset.alignCollate(imgH=params.imgH, imgW=params.imgW, keep_ratio=params.keep_ratio))
    val_iter = iter(data_loader)
    i = 0
    n_correct = 0
    loss_avg = utils.averager()
    max_iter = len(data_loader)
    record_dir = log_dir + 'epoch_%d_step_%d_data.txt' % (epoch, step)
    record_dir1 = log_dir + 'epoch_%d_step_%d_data1.txt' % (epoch, step)
    record_dir2 = log_dir + 'epoch_%d_step_%d_data2.txt' % (epoch, step)
    r = 1  # running output-line counter used to name records
    f = open(record_dir, "a")
    f1 = open(record_dir1, "a")
    f2 = open(record_dir2, "a")
    num_label, num_pred = params.total_num, 0
    start = time.time()
    for i in range(max_iter):
        data = val_iter.next()
        if i < 6000:
            pass  # continue
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)
        with torch.no_grad():
            n1img = net(image)
            n2img = net2(image)
            n3img = net3(image)
        preds_size = Variable(torch.IntTensor([n1img.size(0)] * batch_size))
        # Greedy per-frame argmax for each model.
        _, n1 = n1img.max(2)
        _, n2 = n2img.max(2)
        _, n3 = n3img.max(2)
        ind = torch.arange(batch_size)
        _ind = torch.arange(batch_size)
        n1_index = n1.transpose(1, 0).data
        n2_index = n2.transpose(1, 0).data
        n3_index = n3.transpose(1, 0).data
        # ind: samples where net1 and net2 agree on the non-blank frame count;
        # _ind: samples where all three nets agree on it.
        ind = ind[torch.sum(n1_index != 0, 1) == torch.sum(n2_index != 0, 1)]
        _ind = _ind[
            (torch.sum(n1_index != 0, 1) == torch.sum(n2_index != 0, 1)) *
            (torch.sum(n3_index != 0, 1) == torch.sum(n2_index != 0, 1))]
        for i in ind:
            # Align each model's non-blank frame positions, then average the
            # corresponding logits in-place into n1img.
            ind1 = np.arange(n1img.shape[0])
            ind2 = np.arange(n1img.shape[0])
            ind1 = ind1[(n1_index[int(i), :].cpu().numpy().astype(bool) != 0)]
            ind2 = ind2[(n2_index[int(i), :].cpu().numpy().astype(bool) != 0)]
            n1img[ind1, int(i), :] = (n1img[ind1, int(i), :] +
                                      n2img[ind2, int(i), :]) / 2
            if torch.sum(int(i) == _ind) > 0:
                # All three models agree: 3-way average (note n1img already
                # holds the 2-way average at this point).
                ind3 = np.arange(n1img.shape[0])
                ind3 = ind3[(n3_index[int(i), :].cpu().numpy().astype(bool) != 0)]
                n1img[ind1, int(i), :] = (
                    n1img[ind1, int(i), :] + n2img[ind2, int(i), :] +
                    n3img[ind3, int(i), :]) / 3  #+ n3img[ind3, int(i), :]
            else:
                n1img[ind1, int(i), :] = (n1img[ind1, int(i), :] +
                                          n2img[ind2, int(i), :]) / 2
        preds = n1img
        # Manual softmax over the class dimension for confidence reporting.
        soft_max_preds = torch.exp(n1img.data) / torch.unsqueeze(
            torch.sum(torch.exp(n1img.data), 2), 2).repeat(1, 1, 9116)
        print(n1img.data)
        print('----------------------')
        print(torch.exp(n1img.data))
        print('---------------------------------------')
        print(
            torch.unsqueeze(torch.sum(torch.exp(n1img.data), 2),
                            2).repeat(1, 1, 9116))
        print('--------------')
        print(soft_max_preds)
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)
        _, preds = preds.max(2)
        _preds = preds.transpose(1, 0).contiguous()
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        if not isinstance(sim_preds, list):
            sim_preds = [sim_preds]
        for i, pred in enumerate(sim_preds):
            # Record the softmax confidence of each non-blank frame.
            ind = _preds[int(i), :] != 0
            ind2 = torch.arange(_preds.shape[1])
            ind2 = ind2[ind]
            ind = _preds[int(i), :][ind]
            d = []
            j = 0
            for _ind in ind2:
                d.append('%.2f' % (soft_max_preds[int(_ind), i, int(ind[j])]))
                j += 1
            f.write(str(r).zfill(6) + ".jpg " + pred + ' '.join(d) + "\n")
            r += 1
        # Labels arrive as bytes; decode to text before comparison.
        list_1 = []
        for i in cpu_texts:
            string = i.decode('utf-8', 'strict')
            list_1.append(string)
        for pred, target in zip(sim_preds, list_1):
            if pred == target:
                n_correct += 1
        num_pred += len(sim_preds)
        print("")
    f.close()
    # Show a few raw predictions from the last batch.
    raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:params.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, list_1):
        logger.info('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))
    logger.info('correct_num: %d' % (n_correct))
    logger.info('Total_num: %d' % (max_iter * params.batchSize))
    accuracy = float(n_correct) / num_pred
    recall = float(n_correct) / num_label
    logger.info(
        'Test loss: %f, accuray: %f, recall: %f, F1 score: %f, Cost : %.4fs per img' %
        (loss_avg.val(), accuracy, recall,
         2 * accuracy * recall / (accuracy + recall + 1e-2),
         (time.time() - start) / max_iter))
def train(field): alphabet = ''.join(json.load(open('./cn-alphabet.json', 'rb'))) nclass = len(alphabet) + 1 # add the dash - batch_size = BATCH_SIZE if field == 'address' or field == 'psb': batch_size = 1 # image length varies converter = LabelConverter(alphabet) criterion = CTCLoss(zero_infinity=True) crnn = CRNN(IMAGE_HEIGHT, nc, nclass, number_hidden) crnn.apply(weights_init) image_transform = transforms.Compose([ Rescale(IMAGE_HEIGHT), transforms.ToTensor(), Normalize() ]) dataset = LmdbDataset(db_path, field, image_transform) dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4) image = torch.FloatTensor(batch_size, 3, IMAGE_HEIGHT, IMAGE_HEIGHT) text = torch.IntTensor(batch_size * 5) length = torch.IntTensor(batch_size) image = Variable(image) text = Variable(text) length = Variable(length) loss_avg = utils.averager() optimizer = optim.RMSprop(crnn.parameters(), lr=lr) if torch.cuda.is_available(): crnn.cuda() crnn = nn.DataParallel(crnn) image = image.cuda() criterion = criterion.cuda() def train_batch(net, iteration): data = iteration.next() cpu_images, cpu_texts = data batch_size = cpu_images.size(0) utils.load_data(image, cpu_images) t, l = converter.encode(cpu_texts) utils.load_data(text, t) utils.load_data(length, l) preds = crnn(image) preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size)) cost = criterion(preds, text, preds_size, length) / batch_size crnn.zero_grad() cost.backward() optimizer.step() return cost nepoch = 25 for epoch in range(nepoch): train_iter = iter(dataloader) i = 0 while i < len(dataloader): for p in crnn.parameters(): p.requires_grad = True crnn.train() cost = train_batch(crnn, train_iter) loss_avg.add(cost) i += 1 if i % 500 == 0: print('%s [%d/%d][%d/%d] Loss: %f' % (datetime.datetime.now(), epoch, nepoch, i, len(dataloader), loss_avg.val())) loss_avg.reset() # do checkpointing if i % 500 == 0: torch.save( crnn.state_dict(), f'{model_path}crnn_{field}_{epoch}_{i}.pth')
def val(model, converter, data_loader, max_iter=100): print('Start val') # input tensor image = torch.FloatTensor(opt.batch_size, 3, imgH, imgH) image = image.cuda() for p in model.parameters(): p.requires_grad = False model.eval() val_iter = iter(data_loader) i = 0 n_correct = 0 loss_avg = averager() max_iter = min(max_iter, len(data_loader)) for i in range(max_iter): data = val_iter.next() i += 1 cpu_images, cpu_texts = data print('-------\ninput ', cpu_images.size()) batch_size = cpu_images.size(0) #30个 loadData(image, cpu_images) preds = model(image) #[483*10*] print('out ', preds.size()) preds_size = Variable(torch.IntTensor([preds.size(1)] * batch_size)) print("len ", preds_size.data) _, preds = preds.max(2) preds = preds.contiguous().view(-1) print("out preds ", preds.size()) sim_preds = converter.decode(preds.data, preds_size.data, raw=False) if batch_size == 1: sim_preds = [sim_preds] for pred, target in zip(sim_preds, cpu_texts): print("pred ", pred, 'gt ', target) if pred == target.lower(): print('true') n_correct += 1 else: print('false') # raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:10] # for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts): # print('%-20s => %-20s, gt: %-20s\n' % (raw_pred, pred, gt)) # img = cpu_images.numpy()[0] # img = np.squeeze(img) # if len(img.shape) == 3 and img.shape[2] != 3: # img = img.transpose((1, 2, 0)) # cv2.imshow("im", img) # cv2.waitKey(0) accuracy = n_correct / float(max_iter * opt.batch_size) print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy)) return accuracy
image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH) text = torch.IntTensor(opt.batchSize * 5) length = torch.IntTensor(opt.batchSize) if opt.cuda: crnn.cuda() image = image.cuda() criterion = criterion.cuda() image = Variable(image) text = Variable(text) length = Variable(length) # loss averager loss_avg = utils.averager() # setup optimizer if opt.adam: optimizer = optim.Adam(crnn.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) elif opt.adadelta: optimizer = optim.Adadelta(crnn.parameters(), lr=opt.lr) else: optimizer = optim.RMSprop(crnn.parameters(), lr=opt.lr) def val(net, dataset, criterion, max_iter=100): print('Start val') for p in crnn.parameters():
image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH) text = torch.IntTensor(opt.batchSize * 5) length = torch.IntTensor(opt.batchSize) if torch.cuda.is_available(): crnn = crnn.cuda(opt.gpu) # crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu)) image = image.cuda(opt.gpu) criterion = criterion.cuda(opt.gpu) image = Variable(image) text = Variable(text) length = Variable(length) # loss averager loss_avg = utils.averager() # setup optimizer if opt.adam: optimizer = optim.Adam(crnn.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) elif opt.adadelta: optimizer = optim.Adadelta(crnn.parameters()) else: optimizer = optim.RMSprop(crnn.parameters(), lr=opt.lr) def val(net, criterion, max_iter=100): print('Start val')
def val(net, criterion, max_iter=100): print('Start val') # read test set test_dataset = dataset.lmdbDataset(root=params.valroot, transform=dataset.resizeNormalize( (params.imgW, params.imgH))) for p in crnn.parameters(): p.requires_grad = False net.eval() try: data_loader = torch.utils.data.DataLoader(test_dataset, shuffle=True, batch_size=params.batchSize, num_workers=int( params.workers)) val_iter = iter(data_loader) i = 0 n_correct = 0 loss_avg = utils.averager() max_iter = min(max_iter, len(data_loader)) for i in range(max_iter): data = val_iter.next() i += 1 cpu_images, cpu_texts = data batch_size = cpu_images.size(0) utils.loadData(image, cpu_images) t, l = converter.encode(cpu_texts) utils.loadData(text, t) utils.loadData(length, l) preds = crnn(image) preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size)) cost = criterion(preds, text, preds_size, length) / batch_size loss_avg.add(cost) _, preds = preds.max(2) # preds = preds.squeeze(2) preds = preds.transpose(1, 0).contiguous().view(-1) sim_preds = converter.decode(preds.data, preds_size.data, raw=False) list_1 = [] for i in cpu_texts: list_1.append(i.decode('utf-8', 'strict')) for pred, target in zip(sim_preds, list_1): if pred == target: n_correct += 1 raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:params.n_test_disp] for raw_pred, pred, gt in zip(raw_preds, sim_preds, list_1): print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt)) # print(n_correct) # print(max_iter * params.batchSize) accuracy = n_correct / float(max_iter * params.batchSize) print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy)) except: pass