def test(model, net):
    """Run one evaluation pass over the validation split.

    `model` is switched to eval mode; `net` (the forward callable, possibly a
    parallel wrapper around the same weights) produces the logits.

    Returns:
        tuple: (mean CTC loss, mean character error rate) over the loader.
    """
    with torch.no_grad():
        # Validation data; trainflag=False selects the evaluation pipeline.
        dataset = MyDataset(opt.video_path,
                            opt.val_list,
                            max_frame_len=opt.max_frame_len,
                            trainflag=False)
        print('num_test_data:{}'.format(len(dataset.data)))
        model.eval()
        loader = dataset2dataloader(dataset, shuffle=False)

        losses = []
        cer_scores = []
        criterion = nn.CTCLoss(blank=0, reduction='mean')
        started = time.time()

        for step, batch in enumerate(loader):
            video = batch.get('video').to(device)
            txt = batch.get('txt').to(device)
            vid_len = batch.get('vid_len').to(device)
            txt_len = batch.get('txt_len').to(device)

            logits = net(video)
            # CTCLoss expects (T, N, C) log-probabilities.
            log_probs = logits.transpose(0, 1).log_softmax(-1)
            batch_loss = criterion(log_probs, txt,
                                   vid_len.view(-1),
                                   txt_len.view(-1)).detach().cpu().numpy()
            losses.append(batch_loss)

            predictions = ctc_decode(logits)
            references = [MyDataset.arr2txt(txt[k]) for k in range(txt.size(0))]
            cer_scores.extend(MyDataset.cer(predictions, references))

            if step % opt.display == 0:
                # Rough ETA from the average seconds per iteration so far.
                sec_per_iter = 1.0 * (time.time() - started) / (step + 1)
                eta = sec_per_iter * (len(loader) - step) / 3600.0
                rule = '-' * 101
                print(rule)
                print('{:<50}|{:>50}'.format('predict', 'truth'))
                print(rule)
                for predicted, reference in list(zip(predictions, references))[:10]:
                    print('{:<50}|{:>50}'.format(predicted, reference))
                print(rule)
                print('test_iter={},eta={},cer={}'.format(
                    step, eta, np.array(cer_scores).mean()))
                print(rule)

        return (np.array(losses).mean(), np.array(cer_scores).mean())
def test(model, net):
    """Evaluate on the test split, reporting mean CTC loss, WER and CER.

    NOTE(review): this redefines `test` from earlier in this file and returns
    three values instead of two — confirm which variant callers expect.

    Returns:
        tuple: (mean CTC loss, mean word error rate, mean character error rate).
    """
    with torch.no_grad():
        dataset = MyDataset(opt.video_path, opt.anno_path, opt.val_list,
                            opt.vid_padding, opt.txt_padding, 'test')
        print('num_test_data:{}'.format(len(dataset.data)))
        model.eval()
        loader = dataset2dataloader(dataset, shuffle=False)

        losses = []
        wer_scores = []
        cer_scores = []
        criterion = nn.CTCLoss()
        started = time.time()

        for step, batch in enumerate(loader):
            vid = batch.get('vid').cuda()
            txt = batch.get('txt').cuda()
            vid_len = batch.get('vid_len').cuda()
            txt_len = batch.get('txt_len').cuda()

            logits = net(vid)
            # CTCLoss expects (T, N, C) log-probabilities.
            batch_loss = criterion(
                logits.transpose(0, 1).log_softmax(-1), txt,
                vid_len.view(-1), txt_len.view(-1)).detach().cpu().numpy()
            losses.append(batch_loss)

            predictions = ctc_decode(logits)
            references = [MyDataset.arr2txt(txt[k], start=1)
                          for k in range(txt.size(0))]
            wer_scores.extend(MyDataset.wer(predictions, references))
            cer_scores.extend(MyDataset.cer(predictions, references))

            if step % opt.display == 0:
                # Rough ETA from the average seconds per iteration so far.
                sec_per_iter = 1.0 * (time.time() - started) / (step + 1)
                eta = sec_per_iter * (len(loader) - step) / 3600.0
                rule = '-' * 101
                print(rule)
                print('{:<50}|{:>50}'.format('predict', 'truth'))
                print(rule)
                for predicted, reference in list(zip(predictions, references))[:10]:
                    print('{:<50}|{:>50}'.format(predicted, reference))
                print(rule)
                print('test_iter={},eta={},wer={},cer={}'.format(
                    step, eta, np.array(wer_scores).mean(),
                    np.array(cer_scores).mean()))
                print(rule)

        return (np.array(losses).mean(), np.array(wer_scores).mean(),
                np.array(cer_scores).mean())
def train(model, net):
    """Train `net` on the training split, logging progress and periodically
    evaluating and checkpointing.

    Args:
        model: the underlying module (parameters are optimized and saved).
        net: the forward callable (possibly a parallel wrapper over `model`).

    Side effects:
        Writes scalars/graph to the module-level `writer`, saves checkpoints
        named '{save_prefix}_loss_{loss}_cer_{cer}.pt', and calls exit() after
        the first evaluation when opt.is_optimize is false (eval-only mode).
    """
    dataset = MyDataset(opt.video_path, opt.train_list,
                        max_frame_len=opt.max_frame_len)
    loader = dataset2dataloader(dataset)
    optimizer = optim.Adam(model.parameters(),
                           lr=opt.base_lr, weight_decay=0., amsgrad=True)
    print('num_train_data:{}'.format(len(dataset.data)))
    crit = nn.CTCLoss(blank=0, reduction='mean')
    tic = time.time()
    # NOTE: accumulates across ALL epochs, so the logged CER is a running
    # mean since the start of training, not a per-display-window mean.
    train_cer = []
    for epoch in range(opt.max_epoch):
        for (i_iter, input) in enumerate(loader):
            model.train()
            video = input.get('video').to(device)
            txt = input.get('txt').to(device)
            vid_len = input.get('vid_len').to(device)
            txt_len = input.get('txt_len').to(device)
            optimizer.zero_grad()
            y = net(video)
            # CTCLoss expects (T, N, C) log-probabilities.
            y_trans_log_soft = y.transpose(0, 1).log_softmax(-1)
            loss = crit(y_trans_log_soft, txt,
                        vid_len.view(-1), txt_len.view(-1))
            loss.backward()
            if opt.is_optimize:
                optimizer.step()
            tot_iter = i_iter + epoch * len(loader)
            pred_txt = ctc_decode(y)
            truth_txt = [MyDataset.arr2txt(txt[_]) for _ in range(txt.size(0))]
            train_cer.extend(MyDataset.cer(pred_txt, truth_txt))
            if tot_iter % opt.display == 0:
                # ETA for the remainder of the current epoch, in hours.
                v = 1.0 * (time.time() - tic) / (tot_iter + 1)
                eta = (len(loader) - i_iter) * v / 3600.0
                writer.add_scalar('train loss', loss, tot_iter)
                writer.add_scalar('train cer',
                                  np.array(train_cer).mean(), tot_iter)
                print(''.join(101 * '-'))
                print('{:<50}|{:>50}'.format('predict', 'truth'))
                print(''.join(101 * '-'))
                for (predict, truth) in list(zip(pred_txt, truth_txt))[:3]:
                    print('{:<50}|{:>50}'.format(predict, truth))
                print(''.join(101 * '-'))
                print(
                    'epoch={},tot_iter={},eta={},loss={},train_cer={}'.format(
                        epoch, tot_iter, eta, loss,
                        np.array(train_cer).mean()))
                print(''.join(101 * '-'))
            if tot_iter % opt.test_step == 0:
                # BUG FIX: this file defines `test` twice; the later definition
                # (which shadows the earlier one) returns THREE values
                # (loss, wer, cer), so `(loss, cer) = test(...)` raised
                # ValueError at the first test step. Both variants return the
                # loss first and the CER last, so index first/last to work
                # correctly with either.
                results = test(model, net)
                loss, cer = results[0], results[-1]
                print('i_iter={},lr={},loss={},cer={}'.format(
                    tot_iter, show_lr(optimizer), loss, cer))
                writer.add_scalar('val loss', loss, tot_iter)
                writer.add_scalar('cer', cer, tot_iter)
                writer.add_graph(model, video)
                savename = '{}_loss_{}_cer_{}.pt'.format(
                    opt.save_prefix, loss, cer)
                (path, name) = os.path.split(savename)
                # Create the checkpoint directory on first save.
                if not os.path.exists(path):
                    os.makedirs(path)
                torch.save(model.state_dict(), savename)
                if not opt.is_optimize:
                    exit()
def eval(model, net):
    """Evaluate on the test split: WER, CER, word-level accuracy (WLA) and
    sentence-level accuracy (SLA).

    Returns:
        tuple: (mean WER, mean CER, mean WLA, sentence-level accuracy).
    """
    with torch.no_grad():
        dataset = MyDataset(opt.video_path, opt.anno_path, opt.val_list,
                            opt.vid_padding, opt.txt_padding, 'test')
        print('num_test_data:{}'.format(len(dataset.data)))
        model.eval()
        loader = dataset2dataloader(dataset, shuffle=False)

        wer_scores = []
        cer_scores = []
        wla_scores = []
        total_sentences = 0.0
        correct_sentences = 0.0

        for step, batch in enumerate(loader):
            vid = batch.get('vid').cuda()
            txt = batch.get('txt').cuda()

            logits = net(vid)
            predictions = ctc_decode(logits)
            references = [MyDataset.arr2txt(txt[k], start=1)
                          for k in range(txt.size(0))]

            wer_scores.extend(MyDataset.wer(predictions, references))
            cer_scores.extend(MyDataset.cer(predictions, references))
            wla_scores.extend(MyDataset.wla(predictions, references))
            batch_correct, batch_total = MyDataset.sentences(
                predictions, references)
            correct_sentences = correct_sentences + batch_correct
            total_sentences = total_sentences + batch_total
            # Sentence-level accuracy over everything processed so far.
            sla = correct_sentences / total_sentences

            if step % opt.display == 0:
                rule = '-' * 101
                print(rule)
                print('{:<50}|{:>50}'.format('predict', 'truth'))
                print(rule)
                for predicted, reference in list(zip(predictions, references))[:10]:
                    print('{:<50}|{:>50}'.format(predicted, reference))
                print(rule)
                print('test_iter={}, wer={}, cer={}, wla={} , sla={}'.format(
                    step, np.array(wer_scores).mean(),
                    np.array(cer_scores).mean(),
                    np.array(wla_scores).mean(), sla))
                print(rule)
                # NOTE(review): the tag 'bla' below looks like a typo for
                # 'sla' — kept byte-identical so existing log readers keep
                # working; confirm before renaming.
                writer.add_scalar('wer', np.array(wer_scores).mean(), step)
                writer.add_scalar('cer', np.array(cer_scores).mean(), step)
                writer.add_scalar('wla', np.array(wla_scores).mean(), step)
                writer.add_scalar('bla', sla, step)

        return (np.array(wer_scores).mean(), np.array(cer_scores).mean(),
                np.array(wla_scores).mean(), sla)