# Assumed imports for the snippets below (the originals live in separate
# training scripts): torch, numpy, plus stdlib time/os/sys.
import os
import sys
import time

import numpy as np
import torch

# NOTE: TEM_loss_function, PEM_loss_function, AverageMeter, compute_metrics,
# get_lr, and confidence_mask are project helpers defined elsewhere in the
# repo; hedged sketches of the undefined ones follow the functions that use
# them.


def test_BMN(data_loader, model, epoch, writer, opt):
    model.eval()
    epoch_pem_loss = 0
    epoch_tem_loss = 0
    epoch_loss = 0
    with torch.no_grad():  # no gradients needed during evaluation
        for n_iter, (input_data, label_start, label_end,
                     label_confidence) in enumerate(data_loader):
            input_data = input_data.cuda()
            label_start = label_start.cuda()
            label_end = label_end.cuda()
            label_confidence = label_confidence.cuda()
            start_end, confidence_map = model(input_data)
            tem_loss = TEM_loss_function(label_start, label_end, start_end, opt)
            # confidence_mask is a module-level mask; see the sketch after
            # train_BMN below.
            pem_loss = PEM_loss_function(label_confidence, confidence_map,
                                         confidence_mask, opt)
            loss = tem_loss + pem_loss
            epoch_pem_loss += pem_loss.cpu().detach().numpy()
            epoch_tem_loss += tem_loss.cpu().detach().numpy()
            epoch_loss += loss.cpu().detach().numpy()
    writer.add_scalars('data/pem_loss', {'test': epoch_pem_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/tem_loss', {'test': epoch_tem_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/total_loss', {'test': epoch_loss / (n_iter + 1)}, epoch)
    print("BMN testing loss(epoch %d): tem_loss: %.03f, pem_loss: %.03f, total_loss: %.03f" % (
        epoch, epoch_tem_loss / (n_iter + 1), epoch_pem_loss / (n_iter + 1),
        epoch_loss / (n_iter + 1)))
    state = {'epoch': epoch + 1, 'state_dict': model.state_dict()}
    torch.save(state, opt["checkpoint_path"] + "/BMN_checkpoint.pth.tar")
    if epoch_loss < model.best_loss:
        model.best_loss = epoch_loss
        torch.save(state, opt["checkpoint_path"] + "/BMN_best.pth.tar")
def train_BMN(data_loader, model, optimizer, epoch, writer, opt):
    model.train()
    epoch_pem_loss = 0
    epoch_tem_loss = 0
    epoch_loss = 0
    for n_iter, (input_data, label_start, label_end,
                 label_confidence) in enumerate(data_loader):
        input_data = input_data.cuda()
        label_start = label_start.cuda()
        label_end = label_end.cuda()
        label_confidence = label_confidence.cuda()
        start_end, confidence_map = model(input_data)
        tem_loss = TEM_loss_function(label_start, label_end, start_end, opt)
        pem_loss = PEM_loss_function(label_confidence, confidence_map,
                                     confidence_mask, opt)
        loss = tem_loss + pem_loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_pem_loss += pem_loss.cpu().detach().numpy()
        epoch_tem_loss += tem_loss.cpu().detach().numpy()
        epoch_loss += loss.cpu().detach().numpy()
    writer.add_scalars('data/pem_loss', {'train': epoch_pem_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/tem_loss', {'train': epoch_tem_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/total_loss', {'train': epoch_loss / (n_iter + 1)}, epoch)
    print("BMN training loss(epoch %d): tem_loss: %.03f, pem_loss: %.03f, total_loss: %.03f" % (
        epoch, epoch_tem_loss / (n_iter + 1), epoch_pem_loss / (n_iter + 1),
        epoch_loss / (n_iter + 1)))
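# The two BMN loops above use `confidence_mask` as a free variable. A minimal
# sketch of how it could be precomputed at module scope, assuming it is the
# standard boundary-matching validity mask over (duration, start) pairs;
# `get_confidence_mask` is a hypothetical name, not confirmed by the snippets.
def get_confidence_mask(tscale):
    # [D, T] binary mask: entry (d, t) is 1 when a proposal of duration d + 1
    # starting at snippet t still ends inside the temporal window.
    mask = np.zeros([tscale, tscale], dtype=np.float32)
    for d in range(tscale):
        for t in range(tscale - d):
            mask[d, t] = 1.0
    return torch.from_numpy(mask)


# The TEM snippets below suggest a temporal scale of 100 snippets.
confidence_mask = get_confidence_mask(100).cuda()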
def train_TEM(data_loader, model, optimizer, epoch, writer, opt):
    model.train()
    epoch_action_loss = 0
    epoch_start_loss = 0
    epoch_end_loss = 0
    epoch_cost = 0
    for n_iter, (input_data, label_action, label_start,
                 label_end) in enumerate(data_loader):
        TEM_output = model(input_data)
        loss = TEM_loss_function(label_action, label_start, label_end,
                                 TEM_output, opt)
        cost = loss["cost"]
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        epoch_action_loss += loss["loss_action"].cpu().detach().numpy()
        epoch_start_loss += loss["loss_start"].cpu().detach().numpy()
        epoch_end_loss += loss["loss_end"].cpu().detach().numpy()
        epoch_cost += loss["cost"].cpu().detach().numpy()
    writer.add_scalars('data/action', {'train': epoch_action_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/start', {'train': epoch_start_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/end', {'train': epoch_end_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/cost', {'train': epoch_cost / (n_iter + 1)}, epoch)
    print("TEM training loss(epoch %d): action - %.03f, start - %.03f, end - %.03f" % (
        epoch, epoch_action_loss / (n_iter + 1),
        epoch_start_loss / (n_iter + 1), epoch_end_loss / (n_iter + 1)))
def test_TEM(data_loader, model, epoch, writer, opt):
    model.eval()
    epoch_action_loss = 0
    epoch_start_loss = 0
    epoch_end_loss = 0
    epoch_cost = 0
    for n_iter, (input_data, label_action, label_start,
                 label_end) in enumerate(data_loader):
        TEM_output = model(input_data)
        loss = TEM_loss_function(label_action, label_start, label_end,
                                 TEM_output, opt)
        epoch_action_loss += loss["loss_action"].cpu().detach().numpy()
        epoch_start_loss += loss["loss_start"].cpu().detach().numpy()
        epoch_end_loss += loss["loss_end"].cpu().detach().numpy()
        epoch_cost += loss["cost"].cpu().detach().numpy()
    writer.add_scalars('data/action', {'test': epoch_action_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/start', {'test': epoch_start_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/end', {'test': epoch_end_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/cost', {'test': epoch_cost / (n_iter + 1)}, epoch)
    print("TEM testing loss(epoch %d): action - %.03f, start - %.03f, end - %.03f" % (
        epoch, epoch_action_loss / (n_iter + 1),
        epoch_start_loss / (n_iter + 1), epoch_end_loss / (n_iter + 1)))
    state = {'epoch': epoch + 1, 'state_dict': model.state_dict()}
    torch.save(state, opt["checkpoint_path"] + "/" + opt["arch"] + "_tem_checkpoint.pth.tar")
    if epoch_cost < model.module.tem_best_loss:
        model.module.tem_best_loss = epoch_cost  # epoch_cost is a scalar
        torch.save(state, opt["checkpoint_path"] + "/" + opt["arch"] + "_tem_best.pth.tar")
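# For context: a hypothetical driver wiring a train/test pair together.
# SummaryWriter, the loaders, and the "epochs" key are assumptions here, not
# taken from the snippets.
from torch.utils.tensorboard import SummaryWriter


def run_tem(opt, model, optimizer, train_loader, test_loader):
    writer = SummaryWriter()
    for epoch in range(opt["epochs"]):
        train_TEM(train_loader, model, optimizer, epoch, writer, opt)
        test_TEM(test_loader, model, epoch, writer, opt)
    writer.close()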
def train_TEM(data_loader, model, optimizer, epoch, writer, opt):
    '''Training of TEM with look-once dataset.'''
    model.train()  # put the model in training mode
    epoch_action_loss, epoch_cost = 0, 0
    epoch_start_loss, epoch_end_loss = 0, 0
    for n_iter, (input_data, label_action, label_start,
                 label_end) in enumerate(data_loader):
        TEM_output = model(input_data)  # batch_size x 3 x num_snippets  # TODO
        loss = TEM_loss_function(label_action, label_start, label_end,
                                 TEM_output, opt)
        cost = loss["cost"]
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        epoch_action_loss += loss["loss_action"].cpu().detach().numpy()
        epoch_start_loss += loss["loss_start"].cpu().detach().numpy()
        epoch_end_loss += loss["loss_end"].cpu().detach().numpy()
        epoch_cost += loss["cost"].cpu().detach().numpy()
    writer.add_scalars('data/action', {'train': epoch_action_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/start', {'train': epoch_start_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/end', {'train': epoch_end_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/cost', {'train': epoch_cost / (n_iter + 1)}, epoch)
    print("TEM training loss(epoch %d): action - %.03f, start - %.03f, end - %.03f" % (
        epoch, epoch_action_loss / (n_iter + 1),
        epoch_start_loss / (n_iter + 1), epoch_end_loss / (n_iter + 1)))
def train_TEM(data_loader, model, optimizer, epoch, writer, opt):
    model.train()
    epoch_action_loss = 0
    epoch_start_loss = 0
    epoch_end_loss = 0
    epoch_cost = 0
    losses = AverageMeter()
    gradient_clip = AverageMeter()
    batch_time = AverageMeter()
    for n_iter, (input_data, label_action, label_start,
                 label_end) in enumerate(data_loader):
        start_time = time.time()
        TEM_output = model(input_data)
        loss = TEM_loss_function(label_action, label_start, label_end,
                                 TEM_output, opt)
        cost = loss["cost"]
        optimizer.zero_grad()
        cost.backward()
        # clip_grad_norm_ returns the total gradient norm measured before
        # clipping, so this meter tracks pre-clip norms.
        gradient_clip.update(
            torch.nn.utils.clip_grad_norm_(model.parameters(), 20))
        optimizer.step()
        losses.update(cost.item())
        epoch_action_loss += loss["loss_action"].cpu().detach().numpy()
        epoch_start_loss += loss["loss_start"].cpu().detach().numpy()
        epoch_end_loss += loss["loss_end"].cpu().detach().numpy()
        epoch_cost += loss["cost"].cpu().detach().numpy()
        end_time = time.time()
        batch_time.update(end_time - start_time)
        if (n_iter + 1) % opt['print_freq'] == 0:
            print('[TRAIN] Epoch {}, iter {} / {}, loss: {}, gradient clip: {}, time: {:.5f}s'
                  .format(epoch, n_iter + 1, len(data_loader), losses.avg,
                          gradient_clip.avg, batch_time.avg))
    writer.add_scalars('data/action', {'train': epoch_action_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/start', {'train': epoch_start_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/end', {'train': epoch_end_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/cost', {'train': epoch_cost / (n_iter + 1)}, epoch)
    print("TEM training loss(epoch %d): action - %.03f, start - %.03f, end - %.03f" % (
        epoch, epoch_action_loss / (n_iter + 1),
        epoch_start_loss / (n_iter + 1), epoch_end_loss / (n_iter + 1)))
def test_TEM(data_loader, model, epoch, writer, opt):
    model.eval()
    epoch_action_loss = 0
    epoch_start_loss = 0
    epoch_end_loss = 0
    epoch_cost = 0
    losses = AverageMeter()
    with torch.no_grad():
        for n_iter, (input_data, label_action, label_start,
                     label_end) in enumerate(data_loader):
            TEM_output = model(input_data)
            loss = TEM_loss_function(label_action, label_start, label_end,
                                     TEM_output, opt)
            epoch_action_loss += loss["loss_action"].cpu().detach().numpy()
            epoch_start_loss += loss["loss_start"].cpu().detach().numpy()
            epoch_end_loss += loss["loss_end"].cpu().detach().numpy()
            epoch_cost += loss["cost"].cpu().detach().numpy()
            losses.update(loss["cost"].item())
            if (n_iter + 1) % opt['print_freq'] == 0:
                print('[TEST] Epoch {}, iter {} / {}, loss: {}'.format(
                    epoch, n_iter + 1, len(data_loader), losses.avg))
    writer.add_scalars('data/action', {'test': epoch_action_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/start', {'test': epoch_start_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/end', {'test': epoch_end_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/cost', {'test': epoch_cost / (n_iter + 1)}, epoch)
    print("TEM testing loss(epoch %d): action - %.03f, start - %.03f, end - %.03f" % (
        epoch, epoch_action_loss / (n_iter + 1),
        epoch_start_loss / (n_iter + 1), epoch_end_loss / (n_iter + 1)))
    state = {'epoch': epoch + 1, 'state_dict': model.state_dict()}
    torch.save(state, opt["checkpoint_path"] + "/tem_checkpoint.pth.tar")
    if epoch_cost < model.module.tem_best_loss:
        model.module.tem_best_loss = epoch_cost
        torch.save(state, opt["checkpoint_path"] + "/tem_best.pth.tar")
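# `AverageMeter` is referenced above but not defined in these snippets. A
# minimal sketch, assuming it follows the widely used pattern from the
# PyTorch ImageNet example (latest value plus running sum/count/average).
class AverageMeter(object):
    """Tracks the latest value and the running average of a metric."""

    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count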
def train_TEM(data_loader, model, optimizer, epoch, writer, opt):
    model.train()
    epoch_action_loss = 0
    epoch_start_loss = 0
    epoch_end_loss = 0
    epoch_cost = 0
    # Iterate over the training data, shaped (N, C, T):
    #   input_data: (16, 400, 100); the label tensors: (16, 100)
    for n_iter, (input_data, label_action, label_start,
                 label_end) in enumerate(data_loader):
        # Output is [16, 3, 100]; the 3 channels are the module's outputs
        # (action, start, end).
        TEM_output = model(input_data)
        '''
        loss_dict = {"loss_action": loss_action, "num_sample_action": num_sample_action,
                     "loss_start": loss_start_small, "num_sample_start": num_sample_start_small,
                     "loss_end": loss_end_small, "num_sample_end": num_sample_end_small}
        '''
        # Returns the loss dict sketched above.
        loss = TEM_loss_function(label_action, label_start, label_end,
                                 TEM_output, opt)
        cost = loss["cost"]  # cost = action_loss + start_loss + end_loss
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        # Accumulate each batch's loss into the epoch totals.
        epoch_action_loss += loss["loss_action"].cpu().detach().numpy()
        epoch_start_loss += loss["loss_start"].cpu().detach().numpy()
        epoch_end_loss += loss["loss_end"].cpu().detach().numpy()
        epoch_cost += loss["cost"].cpu().detach().numpy()
    # add_scalars plots several variables on the same chart; the chart name
    # is its first argument.
    writer.add_scalars('data/action', {'train': epoch_action_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/start', {'train': epoch_start_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/end', {'train': epoch_end_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/cost', {'train': epoch_cost / (n_iter + 1)}, epoch)
    print("TEM training loss(epoch %d): action - %.03f, start - %.03f, end - %.03f" % (
        epoch, epoch_action_loss / (n_iter + 1),
        epoch_start_loss / (n_iter + 1), epoch_end_loss / (n_iter + 1)))
def train_TEM(data_loader, model, optimizer, epoch, global_step, comet_exp, opt):
    model.train()
    if opt['do_representation'] and not opt['no_freeze']:
        model.module.set_eval_representation()
    count = 1
    keys = [
        'action_loss', 'start_loss', 'end_loss', 'total_loss', 'cost',
        'current_l2', 'action_positive', 'start_positive', 'end_positive',
        'entries'
    ]
    epoch_sums = {k: 0 for k in keys}
    if comet_exp:
        with comet_exp.train():
            comet_exp.log_current_epoch(epoch)
    start = time.time()
    for n_iter, (input_data, label_action, label_start,
                 label_end) in enumerate(data_loader):
        # Bail out cleanly just before the wall-clock budget expires.
        if time.time() - opt['start_time'] > opt['time'] * 3600 - 10 \
                and comet_exp is not None:
            comet_exp.end()
            sys.exit(-1)
        # For thumosimages, input_data shape: [bs, 100, 3, 176, 320].
        TEM_output = model(input_data)
        loss = TEM_loss_function(label_action, label_start, label_end,
                                 TEM_output, opt)
        # L2 regularization over all parameters, scaled by opt['tem_l2_loss'].
        l2 = sum([(W ** 2).sum() for W in model.module.parameters()])
        l2 = opt['tem_l2_loss'] * l2 / 2
        loss['current_l2'] = l2
        total_loss = loss['cost'] + l2
        loss["total_loss"] = total_loss
        optimizer.zero_grad()
        if opt['do_gradient_checkpointing']:
            model.zero_grad()
        total_loss.backward()
        optimizer.step()
        global_step += 1
        if n_iter % opt['tem_compute_loss_interval'] == 0:
            epoch_sums, epoch_avg = compute_metrics(epoch_sums, loss, count)
            count += 1
            steps_per_second = 0
            if n_iter > 10:
                steps_per_second = (n_iter + 1) / (time.time() - start)
            epoch_avg['steps_per_second'] = steps_per_second
            epoch_avg['current_lr'] = get_lr(optimizer)
            print('\nEpoch %d, S/S %.3f, Global Step %d, Local Step %d / %d.'
                  % (epoch, steps_per_second, global_step, n_iter,
                     len(data_loader)), flush=True)
            s = ", ".join(['%s --> %.6f' % (key, epoch_avg[key])
                           for key in epoch_avg])
            print("TEM avg: %s." % s, flush=True)
            if comet_exp:
                with comet_exp.train():
                    comet_exp.log_metrics(epoch_avg, step=global_step,
                                          epoch=epoch)
    epoch_sums, epoch_avg = compute_metrics(epoch_sums, loss, count)
    print('\n***End of Epoch %d***\nLearningRate: %.4f'
          % (epoch, get_lr(optimizer)), flush=True)
    s = ", ".join(['%s --> %.6f'
                   % (key.replace('_loss', '').replace('current_', '').capitalize(),
                      epoch_avg[key])
                   for key in sorted(epoch_avg.keys())])
    print("Train: %s." % s, flush=True)
    if comet_exp:
        with comet_exp.train():
            comet_exp.log_metrics(epoch_avg, step=global_step, epoch=epoch)
        comet_exp.log_epoch_end(epoch)
    return global_step + 1
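# `get_lr` and `compute_metrics` are called above but not defined in these
# snippets. Hedged sketches consistent with the call sites; the bodies are
# assumptions, not the project's actual implementations.
def get_lr(optimizer):
    # Assumes a single parameter group, which the scalar logging implies.
    return optimizer.param_groups[0]['lr']


def compute_metrics(epoch_sums, loss, count):
    # Accumulate this step's values into the running sums and return the
    # sums alongside their averages over `count` logged steps. 'entries' is
    # assumed to be a plain number, the rest loss tensors.
    for k in epoch_sums:
        v = loss[k]
        epoch_sums[k] += v if k == 'entries' else float(v.cpu().detach())
    epoch_avg = {k: v / count for k, v in epoch_sums.items()}
    return epoch_sums, epoch_avg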
def test_TEM(data_loader, model, optimizer, epoch, global_step, comet_exp, opt):
    model.eval()
    keys = [
        'action_loss', 'start_loss', 'end_loss', 'total_loss', 'cost',
        'action_positive', 'start_positive', 'end_positive', 'entries',
        'current_l2'
    ]
    epoch_sums = {k: 0 for k in keys}
    with torch.no_grad():  # evaluation only; no gradients needed
        for n_iter, (input_data, label_action, label_start,
                     label_end) in enumerate(data_loader):
            # Bail out cleanly just before the wall-clock budget expires.
            if time.time() - opt['start_time'] > opt['time'] * 3600 - 10 \
                    and comet_exp is not None:
                comet_exp.end()
                sys.exit(-1)
            TEM_output = model(input_data)
            loss = TEM_loss_function(label_action, label_start, label_end,
                                     TEM_output, opt)
            l2 = sum([(W ** 2).sum() for W in model.module.parameters()])
            l2 = opt['tem_l2_loss'] * l2 / 2
            loss['current_l2'] = l2
            total_loss = loss['cost'] + l2
            loss["total_loss"] = total_loss
            for k in keys:
                if k == 'entries':
                    epoch_sums[k] += loss[k]
                else:
                    epoch_sums[k] += loss[k].cpu().detach().numpy()
    epoch_values = {k: v / (n_iter + 1) for k, v in epoch_sums.items()}
    if comet_exp:
        with comet_exp.test():
            comet_exp.log_metrics(epoch_values, step=global_step, epoch=epoch)
    s = ", ".join(['%s --> %.6f'
                   % (key.replace('_loss', '').replace('current_', '').capitalize(),
                      epoch_values[key])
                   for key in sorted(epoch_values.keys())])
    print("Test %s." % s, flush=True)
    state = {
        'epoch': epoch,
        'global_step': global_step,
        'state_dict': model.state_dict(),
        'optimizer_dict': optimizer.state_dict()
    }
    save_dir = os.path.join(opt["checkpoint_path"], opt['name'])
    os.makedirs(save_dir, exist_ok=True)
    total_loss = epoch_values['total_loss']
    if total_loss < model.module.tem_best_loss:
        model.module.tem_best_loss = total_loss
        save_path = os.path.join(save_dir, 'tem_checkpoint.%d.pth' % epoch)
        torch.save(state, save_path)
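# A hypothetical resume snippet matching the checkpoint layout saved above;
# `save_path`, `model`, and `optimizer` are assumed to exist already.
checkpoint = torch.load(save_path)
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_dict'])
epoch = checkpoint['epoch']
global_step = checkpoint['global_step']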