def test_PEM(data_loader, model, epoch, writer, opt):
    """Evaluate the PEM for one epoch, log the loss and write checkpoints.

    The mean per-batch IoU loss is sent to ``writer`` under
    ``data/iou_loss`` / ``validation`` and printed. A rolling checkpoint
    ``<arch>_pem_checkpoint.pth.tar`` is always written; a second copy,
    ``<arch>_pem_best.pth.tar``, is written when the accumulated epoch loss
    is lower than ``model.module.pem_best_loss``.

    Args:
        data_loader: Iterable yielding ``(input_data, label_iou,
            is_whole_lenght)`` batches. It must yield at least one batch,
            because the last batch index is reused after the loop.
        model: Wrapped model exposing ``module.pem_best_loss``
            (presumably ``nn.DataParallel`` -- confirm against the caller).
        epoch: Current epoch index; used as the logging step and stored in
            the checkpoint as ``epoch + 1``.
        writer: Object providing ``add_scalars(tag, values, step)``.
        opt: Mapping of options; ``checkpoint_path`` and ``arch`` are read
            here and the whole mapping is forwarded to ``PEM_loss_function``.
    """
    model.eval()
    epoch_iou_loss = 0

    # Evaluation never calls backward(), so disable autograd bookkeeping to
    # avoid building (and holding memory for) a graph on every batch.
    with torch.no_grad():
        for n_iter, (input_data, label_iou, is_whole_lenght) in enumerate(data_loader):
            PEM_output = model(input_data)
            iou_loss = PEM_loss_function(
                PEM_output, [label_iou, is_whole_lenght], model, opt)
            epoch_iou_loss += iou_loss.cpu().detach().numpy()

    writer.add_scalars('data/iou_loss',
                       {'validation': epoch_iou_loss / (n_iter + 1)}, epoch)
    print("PEM testing loss(epoch %d): iou - %.04f" %
          (epoch, epoch_iou_loss / (n_iter + 1)))

    state = {'epoch': epoch + 1, 'state_dict': model.state_dict()}
    torch.save(
        state,
        opt["checkpoint_path"] + "/" + opt["arch"] + "_pem_checkpoint.pth.tar")

    # NOTE(review): the comparison uses the epoch *sum*, whereas the logged
    # value is the per-batch mean; np.mean is presumably a no-op on this
    # scalar -- confirm this is intended.
    if epoch_iou_loss < model.module.pem_best_loss:
        model.module.pem_best_loss = np.mean(epoch_iou_loss)
        torch.save(
            state,
            opt["checkpoint_path"] + "/" + opt["arch"] + "_pem_best.pth.tar")
def train_BMN(data_loader, model, optimizer, epoch, writer, opt):
    """Train the BMN model for one epoch and report the averaged losses.

    For every batch the inputs and labels are moved to the GPU, the TEM and
    PEM losses are computed and summed, and one optimizer step is taken.
    After the last batch the per-batch means of the three losses are written
    to ``writer`` under the ``train`` series and printed.

    Args:
        data_loader: Iterable yielding ``(input_data, label_start, label_end,
            label_confidence)`` batches; must yield at least one batch since
            the batch index is reused after the loop.
        model: Callable returning ``(start_end, confidence_map)``.
        optimizer: Optimizer stepped once per batch.
        epoch: Current epoch index, used as the logging step.
        writer: Object providing ``add_scalars(tag, values, step)``.
        opt: Options forwarded to both loss functions.
    """
    model.train()
    tem_total = 0
    pem_total = 0
    combined_total = 0

    for n_iter, batch in enumerate(data_loader):
        input_data, label_start, label_end, label_confidence = batch
        input_data = input_data.cuda()
        label_start = label_start.cuda()
        label_end = label_end.cuda()
        label_confidence = label_confidence.cuda()

        start_end, confidence_map = model(input_data)
        tem_loss = TEM_loss_function(label_start, label_end, start_end, opt)
        # NOTE(review): ``confidence_mask`` is not defined in this function;
        # presumably a module-level tensor -- confirm it exists at call time.
        pem_loss = PEM_loss_function(
            label_confidence, confidence_map, confidence_mask, opt)
        loss = tem_loss + pem_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        pem_total += pem_loss.detach().cpu().numpy()
        tem_total += tem_loss.detach().cpu().numpy()
        combined_total += loss.detach().cpu().numpy()

    num_batches = n_iter + 1
    for tag, total in (('data/pem_loss', pem_total),
                       ('data/tem_loss', tem_total),
                       ('data/total_loss', combined_total)):
        writer.add_scalars(tag, {'train': total / num_batches}, epoch)

    report = (epoch,
              tem_total / num_batches,
              pem_total / num_batches,
              combined_total / num_batches)
    print("BMN training loss(epoch %d): tem_loss: %.03f, pem_loss: %.03f, total_loss: %.03f" % report)
def test_BMN(data_loader, model, epoch, writer, opt):
    """Evaluate the BMN model for one epoch, log losses and checkpoint.

    The per-batch means of the TEM, PEM and total losses are written to
    ``writer`` under the ``validation`` series and printed. The rolling
    checkpoint ``BMN_checkpoint.pth.tar`` is always written;
    ``BMN_best.pth.tar`` is written when the accumulated total loss is lower
    than ``model.best_loss``.

    Args:
        data_loader: Iterable yielding ``(input_data, label_start, label_end,
            label_confidence)`` batches; must yield at least one batch since
            the batch index is reused after the loop.
        model: Callable returning ``(start_end, confidence_map)`` and
            exposing a ``best_loss`` attribute.
        epoch: Current epoch index; used as the logging step and stored in
            the checkpoint as ``epoch + 1``.
        writer: Object providing ``add_scalars(tag, values, step)``.
        opt: Mapping of options; ``checkpoint_path`` is read here and the
            mapping is forwarded to both loss functions.
    """
    model.eval()
    epoch_pem_loss = 0
    epoch_tem_loss = 0
    epoch_loss = 0

    # No backward pass happens during evaluation, so skip autograd tracking.
    with torch.no_grad():
        for n_iter, (input_data, label_start, label_end, label_confidence) in enumerate(data_loader):
            input_data = input_data.cuda()
            label_start = label_start.cuda()
            label_end = label_end.cuda()
            label_confidence = label_confidence.cuda()

            start_end, confidence_map = model(input_data)
            tem_loss = TEM_loss_function(label_start, label_end, start_end, opt)
            # NOTE(review): ``confidence_mask`` is not defined in this
            # function; presumably a module-level tensor -- confirm.
            pem_loss = PEM_loss_function(
                label_confidence, confidence_map, confidence_mask, opt)
            loss = tem_loss + pem_loss

            epoch_pem_loss += pem_loss.cpu().detach().numpy()
            epoch_tem_loss += tem_loss.cpu().detach().numpy()
            epoch_loss += loss.cpu().detach().numpy()

    # Log under 'validation' rather than 'train': with the 'train' key these
    # points land in the same series that train_BMN writes for this epoch.
    writer.add_scalars('data/pem_loss',
                       {'validation': epoch_pem_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/tem_loss',
                       {'validation': epoch_tem_loss / (n_iter + 1)}, epoch)
    writer.add_scalars('data/total_loss',
                       {'validation': epoch_loss / (n_iter + 1)}, epoch)

    print("BMN testing loss(epoch %d): tem_loss: %.03f, pem_loss: %.03f, total_loss: %.03f" % (
        epoch,
        epoch_tem_loss / (n_iter + 1),
        epoch_pem_loss / (n_iter + 1),
        epoch_loss / (n_iter + 1)))

    state = {'epoch': epoch + 1, 'state_dict': model.state_dict()}
    torch.save(state, opt["checkpoint_path"] + "/BMN_checkpoint.pth.tar")
    # The best-model test compares the accumulated (not averaged) loss.
    if epoch_loss < model.best_loss:
        model.best_loss = epoch_loss
        torch.save(state, opt["checkpoint_path"] + "/BMN_best.pth.tar")
def test_PEM(data_loader, model, epoch, writer, opt):
    """Evaluate the PEM for one epoch with periodic progress output.

    The running average loss is printed every ``opt['print_freq']`` batches.
    After the last batch the mean per-batch IoU loss is written to ``writer``
    under ``data/iou_loss`` / ``validation`` and printed.
    ``pem_checkpoint.pth.tar`` is always written; ``pem_best.pth.tar`` is
    written when the accumulated epoch loss is lower than
    ``model.module.pem_best_loss``.

    Args:
        data_loader: Sized iterable yielding ``(input_data, label_iou)``
            batches; must yield at least one batch since the batch index is
            reused after the loop.
        model: Wrapped model exposing ``module.pem_best_loss``
            (presumably ``nn.DataParallel`` -- confirm against the caller).
        epoch: Current epoch index; used as the logging step and stored in
            the checkpoint as ``epoch + 1``.
        writer: Object providing ``add_scalars(tag, values, step)``.
        opt: Mapping of options; ``print_freq`` and ``checkpoint_path`` are
            read here and the mapping is forwarded to ``PEM_loss_function``.
    """
    model.eval()
    epoch_iou_loss = 0
    losses = AverageMeter()

    # Evaluation never calls backward(), so disable autograd bookkeeping to
    # avoid building a graph for every batch.
    with torch.no_grad():
        for n_iter, (input_data, label_iou) in enumerate(data_loader):
            PEM_output = model(input_data)
            iou_loss = PEM_loss_function(PEM_output, label_iou, model, opt)
            epoch_iou_loss += iou_loss.cpu().detach().numpy()
            losses.update(iou_loss.item())
            if (n_iter + 1) % opt['print_freq'] == 0:
                print('[TEST] Epoch {}, iter {} / {}, loss: {}'.format(
                    epoch, n_iter + 1, len(data_loader), losses.avg))

    writer.add_scalars('data/iou_loss',
                       {'validation': epoch_iou_loss / (n_iter + 1)}, epoch)
    print("PEM testing loss(epoch %d): iou - %.04f" %
          (epoch, epoch_iou_loss / (n_iter + 1)))

    state = {'epoch': epoch + 1, 'state_dict': model.state_dict()}
    torch.save(state, opt["checkpoint_path"] + "/pem_checkpoint.pth.tar")

    # NOTE(review): the comparison uses the epoch *sum*, whereas the logged
    # value is the per-batch mean; np.mean is presumably a no-op on this
    # scalar -- confirm this is intended.
    if epoch_iou_loss < model.module.pem_best_loss:
        model.module.pem_best_loss = np.mean(epoch_iou_loss)
        torch.save(state, opt["checkpoint_path"] + "/pem_best.pth.tar")
def train_PEM(data_loader, model, optimizer, epoch, writer, opt):
    """Train the PEM for one epoch and report the mean IoU loss.

    One optimizer step is taken per batch. After the last batch the mean
    per-batch loss is written to ``writer`` under ``data/iou_loss`` /
    ``train`` and printed.

    Args:
        data_loader: Iterable yielding ``(input_data, label_iou,
            is_whole_lenght)`` batches; must yield at least one batch since
            the batch index is reused after the loop.
        model: Model to train; also forwarded to ``PEM_loss_function``.
        optimizer: Optimizer stepped once per batch.
        epoch: Current epoch index, used as the logging step.
        writer: Object providing ``add_scalars(tag, values, step)``.
        opt: Options forwarded to ``PEM_loss_function``.
    """
    model.train()
    epoch_iou_loss = 0

    for n_iter, (input_data, label_iou, is_whole_lenght) in enumerate(data_loader):
        PEM_output = model(input_data)
        iou_loss = PEM_loss_function(
            PEM_output, [label_iou, is_whole_lenght], model, opt)

        optimizer.zero_grad()
        iou_loss.backward()
        optimizer.step()

        epoch_iou_loss += iou_loss.cpu().detach().numpy()

    writer.add_scalars('data/iou_loss',
                       {'train': epoch_iou_loss / (n_iter + 1)}, epoch)
    # print() call instead of the Python 2 print statement, which is a
    # SyntaxError under Python 3.
    print("PEM training loss(epoch %d): iou - %.04f" %
          (epoch, epoch_iou_loss / (n_iter + 1)))
def test_PEM(data_loader, model, epoch, global_step, comet_exp, opt):
    """Evaluate the PEM for one epoch, log to Comet and write checkpoints.

    For each batch the IoU loss (scaled by 10), a weighted L2 penalty over
    ``model.module.parameters()`` and their sum are accumulated. The
    per-batch means are logged through ``comet_exp`` (when given) and
    printed. ``pem_checkpoint.<epoch>.pth`` is always written under
    ``<checkpoint_path>/<name>``; ``pem_best.pth`` is written when the mean
    IoU loss is lower than ``model.module.pem_best_loss``.

    Args:
        data_loader: Iterable yielding ``(input_data, label_iou)`` batches;
            must yield at least one batch since the batch index is reused
            after the loop.
        model: Wrapped model exposing ``module.parameters()`` and
            ``module.pem_best_loss``.
        epoch: Current epoch index; stored in the checkpoint and used in the
            checkpoint file name.
        global_step: Step counter passed to Comet and stored in the
            checkpoint.
        comet_exp: Comet experiment, or a falsy value to disable logging.
        opt: Mapping of options; reads ``start_time``, ``time``,
            ``pem_l2_loss``, ``checkpoint_path`` and ``name``.
    """
    model.eval()
    keys = ['iou_loss', 'current_l2', 'total_loss']
    epoch_sums = {k: 0 for k in keys}

    # Evaluation never calls backward(), so disable autograd bookkeeping.
    with torch.no_grad():
        for n_iter, (input_data, label_iou) in enumerate(data_loader):
            # Stop shortly before the time budget runs out (``opt['time']``
            # appears to be in hours -- confirm). The exit only happens when
            # a Comet experiment is attached.
            if (time.time() - opt['start_time'] > opt['time'] * 3600 - 10
                    and comet_exp is not None):
                comet_exp.end()
                sys.exit(-1)

            PEM_output = model(input_data)
            loss = PEM_loss_function(PEM_output, label_iou, opt)

            # Weighted half sum of squared parameters, reported as a metric.
            l2 = sum((W**2).sum() for W in model.module.parameters())
            l2 = l2.sum() / 2
            l2 = opt['pem_l2_loss'] * l2
            loss['current_l2'] = l2
            loss['iou_loss'] *= 10
            iou_loss = loss['iou_loss']
            total_loss = iou_loss + l2
            loss['total_loss'] = total_loss

            for k in keys:
                epoch_sums[k] += loss[k].cpu().detach().numpy()

    epoch_values = {k: v / (n_iter + 1) for k, v in epoch_sums.items()}

    if comet_exp:
        with comet_exp.test():
            comet_exp.log_metrics(epoch_values, step=global_step, epoch=epoch)

    s = ", ".join([
        '%s --> %.06f' % (k.replace('current_', '').replace(
            '_loss', '').capitalize(), epoch_values[k]) for k in sorted(keys)
    ])
    print("Test: %s." % s)

    state = {
        'epoch': epoch,
        'global_step': global_step,
        'state_dict': model.state_dict()
    }
    save_dir = os.path.join(opt["checkpoint_path"], opt['name'])
    # exist_ok=True already tolerates an existing directory, so no separate
    # (race-prone) existence check is needed.
    os.makedirs(save_dir, exist_ok=True)
    save_path = os.path.join(save_dir, 'pem_checkpoint.%d.pth' % epoch)
    torch.save(state, save_path)

    iou_loss = epoch_values['iou_loss']
    if iou_loss < model.module.pem_best_loss:
        model.module.pem_best_loss = iou_loss
        save_path = os.path.join(save_dir, 'pem_best.pth')
        torch.save(state, save_path)
def train_PEM(data_loader, model, optimizer, epoch, writer, opt):
    """Train the PEM for one epoch with periodic progress output.

    One optimizer step is taken per batch and the running average loss is
    printed every ``opt['print_freq']`` batches. After the last batch the
    mean per-batch loss is written to ``writer`` under ``data/iou_loss`` /
    ``train`` and printed.

    Args:
        data_loader: Sized iterable yielding ``(input_data, label_iou)``
            batches; must yield at least one batch since the batch index is
            reused after the loop.
        model: Model to train; also forwarded to ``PEM_loss_function``.
        optimizer: Optimizer stepped once per batch.
        epoch: Current epoch index, used as the logging step.
        writer: Object providing ``add_scalars(tag, values, step)``.
        opt: Mapping of options; ``print_freq`` is read here and the mapping
            is forwarded to ``PEM_loss_function``.
    """
    model.train()
    loss_total = 0
    meter = AverageMeter()

    for n_iter, batch in enumerate(data_loader):
        input_data, label_iou = batch
        prediction = model(input_data)
        iou_loss = PEM_loss_function(prediction, label_iou, model, opt)

        optimizer.zero_grad()
        iou_loss.backward()
        optimizer.step()

        loss_total += iou_loss.detach().cpu().numpy()
        meter.update(iou_loss.item())

        batches_done = n_iter + 1
        if not batches_done % opt['print_freq']:
            print('[TRAIN] Epoch {}, iter {} / {}, loss: {}'.format(
                epoch, batches_done, len(data_loader), meter.avg))

    mean_loss = loss_total / (n_iter + 1)
    writer.add_scalars('data/iou_loss', {'train': mean_loss}, epoch)
    print("PEM training loss(epoch %d): iou - %.04f" % (epoch, mean_loss))
def train_PEM(data_loader, model, optimizer, epoch, global_step, comet_exp,
              opt):
    """Train the PEM for one epoch with Comet logging.

    Each batch optimizes the IoU loss (scaled by 10) plus a weighted L2
    penalty over ``model.module.parameters()``. Every
    ``opt['pem_compute_loss_interval']`` batches the running metrics are
    refreshed through ``compute_metrics`` and logged to ``comet_exp`` (when
    given). At the end of the epoch the metrics are refreshed once more,
    printed and logged.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source. Confirm that the ``steps_per_second`` metric is only set when
    ``n_iter > 10`` and that ``log_epoch_end`` belongs inside the
    ``comet_exp.train()`` context.

    Args:
        data_loader: Iterable yielding ``(input_data, label_iou)`` batches;
            must yield at least one batch since the last loss and batch
            index are reused after the loop.
        model: Wrapped model exposing ``module.parameters()``.
        optimizer: Optimizer stepped once per batch.
        epoch: Current epoch index.
        global_step: Step counter on entry; incremented once per batch.
        comet_exp: Comet experiment, or a falsy value to disable logging.
        opt: Mapping of options; reads ``start_time``, ``time``,
            ``pem_l2_loss`` and ``pem_compute_loss_interval``.

    Returns:
        The step counter after the epoch, plus one.
    """
    model.train()
    keys = ['iou_loss', 'current_l2', 'total_loss']
    epoch_sums = dict.fromkeys(keys, 0)
    count = 1
    start = time.time()

    for n_iter, (input_data, label_iou) in enumerate(data_loader):
        # Stop shortly before the time budget runs out; the exit only
        # happens when a Comet experiment is attached.
        elapsed = time.time() - opt['start_time']
        if elapsed > opt['time'] * 3600 - 10 and comet_exp is not None:
            comet_exp.end()
            sys.exit(-1)

        prediction = model(input_data)
        loss = PEM_loss_function(prediction, label_iou, opt)

        # Weighted half sum of squared parameters.
        squared_norm = sum((W**2).sum() for W in model.module.parameters())
        l2 = opt['pem_l2_loss'] * (squared_norm.sum() / 2)
        loss['current_l2'] = l2
        loss['iou_loss'] *= 10
        total_loss = loss['iou_loss'] + l2
        loss['total_loss'] = total_loss

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()
        global_step += 1

        if n_iter % opt['pem_compute_loss_interval'] == 0:
            epoch_sums, epoch_avg = compute_metrics(epoch_sums, loss, count)
            count += 1

            steps_per_second = 0
            if n_iter > 10:
                steps_per_second = (n_iter + 1) / (time.time() - start)
                epoch_avg['steps_per_second'] = steps_per_second
            epoch_avg['current_lr'] = get_lr(optimizer)

            if comet_exp:
                with comet_exp.train():
                    comet_exp.log_metrics(
                        epoch_avg, step=global_step, epoch=epoch)

    epoch_sums, epoch_avg = compute_metrics(epoch_sums, loss, count)
    # Currently unused: the line that stored it in epoch_avg is disabled.
    steps_per_second = (n_iter + 1) / (time.time() - start)

    print('\n***End of Epoch %d***\nLearningRate: %.4f' %
          (epoch, get_lr(optimizer)))

    parts = []
    for key in sorted(epoch_avg):
        label = key.replace('current_', '').replace('_loss', '').capitalize()
        parts.append('%s --> %.6f' % (label, epoch_avg[key]))
    print("Train: %s." % ", ".join(parts))

    if comet_exp:
        with comet_exp.train():
            comet_exp.log_metrics(epoch_avg, step=global_step, epoch=epoch)
            comet_exp.log_epoch_end(epoch)

    return global_step + 1