def validation(model, valid_loader, criterion):
    """Evaluate ``model`` on ``valid_loader`` and print the mean total loss.

    The model is switched to eval mode (and left in it) and every batch is
    evaluated under ``torch.no_grad()`` so that no autograd graph is built
    while validating.

    Args:
        model: network invoked as ``model(img)``.
        valid_loader: iterable yielding ``(img, train_mask, tr_mask,
            tcl_mask, radius_map, sin_map, cos_map, meta)`` batches; ``meta``
            is indexed with ``'image_id'``.
        criterion: callable returning the five loss terms ``(tr_loss,
            tcl_loss, sin_loss, cos_loss, radii_loss)``.
    """
    # Function-scope import so the block stays runnable on its own.
    import torch

    model.eval()
    losses = AverageMeter()

    # Gradients are never used during validation; disabling them avoids
    # retaining activations for a backward pass that is never run.
    with torch.no_grad():
        for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map,
                cos_map, meta) in enumerate(valid_loader):
            print(meta['image_id'])

            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
                img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)

            output = model(img)

            tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss = \
                criterion(output, tr_mask, tcl_mask, sin_map, cos_map, radius_map, train_mask)
            loss = tr_loss + tcl_loss + sin_loss + cos_loss + radii_loss
            losses.update(loss.item())

            # Only the first cfg.vis_num batches are visualised.
            if cfg.viz and i < cfg.vis_num:
                visualize_network_output(output, tr_mask, tcl_mask, prefix='val_{}'.format(i))

            if i % cfg.display_freq == 0:
                print(
                    'Validation: - Loss: {:.4f} - tr_loss: {:.4f} - tcl_loss: {:.4f} - sin_loss: {:.4f} - cos_loss: {:.4f} - radii_loss: {:.4f}'
                    .format(loss.item(), tr_loss.item(), tcl_loss.item(),
                            sin_loss.item(), cos_loss.item(), radii_loss.item()))

    print('Validation Loss: {}'.format(losses.avg))
def validation(self, model, valid_loader, criterion, epoch, logger):
    """Evaluate ``model`` on ``valid_loader`` and log the averaged losses.

    Runs entirely under ``torch.no_grad()`` with the model in eval mode.
    Running averages of the total loss and of each of the five loss terms
    are written to ``logger`` under the ``'val'`` tag with ``n_iter=epoch``,
    and the mean total loss is printed.

    Args:
        model: network invoked as ``model(img)``.
        valid_loader: iterable yielding ``(img, train_mask, tr_mask,
            tcl_mask, radius_map, sin_map, cos_map, meta)`` batches.
        criterion: callable returning ``(tr_loss, tcl_loss, sin_loss,
            cos_loss, radii_loss)``.
        epoch: value passed to the logger as ``n_iter``.
        logger: object exposing ``write_scalars(dict, tag=..., n_iter=...)``.
    """
    with torch.no_grad():
        model.eval()

        total_meter = AverageMeter()
        tr_meter = AverageMeter()
        tcl_meter = AverageMeter()
        sin_meter = AverageMeter()
        cos_meter = AverageMeter()
        radii_meter = AverageMeter()

        for step, batch in enumerate(valid_loader):
            (img, train_mask, tr_mask, tcl_mask,
             radius_map, sin_map, cos_map, _meta) = batch

            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
                img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)

            output = model(img)

            tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss = criterion(
                output, tr_mask, tcl_mask, sin_map, cos_map, radius_map, train_mask)
            loss = tr_loss + tcl_loss + sin_loss + cos_loss + radii_loss

            # Fold this batch into every running average.
            meter_inputs = (
                (total_meter, loss),
                (tr_meter, tr_loss),
                (tcl_meter, tcl_loss),
                (sin_meter, sin_loss),
                (cos_meter, cos_loss),
                (radii_meter, radii_loss),
            )
            for meter, term in meter_inputs:
                meter.update(term.item())

            if cfg.viz and step % cfg.viz_freq == 0:
                visualize_network_output(output, tr_mask, tcl_mask, mode='val')

            if step % cfg.display_freq == 0:
                message = (
                    'Validation: - Loss: {:.4f} - tr_loss: {:.4f} - tcl_loss: {:.4f} - sin_loss: {:.4f} - cos_loss: {:.4f} - radii_loss: {:.4f}'
                ).format(loss.item(), tr_loss.item(), tcl_loss.item(),
                         sin_loss.item(), cos_loss.item(), radii_loss.item())
                print(message)

        averaged = {
            'loss': total_meter.avg,
            'tr_loss': tr_meter.avg,
            'tcl_loss': tcl_meter.avg,
            'sin_loss': sin_meter.avg,
            'cos_loss': cos_meter.avg,
            'radii_loss': radii_meter.avg
        }
        logger.write_scalars(averaged, tag='val', n_iter=epoch)
        print('Validation Loss: {}'.format(total_meter.avg))
def train(model, train_loader, criterion, scheduler, optimizer, epoch):
    """Train ``model`` for one epoch and print the mean total loss.

    Args:
        model: network invoked as ``model(img)``.
        train_loader: iterable yielding ``(img, train_mask, tr_mask,
            tcl_mask, radius_map, sin_map, cos_map, meta)`` batches.
        criterion: callable returning the five loss terms ``(tr_loss,
            tcl_loss, sin_loss, cos_loss, radii_loss)``.
        scheduler: learning-rate scheduler; stepped once per batch.
        optimizer: optimizer updating ``model``'s parameters.
        epoch: current epoch number, used in log output and to decide
            whether a checkpoint is saved.
    """
    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    end = time.time()
    model.train()

    for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map,
            cos_map, meta) in enumerate(train_loader):
        data_time.update(time.time() - end)

        img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)

        output = model(img)
        tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss = \
            criterion(output, tr_mask, tcl_mask, sin_map, cos_map, radius_map, train_mask)
        loss = tr_loss + tcl_loss + sin_loss + cos_loss + radii_loss

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Step the schedule only after the parameter update: on
        # PyTorch >= 1.1 stepping first skips the first value of the
        # learning-rate schedule and emits a warning.
        scheduler.step()

        losses.update(loss.item())

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Only the first cfg.vis_num batches are visualised.
        if cfg.viz and i < cfg.vis_num:
            visualize_network_output(output, tr_mask, tcl_mask, prefix='train_{}'.format(i))

        if i % cfg.display_freq == 0:
            print(
                'Epoch: [ {} ][ {:03d} / {:03d} ] - Loss: {:.4f} - tr_loss: {:.4f} - tcl_loss: {:.4f} - sin_loss: {:.4f} - cos_loss: {:.4f} - radii_loss: {:.4f}'
                .format(epoch, i, len(train_loader), loss.item(), tr_loss.item(),
                        tcl_loss.item(), sin_loss.item(), cos_loss.item(),
                        radii_loss.item()))

    # Epoch 0 is never checkpointed.
    if epoch % cfg.save_freq == 0 and epoch > 0:
        save_model(model, epoch, scheduler.get_lr())

    print('Training Loss: {}'.format(losses.avg))
def train(model, train_loader, criterion, scheduler, optimizer, epoch, logger):
    """Train ``model`` for one epoch, logging per-step losses.

    Increments the module-level ``train_step`` counter once per batch and
    uses it as ``n_iter`` when writing scalars to ``logger``.

    Args:
        model: network invoked as ``model(img)``.
        train_loader: iterable yielding ``(img, train_mask, tr_mask,
            tcl_mask, radius_map, sin_map, cos_map, meta)`` batches.
        criterion: callable returning the five loss terms ``(tr_loss,
            tcl_loss, sin_loss, cos_loss, radii_loss)``.
        scheduler: learning-rate scheduler; stepped once per epoch.
        optimizer: optimizer updating ``model``'s parameters.
        epoch: current epoch number, used in log output and to decide
            whether a checkpoint is saved.
        logger: object exposing ``write_scalars(dict, tag=..., n_iter=...)``.
    """
    global train_step

    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    end = time.time()
    model.train()

    # Read the rate actually in effect from the optimizer; the function
    # previously referenced an ``lr`` name it never defined.
    lr = optimizer.param_groups[0]['lr']
    print('Epoch: {} : LR = {}'.format(epoch, lr))

    for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map,
            cos_map, meta) in enumerate(train_loader):
        data_time.update(time.time() - end)
        train_step += 1

        img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)

        # model output
        output = model(img)

        # loss computation
        tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss = \
            criterion(output, tr_mask, tcl_mask, sin_map, cos_map, radius_map, train_mask)
        loss = tr_loss + tcl_loss + sin_loss + cos_loss + radii_loss

        # backward
        # clear the gradients left over from the previous iteration
        optimizer.zero_grad()
        # back-propagation
        loss.backward()
        # apply the parameter update
        optimizer.step()

        # update the running loss
        losses.update(loss.item())

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if cfg.viz and i % cfg.viz_freq == 0:
            visualize_network_output(output, tr_mask, tcl_mask, mode='train')

        if i % cfg.display_freq == 0:
            print(
                '({:d} / {:d}) - Loss: {:.4f} - tr_loss: {:.4f} - tcl_loss: {:.4f} - sin_loss: {:.4f} - cos_loss: {:.4f} - radii_loss: {:.4f}'
                .format(i, len(train_loader), loss.item(), tr_loss.item(),
                        tcl_loss.item(), sin_loss.item(), cos_loss.item(),
                        radii_loss.item()))

        if i % cfg.log_freq == 0:
            logger.write_scalars(
                {
                    'loss': loss.item(),
                    'tr_loss': tr_loss.item(),
                    'tcl_loss': tcl_loss.item(),
                    'sin_loss': sin_loss.item(),
                    'cos_loss': cos_loss.item(),
                    'radii_loss': radii_loss.item()
                },
                tag='train',
                n_iter=train_step)

    if epoch % cfg.save_freq == 0:
        save_model(model, epoch, scheduler.get_lr(), optimizer)

    # Advance the schedule only after this epoch's optimizer updates (and
    # after the checkpoint, so the saved rate is the one used this epoch).
    # On PyTorch >= 1.1 stepping before optimizer.step() skips the first
    # value of the schedule.
    scheduler.step()

    print('Training Loss: {}'.format(losses.avg))
def train(model, train_loader, criterion, scheduler, optimizer, epoch, summary_writer):
    """Train ``model`` for one epoch and periodically write summaries.

    The module-level ``total_iter`` counter is passed to ``criterion`` as its
    last argument, used as the global step for every summary entry, and
    incremented once per batch. The scheduler is not stepped here; it is only
    forwarded to ``save_model``.

    Args:
        model: network invoked as ``model(img)``.
        train_loader: iterable yielding ``(img, train_mask, tr_mask,
            tcl_mask, radius_map, sin_map, cos_map, meta)`` batches.
        criterion: callable returning ``(tr_loss, tcl_loss, sin_loss,
            cos_loss, radii_loss)``.
        scheduler: learning-rate scheduler handed to ``save_model``.
        optimizer: optimizer updating ``model``'s parameters.
        epoch: current epoch number, used in log output and to decide
            whether a checkpoint is saved.
        summary_writer: object exposing ``add_image`` and ``add_scalar``.
    """
    global total_iter

    epoch_started = time.time()
    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    tick = time.time()
    model.train()

    for i, batch in enumerate(train_loader):
        (img, train_mask, tr_mask, tcl_mask,
         radius_map, sin_map, cos_map, _meta) = batch
        data_time.update(time.time() - tick)

        img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)

        output = model(img)
        tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss = criterion(
            output, tr_mask, tcl_mask, sin_map, cos_map, radius_map,
            train_mask, total_iter)
        loss = tr_loss + tcl_loss + sin_loss + cos_loss + radii_loss

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        losses.update(loss.item())

        # Time spent on this batch, including data loading.
        batch_time.update(time.time() - tick)
        tick = time.time()

        if cfg.viz and i < cfg.vis_num:
            visualize_network_output(output, tr_mask, tcl_mask,
                                     prefix='train_{}'.format(i))

        if i % cfg.display_freq == 0:
            progress = (
                'Epoch: [ {} ][ {:03d} / {:03d} ] - Loss: {:.4f} - tr_loss: {:.4f} - tcl_loss: {:.4f} - sin_loss: {:.4f} - cos_loss: {:.4f} - radii_loss: {:.4f} - {:.2f}s/step'
            ).format(epoch, i, len(train_loader), loss.item(), tr_loss.item(),
                     tcl_loss.item(), sin_loss.item(), cos_loss.item(),
                     radii_loss.item(), batch_time.avg)
            print(progress)

        # Write summaries every cfg.summary_freq global steps.
        if total_iter % cfg.summary_freq == 0:
            summary_path = os.path.join(cfg.summary_dir, cfg.exp_name)
            print('Summary in {}'.format(summary_path))

            # Channel 1 of the softmax over output channels 0:2 and 2:4.
            tr_pred = output[:, 0:2].softmax(dim=1)[:, 1:2]
            tcl_pred = output[:, 2:4].softmax(dim=1)[:, 1:2]

            input_grid = vutils.make_grid(img, normalize=True)
            summary_writer.add_image('input_image', input_grid, total_iter)

            tr_pred_grid = vutils.make_grid(tr_pred * 255, normalize=True)
            summary_writer.add_image('tr/tr_pred', tr_pred_grid, total_iter)

            tr_mask_grid = vutils.make_grid(
                torch.unsqueeze(tr_mask * train_mask, 1) * 255)
            summary_writer.add_image('tr/tr_mask', tr_mask_grid, total_iter)

            tcl_pred_grid = vutils.make_grid(tcl_pred * 255, normalize=True)
            summary_writer.add_image('tcl/tcl_pred', tcl_pred_grid, total_iter)

            tcl_mask_grid = vutils.make_grid(
                torch.unsqueeze(tcl_mask * train_mask, 1) * 255)
            summary_writer.add_image('tcl/tcl_mask', tcl_mask_grid, total_iter)

            summary_writer.add_scalar('learning_rate',
                                      optimizer.param_groups[0]['lr'],
                                      total_iter)
            tagged_losses = (
                ('model/tr_loss', tr_loss),
                ('model/tcl_loss', tcl_loss),
                ('model/sin_loss', sin_loss),
                ('model/cos_loss', cos_loss),
                ('model/radii_loss', radii_loss),
                ('model/loss', loss),
            )
            for scalar_tag, term in tagged_losses:
                summary_writer.add_scalar(scalar_tag, term.item(), total_iter)

        total_iter += 1

    print('Speed: {}s /step, {}s /epoch'.format(batch_time.avg,
                                                time.time() - epoch_started))

    if epoch % cfg.save_freq == 0:
        save_model(model, optimizer, scheduler, epoch)

    print('Training Loss: {}'.format(losses.avg))
def train(self, model, train_loader, criterion, scheduler, optimizer, epoch, logger, train_step):
    """Train ``model`` for one epoch and return the updated step counter.

    Args:
        model: network invoked as ``model(img)``.
        train_loader: iterable yielding ``(img, train_mask, tr_mask,
            tcl_mask, radius_map, sin_map, cos_map, meta)`` batches.
        criterion: callable returning ``(tr_loss, tcl_loss, sin_loss,
            cos_loss, radii_loss)``.
        scheduler: learning-rate scheduler; stepped once per epoch.
        optimizer: optimizer updating ``model``'s parameters.
        epoch: current epoch number, used in log output and to decide
            whether a checkpoint is saved.
        logger: object exposing ``write_scalars(dict, tag=..., n_iter=...)``.
        train_step: global step count before this epoch.

    Returns:
        ``train_step`` incremented by the number of batches processed.
    """
    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    end = time.time()
    model.train()

    # Report the rate currently applied by the optimizer.
    lr = optimizer.param_groups[0]['lr']
    print('Epoch: {} : LR = {}'.format(epoch, lr))

    for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map,
            cos_map, meta) in enumerate(train_loader):
        data_time.update(time.time() - end)
        train_step += 1

        img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)

        output = model(img)
        tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss = \
            criterion(output, tr_mask, tcl_mask, sin_map, cos_map, radius_map, train_mask)
        loss = tr_loss + tcl_loss + sin_loss + cos_loss + radii_loss

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        losses.update(loss.item())

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if cfg.viz and i % cfg.viz_freq == 0:
            visualize_network_output(output, tr_mask, tcl_mask, mode='train')

        if i % cfg.display_freq == 0:
            try:
                print(
                    '({:d} / {:d}) - Loss: {:.4f} - tr_loss: {:.4f} - tcl_loss: {:.4f} - sin_loss: {:.4f} - cos_loss: {:.4f} - radii_loss: {:.4f}'
                    .format(i, len(train_loader), loss.item(), tr_loss.item(),
                            tcl_loss.item(), sin_loss.item(), cos_loss.item(),
                            radii_loss.item()))
            except Exception:
                # Best-effort fallback to the reduced report; presumably for
                # criteria whose auxiliary terms are not tensors (TODO
                # confirm). ``Exception`` rather than a bare ``except`` so
                # KeyboardInterrupt / SystemExit still propagate.
                print('({:d} / {:d}) - Loss: {:.4f} - tr_loss: {:.4f}'.
                      format(i, len(train_loader), loss.item(), tr_loss.item()))

        if i % cfg.log_freq == 0:
            try:
                logger.write_scalars(
                    {
                        'loss': loss.item(),
                        'tr_loss': tr_loss.item(),
                        'tcl_loss': tcl_loss.item(),
                        'sin_loss': sin_loss.item(),
                        'cos_loss': cos_loss.item(),
                        'radii_loss': radii_loss.item()
                    },
                    tag='train',
                    n_iter=train_step)
            except Exception:
                # Same best-effort fallback as the console report above.
                logger.write_scalars(
                    {
                        'loss': loss.item(),
                        'tr_loss': tr_loss.item()
                    },
                    tag='train',
                    n_iter=train_step)

    if epoch % cfg.save_freq == 0:
        self.save_model(model, epoch, scheduler.get_lr(), optimizer)

    # Advance the schedule only after this epoch's optimizer updates (and
    # after the checkpoint, so the saved rate is the one used this epoch).
    # On PyTorch >= 1.1 stepping before optimizer.step() skips the first
    # value of the schedule.
    scheduler.step()

    print('Training Loss: {}'.format(losses.avg))
    return train_step
def train(model, train_loader, criterion, scheduler, optimizer, epoch, logger):
    """Train the GCN variant of ``model`` for one epoch.

    Increments the module-level ``train_step`` counter once per batch and
    uses it as ``n_iter`` when writing scalars to ``logger``. A batch whose
    backward pass raises is reported and skipped. The scheduler is not
    stepped here; it is only queried for the current learning rate.

    Args:
        model: network invoked as ``model(img, gt_roi, to_device)`` and
            returning ``(output, gcn_data)``.
        train_loader: iterable yielding ``(img, train_mask, tr_mask,
            tcl_mask, radius_map, sin_map, cos_map, gt_roi)`` batches.
        criterion: callable returning ``(tr_loss, tcl_loss, sin_loss,
            cos_loss, radii_loss, gcn_loss)``.
        scheduler: learning-rate scheduler, read via ``get_lr()``.
        optimizer: optimizer updating ``model``'s parameters.
        epoch: current epoch number, used in log output and to decide
            whether a checkpoint is saved.
        logger: object exposing ``write_scalars(dict, tag=..., n_iter=...)``.
    """
    global train_step

    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    end = time.time()
    model.train()

    print('Epoch: {} : LR = {}'.format(epoch, scheduler.get_lr()))

    for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map,
            cos_map, gt_roi) in enumerate(train_loader):
        data_time.update(time.time() - end)
        train_step += 1

        img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map \
            = to_device(img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)

        output, gcn_data = model(img, gt_roi, to_device)

        tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss, gcn_loss \
            = criterion(output, gcn_data, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)
        loss = tr_loss + tcl_loss + sin_loss + cos_loss + radii_loss + gcn_loss

        # backward
        try:
            optimizer.zero_grad()
            loss.backward()
        except Exception as err:
            # Skip the batch, but report what failed. ``Exception`` rather
            # than a bare ``except`` so Ctrl-C / SystemExit still stop the run.
            print("loss gg: {!r}".format(err))
            continue
        optimizer.step()

        losses.update(loss.item())

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        gc.collect()

        if cfg.viz and i % cfg.viz_freq == 0:
            visualize_network_output(output, tr_mask, tcl_mask[:, :, :, 0], mode='train')

        if i % cfg.display_freq == 0:
            print(
                '({:d} / {:d}) Loss: {:.4f} tr_loss: {:.4f} tcl_loss: {:.4f} '
                'sin_loss: {:.4f} cos_loss: {:.4f} radii_loss: {:.4f} gcn_loss: {:.4f}'
                .format(i, len(train_loader), loss.item(), tr_loss.item(),
                        tcl_loss.item(), sin_loss.item(), cos_loss.item(),
                        radii_loss.item(), gcn_loss.item()))

        if i % cfg.log_freq == 0:
            logger.write_scalars(
                {
                    'loss': loss.item(),
                    'tr_loss': tr_loss.item(),
                    'tcl_loss': tcl_loss.item(),
                    'sin_loss': sin_loss.item(),
                    'cos_loss': cos_loss.item(),
                    'radii_loss': radii_loss.item(),
                    # Key previously carried a stray trailing colon.
                    'gcn_loss': gcn_loss.item()
                },
                tag='train',
                n_iter=train_step)

    if epoch % cfg.save_freq == 0:
        save_model(model, epoch, scheduler.get_lr(), optimizer)

    print('Training Loss: {}'.format(losses.avg))