# Shared imports for the training / evaluation routines below. `args`,
# `logger`, and `writer` are assumed to be module-level globals set up by
# the surrounding scripts.
import os
import random
import time

import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.distributed as dist
from tqdm import tqdm

try:
    import apex  # optional, only used when args.use_apex is set
except ImportError:
    apex = None


def validate(val_loader, model, criterion):
    logger.info('>>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>')
    batch_time = AverageMeter()
    data_time = AverageMeter()
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()
    top1_meter = AverageMeter()
    top5_meter = AverageMeter()

    model.eval()
    end = time.time()
    for i, (input, target) in enumerate(val_loader):
        data_time.update(time.time() - end)
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)
        with torch.no_grad():
            output = model(input)
            loss = criterion(output, target)

        top1, top5 = cal_accuracy(output, target, topk=(1, 5))
        n = input.size(0)
        loss_meter.update(loss.item(), n)
        top1_meter.update(top1.item(), n)
        top5_meter.update(top5.item(), n)

        output = output.max(1)[1]
        intersection, union, target = intersectionAndUnionGPU(output, target, args.classes, args.ignore_label)
        intersection, union, target = intersection.cpu().numpy(), union.cpu().numpy(), target.cpu().numpy()
        intersection_meter.update(intersection)
        union_meter.update(union)
        target_meter.update(target)
        accuracy = sum(intersection_meter.val) / (sum(target_meter.val) + 1e-10)

        batch_time.update(time.time() - end)
        end = time.time()
        if (i + 1) % args.print_freq == 0:
            logger.info('Test: [{}/{}] '
                        'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                        'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                        'Loss {loss_meter.val:.4f} ({loss_meter.avg:.4f}) '
                        'Accuracy {accuracy:.4f} '
                        'Acc@1 {top1.val:.3f} ({top1.avg:.3f}) '
                        'Acc@5 {top5.val:.3f} ({top5.avg:.3f}).'.format(i + 1, len(val_loader),
                                                                        data_time=data_time,
                                                                        batch_time=batch_time,
                                                                        loss_meter=loss_meter,
                                                                        accuracy=accuracy,
                                                                        top1=top1_meter,
                                                                        top5=top5_meter))

    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    logger.info('Val result: mIoU/mAcc/allAcc/top1/top5 {:.4f}/{:.4f}/{:.4f}/{:.4f}/{:.4f}.'.format(
        mIoU, mAcc, allAcc, top1_meter.avg, top5_meter.avg))
    for i in range(args.classes):
        logger.info('Class_{} Result: iou/accuracy {:.4f}/{:.4f}.'.format(i, iou_class[i], accuracy_class[i]))
    logger.info('<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<')
    return loss_meter.avg, mIoU, mAcc, allAcc, top1_meter.avg, top5_meter.avg
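
# `AverageMeter` and `cal_accuracy` are referenced throughout this file but
# defined elsewhere in the repo. Minimal sketches, inferred from how they are
# called here (.val/.avg/.sum attributes, top-k accuracy in percent); the
# originals may differ in detail.
class AverageMeter(object):
    """Tracks the latest value and a running (weighted) sum/average."""

    def __init__(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum = self.sum + val * n  # works for scalars and numpy arrays
        self.count += n
        self.avg = self.sum / self.count


def cal_accuracy(output, target, topk=(1,)):
    """Top-k accuracy (in percent) of `output` logits against `target` labels."""
    maxk = max(topk)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / target.size(0)))
    return res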
def test_epoch(test_loader, model, epoch, criterion):
    test_loss = 0.0
    count = 0.0
    model.eval()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()
    for data, label in test_loader:
        data, label = data.cuda(non_blocking=True), label.cuda(non_blocking=True).squeeze(1)
        data = data.permute(0, 2, 1)
        batch_size = data.size(0)
        with torch.no_grad():  # evaluation only; no gradients needed
            logits = model(data)
            # Loss: use the model's output directly
            loss = criterion(logits, label)
        if args.multiprocessing_distributed:
            loss = loss * batch_size
            _count = label.new_tensor([batch_size], dtype=torch.long).cuda(non_blocking=True)
            dist.all_reduce(loss)
            dist.all_reduce(_count)
            n = _count.item()
            loss = loss / n
        else:
            loss = torch.mean(loss)

        preds = logits.max(dim=1)[1]
        count += batch_size
        test_loss += loss.item() * batch_size
        intersection, union, target = intersectionAndUnionGPU(preds, label, args.classes)
        if args.multiprocessing_distributed:
            dist.all_reduce(intersection)
            dist.all_reduce(union)
            dist.all_reduce(target)
        intersection, union, target = intersection.cpu().numpy(), union.cpu().numpy(), target.cpu().numpy()
        intersection_meter.update(intersection)
        union_meter.update(union)
        target_meter.update(target)

    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    outstr = 'Test %d, loss: %.6f, test acc: %.6f, ' \
             'test avg acc: %.6f' % (epoch + 1, test_loss * 1.0 / count, allAcc, mAcc)
    if main_process():
        logger.info(outstr)
        # Write to tensorboard
        writer.add_scalar('loss_test', test_loss * 1.0 / count, epoch + 1)
        writer.add_scalar('mAcc_test', mAcc, epoch + 1)
        writer.add_scalar('allAcc_test', allAcc, epoch + 1)
    return allAcc
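
# `main_process()` gates logging and tensorboard writes to a single process.
# It is not defined in this file; a minimal sketch of the usual convention
# (only rank 0 of the local node logs), assuming the same args flags used
# throughout this file:
def main_process():
    return not args.multiprocessing_distributed or (
        args.multiprocessing_distributed and args.rank % args.ngpus_per_node == 0)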
def validate(val_loader, model, criterion):
    logger.info('>>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>')
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()

    model.eval()
    for i, (input, target) in tqdm(enumerate(val_loader), total=len(val_loader)):
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)
        with torch.no_grad():  # evaluation only; no gradients needed
            output = model(input)
        if args.zoom_factor != 8:
            output = F.interpolate(output, size=target.size()[1:], mode='bilinear', align_corners=True)
        loss = criterion(output, target)
        loss = torch.mean(loss)

        # softmax is monotonic, so this yields the same labels as an argmax over logits
        output = F.softmax(output, dim=1).max(1)[1]
        intersection, union, target = intersectionAndUnionGPU(output, target, args.classes, args.ignore_label)
        intersection, union, target = intersection.cpu().numpy(), union.cpu().numpy(), target.cpu().numpy()
        intersection_meter.update(intersection)
        union_meter.update(union)
        target_meter.update(target)
        loss_meter.update(loss.item(), input.size(0))

    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    logger.info('Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(mIoU, mAcc, allAcc))
    for i in range(args.classes):
        logger.info('Class_{} Result: iou/accuracy {:.4f}/{:.4f}.'.format(i, iou_class[i], accuracy_class[i]))
    logger.info('<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<')
    return loss_meter.avg, mIoU, mAcc, allAcc
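
# `intersectionAndUnionGPU` is imported from the repo's util module. A sketch
# matching the common semseg implementation consistent with how it is called
# here: per-class intersection/union/target pixel counts via histograms, with
# `ignore_index` pixels excluded. The running sums of these counts give
# per-class IoU (intersection / union), whose mean is the mIoU reported above.
def intersectionAndUnionGPU(output, target, K, ignore_index=255):
    # output/target are predicted/true label maps of shape N, N*L, or N*H*W
    assert output.dim() in [1, 2, 3] and output.shape == target.shape
    output = output.view(-1)
    target = target.view(-1)
    output[target == ignore_index] = ignore_index
    intersection = output[output == target]
    area_intersection = torch.histc(intersection.float(), bins=K, min=0, max=K - 1)
    area_output = torch.histc(output.float(), bins=K, min=0, max=K - 1)
    area_target = torch.histc(target.float(), bins=K, min=0, max=K - 1)
    area_union = area_output + area_target - area_intersection
    return area_intersection, area_union, area_target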
def train(train_loader, model, criterion, optimizer, epoch):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()

    model.train()
    end = time.time()
    max_iter = args.epochs * len(train_loader)
    for i, (input, target) in enumerate(train_loader):
        data_time.update(time.time() - end)
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)
        output = model(input)
        if target.shape[-1] == 1:
            target = target[:, 0]  # for cls
        loss = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        output = output.max(1)[1]
        intersection, union, target = intersectionAndUnionGPU(output, target, args.classes, args.ignore_label)
        intersection, union, target = intersection.cpu().numpy(), union.cpu().numpy(), target.cpu().numpy()
        intersection_meter.update(intersection)
        union_meter.update(union)
        target_meter.update(target)
        accuracy = sum(intersection_meter.val) / (sum(target_meter.val) + 1e-10)
        loss_meter.update(loss.item(), input.size(0))
        batch_time.update(time.time() - end)
        end = time.time()

        # calculate remaining time
        current_iter = epoch * len(train_loader) + i + 1
        remain_iter = max_iter - current_iter
        remain_time = remain_iter * batch_time.avg
        t_m, t_s = divmod(remain_time, 60)
        t_h, t_m = divmod(t_m, 60)
        remain_time = '{:02d}:{:02d}:{:02d}'.format(int(t_h), int(t_m), int(t_s))

        if (i + 1) % args.print_freq == 0:
            logger.info('Epoch: [{}/{}][{}/{}] '
                        'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                        'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                        'Remain {remain_time} '
                        'Loss {loss_meter.val:.4f} '
                        'Accuracy {accuracy:.4f}.'.format(epoch + 1, args.epochs, i + 1, len(train_loader),
                                                          batch_time=batch_time, data_time=data_time,
                                                          remain_time=remain_time,
                                                          loss_meter=loss_meter, accuracy=accuracy))
        writer.add_scalar('loss_train_batch', loss_meter.val, current_iter)
        writer.add_scalar('mIoU_train_batch', np.mean(intersection / (union + 1e-10)), current_iter)
        writer.add_scalar('mAcc_train_batch', np.mean(intersection / (target + 1e-10)), current_iter)
        writer.add_scalar('allAcc_train_batch', accuracy, current_iter)

    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    logger.info('Train result at epoch [{}/{}]: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(
        epoch + 1, args.epochs, mIoU, mAcc, allAcc))
    return loss_meter.avg, mIoU, mAcc, allAcc
def train(train_loader, model, criterion, optimizer, epoch):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()
    top1_meter = AverageMeter()
    top5_meter = AverageMeter()

    model.train()
    # print(get_parameter_number(model))
    # exit(0)
    end = time.time()
    max_iter = args.epochs * len(train_loader)
    for i, (input, target) in enumerate(train_loader):
        data_time.update(time.time() - end)
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        if args.mixup_alpha:
            eps = args.label_smoothing if args.label_smoothing else 0.0
            input, target_a, target_b, lam = mixup_data(input, target, args.mixup_alpha)
            output = model(input)
            loss = mixup_loss(output, target_a, target_b, lam, eps)
        else:
            output = model(input)
            loss = smooth_loss(output, target, args.label_smoothing) if args.label_smoothing \
                else criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        top1, top5 = cal_accuracy(output, target, topk=(1, 5))
        n = input.size(0)
        if args.multiprocessing_distributed:
            with torch.no_grad():
                loss, top1, top5 = loss.detach() * n, top1 * n, top5 * n
                count = target.new_tensor([n], dtype=torch.long)
                dist.all_reduce(loss)
                dist.all_reduce(top1)
                dist.all_reduce(top5)
                dist.all_reduce(count)
                n = count.item()
                loss, top1, top5 = loss / n, top1 / n, top5 / n
        loss_meter.update(loss.item(), n)
        top1_meter.update(top1.item(), n)
        top5_meter.update(top5.item(), n)

        output = output.max(1)[1]
        intersection, union, target = intersectionAndUnionGPU(output, target, args.classes, args.ignore_label)
        if args.multiprocessing_distributed:
            dist.all_reduce(intersection)
            dist.all_reduce(union)
            dist.all_reduce(target)
        intersection, union, target = intersection.cpu().numpy(), union.cpu().numpy(), target.cpu().numpy()
        intersection_meter.update(intersection)
        union_meter.update(union)
        target_meter.update(target)
        accuracy = sum(intersection_meter.val) / (sum(target_meter.val) + 1e-10)
        batch_time.update(time.time() - end)
        end = time.time()

        # calculate remaining time
        current_iter = epoch * len(train_loader) + i + 1
        remain_iter = max_iter - current_iter
        remain_time = remain_iter * batch_time.avg
        t_m, t_s = divmod(remain_time, 60)
        t_h, t_m = divmod(t_m, 60)
        remain_time = '{:02d}:{:02d}:{:02d}'.format(int(t_h), int(t_m), int(t_s))

        if ((i + 1) % args.print_freq == 0) and main_process():
            logger.info('Epoch: [{}/{}][{}/{}] '
                        'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                        'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                        'Remain {remain_time} '
                        'Loss {loss_meter.val:.4f} '
                        'Accuracy {accuracy:.4f} '
                        'Acc@1 {top1.val:.3f} ({top1.avg:.3f}) '
                        'Acc@5 {top5.val:.3f} ({top5.avg:.3f}).'.format(epoch + 1, args.epochs, i + 1,
                                                                        len(train_loader),
                                                                        data_time=data_time,
                                                                        batch_time=batch_time,
                                                                        remain_time=remain_time,
                                                                        loss_meter=loss_meter,
                                                                        accuracy=accuracy,
                                                                        top1=top1_meter, top5=top5_meter))
        if main_process():
            writer.add_scalar('train/loss', loss_meter.val, current_iter)
            writer.add_scalar('train/mIoU', np.mean(intersection / (union + 1e-10)), current_iter)
            writer.add_scalar('train/mAcc', np.mean(intersection / (target + 1e-10)), current_iter)
            writer.add_scalar('train/allAcc', accuracy, current_iter)
            writer.add_scalar('train/top1', top1, current_iter)
            writer.add_scalar('train/top5', top5, current_iter)

    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    if main_process():
        logger.info('Train result at epoch [{}/{}]: mIoU/mAcc/allAcc/top1/top5 '
                    '{:.4f}/{:.4f}/{:.4f}/{:.4f}/{:.4f}.'.format(epoch + 1, args.epochs, mIoU, mAcc, allAcc,
                                                                 top1_meter.avg, top5_meter.avg))
    return loss_meter.avg, mIoU, mAcc, allAcc, top1_meter.avg, top5_meter.avg
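
# `mixup_data`, `mixup_loss`, and `smooth_loss` come from the repo's util
# module. Sketches of the standard formulations, assuming mixup draws
# lam ~ Beta(alpha, alpha) and label smoothing spreads `eps` mass over the
# non-target classes; the repo's versions may differ in detail.
def mixup_data(x, y, alpha=0.2):
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    index = torch.randperm(x.size(0), device=x.device)
    mixed_x = lam * x + (1 - lam) * x[index]
    return mixed_x, y, y[index], lam


def smooth_loss(pred, gold, eps=0.1):
    n_class = pred.size(1)
    one_hot = torch.full_like(pred, eps / (n_class - 1))
    one_hot = one_hot.scatter(1, gold.view(-1, 1), 1.0 - eps)  # eps=0 reduces to plain cross-entropy
    return -(one_hot * F.log_softmax(pred, dim=1)).sum(dim=1).mean()


def mixup_loss(pred, y_a, y_b, lam, eps=0.0):
    # convex combination of the two (optionally smoothed) cross-entropies
    return lam * smooth_loss(pred, y_a, eps) + (1 - lam) * smooth_loss(pred, y_b, eps)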
def train(train_loader, model, optimizer, epoch):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    main_loss_meter = AverageMeter()
    aux_loss_meter = AverageMeter()
    reg_loss_meter = AverageMeter()
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()
    final_loss_meter = AverageMeter()

    model.train()
    # Optionally freeze the backbone and heads (disabled): set
    # p.requires_grad = False for the parameters of layer0-layer4, ppm,
    # cls and aux, and put those modules in eval() mode.

    end = time.time()
    max_iter = args.epochs * len(train_loader)
    for i, (input, target, feat, featidx) in enumerate(train_loader):
        data_time.update(time.time() - end)
        if args.zoom_factor != 8:
            h = int((target.size()[1] - 1) / 8 * args.zoom_factor + 1)
            w = int((target.size()[2] - 1) / 8 * args.zoom_factor + 1)
            # 'nearest' mode doesn't support align_corners and 'bilinear' is fine for downsampling
            target = F.interpolate(target.unsqueeze(1).float(), size=(h, w),
                                   mode='bilinear', align_corners=True).squeeze(1).long()
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)
        feat = feat.cuda(non_blocking=True)
        featidx = featidx.cuda(non_blocking=True)

        output, main_loss, aux_loss, reg_loss, final_loss = model(input, target, feat, featidx)
        if not args.multiprocessing_distributed:
            main_loss, aux_loss, reg_loss, final_loss = (torch.mean(main_loss), torch.mean(aux_loss),
                                                         torch.mean(reg_loss), torch.mean(final_loss))
        loss = main_loss + args.aux_weight * aux_loss + reg_loss + final_loss

        optimizer.zero_grad()
        # if args.use_apex and args.multiprocessing_distributed:
        #     with apex.amp.scale_loss(loss, optimizer) as scaled_loss:
        #         scaled_loss.backward()
        # else:
        loss.backward()
        optimizer.step()

        n = input.size(0)
        if args.multiprocessing_distributed:
            # not considering ignore pixels
            main_loss, aux_loss, reg_loss, final_loss, loss = (main_loss.detach() * n, aux_loss * n,
                                                               reg_loss * n, final_loss * n, loss * n)
            count = target.new_tensor([n], dtype=torch.long)
            dist.all_reduce(main_loss)
            dist.all_reduce(aux_loss)
            dist.all_reduce(reg_loss)
            dist.all_reduce(final_loss)
            dist.all_reduce(loss)
            dist.all_reduce(count)
            n = count.item()
            main_loss, aux_loss, reg_loss, final_loss, loss = (main_loss / n, aux_loss / n, reg_loss / n,
                                                               final_loss / n, loss / n)

        intersection, union, target = intersectionAndUnionGPU(output, target, args.classes, args.ignore_label)
        if args.multiprocessing_distributed:
            dist.all_reduce(intersection)
            dist.all_reduce(union)
            dist.all_reduce(target)
        intersection, union, target = intersection.cpu().numpy(), union.cpu().numpy(), target.cpu().numpy()
        intersection_meter.update(intersection)
        union_meter.update(union)
        target_meter.update(target)
        accuracy = sum(intersection_meter.val) / (sum(target_meter.val) + 1e-10)

        main_loss_meter.update(main_loss.item(), n)
        aux_loss_meter.update(aux_loss.item(), n)
        reg_loss_meter.update(reg_loss.item(), n)
        final_loss_meter.update(final_loss.item(), n)
        loss_meter.update(loss.item(), n)
        batch_time.update(time.time() - end)
        end = time.time()

        current_iter = epoch * len(train_loader) + i + 1
        current_lr = poly_learning_rate(args.base_lr, current_iter, max_iter, power=args.power)
        for index in range(0, args.index_split):
            optimizer.param_groups[index]['lr'] = current_lr
        for index in range(args.index_split, len(optimizer.param_groups)):
            optimizer.param_groups[index]['lr'] = current_lr * 10

        remain_iter = max_iter - current_iter
        remain_time = remain_iter * batch_time.avg
        t_m, t_s = divmod(remain_time, 60)
        t_h, t_m = divmod(t_m, 60)
        remain_time = '{:02d}:{:02d}:{:02d}'.format(int(t_h), int(t_m), int(t_s))

        if (i + 1) % args.print_freq == 0 and main_process():
            logger.info('Epoch: [{}/{}][{}/{}] '
                        'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                        'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                        'Remain {remain_time} '
                        'MainLoss {main_loss_meter.val:.4f} '
                        'AuxLoss {aux_loss_meter.val:.4f} '
                        'RegLoss {reg_loss_meter.val:.4f} '
                        'FinalLoss {final_loss_meter.val:.4f} '
                        'Loss {loss_meter.val:.4f} '
                        'Accuracy {accuracy:.4f}.'.format(epoch + 1, args.epochs, i + 1, len(train_loader),
                                                          batch_time=batch_time, data_time=data_time,
                                                          remain_time=remain_time,
                                                          main_loss_meter=main_loss_meter,
                                                          aux_loss_meter=aux_loss_meter,
                                                          reg_loss_meter=reg_loss_meter,
                                                          final_loss_meter=final_loss_meter,
                                                          loss_meter=loss_meter, accuracy=accuracy))
        if main_process():
            writer.add_scalar('loss_train_batch', main_loss_meter.val, current_iter)
            writer.add_scalar('mIoU_train_batch', np.mean(intersection / (union + 1e-10)), current_iter)
            writer.add_scalar('mAcc_train_batch', np.mean(intersection / (target + 1e-10)), current_iter)
            writer.add_scalar('allAcc_train_batch', accuracy, current_iter)

    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    if main_process():
        logger.info('Train result at epoch [{}/{}]: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(
            epoch + 1, args.epochs, mIoU, mAcc, allAcc))
    return main_loss_meter.avg, mIoU, mAcc, allAcc
def train(train_loader, model, optimizer, epoch):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    main_loss_meter = AverageMeter()
    aux_loss_meter = AverageMeter()
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()

    model.train()
    end = time.time()
    max_iter = args.epochs * len(train_loader)
    for i, (input, target) in enumerate(train_loader):
        data_time.update(time.time() - end)
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)
        output, main_loss, aux_loss = model(input, target)
        if not args.multiprocessing_distributed:
            main_loss, aux_loss = torch.mean(main_loss), torch.mean(aux_loss)
        loss = main_loss + args.aux_weight * aux_loss

        optimizer.zero_grad()
        if args.use_apex and args.multiprocessing_distributed:
            with apex.amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        optimizer.step()

        n = input.size(0)
        if args.multiprocessing_distributed:
            # not considering ignore pixels
            main_loss, aux_loss, loss = main_loss.detach() * n, aux_loss * n, loss * n
            count = target.new_tensor([n], dtype=torch.long)
            dist.all_reduce(main_loss)
            dist.all_reduce(aux_loss)
            dist.all_reduce(loss)
            dist.all_reduce(count)
            n = count.item()
            main_loss, aux_loss, loss = main_loss / n, aux_loss / n, loss / n

        intersection, union, target = intersectionAndUnionGPU(output, target, args.classes, args.ignore_label)
        if args.multiprocessing_distributed:
            dist.all_reduce(intersection)
            dist.all_reduce(union)
            dist.all_reduce(target)
        intersection, union, target = intersection.cpu().numpy(), union.cpu().numpy(), target.cpu().numpy()
        intersection_meter.update(intersection)
        union_meter.update(union)
        target_meter.update(target)
        accuracy = sum(intersection_meter.val) / (sum(target_meter.val) + 1e-10)

        main_loss_meter.update(main_loss.item(), n)
        aux_loss_meter.update(aux_loss.item(), n)
        loss_meter.update(loss.item(), n)
        batch_time.update(time.time() - end)
        end = time.time()

        current_iter = epoch * len(train_loader) + i + 1
        current_lr = poly_learning_rate(args.base_lr, current_iter, max_iter, power=args.power)
        for index in range(0, args.index_split):
            optimizer.param_groups[index]['lr'] = current_lr
        for index in range(args.index_split, len(optimizer.param_groups)):
            optimizer.param_groups[index]['lr'] = current_lr * 10

        remain_iter = max_iter - current_iter
        remain_time = remain_iter * batch_time.avg
        t_m, t_s = divmod(remain_time, 60)
        t_h, t_m = divmod(t_m, 60)
        remain_time = '{:02d}:{:02d}:{:02d}'.format(int(t_h), int(t_m), int(t_s))

        if (i + 1) % args.print_freq == 0 and main_process():
            logger.info('Epoch: [{}/{}][{}/{}] '
                        'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                        'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                        'Remain {remain_time} '
                        'MainLoss {main_loss_meter.val:.4f} '
                        'AuxLoss {aux_loss_meter.val:.4f} '
                        'Loss {loss_meter.val:.4f} '
                        'Accuracy {accuracy:.4f}.'.format(epoch + 1, args.epochs, i + 1, len(train_loader),
                                                          batch_time=batch_time, data_time=data_time,
                                                          remain_time=remain_time,
                                                          main_loss_meter=main_loss_meter,
                                                          aux_loss_meter=aux_loss_meter,
                                                          loss_meter=loss_meter, accuracy=accuracy))
        if main_process():
            writer.add_scalar('loss_train_batch', main_loss_meter.val, current_iter)
            writer.add_scalar('mIoU_train_batch', np.mean(intersection / (union + 1e-10)), current_iter)
            writer.add_scalar('mAcc_train_batch', np.mean(intersection / (target + 1e-10)), current_iter)
            writer.add_scalar('allAcc_train_batch', accuracy, current_iter)

    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    if main_process():
        logger.info('Train result at epoch [{}/{}]: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(
            epoch + 1, args.epochs, mIoU, mAcc, allAcc))
    return main_loss_meter.avg, mIoU, mAcc, allAcc
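
# `poly_learning_rate` is used in two flavors across this file: the functional
# form sketched below, which just returns the decayed LR and lets the caller
# write it into optimizer.param_groups (as the train() functions above do),
# and an in-place variant with warmup used by the few-shot train() further
# down. A sketch of the functional form:
def poly_learning_rate(base_lr, curr_iter, max_iter, power=0.9):
    """Polynomial LR decay: lr = base_lr * (1 - iter / max_iter) ** power."""
    return base_lr * (1 - float(curr_iter) / max_iter) ** power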
def validate(val_loader, model, criterion, args):
    if main_process():
        logger.info('>>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>')
    batch_time = AverageMeter()
    model_time = AverageMeter()
    data_time = AverageMeter()
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()
    # number of novel classes per split: 20 for COCO, 5 for PASCAL-5i
    if args.use_coco:
        split_gap = 20
    else:
        split_gap = 5
    class_intersection_meter = [0] * split_gap
    class_union_meter = [0] * split_gap

    if args.manual_seed is not None and args.fix_random_seed_val:
        torch.cuda.manual_seed(args.manual_seed)
        np.random.seed(args.manual_seed)
        torch.manual_seed(args.manual_seed)
        torch.cuda.manual_seed_all(args.manual_seed)
        random.seed(args.manual_seed)

    model.eval()
    end = time.time()
    if args.split != 999:
        if args.use_coco:
            test_num = 20000
        else:
            test_num = 1000
    else:
        test_num = len(val_loader)
    assert test_num % args.batch_size_val == 0
    iter_num = 0
    total_time = 0
    for e in range(20):
        for i, (input, target, s_input, s_mask, s_init_seed, subcls, ori_label, index,
                sample_class) in enumerate(val_loader):
            if (iter_num - 1) * args.batch_size_val >= test_num:
                break
            iter_num += 1
            data_time.update(time.time() - end)
            input = input.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)
            ori_label = ori_label.cuda(non_blocking=True)
            start_time = time.time()
            output = model(s_x=s_input, s_y=s_mask, x=input, y=target, s_seed=s_init_seed)
            total_time = total_time + 1
            model_time.update(time.time() - start_time)

            if False:  # args.ori_resize (disabled): evaluate at the original label resolution
                longerside = max(ori_label.size(1), ori_label.size(2))
                backmask = torch.ones(ori_label.size(0), longerside, longerside).cuda() * 255
                backmask[0, :ori_label.size(1), :ori_label.size(2)] = ori_label
                target = backmask.clone().long()

            output = F.interpolate(output, size=target.size()[1:], mode='bilinear', align_corners=True)
            loss = criterion(output, target)
            n = input.size(0)
            loss = torch.mean(loss)

            output = output.max(1)[1]
            intersection, union, new_target = intersectionAndUnionGPU(output, target, args.classes,
                                                                      args.ignore_label)
            intersection, union, target, new_target = (intersection.cpu().numpy(), union.cpu().numpy(),
                                                       target.cpu().numpy(), new_target.cpu().numpy())
            intersection_meter.update(intersection)
            union_meter.update(union)
            target_meter.update(new_target)
            subcls = subcls[0].cpu().numpy()[0]
            class_intersection_meter[(subcls - 1) % split_gap] += intersection[1]
            class_union_meter[(subcls - 1) % split_gap] += union[1]

            accuracy = sum(intersection_meter.val) / (sum(target_meter.val) + 1e-10)
            loss_meter.update(loss.item(), input.size(0))
            batch_time.update(time.time() - end)
            end = time.time()
            if ((i + 1) % (test_num // 100) == 0) and main_process():
                logger.info('Test: [{}/{}] '
                            'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                            'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                            'Loss {loss_meter.val:.4f} ({loss_meter.avg:.4f}) '
                            'Accuracy {accuracy:.4f}.'.format(iter_num * args.batch_size_val, test_num,
                                                              data_time=data_time, batch_time=batch_time,
                                                              loss_meter=loss_meter, accuracy=accuracy))

            # visualize: save [image | ground truth | prediction] side by side
            pred_label = output[0]
            output_mask = pred_label.cpu().numpy() * 255
            img = val_data.data_list[index][0]  # val_data is the global dataset object
            output_path = args.save_path + '/visualize/'
            if not os.path.exists(output_path):
                os.makedirs(output_path)
            output_file = output_path + str(e) + str(i) + '.jpg'
            origin = cv2.imread(img, cv2.IMREAD_COLOR)
            mask = ori_label[0].cpu().numpy() * 255
            origin = cv2.resize(origin, dsize=(473, 473), interpolation=cv2.INTER_LINEAR)
            mask = cv2.resize(mask, dsize=(473, 473), interpolation=cv2.INTER_LINEAR)
            output = np.zeros((473, 1419, 3)).astype('int32')  # three 473-px panels
            output[:, :473] = origin
            output[:, 473:946] = mask[:, :, np.newaxis]
            output[:, 946:] = output_mask[:, :, np.newaxis]
            cv2.imwrite(output_file, output)

    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)

    class_iou_class = []
    class_miou = 0
    for i in range(len(class_intersection_meter)):
        class_iou = class_intersection_meter[i] / (class_union_meter[i] + 1e-10)
        class_iou_class.append(class_iou)
        class_miou += class_iou
    class_miou = class_miou * 1.0 / len(class_intersection_meter)
    logger.info('meanIoU---Val result: mIoU {:.4f}.'.format(class_miou))
    for i in range(split_gap):
        logger.info('Class_{} Result: iou {:.4f}.'.format(i + 1, class_iou_class[i]))
    if main_process():
        logger.info('FBIoU---Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(mIoU, mAcc, allAcc))
        for i in range(args.classes):
            logger.info('Class_{} Result: iou/accuracy {:.4f}/{:.4f}.'.format(i, iou_class[i],
                                                                              accuracy_class[i]))
        logger.info('<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<')
    print('avg inference time: {:.4f}, count: {}'.format(model_time.avg, test_num))
    return loss_meter.avg, mIoU, mAcc, allAcc, class_miou
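
# The visualization above tiles [image | GT mask | prediction] into one
# 473x1419 canvas with hard-coded offsets. An equivalent helper (hypothetical
# name, panel width as a parameter) that does the same thing:
def tile_panels(origin, mask, pred, side=473):
    canvas = np.zeros((side, side * 3, 3), dtype=np.uint8)
    canvas[:, :side] = origin                       # left: RGB image
    canvas[:, side:2 * side] = mask[:, :, np.newaxis]   # middle: ground truth
    canvas[:, 2 * side:] = pred[:, :, np.newaxis]       # right: prediction
    return canvas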
def train(train_loader, model, criterion, optimizer, epoch, correlation_loss):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    loss_meter = AverageMeter()
    main_loss_meter = AverageMeter()
    corr_loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()

    model.train()
    end = time.time()
    max_iter = args.epochs * len(train_loader)
    for i, (input, target) in enumerate(train_loader):
        data_time.update(time.time() - end)
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)
        output = model(input)
        if target.shape[-1] == 1:
            target = target[:, 0]  # for cls
        main_loss = criterion(output, target)

        # Penalize pairwise cosine similarity between the m kernels in each
        # PAConv weight bank so the kernels stay diverse.
        corr_loss = 0.0
        corr_loss_scale = args.get('correlation_loss_scale', 10.0)
        if correlation_loss:
            for m in model.module.SA_modules.named_modules():
                if isinstance(m[-1], PAConv):
                    kernel_matrice, output_dim, m_dim = m[-1].weightbank, m[-1].output_dim, m[-1].m
                    new_kernel_matrice = kernel_matrice.view(-1, m_dim, output_dim) \
                        .permute(1, 0, 2).reshape(m_dim, -1)
                    cost_matrice = torch.matmul(new_kernel_matrice, new_kernel_matrice.T) / torch.matmul(
                        torch.sqrt(torch.sum(new_kernel_matrice ** 2, dim=-1, keepdim=True)),
                        torch.sqrt(torch.sum(new_kernel_matrice.T ** 2, dim=0, keepdim=True)))
                    corr_loss += torch.sum(torch.triu(cost_matrice, diagonal=1) ** 2)
        loss = main_loss + corr_loss_scale * corr_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        output = output.max(1)[1]
        intersection, union, target = intersectionAndUnionGPU(output, target, args.classes, args.ignore_label)
        intersection, union, target = intersection.cpu().numpy(), union.cpu().numpy(), target.cpu().numpy()
        intersection_meter.update(intersection)
        union_meter.update(union)
        target_meter.update(target)
        accuracy = sum(intersection_meter.val) / (sum(target_meter.val) + 1e-10)
        loss_meter.update(loss.item(), input.size(0))
        main_loss_meter.update(main_loss.item(), input.size(0))
        corr_loss_meter.update(corr_loss.item() * corr_loss_scale if correlation_loss else corr_loss,
                               input.size(0))
        batch_time.update(time.time() - end)
        end = time.time()

        # calculate remaining time
        current_iter = epoch * len(train_loader) + i + 1
        remain_iter = max_iter - current_iter
        remain_time = remain_iter * batch_time.avg
        t_m, t_s = divmod(remain_time, 60)
        t_h, t_m = divmod(t_m, 60)
        remain_time = '{:02d}:{:02d}:{:02d}'.format(int(t_h), int(t_m), int(t_s))

        if (i + 1) % args.print_freq == 0:
            logger.info('Epoch: [{}/{}][{}/{}] '
                        'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                        'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                        'Remain {remain_time} '
                        'Loss {loss_meter.val:.4f} '
                        'Main Loss {main_loss_meter.val:.4f} '
                        'Corr Loss {corr_loss_meter.val:.4f} '
                        'Accuracy {accuracy:.4f}.'.format(epoch + 1, args.epochs, i + 1, len(train_loader),
                                                          batch_time=batch_time, data_time=data_time,
                                                          remain_time=remain_time, loss_meter=loss_meter,
                                                          main_loss_meter=main_loss_meter,
                                                          corr_loss_meter=corr_loss_meter,
                                                          accuracy=accuracy))
        writer.add_scalar('loss_train_batch', loss_meter.val, current_iter)
        writer.add_scalar('mIoU_train_batch', np.mean(intersection / (union + 1e-10)), current_iter)
        writer.add_scalar('mAcc_train_batch', np.mean(intersection / (target + 1e-10)), current_iter)
        writer.add_scalar('allAcc_train_batch', accuracy, current_iter)

    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    logger.info('Train result at epoch [{}/{}]: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(
        epoch + 1, args.epochs, mIoU, mAcc, allAcc))
    return loss_meter.avg, mIoU, mAcc, allAcc
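
# Standalone restatement of the weight-bank penalty computed in train() above,
# for clarity: the rows of `k` are the m kernels of one PAConv layer, `cos` is
# their pairwise cosine-similarity matrix, and the loss is the squared sum of
# its strict upper triangle. Function name and shapes are illustrative
# assumptions; PAConv is the repo's layer class.
def weight_bank_correlation(weightbank, m, output_dim):
    k = weightbank.view(-1, m, output_dim).permute(1, 0, 2).reshape(m, -1)
    norm = k.norm(dim=-1, keepdim=True)  # (m, 1) kernel norms
    cos = (k @ k.T) / (norm @ norm.T)    # (m, m) cosine similarities
    return torch.sum(torch.triu(cos, diagonal=1) ** 2)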
def train(train_loader, model, optimizer, epoch):
    ## step 1: set up the metric meters, updated every iteration
    batch_time = AverageMeter()
    data_time = AverageMeter()
    main_loss_meter = AverageMeter()
    aux_loss_meter = AverageMeter()
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()

    model.train()
    end = time.time()
    max_iter = args.epochs * len(train_loader)
    ## step 2: inner loop over the epoch
    for i, (input, target) in enumerate(train_loader):
        data_time.update(time.time() - end)
        if args.zoom_factor != 8:
            h = int((target.size()[1] - 1) / 8 * args.zoom_factor + 1)
            w = int((target.size()[2] - 1) / 8 * args.zoom_factor + 1)
            # 'nearest' mode doesn't support align_corners and 'bilinear' is fine for downsampling
            target = F.interpolate(target.unsqueeze(1).float(), size=(h, w),
                                   mode='bilinear', align_corners=True).squeeze(1).long()
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)
        output, main_loss, aux_loss = model(input, target)  # prediction and losses
        if not args.multiprocessing_distributed:
            main_loss, aux_loss = torch.mean(main_loss), torch.mean(aux_loss)
        loss = main_loss + args.aux_weight * aux_loss

        ## step 3: backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        n = input.size(0)  # batch size on this GPU
        if args.multiprocessing_distributed:
            # not considering ignore pixels
            main_loss, aux_loss, loss = main_loss.detach() * n, aux_loss * n, loss * n
            count = target.new_tensor([n], dtype=torch.long)
            dist.all_reduce(main_loss)
            dist.all_reduce(aux_loss)
            dist.all_reduce(loss)
            dist.all_reduce(count)
            n = count.item()
            main_loss, aux_loss, loss = main_loss / n, aux_loss / n, loss / n

        ## step 4: update the evaluation metrics
        intersection, union, target = intersectionAndUnionGPU(output, target, args.classes, args.ignore_label)
        if args.multiprocessing_distributed:
            dist.all_reduce(intersection)
            dist.all_reduce(union)
            dist.all_reduce(target)
        intersection, union, target = intersection.cpu().numpy(), union.cpu().numpy(), target.cpu().numpy()
        intersection_meter.update(intersection)
        union_meter.update(union)
        target_meter.update(target)
        accuracy = sum(intersection_meter.val) / (sum(target_meter.val) + 1e-10)
        main_loss_meter.update(main_loss.item(), n)
        aux_loss_meter.update(aux_loss.item(), n)
        loss_meter.update(loss.item(), n)
        batch_time.update(time.time() - end)
        end = time.time()

        ## step 5: adjust the learning rate
        current_iter = epoch * len(train_loader) + i + 1
        current_lr = poly_learning_rate(args.base_lr, current_iter, max_iter, power=args.power)
        for index in range(0, args.index_split):
            optimizer.param_groups[index]['lr'] = current_lr  # base LR for the original backbone
        for index in range(args.index_split, len(optimizer.param_groups)):
            optimizer.param_groups[index]['lr'] = current_lr * 10  # 10x LR for the prediction head

        remain_iter = max_iter - current_iter
        remain_time = remain_iter * batch_time.avg
        t_m, t_s = divmod(remain_time, 60)
        t_h, t_m = divmod(t_m, 60)
        remain_time = '{:02d}:{:02d}:{:02d}'.format(int(t_h), int(t_m), int(t_s))  # estimated remaining time

        ## step 6: logging
        if (i + 1) % args.print_freq == 0 and main_process():
            logger.info('Epoch: [{}/{}][{}/{}] '
                        'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                        'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                        'Remain {remain_time} '
                        'MainLoss {main_loss_meter.val:.4f} '
                        'AuxLoss {aux_loss_meter.val:.4f} '
                        'Loss {loss_meter.val:.4f} '
                        'Accuracy {accuracy:.4f}.'.format(epoch + 1, args.epochs, i + 1, len(train_loader),
                                                          batch_time=batch_time, data_time=data_time,
                                                          remain_time=remain_time,
                                                          main_loss_meter=main_loss_meter,
                                                          aux_loss_meter=aux_loss_meter,
                                                          loss_meter=loss_meter, accuracy=accuracy))
        if main_process():
            writer.add_scalar('loss_train_batch', main_loss_meter.val, current_iter)
            writer.add_scalar('mIoU_train_batch', np.mean(intersection / (union + 1e-10)), current_iter)
            writer.add_scalar('mAcc_train_batch', np.mean(intersection / (target + 1e-10)), current_iter)
            writer.add_scalar('allAcc_train_batch', accuracy, current_iter)

    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    if main_process():
        logger.info('Train result at epoch [{}/{}]: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(
            epoch + 1, args.epochs, mIoU, mAcc, allAcc))
    return main_loss_meter.avg, mIoU, mAcc, allAcc
def test(gpu, ngpus_per_node):
    if main_process():
        logger.info('<<<<<<<<<<<<<<<<< Start Evaluation <<<<<<<<<<<<<<<<<')

    # ============= Model ===================
    if args.arch == 'dgcnn':
        from model.DGCNN_PAConv import PAConv
        model = PAConv(args)
    elif args.arch == 'pointnet':
        from model.PointNet_PAConv import PAConv
        model = PAConv(args)
    else:
        raise Exception("Not implemented")
    if main_process():
        logger.info(model)

    if args.sync_bn:
        assert args.distributed == True
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
    if args.distributed:
        torch.cuda.set_device(gpu)
        args.batch_size = int(args.batch_size / ngpus_per_node)
        args.test_batch_size = int(args.test_batch_size / ngpus_per_node)
        args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
        model = torch.nn.parallel.DistributedDataParallel(model.cuda(), device_ids=[gpu],
                                                          find_unused_parameters=True)
    else:
        model = torch.nn.DataParallel(model.cuda())

    state_dict = torch.load("checkpoints/%s/best_model.t7" % args.exp_name,
                            map_location=torch.device('cpu'))
    # checkpoints saved from a bare model lack the 'module.' prefix that the
    # DataParallel/DDP wrapper expects, so add it if missing
    for k in state_dict.keys():
        if 'module' not in k:
            from collections import OrderedDict
            new_state_dict = OrderedDict()
            for k in state_dict:
                new_state_dict['module.' + k] = state_dict[k]
            state_dict = new_state_dict
        break
    model.load_state_dict(state_dict)

    # Dataloader (ModelNet40 is the repo's dataset class, imported elsewhere)
    test_data = ModelNet40(partition='test', num_points=args.num_points)
    if args.distributed:
        test_sampler = torch.utils.data.distributed.DistributedSampler(test_data)
    else:
        test_sampler = None
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.test_batch_size, shuffle=False,
                                              num_workers=args.workers, pin_memory=True,
                                              sampler=test_sampler)

    model.eval()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()
    for data, label in test_loader:
        data, label = data.cuda(non_blocking=True), label.cuda(non_blocking=True).squeeze(1)
        data = data.permute(0, 2, 1)
        with torch.no_grad():
            logits = model(data)
        preds = logits.max(dim=1)[1]
        intersection, union, target = intersectionAndUnionGPU(preds, label, args.classes)
        if args.multiprocessing_distributed:
            dist.all_reduce(intersection)
            dist.all_reduce(union)
            dist.all_reduce(target)
        intersection, union, target = intersection.cpu().numpy(), union.cpu().numpy(), target.cpu().numpy()
        intersection_meter.update(intersection)
        union_meter.update(union)
        target_meter.update(target)

    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    if main_process():
        logger.info('Test result: mAcc/allAcc {:.4f}/{:.4f}.'.format(mAcc, allAcc))
        for i in range(args.classes):
            logger.info('Class_{} Result: accuracy {:.4f}.'.format(i, accuracy_class[i]))
        logger.info('<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<')
def train_epoch(train_loader, model, opt, scheduler, epoch, criterion):
    train_loss = 0.0
    count = 0.0
    batch_time = AverageMeter()
    data_time = AverageMeter()
    forward_time = AverageMeter()
    backward_time = AverageMeter()
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()

    model.train()
    end = time.time()
    max_iter = args.epochs * len(train_loader)
    for ii, (data, label) in enumerate(train_loader):
        data_time.update(time.time() - end)
        data, label = data.cuda(non_blocking=True), label.cuda(non_blocking=True).squeeze(1)
        data = data.permute(0, 2, 1)
        batch_size = data.size(0)
        end2 = time.time()
        logits, loss = model(data, label, criterion)
        forward_time.update(time.time() - end2)
        preds = logits.max(dim=1)[1]
        if not args.multiprocessing_distributed:
            loss = torch.mean(loss)

        end3 = time.time()
        opt.zero_grad()
        # each process backwards its own loss through the optimizer that belongs to it
        loss.backward()
        opt.step()
        backward_time.update(time.time() - end3)

        # Loss
        if args.multiprocessing_distributed:
            loss = loss * batch_size
            _count = label.new_tensor([batch_size], dtype=torch.long).cuda(non_blocking=True)  # batch size on one process
            dist.all_reduce(loss)
            dist.all_reduce(_count)  # obtain the sum across all processes
            n = _count.item()
            loss = loss / n  # avg loss across all processes; from here on, same as the non-distributed path
        count += batch_size
        train_loss += loss.item() * batch_size
        loss_meter.update(loss.item(), batch_size)
        batch_time.update(time.time() - end)
        end = time.time()

        current_iter = epoch * len(train_loader) + ii + 1
        remain_iter = max_iter - current_iter
        remain_time = remain_iter * batch_time.avg
        t_m, t_s = divmod(remain_time, 60)
        t_h, t_m = divmod(t_m, 60)
        remain_time = '{:02d}:{:02d}:{:02d}'.format(int(t_h), int(t_m), int(t_s))

        if (ii + 1) % args.print_freq == 0 and main_process():
            logger.info('Epoch: [{}/{}][{}/{}] '
                        'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                        'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                        'Forward {for_time.val:.3f} ({for_time.avg:.3f}) '
                        'Backward {back_time.val:.3f} ({back_time.avg:.3f}) '
                        'Remain {remain_time} '
                        'Loss {loss_meter.val:.4f} '.format(epoch + 1, args.epochs, ii + 1, len(train_loader),
                                                            batch_time=batch_time, data_time=data_time,
                                                            for_time=forward_time, back_time=backward_time,
                                                            remain_time=remain_time, loss_meter=loss_meter))

        intersection, union, target = intersectionAndUnionGPU(preds, label, args.classes)
        if args.multiprocessing_distributed:
            # obtain the sum of the tensors across all processes: all_reduce
            dist.all_reduce(intersection)
            dist.all_reduce(union)
            dist.all_reduce(target)
        intersection, union, target = intersection.cpu().numpy(), union.cpu().numpy(), target.cpu().numpy()
        intersection_meter.update(intersection)
        union_meter.update(union)
        target_meter.update(target)

    scheduler.step()
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mAcc = np.mean(accuracy_class)
    # the first sum here aggregates the counts across all classes
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    outstr = 'Train %d, loss: %.6f, train acc: %.6f, ' \
             'train avg acc: %.6f' % (epoch + 1, train_loss * 1.0 / count, allAcc, mAcc)
    if main_process():
        logger.info(outstr)
        # Write to tensorboard
        writer.add_scalar('loss_train', train_loss * 1.0 / count, epoch + 1)
        writer.add_scalar('mAcc_train', mAcc, epoch + 1)
        writer.add_scalar('allAcc_train', allAcc, epoch + 1)
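
# train_epoch() above calls the model as model(data, label, criterion) and
# gets (logits, loss) back: the loss is computed inside forward() so that,
# under DDP, each process owns its loss and only the metric tensors need an
# explicit all_reduce. A minimal wrapper sketch of that interface
# (hypothetical class; the repo's PAConv models implement it natively):
class ClsWithLoss(nn.Module):
    def __init__(self, backbone):
        super().__init__()
        self.backbone = backbone

    def forward(self, x, y=None, criterion=None):
        logits = self.backbone(x)
        if criterion is None:
            return logits
        return logits, criterion(logits, y)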
def train(train_loader, model, optimizer, epoch):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    main_loss_meter = AverageMeter()
    aux_loss_meter = AverageMeter()
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()

    model.train()
    end = time.time()
    max_iter = args.epochs * len(train_loader)
    for i, (input, target) in tqdm(enumerate(train_loader), total=len(train_loader)):
        data_time.update(time.time() - end)
        if args.zoom_factor != 8:
            h = int((target.size()[1] - 1) / 8 * args.zoom_factor + 1)
            w = int((target.size()[2] - 1) / 8 * args.zoom_factor + 1)
            # 'nearest' mode doesn't support align_corners and 'bilinear' is fine for downsampling
            target = F.interpolate(target.unsqueeze(1).float(), size=(h, w),
                                   mode='bilinear', align_corners=True).squeeze(1).long()
        input = input.cuda()
        target = target.cuda()
        output, main_loss, aux_loss = model(input, target)
        main_loss = main_loss.mean()
        aux_loss = aux_loss.mean()
        loss = main_loss + args.aux_weight * aux_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        n = input.size(0)
        intersection, union, target = intersectionAndUnionGPU(output, target, args.classes, args.ignore_label)
        intersection, union, target = intersection.cpu().numpy(), union.cpu().numpy(), target.cpu().numpy()
        intersection_meter.update(intersection)
        union_meter.update(union)
        target_meter.update(target)
        accuracy = sum(intersection_meter.val) / (sum(target_meter.val) + 1e-10)
        main_loss_meter.update(main_loss.item(), n)
        aux_loss_meter.update(aux_loss.item(), n)
        loss_meter.update(loss.item(), n)
        batch_time.update(time.time() - end)
        end = time.time()  # reset the timer so batch_time measures per-batch time, not cumulative time

        # learning rate scheduling
        current_iter = epoch * len(train_loader) + i + 1
        current_lr = poly_learning_rate(args.base_lr, current_iter, max_iter, power=args.power)
        for index in range(0, args.index_split):
            optimizer.param_groups[index]['lr'] = current_lr
        for index in range(args.index_split, len(optimizer.param_groups)):
            optimizer.param_groups[index]['lr'] = current_lr * 10

        remain_iter = max_iter - current_iter
        remain_time = remain_iter * batch_time.avg
        t_m, t_s = divmod(remain_time, 60)
        t_h, t_m = divmod(t_m, 60)

        if (i + 1) % args.print_freq == 0:
            writer.add_scalar('loss/train_batch', main_loss_meter.val, current_iter)
            writer.add_scalar('mIoU/train_batch', np.mean(intersection / (union + 1e-10)), current_iter)
            writer.add_scalar('mAcc/train_batch', np.mean(intersection / (target + 1e-10)), current_iter)
            writer.add_scalar('allAcc/train_batch', accuracy, current_iter)

    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    logger.info('Train result at epoch [{}/{}]: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(
        epoch + 1, args.epochs, mIoU, mAcc, allAcc))
    logger.info(f'remaining time: {int(t_h)}h {int(t_m)}min {int(t_s)}sec')
    return main_loss_meter.avg, mIoU, mAcc, allAcc
def train(train_loader, model, optimizer, epoch):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    main_loss_meter = AverageMeter()
    aux_loss_meter = AverageMeter()
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()

    model.train()
    end = time.time()
    max_iter = args.epochs * len(train_loader)
    print('Warmup: {}'.format(args.warmup))
    for i, (input, target, s_input, s_mask, s_init_seed, subcls) in enumerate(train_loader):
        data_time.update(time.time() - end)
        current_iter = epoch * len(train_loader) + i + 1
        index_split = -1
        if args.base_lr > 1e-6:
            poly_learning_rate(optimizer, args.base_lr, current_iter, max_iter, power=args.power,
                               index_split=index_split, warmup=args.warmup,
                               warmup_step=len(train_loader) // 2)

        s_input = s_input.cuda(non_blocking=True)
        s_mask = s_mask.cuda(non_blocking=True)
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)
        s_init_seed = s_init_seed.cuda(non_blocking=True)

        output, main_loss, aux_loss = model(s_x=s_input, s_y=s_mask, x=input, y=target, s_seed=s_init_seed)
        if not args.multiprocessing_distributed:
            main_loss, aux_loss = torch.mean(main_loss), torch.mean(aux_loss)
        loss = main_loss + args.aux_weight * aux_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        n = input.size(0)
        if args.multiprocessing_distributed:
            main_loss, aux_loss, loss = main_loss.detach() * n, aux_loss * n, loss * n
            count = target.new_tensor([n], dtype=torch.long)
            dist.all_reduce(main_loss)
            dist.all_reduce(aux_loss)
            dist.all_reduce(loss)
            dist.all_reduce(count)
            n = count.item()
            main_loss, aux_loss, loss = main_loss / n, aux_loss / n, loss / n

        intersection, union, target = intersectionAndUnionGPU(output, target, args.classes, args.ignore_label)
        if args.multiprocessing_distributed:
            dist.all_reduce(intersection)
            dist.all_reduce(union)
            dist.all_reduce(target)
        intersection, union, target = intersection.cpu().numpy(), union.cpu().numpy(), target.cpu().numpy()
        intersection_meter.update(intersection)
        union_meter.update(union)
        target_meter.update(target)
        accuracy = sum(intersection_meter.val) / (sum(target_meter.val) + 1e-10)

        main_loss_meter.update(main_loss.item(), n)
        aux_loss_meter.update(aux_loss.item(), n)
        loss_meter.update(loss.item(), n)
        batch_time.update(time.time() - end)
        end = time.time()

        remain_iter = max_iter - current_iter
        remain_time = remain_iter * batch_time.avg
        t_m, t_s = divmod(remain_time, 60)
        t_h, t_m = divmod(t_m, 60)
        remain_time = '{:02d}:{:02d}:{:02d}'.format(int(t_h), int(t_m), int(t_s))

        if (i + 1) % args.print_freq == 0 and main_process():
            logger.info('Epoch: [{}/{}][{}/{}] '
                        'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                        'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                        'Remain {remain_time} '
                        'MainLoss {main_loss_meter.val:.4f} '
                        'AuxLoss {aux_loss_meter.val:.4f} '
                        'Loss {loss_meter.val:.4f} '
                        'Accuracy {accuracy:.4f}.'.format(epoch + 1, args.epochs, i + 1, len(train_loader),
                                                          batch_time=batch_time, data_time=data_time,
                                                          remain_time=remain_time,
                                                          main_loss_meter=main_loss_meter,
                                                          aux_loss_meter=aux_loss_meter,
                                                          loss_meter=loss_meter, accuracy=accuracy))
        if main_process():
            writer.add_scalar('loss_train_batch', main_loss_meter.val, current_iter)
            writer.add_scalar('aux_loss_train_batch', aux_loss_meter.val, current_iter)
            writer.add_scalar('mIoU_train_batch', np.mean(intersection / (union + 1e-10)), current_iter)
            writer.add_scalar('mAcc_train_batch', np.mean(intersection / (target + 1e-10)), current_iter)
            writer.add_scalar('allAcc_train_batch', accuracy, current_iter)

    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    if main_process():
        logger.info('Train result at epoch [{}/{}]: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(
            epoch + 1, args.epochs, mIoU, mAcc, allAcc))
        for i in range(args.classes):
            logger.info('Class_{} Result: iou/accuracy {:.4f}/{:.4f}.'.format(i, iou_class[i],
                                                                              accuracy_class[i]))
    return main_loss_meter.avg, aux_loss_meter.avg, mIoU, mAcc, allAcc
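
# The few-shot train() above uses an in-place variant of poly_learning_rate
# (defined in a different script from the functional one earlier) that writes
# into optimizer.param_groups and supports warmup. A sketch consistent with
# the call signature used above; the exact warmup shape is an assumption:
def poly_learning_rate(optimizer, base_lr, curr_iter, max_iter, power=0.9,
                       index_split=-1, warmup=False, warmup_step=500):
    if warmup and curr_iter < warmup_step:
        lr = base_lr * (0.1 + 0.9 * curr_iter / warmup_step)  # linear ramp from 0.1x to 1x
    else:
        lr = base_lr * (1 - float(curr_iter) / max_iter) ** power
    for index, param_group in enumerate(optimizer.param_groups):
        # groups after index_split (e.g. the decoder) get 10x the base LR
        param_group['lr'] = lr if index <= index_split else lr * 10
    return lr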
def train(train_loader, model, optimizer, epoch, epoch_log, val_loader, criterion):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    main_loss_meter = AverageMeter()
    aux_loss_meter = AverageMeter()
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()

    model.train()
    end = time.time()
    max_iter = args.epochs * len(train_loader)
    for i, (input, target, _) in enumerate(train_loader):
        current_iter = epoch * len(train_loader) + i

        # periodically validate and log qualitative results
        if args.just_vis or (args.evaluate and args.val_every_iter != -1
                             and current_iter % args.val_every_iter == 0):
            loss_val, mIoU_val, mAcc_val, allAcc_val, return_dict = validate(val_loader, model, criterion,
                                                                             args)
            if main_process():
                writer.add_scalar('VAL/loss_val', loss_val, current_iter)
                writer.add_scalar('VAL/mIoU_val', mIoU_val, current_iter)
                writer.add_scalar('VAL/mAcc_val', mAcc_val, current_iter)
                writer.add_scalar('VAL/allAcc_val', allAcc_val, current_iter)
                for sample_idx in range(len(return_dict['image_name_list'])):
                    writer.add_text('VAL-image_name/%d' % sample_idx,
                                    return_dict['image_name_list'][sample_idx], current_iter)
                    writer.add_image('VAL-image/%d' % sample_idx, return_dict['im_list'][sample_idx],
                                     current_iter, dataformats='HWC')
                    writer.add_image('VAL-color_label/%d' % sample_idx,
                                     return_dict['color_GT_list'][sample_idx], current_iter,
                                     dataformats='HWC')
                    writer.add_image('VAL-color_pred/%d' % sample_idx,
                                     return_dict['color_pred_list'][sample_idx], current_iter,
                                     dataformats='HWC')
            model.train()
            end = time.time()

        # periodically checkpoint
        if args.save_every_iter != -1 and current_iter % args.save_every_iter == 0 and main_process():
            model.eval()
            filename = args.save_path + '/train_epoch_' + str(epoch_log) + '_tid_' + str(current_iter) + '.pth'
            logger.info('Saving checkpoint to: ' + filename)
            torch.save({'epoch': epoch_log, 'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict()}, filename)
            model.train()
            end = time.time()

        data_time.update(time.time() - end)
        if args.zoom_factor != 8:
            h = int((target.size()[1] - 1) / 8 * args.zoom_factor + 1)
            w = int((target.size()[2] - 1) / 8 * args.zoom_factor + 1)
            # 'nearest' mode doesn't support align_corners and 'bilinear' is fine for downsampling
            target = F.interpolate(target.unsqueeze(1).float(), size=(h, w),
                                   mode='bilinear', align_corners=True).squeeze(1).long()
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)
        # if args.test_in_nyu_label_space:
        #     target = map_openrooms_nyu_gpu(target)
        output, main_loss, aux_loss = model(input, target)
        if not args.multiprocessing_distributed:
            main_loss, aux_loss = torch.mean(main_loss), torch.mean(aux_loss)
        loss = main_loss + args.aux_weight * aux_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        n = input.size(0)
        if args.multiprocessing_distributed:
            # not considering ignore pixels
            main_loss, aux_loss, loss = main_loss.detach() * n, aux_loss * n, loss * n
            count = target.new_tensor([n], dtype=torch.long)
            dist.all_reduce(main_loss)
            dist.all_reduce(aux_loss)
            dist.all_reduce(loss)
            dist.all_reduce(count)
            n = count.item()
            main_loss, aux_loss, loss = main_loss / n, aux_loss / n, loss / n

        # if args.test_in_nyu_label_space:
        #     intersection, union, target = intersectionAndUnionGPU(map_openrooms_nyu_gpu(output),
        #                                                           map_openrooms_nyu_gpu(target),
        #                                                           41, args.ignore_label)
        # else:
        intersection, union, target = intersectionAndUnionGPU(output, target, args.classes, args.ignore_label)
        if args.multiprocessing_distributed:
            dist.all_reduce(intersection)
            dist.all_reduce(union)
            dist.all_reduce(target)
        intersection, union, target = intersection.cpu().numpy(), union.cpu().numpy(), target.cpu().numpy()
        intersection_meter.update(intersection)
        union_meter.update(union)
        target_meter.update(target)
        accuracy = sum(intersection_meter.val) / (sum(target_meter.val) + 1e-10)

        main_loss_meter.update(main_loss.item(), n)
        aux_loss_meter.update(aux_loss.item(), n)
        loss_meter.update(loss.item(), n)
        batch_time.update(time.time() - end)
        end = time.time()

        current_lr = poly_learning_rate(args.base_lr, current_iter, max_iter, power=args.power)
        for index in range(0, args.index_split):
            optimizer.param_groups[index]['lr'] = current_lr
        for index in range(args.index_split, len(optimizer.param_groups)):
            optimizer.param_groups[index]['lr'] = current_lr * 10

        remain_iter = max_iter - current_iter
        remain_time = remain_iter * batch_time.avg
        t_m, t_s = divmod(remain_time, 60)
        t_h, t_m = divmod(t_m, 60)
        remain_time = '{:02d}:{:02d}:{:02d}'.format(int(t_h), int(t_m), int(t_s))

        if (i + 1) % args.print_freq == 0 and main_process():
            logger.info('Epoch: [{}/{}][{}/{}] '
                        'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                        'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                        'Remain {remain_time} '
                        'MainLoss {main_loss_meter.val:.4f} '
                        'AuxLoss {aux_loss_meter.val:.4f} '
                        'Loss {loss_meter.val:.4f} '
                        'Accuracy {accuracy:.4f}.'.format(epoch + 1, args.epochs, i + 1, len(train_loader),
                                                          batch_time=batch_time, data_time=data_time,
                                                          remain_time=remain_time,
                                                          main_loss_meter=main_loss_meter,
                                                          aux_loss_meter=aux_loss_meter,
                                                          loss_meter=loss_meter, accuracy=accuracy))
        if main_process():
            writer.add_scalar('TRAIN/loss_train_batch', main_loss_meter.val, current_iter)
            writer.add_scalar('TRAIN/mIoU_train_batch', np.mean(intersection / (union + 1e-10)), current_iter)
            writer.add_scalar('TRAIN/mAcc_train_batch', np.mean(intersection / (target + 1e-10)), current_iter)
            writer.add_scalar('TRAIN/allAcc_train_batch', accuracy, current_iter)

    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    if main_process():
        logger.info('Train result at epoch [{}/{}]: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(
            epoch + 1, args.epochs, mIoU, mAcc, allAcc))
    return main_loss_meter.avg, mIoU, mAcc, allAcc
def validate(val_loader, model, criterion):
    if main_process():
        logger.info('>>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>')
    batch_time = AverageMeter()
    model_time = AverageMeter()
    data_time = AverageMeter()
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()
    if args.use_coco:
        split_gap = 20
    else:
        split_gap = 5
    class_intersection_meter = [0] * split_gap
    class_union_meter = [0] * split_gap

    if args.manual_seed is not None and args.fix_random_seed_val:
        torch.cuda.manual_seed(args.manual_seed)
        np.random.seed(args.manual_seed)
        torch.manual_seed(args.manual_seed)
        torch.cuda.manual_seed_all(args.manual_seed)
        random.seed(args.manual_seed)

    model.eval()
    end = time.time()
    if args.split != 999:
        if args.use_coco:
            test_num = 20000
        else:
            test_num = 2000
    else:
        test_num = len(val_loader)
    assert test_num % args.batch_size_val == 0
    iter_num = 0
    total_time = 0  # counts forward passes
    for e in range(10):  # re-run the episodic loader until test_num episodes are evaluated
        for i, (input, target, s_input, s_mask, s_init_seed, subcls, ori_label) in enumerate(val_loader):
            if (iter_num - 1) * args.batch_size_val >= test_num:
                break
            iter_num += 1
            data_time.update(time.time() - end)
            input = input.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)
            ori_label = ori_label.cuda(non_blocking=True)
            start_time = time.time()
            output = model(s_x=s_input, s_y=s_mask, x=input, y=target, s_seed=s_init_seed)
            total_time = total_time + 1
            model_time.update(time.time() - start_time)

            if args.ori_resize:
                # pad the label back to a square of the original (un-resized) size
                longerside = max(ori_label.size(1), ori_label.size(2))
                backmask = torch.ones(ori_label.size(0), longerside, longerside).cuda() * 255
                backmask[0, :ori_label.size(1), :ori_label.size(2)] = ori_label
                target = backmask.clone().long()

            output = F.interpolate(output, size=target.size()[1:], mode='bilinear', align_corners=True)
            loss = criterion(output, target)

            n = input.size(0)
            loss = torch.mean(loss)

            output = output.max(1)[1]
            intersection, union, new_target = intersectionAndUnionGPU(output, target, args.classes, args.ignore_label)
            intersection, union, target, new_target = intersection.cpu().numpy(), union.cpu().numpy(), target.cpu().numpy(), new_target.cpu().numpy()
            intersection_meter.update(intersection), union_meter.update(union), target_meter.update(new_target)

            # accumulate per-class foreground IoU, indexed by the episode's support class
            subcls = subcls[0].cpu().numpy()[0]
            class_intersection_meter[(subcls - 1) % split_gap] += intersection[1]
            class_union_meter[(subcls - 1) % split_gap] += union[1]

            accuracy = sum(intersection_meter.val) / (sum(target_meter.val) + 1e-10)
            loss_meter.update(loss.item(), input.size(0))
            batch_time.update(time.time() - end)
            end = time.time()
            if ((i + 1) % (test_num // 100) == 0) and main_process():
                logger.info('Test: [{}/{}] '
                            'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                            'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                            'Loss {loss_meter.val:.4f} ({loss_meter.avg:.4f}) '
                            'Accuracy {accuracy:.4f}.'.format(iter_num * args.batch_size_val, test_num,
                                                              data_time=data_time,
                                                              batch_time=batch_time,
                                                              loss_meter=loss_meter,
                                                              accuracy=accuracy))

    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)

    class_iou_class = []
    class_miou = 0
    for i in range(len(class_intersection_meter)):
        class_iou = class_intersection_meter[i] / (class_union_meter[i] + 1e-10)
        class_iou_class.append(class_iou)
        class_miou += class_iou
    class_miou = class_miou * 1.0 / len(class_intersection_meter)

    if main_process():
        logger.info('meanIoU---Val result: mIoU {:.4f}.'.format(class_miou))
        for i in range(split_gap):
            logger.info('Class_{} Result: iou {:.4f}.'.format(i + 1, class_iou_class[i]))
        logger.info('FBIoU---Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(mIoU, mAcc, allAcc))
        for i in range(args.classes):
            logger.info('Class_{} Result: iou/accuracy {:.4f}/{:.4f}.'.format(i, iou_class[i], accuracy_class[i]))
        logger.info('<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<')
        print('avg inference time: {:.4f}, count: {}'.format(model_time.avg, test_num))
    return loss_meter.avg, mIoU, mAcc, allAcc, class_miou
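Every loop in this file leans on AverageMeter, both for scalar losses and for per-class numpy count vectors. A minimal sketch of the running-average helper these loops assume (update broadcasts element-wise when fed numpy arrays):

class AverageMeter(object):
    """Tracks the most recent value plus a running sum/count average."""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.sum = 0
        self.count = 0
        self.avg = 0

    def update(self, val, n=1):
        self.val = val
        self.sum = self.sum + val * n  # works for scalars and numpy arrays alike
        self.count += n
        self.avg = self.sum / self.count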
def validate(val_loader, model, criterion, args):
    if main_process():
        logger.info('>>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>')
    batch_time = AverageMeter()
    data_time = AverageMeter()
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()

    colors = np.loadtxt(args.colors_path).astype('uint8')

    model.eval()
    end = time.time()
    return_dict = {}
    color_GT_list = []
    color_pred_list = []
    im_list = []
    image_name_list = []
    summary_idx = 0
    for i, (input, target, image_paths) in enumerate(val_loader):
        # if i > 20:
        #     break
        data_time.update(time.time() - end)
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)
        output = model(input)
        if args.zoom_factor != 8:
            output = F.interpolate(output, size=target.size()[1:], mode='bilinear', align_corners=True)
        loss = criterion(output, target)
        # print(output.shape, target.shape)  # torch.Size([8, 21, 241, 321]) torch.Size([8, 241, 321])

        if summary_idx > 12 and args.just_vis:
            break
        if summary_idx <= 12:
            # collect up to 12 samples for visualization: input image, colorized GT, colorized prediction
            prediction = torch.argmax(output, 1).cpu().numpy()
            label = target.cpu().numpy()
            for sample_idx in range(output.shape[0]):
                gray_GT = np.uint8(label[sample_idx])
                color_GT = np.array(colorize(gray_GT, colors).convert('RGB'))
                gray_pred = np.uint8(prediction[sample_idx])
                color_pred = np.array(colorize(gray_pred, colors).convert('RGB'))
                image_path = image_paths[sample_idx]
                # image_name = image_path.split('/')[-1].split('.')[0]
                # print(color_GT.shape, color_GT.dtype, color_pred.shape, color_pred.dtype, image_path)
                # print(np.amax(color_GT), np.amin(color_GT), np.median(color_GT))
                image_name_list.append(image_path)
                im_list.append(args.read_image(image_path))
                color_GT_list.append(color_GT)
                color_pred_list.append(color_pred)
                summary_idx += 1
                if summary_idx > 12:
                    break
            return_dict.update({
                'image_name_list': image_name_list,
                'im_list': im_list,
                'color_GT_list': color_GT_list,
                'color_pred_list': color_pred_list
            })

        n = input.size(0)
        if args.multiprocessing_distributed:
            loss = loss * n  # not considering ignore pixels
            count = target.new_tensor([n], dtype=torch.long)
            dist.all_reduce(loss), dist.all_reduce(count)
            n = count.item()
            loss = loss / n
        else:
            loss = torch.mean(loss)

        output = output.max(1)[1]
        intersection, union, target = intersectionAndUnionGPU(output, target, args.classes, args.ignore_label)
        if args.multiprocessing_distributed:
            dist.all_reduce(intersection), dist.all_reduce(union), dist.all_reduce(target)
        intersection, union, target = intersection.cpu().numpy(), union.cpu().numpy(), target.cpu().numpy()
        intersection_meter.update(intersection), union_meter.update(union), target_meter.update(target)

        accuracy = sum(intersection_meter.val) / (sum(target_meter.val) + 1e-10)
        loss_meter.update(loss.item(), input.size(0))
        batch_time.update(time.time() - end)
        end = time.time()
        if ((i + 1) % args.print_freq == 0) and main_process():
            logger.info('Test: [{}/{}] '
                        'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                        'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                        'Loss {loss_meter.val:.4f} ({loss_meter.avg:.4f}) '
                        'Accuracy {accuracy:.4f}.'.format(i + 1, len(val_loader),
                                                          data_time=data_time,
                                                          batch_time=batch_time,
                                                          loss_meter=loss_meter,
                                                          accuracy=accuracy))

    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    if main_process():
        logger.info('Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(mIoU, mAcc, allAcc))
        for i in range(args.classes):
            logger.info('Class_{} Result: iou/accuracy {:.4f}/{:.4f}.'.format(i, iou_class[i], accuracy_class[i]))
        logger.info('<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<')
    return loss_meter.avg, mIoU, mAcc, allAcc, return_dict
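The visualization branch above assumes a colorize helper that turns a uint8 label map into an RGB-convertible image using the palette loaded from args.colors_path. A hypothetical sketch using a paletted PIL image (the actual helper is not shown in this file):

import numpy as np
from PIL import Image

def colorize(gray, palette):
    # Wrap the label map in a 'P'-mode image so each class id indexes an
    # RGB triple from the [classes, 3] uint8 palette array (assumed layout).
    color = Image.fromarray(gray.astype(np.uint8)).convert('P')
    color.putpalette(palette.astype(np.uint8).flatten().tolist())
    return color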
def validate(val_loader, model, criterion):
    if main_process():
        logger.info('>>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>')
    ## step.1 set up the evaluation meters, updated throughout
    batch_time = AverageMeter()
    data_time = AverageMeter()
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()

    model.eval()
    end = time.time()
    ## step.2 inner loop over the validation set
    for i, (input, target) in enumerate(val_loader):
        data_time.update(time.time() - end)
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)
        output = model(input)
        if args.zoom_factor != 8:
            output = F.interpolate(output, size=target.size()[1:], mode='bilinear', align_corners=True)
        loss = criterion(output, target)

        n = input.size(0)
        if args.multiprocessing_distributed:
            loss = loss * n  # not considering ignore pixels
            count = target.new_tensor([n], dtype=torch.long)
            dist.all_reduce(loss), dist.all_reduce(count)
            n = count.item()
            loss = loss / n
        else:
            loss = torch.mean(loss)

        ## step.4 update the evaluation statistics
        output = output.max(1)[1]
        intersection, union, target = intersectionAndUnionGPU(output, target, args.classes, args.ignore_label)
        if args.multiprocessing_distributed:
            dist.all_reduce(intersection), dist.all_reduce(union), dist.all_reduce(target)
        intersection, union, target = intersection.cpu().numpy(), union.cpu().numpy(), target.cpu().numpy()
        intersection_meter.update(intersection), union_meter.update(union), target_meter.update(target)

        accuracy = sum(intersection_meter.val) / (sum(target_meter.val) + 1e-10)
        loss_meter.update(loss.item(), input.size(0))
        batch_time.update(time.time() - end)
        end = time.time()
        if ((i + 1) % args.print_freq == 0) and main_process():
            logger.info('Test: [{}/{}] '
                        'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                        'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                        'Loss {loss_meter.val:.4f} ({loss_meter.avg:.4f}) '
                        'Accuracy {accuracy:.4f}.'.format(i + 1, len(val_loader),
                                                          data_time=data_time,
                                                          batch_time=batch_time,
                                                          loss_meter=loss_meter,
                                                          accuracy=accuracy))

    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    if main_process():
        logger.info('Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(mIoU, mAcc, allAcc))
        for i in range(args.classes):
            logger.info('Class_{} Result: iou/accuracy {:.4f}/{:.4f}.'.format(i, iou_class[i], accuracy_class[i]))
        logger.info('<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<')
    return loss_meter.avg, mIoU, mAcc, allAcc
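All of these functions rely on intersectionAndUnionGPU for per-class pixel counts. A sketch of the usual histogram-based implementation, assuming output and target are GPU label tensors of the same shape:

import torch

def intersectionAndUnionGPU(output, target, K, ignore_index=255):
    """Per-class intersection/union/target pixel counts for K classes."""
    output = output.reshape(-1)
    target = target.reshape(-1)
    # Mark ignored pixels in the prediction too: histc drops values outside
    # [0, K-1], so they count toward neither intersection, union, nor target.
    output[target == ignore_index] = ignore_index
    intersection = output[output == target]
    area_intersection = torch.histc(intersection.float(), bins=K, min=0, max=K - 1)
    area_output = torch.histc(output.float(), bins=K, min=0, max=K - 1)
    area_target = torch.histc(target.float(), bins=K, min=0, max=K - 1)
    area_union = area_output + area_target - area_intersection
    return area_intersection, area_union, area_target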
def train(train_loader, model, optimizer, epoch):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    main_loss_meter = AverageMeter()
    aux_loss_meter = AverageMeter()  # kept for the log format; this variant has no aux loss, so it stays at 0
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()

    model.train()
    end = time.time()
    max_iter = args.epochs * len(train_loader)  # total iterations, used by the poly LR schedule
    vis_key = 0
    print('Warmup: {}'.format(args.warmup))
    for i, (input, target, nomimg, s_input, s_mask, subcls) in enumerate(train_loader):
        data_time.update(time.time() - end)
        current_iter = epoch * len(train_loader) + i + 1  # current iteration
        # adjust the learning rate with the poly policy (plus optional warmup)
        if args.base_lr > 1e-6:
            poly_learning_rate(optimizer, args.base_lr, current_iter, max_iter, power=args.power,
                               warmup=args.warmup, warmup_step=len(train_loader) // 2)

        s_input = s_input.cuda(non_blocking=True)  # [b,1,3,473,473]
        s_mask = s_mask.cuda(non_blocking=True)  # [b,1,473,473]
        input = input.cuda(non_blocking=True)  # [b,3,473,473]
        target = target.cuda(non_blocking=True)  # [b,473,473]
        nomimg = nomimg.cuda(non_blocking=True)

        # predicted mask [b,473,473], loss [1,b]
        output, main_loss = model(s_x=s_input, s_y=s_mask, nom=nomimg, x=input, y=target)
        loss = main_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # compute intersection and union
        n = input.size(0)
        intersection, union, target = intersectionAndUnionGPU(output, target, args.classes, args.ignore_label)
        intersection, union, target = intersection.cpu().numpy(), union.cpu().numpy(), target.cpu().numpy()
        intersection_meter.update(intersection), union_meter.update(union), target_meter.update(target)

        # compute accuracy and related metrics
        accuracy = sum(intersection_meter.val) / (sum(target_meter.val) + 1e-10)
        main_loss_meter.update(main_loss.item(), n)
        loss_meter.update(loss.item(), n)
        batch_time.update(time.time() - end)
        end = time.time()

        remain_iter = max_iter - current_iter
        remain_time = remain_iter * batch_time.avg
        t_m, t_s = divmod(remain_time, 60)
        t_h, t_m = divmod(t_m, 60)
        remain_time = '{:02d}:{:02d}:{:02d}'.format(int(t_h), int(t_m), int(t_s))

        # periodically log training stats
        if (i + 1) % args.print_freq == 0 and main_process():
            logger.info('Epoch: [{}/{}][{}/{}] '
                        'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                        'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                        'Remain {remain_time} '
                        'MainLoss {main_loss_meter.val:.4f} '
                        'AuxLoss {aux_loss_meter.val:.4f} '
                        'Loss {loss_meter.val:.4f} '
                        'Accuracy {accuracy:.4f}.'.format(epoch + 1, args.epochs, i + 1, len(train_loader),
                                                          batch_time=batch_time, data_time=data_time,
                                                          remain_time=remain_time,
                                                          main_loss_meter=main_loss_meter,
                                                          aux_loss_meter=aux_loss_meter,
                                                          loss_meter=loss_meter,
                                                          accuracy=accuracy))
        if main_process():
            writer.add_scalar('loss_train_batch', main_loss_meter.val, current_iter)
            writer.add_scalar('mIoU_train_batch', np.mean(intersection / (union + 1e-10)), current_iter)
            writer.add_scalar('mAcc_train_batch', np.mean(intersection / (target + 1e-10)), current_iter)
            writer.add_scalar('allAcc_train_batch', accuracy, current_iter)

    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    if main_process():
        logger.info('Train result at epoch [{}/{}]: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(
            epoch + 1, args.epochs, mIoU, mAcc, allAcc))
        for i in range(args.classes):
            logger.info('Class_{} Result: iou/accuracy {:.4f}/{:.4f}.'.format(i, iou_class[i], accuracy_class[i]))
    return main_loss_meter.avg, mIoU, mAcc, allAcc
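This last train function calls a different poly_learning_rate variant that mutates the optimizer in place and supports warmup; note the signature difference from the scalar version sketched earlier. A hedged reconstruction from the call site, in which the exact warmup ramp shape is an assumption:

def poly_learning_rate(optimizer, base_lr, curr_iter, max_iter, power=0.9,
                       warmup=False, warmup_step=500):
    """In-place poly schedule with optional warmup, written into every param group."""
    if warmup and curr_iter < warmup_step:
        # assumed linear ramp from 10% of base_lr up to base_lr over warmup_step iterations
        lr = base_lr * (0.1 + 0.9 * (curr_iter / warmup_step))
    else:
        lr = base_lr * (1 - float(curr_iter) / max_iter) ** power
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr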