def main():
    """Evaluate an ERFNet segmentation model on the validation list.

    Parses CLI args, restores an optional checkpoint, builds a multi-scale
    test loader for the selected dataset and runs one validation pass.
    Globals: reads/writes ``args`` and ``best_mIoU``.
    """
    global args, best_mIoU
    args = parser.parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(gpu) for gpu in args.gpus)
    args.gpus = len(args.gpus)  # from here on, args.gpus is a GPU *count*, not a list

    if args.no_partialbn:
        sync_bn.Synchronize.init(args.gpus)

    # Per-dataset class count and label to exclude from the loss/metric.
    if args.dataset in ('VOCAug', 'VOC2012', 'COCO'):
        num_class = 21
        ignore_label = 255
    elif args.dataset == 'Cityscapes':
        num_class = 19
        ignore_label = 255  # 0
    elif args.dataset == 'ApolloScape':
        num_class = 37  # merge the noise and ignore labels
        ignore_label = 255  # 0
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = models.ERFNet(num_class, partial_bn=not args.no_partialbn)
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    model = torch.nn.DataParallel(model, device_ids=range(args.gpus)).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_mIoU = checkpoint['best_mIoU']
            torch.nn.Module.load_state_dict(model, checkpoint['state_dict'])
            # FIX: report the checkpoint that was actually loaded (was args.evaluate).
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True
    cudnn.fastest = True

    def _scale_transform(width, height):
        # One Compose per evaluation scale: resize both image and label,
        # then normalize the image only (label mean 0 / std 1).
        return torchvision.transforms.Compose([
            tf.GroupRandomScaleRatio(
                size=(width, width, height, height),
                interpolation=(cv2.INTER_LINEAR, cv2.INTER_NEAREST)),
            tf.GroupNormalize(mean=(input_mean, (0, )), std=(input_std, (1, ))),
        ])

    # Data loading code: four fixed scales for multi-scale evaluation.
    test_loader = torch.utils.data.DataLoader(
        getattr(ds, args.dataset.replace("ApolloScape", "VOCAug") + 'DataSet')(
            data_list=args.val_list,
            transform=[_scale_transform(w, h) for w, h in
                       ((1692, 505), (1861, 556), (1624, 485), (2030, 606))]),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=False)

    # define loss function (criterion) optimizer and evaluator.
    # Down-weight the background class (0) and the merged noise/ignore
    # class (last).  Generalized from the hard-coded 37-entry vector:
    # for ApolloScape (num_class == 37) this is byte-for-byte identical.
    weights = [1.0] * num_class
    weights[0] = 0.05
    weights[-1] = 0.05  # was weights[36]
    class_weights = torch.FloatTensor(weights).cuda()
    criterion = torch.nn.NLLLoss(ignore_index=ignore_label,
                                 weight=class_weights).cuda()

    for group in policies:
        print('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']),
            group['lr_mult'], group['decay_mult']))

    optimizer = torch.optim.SGD(policies, args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    evaluator = EvalSegmentation(num_class, ignore_label)

    ### evaluate ###
    validate(test_loader, model, criterion, 0, evaluator)
    return
def main():
    """Train the two-head ERFNet (lane classification + ego-lane) per ``cfg``.

    Builds the model, optionally resumes/finetunes from a checkpoint, trains
    for ``cfg.epochs`` epochs, periodically validates, and checkpoints the
    best model.  Globals: ``best_mIoU_cls``, ``best_mIoU_ego``, ``args``,
    ``writer``.
    """
    global best_mIoU_cls, best_mIoU_ego, args
    args = parser.parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(gpu) for gpu in cfg.gpus)

    # Per-dataset head sizes: num_ego = ego-lane classes, num_class = binary
    # lane/no-lane head.  Unknown datasets fall through to cfg values.
    if cfg.dataset in ('VOCAug', 'VOC2012', 'COCO'):
        num_ego = 21
        num_class = 2
        ignore_label = 255
    elif cfg.dataset == 'Cityscapes':
        num_ego = 19
        num_class = 2
        ignore_label = 255  # 0
    elif cfg.dataset == 'ApolloScape':
        num_ego = 37  # merge the noise and ignore labels
        num_class = 2
        ignore_label = 255
    elif cfg.dataset == 'CULane':
        num_ego = cfg.NUM_EGO
        num_class = 2
        ignore_label = 255
    else:
        num_ego = cfg.NUM_EGO
        num_class = cfg.NUM_CLASSES
        ignore_label = 255

    print(json.dumps(cfg, sort_keys=True, indent=2))

    model = net.ERFNet(num_class, num_ego)
    model = torch.nn.DataParallel(model, device_ids=range(len(cfg.gpus))).cuda()
    if num_class:
        print("=> train '{}' model".format('lane_cls'))
    if num_ego:
        print("=> train '{}' model".format('lane_ego'))

    if cfg.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), cfg.lr,
                                    momentum=cfg.momentum,
                                    weight_decay=cfg.weight_decay)
    else:
        optimizer = torch.optim.Adam(model.parameters(), cfg.lr,
                                     weight_decay=cfg.weight_decay)

    resume_epoch = 0
    if cfg.resume:
        if os.path.isfile(cfg.resume):
            print("=> loading checkpoint '{}'".format(cfg.resume))
            checkpoint = torch.load(cfg.resume)
            if cfg.finetune:
                # Finetune: copy only backbone ('module*') parameters.
                print('finetune from ', cfg.resume)
                state_all = checkpoint['state_dict']
                state_clip = {}  # only use backbone parameters
                for k, v in state_all.items():
                    if 'module' in k:
                        state_clip[k] = v
                        print(k)
                model.load_state_dict(state_clip, strict=False)
            else:
                # Full resume: weights, optimizer state and epoch counter.
                print('==> Resume model from ' + cfg.resume)
                model.load_state_dict(checkpoint['state_dict'])
                if 'optimizer' in checkpoint.keys():
                    optimizer.load_state_dict(checkpoint['optimizer'])
                if 'epoch' in checkpoint.keys():
                    resume_epoch = int(checkpoint['epoch']) + 1
        else:
            print("=> no checkpoint found at '{}'".format(cfg.resume))
            model.apply(weights_init)
    else:
        model.apply(weights_init)

    cudnn.benchmark = True
    cudnn.fastest = True

    # Data loading code
    train_loader = torch.utils.data.DataLoader(
        getattr(ds, 'VOCAugDataSet')(
            dataset_path=cfg.dataset_path,
            data_list=cfg.train_list,
            transform=torchvision.transforms.Compose([
                tf.GroupRandomScale(
                    size=(0.695, 0.721),
                    interpolation=(cv2.INTER_LINEAR, cv2.INTER_NEAREST,
                                   cv2.INTER_NEAREST)),
                tf.GroupRandomCropRatio(
                    size=(cfg.MODEL_INPUT_WIDTH, cfg.MODEL_INPUT_HEIGHT)),
                tf.GroupRandomRotation(
                    degree=(-1, 1),
                    interpolation=(cv2.INTER_LINEAR, cv2.INTER_NEAREST,
                                   cv2.INTER_NEAREST),
                    padding=(cfg.INPUT_MEAN, (ignore_label, ),
                             (ignore_label, ))),
                tf.GroupNormalize(
                    mean=(cfg.INPUT_MEAN, (0, ), (0, )),
                    std=(cfg.INPUT_STD, (1, ), (1, ))),
            ])),
        batch_size=cfg.train_batch_size, shuffle=True,
        num_workers=cfg.workers, pin_memory=True, drop_last=True)

    val_loader = torch.utils.data.DataLoader(
        getattr(ds, 'VOCAugDataSet')(
            dataset_path=cfg.dataset_path,
            data_list=cfg.val_list,
            transform=torchvision.transforms.Compose([
                tf.GroupRandomScale(
                    size=(0.695, 0.721),
                    interpolation=(cv2.INTER_LINEAR, cv2.INTER_NEAREST,
                                   cv2.INTER_NEAREST)),
                tf.GroupRandomCropRatio(
                    size=(cfg.MODEL_INPUT_WIDTH, cfg.MODEL_INPUT_HEIGHT)),
                tf.GroupNormalize(
                    mean=(cfg.INPUT_MEAN, (0, ), (0, )),
                    std=(cfg.INPUT_STD, (1, ), (1, ))),
            ])),
        batch_size=cfg.val_batch_size, shuffle=False,
        # FIX: was args.workers — every other loader option comes from cfg,
        # and this function's argparse namespace need not define 'workers'.
        num_workers=cfg.workers, pin_memory=True)

    # define loss function (criterion) optimizer and evaluator
    class_weights = torch.FloatTensor(cfg.CLASS_WEIGHT).cuda()
    weights = [1.0] * (num_ego + 1)
    weights[0] = 0.4  # down-weight the background class of the ego head
    ego_weights = torch.FloatTensor(weights).cuda()
    criterion_cls = torch.nn.NLLLoss(ignore_index=ignore_label,
                                     weight=class_weights).cuda()
    criterion_ego = torch.nn.NLLLoss(ignore_index=ignore_label,
                                     weight=ego_weights).cuda()
    criterion_exist = torch.nn.BCELoss().cuda()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    reg_loss = None
    if cfg.weight_decay > 0 and cfg.use_L1:
        reg_loss = Regularization(model, cfg.weight_decay, p=1).to(device)
    else:
        print("no regularization")

    # NOTE(review): when both heads are enabled the ego evaluator overwrites
    # the cls one, so validation mIoU is computed with num_ego + 1 classes —
    # confirm this is intended.
    if num_class:
        evaluator = EvalSegmentation(num_class, ignore_label)
    if num_ego:
        evaluator = EvalSegmentation(num_ego + 1, ignore_label)

    # Tensorboard writer
    global writer
    writer = SummaryWriter(os.path.join(cfg.save_path, 'Tensorboard'))

    for epoch in range(cfg.epochs):  # args.start_epoch
        if epoch < resume_epoch:
            continue  # skip epochs already covered by the resumed checkpoint
        adjust_learning_rate(optimizer, epoch, cfg.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion_cls, criterion_ego,
              criterion_exist, optimizer, epoch, writer, reg_loss)

        # evaluate on validation set
        if (epoch + 1) % cfg.eval_freq == 0 or epoch == cfg.epochs - 1:
            mIoU_cls, mIoU_ego = validate(val_loader, model, criterion_cls,
                                          criterion_ego, criterion_exist,
                                          epoch, evaluator, writer)
            # remember best mIoU and save checkpoint
            if num_class:
                is_best = mIoU_cls > best_mIoU_cls
            if num_ego:
                is_best = mIoU_ego > best_mIoU_ego
            best_mIoU_cls = max(mIoU_cls, best_mIoU_cls)
            best_mIoU_ego = max(mIoU_ego, best_mIoU_ego)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': cfg.arch,
                'state_dict': model.state_dict(),
                'best_mIoU': best_mIoU_ego,
            }, is_best)

    writer.close()
def main():
    """Train an ERFNet lane-segmentation model (CULane-style pipeline).

    Parses CLI args, optionally warm-starts from a checkpoint (reusing only
    matching, non-output-conv parameters), trains with periodic validation
    and checkpoints the best model.  Globals: ``args``, ``best_mIoU``.
    """
    global args, best_mIoU
    args = parser.parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(gpu) for gpu in args.gpus)
    args.gpus = len(args.gpus)  # from here on, args.gpus is a GPU *count*

    if args.no_partialbn:
        sync_bn.Synchronize.init(args.gpus)

    # Per-dataset class count and ignored label value.
    if args.dataset in ('VOCAug', 'VOC2012', 'COCO'):
        num_class = 21
        ignore_label = 255
    elif args.dataset == 'Cityscapes':
        num_class = 19
        ignore_label = 255  # 0
    elif args.dataset == 'ApolloScape':
        num_class = 37  # merge the noise and ignore labels
        ignore_label = 255
    elif args.dataset == 'CULane':
        num_class = 5
        ignore_label = 255
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = models.ERFNet(num_class, partial_bn=not args.no_partialbn)
    input_mean = model.input_mean
    input_std = model.input_std
    model = torch.nn.DataParallel(model, device_ids=range(args.gpus)).cuda()

    def load_my_state_dict(model, state_dict):
        # Custom loader for checkpoints whose dict only partially matches:
        # copies every parameter that exists in the model, skipping missing
        # keys and the task-specific 'output_conv' head.
        own_state = model.state_dict()
        skipped = []
        reused = 0
        for name, param in state_dict.items():
            if name not in own_state or 'output_conv' in name:
                skipped.append(name)
                continue
            own_state[name].copy_(param)
            reused += 1
        print('#reused param: {}'.format(reused))
        return model

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            model = load_my_state_dict(model, checkpoint['state_dict'])
            # torch.nn.Module.load_state_dict(model, checkpoint['state_dict'])
            # FIX: report the checkpoint that was actually loaded (was args.evaluate).
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True
    cudnn.fastest = True

    # Data loading code
    train_loader = torch.utils.data.DataLoader(
        getattr(ds, args.dataset.replace("CULane", "VOCAug") + 'DataSet')(
            data_list=args.train_list,
            transform=torchvision.transforms.Compose([
                tf.GroupRandomScale(
                    size=(0.595, 0.621),
                    interpolation=(cv2.INTER_LINEAR, cv2.INTER_NEAREST)),
                tf.GroupRandomCropRatio(
                    size=(args.img_width, args.img_height)),
                tf.GroupRandomRotation(
                    degree=(-1, 1),
                    interpolation=(cv2.INTER_LINEAR, cv2.INTER_NEAREST),
                    padding=(input_mean, (ignore_label, ))),
                tf.GroupNormalize(
                    mean=(input_mean, (0, )), std=(input_std, (1, ))),
            ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=False, drop_last=True)

    val_loader = torch.utils.data.DataLoader(
        getattr(ds, args.dataset.replace("CULane", "VOCAug") + 'DataSet')(
            data_list=args.val_list,
            transform=torchvision.transforms.Compose([
                tf.GroupRandomScale(
                    size=(0.595, 0.621),
                    interpolation=(cv2.INTER_LINEAR, cv2.INTER_NEAREST)),
                tf.GroupRandomCropRatio(
                    size=(args.img_width, args.img_height)),
                tf.GroupNormalize(
                    mean=(input_mean, (0, )), std=(input_std, (1, ))),
            ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=False)

    # define loss function (criterion) optimizer and evaluator.
    # Generalized from hard-coded range(5): identical for CULane (num_class=5).
    weights = [1.0] * num_class
    weights[0] = 0.4  # down-weight the background class
    class_weights = torch.FloatTensor(weights).cuda()
    criterion = torch.nn.NLLLoss(ignore_index=ignore_label,
                                 weight=class_weights).cuda()
    criterion_exist = torch.nn.BCELoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    evaluator = EvalSegmentation(num_class, ignore_label)

    # FIX: the original forced `args.evaluate = False` right before this
    # test, making the evaluate-only branch unreachable.  Honor the flag
    # when the parser defines it; default to False when it does not.
    if getattr(args, 'evaluate', False):
        validate(val_loader, model, criterion, 0, evaluator)
        return

    for epoch in range(args.epochs):  # args.start_epoch
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, criterion_exist, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            mIoU = validate(val_loader, model, criterion,
                            (epoch + 1) * len(train_loader), evaluator)
            # remember best mIoU and save checkpoint
            is_best = mIoU > best_mIoU
            best_mIoU = max(mIoU, best_mIoU)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_mIoU': best_mIoU,
            }, is_best)
def main():
    """Train a segmentation model (method/arch chosen via args) on VOCAug data.

    Supports resuming from a checkpoint, loading initial weights, optional
    evaluate-only mode, and best-mIoU checkpointing.  Globals: ``args``,
    ``best_mIoU``.
    """
    global args, best_mIoU
    args = parser.parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(gpu) for gpu in args.gpus)
    args.gpus = len(args.gpus)  # from here on, args.gpus is a GPU *count*

    if args.no_partialbn:
        sync_bn.Synchronize.init(args.gpus)

    # Per-dataset class count and ignored label value.
    if args.dataset in ('VOCAug', 'VOC2012', 'COCO'):
        num_class = 21
        ignore_label = 255
    elif args.dataset == 'Cityscapes':
        num_class = 19
        ignore_label = 0
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # Model class is selected by name (e.g. PSPNet) from the models module.
    model = getattr(models, args.method)(num_class,
                                         base_model=args.arch,
                                         dropout=args.dropout,
                                         partial_bn=not args.no_partialbn)
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    model = torch.nn.DataParallel(model, device_ids=range(args.gpus)).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_mIoU = checkpoint['best_mIoU']
            torch.nn.Module.load_state_dict(model, checkpoint['state_dict'])
            # FIX: report the checkpoint that was actually loaded (was args.evaluate).
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.weight:
        if os.path.isfile(args.weight):
            print("=> loading initial weight '{}'".format(args.weight))
            checkpoint = torch.load(args.weight)
            torch.nn.Module.load_state_dict(model, checkpoint['state_dict'])
        else:
            print("=> no model file found at '{}'".format(args.weight))

    cudnn.benchmark = True
    cudnn.fastest = True

    # Data loading code
    train_loader = torch.utils.data.DataLoader(
        getattr(ds, 'VOCAugDataSet')(
            data_list=args.train_list,
            transform=torchvision.transforms.Compose([
                tf.GroupRandomHorizontalFlip(),
                tf.GroupRandomScale(
                    size=(0.5, 2.0),
                    interpolation=(cv2.INTER_LINEAR, cv2.INTER_NEAREST)),
                tf.GroupRandomCrop(size=args.train_size),
                tf.GroupRandomPad(
                    size=args.train_size,
                    padding=(input_mean, (ignore_label, ))),
                tf.GroupRandomRotation(
                    degree=(-10, 10),
                    interpolation=(cv2.INTER_LINEAR, cv2.INTER_NEAREST),
                    padding=(input_mean, (ignore_label, ))),
                tf.GroupRandomBlur(applied=(True, False)),
                tf.GroupNormalize(
                    mean=(input_mean, (0, )), std=(input_std, (1, ))),
            ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True, drop_last=True)

    val_loader = torch.utils.data.DataLoader(
        getattr(ds, 'VOCAugDataSet')(
            data_list=args.val_list,
            transform=torchvision.transforms.Compose([
                tf.GroupCenterCrop(size=args.test_size),
                # NOTE(review): 'GroupConcerPad' looks like a typo of
                # 'GroupCornerPad', but the name must match the transforms
                # module — verify there before renaming.
                tf.GroupConcerPad(
                    size=args.test_size,
                    padding=(input_mean, (ignore_label, ))),
                tf.GroupNormalize(
                    mean=(input_mean, (0, )), std=(input_std, (1, ))),
            ])),
        # No gradients at eval time, so a larger batch fits in memory.
        batch_size=args.batch_size * 3, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) optimizer and evaluator
    criterion = torch.nn.NLLLoss(ignore_index=ignore_label).cuda()
    for group in policies:
        print('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']),
            group['lr_mult'], group['decay_mult']))
    optimizer = torch.optim.SGD(policies, args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    evaluator = EvalSegmentation(num_class, ignore_label)

    if args.evaluate:
        validate(val_loader, model, criterion, 0, evaluator)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            mIoU = validate(val_loader, model, criterion,
                            (epoch + 1) * len(train_loader), evaluator)
            # remember best mIoU and save checkpoint
            is_best = mIoU > best_mIoU
            best_mIoU = max(mIoU, best_mIoU)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_mIoU': best_mIoU,
            }, is_best)
# --- Tail of a TensorRT engine-build function whose `def` line is not in
# this chunk; shown dedented for readability.  `builder`, `network`, `calib`
# and `args` come from the (unseen) enclosing scope — do not run standalone.
builder.max_workspace_size = 1 << 31  # 2 GiB workspace for tactic selection
builder.int8_mode = args.use_int8  # enable INT8 only when requested via CLI
builder.fp16_mode = False
builder.int8_calibrator = calib  # calibrator used when int8_mode is on
try:
    engine = builder.build_cuda_engine(network)
except Exception as e:
    # Engine build is mandatory: abort the whole script on failure.
    print("Failed creating engine for TensorRT. Error: ", e)
    quit()
print("Done generating tensorRT engine.")

# Serialization is best-effort: a failure is reported but not fatal.
# NOTE(review): the "Couln't" typo below is a runtime string and is
# deliberately left unchanged here.
print("Serializing tensorRT engine for C++ interface")
try:
    serialized_engine = engine.serialize()
except Exception as e:
    print("Couln't serialize engine. Not critical, so I continue. Error: ", e)
with open(args.engine_file, "wb") as f:
    f.write(serialized_engine)


if __name__ == '__main__':
    # Script entry point: fixed model input geometry and identity
    # normalization (mean 0, std 1) before converting to TensorRT.
    args = parser.parse_args()
    cfg.MODEL_INPUT_WIDTH, cfg.MODEL_INPUT_HEIGHT = 480, 224
    cfg.INPUT_MEAN = [0, 0, 0]
    cfg.INPUT_STD = [1., 1., 1.]
    print("Convert tensorRT... ")
    create_tensorrt(args)
    print("All Done")