def main(cfgs):
    Logger.init(**cfgs['logger'])

    local_rank = cfgs['local_rank']
    world_size = int(os.environ['WORLD_SIZE'])
    Log.info('rank: {}, world_size: {}'.format(local_rank, world_size))

    log_dir = cfgs['log_dir']
    pth_dir = cfgs['pth_dir']
    if local_rank == 0:
        assure_dir(log_dir)
        assure_dir(pth_dir)

    aux_config = cfgs.get('auxiliary', None)
    network = ModuleBuilder(cfgs['network'], aux_config).cuda()
    criterion = build_criterion(cfgs['criterion'], aux_config).cuda()
    optimizer = optim.SGD(network.parameters(), **cfgs['optimizer'])
    scheduler = PolyLRScheduler(optimizer, **cfgs['scheduler'])

    dataset = build_dataset(**cfgs['dataset'], **cfgs['transforms'])
    sampler = DistributedSampler4Iter(dataset, world_size=world_size,
                                      rank=local_rank, **cfgs['sampler'])
    train_loader = DataLoader(dataset, sampler=sampler, **cfgs['loader'])

    cudnn.benchmark = True
    torch.manual_seed(666)
    torch.cuda.manual_seed(666)
    torch.cuda.set_device(local_rank)
    dist.init_process_group(backend='nccl', init_method='env://')

    model = DistributedDataParallel(network)
    model = apex.parallel.convert_syncbn_model(model)

    torch.cuda.empty_cache()
    train(local_rank, world_size, pth_dir, cfgs['frequency'], criterion,
          train_loader, model, optimizer, scheduler)
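# PolyLRScheduler is constructed above but not defined in this file. The sketch below
# is a minimal, hypothetical implementation of the iteration-based "poly" policy such
# a scheduler conventionally applies (lr = base_lr * (1 - iter / max_iter) ** power);
# the real class may take different constructor arguments.
class PolyLRSchedulerSketch(object):
    def __init__(self, optimizer, max_iter, power=0.9):
        self.optimizer = optimizer
        self.max_iter = max_iter
        self.power = power
        self.base_lrs = [group['lr'] for group in optimizer.param_groups]
        self.iter = 0

    def step(self):
        # Scale every param group's base LR by the poly factor for the current iteration.
        factor = (1.0 - self.iter / float(self.max_iter)) ** self.power
        for group, base_lr in zip(self.optimizer.param_groups, self.base_lrs):
            group['lr'] = base_lr * factor
        self.iter = min(self.iter + 1, self.max_iter)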
def main():
    # make save dir
    if args.local_rank == 0:
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

    # launch the logger
    Log.init(log_level=args.log_level,
             log_file=osp.join(args.save_dir, args.log_file),
             log_format=args.log_format,
             rewrite=args.rewrite,
             stdout_level=args.stdout_level)

    # RGB or BGR input (RGB for ImageNet-pretrained models, BGR for Caffe-pretrained models)
    if args.rgb:
        IMG_MEAN = np.array((0.485, 0.456, 0.406), dtype=np.float32)
        IMG_VARS = np.array((0.229, 0.224, 0.225), dtype=np.float32)
    else:
        IMG_MEAN = np.array((104.00698793, 116.66876762, 122.67891434), dtype=np.float32)
        IMG_VARS = np.array((1, 1, 1), dtype=np.float32)

    # set models
    import libs.models as models
    deeplab = models.__dict__[args.arch](num_classes=args.num_classes, data_set=args.data_set)
    if args.restore_from is not None:
        saved_state_dict = torch.load(args.restore_from, map_location=torch.device('cpu'))
        new_params = deeplab.state_dict().copy()
        for i in saved_state_dict:
            i_parts = i.split('.')
            if not i_parts[0] == 'fc':
                new_params['.'.join(i_parts[0:])] = saved_state_dict[i]
        Log.info("load pretrained models")
        if deeplab.backbone is not None:
            deeplab.backbone.load_state_dict(new_params, strict=False)
        else:
            deeplab.load_state_dict(new_params, strict=False)
    else:
        Log.info("train from scratch")

    args.world_size = 1
    if 'WORLD_SIZE' in os.environ and args.apex:
        args.apex = int(os.environ['WORLD_SIZE']) > 1
        args.world_size = int(os.environ['WORLD_SIZE'])
        print("Total world size: ", int(os.environ['WORLD_SIZE']))

    if args.gpu is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    h, w = args.input_size, args.input_size
    input_size = (h, w)

    # Set the device according to local_rank.
    torch.cuda.set_device(args.local_rank)
    Log.info("Local Rank: {}".format(args.local_rank))
    torch.distributed.init_process_group(backend='nccl', init_method='env://')

    # set optimizer
    optimizer = optim.SGD(
        [{'params': filter(lambda p: p.requires_grad, deeplab.parameters()),
          'lr': args.learning_rate}],
        lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay)
    optimizer.zero_grad()

    # set on cuda
    deeplab.cuda()

    # models transformation
    model = DistributedDataParallel(deeplab)
    model = apex.parallel.convert_syncbn_model(model)
    model.train()
    model.float()
    model.cuda()

    # set loss function
    if args.ohem:
        criterion = CriterionOhemDSN(thresh=args.ohem_thres, min_kept=args.ohem_keep)  # OHEM CrossEntropy
        if "ic" in args.arch:
            criterion = CriterionICNet(thresh=args.ohem_thres, min_kept=args.ohem_keep)
        if "dfa" in args.arch:
            criterion = CriterionDFANet(thresh=args.ohem_thres, min_kept=args.ohem_keep)
    else:
        criterion = CriterionDSN()  # CrossEntropy
    criterion.cuda()
    cudnn.benchmark = True

    if args.world_size == 1:
        print(model)

    # This differs slightly from single-node multi-GPU training: in distributed
    # training each trainloader belongs to one process that samples from the dataset class.
    batch_size = args.gpu_num * args.batch_size_per_gpu
    max_iters = args.num_steps * batch_size / args.gpu_num

    # set data loader
    data_set = Cityscapes(args.data_dir, args.data_list, max_iters=max_iters,
                          crop_size=input_size, scale=args.random_scale,
                          mirror=args.random_mirror, mean=IMG_MEAN,
                          vars=IMG_VARS, RGB=args.rgb)
    trainloader = data.DataLoader(data_set, batch_size=args.batch_size_per_gpu,
                                  shuffle=True, num_workers=args.num_workers,
                                  pin_memory=True)
    print("trainloader", len(trainloader))

    torch.cuda.empty_cache()

    # start training
    for i_iter, batch in enumerate(trainloader):
        images, labels = batch
        images = images.cuda()
        labels = labels.long().cuda()
        optimizer.zero_grad()
        lr = adjust_learning_rate(optimizer, args, i_iter, len(trainloader))
        preds = model(images)
        loss = criterion(preds, labels)
        loss.backward()
        optimizer.step()
        reduce_loss = all_reduce_tensor(loss, world_size=args.gpu_num)
        if args.local_rank == 0:
            Log.info('iter = {} of {} completed, lr={}, loss = {}'.format(
                i_iter, len(trainloader), lr, reduce_loss.data.cpu().numpy()))
            if i_iter % args.save_pred_every == 0 and i_iter > args.save_start:
                print('save models ...')
                torch.save(deeplab.state_dict(),
                           osp.join(args.save_dir, str(args.arch) + str(i_iter) + '.pth'))

    end = timeit.default_timer()
    if args.local_rank == 0:
        Log.info("Training cost: " + str(end - start) + ' seconds')
        Log.info("Save final models")
        torch.save(deeplab.state_dict(),
                   osp.join(args.save_dir, str(args.arch) + '_final' + '.pth'))
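# all_reduce_tensor() is called in the loop above but defined elsewhere in the
# repository. A minimal sketch of a typical implementation is given below (an
# assumption, not the exact helper): it sums the loss tensor across all ranks
# and divides by the world size so every process logs the same averaged value.
def all_reduce_tensor_sketch(tensor, world_size=1):
    reduced = tensor.clone()
    dist.all_reduce(reduced, op=dist.ReduceOp.SUM)  # sum over all processes
    reduced /= world_size                           # average
    return reduced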
def main():
    settings_print_interval = 1                        # How often to print loss and other info
    settings_batch_size = 4                            # Batch size 80 default 64
    settings_num_workers = 16                          # Number of workers for image loading
    settings_normalize_mean = [0.485, 0.456, 0.406]    # Normalize mean (default PyTorch ImageNet values)
    settings_normalize_std = [0.229, 0.224, 0.225]     # Normalize std (default PyTorch ImageNet values)
    settings_search_area_factor = 4.0                  # Image patch size relative to target size
    settings_feature_sz = 24                           # Size of feature map
    settings_output_sz = settings_feature_sz * 16      # Size of input image patches (24 * 16)
    settings_segm_use_distance = True

    # Settings for the image sample and proposal generation
    settings_center_jitter_factor = {'train': 0, 'test1': 1.5, 'test2': 1.5}
    settings_scale_jitter_factor = {'train': 0, 'test1': 0.25, 'test2': 0.25}

    ####################################################################################################

    start_epoch = 0
    random.seed(0)

    args = parse_args()

    # Use GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu if args.gpu != '' else str(opt.gpu_id)
    use_gpu = torch.cuda.is_available() and (args.gpu != '' or int(opt.gpu_id) >= 0)
    gpu_ids = [int(val) for val in args.gpu.split(',')]

    if not os.path.isdir(opt.checkpoint):
        os.makedirs(opt.checkpoint)

    # Data
    print('==> Preparing dataset')

    input_size = opt.input_size
    train_transformer = TrainTransform(size=input_size)
    # train_transformer = TrainTransform_Noresize()
    test_transformer = TestTransform(size=input_size)

    try:
        if isinstance(opt.trainset, list):
            datalist = []
            for dataset, freq, max_skip in zip(opt.trainset, opt.datafreq, opt.max_skip):
                ds = DATA_CONTAINER[dataset](
                    train=True,
                    sampled_frames=opt.sampled_frames,
                    transform=train_transformer,
                    max_skip=max_skip,
                    samples_per_video=opt.samples_per_video
                )
                datalist += [ds] * freq
            trainset = data.ConcatDataset(datalist)
        else:
            max_skip = opt.max_skip[0] if isinstance(opt.max_skip, list) else opt.max_skip
            trainset = DATA_CONTAINER[opt.trainset](
                train=True,
                sampled_frames=opt.sampled_frames,
                transform=train_transformer,
                max_skip=max_skip,
                samples_per_video=opt.samples_per_video
            )
    except KeyError as ke:
        print('[ERROR] invalid dataset name encountered. The current acceptable datasets are:')
        print(list(DATA_CONTAINER.keys()))
        exit()

    testset = DATA_CONTAINER[opt.valset](
        train=False,
        transform=test_transformer,
        samples_per_video=1
    )

    trainloader = data.DataLoader(trainset, batch_size=opt.train_batch, shuffle=True,
                                  num_workers=opt.workers,
                                  collate_fn=multibatch_collate_fn, drop_last=True)

    testloader = data.DataLoader(testset, batch_size=1, shuffle=False,
                                 num_workers=opt.workers,
                                 collate_fn=multibatch_collate_fn)

    #########################################################################################
    vos_train = Vos(split='train')

    transform_train = torchvision.transforms.Compose([
        dltransforms.ToTensorAndJitter(0.2),
        torchvision.transforms.Normalize(mean=settings_normalize_mean, std=settings_normalize_std)
    ])

    data_processing_train = segm_processing.SegmProcessing(
        search_area_factor=settings_search_area_factor,
        output_sz=settings_output_sz,
        center_jitter_factor=settings_center_jitter_factor,
        scale_jitter_factor=settings_scale_jitter_factor,
        mode='pair',
        transform=transform_train,
        use_distance=settings_segm_use_distance)

    dataset_train = segm_sampler.SegmSampler([vos_train], [1],
                                             samples_per_epoch=1000 * settings_batch_size * 8,
                                             max_gap=50,
                                             processing=data_processing_train)

    loader_train = LTRLoader('train', dataset_train, training=True,
                             batch_size=settings_batch_size,
                             num_workers=settings_num_workers,
                             shuffle=True, drop_last=True, stack_dim=1)
    #########################################################################################

    # Model
    print("==> creating model")

    net = AMB(opt.keydim, opt.valdim, 'train', mode=opt.mode, iou_threshold=opt.iou_threshold)
    print(' Total params: %.2fM' % (sum(p.numel() for p in net.parameters()) / 1000000.0))

    net.eval()
    if use_gpu:
        net = net.cuda()
        assert opt.train_batch % len(gpu_ids) == 0
        net = nn.DataParallel(net, device_ids=gpu_ids, dim=0)

    # set training parameters
    # for p in net.parameters():
    #     p.requires_grad = True
    for name, param in net.named_parameters():
        # print(name)
        if 'Encoder' in name:
            param.requires_grad = False  # freeze backbone gradients
        else:
            param.requires_grad = True

    criterion = None
    celoss = cross_entropy_loss

    if opt.loss == 'ce':
        criterion = celoss
    elif opt.loss == 'iou':
        criterion = mask_iou_loss
    elif opt.loss == 'both':
        criterion = lambda pred, target, obj: celoss(pred, target, obj) + mask_iou_loss(pred, target, obj)
    else:
        raise TypeError('unknown training loss %s' % opt.loss)

    optimizer = None
    if opt.solver == 'sgd':
        optimizer = optim.SGD(net.parameters(), lr=opt.learning_rate,
                              momentum=opt.momentum[0], weight_decay=opt.weight_decay)
    elif opt.solver == 'adam':
        optimizer = optim.Adam(net.parameters(), lr=opt.learning_rate,
                               betas=opt.momentum, weight_decay=opt.weight_decay)
    else:
        raise TypeError('unknown solver type %s' % opt.solver)

    # Resume
    title = 'Appearance Memory Bank'
    minloss = float('inf')

    opt.checkpoint = osp.join(osp.join(opt.checkpoint, opt.valset))
    if not osp.exists(opt.checkpoint):
        os.mkdir(opt.checkpoint)

    if opt.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint {}'.format(opt.resume))
        assert os.path.isfile(opt.resume), 'Error: no checkpoint directory found!'
        # opt.checkpoint = os.path.dirname(opt.resume)
        checkpoint = torch.load(opt.resume)
        minloss = checkpoint['minloss']
        start_epoch = checkpoint['epoch']
        net.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        skips = checkpoint['max_skip']

        try:
            if isinstance(skips, list):
                for idx, skip in enumerate(skips):
                    trainloader.dataset.datasets[idx].set_max_skip(skip)
            else:
                trainloader.dataset.set_max_skip(skips)
        except Exception:
            print('[Warning] Initializing max skip failed')

        logger = Logger(os.path.join(opt.checkpoint, opt.mode + '_log.txt'), resume=True)
    else:
        if opt.initial:
            print('==> Initialize model with weight file {}'.format(opt.initial))
            weight = torch.load(opt.initial)
            if isinstance(weight, OrderedDict):
                net.module.load_param(weight)
            else:
                net.module.load_param(weight['state_dict'])

        logger = Logger(os.path.join(opt.checkpoint, opt.mode + '_log.txt'), resume=False)
        start_epoch = 0
        logger.set_items(['Epoch', 'LR', 'Train Loss'])

    # Train and val
    # Replay the LR schedule up to start_epoch when resuming.
    for epoch in range(start_epoch):
        adjust_learning_rate(optimizer, epoch, opt)

    for epoch in range(start_epoch, opt.epochs):
        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, opt.epochs, opt.learning_rate))
        adjust_learning_rate(optimizer, epoch, opt)

        net.module.phase = 'train'
        train_loss = train(loader_train,  # loader_train or trainloader
                           model=net,
                           criterion=criterion,
                           optimizer=optimizer,
                           epoch=epoch,
                           use_cuda=use_gpu,
                           iter_size=opt.iter_size,
                           mode=opt.mode,
                           threshold=opt.iou_threshold)

        if (epoch + 1) % opt.epoch_per_test == 0:
            net.module.phase = 'test'
            test_loss = test(testloader,
                             model=net.module,
                             criterion=criterion,
                             epoch=epoch,
                             use_cuda=use_gpu)

        # append logger file
        logger.log(epoch + 1, opt.learning_rate, train_loss)

        # adjust max skip
        if (epoch + 1) % opt.epochs_per_increment == 0:
            if isinstance(trainloader.dataset, data.ConcatDataset):
                for dataset in trainloader.dataset.datasets:
                    dataset.increase_max_skip()
            else:
                trainloader.dataset.increase_max_skip()

        # save model
        is_best = train_loss <= minloss
        minloss = min(minloss, train_loss)

        skips = [ds.max_skip for ds in trainloader.dataset.datasets] \
            if isinstance(trainloader.dataset, data.ConcatDataset) \
            else trainloader.dataset.max_skip

        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': net.state_dict(),
            'loss': train_loss,
            'minloss': minloss,
            'optimizer': optimizer.state_dict(),
            'max_skip': skips,
        }, epoch + 1, is_best, checkpoint=opt.checkpoint, filename=opt.mode)

    logger.close()

    print('minimum loss:')
    print(minloss)
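# save_checkpoint() is imported from elsewhere in the repository. The sketch below is
# a plausible minimal version (an assumption about its behaviour, not the actual
# helper): it writes a per-epoch checkpoint and keeps a copy of the best model so far.
import shutil

def save_checkpoint_sketch(state, epoch, is_best, checkpoint='checkpoint', filename='train'):
    filepath = os.path.join(checkpoint, '{}_{}.pth.tar'.format(filename, epoch))
    torch.save(state, filepath)  # regular per-epoch checkpoint
    if is_best:
        # keep a separate copy of the lowest-loss model
        shutil.copyfile(filepath, os.path.join(checkpoint, '{}_best.pth.tar'.format(filename)))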
def main():
    start_epoch = 0

    args = parse_args()

    # Use GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu if args.gpu != '' else str(opt.gpu_id)
    use_gpu = torch.cuda.is_available() and (args.gpu != '' or int(opt.gpu_id) >= 0)

    if not os.path.isdir(opt.checkpoint):
        os.makedirs(opt.checkpoint)

    # Data
    print('==> Preparing dataset')

    input_dim = opt.input_size
    train_transformer = TrainTransform(size=input_dim)
    test_transformer = TestTransform(size=input_dim)

    try:
        if isinstance(opt.trainset, list):
            datalist = []
            for dataset, freq, max_skip in zip(opt.trainset, opt.datafreq, opt.max_skip):
                ds = DATA_CONTAINER[dataset](
                    train=True,
                    sampled_frames=opt.sampled_frames,
                    transform=train_transformer,
                    max_skip=max_skip,
                    samples_per_video=opt.samples_per_video)
                datalist += [ds] * freq
            trainset = data.ConcatDataset(datalist)
        else:
            max_skip = opt.max_skip[0] if isinstance(opt.max_skip, list) else opt.max_skip
            trainset = DATA_CONTAINER[opt.trainset](
                train=True,
                sampled_frames=opt.sampled_frames,
                transform=train_transformer,
                max_skip=max_skip,
                samples_per_video=opt.samples_per_video)
    except KeyError as ke:
        print('[ERROR] invalid dataset name encountered. The current acceptable datasets are:')
        print(list(DATA_CONTAINER.keys()))
        exit()

    testset = DATA_CONTAINER[opt.valset](train=False,
                                         transform=test_transformer,
                                         samples_per_video=1)

    trainloader = data.DataLoader(trainset, batch_size=opt.train_batch, shuffle=True,
                                  num_workers=opt.workers,
                                  collate_fn=multibatch_collate_fn)

    testloader = data.DataLoader(testset, batch_size=1, shuffle=False,
                                 num_workers=opt.workers,
                                 collate_fn=multibatch_collate_fn)

    # Model
    print("==> creating model")

    net = STAN(opt.keydim, opt.valdim)
    print(' Total params: %.2fM' % (sum(p.numel() for p in net.parameters()) / 1000000.0))

    net.eval()
    if use_gpu:
        net = net.cuda()

    # set training parameters
    for p in net.parameters():
        p.requires_grad = True

    criterion = None
    celoss = cross_entropy_loss

    if opt.loss == 'ce':
        criterion = celoss
    elif opt.loss == 'iou':
        criterion = mask_iou_loss
    elif opt.loss == 'both':
        criterion = lambda pred, target, obj: celoss(pred, target, obj) + mask_iou_loss(pred, target, obj)
    else:
        raise TypeError('unknown training loss %s' % opt.loss)

    optimizer = None
    if opt.solver == 'sgd':
        optimizer = optim.SGD(net.parameters(), lr=opt.learning_rate,
                              momentum=opt.momentum[0], weight_decay=opt.weight_decay)
    elif opt.solver == 'adam':
        optimizer = optim.Adam(net.parameters(), lr=opt.learning_rate,
                               betas=opt.momentum, weight_decay=opt.weight_decay)
    else:
        raise TypeError('unknown solver type %s' % opt.solver)

    # Resume
    title = 'STAN'
    minloss = float('inf')

    opt.checkpoint = osp.join(osp.join(opt.checkpoint, opt.valset))
    if not osp.exists(opt.checkpoint):
        os.mkdir(opt.checkpoint)

    if opt.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint {}'.format(opt.resume))
        assert os.path.isfile(opt.resume), 'Error: no checkpoint directory found!'
        # opt.checkpoint = os.path.dirname(opt.resume)
        checkpoint = torch.load(opt.resume)
        minloss = checkpoint['minloss']
        start_epoch = checkpoint['epoch']
        net.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        skips = checkpoint['max_skip']

        try:
            if isinstance(skips, list):
                for idx, skip in enumerate(skips):
                    trainloader.dataset.datasets[idx].set_max_skip(skip)
            else:
                trainloader.dataset.set_max_skip(skips)
        except Exception:
            print('[Warning] Initializing max skip failed')

        logger = Logger(os.path.join(opt.checkpoint, opt.mode + '_log.txt'), resume=True)
    else:
        if opt.initial:
            print('==> Initialize model with weight file {}'.format(opt.initial))
            weight = torch.load(opt.initial)
            if isinstance(weight, OrderedDict):
                net.load_param(weight)
            else:
                net.load_param(weight['state_dict'])

        logger = Logger(os.path.join(opt.checkpoint, opt.mode + '_log.txt'), resume=False)
        start_epoch = 0
        logger.set_items(['Epoch', 'LR', 'Train Loss'])

    # Train and val
    # Replay the LR schedule up to start_epoch when resuming.
    for epoch in range(start_epoch):
        adjust_learning_rate(optimizer, epoch, opt)

    for epoch in range(start_epoch, opt.epochs):
        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, opt.epochs, opt.learning_rate))
        adjust_learning_rate(optimizer, epoch, opt)

        train_loss = train(trainloader,
                           model=net,
                           criterion=criterion,
                           optimizer=optimizer,
                           epoch=epoch,
                           use_cuda=use_gpu,
                           iter_size=opt.iter_size,
                           mode=opt.mode,
                           threshold=opt.iou_threshold)

        if (epoch + 1) % opt.epoch_per_test == 0:
            test_loss = test(testloader,
                             model=net,
                             criterion=criterion,
                             epoch=epoch,
                             use_cuda=use_gpu,
                             opt=opt)

        # append logger file
        logger.log(epoch + 1, opt.learning_rate, train_loss)

        # adjust max skip
        if (epoch + 1) % opt.epochs_per_increment == 0:
            if isinstance(trainloader.dataset, data.ConcatDataset):
                for dataset in trainloader.dataset.datasets:
                    dataset.increase_max_skip()
            else:
                trainloader.dataset.increase_max_skip()

        # save model
        is_best = train_loss <= minloss
        minloss = min(minloss, train_loss)

        skips = [ds.max_skip for ds in trainloader.dataset.datasets] \
            if isinstance(trainloader.dataset, data.ConcatDataset) \
            else trainloader.dataset.max_skip

        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': net.state_dict(),
            'loss': train_loss,
            'minloss': minloss,
            'optimizer': optimizer.state_dict(),
            'max_skip': skips,
        }, epoch + 1, is_best, checkpoint=opt.checkpoint, filename=opt.mode)

    logger.close()

    print('minimum loss:')
    print(minloss)
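# Both VOS training loops above call adjust_learning_rate(optimizer, epoch, opt) once
# per epoch. Its definition is not shown in this file; as an illustration only, a
# common epoch-based step decay looks like the sketch below. The decay factor and
# milestones here are assumptions, not the repository's actual values.
def adjust_learning_rate_step_sketch(optimizer, epoch, opt, gamma=0.1, milestones=(30, 60)):
    # Multiply the base LR by gamma for every milestone already passed.
    lr = opt.learning_rate * (gamma ** sum(epoch >= m for m in milestones))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr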
def main():
    # make save dir
    if args.local_rank == 0:
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

    # for tensorboard logs
    tb_path = osp.join(args.save_dir, "runs")
    writer = SummaryWriter(tb_path)

    # launch the logger
    Log.init(log_level=args.log_level,
             log_file=osp.join(args.save_dir, args.log_file),
             log_format=args.log_format,
             rewrite=args.rewrite,
             stdout_level=args.stdout_level)

    # RGB or BGR input (RGB for ImageNet-pretrained models, BGR for Caffe-pretrained models)
    if args.rgb:
        IMG_MEAN = np.array((0.485, 0.456, 0.406), dtype=np.float32)
        IMG_VARS = np.array((0.229, 0.224, 0.225), dtype=np.float32)
    else:
        IMG_MEAN = np.array((104.00698793, 116.66876762, 122.67891434), dtype=np.float32)
        IMG_VARS = np.array((1, 1, 1), dtype=np.float32)

    # set models
    import libs.models as models
    deeplab = models.__dict__[args.arch](num_classes=args.num_classes)
    # print(deeplab)
    if args.restore_from is not None:
        print("LOADING FROM PRETRAINED MODEL")
        saved_state_dict = torch.load(args.restore_from, map_location=torch.device('cpu'))
        new_params = deeplab.state_dict().copy()
        for i in saved_state_dict:
            i_parts = i.split('.')
            if not i_parts[0] == 'fc':
                new_params['.'.join(i_parts[0:])] = saved_state_dict[i]
        Log.info("load pretrained models")
        deeplab.load_state_dict(new_params, strict=False)
    else:
        Log.info("train from scratch")

    args.world_size = 1
    if 'WORLD_SIZE' in os.environ and args.apex:
        args.apex = int(os.environ['WORLD_SIZE']) > 1
        args.world_size = int(os.environ['WORLD_SIZE'])
        print("Total world size: ", int(os.environ['WORLD_SIZE']))

    if args.gpu is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    h, w = args.input_size, args.input_size
    input_size = (h, w)

    # Set the device according to local_rank (distributed init disabled for single-GPU training).
    # torch.cuda.set_device(args.local_rank)
    # Log.info("Local Rank: {}".format(args.local_rank))
    # torch.distributed.init_process_group(backend='nccl', init_method='env://')

    # set optimizer
    optimizer = optim.SGD(
        [{'params': filter(lambda p: p.requires_grad, deeplab.parameters()),
          'lr': args.learning_rate}],
        lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay)
    optimizer.zero_grad()

    deeplab.cuda()

    # models transformation (distributed wrappers disabled)
    # model = DistributedDataParallel(deeplab)
    # model = apex.parallel.convert_syncbn_model(model)
    model = deeplab
    model.train()
    model.float()
    model.cuda()

    # set loss function
    if args.ohem:
        criterion = CriterionOhemDSN(thresh=args.ohem_thres, min_kept=args.ohem_keep)  # OHEM CrossEntropy
    else:
        criterion = CriterionDSN()  # CrossEntropy
    criterion.cuda()
    cudnn.benchmark = True

    # if args.world_size == 1:
    #     print(model)

    # This differs slightly from single-node multi-GPU training: in distributed
    # training each trainloader belongs to one process that samples from the dataset class.
    batch_size = args.batch_size_per_gpu
    max_iters = args.num_steps * batch_size

    # set data loader
    # PASCAL VOC -----------------
    from torchvision import transforms
    augs = transforms.Compose([
        transforms.RandomResizedCrop(300),
        transforms.RandomRotation(20),
        transforms.ToTensor(),
        transforms.Normalize([0.4589, 0.4355, 0.4032], [0.2239, 0.2186, 0.2206])
    ])

    if args.data_set == 'pascalvoc':
        data_set = VOCSegmentation(args.data_dir, image_set='val',
                                   scale=args.random_scale, mean=IMG_MEAN,
                                   vars=IMG_VARS, transforms=augs)
    elif args.data_set == 'cityscapes':
        data_set = Cityscapes(args.data_dir, args.data_list, crop_size=input_size,
                              scale=args.random_scale, mirror=args.random_mirror,
                              mean=IMG_MEAN, vars=IMG_VARS, RGB=args.rgb)

    # instance_count = [0] * 22
    # for _, label in data_set:
    #     for pixel in label.flatten():
    #         if int(pixel) == 255:
    #             pixel = 21
    #         instance_count[int(pixel)] += 1
    # print(instance_count)
    # sys.exit()

    trainloader = data.DataLoader(data_set, batch_size=args.batch_size_per_gpu,
                                  shuffle=True, num_workers=args.num_workers,
                                  pin_memory=True)
    print("trainloader", len(trainloader))

    torch.cuda.empty_cache()

    # start training
    iter_no = 0
    for epoch in range(args.num_steps):
        print("epoch " + str(epoch + 1))
        total_loss = 0
        total_correct = 0
        for i_iter, batch in enumerate(trainloader):
            if i_iter % 100 == 0:
                print("iteration " + str(i_iter + 1))
            images, labels = batch
            images = images.cuda()
            labels = labels.long().cuda()
            optimizer.zero_grad()
            lr = adjust_learning_rate(optimizer, args, i_iter, len(trainloader))
            preds = model(images)
            loss = criterion(preds, labels)
            total_loss += loss.item()
            writer.add_scalar("Loss_vs_Iteration", loss.item(), iter_no)
            iter_no += 1
            loss.backward()
            optimizer.step()

        writer.add_scalar("Loss_vs_Epoch", total_loss / len(trainloader), epoch)
        # writer.add_scalar("Correct", total_correct, epoch)
        # writer.add_scalar("Accuracy", total_correct / len(data_set), epoch)

        # reduce_loss = all_reduce_tensor(loss, world_size=args.gpu_num)
        # if args.local_rank == 0:
        #     Log.info('iter = {} of {} completed, lr={}, loss = {}'.format(
        #         i_iter, len(trainloader), lr, reduce_loss.data.cpu().numpy()))
        #     if i_iter % args.save_pred_every == 0 and i_iter > args.save_start:
        #         print('save models ...')
        #         torch.save(deeplab.state_dict(),
        #                    osp.join(args.save_dir, str(args.arch) + str(i_iter) + '.pth'))

        if args.local_rank == 0:
            if epoch % 9 == 0:
                print('save models ...')
                torch.save(deeplab.state_dict(),
                           osp.join(args.save_dir, str(args.arch) + str(i_iter) + '.pth'))

    writer.close()

    end = timeit.default_timer()
    if args.local_rank == 0:
        Log.info("Training cost: " + str(end - start) + ' seconds')
        Log.info("Save final models")
        torch.save(deeplab.state_dict(),
                   osp.join(args.save_dir,
                            str(args.arch) + '_' + str(args.num_steps) + 'epoch_' +
                            str(args.batch_size_per_gpu) + '.pth'))
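# The segmentation training loops above call adjust_learning_rate(optimizer, args,
# i_iter, max_iter) once per iteration. Its definition is not part of this file; the
# sketch below shows the usual iteration-based poly policy such a helper implements
# (a hedged reconstruction, not the repository's exact code).
def adjust_learning_rate_poly_sketch(optimizer, args, i_iter, max_iter, power=0.9):
    # Poly decay: lr = base_lr * (1 - iter / max_iter) ** power
    lr = args.learning_rate * ((1.0 - float(i_iter) / max_iter) ** power)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr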