Example #1
def plot_lr( optim, args, hyper ):
    lr_hist = []

    batch_size = hyper.batch_size
    n_per_epoch = int( dataset_size / batch_size )
    print( "number of iterations per epoch:{}".format( n_per_epoch ) )

    start_epoch = args.start_epoch - 1
    end_epoch = start_epoch + args.epochs

    for epoch in range( start_epoch, end_epoch ):
        for i in range( n_per_epoch ):
            niter = epoch * n_per_epoch + i
            lr = adjust_learning_rate( optim, niter, hyper )
            lr_hist.append( lr )

    index = list( range( n_per_epoch * args.epochs ) )
    plt.plot( index, lr_hist )
    plt.show()
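
None of the examples on this page show the body of `adjust_learning_rate` itself; Example #1 only sweeps it over every iteration to plot the resulting schedule (it also relies on `dataset_size`, `plt`, and `adjust_learning_rate` being module-level names in the source project). A minimal sketch of an iteration-keyed implementation that would produce such a plot, assuming `hyper` exposes illustrative `base_lr`, `warmup_iters`, and `total_iters` fields, might look like:

import math

def adjust_learning_rate(optim, niter, hyper):
    # Linear warmup for the first hyper.warmup_iters iterations,
    # then cosine decay to zero over the remaining iterations.
    if niter < hyper.warmup_iters:
        lr = hyper.base_lr * (niter + 1) / hyper.warmup_iters
    else:
        progress = (niter - hyper.warmup_iters) / max(1, hyper.total_iters - hyper.warmup_iters)
        lr = 0.5 * hyper.base_lr * (1.0 + math.cos(math.pi * min(1.0, progress)))
    for group in optim.param_groups:
        group['lr'] = lr
    return lr
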
Example #2
        if N_batches is not None:
            train_loader = data.DataLoader(train_ds,
                                           batch_size=args.bs,
                                           num_workers=args.n_threads,
                                           sampler=LimitedRandomSampler(
                                               train_ds, N_batches, args.bs))
        else:
            train_loader = data.DataLoader(train_ds,
                                           batch_size=args.bs,
                                           num_workers=args.n_threads,
                                           shuffle=True)

        print(colored('==> ', 'blue') + 'Epoch:', epoch + 1, cur_snapshot)
        # Adjusting learning rate using the scheduler
        optimizer, cur_lr = adjust_learning_rate(optimizer, epoch + 1, args)
        print(colored('==> ', 'red') + 'LR:', cur_lr)
        # Training one epoch and measure the time
        start = time.time()
        train_loss = train_epoch(epoch, net, optimizer, train_loader,
                                 criterion, args.n_epoch)
        epoch_time = np.round(time.time() - start, 4)
        print(
            colored('==> ', 'green') +
            'Epoch training time: {} s.'.format(epoch_time))
        # If it is time to start the validation, we will do it
        # args.start_val can be used to avoid time-consuming validation
        # in the beginning of the training
        if epoch >= args.start_val:
            start = time.time()
            val_loss, probs, truth, _ = validate_epoch(net, val_loader,
Example #3
try:
    print("Training for %d epochs..." % NUM_EPOCHS)
    log_parameters = train_utils.log_init()
    for epoch in range(1, NUM_EPOCHS + 1):
        # perform training and validation
        train_loss, train_r_sq, train_accu, val_loss, val_r_sq, val_accu = train_utils.train_and_validate(
            perf_model,
            criterion,
            perf_optimizer,
            training_data,
            validation_data,
            METRIC
        )

        # adjust learning rate
        train_utils.adjust_learning_rate(perf_optimizer, epoch, ADJUST_EVERY)

        # log data for visualization later
        log_parameters = train_utils.log_epoch_stats(
            log_parameters,
            epoch,
            train_loss,
            train_r_sq,
            train_accu,
            val_loss,
            val_r_sq,
            val_accu
        )

        # print loss
        if epoch % PRINT_EVERY == 0:
Example #4
def train_or_eval(train, gpu, loader, model, criterion, optimizer, args, hyper,
                  epoch):
    phase = "train" if train else "test"
    model.train() if train else model.eval()

    losses = AverageMeter("Loss", ":.4e")
    top1 = AverageMeter("Accuracy1", ":6.2f")
    top5 = AverageMeter("Accuracy5", ":6.2f")
    prefix = "Epoch:[{}]".format(epoch + 1) if train else "Test: "
    progress = ProgressMeter(len(loader), [losses, top1, top5], prefix=prefix)

    if args.prof:
        print("Profiling started")
        torch.cuda.cudart().cudaProfilerStart()

    t_init = time.time()
    prefetcher = data_prefetcher(loader)
    with torch.set_grad_enabled(mode=train):
        for i, (images, target) in enumerate(prefetcher):
            niter = epoch * len(loader) + i

            if args.prof:
                torch.cuda.nvtx.range_push("Prof start iteration {}".format(i))

            if args.prof: torch.cuda.nvtx.range_push("forward")
            output = model(images)
            if args.prof: torch.cuda.nvtx.range_pop()

            loss = criterion(output, target)

            if train:
                lr = adjust_learning_rate(optimizer, niter, hyper, len(loader))

                optimizer.zero_grad()

                if args.prof: torch.cuda.nvtx.range_push("backward")
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
                if args.prof: torch.cuda.nvtx.range_pop()

                if args.prof: torch.cuda.nvtx.range_push("optimizer step")
                optimizer.step()
                if args.prof: torch.cuda.nvtx.range_pop()

            distributed = args.gpu is None
            publish_stats = (not distributed or gpu == 0) and i % 100 == 0
            if not train or publish_stats:
                acc1, acc5 = accuracy(output.detach(), target, topk=(1, 5))
                losses.update(loss.item(), images.size(0))
                top1.update(acc1[0], images.size(0))
                top5.update(acc5[0], images.size(0))

            if publish_stats:
                progress.display(i)

            if train and publish_stats:
                args.writer.add_scalar("Loss/{}".format(phase), loss.item(),
                                       niter)
                args.writer.add_scalar("Accuracy/{}".format(phase), acc1,
                                       niter)
                args.writer.add_scalar("Loss/Accuracy", acc1, lr * 10000)

            if args.prof: torch.cuda.nvtx.range_pop()
            if args.prof and i == 20:
                break

    if args.prof:
        print("Profiling stopped")
        torch.cuda.cudart().cudaProfilerStop()

    print("Total {} epoch time: {}".format(phase, HTIME(time.time() - t_init)))
    return top1.avg
Example #5
def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('--DATASET_PATH', type=str, default='/home/zhangdong/database/DUTS/')
	parser.add_argument('--WEIGHTS_PATH', type=str, default='/home/yangle/DAVIS/result/models/')
	parser.add_argument('--EXPERIMENT', type=str, default='/home/yangle/DAVIS/result/TrainNet/')
	parser.add_argument('--N_EPOCHS', type=int, default=200)
	parser.add_argument('--MAX_PATIENCE', type=int, default=30)
	parser.add_argument('--batch_size', type=int, default=32)
	parser.add_argument('--seed', type=int, default=0)
	parser.add_argument('--N_CLASSES', type=int, default=2)
	parser.add_argument('--LEARNING_RATE', type=float, default=1e-4)
	parser.add_argument('--LR_DECAY', type=float, default=0.995)
	parser.add_argument('--DECAY_LR_EVERY_N_EPOCHS', type=int, default=1)
	parser.add_argument('--WEIGHT_DECAY', type=float, default=0.0001)
	parser.add_argument('--CUDNN', type=bool, default=True)
	args = parser.parse_args()

	torch.cuda.manual_seed(args.seed)
	cudnn.benchmark = args.CUDNN

	normalize = transforms.Normalize(mean=saliency.mean, std=saliency.std)
	train_joint_transformer_img = transforms.Compose([joint_transforms.JointResize(224)])
	mask_size_list = [14, 28, 56, 112, 224]

	train_dset = saliency.Saliency(
		args.DATASET_PATH, 'train',train_joint_transformer_img, mask_size_list,
		transform=transforms.Compose([transforms.ToTensor(), normalize, ]))
	train_loader = torch.utils.data.DataLoader(
		train_dset, batch_size=args.batch_size, shuffle=True)

	test_joint_transforms_img = transforms.Compose([joint_transforms.JointResize(224)])
	val_dset = saliency.Saliency(
		args.DATASET_PATH, 'val',test_joint_transforms_img, mask_size_list,
		transform=transforms.Compose([transforms.ToTensor(),normalize]))
	val_loader = torch.utils.data.DataLoader(
		val_dset, batch_size=args.batch_size, shuffle=False)

	print("TrainImages: %d" % len(train_loader.dataset.imgs))
	print("ValImages: %d" % len(val_loader.dataset.imgs))

	# example_inputs, example_targets = next(iter(train_loader))
	# print("InputsBatchSize: ", example_inputs.size())
	# print("TargetsBatchSize: ", len(example_targets))
	# print("\nInput (size, max, min) ---")
	# # input
	# i = example_inputs[0]
	# print(i.size())
	# print(i.max())
	# print(i.min())
	# print("Target (size, max, min) ---")
	# # target
	# for mask in example_targets:
	# 	print(mask.size())
	# 	print(mask.max())
	# 	print(mask.min())


	# initialize ResNet50 from the pre-trained classification model
	resnet = torchvision.models.resnet50(pretrained=True)
	pre_trained_dict = resnet.state_dict()
	model = SegNet.resnet50()
	model_dict = model.state_dict()

	# 1. filter out unnecessary keys
	pre_trained_dict = {k: v for k, v in pre_trained_dict.items() if k in model_dict}
	# 2. overwrite entries in the existing state dict
	model_dict.update(pre_trained_dict)
	# 3. load the new state dict
	model.load_state_dict(model_dict)
	model = model.cuda()
	#model = torch.nn.DataParallel(model).cuda()

	print('  + Number of params: {}'.format(
		sum([p.data.nelement() for p in model.parameters()])))
	# model.apply(utils.weights_init)
	optimizer = optim.RMSprop(model.parameters(), lr=args.LEARNING_RATE,
							  weight_decay=args.WEIGHT_DECAY, eps=1e-12)
	criterion = nn.NLLLoss2d().cuda()

	exp_dir = args.EXPERIMENT + 'test'
	if os.path.exists(exp_dir):
		shutil.rmtree(exp_dir)

	exp = experiment.Experiment('test', args.EXPERIMENT)
	exp.init()

	START_EPOCH = exp.epoch
	END_EPOCH = START_EPOCH + args.N_EPOCHS

	for epoch in range(START_EPOCH, END_EPOCH):

		since = time.time()

		# ### Train ###
		trn_loss, trn_err = utils.train(model, train_loader, optimizer, criterion, epoch)
		print('Epoch {:d}: Train - Loss: {:.4f}\tErr: {:.4f}'.format(epoch, trn_loss, trn_err))
		time_elapsed = time.time() - since
		print('Train Time {:.0f}m {:.0f}s'.format(
			time_elapsed // 60, time_elapsed % 60))

		### Test ###
		val_loss, val_err = utils.test(model, val_loader, criterion, epoch)
		print('Val - Loss: {:.4f}, Error: {:.4f}'.format(val_loss, val_err))
		time_elapsed = time.time() - since
		print('Total Time {:.0f}m {:.0f}s\n'.format(
			time_elapsed // 60, time_elapsed % 60))

		### Save Metrics ###
		exp.save_history('train', trn_loss, trn_err)
		exp.save_history('val', val_loss, val_err)

		### Checkpoint ###
		exp.save_weights(model, trn_loss, val_loss, trn_err, val_err)
		exp.save_optimizer(optimizer, val_loss)

		## Early Stopping ##
		if (epoch - exp.best_val_loss_epoch) > args.MAX_PATIENCE:
			print(("Early stopping at epoch %d since no "
				   +"better loss found since epoch %.3").format(epoch, exp.best_val_loss))
			break

		# Adjust Lr ###--old method
		utils.adjust_learning_rate(args.LEARNING_RATE, args.LR_DECAY, optimizer,
							 epoch, args.DECAY_LR_EVERY_N_EPOCHS)

		exp.epoch += 1
Example #6
File: train.py  Project: wooginawunan/casme
def main():
    global args

    ## create models and optimizers
    print("=> creating models...")
    classifier = archs.resnet50shared(pretrained=True).to(device)
    decoder = archs.decoder(final_upsample_mode=args.upsample).to(device)

    optimizer = {}
    optimizer['classifier'] = torch.optim.SGD(classifier.parameters(),
                                              args.lr,
                                              momentum=args.momentum,
                                              weight_decay=args.weight_decay)
    optimizer['decoder'] = torch.optim.Adam(decoder.parameters(),
                                            args.lr_casme,
                                            weight_decay=args.weight_decay)

    cudnn.benchmark = True

    ## data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=False,
                                               sampler=None)

    val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=False)

    ## training loop
    for epoch in range(args.epochs):
        epoch_start_time = time.time()

        adjust_learning_rate(optimizer, epoch, args)

        ## train for one epoch
        tr_s = train_or_eval(train_loader, classifier, decoder, True,
                             optimizer, epoch)

        ## evaluate on validation set
        val_s = train_or_eval(val_loader, classifier, decoder)

        ## save checkpoint
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict_classifier': classifier.state_dict(),
                'state_dict_decoder': decoder.state_dict(),
                'optimizer_classifier': optimizer['classifier'].state_dict(),
                'optimizer_decoder': optimizer['decoder'].state_dict(),
                'args': args,
            }, args)

        ## log
        with open(args.log_path, 'a') as f:
            f.write(
                str(epoch + 1) + ' ' + str(time.time() - epoch_start_time) +
                ' ' + tr_s['acc'] + ' ' + val_s['acc'] + ' ' + tr_s['acc_m'] +
                ' ' + val_s['acc_m'] + ' ' + tr_s['avg_mask'] + ' ' +
                val_s['avg_mask'] + ' ' + tr_s['std_mask'] + ' ' +
                val_s['std_mask'] + ' ' + tr_s['entropy'] + ' ' +
                val_s['entropy'] + ' ' + tr_s['tv'] + ' ' + val_s['tv'] + '\n')
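
Note that in this example `optimizer` is a plain dict holding two optimizers (SGD for the classifier, Adam for the decoder), so the project's `adjust_learning_rate(optimizer, epoch, args)` has to update both. A hedged sketch of such a helper, assuming a simple divide-by-10-every-30-epochs policy rather than whatever schedule casme actually uses:

def adjust_learning_rate(optimizer, epoch, args):
    # optimizer is a dict: {'classifier': SGD(...), 'decoder': Adam(...)}.
    # Each entry keeps its own base learning rate (args.lr / args.lr_casme).
    for name, base_lr in (('classifier', args.lr), ('decoder', args.lr_casme)):
        lr = base_lr * (0.1 ** (epoch // 30))
        for group in optimizer[name].param_groups:
            group['lr'] = lr
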
Example #7
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    # Define training directory in case number of classes is required by the model instance
    main_file = args.root / args.main_file
    num_classes = len( [cur_dir.name for cur_dir in main_file.iterdir() 
                        if len(list(cur_dir.iterdir())) >= args.min_allowed_imgs] )
    if not num_classes == 1000:
        print('[INFO]: Using {} classes instead of 1000 ImageNet classes'.format(num_classes))

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.model))
        model = models.__dict__[args.model](num_classes=num_classes)
    
    
    if args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    if args.loss_func in ['cross', 'cross_entropy', 'entropy']:
        criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    
    elif args.loss_func in ['l2', 'l2_squared', 'squared', 'MSE']:
        print('[INFO] Using MSE loss function instead of Cross Entropy.')
        args.loss_func = 'l2'
        criterion = nn.MSELoss().cuda(args.gpu)

    if args.opt.lower() == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    elif args.opt.lower() == 'adam':
        print('[INFO] Using Adam optimizer instead of SGD.')
        optimizer = torch.optim.Adam(model.parameters(), args.lr,
                                    weight_decay=args.weight_decay)
    elif args.opt.lower() == 'lbfgs':
        print('[INFO] Using LBFGS optimizer instead of SGD.')
        optimizer = torch.optim.LBFGS(model.parameters(), args.lr,
                                      history_size=20
                                     )
    else:
        raise ValueError('Incorrect optimizer selection {}'.format(args.opt))
        
    
    if args.initial_lr:
        param_setup = [{'params': cur_lay.parameters()} 
                       for i, cur_lay in enumerate(model)
                       if 'weight' in dir(cur_lay)]
        optimizer = torch.optim.SGD(param_setup, args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)


    if args.schedule_lr:
        scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, 
                                                      args.lr / 100, args.lr)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    test_file = args.root / args.test_file
    if args.sub_file:
        sub_file = args.root / args.sub_file
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_trans_list = []
    if not args.norandomcrop:
        train_trans_list.append(transforms.RandomResizedCrop(224))
    if not args.norandomflip:
        train_trans_list.append(transforms.RandomHorizontalFlip())
    train_trans_list = train_trans_list + [transforms.ToTensor(), normalize]
    
    
    train_dataset = datasets.ImageFolder(
        main_file,
        transforms.Compose(train_trans_list)
    )
    
    test_dataset = datasets.ImageFolder(test_file, 
                                    transforms.Compose([
                                        transforms.Resize(256),
                                        transforms.CenterCrop(224),
                                        transforms.ToTensor(),
                                        normalize,
                                    ]), 
                                    train=False)
    
    if args.sub_file:
        sub_dataset = datasets.ImageFolder(sub_file, 
                                    transforms.Compose([
                                        transforms.Resize(256),
                                        transforms.CenterCrop(224),
                                        transforms.ToTensor(),
                                        normalize,
                                    ]), 
                                    train=False)
    

    if args.train_size or args.select_class_list:
        if not args.select_class_list:
            args.select_class_list = list(range(args.num_classes))
        sel_idx = []
        for lbl in args.select_class_list:
            lbl_idx = [i for i, t in enumerate(train_dataset.targets) if t == lbl]
            sel_idx += random.sample(lbl_idx, (args.train_size if args.train_size else len(lbl_idx)))
        train_dataset.samples = train_dataset.samples[sel_idx]
        train_dataset.targets = train_dataset.targets[sel_idx]
        for cur_idx, cur_cls in enumerate(args.select_class_list):
            train_dataset.targets[train_dataset.targets==cur_cls] = cur_idx
        
        sel_idx = []
        for lbl in args.select_class_list:
            lbl_idx = [i for i, t in enumerate(test_dataset.targets) if t == lbl]
            sel_idx += lbl_idx
        test_dataset.samples = test_dataset.samples[sel_idx]
        test_dataset.targets = test_dataset.targets[sel_idx]
        for cur_idx, cur_cls in enumerate(args.select_class_list):
            test_dataset.targets[test_dataset.targets==cur_cls] = cur_idx
    
        
    # Inject symmetric noise to training set
    if args.inject_noise:
        im_per_class = int(len(train_dataset) / args.num_classes)
        noisy_labels = np.zeros((len(train_dataset),), dtype=int)
        num_shuffle = int(im_per_class * args.inject_noise)
        for i in range(args.num_classes):
            noisy_idx = []
            cur_idx = [idx for idx, label in enumerate(train_dataset.targets) if label==i]
            shuffled_idx = random.sample(cur_idx, len(cur_idx))
            for r in range(args.num_classes):
                noisy_idx += [r for idx in shuffled_idx[im_per_class - (r+1)*num_shuffle:im_per_class - r*num_shuffle]]
            noisy_idx += [i for idx in shuffled_idx[:im_per_class - args.num_classes*num_shuffle]]
            noisy_labels[cur_idx] = np.array(noisy_idx)
        train_dataset.targets = noisy_labels
    
    train_sampler = None  # single-process training; also referenced by the mix_cifar loader below

    # TODO: Replace fraction of one training set randomly with another.
    if args.mix_cifar:
        assert args.mix_rate, "mix_rate should be given when mix_cifar is set"
        assert args.traindir2, "traindir2 must be given when mix_cifar is set"
        assert not args.inject_noise, "inject_noise should not be given when mix_cifar is set"
        assert not args.testdir2, "only one testdir can be set when mix_cifar is set"
        
        traindir2 = os.path.join(args.root, args.traindir2)
        clean_dataset = datasets.ImageFolder(
            traindir2,
            transforms.Compose([
                transforms.ToTensor(),
                normalize,
            ]))
        
        im_per_class = int(len(train_dataset) / len(train_dataset.classes))
        num_shuffle = int(im_per_class * args.mix_rate)
        shuffled_samples = []
        clean_samples = []
        for i in range(len(train_dataset.classes)):
            cur_imgs = [s[0] for s in train_dataset.samples if s[1]==i]
            cur_imgs = random.sample(cur_imgs, im_per_class - num_shuffle)
            mix_imgs = [s[0] for s in clean_dataset.samples if s[1]==i]
            mix_imgs = random.sample(mix_imgs, num_shuffle)
            clean_samples += [(img, i) for img in mix_imgs]
            shuffled_samples += [(img, i) for img in cur_imgs + mix_imgs]
            
        train_dataset.samples = shuffled_samples
        clean_dataset.samples = clean_samples
        
        val_loader2 = torch.utils.data.DataLoader(
            clean_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
            num_workers=args.workers, pin_memory=True, sampler=train_sampler)
        

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)
    
    if args.sub_file:
        val_loader2 = torch.utils.data.DataLoader(
            sub_dataset,
            batch_size=args.batch_size, shuffle=False,
            num_workers=args.workers, pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return
    
    if args.compute_jacobian:
        gvec = (torch.randn((1, args.num_classes)) / len(train_dataset)).cuda(args.gpu, non_blocking=True)
    
    # TODO: tracking weights of the model
    if args.track_weights:
        layer_idx = [i for i, cl in enumerate(model) if 'weight' in dir(cl)]
        cur_weights = get_weights(model, layer_idx)
        if args.track_weights == 'filters':
            filter_w_file = args.outpath / 'filter_weights.pickle'
            filter_w_dict = {('layer_'+str(l)): [] for i, l in enumerate(layer_idx) 
                             if cur_weights[i].ndim > 2}
        if args.track_weights == 'norm':
            w_norm_dict = {('layer_'+str(l)): 0 for i, l in enumerate(layer_idx) 
                             if cur_weights[i].ndim > 1}
    
    # TODO: scaling the weights of the model manually
    if args.scale_weights:
        scale_dict = {}
        for cur_l, cur_w in enumerate(cur_weights):
            if not (cur_w.ndim > 2):
                continue
            scale_dict['layer_' + str(layer_idx[cur_l])] = np.linalg.norm(cur_w.flatten()).item()
        rescale_weights(model, scale_dict)
    
    save_config(args)
    train_log = []
    log_file = args.outpath / 'log.json'

    for epoch in range(args.start_epoch, args.epochs):
        if (epoch < args.max_lr_adjusting_epoch) and (not args.schedule_lr):
            adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)
        
        epoch_log = {'epoch': epoch}
        
        # update learning rate with scheduler
        if args.schedule_lr:
            scheduler.step()

        # evaluate on validation set
        dum_acc1, dum_acc5 = validate(train_loader, model, criterion, args)
        epoch_log.update({'train': {'acc1': dum_acc1.cpu().numpy().item(), 
                                    'acc5': dum_acc5.cpu().numpy().item()}})
        
        acc1, acc5 = validate(val_loader, model, criterion, args)
        epoch_log.update({'test': {'acc1': acc1.cpu().numpy().item(), 
                                   'acc5': acc5.cpu().numpy().item()}})
        
        if args.sub_file or args.mix_cifar:
            dum_acc1, dum_acc5 = validate(val_loader2, model, criterion, args)
            epoch_log.update({'subset': {'acc1': dum_acc1.cpu().numpy().item(), 
                                         'acc5': dum_acc5.cpu().numpy().item()}})

        # compute the jacobian of the network
        if args.compute_jacobian:
            jTg = get_jacobian_prod(train_loader, model, criterion, gvec, args)
            epoch_log.update({'J_norm': {str(k): v.item() for k, v in enumerate(jTg)}})
        
        # TODO: tracking the weights of the layers
        if args.track_weights:
            w_change_dict = {('layer_'+str(l)): 0 for l in layer_idx}
            new_weights = get_weights(model, layer_idx)
            
            if args.track_weights == 'norm':
                for cur_l, cur_w in enumerate(new_weights):
                    if not (cur_w.ndim > 1):
                        continue
                    w_norm_dict['layer_' + str(layer_idx[cur_l])] = np.linalg.norm(cur_w.flatten()).item()
                epoch_log.update({'w_norm': {k: v for k, v in w_norm_dict.items()}})
                
            else:
                for cur_l in range(len(layer_idx)):
                    cur_change = new_weights[cur_l] - cur_weights[cur_l]

                    if args.track_weights == 'filters':
                        if cur_change.ndim > 2:
                            cur_change = np.mean(cur_change, axis=(2,3))
                            filter_w_dict['layer_' + str(layer_idx[cur_l])].append(np.absolute(cur_change))

                    chng = np.absolute(np.mean(cur_change))
                    w_change_dict['layer_' + str(layer_idx[cur_l])] = chng.item()

                epoch_log.update({'weight_change': {k: v for k, v in w_change_dict.items()}})

                if args.track_weights == 'filters':
                    with open(filter_w_file, 'wb') as fn:
                        pickle.dump({k: np.stack(v) for k, v in filter_w_dict.items()}, fn)

                cur_weights = [wh for wh in new_weights]
                new_weights = None
        
        train_log.append(epoch_log)
        with open(log_file, 'w') as fn:
            json.dump(train_log, fn, indent=2)
        
        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
            

        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_acc1': best_acc1,
            'optimizer' : optimizer.state_dict(),
        }, is_best)
Example #8
def train(args):
    args.print_freq = 100
    args.gpu = None
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # load data
    train_dl, valid_dl, test_dl = load_imagenet(args)
    # define model
    model = torchvision.models.resnet50(pretrained=False)
    # multiple gpus
    model = torch.nn.DataParallel(model).cuda()
    loss_fn = torch.nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=0.9,
                                weight_decay=args.weight_decay)

    model_dir = gen_model_dir(args)
    model_dir.mkdir(parents=True, exist_ok=True)

    torch.backends.cudnn.benchmark = True
    best_acc1 = 0
    for epoch in range(args.n_epochs):
        adjust_learning_rate(optimizer, epoch, args)
        batch_time = AverageMeter('Time', ':6.3f')
        data_time = AverageMeter('Data', ':6.3f')
        losses = AverageMeter('Loss', ':.4e')
        top1 = AverageMeter('Acc@1', ':6.2f')
        top5 = AverageMeter('Acc@5', ':6.2f')
        progress = ProgressMeter(len(train_dl),
                                 [batch_time, data_time, losses, top1, top5],
                                 prefix="Epoch: [{}]".format(epoch))

        # switch to train mode
        model.train()
        end = time.time()
        for batch_idx, (images, target) in enumerate(train_dl):
            # measure data loading time
            data_time.update(time.time() - end)

            # if args.gpu is not None:
            images = images.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

            # compute output
            output = model(images)
            loss = loss_fn(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % args.print_freq == 0:
                progress.display(batch_idx)

        # evaluate on validation set
        acc1 = validate(valid_dl, model, loss_fn, args)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        torch.save(
            {
                'epoch': epoch + 1,
                'model_weight': model.state_dict(),
                'heldout_best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            }, Path(model_dir, 'model'))
        if is_best:
            shutil.copyfile(Path(model_dir, 'model'),
                            Path(model_dir, 'model_best'))

    # load best model
    with open(Path(model_dir, "model_best"), 'rb') as f:
        params = torch.load(f)
        model.load_state_dict(params['model_weight'])

    # test
    model.eval()
    # evaluate on test set
    acc_test = validate(test_dl, model, loss_fn, args)

    print('epoch: {}, val acc: {:.4f}, test acc: {:.4f}'.format(
        params["epoch"], params["heldout_best_acc1"], acc_test))

    with open(Path(model_dir, "res.json"), 'w') as fp:
        json.dump(
            {
                'epoch': params["epoch"],
                'heldout_best_acc1': params["heldout_best_acc1"].item(),
                'test_best_acc1': acc_test.item(),
            }, fp)
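
Examples #7 and #8 both call an epoch-keyed `adjust_learning_rate(optimizer, epoch, args)`. In the reference PyTorch ImageNet recipe this helper is a simple step decay; a sketch along those lines, assuming that is what these projects intend:

def adjust_learning_rate(optimizer, epoch, args):
    # Step decay: start from args.lr and divide by 10 every 30 epochs.
    lr = args.lr * (0.1 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr
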
Example #9
    def train(self, trainloader, epoch):
        # criterion
        # print('\nEpoch: %d/%d  Epoch in filtering step: %d/%d[wait: %d]'
        #       % (epoch+1, self.max_total_epochs, epoch_in_filtering+1,
        #          self.max_epochs_per_filtering, self.wait))
        # print('Filtering step: %d  Seed: %d' % (iter_filtering, self.seed))
        class_criterion = nn.CrossEntropyLoss(reduction='sum', ignore_index=-1)
        if self.consistency_type == 'mse':
            consistency_criterion = losses.softmax_mse_loss
        elif self.consistency_type == 'kl':
            consistency_criterion = losses.softmax_kl_loss
        else:
            assert False, self.consistency_type
        residual_logit_criterion = losses.symmetric_mse_loss

        self.model.train()
        self.ema_model.train()

        running_class_loss = 0
        running_consistency_loss = 0
        running_res_loss = 0
        running_loss = 0
        correct = 0
        total = 0

        for batch_idx, ((inputs, ema_inputs),
                        targets) in enumerate(trainloader):

            adjust_learning_rate(self.optimizer, epoch, batch_idx,
                                 len(trainloader))
            inputs, ema_inputs, targets = (inputs.cuda(), ema_inputs.cuda(),
                                           targets.cuda())
            outputs = self.model(inputs)
            ema_outputs = self.ema_model(ema_inputs)

            minibatch_size = len(targets)
            labeled_minibatch_size = torch.sum(targets != -1).item()

            logit1, logit2 = outputs
            ema_logit, _ = ema_outputs
            if self.logit_distance_cost >= 0:
                class_logit, cons_logit = logit1, logit2
                res_loss = self.logit_distance_cost * residual_logit_criterion(
                    class_logit, cons_logit) / minibatch_size
            else:
                class_logit, cons_logit = logit1, logit1
                res_loss = 0
            class_loss = class_criterion(class_logit, targets) / minibatch_size
            consistency_weight = get_current_consistency_weight(epoch)
            consistency_loss = consistency_weight * consistency_criterion(
                cons_logit, ema_logit) / minibatch_size

            _, predicted = torch.max(class_logit, 1)
            total += labeled_minibatch_size
            correct += predicted.eq(targets).cpu().sum().item()

            loss = class_loss + consistency_loss + res_loss
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            self.global_step += 1
            update_ema_variables(self.model, self.ema_model, self.ema_decay,
                                 self.global_step)

            running_res_loss += res_loss.item()
            running_class_loss += class_loss.item()
            running_consistency_loss += consistency_loss.item()
            running_loss += loss.item()

            progress_bar(
                batch_idx, len(trainloader),
                'Loss: %.3f | ClassLoss = %.3f | ConsLoss: %.3f | ResLoss: %.3f | Acc: %.3f%% (%d/%d) | lr: %.6f'
                % (running_loss / (batch_idx + 1), running_class_loss /
                   (batch_idx + 1), running_consistency_loss /
                   (batch_idx + 1), running_res_loss /
                   (batch_idx + 1), 100. * correct / total, correct, total,
                   self.optimizer.param_groups[-1]['lr']))
        loss = {
            'loss': running_loss / (batch_idx + 1),
            'class_loss': running_class_loss / (batch_idx + 1),
            'consistency_loss': running_consistency_loss / (batch_idx + 1),
            'res_loss': running_res_loss / (batch_idx + 1)
        }
        acc = 100. * correct / total

        return loss['loss'], acc
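
Here `adjust_learning_rate(self.optimizer, epoch, batch_idx, len(trainloader))` is called on every batch, which is the fractional-epoch convention common in mean-teacher style training. A sketch of such a schedule, with purely illustrative default hyper-parameters (the real project keeps them in its own config):

import math

def adjust_learning_rate(optimizer, epoch, batch_idx, batches_per_epoch,
                         base_lr=0.1, warmup_epochs=5, total_epochs=180):
    # Convert (epoch, batch) to a fractional epoch, ramp up linearly during
    # warmup, then follow a cosine rampdown to zero at total_epochs.
    t = epoch + batch_idx / batches_per_epoch
    if t < warmup_epochs:
        lr = base_lr * t / warmup_epochs
    else:
        frac = (t - warmup_epochs) / max(1e-8, total_epochs - warmup_epochs)
        lr = 0.5 * base_lr * (1.0 + math.cos(math.pi * min(1.0, frac)))
    for group in optimizer.param_groups:
        group['lr'] = lr
    return lr
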
Example #10
def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('--DATASET_PATH', type=str, default='/disk5/yangle/PAMI/dataset/fc-resnet/')
	parser.add_argument('--EXPERIMENT', type=str, default='/disk5/yangle/PAMI/result/LearnModel/')
	# parser.add_argument('--DATASET_PATH', type=str, default='/disk1/hpl/segmentation/dataset/')
	# parser.add_argument('--EXPERIMENT', type=str, default='/disk1/hpl/segmentation/model/model_baselinexin/')
	parser.add_argument('--N_EPOCHS', type=int, default=200)
	parser.add_argument('--MAX_PATIENCE', type=int, default=30)
	parser.add_argument('--batch_size', type=int, default=32)
	parser.add_argument('--seed', type=int, default=0)
	parser.add_argument('--N_CLASSES', type=int, default=2)
	parser.add_argument('--LEARNING_RATE', type=float, default=1e-4)
	parser.add_argument('--LR_DECAY', type=float, default=0.995)
	parser.add_argument('--DECAY_LR_EVERY_N_EPOCHS', type=int, default=1)
	parser.add_argument('--WEIGHT_DECAY', type=float, default=0.0001)
	parser.add_argument('--CUDNN', type=bool, default=True)
	args = parser.parse_args()

	torch.cuda.manual_seed(args.seed)
	cudnn.benchmark = args.CUDNN

	normalize = transforms.Normalize(mean=dataset.mean, std=dataset.std)
	train_joint_transformer = transforms.Compose([
		joint_transforms.JointResize(256),
		joint_transforms.JointRandomCrop(224),
		joint_transforms.JointRandomHorizontalFlip(),
        ])
	mask_size_list = [28, 28, 28, 56, 112]

	train_dset = dataset.Saliency(
		args.DATASET_PATH, 'TRain', train_joint_transformer, mask_size_list,
		transform=transforms.Compose([joint_transforms.RandomErasing_random(probability=0.5, sh=0.4, r1=0.3, ),
									  transforms.ToTensor(), normalize, ]))
	train_loader = torch.utils.data.DataLoader(
		train_dset, batch_size=args.batch_size, shuffle=True, num_workers=args.batch_size)

	test_joint_transforms_img = transforms.Compose([joint_transforms.JointResize(224)])
	val_dset = dataset.TestData(args.DATASET_PATH, 'VAl', test_joint_transforms_img,
								transform=transforms.Compose([transforms.ToTensor(), normalize]),
								target_transform=transforms.Compose([transforms.ToTensor()]))
	val_loader = torch.utils.data.DataLoader(
		val_dset, batch_size=args.batch_size, shuffle=False)

	print("TrainImages: %d" % len(train_loader.dataset.imgs))
	print("ValImages: %d" % len(val_loader.dataset.imgs))

	example_inputs, example_targets = next(iter(train_loader))
	print("InputsBatchSize: ", example_inputs.size())
	print("TargetsBatchSize: ", len(example_targets))
	print("\nInput (size, max, min) ---")
	# input
	i = example_inputs[0]
	print(i.size())
	print(i.max())
	print(i.min())
	print("Target (size, max, min) ---")
	# target
	for mask in example_targets:
		print(mask.size())
		print(mask.max())
		print(mask.min())

	resnet34 = torchvision.models.resnet34(pretrained=True)
	dict_resnet34 = resnet34.state_dict()
	model = SegNet.resnet34()
	# # initialize
	model.apply(utils.weights_init)
	SegNet_dict = model.state_dict()

	pretrained_dict = {k: v for k, v in dict_resnet34.items() if k in SegNet_dict}
	# for k in pretrained_dict:
	# 	print(k)
	SegNet_dict.update(pretrained_dict)
	model.load_state_dict(SegNet_dict)

	# separate layers, to set different lr
	param_exist = []
	param_add = []
	for k, (name, module) in enumerate(model.named_children()):
		# existing layers including: conv1 bn1 relu maxpool
		# layer1 layer2 layer3 layer4
		if k < 8:
			for param in module.parameters():
				param_exist.append(param)
		# adding layers including: bottleneck skip3 skip2 skip1 skip0
		# conv_end_1 bn_end_1 salmap Sigmoid mask0 mask4 mask3 mask2 mask1
		else:
			for param in module.parameters():
				param_add.append(param)

	model = model.cuda()
	# model = torch.nn.DataParallel(model).cuda()

	print('  + Number of params: {}'.format(
		sum([p.data.nelement() for p in model.parameters()])))
	optimizer = optim.RMSprop([{'params': param_exist, 'lr': args.LEARNING_RATE*0.1},
						   {'params': param_add}], lr=args.LEARNING_RATE,
							  weight_decay=args.WEIGHT_DECAY, eps=1e-12)
	criterion = nn.NLLLoss().cuda()

	exp_dir = args.EXPERIMENT + 'test'
	if os.path.exists(exp_dir):
		shutil.rmtree(exp_dir)

	exp = experiment.Experiment('test', args.EXPERIMENT)
	exp.init()

	START_EPOCH = exp.epoch
	END_EPOCH = START_EPOCH + args.N_EPOCHS

	for epoch in range(START_EPOCH, END_EPOCH):

		since = time.time()

		# ### Train ###
		trn_loss, trn_err = utils.train(model, train_loader, optimizer, criterion, epoch)
		print('Epoch {:d}: Train - Loss: {:.4f}\tErr: {:.4f}'.format(epoch, trn_loss, trn_err))
		time_elapsed = time.time() - since
		print('Train Time {:.0f}m {:.0f}s'.format(
			time_elapsed // 60, time_elapsed % 60))

		### Test ###
		val_loss, val_err = utils.test_score(model, val_loader)
		print('Val - Loss: {:.4f}, Error: {:.4f}'.format(val_loss, val_err))
		time_elapsed = time.time() - since
		print('Total Time {:.0f}m {:.0f}s\n'.format(
			time_elapsed // 60, time_elapsed % 60))

		### Save Metrics ###
		exp.save_history('train', trn_loss, trn_err)
		exp.save_history('val', val_loss, val_err)

		### Checkpoint ###
		exp.save_weights(model, trn_loss, val_loss, trn_err, val_err)
		exp.save_optimizer(optimizer, val_loss)

		## Early Stopping ##
		if (epoch - exp.best_val_loss_epoch) > args.MAX_PATIENCE:
			print(("Early stopping at epoch %d since no "
				   +"better loss found since epoch %.3").format(epoch, exp.best_val_loss))
			break

		# Adjust Lr ###--old method
		utils.adjust_learning_rate(args.LEARNING_RATE, args.LR_DECAY, optimizer,
							 epoch, args.DECAY_LR_EVERY_N_EPOCHS)

		exp.epoch += 1
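
Examples #5 and #10 share the signature `utils.adjust_learning_rate(LEARNING_RATE, LR_DECAY, optimizer, epoch, DECAY_LR_EVERY_N_EPOCHS)`, i.e. an exponential decay applied every N epochs. A minimal sketch consistent with that call (the actual `utils` module in these projects may differ in detail):

def adjust_learning_rate(lr, decay, optimizer, cur_epoch, every_n_epochs):
    # Exponential decay: new_lr = lr * decay**cur_epoch, refreshed every N epochs.
    if cur_epoch % every_n_epochs == 0:
        new_lr = lr * (decay ** cur_epoch)
        for param_group in optimizer.param_groups:
            param_group['lr'] = new_lr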