cnn_optimizer = torch.optim.SGD(cnn.parameters(), lr=args.learning_rate,
                                momentum=0.9, nesterov=True, weight_decay=5e-4)

if args.dataset == 'svhn':
    scheduler = MultiStepLR(cnn_optimizer, milestones=[80, 120], gamma=0.1)
else:
    scheduler = MultiStepLR(cnn_optimizer, milestones=[60, 120, 160], gamma=0.2)

filename = 'logs/' + test_id + '.csv'
csv_logger = CSVLogger(args=args,
                       fieldnames=['epoch', 'train_acc', 'test_acc'],
                       filename=filename)


def count_params(net):
    return sum([np.prod(param.size()) for name, param in net.named_parameters()])


params = count_params(cnn)
mobilenet_params = 6900000
mobilenet_flops = 1170000000


def test(loader):
    cnn.eval()  # Change model to 'eval' mode (BN uses moving mean/var).
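# CSVLogger is not defined in these snippets. A minimal sketch of a logger with
# the interface used here (args/fieldnames/filename constructor, writerow(dict),
# close()) might look like the following; this is an illustrative assumption,
# not necessarily the project's actual helper. It also accepts args=None, as
# used in the triplet-loss training function further below.
import csv


class CSVLogger(object):
    def __init__(self, args, fieldnames, filename='log.csv'):
        self.csv_file = open(filename, 'w')
        writer = csv.writer(self.csv_file)
        if args is not None:
            # Record the run configuration at the top of the CSV.
            for arg in vars(args):
                writer.writerow([arg, getattr(args, arg)])
            writer.writerow([''])
        self.writer = csv.DictWriter(self.csv_file, fieldnames=fieldnames)
        self.writer.writeheader()
        self.csv_file.flush()

    def writerow(self, row):
        self.writer.writerow(row)
        self.csv_file.flush()

    def close(self):
        self.csv_file.close()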
def run_cutout(dataset="cifar10", model="resnet18", epochs=200, batch_size=128, learning_rate=0.1, data_augmentation=False, cutout=False, n_holes=1, length=8, no_cuda=False, seed=0): cuda = not no_cuda and torch.cuda.is_available() cudnn.benchmark = True # Should make training should go faster for large models torch.manual_seed(seed) if cuda: torch.cuda.manual_seed(seed) test_id = dataset + '_' + model # Image Preprocessing if dataset == 'svhn': normalize = transforms.Normalize( mean=[x / 255.0 for x in [109.9, 109.7, 113.8]], std=[x / 255.0 for x in [50.1, 50.6, 50.8]]) else: normalize = transforms.Normalize( mean=[x / 255.0 for x in [125.3, 123.0, 113.9]], std=[x / 255.0 for x in [63.0, 62.1, 66.7]]) train_transform = transforms.Compose([]) if data_augmentation: train_transform.transforms.append(transforms.RandomCrop(32, padding=4)) train_transform.transforms.append(transforms.RandomHorizontalFlip()) train_transform.transforms.append(transforms.ToTensor()) train_transform.transforms.append(normalize) if cutout: train_transform.transforms.append( Cutout(n_holes=n_holes, length=length)) test_transform = transforms.Compose([transforms.ToTensor(), normalize]) if dataset == 'cifar10': num_classes = 10 train_dataset = datasets.CIFAR10(root='data/', train=True, transform=train_transform, download=True) test_dataset = datasets.CIFAR10(root='data/', train=False, transform=test_transform, download=True) elif dataset == 'cifar100': num_classes = 100 train_dataset = datasets.CIFAR100(root='data/', train=True, transform=train_transform, download=True) test_dataset = datasets.CIFAR100(root='data/', train=False, transform=test_transform, download=True) elif dataset == 'svhn': num_classes = 10 train_dataset = datasets.SVHN(root='data/', split='train', transform=train_transform, download=True) extra_dataset = datasets.SVHN(root='data/', split='extra', transform=train_transform, download=True) # Combine both training splits (https://arxiv.org/pdf/1605.07146.pdf) data = np.concatenate([train_dataset.data, extra_dataset.data], axis=0) labels = np.concatenate([train_dataset.labels, extra_dataset.labels], axis=0) train_dataset.data = data train_dataset.labels = labels test_dataset = datasets.SVHN(root='data/', split='test', transform=test_transform, download=True) # Data Loader (Input Pipeline) train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=2) test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=2) if model == 'resnet18': cnn = ResNet18(num_classes=num_classes) elif model == 'wideresnet': if dataset == 'svhn': cnn = WideResNet(depth=16, num_classes=num_classes, widen_factor=8, dropRate=0.4) else: cnn = WideResNet(depth=28, num_classes=num_classes, widen_factor=10, dropRate=0.3) cnn = cnn.cuda() criterion = nn.CrossEntropyLoss().cuda() cnn_optimizer = torch.optim.SGD(cnn.parameters(), lr=learning_rate, momentum=0.9, nesterov=True, weight_decay=5e-4) if dataset == 'svhn': scheduler = MultiStepLR(cnn_optimizer, milestones=[80, 120], gamma=0.1) else: scheduler = MultiStepLR(cnn_optimizer, milestones=[60, 120, 160], gamma=0.2) #TODO: change path to relative path filename = "/beegfs/work/workspace/ws/fr_mn119-augment-0/logs/{}.csv".format( test_id) # filename = 'logs/' + test_id + '.csv' args = argparse.Namespace( **{ "dataset": dataset, "model": model, "epochs": epochs, "batch_size": batch_size, "learning_rate": learning_rate, "data_augmentation": 
data_augmentation, "cutout": cutout, "n_holes": n_holes, "length": length, "no_cuda": no_cuda, "seed": seed }) csv_logger = CSVLogger(args=args, fieldnames=['epoch', 'train_acc', 'test_acc'], filename=filename) def test(loader): cnn.eval() # Change model to 'eval' mode (BN uses moving mean/var). correct = 0. total = 0. for images, labels in loader: if dataset == 'svhn': # SVHN labels are from 1 to 10, not 0 to 9, so subtract 1 labels = labels.type_as(torch.LongTensor()).view(-1) - 1 images = Variable(images, volatile=True).cuda() labels = Variable(labels, volatile=True).cuda() pred = cnn(images) pred = torch.max(pred.data, 1)[1] total += labels.size(0) correct += (pred == labels.data).sum() val_acc = correct / total cnn.train() return val_acc for epoch in range(epochs): xentropy_loss_avg = 0. correct = 0. total = 0. progress_bar = tqdm(train_loader) for i, (images, labels) in enumerate(progress_bar): progress_bar.set_description('Epoch ' + str(epoch)) if dataset == 'svhn': # SVHN labels are from 1 to 10, not 0 to 9, so subtract 1 labels = labels.type_as(torch.LongTensor()).view(-1) - 1 images = Variable(images).cuda(async=True) labels = Variable(labels).cuda(async=True) cnn.zero_grad() pred = cnn(images) xentropy_loss = criterion(pred, labels) xentropy_loss.backward() cnn_optimizer.step() xentropy_loss_avg += xentropy_loss.data[0] # Calculate running average of accuracy _, pred = torch.max(pred.data, 1) total += labels.size(0) correct += (pred == labels.data).sum() accuracy = correct / total progress_bar.set_postfix(xentropy='%.3f' % (xentropy_loss_avg / (i + 1)), acc='%.3f' % accuracy) test_acc = test(test_loader) tqdm.write('test_acc: %.3f' % (test_acc)) scheduler.step(epoch) row = { 'epoch': str(epoch), 'train_acc': str(accuracy), 'test_acc': str(test_acc) } csv_logger.writerow(row) # torch.save(cnn.state_dict(), 'checkpoints/' + test_id + '.pt') csv_logger.close() results = { 'epoch': epoch, 'train_error': 1 - accuracy, 'test_error': 1 - test_acc } # validation error for hyperband return results
args.rate) + '/'
else:
    basic_path = 'logs/' + args.dataset + '_' + args.model + '/' + args.scheduler + '/'

mkdir_p(basic_path)
index = str(len(os.listdir(basic_path)) + 1)
basic_path = basic_path + '/' + index + '/'
mkdir_p(basic_path)
mkdir_p(basic_path + 'checkpoints/')

if args.scheduler == 'step':
    # E1 / E2 = rate, E1 + E2 = epochs
    args.E2 = args.epochs // (1 + args.rate)
    args.E1 = int(args.rate * args.E2)

csv_logger = CSVLogger(
    args=args,
    fieldnames=['epoch', 'train_acc', 'test_acc', 'max_acc'],
    filename=basic_path + 'logs.csv')

# Image Preprocessing
if args.dataset == 'svhn':
    normalize = transforms.Normalize(
        mean=[x / 255.0 for x in [109.9, 109.7, 113.8]],
        std=[x / 255.0 for x in [50.1, 50.6, 50.8]])
else:
    normalize = transforms.Normalize(
        mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
        std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

train_transform = transforms.Compose([])
if args.data_augmentation:
    train_transform.transforms.append(transforms.RandomCrop(32, padding=4))
def training(args):
    if not os.path.isdir('logs'):
        os.makedirs('logs')

    train_loader = torch.utils.data.DataLoader(
        dataset=MaskDataset(root=args.data_dir, random=True, isTraining=True),
        batch_size=512,
        shuffle=True,
        pin_memory=True,
        num_workers=16)
    val_loader = torch.utils.data.DataLoader(
        dataset=MaskDataset(root=args.data_dir + 'validation/', random=True, isTraining=False),
        batch_size=1,
        shuffle=False,
        pin_memory=True,
        num_workers=2)

    cnn_optimizer = torch.optim.SGD(cnn.parameters(), lr=0.1, momentum=0.9,
                                    nesterov=True, weight_decay=0.0)  # 0.0001
    scheduler = StepLR(cnn_optimizer, gamma=0.1, step_size=3)
    criterion = TripletLoss(distance=args.loss).cuda()

    early_stopping = True
    patience = 20
    epochs_no_improvement = 0
    max_val_fscore = 0.0
    best_weights = None
    best_epoch = -1

    filename = 'logs/' + str(args.loss) + '.csv'
    csv_logger = CSVLogger(
        args=None,
        fieldnames=['epoch', 'TotalLoss', 'positive_loss', 'negative_loss',
                    'negative_positive', 'val_acc'],
        filename=filename)

    init_val_fscore, val_fscore_imposter = validation_init(val_loader)

    # set model to train mode
    cnn.train()
    tqdm.write('genuine: %.5f' % init_val_fscore)
    tqdm.write('imposter: %.5f' % val_fscore_imposter)

    update_weight_loss = True
    val_fscore = 0.
    for epoch in range(1, 1 + args.epoch):
        loss_total = 0.
        fscore_total = 0.
        positive_loss_total = 0.
        negative_loss_total = 0.
        negative_positive_total = 0.

        progress_bar = tqdm(train_loader)
        for i, (mask_embedding, face_embedding, negative_embedding, label, _) in enumerate(progress_bar):
            progress_bar.set_description('Epoch ' + str(epoch))

            mask_embedding = mask_embedding.cuda()
            face_embedding = face_embedding.cuda()
            negative_embedding = negative_embedding.cuda()
            label = label.cuda()

            cnn.zero_grad()
            pred = cnn(mask_embedding)

            loss, positive_loss, negative_loss, negative_positive = criterion(
                pred, face_embedding, negative_embedding)
            loss.backward()
            cnn_optimizer.step()

            loss_total += loss.item()
            positive_loss_total += positive_loss.item()
            negative_loss_total += negative_loss.item()
            negative_positive_total += negative_positive.item()

            row = {
                'epoch': str(epoch) + '-' + str(i),
                'TotalLoss': str(loss_total / (i + 1)),
                'positive_loss': str(positive_loss_total / (i + 1)),
                'negative_loss': str(negative_loss_total / (i + 1)),
                'negative_positive': str(negative_positive_total / (i + 1)),
                'val_acc': str(val_fscore)
            }
            csv_logger.writerow(row)

            progress_bar.set_postfix(
                loss='%.5f' % (loss_total / (i + 1)),
                negative_loss='%.5f' % (negative_loss_total / (i + 1)),
                positive_loss='%.5f' % (positive_loss_total / (i + 1)),
                negative_positive='%.5f' % (negative_positive_total / (i + 1)))

        val_fscore, val_fscore_imposter = validation(val_loader)
        tqdm.write('fscore: %.5f' % val_fscore)
        tqdm.write('imposter: %.5f' % val_fscore_imposter)

        # scheduler.step(epoch)  # Use this line for PyTorch <1.4
        scheduler.step()  # Use this line for PyTorch >=1.4

        # row = {'epoch': str(epoch), 'train_acc': str(train_fscore), 'val_acc': str(val_fscore)}
        # csv_logger.writerow(row)

        do_stop = False  # early stopping is effectively disabled while this stays False
        if early_stopping:
            if val_fscore > max_val_fscore:
                max_val_fscore = val_fscore
                epochs_no_improvement = 0
                best_weights = cnn.state_dict()
                best_epoch = epoch
            else:
                epochs_no_improvement += 1
            if epochs_no_improvement >= patience and do_stop:
                print(f"EARLY STOPPING at {best_epoch}: {max_val_fscore}")
                break
        else:
            best_weights = cnn.state_dict()

    if not os.path.isdir(os.path.join(args.weights, str(args.loss))):
        os.makedirs(os.path.join(args.weights, str(args.loss)))
    torch.save(best_weights, os.path.join(args.weights, str(args.loss), 'weights.pt'))
    csv_logger.close()
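# TripletLoss above is an external criterion that returns a 4-tuple
# (loss, positive_loss, negative_loss, negative_positive), as unpacked in the
# training loop. One plausible sketch of such a module, assuming a margin-based
# formulation (the margin value and distance handling are assumptions):
import torch
import torch.nn as nn
import torch.nn.functional as F


class TripletLoss(nn.Module):
    def __init__(self, distance='euclidean', margin=1.0):
        super(TripletLoss, self).__init__()
        self.distance = distance
        self.margin = margin

    def _dist(self, a, b):
        if self.distance == 'cosine':
            return 1.0 - F.cosine_similarity(a, b, dim=1)
        return F.pairwise_distance(a, b, p=2)

    def forward(self, anchor, positive, negative):
        d_pos = self._dist(anchor, positive)    # pull anchor towards the positive
        d_neg = self._dist(anchor, negative)    # push anchor away from the negative
        d_np = self._dist(positive, negative)   # monitoring term
        loss = F.relu(d_pos - d_neg + self.margin).mean()
        return loss, d_pos.mean(), d_neg.mean(), d_np.mean()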
trainer = gluon.Trainer(
    net.collect_params(),
    args.optimizer,  # sgd
    {
        'learning_rate': lr,
        'wd': wd
    })
# schedule = mx.lr_scheduler.MultiFactorScheduler(step=[75, 150, 450], factor=lr_decay)
# schedule.base_lr = lr
# adam_optimizer = mx.optimizer.Adam(wd=wd)  # learning_rate=lr, lr_scheduler=schedule,
# trainer = mx.gluon.Trainer(params=net.collect_params(), optimizer=adam_optimizer)
# Do not set the learning rate on the trainer here yet.

logging.basicConfig(level=logging.DEBUG)
csv_logger = CSVLogger(
    args=args,
    fieldnames=['epoch', 'train_acc', 'valid_acc', 'lr'],
    filename=args.log)

metric = mx.metric.Accuracy()
loss = gluon.loss.SoftmaxCrossEntropyLoss()

logging.info(
    '[+][+]Hyperparameters: lr: {}, wd: {}, lr_decay_dict: {}'.format(
        lr, wd, lr_decay_dict))
logging.info('[+]Training Start...')

if not args.test_only:
    train(start_epoch, lr, lr_decay_dict, ctx, args.epochs, train_iter,
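# The commented-out MultiFactorScheduler suggests step-wise decay; lr_decay_dict
# presumably maps epochs to decay factors (an assumption -- the actual train()
# may handle scheduling differently). A minimal sketch of applying such a dict
# at the start of each epoch with gluon's Trainer:
def maybe_decay_lr(trainer, epoch, lr_decay_dict):
    if epoch in lr_decay_dict:
        new_lr = trainer.learning_rate * lr_decay_dict[epoch]
        trainer.set_learning_rate(new_lr)
        logging.info('[+]Epoch {}: learning rate set to {}'.format(epoch, new_lr))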
criterion = nn.CrossEntropyLoss().cuda()
cnn_optimizer = torch.optim.SGD(cnn.parameters(), lr=args.learning_rate,
                                momentum=0.9, nesterov=True, weight_decay=5e-4)

if args.dataset == 'svhn':
    scheduler = MultiStepLR(cnn_optimizer, milestones=[80, 120], gamma=0.1)
else:
    scheduler = MultiStepLR(cnn_optimizer, milestones=[60, 120, 160], gamma=0.2)

csv_logger = CSVLogger(args=args,
                       fieldnames=['epoch', 'train_acc', 'val_acc'],
                       filename=training_summary_file)

# get the number of model parameters
num_params = sum([p.data.nelement() for p in cnn.parameters()])
print("Model {} selected. Number of Parameters {}".format(args.model, num_params))

# ---------------------------------------------------------------------------------------
# Training
# ---------------------------------------------------------------------------------------
print("Training Started {}".format('*' * 80))
print("Train/Validation Split={}. (nTrain {}, nValidation {})".format(
    args.train_validation_split, len(train_dataset), len(validation_dataset)))
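# The train/validation split referenced above is not shown in this fragment.
# A minimal sketch, assuming args.train_validation_split is the fraction of
# samples used for training and `full_dataset` is the complete training set
# (both names are assumptions for illustration):
n_total = len(full_dataset)
n_train = int(args.train_validation_split * n_total)
train_dataset, validation_dataset = torch.utils.data.random_split(
    full_dataset, [n_train, n_total - n_train])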