def get_model(model_name, num_classes=10, keep_prob=1.0, **kwargs):
    if model_name.lower() == "lenetknn":
        return leNet_KNN.LeNet5KNN(num_classes=num_classes, keep_prob=keep_prob, **kwargs)
    elif model_name.lower() == 'lenet':
        return leNet.LeNet5(num_classes=num_classes, keep_prob=keep_prob)
    elif model_name.lower() == 'resnet18':
        return resnet.ResNet18(num_classes=num_classes)
    elif model_name.lower() == 'resnet34':
        return resnet.ResNet34(num_classes=num_classes)
    elif model_name.lower() == 'resnet101':
        return resnet.ResNet101(num_classes=num_classes)
    elif model_name.lower() == "resnet18knn":
        return resnet_KNN.ResNet18(num_classes=num_classes, kernel_fn=kwargs["kernel_fn"])
    elif model_name.lower() == "resnet101knn":
        return resnet_KNN.ResNet101(num_classes=num_classes, kernel_fn=kwargs["kernel_fn"])
    elif model_name.lower() == 'lenetkcnn':
        return leNet_KNN.LeNet5KCNN(num_classes=num_classes, keep_prob=keep_prob, **kwargs)
    elif model_name.lower() == 'resnet101kcnn':
        return resnet_KNN.ResNet101KCNN(num_classes=num_classes, keep_prob=keep_prob, **kwargs)
    else:
        raise ValueError("Unknown model name {}".format(model_name))
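# Usage sketch (illustrative, not from the original project): the "knn" variants above
# read `kernel_fn` from kwargs, so a caller supplies it explicitly. The Gaussian kernel
# below is a hypothetical placeholder, not the project's own kernel.
import torch

def example_kernel_fn(x, y):
    # Hypothetical RBF kernel between two batches of flattened features.
    return torch.exp(-torch.cdist(x, y) ** 2)

# model = get_model("resnet18", num_classes=10)
# knn_model = get_model("resnet18knn", num_classes=10, kernel_fn=example_kernel_fn)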
def init_local_model(self, device):
    ### Select the model
    if self.params.t_model_version == 'resnet18':
        self.m_local = resnet.ResNet18().to(device) if self.params.t_cuda else resnet.ResNet18()
    elif self.params.t_model_version == 'alexnet':
        self.m_local = alexnet.AlexNet().to(device) if self.params.t_cuda else alexnet.AlexNet()
    elif self.params.t_model_version == 'cnn':
        self.m_local = cnn.CNN().to(device) if self.params.t_cuda else cnn.CNN()
def _make_backbone(self, backbone):
    if backbone == 'resnet18':
        _resnet = resnet.ResNet18(use_as_backone=True)
        backbone = nn.Sequential(_resnet, )
        feat_dim = _resnet.feat_dim
    elif backbone == 'deepmind':
        backbone = Encoder(3, num_hiddens=128, num_residual_layers=2, num_residual_hiddens=32)
        feat_dim = 128
    else:
        raise Exception(f'Error. Backbone "{backbone}" is not supported.')
    return backbone, feat_dim
def get_new_model(args, tmp_scale=True):
    if args.model == 'resnet18':
        return resnet.ResNet18(tmp_scale=tmp_scale, num_classes=args.num_classes)
    elif args.model == 'resnet50':
        return resnet.ResNet50(tmp_scale=tmp_scale, num_classes=args.num_classes)
    elif args.model == 'resnet101':
        return resnet.ResNet101(tmp_scale=tmp_scale, num_classes=args.num_classes)
    elif args.model == 'inceptionv4':
        return inception.inceptionv4(tmp_scale=tmp_scale, num_classes=args.num_classes)
    elif args.model == 'densenet':
        return densenet.DenseNet(tmp_scale=tmp_scale)
def loadmodel(nb_class=10, img_HW=8, pretrain_model='resnet18'):
    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

    # Model
    print('==> Building model..')
    if pretrain_model == "resnet_hist18":
        net = attnResNet50.resnet18(num_classes=nb_class, image_HW=img_HW)
        # net = attnResNet50.load_resnet_imagenet(model=net, modelname="resnet18")
    elif pretrain_model == "resnet_hist50":
        net = attnResNet50.resnet50(num_classes=nb_class, image_HW=img_HW, pretrained=False)
        # net = attnResNet50.load_resnet_imagenet(model=net, modelname="resnet50")
    elif pretrain_model is None:
        net = attnResNet50.resnet50(num_classes=nb_class, image_HW=img_HW)
    elif pretrain_model == "resnet18":
        net = resnet.ResNet18(n_classes=nb_class)
        # net = attnResNet50.load_resnet_imagenet(model=net, modelname="resnet18")
    elif pretrain_model == "resnet50":
        net = resnet.ResNet50(n_classes=nb_class)
        # net = attnResNet50.load_resnet_imagenet(model=net, modelname="resnet50")
    print(net)

    with torch.no_grad():
        net = net.to(device)
        attnResNet50.initialize_weights(net)
    # if device == 'cuda':
    #     net = torch.nn.DataParallel(net)
    #     cudnn.benchmark = True
    print("Compute on device")

    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
        checkpoint = torch.load('./checkpoint/ckpt' + run_start_time + '.pth')
        net.load_state_dict(checkpoint['net'])
        best_acc = checkpoint['acc']
        start_epoch = checkpoint['epoch']

    # criterion = nn.CrossEntropyLoss()
    criterion = nn.MSELoss()
    # optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
    optimizer = optim.Adam(net.parameters(), lr=args.lr)
    return net, criterion, optimizer
def main():
    if args.resume:
        if not os.path.isfile('./checkpoint/{}.pkl'.format(args.model)):
            raise ValueError('no models saved....!!!!')
        print('resume from checkpoint....')
        net = torch.load('./checkpoint/{}.pkl'.format(args.model))
    else:
        if args.model == 'vgg16':
            net = vgg.VGG(args.model)
        elif args.model == 'vgg19':
            net = vgg.VGG(args.model)
        elif args.model == 'resnet18':
            net = resnet.ResNet18()
        elif args.model == 'resnet34':
            net = resnet.ResNet34()
        elif args.model == 'resnet50':
            net = resnet.ResNet50()
        elif args.model == 'resnet101':
            net = resnet.ResNet101()
        elif args.model == 'resnet152':
            net = resnet.ResNet152()
        elif args.model == 'densenet121':
            net = densenet.DenseNet121()
        elif args.model == 'densenet161':
            net = densenet.DenseNet161()
        elif args.model == 'densenet169':
            net = densenet.DenseNet169()
        elif args.model == 'densenet201':
            net = densenet.DenseNet201()
        else:
            raise ValueError('model not implemented...!!')

    net.cuda(args.gpu)
    net = nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    optim = torch.optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=1e-4)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optim, step_size=100, gamma=0.1)

    for e in range(args.epoch):
        train(e, net, criterion, optim, lr_scheduler)
        test(e, net)
def get_conv_net(name, input_dim, output_dim, activation, args):
    mynet = None
    if name == 'complexconv':
        mynet = baseline.ComplexConvNet(input_dim, args.n_units, activation, args.n_layers)
    elif name == 'hashconv':
        mynet = ctxnet.HashConvNet(input_dim=input_dim, n_units=args.n_units, activation=activation,
                                   period=args.net_period, key_pick=args.key_pick)
    elif name == 'cifarhashconv':
        mynet = ctxnet.CifarHashConvNet(input_dim=input_dim, n_units=args.n_units, n_layers=args.n_layers,
                                        activation=activation, period=args.net_period, key_pick=args.key_pick)
    elif name == 'cifarhashconv_':
        mynet = ctxnet.CifarHashConvNet_(input_dim=input_dim, n_units=args.n_units, n_layers=args.n_layers,
                                         activation=activation, period=args.net_period, key_pick=args.key_pick)
    elif name == 'residualcomplexconv':
        mynet = baseline.ResidualComplexConvNet(input_dim, args.n_units, activation, args.n_layers)
    elif name == 'resnet18':
        mynet = resnet.ResNet18()
    elif name == 'staticbnresnet18':
        mynet = resnet.StaticBNResNet18()
    elif name == 'outhashresnet18':
        mynet = resnet.OutHashResNet18()
    elif name == 'hashresnet18':
        mynet = resnet.HashResNet18(np.prod(output_dim))
    elif name == 'multiresnet18':
        mynet = resnet.MultiHeadResNet18()
    return mynet
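# Hedged usage sketch: get_conv_net returns None for unknown names, so a caller may want
# to fail fast. The Namespace fields below only mirror the attributes read above; their
# values are placeholders, not the original project's defaults.
# from argparse import Namespace
# args = Namespace(n_units=128, n_layers=4, net_period=10, key_pick='hash')
# net = get_conv_net('resnet18', input_dim=(3, 32, 32), output_dim=(10,), activation='relu', args=args)
# if net is None:
#     raise ValueError('unknown conv net name')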
indices = list(range(NUM_TRAIN))
random.shuffle(indices)
START = 2 * ADDENDUM
labeled_set = indices[:START]
unlabeled_set = indices[START:]

train_loader = DataLoader(cifar10_train, batch_size=BATCH,
                          sampler=SubsetRandomSampler(labeled_set),
                          pin_memory=True)
test_loader = DataLoader(cifar10_test, batch_size=BATCH)
dataloaders = {'train': train_loader, 'test': test_loader}

# Model
backbone_net = resnet.ResNet18(NUM_CLASS).cuda()
models = {'backbone': backbone_net}
torch.backends.cudnn.benchmark = True

# Active learning cycles
for cycle in range(CYCLES):
    # Loss, criterion and scheduler (re)initialization
    criterion = nn.CrossEntropyLoss(reduction='none')
    optim_backbone = optim.SGD(models['backbone'].parameters(), lr=LR,
                               momentum=MOMENTUM, weight_decay=WDECAY)
    sched_backbone = lr_scheduler.MultiStepLR(optim_backbone,
train_transforms, test_transforms = transform.cifar10_transforms(mean, std)
trainset, testset = dataset.cifar10_dataset(train_transforms, test_transforms)
train_loader, test_loader = dataloader.cifar10_dataloader(trainset, testset)

# device
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)

# classes in the data
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# model summary
net = resnet.ResNet18().to(device)
print(summary(net, input_size=(3, 32, 32)))

EPOCHS = 25
optimizer = optim.SGD(net.parameters(), lr=0.005, momentum=0.9)
train_acc = []
train_losses = []
test_acc = []
test_losses = []

# run_model.evaluation(train_loader, test_loader, epochs=EPOCHS, device=device)
run_model.evaluation(net, train_loader, test_loader, optimizer, EPOCHS, device,
                     train_acc, train_losses, test_acc, test_losses)
validset = datasets.DatasetFolder(root=args.valid_data_path, loader=get_data, extensions=('wav',))
valid_loader = torch.utils.data.DataLoader(validset, batch_size=args.valid_batch_size,
                                           shuffle=True, num_workers=args.n_workers,
                                           pin_memory=True)

args.nclasses = len(trainset.classes)
print(args, '\n')

if args.model == 'resnet':
    model = resnet.ResNet18(n_classes=args.nclasses, half_spec=args.half_spec)

if args.pretrained_path:
    try:
        print('\nLoading pretrained model from: {}\n'.format(args.pretrained_path))
        ckpt = torch.load(args.pretrained_path, map_location=lambda storage, loc: storage)
        print(model.load_state_dict(ckpt['model_state'], strict=False))
        print('\n')
    except RuntimeError as err:
        print("Runtime Error: {0}".format(err))
    except:
        print("Unexpected error:", sys.exc_info()[0])
        raise
def main():
    parser = argparse.ArgumentParser(description='PyTorch Gambler\'s Loss Runner')

    parser.add_argument('--result_dir', type=str, default='results',
                        help='directory to save result txt files')
    parser.add_argument('--noise_rate', type=float, default=0.5,
                        help='corruption rate, should be less than 1')
    parser.add_argument('--noise_type', type=str, default='symmetric',
                        help='[pairflip, symmetric]')
    parser.add_argument('--dataset', type=str, default='mnist',
                        help='mnist, cifar10, or imdb')
    parser.add_argument('--n_epoch', type=int, default=10)
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--num_workers', type=int, default=4,
                        help='how many subprocesses to use for data loading')
    parser.add_argument('--epoch_decay_start', type=int, default=80)
    parser.add_argument('--load_model', type=str, default="")
    parser.add_argument('--model', type=str, default='default')
    parser.add_argument('--batch_size', type=int, default=128, metavar='N',
                        help='input batch size for training (default: 128)')
    parser.add_argument('--log-interval', type=int, default=100, metavar='N',
                        help='how many batches to wait before logging training status (default: 100)')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate (default: 0.001)')
    parser.add_argument('--eps', type=float, default=1000.0,
                        help='set lambda for lambda type \'gmblers\' only')
    parser.add_argument('--lambda_type', type=str, default="euc",
                        help='[nll, euc, mid, exp, gmblers]')
    parser.add_argument('--start_gamblers', type=int, default=0,
                        help='number of epochs before starting gamblers')
    # label smoothing args
    parser.add_argument('--smoothing', type=float, default=1.0,
                        help='smoothing parameter (default: 1)')

    args = parser.parse_args()
    args.use_scheduler = False

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    if args.dataset == 'mnist':
        input_channel = 1
        num_classes = 10
        train_dataset = MNIST(root='./data/', download=True, train=True,
                              transform=transforms.ToTensor(),
                              noise_type=args.noise_type, noise_rate=args.noise_rate)
        test_dataset = MNIST(root='./data/', download=True, train=False,
                             transform=transforms.ToTensor(),
                             noise_type=args.noise_type, noise_rate=args.noise_rate)
        print('loading dataset...')
        train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size,
                                                   num_workers=args.num_workers, drop_last=True, shuffle=True)
        test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size,
                                                  num_workers=args.num_workers, drop_last=True, shuffle=False)

    if args.dataset == 'cifar10':
        input_channel = 3
        num_classes = 10
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        train_dataset = CIFAR10(root='./data/', download=True, train=True,
                                transform=transform_train,
                                noise_type=args.noise_type, noise_rate=args.noise_rate)
        test_dataset = CIFAR10(root='./data/', download=True, train=False,
                               transform=transform_test,
                               noise_type=args.noise_type, noise_rate=args.noise_rate)
        print('loading dataset...')
        train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size,
                                                   num_workers=args.num_workers, drop_last=True, shuffle=True)
        test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size,
                                                  num_workers=args.num_workers, drop_last=True, shuffle=False)

    if args.dataset == 'cifar100':
        input_channel = 3
        num_classes = 100
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        train_dataset = CIFAR100(root='./data/', download=True, train=True,
                                 transform=transform_train,
                                 noise_type=args.noise_type, noise_rate=args.noise_rate)
        test_dataset = CIFAR100(root='./data/', download=True, train=False,
                                transform=transform_test,
                                noise_type=args.noise_type, noise_rate=args.noise_rate)
        print('loading dataset...')
        train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size,
                                                   num_workers=args.num_workers, drop_last=True, shuffle=True)
        test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size,
                                                  num_workers=args.num_workers, drop_last=True, shuffle=False)

    if args.dataset == 'imdb':
        num_classes = 2
        embedding_length = 300
        hidden_size = 256
        print('loading dataset...')
        TEXT, vocab_size, word_embeddings, train_loader, valid_iter, test_loader = load_data.load_dataset(
            rate=args.noise_rate, batch_size=args.batch_size)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print("using {}".format(device))

    print('building model...')
    if args.dataset == 'mnist':
        model = CNN_basic(num_classes=num_classes).to(device)
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
    if args.dataset == 'cifar10':
        if args.model == 'small':
            model = CNN_small(num_classes=num_classes).to(device)
            optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
        else:
            model = resnet.ResNet18(num_classes=num_classes).to(device)
            change_lr = lambda epoch: 0.1 if epoch >= 50 else 1.0
            optimizer = LaProp(filter(lambda p: p.requires_grad, model.parameters()), lr=4e-4)
            scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=change_lr)
            args.use_scheduler = True
    if args.dataset == 'cifar100':
        if args.model == 'small':
            model = CNN_small(num_classes=num_classes).to(device)
            optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
        else:
            model = resnet.ResNet18(num_classes=num_classes).to(device)
            change_lr = lambda epoch: 0.1 if epoch >= 50 else 1.0
            optimizer = LaProp(filter(lambda p: p.requires_grad, model.parameters()), lr=4e-4)
            scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=change_lr)
            args.use_scheduler = True
    if args.dataset == 'imdb':
        model = LSTMClassifier(args.batch_size, num_classes, hidden_size, vocab_size,
                               embedding_length, word_embeddings).to(device)
        optimizer = LaProp(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr)

    test_accs = []
    train_losses = []
    test_losses = []
    out = []

    name = "{}_{}_{:.2f}_{:.2f}_{}_{}".format(args.dataset, args.noise_type, args.smoothing,
                                              args.noise_rate, args.eps, args.seed)

    if not os.path.exists(args.result_dir):
        os.system('mkdir -p %s' % args.result_dir)
    save_file = args.result_dir + "/" + name + ".json"
    if os.path.exists(save_file):
        print('case processed')
        exit()

    for epoch in range(1, args.n_epoch + 1):
        for param_group in optimizer.param_groups:
            print(epoch, param_group['lr'])
        print(name)
        train_loss = train(args, model, device, train_loader, optimizer, epoch,
                           num_classes=num_classes,
                           use_gamblers=(epoch >= args.start_gamblers),
                           text=(args.dataset == 'imdb'))
        train_losses.append(train_loss)
        test_acc, test_loss = test(args, model, device, test_loader, num_classes,
                                   text=(args.dataset == 'imdb'))
        test_accs.append(test_acc)
        test_losses.append(test_loss)
        if (args.use_scheduler):
            scheduler.step()
        # torch.save({
        #     'model_state_dict': model.state_dict(),
        #     'optimizer_state_dict': optimizer.state_dict(),
        #     'loss': loss,
        #     'test_acc': acc
        # }, args.result_dir + "/" + name + "_model.npy")

    save_data = {
        "train_loss": train_losses,
        "test_loss": test_losses,
        "test_acc": test_accs
    }
    json.dump(save_data, open(save_file, 'w'))
    checkpoint = torch.load('./checkpoint/cnn_cifar100_' + args.method)
    model = checkpoint['model']
    start_epoch = checkpoint['epoch']
    train_losses = checkpoint['train_losses']
    test_losses = checkpoint['test_losses']
    train_errs = checkpoint['train_errs']
    test_errs = checkpoint['test_errs']
else:
    print('==> Building model..')
    if args.net == 'vggnet':
        from models import vgg
        model = vgg.VGG('VGG16', num_classes=100)
    elif args.net == 'resnet':
        from models import resnet
        model = resnet.ResNet18(num_classes=100)
    elif args.net == 'wideresnet':
        from models import wideresnet
        model = wideresnet.WResNet_cifar10(num_classes=100, depth=16, multiplier=4)
    else:
        print('Network undefined!')

if use_cuda:
    model.cuda()
    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True

criterion = nn.CrossEntropyLoss()
betas = (args.beta1, args.beta2)
import torchvision.transforms as transforms
import torch.optim as optim
import torch.utils.data

from models import vgg, resnet, densenet

# Training settings
parser = argparse.ArgumentParser(description='Test new architectures')
parser.add_argument('--model', choices=['vgg', 'resnet', 'densenet'], default='resnet')
parser.add_argument('--hidden-size', type=int, default=512, metavar='S',
                    help='latent layer dimension (default: 512)')
parser.add_argument('--n-hidden', type=int, default=1, metavar='N',
                    help='maximum number of frames per utterance (default: 1)')
args = parser.parse_args()

if args.model == 'vgg':
    model = vgg.VGG('VGG16', nh=args.n_hidden, n_h=args.hidden_size)
elif args.model == 'resnet':
    model = resnet.ResNet18(nh=args.n_hidden, n_h=args.hidden_size)
elif args.model == 'densenet':
    model = densenet.densenet_cifar(nh=args.n_hidden, n_h=args.hidden_size)

batch = torch.rand(3, 3, 84, 84)
emb = model.forward(batch)
print(emb.size())
out = model.out_proj(emb)
print(out.size())
emb = torch.cat([emb, emb], 1)
def run(train_batch_size, epochs, lr, weight_decay, config, exp_id, log_dir,
        trained_model_file, model_name, disable_gpu=False):
    # Logging utility
    def logging(s, log_path, print_=True, log_=True):
        if print_:
            print(s)
        if log_:
            with open(log_path, 'a+') as f_log:
                f_log.write(s + '\n')

    def get_logger(log_path, **kwargs):
        return functools.partial(logging, log_path=log_path, **kwargs)

    logging = get_logger('./logger/log.txt')

    # Load the dataset
    if config['test_ratio']:
        train_loader, val_loader, test_loader = get_data_loaders(config, train_batch_size, exp_id)
    else:
        train_loader, val_loader = get_data_loaders(config, train_batch_size, exp_id)

    device = torch.device("cuda" if not disable_gpu and torch.cuda.is_available() else "cpu")

    if model_name == 'CNNIQA':
        model = cnn.CNNIQAnet()
    if model_name == 'lenet5':
        model = lenet5.LeNet5()
    if model_name == 'resnet18':
        model = resnet.ResNet18()
    if model_name == 'resnet34':
        model = resnet.ResNet34()
    if model_name == 'vgg19':
        model = vgg.VGG('VGG19')

    writer = SummaryWriter(log_dir=log_dir)
    model = model.to(device)  # move the model to the selected device
    # summary(model, input_size=(32, 32))  # must remove the number of N
    # print("model:", model)
    # logging("model: {}".format(model))

    # if multi_gpu and torch.cuda.device_count() > 1:
    #     model = nn.DataParallel(model)

    optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    global best_criterion
    best_criterion = -1  # SROCC >= -1

    # Trainer (library-provided)
    trainer = create_supervised_trainer(model, optimizer, loss_fn, device=device)
    # Evaluator (library-provided)
    evaluator = create_supervised_evaluator(model,
                                            metrics={'IQA_performance': IQAPerformance()},
                                            device=device)

    # The decorated handlers below are all attached to the trainer, so validation and
    # testing run alongside training.
    # training/validation/testing = 0.6/0.2/0.2; validation and testing run after every epoch.
    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        writer.add_scalar("training/loss", engine.state.output, engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics
        SROCC, KROCC, PLCC, RMSE, MAE, OR = metrics['IQA_performance']
        # print("Validation Results - Epoch: {} SROCC: {:.4f} KROCC: {:.4f} PLCC: {:.4f} RMSE: {:.4f} MAE: {:.4f} OR: {:.2f}%"
        #       .format(engine.state.epoch, SROCC, KROCC, PLCC, RMSE, MAE, 100 * OR))
        logging("Validation Results - Epoch: {} SROCC: {:.4f} KROCC: {:.4f} PLCC: {:.4f} RMSE: {:.4f} MAE: {:.4f} OR: {:.2f}%"
                .format(engine.state.epoch, SROCC, KROCC, PLCC, RMSE, MAE, 100 * OR))
        writer.add_scalar("validation/SROCC", SROCC, engine.state.epoch)
        writer.add_scalar("validation/KROCC", KROCC, engine.state.epoch)
        writer.add_scalar("validation/PLCC", PLCC, engine.state.epoch)
        writer.add_scalar("validation/RMSE", RMSE, engine.state.epoch)
        writer.add_scalar("validation/MAE", MAE, engine.state.epoch)
        writer.add_scalar("validation/OR", OR, engine.state.epoch)
        global best_criterion
        global best_epoch
        if SROCC > best_criterion:
            best_criterion = SROCC
            best_epoch = engine.state.epoch
            # Save the best model, judged by SROCC.
            # _use_new_zipfile_serialization=False targets PyTorch versions before 1.6.
            torch.save(model.state_dict(), trained_model_file,
                       _use_new_zipfile_serialization=False)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_testing_results(engine):
        if config["test_ratio"] > 0 and config['test_during_training']:
            evaluator.run(test_loader)
            metrics = evaluator.state.metrics
            SROCC, KROCC, PLCC, RMSE, MAE, OR = metrics['IQA_performance']
            # print("Testing Results - Epoch: {} SROCC: {:.4f} KROCC: {:.4f} PLCC: {:.4f} RMSE: {:.4f} MAE: {:.4f} OR: {:.2f}%"
            #       .format(engine.state.epoch, SROCC, KROCC, PLCC, RMSE, MAE, 100 * OR))
            logging("Testing Results - Epoch: {} SROCC: {:.4f} KROCC: {:.4f} PLCC: {:.4f} RMSE: {:.4f} MAE: {:.4f} OR: {:.2f}%"
                    .format(engine.state.epoch, SROCC, KROCC, PLCC, RMSE, MAE, 100 * OR))
            writer.add_scalar("testing/SROCC", SROCC, engine.state.epoch)
            writer.add_scalar("testing/KROCC", KROCC, engine.state.epoch)
            writer.add_scalar("testing/PLCC", PLCC, engine.state.epoch)
            writer.add_scalar("testing/RMSE", RMSE, engine.state.epoch)
            writer.add_scalar("testing/MAE", MAE, engine.state.epoch)
            writer.add_scalar("testing/OR", OR, engine.state.epoch)

    @trainer.on(Events.COMPLETED)
    def final_testing_results(engine):
        if config["test_ratio"]:
            model.load_state_dict(torch.load(trained_model_file))
            evaluator.run(test_loader)
            metrics = evaluator.state.metrics
            SROCC, KROCC, PLCC, RMSE, MAE, OR = metrics['IQA_performance']
            global best_epoch
            # The best test results correspond to the epoch with the highest validation SROCC.
            # print("Final Test Results - Epoch: {} SROCC: {:.4f} KROCC: {:.4f} PLCC: {:.4f} RMSE: {:.4f} MAE: {:.4f} OR: {:.2f}%"
            #       .format(best_epoch, SROCC, KROCC, PLCC, RMSE, MAE, 100 * OR))
            logging("Final Test Results - Epoch: {} SROCC: {:.4f} KROCC: {:.4f} PLCC: {:.4f} RMSE: {:.4f} MAE: {:.4f} OR: {:.2f}%"
                    .format(best_epoch, SROCC, KROCC, PLCC, RMSE, MAE, 100 * OR))
            np.save(save_result_file, (SROCC, KROCC, PLCC, RMSE, MAE, OR))

    # Kick everything off: run the training loop.
    trainer.run(train_loader, max_epochs=epochs)
    writer.close()
print('\n')

idx_to_class = {}
for key in testset.class_to_idx:
    idx_to_class[str(testset.class_to_idx[key])] = key

print(idx_to_class, '\n')

ckpt = torch.load(args.cp_path, map_location=lambda storage, loc: storage)

if args.model == 'cnn':
    model = base_cnn.CNN(n_classes=args.nclasses)
elif args.model == 'vgg':
    model = vgg.VGG('VGG11', n_classes=args.nclasses)
elif args.model == 'resnet':
    model = resnet.ResNet18(n_classes=args.nclasses)
elif args.model == 'densenet':
    model = densenet.DenseNet121(n_classes=args.nclasses)
elif args.model == 'tdnn':
    model = TDNN.TDNN(n_classes=args.nclasses)

try:
    print(model.load_state_dict(ckpt['model_state'], strict=True))
    print('\n')
except RuntimeError as err:
    print("Runtime Error: {0}".format(err))
except:
    print("Unexpected error:", sys.exc_info()[0])
    raise

print('\n\nNumber of parameters: {}\n'.format(
def resnet18(conf):
    return resnet.ResNet18(n_channels=conf["n_channels"], num_classes=conf["n_classes"])
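# Illustrative call (hedged): the config dict below just mirrors the two keys read above;
# the values are placeholders rather than the project's real configuration.
# conf = {"n_channels": 3, "n_classes": 10}
# model = resnet18(conf)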
random.shuffle(indices)  # shuffle randomly
labeled_set = indices[:ADDENDUM]  # ADDENDUM == 1000: the initial labeled pool; later cycles seem to add 1000 labeled samples each time
unlabeled_set = indices[ADDENDUM:]

train_loader = DataLoader(
    cifar10_train,
    batch_size=BATCH,  # BATCH == 128; could be tuned
    sampler=SubsetRandomSampler(labeled_set),  # how does this work? apparently only those 1000 labeled samples are used for training
    pin_memory=True)  # what is pin_memory for?
test_loader = DataLoader(cifar10_test, batch_size=BATCH)
dataloaders = {'train': train_loader, 'test': test_loader}

# Model
resnet18 = resnet.ResNet18(num_classes=10).cuda()  # note: ResNet18 is used here; a deeper ResNet could be substituted
loss_module = lossnet.LossNet().cuda()
models = {'backbone': resnet18, 'module': loss_module}
torch.backends.cudnn.benchmark = False  # if True, causes cuDNN to benchmark multiple convolution algorithms
                                        # and select the fastest.

# Active learning cycles
for cycle in range(CYCLES):  # CYCLES: 10
    # Loss, criterion and scheduler (re)initialization
    criterion = nn.CrossEntropyLoss(reduction='none')
    optim_backbone = optim.SGD(
        models['backbone'].parameters(),
        lr=LR,  # could Adam be used instead? LR is only 0.1 and could be changed
        momentum=MOMENTUM,
        weight_decay=WDECAY)
    optim_module = optim.SGD(
start = timer()
for _ in range(parser.iteration):
    data = np.random.random_sample(data_shape)
    net.predict(data)
print(f'{name} inference time (sec): {(timer() - start) / parser.iteration:.5f}')

if parser.model == 'resnet-50':
    data_shape = [1, 224, 224, 3]
    f = tf.keras.applications.ResNet50
    execute(f, parser.model, data_shape)
elif parser.model == 'resnet-18':
    data_shape = (224, 224, 3)
    execute(lambda: resnet.ResNet18(data_shape, 1000), parser.model, [1, 224, 224, 3])
elif parser.model == 'resnet-101':
    data_shape = (224, 224, 3)
    execute(lambda: resnet.ResNet101(data_shape, 1000), parser.model, [1, 224, 224, 3])
elif parser.model == 'alexnet':
    data_shape = (224, 224, 3)
    execute(lambda: alexnet.alexnet(), parser.model, [1, 224, 224, 3])
elif parser.model == 'vgg16':
    data_shape = [1, 224, 224, 3]
    f = tf.keras.applications.VGG16
    execute(f, parser.model, data_shape)
elif parser.model == 'vgg19':
    data_shape = [1, 224, 224, 3]
    f = tf.keras.applications.VGG19
def main():
    parser = argparse.ArgumentParser(description='PyTorch Gambler\'s Loss Runner')

    parser.add_argument('--result_dir', type=str, default='results',
                        help='directory to save result txt files')
    parser.add_argument('--gpu', type=int, default=0, help='gpu index')
    parser.add_argument('--noise_rate', type=float, default=0.5,
                        help='corruption rate, should be less than 1')
    parser.add_argument('--noise_type', type=str, default='symmetric',
                        help='[pairflip, symmetric]')
    parser.add_argument('--dataset', type=str, default='mnist',
                        help='mnist, cifar10, or imdb')
    parser.add_argument('--n_epoch', type=int, default=10)
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--num_workers', type=int, default=4,
                        help='how many subprocesses to use for data loading')
    parser.add_argument('--epoch_decay_start', type=int, default=80)
    parser.add_argument('--load_model', type=str, default="")
    parser.add_argument('--batch_size', type=int, default=128, metavar='N',
                        help='input batch size for training (default: 128)')
    parser.add_argument('--log-interval', type=int, default=100, metavar='N',
                        help='how many batches to wait before logging training status (default: 100)')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate (default: 0.001)')
    parser.add_argument('--start_method', type=int, default=0,
                        help='number of epochs before starting method')
    # label smoothing args
    parser.add_argument('--smoothing', type=float, default=1.0,
                        help='smoothing parameter (default: 1)')
    parser.add_argument('--optimal_smoothing', action='store_true', default=False)
    parser.add_argument('--scale_lr', type=float, default=0.0,
                        help='exponent to scale learning rate by')
    parser.add_argument('--method', type=str, default="nll",
                        help='[nll, smoothing, fsmoothing]')

    args = parser.parse_args()

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    if args.dataset == 'mnist':
        input_channel = 1
        num_classes = 10
        train_dataset = MNIST(root='./data/', download=True, train=True,
                              transform=transforms.ToTensor(),
                              noise_type=args.noise_type, noise_rate=args.noise_rate)
        test_dataset = MNIST(root='./data/', download=True, train=False,
                             transform=transforms.ToTensor(),
                             noise_type=args.noise_type, noise_rate=args.noise_rate)
        print('loading dataset...')
        train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size,
                                                   num_workers=args.num_workers, drop_last=True, shuffle=True)
        test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size,
                                                  num_workers=args.num_workers, drop_last=True, shuffle=False)

    if args.dataset == 'cifar10':
        input_channel = 3
        num_classes = 10
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        train_dataset = CIFAR10(root='./data/', download=True, train=True,
                                transform=transform_train,
                                noise_type=args.noise_type, noise_rate=args.noise_rate)
        test_dataset = CIFAR10(root='./data/', download=True, train=False,
                               transform=transform_test,
                               noise_type=args.noise_type, noise_rate=args.noise_rate)
        print('loading dataset...')
        train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size,
                                                   num_workers=args.num_workers, drop_last=True, shuffle=True)
        test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size,
                                                  num_workers=args.num_workers, drop_last=True, shuffle=False)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:{}".format(args.gpu) if (use_cuda and args.gpu >= 0) else "cpu")
    print("using {}".format(device))

    args.lr *= ((0.9 / (args.smoothing - 0.1)) ** args.scale_lr)
    print("learning rate scaled to {}".format(args.lr))

    print('building model...')
    if args.dataset == 'mnist':
        model = cnns.CNN_basic(num_classes=num_classes).to(device)
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)
    if args.dataset == 'cifar10':
        model = resnet.ResNet18().to(device)
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)

    test_accs = []
    train_losses = []
    test_losses = []
    out = []

    if args.optimal_smoothing:
        clean_rate = 1 - args.noise_rate
        args.smoothing = (1 - (2 * clean_rate) + clean_rate * clean_rate * num_classes) / (num_classes - 1)
        print("Using smoothing parameter {:.2f} for clean rate {:.2f}".format(args.smoothing, clean_rate))

    name = "{}_{}_{}_{:.2f}_{:.2f}_{}".format(args.dataset, args.method, args.noise_type,
                                              args.smoothing, args.noise_rate, args.seed)

    if not os.path.exists(args.result_dir):
        os.system('mkdir -p %s' % args.result_dir)

    for epoch in range(1, args.n_epoch + 1):
        for param_group in optimizer.param_groups:
            print(epoch, param_group['lr'])
        print(name)
        train_loss = train(args, model, device, train_loader, optimizer, epoch,
                           num_classes=num_classes,
                           use_method=(epoch >= args.start_method),
                           text=(args.dataset == 'imdb'))
        train_losses.append(train_loss)
        test_acc, test_loss = test(args, model, device, test_loader, num_classes,
                                   text=(args.dataset == 'imdb'))
        test_accs.append(test_acc)
        test_losses.append(test_loss)

    save_data = {
        "command": " ".join(sys.argv),
        "train_loss": train_losses,
        "test_loss": test_losses,
        "test_acc": test_accs
    }
    save_file = args.result_dir + "/" + name + ".json"
    json.dump(save_data, open(save_file, 'w'))
def train_baseline_KD(self):
    if self.params.model_version == 'resnet18':
        model = resnet.ResNet18().cuda() if self.params.cuda else resnet.ResNet18()
    elif self.params.model_version == 'alexnet':
        model = alexnet.AlexNet().cuda() if self.params.cuda else alexnet.AlexNet()

    optimizer = optim.SGD(model.parameters(), lr=self.params.learning_rate,
                          momentum=0.9, weight_decay=5e-4)
    loss_function_KD = utils.loss_function_kd
    metrics = utils.metrics

    teacher_model = resnet.ResNet18()
    teacher_checkpoint = 'experiments/baseline_standalone_resnet18/best.pth.tar'
    teacher_model = teacher_model.cuda() if self.params.cuda else teacher_model
    utils.load_checkpoint(teacher_checkpoint, teacher_model)

    # Train the model with KD
    logging.info("Experiment - model version: {}".format(self.params.model_version))
    logging.info("Starting training for {} epoch(s)".format(self.params.num_epochs))
    logging.info("First, loading the teacher model and computing its outputs...")

    best_valid_acc = 0.0
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=150, gamma=0.1)

    for epoch in range(self.params.num_epochs):
        logging.info("Epoch {}/{}".format(epoch + 1, self.params.num_epochs))

        train_metrics = baseline_KD.train_kd(model, teacher_model, optimizer,
                                             loss_function_KD, self.trainloader,
                                             metrics, self.params)
        scheduler.step()
        valid_metrics = baseline_KD.evaluate_kd(model, teacher_model, loss_function_KD,
                                                self.testloader, metrics, self.params)

        valid_acc = valid_metrics['accuracy']
        is_best = valid_acc >= best_valid_acc

        # Record experiment results
        with open(self.model_dir + "/result.csv", 'a') as f:
            writer = csv.writer(f)
            row = [
                train_metrics['loss'], train_metrics['accuracy'],
                valid_metrics['loss'], valid_metrics['accuracy']
            ]
            writer.writerow(row)

        # Save weights
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optim_dict': optimizer.state_dict()
            },
            is_best=is_best,
            checkpoint=self.model_dir)

        # If best_eval, best_save_path
        if is_best:
            logging.info("- Found new best accuracy")
            best_valid_acc = valid_acc

            # Save best valid metrics in a JSON file in the model directory
            best_json_path = os.path.join(self.model_dir, "metrics_valid_best_weights.json")
            utils.save_dict_to_json(valid_metrics, best_json_path)

        # Save latest valid metrics in a JSON file in the model directory
        last_json_path = os.path.join(self.model_dir, "metrics_valid_last_weights.json")
        utils.save_dict_to_json(valid_metrics, last_json_path)
        'sm_type']
except KeyError as err:
    print("Key Error: {0}".format(err))
    print('\nProbably old cp has no info regarding classifiers arch!\n')
    n_hidden, hidden_size, softmax = get_classifier_config_from_cp(ckpt)

dropout_prob = args.dropout_prob

if args.model == 'vgg':
    model = vgg.VGG('VGG16', nh=n_hidden, n_h=hidden_size,
                    dropout_prob=dropout_prob, sm_type=softmax)
elif args.model == 'resnet':
    model = resnet.ResNet18(nh=n_hidden, n_h=hidden_size,
                            dropout_prob=dropout_prob, sm_type=softmax)
elif args.model == 'densenet':
    model = densenet.densenet_cifar(nh=n_hidden, n_h=hidden_size,
                                    dropout_prob=dropout_prob, sm_type=softmax)

try:
    model.load_state_dict(ckpt['model_state'], strict=True)
except RuntimeError as err:
    print("Runtime Error: {0}".format(err))
except:
    print("Unexpected error:", sys.exc_info()[0])
    raise
                            download=True, transform=transform_test)

valid_loader = torch.utils.data.DataLoader(validset, batch_size=args.valid_batch_size,
                                           shuffle=False, num_workers=args.n_workers)

if args.model == 'vgg':
    model = vgg.VGG('VGG16', nh=args.n_hidden, n_h=args.hidden_size,
                    dropout_prob=args.dropout_prob, sm_type=args.softmax)
elif args.model == 'resnet':
    model = resnet.ResNet18(nh=args.n_hidden, n_h=args.hidden_size,
                            dropout_prob=args.dropout_prob, sm_type=args.softmax)
elif args.model == 'densenet':
    model = densenet.densenet_cifar(nh=args.n_hidden, n_h=args.hidden_size,
                                    dropout_prob=args.dropout_prob, sm_type=args.softmax)

if args.verbose > 0:
    print(model)

if args.cuda:
    device = get_freer_gpu()
    model = model.cuda(device)

optimizer = optim.SGD(model.parameters(),
                           image_mode=mode1, label_mode=mode2)
training_loader = DataLoader(training_dataset, BATCH_SIZE, shuffle=True, pin_memory=True)

testing_dataset = CIFAR10(root, train=False, transform=img_transforms)
testing_loader = DataLoader(testing_dataset, BATCH_SIZE, shuffle=False, pin_memory=True)

loaders = {'train': training_loader, 'test': testing_loader}

resnet18 = resnet.ResNet18()
vgg16 = vgg.VGG('VGG16')
alex = alexnet.alexnet()
inception = inceptions.GoogLeNet()
exec('model={}'.format(model_name))

if use_gpu:
    model = model.cuda()

criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)
# exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.95)
optimizer = optim.SGD(model.parameters(), lr=lr)
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1,
    diff = org_labels - pred_labels.argmax(1)
    diff[diff != 0] = 1
    total_error = 100 * np.sum(diff) / N
    return total_error, np.argwhere(diff != 0)[:, 1]


# ======================================================================================================================
if __name__ == '__main__':
    data_dir_ = "data/attacked/cifar/%s/p%d" % (args.model, args.pixels)

    if args.model == "vgg16":
        net = modelvgg.VGG('VGG16')
    elif args.model == "resnet18":
        net = modelresnet.ResNet18()

    # load model
    net.load_state_dict(
        torch.load('checkpoints/%s/%s_%d.pth' % (args.model, args.model, args.epoch),
                   map_location=lambda storage, loc: storage))

    if use_cuda:
        net.cuda()
    else:
        net.cpu()
    cudnn.benchmark = True
    net.eval()

    # ---------------------------------------------------------------------------------------------------------
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([x / 255 for x in [125.3, 123.0, 113.9]],
                         [x / 255 for x in [63.0, 62.1, 66.7]])
])

validset = datasets.CIFAR10(root=args.data_path, train=False, download=True,
                            transform=transform_test)
labels_list = [x[1] for x in validset]

if args.model == 'vgg':
    model = vgg.VGG('VGG16', sm_type=args.softmax)
elif args.model == 'resnet':
    model = resnet.ResNet18(sm_type=args.softmax)
elif args.model == 'densenet':
    model = densenet.densenet_cifar(sm_type=args.softmax)

cp_list = glob.glob(args.cp_path + '*.pt')

idxs_enroll, idxs_test, labels = create_trials_labels(labels_list)
print('\n{} trials created out of which {} are target trials'.format(len(idxs_enroll), np.sum(labels)))

best_model, best_eer = None, float('inf')

for cp in cp_list:
    ckpt = torch.load(cp, map_location=lambda storage, loc: storage)
    try:
                    metavar='N', help='maximum number of frames per utterance (default: 1)')
parser.add_argument('--softmax', choices=['softmax', 'am_softmax'], default='softmax',
                    help='Softmax type')
args = parser.parse_args()

if args.model == 'vgg':
    model = vgg.VGG('VGG16', nh=args.n_hidden, n_h=args.hidden_size, sm_type=args.softmax)
elif args.model == 'resnet':
    model = resnet.ResNet18(nh=args.n_hidden, n_h=args.hidden_size, sm_type=args.softmax)
elif args.model == 'densenet':
    model = densenet.densenet_cifar(nh=args.n_hidden, n_h=args.hidden_size, sm_type=args.softmax)

cp_list = glob.glob(args.cp_path + '*.pt')

assert len(cp_list) > 0, 'No cp found in the given path!'

for cp in cp_list:
    ckpt = torch.load(cp, map_location=lambda storage, loc: storage)
    try:
        model.load_state_dict(ckpt['model_state'], strict=True)
dataloaders = {'train': train_loader, 'test': test_loader}

for cycle in range(CYCLES):
    # Randomly sample 10000 unlabeled data points
    if not args.total:
        random.shuffle(unlabeled_set)
        subset = unlabeled_set[:SUBSET]

    # Model - create new instance for every cycle so that it resets
    with torch.cuda.device(CUDA_VISIBLE_DEVICES):
        if args.dataset == "fashionmnist":
            resnet18 = resnet.ResNet18fm(num_classes=NO_CLASSES).cuda()
        else:
            # resnet18 = vgg11().cuda()
            resnet18 = resnet.ResNet18(num_classes=NO_CLASSES).cuda()
        if method == 'lloss':
            # loss_module = LossNet(feature_sizes=[16,8,4,2], num_channels=[128,128,256,512]).cuda()
            loss_module = LossNet().cuda()

    models = {'backbone': resnet18}
    if method == 'lloss':
        models = {'backbone': resnet18, 'module': loss_module}

    torch.backends.cudnn.benchmark = True

    # Loss, criterion and scheduler (re)initialization
    criterion = nn.CrossEntropyLoss(reduction='none')
    optim_backbone = optim.SGD(models['backbone'].parameters(), lr=LR,
                               momentum=MOMENTUM, weight_decay=WDECAY)
    sched_backbone = lr_scheduler.MultiStepLR(optim_backbone, milestones=MILESTONES)
def train(lr, l2, momentum, smoothing, patience, model, n_hidden, hidden_size, dropout_prob,
          epochs, batch_size, valid_batch_size, n_workers, cuda, data_path, valid_data_path,
          checkpoint_path, softmax):

    cp_name = get_cp_name(checkpoint_path)

    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([x / 255 for x in [125.3, 123.0, 113.9]],
                             [x / 255 for x in [63.0, 62.1, 66.7]])
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([x / 255 for x in [125.3, 123.0, 113.9]],
                             [x / 255 for x in [63.0, 62.1, 66.7]])
    ])

    # trainset = Loader(data_path)
    trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True,
                                               num_workers=n_workers, worker_init_fn=set_np_randomseed)

    # validset = Loader(valid_data_path)
    validset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
    valid_loader = torch.utils.data.DataLoader(validset, batch_size=valid_batch_size, shuffle=True,
                                               num_workers=n_workers)

    if model == 'vgg':
        model_ = vgg.VGG('VGG16', nh=int(n_hidden), n_h=int(hidden_size),
                         dropout_prob=dropout_prob, sm_type=softmax)
    elif model == 'resnet':
        model_ = resnet.ResNet18(nh=int(n_hidden), n_h=int(hidden_size),
                                 dropout_prob=dropout_prob, sm_type=softmax)
    elif model == 'densenet':
        model_ = densenet.densenet_cifar(nh=int(n_hidden), n_h=int(hidden_size),
                                         dropout_prob=dropout_prob, sm_type=softmax)

    if args.cuda:
        device = get_freer_gpu()
        model_ = model_.cuda(device)

    optimizer = optim.SGD(model_.parameters(), lr=lr, weight_decay=l2, momentum=momentum)

    trainer = TrainLoop(model_, optimizer, train_loader, valid_loader, patience=int(patience),
                        label_smoothing=smoothing, verbose=-1, cp_name=cp_name, save_cp=True,
                        checkpoint_path=checkpoint_path, cuda=cuda)

    for i in range(5):
        print(' ')
        print('Hyperparameters:')
        print('Selected model: {}'.format(model))
        print('Hidden layer size: {}'.format(int(hidden_size)))
        print('Number of hidden layers: {}'.format(int(n_hidden)))
        print('Dropout rate: {}'.format(dropout_prob))
        print('Batch size: {}'.format(batch_size))
        print('LR: {}'.format(lr))
        print('Momentum: {}'.format(momentum))
        print('l2: {}'.format(l2))
        print('Label smoothing: {}'.format(smoothing))
        print('Patience: {}'.format(patience))
        print('Softmax Mode is: {}'.format(softmax))
        print(' ')

        if i > 0:
            print(' ')
            print('Trial {}'.format(i + 1))
            print(' ')

        try:
            cost = trainer.train(n_epochs=epochs, save_every=epochs + 10)

            print(' ')
            print('Best e2e EER in file ' + cp_name + ' was: {}'.format(cost[0]))
            print('Best cos EER in file ' + cp_name + ' was: {}'.format(cost[1]))
            print('Best Error Rate in file ' + cp_name + ' was: {}'.format(cost[2]))
            print(' ')

            return cost[0]
        except:
            print("Error:", sys.exc_info())
            pass

    print('Returning dummy cost due to failures while training.')
    return 0.99
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([x / 255 for x in [125.3, 123.0, 113.9]],
                         [x / 255 for x in [63.0, 62.1, 66.7]])
])

validset = datasets.CIFAR10(root=args.data_path, train=False, download=True,
                            transform=transform_test)
labels_list = [x[1] for x in validset]

if args.model == 'vgg':
    model = vgg.VGG('VGG16')
elif args.model == 'resnet':
    model = resnet.ResNet18()
elif args.model == 'densenet':
    model = densenet.densenet_cifar()

ckpt = torch.load(args.cp_path, map_location=lambda storage, loc: storage)

try:
    model.load_state_dict(ckpt['model_state'], strict=True)
except RuntimeError as err:
    print("Runtime Error: {0}".format(err))
except:
    print("Unexpected error:", sys.exc_info()[0])
    raise

if args.cuda:
    device = get_freer_gpu()
    model = model.cuda(device)
def train_baseline_standalone(self):
    if self.params.model_version == 'resnet18':
        model = resnet.ResNet18().cuda() if self.params.cuda else resnet.ResNet18()
    elif self.params.model_version == 'alexnet':
        model = alexnet.AlexNet().cuda() if self.params.cuda else alexnet.AlexNet()

    optimizer = optim.SGD(model.parameters(), lr=self.params.learning_rate,
                          momentum=0.9, weight_decay=5e-4)
    loss_function = utils.loss_function
    metrics = utils.metrics

    # Train the model
    logging.info("Starting training for {} epoch(s)".format(self.params.num_epochs))
    best_valid_acc = 0

    if self.params.model_version == "resnet18":
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=150, gamma=0.1)
    elif self.params.model_version == "alexnet":
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.2)

    for epoch in range(self.params.num_epochs):
        logging.info("Epoch {}/{}".format(epoch + 1, self.params.num_epochs))

        train_metrics = baseline_standalone.train(model, optimizer, loss_function,
                                                  self.trainloader, metrics, self.params)
        scheduler.step()
        valid_metrics = baseline_standalone.evaluate(model, loss_function, self.testloader,
                                                     metrics, self.params)

        valid_acc = valid_metrics['accuracy']
        is_best = valid_acc >= best_valid_acc

        # Record experiment results
        with open(self.model_dir + "/result.csv", 'a') as f:
            writer = csv.writer(f)
            row = [
                train_metrics['loss'], train_metrics['accuracy'],
                valid_metrics['loss'], valid_metrics['accuracy']
            ]
            writer.writerow(row)

        # Save weights
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optim_dict': optimizer.state_dict()
            },
            is_best=is_best,
            checkpoint=self.model_dir)

        if is_best:
            logging.info("- Found new best accuracy")
            best_valid_acc = valid_acc

            # Save best validation metrics in a json file in the model directory
            best_json_path = os.path.join(self.model_dir, "metrics_valid_best_weights.json")
            utils.save_dict_to_json(valid_metrics, best_json_path)

        # Save latest valid metrics in a json file in the model directory
        last_json_path = os.path.join(self.model_dir, "metrics_valid_last_weights.json")
        utils.save_dict_to_json(valid_metrics, last_json_path)