def test():
    """Evaluate the best LeNet++ checkpoint on the test set.

    Loads `<prefix>-best.params`, reports accuracy and elapsed time, and
    optionally writes a feature-scatter plot to args.out_dir.
    """
    print('Start to test...')
    # GPU context list from the comma-separated id string, or CPU for '-1'.
    if args.gpus != '-1':
        ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')]
    else:
        ctx = mx.cpu()
    _, test_iter = data_loader(args.batch_size)
    model = LeNetPlus()
    model.load_params(os.path.join(args.ckpt_dir, args.prefix + '-best.params'),
                      ctx=ctx)
    start_time = timeit.default_timer()
    test_accuracy, features, predictions, labels = evaluate_accuracy(
        test_iter, model, ctx)
    elapsed_time = timeit.default_timer() - start_time
    print("Test_acc: %s, Elapsed_time: %f s" % (test_accuracy, elapsed_time))
    if not os.path.exists(args.out_dir):
        os.makedirs(args.out_dir)
    if args.plotting:
        plot_features(features, labels, num_classes=args.num_classes,
                      fpath=os.path.join(args.out_dir, '%s.png' % args.prefix))
def evaluate(model, criterion_cent, testloader, use_gpu, num_classes, epoch, args):
    """Run the model over the test loader and return (accuracy%, error%).

    When args.plot is set, also gathers embeddings, classifier weights, and
    the center-loss centers, and hands them to plot_features.
    """
    model.eval()
    correct, total = 0, 0
    if args.plot:
        all_features, all_labels = [], []
    with torch.no_grad():
        for data, labels in testloader:
            if use_gpu:
                data, labels = data.cuda(), labels.cuda()
            outputs = model(data)
            features = model.extract(data)
            predictions = outputs.data.max(1)[1]
            total += labels.size(0)
            correct += (predictions == labels.data).sum()
            if args.plot:
                feats, labs = features.data, labels.data
                if use_gpu:
                    feats, labs = feats.cpu(), labs.cpu()
                all_features.append(feats.numpy())
                all_labels.append(labs.numpy())
    if args.plot:
        all_features = np.concatenate(all_features, 0)
        all_labels = np.concatenate(all_labels, 0)
        weights = model.classifier.weight.data.cpu().numpy()
        centers = criterion_cent.centers.data.cpu().numpy()
        plot_features(all_features, weights, centers, all_labels, num_classes,
                      epoch, prefix='test', args=args)
    acc = correct * 100. / total
    err = 100. - acc
    return acc, err
def train(model, optimizer_model, trainloader, use_gpu, num_classes, epoch, args):
    """One training epoch for the prototype/distance-based model.

    The model returns a distance matrix (used as negative logits scaled by
    model.scale_factor) plus an embedding; the loss is cross-entropy on the
    scaled distances plus a weighted center loss. Optionally collects the
    embeddings for plotting at epoch end.
    """
    model.train()
    xent_losses = AverageMeter()
    cent_losses = AverageMeter()
    losses = AverageMeter()
    if args.plot:
        all_features, all_labels = [], []
    # Weight on the center-loss term.
    # BUG FIX (dead code): the original computed an epoch-based warm-up
    # (0.0 before epoch 10, then 0.01) and then unconditionally overwrote it
    # with 0.1, so the schedule never took effect. The effective value was
    # always 0.1; the dead branch is removed to make the intent explicit
    # while keeping behavior identical.
    weight_loss = 0.1
    for batch_idx, (data, labels) in enumerate(trainloader):
        if use_gpu:
            data, labels = data.cuda(), labels.cuda()
        distmat, features = model.forward(data)
        loss_cent = model.center_loss(features, labels)
        # Distances act as negative logits, scaled before cross-entropy.
        loss_xent = F.cross_entropy(-model.scale_factor * distmat, labels)
        loss = loss_xent + weight_loss * loss_cent
        optimizer_model.zero_grad()
        loss.backward()
        optimizer_model.step()
        losses.update(loss.item(), labels.size(0))
        xent_losses.update(loss_xent.item(), labels.size(0))
        cent_losses.update(loss_cent.item(), labels.size(0))
        if args.plot:
            if use_gpu:
                all_features.append(features.data.cpu().numpy())
                all_labels.append(labels.data.cpu().numpy())
            else:
                all_features.append(features.data.numpy())
                all_labels.append(labels.data.numpy())
        if (batch_idx + 1) % args.print_freq == 0:
            print("Batch {}/{}\t Loss {:.6f} ({:.6f}) XentLoss {:.6f} ({:.6f}) CenterLoss {:.6f} ({:.6f})"
                  .format(batch_idx + 1, len(trainloader), losses.val, losses.avg,
                          xent_losses.val, xent_losses.avg,
                          cent_losses.val, cent_losses.avg))
    if args.plot:
        # weights = model.classifier.weight.data.cpu().numpy()
        weights = None
        centers = model.protos.data.cpu().numpy()
        all_features = np.concatenate(all_features, 0)
        all_labels = np.concatenate(all_labels, 0)
        plot_features(all_features, weights, centers, all_labels, num_classes,
                      epoch, prefix='train', args=args)
def main():
    """Parse CLI arguments, train the selected VAE variant, and plot the
    2-D latent features of the test set.

    Raises:
        Exception: if the requested dataset or model is unknown.
    """
    # Parse arguments.
    parser = argparse.ArgumentParser(description='Training VAEs')
    parser.add_argument('--model', type=str, default='conditional_vae')
    parser.add_argument('--dataset', type=str, default='mnist')
    parser.add_argument('--epoch', type=int, default=10)
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--output', default='output')
    parser.add_argument('--zdims', type=int, default=256)
    parser.add_argument('--gpu', type=int, default=1)
    parser.add_argument('--resume', type=str, default=None)
    parser.add_argument('--testmode', action='store_true')
    args = parser.parse_args()
    # Select GPU.
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    # Make output directory if it does not exist.
    if not os.path.isdir(args.output):
        os.mkdir(args.output)
    # Load datasets.
    if args.dataset == 'mnist':
        (x_train, y_train), (x_test, y_test) = load_mnist()
    else:
        # BUG FIX: an unknown dataset previously fell through silently and
        # caused a NameError on x_train below; fail fast instead (same
        # exception style as the unknown-model check).
        raise Exception('Unknown dataset:', args.dataset)
    # Construct model.
    if args.model not in models:
        raise Exception('Unknown model:', args.model)
    model = models[args.model]()
    print(args.model)
    if args.model in ['conditional_vae']:
        # Conditional models need labels during training.
        model.fit([x_train, y_train], num_epochs=args.epoch,
                  batch_size=args.batch_size)
        # Display a 2-D plot of the digit classes in the latent space.
        z_test = model.extract_features(x_test, y_test, batch_size=100)
        plot_features(z_test, y_test)
        model.plot()
    else:
        # vae, aae, concrete_vae, sparse_ae
        model.fit(x_train, num_epochs=args.epoch, batch_size=args.batch_size)
        # Display a 2-D plot of the digit classes in the latent space.
        z_test = model.extract_features(x_test, batch_size=100)
        plot_features(z_test, y_test)
def train(model, criterion_xent, criterion_cent, optimizer_model,
          optimizer_centloss, trainloader, use_gpu, num_classes, epoch, args):
    """One epoch of joint training with cross-entropy plus weighted center loss.

    The two losses use separate optimizers: one for the model and one for the
    center-loss centers. Optionally collects embeddings for plotting.
    """
    model.train()
    xent_losses = AverageMeter()
    cent_losses = AverageMeter()
    losses = AverageMeter()
    if args.plot:
        all_features, all_labels = [], []
    for batch_idx, (data, labels) in enumerate(trainloader):
        if use_gpu:
            data, labels = data.cuda(), labels.cuda()
        outputs = model(data)
        features = model.extract(data)
        loss_xent = criterion_xent(outputs, labels)
        loss_cent = criterion_cent(features, labels)
        loss = loss_xent + args.weight_cent * loss_cent
        optimizer_model.zero_grad()
        optimizer_centloss.zero_grad()
        loss.backward()
        optimizer_model.step()
        # by doing so, weight_cent would not impact on the learning of centers
        # for param in criterion_cent.parameters():
        #     param.grad.data *= (1. / args.weight_cent)
        optimizer_centloss.step()
        n = labels.size(0)
        losses.update(loss.item(), n)
        xent_losses.update(loss_xent.item(), n)
        cent_losses.update(loss_cent.item(), n)
        if args.plot:
            feats, labs = features.data, labels.data
            if use_gpu:
                feats, labs = feats.cpu(), labs.cpu()
            all_features.append(feats.numpy())
            all_labels.append(labs.numpy())
        if (batch_idx + 1) % args.print_freq == 0:
            print("Batch {}/{}\t Loss {:.6f} ({:.6f}) XentLoss {:.6f} ({:.6f}) CenterLoss {:.6f} ({:.6f})"
                  .format(batch_idx + 1, len(trainloader), losses.val, losses.avg,
                          xent_losses.val, xent_losses.avg,
                          cent_losses.val, cent_losses.avg))
    if args.plot:
        # weights = model.classifier.weight.data.cpu().numpy()
        weights = None
        centers = criterion_cent.centers.data.cpu().numpy()
        all_features = np.concatenate(all_features, 0)
        all_labels = np.concatenate(all_labels, 0)
        plot_features(all_features, weights, centers, all_labels, num_classes,
                      epoch, prefix='train', args=args)
def main():
    """Train LeNetWithAngle on MNIST with the A-Softmax (angle) loss.

    Relies on module-level configuration (classes_num, use_gpu, model_lr,
    batch_size, epochs) and the models/dataloader/utils/a_softmax helpers.
    Tracks the best test accuracy, checkpoints weights each epoch, and
    builds train/test feature GIFs at the end.
    """
    # Build the network.
    net = models.LeNetWithAngle(classes_num)
    if use_gpu:
        net = net.cuda()
    # Optimizer: SGD with Nesterov momentum and light weight decay.
    optimizer = torch.optim.SGD(net.parameters(), lr=model_lr,
                                weight_decay=1e-5, nesterov=True, momentum=0.9)
    print("net and optimizer load succeed")  # BUG FIX: message typo "optimzer"
    # Data loaders.
    trainloader, testloader = dataloader.get_loader(batch_size=batch_size,
                                                    root_path="./data/MNIST")
    print("data load succeed")
    # TensorBoard logger.
    logger = utils.Logger(tb_path="./logs/tblog/")
    # Step LR schedule: decay by 10x every 30 epochs.
    scheduler = lr_sche.StepLR(optimizer, 30, 0.1)
    # Angle-softmax loss.
    criterion = a_softmax.AngleSoftmaxLoss(gamma=0)
    best_acc = 0
    for i in range(1, epochs + 1):
        scheduler.step(epoch=i)
        net.train()
        train_acc, train_loss, all_feature, all_labels = \
            train(net, optimizer, criterion, trainloader, i)
        utils.plot_features(all_feature, all_labels, classes_num, i,
                            "./logs/images/train/train_{}.png")
        net.eval()
        test_acc, test_loss, all_feature, all_labels = test(
            net, criterion, testloader, i)
        utils.plot_features(all_feature, all_labels, classes_num, i,
                            "./logs/images/test/test_{}.png")
        print("{} epoch end, train acc is {:.4f}, test acc is {:.4f}".format(
            i, train_acc, test_acc))
        content = {
            "Train/acc": train_acc,
            "Test/acc": test_acc,
            "Train/loss": train_loss,
            "Test/loss": test_loss
        }
        logger.log(step=i, content=content)
        if best_acc < test_acc:
            best_acc = test_acc
            # NOTE(review): best_acc was just set to test_acc, so the
            # is-best flag below is always True inside this branch —
            # confirm the intended semantics of save_checkpoints.
            utils.save_checkpoints("./logs/weights/net_{}.pth", i,
                                   net.state_dict(), (best_acc == test_acc))
    utils.make_gif("./logs/images/train/", "./logs/train.gif")
    utils.make_gif("./logs/images/test/", "./logs/test.gif")
    print("Training finished...")  # BUG FIX: message typo "Traing"
def train(model, classifier, criterion, optimizer, trainloader, use_gpu,
          num_classes, epoch, args):
    """One training epoch: backbone embeddings fed to a label-aware
    classifier head, trained with a single optimizer.

    When plotting is enabled, collects (optionally L2-normalized) features
    and the classifier weight rows as class "centers".
    """
    model.train()
    losses = AverageMeter()
    if args.plot:
        all_features, all_labels = [], []
    for batch_idx, (data, labels) in enumerate(trainloader):
        if use_gpu:
            data, labels = data.cuda(), labels.cuda()
        features = model(data)
        outputs, _ = classifier(features, labels)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.update(loss.item(), labels.size(0))
        if args.plot_normalized:
            features = l2_norm(features)
        if args.plot:
            feats, labs = features.data, labels.data
            if use_gpu:
                feats, labs = feats.cpu(), labs.cpu()
            all_features.append(feats.numpy())
            all_labels.append(labs.numpy())
        if (batch_idx + 1) % args.print_freq == 0:
            print("Batch {}/{}\t Loss {:.6f} ({:.6f})"
                  .format(batch_idx + 1, len(trainloader),
                          losses.val, losses.avg))
    if args.plot:
        weights = None
        centers = classifier.weight.data
        if args.plot_normalized:
            centers = l2_norm(classifier.weight.data)
        all_features = np.concatenate(all_features, 0)
        all_labels = np.concatenate(all_labels, 0)
        plot_features(all_features, weights, centers.cpu().numpy(), all_labels,
                      num_classes, epoch, prefix='train', args=args)
def train(model, optimizer_model, trainloader, use_gpu, num_classes, epoch, args):
    """One training epoch for the deep-LVQ model.

    Loss is the LVQ distance loss on the model's distance matrix plus a
    fixed 0.1-weighted intra-class (center) loss on the embeddings.
    Optionally collects embeddings for plotting.
    """
    model.train()
    losses = AverageMeter()
    if args.plot:
        all_features, all_labels = [], []
    for batch_idx, (data, labels) in enumerate(trainloader):
        if use_gpu:
            data, labels = data.cuda(), labels.cuda()
        distmat, features = model(data)
        distance_loss = deep_lvq_loss(distmat, labels)
        intra_class_loss = model.center_loss(features, labels)
        loss = distance_loss + 0.1 * intra_class_loss
        optimizer_model.zero_grad()
        loss.backward()
        optimizer_model.step()
        losses.update(loss.item(), labels.size(0))
        if args.plot:
            feats, labs = features.data, labels.data
            if use_gpu:
                feats, labs = feats.cpu(), labs.cpu()
            all_features.append(feats.numpy())
            all_labels.append(labs.numpy())
        if (batch_idx + 1) % args.print_freq == 0:
            print("Batch {}/{}\t Loss {:.6f} ({:.6f})".format(
                batch_idx + 1, len(trainloader), losses.val, losses.avg))
    if args.plot:
        # weights = model.classifier.weight.data.cpu().numpy()
        weights = None
        centers = model.protos.data.cpu().numpy()
        all_features = np.concatenate(all_features, 0)
        all_labels = np.concatenate(all_labels, 0)
        plot_features(all_features, weights, centers, all_labels, num_classes,
                      epoch, prefix='train', args=args)
def train():
    """Train LeNetPlus on MNIST with softmax + range loss (MXNet/Gluon).

    The full training set is materialized into numpy arrays so the
    RangeLossDataLoader can sample class-balanced batches; evaluation and
    feature plotting run every 3000 iterations.
    """
    mnist_set = gluon.data.vision.MNIST(train=True, transform=transform)
    test_mnist_set = gluon.data.vision.MNIST(train=False, transform=transform)
    # Materialize the whole training set as two numpy arrays.
    data, label = [], []
    for idx in range(len(mnist_set)):
        data.append(mnist_set[idx][0][np.newaxis, :, :, :])
        label.append(mnist_set[idx][1][np.newaxis, ])
    data = np.concatenate(data, axis=0)
    label = np.concatenate(label, axis=0)
    full_set = (data, label)
    ctx = mx.gpu(0)
    model = LeNetPlus(normalize=arg.normalize)
    model.hybridize()
    model.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
    train_iter = RangeLossDataLoader(full_set, arg.num_class,
                                     arg.num_in_class, 15000)
    test_iter = mx.gluon.data.DataLoader(test_mnist_set, 500, shuffle=False)
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    Range_loss = RangeLoss(arg.alpha, arg.beta, arg.topk, arg.num_class,
                           arg.num_in_class, 2, arg.margin)
    Range_loss.initialize(mx.init.Xavier(), ctx=ctx)
    trainer = gluon.Trainer(model.collect_params(), optimizer='adam',
                            optimizer_params={'learning_rate': arg.lr,
                                              'wd': 5e-4})
    for i, (data, label) in enumerate(train_iter):
        data = nd.array(data, ctx=ctx)
        label = nd.array(label, ctx=ctx)
        with autograd.record():
            output, features = model(data)
            softmax_loss = softmax_cross_entropy(output, label)
            range_loss = Range_loss(features, label)
            loss = softmax_loss + range_loss
        loss.backward()
        trainer.step(data.shape[0])
        # Periodically evaluate and plot the learned embedding.
        if (i + 1) % 3000 == 0:
            test_accuracy, test_ft, _, test_lb = evaluate_accuracy(
                test_iter, model, ctx)
            print(test_accuracy)
            plot_features(test_ft, test_lb)
def test():
    """Test model accuracy on the test dataset.

    Loads the best LeNet++ checkpoint together with the trained CenterLoss
    feature matrix, reports accuracy and elapsed time, and optionally plots
    the extracted features.
    """
    print("Start to test...")
    ctx = mx.gpu() if args.use_gpu else mx.cpu()
    _, test_iter = data_loader(args.batch_size)
    model = LeNetPlus()
    model.load_parameters(os.path.join(args.ckpt_dir,
                                       args.prefix + "-best.params"),
                          ctx=ctx, allow_missing=True)
    # Center matrix, required by the chosen evaluation method.
    center_net = CenterLoss(num_classes=args.num_classes,
                            feature_size=args.feature_size,
                            lmbd=args.lmbd, ctx=mx.cpu())
    center_net.load_parameters(os.path.join(args.ckpt_dir,
                                            args.prefix + "-feature_matrix.params"),
                               ctx=ctx)
    start_time = time.time()
    test_accuracy, features, predictions, labels = evaluate_accuracy(
        test_iter, model, center_net, args.eval_method, ctx)
    elapsed_time = time.time() - start_time
    print("Test_acc: %s, Elapsed_time: %f s" % (test_accuracy, elapsed_time))
    # Make the output directory.
    if not os.path.exists(args.out_dir):
        os.makedirs(args.out_dir)
    # Draw the feature map.
    if args.plotting:
        plot_features(features, labels, num_classes=args.num_classes,
                      fpath=os.path.join(args.out_dir,
                                         "%s.png" % args.prefix))
def main(args):
    """Train and evaluate an anomaly-detection model on a small imbalanced
    CIFAR subset.

    Builds the datasets/dataloaders, trains the model, evaluates on the test
    set, and writes hyper-parameters, logs, and plots under a run-specific
    directory named from the run configuration.
    """
    class_names = {
        0: 'airplane', 1: 'automobile', 2: 'bird', 3: 'cat', 4: 'deer',
        5: 'dog', 6: 'frog', 7: 'horse', 8: 'ship', 9: 'truck'
    }
    num_epochs = args.num_epochs
    target_class = args.target_class
    anomal_classes = args.anomal_classes
    num_anomal_classes = len(anomal_classes)
    anomal_classes_names = ','.join([class_names[i] for i in anomal_classes])
    gamma = args.gamma
    normal_subset_size = args.normal_subset_size
    anomal_subset_size = args.anomal_subset_size
    lr = args.lr
    feature_extractor_version = args.feature_extractor_version
    dir_path = args.dir_path
    balanced_dataset_size = anomal_subset_size
    one_class_dataset_size = normal_subset_size - anomal_subset_size
    file_name_str = f"results_{num_epochs}_{num_anomal_classes}_{gamma}_{balanced_dataset_size}_{one_class_dataset_size}_{lr}"
    dir_path_str = f"{dir_path}/{file_name_str}"
    if not Path(dir_path_str).exists():
        os.makedirs(dir_path_str)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print('Using {} device'.format(device))
    train_set, validation_set, test_set = getCifarSmallImbalancedDatasets(
        target_class, anomal_classes, num_anomal_classes=num_anomal_classes,
        normal_subset_size=normal_subset_size,
        anomal_subset_size=anomal_subset_size)
    train_dataloader = torch.utils.data.DataLoader(
        train_set, batch_size=20, shuffle=True, num_workers=2)
    validation_dataloader = torch.utils.data.DataLoader(
        validation_set, batch_size=20, shuffle=False, num_workers=2)
    # The test loader serves the whole test set in a single batch.
    test_dataloader = torch.utils.data.DataLoader(
        test_set, batch_size=len(test_set), shuffle=False, num_workers=2)
    model = build_model()
    model.to(device)
    # Record run hyper-parameters.
    # BUG FIX: the one-class-size entry was a broken (line-wrapped) string
    # literal, and the learning-rate entry lacked a trailing newline so it
    # ran into the next line of the output file.
    with open(f'{dir_path_str}/parameters.txt', 'w') as writefile:
        writefile.write(f"epochs: {num_epochs}\n")
        writefile.write(f"target class: {class_names[target_class]}\n")
        writefile.write(f"anomal classes: {anomal_classes_names}\n")
        writefile.write(f"gamma: {gamma}\n")
        writefile.write(f"balanced dataset size: {balanced_dataset_size}*2\n")
        writefile.write(f"one-class dataset size: {one_class_dataset_size}\n")
        writefile.write(f"learning rate: {lr}\n")
        writefile.write(
            f"feature extractor version: {feature_extractor_version}")
    log_file = open(f"{dir_path_str}/log.txt", "a")
    train_losses, train_accuracies, validation_losses, validation_accuracies = train_model(
        device, train_dataloader, validation_dataloader, model, log_file,
        num_epochs=num_epochs, lr=lr, gamma=gamma)
    print('Done training.')
    print('===================================\nTest results:')
    test_loop(device, test_dataloader, model, torch.nn.CrossEntropyLoss(),
              log_file)
    log_file.close()
    plot_results(train_losses, train_accuracies, validation_losses,
                 validation_accuracies, dir_path_str)
    plot_features(device, model, test_dataloader, dir_path_str)
def second_train():
    """Fine-tune a pretrained LeNet++ using only the center loss.

    The CenterLoss feature matrix is frozen (grad_req="null") so that
    training on center loss alone cannot collapse the centers to the zero
    matrix; only the LeNet++ parameters are updated.
    """
    print("Start to train LeNet++ with CenterLoss...")
    train_iter, test_iter = data_loader(args.batch_size)
    ctx = mx.gpu() if args.use_gpu else mx.cpu()
    # Main model (LeNetPlus) and its trainer.
    model = LeNetPlus(classes=args.num_classes, feature_size=args.feature_size)
    model.load_parameters(os.path.join(args.ckpt_dir,
                                       args.prefix + "-best.params"),
                          ctx=ctx, allow_missing=True)
    network_trainer = gluon.Trainer(model.collect_params(), optimizer="sgd",
                                    optimizer_params={"learning_rate": args.lr,
                                                      "wd": args.wd})
    center_loss = CenterLoss(num_classes=args.num_classes,
                             feature_size=args.feature_size,
                             lmbd=args.lmbd, ctx=ctx)
    center_loss.load_parameters(os.path.join(args.ckpt_dir,
                                             args.prefix + "-feature_matrix.params"),
                                ctx=ctx)
    # Freeze the center matrix so it is not updated during this phase.
    center_loss.params.setattr("grad_req", "null")
    # BUG FIX: this initialization appeared commented out in the original,
    # which would leave smoothing_constant/moving_loss undefined at the
    # smoothed-loss update below; restored live, matching the sibling train().
    smoothing_constant, moving_loss = .01, 0.0
    best_acc = 0.0
    for epoch in range(args.epochs):
        # Learning-rate decay schedule.
        if (epoch > 0) and (epoch % args.lr_step == 0):
            network_trainer.set_learning_rate(network_trainer.learning_rate *
                                              args.lr_factor)
        start_time = time.time()
        for i, (data, label) in enumerate(train_iter):
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            # Only the center loss contributes to the gradient.
            with autograd.record():
                output, features = model(data)
                loss = center_loss(features, label)
            loss.backward()
            # Only update LeNet++ parameters; ignore_stale_grad=True is
            # required because the frozen center matrix never receives a
            # fresh gradient (removing it makes the update raise/warn).
            network_trainer.step(args.batch_size, ignore_stale_grad=True)
            # Exponentially smoothed loss value.
            curr_loss = nd.mean(loss).asscalar()
            moving_loss = (curr_loss if ((i == 0) and (epoch == 0)) else
                           (1 - smoothing_constant) * moving_loss +
                           smoothing_constant * curr_loss)
        # Training wall-clock time for this epoch.
        elapsed_time = time.time() - start_time
        train_accuracy, train_ft, _, train_lb = evaluate_accuracy(
            train_iter, model, center_loss, args.eval_method, ctx)
        test_accuracy, test_ft, _, test_lb = evaluate_accuracy(
            test_iter, model, center_loss, args.eval_method, ctx)
        # Plot feature maps with a distinct prefix for easy comparison
        # against the first training phase.
        if args.plotting:
            plot_features(train_ft, train_lb, num_classes=args.num_classes,
                          fpath=os.path.join(args.out_dir,
                                             "%s-second-train-epoch-%d.png" %
                                             (args.prefix, epoch)))
            plot_features(test_ft, test_lb, num_classes=args.num_classes,
                          fpath=os.path.join(args.out_dir,
                                             "%s-second-test-epoch-%d.png" %
                                             (args.prefix, epoch)))
        logging.warning(
            "Epoch [%d]: Loss=%f, Train-Acc=%f, Test-Acc=%f, Epoch-time=%f" %
            (epoch, moving_loss, train_accuracy, test_accuracy, elapsed_time))
        if test_accuracy > best_acc:
            best_acc = test_accuracy
            model.save_parameters(
                os.path.join(args.ckpt_dir,
                             args.prefix + "-second-best.params"))
def train():
    """Train LeNet++ with softmax loss, optionally combined with center loss.

    Saves the best model parameters — and, when center loss is enabled, the
    center-loss feature matrix — under args.ckpt_dir whenever a new best test
    accuracy is reached.
    """
    print("Start to train...")
    if not os.path.exists(args.ckpt_dir):
        os.makedirs(args.ckpt_dir)
    train_iter, test_iter = data_loader(args.batch_size)
    ctx = mx.gpu() if args.use_gpu else mx.cpu()
    # Main model (LeNetPlus), loss, trainer.
    model = LeNetPlus(classes=args.num_classes, feature_size=args.feature_size)
    model.hybridize()
    model.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    network_trainer = gluon.Trainer(model.collect_params(), optimizer="sgd",
                                    optimizer_params={"learning_rate": args.lr,
                                                      "wd": args.wd})
    # "momentum": 0.9,
    # Center-loss network and its own trainer; the center matrix is a
    # trainable parameter, hence the explicit initialization.
    if args.center_loss:
        center_loss = CenterLoss(num_classes=args.num_classes,
                                 feature_size=args.feature_size,
                                 lmbd=args.lmbd, ctx=ctx)
        center_loss.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
        center_trainer = gluon.Trainer(
            center_loss.collect_params(), optimizer="sgd",
            optimizer_params={"learning_rate": args.alpha})
    else:
        center_loss, center_trainer = None, None
    smoothing_constant, moving_loss = .01, 0.0
    best_acc = 0.0
    for epoch in range(args.epochs):
        # Learning-rate decay during training.
        if (epoch > 0) and (epoch % args.lr_step == 0):
            network_trainer.set_learning_rate(network_trainer.learning_rate *
                                              args.lr_factor)
            if args.center_loss:
                center_trainer.set_learning_rate(center_trainer.learning_rate *
                                                 args.lr_factor)
        start_time = time.time()
        for i, (data, label) in enumerate(train_iter):
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            with autograd.record():
                output, features = model(data)
                loss_softmax = softmax_cross_entropy(output, label)
                # Compute loss according to the user's choice.
                if args.center_loss:
                    loss_center = center_loss(features, label)
                    loss = loss_softmax + loss_center
                else:
                    loss = loss_softmax
            # Update parameters.
            loss.backward()
            network_trainer.step(args.batch_size)
            if args.center_loss:
                center_trainer.step(args.batch_size)
            # Exponentially smoothed loss value.
            curr_loss = nd.mean(loss).asscalar()
            moving_loss = (curr_loss if ((i == 0) and (epoch == 0)) else
                           (1 - smoothing_constant) * moving_loss +
                           smoothing_constant * curr_loss)
        # Training wall-clock time for this epoch.
        elapsed_time = time.time() - start_time
        train_accuracy, train_ft, _, train_lb = evaluate_accuracy(
            train_iter, model, center_loss, args.eval_method, ctx)
        test_accuracy, test_ft, _, test_lb = evaluate_accuracy(
            test_iter, model, center_loss, args.eval_method, ctx)
        # Draw feature maps.
        if args.plotting:
            plot_features(train_ft, train_lb, num_classes=args.num_classes,
                          fpath=os.path.join(args.out_dir,
                                             "%s-train-epoch-%d.png" %
                                             (args.prefix, epoch)))
            plot_features(test_ft, test_lb, num_classes=args.num_classes,
                          fpath=os.path.join(args.out_dir,
                                             "%s-test-epoch-%d.png" %
                                             (args.prefix, epoch)))
        logging.warning(
            "Epoch [%d]: Loss=%f, Train-Acc=%f, Test-Acc=%f, Epoch-time=%f" %
            (epoch, moving_loss, train_accuracy, test_accuracy, elapsed_time))
        # Save parameters with the highest test accuracy.
        if test_accuracy > best_acc:
            best_acc = test_accuracy
            model.save_parameters(
                os.path.join(args.ckpt_dir, args.prefix + "-best.params"))
            # CenterLoss inherits from a gluon Block, so its parameters can be
            # saved/loaded via save_parameters/load_parameters like a model.
            # BUG FIX: this call used to run unconditionally and crashed with
            # AttributeError when center loss was disabled (center_loss is
            # None); it is now guarded.
            if args.center_loss:
                center_loss.save_parameters(
                    os.path.join(args.ckpt_dir,
                                 args.prefix + "-feature_matrix.params"))
def train():
    """Train LeNetPlus with softmax loss, optionally adding center loss,
    checkpointing the parameters that achieve the best test accuracy.

    Per-epoch feature plots are written when args.plotting is enabled.
    """
    print('Start to train...')
    if not os.path.exists(args.ckpt_dir):
        os.makedirs(args.ckpt_dir)
    # GPU context list from the comma-separated id string, or CPU for '-1'.
    if args.gpus != '-1':
        ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')]
    else:
        ctx = mx.cpu()
    print('Loading the data...')
    train_iter, test_iter = data_loader(args.batch_size)
    model = LeNetPlus()
    model.hybridize()
    model.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(model.collect_params(), optimizer='sgd',
                            optimizer_params={'learning_rate': args.lr,
                                              'wd': args.wd})
    if args.center_loss:
        center_loss = CenterLoss(args.num_classes, feature_size=2,
                                 lmbd=args.lmbd)
        center_loss.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
        trainer_center = gluon.Trainer(
            center_loss.collect_params(), optimizer='sgd',
            optimizer_params={'learning_rate': args.alpha})
    else:
        center_loss, trainer_center = None, None
    smoothing_constant, moving_loss = .01, 0.0
    best_acc = 0
    for e in range(args.epochs):
        start_time = timeit.default_timer()
        for i, (data, label) in enumerate(train_iter):
            data = data.as_in_context(ctx[0])
            label = label.as_in_context(ctx[0])
            with autograd.record():
                output, features = model(data)
                loss_softmax = softmax_cross_entropy(output, label)
                if args.center_loss:
                    loss_center = center_loss(features, label)
                    loss = loss_softmax + loss_center
                else:
                    loss = loss_softmax
            loss.backward()
            trainer.step(data.shape[0])
            if args.center_loss:
                trainer_center.step(data.shape[0])
            # Exponentially smoothed loss value.
            curr_loss = nd.mean(loss).asscalar()
            moving_loss = (curr_loss if ((i == 0) and (e == 0)) else
                           (1 - smoothing_constant) * moving_loss +
                           smoothing_constant * curr_loss)
        elapsed_time = timeit.default_timer() - start_time
        train_accuracy, train_ft, _, train_lb = evaluate_accuracy(
            train_iter, model, ctx)
        test_accuracy, test_ft, _, test_lb = evaluate_accuracy(
            test_iter, model, ctx)
        if args.plotting:
            plot_features(train_ft, train_lb, num_classes=args.num_classes,
                          fpath=os.path.join(args.out_dir,
                                             '%s-train-epoch-%s.png' %
                                             (args.prefix, e)))
            plot_features(test_ft, test_lb, num_classes=args.num_classes,
                          fpath=os.path.join(args.out_dir,
                                             '%s-test-epoch-%s.png' %
                                             (args.prefix, e)))
        logging.warning("Epoch [%d]: Loss=%f" % (e, moving_loss))
        logging.warning("Epoch [%d]: Train-Acc=%f" % (e, train_accuracy))
        logging.warning("Epoch [%d]: Test-Acc=%f" % (e, test_accuracy))
        logging.warning("Epoch [%d]: Elapsed-time=%f" % (e, elapsed_time))
        if test_accuracy > best_acc:
            best_acc = test_accuracy
            model.save_params(os.path.join(args.ckpt_dir,
                                           args.prefix + '-best.params'))