opt.zero_grad()
loss.backward()
opt.step()

# measure training accuracy at the end of each epoch
net.eval()
train_acc = 0
for test_step, (data, target) in enumerate(train_dl, 1):
    data, target = data.cuda(), target.cuda()
    outputs = net(data)
    train_acc += sum(
        torch.max(outputs, 1)[1].data.cpu().numpy() ==
        target.data.cpu().numpy())
train_acc /= train_ds_size
net.train()
print('epoch:{}, train_acc:{:.3f} %, loss:{:.3f}, time:{:.1f} min'.format(
    epoch, train_acc * 100, loss.item(), (time() - start) / 60))
torch.save(net.state_dict(), './model/net{}-{}.pth'.format(epoch, step))
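# A minimal sketch of the same accuracy check wrapped in torch.no_grad(),
# which skips gradient bookkeeping during evaluation and is cheaper than the
# .data/.cpu().numpy() round-trip above. The test_dl loader and test_ds_size
# below are assumptions (a held-out split analogous to train_dl and
# train_ds_size), not names from the original script.
net.eval()
test_acc = 0
with torch.no_grad():
    for data, target in test_dl:                    # hypothetical test loader
        data, target = data.cuda(), target.cuda()
        preds = net(data).argmax(dim=1)             # predicted class per sample
        test_acc += (preds == target).sum().item()  # count correct predictions
test_acc /= test_ds_size                            # hypothetical test-set size
net.train()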
    {
        'params': lenet.parameters()
    },
]
optimizer = optim.Adam(params, lr=opt.lr, weight_decay=5e-4)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                           milestones=[20, 40],
                                           gamma=0.1)
num_batch = len(train_loader)
criterion = nn.CrossEntropyLoss()
best_result = 0

if opt.pretrain:
    for e in range(30):
        lenet.train()
        running_loss = 0.0
        running_klloss = 0.0
        for x_batch, y_batch in train_loader:
            # Variable(..., async=True) is Python 2 era code: `async` is now a
            # reserved keyword, and tensors no longer need Variable wrapping.
            x_batch = x_batch.cuda()
            y_batch = y_batch.cuda(non_blocking=True)
            prediction = lenet(x_batch)
            lambda_0 = 1.0
            loss = criterion(prediction, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            batch_loss = loss.item()
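# The MultiStepLR scheduler above only changes the learning rate when
# scheduler.step() is called, so the main (non-pretrain) loop presumably
# advances it once per epoch. A minimal sketch of that pattern, assuming the
# same optimizer/scheduler objects; train_one_epoch and the epoch bound are
# hypothetical placeholders, not names from the original script.
for epoch in range(60):
    train_one_epoch()    # hypothetical stand-in for the inner batch loop
    scheduler.step()     # decays lr by gamma=0.1 after epochs 20 and 40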
def main():
    args = parser.parse_args()

    print('Options:')
    for (key, value) in vars(args).items():  # iteritems() is Python 2 only
        print("{:16}: {}".format(key, value))

    assert os.path.exists(args.xp_dir)

    # default value for base_file: string basis for all exported file names
    if args.out_name:
        base_file = "{}/{}".format(args.xp_dir, args.out_name)
    else:
        base_file = "{}/{}_{}_{}".format(args.xp_dir, args.dataset,
                                         args.solver, args.loss)

    # if the pickle files are already there, consider the run already done
    if (os.path.exists("{}_weights.p".format(base_file))
            and os.path.exists("{}_results.p".format(base_file))):
        sys.exit()

    # computation device
    if 'gpu' in args.device:
        theano.sandbox.cuda.use(args.device)

    # set save_at to n_epochs if not provided
    save_at = args.n_epochs if not args.save_at else args.save_at
    save_to = "{}_weights.p".format(base_file)

    ######################################
    # weights = "../log/{}.p".format(args.in_name) if args.in_name else None
    weights = None

    # update config data

    # plot parameters
    Cfg.xp_path = args.xp_dir

    # dataset
    Cfg.seed = args.seed
    Cfg.out_frac = args.out_frac
    Cfg.ad_experiment = bool(args.ad_experiment)
    Cfg.weight_dict_init = bool(args.weight_dict_init)
    Cfg.pca = bool(args.pca)
    Cfg.unit_norm_used = args.unit_norm_used
    Cfg.gcn = bool(args.gcn)
    Cfg.zca_whitening = bool(args.zca_whitening)
    Cfg.mnist_val_frac = args.mnist_val_frac
    Cfg.mnist_bias = bool(args.mnist_bias)
    Cfg.mnist_rep_dim = args.mnist_rep_dim
    Cfg.mnist_architecture = args.mnist_architecture
    Cfg.mnist_normal = args.mnist_normal
    Cfg.mnist_outlier = args.mnist_outlier
    Cfg.cifar10_bias = bool(args.cifar10_bias)
    Cfg.cifar10_rep_dim = args.cifar10_rep_dim
    Cfg.cifar10_architecture = args.cifar10_architecture
    Cfg.cifar10_normal = args.cifar10_normal
    Cfg.cifar10_outlier = args.cifar10_outlier
    Cfg.gtsrb_rep_dim = args.gtsrb_rep_dim

    # neural network
    Cfg.softmax_loss = (args.loss == 'ce')
    Cfg.svdd_loss = (args.loss == 'svdd')
    Cfg.reconstruction_loss = (args.loss == 'autoencoder')
    Cfg.use_batch_norm = bool(args.use_batch_norm)
    Cfg.learning_rate.set_value(args.lr)
    Cfg.lr_decay = bool(args.lr_decay)
    Cfg.lr_decay_after_epoch = args.lr_decay_after_epoch
    Cfg.lr_drop = bool(args.lr_drop)
    Cfg.lr_drop_in_epoch = args.lr_drop_in_epoch
    Cfg.lr_drop_factor = args.lr_drop_factor
    Cfg.momentum.set_value(args.momentum)
    if args.solver == "rmsprop":
        Cfg.rho.set_value(0.9)
    if args.solver == "adadelta":
        Cfg.rho.set_value(0.95)
    Cfg.block_coordinate = bool(args.block_coordinate)
    Cfg.k_update_epochs = args.k_update_epochs
    Cfg.center_fixed = bool(args.center_fixed)
    Cfg.R_update_solver = args.R_update_solver
    Cfg.R_update_scalar_method = args.R_update_scalar_method
    Cfg.R_update_lp_obj = args.R_update_lp_obj
    Cfg.warm_up_n_epochs = args.warm_up_n_epochs
    Cfg.batch_size = args.batch_size
    Cfg.leaky_relu = bool(args.leaky_relu)

    # pre-training and autoencoder configuration
    Cfg.pretrain = bool(args.pretrain)
    Cfg.ae_loss = args.ae_loss
    Cfg.ae_lr_drop = bool(args.ae_lr_drop)
    Cfg.ae_lr_drop_in_epoch = args.ae_lr_drop_in_epoch
    Cfg.ae_lr_drop_factor = args.ae_lr_drop_factor
    Cfg.ae_weight_decay = bool(args.ae_weight_decay)
    Cfg.ae_C.set_value(args.ae_C)

    # SVDD parameters
    Cfg.nu.set_value(args.nu)
    Cfg.c_mean_init = bool(args.c_mean_init)
    if args.c_mean_init_n_batches == -1:
        Cfg.c_mean_init_n_batches = "all"
    else:
        Cfg.c_mean_init_n_batches = args.c_mean_init_n_batches
    Cfg.hard_margin = bool(args.hard_margin)

    # regularization
    Cfg.weight_decay = bool(args.weight_decay)
    Cfg.C.set_value(args.C)
    Cfg.reconstruction_penalty = bool(args.reconstruction_penalty)
    Cfg.C_rec.set_value(args.C_rec)
    Cfg.dropout = bool(args.dropout)
    Cfg.dropout_architecture = bool(args.dropout_arch)

    # diagnostics
    Cfg.nnet_diagnostics = bool(args.nnet_diagnostics)
    Cfg.e1_diagnostics = bool(args.e1_diagnostics)
    Cfg.ae_diagnostics = bool(args.ae_diagnostics)

    # train
    nnet = LeNet(dataset=args.dataset, use_weights=weights,
                 pretrain=Cfg.pretrain)

    # pre-train weights via autoencoder, if specified
    if Cfg.pretrain:
        nnet.pretrain(solver="adam", lr=0.0001, n_epochs=10)

    nnet.train()
    exit()  # NOTE: this exit() makes everything below unreachable

    # pickle/serialize AD results
    if Cfg.ad_experiment:
        nnet.log_results(filename=Cfg.xp_path + "/AD_results.p")

    # text log
    nnet.log.save_to_file("{}_results.p".format(base_file))

    # save log
    log_exp_config(Cfg.xp_path, args.dataset)
    log_NeuralNet(Cfg.xp_path, args.loss, args.solver, args.lr, args.momentum,
                  None, args.n_epochs, args.C, args.C_rec, args.nu)
    if Cfg.ad_experiment:
        log_AD_results(Cfg.xp_path, nnet)

    # plot diagnostics
    if Cfg.nnet_diagnostics:
        # common suffix for plot titles
        str_lr = "lr = " + str(args.lr)
        C = int(args.C)
        if not Cfg.weight_decay:
            C = None
        str_C = "C = " + str(C)
        Cfg.title_suffix = "(" + args.solver + ", " + str_C + ", " + str_lr + ")"

        if args.loss == 'autoencoder':
            plot_ae_diagnostics(nnet, Cfg.xp_path, Cfg.title_suffix)
        else:
            plot_diagnostics(nnet, Cfg.xp_path, Cfg.title_suffix)

        plot_filters(nnet, Cfg.xp_path, Cfg.title_suffix)

    # if AD experiment, plot the most anomalous and most normal examples
    if Cfg.ad_experiment:
        n_img = 32
        plot_outliers_and_most_normal(nnet, n_img, Cfg.xp_path)
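# The Cfg fields updated with .set_value() above (learning_rate, momentum,
# rho, C, nu, ...) behave like Theano shared variables: their values can be
# swapped at runtime without recompiling the computation graph. A minimal
# sketch of that pattern, assuming an old Theano install to match the
# theano.sandbox.cuda usage; the variable name here is illustrative, not
# taken from the Cfg module.
import numpy as np
import theano

learning_rate = theano.shared(np.float32(0.001), name='learning_rate')
print(learning_rate.get_value())              # -> 0.001
learning_rate.set_value(np.float32(0.0001))   # updated in place, graph reused
print(learning_rate.get_value())              # -> 0.0001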
import gzip
import pickle as pkl  # cPickle was merged into pickle in Python 3

from LeNet import LeNet

# the classic MNIST pickle was written by Python 2, hence encoding='latin1'
f = gzip.open('mnist.pkl.gz', 'rb')
trainset, valset, testset = pkl.load(f, encoding='latin1')
f.close()

net = LeNet()
config = {
    'filename': 'relu.log',
    'batchSize': 160,
    'max_epochs': 10,
    'hyperParams': [0.01, 0.9, 0.999]
}
net.train(trainset[0], trainset[1], valset[0], valset[1], config)

# print('[' + config['filename'] + '] Accuracy on test set is '
#       + str(net.test(testset[0], testset[1])) + '%')
optimizer = optim.SGD(model.parameters(), lr=hp.lr, momentum=0.9)

# check the latest checkpoint to resume training
ckpt_path = latest_ckpt(hp.ckpt)
if ckpt_path is None:
    logging.info("Initializing from scratch")
    epoch_start = 1
else:
    # resume training
    logging.info("Loading the latest checkpoint")
    ckpt = torch.load(ckpt_path)
    model.load_state_dict(ckpt['model_state_dict'])
    epoch_start = ckpt['epoch_start']
    model.train()

# load model to device
logging.info("# Load model to %s" % DEVICE)
model = model.to(DEVICE)

# training
logging.info("# Start training")
start_time = time.time()
for epoch in range(epoch_start, hp.epochs + 1):
    num_batch = math.floor(len(train_set) / hp.batch_size)
    for i, data in enumerate(train_loader, 0):
        # data comes in the form of [src, target]
        src, target = data[0].to(DEVICE), data[1].to(DEVICE)

        # feed forward
        predicts = model(src)
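# The resume logic above expects checkpoints holding 'model_state_dict' and
# 'epoch_start' keys, so a matching torch.save must occur somewhere in the
# epoch loop. A minimal sketch, assuming hp.ckpt is a writable directory;
# the file naming and the choice to also store the optimizer state are
# assumptions, not part of the original script.
import os

ckpt = {
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),  # lets SGD momentum resume too
    'epoch_start': epoch + 1,  # resume from the *next* epoch
}
torch.save(ckpt, os.path.join(hp.ckpt, 'ckpt_epoch{}.pt'.format(epoch)))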
recoder = {
    "num_iter": [],
    "train_loss": [],
    "train_acc": [],
    "test_loss": [],
    "test_acc": []
}

for epoch in range(num_epoch):
    for index, (batch_img, batch_label) in enumerate(train_loader):
        num_iter += 1
        # copy the batch to the GPU (Variable wrapping is deprecated;
        # plain tensors have tracked gradients since PyTorch 0.4)
        batch_img = batch_img.cuda()
        batch_label = batch_label.cuda()

        lenet_model.train()
        output = lenet_model(batch_img)  # forward pass; output is batch_size x 10

        # CrossEntropyLoss already averages over the batch, so loss is a
        # scalar tensor
        loss = loss_func(output, batch_label)
        train_loss += loss.item()

        # max over dim 1 returns (values, indices); the indices are the
        # predicted classes (renamed from `max` to avoid shadowing the builtin)
        max_val, max_index = torch.max(output, 1)
        # count correct predictions in this batch; .item() converts the
        # 0-dim tensor to a Python int so train_acc stays a plain number
        train_correct = torch.sum(max_index == batch_label).item()
        train_acc += train_correct

        # backward pass
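# The excerpt cuts off at the backward pass. A minimal sketch of the steps
# that presumably follow; the optimizer name `optimizer` is an assumption
# (it is not defined in the excerpt), and the appends target the recoder
# dict declared above.
optimizer.zero_grad()   # clear gradients left over from the previous step
loss.backward()         # backpropagate through the LeNet forward pass
optimizer.step()        # apply the parameter update

recoder["num_iter"].append(num_iter)
recoder["train_loss"].append(loss.item())
recoder["train_acc"].append(train_correct / batch_label.size(0))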