def train(self):
    """Train the super-resolution CNN and persist the model plus sample outputs.

    Loads HR/LR image pairs from ``self.input_dir``, normalizes them, trains
    ``SRDeepCNN`` with TensorBoard logging under ``self.model_dir``, then
    saves the model and plots predictions for the test split.
    """
    # Timestamped TensorBoard run directory under the model directory.
    logdir = self.model_dir + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

    hr_images_test, lr_images_test, hr_images_train, lr_images_train = \
        images_loader_mini(self.input_dir, self.scale)

    # Cap the number of samples used for training/testing.
    y_train_hr = np.array(hr_images_train[:500000])
    x_train_lr = np.array(lr_images_train[:500000])
    y_test_hr = np.array(hr_images_test[:15000])
    x_test_lr = np.array(lr_images_test[:15000])

    # Scale pixel values into the network's expected input range.
    y_train_hr = normalize(y_train_hr)
    y_test_hr = normalize(y_test_hr)
    x_train_lr = normalize(x_train_lr)
    x_test_lr = normalize(x_test_lr)

    model = SRDeepCNN(self.channels, self.scale).build_model()
    model.compile(loss=content_loss,
                  optimizer=get_optimizer(),
                  metrics=[metrics.mse, metrics.categorical_accuracy])

    tensorboard_callback = keras.callbacks.TensorBoard(
        log_dir=logdir,
        batch_size=self.batch_size,
        write_graph=True,
        write_images=True,
        write_grads=True)

    # FIX: validation_data is documented by Keras as an (x, y) tuple; the
    # previous list form relied on version-specific unpacking behavior.
    loss_history = model.fit(x_train_lr,
                             y_train_hr,
                             batch_size=self.batch_size,
                             epochs=self.epochs,
                             verbose=1,
                             validation_data=(x_test_lr, y_test_hr),
                             callbacks=[tensorboard_callback])

    save_model(model, loss_history, self.model_dir)
    plot_generated_test(self.output_dir, model, y_test_hr, x_test_lr)
def main():
    """Closed-set training entry point: build the net, optionally resume from a
    checkpoint, then train or evaluate.

    Relies on module-level ``args``, ``device``, ``criterion``,
    ``trainloader`` and ``testloader``.
    """
    print(f"\nStart training ...\n")
    start_epoch = 0  # start from epoch 0 or last checkpoint epoch

    print('==> Building model..')
    net = BuildNet(backbone=args.arch,
                   num_classes=args.train_class_num,
                   embed_dim=args.embed_dim)
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    optimizer = torch.optim.SGD(net.parameters(), lr=args.lr,
                                momentum=0.9, weight_decay=5e-4)

    resumed = False
    if args.resume:
        # Load checkpoint.
        if os.path.isfile(args.resume):
            print('==> Resuming from checkpoint..')
            checkpoint = torch.load(args.resume)
            net.load_state_dict(checkpoint['net'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint['epoch']
            resumed = True
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # BUG FIX: previously ``logger`` stayed undefined when args.resume was set
    # but the file was missing, raising NameError at logger.append below.
    if resumed:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'))
        logger.set_names(
            ['Epoch', 'Train Loss', 'Train Acc.', "Test F1", 'threshold'])

    if not args.evaluate:
        for epoch in range(start_epoch, args.es):
            adjust_learning_rate(optimizer, epoch, args.lr,
                                 factor=args.lr_factor, step=args.lr_step)
            print('\nEpoch: %d | Learning rate: %f ' %
                  (epoch + 1, optimizer.param_groups[0]['lr']))
            train_out = train(net, trainloader, optimizer, criterion, device)
            save_model(net, optimizer, epoch,
                       os.path.join(args.checkpoint, 'last_model.pth'))
            test_out = test(net, testloader, criterion, device)
            logger.append([
                epoch + 1, train_out["train_loss"], train_out["accuracy"],
                test_out["best_F1"], test_out["best_thres"]
            ])
        logger.close()
        print(f"\nFinish training...\n")
    else:
        print("===> Evaluating ...")
        test(net, testloader, criterion, device)
def main_stage1():
    """Stage-1 training of DFPNet with the DFP loss, followed by open-set
    evaluation.

    Returns:
        dict with the trained ``net`` state dict and the known/unknown
        energy midpoints computed by ``stage_valmixup`` (consumed by
        stage-2 fine-tuning).
    """
    print(f"\nStart Stage-1 training ...\n")
    start_epoch = 0  # start from epoch 0 or last checkpoint epoch

    print('==> Building model..')
    net = DFPNet(backbone=args.arch, num_classes=args.train_class_num,
                 embed_dim=args.embed_dim, p=args.p)
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    criterion = DFPLoss(temperature=args.temperature)
    optimizer = torch.optim.SGD(net.parameters(), lr=args.stage1_lr,
                                momentum=0.9, weight_decay=5e-4)

    resumed = False
    if args.stage1_resume:
        # Load checkpoint.
        if os.path.isfile(args.stage1_resume):
            print('==> Resuming from checkpoint..')
            checkpoint = torch.load(args.stage1_resume)
            net.load_state_dict(checkpoint['net'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint['epoch']
            resumed = True
        else:
            # BUG FIX: the message previously reported args.resume instead of
            # the stage-1 resume path that was actually checked.
            print("=> no checkpoint found at '{}'".format(args.stage1_resume))

    # BUG FIX: ``logger`` used to stay undefined when the resume path was set
    # but missing, raising NameError at logger.append below.
    if resumed:
        logger = Logger(os.path.join(args.checkpoint, 'log_stage1.txt'),
                        resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log_stage1.txt'))
        logger.set_names(['Epoch', 'Train Loss', 'Train Acc.'])

    if not args.evaluate:
        for epoch in range(start_epoch, args.stage1_es):
            adjust_learning_rate(optimizer, epoch, args.stage1_lr,
                                 factor=args.stage1_lr_factor,
                                 step=args.stage1_lr_step)
            print('\nStage_1 Epoch: %d | Learning rate: %f ' %
                  (epoch + 1, optimizer.param_groups[0]['lr']))
            train_out = stage1_train(net, trainloader, optimizer, criterion,
                                     device)
            save_model(net, optimizer, epoch,
                       os.path.join(args.checkpoint, 'stage_1_last_model.pth'))
            logger.append([epoch + 1, train_out["train_loss"],
                           train_out["accuracy"]])
            if args.plot:
                # Embedding-space plots for the training classes and (with the
                # extra unknown class) the test set.
                plot_feature(net, args, trainloader, device, args.plotfolder,
                             epoch=epoch,
                             plot_class_num=args.train_class_num,
                             plot_quality=args.plot_quality)
                plot_feature(net, args, testloader, device, args.plotfolder,
                             epoch="test" + str(epoch),
                             plot_class_num=args.train_class_num + 1,
                             plot_quality=args.plot_quality, testmode=True)
        logger.close()
        print(f"\nFinish Stage-1 training...\n")

    print("===> Evaluating stage-1 ...")
    stage_test(net, testloader, device)
    mid_dict = stage_valmixup(net, trainloader, device)
    print("===> stage1 energy based classification")
    stage_evaluate(net, testloader,
                   mid_dict["mid_unknown"].item(),
                   mid_dict["mid_known"].item(),
                   feature="energy")
    print("===> stage1 softmax based classification")
    stage_evaluate(net, testloader, 0., 1., feature="normweight_fea2cen")
    return {
        "net": net.state_dict(),
        "mid_known": mid_dict["mid_known"],
        "mid_unknown": mid_dict["mid_unknown"]
    }
def main_stage2(net, mid_known, mid_unknown):
    """Stage-2 fine-tuning with the energy-based FinetuneLoss; logs per-epoch
    open-set F1 and reports the mean/std over the last five epochs.

    Args:
        net: stage-1 trained network (fine-tuned in place).
        mid_known / mid_unknown: energy midpoints from stage-1 mixup
            validation, used as targets by the loss.
    """
    print("Starting stage-2 fine-tuning ...")
    start_epoch = 0

    criterion = FinetuneLoss(mid_known=mid_known, mid_unknown=mid_unknown,
                             gamma=args.gamma, temperature=args.temperature,
                             feature='energy')
    criterion = criterion.to(device)
    optimizer = torch.optim.SGD(net.parameters(), lr=args.stage2_lr,
                                momentum=0.9, weight_decay=5e-4)

    resumed = False
    if args.stage2_resume:
        # Load checkpoint.
        if os.path.isfile(args.stage2_resume):
            print('==> Resuming from checkpoint..')
            checkpoint = torch.load(args.stage2_resume)
            net.load_state_dict(checkpoint['net'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint['epoch']
            resumed = True
        else:
            print("=> no checkpoint found at '{}'".format(args.stage2_resume))

    # BUG FIX: ``logger`` used to stay undefined when the resume path was set
    # but missing, raising NameError at logger.append below.
    if resumed:
        logger = Logger(os.path.join(args.checkpoint, 'log_stage2.txt'),
                        resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log_stage2.txt'))
        logger.set_names(['Epoch', 'Train Loss', 'Class Loss', 'Energy Loss',
                          'Energy Known', 'Energy Unknown', 'Train Acc.',
                          "Test F1"])

    if not args.evaluate:
        best_F1_list = []
        for epoch in range(start_epoch, args.stage2_es):
            adjust_learning_rate(optimizer, epoch, args.stage2_lr,
                                 factor=args.stage2_lr_factor,
                                 step=args.stage2_lr_step)
            print('\nStage_2 Epoch: %d | Learning rate: %f ' %
                  (epoch + 1, optimizer.param_groups[0]['lr']))
            train_out = stage2_train(net, trainloader, optimizer, criterion,
                                     device)
            save_model(net, optimizer, epoch,
                       os.path.join(args.checkpoint, 'stage_2_last_model.pth'))
            test_out = test(net, testloader, device)
            logger.append([epoch + 1,
                           train_out["train_loss"],
                           train_out["loss_classification"],
                           train_out["loss_energy"],
                           train_out["loss_energy_known"],
                           train_out["loss_energy_unknown"],
                           train_out["accuracy"],
                           test_out["best_F1"]])
            best_F1_list.append(test_out["best_F1"])
        logger.close()
        print(f"\nFinish Stage-2 training...\n")
        # Summarize the last (up to) five epochs' best F1 scores.
        last_five = np.array(best_F1_list[-5:])
        print(f"\nGamma:{args.gamma} | F1_mean: {last_five.mean()} | F1_std: {last_five.std()}")
def main():
    """Training entry point that sweeps evaluation over an openness range:
    one CIFAR10 test set (and one logger) per ``test_class_num`` from
    ``args.train_class_num`` to ``args.test_class_num``.
    """
    print(f"\nStart training ...\n")
    start_epoch = 0  # start from epoch 0 or last checkpoint epoch

    print('==> Building model..')
    net = BuildNet(backbone=args.arch, num_classes=args.train_class_num,
                   embed_dim=args.embed_dim)
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    optimizer = torch.optim.SGD(net.parameters(), lr=args.lr,
                                momentum=0.9, weight_decay=5e-4)

    resumed = False
    if args.resume:
        # Load checkpoint.
        if os.path.isfile(args.resume):
            print('==> Resuming from checkpoint..')
            checkpoint = torch.load(args.resume)
            net.load_state_dict(checkpoint['net'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint['epoch']
            resumed = True
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # One logger per openness setting.
    # BUG FIX: ``loggerList`` used to stay undefined when the resume path was
    # set but the file was missing, raising NameError in the epoch loop.
    loggerList = []
    for i in range(args.train_class_num, args.test_class_num + 1):
        if resumed:
            loggerList.append(
                Logger(os.path.join(args.checkpoint, f'log{i}.txt'),
                       resume=True))
        else:
            logger = Logger(os.path.join(args.checkpoint, f'log{i}.txt'))
            logger.set_names([
                'Epoch', 'Train Loss', 'Train Acc.', "Pos-F1", 'Norm-F1',
                'Energy-F1'
            ])
            loggerList.append(logger)

    if not args.evaluate:
        for epoch in range(start_epoch, args.es):
            adjust_learning_rate(optimizer, epoch, args.lr,
                                 factor=args.lr_factor, step=args.lr_step)
            print('\nEpoch: %d | Learning rate: %f ' %
                  (epoch + 1, optimizer.param_groups[0]['lr']))
            train_out = train(net, trainloader, optimizer, criterion, device)
            save_model(net, optimizer, epoch,
                       os.path.join(args.checkpoint, 'last_model.pth'))
            # Evaluate at every openness level with a freshly built test set.
            for test_class_num in range(args.train_class_num,
                                        args.test_class_num + 1):
                testset = CIFAR10(
                    root='../../data', train=False, download=True,
                    transform=transform_test,
                    train_class_num=args.train_class_num,
                    test_class_num=test_class_num,
                    includes_all_train_class=args.includes_all_train_class)
                testloader = torch.utils.data.DataLoader(
                    testset, batch_size=args.bs, shuffle=False, num_workers=4)
                test_out = test(net, testloader, criterion, device)
                logger = loggerList[test_class_num - args.train_class_num]
                logger.append([
                    epoch + 1, train_out["train_loss"], train_out["accuracy"],
                    test_out["best_F1_possibility"], test_out["best_F1_norm"],
                    test_out["best_F1_energy"]
                ])
        # BUG FIX: previously only the last logger was closed.
        for logger in loggerList:
            logger.close()
        print(f"\nFinish training...\n")
# Build data generators for the train/validation splits and fit the model.
train_generator = image_data_generator(train_dir)
validation_generator = image_data_generator(validation_dir)
history = model.fit_generator(train_generator,
                              steps_per_epoch=1000,
                              epochs=40,
                              validation_data=validation_generator,
                              validation_steps=180)

# Persist the per-epoch training history for later inspection.
with open('dense_cnn.json', 'w') as f:
    json.dump(history.history, f)

predict = evaluate_model(model=model)
test_accuracy = predict[1]
print('Testing accuracy: ', test_accuracy)

# Plot accuracy and loss
plt_acc_loss(history)

test_generator = image_data_generator(test_dir, shuffle=False, batch_size=1)
err_expression = evaluate_expression_error_rate(model)

# Plot individual expression error rate
plt_expression(
    err_expression,
    'Individual expression error rate (Overall %.2f%% accuracy)' %
    (test_accuracy * 100))

# Plot confusion matrix
plt_confusion_matrix(model)

save_model(model, 'dense_cnn.h5')
from Utils import save_model, yolo_model, img_h, img_w, channels, MODEL_PATH

# Script entry: build an untrained YOLO model for the configured input
# geometry (height, width, channels) and serialize it to MODEL_PATH.
input_size = (img_h, img_w, channels)
model = yolo_model(input_size)
save_model(model, model_path=MODEL_PATH)
def main_stage2(stage1_dict):
    """Stage-2 fine-tuning of DFPNet with DFPNormLoss, seeded from stage-1.

    Args:
        stage1_dict: dict returned by stage-1 with keys ``net`` (state dict)
            and ``mid_known`` / ``mid_unknown`` (energy midpoints).
    """
    print("Starting stage-2 fine-tuning ...")
    start_epoch = 0

    # Unpack stage-1 results: trained weights and energy midpoints.
    mid_known = stage1_dict["mid_known"]
    mid_unknown = stage1_dict["mid_unknown"]
    net_state_dict = stage1_dict["net"]

    net = DFPNet(backbone=args.arch, num_classes=args.train_class_num,
                 embed_dim=args.embed_dim, p=args.p)
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    optimizer = torch.optim.SGD(net.parameters(), lr=args.stage2_lr,
                                momentum=0.9, weight_decay=5e-4)

    resumed = False
    if args.stage2_resume:
        # Load checkpoint.
        if os.path.isfile(args.stage2_resume):
            print('==> Resuming from checkpoint..')
            checkpoint = torch.load(args.stage2_resume)
            net.load_state_dict(checkpoint['net'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint['epoch']
            # Midpoints are checkpointed too so a resumed run keeps the same
            # loss targets.
            mid_known = checkpoint["mid_known"]
            mid_unknown = checkpoint["mid_unknown"]
            resumed = True
        else:
            # BUG FIX: the message previously reported args.resume instead of
            # the stage-2 resume path that was actually checked.
            print("=> no checkpoint found at '{}'".format(args.stage2_resume))

    if resumed:
        logger = Logger(os.path.join(args.checkpoint, 'log_stage2.txt'),
                        resume=True)
    else:
        # BUG FIX: when the resume path was set but missing, ``logger`` stayed
        # undefined and the stage-1 weights were never loaded; now we fall
        # back to a fresh run seeded from stage-1.
        net.load_state_dict(net_state_dict)
        logger = Logger(os.path.join(args.checkpoint, 'log_stage2.txt'))
        logger.set_names([
            'Epoch', 'Train Loss', 'Class Loss', 'Energy Loss',
            'Energy Known', 'Energy Unknown', 'Train Acc.'
        ])

    # Push known energies above 1.3*mid_known and unknowns below
    # 0.7*mid_unknown (margin widening around the stage-1 midpoints).
    criterion = DFPNormLoss(mid_known=1.3 * mid_known,
                            mid_unknown=0.7 * mid_unknown,
                            alpha=args.alpha,
                            temperature=args.temperature,
                            feature='energy')

    if not args.evaluate:
        for epoch in range(start_epoch, args.stage2_es):
            adjust_learning_rate(optimizer, epoch, args.stage2_lr,
                                 factor=args.stage2_lr_factor,
                                 step=args.stage2_lr_step)
            print('\nStage_2 Epoch: %d | Learning rate: %f ' %
                  (epoch + 1, optimizer.param_groups[0]['lr']))
            train_out = stage2_train(net, trainloader, optimizer, criterion,
                                     device)
            save_model(net, optimizer, epoch,
                       os.path.join(args.checkpoint, 'stage_2_last_model.pth'),
                       mid_known=mid_known, mid_unknown=mid_unknown)
            logger.append([epoch + 1,
                           train_out["train_loss"],
                           train_out["loss_classification"],
                           train_out["loss_energy"],
                           train_out["loss_energy_known"],
                           train_out["loss_energy_unknown"],
                           train_out["accuracy"]])
            if args.plot:
                plot_feature(net, args, trainloader, device, args.plotfolder,
                             epoch="stage2_" + str(epoch),
                             plot_class_num=args.train_class_num,
                             plot_quality=args.plot_quality)
                plot_feature(net, args, testloader, device, args.plotfolder,
                             epoch="stage2_test" + str(epoch),
                             plot_class_num=args.train_class_num + 1,
                             plot_quality=args.plot_quality, testmode=True)
        logger.close()
        print(f"\nFinish Stage-2 training...\n")

    print("===> Evaluating stage-2 ...")
    stage_test(net, testloader, device, name="stage2_test_doublebar")
    stage_valmixup(net, trainloader, device, name="stage2_mixup_result")
    stage_evaluate(net, testloader, mid_unknown.item(), mid_known.item(),
                   feature="energy")
def main_stage2(net, mid_known, mid_unknown):
    """Stage-2 fine-tuning evaluated over an openness sweep: one CIFAR10 test
    set (and one logger) per ``test_class_num`` in
    ``[args.train_class_num, args.test_class_num]``.
    """
    print("Starting stage-2 fine-tuning ...")
    start_epoch = 0

    criterion = FinetuneLoss(mid_known=mid_known, mid_unknown=mid_unknown,
                             gamma=args.gamma, temperature=args.temperature,
                             feature='energy')
    criterion = criterion.to(device)
    optimizer = torch.optim.SGD(net.parameters(), lr=args.stage2_lr,
                                momentum=0.9, weight_decay=5e-4)

    resumed = False
    if args.stage2_resume:
        # Load checkpoint.
        if os.path.isfile(args.stage2_resume):
            print('==> Resuming from checkpoint..')
            checkpoint = torch.load(args.stage2_resume)
            net.load_state_dict(checkpoint['net'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint['epoch']
            resumed = True
        else:
            print("=> no checkpoint found at '{}'".format(args.stage2_resume))

    # One logger per openness setting.
    # BUG FIX: ``loggerList`` used to stay undefined when the resume path was
    # set but the file was missing, raising NameError in the epoch loop.
    loggerList = []
    for i in range(args.train_class_num, args.test_class_num + 1):
        if resumed:
            loggerList.append(
                Logger(os.path.join(args.checkpoint, f'log{i}_stage2.txt'),
                       resume=True))
        else:
            logger = Logger(
                os.path.join(args.checkpoint, f'log{i}_stage2.txt'))
            logger.set_names(
                ['Epoch', 'Train Loss', 'Train Acc.', 'Energy-F1'])
            loggerList.append(logger)

    if not args.evaluate:
        for epoch in range(start_epoch, args.stage2_es):
            adjust_learning_rate(optimizer, epoch, args.stage2_lr,
                                 factor=args.stage2_lr_factor,
                                 step=args.stage2_lr_step)
            print('\nStage_2 Epoch: %d | Learning rate: %f ' %
                  (epoch + 1, optimizer.param_groups[0]['lr']))
            train_out = stage2_train(net, trainloader, optimizer, criterion,
                                     device)
            save_model(net, optimizer, epoch,
                       os.path.join(args.checkpoint, 'stage_2_last_model.pth'))
            # Evaluate at every openness level with a freshly built test set.
            for test_class_num in range(args.train_class_num,
                                        args.test_class_num + 1):
                testset = CIFAR10(
                    root='../../data', train=False, download=True,
                    transform=transform_test,
                    train_class_num=args.train_class_num,
                    test_class_num=test_class_num,
                    includes_all_train_class=args.includes_all_train_class)
                testloader = torch.utils.data.DataLoader(
                    testset, batch_size=args.stage2_bs, shuffle=False,
                    num_workers=4)
                test_out = test(net, testloader, device)
                logger = loggerList[test_class_num - args.train_class_num]
                logger.append([
                    epoch + 1, train_out["train_loss"],
                    train_out["accuracy"], test_out["best_F1"]
                ])
        # BUG FIX: previously only the last logger was closed.
        for logger in loggerList:
            logger.close()
        print(f"\nFinish Stage-2 training...\n")
# End-of-epoch bookkeeping for a training loop that begins above this chunk
# (``model``, ``optimizer``, loop index ``i``, ``trainLoss``/``trainAcc`` and
# the metric history lists are defined earlier in the file).
validLoss, validAcc = validate(model, valid_loader)

# Record this epoch's metrics for later plotting and checkpointing.
trainingLosses.append(trainLoss)
trainingAccuracies.append(trainAcc)
validationLosses.append(validLoss)
validationAccuracies.append(validAcc)

print("training loss", trainLoss)
print("training accuracy", trainAcc)
print("\nvalidation loss", validLoss)
print("validation accuracy", validAcc)

print("\nsaving checkpoint ")
# Checkpoint model + optimizer together with the full metric history so a
# restart can restore the curves as well as the weights.
save_model(model, optimizer, i, trainingLosses, trainingAccuracies,
           validationLosses, validationAccuracies)

# NOTE(review): dead string literal from the original — a resume-from-
# checkpoint snippet kept for reference; it has no runtime effect.
"""checkpoint = load_model()
trainingLosses = checkpoint["trainingLosses"]
trainingAccuracies = checkpoint["trainingAccuracies"]
validationLosses = checkpoint["validationLosses"]
validationAccuracies = checkpoint["validationAccuracies"]"""

# Start a 2x2 metrics figure; the remaining subplots presumably follow below
# this chunk (only subplot 1 is populated here).
x = np.linspace(0, 10, 10)
plt.subplot(2, 2, 1)
plt.plot(x, trainingLosses)
plt.ylabel('Train Loss')
plt.xlabel('Epochs')
plt.subplot(2, 2, 2)
def main():
    """Train the vanilla VAE: checkpoint every epoch, keep a best-loss
    checkpoint, then generate reconstruction/sample images.

    Relies on module-level ``args``, ``device``, ``trainloader`` and
    ``valloader``.
    """
    start_epoch = 0
    best_loss = 9999999.99  # running minimum train loss for best checkpointing

    # Model
    print('==> Building model..')
    net = VanillaVAE(in_channels=1, latent_dim=args.latent_dim)
    net = net.to(device)
    if device == 'cuda':
        # Considering the data scale and model, it is unnecessary to use
        # DistributedDataParallel which could speed up the training and
        # inference compared to DataParallel
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.wd)
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer,
                                                 gamma=args.scheduler_gamma)

    resumed = False
    if args.resume:
        # Load checkpoint.
        if os.path.isfile(args.resume):
            checkpoint = torch.load(args.resume)
            net.load_state_dict(checkpoint['net'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            # BUG FIX: resume from the stored epoch instead of restarting at 0
            # (sibling entry points all restore it; .get keeps old
            # checkpoints without the key working).
            start_epoch = checkpoint.get('epoch', 0)
            resumed = True
            print('==> Resuming from checkpoint, loaded..')
        else:
            print("==> No checkpoint found at '{}'".format(args.resume))

    # BUG FIX: ``logger`` used to stay undefined when args.resume pointed at a
    # missing file, raising NameError at logger.append below.
    if resumed:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'))
        logger.set_names(
            ['Epoch', 'LR', 'Train Loss', 'Recons Loss', 'KLD Loss'])

    if not args.evaluate:
        # training
        print("==> start training..")
        for epoch in range(start_epoch, args.es):
            print('\nStage_1 Epoch: %d | Learning rate: %f ' %
                  (epoch + 1, scheduler.get_last_lr()[-1]))
            # train_out: {train_loss, recons_loss, kld_loss}
            train_out = train(net, trainloader, optimizer)
            save_model(net, optimizer, epoch,
                       os.path.join(args.checkpoint, 'checkpoint.pth'))
            if train_out["train_loss"] < best_loss:
                save_model(net, optimizer, epoch,
                           os.path.join(args.checkpoint,
                                        'checkpoint_best.pth'),
                           loss=train_out["train_loss"])
                best_loss = train_out["train_loss"]
            logger.append([
                epoch + 1, scheduler.get_last_lr()[-1],
                train_out["train_loss"], train_out["recons_loss"],
                train_out["kld_loss"]
            ])
            scheduler.step()
        logger.close()
        print(f"\n==> Finish training..\n")

    print("===> start evaluating ...")
    generate_images(net, valloader, name="test_reconstruct")
    sample_images(net, name="test_randsample")
# --- VAE training driver: fixed epoch budget with per-epoch sampling,
# checkpointing, and a final loss-curve plot ---
num_epochs = 50
trainLosses = []
validLosses = []

for epoch in range(num_epochs):
    print("-------------- Epoch # " + str(epoch + 1) + " --------------")

    # One optimization pass over the training set.
    epoch_train_loss = train(VAE, train_loader, optimizer, device)
    trainLosses.append(epoch_train_loss)
    print("Epoch train loss: {:.4f}".format(epoch_train_loss))

    # Held-out loss for this epoch.
    epoch_valid_loss = validate(VAE, valid_loader, device)
    validLosses.append(epoch_valid_loss)

    # Draw 25 standard-normal latent samples and dump a 5x5 grid of
    # decoded images for visual progress tracking.
    z = Variable(
        Tensor(np.random.normal(np.zeros(100), np.ones(100), (25, 100))))
    fake_imgs = VAE.decoder(z)
    save_image(fake_imgs.data, "images_VAE/%d.png" % epoch,
               nrow=5, normalize=True)

    save_model(VAE, optimizer, epoch, epoch_train_loss, epoch_valid_loss)

# Persist the final weights, then plot the training/validation curves.
torch.save(VAE.state_dict(), "decoderVAE.pth")

epochs_axis = np.arange(num_epochs)
plt.plot(epochs_axis, trainLosses)
plt.plot(epochs_axis, validLosses)
plt.legend(["Training", "Validation"])
plt.ylabel("ELBO Loss")
plt.xlabel("Epoch number")
plt.savefig("./results/VAE_training_20_epochs.png")
plt.show()