def main(hparams):
    """Train a DKL model (WideResNet feature extractor + variational GP head)
    and evaluate accuracy / ELBO / SVHN-OoD metrics.

    Side effects: writes hparams.json, results.json, TensorBoard logs, and the
    model/likelihood state dicts under the results directory.
    NOTE(review): requires a CUDA device — tensors and modules are moved with
    ``.cuda()`` unconditionally.
    """
    results_dir = get_results_directory(hparams.output_dir)
    writer = SummaryWriter(log_dir=str(results_dir))
    ds = get_dataset(hparams.dataset, root=hparams.data_root)
    input_size, num_classes, train_dataset, test_dataset = ds
    # set_seed returns the seed actually used so it can be persisted below.
    hparams.seed = set_seed(hparams.seed)
    # Default: one inducing point per class.
    if hparams.n_inducing_points is None:
        hparams.n_inducing_points = num_classes
    print(f"Training with {hparams}")
    hparams.save(results_dir / "hparams.json")
    if hparams.ard:
        # Hardcoded to WRN output size
        ard = 640
    else:
        ard = None
    feature_extractor = WideResNet(
        spectral_normalization=hparams.spectral_normalization,
        dropout_rate=hparams.dropout_rate,
        coeff=hparams.coeff,
        n_power_iterations=hparams.n_power_iterations,
        batchnorm_momentum=hparams.batchnorm_momentum,
    )
    # Initialize inducing points / lengthscale from features of the training set.
    initial_inducing_points, initial_lengthscale = initial_values_for_GP(
        train_dataset, feature_extractor, hparams.n_inducing_points
    )
    gp = GP(
        num_outputs=num_classes,
        initial_lengthscale=initial_lengthscale,
        initial_inducing_points=initial_inducing_points,
        separate_inducing_points=hparams.separate_inducing_points,
        kernel=hparams.kernel,
        ard=ard,
        lengthscale_prior=hparams.lengthscale_prior,
    )
    model = DKL_GP(feature_extractor, gp)
    model = model.cuda()
    likelihood = SoftmaxLikelihood(num_classes=num_classes, mixing_weights=False)
    likelihood = likelihood.cuda()
    # num_data is needed so the variational ELBO scales the likelihood term
    # to the full dataset size when estimated from minibatches.
    elbo_fn = VariationalELBO(likelihood, gp, num_data=len(train_dataset))
    # One param group per component; all share the same base learning rate.
    parameters = [
        {"params": feature_extractor.parameters(), "lr": hparams.learning_rate},
        {"params": gp.parameters(), "lr": hparams.learning_rate},
        {"params": likelihood.parameters(), "lr": hparams.learning_rate},
    ]
    optimizer = torch.optim.SGD(
        parameters, momentum=0.9, weight_decay=hparams.weight_decay
    )
    milestones = [60, 120, 160]
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=milestones, gamma=0.2
    )

    def step(engine, batch):
        # One optimization step; returns the *negative* ELBO (the minimized loss).
        model.train()
        likelihood.train()
        optimizer.zero_grad()
        x, y = batch
        x, y = x.cuda(), y.cuda()
        y_pred = model(x)
        elbo = -elbo_fn(y_pred, y)
        elbo.backward()
        optimizer.step()
        return elbo.item()

    def eval_step(engine, batch):
        # Forward pass only; returns the raw predictive distribution + targets
        # so the attached metrics can post-process them.
        model.eval()
        likelihood.eval()
        x, y = batch
        x, y = x.cuda(), y.cuda()
        with torch.no_grad():
            y_pred = model(x)
        return y_pred, y

    trainer = Engine(step)
    evaluator = Engine(eval_step)
    # Running average of the per-batch (negative) ELBO over each epoch.
    metric = Average()
    metric.attach(trainer, "elbo")

    def output_transform(output):
        y_pred, y = output
        # Sample softmax values independently for classification at test time
        y_pred = y_pred.to_data_independent_dist()
        # The mean here is over likelihood samples
        y_pred = likelihood(y_pred).probs.mean(0)
        return y_pred, y

    metric = Accuracy(output_transform=output_transform)
    metric.attach(evaluator, "accuracy")
    # Same objective as training, evaluated on the test set.
    metric = Loss(lambda y_pred, y: -elbo_fn(y_pred, y))
    metric.attach(evaluator, "elbo")
    kwargs = {"num_workers": 4, "pin_memory": True}
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=hparams.batch_size,
        shuffle=True,
        drop_last=True,
        **kwargs,
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=512, shuffle=False, **kwargs
    )

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_results(trainer):
        # Per-epoch hook: log training ELBO, spectral norms, lengthscales,
        # (late-training) OoD metrics, run the test evaluator, step the LR.
        metrics = trainer.state.metrics
        elbo = metrics["elbo"]
        print(f"Train - Epoch: {trainer.state.epoch} ELBO: {elbo:.2f} ")
        writer.add_scalar("Likelihood/train", elbo, trainer.state.epoch)
        if hparams.spectral_normalization:
            # NOTE(review): assumes spectrally-normalized convs expose
            # `weight_sigma` — confirm against the WideResNet implementation.
            for name, layer in model.feature_extractor.named_modules():
                if isinstance(layer, torch.nn.Conv2d):
                    writer.add_scalar(
                        f"sigma/{name}", layer.weight_sigma, trainer.state.epoch
                    )
        if not hparams.ard:
            # Otherwise it's too much to submit to tensorboard
            length_scales = model.gp.covar_module.base_kernel.lengthscale.squeeze()
            for i in range(length_scales.shape[0]):
                writer.add_scalar(
                    f"length_scale/{i}", length_scales[i], trainer.state.epoch
                )
        # OoD evaluation is expensive: only every 5th epoch after epoch 150.
        if trainer.state.epoch > 150 and trainer.state.epoch % 5 == 0:
            _, auroc, aupr = get_ood_metrics(
                hparams.dataset, "SVHN", model, likelihood, hparams.data_root
            )
            print(f"OoD Metrics - AUROC: {auroc}, AUPR: {aupr}")
            writer.add_scalar("OoD/auroc", auroc, trainer.state.epoch)
            writer.add_scalar("OoD/auprc", aupr, trainer.state.epoch)
        evaluator.run(test_loader)
        metrics = evaluator.state.metrics
        acc = metrics["accuracy"]
        elbo = metrics["elbo"]
        print(
            f"Test - Epoch: {trainer.state.epoch} "
            f"Acc: {acc:.4f} "
            f"ELBO: {elbo:.2f} "
        )
        writer.add_scalar("Likelihood/test", elbo, trainer.state.epoch)
        writer.add_scalar("Accuracy/test", acc, trainer.state.epoch)
        # LR schedule advances once per epoch (milestones 60/120/160).
        scheduler.step()

    pbar = ProgressBar(dynamic_ncols=True)
    pbar.attach(trainer)
    trainer.run(train_loader, max_epochs=200)
    # Done training - time to evaluate
    results = {}
    evaluator.run(train_loader)
    train_acc = evaluator.state.metrics["accuracy"]
    train_elbo = evaluator.state.metrics["elbo"]
    results["train_accuracy"] = train_acc
    results["train_elbo"] = train_elbo
    evaluator.run(test_loader)
    test_acc = evaluator.state.metrics["accuracy"]
    test_elbo = evaluator.state.metrics["elbo"]
    results["test_accuracy"] = test_acc
    results["test_elbo"] = test_elbo
    # Final OoD evaluation against SVHN.
    _, auroc, aupr = get_ood_metrics(
        hparams.dataset, "SVHN", model, likelihood, hparams.data_root
    )
    results["auroc_ood_svhn"] = auroc
    results["aupr_ood_svhn"] = aupr
    print(f"Test - Accuracy {results['test_accuracy']:.4f}")
    results_json = json.dumps(results, indent=4, sort_keys=True)
    (results_dir / "results.json").write_text(results_json)
    torch.save(model.state_dict(), results_dir / "model.pt")
    torch.save(likelihood.state_dict(), results_dir / "likelihood.pt")
    writer.close()
# NOTE(review): this chunk begins mid-statement — the opening
# `parser.add_argument('--...',` for the first `help=` keyword lies above the
# visible region of the file.
                    help='search-value')
# Sweep-range arguments (begin/end/step triples); their purpose beyond the
# 'search-value' help text is not visible here.
parser.add_argument('--tau_dm_step', type=float, default=1.0,
                    help='search-value')
parser.add_argument('--rho_begin', type=float, default=1.0,
                    help='search-value')
parser.add_argument('--rho_end', type=float, default=2.0,
                    help='search-value')
parser.add_argument('--rho_step', type=float, default=1.0,
                    help='search-value')
args = parser.parse_args()

# Seed RNGs and pick the device; falls back to CPU when CUDA is unavailable.
cuda = torch.cuda.is_available()
set_seed(args.seed, cuda)
if cuda:
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Flattened per-frame input size: 40x40 pixels.
inp_size = 40 * 40


def set_data():
    # NOTE(review): the body of this function is cut off at the end of the
    # visible region — only its leading comments are shown here.
    ## load dataset
    # [5,50,100,40,40]; [5,50,100,40,40]
    # train_data = np.load("../data/5class_50sam_40x40_kinetics_train.npy")
    # test_data = np.load("../data/5class_50sam_40x40_kinetics_val.npy")
def main(args):
    """Train the two-head (encoder/decoder) online action detection model.

    Per epoch: trains on the 'train' phase, optionally evaluates on the other
    phases when ``args.debug`` is set, logs losses/mAP, and saves a checkpoint.
    Side effects: creates ./checkpoints, writes log.txt, sets
    CUDA_VISIBLE_DEVICES, writes one .pth checkpoint per epoch.
    """
    this_dir = osp.join(osp.dirname(__file__), '.')
    save_dir = osp.join(this_dir, 'checkpoints')
    if not osp.isdir(save_dir):
        os.makedirs(save_dir)
    # Record the exact launch command in the log for reproducibility.
    command = 'python ' + ' '.join(sys.argv)
    logger = utl.setup_logger(osp.join(this_dir, 'log.txt'), command=command)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    utl.set_seed(int(args.seed))
    model = build_model(args)
    # Resume weights from a checkpoint when one exists; otherwise initialize.
    if osp.isfile(args.checkpoint):
        checkpoint = torch.load(args.checkpoint, map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint['model_state_dict'])
    else:
        model.apply(utl.weights_init)
    if args.distributed:
        model = nn.DataParallel(model)
    model = model.to(device)
    # NOTE(review): ignore_index=21 — presumably a background/ambiguous class
    # id; confirm against the dataset's class index file.
    criterion = utl.MultiCrossEntropyLoss(ignore_index=21).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr,
                           weight_decay=args.weight_decay)
    if osp.isfile(args.checkpoint):
        # Resume optimizer state but override its lr with the command-line
        # value, and shift the epoch counter past the checkpointed epochs.
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        for param_group in optimizer.param_groups:
            param_group['lr'] = args.lr
        args.start_epoch += checkpoint['epoch']
    softmax = nn.Softmax(dim=1).to(device)
    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
        # One-shot 10x lr decay at (absolute) epoch 21.
        if epoch == 21:
            args.lr = args.lr * 0.1
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr
        # Loaders are rebuilt every epoch.
        data_loaders = {
            phase: utl.build_data_loader(args, phase)
            for phase in args.phases
        }
        enc_losses = {phase: 0.0 for phase in args.phases}
        enc_score_metrics = []
        enc_target_metrics = []
        enc_mAP = 0.0
        dec_losses = {phase: 0.0 for phase in args.phases}
        dec_score_metrics = []
        dec_target_metrics = []
        dec_mAP = 0.0
        start = time.time()
        for phase in args.phases:
            training = phase == 'train'
            if training:
                model.train(True)
            elif not training and args.debug:
                # Non-train phases are evaluated only in debug mode.
                model.train(False)
            else:
                continue
            # Gradients are tracked only during the training phase.
            with torch.set_grad_enabled(training):
                for batch_idx, (camera_inputs, motion_inputs, enc_target, dec_target) \
                        in enumerate(data_loaders[phase], start=1):
                    batch_size = camera_inputs.shape[0]
                    camera_inputs = camera_inputs.to(device)
                    motion_inputs = motion_inputs.to(device)
                    # Targets are flattened to (N*T, num_classes) rows.
                    enc_target = enc_target.to(device).view(
                        -1, args.num_classes)
                    dec_target = dec_target.to(device).view(
                        -1, args.num_classes)
                    enc_score, dec_score = model(camera_inputs, motion_inputs)
                    enc_loss = criterion(enc_score, enc_target)
                    dec_loss = criterion(dec_score, dec_target)
                    # Accumulate sample-weighted losses for epoch averages.
                    enc_losses[phase] += enc_loss.item() * batch_size
                    dec_losses[phase] += dec_loss.item() * batch_size
                    if args.verbose:
                        print(
                            'Epoch: {:2} | iteration: {:3} | enc_loss: {:.5f} dec_loss: {:.5f}'
                            .format(epoch, batch_idx, enc_loss.item(),
                                    dec_loss.item()))
                    if training:
                        optimizer.zero_grad()
                        loss = enc_loss + dec_loss
                        loss.backward()
                        optimizer.step()
                    else:
                        # Prepare metrics for encoder
                        enc_score = softmax(enc_score).cpu().numpy()
                        enc_target = enc_target.cpu().numpy()
                        enc_score_metrics.extend(enc_score)
                        enc_target_metrics.extend(enc_target)
                        # Prepare metrics for decoder
                        dec_score = softmax(dec_score).cpu().numpy()
                        dec_target = dec_target.cpu().numpy()
                        dec_score_metrics.extend(dec_score)
                        dec_target_metrics.extend(dec_target)
        end = time.time()
        if args.debug:
            result_file = 'inputs-{}-epoch-{}.json'.format(args.inputs, epoch)
            # Compute result for encoder
            enc_mAP = utl.compute_result_multilabel(
                args.class_index,
                enc_score_metrics,
                enc_target_metrics,
                save_dir,
                result_file,
                ignore_class=[0, 21],
                save=True,
            )
            # Compute result for decoder
            dec_mAP = utl.compute_result_multilabel(
                args.class_index,
                dec_score_metrics,
                dec_target_metrics,
                save_dir,
                result_file,
                ignore_class=[0, 21],
                save=False,
            )
        # Output result
        logger.output(epoch, enc_losses, dec_losses,
                      len(data_loaders['train'].dataset),
                      len(data_loaders['test'].dataset),
                      enc_mAP, dec_mAP, end - start,
                      debug=args.debug)
        # Save model
        checkpoint_file = 'inputs-{}-epoch-{}.pth'.format(args.inputs, epoch)
        torch.save(
            {
                'epoch': epoch,
                # DataParallel wraps the real model in `.module`.
                'model_state_dict':
                    model.module.state_dict()
                    if args.distributed else model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, osp.join(save_dir, checkpoint_file))
def main(args):
    """Train the delta-prediction online action detection model.

    Two losses are optimized jointly: a multi-class OAD loss on the raw
    scores (`criterion1`) and a SmoothL1 "delta" loss (`criterion2`) on the
    predicted change between consecutive timesteps.  Per epoch: trains on the
    'train' phase, optionally evaluates the other phases when ``args.debug``
    is set, logs losses/mAP, and saves a checkpoint.

    Side effects: creates the hard-coded result directory, writes
    lstm_log.txt, sets CUDA_VISIBLE_DEVICES, writes one .pth file per epoch.
    """
    this_dir = osp.join(osp.dirname(__file__), '.')
    # NOTE(review): hard-coded, machine-specific output root — consider making
    # this a command-line argument.
    save_dir = osp.join(
        '/dataset/volume1/users/yumin/result',
        'delta_{}_checkpoints_method{}_noenc_smoothbeta0.5'.format(
            args.dataset, args.method))
    if not osp.isdir(save_dir):
        os.makedirs(save_dir)
    # Record the exact launch command in the log for reproducibility.
    command = 'python ' + ' '.join(sys.argv)
    logger = utl.setup_logger(osp.join(this_dir, 'lstm_log.txt'),
                              command=command)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    utl.set_seed(int(args.seed))
    model = build_model(args)
    # Resume weights from a checkpoint when one exists; otherwise initialize.
    if osp.isfile(args.checkpoint):
        checkpoint = torch.load(args.checkpoint,
                                map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint['model_state_dict'])
    else:
        model.apply(utl.weights_init)
    if args.distributed:
        model = nn.DataParallel(model)
    model = model.to(device)
    # BUGFIX: the TVSeries branch previously bound its classification loss to
    # `criterion` (a name the training loop never reads) and left `criterion2`
    # undefined, so any TVSeries run crashed with a NameError at the first
    # loss computation.  Both branches now bind the names the loop uses.
    # NOTE(review): ignore_index=21 — presumably THUMOS's ambiguous class id;
    # confirm against the class index file.
    if args.dataset == 'THUMOS':
        criterion1 = utl.MultiCrossEntropyLoss_Delta(
            num_class=args.num_classes,
            dirichlet=args.dirichlet,
            ignore_index=21).to(device)
        criterion2 = nn.SmoothL1Loss()
    elif args.dataset == "TVSeries":
        criterion1 = utl.MultiCrossEntropyLoss_Delta(
            num_class=args.num_classes,
            dirichlet=args.dirichlet).to(device)
        criterion2 = nn.SmoothL1Loss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr,
                           weight_decay=args.weight_decay)
    if osp.isfile(args.checkpoint):
        # Resume optimizer state but override its lr with the command-line
        # value, and shift the epoch counter past the checkpointed epochs.
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        for param_group in optimizer.param_groups:
            param_group['lr'] = args.lr
        args.start_epoch += checkpoint['epoch']
    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
        # One-shot 10x lr decay at (absolute) epoch 21.
        if epoch == 21:
            args.lr = args.lr * 0.1
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr
        # Loaders are rebuilt every epoch.
        data_loaders = {
            phase: utl.build_data_loader(args, phase)
            for phase in args.phases
        }
        enc_losses = {phase: 0.0 for phase in args.phases}
        enc_score_metrics = []
        enc_target_metrics = []
        delta_score_metrics = []
        delta_target_metrics = []
        enc_mAP = 0.0
        delta_mAP = 0.0
        start = time.time()
        for phase in args.phases:
            training = phase == 'train'
            if training:
                model.train(True)
            elif not training and args.debug:
                # Non-train phases are evaluated only in debug mode.
                model.train(False)
            else:
                continue
            # Gradients are tracked only during the training phase.
            with torch.set_grad_enabled(training):
                for batch_idx, (camera_inputs, motion_inputs, enc_target,
                                smooth_target) \
                        in enumerate(data_loaders[phase], start=1):
                    batch_size = camera_inputs.shape[0]
                    camera_inputs = camera_inputs.to(device)
                    motion_inputs = motion_inputs.to(device)
                    # Keep the unflattened targets for the OAD loss; the
                    # flattened view is retained for interface parity.
                    extend_target = enc_target.to(device)
                    enc_target = enc_target.to(device).view(
                        -1, args.num_classes)
                    smooth_target = smooth_target.to(device)
                    oad_score, delta_score = model(camera_inputs,
                                                   motion_inputs)
                    # Delta target = smoothed label at t minus the *detached*
                    # OAD prediction at t; the first timestep has no
                    # predecessor and is dropped from the delta loss.
                    oad_before = oad_score.clone().detach()
                    oad_before = oad_before[:, 1::, :]
                    new_target = smooth_target[:, 1::, :] - oad_before
                    oad_loss = criterion1(oad_score, extend_target)
                    delta_loss = criterion2(delta_score[:, 1::, :],
                                            new_target)  # ignore the first
                    enc_losses[phase] += oad_loss.item() * batch_size
                    if args.verbose:
                        print(
                            'Epoch: {:2} | iteration: {:3} | enc_loss: {:.5f} | delta_loss: {:.5f}'
                            .format(epoch, batch_idx, oad_loss.item(),
                                    delta_loss.item() * 10))
                    if training:
                        optimizer.zero_grad()
                        # Delta loss is up-weighted 10x in the joint objective.
                        loss = oad_loss + delta_loss * 10
                        loss.backward()
                        optimizer.step()
                    else:
                        # Collect raw (un-softmaxed) encoder scores/targets.
                        enc_score = oad_score.cpu().numpy()  ## softmax check
                        enc_target = extend_target.cpu().numpy()
                        enc_score_metrics.extend(enc_score)
                        enc_target_metrics.extend(enc_target)
                        # Collect delta scores/targets, flattened per timestep.
                        delta_score_c = delta_score[:, 1::, :].reshape(
                            -1, args.num_classes)
                        delta = delta_score_c.cpu().numpy()
                        new_target_c = new_target.reshape(-1,
                                                          args.num_classes)
                        delta_target = new_target_c.cpu().numpy()
                        delta_score_metrics.extend(delta)
                        delta_target_metrics.extend(delta_target)
        end = time.time()
        if args.debug:
            # Evaluation stride knob: `% 1` currently evaluates every epoch.
            if epoch % 1 == 0:
                result_file = osp.join(
                    this_dir,
                    'delta-inputs-{}-epoch-{}.json'.format(args.inputs, epoch))
                # Compute mAP for the encoder (OAD) head.
                enc_mAP = utl.compute_result_multilabel(
                    args.dataset,
                    args.class_index,
                    enc_score_metrics,
                    enc_target_metrics,
                    save_dir,
                    result_file,
                    ignore_class=[0, 21],
                    save=True,
                )
                # Compute mAP for the delta head (smoothed targets).
                delta_mAP = utl.compute_result_multilabel(
                    args.dataset,
                    args.class_index,
                    delta_score_metrics,
                    delta_target_metrics,
                    save_dir,
                    result_file,
                    ignore_class=[0, 21],
                    save=True,
                    smooth=True,
                )
        # Output result
        logger.delta_output(epoch, enc_losses,
                            len(data_loaders['train'].dataset),
                            len(data_loaders['test'].dataset),
                            enc_mAP, delta_mAP, end - start,
                            debug=args.debug)
        # Save model
        checkpoint_file = 'delta-inputs-{}-epoch-{}.pth'.format(
            args.inputs, epoch)
        torch.save(
            {
                'epoch': epoch,
                # DataParallel wraps the real model in `.module`.
                'model_state_dict':
                    model.module.state_dict()
                    if args.distributed else model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, osp.join(save_dir, checkpoint_file))