def objective(trial):
    global counter

    counter += 1

    # Hyperparameters
    margs = pick_parameters(args, trial, counter)

    logger.info("Starting training for the following hyperparameters:")
    for k, v in margs.__dict__.items():
        logger.info("  %s: %s", k, v)

    # Bug fix related to some num_workers > 1 and CUDA. Bad things happen otherwise!
    torch.multiprocessing.set_start_method("spawn", force=True)

    # Load data
    simulator = load_simulator(margs)
    dataset = load_training_dataset(simulator, margs)

    # Create model
    model = create_model(margs, simulator)

    # Train
    _ = train.train_model(margs, dataset, model, simulator)

    # Save
    torch.save(model.state_dict(), create_filename("model", None, margs))

    # Evaluate
    model.eval()

    # Evaluate test samples
    log_likelihood_test, reconstruction_error_test, _ = evaluate.evaluate_test_samples(margs, simulator, model, paramscan=True)
    mean_log_likelihood_test = np.mean(log_likelihood_test)
    mean_reco_error_test = np.mean(reconstruction_error_test)

    # Generate samples
    x_gen = evaluate.sample_from_model(margs, model, simulator)
    distances_gen = simulator.distance_from_manifold(x_gen)
    mean_gen_distance = np.mean(distances_gen)

    # Report results
    logger.info("Results:")
    logger.info("  test log p:    %s", mean_log_likelihood_test)
    logger.info("  test reco err: %s", mean_reco_error_test)
    logger.info("  gen distance:  %s", mean_gen_distance)

    return (
        -1.0 * margs.metricnllfactor * mean_log_likelihood_test
        + margs.metricrecoerrorfactor * mean_reco_error_test
        + margs.metricdistancefactor * mean_gen_distance
    )
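# ---------------------------------------------------------------------------
# Hedged sketch (not part of the original script): the `trial` argument and the
# global `counter` suggest an Optuna-style hyperparameter scan. This shows how
# the objective above could be wired into a study. `pick_parameters_sketch` is
# an illustrative stand-in for the script's `pick_parameters`, and the suggest
# calls below are assumptions, NOT the actual search space.
# ---------------------------------------------------------------------------
import copy

import optuna


def pick_parameters_sketch(args, trial, counter):
    # Clone the base arguments and let the trial override the scanned fields.
    margs = copy.deepcopy(args)
    margs.modelname = f"{args.modelname}_trial{counter}"
    margs.batchsize = trial.suggest_categorical("batchsize", [50, 100, 200])
    margs.dropout = trial.suggest_float("dropout", 0.0, 0.2)
    return margs


counter = -1  # objective() increments this before each run
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=args.trials)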
def objective(trial):
    global counter

    counter += 1

    # Hyperparameters
    margs = pick_parameters(args, trial, counter)

    logger.info(f"Starting run {counter} / {args.trials}")
    logger.info("Hyperparams:")
    logger.info(f"  outer layers:      {margs.outerlayers}")
    logger.info(f"  inner layers:      {margs.innerlayers}")
    logger.info(f"  linear transform:  {margs.lineartransform}")
    logger.info(f"  spline range:      {margs.splinerange}")
    logger.info(f"  spline bins:       {margs.splinebins}")
    logger.info(f"  batchnorm:         {margs.batchnorm}")
    logger.info(f"  dropout:           {margs.dropout}")
    logger.info(f"  batch size:        {margs.batchsize}")
    logger.info(f"  MSE factor:        {margs.msefactor}")
    logger.info(f"  latent L2 reg:     {margs.uvl2reg}")
    logger.info(f"  weight decay:      {margs.weightdecay}")
    logger.info(f"  gradient clipping: {margs.clip}")

    # Bug fix related to some num_workers > 1 and CUDA. Bad things happen otherwise!
    torch.multiprocessing.set_start_method("spawn", force=True)

    # Load data
    simulator = load_simulator(margs)
    dataset = simulator.load_dataset(train=True, dataset_dir=create_filename("dataset", None, args), limit_samplesize=margs.samplesize)

    # Create model
    model = create_model(margs, simulator)

    # Train
    trainer1 = ForwardTrainer(model) if simulator.parameter_dim() is None else ConditionalForwardTrainer(model)
    trainer2 = ForwardTrainer(model) if simulator.parameter_dim() is None else ConditionalForwardTrainer(model)
    common_kwargs, _, _, _ = train.make_training_kwargs(margs, dataset)

    logger.info("Starting training MF, phase 1: manifold training")
    np.random.seed(123)
    _, val_losses = trainer1.train(
        loss_functions=[losses.mse, losses.hiddenl2reg],
        loss_labels=["MSE", "L2_lat"],
        loss_weights=[margs.msefactor, 0.0 if margs.uvl2reg is None else margs.uvl2reg],
        epochs=margs.epochs,
        parameters=(list(model.outer_transform.parameters()) + list(model.encoder.parameters()) if args.algorithm == "emf" else model.outer_transform.parameters()),
        forward_kwargs={"mode": "projection", "return_hidden": True},
        **common_kwargs,
    )

    logger.info("Starting training MF, phase 2: density training")
    np.random.seed(123)
    _ = trainer2.train(
        loss_functions=[losses.nll],
        loss_labels=["NLL"],
        loss_weights=[args.nllfactor],
        epochs=args.densityepochs,
        parameters=model.inner_transform.parameters(),
        forward_kwargs={"mode": "mf-fixed-manifold"},
        **common_kwargs,
    )

    # Save
    torch.save(model.state_dict(), create_filename("model", None, margs))

    # Evaluate reco error
    logger.info("Evaluating reco error")
    model.eval()
    np.random.seed(123)
    x, params = next(
        iter(
            trainer1.make_dataloader(
                simulator.load_dataset(train=True, dataset_dir=create_filename("dataset", None, args), limit_samplesize=args.samplesize),
                args.validationsplit,
                1000,
                0,
            )[1]
        )
    )
    x = x.to(device=trainer1.device, dtype=trainer1.dtype)
    params = None if simulator.parameter_dim() is None else params.to(device=trainer1.device, dtype=trainer1.dtype)
    x_reco, _, _ = model(x, context=params, mode="projection")
    reco_error = torch.mean(torch.sum((x - x_reco) ** 2, dim=1) ** 0.5).detach().cpu().numpy()

    # Generate samples
    logger.info("Evaluating sample closure")
    x_gen = evaluate.sample_from_model(margs, model, simulator)
    distances_gen = simulator.distance_from_manifold(x_gen)
    mean_gen_distance = np.mean(distances_gen)

    # Report results
    logger.info("Results:")
    logger.info("  reco err:     %s", reco_error)
    logger.info("  gen distance: %s", mean_gen_distance)

    return margs.metricrecoerrorfactor * reco_error + margs.metricdistancefactor * mean_gen_distance
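# ---------------------------------------------------------------------------
# Hedged sketch (not part of the original script): what the two phase-1 loss
# terms could look like. Phase 1 fits the manifold (the outer transform, plus
# the encoder for the "emf" algorithm) with a weighted sum of reconstruction
# MSE and an L2 penalty on the latents exposed via return_hidden=True; phase 2
# then fits the density on the fixed manifold by maximum likelihood. The
# signatures below are assumptions, not the repository's actual `losses` module.
# ---------------------------------------------------------------------------
import torch


def mse_sketch(x_reco, x, hidden=None):
    # Mean squared reconstruction error over the batch.
    return torch.mean(torch.sum((x_reco - x) ** 2, dim=1))


def hidden_l2_sketch(x_reco, x, hidden=None):
    # L2 penalty on the latent representation; zero when it is not returned.
    if hidden is None:
        return x.new_zeros(())
    return torch.mean(torch.sum(hidden ** 2, dim=1))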
logger.info("Hi!") logger.debug("Starting evaluate.py with arguments %s", args) # Model name if args.truth: create_modelname(args) logger.info("Evaluating simulator truth") else: create_modelname(args) logger.info("Evaluating model %s", args.modelname) # Bug fix related to some num_workers > 1 and CUDA. Bad things happen otherwise! torch.multiprocessing.set_start_method("spawn", force=True) # Data set simulator = load_simulator(args) # Load model if not args.truth: model = create_model(args, simulator=simulator) model.load_state_dict(torch.load(create_filename("model", None, args), map_location=torch.device("cpu"))) model.eval() else: model = None # Evaluate generative performance if args.skipgeneration: logger.info("Skipping generative evaluation") elif not args.truth: x_gen = sample_from_model(args, model, simulator) evaluate_model_samples(args, simulator, x_gen)
def objective(trial):
    global counter

    counter += 1

    # Hyperparameters
    margs = pick_parameters(args, trial, counter)

    logger.info(f"Starting run {counter} / {args.trials}")
    logger.info("Hyperparams:")
    logger.info(f"  outer layers:      {margs.outerlayers}")
    logger.info(f"  linlayers:         {margs.linlayers}")
    logger.info(f"  linchannelfactor:  {margs.linchannelfactor}")
    logger.info(f"  inner layers:      {margs.innerlayers}")
    logger.info(f"  linear transform:  {margs.lineartransform}")
    logger.info(f"  spline range:      {margs.splinerange}")
    logger.info(f"  spline bins:       {margs.splinebins}")
    logger.info(f"  batchnorm:         {margs.batchnorm}")
    logger.info(f"  actnorm:           {margs.actnorm}")
    logger.info(f"  dropout:           {margs.dropout}")
    logger.info(f"  batch size:        {margs.batchsize}")
    logger.info(f"  MSE factor:        {margs.msefactor}")
    logger.info(f"  latent L2 reg:     {margs.uvl2reg}")
    logger.info(f"  weight decay:      {margs.weightdecay}")
    logger.info(f"  gradient clipping: {margs.clip}")

    # Bug fix related to some num_workers > 1 and CUDA. Bad things happen otherwise!
    torch.multiprocessing.set_start_method("spawn", force=True)

    # Load data
    simulator = load_simulator(margs)
    dataset = simulator.load_dataset(train=True, dataset_dir=create_filename("dataset", None, margs), limit_samplesize=margs.samplesize)

    # Create model
    model = create_model(margs, simulator)

    # Train
    try:
        trainer = ForwardTrainer(model) if simulator.parameter_dim() is None else ConditionalForwardTrainer(model)
        common_kwargs, _, _, _ = train.make_training_kwargs(margs, dataset)

        logger.info("Starting training MF: manifold training")
        np.random.seed(123)
        _, val_losses = trainer.train(
            loss_functions=[losses.mse, losses.hiddenl2reg],
            loss_labels=["MSE", "L2_lat"],
            loss_weights=[margs.msefactor, 0.0 if margs.uvl2reg is None else margs.uvl2reg],
            epochs=margs.epochs,
            parameters=(list(model.outer_transform.parameters()) + list(model.encoder.parameters()) if args.algorithm == "emf" else model.outer_transform.parameters()),
            forward_kwargs={"mode": "projection", "return_hidden": True},
            **common_kwargs,
        )

        # Save
        torch.save(model.state_dict(), create_filename("model", None, margs))

        # Evaluate reco error
        logger.info("Evaluating reco error")
        model.eval()
        torch.cuda.empty_cache()
        np.random.seed(123)
        dataloader = trainer.make_dataloader(
            simulator.load_dataset(train=True, dataset_dir=create_filename("dataset", None, margs), limit_samplesize=margs.samplesize),
            args.validationsplit,
            20,
            4,
        )[1]
        reco_errors = []
        x_plot, x_reco_plot = None, None
        for x, params in dataloader:
            x = x.to(device=trainer.device, dtype=trainer.dtype)
            params = None if simulator.parameter_dim() is None else params.to(device=trainer.device, dtype=trainer.dtype)
            x_reco, _, _ = model(x, context=params, mode="projection")
            reco_errors.append((torch.sum((x - x_reco) ** 2, dim=1) ** 0.5).detach().cpu().numpy())
            # Keep the first batch around for the diagnostic plot
            if x_plot is None:
                x_plot = x.detach().cpu().numpy()
                x_reco_plot = x_reco.detach().cpu().numpy()

        # Concatenate before averaging: the last batch may be smaller, and
        # np.mean on a ragged list of arrays fails
        reco_error = np.mean(np.concatenate(reco_errors))
        if not np.isfinite(reco_error):
            raise RuntimeError("Non-finite reconstruction error")

        # Report results
        logger.info("Results:")
        logger.info("  reco err: %s", reco_error)

        # Plot pairs of original and reconstructed images
        x = np.clip(np.transpose(x_plot, [0, 2, 3, 1]) / 256.0, 0.0, 1.0)
        x_reco = np.clip(np.transpose(x_reco_plot, [0, 2, 3, 1]) / 256.0, 0.0, 1.0)
        plt.figure(figsize=(6 * 3.0, 5 * 3.0))
        for i in range(15):
            plt.subplot(5, 6, 2 * i + 1)
            plt.imshow(x[i])
            plt.gca().get_xaxis().set_visible(False)
            plt.gca().get_yaxis().set_visible(False)
            plt.subplot(5, 6, 2 * i + 2)
            plt.imshow(x_reco[i])
            plt.gca().get_xaxis().set_visible(False)
            plt.gca().get_yaxis().set_visible(False)
        plt.tight_layout()
        filename = create_filename("training_plot", "reco", margs)
        plt.savefig(filename.format(""))

    except RuntimeError as e:
        logger.info("Error during training, returning 1e9\n  %s", e)
        return 1e9

    return reco_error
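# ---------------------------------------------------------------------------
# Hedged alternative (assuming Optuna is the scan backend): instead of scoring
# failed trials with the sentinel 1e9, which distorts the optimizer's model of
# the objective, diverged trials can be pruned. `unsafe_objective` is a
# hypothetical stand-in for any objective that may raise RuntimeError or
# return a non-finite value.
# ---------------------------------------------------------------------------
import numpy as np
import optuna


def safe_objective(trial):
    try:
        value = unsafe_objective(trial)  # hypothetical stand-in
    except RuntimeError as e:
        raise optuna.TrialPruned() from e
    if not np.isfinite(value):
        raise optuna.TrialPruned()
    return value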