def train_manifold_flow_sequential(args, dataset, model, simulator):
    """ MFMF-A training """
    assert not args.specified

    trainer = ManifoldFlowTrainer(model) if simulator.parameter_dim() is None else ConditionalManifoldFlowTrainer(model)
    common_kwargs = {
        "dataset": dataset,
        "batch_size": args.batchsize,
        "initial_lr": args.lr,
        "scheduler": optim.lr_scheduler.CosineAnnealingLR,
        "clip_gradient": args.clip,
        "validation_split": args.validationsplit,
    }
    if args.weightdecay is not None:
        common_kwargs["optimizer_kwargs"] = {"weight_decay": float(args.weightdecay)}

    logger.info("Starting training MF, phase 1: manifold training")
    learning_curves = trainer.train(
        loss_functions=[losses.mse],
        loss_labels=["MSE"],
        loss_weights=[args.msefactor],
        epochs=args.epochs // 2,
        parameters=(
            list(model.outer_transform.parameters()) + list(model.encoder.parameters())
            if args.algorithm == "emf"
            else model.outer_transform.parameters()
        ),
        callbacks=[callbacks.save_model_after_every_epoch(create_filename("checkpoint", None, args)[:-3] + "_epoch_A{}.pt")],
        forward_kwargs={"mode": "projection"},
        **common_kwargs,
    )
    learning_curves = np.vstack(learning_curves).T

    logger.info("Starting training MF, phase 2: density training")
    learning_curves_ = trainer.train(
        loss_functions=[losses.nll],
        loss_labels=["NLL"],
        loss_weights=[args.nllfactor],
        epochs=args.epochs - (args.epochs // 2),
        parameters=model.inner_transform.parameters(),
        callbacks=[callbacks.save_model_after_every_epoch(create_filename("checkpoint", None, args)[:-3] + "_epoch_B{}.pt")],
        forward_kwargs={"mode": "mf-fixed-manifold"},
        **common_kwargs,
    )
    learning_curves_ = np.vstack(learning_curves_).T
    learning_curves = learning_curves_ if learning_curves is None else np.vstack((learning_curves, learning_curves_))

    return learning_curves
def train_pie(args, dataset, model, simulator):
    """ PIE training """
    trainer = (
        ForwardTrainer(model)
        if simulator.parameter_dim() is None
        else ConditionalForwardTrainer(model) if args.scandal is None else SCANDALForwardTrainer(model)
    )
    common_kwargs, scandal_loss, scandal_label, scandal_weight = make_training_kwargs(args, dataset)

    callbacks_ = [callbacks.save_model_after_every_epoch(create_filename("checkpoint", None, args))]
    if simulator.is_image():
        callbacks_.append(
            callbacks.plot_sample_images(
                create_filename("training_plot", None, args),
                context=None if simulator.parameter_dim() is None else torch.zeros(30, simulator.parameter_dim()),
            )
        )
        callbacks_.append(callbacks.plot_reco_images(create_filename("training_plot", "reco_epoch", args)))

    logger.info("Starting training PIE on NLL")
    learning_curves = trainer.train(
        loss_functions=[losses.nll] + scandal_loss,
        loss_labels=["NLL"] + scandal_label,
        loss_weights=[args.nllfactor * nat_to_bit_per_dim(args.datadim)] + scandal_weight,
        epochs=args.epochs,
        callbacks=callbacks_,
        forward_kwargs={"mode": "pie"},
        initial_epoch=args.startepoch,
        **common_kwargs,
    )
    learning_curves = np.vstack(learning_curves).T

    return learning_curves
def main():
    # NOTE: All the following information will come from the UI once it's created
    template_fname = (
        r'C:/Users/erins/OneDrive - University of North Carolina at Chapel Hill/Protocols and SOPs/'
        r'MARG-PTC-001 Primary Cell Spinoculation and Latency/MARG-PTC-001a-v1_1-Lewin Template - Copy.xlsx'
    )
    out_dir = r'C:/Users/erins/OneDrive - University of North Carolina at Chapel Hill/Protocols and SOPs/' \
              r'MARG-PTC-001 Primary Cell Spinoculation and Latency/'
    template_name: str = 'test'
    template_desc: str = 'test'
    ########
    outfile: str = create_filename(out_dir, 'formulas and names', extension='xlsx')
    template_data: dict = process_template_file(template_fname, outfile)
    # template_data is a dict, so iterate over its items to unpack name/items pairs
    for name, items in template_data.items():
        output_formulas_to_excel(outfile, name, items)
    filename: str = create_filename(out_dir, template_desc)

    # NOTE: Before the document is created, will need to update the formulas with:
    #   - Manual formula variable names
    #   - LaTeX for manual formula
    #   - How to account for potential multiple results?
    #       - These are formulas with Ifs, MIN, MAX
    #   - Dealing with Table formulas
    document: Document = create_document(template_data, template_name, template_desc)
    document.save(filename)
def evaluate_model_samples(args, simulator, x_gen):
    """ Evaluate model samples and save results """
    logger.info("Calculating likelihood of generated samples")
    try:
        if simulator.parameter_dim() is None:
            log_likelihood_gen = simulator.log_density(x_gen)
        else:
            params = simulator.default_parameters(true_param_id=args.trueparam)
            params = np.asarray([params for _ in range(args.generate)])
            log_likelihood_gen = simulator.log_density(x_gen, parameters=params)
        log_likelihood_gen[np.isnan(log_likelihood_gen)] = -1.0e-12
        np.save(create_filename("results", "samples_likelihood", args), log_likelihood_gen)
    except IntractableLikelihoodError:
        logger.info("True simulator likelihood is intractable for dataset %s", args.dataset)

    # Distance from manifold
    try:
        logger.info("Calculating distance from manifold of generated samples")
        distances_gen = simulator.distance_from_manifold(x_gen)
        np.save(create_filename("results", "samples_manifold_distance", args), distances_gen)
    except NotImplementedError:
        logger.info("Cannot calculate distance from manifold for dataset %s", args.dataset)
def evaluate_model_samples(args, simulator, x_gen):
    """ Evaluate model samples and save results """
    logger.info("Calculating likelihood of generated samples")
    try:
        if simulator.parameter_dim() is None:
            log_likelihood_gen = simulator.log_density(x_gen)
        else:
            params = simulator.default_parameters(true_param_id=args.trueparam)
            params = np.asarray([params for _ in range(args.generate)])
            log_likelihood_gen = simulator.log_density(x_gen, parameters=params)
        log_likelihood_gen[np.isnan(log_likelihood_gen)] = -1.0e-12
        np.save(create_filename("results", "samples_likelihood", args), log_likelihood_gen)
    except IntractableLikelihoodError:
        logger.info("True simulator likelihood is intractable for dataset %s", args.dataset)

    logger.info("Calculating distance from manifold of generated samples")
    try:
        distances_gen = simulator.distance_from_manifold(x_gen)
        np.save(create_filename("results", "samples_manifold_distance", args), distances_gen)
    except NotImplementedError:
        logger.info("Cannot calculate distance from manifold for dataset %s", args.dataset)

    if simulator.is_image():
        if calculate_fid_given_paths is None:
            logger.warning("Cannot compute FID score, did not find FID implementation")
            return

        logger.info("Calculating FID score of generated samples")
        # The FID script needs an image folder
        with tempfile.TemporaryDirectory() as gen_dir:
            logger.debug(f"Storing generated images in temporary folder {gen_dir}")
            array_to_image_folder(x_gen, gen_dir)

            true_dir = create_filename("dataset", None, args) + "/test"
            os.makedirs(os.path.dirname(true_dir), exist_ok=True)
            if not os.path.exists(f"{true_dir}/0.jpg"):
                array_to_image_folder(
                    simulator.load_dataset(train=False, numpy=True, dataset_dir=create_filename("dataset", None, args), true_param_id=args.trueparam)[0],
                    true_dir,
                )

            logger.debug("Beginning FID calculation with batchsize 50")
            fid = calculate_fid_given_paths([gen_dir, true_dir], 50, "", 2048)
            logger.info(f"FID = {fid}")
            np.save(create_filename("results", "samples_fid", args), [fid])
def train_specified_manifold_flow(args, dataset, model, simulator):
    """ FOM training """
    trainer = (
        ForwardTrainer(model)
        if simulator.parameter_dim() is None
        else ConditionalForwardTrainer(model) if args.scandal is None else SCANDALForwardTrainer(model)
    )
    common_kwargs, scandal_loss, scandal_label, scandal_weight = make_training_kwargs(args, dataset)

    logger.info("Starting training MF with specified manifold on NLL")
    learning_curves = trainer.train(
        loss_functions=[losses.mse, losses.nll] + scandal_loss,
        loss_labels=["MSE", "NLL"] + scandal_label,
        loss_weights=[0.0, args.nllfactor * nat_to_bit_per_dim(args.modellatentdim)] + scandal_weight,
        epochs=args.epochs,
        callbacks=[callbacks.save_model_after_every_epoch(create_filename("checkpoint", None, args))],
        forward_kwargs={"mode": "mf"},
        initial_epoch=args.startepoch,
        **common_kwargs,
    )
    learning_curves = np.vstack(learning_curves).T

    return learning_curves
def train_generative_adversarial_manifold_flow(args, dataset, model, simulator):
    """ MFMF-OT training """
    gen_trainer = AdversarialTrainer(model) if simulator.parameter_dim() is None else ConditionalAdversarialTrainer(model)
    common_kwargs, scandal_loss, scandal_label, scandal_weight = make_training_kwargs(args, dataset)
    common_kwargs["batch_size"] = args.genbatchsize

    logger.info("Starting training GAMF: Sinkhorn-GAN")

    callbacks_ = [callbacks.save_model_after_every_epoch(create_filename("checkpoint", None, args))]
    if args.debug:
        callbacks_.append(callbacks.print_mf_weight_statistics())

    learning_curves_ = gen_trainer.train(
        loss_functions=[losses.make_sinkhorn_divergence()],
        loss_labels=["GED"],
        loss_weights=[args.sinkhornfactor],
        epochs=args.epochs,
        callbacks=callbacks_,
        compute_loss_variance=True,
        initial_epoch=args.startepoch,
        **common_kwargs,
    )

    learning_curves = np.vstack(learning_curves_).T
    return learning_curves
def run_mcmc(args, simulator, model=None):
    """ MCMC """
    logger.info(
        "Starting MCMC based on %s after %s observed samples, generating %s posterior samples with %s for parameter point number %s",
        "true simulator likelihood" if model is None else "neural likelihood estimate",
        args.observedsamples,
        args.mcmcsamples,
        "slice sampler" if args.slicesampler else "Metropolis-Hastings sampler (step = {})".format(args.mcmcstep),
        args.trueparam,
    )

    # Data
    true_parameters = simulator.default_parameters(true_param_id=args.trueparam)
    x_obs, _ = simulator.load_dataset(
        train=False,
        numpy=True,
        dataset_dir=create_filename("dataset", None, args),
        true_param_id=args.trueparam,
        joint_score=False,
        limit_samplesize=args.observedsamples,
    )
    x_obs_ = torch.tensor(x_obs, dtype=torch.float)

    if model is None:
        # MCMC based on ground truth likelihood
        def log_posterior(params):
            log_prob = np.sum(simulator.log_density(x_obs, parameters=params))
            log_prob += simulator.evaluate_log_prior(params)
            return float(log_prob)

    else:
        # MCMC based on neural likelihood estimator
        def log_posterior(params):
            params_ = np.broadcast_to(params.reshape((-1, params.shape[-1])), (x_obs.shape[0], params.shape[-1]))
            params_ = torch.tensor(params_, dtype=torch.float)

            if args.algorithm == "flow":
                log_prob = np.sum(model.log_prob(x_obs_, context=params_).detach().numpy())
            elif args.algorithm in ["pie", "slice"]:
                log_prob = np.sum(model.log_prob(x_obs_, context=params_, mode=args.algorithm).detach().numpy())
            elif not args.conditionalouter:
                # Slow part of Jacobian drops out in LLR / MCMC acceptance ratio
                log_prob = np.sum(model.log_prob(x_obs_, context=params_, mode="mf-fixed-manifold").detach().numpy())
            else:
                log_prob = np.sum(model.log_prob(x_obs_, context=params_, mode="mf").detach().numpy())

            log_prob += simulator.evaluate_log_prior(params)
            return float(log_prob)

    if args.slicesampler:
        logger.debug("Initializing slice sampler")
        sampler = mcmc.SliceSampler(true_parameters, log_posterior, thin=args.thin)
    else:
        logger.debug("Initializing Gaussian Metropolis-Hastings sampler")
        sampler = mcmc.GaussianMetropolis(true_parameters, log_posterior, step=args.mcmcstep, thin=args.thin)

    if args.burnin > 0:
        logger.info("Starting burn in")
        sampler.gen(args.burnin)
    logger.info("Burn in done, starting main chain")
    posterior_samples = sampler.gen(args.mcmcsamples)
    logger.info("MCMC done")

    return posterior_samples
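The samplers above come from the project's mcmc module, which is not shown here. As a rough, illustrative sketch of the random-walk update that a Gaussian Metropolis-Hastings sampler performs on log_posterior, consider the following; the function name, signature, and step/thin handling are assumptions for illustration, not the project's implementation.

import numpy as np

def gaussian_metropolis_sketch(log_posterior, x0, n_samples, step=0.1, thin=1, seed=0):
    """Minimal random-walk Metropolis-Hastings sketch (illustrative only)."""
    rng = np.random.default_rng(seed)
    x = np.asarray(x0, dtype=float)
    log_p = log_posterior(x)
    samples = []
    for i in range(n_samples * thin):
        proposal = x + step * rng.standard_normal(x.shape)  # Gaussian proposal around the current point
        log_p_new = log_posterior(proposal)
        if np.log(rng.uniform()) < log_p_new - log_p:  # accept with probability min(1, posterior ratio)
            x, log_p = proposal, log_p_new
        if (i + 1) % thin == 0:  # keep every thin-th state
            samples.append(x.copy())
    return np.array(samples)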
def train_pie(args, dataset, model, simulator):
    """ PIE training """
    trainer = ManifoldFlowTrainer(model) if simulator.parameter_dim() is None else ConditionalManifoldFlowTrainer(model)
    logger.info("Starting training PIE on NLL")
    common_kwargs = {
        "dataset": dataset,
        "batch_size": args.batchsize,
        "initial_lr": args.lr,
        "scheduler": optim.lr_scheduler.CosineAnnealingLR,
        "clip_gradient": args.clip,
        "validation_split": args.validationsplit,
    }
    if args.weightdecay is not None:
        common_kwargs["optimizer_kwargs"] = {"weight_decay": float(args.weightdecay)}

    learning_curves = trainer.train(
        loss_functions=[losses.nll],
        loss_labels=["NLL"],
        loss_weights=[args.nllfactor],
        epochs=args.epochs,
        callbacks=[callbacks.save_model_after_every_epoch(create_filename("checkpoint", None, args)[:-3] + "_epoch_{}.pt")],
        forward_kwargs={"mode": "pie"},
        **common_kwargs,
    )
    learning_curves = np.vstack(learning_curves).T
    return learning_curves
def sample_from_model(args, model, simulator, batchsize=200):
    """ Generate samples from model and store """
    logger.info("Sampling from model")

    x_gen_all = []
    while len(x_gen_all) < args.generate:
        n = min(batchsize, args.generate - len(x_gen_all))

        if simulator.parameter_dim() is None:
            x_gen = model.sample(n=n).detach().numpy()
        elif args.trueparam is None:
            # Sample from prior
            params = simulator.sample_from_prior(n)
            params = torch.tensor(params, dtype=torch.float)
            x_gen = model.sample(n=n, context=params).detach().numpy()
        else:
            params = simulator.default_parameters(true_param_id=args.trueparam)
            params = np.asarray([params for _ in range(n)])
            params = torch.tensor(params, dtype=torch.float)
            x_gen = model.sample(n=n, context=params).detach().numpy()

        x_gen_all += list(x_gen)

    x_gen_all = np.array(x_gen_all)
    np.save(create_filename("results", "samples", args), x_gen_all)
    return x_gen_all
def train_generative_adversarial_manifold_flow_alternating(args, dataset, model, simulator):
    """ MFMF-OTA training """
    assert not args.specified

    gen_trainer = GenerativeTrainer(model) if simulator.parameter_dim() is None else ConditionalGenerativeTrainer(model)
    likelihood_trainer = ManifoldFlowTrainer(model) if simulator.parameter_dim() is None else ConditionalManifoldFlowTrainer(model)
    metatrainer = AlternatingTrainer(model, gen_trainer, likelihood_trainer)

    meta_kwargs = {"dataset": dataset, "initial_lr": args.lr, "scheduler": optim.lr_scheduler.CosineAnnealingLR, "validation_split": args.validationsplit}
    if args.weightdecay is not None:
        meta_kwargs["optimizer_kwargs"] = {"weight_decay": float(args.weightdecay)}

    phase1_kwargs = {"clip_gradient": args.clip}
    phase2_kwargs = {"forward_kwargs": {"mode": "mf-fixed-manifold"}, "clip_gradient": args.clip}

    phase1_parameters = model.parameters()
    phase2_parameters = model.inner_transform.parameters()

    logger.info("Starting training GAMF, alternating between Sinkhorn divergence and log likelihood")
    learning_curves_ = metatrainer.train(
        loss_functions=[losses.make_sinkhorn_divergence(), losses.nll],
        loss_function_trainers=[0, 1],
        loss_labels=["GED", "NLL"],
        loss_weights=[args.sinkhornfactor, args.nllfactor],
        batch_sizes=[args.genbatchsize, args.batchsize],
        epochs=args.epochs // 2,
        parameters=[phase1_parameters, phase2_parameters],
        callbacks=[callbacks.save_model_after_every_epoch(create_filename("checkpoint", None, args)[:-3] + "_epoch_{}.pt")],
        trainer_kwargs=[phase1_kwargs, phase2_kwargs],
        subsets=args.subsets,
        subset_callbacks=[callbacks.print_mf_weight_statistics()] if args.debug else None,
        **meta_kwargs,
    )
    learning_curves = np.vstack(learning_curves_).T

    return learning_curves
def sample_from_model(args, model, simulator):
    """ Generate samples from model and store """
    logger.info("Sampling from model")
    if simulator.parameter_dim() is None:
        x_gen = model.sample(n=args.generate).detach().numpy()
    else:
        params = simulator.default_parameters(true_param_id=args.trueparam)
        params = np.asarray([params for _ in range(args.generate)])
        params = torch.tensor(params, dtype=torch.float)
        x_gen = model.sample(n=args.generate, context=params).detach().numpy()
    np.save(create_filename("results", "samples", args), x_gen)
    return x_gen
def objective(trial):
    global counter
    counter += 1

    # Hyperparameters
    margs = pick_parameters(args, trial, counter)

    logger.info("Starting training for the following hyperparameters:")
    for k, v in margs.__dict__.items():
        logger.info("  %s: %s", k, v)

    # Bug fix related to some num_workers > 1 and CUDA. Bad things happen otherwise!
    torch.multiprocessing.set_start_method("spawn", force=True)

    # Load data
    simulator = load_simulator(margs)
    dataset = load_training_dataset(simulator, margs)

    # Create model
    model = create_model(margs, simulator)

    # Train
    _ = train.train_model(margs, dataset, model, simulator)

    # Save
    torch.save(model.state_dict(), create_filename("model", None, margs))

    # Evaluate
    model.eval()

    # Evaluate test samples
    log_likelihood_test, reconstruction_error_test, _ = evaluate.evaluate_test_samples(margs, simulator, model, paramscan=True)
    mean_log_likelihood_test = np.mean(log_likelihood_test)
    mean_reco_error_test = np.mean(reconstruction_error_test)

    # Generate samples
    x_gen = evaluate.sample_from_model(margs, model, simulator)
    distances_gen = simulator.distance_from_manifold(x_gen)
    mean_gen_distance = np.mean(distances_gen)

    # Report results
    logger.info("Results:")
    logger.info("  test log p:    %s", mean_log_likelihood_test)
    logger.info("  test reco err: %s", mean_reco_error_test)
    logger.info("  gen distance:  %s", mean_gen_distance)

    return (
        -1.0 * margs.metricnllfactor * mean_log_likelihood_test
        + margs.metricrecoerrorfactor * mean_reco_error_test
        + margs.metricdistancefactor * mean_gen_distance
    )
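pick_parameters is not defined in this excerpt. A hypothetical sketch of such a helper, using Optuna's trial.suggest_* API but with assumed parameter names and ranges, might look like the following; it only illustrates the pattern of cloning args and overriding a few fields per trial.

import copy

def pick_parameters(args, trial, counter):
    """Hypothetical sketch: clone args and overwrite selected fields with Optuna suggestions."""
    margs = copy.deepcopy(args)
    margs.modelname = f"{args.modelname}_trial{counter}"           # assumed naming scheme
    margs.lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)      # assumed search range
    margs.batchsize = trial.suggest_categorical("batchsize", [50, 100, 200])
    margs.weightdecay = trial.suggest_float("weightdecay", 1e-6, 1e-3, log=True)
    return margs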
def timing(args):
    logger.info(
        "Timing algorithm %s with %s outer layers with transformation %s and %s inner layers with transformation %s",
        args.algorithm,
        args.outerlayers,
        args.outertransform,
        args.innerlayers,
        args.innertransform,
    )

    # Bug fix related to some num_workers > 1 and CUDA. Bad things happen otherwise!
    torch.multiprocessing.set_start_method("spawn", force=True)

    if torch.cuda.is_available():
        torch.set_default_tensor_type("torch.cuda.DoubleTensor")

    # Loop over data dims
    all_times = []
    for datadim in args.datadims:
        logger.info("Starting timing for %s-dimensional data", datadim)
        args.datadim = datadim

        # Data
        data = torch.randn(args.batchsize, datadim)
        data.requires_grad = True

        # Model
        model = create_model(args, context_features=None)
        if torch.cuda.is_available():
            model = model.to(torch.device("cuda"))

        # Time forward pass
        times = []
        for _ in range(args.repeats):
            time_before = time.time()
            _ = model(data)
            times.append(time.time() - time_before)

        logger.info("Mean time: %s s", np.mean(times))
        all_times.append(times)

    # Save results
    logger.info("Saving results")
    np.save(create_filename("timing", None, args), all_times)
def create_index_file(root_dir, width_tiles, height_tiles, xchunks, ychunks, xtiles_per_chunk, ytiles_per_chunk):
    filename = os.path.join(root_dir, "world.dat")
    with open(filename, "wb") as f:
        tile_size = np.array([width_tiles, height_tiles], dtype=np.uint32)
        other_data = np.array([xchunks, ychunks, xtiles_per_chunk, ytiles_per_chunk], dtype=np.uint16)
        fname_length = np.array([utils.FILENAME_LENGTH], dtype=np.uint8)
        f.write(tile_size.tobytes())
        f.write(other_data.tobytes())
        f.write(fname_length.tobytes())
        f.write(bytes(pad_filename(utils.CHUNK_DIRECTORY), encoding="utf-8"))
        for y in range(ychunks):
            for x in range(xchunks):
                f.write(bytes(pad_filename(utils.create_filename(x, y, xchunks)), encoding="utf-8"))
def create_empty(directory, width_tiles, height_tiles, xtiles_per_chunk, ytiles_per_chunk):
    xchunks = int(width_tiles / xtiles_per_chunk) + (1 if width_tiles % xtiles_per_chunk != 0 else 0)
    ychunks = int(height_tiles / ytiles_per_chunk) + (1 if height_tiles % ytiles_per_chunk != 0 else 0)
    ensure_directory_exists(directory)
    clean_directory(directory)
    ensure_directory_exists(os.path.join(directory, utils.CHUNK_DIRECTORY))
    create_index_file(directory, width_tiles, height_tiles, xchunks, ychunks, xtiles_per_chunk, ytiles_per_chunk)
    data = np.ones(xtiles_per_chunk * ytiles_per_chunk, dtype=utils.TILE_DTYPE) * tiles.TILE_NONE
    for y in range(ychunks):
        for x in range(xchunks):
            filename = os.path.join(directory, utils.CHUNK_DIRECTORY, utils.create_filename(x, y, xchunks))
            with open(filename, "wb") as f:
                write_chunk(data, f)
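For reference, the world.dat header written by create_index_file above could be read back roughly as follows. This is an illustrative sketch, not part of the project: it assumes pad_filename pads every name to utils.FILENAME_LENGTH bytes with spaces or NUL bytes, matching the writer.

import numpy as np

def read_index_header(path):
    """Illustrative reader for the world.dat layout written above (assumptions noted in the lead-in)."""
    with open(path, "rb") as f:
        width_tiles, height_tiles = np.frombuffer(f.read(8), dtype=np.uint32)          # 2 x uint32
        xchunks, ychunks, xtiles_per_chunk, ytiles_per_chunk = np.frombuffer(f.read(8), dtype=np.uint16)  # 4 x uint16
        (fname_length,) = np.frombuffer(f.read(1), dtype=np.uint8)                     # 1 x uint8
        n = int(fname_length)
        chunk_directory = f.read(n).decode("utf-8").rstrip("\x00 ")                    # padded directory name
        chunk_files = [f.read(n).decode("utf-8").rstrip("\x00 ") for _ in range(int(xchunks) * int(ychunks))]
    return (int(width_tiles), int(height_tiles)), chunk_directory, chunk_files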
def train_generative_adversarial_manifold_flow(args, dataset, model, simulator):
    """ MFMF-OT training """
    gen_trainer = GenerativeTrainer(model) if simulator.parameter_dim() is None else ConditionalGenerativeTrainer(model)
    common_kwargs = {
        "dataset": dataset,
        "initial_lr": args.lr,
        "scheduler": optim.lr_scheduler.CosineAnnealingLR,
        "clip_gradient": args.clip,
        "validation_split": args.validationsplit,
    }
    if args.weightdecay is not None:
        common_kwargs["optimizer_kwargs"] = {"weight_decay": float(args.weightdecay)}

    logger.info("Starting training GAMF: Sinkhorn-GAN")

    callbacks_ = [callbacks.save_model_after_every_epoch(create_filename("checkpoint", None, args)[:-3] + "_epoch_{}.pt")]
    if args.debug:
        callbacks_.append(callbacks.print_mf_weight_statistics())

    learning_curves_ = gen_trainer.train(
        loss_functions=[losses.make_sinkhorn_divergence()],
        loss_labels=["GED"],
        loss_weights=[args.sinkhornfactor],
        epochs=args.epochs,
        callbacks=callbacks_,
        batch_size=args.genbatchsize,
        compute_loss_variance=True,
        **common_kwargs,
    )

    learning_curves = np.vstack(learning_curves_).T
    return learning_curves
def download_file(item, path, course, output_dir):
    filepath = create_filepath(course, path)
    description = item["Description"]["Html"]
    topic_type = item["TopicType"]
    title = item["Title"]

    if topic_type == 1:
        filename = create_filename(item)
        full_path = f"{output_dir}/{filepath}/{filename}"

        # These documents are real files that we want to download
        download_from_url(f"""{ufora}{item["Url"]}""", full_path)

        if item["Url"].endswith(".html"):
            # HTML files on Ufora need a little special treatment
            # We'll prepend a title, <base> tag and convert them to pdf
            with open(full_path, "r") as f:
                content = f.read()

            filename_without_extension = ".".join(filename.split(".")[:-1])
            description_path = f"{output_dir}/{filepath}/{filename_without_extension}.pdf"
            create_metadata(description_path, content, filename_without_extension)

            new_content = f"<base href={ufora}><h1>{title}</h1>{content}"
            with open(full_path, "w") as f:
                f.write(new_content)
        elif description:
            # Choosing this filename might cause an overlap...
            filename_without_extension = ".".join(filename.split(".")[:-1])
            description_path = f"{output_dir}/{filepath}/{filename_without_extension}.pdf"
            create_metadata(description_path, description, filename_without_extension)
    elif topic_type == 3:
        # These documents are just clickable links, we'll render them in a pdf
        url = item["Url"]
        filename = create_filename_without_extension(item)
        full_path = f"{output_dir}/{filepath}/{filename}"
        create_metadata(f"{full_path}.pdf", f"<a href={url}>{url}</a>{description}", item["Title"])
    else:
        print(f"Don't know this topic type: {topic_type}")
        exit()
def train_dough(args, dataset, model, simulator):
    """ PIE with variable epsilons training """
    trainer = VariableDimensionManifoldFlowTrainer(model) if simulator.parameter_dim() is None else ConditionalVariableDimensionManifoldFlowTrainer(model)
    common_kwargs = {
        "dataset": dataset,
        "batch_size": args.batchsize,
        "initial_lr": args.lr,
        "scheduler": optim.lr_scheduler.CosineAnnealingLR,
        "clip_gradient": args.clip,
        "validation_split": args.validationsplit,
    }
    if args.weightdecay is not None:
        common_kwargs["optimizer_kwargs"] = {"weight_decay": float(args.weightdecay)}

    logger.info("Starting training dough, phase 1: NLL without latent regularization")
    learning_curves = trainer.train(
        loss_functions=[losses.nll],
        loss_labels=["NLL"],
        loss_weights=[args.nllfactor],
        epochs=args.epochs,
        callbacks=[callbacks.save_model_after_every_epoch(create_filename("checkpoint", None, args)[:-3] + "_epoch_{}.pt")],
        l1=args.doughl1reg,
        **common_kwargs,
    )
    learning_curves = np.vstack(learning_curves).T

    return learning_curves
def evaluate_test_samples(args, simulator, filename, model=None, ood=False, n_save_reco=100):
    """ Likelihood evaluation """
    logger.info(
        "Evaluating %s samples according to %s, %s likelihood evaluation, saving in %s",
        "the ground truth" if model is None else "a trained model",
        "ood" if ood else "test",
        "with" if not args.skiplikelihood else "without",
        filename,
    )

    # Prepare
    x, _ = simulator.load_dataset(
        train=False,
        numpy=True,
        ood=ood,
        dataset_dir=create_filename("dataset", None, args),
        true_param_id=args.trueparam,
        joint_score=False,
        limit_samplesize=args.evaluate,
    )
    parameter_grid = [None] if simulator.parameter_dim() is None else simulator.eval_parameter_grid(resolution=args.gridresolution)

    log_probs = []
    x_recos = None
    reco_error = None

    # Evaluate
    for i, params in enumerate(parameter_grid):
        logger.debug("Evaluating grid point %s / %s", i + 1, len(parameter_grid))
        if model is None:
            params_ = None if params is None else np.asarray([params for _ in x])
            log_prob = simulator.log_density(x, parameters=params_)

        else:
            log_prob = []
            reco_error_ = []
            x_recos_ = []
            n_batches = (args.evaluate - 1) // args.evalbatchsize + 1
            for j in range(n_batches):
                x_ = torch.tensor(x[j * args.evalbatchsize : (j + 1) * args.evalbatchsize], dtype=torch.float)
                if params is None:
                    params_ = None
                else:
                    params_ = np.asarray([params for _ in x_])
                    params_ = torch.tensor(params_, dtype=torch.float)

                if args.algorithm == "flow":
                    x_reco, log_prob_, _ = model(x_, context=params_)
                elif args.algorithm in ["pie", "slice"]:
                    x_reco, log_prob_, _ = model(x_, context=params_, mode=args.algorithm if not args.skiplikelihood else "projection")
                else:
                    x_reco, log_prob_, _ = model(x_, context=params_, mode="mf" if not args.skiplikelihood else "projection")

                if not args.skiplikelihood:
                    log_prob.append(log_prob_.detach().numpy())
                reco_error_.append((sum_except_batch((x_ - x_reco) ** 2) ** 0.5).detach().numpy())
                x_recos_.append(x_reco.detach().numpy())

            if not args.skiplikelihood:
                log_prob = np.concatenate(log_prob, axis=0)

            if reco_error is None:
                reco_error = np.concatenate(reco_error_, axis=0)
            if x_recos is None:
                x_recos = np.concatenate(x_recos_, axis=0)

        if not args.skiplikelihood:
            log_probs.append(log_prob)

    # Save results
    if len(log_probs) > 0:
        if simulator.parameter_dim() is None:
            log_probs = log_probs[0]
        np.save(create_filename("results", filename.format("log_likelihood"), args), log_probs)
    if len(x_recos) > 0:
        np.save(create_filename("results", filename.format("x_reco"), args), x_recos[:n_save_reco])
    if reco_error is not None:
        np.save(create_filename("results", filename.format("reco_error"), args), reco_error)
    if parameter_grid is not None:
        np.save(create_filename("results", "parameter_grid_test", args), parameter_grid)
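The reconstruction error above relies on a sum_except_batch helper that is not defined in this excerpt. A common definition, assumed here rather than taken from the project, flattens all non-batch dimensions of a tensor and sums over them, so the per-sample reconstruction error becomes a Euclidean norm over all features:

import torch

def sum_except_batch(x):
    """Assumed helper: sum over every dimension except the leading batch dimension."""
    return x.reshape(x.shape[0], -1).sum(dim=-1)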
if args.truth:
    create_modelname(args)
    logger.info("Evaluating simulator truth")
else:
    create_modelname(args)
    logger.info("Evaluating model %s", args.modelname)

# Bug fix related to some num_workers > 1 and CUDA. Bad things happen otherwise!
torch.multiprocessing.set_start_method("spawn", force=True)

# Data set
simulator = load_simulator(args)

# Load model
if not args.truth:
    model = create_model(args, simulator=simulator)
    model.load_state_dict(torch.load(create_filename("model", None, args), map_location=torch.device("cpu")))
    model.eval()
else:
    model = None

# Evaluate generative performance
if args.skipgeneration:
    logger.info("Skipping generative evaluation")
elif not args.truth:
    x_gen = sample_from_model(args, model, simulator)
    evaluate_model_samples(args, simulator, x_gen)

if args.skipinference:
    logger.info("Skipping all inference tasks. Have a nice day!")
    exit()
            if containing_file == path_item:
                is_file_found = True
                break
        if is_file_found:
            utils.chdir(path_item)
            is_file_found = False
        else:
            print(f'NOT FOUND FILE: {path_item}')
            is_allow_continue = False
    return is_allow_continue


file_locs = utils.get_all_file_locs(TARGETS)
file_name = utils.create_filename(FILE_NAME_PREFIX, FILE_EXT)
path_array = DESTINATION.split('\\')
path_root = path_array[0]
utils.chdir(path_root)
utils.log_folders_destination_targets(DESTINATION, TARGETS, file_name)

if validate_target_loc():
    with utils.zipfile.ZipFile(file_name, 'x', utils.zipfile.ZIP_LZMA, True) as backup_file:
        backup_file.close()

    # Starting pool with x workers.
    with Pool(processes=MAX_PROCESSES) as pool:
        # Launching multiple evaluations asynchronously *may* use more processes
        multiple_results = [pool.apply_async(utils.f, (i, file_name,)) for i in file_locs]
                    help='save the agent and the results')
parser.add_argument('--flipped_terminals', action='store_true', default=False,
                    help='flip the rewards associated '
                         'with terminal 1 and terminal 2')
parser.add_argument('--flipped_actions', action='store_true', default=False,
                    help='Shuffle the actions to cancel '
                         'the effect of model learning')

args = parser.parse_args()
experiment_settings = get_experiment_setting(args)
domain_settings = get_domain_setting(args)
filename = create_filename(args)
print("file: ", filename)
experiment_settings['filename'] = filename

if experiment_settings['method'] == 'sarsa_lambda':
    agent_config = []
    from sarsa_lambda.sarsa_lambda import build_agent, load_agent
elif experiment_settings['method'] == 'MuZero':
    from muzero.MuZeroAgent import MuZeroAgent, build_agent
    from muzero.env import muzero_config
    agent_config = muzero_config
    agent_config.flippedTask = args.flipped_terminals
    agent_config.flippedActions = args.flipped_actions
    from muzero.MuZeroAgent import build_agent, load_agent
else:
    assert False, 'HvS: Invalid method id.'
def train_manifold_flow_sequential(args, dataset, model, simulator):
    """ Sequential MFMF-M/D training """
    assert not args.specified

    if simulator.parameter_dim() is None:
        trainer1 = ForwardTrainer(model)
        trainer2 = ForwardTrainer(model)
    else:
        trainer1 = ConditionalForwardTrainer(model)
        if args.scandal is None:
            trainer2 = ConditionalForwardTrainer(model)
        else:
            trainer2 = SCANDALForwardTrainer(model)

    common_kwargs, scandal_loss, scandal_label, scandal_weight = make_training_kwargs(args, dataset)

    callbacks1 = [
        callbacks.save_model_after_every_epoch(create_filename("checkpoint", "A", args)),
        callbacks.print_mf_latent_statistics(),
        callbacks.print_mf_weight_statistics(),
    ]
    callbacks2 = [
        callbacks.save_model_after_every_epoch(create_filename("checkpoint", "B", args)),
        callbacks.print_mf_latent_statistics(),
        callbacks.print_mf_weight_statistics(),
    ]
    if simulator.is_image():
        callbacks1.append(
            callbacks.plot_sample_images(
                create_filename("training_plot", "sample_epoch_A", args),
                context=None if simulator.parameter_dim() is None else torch.zeros(30, simulator.parameter_dim()),
            )
        )
        callbacks2.append(
            callbacks.plot_sample_images(
                create_filename("training_plot", "sample_epoch_B", args),
                context=None if simulator.parameter_dim() is None else torch.zeros(30, simulator.parameter_dim()),
            )
        )
        callbacks1.append(callbacks.plot_reco_images(create_filename("training_plot", "reco_epoch_A", args)))
        callbacks2.append(callbacks.plot_reco_images(create_filename("training_plot", "reco_epoch_B", args)))

    logger.info("Starting training MF, phase 1: manifold training")
    learning_curves = trainer1.train(
        loss_functions=[losses.smooth_l1_loss if args.l1 else losses.mse] + ([] if args.uvl2reg is None else [losses.hiddenl2reg]),
        loss_labels=["L1" if args.l1 else "MSE"] + ([] if args.uvl2reg is None else ["L2_lat"]),
        loss_weights=[args.msefactor] + ([] if args.uvl2reg is None else [args.uvl2reg]),
        epochs=args.epochs // 2,
        parameters=(
            list(model.outer_transform.parameters()) + list(model.encoder.parameters())
            if args.algorithm == "emf"
            else list(model.outer_transform.parameters())
        ),
        callbacks=callbacks1,
        forward_kwargs={"mode": "projection", "return_hidden": args.uvl2reg is not None},
        initial_epoch=args.startepoch,
        **common_kwargs,
    )
    learning_curves = np.vstack(learning_curves).T

    logger.info("Starting training MF, phase 2: density training")
    learning_curves_ = trainer2.train(
        loss_functions=[losses.nll] + scandal_loss,
        loss_labels=["NLL"] + scandal_label,
        loss_weights=[args.nllfactor * nat_to_bit_per_dim(args.modellatentdim)] + scandal_weight,
        epochs=args.epochs - (args.epochs // 2),
        parameters=list(model.inner_transform.parameters()),
        callbacks=callbacks2,
        forward_kwargs={"mode": "mf-fixed-manifold"},
        initial_epoch=args.startepoch - args.epochs // 2,
        **common_kwargs,
    )
    learning_curves = np.vstack((learning_curves, np.vstack(learning_curves_).T))

    return learning_curves
def train_manifold_flow_alternating(args, dataset, model, simulator):
    """ MFMF-A training """
    assert not args.specified

    trainer1 = ForwardTrainer(model) if simulator.parameter_dim() is None else ConditionalForwardTrainer(model)
    trainer2 = (
        ForwardTrainer(model)
        if simulator.parameter_dim() is None
        else ConditionalForwardTrainer(model) if args.scandal is None else SCANDALForwardTrainer(model)
    )
    metatrainer = AlternatingTrainer(model, trainer1, trainer2)

    meta_kwargs = {"dataset": dataset, "initial_lr": args.lr, "scheduler": optim.lr_scheduler.CosineAnnealingLR, "validation_split": args.validationsplit}
    if args.weightdecay is not None:
        meta_kwargs["optimizer_kwargs"] = {"weight_decay": float(args.weightdecay)}
    _, scandal_loss, scandal_label, scandal_weight = make_training_kwargs(args, dataset)

    phase1_kwargs = {"forward_kwargs": {"mode": "projection"}, "clip_gradient": args.clip}
    phase2_kwargs = {"forward_kwargs": {"mode": "mf-fixed-manifold"}, "clip_gradient": args.clip}

    phase1_parameters = (
        list(model.outer_transform.parameters()) + list(model.encoder.parameters())
        if args.algorithm == "emf"
        else model.outer_transform.parameters()
    )
    phase2_parameters = list(model.inner_transform.parameters())

    logger.info("Starting training MF, alternating between reconstruction error and log likelihood")
    learning_curves_ = metatrainer.train(
        loss_functions=[losses.smooth_l1_loss if args.l1 else losses.mse, losses.nll] + scandal_loss,
        # Parenthesized so the base trainers [0, 1] are kept when SCANDAL is disabled
        loss_function_trainers=[0, 1] + ([1] if args.scandal is not None else []),
        loss_labels=["L1" if args.l1 else "MSE", "NLL"] + scandal_label,
        loss_weights=[args.msefactor, args.nllfactor * nat_to_bit_per_dim(args.modellatentdim)] + scandal_weight,
        epochs=args.epochs // 2,
        subsets=args.subsets,
        batch_sizes=[args.batchsize, args.batchsize],
        parameters=[phase1_parameters, phase2_parameters],
        callbacks=[callbacks.save_model_after_every_epoch(create_filename("checkpoint", None, args))],
        trainer_kwargs=[phase1_kwargs, phase2_kwargs],
        **meta_kwargs,
    )
    learning_curves = np.vstack(learning_curves_).T

    return learning_curves
def train_manifold_flow(args, dataset, model, simulator):
    """ MFMF-S training """
    assert not args.specified

    trainer = (
        ForwardTrainer(model)
        if simulator.parameter_dim() is None
        else ConditionalForwardTrainer(model) if args.scandal is None else SCANDALForwardTrainer(model)
    )
    common_kwargs, scandal_loss, scandal_label, scandal_weight = make_training_kwargs(args, dataset)

    logger.info("Starting training MF, phase 1: pretraining on reconstruction error")
    learning_curves = trainer.train(
        loss_functions=[losses.mse],
        loss_labels=["MSE"],
        loss_weights=[args.msefactor],
        epochs=args.epochs // 3,
        callbacks=[callbacks.save_model_after_every_epoch(create_filename("checkpoint", "A", args))],
        forward_kwargs={"mode": "projection"},
        initial_epoch=args.startepoch,
        **common_kwargs,
    )
    learning_curves = np.vstack(learning_curves).T

    logger.info("Starting training MF, phase 2: mixed training")
    learning_curves_ = trainer.train(
        loss_functions=[losses.mse, losses.nll] + scandal_loss,
        loss_labels=["MSE", "NLL"] + scandal_label,
        loss_weights=[args.msefactor, args.addnllfactor * nat_to_bit_per_dim(args.modellatentdim)] + scandal_weight,
        epochs=args.epochs - 2 * (args.epochs // 3),
        parameters=list(model.parameters()),
        callbacks=[callbacks.save_model_after_every_epoch(create_filename("checkpoint", "B", args))],
        forward_kwargs={"mode": "mf"},
        initial_epoch=args.startepoch - (args.epochs // 3),
        **common_kwargs,
    )
    learning_curves_ = np.vstack(learning_curves_).T
    learning_curves = learning_curves_ if learning_curves is None else np.vstack((learning_curves, learning_curves_))

    logger.info("Starting training MF, phase 3: training only inner flow on NLL")
    learning_curves_ = trainer.train(
        loss_functions=[losses.mse, losses.nll] + scandal_loss,
        loss_labels=["MSE", "NLL"] + scandal_label,
        loss_weights=[0.0, args.nllfactor * nat_to_bit_per_dim(args.modellatentdim)] + scandal_weight,
        epochs=args.epochs // 3,
        parameters=list(model.inner_transform.parameters()),
        callbacks=[callbacks.save_model_after_every_epoch(create_filename("checkpoint", "C", args))],
        forward_kwargs={"mode": "mf-fixed-manifold"},
        initial_epoch=args.startepoch - (args.epochs - (args.epochs // 3)),
        **common_kwargs,
    )
    # learning_curves_ is already stacked and transposed above, so stack it directly
    learning_curves_ = np.vstack(learning_curves_).T
    learning_curves = np.vstack((learning_curves, learning_curves_))

    return learning_curves
if __name__ == "__main__":
    # Logger
    args = parse_args()
    logging.basicConfig(
        format="%(asctime)-5.5s %(name)-20.20s %(levelname)-7.7s %(message)s",
        datefmt="%H:%M",
        level=logging.DEBUG if args.debug else logging.INFO,
    )
    logger.info("Hi!")
    logger.debug("Starting train.py with arguments %s", args)

    create_modelname(args)

    if args.resume is not None:
        resume_filename = create_filename("resume", None, args)
        args.startepoch = args.resume
        logger.info("Resuming training. Loading file %s and continuing with epoch %s.", resume_filename, args.resume + 1)
    elif args.load is None:
        logger.info("Training model %s with algorithm %s on data set %s", args.modelname, args.algorithm, args.dataset)
    else:
        logger.info("Loading model %s and training it as %s with algorithm %s on data set %s", args.load, args.modelname, args.algorithm, args.dataset)

    # Bug fix related to some num_workers > 1 and CUDA. Bad things happen otherwise!
    torch.multiprocessing.set_start_method("spawn", force=True)
    # Generate samples
    logger.info("Evaluating sample closure")
    x_gen = evaluate.sample_from_model(margs, model, simulator)
    distances_gen = simulator.distance_from_manifold(x_gen)
    mean_gen_distance = np.mean(distances_gen)

    # Report results
    logger.info("Results:")
    logger.info("  reco err:     %s", reco_error)
    logger.info("  gen distance: %s", mean_gen_distance)

    return margs.metricrecoerrorfactor * reco_error + margs.metricdistancefactor * mean_gen_distance


# Load saved study object
if args.resumestudy:
    filename = create_filename("paramscan", None, args)
    logger.info("Loading parameter scan from %s", filename)
    with open(filename, "rb") as file:
        study = pickle.load(file)
else:
    study = optuna.create_study(study_name=args.paramscanstudyname, direction="minimize")

# Optimize!
try:
    study.optimize(objective, n_trials=args.trials)
except (KeyboardInterrupt, SystemExit):
    logger.warning("Optimization interrupted!")
def objective(trial):
    global counter
    counter += 1

    # Hyperparameters
    margs = pick_parameters(args, trial, counter)

    logger.info(f"Starting run {counter} / {args.trials}")
    logger.info("Hyperparams:")
    logger.info(f"  outer layers:      {margs.outerlayers}")
    logger.info(f"  inner layers:      {margs.innerlayers}")
    logger.info(f"  linear transform:  {margs.lineartransform}")
    logger.info(f"  spline range:      {margs.splinerange}")
    logger.info(f"  spline bins:       {margs.splinebins}")
    logger.info(f"  batchnorm:         {margs.batchnorm}")
    logger.info(f"  dropout:           {margs.dropout}")
    logger.info(f"  batch size:        {margs.batchsize}")
    logger.info(f"  MSE factor:        {margs.msefactor}")
    logger.info(f"  latent L2 reg:     {margs.uvl2reg}")
    logger.info(f"  weight decay:      {margs.weightdecay}")
    logger.info(f"  gradient clipping: {margs.clip}")

    # Bug fix related to some num_workers > 1 and CUDA. Bad things happen otherwise!
    torch.multiprocessing.set_start_method("spawn", force=True)

    # Load data
    simulator = load_simulator(margs)
    dataset = simulator.load_dataset(train=True, dataset_dir=create_filename("dataset", None, args), limit_samplesize=margs.samplesize)

    # Create model
    model = create_model(margs, simulator)

    # Train
    trainer1 = ForwardTrainer(model) if simulator.parameter_dim() is None else ConditionalForwardTrainer(model)
    trainer2 = ForwardTrainer(model) if simulator.parameter_dim() is None else ConditionalForwardTrainer(model)
    common_kwargs, _, _, _ = train.make_training_kwargs(margs, dataset)

    logger.info("Starting training MF, phase 1: manifold training")
    np.random.seed(123)
    _, val_losses = trainer1.train(
        loss_functions=[losses.mse, losses.hiddenl2reg],
        loss_labels=["MSE", "L2_lat"],
        loss_weights=[margs.msefactor, 0.0 if margs.uvl2reg is None else margs.uvl2reg],
        epochs=margs.epochs,
        parameters=(
            list(model.outer_transform.parameters()) + list(model.encoder.parameters())
            if args.algorithm == "emf"
            else model.outer_transform.parameters()
        ),
        forward_kwargs={"mode": "projection", "return_hidden": True},
        **common_kwargs,
    )

    logger.info("Starting training MF, phase 2: density training")
    np.random.seed(123)
    _ = trainer2.train(
        loss_functions=[losses.nll],
        loss_labels=["NLL"],
        loss_weights=[args.nllfactor],
        epochs=args.densityepochs,
        parameters=model.inner_transform.parameters(),
        forward_kwargs={"mode": "mf-fixed-manifold"},
        **common_kwargs,
    )

    # Save
    torch.save(model.state_dict(), create_filename("model", None, margs))

    # Evaluate reco error
    logger.info("Evaluating reco error")
    model.eval()
    np.random.seed(123)
    x, params = next(
        iter(
            trainer1.make_dataloader(
                simulator.load_dataset(train=True, dataset_dir=create_filename("dataset", None, args), limit_samplesize=args.samplesize),
                args.validationsplit,
                1000,
                0,
            )[1]
        )
    )
    x = x.to(device=trainer1.device, dtype=trainer1.dtype)
    params = None if simulator.parameter_dim() is None else params.to(device=trainer1.device, dtype=trainer1.dtype)
    x_reco, _, _ = model(x, context=params, mode="projection")
    reco_error = torch.mean(torch.sum((x - x_reco) ** 2, dim=1) ** 0.5).detach().cpu().numpy()

    # Generate samples
    logger.info("Evaluating sample closure")
    x_gen = evaluate.sample_from_model(margs, model, simulator)
    distances_gen = simulator.distance_from_manifold(x_gen)
    mean_gen_distance = np.mean(distances_gen)

    # Report results
    logger.info("Results:")
    logger.info("  reco err:     %s", reco_error)
    logger.info("  gen distance: %s", mean_gen_distance)

    return margs.metricrecoerrorfactor * reco_error + margs.metricdistancefactor * mean_gen_distance
if args.truth:
    logger.info("Evaluating simulator truth")
else:
    create_modelname(args)
    logger.info("Evaluating model %s", args.modelname)

# Bug fix related to some num_workers > 1 and CUDA. Bad things happen otherwise!
torch.multiprocessing.set_start_method("spawn", force=True)

# Data set
simulator = load_simulator(args)

# Load model
if not args.truth:
    model = create_model(args, simulator=simulator)
    model.load_state_dict(torch.load(create_filename("model", None, args), map_location=torch.device("cpu")))
    model.eval()
else:
    model = None

# Evaluate generative performance
if args.skipgeneration:
    logger.info("Skipping generative evaluation as per request.")
elif not args.truth:
    x_gen = sample_from_model(args, model, simulator)
    evaluate_model_samples(args, simulator, x_gen)

if args.skipinference:
    logger.info("Skipping all inference tasks as per request. Have a nice day!")