def run_single(device, M, noise, epochs, adaptive, prior_weight, dataset_name, fold, n_folds):
    # Get data and model
    test_size = 1 / n_folds if n_folds > 1 else 0
    dataset = load_data(dataset_name, seed=fold, device=device, test_size=test_size)
    dataset.add_noise(noise)
    collapsed = (dataset.task_type == "regression")
    model = get_model(dataset, n_inducing=M, device=device, collapsed=collapsed,
                      prior_weight=prior_weight)

    # Initialize hyperparameters
    gp, = model.gps
    gp.kernel.outputscale = 1.0
    gp.kernel.base_kernel.lengthscale = 1.0

    if not adaptive:
        # Fit model
        model.fit(X=dataset.X_train, Y=dataset.Y_train, max_epochs=epochs)
    else:
        # Pre-fit
        model.fit(X=dataset.X_train, Y=dataset.Y_train, max_epochs=epochs // 2)

        # Prune
        gp.prior_point_process.rate.fill_(prior_weight)
        gp.variational_point_process.probabilities = 0.5
        model.fit_score_function_estimator(X=dataset.X_train, Y=dataset.Y_train,
                                           learning_rate=0.3, max_epochs=300,
                                           n_mcmc_samples=64)
        remove_points(gp)
        eprint(f"Post pruning: {gp.n_inducing}\n")

        # Post-fit
        model.fit(X=dataset.X_train, Y=dataset.Y_train, max_epochs=epochs)

    # Log metrics
    scaled_log_lik, KL = get_ELBO(model, dataset, reps=1)
    ELBO = scaled_log_lik - KL
    mlflow.log_metrics({
        "ELBO": ELBO,
        "n_inducing": gp.n_inducing,
    })
    if adaptive:
        vpp = gp.variational_point_process
        mlflow.log_metrics({
            "mean_M": vpp.expected_points.item(),
            "var_M": vpp.expected_points_variance.item(),
        })
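
# NOTE: `remove_points` is a project-local helper that is not defined in this
# file. The sketch below only illustrates what the pruning step is assumed to
# do, given that each GP exposes inclusion probabilities through
# `variational_point_process.probabilities` (as used above): sample a Bernoulli
# inclusion mask and keep the selected pseudo-points. `inducing_inputs` and
# `set_inducing_inputs` are hypothetical stand-ins for however the model
# actually stores its inducing points.
def remove_points_sketch(gp):
    import torch

    with torch.no_grad():
        p = gp.variational_point_process.probabilities
        keep = torch.bernoulli(p).bool()          # sampled inclusion mask
        gp.set_inducing_inputs(gp.inducing_inputs[keep])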
def run(M, device, prior_weight, n_folds):
    # ID of currently running experiment
    exp_id = get_experiment_id("dgp_kin8nm_adaptive_pruning")

    for fold in range(1, n_folds + 1):
        eprint(bold(f"Fold {fold}/{n_folds}"))

        # Set parameters and tags defining this run
        params = {"M": M, "prior_weight": prior_weight, "fold": fold}
        if run_exists(params):
            eprint(green("Already exists\n"))
            continue

        with mlflow.start_run(experiment_id=exp_id):
            mlflow.log_params(params)
            run_single(device=device, n_folds=n_folds, **params)
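
# NOTE: `run_exists` is a project-local helper that is not shown in this file.
# A minimal sketch of how it could be implemented with the public MLflow API:
# search the experiment for a run whose logged parameters match `params`. The
# experiment name below is only an example, and MLflow stores parameters as
# strings, which the f-string conversion in the filter accounts for.
def run_exists_sketch(params, experiment_name="dgp_kin8nm_adaptive_pruning"):
    import mlflow

    experiment = mlflow.get_experiment_by_name(experiment_name)
    if experiment is None:
        return False
    filter_string = " and ".join(f"params.{key} = '{value}'"
                                 for key, value in params.items())
    runs = mlflow.search_runs(experiment_ids=[experiment.experiment_id],
                              filter_string=filter_string)
    return len(runs) > 0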
def run_single(M1, M2, fold, n_folds, device):
    test_size = 1 / n_folds

    # Get dataset and model
    dataset = load_data("uci_kin8nm", seed=fold, device=device, test_size=test_size)
    model = get_model(dataset, n_inducing=[M1, M2], n_layers=2, device=device,
                      add_input=True)

    # Create callback for logging status to tracking server
    def status_cb():
        mlflow.log_metric("current_epoch", model.epoch.item())

    model.register_callback(status_cb, update_interval=10)

    # Fit model
    eprint(bold("Layerwise fitting"))
    fit_layerwise(model, dataset, batch_size=4096, max_epochs=500)
    eprint(bold("\nJoint fitting"))
    model.fit(X=dataset.X_train, Y=dataset.Y_train, batch_size=4096, max_epochs=3000)

    # Log metrics
    eprint(bold("\nEvaluating metrics"))
    model.eval()
    log_lik, KL = get_ELBO(model, dataset, batch_size=4096)
    clock_time, wall_time = get_prediction_times(model, dataset)
    train_log_lik, test_log_lik = get_loglik(model, dataset, train=True, test=True,
                                             batch_size=4096)
    mlflow.log_metrics({
        "log_lik": log_lik,
        "KL": KL,
        "ELBO": train_log_lik - KL,
        "clock_time": clock_time,
        "wall_time": wall_time,
        "train_log_lik": train_log_lik,
        "test_log_lik": test_log_lik,
    })
    eprint()
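
# NOTE: `fit_layerwise` is a project-local helper that is not defined here. A
# rough sketch of the assumed behaviour: optimise one GP layer at a time by
# freezing the parameters of the other layers before running the usual fit,
# giving a cheaper initialisation ahead of the joint fit above. The training
# loop of the real helper may differ.
def fit_layerwise_sketch(model, dataset, batch_size, max_epochs):
    for active_layer, _ in enumerate(model.gps):
        # Only the active layer receives gradient updates in this pass
        for layer, gp in enumerate(model.gps):
            for parameter in gp.parameters():
                parameter.requires_grad_(layer == active_layer)

        model.fit(X=dataset.X_train, Y=dataset.Y_train,
                  batch_size=batch_size, max_epochs=max_epochs)

    # Unfreeze everything again for the subsequent joint fit
    for gp in model.gps:
        for parameter in gp.parameters():
            parameter.requires_grad_(True)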
def run_all(grid_range, index_range, device, n_folds):
    grid_range = np.arange(*grid_range)
    K = len(grid_range)  # Number of steps along each grid dimension

    exp_id = get_experiment_id("dgp_kin8nm_gridsearch")

    if not index_range:
        index_range = (0, K**2)
    index_range = range(*index_range)
    eprint(f"Index range [{min(index_range)}, {max(index_range) + 1}) out of "
           f"{K**2} in total.")

    for i, fold in itertools.product(index_range, range(1, n_folds + 1)):
        # Get number of inducing points per dimension
        M1, M2 = [grid_range[n] for n in np.unravel_index(i, [K] * 2)]
        eprint(f"{bold('Grid index')}: {i}\n"
               f"{bold('M1')}: {M1}\n"
               f"{bold('M2')}: {M2}\n"
               f"{bold('Fold')} {fold}/{n_folds}")

        # Set parameters defining this run
        params = {"M1": M1, "M2": M2, "fold": fold}
        if run_exists(params):
            eprint(green("Already exists\n"))
            continue

        with mlflow.start_run(experiment_id=exp_id):
            mlflow.log_params(params)
            run_single(device=device, n_folds=n_folds, **params)
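
# Worked example of the flat-index-to-grid mapping used above, with made-up
# grid values: a grid range of (100, 600, 100) gives K = 5 inducing-point
# counts per layer, and flat index 7 unravels to the pair (1, 2), i.e.
# M1 = 200 and M2 = 300.
def _unravel_index_example():
    import numpy as np

    grid_range = np.arange(100, 600, 100)   # array([100, 200, 300, 400, 500])
    K = len(grid_range)
    M1, M2 = [grid_range[n] for n in np.unravel_index(7, [K] * 2)]
    assert (M1, M2) == (200, 300)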
def run_all(characteristic, inducing_range, adaptive, device, prior_weight,
            n_posterior_samples):
    exp_id = get_experiment_id("controlled_setting_synth_data")

    characteristic_vector = {
        "noise": logspace(0.3, 1.0, 5).tolist(),
        "clustering": logspace(0.03, 0.5, 5).tolist(),
        "lengthscale": logspace(0.8, 2.5, 5).tolist(),
    }[characteristic]
    Ms = torch.arange(*inducing_range).tolist()
    product = [(c, M) for c in characteristic_vector for M in Ms]

    for c, M in product:
        eprint(f"{characteristic}: {c:.3f}\n"
               f"M: {M}\n")

        # Set parameters defining this run
        params = {
            "M": M,
            "characteristic": characteristic,
            "char_value": c,
            "adaptive": adaptive,
            "prior_weight": prior_weight,
        }
        if run_exists(params):
            eprint(green("Already exists\n"))
            continue

        with mlflow.start_run(experiment_id=exp_id):
            mlflow.log_params(params)
            run_single(device=device, n_posterior_samples=n_posterior_samples, **params)
        eprint()
def run_all(dataset_name, epochs, device, n_folds, adaptive, prior_weight, noise,
            inducing_range):
    dataset = load_data(dataset_name)
    eprint(f"{bold('Dataset: ')} {dataset_name}\n"
           f"{bold('Task type: ')} {dataset.task_type}\n"
           f"{bold('N: ')} {len(dataset)}\n"
           f"{bold('D: ')} {dataset.input_dims}\n")

    Ms = torch.arange(*inducing_range).tolist()

    # ID of currently running experiment
    exp_id = get_experiment_id("controlled_setting_real_data")

    for M, fold in itertools.product(Ms, range(1, n_folds + 1)):
        eprint(f"{bold('Noise: ')} {noise:.3f}\n"
               f"{bold('M: ')} {M}\n"
               f"Fold {fold}/{n_folds}")

        # Set parameters defining this run
        params = {
            "M": M,
            "noise": noise,
            "epochs": epochs,
            "adaptive": adaptive,
            "prior_weight": prior_weight,
            "dataset_name": dataset_name,
            "fold": fold,
        }
        if run_exists(params):
            eprint(green("Already exists\n"))
            continue

        with mlflow.start_run(experiment_id=exp_id):
            mlflow.log_params(params)
            run_single(n_folds=n_folds, device=device, **params)
        eprint()
def run_single(M, characteristic, char_value, adaptive, prior_weight, device,
               n_posterior_samples):
    default_values = {"noise": 0.3, "clustering": None, "lengthscale": 1.0}
    default_values[characteristic] = char_value

    dataset = load_synthetic_data(**default_values, seed=0, device=device)
    model = get_model(dataset, n_inducing=M, n_layers=1, device=device,
                      scale_X=False, scale_Y=False, collapsed=True,
                      prior_weight=prior_weight)

    # Initialise hyper-parameters
    gp, = model.gps
    gp.kernel.outputscale = 1.0
    gp.kernel.base_kernel.lengthscale = default_values["lengthscale"]

    # Initialise inducing points with k-means clustering
    initialize_inducing(model, dataset.X_train, dataset.Y_train)

    # Get log marginal likelihood of an exact GP
    exact_log_lik = get_exact_log_lik(dataset.X_train, dataset.Y_train)
    mlflow.log_metric("exact_log_lik", exact_log_lik)

    if not adaptive:
        # Fit model and record log likelihood
        model.fit(X=dataset.X_train, Y=dataset.Y_train, max_epochs=300)
        log_lik, KL = get_ELBO(model, dataset, batch_size=None, reps=1)
        mlflow.log_metrics({
            "sparse_log_lik": log_lik,
            "sparse_KL": KL,
            "sparse_ELBO": log_lik - KL,
            "n_u": gp.n_inducing,
        })
    else:
        # Pre-fit
        model.fit(X=dataset.X_train, Y=dataset.Y_train, max_epochs=300)

        # Prune model
        model.fit_score_function_estimator(X=dataset.X_train, Y=dataset.Y_train,
                                           max_epochs=500, n_mcmc_samples=16,
                                           learning_rate=0.3)

        # Record statistics
        vpp = gp.variational_point_process
        with torch.no_grad():
            p = vpp.probabilities
            expected_points = p.sum().item()
            stddev_points = (p * (1 - p)).sum().sqrt().item()
        mlflow.log_metrics({
            "expected_points": expected_points,
            "stddev_points": stddev_points,
        })

        # Draw sets from point process and record log likelihood
        state_dict = deepcopy(gp.state_dict())
        for i in range(n_posterior_samples):
            eprint(f"\nSample {i + 1:02d}/{n_posterior_samples}")
            if i > 0:
                # Restore the pre-pruning state before drawing a new set of points
                for n, t in gp.named_parameters():
                    t.data = state_dict[n]
                    t.grad = None
                for n, t in gp.named_buffers():
                    t.data = state_dict[n]
            remove_points(gp)

            # Post-fit
            model.fit(X=dataset.X_train, Y=dataset.Y_train, max_epochs=200)
            log_lik, KL = get_ELBO(model, dataset, batch_size=None, reps=1)
            prefix = f"draw_{i:02d}__"
            mlflow.log_metrics({
                prefix + "sparse_log_lik": log_lik,
                prefix + "sparse_KL": KL,
                prefix + "sparse_ELBO": log_lik - KL,
                prefix + "n_u": gp.n_inducing,
            })
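
# The statistics recorded above treat the pseudo-point inclusions as
# independent Bernoulli variables, so the number of retained points has mean
# sum(p_i) and variance sum(p_i * (1 - p_i)). A small numeric illustration
# with made-up inclusion probabilities:
def _point_count_statistics_example():
    import torch

    p = torch.tensor([0.9, 0.5, 0.1])                   # example probabilities
    expected_points = p.sum().item()                    # 1.5
    stddev_points = (p * (1 - p)).sum().sqrt().item()   # sqrt(0.43) ≈ 0.656
    return expected_points, stddev_points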
def run_single(prior_weight, device, M, fold, n_folds):
    # Get dataset and model
    test_size = 1 / n_folds
    dataset = load_data("uci_kin8nm", seed=fold, device=device, test_size=test_size)
    model = get_model(dataset, n_inducing=M, n_layers=2, device=device, add_input=True)

    # Create callback for logging status to tracking server
    def status_cb():
        mlflow.set_tag("current_epoch", model.epoch.item())

    model.register_callback(status_cb, update_interval=10)

    # Pre-fit model, first one layer at a time, then all layers jointly
    eprint(bold("\nLayerwise pre-fit"))
    fit_layerwise(model, dataset, batch_size=4096, max_epochs=300)
    eprint(bold("\nJoint pre-fit"))
    model.fit(X=dataset.X_train, Y=dataset.Y_train, batch_size=4096, max_epochs=500)

    # Infer probabilities of inclusion for all pseudo-points and sample
    # from the resulting distribution to prune the model
    eprint(bold("\nPruning"))
    for gp in model.gps:
        gp.variational_point_process.probabilities = 0.8
    model.fit_score_function_estimator(X=dataset.X_train, Y=dataset.Y_train,
                                       learning_rate=0.3, max_epochs=10,
                                       n_mcmc_samples=32)
    for gp in model.gps:
        remove_points(gp)

    # Post-fit model, all layers jointly
    eprint(bold("\nJoint post-fit"))
    model.fit(X=dataset.X_train, Y=dataset.Y_train, batch_size=4096, max_epochs=500)

    # Log metrics
    eprint(bold("\nEvaluating metrics"))
    model.eval()
    log_lik, KL = get_ELBO(model, dataset, batch_size=4096)
    clock_time, wall_time = get_prediction_times(model, dataset)
    train_log_lik, test_log_lik = get_loglik(model, dataset, train=True, test=True,
                                             batch_size=4096)
    mlflow.log_metrics({
        "log_lik": log_lik,
        "KL": KL,
        "ELBO": train_log_lik - KL,
        "clock_time": clock_time,
        "wall_time": wall_time,
        "train_log_lik": train_log_lik,
        "test_log_lik": test_log_lik,
    })
    for layer, gp in enumerate(model.gps, 1):
        mlflow.log_param(f"M{layer}", gp.n_inducing)
    eprint()
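
# NOTE: `get_prediction_times` is a project-local helper that is not shown
# here. A rough sketch of the assumed measurement: time one pass of
# predictions over the held-out inputs, reporting CPU (process) time and
# wall-clock time separately. `model.predict` and `dataset.X_test` are
# hypothetical stand-ins for the actual prediction entry point and test split.
def get_prediction_times_sketch(model, dataset):
    import time

    import torch

    with torch.no_grad():
        clock_start, wall_start = time.process_time(), time.perf_counter()
        model.predict(dataset.X_test)
        clock_time = time.process_time() - clock_start
        wall_time = time.perf_counter() - wall_start

    return clock_time, wall_time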