def run_single(device, M, noise, epochs, adaptive, prior_weight, dataset_name,
               fold, n_folds):
    # Get data and model
    test_size = 1 / n_folds if n_folds > 1 else 0
    dataset = load_data(dataset_name,
                        seed=fold,
                        device=device,
                        test_size=test_size)
    dataset.add_noise(noise)

    collapsed = (dataset.task_type == "regression")

    model = get_model(dataset,
                      n_inducing=M,
                      device=device,
                      collapsed=collapsed,
                      prior_weight=prior_weight)

    # Initialize hyperparameters
    gp, = model.gps  # single-layer model: unpack the only GP
    gp.kernel.outputscale = 1.0
    gp.kernel.base_kernel.lengthscale = 1.0

    if not adaptive:
        # Fit model
        model.fit(X=dataset.X_train, Y=dataset.Y_train, max_epochs=epochs)
    else:
        # Pre-fit
        model.fit(X=dataset.X_train, Y=dataset.Y_train, max_epochs=epochs // 2)

        # Prune: reset the point-process prior rate and start the
        # inclusion probabilities at 0.5 before fitting the estimator
        gp.prior_point_process.rate.fill_(prior_weight)
        gp.variational_point_process.probabilities = 0.5
        model.fit_score_function_estimator(X=dataset.X_train,
                                           Y=dataset.Y_train,
                                           learning_rate=0.3,
                                           max_epochs=300,
                                           n_mcmc_samples=64)
        remove_points(gp)
        eprint(f"Post pruning: {gp.n_inducing}\n")

        # Post-fit
        model.fit(X=dataset.X_train, Y=dataset.Y_train, max_epochs=epochs)

    # Log metrics
    scaled_log_lik, KL = get_ELBO(model, dataset, reps=1)
    ELBO = scaled_log_lik - KL
    mlflow.log_metrics({
        "ELBO": ELBO,
        "n_inducing": gp.n_inducing,
    })

    if adaptive:
        vpp = gp.variational_point_process
        mlflow.log_metrics({
            "mean_M": vpp.expected_points.item(),
            "var_M": vpp.expected_points_variance.item(),
        })
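remove_points is not defined in these examples; a minimal sketch of what it is assumed to do (the inducing_points and n_inducing attribute names are assumptions, not a confirmed API):

import torch

def remove_points(gp):
    # Draw one inclusion sample per pseudo-point from the variational
    # point process and keep only the sampled points in place.
    probs = gp.variational_point_process.probabilities
    with torch.no_grad():
        keep = torch.bernoulli(probs).bool()
    gp.inducing_points = gp.inducing_points[keep]  # assumed attribute
    gp.n_inducing = int(keep.sum())                # assumed attribute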
Example #2
def run(M, device, prior_weight, n_folds):
    # ID of currently running experiment
    exp_id = get_experiment_id("dgp_kin8nm_adaptive_pruning")

    for fold in range(1, n_folds + 1):
        eprint(bold(f"Fold {fold}/{n_folds}"))

        # Set parameters and tags defining this run
        params = {"M": M, "prior_weight": prior_weight, "fold": fold}

        if run_exists(params):
            eprint(green("Already exists\n"))
            continue

        with mlflow.start_run(experiment_id=exp_id):
            mlflow.log_params(params)
            run_single(device=device, n_folds=n_folds, **params)
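run_exists is assumed to query the MLflow tracking server for a finished run with identical parameters; a rough sketch under that assumption:

import mlflow

def run_exists(params):
    # Look for a finished run whose logged parameters match `params`
    # exactly (searches the active experiment by default).
    query = " and ".join(f"params.{k} = '{v}'" for k, v in params.items())
    query += " and attributes.status = 'FINISHED'"
    runs = mlflow.search_runs(filter_string=query)
    return not runs.empty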
Example #3
def run_single(M1, M2, fold, n_folds, device):
    test_size = 1 / n_folds

    # Get dataset and model
    dataset = load_data("uci_kin8nm",
                        seed=fold,
                        device=device,
                        test_size=test_size)
    model = get_model(dataset,
                      n_inducing=[M1, M2],
                      n_layers=2,
                      device=device,
                      add_input=True)

    # Create callback for logging status to tracking server
    def status_cb():
        mlflow.log_metric("current_epoch", model.epoch.item())

    model.register_callback(status_cb, update_interval=10)

    # Fit model
    eprint(bold("Layerwise fitting"))
    fit_layerwise(model, dataset, batch_size=4096, max_epochs=500)

    eprint(bold("\nJoint fitting"))
    model.fit(X=dataset.X_train,
              Y=dataset.Y_train,
              batch_size=4096,
              max_epochs=3000)

    # Log metrics
    eprint(bold("\nEvaluating metrics"))
    model.eval()
    log_lik, KL = get_ELBO(model, dataset, batch_size=4096)
    clock_time, wall_time = get_prediction_times(model, dataset)
    train_log_lik, test_log_lik = get_loglik(model,
                                             dataset,
                                             train=True,
                                             test=True,
                                             batch_size=4096)

    mlflow.log_metrics({
        "log_lik": log_lik,
        "KL": KL,
        "ELBO": train_log_lik - KL,
        "clock_time": clock_time,
        "wall_time": wall_time,
        "train_log_lik": train_log_lik,
        "test_log_lik": test_log_lik,
    })

    eprint()
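fit_layerwise is assumed to optimize one GP layer at a time before the joint fit; a minimal sketch, where toggling requires_grad_ per layer is an assumption about how the model exposes its layers:

def fit_layerwise(model, dataset, batch_size, max_epochs):
    # Freeze all layers except the one currently being fitted,
    # fit it, then re-enable gradients for the subsequent joint fit.
    for gp in model.gps:
        for other in model.gps:
            other.requires_grad_(other is gp)
        model.fit(X=dataset.X_train, Y=dataset.Y_train,
                  batch_size=batch_size, max_epochs=max_epochs)
    for gp in model.gps:
        gp.requires_grad_(True)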
Example #4
def run_all(grid_range, index_range, device, n_folds):
    grid_range = np.arange(*grid_range)
    K = len(grid_range)  # Number of steps along each grid dimension

    exp_id = get_experiment_id("dgp_kin8nm_gridsearch")

    if not index_range:
        index_range = (0, K**2)
    index_range = range(*index_range)

    eprint(f"Index range [{min(index_range)}, {max(index_range) + 1}) out of "
           f"{K**2} in total.")

    for i, fold in itertools.product(index_range, range(1, n_folds + 1)):

        # Get number of inducing points per dimension
        M1, M2 = [grid_range[n] for n in np.unravel_index(i, [K] * 2)]

        eprint(f"{bold('Grid index')}: {i}\n"
               f"{bold('M1')}:         {M1}\n"
               f"{bold('M2')}:         {M2}\n"
               f"{bold('Fold')}        {fold}/{n_folds}")

        # Set parameters defining this run
        params = {"M1": M1, "M2": M2, "fold": fold}

        if run_exists(params):
            eprint(green("Already exists\n"))
            continue

        with mlflow.start_run(experiment_id=exp_id):
            mlflow.log_params(params)
            run_single(device=device, n_folds=n_folds, **params)
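The flat grid index i is mapped onto the two layer dimensions with np.unravel_index; a worked example with illustrative values:

import numpy as np

grid_range = np.arange(100, 600, 100)       # M in {100, 200, ..., 500}
K = len(grid_range)                         # K = 5
row, col = np.unravel_index(7, [K] * 2)     # 7 = 1 * K + 2, so (1, 2)
M1, M2 = grid_range[row], grid_range[col]   # (200, 300)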
Example #5
def run_all(characteristic, inducing_range, adaptive, device, prior_weight,
            n_posterior_samples):
    exp_id = get_experiment_id("controlled_setting_synth_data")

    characteristic_vector = {
        "noise": logspace(0.3, 1.0, 5).tolist(),
        "clustering": logspace(0.03, 0.5, 5).tolist(),
        "lengthscale": logspace(0.8, 2.5, 5).tolist(),
    }[characteristic]

    Ms = torch.arange(*inducing_range).tolist()
    product = [(n, M) for n in characteristic_vector for M in Ms]
    for c, M in product:
        eprint(f"{characteristic}: {c:.3f}\n" f"M: {M}\n")

        # Set parameters defining this run
        params = {
            "M": M,
            "characteristic": characteristic,
            "char_value": c,
            "adaptive": adaptive,
            "prior_weight": prior_weight
        }

        if run_exists(params):
            eprint(green("Already exists\n"))
            continue

        with mlflow.start_run(experiment_id=exp_id):
            mlflow.log_params(params)
            run_single(device=device,
                       n_posterior_samples=n_posterior_samples,
                       **params)

        eprint()
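logspace here appears to take the endpoint values themselves (e.g. noise from 0.3 to 1.0) rather than base-10 exponents, so it is presumably a thin wrapper around geometric spacing; a sketch under that assumption:

import numpy as np

def logspace(start, stop, num):
    # Assumed helper: `num` geometrically spaced values from `start`
    # to `stop` inclusive (unlike np.logspace, which takes exponents).
    return np.geomspace(start, stop, num)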
Example #6
def run_all(dataset_name, epochs, device, n_folds, adaptive, prior_weight,
            noise, inducing_range):

    dataset = load_data(dataset_name)
    eprint(f"{bold('Dataset:   ')} {dataset_name}\n"
           f"{bold('Task type: ')} {dataset.task_type}\n"
           f"{bold('N:         ')} {len(dataset)}\n"
           f"{bold('D:         ')} {dataset.input_dims}\n")

    Ms = torch.arange(*inducing_range).tolist()

    # ID of currently running experiment
    exp_id = get_experiment_id("controlled_setting_real_data")

    for M, fold in itertools.product(Ms, range(1, n_folds + 1)):
        eprint(f"{bold('Noise: ')} {noise:.3f}\n"
               f"{bold('M:     ')} {M}\n"
               f"Fold {fold}/{n_folds}")

        # Set parameters defining this run
        params = {
            "M": M,
            "noise": noise,
            "epochs": epochs,
            "adaptive": adaptive,
            "prior_weight": prior_weight,
            "dataset_name": dataset_name,
            "fold": fold
        }

        if run_exists(params):
            eprint(green("Already exists\n"))
            continue

        with mlflow.start_run(experiment_id=exp_id):
            mlflow.log_params(params)
            run_single(n_folds=n_folds, device=device, **params)

        eprint()
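A hypothetical invocation of this driver (all values are illustrative, not taken from the experiments):

run_all(dataset_name="uci_kin8nm",
        epochs=1000,
        device="cuda:0",
        n_folds=5,
        adaptive=True,
        prior_weight=0.05,
        noise=0.0,
        inducing_range=(50, 500, 50))  # M in {50, 100, ..., 450}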
Example #7
def run_single(M, characteristic, char_value, adaptive, prior_weight, device,
               n_posterior_samples):

    default_values = {"noise": 0.3, "clustering": None, "lengthscale": 1.0}
    default_values[characteristic] = char_value

    dataset = load_synthetic_data(**default_values, seed=0, device=device)
    model = get_model(dataset,
                      n_inducing=M,
                      n_layers=1,
                      device=device,
                      scale_X=False,
                      scale_Y=False,
                      collapsed=True,
                      prior_weight=prior_weight)

    # Initialize hyperparameters
    gp, = model.gps  # single-layer model: unpack the only GP
    gp.kernel.outputscale = 1.0
    gp.kernel.base_kernel.lengthscale = default_values["lengthscale"]

    # Initialize inducing points with k-means clustering
    initialize_inducing(model, dataset.X_train, dataset.Y_train)

    # Get log marginal of an exact GP
    exact_log_lik = get_exact_log_lik(dataset.X_train, dataset.Y_train)
    mlflow.log_metric("exact_log_lik", exact_log_lik)

    if not adaptive:
        # Fit model and record log likelihood
        model.fit(X=dataset.X_train, Y=dataset.Y_train, max_epochs=300)

        log_lik, KL = get_ELBO(model, dataset, batch_size=None, reps=1)
        mlflow.log_metrics({
            "sparse_log_lik": log_lik,
            "sparse_KL": KL,
            "sparse_ELBO": log_lik - KL,
            "n_u": gp.n_inducing,
        })
    else:
        # Pre-fit
        model.fit(X=dataset.X_train, Y=dataset.Y_train, max_epochs=300)

        # Prune model
        model.fit_score_function_estimator(X=dataset.X_train,
                                           Y=dataset.Y_train,
                                           max_epochs=500,
                                           n_mcmc_samples=16,
                                           learning_rate=0.3)

        # Record statistics
        vpp = gp.variational_point_process
        with torch.no_grad():
            p = vpp.probabilities
        expected_points = p.sum().item()
        stddev_points = (p * (1 - p)).sum().sqrt().item()

        mlflow.log_metrics({
            "expected_points": expected_points,
            "stddev_points": stddev_points,
        })

        # Draw sets from point process and record log likelihood
        state_dict = deepcopy(gp.state_dict())
        for i in range(n_posterior_samples):
            eprint(f"\nSample {i + 1:02d}/{n_posterior_samples}")

        # Restore the pre-pruning parameters and buffers before
        # drawing a new subset from the point process
        if i > 0:
                for n, t in gp.named_parameters():
                    t.data = state_dict[n]
                    t.grad = None
                for n, t in gp.named_buffers():
                    t.data = state_dict[n]

            remove_points(gp)

            # Post-fit
            model.fit(X=dataset.X_train, Y=dataset.Y_train, max_epochs=200)

            log_lik, KL = get_ELBO(model, dataset, batch_size=None, reps=1)

            prefix = f"draw_{i:02d}__"
            mlflow.log_metrics({
                prefix + "sparse_log_lik": log_lik,
                prefix + "sparse_KL": KL,
                prefix + "sparse_ELBO": log_lik - KL,
                prefix + "n_u": gp.n_inducing
            })
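The expected_points and stddev_points statistics above are the mean and standard deviation of a Poisson-binomial count: with independent inclusion probabilities p, the number of kept points has mean sum(p) and variance sum(p * (1 - p)). A quick Monte Carlo check:

import torch

p = torch.rand(100)                                   # inclusion probabilities
draws = torch.bernoulli(p.expand(100_000, -1)).sum(dim=1)
print(draws.mean().item(), p.sum().item())            # both approximate sum(p)
print(draws.std().item(), (p * (1 - p)).sum().sqrt().item())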
Example #8
def run_single(prior_weight, device, M, fold, n_folds):
    # Get dataset and model
    test_size = 1 / n_folds
    dataset = load_data("uci_kin8nm",
                        seed=fold,
                        device=device,
                        test_size=test_size)
    model = get_model(dataset,
                      n_inducing=M,
                      n_layers=2,
                      device=device,
                      add_input=True)

    # Create callback for logging status to tracking server
    def status_cb():
        mlflow.set_tag("current_epoch", model.epoch.item())

    model.register_callback(status_cb, update_interval=10)

    # Pre-fit model, first one layer at a time, then all layers jointly
    eprint(bold("\nLayerwise pre-fit"))
    fit_layerwise(model, dataset, batch_size=4096, max_epochs=300)

    eprint(bold("\nJoint pre-fit"))
    model.fit(X=dataset.X_train,
              Y=dataset.Y_train,
              batch_size=4096,
              max_epochs=500)

    # Infer probabilities of inclusion for all pseudo-points and sample
    # from the resulting distribution to prune the model
    eprint(bold("\nPruning"))
    for gp in model.gps:
        gp.variational_point_process.probabilities = 0.8

    model.fit_score_function_estimator(X=dataset.X_train,
                                       Y=dataset.Y_train,
                                       learning_rate=0.3,
                                       max_epochs=10,
                                       n_mcmc_samples=32)

    for gp in model.gps:
        remove_points(gp)

    # Post-fit model, all layers jointly
    eprint(bold("\nJoint post-fit"))
    model.fit(X=dataset.X_train,
              Y=dataset.Y_train,
              batch_size=4096,
              max_epochs=500)

    # Log metrics
    eprint(bold("\nEvaluating metrics"))
    model.eval()
    log_lik, KL = get_ELBO(model, dataset, batch_size=4096)
    clock_time, wall_time = get_prediction_times(model, dataset)
    train_log_lik, test_log_lik = get_loglik(model,
                                             dataset,
                                             train=True,
                                             test=True,
                                             batch_size=4096)

    mlflow.log_metrics({
        "log_lik": log_lik,
        "KL": KL,
        "ELBO": train_log_lik - KL,
        "clock_time": clock_time,
        "wall_time": wall_time,
        "train_log_lik": train_log_lik,
        "test_log_lik": test_log_lik,
    })

    for layer, gp in enumerate(model.gps, 1):
        mlflow.log_param(f"M{layer}", gp.n_inducing)

    eprint()
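get_prediction_times is assumed to time one prediction pass over the test set; a sketch, with model.predict as an assumed entry point:

import time

import torch

def get_prediction_times(model, dataset):
    # Report (process time, wall-clock time) in seconds for a single
    # full pass over the test inputs.
    t_clock, t_wall = time.process_time(), time.perf_counter()
    with torch.no_grad():
        model.predict(dataset.X_test)
    return time.process_time() - t_clock, time.perf_counter() - t_wall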