# Assumed imports for these code examples (TensorFlow 1.x API). The project-specific helpers
# used below (dp_gp_lvm, bayesian_gp_lvm, get_training_variables, get_prediction_variables,
# mvn_log_pdf, RESULTS_FILE_NAME) come from the surrounding DP-GP-LVM codebase and are not
# shown here.
from os.path import isfile
from time import time

import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler


def run_dp_gp_lvm(y_train,
                  y_test_observed,
                  y_test_unobserved,
                  num_latent_dimensions,
                  num_inducing_points,
                  truncation_level,
                  dp_mask_size,
                  train_iter,
                  predict_iter,
                  learning_rate,
                  save_file,
                  seed_val=1):
    """
    TODO
    :param y_train:
    :param y_test_observed:
    :param y_test_unobserved:
    :param num_latent_dimensions:
    :param num_inducing_points:
    :param truncation_level:
    :param dp_mask_size:
    :param train_iter:
    :param predict_iter:
    :param learning_rate:
    :param save_file:
    :param seed_val:
    :return:
    """

    # Set seed.
    np.random.seed(seed=seed_val)

    # Define instance of DP-GP-LVM.
    gpdp = dp_gp_lvm(y_train=y_train,
                     num_latent_dims=num_latent_dimensions,
                     num_inducing_points=num_inducing_points,
                     truncation_level=truncation_level,
                     mask_size=dp_mask_size)

    num_unobserved_dimensions = np.shape(y_test_unobserved)[1]

    # Define objectives.
    training_objective = gpdp.objective
    predict_lower_bound, x_mean_test, x_covar_test, \
        predicted_mean, predicted_covar = gpdp.predict_missing_data(y_test=y_test_observed)
    predict_objective = tf.negative(predict_lower_bound)

    # Optimisation.
    training_var_list = get_training_variables()
    predict_var_list = get_prediction_variables()
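    # NOTE (assumption): get_training_variables() and get_prediction_variables() are project
    # helpers that collect, respectively, the TF variables of the training model and the
    # test-time variational parameters, so each optimiser below only updates its own set.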

    opt_train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
        loss=training_objective, var_list=training_var_list)
    opt_predict = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
        loss=predict_objective, var_list=predict_var_list)

    with tf.Session() as s:

        # Initialise variables.
        # Initialise training variables first.
        s.run(tf.variables_initializer(var_list=training_var_list))
        # Then initialise prediction variables.
        s.run(tf.variables_initializer(var_list=predict_var_list))
        # Finally initialise any remaining global variables such as opt ones.
        s.run(tf.global_variables_initializer())

        # Training optimisation loop.
        start_time = time()
        print('\nTraining DP-GP-LVM..')
        for c in range(train_iter):
            s.run(opt_train)
            if (c % 100) == 0:
                print('  DP-GP-LVM opt iter {:5}: {}'.format(
                    c, s.run(training_objective)))
        end_time = time()
        train_opt_time = end_time - start_time
        print('Final iter {:5}:'.format(c))
        print('  DP-GP-LVM: {}'.format(s.run(training_objective)))
        print('Time to optimise: {} s'.format(train_opt_time))

        # Get converged values as numpy arrays.
        ard_weights, noise_precision, signal_variance, inducing_input, assignments = \
            s.run((gpdp.ard_weights, gpdp.noise_precision, gpdp.signal_variance, gpdp.inducing_input, gpdp.assignments))
        x_mean, x_covar = s.run(gpdp.q_x)
        gamma_atoms, alpha_atoms, beta_atoms = s.run(gpdp.dp_atoms)

        # Initialise prediction variables.
        s.run(tf.variables_initializer(var_list=predict_var_list))

        # Prediction optimisation loop.
        start_time = time()
        print('\nOptimising Predictions..')
        for c in range(predict_iter):
            s.run(opt_predict)
            if (c % 100) == 0:
                print('  DP-GP-LVM opt iter {:5}: {}'.format(
                    c, s.run(predict_objective)))
        end_time = time()
        predict_opt_time = end_time - start_time
        print('Final iter {:5}:'.format(c))
        print('  DP-GP-LVM: {}'.format(s.run(predict_objective)))
        print('Time to optimise: {} s'.format(predict_opt_time))

        # Get converged values as numpy arrays.
        x_mean_test_np, x_covar_test_np, predicted_mean_np, predicted_covar_np = s.run(
            (x_mean_test, x_covar_test, predicted_mean, predicted_covar))

        # Calculate log-likelihood of ground truth with predicted posteriors.
        gt_log_likelihoods = [
            mvn_log_pdf(
                x=tf.transpose(tf.slice(y_test_unobserved, begin=[0, du], size=[-1, 1])),
                mean=tf.transpose(tf.slice(predicted_mean, begin=[0, du], size=[-1, 1])),
                covariance=tf.squeeze(tf.slice(predicted_covar, begin=[du, 0, 0], size=[1, -1, -1]),
                                      axis=0))
            for du in range(num_unobserved_dimensions)
        ]
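        # NOTE (assumption): mvn_log_pdf is a project helper presumed to return
        # log N(x; mean, covariance), so each entry above is the log-likelihood of one
        # unobserved dimension, across all test points, under its predicted Gaussian posterior.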
        gt_log_likelihoods_np = np.array(s.run(gt_log_likelihoods))
        gt_log_likelihood = np.sum(gt_log_likelihoods_np)

    # Save results.
    np.savez(save_file,
             y_train=y_train,
             y_test_observed=y_test_observed,
             y_test_unobserved=y_test_unobserved,
             ard_weights=ard_weights,
             noise_precision=noise_precision,
             signal_variance=signal_variance,
             x_u=inducing_input,
             x_mean=x_mean,
             x_covar=x_covar,
             gamma_atoms=gamma_atoms,
             alpha_atoms=alpha_atoms,
             beta_atoms=beta_atoms,
             train_opt_time=train_opt_time,
             x_mean_test=x_mean_test_np,
             x_covar_test=x_covar_test_np,
             predicted_mean=predicted_mean_np,
             predicted_covar=predicted_covar_np,
             predict_opt_time=predict_opt_time,
             gt_log_likelihoods=gt_log_likelihoods_np,
             gt_log_likelihood=gt_log_likelihood)

    # Print results.
    print('\nDP-GP-LVM:')
    print('  Ground Truth Predicted Posterior Log-Likelihood: {}'.format(
        gt_log_likelihood))
    print('  Noise Precisions: {}'.format(np.squeeze(noise_precision)))
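
# Hypothetical usage sketch: the synthetic data, file name, and hyperparameter values below are
# illustrative placeholders, not values from the original experiments.
if __name__ == '__main__':
    demo_data = np.random.standard_normal((120, 30))
    demo_y_train = demo_data[:100]                 # 100 training points with all 30 dimensions.
    demo_y_test_observed = demo_data[100:, :20]    # First 20 dimensions observed at test time.
    demo_y_test_unobserved = demo_data[100:, 20:]  # Last 10 dimensions held out for evaluation.

    run_dp_gp_lvm(demo_y_train,
                  demo_y_test_observed,
                  demo_y_test_unobserved,
                  num_latent_dimensions=10,
                  num_inducing_points=25,
                  truncation_level=20,
                  dp_mask_size=1,
                  train_iter=2000,
                  predict_iter=1000,
                  learning_rate=0.01,
                  save_file='dp_gp_lvm_demo_results.npz')

    # The saved archive can then be inspected, e.g.:
    demo_results = np.load('dp_gp_lvm_demo_results.npz')
    print(demo_results['gt_log_likelihood'])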
Example #2
    mrd_fully_independent_results_file = RESULTS_FILE_NAME.format(
        model='mrd_fully_independent', dataset=dataset_str)
    gpdp_results_file = RESULTS_FILE_NAME.format(
        model='dp_gp_lvm', dataset=dataset_str)  # Keep 3d points together.
    gpdp_mask_results_file = RESULTS_FILE_NAME.format(
        model='dp_gp_lvm_mask_93', dataset=dataset_str)

    # Define instance of necessary model.
    if not isfile(gpdp_results_file):
        # Reset default graph before building new model graph. This speeds up the script.
        tf.reset_default_graph()
        np.random.seed(1)  # Random seed.
        # Define instance of DP-GP-LVM.
        model = dp_gp_lvm(y_train=y_train,
                          num_inducing_points=num_inducing_points,
                          num_latent_dims=num_latent_dimensions,
                          truncation_level=truncation_level,
                          mask_size=3)

        model_training_objective = model.objective
        # Optimisation.
        model_opt_train = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(
                loss=model_training_objective)

        with tf.Session() as s:
            # Initialise variables.
            s.run(tf.global_variables_initializer())

            # Training optimisation loop.
            start_time = time()
    # Normalise each dataset with its own statistics (fit_transform refits the scaler every call).
    scaler = StandardScaler()
    normalized_hinselmann_data = scaler.fit_transform(hinselmann_data)
    normalized_green_data = scaler.fit_transform(green_data)
    normalized_schiller_data = scaler.fit_transform(schiller_data)

    # TEMP: Test with Bayesian GP-LVM and DP-GP-LVM.
    # Set seed.
    np.random.seed(seed=1)
    # # Train Bayesian GP-LVM.
    # model = bayesian_gp_lvm(y_train=normalized_hinselmann_data,
    #                         num_latent_dims=num_latent_dimensions,
    #                         num_inducing_points=num_inducing_points)
    # Define instance of DP-GP-LVM.
    model = dp_gp_lvm(y_train=normalized_hinselmann_data,
                      num_latent_dims=num_latent_dimensions,
                      num_inducing_points=num_inducing_points,
                      truncation_level=truncation_level,
                      mask_size=1)

    # Define objectives.
    training_objective = model.objective

    # Optimisation.
    training_var_list = get_training_variables()

    opt_train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
        loss=training_objective, var_list=training_var_list)

    with tf.Session() as s:

        # Initialise variables.
        s.run(tf.global_variables_initializer())  # Assumed: mirrors the initialisation in the other examples.

    # Define file path for results.
    dataset_str = 'cmu_subject7_joint_angles'
    dp_gp_lvm_results_file = RESULTS_FILE_NAME.format(model='dp_gp_lvm',
                                                      dataset=dataset_str)

    # Define instance of necessary model.
    if not isfile(dp_gp_lvm_results_file):
        # Reset default graph before building new model graph. This speeds up the script.
        tf.reset_default_graph()
        np.random.seed(1)  # Random seed.
        # Define instance of DP-GP-LVM.
        model = dp_gp_lvm(
            y_train=y_train,
            num_inducing_points=num_inducing_points,
            num_latent_dims=num_latent_dimensions,
            truncation_level=truncation_level,
            mask_size=1)  # Treat each observed dimension as independent.

        model_training_objective = model.objective
        # Optimisation.
        model_opt_train = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(
                loss=model_training_objective)

        with tf.Session() as s:
            # Initialise variables.
            s.run(tf.global_variables_initializer())

            # Training optimisation loop.
            start_time = time()