Example #1
    import numpy as np

    # In each iteration, sample a batch of points at which to query for preferences,
    # and obtain all possible pairwise preferences between these points.
    for it in range(num_iterations):
       
        # Print status:
        print('Run %i of %i, iteration %i of %i' % (i + 1, len(run_nums), 
            it + 1, num_iterations))
        
        # Preference data observed so far (used to train GP preference model):
        X = data_pt_idxs[:pref_count, :]
        y = labels[:pref_count, 1]
        
        # Update the Gaussian process preference model:
        posterior_model = feedback(X, y, GP_prior_cov_inv, preference_noise)    
        
        # Sample new points at which to query for a preference:
        sampled_point_idxs, _ = advance(posterior_model, num_samples)

        # Obtain coordinate points corresponding to these indices, and 
        # store the objective function values:
        sampled_points = np.empty((num_samples, state_dim))
        
        for j in range(num_samples):
           
            sampled_point_idx = sampled_point_idxs[j]
            # Coordinate point representation:
            sampled_points[j, :] = points_to_sample[sampled_point_idx, :]
            
            sample_idx = it * num_samples + j
            # Objective function value:
            objective_values[sample_idx] = \
                get_objective_value(sampled_point_idx, objective_function) 
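The loop above assumes that `data_pt_idxs`, `labels`, and `pref_count` are filled in elsewhere with pairwise preference data. As a minimal sketch only, not code from the source, the following shows one way to realize the "all possible pairwise preferences" mentioned in the leading comment: compare the objective values of every pair of newly sampled points and record the winner. The shapes of `data_pt_idxs` and `labels` and the `get_objective_value` signature are assumptions carried over from the snippet.

    import itertools

    # Compare every pair of newly sampled points and record which is preferred:
    for idx_1, idx_2 in itertools.combinations(sampled_point_idxs, 2):

        val_1 = get_objective_value(idx_1, objective_function)
        val_2 = get_objective_value(idx_2, objective_function)

        # Store the queried pair of point indices:
        data_pt_idxs[pref_count, :] = [idx_1, idx_2]
        # Label is 1 if the 2nd point is preferred, 0 if the 1st is:
        labels[pref_count, 1] = int(val_2 > val_1)

        pref_count += 1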
Example #2
    import os

    import numpy as np
    from scipy import io

    # Reconstruct the posterior covariance matrix from its eigendecomposition:
    post_cov = cov_evecs @ np.diag(cov_evals) @ np.linalg.inv(cov_evecs)

    # Posterior standard deviation at each point:
    GP_stdev = np.sqrt(np.diag(post_cov))

    # Check whether we have already drawn some posterior samples for this
    # number of preferences.
    post_sample_filename = 'Plotting_data/Samples_from_reward_model_' + str(pref_num) + \
                           '_pref.mat'

    if not os.path.isfile(post_sample_filename):

        # Draw 50 samples from the posterior model:
        trial_samples, reward_samples = advance(GP_model, 50)

        # Save samples and reward models:
        io.savemat(post_sample_filename, {
            'samples': trial_samples,
            'reward_models': reward_samples
        })

    # Load and unpack saved reward information:
    data = io.loadmat(post_sample_filename)

    trial_samples = data['samples'].flatten()
    reward_samples = data['reward_models']

    # Actions sampled in this simulation iteration:
    samples = data_pt_idxs[pref_num, :]
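The snippet stops short of showing how the loaded samples are used downstream. As a hedged illustration only (the matplotlib usage and the orientation of `reward_samples` as one sampled reward model per row are assumptions, not the source's plotting code), the posterior draws can be overlaid with their empirical mean and the band of two posterior standard deviations computed from `GP_stdev`:

    import matplotlib.pyplot as plt

    xs = np.arange(len(GP_stdev))

    # Overlay the individual posterior draws:
    for reward_sample in reward_samples:
        plt.plot(xs, reward_sample, color='gray', alpha=0.3)

    # Empirical posterior mean across the draws, with a +/- 2 stdev band:
    post_mean = reward_samples.mean(axis=0)
    plt.plot(xs, post_mean, color='blue', label='empirical posterior mean')
    plt.fill_between(xs, post_mean - 2 * GP_stdev, post_mean + 2 * GP_stdev,
                     alpha=0.2, label='+/- 2 stdev')

    plt.xlabel('Point index')
    plt.ylabel('Reward')
    plt.legend()
    plt.show()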
Example #3
    import numpy as np

    # In each iteration, sample a batch of points at which to query for preferences,
    # and obtain all possible pairwise preferences between these points.
    for it in range(num_iterations):

        # Print status:
        print('Run %i of %i, iteration %i of %i' %
              (i + 1, len(run_nums), it + 1, num_iterations))

        # Preference data observed so far (used to train GP preference model):
        X = data_pt_idxs[:pref_count, :]
        y = labels[:pref_count, 1]

        # Update the Gaussian process preference model:
        posterior_model = feedback(X, y, GP_prior_cov_inv, preference_noise)

        # Sample new points at which to query for a preference:
        sampled_point_idxs, reward_models = advance(posterior_model,
                                                    num_samples)

        if (it + 1) % 5 == 0:
            plot_progress(save_folder, points_to_sample, points_per_dimension,
                          state_dim, posterior_model, it + 1, reward_models,
                          0)  # dimension = 0 for visualization

        # Obtain coordinate points corresponding to these indices, and
        # store the objective function values:
        sampled_points = np.empty((num_samples, state_dim))
        temp_obj_values = []
        for j in range(num_samples):

            sampled_point_idx = sampled_point_idxs[j]
            # Coordinate point representation:
            sampled_points[j, :] = points_to_sample[sampled_point_idx]

            # Objective function value:
            temp_obj_values.append(
                get_objective_value(sampled_point_idx, objective_function))
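Both loop examples pass a precomputed `GP_prior_cov_inv` into `feedback` without showing where it comes from. Below is a minimal, self-contained sketch of one conventional way to build such a matrix: a squared-exponential (RBF) kernel over the candidate points with a small jitter term for numerical stability, inverted once up front. The kernel choice and hyperparameter values are assumptions for illustration, not the source's settings.

    import numpy as np

    def rbf_prior_cov_inv(points, lengthscale=0.15, signal_var=1.0,
                          jitter=1e-6):
        """Inverse of an RBF prior covariance over the candidate points."""
        # Pairwise squared distances between all candidate points:
        diffs = points[:, None, :] - points[None, :, :]
        sq_dists = np.sum(diffs ** 2, axis=-1)

        # Squared-exponential kernel, plus jitter on the diagonal so the
        # matrix is well-conditioned before inversion:
        cov = signal_var * np.exp(-0.5 * sq_dists / lengthscale ** 2)
        cov += jitter * np.eye(points.shape[0])

        return np.linalg.inv(cov)

    # Hypothetical usage with the candidate grid from the snippets above:
    # GP_prior_cov_inv = rbf_prior_cov_inv(points_to_sample)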