예제 #1
0
 def __init__(self,
              num_gp=3,
              gps=None,
              gating_gps=None,
              epsilon=0.05,
              max_iter=100):
     """Store the mixture configuration and reset the training state.

     Args:
         num_gp: Number of GP components; stored as ``self.num_gp``.
         gps: List of GP objects stored as ``self.gps``. When ``None``, a
             fresh list of ``num_gp`` default-constructed ``GP`` instances
             is created for this object.
         gating_gps: List of GP objects stored as ``self.gating_gps``;
             ``None`` is handled the same way as for ``gps``.
         epsilon: Stored as ``self.epsilon``.
         max_iter: Stored as ``self.max_iter``.
     """
     # Build the default lists per call: a list written in the signature is
     # created once at definition time and would be shared (and mutated)
     # across every instance that relies on the default.
     if gps is None:
         gps = [GP() for _ in range(num_gp)]
     if gating_gps is None:
         gating_gps = [GP() for _ in range(num_gp)]

     self.num_gp = num_gp
     self.gps = gps
     self.gating_gps = gating_gps
     # Training data and self.P start unset.
     self.X_train = None
     self.Y_train = None
     self.P = None
     self.epsilon = epsilon
     self.max_iter = max_iter
예제 #2
0
def try_bec():
    """Fit a GP on a slice of the BEC training data, print and plot predictions."""
    # Load the simulations together with the train/test/validation splits;
    # only the training split is used here.
    provider = InputProvider()
    harmonic_sims, tr, te, va = provider.get_bec_data()
    train_df = bec.get_within_range(tr, g_low=30, g_high=50, n=100)

    # Fit the model on the (g, x) columns against psi.
    gp = GP()
    features = torch.FloatTensor(train_df[['g', 'x']].to_numpy())
    targets = torch.FloatTensor(train_df.psi.to_numpy())
    gp.fit(features, targets, True)

    # Predict along x with g held fixed at 30, using the x grid of the
    # simulation returned by get_closest_sim for g=30.
    closest_sim = bec.get_closest_sim(harmonic_sims, g=30.)
    fixed_g = 30. * np.ones(closest_sim.x.shape[0])
    test_gx = np.stack([fixed_g, closest_sim.x]).transpose()

    y_pred, sigma = gp.predict(torch.FloatTensor(test_gx))
    print(y_pred, sigma)

    # Sub-plots for several fixed values of the first input dimension ('g');
    # the second dimension ('x') is plotted against it.
    sub_plot_multiple_gp(gp, harmonic_sims, [5, 30, 60, 90], ['g', 'x'],
                         'psi')
예제 #3
0
    def __init__(self,
                 data_generator,
                 init_sample_size,
                 max_steps,
                 sigma_obs=None,
                 is_mcmc=False,
                 mcmc_opts=None):
        """Set up the optimizer state and draw the initial observations.

        Args:
            data_generator: Object that generates data and specifies the
                optimization domain; it is read through ``.domain`` and
                ``.sample(x)``.
            init_sample_size: Number of distinct domain points sampled up
                front.
            max_steps: Maximum number of sampled points.
            sigma_obs: Stored as ``self.sigma_obs``.
            is_mcmc: Stored as ``self.is_mcmc``.
            mcmc_opts: Stored as ``self.mcmc_opts``; expected to be an
                instance of the MCMCProperties class.
        """
        self.max_steps = max_steps
        self.data_generator = data_generator
        self.domain = self.data_generator.domain

        # Initial design: indices drawn without replacement from the domain.
        domain_indices = range(len(self.domain))
        initial_idx = np.random.choice(domain_indices,
                                       size=init_sample_size,
                                       replace=False)
        self.x = self.domain[initial_idx]
        self.y = self.data_generator.sample(self.x)
        self.best_y = np.max(self.y)

        # Posterior bookkeeping starts unset.
        self.mu_posterior = None
        self.std_posterior = None

        # Underlying GP built on the initial observations.
        self.gp = GP(self.x, self.y)
        self.sigma_obs = sigma_obs

        # MCMC configuration.
        self.is_mcmc = is_mcmc
        self.mcmc_opts = mcmc_opts
예제 #4
0
def try_1D():
    """Fit a GP on the 1-D regression data and plot it over the test inputs."""
    provider = InputProvider()
    x_data, y_data, x_test = provider.get_1d_regression_data()

    model = GP()
    model.fit(x_data, y_data, True)
    model.plot(x_test)
예제 #5
0
def sample_function(kernel, nsamples, n_fsamples, initialize=True):
    """Build a GP on a linear ramp over [0, 1] and draw function samples.

    Args:
        kernel: Kernel passed through to ``GP``.
        nsamples: Number of points in the ``torch.linspace(0, 1, ...)`` grids
            used as training inputs and targets.
        n_fsamples: Argument forwarded to ``gp.sample_f``.
        initialize: Flag passed through to ``GP``.

    Returns:
        Tuple ``(gp, fsamples)`` of the model and the result of
        ``gp.sample_f(n_fsamples)``.
    """
    # Inputs and targets are two separate tensors holding the same ramp.
    inputs = torch.linspace(0, 1, nsamples)
    targets = torch.linspace(0, 1, nsamples)
    gp = GP(inputs, targets, kernel, initialize)
    return gp, gp.sample_f(n_fsamples)
예제 #6
0
 def objective(x):
     """Return the negative log marginal likelihood for parameter vector ``x``.

     ``x[0]`` is used as the noise level and ``x[1:]`` supply the kernel
     hyperparameters, in the order of ``hp_specs``.

     NOTE(review): ``hp_specs``, ``kernel_creator``, ``kwargs``, ``domain``,
     ``x_data`` and ``y_data`` are free names that must come from the
     enclosing scope, which is not visible here.
     """
     noise = x[0]
     # enumerate works on both Python 2 and 3 (xrange is Python-2 only).
     hps = {spec.name: x[idx + 1] for idx, spec in enumerate(hp_specs)}
     kernel = kernel_creator(hps=hps, **kwargs)
     gp = GP(domain, kernel, noise)
     gp.add_observations(x_data, y_data)
     # Negated so that a minimizer maximizes the likelihood.
     return -1 * gp.get_log_marginal_likelihood()
    def optimise(reuse=True):
        '''Run an optimisation loop to find better model hyperparams.

        Inputs:
            reuse | bool, True if you want to carry on from where you left
                    off (continues from self.x_data / self.score_data, which
                    are appended to in place)
        Outputs:
            best_hyperparams | the best hyperparameters found
                               (self.best_hyperparams)
        Raises:
            NotImplementedError | when there are no previous observations;
                                  random selection of the first candidate is
                                  not implemented
            AttributeError      | when the model lacks `train` or `predict`

        NOTE(review): `self` is not a parameter and is read from the
        enclosing scope -- confirm this is intended.
        '''

        if reuse:
            x = self.x_data
            y = self.score_data
        else:
            x = []
            y = []

        for _loop in range(self.loops):
            for _step in range(self.iter_per_loop):

                if not x:
                    # The original left this branch as a bare `pass`, which
                    # only failed later with an unrelated error because no
                    # candidate had been chosen. Fail explicitly instead.
                    raise NotImplementedError(
                        "Random selection of initial hyperparams is not "
                        "implemented; previous observations are required")

                gp = GP(x, y, 'matern')
                new_x = self._choose_next(gp)  # choose hyperparams to try next
                new_x_dict = self._array_to_hyperparams(new_x)

                # Train and predict
                try:
                    self.model.train(self.X_train, self.y_train, **new_x_dict)
                    output_pred = self.model.predict(self.X_val)
                except AttributeError:
                    # `raise e("...")` called the exception instance and so
                    # raised TypeError; raise a proper exception instead.
                    raise AttributeError("Model did not have method `train` " \
                                         "or `predict`")

                # make sure it's an Nx1 array
                if len(output_pred.shape) == 1:
                    output_pred = np.reshape(output_pred,
                                             (output_pred.shape[0], 1))

                new_score = self._score(output_pred)

                # Lower scores are treated as better.
                if new_score < self.best_score:
                    self.best_score = new_score
                    self.best_hyperparams = new_x_dict
                    print("Best score is {}".format(new_score))
                    print("\n")

                x.append(new_x)
                y.append(new_score)

        return self.best_hyperparams
예제 #8
0
    def init(self, params_tl, params_l):
        r"""Initialize the GPs.

        Parameters
        ----------
        params_tl : np.ndarray
            initial parameters for GP over :math:`\log\ell`; all but the
            last entry are passed to the kernel, the last is passed to the
            GP as ``s``
        params_l : np.ndarray
            initial parameters for GP over :math:`\exp(\log\ell)`; split the
            same way as ``params_tl``

        """
        # The docstring is a raw string because it contains LaTeX
        # backslashes, which are invalid escape sequences otherwise.

        kernel = self.options['kernel']

        # create the gaussian process over log(l)
        self.gp_log_l = GP(kernel(*params_tl[:-1]),
                           self.x_s,
                           self.tl_s,
                           s=params_tl[-1])

        # TODO: improve matrix conditioning for log(l)
        self.gp_log_l.jitter = np.zeros(self.ns, dtype=DTYPE)

        # pick candidate points
        # (presumably this is what provides self.x_sc / self.l_sc / self.nsc
        # used below -- confirm in _choose_candidates)
        self._choose_candidates()

        # create the gaussian process over exp(log(l))
        self.gp_l = GP(kernel(*params_l[:-1]),
                       self.x_sc,
                       self.l_sc,
                       s=params_l[-1])

        # TODO: improve matrix conditioning for exp(log(l))
        self.gp_l.jitter = np.zeros(self.nsc, dtype=DTYPE)

        # make the vector of locations for approximations
        self._approx_x = self._make_approx_x()
        self._approx_px = self._make_approx_px()

        self.initialized = True
 def __init__(self):
     """Start the modeling node: read parameters, build the model, serve.

     Reads the GP kernel parameters and EM settings from the ROS parameter
     server, builds a ``MixtureGaussianProcess``, registers the four
     services and then blocks in ``rospy.spin()``.
     """
     rospy.init_node('sampling_modeling_node')
     num_gp = rospy.get_param("~num_gp", 3)
     self.optimize_kernel = rospy.get_param("~online_kernel_optimization",
                                            True)

     # One modeling GP and one gating GP per component, each configured by
     # a three-element kernel parameter list.
     modeling_gps, gating_gps = [], []
     for idx in range(num_gp):
         kernel_suffix = str(idx) + "_kernel"
         modeling_kernel = rospy.get_param("~modeling_gp_" + kernel_suffix,
                                           [0.5, 0.5, 0.1])
         gating_kernel = rospy.get_param("~gating_gp_" + kernel_suffix,
                                         [0.5, 0.5, 0.1])
         assert len(modeling_kernel) == 3
         assert len(gating_kernel) == 3
         modeling_gp = GP(modeling_kernel[0], modeling_kernel[1],
                          modeling_kernel[2])
         modeling_gps.append(modeling_gp)
         gating_gp = GP(gating_kernel[0], gating_kernel[1], gating_kernel[2])
         gating_gps.append(gating_gp)

     # EM stopping criteria.
     em_epsilon = rospy.get_param("~EM_epsilon", 0.03)
     em_max_iteration = rospy.get_param("~EM_max_iteration", 100)
     self.model = MixtureGaussianProcess(num_gp=num_gp,
                                         gps=modeling_gps,
                                         gating_gps=gating_gps,
                                         epsilon=em_epsilon,
                                         max_iter=em_max_iteration)
     self.X_test = None

     # Service endpoints, all under the modeling namespace.
     self.add_test_position_server = rospy.Service(
         KModelingNameSpace + 'add_test_position', AddTestPositionToModel,
         self.AddTestPosition)
     self.add_sample_server = rospy.Service(
         KModelingNameSpace + 'add_samples_to_model', AddSampleToModel,
         self.AddSampleToModel)
     self.update_model_server = rospy.Service(
         KModelingNameSpace + 'update_model', Trigger, self.UpdateModel)
     self.model_predict_server = rospy.Service(
         KModelingNameSpace + 'model_predict', ModelPredict,
         self.ModelPredict)
     self.sample_count = 0

     # Hand control to ROS.
     rospy.spin()
예제 #10
0
def create_gp(domain, kernel_name, noise=None, hps=None, **kwargs):
    """Create GP with the specified kernel.
    Args:
        domain: List of lists [[dim1_low, dim1_high], ...]
        kernel_name: Name of kernel to use (matched case-insensitively
            against the entries of ``all_kernels``).
        noise: The amount of noise in the system. If None, a small positive
            default (0.01) is used.
        hps: Hyperparameters passed to the kernel constructor.
        kwargs: Other arguments to be passed to kernel.
    Returns: GP object.
    Raises:
        ValueError: If no kernel with the given name is registered.
    """
    kernel = None
    wanted = kernel_name.lower()
    for k_info in all_kernels:
        if k_info.name.lower() == wanted:
            kernel = k_info.obj(hps=hps, **kwargs)
    if kernel is None:
        raise ValueError('Kernel %s not found.' % kernel_name)
    # Previously the `noise` argument was ignored and the default always
    # used. Default noise is small but positive; helps with SPD conditions.
    if noise is None:
        noise = 0.01
    return GP(domain, kernel, noise)
예제 #11
0
def create_tuned_gp(domain, kernel_name, x_data, y_data, maxfs=50, **kwargs):
    """Tune the noise and kernel hyperparameters of a GP on the given data.

    The negative log marginal likelihood is minimized with ``direct_min``
    over the noise level (bounded to [0.0001, 1]) and every hyperparameter
    listed by the kernel's ``get_hp_specs()``.

    Args:
        domain: Domain passed through to ``GP``.
        kernel_name: Name of kernel to use (matched case-insensitively
            against the entries of ``all_kernels``).
        x_data: Observed points added to each candidate GP.
        y_data: Observed values added to each candidate GP.
        maxfs: Maximum number of GPs to build in tuning. If None,
            ``direct_min`` is called without a limit argument.
        kwargs: Other arguments to be passed to kernel.
    Returns: Tuned GP (note does not have data added to it).
    Raises:
        ValueError: If no kernel with the given name is registered.
    """
    kernel_creator = None
    wanted = kernel_name.lower()
    for k_info in all_kernels:
        if k_info.name.lower() == wanted:
            kernel_creator = k_info.obj
    if kernel_creator is None:
        raise ValueError('Kernel %s not found.' % kernel_name)
    hp_specs = kernel_creator.get_hp_specs()

    def hps_from(vector):
        # vector[0] is the noise; the remaining entries follow hp_specs.
        return {spec.name: vector[idx + 1]
                for idx, spec in enumerate(hp_specs)}

    def objective(x):
        noise = x[0]
        kernel = kernel_creator(hps=hps_from(x), **kwargs)
        gp = GP(domain, kernel, noise)
        gp.add_observations(x_data, y_data)
        # Negated so that the minimizer maximizes the likelihood.
        return -1 * gp.get_log_marginal_likelihood()

    bounds = [[0.0001, 1]] + [[hp_info.lower, hp_info.upper]
                              for hp_info in hp_specs]
    if maxfs is not None:
        best_specs = direct_min(objective, bounds, maxf=maxfs).x
    else:
        best_specs = direct_min(objective, bounds).x
    noise = best_specs[0]
    hps = hps_from(best_specs)
    # Function-call form prints the same text on Python 2 and Python 3.
    print(hps)
    kernel = kernel_creator(hps=hps, **kwargs)
    return GP(domain, kernel, noise)
예제 #12
0
def test_posterior_std():
    """Posterior standard deviations match the stored reference values."""
    np.random.seed(1)
    n_train, n_test = 10, 50

    def target(x):
        return np.sin(0.9 * x).flatten()

    # Random training inputs in [-5, 5] and an evenly spaced test grid.
    X = np.random.uniform(-5, 5, size=(n_train, 1))
    Xtest = np.linspace(-5, 5, n_test).reshape(-1, 1)
    y = target(X)

    model = GP(X, y, SquaredExp)
    _, stds = model.draw_posterior(Xtest)

    expected_stds = np.array([
        0.04202604, 0.06074646, 0.06442741, 0.06198905, 0.05028453, 0.01173271,
        0.04384755, 0.03729233, 0.0337959, 0.04233938, 0.05163432, 0.06106133,
        0.06421379, 0.05957212, 0.05028201, 0.04232989, 0.04012184, 0.0419569,
        0.04305928, 0.04062566, 0.03890225, 0.05161809, 0.07836714, 0.10348942,
        0.11188181, 0.09352313, 0.05289924, 0.07014139, 0.15883308, 0.25247954,
        0.32610151, 0.3625603, 0.35170763, 0.29152607, 0.18832523, 0.06334274,
        0.11994324, 0.27386883, 0.42075257, 0.54795666, 0.64986834, 0.72557935,
        0.77767936, 0.81080593, 0.83020954, 0.84065061, 0.84580095, 0.84812688,
        0.84908808, 0.8494516
    ])

    assert np.allclose(stds, expected_stds, atol=1e-5)
예제 #13
0
파일: cp.py 프로젝트: ziyw/gp
def plot_results(time_points, values):
    """Plot the GP regression of the observations over the range [0, 5].

    Draws the predicted mean, a band of mean +/- the second value returned
    by ``GP.GPR``, and the observed points, then shows the figure.

    Args:
        time_points: Observation locations, forwarded to ``GP.GPR`` and used
            as the scatter x-coordinates.
        values: Observed values, forwarded to ``GP.GPR`` and used as the
            scatter y-coordinates.
    """
    axis_x = np.arange(0, 5.1, 0.1)
    fig = plt.figure(0)

    # NOTE(review): the original also passed facecolor='g' here. Newer
    # matplotlib releases appear to reject unknown keyword arguments to
    # axis(); the background colour is set on the axes patch below anyway.
    plt.axis([0, 5, -2, 2])
    plt.grid(color='w', linestyle='-', linewidth=0.5)

    ax = fig.add_subplot(111)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.patch.set_facecolor('#E8E8F1')

    # Predict the mean and spread at every grid point.
    mu = np.zeros(axis_x.size)
    var = np.zeros(axis_x.size)

    ker = Kernel()
    ker.SE(1, 1)

    gp = GP()

    for i, point in enumerate(axis_x):
        mu[i], var[i], _ = gp.GPR(time_points=time_points,
                                  values=values,
                                  predict_point=point,
                                  kernel=ker)

    # Function-call form prints the same text on Python 2 and Python 3.
    print(mu)

    # Uncertainty band around the mean.
    plt.fill_between(axis_x, mu + var, mu - var, color='#D1D9F0')

    # Mean curve.
    plt.plot(axis_x, mu, linewidth=2, color="#5B8CEB")

    # Observed points. The original referenced the undefined names
    # `time` and `value` here instead of the function's own arguments.
    plt.scatter(time_points, values, color='#598BEB')

    plt.show()
예제 #14
0
def test_gp_posterior_mean():
    """Posterior means match the stored reference values."""
    np.random.seed(1)
    n_train, n_test = 10, 50

    def target(x):
        return np.sin(0.9 * x).flatten()

    # Random training inputs in [-5, 5] and an evenly spaced test grid.
    X = np.random.uniform(-5, 5, size=(n_train, 1))
    Xtest = np.linspace(-5, 5, n_test).reshape(-1, 1)
    y = target(X)

    model = GP(X, y, SquaredExp)
    means, _ = model.draw_posterior(Xtest)

    # Reference values
    expected_means = [
        0.97406338, 0.93351725, 0.8504679, 0.73040922, 0.589338, 0.42420947,
        0.24978696, 0.07058869, -0.11020691, -0.28851528, -0.45856577,
        -0.61317727, -0.74742986, -0.8562826, -0.93591749, -0.98394361,
        -0.99921925, -0.98145889, -0.93090595, -0.84831721, -0.73533425,
        -0.59509, -0.4327257, -0.25546679, -0.07205324, 0.10841341, 0.27787463,
        0.43049268, 0.56317516, 0.67525843, 0.76748952, 0.84072345, 0.89486151,
        0.92845102, 0.93909474, 0.92449526, 0.88371745, 0.81818343, 0.73203338,
        0.6317308, 0.52505586, 0.4198154, 0.3226457, 0.23820076, 0.1688575,
        0.11490075, 0.07503359, 0.04701673, 0.02826614, 0.01630293
    ]

    assert np.allclose(means, expected_means)
예제 #15
0
# Hyperparameters shared by both GPs below.
params = {
    'ln_noise': 0.5,
    'ln_signal': 1.0,
    'ln_length': 0.3,
}

# Training data: 50 random inputs in [0, 20) with a noisy cos-plus-linear
# target (fixed seed for reproducibility).
np.random.seed(42)
x = np.random.random((50, 1)) * 20
y = (np.cos(x) + 0.5 * x +
     np.random.normal(loc=0.0, scale=0.2, size=(50, 1)))

# Test inputs: 100 evenly spaced points on [-5, 22] as a column vector.
x_test = np.linspace(-5, 22, 100).reshape((100, 1))

# One GP per kernel, built on the same data and hyperparameters.
a_gp = GP(x, y, "matern", params)
b = GP(x, y, "rbf", params)
'''
product_of_experts = distributedGP(x,y,8)
general_poe = distributedGP(x,y,8, method='gpoe')
bcm = distributedGP(x,y,8, method='bcm')
rbcm = distributedGP(x,y,8,method='rbcm')
'''

# Predictions of each GP on the test inputs.
mean, cov = a_gp.predict(x_test)
m2, c2 = b.predict(x_test)
'''
mean2, cov2 = product_of_experts.predict(x_test)
mean3, cov3 = general_poe.predict(x_test)
mean4, cov4 = bcm.predict(x_test)
mean5, cov5 = rbcm.predict(x_test)
# Training data between 0 and 2*pi.
X, Y = generate_points(start=np.pi * 0, end=np.pi * 2)

lengthscales = []
data_fits = []
model_complexities = []

# Grid search over the lengthscale only; signal_variance and noise_variance
# stay fixed. The grid is shifted by 0.1 so it starts at 0.1, not 0.
for lengthscale in np.linspace(0, 2.9, 30):
    lengthscale += 0.1

    kernel = ExponentialSquaredKernel(lengthscale=lengthscale,
                                      signal_variance=1.)
    gp = GP(kernel, noise_variance=0.1)

    # Evaluate the two terms separately, plus the objective itself.
    data_fit = gp.data_fit_term(X, Y)
    model_complexity = gp.model_complexity_term(X)
    objective = gp.objective(X, Y)

    model_complexities.append(model_complexity)
    data_fits.append(data_fit)
    lengthscales.append(lengthscale)

# Pick the grid entry whose summed terms are largest.
objectives = np.array(data_fits) + np.array(model_complexities)
optimal_lengthscale_id = np.argmax(objectives)
max_objective = objectives[optimal_lengthscale_id]

# Plotting.
예제 #17
0
def test4():
    """Build a GP system, initialise its population and return ``run(3)``."""
    gp_system = GP(terminals, functions, fitness)
    gp_system.init_population()
    outcome = gp_system.run(3)
    return outcome
예제 #18
0
def run_bo(xs,
           oracle,
           kern,
           aq_type='ei',
           noise_var=1e-2,
           n_init=1,
           n_itr=50,
           seed=0):
    """Run Bayesian optimization (maximization problem).

    Parameters
    ----------
    xs : 2d-ndarray
        Candidate input points, one point per row
    oracle : function-obj
        Oracle objective function object (see oracle.py); it is called with
        a 2-D array of points, one point per row
    kern : kernel-obj
        Kernel function class object (see kern.py)
    aq_type : str, optional
        Acquisition function type ('ei' or 'pi'; any other value selects
        UCB), by default 'ei'
    noise_var : float, optional
        Observation noise for the GP, by default 1e-2
    n_init : int, optional
        Number of initial input points of the GP, by default 1
    n_itr : int, optional
        Number of Bayesian optimization iterations, by default 50
    seed : int, optional
        Random number generator's seed, by default 0

    Returns
    -------
    dict
        Bayesian optimization results and logs. The true maximizer is
        stored under 'true_xmax'; the historical misspelled key 'ture_xmax'
        is kept as an alias for existing callers.
    """

    rand_gen = np.random.RandomState(seed)

    nx = xs.shape[0]
    xdim = xs.shape[1]

    aq_vals = np.zeros([n_itr, nx])
    selected_xs = np.zeros([n_itr + n_init, xdim])
    selected_ys = np.zeros([n_itr + n_init])
    cumlative_times = np.zeros([n_itr + 1])
    regret = np.zeros([n_itr + 1])

    gp_mus = np.zeros([n_itr, nx])
    gp_vars = np.zeros([n_itr, nx])

    # Evaluate the oracle on all candidates once so that the reported
    # maximum and maximizer come from the same evaluation.
    all_ys = oracle(xs)
    true_max = np.max(all_ys)
    true_xmax = xs[np.argmax(all_ys)]

    ## select initial xs ##
    init_indices = rand_gen.choice(nx, n_init, replace=False)
    selected_xs[:n_init] = xs[init_indices]
    selected_ys[:n_init] = oracle(selected_xs[:n_init])
    if n_init > 0:
        # Only the first n_init entries are real observations; the rest of
        # the buffer is zero padding and must not take part in the max.
        # The argmax is a position in the selected points, not in xs.
        best_init = int(np.argmax(selected_ys[:n_init]))
        cur_max = selected_ys[best_init]
        cur_xmax = selected_xs[best_init]
    else:
        # No initial observations: keep the historical placeholder values.
        cur_max = 0.0
        cur_xmax = xs[0]

    regret[0] = true_max - cur_max

    ### start bayesian optimization
    for i in trange(n_itr):
        st = time.time()
        # Refit the GP on everything observed so far.
        gp_model = GP(selected_xs[:n_init + i],
                      selected_ys[:n_init + i],
                      kern,
                      noise_var=noise_var,
                      seed=seed)
        pmean, pvar = gp_model.predict_f(xs)
        gp_mus[i] = pmean
        gp_vars[i] = pvar

        if aq_type == 'ei':
            aq_val = EI(pmean, pvar, cur_max)
        elif aq_type == 'pi':
            aq_val = PI(pmean, pvar, cur_max)
        else:
            aq_val = UCB(pmean, pvar)

        next_x = xs[np.argmax(aq_val)]

        aq_vals[i] = aq_val
        selected_xs[i + n_init] = next_x
        # Pass the single point as one row of shape (1, xdim), matching how
        # the oracle is called above. The previous (xdim, 1) shape was only
        # correct for xdim == 1.
        y = oracle(next_x[np.newaxis, :])[0]
        selected_ys[i + n_init] = y

        if cur_max < y:
            cur_max = y
            cur_xmax = next_x

        cumlative_times[i + 1] = cumlative_times[i] + time.time() - st
        regret[i + 1] = true_max - cur_max

    hist = {
        'selected_xs': selected_xs,
        'selected_ys': selected_ys,
        'aq_vals': aq_vals,
        'cumulative_times': cumlative_times,
        'true_max': true_max,
        'true_xmax': true_xmax,
        # Misspelled key kept for backward compatibility.
        'ture_xmax': true_xmax,
        'regret': regret,
        'cur_max': cur_max,
        'cur_xmax': cur_xmax,
        'gp_mus': gp_mus,
        'gp_vars': gp_vars,
        'options': {
            'n_itr': n_itr,
            'n_init': n_init,
            'noise_var': noise_var,
            'aq_type': aq_type,
            'seed': seed
        }
    }

    return hist
예제 #19
0
def test_gp_mut(generations=20):
    """Run the GP with branch-replacement mutation on the small data set.

    Args:
        generations: Argument passed to ``GP.run``.
    """
    training_data = parse_data(Path('./containerfs/tmp/cetdl1772small.dat'))
    runner = GP(POP_SIZE,
                training_data,
                mutation_method='branch_replacement')
    runner.run(generations)
예제 #20
0
def vizualize_sample_execution(world_config_file, schedule_config_file,
                               planner_config_file, model_config_file,
                               base_model_filepath, schedule_filepath,
                               strategies, num_deliveries_runs,
                               availability_percents, stat_run, visualize,
                               out_gif_path, out_img_path):
    """Plan, execute and visualize one sample run per configuration.

    For every combination of delivery count and availability percent, the
    availability models and true schedules are loaded from file when both
    files exist and generated otherwise. Each strategy is then executed and
    the visit traces are passed to ``visualize_path_willow``.

    Args:
        world_config_file, schedule_config_file, planner_config_file,
        model_config_file: Passed to ``load_params`` to build ``params``.
        base_model_filepath: Path prefix of the pre-generated base model
            files; "<num_deliveries>_<availability_percent>_<stat_run>.yaml"
            is appended.
        schedule_filepath: Path prefix of the pre-generated schedule files,
            with the same suffix.
        strategies: Iterable of strategy names; 'mcts' runs
            ``create_policy_and_execute``, anything else ``plan_and_execute``.
        num_deliveries_runs: Iterable of delivery counts to run.
        availability_percents: Iterable of availability percents to run.
        stat_run: NOTE(review): overwritten with 0 inside the loop, so the
            passed value is never used -- confirm this is intended.
        visualize: Forwarded to the plan/execute functions.
        out_gif_path: Forwarded to the plan/execute functions.
        out_img_path: Forwarded to ``visualize_path_willow``.

    Raises:
        ValueError: If models have to be generated and
            ``params['availabilities']`` is neither 'windows' nor 'simple'.
    """

    ## params
    params = load_params(world_config_file, schedule_config_file,
                         planner_config_file, model_config_file)

    ## import world
    # g = Graph()
    # g.read_graph_from_file(os.path.dirname(os.path.abspath(__file__)) + params['graph_filename'])

    # g = read_graph_from_file(os.path.dirname(os.path.abspath(__file__)) + params['graph_filename'])
    g, rooms = generate_graph(params['graph_generator_type'],
                              os.path.dirname(os.path.abspath(__file__)),
                              params['graph_filename'], params['max_rooms'],
                              params['rooms'], params['max_traversal_cost'],
                              params['distance_scaling'])
    # the configured rooms are replaced by the ones returned by the generator
    params['rooms'] = rooms

    for num_deliveries in num_deliveries_runs:
        for availability_percent in availability_percents:

            # temporal consistency parameter
            if params['availabilities'] == 'windows':
                # available_time = params['budget']*availability_percent
                # num_windows = max(int(round(float(available_time)/params['availability_length'])), 1)
                # ave_window_offset = float(params['budget'] - available_time)/num_windows
                # mu = max(ave_window_offset, 1)

                available_time = params['budget'] * availability_percent
                # at least one window, even when the available time is short
                num_windows = max(
                    1,
                    int(
                        round(
                            float(available_time) /
                            params['availability_length'])))
                # new_availability_length = int(float(available_time)/num_windows)
                # unavailable time per window, capped at half the total
                # unavailable time
                ave_window_offset = min(
                    float(params['budget'] - available_time) / num_windows,
                    float(params['budget'] - available_time) / 2)
                mu = int(ave_window_offset / 2)

                # mu = int(params['availability_length']/2)
            elif params['availabilities'] == 'simple':
                mu = int(params['availability_length'] / 2)
            else:
                mu = 30
            params['mu'] = mu

            # base models, true schedules
            # NOTE(review): this discards the stat_run argument; only run 0
            # is ever loaded.
            stat_run = 0
            model_file_exists = os.path.exists(base_model_filepath +
                                               str(num_deliveries) + "_" +
                                               str(availability_percent) +
                                               "_" + str(stat_run) + ".yaml")
            schedule_file_exists = os.path.exists(schedule_filepath +
                                                  str(num_deliveries) + "_" +
                                                  str(availability_percent) +
                                                  "_" + str(stat_run) +
                                                  ".yaml")
            if model_file_exists and schedule_file_exists:
                # load pre-generated schedules/models
                base_availability_models, base_model_variances, node_requests = load_base_models_from_file(
                    base_model_filepath, num_deliveries, availability_percent,
                    stat_run)
                true_availability_models, true_schedules = load_schedules_from_file(
                    schedule_filepath, num_deliveries, availability_percent,
                    stat_run)
                availabilities = base_availability_models
            else:

                if params['availabilities'] == 'windows':
                    # sample rooms for deliveries
                    # NOTE(review): node_requests stays unassigned when
                    # node_closeness is neither 'random' nor 'sequential'.
                    if params['node_closeness'] == 'random':
                        node_requests = random.sample(params['rooms'],
                                                      num_deliveries)
                    if params['node_closeness'] == 'sequential':
                        node_requests = params['rooms'][0:num_deliveries]

                    ## base availability models
                    avails, base_model_variances = generate_windows_overlapping(
                        node_requests, params['start_time'],
                        availability_percent, params['budget'],
                        params['time_interval'], params['availability_length'],
                        params['availability_chance'])

                    if params['use_gp']:
                        # imported here so that gp is only needed when
                        # use_gp is enabled
                        from gp import GP
                        gps = {}
                        availabilities = {}
                        for request in node_requests:
                            # time grid from start_time up to (excluding)
                            # budget, in steps of time_interval
                            x_in = list(
                                range(params['start_time'], params['budget'],
                                      params['time_interval']))
                            gps[request] = GP(None, x_in, avails[request],
                                              params['budget'],
                                              params['spacing'],
                                              params['noise_scaling'], True,
                                              'values')
                            availabilities[request] = gps[request].get_preds(
                                x_in)
                        base_availability_models = gps
                    else:
                        base_availability_models = avails
                        availabilities = avails

                    ## true availability models
                    # sampled_availability_models = sample_model_parameters(node_requests, base_availability_models, base_model_variances, params['sampling_method'])
                    true_availability_models = avails

                    ## true schedules
                    true_schedules = generate_schedule(
                        node_requests, true_availability_models, params['mu'],
                        params['num_intervals'],
                        params['schedule_generation_method'],
                        params['temporal_consistency'])

                    # save_base_models_to_file(base_model_filepath, base_availability_models, base_model_variances, node_requests, num_deliveries, availability_percent, stat_run)
                    # save_schedules_to_file(schedule_filepath, true_availability_models, true_schedules, node_requests, num_deliveries, availability_percent, stat_run)

                elif params['availabilities'] == 'simple':

                    # sample rooms for deliveries
                    # NOTE(review): node_requests stays unassigned when
                    # node_closeness is neither 'random' nor 'sequential'.
                    if params['node_closeness'] == 'random':
                        node_requests = random.sample(params['rooms'],
                                                      num_deliveries)
                    if params['node_closeness'] == 'sequential':
                        node_requests = params['rooms'][0:num_deliveries]

                    ## base availability models
                    base_availability_models, base_model_variances = generate_simple_models(
                        node_requests, params['start_time'],
                        availability_percent, params['budget'],
                        params['time_interval'], params['availability_length'],
                        params['availability_chance'])
                    availabilities = base_availability_models

                    # ## true availability models
                    # sampled_avails = sample_model_parameters(node_requests[stat_run], avails, variances, params['sampling_method'])
                    # true_availability_models.append(sampled_avails)

                    ## true schedules
                    true_schedules = generate_simple_schedules(
                        node_requests, base_availability_models, params['mu'],
                        params['num_intervals'],
                        params['schedule_generation_method'])
                    # true_schedules.append(sample_schedule_from_model(node_requests[stat_run], sampled_avails, mu, params['num_intervals'], params['temporal_consistency']))

                    # save_base_models_to_file(base_model_filepath, base_availability_models[stat_run], base_model_variances[stat_run], node_requests[stat_run], num_deliveries, availability_percent, stat_run)
                    # save_schedules_to_file(schedule_filepath, true_availability_models[stat_run], true_schedules[stat_run], node_requests[stat_run], num_deliveries, availability_percent, stat_run)

                else:
                    raise ValueError(params['availabilities'])

            ## "learned" availability models
            # the base models are used directly as the learned models
            availability_models = base_availability_models
            model_variances = base_model_variances

            # plan and execute paths for specified strategies
            # only the path history of each strategy is kept; the profit and
            # ratio values are not used afterwards
            visit_traces = {}
            for strategy in strategies:
                if strategy == 'mcts':
                    total_profit, competitive_ratio, maintenance_competitive_ratio, path_history = create_policy_and_execute(
                        strategy, g, availability_models, model_variances,
                        true_schedules, node_requests, params['mu'], params,
                        visualize, out_gif_path)
                else:
                    total_profit, competitive_ratio, maintenance_competitive_ratio, path_history = plan_and_execute(
                        strategy, g, availability_models, model_variances,
                        true_schedules, node_requests, params['mu'], params,
                        visualize, out_gif_path)
                visit_traces[strategy] = path_history

            # one visualization per (num_deliveries, availability_percent)
            visualize_path_willow(strategies, visit_traces, availabilities,
                                  true_schedules, node_requests,
                                  params['maintenance_node'],
                                  params['start_time'], params['budget'],
                                  params['time_interval'], out_img_path)
예제 #21
0
def stat_runs(world_config_file, schedule_config_file, planner_config_file,
              model_config_file, base_model_filepath, schedule_filepath,
              output_file, strategies, num_deliveries_runs,
              availability_percents, budgets, num_stat_runs, visualize,
              out_gif_path):
    """Run planning experiments and optionally append the results to a CSV.

    For every combination of ``availability_percents`` and ``budgets`` this
    function:

    1. derives the budget-dependent entries of ``params`` (``budget``,
       ``num_intervals``, ``longest_period``, ``mu``);
    2. for each stat run, loads cached base models / schedules when both
       cache files exist, otherwise generates them ('brayford' or 'windows'
       availabilities);
    3. executes every strategy and, when ``output_file`` is given, appends
       one CSV row per executed run.

    Args:
        world_config_file: Passed unchanged to ``load_params``.
        schedule_config_file: Passed unchanged to ``load_params``.
        planner_config_file: Passed unchanged to ``load_params``.
        model_config_file: Passed unchanged to ``load_params``.
        base_model_filepath: Path prefix of the cached base models; the
            existence check looks for
            ``<prefix><num_deliveries>_<availability_percent>_<stat_run>.p``.
        schedule_filepath: Path prefix of the cached schedules; the
            existence check looks for the same stem with a ``.yaml`` suffix.
        output_file: CSV file opened in append mode for the result rows, or
            ``None`` to disable recording.
        strategies: Iterable of strategy names. ``'mcts'`` is dispatched to
            ``create_policy_and_execute``; everything else goes to
            ``plan_and_execute``. Three ``observe_mult_visits_up_*`` names
            are aliases that set penalty/reward parameters and then run as
            ``'observe_mult_visits'``.
        num_deliveries_runs: Sequence of delivery counts; only element 0 is
            used (the loop over it is commented out).
        availability_percents: Iterable of availability percentages.
        budgets: Iterable of budgets.
        num_stat_runs: Number of model/schedule sets to load or generate.
        visualize: Passed through to the execution functions.
        out_gif_path: Passed through to the data loaders and the execution
            functions.

    Raises:
        ValueError: If the cache files are missing and
            ``params['availabilities']`` is neither ``'brayford'`` nor
            ``'windows'``.
    """

    # NOTE(review): `output_file is None` would be the idiomatic comparison.
    if output_file == None:
        record_output = False
    else:
        record_output = True

    ## params
    params = load_params(world_config_file, schedule_config_file,
                         planner_config_file, model_config_file)

    ## load world
    # g = Graph()
    # g.read_graph_from_file(os.path.dirname(os.path.abspath(__file__)) + params['graph_filename'])

    # g = read_graph_from_file(os.path.dirname(os.path.abspath(__file__)) + params['graph_filename'])
    g, rooms = generate_graph(params['graph_generator_type'],
                              os.path.dirname(os.path.abspath(__file__)),
                              params['graph_filename'], params['max_rooms'],
                              params['rooms'], params['max_traversal_cost'],
                              params['distance_scaling'])
    # The generated room list replaces the configured one for the rest of
    # the run.
    params['rooms'] = rooms

    # for num_deliveries in num_deliveries_runs:
    # Only the first delivery count is evaluated.
    num_deliveries = num_deliveries_runs[0]
    for availability_percent in availability_percents:

        for budget in budgets:

            # Budget-dependent parameters are rewritten in place for every
            # budget.
            params['budget'] = budget
            params['num_intervals'] = int(params['budget'] /
                                          params['time_interval'])
            params['longest_period'] = budget

            # temporal consistency parameter
            if params['availabilities'] == 'windows':
                # available_time = params['budget']*availability_percent
                # num_windows = max(int(round(float(available_time)/params['availability_length'])), 1)
                # ave_window_offset = float(params['budget'] - available_time)/num_windows
                # mu = max(ave_window_offset, 1)

                # mu is half of the average unavailable gap per window,
                # where the gap is capped at half of the total unavailable
                # time.
                available_time = params['budget'] * availability_percent
                num_windows = max(
                    1,
                    int(
                        round(
                            float(available_time) /
                            params['availability_length'])))
                # new_availability_length = int(float(available_time)/num_windows)
                ave_window_offset = min(
                    float(params['budget'] - available_time) / num_windows,
                    float(params['budget'] - available_time) / 2)
                mu = int(ave_window_offset / 2)

                # mu = int(params['availability_length']/2)
            elif params['availabilities'] == 'simple':
                mu = int(params['availability_length'] / 2)
            else:
                # Fallback value for any other availability type (the
                # 'brayford' branch below overwrites params['mu'] again).
                mu = 30
            params['mu'] = mu

            # base models, true schedules
            # Each list below is indexed by stat_run.
            node_requests = []
            base_availability_models = []
            base_model_variances = []
            true_availability_models = []
            true_schedules = []
            # NOTE(review): num_test_runs is never read in this function.
            num_test_runs = 0
            for stat_run in range(num_stat_runs):
                # Cached data is used only when BOTH the model file and the
                # schedule file exist for this configuration.
                model_file_exists = os.path.exists(base_model_filepath +
                                                   str(num_deliveries) + "_" +
                                                   str(availability_percent) +
                                                   "_" + str(stat_run) + ".p")
                schedule_file_exists = os.path.exists(
                    schedule_filepath + str(num_deliveries) + "_" +
                    str(availability_percent) + "_" + str(stat_run) + ".yaml")
                if model_file_exists and schedule_file_exists:
                    # load pre-generated schedules/models
                    gmms, base_variances, requests = load_base_models_from_file(
                        base_model_filepath, num_deliveries,
                        availability_percent, stat_run)
                    true_avails, schedules = load_schedules_from_file(
                        schedule_filepath, num_deliveries,
                        availability_percent, stat_run)
                    node_requests.append(requests)

                    # gmms = {}
                    # for request in node_requests[stat_run]:
                    #     x_in = list(range(int(params['start_time']), int(params['budget']), int(params['time_interval'])))
                    #     y_in = Y_in[request][:len(x_in)]
                    #     gmms[request] = build_gmm(x_in, y_in, params['start_time'], params['start_time'] + params['budget'], params['time_interval'], params, True)

                    base_availability_models.append(gmms)
                    base_model_variances.append(base_variances)
                    true_availability_models.append(true_avails)
                    true_schedules.append(schedules)
                else:

                    if params['availabilities'] == 'brayford':

                        # model
                        if params['use_gp']:
                            from gp import GP
                            gps = {}
                        # if params['use_gmm']:
                        gmms = {}
                        # mus = {}
                        # mu / mu_n accumulate the values returned by the
                        # training-data loader across all requests.
                        mu = 0.0
                        mu_n = 0
                        # Every room is a request in this mode.
                        node_requests.append(params['rooms'])
                        for request in node_requests[stat_run]:
                            x_in, y_in, mu_combined, mu_combined_n = load_brayford_training_data(
                                request,
                                os.path.dirname(os.path.abspath(__file__)) +
                                params['data_path'], out_gif_path)
                            if params['use_gp']:
                                gps[request] = GP(None, x_in, y_in,
                                                  params['budget'], 1,
                                                  params['noise_scaling'],
                                                  True, 'values')
                            else:
                                gmms[request] = build_gmm(
                                    x_in, y_in, params['start_time'],
                                    params['start_time'] + params['budget'],
                                    params['time_interval'], params)
                                # gmms[request].visualize(out_gif_path + "train_" + request + "_gmm_histogram_10.jpg", request)
                            # mus[request] = mu_combined/mu_combined_n
                            mu += mu_combined
                            mu_n += mu_combined_n

                            # gps[request].visualize(out_gif_path + "train_" + request + "_model_histogram_10.jpg", request)

                        if params['use_gp']:
                            base_availability_models.append(gps)
                        else:
                            base_availability_models.append(gmms)
                        # No variances are produced in this mode.
                        base_model_variances.append({})
                        # Overrides the params['mu'] set above for this
                        # budget; the value stays in params until the next
                        # budget iteration resets it.
                        mu = mu / mu_n
                        params['mu'] = mu

                        # true schedule
                        # if params['availabilities'] == 'brayford':

                        schedules = {}
                        for request in node_requests[stat_run]:
                            X, Y = load_brayford_testing_data(
                                request,
                                os.path.dirname(os.path.abspath(__file__)) +
                                params['data_path'], stat_run, out_gif_path)
                            # The stat_run-th entry of Y is taken as this
                            # request's schedule.
                            schedules[request] = Y[stat_run]
                            # for i in range(Y.shape[0]):
                            #     if not(i in schedules):
                            #         schedules[i] = {}
                            #     schedules[i][request] = Y[i]
                            # num_test_runs = Y.shape[0]

                            # schedules[request] = Y
                            # if params['use_gp']:
                            # from gp import GP
                            # test_gp = GP(None, x_in, y_in, params['budget'], 1, params['noise_scaling'], True, 'values')
                            # if params['use_gmm']:
                            #     test_gp = build_gmm(x_in, y_in, params['start_time'], params['start_time'] + params['budget'], params['time_interval'], params)
                            # if stat_run == 0:
                            #     test_gp.visualize(out_gif_path + "february_" + request + "_model_10.jpg", request)
                            # else:
                            # test_gp.visualize(out_gif_path + "november_" + request + "_model_histogram_10.jpg", request)
                            # schedules[request] = test_gp.threshold_sample_schedule(params['start_time'], params['budget'], params['time_interval'])

                            # # visualize:
                            # fig = plt.figure()
                            # X = np.array(list(range(params['start_time'], params['budget'], params['time_interval'])))
                            # Y = np.array(schedules[request])
                            # plt.scatter(X, Y)
                            # if stat_run == 0:
                            #     plt.title("Brayford Schedule Node " + request + ": February")
                            #     plt.savefig(out_gif_path + "february_" + request + ".jpg")
                            # else:
                            #     plt.title("Brayford Schedule Node " + request + ": November")
                            #     plt.savefig(out_gif_path + "november_" + request + ".jpg")

                        # NOTE: unlike the 'windows' branch, nothing is
                        # saved to the cache files here, and
                        # true_availability_models gets no entry.
                        true_schedules.append(schedules)

                    elif params['availabilities'] == 'windows':

                        # sample rooms for deliveries
                        # NOTE(review): if node_closeness is neither
                        # 'random' nor 'sequential', nothing is appended and
                        # node_requests[stat_run] below will fail -- confirm
                        # that the config is validated elsewhere.
                        if params['node_closeness'] == 'random':
                            node_requests.append(
                                random.sample(params['rooms'], num_deliveries))
                        if params['node_closeness'] == 'sequential':
                            node_requests.append(
                                params['rooms'][0:num_deliveries])

                        ## base availability models
                        avails, variances = generate_windows_overlapping(
                            node_requests[stat_run], params['start_time'],
                            availability_percent, params['budget'],
                            params['time_interval'],
                            params['availability_length'],
                            params['availability_chance'])
                        # X_in = {}
                        # Y_in = {}
                        # Both branches below train on a noisy copy of the
                        # generated availabilities: uniform noise centred on
                        # zero with total width params['noise_scaling'],
                        # clamped to [0.01, 0.99].
                        if params['use_gp']:
                            from gp import GP
                            gps = {}
                            for request in node_requests[stat_run]:
                                x_in = list(
                                    range(int(params['start_time']),
                                          int(params['budget']),
                                          int(params['time_interval'])))
                                y_in = copy.deepcopy(avails[request])
                                for i in range(len(y_in)):
                                    y = max(
                                        y_in[i] + random.random() *
                                        params['noise_scaling'] -
                                        params['noise_scaling'] / 2.0, 0.01)
                                    y = min(y, .99)
                                    y_in[i] = y

                                gps[request] = GP(None, x_in, y_in,
                                                  params['budget'],
                                                  params['spacing'], 0.0, True,
                                                  'values')
                                # Y_in[request] = y_in
                            base_availability_models.append(gps)
                        else:
                            gmms = {}
                            for request in node_requests[stat_run]:
                                x_in = list(
                                    range(int(params['start_time']),
                                          int(params['budget']),
                                          int(params['time_interval'])))
                                y_in = copy.deepcopy(avails[request])
                                for i in range(len(y_in)):
                                    y = max(
                                        y_in[i] + random.random() *
                                        params['noise_scaling'] -
                                        params['noise_scaling'] / 2.0, 0.01)
                                    y = min(y, .99)
                                    y_in[i] = y
                                gmms[request] = build_gmm(
                                    x_in, y_in, params['start_time'],
                                    params['start_time'] + params['budget'],
                                    params['time_interval'], params, True)
                                # Y_in[request] = y_in
                                # gmms[request].visualize(out_gif_path + "train_" + request + "_gmm_histogram_10.jpg", request)
                                # mus[request] = mu_combined/mu_combined_n
                                # mu += mu_combined
                                # mu_n += mu_combined_n
                            base_availability_models.append(gmms)
                        # else:
                        #     base_availability_models.append(avails)

                        # base_availability_models.append(avails)
                        base_model_variances.append(variances)

                        # true availability models
                        # NOTE(review): the sampled parameters are
                        # immediately overwritten by `avails`, and
                        # sampled_avails is not read afterwards; the
                        # un-sampled `avails` serve as the true model.
                        # Presumably deliberate -- confirm before removing
                        # the call, which may also consume random state.
                        sampled_avails = sample_model_parameters(
                            node_requests[stat_run], avails, variances,
                            params['sampling_method'])
                        sampled_avails = avails

                        true_availability_models.append(avails)

                        ## true schedules
                        true_schedules.append(
                            generate_schedule(
                                node_requests[stat_run], avails, params['mu'],
                                params['num_intervals'],
                                params['schedule_generation_method'],
                                params['temporal_consistency']))
                        # true_schedules.append(sample_schedule_from_model(node_requests[stat_run], sampled_avails, mu, params['num_intervals'], params['temporal_consistency']))

                        # Persist the freshly generated data so later runs
                        # take the cached branch above.
                        save_base_models_to_file(
                            base_model_filepath,
                            base_availability_models[stat_run],
                            base_model_variances[stat_run],
                            node_requests[stat_run], num_deliveries,
                            availability_percent, stat_run)
                        save_schedules_to_file(
                            schedule_filepath,
                            true_availability_models[stat_run],
                            true_schedules[stat_run], node_requests[stat_run],
                            num_deliveries, availability_percent, stat_run)

                    # elif params['availabilities'] == 'simple':

                    #     # sample rooms for deliveries
                    #     if params['node_closeness'] == 'random':
                    #         node_requests.append(random.sample(params['rooms'], num_deliveries))
                    #     if params['node_closeness'] == 'sequential':
                    #         node_requests.append(params['rooms'][0:num_deliveries])

                    #     ## base availability models
                    #     avails, variances = generate_simple_models(node_requests[stat_run], params['start_time'], availability_percent, params['budget'], params['time_interval'], params['availability_length'], params['availability_chance'])
                    #     base_availability_models.append(avails)
                    #     base_model_variances.append(variances)

                    #     # ## true availability models
                    #     # sampled_avails = sample_model_parameters(node_requests[stat_run], avails, variances, params['sampling_method'])
                    #     # true_availability_models.append(sampled_avails)

                    #     ## true schedules
                    #     true_schedules.append(generate_simple_schedules(node_requests[stat_run], sampled_avails, params['mu'], params['num_intervals'], params['schedule_generation_method']))
                    #     # true_schedules.append(sample_schedule_from_model(node_requests[stat_run], sampled_avails, mu, params['num_intervals'], params['temporal_consistency']))

                    #     # save_base_models_to_file(base_model_filepath, base_availability_models[stat_run], base_model_variances[stat_run], node_requests[stat_run], num_deliveries, availability_percent, stat_run)
                    #     # save_schedules_to_file(schedule_filepath, true_availability_models[stat_run], true_schedules[stat_run], node_requests[stat_run], num_deliveries, availability_percent, stat_run)

                    else:
                        # 'simple' generation is commented out above, so it
                        # also ends up here when no cache files exist.
                        raise ValueError(params['availabilities'])

            ## "learned" availability models
            availability_models = base_availability_models
            model_variances = base_model_variances

            # plan and execute paths for specified strategies
            for strategy in strategies:

                # strategy_name is what gets written to the CSV; `strategy`
                # may be rewritten below to the underlying planner name.
                strategy_name = strategy

                # Defaults, overridden by the alias strategies below.
                params['uncertainty_penalty'] = 0.0
                params['observation_reward'] = 0.0
                params['deliver_threshold'] = 0.0

                if strategy == 'observe_mult_visits_up_5_or_0_dt_0':
                    params['uncertainty_penalty'] = 0.5
                    params['observation_reward'] = 0.0
                    params['deliver_threshold'] = 0.0
                    strategy_name = strategy
                    strategy = 'observe_mult_visits'

                if strategy == 'observe_mult_visits_up_0_or_7_dt_0':
                    params['uncertainty_penalty'] = 0.0
                    params['observation_reward'] = 0.7
                    params['deliver_threshold'] = 0.0
                    strategy_name = strategy
                    strategy = 'observe_mult_visits'

                if strategy == 'observe_mult_visits_up_5_or_7_dt_0':
                    params['uncertainty_penalty'] = 0.5
                    params['observation_reward'] = 0.7
                    params['deliver_threshold'] = 0.0
                    strategy_name = strategy
                    strategy = 'observe_mult_visits'

                # for stat_run in range(num_stat_runs):
                # NOTE(review): only stat run index 2 is executed; the full
                # loop is commented out. The indexing below fails unless at
                # least three stat runs were prepared above.
                for stat_run in [2]:
                    # stat_run = 0
                    # for test_run in range(num_test_runs):
                    if strategy == 'mcts':
                        total_profit, competitive_ratio, maintenance_competitive_ratio, path_history, ave_plan_time = create_policy_and_execute(
                            strategy, g, availability_models[stat_run],
                            model_variances[stat_run],
                            true_schedules[stat_run], node_requests[stat_run],
                            params['mu'], params, visualize, out_gif_path)
                    else:
                        total_profit, competitive_ratio, maintenance_competitive_ratio, path_history, ave_plan_time = plan_and_execute(
                            strategy, g, availability_models[stat_run],
                            model_variances[stat_run],
                            true_schedules[stat_run], node_requests[stat_run],
                            params['mu'], params, visualize, out_gif_path)

                    if record_output:
                        # Append one row per executed run; the file is
                        # reopened each time, and no header row is written
                        # here.
                        with open(output_file, 'a', newline='') as csvfile:
                            writer = csv.writer(csvfile,
                                                delimiter=',',
                                                quotechar='|',
                                                quoting=csv.QUOTE_MINIMAL)
                            writer.writerow([
                                strategy_name, params['budget'],
                                num_deliveries, availability_percent,
                                params['availability_chance'],
                                params['maintenance_reward'],
                                params['max_noise_amplitude'],
                                params['variance_bias'], competitive_ratio,
                                maintenance_competitive_ratio, ave_plan_time
                            ])
예제 #22
0
    # NOTE: this excerpt is the interior of a function whose header is not
    # part of the snippet, and the `while` body continues past the end of it.

    # Model directory is built from a hard-coded root and args.dataset.
    modeldir = "/export/a10/kduh/p/mt/gridsearch/" + args.dataset + "/models/"

    # The third return value of extract_data is ignored.
    x, y, _ = extract_data(modeldir=modeldir,
                           threshold=args.threshold,
                           architecture=args.architecture,
                           rnn_cell_type=args.rnn_cell_type)

    # One row per starting index i, one column per entry of y.
    result = np.zeros((len(y) - 3, len(y)))
    for i in range(len(y) - 3):
        print("step {0}/{1}".format(i + 1, len(y) - 3))
        # Each run starts with three consecutive labelled indices.
        label_ids = np.array([i, i + 1, i + 2])
        # Loop until every index is labelled (label_ids is presumably grown
        # in the part of the loop body not shown here -- TODO confirm).
        while len(label_ids) != len(y):
            # NOTE(review): if args.model is none of the three values below,
            # opt_model is unbound when fit_predict() is called.
            if args.model == "gbssl":
                opt_model = GBSSL(x, y[label_ids], label_ids)
            elif args.model == "gp":
                opt_model = GP(x, y[label_ids], label_ids)
            elif args.model == "krr":
                opt_model = KRR(x, y[label_ids], label_ids)
            y_preds, y_vars = opt_model.fit_predict()
            del opt_model
            # Indices that do not yet have a label.
            unlabel_ids = np.array(
                [u for u in range(len(y)) if u not in label_ids])

            def get_risk(candidate_id):
                """Score a candidate by refitting with its predicted label.

                The candidate is added to the labelled set with its current
                prediction as its label, a GBSSL model is refit, and the L2
                norm of the difference between the new predictions at the
                labelled indices and their true labels is returned.
                """
                # NOTE(review): GBSSL is used here regardless of args.model
                # -- confirm this is intentional.
                opt_model = GBSSL(
                    x, np.append(y[label_ids], y_preds[candidate_id]),
                    np.append(label_ids, candidate_id))
                new_y_preds, new_y_vars = opt_model.fit_predict()
                del opt_model
                return np.linalg.norm(
                    np.array(new_y_preds)[label_ids] - y[label_ids])
예제 #23
0
import matplotlib.pyplot as plt
from torch.optim import Adam
import torch
import numpy as np

from gp import GP


if __name__ == '__main__':
  # Fit a GP to samples of a 2-D test function and report the mean squared
  # error of its predictions on a separate set of test inputs.

  def f(x):
    """Target function cos(2 * x[:, 0]) + sin(x[:, 1]), flattened to 1-D."""
    return (torch.cos(2 * x[:, 0]) + torch.sin(x[:, 1])).view(-1)

  gp = GP()
  n1, n2, ny = 40, 100, 5
  domain = (-5, 5)
  lower, upper = domain

  # Training inputs: n1 points drawn uniformly from the domain shrunk by 2
  # on each side.
  train_sampler = torch.distributions.Uniform(lower + 2, upper - 2)
  X_data = train_sampler.sample((n1, 2))
  y_data = f(X_data)

  # Test inputs: n2 linspace values over the full domain, stacked twice and
  # reshaped into rows of two.
  # NOTE(review): stacking along dim 0 and then view(-1, 2) pairs
  # *consecutive* grid values rather than producing (t, t) rows -- confirm
  # this is the intended test set.
  grid = torch.linspace(lower, upper, n2)
  X_test = torch.stack([grid, grid]).view(-1, 2)
  y_test = f(X_test)

  gp.fit(X_data, y_data, True, epochs=10000)
  y_pred, _ = gp.predict(X_test)
  squared_error = (y_pred - y_test) ** 2
  print('MSE : ', squared_error.mean().item())
예제 #24
0
def main():
    """Train the GP and its embedding model on top of a frozen VAE.

    Loads the face data and the pre-trained VAE weights, disables gradients
    on every VAE parameter, and then, for ``opt.epochs`` epochs, encodes the
    training set, samples latents, evaluates, computes the GP Taylor
    coefficients and updates the ``Vmodel`` / ``GP`` parameters with Adam.
    Per-epoch results are accumulated in a history dict and passed to
    ``callback_gppvae`` every ``opt.epoch_cb`` epochs.
    """

    torch.manual_seed(opt.seed)

    if opt.debug:
        pdb.set_trace()

    def to_index(table):
        """Return column 0 of ``table`` as a long tensor on ``device``."""
        return Variable(table[:, 0].long(), requires_grad=False).to(device)

    # load data: image, object, and view
    img, obj, view = read_face_data(opt.data)
    datasets = {
        split: FaceDataset(img[split], obj[split], view[split])
        for split in ("train", "val")
    }
    train_queue = DataLoader(datasets["train"],
                             batch_size=opt.bs,
                             shuffle=True)
    val_queue = DataLoader(datasets["val"], batch_size=opt.bs, shuffle=False)

    # longint view and object repr
    Dt = to_index(obj["train"])
    Wt = to_index(view["train"])
    Dv = to_index(obj["val"])
    Wv = to_index(view["val"])

    # define VAE, restore its weights and switch it to eval mode
    vae = FaceVAE(**vae_cfg).to(device)
    vae.load_state_dict(torch.load(opt.vae_weights))
    vae.to(device)
    vae.eval()

    # the VAE parameters receive no gradients from here on
    for weight in vae.parameters():
        weight.requires_grad = False

    # define gp: sizes come from the number of distinct objects / views
    n_objects = sp.unique(obj["train"]).shape[0]
    n_views = sp.unique(view["train"]).shape[0]
    vm = Vmodel(n_objects, n_views, opt.xdim, n_views).to(device)
    gp = GP(n_rand_effs=1).to(device)
    gp_params = nn.ParameterList()
    for module in (vm, gp):
        gp_params.extend(module.parameters())

    # define optimizer (over the Vmodel and GP parameters only) and loss
    gp_optimizer = optim.Adam(gp_params, lr=opt.gp_lr)
    bce = nn.BCELoss(reduction='sum').to(device)

    if opt.debug:
        pdb.set_trace()

    log_template = (
        "epoch %d - resons_term: %f - gp_nll: %f - pen_term: %f - mse: %f - abs: %f - fake_loss: %f - tra_mse_val: %f - train_mse_out: %f"
    )

    history = {}
    for epoch in range(opt.epochs):

        # 1. encode Y in mini-batches
        z_mean, z_std = encode_Y(vae, train_queue)

        # 2. sample Z
        noise = Variable(torch.randn(*z_std.shape),
                         requires_grad=False).to(device)
        z_sample = z_mean + noise * z_std

        # 3. evaluation step (not needed for training)
        v_train = vm(Dt, Wt).detach()
        v_val = vm(Dv, Wv).detach()
        rv_eval, imgs, covs = eval_step(vae, gp, vm, val_queue, z_mean,
                                        v_train, v_val)

        # 4. compute first-order Taylor expansion coefficient
        Zb, Vbs, vbs, gp_nll = gp.taylor_coeff(z_sample, [v_train])
        rv_eval["gp_nll"] = float(gp_nll.data.mean().cpu()) / vae.K

        # 5. accumulate gradients over mini-batches and update params
        rv_back = backprop_and_update(vae, bce, gp, vm, train_queue, Dt, Wt,
                                      noise, Zb, Vbs, vbs, gp_optimizer)
        rv_back["loss"] = (rv_back["recon_term"] + rv_eval["gp_nll"] +
                           rv_back["pen_term"])

        smartAppendDict(history, rv_eval)
        smartAppendDict(history, rv_back)
        smartAppend(history, "vs", gp.get_vs().data.cpu().numpy())

        # NOTE(review): this reads rv_back["gp_nll"], while the value
        # computed in step 4 is stored in rv_eval["gp_nll"] -- confirm that
        # backprop_and_update returns that key as well.
        log_values = (epoch, rv_back["recon_term"], rv_back["gp_nll"],
                      rv_back["pen_term"], rv_back["mse"], rv_back["abs"],
                      rv_back["fake_loss"], rv_eval["mse_val"],
                      rv_eval["mse_out"])
        logging.info(log_template % log_values)

        # callback only every opt.epoch_cb epochs
        if epoch % opt.epoch_cb != 0:
            continue
        logging.info("epoch %d - executing callback" % epoch)
        ffile = os.path.join(opt.outdir, "plot.%.5d.png" % epoch)
        callback_gppvae(epoch, history, covs, imgs, ffile)
예제 #25
0
# Split the (x, y) samples into separate coordinate arrays.
X = np.array([point[0] for point in data])
Y = np.array([point[1] for point in data])

# Standardise Y to zero mean / unit standard deviation; the statistics are
# kept so that results can be mapped back to the original scale for plotting.
mean_Y = np.mean(Y)
std_Y = np.std(Y)
Y = (Y - mean_Y) / std_Y

# Fit a Gaussian Process to the data points and evaluate its posterior on
# 50 evenly spaced inputs in [0, 960].
lengthscale = 40
signal_variance = 3.
noise_variance = 0.1
X_star = np.linspace(0, 960, 50)
kernel = SquaredExponentialKernel(lengthscale=lengthscale,
                                  signal_variance=signal_variance)
gp = GP(kernel, noise_variance=noise_variance)
post_m, post_var, weights = gp.posterior(X, Y, X_star)

# Plot the posterior mean and the observations on the original Y scale.
color = 'yellow'
ax.plot(X_star, post_m * std_Y + mean_Y, color=color)
ax.scatter(X, Y * std_Y + mean_Y, s=30, color=color)

# Shade mean +/- 1.96 standard deviations, using the diagonal of post_var
# as the per-point variance.
post_var = np.diagonal(post_var)
half_width = 1.96 * np.sqrt(post_var)
lower_band = (post_m - half_width) * std_Y + mean_Y
upper_band = (post_m + half_width) * std_Y + mean_Y
plt.fill_between(X_star, lower_band, upper_band, color=color, alpha=0.2)

plt.xlim(0, 960)
예제 #26
0
파일: main.py 프로젝트: Hybbon/GP
def main():
    """Build a GP instance, run it, print the results and return it.

    The instance is created from the module-level ``terminals``,
    ``functions``, ``fitness`` and ``config`` objects; the outcome of
    ``run()`` is passed to ``print_results`` together with
    ``training_cases``.
    """
    engine = GP(terminals, functions, fitness, config)
    engine.init_population()
    print_results(engine.run(), training_cases)
    return engine