Example #1
class BayesianOptimizer(object):
    def __init__(self, run_multi_threaded=False, metric=None):

        if metric is not None:
            global MMetric
            MMetric = metric

        if run_multi_threaded:
            self.num_threads = multiprocessing.cpu_count()
        else:
            self.num_threads = 1
            self.pool = None

        self.evaluation_depth = maximum_search_points * self.num_threads
        self.random_points = 0.0
        self.decay = 0.95

        if __USE_CPP_BACKEND__:
            self.regressor = None
        else:
            if __REGRESSOR_LIB__ == "GPy":
                self.regressor = None
            else:
                kernel = (ConstantKernel(1.0, constant_value_bounds="fixed") *
                          sRBF(1.0, length_scale_bounds="fixed"))
                self.regressor = GaussianProcessRegressor(copy_X_train=False,
                                                          normalize_y=True,
                                                          kernel=kernel)

    def set_map(self, space_map):
        self.regressor = BayesOptimizer_wrapper.PyBayesOptimizer(
            extract_map_from_config(space_map))

    def reset(self):
        del self.regressor
        self.regressor = None
        self.regressor = GaussianProcessRegressor(copy_X_train=False,
                                                  normalize_y=True)

    def fit(self, xs, ys):
        norm = np.linalg.norm(ys)
        ys = ys / norm
        if __USE_CPP_BACKEND__:
            self.regressor.fit(xs, ys)
        else:
            ys = np.reshape(ys, (len(ys), 1))
            if __REGRESSOR_LIB__ == "GPy":
                kern = GPy.kern.RBF(input_dim=1, ARD=True)
                M = 15
                RR = np.linspace(-1, 1, M)[:, None]
                α = 0.0001
                self.regressor = GPy.models.SparseGPRegression(xs,
                                                               ys,
                                                               kern.copy(),
                                                               Z=RR.copy())
                self.regressor.inference_method = GPy.inference.latent_function_inference.PEP(
                    α)
                self.regressor.optimize(messages=False)
            else:
                self.regressor.fit(xs, ys)

    def create_calc_score_threads(self, sliced_test_points, batch_size):
        start = timeit.default_timer()
        with Manager() as manager:
            threads = []
            maxes = []
            for i in range(self.num_threads):
                maximums = manager.dict()
                thread = RegressorThread(regressor=copy.copy(self.regressor),
                                         search_space=sliced_test_points[i],
                                         id=i,
                                         maximums=maximums,
                                         batch_size=batch_size)
                thread.start()
                threads.append(thread)
                maxes.append(maximums)

            for thread in threads:
                thread.join()
                thread.close()
                del thread
            local_maximums = {}

            minimum = 1e9
            for i in range(len(maxes)):
                for key, value in maxes[i].items():
                    global_key = key + i * len(sliced_test_points[-1])
                    if len(local_maximums) < batch_size:
                        local_maximums[global_key] = value
                        minimum = min(minimum, value)
                    elif value > minimum:
                        local_maximums[global_key] = value
                        # evict the entry that currently holds the smallest score
                        min_key = min(local_maximums, key=local_maximums.get)
                        del local_maximums[min_key]
                        minimum = min(local_maximums.values())
            print(timeit.default_timer() - start)
            return local_maximums

    def predict(self, xs, return_std=False):
        if isinstance(self.regressor, GPy.models.SparseGPRegression):
            mean, var = self.regressor.predict(xs)
            if not return_std:
                return mean
            return mean, var
        predictions = self.regressor.predict(xs, return_std=return_std)
        return predictions

    def next_batch(self, visited, visited_indexes, batch_size, test_points,
                   visited_results):
        if __measure_time__:
            start = timeit.default_timer()

        if __USE_CPP_BACKEND__:
            maximums = self.regressor.next_batch(batch_size, visited_indexes)
            if __measure_time__:
                stop = timeit.default_timer()
                print('Time-find maximums: ', stop - start)
                print(
                    "Visited points so far {}, points to be tested {}".format(
                        len(visited_indexes), len(test_points)))
            return maximums

        local_maximums = []
        for i in range(len(test_points) // (self.evaluation_depth) + 1):
            multi_threaded = False
            if self.num_threads > 1:
                if min(self.num_threads, 2**21 / len(test_points)) > 1:
                    multi_threaded = True
                    self.num_threads = int(
                        min(self.num_threads,
                            maximum_search_points / len(test_points)))
            if multi_threaded:
                step = min(self.evaluation_depth,
                           len(test_points[i * self.evaluation_depth:])
                           ) // self.num_threads
                sliced_test_points = [
                    test_points[i * self.evaluation_depth +
                                step * j:i * self.evaluation_depth + step *
                                (j + 1)] for j in range(self.num_threads)
                ]
                scores = self.create_calc_score_threads(
                    sliced_test_points, batch_size)

                for key, value in scores.items():
                    local_maximums.append(
                        [value, key + i * self.evaluation_depth])

            else:
                end_point = min(len(test_points),
                                (i + 1) * self.evaluation_depth)
                sliced_test_points = test_points[i * (
                    self.evaluation_depth):end_point]
                scores = self.acquisition(
                    sliced_test_points,
                    visited_results / np.linalg.norm(visited_results))
                accepted = 0
                while accepted < batch_size:
                    arg_max = np.argmax(scores)
                    if arg_max + (
                            i *
                        (self.evaluation_depth)) not in visited_indexes:
                        local_maximums.append([
                            scores[arg_max],
                            arg_max + (i * (self.evaluation_depth))
                        ])
                        accepted += 1
                    scores[arg_max] = -1

        maximums = []
        for _ in range(batch_size):
            arg_max = np.argmax(local_maximums, axis=0)
            maximums.append(local_maximums[arg_max[0]][1])
            local_maximums[arg_max[0]][0] = 0

        if __measure_time__:
            stop = timeit.default_timer()
            print('Time-find maxima: ', stop - start)
            print("Visited points so far {}, points to be tested {}".format(
                len(visited_indexes), len(test_points)))
        # replace the tail of the batch with random exploration points
        for i in range(int(self.random_points * len(maximums))):
            x = np.random.randint(0, len(test_points))
            while x in visited_indexes or x in maximums:
                x = np.random.randint(0, len(test_points))
            maximums[-(i + 1)] = x
        self.random_points *= self.decay

        return maximums

    def acquisition(self, test_points, evaluation_results):
        if __ACQUISITION__ == 'PI':
            return self.PI(test_points, evaluation_results)
        return self.expected_improvement(test_points, evaluation_results)

    def PI(self, test_points, evaluation_results):
        minimum = min(evaluation_results)
        mu, std = self.surrogate(test_points)
        mu = mu[:, 0]
        probs = norm.cdf(minimum, loc=mu, scale=std)
        return probs

    def expected_improvement(self, test_points, evaluation_results, xi=0.02):
        best = min(evaluation_results)
        mu, std = self.surrogate(test_points)
        mu = mu[:, 0]
        with catch_warnings():
            # ignore generated warnings
            simplefilter("ignore")
            imp = mu - best - xi
            Z = imp / std
            ei = imp * norm.cdf(Z) + std * norm.pdf(Z)
            ei[std == 0.0] = 0.0

        return ei

    def surrogate(self, x):
        with catch_warnings():
            # ignore generated warnings
            simplefilter("ignore")
            return self.regressor.predict(x, return_std=True)
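A self-contained sketch of the expected-improvement step that next_batch and expected_improvement above implement, using scikit-learn's GaussianProcessRegressor on a toy 1-D discrete search space (illustrative only; the candidate grid, visited indices, and batch size below are made up):

import numpy as np
from scipy.stats import norm
from sklearn.gaussian_process import GaussianProcessRegressor

# toy discrete search space and a few already-visited evaluations
test_points = np.linspace(0.0, 1.0, 200).reshape(-1, 1)
visited_indexes = [0, 50, 120, 199]
xs = test_points[visited_indexes]
ys = np.sin(6.0 * xs).ravel()

gp = GaussianProcessRegressor(normalize_y=True).fit(xs, ys)
mu, std = gp.predict(test_points, return_std=True)

# expected improvement with the same sign convention as the class above
best, xi = ys.min(), 0.02
imp = mu - best - xi
Z = np.divide(imp, std, out=np.zeros_like(imp), where=std > 0)
ei = imp * norm.cdf(Z) + std * norm.pdf(Z)
ei[std == 0.0] = 0.0

# next batch: highest-EI candidates that have not been visited yet
batch = [i for i in np.argsort(-ei) if i not in visited_indexes][:4]

The class above additionally splits the candidate grid across processes and mixes in decaying random exploration; the filter on visited_indexes here plays the same role as its membership checks in next_batch.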
Example #2
class BO_algo():
    def __init__(self, gpy_impl=False):
        """Initializes the algorithm with a parameter configuration. """
        # constants
        self.v_min = 1.2
        self.logv_min = math.log(self.v_min)
        self.gpy_impl = gpy_impl

        # data holders
        self.x_sample = np.array([]).reshape(-1, domain.shape[0])
        self.f_sample = np.array([]).reshape(-1, domain.shape[0])
        self.v_sample = np.array([]).reshape(-1, domain.shape[0])
        self.logv_sample = np.array([]).reshape(-1, domain.shape[0])
        self.gv_sample = np.array([]).reshape(-1, domain.shape[0])

        # incorporate prior beliefs about f() and v()
        self.f_sigma = 0.15
        self.f_variance = 0.5
        self.f_lengthscale = 0.5
        self.f_kernel = Matern(length_scale=0.5, nu=2.5)
        self.f_gpr = GaussianProcessRegressor(kernel=self.f_kernel,
                                              alpha=self.f_sigma**2)
        if self.gpy_impl:
            self.f_kernel = GPy.kern.Matern52(input_dim=domain.shape[0],
                                              variance=self.f_variance,
                                              lengthscale=self.f_lengthscale)
            self.f_gpr = None

        self.v_sigma = 0.0001
        self.v_variance = math.sqrt(2)
        self.v_lengthscale = 0.5
        self.v_const = 1.5
        self.v_kernel = ConstantKernel(self.v_const) + Matern(
            length_scale=self.v_lengthscale, nu=2.5)
        self.v_gpr = GaussianProcessRegressor(kernel=self.v_kernel,
                                              alpha=self.v_sigma**2)
        if self.gpy_impl:
            self.v_kernel = GPy.kern.Matern52(
                input_dim=domain.shape[0],
                variance=self.v_variance,
                lengthscale=self.v_lengthscale) + GPy.kern.Bias(
                    input_dim=domain.shape[0], variance=self.v_const)
            self.v_gpr = None

        self.gv_const = self.v_const - self.v_min
        self.gv_sigma = 0.0001
        self.gv_kernel = ConstantKernel(self.gv_const) + Matern(
            length_scale=0.5, nu=2.5)
        self.gv_gpr = GaussianProcessRegressor(kernel=self.gv_kernel,
                                               alpha=self.gv_sigma**2)

        self.logv_const = math.log(self.v_const)
        self.logv_sigma = 0.0001
        self.logv_kernel = ConstantKernel(self.logv_const) + Matern(
            length_scale=self.v_lengthscale, nu=2.5)
        self.logv_gpr = GaussianProcessRegressor(kernel=self.logv_kernel,
                                                 alpha=self.logv_sigma**2)

    def next_recommendation(self):
        """
        Recommend the next input to sample.

        Returns
        -------
        recommendation: np.ndarray
            1 x domain.shape[0] array containing the next point to evaluate
        """

        # In implementing this function, you may use optimize_acquisition_function() defined below.
        if self.x_sample.size == 0:
            # if no point has been sampled yet, we can't optimize the acquisition function yet
            # we instead sample a random starting point in the domain
            x0 = domain[:, 0] + (domain[:, 1] - domain[:, 0]) * np.random.rand(
                domain.shape[0])
            next_x = np.array([x0]).reshape(-1, domain.shape[0])
        else:
            if len(self.f_sample) == 12 and np.all(self.f_sample < 0.4):
                # if after 12 samples we have not found a point with an accuracy
                # larger than 0.4, jump to the opposite half of the domain instead
                x0 = (self.x_sample[0] +
                      (domain[:, 1] - domain[:, 0]) / 2) % domain[:, 1]
                next_x = np.array([x0]).reshape(-1, domain.shape[0])
            else:
                next_x = self.optimize_acquisition_function()

        assert next_x.shape == (1, domain.shape[0])
        return next_x

    def optimize_acquisition_function(self):
        """
        Optimizes the acquisition function.

        Returns
        -------
        x_opt: np.ndarray
            1 x domain.shape[0] array containing the point that maximizes the acquisition function.
        """
        def objective(x):
            return -self.acquisition_function(x)

        f_values = []
        x_values = []

        # Restart the optimization 30 times and pick the best solution
        for _ in range(30):
            x0 = domain[:, 0] + (domain[:, 1] - domain[:, 0]) * np.random.rand(
                domain.shape[0])
            result = fmin_l_bfgs_b(func=objective,
                                   x0=x0,
                                   bounds=domain,
                                   approx_grad=True)

            x_values.append(np.clip(result[0], *domain[0]))
            f_values.append(-result[1])

        ind = np.argmax(f_values)
        return np.atleast_2d(x_values[ind])

    def acquisition_function(self, x):
        """
        Compute the acquisition function.
        Constrained acquisition function as proposed by https://arxiv.org/abs/1403.5607

        Parameters
        ----------
        x: np.ndarray
            x in domain of f

        Returns
        ------
        af_value: float
            Value of the acquisition function at x
        """
        ei = self.expected_improvement(x, xi=0.015)
        constraint_weight = self.constraint_function(x)

        return float(ei * constraint_weight)

    def expected_improvement(self, x, xi=0.01):
        """
        Compute expected improvement at points x based on samples x_samples
        and y_samples using Gaussian process surrogate

        Args:
            x: Points at which EI should be computed
            xi: Exploitation-exploration trade-off parameter
        """
        if self.gpy_impl:
            # GPy returns the predictive variance; convert it to a standard deviation
            mu, var = self.f_gpr.predict(x.reshape(-1, domain.shape[0]))
            sigma = np.sqrt(var)
            mu_sample, _ = self.f_gpr.predict(self.x_sample)
        else:
            mu, sigma = self.f_gpr.predict([x], return_std=True)
            mu_sample = self.f_gpr.predict(self.x_sample)

        sigma = sigma.reshape(-1, 1)
        mu_sample_opt = np.max(mu_sample)
        with np.errstate(divide='warn'):
            imp = mu - mu_sample_opt - xi
            Z = imp / sigma
            ei = imp * norm.cdf(Z) + sigma * norm.pdf(Z)
            ei[sigma == 0.0] = 0.0

        return ei

    def constraint_function(self, x):
        """
        Model constraint condition v(theta) > v_min as a real-valued latent constraint function
        g_k(x) with g_k(theta) = v(theta) - v_min > 0 and then infer PR(g_k > 0) from its posterior

        Following: https://arxiv.org/abs/1403.5607
        """

        # predict distribution of speed v
        if self.gpy_impl:
            # GPy returns the predictive variance; convert it to a standard deviation
            mu, var = self.v_gpr.predict(x.reshape(-1, domain.shape[0]))
            sigma = np.sqrt(var)
        else:
            mu, sigma = self.v_gpr.predict([x], return_std=True)

        # Gaussian CDF with params from GPR prediction
        if sigma != 0:
            pr = 1 - norm.cdf(self.v_min, loc=mu, scale=sigma)
        else:
            pr = 0.95 * (mu - self.v_min) if mu >= self.v_min else 0.05 * (
                self.v_min - mu)

        return pr

    def add_data_point(self, x, f, v):
        """
        Add data points to the model.

        Parameters
        ----------
        x: np.ndarray
            Hyperparameters
        f: np.ndarray
            Model accuracy
        v: np.ndarray
            Model training speed
        """

        # stack the newly obtained data point onto the existing data points
        self.x_sample = np.vstack((self.x_sample, x))
        self.f_sample = np.vstack((self.f_sample, f))
        self.v_sample = np.vstack((self.v_sample, v))
        #self.logv_sample = np.vstack((self.logv_sample, math.log(v)))
        self.gv_sample = np.vstack((self.gv_sample, v - self.v_min))

        # add new datapoint to GPs and retrain
        if self.gpy_impl:
            self.f_gpr = GPy.models.gp_regression.GPRegression(
                X=self.x_sample,
                Y=self.f_sample,
                kernel=self.f_kernel,
                noise_var=self.f_sigma**2)
            self.v_gpr = GPy.models.gp_regression.GPRegression(
                X=self.x_sample,
                Y=self.v_sample,
                kernel=self.v_kernel,
                noise_var=self.v_sigma**2)
            self.v_gpr.optimize()
            self.f_gpr.optimize()
        else:
            self.f_gpr.fit(self.x_sample, self.f_sample)
            self.v_gpr.fit(self.x_sample, self.v_sample)
            #self.gv_gpr.fit(self.x_sample, self.gv_sample)
            #self.logv_gpr.fit(self.x_sample, self.logv_sample)

    def get_solution(self):
        """
        Return x_opt that is believed to be the maximizer of f.

        Returns
        -------
        solution: np.ndarray
            1 x domain.shape[0] array containing the optimal solution of the problem
        """

        # select the highest accuracy sample from all valid samples (i.e. samples above the speed threshold)
        valid_samples = self.f_sample.copy()
        valid_samples[self.v_sample < self.v_min] = -1e6  # heuristically low number
        best_index = np.argmax(
            valid_samples)  # get the index of highest accuracy
        x_opt = self.x_sample[best_index]  # get the corresponding x value

        return x_opt
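A minimal driver loop for BO_algo above, given as a sketch rather than part of the original example: it assumes the module-level domain array the class reads at construction time and the imports the example already uses (numpy, scipy, sklearn/GPy), and it substitutes made-up toy stand-ins toy_f and toy_v for the accuracy and speed oracles:

import numpy as np

domain = np.array([[0.0, 5.0]])  # 1-D domain in the format BO_algo expects

def toy_f(x):
    # made-up accuracy surrogate
    return float(np.sin(x).sum())

def toy_v(x):
    # made-up speed surrogate, mostly above the v_min = 1.2 threshold
    return float(1.2 + np.cos(x).sum() ** 2)

agent = BO_algo()
for _ in range(15):
    x_next = agent.next_recommendation()          # shape (1, domain.shape[0])
    agent.add_data_point(x_next, toy_f(x_next), toy_v(x_next))

print("believed optimum:", agent.get_solution())

Each iteration asks for a recommendation, evaluates the toy objective and constraint there, and feeds the result back via add_data_point; get_solution then returns the best sample among those believed to satisfy the speed constraint.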
Example #3
assert (np.isclose(sigma_f_opt, sigma_f))

# Plot the results
plot_gp(mu_s, cov_s, X, X_train=X_train, Y_train=Y_train)

import GPy

rbf = GPy.kern.RBF(input_dim=1, variance=1.0, lengthscale=1.0)
gpr = GPy.models.GPRegression(X_train, Y_train, rbf)

# Fix the noise variance to known value
gpr.Gaussian_noise.variance = noise**2
gpr.Gaussian_noise.variance.fix()

# Run optimization
gpr.optimize()

# Obtain optimized kernel parameters
l = gpr.rbf.lengthscale.values[0]
sigma_f = np.sqrt(gpr.rbf.variance.values[0])

# Compare with previous results
assert (np.isclose(l_opt, l))
assert (np.isclose(sigma_f_opt, sigma_f))

# Plot the results with the built-in plot function
gpr.plot()

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from matplotlib import pyplot as pl
Example #4
# https://sheffieldml.github.io/GPy/
# See also https://gpytorch.ai/
#import sys
#sys.path.append("/home/kpmurphy/github/GPy")

import GPy

rbf = GPy.kern.RBF(input_dim=1, variance=1.0, lengthscale=1.0)
gpr = GPy.models.GPRegression(X_train, Y_train, rbf)

# Fix the noise variance to known value 
gpr.Gaussian_noise.variance = noise**2
gpr.Gaussian_noise.variance.fix()

# Run optimization
gpr.optimize();

# Display optimized parameter values
#display(gpr)

# Obtain optimized kernel parameters
l = gpr.rbf.lengthscale.values[0]
sigma_f = np.sqrt(gpr.rbf.variance.values[0])

# Compare with previous results
assert(np.isclose(l_opt, l))
assert(np.isclose(sigma_f_opt, sigma_f))

# Plot the results with the built-in plot function
gpr.plot();
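Both GPy snippets above assume X_train, Y_train, a known noise level, and previously optimized hyperparameters l_opt and sigma_f_opt from earlier (not shown) code. A toy setup along the following lines makes them runnable; it is only a sketch, and the final isclose asserts may not hold exactly when l_opt/sigma_f_opt come from a different optimizer than GPy:

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel, RBF

# toy 1-D regression data with known observation noise
noise = 0.2
X_train = np.linspace(-3.0, 3.0, 25).reshape(-1, 1)
Y_train = np.sin(X_train) + noise * np.random.randn(*X_train.shape)

# one possible source of l_opt / sigma_f_opt: maximize the marginal likelihood with sklearn
kernel = ConstantKernel(1.0) * RBF(length_scale=1.0)
skl_gpr = GaussianProcessRegressor(kernel=kernel, alpha=noise**2).fit(X_train, Y_train)
l_opt = skl_gpr.kernel_.k2.length_scale
sigma_f_opt = np.sqrt(skl_gpr.kernel_.k1.constant_value)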
Example #5
class GPRegressor(object):
    def __init__(
            self,
            n_restarts=0,
            kernel=None,
            normalize_y=True,
            backend='sklearn',
            batch_size=1000,
            n_inducing=1000,
            n_jobs=1,
            verbose=False,
    ):
        self.n_restarts_ = n_restarts
        self.kernel_ = kernel
        self.normalize_y_ = normalize_y
        self.backend_ = backend
        self.batch_size_ = batch_size
        self.n_inducing_ = n_inducing  # max inducing points for the 'gpy' sparse backend
        self.n_jobs_ = n_jobs
        self.verbose_ = verbose

    def fit(self, X, y):
        n_samples, n_features = X.shape

        if self.verbose_:
            tprint('Fitting GP model on {} data points with dimension {}...'
                   .format(*X.shape))

        # scikit-learn backend.
        if self.backend_ == 'sklearn':
            self.model_ = GaussianProcessRegressor(
                kernel=self.kernel_,
                normalize_y=self.normalize_y_,
                n_restarts_optimizer=self.n_restarts_,
                copy_X_train=False,
            ).fit(X, y)

        # GPy backend.
        elif self.backend_ == 'gpy':
            import GPy
            if self.kernel_ == 'rbf':
                kernel = GPy.kern.RBF(
                    input_dim=n_features, variance=1., lengthscale=1.
                )
            else:
                raise ValueError('Kernel value {} not supported'
                                 .format(self.kernel_))

            self.model_ = GPy.models.SparseGPRegression(
                X, y.reshape(-1, 1), kernel=kernel,
                num_inducing=min(self.n_inducing_, n_samples)
            )
            self.model_.Z.unconstrain()
            self.model_.optimize(messages=self.verbose_)

        # GPyTorch with CUDA backend.
        elif self.backend_ == 'gpytorch':
            X = torch.Tensor(X).contiguous().cuda()
            y = torch.Tensor(y).contiguous().cuda()

            likelihood = gpytorch.likelihoods.GaussianLikelihood().cuda()
            model = GPyTorchRegressor(X, y, likelihood).cuda()

            model.train()
            likelihood.train()

            # Use the Adam optimizer.
            #optimizer = torch.optim.LBFGS([ {'params': model.parameters()} ])
            optimizer = torch.optim.Adam([
                {'params': model.parameters()}, # Includes GaussianLikelihood parameters.
            ], lr=1.)

            # Loss for GPs is the marginal log likelihood.
            mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

            training_iterations = 100
            for i in range(training_iterations):
                optimizer.zero_grad()
                output = model(X)
                loss = -mll(output, y)
                loss.backward()
                if self.verbose_:
                    tprint('Iter {}/{} - Loss: {:.3f}'
                           .format(i + 1, training_iterations, loss.item()))
                optimizer.step()

            self.model_ = model
            self.likelihood_ = likelihood

        if self.verbose_:
            tprint('Done fitting GP model.')

        return self

    def predict(self, X):
        if self.verbose_:
            tprint('Finding GP model predictions on {} data points...'
                   .format(X.shape[0]))

        if self.backend_ == 'sklearn':
            n_batches = int(ceil(float(X.shape[0]) / self.batch_size_))
            results = Parallel(n_jobs=self.n_jobs_)(
                delayed(parallel_predict)(
                    self.model_,
                    X[batch_num*self.batch_size_:(batch_num+1)*self.batch_size_],
                    batch_num, n_batches, self.verbose_
                )
                for batch_num in range(n_batches)
            )
            mean = np.concatenate([ result[0] for result in results ])
            var = np.concatenate([ result[1] for result in results ])

        elif self.backend_ == 'gpy':
            mean, var = self.model_.predict(X, full_cov=False)

        elif self.backend_ == 'gpytorch':
            X = torch.Tensor(X).contiguous().cuda()

            # Set into eval mode.
            self.model_.eval()
            self.likelihood_.eval()

            with torch.no_grad(), \
                 gpytorch.settings.fast_pred_var(), \
                 gpytorch.settings.max_root_decomposition_size(35):
                preds = self.model_(X)

            mean = preds.mean.detach().cpu().numpy()
            var = preds.variance.detach().cpu().numpy()

        if self.verbose_:
            tprint('Done predicting with GP model.')

        self.uncertainties_ = var.flatten()
        return mean.flatten()
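GPRegressor above relies on two helpers that are not shown, tprint and parallel_predict, plus joblib's Parallel/delayed and the n_inducing setting used by the GPy backend. The definitions below are minimal stand-ins (the real helpers may differ, e.g. in whether parallel_predict returns a variance or a standard deviation), followed by a toy use of the sklearn backend:

import numpy as np
from math import ceil
from joblib import Parallel, delayed
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

def tprint(msg):
    # stand-in for the (unshown) timestamped print helper
    print(msg)

def parallel_predict(model, X_batch, batch_num, n_batches, verbose):
    # stand-in: return the per-batch predictive mean and variance
    mean, std = model.predict(X_batch, return_std=True)
    return mean, std ** 2

# toy usage of the sklearn backend
X = np.random.rand(500, 3)
y = np.sin(X).sum(axis=1)
gp = GPRegressor(kernel=RBF(1.0), backend='sklearn', batch_size=200).fit(X, y)
mean = gp.predict(X)
var = gp.uncertainties_

predict returns the flattened posterior mean and stashes the per-point uncertainty in uncertainties_, which is why the sketch reads it back as a separate attribute.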
class LearnedTransitionModel():
    def __init__(self, fit_to_default = True, use_GP = False):
        self.high_state = np.array([0.1])
        self.low_state = np.array([-0.1])
        lengthscale = (self.high_state-self.low_state) * 0.001
        #self.k = gpy.kern.Matern52(self.high_state.shape[0], ARD=True, lengthscale=lengthscale)
        self.k = Matern(length_scale=0.4, length_scale_bounds=(0.00001, 1), nu=5/2.)
        self.scaler = preprocessing.StandardScaler()
        self.use_GP = use_GP
        if use_GP:
            self.model = GPR(kernel = self.k, random_state=17, optimizer="fmin_l_bfgs_b", n_restarts_optimizer = 200, normalize_y=True) #TODO fill in with better prior
        else:
            n_estimators = [int(x) for x in np.linspace(start=20, stop=1000, num=10)]
            # Number of features to consider at every split
            max_features = ['auto', 'sqrt']
            # Maximum number of levels in tree
            max_depth = [int(x) for x in np.linspace(10, 110, num=11)]
            max_depth.append(None)
            # Minimum number of samples required to split a node
            min_samples_split = [2, 5, 10]
            # Minimum number of samples required at each leaf node
            min_samples_leaf = [1, 2, 4]
            # Method of selecting samples for training each tree
            bootstrap = [True, False]  # Create the random grid
            rf = RFR()
            random_grid = {'n_estimators': n_estimators,
                           'max_features': max_features,
                           'max_depth': max_depth,
                           'min_samples_split': min_samples_split,
                           'min_samples_leaf': min_samples_leaf,
                           'bootstrap': bootstrap}
            self.rf_random = RandomizedSearchCV(estimator=rf, param_distributions=random_grid, n_iter=3, cv=3, verbose=0,
                                           random_state=42, n_jobs=12)
            self.model = self.rf_random

        self.states_fn = "data/states.npy"
        self.actions_fn = "data/actions.npy"
        self.next_states_fn = "data/next_states.npy"
        if fit_to_default:
            print("Training on old data")
            self.train_on_old_data_if_present()

    def train_on_old_data_if_present(self):
        old_states, old_actions, old_next_states = self.load_data_if_present()
        if old_states is not None:
            inputs = self.get_features(old_states, old_actions, train=True)
            if self.model is None:
                self.model = gpy.models.GPRegression(inputs, old_next_states[:,1].reshape(-1,1),self.k)
                self.model['.*variance'].constrain_bounded(1e-4, 2., warning=False)
                self.model['Gaussian_noise.variance'].constrain_bounded(1e-4, 0.1, warning=False)
                for i in range(20):
                    self.model.optimize(messages=True)
                self.model.fit = lambda x,y: 3
            self.model.fit(inputs, old_next_states[:,1])


    def load_data_if_present(self):
        try:
            old_states = np.load(self.states_fn)
            old_actions = np.load(self.actions_fn)
            old_next_states = np.load(self.next_states_fn)
        except FileNotFoundError:
            old_states, old_actions, old_next_states = None, None, None
        return old_states, old_actions, old_next_states

    def train(self, states, actions, next_states, save_data=False, load_data=False):
        old_states, old_actions, old_next_states = self.load_data_if_present()
        if load_data:
            if old_states is None:
                training_states = states
                training_actions = actions
                training_next_states = next_states
            else:
                training_states =  np.vstack([old_states, states])
                training_actions = np.vstack([old_actions, actions])
                training_next_states =  np.vstack([old_next_states, next_states])
        else:
            training_states = states
            training_actions = actions
            training_next_states = next_states

        inputs = self.get_features(training_states, training_actions, train=True)
        #self.model = gpy.models.GPRegression(inputs, next_states, self.k)
        #self.model['.*variance'].constrain_bounded(1e-1,2., warning=False)
        #self.model['Gaussian_noise.variance'].constrain_bounded(1e-4,0.01, warning=False)
        # These GP hyper parameters need to be calibrated for good uncertainty predictions.
        #self.model.optimize(messages=False)

        self.model.fit(inputs, training_next_states[:, 1])
        if save_data:
            if not load_data:
                if old_states is None:
                    states_to_save = states
                    actions_to_save = actions
                    next_states_to_save = next_states
                else:
                    states_to_save = np.vstack([old_states, states])
                    actions_to_save = np.vstack([old_actions, actions])
                    next_states_to_save =  np.vstack([old_next_states, next_states])
            else:
                states_to_save = training_states
                actions_to_save = training_actions
                next_states_to_save = training_next_states
            np.save(self.states_fn, states_to_save)
            np.save(self.actions_fn, actions_to_save)
            np.save(self.next_states_fn, next_states_to_save)


    def get_features(self, states, actions, train=False):
        #unprocessed_input = np.hstack([states, actions])
        unprocessed_input = states
        if len(unprocessed_input.shape) == 1:
            unprocessed_input = unprocessed_input.reshape(1,-1)
        unprocessed_input = unprocessed_input[:,1].reshape(-1,1) #hack
        if train:
            self.scaler.fit(unprocessed_input)
        return self.scaler.transform(unprocessed_input)


    #requires it be of shape N X M where N is number of samples
    def predict(self, states, actions, flatten=True):
        #assuming same x
        inputs = self.get_features(states, actions)
        if self.use_GP:
            try:
                mean, sigma = self.model.predict(inputs, return_std=True)
            except TypeError: #wrong GP library
                mean, sigma = self.model.predict(inputs)
        else:
            try:
                mean = self.model.predict(inputs)
            except sklearn.exceptions.NotFittedError:
                mean = self.model.predict(inputs)
        if len(mean.shape) == 1:
            mean = mean.reshape(-1,1)
        if mean.shape[0] < mean.shape[1]:
            mean = mean.T
        if len(states.shape) == 1:
            next_state = np.hstack([states[0], mean.flatten()])
        else:
            next_state = np.hstack([states[:,0].reshape(-1,1), mean.reshape(-1,1)])  # , 2*sigma
        action_dist = np.linalg.norm(next_state-states)
        if action_dist < 1e-3:
            print("Low action dist")
            print(states, "states")
        return next_state
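An illustrative use of LearnedTransitionModel above, given as a sketch: it assumes the imports the class already relies on (GPR aliased to sklearn's GaussianProcessRegressor, Matern, preprocessing, RFR, RandomizedSearchCV) and feeds in made-up toy arrays; fit_to_default=False skips loading the data/*.npy files.

import numpy as np

# toy transition data: 2-D states, 1-D actions, next state drifts with the action
states = np.random.uniform(-0.1, 0.1, size=(40, 2))
actions = np.random.uniform(-1.0, 1.0, size=(40, 1))
next_states = states + 0.01 * np.hstack([actions, actions])

model = LearnedTransitionModel(fit_to_default=False, use_GP=True)
model.train(states, actions, next_states)
predicted_next = model.predict(states[:1], actions[:1])
print(predicted_next)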