def __init__(self, lower, upper, explore_priority=0.0001):
    """
    Initialise the Delaunay class.

    .. note:: Currently only supports rectangular type restrictions on
        the parameter space

    Parameters
    ----------
    lower : array_like
        Lower or minimum bounds for the parameter space
    upper : array_like
        Upper or maximum bounds for the parameter space
    explore_priority : float, optional
        The priority of exploration against exploitation
    """
    Sampler.__init__(self, lower, upper)
    self.triangulation = None  # Delaunay model
    self.simplex_cache = {}    # Pre-computed values of simplices
    self.explore_priority = explore_priority
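# The constructor above defers bound handling to the base class. Since
# Sampler.__init__ is not shown in this excerpt, here is a hypothetical
# minimal sketch of what it is assumed to provide -- only the attributes
# the subclasses below actually rely on (lower, upper, dims); the real
# base class is defined elsewhere in the library.
import numpy as np


class Sampler:
    """Hypothetical sketch of the base class assumed by the samplers."""

    def __init__(self, lower, upper):
        self.lower = np.asarray(lower, dtype=float)
        self.upper = np.asarray(upper, dtype=float)
        if self.lower.shape != self.upper.shape:
            raise ValueError("lower and upper must have the same shape")
        if np.any(self.lower >= self.upper):
            raise ValueError("each lower bound must be below its upper")
        self.dims = self.lower.size  # used as 2 ** self.dims in pick()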
def __init__(self, lower, upper, kerneldef=None, n_train=50,
             acq_name='var_sum', explore_priority=1., seed=None):
    """
    Initialise the GaussianProcess class.

    .. note:: Currently only supports rectangular type restrictions on
        the parameter space

    Parameters
    ----------
    lower : array_like
        Lower or minimum bounds for the parameter space
    upper : array_like
        Upper or maximum bounds for the parameter space
    kerneldef : function, optional
        Kernel function definition. See the 'gp' module.
    n_train : int, optional
        Number of training samples required before the sampler can be
        trained
    acq_name : str, optional
        A string specifying the type of acquisition function used
    explore_priority : float, optional
        The priority of exploration against exploitation
    seed : int, optional
        Seed for the numpy random number generator
    """
    Sampler.__init__(self, lower, upper)
    self.kerneldef = kerneldef
    self.n_min = n_train
    self.acq_name = acq_name
    self.explore_priority = explore_priority
    self.hyperparams = None
    self.regressors = None
    self.y_mean = None
    self.n_tasks = None
    if seed is not None:  # 'if seed:' would silently ignore seed=0
        np.random.seed(seed)
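# The seeding guard at the end of the constructor is worth noting: the
# original 'if seed:' treats seed=0 as falsy and silently skips it, which
# is why the rewrite above tests 'seed is not None'. A small runnable
# illustration of the difference (standalone, numpy only):
import numpy as np


def seed_truthy(seed=None):
    if seed:  # bug: 0 is falsy, so seed=0 is silently ignored
        np.random.seed(seed)


def seed_checked(seed=None):
    if seed is not None:  # correct: only skip when no seed was given
        np.random.seed(seed)


seed_checked(0)
a = np.random.rand(3)
seed_checked(0)
b = np.random.rand(3)
assert np.allclose(a, b)  # reproducible even with seed=0

seed_truthy(0)            # no effect; the stream continues unseeded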
def pick(self):
    """
    Pick the feature location for the next observation to be taken.

    This uses the recursive Delaunay subdivision algorithm.

    Returns
    -------
    numpy.ndarray
        Location in the parameter space for the next observation to be
        taken
    str
        A random hexadecimal ID to identify the corresponding job
    """
    n = len(self.X)

    # -- Note that we are assuming the points in X are not reordered by
    # -- the scipy Delaunay implementation
    n_corners = 2 ** self.dims
    if n < n_corners + 1:
        # Bootstrap with a regular sampling strategy to get it started
        xq = grid_sample(self.lower, self.upper, n)
        yq_exp = [0.]
    else:
        X = self.X()  # calling returns the value as an array
        y = self.y()
        virtual = self.virtual_flag()

        # Otherwise, recursively subdivide the edges with the Delaunay
        # model
        if not self.triangulation:
            self.triangulation = ScipyDelaunay(X, incremental=True)

        # Weight by hyper-volume
        simplices = [tuple(s) for s in self.triangulation.vertices]
        cache = self.simplex_cache

        def get_value(s):
            # Compute the sample value as:
            #   hyper-volume of simplex * variance of values in simplex
            # np.abs ensures the signed determinant is treated as a
            # volume regardless of vertex orientation
            ind = list(s)
            value = (np.var(y[ind]) + self.explore_priority) * \
                np.abs(np.linalg.det((X[ind] - X[ind[0]])[1:]))
            # Only cache when no vertex is a virtual (pending) point
            if not np.max(virtual[ind]):
                cache[s] = value
            return value

        # Mostly the simplices won't change from call to call - cache!
        sample_value = [cache[s] if s in cache else get_value(s)
                        for s in simplices]
        # Alternatively, a nicely vectorised computation might work
        # here -- profile and check what the bottleneck is

        # Extract the points in the highest-value simplex
        simplex_indices = list(simplices[np.argmax(sample_value)])
        simplex = X[simplex_indices]
        simplex_v = y[simplex_indices]

        # Weight the position in this simplex based on value deviation
        eps = 1e-3
        weight = eps + np.abs(simplex_v - np.mean(simplex_v))
        weight /= np.sum(weight)
        xq = np.sum(weight * simplex, axis=0)  # dot
        yq_exp = np.sum(weight * simplex_v, axis=0)
        self.triangulation.add_points(xq[np.newaxis, :])  # incremental

    uid = Sampler._assign(self, xq, yq_exp)
    return xq, uid
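# To make the scoring rule in get_value concrete, here is a standalone
# sketch of the same computation on a toy 2-D triangulation. It uses only
# numpy and scipy.spatial.Delaunay; the data are made up for illustration,
# and '.simplices' is the modern name for the '.vertices' attribute used
# above.
import numpy as np
from scipy.spatial import Delaunay as ScipyDelaunay

explore_priority = 0.0001

# Toy data: the unit-square corners plus one interior point, with
# observed values y (the interior point has an outlying value)
X = np.array([[0., 0.], [1., 0.], [0., 1.], [1., 1.], [0.4, 0.6]])
y = np.array([0., 1., 1., 0., 5.])

tri = ScipyDelaunay(X)
simplices = [tuple(s) for s in tri.simplices]


def get_value(s):
    # value = (variance of y over the simplex + exploration bonus)
    #         * hyper-volume of the simplex (proportional to |det|)
    ind = list(s)
    edges = (X[ind] - X[ind[0]])[1:]  # edge vectors from first vertex
    return (np.var(y[ind]) + explore_priority) * \
        np.abs(np.linalg.det(edges))


values = [get_value(s) for s in simplices]
best = simplices[int(np.argmax(values))]

# Weight the candidate location within the winning simplex by how far
# each vertex value deviates from the simplex mean, as pick() does
ind = list(best)
simplex, simplex_v = X[ind], y[ind]
eps = 1e-3
w = eps + np.abs(simplex_v - simplex_v.mean())
w /= w.sum()
xq = w @ simplex  # candidate query point inside the simplex
print(best, xq)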
def pick(self, n_test=500):
    """
    Pick the feature location for the next observation to be taken.

    .. note:: [Properties Modified]
        X, y, virtual_flag, pending_results, y_mean, hyperparameters,
        regressors

    Parameters
    ----------
    n_test : int, optional
        The number of random query points across the search space to
        pick from

    Returns
    -------
    numpy.ndarray
        Location in the parameter space for the next observation to be
        taken
    str
        A random hexadecimal ID to identify the corresponding job
    """
    n = len(self.X)
    self.update_y_mean()

    # If we do not have enough samples yet, randomly sample for more!
    if n < self.n_min:
        xq = random_sample(self.lower, self.upper, 1)[0]
        yq_exp = self.y_mean  # Note: can be 'None' initially
    else:
        if self.regressors is None:
            self.train()

        # Randomly sample the volume for test points
        Xq = random_sample(self.lower, self.upper, n_test)

        # Generate cached predictors for those test points
        predictors = [gp.query(r, Xq) for r in self.regressors]

        # Compute the posterior distributions at those points
        # Note: no covariance information implemented at this stage
        Yq_exp = np.asarray([gp.mean(p) for p in predictors]).T + \
            self.y_mean
        Yq_var = np.asarray([gp.variance(p) for p in predictors]).T

        # Acquisition functions
        acq_defs_current = acq_defs(
            y_mean=self.y_mean,
            explore_priority=self.explore_priority)

        # Compute the acquisition levels at those test points
        yq_acq = acq_defs_current[self.acq_name](Yq_exp, Yq_var)

        # Find the test point with the highest acquisition level
        iq_acq = np.argmax(yq_acq)
        xq = Xq[iq_acq, :]
        yq_exp = Yq_exp[iq_acq, :]

    # Place a virtual observation...
    uid = Sampler._assign(self, xq, yq_exp)  # it can be None...
    return xq, uid
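# For intuition about the acquisition step, here is a hedged,
# self-contained sketch of what a 'var_sum'-style acquisition might do:
# score each random test point by its total predictive variance across
# tasks and pick the argmax. The real acquisition definitions live in the
# library's acq_defs / 'gp' module; only the argmax-over-random-candidates
# pattern comes from pick() above, and the var_sum body here is an
# assumption.
import numpy as np


def random_sample(lower, upper, n):
    # Uniform random samples inside the rectangular bounds, as in pick()
    lower, upper = np.asarray(lower), np.asarray(upper)
    return lower + (upper - lower) * np.random.rand(n, len(lower))


def var_sum(Yq_exp, Yq_var):
    # Hypothetical 'var_sum' acquisition: sum of predictive variances
    # per test point over tasks; ignores the predicted mean entirely
    return np.sum(Yq_var, axis=1)


n_test, n_tasks = 500, 2
Xq = random_sample([0., 0.], [1., 1.], n_test)

# Stand-in posterior moments; a real run would obtain these from
# gp.query / gp.mean / gp.variance as in pick() above
Yq_exp = np.random.randn(n_test, n_tasks)
Yq_var = np.random.rand(n_test, n_tasks)

yq_acq = var_sum(Yq_exp, Yq_var)
iq_acq = np.argmax(yq_acq)  # test point with highest acquisition level
xq = Xq[iq_acq, :]          # next location to observe
print(xq)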