def __init__(self,
             input_dim: int,
             variance: float = 1.,
             period: float = 2. * np.pi,
             lengthscale: float = 2. * np.pi,
             active_dims: int = None,
             name: str = 'pure_std_periodic') -> None:
    """
    Build the kernel and register ``variance``, ``period`` and ``lengthscale``.

    :param input_dim: number of input dimensions; ``period`` and
        ``lengthscale`` must each contain exactly this many values.
    :param variance: kernel variance; must have size one.
    :param period: per-dimension periods, or ``None`` for 2*pi everywhere.
    :param lengthscale: per-dimension lengthscales, or ``None`` for 2*pi
        everywhere.
    :param active_dims: forwarded unchanged to the parent constructor.
    :param name: kernel name, forwarded to the parent and stored on ``self``.
    :raises AssertionError: if a size check fails.
    """
    super(PureStdPeriodicKernel, self).__init__(input_dim, active_dims, name)
    self.name = name

    def _per_dimension(value, failure_message):
        # ``None`` selects the default of 2*pi in every input dimension.
        if value is None:
            return 2. * np.pi * np.ones(input_dim)
        as_array = np.asarray(value)
        assert as_array.size == input_dim, failure_message
        return as_array

    period = _per_dimension(period, "bad number of periods")
    lengthscale = _per_dimension(lengthscale, "bad number of lengthscales")

    # All three parameters carry the Logexp (positive-only) transform.
    self.variance = Param('variance', variance, Logexp())
    assert self.variance.size == 1, "Variance size must be one"
    self.period = Param('period', period, Logexp())
    self.lengthscale = Param('lengthscale', lengthscale, Logexp())
    self.link_parameters(self.variance, self.period, self.lengthscale)
def __init__(self,
             input_dim,
             input_space_dim=None,
             active_dims=None,
             kernel=None,
             name='shapeintegral',
             Nperunit=100,
             lengthscale=None,
             variance=1.0):
    """
    Set up the shape-integral kernel around a latent-function kernel.

    The number of columns in X is not the dimensionality of the space: for
    pentagons there will be 10 columns in X but only 2 input-space dimensions.

    :param input_dim: number of actual columns in the data.
    :param input_space_dim: number of dimensions in the domain. May be omitted
        when ``kernel`` is given, in which case it is taken from the kernel.
    :param active_dims: potential list of dimensions to use.
    :param kernel: latent function kernel; defaults to ``RBF(input_space_dim)``.
    :param name: kernel name.
    :param Nperunit: resolution of the approximation.
    :param lengthscale: initial lengthscale value(s); ``None`` means ``[1.0]``.
    :param variance: initial variance.
    :raises AssertionError: if neither ``kernel`` nor ``input_space_dim`` is
        given, or if both are given and their dimensionalities disagree.

    The last column of X should specify whether Y refers to the latent
    function or to the integral. For the latent function only the first
    d columns are used and the rest can be NaN, e.g.::

        X                                Y
        0,0,1,0,0,1,0,1,1,0,1,1,0        2
        1,1,nan,...,nan,1                3

    is a 1x1 square with an integral of 2, and a single point in the [1,1]
    corner of the square with a value of 3.
    """
    super(ShapeIntegral, self).__init__(input_dim, active_dims, name)
    assert (
        (kernel is not None) or (input_space_dim is not None)
    ), "Need either the input space dimensionality defining or the latent kernel defining (to infer input space)"

    if kernel is None:
        kernel = RBF(input_space_dim)
    elif input_space_dim is None:
        # Only infer the dimensionality when the caller did not state it;
        # overwriting it unconditionally made the check below unfailable.
        input_space_dim = kernel.input_dim
    assert kernel.input_dim == input_space_dim, "Latent kernel (dim=%d) should have same input dimensionality as specified in input_space_dim (dim=%d)" % (
        kernel.input_dim, input_space_dim)

    if lengthscale is None:
        # Fresh list per call instead of a shared mutable default argument.
        lengthscale = [1.0]

    self.kernel = kernel
    self.Nperunit = Nperunit
    self.input_space_dim = input_space_dim
    # This cache is important: besides the speed-up, we get the same points
    # for each shape, which makes our covariances more stable.
    self.cached_points = {}

    # Logexp transforms allow positive-only values.
    self.lengthscale = Param('lengthscale', lengthscale, Logexp())
    self.variance = Param('variance', variance, Logexp())
    # Register the parameters that need optimising.
    self.link_parameters(self.variance, self.lengthscale)
def __init__(self,
             gap_decay=1.0,
             match_decay=2.0,
             order_coefs=None,
             alphabet=None,
             maxlen=0,
             active_dims=None,
             normalize=True,
             batch_size=1000):
    """
    Create the string kernel and its underlying ``NPStringKernel``.

    :param gap_decay: initial gap decay (positive-constrained parameter).
    :param match_decay: initial match decay (positive-constrained parameter).
    :param order_coefs: initial order coefficients; ``None`` means ``[1.0]``.
    :param alphabet: alphabet handed to ``NPStringKernel``; ``None`` means a
        new empty list.
    :param maxlen: maximum string length handed to ``NPStringKernel``.
    :param active_dims: forwarded to the parent constructor.
    :param normalize: normalisation flag handed to ``NPStringKernel``.
    :param batch_size: accepted for interface compatibility; not used here.
    """
    # Build fresh containers per instance: the previous ``[]`` default for
    # ``alphabet`` was stored on ``self`` and therefore shared between all
    # kernels constructed without an explicit alphabet.
    if order_coefs is None:
        order_coefs = [1.0]
    if alphabet is None:
        alphabet = []

    super(StringKernel, self).__init__(1, active_dims, 'sk')
    self._name = "sk"

    self.gap_decay = Param('Gap_decay', gap_decay, Logexp())
    self.match_decay = Param('Match_decay', match_decay, Logexp())
    self.order_coefs = Param('Order_coefs', order_coefs, Logexp())
    self.link_parameters(self.gap_decay, self.match_decay, self.order_coefs)

    self.alphabet = alphabet
    self.maxlen = maxlen
    self.normalize = normalize

    # The wrapped kernel receives the raw initial values, not the Params.
    self.kernel = NPStringKernel(_gap_decay=gap_decay,
                                 _match_decay=match_decay,
                                 _order_coefs=list(order_coefs),
                                 alphabet=self.alphabet,
                                 maxlen=maxlen,
                                 normalize=normalize)
def __init__(self,
             input_dim,
             variances=1.0,
             lengthscale=1.0,
             ARD=False,
             active_dims=None,
             lengthscalefun=None,
             name='nonstatRBF'):
    """
    Register ``variances`` and ``lengthscale`` and store the lengthscale
    function.

    :param input_dim: forwarded to the parent constructor.
    :param variances: initial variances value.
    :param lengthscale: initial lengthscale; ``None`` means ``np.ones(1)``.
    :param ARD: accepted but not used in this constructor.
    :param active_dims: forwarded to the parent constructor.
    :param lengthscalefun: callable of one argument; when ``None`` a function
        returning the initial lengthscale array for any input is used.
    :param name: kernel name.
    """
    super(NonstationaryRBF, self).__init__(input_dim, active_dims, name)

    lengthscale = np.ones(1) if lengthscale is None else np.asarray(lengthscale)

    if lengthscalefun is None:

        def lengthscalefun(x):
            # Constant fallback: ignores x and returns the initial array.
            return lengthscale

    self.lengthscalefun = lengthscalefun

    # Both parameters are positive-only through the Logexp transform.
    self.lengthscale = Param('lengthscale', lengthscale, Logexp())
    self.variances = Param('variances', variances, Logexp())
    self.link_parameters(self.variances, self.lengthscale)
def __init__(self, _lambda=1, _sigma=1, normalize=True, active_dims=None):
    """
    Create the subset-tree kernel and its wrapped raw implementation.

    :param _lambda: initial value of the ``Lambda`` parameter.
    :param _sigma: initial value of the ``Sigma`` parameter.
    :param normalize: flag stored on ``self`` and passed to the wrapped kernel.
    :param active_dims: forwarded to the parent constructor.
    """
    super(SubsetTreeKernel, self).__init__(1, active_dims, 'sstk')

    # Positive-only parameters exposed to the optimiser.
    lambda_param = Param('Lambda', _lambda, Logexp())
    sigma_param = Param('Sigma', _sigma, Logexp())
    self._lambda = lambda_param
    self._sigma = sigma_param
    self.link_parameters(self._lambda, self._sigma)

    self.normalize = normalize
    # The wrapper is built from the raw initial values, not from the Params.
    self.kernel = wrapper_raw_SubsetTreeKernel(_lambda, _sigma, normalize)
def __init__(self,
             first,
             second,
             sigmoidal,
             location: float = 0.,
             slope: float = 0.5,
             name='change_base',
             fixed_slope=False):
    """
    Combine two kernels through a pair of sigmoidal kernels.

    :param first: first kernel; a copy is passed to the parent constructor and
        the original is stored as ``self.first``.
    :param second: second kernel, handled like ``first``.
    :param sigmoidal: callable used to build the forward and reverse
        sigmoidal kernels.
    :param location: either a single location, or a tuple whose first two
        items are the start and stop locations.
    :param slope: slope value; linked as a positive parameter unless
        ``fixed_slope`` is true.
    :param name: kernel name.
    :param fixed_slope: when true, ``slope`` is stored as a plain attribute.
    """
    _newkerns = [first.copy(), second.copy()]
    super(ChangeKernelBase, self).__init__(_newkerns, name)
    self.first = first
    self.second = second

    # Note: here to be used by subclasses, and changing it from the outside
    # does not link the parameter.
    self._fixed_slope = fixed_slope
    if not self._fixed_slope:
        self.slope = Param('slope', slope, Logexp())
        self.link_parameter(self.slope)
    else:
        self.slope = slope

    if not isinstance(location, tuple):
        self.sigmoidal = sigmoidal(1, False, 1., location, slope)
        self.sigmoidal_reverse = sigmoidal(1, True, 1., location, slope)
        self.location = Param('location', location)
        self.link_parameter(self.location)
        return

    # Tuple form: a start and a stop location.
    start, stop = location[0], location[1]
    self.sigmoidal = sigmoidal(1, False, 1., start, stop, slope)
    self.sigmoidal_reverse = sigmoidal(1, True, 1., start, stop, slope)
    self.location = Param('location', start)
    self.stop_location = Param('stop_location', stop)
    self.link_parameters(self.location, self.stop_location)
def __init__(self,
             first,
             second,
             sigmoidal,
             sigmoidal_indicator,
             location: float = 0.,
             slope: float = 0.5,
             width=1.,
             name='change_window_shifted_sides_base',
             fixed_slope=False):
    """
    Combine two kernels through sigmoidal and sigmoidal-indicator kernels.

    :param first: first kernel; a copy goes to the parent constructor and the
        original is stored as ``self.first``.
    :param second: second kernel, handled like ``first``.
    :param sigmoidal: callable building the forward/reverse sigmoidal kernels.
    :param sigmoidal_indicator: callable building the indicator kernel.
    :param location: initial location (unconstrained parameter).
    :param slope: slope; linked as a positive parameter unless ``fixed_slope``.
    :param width: initial width (positive parameter).
    :param name: kernel name.
    :param fixed_slope: when true, ``slope`` is stored as a plain attribute.
    """
    _newkerns = [first.copy(), second.copy()]
    super(ChangeWindowShiftedSidesBase, self).__init__(_newkerns, name)
    self.first = first
    self.second = second

    # Note: here to be used by subclasses, and changing it from the outside
    # does not link the parameter.
    self._fixed_slope = fixed_slope
    if not self._fixed_slope:
        self.slope = Param('slope', np.array(slope), Logexp())
        self.link_parameter(self.slope)
    else:
        self.slope = slope

    # The helper kernels are built from the raw constructor arguments.
    self.sigmoidal = sigmoidal(1, False, 1., location, slope)
    self.sigmoidal_reverse = sigmoidal(1, True, 1., location, slope)
    self.sigmoidal_indicator = sigmoidal_indicator(1, False, 1., location, slope, width)

    self.location = Param('location', np.array(location))
    self.width = Param('width', np.array(width), Logexp())
    self.shift_variance = Param('shift_variance', np.array(0), Logexp())
    self.link_parameters(self.location, self.width, self.shift_variance)
def __init__(self, gp_link=None, deg_free=5, sigma2=2):
    """
    Create the likelihood with a fixed degrees-of-freedom parameter.

    :param gp_link: link function; ``None`` selects
        ``link_functions.Identity()``.
    :param deg_free: degrees of freedom, converted to ``float`` and stored as
        the parameter ``self.v`` (named ``'deg_free'``).
    :param sigma2: accepted but not used by this constructor.
    """
    link = link_functions.Identity() if gp_link is None else gp_link
    super(HetStudentT, self).__init__(link, name='Hetro_Student_T')

    # Link the parameter first, then fix it.
    self.v = Param('deg_free', float(deg_free), Logexp())
    self.link_parameter(self.v)
    self.v.constrain_fixed()

    self.log_concave = False
def __init__(self, input_dim, variances=None, lengthscale=None, ARD=False, active_dims=None, name='integral'):
    """
    Register the ``variances`` and ``lengthscale`` parameters of the kernel.

    :param input_dim: number of input columns; the lengthscale is expected to
        hold ``input_dim/2`` values.
    :param variances: initial variances value.
    :param lengthscale: initial lengthscale(s); ``None`` means ``np.ones(1)``.
    :param ARD: accepted but not used in this constructor.
    :param active_dims: forwarded to the parent constructor.
    :param name: kernel name.
    :raises AssertionError: if the lengthscale length check fails.
    """
    super(Integral_Output_Observed, self).__init__(input_dim, active_dims, name)
    if lengthscale is None:
        lengthscale = np.ones(1)
    else:
        lengthscale = np.asarray(lengthscale)
    # NOTE(review): the source layout does not show whether this check is
    # meant to apply only to caller-supplied lengthscales - confirm.
    assert len(lengthscale)==input_dim/2
    self.lengthscale = Param('lengthscale', lengthscale, Logexp()) # Logexp - transform to allow positive-only values
    self.variances = Param('variances', variances, Logexp()) # same positive-only transform
    self.link_parameters(self.variances, self.lengthscale) # the list of parameters we need to optimise
def __init__(self, n_terms=3):
    """
    Set up the warping function and its ``psi`` and ``d`` parameters.

    :param n_terms: number of tanh terms to be used; ``psi`` has one row of
        three values per term.
    """
    self.n_terms = n_terms
    self.num_parameters = 3 * self.n_terms + 1
    # Plain array first; it is replaced by a Param once the parent is set up.
    self.psi = np.ones((self.n_terms, 3))

    super(TanhWarpingFunction_d, self).__init__(name='warp_tanh')

    self.psi = Param('psi', self.psi)
    # Only the first two columns of psi are constrained to be positive.
    self.psi[:, :2].constrain_positive()
    self.d = Param('d', 1.0, Logexp())

    self.link_parameter(self.psi)
    self.link_parameter(self.d)
def __init__(self,
             gap_decay=1.0,
             match_decay=2.0,
             order_coefs=None,
             alphabet=None,
             maxlen=0,
             num_splits=1,
             normalize=True):
    """
    Create ``num_splits`` string kernels that share one set of parameters.

    :param gap_decay: initial gap decay (positive-constrained parameter).
    :param match_decay: initial match decay (positive-constrained parameter).
    :param order_coefs: initial order coefficients; ``None`` means ``[1.0]``.
    :param alphabet: alphabet passed to every section kernel; ``None`` means
        a new empty list.
    :param maxlen: total maximum string length, divided between the sections.
    :param num_splits: number of sections, i.e. of ``StringKernel`` objects.
    :param normalize: normalisation flag passed to every section kernel.
    """
    # Fresh containers per instance instead of shared mutable defaults (the
    # ``alphabet`` list is stored on ``self`` and on every section kernel).
    if order_coefs is None:
        order_coefs = [1.0]
    if alphabet is None:
        alphabet = []

    super(SplitStringKernel, self).__init__(1, None, "sk")
    self._name = "sk"
    self.num_splits = num_splits

    self.gap_decay = Param('Gap_decay', gap_decay, Logexp())
    self.match_decay = Param('Match_decay', match_decay, Logexp())
    self.order_coefs = Param('Order_coefs', order_coefs, Logexp())
    self.link_parameters(self.gap_decay, self.match_decay, self.order_coefs)

    self.alphabet = alphabet
    self.maxlen = maxlen
    self.normalize = normalize

    # Make a new kernel for each section. The final kernel might operate on
    # a slightly longer string if maxlen % num_splits != 0, because it also
    # takes the characters left over after the even split.
    section_len = int(self.maxlen / self.num_splits)
    final_len = section_len + self.maxlen - self.num_splits * section_len
    section_lengths = [section_len] * (num_splits - 1) + [final_len]
    self.kernels = [
        StringKernel(gap_decay=gap_decay,
                     match_decay=match_decay,
                     order_coefs=order_coefs,
                     alphabet=alphabet,
                     maxlen=length,
                     normalize=normalize)
        for length in section_lengths
    ]

    # Tie the params across the kernels: each section kernel drops its own
    # parameter and points at the one owned by this object.
    for kern in self.kernels:
        kern.unlink_parameter(kern.gap_decay)
        kern.gap_decay = self.gap_decay
        kern.unlink_parameter(kern.match_decay)
        kern.match_decay = self.match_decay
        kern.unlink_parameter(kern.order_coefs)
        kern.order_coefs = self.order_coefs
def __init__(self,
             input_dim,
             input_space_dim=None,
             active_dims=None,
             name='shapeintegralhc',
             lengthscale=None,
             variances=None,
             Nrecs=10,
             step=0.025,
             Ntrials=10,
             dims=2):
    """
    Build the kernel around an ``Integral`` kernel of twice the input-space
    dimensionality.

    :param input_dim: forwarded to the parent constructor.
    :param input_space_dim: dimensionality of the input space; required.
    :param active_dims: forwarded to the parent constructor.
    :param name: kernel name.
    :param lengthscale: passed to the ``Integral`` kernel.
    :param variances: passed to the ``Integral`` kernel.
    :param Nrecs: stored as ``self.Nrecs``.
    :param step: stored as ``self.step``.
    :param Ntrials: stored as ``self.Ntrials``.
    :param dims: accepted but not used in this constructor.
    :raises AssertionError: if ``input_space_dim`` is ``None``.
    """
    super(ShapeIntegralHC, self).__init__(input_dim, active_dims, name)
    assert input_space_dim is not None, "Need the input space dimensionality defining"

    integral_kernel = Integral(input_dim=input_space_dim * 2,
                               lengthscale=lengthscale,
                               variances=variances)

    # Parameters are initialised from those of the Integral kernel.
    self.lengthscale = Param('lengthscale', integral_kernel.lengthscale, Logexp())
    self.variances = Param('variances', integral_kernel.variances, Logexp())
    self.link_parameters(self.variances, self.lengthscale)

    self.kernel = integral_kernel
    self.input_space_dim = input_space_dim
    # This cache is important: besides the speed-up, we get the same points
    # for each shape, which makes our covariances more stable.
    self.rectangle_cache = {}
    self.Nrecs = Nrecs
    self.step = step
    self.Ntrials = Ntrials
def __init__(self,
             input_dim,
             variance=1,
             active_dims=[0],
             name="categorical",
             inverse=False,
             useGPU=False):
    """
    Create the categorical kernel with a single positive ``variance``.

    :param input_dim: forwarded to the parent constructor.
    :param variance: initial variance.
    :param active_dims: forwarded to the parent constructor.
    :param name: kernel name.
    :param inverse: flag stored as ``self.inverse``.
    :param useGPU: forwarded to the parent constructor.
    """
    super(Categorical, self).__init__(input_dim, active_dims, name, useGPU=useGPU)
    self.inverse = inverse

    variance_param = Param('variance', variance, Logexp())
    self.variance = variance_param
    self.link_parameter(self.variance)
def __init__(self,
             gap_decay=1.0,
             match_decay=1.0,
             order_coefs=[1.0],
             variance=1.0,
             mode='tf-batch',
             sim='dot',
             wrapper='none',
             active_dims=None,
             name='rbf_string',
             embs=None,
             alphabet=None,
             device='/cpu:0',
             batch_size=1000,
             config=None,
             index=None):
    """
    Initialise both base classes and register the kernel parameters.

    ``Kern.__init__`` and ``StringKernel.__init__`` are called explicitly, in
    that order. ``gap_decay``, ``match_decay``, ``order_coefs`` and
    ``variance`` become linked positive parameters; ``lengthscale`` is
    created with value 1.0 and fixed, and is not linked here.

    All remaining arguments are forwarded to ``StringKernel.__init__``
    (``active_dims`` and ``name`` go to ``Kern.__init__``).
    """
    Kern.__init__(self, 1, active_dims, name)
    StringKernel.__init__(self,
                          gap_decay,
                          match_decay,
                          order_coefs,
                          variance,
                          mode,
                          sim=sim,
                          wrapper=wrapper,
                          embs=embs,
                          alphabet=alphabet,
                          device=device,
                          batch_size=batch_size,
                          config=config,
                          index=index)

    # Replace the plain values with positive-constrained parameters.
    self.gap_decay = Param('gap_decay', gap_decay, Logexp())
    self.match_decay = Param('match_decay', match_decay, Logexp())
    self.order_coefs = Param('coefs', order_coefs, Logexp())
    self.graph = None
    for string_param in (self.gap_decay, self.match_decay, self.order_coefs):
        self.link_parameter(string_param)

    self.variance = Param('variance', variance, Logexp())
    self.link_parameter(self.variance)

    self.use_invLengthscale = False
    self.ARD = False
    self.lengthscale = Param('lengthscale', 1.0, Logexp())
    self.lengthscale.constrain_fixed(1.0)
def __init__(self, input_dim, input_type, variance=1., lengthscale=1., active_dims=None):
    """
    Create the kernel with unconstrained ``variance`` and ``lengthscale``.

    :param input_dim: forwarded to the parent constructor.
    :param input_type: must be an ``InputY``, ``InputX`` or ``InputPsi``
        instance; stored as ``self.input_type``.
    :param variance: initial variance.
    :param lengthscale: initial lengthscale.
    :param active_dims: forwarded to the parent constructor.
    :raises AssertionError: if ``input_type`` is of an unsupported type.
    """
    super(CustomMatern52, self).__init__(input_dim, active_dims, 'matern52')

    # No transform is attached to either parameter here.
    self.variance = Param('variance', variance)
    self.lengthscale = Param('lengthscale', lengthscale)
    self.link_parameters(self.variance, self.lengthscale)

    supported_types = (InputY, InputX, InputPsi)
    assert isinstance(input_type, supported_types), "The type of input_object is not supported"
    self.input_type = input_type
def __init__(self, Y_metadata, gp_link=None, noise_mult=1., known_variances=1., name='Scaled_het_Gauss'):
    """
    Create the likelihood with a single optimised noise multiplier.

    :param Y_metadata: accepted but not used in this constructor.
    :param gp_link: link function; ``None`` selects
        ``link_functions.Identity()``. A warning is printed for any other
        link type.
    :param noise_mult: initial noise multiplier (positive parameter).
    :param known_variances: stored as a plain attribute, i.e. fixed rather
        than a parameter.
    :param name: likelihood name.
    """
    if gp_link is None:
        gp_link = link_functions.Identity()

    if not isinstance(gp_link, link_functions.Identity):
        # Adjacent literals instead of a backslash continuation inside the
        # string, which embedded the source indentation in the message.
        print("Warning, Exact inference is not implemented for non-identity link functions, "
              "if you are not already, ensure Laplace inference_method is used")

    # Note: the known_variances are fixed, not parameters.
    self.known_variances = known_variances

    # Logexp ensures it is positive. This is a parameter, so it gets
    # optimised, gradients calculated, etc.
    self.noise_mult = Param('noise_mult', noise_mult, Logexp())

    # Note: we deliberately start the lookup after Gaussian in the MRO, so
    # Gaussian.__init__ is skipped, to avoid problems with the Gaussian
    # variance.
    super(Gaussian, self).__init__(gp_link, name=name)

    # Add the new parameter by linking it.
    self.link_parameter(self.noise_mult)

    if isinstance(gp_link, link_functions.Identity):
        self.log_concave = True
def __init__(self,
             input_dim: int,
             variance: float = 1.,
             offset: float = 0.,
             active_dims: int = None,
             name: str = 'linear_with_offset') -> None:
    """
    Create the kernel with a positive ``variance`` and a free ``offset``.

    :param input_dim: forwarded to the parent constructor.
    :param variance: initial variance of size one; ``None`` means
        ``np.ones(1)``.
    :param offset: initial offset (no transform attached).
    :param active_dims: forwarded to the parent constructor.
    :param name: kernel name.
    :raises AssertionError: if a supplied ``variance`` is not of size one.
    """
    super(LinearWithOffset, self).__init__(input_dim, active_dims, name)

    if variance is None:
        variance = np.ones(1)
    else:
        variance = np.asarray(variance)
        assert variance.size == 1

    self.variance = Param('variance', variance, Logexp())
    self.offset = Param('offset', offset)
    self.link_parameters(self.variance, self.offset)
def __init__(self,
             input_dim: int,
             reverse: bool = False,
             variance: float = 1.,
             location: float = 0.,
             slope: float = 0.2,
             active_dims: int = None,
             name: str = 'sigmoidal_kernel_base',
             fixed_slope=False) -> None:
    """
    Create the sigmoidal kernel with ``location`` and ``slope``.

    :param input_dim: forwarded to the parent constructor.
    :param reverse: stored as ``self.reverse`` before the parent is set up.
    :param variance: forwarded to the parent constructor.
    :param location: initial location (no transform attached).
    :param slope: slope; linked as a positive parameter unless
        ``fixed_slope`` is true.
    :param active_dims: forwarded to the parent constructor.
    :param name: kernel name.
    :param fixed_slope: when true, ``slope`` is stored as a plain attribute.
    """
    self.reverse = reverse
    # The literal False is the parent's fourth positional argument.
    super(SigmoidalKernelBase, self).__init__(input_dim, variance, active_dims, False, name)

    self.location = Param('location', location)
    self.link_parameter(self.location)

    # Note: here to be used by subclasses, and changing it from the outside
    # does not link the parameter.
    self._fixed_slope = fixed_slope
    if not self._fixed_slope:
        # This +ve constraint makes non-reverse sigmoids only fit (+ve or
        # -ve) curves going away from 0; similarly for other kernels.
        self.slope = Param('slope', slope, Logexp())
        self.link_parameter(self.slope)
    else:
        self.slope = slope
def set_l(self, l, safe=False):
    """
    Replace the inner kernel's lengthscale with a new parameter.

    :param l: array of shape ``(self.active_dim,)``; values below 1e-3 are
        raised to 1e-3 before being stored.
    :param safe: must be truthy, otherwise the call fails.
    :raises AssertionError: if ``safe`` is falsy or ``l`` has the wrong shape.
    """
    assert safe
    expected_shape = (self.active_dim,)
    assert l.shape == expected_shape
    floored = np.maximum(l, 1.e-3)
    self.inner_kernel.lengthscale = Param('lengthscale', floored)
def __init__(self, gp_link=None, r=1.0):
    """
    Create the log-logistic likelihood with shape parameter ``r``.

    :param gp_link: link function; ``None`` selects ``Log()`` (the likelihood
        is parameterised not as link_f but as f).
    :param r: initial shape, converted to ``float`` and stored as the
        positive parameter ``self.r`` (named ``'r_shape'``).
    """
    link = Log() if gp_link is None else gp_link
    super(LogLogistic, self).__init__(link, name='LogLogistic')

    shape_param = Param('r_shape', float(r), Logexp())
    self.r = shape_param
    self.link_parameter(self.r)
def update_parameter_bounds(self, X):
    """
    Re-create ``location`` and ``width`` as Logistic-bounded parameters.

    ``location`` is bounded by the data range; ``width`` is bounded between 0
    and the distance from ``location`` to the upper end of the data range
    (or the full range when that distance is not positive). The same bounded
    parameters are also assigned on ``self.sigmoidal_indicator``.

    :param X: input data; its minimum and maximum define ``self.data_range``
        when no range has been stored yet.
    """
    # NOTE(review): the source layout does not show how much of this method
    # belongs to the ``if`` below; only the data_range assignment is assumed
    # to be conditional - confirm.
    if self.data_range is None:
        self.data_range = (X.min(), X.max())
    # New Param objects are created from the current values; nothing in this
    # method unlinks the previous ones.
    self.location = Param('location', self.location, Logistic(*self.data_range))
    self.sigmoidal_indicator.location = Param('location', self.location, Logistic(*self.data_range))
    # self.sigmoidal_reverse.location = Param('location', self.location, Logistic(*self.data_range))
    # self.sigmoidal.location = Param('location', self.location + self.width, Logistic(*self.data_range))
    # self.location.constrain_bounded(*self.data_range)
    # self.sigmoidal_indicator.location.constrain_bounded(*self.data_range)
    # # self.sigmoidal_reverse.location.constrain_bounded(*self.data_range)
    # # self.sigmoidal.location.constrain_bounded(*self.data_range)
    # Room left between the location and the upper end of the data range.
    max_width = self.data_range[1] - self.location
    # Fall back to the full data range when no positive room is left.
    max_width = max_width if max_width > 0 else self.data_range[1] - self.data_range[0]
    self.width = Param('width', self.width, Logistic(0, max_width))
    self.sigmoidal_indicator.width = Param('width', self.width, Logistic(0, max_width))
def __init__(self,
             input_dim,
             variances=None,
             ARD=False,
             active_dims=None,
             name='mix_integral_linear'):
    """
    Create the kernel with a single positive ``variances`` parameter.

    :param input_dim: forwarded to the parent constructor.
    :param variances: initial variances value.
    :param ARD: accepted but not used in this constructor.
    :param active_dims: forwarded to the parent constructor.
    :param name: kernel name.
    """
    super(Mix_Integral_Linear, self).__init__(input_dim, active_dims, name)

    # Logexp keeps the value positive; linking exposes it to the optimiser.
    variances_param = Param('variances', variances, Logexp())
    self.variances = variances_param
    self.link_parameters(self.variances)
def __init__(self,
             first,
             second,
             location: float = 0.,
             slope: float = 0.5,
             width: float = 1.,
             name='change_window',
             fixed_slope=False):
    """
    Create a change-window kernel built on ``SigmoidalIndicatorKernel``.

    :param first: first kernel, forwarded to the parent constructor.
    :param second: second kernel, forwarded to the parent constructor.
    :param location: forwarded to the parent constructor.
    :param slope: forwarded to the parent constructor.
    :param width: initial window width (positive parameter).
    :param name: kernel name.
    :param fixed_slope: forwarded to the parent constructor.
    """
    super(ChangeWindowKernel, self).__init__(first, second,
                                             SigmoidalIndicatorKernel,
                                             location, slope, name,
                                             fixed_slope)

    width_param = Param('width', width, Logexp())
    self.width = width_param
    self.link_parameter(self.width)
def __init__(self,
             input_dim,
             input_space_dim=None,
             active_dims=None,
             kernel=None,
             name='shapeintegral',
             Nperunit=100,
             lengthscale=None,
             variance=None):
    """
    Set up the shape-integral kernel around a latent-function kernel.

    The number of columns in X is not the dimensionality of the space: for
    pentagons there will be 10 columns in X but only 2 input-space dimensions.

    :param input_dim: number of columns in the data.
    :param input_space_dim: number of dimensions in the domain. May be omitted
        when ``kernel`` is given, in which case it is taken from the kernel.
    :param active_dims: forwarded to the parent constructor.
    :param kernel: latent kernel; defaults to
        ``RBF(input_space_dim, lengthscale=lengthscale)``.
    :param name: kernel name.
    :param Nperunit: stored as ``self.Nperunit``.
    :param lengthscale: only used when the default RBF kernel is created.
    :param variance: accepted but not used in this constructor.
    :raises AssertionError: if neither ``kernel`` nor ``input_space_dim`` is
        given, or if both are given and their dimensionalities disagree.
    """
    super(ShapeIntegral, self).__init__(input_dim, active_dims, name)
    assert (
        (kernel is not None) or (input_space_dim is not None)
    ), "Need either the input space dimensionality defining or the latent kernel defining (to infer input space)"

    if kernel is None:
        kernel = RBF(input_space_dim, lengthscale=lengthscale)
    elif input_space_dim is None:
        # Only infer the dimensionality when the caller did not state it;
        # overwriting it unconditionally made the check below unfailable.
        input_space_dim = kernel.input_dim
    assert kernel.input_dim == input_space_dim, "Latent kernel (dim=%d) should have same input dimensionality as specified in input_space_dim (dim=%d)" % (
        kernel.input_dim, input_space_dim)

    # Parameters are initialised from the latent kernel; Logexp transforms
    # allow positive-only values.
    self.lengthscale = Param('lengthscale', kernel.lengthscale, Logexp())
    self.variance = Param('variance', kernel.variance, Logexp())
    # Register the parameters that need optimising.
    self.link_parameters(self.variance, self.lengthscale)

    self.kernel = kernel
    self.Nperunit = Nperunit
    self.input_space_dim = input_space_dim
def __init__(self,
             first,
             second,
             sigmoidal,
             sigmoidal_indicator,
             third=None,
             location: float = 0.,
             slope: float = 0.5,
             width=1.,
             name='change_window_independent_base',
             fixed_slope=False):
    """
    Combine three kernels through sigmoidal and sigmoidal-indicator kernels.

    :param first: first kernel; a copy goes to the parent constructor and the
        original is stored as ``self.first``.
    :param second: second kernel, handled like ``first``.
    :param sigmoidal: callable building the forward/reverse sigmoidal kernels.
    :param sigmoidal_indicator: callable building the indicator kernel.
    :param third: third kernel; ``None`` means a deep copy of ``first``.
    :param location: initial location (unconstrained parameter).
    :param slope: slope; linked as a positive parameter unless ``fixed_slope``.
    :param width: initial width (positive parameter).
    :param name: kernel name.
    :param fixed_slope: when true, ``slope`` is stored as a plain attribute.
    """
    if third is None:
        third = deepcopy(first)

    _newkerns = [first.copy(), second.copy(), third.copy()]
    super(ChangeWindowIndependentBase, self).__init__(_newkerns, name)
    self.first = first
    self.second = second
    self.third = third

    # Note: here to be used by subclasses, and changing it from the outside
    # does not link the parameter.
    self._fixed_slope = fixed_slope
    if not self._fixed_slope:
        self.slope = Param('slope', np.array(slope), Logexp())
        self.link_parameter(self.slope)
    else:
        self.slope = slope

    # The helper kernels are built from the raw constructor arguments.
    self.sigmoidal = sigmoidal(1, False, 1., location, slope)
    self.sigmoidal_reverse = sigmoidal(1, True, 1., location, slope)
    self.sigmoidal_indicator = sigmoidal_indicator(1, False, 1., location, slope, width)

    self.location = Param('location', np.array(location))
    self.width = Param('width', np.array(width), Logexp())
    self.link_parameters(self.location, self.width)

    self.data_range = None
    self.one_off_bounds_set = False
    # Snapshot of the initial values, one independent array per parameter.
    initial_values = (('location', location), ('slope', slope), ('width', width))
    self.last_parameter_values = {
        key: np.array(value) for key, value in initial_values
    }
def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=False):
    """
    Validate the lengthscale and register ``variance`` and ``lengthscale``.

    :param input_dim: number of input dimensions.
    :param variance: initial variance; must have size one.
    :param lengthscale: initial lengthscale(s). ``None`` means ones: a single
        value without ARD, one per input dimension with ARD. With ARD a
        single value is repeated for every input dimension.
    :param ARD: whether one lengthscale per input dimension is used.
    :param active_dims: forwarded to the parent constructor.
    :param name: kernel name.
    :param useGPU: forwarded to the parent constructor.
    :raises AssertionError: if the lengthscale or variance size is invalid.
    """
    super(Stationary, self).__init__(input_dim, active_dims, name, useGPU=useGPU)
    self.ARD = ARD

    if lengthscale is None:
        lengthscale = np.ones(self.input_dim) if ARD else np.ones(1)
    else:
        lengthscale = np.asarray(lengthscale)
        if ARD:
            assert lengthscale.size in [1, input_dim], "Bad number of lengthscales"
            if lengthscale.size != input_dim:
                # Broadcast the single value over all input dimensions.
                lengthscale = np.ones(input_dim) * lengthscale
        else:
            assert lengthscale.size == 1, "Only 1 lengthscale needed for non-ARD kernel"

    self.lengthscale = Param('lengthscale', lengthscale, Logexp())
    self.variance = Param('variance', variance, Logexp())
    assert self.variance.size == 1
    self.link_parameters(self.variance, self.lengthscale)
def __init__(self, k1, k2, kc, xc, cpDim):
    """
    Create a changepoint kernel from one or two sub-kernels.

    :param k1: first kernel.
    :param k2: second kernel; ``None`` means ``k1`` is used for both and only
        ``k1`` is passed to the parent constructor.
    :param kc: initial value of the positive parameter ``kc``.
    :param xc: changepoint position(s), stored as a numpy array.
    :param cpDim: stored as ``self.cpDim``.
    """
    single_kernel = k2 is None
    parts = [k1] if single_kernel else [k1, k2]
    super(Changepoint, self).__init__(parts, "changepoint")
    if single_kernel:
        k2 = k1

    self.k1 = k1
    self.k2 = k2

    self.kc = Param('kc', kc, Logexp())
    self.link_parameter(self.kc)

    self.xc = np.array(xc)
    self.cpDim = cpDim
def __init__(self,
             k1,
             k2=None,
             kc=1.,
             xc=np.array([[0]]),
             cpDim=0,
             changepointParameter=False):
    """
    Create a changepoint kernel from one or two sub-kernels.

    :param k1: GPy.kern.Kernel, first kernel.
    :param k2: GPy.kern.Kernel, second kernel; ``None`` means ``k1`` is used
        for both and only ``k1`` is passed to the parent constructor.
    :param kc: float, covariance at the changepoint (positive parameter).
    :param xc: np.array, position of changepoint(s).
    :param cpDim: int, dimension that the changepoint exists on.
    :param changepointParameter: bool, whether ``xc`` should be linked as a
        parameter.
    """
    single_kernel = k2 is None
    parts = [k1] if single_kernel else [k1, k2]
    super(Changepoint, self).__init__(parts, "changepoint")
    if single_kernel:
        k2 = k1

    self.k1 = k1
    self.k2 = k2

    self.kc = Param('kc', kc, Logexp())
    self.link_parameter(self.kc)

    self.changepointParameter = changepointParameter
    self.xc = np.array(xc)
    if self.changepointParameter:
        # Promote the array to a linked parameter with a zero gradient.
        self.xc = Param('xc', self.xc)
        self.link_parameter(self.xc)
        self.xc.gradient = [[0]]

    self.cpDim = cpDim
def __init__(self,
             input_dim,
             basis,
             variance=None,
             ARD=False,
             active_dims=None,
             name='mean',
             useGP=False):
    """
    Initialize the object.

    :param input_dim: number of input dimensions; stored as an ``int``.
    :param basis: the basis functions. Must be callable and expose a
        ``num_output`` attribute.
    :param variance: initial variance(s). ``None`` means ones: a single value
        without ARD, ``self.num_params`` values with ARD. With ARD a single
        value is repeated ``self.num_params`` times.
    :param ARD: whether one variance per basis function is used.
    :param active_dims: forwarded to the parent constructor.
    :param name: object name.
    :param useGP: forwarded to the parent constructor under the same keyword.
    :raises TypeError: if ``basis`` lacks ``__call__`` or ``num_output``.
    :raises AssertionError: if the number of variances is invalid.
    """
    super(MeanFunction, self).__init__(input_dim, active_dims, name, useGP=useGP)
    self.input_dim = int(input_dim)
    self._ARD = ARD

    # The messages below were garbled (missing verb, duplicated word,
    # doubled spaces); they are rewritten to read as intended.
    if not hasattr(basis, '__call__'):
        raise TypeError('The basis functions must implement the '
                        '\'__call__()\' method. This method should '
                        'evaluate the basis functions given a 2D '
                        'numpy array of \'num_points x input_dim\' '
                        'dimensions.')
    if not hasattr(basis, 'num_output'):
        raise TypeError('The basis functions must have an attribute '
                        '\'num_output\' which should store the number of '
                        'basis functions it contains.')
    self._basis = basis
    self._num_params = basis.num_output

    if not ARD:
        if variance is None:
            variance = np.ones(1)
        else:
            variance = np.asarray(variance)
            assert variance.size == 1, 'Only 1 variance needed for a non-ARD kernel'
    else:
        if variance is not None:
            variance = np.asarray(variance)
            assert variance.size in [1, self.num_params], 'Bad number of variances'
            if variance.size != self.num_params:
                # Broadcast the single value over all basis functions.
                variance = np.ones(self.num_params) * variance
        else:
            variance = np.ones(self.num_params)

    self.variance = Param('variance', variance, Logexp())
    self.link_parameters(self.variance)
def __init__(self, warping_indices, hidden_dims, out_dim, warped_indices, name):
    """
    Wrap an ``NNwarping`` network and expose its state as linked parameters.

    :param warping_indices: stored as ``self.warping_indices``; its length is
        the first argument given to ``NNwarping``.
    :param hidden_dims: passed to ``NNwarping``.
    :param out_dim: passed to ``NNwarping``.
    :param warped_indices: stored as ``self.warped_indices``.
    :param name: suffix appended to ``'nn_warping_'`` for the object name.
    """
    super(NNwarpingFunction, self).__init__(name='nn_warping_' + name)
    self.warping_indices = warping_indices
    self.warped_indices = warped_indices
    self.nnwarping = NNwarping(len(warping_indices), hidden_dims, out_dim)

    # One Param per state_dict entry, named after its key.
    state = self.nnwarping.state_dict()
    self.params_name = list(state.keys())
    self.params_value = [entry.numpy() for entry in state.values()]
    self.params = [
        Param(self.params_name[position], value)
        for position, value in enumerate(self.params_value)
    ]
    for nn_param in self.params:
        self.link_parameter(nn_param)

    # training statistics
    self.params_updated_num = 0
def __init__(self,
             input_dim,
             inner_kernel,
             W=None,
             name='ActiveSubspaceKernel',
             fixed_cols=0):
    """
    Initialize the object.

    :param input_dim: number of input dimensions.
    :param inner_kernel: kernel applied to the projected inputs.
    :param W: projection matrix of shape
        ``(self.input_dim, self.active_dim)``; ``None`` draws one from the
        Stiefel prior.
    :param name: kernel name.
    :param fixed_cols: passed to the prior and stored as ``self.fixed_cols``.
    :raises AssertionError: if a supplied ``W`` has the wrong shape.
    """
    super(ActiveSubspaceKernel, self).__init__(input_dim, None, name, useGPU=False)
    self.inner_kernel = inner_kernel

    prior_W = StiefelPrior(input_dim,
                           inner_kernel.input_dim,
                           fixed_cols=fixed_cols,
                           alpha=100.)
    self.fixed_cols = fixed_cols

    if W is not None:
        assert W.shape == (self.input_dim, self.active_dim)
    else:
        # No matrix supplied: sample one from the prior.
        W = prior_W.rvs().reshape(input_dim, inner_kernel.input_dim)

    self.W = Param('W', W)
    self.W.set_prior(prior_W)
    self.link_parameters(self.W, self.inner_kernel)
def __init__(self, kernels):
    """
    Kernel for multi-fidelity problems with any number of fidelities.

    :param kernels: list of GPy kernels, one per fidelity, ordered from low
        to high fidelity.

    Reference: M. C. Kennedy and A. O'Hagan (2000), "Predicting the output
    from a complex computer code when fast approximations are available".

    Fidelity s is modelled as::

        f_s(x) = p_t * f_t(x) + d_s(x)

    where s is the fidelity, t is the previous fidelity, f_s(x) is the
    function modelling fidelity s, d_s(x) models the difference between
    fidelity s-1 and s, and p_t is a scaling parameter between fidelity t
    and s.
    """
    self.kernels = kernels
    self.n_fidelities = len(kernels)

    super(LinearMultiFidelityKernel, self).__init__(kernels=self.kernels,
                                                    name='multifidelity',
                                                    extra_dims=[-1])

    # One scaling value per pair of consecutive fidelities, all starting at 1.
    n_scalings = self.n_fidelities - 1
    self.scaling_param = Param('scale', np.ones(n_scalings))

    # Link parameters so paramz knows about them.
    self.link_parameters(self.scaling_param)
class ActiveSubspaceKernel(Kern):
    """
    A kernel that applies an inner kernel to linearly projected inputs:

    .. math::
        k(x, x') = k_0(Wx, Wx')

    The projection of a data matrix ``X`` is computed as ``np.dot(X, W)``.
    """

    _inner_kernel = None

    @property
    def inner_kernel(self):
        """
        :getter: Get the inner kernel.
        """
        return self._inner_kernel

    @inner_kernel.setter
    def inner_kernel(self, value):
        """
        :setter: Set the inner kernel. It must be a ``Kern`` whose input
            dimension does not exceed that of this kernel.
        """
        assert isinstance(value, Kern), (
            'The inner kernel must be a'
            ' proper `Gpy.kern.Kern` object.')
        assert value.input_dim <= self.input_dim, (
            'The number of active'
            ' dimensions must be smaller than or equal to the number'
            ' of inputs.')
        self._inner_kernel = value

    @property
    def active_dim(self):
        """
        :getter: Get the number of active dimensions.
        """
        return self.inner_kernel.input_dim

    def __init__(self,
                 input_dim,
                 inner_kernel,
                 W=None,
                 name='ActiveSubspaceKernel',
                 fixed_cols=0):
        """
        Initialize the object.

        :param input_dim: number of input dimensions.
        :param inner_kernel: kernel applied to the projected inputs.
        :param W: projection matrix of shape
            ``(self.input_dim, self.active_dim)``; ``None`` draws one from
            the Stiefel prior.
        :param name: kernel name.
        :param fixed_cols: passed to the prior and stored on ``self``.
        """
        super(ActiveSubspaceKernel, self).__init__(input_dim, None, name, useGPU=False)
        self.inner_kernel = inner_kernel

        prior_W = StiefelPrior(input_dim,
                               inner_kernel.input_dim,
                               fixed_cols=fixed_cols,
                               alpha=100.)
        self.fixed_cols = fixed_cols

        if W is not None:
            assert W.shape == (self.input_dim, self.active_dim)
        else:
            # No matrix supplied: sample one from the prior.
            W = prior_W.rvs().reshape(input_dim, inner_kernel.input_dim)

        self.W = Param('W', W)
        self.W.set_prior(prior_W)
        self.link_parameters(self.W, self.inner_kernel)

    def _get_Z(self, X):
        """Project ``X`` with ``W``; ``None`` is passed through."""
        if X is None:
            return None
        return np.dot(X, self.W)

    def _get_Zs(self, X, X2):
        """Project both inputs and return them as a pair."""
        projected_first = self._get_Z(X)
        projected_second = self._get_Z(X2)
        return projected_first, projected_second

    @Cache_this(limit=5, ignore_args=())
    def K(self, X, X2=None):
        """
        Kernel function applied on inputs X and X2.
        """
        projected = self._get_Zs(X, X2)
        return self.inner_kernel.K(*projected)

    @Cache_this(limit=3, ignore_args=())
    def Kdiag(self, X):
        """
        Evaluate only the diagonal of the covariance matrix.
        """
        projected = self._get_Z(X)
        return self.inner_kernel.Kdiag(projected)

    def gradients_X(self, dL_dK, X, X2=None):
        """
        Gradient with respect to ``X``: the inner kernel's input gradient at
        the projected points, contracted with ``W`` over the projected axis.
        """
        Z, Z2 = self._get_Zs(X, X2)
        inner_gradient = self.inner_kernel.gradients_X(dL_dK, Z, Z2)
        return np.einsum('ik,jk->ij', inner_gradient, self.W)

    def update_gradients_full(self, dL_dK, X, X2=None):
        """
        Given the derivative of the objective wrt the covariance matrix
        (dL_dK), compute the gradient wrt the parameters of this kernel,
        and store in the parameters object as e.g. self.variance.gradient

        Only the case ``X2 is None`` is supported.
        """
        assert X2 is None
        Z = self._get_Z(X)
        self.inner_kernel.update_gradients_full(dL_dK, Z)
        dL_dZ = self.inner_kernel.gradients_X(dL_dK, Z)
        self.W.gradient = np.einsum('ij,ik->kj', dL_dZ, X)

        n_fixed = self.fixed_cols
        if n_fixed < 1:
            return
        # Zero every gradient entry lying in the last n_fixed rows or the
        # last n_fixed columns of W.
        self.W.gradient[:-n_fixed, -n_fixed:] = 0.
        self.W.gradient[-n_fixed:, :-n_fixed] = 0.
        self.W.gradient[-n_fixed:, -n_fixed:] = 0.
class HetStudentT(MultiLikelihood):
    """
    Heteroscedastic Student-T likelihood.

    Every output dimension is driven by two latent functions: ``f`` (the
    location) and ``g`` (the log of the squared scale). Wherever a stacked
    latent matrix is passed in (``logpdf``, ``variational_expectations``,
    ``samples``) its columns are laid out as ``[f | g]``: the first ``D``
    columns hold ``f`` and the last ``D`` columns hold ``g``.

    For nomenclature see Bayesian Data Analysis 2003 p576

    .. math::
        p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\exp(g_{i})}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - f_{i})^{2}}{\\exp(g_{i})}\\right)\\right)^{-\\frac{v+1}{2}}

    """
    def __init__(self, gp_link=None, deg_free=5, sigma2=2):
        """
        :param gp_link: link function applied to the location latent function;
            defaults to ``link_functions.Identity()``.
        :param deg_free: initial degrees of freedom ``v``. It is registered as
            the parameter ``'deg_free'`` and immediately constrained fixed.
        :param sigma2: unused by this class (the scale comes from the latent
            function ``g``); kept so existing call sites keep working.
        """
        if gp_link is None:
            gp_link = link_functions.Identity()

        super(HetStudentT, self).__init__(gp_link, name='Hetro_Student_T')

        # The parameter is stored as ``self.v``; the other methods read it as
        # ``self.deg_free`` (presumably exposed under the Param's name by
        # ``link_parameter`` -- confirm against the Parameterized base class).
        self.v = Param('deg_free', float(deg_free), Logexp())
        self.link_parameter(self.v)
        self.v.constrain_fixed()

        self.log_concave = False

    def request_num_latent_functions(self, Y):
        """
        The likelihood should infer how many latent functions are needed for the likelihood

        In this case we have one latent function for mean and one for scale, for each output dimension
        """
        return Y.shape[1]*2

    def pdf(self, f, g, y, Y_metadata=None):
        """
        Density of ``y`` given the location ``f`` and log squared scale ``g``.

        .. math::
            p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\exp(g_{i})}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - f_{i})^{2}}{\\exp(g_{i})}\\right)\\right)^{-\\frac{v+1}{2}}

        :param f: location latent values
        :param g: log squared-scale latent values
        :param y: observations
        :param Y_metadata: unused
        :returns: the density evaluated elementwise
        """
        df = float(self.deg_free[:])
        e_g = np.exp(g)
        y_f2 = (y-f)**2
        pdf = (gamma(0.5*(df+1)) / (gamma(0.5*df)*np.sqrt(df*np.pi*e_g)))*(1 + y_f2/(df*e_g))**(-0.5*(df+1))
        return pdf

    def logpdf(self, f, y, Y_metadata=None):
        """
        Log density of ``y`` given the stacked latent matrix ``f``.

        :param f: stacked latent values, columns laid out as ``[f | g]``
            with ``D = y.shape[1]`` columns each
        :param y: observations with ``D`` columns
        :param Y_metadata: unused
        :returns: elementwise log density
        """
        D = y.shape[1]
        fv, gv = f[:, :D], f[:, D:]
        df = float(self.deg_free[:])
        y_f2 = (y-fv)**2
        lnpdf = gammaln(0.5*(df+1)) - gammaln(0.5*df) - 0.5*np.log(df*np.pi) - 0.5*gv - 0.5*(df+1)*np.log1p(y_f2/(df*np.exp(gv)))
        return lnpdf

    def update_gradients(self, grads):
        """
        Pull out the gradients, be careful as the order must match the order
        in which the parameters are added
        """
        self.v.gradient = grads[0]

    def predictive_mean(self, mu, sigma, Y_metadata=None):
        # The comment here confuses mean and median.
        return self.gp_link.transf(mu) # only true if link is monotonic, which it is.

    def predictive_variance(self, mu, variance, predictive_mean=None, Y_metadata=None):
        """
        Predictive variance; NaN everywhere when it does not exist.

        :returns: an array of NaN with ``mu``'s shape when the degrees of
            freedom are <= 2, otherwise whatever the parent class computes.
        """
        if self.deg_free<=2.:
            return np.empty(mu.shape)*np.nan # does not exist for degrees of freedom <= 2.
        else:
            # Fixed: this used to be ``super(StudentT, self)``, which names a
            # class this likelihood does not derive from.
            return super(HetStudentT, self).predictive_variance(mu, variance, predictive_mean, Y_metadata)

    def conditional_mean(self, gp):
        return self.gp_link.transf(gp)

    def conditional_variance(self, gp):
        #Expects just g!
        return self.deg_free*np.exp(gp)/(self.deg_free - 2.)

    def samples(self, gp, Y_metadata=None):
        """
        Returns a set of samples of observations based on a given value of the latent variable.

        The previous implementation read ``self.sigma2``, which this class
        never defines, so it always raised ``AttributeError``. Samples are now
        drawn with the heteroscedastic scale ``sqrt(exp(g))``, matching the
        density used by ``logpdf``.

        :param gp: stacked latent values, columns laid out as ``[f | g]``
        :param Y_metadata: unused
        :returns: samples with the shape of the ``f`` half of ``gp``
        :raises ValueError: if ``gp`` is not 2-D with an even number of columns
        """
        if gp.ndim != 2 or gp.shape[1] % 2 != 0:
            raise ValueError("gp must be 2-D with columns [f | g]; got shape %s" % (gp.shape,))
        D = gp.shape[1] // 2
        fv, gv = gp[:, :D], gp[:, D:]
        # Broadcast every argument to the sample shape so that a single call
        # draws all samples at once.
        dfs = np.ones_like(fv)*self.v
        scales = np.sqrt(np.exp(gv))
        student_t_samples = stats.t.rvs(dfs, loc=self.gp_link.transf(fv),
                                        scale=scales)
        return student_t_samples

    def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
        """
        Variational expectation of the log likelihood and its gradients,
        using the compiled 2-D Gauss-Hermite routine ``quad_cython.quad2d_stut``.

        :param Y: observations, ``D`` columns
        :param m: variational means, columns laid out as ``[f | g]``
        :param v: variational variances, same layout as ``m``
        :param gh_points: optional ``(nodes, weights)`` pair; when omitted,
            20 points are requested from ``self._gh_points``
        :param Y_metadata: unused
        :returns: ``(F, dF_dm, dF_dv, dF_dtheta)`` where ``dF_dm`` and
            ``dF_dv`` are stacked as ``[f | g]`` and ``dF_dtheta`` has a
            leading axis of length one (degrees of freedom is the only
            parameter)
        """
        D = Y.shape[1]
        mf, mg = m[:, :D], m[:, D:]
        vf, vg = v[:, :D], v[:, D:]
        df = float(self.deg_free[:])

        #F = -0.5*mg
        #Parameterize sigma not sigma2 as sigma itself needs to be positive!
        #F = -mg
        # Terms of the log density that do not need quadrature.
        F = (gammaln((df + 1) * 0.5)
             - gammaln(df * 0.5)
             - 0.5*np.log(df * np.pi * np.exp(mg))
             )

        # Some little code to check the result numerically using quadrature
        # (reference only, never executed):
        #   from functools import partial
        #   from scipy import integrate
        #   i = 5  # datapoint index
        #   def quad_func(fi, gi, yi, mgi, vgi, mfi, vfi):
        #       return ((-0.5*(df+1)*np.log1p(((yi-fi)**2)/(df*np.exp(gi))))       #p(y|f,g)
        #               * np.exp(-0.5*np.log(2*np.pi*vgi) - 0.5*((gi - mgi)**2)/vgi) #q(g)
        #               * np.exp(-0.5*np.log(2*np.pi*vfi) - 0.5*((fi - mfi)**2)/vfi) #q(f)
        #               )
        #   quad_func_l = partial(quad_func, yi=Y[i], mgi=mg[i], vgi=vg[i], mfi=mf[i], vfi=vf[i])
        #   def integrl(gi):
        #       return integrate.quad(quad_func_l, -50, 50, args=(gi))[0]
        #   print("Numeric scipy F quad")
        #   print(integrate.quad(lambda fi: integrl(fi), -50, 50))

        #Do some testing to see if the quadrature works well for one datapoint
        Ngh = 20
        if gh_points is None:
            gh_x, gh_w = self._gh_points(T=Ngh)
        else:
            gh_x, gh_w = gh_points
            # Keep the point count in step with the supplied nodes instead of
            # always telling the compiled routine there are 20 of them.
            Ngh = len(gh_x)

        N = Y.shape[0]
        F_quad = np.zeros(Y.shape)
        dF_dmg = np.zeros(mg.shape)
        dF_dmf = np.zeros(mf.shape)
        dF_dvf = np.zeros(vf.shape)
        dF_dvg = np.zeros(vg.shape)
        dF_ddf = np.zeros(vg.shape)
        for d in range(D):
            # Pass only output dimension ``d``. The original flattened all D
            # columns on every iteration, which only lines up with the
            # length-N outputs when D == 1 (where both forms are identical).
            quad_cython.quad2d_stut(N, mf[:, d].flatten(), vf[:, d].flatten(),
                                    mg[:, d].flatten(), vg[:, d].flatten(),
                                    Y[:, d].flatten(), Ngh, df, gh_x, gh_w,
                                    F_quad[:,d], dF_dmf[:,d], dF_dvf[:,d],
                                    dF_dmg[:,d], dF_dvg[:,d], dF_ddf[:,d])

        # Presumably the 2-D Gauss-Hermite normalisation that the compiled
        # routine leaves to the caller -- confirm against quad_cython.
        F_quad /= np.pi
        dF_dmg /= np.pi
        dF_dmf /= np.pi
        dF_dvf /= np.pi
        dF_dvg /= np.pi
        dF_ddf /= np.pi

        F += F_quad
        dF_dmg += -0.5  # from -0.5<g> term
        dF_dvf /= 2.0
        dF_dvg /= 2.0

        dF_dm = np.hstack((dF_dmf, dF_dmg))
        dF_dv = np.hstack((dF_dvf, dF_dvg))

        #derivative wrt to degrees of freedom
        dF_ddf += 0.5*digamma(0.5*(df+1)) - 0.5*digamma(0.5*df) - 1.0/(2*df)
        #Since we are the only parameter, our first dimension is 1
        dF_dtheta = dF_ddf[None, :]
        return F, dF_dm, dF_dv, dF_dtheta

    def variational_expectations_pure(self, Y, m, v, gh_points=None, Y_metadata=None):
        """
        Pure-Python counterpart of ``variational_expectations`` that hands the
        integrands to ``self.quad2d`` instead of the compiled routine.

        Parameters and return value are the same as for
        ``variational_expectations``. The scipy quadrature sanity-check
        sketched in that method applies here too.
        """
        D = Y.shape[1]
        mf, mg = m[:, :D], m[:, D:]
        vf, vg = v[:, :D], v[:, D:]
        df = float(self.deg_free[:])

        #F = -0.5*mg
        #Parameterize sigma not sigma2 as sigma itself needs to be positive!
        #F = -mg
        # Terms of the log density that do not need quadrature.
        F = (gammaln((df + 1) * 0.5)
             - gammaln(df * 0.5)
             - 0.5*np.log(df * np.pi * np.exp(mg))
             )

        from functools import partial

        # Integrands. In the helper names "_df_" / "_df2_" mean first / second
        # derivative with respect to the latent f, "_dg_" / "_dg2_" with
        # respect to the latent g, and "_ddf_" the derivative with respect to
        # the degrees of freedom (the ``df`` argument).
        def F_quad_func(f, e_g, y, df):
            y_f2 = (y-f)**2
            return -0.5*(df+1)*np.log1p(y_f2/(df*e_g))

        def F_dquad_df_func(f, e_g, y, df):
            y_f2 = (y-f)**2
            return (df+1)*(y-f)/(df*e_g + y_f2)

        def F_d2quad_df2_func(f, e_g, y, df):
            y_f2 = (y-f)**2
            df_eg = df*e_g
            return (df+1)*(y_f2 - df_eg)/(df_eg + y_f2)**2

        def F_dquad_dg_func(f, e_g, y, df):
            y_f2 = (y-f)**2
            return 0.5*(df+1)*y_f2/(df*e_g + y_f2)

        def F_d2quad_dg2_func(f, e_g, y, df):
            y_f2 = (y-f)**2
            df_eg = df*e_g
            return -0.5*(df+1)*y_f2*df_eg/(df_eg + y_f2)**2

        def F_dquad_ddf_func(f, e_g, y, df):
            y_f2 = (y-f)**2
            df_eg = df*e_g
            return 0.5*( (df+1)*y_f2/(df*(df_eg + y_f2)) - np.log1p(y_f2/(df_eg)) )

        F_quad_func_p = partial(F_quad_func, df=df)
        F_dquad_df_func_p = partial(F_dquad_df_func, df=df)
        F_d2quad_df2_func_p = partial(F_d2quad_df2_func, df=df)
        F_dquad_dg_func_p = partial(F_dquad_dg_func, df=df)
        F_d2quad_dg2_func_p = partial(F_d2quad_dg2_func, df=df)
        F_dquad_ddf_func_p = partial(F_dquad_ddf_func, df=df)

        #(F_quad, dF_dmf, dF_dvf,
        #dF_dmg, dF_dvg) = self.quad2d([F_quad_func_p, F_dquad_df_func_p,
        #F_d2quad_df2_func_p, F_dquad_dg_func_p,
        #F_d2quad_dg2_func_p],#, F_dquad_ddf_func_p],
        #Y, mf, vf, mg, vg, gh_points, exp_g=True)
        (F_quad, dF_dmf, dF_dvf,
         dF_dmg, dF_dvg, dF_ddf) = self.quad2d([F_quad_func_p, F_dquad_df_func_p,
                                                F_d2quad_df2_func_p, F_dquad_dg_func_p,
                                                F_d2quad_dg2_func_p, F_dquad_ddf_func_p],
                                               Y, mf, vf, mg, vg, gh_points, exp_g=True)

        F += F_quad
        dF_dmg += -0.5  # from -0.5<g> term
        # The variance slots were filled with expected second derivatives;
        # halving them gives the derivative with respect to the variance.
        dF_dvf /= 2.0
        dF_dvg /= 2.0

        dF_dm = np.hstack((dF_dmf, dF_dmg))
        dF_dv = np.hstack((dF_dvf, dF_dvg))

        #derivative wrt to degrees of freedom
        dF_ddf += 0.5*digamma(0.5*(df+1)) - 0.5*digamma(0.5*df) - 1.0/(2*df)
        #Since we are the only parameter, our first dimension is 1
        dF_dtheta = dF_ddf[None, :]
        return F, dF_dm, dF_dv, dF_dtheta

    #def pdf_partial(self, Y_metadata):
        #"""
        #Should be overriden for models with parameters that are fixed throughout a quadrature
        #"""
        #from functools import partial
        #df = float(self.deg_free[:])
        #return partial(self.pdf, df=df, Y_metadata=Y_metadata)

    def quad2D_weave(self, f, g, y, v, gh_w, f_string, e_g, D):
        """
        Legacy ``scipy.weave`` 2-D quadrature. Disabled: always raises.

        :raises NotImplementedError: unconditionally
        """
        #Broken!
        raise NotImplementedError
        # Everything below is unreachable; it is kept only as a record of the
        # old weave-based implementation.
        from scipy import weave
        N = y.shape[0]
        h = gh_w.shape[0]
        F = np.zeros((y.shape[0], D))

        #f_string = "pow(x(n,i), y(n,j))"

        support_code = """
        #include <stdio.h>
        """
        omp = False
        if omp:
            pragma = "#pragma omp parallel for private(d, n, i, j)"
            support_code += """
            #include <math.h>
            #include <omp.h>
            """
            weave_options = {'headers' : ['<omp.h>'],
                             'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
                             'extra_link_args' : ['-lgomp']}
        else:
            pragma = ""
            weave_options = {}

        code = """
        int d,n,i,j;
        {pragma}
        for(d=0; d<D; d++){{
            for(n=0; n<N; n++){{
                for(i=0; i<h; i++){{
                    for(j=0; j<h; j++){{
                        F(n,d) += gh_w(i)*gh_w(j)*{func};
                    }}
                }}
            }}
        }}
        """.format(func=f_string, pragma=pragma)
        weave.inline(code, ['F', 'f', 'g', 'y', 'v', 'N', 'D', 'h', 'gh_w', 'e_g'],
                     type_converters=weave.converters.blitz,
                     support_code=support_code, **weave_options)
        return F
class RBFStringKernel(StringKernel, GPy.kern.RBF):
    """
    String kernel with an RBF wrapper.

    The string kernel supplies the quantity that the RBF treats as its
    (scaled) distance, see ``_scaled_dist_and_grads``. The RBF lengthscale is
    fixed to 1.
    """
    # NOTE: ``order_coefs`` keeps its list default for interface
    # compatibility; this constructor only passes it on and never mutates it.
    def __init__(self, gap_decay=1.0, match_decay=1.0, order_coefs=[1.0],
                 variance=1.0, mode='tf-batch', sim='dot', wrapper='none',
                 active_dims=None, name='rbf_string', embs=None, alphabet=None,
                 device='/cpu:0', batch_size=1000, config=None, index=None):
        """
        Set up the string kernel and register ``gap_decay``, ``match_decay``,
        ``coefs`` (from ``order_coefs``) and ``variance`` as positive
        (``Logexp``) parameters.

        All remaining arguments are forwarded unchanged to
        ``StringKernel.__init__``; ``active_dims`` and ``name`` go to
        ``Kern.__init__``.
        """
        Kern.__init__(self, 1, active_dims, name)
        StringKernel.__init__(self, gap_decay, match_decay, order_coefs, variance, mode,
                              sim=sim, wrapper=wrapper, embs=embs, alphabet=alphabet,
                              device=device, batch_size=batch_size, config=config,
                              index=index)
        self.gap_decay = Param('gap_decay', gap_decay, Logexp())
        self.match_decay = Param('match_decay', match_decay, Logexp())
        self.order_coefs = Param('coefs', order_coefs, Logexp())
        self.graph = None
        self.link_parameter(self.gap_decay)
        self.link_parameter(self.match_decay)
        self.link_parameter(self.order_coefs)

        self.variance = Param('variance', variance, Logexp())
        self.link_parameter(self.variance)
        self.use_invLengthscale = False
        self.ARD = False
        # The lengthscale is created but pinned at 1.0 and not linked.
        self.lengthscale = Param('lengthscale', 1.0, Logexp())
        self.lengthscale.constrain_fixed(1.0)

    @Cache_this(limit=3, ignore_args=())
    def _string_K(self, X, X2=None):
        """
        Evaluate the string kernel between ``X`` and ``X2`` and return it
        together with the gap, match and coefficient gradients that the
        evaluation leaves on ``self``.
        """
        result = StringKernel.K(self, X, X2)
        gap_grads = self.gap_grads
        match_grads = self.match_grads
        coef_grads = self.coef_grads
        return result, gap_grads, match_grads, coef_grads

    @Cache_this(limit=3, ignore_args=())
    def _string_Kdiag(self, X):
        """
        Same as ``_string_K`` but for the diagonal of the string kernel of
        ``X`` with itself (``diag=True``).
        """
        result = StringKernel.K(self, X, X, diag=True)
        gap_grads = self.gap_grads
        match_grads = self.match_grads
        coef_grads = self.coef_grads
        return result, gap_grads, match_grads, coef_grads

    @Cache_this(limit=3, ignore_args=())
    def _scaled_dist_and_grads(self, X, X2=None):
        """
        Returns the scaled distance between inputs.
        We assume lengthscale=1 since any ls changes
        can be absorbed into the sk coeficients.
        We also precalculate gradients.

        The distance is computed as ``k(x, x) + k(x2, x2) - 2 k(x, x2)``
        using the string kernel ``k``.

        :returns: ``(r, dr_dgap, dr_dmatch, dr_dcoefs)``
        """
        #print "CALCULATING r"
        k, gap_g, match_g, coefs_g = self._string_K(X, X2)
        diag1, diag_gap_g1, diag_match_g1, diag_coefs_g1 = self._string_Kdiag(X)
        # Identity test: ``X2 == None`` is evaluated elementwise on NumPy
        # arrays and its truth value is ambiguous.
        if X2 is None:
            diag2, diag_gap_g2, diag_match_g2, diag_coefs_g2 = diag1, diag_gap_g1, diag_match_g1, diag_coefs_g1
        else:
            diag2, diag_gap_g2, diag_match_g2, diag_coefs_g2 = self._string_Kdiag(X2)

        # Direct sum
        dsum = diag1[:, None] + diag2[None, :]
        r = dsum - (2 * k)

        dsum_dgap = diag_gap_g1[:, None] + diag_gap_g2[None, :]
        dr_dgap = dsum_dgap - (2 * gap_g)

        dsum_dmatch = diag_match_g1[:, None] + diag_match_g2[None, :]
        dr_dmatch = dsum_dmatch - (2 * match_g)

        dr_dcoefs = np.zeros_like(coefs_g)
        # ``range`` instead of the Python 2-only ``xrange``.
        for i in range(self.order):
            dsum_dcoef = diag_coefs_g1[:, None, i] + diag_coefs_g2[None, :, i]
            dr_dcoefs[:, : , i] = dsum_dcoef - (2 * coefs_g[:, : ,i])

        return r, dr_dgap, dr_dmatch, dr_dcoefs

    def _scaled_dist(self, X, X2=None):
        """Return only the distance part of ``_scaled_dist_and_grads``."""
        return self._scaled_dist_and_grads(X, X2)[0]

    @Cache_this(limit=3, ignore_args=())
    def K(self, X, X2=None):
        """Kernel matrix, delegated to ``GPy.kern.RBF.K``."""
        return GPy.kern.RBF.K(self, X, X2)

    def update_gradients_full(self, dL_dK, X, X2=None):
        """
        Set the gradients of ``variance``, ``gap_decay``, ``match_decay`` and
        ``order_coefs`` from ``dL_dK``.
        """
        # Evaluate the kernel once; both gradient terms below use it.
        K = self.K(X, X2)
        self.variance.gradient = np.sum(K * dL_dK) / self.variance
        #self.variance.gradient = np.sum(self.K(X, X2) * dL_dK)

        r, dr_dgap, dr_dmatch, dr_dcoefs = self._scaled_dist_and_grads(X, X2)
        dterm = -K * r
        self.gap_decay.gradient = np.sum(dterm * dr_dgap * dL_dK)
        self.match_decay.gradient = np.sum(dterm * dr_dmatch * dL_dK)
        for i in range(self.order):
            self.order_coefs.gradient[i] = np.sum(dterm * dr_dcoefs[:, :, i] * dL_dK)

    def _get_params(self):
        """
        Overriding this because of the way GPy handles parameters.
        """
        return [self.gap_decay[0], self.match_decay[0], self.order_coefs, self.variance[0]]