Example #1
    def __init__(self,
                 input_dim: int,
                 variance: float = 1.,
                 period: float = 2. * np.pi,
                 lengthscale: float = 2. * np.pi,
                 active_dims: int = None,
                 name: str = 'pure_std_periodic') -> None:
        super(PureStdPeriodicKernel, self).__init__(input_dim, active_dims,
                                                    name)

        self.name = name

        if period is not None:
            period = np.asarray(period)
            assert period.size == input_dim, "bad number of periods"
        else:
            period = 2. * np.pi * np.ones(input_dim)
        if lengthscale is not None:
            lengthscale = np.asarray(lengthscale)
            assert lengthscale.size == input_dim, "bad number of lengthscales"
        else:
            lengthscale = 2. * np.pi * np.ones(input_dim)

        self.variance = Param('variance', variance, Logexp())
        assert self.variance.size == 1, "Variance size must be one"
        self.period = Param('period', period, Logexp())
        self.lengthscale = Param('lengthscale', lengthscale, Logexp())

        self.link_parameters(self.variance, self.period, self.lengthscale)
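A common thread in the examples on this page is the Param / Logexp / link_parameters pattern. A minimal sketch (not taken from any single example; exact import paths can vary between GPy and paramz versions):

from GPy.core.parameterization import Param
from GPy.core.parameterization.transformations import Logexp

# A Param is a named, optimisable value; Logexp keeps it positive by optimising in log-space.
variance = Param('variance', 1.0, Logexp())
# Inside a Kern or Likelihood subclass, self.link_parameters(variance, ...) registers the
# Param so the model's optimiser sees its value and gradient.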
Example #2
    def __init__(self,
                 input_dim,
                 input_space_dim=None,
                 active_dims=None,
                 kernel=None,
                 name='shapeintegral',
                 Nperunit=100,
                 lengthscale=[1.0],
                 variance=1.0):
        """
        NOTE: Added input_space_dim as the number of columns in X isn't the dimensionality of the space. I.e. for pentagons there
        will be 10 columns in X, while only 2 dimensions of input space.
        
        The lengthscale, variance, etc are ideally set by specifying the kernel we'll use
        
        input_dim = number of actual columns in data
        input_space_dim = number of dimensions in the domain
        active_dims = potential list of dimensions we'll use
        kernel = latent function kernel
        Nperunit = resolution of approximation
        
        The last column of X should specify if it's the latent function or the integral that the Y refers to.
        if it's the latent function then we just use the first d-columns, and the rest can be NaN, e.g.
        X                               Y
        0,0,1,0,0,1,0,1,1,0,1,1,0     2
        1,1,nananananananananan,1     3
        is a 1x1 square with an integral of 2, and a single point in the [1,1] corner of the square with a value of 3.
        
        """
        super(ShapeIntegral, self).__init__(input_dim, active_dims, name)

        assert (
            (kernel is not None) or (input_space_dim is not None)
        ), "Need either the input space dimensionality defining or the latent kernel defining (to infer input space)"
        if kernel is None:
            kernel = RBF(input_space_dim)
        else:
            input_space_dim = kernel.input_dim
        assert kernel.input_dim == input_space_dim, "Latent kernel (dim=%d) should have same input dimensionality as specified in input_space_dim (dim=%d)" % (
            kernel.input_dim, input_space_dim)

        #assert len(kern.lengthscale)==input_space_dim, "Lengthscale of length %d, but input space has %d dimensions" % (len(lengthscale),input_space_dim)

        #self.lengthscale = Param('lengthscale', kernel.lengthscale, Logexp()) #Logexp - transforms to allow positive only values...
        #self.variance = Param('variance', kernel.variance, Logexp()) #and here.
        #self.link_parameters(self.variance, self.lengthscale) #this just takes a list of parameters we need to optimise.

        self.kernel = kernel
        self.Nperunit = Nperunit
        self.input_space_dim = input_space_dim
        self.cached_points = {
        }  #this is important, not only is it a speed up - we also get the same points for each shape, which makes our covariances more stable

        self.lengthscale = Param(
            'lengthscale', lengthscale,
            Logexp())  #Logexp - transforms to allow positive only values...
        self.variance = Param('variance', variance, Logexp())  #and here.
        self.link_parameters(
            self.variance, self.lengthscale
        )  #this just takes a list of parameters we need to optimise.
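The X layout described in the docstring above can be sketched as follows (illustrative values only, mirroring the docstring's 13-column example):

import numpy as np

# Row 0: a shape given as six 2-D vertices (12 columns); last column 0 means Y is an integral.
# Row 1: a single latent-function input at [1, 1], padded with NaN; last column 1 means Y is a latent value.
X = np.array([
    [0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0],
    [1, 1] + [np.nan] * 10 + [1],
])
Y = np.array([[2.], [3.]])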
Example #3
    def __init__(self,
                 gap_decay=1.0,
                 match_decay=2.0,
                 order_coefs=[1.0],
                 alphabet=[],
                 maxlen=0,
                 active_dims=None,
                 normalize=True,
                 batch_size=1000):
        super(StringKernel, self).__init__(1, active_dims, 'sk')
        self._name = "sk"
        self.gap_decay = Param('Gap_decay', gap_decay, Logexp())
        self.match_decay = Param('Match_decay', match_decay, Logexp())
        self.order_coefs = Param('Order_coefs', order_coefs, Logexp())
        self.link_parameters(self.gap_decay, self.match_decay,
                             self.order_coefs)

        self.alphabet = alphabet
        self.maxlen = maxlen
        self.normalize = normalize

        self.kernel = NPStringKernel(_gap_decay=gap_decay,
                                     _match_decay=match_decay,
                                     _order_coefs=list(order_coefs),
                                     alphabet=self.alphabet,
                                     maxlen=maxlen,
                                     normalize=normalize)
Example #4
    def __init__(self,
                 input_dim,
                 variances=1.0,
                 lengthscale=1.0,
                 ARD=False,
                 active_dims=None,
                 lengthscalefun=None,
                 name='nonstatRBF'):
        super(NonstationaryRBF, self).__init__(input_dim, active_dims, name)

        if lengthscale is None:
            lengthscale = np.ones(1)
        else:
            lengthscale = np.asarray(lengthscale)

        if lengthscalefun is None:
            lengthscalefun = lambda x: lengthscale

        self.lengthscalefun = lengthscalefun
        self.lengthscale = Param(
            'lengthscale', lengthscale,
            Logexp())  #Logexp - transforms to allow positive only values...
        self.variances = Param('variances', variances, Logexp())  #and here.
        self.link_parameters(
            self.variances, self.lengthscale
        )  #this just takes a list of parameters we need to optimise.
Example #5
 def __init__(self, _lambda=1, _sigma=1, normalize=True, active_dims=None):
     super(SubsetTreeKernel, self).__init__(1, active_dims, 'sstk')
     self._lambda = Param('Lambda', _lambda,Logexp())
     self._sigma = Param('Sigma', _sigma,Logexp())
     self.link_parameters(self._lambda, self._sigma)
     self.normalize = normalize
     self.kernel = wrapper_raw_SubsetTreeKernel(_lambda, _sigma, normalize)
Example #6
    def __init__(self,
                 first,
                 second,
                 sigmoidal,
                 location: float = 0.,
                 slope: float = 0.5,
                 name='change_base',
                 fixed_slope=False):
        _newkerns = [kern.copy() for kern in (first, second)]
        super(ChangeKernelBase, self).__init__(_newkerns, name)
        self.first = first
        self.second = second

        self._fixed_slope = fixed_slope  # Note: here to be used by subclasses, and changing it from the outside does not link the parameter
        if self._fixed_slope: self.slope = slope
        else:
            self.slope = Param('slope', slope, Logexp())
            self.link_parameter(self.slope)

        if isinstance(location, tuple):
            self.sigmoidal = sigmoidal(1, False, 1., location[0], location[1],
                                       slope)
            self.sigmoidal_reverse = sigmoidal(1, True, 1., location[0],
                                               location[1], slope)
            self.location = Param('location', location[0])
            self.stop_location = Param('stop_location', location[1])
            self.link_parameters(self.location, self.stop_location)
        else:
            self.sigmoidal = sigmoidal(1, False, 1., location, slope)
            self.sigmoidal_reverse = sigmoidal(1, True, 1., location, slope)
            self.location = Param('location', location)
            self.link_parameter(self.location)
Example #7
    def __init__(self,
                 first,
                 second,
                 sigmoidal,
                 sigmoidal_indicator,
                 location: float = 0.,
                 slope: float = 0.5,
                 width=1.,
                 name='change_window_shifted_sides_base',
                 fixed_slope=False):
        _newkerns = [kern.copy() for kern in (first, second)]
        super(ChangeWindowShiftedSidesBase, self).__init__(_newkerns, name)
        self.first = first
        self.second = second

        self._fixed_slope = fixed_slope  # Note: here to be used by subclasses, and changing it from the outside does not link the parameter
        if self._fixed_slope: self.slope = slope
        else:
            self.slope = Param('slope', np.array(slope), Logexp())
            self.link_parameter(self.slope)

        self.sigmoidal = sigmoidal(1, False, 1., location, slope)
        self.sigmoidal_reverse = sigmoidal(1, True, 1., location, slope)
        self.sigmoidal_indicator = sigmoidal_indicator(1, False, 1., location,
                                                       slope, width)
        # self.shift = _Gk.Bias(1)
        self.location = Param('location', np.array(location))
        self.width = Param('width', np.array(width), Logexp())
        # self.shift_variance = Param('shift_variance', self.shift.variance.values, Logexp())
        self.shift_variance = Param('shift_variance', np.array(0), Logexp())
        self.link_parameters(self.location, self.width, self.shift_variance)
Example #8
    def __init__(self,gp_link=None, deg_free=5, sigma2=2):
        if gp_link is None:
            gp_link = link_functions.Identity()

        super(HetStudentT, self).__init__(gp_link, name='Hetro_Student_T')
        self.v = Param('deg_free', float(deg_free), Logexp())
        self.link_parameter(self.v)
        self.v.constrain_fixed()

        self.log_concave = False
Example #9
    def __init__(self, input_dim, variances=None, lengthscale=None, ARD=False, active_dims=None, name='integral'):
        super(Integral_Output_Observed, self).__init__(input_dim, active_dims, name)

        if lengthscale is None:
            lengthscale = np.ones(1)
        else:
            lengthscale = np.asarray(lengthscale)
            
        assert len(lengthscale)==input_dim/2            

        self.lengthscale = Param('lengthscale', lengthscale, Logexp()) #Logexp - transforms to allow positive only values...
        self.variances = Param('variances', variances, Logexp()) #and here.
        self.link_parameters(self.variances, self.lengthscale) #this just takes a list of parameters we need to optimise.
Example #10
    def __init__(self,n_terms=3):
        """n_terms specifies the number of tanh terms to be used"""
        self.n_terms = n_terms
        self.num_parameters = 3 * self.n_terms + 1
        self.psi = np.ones((self.n_terms, 3))

        super(TanhWarpingFunction_d, self).__init__(name='warp_tanh')
        self.psi = Param('psi', self.psi)
        self.psi[:, :2].constrain_positive()

        self.d = Param('d', 1.0, Logexp())
        self.link_parameter(self.psi)
        self.link_parameter(self.d)
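For context: the parameterisation above (a 3-column psi per tanh term plus a separate coefficient d) matches the standard tanh warping function, presumably

    f(y) = y * d + sum_i a_i * tanh(b_i * (y + c_i)),   with psi[i] = (a_i, b_i, c_i),

where keeping a_i and b_i (the first two columns of psi) positive makes the warping monotonic. This reading is inferred from the parameter shapes and constraints, not stated in the snippet itself.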
Example #11
    def __init__(self,
                 gap_decay=1.0,
                 match_decay=2.0,
                 order_coefs=[1.0],
                 alphabet=[],
                 maxlen=0,
                 num_splits=1,
                 normalize=True):
        super(SplitStringKernel, self).__init__(1, None, "sk")
        self._name = "sk"
        self.num_splits = num_splits
        self.gap_decay = Param('Gap_decay', gap_decay, Logexp())
        self.match_decay = Param('Match_decay', match_decay, Logexp())
        self.order_coefs = Param('Order_coefs', order_coefs, Logexp())
        self.link_parameters(self.gap_decay, self.match_decay,
                             self.order_coefs)

        self.alphabet = alphabet
        self.maxlen = maxlen
        self.normalize = normalize

        # make new kernels for each section
        self.kernels = []
        for i in range(0, num_splits - 1):
            self.kernels.append(
                StringKernel(gap_decay=gap_decay,
                             match_decay=match_decay,
                             order_coefs=order_coefs,
                             alphabet=alphabet,
                             maxlen=int((self.maxlen / self.num_splits)),
                             normalize=normalize))
        # the final kernel may operate on a slightly longer string if maxlen % num_splits != 0
        self.kernels.append(
            StringKernel(gap_decay=gap_decay,
                         match_decay=match_decay,
                         order_coefs=order_coefs,
                         alphabet=alphabet,
                         maxlen=int((self.maxlen / self.num_splits)) +
                         self.maxlen - self.num_splits * int(
                             (self.maxlen / self.num_splits)),
                         normalize=normalize))
        #tie the params across the kernels
        for kern in self.kernels:
            kern.unlink_parameter(kern.gap_decay)
            kern.gap_decay = self.gap_decay
            kern.unlink_parameter(kern.match_decay)
            kern.match_decay = self.match_decay
            kern.unlink_parameter(kern.order_coefs)
            kern.order_coefs = self.order_coefs
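To make the split arithmetic above concrete (hypothetical numbers, not from the source):

# With maxlen = 10 and num_splits = 3:
#   first num_splits - 1 kernels: int(10 / 3) = 3 characters each
#   final kernel:                 int(10 / 3) + 10 - 3 * int(10 / 3) = 4 characters
# so the pieces always cover the full maxlen, even when it is not divisible by num_splits.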
Example #12
 def __init__(self, input_dim, input_space_dim=None, active_dims=None, name='shapeintegralhc',lengthscale=None, variances=None,Nrecs=10,step=0.025,Ntrials=10,dims=2):
     super(ShapeIntegralHC, self).__init__(input_dim, active_dims, name)
     assert input_space_dim is not None, "Need the input space dimensionality to be defined"
     kernel = Integral(input_dim=input_space_dim*2,lengthscale=lengthscale,variances=variances)
     self.lengthscale = Param('lengthscale', kernel.lengthscale, Logexp())
     self.variances = Param('variances', kernel.variances, Logexp()) 
     self.link_parameters(self.variances, self.lengthscale) #this just takes a list of parameters we need to optimise.
     
     
     self.kernel = kernel
     self.input_space_dim = input_space_dim
     self.rectangle_cache = {} #this is important, not only is it a speed up - we also get the same points for each shape, which makes our covariances more stable        
     
     self.Nrecs=Nrecs
     self.step=step
     self.Ntrials=Ntrials
Example #13
	def __init__(self, input_dim,variance=1,active_dims=[0],name="categorical", inverse=False,useGPU=False):
		super(Categorical, self).__init__(input_dim, active_dims, name,useGPU=useGPU)

		self.inverse = inverse

		self.variance = Param('variance',variance,Logexp())
		self.link_parameter(self.variance)
Example #14
    def __init__(self, gap_decay=1.0, match_decay=1.0,
                 order_coefs=[1.0], variance=1.0, 
                 mode='tf-batch',
                 sim='dot', wrapper='none',
                 active_dims=None, name='rbf_string',
                 embs=None, alphabet=None,
                 device='/cpu:0', batch_size=1000,
                 config=None, index=None):
        Kern.__init__(self, 1, active_dims, name)
        StringKernel.__init__(self, gap_decay, match_decay,
                              order_coefs, variance, mode, 
                              sim=sim, wrapper=wrapper, embs=embs,
                              alphabet=alphabet, device=device,
                              batch_size=batch_size, config=config,
                              index=index)
        self.gap_decay = Param('gap_decay', gap_decay, Logexp())
        self.match_decay = Param('match_decay', match_decay, Logexp())
        self.order_coefs = Param('coefs', order_coefs, Logexp())
        self.graph = None
        self.link_parameter(self.gap_decay)
        self.link_parameter(self.match_decay)
        self.link_parameter(self.order_coefs)

        self.variance = Param('variance', variance, Logexp())
        self.link_parameter(self.variance)
        self.use_invLengthscale = False
        self.ARD = False
        self.lengthscale = Param('lengthscale', 1.0, Logexp())
        self.lengthscale.constrain_fixed(1.0)
Example #15
 def __init__(self,
              input_dim,
              input_type,
              variance=1.,
              lengthscale=1.,
              active_dims=None):
     super(CustomMatern52, self).__init__(input_dim, active_dims,
                                          'matern52')
     self.variance = Param('variance', variance)
     self.lengthscale = Param('lengthscale', lengthscale)
     self.link_parameters(self.variance, self.lengthscale)
     assert isinstance(
         input_type,
         (InputY, InputX,
          InputPsi)), "The type of input_object is not supported"
     self.input_type = input_type
Example #16
    def __init__(self,
                 Y_metadata,
                 gp_link=None,
                 noise_mult=1.,
                 known_variances=1.,
                 name='Scaled_het_Gauss'):
        if gp_link is None:
            gp_link = link_functions.Identity()

        if not isinstance(gp_link, link_functions.Identity):
            print(
                "Warning, Exact inference is not implemeted for non-identity link functions,\
            if you are not already, ensure Laplace inference_method is used")

        # note the known_variances are fixed, not parameters
        self.known_variances = known_variances
        self.noise_mult = Param('noise_mult', noise_mult,
                                Logexp())  # Logexp ensures its positive
        # this is a parameter, so it gets optimized, gradients calculated etc.

        #super(ScaledHeteroscedasticGaussian, self).__init__(gp_link, variance=1.0, name=name)
        super(Gaussian, self).__init__(gp_link, name=name)
        # note: we're inheriting from Likelihood here, not Gaussian, so as to avoid problems with the Gaussian variance.

        #add a new parameter by linking it (see just above in GPy.likelihoods.gaussian.Gaussian).
        self.link_parameter(self.noise_mult)

        if isinstance(gp_link, link_functions.Identity):
            self.log_concave = True
Example #17
    def __init__(self,
                 input_dim: int,
                 variance: float = 1.,
                 offset: float = 0.,
                 active_dims: int = None,
                 name: str = 'linear_with_offset') -> None:
        super(LinearWithOffset, self).__init__(input_dim, active_dims, name)
        if variance is not None:
            variance = np.asarray(variance)
            assert variance.size == 1
        else:
            variance = np.ones(1)

        self.variance = Param('variance', variance, Logexp())
        self.offset = Param('offset', offset)

        self.link_parameters(self.variance, self.offset)
Example #18
    def __init__(self, input_dim: int, reverse: bool = False, variance: float = 1., location: float = 0., slope: float = 0.2,
                 active_dims: int = None, name: str = 'sigmoidal_kernel_base', fixed_slope = False) -> None:
        self.reverse = reverse
        super(SigmoidalKernelBase, self).__init__(input_dim, variance, active_dims, False, name)
        # TO REMOVE VARIANCE: comment line above; uncomment below; remove self.variance factors from subclass methods
        # super(BasisFuncKernel, self).__init__(input_dim, active_dims, name)
        # assert self.input_dim == 1, "Basis Function Kernel only implemented for one dimension. Use one kernel per dimension (and add them together) for more dimensions"
        # self.ARD = False
        # self.variance = 1
        self.location = Param('location', location)
        self.link_parameter(self.location)

        self._fixed_slope = fixed_slope # Note: here to be used by subclasses, and changing it from the outside does not link the parameter
        if self._fixed_slope: self.slope = slope
        else:
            self.slope = Param('slope', slope, Logexp()) # This +ve constraint makes non-reverse sigmoids only fit (+ve or -ve) curves going away from 0; similarly for other kernels
            self.link_parameter(self.slope)
Example #19
 def set_l(self, l, safe=False):
     assert safe
     assert l.shape == (self.active_dim,)
     l = np.maximum(
         1.e-3,
         l
     )
     self.inner_kernel.lengthscale = Param('lengthscale', l)
Example #20
    def __init__(self, gp_link=None, r=1.0):
        if gp_link is None:
            #Parameterised not as link_f but as f
            #gp_link = Identity()
            gp_link = Log()

        super(LogLogistic, self).__init__(gp_link, name='LogLogistic')
        self.r = Param('r_shape', float(r), Logexp())
        self.link_parameter(self.r)
Example #21
    def update_parameter_bounds(self, X):
        if self.data_range is None:
            self.data_range = (X.min(), X.max())
            self.location = Param('location', self.location,
                                  Logistic(*self.data_range))
            self.sigmoidal_indicator.location = Param(
                'location', self.location, Logistic(*self.data_range))
            # self.sigmoidal_reverse.location = Param('location', self.location, Logistic(*self.data_range))
            # self.sigmoidal.location = Param('location', self.location + self.width, Logistic(*self.data_range))
            # self.location.constrain_bounded(*self.data_range)
            # self.sigmoidal_indicator.location.constrain_bounded(*self.data_range)
            # # self.sigmoidal_reverse.location.constrain_bounded(*self.data_range)
            # # self.sigmoidal.location.constrain_bounded(*self.data_range)

        max_width = self.data_range[1] - self.location
        max_width = max_width if max_width > 0 else self.data_range[
            1] - self.data_range[0]
        self.width = Param('width', self.width, Logistic(0, max_width))
        self.sigmoidal_indicator.width = Param('width', self.width,
                                               Logistic(0, max_width))
Example #22
 def __init__(self,
              input_dim,
              variances=None,
              ARD=False,
              active_dims=None,
              name='mix_integral_linear'):
     super(Mix_Integral_Linear, self).__init__(input_dim, active_dims, name)
     self.variances = Param('variances', variances, Logexp())  #and here.
     self.link_parameters(
         self.variances
     )  #this just takes a list of parameters we need to optimise.
Example #23
 def __init__(self,
              first,
              second,
              location: float = 0.,
              slope: float = 0.5,
              width: float = 1.,
              name='change_window',
              fixed_slope=False):
     super(ChangeWindowKernel,
           self).__init__(first, second, SigmoidalIndicatorKernel, location,
                          slope, name, fixed_slope)
     self.width = Param('width', width, Logexp())
     self.link_parameter(self.width)
Example #24
    def __init__(self,
                 input_dim,
                 input_space_dim=None,
                 active_dims=None,
                 kernel=None,
                 name='shapeintegral',
                 Nperunit=100,
                 lengthscale=None,
                 variance=None):
        """
        NOTE: Added input_space_dim as the number of columns in X isn't the dimensionality of the space. I.e. for pentagons there
        will be 10 columns in X, while only 2 dimensions of input space.
        """
        super(ShapeIntegral, self).__init__(input_dim, active_dims, name)

        assert (
            (kernel is not None) or (input_space_dim is not None)
        ), "Need either the input space dimensionality defining or the latent kernel defining (to infer input space)"
        if kernel is None:
            kernel = RBF(input_space_dim, lengthscale=lengthscale)
        else:
            input_space_dim = kernel.input_dim
        assert kernel.input_dim == input_space_dim, "Latent kernel (dim=%d) should have same input dimensionality as specified in input_space_dim (dim=%d)" % (
            kernel.input_dim, input_space_dim)

        #assert len(kern.lengthscale)==input_space_dim, "Lengthscale of length %d, but input space has %d dimensions" % (len(lengthscale),input_space_dim)

        self.lengthscale = Param(
            'lengthscale', kernel.lengthscale,
            Logexp())  #Logexp - transforms to allow positive only values...
        self.variance = Param('variance', kernel.variance,
                              Logexp())  #and here.
        self.link_parameters(
            self.variance, self.lengthscale
        )  #this just takes a list of parameters we need to optimise.

        self.kernel = kernel
        self.Nperunit = Nperunit
        self.input_space_dim = input_space_dim
Example #25
    def __init__(self,
                 first,
                 second,
                 sigmoidal,
                 sigmoidal_indicator,
                 third=None,
                 location: float = 0.,
                 slope: float = 0.5,
                 width=1.,
                 name='change_window_independent_base',
                 fixed_slope=False):
        third = deepcopy(first) if third is None else third
        _newkerns = [kern.copy() for kern in (first, second, third)]
        super(ChangeWindowIndependentBase, self).__init__(_newkerns, name)
        self.first = first
        self.second = second
        self.third = third

        self._fixed_slope = fixed_slope  # Note: here to be used by subclasses, and changing it from the outside does not link the parameter
        if self._fixed_slope: self.slope = slope
        else:
            self.slope = Param('slope', np.array(slope), Logexp())
            self.link_parameter(self.slope)

        self.sigmoidal = sigmoidal(1, False, 1., location, slope)
        self.sigmoidal_reverse = sigmoidal(1, True, 1., location, slope)
        self.sigmoidal_indicator = sigmoidal_indicator(1, False, 1., location,
                                                       slope, width)
        self.location = Param('location', np.array(location))
        self.width = Param('width', np.array(width), Logexp())
        self.link_parameters(self.location, self.width)

        self.data_range = None
        self.one_off_bounds_set = False
        self.last_parameter_values = {
            'location': np.array(location),
            'slope': np.array(slope),
            'width': np.array(width)
        }
Example #26
 def __init__(self,
              input_dim,
              variance,
              lengthscale,
              ARD,
              active_dims,
              name,
              useGPU=False):
     super(Stationary, self).__init__(input_dim,
                                      active_dims,
                                      name,
                                      useGPU=useGPU)
     self.ARD = ARD
     if not ARD:
         if lengthscale is None:
             lengthscale = np.ones(1)
         else:
             lengthscale = np.asarray(lengthscale)
             assert lengthscale.size == 1, "Only 1 lengthscale needed for non-ARD kernel"
     else:
         if lengthscale is not None:
             lengthscale = np.asarray(lengthscale)
             assert lengthscale.size in [1, input_dim
                                         ], "Bad number of lengthscales"
             if lengthscale.size != input_dim:
                 lengthscale = np.ones(input_dim) * lengthscale
         else:
             lengthscale = np.ones(self.input_dim)
             # lengthscale = np.ones(2)
             # n = A(1)
             # t = A(1)
             # lengthscale = np.array([n, n, n, n, n, n, t, t, t])
     self.lengthscale = Param('lengthscale', lengthscale, Logexp())
     # self.lengthscale = self.lengthscale.repeat(6)[:self.input_dim]
     # print(self.lengthscale)
     self.variance = Param('variance', variance, Logexp())
     assert self.variance.size == 1
     self.link_parameters(self.variance, self.lengthscale)
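For reference, the branching above interprets the lengthscale argument roughly as follows (hypothetical values):

# ARD=False: lengthscale must be a single value, e.g. 2.0 -> np.array([2.])
# ARD=True, input_dim=3:
#   lengthscale=2.0          -> broadcast to np.array([2., 2., 2.])
#   lengthscale=[1., 2., 3.] -> used as-is, one lengthscale per input dimension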
Example #27
 def __init__(self,k1,k2,kc,xc,cpDim):
     if k2 is None:
         super(Changepoint,self).__init__([k1],"changepoint")
         k2 = k1
     else:
         super(Changepoint,self).__init__([k1,k2],"changepoint")
     
     self.k1 = k1
     self.k2 = k2
     
     self.kc = Param('kc', kc, Logexp())
     self.link_parameter(self.kc)
     
     self.xc = np.array(xc)
     self.cpDim = cpDim
Example #28
    def __init__(self,
                 k1,
                 k2=None,
                 kc=1.,
                 xc=np.array([[0]]),
                 cpDim=0,
                 changepointParameter=False):
        """
        arguments:
            k1, k2: GPy.kern.Kernel
            kc: float, covariance at the changepoint
            xc: np.array, position of changepoint(s)
            cpDim: int, dimension that changepoint exists on
            changepointParameter: bool, whether xc should be linked as a parameter

        """
        if k2 is None:
            super(Changepoint, self).__init__([k1], "changepoint")
            k2 = k1
        else:
            super(Changepoint, self).__init__([k1, k2], "changepoint")

        self.k1 = k1
        self.k2 = k2

        self.kc = Param('kc', kc, Logexp())
        self.link_parameter(self.kc)

        self.changepointParameter = changepointParameter
        self.xc = np.array(xc)
        if self.changepointParameter:
            self.xc = Param('xc', self.xc)
            self.link_parameter(self.xc)
            self.xc.gradient = [[0]]

        self.cpDim = cpDim
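A minimal usage sketch for the constructor above (assuming GPy is available and Changepoint is defined as shown; the kernel choices and changepoint position are illustrative):

import numpy as np
import GPy

k1 = GPy.kern.RBF(1, lengthscale=0.5)  # behaviour before the changepoint
k2 = GPy.kern.RBF(1, lengthscale=5.0)  # behaviour after the changepoint
k = Changepoint(k1, k2, kc=1.0, xc=np.array([[2.5]]), cpDim=0,
                changepointParameter=False)  # set True to also optimise xc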
Example #29
 def __init__(self,
              input_dim,
              basis,
              variance=None,
              ARD=False,
              active_dims=None,
              name='mean',
              useGP=False):
     """
     Initialize the object.
     """
     super(MeanFunction, self).__init__(input_dim,
                                        active_dims,
                                        name,
                                        useGP=useGP)
     self.input_dim = int(input_dim)
     self._ARD = ARD
     if not hasattr(basis, '__call__'):
          raise TypeError('The basis functions must implement the '
                          '\'__call__()\' method. This method should '
                          'evaluate the basis functions given a 2D numpy '
                          'array of \'num_points x input_dim\' dimensions.')
     if not hasattr(basis, 'num_output'):
         raise TypeError('The basis functions must have an attribute '
                         ' \'num_output\' which should store the number of'
                         ' basis functions it contains.')
     self._basis = basis
     self._num_params = basis.num_output
     if not ARD:
         if variance is None:
             variance = np.ones(1)
         else:
             variance = np.asarray(variance)
             assert variance.size == 1, 'Only 1 variance needed for a non-ARD kernel'
     else:
         if variance is not None:
             variance = np.asarray(variance)
             assert variance.size in [1, self.num_params
                                      ], 'Bad number of variances'
             if variance.size != self.num_params:
                 variance = np.ones(self.num_params) * variance
         else:
             variance = np.ones(self.num_params)
     self.variance = Param('variance', variance, Logexp())
     self.link_parameters(self.variance)
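The basis object validated above only needs a __call__ method returning an array of shape num_points x num_output, plus a num_output attribute. A minimal compliant basis might look like this (an illustrative sketch, not part of the source):

import numpy as np

class PolynomialBasis(object):
    """Monomials 1, x, x^2, ... of the first input column (illustrative only)."""
    def __init__(self, degree):
        self.num_output = degree + 1  # number of basis functions
    def __call__(self, X):
        X = np.asarray(X)  # num_points x input_dim
        return np.hstack([X[:, :1] ** p for p in range(self.num_output)])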
Example #30
    def __init__(self, warping_indices, hidden_dims, out_dim, warped_indices,
                 name):
        super(NNwarpingFunction, self).__init__(name='nn_warping_' + name)
        self.warping_indices = warping_indices
        self.warped_indices = warped_indices
        self.nnwarping = NNwarping(len(warping_indices), hidden_dims, out_dim)
        self.params_name = list(self.nnwarping.state_dict().keys())
        self.params_value = [
            _.numpy() for _ in list(self.nnwarping.state_dict().values())
        ]
        self.params = [
            Param(self.params_name[_], self.params_value[_])
            for _ in range(len(self.params_value))
        ]
        for param in self.params:
            self.link_parameter(param)

        # training statistics
        self.params_updated_num = 0
Example #31
 def __init__(self, input_dim, inner_kernel, W=None,
              name='ActiveSubspaceKernel',
              fixed_cols=0):
     """
     Initialize the object.
     """
     super(ActiveSubspaceKernel, self).__init__(input_dim, None, name,
                                                useGPU=False)
     self.inner_kernel = inner_kernel
     pW = StiefelPrior(input_dim, inner_kernel.input_dim,
                       fixed_cols=fixed_cols,
                       alpha=100.)
     self.fixed_cols = fixed_cols
     if W is None:
         W = pW.rvs().reshape(input_dim, inner_kernel.input_dim)
     else:
         assert W.shape == (self.input_dim, self.active_dim)
     self.W = Param('W', W)
     self.W.set_prior(pW)
     self.link_parameters(self.W, self.inner_kernel)
Example #32
    def __init__(self, kernels):
        """
        This kernel is used for multi-fidelity problems.

        Args:
            kernels - List of GPy kernels to use for each fidelity from low
                      to high fidelity

        Reference:

        Predicting the output from a complex computer code when fast
        approximations are available. M. C. KENNEDY AND A. O'HAGAN (2000)

        Any number of fidelities are supported.

        Fidelity s is modelled as:
        f_s(x) = p_t * f_t(x) + d_s(x)

        where:
        s is the fidelity
        t is the previous fidelity
        f_s(x) is the function modelling fidelity s
        d_s(x) models the difference between fidelity s-1 and s
        p_t is a scaling parameter between fidelities t and s
        """

        self.kernels = kernels
        self.n_fidelities = len(kernels)

        super(LinearMultiFidelityKernel, self).__init__(kernels=self.kernels,
                                                        name='multifidelity',
                                                        extra_dims=[-1])
        self.scaling_param = Param('scale', np.ones(self.n_fidelities - 1))

        # Link parameters so paramz knows about them
        self.link_parameters(self.scaling_param)
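A rough usage sketch for the kernel above (assuming GPy kernels; the two-fidelity setup is illustrative). The extra_dims=[-1] argument suggests the last input column is expected to index the fidelity of each row.

import GPy

kernels = [GPy.kern.RBF(1), GPy.kern.RBF(1)]  # one kernel per fidelity, low to high
k = LinearMultiFidelityKernel(kernels)        # adds one scaling parameter per fidelity step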
Example #33
class ActiveSubspaceKernel(Kern):

    """
    A kernel of the following form:

        .. math::

            k(x, x') = k_0(Wx, Wx')

    """

    _inner_kernel = None

    @property
    def inner_kernel(self):
        """
        :getter: Get the inner kernel.
        """
        return self._inner_kernel

    @inner_kernel.setter
    def inner_kernel(self, value):
        """
        :setter: Set the inner kernel.
        """
        assert isinstance(value, Kern), 'The inner kernel must be a'\
               + ' proper `GPy.kern.Kern` object.'
        assert value.input_dim <= self.input_dim, 'The number of active'\
               + ' dimensions must be smaller than or equal to the number'\
               + ' of inputs.'
        self._inner_kernel = value

    @property
    def active_dim(self):
        """
        :getter: Get the number of active dimensions.
        """
        return self.inner_kernel.input_dim

    def __init__(self, input_dim, inner_kernel, W=None,
                 name='ActiveSubspaceKernel',
                 fixed_cols=0):
        """
        Initialize the object.
        """
        super(ActiveSubspaceKernel, self).__init__(input_dim, None, name,
                                                   useGPU=False)
        self.inner_kernel = inner_kernel
        pW = StiefelPrior(input_dim, inner_kernel.input_dim,
                          fixed_cols=fixed_cols,
                          alpha=100.)
        self.fixed_cols = fixed_cols
        if W is None:
            W = pW.rvs().reshape(input_dim, inner_kernel.input_dim)
        else:
            assert W.shape == (self.input_dim, self.active_dim)
        self.W = Param('W', W)
        self.W.set_prior(pW)
        self.link_parameters(self.W, self.inner_kernel)

    def _get_Z(self, X):
        return None if X is None else np.dot(X, self.W)

    def _get_Zs(self, X, X2):
        return self._get_Z(X), self._get_Z(X2)

    @Cache_this(limit=5, ignore_args=())
    def K(self, X, X2=None):
        """
        Kernel function applied on inputs X and X2.
        """
        Z, Z2 = self._get_Zs(X, X2)
        return self.inner_kernel.K(Z, Z2)

    @Cache_this(limit=3, ignore_args=())
    def Kdiag(self, X):
        """
        Evaluate only the diagonal of the covariance matrix.
        """
        return self.inner_kernel.Kdiag(self._get_Z(X))
    
    def gradients_X(self, dL_dK, X, X2=None):
        Z, Z2 = self._get_Zs(X, X2)
        tmp = self.inner_kernel.gradients_X(dL_dK, Z, Z2)
        return np.einsum('ik,jk->ij', tmp, self.W)

    def update_gradients_full(self, dL_dK, X, X2=None):
        """
        Given the derivative of the objective wrt the covariance matrix
        (dL_dK), compute the gradient wrt the parameters of this kernel,
        and store in the parameters object as e.g. self.variance.gradient
        """
        assert X2 is None
        Z = self._get_Z(X)
        self.inner_kernel.update_gradients_full(dL_dK, Z)
        dL_dZ = self.inner_kernel.gradients_X(dL_dK, Z)
        self.W.gradient = np.einsum('ij,ik->kj', dL_dZ, X)
        k = self.fixed_cols
        if k >= 1:
            self.W.gradient[:-k, -k:] = 0.
            self.W.gradient[-k:, :-k] = 0.
            self.W.gradient[-k:, -k:] = 0.
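The W gradient in update_gradients_full above is the chain rule through the linear map Z = X W: since dL/dW = X^T (dL/dZ), the einsum('ij,ik->kj', dL_dZ, X) call builds exactly that product (shaped input_dim x active_dim, matching W), and the final block zeroes the gradient entries in the rows and columns associated with the fixed_cols columns that are held fixed.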
Example #34
class HetStudentT(MultiLikelihood):
    """
    Student T likelihood

    For nomenclature see Bayesian Data Analysis 2003 p576

    .. math::
        p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\exp(g_{i})}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - f_{i})^{2}}{\\exp(g_{i})}\\right)\\right)^{-\\frac{v+1}{2}}

    """
    def __init__(self,gp_link=None, deg_free=5, sigma2=2):
        if gp_link is None:
            gp_link = link_functions.Identity()

        super(HetStudentT, self).__init__(gp_link, name='Hetro_Student_T')
        self.v = Param('deg_free', float(deg_free), Logexp())
        self.link_parameter(self.v)
        self.v.constrain_fixed()

        self.log_concave = False

    def request_num_latent_functions(self, Y):
        """
        The likelihood should infer how many latent functions are needed for the likelihood

        In this case we have one latent function for mean and one for scale, for each output dimension
        """
        return Y.shape[1]*2

    def pdf(self, f, g, y, Y_metadata=None):
        """
        .. math::
            p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\exp(g_{i})}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - f_{i})^{2}}{\\exp(g_{i})}\\right)\\right)^{-\\frac{v+1}{2}}
        """
        df = float(self.deg_free[:])
        e_g = np.exp(g)
        y_f2 = (y-f)**2
        pdf = (gamma(0.5*(df+1)) / (gamma(0.5*df)*np.sqrt(df*np.pi*e_g)))*(1 + y_f2/(df*e_g))**(-0.5*(df+1))
        return pdf

    def logpdf(self, f, y, Y_metadata=None):
        D = y.shape[1]
        fv, gv = f[:, :D], f[:, D:]
        df = float(self.deg_free[:])
        y_f2 = (y-fv)**2
        lnpdf = gammaln(0.5*(df+1)) - gammaln(0.5*df) - 0.5*np.log(df*np.pi) - 0.5*gv - 0.5*(df+1)*np.log1p(y_f2/(df*np.exp(gv)))
        return lnpdf

    def update_gradients(self, grads):
        """
        Pull out the gradients, be careful as the order must match the order
        in which the parameters are added
        """
        self.v.gradient = grads[0]

    def predictive_mean(self, mu, sigma, Y_metadata=None):
        # The comment here confuses mean and median.
        return self.gp_link.transf(mu) # only true if link is monotonic, which it is.

    def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
        if self.deg_free<=2.:
            return np.empty(mu.shape)*np.nan # does not exist for degrees of freedom <= 2.
        else:
            return super(HetStudentT, self).predictive_variance(mu, variance, predictive_mean, Y_metadata)

    def conditional_mean(self, gp):
        return self.gp_link.transf(gp)

    def conditional_variance(self, gp):
        #Expects just g!
        return self.deg_free*np.exp(gp)/(self.deg_free - 2.)

    def samples(self, gp, Y_metadata=None):
        """
        Returns a set of samples of observations based on a given value of the latent variable.

        :param gp: latent variable
        """
        orig_shape = gp.shape
        gp = gp.flatten()
        #FIXME: Very slow as we are computing a new random variable per input!
        #Can't get it to sample all at the same time
        #student_t_samples = np.array([stats.t.rvs(self.v, self.gp_link.transf(gpj),scale=np.sqrt(self.sigma2), size=1) for gpj in gp])
        dfs = np.ones_like(gp)*self.v
        scales = np.ones_like(gp)*np.sqrt(self.sigma2)
        student_t_samples = stats.t.rvs(dfs, loc=self.gp_link.transf(gp),
                                        scale=scales)
        return student_t_samples.reshape(orig_shape)

    def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
        D = Y.shape[1]
        mf, mg = m[:, :D], m[:, D:]
        vf, vg = v[:, :D], v[:, D:]

        df = float(self.deg_free[:])
        #F =  -0.5*mg
        #Parameterize sigma not sigma2 as sigma itself needs to be positive!
        #F = -mg

        F = (gammaln((df + 1) * 0.5)
            - gammaln(df * 0.5)
            - 0.5*np.log(df * np.pi * np.exp(mg))
             )

        """
        #Some little code to check the result numerically using quadrature
        from functools import partial
        from scipy import integrate
        i = 5  # datapoint index
        def quad_func(fi, gi, yi, mgi, vgi, mfi, vfi):
            return ((-0.5*(df+1)*np.log1p(((yi-fi)**2)/(df*np.exp(gi))))       #p(y|f,g)
                    * np.exp(-0.5*np.log(2*np.pi*vgi) - 0.5*((gi - mgi)**2)/vgi) #q(g)
                    * np.exp(-0.5*np.log(2*np.pi*vfi) - 0.5*((fi - mfi)**2)/vfi) #q(f)
                    )
        quad_func_l = partial(quad_func, yi=Y[i], mgi=mg[i], vgi=vg[i], mfi=mf[i], vfi=vf[i])
        def integrl(gi):
            return integrate.quad(quad_func_l, -50, 50, args=(gi))[0]
        print "Numeric scipy F quad"
        print integrate.quad(lambda fi: integrl(fi), -50, 50)
        """

        #Do some testing to see if the quadrature works well for one datapoint
        Ngh = 20
        if gh_points is None:
            gh_x, gh_w = self._gh_points(T=Ngh)
        else:
            gh_x, gh_w = gh_points

        N = Y.shape[0]
        F_quad = np.zeros(Y.shape)
        dF_dmg = np.zeros(mg.shape)
        dF_dmf = np.zeros(mf.shape)
        dF_dvf = np.zeros(vf.shape)
        dF_dvg = np.zeros(vg.shape)
        dF_ddf = np.zeros(vg.shape)
        for d in range(D):
            quad_cython.quad2d_stut(N, mf.flatten(), vf.flatten(), mg.flatten(), vg.flatten(), Y.flatten(), Ngh, df,
                                    gh_x, gh_w, F_quad[:,d], dF_dmf[:,d], dF_dvf[:,d], dF_dmg[:,d], dF_dvg[:,d], dF_ddf[:,d])
        F_quad /= np.pi
        dF_dmg /= np.pi
        dF_dmf /= np.pi
        dF_dvf /= np.pi
        dF_dvg /= np.pi
        dF_ddf /= np.pi

        F += F_quad
        dF_dmg += -0.5  # from -0.5<g> term

        dF_dvf /= 2.0
        dF_dvg /= 2.0

        dF_dm = np.hstack((dF_dmf, dF_dmg))
        dF_dv = np.hstack((dF_dvf, dF_dvg))

        #derivative wrt to degrees of freedom
        dF_ddf += 0.5*digamma(0.5*(df+1)) - 0.5*digamma(0.5*df) - 1.0/(2*df)
        #Since we are the only parameter, our first dimension is 1
        dF_dtheta = dF_ddf[None, :]
        return F, dF_dm, dF_dv, dF_dtheta

    def variational_expectations_pure(self, Y, m, v, gh_points=None, Y_metadata=None):
        D = Y.shape[1]
        mf, mg = m[:, :D], m[:, D:]
        vf, vg = v[:, :D], v[:, D:]

        df = float(self.deg_free[:])
        #F =  -0.5*mg
        #Parameterize sigma not sigma2 as sigma itself needs to be positive!
        #F = -mg

        F = (gammaln((df + 1) * 0.5)
            - gammaln(df * 0.5)
            - 0.5*np.log(df * np.pi * np.exp(mg))
             )

        """
        #Some little code to check the result numerically using quadrature
        from functools import partial
        from scipy import integrate
        i = 5  # datapoint index
        def quad_func(fi, gi, yi, mgi, vgi, mfi, vfi):
            return ((-0.5*(df+1)*np.log1p(((yi-fi)**2)/(df*np.exp(gi))))       #p(y|f,g)
                    * np.exp(-0.5*np.log(2*np.pi*vgi) - 0.5*((gi - mgi)**2)/vgi) #q(g)
                    * np.exp(-0.5*np.log(2*np.pi*vfi) - 0.5*((fi - mfi)**2)/vfi) #q(f)
                    )
        quad_func_l = partial(quad_func, yi=Y[i], mgi=mg[i], vgi=vg[i], mfi=mf[i], vfi=vf[i])
        def integrl(gi):
            return integrate.quad(quad_func_l, -50, 50, args=(gi))[0]
        print "Numeric scipy F quad"
        print integrate.quad(lambda fi: integrl(fi), -50, 50)
        """

        from functools import partial
        def F_quad_func(f, e_g, y, df):
            y_f2 = (y-f)**2
            return -0.5*(df+1)*np.log1p(y_f2/(df*e_g))

        def F_dquad_df_func(f, e_g, y, df):
            y_f2 = (y-f)**2
            return (df+1)*(y-f)/(df*e_g + y_f2)

        def F_d2quad_df2_func(f, e_g, y, df):
            y_f2 = (y-f)**2
            df_eg = df*e_g
            return (df+1)*(y_f2 - df_eg)/(df_eg + y_f2)**2

        def F_dquad_dg_func(f, e_g, y, df):
            y_f2 = (y-f)**2
            return 0.5*(df+1)*y_f2/(df*e_g + y_f2)

        def F_d2quad_dg2_func(f, e_g, y, df):
            y_f2 = (y-f)**2
            df_eg = df*e_g
            return -0.5*(df+1)*y_f2*df_eg/(df_eg + y_f2)**2

        def F_dquad_ddf_func(f, e_g, y, df):
            y_f2 = (y-f)**2
            df_eg = df*e_g
            return 0.5*( (df+1)*y_f2/(df*(df_eg + y_f2))
                        - np.log1p(y_f2/(df_eg))
                        )

        F_quad_func_p = partial(F_quad_func, df=df)
        F_dquad_df_func_p = partial(F_dquad_df_func, df=df)
        F_d2quad_df2_func_p = partial(F_d2quad_df2_func, df=df)
        F_dquad_dg_func_p = partial(F_dquad_dg_func, df=df)
        F_d2quad_dg2_func_p = partial(F_d2quad_dg2_func, df=df)
        F_dquad_ddf_func_p = partial(F_dquad_ddf_func, df=df)

        #(F_quad, dF_dmf, dF_dvf,
        #dF_dmg, dF_dvg) = self.quad2d([F_quad_func_p, F_dquad_df_func_p,
                                               #F_d2quad_df2_func_p, F_dquad_dg_func_p,
                                               #F_d2quad_dg2_func_p],#, F_dquad_ddf_func_p],
                                              #Y, mf, vf, mg, vg, gh_points, exp_g=True)
        (F_quad, dF_dmf, dF_dvf,
        dF_dmg, dF_dvg, dF_ddf) = self.quad2d([F_quad_func_p, F_dquad_df_func_p,
                                               F_d2quad_df2_func_p, F_dquad_dg_func_p,
                                               F_d2quad_dg2_func_p, F_dquad_ddf_func_p],
                                              Y, mf, vf, mg, vg, gh_points, exp_g=True)

        F += F_quad
        dF_dmg += -0.5  # from -0.5<g> term

        dF_dvf /= 2.0
        dF_dvg /= 2.0

        dF_dm = np.hstack((dF_dmf, dF_dmg))
        dF_dv = np.hstack((dF_dvf, dF_dvg))

        #derivative wrt to degrees of freedom
        dF_ddf += 0.5*digamma(0.5*(df+1)) - 0.5*digamma(0.5*df) - 1.0/(2*df)
        #Since we are the only parameter, our first dimension is 1
        dF_dtheta = dF_ddf[None, :]
        return F, dF_dm, dF_dv, dF_dtheta

    #def pdf_partial(self, Y_metadata):
        #"""
        #Should be overriden for models with parameters that are fixed throughout a quadrature
        #"""
        #from functools import partial
        #df = float(self.deg_free[:])
        #return partial(self.pdf, df=df, Y_metadata=Y_metadata)

    def quad2D_weave(self, f, g, y, v, gh_w, f_string, e_g, D):
        #Broken!
        raise NotImplementedError
        from scipy import weave
        N = y.shape[0]
        h = gh_w.shape[0]
        F = np.zeros((y.shape[0], D))
        #f_string = "pow(x(n,i), y(n,j))"

        support_code = """
        #include <stdio.h>
        """
        omp = False
        if omp:
            pragma = "#pragma omp parallel for private(d, n, i, j)"
            support_code += """
            #include <math.h>
            #include <omp.h>
            """
            weave_options = {'headers'           : ['<omp.h>'],
                            'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
                            'extra_link_args'   : ['-lgomp']}
        else:
            pragma = ""
            weave_options = {}


        code = """
        int d,n,i,j;
        {pragma}
        for(d=0; d<D; d++){{
            for(n=0; n<N; n++){{
                for(i=0; i<h; i++){{
                    for(j=0; j<h; j++){{
                        F(n,d) += gh_w(i)*gh_w(j)*{func};
                    }}
                }}
            }}
        }}
        """.format(func=f_string, pragma=pragma)

        weave.inline(code, ['F', 'f', 'g', 'y', 'v', 'N', 'D', 'h', 'gh_w', 'e_g'],
                     type_converters=weave.converters.blitz,
                     support_code=support_code, **weave_options)
        return F
Example #35
class RBFStringKernel(StringKernel, GPy.kern.RBF):
    """
    String kernel with an RBF wrapper
    """

    def __init__(self, gap_decay=1.0, match_decay=1.0,
                 order_coefs=[1.0], variance=1.0, 
                 mode='tf-batch',
                 sim='dot', wrapper='none',
                 active_dims=None, name='rbf_string',
                 embs=None, alphabet=None,
                 device='/cpu:0', batch_size=1000,
                 config=None, index=None):
        Kern.__init__(self, 1, active_dims, name)
        StringKernel.__init__(self, gap_decay, match_decay,
                              order_coefs, variance, mode, 
                              sim=sim, wrapper=wrapper, embs=embs,
                              alphabet=alphabet, device=device,
                              batch_size=batch_size, config=config,
                              index=index)
        self.gap_decay = Param('gap_decay', gap_decay, Logexp())
        self.match_decay = Param('match_decay', match_decay, Logexp())
        self.order_coefs = Param('coefs', order_coefs, Logexp())
        self.graph = None
        self.link_parameter(self.gap_decay)
        self.link_parameter(self.match_decay)
        self.link_parameter(self.order_coefs)

        self.variance = Param('variance', variance, Logexp())
        self.link_parameter(self.variance)
        self.use_invLengthscale = False
        self.ARD = False
        self.lengthscale = Param('lengthscale', 1.0, Logexp())
        self.lengthscale.constrain_fixed(1.0)

    @Cache_this(limit=3, ignore_args=())
    def _string_K(self, X, X2=None):
        result = StringKernel.K(self, X, X2)
        gap_grads = self.gap_grads
        match_grads = self.match_grads
        coef_grads = self.coef_grads
        return result, gap_grads, match_grads, coef_grads

    @Cache_this(limit=3, ignore_args=())
    def _string_Kdiag(self, X):
        result = StringKernel.K(self, X, X, diag=True)
        gap_grads = self.gap_grads
        match_grads = self.match_grads
        coef_grads = self.coef_grads
        return result, gap_grads, match_grads, coef_grads

    @Cache_this(limit=3, ignore_args=())
    def _scaled_dist_and_grads(self, X, X2=None):
        """
        Returns the scaled distance between inputs.
        We assume lengthscale=1 since any ls changes
        can be absorbed into the sk coefficients.
        We also precalculate gradients.
        """
        #print "CALCULATING r"
        k, gap_g, match_g, coefs_g = self._string_K(X, X2)
        diag1, diag_gap_g1, diag_match_g1, diag_coefs_g1 = self._string_Kdiag(X)
        if X2 is None:
            diag2, diag_gap_g2, diag_match_g2, diag_coefs_g2 = diag1, diag_gap_g1, diag_match_g1, diag_coefs_g1
        else:
            diag2, diag_gap_g2, diag_match_g2, diag_coefs_g2 = self._string_Kdiag(X2)
        # Direct sum
        dsum = diag1[:, None] + diag2[None, :]
        r = dsum - (2 * k)
        
        dsum_dgap = diag_gap_g1[:, None] + diag_gap_g2[None, :]
        dr_dgap = dsum_dgap - (2 * gap_g)
        dsum_dmatch = diag_match_g1[:, None] + diag_match_g2[None, :]
        dr_dmatch = dsum_dmatch - (2 * match_g)
        dr_dcoefs = np.zeros_like(coefs_g)
        for i in range(self.order):
            dsum_dcoef = diag_coefs_g1[:, None, i] + diag_coefs_g2[None, :, i]
            dr_dcoefs[:, : , i] = dsum_dcoef - (2 * coefs_g[:, : ,i])
               
        return r, dr_dgap, dr_dmatch, dr_dcoefs

    def _scaled_dist(self, X, X2=None):
        return self._scaled_dist_and_grads(X, X2)[0]

    @Cache_this(limit=3, ignore_args=())
    def K(self, X, X2=None):
        return GPy.kern.RBF.K(self, X, X2)

    def update_gradients_full(self, dL_dK, X, X2=None):
        self.variance.gradient = np.sum(self.K(X, X2) * dL_dK) / self.variance
        #self.variance.gradient = np.sum(self.K(X, X2) * dL_dK)
        r, dr_dgap, dr_dmatch, dr_dcoefs = self._scaled_dist_and_grads(X, X2)

        dterm = -self.K(X, X2) * r
        self.gap_decay.gradient = np.sum(dterm * dr_dgap * dL_dK)
        self.match_decay.gradient = np.sum(dterm * dr_dmatch * dL_dK)
        for i in range(self.order):
            self.order_coefs.gradient[i] = np.sum(dterm * dr_dcoefs[:, :, i] * dL_dK)            

    def _get_params(self):
        """
        Overriding this because of the way GPy handles parameters.
        """
        return [self.gap_decay[0], self.match_decay[0],
                self.order_coefs, self.variance[0]]