def __init__(self, input_dim, variance_adjustment, variance=1., lengthscale=None,
             rescale_variance=1., ARD=False, active_dims=None, name='rbf',
             useGPU=False, inv_l=False):
    super(CausalRBF, self).__init__(input_dim, variance, lengthscale, ARD,
                                    active_dims, name, useGPU=useGPU)
    if self.useGPU:
        self.psicomp = PSICOMP_RBF_GPU()
    else:
        self.psicomp = PSICOMP_RBF()
    self.use_invLengthscale = inv_l
    if inv_l:
        # Reparameterize: optimize 1/lengthscale^2 instead of the lengthscale.
        self.unlink_parameter(self.lengthscale)
        self.inv_l = Param('inv_lengthscale', 1. / self.lengthscale**2, Logexp())
        self.link_parameter(self.inv_l)
    self.variance_adjustment = variance_adjustment
    self.rescale_variance = Param('rescale_variance', rescale_variance, Logexp())
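# Throughout these snippets, positivity is enforced either by passing a Logexp
# transform at construction (as with rescale_variance above) or by calling
# constrain_positive() afterwards. A minimal standalone sketch of the first
# pattern, assuming only GPy and numpy are installed:
import numpy as np
from GPy.core import Param
from GPy.core.parameterization.transformations import Logexp

# Logexp optimizes an unconstrained value f and exposes log(1 + exp(f)),
# so the linked parameter can never leave (0, inf).
rescale = Param('rescale_variance', 1.0, Logexp())
rescale[:] = 2.5   # the exposed value can be set directly
print(rescale)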
def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None,
             inference_method=None, name='sparse gp', Y_metadata=None,
             normalizer=False, mpi_comm=None, mpi_root=0, auto_update=True):
    self.mpi_comm = mpi_comm
    self.mpi_root = mpi_root
    self.psicov = False
    self.svi = False
    self.qU_ratio = 1.
    self.auto_update = auto_update

    if inference_method is None:
        from ..inference import VarDTC_parallel, VarDTC
        if mpi_comm is None:
            inference_method = VarDTC()
        else:
            inference_method = VarDTC_parallel(mpi_comm, mpi_root)
    elif inference_method == 'inferentia' and mpi_comm is None:
        from ..inference import VarDTC_Inferentia
        inference_method = VarDTC_Inferentia()
        self.psicov = True
    elif inference_method == 'svi':
        from ..inference import SVI_VarDTC
        inference_method = SVI_VarDTC()
        self.svi = True

    super(SparseGP_MPI, self).__init__(X, Y, Z, kernel, likelihood,
                                       mean_function=mean_function,
                                       inference_method=inference_method,
                                       name=name, Y_metadata=Y_metadata,
                                       normalizer=normalizer)

    if self.svi:
        from ..util.misc import comp_mapping
        W = comp_mapping(self.X, self.Y)
        qu_mean = self.Z.dot(W)
        self.qU_mean = Param('qU_m', qu_mean)
        self.qU_W = Param('qU_W', np.random.randn(Z.shape[0], Z.shape[0]) * 0.01)
        self.qU_a = Param('qU_a', 1e-3, Logexp())
        self.link_parameters(self.qU_mean, self.qU_W, self.qU_a)
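# The SVI branch appears to parameterize the covariance of q(U) through a
# factor qU_W and a positive scalar qU_a. Assuming the usual low-rank-plus-
# diagonal construction S = W W^T + a I (an assumption; the actual form lives
# in SVI_VarDTC), positive definiteness holds for any W when a > 0:
import numpy as np

M = 20                                    # number of inducing points (illustrative)
W = np.random.randn(M, M) * 0.01          # matches the qU_W initialization above
a = 1e-3                                  # matches the qU_a initialization above
S = W.dot(W.T) + a * np.eye(M)            # assumed covariance of q(U)
assert np.all(np.linalg.eigvalsh(S) > 0)  # SPD by construction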
def __init__(self, input_dim=1, active_dims=[0], var=1., lengthscale=1.):
    super(Kt, self).__init__(input_dim, active_dims, 'Kt')
    assert input_dim == 1, "For this kernel we assume input_dim=1"
    self.var = Param('var', var)
    self.lengthscale = Param('lengthscale', lengthscale)
    self.var.constrain_positive()
    self.lengthscale.constrain_positive()
    self.link_parameters(self.var, self.lengthscale)
def __init__(self, input_dim, variance=1., lengthscale=1., epsilon=0.,
             active_dims=None):
    super().__init__(input_dim, active_dims, 'time_se')
    self.variance = Param('variance', variance)
    self.lengthscale = Param('lengthscale', lengthscale)
    self.epsilon = Param('epsilon', epsilon)
    self.link_parameters(self.variance, self.lengthscale, self.epsilon)
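# Unlike the kernels above, these parameters are linked without positivity
# constraints. A constraint can still be attached after construction; a
# minimal standalone sketch:
from GPy.core import Param

variance = Param('variance', 1.0)
variance.constrain_positive()   # equivalent to constructing with Logexp()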
def __init__(self, p_layer_num, p_input_dims, p_output_dims, p_hidden_dim,
             h_0_type='zero', rnn_type='rnn', bidirectional=False,
             name='seq_encoder'):
    super(seq_encoder, self).__init__(name=name)

    # Convert all encoder parameters to float64 to match GPy's precision.
    self.encoder = Mean_var_multilayer(p_layer_num, p_input_dims, p_output_dims,
                                       p_hidden_dim, h_0_type=h_0_type,
                                       rnn_type=rnn_type,
                                       bidirectional=bidirectional).double()

    self.params_dict = {}
    self.encoder_param_names_dics = {}  # inverse map from GPy names back to pytorch names
    for name_torch, param_torch in self.encoder.named_parameters():
        # Transform the parameter name from the pytorch convention to the GPy one.
        param_name = name_torch.replace('.', '_')
        self.encoder_param_names_dics[param_name] = name_torch
        param = Param(param_name, param_torch.data.numpy().copy())
        setattr(self, param_name, param)
        self.params_dict[param_name] = getattr(self, param_name)
        self.link_parameters(param)
def __init__(self, mu, lam, A):
    super(Gompertz, self).__init__(1, 1, name='gompertz')
    # All three parameters are kept positive via the Logexp transform.
    self.mu = Param('mu', mu, Logexp())
    self.lam = Param('lam', lam, Logexp())
    self.A = Param('A', A, Logexp())
    self.link_parameters(self.mu, self.lam, self.A)
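# The forward map of this Mapping is not shown. For reference, the Zwietering
# parameterization of the Gompertz growth curve uses exactly these three
# parameters (A the asymptote, mu the maximum growth rate, lam the lag time);
# a sketch, assuming that is the form this mapping implements:
import numpy as np

def gompertz(t, mu, lam, A):
    # Zwietering et al. (1990) modified Gompertz curve:
    # y(t) = A * exp(-exp(mu * e / A * (lam - t) + 1))
    return A * np.exp(-np.exp(mu * np.e / A * (lam - t) + 1.))

t = np.linspace(0., 10., 5)
print(gompertz(t, mu=1.0, lam=2.0, A=3.0))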
def __init__(self, input_dim, output_dim, values, breaks, name='piecewise_linear'):
    assert input_dim == 1
    assert output_dim == 1
    super(PiecewiseLinear, self).__init__(input_dim, output_dim, name)
    values, breaks = np.array(values).flatten(), np.array(breaks).flatten()
    assert values.size == breaks.size
    self.values = Param('values', values)
    self.breaks = Param('breaks', breaks)
    self.link_parameter(self.values)
    self.link_parameter(self.breaks)
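# A one-dimensional piecewise-linear map over (breaks, values) pairs is what
# numpy's interp computes directly; a sketch of the likely forward evaluation,
# assuming the breaks are sorted in increasing order:
import numpy as np

breaks = np.array([0., 1., 2.])
values = np.array([0., 1., 0.5])
t = np.linspace(-0.5, 2.5, 7)
# Linear interpolation between the breaks, constant extrapolation outside.
print(np.interp(t, breaks, values))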
def __init__(self, dim_up, dim_down, activation, regularization=None,
             reg_weight=0, W=None, b=None, name='layer'):
    super(Layer, self).__init__(name=name)
    self.dim_down = dim_down
    self.dim_up = dim_up
    self.layer_forward = None   # the link to its lower layer
    self.layer_backward = None  # the link to its upper layer
    self.activation = None if activation is None else activation.lower()
    self.regularization = regularization
    self.reg_weight = reg_weight
    if W is None:
        # Glorot/Xavier uniform initialization: U(-limit, limit) with
        # limit = sqrt(6 / (fan_in + fan_out)).
        W = np.random.rand(dim_down, dim_up) * 2 - 1
        W *= np.sqrt(6. / (dim_up + dim_down))
    if b is None:
        b = np.zeros((dim_down,))
    self.W = Param('W', W)
    self.b = Param('b', b)
    self.link_parameters(self.W, self.b)
    # Mirror the parameters and their gradients as theano shared variables.
    self.W_theano = shared(self.W.values.astype(theano.config.floatX),
                           name=name + '_W')
    self.W_grad_theano = shared(self.W.gradient.astype(theano.config.floatX),
                                name=name + '_W_grad')
    self.b_theano = shared(self.b.values.astype(theano.config.floatX),
                           name=name + '_b')
    self.b_grad_theano = shared(self.b.gradient.astype(theano.config.floatX),
                                name=name + '_b_grad')
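# A compact check that the weight initialization above really is Glorot
# uniform, i.e. every entry is bounded by sqrt(6 / (fan_in + fan_out)):
import numpy as np

dim_up, dim_down = 64, 32
limit = np.sqrt(6. / (dim_up + dim_down))
W = (np.random.rand(dim_down, dim_up) * 2 - 1) * limit   # equivalent one-liner
assert np.abs(W).max() <= limit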
def __init__(self, input_dim, active_dims=[0, 1], var=1., ly=1., lx=1.):
    super(divFreeK, self).__init__(input_dim, active_dims, 'divFreeK')
    assert input_dim == 2, "For this kernel we assume input_dim=2"
    self.var = Param('var', var)
    self.var.constrain_positive()
    self.ly = Param('ly', ly)
    self.ly.constrain_positive()
    self.lx = Param('lx', lx)
    self.lx.constrain_positive()
    self.link_parameters(self.var, self.ly, self.lx)
def __init__(self, input_dim, active_dims=[0, 1, 2], var=1., lt=1., ly=1., lx=1.):
    super(DivFreeK, self).__init__(input_dim, active_dims, 'divFreeK')
    assert input_dim == 3, "For this kernel we assume input_dim=3"
    self.var = Param('var', var)
    self.var.constrain_positive()
    # self.var.constrain_bounded(1e-06, 1)
    self.lt = Param('lt', lt)
    self.lt.constrain_positive()
    self.ly = Param('ly', ly)
    self.ly.constrain_positive()
    self.lx = Param('lx', lx)
    self.lx.constrain_positive()
    self.link_parameters(self.var, self.lt, self.ly, self.lx)
def __init__(self, input_dim, active_dim=[0, 1], l_df=1., l_cf=1., ratio=1.):
    super(myKernel, self).__init__(input_dim, active_dim, 'myKern')
    assert input_dim == 2, "For this kernel we assume input_dim=2"
    self.length_df = Param('length_df', l_df)
    self.length_cf = Param('length_cf', l_cf)
    self.ratio = Param('ratio', ratio)
    self.length_df.constrain_positive()
    self.length_cf.constrain_positive()
    self.ratio.constrain_bounded(0, 1)
    self.link_parameters(self.length_df, self.length_cf, self.ratio)
def _init_encoder(self, MLP_dims):
    from .mlp import MLP
    from copy import deepcopy
    from GPy.core.parameterization.transformations import Logexp
    X_win, X_dim, U_win, U_dim = self.X_win, self.X_dim, self.U_win, self.U_dim
    assert X_win > 0, "The neural network constraint only applies to autoregressive structures!"
    Q = X_win * X_dim + U_win * U_dim if self.withControl else X_win * X_dim
    self.init_Xs = [
        NormalPosterior(self.Xs_flat[i].mean.values[:X_win],
                        self.Xs_flat[i].variance.values[:X_win],
                        name='init_Xs_' + str(i)) for i in range(self.nSeq)
    ]
    for init_X in self.init_Xs:
        init_X.mean[:] = np.random.randn(*init_X.shape) * 1e-2
    # Default architecture: Q -> 2Q -> Q + X_dim//2 -> X_dim. Integer division
    # is required here: layer widths must be ints.
    self.encoder = MLP([Q, Q * 2, Q + X_dim // 2, X_dim]
                       if MLP_dims is None else [Q] + deepcopy(MLP_dims) + [X_dim])
    self.Xs_var = [
        Param('X_var_' + str(i),
              self.Xs_flat[i].variance.values[X_win:].copy(), Logexp())
        for i in range(self.nSeq)
    ]
def __init__(self, layer_lower, dim_down, dim_up, likelihood, X=None,
             X_variance=None, init='PCA', Z=None, num_inducing=10, kernel=None,
             inference_method=None, uncertain_inputs=True, mpi_comm=None,
             mpi_root=0, back_constraint=True, encoder=None, auto_update=True,
             name='layer'):
    self.uncertain_inputs = uncertain_inputs
    self.layer_lower = layer_lower
    Y = self.Y if self.layer_lower is None else self.layer_lower.X
    self.back_constraint = back_constraint

    from deepgp.util.util import initialize_latent
    if X is None:
        X, _ = initialize_latent(init, Y.shape[0], dim_up,
                                 Y.mean.values if isinstance(Y, VariationalPosterior) else Y)
    if X_variance is None:
        X_variance = 0.01 * np.ones(X.shape) + 0.01 * np.random.rand(*X.shape)

    if Z is None:
        if self.back_constraint:
            Z = np.random.rand(num_inducing, dim_up) * 2 - 1.
        elif num_inducing <= X.shape[0]:
            # Initialize the inducing inputs as a random subset of X.
            Z = X[np.random.permutation(X.shape[0])[:num_inducing]].copy()
        else:
            # Not enough data points: pad with uniform samples drawn from
            # the bounding box of X.
            Z_more = np.random.rand(num_inducing - X.shape[0], X.shape[1]) \
                     * (X.max(0) - X.min(0)) + X.min(0)
            Z = np.vstack([X.copy(), Z_more])
    assert Z.shape[1] == X.shape[1]

    if mpi_comm is not None:
        from ..util.parallel import broadcastArrays
        broadcastArrays([Z], mpi_comm, mpi_root)
    if uncertain_inputs:
        X = NormalPosterior(X, X_variance)
    if kernel is None:
        kernel = kern.RBF(dim_up, ARD=True)
    assert kernel.input_dim == X.shape[1], \
        "The dimensionality of input has to be equal to the input dimensionality of kernel!"

    self.Kuu_sigma = Param('Kuu_var', np.zeros(num_inducing) + 1e-3, Logexp())

    super(Layer, self).__init__(X, Y, Z, kernel, likelihood,
                                inference_method=inference_method,
                                mpi_comm=mpi_comm, mpi_root=mpi_root,
                                auto_update=auto_update, name=name)
    self.link_parameter(self.Kuu_sigma)

    if back_constraint:
        self.encoder = encoder
    if self.uncertain_inputs and not self.back_constraint:
        self.link_parameter(self.X)
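# The inducing-input fallback above (pad a too-small dataset with uniform
# draws from the bounding box of X) in isolation:
import numpy as np

X = np.random.randn(50, 3)
num_inducing = 80
Z_more = np.random.rand(num_inducing - X.shape[0], X.shape[1]) \
         * (X.max(0) - X.min(0)) + X.min(0)   # uniform within the bounding box of X
Z = np.vstack([X, Z_more])
assert Z.shape == (num_inducing, X.shape[1])
assert (Z_more >= X.min(0)).all() and (Z_more <= X.max(0)).all()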
def _init_X(self, model, Y_new, init='L2'):
    # Initialize the new X by finding the nearest point in Y space.
    Y = model.Y
    if self.missing_data:
        Y = Y[:, self.valid_dim]
        Y_new = Y_new[:, self.valid_dim]
        dist = -2. * Y_new.dot(Y.T) + np.square(Y_new).sum(axis=1)[:, None] \
               + np.square(Y).sum(axis=1)[None, :]
    elif init == 'L2':
        dist = -2. * Y_new.dot(Y.T) + np.square(Y_new).sum(axis=1)[:, None] \
               + np.square(Y).sum(axis=1)[None, :]
    elif init == 'NCC':
        dist = Y_new.dot(Y.T)
    elif init == 'rand':
        dist = np.random.rand(Y_new.shape[0], Y.shape[0])
    idx = dist.argmin(axis=1)

    from GPy.core import Param
    if isinstance(model.X, variational.VariationalPosterior):
        X = Param('latent mean', model.X.mean.values[idx].copy())
        X.set_prior(GPy.core.parameterization.priors.Gaussian(0., 1.), warning=False)
    else:
        X = Param('latent mean', (model.X[idx].values).copy())
    return X
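# The L2 branch uses the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b to
# get all pairwise squared distances from one matrix product; a quick
# numerical confirmation of that identity:
import numpy as np
from scipy.spatial.distance import cdist

A, B = np.random.randn(5, 3), np.random.randn(7, 3)
d2 = -2. * A.dot(B.T) + np.square(A).sum(axis=1)[:, None] \
     + np.square(B).sum(axis=1)[None, :]
assert np.allclose(d2, cdist(A, B, 'sqeuclidean'))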
class CurlFreeK(Kern):
    def __init__(self, input_dim, active_dims=[0, 1, 2], var=1., lt=1., ly=1., lx=1.):
        super(CurlFreeK, self).__init__(input_dim, active_dims, 'CurlFreeK')
        assert input_dim == 3, "For this kernel we assume input_dim=3"
        self.var = Param('var', var)
        self.var.constrain_positive()
        # self.var.constrain_bounded(1e-06, 1)
        self.lt = Param('lt', lt)
        self.lt.constrain_positive()
        self.ly = Param('ly', ly)
        self.ly.constrain_positive()
        self.lx = Param('lx', lx)
        self.lx.constrain_positive()
        self.link_parameters(self.var, self.lt, self.ly, self.lx)

    def parameters_changed(self):
        # nothing to do here
        pass

    def K(self, X, X2):
        if X2 is None:
            X2 = X
        dt = X[:, 0][:, None] - X2[:, 0]
        dy = X[:, 1][:, None] - X2[:, 1]
        dx = X[:, 2][:, None] - X2[:, 2]
        lt2 = np.square(self.lt)
        ly2 = np.square(self.ly)
        lx2 = np.square(self.lx)
        Btt = dt * dt / lt2
        Byy = dy * dy / ly2
        Bxx = dx * dx / lx2
        expo = (Btt + Byy + Bxx) / (-2.)
        C = self.var * np.exp(expo)
        # curl-free structure matrix
        By = (1 - Byy) / ly2
        Bx = (1 - Bxx) / lx2
        Byx = (-1) * dy * dx / (ly2 * lx2)
        A = np.concatenate([np.concatenate([By, Byx], axis=1),
                            np.concatenate([Byx, Bx], axis=1)], axis=0)
        C = np.concatenate([np.concatenate([C, C], axis=1),
                            np.concatenate([C, C], axis=1)], axis=0)
        return C * A

    def Kdiag(self, X):
        # Matches the diagonal of K(X, X): C's diagonal is var, and the
        # diagonal blocks of A contribute 1/ly^2 and 1/lx^2 respectively.
        n = X.shape[0]
        return self.var * np.concatenate([np.ones(n) / np.square(self.ly),
                                          np.ones(n) / np.square(self.lx)])

    def update_gradients_full(self, dL_dK, X, X2):
        if X2 is None:
            X2 = X
        # variance gradient: K is linear in var
        self.var.gradient = np.sum(self.K(X, X2) * dL_dK) / self.var
        # lt gradient: only the exponential factor depends on lt
        dt = X[:, 0][:, None] - X2[:, 0]
        Btt = dt * dt / (self.lt**3)
        Bt = np.concatenate([np.concatenate([Btt, Btt], axis=1),
                             np.concatenate([Btt, Btt], axis=1)], axis=0)
        self.lt.gradient = np.sum(self.K(X, X2) * Bt * dL_dK)
        # ly and lx terms
        ly2 = np.square(self.ly)
        ly3 = self.ly * ly2
        lx2 = np.square(self.lx)
        lx3 = self.lx * lx2
        dy = X[:, 1][:, None] - X2[:, 1]
        dx = X[:, 2][:, None] - X2[:, 2]
        Byy = (dy * dy) / ly2
        By = np.concatenate([np.concatenate([Byy, Byy], axis=1),
                             np.concatenate([Byy, Byy], axis=1)], axis=0)
        Bxx = (dx * dx) / lx2
        Bx = np.concatenate([np.concatenate([Bxx, Bxx], axis=1),
                             np.concatenate([Bxx, Bxx], axis=1)], axis=0)
        Byx = (dy * dx) / (lx2 * ly2)
        expo = (Btt * self.lt + Byy + Bxx) / (-2.)   # Btt*lt == dt^2/lt^2
        C = self.var * np.exp(expo)
        C = np.concatenate([np.concatenate([C, C], axis=1),
                            np.concatenate([C, C], axis=1)], axis=0)
        # ly.gradient
        dA1 = (4 * Byy - 2) / ly3
        dA2 = Bxx * 0
        dA12 = 2 * Byx / self.ly
        dA = np.concatenate([np.concatenate([dA1, dA12], axis=1),
                             np.concatenate([dA12, dA2], axis=1)], axis=0)
        self.ly.gradient = np.sum(((By / self.ly) * self.K(X, X2) + C * dA) * dL_dK)
        # lx.gradient
        dA1 = Bxx * 0
        dA2 = (4 * Bxx - 2) / lx3
        dA12 = 2 * Byx / self.lx
        dA = np.concatenate([np.concatenate([dA1, dA12], axis=1),
                             np.concatenate([dA12, dA2], axis=1)], axis=0)
        self.lx.gradient = np.sum(((Bx / self.lx) * self.K(X, X2) + C * dA) * dL_dK)

    def update_gradients_diag(self, dL_dKdiag, X):
        pass

    def gradients_X(self, dL_dK, X, X2):
        pass

    def gradients_X_diag(self, dL_dKdiag, X):
        # no diagonal gradients
        pass
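# A quick structural check of the class above (assuming numpy, GPy's Kern, and
# Param are imported as in the other snippets): the 2N x 2N matrix should be
# symmetric and agree with Kdiag on its diagonal.
import numpy as np

X = np.random.rand(6, 3)
k = CurlFreeK(input_dim=3)
K = k.K(X, X)
assert K.shape == (12, 12)
assert np.allclose(K, K.T)                  # symmetric by construction
assert np.allclose(np.diag(K), k.Kdiag(X))  # diagonal consistency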
def do_gpy_gplvm(d, gprf, X0, C0, sdata, method, maxsec=3600, parallel=False,
                 gplvm_type="bayesian", num_inducing=100):
    import GPy

    dim = sdata.SX.shape[1]

    # Adjust the lengthscale to match GPy's definition of the RBF kernel,
    # which includes a -0.5 factor in the exponent.
    k = GPy.kern.RBF(dim, ARD=0, lengthscale=np.sqrt(.5) * sdata.lscale,
                     variance=1.0)
    if C0 is None:
        k.lengthscale.fix()
        k.variance.fix()

    XObs = sdata.X_obs.copy()
    p = GPyConstDiagonalGaussian(XObs.flatten(), sdata.obs_std**2)
    if gplvm_type == "bayesian":
        print("bayesian GPLVM with %d inducing inputs" % num_inducing)
        m = GPy.models.BayesianGPLVM(sdata.SY, dim, X=X0,
                                     X_variance=np.ones(XObs.shape) * sdata.obs_std**2,
                                     kernel=k, num_inducing=num_inducing)
        #m.X.mean.set_prior(p)
    elif gplvm_type == "sparse":
        print("sparse non-bayesian GPLVM with %d inducing inputs" % num_inducing)
        m = GPy.models.SparseGPLVM(sdata.SY, dim, X=X0, kernel=k,
                                   num_inducing=num_inducing)
        from GPy.core import Param
        m.X = Param('latent_mean', X0)
        m.link_parameter(m.X, index=0)
        #m.X.set_prior(p)
    elif gplvm_type == "basic":
        print("basic GPLVM on full dataset")
        m = GPy.models.GPLVM(sdata.SY, dim, X=XObs, kernel=k)
        #m.X.set_prior(p)

    m.likelihood.variance = sdata.noise_var
    m.likelihood.variance.fix()

    nmeans = X0.size
    sstep = [0, ]
    f_log = open(os.path.join(d, "log.txt"), 'w')
    t0 = time.time()

    def llgrad_wrapper(xx):
        XX = xx[:nmeans].reshape(X0.shape)
        xd = X0.shape[1]
        n_ix = num_inducing * xd
        IX = xx[nmeans:nmeans + n_ix].reshape((-1, xd))
        np.save(os.path.join(d, "step_%05d_X.npy" % sstep[0]), XX)
        np.save(os.path.join(d, "step_%05d_IX.npy" % sstep[0]), IX)

        ll, grad = m._objective_grads(xx)
        prior_ll, prior_grad = sdata.x_prior(xx[:nmeans])
        ll -= prior_ll
        grad[:nmeans] -= prior_grad

        if C0 is not None:
            print("lscale", np.exp(xx[-1]))
        print("%d %.2f %.2f" % (sstep[0], time.time() - t0, -ll))
        f_log.write("%d %.2f %.2f\n" % (sstep[0], time.time() - t0, -ll))
        f_log.flush()
        sstep[0] += 1

        if time.time() - t0 > maxsec:
            raise OutOfTimeError
        return ll, grad

    x0 = m.optimizer_array
    bounds = None
    try:
        r = scipy.optimize.minimize(llgrad_wrapper, x0, jac=True, method=method,
                                    bounds=bounds,
                                    options={"ftol": 1e-6, "maxiter": 200})
        rx = r.x
    except OutOfTimeError:
        print("terminated optimization for time")

    f_log.write("optimization finished after %.fs\n" % (time.time() - t0))
    f_log.close()
    with open(os.path.join(d, "finished"), 'w') as f:
        f.write("")
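# The sqrt(0.5) rescaling at the top converts a kernel written as
# exp(-r^2 / lscale^2) into GPy's convention exp(-r^2 / (2 l^2)); a quick
# standalone check of that conversion:
import numpy as np
import GPy

lscale = 1.3
k = GPy.kern.RBF(1, lengthscale=np.sqrt(.5) * lscale)
x = np.array([[0.], [0.9]])
# With l = lscale / sqrt(2), GPy's exp(-r^2 / (2 l^2)) equals exp(-r^2 / lscale^2).
assert np.allclose(k.K(x)[0, 1], np.exp(-0.9**2 / lscale**2))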
class Kt(Kern):
    def __init__(self, input_dim=1, active_dims=[0], var=1., lengthscale=1.):
        super(Kt, self).__init__(input_dim, active_dims, 'Kt')
        assert input_dim == 1, "For this kernel we assume input_dim=1"
        self.var = Param('var', var)
        self.lengthscale = Param('lengthscale', lengthscale)
        self.var.constrain_positive()
        self.lengthscale.constrain_positive()
        self.link_parameters(self.var, self.lengthscale)

    def parameters_changed(self):
        # nothing to do here
        pass

    def _rbf(self):
        # Helper: an RBF kernel carrying the current hyperparameter values.
        return GPy.kern.RBF(input_dim=1, active_dims=[0],
                            variance=self.var[0], lengthscale=self.lengthscale[0])

    def K(self, X, X2):
        if X2 is None:
            X2 = X
        C = self._rbf().K(X, X2)
        # Replicate the RBF Gram matrix into a 2x2 block structure.
        return np.concatenate([np.concatenate([C, C], axis=1),
                               np.concatenate([C, C], axis=1)], axis=0)

    def Kdiag(self, X):
        return np.ones(2 * X.shape[0]) * self.var

    def update_gradients_full(self, dL_dK, X, X2):
        if X2 is None:
            X2 = X
        kt = self._rbf()
        # Since K = [[C, C], [C, C]], the gradient of L w.r.t. each
        # hyperparameter is the RBF gradient evaluated against the sum of
        # the four corresponding blocks of dL_dK.
        n, m = X.shape[0], X2.shape[0]
        dL_dC = dL_dK[:n, :m] + dL_dK[:n, m:] + dL_dK[n:, :m] + dL_dK[n:, m:]
        kt.update_gradients_full(dL_dC, X, X2)
        self.var.gradient = kt.variance.gradient
        self.lengthscale.gradient = kt.lengthscale.gradient

    def update_gradients_diag(self, dL_dKdiag, X):
        pass

    def gradients_X(self, dL_dK, X, X2=None):
        if X2 is None:
            X2 = X
        # Same block-sum argument as in update_gradients_full.
        n, m = X.shape[0], X2.shape[0]
        dL_dC = dL_dK[:n, :m] + dL_dK[:n, m:] + dL_dK[n:, :m] + dL_dK[n:, m:]
        return self._rbf().gradients_X(dL_dC, X, X2)

    def gradients_X_diag(self, dL_dKdiag, X):
        # no diagonal gradients
        pass
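# A small smoke test of the block structure (assuming GPy and numpy are
# imported and Kt is defined as above):
import numpy as np

X = np.linspace(0., 1., 5)[:, None]
k = Kt()
K = k.K(X, X)
assert K.shape == (10, 10)
assert np.allclose(K[:5, :5], K[5:, 5:])     # all four blocks are the RBF Gram matrix
assert np.allclose(np.diag(K), k.Kdiag(X))   # diagonal consistency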
def __init__(self, input_dim, active_dim=[0, 1], l=1.):
    super(nonRotK, self).__init__(input_dim, active_dim, 'nonRotK')
    assert input_dim == 2, "For this kernel we assume input_dim=2"
    self.length = Param('length', l)
    self.length.constrain_positive()
    self.link_parameters(self.length)
class nonRotK(Kern):
    def __init__(self, input_dim, active_dim=[0, 1], l=1.):
        super(nonRotK, self).__init__(input_dim, active_dim, 'nonRotK')
        assert input_dim == 2, "For this kernel we assume input_dim=2"
        self.length = Param('length', l)
        self.length.constrain_positive()
        self.link_parameters(self.length)

    def parameters_changed(self):
        # nothing to do here
        pass

    def K(self, X, X2):
        if X2 is None:
            X2 = X
        dx1 = X[:, 0][:, None] - X2[:, 0]
        dx2 = X[:, 1][:, None] - X2[:, 1]
        B11 = dx1 * dx1
        B12 = dx1 * dx2
        B22 = dx2 * dx2
        norm = np.sqrt(np.square(dx1) + np.square(dx2))
        # curl-free (cf) structure; scalar part is (1/l^2) exp(-r^2 / (2 l^2))
        l2 = np.square(self.length)
        C = np.square(norm / self.length)
        A = np.concatenate([np.concatenate([1 - B11 / l2, -B12 / l2], axis=1),
                            np.concatenate([-B12 / l2, 1 - B22 / l2], axis=1)], axis=0)
        C = np.concatenate([np.concatenate([C, C], axis=1),
                            np.concatenate([C, C], axis=1)], axis=0)
        return (1. / l2) * np.exp(-C / 2.) * A

    def Kdiag(self, X):
        var = 1 / self.length**2
        return np.ones(2 * X.shape[0]) * var

    def update_gradients_full(self, dL_dK, X, X2):
        if X2 is None:
            X2 = X
        dx1 = X[:, 0][:, None] - X2[:, 0]
        dx2 = X[:, 1][:, None] - X2[:, 1]
        B11 = dx1 * dx1
        B12 = dx1 * dx2
        B22 = dx2 * dx2
        norm2 = np.square(dx1) + np.square(dx2)
        # derivative with respect to the length scale of the curl-free kernel
        l2 = np.square(self.length)
        l3 = self.length**3
        l5 = self.length**5
        C = norm2 / l2
        A = np.concatenate([np.concatenate([1 - B11 / l2, -B12 / l2], axis=1),
                            np.concatenate([-B12 / l2, 1 - B22 / l2], axis=1)], axis=0)
        dA = (2 / l3) * np.concatenate([np.concatenate([B11, B12], axis=1),
                                        np.concatenate([B12, B22], axis=1)], axis=0)
        C = np.concatenate([np.concatenate([C, C], axis=1),
                            np.concatenate([C, C], axis=1)], axis=0)
        dl = np.exp(-C / 2.) * (dA + A * (2 * l2 - C * l2) / l5)
        self.length.gradient = np.sum(dl * dL_dK)

    def update_gradients_diag(self, dL_dKdiag, X):
        pass

    def gradients_X(self, dL_dK, X, X2):
        """Derivative of the covariance matrix with respect to X."""
        if X2 is None:
            X2 = X
        dx1 = X[:, 0][:, None] - X2[:, 0]
        dx2 = X[:, 1][:, None] - X2[:, 1]
        B11 = dx1 * dx1
        B12 = dx1 * dx2
        B22 = dx2 * dx2
        norm2 = np.square(dx1) + np.square(dx2)
        norm = np.sqrt(norm2)
        # derivative of the curl-free part
        l2 = np.square(self.length)
        A = np.concatenate([np.concatenate([1 - B11 / l2, -B12 / l2], axis=1),
                            np.concatenate([-B12 / l2, 1 - B22 / l2], axis=1)], axis=0)
        dA = (1 / l2) * np.concatenate([np.concatenate([-2 * dx1, -dx1 - dx2], axis=1),
                                        np.concatenate([-dx1 - dx2, -2 * dx2], axis=1)], axis=0)
        dX = np.exp(-norm2 / (2 * l2)) * (dA - A * norm / l2) / l2
        return np.sum(dL_dK * dX, 1)[:, None]

    def gradients_X_diag(self, dL_dKdiag, X):
        # no diagonal gradients
        pass
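# Again a quick structural check (GPy/numpy imported, class as above): the
# kernel matrix is symmetric and its diagonal matches Kdiag.
import numpy as np

X = np.random.rand(4, 2)
k = nonRotK(input_dim=2)
K = k.K(X, X)
assert K.shape == (8, 8)
assert np.allclose(K, K.T)
assert np.allclose(np.diag(K), k.Kdiag(X))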
class myKernel(Kern):
    def __init__(self, input_dim, active_dim=[0, 1], l_df=1., l_cf=1., ratio=1.):
        super(myKernel, self).__init__(input_dim, active_dim, 'myKern')
        assert input_dim == 2, "For this kernel we assume input_dim=2"
        self.length_df = Param('length_df', l_df)
        self.length_cf = Param('length_cf', l_cf)
        self.ratio = Param('ratio', ratio)
        self.length_df.constrain_positive()
        self.length_cf.constrain_positive()
        self.ratio.constrain_bounded(0, 1)
        self.link_parameters(self.length_df, self.length_cf, self.ratio)

    def parameters_changed(self):
        # nothing to do here
        pass

    def K(self, X, X2):
        if X2 is None:
            X2 = X
        p = 2  # number of dimensions
        dx1 = X[:, 0][:, None] - X2[:, 0]
        dx2 = X[:, 1][:, None] - X2[:, 1]
        B11 = dx1 * dx1
        B12 = dx1 * dx2
        B22 = dx2 * dx2
        norm = np.sqrt(np.square(dx1) + np.square(dx2))
        # divergence-free (df) component
        rdf2 = np.square(self.length_df)
        Cdf = np.square(norm / self.length_df)
        aux = (p - 1) - Cdf
        Adf = np.concatenate([np.concatenate([B11 / rdf2 + aux, B12 / rdf2], axis=1),
                              np.concatenate([B12 / rdf2, B22 / rdf2 + aux], axis=1)], axis=0)
        Cdf = np.concatenate([np.concatenate([Cdf, Cdf], axis=1),
                              np.concatenate([Cdf, Cdf], axis=1)], axis=0)
        Kdf = np.square(1. / self.length_df) * np.exp(-Cdf / 2.) * Adf
        # curl-free (cf) component
        rcf2 = np.square(self.length_cf)
        Ccf = np.square(norm / self.length_cf)
        Acf = np.concatenate([np.concatenate([1 - B11 / rcf2, -B12 / rcf2], axis=1),
                              np.concatenate([-B12 / rcf2, 1 - B22 / rcf2], axis=1)], axis=0)
        Ccf = np.concatenate([np.concatenate([Ccf, Ccf], axis=1),
                              np.concatenate([Ccf, Ccf], axis=1)], axis=0)
        Kcf = np.square(1. / self.length_cf) * np.exp(-Ccf / 2.) * Acf
        # convex combination of the two components
        return (self.ratio * Kdf) + (1 - self.ratio) * Kcf

    def Kdiag(self, X):
        var = self.ratio * (1 / self.length_df**2) \
              + (1 - self.ratio) * (1 / self.length_cf**2)
        return np.ones(2 * X.shape[0]) * var

    def update_gradients_full(self, dL_dK, X, X2):
        if X2 is None:
            X2 = X
        p = 2  # number of dimensions
        dx1 = X[:, 0][:, None] - X2[:, 0]
        dx2 = X[:, 1][:, None] - X2[:, 1]
        B11 = dx1 * dx1
        B12 = dx1 * dx2
        B22 = dx2 * dx2
        norm2 = np.square(dx1) + np.square(dx2)
        # derivative with respect to the divergence-free length scale
        ldf2 = np.square(self.length_df)
        ldf3 = self.length_df**3
        ldf5 = self.length_df**5
        Cdf = norm2 / ldf2
        aux = (p - 1) - Cdf
        Adf = np.concatenate([np.concatenate([B11 / ldf2 + aux, B12 / ldf2], axis=1),
                              np.concatenate([B12 / ldf2, B22 / ldf2 + aux], axis=1)], axis=0)
        dAdf = (2 / ldf3) * np.concatenate([np.concatenate([norm2 - B11, -B12], axis=1),
                                            np.concatenate([-B12, norm2 - B22], axis=1)], axis=0)
        Cdf = np.concatenate([np.concatenate([Cdf, Cdf], axis=1),
                              np.concatenate([Cdf, Cdf], axis=1)], axis=0)
        dl_df = self.ratio * np.exp(-Cdf / 2.) * (dAdf + Adf * (2 * ldf2 - Cdf * ldf2) / ldf5)
        # derivative with respect to the curl-free length scale
        lcf2 = np.square(self.length_cf)
        lcf3 = self.length_cf**3
        lcf5 = self.length_cf**5
        Ccf = norm2 / lcf2
        Acf = np.concatenate([np.concatenate([1 - B11 / lcf2, -B12 / lcf2], axis=1),
                              np.concatenate([-B12 / lcf2, 1 - B22 / lcf2], axis=1)], axis=0)
        dAcf = (2 / lcf3) * np.concatenate([np.concatenate([B11, B12], axis=1),
                                            np.concatenate([B12, B22], axis=1)], axis=0)
        Ccf = np.concatenate([np.concatenate([Ccf, Ccf], axis=1),
                              np.concatenate([Ccf, Ccf], axis=1)], axis=0)
        dl_cf = (1 - self.ratio) * np.exp(-Ccf / 2.) * (dAcf + Acf * (2 * lcf2 - Ccf * lcf2) / lcf5)
        # derivative with respect to the mixing ratio
        Kdf = (1. / ldf2) * np.exp(-Cdf / 2.) * Adf
        Kcf = (1. / lcf2) * np.exp(-Ccf / 2.) * Acf
        dr = Kdf - Kcf
        self.length_df.gradient = np.sum(dl_df * dL_dK)
        self.length_cf.gradient = np.sum(dl_cf * dL_dK)
        self.ratio.gradient = np.sum(dr * dL_dK)

    def update_gradients_diag(self, dL_dKdiag, X):
        pass

    def gradients_X(self, dL_dK, X, X2):
        """Derivative of the covariance matrix with respect to X."""
        if X2 is None:
            X2 = X
        p = 2  # number of dimensions
        dx1 = X[:, 0][:, None] - X2[:, 0]
        dx2 = X[:, 1][:, None] - X2[:, 1]
        B11 = dx1 * dx1
        B12 = dx1 * dx2
        B22 = dx2 * dx2
        norm2 = np.square(dx1) + np.square(dx2)
        norm = np.sqrt(norm2)
        # derivative of the divergence-free part
        ldf2 = np.square(self.length_df)
        Cdf = norm2 / ldf2
        aux = (p - 1) - Cdf
        Adf = np.concatenate([np.concatenate([B11 / ldf2 + aux, B12 / ldf2], axis=1),
                              np.concatenate([B12 / ldf2, B22 / ldf2 + aux], axis=1)], axis=0)
        dAdf = (2 / ldf2) * np.concatenate([np.concatenate([dx1 + norm, (dx1 + dx2) / 2.], axis=1),
                                            np.concatenate([(dx1 + dx2) / 2., dx2 + norm], axis=1)], axis=0)
        dX_df = self.ratio * np.exp(-norm2 / (2 * ldf2)) * (dAdf - Adf * norm / ldf2) / ldf2
        # derivative of the curl-free part
        lcf2 = np.square(self.length_cf)
        Acf = np.concatenate([np.concatenate([1 - B11 / lcf2, -B12 / lcf2], axis=1),
                              np.concatenate([-B12 / lcf2, 1 - B22 / lcf2], axis=1)], axis=0)
        dAcf = (1 / lcf2) * np.concatenate([np.concatenate([-2 * dx1, -dx1 - dx2], axis=1),
                                            np.concatenate([-dx1 - dx2, -2 * dx2], axis=1)], axis=0)
        dX_cf = (1 - self.ratio) * np.exp(-norm2 / (2 * lcf2)) * (dAcf - Acf * norm / lcf2) / lcf2
        return np.sum(dL_dK * (dX_df + dX_cf), 1)[:, None]

    def gradients_X_diag(self, dL_dKdiag, X):
        # no diagonal gradients
        pass
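# Since both components have unit-diagonal structure matrices at r = 0, the
# diagonal of K(X, X) should equal Kdiag(X) for any mixing ratio; a quick
# check (GPy/numpy imported, class as above):
import numpy as np

X = np.random.rand(5, 2)
k = myKernel(input_dim=2)
K = k.K(X, X)
assert np.allclose(K, K.T)
assert np.allclose(np.diag(K), k.Kdiag(X))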
class divFreeK(Kern):
    def __init__(self, input_dim, active_dims=[0, 1], var=1., ly=1., lx=1.):
        super(divFreeK, self).__init__(input_dim, active_dims, 'divFreeK')
        assert input_dim == 2, "For this kernel we assume input_dim=2"
        self.var = Param('var', var)
        self.var.constrain_positive()
        self.ly = Param('ly', ly)
        self.ly.constrain_positive()
        self.lx = Param('lx', lx)
        self.lx.constrain_positive()
        self.link_parameters(self.var, self.ly, self.lx)

    def parameters_changed(self):
        # nothing to do here
        pass

    def K(self, X, X2):
        if X2 is None:
            X2 = X
        dy = X[:, 0][:, None] - X2[:, 0]
        dx = X[:, 1][:, None] - X2[:, 1]
        ly2 = np.square(self.ly)
        lx2 = np.square(self.lx)
        Byy = dy * dy / ly2
        Bxx = dx * dx / lx2
        expo = (Byy + Bxx) / (-2.)
        C = self.var * np.exp(expo)
        # divergence-free (df) structure matrix
        By = (1 - Bxx) / lx2
        Bx = (1 - Byy) / ly2
        Byx = dy * dx / (ly2 * lx2)
        A = np.concatenate([np.concatenate([By, Byx], axis=1),
                            np.concatenate([Byx, Bx], axis=1)], axis=0)
        C = np.concatenate([np.concatenate([C, C], axis=1),
                            np.concatenate([C, C], axis=1)], axis=0)
        return C * A

    def Kdiag(self, X):
        # Matches the diagonal of K(X, X): C's diagonal is var, and the
        # diagonal blocks of A contribute 1/lx^2 and 1/ly^2 respectively.
        n = X.shape[0]
        return self.var * np.concatenate([np.ones(n) / np.square(self.lx),
                                          np.ones(n) / np.square(self.ly)])

    def update_gradients_full(self, dL_dK, X, X2):
        if X2 is None:
            X2 = X
        # variance gradient: K is linear in var
        self.var.gradient = np.sum(self.K(X, X2) * dL_dK) / self.var
        # ly and lx terms
        ly2 = np.square(self.ly)
        ly3 = self.ly * ly2
        lx2 = np.square(self.lx)
        lx3 = self.lx * lx2
        dy = X[:, 0][:, None] - X2[:, 0]
        dx = X[:, 1][:, None] - X2[:, 1]
        Byy = (dy * dy) / ly2
        By = np.concatenate([np.concatenate([Byy, Byy], axis=1),
                             np.concatenate([Byy, Byy], axis=1)], axis=0)
        Bxx = (dx * dx) / lx2
        Bx = np.concatenate([np.concatenate([Bxx, Bxx], axis=1),
                             np.concatenate([Bxx, Bxx], axis=1)], axis=0)
        Byx = (dy * dx) / (lx2 * ly2)
        expo = (Byy + Bxx) / (-2.)
        C = self.var * np.exp(expo)
        C = np.concatenate([np.concatenate([C, C], axis=1),
                            np.concatenate([C, C], axis=1)], axis=0)
        # ly.gradient
        dA1 = Bxx * 0
        dA12 = -2 * Byx / self.ly
        dA2 = (4 * Byy - 2) / ly3
        dA = np.concatenate([np.concatenate([dA1, dA12], axis=1),
                             np.concatenate([dA12, dA2], axis=1)], axis=0)
        self.ly.gradient = np.sum(((By / self.ly) * self.K(X, X2) + C * dA) * dL_dK)
        # lx.gradient
        dA1 = (4 * Bxx - 2) / lx3
        dA12 = -2 * Byx / self.lx
        dA2 = Bxx * 0
        dA = np.concatenate([np.concatenate([dA1, dA12], axis=1),
                             np.concatenate([dA12, dA2], axis=1)], axis=0)
        self.lx.gradient = np.sum(((Bx / self.lx) * self.K(X, X2) + C * dA) * dL_dK)

    def update_gradients_diag(self, dL_dKdiag, X):
        pass

    def gradients_X(self, dL_dK, X, X2):
        pass

    def gradients_X_diag(self, dL_dKdiag, X):
        # no diagonal gradients
        pass
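# As with the other matrix-valued kernels, a final sanity check that K is
# symmetric and agrees with the corrected Kdiag (GPy/numpy imported, class
# as above):
import numpy as np

X = np.random.rand(6, 2)
k = divFreeK(input_dim=2)
K = k.K(X, X)
assert K.shape == (12, 12)
assert np.allclose(K, K.T)
assert np.allclose(np.diag(K), k.Kdiag(X))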