def __CV_Leave_One_Curve(self, x0, candidate_h, ker_fun, binning, dtype,
                         **kwargs):
    """Pick a bandwidth by leave-one-curve-out cross-validation.

    For each row of ``candidate_h`` the smoother is refitted with one curve
    held out at a time, and the squared prediction error on the held-out
    curve is accumulated.  The candidate with the smallest accumulated
    error is returned; candidates whose fit contains NaN are scored NaN
    and therefore skipped by ``np.nanargmin``.

    Parameters
    ----------
    x0 : ndarray
        Evaluation grid.  The last axis holds the coordinates, the leading
        axes give the grid shape.
    candidate_h : ndarray
        Candidate bandwidths, one candidate per row (axis 0).
    ker_fun
        Kernel specification, forwarded unchanged to ``lpr``.
    binning : bool
        Selects the binned branch (``lpr.Lpr_For_Bin``) or the raw-data
        branch (``lpr.Lpr``).
    dtype
        Forwarded to ``lpr.Lpr_For_Bin`` (binned branch only).
    **kwargs
        Binned branch: ``bin_data`` (binned data of all curves) and
        ``test_bin_data`` (iterable of per-curve binned data).
        Raw branch: ``x`` and ``y`` (per-curve sequences of design points
        and responses).

    Returns
    -------
    ndarray
        The row of ``candidate_h`` with the smallest non-NaN error.

    Raises
    ------
    ValueError
        Raised by ``np.nanargmin`` when every candidate scored NaN.
    """
    grid_shape = np.array(x0.shape[:-1])
    d = x0.shape[-1]
    n_h = candidate_h.shape[0]
    # Start from zero so the scores are plain sums of squared errors.
    sse = np.zeros(n_h)
    bin_width = np.ptp(x0.reshape(-1, d), 0) / (grid_shape - 1)

    if binning:
        bin_data = kwargs['bin_data']
        test_bin_data = kwargs['test_bin_data']
        for i in range(n_h):
            h = candidate_h.take(i, 0)
            for test_data in test_bin_data:
                # Removing one curve's bins from the total leaves the
                # binned training data for that fold.
                train_bin_data = bin_data - test_data
                fit_y = lpr.Lpr_For_Bin(train_bin_data, bin_width, h,
                                        ker_fun, dtype)
                if np.any(np.isnan(fit_y)):
                    sse[i] = np.nan
                    break
                # Only bins whose second row is non-zero contribute.
                # NOTE(review): row 0 / row 1 look like per-bin response
                # sums / counts -- confirm against lpr.Bin_Data.
                nozero = (test_data.take(1, 0) != 0).flatten()
                test_nozero = np.compress(nozero,
                                          test_data.reshape(2, -1), 1)
                test_sum = test_nozero.take(0, 0)
                test_cnt = test_nozero.take(1, 0)
                resid = test_sum - np.compress(nozero, fit_y) * test_cnt
                sse[i] += (resid**2 / test_cnt).sum()
    else:
        x, y = kwargs['x'], kwargs['y']
        if self.__num_fun > 100:
            # Cap the cost at 100 held-out curves, drawn with replacement.
            # randint's upper bound is exclusive, so this matches the
            # deprecated random_integers(0, num_fun - 1, 100).
            test_index = np.random.randint(0, self.__num_fun, 100)
        else:
            test_index = range(self.__num_fun)
        for i in range(n_h):
            h = candidate_h.take(i, 0)
            for j in test_index:
                # Train on every curve except j.  (Python's x[-j] is the
                # j-th element from the end, not "all but j".)
                train_x = [xk for k, xk in enumerate(x) if k != j]
                train_y = [yk for k, yk in enumerate(y) if k != j]
                fit_y = lpr.Lpr(np.vstack(train_x),
                                np.hstack(train_y),
                                x[j],
                                h,
                                binning=binning,
                                ker_fun=ker_fun)
                if np.any(np.isnan(fit_y)):
                    sse[i] = np.nan
                    break
                sse[i] += ((y[j] - fit_y)**2).sum()

    opt_h = candidate_h.take(np.nanargmin(sse), 0)
    return opt_h
def __Fit_Cov(self, x, y, x0, candidate_h, binning, ker_fun):
    """Fit the covariance surface from binned raw covariances.

    The bandwidth and the raw pair data come from
    ``__CV_Cov_Leave_One_Out``.  The pairs are binned on the product grid,
    smoothed with ``lpr.Lpr_For_Bin``, and the outer product of
    ``self.mean_fun`` with itself is subtracted.

    Sets ``self.cov_bw`` and ``self.cov_fun``; returns nothing.
    ``binning`` is accepted but not used by this implementation.

    NOTE(review): SOURCE contains other definitions named ``__Fit_Cov``;
    if they live in the same class scope, only the last one is effective.
    """
    self.cov_bw, xx_p, yy = self.__CV_Cov_Leave_One_Out(
        x, y, x0, candidate_h, ker_fun)

    n_grid = np.prod(self.__grid_shape)
    n_cells = n_grid**2
    pair_index = xx_p.reshape(-1)

    # Per-cell pair counts and per-cell sums of the raw products.
    bin_xx = np.bincount(pair_index, minlength=n_cells)
    bin_yy = np.bincount(pair_index, yy.reshape(-1), minlength=n_cells)

    product_shape = np.tile(self.__grid_shape, 2)
    binned = [bin_yy.reshape(product_shape), bin_xx.reshape(product_shape)]
    fit_yy = lpr.Lpr_For_Bin(binned,
                             np.tile(self.__bin_width, 2),
                             h=self.cov_bw,
                             ker_fun=ker_fun)

    second_moment = fit_yy.reshape(np.repeat(n_grid, 2))
    self.cov_fun = second_moment - np.outer(self.mean_fun, self.mean_fun)
def __BW_Partition(self, x0, candidate_h, ker_fun, binning, dtype, **kwargs):
    """Pick a bandwidth with a single train/test partition.

    Each row of ``candidate_h`` is scored by the squared prediction error
    on the held-out part of the data; the row with the smallest non-NaN
    score is returned.

    Parameters
    ----------
    x0 : ndarray
        Evaluation grid.  The last axis holds the coordinates, the leading
        axes give the grid shape.
    candidate_h : ndarray
        Two-dimensional array of candidate bandwidths, one per row.
    ker_fun
        Kernel specification, forwarded unchanged to ``lpr``.
    binning : bool
        Selects the binned branch (``lpr.Lpr_For_Bin``) or the raw-data
        branch (``lpr.Lpr``).
    dtype
        Forwarded to ``lpr.Lpr_For_Bin`` (binned branch only).
    **kwargs
        Binned branch: ``train_bin_data`` and ``test_bin_data``.
        Raw branch: ``x`` and ``y`` (per-curve sequences); they are
        shuffled and split at ``self.__n_train`` inside this method.
        Other keywords are ignored.

    Returns
    -------
    ndarray
        The row of ``candidate_h`` with the smallest non-NaN score.

    Raises
    ------
    ValueError
        Raised by ``np.nanargmin`` when every candidate scored NaN.
    """
    grid_shape = np.asarray(x0.shape[:-1])
    n_h, d = candidate_h.shape
    bin_width = np.ptp(x0.reshape(-1, d), 0) / (grid_shape - 1)
    ssq = np.zeros(n_h)

    if binning:
        train_bin_data = kwargs['train_bin_data']
        test_bin_data = kwargs['test_bin_data']
        # The held-out bins do not depend on the candidate: mask once.
        # NOTE(review): row 0 / row 1 look like per-bin response sums /
        # counts -- confirm against lpr.Bin_Data.
        nozero = (test_bin_data.take(1, 0) != 0).flatten()
        test_nozero = np.compress(nozero, test_bin_data.reshape(2, -1), 1)
        test_sum = test_nozero.take(0, 0)
        test_cnt = test_nozero.take(1, 0)
        for i in range(n_h):
            fit_y = lpr.Lpr_For_Bin(train_bin_data, bin_width,
                                    candidate_h.take(i, 0), ker_fun,
                                    dtype).reshape(x0.shape[:-1])
            if np.isnan(fit_y).any():
                ssq[i] = np.nan
                continue
            resid = test_sum - np.compress(nozero, fit_y.flatten()) * test_cnt
            ssq[i] = (resid**2 / test_cnt).sum()
    else:
        x, y = kwargs['x'], kwargs['y']
        combined = list(zip(x, y))
        random.shuffle(combined)
        train_data = combined[:self.__n_train]
        test_data = combined[self.__n_train:]
        train_x, train_y = list(zip(*train_data))
        test_x, test_y = list(zip(*test_data))
        # Stack once; none of these depend on the candidate bandwidth.
        train_x_all = np.vstack(train_x)
        train_y_all = np.hstack(train_y)
        test_x_all = np.vstack(test_x)
        # test_y is a tuple of per-curve arrays; flatten it the same way
        # as the training responses so it lines up with the prediction
        # at the stacked test points.
        test_y_all = np.hstack(test_y)
        for i in range(n_h):
            fit_y = lpr.Lpr(train_x_all,
                            train_y_all,
                            test_x_all,
                            candidate_h.take(i, 0),
                            binning=binning,
                            ker_fun=ker_fun)
            ssq[i] = ((test_y_all - fit_y)**2).sum()

    h_opt = candidate_h.take(np.nanargmin(ssq), 0)
    return h_opt
def __CV_Cov_Partition(self, x, y, x0, h, ker_fun):
    """Choose the covariance bandwidth with one random train/test split.

    Every observation is mapped to a flat grid-cell index, and every
    ordered pair of distinct points within a curve is mapped to a cell of
    the product grid.  The non-NaN raw products ``y_j * y_k`` are split at
    random into a training and a test part; the training part is binned
    and smoothed for every candidate bandwidth, and each candidate is
    scored on the test part.

    Parameters
    ----------
    x : ndarray
        Design points; reduced over ``axis=2`` after scaling, so at least
        three-dimensional.
    y : ndarray
        Two-dimensional responses (used in ``einsum('ij,ik->ijk')``).
    x0 : ndarray
        Evaluation grid; last axis holds coordinates.
    h : ndarray
        Candidate bandwidths, one per row.
    ker_fun
        Kernel specification, forwarded unchanged to ``lpr.Lpr_For_Bin``.

    Returns
    -------
    list
        ``[h_opt, xx_p, yy]``: the selected bandwidth, then the flat
        product-grid indices and raw products of all non-NaN pairs.
    """
    grid_shape = np.asarray(x0.shape[:-1])
    d = x0.shape[-1]
    n_grid = np.prod(self.__grid_shape)

    # Nearest grid cell along every coordinate (adding 1/2 then
    # truncating rounds to the nearest integer for non-negative values).
    origin = x0.reshape(-1, d).min(axis=0)
    half_cell = np.ones(self.__d) / 2
    x_displacement = ((x - origin) / self.__bin_width +
                      half_cell).astype(np.int32)

    # Row-major strides turn per-coordinate cell indices into one index.
    strides = np.append(grid_shape[::-1].cumprod()[-2::-1], 1)
    x_p = np.sum(x_displacement * strides, axis=2)

    # All ordered pairs (j, k) within a curve, as product-grid indices.
    n_col = x_p.shape[1]
    xx_p = x_p.repeat(n_col, 1) * n_grid + np.tile(x_p, n_col)
    # Positions 0, num_pt + 1, 2 * (num_pt + 1), ... are the j == k pairs.
    xx_p = np.delete(
        xx_p, np.arange(0, self.__num_pt**2, self.__num_pt + 1), 1)

    yy = np.einsum('ij,ik->ijk', y, y).reshape(self.__num_fun, -1)
    yy = np.delete(
        yy, np.arange(0, self.__num_pt**2, self.__num_pt + 1), 1)

    # Keep only the pairs whose product is defined.
    non_nan_value = ~np.isnan(yy).reshape(-1)
    n_real_val = non_nan_value.sum()
    xx_p = np.compress(non_nan_value, xx_p.reshape(-1))
    yy = np.compress(non_nan_value, yy.reshape(-1))

    # Random split of the pairs into training and test parts.
    n_train = lpr.Partition_Data_Size(n_real_val)
    random_order = np.random.permutation(n_real_val)
    shuffled_xx = xx_p.reshape(-1).take(random_order)
    shuffled_yy = yy.reshape(-1).take(random_order)
    train_xx, test_xx = np.split(shuffled_xx, [n_train])
    train_yy, test_yy = np.split(shuffled_yy, [n_train])

    # Bin the training pairs: counts and sums per product-grid cell.
    product_shape = np.tile(self.__grid_shape, 2)
    bin_xx = np.bincount(train_xx, minlength=n_grid**2)
    bin_yy = np.bincount(train_xx, train_yy, minlength=n_grid**2)
    bin_xx = bin_xx.reshape(product_shape)
    bin_yy = bin_yy.reshape(product_shape)

    n_candidates = h.shape[0]
    ssq = np.zeros(n_candidates)
    for idx in range(n_candidates):
        fit_y = lpr.Lpr_For_Bin([bin_yy, bin_xx],
                                np.tile(self.__bin_width, 2),
                                h.take(idx, 0),
                                ker_fun=ker_fun)
        residual = test_yy - fit_y[test_xx]
        ssq[idx] = (residual**2).sum()
        # A fit with any NaN disqualifies the candidate outright.
        if np.isnan(fit_y).any():
            ssq[idx] = np.nan

    h_opt = h.take(np.nanargmin(ssq), 0)
    return [h_opt, xx_p, yy]
def __Fit_Cov(self, x, y, x0, candidate_h, binning, ker_fun, bw_select):
    """Fit the covariance surface from binned raw covariances.

    The bandwidth and the raw pair data come from the selector named by
    ``bw_select``.  The pairs are binned on the product grid, smoothed
    with ``lpr.Lpr_For_Bin``, symmetrised, and the outer product of
    ``self.mean_fun`` with itself is subtracted.

    Parameters
    ----------
    x, y, x0, candidate_h, ker_fun
        Forwarded unchanged to the bandwidth selector.
    binning
        Accepted but not used by this implementation.
    bw_select : str
        ``'Partition'`` or ``'LeaveOneOut'``.

    Sets ``self.cov_bw`` and ``self.cov_fun``; returns nothing.

    Raises
    ------
    ValueError
        If ``bw_select`` is neither ``'Partition'`` nor ``'LeaveOneOut'``.

    NOTE(review): SOURCE contains other definitions named ``__Fit_Cov``;
    if they live in the same class scope, only the last one is effective.
    """
    # Compare by value: `is` tests object identity and only matches string
    # literals when the interpreter happens to intern both operands.
    if bw_select == 'Partition':
        selector = self.__CV_Cov_Partition
    elif bw_select == 'LeaveOneOut':
        selector = self.__CV_Cov_Leave_One_Out
    else:
        raise ValueError(
            "bw_select must be 'Partition' or 'LeaveOneOut', got %r" %
            (bw_select, ))
    self.cov_bw, xx_p, yy = selector(x, y, x0, candidate_h, ker_fun)

    n_grid = np.prod(self.__grid_shape)
    n_cells = n_grid**2
    pair_index = xx_p.reshape(-1)

    # Per-cell pair counts and per-cell sums of the raw products.
    bin_xx = np.bincount(pair_index, minlength=n_cells)
    bin_yy = np.bincount(pair_index, yy.reshape(-1), minlength=n_cells)

    product_shape = np.tile(self.__grid_shape, 2)
    fit_yy = lpr.Lpr_For_Bin(
        [bin_yy.reshape(product_shape),
         bin_xx.reshape(product_shape)],
        np.tile(self.__bin_width, 2),
        h=self.cov_bw,
        ker_fun=ker_fun).reshape(np.tile(n_grid, 2))

    # Average with the transpose so the estimate is exactly symmetric.
    fit_yy = (fit_yy.T + fit_yy) / 2
    self.cov_fun = fit_yy - np.outer(self.mean_fun, self.mean_fun)
def __Fit_Sigma2(self, x, y, x0, candidate_h, binning, bin_weight, ker_fun,
                 bw_select, dtype):
    """Estimate the noise variance ``self.sigma2``.

    The squared responses are smoothed, with a bandwidth chosen by
    ``bw_select``, and ``self.mean_fun**2`` is subtracted to give
    ``self.cov_dia_fun``.  The diagonal of the covariance rebuilt from
    ``self.eig_fun`` / ``self.eig_val`` is then subtracted, and the
    difference is averaged over the index range
    ``[grid_shape / 4, 3 * grid_shape / 4)`` along every grid axis.
    A negative average is clipped to 0.

    Parameters
    ----------
    x, y : sequence
        Per-curve design points and responses.
    x0 : ndarray
        Evaluation grid.
    candidate_h : ndarray
        Candidate bandwidths, one per row.
    binning : bool
        Selects binned smoothing (``lpr.Lpr_For_Bin``) or raw smoothing
        (``lpr.Lpr``).
    bin_weight
        Forwarded to ``lpr.Bin_Data`` / ``lpr.Lpr``.
    ker_fun
        Kernel specification, forwarded unchanged to ``lpr``.
    bw_select : str
        ``'Partition'`` or ``'LeaveOneOut'``.  Any other value leaves
        ``self.cov_diag_bw`` untouched (original behaviour).
    dtype
        Forwarded to ``lpr``.

    Sets ``self.cov_diag_bw``, ``self.cov_dia_fun`` and ``self.sigma2``;
    returns nothing.
    """
    yy = [yi**2 for yi in y]

    if binning:
        cov_diag_bin_data = np.asarray(
            [lpr.Bin_Data(xi, yyi, x0, bin_weight) for xi, yyi in zip(x, yy)])
        # The binned data pooled over all curves is needed by both the
        # leave-one-out selector and the final fit.
        total_bin_data = cov_diag_bin_data.sum(0)
        if bw_select == 'Partition':
            rand_index = np.random.permutation(self.__num_fun)
            train_bin_data, test_bin_data = np.split(
                cov_diag_bin_data.take(rand_index, 0), [self.__n_train])
            self.cov_diag_bw = self.__BW_Partition(
                x0=x0,
                candidate_h=candidate_h,
                ker_fun=ker_fun,
                binning=binning,
                dtype=dtype,
                train_bin_data=train_bin_data.sum(0),
                test_bin_data=test_bin_data.sum(0))
        elif bw_select == 'LeaveOneOut':
            if self.__num_fun > 100:
                # randint's upper bound is exclusive, so this matches the
                # deprecated random_integers(0, num_fun - 1, 100).
                test_index = np.random.randint(0, self.__num_fun, 100)
            else:
                test_index = range(self.__num_fun)
            test_bin_data = cov_diag_bin_data.take(test_index, 0)
            self.cov_diag_bw = self.__CV_Leave_One_Curve(
                x0=x0,
                candidate_h=candidate_h,
                ker_fun=ker_fun,
                binning=binning,
                dtype=dtype,
                bin_data=total_bin_data,
                test_bin_data=test_bin_data)
        self.cov_dia_fun = lpr.Lpr_For_Bin(bin_data=total_bin_data,
                                           bin_width=self.__bin_width,
                                           h=self.cov_diag_bw,
                                           ker_fun=ker_fun,
                                           dtype=dtype) - self.mean_fun**2
    else:
        # Store the result in cov_diag_bw, which is the attribute the fit
        # below reads.  Writing it to cov_bw would overwrite the
        # covariance-surface bandwidth and leave cov_diag_bw unset.
        if bw_select == 'Partition':
            self.cov_diag_bw = self.__BW_Partition(x0=x0,
                                                   candidate_h=candidate_h,
                                                   ker_fun=ker_fun,
                                                   binning=binning,
                                                   dtype=dtype,
                                                   x=x,
                                                   y=yy)
        elif bw_select == 'LeaveOneOut':
            self.cov_diag_bw = self.__CV_Leave_One_Curve(
                x0=x0,
                candidate_h=candidate_h,
                ker_fun=ker_fun,
                binning=binning,
                dtype=dtype,
                x=x,
                y=yy)
        self.cov_dia_fun = lpr.Lpr(x=np.vstack(x),
                                   y=np.hstack(yy),
                                   x0=x0,
                                   h=self.cov_diag_bw,
                                   binning=binning,
                                   bin_weight=bin_weight,
                                   ker_fun=ker_fun,
                                   dtype=dtype) - self.mean_fun**2

    # Row 0: first kept index per axis; row 1: one past the last.
    interval = np.array([(self.__grid_shape / 4).astype('int'),
                         (3 * self.__grid_shape / 4).astype('int')])
    restruct_cov_fun = np.matmul(
        np.matmul(self.eig_fun.T, np.diag(self.eig_val)), self.eig_fun)
    sigma2_t = (self.cov_dia_fun - np.diag(restruct_cov_fun)).reshape(
        self.__grid_shape)
    # Keep only the central part of the grid along each axis
    # (presumably to limit boundary effects -- TODO confirm).
    for i in range(self.__d):
        left, right = interval.take(i, 1)
        sigma2_t = sigma2_t.take(np.arange(left, right), i)

    self.sigma2 = sigma2_t.mean()
    # A variance cannot be negative; clip estimation noise at zero.
    self.sigma2 = 0 if self.sigma2 < 0 else self.sigma2
def __Fit_Cov(self, x, y, x0, candidate_h, binning, bin_weight, ker_fun,
              bw_select, dtype, bin_data=None):
    """Fit the covariance surface ``self.cov_fun``.

    A product grid ``cov_x0`` is built from ``x0``, a bandwidth is chosen
    by ``bw_select``, the raw covariances from ``__Get_Row_Cov`` are
    smoothed, the result is symmetrised, and the outer product of
    ``self.mean_fun`` with itself is subtracted.

    Parameters
    ----------
    x, y : sequence
        Per-curve design points and responses.
    x0 : ndarray
        Evaluation grid; reshaped to ``(-1, self.__d)`` here.
    candidate_h : ndarray
        Candidate bandwidths, one per row.
    binning : bool
        Selects binned smoothing (``lpr.Lpr_For_Bin``) or raw smoothing
        (``lpr.Lpr``).
    bin_weight
        Forwarded to ``__Get_Row_Cov`` and ``lpr.Lpr``.
    ker_fun
        Kernel specification, forwarded unchanged to ``lpr``.
    bw_select : str
        ``'Partition'`` or ``'LeaveOneOut'``.  Any other value leaves
        ``self.cov_bw`` untouched (original behaviour).
    dtype
        Forwarded to ``lpr``.
    bin_data : ndarray, optional
        Per-curve binned data, indexed by curve along axis 0.  Used only
        when ``binning`` is true.

    Sets ``self.cov_bw`` and ``self.cov_fun``; returns nothing.
    """
    n_grid = self.__grid_shape.prod()
    # Coordinates of every pair of grid points.
    # NOTE(review): the leading shape here is grid_shape.repeat(2), while
    # the bin width below uses np.tile(bin_width, 2).  The two orderings
    # coincide only for d == 1 or equal grid sizes -- confirm which one
    # __Get_Row_Cov expects before changing it.
    cov_x0 = x0.reshape(-1, self.__d)[np.mgrid[0:n_grid, 0:n_grid].T].reshape(
        np.append(self.__grid_shape.repeat(2), -1))
    flat_shape = np.repeat(np.prod(self.__grid_shape), 2)

    if binning:
        cov_bin_data = None
        if bw_select == 'Partition':
            rand_index = np.random.permutation(self.__num_fun)
            train_index = rand_index[:self.__n_train]
            test_index = rand_index[self.__n_train:]
            train_bin_data, test_bin_data = np.split(
                bin_data.take(rand_index, 0), [self.__n_train])
            train_cov_bin_data = self.__Get_Row_Cov(
                [x[i] for i in train_index], [y[i] for i in train_index],
                cov_x0, binning, bin_weight, train_bin_data)
            test_cov_bin_data = self.__Get_Row_Cov(
                [x[i] for i in test_index], [y[i] for i in test_index],
                cov_x0, binning, bin_weight, test_bin_data)
            self.cov_bw = self.__BW_Partition(
                x0=cov_x0,
                candidate_h=candidate_h,
                ker_fun=ker_fun,
                binning=binning,
                dtype=dtype,
                train_bin_data=train_cov_bin_data,
                test_bin_data=test_cov_bin_data)
        elif bw_select == 'LeaveOneOut':
            if self.__num_fun > 100:
                # randint's upper bound is exclusive, so this matches the
                # deprecated random_integers(0, num_fun - 1, 100).
                test_index = np.random.randint(0, self.__num_fun, 100)
            else:
                test_index = range(self.__num_fun)
            test_cov_bin_data = np.zeros((len(test_index), 2) +
                                         cov_x0.shape[:-1])
            for i, testi in enumerate(test_index):
                # Raw covariance of one held-out curve; its binned data
                # keeps a leading axis of length 1.
                test_cov_bin_data[i] = self.__Get_Row_Cov(
                    [x[testi]], [y[testi]], cov_x0, binning, bin_weight,
                    bin_data.take(testi, 0).reshape((1, ) +
                                                    bin_data.shape[1:]))
            cov_bin_data = self.__Get_Row_Cov(x, y, cov_x0, binning,
                                              bin_weight, bin_data)
            self.cov_bw = self.__CV_Leave_One_Curve(
                x0=cov_x0,
                candidate_h=candidate_h,
                ker_fun=ker_fun,
                binning=binning,
                dtype=dtype,
                bin_data=cov_bin_data,
                test_bin_data=test_cov_bin_data)
        # Reuse the full-data raw covariance when the leave-one-out branch
        # already built it, instead of recomputing it.
        if cov_bin_data is None:
            cov_bin_data = self.__Get_Row_Cov(x, y, cov_x0, binning,
                                              bin_weight, bin_data)
        fit_yy = lpr.Lpr_For_Bin(bin_data=cov_bin_data,
                                 bin_width=np.tile(self.__bin_width, 2),
                                 h=self.cov_bw,
                                 ker_fun=ker_fun,
                                 dtype=dtype).reshape(flat_shape)
    else:
        # Without binning __Get_Row_Cov returns the raw pairs and its own
        # product grid, which replaces the one built above.
        xx, yy, cov_x0 = self.__Get_Row_Cov(x, y, x0, binning, bin_weight)
        if bw_select == 'Partition':
            self.cov_bw = self.__BW_Partition(x0=cov_x0,
                                              candidate_h=candidate_h,
                                              ker_fun=ker_fun,
                                              bin_weight=bin_weight,
                                              binning=binning,
                                              dtype=dtype,
                                              x=xx,
                                              y=yy)
        elif bw_select == 'LeaveOneOut':
            self.cov_bw = self.__CV_Leave_One_Curve(
                x0=cov_x0,
                candidate_h=candidate_h,
                ker_fun=ker_fun,
                binning=binning,
                bin_weight=bin_weight,
                dtype=dtype,
                x=xx,
                y=yy)
        fit_yy = lpr.Lpr(x=np.vstack(xx),
                         y=np.hstack(yy),
                         x0=cov_x0,
                         h=self.cov_bw,
                         binning=binning,
                         bin_weight=bin_weight,
                         ker_fun=ker_fun,
                         dtype=dtype).reshape(flat_shape)

    # Average with the transpose so the estimate is exactly symmetric.
    fit_yy = (fit_yy.T + fit_yy) / 2
    self.cov_fun = fit_yy - np.outer(self.mean_fun, self.mean_fun)
def __Fit_Mean(self, x, y, x0, candidate_h, binning, bin_weight, ker_fun,
               bw_select, dtype, bin_data=None):
    """Fit the mean function ``self.mean_fun``.

    A bandwidth is chosen by ``bw_select`` and stored in
    ``self.mean_bw``; the pooled data are then smoothed with it.

    Parameters
    ----------
    x, y : sequence
        Per-curve design points and responses (raw branch).
    x0 : ndarray
        Evaluation grid.
    candidate_h : ndarray
        Candidate bandwidths, one per row.
    binning : bool
        Selects binned smoothing (``lpr.Lpr_For_Bin``) or raw smoothing
        (``lpr.Lpr``).
    bin_weight
        Forwarded to ``lpr.Lpr`` (raw branch).
    ker_fun
        Kernel specification, forwarded unchanged to ``lpr``.
    bw_select : str
        ``'Partition'`` or ``'LeaveOneOut'``.  Any other value leaves
        ``self.mean_bw`` untouched (original behaviour).
    dtype
        Forwarded to ``lpr``.
    bin_data : ndarray, optional
        Per-curve binned data, indexed by curve along axis 0.  Used only
        when ``binning`` is true.

    Sets ``self.mean_bw`` and ``self.mean_fun``; returns nothing.
    """
    if binning:
        # The binned data pooled over all curves is needed by both the
        # leave-one-out selector and the final fit.
        total_bin_data = bin_data.sum(0)
        if bw_select == 'Partition':
            rand_index = np.random.permutation(self.__num_fun)
            train_bin_data, test_bin_data = np.split(
                bin_data.take(rand_index, 0), [self.__n_train])
            self.mean_bw = self.__BW_Partition(
                x0=x0,
                candidate_h=candidate_h,
                ker_fun=ker_fun,
                binning=binning,
                dtype=dtype,
                train_bin_data=train_bin_data.sum(0),
                test_bin_data=test_bin_data.sum(0))
        elif bw_select == 'LeaveOneOut':
            if self.__num_fun > 100:
                # randint's upper bound is exclusive, so this matches the
                # deprecated random_integers(0, num_fun - 1, 100).
                test_index = np.random.randint(0, self.__num_fun, 100)
            else:
                test_index = range(self.__num_fun)
            test_bin_data = bin_data.take(test_index, 0)
            self.mean_bw = self.__CV_Leave_One_Curve(
                x0=x0,
                candidate_h=candidate_h,
                ker_fun=ker_fun,
                binning=binning,
                dtype=dtype,
                bin_data=total_bin_data,
                test_bin_data=test_bin_data)
        self.mean_fun = lpr.Lpr_For_Bin(bin_data=total_bin_data,
                                        bin_width=self.__bin_width,
                                        h=self.mean_bw,
                                        ker_fun=ker_fun,
                                        dtype=dtype)
    else:
        if bw_select == 'Partition':
            self.mean_bw = self.__BW_Partition(x0=x0,
                                               candidate_h=candidate_h,
                                               ker_fun=ker_fun,
                                               binning=binning,
                                               dtype=dtype,
                                               x=x,
                                               y=y)
        elif bw_select == 'LeaveOneOut':
            self.mean_bw = self.__CV_Leave_One_Curve(
                x0=x0,
                candidate_h=candidate_h,
                ker_fun=ker_fun,
                binning=binning,
                dtype=dtype,
                x=x,
                y=y)
        # Pool all curves into a single design matrix / response vector.
        self.mean_fun = lpr.Lpr(np.vstack(x), np.hstack(y), x0, self.mean_bw,
                                binning, bin_weight, ker_fun, dtype)