def Cv_Leave_One_Curve(x, y, x0, candidate_h, ker_fun='Gaussian'):
    """Pick a bandwidth by leave-one-curve-out cross-validation.

    For each candidate bandwidth, every curve is removed in turn from the
    pooled binned data, a fit is recomputed on the grid ``x0`` from the
    remaining curves, interpolated at the held-out curve's observation
    points, and the squared prediction errors are accumulated.

    Parameters
    ----------
    x : ndarray, shape (num_fun, num_pt, d)
        Observation locations of every curve.
    y : ndarray, shape (num_fun, num_pt)
        Observed values; NaN entries are treated as missing.
    x0 : ndarray, shape grid_shape + (d,)
        Evaluation grid. It is assumed to be regular with at least two
        points per axis (the bin width is ``ptp / (n - 1)``).
    candidate_h : ndarray
        Candidate bandwidths, one per entry along axis 0
        (presumably shape (num_h, d) -- TODO confirm against ``lpr``).
    ker_fun : str
        Kernel name forwarded to the ``lpr`` helpers.

    Returns
    -------
    ndarray
        The entry of ``candidate_h`` with the smallest cross-validated
        sum of squared errors.

    Raises
    ------
    ValueError
        Raised by ``np.nanargmin`` when every candidate produced a NaN fit.
    """
    grid_shape = np.asarray(x0.shape[:-1])
    _, _, d = x.shape
    flat_x0 = x0.reshape(-1, d)
    observed = ~np.isnan(y)

    bin_width = np.ptp(flat_x0, 0) / (grid_shape - 1)
    x0_min = flat_x0.min(0)
    x0_max = flat_x0.max(0)
    grid = tuple(
        np.linspace(x0_min[i], x0_max[i], x0.shape[i]) for i in range(d))

    # Bins of all curves pooled together.
    pooled_y, pooled_num = lpr.Bin_Data(
        np.compress(observed.reshape(-1), x.reshape(-1, d), 0),
        np.compress(observed.reshape(-1), y.reshape(-1)),
        x0)

    # Bins of each single curve. They do not depend on the bandwidth, so
    # they are computed once here instead of once per candidate h.
    curve_bins = [
        lpr.Bin_Data(np.compress(mask, xi, 0), np.compress(mask, yi), x0)
        for xi, yi, mask in zip(x, y, observed)
    ]

    num_h = candidate_h.shape[0]
    # A sum of squared errors starts at zero (the original started at one,
    # a constant offset that could only blur tiny differences).
    sse = np.zeros(num_h)
    for i in range(num_h):
        h = candidate_h.take(i, 0)
        r = lpr.Get_Range(bin_width, h, ker_fun)
        delta_x = lpr.Get_Delta_x(bin_width, r)
        weight = lpr.Get_Weight(delta_x, h, ker_fun)
        # Design matrix: intercept column followed by the offsets.
        big_x = np.hstack((np.ones((delta_x.shape[0], 1)), delta_x))

        for xi, yi, (curve_y, curve_num) in zip(x, y, curve_bins):
            # Training data = pooled bins minus the held-out curve.
            ext_biny, ext_binx = lpr.Extend_Bin_Data(
                [pooled_y - curve_y, pooled_num - curve_num], r)
            train_y = lpr.Get_Linear_Solve(
                big_x.T, weight, ext_binx, ext_biny, r)
            if np.any(np.isnan(train_y)):
                # Bandwidth too small for this grid: discard the candidate.
                sse[i] = np.nan
                break
            interp_fun = sp.interpolate.RegularGridInterpolator(
                grid, train_y.reshape(x0.shape[:-1]), bounds_error=False)
            # nansum skips missing observations and out-of-grid points.
            sse[i] += np.nansum((yi - interp_fun(xi))**2)

    return candidate_h.take(np.nanargmin(sse), 0)
def __Main(self, x, y, x0, h_mean, h_cov, h_cov_dia, fve, binning,
           bin_weight, ker_fun, bw_select, dtype):
    """Run the fitting stages in order and store the scores.

    Calls, in sequence, ``__Fit_Mean``, ``__Fit_Cov``, ``__Fit_EigPairs``,
    ``__Fit_Sigma2`` and ``__Fit_Fpc_Scores``; the result of the last one
    is stored in ``self.fpc_scores``.

    Parameters
    ----------
    x : sequence of ndarray
        Observation locations, one entry per function
        (documented upstream as (#functions, #points, #dimension)).
    y : sequence of ndarray
        Observed values, one entry per function
        (documented upstream as (#functions, #points)).
    x0 : ndarray
        Evaluation grid.
    h_mean, h_cov, h_cov_dia
        Bandwidth arguments forwarded to the mean, covariance and
        covariance-diagonal stages respectively.
    fve
        Forwarded to ``__Fit_EigPairs``.
    binning : bool
        When truthy, each function is binned once up front and the binned
        data is shared by the mean and covariance stages.
    bin_weight, ker_fun, bw_select, dtype
        Forwarded unchanged to the fitting stages.
    """
    # Normalise once: an identity test against True would reject truthy
    # non-bool flags such as numpy.bool_(True) and leave bin_data unset
    # while the later stages still take their binned code path.
    binning = bool(binning)
    if binning:
        bin_data = np.asarray(
            [lpr.Bin_Data(xi, yi, x0, bin_weight) for xi, yi in zip(x, y)])
    else:
        bin_data = None

    self.__Fit_Mean(x, y, x0, h_mean, binning, bin_weight, ker_fun,
                    bw_select, dtype, bin_data)
    self.__Fit_Cov(x, y, x0, h_cov, binning, bin_weight, ker_fun,
                   bw_select, dtype, bin_data)
    self.__Fit_EigPairs(fve)
    self.__Fit_Sigma2(x, y, x0, h_cov_dia, binning, bin_weight, ker_fun,
                      bw_select, dtype)
    # PACE
    self.fpc_scores = self.__Fit_Fpc_Scores(x, y)
def __Get_Row_Cov(self, x, y, cov_x0, binning, bin_weight, bin_data=None):
    """Build the raw covariance data from within-curve observation pairs.

    Pairs of a point with itself are excluded in both modes.

    Parameters
    ----------
    x : sequence of ndarray
        Observation locations per function, each (num_pt, d).
    y : sequence of ndarray
        Observed values per function, each of size num_pt.
    cov_x0 : ndarray
        Covariance evaluation grid; its last axis is the coordinate axis.
    binning : bool
        Selects the binned or the raw-pairs representation.
    bin_weight
        Forwarded to ``lpr.Bin_Data`` (binned mode only).
    bin_data : ndarray, optional
        Per-function binned data, indexed ``[function, channel, ...]``
        with two channels (presumably binned values and bin counts --
        TODO confirm against ``lpr.Bin_Data``). Required in binned mode.

    Returns
    -------
    ndarray or list
        Binned mode: array of shape ``(2,) + cov_x0.shape[:-1]``.
        Otherwise: ``[xx, yy, cov_x0]`` where ``xx[i]`` stacks the
        coordinates of every ordered pair of distinct points of function
        ``i`` side by side and ``yy[i]`` holds the matching products.

    Raises
    ------
    TypeError
        If binned mode is requested without ``bin_data``.
    """
    cov_grid_shape = cov_x0.shape[:-1]

    if binning:
        if bin_data is None:
            raise TypeError('bin_data is required when binning is enabled')
        # Only the binned path needs bin_data; measuring it up front made
        # the raw path fail on its default bin_data=None.
        num_fun = len(bin_data)
        flat_first = bin_data.take(0, 1).reshape(num_fun, -1)
        flat_second = bin_data.take(1, 1).reshape(num_fun, -1)
        # Sum over functions of the outer product of each function's bins.
        bin_cov_data = np.asarray([
            np.matmul(flat_first.T, flat_first),
            np.matmul(flat_second.T, flat_second),
        ])

        # Contribution of every point paired with itself, binned on the
        # covariance grid so that it can be subtracted out.
        bin_cov_diag_data = np.zeros((2, ) + cov_grid_shape)
        for i in range(num_fun):
            bin_cov_diag_data += lpr.Bin_Data(
                np.tile(x[i], 2), y[i]**2, cov_x0, bin_weight)

        return (bin_cov_data.reshape((2, ) + cov_grid_shape) -
                bin_cov_diag_data)

    xx = [None] * self.__num_fun
    yy = [None] * self.__num_fun
    for i in range(self.__num_fun):
        y_i = np.ravel(y[i])
        # Index pairs (a, b) with a != b, in row-major order.
        first, second = np.nonzero(~np.eye(y_i.size, dtype=bool))
        yy[i] = y_i[first] * y_i[second]
        xx[i] = np.column_stack((x[i][first], x[i][second]))
    return [xx, yy, cov_x0]
def __Fit_Sigma2(self, x, y, x0, candidate_h, binning, bin_weight, ker_fun,
                 bw_select, dtype):
    """Estimate the noise variance and store it in ``self.sigma2``.

    The squared observations are smoothed on ``x0`` and ``mean_fun**2`` is
    subtracted, giving ``self.cov_dia_fun``. The diagonal of the covariance
    rebuilt from ``self.eig_fun`` / ``self.eig_val`` is then subtracted and
    the difference is averaged over the central half of every grid axis.
    Negative averages are replaced by 0.

    Parameters
    ----------
    x, y : sequence of ndarray
        Observation locations and values, one entry per function.
    x0 : ndarray
        Evaluation grid.
    candidate_h : ndarray
        Candidate bandwidths for the smoother.
    binning : bool
        Smooth binned data (``lpr.Lpr_For_Bin``) or raw data (``lpr.Lpr``).
    bin_weight, ker_fun, dtype
        Forwarded to the ``lpr`` helpers.
    bw_select : str
        ``'Partition'`` or ``'LeaveOneOut'``. For any other value no
        bandwidth is selected here and an existing ``self.cov_diag_bw``
        is used as is.

    Side effects
    ------------
    Sets ``self.cov_diag_bw`` (when a selector runs), ``self.cov_dia_fun``
    and ``self.sigma2``.
    """
    yy = [yi**2 for yi in y]

    if binning:
        cov_diag_bin_data = np.asarray(
            [lpr.Bin_Data(xi, yyi, x0, bin_weight) for xi, yyi in zip(x, yy)])
        pooled_bin_data = cov_diag_bin_data.sum(0)

        if bw_select == 'Partition':
            rand_index = np.random.permutation(self.__num_fun)
            train_bin_data, test_bin_data = np.split(
                cov_diag_bin_data.take(rand_index, 0), [self.__n_train])
            self.cov_diag_bw = self.__BW_Partition(
                x0=x0,
                candidate_h=candidate_h,
                ker_fun=ker_fun,
                binning=binning,
                dtype=dtype,
                train_bin_data=train_bin_data.sum(0),
                test_bin_data=test_bin_data.sum(0))
        elif bw_select == 'LeaveOneOut':
            if self.__num_fun > 100:
                # 100 test curves drawn with replacement. randint's upper
                # bound is exclusive, so this matches the deprecated
                # random_integers(0, num_fun - 1, 100).
                test_index = np.random.randint(0, self.__num_fun, 100)
            else:
                test_index = range(self.__num_fun)
            self.cov_diag_bw = self.__CV_Leave_One_Curve(
                x0=x0,
                candidate_h=candidate_h,
                ker_fun=ker_fun,
                binning=binning,
                dtype=dtype,
                bin_data=pooled_bin_data,
                test_bin_data=cov_diag_bin_data.take(test_index, 0))

        smoothed_sq = lpr.Lpr_For_Bin(
            bin_data=pooled_bin_data,
            bin_width=self.__bin_width,
            h=self.cov_diag_bw,
            ker_fun=ker_fun,
            dtype=dtype)
    else:
        # The selected bandwidth belongs in cov_diag_bw: it is what the
        # smoother below reads, and writing it to cov_bw would overwrite
        # the bandwidth chosen for the covariance surface.
        if bw_select == 'Partition':
            self.cov_diag_bw = self.__BW_Partition(
                x0=x0,
                candidate_h=candidate_h,
                ker_fun=ker_fun,
                binning=binning,
                dtype=dtype,
                x=x,
                y=yy)
        elif bw_select == 'LeaveOneOut':
            self.cov_diag_bw = self.__CV_Leave_One_Curve(
                x0=x0,
                candidate_h=candidate_h,
                ker_fun=ker_fun,
                binning=binning,
                dtype=dtype,
                x=x,
                y=yy)

        smoothed_sq = lpr.Lpr(
            x=np.vstack(x),
            y=np.hstack(yy),
            x0=x0,
            h=self.cov_diag_bw,
            binning=binning,
            bin_weight=bin_weight,
            ker_fun=ker_fun,
            dtype=dtype)

    self.cov_dia_fun = smoothed_sq - self.mean_fun**2

    # Row 0: first kept index per axis (n/4); row 1: end index (3n/4).
    interval = np.array([(self.__grid_shape / 4).astype('int'),
                         (3 * self.__grid_shape / 4).astype('int')])
    restruct_cov_fun = np.matmul(
        np.matmul(self.eig_fun.T, np.diag(self.eig_val)), self.eig_fun)
    sigma2_t = (self.cov_dia_fun - np.diag(restruct_cov_fun)).reshape(
        self.__grid_shape)
    # Keep only the central half of the grid along every axis.
    for i in range(self.__d):
        left, right = interval.take(i, 1)
        sigma2_t = sigma2_t.take(np.arange(left, right), i)

    sigma2 = sigma2_t.mean()
    # A variance cannot be negative; clip estimation noise at zero.
    self.sigma2 = 0 if sigma2 < 0 else sigma2