def Cv_Leave_One_Curve(x, y, x0, candidate_h, ker_fun='Gaussian'):
    """Select a bandwidth from ``candidate_h`` by leave-one-curve-out CV.

    For each candidate bandwidth, every curve is held out in turn: its
    binned contribution is subtracted from the binned data of all curves,
    a fit on the grid ``x0`` is computed from the remainder with the
    ``lpr`` helpers, the fit is interpolated at the held-out curve's own
    locations, and the squared prediction error is accumulated.  The
    candidate with the smallest accumulated error is returned.

    Parameters
    ----------
    x : ndarray, shape (num_fun, num_pt, d)
        Observation locations of every curve.
    y : ndarray, shape (num_fun, num_pt)
        Observed values.  NaN entries are excluded from binning and are
        ignored in the error sum.
    x0 : ndarray, shape (*grid_shape, d)
        Regular evaluation grid; the last axis holds the coordinates.
    candidate_h : ndarray
        Candidate bandwidths, indexed along axis 0.
    ker_fun : str, optional
        Kernel identifier forwarded unchanged to the ``lpr`` helpers.

    Returns
    -------
    The entry of ``candidate_h`` (along axis 0) with the smallest
    cross-validation error.

    Raises
    ------
    ValueError
        If no candidate is usable, i.e. every candidate produced NaN
        fitted values for some held-out curve (or ``candidate_h`` is
        empty).
    """
    grid_shape = np.asarray(x0.shape[:-1])
    num_fun, _, d = x.shape
    no_nan_val = ~np.isnan(y)

    grid_points = x0.reshape(-1, d)
    # NOTE: every grid axis needs at least two points, otherwise this
    # division is 0 / 0.
    bin_width = np.ptp(grid_points, 0) / (grid_shape - 1)
    x0_min = grid_points.min(0)
    x0_max = grid_points.max(0)
    # Per-axis coordinates of the grid, as RegularGridInterpolator expects.
    grid = tuple(
        np.linspace(x0_min.take(i), x0_max.take(i), x0.shape[i])
        for i in range(d))

    # Bin the non-missing observations of all curves together once; the
    # contribution of the held-out curve is subtracted inside the loop.
    flat_mask = no_nan_val.reshape(-1)
    bin_data_y, bin_data_num = lpr.Bin_Data(
        np.compress(flat_mask, x.reshape(-1, d), 0),
        np.compress(flat_mask, y.reshape(-1)),
        x0)

    num_candidates = candidate_h.shape[0]
    # Starts at one rather than zero: a constant offset shared by all
    # candidates, so it does not change which candidate is the minimiser.
    sse = np.ones(num_candidates)
    for i in range(num_candidates):
        h = candidate_h.take(i, 0)
        r = lpr.Get_Range(bin_width, h, ker_fun)
        delta_x = lpr.Get_Delta_x(bin_width, r)
        weight = lpr.Get_Weight(delta_x, h, ker_fun)
        # Design matrix: an intercept column followed by the offsets.
        big_x = np.hstack((np.ones((delta_x.shape[0], 1)), delta_x))

        h_too_small = False
        # Every curve serves as the held-out curve exactly once.
        for fun_i in range(num_fun):
            test_funi = x.take(fun_i, 0)
            test_yi = y.take(fun_i, 0)
            fun_no_nan_value = no_nan_val.take(fun_i, 0)

            fun_biny, fun_binx = lpr.Bin_Data(
                np.compress(fun_no_nan_value, test_funi, 0),
                np.compress(fun_no_nan_value, test_yi),
                x0)
            # Leave-one-curve-out bins: totals minus this curve's share.
            biny = bin_data_y - fun_biny
            binx = bin_data_num - fun_binx
            ext_biny, ext_binx = lpr.Extend_Bin_Data([biny, binx], r)
            train_y = lpr.Get_Linear_Solve(
                big_x.T, weight, ext_binx, ext_biny, r)

            # A NaN anywhere in the fit disqualifies this bandwidth.
            if np.any(np.isnan(train_y)):
                h_too_small = True
                break

            # bounds_error=False: locations outside the grid evaluate to
            # NaN (the interpolator's default fill value) and are then
            # skipped by nansum, as are NaN observations.
            interp_fun = sp.interpolate.RegularGridInterpolator(
                grid, train_y.reshape(x0.shape[:-1]), bounds_error=False)
            interp_y = interp_fun(test_funi)
            sse[i] += np.nansum((test_yi - interp_y)**2)

        if h_too_small:
            sse[i] = np.nan

    if np.all(np.isnan(sse)):
        # np.nanargmin would raise an opaque "All-NaN slice" ValueError.
        raise ValueError(
            'no usable bandwidth in candidate_h: every candidate produced '
            'NaN fitted values')
    opt_h = candidate_h.take(np.nanargmin(sse), 0)
    return opt_h
def __CV_Cov_Leave_One_Out(self, x, y, x0, candidate_h, ker_fun):
    """Select a bandwidth for the product surface by leave-one-curve-out CV.

    For every curve, all pairwise products ``y[j] * y[k]`` are formed and
    assigned to a flat cell index on the ``n_grid x n_grid`` pair grid.
    For each candidate bandwidth, every curve is held out in turn: its
    binned products are subtracted from the totals, a surface is fitted
    from the remainder with the ``lpr`` helpers, the surface is
    interpolated at the held-out curve's own location pairs, and the
    squared prediction error is accumulated.

    Reads ``self.__grid_shape``, ``self.__bin_width``, ``self.__num_fun``,
    ``self.__num_pt``, ``self.__d`` and ``self.__grid``.

    Parameters
    ----------
    x : ndarray, shape (num_fun, num_pt, d)
        Observation locations of every curve.
    y : ndarray, shape (num_fun, num_pt)
        Observed values; products involving NaN are dropped from the bins
        and ignored in the error sum.
    x0 : ndarray, shape (*grid_shape, d)
        Regular evaluation grid; the last axis holds the coordinates.
    candidate_h : ndarray
        Candidate bandwidths, indexed along axis 0.
    ker_fun : str
        Kernel identifier forwarded unchanged to the ``lpr`` helpers.

    Returns
    -------
    list
        ``[h_opt, xx_p, yy]``: the selected entry of ``candidate_h``, the
        flat pair-grid cell indices of all non-NaN products, and those
        products.

    Raises
    ------
    ValueError
        If no candidate is usable, i.e. every candidate produced NaN
        fitted values for some held-out curve (or ``candidate_h`` is
        empty).
    """
    grid_shape, d = np.asarray(x0.shape[:-1]), x0.shape[-1]
    n_grid = np.prod(self.__grid_shape)
    n_cells = n_grid**2

    # Nearest grid index of every observation along each coordinate.
    x_displacement = np.rint(
        (x - x0.reshape(-1, d).min(axis=0)) / self.__bin_width
    ).astype(np.int32)
    # Row-major strides of the grid turn per-axis indices into one flat
    # grid index per observation.
    strides = np.append(grid_shape[::-1].cumprod()[-2::-1], 1)
    x_p = np.sum(x_displacement * strides, axis=2)

    # Flat pair-grid index of every (j, k) pair within a curve; j varies
    # slowest, matching the layout of the einsum products below.
    num_per_curve = x_p.shape[1]
    all_xx_p = (x_p.repeat(num_per_curve, 1) * n_grid
                + np.tile(x_p, num_per_curve)).reshape(-1)
    all_yy = np.einsum('ij,ik->ijk', y, y).reshape(-1)

    # Per-curve views, taken before missing products are filtered out.
    test_xx_p = all_xx_p.reshape(self.__num_fun, -1)
    test_yy = all_yy.reshape(self.__num_fun, -1)
    non_nan_value = ~np.isnan(all_yy)
    non_nan_by_fun = non_nan_value.reshape(self.__num_fun, -1)

    xx_p = np.compress(non_nan_value, all_xx_p)
    yy = np.compress(non_nan_value, all_yy)
    # Totals over all curves: pair counts and product sums per cell.
    tot_binx = np.bincount(xx_p, minlength=n_cells)
    tot_biny = np.bincount(xx_p, yy, minlength=n_cells)

    num_candidates = candidate_h.shape[0]
    # Starts at one rather than zero: a constant offset shared by all
    # candidates, so it does not change which candidate is the minimiser.
    sse = np.ones(num_candidates)
    for i in range(num_candidates):
        h = candidate_h.take(i, 0)
        r = lpr.Get_Range(self.__bin_width, h, ker_fun)
        delta_x = lpr.Get_Delta_x(self.__bin_width, r)
        weight = lpr.Get_Weight(delta_x, h, ker_fun)
        # Design matrix: an intercept column followed by the offsets.
        big_x = np.hstack((np.ones((delta_x.shape[0], 1)), delta_x))

        h_too_small = False
        # Every curve serves as the held-out curve exactly once.
        for fun_i in range(test_xx_p.shape[0]):
            test_funi = x.take(fun_i, 0)
            fun_non_nan_value = non_nan_by_fun.take(fun_i, 0)
            fun_xx = np.compress(fun_non_nan_value, test_xx_p.take(fun_i, 0))
            fun_yy = np.compress(fun_non_nan_value, test_yy.take(fun_i, 0))
            fun_binx = np.bincount(fun_xx, minlength=n_cells)
            fun_biny = np.bincount(fun_xx, fun_yy, minlength=n_cells)

            # Leave-one-curve-out bins: totals minus this curve's share.
            binx = tot_binx - fun_binx
            biny = tot_biny - fun_biny
            ext_biny, ext_binx = lpr.Extend_Bin_Data(
                [biny.reshape(n_grid, n_grid), binx.reshape(n_grid, n_grid)],
                r)
            train_yy = lpr.Get_Linear_Solve(
                big_x.T, weight, ext_binx, ext_biny, r)

            # A NaN anywhere in the fit disqualifies this bandwidth.
            if np.any(np.isnan(train_yy)):
                h_too_small = True
                break

            # The surface lives on the grid crossed with itself, hence the
            # doubled axes and shape.  bounds_error=False: pairs outside
            # the grid evaluate to NaN and are skipped by nansum.
            # NOTE(review): ``self.__grid * 2`` presumes self.__grid is a
            # tuple of per-axis coordinate arrays -- confirm.
            interp_fun = sp.interpolate.RegularGridInterpolator(
                self.__grid * 2,
                train_yy.reshape(x0.shape[:-1] * 2),
                bounds_error=False)
            # Query points: every (j, k) location pair of the held-out
            # curve, in the same order as its products in test_yy.
            pair_points = np.hstack((
                test_funi.repeat(self.__num_pt, 0),
                np.tile(test_funi.reshape(-1),
                        self.__num_pt).reshape(-1, self.__d)))
            interp_y = interp_fun(pair_points)
            sse[i] += np.nansum((test_yy.take(fun_i, 0) - interp_y)**2)

        if h_too_small:
            sse[i] = np.nan

    if np.all(np.isnan(sse)):
        # np.nanargmin would raise an opaque "All-NaN slice" ValueError.
        raise ValueError(
            'no usable bandwidth in candidate_h: every candidate produced '
            'NaN fitted values')
    h_opt = candidate_h.take(np.nanargmin(sse), 0)
    return [h_opt, xx_p, yy]