def nndr(X, Y, **kwargs):
    """
    Estimate projection vectors from the first-layer weights of a trained
    RegressionNetwork.

    The network is trained on (optionally whitened) data, the outer product
    matrix W W^T of its first-layer weight matrix W is formed, and the leading
    left singular vectors of that matrix are returned.

    Parameters
    ----------
    X : array-like, shape = [N, D]
        Training data, where N is the number of samples and D is the number
        of features.

    Y : array-like
        Response variable, passed unchanged to the network's train method.

    Argument dictionary should contain:
    kwargs = {
        'd' : number of leading singular vectors to keep (int)

        'return_mat' : Boolean whether the outer product matrix should be
                       returned as well (defaults to False).

        'whiten' : If True, the data is passed through whiten_data before
                   training and the result is mapped back (defaults to False).
    }
    The complete dictionary is also forwarded to RegressionNetwork(**kwargs)
    and to its train method.

    Returns
    -------
    proj_vecs : array-like, shape = [D, d]
        Leading d left singular vectors of the outer product matrix. With
        'whiten' they are first multiplied by the second return value of
        whiten_data and re-orthonormalized with a QR decomposition.

    opm_matrix : array-like, shape = [D, D]
        Outer product matrix, only if return_mat option is True.
    """
    d = kwargs['d']
    return_mat = kwargs.get('return_mat', False)
    whiten = kwargs.get('whiten', False)

    # The two modes differ only in the network input and in the final
    # back-transformation, so everything in between is shared.
    if whiten:
        inputs, cov_all_sqrtinv = whiten_data(X)
    else:
        inputs = X

    reg_net = RegressionNetwork(**kwargs)
    reg_net.train(inputs, Y, **kwargs)
    weight_matrix = reg_net.net.layers[0].get_weights()[0]

    # Sum over columns w_i of outer(w_i, w_i) equals W W^T. The cast keeps the
    # result in double precision even if the network stores narrower floats.
    weights = np.asarray(weight_matrix, dtype=float)
    opm_matrix = weights.dot(weights.T)
    U, S, V = np.linalg.svd(opm_matrix)

    if whiten:
        # Apply inverse transformation; the mapped vectors are no longer
        # orthonormal, hence the QR step.
        vecs = cov_all_sqrtinv.dot(U[:, :d])
        proj_vecs, dummy = np.linalg.qr(vecs)
    else:
        proj_vecs = U[:, :d]

    if return_mat:
        return proj_vecs, opm_matrix
    return proj_vecs
def save(X, Y, **kwargs):
    """
    Estimate projection vectors from the SAVE matrix of whitened data.

    The response is partitioned into level sets. For every non-empty level
    set the covariance of the whitened samples is subtracted from the
    identity, the difference is squared, and the squares are summed with the
    empirical level-set probabilities as weights.

    Parameters
    ----------
    X : array-like, shape = [N, D]
        Training data, where N is the number of samples and D is the number
        of features.

    Y : array-like, shape = [N]
        Response variable, where N is the number of samples.

    Argument dictionary should contain:
    kwargs = {
        'd' : intrinsic dimension (int)

        'n_levelsets' : number of slices to use (int)

        'split_by' : 'dyadic' (dyadic decomposition) or 'stateq'
                     (statistically equivalent blocks) (default: 'dyadic')

        'return_mat' : Boolean whether the SAVE matrix should be returned
                       (defaults to False).
    }

    Returns
    -------
    proj_vecs : array-like, shape = [D, d]
        Orthonormal system (Q factor of a QR decomposition) obtained from the
        leading d left singular vectors of the SAVE matrix after mapping them
        back with the second return value of whiten_data.

    M : array-like, shape = [D, D]
        SAVE matrix, only if return_mat option is True.
    """
    # Read the configuration
    n_components = kwargs['d']
    n_slices = kwargs['n_levelsets']
    partition_rule = kwargs.get('split_by', 'dyadic')
    want_matrix = kwargs.get('return_mat', False)
    n_samples, n_features = X.shape

    # Standardize the inputs and assign every sample to a level set
    whitened, inv_sqrt_cov = whiten_data(X)
    slice_ids = split(Y, n_slices, partition_rule)

    save_matrix = np.zeros((n_features, n_features))
    slice_weights = np.zeros(n_slices)
    for level in range(n_slices):
        in_slice = slice_ids == level
        slice_weights[level] = float(np.count_nonzero(in_slice)) / float(n_samples)
        if slice_weights[level] == 0:
            # Empty level set: nothing to contribute
            continue
        # Covariance of the samples that fall into this level set
        slice_cov = empirical_covariance(whitened[in_slice, :])
        save_matrix += slice_weights[level] * (
            np.eye(n_features) - slice_cov).dot(np.eye(n_features) - slice_cov)

    left_vectors = np.linalg.svd(save_matrix)[0]
    # Apply inverse transformation and re-orthonormalize
    back_mapped = inv_sqrt_cov.dot(left_vectors[:, :n_components])
    proj_vecs = np.linalg.qr(back_mapped)[0]
    return (proj_vecs, save_matrix) if want_matrix else proj_vecs
def directional_regression(X, Y, **kwargs):
    """
    Estimate projection vectors with (generalized) directional regression.

    On whitened data, every non-empty level set of the response contributes
    its sample mean U_h and the matrix V_h = (level-set covariance - identity).
    The weighted sums of V_h^2, of outer(U_h, U_h) and of <U_h, U_h> are
    combined into the matrix F whose leading singular vectors are returned.

    Parameters
    ----------
    X : array-like, shape = [N, D]
        Training data, where N is the number of samples and D is the number
        of features.

    Y : array-like, shape = [N]
        Response variable, where N is the number of samples.

    Argument dictionary should contain:
    kwargs = {
        'd' : intrinsic dimension (int)

        'n_levelsets' : number of slices to use (int)

        'split_by' : 'dyadic' (dyadic decomposition) or 'stateq'
                     (statistically equivalent blocks) (default: 'dyadic')

        'return_mat' : Boolean whether the matrix F should be returned
                       (defaults to False).

        't1' : scaling parameter 1 for generalized directional regression
               (default: 0.5)

        't2' : scaling parameter 2 for generalized directional regression
               (default: 1.0)
    }

    Returns
    -------
    proj_vecs : array-like, shape = [D, d]
        Orthonormal system (Q factor of a QR decomposition) obtained from the
        leading d left singular vectors of F after mapping them back with the
        second return value of whiten_data.

    F : array-like, shape = [D, D]
        Directional regression matrix, only if return_mat option is True.
    """
    # Read the configuration
    n_components = kwargs['d']
    n_slices = kwargs['n_levelsets']
    partition_rule = kwargs.get('split_by', 'dyadic')
    want_matrix = kwargs.get('return_mat', False)
    t1 = kwargs.get('t1', 0.5)  # generalized DR parameter 1
    t2 = kwargs.get('t2', 1.0)  # generalized DR parameter 2
    n_samples, n_features = X.shape

    # Standardize the inputs and assign every sample to a level set
    whitened, inv_sqrt_cov = whiten_data(X)
    slice_ids = split(Y, n_slices, partition_rule)

    # Accumulators for the three weighted sums
    second_moment_term = np.zeros((n_features, n_features))
    mean_outer_term = np.zeros((n_features, n_features))
    mean_norm_term = 0
    slice_weights = np.zeros(n_slices)
    for level in range(n_slices):
        in_slice = slice_ids == level
        slice_weights[level] = float(np.count_nonzero(in_slice)) / float(n_samples)
        if slice_weights[level] == 0:
            # Empty level set: nothing to contribute
            continue
        weight = slice_weights[level]
        slice_mean = np.mean(whitened[in_slice, :], axis=0)
        slice_dev = empirical_covariance(whitened[in_slice, :]) - np.eye(n_features)
        second_moment_term += weight * slice_dev.dot(slice_dev)
        mean_outer_term += weight * np.outer(slice_mean, slice_mean)
        mean_norm_term += weight * np.dot(slice_mean, slice_mean)

    # Combine the three pieces (same evaluation order as a single expression)
    part_a = t1 * second_moment_term
    part_b = (1.0 - t1) * mean_outer_term.dot(mean_outer_term)
    part_c = (1.0 - t1) * t2 * mean_norm_term * mean_outer_term
    F = part_a + part_b + part_c

    left_vectors = np.linalg.svd(F)[0]
    # Apply inverse transformation; the mapped vectors need not have norm 1,
    # so a QR decomposition restores an orthonormal system.
    back_mapped = inv_sqrt_cov.dot(left_vectors[:, :n_components])
    proj_vecs = np.linalg.qr(back_mapped)[0]
    return (proj_vecs, F) if want_matrix else proj_vecs
def iht(X, Y, **kwargs):
    """
    Estimate projection vectors with the iterative Hessian transformation.

    On whitened data Z the OLS vector b = mean(Z * (Y - mean(Y))) and the
    response-weighted second moment matrix H = mean(w * Z Z^T) are computed,
    where w is either the centered response or the residual of a linear fit.
    The matrix M = sum_{k=0}^{d-1} outer(H^k b, H^k b) is formed and its
    leading singular vectors are returned.

    Parameters
    ----------
    X : array-like, shape = [N, D]
        Training data, where N is the number of samples and D is the number
        of features.

    Y : array-like, shape = [N]
        Response variable, where N is the number of samples.

    Argument dictionary should contain:
    kwargs = {
        'd' : intrinsic dimension (int); also the number of terms
              b, Hb, ..., H^(d-1) b that enter M

        'use_residuals' : If True, H is weighted with the residuals of a
                          linear regression of Y on X instead of the centered
                          response (defaults to False)

        'return_mat' : Boolean whether the matrix M should be returned
                       (defaults to False).
    }

    Returns
    -------
    proj_vecs : array-like, shape = [D, d]
        Orthonormal system (Q factor of a QR decomposition) obtained from the
        leading d left singular vectors of M after mapping them back with the
        second return value of whiten_data.

    M : array-like, shape = [D, D]
        IHT matrix, only if return_mat option is True.
    """
    # Extract arguments from dictionary
    d = kwargs['d']
    use_residuals = kwargs.get('use_residuals', False)
    return_mat = kwargs.get('return_mat', False)
    N, D = X.shape

    # Standardize X
    Z, cov_all_sqrtinv = whiten_data(X)

    # Compute OLS vector (in whitened coordinates)
    ols_vector = np.mean((Z.T * (Y - np.mean(Y))).T, axis=0)

    # Weights of the Hessian: regression residuals or the centered response
    if use_residuals:
        linreg = LinearRegression()
        linreg = linreg.fit(X, Y)
        weights = Y - linreg.predict(X)
    else:
        weights = Y - np.mean(Y)

    # Hessian matrix, built from the whitened data so that it lives in the
    # same coordinates as ols_vector and as the back-transformation below.
    # Centering is a no-op if whiten_data already returns centered data.
    Z_centered = Z - np.mean(Z, axis=0)
    hessian = (Z_centered.T * weights).dot(Z_centered) / float(N)

    # Apply iterative transformations: accumulate outer products of
    # b, Hb, H^2 b, ... The Hessian must be the factor in each step; squaring
    # a matrix that starts as the identity would leave it at the identity and
    # reduce M to a rank-one multiple of outer(b, b).
    M = np.zeros((D, D))  # critical mat for IHT
    krylov_vector = ols_vector
    for _ in range(d):
        M += np.outer(krylov_vector, krylov_vector)
        krylov_vector = hessian.dot(krylov_vector)

    # Leading singular vectors of M
    U, S, V = np.linalg.svd(M)

    # Apply inverse transformation and re-orthonormalize
    vecs = cov_all_sqrtinv.dot(U[:, :d])
    proj_vecs, dummy = np.linalg.qr(vecs)
    if return_mat:
        return proj_vecs, M
    return proj_vecs
def rclr(X, Y, **kwargs):
    """
    Estimate projection vectors from level-set-wise least squares solutions.

    The response is partitioned into level sets. On every non-empty level set
    a local OLS vector pinv(Cov(X)) Cov(X, Y) is computed, and the outer
    products of these vectors are summed with the empirical level-set
    probabilities as weights. The leading singular vectors of that matrix
    are returned.

    Parameters
    ----------
    X : array-like, shape = [N, D]
        Training data, where N is the number of samples and D is the number
        of features.

    Y : array-like, shape = [N]
        Response variable, where N is the number of samples.

    Argument dictionary should contain:
    kwargs = {
        'd' : intrinsic dimension (int)

        'n_levelsets' : number of slices to use (int)

        'split_by' : 'dyadic' (dyadic decomposition) or 'stateq'
                     (statistically equivalent blocks) (default: 'dyadic')

        'whiten' : If True, the data is whitened first, the method is run on
                   the whitened data and the result is mapped back
                   (default: False)

        'return_proxy' : If True, a data-driven guess for the projection
                         error is returned as well (default: False)
    }
    NOTE: a 'return_mat' entry is accepted but ignored; this function never
    returns the accumulated matrix.

    Returns
    -------
    proj_vecs : array-like, shape = [D, d]
        Orthonormal system spanning the estimated subspace, where d refers to
        the intrinsic dimension.

    data_driven_proxy : float, only if return_proxy option is True
        Proxy for the projection error. Only level sets with more than 5 * D
        samples and a non-zero local OLS vector contribute; if none does, the
        proxy is based on the placeholder value 1e16. With 'whiten' the proxy
        is the one computed on the whitened data.
    """
    # Extract arguments from dictionary
    d = kwargs['d']
    n_levelsets = kwargs['n_levelsets']
    split_by = kwargs.get('split_by', 'dyadic')
    return_proxy = kwargs.get('return_proxy', False)
    whiten = kwargs.get('whiten', False)
    N, D = X.shape
    data_driven_proxy = 0

    if whiten:
        # Standardize X and run the unwhitened method on the result
        Z, cov_all_sqrtinv = whiten_data(X)
        copy_kwargs = copy.deepcopy(kwargs)
        copy_kwargs['whiten'] = False  # prevents infinite recursion
        if return_proxy:
            transformed_vecs, data_driven_proxy = rclr(Z, Y, **copy_kwargs)
        else:
            transformed_vecs = rclr(Z, Y, **copy_kwargs)
        # Apply inverse transformation and re-orthonormalize
        vecs = cov_all_sqrtinv.dot(transformed_vecs)
        proj_vecs, dummy = np.linalg.qr(vecs)
    else:
        # Create partition
        labels = split(Y, n_levelsets, split_by)
        M = np.zeros((D, D))  # Weighted sum of local OLS outer products
        empirical_probabilities = np.zeros(n_levelsets)
        for i in range(n_levelsets):
            in_level = labels == i
            empirical_probabilities[i] = float(len(
                np.where(in_level)[0])) / float(N)
            if empirical_probabilities[i] == 0:
                continue
            X_level = X[in_level, :]
            Y_level = Y[in_level]

            # Local least squares: pinv(Cov(X)) Cov(X, Y) on this level set.
            # X is centered per feature (axis=0); because the Y factor is
            # centered too, the centering of X does not change the value of
            # the cross-covariance, only its numerical conditioning.
            sigma_j = empirical_covariance(X_level)
            sigma_j_inv = np.linalg.pinv(sigma_j)
            rhs = np.mean((X_level - np.mean(X_level, axis=0)).T *
                          (Y_level - np.mean(Y_level)), axis=1)
            local_ols = sigma_j_inv.dot(rhs)
            M += empirical_probabilities[i] * np.outer(local_ols, local_ols)

            # Compute proxy error if desired
            if return_proxy:
                ols_norm = np.linalg.norm(local_ols)
                # Removing some uncertainty by using only sufficiently
                # populated level sets
                if empirical_probabilities[i] * N > 5.0 * D and ols_norm > 0.0:
                    # Projections onto the local OLS direction and its
                    # orthogonal complement
                    normalized_ols = local_ols / ols_norm
                    P = np.outer(normalized_ols, normalized_ols)
                    Q = np.eye(D) - P
                    # Spectral norms of the compressed covariance and of the
                    # compressed pseudo-inverse covariance
                    n_PSP = np.linalg.norm(P.dot(sigma_j).dot(P), 2)
                    n_PSDP = np.linalg.norm(P.dot(sigma_j_inv).dot(P), 2)
                    n_QSQ = np.linalg.norm(Q.dot(sigma_j).dot(Q), 2)
                    n_QSDQ = np.linalg.norm(Q.dot(sigma_j_inv).dot(Q), 2)
                    # Local response variance
                    local_var_y = np.var(Y_level)
                    local_kappa = np.maximum(n_PSP * n_PSDP, n_QSQ * n_QSDQ)
                    local_eta_q = np.sqrt(local_var_y * n_QSDQ)
                    data_driven_proxy += np.sqrt(
                        empirical_probabilities[i] *
                        local_kappa) * ols_norm * local_eta_q

        U, S, V = np.linalg.svd(M)
        if return_proxy:
            if data_driven_proxy == 0:
                # This just means that on no level set there were enough
                # samples to compute the proxy quantity, thus we effectively
                # do not have a proxy
                data_driven_proxy = 1e16
            # Scale by the d-th singular value of M
            data_driven_proxy = data_driven_proxy * np.sqrt(
                1 + np.log(n_levelsets)) * 1.0 / S[d - 1]
        # No whitening was applied here, so the leading singular vectors are
        # used directly.
        proj_vecs = U[:, :d]

    if return_proxy:
        return proj_vecs, data_driven_proxy
    return proj_vecs