def _bootstrap_pool(X, Y, X_saliences, Y_saliences, n_components,
                    procrustes, algorithm, boot_i):
    """Compute one bootstrap replicate of the saliences (pool worker).

    Rows of ``X`` and ``Y`` are resampled jointly with replacement, each
    resampled matrix is passed through ``scale``, and a truncated SVD is
    taken of the cross-product ``Y_boot.T . X_boot``. The replicate's
    saliences are then aligned to the reference saliences with
    ``_procrustes_rotation``: the rotation is estimated on whichever side
    has fewer rows and the same rotation matrix is applied to the other.

    Parameters
    ----------
    X, Y : arrays indexed as ``[rows, columns]``; the same row indices are
        drawn for both.
    X_saliences, Y_saliences : reference saliences used as rotation targets.
    n_components : number of components requested from ``TruncatedSVD``.
    procrustes : accepted for signature compatibility; not read here.
    algorithm : forwarded to ``TruncatedSVD``.
    boot_i : index of this bootstrap task; added to the random seed.

    Returns
    -------
    tuple
        ``(X_bootstraps, Y_bootstraps)``, the rotated saliences of this
        replicate.
    """
    # Re-seed for every task (wall-clock seconds plus the task index) so
    # that separate worker processes do not replay the same random stream.
    np.random.seed(int(time()) + boot_i)

    # Draw as many row indices as there are rows, with replacement.
    n_samples = X.shape[0]
    rows = np.random.choice(n_samples, size=n_samples, replace=True)

    x_resampled = scale(X[rows, :])
    y_resampled = scale(Y[rows, :])
    cross_product = np.dot(y_resampled.T, x_resampled)

    decomposition = TruncatedSVD(n_components, algorithm=algorithm)
    y_sal, _, x_sal_t = decomposition._fit(cross_product)
    x_sal = x_sal_t.T

    # Estimate the rotation on the side with fewer rows and reuse the
    # resulting rotation matrix for the other side.
    if len(x_sal) <= len(y_sal):
        X_bootstraps, rotation = _procrustes_rotation(X_saliences, x_sal)
        Y_bootstraps = np.dot(y_sal, rotation)
    else:
        Y_bootstraps, rotation = _procrustes_rotation(Y_saliences, y_sal)
        X_bootstraps = np.dot(x_sal, rotation)

    return X_bootstraps, Y_bootstraps
def _permute_and_calc_singular_values_process(X, Y, a, b, n_components,
                                              algorithm, output, x):
    """Compute the singular values of one permuted sample (process worker).

    The rows of one input matrix are shuffled, the cross-product
    ``Y.T . X`` is recomputed on the shuffled data, and the singular values
    of its truncated SVD are handed back through ``output``.

    Parameters
    ----------
    X, Y : arrays combined as ``np.dot(Y.T, X)``.
    a, b : unused. They were only read by leftover debugging code and are
        kept in the signature so existing call sites keep working.
    n_components : number of components requested from ``TruncatedSVD``.
    algorithm : forwarded to ``TruncatedSVD``.
    output : object with a ``put`` method (the original docstring calls it
        an output queue); receives the singular values.
    x : index of this permutation task; added to the random seed.

    Returns
    -------
    None
        The result is delivered via ``output.put``.
    """
    # Re-seed for every task (wall-clock seconds plus the task index plus a
    # fixed offset) so that processes do not replay the same random stream.
    np.random.seed(int(time()) + x + 50)

    # Shuffle the rows of one matrix to break the row pairing.
    # NOTE(review): for 2-D inputs the dot product requires X and Y to have
    # the same number of rows, so `len(X) < len(Y)` looks like it can never
    # be true and Y is always the permuted side -- confirm the intent.
    if len(X) < len(Y):
        X_perm = np.random.permutation(X)
        covariance_perm = np.dot(Y.T, X_perm)
    else:
        Y_perm = np.random.permutation(Y)
        covariance_perm = np.dot(Y_perm.T, X)

    svd = TruncatedSVD(n_components, algorithm=algorithm)
    # Only the singular values are needed; the saliences are discarded.
    _, singular_values_perm, _ = svd._fit(covariance_perm)

    output.put(singular_values_perm)
def _permute_and_calc_singular_values_pool(X, Y, X_saliences, Y_saliences,
                                           n_components, procrustes,
                                           algorithm, perm_i):
    """Compute the singular values of one permuted sample (pool worker).

    The rows of one input matrix are shuffled, the cross-product
    ``Y.T . X`` is recomputed on the shuffled data and decomposed with a
    truncated SVD. When ``procrustes`` is true the singular values are
    replaced by the third value returned from ``_procrustes_rotation``.

    Parameters
    ----------
    X, Y : arrays combined as ``np.dot(Y.T, X)``.
    X_saliences, Y_saliences : reference saliences used as rotation targets
        (only read when ``procrustes`` is true).
    n_components : number of components requested from ``TruncatedSVD``.
    procrustes : if true, rotate towards the reference saliences.
    algorithm : forwarded to ``TruncatedSVD``.
    perm_i : index of this permutation task; added to the random seed.

    Returns
    -------
    The singular values of the permuted sample (rotated when ``procrustes``
    is true).
    """
    # Re-seed for every task (wall-clock seconds plus the task index) so
    # that separate worker processes do not replay the same random stream.
    np.random.seed(int(time()) + perm_i)

    # Shuffle the rows of one matrix to break the row pairing.
    # NOTE(review): for 2-D inputs the dot product requires X and Y to have
    # the same number of rows, so `len(X) < len(Y)` looks like it can never
    # be true and Y is always the permuted side -- confirm the intent.
    if len(X) < len(Y):
        X_perm = np.random.permutation(X)
        covariance_perm = np.dot(Y.T, X_perm)
    else:
        Y_perm = np.random.permutation(Y)
        covariance_perm = np.dot(Y_perm.T, X)

    svd = TruncatedSVD(n_components, algorithm=algorithm)
    Y_saliences_perm, singular_values_perm, X_saliences_perm = svd._fit(
        covariance_perm)

    if procrustes:
        # The third output of `_fit` is transposed everywhere else in this
        # file before it is treated as X saliences. Transpose it *before*
        # the size comparison: comparing the untransposed matrix compared
        # its first dimension with the row count of the Y saliences, so the
        # "smaller side" branch for Y was effectively never taken.
        X_saliences_perm = X_saliences_perm.T

        # Per the original author, either side may be used to obtain the
        # rotated singular values; use the one with fewer rows.
        if len(X_saliences_perm) > len(Y_saliences_perm):
            _, _, singular_values_perm = _procrustes_rotation(
                Y_saliences, Y_saliences_perm, singular_values_perm)
        else:
            _, _, singular_values_perm = _procrustes_rotation(
                X_saliences, X_saliences_perm, singular_values_perm)

    return singular_values_perm
def _permute_and_calc_singular_values(X, Y, X_saliences, Y_saliences,
                                      singular_values_samples, perm_i,
                                      n_components, procrustes=False,
                                      algorithm="randomized"):
    """Compute the singular values of one permuted sample, in place.

    The rows of one input matrix are shuffled, the cross-product
    ``Y.T . X`` is recomputed on the shuffled data and decomposed with a
    truncated SVD. The resulting singular values (rotated through
    ``_procrustes_rotation`` when ``procrustes`` is true) are written into
    column ``perm_i`` of ``singular_values_samples``.

    Parameters
    ----------
    X, Y : arrays combined as ``np.dot(Y.T, X)``.
    X_saliences, Y_saliences : reference saliences used as rotation targets
        (only read when ``procrustes`` is true).
    singular_values_samples : array indexed as ``[:, perm_i]``; modified in
        place.
    perm_i : column of ``singular_values_samples`` to fill.
    n_components : number of components requested from ``TruncatedSVD``.
    procrustes : if true, rotate towards the reference saliences.
    algorithm : forwarded to ``TruncatedSVD``.

    Returns
    -------
    None
    """
    # Shuffle the rows of one matrix to break the row pairing.
    # NOTE(review): for 2-D inputs the dot product requires X and Y to have
    # the same number of rows, so `len(X) < len(Y)` looks like it can never
    # be true and Y is always the permuted side -- confirm the intent.
    if len(X) < len(Y):
        X_perm = np.random.permutation(X)
        covariance_perm = np.dot(Y.T, X_perm)
    else:
        Y_perm = np.random.permutation(Y)
        covariance_perm = np.dot(Y_perm.T, X)

    svd = TruncatedSVD(n_components, algorithm=algorithm)
    Y_saliences_perm, singular_values_perm, X_saliences_perm = svd._fit(
        covariance_perm)

    if not procrustes:
        singular_values_samples[:, perm_i] = singular_values_perm
        return

    # The third output of `_fit` is transposed everywhere else in this file
    # before it is treated as X saliences. Transpose it *before* the size
    # comparison: comparing the untransposed matrix compared its first
    # dimension with the row count of the Y saliences, so the "smaller
    # side" branch for Y was effectively never taken.
    X_saliences_perm = X_saliences_perm.T

    # Per the original author, either side may be used to obtain the
    # rotated singular values; use the one with fewer rows.
    if len(X_saliences_perm) > len(Y_saliences_perm):
        _, _, singular_values_samples[:, perm_i] = _procrustes_rotation(
            Y_saliences, Y_saliences_perm, singular_values_perm)
    else:
        _, _, singular_values_samples[:, perm_i] = _procrustes_rotation(
            X_saliences, X_saliences_perm, singular_values_perm)
def fit_pls(X, Y, n_components, scale=True, algorithm="randomized"):
    """Decompose the cross-product of ``Y`` and ``X`` with a truncated SVD.

    NOTE(review): another ``fit_pls`` with the same signature is defined
    later in this file; if both live at module level, the later definition
    replaces this one when the module is loaded.

    Parameters
    ----------
    X, Y : arrays combined as ``np.dot(Y.T, X)``.
    n_components : number of components requested from ``TruncatedSVD``.
    scale : if true, both inputs are z-scored column-wise
        (``zscore(..., axis=0, ddof=1)``) before the cross-product is formed.
    algorithm : passed to ``TruncatedSVD`` as its second positional argument.

    Returns
    -------
    tuple
        ``(X_saliences, Y_saliences, singular_values, inertia)`` where
        ``inertia`` is the sum of the singular values. When ``scale`` is
        true, ``X_scaled`` and ``Y_scaled`` are appended, giving a 6-tuple.
    """
    if scale:
        X_scaled = zscore(X, axis=0, ddof=1)
        Y_scaled = zscore(Y, axis=0, ddof=1)
        left, right = Y_scaled, X_scaled
    else:
        left, right = Y, X
    covariance = np.dot(left.T, right)

    decomposition = TruncatedSVD(n_components, algorithm)
    Y_saliences, singular_values, x_saliences_t = decomposition._fit(
        covariance)
    # The third output of `_fit` is transposed to obtain the X saliences.
    X_saliences = x_saliences_t.T
    inertia = singular_values.sum()

    results = (X_saliences, Y_saliences, singular_values, inertia)
    if scale:
        results += (X_scaled, Y_scaled)
    return results
def _boostrap(X, Y, X_saliences, Y_saliences, X_saliences_bootstraps,
              Y_saliences_bootstraps, bootstrap_i, n_components,
              algorithm="randomized"):
    """Compute one bootstrap replicate of the saliences, in place.

    Rows of ``X`` and ``Y`` are resampled jointly with replacement, each
    resampled matrix is passed through ``scale``, and a truncated SVD is
    taken of the cross-product ``Y_boot.T . X_boot``. The replicate's
    saliences are aligned to the reference saliences with
    ``_procrustes_rotation`` and stored in slice ``[:, :, bootstrap_i]`` of
    the two output arrays.

    Parameters
    ----------
    X, Y : arrays indexed as ``[rows, columns]``; the same row indices are
        drawn for both.
    X_saliences, Y_saliences : reference saliences used as rotation targets.
    X_saliences_bootstraps, Y_saliences_bootstraps : arrays indexed as
        ``[:, :, bootstrap_i]``; modified in place.
    bootstrap_i : index of the slice to fill.
    n_components : number of components requested from ``TruncatedSVD``.
    algorithm : forwarded to ``TruncatedSVD``.

    Returns
    -------
    None
    """
    # Draw as many row indices as there are rows, with replacement.
    n_samples = X.shape[0]
    rows = np.random.choice(n_samples, size=n_samples, replace=True)

    x_resampled = scale(X[rows, :])
    y_resampled = scale(Y[rows, :])
    cross_product = np.dot(y_resampled.T, x_resampled)

    decomposition = TruncatedSVD(n_components, algorithm=algorithm)
    y_sal, _, x_sal_t = decomposition._fit(cross_product)
    x_sal = x_sal_t.T

    # Estimate the rotation on the side with fewer rows and reuse the
    # resulting rotation matrix for the other side.
    if len(x_sal) <= len(y_sal):
        rotated, rotation = _procrustes_rotation(X_saliences, x_sal)
        X_saliences_bootstraps[:, :, bootstrap_i] = rotated
        Y_saliences_bootstraps[:, :, bootstrap_i] = np.dot(y_sal, rotation)
    else:
        rotated, rotation = _procrustes_rotation(Y_saliences, y_sal)
        Y_saliences_bootstraps[:, :, bootstrap_i] = rotated
        X_saliences_bootstraps[:, :, bootstrap_i] = np.dot(x_sal, rotation)
def fit_pls(X, Y, n_components, scale=True, algorithm="randomized"):
    """Decompose the cross-product of ``Y`` and ``X`` with a truncated SVD.

    Prints a progress message and the shape of the cross-product matrix to
    standard output.

    NOTE(review): an earlier ``fit_pls`` with the same signature exists in
    this file; if both live at module level, this later definition is the
    one that stays bound.

    Parameters
    ----------
    X, Y : arrays combined as ``np.dot(Y.T, X)``.
    n_components : number of components requested from ``TruncatedSVD``;
        only that many singular values/vectors are computed.
    scale : if true, both inputs are z-scored column-wise
        (``zscore(..., axis=0, ddof=1)``) before the cross-product is formed.
    algorithm : passed to ``TruncatedSVD`` as its second positional argument.

    Returns
    -------
    tuple
        If ``scale`` is true:
        ``(X_saliences, Y_saliences, singular_values, inertia, X_scaled,
        Y_scaled, sum_var)`` where ``sum_var`` is the cross-product matrix
        itself. Otherwise the 4-tuple
        ``(X_saliences, Y_saliences, singular_values, inertia)``.
        ``inertia`` is the sum of the singular values.
    """
    # Single-argument parenthesised print gives the same output on
    # Python 2 and Python 3; the bare print statement is a SyntaxError on 3.
    print("calculating SVD")

    if scale:
        X_scaled = zscore(X, axis=0, ddof=1)
        Y_scaled = zscore(Y, axis=0, ddof=1)
        covariance = np.dot(Y_scaled.T, X_scaled)
    else:
        covariance = np.dot(Y.T, X)

    print(np.shape(covariance))
    # Alias (not a copy) of the cross-product, returned when scale is true.
    sum_var = covariance

    # Computes only the first n_components largest singular values, i.e. a
    # low-rank approximation of the cross-product matrix.
    svd = TruncatedSVD(n_components, algorithm)
    Y_saliences, singular_values, X_saliences = svd._fit(covariance)
    X_saliences = X_saliences.T
    inertia = singular_values.sum()

    if scale:
        return (X_saliences, Y_saliences, singular_values, inertia,
                X_scaled, Y_scaled, sum_var)
    return X_saliences, Y_saliences, singular_values, inertia