def _bootstrap_pool(X, Y, X_saliences, Y_saliences, n_components, procrustes, algorithm, boot_i):
    """Compute one bootstrap replicate of the X and Y saliences (pool worker).

    Rows of ``X`` and ``Y`` are resampled jointly with replacement, each
    resample is passed through ``scale``, the cross-product
    ``Y_boot.T . X_boot`` is decomposed with ``TruncatedSVD`` and the
    resulting saliences are aligned with the reference saliences through
    ``_procrustes_rotation``.

    Parameters
    ----------
    X, Y
        2-D arrays indexed ``[row, column]``; the same row indices are drawn
        from both (presumably one row per observation -- confirm with caller).
    X_saliences, Y_saliences
        Reference saliences the bootstrap saliences are rotated towards.
    n_components
        Forwarded to ``TruncatedSVD``.
    procrustes
        Unused; kept so the signature stays compatible with existing callers.
    algorithm
        Forwarded to ``TruncatedSVD`` as ``algorithm``.
    boot_i
        Index of the bootstrap iteration. It is no longer needed for seeding
        and is kept only for interface compatibility.

    Returns
    -------
    tuple
        ``(X_bootstraps, Y_bootstraps)``: the rotated X-side and Y-side
        saliences of this replicate.
    """
    # Reseed the global generator from OS entropy. The previous seed,
    # int(time()) + boot_i, collides whenever the clock difference between two
    # tasks cancels their index difference (e.g. task i at second t and task
    # i - 1 at second t + 1), which yields identical bootstrap samples.
    np.random.seed()

    # Draw row indices with replacement for a sample of the original size.
    n_rows = X.shape[0]
    sample_indices = np.random.choice(n_rows, size=n_rows, replace=True)
    X_boot = X[sample_indices, :]
    Y_boot = Y[sample_indices, :]
    X_boot_scaled = scale(X_boot)
    Y_boot_scaled = scale(Y_boot)

    covariance_boot = np.dot(Y_boot_scaled.T, X_boot_scaled)
    svd = TruncatedSVD(n_components, algorithm=algorithm)
    Y_saliences_boot, _, X_saliences_boot = svd._fit(covariance_boot)
    X_saliences_boot = X_saliences_boot.T

    # It does not matter which side we use to calculate the rotation, so the
    # rotation is estimated on the side with fewer rows and then applied to
    # the other side.
    if len(X_saliences_boot) > len(Y_saliences_boot):
        Y_bootstraps, rotation_matrix = _procrustes_rotation(Y_saliences, Y_saliences_boot)
        X_bootstraps = np.dot(X_saliences_boot, rotation_matrix)
    else:
        X_bootstraps, rotation_matrix = _procrustes_rotation(X_saliences, X_saliences_boot)
        Y_bootstraps = np.dot(Y_saliences_boot, rotation_matrix)

    return X_bootstraps, Y_bootstraps
Beispiel #2
0
def _permute_and_calc_singular_values_process(X, Y, a, b, n_components,
                                              algorithm, output, x):  #perm_i
    """Compute the singular values of one permuted cross-product (process worker).

    One of ``X`` / ``Y`` is shuffled along its first axis with
    ``np.random.permutation``, the cross-product with the other matrix is
    decomposed with ``TruncatedSVD`` and the singular values are handed back
    through ``output.put``.

    Parameters
    ----------
    X, Y
        Arrays whose cross-product ``Y.T . X`` is decomposed.
    a, b
        Unused. ``a`` previously fed a leftover debug printout; both are kept
        so existing callers keep working.
    n_components
        Forwarded to ``TruncatedSVD``.
    algorithm
        Forwarded to ``TruncatedSVD`` as ``algorithm``.
    output
        Object with a ``put`` method (presumably a multiprocessing queue --
        confirm with caller) that receives the singular values.
    x
        Index of the permutation. It is no longer needed for seeding and is
        kept only for interface compatibility.

    Returns
    -------
    None
        The result is delivered through ``output``.
    """
    # Reseed the global generator from OS entropy. The previous seed,
    # int(time()) + x + 50, collides whenever the clock difference between two
    # workers cancels their index difference, which produces duplicate
    # permutations.
    np.random.seed()

    if len(X) < len(Y):
        # apply the permutation to the shorter array
        X_perm = np.random.permutation(X)
        covariance_perm = np.dot(Y.T, X_perm)
    else:
        Y_perm = np.random.permutation(Y)
        covariance_perm = np.dot(Y_perm.T, X)

    svd = TruncatedSVD(n_components, algorithm=algorithm)

    # Only the singular values are needed; both salience matrices are dropped.
    _, singular_values_perm, _ = svd._fit(covariance_perm)

    output.put(singular_values_perm)
Beispiel #3
0
def _permute_and_calc_singular_values_pool(X, Y, X_saliences, Y_saliences,
                                           n_components, procrustes, algorithm,
                                           perm_i):
    """Compute the singular values of one permuted cross-product (pool worker).

    One of ``X`` / ``Y`` is shuffled along its first axis with
    ``np.random.permutation`` and the cross-product with the other matrix is
    decomposed with ``TruncatedSVD``. When ``procrustes`` is truthy the
    singular values are replaced by the third value returned from
    ``_procrustes_rotation`` against the reference saliences.

    Parameters
    ----------
    X, Y
        Arrays whose cross-product ``Y.T . X`` is decomposed.
    X_saliences, Y_saliences
        Reference saliences, used only when ``procrustes`` is truthy.
    n_components
        Forwarded to ``TruncatedSVD``.
    procrustes
        Whether to pass the permuted result through ``_procrustes_rotation``.
    algorithm
        Forwarded to ``TruncatedSVD`` as ``algorithm``.
    perm_i
        Index of the permutation. It is no longer needed for seeding and is
        kept only for interface compatibility.

    Returns
    -------
    The singular values of the permuted cross-product (rotated when
    ``procrustes`` is truthy).
    """
    # Reseed the global generator from OS entropy. The previous seed,
    # int(time()) + perm_i, collides whenever the clock difference between two
    # tasks cancels their index difference, which produces duplicate
    # permutations.
    np.random.seed()

    if len(X) < len(Y):
        # apply the permutation to the shorter array
        X_perm = np.random.permutation(X)
        covariance_perm = np.dot(Y.T, X_perm)
    else:
        Y_perm = np.random.permutation(Y)
        covariance_perm = np.dot(Y_perm.T, X)

    svd = TruncatedSVD(n_components, algorithm=algorithm)

    Y_saliences_perm, singular_values_perm, X_saliences_perm = svd._fit(
        covariance_perm)

    if procrustes:
        # It does not matter which side we use to calculate the rotated
        # singular values, so the intent is to pick the smaller one.
        # NOTE(review): X_saliences_perm is still untransposed when the
        # lengths are compared here, unlike in _bootstrap_pool where the
        # transpose happens first. Confirm this is intended before changing
        # it, since switching sides may alter the numeric result.
        if len(X_saliences_perm) > len(Y_saliences_perm):
            _, _, singular_values_perm = _procrustes_rotation(
                Y_saliences, Y_saliences_perm, singular_values_perm)
        else:
            X_saliences_perm = X_saliences_perm.T
            _, _, singular_values_perm = _procrustes_rotation(
                X_saliences, X_saliences_perm, singular_values_perm)

    return singular_values_perm
Beispiel #4
0
def _permute_and_calc_singular_values(X,
                                      Y,
                                      X_saliences,
                                      Y_saliences,
                                      singular_values_samples,
                                      perm_i,
                                      n_components,
                                      procrustes=False,
                                      algorithm="randomized"):
    """Store the singular values of one permuted cross-product in place.

    One of ``X`` / ``Y`` is shuffled along its first axis, the cross-product
    with the other matrix is decomposed with ``TruncatedSVD`` and the
    resulting singular values are written to column ``perm_i`` of
    ``singular_values_samples``. When ``procrustes`` is truthy, the third
    value returned by ``_procrustes_rotation`` is stored instead.

    Parameters
    ----------
    X, Y
        Arrays whose cross-product ``Y.T . X`` is decomposed.
    X_saliences, Y_saliences
        Reference saliences, used only when ``procrustes`` is truthy.
    singular_values_samples
        2-D output array; column ``perm_i`` is overwritten.
    perm_i
        Column index of this permutation in ``singular_values_samples``.
    n_components
        Forwarded to ``TruncatedSVD``.
    procrustes
        Whether to pass the permuted result through ``_procrustes_rotation``.
    algorithm
        Forwarded to ``TruncatedSVD`` as ``algorithm``.

    Returns
    -------
    None
        The result is written into ``singular_values_samples``.
    """
    # Shuffle X when it has fewer rows than Y, otherwise shuffle Y.
    if len(X) < len(Y):
        cross_product = np.dot(Y.T, np.random.permutation(X))
    else:
        cross_product = np.dot(np.random.permutation(Y).T, X)

    decomposer = TruncatedSVD(n_components, algorithm=algorithm)
    left_perm, values_perm, right_perm = decomposer._fit(cross_product)

    if not procrustes:
        singular_values_samples[:, perm_i] = values_perm
        return

    # Either side may be used to obtain the rotated singular values.
    # NOTE(review): the right factor is compared before it is transposed;
    # confirm that this is the intended "smaller side" test.
    if len(right_perm) > len(left_perm):
        reference, moving = Y_saliences, left_perm
    else:
        reference, moving = X_saliences, right_perm.T

    _, _, rotated_values = _procrustes_rotation(reference, moving, values_perm)
    singular_values_samples[:, perm_i] = rotated_values
Beispiel #5
0
def fit_pls(X, Y, n_components, scale=True, algorithm="randomized"):
    """Decompose the cross-product of ``Y`` and ``X`` with ``TruncatedSVD``.

    Parameters
    ----------
    X, Y
        Arrays whose cross-product ``Y.T . X`` is decomposed.
    n_components
        Forwarded to ``TruncatedSVD``.
    scale
        When truthy, both inputs are z-scored column-wise (``axis=0``,
        ``ddof=1``) before the cross-product is formed.
    algorithm
        Forwarded positionally as the second argument of ``TruncatedSVD``.

    Returns
    -------
    tuple
        ``(X_saliences, Y_saliences, singular_values, inertia)``, where
        ``inertia`` is the sum of the singular values. When ``scale`` is
        truthy the z-scored ``X`` and ``Y`` are appended, giving six items.
    """
    if scale:
        X_used = zscore(X, axis=0, ddof=1)
        Y_used = zscore(Y, axis=0, ddof=1)
    else:
        X_used, Y_used = X, Y

    cross_product = np.dot(Y_used.T, X_used)
    left, singular_values, right = TruncatedSVD(n_components,
                                                algorithm)._fit(cross_product)

    # The right factor is transposed to give the X-side saliences.
    core = (right.T, left, singular_values, singular_values.sum())

    if scale:
        return core + (X_used, Y_used)
    return core
Beispiel #6
0
def _boostrap(X,
              Y,
              X_saliences,
              Y_saliences,
              X_saliences_bootstraps,
              Y_saliences_bootstraps,
              bootstrap_i,
              n_components,
              algorithm="randomized"):
    """Compute one bootstrap replicate and store its saliences in place.

    Rows of ``X`` and ``Y`` are resampled jointly with replacement, each
    resample is passed through ``scale``, the cross-product is decomposed
    with ``TruncatedSVD`` and the saliences, aligned to the reference
    saliences via ``_procrustes_rotation``, are written to slice
    ``[:, :, bootstrap_i]`` of the two output arrays.

    Parameters
    ----------
    X, Y
        2-D arrays indexed ``[row, column]``; the same row indices are drawn
        from both.
    X_saliences, Y_saliences
        Reference saliences the bootstrap saliences are rotated towards.
    X_saliences_bootstraps, Y_saliences_bootstraps
        3-D output arrays; slice ``[:, :, bootstrap_i]`` is overwritten.
    bootstrap_i
        Index of this replicate along the last axis of the output arrays.
    n_components
        Forwarded to ``TruncatedSVD``.
    algorithm
        Forwarded to ``TruncatedSVD`` as ``algorithm``.

    Returns
    -------
    None
        Results are written into the two output arrays.
    """
    row_count = X.shape[0]
    drawn_rows = np.random.choice(list(range(row_count)),
                                  size=row_count,
                                  replace=True)
    X_resampled = X[drawn_rows, :]
    Y_resampled = Y[drawn_rows, :]

    cross_product = np.dot(scale(Y_resampled).T, scale(X_resampled))
    decomposer = TruncatedSVD(n_components, algorithm=algorithm)
    y_side, _, x_side_t = decomposer._fit(cross_product)
    x_side = x_side_t.T

    # Either side may be used to estimate the rotation; it is estimated on
    # the side with fewer rows and then applied to the other side.
    if len(x_side) > len(y_side):
        aligned, rotation = _procrustes_rotation(Y_saliences, y_side)
        Y_saliences_bootstraps[:, :, bootstrap_i] = aligned
        X_saliences_bootstraps[:, :, bootstrap_i] = np.dot(x_side, rotation)
    else:
        aligned, rotation = _procrustes_rotation(X_saliences, x_side)
        X_saliences_bootstraps[:, :, bootstrap_i] = aligned
        Y_saliences_bootstraps[:, :, bootstrap_i] = np.dot(y_side, rotation)
Beispiel #7
0
def fit_pls(X, Y, n_components, scale=True, algorithm="randomized"):
    """Decompose the cross-product of ``Y`` and ``X`` with ``TruncatedSVD``.

    Progress information (a fixed message and the shape of the cross-product
    matrix) is printed to stdout.

    Parameters
    ----------
    X, Y
        Arrays whose cross-product ``Y.T . X`` is decomposed.
    n_components
        Forwarded to ``TruncatedSVD``.
    scale
        When truthy, both inputs are z-scored column-wise (``axis=0``,
        ``ddof=1``) before the cross-product is formed.
    algorithm
        Forwarded positionally as the second argument of ``TruncatedSVD``.

    Returns
    -------
    tuple
        ``(X_saliences, Y_saliences, singular_values, inertia)``, where
        ``inertia`` is the sum of the returned singular values. When
        ``scale`` is truthy three more items are appended: the z-scored
        ``X``, the z-scored ``Y`` and ``sum_var``, which is the full
        cross-product matrix itself (the same object, not a copy).
    """
    # Single-argument print(...) calls print the same text on Python 2 and
    # are valid syntax on Python 3, unlike the former print statements.
    print("calculating SVD")
    if scale:
        X_scaled = zscore(X, axis=0, ddof=1)
        Y_scaled = zscore(Y, axis=0, ddof=1)
        covariance = np.dot(Y_scaled.T, X_scaled)
    else:
        covariance = np.dot(Y.T, X)

    print(np.shape(covariance))
    sum_var = covariance
    svd = TruncatedSVD(n_components, algorithm)
    # Only n_components components are requested from the decomposition.
    Y_saliences, singular_values, X_saliences = svd._fit(covariance)
    X_saliences = X_saliences.T
    inertia = singular_values.sum()

    if scale:
        return X_saliences, Y_saliences, singular_values, inertia, X_scaled, Y_scaled, sum_var
    else:
        return X_saliences, Y_saliences, singular_values, inertia