Example 1
def tryParameters(params):
    # Unpack the single tuple argument (tuple parameters in a def are not valid Python 3 syntax)
    data_matrix, O, tol_perc, gamma = params
    logMsg("Trying tol=%f, gamma=%f" % (tol_perc, gamma))
    # Outlier pursuit: split the data into a low-rank part L and a column-sparse outlier part C
    L, C, term, n_iter = opursuit(data_matrix, O, gamma, tol_perc=tol_perc, eps_ratio=10)
    # Fraction of columns flagged as outliers (nonzero columns of C)
    c_vals = [(sum(square(C[:, i])) != 0) * 1 for i in range(C.shape[1])]
    c_perc = float(sum(c_vals)) / len(c_vals)
    # PCA on the centered low-rank part; the number of returned PCs estimates its rank
    centered_L = scale_and_center(L, scale=False)
    pcs, robust_lowdim_data = pca(centered_L, 100000)
    num_pca_dimensions = pcs.shape[1]

    print([tol_perc, gamma, c_perc, num_pca_dimensions, n_iter])
    return [tol_perc, gamma, c_perc, num_pca_dimensions, n_iter]
Example 2
def tryParameters(params):
    data_matrix, O, tol_perc, gamma = params
    logMsg("Trying tol=%f, gamma=%f" % (tol_perc, gamma))
    L, C, term, n_iter = opursuit(data_matrix,
                                  O,
                                  gamma,
                                  tol_perc=tol_perc,
                                  eps_ratio=10)
    c_vals = [(sum(square(C[:, i])) != 0) * 1 for i in range(C.shape[1])]
    c_perc = float(sum(c_vals)) / len(c_vals)
    centered_L = scale_and_center(L, scale=False)
    pcs, robust_lowdim_data = pca(centered_L, 100000)
    num_pca_dimensions = pcs.shape[1]

    print([tol_perc, gamma, c_perc, num_pca_dimensions, n_iter])
    return [tol_perc, gamma, c_perc, num_pca_dimensions, n_iter]
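
The tryParameters helper above (Examples 1 and 2) packs all of its inputs into a single tuple, which makes it convenient to map over a grid of settings. The sketch below is purely illustrative: the synthetic data_matrix, the observation mask O, the parameter grid, and the use of multiprocessing are assumptions, not part of the original project.

# Hypothetical parameter sweep over (tol_perc, gamma) using tryParameters.
# The data, the grid values, and the use of multiprocessing are assumptions.
from itertools import product
from multiprocessing import Pool

from numpy import column_stack

def sweep_parameters(vectors):
    data_matrix = column_stack(vectors)
    O = (data_matrix != 0) * 1  # 1 where observed, 0 where missing
    tasks = [(data_matrix, O, tol, gamma)
             for tol, gamma in product([1e-6, 1e-4, 1e-2], [0.3, 0.5, 0.7])]
    with Pool() as pool:
        results = pool.map(tryParameters, tasks)
    # Each result row: [tol_perc, gamma, c_perc, num_pca_dimensions, n_iter]
    return results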
Example 3
def compare_eps(vectors):
    data_matrix = column_stack(vectors)
    O = (data_matrix != 0) * 1  # Observation matrix - 1 where we have data, 0 where we do not

    gamma = .5
    # tol_perc = .002  (immediately overridden; the tighter tolerance below is the one used)
    tol_perc = .00000001
    for e in [2, 5, 10, 20, 50, 100]:
        L, C, term, n_iter = opursuit(data_matrix, O, gamma, tol_perc=tol_perc, eps_ratio=e)
        obj = obj_func(L, C, gamma)
        # Fraction of columns of C that are nonzero (columns flagged as outliers)
        c_vals = [(sum(square(C[:, i])) != 0) * 1 for i in range(C.shape[1])]
        c_perc = float(sum(c_vals)) / len(c_vals)
        # Estimate the rank of the low-rank part via PCA on its centered version
        centered_L = scale_and_center(L, scale=False)
        pcs, robust_lowdim_data = pca(centered_L, 100000)
        num_pca_dimensions = pcs.shape[1]
        print("eps=%d term=%f n_iter=%d obj=%f rank=%d c_perc=%f" %
              (e, term, n_iter, obj, num_pca_dimensions, c_perc))
Example 4
def compare_eps(vectors):
    data_matrix = column_stack(vectors)
    O = (data_matrix != 0) * 1  # Observation matrix - 1 where we have data, 0 where we do not

    gamma = .5
    # tol_perc = .002  (immediately overridden below)
    tol_perc = .00000001
    for e in [2, 5, 10, 20, 50, 100]:
        L, C, term, n_iter = opursuit(data_matrix,
                                      O,
                                      gamma,
                                      tol_perc=tol_perc,
                                      eps_ratio=e)
        obj = obj_func(L, C, gamma)
        c_vals = [(sum(square(C[:, i])) != 0) * 1 for i in range(C.shape[1])]
        c_perc = float(sum(c_vals)) / len(c_vals)
        centered_L = scale_and_center(L, scale=False)
        pcs, robust_lowdim_data = pca(centered_L, 100000)
        num_pca_dimensions = pcs.shape[1]
        print("eps=%d term=%f n_iter=%d obj=%f rank=%d c_perc=%f" %
              (e, term, n_iter, obj, num_pca_dimensions, c_perc))
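
compare_eps (Examples 3 and 4) expects vectors to be a list of NumPy column vectors of equal length, with zeros marking missing observations. A minimal driver on synthetic data might look like the following; the shapes, the rank, and the missing-data fraction are assumed for illustration only.

# Illustrative call to compare_eps on synthetic low-rank data with missing entries.
import numpy as np

rng = np.random.RandomState(0)
low_rank = rng.randn(200, 3).dot(rng.randn(3, 40))  # rank-3 matrix, 200 x 40
mask = rng.rand(200, 40) < 0.9                      # roughly 10% of entries missing
observed = low_rank * mask                          # zeros mark the missing data
vectors = [observed[:, i].reshape((200, 1)) for i in range(observed.shape[1])]

compare_eps(vectors)  # prints one summary line per eps_ratio in [2, 5, 10, 20, 50, 100]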
Example 5
# vectors - a list of Numpy column vectors
# robust - True if RPCA via OP is desired
# k - Number of PCs to use in PCA
# gamma - gamma parameter for RPCA
def computeMahalanobisDistances(key_and_vectors, robust=False, k=10, gamma=.5, tol_perc=1e-06):
    # Unpack the (key, vectors) tuple (tuple parameters are not valid Python 3 syntax)
    key, vectors = key_and_vectors
    data_matrix = column_stack(vectors)
    if robust:
        if gamma == "tune":
            gamma, tol_perc, num_guesses, hi_num_pcs, L, C = increasing_tolerance_search(vectors)
            (weekday, hour) = key
            logMsg("Successfully tuned %s @ %d  after %d guesses : gamma=%f, tol=%f" %
                   (weekday, hour, num_guesses, gamma, tol_perc))
        else:
            O = (data_matrix != 0) * 1  # Observation matrix - 1 where we have data, 0 where we do not
            # Use outlier pursuit to get robust low-rank approximation of data
            L, C, term, n_iter = opursuit(data_matrix, O, gamma, tol_perc=tol_perc)

        # Perform PCA on the low-rank approximation, and estimate the statistics
        centered_L = scale_and_center(L, scale=False)
        pcs, robust_lowdim_data = pca(centered_L, k)
        num_pca_dimensions = pcs.shape[1]
        logMsg("Num eigenvalues : %d" % num_pca_dimensions)

        # Center the corrupted data (L + C) against the low-rank reference before projecting
        centered_corrupt = scale_and_center(L + C, reference_matrix=L, scale=False)

        stdout.flush()
        mahals5 = lowdim_mahalanobis_distance(pcs, robust_lowdim_data, centered_corrupt, 5)
        mahals10 = lowdim_mahalanobis_distance(pcs, robust_lowdim_data, centered_corrupt, 10)
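
computeMahalanobisDistances (Example 5, shown truncated above) takes a (key, vectors) pair, where the key is a (weekday, hour) tuple, plus the RPCA and PCA settings. The call below is a hedged sketch: the key, the synthetic vectors, and the assumption that the function eventually returns the computed distances are illustrative and not confirmed by the excerpt.

# Hypothetical invocation of computeMahalanobisDistances with the robust (RPCA) path enabled.
# The (weekday, hour) key, the synthetic vectors, and the returned value are assumptions.
import numpy as np

rng = np.random.RandomState(1)
vectors = [rng.randn(50, 1) for _ in range(30)]  # 30 column vectors of length 50

result = computeMahalanobisDistances((("Monday", 8), vectors),
                                     robust=True, k=10, gamma=0.5)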