Example #1
 # assumed imports for this excerpt (not shown in the original)
 import numpy as np
 from time import clock
 import python_ncut_lib as ncut

 # the first line of this load was truncated; reconstructed from Example #2,
 # which loads the same file into hist_weights_33
 hist_weights_33 = np.load(
     '/home/aurora/workspace/PycharmProjects/data/hist_adjacent_matrix.npy')
 # hist_weights_constraint_33 = np.load('/home/aurora/hdd/workspace/PycharmProjects/data/sub_matrix_distance_2015_1211.npy')
 hist_weights_constraint_33 = np.load(
     '/home/aurora/hdd/workspace/PycharmProjects/data/aurora_img_matches_matrix_20151212.npy'
 )
 # show_weights_img(img_weights)
 # datas = [img_weights_3, img_weights_9, img_weights_13, img_weights_33]
 datas = [hist_weights_constraint_33]
 start = clock()
 category = [100, 200, 300, 400, 500]
 # category = [500, 600, 700, 800]
 # category = [4, 6, 8, 10]
 # one label vector per (data matrix, cluster count) pair
 results = np.zeros((len(datas), len(category), hist_weights_constraint_33.shape[0]))
 for idx, data in enumerate(datas):
     for k in category:
         eigval, eigvec = ncut.ncut(data, k)
         discret_eigvec = ncut.discretisation(eigvec)
         # encode cluster membership as a single label vector: a sample in
         # cluster i (0-based) gets the value i + 1, since the discretised
         # eigenvectors form a disjoint 0/1 indicator matrix
         group_img = discret_eigvec[:, 0]
         for i in range(1, k):
             group_img += (i + 1) * discret_eigvec[:, i]
         results[idx, category.index(k)] = group_img.todense().T
         # results[0, category.index(k)] = (results[0, category.index(k)]/k)*256
 print results.shape
 np.save(
     '/home/aurora/hdd/workspace/PycharmProjects/data/img_sub_ncuts_matrix_distance_2015_1211_m_400',
     results)
 # print np.unique(results[0][0])
 # print np.unique(results[0][1])
 # print np.unique(results[0][2])
 # print np.unique(results[0][3])
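
The label-encoding loop above can also be written as a single matrix product (Example #3 below does the same with np.diag). A minimal vectorized sketch, assuming discret_eigvec is the sparse (N, k) indicator matrix returned by ncut.discretisation:

import numpy as np

def encode_labels(discret_eigvec):
    # the discretised eigenvectors form a 0/1 indicator matrix with one
    # nonzero entry per row (one cluster per sample)
    indicator = np.asarray(discret_eigvec.todense())
    k = indicator.shape[1]
    # weight column i by the label i + 1 and sum across columns;
    # equivalent to the (i + 1) * discret_eigvec[:, i] loop above
    return indicator.dot(np.arange(1, k + 1))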
Example #2
    # assumed imports for this excerpt (not shown in the original)
    import numpy as np
    from time import clock
    import python_ncut_lib as ncut

    img_weights_9 = np.load('/home/aurora/hdd/workspace/PycharmProjects/data/similary_gausses_9.npy')
    img_weights_13 = np.load('/home/aurora/hdd/workspace/PycharmProjects/data/similary_gausses_13.npy')
    img_weights_33 = np.load('/home/aurora/hdd/workspace/PycharmProjects/data/similary_gausses_33.npy')
    hist_weights_33 = np.load('/home/aurora/workspace/PycharmProjects/data/hist_adjacent_matrix.npy')
    hist_weights_constraint_33 = np.load('/home/aurora/workspace/PycharmProjects'
                                         '/data/hist_adjacent_matrix_constraint.npy')
    # show_weights_img(img_weights)
    # datas = [img_weights_3, img_weights_9, img_weights_13, img_weights_33]
    datas = [hist_weights_constraint_33]
    start = clock()
    category = [100, 200, 300, 400]
    # category = [4, 6, 8, 10]
    # img_weights_3 is not defined in this excerpt; size the array from the
    # matrix actually being clustered, as Example #1 does
    results = np.zeros((len(datas), len(category), hist_weights_constraint_33.shape[0]))
    for idx, data in enumerate(datas):
        for k in category:
            eigval, eigvec = ncut.ncut(data, k)
            discret_eigvec = ncut.discretisation(eigvec)
            group_img = discret_eigvec[:, 0]
            for i in range(1, k):
                group_img += (i+1)*discret_eigvec[:, i]
                # print results[category.index(k)].shape
            results[idx, category.index(k)] = group_img.todense().T
            # results[0, category.index(k)] = (results[0, category.index(k)]/k)*256
    print results.shape
    np.save('/home/aurora/hdd/workspace/PycharmProjects/data/hist_ncuts_constraint_sigma_33', results)
    print np.unique(results[0])
    print np.unique(results[1])
    print np.unique(results[2])
    print np.unique(results[3])

    end = clock()
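
The np.unique calls above list the distinct label values; passing return_counts=True also gives each cluster's size. A minimal sketch, assuming results has been filled in as above (here the labels for the first data matrix at k = 100):

labels, counts = np.unique(results[0, 0], return_counts=True)
for label, count in zip(labels, counts):
    print('cluster %d: %d samples' % (label, count))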
Example #3
def cluster_timeseries(X, n_clusters, similarity_metric='k_neighbors', affinity_threshold=0.0, neighbors=10):
    """
    Cluster a given timeseries
        
    Parameters
    ----------
    X : array_like
        A matrix of shape (`N`, `M`) with `N` samples and `M` dimensions
    n_clusters : integer
        Number of clusters
    similarity_metric : {'k_neighbors', 'correlation', 'data'}
        Type of similarity measure for spectral clustering. The pairwise
        similarity measure specifies the edges of the similarity graph.
        The 'data' option treats X itself as the similarity matrix, which
        must therefore be symmetric. Default is kneighbors_graph [1]_
        (forced to be symmetric).
    affinity_threshold : float
        Threshold applied when the 'correlation' similarity metric is
        used; correlations below it are set to zero.
    neighbors : integer
        Number of nearest neighbors to use when the 'k_neighbors'
        similarity metric is used. Default is 10.
        
    Returns
    -------
    y_pred : array_like
        Predicted cluster labels

    References
    ----------
    .. [1] http://scikit-learn.org/dev/modules/generated/sklearn.neighbors.kneighbors_graph.html
    
    """

    if similarity_metric == 'correlation':
        # Calculate empirical correlation matrix between samples
        Xn = X - X.mean(1)[:,np.newaxis]
        Xn = Xn/np.sqrt( (Xn**2.).sum(1)[:,np.newaxis] )
        C_X = np.dot(Xn, Xn.T)
        C_X[C_X < affinity_threshold] = 0
        from scipy.sparse import lil_matrix
        C_X = lil_matrix(C_X)
    elif similarity_metric == 'data':
        C_X = X
    elif similarity_metric == 'k_neighbors':
        from sklearn.neighbors import kneighbors_graph
        C_X = kneighbors_graph(X, n_neighbors=neighbors)
        C_X = 0.5 * (C_X + C_X.T)
    else:
        raise ValueError("Unknown value for similarity_metric: '%s'." % similarity_metric)
    
    # sklearn's spectral clustering is not stable for the poorly conditioned
    # affinity matrices that a correlation-based similarity tends to produce;
    # see the warning at
    # http://scikit-learn.org/dev/modules/clustering.html#spectral-clustering
    # from sklearn import cluster
    # algorithm = cluster.SpectralClustering(k=n_clusters, mode='arpack')
    # algorithm.fit(C_X)
    # y_pred = algorithm.labels_.astype(np.int)

    from python_ncut_lib import ncut, discretisation
    eigen_val, eigen_vec = ncut(C_X, n_clusters)
    eigen_discrete = discretisation(eigen_vec)

    # each row of eigen_discrete has exactly one nonzero entry, so weighting
    # column i by i and summing across columns gives each sample's cluster
    # index; shifting with np.arange(n_clusters) + 1 is not necessary because
    # the clusters form disjoint sets
    y_pred = np.dot(eigen_discrete.toarray(), np.diag(np.arange(n_clusters))).sum(1)
    
    return y_pred
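
A minimal usage sketch for cluster_timeseries, assuming python_ncut_lib is importable; the random data and parameter values below are illustrative only:

import numpy as np

# hypothetical input: 50 samples of a 200-point timeseries each
X = np.random.rand(50, 200)

# default: k-nearest-neighbors similarity graph, forced symmetric
y_pred = cluster_timeseries(X, n_clusters=5, similarity_metric='k_neighbors',
                            neighbors=10)
print(y_pred)

# alternative: thresholded empirical correlation matrix
y_pred = cluster_timeseries(X, n_clusters=5, similarity_metric='correlation',
                            affinity_threshold=0.5)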
    # tail of cal_av_mat_class_distance; the rest of its definition was
    # truncated in the original excerpt
    av_dist = dist / num
    return av_dist

av_mat_class_distance = cal_av_mat_class_distance(people) 
print av_mat_class_distance


dist1, vec_class_distance_people1 = cal_mat_class_distance(mat_people1)
std1 = np.std(vec_class_distance_people1)  # std of people1
dist2, vec_class_distance_people2 = cal_mat_class_distance(mat_people2)
std2 = np.std(vec_class_distance_people2)  # std of people2
std12 = (std1 + std2) / 2  # average std of people1 and people2
print std12


mat_W = np.exp(-mat_dist / std12)  # mat_W is the similarity matrix; the kernel function is exp(-d / std)
print mat_W

# sanity check only
print mat_W.shape
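
For context, a minimal sketch of how a pairwise distance matrix like mat_dist could be built; the feature matrix and the Euclidean metric are assumptions, not taken from the original:

import numpy as np
from scipy.spatial.distance import cdist

# hypothetical (N, D) feature matrix, one row per sample
features = np.random.rand(100, 64)

# symmetric (N, N) matrix of pairwise Euclidean distances
mat_dist = cdist(features, features, metric='euclidean')

# Gaussian-style kernel on the distances, as in the script above
mat_W = np.exp(-mat_dist / std12)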


import python_ncut_lib as nc  # import the normalized cut library
# uncomment for unlimited array display
# np.set_printoptions(threshold=np.nan)
nbEigen = 3
eigen_value, vector = nc.ncut(mat_W, nbEigen)
vec_dis = nc.discretisation(vector)
print eigen_value
print vec_dis
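
As in the examples above, the discretised eigenvectors form a disjoint 0/1 indicator matrix, so per-sample cluster labels can be recovered with an argmax over columns. A minimal sketch:

import numpy as np

# the column index of the single nonzero entry per row is the cluster label
labels = np.asarray(vec_dis.todense()).argmax(axis=1)
print(labels)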