Example #1
import networkx as nx
from sklearn.cluster import spectral_clustering

def spectral_partition(dataset, filename):
    # Load the graph and use its dense adjacency matrix as the affinity matrix.
    G = nx.read_gml(dataset)
    adj_matrix = nx.adjacency_matrix(G)
    affinity_matrix = adj_matrix.toarray()
    # n_clusters defaults to 8 in sklearn's spectral_clustering.
    labels = spectral_clustering(affinity_matrix)
    draw_graph(G, labels, filename)
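The snippet relies on a `draw_graph` helper defined elsewhere in the original module. A minimal stand-in (purely illustrative, not the original helper) that colors nodes by cluster label could look like:

import networkx as nx
import matplotlib.pyplot as plt

def draw_graph(G, labels, filename):
    # Color each node by its cluster label and save the figure.
    pos = nx.spring_layout(G)
    nx.draw(G, pos, node_color=labels, cmap=plt.cm.viridis)
    plt.savefig(filename)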
Example #2
    def compute_cluster_means(self, n_components, threshold):
        # Assumes `import copy`, `import numpy as np`, and
        # `from sklearn.cluster import spectral_clustering` at module level.

        # Binarize the similarity matrix into a connection matrix.
        connection_matrix = copy.deepcopy(self.similarity)
        connection_matrix[np.where(self.similarity < threshold)] = 0.0
        connection_matrix[np.where(self.similarity >= threshold)] = 1.0

        # `n_components` is used below as the number of clusters, so it must
        # be passed as `n_clusters`; sklearn's `n_components` argument is the
        # number of eigenvectors, not the number of clusters.
        labels = spectral_clustering(connection_matrix,
                                     n_clusters=n_components,
                                     eigen_solver='arpack')

        # Organize data by cluster label.
        g = []
        for ilabel in range(n_components):
            g.append(self.x[np.where(labels == ilabel)[0]])

        # Compute the mean of the data in each cluster.
        v = []
        for mat in g:
            v.append(np.mean(mat, axis=0))

        self.vectors = np.matrix(v)
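As an aside, the pair of `where` assignments amounts to a single boolean comparison; a minimal sketch, assuming the similarity matrix is a NumPy array:

import numpy as np

similarity = np.random.rand(10, 10)   # stand-in similarity matrix
threshold = 0.5
connection_matrix = (similarity >= threshold).astype(float)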
Example #3
# `distances` (a square distance matrix), `upper_triangle` (its upper-triangular
# entries), and the `mathEx` helper module come from earlier in the script.
import numpy as np
import matplotlib.pyplot as plt

std = np.std(upper_triangle)
# Gaussian affinity matrix; mathEx.gaussian applies the kernel element-wise,
# equivalent to looping it over every (i, j) entry of the 16x16 `distances`.
W = mathEx.gaussian(distances, std)

#-------------------------------------------
from sklearn.cluster import spectral_clustering
spectral_clustering(W, n_clusters=4)

from sklearn.cluster import affinity_propagation
affinity_propagation(W)

# The private helpers `hierarchical._average_linkage` and
# `hierarchical._complete_linkage` are not public sklearn API; SciPy's
# `linkage` on the condensed distance matrix is the supported equivalent.
import scipy.cluster.hierarchy as h
from scipy.spatial.distance import squareform

Z = h.linkage(squareform(distances, checks=False), method='average')
h.linkage(squareform(distances, checks=False), method='complete')

# Plot the full dendrogram.
plt.figure(figsize=(25, 10))
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('sample index')
plt.ylabel('distance')
h.dendrogram(Z)
plt.show()
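`mathEx.gaussian` is project code that is not shown; under the usual Gaussian (RBF) kernel definition, a NumPy-only equivalent (an assumption about what `mathEx.gaussian` computes) would be:

import numpy as np

def gaussian(distances, std):
    # RBF kernel: w_ij = exp(-d_ij^2 / (2 * std^2)); assumed to match mathEx.gaussian.
    return np.exp(-np.asarray(distances) ** 2 / (2.0 * std ** 2))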
Example #4
    def run_spectral_clustering(self):
        # Assumes `import pickle`, `import numpy as np`, and
        # `from sklearn.cluster import spectral_clustering` at module level.
        print('Reading FFT data from pickle...')
        input_pkl = open('/tmp/robot_sounds/fft/all_ffts.pkl', 'rb')
        action_labels, object_labels, processed_ffts = pickle.load(input_pkl)
        input_pkl.close()
        print('done\n')

        action_names = self.action_names
        object_names = self.object_names
        labels = np.asarray(object_labels)
        act_labels = np.asarray(action_labels)

        self.generate_cost_matrix()

        ########### TRAIN SOM/KNN MODELS #################
        print('Training SOM and kNN models...')
        # list(...) is needed so the indices can be shuffled in place.
        inds = list(range(len(processed_ffts)))
        np.random.shuffle(inds)

        num_tot = len(processed_ffts)
        num_train = int(0.8 * num_tot)
        num_test = num_tot - num_train
        print('\tNumber of training instances', num_train)
        print('\tNumber of testing instances', num_test)

        train_set = [processed_ffts[idx] for idx in inds[:num_train]]
        test_set = [processed_ffts[idx] for idx in inds[num_train:]]

        del processed_ffts

        train_labels = labels[inds][:num_train]
        test_labels = labels[inds][num_train:]

        act_train_labels = act_labels[inds][:num_train]
        act_test_labels = act_labels[inds][num_train:]

        l = {}
        for act in action_names:
            l[act] = []

        for idx, act in enumerate(act_train_labels):
            l[act].append(idx)

        soms = {}
        knns = {}
        for act in action_names:
            ts = [train_set[i] for i in l[act]]
            tl = [train_labels[i] for i in l[act]]

            if ts and tl:
                som, knn_model = self.train_model(ts, tl)
                soms[act] = som
                knns[act] = knn_model

        print('done\n')

        by_action = {}
        for idx, action in enumerate(act_train_labels):
            if action not in by_action:
                by_action[action] = []
            by_action[action].append((train_labels[idx], train_set[idx]))

        ########### COMPUTE AFFINITY MATRICES ##################
        print('Computing affinity matrices keyed on action...')
        affinity_by_action = {}

        for act in by_action:
            print('Processing %s action' % act)
            affinity_by_action[act] = []
            objs_ffts = by_action[act]
            num_objs = len(objs_ffts)

            for idx_obj1 in range(num_objs):
                print('\t[%d/%d]' % (idx_obj1 + 1, num_objs))

                som = soms[act]

                str1 = self.sound_fft_to_string(objs_ffts[idx_obj1][1], som)
                row = []

                for idx_obj2 in range(num_objs):
                    #print '\t\tComputing distance between %s and %s objects' % (objs_ffts[idx_obj1][0], objs_ffts[idx_obj2][0])
                    str2 = self.sound_fft_to_string(objs_ffts[idx_obj2][1],
                                                    som)
                    dist = self.sound_seq_distance_str(str1, str2)
                    #dist = math.exp(-(dist-1)/0.5) / math.exp(0)
                    #delta = 0.1
                    #row.append(np.exp(-dist ** 2 / (2. * delta ** 2)))
                    #row.append(1.0/dist if dist > 0.0 else 1)
                    row.append(dist)
                    #print '\t\t\tdistance = %f' % row[-1]

                affinity_by_action[act].append(row)

            var = np.asarray(affinity_by_action[act]).var()
            affinity_by_action[act] = np.exp(
                -np.asarray(affinity_by_action[act])**2 / (2. * var**2)) * 10
            #print '\t%f' % var
            #print affinity_by_action[act]

        output = open('/tmp/affinity_by_action.pkl', 'wb')
        pickle.dump(affinity_by_action, output)
        output.close()

        print('done\n')
        #print affinity_by_action

        ############ CLUSTER INSTANCES ######################
        print('Clustering instances...')

        for action, affinity_matrix in affinity_by_action.items():
            #print action
            #print affinity_matrix
            print('Processing %s action' % action)
            am = np.asarray(affinity_matrix)
            #print 'shape = %s' % str(am.shape)

            c_labels = spectral_clustering(am, n_clusters=len(object_names))
            print('labels: %s' % str(c_labels))

            objs_ffts = by_action[action]
            obj_ids = [of[0] for of in objs_ffts]
            print('objs: %s' % str(obj_ids))

            l_to_o = {}
            for idx, l in enumerate(c_labels):
                if l not in l_to_o: l_to_o[l] = []
                l_to_o[l].append(obj_ids[idx])

            print(action, l_to_o)

        print('done\n')
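The function-style `spectral_clustering` call above also has an estimator form; a minimal sketch with a stand-in affinity matrix (not the FFT-derived one built above):

import numpy as np
from sklearn.cluster import SpectralClustering

am = np.exp(-np.random.rand(20, 20))   # stand-in affinity matrix
am = (am + am.T) / 2                   # precomputed affinities must be symmetric
sc = SpectralClustering(n_clusters=4, affinity='precomputed')
c_labels = sc.fit_predict(am)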
Example #5
import pickle
import matplotlib.pyplot as plt
from sklearn.cluster import spectral_clustering

# `M` (the number of clusters) and `draw_similarity_matrix` are defined
# earlier in the script.
pkl_file = open("data_gold.pkl", "rb")
data_gold = pickle.load(pkl_file)
data = data_gold[0]
gold = data_gold[1]
# Shift the gold-standard labels to be zero-based.
for i in range(len(gold)):
    gold[i] = gold[i] - 1
pkl_file.close()
n_samples, n_features = data.shape
print([n_samples, n_features])

pkl_file = open("matrix_hsic.pkl", "rb")
matrix_hsic = pickle.load(pkl_file)
pkl_file.close()

labels_predict = spectral_clustering(matrix_hsic, n_clusters=M)
print(labels_predict)

plt.figure(0)
draw_similarity_matrix(matrix_hsic, labels_predict, M)

# id_airway is the cluster id of 'WallAreaPct_seg'
# id_emphysema is the cluster id of 'pctEmph'
id_airway = labels_predict[13]
id_emphysema = labels_predict[10]

# id_score is the cluster id of Feature Set 2
# id_fev1 is the cluster id of Feature Set 3
id_score = labels_predict[6]
id_fev1 = labels_predict[21]
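`draw_similarity_matrix` is user code that is not shown; a plausible minimal version (hypothetical, not the original implementation) reorders rows and columns by cluster label so the block structure becomes visible:

import numpy as np
import matplotlib.pyplot as plt

def draw_similarity_matrix(matrix, labels, n_clusters):
    # Group members of the same cluster next to each other, then plot.
    order = np.argsort(labels)
    reordered = np.asarray(matrix)[order][:, order]
    plt.imshow(reordered, interpolation='nearest')
    plt.title('%d clusters' % n_clusters)
    plt.colorbar()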
Example #6
import pickle
import numpy as np
from sklearn.preprocessing import scale
from sklearn.cluster import spectral_clustering

# `file_data_train` (an open pickle file) and `mtr_hsic` (the raw HSIC
# matrix) come from earlier in the script.
data_con_use, features_name_use = pickle.load(file_data_train)
file_data_train.close()

# Normalization of dataset
data = scale(data_con_use)

# Obtain the normalized HSIC matrix from the raw HSIC matrix:
# nhsic[i, j] = hsic[i, j] / sqrt(hsic[i, i] * hsic[j, j])
mtr_nhsic = np.zeros(mtr_hsic.shape)
for i in range(mtr_nhsic.shape[0]):
    for j in range(mtr_nhsic.shape[1]):
        mtr_nhsic[i, j] = mtr_hsic[i, j] / np.sqrt(
            (mtr_hsic[i, i] * mtr_hsic[j, j]))

# Apply spectral clustering on the normalized HSIC matrix.
# NOTE: the original call passed the raw `mtr_hsic` despite the stated
# intent; the normalized matrix is used here to match the comment.
n_clusters_f = 5
labels_f = spectral_clustering(mtr_nhsic, n_clusters=n_clusters_f, n_init=10)

# Count cluster sizes and group feature names and indices by cluster.
cnt = [0] * n_clusters_f
tp = [[] for _ in range(n_clusters_f)]
tp_id = [[] for _ in range(n_clusters_f)]

for i in range(len(labels_f)):
    cnt[labels_f[i]] += 1
    tp[labels_f[i]].append(features_name_use[i])
    tp_id[labels_f[i]].append(i)
#print(cnt)
#print(tp_id)
#ax, pos_old = draw_similarity_matrix(mtr_nhsic, labels_f, n_clusters_f)
#plt.show()

flag_id = 0
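The double loop above is equivalent to a one-line vectorized normalization; a minimal sketch, assuming `mtr_hsic` is a square NumPy array with a positive diagonal:

import numpy as np

mtr_hsic = np.cov(np.random.rand(5, 50))   # stand-in HSIC matrix
d = np.sqrt(np.diag(mtr_hsic))
mtr_nhsic = mtr_hsic / np.outer(d, d)      # nhsic[i, j] = hsic[i, j] / (d[i] * d[j])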