import networkx as nx
from sklearn.cluster import spectral_clustering


def spectral_partition(dataset, filename):
    # Read the graph and use its adjacency matrix as the affinity matrix.
    G = nx.read_gml(dataset)
    adj_matrix = nx.adjacency_matrix(G)
    affinity_matrix = adj_matrix.toarray()
    res = spectral_clustering(affinity_matrix)
    # draw_graph is defined elsewhere in this module.
    draw_graph(G, res, filename)
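# A minimal usage sketch for spectral_partition on a synthetic two-community
# graph (hedged: planted_partition_graph stands in for a GML file on disk,
# and the cluster count below is illustrative).
G_demo = nx.planted_partition_graph(2, 10, p_in=0.9, p_out=0.05, seed=0)
demo_affinity = nx.adjacency_matrix(G_demo).toarray().astype(float)
demo_labels = spectral_clustering(demo_affinity, n_clusters=2, random_state=0)
print(demo_labels)  # one cluster id per node; the two communities should separate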
import copy

from numpy import matrix, mean, where
from sklearn.cluster import spectral_clustering


def compute_cluster_means(self, n_components, threshold):
    # Build a binary connection matrix by thresholding the similarity matrix.
    connection_matrix = copy.deepcopy(self.similarity)
    connection_matrix[where(self.similarity < threshold)] = 0.0
    connection_matrix[where(self.similarity >= threshold)] = 1.0
    # n_components is the desired number of clusters.
    labels = spectral_clustering(connection_matrix,
                                 n_clusters=n_components,
                                 eigen_solver='arpack')
    # Organize data by labels.
    g = []
    for ilabel in range(n_components):
        g.append(self.x[where(labels == ilabel)[0]])
    # Compute the mean of the data in each cluster.
    v = []
    for mat in g:
        v.append(mean(mat, axis=0))
    self.vectors = matrix(v)
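# Hedged sketch of the thresholding step above on a standalone similarity
# matrix (the toy values and the 0.5 threshold are illustrative, not taken
# from the original class). Thresholding at 0.5 leaves two disconnected
# blocks, so the two clusters are recovered trivially.
import numpy as np

toy_similarity = np.array([
    [1.0, 0.9, 0.8, 0.1, 0.0, 0.1],
    [0.9, 1.0, 0.9, 0.0, 0.1, 0.0],
    [0.8, 0.9, 1.0, 0.1, 0.0, 0.1],
    [0.1, 0.0, 0.1, 1.0, 0.9, 0.8],
    [0.0, 0.1, 0.0, 0.9, 1.0, 0.9],
    [0.1, 0.0, 0.1, 0.8, 0.9, 1.0],
])
# Same effect as the two where() assignments above.
toy_connection = (toy_similarity >= 0.5).astype(float)
toy_labels = spectral_clustering(toy_connection, n_clusters=2, random_state=0)
print(toy_labels)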
import matplotlib.pyplot as plt
import numpy as np
import scipy.cluster.hierarchy as h
from scipy.spatial.distance import squareform
from sklearn.cluster import affinity_propagation, spectral_clustering

# distances, upper_triangle, and mathEx come from the surrounding script;
# mathEx.gaussian applies a Gaussian kernel to the whole distance matrix
# (an equivalent elementwise loop over the 16x16 matrix is omitted here).
std = np.std(upper_triangle)
W = mathEx.gaussian(distances, std)

# Spectral clustering on the affinity matrix.
spectral_labels = spectral_clustering(W, n_clusters=4)

# Affinity propagation on the same matrix.
cluster_centers_indices, ap_labels = affinity_propagation(W)

# Agglomerative clustering; SciPy's linkage on the condensed distance matrix
# replaces the private sklearn helpers _average_linkage/_complete_linkage,
# which are not part of the public API.
Z = h.linkage(squareform(distances, checks=False), method='average')
h.linkage(squareform(distances, checks=False), method='complete')

# Calculate the full dendrogram.
plt.figure(figsize=(25, 10))
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('sample index')
plt.ylabel('distance')
h.dendrogram(Z)
plt.show()
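# mathEx.gaussian is a local helper that is not shown in this snippet; a
# plausible vectorized equivalent (an assumption, not the original code) is
# the Gaussian/RBF kernel applied elementwise to the distance matrix:
def gaussian_affinity(dist_matrix, std):
    # exp(-d^2 / (2 * std^2)): distance 0 maps to affinity 1, large
    # distances decay toward 0.
    return np.exp(-np.asarray(dist_matrix) ** 2 / (2.0 * std ** 2))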
def run_spectral_clustering(self):
    print('Reading FFT data from pickle...')
    input_pkl = open('/tmp/robot_sounds/fft/all_ffts.pkl', 'rb')
    action_labels, object_labels, processed_ffts = pickle.load(input_pkl)
    input_pkl.close()
    print('done\n')

    action_names = self.action_names
    object_names = self.object_names

    labels = np.asarray(object_labels)
    act_labels = np.asarray(action_labels)

    self.generate_cost_matrix()

    ########### TRAIN SOM/KNN MODELS #################
    print('Training SOM and kNN models...')
    # list() is required so the index sequence can be shuffled in place.
    inds = list(range(len(processed_ffts)))
    np.random.shuffle(inds)

    # 80/20 train/test split.
    num_tot = len(processed_ffts)
    num_train = int(0.8 * num_tot)
    num_test = num_tot - num_train
    print('\tNumber of training instances', num_train)
    print('\tNumber of testing instances', num_test)

    train_set = [processed_ffts[idx] for idx in inds[:num_train]]
    test_set = [processed_ffts[idx] for idx in inds[num_train:]]
    del processed_ffts

    train_labels = labels[inds][:num_train]
    test_labels = labels[inds][num_train:]
    act_train_labels = act_labels[inds][:num_train]
    act_test_labels = act_labels[inds][num_train:]

    # Group training indices by action.
    l = {}
    for act in action_names:
        l[act] = []
    for idx, act in enumerate(act_train_labels):
        l[act].append(idx)

    # Train one SOM/kNN model pair per action.
    soms = {}
    knns = {}
    for act in action_names:
        ts = [train_set[i] for i in l[act]]
        tl = [train_labels[i] for i in l[act]]
        if ts and tl:
            som, knn_model = self.train_model(ts, tl)
            soms[act] = som
            knns[act] = knn_model
    print('done\n')

    # Group (object label, FFT) pairs by action.
    by_action = {}
    for idx, action in enumerate(act_train_labels):
        if action not in by_action:
            by_action[action] = []
        by_action[action].append((train_labels[idx], train_set[idx]))

    ########### COMPUTE AFFINITY MATRICES ##################
    print('Computing affinity matrices keyed on action...')
    affinity_by_action = {}
    for act in by_action:
        print('Processing %s action' % act)
        affinity_by_action[act] = []
        objs_ffts = by_action[act]
        num_objs = len(objs_ffts)
        for idx_obj1 in range(num_objs):
            print('\t[%d/%d]' % (idx_obj1 + 1, num_objs))
            som = soms[act]
            str1 = self.sound_fft_to_string(objs_ffts[idx_obj1][1], som)
            row = []
            for idx_obj2 in range(num_objs):
                str2 = self.sound_fft_to_string(objs_ffts[idx_obj2][1], som)
                dist = self.sound_seq_distance_str(str1, str2)
                row.append(dist)
            affinity_by_action[act].append(row)
        # Turn pairwise distances into affinities with a Gaussian kernel
        # scaled by the variance of the distances.
        var = np.asarray(affinity_by_action[act]).var()
        affinity_by_action[act] = np.exp(
            -np.asarray(affinity_by_action[act]) ** 2 / (2. * var ** 2)) * 10

    output = open('/tmp/affinity_by_action.pkl', 'wb')
    pickle.dump(affinity_by_action, output)
    output.close()
    print('done\n')

    ############ CLUSTER INSTANCES ######################
    print('Clustering instances...')
    for action, affinity_matrix in affinity_by_action.items():
        print('Processing %s action' % action)
        am = np.asarray(affinity_matrix)
        c_labels = spectral_clustering(am, n_clusters=len(object_names))
        print('labels: %s' % str(c_labels))
        objs_ffts = by_action[action]
        obj_ids = [of[0] for of in objs_ffts]
        print('objs: %s' % str(obj_ids))
        # Invert labels into a cluster id -> object ids mapping.
        l_to_o = {}
        for idx, lbl in enumerate(c_labels):
            if lbl not in l_to_o:
                l_to_o[lbl] = []
            l_to_o[lbl].append(obj_ids[idx])
        print(action, l_to_o)
    print('done\n')
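# Design note (hedged): the l_to_o grouping above is the standard
# label -> members inversion; collections.defaultdict expresses the same
# thing without the explicit membership check:
from collections import defaultdict

def group_by_label(c_labels, obj_ids):
    grouped = defaultdict(list)
    for lbl, obj in zip(c_labels, obj_ids):
        grouped[lbl].append(obj)
    return dict(grouped)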
pkl_file = open("data_gold.pkl", "rb") data_gold = pickle.load(pkl_file) data = data_gold[0] gold = data_gold[1] for i in range(len(gold)): gold[i] = gold[i] - 1 pkl_file.close() n_samples, n_features = data.shape print[n_samples, n_features] pkl_file = open("matrix_hsic.pkl", "rb") matrix_hsic = pickle.load(pkl_file) pkl_file.close() labels_predict = spectral_clustering(matrix_hsic, n_clusters=M) print labels_predict plt.figure(0) draw_similarity_matrix(matrix_hsic, labels_predict, M) # id_airway is the cluster id of 'WallAreaPct_seg' # id_emphysema is the cluster id of 'pctEmph' id_airway = labels_predict[13] id_emphysema = labels_predict[10] # id_score is the cluster id of Feature Set 2 # id_fev1 is the cluster id of Feature Set 3 id_score = labels_predict[6] id_fev1 = labels_predict[21]
import pickle

import numpy as np
from sklearn.cluster import spectral_clustering
from sklearn.preprocessing import scale

# file_data_train and mtr_hsic come from earlier in the surrounding script.
data_con_use, features_name_use = pickle.load(file_data_train)
file_data_train.close()

# Normalization of the dataset.
data = scale(data_con_use)

# Obtain the normalized HSIC matrix from the HSIC matrix:
# nHSIC(i, j) = HSIC(i, j) / sqrt(HSIC(i, i) * HSIC(j, j)).
mtr_nhsic = np.zeros(mtr_hsic.shape)
for i in range(mtr_nhsic.shape[0]):
    for j in range(mtr_nhsic.shape[1]):
        mtr_nhsic[i, j] = mtr_hsic[i, j] / np.sqrt(mtr_hsic[i, i] * mtr_hsic[j, j])

# Apply spectral clustering on the normalized HSIC matrix.
# Set the number of clusters.
n_clusters_f = 5
labels_f = spectral_clustering(mtr_nhsic, n_clusters=n_clusters_f, n_init=10)

# Count cluster sizes and collect the feature names and indices per cluster.
cnt = [0] * n_clusters_f
tp = [[] for _ in range(n_clusters_f)]
tp_id = [[] for _ in range(n_clusters_f)]
for i in range(len(labels_f)):
    cnt[labels_f[i]] += 1
    tp[labels_f[i]].append(features_name_use[i])
    tp_id[labels_f[i]].append(i)

# ax, pos_old = draw_similarity_matrix(mtr_nhsic, labels_f, n_clusters_f)
# plt.show()

flag_id = 0
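# The double loop above can be written in one vectorized step; this operates
# on the same mtr_hsic defined earlier and produces an identical matrix:
d = np.sqrt(np.diag(mtr_hsic))
mtr_nhsic_vec = mtr_hsic / np.outer(d, d)
assert np.allclose(mtr_nhsic_vec, mtr_nhsic)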
pkl_file = open("data_gold.pkl","rb") data_gold = pickle.load(pkl_file) data = data_gold[0] gold = data_gold[1] for i in range(len(gold)): gold[i] = gold[i]-1 pkl_file.close() n_samples,n_features = data.shape print [n_samples,n_features] pkl_file = open("matrix_hsic.pkl","rb") matrix_hsic = pickle.load(pkl_file) pkl_file.close() labels_predict = spectral_clustering(matrix_hsic,n_clusters=M) print labels_predict plt.figure(0) draw_similarity_matrix(matrix_hsic,labels_predict,M) # id_airway is the cluster id of 'WallAreaPct_seg' # id_emphysema is the cluster id of 'pctEmph' id_airway = labels_predict[13] id_emphysema = labels_predict[10] # id_score is the cluster id of Feature Set 2 # id_fev1 is the cluster id of Feature Set 3 id_score = labels_predict[6] id_fev1 = labels_predict[21]