def test_partial_fit():
    """Fitting in one shot must equal two successive partial_fit calls."""
    data, _ = make_blobs(n_samples=100)

    # Reference: a single fit over the full data set.
    full_model = Birch(n_clusters=3)
    full_model.fit(data)

    # Incremental: feed the same data in two halves.
    incremental = Birch(n_clusters=None)
    for half in (data[:50], data[50:]):
        incremental.partial_fit(half)
    assert_array_almost_equal(incremental.subcluster_centers_,
                              full_model.subcluster_centers_)

    # After set_params(n_clusters=3), partial_fit(None) only recomputes the
    # global clustering — the labels must match the one-shot fit.
    incremental.set_params(n_clusters=3)
    incremental.partial_fit(None)
    assert_array_equal(incremental.subcluster_labels_,
                       full_model.subcluster_labels_)
def birchCluster(zD, maxd, out='dict', N=None, start=0, stop=None):
    """Cluster the 3-D particle positions in *zD* with BIRCH.

    `maxd` is the BIRCH threshold: the radius of the subcluster obtained by
    merging a new sample and the closest subcluster must stay below it,
    otherwise a new subcluster is started. Setting this value very low
    promotes splitting and vice-versa.

    Parameters
    ----------
    zD : object exposing `dictPos` ({'x','y','z'} -> position sequences)
        and `pList` (the particle list) -- project type, assumed from usage.
    maxd : float
        Maximum subcluster radius (BIRCH `threshold`).
    out : {'dict', 'list'}
        'dict' returns `list2dict(zD, groups)`, 'list' the raw label array.
    N : int or None
        If given, refine the clustering to exactly N global clusters with a
        second partial_fit pass.
    start, stop : int
        Slice of the particle list to cluster; `stop=None` means "to the end".

    Returns
    -------
    dict or array of labels, depending on `out`.

    Raises
    ------
    ValueError
        If `out` is neither 'dict' nor 'list'.
    """
    data = zD.dictPos
    # Compare against None, not truthiness: a caller-supplied stop of 0 must
    # mean an empty slice, not "use the whole list".
    if stop is None:
        stop = len(zD.pList)
    X = [[data['x'][i], data['y'][i], data['z'][i]] for i in range(start, stop)]
    brc = Birch(branching_factor=50, n_clusters=None, threshold=maxd,
                compute_labels=True)
    brc.fit(X)
    if N:
        # Second pass: refine the CF-tree leaves into exactly N global clusters.
        brc.set_params(n_clusters=N)
        # np.matrix is deprecated; a plain 2-D ndarray behaves identically here.
        brc.partial_fit(np.asarray(X))
    groups = brc.predict(X)
    if out == 'dict':
        return list2dict(zD, groups)
    elif out == 'list':
        return groups
    else:
        # ValueError subclasses Exception, so pre-existing handlers still match.
        raise ValueError("Out argument must have values 'dict' or 'list'")
class Mini():
    """Container and analysis pipeline for detected miniature events ("minis").

    Holds the raw event traces plus a set of parallel per-event result lists
    (amplitudes, fit parameters, labels, ...) that are filled in by statis(),
    mini_dim_reduce() and classify(), and kept in sync when events are deleted.
    """

    def __init__(self, minis, mini_names, mini_finds, sample_freq):
        # Raw inputs: event traces, their names, their (sweep, ...) locations,
        # and the acquisition sampling frequency (Hz) used to build time axes.
        self.mini_names = mini_names
        self.minis = minis
        self.sample_freq = sample_freq
        self.mini_finds = mini_finds
        # Result attributes start as None; statis()/classify() populate them.
        self.offsets = self.fit_paras = self.event_sizes = self.amplitudes = self.fast_constants = self.slow_constants = self.a_constants = self.cur_labels = None
        # Names of the per-event parallel lists that _delete_mini must keep in
        # lockstep when an event is removed.
        self.dict = ['mini_names', 'minis', 'offsets', 'fit_paras', 'event_sizes', 'amplitudes', 'fast_constants', 'slow_constants', 'a_constants', 'cur_labels', 'mini_finds']
        # Indices marked for deletion (flushed by truly_delete_mini()).
        self.delete_index = set()

    def _delete_mini(self, index):
        # Truly delete: remove entry `index` from every parallel result list.
        for name in self.dict:
            if hasattr(self, name):
                llist = getattr(self, name)
                if isinstance(llist, list):
                    llist.pop(index)
                    #print(llist==getattr(self,name))
                else:
                    # Not a list (e.g. a numpy array from classify()):
                    # convert to a list first so pop() works, then remove.
                    print(name)
                    setattr(self, name, list(llist))
                    llist = getattr(self, name)
                    llist.pop(index)

    def mark_delete_mini(self, indexs):
        # Mark delete candidates; `indexs` is any iterable of indices
        # (list, set/union, or tuple). Actual removal is deferred.
        self.delete_index = self.delete_index.union(indexs)

    def truly_delete_mini(self):
        # Flush the pending deletions. Deleting in descending index order so
        # earlier pops do not shift the positions of later ones.
        print(self.delete_index)
        self.delete_index = list(self.delete_index)
        self.delete_index.sort(reverse=True)
        for number in self.delete_index:
            self._delete_mini(number)
        self.delete_index = set()  # clear the delete flush

    def reindex_mini(self):
        # Build reverse lookups: cluster label -> event indices, and
        # sweep id (first element of each mini_finds entry) -> event indices.
        self.mini_reindex = {'label': {}, 'sweep': {}}
        #self.mini_reindex['label']=func_base.list_to_dict(self.cur_labels,self.minis)
        self.mini_reindex['label'] = func_base.list_to_dict(self.cur_labels, range(len(self.cur_labels)))
        #self.mini_reindex['sweep']=func_base.list_to_dict([x[0] for x in self.mini_finds],self.minis)
        self.mini_reindex['sweep'] = func_base.list_to_dict([x[0] for x in self.mini_finds], range(len(self.mini_finds)))
        print(self.mini_reindex['label'])

    # self.minis_number,self.event_sizes,self.offsets,self.fast_constants,self.slow_constants,self.rise_10_90s,self.decay_90_50s=mini_base.statis(self.minis)
    def statis(self):
        """Fit every event with a two-exponential template and collect the
        per-event statistics (amplitude, fitted size, time constants...)."""
        if not self.minis:
            print('couldn\'t find any minis' )
            return
        #print(self.minis)
        self.mini_number = len(self.minis)

        def templete_func(x, a0, a1, tau1, tau2, t0):
            # Two-exponential event template: baseline a0 before onset t0,
            # rise (tau1) times decay (tau2) afterwards.
            # NOTE(review): the bare except swallows evaluation errors and
            # returns None — presumably math.exp failing on array input;
            # confirm before tightening.
            try:
                return np.piecewise(x, [x>=t0, x<t0], [lambda x: a0+a1*(1-math.exp((x-t0)/tau1))*(math.exp((x-t0)/tau2)), a0])
            except:
                print('xxx', x)

        self.fit_paras = []
        self.event_sizes = []
        self.amplitudes = []
        self.offsets = []
        self.fast_constants = []
        self.slow_constants = []
        self.a_constants = []
        # Fit using the two-exponential function; bounds keep a1 <= 0 and the
        # time constants tau1, tau2 non-negative.
        param_bounds = ([-np.inf, -np.inf, 0, 0, -np.inf], [np.inf, 0, np.inf, np.inf, np.inf])
        #nn=0
        for mini in self.minis:
            # Raw peak-to-peak amplitude of the trace itself.
            self.amplitudes.append(max(mini)-min(mini))
            minilen = len(mini)
            # If the trace is too large curve_fit cannot work — truncate.
            if minilen > 10000:
                minilen = 10000
                mini = mini[:minilen]
            # Time axis in seconds for this (possibly truncated) event.
            x_label = np.arange(0, minilen)/self.sample_freq
            #nn+=1
            #print(len(x_label))
            try:
                paraments, pcov = curve_fit(templete_func, x_label, mini, bounds=param_bounds)
            except:
                # On fit failure, show the offending trace before re-raising.
                #print(nn)
                print("mini", mini, "label", x_label)
                plt.figure()
                plt.plot(x_label, mini)
                plt.show()
                raise
            # Unpack fitted parameters: (a0, a1, tau1, tau2, t0).
            self.fit_paras.append(paraments)
            self.offsets.append(paraments[4])
            self.fast_constants.append(paraments[2])
            self.slow_constants.append(paraments[3])
            self.a_constants.append(paraments[1])
            # Peak-to-peak of the fitted curve = the "event size".
            fit_mini = templete_func(x_label, *paraments)
            self.event_sizes.append(max(fit_mini)-min(fit_mini))

    def mini_dim_reduce(self, dim=5):
        # PCA analysis: reduce each event trace to `dim` components.
        pca = PCA(n_components=dim)
        # Convert Python sequence to NumPy array, filling missing values —
        # traces have unequal lengths, so zip_longest zero-pads them.
        minis = np.array(list(itertools.zip_longest(*self.minis, fillvalue=0))).T
        # transform returns an array-like of shape (n_events, dim)
        self.proced_minis = pca.fit_transform(minis)
        print('explained variance ratio (first two components): %s' % str(pca.explained_variance_ratio_))

    def get_mini_info(self, index):
        """Return (name, trace, current label, time axis) for one event."""
        #print(locals())
        mini = self.minis[index]
        x_label = np.arange(len(mini))/self.sample_freq
        return self.mini_names[index], mini, self.cur_labels[index], x_label

    def classify(self, n_cluster=5):
        # Using BIRCH cluster on the PCA-reduced traces (requires
        # mini_dim_reduce() to have been called first).
        self.birch = Birch(threshold=0.5, n_clusters=n_cluster)
        self.birch.fit(self.proced_minis)
        # Keep both the original clustering and a mutable "current" copy so
        # set_n_cluster() can re-cluster without losing the original.
        self.ori_labels = self.birch.labels_
        self.ori_centroids = self.birch.subcluster_centers_
        self.ori_n_clusters = np.unique(self.ori_labels)
        self.ori_n_cluster = np.unique(self.ori_labels).size
        self.cur_labels = self.ori_labels
        self.cur_centroids = self.ori_centroids
        self.cur_n_cluster = self.ori_n_cluster
        self.cur_n_clusters = self.ori_n_clusters

    def set_n_cluster(self, n_cluster):
        # Re-run only the global clustering step with a new cluster count,
        # reusing the fitted CF-tree, and refresh the "current" attributes.
        self.birch.set_params(n_clusters=n_cluster)
        self.cur_labels = self.ori_labels = self.birch.predict(self.proced_minis)
        self.cur_n_cluster = np.unique(self.cur_labels).size
        self.cur_n_clusters = np.unique(self.cur_labels)
        self.cur_centroids = self.birch.subcluster_centers_
affinity_propagation_test_performance_metrics_for_plotting[item + 1] = affinity_propagation_test_performance_metric_array[item] Figures.save_valid_test_performance_measures_vs_hyper_parameters_figure(affinity_propagation_parameter_search_space_for_plotting, affinity_propagation_valid_performance_metrics_for_plotting, affinity_propagation_test_performance_metrics_for_plotting, 'Adjusted Mutual Information Score', 'AffinityPropagation Clustering damping parameter', 'Affinity_Propagation_Performance', 0, 0.5, left_horizontal_limit=0.5) # Do BIRCH, optimizing number of calls to partial_fit over a validation set current_optimal_birch_number_of_calls = 1 initial_optimal_birch_clusterer = Birch() initial_optimal_birch_clusterer.partial_fit(train_data_set) initial_optimal_birch_clusterer.set_params(n_clusters=number_of_classes) initial_birch_valid_predictions = initial_optimal_birch_clusterer.predict(valid_data_set) initial_birch_test_predictions = initial_optimal_birch_clusterer.predict(test_data_set) # Add one to the predictions to make them match up with range of labels, then apply Hungarian Fix for element in range(number_of_valid_observations): initial_birch_valid_predictions[element] += 1 for element in range(number_of_test_observations): initial_birch_test_predictions[element] += 1 initial_birch_valid_predictions = Clustering.Hungarian_Fix(initial_birch_valid_predictions, valid_labels).astype('int') initial_birch_test_predictions = Clustering.Hungarian_Fix(initial_birch_test_predictions, test_labels).astype('int') # Set a starting point for optimality of the initial performance metric, to be possibly adjusted later birch_number_of_calls_integer_search_space_start = current_optimal_birch_number_of_calls + 1