def classification_per_id(self):
    """Write the per-identifier classification catalogue to disk.

    The catalogue is obtained from ``collapse_classification`` using
    ``self.cluster_members`` and ``self.design``. One line is written per
    catalogue key: the key right-aligned in a 10-character column, a space,
    and the ``l_to_s`` rendering of the value stored under that key.

    The output path is ``TESTS/<label>/gmode2_<label>.dat``, where
    ``<label>`` is ``self.label``. Nothing is returned.
    """
    from gmode_module import collapse_classification
    from file_module import writeit

    catalogue = collapse_classification(self.cluster_members, self.design)
    form = "{0:>10} {1}".format

    # Build the output lines directly instead of appending from inside a
    # throwaway list comprehension.
    text = deque(form(each, l_to_s(catalogue[each])) for each in catalogue.keys())

    # The context manager guarantees the handle is closed even if writeit
    # raises; closing an already-closed file is a no-op, so this is safe
    # whether or not writeit closes the handle itself.
    path = pathjoin("TESTS", self.label, 'gmode2_'+self.label+'.dat')
    with open(path, 'w') as output:
        writeit(text, output)
def run(self, realtime_map='n', save='y', **arg):
    """Run the cluster-recognition loop and store the results on ``self``.

    Clusters are extracted one at a time with ``kernel.clustering``. Each
    accepted cluster (more than two members and ``freedom >= 30``) has its
    rows removed from the working copies of the sample before the next
    pass; failed clumps and failed seeds are removed as well. The loop ends
    when ``clustering`` returns a seed with fewer than three entries or the
    remaining sample size drops below ``M - 1``.

    Parameters:
        realtime_map: when equal to ``'y'``, ``plot_map`` is called before
            the loop and after each accepted cluster.
        save: when equal to ``'y'`` and keyword arguments are given,
            ``self.load(**arg)`` is called.
        **arg: if empty, ``q1``, ``grid``, ``ulim`` and ``mlim`` are read
            from ``self``; otherwise all four keys must be present in
            ``arg`` (a missing key raises ``KeyError``).

    Sets ``self.t0``, ``self.report``, ``self.clusters_report``,
    ``self.cluster_members``, ``self.cluster_stats``, ``self.excluded`` and
    ``self.failed_seed``, then calls ``self.robustness()`` and logs
    ``self.robust``. Returns nothing.
    """
    from kernel import clustering
    from plot_module import plot_map
    from gmode_module import stats, cov, free

    # Parameters come either from the instance or from the keyword arguments.
    if len(arg) == 0:
        q1 = self.q1
        grid = self.grid
        ulim = self.ulim
        mlim = self.mlim
    else:
        q1 = arg['q1']
        grid = arg['grid']
        ulim = arg['ulim']
        mlim = arg['mlim']
        # NOTE(review): nesting reconstructed from a whitespace-collapsed
        # source; this call is assumed to belong to the else branch - confirm.
        if save == 'y': self.load(**arg)

    print(q1, mlim, ulim) #30/free(Rt))

    #################################################
    # Work on copies so that removing rows does not touch the instance data.
    design = copy(self.design)
    elems = copy(self.elems)
    #errs.extend(self.errs)
    indexs = copy(self.indexs)

    t0 = time() # Start counting procedure time
    N=len(elems) # Sample size
    M=len(elems[0]) # Variable size

    ##################################################
    ctt, devt, St, r2t = stats(elems)
    #Se = cov(self.errs/devt, zeros(M), 1e0)
    #mlim = (mlim**2) * Se
    # Expand mlim into a diagonal matrix with one entry per variable.
    mlim = matrix(diagflat(mlim*ones(ctt.size)))

    ################# START REPORT #################
    #print('mlim: ',sqrt(diagonal(mlim)))
    #print('Se: ',sqrt(diagonal(Se)))
    clusters_report =["Clump N median st. dev."]
    report = deque([" Sample size: "+str(N)+" Variable size: "+str(M)])
    report.append(" S.D.: "+str(devt))
    report.append("Upper Limit: "+str(ulim))
    report.append(" Minimum Deviation: "+str(diagonal(mlim)))
    report.append(" Confidence level q1: "+str(normal.cdf(q1) - normal.cdf(-q1)))
    report.append('grid: '+str(grid)+" --> "+str(grid**(M)))

    excluded = deque()
    failed_seed = deque()
    cluster_members = deque()
    cluster_stats = deque()
    n_failedseeds = 0

    if realtime_map == 'y': plot_map(0, [], [], elems, q1, [], [], self.label)

    report.append('############################ Part I : Recognize Clusters and Classify ################################## \n ')

    ################### Cluster Recognition #################
    # Nc counts accepted clusters; the "Nc == 0" clause keeps the loop alive
    # until a first cluster is accepted, since failed passes decrement Nc
    # back again.
    Nc = 0
    while Nc == 0 or N >= (M - 1):
        Nc+=1
        report.append('#################################### Clump '+str(Nc)+' ######################################### \n ')

        # Whitening happens here: the sample is scaled by the overall
        # standard deviations before clustering.
        cluster, seed, report, freedom = clustering(q1, ulim, mlim, grid, design, elems/devt, report)
        Na = len(cluster)

        if Na > 2 and freedom >= 30:
            #press = raw_input("press enter")
            # Save cluster member indexes
            cluster_members.append(indexs[cluster])
            # Save cluster statistics (computed on the unscaled elements)
            cluster_stats.append(stats(elems[cluster]))
            # Exclude group members from the sample:
            elems = delete(elems, cluster, 0)
            design = delete(design, cluster, 0)
            indexs = delete(indexs, cluster, 0)

            # NOTE(review): the try/except is assumed to sit inside this
            # realtime_map guard (nesting lost in the collapsed source) - confirm.
            if realtime_map == 'y':
                print(Nc, "Seed size: ",len(seed),'Na= ',Na,' N= ',N,' f= ',freedom)
                try:
                    plot_map(Nc, cluster, seed, elems, q1, cluster_stats[-1][0], cluster_stats[-1][2], self.label)
                except IndexError:
                    pass

            # Appending into logs. N still holds the sample size from before
            # this cluster was removed; it is refreshed at the end of the pass.
            report.append("\nC.T.: "+l_to_s(cluster_stats[-1][0])+"\nS.D.: "+l_to_s(cluster_stats[-1][1])+ \
                          "\nSize: "+str(Na)+" Left: "+str(N)+"\nCov. Matrix: \n"+str(cluster_stats[-1][2])+"\n")
            clusters_report.append(str(Nc)+3*" "+str(Na)+3*" "+l_to_s(cluster_stats[-1][0])+3*" "+l_to_s(cluster_stats[-1][1]))
        else:
            # The pass did not yield an accepted cluster: undo the increment.
            Nc-=1
            # Exclude clump members from the sample:
            if len(seed) > 2 and Na > 2: # Has initial seed and members.
                report.append("Failed Clump: "+l_to_s(design[cluster])) #map(lambda i: design[i], cluster)))
                failed_seed.append(set(indexs[cluster])) #map(lambda i: indexs[i], cluster))
                n_failedseeds = n_failedseeds + len(cluster)
                elems = delete(elems, cluster, 0)
                design = delete(design, cluster, 0)
                indexs = delete(indexs, cluster, 0)
            elif len(seed) > 2 and Na < 3: # Has initial seed but no members.
                report.append("Failed Seed: "+l_to_s(design[seed]))
                failed_seed.append(set(indexs[seed]))
                # NOTE(review): this adds len(cluster) although the rows
                # removed below are those in seed - confirm which is intended.
                n_failedseeds = n_failedseeds + len(cluster)
                elems = delete(elems, seed, 0)
                design = delete(design, seed, 0)
                indexs = delete(indexs, seed, 0)
            elif len(seed) < 3: # It does not have initial seed.
                break

        # Refresh the remaining sample size used by the loop condition.
        N = len(indexs)

    # Whatever is left in the sample after the loop is recorded as excluded.
    excluded.extend(indexs)
    report.append("######################### Excluded ###############################")
    report.append("Excluded Sample Size: "+str(len(excluded)))
    report.append("Failed Seeds: "+str(n_failedseeds))

    print("Number of Clusters: ", len(cluster_stats))
    print("Excluded Sample Size: ",len(excluded))
    # NOTE(review): this prints the number of failed groups (len(failed_seed)),
    # while the report above logs the accumulated n_failedseeds count.
    print("Failed Seeds: ",len(failed_seed))

    # Setting in self
    self.t0 = t0
    # logs
    self.report = report
    self.clusters_report = clusters_report
    # python objects
    self.cluster_members = cluster_members
    self.cluster_stats = cluster_stats
    self.excluded = excluded
    self.failed_seed = failed_seed

    # Robustness: robustness() is expected to set self.robust, read just below.
    self.robustness()
    print("Robustness: ", self.robust)
    report.append("Robustness: "+str(self.robust))
def clustering(q1, ulim, mlim, grid, design, data, report):
    """Iterative procedure of clustering.

    A seed is located with ``barycenter_density`` and then grown: on each
    pass the statistics of the current members are computed and every row
    of ``data`` is re-tested against them with ``hyp_test``. Iteration stops
    when membership no longer changes, after 30 passes, when the cluster
    has fewer than three members, when a deviation reaches ``ulim``, or
    when ``Na*f`` drops below 30 after the first pass.

    Parameters:
        q1: threshold forwarded to ``hyp_test``.
        ulim: upper limit for the cluster standard deviations; only
            enforced when ``ulim < 1``.
        mlim: minimum covariance matrix; replaces the seed covariance on
            the first pass when any entry of it falls below ``mlim``.
        grid: grid parameter forwarded to ``barycenter_density``.
        design: row labels, indexed with the seed for the report.
        data: 2-D sample, one row per element.
        report: log container; lines are appended in place.

    Returns:
        ``(cluster, seed, report, freedom)``. ``cluster`` holds the member
        row indices (``[]`` when the seed has fewer than three entries) and
        ``freedom`` is ``Na*f`` (``0.0`` when there is no usable seed).
    """
    # ________________________________Barycenter________________________________
    seed = barycenter_density(data, grid, amax(data, axis=0), amin(data, axis=0), sqrt(mlim))

    # Without at least three seed members there is nothing to grow.
    if len(seed) <= 2:
        return [], seed, report, 0.0

    report.append("Barycenter: "+l_to_s(design[seed]))

    # ______________________________CLASSIFICATION______________________________
    cluster = seed
    Na = len(seed)
    i = 0

    # cluster_prior is first bound inside the loop; the "i == 0" test
    # short-circuits so it is never read before that.
    while (i == 0 or cluster[:] != cluster_prior[:]) and i < 30 and Na > 2:
        # Statistics of the current members.
        ct, std, S, r2 = stats(data[cluster])

        # Replace lower deviations than minimal limit (seed pass only):
        if i == 0 and any(S < mlim):
            S = mlim
            std = sqrt(diagonal(S))

        iS = Invert(S)
        f = free(r2)
        cluster_prior = cluster

        # Once the upper limit is reached the iteration is halted.
        if ulim < 1e0 and any(std >= ulim):
            return cluster, seed, report, Na*f

        # NOTE(review): "arround" is kept as spelled in the original; confirm
        # it is defined elsewhere in the file (numpy's function is "around").
        report.append("Run "+str(i)+" Size: "+str(Na)+" S.D.: "+l_to_s(arround(std, 3))+"\nf: "+str(f)+"\n")

        # G hypothesis test:
        if i == 0 or Na*f >= 30:
            # hyp_test presumably returns the row index for accepted rows and
            # None otherwise; keep every non-None result. A list is built
            # explicitly because len() and slicing are applied to it.
            tested = (hyp_test(Na, q1, f, ind, y, ct, iS) for ind, y in enumerate(data))
            cluster = [hit for hit in tested if hit is not None]
        else:
            return cluster, seed, report, Na*f

        Na = len(cluster)
        i += 1

    return cluster, seed, report, Na*f