def extension(self, q1=None):
    ''' Try to reclassify the excluded sample into the existing clusters.

    Every element of ``self.elems[self.excluded]`` is submitted to
    ``hyp_test`` once per entry of ``self.cluster_stats``. The non-None
    results for cluster ``n`` are stored in ``self.reclass[n]`` (a deque
    holding one list per cluster, in cluster order) and two summary lines
    are appended to ``self.report``.

    Parameters
    ----------
    q1 : optional
        Value forwarded to ``hyp_test`` as its second argument.
        Falls back to ``self.q1`` when None.

    Returns
    -------
    None
        Results are left on ``self.reclass`` and ``self.report``.
    '''
    from itertools import chain
    from gmode_module import Invert, free, hyp_test

    if q1 is None:
        q1 = self.q1

    cluster_members = self.cluster_members
    excluded = self.excluded
    sample = self.elems[excluded]

    self.reclass = deque()
    for n, st in enumerate(self.cluster_stats):
        # NOTE(review): st looks like the (center, std, covariance,
        # correlation) tuple produced by stats() -- confirm against the
        # code that fills self.cluster_stats.
        iS = Invert(st[2])
        f = free(st[3])
        size = len(cluster_members[n])

        # Test every excluded element against this cluster and keep only
        # the results that are not None.
        tested = (hyp_test(size, q1, f, ind, y, st[0], iS)
                  for ind, y in zip(excluded, sample))
        self.reclass.append([hit for hit in tested if hit is not None])

    # The union must be taken over self.reclass; the bare name `reclass`
    # used previously was never defined and raised NameError.
    N = set(chain.from_iterable(self.reclass))

    self.report.append("\n Reclassified Excluded Sample Size: "+str(len(N)))
    print("Reclassified : ",len(N))
    self.report.append("\n Totally Excluded: "+str(len(sample) - len(N)))
def correspondence(self, templates, template_name, q1=None, artifact=None, var=None):
    ''' Fulchignoni et al. (2000) extension used to give a correspondence to clusters.

    Loads a pickled mapping of templates, submits every template to
    ``hyp_test`` once per cluster (restricted to the variables selected by
    ``var``) and records, for each cluster, the non-None results.

    The resulting dict (keys are 1-based cluster numbers) is written with
    ``writedict`` to
    ``TESTS/<self.label>/correspondence_q<q1>_<template_name>.dat`` and
    handed to ``file_module.pickle`` under the same base name.

    Parameters
    ----------
    templates : str
        Path of the pickle file holding the template mapping.
    template_name : str
        Suffix used to build the output file names.
    q1 : optional
        Value forwarded to ``hyp_test`` as its second argument.
        Falls back to ``self.q1`` when None.
    artifact : optional
        Not used by this method; kept for interface compatibility.
    var : optional
        Index applied to the cluster statistics and to every template
        value to select the variables compared.
        NOTE(review): the None default is passed straight to the indexing
        expressions below -- callers presumably always supply it; confirm.
    '''
    from gmode_module import Invert, free, hyp_test
    from file_module import pickle, writedict
    import cPickle as pkl

    if q1 is None:
        q1 = self.q1

    cluster_members = self.cluster_members
    cluster_stats = self.cluster_stats

    # SECURITY: unpickling can execute arbitrary code. Only load template
    # files that come from a trusted source.
    with open(templates, 'rb') as template_file:
        templ = pkl.load(template_file)

    interpretation = dict()
    for n, stat in enumerate(cluster_stats):
        # Restrict the two matrices to the rows and columns picked by var.
        iS = Invert(stat[2][var, :][:, var])
        f = free(stat[3][var, :][:, var])
        size = len(cluster_members[n])
        center = stat[0][var]

        tested = (hyp_test(size, q1, f, key, y[var], center, iS)
                  for key, y in templ.items())
        interpretation[n+1] = [hit for hit in tested if hit is not None]

    # Close the output file deterministically instead of leaking the handle.
    out_path = pathjoin("TESTS",self.label,'correspondence_q'+str(q1)+'_'+template_name+'.dat')
    with open(out_path, 'w') as out_file:
        writedict(interpretation, out_file)

    pickle(interpretation, self.label, "correspondence_q"+str(q1)+'_'+template_name)
def clustering(q1, ulim, mlim, grid, design, data, report):
    ''' Iterative procedure of clustering.

    A seed is obtained from ``barycenter_density``. The cluster is then
    rebuilt repeatedly: statistics are computed from the current members
    and every row of ``data`` is submitted to ``hyp_test``; rows with a
    non-None result form the next cluster. Iteration stops when the
    cluster no longer changes, after 30 runs, when the cluster has 2 or
    fewer members, when ``Na*f`` drops below 30, or when the upper
    deviation limit is reached.

    Parameters
    ----------
    q1 :
        Value forwarded to ``hyp_test`` as its second argument.
    ulim :
        Upper deviation limit; only enforced when ``ulim < 1``.
    mlim :
        Minimal limit. Replaces ``S`` on the first run when any element
        of ``S`` is below it; its square root is also passed to
        ``barycenter_density``.
    grid :
        Forwarded to ``barycenter_density``.
    design :
        Indexed by the seed to label the barycenter in the report.
    data :
        Sample array; rows are the elements being classified.
    report : list
        Log lines are appended to it in place.

    Returns
    -------
    tuple
        ``(cluster, seed, report, Na*f)``. When the seed has 2 or fewer
        elements the result is ``([], seed, report, 0.0)``.
    '''
    # ________________________________Barycenter________________________________
    seed = barycenter_density(data, grid, amax(data, axis=0), amin(data, axis=0), sqrt(mlim))

    # A seed of 2 or fewer elements cannot start a cluster.
    if len(seed) <= 2:
        return [], seed, report, 0.0

    report.append("Barycenter: "+l_to_s(design[seed]))
    Na = len(seed)

    # ______________________________CLASSIFICATION______________________________
    i = 0
    cluster = seed

    # `i == 0` short-circuits the comparison on the first pass, before
    # cluster_prior has been assigned.
    while (i == 0 or cluster[:] != cluster_prior[:]) and i < 30 and Na > 2:
        # Statistics of the current cluster.
        ct, std, S, r2 = stats(data[cluster])

        # Replace lower deviations than minimal limit (first run only).
        if i == 0 and any(S < mlim):
            S = mlim
            std = sqrt(diagonal(S))

        iS = Invert(S)
        f = free(r2)
        cluster_prior = cluster

        # Once the upper limit is reached the iteration is halted.
        if ulim < 1e0 and any(std >= ulim):
            return cluster, seed, report, Na*f

        report.append("Run "+str(i)+" Size: "+str(Na)+" S.D.: "+l_to_s(arround(std, 3))+"\nf: "+str(f)+"\n")

        # G hypothesis test: always on the first run, afterwards only
        # while Na*f stays at or above 30.
        if i == 0 or Na*f >= 30:
            tested = (hyp_test(Na, q1, f, ind, y, ct, iS)
                      for ind, y in enumerate(data))
            cluster = [hit for hit in tested if hit is not None]
        else:
            return cluster, seed, report, Na*f

        Na = len(cluster)
        i += 1

    return cluster, seed, report, Na*f