Beispiel #1
0
     def extension(self, q1=None):
         ''' Re-test the excluded elements against every cluster.

         For each entry of ``self.cluster_stats`` the excluded elements
         (``self.elems[self.excluded]``) are passed through ``hyp_test``;
         every result that is not ``None`` is stored in ``self.reclass``,
         which holds one list per cluster in the same order as
         ``self.cluster_stats``.  Two lines summarising the outcome are
         appended to ``self.report`` and the reclassified count is printed.

         Parameters:
             q1: value forwarded to ``hyp_test``; ``self.q1`` is used when
                 it is ``None``.

         Returns nothing; the results are left in ``self.reclass`` and
         ``self.report``.
         '''
         from itertools import chain
         from gmode_module import Invert, free, hyp_test

         if q1 is None:
             q1 = self.q1

         cluster_members = self.cluster_members
         sample = self.elems[self.excluded]

         self.reclass = deque()
         for n, st in enumerate(self.cluster_stats):
             # NOTE(review): st is presumably laid out as (ct, std, S, r2),
             # like the tuple unpacked from stats() in clustering() -- confirm.
             iS = Invert(st[2])
             f = free(st[3])
             size = len(cluster_members[n])

             # Pair each excluded index with its row of the excluded sample.
             tested = (hyp_test(size, q1, f, ind, y, st[0], iS)
                       for ind, y in zip(self.excluded, sample))
             # Always a real list (possibly empty), so it can be measured
             # and chained below on any Python version.
             self.reclass.append([hit for hit in tested if hit is not None])

         # Union over all clusters: one result may be accepted by several.
         # (The original read the undefined bare name `reclass` here.)
         N = set(chain.from_iterable(self.reclass))

         self.report.append("\n Reclassified Excluded Sample Size: "+str(len(N)))
         print("Reclassified : ",len(N))
         self.report.append("\n Totally Excluded: "+str(len(sample) - len(N)))
Beispiel #2
0
     def correspondence(self, templates, template_name, q1=None, artifact=None, var=None):
         ''' Fulchignoni et al. (2000) extension used to give a correspondence to clusters.

         Every template loaded from the pickle file ``templates`` is passed
         through ``hyp_test`` against each entry of ``self.cluster_stats``,
         restricted to the variables selected by ``var``.  The results that
         are not ``None`` are collected per cluster (keys start at 1), then
         written with ``writedict`` to
         ``TESTS/<self.label>/correspondence_q<q1>_<template_name>.dat`` and
         stored with ``file_module.pickle``.

         Parameters:
             templates: path of the pickle file; it is loaded as a mapping
                 and each value is indexed with ``var``.
             template_name: suffix used in the output file names.
             q1: value forwarded to ``hyp_test``; ``self.q1`` is used when
                 it is ``None``.
             artifact: not used by this method.
             var: index applied to the rows and columns of ``stat[2]`` and
                 ``stat[3]``, to ``stat[0]`` and to every template.
                 NOTE(review): the default ``None`` is passed straight to
                 that indexing, so callers presumably always supply it --
                 confirm.

         Returns nothing.
         '''
         from gmode_module import Invert, free, hyp_test
         from file_module import pickle, writedict
         try:
             import cPickle as pkl
         except ImportError:
             # cPickle does not exist on Python 3; pickle offers the same load().
             import pickle as pkl

         if q1 is None:
             q1 = self.q1

         cluster_members = self.cluster_members
         cluster_stats = self.cluster_stats

         # NOTE: unpickling can execute arbitrary code -- only point
         # `templates` at files from a trusted source.
         with open(templates, 'rb') as template_file:
             templ = pkl.load(template_file)

         interpretation = dict()

         for n, stat in enumerate(cluster_stats):
             # Keep only the rows and columns selected by var.
             iS = Invert(stat[2][var, :][:, var])
             f = free(stat[3][var, :][:, var])
             size = len(cluster_members[n])
             centre = stat[0][var]

             tested = (hyp_test(size, q1, f, key, y[var], centre, iS)
                       for key, y in templ.items())
             # A real list, so it can be written out and pickled below.
             interpretation[n+1] = [hit for hit in tested if hit is not None]

         out_path = pathjoin("TESTS", self.label, 'correspondence_q'+str(q1)+'_'+template_name+'.dat')
         # Close the output file deterministically once it has been written.
         with open(out_path, 'w') as out_file:
             writedict(interpretation, out_file)
         pickle(interpretation, self.label, "correspondence_q"+str(q1)+'_'+template_name)
Beispiel #3
0
def clustering(q1, ulim, mlim, grid, design, data, report):
    ''' Iterative procedure of clustering.

    A seed is obtained from ``barycenter_density``.  While the membership
    keeps changing (at most 30 runs, and only while more than two members
    remain) the statistics of the current members are computed with
    ``stats`` and every row of ``data`` is passed through ``hyp_test``; the
    results that are not ``None`` become the new membership, which is used
    to index ``data`` on the next run.

    Parameters:
        q1: value forwarded to ``hyp_test``.
        ulim: upper limit on the deviations; only checked when below 1.
        mlim: minimal limit; replaces ``S`` on the first run when any
            element of ``S`` is below it, and its square root is passed
            to ``barycenter_density``.
        grid: forwarded to ``barycenter_density``.
        design: indexed with the seed to label the barycenter in the report.
        data: 2-D sample with one row per element.
        report: list of strings; progress lines are appended in place.

    Returns:
        ``(cluster, seed, report, Na*f)``, where ``Na`` is the current
        cluster size and ``f`` the value of ``free(r2)`` from the last run.
        When the seed has two elements or fewer the result is
        ``([], seed, report, 0.0)``.
    '''

#________________________________Barycenter______________________________________
    seed = barycenter_density(data, grid, amax(data, axis=0), amin(data, axis=0), sqrt(mlim))

    # A seed of two elements or fewer cannot start a cluster.
    if len(seed) <= 2:
        return [], seed, report, 0.0

    report.append("Barycenter: "+l_to_s(design[seed]))
    Na = len(seed)

# ______________________________CLASSIFICATION__________________________________

    i = 0
    cluster = seed

    # cluster_prior is first assigned inside the loop; the `i == 0` test
    # short-circuits the comparison on the first pass.
    while (i == 0 or cluster[:] != cluster_prior[:]) and i < 30 and Na > 2:

        # Statistics of the current cluster members.
        ct, std, S, r2 = stats(data[cluster])

        # Replace lower deviations than minimal limit:
        if i == 0 and any(S < mlim):
            S = mlim
            std = sqrt(diagonal(S))

        iS = Invert(S)
        f = free(r2)

        cluster_prior = cluster

        # Once the upper limit is reached the iteration is halted.
        if ulim < 1e0 and any(std >= ulim):
            return cluster, seed, report, Na*f

        # NOTE(review): NumPy spells this function `around`; `arround` must
        # be provided elsewhere in this module -- confirm.
        report.append("Run "+str(i)+" Size: "+str(Na)+" S.D.: "+l_to_s(arround(std, 3))+"\nf: "+str(f)+"\n")

        # G hypothesis test: always on the first run, afterwards only
        # while Na*f is at least 30.
        if i == 0 or Na*f >= 30:
            tested = (hyp_test(Na, q1, f, ind, y, ct, iS)
                      for ind, y in enumerate(data))
            # Materialise the survivors: the membership is measured with
            # len() and used to index `data` on the next run.
            cluster = [hit for hit in tested if hit is not None]
        else:
            return cluster, seed, report, Na*f

        Na = len(cluster)
        i += 1

    return cluster, seed, report, Na*f