예제 #1
0
     def run(self, realtime_map='n', save='y', **arg):
         """Iteratively recognize clusters in the sample and store the results on ``self``.

         Each pass calls ``clustering`` on the remaining sample (scaled by the
         per-variable deviation returned by ``stats``).  A clump is accepted
         when it has more than two members and its ``freedom`` is at least 30;
         accepted members are removed from the working sample.  Rejected
         clumps / seeds are recorded in ``failed_seed`` and removed as well.
         The loop stops when no seed with more than two elements is found or
         when fewer than ``M - 1`` elements remain.

         Parameters
         ----------
         realtime_map : str
             When ``'y'``, ``plot_map`` is called once before the loop and
             after every accepted cluster.
         save : str
             When ``'y'``, ``self.load(**arg)`` is called before processing.
         **arg
             When empty, ``q1``, ``grid``, ``ulim`` and ``mlim`` are read from
             ``self``.  Otherwise all four keys must be supplied (a missing
             key raises ``KeyError``).

         Side effects
         ------------
         Sets ``self.t0``, ``self.report``, ``self.clusters_report``,
         ``self.cluster_members``, ``self.cluster_stats``, ``self.excluded``
         and ``self.failed_seed``; then calls ``self.robustness()`` and logs
         ``self.robust``.
         """
         from kernel import clustering
         from plot_module import plot_map
         from gmode_module import stats, cov, free

         if not arg:
             q1 = self.q1
             grid = self.grid
             ulim = self.ulim
             mlim = self.mlim
         else:
             q1 = arg['q1']
             grid = arg['grid']
             ulim = arg['ulim']
             mlim = arg['mlim']

         if save == 'y':
             self.load(**arg)

         print(q1, mlim, ulim)

         # Work on copies so the attributes on self are not modified while
         # members are being removed from the working sample.
         design = copy(self.design)
         elems = copy(self.elems)
         indexs = copy(self.indexs)

         t0 = time()  # Start counting procedure time

         N = len(elems)     # Sample size
         M = len(elems[0])  # Variable size

         ctt, devt, St, r2t = stats(elems)

         # Expand the minimum-deviation limit into a diagonal matrix with one
         # entry per variable.
         mlim = matrix(diagflat(mlim*ones(ctt.size)))

         ################# START REPORT #################

         clusters_report = ["Clump   N                median                      st. dev."]
         report = deque([" Sample size: "+str(N)+" Variable size: "+str(M)])
         report.append(" S.D.: "+str(devt))
         report.append("Upper Limit: "+str(ulim))
         report.append(" Minimum Deviation: "+str(diagonal(mlim)))
         report.append(" Confidence level q1: "+str(normal.cdf(q1) - normal.cdf(-q1)))
         report.append('grid: '+str(grid)+" --> "+str(grid**(M)))

         excluded = deque()
         failed_seed = deque()
         cluster_members = deque()
         cluster_stats = deque()
         n_failedseeds = 0

         if realtime_map == 'y':
             plot_map(0, [], [], elems, q1, [], [], self.label)

         report.append('############################ Part I : Recognize Clusters and Classify ################################## \n ')

         ################### Cluster Recognition #################

         Nc = 0
         while Nc == 0 or N >= (M - 1):
             Nc += 1
             report.append('#################################### Clump '+str(Nc)+' ######################################### \n ')
             # The sample is scaled by the per-variable deviation before clustering.
             cluster, seed, report, freedom = clustering(q1, ulim, mlim, grid, design, elems/devt, report)

             Na = len(cluster)

             if Na > 2 and freedom >= 30:

                 # Save cluster member indexes
                 cluster_members.append(indexs[cluster])

                 # Save cluster statistics (computed on the unscaled elements)
                 cluster_stats.append(stats(elems[cluster]))

                 # Exclude group members from the sample:
                 elems = delete(elems, cluster, 0)
                 design = delete(design, cluster, 0)
                 indexs = delete(indexs, cluster, 0)

                 if realtime_map == 'y':
                     print(Nc, "Seed size: ", len(seed), 'Na= ', Na, ' N= ', N, ' f= ', freedom)
                     try:
                         plot_map(Nc, cluster, seed, elems, q1, cluster_stats[-1][0], cluster_stats[-1][2], self.label)
                     except IndexError:
                         # Plotting is best-effort; an IndexError here must
                         # not abort the classification.
                         pass

                 # Appending into logs.  "Left" is the number of elements
                 # remaining after this cluster was removed (N itself is only
                 # refreshed at the end of the iteration).
                 n_left = len(indexs)
                 report.append("\nC.T.: "+l_to_s(cluster_stats[-1][0])+"\nS.D.: "+l_to_s(cluster_stats[-1][1]) +
                               "\nSize: "+str(Na)+"       Left: "+str(n_left)+"\nCov. Matrix: \n"+str(cluster_stats[-1][2])+"\n")

                 clusters_report.append(str(Nc)+3*" "+str(Na)+3*" "+l_to_s(cluster_stats[-1][0])+3*" "+l_to_s(cluster_stats[-1][1]))
             else:

                 # Rejected clump: do not consume a cluster number.
                 Nc -= 1

                 if len(seed) > 2 and Na > 2:  # Has initial seed and members.

                     report.append("Failed Clump: "+l_to_s(design[cluster]))

                     failed_seed.append(set(indexs[cluster]))
                     n_failedseeds = n_failedseeds + len(cluster)

                     # Exclude clump members from the sample:
                     elems = delete(elems, cluster, 0)
                     design = delete(design, cluster, 0)
                     indexs = delete(indexs, cluster, 0)

                 elif len(seed) > 2 and Na < 3:  # Has initial seed but no members.

                     report.append("Failed Seed: "+l_to_s(design[seed]))

                     failed_seed.append(set(indexs[seed]))
                     # The seed elements are the ones being removed here, so
                     # they are the ones counted.
                     n_failedseeds = n_failedseeds + len(seed)

                     elems = delete(elems, seed, 0)
                     design = delete(design, seed, 0)
                     indexs = delete(indexs, seed, 0)

                 elif len(seed) < 3:  # It does not have initial seed.
                     break

             N = len(indexs)

         # Whatever is left in the working sample was never classified.
         excluded.extend(indexs)

         report.append("######################### Excluded ###############################")
         report.append("Excluded Sample Size: "+str(len(excluded)))
         report.append("Failed Seeds: "+str(n_failedseeds))
         print("Number of Clusters: ", len(cluster_stats))
         print("Excluded Sample Size: ", len(excluded))
         print("Failed Seeds: ", len(failed_seed))

         # Setting in self
         self.t0 = t0
         # logs
         self.report = report
         self.clusters_report = clusters_report
         # python objects
         self.cluster_members = cluster_members
         self.cluster_stats = cluster_stats
         self.excluded = excluded
         self.failed_seed = failed_seed

         # Robustness
         self.robustness()
         print("Robustness: ", self.robust)
         # `report` is the same object as self.report, so this line is logged too.
         report.append("Robustness: "+str(self.robust))
예제 #2
0
def clustering(q1, ulim, mlim, grid, design, data, report):
    """Grow one cluster from a barycenter seed by iterated hypothesis testing.

    A seed is obtained from ``barycenter_density``.  If it has fewer than
    three elements the function returns immediately.  Otherwise the cluster
    statistics are recomputed and every row of ``data`` is re-tested with
    ``hyp_test`` until the membership stops changing, 30 iterations have run,
    or the cluster shrinks below three members.

    Parameters
    ----------
    q1, ulim, mlim, grid
        Passed through to the helpers: ``q1`` to ``hyp_test``, ``grid`` and
        ``sqrt(mlim)`` to ``barycenter_density``.  ``mlim`` also replaces the
        covariance on the first iteration when any entry of ``S`` is below
        it; ``ulim`` (only when < 1) stops the iteration once any standard
        deviation reaches it.
    design
        Indexable by the seed; used only for the "Barycenter" log line.
    data
        2-D sample; ``data.shape`` gives (sample size, variable size).
    report
        Log container; lines are appended to it in place.

    Returns
    -------
    tuple
        ``(cluster, seed, report, freedom)`` where ``freedom`` is ``Na*f``
        for the last evaluated cluster, or ``([], seed, report, 0.0)`` when
        the seed has fewer than three elements.
    """
    N = data.shape[0]    # Sample size

    # ________________________________Barycenter________________________________
    seed = barycenter_density(data, grid, amax(data, axis=0), amin(data, axis=0), sqrt(mlim))

    if len(seed) <= 2:
        # No usable seed: nothing to classify.
        return [], seed, report, 0.0

    report.append("Barycenter: "+l_to_s(design[seed]))
    Na = len(seed)

    # ______________________________CLASSIFICATION______________________________

    # Keep the working cluster as a plain list so the convergence check below
    # is an ordinary list comparison on every iteration.
    cluster = list(seed)
    index = range(N)
    i = 0

    while (i == 0 or cluster[:] != cluster_prior[:]) and i < 30 and Na > 2:

        # Cluster statistics
        ct, std, S, r2 = stats(data[cluster])

        # Replace lower deviations than minimal limit.
        # NOTE(review): assumes `any` is the element-wise NumPy version - confirm.
        if i == 0 and any(S < mlim):
            S = mlim
            std = sqrt(diagonal(S))

        iS = Invert(S)
        f = free(r2)

        cluster_prior = cluster

        # Once the upper limit is reached the iteration is halted.
        if ulim < 1e0 and any(std >= ulim):
            return cluster, seed, report, Na*f

        # NOTE(review): `arround` is not a NumPy name (NumPy provides
        # `around`) - confirm it is defined elsewhere in this module.
        report.append("Run "+str(i)+" Size: "+str(Na)+" S.D.: "+l_to_s(arround(std, 3))+"\nf: "+str(f)+"\n")

        # G hypothesis test: only continue while the cluster carries enough
        # degrees of freedom (always test on the first pass).
        if i == 0 or Na*f >= 30:
            # A list comprehension (rather than filter/imap) yields a real
            # list on both Python 2 and Python 3, which len() below requires.
            tested = (hyp_test(Na, q1, f, ind, y, ct, iS) for ind, y in zip(index, data))
            cluster = [member for member in tested if member is not None]
        else:
            return cluster, seed, report, Na*f

        Na = len(cluster)
        i += 1

    return cluster, seed, report, Na*f