# Clustering primitives come from the third-party ``hcluster`` package;
# numpy and matplotlib are used by the demo below.
from hcluster import pdist, linkage, dendrogram, centroid, weighted
import numpy
from numpy.random import rand
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

# --- Demo executed at import time -------------------------------------------
# Draw a random 5x2 array, multiply its first three rows by 50, then run it
# through pdist -> weighted -> dendrogram, printing every intermediate value.
# NOTE(review): this runs (and opens a matplotlib window through plt.show())
# whenever the module is imported -- confirm that this is intended.
X = rand(5, 2)
print X
# Scale rows 0..2 in place.
X[0:3, :] *= 50
print X
# Distances computed from the rows of X.
Y = pdist(X)
print Y
# Linkage computed from those distances.
Z = weighted(Y)
print Z
# dendrogram() is given the linkage; its return value is printed below.
hh = dendrogram(Z)
print hh
plt.show()
def hierarchical_clustering(nets, dico, dist_mat):
    """Build, period by period, the networks obtained after each merge.

    For every ``(inter, net)`` entry of ``nets`` the pairwise similarities
    stored in ``net.champs_dist`` are converted to distances
    (``1 - similarity``, missing pairs counting as similarity 0), passed to
    ``weighted`` to obtain a linkage, and the merges of that linkage are then
    replayed one at a time.

    Args:
        nets: mapping ``period -> network``; each network exposes
            ``champs_liste`` (list of fields) and ``champs_dist`` (dict keyed
            by ``(field, field)`` pairs).
        dico: forwarded unchanged to ``addnfields``.
        dist_mat: forwarded unchanged to ``addnfields``.

    Returns:
        dict mapping ``(period, 0)`` to the initial network and
        ``(period, fusion_level)`` to the network obtained after the merge
        performed at that level.

    Notes:
        * The field list of each input network is copied, so the caller's
          ``net.champs_liste`` is no longer emptied by the merge loop.
        * Fields are matched to linkage rows through ``champ.index``; this
          assumes ``index`` equals the field's position in ``champs_liste``
          and that ``addnfields`` gives the merged field the index ``N``
          (presumably -- verify against ``addnfields``).
    """
    multi_level_dyn_net = {}
    for inter, net in nets.items():
        # Work on a private copy: fields are removed/appended while merging.
        champs_liste = list(net.champs_liste)
        champs_dist = net.champs_dist

        # Reformat the similarities between fields into a flat list of
        # distances, one per unordered pair, in list order.
        dist_tot = []
        for pos, champ1 in enumerate(champs_liste[:-1]):
            for champ2 in champs_liste[pos + 1:]:
                dist_tot.append(
                    float(1. - champs_dist.get((champ1, champ2), 0.)))

        Z = weighted(dist_tot)
        print('***periode : ' + str(inter)
              + 'on calcule le dendrogramme associe')

        # Index that the next merged field will receive (incremented first).
        N = len(champs_liste) - 1
        multi_level_dyn_net[(inter, 0)] = Network(champs_liste[:], champs_dist)

        # Z looks like:
        #   [[  2.   6.  0.36216925  2.]
        #    [ 10.  15.  0.42559153  2.]
        #    [  8.  13.  0.43538316  2.]
        #    [  0.  19.  0.43583879  2.]]
        # i.e. columns 0 and 1 are the merged indices, column 2 the level.
        for ev_fusion in Z:
            N += 1
            # Pair of field indices merged by this event.
            fusion_couple = (int(ev_fusion[0]), int(ev_fusion[1]))
            fusion_level = ev_fusion[2]

            # Split the current fields into the ones being merged and the
            # ones that survive; ``dessous`` maps each field of the new
            # level to the fields it is made of.
            dessous = {}
            fields_rm_liste = []
            conserves = []
            for champ in champs_liste:
                if champ.index in fusion_couple:
                    fields_rm_liste.append(champ)
                else:
                    conserves.append(champ)
                    dessous[champ] = [champ]

            new_champ = addnfields(fields_rm_liste, N, inter, dico, dist_mat)
            dessous[new_champ] = fields_rm_liste
            champs_liste = conserves
            champs_liste.append(new_champ)

            new_champs_dist = {}
            for pos, champ1 in enumerate(champs_liste[:-1]):
                for champ2 in champs_liste[pos + 1:]:
                    if champ1 != champ2:
                        if (champ1, champ2) in champs_dist:
                            # Pair already known: carry both directions over.
                            new_champs_dist[(champ1, champ2)] = \
                                champs_dist[(champ1, champ2)]
                            new_champs_dist[(champ2, champ1)] = \
                                champs_dist[(champ2, champ1)]
                        else:
                            # Put the merged field in second position.  Local
                            # names are used so the loop variable ``champ1``
                            # is never rebound for the remaining ``champ2``.
                            premier, second = champ1, champ2
                            if len(dessous[premier]) > 1:
                                premier, second = second, premier

                            # Average of the old strengths towards each
                            # component, weighted by len(component.niveau).
                            stre = 0.
                            nivel = 0
                            for y in dessous[second]:
                                poids = len(y.niveau)
                                stre += (champs_dist.get((premier, y), 0.)
                                         * float(poids))
                                nivel += poids

                            # A zero total weight used to raise
                            # ZeroDivisionError; treat it as "no link".
                            moyenne = stre / float(nivel) if nivel else 0.
                            new_champs_dist[(premier, second)] = moyenne
                            new_champs_dist[(second, premier)] = moyenne

            champs_dist = new_champs_dist
            multi_level_dyn_net[(inter, fusion_level)] = \
                Network(champs_liste[:], champs_dist)
    return multi_level_dyn_net
def _freeze_level(level, champs_courants, clusters_level, decalage,
                  above, below, m):
    """Record the current clusters as hierarchy level ``level``.

    For ``level > 0``, every cluster of the previous level that is still
    present (i.e. was not merged in between) is replaced by a fresh id taken
    from the counter ``m``; the replacement is recorded in ``decalage``,
    ``above`` and ``below``.  All containers are updated in place.

    Returns:
        The updated value of the counter ``m``.
    """
    if level > 0:
        for x in clusters_level[level - 1]:
            if x in champs_courants:
                m += 1
                champs_courants.remove(x)
                champs_courants.append(m)
                decalage[x] = m
                above[x] = m
                below.setdefault(m, []).append(x)
    clusters_level[level] = champs_courants[:]
    return m


def h_clustering(clusters, nb_level=6):
    """Cluster ``clusters`` hierarchically and cut the result into levels.

    Clusters are renumbered 0..n-1, the strengths found under each
    cluster's ``'syn'`` entry are turned into distances
    (``1 - max(strength in either direction)``, 0 when absent), ``weighted``
    is applied, and the resulting merges are replayed while the set of
    current clusters is snapshotted at regular intervals.

    Args:
        clusters: mapping ``key -> dict``.  The dicts may carry ``'periode'``
            (default 0), ``'syn'`` and ``'dia'`` entries.
            NOTE(review): ``'syn'`` / ``'dia'`` are overwritten in place with
            the output of ``reindex``, so the caller's dicts are modified and
            calling this function twice on the same data re-applies
            ``reindex`` -- confirm this is acceptable.
        nb_level: number of slices the merge sequence is divided into;
            expected to be a non-zero integer.

    Returns:
        ``(clusters_index, clusters_level, below, above)`` where
        ``clusters_index`` maps the new integer ids to the cluster dicts,
        ``clusters_level`` maps a level number to the list of cluster ids
        present at that level, ``below`` maps an id to the ids it was built
        from and ``above`` maps an id to the id that replaced/absorbed it.
    """
    univ2index = {}
    max_periode = 0
    for i, univ in enumerate(clusters.keys()):
        univ2index[univ] = i
        max_periode = max(max_periode, clusters[univ].get('periode', 0))

    # Renumber the clusters with integer ids for convenience.
    clusters_index = {}
    for univ, clu in clusters.items():
        clusters_index[univ2index[univ]] = clu
        if 'syn' in clu:
            clu['syn'] = reindex(univ2index, clu['syn'])
        if 'dia' in clu:
            clu['dia'] = reindex(univ2index, clu['dia'])

    # Collect the strengths of the 'syn' links.  'syn' is optional above,
    # so a cluster without it simply contributes no link here.
    sync_strenghts = {}
    for index, clu in clusters_index.items():
        for voisin, stre in clu.get('syn', {}).items():
            sync_strenghts[(index, voisin)] = stre

    # Ids are 0..n-1; sorting makes "position in the list == id" explicit,
    # which the linkage replay below relies on.
    indices = sorted(clusters_index)

    # Flat list of distances, one per unordered pair of ids.
    dist_tot = []
    for pos, champ1 in enumerate(indices[:-1]):
        for champ2 in indices[pos + 1:]:
            lien = max(sync_strenghts.get((champ2, champ1), 0.),
                       sync_strenghts.get((champ1, champ2), 0.))
            dist_tot.append(float(1. - lien))

    Z = weighted(dist_tot)
    print('***on calcule le dendrogramme associe')
    # Z looks like:
    #   [[  2.   6.  0.36216925  2.]
    #    [ 10.  15.  0.42559153  2.]
    #    [  8.  13.  0.43538316  2.]
    #    [  0.  19.  0.43583879  2.]]

    level = 0
    champs_courants = list(indices)
    # Id that the next merged cluster will receive (incremented first).
    N = len(champs_courants) - 1
    clusters_level = {}
    below, above = {}, {}
    print(len(Z))
    decalage = {}
    # Ids handed out when a level is frozen start above this value.
    # NOTE(review): these would collide with merge ids (N) on very large
    # inputs -- confirm the input size stays well below this bound.
    m = 10000

    # The last ``max_periode`` merges are skipped.  ``Z[:-max_periode]`` was
    # empty for max_periode == 0, which silently disabled the whole replay.
    n_fusions = max(len(Z) - max_periode, 0)
    for i, ev_fusion in enumerate(Z[:n_fusions]):
        if i >= len(Z) * level // nb_level - 1:
            print('%s %s' % (i, level))
            # Freeze the network in its current state.
            m = _freeze_level(level, champs_courants, clusters_level,
                              decalage, above, below, m)
            level += 1

        N += 1
        # Translate the linkage ids into the ids currently in use.
        ch1 = rec_retrieval(decalage, int(ev_fusion[0]))
        ch2 = rec_retrieval(decalage, int(ev_fusion[1]))
        champs_courants.remove(ch1)
        champs_courants.remove(ch2)
        champs_courants.append(N)
        below.setdefault(N, []).append(ch1)
        below.setdefault(N, []).append(ch2)
        above[ch1] = N
        above[ch2] = N

        if i == n_fusions - 1:
            print('%s %s' % (i, level))
            # Final snapshot, taken after the last replayed merge.
            m = _freeze_level(level, champs_courants, clusters_level,
                              decalage, above, below, m)
    return clusters_index, clusters_level, below, above