def test_affinity_propagation():
    """Check the functional and estimator forms of affinity propagation.

    Relies on ``X`` and ``n_clusters`` being defined outside this function.
    """
    # Similarity matrix: negated squared euclidean distances between samples.
    similarities = -euclidean_distances(X, squared=True)
    pref = np.median(similarities) * 10

    # Functional interface on the similarity matrix.
    center_indices, labels = affinity_propagation(
        similarities, preference=pref)
    assert_equal(n_clusters, len(center_indices))

    # Estimator fitted on the precomputed similarities ...
    precomputed_model = AffinityPropagation(
        preference=pref, affinity="precomputed")
    labels_precomputed = precomputed_model.fit(similarities).labels_

    # ... must label the samples like the estimator fitted on X itself.
    model = AffinityPropagation(preference=pref)
    labels = model.fit(X).labels_
    assert_array_equal(labels, labels_precomputed)

    # One distinct label per exemplar, and the expected number of exemplars.
    found_clusters = len(model.cluster_centers_indices_)
    assert_equal(np.unique(labels).size, found_clusters)
    assert_equal(n_clusters, found_clusters)

    # Same labels are expected when the function is called with copy=False.
    _, labels_no_copy = affinity_propagation(
        similarities, preference=pref, copy=False)
    assert_array_equal(labels, labels_no_copy)
def test_affinity_propagation():
    """Exercise affinity propagation through its function and class forms.

    ``X`` and ``n_clusters`` are read from the enclosing module.
    """
    # Build similarities as the negative squared euclidean distances.
    sim = -euclidean_distances(X, squared=True)
    preference = np.median(sim) * 10

    # Plain function call: the number of exemplars must match n_clusters.
    exemplars, labels = affinity_propagation(sim, preference=preference)
    assert_equal(n_clusters, len(exemplars))

    # Fit one estimator on the precomputed matrix and one on the raw data;
    # both must produce the same labelling.
    from_similarities = AffinityPropagation(
        preference=preference, affinity="precomputed")
    labels_precomputed = from_similarities.fit(sim).labels_

    from_data = AffinityPropagation(preference=preference)
    labels = from_data.fit(X).labels_
    assert_array_equal(labels, labels_precomputed)

    exemplars = from_data.cluster_centers_indices_
    exemplar_count = len(exemplars)
    assert_equal(np.unique(labels).size, exemplar_count)
    assert_equal(n_clusters, exemplar_count)

    # Repeat the function call with copy=False and compare the labels.
    _, labels_no_copy = affinity_propagation(
        sim, preference=preference, copy=False)
    assert_array_equal(labels, labels_no_copy)
def sims_to_apcluster(sim_scores, origin_corpus=None):
    """Cluster ``sim_scores`` and return the result as a nested dict tree.

    Parameters
    ----------
    sim_scores
        Similarity data handed unchanged to ``affinity_propagation``.
    origin_corpus
        Sequence indexed by sample position; ``origin_corpus[i]`` becomes
        the leaf name of sample ``i``. Despite the ``None`` default it must
        be supplied whenever at least one sample is labelled, because it is
        indexed once per sample.

    Returns
    -------
    dict
        ``{"name": "top", "children": [cluster, ...]}`` where each cluster
        is ``{"name": "cluster<label>", "children": [leaf, ...]}`` and each
        leaf is ``{"name": origin_corpus[i], "size": 10}``. Clusters appear
        in the order their label is first encountered.
    """
    _, labels = affinity_propagation(sim_scores)

    ap_cluster_dict = {"name": "top", "children": []}
    # Name -> cluster node index, so each sample finds its cluster directly
    # instead of scanning the children list (and without dict.has_key, which
    # no longer exists in Python 3).
    clusters_by_name = {}
    for i, label in enumerate(labels):
        cluster_name = "cluster" + str(label)
        cluster = clusters_by_name.get(cluster_name)
        if cluster is None:
            cluster = {"name": cluster_name, "children": []}
            clusters_by_name[cluster_name] = cluster
            ap_cluster_dict["children"].append(cluster)
        cluster["children"].append({"name": origin_corpus[i], "size": 10})
    return ap_cluster_dict
def sims_to_apcluster(sim_scores, origin_corpus=None):
    """Run affinity propagation and arrange the samples as a two-level tree.

    Parameters
    ----------
    sim_scores
        Similarity data passed straight to ``affinity_propagation``.
    origin_corpus
        Sequence giving the display name of each sample by position. It
        defaults to ``None`` but is indexed for every labelled sample, so a
        real sequence is required in practice.

    Returns
    -------
    dict
        A root node ``{"name": "top", "children": [...]}``. Its children are
        cluster nodes named ``"cluster<label>"``, each holding leaf nodes of
        the form ``{"name": origin_corpus[i], "size": 10}``. Cluster nodes
        are ordered by first appearance of their label.
    """
    _, labels = affinity_propagation(sim_scores)

    root = {"name": "top", "children": []}
    # Track cluster nodes by name; membership tests use ``in`` because
    # dict.has_key was removed in Python 3.
    known_clusters = {}
    for position, label in enumerate(labels):
        name = "cluster" + str(label)
        if name not in known_clusters:
            node = {"name": name, "children": []}
            known_clusters[name] = node
            root["children"].append(node)
        leaf = {"name": origin_corpus[position], "size": 10}
        known_clusters[name]["children"].append(leaf)
    return root
def test_affinity_propagation():
    # Affinity propagation: function form, estimator form, input validation.
    # X and n_clusters come from outside this function.

    # Similarities are the negated squared euclidean distances.
    similarities = -euclidean_distances(X, squared=True)
    pref = np.median(similarities) * 10

    # Function form.
    center_indices, labels = affinity_propagation(
        similarities, preference=pref)
    assert_equal(n_clusters, len(center_indices))

    # Estimator on the precomputed matrix vs. estimator on the raw data.
    precomputed_model = AffinityPropagation(
        preference=pref, affinity="precomputed")
    labels_precomputed = precomputed_model.fit(similarities).labels_

    verbose_model = AffinityPropagation(preference=pref, verbose=True)
    labels = verbose_model.fit(X).labels_
    assert_array_equal(labels, labels_precomputed)

    found_clusters = len(verbose_model.cluster_centers_indices_)
    assert_equal(np.unique(labels).size, found_clusters)
    assert_equal(n_clusters, found_clusters)

    # Function form again, this time with copy=False.
    _, labels_no_copy = affinity_propagation(
        similarities, preference=pref, copy=False)
    assert_array_equal(labels, labels_no_copy)

    # Input validation: non-square matrix, damping of 0, unknown affinity
    # name, and a sparse matrix given as precomputed input.
    assert_raises(ValueError, affinity_propagation, similarities[:, :-1])
    assert_raises(ValueError, affinity_propagation, similarities, damping=0)
    unknown_affinity = AffinityPropagation(affinity="unknown")
    assert_raises(ValueError, unknown_affinity.fit, X)
    sparse_precomputed = AffinityPropagation(affinity='precomputed')
    assert_raises(TypeError, sparse_precomputed.fit, csr_matrix((3, 3)))
def test_affinity_propagation():
    # Checks affinity propagation end to end, then its argument validation.
    # Reads X and n_clusters from the enclosing module.

    # Negative squared euclidean distance serves as the similarity.
    sim = -euclidean_distances(X, squared=True)
    preference = np.median(sim) * 10

    # Direct function call must find n_clusters exemplars.
    exemplars, labels = affinity_propagation(sim, preference=preference)
    assert_equal(n_clusters, len(exemplars))

    # The estimator must label identically whether it is given the
    # similarity matrix or the raw samples.
    from_similarities = AffinityPropagation(
        preference=preference, affinity="precomputed")
    labels_precomputed = from_similarities.fit(sim).labels_

    from_data = AffinityPropagation(preference=preference, verbose=True)
    labels = from_data.fit(X).labels_
    assert_array_equal(labels, labels_precomputed)

    exemplars = from_data.cluster_centers_indices_
    exemplar_count = len(exemplars)
    assert_equal(np.unique(labels).size, exemplar_count)
    assert_equal(n_clusters, exemplar_count)

    # copy=False variant of the function call.
    _, labels_no_copy = affinity_propagation(
        sim, preference=preference, copy=False)
    assert_array_equal(labels, labels_no_copy)

    # Invalid inputs must be rejected with the expected exception types.
    assert_raises(ValueError, affinity_propagation, sim[:, :-1])
    assert_raises(ValueError, affinity_propagation, sim, damping=0)
    bad_affinity = AffinityPropagation(affinity="unknown")
    assert_raises(ValueError, bad_affinity.fit, X)
    precomputed_only = AffinityPropagation(affinity='precomputed')
    assert_raises(TypeError, precomputed_only.fit, csr_matrix((3, 3)))
#for i in range(0,16): # for j in range(i,16): # g = mathEx.gaussian(distances[i,j], std) # W[i,j]=g # W[j,i]=g #------------------------------------------- from sklearn.cluster import spectral spectral.spectral_clustering(W, n_clusters = 4) from sklearn.cluster import affinity_propagation_ affinity_propagation_.affinity_propagation(W) from sklearn.cluster import hierarchical al = hierarchical._average_linkage(W) Z = al[0] hierarchical._complete_linkage(W) import scipy.cluster.hierarchy as h # calculate full dendrogram plt.figure(figsize=(25, 10)) plt.title('Hierarchical Clustering Dendrogram') plt.xlabel('sample index') plt.ylabel('distance') sklearn h.dendrogram(
# --- K-means, seeded with the PCA components ---------------------------
# Alternative initialisations that were tried:
#   estimator = KMeans(init='random', n_clusters=n_clusters, n_init=10)
#   estimator = KMeans(init='k-means++', n_clusters=n_clusters, n_init=10)
estimator = KMeans(
    init=pca.components_,
    n_clusters=n_clusters,
    n_init=1,
)
estimator.fit(tfidf_vectors)
labels = estimator.labels_
accuracy_from_kmeans += count_accuracy(
    mygraph, predict_candidates, labels)

# --- Affinity propagation on the graph-derived affinity matrix ----------
affinity_matrix = construct_affinity_matrix(mygraph, predict_candidates)
cluster_centers_indices, labels = affinity_propagation(affinity_matrix)
accuracy_from_affinity_propogation += count_accuracy(
    mygraph, predict_candidates, labels)

# --- Community detection on the same affinity matrix --------------------
labels = construct_graph_from_affinity_matrix_and_community_label(
    affinity_matrix)
accuracy_from_community_detection += count_accuracy(
    mygraph, predict_candidates, labels)

# DBSCAN variant, currently disabled:
#   db = DBSCAN(eps=0.8, min_samples=1)
#   X = np.array(tfidf_vectors)
#   db.fit(X)
#   labels = db.labels_
def affinitypropagation(input_file,type,pref,Output):
    """Cluster a comma-separated dataset with affinity propagation and report.

    The file is loaded with ``np.loadtxt`` using the first ``ncol - 1``
    columns, where ``ncol = tools.file_col_coma(input_file)``. Column 0 is
    used as the true label and the remaining loaded columns as features.
    A similarity matrix is built from the features, clustered, scored
    against the true labels, and the results are printed, written to a text
    report and plotted.

    Parameters
    ----------
    input_file
        Path of the comma-delimited input file.
    type
        Similarity to build: ``'spearmanr'`` (Spearman correlation between
        rows) or ``'euclidean'`` (negative squared euclidean distance).
        The name shadows the builtin but is kept for caller compatibility.
    pref
        How the preference is derived from the similarity matrix:
        ``'median'``, ``'mean'`` or ``'min'``.
    Output
        String prefix for the output files; ``"affinity_propagation.txt"``
        and ``"affinity_propagation.png"`` are appended to it verbatim.

    Raises
    ------
    ValueError
        If ``type`` or ``pref`` is not one of the values listed above.
    """
    lvltrace.lvltrace("LVLEntree dans affinitypropagation unsupervised")
    ncol=tools.file_col_coma(input_file)
    data = np.loadtxt(input_file, delimiter=',', usecols=range(ncol-1))
    X = data[:,1:]
    labels_true = data[:,0]

    # From here on X holds the similarity matrix, not the raw features.
    # (Original author's note: still to be tested.)
    if type == 'spearmanr':
        X = scipy.stats.spearmanr(X,axis=1)[0]
    elif type == 'euclidean':
        X = -euclidean_distances(X, squared=True)
    else:
        # Previously this only printed "something wrong" and went on to
        # cluster the raw features as if they were similarities.
        raise ValueError(
            "unknown similarity type %r (expected 'spearmanr' or 'euclidean')"
            % (type,))

    # Original author's note: choice between min and median still to be tested.
    if pref == 'median':
        preference = np.median(X)
    elif pref == 'mean':
        preference = np.mean(X)
    elif pref == 'min':
        preference = np.min(X)
    else:
        # Previously this fell through and failed later with a NameError
        # because ``preference`` was never bound.
        raise ValueError(
            "unknown preference %r (expected 'median', 'mean' or 'min')"
            % (pref,))

    banner = "#########################################################################################################\n"
    print(banner)
    print("Affinity Propagation")
    print(preference)

    cluster_centers_indices, labels = affinity_propagation(X, preference=preference)
    n_clusters_ = len(cluster_centers_indices)

    # Each score is computed once and reused for both the console output
    # and the report file.
    print('Estimated number of clusters: %d' % n_clusters_)
    homogeneity = metrics.homogeneity_score(labels_true, labels)
    print("Homogeneity: %0.3f" % homogeneity)
    completeness = metrics.completeness_score(labels_true, labels)
    print("Completeness: %0.3f" % completeness)
    v_measure = metrics.v_measure_score(labels_true, labels)
    print("V-measure: %0.3f" % v_measure)
    adjusted_rand = metrics.adjusted_rand_score(labels_true, labels)
    print("Adjusted Rand Index: %0.3f" % adjusted_rand)
    adjusted_mutual_info = metrics.adjusted_mutual_info_score(labels_true, labels)
    print("Adjusted Mutual Information: %0.3f" % adjusted_mutual_info)
    # NOTE(review): X is the similarity matrix here, so the silhouette is
    # computed between rows of that matrix - confirm this is intended.
    silhouette = metrics.silhouette_score(X, labels, metric='sqeuclidean')
    print("Silhouette Coefficient: %0.3f" % silhouette)
    print("\n")
    print(banner)

    results = Output+"affinity_propagation.txt"
    # ``with`` guarantees the report is closed even if a write fails.
    with open(results, "w") as report:
        report.write("Affinity Propagation\n")
        report.write("Homogeneity Score: %f\n" % homogeneity)
        report.write("Completeness Score: %f\n" % completeness)
        report.write("V-Measure: %f\n" % v_measure)
        report.write("The adjusted Rand index: %f\n" % adjusted_rand)
        report.write("Adjusted Mutual Information: %f\n" % adjusted_mutual_info)
        report.write("Silhouette Score: %f\n" % silhouette)
        report.write("\n")
        report.write("True Value, Clusters, Iteration\n")
        # The third column is the 1-based row number of the sample.
        for n, (true_label, label) in enumerate(zip(labels_true, labels), 1):
            report.write("%f,%f,%i\n" % (true_label, label, n))

    # Plot result
    import pylab as pl
    from itertools import cycle
    pl.close('all')
    pl.figure(1)
    pl.clf()
    colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmykbgrcmykbgrcmykbg')
    # NOTE(review): the points drawn are the first two columns of the
    # similarity matrix, not of the raw features - confirm this is intended.
    for k, col in zip(range(n_clusters_), colors):
        class_members = labels == k
        cluster_center = X[cluster_centers_indices[k]]
        pl.plot(X[class_members, 0], X[class_members, 1], col + '.')
        pl.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
                markeredgecolor='k', markersize=14)
        for x in X[class_members]:
            pl.plot([cluster_center[0], x[0]], [cluster_center[1], x[1]], col)
    pl.title('Estimated number of clusters: %d' % n_clusters_)
    save = Output + "affinity_propagation.png"
    # Save through the same pylab handle used to draw the figure.
    pl.savefig(save)
    lvltrace.lvltrace("LVLSortie dans affinitypropagation unsupervised")
return index if __name__ == "__main__": dictionary, tfidf_transformation, lsi_transformation = load_gensim_tools() corpus = create_corpus() index = create_index(corpus, tfidf_transformation, lsi_transformation) tfidf_vec_doc = tfidf_transformation[corpus] lsi_vec_doc = lsi_transformation[tfidf_vec_doc] #lsi_transformation.print_topics(10) index_doc = index[lsi_vec_doc] sims = [s for s in index_doc] cluster_centers_indices, labels = affinity_propagation(sims) #print 'start kmeans calcing' #k = KMeans(init='k-means++', n_init=10) #k.fit(sims) #centroids = k.cluster_centers_ #labels = k.labels_ docs = [] for obj in HtmlContent.objects.filter(~Q(retry=3)).filter(~Q(content='')): docs.append(obj.title.split('|')[0]) doc_arr = np.array(range(len(labels))) with codecs.open('zzz','w','utf-8') as file: for i in range(np.max(labels)):