예제 #1
0
def test_affinity_propagation():
    """Exercise Affinity Propagation through its function and estimator APIs.
    """
    # Similarities are the negated squared Euclidean distances between samples
    similarities = -euclidean_distances(X, squared=True)
    preference = np.median(similarities) * 10

    # Function interface: the number of returned center indices must match
    # the expected number of clusters
    center_indices, _ = affinity_propagation(similarities,
                                             preference=preference)
    assert_equal(n_clusters, len(center_indices))

    # Estimator interface: fitting on the precomputed similarities and on the
    # raw samples must produce the same labels
    precomputed_model = AffinityPropagation(preference=preference,
                                            affinity="precomputed")
    labels_precomputed = precomputed_model.fit(similarities).labels_

    model = AffinityPropagation(preference=preference)
    labels = model.fit(X).labels_
    assert_array_equal(labels, labels_precomputed)

    n_found = len(model.cluster_centers_indices_)
    assert_equal(np.unique(labels).size, n_found)
    assert_equal(n_clusters, n_found)

    # Same labels are expected when the similarity matrix is not copied
    _, labels_no_copy = affinity_propagation(similarities, copy=False,
                                             preference=preference)
    assert_array_equal(labels, labels_no_copy)
예제 #2
0
def test_affinity_propagation():
    """Check that the function and estimator forms of Affinity Propagation
    agree with each other and with the expected cluster count.
    """
    # Build the similarity matrix (negated squared Euclidean distances)
    sim_matrix = -euclidean_distances(X, squared=True)
    preference = np.median(sim_matrix) * 10

    # Plain function call
    exemplars, _ = affinity_propagation(sim_matrix, preference=preference)
    assert_equal(n_clusters, len(exemplars))

    # Estimator fitted on the precomputed similarity matrix
    ap_precomputed = AffinityPropagation(affinity="precomputed",
                                         preference=preference)
    labels_precomputed = ap_precomputed.fit(sim_matrix).labels_

    # Estimator fitted on the raw samples
    ap_raw = AffinityPropagation(preference=preference)
    labels = ap_raw.fit(X).labels_

    assert_array_equal(labels, labels_precomputed)

    cluster_count = len(ap_raw.cluster_centers_indices_)
    assert_equal(np.unique(labels).size, cluster_count)
    assert_equal(n_clusters, cluster_count)

    # Test also with no copy
    no_copy_result = affinity_propagation(
        sim_matrix, preference=preference, copy=False)
    _, labels_no_copy = no_copy_result
    assert_array_equal(labels, labels_no_copy)
예제 #3
0
def sims_to_apcluster(sim_scores, origin_corpus=None):
    """Cluster ``sim_scores`` with affinity propagation and return a tree.

    The result is a nested dict of the form::

        {"name": "top",
         "children": [{"name": "cluster<label>",
                       "children": [{"name": <item>, "size": 10}, ...]},
                      ...]}

    Clusters appear in the order in which their label is first encountered,
    and the leaves inside a cluster follow the order of the samples.

    Parameters
    ----------
    sim_scores
        Similarity data, passed unchanged to ``affinity_propagation``.
    origin_corpus : sequence, optional
        ``origin_corpus[i]`` becomes the ``"name"`` of the leaf for sample
        ``i``. When omitted, the sample index (as a string) is used instead
        of failing on ``None[i]``.

    Returns
    -------
    dict
        The cluster tree described above.
    """
    _, labels = affinity_propagation(sim_scores)

    ap_cluster_dict = {"name": "top", "children": []}

    # Index the cluster nodes by name so each sample is placed with a single
    # dict lookup instead of rescanning the list of clusters.
    clusters_by_name = {}

    for i, label in enumerate(labels):
        cluster_name = "cluster" + str(label)
        cluster = clusters_by_name.get(cluster_name)
        if cluster is None:
            cluster = {"name": cluster_name, "children": []}
            clusters_by_name[cluster_name] = cluster
            ap_cluster_dict["children"].append(cluster)

        leaf_name = str(i) if origin_corpus is None else origin_corpus[i]
        cluster["children"].append({"name": leaf_name, "size": 10})

    return ap_cluster_dict
예제 #4
0
def sims_to_apcluster(sim_scores, origin_corpus=None):
    """Cluster ``sim_scores`` with affinity propagation and return a tree.

    The result is a nested dict of the form::

        {"name": "top",
         "children": [{"name": "cluster<label>",
                       "children": [{"name": <item>, "size": 10}, ...]},
                      ...]}

    Clusters appear in the order in which their label is first encountered,
    and the leaves inside a cluster follow the order of the samples.

    Parameters
    ----------
    sim_scores
        Similarity data, passed unchanged to ``affinity_propagation``.
    origin_corpus : sequence, optional
        ``origin_corpus[i]`` becomes the ``"name"`` of the leaf for sample
        ``i``. When omitted, the sample index (as a string) is used instead
        of failing on ``None[i]``.

    Returns
    -------
    dict
        The cluster tree described above.
    """
    _, labels = affinity_propagation(sim_scores)

    ap_cluster_dict = {"name": "top", "children": []}

    # Index the cluster nodes by name so each sample is placed with a single
    # dict lookup instead of rescanning the list of clusters.
    clusters_by_name = {}

    for i, label in enumerate(labels):
        cluster_name = "cluster" + str(label)
        cluster = clusters_by_name.get(cluster_name)
        if cluster is None:
            cluster = {"name": cluster_name, "children": []}
            clusters_by_name[cluster_name] = cluster
            ap_cluster_dict["children"].append(cluster)

        leaf_name = str(i) if origin_corpus is None else origin_corpus[i]
        cluster["children"].append({"name": leaf_name, "size": 10})

    return ap_cluster_dict
def test_affinity_propagation():
    # Affinity Propagation: function API, estimator API and input validation
    # Similarities are the negated squared Euclidean distances
    sim = -euclidean_distances(X, squared=True)
    preference = np.median(sim) * 10

    # Function API: one center index per expected cluster
    center_indices, _ = affinity_propagation(sim, preference=preference)
    assert_equal(n_clusters, len(center_indices))

    # Estimator API: precomputed similarities versus raw samples
    ap_precomputed = AffinityPropagation(preference=preference,
                                         affinity="precomputed")
    labels_precomputed = ap_precomputed.fit(sim).labels_

    ap_verbose = AffinityPropagation(preference=preference, verbose=True)
    labels = ap_verbose.fit(X).labels_
    assert_array_equal(labels, labels_precomputed)

    n_found = len(ap_verbose.cluster_centers_indices_)
    assert_equal(np.unique(labels).size, n_found)
    assert_equal(n_clusters, n_found)

    # Labels must be unchanged when the similarity matrix is not copied
    _, labels_no_copy = affinity_propagation(
        sim, preference=preference, copy=False)
    assert_array_equal(labels, labels_no_copy)

    # Input validation: a matrix with its last column dropped, a zero
    # damping factor, an unknown affinity and a sparse precomputed matrix
    # must all be rejected
    non_square = sim[:, :-1]
    assert_raises(ValueError, affinity_propagation, non_square)
    assert_raises(ValueError, affinity_propagation, sim, damping=0)
    unknown_affinity = AffinityPropagation(affinity="unknown")
    assert_raises(ValueError, unknown_affinity.fit, X)
    precomputed_only = AffinityPropagation(affinity='precomputed')
    assert_raises(TypeError, precomputed_only.fit, csr_matrix((3, 3)))
def test_affinity_propagation():
    # Affinity Propagation end-to-end check
    # Step 1: similarity matrix from negated squared Euclidean distances
    sim_matrix = -euclidean_distances(X, squared=True)
    preference = np.median(sim_matrix) * 10

    # Step 2: function form must find the expected number of clusters
    exemplars, _ = affinity_propagation(sim_matrix, preference=preference)
    assert_equal(n_clusters, len(exemplars))

    # Step 3: estimator form, on precomputed similarities and on raw samples
    model_precomputed = AffinityPropagation(affinity="precomputed",
                                            preference=preference)
    labels_precomputed = model_precomputed.fit(sim_matrix).labels_

    model_raw = AffinityPropagation(verbose=True, preference=preference)
    labels = model_raw.fit(X).labels_

    assert_array_equal(labels, labels_precomputed)

    cluster_count = len(model_raw.cluster_centers_indices_)
    assert_equal(np.unique(labels).size, cluster_count)
    assert_equal(n_clusters, cluster_count)

    # Step 4: repeat the function call without copying the similarities
    no_copy_result = affinity_propagation(sim_matrix, copy=False,
                                          preference=preference)
    _, labels_no_copy = no_copy_result
    assert_array_equal(labels, labels_no_copy)

    # Step 5: invalid inputs must raise
    assert_raises(ValueError, affinity_propagation, sim_matrix[:, :-1])
    assert_raises(ValueError, affinity_propagation, sim_matrix, damping=0)
    bad_affinity_model = AffinityPropagation(affinity="unknown")
    assert_raises(ValueError, bad_affinity_model.fit, X)
    sparse_input_model = AffinityPropagation(affinity='precomputed')
    assert_raises(TypeError, sparse_input_model.fit, csr_matrix((3, 3)))
예제 #7
0


#for i in range(0,16):
#    for j in range(i,16):
#        g = mathEx.gaussian(distances[i,j], std)
#        W[i,j]=g
#        W[j,i]=g


#-------------------------------------------
# Spectral clustering of W into 4 clusters; the return value is discarded.
# NOTE(review): W is not defined in the visible code -- presumably the
# affinity matrix sketched in the commented-out loop above; confirm.
from sklearn.cluster import spectral
spectral.spectral_clustering(W, n_clusters = 4)

# Affinity propagation on the same matrix; the return value is discarded too.
from sklearn.cluster import affinity_propagation_
affinity_propagation_.affinity_propagation(W)

# Underscore-prefixed linkage helpers of sklearn's hierarchical module.
# Only the first element of the average-linkage result is kept (as Z); the
# complete-linkage result is discarded.
from sklearn.cluster import hierarchical
al = hierarchical._average_linkage(W)
Z = al[0]
hierarchical._complete_linkage(W)

import scipy.cluster.hierarchy as h

# calculate full dendrogram
# NOTE(review): plt is not imported in the visible code -- presumably
# matplotlib.pyplot; confirm.
plt.figure(figsize=(25, 10))
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('sample index')
plt.ylabel('distance')
# NOTE(review): the bare name below is an expression statement with no
# effect. It looks like a stray token and raises NameError unless `sklearn`
# is bound elsewhere (the `from sklearn.cluster import ...` lines above do
# not bind it).
sklearn
h.dendrogram(
예제 #8
0
		estimator = KMeans(init=pca.components_, n_clusters=n_clusters, n_init=1)
		# estimator = KMeans(init='random', n_clusters=n_clusters, n_init=10)
		# estimator = KMeans(init='k-means++', n_clusters=n_clusters, n_init=10)
		
		estimator.fit(tfidf_vectors)
		labels = estimator.labels_
		accuracy_from_kmeans += count_accuracy(mygraph, predict_candidates, labels)







		affinity_matrix = construct_affinity_matrix(mygraph,  predict_candidates)
		cluster_centers_indices, labels = affinity_propagation(affinity_matrix)


		accuracy_from_affinity_propogation += count_accuracy(mygraph, predict_candidates, labels)


		labels = construct_graph_from_affinity_matrix_and_community_label(affinity_matrix)

		accuracy_from_community_detection += count_accuracy(mygraph, predict_candidates, labels)


		
		# db = DBSCAN(eps=0.8, min_samples=1)
		# X = np.array(tfidf_vectors)
		# db.fit(X)
		# labels = db.labels_
예제 #9
0
def affinitypropagation(input_file, type, pref, Output):
    """Run Affinity Propagation on a comma-separated file and report on it.

    The file is loaded with ``np.loadtxt``; column 0 holds the true labels
    and the remaining loaded columns hold the features. The features are
    turned into a similarity matrix, clustered, scored against the true
    labels, and the results are printed, written to
    ``Output + "affinity_propagation.txt"`` and plotted to
    ``Output + "affinity_propagation.png"``.

    Parameters
    ----------
    input_file
        Path of the comma-delimited input file.
    type : str
        ``'spearmanr'`` to use the Spearman correlation between rows, or
        ``'euclidean'`` to use negated squared Euclidean distances. Any other
        value prints "something wrong" and clusters the raw features.
        (The name shadows the builtin but is part of the public signature.)
    pref : str
        How the preference is derived from the similarity matrix:
        ``'median'``, ``'mean'`` or ``'min'``.
    Output : str
        Prefix prepended to the output file names.

    Raises
    ------
    ValueError
        If ``pref`` is not one of the supported values.
    """
    lvltrace.lvltrace("LVLEntree dans affinitypropagation unsupervised")
    # NOTE(review): file_col_coma presumably returns the number of
    # comma-separated columns; the last one is not loaded -- confirm.
    ncol = tools.file_col_coma(input_file)
    data = np.loadtxt(input_file, delimiter=',', usecols=range(ncol-1))

    X = data[:, 1:]
    labels_true = data[:, 0]
    # To be tested
    if type == 'spearmanr':
        X = scipy.stats.spearmanr(X, axis=1)[0]
    elif type == 'euclidean':
        X = -euclidean_distances(X, squared=True)
    else:
        # Best effort kept from the original: warn and carry on with raw X.
        print("something wrong")

    # To be tested: choose between min and median
    preference_functions = {
        'median': np.median,
        'mean': np.mean,
        'min': np.min,
    }
    if pref not in preference_functions:
        # Previously this only printed a warning and then crashed with a
        # NameError on the undefined preference.
        raise ValueError(
            "pref must be 'median', 'mean' or 'min', got %r" % (pref,))
    preference = preference_functions[pref](X)

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    banner = "#########################################################################################################\n"
    print(banner)
    print("Affinity Propagation")
    print(preference)
    cluster_centers_indices, labels = affinity_propagation(X, preference=preference)
    n_clusters_ = len(cluster_centers_indices)
    print('Estimated number of clusters: %d' % n_clusters_)

    # Each score is computed once and reused for the console and the file.
    homogeneity = metrics.homogeneity_score(labels_true, labels)
    print("Homogeneity: %0.3f" % homogeneity)
    completeness = metrics.completeness_score(labels_true, labels)
    print("Completeness: %0.3f" % completeness)
    v_measure = metrics.v_measure_score(labels_true, labels)
    print("V-measure: %0.3f" % v_measure)
    adjusted_rand = metrics.adjusted_rand_score(labels_true, labels)
    print("Adjusted Rand Index: %0.3f" % adjusted_rand)
    adjusted_mutual_info = metrics.adjusted_mutual_info_score(labels_true, labels)
    print("Adjusted Mutual Information: %0.3f" % adjusted_mutual_info)
    # NOTE(review): X is the similarity matrix here, yet it is scored as if
    # its rows were feature vectors -- confirm this is intended.
    silhouette = metrics.silhouette_score(X, labels, metric='sqeuclidean')
    print("Silhouette Coefficient: %0.3f" % silhouette)
    print("\n")
    print(banner)

    results = Output+"affinity_propagation.txt"
    # The context manager closes the report even if a write fails.
    with open(results, "w") as report:
        report.write("Affinity Propagation\n")
        report.write("Homogeneity Score: %f\n" % homogeneity)
        report.write("Completeness Score: %f\n" % completeness)
        report.write("V-Measure: %f\n" % v_measure)
        report.write("The adjusted Rand index: %f\n" % adjusted_rand)
        report.write("Adjusted Mutual Information: %f\n" % adjusted_mutual_info)
        report.write("Silhouette Score: %f\n" % silhouette)
        report.write("\n")
        report.write("True Value, Clusters, Iteration\n")
        # The third column is the 1-based row number.
        for n, (truth, assigned) in enumerate(zip(labels_true, labels), 1):
            report.write("%f,%f,%i\n" % (truth, assigned, n))

    # Plot result
    import pylab as pl
    from itertools import cycle
    pl.close('all')
    pl.figure(1)
    pl.clf()
    colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmykbgrcmykbgrcmykbg')
    # Only the first two columns of X are drawn.
    for k, col in zip(range(n_clusters_), colors):
        class_members = labels == k
        cluster_center = X[cluster_centers_indices[k]]
        pl.plot(X[class_members, 0], X[class_members, 1], col + '.')
        pl.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
                markeredgecolor='k', markersize=14)
        for x in X[class_members]:
            pl.plot([cluster_center[0], x[0]], [cluster_center[1], x[1]], col)
    pl.title('Estimated number of clusters: %d' % n_clusters_)
    save = Output + "affinity_propagation.png"
    # Save through the pylab alias imported above rather than an alias this
    # function never imports.
    pl.savefig(save)
    lvltrace.lvltrace("LVLSortie dans affinitypropagation unsupervised")
예제 #10
0
    return index

if __name__ == "__main__":
    dictionary, tfidf_transformation, lsi_transformation = load_gensim_tools()
    corpus = create_corpus()
    index = create_index(corpus, tfidf_transformation, lsi_transformation)

    tfidf_vec_doc = tfidf_transformation[corpus]
    lsi_vec_doc = lsi_transformation[tfidf_vec_doc]
    #lsi_transformation.print_topics(10)
    
    index_doc = index[lsi_vec_doc]
    sims = [s for s in index_doc]

    cluster_centers_indices, labels = affinity_propagation(sims)
    
    #print 'start kmeans calcing'
    #k = KMeans(init='k-means++', n_init=10)
    #k.fit(sims)
    #centroids = k.cluster_centers_
    #labels = k.labels_
    
    docs = []
    for obj in HtmlContent.objects.filter(~Q(retry=3)).filter(~Q(content='')):
        docs.append(obj.title.split('|')[0])

    doc_arr = np.array(range(len(labels)))

    with codecs.open('zzz','w','utf-8') as file:
        for i in range(np.max(labels)):