# Imports assumed by this example: SphericalKMeans from the `spherecluster`
# package and the multiview variant from `mvlearn`.
import numpy as np
from sklearn.metrics import normalized_mutual_info_score as nmi_score
from spherecluster import SphericalKMeans
from mvlearn.cluster import MultiviewSphericalKMeans


def perform_clustering(seed, m_data, labels, n_clusters):
    # Singleview spherical kmeans clustering
    # Cluster each view separately
    s_kmeans = SphericalKMeans(n_clusters=n_clusters,
                               random_state=seed,
                               n_init=100)
    s_clusters_v1 = s_kmeans.fit_predict(m_data[0])
    s_clusters_v2 = s_kmeans.fit_predict(m_data[1])

    # Concatenate the multiple views into a single view
    s_data = np.hstack(m_data)
    s_clusters = s_kmeans.fit_predict(s_data)

    # Compute nmi between true class labels and singleview cluster labels
    s_nmi_v1 = nmi_score(labels, s_clusters_v1)
    s_nmi_v2 = nmi_score(labels, s_clusters_v2)
    s_nmi = nmi_score(labels, s_clusters)
    print('Singleview View 1 NMI Score: {0:.3f}\n'.format(s_nmi_v1))
    print('Singleview View 2 NMI Score: {0:.3f}\n'.format(s_nmi_v2))
    print('Singleview Concatenated NMI Score: {0:.3f}\n'.format(s_nmi))

    # Multiview spherical kmeans clustering

    # Use a MultiviewSphericalKMeans instance to cluster the data
    m_kmeans = MultiviewSphericalKMeans(n_clusters=n_clusters,
                                        n_init=100,
                                        random_state=seed)
    m_clusters = m_kmeans.fit_predict(m_data)

    # Compute nmi between true class labels and multiview cluster labels
    m_nmi = nmi_score(labels, m_clusters)
    print('Multiview NMI Score: {0:.3f}\n'.format(m_nmi))

    return m_clusters
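
A hypothetical usage sketch (not part of the original example): `m_data` is a
list of per-view feature matrices, built here with sklearn's `make_blobs` and
L2-normalized, since spherical k-means clusters points on the unit sphere.

import numpy as np
from sklearn.datasets import make_blobs
from sklearn.preprocessing import normalize

seed = 42
n_clusters = 3
X, labels = make_blobs(n_samples=300, centers=n_clusters, random_state=seed)

# Fake a second view by perturbing the first, then unit-normalize each view
rng = np.random.RandomState(seed)
view1 = normalize(X)
view2 = normalize(X + rng.normal(scale=0.5, size=X.shape))

m_clusters = perform_clustering(seed, [view1, view2], labels, n_clusters)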
Example #2
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_score, silhouette_samples


def Silhouette(X, seguradora):
    # `dbm` is a project-specific module; `SKMeans` is assumed to be
    # spherecluster's SphericalKMeans under an alias
    insurance_label = dbm.GetAccountLabel(seguradora)
    maxx = len(X)

    if maxx > 11:
        maxx = 11

    range_of_clusters = list(range(2, maxx))
    clusters_silhouette = dict()

    for n_clusters in range_of_clusters:
        # Initialize the clusterer with the current n_clusters value and a
        # fixed random seed for reproducibility
        clusterer = SKMeans(n_clusters=n_clusters, random_state=0)
        cluster_labels = clusterer.fit_predict(X)

        # silhouette_score gives the average value over all samples, a view
        # into the density and separation of the formed clusters
        silhouette_avg = silhouette_score(X, cluster_labels)

        clusters_silhouette.update({n_clusters: silhouette_avg})

        # Per-sample silhouette scores (computed here but not used below)
        sample_silhouette_values = silhouette_samples(X, cluster_labels)

    plt.title('Average silhouette for %s' % insurance_label)
    plt.xlabel('Number of clusters', fontsize=16)
    plt.ylabel('Average silhouette', fontsize=16)
    plt.plot(list(clusters_silhouette.keys()),
             list(clusters_silhouette.values()))
    plt.savefig("../analytics/%s/%s_silhouette.png"
                % (insurance_label, insurance_label))
    plt.close()

    # Return the k with the highest average silhouette
    # (the original used dict.iteritems(), which is Python 2 only)
    return max(clusters_silhouette, key=clusters_silhouette.get)
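
The same model-selection logic as a minimal sketch, stripped of the
project-specific `dbm` and plotting code (assuming `SKMeans` is
spherecluster's SphericalKMeans):

from spherecluster import SphericalKMeans
from sklearn.metrics import silhouette_score

def best_k_by_silhouette(X, k_max=11):
    # Average silhouette for each candidate number of clusters
    scores = {}
    for k in range(2, min(k_max, len(X))):
        labels = SphericalKMeans(n_clusters=k, random_state=0).fit_predict(X)
        scores[k] = silhouette_score(X, labels)
    # Best k = highest average silhouette
    return max(scores, key=scores.get)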
Example #3

# Excerpt from a larger script: `nodelist`, `latent`, `adj`, `skmeans_kws`,
# `models`, and `relabel` are defined earlier and not shown here; `heatmap`
# and `binarize` are assumed to be graspy's plotting/utility functions.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from graspy.plot import heatmap
from graspy.utils import binarize
from spherecluster import SphericalKMeans

meta_file = "maggot_models/data/processed/2019-09-18-v2/BP_metadata.csv"

meta_df = pd.read_csv(meta_file, index_col=0)
print(meta_df.head())
class_labels = meta_df.loc[nodelist.astype(int), "BP_Class"].values
class_labels[class_labels == "LH2N"] = "Other"

uni_class, class_counts = np.unique(class_labels, return_counts=True)
inds = np.argsort(class_counts)[::-1]
uni_class = uni_class[inds]
class_counts = class_counts[inds]

n_clusters = 12
for k in range(2, n_clusters):
    skmeans = SphericalKMeans(n_clusters=k, **skmeans_kws)
    pred_labels = skmeans.fit_predict(latent)
    pred_labels = relabel(pred_labels)
    models.append(skmeans)

    # gridplot(
    #     [adj], inner_hier_labels=pred_labels, hier_label_fontsize=18, sizes=(2, 10)
    # )
    fig, ax = plt.subplots(1, 2, figsize=(30, 18))
    heatmap(
        binarize(adj),
        inner_hier_labels=pred_labels,
        # outer_hier_labels=side_labels,
        hier_label_fontsize=18,
        ax=ax[0],
        cbar=False,
        sort_nodes=True,
    )
    # (the excerpt is truncated here; ax[1] is unused in what is shown)
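
`relabel` is not defined in this excerpt; a plausible sketch (an assumption,
not the author's code) renumbers cluster labels by descending cluster size so
that label 0 is always the largest cluster:

import numpy as np

def relabel(pred_labels):
    # Order cluster ids from largest to smallest cluster
    uni, counts = np.unique(pred_labels, return_counts=True)
    order = uni[np.argsort(counts)[::-1]]
    remap = {old: new for new, old in enumerate(order)}
    return np.array([remap[label] for label in pred_labels])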
Example #4
# Note: this example uses the gensim 3.x API (size=, wv.syn0, wv.index2word);
# in gensim >= 4.0 these become vector_size=, wv.vectors, and wv.index_to_key.
import gensim
from spherecluster import SphericalKMeans

'''
model = gensim.models.Word2Vec(sentences, size=100, window=5, min_count=5, workers=4)
model.save('/home/sdp/Downloads/Movie/mymodel')
'''
new_model = gensim.models.Word2Vec.load('/home/sdp/Downloads/Movie/mymodel')

word_vectors = new_model.wv.syn0

num_clusters = 10

# Initialize a k-means object and use it to extract centroids.
# Fit once via fit_predict; the fitted centroids then live on the estimator.
# (The original code fit the model twice, so its saved centroids and labels
# came from two different runs.)
kmeans_clustering = SphericalKMeans(n_clusters=num_clusters)
idx = kmeans_clustering.fit_predict(word_vectors)

########################## Save the word2vec centroids in a file ##########################
with open("wordCentroids.txt", "w") as centroid_file:
    word_centroids = kmeans_clustering.cluster_centers_
    centroid_file.write(str(len(word_centroids)) + "\n")
    centroid_file.write(str(len(word_centroids[0])) + "\n")
    for cen in word_centroids:
        for value in cen:
            centroid_file.write(str(value) + "\n")
############################################################################################
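
For symmetry, a hypothetical reader for this file format (mirroring the
writer above: line 1 is the number of centroids, line 2 the dimensionality,
then one value per line):

import numpy as np

def load_centroids(path="wordCentroids.txt"):
    with open(path) as f:
        n = int(f.readline())
        dim = int(f.readline())
        values = [float(f.readline()) for _ in range(n * dim)]
    return np.array(values).reshape(n, dim)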

file = open("vecSummary.txt", "w")
word_centroid_map = dict(zip(new_model.wv.index2word, idx))
document_vectors = list()
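
The snippet is cut off right after `document_vectors = list()`; a common
continuation (an assumption, not the original code) represents each document
as a "bag of centroids" histogram over its words' cluster assignments:

import numpy as np

def bag_of_centroids(words, word_centroid_map, num_clusters):
    # Count how many of the document's words fall into each word cluster
    vec = np.zeros(num_clusters)
    for w in words:
        if w in word_centroid_map:
            vec[word_centroid_map[w]] += 1
    total = vec.sum()
    return vec / total if total > 0 else vec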
Example #5
    def run(self, X, n_clusters, distance):
        # `distance` is unused here; spherical k-means implicitly uses cosine
        # distance (assumes spherecluster's SphericalKMeans is imported)
        cluster = SphericalKMeans(n_clusters)
        return cluster.fit_predict(X)
Example #6
from spherecluster import SphericalKMeans  # assumed import


def spherical_clustering(X, n_clusters):
    cluster = SphericalKMeans(n_clusters)
    return cluster.fit_predict(X)
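
A quick usage sketch (assuming spherecluster's SphericalKMeans is imported as
above): spherical k-means compares rows by cosine similarity, so inputs are
typically L2-normalized first.

import numpy as np
from sklearn.preprocessing import normalize

rng = np.random.RandomState(0)
X = normalize(rng.rand(100, 20))  # unit-norm rows
labels = spherical_clustering(X, n_clusters=5)
print(np.bincount(labels))  # cluster sizes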