예제 #1
0
    :param topics: array, [n_tokens]
    """
    top_features = topic.argsort()[::-1]
    return [terms[ind] for ind in top_features[:n_tokens]]


print("Top 5-term topics per cluster:")
# Cluster centres exposed by the fitted `km` model.
km_centroids = km.cluster_centers_
# NOTE(review): assumes `vectorizer` is a scikit-learn text vectorizer.
# scikit-learn 1.0 deprecated `get_feature_names()` and 1.2 removed it in
# favour of `get_feature_names_out()`. Prefer the new accessor and fall back
# to the old one so the script runs on either side of that change.
if hasattr(vectorizer, "get_feature_names_out"):
    # The new accessor returns an ndarray; keep `terms` a plain list as before.
    terms = list(vectorizer.get_feature_names_out())
else:
    terms = vectorizer.get_feature_names()


def get_features_in_classifier(classfier_data, nodes, topics_data, terms,
                               n_topics=4, n_tokens=15):
    """Print the highest-weighted topics, and their top tokens, per cluster.

    For each of the first ``nodes`` rows of ``classfier_data`` the columns are
    ranked from largest to smallest value; the leading ``n_topics`` column
    indices are treated as topic ids and looked up in ``topics_data``.

    :param classfier_data: 2-D array, one row per cluster/node.
    :param nodes: number of leading rows of ``classfier_data`` to report.
    :param topics_data: 2-D array indexed as ``topics_data[topic_id, :]``;
        each selected row is handed to ``get_tokens_per_topic``.
    :param terms: vocabulary passed through to ``get_tokens_per_topic``.
    :param n_topics: topics printed per cluster (default 4, the value that
        used to be hard-coded).
    :param n_tokens: tokens printed per topic (default 15, the value that
        used to be hard-coded).
    :return: None; the report is written to stdout.
    """
    # Column indices of every row, ordered from largest to smallest value.
    ranked_topics = classfier_data.argsort()[:, ::-1]
    for node in range(nodes):
        print("Cluster %d:" % node)
        for topic_id in ranked_topics[node, :n_topics]:
            tokens = get_tokens_per_topic(topics_data[topic_id, :], terms, n_tokens)
            print('Topic:%d:\t' % (topic_id) + ','.join(tokens))
        # Blank line separates consecutive clusters in the output.
        print()


# Report the top topics for each k-means centroid.
# presumably `true_k` is the number of k-means clusters — verify where it is set.
get_features_in_classifier(km_centroids, true_k, Y, terms)
###############################################################################
# Perform SOM clustering
from som.som import SOMMapper

# Self-organising map configured with a (5, 5) `kshape`, 300 iterations and a
# learning rate of 0.005.
som = SOMMapper(kshape=(5, 5), n_iter=300, learning_rate=0.005)
# Fit the map on Y and keep the transformed result.
# NOTE(review): get_features_in_classifier ranks the *columns* of `kohonen`
# and uses those indices as *row* indices into Y — confirm Y's orientation
# makes that valid for the SOM output.
kohonen = som.fit_transform(Y)
# Same report as above, this time with one "cluster" per SOM node.
get_features_in_classifier(kohonen, som.n_nodes, Y, terms)
예제 #2
0
__author__ = 'husser'

import numpy as np
from matplotlib import pyplot as plt
from sklearn.feature_extraction.image import grid_to_graph
from sklearn.cluster import AgglomerativeClustering
from som.som import SOMMapper, build_U_matrix

# Settings for the demo; the first three are forwarded to SOMMapper below.
kshape_test = (30, 20)  # SOM `kshape`; presumably the node-grid dimensions
n_iter_test = 300
learning_rate_test = 0.005
n_colors = 200  # number of random sample rows generated below

# n_colors x 3 array of uniform random values in [0, 1);
# presumably each row is an RGB colour — confirm intent.
spcolors = np.random.rand(n_colors, 3)
mapper = SOMMapper(kshape=kshape_test, n_iter=n_iter_test, learning_rate=learning_rate_test)
# Fit the map on the random samples and keep the transformed output.
kohonen = mapper.fit_transform(spcolors)
# U-matrix built from the fitted map using the "rect" topology option.
U_Matrix = build_U_matrix(kohonen, kshape_test, topology="rect")

fig = plt.figure()
# Left-hand panel of a 1-row, 2-column subplot layout.
ax1 = fig.add_subplot(121)
# Split the rows of `kohonen` into kshape_test[0] equal chunks so imshow
# receives one chunk per image row.
# assumes kohonen has kshape_test[0] * kshape_test[1] rows — TODO confirm
ax1.imshow(np.split(kohonen, kshape_test[0], axis=0))
ax1.set_title("Kohonen Map")

# Clustering
n_clusters = 8  # number of regions
# Adjacency graph of a kshape_test[0] x kshape_test[1] grid; passing it as
# `connectivity` restricts merges to neighbouring grid cells.
connectivity = grid_to_graph(kshape_test[0], kshape_test[1])
ward = AgglomerativeClustering(n_clusters=n_clusters,
                               linkage='ward', connectivity=connectivity).fit(kohonen)

# Arrange the flat per-row cluster labels back into the grid shape.
label = np.reshape(ward.labels_, kshape_test)