# Build the TF-IDF feature matrix for the wiki corpus.
# NOTE(review): presumably a (n_documents, n_features) sparse matrix from a
# scikit-learn vectorizer — confirm against WikiTfIdf's implementation.
X = w_tf_idf.get_vectorized_dataset()
# Choose k as the number of ground-truth clusters so the evaluation metrics
# below compare like with like.
k = len(w_tf_idf.get_cluster_list())
# Ground-truth label vector aligned row-for-row with X (assumed; verify in
# get_label_vector()).
labels = w_tf_idf.get_label_vector()
# Configure K-means from the command-line options: verbosity, whether to use
# MiniBatchKMeans, and the centroid-initialization scheme.
wkm = WikiKmeans(k, verbose=opts.verbose, mini_batch=opts.minibatch, init=opts.init)
# Fit K-means on X; km is the fitted estimator (exposes labels_ and
# cluster_centers_, used below).
km = wkm.apply_K_means(X)

# Report standard clustering-quality metrics comparing the K-means
# assignment (km.labels_) against the ground-truth labels.
print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels, km.labels_))
print("Completeness: %0.3f" % metrics.completeness_score(labels, km.labels_))
print("V-measure: %0.3f" % metrics.v_measure_score(labels, km.labels_))
print("Adjusted Rand-Index: %0.3f"
      % metrics.adjusted_rand_score(labels, km.labels_))
# Silhouette is computed on a 1000-point subsample for speed; pin the RNG so
# the reported value is reproducible across runs (the sample is otherwise
# drawn with an unseeded random state).
print("Silhouette Coefficient: %0.3f"
      % metrics.silhouette_score(X, km.labels_, sample_size=1000,
                                 random_state=42))

# With a hashing vectorizer there is no inverse vocabulary mapping, so the
# per-cluster term listing is only possible in the non-hashing case.
if not opts.use_hashing:
    print()
    print("Top terms per cluster:")
    # For each centroid, vocabulary indices sorted by descending weight.
    order_centroids = km.cluster_centers_.argsort()[:, ::-1]
    vectorizer = w_tf_idf.get_vectorizer()
    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
    # 1.2; prefer get_feature_names_out() when available, falling back so
    # older installations keep working.
    if hasattr(vectorizer, "get_feature_names_out"):
        terms = vectorizer.get_feature_names_out()
    else:
        terms = vectorizer.get_feature_names()
    for i in range(k):
        print("Cluster %d" % i)
        # Print the 10 highest-weighted terms of this centroid on one line.
        for ind in order_centroids[i, :10]:
            print(' %s' % terms[ind], end='')
        print()