# Spectral clustering of a tab-separated dataset, scored against the class
# labels stored in the last column of each row.
#
# NOTE(review): the header of this usage guard lies before the visible start
# of this chunk; the condition below is reconstructed from the two arguments
# named in the usage message -- confirm it matches the original.
if len(sys.argv) < 3:
    sys.exit('Usage: python spectral.py dataset k')

## Data preprocessing
data = parse_tab(sys.argv[1])
k = int(sys.argv[2])
# The last field of every row is used as the reference class label.
classes = [example[-1] for example in data]
examples = data_to_na(data)
distances = euclidean_distances(examples, examples)
# Apply gaussian kernel as suggested in the documentation:
# NOTE(review): if euclidean_distances returns unsquared distances, this is
# exp(-gamma * d) rather than the Gaussian exp(-gamma * d ** 2) -- confirm
# which form is intended before changing it.
gamma = 0.5  # == 1 / num_features (heuristic)
similarity_matrix = numpy.exp(-distances * gamma)

## Clustering
# NOTE(review): newer scikit-learn releases spell this parameter n_clusters
# and need affinity='precomputed' for a similarity matrix -- verify against
# the installed version.
sc = SpectralClustering(k=k, random_state=0)
sc.fit(similarity_matrix)
labels = sc.labels_

## Performance evaluation
# The metrics take (labels_true, labels_pred). Passing the ground truth first
# matters: with the arguments reversed, homogeneity and completeness swap.
ari = adjusted_rand_score(classes, labels)
homogeneity, completeness, v_measure = \
    homogeneity_completeness_v_measure(classes, labels)

print('ARI: {0}'.format(ari))
print('Homogeneity: {0}'.format(homogeneity))
print('Completeness: {0}'.format(completeness))
print('V-measure: {0}'.format(v_measure))

addToResult('Spectral', ari, homogeneity, completeness, v_measure)

## Plotting
# Dataset file name without directory or extension, used for the plot title.
dataset_name = os.path.splitext(os.path.basename(sys.argv[1]))[0]
draw.scatter(examples, labels)
print(dataset_name)
draw.setImgTitle('spectral_' + dataset_name)
draw.showImage()
# Gaussian-mixture clustering of a dataset, scored against the class labels
# stored in the last column of each row.
# `data` is defined before the visible start of this chunk.
clusters = int(sys.argv[2])
# The last field of every row is used as the reference class label.
classes = [example[-1] for example in data]
examples = data_to_na(data)

## Clustering
clf = mixture.GMM(n_components=clusters)
clf.fit(examples)
labels = clf.predict(examples)

## Performance evaluation
# The metrics take (labels_true, labels_pred). Passing the ground truth first
# matters: with the arguments reversed, homogeneity and completeness swap.
ari = adjusted_rand_score(classes, labels)
homogeneity, completeness, v_measure = \
    homogeneity_completeness_v_measure(classes, labels)

print('ARI: {0}'.format(ari))
print('Homogeneity: {0}'.format(homogeneity))
print('Completeness: {0}'.format(completeness))
print('V-measure: {0}'.format(v_measure))

addToResult('GMM', ari, homogeneity, completeness, v_measure)

# Disabled density-contour overlay, kept for reference:
#x = np.linspace(min([x[0] for x in examples]), max([x[0] for x in examples]))
#y = np.linspace(min([y[1] for y in examples]), max([y[1] for y in examples]))
#X, Y = np.meshgrid(x, y)
#XX = np.c_[X.ravel(), Y.ravel()]
#Z = np.log(-clf.eval(XX)[0])
#Z = Z.reshape(X.shape)
#pylab.contour(Y, X, Z)

## Plotting
# Dataset file name without directory or extension, used for the plot title.
dataset_name = os.path.splitext(os.path.basename(sys.argv[1]))[0]
draw.scatter(examples, labels)
print(dataset_name)
draw.setImgTitle('GMM_' + dataset_name)
draw.showImage()
import drawing as draw

# k-means clustering of a tab-separated dataset, scored against the class
# labels stored in the last column of each row.

# Start from an empty image registry in the drawing module.
draw.savedImages = {}

if len(sys.argv) < 3:
    sys.exit('Usage: python kmeans.py dataset k')

## Data preprocessing
data = parse_tab(sys.argv[1])
k = int(sys.argv[2])
# The last field of every row is used as the reference class label.
classes = [example[-1] for example in data]
examples = data_to_na(data)

## Clustering
# NOTE(review): newer scikit-learn releases spell this parameter n_clusters --
# verify against the installed version.
kmeans = KMeans(k=k, random_state=0)
kmeans.fit(examples)
codebook = kmeans.cluster_centers_  # not used further in the visible code
labels = kmeans.predict(examples)

## Performance evaluation
# The metrics take (labels_true, labels_pred). Passing the ground truth first
# matters: with the arguments reversed, homogeneity and completeness swap.
ari = adjusted_rand_score(classes, labels)
homogeneity, completeness, v_measure = \
    homogeneity_completeness_v_measure(classes, labels)

print('ARI: {0}'.format(ari))
print('Homogeneity: {0}'.format(homogeneity))
print('Completeness: {0}'.format(completeness))
print('V-measure: {0}'.format(v_measure))

addToResult('k-means', ari, homogeneity, completeness, v_measure)

## Plotting
# Dataset file name without directory or extension, used for the plot title.
dataset_name = os.path.splitext(os.path.basename(sys.argv[1]))[0]
draw.scatter(examples, labels)
print(dataset_name)
draw.setImgTitle('kmeans_' + dataset_name)
draw.showImage()
import os

import drawing as draw

# DBSCAN clustering of a tab-separated dataset, scored against the class
# labels stored in the last column of each row.

# Start from an empty image registry in the drawing module.
draw.savedImages = {}

if len(sys.argv) < 4:
    sys.exit('Usage: python DBSCAN-comparison.py dataset eps min_samples')

## Data preprocessing
data = parse_tab(sys.argv[1])
eps = float(sys.argv[2])
min_samples = int(sys.argv[3])
# The last field of every row is used as the reference class label.
classes = [example[-1] for example in data]
examples = data_to_na(data)

## Clustering
# Hyper-parameters belong to the estimator's constructor; fit() only takes
# the data. (Passing them to fit() is rejected by current scikit-learn.)
db = DBSCAN(eps=eps, min_samples=min_samples).fit(examples)
labels = db.labels_

## Performance evaluation
# The metrics take (labels_true, labels_pred). Passing the ground truth first
# matters: with the arguments reversed, homogeneity and completeness swap.
ari = adjusted_rand_score(classes, labels)
homogeneity, completeness, v_measure = \
    homogeneity_completeness_v_measure(classes, labels)

print('ARI: {0}'.format(ari))
print('Homogeneity: {0}'.format(homogeneity))
print('Completeness: {0}'.format(completeness))
print('V-measure: {0}'.format(v_measure))

addToResult('DBSCAN', ari, homogeneity, completeness, v_measure)

## Plotting
# Dataset file name without directory or extension, used for the plot title.
dataset_name = os.path.splitext(os.path.basename(sys.argv[1]))[0]
draw.scatter(examples, labels)
print(dataset_name)
draw.setImgTitle('DBSCAN_' + dataset_name)
draw.showImage()