for k in range(len(X)): y[k] = y[k] - 1 for j in range(0, 4): if j == 0: kmeans = KMeans(n_clusters=kmeansValues[i]).fit(X) labels = kmeans.labels_ elif j == 1: if i == 1: min_samples = np.log(len(X)) * 10 else: min_samples = np.log(len(X)) db = DBSCAN(eps=epsValues[i], min_samples=min_samples).fit(X) labels = db.labels_ elif j == 2: labels = alg.nDimAlg(X, n, 1) elif j == 3: labels = alg.nDimAlg(X, n, 2) label_color = [f.LABEL_COLOR_MAP[l] for l in labels] fig = plt.figure() plt.scatter(X[:, 0], X[:, 1], marker='o', c=label_color, s=25, edgecolor='k') plt.show() # f.benchmark(labels, y) print(dataName[i] + " - " + algName[j] + " - " + "ARI:" +
# Show the pending figure (presumably the K-Means scatter built just above
# this section -- confirm against the preceding code) and score the labels.
plt.show()
print('### KMEANS CORRECTNESS')
# f.benchmark(labels, y)
# ARI and AMI are symmetric, so the (labels, y) argument order is harmless.
print("ARI:" + str(metrics.adjusted_rand_score(labels, y)))
print("AMI:" + str(metrics.adjusted_mutual_info_score(labels, y)))

###### UNBALANCE DBSCAN ######
# Alternative setting kept for reference:
# eps = 5000
# min_samples = np.log(len(X)) * 10
eps = 18000
# DBSCAN requires an integer min_samples (recent scikit-learn releases reject
# floats). Rounding up keeps the behaviour the float had under the
# "neighbour count >= min_samples" test; max(1, ...) guards a 1-sample X.
min_samples = max(1, int(np.ceil(np.log(len(X)))))
db = DBSCAN(eps=eps, min_samples=min_samples).fit(X)
labels = db.labels_
# NOTE(review): DBSCAN labels noise points as -1; f.LABEL_COLOR_MAP is assumed
# to handle that index/key -- confirm.
label_color = [f.LABEL_COLOR_MAP[label] for label in labels]
unique, counts = np.unique(labels, return_counts=True)
# print('#FINAL:' + str(dict(zip(unique, counts))))
fig = plt.figure()
plt.scatter(X[:, 0], X[:, 1], marker='o', c=label_color, s=25, edgecolor='k')
plt.show()
print('### DBSCAN CORRECTNESS')
# f.benchmark(labels, y)
print("ARI:" + str(metrics.adjusted_rand_score(labels, y)))
print("AMI:" + str(metrics.adjusted_mutual_info_score(labels, y)))

###### UNBALANCE MYALG #####
labels = alg.nDimAlg(X, n)
# f.benchmark(labels, y)
print("ARI:" + str(metrics.adjusted_rand_score(labels, y)))
print("AMI:" + str(metrics.adjusted_mutual_info_score(labels, y)))
# Show the pending figure (produced before this section) and score the
# current labels against the ground truth y.
plt.show()
# ARI and AMI are symmetric, so the (labels, y) argument order is harmless.
print("ARI:" + str(metrics.adjusted_rand_score(labels, y)))
print("AMI:" + str(metrics.adjusted_mutual_info_score(labels, y)))

# k-distance plot: sorted distance from every point to its farthest of
# int(log(n)) neighbours, drawn in red on a new figure.
# NOTE(review): kneighbors(X) on the training data counts each point as its
# own nearest neighbour (distance 0) -- confirm this is the intended k.
NN = NearestNeighbors(n_neighbors=int(np.log(len(X)))).fit(X)
distances, indices = NN.kneighbors(X)
fig = plt.figure()
plt.plot(np.sort(distances[:, -1]), color='red')

###### S1 DBSCAN ######
eps = 0.25
# DBSCAN requires an integer min_samples (recent scikit-learn releases reject
# floats). Rounding up keeps the behaviour the float had under the
# "neighbour count >= min_samples" test; max(1, ...) guards a 1-sample X.
min_samples = max(1, int(np.ceil(np.log(len(X)))))
db = DBSCAN(eps=eps, min_samples=min_samples).fit(X)
labels = db.labels_
# NOTE(review): DBSCAN labels noise points as -1; f.LABEL_COLOR_MAP is assumed
# to handle that index/key -- confirm.
label_color = [f.LABEL_COLOR_MAP[label] for label in labels]
unique, counts = np.unique(labels, return_counts=True)
# print('#FINAL:' + str(dict(zip(unique, counts))))
fig = plt.figure()
plt.scatter(X[:, 0], X[:, 1], marker='o', c=label_color, s=25, edgecolor='k')
plt.show()
print("ARI:" + str(metrics.adjusted_rand_score(labels, y)))
print("AMI:" + str(metrics.adjusted_mutual_info_score(labels, y)))

###### S1 MYALG #####
labels = alg.nDimAlg(X, n, True)
plt.show()
print("ARI:" + str(metrics.adjusted_rand_score(labels, y)))
print("AMI:" + str(metrics.adjusted_mutual_info_score(labels, y)))