def closure_1_3_4(k=4, n_restarts=50):
    """Fit several randomly initialized Kmedians models on the module-level
    dataset ``X``, keep the lowest-error one, and save a 2-D cluster plot.

    Parameters
    ----------
    k : int, default 4
        Number of medians (clusters) to fit. Defaults to the original
        hard-coded value so existing no-argument calls behave identically.
    n_restarts : int, default 50
        Number of random restarts; the model with the smallest
        ``model.error(X)`` across restarts is kept.

    Side effects
    ------------
    Creates a matplotlib figure and writes it to
    ``../figs/kmedians_outliers_best_model.png``, then prints the path.

    NOTE(review): relies on module-level names ``X``, ``Kmedians``,
    ``utils``, ``plt``, ``np``, ``os`` — confirm they are defined in the
    enclosing module.
    """
    best_model = None
    min_error = np.inf
    for _ in range(n_restarts):  # loop index was unused in the original
        model = Kmedians(k)
        model.fit(X)
        error = model.error(X)
        # keep the restart with the smallest objective value seen so far
        if error < min_error:
            min_error = error
            best_model = model
    plt.figure()
    utils.plot_2dclustering(X, best_model.predict(X))
    fname = os.path.join("..", "figs", "kmedians_outliers_best_model.png")
    plt.savefig(fname)
    print("\nFigure saved as '%s'" % fname)
# --- tail of the previous question's branch: plot minimum error vs. k ---
# NOTE(review): `k_values` and `y` are defined earlier in the file (not
# visible in this chunk) — presumably the k grid and per-k minimum errors.
plt.plot(k_values, y, label="K-Means minimum error plot against k values")
plt.xlabel("k values")
plt.ylabel("Minimum error")
plt.legend()
fname = os.path.join("..", "figs", "q13minerror.pdf")
plt.savefig(fname)

if question == '1.3.4':
    # Q1.3.4: best of 50 random Kmedians restarts (k=4) on clusterData2.
    X = utils.load_dataset('clusterData2')['X']
    model_array = []
    error_array = []
    for i in range(0, 50):
        model = Kmedians(k=4)
        model_array.append(model)
        model_array[i].fit(X)
        model_array[i].predict(X)  # return value unused here
        error_array.append(model_array[i].error(X))
    # pick the restart with the lowest recorded error and plot its clustering
    y = min(error_array)
    index = error_array.index(y)
    model = model_array[index]
    y = model.predict(X)  # `y` is reused: first the min error, now the labels
    utils.plot_2dclustering(X, y)

if question == '1.4':
    # Q1.4 branch begins here; its body continues past this chunk.
    X = utils.load_dataset('clusterData2')['X']
# model_2 = Kmeans(k=4) # model_2 = Kmedians(k=4) # model_2.fit(X) # if model_2.error(X) < error: # model = model_2 # plot_2dclustering(X, model.predict(X)) # # fname = os.path.join("..", "figs", "kmeans_3_3.png") # plt.savefig(fname) # print("\nFigure saved as '%s'" % fname) ''' Part 2, Part 4 ''' errors = np.ones([10, ]) * -1 for i in range(50): kVal = random.randint(1, 10) # model = Kmeans(k=kVal) model = Kmedians(k=kVal) model.fit(X) error = model.error(X) if errors[kVal - 1] == -1 or error < errors[kVal - 1]: errors[kVal - 1] = error print(errors) plt.plot(range(1, 11), errors) plt.xlabel('k') plt.ylabel('Error') plt.title('Error vs k') fname = os.path.join("..", "figs", "error_vs_kmedians_3_3.png") plt.savefig(fname) print("\nFigure saved as '%s'" % fname) elif question == '3.4':
# fname3 = "C:\\Users\\wangzhen\\Desktop\\cpsc340\\g5e0b_u7p1b_a2-master\\figs\\q3_3_kmedians.png" # plt.savefig(fname3) # print("\nFigure saved as '%s'" % fname3) X = load_dataset('clusterData2.pkl')['X'] # model=Kmeans(k=1) # model.fit(X) # error=model.error(X) # print(error) minError = np.zeros(10) for k in range(10): minError[k] = np.inf larg_slope = 0 slope_k = 1 for i in range(50): model = Kmedians(k=k + 1) model.fit(X) error = model.error(X) if error < minError[k]: minError[k] = error print(-minError[k] + minError[k - 1]) if k != 0: if -minError[k] + minError[k - 1] > larg_slope: larg_slope = -minError[k] + minError[k - 1] x1 = [i + 1 for i in range(10)] y1 = minError plt.figure(1) plot1 = plt.plot(x1, y1, 'b', label='minError') plt.title(
# --- tail of the previous module's branch: save the k-means elbow plot ---
plt.draw()
fname = os.path.join("..", "figs", "kmeans_elbow_method.png")
plt.savefig(fname)
print("\nFigure saved as '%s'" % fname)
#print(errors)

# Using K-medians for different clustering problems
# NOTE(review): the matching `if module == ...` chain opens above this chunk.
elif module == '4':
    X = load_dataset('clusterData2.pkl')['X']
    # using elbow method to eyeball the best k
    errors = np.zeros(10)
    for i in range(10):
        # best (lowest) error over 50 random restarts for k = i + 1
        min_error = np.inf
        for j in range(50):
            model = Kmedians(k=i + 1)
            model.fit(X)
            err = model.error(X)
            if err < min_error:
                min_error = err
        errors[i] = min_error
    # plot min error for k = 1..10 and save the elbow figure
    plt.plot(np.arange(1, 11), errors)
    plt.title("k Vs min_error")
    plt.xlabel("k")
    plt.ylabel("Min error across 50 random initializations")
    plt.draw()
    fname = os.path.join("..", "figs", "kmedians_elbow_method.png")
    plt.savefig(fname)
    print("\nFigure saved as '%s'" % fname)

# Trying out scikit's DBSCAN