# NOTE(review): the statement below appears to be the tail of a definition
# that starts before the visible source; it is reproduced unchanged.
plt.show()


def output_image(arr, m, n, filename):
    """Write a flat sequence of labels to an image file of shape (m, n).

    Entries equal to 1 are stored as 255; every other entry is stored as 0.

    Args:
        arr: Iterable of labels. It must hold ``m * n`` entries, otherwise
            the reshape raises.
        m: First dimension handed to ``reshape``.
        n: Second dimension handed to ``reshape``.
        filename: Destination path passed to ``cv2.imwrite``.
    """
    pixels = np.array([255 if label == 1 else 0 for label in arr])
    cv2.imwrite(filename, pixels.reshape((m, n)))


if __name__ == "__main__":
    #####################
    # read files
    colors = ["red", "blue"]
    k = 2
    circs_X = read_file("data.data")  # Q1 circs

    #####################
    # regular k-means
    # The seed is drawn at random, so the clustering differs between runs.
    kmeans_model1 = KMeans(
        n_clusters=k,
        random_state=np.random.randint(100),
    ).fit(circs_X)
    kmeans_labels1 = kmeans_model1.labels_
    # show(circs_X, kmeans_labels1)
    print(calc_loss(circs_X, kmeans_labels1))

    # spectral clustering
    # sigma1 = 0.01
    # sigma1 = 0.1
    # sigma1 = 1
    # sigma1 = 5
# NOTE(review): the `return` below is the tail of a definition that starts
# before the visible source; it is reproduced unchanged.
return linear_kernel


@staticmethod
def gauss_kernel_generator(sigma2):
    """Return a Gaussian kernel function parameterised by ``sigma2``.

    The returned ``gauss_kernel(x1, x2)`` evaluates
    ``exp(-||x1 - x2||**2 / (2 * sigma2))``.
    """
    def gauss_kernel(x1, x2):
        # `**` binds tighter than unary minus: this is -(norm ** 2).
        return np.exp(-np.linalg.norm(x1 - x2) ** 2 / (2. * sigma2))
    return gauss_kernel


def distance(hyperplane, point):
    """Return the inner product of the flattened hyperplane with ``point``,
    divided by the norm of ``hyperplane[1:-1]``.

    NOTE(review): the normalising slice drops both the first and the last
    entry of ``hyperplane``; confirm that this matches how weights and bias
    are laid out by the caller.
    """
    return np.inner(hyperplane.flatten(), point) / np.linalg.norm(hyperplane[1:-1])


if __name__ == "__main__":
    # read data
    train_X, train_Y = read_file("train.data")
    validation_X, validation_Y = read_file("validation.data")
    test_X, test_Y = read_file("test.data")
    train_len = train_X.shape[0]
    validation_len = validation_X.shape[0]
    test_len = test_X.shape[0]
    # NOTE(review): the source had its line breaks collapsed. The bare `#`
    # markers are read as separator comments because `c_arr` must be assigned
    # for the loop below to run; confirm that `s = dict()` and the `SVM(...)`
    # assignment were live code and not commented out.
    #
    c_arr = [1, 10, 100, 1000, 1e4, 1e5, 1e6, 1e7, 1e8]
    sigma2_arr = [0.1, 1, 10, 100, 1000]
    #
    s = dict()
    # Visit every (c, sigma2) pair of the grid.
    for c in c_arr:
        for sigma2 in sigma2_arr:
            #
            s[c, sigma2] = SVM("spam_train.data", c=c, sigma2=sigma2)
            print("c: ", c, " sigma2: ", sigma2)
# NOTE(review): both static methods are invoked as `DecisionTree.<name>`
# below; the class header itself lies outside the visible source.
@staticmethod
def predict(dt, X):
    """Return a list with ``dt.predict_single(X[i])`` for every row index i.

    Rows are enumerated through ``X.shape[0]``.
    """
    return [dt.predict_single(X[row]) for row in range(X.shape[0])]


@staticmethod
def calc_accu(t, test_X, test_Y):
    """Return the fraction of entries of ``test_Y`` that equal the
    predictions of ``t`` on ``test_X``.
    """
    predicted = DecisionTree.predict(t, test_X)
    hits = np.sum(test_Y == predicted)
    return hits / test_Y.shape[0]


if __name__ == "__main__":
    feature_names = np.array([
        'cap-shape',
        'cap-surface',
        'cap-color',
        'bruises?',
        'odor',
        'gill-attachment',
        'gill-spacing',
        'gill-size',
        'gill-color',
        'stalk-shape',
        'stalk-root',
        'stalk-surface-above-ring',
        'stalk-surface-below-ring',
        'stalk-color-above-ring',
        'stalk-color-below-ring',
        'veil-type',
        'veil-color',
        'ring-number',
        'ring-type',
        'spore-print-color',
        'population',
        'habitat',
    ])

    train_X, train_Y = read_file("train.data")
    test_X, test_Y = read_file("test.data")

    # Build the tree from the training split, then score it on the test split.
    t = DecisionTree(train_X, train_Y, feature_names)
    accu = DecisionTree.calc_accu(t, test_X, test_Y)
# t subtree tsub = t.tsub G.add_edge(t.name, tsub.name) e = G.get_edge(t.name, tsub.name) e.attr['label'] = "t" draw_tree_helper(G, tsub) # f subtree fsub = t.fsub G.add_edge(t.name, fsub.name) e = G.get_edge(t.name, fsub.name) e.attr['label'] = "f" draw_tree_helper(G, fsub) if __name__ == "__main__": heart_train_X, heart_train_Y = read_file("train.data") heart_test_X, heart_test_Y = read_file("test.data") # 1 (a) epsilons1, alphas1, predictors1 = adaboost(heart_train_X, heart_train_Y, 5) ada_pred_Y = adaboost_pred(alphas1, predictors1, heart_train_X) accu = np.sum(ada_pred_Y == heart_train_Y) / heart_train_Y.size # draw tree for i in range(len(predictors1)): t = predictors1[i] draw_tree(t, str(i)) # 1 (b) epsilons2, alphas2, predictors2 = adaboost(heart_train_X, heart_train_Y,
for i in range(X.shape[0]): xi = X[i] labeli = labels[i] s += np.sum((xi - mus[labeli])**2) return s def func_uniform_draw(left, right): def draw(m, n): return np.random.uniform(left, right, (m, n)) return draw if __name__ == "__main__": train_X, train_Y = read_file("train.data") func_draw = func_uniform_draw(-3, 3) ks = [12, 18, 24, 36, 42] # k-means k1, g1 = dict(), dict() for k in ks: print(k) k1[k], g1[k] = [], [] for i in range(20): init_centres = func_draw(k, train_X.shape[1]) k1[k] += [KMeans(n_clusters=k, init=init_centres).fit(train_X)] g1[k] += [GMM(train_X, k, mu=init_centres)] # k-means++, GMM++ k2, g2 = dict(), dict()