def task1():
    """Train a linear classifier on the 2-D two-class dataset and plot it.

    Asks on stdin which trainer to use (``l`` for the least-square model,
    ``p`` for the perceptron algorithm), prints the learned weights and the
    accuracy reported by ``get_accuracy``, then shows a scatter plot of the
    samples together with the decision line.

    Raises:
        ValueError: if the answer is neither ``l`` nor ``p``.  Previously
            this case fell through with ``W = None`` and failed later with
            an unrelated ``TypeError``.
    """
    dataset = handout.get_linear_seperatable_2d_2c_dataset()
    N = len(dataset.X)

    # Bias trick: a leading column of ones makes W[0] the intercept term.
    w0 = np.ones([N, 1])
    X = np.concatenate((w0, dataset.X), axis=1)

    # Truthy labels become +1, falsy labels become -1.
    Y = np.array([1.0 if label else -1.0 for label in dataset.y])

    method = input('Least square model or perceptron algorithm? (l/p)')
    method = method.strip().lower()
    if method == 'l':
        W = least_square(X, Y)
    elif method == 'p':
        W = perceptron(X, Y)
    else:
        raise ValueError(
            "unknown method {!r}: expected 'l' or 'p'".format(method))

    print("W = ", W)
    print("accuracy rate = ", get_accuracy(W, X, Y))

    # Decision line W[0] + W[1] * x + W[2] * y = 0, solved for y over the
    # observed range of the first feature.
    min_data = X[:, 1].min()
    max_data = X[:, 1].max()
    x = np.arange(min_data, max_data, 0.01)
    y = (W[0] + W[1] * x) / (-W[2])

    plt.scatter(X[:, 1], X[:, 2], c=dataset.y)
    # Drawn once; the original issued the identical plot call twice.
    plt.plot(x, y)
    plt.show()
def least_square():
    """Fit a least-squares linear classifier and display the result.

    Loads the dataset, solves for the weights with the pseudo-inverse of
    the bias-extended sample matrix, draws the resulting line, prints the
    accuracy reported by ``data.acc`` and shows the dataset plot.
    """
    data = get_linear_seperatable_2d_2c_dataset()

    # Bias trick: column 0 of the design matrix is the constant 1.
    design = np.insert(data.X, 0, 1, axis=1)
    # y * 2 - 1 maps labels 0/1 onto targets -1/+1
    # (assumes data.y holds 0/1 or boolean values -- TODO confirm).
    targets = data.y * 2 - 1

    # Closed-form least-squares solution.
    w = np.linalg.pinv(design) @ targets

    # Hyperplane w[0] + w[1] * x + w[2] * y = 0, drawn between x = -1.5 and 1.5.
    boundary_x = np.linspace(-1.5, 1.5, 2)
    boundary_y = -(w[0] + w[1] * boundary_x) / w[2]
    equation = '{:.4f} + {:.4f} * x + {:.4f} * y = 0'.format(w[0], w[1], w[2])
    plt.plot(boundary_x, boundary_y, label=equation)
    plt.legend()

    # Threshold the raw scores: negative -> 0, positive -> 1, anything else
    # (an exact zero) keeps its value.
    scores = design @ w
    pred_y = np.where(scores > 0, 1, np.where(scores < 0, 0, scores))

    plt.title("least square, acc: " + str(data.acc(pred_y)))
    print(data.acc(pred_y))

    # Original author's note: need to add legend and title.
    data.plot(plt).show()
def perceptron(eta=1e-2, max_epochs=None):
    """Train a perceptron with per-sample updates and display the result.

    Loads the dataset, shuffles it with a fixed seed, and repeatedly sweeps
    over the samples, correcting the weights on every misclassified one,
    until a full sweep makes no mistake.  The epoch count is printed, the
    resulting line is drawn, and the accuracy reported by ``data.acc`` is
    printed and put in the plot title.

    Args:
        eta: learning rate applied to every weight correction.
        max_epochs: optional upper bound on the number of sweeps.  ``None``
            (the default) keeps the original behaviour of looping until an
            error-free sweep, which never ends if the perceptron cannot
            separate the data.  When the bound is hit the current weights
            are used as they are.

    Raises:
        ValueError: if ``max_epochs`` is given and is smaller than 1.
    """
    if max_epochs is not None and max_epochs < 1:
        raise ValueError("max_epochs must be at least 1 or None")

    data = get_linear_seperatable_2d_2c_dataset()

    # Bias trick: column 0 of the design matrix is the constant 1.
    x = data.X
    x_extended = np.insert(x, 0, 1, axis=1)
    # y * 2 - 1 maps labels 0/1 onto targets -1/+1
    # (assumes data.y holds 0/1 or boolean values -- TODO confirm).
    c = data.y * 2 - 1

    # Visit the samples in a fixed, seeded random order.
    N = len(x)
    rng = np.random.RandomState(233)
    rand_ind = np.arange(N)
    rng.shuffle(rand_ind)
    rand_x = x_extended[rand_ind]
    rand_c = c[rand_ind]

    w = np.zeros(3)
    epoch = 0
    while max_epochs is None or epoch < max_epochs:
        converged = True
        epoch += 1
        for sample, target in zip(rand_x, rand_c):
            # A score of exactly 0 counts as the negative class.
            predicted = 1 if sample @ w > 0 else -1
            if predicted != target:
                converged = False
                w += eta * sample * target
        if converged:
            break
    print(epoch)

    # Hyperplane w[0] + w[1] * x + w[2] * y = 0, drawn between x = -1.5 and 1.5.
    xl = np.linspace(-1.5, 1.5, 2)
    yl = -(w[0] + w[1] * xl) / w[2]
    line_info = '{:.4f} + {:.4f} * x + {:.4f} * y = 0'.format(w[0], w[1], w[2])
    plt.plot(xl, yl, label=line_info)
    plt.legend()

    # Threshold the raw scores into 0/1 predictions.
    pred_y = x_extended @ w
    pred_y[pred_y < 0] = 0
    pred_y[pred_y > 0] = 1

    accuracy = data.acc(pred_y)
    plt.title("perceptron, acc: " + str(accuracy) + ", epoch: " + str(epoch))
    print(accuracy)

    # Original author's note: need to add legend and title.
    data.plot(plt).show()
def perceptron_algorithm():
    """Run three perceptron sweeps around a fixed offset point and plot them.

    The weights start at ``[-1.0, 1.0]`` and every sample is shifted by the
    fixed ``offset`` before it is scored, so the fitted line passes through
    that offset point.  The plot shows the dataset, the last sample visited
    (red), the offset point (blue) and the fitted line.
    """
    data = get_linear_seperatable_2d_2c_dataset()
    w = np.array([-1.0, 1.0])
    offset = np.array([0.1, -0.30])

    # Original author's note: three cycles are enough to find w.
    for _ in range(3):
        for sample, label in zip(data.X, data.y):
            shifted = sample - offset
            # +0.5 for a label of 1, -0.5 for a label of 0.
            half_sign = int(label) - 0.5
            if (shifted @ w) * half_sign < 0:
                # Misclassified: move w by the shifted sample, signed by class.
                w += shifted * half_sign * 2

    data.plot(plt)
    # `sample` is still bound to the last item of the final sweep.
    plt.scatter([sample[0]], [sample[1]], c='#ff0000')
    plt.scatter([offset[0]], [offset[1]], c='#0000ff')

    line_x = np.linspace(-1.5, 1.5, 10)
    slope = -w[0] / w[1]
    line_y = slope * (line_x - offset[0]) + offset[1]
    plt.plot(line_x, line_y)
    plt.show()
def part1(choose):
    """Train both linear models, evaluate them and plot the selected one.

    The first four fifths of the samples are used for training and the
    remaining fifth for evaluation.  The original code evaluated on
    ``[N // 5, N)``, which overlapped the training range ``[0, 4N // 5)``,
    so the reported precision was measured mostly on training samples.

    Args:
        choose: ``0`` plots the least-square line, ``1`` plots the
            perceptron line; any other value shows an empty figure.
    """
    dataset = get_linear_seperatable_2d_2c_dataset()
    label = dataset.y
    n_samples = dataset.X.shape[0]

    # Bias trick with the constant column appended last, so w[2] is the
    # intercept and w[0], w[1] weight the two features.
    bias = np.ones(n_samples).reshape(n_samples, 1)
    data = np.hstack([dataset.X, bias])

    # Targets are -1 everywhere except where `label` selects a row
    # (assumes label is a boolean mask -- `~label` below relies on it too).
    t = np.repeat(-1, label.shape[0]).reshape(label.shape[0], 1)
    t[label] = 1

    # Disjoint train / evaluation split.
    split = n_samples * 4 // 5
    train_x, train_t = data[:split], t[:split]
    test_x, test_t = data[split:], t[split:]

    w1 = leastsquare(np.zeros(3), train_x, train_t)
    w2 = perception(np.ones(3), train_x, train_t)
    cal_precision(w1.reshape(3, 1), test_x, test_t)
    cal_precision(w2.reshape(3, 1), test_x, test_t)

    if choose == 0:
        title, w = 'Least Square', w1
    elif choose == 1:
        title, w = 'Perception', w2
    else:
        title, w = None, None

    if w is not None:
        plotx = np.linspace(-1.0, 1.0, 10)
        plt.title(title)
        # Line w[0] * x + w[1] * y + w[2] = 0, solved for y.
        ploty = -(w[0] * plotx + w[2]) / w[1]
        plt.xlabel("x")
        plt.ylabel("y")
        plt.scatter(dataset.X[label, 0], dataset.X[label, 1], c='orange')
        plt.scatter(dataset.X[~label, 0], dataset.X[~label, 1], c='purple')
        plt.plot(plotx, ploty, c='black')
        print(w)
    plt.show()
def least_square_model():
    """Fit a two-output least-squares classifier and plot its boundary.

    Each label is encoded as the pair ``[int(label), 1 - int(label)]`` and
    a weight matrix ``W`` is fitted for the bias-extended samples.  The
    boundary is where both outputs are equal, i.e. where the difference of
    the two weight columns gives a score of zero.

    The weights are obtained with ``np.linalg.pinv`` instead of forming
    ``inv(X.T @ X) @ X.T`` explicitly: the explicit inverse raises on a
    singular Gram matrix and loses precision on an ill-conditioned one.
    """
    data = get_linear_seperatable_2d_2c_dataset()

    # Bias trick: rows are [1, x0, x1].
    samples = np.asarray(data.X)
    X = np.column_stack((np.ones(len(samples)), samples[:, 0], samples[:, 1]))

    T = np.array([[int(label), 1 - int(label)] for label in data.y])
    W = np.linalg.pinv(X) @ T

    # Difference of the two output columns: diff[0] + diff[1] * x + diff[2] * y = 0
    # is the line on which both outputs tie.
    diff = W[:, 0] - W[:, 1]

    data.plot(plt)
    line_x = np.linspace(-1.5, 1.5, 10)
    line_y = -(diff[0] + diff[1] * line_x) / diff[2]
    plt.plot(line_x, line_y)
    plt.title('y={} x {}'.format(-diff[1] / diff[2], -diff[0] / diff[2]))
    plt.show()
def program_parser():
    """Parse the command line and run the requested algorithm.

    ``--algorithm`` selects ``least_square``, ``perceptron`` or
    ``logistic``; ``--n`` (default ``run``) selects which ``Logistic``
    routine is executed when the algorithm is ``logistic``.  When no
    algorithm is given the help text is printed.
    """
    parser = argparse.ArgumentParser(description='Assignment 2')
    parser.add_argument(
        '--algorithm',
        choices=["least_square", "perceptron", "logistic"],
        help='the algorithms',
    )
    parser.add_argument(
        '--n',
        choices=["run", "batch", "lambda", "alpha", "check"],
        default="run",
        help='the algorithms of logistic',
    )
    args = parser.parse_args()

    # The linear models are built up front, whichever algorithm was chosen.
    linear_dataset = get_linear_seperatable_2d_2c_dataset()
    lsm = LSM(linear_dataset)
    perceptron_model = Perceptron(linear_dataset)
    algos = {"least_square": lsm.run, "perceptron": perceptron_model.run}

    if args.algorithm == "logistic":
        np.random.seed(2333)
        dataset_train, dataset_test = get_text_classification_datasets()
        logistic = Logistic(dataset_train, dataset_test)
        # Value of --n -> name of the Logistic method to call; resolved
        # lazily so only the selected attribute is ever looked up.
        logistic_actions = {
            "run": "show",
            "check": "check_gradient",
            "batch": "show_batch_diff",
            "lambda": "show_lamb_diff",
            "alpha": "show_alpha_diff",
        }
        method_name = logistic_actions.get(args.n)
        if method_name is not None:
            getattr(logistic, method_name)()
        return

    runner = algos.get(args.algorithm)
    if runner is None:
        parser.print_help()
    else:
        runner()
import os
os.sys.path.append('..')
from handout import get_linear_seperatable_2d_2c_dataset
from handout import get_text_classification_datasets
import numpy as np
import matplotlib.pyplot as plt
import math
import random
from sklearn.datasets import fetch_20newsgroups
import string
import re
import collections

# Dataset shared by the helpers below; loaded once at import time.
data = get_linear_seperatable_2d_2c_dataset()


def predict(a, b, c):
    """Print the accuracy of the line ``a * x1 + b * x2 + c = 0`` on ``data``.

    A sample is predicted as 1.0 when ``a * x1 + b * x2 + c`` is strictly
    positive and as 0.0 otherwise; the predictions are passed to
    ``data.acc`` and the result is printed.  Nothing is returned.
    """
    X = data.X

    def is_positive(i):
        # Score of sample i against the line.
        return a * X[i][0] + b * X[i][1] + c > 0

    pre_y = np.array([float(is_positive(i)) for i in range(len(data.y))])
    print(data.acc(pre_y))
os.sys.path.append('..')
import numpy as np
from matplotlib import pyplot as plt
from handout import get_linear_seperatable_2d_2c_dataset
from handout import get_text_classification_datasets
import string
import math
import random

# ===========================================
#
# PART 1
#
# ===========================================

# Split the linear dataset into two parts; the first is used to build X below.
dataset_a, dataset_b = get_linear_seperatable_2d_2c_dataset().split_dataset()


def handleX(x):
    """Return ``[1, x[0], x[1]]``: the sample with a leading constant 1."""
    first, second = x[0], x[1]
    return [1, first, second]


# Bias-extended sample matrix for dataset_a.
X = np.array(list(map(handleX, dataset_a.X)))


def transform_to_vector(t):
    """Encode a label as ``[1, 0]`` when truthy and ``[0, 1]`` otherwise."""
    if t:
        return [1, 0]
    return [0, 1]


def get_func_value(A, B, C, x):
    """Return ``(A * x + B) / C``."""
    numerator = A * x + B
    return numerator / C
#!/usr/bin/env python
# coding: utf-8

# In[2]:

# PART1
import os
os.sys.path.append('..')
from handout import get_linear_seperatable_2d_2c_dataset
data_sample = get_linear_seperatable_2d_2c_dataset()
import numpy as np
import matplotlib.pyplot as plt
import time


# least-squares model
def accurate(x, y, w):
    """Return the fraction of correctly classified samples as a string.

    Sample ``i`` is scored as ``w[0] * x[i][0] + w[1] * x[i][1] + w[2]``.
    It counts as correct when the score is at least 0.5 and ``y[i]`` equals
    ``True``, or when the score is below 0.5 and ``y[i]`` equals ``False``.

    Args:
        x: indexable collection of two-feature samples.
        y: labels, one per sample.
        w: three weights; the last one is the constant term.

    Returns:
        ``str(correct / len(y))``.

    Raises:
        ZeroDivisionError: if ``y`` is empty.
    """
    total = len(y)

    def is_hit(i):
        score = w[0] * x[i][0] + w[1] * x[i][1] + w[2]
        return (score >= 0.5 and y[i] == True) or (score < 0.5 and y[i] == False)

    hits = sum(1 for i in range(total) if is_hit(i))
    return str(hits / total)
import os
os.sys.path.append('..')
import handout
import matplotlib.pyplot as plt
import argparse
import numpy as np
import model

LEARNING_RATE = 0.02


def main():
    """Train the perceptron model on the linear dataset and report on it.

    Prints the weights returned by the model's ``run``, shows the model's
    plot, and finally prints the value returned by ``accuracy_cal``.
    """
    data_points = handout.get_linear_seperatable_2d_2c_dataset()

    # Labels equal to True become +1, everything else becomes -1.
    target = []
    for flag in data_points.y:
        target.append(1 if flag == True else -1)

    # Prepend the constant 1 to every sample.
    new_input_data = np.array(
        [np.concatenate(([1], np.ravel(point))) for point in data_points.X]
    )

    # build model
    perception_model = model.Perception_model(new_input_data, target, LEARNING_RATE)
    weight = perception_model.run()
    print(weight)

    graph = plt.subplot(1, 1, 1)
    perception_model.plot(graph)
    plt.legend(loc='best', prop={'size': 14})
    plt.show()

    # Reported only after the plot call above has returned.
    accuracy_val = perception_model.accuracy_cal()
    print(accuracy_val)


if __name__ == '__main__':
    main()
# Command-line entry: pick the operation and its optional hyper-parameters.
parser = argparse.ArgumentParser()
parser.add_argument(
    'operation',
    type=str,
    choices=['lsm', 'perceptron', 'full_batch', 'stochastic', 'batched'],
    help='tell me what you want to do')
parser.add_argument('--c', '-c', type=float, help='para c in regularization')
parser.add_argument('--learning_rate', '-lr', type=float)
parser.add_argument('--batch_size', '-bs', type=int)
parser.add_argument('--iterations', '-i', type=int)
args = parser.parse_args()

if args.operation == 'lsm':
    lsm(get_linear_seperatable_2d_2c_dataset())
elif args.operation == 'perceptron':
    perceptron(get_linear_seperatable_2d_2c_dataset(), args.learning_rate,
               args.iterations)
else:
    # The remaining operations work on the text dataset.  Cached vectors
    # are loaded from disk when present; otherwise they are rebuilt.
    try:
        # Builtin `bool` instead of `np.bool`: that alias was removed in
        # NumPy 1.24 and raises AttributeError there.
        train_vec_x = np.loadtxt('train_vec_x.txt', dtype=bool)
        train_vec_y = np.loadtxt('train_vec_y.txt', dtype=bool)
        print('[NOTE]train data load complete!')
        test_vec_x = np.loadtxt('test_vec_x.txt', dtype=bool)
        test_vec_y = np.loadtxt('test_vec_y.txt', dtype=bool)
        print('[NOTE]test data load complete!')
    except OSError:
        train_data, test_data = get_text_classification_datasets()
        train_vec_x, test_vec_x = convert_data(train_data.data, test_data.data, 10)