예제 #1
0
def task1():
    """Fit a linear classifier on the handout dataset and plot the result.

    Prompts on stdin for the training method (``l`` selects ``least_square``,
    ``p`` selects ``perceptron``; surrounding whitespace and letter case are
    ignored), prints the learned weights together with the value returned by
    ``get_accuracy``, and finally shows the samples along with the line
    ``W[0] + W[1] * x + W[2] * y = 0``.

    Raises:
        ValueError: if the answer to the prompt is neither ``l`` nor ``p``.
            Previously ``W`` stayed ``None`` in that case and the function
            failed later with an unrelated error.
    """
    dataset = handout.get_linear_seperatable_2d_2c_dataset()
    N = len(dataset.X)

    # Bias trick: prepend a column of ones so that W[0] acts as the intercept.
    X = np.concatenate((np.ones([N, 1]), dataset.X), axis=1)
    # Recode the labels: truthy -> +1, falsy -> -1.
    Y = np.array([1.0 if label else -1.0 for label in dataset.y])

    method = input('Least square model or perceptron algorithm? (l/p)')
    choice = method.strip().lower()
    if choice == 'l':
        W = least_square(X, Y)
    elif choice == 'p':
        W = perceptron(X, Y)
    else:
        raise ValueError(
            "unknown method {!r}: expected 'l' or 'p'".format(method))

    print("W = ", W)
    print("accuracy rate = ", get_accuracy(W, X, Y))

    # Sample the boundary over the observed range of the first feature.
    first_feature = X[:, 1]
    x = np.arange(first_feature.min(), first_feature.max(), 0.01)
    # Solve W[0] + W[1] * x + W[2] * y = 0 for y.
    y = (W[0] + W[1] * x) / (-W[2])

    plt.scatter(X[:, 1], X[:, 2], c=dataset.y)
    # The line used to be drawn twice; once is enough.
    plt.plot(x, y)
    plt.show()
예제 #2
0
def least_square():
    """Fit a least-squares linear classifier and display it.

    Loads the dataset through ``get_linear_seperatable_2d_2c_dataset``, solves
    for the weights with the pseudo-inverse, draws the line
    ``w[0] + w[1] * x + w[2] * y = 0`` and reports the value of ``data.acc``
    for the thresholded predictions both in the plot title and on stdout.
    """
    data = get_linear_seperatable_2d_2c_dataset()

    # Bias trick: a leading column of ones lets w[0] act as the intercept.
    features = np.insert(data.X, 0, 1, axis=1)
    # Recode the labels as -1 / +1 targets (assumes data.y holds 0/1 or
    # boolean values -- TODO confirm against the handout module).
    targets = 2 * data.y - 1

    # Least-squares solution via the Moore-Penrose pseudo-inverse.
    w = np.linalg.pinv(features) @ targets

    # Decision boundary: solve w0 + w1 * x + w2 * y = 0 for y.
    bias, coef_x, coef_y = w[0], w[1], w[2]
    line_x = np.linspace(-1.5, 1.5, 2)
    line_y = -(bias + coef_x * line_x) / coef_y
    plt.plot(
        line_x,
        line_y,
        label=f'{bias:.4f} + {coef_x:.4f} * x + {coef_y:.4f} * y = 0',
    )
    plt.legend()

    # Threshold the raw scores in place: negative -> 0, positive -> 1.
    scores = features @ w
    scores[scores < 0] = 0
    scores[scores > 0] = 1
    plt.title(f"least square, acc: {data.acc(scores)!s}")
    print(data.acc(scores))

    # Original author's note: need to add legend and title.
    figure = data.plot(plt)
    figure.show()
예제 #3
0
def perceptron(eta=1e-2, max_epochs=None):
    """Train a perceptron on the handout dataset and display the result.

    The samples are shuffled once with a fixed seed, then swept repeatedly;
    every misclassified sample moves the weights by ``eta * x * target``.
    Training stops after the first sweep that makes no update.  The number
    of sweeps is printed, the boundary ``w[0] + w[1] * x + w[2] * y = 0`` is
    drawn, and the value of ``data.acc`` for the thresholded predictions is
    shown in the title and printed.

    Args:
        eta: learning rate applied to every weight update.
        max_epochs: optional upper bound on the number of sweeps.  ``None``
            (the default) keeps the original behaviour of looping until a
            sweep is error-free, which never terminates when the data is not
            linearly separable.  Pass a positive integer to guarantee
            termination.

    Raises:
        ValueError: if ``max_epochs`` is given but smaller than 1.
    """
    if max_epochs is not None and max_epochs < 1:
        raise ValueError("max_epochs must be None or a positive integer")

    # get data
    data = get_linear_seperatable_2d_2c_dataset()

    # Bias trick: a leading column of ones lets w[0] act as the intercept.
    x = data.X
    x_extended = np.insert(x, 0, 1, axis=1)
    # Recode the labels as -1 / +1 targets (assumes data.y holds 0/1 or
    # boolean values -- TODO confirm against the handout module).
    c = data.y * 2 - 1

    # Shuffle once with a fixed seed so that runs are reproducible.
    N = len(x)
    rng = np.random.RandomState(233)
    rand_ind = np.arange(N)
    rng.shuffle(rand_ind)
    rand_x = x_extended[rand_ind]
    rand_c = c[rand_ind]

    # Online updates, one sample at a time, starting from zero weights.
    w = np.zeros(3)
    epoch = 0
    while max_epochs is None or epoch < max_epochs:
        epoch += 1
        converged = True
        for sample, target in zip(rand_x, rand_c):
            # A score of exactly 0 counts as the negative class.
            predicted = 1 if sample @ w > 0 else -1
            if predicted != target:
                converged = False
                w += eta * sample * target
        if converged:
            break
    print(epoch)

    # Decision boundary: solve w0 + w1 * x + w2 * y = 0 for y.
    xl = np.linspace(-1.5, 1.5, 2)
    yl = -(w[0] + w[1] * xl) / w[2]
    line_info = '{:.4f} + {:.4f} * x + {:.4f} * y = 0'.format(w[0], w[1], w[2])
    plt.plot(xl, yl, label=line_info)
    plt.legend()

    # Threshold the raw scores in place: negative -> 0, positive -> 1.
    pred_y = x_extended @ w
    pred_y[pred_y < 0] = 0
    pred_y[pred_y > 0] = 1
    plt.title("perceptron, acc: " + str(data.acc(pred_y)) + ", epoch: " +
              str(epoch))
    print(data.acc(pred_y))

    # Original author's note: need to add legend and title.
    data.plot(plt).show()
예제 #4
0
def perceptron_algorithm():
    """Run three perceptron passes, showing a figure after every update.

    The boundary is the line through ``offset`` whose normal vector is ``w``.
    Each misclassified sample updates ``w`` and opens a figure with the
    dataset, the offending sample (red), the offset point (blue) and the
    current boundary.
    """
    data = get_linear_seperatable_2d_2c_dataset()
    w = np.array([-1.0, 1.0])
    offset = np.array([0.1, -0.30])
    # Original author's note: three passes are enough for w to settle.
    for _ in range(3):
        for sample, label in zip(data.X, data.y):
            centered = sample - offset
            # int(label) - 0.5 is +0.5 for label 1 and -0.5 for label 0.
            half_sign = int(label) - 0.5
            misclassified = (centered @ w) * half_sign < 0
            if not misclassified:
                continue

            # Move w towards (or away from) the sample by the full +/-1 sign.
            w += centered * half_sign * 2

            data.plot(plt)
            plt.scatter([sample[0]], [sample[1]], c='#ff0000')
            plt.scatter([offset[0]], [offset[1]], c='#0000ff')
            # Line through `offset` that is perpendicular to w.
            line_x = np.linspace(-1.5, 1.5, 10)
            slope = -w[0] / w[1]
            plt.plot(line_x, slope * (line_x - offset[0]) + offset[1])
            plt.show()
예제 #5
0
def part1(choose):
    """Train both linear classifiers, score them on held-out data, plot one.

    The first four fifths of the samples are used for training (through
    ``leastsquare`` and ``perception``) and the remaining fifth is passed to
    ``cal_precision``.  The evaluation slice used to start at one fifth of
    the data, so it overlapped the training samples; it now covers only
    samples that were not used for training.

    Args:
        choose: ``0`` plots the least-squares boundary and prints its
            weights, ``1`` does the same for the perceptron; any other value
            only shows an empty figure.
    """
    dataset = get_linear_seperatable_2d_2c_dataset()
    label = dataset.y

    # Append a constant-one column so that the last weight acts as the bias.
    n_samples = dataset.X.shape[0]
    data = np.hstack([dataset.X, np.ones((n_samples, 1))])
    # Targets: +1 where `label` is set, -1 elsewhere (label is used as a
    # boolean mask here and in the scatter calls below).
    t = np.full((label.shape[0], 1), -1)
    t[label] = 1

    # 80 / 20 split without overlap.
    split = n_samples * 4 // 5
    train_x, train_t = data[:split], t[:split]
    test_x, test_t = data[split:], t[split:]

    w1 = leastsquare(np.zeros(3), train_x, train_t)
    w2 = perception(np.ones(3), train_x, train_t)

    cal_precision(w1.reshape(3, 1), test_x, test_t)
    cal_precision(w2.reshape(3, 1), test_x, test_t)

    plotx = np.linspace(-1.0, 1.0, 10)

    if choose == 0:
        selected = ('Least Square', w1)
    elif choose == 1:
        selected = ('Perception', w2)
    else:
        selected = None

    if selected is not None:
        title, w = selected
        plt.title(title)
        # Solve w[0] * x + w[1] * y + w[2] = 0 for y.
        ploty = -(w[0] * plotx + w[2]) / w[1]
        plt.xlabel("x")
        plt.ylabel("y")
        plt.scatter(dataset.X[label, 0], dataset.X[label, 1], c='orange')
        plt.scatter(dataset.X[~label, 0], dataset.X[~label, 1], c='purple')
        plt.plot(plotx, ploty, c='black')
        print(w)

    plt.show()
예제 #6
0
def least_square_model():
    """Fit a two-output least-squares classifier and plot its boundary.

    Each sample is extended to ``[1, x0, x1]`` and regressed onto a one-hot
    target ``[int(label), 1 - int(label)]``.  The decision boundary is the
    line on which both outputs are equal; it is drawn over the dataset and
    its slope and intercept are shown in the title.
    """
    data = get_linear_seperatable_2d_2c_dataset()
    # Bias trick: a leading 1 lets the first weight row act as the intercept.
    X = np.array([[1, x[0], x[1]] for x in data.X])
    # One-hot targets: column 0 is int(label), column 1 its complement.
    T = np.array([[int(label), 1 - int(label)] for label in data.y])

    # pinv(X) equals inv(X.T @ X) @ X.T when X has full column rank, but it
    # avoids forming and inverting the Gram matrix explicitly and stays
    # defined (minimum-norm solution) when X.T @ X is singular.
    W = np.linalg.pinv(X) @ T

    # Both outputs tie where (W[:, 0] - W[:, 1]) . [1, x, y] == 0.
    bias, coef_x, coef_y = W[:, 0] - W[:, 1]

    # draw the plot
    data.plot(plt)
    line_x = np.linspace(-1.5, 1.5, 10)
    line_y = -(bias + coef_x * line_x) / coef_y
    plt.plot(line_x, line_y)
    plt.title('y={} x {}'.format(-coef_x / coef_y, -bias / coef_y))
    plt.show()
예제 #7
0
def program_parser():
    """Parse the command line and run the requested experiment.

    ``--algorithm`` picks ``least_square``, ``perceptron`` or ``logistic``;
    when it is omitted the help text is printed.  ``--n`` selects which
    ``Logistic`` method is invoked and is only consulted when the algorithm
    is ``logistic``.
    """
    parser = argparse.ArgumentParser(description='Assignment 2')
    parser.add_argument(
        '--algorithm',
        choices=["least_square", "perceptron", "logistic"],
        help='the algorithms',
    )
    parser.add_argument(
        '--n',
        choices=["run", "batch", "lambda", "alpha", "check"],
        default="run",
        help='the algorithms of logistic',
    )
    args = parser.parse_args()

    # The linear models are built up front, whatever algorithm was requested.
    linear_dataset = get_linear_seperatable_2d_2c_dataset()
    lsm = LSM(linear_dataset)
    perceptron = Perceptron(linear_dataset)
    algos = {"least_square": lsm.run, "perceptron": perceptron.run}

    if args.algorithm != "logistic":
        runner = algos.get(args.algorithm)
        if runner is None:
            # No (known) algorithm requested.
            parser.print_help()
        else:
            runner()
        return

    np.random.seed(2333)
    dataset_train, dataset_test = get_text_classification_datasets()
    logistic = Logistic(dataset_train, dataset_test)

    # Value of --n -> name of the Logistic method to call.
    logistic_actions = {
        "run": "show",
        "check": "check_gradient",
        "batch": "show_batch_diff",
        "lambda": "show_lamb_diff",
        "alpha": "show_alpha_diff",
    }
    method_name = logistic_actions.get(args.n)
    if method_name is not None:
        getattr(logistic, method_name)()
예제 #8
0
import os

os.sys.path.append('..')
from handout import get_linear_seperatable_2d_2c_dataset
from handout import get_text_classification_datasets
import numpy as np
import matplotlib.pyplot as plt
import math
import random
from sklearn.datasets import fetch_20newsgroups
import string
import re
import collections

# Module-level dataset, fetched once at import time and read by predict().
data = get_linear_seperatable_2d_2c_dataset()


def predict(a, b, c):
    """Print the accuracy of the line ``a * x1 + b * x2 + c = 0`` on ``data``.

    A sample ``(x1, x2)`` is predicted positive (1.0) when
    ``a * x1 + b * x2 + c > 0`` and negative (0.0) otherwise.  The
    predictions are scored with ``data.acc`` and the result is printed;
    nothing is returned.
    """
    samples = np.asarray(data.X)
    # Evaluate the line for every sample at once instead of looping.
    scores = a * samples[:, 0] + b * samples[:, 1] + c
    predictions = (scores > 0).astype(float)
    print(data.acc(predictions))
예제 #9
0
os.sys.path.append('..')
import numpy as np
from matplotlib import pyplot as plt
from handout import get_linear_seperatable_2d_2c_dataset
from handout import get_text_classification_datasets
import string
import math
import random

# ===========================================
#
#  PART 1
#
# ===========================================

# Split the handout dataset into two parts with its split_dataset() method.
# dataset_a feeds the design matrix X built below; presumably the two parts
# are the training and test sets -- confirm against the handout module.
dataset_a, dataset_b = get_linear_seperatable_2d_2c_dataset().split_dataset()


def handleX(x):
    """Return ``[1, x[0], x[1]]``: a constant 1 followed by the first two components of ``x``."""
    row = [1]
    row.extend((x[0], x[1]))
    return row


# Design matrix for dataset_a: one [1, x0, x1] row per sample (see handleX).
X = np.array([handleX(item) for item in dataset_a.X])


def transform_to_vector(t):
    """Encode a label as a one-hot pair: ``[1, 0]`` if truthy, ``[0, 1]`` otherwise."""
    if t:
        return [1, 0]
    return [0, 1]


def get_func_value(A, B, C, x):
    """Evaluate ``(A * x + B) / C`` for the given coefficients and ``x``."""
    numerator = A * x + B
    return numerator / C
예제 #10
0
#!/usr/bin/env python
# coding: utf-8

# In[2]:

#PART1

import os
os.sys.path.append('..')
from handout import get_linear_seperatable_2d_2c_dataset
# Fetch the handout dataset once, at import time, as a module-level global.
data_sample = get_linear_seperatable_2d_2c_dataset()
import numpy as np
import matplotlib.pyplot as plt
import time

# least squares model


def accurate(x, y, w):
    """Return, as a string, the fraction of samples classified correctly.

    Sample ``x[i]`` is predicted positive when
    ``w[0] * x[i][0] + w[1] * x[i][1] + w[2] >= 0.5`` and negative when the
    score is below 0.5.  A positive prediction is correct when
    ``y[i] == True``; a negative one when ``y[i] == False``.

    Raises:
        ZeroDivisionError: if ``y`` is empty.
    """
    hits = 0
    for i, label in enumerate(y):
        score = w[0] * x[i][0] + w[1] * x[i][1] + w[2]
        if score >= 0.5:
            correct = label == True
        elif score < 0.5:
            correct = label == False
        else:
            # Neither comparison holds (e.g. a NaN score): never a hit.
            correct = False
        if correct:
            hits += 1
    return str(hits / len(y))

예제 #11
0
import os
os.sys.path.append('..')

import handout
import matplotlib.pyplot as plt
import argparse
import numpy as np
import model

# Learning rate handed to model.Perception_model in the script body below.
LEARNING_RATE = 0.02


if __name__ == '__main__':
    # Load the samples and recode the labels: True -> +1, anything else -> -1.
    dataset = handout.get_linear_seperatable_2d_2c_dataset()
    signed_labels = [1 if flag == True else -1 for flag in dataset.y]
    # Bias trick: put a constant 1 in front of every sample.
    augmented = np.array([np.append([1], point) for point in dataset.X])

    # Build and train the model, then report the weights it returns.
    classifier = model.Perception_model(augmented, signed_labels,
                                        LEARNING_RATE)
    print(classifier.run())

    # Let the model draw itself on a single-axes figure.
    axes = plt.subplot(1, 1, 1)
    classifier.plot(axes)
    plt.legend(loc='best', prop={'size': 14})
    plt.show()

    print(classifier.accuracy_cal())

예제 #12
0
 parser = argparse.ArgumentParser()
 parser.add_argument(
     'operation',
     type=str,
     choices=['lsm', 'perceptron', 'full_batch', 'stochastic', 'batched'],
     help='tell me what you want to do')
 parser.add_argument('--c',
                     '-c',
                     type=float,
                     help='para c in regularization')
 parser.add_argument('--learning_rate', '-lr', type=float)
 parser.add_argument('--batch_size', '-bs', type=int)
 parser.add_argument('--iterations', '-i', type=int)
 args = parser.parse_args()
 if args.operation == 'lsm':
     lsm(get_linear_seperatable_2d_2c_dataset())
 elif args.operation == 'perceptron':
     perceptron(get_linear_seperatable_2d_2c_dataset(), args.learning_rate,
                args.iterations)
 else:
     try:
         train_vec_x = np.loadtxt('train_vec_x.txt', dtype=np.bool)
         train_vec_y = np.loadtxt('train_vec_y.txt', dtype=np.bool)
         print('[NOTE]train data load complete!')
         test_vec_x = np.loadtxt('test_vec_x.txt', dtype=np.bool)
         test_vec_y = np.loadtxt('test_vec_y.txt', dtype=np.bool)
         print('[NOTE]test data load complete!')
     except OSError:
         train_data, test_data = get_text_classification_datasets()
         train_vec_x, test_vec_x = convert_data(train_data.data,
                                                test_data.data, 10)