Example 1
import numpy as np
import matplotlib.pyplot as plt


def plot_Decision_Boundary(X, Y, theta):
    plot_data(X[:, 1:3], Y)

    if X.shape[1] <= 3:
        plot_x = np.array([np.min(X[:, 1]) - 2, np.max(X[:, 1]) + 2])
        plot_y = (-1 / theta[2]) * (theta[1] * plot_x + theta[0])

        plt.plot(plot_x, plot_y)
        plt.legend(['Admitted', 'Not admitted', 'Decision Boundary'])
        plt.axis([30, 100, 30, 100])
    else:
        u = np.linspace(-1, 1.5, 50)
        v = np.linspace(-1, 1.5, 50)
        z = np.zeros((u.size, v.size))

        for i in range(0, u.size):
            for j in range(0, v.size):
                z[i, j] = np.dot(
                    map_feature(u[i], v[j], 6),
                    theta,
                )

        z = z.T  # transpose so rows follow v and columns follow u
        cs = plt.contour(u,
                         v,
                         z,
                         levels=[0],  # draw only the theta'*x = 0 contour
                         colors='b')  # contour() takes no label kwarg; see legend below
        plt.legend(
            [cs.collections[0]],
            ['Decision Boundary'])  # cs.collections[0] is the boundary line artist, paired with the label string
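Every example on this page leans on a map_feature helper that the excerpts never show. A minimal sketch of what it presumably does, assuming the standard ex2 expansion (all terms x1^(i-j) * x2^j up to the requested degree, with a leading one for the intercept); the default degree and the vectorized behavior are assumptions:

import numpy as np


def map_feature(x1, x2, degree=6):
    # Map two features to all polynomial terms x1^(i-j) * x2^j, i = 0..degree.
    # The first output column is all ones, so the intercept is included.
    x1 = np.atleast_1d(np.asarray(x1, dtype=float))
    x2 = np.atleast_1d(np.asarray(x2, dtype=float))
    cols = [np.ones(x1.shape[0])]
    for i in range(1, degree + 1):
        for j in range(i + 1):
            cols.append((x1 ** (i - j)) * (x2 ** j))
    return np.column_stack(cols)

For scalar inputs u[i], v[j] this returns a (1, 28) row at degree 6, so np.dot(map_feature(u[i], v[j], 6), theta) yields the single value stored in z[i, j].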
Example 2
def plot_decesion_boundary(theta, x_data, y_data):
    # plot the data first; x is an m x 3 matrix (intercept column plus two exam scores)
    pos = np.where(y_data == 1)
    neg = np.where(y_data == 0)
    plt.plot(x_data[pos[0], 1], x_data[pos[0], 2], 'k+')
    plt.plot(x_data[neg[0], 1], x_data[neg[0], 2], 'yo')
    plt.xlabel("exam1 score")
    plt.ylabel("exam2 score")
    plt.legend(["Admitted", "Not admitted"], loc='upper right')

    # the 2-column case (no intercept) is ignored here;
    # when the number of columns is 3 the boundary is a straight line
    if x_data.shape[1] == 3:
        # two points can define a line
        plot_x = np.zeros((2, ))
        plot_x[0] = np.min(x_data[:, 1])
        plot_x[1] = np.max(x_data[:, 1])  # min/max of the same x-axis feature
        # calculate plot_y
        plot_y = -(theta[1]*plot_x+theta[0])/theta[2]
        plt.plot(plot_x, plot_y)
        plt.show()
    elif x_data.shape[1] > 3:
        u = np.linspace(-1, 1.5, 50)
        v = np.linspace(-1, 1.5, 50)
        z = np.ones((u.shape[0], v.shape[0]))
        for i in range(u.shape[0]):
            for j in range(v.shape[0]):
                tempt = np.column_stack((np.ones((1, 1)), mapFeature.map_feature(np.array([u[i]]), np.array([v[j]]))))
                a = np.dot(tempt, theta)
                z[i, j] = a
        plt.contour(u, v, z.T, [0], colors='k')  # transpose: contour expects z[j, i]
        plt.show()
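A hedged usage sketch for the function above; the data file name, its layout (two exam scores plus a 0/1 label), and the theta values are assumptions based on how this exercise is usually run:

import numpy as np

data = np.loadtxt('ex2data1.txt', delimiter=',')              # assumed layout
x_data = np.column_stack((np.ones(len(data)), data[:, :2]))   # prepend intercept
y_data = data[:, 2]
theta = np.array([-25.16, 0.206, 0.201])  # approximate fitted values for ex2
plot_decesion_boundary(theta, x_data, y_data)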
Example 3
def plot_decision_boundary(theta, X, y):
    """
    Plot the classifier's decision boundary.
    """
    plot_data(X[:, 1:3], y)

    if X.shape[1] <= 3:
        # Only need two points to define a line, so choose two endpoints
        plot_x = np.array([np.min(X[:, 1])-2, np.max(X[:, 1])+2])

        # Calculate the decision boundary line
        plot_y = (-1/theta[2])*(theta[1]*plot_x+theta[0])
        plt.plot(plot_x, plot_y)
        plt.legend(['Admitted', 'Not admitted', 'Decision Boundary'], loc=1)
        plt.axis([30, 100, 30, 100])
    else:
        # Here is the grid range
        u = np.linspace(-1, 1.5, 50)
        v = np.linspace(-1, 1.5, 50)
        z = np.zeros((u.size, v.size))

        # Evaluate z = theta*x over the grid
        for i in range(0, u.size):
            for j in range(0, v.size):
                z[i, j] = np.dot(map_feature(u[i], v[j]), theta)
        z = z.T

        # Plot z = 0: pass levels=[0] so only the boundary contour is drawn
        cs = plt.contour(u, v, z, levels=[0], colors='r')
        plt.legend([cs.collections[0]], ['Decision Boundary'])
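The linear branch works because the logistic boundary sits where sigmoid(theta'x) = 0.5, i.e. theta[0] + theta[1]*x1 + theta[2]*x2 = 0, which rearranges to exactly the plot_y formula above. A quick sanity check with illustrative theta values:

import numpy as np

theta = np.array([-25.16, 0.206, 0.201])           # illustrative values
x1 = 60.0
x2 = (-1 / theta[2]) * (theta[1] * x1 + theta[0])  # point on the boundary
print(theta[0] + theta[1] * x1 + theta[2] * x2)    # ~0.0, so sigmoid(z) = 0.5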
Example 4
import numpy as np
import mapFeature
from matplotlib.pyplot import plot, show, contour, title, xlabel, ylabel, legend


def plotDecisionBoundary(theta, X, y):

    if X.shape[1] <= 3:  # test the feature count, not a dataset-specific row count
        plot_x = np.array([min(X[:, 1]) - 2, max(X[:, 1]) + 2])
        plot_y = (-1.0 / theta[2]) * (theta[1] * plot_x + theta[0])
        plot(plot_x, plot_y)
        #legend(['Decision Boundary', 'Not admitted', 'Admitted'])
        show()
    else:
        u = np.linspace(-1, 1.5, 50)
        v = np.linspace(-1, 1.5, 50)
        z = np.zeros(shape=(len(u), len(v)))
        for i in range(len(u)):
            for j in range(len(v)):
                z[i, j] = (mapFeature.map_feature(np.array(u[i]),
                                                  np.array(v[j])).dot(
                                                      np.array(theta)))

        z = z.T
        contour(u, v, z, levels=[0], colors='green')  # draw only the boundary contour
        title('lambda = 1.0')
        xlabel('Microchip Test 1')
        ylabel('Microchip Test 2')
        legend(['y = 1', 'y = 0', 'Decision boundary'])
        show()
Example 5
def predit(X, y, theta):
    """Plot the data and the decision boundary implied by theta."""
    n = len(theta)
    pos = np.where(y[:, 0] == 1)
    neg = np.where(y[:, 0] == 0)
    plt.figure(figsize=(6, 6))
    pos_data = X[pos]
    x_pos_data = pos_data[:, 1]
    y_pos_data = pos_data[:, 2]
    plt.scatter(x_pos_data,
                y_pos_data,
                marker='x',
                linewidths=2,
                color='red',
                label="Admitted")

    neg_data = X[neg]
    x_neg_data = neg_data[:, 1]
    y_neg_data = neg_data[:, 2]
    plt.scatter(x_neg_data,
                y_neg_data,
                marker='*',
                linewidths=2,
                color='blue',
                label="Not admitted")
    if n <= 3:
        x_data = np.linspace(30, 100)
        y_data = np.linspace(30, 100)
        x_data, y_data = np.meshgrid(x_data, y_data)
        z = theta[0][0] + theta[1][0] * x_data + theta[2][0] * y_data  # 50*50
        print(z.shape)
        # h = 1 / (1 + np.exp(-z))
        # levels: sets the number and placement of the contour lines. An int n
        # asks for about n+1 automatically chosen levels; an array draws
        # contours at exactly those values, which must be increasing.
        # Boundary intuition: z >> 0 predicts 1, z << 0 predicts 0.
        plt.contour(x_data, y_data, z, levels=[-1, 0, 1])
    else:
        x_data = np.linspace(-1, 1)
        y_data = np.linspace(-1, 1)
        z = np.zeros((len(x_data), len(y_data)))
        # z=theta[0][0]+theta[1][0]*x0+theta[2][0]*x1
        for i in range(len(x_data)):
            for j in range(len(y_data)):
                features = map_feature(
                    np.append(x_data[i].reshape(-1, 1),
                              y_data[j].reshape(-1, 1),
                              axis=1))
                z[i, j] = np.dot(features, theta)  # 50*50
        # x_data, y_data = np.meshgrid(x_data, y_data)
        plt.contour(x_data, y_data, z.T,  # transpose: contour expects z[j, i]
                    levels=[-1, 0,
                            1]).collections[0].set_label("Decision boundary")

    plt.xlabel('Exam1 Score')
    plt.ylabel('Exam2 Score')
    plt.legend(loc='upper right')  # render the scatter/contour labels set above
    plt.show()
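A note on levels=[-1, 0, 1] in this example: z = theta'x is the log-odds, and the sigmoid turns those levels into probabilities of roughly 0.27, 0.5 and 0.73, so only the z = 0 contour is the real decision boundary; the other two just bracket it. A quick check:

import numpy as np


def sigmoid(z):
    return 1 / (1 + np.exp(-z))


for z in (-1, 0, 1):
    print(z, sigmoid(z))  # -1 -> ~0.269, 0 -> 0.5, 1 -> ~0.731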
Example 6
def plot_decision_boundary(theta, x, y):
    # Plot Data
    pos = np.where(y[:, 0] == 1.0)
    neg = np.where(y[:, 0] == 0.0)
    temp_x = x[:, [1, 2]]

    plt.ion()
    plt.figure()
    plt.scatter(temp_x[pos, 0], temp_x[pos, 1], marker="+", label="Admitted")
    plt.scatter(temp_x[neg, 0],
                temp_x[neg, 1],
                marker="o",
                label="Not admitted")

    m, n = x.shape
    theta = theta.reshape(theta.shape[0], 1)
    if n <= 3:
        # Only need 2 points to define a line, so choose two endpoints
        plot_x = np.array(([x[:, 1].min(), x[:, 1].max()])).reshape(2, 1)
        plot_y = ((-1 / theta[2]) * (theta[1] * plot_x + theta[0])).reshape(
            2, 1)
        # Plot, and adjust axes for better viewing
        plt.plot(plot_x[0], plot_y[0], 'rx', markersize=10)
        plt.plot(plot_x[1], plot_y[1], 'rx', markersize=10)
        plt.plot(plot_x, plot_y, '-', label="Decision boundary")
        plt.axis([15, 120, 15, 120])

    else:
        u = np.linspace(-1, 1.5, 50)
        v = np.linspace(-1, 1.5, 50)
        z = np.zeros((len(u), len(v)))

        for i in range(len(u)):
            for j in range(len(v)):
                z[i, j] = np.dot(
                    map_feature(np.array([u[i]]), np.array([v[j]])), theta)
        plt.contour(u, v, z.T,
                    [0]).collections[0].set_label("Decision boundary")
    plt.legend(loc='upper right')  # render the labels set on the artists above
Example 7
input('Program paused. Press ENTER to continue')

# ===================== Part 1: Regularized Logistic Regression =====================
# In this part, you are given a dataset with data points that are not
# linearly separable. However, you would still like to use logistic
# regression to classify the data points.

# To do so, you introduce more features -- in particular, you add
# polynomial features to the data matrix (similar to polynomial regression)
#

# Add polynomial features

# Note that mapFeature also adds a column of ones for us, so the intercept
# term is handled
X = mf.map_feature(X[:, 0], X[:, 1])

# Initialize fitting parameters
initial_theta = np.zeros(X.shape[1])

# Set regularization parameter lambda to 1
lmd = 1

# Compute and display initial cost and gradient for regularized logistic regression
cost, grad = cfr.cost_function_reg(initial_theta, X, y, lmd)

np.set_printoptions(formatter={'float': '{: 0.4f}\n'.format})
print('Cost at initial theta (zeros): {}'.format(cost))
print('Expected cost (approx): 0.693')
print(
    'Gradient at initial theta (zeros) - first five values only: \n{}'.format(
        grad[0:5]))
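cfr.cost_function_reg is imported from another module in this repo and is not shown on this page. A minimal sketch of the standard regularized cost and gradient it is expected to compute; the (cost, grad) return pair and 1-D array shapes are assumptions:

import numpy as np


def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def cost_function_reg(theta, X, y, lmd):
    # Regularized logistic-regression cost and gradient; theta[0] is unpenalized.
    m = y.size
    h = sigmoid(X.dot(theta))
    reg = (lmd / (2 * m)) * np.sum(theta[1:] ** 2)
    cost = (-y.dot(np.log(h)) - (1 - y).dot(np.log(1 - h))) / m + reg
    grad = X.T.dot(h - y) / m
    grad[1:] += (lmd / m) * theta[1:]
    return cost, grad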
Example 8
import numpy as np  # needed below for loadtxt, mean, std
import sigmoid
import mapFeature
import multiprocessing
from multiprocessing import Pool, Value
import math
import pylab as pl
import time

#load the dataset
data = np.loadtxt('ex2data2.txt', delimiter=',')
X = data[:, 0:2]
#print (X.shape)
y = data[:, 2]
m, n = X.shape

new_data = mapFeature.map_feature(X[:, 0], X[:, 1])

#########data standardized#########
##################################
for i in range(2, new_data.shape[1]):
    mu = np.mean(new_data[:, i])  # mu, not m: m above is the sample count
    s = np.std(new_data[:, i])
    new_data[:, i] = (new_data[:, i] - mu) / s
##################################
##################################

############parameters############
##################################
lamda = 0.1
learning_rate = 0.1
max_iters = 400
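The excerpt stops right after setting the hyperparameters. A sketch of the batch gradient-descent loop they could drive, assuming y is a 0/1 vector, new_data already carries the intercept column (per the mapFeature note in Example 7), and the intercept stays unregularized:

theta = np.zeros(new_data.shape[1])
for _ in range(max_iters):
    h = 1 / (1 + np.exp(-new_data.dot(theta)))  # logistic hypothesis
    grad = new_data.T.dot(h - y) / m            # gradient of the data term
    grad[1:] += (lamda / m) * theta[1:]         # L2 penalty, intercept excluded
    theta -= learning_rate * grad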
Example 9
    return X, y


def normal_data(x):
    data = x.copy()
    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0, ddof=1)

    for i in range(len(mean)):
        data[:, i] = (data[:, i] - mean[i]) / std[i]

    return data


if __name__ == '__main__':
    X, y = load_data()
    print(X[0], y[0])
    m = len(X)
    # plot_data(X,y)
    x = map_feature(X)
    X_input = np.append(np.ones((m, 1)), X, axis=1)  # note: unused in the rest of this script
    n = len(x[0])
    theta = np.ones((n, 1))
    # loss=cost_function_reg(x,y,theta,1)
    # print(loss)
    ret = my_fminunc(x, y, theta, 2)
    print(ret['success'])
    print(np.round(ret['x'], 2))
    theta = ret['x'].reshape(-1, 1)
    predit(x, y, theta)
Example 10
# linearly separable. However, you would still like to use logistic
# regression to classify the data points.

# To do so, you introduce more features -- in particular, you add
# polynomial features to the data matrix (similar to polynomial regression)
#

# Add polynomial features

# Note that mapFeature also adds a column of ones for us, so the intercept
# term is handled
cols = data.shape[1]
X = data.iloc[:, :cols - 1]
y = data.iloc[:, cols - 1:]

X = mf.map_feature(X)

# Initialize fitting parameters
initial_theta = np.zeros(X.shape[1])

# Set regularization parameter lambda to 1
lmd = 1

# Compute and display initial cost and gradient for regularized logistic regression
cost, grad = cfr.cost_function_reg(initial_theta, X, y, lmd)

print('Cost at initial theta (zeros): {}'.format(cost))
print('Expected cost (approx): 0.693')
print(
    'Gradient at initial theta (zeros) - first five values only: \n{}'.format(
        grad[0:5]))
Example 11
line_list = file.readlines()
m = len(line_list)
n = len(line_list[0].split(",")) - 1  # ignore x0
x_data = np.zeros((m, n))
y_data = np.zeros((m, 1))
for i in range(m):
    line_tempt = line_list[i].split(",")
    x_data[i, :] = line_tempt[:2]
    y_data[i, :] = line_tempt[-1]
file.close()
# ================== plotData ===================
plotData_reg.plot_data(x_data, y_data)
# ================== plotData end ===============
# ================== regularized logistic regression ==============
# ================== map feature ================================
map_x = mapFeature.map_feature(x_data[:, 0], x_data[:, 1])
x_data = np.column_stack((np.ones((m, 1)), map_x))
# ================== costFunctionReg ==========================
# set regulation params
lambd = 10  # try other values of lambda (0, 1, 10, 100, ...); lambda = 1 fits this data best
theta = np.ones((x_data.shape[1]))
# ====== test cost and grad =========  lambda = 10
# cost = costFunctionReg.cost_function(theta, x_data, y_data, lambd)
# print("cost is {}".format(cost))
# print("expected cost is 3.16")
grad = costFunctionReg.gradient(theta, x_data, y_data, lambd)
print("grad is {}".format(grad[0:5]))
print("0.3460\n 0.1614\n 0.1948\n 0.2269\n 0.0922\n")
# ====== test end ====================================
result = op.minimize(costFunctionReg.cost_function,
                     theta,
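The op.minimize call is cut off here. A plausible completion matching the cost_function and gradient signatures used above; the method choice is an assumption:

result = op.minimize(costFunctionReg.cost_function,
                     theta,
                     args=(x_data, y_data, lambd),
                     method='TNC',
                     jac=costFunctionReg.gradient)
theta = result.x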