-
Notifications
You must be signed in to change notification settings - Fork 0
/
LogReg1QuadFeats.py
112 lines (93 loc) · 3.6 KB
/
LogReg1QuadFeats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#Author: James Alford-Golojuch
#Used tutorial for logistic regression with gradient descent for help with basis of code
#http://aimotion.blogspot.com/2011/11/machine-learning-with-python-logistic.html
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as opt
from scipy.special import expit
#For plotting logistic regression function
# define our hypothesis (vectorized!)
def f(x,thetas):
return expit(np.matrix(thetas)*x);
#For plotting logistic regression function
# basis expansion
expand = lambda x,y: np.vstack((
np.ones(x.size), # add the bias term
x.ravel(), # make the matrix into a vector
y.ravel(),
x.ravel() * y.ravel(),
x.ravel()**2,
y.ravel()**2))
#Sigmoid function implementation
def sigmoid(z):
return 1/(1 + (np.exp(-1 * z)))
#Cost function
def costfunc(theta,data,labels):
#Finds predicted probability of label 1
probOf1 = sigmoid(np.dot(data,theta))
#Finds log likelihood vector
logOf1 = (-1 * labels) * np.log(probOf1) - (1 - labels) * np.log(1 - probOf1)
return logOf1.mean()
#gradient descent
def gradDec(theta,data,labels):
#Finds predicted probability of label 1
probOf1 = sigmoid(np.dot(data, theta))
#Difference between label and prediction
error = probOf1 - labels
#Finds gradient vector
grad = np.dot(error, finalDataQuad) / labels.size
return grad
def predict(theta, data, labels):
correct = 0
total = 0
probOf1 = sigmoid(np.dot(data, theta))
for x in range(0,len(probOf1)):
if ((probOf1[x] > .5 and labels[x] == 1) or (probOf1[x] <= .5 and labels[x] == 0)) :
correct = correct + 1
total = total + 1
return correct / total
#Test and train data contains 2 columns
trainData1 = np.loadtxt('synthetic-6.csv', dtype='float', delimiter=',')
data = trainData1[:,0:2]
labels = trainData1[:,2]
thetaInit = 0.1* np.random.randn(6)
#Basis expansion to qaudratic features
#Append column of 1s
dataAdd1 = np.append( np.ones((data.shape[0], 1)), data, axis=1)
dataQuad = [(data[0,0] * data[0,1]),(data[0,0] * data[0,0]),(data[0,1] * data[0,1])]
for x in range(1,len(labels)):
temp = [(data[x,0] * data[x,1]),(data[x,0] * data[x,0]),(data[x,1] * data[x,1])]
dataQuad = np.vstack((dataQuad,temp))
finalDataQuad = np.append( dataAdd1,dataQuad, axis=1)
print(finalDataQuad)
#Find final theta values for logistic regression function
thetaFinal = opt.fmin_bfgs(costfunc, thetaInit, fprime=gradDec, args=(finalDataQuad, labels))
# create the domain for the plot
x_min =min(data[:,0]) - 1; x_max = max(data[:,0]) + 1
y_min =min(data[:,1]) - 1; y_max = max(data[:,1]) + 1
x1 = np.linspace(x_min, x_max, 200)
y1 = np.linspace(y_min, y_max , 200)
x,y = np.meshgrid(x1, y1)
# evalute it in a vectorized way (and reshape into a matrix)
data = expand(x,y)
z = f(data,thetaFinal)
z = z.reshape(x.shape)
#Prints scatter plot of data
plt.figure(1)
plt.title("Gradient Descent w/ Quadratic Features: Synthetic-6")
plt.scatter(trainData1[:,0], trainData1[:,1], c=trainData1[:,2], alpha=0.5)
# show the function value in the background
plt.set_cmap('prism')
cs = plt.imshow(z,
extent=(x_min,x_max,y_max,y_min), # define limits of grid, note reversed y axis
cmap=plt.cm.jet)
plt.clim(0,1) # defines the value to assign the min/max color
# draw the line on top
levels = np.array([.5])
cs_line = plt.contour(x,y,z,levels)
# add a color bar
CB = plt.colorbar(cs)
plt.show()
#Misclassification rate
p = predict(thetaFinal,finalDataQuad,labels)
print("The misclassification rate is ", 1-p, " %")