/
candyRegression.py
47 lines (37 loc) · 1.74 KB
/
candyRegression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import sys
from processData import processData
from logisticRegression import logisticRegression
from logHypo import logHypo
from accuracy import accuracy
def main(args):
    """Test accuracy of logistic regression in predicting if candies are chocolate.

    Runs three evaluations and prints their accuracies:
    a model trained and scored on the full data set, a model trained and
    scored on the first-half data set, and the first-half model scored on
    the second-half data set.

    ``args`` is an argv-style list; it is forwarded to ``_getParams`` to
    obtain ``(alpha, regParam, iterations)``.
    """
    params = _getParams(args)

    # Train and score on the complete data set (fitted parameters not reused).
    wholeX, wholeY = processData('data/candy-data.csv')
    _, wholeScore = _runLogRegression(wholeX, wholeY, *params)

    # Train and score on the first half only.
    trainX, trainY = processData('data/firstHalf-candy-data.csv')
    trainTheta, trainScore = _runLogRegression(trainX, trainY, *params)

    # Score the first-half parameters against the second half.
    heldOutX, heldOutY = processData('data/secondHalf-candy-data.csv')
    heldOutPredictions = logHypo(trainTheta, heldOutX)
    heldOutScore = accuracy(heldOutPredictions, heldOutY)

    _displayResults(wholeScore, trainScore, heldOutScore)
def _getParams(args):
    """Return ``(alpha, regParam, iterations)`` for the regression runs.

    ``args`` is an argv-style list whose first element is the program name.

    If fewer than three values follow the program name, the defaults
    ``(.001, 0, 1000)`` are returned. Otherwise the first three values are
    parsed: ``alpha`` and ``regParam`` as floats and ``iterations`` as an int,
    matching the type of the default. Any further values are ignored.

    Raises:
        ValueError: if a supplied value cannot be parsed as a number.
    """
    if len(args) < 4:
        return .001, 0, 1000

    # Parse the list we were handed rather than the global sys.argv, so the
    # function honours its parameter and can be called with any argument list.
    alpha = float(args[1])
    regParam = float(args[2])
    # Go through float() first so inputs such as "1000.0" or "1e3" still work.
    iterations = int(float(args[3]))
    return alpha, regParam, iterations
def _displayResults(fullAccuracy, halfAccuracy, secondHalfAccuracy):
    """Print the three accuracy figures under a header line.

    fullAccuracy       -- accuracy reported for the full data set
    halfAccuracy       -- accuracy reported for the first half
    secondHalfAccuracy -- accuracy on the second half using parameters
                          trained on the first half
    """
    print('-----Accuracy-----')
    print('Full data: ', fullAccuracy)
    # Label previously read 'Fist half: ' (typo).
    print('First half: ', halfAccuracy)
    print('Second half (trained on first half):', secondHalfAccuracy)
def _runLogRegression(designM, labels, alpha, regParam, iterations):
    """Fit logistic regression and score it on the data it was fitted to.

    Passes all five arguments to ``logisticRegression`` to obtain the
    parameters, evaluates ``logHypo`` with those parameters on ``designM``,
    and compares the result with ``labels`` via ``accuracy``.

    Returns a ``(theta, accuracy)`` pair.
    """
    fitted = logisticRegression(designM, labels, alpha, regParam, iterations)
    predictions = logHypo(fitted, designM)
    score = accuracy(predictions, labels)
    return fitted, score
# Script entry point: run the comparison with the full command line.
if __name__ == "__main__":
    main(sys.argv)