/
Grad_Decent.py
90 lines (48 loc) · 2.09 KB
/
Grad_Decent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 02 08:43:09 2015
@author: jpaukov
"""
import numpy as np
import pandas
from sklearn.linear_model import SGDRegressor
def normalize_features(features):
    """Standardize each feature column to zero mean and unit spread.

    Parameters
    ----------
    features : array whose statistics are taken along axis 0.

    Returns
    -------
    means, std_devs, normalized_features
        The per-column means, the per-column divisors that were actually
        applied, and ``(features - means) / std_devs``.

    A column with zero standard deviation (every entry identical) is divided
    by 1.0 instead of 0, so it normalizes to all zeros rather than NaN/inf.
    The returned ``std_devs`` carries that 1.0 so callers that undo the
    scaling stay finite as well.
    """
    means = np.mean(features, axis=0)
    std_devs = np.std(features, axis=0)

    # Only touch std_devs when a zero is present, so the common case returns
    # exactly what np.std produced.
    zero_spread = std_devs == 0
    if np.any(zero_spread):
        std_devs = np.where(zero_spread, 1.0, std_devs)

    normalized_features = (features - means) / std_devs
    return means, std_devs, normalized_features
def recover_params(means, std_devs, norm_intercept, norm_params):
    """Map a fit made on normalized features back to the raw feature scale.

    Each coefficient is divided by its column's standard deviation, and the
    intercept is reduced by the sum of ``means * norm_params / std_devs``.

    Returns the pair ``(intercept, params)``.
    """
    # Contribution of the subtracted means, folded back into the intercept.
    mean_shift = np.sum(means * norm_params / std_devs)
    rescaled_params = norm_params / std_devs
    return norm_intercept - mean_shift, rescaled_params
def linear_regression(features, values):
    """Fit a linear model with scikit-learn's ``SGDRegressor``.

    Parameters
    ----------
    features : training inputs handed to ``SGDRegressor.fit``.
    values : training targets handed to ``SGDRegressor.fit``.

    Returns
    -------
    intercept, params
        The fitted estimator's ``intercept_`` and ``coef_`` attributes.
    """
    try:
        # Newer scikit-learn releases call the epoch count ``max_iter`` and no
        # longer accept ``n_iter``. ``tol=None`` switches off early stopping so
        # all 1000 passes run, matching what ``n_iter=1000`` did.
        model = SGDRegressor(max_iter=1000, tol=None)
    except TypeError:
        # Older releases only know the original ``n_iter`` keyword.
        model = SGDRegressor(n_iter=1000)

    model.fit(features, values)
    return model.intercept_, model.coef_
def Rsquar(values, predictions):
    """Return the coefficient of determination of ``predictions``.

    Computed as ``1 - SS_res / SS_tot``, where ``SS_res`` is the summed squared
    difference between ``values`` and ``predictions`` and ``SS_tot`` is the
    summed squared difference between ``values`` and their mean.
    """
    residual_ss = np.sum((values - predictions) ** 2)
    total_ss = np.sum((values - np.mean(values)) ** 2)
    return 1 - (residual_ss / total_ss)
""" Ends helper functions """
""" Ends helper functions """
""" Ends helper functions """
""" Ends helper functions """
# Load the turnstile/weather CSV. The path is a raw string so that Windows
# backslashes (e.g. "\U") are never interpreted as escape sequences.
dataframe = pandas.read_csv(r'C:\Users\jpaukov\Documents\Udacity\Project1\Udacity_Project_1_2429\Turnstile_weather_v2.csv')

# Feature matrix: three numeric columns followed by one indicator column per
# distinct UNIT value.
features = dataframe[['rain', 'hour', 'weekday']]
dummy_units = pandas.get_dummies(dataframe['UNIT'], prefix='unit')
features = features.join(dummy_units)

# Target column to predict.
values = dataframe['ENTRIESn_hourly']

# Work on plain NumPy arrays from here on.
features_array = features.values
values_array = values.values

# Fit on normalized features, then convert the coefficients back so they
# apply to the raw (un-normalized) feature values.
means, std_devs, normalized_features_array = normalize_features(features_array)
norm_intercept, norm_params = linear_regression(normalized_features_array, values_array)
intercept, params = recover_params(means, std_devs, norm_intercept, norm_params)

# Predict on the raw features and score the fit.
predictions = intercept + np.dot(features_array, params)
r_squared = Rsquar(values, predictions)

# Single-argument parenthesized prints run on both Python 2 and Python 3.
# The doubled space reproduces the separator the original Python 2
# ``print a, b`` statements emitted between the label and the value.
print("R^2 -  %s" % (r_squared,))
print("Intercept -  %s" % (intercept,))
# First four coefficients: rain, hour, weekday and the first unit indicator.
print("Params -  %s" % (params[0:4],))