Beispiel #1
0
def fit_and_display(X, Y, sample, deg):
    """Fit a polynomial on a random subsample of (X, Y) and plot the outcome.

    Draws `sample` indices with ``np.random.choice(len(X), sample)``, shows a
    scatter plot of the selected points, fits weights on them with `lin`,
    prints the score returned by `lin` next to the score `r2` reports for the
    complete data set, and finally plots the full data, the fitted values and
    the selected points in one figure titled with the degree.

    NOTE(review): `ax.polyfy`, `lin` and `r2` are defined outside this block.
    Presumably they build polynomial features of degree `deg`, solve a linear
    regression, and compute an R^2 score respectively -- confirm against
    their definitions.

    Args:
        X: inputs; indexed with an integer index array.
        Y: targets; indexed with the same index array.
        sample: number of indices to draw.
        deg: degree forwarded to `ax.polyfy` and shown in the output.
    """
    chosen = np.random.choice(len(X), sample)
    x_sub, y_sub = X[chosen], Y[chosen]

    # First figure: only the points that will be used for fitting.
    plt.scatter(x_sub, y_sub)
    plt.show()

    # Expand the whole input once; the fitting rows are a slice of it.
    features = ax.polyfy(X, deg)
    train_features = features[chosen]

    # Score on the fitted subset versus score over every point.
    weights, score_in = lin(train_features, y_sub)
    score_all = r2(weights, features, Y)
    report = "degree %d .: in-sample %.4f out sample %.4f" % (
        deg, score_in, score_all)
    print(report)

    # Second figure: data, fitted values, and the fitting points together.
    prediction = features.dot(weights)
    plt.plot(X, Y)
    plt.plot(X, prediction)
    plt.scatter(x_sub, y_sub)
    plt.title("deg= %d" % deg)
    plt.show()
    def testLinearRegression1D(self):
        """Check `lin` on ../datasets/1d.csv against stored reference values.

        Column 0 of the header-less CSV is used as the input and column 1 as
        the target.  Both returned weights and the returned score must lie
        within 1e-4 of the recorded numbers.
        """
        frame = pd.read_csv('../datasets/1d.csv', header=None)
        inputs = frame.loc[:, 0]
        targets = frame.loc[:, 1]

        W, rsq = lin(inputs, targets)

        tol = 0.0001  # absolute tolerance shared by all three checks
        self.assertAlmostEqual(
            W[0], 2.8644240756607786,
            msg="W[0]*X + W[1] .: W[0] should ~ 2.8644", delta=tol)
        self.assertAlmostEqual(
            W[1], 1.972612167484588,
            msg="W[0]*X + W[1] .: W[1] should ~ 1.9726", delta=tol)
        self.assertAlmostEqual(
            rsq, 0.99118382029778052,
            msg="r2 ~ 0.99118", delta=tol)
    def testLinearRegression2D(self):
        """Check `lin(..., fill=True)` on ../datasets/2d.csv against references.

        Columns 0 and 1 of the header-less CSV are the inputs and column 2 is
        the target.  The three returned weights and the returned score must
        lie within 1e-4 of the recorded numbers; each failure message also
        reports the value actually obtained.
        """
        frame = pd.read_csv('../datasets/2d.csv', header=None)
        inputs = frame.loc[:, [0, 1]]
        targets = frame.loc[:, 2]
        W, rsq = lin(inputs, targets, fill=True)

        tol = 0.0001  # absolute tolerance shared by all four checks
        self.assertAlmostEqual(
            W[0], 2.01666793,
            msg="W[0] .: expected ~2.0166\ngot " + str(W[0]), delta=tol)
        self.assertAlmostEqual(
            W[1], 2.96985048,
            msg="W[1] .: expected ~2.96985\ngot " + str(W[1]), delta=tol)
        self.assertAlmostEqual(
            W[2], 1.46191241,
            msg="W[2] .: expected ~1.46191\ngot " + str(W[2]), delta=tol)
        self.assertAlmostEqual(
            rsq, 0.99800406124757779,
            msg="r2 .:   ~0.99800\ngot " + str(rsq), delta=tol)
# Entry-point guard. Its body is only `pass`, so the guard itself performs no
# work when the file is executed directly.
if __name__ == '__main__':
    pass

# Demo: compare a plain linear fit (`lin`) with an L2-penalised fit (`linl2`)
# on a noisy line whose last two points are pushed far upwards.
N = 50
X = np.linspace(0, 10, N)
Y = 0.5 * X + np.random.randn(N)
# Turn the two right-most samples into outliers.
Y[-1] += 30
Y[-2] += 30

plt.scatter(X, Y)
plt.show()

# Stack a row of ones on top of X and transpose: X becomes an (N, 2) matrix
# whose first column is constant, so the first weight in `X.dot(w)` acts as
# the intercept and the second as the slope.
X = np.vstack([np.ones(N), X]).T
wml, residml = lin(X, Y)
Yhat_ml = X.dot(wml)

# print(...) with a single argument behaves the same on Python 2 and 3.
print(wml)
print(residml)

# NOTE(review): presumably the L2 penalty weight expected by `linl2` --
# confirm against its definition.
l2 = 1000
wmap, residmap = linl2(X, Y, l2)
Yhat_map = X.dot(wmap)

# Column 1 holds the original inputs; column 0 is the constant term.
plt.scatter(X[:, 1], Y)
plt.plot(X[:, 1], Yhat_ml, label=("ml r2=%.4f" % (residml)))
plt.plot(X[:, 1], Yhat_map, label=("map r2=%.4f" % (residmap)))
plt.legend()
plt.show()
'''
Created on May 10, 2017

@author: Varela
'''

import pandas as pd
import regress as rg
from aux import get_dataset
# import matplotlib.pyplot as plt

# Read the header-less 1d.csv located by `get_dataset`; column 0 is used as
# the input and column 1 as the target.
df = pd.read_csv(get_dataset('1d.csv'), header=None)
X = df.iloc[:, 0]
Y = df.iloc[:, 1]

# `rg.lin` returns a pair that is unpacked as (weights, r2).
W, r2 = rg.lin(X, Y)

# print(...) with a single argument behaves the same on Python 2 and 3.
print(W)
print(r2)

#plot the data to see what's look like
# plt.scatter(X, Y)
# plt.show()
Beispiel #6
0
# Clean columns 1 and 2 of `df` (defined outside this block) into integers,
# then regress log(Y) on X.
#
# \[(.*)\] matches everything from the first '[' to the last ']' in a cell
# [^\d]+   matches any run of characters that are not digits
#
# Both substitutions must go through the `.str` accessor: plain
# `Series.replace(pattern, '')` compares the pattern with whole cell values
# and would leave the text untouched.  `regex=True` is passed explicitly
# because pandas 2.0+ treats the pattern as a literal string by default
# (the keyword exists since pandas 0.23).
Y = df.loc[:, 1].str.replace(r'\[(.*)\]', '', regex=True)
Y = Y.str.replace(r'[^\d]+', '', regex=True).astype('int')
X = df.loc[:, 2].str.replace(r'\[(.*)\]', '', regex=True)
X = X.str.replace(r'[^\d]+', '', regex=True).astype('int')

# To inspect the data, call plt.scatter(X, Y); plt.show() here and again
# after the log transform below.

Y = np.log(Y)
W, r2 = lin(X, Y)
# print(...) with a single argument behaves the same on Python 2 and 3.
print(W)
print(r2)

#LAZY PROGRAMMER SOLUTION
# import re
# import numpy as np
# import matplotlib.pyplot as plt
#
# X = []
# Y = []
#
# non_decimal = re.compile(r'[^\d]+')
#
# for line in open('../src/datasets/moore.csv'):
#     r = line.split('\t')