'''
Created on May 10, 2017

@author: Varela

Loads the two-column dataset 1d.csv, passes its columns to regress.lin and
prints the two values that call returns.
'''
import pandas as pd
import regress as rg
from aux import get_dataset
# import matplotlib.pyplot as plt

# The file is read without a header row; column 0 is used as X, column 1 as Y.
df = pd.read_csv(get_dataset('1d.csv'), header=None)
X = df.iloc[:, 0]
Y = df.iloc[:, 1]

# NOTE(review): W and r2 are presumably the fitted weights and the R-squared
# score -- confirm against regress.lin.
W, r2 = rg.lin(X, Y)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(W)
print(r2)

# plot the data to see what it looks like
# plt.scatter(X, Y)
# plt.show()
# https://deeplearningcourses.com/c/data-science-linear-regression-in-python # need to sudo pip install xlrd to use pd.read_excel # data is from: # http://college.cengage.com/mathematics/brase/understandable_statistics/7e/students/datasets/mlr/frames/mlr02.html # The data (X1, X2, X3) are for each patient. # X1 = systolic blood pressure # X2 = age in years # X3 = weight in pounds import numpy as np # import matplotlib.pyplot as plt import pandas as pd from aux import get_dataset df = pd.read_excel(get_dataset('mlr02.xls')) X = df.as_matrix() # plt.scatter(X[:,1], X[:,0]) # plt.show() # plt.scatter(X[:,2], X[:,0]) # plt.show() df['ones'] = 1 Y = df['X1'] X = df[['X2','X3','ones']] X2only = df[['X2','ones']] X3only = df[['X3','ones']] def get_r2(X,Y):
import numpy as np
import pandas as pd
from aux import get_dataset
from regress import lin
# import re
# import matplotlib.pyplot as plt

# Script: load moore.csv, reduce columns 1 and 2 to their digits, then pass
# column 2 and the log of column 1 to regress.lin and print what it returns.
df = pd.read_table(get_dataset('moore.csv'), header=None)

# \[(.*)\] matches anything enclosed in brackets
# [^\d]+ matches any run of characters that are not decimal digits
# NOTE(review): both patterns rely on Series.str.replace treating its pattern
# as a regular expression; pandas >= 2.0 requires regex=True for that --
# confirm against the pandas version in use.
Y = df.loc[:, 1].str.replace(r'\[(.*)\]', '')
Y = Y.str.replace(r'[^\d]+', '').astype('int')

X = df.loc[:, 2].str.replace(r'\[(.*)\]', '')
# Use the .str accessor here too: plain Series.replace only swaps values that
# are exactly equal to the pattern text, so it would strip nothing.
X = X.str.replace(r'[^\d]+', '').astype('int')

# plt.scatter(X, Y)
# plt.show()
#
# Y = np.log(Y)
# plt.scatter(X, Y)
# plt.show()

# Regress on log(Y) rather than on Y itself.
Y = np.log(Y)

# NOTE(review): W and r2 are presumably the fitted weights and the R-squared
# score -- confirm against regress.lin.
W, r2 = lin(X, Y)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(W)
print(r2)

#LAZY PROGRAMMER SOLUTION
# import re