'''
Created on May 10, 2017

@author: Varela
'''

import pandas as pd
import regress as rg
from aux import get_dataset
# import matplotlib.pyplot as plt

# Fit a one-dimensional linear regression on the two columns of 1d.csv.
df = pd.read_csv(get_dataset('1d.csv'), header=None)
X = df.iloc[:, 0]  # first column is used as the input
Y = df.iloc[:, 1]  # second column is used as the target

# rg.lin returns two values, unpacked here as W and r2
# (presumably the fitted weights and R-squared -- confirm in regress).
W, r2 = rg.lin(X, Y)

# Parenthesised single-argument print behaves identically on Python 2 and 3;
# the bare `print W` statement form is a SyntaxError on Python 3.
print(W)
print(r2)

# plot the data to see what it looks like
# plt.scatter(X, Y)
# plt.show()
#  https://deeplearningcourses.com/c/data-science-linear-regression-in-python
# need to sudo pip install xlrd to use pd.read_excel
# data is from:
# http://college.cengage.com/mathematics/brase/understandable_statistics/7e/students/datasets/mlr/frames/mlr02.html

# The data (X1, X2, X3) are for each patient.
# X1 = systolic blood pressure
# X2 = age in years
# X3 = weight in pounds

import numpy as np 
# import matplotlib.pyplot as plt
import pandas as pd
from aux import get_dataset
# Load the spreadsheet and build the target and the candidate input matrices.
df = pd.read_excel(get_dataset('mlr02.xls'))
# Raw ndarray snapshot of the original columns (taken before 'ones' is added);
# only the commented-out scatter plots below index into it.
# DataFrame.as_matrix() was removed in pandas 1.0 -- .values is the equivalent
# that works on every pandas version.
X = df.values

# plt.scatter(X[:,1], X[:,0])
# plt.show()

# plt.scatter(X[:,2], X[:,0])
# plt.show()

# Constant column included in every input matrix below
# (presumably so the regression can fit an intercept term).
df['ones'] = 1
Y = df['X1']                   # target column
X = df[['X2', 'X3', 'ones']]   # both predictors plus the constant
X2only = df[['X2', 'ones']]    # X2 alone plus the constant
X3only = df[['X3', 'ones']]    # X3 alone plus the constant

def get_r2(X,Y): 
Beispiel #3
0
import numpy as np
import pandas as pd
from aux import get_dataset
from regress import lin

# import re
# import matplotlib.pyplot as plt

# Load moore.csv (no header row) and regress log(column 1) on column 2.
df = pd.read_table(get_dataset('moore.csv'), header=None)

# Both columns are cleaned the same way before conversion to integers:
#   \[(.*)\]  matches anything enclosed in square brackets
#   [^\d]+    matches any run of characters that are not digits
# regex=True is passed explicitly because newer pandas releases treat the
# pattern as a literal string by default, which would leave the text untouched.
Y = df.loc[:, 1].str.replace(r'\[(.*)\]', '', regex=True)
Y = Y.str.replace(r'[^\d]+', '', regex=True).astype('int')
X = df.loc[:, 2].str.replace(r'\[(.*)\]', '', regex=True)
# This previously called Series.replace, which only swaps values that equal
# the pattern string as a whole, so stray non-digit characters were never
# removed from X. Use the same .str.replace as for Y.
X = X.str.replace(r'[^\d]+', '', regex=True).astype('int')

# plt.scatter(X, Y)
# plt.show()
#
# Y = np.log(Y)
# plt.scatter(X, Y)
# plt.show()

# Fit on the log of Y, so the linear model is applied to log(Y) versus X.
Y = np.log(Y)
W, r2 = lin(X, Y)
# Parenthesised single-argument print behaves identically on Python 2 and 3.
print(W)
print(r2)

#LAZY PROGRAMMER SOLUTION
# import re