Beispiel #1
0
def Linear_Regression(df, df1, df2, name):
    """Fit a single-feature linear regression, plot it and return its slope.

    The feature is taken from the ``user_rating_x`` column of ``df`` and the
    target from its ``user_rating_y`` column. The data is split 67/33 into
    train and test sets with a fixed seed, the model is fitted on the
    training part, and the test-set predictions are plotted, saved under
    ``images/`` and shown.

    Args:
        df: Table indexed by column name whose columns expose ``.values``
            (presumably a pandas DataFrame -- confirm against callers).
        df1: Text used as the x-axis label.
        df2: Text used as the y-axis label.
        name: Text inserted into the plot title and the output file name.

    Returns:
        The fitted model's ``coef_`` attribute.
    """
    # Local import: only needed to prepare the output directory below.
    import os

    X = df['user_rating_x'].values.reshape(-1, 1)
    y = df['user_rating_y'].values
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=.33,
                                                        random_state=10)
    model = LinearRegression().fit(X_train, y_train)
    pred = model.predict(X_test)
    coeff = model.coef_

    # Report the error of the test-set predictions using the project's
    # ``LR.linearReg(...).rmsle`` helper.
    print(LR.linearReg(X_test, y_test).rmsle(y_test, pred))

    # NOTE(review): x values are multiplied by 5 for display only --
    # presumably to map a normalised rating back onto a 0-5 scale; confirm.
    plt.scatter(X_test * 5, y_test)
    plt.plot(X_test * 5, pred)
    plt.ylabel(df2)
    plt.xlabel(df1)
    plt.title('Linear Regression for {}'.format(name))

    # savefig does not create missing directories, so make sure the target
    # directory exists instead of failing after the model has been fitted.
    out_path = 'images/{}_linear_model'.format(name)
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    plt.savefig(out_path)
    plt.show()
    return coeff
Beispiel #2
0
def Lasso_model(df, df1, df2, name):
    """Fit a single-feature Lasso regression with a searched alpha and plot it.

    The feature is taken from the ``user_rating_x`` column of ``df`` and the
    target from its ``user_rating_y`` column. The data is split 67/33 into
    train and test sets with a fixed seed. A list of candidate alphas is
    passed to the project's ``LR.linearReg(...).lasso`` helper; the row with
    the smallest ``CVtest_mean_RMSLE`` selects the ``lambda`` value used to
    fit the final ``Lasso`` model on the training set. The test-set
    predictions are plotted, saved under ``images/`` and shown.

    Args:
        df: Table indexed by column name whose columns expose ``.values``
            (presumably a pandas DataFrame -- confirm against callers).
        df1: Text used as the x-axis label.
        df2: Text used as the y-axis label.
        name: Text inserted into the plot title and the output file name.

    Returns:
        A tuple ``(coeff, a)``: the fitted model's ``coef_`` attribute and
        the selected regularisation strength.
    """
    # Local import: only needed to prepare the output directory below.
    import os

    X = df['user_rating_x'].values.reshape(-1, 1)
    y = df['user_rating_y'].values
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=.33,
                                                        random_state=10)
    LR_L = LR.linearReg(X_train, y_train)
    alpha = [.00001, .0001, .001, .01, .1, 1, 10, 100]

    # NOTE(review): the second argument (5) is presumably the number of
    # cross-validation folds -- confirm against LR.linearReg.lasso.
    lasso = LR_L.lasso(alpha, 5)

    # Pick the candidate whose cross-validated RMSLE is lowest.
    index = lasso['CVtest_mean_RMSLE'].idxmin()
    a = lasso['lambda'][index]

    model_L = Lasso(alpha=a).fit(X_train, y_train)
    pred = model_L.predict(X_test)
    coeff = model_L.coef_
    print(LR_L.rmsle(y_test, pred))

    # NOTE(review): x values are multiplied by 5 for display only --
    # presumably to map a normalised rating back onto a 0-5 scale; confirm.
    plt.plot(X_test * 5, pred)
    plt.scatter(X_test * 5, y_test)
    plt.ylabel(df2)
    plt.xlabel(df1)
    plt.title('Lasso Regression for {}'.format(name))

    # savefig does not create missing directories, so make sure the target
    # directory exists instead of failing after the model has been fitted.
    out_path = 'images/{}_lasso_model'.format(name)
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    plt.savefig(out_path)
    plt.show()
    return coeff, a
# Replace the `preferred_foot` column of X with its label-encoded values.
# NOTE(review): X, y and LabelEncoder are defined earlier in the script, outside
# this excerpt; LabelEncoder is presumably sklearn.preprocessing.LabelEncoder.
label_encoder = LabelEncoder()
X.preferred_foot = label_encoder.fit_transform(X.preferred_foot)

# Split the data into training and test sets (10% held out, fixed seed)
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.10,random_state=0)

# Find the most significant features using backward elimination
# (backwardEle is a project-local module; its return value is not used here)
print('-'*50)
import backwardEle
backwardEle.backwardEle(X_train, y_train)

print('-'*50)
# Apply linear regression via the project-local linearReg module; the returned
# score is multiplied by 100 and printed as a percentage.
import linearReg
linearReg_score = linearReg.linearReg(X_train,X_test,y_train,y_test)
print("Pridicting based on Simple Linear regression we get the score as : {:.5}%" \
                                      .format(linearReg_score*100))
print('-'*50)

# Apply XGBoost via the `xgb` module (presumably a project-local wrapper, since
# it exposes an `xgb(...)` function); the score is printed the same way.
import xgb
reg_xg_score = xgb.xgb(X_train,X_test,y_train,y_test)
print("Pridicting after applying XGBoost we get the score as : {:.5}%" \
                                      .format(reg_xg_score*100))
print('-'*50)

# Decision Tree

# Fit decision tree regression to the training set (the code that uses this
# module continues beyond this excerpt)
import decisionTreeReg
Beispiel #4
0
#!/usr/bin/python
# Fit a regression of net worth on age with the local `linearReg` helper,
# plot it with `plot_regression`, and print a sample prediction, the slope,
# the intercept and the r-squared scores on the test and training sets.
import sys
import numpy
import matplotlib

# Select the non-interactive Agg backend BEFORE pyplot is imported: on older
# matplotlib releases a use() call made after the first pyplot import is
# ignored (with a warning), which breaks the script on headless machines.
matplotlib.use('agg')

import matplotlib.pyplot as plt  # noqa: E402
from linearReg import linearReg  # noqa: E402

# The shared helpers live two directories up, so extend the import path
# before importing them.
sys.path.append("../../common")
from class_vis import plot_regression  # noqa
from ages_net_worths import ageNetWorthData  # noqa

ages_train, ages_test, net_worths_train, net_worths_test = ageNetWorthData()
reg = linearReg(ages_train, net_worths_train)

plot_regression(reg, ages_train, ages_test, net_worths_train, net_worths_test)

# The [0][0] / [0] indexing below unwraps the nested results returned by the
# fitted model for a single sample / single feature.
print("Net worth prediction:", reg.predict([[29]])[0][0])
print("Slope", reg.coef_[0][0])
print("Intercept", reg.intercept_[0])

print("Test r-squared:", reg.score(ages_test, net_worths_test))
print("Train r-squared:", reg.score(ages_train, net_worths_train))