Beispiel #1
0
def get_trained_coefficients():
    """
    Fit a regression model on the data loaded from training_data_file.

    The model is constructed with fit_intercept=False, i.e. no intercept
    term is fitted.

    Returns a (model, coefficients) pair, where coefficients is the fitted
    model's coef_ attribute.
    """
    # get_data yields three values; the first one is not needed here.
    _unused, features, targets = get_data(training_data_file)

    fitted = lin(fit_intercept=False).fit(features, targets)
    return fitted, fitted.coef_
Beispiel #2
0
# Record column names.
scaled_df.columns = [
    'id', 'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors',
    'waterfront', 'district', 'condition', 'yr_built', 'yr_renovated',
    'zipcode', 'price',
]

# x: the columns of scaled_df named by cov's index.
# NOTE(review): per the original comment these are the "relevant" columns
# (price excluded, weakly related ones with abs(value) < 0.1 dropped) --
# confirm against the code that builds cov.
x = np.array(pd.DataFrame(scaled_df, columns=cov.index.tolist()))
# y: the price column taken from df, i.e. the values that were not scaled.
y = np.array(pd.DataFrame(df, columns=['price']))

# Linear regression model validation.
# The `normalize` keyword is intentionally not passed: it was removed from
# scikit-learn's LinearRegression in 1.2 and raises TypeError there. The
# original passed normalize=False, which was the default, so omitting it
# keeps the behaviour on older versions too.
reg = lin(fit_intercept=True, n_jobs=None)
reg.fit(x, y)

# Print the mean cross-validated score of the linear regression model.
# NOTE(review): assuming lin is sklearn's LinearRegression, the default
# score is R^2 rather than a classification-style accuracy.
n_folds = 5
accuracy = cross_val_score(reg, x, y, cv=n_folds)
# Average over however many folds were actually run instead of repeating
# the fold count as a second literal.
acc = round(accuracy.mean(), 2)
print ("Linear Regression test file accuracy:"+str(acc))

# Compare predicted price and real price.
y_pred = reg.predict(x)
plt.scatter(y, y_pred)
# Plot the x = y reference line; points on it are perfect predictions.
plt.plot([0, 5000000], [0, 5000000], color='black', lw=2, linestyle='solid')
# NOTE(review): the axis labels mention $100,000 units while y is taken
# unscaled from df -- confirm the intended unit.
plt.xlabel("real price($100,000)")
plt.ylabel("predicted price($100,000)")
plt.show()
Beispiel #3
0
    # Title the current figure and display it.
    plt.title("Cost V/S Iterations")
    plt.show()


    print("The optimum parameters for the given data set is: \n", new_params)
    # Print the error values of the custom model.

    print("\n\nError values: (Lower is better) : \nMY PREDICTOR:")
    # Predictions of the custom model: test features times the learned parameters.
    predictions = test_features.dot(new_params)
    print("The root mean square error is: ", root_mean_sq_err(test_targets, predictions))
    # NOTE(review): the header printed above says "Lower is better", but an
    # r^2 score is normally better when higher -- confirm the wording.
    print("The r^2 score is: ", r2_score(test_targets, predictions), "\n\n")


    # Build a linear regressor (lin, presumably sklearn's) for comparison.

    reg = lin()
    reg.fit(train_features, train_targets)
    skpred = reg.predict(test_features)

    # Print the same error values for the comparison model.

    print("SKLEARN :")
    print("The root mean square error of sklearn is: ", root_mean_sq_err(test_targets, skpred))
    print("The r^2 score of sklearn is: ", r2_score(test_targets, skpred), "\n\n")

    # errRate is handed to simpleAccuracy together with the custom model's
    # predictions only (skpred is not scored this way).
    # NOTE(review): presumably a relative-error tolerance -- verify against
    # simpleAccuracy's definition.
    errRate=0.3
    print(f"The accuracy for the model is {simpleAccuracy(test_targets,predictions,errRate)}")

    # Printing first 10 predictions of my model and sklearn side by side to actual values...
    #print("ACTUAL \t\t\t\t MY MODEL \t\t\t\t\t SKLEARN\n")
    #for i in range(10):
Beispiel #4
0
df_f = df_f.join(two_df, rsuffix='city_')

# Label-encode State, one-hot encode the resulting codes, and append the
# one-hot columns to the frame.
df_f['State'] = le.fit_transform(df_f['State'])
three_df = pd.DataFrame(one.fit_transform(df_f[['State']]).toarray())
df_f = df_f.join(three_df, rsuffix='_state')

# Drop the raw columns that are not used as features from here on.
df_f = df_f.drop(['Raw Labor Classification', 'State', 'city'], axis=1)

df_f = df_f.dropna()

# Target is the first column, features are all remaining columns.
# The target is selected as a 1-D array (iloc[:, 0], not iloc[:, 0:1]):
# a column-vector y makes RandomForestRegressor emit a DataConversionWarning
# and its predictions come back 1-D, so y_pred and y_test would have
# mismatched shapes, (n,) vs (n, 1), when handed to forecast_accuracy.
y = df_f.iloc[:, 0].values
x = df_f.iloc[:, 1:].values
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.33, random_state=42
)

# Linear Regression ( Very low Accuracy )
regressor = lin()
regressor.fit(x_train, y_train)
# Results are bound to names so they stay inspectable when this runs as a
# script rather than cell by cell in a notebook.
lin_test_score = regressor.score(x_test, y_test)
cv_score = cross_val_score(regressor, x_train, y_train, cv=10)
lin_cv_mean = cv_score.mean()
y_pred = regressor.predict(x_test)
lin_forecast = forecast_accuracy(y_pred, y_test)

# Random Forest Regressor (good cv, good mape)
reg = RR(n_estimators=100)
reg.fit(x_train, y_train)
cv_score = cross_val_score(reg, x_train, y_train, cv=10)
rf_cv_mean = cv_score.mean()
y_pred = reg.predict(x_test)
rf_test_score = reg.score(x_test, y_test)
rf_forecast = forecast_accuracy(y_pred, y_test)