# Regression experiments on the housing data set: plain linear regression,
# polynomial regression, and Ridge / Lasso with built-in alpha selection.
# Model fitting, scoring and plotting are delegated to the project-local
# `Functions` and `Plots` modules.
import pandas as pd
from sklearn import linear_model
from sklearn.linear_model import LinearRegression

# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed
# in 0.20. `model_selection` provides the same `train_test_split`, so it is
# imported under the old name to keep existing call sites working; the
# fallback covers installations older than 0.18.
try:
    from sklearn import model_selection as cross_validation
except ImportError:
    from sklearn import cross_validation

import Functions
import Plots

data = pd.read_csv('housing_data.csv')

# Predictor columns; MEDV is the regression target.
feature_cols = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
]
X = data[feature_cols]
y = data.MEDV

# 70/30 hold-out split; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, y, test_size=0.3, random_state=3)

# Linear regression on the hold-out split.
lm = LinearRegression()
predicted = Functions.callClassifierFeatures(
    lm, X_train, y_train, X_test, y_test, feature_cols, 'Linear Regression')

# Plotting
# NOTE(review): the "residual" passed here is fitted - actual; the usual
# convention is actual - fitted. Left unchanged -- confirm what Plots expects.
Plots.scatterPlot(predicted, y_test, 'Fitted', 'Actual',
                  'Fitted VS Actual LR', 'green', 'HousingLRScatterPlot')
Plots.residualPlot(predicted, (predicted - y_test), 'Fitted', 'Residual',
                   'Fitted VS Residual LR', 'blue', 'HousingLRResidualPlot')

# LR - cross val
# The 10 is presumably the number of folds -- verify against Functions.
predicted = Functions.callCrossVal(lm, X, y, 10, 'Linear Regression')
Plots.scatterPlot(predicted, y, 'Fitted', 'Actual',
                  'Fitted VS Actual LR-CV', 'green', 'HousingLRScatterPlotCV')
Plots.residualPlot(predicted, (predicted - y), 'Fitted', 'Residual',
                   'Fitted VS Residual LR-CV', 'blue',
                   'HousingLRResidualPlotCV')

# Polynomial Regression
# The 6 is presumably the maximum polynomial degree -- verify against
# Functions.polynomialRegression / polynomialRegressionCV.
Functions.polynomialRegression(
    lm, X_train, y_train, X_test, y_test, 6, 'Linear Regression')
Functions.polynomialRegressionCV(lm, X, y, 10, 6, 'Linear Regression')

# Ridge
# RidgeCV picks the regularisation strength from the candidate alphas.
ridge = linear_model.RidgeCV(alphas=[0.1, 0.01, 0.001])
Functions.callClassifierFeatures(
    ridge, X_train, y_train, X_test, y_test, feature_cols, 'Ridge')
print("The tuned alpha value selected for Ridge is: %.4f" % ridge.alpha_)
Functions.callCrossVal(ridge, X, y, 10, 'Ridge')

# Lasso
# LassoCV picks the regularisation strength from the candidate alphas.
lasso = linear_model.LassoCV(alphas=[0.1, 0.01, 0.001])
Functions.callClassifierFeatures(
    lasso, X_train, y_train, X_test, y_test, feature_cols, 'Lasso')
print("The tuned alpha value selected for Lasso is: %.4f" % lasso.alpha_)
Functions.callCrossVal(lasso, X, y, 10, 'Lasso')
# Neural-network evaluation followed by per-workflow and polynomial fits.
# NOTE(review): this section starts mid-script. `ds`, `one_hot_data`,
# `feature_cols`, the train/test splits and the pybrain names
# (SupervisedDataSet, buildNetwork, BackpropTrainer) are not defined in this
# excerpt and are presumably set up earlier in the file.

# Targets are copied and reshaped into a single column. np.array() is used
# instead of `.copy().reshape(...)` so this also works when the targets are
# pandas Series, which no longer provide `reshape`; for ndarray input the
# result is the same copied (n, 1) array.
y_train_nn = np.array(y_train).reshape(-1, 1)
ds.setField('target', y_train_nn)

# Test data set declared with 45 inputs and 1 target.
ds_test = SupervisedDataSet(45, 1)
ds_test.setField('input', X_test)
y_test_nn = np.array(y_test).reshape(-1, 1)
ds_test.setField('target', y_test_nn)

# Grid over hidden-layer size (1..4) and epoch cap (10, 20, 30); a fresh
# network is built and trained for every combination and its RMSE on the
# test set is printed.
for hidden in range(1, 5):
    for epoch in range(10, 40, 10):
        hidden_size = hidden
        net = buildNetwork(45, hidden_size, 1, bias=True)
        trainer = BackpropTrainer(net, ds)
        trainer.trainUntilConvergence(maxEpochs=epoch)
        p = net.activateOnDataset(ds_test)
        rmse = np.sqrt(np.sum((p - y_test_nn) ** 2) / y_test.size)
        print('Neural Network - Hidden size: %d Epchs: %d RMSE: %.4f'
              % (hidden, epoch, rmse))

# Fit one linear model per work flow (ids 0..4), selecting the rows whose
# one-hot work-flow column equals 1.
for num in range(0, 5):
    data_workflow = one_hot_data[
        one_hot_data['Work-Flow-ID=work_flow_' + str(num)] == 1]
    X = data_workflow[feature_cols]
    y = data_workflow['Size of Backup (GB)']
    Functions.fitWorkFlow(LinearRegression(), X, y, num)

#Polynomial Regression
# NOTE(review): the original line breaks were lost, so these calls are placed
# after the loop; at this point X and y hold only the rows of the last work
# flow (num == 4). Confirm that the cross-validated call is meant to run on
# that subset rather than on the full data set.
Functions.polynomialRegression(
    LinearRegression(), X_train, y_train, X_test, y_test, 3,
    'NetworkBackupPoly')
Functions.polynomialRegressionCV(
    LinearRegression(), X, y, 10, 3, 'NetworkBackupPolyCV')