예제 #1
0
from sklearn import linear_model
from sklearn import cross_validation
import Plots

# Load the housing data and separate the predictor columns from the MEDV target.
data = pd.read_csv('housing_data.csv')
feature_cols = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
]
X, y = data[feature_cols], data['MEDV']

# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=3,
)
# Linear regression baseline on the hold-out split.
# The estimator is reached through the imported `linear_model` module: the
# imports visible at the top of this script never bind a bare
# `LinearRegression` name, so calling it unqualified raises NameError.
lm = linear_model.LinearRegression()
# The helper's return value is used below as the fitted values for X_test
# (its internals live in the Functions module, not in this script).
predicted = Functions.callClassifierFeatures(
    lm, X_train, y_train, X_test, y_test, feature_cols, 'Linear Regression'
)
# Plotting
Plots.scatterPlot(predicted, y_test, 'Fitted', 'Actual', 'Fitted VS Actual LR', 'green', 'HousingLRScatterPlot')
# The residual passed to the plot is fitted minus actual.
Plots.residualPlot(predicted, (predicted - y_test), 'Fitted', 'Residual', 'Fitted VS Residual LR', 'blue', 'HousingLRResidualPlot')
# LR - cross val
# Runs on the full X / y rather than the hold-out split.
# NOTE(review): the 10 is presumably the number of folds -- confirm in Functions.
predicted = Functions.callCrossVal(lm, X, y, 10, 'Linear Regression')
Plots.scatterPlot(predicted, y, 'Fitted', 'Actual', 'Fitted VS Actual LR-CV', 'green', 'HousingLRScatterPlotCV')
Plots.residualPlot(predicted, (predicted - y), 'Fitted', 'Residual', 'Fitted VS Residual LR-CV', 'blue', 'HousingLRResidualPlotCV')
# Polynomial Regression
# NOTE(review): the 6 is presumably the polynomial degree (or maximum degree)
# and the 10 the fold count -- confirm against the Functions signatures.
Functions.polynomialRegression(lm, X_train, y_train, X_test, y_test, 6, 'Linear Regression')
Functions.polynomialRegressionCV(lm, X, y, 10, 6, 'Linear Regression')
# Ridge
# RidgeCV is given three candidate alphas; the chosen one is read back from
# `alpha_` once the helper call has returned.
ridge_alphas = [0.1, 0.01, 0.001]
ridge = linear_model.RidgeCV(alphas=ridge_alphas)
Functions.callClassifierFeatures(
    ridge, X_train, y_train, X_test, y_test, feature_cols, 'Ridge'
)
ridge_alpha_msg = "The tuned alpha value selected for Ridge is: %.4f"
print(ridge_alpha_msg % ridge.alpha_)
Functions.callCrossVal(ridge, X, y, 10, 'Ridge')

# Lasso
# Same procedure as for Ridge, with LassoCV as the estimator.
lasso_alphas = [0.1, 0.01, 0.001]
lasso = linear_model.LassoCV(alphas=lasso_alphas)
Functions.callClassifierFeatures(
    lasso, X_train, y_train, X_test, y_test, feature_cols, 'Lasso'
)
lasso_alpha_msg = "The tuned alpha value selected for Lasso is: %.4f"
print(lasso_alpha_msg % lasso.alpha_)
Functions.callCrossVal(lasso, X, y, 10, 'Lasso')
예제 #2
0
# Attach the training targets to `ds` (created earlier, outside this excerpt).
# The targets are copied into a fresh array and reshaped into a single column.
# np.array() is used instead of `y.copy().reshape(...)` because that form only
# works when y is a NumPy array: pandas removed Series.reshape, so a Series
# target would raise AttributeError. np.array() copies in both cases.
y_train_nn = np.array(y_train).reshape(-1, 1)
ds.setField('target', y_train_nn)

# Test dataset declared with sizes 45 and 1, the same sizes given to
# buildNetwork below.
# NOTE(review): presumably 45 input features and 1 target -- confirm.
ds_test = SupervisedDataSet(45, 1)
ds_test.setField('input', X_test)
y_test_nn = np.array(y_test).reshape(-1, 1)
ds_test.setField('target', y_test_nn)

# Grid search over hidden-layer sizes 1-4 and epoch caps 10, 20 and 30.
for hidden in range(1, 5):
    for epoch in range(10, 40, 10):
        hidden_size = hidden

        # A new network and trainer are built for every combination.
        net = buildNetwork(45, hidden_size, 1, bias=True)
        trainer = BackpropTrainer(net, ds)
        trainer.trainUntilConvergence(maxEpochs=epoch)

        # Root-mean-squared error of the network output on the test dataset.
        p = net.activateOnDataset(ds_test)
        squared_error_sum = np.sum((p - y_test_nn) ** 2)
        rmse = np.sqrt(squared_error_sum / y_test.size)
        # NOTE(review): 'Epchs' below looks like a typo for 'Epochs'; it is
        # left unchanged because it is part of the program's output.
        print('Neural Network - Hidden size: %d Epchs: %d RMSE: %.4f' % (hidden, epoch, rmse))


# Fit a separate linear model for each of the five work-flow IDs (0-4),
# selecting the rows whose one-hot work-flow column equals 1.
for num in range(5):
    workflow_column = 'Work-Flow-ID=work_flow_' + str(num)
    is_this_workflow = one_hot_data[workflow_column] == 1
    data_workflow = one_hot_data[is_this_workflow]
    X, y = data_workflow[feature_cols], data_workflow['Size of Backup (GB)']
    Functions.fitWorkFlow(LinearRegression(), X, y, num)

#Polynomial Regression
# NOTE(review): the 3 is presumably the polynomial degree and the 10 the fold
# count -- confirm against the Functions signatures.
Functions.polynomialRegression(
    LinearRegression(), X_train, y_train, X_test, y_test, 3, 'NetworkBackupPoly'
)

# NOTE(review): the loop above rebinds X and y, so this call receives the
# work_flow_4 subset rather than any X / y defined earlier -- confirm that
# this is intended.
Functions.polynomialRegressionCV(
    LinearRegression(), X, y, 10, 3, 'NetworkBackupPolyCV'
)