Example #1
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.neural_network import MLPRegressor

# X, y and the encoder 'le' (presumably a fitted LabelEncoder) are prepared
# earlier in the script; transform expects a 1-D column of team names
home = pd.DataFrame(le.transform(X['HOME']))
away = pd.DataFrame(le.transform(X['AWAY']))

X = pd.concat([home, away, X], axis=1)

X = X.drop(['HOME', 'AWAY', 'DATE_VALUE'], axis=1)

X_train = X.iloc[298:591]
X_test = X.iloc[259:297]

y_train = y.iloc[298:591]
y_test = y.iloc[259:297]


#print(X_train)
#print(y_train)

regressor = MLPRegressor(hidden_layer_sizes=(10, 5), solver='sgd', max_iter=2000)

regressor.fit(X_train, y_train.squeeze().tolist())

print(regressor.score(X_train, y_train.squeeze().tolist()))
print(regressor.score(X_test, y_test.squeeze().tolist()))

print(regressor.get_params())
y_predict = regressor.predict(X_test)

plt.plot(y_test.squeeze().tolist(), y_predict, 'o');
plt.show()
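
# Not part of the original snippet: the R^2 scores above can be complemented with
# explicit error metrics on the test set. A minimal sketch, assuming the y_test and
# y_predict variables from the example above.
from sklearn.metrics import mean_absolute_error, mean_squared_error

mae = mean_absolute_error(y_test.squeeze().tolist(), y_predict)
rmse = mean_squared_error(y_test.squeeze().tolist(), y_predict) ** 0.5
print('Test MAE:  %.3f' % mae)
print('Test RMSE: %.3f' % rmse)
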
Example #2
 def feature_regression(
         self,
         data = None,
         features = None,
         targets = None,
         N_train = 10000,
         N_test  =  5000,
         validation_fraction = 0.2,
         solver = 'sgd',
         max_iter = 150,
         #comm = MPI.COMM_WORLD, # just when doing some MPI parallelization
         ):
     '''
         Feature Regression
     
     This function performs a single-layer regression of the data: it fits a
     linear model (essentially a perceptron) which is then used to infer the
     output values on a given validation fraction of the data.
     
     Here, a given set of input features 'features' is mapped onto a given
     set of output variables 'targets' by a linear regressor; the fit is
     done with gradient descent.
     
     Parameters:
         data        :   a dictionary containing formatted data of the
                         features and variables to be considered
         features    :   names of the input features to be considered for
                         the regression model
         targets     :   names of the variables to be regressed on, i.e.
                         dependent on the input features
             Model parameters
         N_train     :   number of training samples
         N_test      :   number of test samples
         validation_fraction
                     :   fraction of the data always used for validation
                         of the model, to avoid over-fitting
         solver      :   numerical solver to be used: 'sgd', 'lbfgs'
         max_iter    :   maximum number of iterations
                         
         Here, Scikit-Learn's "MLPRegressor" is used for simplicity.
         If any hidden layer sizes are defined in additional parameters,
         they are not considered here; the hidden layer size is always
         empty. If you need hidden layers, use a (deep) learning
         library / module instead!
                         
         NOTE: This is a simple single-layer linear regression.
     '''
     
     # fall back to the data stored on the instance if none was passed in
     if data is None:
         if self.data is None:
             print('Error: No data found!')
             return
         data = self.data.copy()
     
     from sklearn.neural_network import MLPRegressor
     import matplotlib.pyplot as plt
     
     Ntr,Nte = N_train,N_test
     
     mlp = MLPRegressor(
             hidden_layer_sizes = [], 
             validation_fraction = validation_fraction,
             solver = solver,
             max_iter = max_iter,
             )
     
     # dict2arr (module helper) presumably stacks the named entries of the
     # data dictionary into plain arrays for the regressor
     Y = dict2arr(data,targets)
     X = dict2arr(data,features)
     
     X_tr = X[:Ntr]
     Y_tr = Y[:Ntr]
     X_te = X[Ntr:Ntr+Nte]
     Y_te = Y[Ntr:Ntr+Nte]
     
     print('Training MLP regressor. . .')
     
     mlp.fit(X_tr,Y_tr)
     tr_scr = mlp.score(X_tr,Y_tr)
     te_scr = mlp.score(X_te,Y_te)
     
     print('done')
     print()
     print('Training score: %0.2f' %tr_scr)
     print('Test score: %0.2f' %te_scr)
     
     # the loss curve is only recorded by the iterative solvers ('sgd', 'adam');
     # with 'lbfgs' no loss history is available, so the plot is skipped
     tr_loss = getattr(mlp, 'loss_curve_', None)
     
     if tr_loss is not None:
         plt.figure()
         plt.plot(tr_loss)
         plt.xlabel('Iterations')
         plt.ylabel('Loss')
         plt.title('Gradient Descent')
     
     self.mlp_reg = mlp
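
# Not part of the original module: a self-contained sketch of the same idea, i.e. a
# purely linear model obtained from MLPRegressor by leaving the hidden layers empty
# (as the method above relies on) and training it with gradient descent. The toy
# data and all names below are illustrative only.
import numpy as np
from sklearn.neural_network import MLPRegressor

rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 4))                # toy features
y = X @ np.array([1.0, -2.0, 0.5, 3.0])       # toy linear target

mlp = MLPRegressor(hidden_layer_sizes=[], solver='sgd', max_iter=150,
                   validation_fraction=0.2)
mlp.fit(X[:800], y[:800])
print('train R^2: %.2f' % mlp.score(X[:800], y[:800]))
print('test  R^2: %.2f' % mlp.score(X[800:], y[800:]))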