# Encode the team-name columns as integers so the regressor can use them.
# NOTE: LabelEncoder expects 1-D input, so index with X['HOME'], not X[['HOME']].
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor

home = pd.DataFrame(le.transform(X['HOME']))
away = pd.DataFrame(le.transform(X['AWAY']))
X = pd.concat([home, away, X], axis=1)
X = X.drop(['HOME', 'AWAY', 'DATE_VALUE'], axis=1)

# Positional train/test split: rows 298-590 for training, rows 259-296 for testing.
X_train = X.iloc[298:591]
X_test = X.iloc[259:297]
y_train = y.iloc[298:591]
y_test = y.iloc[259:297]

regressor = MLPRegressor(hidden_layer_sizes=(10, 5), solver='sgd', max_iter=2000)
regressor.fit(X_train, y_train.squeeze().tolist())

print(regressor.score(X_train, y_train.squeeze().tolist()))
print(regressor.score(X_test, y_test.squeeze().tolist()))
print(regressor.get_params())

# Scatter the predicted values against the true test targets.
y_predict = regressor.predict(X_test)
plt.plot(y_test.squeeze().tolist(), y_predict, 'o')
plt.show()
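# --- Illustration (hypothetical data): the snippet above assumes a LabelEncoder
# `le` that was already fitted elsewhere. A minimal sketch of that step, fitting
# on the union of both columns so HOME and AWAY share one consistent mapping:
from sklearn.preprocessing import LabelEncoder

_demo = pd.DataFrame({'HOME': ['Lakers', 'Bulls'], 'AWAY': ['Bulls', 'Heat']})
_demo_le = LabelEncoder()
_demo_le.fit(pd.concat([_demo['HOME'], _demo['AWAY']]))
print(_demo_le.transform(_demo['HOME']))  # classes_ sorted: ['Bulls', 'Heat', 'Lakers'] -> [2 0]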
def feature_regression(
        self,
        data=None,
        features=None,
        targets=None,
        N_train=10000,
        N_test=5000,
        validation_fraction=0.2,
        solver='sgd',
        max_iter=150,
        # comm=MPI.COMM_WORLD,  # only needed when doing MPI parallelization
):
    '''
    Feature Regression

    Performs a single-layer regression of the data: a linear model
    (essentially a perceptron) is fitted to the input features 'features'
    and then used to infer the output variables 'targets', with a fraction
    of the data held out for validation. Gradient descent is used for the
    fit.

    Parameters:
        data     : dictionary containing the formatted feature and target
                   arrays to be considered
        features : names of the input features used by the regression model
        targets  : names of the variables to be predicted from the input
                   features

    Model parameters:
        N_train             : number of training samples
        N_test              : number of test samples
        validation_fraction : fraction of the data held out for validating
                              the model to avoid over-fitting
        solver              : numerical solver to use: 'sgd', 'lbfgs'
        max_iter            : maximum number of iterations

    Scikit-learn's MLPRegressor is used here for simplicity. Hidden layer
    sizes passed as additional parameters are not considered: the hidden
    layer configuration is always empty, which reduces the network to a
    linear model. If you need hidden layers, use a (deep) learning
    library / module instead!

    NOTE: This is a simple single-layer linear model.
    '''
    if data is None:
        if self.data is None:
            print('Error: No data found!')
            return
        data = self.data.copy()

    from sklearn.neural_network import MLPRegressor

    Ntr, Nte = N_train, N_test
    # An empty hidden_layer_sizes reduces the MLP to a purely linear model.
    # Note: scikit-learn only uses validation_fraction when early_stopping=True.
    mlp = MLPRegressor(
        hidden_layer_sizes=[],
        validation_fraction=validation_fraction,
        solver=solver,
        max_iter=max_iter,
    )

    Y = dict2arr(data, targets)
    X = dict2arr(data, features)
    X_tr = X[:Ntr]
    Y_tr = Y[:Ntr]
    X_te = X[Ntr:Ntr + Nte]
    Y_te = Y[Ntr:Ntr + Nte]

    print('Training MLP regressor. . .')
    mlp.fit(X_tr, Y_tr)
    tr_scr = mlp.score(X_tr, Y_tr)
    te_scr = mlp.score(X_te, Y_te)
    print('done')
    print()
    print('Training score: %0.2f' % tr_scr)
    print('Test score: %0.2f' % te_scr)

    # loss_curve_ is only recorded for the 'sgd' and 'adam' solvers.
    if hasattr(mlp, 'loss_curve_'):
        plt.figure()
        plt.plot(mlp.loss_curve_)
        plt.xlabel('Iterations')
        plt.ylabel('Loss')
        plt.title('Gradient Descent')

    self.mlp_reg = mlp
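# The helper dict2arr used above is assumed to be defined elsewhere in this
# module; from its usage it stacks the named 1-D arrays of a data dictionary
# into an (n_samples, n_keys) matrix. A minimal sketch consistent with that
# usage (hypothetical implementation, not necessarily the original helper):
import numpy as np

def dict2arr(data, keys):
    # Column-stack the arrays stored under each requested key.
    return np.column_stack([np.asarray(data[k]) for k in keys])

# Hypothetical call, assuming an instance whose self.data maps variable names
# to equally long arrays:
#   obj.feature_regression(features=['x1', 'x2'], targets=['y'],
#                          N_train=10000, N_test=5000, solver='sgd')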