Example #1
import numpy as np
from sklearn.neural_network import MLPRegressor


def mlp_bench(x_train, y_train, x_test, fh):
    """
    Forecasts using a simple MLP with 6 nodes in the hidden layer

    :param x_train: train input data
    :param y_train: target values for training
    :param x_test: test data
    :param fh: forecasting horizon
    :return: array of length fh with the forecasts
    """
    y_hat_test = []

    model = MLPRegressor(hidden_layer_sizes=6, activation='identity', solver='adam',
                         max_iter=100, learning_rate='adaptive', learning_rate_init=0.001,
                         random_state=42)
    model.fit(x_train, y_train)

    last_prediction = model.predict(x_test)[0]
    for i in range(0, fh):
        y_hat_test.append(last_prediction)
        x_test[0] = np.roll(x_test[0], -1)
        x_test[0, (len(x_test[0]) - 1)] = last_prediction
        last_prediction = model.predict(x_test)[0]

    return np.asarray(y_hat_test)
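A minimal usage sketch (the synthetic series, window size, and horizon are assumptions for illustration; note that mlp_bench mutates x_test in place, hence the copy):

series = np.sin(np.arange(100) / 5.0)   # toy series
window, fh = 3, 5
X = np.array([series[i:i + window] for i in range(len(series) - window)])
y = series[window:]
forecasts = mlp_bench(X[:-1], y[:-1], X[-1:].copy(), fh)
print(forecasts)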
Example #2
def construct_train(train_length, **kwargs):
    """
    Train and test model with given input
    window and number of neurons in layer
    """
    start_cur_position = 0
    # `observations` is assumed to be a global (n, 2) array of (index, value) pairs
    steps, steplen = observations.size // (2 * train_length), train_length

    if 'hidden_layer' in kwargs:
        network = MLPRegressor(hidden_layer_sizes=kwargs['hidden_layer'])
    else:
        network = MLPRegressor()

    quality = []

    # fit model - configure parameters
    network.fit(observations[start_cur_position:train_length][:, 1].reshape(1, train_length),
                observations[:, 1][start_cur_position:train_length].reshape(1, train_length))

    parts = []

    # calculate predicted values
    # for each step add all predicted values to a list
    # TODO: add some parallelism here
    for i in range(0, steps):
        parts.append(network.predict(observations[start_cur_position:train_length][:, 1]))
        start_cur_position += steplen
        train_length += steplen

    # estimate model quality using mean squared error
    result = np.array(parts).flatten().tolist()
    for valnum, value in enumerate(result):
        quality.append((value - observations[valnum][1])**2)

    return sum(quality)/len(quality)
Example #3
def test_partial_fit_regression():
    # Test partial_fit on regression.
    # `partial_fit` should yield the same results as `fit` for regression.
    X = Xboston
    y = yboston

    for momentum in [0, .9]:
        mlp = MLPRegressor(solver='sgd', max_iter=100, activation='relu',
                           random_state=1, learning_rate_init=0.01,
                           batch_size=X.shape[0], momentum=momentum)
        with warnings.catch_warnings(record=True):
            # catch convergence warning
            mlp.fit(X, y)
        pred1 = mlp.predict(X)
        mlp = MLPRegressor(solver='sgd', activation='relu',
                           learning_rate_init=0.01, random_state=1,
                           batch_size=X.shape[0], momentum=momentum)
        for i in range(100):
            mlp.partial_fit(X, y)

        pred2 = mlp.predict(X)
        assert_almost_equal(pred1, pred2, decimal=2)
        score = mlp.score(X, y)
        assert_greater(score, 0.75)
Example #4
class Ann:

    def __init__(self):

        self._nn = MLPRegressor(hidden_layer_sizes=(10,), verbose=False, warm_start=True)
        self._entradas_entrenamiento = []
        self._salidas_esperadas_entrenamiento = []
        self.lambdaCoefficient = 0.9

    def evaluar(self, entrada):
        return self._nn.predict(entrada)

    def agregar_a_entrenamiento(self, tableros, resultado):

        tableros.reverse()
        for i in range(len(tableros)):
            tablero, valorEstimado = tableros[i][0], tableros[i][1]
            self._entradas_entrenamiento.append(tablero)
            if i == 0:  # final position: learn the game result directly
                self._salidas_esperadas_entrenamiento.append(resultado.value)
            else:
                valorAAprender = valorEstimado + self.lambdaCoefficient * (self._salidas_esperadas_entrenamiento[i-1] -
                    valorEstimado)
                self._salidas_esperadas_entrenamiento.append(valorAAprender)

    def entrenar(self):
        self._nn.partial_fit(self._entradas_entrenamiento, self._salidas_esperadas_entrenamiento)
        self._entradas_entrenamiento = []
        self._salidas_esperadas_entrenamiento = []

    def almacenar(self):
        pickle.dump(self._nn, open(self.path, 'wb'))

    def cargar(self, path, red):
        self.path = path
        if os.path.isfile(path):
            self._nn = pickle.load(open(path, 'rb'))
        else:
            self._nn = red
            tableroVacio = ([EnumCasilla.EMPTY.value for _ in range(64)], 0)
            self.agregar_a_entrenamiento([tableroVacio], EnumResultado.EMPATE)
            self.entrenar()
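The targets stored by agregar_a_entrenamiento follow the TD(λ) pattern: each position's target moves from its own estimate toward the already-computed target of the position after it. A minimal standalone sketch of that backup, with made-up values and λ = 0.9:

lam = 0.9
estimates = [0.2, 0.5, 0.7]   # value estimates, final position first
targets = [1.0]               # the final position learns the game result
for v in estimates[1:]:
    targets.append(v + lam * (targets[-1] - v))
print(targets)                # [1.0, 0.95, 0.925]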
Example #5
ti = "Importance of Numeric and Categorical Encoded Features. Gradient Boosting Regressor(PCA)"
display_importance(GradientBoostingRegressor(max_depth=4, n_estimators=32 * 8),
                   X_train_cat_enc_pca, y_train_cat_enc, ti, 18)
"""## MLP Regressors. Scikit-Learn
Fit Regressors
"""

mlpr = MLPRegressor(hidden_layer_sizes=(32 * 8, ),
                    max_iter=500,
                    solver='adam',
                    batch_size=12,
                    learning_rate='adaptive',
                    verbose=True)
mlpr.fit(X_train, y_train)

y_train_mlpr = mlpr.predict(X_train)
y_test_mlpr = mlpr.predict(X_test)
scores('MLP Regressor. Numeric Features', y_train, y_test, y_train_mlpr,
       y_test_mlpr)

mlpr_cat = MLPRegressor(hidden_layer_sizes=(32 * 8, ),
                        max_iter=500,
                        solver='adam',
                        batch_size=12,
                        learning_rate='adaptive',
                        verbose=True)
mlpr_cat.fit(X_train_cat, y_train_cat)

y_train_cat_mlpr = mlpr_cat.predict(X_train_cat)
y_test_cat_mlpr = mlpr_cat.predict(X_test_cat)
scores('MLP Regressor. Numeric and Categorical Features', y_train_cat,
       y_test_cat, y_train_cat_mlpr, y_test_cat_mlpr)
Example #6
class Ann:
    '''
        Implementation and interface of the ANN functionality presented
    '''
    def __init__(self):

        self._nn = MLPRegressor(hidden_layer_sizes=(10,), verbose=False, warm_start=True)
        self._entradas_entrenamiento = []
        self._salidas_esperadas_entrenamiento = []
        # TD-lambda parameter
        self.lambdaCoefficient = 0.9

    def evaluar(self, entrada):
        '''
            Returns the network's evaluation of the input
        '''
        return self._nn.predict(entrada)

    def agregar_a_entrenamiento(self, tableros, resultado):
        '''
            Adds the game's data to the training examples
        '''

        # Present the game from the end backwards
        tableros.reverse()
        for i in range(len(tableros)):
            # Board representation, estimated value
            tablero, valorEstimado = tableros[i][0], tableros[i][1]
            self._entradas_entrenamiento.append(tablero)
            if i == 0:
                # For the final position, use the game result as the expected output
                self._salidas_esperadas_entrenamiento.append(resultado.value)
            else:
                # The value to learn, as given by TD-lambda
                valorAAprender = valorEstimado + self.lambdaCoefficient * (
                    self._salidas_esperadas_entrenamiento[i - 1] - valorEstimado)
                self._salidas_esperadas_entrenamiento.append(valorAAprender)

    def entrenar(self):
        '''
            Runs training on the stored data
        '''
        self._nn.partial_fit(self._entradas_entrenamiento, self._salidas_esperadas_entrenamiento)
        self._entradas_entrenamiento = []
        self._salidas_esperadas_entrenamiento = []

    def almacenar(self):
        '''
            Serializes and persists the network
        '''
        pickle.dump(self._nn, open(self.path, 'wb'))

    def cargar(self, path, red):
        '''
            Deserializes the network, or creates a new one
        '''
        self.path = path
        if os.path.isfile(path):
            # If the given file exists, deserialize the network
            self._nn = pickle.load(open(path, 'rb'))
        else:
            # Otherwise, initialize the provided network
            self._nn = red
            tableroVacio = ([EnumCasilla.EMPTY.value for _ in range(64)], 0)
            self.agregar_a_entrenamiento([tableroVacio], EnumResultado.EMPATE)
            self.entrenar()
Example #7
from __future__ import print_function, division
from future.utils import iteritems
from builtins import range, input
# Note: you may need to update your version of future
# sudo pip install -U future


import numpy as np
from sklearn.neural_network import MLPRegressor
from util import getKaggleMNIST



# get data
X, _, Xt, _ = getKaggleMNIST()

# create the model and train it
model = MLPRegressor()
model.fit(X, X)

# test the model
print("Train R^2:", model.score(X, X))
print("Test R^2:", model.score(Xt, Xt))

Xhat = model.predict(X)
mse = ((Xhat - X)**2).mean()
print("Train MSE:", mse)

Xhat = model.predict(Xt)
mse = ((Xhat - Xt)**2).mean()
print("Test MSE:", mse)
Example #8
y_pred = bayes.predict(X_test)

print "O MAE do bayes foi "+str(mean_absolute_error(y_test, y_pred))

#############################################################################
# Applying the Neural Net Regressor
nn_parameters = {'hidden_layer_sizes':[10,20,30,40]}

grid_nn = GridSearchCV(MLPRegressor(solver='lbfgs'), nn_parameters, cv=3)
grid_nn.fit(X_train, y_train)

nnet = MLPRegressor(hidden_layer_sizes=grid_nn.best_params_['hidden_layer_sizes'],
	 solver='lbfgs')
nnet.fit(X_train, y_train)

y_pred = nnet.predict(X_test)

print "O MAE da nnet foi "+str(mean_absolute_error(y_test, y_pred))

#############################################################################
# Loading the test dataset from the csv using pandas
data_test = pd.read_csv('test.csv', header=None)

# Converting the categorical data to numeric labels
for column in categoricos:
	data_test[column-1] = pd.Categorical(data_test[column-1]).codes

numericos_array_test = data_test[numericos-1].values

#############################################################################
# Joining the numeric and categorical test data
Example #9
f2 = open("DATA/OPT_NN2.dat", "w")
f3 = open("DATA/OPT_NN3.dat", "w")
f3 = open("DATA/OPT_NN4.dat", "w")
for layer1 in range(5, 151):
    for layer2 in range(1, 2):
        for rate in np.linspace(0.0001, 0.1, 1000):
            regr = MLPRegressor(hidden_layer_sizes=(layer1,), activation='relu',
                                solver='adam', alpha=0.0001, batch_size='auto',
                                learning_rate='constant', learning_rate_init=rate,
                                power_t=0.5, max_iter=1000, shuffle=True, random_state=None,
                                tol=0.0001, verbose=False, warm_start=False, momentum=0.9,
                                nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1,
                                beta_1=0.9, beta_2=0.999, epsilon=1e-08)

            regr.fit(X, y)
            y2 = regr.predict(X2)

            print(layer1, layer2, rate)

            L1_error_xnorm_dt = sum(abs(y2[:, 0] - Y3[:, 0])) / n_test
            L1_error_ynorm_dt = sum(abs(y2[:, 1] - Y3[:, 1])) / n_test
            L1_error_area_dt = sum(abs(y2[:, 2] - Y3[:, 2])) / n_test

            if (L1_error_xnorm_dt < err1):
                f1 = open("DATA/OPT_NN1.dat", "w")
                f1.write('%d, ' % layer1)
                f1.write('%d, ' % layer2)
                f1.write('%f' % rate)
                f1.write('\n')
                f1.close()
Example #10
#Writing name and UID
results.write("UID: " + uid + "      Name: " + name + "       Seed: " +
              str(seed) + "\n")

#Generating sample data
data, target = load_diabetes(return_X_y=True)
data_train, data_test, target_train, target_test = train_test_split(
    data, target, test_size=.25, train_size=.75)

#Error BackProp w/Regression Learning
mlp = MLPRegressor(max_iter=200, random_state=13)

####Before Training(After 1 epoch)
mlp.partial_fit(data_train, target_train)
#RMSE Train Data
predict_train0 = mlp.predict(data_train)
rmse_train0 = rmse(target_train, predict_train0)

#RMSE Test Data
predict_test0 = mlp.predict(data_test)
rmse_test0 = rmse(target_test, predict_test0)

####After Training
mlp.fit(data_train, target_train)
#RMSE Train Data
predict_train1 = mlp.predict(data_train)
rmse_train1 = rmse(target_train, predict_train1)

#RMSE Test Data
predict_test1 = mlp.predict(data_test)
rmse_test1 = rmse(target_test, predict_test1)
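The `rmse` helper used above isn't shown; a minimal sketch of what it presumably computes:

import numpy as np

def rmse(y_true, y_pred):
    # root mean squared error between targets and predictions
    return np.sqrt(np.mean((np.asarray(y_true) - np.asarray(y_pred)) ** 2))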
Example #11
##    model = pickle.load(file)
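##  (a minimal sketch of the assumed setup: `model` must support partial_fit,
##   e.g. model = MLPRegressor(hidden_layer_sizes=(50,)) -- the layer size is illustrative)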

for i in range(1, 81):
    # load training data
    training_data = pd.read_csv(
        'training_datasets/training_data_{}.csv'.format(i), header=None)
    x = training_data.iloc[:, :-1]
    y = training_data.iloc[:, -1]

    # (re)train model
    model.partial_fit(x, y)

    # see how results are changing with more learning
    # expected score = lowest
    test_x = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]).reshape(1, -1)
    print(model.predict(test_x))

    # expected score = low
    test_x = np.array([0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 0]).reshape(1, -1)
    print(model.predict(test_x))

    # expected score = high
    test_x = np.array([0, 0, 0, 0, 2, 0, 1, 2, 0, 0, 1]).reshape(1, -1)
    print(model.predict(test_x))

    # expected score = highest
    test_x = np.array([0, 1, 1, 2, 2, 1, 2, 0, 0, 0, 0]).reshape(1, -1)
    print(model.predict(test_x))
    print('------------------------')

# dump pickled model
Example #12
Y_tr = pheno[:1000,1:]   #slicing pheno
#Y_va = pheno[201:250,:]
Y_te = pheno[1001:,1:]

diabetes_X_train = X_tr
diabetes_X_test = X_te
diabetes_y_train = Y_tr
diabetes_y_test = Y_te

reg = MLPRegressor(hidden_layer_sizes=(1, ), solver='lbfgs')
reg.fit(X_tr, Y_tr)

scores = cross_val_score(reg,geno[:,1:],pheno[:,1:],cv=10)

#Result_Y = np.zeros((249,1), dtype='float64')
Result_Y = reg.predict(X_te)
#Yte = np.array(Y_te, dtype=np.float64) 
r_row,p_score = pearsonr(Result_Y,Y_te)

# The mean square error
print("Residual sum of squares: %.2f"
      % np.mean((reg.predict(diabetes_X_test) - diabetes_y_test) ** 2))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % reg.score(diabetes_X_test, diabetes_y_test))
print(Result_Y)
print(scores)
print(Result_Y.shape)
print(r_row)
print(p_score)

Example #13
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor

data = pd.read_csv("temp.csv")
data.head()
X = data['temp'].values
Y = data['hum'].values
X = np.reshape(X, (-1, 1))
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.30)

rna = MLPRegressor(solver='lbfgs',
                   activation='logistic',
                   max_iter=100000,
                   hidden_layer_sizes=(15,))

rna.fit(X_train, y_train)

Y2 = rna.predict(X_test)

print(Y2)
print("Coeficiente de determinación: ", rna.score(X_test, y_test))
import matplotlib.pyplot as plt
plt.plot(X_test, y_test, 'bo', label='Original')
plt.plot(X_test, Y2, 'ro', label='Prediction')
plt.legend(loc='upper right')
plt.ylabel('Humidity')
plt.xlabel('Temperature')
plt.show()
Example #14
def example_data(rows=100):
    x = np.linspace(start=0, stop=30, num=rows).reshape((rows, 1))
    #y = 2*np.sin(x) + x
    y = np.sqrt(x)
    # scale the data
    x = (x - 15) / 10
    y = y / 10
    return x, y.reshape(len(y))


X, y = example_data()

model = MLPRegressor(hidden_layer_sizes=(50, 5),
                     activation='relu',
                     shuffle=False,
                     batch_size=len(y),
                     solver='sgd',
                     alpha=0,
                     learning_rate='constant',
                     learning_rate_init=0.0001,
                     max_iter=100000,
                     validation_fraction=0)

model.fit(X=X, y=y)
print(model.loss_)

yhat = model.predict(X)
plt.plot(y)
plt.plot(yhat)
plt.show()
Example #15
from sklearn.neural_network import MLPRegressor
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error



data = pd.read_csv('network_backup_dataset.csv')
train = data.loc[:,['WeekNumber','DayofWeek','BackupStartTime','WorkFlowID','FileName','BackupTime']]
target = data.loc[:,['SizeofBackup']]
mlp = MLPRegressor(solver='sgd', hidden_layer_sizes=150,
                   max_iter=200, shuffle=False, random_state=1)

mlp.fit(train, target)
prediction = mlp.predict(train)

plt.plot(prediction,label='Prediction',color='red')
plt.plot(target,label='Real Data',color='blue')
plt.title('Copy Size versus Time based on Neural Network Regression')
plt.xlabel('Time')
plt.ylabel('Copy Size')
plt.legend()
plt.show()

rmse = mean_squared_error(target.SizeofBackup,prediction)**0.5
print(rmse)
Example #16
File: neural_bot.py Project: lnemzer/MTG
def main():

    # Sets working directory
    os.chdir("/project/csbio/henry/Documents/projects/draftsim/MTG")
    output_folder = ("ml")

    # Sets seed for reproducibility
    seed = 5001

    # Reads in mtgJSON data
    setName = 'GRN'
    jsonSubset = None
    with open('../data/all_sets.json', 'r', encoding='utf-8') as json_data:
        mtgJSON = json.load(json_data)
        jsonSubset = mtgJSON[setName]['cards']
    if setName == 'XLN':
        jsonSubset = jsonSubset + mtgJSON['RIX']['cards']

    # Converts cards to dict with lowercase names as indices for cards
    this_set = {utils.getName(card): card for card in jsonSubset}
    this_set = {k.lower(): v for k, v in this_set.items()}
    cardlist = list(this_set.keys())

    # Reads in draftsim data and formats it
    rec_data = pd.read_csv("../data/GRNrecdata.csv",
                           names=["deck", "pack", "pick"])
    rec_data = rec_data.drop(["deck"], axis=1)
    rec_data["pack"] = [
        re.sub('_\d+', '', x).lower() for x in rec_data["pack"]
    ]
    rec_data["pick"] = [
        re.sub('_\d+', '', x).lower() for x in rec_data["pick"]
    ]

    # One-hot encodes draftsim data
    labels = dict(zip(cardlist, range(len(cardlist))))
    rec_data["pick"] = [labels[x] for x in rec_data["pick"]]
    rec_data["pack"] = rec_data["pack"].astype(object)
    rec_data["pick"] = rec_data["pick"].astype(object)
    rec_data["pack"] = [ast.literal_eval(x) for x in rec_data["pack"]]
    formatted = rec_data
    for index, row in rec_data.iterrows():
        pick_encode = [0 for i in range(len(cardlist))]
        pack_encode = [0 for i in range(len(cardlist))]
        pick_encode[row["pick"]] = 1
        for name in row["pack"]:
            pack_encode[labels[name]] = 1
        formatted.at[index, "pick"] = pick_encode
        formatted.at[index, "pack"] = pack_encode

    final = np.zeros(formatted.shape[0],
                     dtype=[('x', 'int', len(cardlist)),
                            ('y', 'int', len(cardlist))])
    for i in range(formatted.shape[0]):
        final["x"][i] = [el for el in formatted["pack"][i]]
        final["y"][i] = [el for el in formatted["pick"][i]]

    # Converts to training/test data with an 80/20 split
    x_train, x_test, y_train, y_test, = train_test_split(final["x"],
                                                         final["y"],
                                                         test_size=0.2,
                                                         random_state=seed)

    # Trains an MLP regressor
    model = MLPRegressor()
    grid = dict(activation=["relu"],
                solver=["adam"],
                hidden_layer_sizes=[(500, 1000, 500)],
                alpha=[1e-5, 1e-3, 0.1, 10],
                random_state=[seed],
                early_stopping=[True],
                max_iter=[50])
    model = GridSearchCV(model, param_grid=grid, verbose=True, n_jobs=16, cv=5)
    model.fit(x_train, y_train)
    print(model.best_params_)

    # Gets training and testing metrics
    train_predictions = np.asarray(model.predict(x_train))
    test_predictions = np.asarray(model.predict(x_test))
    train_correct = 0
    test_correct = 0
    for i in range(train_predictions.shape[0]):
        choices = np.where(x_train[i] == 1)
        predictions = train_predictions[i][choices]
        correct_ind = np.where(y_train[i][choices] == 1)[0]
        # rank the cards in the pack by predicted score, best first
        ranks = np.argsort(predictions)[::-1]
        if len(ranks) > 1:
            if ranks[0] in correct_ind or ranks[1] in correct_ind:
                train_correct += 1
        else:
            if ranks[0] in correct_ind:
                train_correct += 1
    for i in range(test_predictions.shape[0]):
        choices = np.where(x_test[i] == 1)
        predictions = test_predictions[i][choices]
        correct_ind = np.where(y_test[i][choices] == 1)[0]
        ranks = np.argsort(predictions)[::-1]
        if len(ranks) > 1:
            if ranks[0] in correct_ind or ranks[1] in correct_ind:
                test_correct += 1
        else:
            if ranks[0] in correct_ind:
                test_correct += 1
    print(train_correct)
    print(float(train_correct) / float(len(train_predictions)) * 100)
    print(float(test_correct) / float(len(test_predictions)) * 100)
Example #17
mlp = MLPRegressor(hidden_layer_sizes=(4, 4, 4),
                   activation='relu',
                   solver='adam',
                   max_iter=500)
lm.fit(x_train, y_train)
mlp.fit(x_train, y_train)
clf.fit(x_train, y_train)
dtr.fit(x_train, y_train)
rdf.fit(x_train, y_train)
svmmodel.fit(x_train, y_train)

from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
print("Linear Regression Model Accuracy : %f" %
      (r2_score(y_test, lm.predict(x_test))))
print("Multilayer Perceptron Regression Model Accuracy : %f" %
      (r2_score(y_test, mlp.predict(x_test))))
print("Support Vector Machine Regression Model Accuracy : %f" %
      (r2_score(y_test, svmmodel.predict(x_test))))
print("Decision Tree Regression Model Accuracy : %f)" %
      (r2_score(y_test, dtr.predict(x_test))))
print("Gredient Boosting Regression Model Accuracy : %f)" %
      (r2_score(y_test, clf.predict(x_test))))
print("Random Forest Regression Model Accuracy : %f)" %
      (r2_score(y_test, rdf.predict(x_test))))

print("All Model's Mean Squared Error ")
print("Linear Regression Model MSE Error : %f" %
      (mean_squared_error(y_test, lm.predict(x_test))))
print("Multilayer Perceptron Regression Model MSE Error : %f" %
      (mean_squared_error(y_test, mlp.predict(x_test))))
print("Support Vector Machine Regression Model MSE Error : %f" %
Example #18
# print('\nModel - KNeighborsRegressor')                     # KNeighborsRegressor
# a = time.process_time()
# modelo_svr = KNeighborsRegressor()
# modelo_svr = modelo_svr.fit(x_treino, y_treino)
# predicoes_svr = modelo_svr.predict(x_teste)
# qualidade_svr0 = mean_squared_error(y_teste, predicoes_svr)
# del modelo_svr, predicoes_svr
# resultados['KNeighborsRegressor \t\t']=qualidade_svr0
# print(f'Time spent: {time.process_time() - a} s')
#
#
print('\nModel - MLPRegressor')  # MLPRegressor
a = time.process_time()
modelo_svr = MLPRegressor(alpha=1, max_iter=500)
modelo_svr = modelo_svr.fit(x_treino, y_treino)
predicoes_svr = modelo_svr.predict(x_teste)
qualidade_svr0 = mean_squared_error(y_teste, predicoes_svr)
# del modelo_svr, predicoes_svr
resultados['MLP alpha=1: \t\t'] = qualidade_svr0
print(f'Time spent: {time.process_time() - a} s')
#
#
# print('\nModel - MLPRegressor')                     # MLPRegressor
# a = time.process_time()
# modelo_svr = MLPRegressor(solver='sgd')
# modelo_svr = modelo_svr.fit(x_treino, y_treino)
# predicoes_svr = modelo_svr.predict(x_teste)
# qualidade_svr0 = mean_squared_error(y_teste, predicoes_svr)
# del modelo_svr, predicoes_svr
# resultados['MLP sgd: \t\t']=qualidade_svr0
# print(f'Time spent: {time.process_time() - a} s')
Example #19
df = pd.DataFrame(Q, columns=['index', 'prime'])
ax1 = df.plot.scatter(x='index', y='prime', c='DarkBlue')

################################### NEURAL NETWORK ####################################################

neural_net = MLPRegressor([500, 500], random_state=9,
                          max_iter=2000).fit(X[:, :-1], X[:, -1])

###################################  TESTING DATA ##################################################
testingdata = indexedprimesfrom2to(1000000)

residuals = []

Y = []
percent_error = []

for i in range(0, len(testingdata)):
    nnresult = float(neural_net.predict([[i]])[0])
    actualnumber = testingdata[i][1]

    Y += [[i + 1, nnresult]]
    residuals += [[i + 1, nnresult - actualnumber]]
    percent_error += [[
        i + 1, 100 * abs(nnresult - actualnumber) / actualnumber
    ]]

df2 = pd.DataFrame(percent_error, columns=['index', 'percenterror'])
ax2 = df2.plot.scatter(x='index', y='percenterror', c='DarkBlue')

print(percent_error)
Example #20
uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00244/fertility_Diagnosis.txt'
X, y = load_csv(uri, ',', 0, 9, 9, 10, True)
y = pd.get_dummies(y.ravel(), drop_first=True)
'''
Split into training and test set
'''
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1)
'''
Feature scaling 
'''
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)
y_test = sc_y.transform(y_test)
'''
Fit MLPRegressor with the fertility data
'''
regressor = MLPRegressor(hidden_layer_sizes=(100, 50))
regressor.fit(X_train, y_train)
'''
Predicting result
'''
y_pred = regressor.predict(X_test)
rmse = sqrt(mean_squared_error(y_test, y_pred))
Example #21
class QN(object):
    def __init__(self, num_inputs, num_outputs):
        self.nx = num_inputs
        self.ny = num_outputs
        self.net = MLPRegressor(hidden_layer_sizes=(50, 10),
                                max_iter=1,
                                solver='sgd',
                                learning_rate='constant',
                                learning_rate_init=0.001,
                                warm_start=True,
                                momentum=0.9,
                                nesterovs_momentum=True
                                )

        self.initialize_network()

        # set experience replay
        self.mbsize = 128 # mini-batch size
        self.er_s = []
        self.er_a = []
        self.er_r = []
        self.er_done = []
        self.er_sp = []

        self.er_size = 2000  # total size of mb, implement as queue
        self.whead = 0  # write head

    def initialize_network(self):
        # function to initialize network weights
        xtrain = np.random.rand(256, self.nx)
        ytrain = 10 + np.random.rand(256, self.ny)
        self.net.fit(xtrain, ytrain)

    def update_network(self):
        # function updates network by sampling a mini-batch from the ER
        # Prepare train data
        chosen = list(np.random.randint(len(self.er_s), size=min(len(self.er_s), self.mbsize)))
        Xtrain = np.asarray([self.er_s[i] for i in chosen])
        # calculate target
        target = np.random.rand(len(chosen), self.ny)

        for j, i in enumerate(chosen):
            # do a forward pass through s and sp
            Q_s = self.net.predict(self.er_s[i].reshape(1, -1))
            Q_sp = self.net.predict(self.er_sp[i].reshape(1, -1))
            target[j, :] = Q_s  # target initialized to current prediction

            if self.er_done[i]:
                target[j, self.er_a[i]] = self.er_r[i]  # if end of episode, target is terminal reward
            else:
                target[j, self.er_a[i]] = self.er_r[i] + 0.9 * np.max(Q_sp)  # predict returns shape (1, ny)

        # fit the network
        self.net.fit(Xtrain, target)  # single step of SGD

    def append_memory(self, s, a, r, sp, done):
        if (len(self.er_s) < self.er_size):
            self.er_s.append(s)
            self.er_a.append(a)
            self.er_r.append(r)
            self.er_sp.append(sp)
            self.er_done.append(done)
            self.whead = (self.whead + 1) % self.er_size
        else:
            self.er_s[self.whead] = s
            self.er_a[self.whead] = a
            self.er_r[self.whead] = r
            self.er_sp[self.whead] = sp
            self.er_done[self.whead] = done
            self.whead = (self.whead+1) % self.er_size
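A minimal usage sketch (the state size, action count, and random transitions are placeholders for illustration):

import numpy as np

qn = QN(num_inputs=4, num_outputs=2)
for _ in range(200):                  # fill the replay buffer with fake transitions
    s, sp = np.random.rand(4), np.random.rand(4)
    a, r = np.random.randint(2), np.random.rand()
    qn.append_memory(s, a, r, sp, done=False)
qn.update_network()                   # one warm-started SGD step on a sampled mini-batch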
Example #22
loo = LeaveOneOut()
loo.get_n_splits(X)
for train_index, test_index in loo.split(X):
    # print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

# note: only the final leave-one-out split from the loop above is used below
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
# print(X_train)
X_test = scaler.transform(X_test)
mlp = MLPRegressor(hidden_layer_sizes=(8, 8, 8), max_iter=5000, solver='lbfgs')
mlp.fit(X_train, y_train)
predictions = mlp.predict(X_test)
# print(X_test)
# print(mlp.n_iter_)
predictions_int = predictions.astype(int)
y_test_int = y_test.astype(int)
print(predictions_int)
print(y_test_int)
score = 1

if (predictions_int > y_test_int and predictions_int <= y_test_int + 15):
    diff = predictions_int - y_test_int
    while (diff > 0):
        score -= 0.06
        diff -= 1
print(score)
Example #23
train = train.drop('Genre', axis=1)

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(train,
                                                    feature,
                                                    test_size=0.30)

from sklearn.neural_network import MLPRegressor
clf = MLPRegressor(hidden_layer_sizes=(5, ),
                   activation='relu',
                   solver='adam',
                   learning_rate='adaptive',
                   max_iter=1000,
                   learning_rate_init=0.01,
                   alpha=0.01)

clf.fit(X_train, y_train)

res = clf.predict(X_test)
lsd = []

for item in res:
    if item >= 0.5:
        lsd.append(1)
    else:
        lsd.append(0)

from sklearn.metrics import accuracy_score

print(accuracy_score(lsd, y_test))
Example #24
    if (method == 5):
        print('GradientBoosting 01')
        str_method = 'GradientBoosting01'
        r = GradientBoostingRegressor(n_estimators=95,
                                      max_depth=6,
                                      learning_rate=0.04,
                                      random_state=ra2,
                                      verbose=0,
                                      warm_start=True,
                                      subsample=0.7,
                                      max_features=0.8)

    r.fit(x1[col], y1)

    a1 = NWRMSLE(y2, r.predict(x2[col]), x2['perishable'])
    # part of the output file name
    N1 = str(a1)

    test['transactions'] = r.predict(test[col])
    test['transactions'] = test['transactions'].clip(lower=0. + 1e-12)

    col = [c for c in x1 if c not in ['id', 'unit_sales', 'perishable']]
    y1 = x1['unit_sales'].values
    y2 = x2['unit_sales'].values

    # set a new seed to generate random numbers
    ra2 = round(method + 31 * method + 51 * method)
    np.random.seed(ra2)

    if (method == 1):
Example #25
y_pred_dtr = dtr_energy.predict(X_test_energy_stand)
print("Mean squared error for DTR: {:.3f}.".format(
    mean_squared_error(y_pred_dtr, y_test_energy)))

#Random Forest Regressor
from sklearn.ensemble import RandomForestRegressor as RFR
rfr_energy = RFR(n_estimators=100,
                 min_samples_leaf=2,
                 max_leaf_nodes=1000,
                 random_state=37).fit(X_train_energy, y_train_energy)
y_pred_rfr = rfr_energy.predict(X_test_energy)
print("Mean squared error for RFR: {:.3f}.".format(
    mean_squared_error(y_pred_rfr, y_test_energy)))

#Support Vector
from sklearn.svm import SVR
svr_energy = SVR().fit(X_train_energy_stand, y_train_energy)
y_pred_svr = svr_energy.predict(X_test_energy_stand)
print("Mean squared error for SVR: {:.3f}.".format(
    mean_squared_error(y_pred_svr, y_test_energy)))

from sklearn.neural_network import MLPRegressor as MLPR
mlpr_energy = MLPR(hidden_layer_sizes=(100, 100),
                   alpha=.3,
                   random_state=37,
                   beta_1=.89,
                   beta_2=.9995).fit(X_train_energy_stand, y_train_energy)
y_pred_mlpr = mlpr_energy.predict(X_test_energy_stand)
print("Mean squared error for MLPR: {:.3f}.".format(
    mean_squared_error(y_pred_mlpr, y_test_energy)))
print('energy')
Example #26
model = MLPRegressor(hidden_layer_sizes=(1, ),
                     solver='sgd',
                     early_stopping=False,
                     max_iter=1000).fit(x_train, y_train)

# In[8]:

print("{:.2%}".format(model.score(x_train, y_train)))

# In[9]:

print("{:.2%}".format(model.score(x_test, y_test)))

# In[10]:

# plot prediction and actual data
y_pred = model.predict(x_test)
plt.plot(y_test, y_pred, '.')

# plot a line; a perfect prediction would fall exactly on this line
x = np.linspace(-2, 2.5, 2)
y = x
plt.plot(x, y)
plt.show()

# In[11]:

print(model.coefs_)

# In[ ]:
Example #27
# generates data & split it into X (training input) and y (target output)
X = library2[:, 0:5]
y = library2[:, 6]

#print(X)
#print(y)

neurons = 20  # <- number of neurons in the hidden layer
eta = 0.1  # <- the learning rate parameter

# here we create the MLP regressor
mlp = MLPRegressor(hidden_layer_sizes=(neurons, ),
                   verbose=True,
                   learning_rate_init=eta)
# here we train the MLP
mlp.fit(X, y)
while (mlp.score(X, y) < 0):
    mlp.fit(X, y)
# E_out in training
print("Training set score: %f" % mlp.score(X, y))

# now we generate new data as testing set and get E_out for testing set
Xtest = np.array([[genreScore, hourScore, critScore, userScore, pubScore]])  # predict expects a 2-D array
#print("Testing set score: %f" % mlp.score(X, y))
ypred = mlp.predict(Xtest)
fResult = float(ypred[0])
rResult = round(fResult)
print(ypred)
print("Final Score: %f" % rResult)
Example #28
best_linear_regressor = LinearRegression(copy_X=True,
                                         fit_intercept=True,
                                         normalize=True)
best_linear_regressor.fit(x_train_scaled, y_train)
y_pred = best_linear_regressor.predict(x_test_scaled)
print('MSE for Linear Regressor: ' + str(mean_squared_error(y_test, y_pred)))

# In[27]:

best_neural_network_regressor = MLPRegressor(activation='tanh',
                                             alpha=0.0001,
                                             hidden_layer_sizes=10,
                                             learning_rate='constant',
                                             learning_rate_init=0.01,
                                             random_state=0)
best_neural_network_regressor.fit(x_train_scaled, y_train)
y_pred = best_neural_network_regressor.predict(x_test_scaled)
print('MSE for Neural Network Regressor: ' +
      str(mean_squared_error(y_test, y_pred)))

# In[28]:

best_gaussian_regressor = GaussianProcessRegressor(
    kernel=1**2 * RationalQuadratic(alpha=0.1, length_scale=1))
best_gaussian_regressor.fit(x_train_scaled, y_train)
y_pred = best_gaussian_regressor.predict(x_test_scaled)
print('MSE for Gaussian Regressor: ' + str(mean_squared_error(y_test, y_pred)))

# In[ ]:
Example #29
from sklearn import metrics

y_true = test_preNreal5['real']
y_pred = test_preNreal5['pre']

print(np.sqrt(metrics.mean_squared_error(y_true, y_pred)))
print(mean_absolute_percentage_error(y_true, y_pred))

from sklearn.neural_network import MLPRegressor
ANN = MLPRegressor(learning_rate_init=0.001,
                   batch_size=20,
                   tol=0.01,
                   learning_rate='constant',
                   hidden_layer_sizes=(1000, ),
                   solver='adam')
ANN.fit(train_feat, train_label)

model_pre = ANN.predict(test_feat)
test_preNreal5 = pd.DataFrame()
test_preNreal5['real'] = test_label.flatten()
test_preNreal5['pre'] = model_pre
test_preNreal5.to_csv('ANN_wow.csv', index=False)


def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


Example #30
# -*- coding: utf-8 -*-
import pandas as pd

df = pd.read_csv("..\\Data\\health_insurance_2.csv")

features = df.iloc[:, 0:1].values
target = df.iloc[:, 1:2].values

from sklearn.preprocessing import StandardScaler
scaler_x = StandardScaler()
features = scaler_x.fit_transform(features)
scaler_y = StandardScaler()
target = scaler_y.fit_transform(target)

from sklearn.neural_network import MLPRegressor
regression = MLPRegressor()
regression.fit(features, target.ravel())  # MLPRegressor expects a 1-D target
score_1 = regression.score(features, target)

import matplotlib.pyplot as plt
plt.scatter(features, target)
plt.plot(features, regression.predict(features), color='red')
plt.title("Neural Net Regression")
plt.xlabel("Age")
plt.ylabel("Cost")

# scaler.inverse_transform -> back to the original scale
# use transform (not fit_transform) so the training statistics are reused
prediction = scaler_y.inverse_transform(
    regression.predict(scaler_x.transform([[40]])).reshape(-1, 1))
Example #31
#Applying MLPRegressor Model 

'''
sklearn.neural_network.MLPRegressor(hidden_layer_sizes=(100, ), activation='relu', solver='adam',
                                    alpha=0.0001, batch_size='auto', learning_rate='constant',
                                    learning_rate_init=0.001, power_t=0.5, max_iter=200, shuffle=True,
                                    random_state=None, tol=0.0001, verbose=False, warm_start=False,
                                    momentum=0.9, nesterovs_momentum=True, early_stopping=False,
                                    validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08,
                                    n_iter_no_change=10)
'''

MLPRegressorModel = MLPRegressor(activation='tanh', # can also be identity, logistic, relu
                                 solver='lbfgs',  # can also be sgd, adam
                                 learning_rate='constant', # can also be invscaling, adaptive
                                 early_stopping= False,
                                 alpha=0.0001 ,hidden_layer_sizes=(100, 3),random_state=33)
MLPRegressorModel.fit(X_train, y_train)

#Calculating Details
print('MLPRegressorModel Train Score is : ' , MLPRegressorModel.score(X_train, y_train))
print('MLPRegressorModel Test Score is : ' , MLPRegressorModel.score(X_test, y_test))
print('MLPRegressorModel loss is : ' , MLPRegressorModel.loss_)
print('MLPRegressorModel No. of iterations is : ' , MLPRegressorModel.n_iter_)
print('MLPRegressorModel No. of layers is : ' , MLPRegressorModel.n_layers_)
print('MLPRegressorModel last activation is : ' , MLPRegressorModel.out_activation_)
#print('----------------------------------------------------')

#Calculating Prediction
y_pred = MLPRegressorModel.predict(X_test)
print('Predicted Value for MLPRegressorModel is : ' , y_pred[:10])
Example #32
                         momentum=0.9,
                         nesterovs_momentum=True,
                         power_t=0.5,
                         random_state=1,
                         shuffle=True,
                         solver='adam',
                         tol=0.0001,
                         validation_fraction=0.1,
                         verbose=False,
                         warm_start=True)

batches = iter_minibatches(sparse_matrix, prices, chunksize=1000)

count = 0
for X_chunk, y_chunk in batches:
    print(count)
    count += 1
    if len(X_chunk) != 0:
        neuralnet.partial_fit(X_chunk, y_chunk)

valmat = sparse_matrix[999999:].todense()
valprices = get_price_list(train)
print(valmat.shape)
print(valprices.shape)

predicted_prices = neuralnet.predict(valmat)
print('Prices predicted', time.time() - start)

print(valprices.shape)
print(predicted_prices.shape)
print("The score is:", calc_score(valprices, predicted_prices))
Example #33
y_test = y_test.astype(np.float32)

mlp = MLPRegressor( activation='relu', alpha=1e-05, batch_size='auto',
       beta_1=0.9, beta_2=0.999, early_stopping=False,
       epsilon=1e-08, hidden_layer_sizes=(400, 600), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9
     )
mlp.fit(X_train, y_train)

y_test_predict = mlp.predict(X_test)
fig = matplotlib.pyplot.gcf()
fig.set_size_inches(12, 6)
n_faces = 5
n_cols = 5
image_shape = (64, 64)
for i in range(n_faces):
    true_face = np.hstack((X_test[i], y_test[i]))

    if i:
        sub = plt.subplot(n_faces, n_cols, i * n_cols + 1)
    else:
        sub = plt.subplot(n_faces, n_cols, i * n_cols + 1,
                          title="Real")

Example #34
trainX = trainX / np.max(trainX, axis=0)

rows = []
for k in test:
    row = test[k]
    rows.append(row)
# create a [samples x input] numpy array, one row per training example
testX = np.array(rows)
testX = testX / np.max(testX, axis=0)

testY = y[-testSize:]

clf = MLPRegressor(solver='lbfgs',
                   alpha=1e-5,
                   hidden_layer_sizes=(5, 2),
                   random_state=1)

clf.fit(trainX, trainY)
print(clf.score(testX, testY))
yHats = clf.predict(testX)
yHats = np.round(yHats)

print(yHats)
print(testY)
print('\n')
print('Misclassified: ' + str(sum(abs(testY - yHats))) + ' out of ' +
      str(testSize) + ' = ' +
      str(float(abs(sum(testY - yHats))) / float(testSize)))
Example #35
class NeuralNetwork:
    ################# Fields #######################
    # dataset_filename: string - path to dataset
    # header: list - header of the dataset
    # enumerable_columns: list - the enumerable columns

    # df: matrix - data set
    # training_set: matrix - training set
    # test_set: matrix - test set

    # TSnew_X: matrix - training set of TSnew (see documentation)
    # TSnew_Y: matrix - training set of TSnew (see documentation)
    # dim_random_subset: int - number of features to set to 0 (see documentation)
    # repeatSometimes: int - number of for-loop cycles (see documentation)

    def __init__(self, repeatSometimes=2, dim_random_subset=2):
        # variables initialization
        self.enumerable_columns = []
        self.dataset_filename = ""
        self.header = []
        self.df = pandas.DataFrame()
        self.trainSet = pandas.DataFrame()
        self.testSet = pandas.DataFrame()
        self.TSnew_X = pandas.DataFrame()
        self.TSnew_Y = pandas.DataFrame()

        self.repeatSometimes = repeatSometimes
        self.dim_random_subset = dim_random_subset

        # This code really needs much time and therefore I save some computations
        if not os.path.isfile('trainSet{}-{}.csv'.format(repeatSometimes, dim_random_subset)):
            self.readDataset()
            self.discretization()
            self.preprocess()

            # creating TSnew
            self.createTrainingAndTestSet()
            self.createTSnew()

            # backup encoded sets
            self.writeCSV()
        else:
            self.readCSV()

        # training and test
        self.train()
        self.predict()


    def readDataset(self):
        print("DEB Read dataset")

        with open('header.txt') as f:
            self.header = f.read().split(',')
            print(self.header)
        with open('dataset.txt') as f:
            self.dataset_filename = f.read()
            print(self.dataset_filename)
        self.df = pandas.read_csv(self.dataset_filename, names=self.header)
        print('Dataset with {} entries'.format(self.df.__len__()))

############# Preprocessing ##########################
    # helper function (should not be called from other functions)
    def discretize(self, column):
        print("DEB Discretize column " + column)
        sorted_col = sorted(column)
        l = len(column)
        n = int(numpy.floor(l / 2))
        if l % 2 == 0:
            median_1 = numpy.median(sorted_col[0:n])
            median_2 = numpy.median(sorted_col[n:])
        else:
            median_1 = numpy.median(sorted_col[0:(n + 1)])
            median_2 = numpy.median(sorted_col[(n + 1):])
        iqr = median_2 - median_1
        h = 2 * iqr * (1 / numpy.cbrt(l))
        if h > 0:
            bins_number = int(numpy.ceil((column.max() - column.min()) / h))  # pandas.cut needs an int bin count
            new_col, bins = pandas.cut(column, bins_number, labels=False, retbins=True, include_lowest=False)
        else:
            new_col = column
            bins = []
        return new_col, bins

    # helper function (should not be called from other functions)
    def normalize(self, column):
        print("DEB Normalize")
        h = abs(column.min())
        new_col = column + h
        return new_col

    def discretization(self):
        print("DEB Discretization")
        replacements = {}
        bins = {}
        for i in range(0, self.df.shape[1]):  # for each feature
            bins[i] = []
            col = self.df.values[:, i]  # .values replaces the removed DataFrame.as_matrix()
            flag_str = False
            flag_float = False
            flag_negative = False

            for j in col:
                if type(j) is str: flag_str = True
                elif type(j) is float: flag_float = True
                elif type(j) is int and j < 0: flag_negative = True

            if flag_str:
                continue
            elif flag_negative:
                new_col = self.normalize(col)
                replacements[i] = new_col
                bins[i] = []
            elif flag_float:
                new_col, new_bins = self.discretize(col)
                replacements[i] = new_col
                bins[i] = new_bins
            for k, v in replacements.items():
                self.df.iloc[:, k] = v

    def preprocess(self, removeColumnsWithMissingValues = False):
        print("DEB Preprocessing")
        m = self.df.values

        # it is possible to encode enumerable features and to remove missing values
        with open('enumerable_columns.txt') as f:  # e.g., self.enumerable_columns = [0, 5, 8]
            self.enumerable_columns = f.read()
            if ',' in self.enumerable_columns:
                self.enumerable_columns = list(map(int, self.enumerable_columns.split(',')))
            else:
                self.enumerable_columns = [int(self.enumerable_columns)]
            print("enumerable columns are: " + str(self.enumerable_columns))
        le = preprocessing.LabelEncoder()
        for col in self.enumerable_columns:
            # if the column is enumerable
            self.df[self.header[col]] = le.fit_transform(self.df[self.header[col]])  #  A -> 0, B -> 1, ...

        #  remove cols with missing values (NaN), even though you risk to reduce too much the dataset
        if removeColumnsWithMissingValues:
            for i in range(0, m.shape[1]):
                if True in m[:, i]:
                    self.df = numpy.delete(self.df, 0, i)  # delete column


############## MLP architecture #######################
    def createTrainingAndTestSet(self):
        print("DEB Create Training set. Using formula 80-20%")
        self.trainSet, self.testSet = train_test_split(self.df, test_size=0.20)

    # heart of the algorithm!
    def createTSnew(self):
        print("DEB Create TS new")
        for i in range(0, self.trainSet.shape[0]):
            for j in range(0, self.repeatSometimes):
                # choose small random subset of features X_hat
                X_hat = [int(self.trainSet.shape[1] * random.random()) for i in range(0, self.dim_random_subset)]
                # insert into TSnew the sample: (x1...X_hat = 0 ... xk ; x1...xk)
                row = numpy.copy(self.trainSet.values[i, :])
                for feature in X_hat:  # here you set the random features to 0. X_hat represents the indices of such features
                    row[feature] = 0
                self.TSnew_X = self.TSnew_X.append(pandas.DataFrame(row.reshape(-1, len(row))))  # append row to TSnew_X
                copy = numpy.copy(self.trainSet.values[i, :])
                self.TSnew_Y = self.TSnew_Y.append(pandas.DataFrame(copy.reshape(-1, len(copy))))  # Y = x1...xk

############## Train & Predict ########################
    def train(self):
        print("DEB Training with TSnew")
        self.MLP = MLPRegressor(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
                                 beta_2=0.999, early_stopping=False, epsilon=1e-08,
                                 hidden_layer_sizes=len(self.TSnew_Y.columns), learning_rate='constant',
                                 learning_rate_init=0.001, max_iter=200, momentum=0.9,
                                 nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
                                 solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
                                 warm_start=False)
        self.MLP.fit(self.TSnew_X, self.TSnew_Y)

    def predict(self):
        print("DEB Test")

        testSetNew_X = pandas.DataFrame()
        testSetNew_Y = pandas.DataFrame()

        # preparing the test set - here you do the same as in function createTSnew:
        if not os.path.isfile('testSetNew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset)):
            for i in range(0, self.testSet.shape[0]):
                # choose small random subset of features X_hat
                X_hat = [int(self.testSet.shape[1] * random.random()) for i in range(0, self.dim_random_subset)]
                # insert into TSnew the sample: (x1...X_hat = 0 ... xk ; x1...xk)
                row = numpy.copy(self.testSet.values[i, :])
                for feature in X_hat:  # here you set the random features to 0. X_hat represents the indices of such features
                    row[feature] = 0
                testSetNew_X = testSetNew_X.append(pandas.DataFrame(row.reshape(-1, len(row))))
                copy = numpy.copy(self.testSet.values[i, :])
                testSetNew_Y = testSetNew_Y.append(pandas.DataFrame(copy.reshape(-1, len(copy))))  # Y = x1...xk
            testSetNew_X.to_csv('testSetNew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
            testSetNew_Y.to_csv('testSetNew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
        else:  # if the needed DataFrames have already been calculated, simply load them from disk
            testSetNew_X = pandas.read_csv('testSetNew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
            testSetNew_Y = pandas.read_csv('testSetNew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)

        # predictions
        self.MLP.predict(testSetNew_X)
        print("Score of method (repetitions={}, subset={}): {}%".format(self.repeatSometimes, self.dim_random_subset, self.MLP.score(testSetNew_X, testSetNew_Y) * 100))

########################## Helper functions ####################
    def writeCSV(self):
        print("DEB WriteCSV")
        self.trainSet.to_csv('trainSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
        self.testSet.to_csv('testSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
        self.TSnew_X.to_csv('TSnew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
        self.TSnew_Y.to_csv('TSnew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))

    def readCSV(self):
        print("DEB ReadCSV")
        self.trainSet = pandas.read_csv('trainSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
        self.testSet = pandas.read_csv('testSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
        self.TSnew_X = pandas.read_csv('TSnew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
        self.TSnew_Y = pandas.read_csv('TSnew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
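A minimal usage sketch (assumes header.txt, dataset.txt, and enumerable_columns.txt exist as the class expects; the constructor runs the whole pipeline, including training and prediction):

nn = NeuralNetwork(repeatSometimes=2, dim_random_subset=2)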
Example #36
                    max_iter=5000,
                    shuffle=False,
                    tol=0.00005,
                    momentum=0.9,
                    verbose=False)

mlpr_model = mlpr.fit(x_train, y_train)
print(f"Training iteration : {mlpr_model.n_iter_}")

#%% [markdown]
#  ## Testing phase & result
#  * Show MSE score of train phase
#  * Show MSE, R2 and variance score of test phase

#%%
mlpr_predict = mlpr.predict(x_test)

mse = mean_squared_error(y_test, mlpr_predict)
r2 = r2_score(y_test, mlpr_predict)
evs = explained_variance_score(y_test, mlpr_predict)

print(f"MSE train : {mlpr_model.loss_}")
print(f"MSE test : {mse}")
print(f"R2 score : {r2}")
print(f"Variance score : {evs}")

#%% [markdown]
# ## Training Loss Curve

#%%
plt.style.use('seaborn')
Example #37
from datetime import datetime

startTime = datetime.now()

fileTrain = open("fingerDataTrain.dat",'r')
fileVal = open("fingerDataVal.dat",'r')
trainingSet = np.loadtxt(fileTrain)
valSet = np.loadtxt(fileVal)
fileTrain.close()
fileVal.close()

trainX = trainingSet[:,:13]
trainY = trainingSet[:,14:]
valX = valSet[:,:13]
valY = valSet[:,14:]

for i in range(trainX.shape[1]):
    m = trainX[:,i].mean()
    s = trainX[:,i].std()
    trainX[:,i] = (trainX[:,i]-m)/s
    valX[:,i] = (valX[:,i]-m)/s


ann = MLPRegressor()
ann.fit(trainX,trainY)
sqError = ((ann.predict(valX)-valY)**2).mean()

plt.scatter(valX[:,1], valY[:,3],  color='black')
plt.plot(valX[:,1], ann.predict(valX)[:,3], color='blue', linewidth=3)

print(datetime.now() - startTime)
Example #38
def MLPRegressorr(data1, y):

    X_train, X_test, y_train, y_test = train_test_split(data1,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=Hcurstate)

    X_train_new = X_train.reset_index(drop=True)
    y_train_new = y_train.reset_index(drop=True)

    X_train_new = X_train_new.values
    y_train_new = y_train_new.values

    k = 5
    kf = KFold(n_splits=k, shuffle=True, random_state=Hcurstate)  # random_state requires shuffle=True
    avg_train_acc, avg_test_acc = 0, 0

    n_estimators_grid = [5, 25, 50, 75, 100, 500]
    max_depth_grid = [5, 10, 25, 50, 100, 500]

    avgsc_lst, avgsc_train_lst, avgsc_hld_lst = [], [], []
    avgsc, avgsc_train, avgsc_hld = 0, 0, 0

    i = 0
    for train_index, test_index in kf.split(X_train_new):
        #         if i>0: break
        #         i=i+1
        X_train_cur, X_test_cur = X_train_new[train_index], X_train_new[
            test_index]
        y_train_cur, y_test_cur = y_train_new[train_index], y_train_new[
            test_index]
        X_train_train, X_val, y_train_train, y_val = train_test_split(
            X_train_cur, y_train_cur, test_size=0.25, random_state=Hcurstate)

        print(X_train_train.shape)
        print(X_val.shape)

        bestPerformingModel = MLPRegressor(hidden_layer_sizes=(100, 100),
                                           max_iter=300,
                                           random_state=Hcurstate)
        bestPerformingModel = bestPerformingModel.fit(X_train_cur, y_train_cur)  # fit on the fold's training data, not the full training set
        print(bestPerformingModel.n_layers_)

        y_pred = bestPerformingModel.predict(X_train_cur)
        bscr_train = sqrt(mean_squared_error(y_pred, y_train_cur))

        y_pred = bestPerformingModel.predict(X_test_cur)
        bscr = sqrt(mean_squared_error(y_pred, y_test_cur))

        y_pred = bestPerformingModel.predict(X_test)
        bscr_hld = sqrt(mean_squared_error(y_pred, y_test))

        avgsc_train_lst.append(bscr_train)
        avgsc_lst.append(bscr)
        avgsc_hld_lst.append(bscr_hld)

        avgsc_train = avgsc_train + bscr_train
        avgsc = avgsc + bscr
        avgsc_hld = avgsc_hld + bscr_hld

        print(bscr_train)
        print(bscr)
        print(bscr_hld)

    print('5-fold Train, Validation, and Test loss:')
    print(avgsc_train_lst)
    print(avgsc_lst)
    print(avgsc_hld_lst)

    print('Avg Train, Validation, and Test loss:')
    print(avgsc_train / k)
    print(avgsc / k)
    print(avgsc_hld / k)

    y_pred = bestPerformingModel.predict(X_test)
    # note: a confusion matrix is not meaningful for continuous regression outputs

    return avgsc_train_lst, avgsc_lst, avgsc_hld_lst
Example #39
def train_evaluate(job):
    '''
    train MLP Regressor models for COF and intercept
    for the parameters given in the job statepoints

    evaluate using R^2, root mean square error, and
    mean absolute error
    '''
    for target in TARGETS:
        
        # read training data
        with open(root_dir + '/csv-files/{}_training_4.csv'.format(target)) as f:
            train = pd.read_csv(f, index_col=0)
        # read testing data
        with open(root_dir + '/csv-files/{}_testing.csv'.format(target)) as f:
            test = pd.read_csv(f, index_col=0)
        
        # Reduce the number of features by running the data through dimensionality reduction
        features_all = list(train.drop([target] + IDENTIFIERS, axis=1))
        train_red = dimensionality_reduction(train, features_all,
                                             filter_missing=True,filter_var=True,
                                             filter_corr=True,
                                             missing_threshold=0.4,
                                             var_threshold=0.02,
                                             corr_threshold=0.9)
        features = list(train_red.drop([target] + IDENTIFIERS, axis=1))
        
        # split train and test data into features (X) and target (y)
        X_train, y_train = train[features], train[target]
        X_test, y_test = test[features], test[target]
        
        # normalize input features                        
        scaler = MinMaxScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
        
        # train multi-layer perceptron neural network
        hidden_layers = [job.sp.num_perceptrons]*job.sp.num_layers
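        # e.g. num_perceptrons=50 and num_layers=3 would give hidden_layers = [50, 50, 50]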
        MLP = MLPRegressor(hidden_layer_sizes=hidden_layers, alpha=job.sp.alpha,
                           random_state=43, tol=1e-6, max_iter=1000)
        MLP.fit(X_train_scaled, y_train)
        
        # score the model on train and test data using RMSE, MAE, R^2
        # store the scores in job document
        r2_test = MLP.score(X_test_scaled, y_test)
        r2_train = MLP.score(X_train_scaled, y_train)
        job.doc['{}_r2_test'.format(target)] = r2_test
        job.doc['{}_r2_train'.format(target)] = r2_train
        
        y_test_pred = MLP.predict(X_test_scaled)
        y_train_pred = MLP.predict(X_train_scaled)
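        # squared=False makes mean_squared_error return the root mean squared error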
        rmse_test = mean_squared_error(y_test, y_test_pred, squared=False)
        rmse_train = mean_squared_error(y_train, y_train_pred, squared=False)
        job.doc['{}_rmse_test'.format(target)] = rmse_test
        job.doc['{}_rmse_train'.format(target)] = rmse_train
        
        mae_test = mean_absolute_error(y_test, y_test_pred)
        mae_train = mean_absolute_error(y_train, y_train_pred)
        job.doc['{}_mae_test'.format(target)] = mae_test
        job.doc['{}_mae_train'.format(target)] = mae_train
        
        # add features to json file in job workspace
        with open(job.fn('{}_features.json'.format(target)), 'w') as f:
            json.dump(features, f)
        
        # pickle out the model and scaler
        with open(job.fn('{}_trained.pickle'.format(target)), 'wb') as f:
            pickle.dump(MLP, f)
        with open(job.fn('{}_scaler.pickle'.format(target)), 'wb') as f:
            pickle.dump(scaler, f)
    
    # copy the job directory to external hard drive
    job_dir_path = pathlib.Path(root_dir + '/workspace/' + job.id)
    hard_drive_path = pathlib.Path('/mnt/d/neural-networks-with-signac/workspace/')
    process = Popen(['cp', '-r', job_dir_path, hard_drive_path], stdout=PIPE, stderr=PIPE)
    stdout, stderr = process.communicate()
    
    # remove trained model pickle files from job directory
    # because they are backed up to external hard drive and take up a lot of space
    for target in TARGETS:
        path_to_pickle = pathlib.Path(str(job_dir_path) + '/{}_trained.pickle'.format(target))
        process = Popen(['rm', path_to_pickle], stdout=PIPE, stderr=PIPE)
        stdout, stderr = process.communicate()
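# A minimal sketch (not part of the original workflow) of reloading a pickled
# model/scaler pair written above; after the cleanup step the pickles only
# survive in the external-drive copy of the job directory:
import pickle

backup_dir = '/mnt/d/neural-networks-with-signac/workspace/'  # path used above
with open(backup_dir + '<job-id>/COF_trained.pickle', 'rb') as f:  # <job-id> is a placeholder
    model = pickle.load(f)
with open(backup_dir + '<job-id>/COF_scaler.pickle', 'rb') as f:
    scaler = pickle.load(f)
# new_samples is assumed to be an array with the same feature columns used in training
predictions = model.predict(scaler.transform(new_samples))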
예제 #40
0
# print(X)
# print(y)

print('Begin Train')
m = MLPRegressor(verbose=True,
                 activation='logistic',
                 solver='adam',
                 early_stopping=False,
                 hidden_layer_sizes=(50,))
m.fit(X_train, y_train)
print(m)
print('End Train')

# print(y[:2])
print(m.predict(X_train[:5]))
print(y_train[:5])
print()
print(m.predict(X_test[:5]))
print(y_test[:5])

print(m.score(X_train, y_train))
print(m.score(X_test, y_test))

y_pred = m.predict(X_test)

correct = 0
total = 0
for i in range(0, y_test.size):
    total += 1
    if y_test[i] < 0 and y_pred[i] < 0:
        correct += 1
    elif y_test[i] >= 0 and y_pred[i] >= 0:
        correct += 1

print('Predictions with the correct sign:', correct, '/', total)
예제 #41
0
"""
axes.set_title("Data: " + file)
axes.set_ylabel('Normalized distant count')
axes.set_xlabel('Distance ($\AA$)')

axes.hist(y_train, 150, color='blue',normed=True, label='plot',linewidth=2,alpha=1.0)
plt.show()
"""

# Fit model
clf.fit(X_train, y_train)

# Compute and print r^2 score
print(clf.score(X_test, y_test))

# Store predicted energies
Ecmp = clf.predict(X_test)

Ecmp = gt.hatokcal * (Ecmp)
Eact = gt.hatokcal * (y_test)

# Compute RMSE in kcal/mol
rmse = gt.calculaterootmeansqrerror(Ecmp, Eact)

# End timer
_t1e = tm.time()
print("Computation complete. Time: " + "{:.4f}".format((_t1e - _t1b)) + "s")

# Output model information
print("RMSE: " + str(rmse))
# print(clf.coef_)
# print(clf.intercept_)
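# gt.calculaterootmeansqrerror is a project-specific helper; the same RMSE can
# be checked with scikit-learn (a sketch, assuming Ecmp and Eact as above):
from math import sqrt
from sklearn.metrics import mean_squared_error

rmse_sklearn = sqrt(mean_squared_error(Eact, Ecmp))
print("RMSE (sklearn): " + str(rmse_sklearn))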
예제 #42
0
# split into training and validation dataset
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1)

# train
mlpr_model = MLPRegressor()
mlpr_model.fit(X_train, y_train)


#%% 19 - 1
# start to predict
y_pred = mlpr_model.predict(X_test)


#%% 19 - 2
# transform
threshold = 0.5
y_pred2 = pd.DataFrame({'Predicted': y_pred})
y_pred2 = transform_predicted(y_pred2)
y_pred2.head(10)


#%% 19 - 3
y_test2 = pd.DataFrame({'Survived': y_test})
y_test2.head(10)
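# transform_predicted is not shown in this excerpt; a minimal hypothetical
# version consistent with the threshold defined above might look like this:
def transform_predicted(pred_df, threshold=0.5):
    # binarize continuous predictions into 0/1 labels (illustrative sketch only)
    pred_df['Predicted'] = (pred_df['Predicted'] >= threshold).astype(int)
    return pred_df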
예제 #43
0
# Example with a Regressor using the scikit-learn library
# example for the XOR gate
from sklearn.neural_network import MLPRegressor 

X = [[0., 0.],[0., 1.], [1., 0.], [1., 1.]] # each one of the entries 00 01 10 11
y = [0, 1, 1, 0] # outputs for each one of the entries

# check http://scikit-learn.org/dev/modules/generated/sklearn.neural_network.MLPRegressor.html#sklearn.neural_network.MLPRegressor
#for more details
reg = MLPRegressor(hidden_layer_sizes=(5,), activation='tanh', solver='sgd', alpha=0.001, learning_rate='constant',
                   max_iter=10000, random_state=None, verbose=False, warm_start=False, momentum=0.8, tol=10e-8, shuffle=False)

reg.fit(X,y)

outp =  reg.predict([[0., 0.],[0., 1.], [1., 0.], [1., 1.]])

print('Results:')
print('0 0 0:', outp[0])
print('0 1 1:', outp[1])
print('1 0 1:', outp[2])
print('1 1 0:', outp[3])
print('Score:', reg.score(X, y))
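# To read the regressor output as hard 0/1 XOR labels, one option (a sketch,
# not in the original example) is to round the predictions:
import numpy as np
print('Rounded:', np.rint(outp))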
예제 #44
0
import numpy as np
import joblib
from sklearn import model_selection
from sklearn.neighbors import KNeighborsRegressor

KNN = KNeighborsRegressor()
knn_param_grid = {'n_neighbors':[3,10]}
knn_grid = model_selection.GridSearchCV(KNN, knn_param_grid, cv=10, n_jobs=25, verbose=1, scoring='neg_mean_squared_error')
knn_grid.fit(X_train, y_train)
print('Best Params: ' + str(knn_grid.best_params_))
KNN = KNeighborsRegressor(n_neighbors=10)
KNN.fit(X_train, y_train)
y_predict_knn = KNN.predict(X_test)
mae_knn = np.abs(y_predict_knn - y_test).mean()
joblib.dump(KNN, 'KNN.model')
print(mae_knn)
#mlp
from sklearn.neural_network import MLPRegressor
MLP = MLPRegressor(hidden_layer_sizes=(300, 200, 200), max_iter=100, activation='relu')
MLP.fit(X_train, y_train)
y_predict_MLP = MLP.predict(X_test)
mae_MLP = np.abs(y_predict_MLP - y_test).mean()
joblib.dump(MLP, 'MLP.model')
print(mae_MLP)
#xgb
import xgboost as xgb
x_regress = xgb.XGBRegressor(max_depth=20, n_estimators=5000)
x_regress_param_grid = {'max_depth': [5, 20]}
x_regress_grid = model_selection.GridSearchCV(x_regress, x_regress_param_grid, cv=10, n_jobs=25, verbose=1, scoring='neg_mean_squared_error')
x_regress_grid.fit(X_train, y_train)
print('Best Params: ' + str(x_regress_grid.best_params_))
joblib.dump(x_regress_grid, 'x_regress_grid.model')
y_predict_xgb = x_regress_grid.predict(X_test)

mae_xgb = np.abs(y_predict_xgb - y_test).mean()
# model ensembling
# simple averaging