Example #1
def test_lbfgs_regression():
    # Test lbfgs on the boston dataset, a regression problem.
    X = Xboston
    y = yboston
    for activation in ACTIVATION_TYPES:
        mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50,
                           max_iter=150, shuffle=True, random_state=1,
                           activation=activation)
        mlp.fit(X, y)
        if activation == 'identity':
            assert_greater(mlp.score(X, y), 0.84)
        else:
            # Non linear models perform much better than linear bottleneck:
            assert_greater(mlp.score(X, y), 0.95)
Example #2
def test_multioutput_regression():
    # Test that multi-output regression works as expected
    X, y = make_regression(n_samples=200, n_targets=5)
    mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50, max_iter=200,
                       random_state=1)
    mlp.fit(X, y)
    assert_greater(mlp.score(X, y), 0.9)
Example #3
def test_lbfgs_regression():
    # Test lbfgs on the boston dataset, a regression problem.
    X = Xboston
    y = yboston
    for activation in ACTIVATION_TYPES:
        mlp = MLPRegressor(solver='lbfgs',  # pre-release dev versions spelled this algorithm='l-bfgs'
                           hidden_layer_sizes=50,
                           max_iter=150, shuffle=True, random_state=1,
                           activation=activation)
        mlp.fit(X, y)
        assert_greater(mlp.score(X, y), 0.95)
Example #4
def test_partial_fit_regression():
    # Test partial_fit on regression.
    # `partial_fit` should yield the same results as `fit` for regression.
    X = Xboston
    y = yboston

    for momentum in [0, .9]:
        mlp = MLPRegressor(solver='sgd', max_iter=100, activation='relu',
                           random_state=1, learning_rate_init=0.01,
                           batch_size=X.shape[0], momentum=momentum)
        with warnings.catch_warnings(record=True):
            # catch convergence warning
            mlp.fit(X, y)
        pred1 = mlp.predict(X)
        mlp = MLPRegressor(solver='sgd', activation='relu',
                           learning_rate_init=0.01, random_state=1,
                           batch_size=X.shape[0], momentum=momentum)
        for i in range(100):
            mlp.partial_fit(X, y)

        pred2 = mlp.predict(X)
        assert_almost_equal(pred1, pred2, decimal=2)
        score = mlp.score(X, y)
        assert_greater(score, 0.75)
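Outside the test, the same partial_fit API supports chunked (streaming) training; a minimal self-contained sketch on synthetic data, with every name below illustrative:

# Sketch: feed the regressor mini-batches instead of the full array.
import numpy as np
from sklearn.neural_network import MLPRegressor

rng = np.random.RandomState(0)
X_stream = rng.rand(300, 4)
y_stream = X_stream.sum(axis=1)
mlp = MLPRegressor(solver='sgd', learning_rate_init=0.01, random_state=1)
for start in range(0, len(X_stream), 50):
    mlp.partial_fit(X_stream[start:start + 50], y_stream[start:start + 50])
print(mlp.score(X_stream, y_stream))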
Example #5
import numpy as np             # assumed imports; the original project defines create_X, x, y, z and n
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neural_network import MLPRegressor

X = create_X(x, y, n=n)

# only training data, no advanced splitting
X_train = X
Y_train = z
# only one simple layer with 100 neurons
n_hidden_neurons = 100
epochs = 100
eta_vals = np.logspace(-5, 1, 7)
lmbd_vals = np.logspace(-5, 1, 7)
# store the models for later use
DNN_scikit = np.zeros((len(eta_vals), len(lmbd_vals)), dtype=object)
# note: .score() returns R^2 for a regressor, not classification accuracy
train_accuracy = np.zeros((len(eta_vals), len(lmbd_vals)))
sns.set()
for i, eta in enumerate(eta_vals):
    for j, lmbd in enumerate(lmbd_vals):
        dnn = MLPRegressor(hidden_layer_sizes=(n_hidden_neurons,), activation='logistic',
                           alpha=lmbd, learning_rate_init=eta, max_iter=epochs)
        dnn.fit(X_train, Y_train)
        DNN_scikit[i][j] = dnn
        train_accuracy[i][j] = dnn.score(X_train, Y_train)

fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(train_accuracy, annot=True, ax=ax, cmap="viridis")
ax.set_title("Training $R^2$")
ax.set_ylabel(r"$\eta$")
ax.set_xlabel(r"$\lambda$")
plt.show()
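A small follow-up sketch to read the best cell out of the grid just computed (np.unravel_index maps the flat argmax back to grid coordinates):

# Sketch: locate the best-scoring (eta, lambda) cell in the grid above.
best_i, best_j = np.unravel_index(np.argmax(train_accuracy), train_accuracy.shape)
print("best eta:", eta_vals[best_i], "best lambda:", lmbd_vals[best_j])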


Example #6
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor

scaler = StandardScaler()

#X_train, X_test, y_train, y_test = train_test_split(train_data, train_target, test_size=0.20, random_state=0)

#X_train_scaled = scaler.fit(X_train).transform(X_train)
#X_test_scaled = scaler.fit(X_test).transform(X_test)
train_data_scaled = scaler.fit_transform(train_data)
test_data_scaled = scaler.transform(test_data)  # transform only: reuse the statistics fitted on the training data

acc_train = []
acc_test = []
alp = [0.0001, 0.001, 0.01, 0.1, 1]
for i in range(1, 100, 10):
    for j in alp:
        mlp = MLPRegressor(max_iter = 200, solver='lbfgs', hidden_layer_sizes = (i, i), alpha = j, activation='identity')
        mlp.fit(train_data, train_target)

        acc_train.append(mlp.score(train_data, train_target))
        acc_test.append(mlp.score(test_data, test_target))


# the outer loop runs 10 layer sizes and the inner loop 5 alphas, so the grid is 10x5
print(np.reshape(acc_test, (10, 5)))
print(np.amax(acc_test))
#print(len(acc_test))
#print(np.amax(acc_train))
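A hedged alternative to the manual scaling above: a Pipeline refits the scaler only on the data passed to fit, so test statistics can never leak (the layer sizes and alpha below are illustrative):

# Sketch: scaling and regression bundled so fit() only ever sees training data.
from sklearn.pipeline import make_pipeline
pipe = make_pipeline(StandardScaler(),
                     MLPRegressor(max_iter=200, solver='lbfgs',
                                  hidden_layer_sizes=(50, 50), alpha=0.01,
                                  activation='identity'))
pipe.fit(train_data, train_target)
print(pipe.score(test_data, test_target))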
Example #7
File: stock.py  Project: dashqua/mlpstock
N_data = len(X)
N_train = int(0.8 * N_data)
N_test = N_data - N_train

X_train = X.iloc[0:N_train, :]
X_test = X.iloc[N_train + 1:N_data - 1, :]  # note: this skips row N_train and drops the last row
y_train = y.iloc[0:N_train]
y_test = y.iloc[N_train + 1:N_data - 1]

mlp = MLPRegressor(hidden_layer_sizes=(10, ))
mlp.fit(X_train, y_train)
y_pred_mlp = mlp.predict(X_test)

#Compute R^2 and root mean squared error
print("R^2 = {}".format(mlp.score(X_test, y_test)))
rmse_mlp = np.sqrt(mean_squared_error(y_test, y_pred_mlp))
print("Root mean square error = {}".format(rmse_mlp))

y_test_pred_mlp = pd.DataFrame({'y_test': y_test, 'y_pred_mlp': y_pred_mlp},
                               index=X_test.index)

y_test_pred_mlp.plot(legend=True)
plt.title('MLP Apple Stock Prediction')

reg = LinearRegression()
reg.fit(X_train, y_train)
y_pred_reg = reg.predict(X_test)
Example #8
#X_train = scaler.transform(X_train)
#X_test = scaler.transform(X_test)

trainSize = len(X) * 3 // 4  # integer division: a float cannot be used as a slice index

X_train = X[:trainSize]
y_train = y[:trainSize]
X_val = X[trainSize:]
y_val = y[trainSize:]

mlp = MLPRegressor(hidden_layer_sizes=(100, 50),
                   random_state=1,
                   max_iter=1,
                   warm_start=True,
                   learning_rate_init=0.000001)

for i in range(1000):
    mlp.fit(X_train, y_train)
    if i % 100 == 0:
        #print("Validation set score: %f" % mlp.score(X_val, y_val))
        #print("Training set score: %f" % mlp.score(X_train, y_train))
        p = mlp.predict(X_val)
        mse = MSE(y_val, p)
        rmse = sqrt(mse)
        print(rmse)

print("Training set score: %f" % mlp.score(X_train, y_train))
Example #9
import sklearn as sk
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
import numpy as np
import random
a = np.random.uniform(0, 1, 500)
b = np.random.uniform(0, 1, 500)

d = a * b

data = np.array(list(zip(a, b)))
print(data)
total = 0

for i in range(30):
    XTrain, XTest, YTrain, YTest = train_test_split(data, d, test_size=0.2)

    clf = MLPRegressor(hidden_layer_sizes=(50, 50),
                       activation='relu',
                       solver='lbfgs')
    clf = clf.fit(XTrain, YTrain)
    out = clf.score(XTest, YTest)
    total += out
print("Output on is:", total / 30)
Example #10
# Scale the response variable y
scaler_y = StandardScaler()
y = scaler_y.fit_transform(y)  # note: StandardScaler expects 2-D input, e.g. y.reshape(-1, 1)

# ==== Building the training set (70%) and test set (30%) =======
from sklearn.model_selection import train_test_split
X_treinamento, X_teste, y_treinamento, y_teste = train_test_split(
    X, y, test_size=0.3, random_state=0)

# Import and set up the neural network
from sklearn.neural_network import MLPRegressor
regressor = MLPRegressor(hidden_layer_sizes=(9, 9))

# ===== Running the training =====
regressor.fit(X_treinamento, y_treinamento)

# ==== Checking the score to see how the predictor behaves on the training data ====
score = regressor.score(X_treinamento, y_treinamento)

# ==== Checking the score on the test set ====
regressor.score(X_teste, y_teste)

# ===== Making some predictions =====
previsoes = regressor.predict(X_teste)
y_teste = scaler_y.inverse_transform(y_teste)
previsoes = scaler_y.inverse_transform(previsoes)

# ===== Another way to look at the difference =====
from sklearn.metrics import mean_absolute_error
mae = mean_absolute_error(y_teste, previsoes)
Example #11
#KNN
knn = KNeighborsRegressor()
knn.fit(X_16, y_16)
knn_score = knn.score(X_17,y_17)

#svm
svm = SVR()
svm.fit(X_16, y_16)
svm_score = svm.score(X_17,y_17)

# #xgboost
# xgb = xgboost.XGBRegressor()
# xgb.fit(X_16, y_16)
# xgb_score = xgb.score(X_17,y_17)

#mlp regressor
mlp = MLPRegressor(hidden_layer_sizes = (100,100,100,100), random_state=444)
mlp.fit(X_16,y_16)
mlp_score = mlp.score(X_17, y_17)

#Add the MLP predictions to the data frame
mlp_predicts = mlp.predict(X_16)

hour_totals_17['Predicted_mlp'] = pd.Series(mlp_predicts)

hour_totals_17['Crimes'] = hour_totals_17['Crimes']/365
hour_totals_17['Predicted_mlp'] = hour_totals_17['Predicted_mlp']/365
hour_totals_17 = np.round(hour_totals_17,2)

hour_totals_17.to_json("./nairobi_crime_predictions.json", orient='records', double_precision=2)
Example #12
                fit_time = fit_end - fit_start
                fit_min = int(fit_time / 60)
                fit_sec = fit_time % 60
                print(
                    'Fitting completed in {} minutes {} seconds. Saving model to .pkl file \n'
                    .format(fit_min, fit_sec))
                model_filename = model_folder + output_text + '_and_alpha' + str(
                    av) + '.pkl'
                joblib.dump(nn_model, model_filename)

                print('Predicting...\n')
                y_pred_train = nn_model.predict(X_t)
                print('Validating...\n')
                y_pred_val = nn_model.predict(X_val)
                print('Getting scores\n')
                scr = nn_model.score(X_t, y_t)
                scr_val = nn_model.score(X_val, y_val)
                scrs.append(scr)
                scrs_val.append(scr_val)

                rmse_train_pers = (np.mean(
                    (y_pred_persistence -
                     y_t)**2))**0.5  #our persistence score
                rmse_val_pers = (np.mean(
                    (y_pred_persistence_val - y_val)**2))**0.5
                rmse_train_pers_scores.append(rmse_train_pers)
                rmse_validation_pers_scores.append(rmse_val_pers)

                rmse_val = (np.mean((y_pred_val - y_val)**2))**0.5
                rmse_train = (np.mean((y_pred_train - y_t)**2))**0.5
Example #13
import pandas as pd            # assumed imports; pd and np are used below
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
data = pd.read_csv("new_dataset.csv")
data.head()
data = data.drop(columns='id')
data = data.drop(columns='date')
data.head()

y = data.loc[:, 'price']
X = data.drop(columns='price')

# 1 using input data
#with open('input.txt') as my_file:
#    test_array = my_file.readlines()
#regressor = MLPRegressor(random_state=0)
#regressor.fit(X, y)
#y_pred = regressor.predict(np.array(test_array).reshape(1, 16))

#sc_X = StandardScaler()
#sc_y = StandardScaler()
#X = sc_X.fit_transform(X)
#y = sc_y.fit_transform(y)

#2 using train data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
regressor = MLPRegressor(random_state=0)
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)

print('evaluating estimator performance: '+str(regressor.score(X_test, y_test)))
print(str(int(y_pred[0])) + '$')
Example #14
X_train = pd.read_csv(result["TrainFileName"],
                      usecols=result["OtherAttributes"])
y_train = pd.read_csv(result["TrainFileName"],
                      usecols=[result["RequiredAttribute"]])

X_test = pd.read_csv(result["TestFileName"], usecols=result["OtherAttributes"])
y_test = pd.read_csv(result["TestFileName"],
                     usecols=[result["RequiredAttribute"]])

model = MLPRegressor(hidden_layer_sizes=(10, 10), activation='relu')

model.fit(X_train, y_train.values.ravel())  # ravel: MLPRegressor expects a 1-D target
y_pred = model.predict(X_test)

print(model.score(X_test, y_test))
print(mean_absolute_error(y_test, y_pred))
print(sqrt(mean_squared_error(y_test, y_pred)))

y_actual = y_test.values.ravel()

x_axis = range(len(y_actual))  # one index per test row instead of a hard-coded 715

plt.figure()
plt.plot(x_axis, y_actual, color='b', label='actual')
plt.plot(x_axis, y_pred, color='r', label='predicted')
plt.legend()
plt.show()
Example #15
                               activation='tanh',
                               solver='sgd',
                               learning_rate_init=0.01,
                               max_iter=1000,
                               random_state=1,
                               validation_fraction=0.1)
######################################################################################################################
cv = KFold(n_splits=10, random_state=1, shuffle=True)
for train_index, test_index in cv.split(x):
    X_train, X_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    yy_train, yy_test = yy[train_index], yy[test_index]
    #
    MLP_Regressor_1.fit(X_train.values.reshape(-1, 1), yy_train)
    scores_1.append(
        MLP_Regressor_1.score(X_test.values.reshape(-1, 1), yy_test))
    #
    MLP_Regressor_2.fit(X_train.values.reshape(-1, 1), yy_train)
    scores_2.append(
        MLP_Regressor_2.score(X_test.values.reshape(-1, 1), yy_test))
    #
    MLP_Regressor_3.fit(X_train.values.reshape(-1, 1), yy_train)
    scores_3.append(
        MLP_Regressor_3.score(X_test.values.reshape(-1, 1), yy_test))
    #
    MLP_Regressor_4.fit(X_train.values.reshape(-1, 1), yy_train)
    scores_4.append(
        MLP_Regressor_4.score(X_test.values.reshape(-1, 1), yy_test))
    #
    MLP_Regressor_5.fit(X_train.values.reshape(-1, 1), yy_train)
    scores_5.append(
        MLP_Regressor_5.score(X_test.values.reshape(-1, 1), yy_test))
Example #16
class NeuralNetwork:
    ################# Fields #######################
    # dataset_filename: string - path to dataset
    # header: list - header of the dataset
    # enumerable_columns: list - the enumerable columns

    # df: matrix - data set
    # training_set: matrix - training set
    # test_set: matrix - test set

    # TSnew_X: matrix - inputs of TSnew (see documentation)
    # TSnew_Y: matrix - targets of TSnew (see documentation)
    # dim_random_subset: int - number of features to set to 0 (see documentation)
    # repeatSometimes: int - number of for cycles (see documentation)

    def __init__(self, repeatSometimes=2, dim_random_subset=2):
        # variables initialization
        self.enumerable_columns = []
        self.dataset_filename = ""
        self.header = []
        self.df = pandas.DataFrame()
        self.trainSet = pandas.DataFrame()
        self.testSet = pandas.DataFrame()
        self.TSnew_X = pandas.DataFrame()
        self.TSnew_Y = pandas.DataFrame()

        self.repeatSometimes = repeatSometimes
        self.dim_random_subset = dim_random_subset

        # This code takes a long time, so intermediate results are cached on disk
        if not os.path.isfile('trainSet{}-{}.csv'.format(repeatSometimes, dim_random_subset)):
            self.readDataset()
            self.discretization()
            self.preprocess()

            # creating TSnew
            self.createTrainingAndTestSet()
            self.createTSnew()

            # backup encoded sets
            self.writeCSV()
        else:
            self.readCSV()

        # training and test
        self.train()
        self.predict()


    def readDataset(self):
        print("DEB Read dataset")

        with open('header.txt') as f:
            self.header = f.read().split(',')
            print(self.header)
        with open('dataset.txt') as f:
            self.dataset_filename = f.read()
            print(self.dataset_filename)
        self.df = pandas.read_csv(self.dataset_filename, names=self.header)
        print('Dataset with {} entries'.format(len(self.df)))

############# Preprocessing ##########################
    # helper function (should not be called from other functions)
    def discretize(self, column):
        print("DEB Discretize column " + column)
        sorted_col = sorted(column)
        l = len(column)
        n = int(numpy.floor(l / 2))
        if l % 2 == 0:
            median_1 = numpy.median(sorted_col[0:n])
            median_2 = numpy.median(sorted_col[n:])
        else:
            median_1 = numpy.median(sorted_col[0:(n + 1)])
            median_2 = numpy.median(sorted_col[(n + 1):])
        iqr = median_2 - median_1
        h = 2 * iqr * (1 / numpy.cbrt(l))
        if h > 0:
            bins_number = int(numpy.ceil((column.max() - column.min()) / h))  # pandas.cut needs an int
            new_col, bins = pandas.cut(column, bins_number, labels=False, retbins=True, include_lowest=False)
        else:
           new_col = column
           bins = []
        return new_col, bins

    # helper function (should not be called from other functions)
    def normalize(self, column):  # needs self: it is called as self.normalize(col) below
        print("DEB Normalize")
        h = abs(column.min())
        new_col = column + h
        return new_col

    def discretization(self):
        print("DEB Discretization")
        replacements = {}
        bins = {}
        for i in range(0, self.df.shape[1]):  # for each feature
            bins[i] = []
            col = self.df.to_numpy()[:, i]  # as_matrix() was removed in pandas 1.0
            flag_str = False
            flag_float = False
            flag_negative = False

            for j in col:
                if type(j) is str: flag_str = True
                elif type(j) is float: flag_float = True
                elif type(j) is int and j < 0: flag_negative = True

            if flag_str:
                continue
            elif flag_negative:
                new_col = self.normalize(col)
                replacements[i] = new_col
                bins[i] = []
            elif flag_float:
                new_col, new_bins = self.discretize(col)
                replacements[i] = new_col
                bins[i] = new_bins
        # apply the collected replacements once, after all features have been scanned
        for k, v in replacements.items():
            self.df.iloc[:, k] = v

    def preprocess(self, removeColumnsWithMissingValues=False):
        print("DEB Preprocessing")
        m = self.df.to_numpy()  # as_matrix() was removed in pandas 1.0

        # it is possible to encode enumerable features and to remove missing values
        with open('enumerable_columns.txt') as f:  # e.g., self.enumerable_columns = [0, 5, 8]
            self.enumerable_columns = f.read()
            if self.enumerable_columns.__contains__(','):
                self.enumerable_columns = list(map(int, self.enumerable_columns.split(',')))
            else:
                self.enumerable_columns = [int(self.enumerable_columns)]
            print("enumerable columns are: " + str(self.enumerable_columns))
        le = preprocessing.LabelEncoder()
        for col in self.enumerable_columns:
            # if the column is enumerable
            self.df[self.header[col]] = le.fit_transform(self.df[self.header[col]])  #  A -> 0, B -> 1, ...

        #  remove cols with missing values (NaN), even though this risks shrinking the dataset too much
        if removeColumnsWithMissingValues:
            self.df = self.df.dropna(axis=1)  # the original numpy.delete call had its arguments in the wrong order


############## MLP architecture #######################
    def createTrainingAndTestSet(self):
        print("DEB Create Training set. Using formula 80-20%")
        self.trainSet, self.testSet = train_test_split(self.df, test_size=0.20)

    # heart of the algorithm!
    def createTSnew(self):
        print("DEB Create TS new")
        for i in range(0, self.trainSet.shape[0]):
            for j in range(0, self.repeatSometimes):
                # choose small random subset of features X_hat
                X_hat = [int(self.trainSet.shape[1] * random.random()) for i in range(0, self.dim_random_subset)]
                # insert into TSnew the sample: (x1...X_hat = 0 ... xk ; x1...xk)
                row = numpy.copy(self.trainSet.to_numpy()[i, :])
                for feature in X_hat:  # set the randomly chosen features to 0; X_hat holds their indices
                    row[feature] = 0
                self.TSnew_X = pandas.concat([self.TSnew_X, pandas.DataFrame(row.reshape(-1, len(row)))])  # append row to TSnew_X (DataFrame.append was removed in pandas 2.0)
                copy = numpy.copy(self.trainSet.to_numpy()[i, :])
                self.TSnew_Y = pandas.concat([self.TSnew_Y, pandas.DataFrame(copy.reshape(-1, len(copy)))])  # Y = x1...xk

############## Train & Predict ########################
    def train(self):
        print("DEB Training with TSnew")
        self.MLP = MLPRegressor(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
                                 beta_2=0.999, early_stopping=False, epsilon=1e-08,
                                 hidden_layer_sizes=len(self.TSnew_Y.columns), learning_rate='constant',
                                 learning_rate_init=0.001, max_iter=200, momentum=0.9,
                                 nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
                                 solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
                                 warm_start=False)
        self.MLP.fit(self.TSnew_X, self.TSnew_Y)

    def predict(self):
        print("DEB Test")

        testSetNew_X = pandas.DataFrame()
        testSetNew_Y = pandas.DataFrame()

        # preparing the test set - here you do the same as in function createTSnew:
        if not os.path.isfile('testSetNew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset)):
            for i in range(0, self.testSet.shape[0]):
                # choose small random subset of features X_hat
                X_hat = [int(self.testSet.shape[1] * random.random()) for i in range(0, self.dim_random_subset)]
                # insert into TSnew the sample: (x1...X_hat = 0 ... xk ; x1...xk)
                row = numpy.copy(self.testSet.to_numpy()[i, :])
                for feature in X_hat:  # set the randomly chosen features to 0; X_hat holds their indices
                    row[feature] = 0
                testSetNew_X = pandas.concat([testSetNew_X, pandas.DataFrame(row.reshape(-1, len(row)))])
                copy = numpy.copy(self.testSet.to_numpy()[i, :])
                testSetNew_Y = pandas.concat([testSetNew_Y, pandas.DataFrame(copy.reshape(-1, len(copy)))])  # Y = x1...xk
            testSetNew_X.to_csv('testSetNew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))  # was writing testSetNew_Y to both files
            testSetNew_Y.to_csv('testSetNew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
        else:  # if the needed DataFrames have already been calculated, simply load them from disk
            testSetNew_X = pandas.read_csv('testSetNew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
            testSetNew_Y = pandas.read_csv('testSetNew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)

        # predictions
        self.MLP.predict(testSetNew_X)
        print("Score of method (repetitions={}, subset={}): {}%".format(self.repeatSometimes, self.dim_random_subset, self.MLP.score(testSetNew_X, testSetNew_Y) * 100))

########################## Helper functions ####################
    def writeCSV(self):
        print("DEB WriteCSV")
        self.trainSet.to_csv('trainSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
        self.testSet.to_csv('testSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
        self.TSnew_X.to_csv('TSnew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
        self.TSnew_Y.to_csv('TSnew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))

    def readCSV(self):
        print("DEB ReadCSV")
        self.trainSet = pandas.read_csv('trainSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
        self.testSet = pandas.read_csv('testSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
        self.TSnew_X = pandas.read_csv('TSnew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
        self.TSnew_Y = pandas.read_csv('TSnew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
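A minimal usage sketch (assuming header.txt, dataset.txt and enumerable_columns.txt sit next to the script, as the class expects; the constructor runs the whole pipeline):

# Sketch: constructing the class reads/caches the data, then trains and evaluates.
if __name__ == '__main__':
    NeuralNetwork(repeatSometimes=2, dim_random_subset=2)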
Example #17
def runMLP(hd_layers_p=(30, ),
           activation_p='tanh',
           solver_p='adam',
           learn_rate_p=0.001,
           early_stopping_p=True,
           momentum_p=0.9,
           max_iter_p=1000):

    k_fold_lenght = 4
    scoreList = []
    MLPs = []
    y_Test_Total = []
    y_Pred_Total = []

    (X, y) = readData()
    #(X_Train_Container, y_Train_Container, X_Test_Container,y_Test_Container) = dataStratification(X, y, k_fold_lenght)

    #for i in range(k_fold_lenght):
    mlp = MLPRegressor(hidden_layer_sizes=hd_layers_p,
                       activation=activation_p,
                       solver=solver_p,
                       learning_rate_init=learn_rate_p,
                       max_iter=max_iter_p,
                       momentum=momentum_p)

    #Get subsets
    X_Train, X_Test, y_Train, y_Test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42)

    print('Size of X_Train: ' + str(len(X_Train)))
    print('Size of y_Train: ' + str(len(y_Train)))
    print('Size of X_Test: ' + str(len(X_Test)))
    print('Size of y_Test: ' + str(len(y_Test)))

    y_Test_Total += y_Test
    #Scale
    scaler = StandardScaler()
    scaler.fit(X_Train)
    X_Train = scaler.transform(X_Train)
    X_Test = scaler.transform(X_Test)
    #Train
    mlp.fit(X_Train, y_Train)
    #print "Weight matrix", mlp.coefs_

    #Test
    scoreList.append(mlp.score(X_Test, y_Test))
    #y_Pred_Total += mlp.predict(X_Test)
    print "\tTest score ", "\t", scoreList[0]
    #end for
    #End Train and run MLP
    mean = sum(scoreList) / len(scoreList)

    #print vetor1
    #print type(vetor1)

    #cnf_matrix = confusion_matrix(y_Test_Total,y_Pred_Total)

    #print cnf_matrix

    np.set_printoptions(precision=2)

    # Plot non-normalized confusion matrix
    #plt.figure()
    #plot_confusion_matrix(cnf_matrix, classes=['M','F','I'],
    #                      title='Confusion matrix, without normalization')

    # Plot normalized confusion matrix
    #plt.figure()
    #plot_confusion_matrix(cnf_matrix, classes=['M','F','I'], normalize=True,
    #                     title='Normalized confusion matrix')

    #plt.show()

    return mean
Example #18
confidence1kfold = model_selection.cross_val_score(clf1, X, y, cv=kfold)
print("SVR (KFold) : %.3f%% (%.3f%%)" %
      (confidence1kfold.mean() * 100.0, confidence1kfold.std() * 100.0))

clf1.fit(X_train, y_train)
confidence1 = clf1.score(X_test, y_test)
print("SVR : %.3f%%" % (confidence1 * 100.0))

clf2 = MLPRegressor()

confidence2kfold = model_selection.cross_val_score(clf2, X, y, cv=kfold)
print("MLP (KFold) : %.3f%% (%.3f%%)" %
      (confidence2kfold.mean() * 100.0, confidence2kfold.std() * 100.0))

clf2.fit(X_train, y_train)
confidence2 = clf2.score(X_test, y_test)
print("MLP : %.3f%%" % (confidence2 * 100.0))

clf = LinearRegression()

confidencekfold = model_selection.cross_val_score(clf, X, y, cv=kfold)
print("LR (KFold) : %.3f%% (%.3f%%)" %
      (confidencekfold.mean() * 100.0, confidencekfold.std() * 100.0))

clf.fit(X_train, y_train)
confidence = clf.score(X_test, y_test)
print("LR : %.3f%%" % (confidence * 100.0))

#Add forecasting code for submission on 11th November, 2017
forecast_set = clf.predict(X_lately)
#print(forecast_set, confidence, forecast_out)
Example #19
        Drawbacks:
            strongly affected by the initial weights
            many hyperparameters to tune
            sensitive to feature scaling
        Complexity: O(n*m*h^k*o*i)
            n -- number of samples
            m -- number of features
            h -- number of hidden layers
            k -- number of neurons
            o -- number of output neurons
            i -- number of iterations
'''
rg = MLPRegressor(hidden_layer_sizes=(100, ), activation='relu', solver='adam', alpha=0.0001, batch_size='auto', learning_rate='constant', learning_rate_init=0.001, power_t=0.5, max_iter=200, shuffle=True, random_state=None, tol=0.0001, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
rg.fit(X_train,Y_train)
Y_pre = rg.predict(X_test)
rg.score(X_test,Y_test)
rg.loss_
'''
    hidden_layer_sizes                      the ith element gives the number of neurons in the ith hidden layer
    activation                              activation function for the hidden layers
        identity                                f(x) = x
        logistic                                f(x) = 1 / (1 + exp(-x))
        tanh                                    f(x) = tanh(x)
        relu                                    f(x) = max(0, x)
    solver                                  weight optimization method
        lbfgs                                   quasi-Newton method
        sgd                                     stochastic gradient descent
        adam                                    a stochastic gradient-based optimizer
    alpha                                   penalty coefficient of the L2 regularization term
    batch_size                              minibatch size for the stochastic optimizers
    learning_rate                           schedule for updating the learning rate of the weight updates
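A quick numeric check of the activation formulas listed above (a standalone sketch; nothing here comes from the snippet itself):

import numpy as np

x = np.array([-2.0, 0.0, 2.0])
print(x)                        # identity: f(x) = x
print(1 / (1 + np.exp(-x)))     # logistic
print(np.tanh(x))               # tanh
print(np.maximum(0, x))         # relu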
Example #20
#Round y_pred to the nearest integer due to PWM mechanical constraints
#-------------------------

for i in range(len(y_pred)):
    if math.modf(y_pred[i])[0]>0.5:
        y_pred[i]=math.ceil(y_pred[i])
    else:
        y_pred[i]=math.floor(y_pred[i])        

#--------------------------
#METRICS
#-------------------------
print ('Number of points used for training : ' + str(len(X_train)))
print ('Number of points used for test     : ' + str(len(X_test)))
#R2 score
R2=mymodel.score(X_test,y_test) 
print('R2 score = ' + str(R2))
#compute sum of squared errors
sse=((y_pred-y_true)**2).sum() 
#absolute error (vector)
error=np.abs(y_true - y_pred)
#absolute percent error (vector)
aperr= np.abs((y_true - y_pred) / y_true)
#MAPE 
mape=np.mean(np.abs((y_true - y_pred) / y_true)) 
print('MAPE = ' + str(mape))


#--------------------------
#PLOT
#-------------------------
Example #21
for k in range(4):
    # configure the MLP (backpropagation); try changing hidden_layer_sizes
    ref = MLPRegressor(solver='lbfgs',
                       alpha=0,
                       hidden_layer_sizes=(5 * (k + 1),),  # try changing this value
                       tol=1e-6,
                       random_state=20)
    # 'lbfgs' is an optimizer in the family of quasi-Newton methods.
    # alpha : L2 penalty (regularization term) parameter.
    # hidden_layer_sizes : the ith element represents the number of neurons in
    # the ith hidden layer.

    # train and evaluate the model
    ref.fit(X_train, y_train)
    print('Training set R^2: {0:.2f}'.format(ref.score(X_train, y_train)))
    print('Test set R^2: {0:.2f}'.format(ref.score(X_test, y_test)))


    # plot the regression curve
    y_predict = ref.predict(X_train)

    plt.subplot(221 + k)
    plt.scatter(X_train, y_train, s=5, c='k', marker='.')
    plt.plot(X_train, y_predict, color='r', linewidth=4)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.grid(True)
plt.show()
Example #22
          (X_test[:, 2] - .5)**2 + 10 * X_test[:, 3] + 5 * X_test[:, 4]**5 +
          np.random.normal(0, 1))

lr = LinearRegression()  # the normalize=True option was removed in scikit-learn 1.2
lr.fit(X_train, Y_train)

lasso = Lasso(alpha=0.01)
lasso.fit(X_train, Y_train)

ridge = Ridge(alpha=0.1)
ridge.fit(X_train, Y_train)

rfr = RandomForestRegressor()
rfr.fit(X_train, Y_train)

mlp = MLPRegressor(hidden_layer_sizes=(200, ), max_iter=1000)
mlp.fit(X_train, Y_train)

# note: for regressors, .score() already returns R^2; accuracy_score applies to classifiers only

acc_lr = lr.score(X_test, Y_test)
acc_lasso = lasso.score(X_test, Y_test)
acc_ridge = ridge.score(X_test, Y_test)
acc_rfr = rfr.score(X_test, Y_test)
acc_mlp = mlp.score(X_test, Y_test)

print("LinearRegression: ", acc_lr)
print("Lasso: ", acc_lasso)
print("Ridge: ", acc_ridge)
print("RandomForestRegressor: ", acc_rfr)
print("MLPRegressor: ", acc_mlp)
Example #23
                    warm_start=False,
                    momentum=0.9,
                    nesterovs_momentum=True,
                    early_stopping=False,
                    validation_fraction=0.1,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-08)

# Train the model using the training sets
regr.fit(diabetes_X_train, diabetes_y_train)

# Make predictions using the testing set
diabetes_y_pred = regr.predict(diabetes_X_test)

pontos = regr.score(diabetes_X_test, diabetes_y_test)
# The R^2 score
print('R^2 score: \n', pontos)
# The mean squared error
print("Mean squared error: %.2f" %
      mean_squared_error(diabetes_y_test, diabetes_y_pred))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % r2_score(diabetes_y_test, diabetes_y_pred))

# Plot outputs
plt.scatter(diabetes_X_test, diabetes_y_test, color='black')
plt.plot(diabetes_X_test, diabetes_y_pred, color='blue', linewidth=3)

plt.xticks(())
plt.yticks(())
Example #24
MLPRegressorModel = MLPRegressor(activation='relu',
                                 solver='adam',
                                 learning_rate='adaptive',
                                 hidden_layer_sizes=(15),
                                 max_iter=1000,
                                 batch_size=5,
                                 random_state=33)
# 0.8281730812610498
MLPRegressorModel.fit(X_train, y_train)
# print(MLPRegressorModel.get_params)
# print("="*10)
# ----------------------------------------------------
# Calculating Details
print('MLPRegressorModel Train Score is : ',
      MLPRegressorModel.score(X_train, y_train))
print('MLPRegressorModel Test Score is : ',
      MLPRegressorModel.score(X_test, y_test))
print("=" * 10)
# ---------------------
print("Number of outputs : ", MLPRegressorModel.n_outputs_)
print('MLPRegressorModel last activation is : ',
      MLPRegressorModel.out_activation_)
print('MLPRegressorModel No. of layers is : ', MLPRegressorModel.n_layers_)
print('MLPRegressorModel No. of iterations is : ', MLPRegressorModel.n_iter_)
print("The number of training samples seen by the solver during fitting : ",
      MLPRegressorModel.t_)
print("=" * 10)
# ---------------------
print('MLPRegressorModel loss is : ', MLPRegressorModel.loss_)
print("MLPRegressorModel best loss is : ",
Example #25
Calculating RMSE for all 6 cases
----------------------------------------------------------------------
"""
RMSE1 = mean_squared_error(y_test1, y_pred1, squared=False)
print("RMSE: ", RMSE1)
RMSE2 = mean_squared_error(y_test2, y_pred2, squared=False)
print("RMSE: ", RMSE2)
RMSE3 = mean_squared_error(y_test3, y_pred3, squared=False)
print("RMSE: ", RMSE3)
RMSE4 = mean_squared_error(y_test4, y_pred4, squared=False)
print("RMSE: ", RMSE4)
RMSE5 = mean_squared_error(y_test5, y_pred5, squared=False)
print("RMSE: ", RMSE5)
RMSE6 = mean_squared_error(y_test6, y_pred6, squared=False)
print("RMSE: ", RMSE6)
score = MLP1.score(x_test1, y_test1)
print(score)
score = MLP2.score(x_test2, y_test2)
print(score)
score = MLP3.score(x_test3, y_test3)
print(score)
score = MLP4.score(x_test4, y_test4)
print(score)
score = MLP5.score(x_test5, y_test5)
print(score)
score = MLP6.score(x_test6, y_test6)
print(score)
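The six near-identical blocks above could be collapsed into one loop; a sketch assuming the MLP1..MLP6 models, their splits, and the mean_squared_error import exist as in the snippet:

# Sketch: one loop over the six fitted models and their test splits.
cases = [(MLP1, x_test1, y_test1, y_pred1), (MLP2, x_test2, y_test2, y_pred2),
         (MLP3, x_test3, y_test3, y_pred3), (MLP4, x_test4, y_test4, y_pred4),
         (MLP5, x_test5, y_test5, y_pred5), (MLP6, x_test6, y_test6, y_pred6)]
for model, x_te, y_te, y_pr in cases:
    print("RMSE: ", mean_squared_error(y_te, y_pr, squared=False))
    print(model.score(x_te, y_te))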

"""
Plotting loss functions as a function of iterations
----------------------------------------------------------------------
Example #26
def apply_regression(filename):

    reply_message = ""

    # read filename containing data from commandline argument
    # filename = sys.argv[1]
    input, output = load_all(filename)

    input = array(input)    # note: 'input' and 'output' shadow Python builtins; names kept from the original
    output = array(output)
    # freq = array(freq);

    # normalize degrees and ranks between [0,1]
    MaxInput = max(input)
    MaxOutput = max(output)
    MinInput = min(input)
    MinOutput = min(output)

    input = input - MinInput
    output = output - MinOutput

    input = input / float(MaxInput - MinInput)
    output = output / float(MaxOutput - MinOutput)

    if pri > 2:
        printV(zip(input, output))

    print(input.shape, output.shape)

    N = len(input)

    # Shuffle input and output array together
    x = np.arange(len(input))  # was np.arange(1, len(input)), which silently dropped one sample
    np.random.shuffle(x)
    shuf = x[:]

    comb = zip(x, input)
    comb = sorted(comb)

    input = [x[1] for x in comb]

    comb = zip(shuf, output)
    comb = sorted(comb)

    output = [x[1] for x in comb]

    input = array(input)
    output = array(output)

    input = input.reshape(-1, 1)
    # output = output.reshape(-1,1);

    # split data into training and testing instances
    splitRatio = 0.8  # splitRatio determines how many instances are used for training and testing. eg: 0.2 means 20% train, 80% test
    spl = int(splitRatio * N)  # split location for train-test
    print "Train : ", int(splitRatio * N), "\t Test: ", int(
        (1.0 - splitRatio) * N)
    reply_message += '\n' + "Train : " + str(int(
        splitRatio * N)) + "\t Test: " + str(int((1.0 - splitRatio) * N))

    trI = array(input[:spl])
    trL = array(
        output[:spl])  # trI - training instances, trL - training labels
    teI = array(input[spl:])
    teL = array(output[spl:])  # teI - testing instances, teL - testing labels

    trI = trI.astype('float')
    teI = teI.astype('float')

    # set parameters of neural network regression model
    nn = MLPRegressor(hidden_layer_sizes=(100, 50),
                      activation='relu',
                      solver='lbfgs',
                      alpha=0.0001,
                      batch_size='auto',
                      learning_rate='adaptive',
                      learning_rate_init=0.001,
                      max_iter=200,
                      shuffle=True,
                      random_state=None,
                      tol=0.00001,
                      verbose=False,
                      momentum=0.5,
                      early_stopping=True,
                      validation_fraction=0.15)

    print(trI.shape, trL.shape)

    # train NN regression model
    nn.fit(trI, trL)

    # test model to get accuracy
    res = nn.score(teI, teL)

    # 'res' represents how well regression model is learned.
    # It is defined as (1 - u/v), where u is the residual sum of squares ((y_true - y_pred) ** 2).sum()
    # and v is the total sum of squares ((y_true - y_true.mean()) ** 2).sum()

    print('Accuracy measure: ', res)
    reply_message += '\n' + 'Accuracy measure: ' + str(res)

    # predict label/output for test instances/degrees for calculating error
    yres = nn.predict(teI)
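    # Quick check of the definition above (a sketch; teL and yres are 1-D arrays here):
    # nn.score() should equal 1 - u/v.
    u = ((teL - yres) ** 2).sum()
    v = ((teL - teL.mean()) ** 2).sum()
    assert abs(res - (1 - u / v)) < 1e-6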

    R = []  # output
    Dev = []  # deviation from true output

    total_err = 0  # renamed from 'sum', which shadowed the builtin
    if pri > 1:
        print('Predicted', '\t', 'Actual')
    # calculate deviation from true output for each test instance
    for e in sorted(zip(yres, teL, teI)):

        prank = (e[0] *
                 (MaxOutput - MinOutput)) + MinOutput  # predicted output
        trank = (e[1] * (MaxOutput - MinOutput)) + MinOutput  # true output
        if pri > 1:
            print((e[2] * (MaxInput - MinInput)) + MinInput, "\t",
                  int(prank), "\t", trank)

        total_err += abs(prank - trank)

        R.append(e[1])
        Dev.append(abs(prank - trank))

    print('Avg error: ', (total_err / len(yres)))
    reply_message += '\n' + 'Avg error: ' + str(total_err / len(yres))

    # ===============================================================================
    #
    # # save plot image of input vs output on log scale
    # plt.plot(input,output,'o',ms=1.5)
    #
    # savefig(filename+".png");
    #
    # plt.clf();
    # #plt.plot(R,Dev,'o',ms=1.5)
    # #plt.show()
    # ===============================================================================

    # correct predicted output < 1 to 1(e^0)
    # yres = [ max(0,x) for x in yres ]

    # show plot of predicted output(dotted line) and actual output (continuous line).
    # NOTE : x-axis is input. Both input(x-axis) and output(y-axis) are on log scale and normalized
    # if pri > 1 :
    #     z = array(sorted(zip(teI, teL, yres)));
    #     plt.plot(z[:, 0], z[:, 2], 'o', ms=4)
    #     plt.plot(z[:, 0], z[:, 1], '-', ms=2)
    #     plt.show()

    return reply_message


# apply_regression("testfile.txt")
# apply_regression("dp_data.txt")
Example #27
            else:
                inputs.append(float(splitted[j]))
        test.append(inputs)

with open(path4) as f2:
    lines = f2.readlines()
    for l in islice(lines, 8000, 10044):
        test_y.append(float(l.strip()))

values = np.array(stringlabeltest)
print(values)
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(values)
print(integer_encoded)
onehot_encoder = OneHotEncoder(sparse_output=False)  # 'sparse' was renamed to 'sparse_output' in scikit-learn 1.2
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
onehot_encoded = onehot_encoder.fit_transform(integer_encoded)
print(onehot_encoded)

for x in range(len(test)):
    test[x].extend(onehot_encoded[x])

mlp = MLPRegressor(hidden_layer_sizes=(1000, 1000),
                   batch_size=100,
                   solver='adam',
                   learning_rate_init=0.001,
                   max_iter=100)
mlp.fit(train, train_y)
print(mlp.score(train, train_y))
print(mlp.score(test, test_y))
Example #28
from sklearn import linear_model
from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline
from sklearn.neural_network import MLPRegressor
import numpy as np

df = pd.read_excel("dataStandard.xlsx")

df_test = df.iloc[:200, :]
df_train = df.iloc[200:, :]  # was 201:, which silently dropped row 200

X = df_train[[
    'Price Points', 'Space Points', 'Placement Points', 'Min Price',
    'Max Price'
]]
y = df_train['Quality Points']  # single column, so the target is 1-D

regr = MLPRegressor(random_state=1, max_iter=500).fit(X, y)

regr.predict(df_test[[
    'Price Points', 'Space Points', 'Placement Points', 'Min Price',
    'Max Price'
]])

scoreCalMLP = regr.score(
    df_test[[
        'Price Points', 'Space Points', 'Placement Points', 'Min Price',
        'Max Price'
    ]], df_test[["Quality Points"]])

print(scoreCalMLP)
Example #29
def calculate(frame, data):
    # create list of budget, user_rating, runtimes, release year and cast fb likes for correlation analysis
    budget = []
    user = []
    fb_likes = []
    runtime = []
    release = []

    for pt in data:
        budget.append(pt.budget)
        user.append(pt.rating)
        fb_likes.append(pt.fb_likes)
        runtime.append(pt.runtime)
        release.append(pt.release)

    # Calculate correlations among numerical data types
    p_fb_likes = pearsonr(fb_likes, user)
    p_budget = pearsonr(budget, user)
    p_runtime = pearsonr(runtime, user)
    p_release = pearsonr(release, user)

    s_fb_likes = spearmanr(fb_likes, user)
    s_budget = spearmanr(budget, user)
    s_runtime = spearmanr(runtime, user)
    s_release = spearmanr(release, user)

    # set up mlp to perform user rating prediction
    mlp = MLPRegressor(hidden_layer_sizes=(20, 20),
                       activation='tanh',
                       solver='adam',
                       max_iter=500,
                       verbose=True)

    data_pts = data
    random.shuffle(data_pts)
    data_array = np.array(data_pts)

    X = data_array[:, 0:-1]
    Y = data_array[:, -1]
    cut = int(0.66 * len(X))

    # Scale data to avoid network saturation
    X_train = X[:cut]
    X_test = X[cut:]
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    mlp.fit(X_train, Y[:cut])
    mlp_score = mlp.score(X_test, Y[cut:])

    # Print results to screen
    frame.text.insert(
        INSERT,
        "Correlation Results:\nCast Facebook Likes & User Rating, Pearson: %.2f; Spearman: %.2f\n"
        "Budget and User Rating, Pearson: %.2f; Spearman: %.2f\n"
        "Runtime and User Rating, Pearson: %.2f; Spearman: %.2f\n"
        "Release Year and User Rating, Pearson: %.2f; Spearman: %.2f\n"
        "\nNeural Network R^2 Score: %.2f\n"
        "\n" %
        (p_fb_likes[0], s_fb_likes[0], p_budget[0], s_budget[0], p_runtime[0],
         s_runtime[0], p_release[0], s_release[0], mlp_score))

    # Send plot of top revenue earning genre probability distributions to GUI
    frame.ax.scatter(release, user)
    frame.ax.set_title("Release Year vs. User Rating")
    frame.ax.set_xlabel('Release Year')
    frame.ax.set_ylabel('User Rating')
Example #30
import quandl
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn import tree  # only needed for the commented-out DecisionTreeRegressor below

Data = pd.read_csv('infibeam.csv')
x = Data.loc[:, 'High':'Turnover (Lacs)']
y = Data.loc[:, 'Open']

x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)
# DT=tree.DecisionTreeRegressor(random_state=42)
MLP = MLPRegressor(random_state=42)
MLP.fit(x_train, y_train)
Test = [[2239.65, 230.35, 235.15, 234.9, 3357625.0, 7898.64]]
Prediction = MLP.predict(Test)
print(Prediction)
print(MLP.score(x_test, y_test))
Example #31
plt.rc('font', **font)

fig, axes = plt.subplots(nrows=1, ncols=1)
axes.set_title("Data: " + file)
axes.set_ylabel('Normalized distant count')
axes.set_xlabel(r'Distance ($\AA$)')

axes.hist(y_train, 150, color='blue', density=True, label='plot', linewidth=2, alpha=1.0)  # 'normed' was removed in matplotlib 3.x
plt.show()
"""

# Fit model
clf.fit(X_train, y_train)

# Compute and print r^2 score
print(clf.score(X_test, y_test))

# Store predicted energies
Ecmp = clf.predict(X_test)

Ecmp = gt.hatokcal * (Ecmp)
Eact = gt.hatokcal * (y_test)

# Compute RMSE in kcal/mol
rmse = gt.calculaterootmeansqrerror(Ecmp, Eact)

# End timer
_t1e = tm.time()
print("Computation complete. Time: " + "{:.4f}".format((_t1e - _t1b)) + "s")

# Output model information
Example #32
X_train_nn, X_test_nn, Y_train_nn, Y_test_nn = train_test_split(
    neural_network_df.loc[:, neural_network_df.columns != 'int_rate'],  # .ix was removed in pandas 1.0
    neural_network_df.int_rate,
    test_size=0.2)

scaler_nn = StandardScaler().fit(X_train_nn)
X_train_nn = scaler_nn.transform(X_train_nn)
X_test_nn = scaler_nn.transform(X_test_nn)  # reuse the training statistics instead of refitting on the test set

mlp = MLPRegressor(solver='lbfgs',
                   hidden_layer_sizes=50,
                   max_iter=150,
                   shuffle=True,
                   random_state=1)
mlp.fit(X_train_nn, Y_train_nn)
print("Training score is ", mlp.score(X_train_nn, Y_train_nn))
print("Testing score is ", mlp.score(X_test_nn, Y_test_nn))

# In[28]:

from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score

knn_df = df.copy()
knn_df = knn_df[[
    'grade', 'total_pymnt_inv', 'revol_util', 'loan_status',
    'fico_range_grade', 'total_rec_prncp', 'revol_bal',
    'grade_based_on_inq_last_6mths', 'acc_open_past_24mths', 'installment',
    'last_pymnt_amnt', 'funded_amnt_inv', 'total_acc', 'credit_age', 'issue_d',
    'annual_inc', 'meanfico', 'int_rate'
]]
Example #33
Y_tr = pheno[:1000,1:]   #slicing pheno
#Y_va = pheno[201:250,:]
Y_te = pheno[1001:,1:]

diabetes_X_train = X_tr
diabetes_X_test = X_te
diabetes_y_train = Y_tr
diabetes_y_test = Y_te

reg = MLPRegressor(hidden_layer_sizes=(1,), solver='lbfgs')  # the old dev API spelled this algorithm='l-bfgs'
reg.fit(X_tr,Y_tr)

scores = cross_val_score(reg,geno[:,1:],pheno[:,1:],cv=10)

#Result_Y = np.zeros((249,1), dtype='float64')
Result_Y = reg.predict(X_te)
#Yte = np.array(Y_te, dtype=np.float64) 
r_row,p_score = pearsonr(Result_Y,Y_te)

# The mean squared error
print("Mean squared error: %.2f"
      % np.mean((reg.predict(diabetes_X_test) - diabetes_y_test) ** 2))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % reg.score(diabetes_X_test, diabetes_y_test))
print(Result_Y)
print(scores)
print(Result_Y.shape)
print(r_row)
print(p_score)

Example #34
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

regr = MLPRegressor(max_iter=10,
                    hidden_layer_sizes=(100, 50, 25, 10, 5),
                    verbose=True)

#### 8th change: parallelize training and prediction
with joblib.parallel_backend('dask'):
    regr.fit(X_train, y_train)

# Prediction and score
with joblib.parallel_backend('dask'):
    score = regr.score(X_test, y_test)

stop = datetime.now()

print("Temps préparation et inférence (ML) : ", (stop - start).seconds, "s")
print(f"model score: {score}")

# %% [markdown]
# Only the training and scoring are parallelized by Dask, because MLPRegressor has no "Dask" implementation
# %% [markdown]
# ## Training and inference with a pipeline

# %%
#### 6th change: use a Dask-specific joblib backend
import joblib
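The pipeline cell itself is not included in the snippet; a sketch of what it might contain (assuming unscaled X_train/X_test are still available and the Dask client from earlier cells is running):

# %%
# Sketch: scaler + MLP in one Pipeline, fitted under the Dask joblib backend.
from sklearn.pipeline import make_pipeline

pipe = make_pipeline(StandardScaler(),
                     MLPRegressor(max_iter=10,
                                  hidden_layer_sizes=(100, 50, 25, 10, 5)))
with joblib.parallel_backend('dask'):
    pipe.fit(X_train, y_train)
    score = pipe.score(X_test, y_test)
print(f"pipeline score: {score}")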
Example #35
    K += 1
    model = neighbors.KNeighborsRegressor(n_neighbors=K)

    model.fit(x_train, y_train)  # fit the model
    pred = model.predict(x_test)  # make prediction on test set
    error = sqrt(mean_squared_error(y_test, pred))  # calculate rmse
    rmse_val.append(error)  # store rmse values

curve = pd.DataFrame(rmse_val)  # elbow curve

plt.figure(figsize=(20, 5))
plt.plot(curve[1:])
plt.title('Elbow Curve').set_fontsize(16)
plt.xlabel('K Value').set_fontsize(14)
plt.ylabel('Root Mean Squared Error').set_fontsize(14)
plt.show()

knn = neighbors.KNeighborsRegressor(n_neighbors=12)
mlp = MLPRegressor()
regr = RandomForestRegressor(max_depth=2, random_state=0, n_estimators=100)

knn.fit(x_train, y_train)
mlp.fit(x_train, y_train)
regr.fit(x_train, y_train)

knn_score = knn.score(x_test, y_test)
mlp_score = mlp.score(x_test, y_test)
regr_score = regr.score(x_test, y_test)

print([knn_score, mlp_score, regr_score])
Example #36
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor

import pandas as pd
from sklearn.model_selection import train_test_split
df = pd.read_csv('Housing.csv')
df.drop(['ID'], axis=1, inplace=True)
from matplotlib import pyplot

for i, column in enumerate(df.columns):
    pyplot.subplot(5, 3, i + 1)
    pyplot.scatter(df[column], df.iloc[:, -1])
    pyplot.xlabel(column)
pyplot.show()

df = (df - df.mean()) / df.std()
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

a = MLPRegressor()
a.fit(X_train, y_train)
print(a.score(X_test, y_test))
Example #37
test_target = days_true

#print(test_data)

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor

scaler = StandardScaler()

#X_train, X_test, y_train, y_test = train_test_split(train_data, train_target, test_size=0.20, random_state=0)

#X_train_scaled = scaler.fit(X_train).transform(X_train)
#X_test_scaled = scaler.fit(X_test).transform(X_test)
train_data_scaled = scaler.fit_transform(train_data)
test_data_scaled = scaler.transform(test_data)  # transform only: reuse the statistics fitted on the training data

mlp = MLPRegressor(max_iter=500,
                   solver='lbfgs',
                   hidden_layer_sizes=(5, 5),
                   alpha=0.1,
                   random_state=8)
mlp.fit(train_data_scaled, train_target)

print(mlp.score(train_data_scaled, train_target))
print(mlp.score(test_data_scaled, test_target))

pred_day = mlp.predict(test_data_scaled)
print(test_target)
print(pred_day.astype(int))
Example #38
# Example with a Regressor using the scikit-learn library
# example for the XOR gate
from sklearn.neural_network import MLPRegressor

X = [[0., 0.],[0., 1.], [1., 0.], [1., 1.]] # each one of the entries 00 01 10 11
y = [0, 1, 1, 0] # outputs for each one of the entries

# check http://scikit-learn.org/dev/modules/generated/sklearn.neural_network.MLPRegressor.html#sklearn.neural_network.MLPRegressor
#for more details
reg = MLPRegressor(hidden_layer_sizes=(5,), activation='tanh', solver='sgd',  # 'algorithm' was renamed to 'solver' in 0.18
                   alpha=0.001, learning_rate='constant', max_iter=10000, random_state=None,
                   verbose=False, warm_start=False, momentum=0.8, tol=10e-8, shuffle=False)

reg.fit(X,y)

outp =  reg.predict([[0., 0.],[0., 1.], [1., 0.], [1., 1.]])

print('Results:')
print('0 0 0:', outp[0])
print('0 1 1:', outp[1])
print('1 0 1:', outp[2])
print('1 1 0:', outp[3])  # was outp[0], which reprinted the first prediction
print('Score:', reg.score(X, y))
Example #39
from __future__ import print_function, division
from future.utils import iteritems
from builtins import range, input
# Note: you may need to update your version of future
# sudo pip install -U future


import numpy as np
from sklearn.neural_network import MLPRegressor
from util import getKaggleMNIST



# get data
X, _, Xt, _ = getKaggleMNIST()

# create the model and train it
model = MLPRegressor()
model.fit(X, X)

# test the model
print("Train R^2:", model.score(X, X))
print("Test R^2:", model.score(Xt, Xt))

Xhat = model.predict(X)
mse = ((Xhat - X)**2).mean()
print("Train MSE:", mse)

Xhat = model.predict(Xt)
mse = ((Xhat - Xt)**2).mean()
print("Test MSE:", mse)