Example #1
mean_absolute_error(y_test, [prediction])

# In[86]:

plt.plot(prediction, label="prediction")
plt.plot(y_test.iloc[0], label="real")
plt.legend()

# In[87]:

from sklearn.neural_network import MLPRegressor

# In[140]:

MLP = MLPRegressor(max_iter=5000,
                   hidden_layer_sizes=(100, 100, 100),
                   random_state=42)

# In[141]:

MLP.fit(x, y)

# In[142]:

prediction = MLP.predict(x_test)[0]

# In[143]:

mean_absolute_error(y_test, [prediction])

# In[144]:
def grid_search_multi_models(X_train,
                             y_train,
                             single_regressor=None,
                             parameters_grid_passed=None):
    global preparation_pipeline
    regressors = [
        ('Linear Regression',
         LinearRegression(fit_intercept=True,
                          copy_X=True,
                          n_jobs=1)),
        ('Lasso Regression',
         Lasso(fit_intercept=True,
               copy_X=True,
               random_state=1)),
        ('Gradient Boost Regressor',
         GradientBoostingRegressor(random_state=1)),
        ('Random Forest Regressor', RandomForestRegressor(random_state=1)),
        ('Neural Networks',
         MLPRegressor(random_state=50, activation='relu', max_iter=100)),
    ]

    if single_regressor:
        regressors = single_regressor

    params_grid = {
        'Linear Regression': {},
        'Lasso Regression': {
            'alpha': [0.3, 0.5, 1.0],
        },
        'Gradient Boost Regressor': {
            'learning_rate': [0.1],
            'n_estimators': [50, 70, 100],
            'max_depth': [5, 10],
        },
        'Random Forest Regressor': {
            'n_estimators': [50, 70, 100],
            'max_depth': [5, 10],
        },
        'Neural Networks': {
            'hidden_layer_sizes': [(10, 10), (10, 10, 10)],
        },
    }
    if parameters_grid_passed:
        for each in parameters_grid_passed:
            params_grid[each] = parameters_grid_passed[each]

    results = pd.DataFrame(columns=[
        "Best tuned model", "Train Accuracy", "Train MAE",
        "Best hyper parameters"
    ])
    for (name, regressor) in regressors:

        parameters = params_grid[name]

        # prepare the pipeline for this estimator

        preparation_pipeline_with_regressor = Pipeline([
            ("preparation", preparation_pipeline), ("regressor", regressor)
        ])

        # Perform the grid search for best parameters

        hyper_params = {}
        for params in parameters.keys():
            hyp_p = 'regressor__' + str(params)
            hyper_params[hyp_p] = parameters[params]

        print("Performing Grid Search for", str(name))
        grid_search_clf = GridSearchCV(preparation_pipeline_with_regressor,
                                       hyper_params,
                                       scoring='neg_mean_absolute_error',
                                       n_jobs=-1,
                                       cv=5,
                                       verbose=2)
        grid_search_clf.fit(X_train, y_train)

        # Store the results
        best_train_accuracy = grid_search_clf.best_estimator_.score(
            X_train, y_train)
        y_train_predicted = grid_search_clf.best_estimator_.predict(X_train)
        train_mae = mean_absolute_error(np.array(y_train), y_train_predicted)
        best_parameters = grid_search_clf.best_estimator_.get_params()
        param_dummy = []
        for param_name in sorted(hyper_params.keys()):
            param_dummy.append((param_name, best_parameters[param_name]))
        results.loc[len(results)] = [
            name, best_train_accuracy, train_mae,
            json.dumps(param_dummy)
        ]
    print('Results of model')
    pd.set_option('display.max_colwidth', None)
    print(results)
    if single_regressor:
        save_scatterplot(y_train, y_train_predicted, train=args.train)
        save_histogram(y_train, y_train_predicted, train=args.train)
        return grid_search_clf
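# A minimal usage sketch for the helper above. It assumes the module-level
# preparation_pipeline (and, for the single-regressor path, the save_* helpers
# and the global `args`) are defined elsewhere, as the function expects; the
# Ridge model and its parameter grid here are illustrative only.
from sklearn.linear_model import Ridge

best_model = grid_search_multi_models(
    X_train,
    y_train,
    single_regressor=[('Ridge Regression', Ridge(random_state=1))],
    parameters_grid_passed={'Ridge Regression': {'alpha': [0.1, 1.0, 10.0]}})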
Example #3
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor

import warnings

warnings.filterwarnings('ignore')

X = [[1], [2], [3], [77], [99], [45]]
Y = [1, 2, 3, 77, 99, 45]

X = np.array(X)
Y = np.array(Y)

model = MLPRegressor(verbose=True,
                     hidden_layer_sizes=(
                         20,
                         20,
                     ),
                     max_iter=5000)
'''
model.fit(X,Y)
print(model.loss_)
#print(model.coefs_)
#print(model.intercepts_)
print(model.n_iter_)
#print(model.n_layers_)
#print(model.n_outputs_)
#print(model.out_activation_)

##plt.plot(model.loss_curve_)
##plt.show()
'''
Example #4
y_test=normalize(y_test)
y_test=y_test.ravel()
y_train=y_train.ravel()

#linear regression
reg = LinearRegression().fit(X_train, y_train)
score=reg.score(X_test, y_test)
print(score)
pred=reg.predict(X_test)
print(mean_squared_error(y_test,pred))
visualize_scatterplot(pred,y_test,score,method='linear')


#MLP
regr = MLPRegressor(random_state=1,max_iter=10000).fit(X_train, y_train)
pred=regr.predict(X_test)
score=regr.score(X_test,y_test)
print(mean_squared_error(y_test,pred))
print(score)
visualize_scatterplot(pred,y_test,score,method="MLP")


#Gaussian Process
kernel = DotProduct() + WhiteKernel()
gpr = GaussianProcessRegressor(kernel=kernel,
         random_state=0).fit(X_train, y_train)

pred=gpr.predict(X_test)
score=gpr.score(X_test, y_test)
print(mean_squared_error(y_test,pred))
Example #5
def model(model_name, train_x, train_y, test_x, alpha=0.1):
        summary = None
        from sklearn.neural_network import MLPRegressor
        from sklearn.svm import SVR
        import sklearn
        import statsmodels.regression.linear_model as sm
        if model_name == 'Random':
            test_y = pd.Series(np.random.random_sample((len(test_x),)), index=test_x.index)
        if model_name == 'None':
            test_y = test_x.iloc[:,0]
        if model_name == 'MLPRegressor':
            mlp = MLPRegressor(hidden_layer_sizes=(20, 20))
            mlp.fit(train_x, train_y)
            y_pred = mlp.predict(test_x)
            test_y = pd.Series(y_pred, index=test_x.index)
        if model_name == 'Lasso':
            model = sklearn.linear_model.Lasso(0.001,fit_intercept = False)
            lasso = model.fit(train_x, train_y)
            test_y = pd.Series(lasso.predict(test_x), index=test_x.index)
            summary = lasso.score(train_x,train_y)
        if model_name == 'Ridge':
            model = sklearn.linear_model.Ridge(1.0,fit_intercept = False)
            ridge = model.fit(train_x, train_y)
            test_y = pd.Series(ridge.predict(test_x), index=test_x.index)
            summary = ridge.score(train_x, train_y)
        if model_name == 'SVR':
            svr_rbf = SVR(kernel='rbf', C=1, gamma=0.0001, epsilon=0.1)
            svr_rbf.fit(train_x, train_y)
            y_pred_rbf = svr_rbf.predict(test_x)
            test_y = pd.Series(y_pred_rbf, index=test_x.index)
        if model_name == 'StepWise':

            feature_col = list(train_x.columns.values)
            length = len(feature_col)
            final_feature = []
            for i in range(length):
                pvalue_min = 1
                column_min = ""
                for feature in feature_col:
                    temp_feature = final_feature + [feature]
                    x = sm.add_constant(train_x.loc[:,temp_feature])
                    model = sm.OLS(train_y, x)
                    pvalue = model.fit().pvalues[i + 1]
                    # print(pvalue)
                    if pvalue < pvalue_min and pvalue < alpha:
                        pvalue_min = pvalue
                        column_min = feature

                if column_min != "":
                    feature_col.remove(column_min)
                    final_feature.append(column_min)
                else:
                    break

            X = sm.add_constant(train_x.loc[:,final_feature])
            model = sm.OLS(train_y, X)
            res = model.fit()
            summary = pd.Series(res.pvalues, index=['const'] + final_feature)
            if ~np.isnan(res.f_pvalue):
                summary['f_test'] = res.f_pvalue
            if ~np.isnan(res.rsquared_adj):
                summary['score'] = res.rsquared_adj
            xx = sm.add_constant(test_x.loc[:,final_feature],has_constant='raise')
            test_y = res.predict(xx)

        if model_name == 'AdaBoost':
            from sklearn.ensemble import AdaBoostRegressor
            model = AdaBoostRegressor(n_estimators=100,learning_rate = 0.5)
            adaboost = model.fit(train_x,train_y)
            test_y = pd.Series(adaboost.predict(test_x),index = test_x.index)

        if model_name == 'RandomForestRegressor':
            from sklearn.ensemble import RandomForestRegressor
            rfr = RandomForestRegressor(n_estimators=100, criterion='squared_error')
            rfr.fit(train_x, train_y)
            y_pred_rfr = rfr.predict(test_x)
            test_y = pd.Series(y_pred_rfr, index=test_x.index)


        return test_y,summary
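# A minimal usage sketch for the dispatcher above, assuming pandas DataFrames
# for train_x/test_x; the toy data below is purely illustrative.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
train_x = pd.DataFrame(rng.normal(size=(100, 3)), columns=['f1', 'f2', 'f3'])
train_y = pd.Series(2 * train_x['f1'] + rng.normal(scale=0.1, size=100))
test_x = pd.DataFrame(rng.normal(size=(20, 3)), columns=['f1', 'f2', 'f3'])

pred, summary = model('MLPRegressor', train_x, train_y, test_x)
print(pred.head())  # predictions indexed like test_x; summary is None for this branch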
names = list(X_train)[1:]
X_train.drop('opis', axis=1, inplace=True)
y_train = X_train['cena']
X_train.drop('cena', axis=1, inplace=True)

X_train = encoder(X_train)


X_dev0 = pandas.read_csv('dev-0/in.tsv', sep='\t', header=None, names=names)
X_dev0.drop('opis', axis=1, inplace=True)
X_dev0 = encoder(X_dev0)
y_dev0 = pandas.read_csv('dev-0/expected.tsv', sep='\t', header=None)

X_testA = pandas.read_csv('test-A/in.tsv', sep='\t', header=None, names=names)
X_testA.drop('opis', axis=1, inplace=True)
X_testA = encoder(X_testA)

neuralNetwork = MLPRegressor(solver='lbfgs')
model = neuralNetwork.fit(X_train, y_train)

y_out_dev0 = model.predict(X_dev0)
y_out_testA = model.predict(X_testA)

with open('dev-0/out.tsv', 'w') as output_file:
    for out in y_out_dev0:
        print('%.0f' % out, file = output_file)

with open('test-A/out.tsv', 'w') as output_file:
    for out in y_out_testA:
        print('%.0f' % out, file = output_file)
Example #7
# MODEL: Multi-Layer Perceptron Regressor
feature = [
    'month', 'temperature', 'day', 'day_bis', 'sportbad_closed',
    'freizeitbad_closed', 'kursbecken_closed', 'event', 'sloop_dummy',
    'school_holiday', 'bank_holiday'
]

X_train = training_set[feature]
X_validation = validation_set[feature]
X_test = test_set[feature]
X_submission = submission_set[feature]

mlp = MLPRegressor(hidden_layer_sizes=(200, 200, 200, 200, 200, 200),
                   max_iter=100,
                   alpha=.5,
                   batch_size=10,
                   learning_rate_init=0.0005,
                   random_state=1)
mlp.fit(X_train, valide_train)

result_vald_bis = mlp.predict(X_validation)
result_test_bis = mlp.predict(X_test)
result_subm_bis = mlp.predict(X_submission)

# Retrieve the lowest prediction
res_min = []

for i in range(0, len(result_vald)):
    if result_vald[i] < result_vald_bis[i]:
        res_min.append(result_vald_bis[i])
    else:
Example #8
                                                    test_size=0.1,
                                                    random_state=0)

##############################################################################
# Partial Dependence computation for multi-layer perceptron
# ---------------------------------------------------------
#
# Let's fit a MLPRegressor and compute single-variable partial dependence
# plots

print("Training MLPRegressor...")
tic = time()
est = make_pipeline(
    QuantileTransformer(),
    MLPRegressor(hidden_layer_sizes=(50, 50),
                 learning_rate_init=0.01,
                 early_stopping=True))
est.fit(X_train, y_train)
print("done in {:.3f}s".format(time() - tic))
print("Test R2 score: {:.2f}".format(est.score(X_test, y_test)))

##############################################################################
# We configured a pipeline to scale the numerical input features and tuned the
# neural network size and learning rate to get a reasonable compromise between
# training time and predictive performance on a test set.
#
# Importantly, this tabular dataset has very different dynamic ranges for its
# features. Neural networks tend to be very sensitive to features with varying
# scales and forgetting to preprocess the numeric feature would lead to a very
# poor model.
#
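##############################################################################
# The excerpt stops before the plots themselves. A minimal sketch of the
# single-variable partial dependence step follows; the feature names are
# placeholders and should be replaced with columns that actually exist in
# X_train.

from sklearn.inspection import PartialDependenceDisplay

features = ["MedInc", "AveOccup", "HouseAge", "AveRooms"]  # placeholder feature names
PartialDependenceDisplay.from_estimator(est, X_train, features)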
Example #9
    train_size=0.70)

# In[51]:

print("train", X2_train.shape)
print("test", X2_test.shape)
print("train_y", y2_train.shape)

# ### 2a. MLP Regression (multi-layer perceptron)

# In[52]:

from sklearn.neural_network import MLPRegressor
mlr_nw = MLPRegressor(solver='lbfgs',
                      alpha=0.01,
                      max_iter=2000,
                      hidden_layer_sizes=(5, 2),
                      random_state=1,
                      activation='relu')
#sgd

# In[53]:

mlr_model = mlr_nw.fit(X2_train, y2_train)
mlr_model

# In[54]:

y2_mlr_predicted = mlr_model.predict(X2_test)

# In[55]:
        print(mape)
        RMSE = mean_squared_error(y_true, y_pred_lr)**0.5
        print(RMSE)

        rf = RandomForestRegressor(n_estimators=100, n_jobs=1)
        rf.fit(x_train, y_train)
        y_pred_rf = rf.predict(x_test)

        mape = mean_absolute_percentage_error(y_true, y_pred_rf)
        print(mape)
        RMSE = mean_squared_error(y_true, y_pred_rf)**0.5
        print(RMSE)

        mlp = MLPRegressor(solver='lbfgs',
                           alpha=1e-5,
                           hidden_layer_sizes=(100, 30),
                           random_state=1,
                           max_iter=100)
        mlp.fit(x_train, y_train)
        y_pred_mlp = mlp.predict(x_test)

        mape = mean_absolute_percentage_error(y_true, y_pred_mlp)
        print(mape)
        RMSE = mean_squared_error(y_true, y_pred_mlp)**0.5
        print(RMSE)

        y_pred_nm = np.hstack((y_train[-1], y_test[0:-1]))

        pd.DataFrame(np.vstack([y_pred_nm, y_pred_lr, y_pred_rf, y_pred_mlp
                                ])).T.to_csv(path_or_buf='all_for_graph.csv',
                                             index=False)
Example #11
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.15,
                                                    random_state=42)

#:# preprocessing

transform_pipeline = Pipeline([('scaler', StandardScaler())])

X_train = pd.DataFrame(transform_pipeline.fit_transform(X_train),
                       columns=X_train.columns)

#:# model

regressor = MLPRegressor(hidden_layer_sizes=[50, 15],
                         random_state=42,
                         max_iter=400,
                         alpha=0.0002)
regressor.fit(X_train, y_train)

#:# hash
#:# b5e36cb00a948148308cccec6aff6b05
md5 = hashlib.md5(str(regressor).encode('utf-8')).hexdigest()
print(f'md5: {md5}')

#:# audit
y_pred = regressor.predict(transform_pipeline.transform(X_test))

print(f'MSE: {mean_squared_error(y_test, y_pred)}')
print(f'RMSE: {np.sqrt(mean_squared_error(y_test, y_pred))}')
print(f'MAE: {mean_absolute_error(y_test, y_pred)}')
print(f'R2: {r2_score(y_test, y_pred)}')
    def __init__(self, selection="linear"):
        self.selection = selection
        self.init = True
        if selection == "linear":
            """LINEAR REGRESSION"""
            parameters = {
                'fit_intercept': [True, False],
                'copy_X': [True, False]
            }
            self.model = GridSearchCV(LinearRegression(),
                                      param_grid=parameters,
                                      n_jobs=-1,
                                      cv=3)
        elif selection == "adaboost":
            """ADABOOST REGRESSION"""
            param_dist = {
                'n_estimators': [50, 100],
                'learning_rate': [0.01, 0.05, 0.1],
                'loss': ['linear', 'square', 'exponential']
            }
            self.model = GridSearchCV(AdaBoostRegressor(),
                                      param_grid=param_dist,
                                      n_jobs=-1,
                                      cv=3)

        elif selection == "randomforest":
            """RANDOM FOREST REGRESSION"""
            random_grid = {
                'n_estimators': [1, 5, 10, 50],
                'max_features': ['auto', 'sqrt'],
                'max_depth': [10, 20, 50],
                'min_samples_split': [2, 5, 10],
                'min_samples_leaf': [1, 2, 4],
                'bootstrap': [True, False]
            }
            rf = RandomForestRegressor()

            self.model = GridSearchCV(estimator=rf,
                                      param_grid=random_grid,
                                      n_jobs=-1,
                                      cv=3)
        elif selection == "svm":
            """SUPPORT VECTOR MACHINE REGRESSION"""
            parameters_space = {
                'kernel': ('linear', 'rbf', 'poly'),
                'C': [1.5, 10],
                'gamma': [1e-7, 1e-4],
                'epsilon': [0.1, 0.2, 0.5, 0.3]
            }
            self.model = GridSearchCV(svm.SVR(),
                                      param_grid=parameters_space,
                                      n_jobs=-1,
                                      cv=3)
        elif selection == "mlp":
            """MULTILAYER PERCEPTRON REGRESSION"""
            param_list = {
                'hidden_layer_sizes': [(50,), (100,)],
                'activation': ['relu'],
                'solver': ['sgd', 'adam'],
                'learning_rate_init': [0.001, 0.005],
                'learning_rate': ['adaptive'],
            }
            self.model = GridSearchCV(estimator=MLPRegressor(max_iter=5000),
                                      param_grid=param_list,
                                      n_jobs=-1,
                                      cv=3)
        df_train[item + ' 12 roll avg'] = df_train[item].rolling(window=12).mean()
    else:
        df_train[item + ' 4 roll avg'] = df_train[item].rolling(window=4).mean()


ss = pp.StandardScaler()
df_train = pd.DataFrame(ss.fit_transform(df_train), index=df_train.index)

df_train = df_train.dropna()
target_df = target_df.loc[df_train.index]

target_df = target_df.dropna()
df_train = df_train.loc[target_df.index]


model = MLPRegressor(hidden_layer_sizes = (25, 400, 400, 25), activation = 'relu', solver = 'adam', alpha = 0.05, max_iter=300)
model.fit(df_train, target_df)


# Create test dataframe
df_test = df_raw.copy()
df_test = df_test.loc['2019-10-01':'2019-12-31']

df_test = df_test[['dewpoint', 'rel_humidity', 'temperature', 'wind_direction', 'wind_speed',
          'Fuel_Price', 'Wind_MW', 'Solar_MW', 'Demand_DLAP_MW', 'Demand_MW', 'Year', 
          'Month', 'Day', 'Hour', 'Weekday', 'Weekend', 'LMP_Price_Per_MWh']]

# Create wind vectors
df_test['wind_x'] = df_test['wind_speed'] * np.cos(np.deg2rad(df_test['wind_direction']))
df_test['wind_y'] = df_test['wind_speed'] * np.sin(np.deg2rad(df_test['wind_direction']))
Example #14
y = dataset.iloc[:, 0].values
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=0)

hd = [10, 20, 50, 100, 150, 200, 300, 400, 500]
act = ['identity', 'logistic', 'tanh', 'relu']
solver = ['lbfgs', 'sgd', 'adam']

rms_test = []
rms_train = []

for j in hd:
    mlp = MLPRegressor(hidden_layer_sizes=j)
    mlp.fit(X_train, y_train)

    tabel_test = np.zeros((len(X_test), 2))

    for i in range(len(X_test)):
        y_pred = mlp.predict(X_test[i].reshape(1, -1))
        tabel_test[i, 0] = y_pred
        tabel_test[i, 1] = y_test[i]

    rmstest = sqrt(mean_squared_error(tabel_test[:, 1], tabel_test[:, 0]))
    rms_test.append(rmstest)

    tabel_train = np.zeros((len(X_test), 2))
    for i in range(len(X_test)):
        y_pred = mlp.predict(X_train[i].reshape(1, -1))
Example #15
def multilayerPerceptron():
    variables = preModel()
    Y = variables[0]
    Y2 = variables[1]
    X = variables[2]
    X2 = variables[2]

    print("\n ---------------------------------")
    print("Making multilayer perceptron.............\n")

    #Split data into train and test datasets
    from sklearn.model_selection import train_test_split
    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        Y,
                                                        test_size=test_size,
                                                        random_state=20)
    X2_train, X2_test, Y2_train, Y2_test = train_test_split(
        X2, Y2, test_size=test_size, random_state=20)

    #Add match to predict
    for index, team in enumerate(teamsList, start=1):
        print(index, team.name)
    homeTeamToPredict = int(input("Select the home team number: "))
    awayTeamToPredict = int(input("Select the away team number: "))
    newDf = newRowToTest(teamsList[homeTeamToPredict - 1],
                         teamsList[awayTeamToPredict - 1])
    X_test = pd.concat([X_test, newDf])
    X2_test = pd.concat([X2_test, newDf])

    #Get the model
    from sklearn.neural_network import MLPRegressor
    model = MLPRegressor(random_state=20,
                         max_iter=5000,
                         hidden_layer_sizes=100,
                         activation='tanh')

    model.fit(X_train, Y_train)
    prediction_test = model.predict(
        X_test)  #Results of the predictions in a list[]

    model.fit(X2_train, Y2_train)
    prediction_test2 = model.predict(X2_test)

    from sklearn import metrics
    # We have to do [:-1] to delete the last row that we introduced manually.
    print(
        "\nMean sq. error for the home team->", '{:.2f}'.format(100 * round(
            metrics.mean_squared_error(Y_test, prediction_test[:-1]), 2)), "%")
    #print ("Mean abs. error for the home team->", '{:.2f}'.format(100*round(metrics.mean_absolute_error(Y_test, prediction_test),2)), "%")
    print(
        "Mean sq. error for the away team->", '{:.2f}'.format(100 * round(
            metrics.mean_squared_error(Y2_test, prediction_test2[:-1]), 2)),
        "%")
    #print ("Mean abs. error for the away team->", '{:.2f}'.format(100*round(metrics.mean_absolute_error(Y2_test, prediction_test2),2)), "%")

    print("\nPrediction for",
          "{0:15}".format(str(teamsList[homeTeamToPredict - 1].name) + ": "),
          '{:.2f}'.format(prediction_test[-1]))
    print("Prediction for",
          "{0:15}".format(str(teamsList[awayTeamToPredict - 1].name) + ": "),
          '{:.2f}'.format(prediction_test2[-1]))

    sel = '0'
    while sel != 'n':
        print("\nDo you want to have more data of this model?")
        print("   1-Yes, show me the predictions of the test file")
        print("   n-No, quit\n")
        sel = input("Type an option from the ones above and hit enter: ")

        if sel == 'n':
            return
        sel = int(sel)
        if sel == 1:
            printTestPredictions(X_test, prediction_test, prediction_test2,
                                 Y_test, Y2_test)
        if sel == 2:
            pass
Example #16
def nnet_tuning(n_layers=[1, 2, 1],
                layer_size=[20, 21, 1],
                k=5,
                train_data_path='../data/training_data.csv',
                save_model=False,
                tracking_uri="http://0.0.0.0:5000"):

    # Log the parameters with mlflow
    mlflow.log_param("n_layers", n_layers)
    mlflow.set_tag("layer_size", layer_size)

    # Set random seed for reproducibility
    np.random.seed(RANDOM_SEED)
    random.seed(RANDOM_SEED)

    # Get data shuffled and split into training and test sets
    mdr = MiningDataReader(path=train_data_path)
    (variable_names, X_train, X_test, y_train,
     y_test) = mdr.get_splitted_data()

    pipeline = Pipeline(
        steps=[('scaling', StandardScaler()
                ), ('regression', MLPRegressor(random_state=RANDOM_SEED))])

    ### TRAINING ###
    ################

    # Generate all combinations for number of layers and layer size
    # (see the standalone sketch after this function)
    neurons_per_layer = tuple(
        np.arange(layer_size[0], layer_size[1], layer_size[2]))
    hls_values = []
    for layers_num in np.arange(n_layers[0], n_layers[1], n_layers[2]):
        hls_values.append([
            x for x in itertools.product(neurons_per_layer, repeat=layers_num)
        ])

    # Flatten the list
    hls_values = [item for sublist in hls_values for item in sublist]

    # Generate grid search for hyperparam tuning
    hyperparams = {}
    hyperparams['regression__hidden_layer_sizes'] = hls_values

    print("Training started...\n")

    # Grid-search the scaler + MLPRegressor pipeline over the hyperparameters using all processors
    modelCV = GridSearchCV(estimator=pipeline,
                           param_grid=hyperparams,
                           cv=k,
                           scoring='neg_mean_squared_error',
                           n_jobs=-1)

    with ProgressBar():
        modelCV.fit(X_train, y_train)

    # Iterate over the results storing training error for each hyperparameter combination
    results = modelCV.cv_results_
    param_list, training_err_list, training_dev_list = [], [], []
    for i in range(len(results['params'])):
        param = results['params'][i]
        score = (-1) * results['mean_test_score'][i]  # NEGATIVE MSE
        std = results['std_test_score'][i]
        param_list.append(param)
        training_err_list.append(score)
        training_dev_list.append(std)

    print(
        f"\nBest parameter set found for the training set:\n{modelCV.best_params_}"
    )

    # Store the index of the best combination
    best_index = param_list.index(modelCV.best_params_)

    # Get the best values for hyperparams
    best_hls = modelCV.best_params_['regression__hidden_layer_sizes']

    print("\nTraining finished. Evaluating model...\n")

    ### EVALUATION ###
    ##################

    # Criteria is hidden_layer_sizes
    criteria = 'hidden_layer_sizes'
    mlflow.set_tag("criteria", criteria)
    param_values = hls_values

    # Predict test data variying criteria param and evaluate the models
    training_err_by_criteria, training_dev_by_criteria, test_err_list = [], [], []
    rmse_score, mae_score, r2_score = -1, -1, -1
    feature_names, feature_importances = [], []
    for param_value in tqdm(param_values):
        model = Pipeline(steps=[('scaler', StandardScaler()),
                                ('regression',
                                 MLPRegressor(hidden_layer_sizes=param_value,
                                              random_state=RANDOM_SEED))])
        param = {'regression__hidden_layer_sizes': param_value}

        # Fit model and evaluate results
        model.fit(X_train, y_train)
        prediction = model.predict(X_test)
        index = param_list.index(param)
        training_err = training_err_list[index]
        training_dev = training_dev_list[index]
        (training_mse, test_mse, rmse, mae,
         r2) = get_test_metrics(training_err, y_test, prediction)
        # Store metrics
        training_err_by_criteria.append(training_mse)
        training_dev_by_criteria.append(training_dev)
        test_err_list.append(test_mse)
        # Set additional metrics for the best combination
        if index == best_index:
            rmse_score = rmse
            mae_score = mae
            r2_score = r2

    # Generate the plots
    empty_img_folder()
    plot_errors(criteria, param_values, training_err_by_criteria,
                training_dev_by_criteria, test_err_list)

    # Once hyperparameters are selected, train and save the best model
    if save_model:
        print(
            "\nEvaluation finished. Training final model with train + test data with the best hyperparameters..."
        )
        final_model = Pipeline(
            steps=[('scaler', StandardScaler()),
                   ('regression',
                    MLPRegressor(hidden_layer_sizes=param_list[best_index]
                                 ['regression__hidden_layer_sizes']))])

        # Train the best model with all the data (training + test)
        full_X = np.vstack((X_train, X_test))
        full_y = np.concatenate((y_train, y_test))
        final_model.fit(full_X, full_y)

        # Log plots and model with mlflow
        mlflow.log_artifacts('./img')
        mlflow.sklearn.log_model(final_model, 'model')

    # Log results with mlflow
    mlflow.log_metric("train_mse", training_err_list[best_index])
    mlflow.log_metric("test_mse", min(test_err_list))
    mlflow.log_metric("rmse", rmse_score)
    mlflow.log_metric("mae", mae_score)
    mlflow.log_metric("r2", r2_score)
    mlflow.set_tag("best_params", param_list[best_index])

    # Output the results
    print(f'''
-----------------------------------------------------------------------------------------------------------------------
RESULTS
-----------------------------------------------------------------------------------------------------------------------
Best params: {param_list[best_index]}
Training MSE: {training_err_list[best_index]}
Test MSE: {min(test_err_list)}
RMSE: {rmse_score}
MAE: {mae_score}
R2: {r2_score}
-----------------------------------------------------------------------------------------------------------------------
''')
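# For reference, a small standalone sketch of the hidden-layer-size grid that
# nnet_tuning builds internally (same itertools logic, toy bounds):
import itertools
import numpy as np

n_layers = [1, 3, 1]       # try 1- and 2-layer networks
layer_size = [20, 31, 5]   # layer widths 20, 25, 30

neurons_per_layer = tuple(np.arange(layer_size[0], layer_size[1], layer_size[2]))
hls_values = []
for layers_num in np.arange(n_layers[0], n_layers[1], n_layers[2]):
    hls_values.append(list(itertools.product(neurons_per_layer, repeat=layers_num)))
hls_values = [item for sublist in hls_values for item in sublist]
print(hls_values)  # roughly: (20,), (25,), (30,), (20, 20), (20, 25), ..., (30, 30)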
Example #17
def train(seeds=[1],
          k=5,
          datafilepath='./data/HRB95.txt',
          test_size=5,
          label_flag='就业增长率'):
    seed = 2
    random.seed(seed)
    np.random.seed(seed)
    # data = np.loadtxt('./data/HRB95.txt', dtype=float, delimiter=',', skiprows=1)
    # x = data[:,1:data.shape[1]]
    # y = data[:,0]
    cv = k
    if cv == 1:
        cv = LeaveOneOut()
    models = [
        # KNeighborsRegressor(leaf_size=3, n_neighbors= 2, p=1, weights='distance'),
        # GridSearchCV(SVR(), param_grid={"C": np.logspace(0, 2, 4), "gamma": np.logspace(-2, 2, 7)},n_jobs=-1),
        # RidgeCV(alphas=(0.1, 1.0, 10.0,100.0)),
        MLPRegressor(hidden_layer_sizes=(5), random_state=seed),
        # RandomForestRegressor(random_state=seed),
        # GradientBoostingRegressor(random_state=seed),

        # StackingRegressor(estimators=[
        # ( 'KNN', KNeighborsRegressor(leaf_size=3, n_neighbors= 2, p=1, weights='distance')),
        # ("ridge", RidgeCV(alphas=(0.1, 1.0, 10.0, 100.0))),
        # ("gbdt",GradientBoostingRegressor(random_state=seed)),
        # ("RandomForest",RandomForestRegressor(random_state=seed)),
        # ("mlp", MLPRegressor(hidden_layer_sizes=(50,100,50),max_iter=700,random_state=seed)),
        #         ("svr", GridSearchCV(SVR(), n_jobs=-1, param_grid={"C": np.logspace(0, 2, 4), "gamma": np.logspace(-2, 2, 7)})),
        # ],  final_estimator=RidgeCV(alphas=(0.1, 1.0, 10.0, 100.0)), n_jobs=-1,cv=cv),
    ]
    models_str = [
        # 'KNeighborsRegressor',
        # 'SVR',
        # 'RidgeCV',
        'MLP',
        # 'RF',
        # 'GBDT',
        # 'Stacking',
    ]

    # average scores over the repeated runs (one entry per seed)
    MAE, MSE, R2 = {}, {}, {}
    for time, seed in enumerate(seeds):
        print("-----第%d次(seed=%s)-----" % (time + 1, seed))
        print("{:20s}{:10s}{:10s}{:10s}".format("方法", "MAE", "MSE", "R2"))
        x, y = loadXY(datafilepath, label_flag)
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=test_size, random_state=seed, shuffle=True)
        x_train, y_train = x, y
        plt.figure(time, figsize=(10, 10))
        plt.tick_params(labelsize=18)
        # plt.xlim(0, 6)
        # plt.ylim(3, 7, 0.3)
        # plt.plot([x for x in range(1, test_size + 1)],scale_y.inverse_transform(y_test),label='True Label')
        plt.scatter([x for x in range(1, test_size + 1)],
                    scale_y.inverse_transform(y_test),
                    marker='*',
                    label='True Label',
                    s=250)
        for i, name, m in zip(range(100), models_str, models):
            if not name in MAE.keys():
                MAE[name] = []
            if not name in MSE.keys():
                MSE[name] = []
            if not name in R2.keys():
                R2[name] = []
            print("%18s" % name)
            y_vals, y_val_p_s, mae_test, mse_test, r2_test = [], [], [], [], []
            model = clone(m)
            # Stacking model: cross-validation is already built in
            if isinstance(model, StackingRegressor):
                model.fit(x_train, y_train)
                train_pred = model.predict(x_train)
                test_pred = model.predict(x_test)
                MAE[name] = np.append(MAE[name], mae(test_pred, y_test))
                MSE[name] = np.append(MSE[name], mse(test_pred, y_test))
                R2[name] = np.append(R2[name], model.score(x_test, y_test))
                print("{:20s}{:6.4f}{:10.4f}{:10.3f}".format(
                    "train", mae(train_pred, y_train),
                    mse(train_pred, y_train), model.score(x_train, y_train)))
                print("{:20s}{:6.4f}{:10.4f}{:10.3f}".format(
                    "test", MAE[name][-1], MSE[name][-1], R2[name][-1]))
            else:
                # cross-validation
                if k > 1:
                    kf = RepeatedKFold(n_splits=k,
                                       n_repeats=10,
                                       random_state=seed)
                else:
                    kf = LeaveOneOut()
                for t, v in kf.split(x_train):
                    model.fit(x_train[t], y_train[t])  # fitting
                    y_val_p = model.predict(x_train[v])
                    y_vals = np.append(y_vals, y_train[v])
                    y_val_p_s = np.append(y_val_p_s, y_val_p)
                test_pred = model.predict(x_test)
                mse_test = np.append(mse_test, mse(y_test, test_pred))
                mae_test = np.append(mae_test, mae(y_test, test_pred))
                r2_test = np.append(r2_test, model.score(x_test, y_test))
                matrix = {
                    'val': {
                        'mae': mae(y_vals, y_val_p_s),
                        'mse': mse(y_vals, y_val_p_s),
                        'r2': r2_score(y_vals, y_val_p_s)
                    },
                    'test': {
                        'mae': mae_test.mean(),
                        'mse': mse_test.mean(),
                        'r2': r2_test.mean()
                    },
                }
                print("{:20s}{:6.4f}{:10.4f}{:10.3f}".format(
                    "val",
                    matrix['val']['mae'],
                    matrix['val']['mse'],
                    matrix['val']['r2'],
                ))
                print("{:20s}{:6.4f}{:10.4f}{:10.3f}".format(
                    "test", matrix['test']['mae'], matrix['test']['mse'],
                    matrix['test']['r2']))
                joblib.dump(model, 'save/%s%d.model' % (name, time))
                MAE[name] = np.append(MAE[name], matrix['test']['mae'])
                MSE[name] = np.append(MSE[name], matrix['test']['mse'])
                R2[name] = np.append(R2[name], matrix['test']['r2'])
            print(model.coefs_)
            print(len(model.coefs_))
            print(len(model.coefs_[0]))

            plt.matshow(model.coefs_[0], cmap='hot')
            plt.colorbar()
            plt.show()
            '''
            plt.plot([x for x in range(1, test_size + 1)], scale_y.inverse_transform(model.predict(x_test)),
                     marker='o', linestyle=':', label=name,c=colors.pop())
            # plt.scatter([x+i*0.2 for x in range(1, test_size + 1)], scale_y.inverse_transform(model.predict(x_test)),
            #             label=name,c=randomcolor())
            plt.legend(edgecolor='black', loc=1, prop=font2,ncol=2)  # show the legend labels
            plt.xlabel(u"Test Data",fontdict=font1)  # X-axis label
            plt.ylabel(label_flag,fontdict=font1)  # Y-axis label
            plt.title('Prediction on GI20',fontdict=font1)  # title
        plt.ioff()
        print()  # one round of cross-training for all models is finished; the sample split differs each round
        plt.show()
        '''
    print("---------%d次训练测试平均得分----------" % len(seeds))
    print("{:20s}{:10s}{:10s}{:10s}".format("方法", "MAE", "MSE", "R2"))
    for name in MAE.keys():
        print("{:20s}{:6.4f}{:10.4f}{:10.3f}".format(name, np.mean(MAE[name]),
                                                     np.mean(MSE[name]),
                                                     np.mean(R2[name])))
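# A minimal call sketch for the train() routine above. It assumes the helpers it
# relies on (loadXY, scale_y, and the mae/mse aliases) are defined in the same
# module; the seeds and data path below are illustrative only.
train(seeds=[1, 2, 3], k=5, datafilepath='./data/HRB95.txt', test_size=5)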
Example #18
    def from_chemsys(cls,
                     chemsys,
                     prefix="proto-dft-2/runs",
                     n_max_atoms=20,
                     agent=None,
                     analyzer=None,
                     experiment=None,
                     log_file="campaign.log",
                     cloudwatch_group="/camd/worker/dev/"):
        """
        Class factory method for constructing campaign from
        chemsys.

        Args:
            chemsys (str): chemical system for the campaign
            prefix (str): prefix for s3
            n_max_atoms (int): number of maximum atoms
            agent (Agent): agent for stability campaign
            analyzer (Analyzer): analyzer for stability campaign
            experiment (Experiment): experiment for stability campaign
            log_file (str): log filename
            cloudwatch_group (str): cloudwatch group to log to

        Returns:
            (ProtoDFTCampaign): Standard proto-dft campaign from
                the chemical system

        """
        logger = logging.Logger("camd")
        logger.setLevel("INFO")
        file_handler = logging.FileHandler(log_file)
        cw_handler = CloudWatchLogHandler(log_group=cloudwatch_group,
                                          stream_name=chemsys)
        logger.addHandler(file_handler)
        logger.addHandler(cw_handler)
        logger.addHandler(logging.StreamHandler())

        logger.info(
            "Starting campaign factory from_chemsys {}".format(chemsys))
        s3_prefix = "{}/{}".format(prefix, chemsys)

        # Initialize s3
        dumpfn({
            "started": datetime.now().isoformat(),
            "version": __version__
        }, "start.json")
        s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

        # Get structure domain
        # Check cache
        cache_key = "protosearch_cache/v1/{}/{}/candidates.pickle".format(
            chemsys, n_max_atoms)
        # TODO: create test of isfile
        if s3_key_exists(bucket=CAMD_S3_BUCKET, key=cache_key):
            logger.info("Found cached protosearch domain.")
            candidate_data = pd.read_pickle("s3://{}/{}".format(
                CAMD_S3_BUCKET, cache_key))
            logger.info("Loaded cached {}.".format(cache_key))
        else:
            logger.info(
                "Generating domain with max {} atoms.".format(n_max_atoms))
            element_list = chemsys.split('-')
            max_coeff, charge_balanced = heuristic_setup(element_list)
            domain = StructureDomain.from_bounds(
                element_list,
                charge_balanced=charge_balanced,
                n_max_atoms=n_max_atoms,
                **{'grid': range(1, max_coeff)})
            candidate_data = domain.candidates()
            logger.info("Candidates generated")
            candidate_data.to_pickle("s3://{}/{}".format(
                CAMD_S3_BUCKET, cache_key))
            logger.info("Cached protosearch domain at {}.".format(cache_key))

        # Dump structure/candidate data
        candidate_data.to_pickle("candidate_data.pickle")
        s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

        # Set up agents and loop parameters
        agent = agent or AgentStabilityAdaBoost(
            model=MLPRegressor(hidden_layer_sizes=(84, 50)),
            n_query=10,
            hull_distance=0.2,
            exploit_fraction=1.0,
            uncertainty=True,
            alpha=0.5,
            diversify=True,
            n_estimators=20)
        analyzer = analyzer or StabilityAnalyzer(hull_distance=0.2)
        experiment = experiment or OqmdDFTonMC1(timeout=30000,
                                                prefix_append="proto-dft")
        seed_data = load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")

        # Load cached experiments
        logger.info("Loading cached experiments")
        cached_experiments = experiment.fetch_cached(candidate_data)
        logger.info("Found {} experiments.".format(len(cached_experiments)))
        if len(cached_experiments) > 0:
            summary, seed_data = analyzer.analyze(cached_experiments,
                                                  seed_data)
            # Remove cached experiments from candidate_data
            candidate_space = candidate_data.index.difference(
                cached_experiments.index, sort=False).tolist()
            candidate_data = candidate_data.loc[candidate_space]
            logger.info("Cached experiments added to seed.")

        # Construct and start loop
        return cls(candidate_data=candidate_data,
                   agent=agent,
                   experiment=experiment,
                   analyzer=analyzer,
                   seed_data=seed_data,
                   heuristic_stopper=5,
                   s3_prefix=s3_prefix,
                   logger=logger)
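# A minimal invocation sketch, assuming the enclosing class is ProtoDFTCampaign
# (as the docstring's return type indicates) and that the S3/CloudWatch
# credentials the factory relies on are configured; the chemical system below
# is illustrative only.
campaign = ProtoDFTCampaign.from_chemsys("Fe-O", prefix="proto-dft-2/runs")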
import warnings
import numpy
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from onnxcustom.utils.onnx_helper import onnx_rename_weights
from onnxcustom.training.optimizers_partial import (
    OrtGradientForwardBackwardOptimizer)
from onnxcustom.training.sgd_learning_rate import LearningRateSGDNesterov
from onnxcustom.training.sgd_learning_penalty import ElasticLearningPenalty

X, y = make_regression(1000, n_features=10, bias=2)
X = X.astype(numpy.float32)
y = y.astype(numpy.float32)
X_train, X_test, y_train, y_test = train_test_split(X, y)

nn = MLPRegressor(hidden_layer_sizes=(10, 10),
                  max_iter=100,
                  solver='sgd',
                  learning_rate_init=5e-5,
                  n_iter_no_change=1000,
                  batch_size=10,
                  alpha=0,
                  momentum=0.9,
                  nesterovs_momentum=True)

with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    nn.fit(X_train, y_train)

print(nn.loss_curve_)

#################################
# Score:

print(f"mean_squared_error={mean_squared_error(y_test, nn.predict(X_test))!r}")
Example #20
print(f"[INFO] Reading data from {arg['dataset']}")
X, y = data_to_model(pd.read_csv(arg["dataset"]))

## PLAIN MULTILAYER PERCEPTRON REGRESSOR

report.write("ESPERIMENTO 1. PLAIN MULTILAYER PERCEPTRON REGRESSOR:\n")
report.write("\t\t Dati non riscalati\n\n")

scoring = {
    'r2': 'r2',
    "explained_variance_score": 'explained_variance',
    "max error": 'max_error'
}
#scoring=make_scorer(explained_variance_score,max_error,mean_absolute_error,r2_score)
regr = MLPRegressor()
scores = cross_validate(regr, X, y, scoring=scoring, cv=10, n_jobs=-1, verbose=1)

print(scores)

report.write(f"10 fold-cross validation: \n{scores}\n")

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.10,
                                                    random_state=42)

print(f"[INFO] Fitting model")
regr.fit(X_train, y_train)

y_pred = regr.predict(X_test)
Example #21
        'adaptive',
    ),
    #'nesterovs_momentum': (True, False,),
    #'alpha': (0.00001, 0.0001, 0.001, 0.01, 0.1, 0.0,),
    'warm_start': (
        True,
        False,
    ),
    'early_stopping': (
        True,
        False,
    ),
    'max_iter': (1000, )
}]

est = MLPRegressor(random_state=69)
gs = GridSearchCV(est,
                  cv=10,
                  param_grid=hyper_params,
                  verbose=2,
                  n_jobs=n_jobs,
                  scoring='r2')

t0 = time.time()
gs.fit(x_train, y_train)
runtime = time.time() - t0
print("Complexity and bandwidth selected and model fitted in %.6f s" % runtime)

train_score_mse = mean_squared_error(
    sc_y.inverse_transform(y_train),
    sc_y.inverse_transform(gs.predict(x_train)))
Example #22
    true = dict(mu=.01, sigma=0., zmu=-.01, zsigma=0.)
    truth = NoisyModel('truth', model=toy, nx=nx, ny=ny, **true)
    #print('sampling truth...')
    data = truth.sample([(0, 1), (1, 10)] + [(0, 10)] * (nx - 2), pts=-16)
    Ns = 25  #XXX: number of samples, when model has randomness

    # build a surrogate model by training on the data
    args = dict(hidden_layer_sizes=(100, 75, 50, 25),
                max_iter=1000,
                n_iter_no_change=5,
                solver='lbfgs',
                learning_rate_init=0.001)
    from sklearn.neural_network import MLPRegressor
    from sklearn.preprocessing import StandardScaler
    from ml import Estimator, MLData, improve_score
    kwds = dict(estimator=MLPRegressor(**args), transform=StandardScaler())
    # iteratively improve estimator
    mlp = Estimator(**kwds)  #FIXME: traintest so train != test ?
    best = improve_score(mlp,
                         MLData(data.coords, data.coords, data.values,
                                data.values),
                         tries=10,
                         verbose=True)
    mlkw = dict(estimator=best.estimator, transform=best.transform)

    #print('building estimator G(x) from truth data...')
    surrogate = LearnedModel('surrogate', nx=nx, ny=ny, data=truth, **mlkw)
    #print('building UQ model of model error...')
    error = ErrorModel('error', model=truth, surrogate=surrogate)

    rnd = Ns if error.rnd else None
#
# Author: Quan Pan <*****@*****.**>
# License: MIT License
# Create: 2016-12-02

# import itertools
# import unittest
#
# from numpy import array, linspace, sin, cos, pi

from sklearn.neural_network import MLPRegressor

from surrogate.estimator import ANNSurrogate

if __name__ == "__main__":
    X = [[0., 0.], [1., 1.], [10., 10.]]
    y = [0.0, 1.0, 10.0]
    x_pred = [[5., 5.], [-10., -2.]]

    surrogate = ANNSurrogate(algorithm='l-bfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
    surrogate.fit(X, y)
    y_pred = surrogate.predict(X)
    # print surrogate.regressor
    # print y_pred

    regressor = MLPRegressor(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
    regressor.fit(X, y)
    y_pred = regressor.predict(X)
    print(regressor)
    print(y_pred)
Example #24
y88 = next_move['y88']
y89 = next_move['y89']
y90 = next_move['y90']
y91 = next_move['y91']
y92 = next_move['y92']
y93 = next_move['y93']
y94 = next_move['y94']
y95 = next_move['y95']
y96 = next_move['y96']
y97 = next_move['y97']
y98 = next_move['y98']
y99 = next_move['y99']
y100 = next_move['y100']

mlpreg1 = MLPRegressor(hidden_layer_sizes=[100, 100],
                       activation='tanh',
                       alpha=1000,
                       solver='lbfgs').fit(X, y1)
mlpreg2 = MLPRegressor(hidden_layer_sizes=[100, 100],
                       activation='tanh',
                       alpha=1000,
                       solver='lbfgs').fit(X, y2)
mlpreg3 = MLPRegressor(hidden_layer_sizes=[100, 100],
                       activation='tanh',
                       alpha=1000,
                       solver='lbfgs').fit(X, y3)
mlpreg4 = MLPRegressor(hidden_layer_sizes=[100, 100],
                       activation='tanh',
                       alpha=1000,
                       solver='lbfgs').fit(X, y4)
mlpreg5 = MLPRegressor(hidden_layer_sizes=[100, 100],
                       activation='tanh',
Example #25
def __model(model_name, train_x, train_y, test_x, alpha=0.1, *args, **kwargs):
    summary = None
    from sklearn.neural_network import MLPRegressor
    from sklearn.svm import SVR
    import sklearn
    import statsmodels.regression.linear_model as sm
    from sklearn.model_selection import TimeSeriesSplit

    cv = TimeSeriesSplit(n_splits=3)
    if model_name == 'Random':
        test_y = pd.Series(np.random.random_sample((len(test_x),)), index=test_x.index)
    if model_name == 'None':
        test_y = test_x.iloc[:,0]
    if model_name == 'MLPRegressor':
        mlp = MLPRegressor(hidden_layer_sizes=(20, 20))
        mlp.fit(train_x, train_y)
        y_pred = mlp.predict(test_x)
        test_y = pd.Series(y_pred, index=test_x.index)
    if model_name == 'Lasso':
        model = sklearn.linear_model.Lasso(0.001,fit_intercept = False)
        lasso = model.fit(train_x, train_y)
        test_y = pd.Series(lasso.predict(test_x), index=test_x.index)
        summary = lasso.score(train_x,train_y)
        # print(test_y.head())
        # model = sklearn.linear_model.Lasso()
        # param_grid = {'alpha':[1e-5,0.5*1e-4,1e-4,1e-3,1e-2,1e-1]}
        # opt = sklearn.model_selection.GridSearchCV(model,param_grid,cv=cv)
        # opt = opt.fit(train_x,train_y)
        # test_y = pd.Series(opt.predict(test_x),index=test_x.index)
        # summary = opt.score(train_x,train_y)
        # print(opt.best_params_, summary)
    if model_name == 'Ridge':
        model = sklearn.linear_model.Ridge(1.0,fit_intercept = False)
        ridge = model.fit(train_x, train_y)
        test_y = pd.Series(ridge.predict(test_x), index=test_x.index)

        summary = ridge.score(train_x, train_y)
    if model_name == 'SVR':
        # param_grid = {'gamma':list(1.0/k*np.array([1e-4,1e-3,1e-2])),\
        #               'C':[0.01,0.05,0.25,1.25]}
        # param_grid = {
        #               'C':[0.002,0.01,0.05,0.25,1.25]}
        # opt = sklearn.model_selection.GridSearchCV(svr_rbf,param_grid,cv=cv)
        # opt = opt.fit(train_x,train_y)
        # y_pred_rbf = opt.predict(test_x)
        # summary = opt.score(train_x,train_y)
        # print(opt.best_params_, summary)

        k = len(train_x.columns)
        svr_rbf = SVR(kernel='rbf', C=0.05, gamma=1.0/k*1e-4,epsilon = 0.005, max_iter = 5000)
        svr_rbf = svr_rbf.fit(train_x, train_y)
        y_pred_rbf = svr_rbf.predict(test_x)
        test_y = pd.Series(y_pred_rbf, index=test_x.index)
        summary = svr_rbf.score(train_x,train_y)
        # print(test_y.head())

    if model_name == 'StepWise':

        feature_col = list(train_x.columns.values)
        length = len(feature_col)
        final_feature = []
        for i in range(length):
            pvalue_min = 1
            column_min = ""
            for feature in feature_col:
                temp_feature = final_feature + [feature]
                x = sm.add_constant(train_x.loc[:,temp_feature])
                model = sm.OLS(train_y, x)
                pvalue = model.fit().pvalues[i + 1]
                # print(pvalue)
                if pvalue < pvalue_min and pvalue < alpha:
                    pvalue_min = pvalue
                    column_min = feature

            if column_min != "":
                feature_col.remove(column_min)
                final_feature.append(column_min)
            else:
                break

        X = sm.add_constant(train_x.loc[:,final_feature])
        model = sm.OLS(train_y, X)
        res = model.fit()
        summary = pd.Series(res.pvalues, index=['const'] + final_feature)
        if ~np.isnan(res.f_pvalue):
            summary['f_test'] = res.f_pvalue
        if ~np.isnan(res.rsquared_adj):
            summary['score'] = res.rsquared_adj
        xx = sm.add_constant(test_x.loc[:,final_feature],has_constant='raise')
        test_y = res.predict(xx)


    if model_name == 'AdaBoost':
        from sklearn.ensemble import  AdaBoostRegressor
        model = AdaBoostRegressor(n_estimators=100,learning_rate = 0.1)
        adaboost = model.fit(train_x,train_y)
        test_y = pd.Series(adaboost.predict(test_x),index = test_x.index)
        summary = adaboost.score(train_x,train_y)

    if model_name == 'RandomForestRegressor':
        from sklearn.ensemble import RandomForestRegressor
        rfr = RandomForestRegressor(n_estimators=100, criterion='squared_error')
        rfr.fit(train_x, train_y)
        y_pred_rfr = rfr.predict(test_x)
        test_y = pd.Series(y_pred_rfr, index=test_x.index)

    return test_y,summary
x_test = np.load('NPY_FILES/first_disp_test.npy')
y_test = np.load('NPY_FILES/final_disp_test.npy')

########################################
# scale data
########################################
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

##########################################################################################
# ~ - ~ - ~ - ~ - ~ - ~ - ~ - ~       RUN MODEL              ~ - ~ - ~ - ~ - ~ - ~ - ~ - ~
##########################################################################################
model = MLPRegressor(hidden_layer_sizes=(500,500,500), activation='relu', solver='adam',\
 learning_rate='adaptive', max_iter=1000, learning_rate_init=0.01, alpha=0.01)
model.fit(x_train, y_train)

##########################################################################################
# ~ - ~ - ~ - ~ - ~ - ~ - ~ - ~     MAKE PRED, CALC ERR      ~ - ~ - ~ - ~ - ~ - ~ - ~ - ~
##########################################################################################

########################################
# prediction
########################################
y_train_predict = model.predict(x_train)
y_test_predict = model.predict(x_test)

########################################
# error
########################################
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
ds = dft.values
traindata, testdata, trainanswers, testanswers = train_test_split(
    dft.iloc[:, 2:7], dft.iloc[:, 7], test_size=0.4)
while True:  # Each iteration trains one model. A stopping condition is still needed here; one option is to bound the loop by wall-clock time (see the sketch after this loop).
    n1, n2 = np.random.randint(3, 21), np.random.randint(3, 21)
    lr = 'constant' if np.random.randint(0, 2) == 0 else 'adaptive'
    maxiter = np.random.randint(50000, 150000)
    iternochange = np.random.randint(5000, 25000)
    model = MLPRegressor(hidden_layer_sizes=(
        n1,
        n2,
    ),
                         learning_rate=lr,
                         max_iter=maxiter,
                         verbose=False,
                         early_stopping=True,
                         validation_fraction=0.2,
                         n_iter_no_change=iternochange)
    st = time.time()
    model.fit(traindata, trainanswers)
    et = time.time()
    trainpredictions = model.predict(traindata)
    trainr2 = r2_score(trainanswers, trainpredictions)
    testpredictions = model.predict(testdata)
    testr2 = r2_score(testanswers, testpredictions)
    metrics = pd.read_csv('metrics.csv')
    # If this condition passes, 'bestmodel.nnm' is updated with the current model. A better
    # condition would also take the size of the dataset into account.
    if testr2 > metrics['testr2'].max():
        pickle.dump(model, open('bestmodel.nnm', 'wb'))
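# The comment on the loop above suggests bounding the search by wall-clock time.
# A minimal sketch of that idea (the time budget below is a hypothetical value):
import time

TIME_BUDGET_SECONDS = 3600  # stop sampling new models after one hour
search_start = time.time()
while time.time() - search_start < TIME_BUDGET_SECONDS:
    # ... draw a random configuration, fit, evaluate and compare as in the loop above ...
    pass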
Example #28
from sklearn.neural_network import MLPRegressor

# create Trainig Dataset
train_x = [[x] for x in range(200)]
train_y = [x[0]**2 for x in train_x]

#create neural net regressor
# reg = MLPRegressor(hidden_layer_sizes=(50,),algorithm="l-bfgs")
reg = MLPRegressor(hidden_layer_sizes=(50, ), solver='lbfgs')
reg.fit(train_x, train_y)

#test prediction
test_x = [[x] for x in range(201, 220, 2)]

predict = reg.predict(test_x)
print "_Input_\t_output_"
for i in range(len(test_x)):
    print "  ", test_x[i], "---->", predict[i]
Example #29
]
n_output_weights = n_nn_hidden[len(n_nn_hidden) - 1] * n_fx_output
n_total_weights = sum(n_hidden_weights) + n_output_weights
n_total_intercepts = sum(n_nn_hidden) + n_fx_output
n_total_nn_parameters = n_total_weights + n_total_intercepts
n_joint_ukf_process_states = n_fx_input + n_total_nn_parameters
n_ukf_process_noise = n_fx_input
n_joint_ukf_process_noise = n_ukf_process_noise + n_total_nn_parameters
Xdim = n_joint_ukf_process_states
Vdim = n_joint_ukf_process_noise
Ndim = 1  # measurement
Ldim = Xdim + Vdim + Ndim

# create NN model
model = MLPRegressor(hidden_layer_sizes=tuple(n_nn_hidden),  activation=nn_activation, solver='lbfgs', alpha=0.001, batch_size='auto', learning_rate='constant', \
    learning_rate_init=0.00001, power_t=0.5, max_iter=1, shuffle=True, random_state=9, tol=1000, verbose=False, warm_start=False, momentum=0.9, \
    nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

# fake fit to initialize the coefficient arrays
model.fit(np.random.normal(size=(1, n_fx_input)),
          np.random.normal(size=(1, n_fx_output)))


def array2coef_intercepts(w):
    left = 0

    coef = []
    weight_sizes = [n_fx_input] + n_nn_hidden + [n_fx_output]
    for i in range(len(n_nn_hidden) + 1):
        weight_size = weight_sizes[i] * weight_sizes[i + 1]
        coef.append(w[left:(left + weight_size)].reshape(
Example #30
    def generate(self, params):

        neuroticismModelEmotion = MLPRegressor().set_params(**params)
        extraversionModelEmotion = MLPRegressor().set_params(**params)
        conscientiousnessModelEmotion = MLPRegressor().set_params(**params)
        agreeablenessModelEmotion = MLPRegressor().set_params(**params)
        opennessModelEmotion = MLPRegressor().set_params(**params)

        print("Training Started")

        TrainingEmotionDictPickle = open("trainingdict.pickle", "rb")

        TrainingEmotionDict = pickle.load(TrainingEmotionDictPickle)

        videosDataFile = open("../AnnotationFiles/annotation_training.pkl", "rb")
        print('Loading data from pickle file.')

        videosData = pickle.load(videosDataFile, encoding='latin1')

        print('Getting names of all the video files.')
        videoNames = list(videosData['extraversion'].keys())

        i = 1

        for videoName in videoNames:

            print(videoName)

            position = videoName.find(".mp4")
            audioFileName = videoName[:position] + '.wav'

            print (audioFileName)

            if audioFileName in TrainingEmotionDict:

                feature = []

                feature.append(TrainingEmotionDict[audioFileName]['neutral'])

                feature.append(TrainingEmotionDict[audioFileName]['happy'])

                feature.append(TrainingEmotionDict[audioFileName]['sad'])

                feature.append(TrainingEmotionDict[audioFileName]['angry'])

                feature.append(TrainingEmotionDict[audioFileName]['fear'])

                features = np.array(feature)
                features = features.reshape(1, -1)

                neuroticismModelEmotion.fit(features, np.array(videosData['neuroticism'][videoName]).ravel())

                extraversionModelEmotion.fit(features, np.array(videosData['extraversion'][videoName]).ravel())

                opennessModelEmotion.fit(features, np.array(videosData['openness'][videoName]).ravel())

                agreeablenessModelEmotion.fit(features, np.array(videosData['agreeableness'][videoName]).ravel())

                conscientiousnessModelEmotion.fit(features, np.array(videosData['conscientiousness'][videoName]).ravel())

                print("File number: {}".format(i))
                i = i+1


        for k in range(1, 3):
            randomfilename = random.choice(videoNames)

            position = randomfilename.find(".mp4")
            audioFileName = randomfilename[:position] + '.wav'

            feature = []

            feature.append(TrainingEmotionDict[audioFileName]['neutral'])

            feature.append(TrainingEmotionDict[audioFileName]['happy'])

            feature.append(TrainingEmotionDict[audioFileName]['sad'])

            feature.append(TrainingEmotionDict[audioFileName]['angry'])

            feature.append(TrainingEmotionDict[audioFileName]['fear'])

            features = np.array(feature)
            features = features.reshape(1, -1)

            print("The prediction for openness of file {} is: {} ".format(randomfilename, opennessModelEmotion.predict(features)))
            print("The actual value is: {} ".format(videosData['openness'][randomfilename]))


            print("The prediction for agreeableness of file {} is: {} ".format(randomfilename, agreeablenessModelEmotion.predict(features)))
            print("The actual value is: {} ".format(videosData['agreeableness'][randomfilename]))


            print("The prediction for neuroticism of file {} is: {} ".format(randomfilename, neuroticismModelEmotion.predict(features)))
            print("The actual value is: {} ".format(videosData['neuroticism'][randomfilename]))


            print("The prediction for extraversion of file {} is: {} ".format(randomfilename, extraversionModelEmotion.predict(features)))
            print("The actual value is: {} ".format(videosData['extraversion'][randomfilename]))


            print("The prediction for conscientiousness of file {} is: {} ".format(randomfilename, conscientiousnessModelEmotion.predict(features)))
            print("The actual value is: {} ".format(videosData['conscientiousness'][randomfilename]))

            NNModelDict = defaultdict(dict)

            NNModelDict['openness'] = opennessModelEmotion
            NNModelDict['agreeableness'] = agreeablenessModelEmotion
            NNModelDict['extraversion'] = extraversionModelEmotion
            NNModelDict['conscientiousness'] = conscientiousnessModelEmotion
            NNModelDict['neuroticism'] = neuroticismModelEmotion

            NNDataFile = open("../AnnotationFiles/emotionnn.pkl", "wb")
            pickle.dump(NNModelDict, NNDataFile)