# (The top of this excerpt is truncated; the build function header and first
# layer below are reconstructed, with sizes inferred from the Boston-housing
# "wider" tutorial this snippet follows, so treat them as assumptions.)
def wider_model():
    # create model
    model = Sequential()
    model.add(Dense(20, input_dim=13, kernel_initializer='normal',
                    activation='relu'))
    #model.add(Dense(6, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# evaluate model with standardized dataset
estimators = []
estimators.append(('standardize', StandardScaler()))
model = KerasRegressor(build_fn=wider_model,
                       epochs=100,
                       batch_size=10,
                       verbose=0)
estimators.append(('mlp', model))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)  # random_state requires shuffle=True in newer scikit-learn
results = cross_val_score(pipeline, X_TRAIN, Y_TRAIN, cv=kfold)
print("Wider: %.2f (%.2f) MSE" % (results.mean(), results.std()))
pipeline.fit(X_TRAIN, Y_TRAIN)
pred = pipeline.predict(X_TEST)
print(pred)
# print(pred.shape)
# print (mean_absolute_error(Y_TEST,pred))
directory = os.path.dirname(os.path.realpath(__file__))
model_step = pipeline.steps.pop(-1)[1]
joblib.dump(pipeline, os.path.join(directory, 'pipeline_plat.pkl'))
models.save_model(model_step.model, os.path.join(directory, 'model_plat.h5'))
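# Usage sketch (not in the original snippet): restoring the artifacts saved
# above. The Keras step was popped off before pickling, so it has to be
# re-attached after loading.
restored = joblib.load(os.path.join(directory, 'pipeline_plat.pkl'))
model_step.model = models.load_model(os.path.join(directory, 'model_plat.h5'))
restored.steps.append(('mlp', model_step))
print(restored.predict(X_TEST))  # should match `pred` above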
Example no. 2
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score, KFold

def ANNModel():
    model = Sequential()
    model.add(Dense(units = 238, kernel_initializer = 'normal', activation = 'relu', input_dim = 238))
    model.add(Dense(units = 100, kernel_initializer = 'normal', activation = 'relu'))
    model.add(Dense(units = 1, kernel_initializer = 'normal'))
    model.compile(optimizer = 'adam', loss = 'mean_squared_logarithmic_error')
    return model

seed = 10
np.random.seed(seed)

ANNReg = KerasRegressor(build_fn = ANNModel, epochs = 100, batch_size = 5, verbose = 1)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(ANNReg, X_train, Y_train, cv=kfold)
ANNReg.fit(X_train, Y_train)


#Prediction
RanRegPred = RanReg.predict(X_val)
GBRegPred = GBReg.predict(X_val)
XGBRegPred = XGBReg.predict(X_val)
ANNRegPred = ANNReg.predict(X_val).ravel()

#Checking the RMSLE
def rmsle(y, y0):
    assert len(y) == len(y0)
    return np.sqrt(np.mean(np.power(np.log1p(y)-np.log1p(y0), 2)))
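# Usage sketch (not in the original snippet): Y_val is assumed to hold the
# validation targets paired with X_val above.
for name, pred in [('RF', RanRegPred), ('GB', GBRegPred),
                   ('XGB', XGBRegPred), ('ANN', ANNRegPred)]:
    print(name, 'RMSLE: %.4f' % rmsle(Y_val, pred))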
Example no. 3
import os

from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV

import predictor
import prepare_data
import model_builder

if __name__ == "__main__":
    print("Grid searching!")

    #get home path
    root_dir = os.path.dirname(os.path.realpath(__file__))

    x, y, sc_X, sc_Y = prepare_data.training(
        os.path.join(root_dir, "data", "results.csv"))

    # create model
    model = KerasRegressor(build_fn=model_builder.create_model,
                           verbose=1,
                           feature_count=len(x[0]),
                           output_count=len(y[0]))

    # grid search epochs, batch size and optimizer
    optimizers = ['rmsprop']  #, 'adam']
    init = ['glorot_uniform']  #, 'normal', 'uniform']
    epochs = [1000, 5000, 10000]
    batches = [50]
    hidden_layer_counts = [1, 2, 3]
    param_grid = dict(optimizer=optimizers,
                      epochs=epochs,
                      batch_size=batches,
                      hidden_layer_count=hidden_layer_counts,
                      init=init)
    grid = GridSearchCV(estimator=model, param_grid=param_grid)
    grid_result = grid.fit(x, y)
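    # Summary sketch (not in the original snippet), mirroring the reporting
    # used by later examples on this page:
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))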
Example no. 4

def baseline_model():
    # 12 nodes -> 6 nodes -> 1 node
    # through trial and error by adding nodes, removing layers, and
    # changing epochs based on where I see the loss asymptote
    model = Sequential()
    model.add(
        Dense(12, input_dim=12, kernel_initializer='normal',
              activation='relu'))
    model.add(
        Dense(6, input_dim=12, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


estimator = KerasRegressor(build_fn=baseline_model, epochs=28)
estimator.fit(X_train, y_train)

# In[30]:

# create a dataframe containing the results from all the methods
df_test = (X_test.join(df_sub_wtb[['normand', 'stull', 'half',
                                   'third']]).assign(
                                       **{
                                           'lreg': visualizer.predict(X_test),
                                           'keras': estimator.predict(X_test)
                                       }))
df_test['time'] = pd.to_datetime(df_test['year'].astype(str) +
                                 df_test['dayofyear'].astype(str).str.zfill(3) +
                                 df_test['hour'].astype(str).str.zfill(2),
                                 format='%Y%j%H')
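# Note (not in the original snippet): the zero padding matters once the fields
# are concatenated without separators, e.g.
# pd.to_datetime('2018' + '032' + '07', format='%Y%j%H')
# gives Timestamp('2018-02-01 07:00:00').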
Example no. 5
# split into input (X) and output (Y) variables
X = dataset[:, 0:13]
Y = dataset[:, 13]


# define the model
def larger_model():
    # create model
    model = Sequential()
    model.add(Dense(13, input_dim=13, activation='relu'))
    model.add(Dense(6, activation='relu'))
    model.add(Dense(1))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


# evaluate model with standardized dataset
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp',
                   KerasRegressor(build_fn=larger_model,
                                  epochs=50,
                                  batch_size=5,
                                  verbose=0)))
pipeline = Pipeline(estimators)

kfold = KFold(n_splits=10)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Larger: %.2f (%.2f) MSE" % (results.mean(), results.std()))
Example no. 6

def deepLearning(data, target, iteraNum, funNum):
    # load dataset
    global kerasModel
    X_norm = data
    print("This is X_norm: ", X_norm)
    y = target
    print("This is target : ", y)
    tempDim = len(X_norm[0])
    print("This is input dimension: ", tempDim)

    kerasList = []
    batch_size = [50, 100, 150, 200]
    epochs = [10, 20, 30, 50, 80]
    inputDim = [tempDim]
    # neurons = [40,50,60,100,200]
    param_grid = dict(batch_size=batch_size,
                      epochs=epochs,  # nb_epoch was renamed to epochs in Keras 2
                      input_dim=inputDim)

    if funNum == 1:
        kerasModel = KerasRegressor(build_fn=baseline_model, verbose=0)
    elif funNum == 2:
        kerasModel = KerasRegressor(build_fn=wider_model, verbose=0)
    elif funNum == 3:
        kerasModel = KerasRegressor(build_fn=larger_model, verbose=0)

    for j in range(iteraNum):
        X_train, X_test, y_train, y_test = train_test_split(X_norm,
                                                            y,
                                                            test_size=0.2)
        print("This is X_train: ", X_train)
        print("This is y_train: ", y_train)
        grid = GridSearchCV(estimator=kerasModel, cv=5, param_grid=param_grid)
        newModel = grid.fit(X_train, y_train)
        print("Best: %f using %s" %
              (newModel.best_score_, newModel.best_params_))
        y_pred = newModel.predict(X_test).tolist()
        print("This is y_pred: ", y_pred)
        sum_mean = 0
        y_test_list = y_test.tolist()
        print("This is y_test_list: ", y_test_list)
        # for n in range(len(y_pred)):
        #     print("This is REAL value %.4f, ===========> PRED value: %.4f" % (y_test_list[n], y_pred[n]))
        #     # sum_mean += (y_pred[n] - y_test[n]) ** 2
        #     sum_mean += (float("{0:.4f}".format(float(y_pred[n]))) - y_test_list[n]) ** 2
        # sum_erro = np.sqrt(sum_mean / len(y_pred))
        #
        # print("This is sum_erro: ", sum_erro)
        sum_erro = np.sqrt(mean_squared_error(y_test_list, y_pred))
        print("This is : sum_erro ", sum_erro)
        print("This is iteration number: ", j + 1)
        kerasList.append(sum_erro)
    # # Train the model, iterating on the data in batches of n(32/64/128) samples
    # for j in range(iteraNum):
    #     X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.2)
    #     if funNum == 1:
    #         kerasModel = KerasRegressor(build_fn=baseline_model(inputDim), verbose=0)
    #         grid = GridSearchCV(estimator=kerasModel, param_grid=param_grid, n_jobs=1)
    #         bestDLModel = grid.fit(X_train, y_train)
    #         print("Best: %f using %s" % (bestDLModel.best_score_, bestDLModel.best_params_))
    #         y_pred = bestDLModel.predict(X_test)
    #
    #         # kerasModel = baseline_model(inputDim)
    #         # kerasModel.fit(X_train, y_train, epochs=200, batch_size=128)
    #         # y_pred = kerasModel.predict(X_test)
    #         sum_mean = 0
    #         for n in range(len(y_pred)):
    #             print("This is REAL value %.4f, ===========> PRED value: %.4f" % (y_test[n], y_pred[n]))
    #             sum_mean += (float("{0:.4f}".format(float(y_pred[n]))) - y_test[n]) ** 2
    #         sum_erro = np.sqrt(sum_mean / len(y_pred))
    #         print("This is sum_erro: ", sum_erro)
    #         print("This is iteration number: ", j + 1)
    #         kerasList.append(sum_erro)
    #         # plotFigure(y_pred, y_test, sum_erro[0])
    #     elif funNum == 2:
    #         # kerasModel = wider_model(inputDim, 2)
    #         # kerasModel.fit(X_train, y_train, epochs=100, batch_size=scalar, shuffle=True)
    #         # y_pred = kerasModel.predict(X_test)
    #         kerasModel = KerasRegressor(build_fn=wider_model(inputDim), verbose=0)
    #         grid = GridSearchCV(estimator=kerasModel, param_grid=param_grid, n_jobs=1)
    #         bestDLModel = grid.fit(X_train, y_train)
    #         print("Best: %f using %s" % (bestDLModel.best_score_, bestDLModel.best_params_))
    #         y_pred = bestDLModel.predict(X_test)
    #
    #         sum_mean = 0
    #         for n in range(len(y_pred)):
    #             print("This is REAL value %.4f, ===========> PRED value: %.4f" % (y_test[n], y_pred[n]))
    #             sum_mean += (float("{0:.4f}".format(float(y_pred[n]))) - y_test[n]) ** 2
    #         sum_erro = np.sqrt(sum_mean / len(y_pred))
    #         print("This is sum_erro: ", sum_erro)
    #         print("This is iteration number: ", j + 1)
    #         kerasList.append(sum_erro)
    #         # plotFigure(y_pred,y_test,sum_erro[0])
    #     elif funNum == 3:
    #
    #         # kerasModel = larger_model(inputDim)
    #         # kerasModel.fit(X_train, y_train, epochs=100, batch_size=scalar, shuffle=True)
    #
    #         kerasModel = KerasRegressor(build_fn=larger_model(inputDim), verbose=0)
    #         grid = GridSearchCV(estimator=kerasModel, cv=5,param_grid=param_grid)
    #         grid.fit(X_train, y_train)
    #         print("Best: %f using %s" % (grid.best_score_, grid.best_params_))
    #         y_pred = grid.predict(X_test)
    #         sum_mean = 0
    #         for n in range(len(y_pred)):
    #             print("This is REAL value %.4f, ===========> PRED value: %.4f" % (y_test[n], y_pred[n]))
    #             # sum_mean += (y_pred[n] - y_test[n]) ** 2
    #             sum_mean += (float("{0:.4f}".format(float(y_pred[n]))) - y_test[n]) ** 2
    #         sum_erro = np.sqrt(sum_mean / len(y_pred))
    #         print("This is sum_erro: ", sum_erro)
    #         print("This is iteration number: ", j + 1)
    #         kerasList.append(sum_erro)
    #         # plotFigure(y_pred, y_test, sum_erro)
    return kerasList
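# Usage sketch (not in the original snippet); `data_norm` and `targets` are
# placeholder names, and funNum=2 selects the wider model per the dispatch above:
# rmse_per_iteration = deepLearning(data_norm, targets, iteraNum=5, funNum=2)
# print("mean RMSE across iterations:", np.mean(rmse_per_iteration))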
Example no. 7
    regressor.add(LSTM(units=50, return_sequences=True))
    regressor.add(Dropout(0.2))

    regressor.add(LSTM(units=50))
    regressor.add(Dropout(0.2))

    regressor.add(Dense(units=1))

    regressor.compile(optimizer=optimizer, loss='mean_squared_error')
    return regressor


from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV

regressor = KerasRegressor(build_fn=build_regressor)

parameters = {
    'batch_size': [10, 25, 32],
    'epochs': [50, 100],  # nb_epoch was renamed to epochs in Keras 2
    'optimizer': ['adam', 'rmsprop']
}

grid_search = GridSearchCV(estimator=regressor,
                           param_grid=parameters,
                           scoring='neg_mean_squared_error',
                           cv=None)
grid_search.fit(X_train[:, :, -1], y_train)

best_param = grid_search.best_params_
best_accuracy = grid_search.best_score_
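# Reporting sketch (not in the original snippet):
print("Best: %f using %s" % (best_accuracy, best_param))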
Example no. 8
# (Reconstructed header; the top of this snippet is truncated in the source.)
def build_regressor():
    model = Sequential()

#Add First Hidden Layer
    model.add(Dense(units = 4, kernel_initializer = 'uniform', activation = 'relu', input_dim = 7))

# Adding the second hidden layer
    model.add(Dense(units = 4, kernel_initializer = 'uniform', activation = 'relu'))

# Adding the output layer
    model.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'relu'))

# Compiling the ANN
    model.compile(loss='mse', 
                optimizer='adam', 
                metrics=['mse'])
    return model
#build KerasRegressor
model=KerasRegressor(build_fn=build_regressor, batch_size = 10, epochs = 100)



#fitting model
model.fit(X_train, y_train, batch_size = 10, epochs = 1000)
#predicting model
y_pred=model.predict(X_test)
#finding MSE
m=(mean_squared_error(y_test,y_pred))
#for MSE <100
while(m>100):
    model.fit(X_train, y_train, batch_size = 10, epochs = 1000)
    y_pred=model.predict(X_test)
    m=(mean_squared_error(y_test,y_pred))

Example no. 9

# (The top of this snippet is truncated in the source; the build function's
# signature is reconstructed from the `activation` grid search below.)
def create_model(activation='relu'):
    # ... earlier layers truncated ...
    model.compile(optimizer='Nadam', loss='mean_squared_error', metrics=['mae'])
    return model

# fix random seed for reproducibility
seed = 7
np.random.seed(seed)
# load training dataset
train_data = pd.read_csv("train_data.csv", usecols=['position', 'area', 'diameter', 'angleup', 'angledown', 'shape', 'pdrop', 'reloc'])
train_targets = pd.read_csv("train_targets.csv", usecols=['Fr'])
# feature scaling and mean normalization for train data
mean = train_data.mean(axis=0)
train_data -= mean
std = train_data.std(axis=0)
train_data /= std
train_data = train_data.values
train_targets = train_targets.values.flatten('F')
# create model
model = KerasRegressor(build_fn=create_model, epochs=10000, batch_size=128, verbose=0)
# define the grid search parameters
activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
param_grid = dict(activation=activation)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1)
grid_result = grid.fit(train_data, train_targets)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
Example no. 10
def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(256, kernel_initializer='normal', activation='sigmoid'))
    model.add(Dropout(0.5))
    model.add(Dense(64, kernel_initializer='normal', activation='sigmoid'))
    model.add(Dropout(0.5))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


step_scaler = StandardScaler()
step_regressor = KerasRegressor(build_fn=baseline_model,
                                epochs=100,
                                batch_size=32,
                                verbose=2)
steps = []
steps.append(('step_scaler', step_scaler))
steps.append(('step_regressor', step_regressor))
pipeline = Pipeline(steps)
clf = pipeline
# kfold = KFold(n_splits=3, random_state=seed)
# cross_val_scores = cross_val_score(pipeline, X_train, y_train, cv=kfold)
# Results: -19376733.79 (49264686.37) MSE (no scaling)
# Results: -144.28 (148.91) MSE (relu, [62*1])
# Results: -59.37 (44.19) MSE (sigmoid, [62*1])
# Results: -87.21 (28.69) MSE (sigmoid, [62,32,1]) - deeper
# Results: -53.58 (32.51) MSE (sigmoid, [512,1]) - wider

clf.fit(X_train, y_train)
Example no. 11
previsores[:, 10] = labelencoder_previsores.fit_transform(previsores[:, 10])

# note: `categorical_features` was removed from OneHotEncoder in newer
# scikit-learn; use sklearn.compose.ColumnTransformer there instead
onehotencoder = OneHotEncoder(categorical_features = [0,1,3,5,8,9,10])
previsores = onehotencoder.fit_transform(previsores).toarray()

def criar_rede():
    regressor = Sequential()
    regressor.add(Dense(units = 158, activation = 'relu', input_dim = 316))
    regressor.add(Dense(units = 158, activation = 'relu'))
    regressor.add(Dense(units = 1, activation = 'linear'))
    regressor.compile(loss = 'mean_absolute_error', optimizer = 'adam',
                      metrics = ['mean_absolute_error'])
    return regressor

regressor = KerasRegressor(build_fn = criar_rede,
                           epochs = 100,
                           batch_size = 300)
resultados = cross_val_score(estimator = regressor,
                             X = previsores, y = preco_real,
                             cv = 10, scoring = 'neg_mean_absolute_error')  # 'mean_absolute_error' is not a valid scorer name
media = resultados.mean()
desvio = resultados.std()
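# Reporting sketch (not in the original snippet); scikit-learn negates the
# MAE scores, hence the sign flip:
print("MAE: %.2f (+/- %.2f)" % (-media, desvio))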
Example no. 12

# (Reconstructed header; the top of this snippet is truncated in the source.)
def baseline_model():
    model = Sequential()
    model.add(
        Dense(13, input_dim=13, kernel_initializer='normal',
              activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# evaluate model
estimator = KerasRegressor(build_fn=baseline_model,
                           epochs=100,
                           batch_size=5,
                           verbose=0)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, Y, cv=kfold)
print("Baseline: %.2f (%.2f) MSE" % (results.mean(), results.std()))

# Regression Example With Boston Dataset: Standardized
import numpy
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
Example no. 13
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(1, kernel_initializer='normal'))

    model.compile(loss='mean_squared_error',
                  optimizer='adam',
                  metrics=['mse', rmse, r_sq])  # rmse / r_sq: custom metric functions defined outside this excerpt

    return model


#estimators = []
#estimators.append(('standardize', MinMaxScaler()))
#estimators.append(('mlp',KerasRegressor(build_fn=build_model, epochs=20, batch_size=5, verbose=1)))
estimators = KerasRegressor(build_fn=build_model,
                            epochs=50,
                            batch_size=64,
                            verbose=1)

#pipeline = Pipeline(estimators)
kfold = KFold(n_splits=5)
results = cross_val_score(estimators, data_scaled, target_scaled, cv=kfold)

print("MSE Score: %.6f (%.6f) MSE" % (results.mean(), results.std()))

train_data, test_data, train_target, test_target = train_test_split(
    data_scaled, target_scaled, test_size=0.2, random_state=21)
'''
from keras.callbacks import ModelCheckpoint

chk = ModelCheckpoint("Modelling/ibd.h5", monitor='loss', save_best_only=True, mode='min')
callback_list=[chk]
Example no. 14
adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=True)
nadam = Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)

# 1. BATCH_SIZE AND EPOCHS
##########################
# Grid Search Hyperparameters
def grid_model(optim=adam):
    model = models.Sequential()
    model.add(layers.Dense(30, activation='relu',
        input_shape=(X_train.shape[1], )))
    model.add(layers.Dense(30, activation='relu'))
    model.add(layers.Dense(1))
    model.compile(optimizer=optim, loss='mse', metrics=['mae', 'mse'])
    return model

model = KerasRegressor(build_fn=grid_model, verbose=0)

# grid search parameters
batch_size = [20, 25, 30, 35, 40]
epochs = [150, 200, 250]

param_grid = dict(batch_size=batch_size, epochs=epochs)

grid = GridSearchCV(estimator=model,
        param_grid=param_grid,
        scoring=['neg_mean_squared_error', 'r2', 'explained_variance'],
        cv=5,
        n_jobs=-1,
        refit='neg_mean_squared_error',
        verbose=2)
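# Follow-up sketch (not in the original snippet), assuming X_train/y_train from
# the surrounding script:
grid_result = grid.fit(X_train, y_train)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))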
Example no. 15
class NNNBA:
    """
    NNNBA class, which contains all the calculated information
    """

    default_model_type = "lasso"
    assumed_max_salary = 35350000.0

    __threshold_per_col = {
        "OFF_RATING": 12,
        "PIE": 0.11,
        "NET_RATING": 18,
        "GP": 50,
        "DEF_RATING": 7,
        "USG_PCT": 0.12,
        "FGA": None,
        "FGM": None,
        "FG3A": None,
        "PTS": None,
        "FTM": None,
        "REB_PCT": None,
        "AGE": 4
    }

    __outlier_cols_upper = [
    ]  #["OFF_RATING", "PIE", "NET_RATING", "USG_PCT", "PTS"]
    __outlier_cols_lower = []  #["DEF_RATING"]

    __ridge_init_alpha = [0.01, 0.03, 0.06, 0.1, 0.3, 0.6, 1, 3, 6, 10, 30, 60]
    __lasso_init_alpha = [
        0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006, 0.01, 0.03, 0.06, 0.1,
        0.3, 0.6, 1
    ]
    __elasticnet_init = {
        "l1_ratio": [0.1, 0.3, 0.5, 0.6, 0.7, 0.8, 0.85, 0.9, 0.95, 1],
        "alpha": [
            0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006, 0.01, 0.03, 0.06, 0.1,
            0.3, 0.6, 1, 3, 6
        ]
    }

    def __realpha__(self, alpha):
        """
        Function to recalculate alpha
        """
        return [
            alpha * .6, alpha * .65, alpha * .7, alpha * .75, alpha * .8,
            alpha * .85, alpha * .9, alpha * .95, alpha, alpha * 1.05,
            alpha * 1.1, alpha * 1.15, alpha * 1.25, alpha * 1.3, alpha * 1.35,
            alpha * 1.4
        ]

    def __reratio__(self, ratio):
        """
        Function to recalculate ratio
        """
        return [
            ratio * .85, ratio * .9, ratio * .95, ratio, ratio * 1.05,
            ratio * 1.1, ratio * 1.15
        ]

    def __baseline_model__():  # note: no 'self'; passed directly as build_fn below
        """
        Base Neural Network model
        """
        input = 39
        model = Sequential()
        model.add(
            Dense(input,
                  input_dim=input,
                  kernel_initializer='normal',
                  activation='relu'))
        model.add(
            Dense(int(input / 2),
                  kernel_initializer='normal',
                  activation='relu'))
        model.add(Dense(input, kernel_initializer='normal', activation='relu'))
        model.add(
            Dense(int(input / 2),
                  kernel_initializer='normal',
                  activation='relu'))
        model.add(
            Dense(int(input / 4),
                  kernel_initializer='normal',
                  activation='relu'))
        model.add(Dense(1, kernel_initializer='normal'))
        model.compile(loss='mean_squared_error', optimizer='adam')
        return model

    def __idx_of_median_outlier__(self,
                                  col,
                                  threshold=None,
                                  upper_outlier=True):  #may need threshold=2
        """
        Find index of outlier based on distance from median
        Distance from median = threshold, which is either passed in or calculated as a function of std from the passed in data
        """
        if threshold is None:
            threshold = col.std() * 2.5
        logger.debug("median: " + str(col.median()) + " threshold: " +
                     str(threshold))
        diff = col - col.median()
        if upper_outlier:
            outlier = diff > threshold
        else:
            outlier = -1 * diff > threshold
        return list(outlier.index[outlier])

    models = {
        "linear regression":
        linear_model.LinearRegression(fit_intercept=True),
        "ridge":
        linear_model.RidgeCV(alphas=__ridge_init_alpha, fit_intercept=True),
        "lasso":
        linear_model.LassoCV(alphas=__lasso_init_alpha,
                             max_iter=5000,
                             cv=10,
                             fit_intercept=True),
        "bayes ridge":
        linear_model.BayesianRidge(),
        "keras regressor":
        KerasRegressor(build_fn=__baseline_model__,
                       epochs=100,
                       batch_size=5,
                       verbose=0),
        "xgb":
        xgb.XGBRegressor(n_estimators=1500, max_depth=2, learning_rate=0.01),
        "elasticnet":
        linear_model.ElasticNetCV(l1_ratio=__elasticnet_init["l1_ratio"],
                                  alphas=__elasticnet_init["alpha"],
                                  max_iter=1000,
                                  cv=3),
        "theilsen":
        linear_model.TheilSenRegressor(),
        "polynomial":
        Pipeline([('poly', PolynomialFeatures(degree=2)),
                  ('linear', linear_model.LinearRegression(fit_intercept=True))
                  ])
    }

    def __remodel__(self, model_type, regr, __X_train, __Y_train):
        """
        Function to retrain certain models based on optimal alphas and/or ratios
        """
        if model_type == "ridge":
            alpha = regr.alpha_
            regr = linear_model.RidgeCV(alphas=self.__realpha__(alpha), cv=10)
        elif model_type == "lasso":
            alpha = regr.alpha_
            regr = linear_model.LassoCV(alphas=self.__realpha__(alpha),
                                        max_iter=5000,
                                        cv=10)
        elif model_type == "elasticnet":
            alpha = regr.alpha_
            ratio = regr.l1_ratio_
            regr = linear_model.ElasticNetCV(
                l1_ratio=self.__reratio__(ratio),
                alphas=self.__elasticnet_init["alpha"],
                max_iter=1000,
                cv=3)

        regr.fit(__X_train, __Y_train)
        return regr

    def __normalize_salary__(
        self,
        col,
        max_salary=assumed_max_salary
    ):  # scales out to max contract; max taken from https://www.hoopsrumors.com/2017/05/nba-maximum-salary-projections-for-201718.html
        """
        Function to normalize salary so that the max is maximum salary possible, as yoy max salary changes
        """
        min_salary = min(col)
        local_max_salary = max(col)
        return max_salary - (local_max_salary - col) / (
            local_max_salary - min_salary) * (max_salary - min_salary)

    def __init__(self, debug=False):
        logger.setLevel(logging.DEBUG if debug else logging.ERROR)
        with open("crawled_data/raw_data.json", "r") as data_file:
            raw_data = json.load(data_file)

        columns = raw_data[0]["header"]
        unique_columns = list(set(raw_data[0]["header"]))
        position_names = [
            "Point Guard", "Shooting Guard", "Small Forward", "Power Forward",
            "Center"
        ]
        positions = []

        for i, val in enumerate(position_names):
            positions.append((val, i))
        positions_convert = dict(positions)

        self.X_df = pd.DataFrame(columns=columns)
        Y_df = pd.DataFrame(columns=["SALARIES"])
        age = []
        positions_df = pd.DataFrame(columns=position_names)
        names = pd.DataFrame(columns=["NAME", "PROJECTED_SALARIES"])

        logger.debug("Processing data")
        for i, player in enumerate(raw_data):
            if "2016_17" in player["salaries"] and "2016-17" in player["stats"]:
                Y_df.loc[len(Y_df)] = player["salaries"]["2016_17"]
                self.X_df.loc[len(self.X_df)] = player["stats"]["2016-17"]
                age.append(player["age"])

                positions_df.loc[len(positions_df)] = [0, 0, 0, 0, 0]
                for position in player["positions"]:
                    positions_df[position][len(positions_df)] = 1

                projected_salaries = 0
                try:
                    projected_salaries = player["projected_salaries"][0]
                except:
                    pass
                names.loc[len(names)] = [player["name"], projected_salaries]
            else:
                continue

        for col in []:
            try:
                self.X_df[col] = np.tanh(self.X_df[col])
            except:
                pass

        self.X_df = self.X_df.T.drop_duplicates().T
        self.X_df = pd.concat(
            [self.X_df, pd.Series(age, name="AGE"), positions_df], axis=1)

        self.X_df = self.X_df.drop([
            "FGA", "L", "AGE", "PCT_TOV", "BLKA", "AST_PCT", "AST_RATIO",
            "OREB_PCT", "DREB_PCT", "REB_PCT", "TM_TOV_PCT", "PACE",
            "OPP_PTS_OFF_TOV", "OPP_PTS_FB", "OPP_PTS_PAINT",
            'OPP_PTS_2ND_CHANCE', 'OPP_PTS_FB', 'PCT_FGA_2PT', 'PCT_FGA_3PT',
            'PCT_PTS_2PT', 'PCT_PTS_2PT_MR', 'PCT_PTS_3PT', 'PCT_PTS_FB',
            'PCT_PTS_FT', 'PCT_PTS_OFF_TOV', 'PCT_PTS_PAINT', 'PCT_AST_2PM',
            'PCT_UAST_2PM', 'PCT_AST_3PM', 'PCT_UAST_3PM', 'PCT_AST_FGM',
            'PCT_UAST_FGM', 'PCT_FGM', 'PCT_FGA', 'PCT_FG3M', 'PCT_FG3A',
            'PCT_FTM', 'PCT_FTA', 'PCT_OREB', 'PCT_DREB', 'PCT_REB', 'PCT_AST',
            'PCT_STL', 'PCT_BLK', 'PCT_BLKA', 'PTS_OFF_TOV', 'PTS_FB',
            'PTS_PAINT'
        ], axis=1)

        logger.debug("Columns: " + ", ".join(self.X_df.columns))
        # remove players who've played fewer than 15 games
        idx_of_lt_gp = self.X_df.index[(self.X_df["GP"] < 15)]
        self.X_df = self.X_df.drop(idx_of_lt_gp)
        Y_df = Y_df.drop(idx_of_lt_gp)
        age = pd.Series(age).drop(idx_of_lt_gp)
        positions_df = positions_df.drop(idx_of_lt_gp)
        names = names.drop(idx_of_lt_gp)

        # Remove outliers
        logger.debug("Remove outliers")

        X_train = self.X_df.copy()
        Y_train = Y_df.copy()
        logger.debug("No of rows before removing outliers: " +
                     str(X_train.shape[0]))
        to_be_dropped = []
        ## remove upper
        for col in self.__outlier_cols_upper:
            logger.debug(col)
            idx_of_median_outlier = self.__idx_of_median_outlier__(
                X_train[col], self.__threshold_per_col[col])
            logger.debug(
                col + " should drop " +
                ", ".join(names["NAME"][idx_of_median_outlier].values))
            to_be_dropped = to_be_dropped + idx_of_median_outlier

        ## remove lower
        for col in self.__outlier_cols_lower:
            logger.debug(col)
            idx_of_median_outlier = self.__idx_of_median_outlier__(
                X_train[col],
                self.__threshold_per_col[col],
                upper_outlier=False)
            logger.debug(
                col + " should drop " +
                ", ".join(names["NAME"][idx_of_median_outlier].values))
            to_be_dropped = to_be_dropped + idx_of_median_outlier

        to_be_dropped = list(set(to_be_dropped))
        logger.debug("Outliers: " +
                     ", ".join(names["NAME"][to_be_dropped].values))
        X_train = X_train.drop(to_be_dropped)
        Y_train = Y_train.drop(to_be_dropped)
        logger.debug("No of rows after removing outliers: " +
                     str(X_train.shape))
        logger.debug("No of rows after removing outliers: " +
                     str(Y_train.shape))

        __X_train = X_train.values  # training data only includes non-rookies
        __Y_train = np.log1p(Y_train["SALARIES"].values)  # y = log(1+y)

        self.Y_df = Y_df
        self.model_results = {}
        self.names = names

        for model_type, regr in self.models.items():
            logger.debug("Started  " + model_type)
            this_results = names.copy()
            regr.fit(__X_train, __Y_train)

            regr = self.__remodel__(model_type, regr, __X_train, __Y_train)

            results = self.__normalize_salary__(
                np.expm1(regr.predict(self.X_df.values)))  # y = exp(y) - 1
            this_results['WORTH'] = results

            diffY = this_results["PROJECTED_SALARIES"].values - results
            this_results['SALARY_DIFF'] = diffY
            this_results = this_results.sort_values(by="SALARY_DIFF",
                                                    ascending=False)

            self.models[model_type] = regr
            self.model_results[model_type] = this_results
            logger.debug("Finished " + model_type)

        #get avg
        this_results = self.model_results["linear regression"].copy()
        this_results["WORTH"] = self.__normalize_salary__(
            (1. * self.model_results["bayes ridge"]["WORTH"] +
             1. * self.model_results["lasso"]["WORTH"] +
             1. * self.model_results["elasticnet"]["WORTH"]) / 3)
        diffY = this_results["PROJECTED_SALARIES"].values - this_results[
            "WORTH"]
        this_results['SALARY_DIFF'] = diffY
        self.model_results["avg"] = this_results

    def getUndervalued(self, model_type=default_model_type):
        names = self.model_results[model_type]
        print(names.loc[(names["SALARY_DIFF"] < 0)
                        & (names["PROJECTED_SALARIES"] > 0)])

    def getPlayerValue(self, player_name, model_type=default_model_type):
        names = self.model_results[model_type]
        idx = names[names["NAME"] == player_name].index[0]

        print("\nPaid: " +
              '${:,.2f}'.format(float(self.Y_df.loc[idx]["SALARIES"])) +
              "\tFuture Salary: " +
              '${:,.2f}'.format(float(self.names["PROJECTED_SALARIES"][idx])) +
              "\tWorth: " + '${:,.2f}'.format(float(names["WORTH"][idx])) +
              "\n")
        self.getPlayerStats(player_name, trim=True)

    def getPlayerStats(self, player_name, trim=False):
        columns = self.X_df.columns
        if trim:
            columns = columns[:30]
        print(self.X_df.loc[self.names["NAME"] == player_name, columns])

    def getMostValuablePlayers(self, model_type=default_model_type):
        names = self.model_results[model_type]
        print(names.sort_values(by="WORTH"))

    def showAvailableModels(self):
        for model in self.models:
            print(model)

    def getPlayerNameByIndex(self, index):
        return self.names[self.names.index == index]

    def getCoefFromModel(self, model_type=default_model_type):
        return pd.DataFrame(self.models[model_type].coef_,
                            index=self.X_df.columns,
                            columns=["coef"]).sort_values(by="coef")

    def plotXCol(self, col_name, X=None):
        import matplotlib.pyplot as plt
        if X is None:
            X = self.X_df.sort_values(by=col_name)[col_name].values
        plt.figure()
        plt.scatter(range(len(X)), X)
        plt.show()
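# Usage sketch (not in the original snippet): assumes crawled_data/raw_data.json
# is present, as required by __init__ above.
nnnba = NNNBA()
nnnba.showAvailableModels()
nnnba.getUndervalued("lasso")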
Example no. 16
# (Reconstructed header; the top of this snippet is truncated in the source.)
def base_model():
    model = Sequential()
    model.add(Dense(20, input_dim=398, kernel_initializer='normal', activation='relu'))
    model.add(Dense(10, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

seed = 7
np.random.seed(seed)

scale = StandardScaler()
X_train = scale.fit_transform(train_new)
X_test = scale.transform(test_new)  # transform only; refitting the scaler on test data would leak

keras_label = label_df.values  # as_matrix() was removed from pandas; use .values
clf = KerasRegressor(build_fn=base_model, epochs=1000, batch_size=5, verbose=0)
clf.fit(X_train,keras_label)

#make predictions and create the submission file 
kpred = clf.predict(X_test) 
kpred = np.exp(kpred)
pred_df = pd.DataFrame(kpred, index=test["Id"], columns=["SalePrice"]) 
pred_df.to_csv('keras1.csv', header=True, index_label='Id') 


#simple average
y_pred = (y_pred_xgb + y_pred_lasso) / 2
y_pred = np.exp(y_pred)
pred_df = pd.DataFrame(y_pred, index=test["Id"], columns=["SalePrice"])
pred_df.to_csv('ensemble1.csv', header=True, index_label='Id')
Example no. 17
def update_trainee_score(x):
    print('Updating Profile Scores ...')
    academic_score = []
    honesty = []
    emotionality = []
    extraversion = []
    agreeableness = []
    conscientiousness = []
    openness = []
    iq = []
    verbal_ability = []
    score = []
    course_score = []
    qa_score = []
    project_score = []

    for p in Trainee.objects.all().exclude(pk=x.pk):

        # skip trainees with any missing profile field
        required = [p.academic_score, p.personality_c, p.personality_h,
                    p.personality_a, p.personality_e, p.personality_o,
                    p.personality_x, p.iq_score, p.course_score,
                    p.project_score, p.verbal_ability_score, p.qa_score,
                    p.score]
        if any(v is None for v in required):
            continue
        academic_score.append(p.academic_score)
        honesty.append(p.personality_h)
        emotionality.append(p.personality_e)
        extraversion.append(p.personality_x)
        agreeableness.append(p.personality_a)
        conscientiousness.append(p.personality_c)
        openness.append(p.personality_o)
        iq.append(p.iq_score)
        verbal_ability.append(p.verbal_ability_score)
        score.append(p.score)
        project_score.append(p.project_score)
        course_score.append(p.course_score)
        qa_score.append(p.qa_score)

    if len(academic_score) == 0:
        x.score = 0.6
        x.save()

    else:

        d = {'1': academic_score, '2': honesty, '3': emotionality, '4': extraversion, '5': agreeableness,
             '6': conscientiousness, '7': openness, '8': iq, '9': verbal_ability, '10': project_score,
             '11': course_score,
             '12': qa_score, '13': score}
        df = pd.DataFrame(data=d)
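        # Note (not in the original snippet): the column positions below assume
        # the older pandas behaviour of sorting dict keys lexicographically
        # ('1','10','11','12','13','2',...), which puts the target column '13'
        # (score) at position 4; newer pandas keeps insertion order.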
        X = df.iloc[:, [0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12]].values
        y = df.iloc[:, 4].values

        sc = StandardScaler()
        X = sc.fit_transform(X)
        estimator = KerasRegressor(build_fn=baseline_model, batch_size=50, epochs=100, verbose=0)
        estimator.fit(X, y)

        test_pred_temp = []
        test_pred = []
        test_pred_temp.append(x.academic_score)
        test_pred_temp.append(x.personality_h)
        test_pred_temp.append(x.personality_e)
        test_pred_temp.append(x.personality_x)
        test_pred_temp.append(x.personality_a)
        test_pred_temp.append(x.personality_c)
        test_pred_temp.append(x.personality_o)
        test_pred_temp.append(x.iq_score)
        test_pred_temp.append(x.verbal_ability_score)
        test_pred_temp.append(x.project_score)
        test_pred_temp.append(x.course_score)
        test_pred_temp.append(x.qa_score)

        test_pred.append(test_pred_temp)
        test_pred_1 = np.asarray(test_pred)
        new_prediction = estimator.predict(test_pred_1)

        y = np.insert(y, y.size, new_prediction)
        X = np.concatenate((X, test_pred_1), axis=0)

        y_new = list(y)  # the original loop variable `x` shadowed the trainee argument

        # mean and population standard deviation (identical to the original loops)
        avg = np.mean(y_new)
        sd = np.std(y_new)

        y_final = []

        for i in range(len(y_new)):
            pp = (y_new[i] - avg) / sd * 0.1 + 0.8
            if pp >= 1.0:
                pp = 0.9999
            if pp <= 0.6:
                pp = 0.0001
            y_final.append(pp)

        ctr = 0
        for p in Trainee.objects.all():
            p.score = y_final[ctr]
            p.save()
            ctr += 1
Example no. 18
# fix random seed for reproducibility
np.random.seed(SEED)


# Multi-class Neural Network
def build_model():
    clf = Sequential()
    clf.add(Dense(features.shape[1], activation='relu'))
    clf.add(Dense(5, activation='relu'))
    clf.add(Dropout(0.3))
    clf.add(Dense(3, activation='relu'))
    clf.add(Dropout(0.3))
    clf.add(Dense(1, kernel_initializer='normal'))
    clf.compile(optimizer='adam', loss='mean_squared_error')
    return clf


# evaluate model with standardized dataset
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp',
                   KerasRegressor(build_fn=build_model,
                                  epochs=EPOCHS,
                                  batch_size=BATCH_SIZE,
                                  verbose=1)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10)
results = cross_val_score(pipeline, features, labels, cv=kfold)
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))
Example no. 19
# (Reconstructed header; the top of this snippet is truncated in the source.)
def deep_learning_model():
    # create model
    model = Sequential()
    model.add(
        Dense(135,
              input_dim=270,
              kernel_initializer='normal',
              activation='elu'))
    model.add(Dense(1, kernel_initializer='normal'))

    # Compile model (configure for training)
    # optimizer 'adam' was chosen because it (on average) is the speediest
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


# evaluate model with standardized dataset
estimator = KerasRegressor(build_fn=deep_learning_model,
                           epochs=100,
                           batch_size=5,
                           verbose=0)

estimator.fit(X, Y)
y_keras_pred = estimator.predict(X_test)

create_submission(test_data, y_keras_pred, 3)

# ### Third Trial Summary -- Big improvement! Deep learning received a score on Kaggle of 0.207

# In[13]:

# Create build function for KerasRegressor


def deep_learning_model2():
Example no. 20
# X_FINAL, y_FINAL = X_scaled[remove_inds,:], y_scaled[remove_inds,:]
# X_scaled, y_scaled = X_scaled[keep_inds,:], y_scaled[keep_inds,:]
#--------------------

#Split data to 90% train & 10% unseen
X_train, X_unseen, y_train, y_unseen = train_test_split(X_scaled,
                                                        y_scaled,
                                                        test_size=0.10,
                                                        random_state=32)

kf = KFold(n_splits=4, shuffle=True)
fig, ax = plt.subplots(1, 1, figsize=(8, 8))
fig2, ax2 = plt.subplots(1, 1, figsize=(8, 8))
for train_index, test_index in kf.split(X_train, y=y_train):

    model = KerasRegressor(build_fn=baseline_model, epochs=100)
    history = model.fit(X_train[train_index],
                        y_train[train_index],
                        validation_data=(X_train[test_index],
                                         y_train[test_index]))

    ax.plot(history.history['loss'], label='loss')
    ax.plot(history.history['val_loss'], label='validation loss')
    ax.set_ylabel('Loss')
    ax.set_xlabel('Epoch')
    ax.legend()
    ax.minorticks_on()
    ax.grid(which='major', ls='-', color=[0.15, 0.15, 0.15], alpha=0.15)
    ax.grid(which='minor',
            ls=':',
            dashes=(1, 5, 1, 5),
Example no. 21
                                                            downcast='infer')


def baseline_model():
    # create model
    model = Sequential()
    model.add(
        Dense(5, input_dim=5, kernel_initializer='normal',
              activation='linear'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


estimator = KerasRegressor(build_fn=baseline_model, epochs=1000, verbose=0)

t0 = time.perf_counter()  # time.clock() was removed in Python 3.8
estimator.fit(X, y)
t1 = time.perf_counter()

prediction = estimator.predict(X)

train_error = np.abs(y - prediction)
mean_error = np.mean(train_error)
min_error = np.min(train_error)
max_error = np.max(train_error)
std_error = np.std(train_error)

#print('prediction :',prediction)
#print('train error :')
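# Reporting sketch (not in the original snippet):
print('train time: %.2f s' % (t1 - t0))
print('train error: mean=%.4f min=%.4f max=%.4f std=%.4f'
      % (mean_error, min_error, max_error, std_error))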
Example no. 22
    model.add(Activation('relu'))
    model.add(Dense(1))

    #compile model
    model.compile(loss='mean_squared_error',
                  optimizer=OPTIMIZER,
                  metrics=['mean_squared_error'])

    return model


# evaluate model with standardized dataset
np.random.seed(seed)

kreg = KerasRegressor(build_fn=baseline_model,
                      epochs=NB_EPOCH,
                      batch_size=BATCH_SIZE,
                      verbose=VERBOSE)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', kreg))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=2, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Error: %.4f (%.4f) MSE" % (results.mean(), results.std()))


#denormalize data
def denorm(min, max, input):
    z = (input * (max - min)) + min
    return z
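# Usage sketch (not in the original snippet): y_min and y_max are assumed to be
# the bounds used when the targets were normalized.
# y_pred_original = denorm(y_min, y_max, pipeline.predict(X))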
Example no. 23
def get_model_from_name(model_name, training_params=None, is_hp_search=False):
    global keras_imported

    # For Keras
    epochs = 1000
    # if os.environ.get('is_test_suite', 0) == 'True' and model_name[:12] == 'DeepLearning':
    #     print('Heard that this is the test suite. Limiting number of epochs, which will increase training speed dramatically at the expense of model accuracy')
    #     epochs = 100

    all_model_params = {
        'LogisticRegression': {},
        'RandomForestClassifier': {
            'n_jobs': -2,
            'n_estimators': 30
        },
        'ExtraTreesClassifier': {
            'n_jobs': -1
        },
        'AdaBoostClassifier': {},
        'SGDClassifier': {
            'n_jobs': -1
        },
        'Perceptron': {
            'n_jobs': -1
        },
        'LinearSVC': {
            'dual': False
        },
        'LinearRegression': {
            'n_jobs': -2
        },
        'RandomForestRegressor': {
            'n_jobs': -2,
            'n_estimators': 30
        },
        'LinearSVR': {
            'dual': False,
            'loss': 'squared_epsilon_insensitive'
        },
        'ExtraTreesRegressor': {
            'n_jobs': -1
        },
        'MiniBatchKMeans': {
            'n_clusters': 8
        },
        'GradientBoostingRegressor': {
            'presort': False,
            'learning_rate': 0.1,
            'warm_start': True
        },
        'GradientBoostingClassifier': {
            'presort': False,
            'learning_rate': 0.1,
            'warm_start': True
        },
        'SGDRegressor': {
            'shuffle': False
        },
        'PassiveAggressiveRegressor': {
            'shuffle': False
        },
        'AdaBoostRegressor': {},
        'LGBMRegressor': {
            'n_estimators': 2000,
            'learning_rate': 0.15,
            'num_leaves': 8,
            'lambda_l2': 0.001,
            'histogram_pool_size': 16384
        },
        'LGBMClassifier': {
            'n_estimators': 2000,
            'learning_rate': 0.15,
            'num_leaves': 8,
            'lambda_l2': 0.001,
            'histogram_pool_size': 16384
        },
        'DeepLearningRegressor': {
            'epochs': epochs,
            'batch_size': 50,
            'verbose': 2
        },
        'DeepLearningClassifier': {
            'epochs': epochs,
            'batch_size': 50,
            'verbose': 2
        },
        'CatBoostRegressor': {},
        'CatBoostClassifier': {}
    }

    # if os.environ.get('is_test_suite', 0) == 'True':
    #     all_model_params

    model_params = all_model_params.get(model_name, None)
    if model_params is None:
        model_params = {}

    if is_hp_search:
        if model_name[:12] == 'DeepLearning':
            model_params['epochs'] = 50
        if model_name[:4] == 'LGBM':
            model_params['n_estimators'] = 500

    if training_params is not None:
        print('Now using the model training_params that you passed in:')
        print(training_params)
        # Overwrite our stock params with what the user passes in (i.e., if the user wants 10,000 trees, we will let them do it)
        model_params.update(training_params)
        print(
            'After overwriting our defaults with your values, here are the final params that will be used to initialize the model:'
        )
        print(model_params)

    model_map = {
        # Classifiers
        'LogisticRegression': LogisticRegression(),
        'RandomForestClassifier': RandomForestClassifier(),
        'RidgeClassifier': RidgeClassifier(),
        'GradientBoostingClassifier': GradientBoostingClassifier(),
        'ExtraTreesClassifier': ExtraTreesClassifier(),
        'AdaBoostClassifier': AdaBoostClassifier(),
        'LinearSVC': LinearSVC(),

        # Regressors
        'LinearRegression': LinearRegression(),
        'RandomForestRegressor': RandomForestRegressor(),
        'Ridge': Ridge(),
        'LinearSVR': LinearSVR(),
        'ExtraTreesRegressor': ExtraTreesRegressor(),
        'AdaBoostRegressor': AdaBoostRegressor(),
        'RANSACRegressor': RANSACRegressor(),
        'GradientBoostingRegressor': GradientBoostingRegressor(),
        'Lasso': Lasso(),
        'ElasticNet': ElasticNet(),
        'LassoLars': LassoLars(),
        'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit(),
        'BayesianRidge': BayesianRidge(),
        'ARDRegression': ARDRegression(),

        # Clustering
        'MiniBatchKMeans': MiniBatchKMeans(),
    }

    try:
        model_map['SGDClassifier'] = SGDClassifier(max_iter=1000, tol=0.001)
        model_map['Perceptron'] = Perceptron(max_iter=1000, tol=0.001)
        model_map['PassiveAggressiveClassifier'] = PassiveAggressiveClassifier(
            max_iter=1000, tol=0.001)
        model_map['SGDRegressor'] = SGDRegressor(max_iter=1000, tol=0.001)
        model_map['PassiveAggressiveRegressor'] = PassiveAggressiveRegressor(
            max_iter=1000, tol=0.001)
    except TypeError:
        model_map['SGDClassifier'] = SGDClassifier()
        model_map['Perceptron'] = Perceptron()
        model_map['PassiveAggressiveClassifier'] = PassiveAggressiveClassifier(
        )
        model_map['SGDRegressor'] = SGDRegressor()
        model_map['PassiveAggressiveRegressor'] = PassiveAggressiveRegressor()

    if xgb_installed:
        model_map['XGBClassifier'] = XGBClassifier()
        model_map['XGBRegressor'] = XGBRegressor()

    if lgb_installed:
        model_map['LGBMRegressor'] = LGBMRegressor()
        model_map['LGBMClassifier'] = LGBMClassifier()

    if catboost_installed:
        model_map['CatBoostRegressor'] = CatBoostRegressor(
            calc_feature_importance=True)
        model_map['CatBoostClassifier'] = CatBoostClassifier(
            calc_feature_importance=True)

    if model_name[:12] == 'DeepLearning':
        if not keras_imported:
            # Suppress some level of logs if TF is installed (but allow it to not be installed, and use Theano instead)
            try:
                os.environ['TF_CPP_MIN_VLOG_LEVEL'] = '3'
                os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
                from tensorflow import logging
                logging.set_verbosity(logging.INFO)
            except:
                pass

            global maxnorm
            global Dense, Dropout
            global LeakyReLU, PReLU, ThresholdedReLU, ELU
            global Sequential
            global keras_load_model
            global regularizers, optimizers
            global Activation
            global KerasRegressor, KerasClassifier

            from keras.constraints import maxnorm
            from keras.layers import Activation, Dense, Dropout
            from keras.layers.advanced_activations import LeakyReLU, PReLU, ThresholdedReLU, ELU
            from keras.models import Sequential
            from keras.models import load_model as keras_load_model
            from keras import regularizers, optimizers
            from keras.wrappers.scikit_learn import KerasRegressor, KerasClassifier
            keras_imported = True

        model_map['DeepLearningClassifier'] = KerasClassifier(
            build_fn=make_deep_learning_classifier)
        model_map['DeepLearningRegressor'] = KerasRegressor(
            build_fn=make_deep_learning_model)

    try:
        model_without_params = model_map[model_name]
    except KeyError as e:
        print(
            'It appears you are trying to use a library that is not available when we try to import it, or using a value for model_names that we do not recognize'
        )
        raise e

    if os.environ.get('is_test_suite', 'False') == 'True':
        if 'n_jobs' in model_params:
            model_params['n_jobs'] = 1
    model_with_params = model_without_params.set_params(**model_params)

    return model_with_params
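# Usage sketch (not in the original snippet): request a configured estimator by
# name and override one of the stock parameters.
gbr = get_model_from_name('GradientBoostingRegressor',
                          training_params={'n_estimators': 200})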
Example no. 24
def LSTM_Model(n_feat):
    return KerasRegressor(build_fn=(lambda: LSTM_Model_gen(n_feat)),
                          verbose=0, batch_size=8,
                          epochs=50)


# (Reconstructed header: in the source these lines sat unreachably after the
# `return` above; they are the body of the `baseline_model` used below.)
def baseline_model():
    # create model
    model = Sequential()
    model.add(
        Dense(7, input_dim=7, kernel_initializer='normal', activation='relu'))
    model.add(Dense(4, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_absolute_error', optimizer='adam')
    return model


# fix random seed for reproducibility
seed = 7
np.random.seed(seed)
# evaluate model with standardized dataset
estimator = KerasRegressor(build_fn=baseline_model,
                           epochs=3000,
                           batch_size=8474,
                           verbose=0)

kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, y, cv=kfold)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Example no. 25

# example of training a final regression model
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression
# generate regression dataset
X, y = make_regression(n_samples=100, n_features=2, noise=0.1)
# fit final model
model = LinearRegression()
model.fit(X, y)
# new instances where we do not know the answer
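# Completion sketch (the source snippet is cut off here): synthesize a few new
# instances and score them with the fitted model.
Xnew, _ = make_regression(n_samples=3, n_features=2, noise=0.1, random_state=1)
ynew = model.predict(Xnew)
for i in range(len(Xnew)):
    print("X=%s, Predicted=%s" % (Xnew[i], ynew[i]))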
Example no. 26
batch_size = 1000
print('Epochs: ', epochs)
print('Batch size: ', batch_size)

keras_callbacks = [
    # ModelCheckpoint('/tmp/keras_checkpoints/model.{epoch:02d}-{val_loss:.2f}.hdf5', monitor='val_loss', save_best_only=True, verbose=2)
    # ModelCheckpoint('/tmp/keras_checkpoints/model.{epoch:02d}.hdf5', monitor='val_loss', save_best_only=True, verbose=0)
    # TensorBoard(log_dir='/tmp/keras_logs/model_3', histogram_freq=0, write_graph=True, write_images=True, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None),
    EarlyStopping(monitor='val_mean_absolute_error', patience=80, verbose=0) # 20
]
print(x_train.shape)

#keras.wrappers.scikit_learn.KerasRegressor

from keras.wrappers.scikit_learn import KerasRegressor
model = KerasRegressor(build_fn=make_model, epochs=epochs, batch_size=batch_size, verbose=True, callbacks=keras_callbacks)
model.fit(x_train, y_train)

'''
history = model.fit(x_train, y_train,
    batch_size=batch_size,
    epochs=epochs,
    shuffle=True,
    verbose=2,#0, # Change it to 2, if wished to observe execution
    #validation_data=(arr_x_valid, arr_y_valid),
    callbacks=keras_callbacks)
'''
y_pred = model.predict(x_test[:20,])

print (y_pred)
print (y_test[:20])
Example no. 27

# (Reconstructed header; the top of this snippet is truncated in the source.
# The grid below tunes `nb_units`, so the build function must accept it.)
def build_regressor_for_grid(nb_units=100):
    regressor = Sequential()
    regressor.add(
        Dense(units=nb_units,
              kernel_initializer='uniform',
              activation='relu',
              input_dim=325))
    regressor.add(
        Dense(units=nb_units, kernel_initializer='uniform', activation='relu'))
    regressor.add(
        Dense(units=1, kernel_initializer='uniform', activation='linear'))
    regressor.compile(optimizer='adam',
                      loss='mae',
                      metrics=['mse', 'mae', 'mape'])
    return regressor


grid_regressor = KerasRegressor(build_fn=build_regressor_for_grid)
parameters = {
    'batch_size': [30, 50, 100],
    'epochs': [10, 30],
    # note: 'regressor' is not an argument of build_regressor_for_grid, so it
    # cannot be grid-searched here
    'nb_units': [100, 150, 200]
}
grid_search = GridSearchCV(estimator=grid_regressor, param_grid=parameters)
grid_search = grid_search.fit(X_train, y_train)
best_parameters = grid_search.best_params_
best_accuracy = grid_search.best_score_


### Build one ANN
def build_regressor():
    regressor = Sequential()
Example no. 28

# (Reconstructed header; the top of this snippet is truncated in the source.
# The first layer is inferred from the identical layers that follow.)
def baseline_model():
    # create model
    model = Sequential()
    model.add(
        Dense(12, input_dim=12, kernel_initializer='normal',
              activation='relu'))
    model.add(
        Dense(12, input_dim=12, kernel_initializer='normal',
              activation='relu'))
    model.add(
        Dense(12, input_dim=12, kernel_initializer='normal',
              activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))

    # compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
x_train, x_test, y_train, y_test = train_test_split(X, Y)
# evaluate model with standardized dataset
estimator = KerasRegressor(build_fn=baseline_model,
                           epochs=1,  # nb_epoch was renamed to epochs in Keras 2
                           batch_size=5,
                           verbose=0)

kfold = KFold(n_splits=30, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, Y, cv=kfold)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))
estimator.fit(x_train, y_train)
y_pred = estimator.predict(x_test)
# print(y_pred.shape())
fnc.errors(y_test, y_pred)
Example no. 29
def baseline_model():
    model = Sequential()
    model.add(
        Dense(units=3, kernel_initializer='uniform', activation='relu',
              input_dim=5))
    model.add(Dense(units=3, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(units=1, kernel_initializer='uniform'))
    model.compile(optimizer='adam',
                  loss='mean_squared_error',
                  metrics=['mean_squared_error'])
    return model


from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import KFold, cross_val_score
estimator = KerasRegressor(build_fn=baseline_model,
                           epochs=1000,
                           batch_size=5,
                           verbose=1)
kfold = KFold(n_splits=10, shuffle=True, random_state=1)
results = cross_val_score(estimator,
                          x_train,
                          y_train[:, 0],
                          cv=kfold,
                          n_jobs=1)
estimator.fit(x_train, y_train[:, 0])

y_pred = estimator.predict(x_test)
y_pred

plt.scatter(x_train[:, 0], y_train[:, 0], color='red')
plt.plot(x_test[:, 0], y_pred, color='blue')
plt.xlabel('Product')
Example no. 30
            smoothing_window_length=5,
            smoothing_polyorder=3,
            reshape=True)
        #X_train, y_train, X_test, y_test = dataload.load_sin_data(seq_len, normalise_window=True)

        print('> Data Loaded. Compiling...')

        # Grid search parameters
        kernel_sizes = [5, 9]
        step_sizes = [2]
        single_branch = True
        stride = [3]
        lstm_units = [200, 400]
        branches = [3]

        model = KerasRegressor(build_fn=single_cnn_gru.build_model,
                               validation_split=0.20)

        cnn_layers = [3]
        filter_nums = [128]
        batch_size = [32]
        single_lstm = [True]
        cat_branches = [True]
        param_grid = {}
        if (single_branch):
            param_grid = dict(layers=[(1, seq_len)],
                              epochs=[epochs],
                              cnn_layers=cnn_layers,
                              lstm_units=lstm_units,
                              kernel_size=kernel_sizes,
                              stride_1=stride,
                              filter_num=filter_nums,