Example #1
def assert_regression_predict_shape_correct(num_test):
    reg = KerasRegressor(
        build_fn=build_fn_reg, hidden_dims=hidden_dims,
        batch_size=batch_size, epochs=epochs)
    reg.fit(X_train, y_train, batch_size=batch_size, epochs=epochs)

    preds = reg.predict(X_test[:num_test], batch_size=batch_size)
    assert preds.shape == (num_test, )
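
# build_fn_reg is not shown on this page; a minimal sketch of the model
# factory the wrapper above expects (assuming `hidden_dims` is a sequence of
# layer widths and a single regression target; input shape is inferred on
# the first fit in tf.keras):
def build_fn_reg(hidden_dims=(64,)):
    from keras.models import Sequential
    from keras.layers import Dense
    model = Sequential()
    for dim in hidden_dims:
        model.add(Dense(dim, activation='relu'))
    model.add(Dense(1))  # linear output for regression
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model
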
def test_keras_regressor():
    model = Sequential()
    model.add(Dense(input_dim, input_shape=(input_dim,)))
    model.add(Activation('relu'))
    model.add(Dense(1))
    model.add(Activation('linear'))  # softmax on a single unit always outputs 1.0; regression needs a linear output

    sklearn_regressor = KerasRegressor(model, optimizer=optim, loss=loss,
                                       train_batch_size=batch_size,
                                       test_batch_size=batch_size,
                                       nb_epoch=nb_epoch)
    sklearn_regressor.fit(X_train_reg, y_train_reg)
    sklearn_regressor.score(X_test_reg, y_test_reg)
Example #3
 def train_model(self, X_train, Y_train):
     print("training model %d_%d.pkl" % (self.frame_len, self.predict_dist))
     model_name = "../model/simple_reg_model/%d_%d.pkl" % (self.frame_len, self.predict_dist)
     self.estimator = RandomForestRegressor(random_state=0, n_estimators=100, n_jobs=-1)
     self.estimator.fit(X_train, Y_train)
     print("finish training model")
     joblib.dump(self.estimator, model_name)
Example #4
 def train_model_keras(self, X_train, Y_train, date):
     print("training model %d_%d.h5" % (self.frame_len, self.predict_dist))
     model_name = "../model/reg_keras/%d_%d_%s.h5" % (self.frame_len, self.predict_dist, date)
     self.estimator = KerasRegressor(build_fn=baseline_model, epochs=200, batch_size=64, verbose=1)
     self.estimator.fit(X_train, Y_train)
     print("finish training model")
     # saving model
     json_model = self.estimator.model.to_json()
     open(model_name.replace('h5', 'json'), 'w').write(json_model)
     self.estimator.model.save_weights(model_name, overwrite=True)
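     # To restore the saved model later, the standard inverse of the
     # JSON + weights split above would be (a sketch):
     #   from keras.models import model_from_json
     #   model = model_from_json(open(model_name.replace('h5', 'json')).read())
     #   model.load_weights(model_name)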
Example #5
def run_regressor(model=LSTM2,
                  data=None,
                  data_file='df_dh.csv',
                  isload_model=True,
                  testonly=False):
    epochs = 8000
    path_to_dataset = data_file
    sequence_length = SEQ_LENGTH

    if data is None:

        X_train, y_train, X_test, y_test, X_val, Y_val = get_data(
            sequence_length=sequence_length,
            stateful=STATEFUL,
            path_to_dataset=data_file)
    else:
        X_train, y_train, X_test, y_test, X_val, Y_val = data

    if STATEFUL:
        X_test = X_test[:int(X_test.shape[0] / batch_size) * batch_size]
        y_test = y_test[:int(y_test.shape[0] / batch_size) * batch_size]

    estimator = KerasRegressor(build_fn=lambda x=X_train: model(x))

    # if testonly == True:
    #     # predicted = model.predict(X_test, verbose=1,batch_size=batch_size)
    #     prediction = estimator.predict(X_test)

    #     stat_metrics(X_test, y_test, prediction)
    #     draw_scatter(predicted_arr[0], y_test, X_test, X_train, y_train, data_file)
    #     return

    early_stopping = EarlyStopping(monitor='val_loss', verbose=1, patience=20)
    checkpoint = ModelCheckpoint("./lstm.h5",
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 save_weights_only=True)
    ################
    hist = estimator.fit(X_train,
                         y_train,
                         validation_data=(X_val, Y_val),
                         callbacks=[checkpoint],
                         epochs=epochs,
                         batch_size=batch_size,
                         verbose=1)

    # prediction = estimator.predict(X_test)
    score = mean_squared_error(y_test, estimator.predict(X_test))
    estimator_score = estimator.score(X_test, y_test)
    print(score)

    prediction = estimator.predict(X_test)
    # invert predictions
    prediction_trans = scaler.inverse_transform(prediction)
    X_test_trans = scaler.inverse_transform(X_test)
    y_test_trans = scaler.inverse_transform(y_test)
    X_train_trans = scaler.inverse_transform(X_train)
    y_train_trans = scaler.inverse_transform(y_train)

    print(prediction)
    print(X_test)
    print("##############################################")
    # predicted_arr = prediction.T.tolist()
    # print(predicted_arr)
    draw_scatter(prediction, y_test, X_test, X_train, y_train, data_file)
    his_figures(hist)
Example #6
    testFrame = pandas.read_csv(f'2_{ds}.csv')
    testFrame = testFrame.drop(columns=['y'])
    testFrame = testFrame.drop(columns=['TestIndex'])
    testSet = testFrame.values

    # fix random seed for reproducibility
    seed = 16
    numpy.random.seed(seed)
    # evaluate model with standardized dataset
    estimators = []
    estimators.append(('standardize', StandardScaler()))
    estimators.append(('mlp',
                       KerasRegressor(build_fn=larger_model,
                                      epochs=25,
                                      batch_size=32,
                                      verbose=1,
                                      validation_split=0.05,
                                      shuffle=True)))
    pipeline = Pipeline(estimators)

    # estimator = KerasRegressor(build_fn=larger_model, epochs=100, batch_size=32, verbose=1)
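
    # root_mean_squared_error is not defined in this snippet; a minimal
    # sketch (assuming sklearn.metrics.mean_squared_error is imported):
    def root_mean_squared_error(y_true, y_pred):
        return numpy.sqrt(mean_squared_error(y_true, y_pred))
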

    print('start')
    rmse_scorer = make_scorer(root_mean_squared_error, greater_is_better=False)
    kfold = KFold(n_splits=5, shuffle=True, random_state=seed)  # random_state requires shuffle=True
    results = cross_val_score(pipeline,
                              X,
                              Y,
                              cv=kfold,
                              verbose=2,
                              scoring="mean_squared_error")
Example #7
def baseline_model():
    model = Sequential()
    model.add(
        Dense(3, kernel_initializer='uniform', activation='relu',
              input_dim=5))
    model.add(Dense(3, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(1, kernel_initializer='uniform'))
    model.compile(optimizer='adam',
                  loss='mean_squared_error',
                  metrics=['mean_squared_error'])
    return model


from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import KFold, cross_val_score
estimator = KerasRegressor(build_fn=baseline_model,
                           epochs=1000,
                           batch_size=5,
                           verbose=1)
kfold = KFold(n_splits=10, shuffle=True, random_state=1)
results = cross_val_score(estimator,
                          x_train,
                          y_train[:, 0],
                          cv=kfold,
                          n_jobs=1)
estimator.fit(x_train, y_train[:, 0])

y_pred = estimator.predict(x_test)
y_pred

plt.scatter(x_train[:, 0], y_train[:, 0], color='red')
plt.plot(x_test[:, 0], y_pred, color='blue')
plt.xlabel('Product')
Example #8
class NeuralNet:
    """
    Neural Network model implemented with tensorflow and evaluation methods
    implemented with sklearn.        
    """
    def __init__(self):
        self.model = Sequential()

    def add_first_layer(self, input_shape, num_features, activation_function):
        """
        Add the first layer to the neural net.
        
        num_features: an int specifying the number of features, or nodes
        activation function: string specifying the type of activation function
        """
        self.model.add(
            Dense(input_shape,
                  input_dim=num_features,
                  kernel_initializer='normal',
                  activation=activation_function))

    def add_layer(self, num_nodes, activation_function, regularization=None):
        """
        Add a hidden layer to the neural net.
        
        num_nodes: int specifying the number of hidden nodes
        activation_function: string specifying the type of activation function
        """
        if regularization is not None:
            regularization = regularizers.l1(0.01)
        self.model.add(
            Dense(num_nodes,
                  kernel_initializer='normal',
                  activation=activation_function,
                  kernel_regularizer=regularization))

    def add_last_layer(self):
        """
        Add the last layer of the NN. We are doing regression, so there will 
        be only one output.
        """
        self.model.add(
            Dense(1, kernel_initializer='normal', activation='linear'))

    def compile(self, loss_function, optimizer, epochs, batch_size, verbosity):
        """
        Compile the model for training and build an estimator

        loss_function: string specifying the loss function that will be used
        optimizer: string specifying the optimization method
        epochs: int specifying the number of epochs
        batch_size: int specifying the batch size
        verbosity: int specify the level of textual feedback
        validation_split: percentage of data that will be used for validation                 
        """
        self.batch_size = batch_size
        self.model.compile(loss=loss_function,
                           optimizer=optimizer,
                           metrics=['mean_squared_error'])
        self.estimator = KerasRegressor(build_fn=self.get_model,
                                        epochs=epochs,
                                        batch_size=batch_size,
                                        verbose=verbosity)
        print(self.model.summary())

    def get_model(self):
        """
        Returns the underlying Keras model; KerasRegressor's build_fn expects a callable that returns it.
        """
        return self.model

    def train(self, X, y, epochs, batch_size, validation_split):
        """
        Train the neural network. 
        
        params:
            X: set of features
            y: set of targets
            validation_split: a float from 0 to 1 specifying the portion of the set to use as
                              validation
        
        returns:
            a history object containing the training and validation loss at each epoch
        """
        return self.estimator.fit(X,
                                  y,
                                  epochs=epochs,
                                  batch_size=batch_size,
                                  validation_split=validation_split,
                                  shuffle=True)

    def evaluate(self, X_train, y_train, X_test, y_test):
        """
        Evaluate the neural network.
        
        """
        self.test_predictions = self.model.predict(X_test)
        self.train_predictions = self.model.predict(X_train)

        print("Training MSE:",
              round(mean_squared_error(y_train, self.train_predictions), 4))
        print("Validation MSE:",
              round(mean_squared_error(y_test, self.test_predictions), 4))
        print("\nTraining r2:",
              round(r2_score(y_train, self.train_predictions), 4))
        print("Validation r2:",
              round(r2_score(y_test, self.test_predictions), 4))

        self.results = self.model.history.history
        plt.plot(list(range(1,
                            len(self.results['loss']) + 1)),
                 self.results['loss'][0:],
                 label='Train')
        plt.plot(list(range(1,
                            len(self.results['val_loss']) + 1)),
                 self.results['val_loss'][0:],
                 label='Test',
                 color='green')
        plt.legend()
        plt.title('Training and test loss at each epoch', fontsize=14)
        plt.show()
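
# A usage sketch for the NeuralNet class above (X_train/y_train/X_test/
# y_test stand in for your own data):
#   net = NeuralNet()
#   net.add_first_layer(input_shape=64, num_features=X_train.shape[1],
#                       activation_function='relu')
#   net.add_layer(num_nodes=32, activation_function='relu')
#   net.add_last_layer()
#   net.compile('mean_squared_error', 'adam', epochs=100, batch_size=32,
#               verbosity=1)
#   net.train(X_train, y_train, epochs=100, batch_size=32,
#             validation_split=0.1)
#   net.evaluate(X_train, y_train, X_test, y_test)
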
        """
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score, KFold

def ANNModel():
    model = Sequential()
    model.add(Dense(238, kernel_initializer='normal', activation='relu', input_dim=238))
    model.add(Dense(100, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(optimizer='adam', loss='mean_squared_logarithmic_error')
    return model

seed = 10
np.random.seed(seed)

ANNReg = KerasRegressor(build_fn = ANNModel, epochs = 100, batch_size = 5, verbose = 1)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(ANNReg, X_train, Y_train, cv=kfold)
ANNReg.fit(X_train, Y_train)


#Prediction
RanRegPred = RanReg.predict(X_val)
GBRegPred = GBReg.predict(X_val)
XGBRegPred = XGBReg.predict(X_val)
ANNRegPred = ANNReg.predict(X_val).ravel()

#Checking the RMSLE
def rmsle(y, y0):
    assert len(y) == len(y0)
    return np.sqrt(np.mean(np.power(np.log1p(y)-np.log1p(y0), 2)))
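
# Applied to the ANN predictions above (a sketch; assumes Y_val holds the
# true validation targets):
#   print('ANN RMSLE: %.4f' % rmsle(Y_val, ANNRegPred))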
Example #10
# split into input (X) and output (Y) variables
X = dataset[:, 0:13]
Y = dataset[:, 13]


# define the model
def larger_model():
    # create model
    model = Sequential()
    model.add(Dense(13, input_dim=13, activation='relu'))
    model.add(Dense(6, activation='relu'))
    model.add(Dense(1))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


# evaluate model with standardized dataset
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp',
                   KerasRegressor(build_fn=larger_model,
                                  epochs=50,
                                  batch_size=5,
                                  verbose=0)))
pipeline = Pipeline(estimators)

kfold = KFold(n_splits=10)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Larger: %.2f (%.2f) MSE" % (results.mean(), results.std()))
def keras_mlp1(train2, y, test2, v, z):
    cname = sys._getframe().f_code.co_name
    v[cname], z[cname] = 0, 0
    from keras import layers
    from keras import models
    from keras import optimizers
    from keras.wrappers.scikit_learn import KerasRegressor
    scores = list()
    scaler = preprocessing.RobustScaler()
    train3 = scaler.fit_transform(train2)
    test3 = scaler.transform(test2)
    input_dims = train3.shape[1]
    def build_model():
        input_ = layers.Input(shape=(input_dims,))
        model = layers.Dense(256, kernel_initializer='Orthogonal')(input_)
        #model = layers.BatchNormalization()(model)
        #model = layers.advanced_activations.PReLU()(model)
        model = layers.Activation('selu')(model)
        #model = layers.Dropout(0.7)(model)

        model = layers.Dense(64, kernel_initializer='Orthogonal')(model)
        #model = layers.BatchNormalization()(model)
        model = layers.Activation('selu')(model)
        #model = layers.advanced_activations.PReLU()(model)
        #model = layers.Dropout(0.9)(model)

        model = layers.Dense(16, kernel_initializer='Orthogonal')(model)
        #model = layers.BatchNormalization()(model)
        model = layers.Activation('selu')(model)
        #model = layers.advanced_activations.PReLU()(model)

        model = layers.Dense(1, activation='sigmoid')(model)

        model = models.Model(input_, model)
        model.compile(loss = 'binary_crossentropy', optimizer = optimizers.Nadam())
        #print(model.summary(line_length=120))
        return model
    np.random.seed(1234)
    est = KerasRegressor(build_fn=build_model,
                         batch_size=256,  # epochs are passed to fit() below
                         #verbose=2
                        )
    build_model().summary(line_length=120)
    model_path = '../data/working/' + cname + '_keras_model.h5'
    num_splits = 9
    ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=11, test_size=1/num_splits)
    for n, (itrain, ival) in enumerate(ss.split(train3, y)):
        xtrain, xval = train3[itrain], train3[ival]
        ytrain, yval = y[itrain], y[ival]
        est.fit(
                xtrain, ytrain,
                epochs=10000,
                validation_data=(xval, yval),
                verbose=0,
                callbacks=build_keras_fit_callbacks(model_path),
                shuffle=True
            )
        est.model.load_weights(model_path)
        p = est.predict(xval)
        v.loc[ival, cname] += pconvert(p)
        score = metrics.log_loss(y[ival], p)
        print(cname, 'fold %d: '%(n+1), score, now())
        scores.append(score)
        z[cname] += pconvert(est.predict(test3))
    os.remove(model_path)

    cv=np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits
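
# build_keras_fit_callbacks is defined elsewhere in this project. Given how
# it is used above (10000 epochs, best weights reloaded from model_path),
# a plausible sketch is:
def build_keras_fit_callbacks(model_path):
    from keras import callbacks
    return [
        callbacks.EarlyStopping(monitor='val_loss', patience=20, verbose=0),
        callbacks.ModelCheckpoint(model_path,
                                  monitor='val_loss',
                                  save_best_only=True,
                                  save_weights_only=True,
                                  verbose=0),
    ]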
Example #12
                                                            downcast='infer')


def baseline_model():
    # create model
    model = Sequential()
    model.add(
        Dense(5, input_dim=5, kernel_initializer='normal',
              activation='linear'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


estimator = KerasRegressor(build_fn=baseline_model, epochs=1000, verbose=0)

t0 = time.perf_counter()  # time.clock() was removed in Python 3.8
estimator.fit(X, y)
t1 = time.perf_counter()

prediction = estimator.predict(X)

train_error = np.abs(y - prediction)
mean_error = np.mean(train_error)
min_error = np.min(train_error)
max_error = np.max(train_error)
std_error = np.std(train_error)

#print('prediction :',prediction)
#print('train error :')
Example #13
# X_FINAL, y_FINAL = X_scaled[remove_inds,:], y_scaled[remove_inds,:]
# X_scaled, y_scaled = X_scaled[keep_inds,:], y_scaled[keep_inds,:]
#--------------------

#Split data to 90% train & 10% unseen
X_train, X_unseen, y_train, y_unseen = train_test_split(X_scaled,
                                                        y_scaled,
                                                        test_size=0.10,
                                                        random_state=32)

kf = KFold(n_splits=4, shuffle=True)
fig, ax = plt.subplots(1, 1, figsize=(8, 8))
fig2, ax2 = plt.subplots(1, 1, figsize=(8, 8))
for train_index, test_index in kf.split(X_train, y=y_train):

    model = KerasRegressor(build_fn=baseline_model, epochs=100)
    history = model.fit(X_train[train_index],
                        y_train[train_index],
                        validation_data=(X_train[test_index],
                                         y_train[test_index]))

    ax.plot(history.history['loss'], label='loss')
    ax.plot(history.history['val_loss'], label='validation loss')
    ax.set_ylabel('Loss')
    ax.set_xlabel('Epoch')
    ax.legend()
    ax.minorticks_on()
    ax.grid(which='major', ls='-', color=[0.15, 0.15, 0.15], alpha=0.15)
    ax.grid(which='minor',
            ls=':',
            dashes=(1, 5, 1, 5),
Example #14
# fix random seed for reproducibility
np.random.seed(SEED)


# Multi-class Neural Network
def build_model():
    clf = Sequential()
    clf.add(Dense(features.shape[1], activation='relu'))
    clf.add(Dense(5, activation='relu'))
    clf.add(Dropout(0.3))
    clf.add(Dense(3, activation='relu'))
    clf.add(Dropout(0.3))
    clf.add(Dense(1, kernel_initializer='normal'))
    clf.compile(optimizer='adam', loss='mean_squared_error')
    return clf


# evaluate model with standardized dataset
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp',
                   KerasRegressor(build_fn=build_model,
                                  epochs=EPOCHS,
                                  batch_size=BATCH_SIZE,
                                  verbose=1)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10)
results = cross_val_score(pipeline, features, labels, cv=kfold)
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))

def base_model():
     model = Sequential()
     model.add(Dense(20, input_dim=398, kernel_initializer='normal', activation='relu'))
     model.add(Dense(10, kernel_initializer='normal', activation='relu'))
     model.add(Dense(1, kernel_initializer='normal'))
     model.compile(loss='mean_squared_error', optimizer='adam')
     return model

seed = 7
np.random.seed(seed)

scale = StandardScaler()
X_train = scale.fit_transform(train_new)
X_test = scale.fit_transform(test_new)

keras_label = label_df.values  # as_matrix() was removed from pandas
clf = KerasRegressor(build_fn=base_model, epochs=1000, batch_size=5, verbose=0)
clf.fit(X_train,keras_label)

#make predictions and create the submission file 
kpred = clf.predict(X_test) 
kpred = np.exp(kpred)
pred_df = pd.DataFrame(kpred, index=test["Id"], columns=["SalePrice"]) 
pred_df.to_csv('keras1.csv', header=True, index_label='Id') 


#simple average
y_pred = (y_pred_xgb + y_pred_lasso) / 2
y_pred = np.exp(y_pred)
pred_df = pd.DataFrame(y_pred, index=test["Id"], columns=["SalePrice"])
pred_df.to_csv('ensemble1.csv', header=True, index_label='Id')
Example #16
def baseline_model():
    model = Sequential()
    # The term "Dense" means layer. When we add extra "Dense()" to the model
    # that means we are adding extra layers to the neural network model.
    model.add(Dense(NN1, input_dim=ZZZ, activation='relu'))
    model.add(Dense(NN2, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.summary()
    dot_img_file = 'model.png'
    tf.keras.utils.plot_model(model, to_file=dot_img_file, show_shapes=True)
    return model


estimator = KerasRegressor(build_fn=baseline_model,
                           epochs=10,
                           batch_size=256,
                           verbose=0)
"""""" """""" """
Q12: Make predictions for entire dataset
""" """""" """"""
estimator.fit(df_normalized, schirmer_norm)
predicted_values = estimator.predict(df_normalized)
"""""" """""" """
Q13: Compute correlation coefficient and mean absolute error (MAE)
""" """""" """"""
model_corr, _ = pearsonr(schirmer_norm, predicted_values)
model_mae = mean_absolute_error(schirmer_norm, predicted_values)

print('Pearsons correlation: %.3f' % model_corr)
print('Mean absolute error: %.3f' % model_mae)
freq = np.absolute(np.fft.fft(m_fc[:,-32:], axis=1)[:,0:16])

from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

def whole_fc_m_ann_ensemble():
    model = Sequential()
    model.add(Dense(units=T*3+16, kernel_initializer='normal', activation='relu', input_dim=T*3+16))
    model.add(Dense(units=T*3, kernel_initializer='normal', activation='relu'))
    model.add(Dense(units=T, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model
X = np.hstack((h[8:], s[8:], m_fc[:-8], freq[:-8]))
Y = m_fc[8:]
seed = 7
np.random.seed(seed)
estimator = KerasRegressor(build_fn=whole_fc_m_ann_ensemble, epochs=14, batch_size=10, verbose=0)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, Y, cv=kfold)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))

estimator.fit(X, Y)
print(estimator.model.summary())

predicted = estimator.predict(X)
plot_summary(h[8:], s[8:], ens[8:], m_fc[8:], predicted, 50)
plt.savefig('pics\\keras_clean_test_out.png')
def baseline_model():
    # 12 nodes -> 6 nodes -> 1 node
    # through trial and error by adding nodes, removing layers, and
    # changing epochs based on where I see the loss asymptote
    model = Sequential()
    model.add(
        Dense(12, input_dim=12, kernel_initializer='normal',
              activation='relu'))
    model.add(
        Dense(6, input_dim=12, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


estimator = KerasRegressor(build_fn=baseline_model, epochs=28)
estimator.fit(X_train, y_train)

# In[30]:

# create a dataframe containing the results from all the methods
df_test = (X_test.join(df_sub_wtb[['normand', 'stull', 'half',
                                   'third']]).assign(
                                       **{
                                           'lreg': visualizer.predict(X_test),
                                           'keras': estimator.predict(X_test)
                                       }))
df_test['time'] = pd.to_datetime(df_test['year'].astype(str) +
                                 df_test['dayofyear'].astype(str) +
                                 df_test['hour'].astype(str),
                                 format='%Y%j%H')
Example #19
    model.add(Activation('relu'))
    model.add(Dense(1))

    #compile model
    model.compile(loss='mean_squared_error',
                  optimizer=OPTIMIZER,
                  metrics=['mean_squared_error'])

    return model


# evaluate model with standardized dataset
np.random.seed(seed)

kreg = KerasRegressor(build_fn=baseline_model,
                      epochs=NB_EPOCH,
                      batch_size=BATCH_SIZE,
                      verbose=VERBOSE)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', kreg))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=2, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Error: %.4f (%.4f) MSE" % (results.mean(), results.std()))


#denormalize data
def denorm(min_val, max_val, value):  # parameter names chosen to avoid shadowing built-ins
    return (value * (max_val - min_val)) + min_val
Example #20
class SimpleModel:
    def __init__(self):
        self.data = dict()
        self.frame_len = 30
        self.predict_dist = 5
        self.scaler = dict()

    def load_all_data(self, begin_date, end_date):
        con = sqlite3.connect('../data/stock.db')
        code_list = con.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
        X_data_list, Y_data_list, DATA_list = [0]*10, [0]*10, [0]*10
        idx = 0
        split = int(len(code_list) / 9)
        bar = ProgressBar(len(code_list), max_width=80)
        for code in code_list:
            data = self.load_data(code[0], begin_date, end_date)
            data = data.dropna()
            X, Y = self.make_x_y(data, code[0])
            if len(X) <= 1: continue
            code_array = [code[0]] * len(X)
            assert len(X) == len(data.loc[29:len(data)-6, '일자'])
            if idx%split == 0:
                X_data_list[int(idx/split)] = list(X)
                Y_data_list[int(idx/split)] = list(Y)
                DATA_list[int(idx/split)] = np.array([data.loc[29:len(data)-6, '일자'].values.tolist(), code_array, data.loc[29:len(data)-6, '현재가'], data.loc[34:len(data), '현재가']]).T.tolist()
            else:
                X_data_list[int(idx/split)].extend(X)
                Y_data_list[int(idx/split)].extend(Y)
                DATA_list[int(idx/split)].extend(np.array([data.loc[29:len(data)-6, '일자'].values.tolist(), code_array, data.loc[29:len(data)-6, '현재가'], data.loc[34:len(data), '현재가']]).T.tolist())
            bar.numerator += 1
            print("%s | %d" % (bar, len(X_data_list[int(idx/split)])), end='\r')
            sys.stdout.flush()
            idx += 1
        print("%s" % bar)

        print("Merge splited data")
        bar = ProgressBar(10, max_width=80)
        for i in range(10):
            if type(X_data_list[i]) == type(1):
                continue
            if i == 0:
                X_data = X_data_list[i]
                Y_data = Y_data_list[i]
                DATA = DATA_list[i]
            else:
                X_data.extend(X_data_list[i])
                Y_data.extend(Y_data_list[i])
                DATA.extend(DATA_list[i])
            bar.numerator = i+1
            print("%s | %d" % (bar, len(DATA)), end='\r')
            sys.stdout.flush()
        print("%s | %d" % (bar, len(DATA)))
        return np.array(X_data), np.array(Y_data), np.array(DATA)

    def load_data(self, code, begin_date, end_date):
        con = sqlite3.connect('../data/stock.db')
        df = pd.read_sql("SELECT * from '%s'" % code, con, index_col='일자').sort_index()
        data = df.loc[df.index > str(begin_date)]
        data = data.loc[data.index < str(end_date)]
        data = data.reset_index()
        return data

    def make_x_y(self, data, code):
        data_x = []
        data_y = []
        for col in data.columns:
            try:
                data.loc[:, col] = data.loc[:, col].str.replace('--', '-')
                data.loc[:, col] = data.loc[:, col].str.replace('+', '', regex=False)
            except AttributeError as e:
                print(e)
        data.loc[:, 'month'] = data.loc[:, '일자'].str[4:6]
        data = data.drop(['일자', '체결강도'], axis=1)

        # normalization
        data = np.array(data)
        if len(data) <= 0 :
            return np.array([]), np.array([])

        if code not in self.scaler:
            self.scaler[code] = StandardScaler()
            data = self.scaler[code].fit_transform(data)
        else:
            data = self.scaler[code].transform(data)

        for i in range(self.frame_len, len(data)-self.predict_dist+1):
            data_x.extend(np.array(data[i-self.frame_len:i, :]))
            data_y.append(data[i+self.predict_dist-1][0])
        np_x = np.array(data_x).reshape(-1, 23*30)
        np_y = np.array(data_y)
        return np_x, np_y

    def train_model(self, X_train, Y_train):
        print("training model %d_%d.pkl" % (self.frame_len, self.predict_dist))
        model_name = "../model/simple_reg_model/%d_%d.pkl" % (self.frame_len, self.predict_dist)
        self.estimator = RandomForestRegressor(random_state=0, n_estimators=100, n_jobs=-1)
        self.estimator.fit(X_train, Y_train)
        print("finish training model")
        joblib.dump(self.estimator, model_name)

    def set_config(self):
        #Tensorflow GPU optimization
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        K.set_session(sess)

    def train_model_keras(self, X_train, Y_train, date):
        print("training model %d_%d.h5" % (self.frame_len, self.predict_dist))
        model_name = "../model/reg_keras/%d_%d_%s.h5" % (self.frame_len, self.predict_dist, date)
        self.estimator = KerasRegressor(build_fn=baseline_model, epochs=200, batch_size=64, verbose=1)
        self.estimator.fit(X_train, Y_train)
        print("finish training model")
        # saving model
        json_model = self.estimator.model.to_json()
        open(model_name.replace('h5', 'json'), 'w').write(json_model)
        self.estimator.model.save_weights(model_name, overwrite=True)

    def evaluate_model(self, X_test, Y_test, orig_data, s_date):
        print("Evaluate model %d_%d.pkl" % (self.frame_len, self.predict_dist))
        if MODEL_TYPE == 'random_forest':
            model_name = "../model/simple_reg_model/%d_%d.pkl" % (self.frame_len, self.predict_dist)
            self.estimator = joblib.load(model_name)
        elif MODEL_TYPE == 'keras':
            model_name = "../model/reg_keras/%d_%d_%s.h5" % (self.frame_len, self.predict_dist, s_date)
            self.estimator = model_from_json(open(model_name.replace('h5', 'json')).read())
            self.estimator.load_weights(model_name)
        pred = self.estimator.predict(X_test)
        res = 0
        score = 0
        assert(len(pred) == len(Y_test))
        pred = np.array(pred).reshape(-1)
        Y_test = np.array(Y_test).reshape(-1)
        for i in range(len(pred)):
            score += (float(pred[i]) - float(Y_test[i]))*(float(pred[i]) - float(Y_test[i]))
        score = np.sqrt(score/len(pred))
        print("score: %f" % score)
        for idx in range(len(pred)):
            buy_price = int(orig_data[idx][2])
            future_price = int(orig_data[idx][3])
            date = int(orig_data[idx][0])
            pred_transform = self.scaler[orig_data[idx][1]].inverse_transform([pred[idx]] + [0]*22)[0]
            cur_transform = self.scaler[orig_data[idx][1]].inverse_transform([X_test[idx][23*29]] + [0]*22)[0]
            if pred_transform > buy_price * 1.01:
                res += (future_price - buy_price*1.005)*(100000/buy_price+1)
                print("[%s] buy: %6d, sell: %6d, earn: %6d" % (str(date), buy_price, future_price, (future_price - buy_price*1.005)*(100000/buy_price)))
        print("result: %d" % res)

    def load_current_data(self):
        con = sqlite3.connect('../data/stock.db')
        code_list = con.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
        X_test = []
        DATA = []
        code_list = list(map(lambda x: x[0], code_list))
        first = True
        bar = ProgressBar(len(code_list), max_width=80)
        for code in code_list[:]:  # iterate over a copy; codes are removed inside the loop
            bar.numerator += 1
            print("%s | %d" % (bar, len(X_test)), end='\r')
            sys.stdout.flush()
            df = pd.read_sql("SELECT * from '%s'" % code, con, index_col='일자').sort_index()
            data = df.iloc[-30:,:]
            data = data.reset_index()
            for col in data.columns:
                try:
                    data.loc[:, col] = data.loc[:, col].str.replace('--', '-')
                    data.loc[:, col] = data.loc[:, col].str.replace('+', '', regex=False)
                except AttributeError:
                    pass
            data.loc[:, 'month'] = data.loc[:, '일자'].str[4:6]
            data = data.drop(['일자', '체결강도'], axis=1)
            if len(data) < 30:
                code_list.remove(code)
                continue
            DATA.append(int(data.loc[len(data)-1, '현재가']))
            try:
                data = self.scaler[code].transform(np.array(data))
            except KeyError:
                code_list.remove(code)
                continue
            X_test.extend(np.array(data))
        X_test = np.array(X_test).reshape(-1, 23*30) 
        return X_test, code_list, DATA

    def make_buy_list(self, X_test, code_list, orig_data, s_date):
        BUY_UNIT = 10000
        print("make buy_list")
        if MODEL_TYPE == 'random_forest':
            model_name = "../model/simple_reg_model/%d_%d.pkl" % (self.frame_len, self.predict_dist)
            self.estimator = joblib.load(model_name)
        elif MODEL_TYPE == 'keras':
            model_name = "../model/reg_keras/%d_%d_%s.h5" % (self.frame_len, self.predict_dist, s_date)
            self.estimator = model_from_json(open(model_name.replace('h5', 'json')).read())
            self.estimator.load_weights(model_name)
        pred = self.estimator.predict(X_test)
        res = 0
        score = 0
        pred = np.array(pred).reshape(-1)

        # load code list from account
        set_account = set([])
        with open('../data/stocks_in_account.txt') as f_stocks:
            for line in f_stocks.readlines():
                data = line.split(',')
                set_account.add(data[6].replace('A', ''))

        buy_item = ["매수", "", "시장가", 0, 0, "매수전"]  # buy/sell, code, market/current price, qty, price, "order pending/placed"
        with open("../data/buy_list.txt", "wt") as f_buy:
            for idx in range(len(pred)):
                real_buy_price = int(orig_data[idx])
                buy_price = float(X_test[idx][23*29])
                try:
                    pred_transform = self.scaler[code_list[idx]].inverse_transform([pred[idx]] + [0]*22)[0]
                except KeyError:
                    continue
                print("[BUY PREDICT] code: %s, cur: %5d, predict: %5d" % (code_list[idx], real_buy_price, pred_transform))
                if pred_transform > real_buy_price * 3 and code_list[idx] not in set_account:
                    print("add to buy_list %s" % code_list[idx])
                    buy_item[1] = code_list[idx]
                    buy_item[3] = int(BUY_UNIT / real_buy_price) + 1
                    for item in buy_item:
                        f_buy.write("%s;"%str(item))
                    f_buy.write('\n')

    def load_data_in_account(self):
        # load code list from account
        DATA = []
        with open('../data/stocks_in_account.txt') as f_stocks:
            for line in f_stocks.readlines():
                data = line.split(',')
                DATA.append([data[6].replace('A', ''), data[1], data[0]])

        # load data in DATA
        con = sqlite3.connect('../data/stock.db')
        X_test = []
        idx_rm = []
        first = True
        bar = ProgressBar(len(DATA), max_width=80)
        for idx, code in enumerate(DATA):
            bar.numerator += 1
            print("%s | %d" % (bar, len(X_test)), end='\r')
            sys.stdout.flush()

            try:
                df = pd.read_sql("SELECT * from '%s'" % code[0], con, index_col='일자').sort_index()
            except pd.io.sql.DatabaseError as e:
                print(e)
                idx_rm.append(idx)
                continue
            data = df.iloc[-30:,:]
            data = data.reset_index()
            for col in data.columns:
                try:
                    data.loc[:, col] = data.loc[:, col].str.replace('--', '-')
                    data.loc[:, col] = data.loc[:, col].str.replace('+', '', regex=False)
                except AttributeError as e:
                    print(e)
            data.loc[:, 'month'] = data.loc[:, '일자'].str[4:6]
            DATA[idx].append(int(data.loc[len(data)-1, '현재가']))
            data = data.drop(['일자', '체결강도'], axis=1)
            if len(data) < 30:
                idx_rm.append(idx)
                continue
            try:
                data = self.scaler[code[0]].transform(np.array(data))
            except KeyError:
                idx_rm.append(idx)
                continue
            X_test.extend(np.array(data))
        for i in idx_rm[-1:0:-1]:
            del DATA[i]
        X_test = np.array(X_test).reshape(-1, 23*30) 
        return X_test, DATA

    def make_sell_list(self, X_test, DATA, s_date):
        print("make sell_list")
        if MODEL_TYPE == 'random_forest':
            model_name = "../model/simple_reg_model/%d_%d.pkl" % (self.frame_len, self.predict_dist)
            self.estimator = joblib.load(model_name)
        elif MODEL_TYPE == 'keras':
            model_name = "../model/reg_keras/%d_%d_%s.h5" % (self.frame_len, self.predict_dist, s_date)
            self.estimator = model_from_json(open(model_name.replace('h5', 'json')).read())
            self.estimator.load_weights(model_name)
        pred = self.estimator.predict(X_test)
        res = 0
        score = 0
        pred = np.array(pred).reshape(-1)

        sell_item = ["매도", "", "시장가", 0, 0, "매도전"]  # buy/sell, code, market/current price, qty, price, "order pending/placed"
        with open("../data/sell_list.txt", "wt") as f_sell:
            for idx in range(len(pred)):
                current_price = float(X_test[idx][23*29])
                current_real_price = int(DATA[idx][3])
                name = DATA[idx][2]
                print("[SELL PREDICT] name: %s, code: %s, cur: %f(%d), predict: %f" % (name, DATA[idx][0], current_price, current_real_price, pred[idx]))
                if pred[idx] < current_price:
                    print("add to sell_list %s" % name)
                    sell_item[1] = DATA[idx][0]
                    sell_item[3] = DATA[idx][1]
                    for item in sell_item:
                        f_sell.write("%s;"%str(item))
                    f_sell.write('\n')

    def save_scaler(self, s_date):
        model_name = "../model/scaler_%s.pkl" % s_date
        joblib.dump(self.scaler, model_name)

    def load_scaler(self, s_date):
        model_name = "../model/scaler_%s.pkl" % s_date
        self.scaler = joblib.load(model_name)
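
# A typical driving sequence for SimpleModel (a sketch; the dates and the
# train/evaluate split are illustrative):
#   m = SimpleModel()
#   X_train, Y_train, _ = m.load_all_data(20140101, 20160101)
#   m.train_model(X_train, Y_train)          # or m.train_model_keras(...)
#   X_test, Y_test, DATA = m.load_all_data(20160101, 20170101)
#   m.evaluate_model(X_test, Y_test, DATA, '20160101')
#   m.save_scaler('20160101')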
Example #21
    def baseline_model():
        # create model
        model = Sequential()
        model.add(
            Dense(20,
                  input_dim=train_x.shape[1],
                  kernel_initializer='uniform',
                  activation='softplus'))
        model.add(Dense(1, kernel_initializer='uniform', activation='relu'))
        # Compile model
        model.compile(loss='mse', optimizer='Nadam', metrics=['mse'])
        # model.compile(loss='mean_squared_error', optimizer='adam')
        return model

    estimator = KerasRegressor(build_fn=baseline_model,
                               verbose=1,
                               epochs=5,
                               batch_size=55000)

    estimator.fit(train_x, train_y)
    pred_test = estimator.predict(test_x)
    preds.append(pred_test)

    run = time.perf_counter() - start
    print('{} runs for {:.2f} seconds.'.format('lightgbm', run))

    cur_month_run_total = time.perf_counter() - start_cur_month

    print('Total running time was {:.2f} minutes.'.format(cur_month_run_total /
                                                          60))
    print('-' * 50)
Example #22
def LSTM_Model(n_feat):
    return KerasRegressor(build_fn=(lambda: LSTM_Model_gen(n_feat)),
                          verbose=0,  batch_size=8,
                          epochs=50)
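
# LSTM_Model_gen is not shown; a minimal sketch of a compiled single-layer
# LSTM regressor over n_feat features (variable sequence length):
def LSTM_Model_gen(n_feat):
    from keras.models import Sequential
    from keras.layers import LSTM, Dense
    model = Sequential()
    model.add(LSTM(32, input_shape=(None, n_feat)))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model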
Example #23
def get_model_from_name(model_name, training_params=None, is_hp_search=False):
    global keras_imported

    # For Keras
    epochs = 1000
    # if os.environ.get('is_test_suite', 0) == 'True' and model_name[:12] == 'DeepLearning':
    #     print('Heard that this is the test suite. Limiting number of epochs, which will increase training speed dramatically at the expense of model accuracy')
    #     epochs = 100

    all_model_params = {
        'LogisticRegression': {},
        'RandomForestClassifier': {
            'n_jobs': -2,
            'n_estimators': 30
        },
        'ExtraTreesClassifier': {
            'n_jobs': -1
        },
        'AdaBoostClassifier': {},
        'SGDClassifier': {
            'n_jobs': -1
        },
        'Perceptron': {
            'n_jobs': -1
        },
        'LinearSVC': {
            'dual': False
        },
        'LinearRegression': {
            'n_jobs': -2
        },
        'RandomForestRegressor': {
            'n_jobs': -2,
            'n_estimators': 30
        },
        'LinearSVR': {
            'dual': False,
            'loss': 'squared_epsilon_insensitive'
        },
        'ExtraTreesRegressor': {
            'n_jobs': -1
        },
        'MiniBatchKMeans': {
            'n_clusters': 8
        },
        'GradientBoostingRegressor': {
            'presort': False,
            'learning_rate': 0.1,
            'warm_start': True
        },
        'GradientBoostingClassifier': {
            'presort': False,
            'learning_rate': 0.1,
            'warm_start': True
        },
        'SGDRegressor': {
            'shuffle': False
        },
        'PassiveAggressiveRegressor': {
            'shuffle': False
        },
        'AdaBoostRegressor': {},
        'LGBMRegressor': {
            'n_estimators': 2000,
            'learning_rate': 0.15,
            'num_leaves': 8,
            'lambda_l2': 0.001,
            'histogram_pool_size': 16384
        },
        'LGBMClassifier': {
            'n_estimators': 2000,
            'learning_rate': 0.15,
            'num_leaves': 8,
            'lambda_l2': 0.001,
            'histogram_pool_size': 16384
        },
        'DeepLearningRegressor': {
            'epochs': epochs,
            'batch_size': 50,
            'verbose': 2
        },
        'DeepLearningClassifier': {
            'epochs': epochs,
            'batch_size': 50,
            'verbose': 2
        },
        'CatBoostRegressor': {},
        'CatBoostClassifier': {}
    }

    # if os.environ.get('is_test_suite', 0) == 'True':
    #     all_model_params

    model_params = all_model_params.get(model_name, None)
    if model_params is None:
        model_params = {}

    if is_hp_search:
        if model_name[:12] == 'DeepLearning':
            model_params['epochs'] = 50
        if model_name[:4] == 'LGBM':
            model_params['n_estimators'] = 500

    if training_params is not None:
        print('Now using the model training_params that you passed in:')
        print(training_params)
        # Overwrite our stock params with what the user passes in (i.e., if the user wants 10,000 trees, we will let them do it)
        model_params.update(training_params)
        print(
            'After overwriting our defaults with your values, here are the final params that will be used to initialize the model:'
        )
        print(model_params)

    model_map = {
        # Classifiers
        'LogisticRegression': LogisticRegression(),
        'RandomForestClassifier': RandomForestClassifier(),
        'RidgeClassifier': RidgeClassifier(),
        'GradientBoostingClassifier': GradientBoostingClassifier(),
        'ExtraTreesClassifier': ExtraTreesClassifier(),
        'AdaBoostClassifier': AdaBoostClassifier(),
        'LinearSVC': LinearSVC(),

        # Regressors
        'LinearRegression': LinearRegression(),
        'RandomForestRegressor': RandomForestRegressor(),
        'Ridge': Ridge(),
        'LinearSVR': LinearSVR(),
        'ExtraTreesRegressor': ExtraTreesRegressor(),
        'AdaBoostRegressor': AdaBoostRegressor(),
        'RANSACRegressor': RANSACRegressor(),
        'GradientBoostingRegressor': GradientBoostingRegressor(),
        'Lasso': Lasso(),
        'ElasticNet': ElasticNet(),
        'LassoLars': LassoLars(),
        'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit(),
        'BayesianRidge': BayesianRidge(),
        'ARDRegression': ARDRegression(),

        # Clustering
        'MiniBatchKMeans': MiniBatchKMeans(),
    }

    try:
        model_map['SGDClassifier'] = SGDClassifier(max_iter=1000, tol=0.001)
        model_map['Perceptron'] = Perceptron(max_iter=1000, tol=0.001)
        model_map['PassiveAggressiveClassifier'] = PassiveAggressiveClassifier(
            max_iter=1000, tol=0.001)
        model_map['SGDRegressor'] = SGDRegressor(max_iter=1000, tol=0.001)
        model_map['PassiveAggressiveRegressor'] = PassiveAggressiveRegressor(
            max_iter=1000, tol=0.001)
    except TypeError:
        model_map['SGDClassifier'] = SGDClassifier()
        model_map['Perceptron'] = Perceptron()
        model_map['PassiveAggressiveClassifier'] = PassiveAggressiveClassifier(
        )
        model_map['SGDRegressor'] = SGDRegressor()
        model_map['PassiveAggressiveRegressor'] = PassiveAggressiveRegressor()

    if xgb_installed:
        model_map['XGBClassifier'] = XGBClassifier()
        model_map['XGBRegressor'] = XGBRegressor()

    if lgb_installed:
        model_map['LGBMRegressor'] = LGBMRegressor()
        model_map['LGBMClassifier'] = LGBMClassifier()

    if catboost_installed:
        model_map['CatBoostRegressor'] = CatBoostRegressor(
            calc_feature_importance=True)
        model_map['CatBoostClassifier'] = CatBoostClassifier(
            calc_feature_importance=True)

    if model_name[:12] == 'DeepLearning':
        if not keras_imported:
            # Suppress some level of logs if TF is installed (but allow it to not be installed, and use Theano instead)
            try:
                os.environ['TF_CPP_MIN_VLOG_LEVEL'] = '3'
                os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
                from tensorflow import logging
                logging.set_verbosity(logging.INFO)
            except:
                pass

            global maxnorm
            global Dense, Dropout
            global LeakyReLU, PReLU, ThresholdedReLU, ELU
            global Sequential
            global keras_load_model
            global regularizers, optimizers
            global Activation
            global KerasRegressor, KerasClassifier

            from keras.constraints import maxnorm
            from keras.layers import Activation, Dense, Dropout
            from keras.layers.advanced_activations import LeakyReLU, PReLU, ThresholdedReLU, ELU
            from keras.models import Sequential
            from keras.models import load_model as keras_load_model
            from keras import regularizers, optimizers
            from keras.wrappers.scikit_learn import KerasRegressor, KerasClassifier
            keras_imported = True

        model_map['DeepLearningClassifier'] = KerasClassifier(
            build_fn=make_deep_learning_classifier)
        model_map['DeepLearningRegressor'] = KerasRegressor(
            build_fn=make_deep_learning_model)

    try:
        model_without_params = model_map[model_name]
    except KeyError as e:
        print(
            'It appears you are trying to use a library that is not available when we try to import it, or using a value for model_names that we do not recognize'
        )
        raise e

    if os.environ.get('is_test_suite', False) == 'True':
        if 'n_jobs' in model_params:
            model_params['n_jobs'] = 1
    model_with_params = model_without_params.set_params(**model_params)

    return model_with_params
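
# Example call (a sketch): fetch a regressor and merge custom params over
# the stock defaults.
#   model = get_model_from_name('RandomForestRegressor',
#                               training_params={'n_estimators': 100})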
Example #24
batch_size = 1000
print('Epochs: ', epochs)
print('Batch size: ', batch_size)

keras_callbacks = [
    # ModelCheckpoint('/tmp/keras_checkpoints/model.{epoch:02d}-{val_loss:.2f}.hdf5', monitor='val_loss', save_best_only=True, verbose=2)
    # ModelCheckpoint('/tmp/keras_checkpoints/model.{epoch:02d}.hdf5', monitor='val_loss', save_best_only=True, verbose=0)
    # TensorBoard(log_dir='/tmp/keras_logs/model_3', histogram_freq=0, write_graph=True, write_images=True, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None),
    EarlyStopping(monitor='val_mean_absolute_error', patience=80, verbose=0) # 20
]
print(x_train.shape)

#keras.wrappers.scikit_learn.KerasRegressor

from keras.wrappers.scikit_learn import KerasRegressor
model = KerasRegressor(build_fn=make_model, epochs=epochs, batch_size=batch_size, verbose=True, callbacks=keras_callbacks)
model.fit(x_train, y_train)

'''
history = model.fit(x_train, y_train,
    batch_size=batch_size,
    epochs=epochs,
    shuffle=True,
    verbose=2,#0, # Change it to 2, if wished to observe execution
    #validation_data=(arr_x_valid, arr_y_valid),
    callbacks=keras_callbacks)
'''
y_pred = model.predict(x_test[:20])

print(y_pred)
print(y_test[:20])

def baseline_model():
    # create model
    model = Sequential()
    model.add(
        Dense(7, input_dim=7, kernel_initializer='normal', activation='relu'))
    model.add(Dense(4, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_absolute_error', optimizer='adam')
    return model


# fix random seed for reproducibility
seed = 7
np.random.seed(seed)
# evaluate model with standardized dataset
estimator = KerasRegressor(build_fn=baseline_model,
                           epochs=3000,
                           batch_size=8474,
                           verbose=0)

kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, y, cv=kfold)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))

# example of training a final regression model
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression
# generate regression dataset
X, y = make_regression(n_samples=100, n_features=2, noise=0.1)
# fit final model
model = LinearRegression()
model.fit(X, y)
# new instances where we do not know the answer
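# The snippet breaks off here; the natural continuation would generate
# unseen rows and score them with the fitted model, e.g.:
#   Xnew, _ = make_regression(n_samples=3, n_features=2, noise=0.1)
#   ynew = model.predict(Xnew)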
Example #26
def baseline_model():
    model = Sequential()
    model.add(
        Dense(12, input_dim=12, kernel_initializer='normal',
              activation='relu'))
    model.add(
        Dense(12, input_dim=12, kernel_initializer='normal',
              activation='relu'))
    model.add(
        Dense(12, input_dim=12, kernel_initializer='normal',
              activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))

    # compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
x_train, x_test, y_train, y_test = train_test_split(X, Y)
# evaluate model with standardized dataset
estimator = KerasRegressor(build_fn=baseline_model,
                           epochs=1,
                           batch_size=5,
                           verbose=0)

kfold = KFold(n_splits=30, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, Y, cv=kfold)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))
estimator.fit(x_train, y_train)
y_pred = estimator.predict(x_test)
# print(y_pred.shape)
fnc.errors(y_test, y_pred)
    regressor.add(
        Dense(units=nb_units,
              kernel_initializer='uniform',
              activation='relu',
              input_dim=325))
    regressor.add(
        Dense(units=nb_units, kernel_initializer='uniform', activation='relu'))
    regressor.add(
        Dense(units=1, kernel_initializer='uniform', activation='linear'))
    regressor.compile(optimizer='adam',
                      loss='mae',
                      metrics=['mse', 'mae', 'mape'])
    return regressor


grid_regressor = KerasRegressor(build_fn=build_regressor_for_grid)
parameters = {
    'batch_size': [30, 50, 100],
    'epochs': [10, 30],
    'regressor': ['adam'],
    'nb_units': [100, 150, 200]
}
grid_search = GridSearchCV(estimator=grid_regressor, param_grid=parameters)
grid_search = grid_search.fit(X_train, y_train)
best_parameters = grid_search.best_params_
best_accuracy = grid_search.best_score_


### Build one ANN
def build_regressor():
    regressor = Sequential()
Example #28
class NNNBA:
    """
    NNNBA class, which contains all the calculated information
    """

    default_model_type = "lasso"
    assumed_max_salary = 35350000.0

    __threshold_per_col = {
        "OFF_RATING": 12,
        "PIE": 0.11,
        "NET_RATING": 18,
        "GP": 50,
        "DEF_RATING": 7,
        "USG_PCT": 0.12,
        "FGA": None,
        "FGM": None,
        "FG3A": None,
        "PTS": None,
        "FTM": None,
        "FGM": None,
        "REB_PCT": None,
        "AGE": 4
    }

    __outlier_cols_upper = [
    ]  #["OFF_RATING", "PIE", "NET_RATING", "USG_PCT", "PTS"]
    __outlier_cols_lower = []  #["DEF_RATING"]

    __ridge_init_alpha = [0.01, 0.03, 0.06, 0.1, 0.3, 0.6, 1, 3, 6, 10, 30, 60]
    __lasso_init_alpha = [
        0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006, 0.01, 0.03, 0.06, 0.1,
        0.3, 0.6, 1
    ]
    __elasticnet_init = {
        "l1_ratio": [0.1, 0.3, 0.5, 0.6, 0.7, 0.8, 0.85, 0.9, 0.95, 1],
        "alpha": [
            0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006, 0.01, 0.03, 0.06, 0.1,
            0.3, 0.6, 1, 3, 6
        ]
    }

    def __realpha__(self, alpha):
        """
        Function to recalculate alpha
        """
        return [
            alpha * .6, alpha * .65, alpha * .7, alpha * .75, alpha * .8,
            alpha * .85, alpha * .9, alpha * .95, alpha, alpha * 1.05,
            alpha * 1.1, alpha * 1.15, alpha * 1.25, alpha * 1.3, alpha * 1.35,
            alpha * 1.4
        ]

    def __reratio__(self, ratio):
        """
        Function to recalculate ratio
        """
        return [
            ratio * .85, ratio * .9, ratio * .95, ratio, ratio * 1.05,
            ratio * 1.1, ratio * 1.15
        ]

    def __baseline_model__():
        """
        Base Neural Network model
        """
        n_inputs = 39  # named to avoid shadowing the built-in `input`
        model = Sequential()
        model.add(
            Dense(n_inputs,
                  input_dim=n_inputs,
                  kernel_initializer='normal',
                  activation='relu'))
        model.add(
            Dense(int(n_inputs / 2),
                  kernel_initializer='normal',
                  activation='relu'))
        model.add(Dense(n_inputs, kernel_initializer='normal',
                        activation='relu'))
        model.add(
            Dense(int(n_inputs / 2),
                  kernel_initializer='normal',
                  activation='relu'))
        model.add(
            Dense(int(n_inputs / 4),
                  kernel_initializer='normal',
                  activation='relu'))
        model.add(Dense(1, kernel_initializer='normal'))
        model.compile(loss='mean_squared_error', optimizer='adam')
        return model

    def __idx_of_median_outlier__(self,
                                  col,
                                  threshold=None,
                                  upper_outlier=True):  #may need threshold=2
        """
        Find index of outlier based on distance from median
        Distance from median = threshold, which is either passed in or calculated as a function of std from the passed in data
        """
        if threshold is None:
            threshold = col.std() * 2.5
        logger.debug("median: " + str(col.median()) + " threshold: " +
                     str(threshold))
        diff = col - col.median()
        if upper_outlier:
            outlier = diff > threshold
        else:
            outlier = -1 * diff > threshold
        return list(outlier.index[outlier])

    models = {
        "linear regression":
        linear_model.LinearRegression(fit_intercept=True),
        "ridge":
        linear_model.RidgeCV(alphas=__ridge_init_alpha, fit_intercept=True),
        "lasso":
        linear_model.LassoCV(alphas=__lasso_init_alpha,
                             max_iter=5000,
                             cv=10,
                             fit_intercept=True),
        "bayes ridge":
        linear_model.BayesianRidge(),
        "keras regressor":
        KerasRegressor(build_fn=__baseline_model__,
                       nb_epoch=100,
                       batch_size=5,
                       verbose=0),
        "xgb":
        xgb.XGBRegressor(n_estimators=1500, max_depth=2, learning_rate=0.01),
        "elasticnet":
        linear_model.ElasticNetCV(l1_ratio=__elasticnet_init["l1_ratio"],
                                  alphas=__elasticnet_init["alpha"],
                                  max_iter=1000,
                                  cv=3),
        "theilsen":
        linear_model.TheilSenRegressor(),
        "polynomial":
        Pipeline([('poly', PolynomialFeatures(degree=2)),
                  ('linear', linear_model.LinearRegression(fit_intercept=True))
                  ])
    }

    def __remodel__(self, model_type, regr, __X_train, __Y_train):
        """
        Function to retrain certain models based on optimal alphas and/or ratios
        """
        if model_type == "ridge":
            alpha = regr.alpha_
            regr = linear_model.RidgeCV(alphas=self.__realpha__(alpha), cv=10)
        elif model_type == "lasso":
            alpha = regr.alpha_
            regr = linear_model.LassoCV(alphas=self.__realpha__(alpha),
                                        max_iter=5000,
                                        cv=10)
        elif model_type == "elasticnet":
            alpha = regr.alpha_
            ratio = regr.l1_ratio_
            regr = linear_model.ElasticNetCV(
                l1_ratio=self.__reratio__(ratio),
                alphas=self.__elasticnet_init["alpha"],
                max_iter=1000,
                cv=3)

        regr.fit(__X_train, __Y_train)
        return regr

    def __normalize_salary__(
        self,
        col,
        max_salary=assumed_max_salary
    ):  # scales out to max contract; max taken from https://www.hoopsrumors.com/2017/05/nba-maximum-salary-projections-for-201718.html
        """
        Function to normalize salary so that the max is maximum salary possible, as yoy max salary changes
        """
        min_salary = min(col)
        local_max_salary = max(col)
        return max_salary - (local_max_salary - col) / (
            local_max_salary - min_salary) * (max_salary - min_salary)
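
    # worked example of the rescaling above (hypothetical numbers): a column
    # spanning $1M-$30M maps onto $1M-$35.35M, e.g. $15M -> ~$17.6M, since
    # 35.35e6 - (30e6 - 15e6) / (30e6 - 1e6) * (35.35e6 - 1e6) ~= 17.58e6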

    def __init__(self, debug=False):
        logger.setLevel(logging.DEBUG if debug else logging.ERROR)
        with open("crawled_data/raw_data.json", "r") as data_file:
            raw_data = json.load(data_file)

        columns = raw_data[0]["header"]
        unique_columns = list(set(raw_data[0]["header"]))
        position_names = [
            "Point Guard", "Shooting Guard", "Small Forward", "Power Forward",
            "Center"
        ]
        positions = []

        for i, val in enumerate(position_names):
            positions.append((val, i))
        positions_convert = dict(positions)

        self.X_df = pd.DataFrame(columns=columns)
        Y_df = pd.DataFrame(columns=["SALARIES"])
        age = []
        positions_df = pd.DataFrame(columns=position_names)
        names = pd.DataFrame(columns=["NAME", "PROJECTED_SALARIES"])

        logger.debug("Processing data")
        for i, player in enumerate(raw_data):
            if "2016_17" in player["salaries"] and "2016-17" in player["stats"]:
                Y_df.loc[len(Y_df)] = player["salaries"]["2016_17"]
                self.X_df.loc[len(self.X_df)] = player["stats"]["2016-17"]
                age.append(player["age"])

                row_idx = len(positions_df)
                positions_df.loc[row_idx] = [0, 0, 0, 0, 0]
                for position in player["positions"]:
                    # flag the row just added; indexing with len(positions_df)
                    # after the append would point one past it
                    positions_df.loc[row_idx, position] = 1

                projected_salaries = 0
                try:
                    projected_salaries = player["projected_salaries"][0]
                except (KeyError, IndexError):
                    pass
                names.loc[len(names)] = [player["name"], projected_salaries]
            else:
                continue

        # optional tanh squashing of selected feature columns (currently
        # disabled: the list is empty)
        for col in []:
            try:
                self.X_df[col] = np.tanh(self.X_df[col])
            except (KeyError, TypeError):
                pass

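        # the raw stats header repeats some columns; transpose, drop the
        # duplicate rows, and transpose back to deduplicate the columns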
        self.X_df = self.X_df.T.drop_duplicates().T
        self.X_df = pd.concat(
            [self.X_df, pd.Series(age, name="AGE"), positions_df], axis=1)

        self.X_df = self.X_df.drop([
            "FGA", "L", "AGE", "PCT_TOV", "BLKA", "AST_PCT", "AST_RATIO",
            "OREB_PCT", "DREB_PCT", "REB_PCT", "TM_TOV_PCT", "PACE",
            "OPP_PTS_OFF_TOV", "OPP_PTS_FB", "OPP_PTS_PAINT",
            'OPP_PTS_2ND_CHANCE', 'PCT_FGA_2PT', 'PCT_FGA_3PT',
            'PCT_PTS_2PT', 'PCT_PTS_2PT_MR', 'PCT_PTS_3PT', 'PCT_PTS_FB',
            'PCT_PTS_FT', 'PCT_PTS_OFF_TOV', 'PCT_PTS_PAINT', 'PCT_AST_2PM',
            'PCT_UAST_2PM', 'PCT_AST_3PM', 'PCT_UAST_3PM', 'PCT_AST_FGM',
            'PCT_UAST_FGM', 'PCT_FGM', 'PCT_FGA', 'PCT_FG3M', 'PCT_FG3A',
            'PCT_FTM', 'PCT_FTA', 'PCT_OREB', 'PCT_DREB', 'PCT_REB', 'PCT_AST',
            'PCT_STL', 'PCT_BLK', 'PCT_BLKA', 'PTS_OFF_TOV', 'PTS_FB',
            'PTS_PAINT'
        ], axis=1)

        logger.debug("Columns: " + ", ".join(self.X_df.columns))
        # remove players who have played fewer than 15 games
        idx_of_lt_gp = self.X_df.index[(self.X_df["GP"] < 15)]
        self.X_df = self.X_df.drop(idx_of_lt_gp)
        Y_df = Y_df.drop(idx_of_lt_gp)
        age = pd.Series(age).drop(idx_of_lt_gp)
        positions_df = positions_df.drop(idx_of_lt_gp)
        names = names.drop(idx_of_lt_gp)

        # Remove outliers
        logger.debug("Remove outliers")

        X_train = self.X_df.copy()
        Y_train = Y_df.copy()
        logger.debug("No of rows before removing outliers: " +
                     str(X_train.shape[0]))
        to_be_dropped = []
        ## remove upper
        for col in self.__outlier_cols_upper:
            logger.debug(col)
            idx_of_median_outlier = self.__idx_of_median_outlier__(
                X_train[col], self.__threshold_per_col[col])
            logger.debug(
                col + " should drop " +
                ", ".join(names["NAME"][idx_of_median_outlier].values))
            to_be_dropped = to_be_dropped + idx_of_median_outlier

        ## remove lower
        for col in self.__outlier_cols_lower:
            logger.debug(col)
            idx_of_median_outlier = self.__idx_of_median_outlier__(
                X_train[col],
                self.__threshold_per_col[col],
                upper_outlier=False)
            logger.debug(
                col + " should drop " +
                ", ".join(names["NAME"][idx_of_median_outlier].values))
            to_be_dropped = to_be_dropped + idx_of_median_outlier

        to_be_dropped = list(set(to_be_dropped))
        logger.debug("Outliers: " +
                     ", ".join(names["NAME"][to_be_dropped].values))
        X_train = X_train.drop(to_be_dropped)
        Y_train = Y_train.drop(to_be_dropped)
        logger.debug("No of rows after removing outliers: " +
                     str(X_train.shape))
        logger.debug("No of rows after removing outliers: " +
                     str(Y_train.shape))

        __X_train = X_train.values  # training data only includes non-rookies
        __Y_train = np.log1p(Y_train["SALARIES"].values)  # y = log(1+y)

        self.Y_df = Y_df
        self.model_results = {}
        self.names = names

        for model_type, regr in self.models.items():
            logger.debug("Started  " + model_type)
            this_results = names.copy()
            regr.fit(__X_train, __Y_train)

            regr = self.__remodel__(model_type, regr, __X_train, __Y_train)

            results = self.__normalize_salary__(
                np.expm1(regr.predict(self.X_df.values)))  # y = exp(y) - 1
            this_results['WORTH'] = results

            diffY = this_results["PROJECTED_SALARIES"].values - results
            this_results['SALARY_DIFF'] = diffY
            this_results = this_results.sort_values(by="SALARY_DIFF",
                                                    ascending=False)

            self.models[model_type] = regr
            self.model_results[model_type] = this_results
            logger.debug("Finished " + model_type)

        # ensemble average: mean of the bayes ridge, lasso, and elasticnet predictions
        this_results = self.model_results["linear regression"].copy()
        this_results["WORTH"] = self.__normalize_salary__(
            (1. * self.model_results["bayes ridge"]["WORTH"] +
             1. * self.model_results["lasso"]["WORTH"] +
             1. * self.model_results["elasticnet"]["WORTH"]) / 3)
        diffY = this_results["PROJECTED_SALARIES"].values - this_results[
            "WORTH"]
        this_results['SALARY_DIFF'] = diffY
        self.model_results["avg"] = this_results

    def getUndervalued(self, model_type=default_model_type):
        names = self.model_results[model_type]
        print(names.loc[(names["SALARY_DIFF"] < 0)
                        & (names["PROJECTED_SALARIES"] > 0)])

    def getPlayerValue(self, player_name, model_type=default_model_type):
        names = self.model_results[model_type]
        idx = names[names["NAME"] == player_name].index[0]

        print("\nPaid: " +
              '${:,.2f}'.format(float(self.Y_df.loc[idx]["SALARIES"])) +
              "\tFuture Salary: " +
              '${:,.2f}'.format(float(self.names["PROJECTED_SALARIES"][idx])) +
              "\tWorth: " + '${:,.2f}'.format(float(names["WORTH"][idx])) +
              "\n")
        self.getPlayerStats(player_name, trim=True)

    def getPlayerStats(self, player_name, trim=False):
        columns = self.X_df.columns
        if trim:
            columns = columns[:30]
        print(self.X_df.loc[self.names["NAME"] == player_name, columns])

    def getMostValuablePlayers(self, model_type=default_model_type):
        names = self.model_results[model_type]
        print(names.sort_values(by="WORTH"))

    def showAvailableModels(self):
        for model in self.models:
            print(model)

    def getPlayerNameByIndex(self, index):
        return self.names[self.names.index == index]

    def getCoefFromModel(self, model_type=default_model_type):
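        # note: this assumes the chosen model exposes coef_; the
        # "keras regressor" and "xgb" entries do not provide it the same way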
        return pd.DataFrame(self.models[model_type].coef_,
                            index=self.X_df.columns,
                            columns=["coef"]).sort_values(by="coef")

    def plotXCol(self, col_name, X=None):
        import matplotlib.pyplot as plt
        if X is None:
            X = self.X_df.sort_values(by=col_name)[col_name].values
        plt.figure()
        plt.scatter(range(len(X)), X)
        plt.show()
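
# Hedged usage sketch for the class above. Its name is not shown in this
# excerpt, so Predictor below is an assumed name; the methods are the ones
# defined above.
# p = Predictor(debug=True)            # loads crawled_data/raw_data.json, fits all models
# p.showAvailableModels()
# p.getUndervalued(model_type="lasso")
# p.getPlayerValue("Stephen Curry")    # player name purely illustrative
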
import predictor
import prepare_data
import model_builder

if __name__ == "__main__":
    print("Grid searching!")

    #get home path
    root_dir = os.path.dirname(os.path.realpath(__file__))

    x, y, sc_X, sc_Y = prepare_data.training(
        os.path.join(root_dir, "data", "results.csv"))

    # create model
    model = KerasRegressor(build_fn=model_builder.create_model,
                           verbose=1,
                           feature_count=len(x[0]),
                           output_count=len(y[0]))

    # grid search epochs, batch size and optimizer
    optimizers = ['rmsprop']  #, 'adam']
    init = ['glorot_uniform']  #, 'normal', 'uniform']
    epochs = [1000, 5000, 10000]
    batches = [50]
    hidden_layer_counts = [1, 2, 3]
    param_grid = dict(optimizer=optimizers,
                      epochs=epochs,
                      batch_size=batches,
                      hidden_layer_count=hidden_layer_counts,
                      init=init)
    grid = GridSearchCV(estimator=model, param_grid=param_grid)
    grid_result = grid.fit(x, y)
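
    # report the best combination found -- the same "Best: %f using %s"
    # pattern used in the other examples in this listing
    print("Best: %f using %s" %
          (grid_result.best_score_, grid_result.best_params_))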
Beispiel #30
0
class ML_TimeSeries(ML_Base):

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._logger.info("{0} initializing...".format(self.__class__.__name__))
        
        self._out_dim1 = kwargs.get('out_dim1',30)
        self._nb_epoch = kwargs.get('nb_epoch',150)
        self._batch_size = kwargs.get('batch_size',100)
        self._params = {'out_dim1': kwargs.get('out_dim1',60),
                        'nb_epoch': kwargs.get('nb_epoch',1000),
                        'batch_size': kwargs.get('batch_size',100)}
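        # note: the defaults baked into _params (60 / 1000) differ from the
        # instance attributes above (30 / 150); the explicit batch_size and
        # epochs arguments passed to fit() in learn() take precedence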

        self._maxlen = kwargs.get('maxlen', 3)
        config = tf.ConfigProto(gpu_options=tf.GPUOptions(visible_device_list="0",
                                                          allow_growth=True))
        
        sess = tf.Session(config=config)
        K.set_session(sess)

        self._logger.info("{0} initialized.".format(self.__class__.__name__))

    @property
    def maxlen(self):
        return self._maxlen
    

    def _create_ts_data(self, target_data):
        ts_data = []
        for i in range(self._maxlen, len(target_data)+1):
            ts_data.append(np.array(target_data.iloc[i-self._maxlen:i]))

        #for i in range(len(target_data)-self._maxlen):
        #    ts_data.append(np.array(target_data.iloc[i:i+self._maxlen]))
        return np.array(ts_data)
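
    # shape sketch for _create_ts_data (hypothetical numbers): with
    # self._maxlen == 3, a DataFrame of 5 rows and f columns yields the
    # windows [0:3], [1:4], [2:5] -- an ndarray of shape (3, 3, f)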


    def learn(self, training_data, training_label, tunes_param=False):
        seed = 1234
        self._input_dim = training_data.shape[1]
        np.random.seed(seed)
        
        ts_training_data = self._create_ts_data(training_data)
        ts_training_label = self._create_ts_data(training_label)
        # evaluate model with standardized dataset
        if self._is_regression:
            self._model = KerasRegressor(build_fn=self._create_model,
                                         verbose=1,
                                         input_dim=training_data.shape[1],
                                         **self._params)
            hist = self._model.fit(ts_training_data,
                                   ts_training_label,
                                   callbacks=[EarlyStopping(monitor='loss',
                                                            patience=1,
                                                            verbose=0)]
                                   , batch_size=self._batch_size
                                   , epochs=self._nb_epoch
                                   , validation_split = 0.2
                                   )
        else:
            
            self._model = self._create_model(input_dim=training_data.shape[1],
                                             out_dim1=self._params['out_dim1'])
            hist = self._model.fit(ts_training_data,
                                   ts_training_label
                                   , callbacks=[EarlyStopping(monitor='loss'
                                                              ,patience=1
                                                              ,verbose=0)]
                                   , batch_size=self._batch_size
                                   , nb_epoch=self._nb_epoch
                                   #, validation_split = 0.2
                                   )
        #import matplotlib.pyplot as plt
        #plt.plot(hist.history['loss'])
        #import pdb;pdb.set_trace()
            

    def predict_one(self, test_data):
        if self._is_regression:
            return float(self._model.predict(test_data)[-1])
        else:
            predicted = self._model.predict(test_data)
            return 1 if predicted[0][-1][0] > 0 else 0

    def predict(self, test_data):
        
        if isinstance(test_data, np.ndarray):
            ts_test_data = test_data
        else:
            ts_test_data = self._create_ts_data(test_data)
        
        if self._is_regression:
            return super().predict(ts_test_data)[:,-1]
        else:
            
            predicted = self._model.predict(ts_test_data)
            #import pdb;pdb.set_trace()
            return [1 if predicted[i][-1] > 0.5 else 0
                    for i in range(len(predicted))]
            #return [1 if predicted[i][0] > predicted[i][1] else 0
            #        for i in range(len(predicted))]

    def predict_one_proba(self, test_data):
        proba = self._model.predict_proba(test_data)[0][-1]
        return [proba, 1-proba]

    def _encode_one_hot(self, label):
        return np.array([[1,0] if label.Return.iloc[i] > 0.0 else [0,1]
                        for i in range(label.shape[0])])

    def _change_label_format(self, label_data):
        return np.matrix([[1,0] if label_data[i] == 0 else [0,1]
                         for i in range(len(label_data))])

    def dispose(self):
        super().dispose()
        K.clear_session()
#==============================================================================
#==============================================================================
#
# # evaluate model with standardized dataset
# np.random.seed(seed)
# estimators = []
# estimators.append(('standardize', StandardScaler()))
# estimators.append(('mlp', KerasRegressor(build_fn=baseline_model, epochs=50, batch_size=5, verbose=0)))
# pipeline = Pipeline(estimators)
# kfold = KFold(n_splits=10, random_state=seed)
# results = cross_val_score(pipeline, X, y, cv=kfold)
# print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))
#==============================================================================

kr = KerasRegressor(build_fn=baseline_model,
                    nb_epoch=200,
                    batch_size=50,
                    verbose=0)

kr.fit(X_train, y_train)

# saving the model
#model_name = './model3-ANN.joblib.pkl'
#_ = joblib.dump(model, model_name, compress=9)
# Save the weights (the architecture is saved separately below)
kr.model.save_weights('model_weights-2.h5')

# Save the model architecture
with open('model_architecture-2.json', 'w') as f:
    f.write(kr.model.to_json())
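
# Hedged companion sketch: reload what was saved above. model_from_json and
# load_weights are standard Keras calls; the file names match those used above.
from keras.models import model_from_json

with open('model_architecture-2.json') as f:
    loaded_model = model_from_json(f.read())
loaded_model.load_weights('model_weights-2.h5')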

# Testing model
def update_trainee_score(x):
    print('Updating Profile Scores ...')
    academic_score = []
    honesty = []
    emotionality = []
    extraversion = []
    agreeableness = []
    conscientiousness = []
    openness = []
    iq = []
    verbal_ability = []
    score = []
    course_score = []
    qa_score = []
    project_score = []

    for p in Trainee.objects.all().exclude(pk=x.pk):

        # skip trainees with any missing profile field
        fields = [p.academic_score, p.personality_c, p.personality_h,
                  p.personality_a, p.personality_e, p.personality_o,
                  p.personality_x, p.iq_score, p.course_score,
                  p.project_score, p.verbal_ability_score, p.qa_score,
                  p.score]
        if any(v is None for v in fields):
            continue
        academic_score.append(p.academic_score)
        honesty.append(p.personality_h)
        emotionality.append(p.personality_e)
        extraversion.append(p.personality_x)
        agreeableness.append(p.personality_a)
        conscientiousness.append(p.personality_c)
        openness.append(p.personality_o)
        iq.append(p.iq_score)
        verbal_ability.append(p.verbal_ability_score)
        score.append(p.score)
        project_score.append(p.project_score)
        course_score.append(p.course_score)
        qa_score.append(p.qa_score)

    if len(academic_score) == 0:
        x.score = 0.6
        x.save()

    else:

        d = {'1': academic_score, '2': honesty, '3': emotionality, '4': extraversion, '5': agreeableness,
             '6': conscientiousness, '7': openness, '8': iq, '9': verbal_ability, '10': project_score,
             '11': course_score,
             '12': qa_score, '13': score}
        df = pd.DataFrame(data=d)
        # select the target by name rather than by position: positional
        # indexing here depends on how pandas orders the string keys above;
        # the target is the existing profile score ('13')
        X = df.drop('13', axis=1).values
        y = df['13'].values

        sc = StandardScaler()
        X = sc.fit_transform(X)
        estimator = KerasRegressor(build_fn=baseline_model, batch_size=50, epochs=100, verbose=0)
        estimator.fit(X, y)

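        # feature vector for trainee x, appended in the same column order as
        # the frame above ('1' through '12')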
        test_pred_temp = []
        test_pred = []
        test_pred_temp.append(x.academic_score)
        test_pred_temp.append(x.personality_h)
        test_pred_temp.append(x.personality_e)
        test_pred_temp.append(x.personality_x)
        test_pred_temp.append(x.personality_a)
        test_pred_temp.append(x.personality_c)
        test_pred_temp.append(x.personality_o)
        test_pred_temp.append(x.iq_score)
        test_pred_temp.append(x.verbal_ability_score)
        test_pred_temp.append(x.project_score)
        test_pred_temp.append(x.course_score)
        test_pred_temp.append(x.qa_score)

        test_pred.append(test_pred_temp)
        test_pred_1 = np.asarray(test_pred, dtype=float)
        # apply the same scaling used for the training features before
        # predicting and before appending to X
        test_pred_scaled = sc.transform(test_pred_1)
        new_prediction = estimator.predict(test_pred_scaled)

        y = np.insert(y, y.size, new_prediction)
        X = np.concatenate((X, test_pred_scaled), axis=0)

        # population mean and std of all scores, including the new prediction
        # (avoid reusing the name x, which is the trainee being updated)
        y_new = list(y)
        avg = float(np.mean(y_new))
        sd = float(np.std(y_new))

        y_final = []

        for i in range(len(y_new)):
            pp = (y_new[i] - avg) / sd * 0.1 + 0.8
            if pp >= 1.0:
                pp = 0.9999
            if pp <= 0.6:
                pp = 0.0001
            y_final.append(pp)

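        # assumption: Trainee.objects.all() iterates exactly the trainees
        # scored above (now including x), in the same order; otherwise
        # y_final and the queryset fall out of alignment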
        ctr = 0
        for p in Trainee.objects.all():
            p.score = y_final[ctr]
            p.save()
            ctr += 1
Beispiel #33
0
def baseline_model():
    # create model
    model = Sequential()
    model.add(
        Dense(7, input_dim=7, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


seed = 7
np.random.seed(seed)
# evaluate model with standardized dataset
estimator = KerasRegressor(build_fn=baseline_model,
                           epochs=100,
                           batch_size=5,
                           verbose=0)

# random_state only takes effect with shuffle=True (newer scikit-learn
# versions raise an error otherwise)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, Y, cv=kfold)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))

# same evaluation on the raw numpy arrays, single-process
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X.values, Y.values, cv=kfold, n_jobs=1)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))

# evaluate model with standardized dataset
np.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
# the original snippet breaks off mid-call; the completion below follows the
# commented template earlier in this listing
estimators.append(('mlp',
                   KerasRegressor(build_fn=baseline_model,
                                  epochs=50,
                                  batch_size=5,
                                  verbose=0)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))

def deepLearning(data, target, iteraNum, funNum):
    # load dataset
    global kerasModel
    X_norm = data
    print("This is X_norm: ", X_norm)
    y = target
    print("This is target : ", y)
    tempDim = len(X_norm[0])
    print("This is input dimension: ", tempDim)

    kerasList = []
    batch_size = [50, 100, 150, 200]
    epochs = [10, 20, 30, 50, 80]
    inputDim = [tempDim]
    # neurons = [40,50,60,100,200]
    param_grid = dict(batch_size=batch_size,
                      nb_epoch=epochs,
                      input_dim=inputDim)

    if funNum == 1:
        kerasModel = KerasRegressor(build_fn=baseline_model, verbose=0)
    elif funNum == 2:
        kerasModel = KerasRegressor(build_fn=wider_model, verbose=0)
    elif funNum == 3:
        kerasModel = KerasRegressor(build_fn=larger_model, verbose=0)

    for j in range(iteraNum):
        X_train, X_test, y_train, y_test = train_test_split(X_norm,
                                                            y,
                                                            test_size=0.2)
        print("This is X_train: ", X_train)
        print("This is y_train: ", y_train)
        grid = GridSearchCV(estimator=kerasModel, cv=5, param_grid=param_grid)
        newModel = grid.fit(X_train, y_train)
        print("Best: %f using %s" %
              (newModel.best_score_, newModel.best_params_))
        y_pred = newModel.predict(X_test).tolist()
        print("This is y_pred: ", y_pred)
        sum_mean = 0
        y_test_list = y_test.tolist()
        print("This is y_test_list: ", y_test_list)
        # for n in range(len(y_pred)):
        #     print("This is REAL value %.4f, ===========> PRED value: %.4f" % (y_test_list[n], y_pred[n]))
        #     # sum_mean += (y_pred[n] - y_test[n]) ** 2
        #     sum_mean += (float("{0:.4f}".format(float(y_pred[n]))) - y_test_list[n]) ** 2
        # sum_erro = np.sqrt(sum_mean / len(y_pred))
        #
        # print("This is sum_erro: ", sum_erro)
        sum_erro = np.sqrt(mean_squared_error(y_test_list, y_pred))
        print("This is : sum_erro ", sum_erro)
        print("This is iteration number: ", j + 1)
        kerasList.append(sum_erro)
    # # Train the model, iterating on the data in batches of n(32/64/128) samples
    # for j in range(iteraNum):
    #     X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.2)
    #     if funNum == 1:
    #         kerasModel = KerasRegressor(build_fn=baseline_model(inputDim), verbose=0)
    #         grid = GridSearchCV(estimator=kerasModel, param_grid=param_grid, n_jobs=1)
    #         bestDLModel = grid.fit(X_train, y_train)
    #         print("Best: %f using %s" % (bestDLModel.best_score_, bestDLModel.best_params_))
    #         y_pred = bestDLModel.predict(X_test)
    #
    #         # kerasModel = baseline_model(inputDim)
    #         # kerasModel.fit(X_train, y_train, epochs=200, batch_size=128)
    #         # y_pred = kerasModel.predict(X_test)
    #         sum_mean = 0
    #         for n in range(len(y_pred)):
    #             print("This is REAL value %.4f, ===========> PRED value: %.4f" % (y_test[n], y_pred[n]))
    #             sum_mean += (float("{0:.4f}".format(float(y_pred[n]))) - y_test[n]) ** 2
    #         sum_erro = np.sqrt(sum_mean / len(y_pred))
    #         print("This is sum_erro: ", sum_erro)
    #         print("This is iteration number: ", j + 1)
    #         kerasList.append(sum_erro)
    #         # plotFigure(y_pred, y_test, sum_erro[0])
    #     elif funNum == 2:
    #         # kerasModel = wider_model(inputDim, 2)
    #         # kerasModel.fit(X_train, y_train, epochs=100, batch_size=scalar, shuffle=True)
    #         # y_pred = kerasModel.predict(X_test)
    #         kerasModel = KerasRegressor(build_fn=wider_model(inputDim), verbose=0)
    #         grid = GridSearchCV(estimator=kerasModel, param_grid=param_grid, n_jobs=1)
    #         bestDLModel = grid.fit(X_train, y_train)
    #         print("Best: %f using %s" % (bestDLModel.best_score_, bestDLModel.best_params_))
    #         y_pred = bestDLModel.predict(X_test)
    #
    #         sum_mean = 0
    #         for n in range(len(y_pred)):
    #             print("This is REAL value %.4f, ===========> PRED value: %.4f" % (y_test[n], y_pred[n]))
    #             sum_mean += (float("{0:.4f}".format(float(y_pred[n]))) - y_test[n]) ** 2
    #         sum_erro = np.sqrt(sum_mean / len(y_pred))
    #         print("This is sum_erro: ", sum_erro)
    #         print("This is iteration number: ", j + 1)
    #         kerasList.append(sum_erro)
    #         # plotFigure(y_pred,y_test,sum_erro[0])
    #     elif funNum == 3:
    #
    #         # kerasModel = larger_model(inputDim)
    #         # kerasModel.fit(X_train, y_train, epochs=100, batch_size=scalar, shuffle=True)
    #
    #         kerasModel = KerasRegressor(build_fn=larger_model(inputDim), verbose=0)
    #         grid = GridSearchCV(estimator=kerasModel, cv=5,param_grid=param_grid)
    #         grid.fit(X_train, y_train)
    #         print("Best: %f using %s" % (grid.best_score_, grid.best_params_))
    #         y_pred = grid.predict(X_test)
    #         sum_mean = 0
    #         for n in range(len(y_pred)):
    #             print("This is REAL value %.4f, ===========> PRED value: %.4f" % (y_test[n], y_pred[n]))
    #             # sum_mean += (y_pred[n] - y_test[n]) ** 2
    #             sum_mean += (float("{0:.4f}".format(float(y_pred[n]))) - y_test[n]) ** 2
    #         sum_erro = np.sqrt(sum_mean / len(y_pred))
    #         print("This is sum_erro: ", sum_erro)
    #         print("This is iteration number: ", j + 1)
    #         kerasList.append(sum_erro)
    #         # plotFigure(y_pred, y_test, sum_erro)
    return kerasList
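
# Hedged usage sketch (X_norm and y are placeholders for prepared arrays):
# run ten train/test iterations of the baseline network and collect RMSEs
# rmse_per_run = deepLearning(X_norm, y, iteraNum=10, funNum=1)
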
def keras1(train2, y, test2, v, z):
    cname = sys._getframe().f_code.co_name
    v[cname], z[cname] = 0, 0
    scores = list()
    scaler = preprocessing.RobustScaler()
    train3 = scaler.fit_transform(train2)
    test3 = scaler.transform(test2)
    input_dims = train3.shape[1]

    def build_model():
        input_ = layers.Input(shape=(input_dims, ))
        model = layers.Dense(
            int(input_dims * 7.33),
            kernel_initializer='Orthogonal',
            activation=layers.advanced_activations.PReLU())(input_)
        model = layers.BatchNormalization()(model)
        #model = layers.Dropout(0.7)(model)
        model = layers.Dense(
            int(input_dims * 4.35),
            kernel_initializer='Orthogonal',
            activation=layers.advanced_activations.PReLU())(model)
        model = layers.BatchNormalization()(model)
        #model = layers.Dropout(0.9)(model)
        model = layers.Dense(
            int(input_dims * 2.35),
            kernel_initializer='Orthogonal',
            activation=layers.advanced_activations.PReLU())(model)
        model = layers.BatchNormalization()(model)
        #model = layers.Dropout(0.9)(model)
        model = layers.Dense(
            int(input_dims * 0.51),
            kernel_initializer='Orthogonal',
            activation=layers.advanced_activations.PReLU())(model)
        model = layers.BatchNormalization()(model)
        model = layers.Dense(1, activation='sigmoid')(model)
        model = models.Model(input_, model)
        model.compile(loss='binary_crossentropy',
                      optimizer=optimizers.Nadam(),
                      metrics=["accuracy"])
        #print(model.summary(line_length=120))
        return model

    np.random.seed(1234)
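    # note: the network ends in a sigmoid and the folds below are scored with
    # log loss, so KerasRegressor is used here to produce probabilities rather
    # than unbounded regression outputs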
    est = KerasRegressor(
        build_fn=build_model,
        nb_epoch=10000,
        batch_size=128,
        #verbose=2
    )
    build_model().summary(line_length=120)  # summary() already prints; it returns None
    model_path = '../data/working/' + csv_name_suffix()
    model_path = model_path[:-4] + '_keras_model.h5'
    kcb = [
        callbacks.EarlyStopping(monitor='val_loss', patience=20
                                #verbose=1
                                ),
        callbacks.ModelCheckpoint(model_path,
                                  monitor='val_loss',
                                  save_best_only=True,
                                  save_weights_only=True,
                                  verbose=0),
        callbacks.ReduceLROnPlateau(monitor='val_loss',
                                    min_lr=1e-7,
                                    factor=0.2,
                                    verbose=1)
    ]
    num_splits = 5
    ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=11)
    for n, (itrain, ival) in enumerate(ss.split(train3, y)):
        xtrain, xval = train3[itrain], train3[ival]
        ytrain, yval = y[itrain], y[ival]
        est.fit(xtrain,
                ytrain,
                epochs=10000,
                validation_data=(xval, yval),
                verbose=0,
                callbacks=kcb,
                shuffle=True)
        est.model.load_weights(model_path)
        p = est.predict(xval)
        v.loc[ival, cname] += p
        score = metrics.log_loss(y[ival], p)
        print(cname, 'fold %d: ' % (n + 1), score, now())
        scores.append(score)
        z[cname] += np.log1p(est.predict(test3))
    os.remove(model_path)

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits
Beispiel #36
0
def deep_learning_model():
    # reconstructed header: this snippet begins mid-function, and the
    # estimator below references build_fn=deep_learning_model
    model = Sequential()
    model.add(
        Dense(135,
              input_dim=270,
              kernel_initializer='normal',
              activation='elu'))
    model.add(Dense(1, kernel_initializer='normal'))

    # Compile model (configure for training)
    # optimizer 'adam' was chosen because it (on average) is the speediest
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


# evaluate model with standardized dataset
estimator = KerasRegressor(build_fn=deep_learning_model,
                           epochs=100,
                           batch_size=5,
                           verbose=0)

estimator.fit(X, Y)
y_keras_pred = estimator.predict(X_test)

create_submission(test_data, y_keras_pred, 3)

# ### Third Trial Summary -- Big improvement! Deep learning received a score on Kaggle of 0.207

# In[13]:

# Create build function for KerasRegressor


def deep_learning_model2():
def keras1(train2, y, test2, v, z):
    cname = sys._getframe().f_code.co_name
    v[cname], z[cname] = 0, 0
    scores = list()
    scaler = preprocessing.RobustScaler()
    train3 = scaler.fit_transform(train2)
    test3 = scaler.transform(test2)
    input_dims = train3.shape[1]
    def build_model():
        input_ = layers.Input(shape=(input_dims,))
        model = layers.Dense(int(input_dims * 4.33),
                             kernel_initializer='Orthogonal',
                             activation=layers.advanced_activations.PReLU())(input_)
        model = layers.BatchNormalization()(model)
        #model = layers.Dropout(0.7)(model)
        model = layers.Dense(int(input_dims * 2.35),
                             kernel_initializer='Orthogonal',
                             activation=layers.advanced_activations.PReLU())(model)
        model = layers.BatchNormalization()(model)
        #model = layers.Dropout(0.9)(model)
        model = layers.Dense(int(input_dims * 0.51),
                             kernel_initializer='Orthogonal',
                             activation=layers.advanced_activations.PReLU())(model)
        model = layers.BatchNormalization()(model)
        model = layers.Dense(1,
                             activation='sigmoid')(model)
        model = models.Model(input_, model)
        model.compile(loss='binary_crossentropy',
                      optimizer=optimizers.Nadam(lr=0.02),
                      metrics=["accuracy"])
        #print(model.summary(line_length=120))
        return model
    np.random.seed(1234)
    est = KerasRegressor(build_fn=build_model,
                         nb_epoch=10000,
                         batch_size=32,
                         #verbose=2
                        )
    build_model().summary(line_length=120)
    model_path = '../data/working/' + csv_name_suffix()
    model_path = model_path[:-4] + '_keras_model.h5'
    kcb = [
           callbacks.EarlyStopping(
                  monitor='val_loss',
                  patience=20
                  #verbose=1
                   ),
           callbacks.ModelCheckpoint(
                  model_path,
                  monitor='val_loss',
                  save_best_only=True,
                  save_weights_only=True,
                  verbose=0
                   ),
           callbacks.ReduceLROnPlateau(
                  monitor='val_loss',
                  min_lr=1e-7,
                  factor=0.2,
                  verbose=1
                   )
           ]
    num_splits = 7
    ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=11)
    for n, (itrain, ival) in enumerate(ss.split(train3, y)):
        xtrain, xval = train3[itrain], train3[ival]
        ytrain, yval = y[itrain], y[ival]
        est.fit(
                xtrain, ytrain,
                epochs=10000,
                validation_data=(xval, yval),
                verbose=0,
                callbacks=kcb,
                shuffle=True
            )
        est.model.load_weights(model_path)
        p = est.predict(xval)
        v.loc[ival, cname] += pconvert(p)
        score = metrics.log_loss(y[ival], p)
        print(cname, 'fold %d: '%(n+1), score, now())
        scores.append(score)
        z[cname] += pconvert(est.predict(test3))
    os.remove(model_path)

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits
Beispiel #38
0
def build_regressor(optimizer):
    # reconstructed header: the snippet begins mid-function; the grid search
    # below tunes optimizer, and the opening LSTM layer(s) with the input
    # shape are missing from this excerpt
    regressor = Sequential()
    regressor.add(LSTM(units=50, return_sequences=True))
    regressor.add(Dropout(0.2))

    regressor.add(LSTM(units=50))
    regressor.add(Dropout(0.2))

    regressor.add(Dense(units=1))

    regressor.compile(optimizer=optimizer, loss='mean_squared_error')
    return regressor


from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV

regressor = KerasRegressor(build_fn=build_regressor)

parameters = {
    'batch_size': [10, 25, 32],
    'nb_epoch': [50, 100],
    'optimizer': ['adam', 'rmsprop']
}
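
# note: 'nb_epoch' is the pre-Keras-2 spelling of this parameter; a Keras 2
# wrapper expects the grid key 'epochs'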

grid_search = GridSearchCV(estimator=regressor,
                           param_grid=parameters,
                           scoring='neg_mean_squared_error',
                           cv=None)
grid_search.fit(X_train[:, :, -1], y_train)

best_param = grid_search.best_params_
# best_score_ is a negative MSE here (scoring='neg_mean_squared_error'),
# not an accuracy
best_score = grid_search.best_score_
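
# hedged follow-up, mirroring the reporting pattern used elsewhere in this
# listing
print("Best: %f using %s" % (best_score, best_param))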