def assert_regression_predict_shape_correct(num_test):
    reg = KerasRegressor(
        build_fn=build_fn_reg,
        hidden_dims=hidden_dims,
        batch_size=batch_size,
        epochs=epochs)
    reg.fit(X_train, y_train, batch_size=batch_size, epochs=epochs)
    preds = reg.predict(X_test[:num_test], batch_size=batch_size)
    assert preds.shape == (num_test, )
def test_keras_regressor():
    model = Sequential()
    model.add(Dense(input_dim, input_shape=(input_dim,)))
    model.add(Activation('relu'))
    model.add(Dense(1))
    # a regression head needs a linear output; softmax over a single unit
    # would pin every prediction to 1.0
    model.add(Activation('linear'))
    sklearn_regressor = KerasRegressor(model,
                                       optimizer=optim,
                                       loss=loss,
                                       train_batch_size=batch_size,
                                       test_batch_size=batch_size,
                                       nb_epoch=nb_epoch)
    sklearn_regressor.fit(X_train_reg, y_train_reg)
    sklearn_regressor.score(X_test_reg, y_test_reg)
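# Note: keras.wrappers.scikit_learn has been removed from modern Keras.
# A minimal, hypothetical port of the test above to SciKeras (the maintained
# replacement) -- assumes `pip install scikeras` and reuses the same
# input_dim / optim / loss / batch_size / nb_epoch fixtures:
from scikeras.wrappers import KerasRegressor as SciKerasRegressor

def build_reg_model():
    model = Sequential()
    model.add(Dense(input_dim, input_shape=(input_dim,), activation='relu'))
    model.add(Dense(1, activation='linear'))  # linear head for regression
    model.compile(optimizer=optim, loss=loss)
    return model

sci_reg = SciKerasRegressor(model=build_reg_model, epochs=nb_epoch,
                            batch_size=batch_size)
sci_reg.fit(X_train_reg, y_train_reg)
print(sci_reg.score(X_test_reg, y_test_reg))  # SciKeras scores regressors with R^2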
def train_model(self, X_train, Y_train):
    print("training model %d_%d.pkl" % (self.frame_len, self.predict_dist))
    model_name = "../model/simple_reg_model/%d_%d.pkl" % (self.frame_len, self.predict_dist)
    self.estimator = RandomForestRegressor(random_state=0, n_estimators=100, n_jobs=-1)
    self.estimator.fit(X_train, Y_train)
    print("finish training model")
    joblib.dump(self.estimator, model_name)
def train_model_keras(self, X_train, Y_train, date):
    print("training model %d_%d.h5" % (self.frame_len, self.predict_dist))
    model_name = "../model/reg_keras/%d_%d_%s.h5" % (self.frame_len, self.predict_dist, date)
    # Keras 2 renamed nb_epoch to epochs; the old spelling is silently dropped
    self.estimator = KerasRegressor(build_fn=baseline_model, epochs=200,
                                    batch_size=64, verbose=1)
    self.estimator.fit(X_train, Y_train)
    print("finish training model")
    # saving model
    json_model = self.estimator.model.to_json()
    open(model_name.replace('h5', 'json'), 'w').write(json_model)
    self.estimator.model.save_weights(model_name, overwrite=True)
def run_regressor(model=LSTM2, data=None, data_file='df_dh.csv',
                  isload_model=True, testonly=False):
    epochs = 8000
    sequence_length = SEQ_LENGTH
    if data is None:
        X_train, y_train, X_test, y_test, X_val, Y_val = get_data(
            sequence_length=sequence_length,
            stateful=STATEFUL,
            path_to_dataset=data_file)
    else:
        X_train, y_train, X_test, y_test, X_val, Y_val = data

    if STATEFUL:
        # stateful LSTMs require the sample count to be a multiple of batch_size
        X_test = X_test[:int(X_test.shape[0] / batch_size) * batch_size]
        y_test = y_test[:int(y_test.shape[0] / batch_size) * batch_size]

    estimator = KerasRegressor(build_fn=lambda x=X_train: model(x))

    early_stopping = EarlyStopping(monitor='val_loss', verbose=1, patience=20)
    checkpoint = ModelCheckpoint("./lstm.h5", monitor='val_loss', verbose=1,
                                 save_best_only=True, save_weights_only=True)

    hist = estimator.fit(X_train, y_train,
                         validation_data=(X_val, Y_val),
                         callbacks=[early_stopping, checkpoint],
                         epochs=epochs,
                         batch_size=batch_size,
                         verbose=1)

    prediction = estimator.predict(X_test)
    score = mean_squared_error(y_test, prediction)
    print(score)

    # invert predictions back to the original scale
    prediction_trans = scaler.inverse_transform(prediction)
    X_test_trans = scaler.inverse_transform(X_test)
    y_test_trans = scaler.inverse_transform(y_test)
    X_train_trans = scaler.inverse_transform(X_train)
    y_train_trans = scaler.inverse_transform(y_train)

    print(prediction)
    print(X_test)
    print("##############################################")
    draw_scatter(prediction, y_test, X_test, X_train, y_train, data_file)
    his_figures(hist)
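# Caveat for the inverse-transform block above: a scaler fit on an n-feature
# matrix rejects single-column arrays, so inverting a one-column prediction
# only works if the scaler was fit on the target alone. A small sketch of the
# usual workaround (hypothetical helper; assumes the target sat in column
# `target_col` when the scaler was fit):
import numpy as np

def inverse_transform_target(scaler, y_pred, n_features, target_col=0):
    # pad the prediction into a full-width matrix, invert, keep one column
    padded = np.zeros((len(y_pred), n_features))
    padded[:, target_col] = np.ravel(y_pred)
    return scaler.inverse_transform(padded)[:, target_col]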
testFrame = pandas.read_csv(f'2_{ds}.csv')
testFrame = testFrame.drop(columns=['y'])
testFrame = testFrame.drop(columns=['TestIndex'])
testSet = testFrame.values
# fix random seed for reproducibility
seed = 16
numpy.random.seed(seed)
# evaluate model with standardized dataset
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=larger_model, epochs=25,
                                         batch_size=32, verbose=1,
                                         validation_split=0.05, shuffle=True)))
pipeline = Pipeline(estimators)
print('start')
rmse_scorer = make_scorer(root_mean_squared_error, greater_is_better=False)
# random_state only takes effect with shuffle=True, and the plain
# "mean_squared_error" scoring string was removed from scikit-learn,
# so the custom RMSE scorer defined above is used instead
kfold = KFold(n_splits=5, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold, verbose=2, scoring=rmse_scorer)
def baseline_model():
    model = Sequential()
    # Keras 2 spelling: units / kernel_initializer replace output_dim / init
    model.add(Dense(units=3, kernel_initializer='uniform',
                    activation='relu', input_dim=5))
    model.add(Dense(units=3, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(units=1, kernel_initializer='uniform'))
    model.compile(optimizer='adam', loss='mean_squared_error',
                  metrics=['mean_squared_error'])
    return model


from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import KFold, cross_val_score

estimator = KerasRegressor(build_fn=baseline_model, epochs=1000,
                           batch_size=5, verbose=1)
kfold = KFold(n_splits=10, shuffle=True, random_state=1)
results = cross_val_score(estimator, x_train, y_train[:, 0], cv=kfold, n_jobs=1)
estimator.fit(x_train, y_train[:, 0])
y_pred = estimator.predict(x_test)
print(y_pred)
plt.scatter(x_train[:, 0], y_train[:, 0], color='red')
plt.plot(x_test[:, 0], y_pred, color='blue')
plt.xlabel('Product')
class NeuralNet:
    """
    Neural network model implemented with Keras/TensorFlow, with
    evaluation methods implemented with sklearn.
    """

    def __init__(self):
        self.model = Sequential()

    def add_first_layer(self, input_shape, num_features, activation_function):
        """
        Add the first layer to the neural net.
        input_shape: an int specifying the number of units in the first layer
        num_features: an int specifying the number of input features
        activation_function: string specifying the type of activation function
        """
        self.model.add(
            Dense(input_shape,
                  input_dim=num_features,
                  kernel_initializer='normal',
                  activation=activation_function))

    def add_layer(self, num_nodes, activation_function, regularization=None):
        """
        Add a hidden layer to the neural net.
        num_nodes: int specifying the number of hidden nodes
        activation_function: string specifying the type of activation function
        regularization: if not None, an L1 penalty of 0.01 is applied
        """
        if regularization is not None:
            regularization = regularizers.l1(0.01)
        self.model.add(
            Dense(num_nodes,
                  kernel_initializer='normal',
                  activation=activation_function,
                  kernel_regularizer=regularization))

    def add_last_layer(self):
        """
        Add the last layer of the NN. We are doing regression,
        so there will be only one output.
        """
        self.model.add(
            Dense(1, kernel_initializer='normal', activation='linear'))

    def compile(self, loss_function, optimizer, epochs, batch_size, verbosity):
        """
        Compile the model for training and build an estimator.
        loss_function: string specifying the loss function that will be used
        optimizer: string specifying the optimization method
        epochs: int specifying the number of epochs
        batch_size: int specifying the batch size
        verbosity: int specifying the level of textual feedback
        """
        self.batch_size = batch_size
        self.model.compile(loss=loss_function,
                           optimizer=optimizer,
                           metrics=['mean_squared_error'])
        self.estimator = KerasRegressor(build_fn=self.get_model,
                                        epochs=epochs,
                                        batch_size=batch_size,
                                        verbose=verbosity)
        print(self.model.summary())

    def get_model(self):
        """
        Returns the NeuralNet model; KerasRegressor requires a callable
        that returns a compiled model.
        """
        return self.model

    def train(self, X, y, epochs, batch_size, validation_split):
        """
        Train the neural network.
        params:
            X: set of features
            y: set of targets
            validation_split: a float from 0 to 1 specifying the portion
                of the set to use as validation
        returns: a history object containing the training and validation
            loss at each epoch
        """
        return self.estimator.fit(X, y,
                                  epochs=epochs,
                                  batch_size=batch_size,
                                  validation_split=validation_split,
                                  shuffle=True)

    def evaluate(self, X_train, y_train, X_test, y_test):
        """
        Evaluate the neural network.
        """
        self.test_predictions = self.model.predict(X_test)
        self.train_predictions = self.model.predict(X_train)
        print("Training MSE:",
              round(mean_squared_error(y_train, self.train_predictions), 4))
        print("Validation MSE:",
              round(mean_squared_error(y_test, self.test_predictions), 4))
        print("\nTraining r2:",
              round(r2_score(y_train, self.train_predictions), 4))
        print("Validation r2:",
              round(r2_score(y_test, self.test_predictions), 4))
        self.results = self.model.history.history
        plt.plot(list(range(1, len(self.results['loss']) + 1)),
                 self.results['loss'], label='Train')
        plt.plot(list(range(1, len(self.results['val_loss']) + 1)),
                 self.results['val_loss'], label='Test', color='green')
        plt.legend()
        plt.title('Training and test loss at each epoch', fontsize=14)
        plt.show()
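# A short usage sketch for the class above (hypothetical shapes and
# hyperparameters; X and y are assumed to be NumPy arrays with 10 features):
net = NeuralNet()
net.add_first_layer(input_shape=64, num_features=10, activation_function='relu')
net.add_layer(num_nodes=32, activation_function='relu')
net.add_last_layer()
net.compile(loss_function='mean_squared_error', optimizer='adam',
            epochs=50, batch_size=32, verbosity=0)
history = net.train(X, y, epochs=50, batch_size=32, validation_split=0.2)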
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score, KFold


def ANNModel():
    model = Sequential()
    # Keras 2 spelling: units / kernel_initializer replace output_dim / init
    model.add(Dense(units=238, kernel_initializer='normal',
                    activation='relu', input_dim=238))
    model.add(Dense(units=100, kernel_initializer='normal', activation='relu'))
    model.add(Dense(units=1, kernel_initializer='normal'))
    model.compile(optimizer='adam', loss='mean_squared_logarithmic_error')
    return model


seed = 10
np.random.seed(seed)
ANNReg = KerasRegressor(build_fn=ANNModel, epochs=100, batch_size=5, verbose=1)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(ANNReg, X_train, Y_train, cv=kfold)
ANNReg.fit(X_train, Y_train)

# Prediction (RanReg, GBReg and XGBReg are fitted elsewhere)
RanRegPred = RanReg.predict(X_val)
GBRegPred = GBReg.predict(X_val)
XGBRegPred = XGBReg.predict(X_val)
ANNRegPred = ANNReg.predict(X_val).ravel()


# Checking the RMSLE
def rmsle(y, y0):
    assert len(y) == len(y0)
    return np.sqrt(np.mean(np.power(np.log1p(y) - np.log1p(y0), 2)))
# split into input (X) and output (Y) variables
X = dataset[:, 0:13]
Y = dataset[:, 13]


# define the model
def larger_model():
    # create model
    model = Sequential()
    model.add(Dense(13, input_dim=13, activation='relu'))
    model.add(Dense(6, activation='relu'))
    model.add(Dense(1))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


# evaluate model with standardized dataset
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=larger_model, epochs=50,
                                         batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Larger: %.2f (%.2f) MSE" % (results.mean(), results.std()))
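# The pipeline above is only cross-validated; a brief sketch of fitting it
# once and scoring held-out rows (same X and Y as above):
from sklearn.model_selection import train_test_split

X_tr, X_te, y_tr, y_te = train_test_split(X, Y, test_size=0.2, random_state=7)
pipeline.fit(X_tr, y_tr)        # scales, then trains the Keras model
y_hat = pipeline.predict(X_te)  # applies the same scaling before predicting
print("hold-out MSE: %.2f" % ((y_hat - y_te) ** 2).mean())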
def keras_mlp1(train2, y, test2, v, z):
    cname = sys._getframe().f_code.co_name
    v[cname], z[cname] = 0, 0
    from keras import layers
    from keras import models
    from keras import optimizers
    from keras.wrappers.scikit_learn import KerasRegressor
    scores = list()
    scaler = preprocessing.RobustScaler()
    train3 = scaler.fit_transform(train2)
    test3 = scaler.transform(test2)
    input_dims = train3.shape[1]

    def build_model():
        input_ = layers.Input(shape=(input_dims,))
        model = layers.Dense(256, kernel_initializer='Orthogonal')(input_)
        #model = layers.BatchNormalization()(model)
        #model = layers.advanced_activations.PReLU()(model)
        model = layers.Activation('selu')(model)
        #model = layers.Dropout(0.7)(model)
        model = layers.Dense(64, kernel_initializer='Orthogonal')(model)
        #model = layers.BatchNormalization()(model)
        model = layers.Activation('selu')(model)
        #model = layers.advanced_activations.PReLU()(model)
        #model = layers.Dropout(0.9)(model)
        model = layers.Dense(16, kernel_initializer='Orthogonal')(model)
        #model = layers.BatchNormalization()(model)
        model = layers.Activation('selu')(model)
        #model = layers.advanced_activations.PReLU()(model)
        model = layers.Dense(1, activation='sigmoid')(model)
        model = models.Model(input_, model)
        model.compile(loss='binary_crossentropy', optimizer=optimizers.Nadam())
        #print(model.summary(line_length=120))
        return model

    np.random.seed(1234)
    # epochs is passed to fit() below; the deprecated nb_epoch kwarg is dropped
    est = KerasRegressor(build_fn=build_model,
                         batch_size=256,
                         #verbose=2
                         )
    build_model().summary(line_length=120)
    model_path = '../data/working/' + cname + '_keras_model.h5'
    num_splits = 9
    ss = model_selection.ShuffleSplit(n_splits=num_splits,
                                      random_state=11,
                                      test_size=1 / num_splits)
    for n, (itrain, ival) in enumerate(ss.split(train3, y)):
        xtrain, xval = train3[itrain], train3[ival]
        ytrain, yval = y[itrain], y[ival]
        est.fit(xtrain, ytrain,
                epochs=10000,
                validation_data=(xval, yval),
                verbose=0,
                callbacks=build_keras_fit_callbacks(model_path),
                shuffle=True)
        est.model.load_weights(model_path)
        p = est.predict(xval)
        v.loc[ival, cname] += pconvert(p)
        score = metrics.log_loss(y[ival], p)
        print(cname, 'fold %d: ' % (n + 1), score, now())
        scores.append(score)
        z[cname] += pconvert(est.predict(test3))
        os.remove(model_path)
    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits
downcast='infer')


def baseline_model():
    # create model
    model = Sequential()
    model.add(
        Dense(5, input_dim=5, kernel_initializer='normal',
              activation='linear'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


estimator = KerasRegressor(build_fn=baseline_model, epochs=1000, verbose=0)
# time.clock() was removed in Python 3.8; perf_counter is the replacement
t0 = time.perf_counter()
estimator.fit(X, y)
t1 = time.perf_counter()
prediction = estimator.predict(X)
train_error = np.abs(y - prediction)
mean_error = np.mean(train_error)
min_error = np.min(train_error)
max_error = np.max(train_error)
std_error = np.std(train_error)
#print('prediction :', prediction)
#print('train error :')
# X_FINAL, y_FINAL = X_scaled[remove_inds,:], y_scaled[remove_inds,:]
# X_scaled, y_scaled = X_scaled[keep_inds,:], y_scaled[keep_inds,:]
#--------------------
# Split data to 90% train & 10% unseen
X_train, X_unseen, y_train, y_unseen = train_test_split(X_scaled, y_scaled,
                                                        test_size=0.10,
                                                        random_state=32)
kf = KFold(n_splits=4, shuffle=True)
fig, ax = plt.subplots(1, 1, figsize=(8, 8))
fig2, ax2 = plt.subplots(1, 1, figsize=(8, 8))
for train_index, test_index in kf.split(X_train, y=y_train):
    model = KerasRegressor(build_fn=baseline_model, epochs=100)
    history = model.fit(X_train[train_index], y_train[train_index],
                        validation_data=(X_train[test_index],
                                         y_train[test_index]))
    ax.plot(history.history['loss'], label='loss')
    ax.plot(history.history['val_loss'], label='validation loss')
    ax.set_ylabel('Loss')
    ax.set_xlabel('Epoch')
    ax.legend()
    ax.minorticks_on()
    ax.grid(which='major', ls='-', color=[0.15, 0.15, 0.15], alpha=0.15)
    ax.grid(which='minor', ls=':', dashes=(1, 5, 1, 5),
# fix random seed for reproducibility
np.random.seed(SEED)


# Regression neural network (single continuous output, MSE loss)
def build_model():
    clf = Sequential()
    clf.add(Dense(features.shape[1], activation='relu'))
    clf.add(Dense(5, activation='relu'))
    clf.add(Dropout(0.3))
    clf.add(Dense(3, activation='relu'))
    clf.add(Dropout(0.3))
    clf.add(Dense(1, kernel_initializer='normal'))
    clf.compile(optimizer='adam', loss='mean_squared_error')
    return clf


# evaluate model with standardized dataset
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=build_model, epochs=EPOCHS,
                                         batch_size=BATCH_SIZE, verbose=1)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10)
results = cross_val_score(pipeline, features, labels, cv=kfold)
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))
def base_model():
    model = Sequential()
    model.add(Dense(20, input_dim=398, kernel_initializer='normal',
                    activation='relu'))
    model.add(Dense(10, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


seed = 7
np.random.seed(seed)
scale = StandardScaler()
X_train = scale.fit_transform(train_new)
# transform (not fit_transform) the test set with statistics fit on train
X_test = scale.transform(test_new)
keras_label = label_df.to_numpy()  # DataFrame.as_matrix() was removed in pandas 1.0
clf = KerasRegressor(build_fn=base_model, epochs=1000, batch_size=5, verbose=0)
clf.fit(X_train, keras_label)

# make predictions and create the submission file
kpred = clf.predict(X_test)
kpred = np.exp(kpred)
pred_df = pd.DataFrame(kpred, index=test["Id"], columns=["SalePrice"])
pred_df.to_csv('keras1.csv', header=True, index_label='Id')

# simple average
y_pred = (y_pred_xgb + y_pred_lasso) / 2
y_pred = np.exp(y_pred)
pred_df = pd.DataFrame(y_pred, index=test["Id"], columns=["SalePrice"])
pred_df.to_csv('ensemble1.csv', header=True, index_label='Id')
def baseline_model():
    model = Sequential()
    # Dense() adds a fully connected layer; stacking extra Dense() calls
    # adds extra layers to the network.
    model.add(Dense(NN1, input_dim=ZZZ, activation='relu'))
    model.add(Dense(NN2, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.summary()
    dot_img_file = 'model.png'
    tf.keras.utils.plot_model(model, to_file=dot_img_file, show_shapes=True)
    return model


estimator = KerasRegressor(build_fn=baseline_model, epochs=10,
                           batch_size=256, verbose=0)

# Q12: Make predictions for entire dataset
estimator.fit(df_normalized, schirmer_norm)
predicted_values = estimator.predict(df_normalized)

# Q13: Compute correlation coefficient and mean absolute error (MAE)
model_corr, _ = pearsonr(schirmer_norm, predicted_values)
model_mae = mean_absolute_error(schirmer_norm, predicted_values)
print('Pearsons correlation: %.3f' % model_corr)
print('Mean absolute error: %.3f' % model_mae)
freq = np.absolute(np.fft.fft(m_fc[:, -32:], axis=1)[:, 0:16])

from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline


def whole_fc_m_ann_ensemble():
    model = Sequential()
    model.add(Dense(units=T * 3 + 16, kernel_initializer='normal',
                    activation='relu', input_dim=T * 3 + 16))
    model.add(Dense(units=T * 3, kernel_initializer='normal',
                    activation='relu'))
    model.add(Dense(units=T, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


X = np.hstack((h[8:], s[8:], m_fc[:-8], freq[:-8]))
Y = m_fc[8:]
seed = 7
np.random.seed(seed)
estimator = KerasRegressor(build_fn=whole_fc_m_ann_ensemble, epochs=14,
                           batch_size=10, verbose=0)
# random_state requires shuffle=True in recent scikit-learn
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, Y, cv=kfold)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))
estimator.fit(X, Y)
print(estimator.model.summary())
predicted = estimator.predict(X)
plot_summary(h[8:], s[8:], ens[8:], m_fc[8:], predicted, 50)
plt.savefig('pics\\keras_clean_test_out.png')
def baseline_model():
    # 12 nodes -> 6 nodes -> 1 node
    # through trial and error by adding nodes, removing layers, and
    # changing epochs based on where I see the loss asymptote
    model = Sequential()
    model.add(
        Dense(12, input_dim=12, kernel_initializer='normal',
              activation='relu'))
    model.add(
        Dense(6, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


estimator = KerasRegressor(build_fn=baseline_model, epochs=28)
estimator.fit(X_train, y_train)

# In[30]:

# create a dataframe containing the results from all the methods
df_test = (X_test.join(df_sub_wtb[['normand', 'stull', 'half', 'third']])
           .assign(**{
               'lreg': visualizer.predict(X_test),
               'keras': estimator.predict(X_test)
           }))
df_test['time'] = pd.to_datetime(df_test['year'].astype(str) +
                                 df_test['dayofyear'].astype(str) +
                                 df_test['hour'].astype(str),
                                 format='%Y%j%H')
    model.add(Activation('relu'))
    model.add(Dense(1))
    # compile model
    model.compile(loss='mean_squared_error', optimizer=OPTIMIZER,
                  metrics=['mean_squared_error'])
    return model


# evaluate model with standardized dataset
np.random.seed(seed)
kreg = KerasRegressor(build_fn=baseline_model, epochs=NB_EPOCH,
                      batch_size=BATCH_SIZE, verbose=VERBOSE)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', kreg))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=2, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Error: %.4f (%.4f) MSE" % (results.mean(), results.std()))


# denormalize data
def denorm(min_val, max_val, value):  # renamed to avoid shadowing built-in min/max
    z = (value * (max_val - min_val)) + min_val
    return z
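# Worked example: with data originally spanning [10, 50], a normalized
# prediction of 0.25 maps back to 0.25 * (50 - 10) + 10 = 20.0
print(denorm(10, 50, 0.25))  # -> 20.0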
class SimpleModel:
    def __init__(self):
        self.data = dict()
        self.frame_len = 30
        self.predict_dist = 5
        self.scaler = dict()

    def load_all_data(self, begin_date, end_date):
        con = sqlite3.connect('../data/stock.db')
        code_list = con.execute(
            "SELECT name FROM sqlite_master WHERE type='table'").fetchall()
        X_data_list, Y_data_list, DATA_list = [0] * 10, [0] * 10, [0] * 10
        idx = 0
        split = int(len(code_list) / 9)
        bar = ProgressBar(len(code_list), max_width=80)
        for code in code_list:
            data = self.load_data(code[0], begin_date, end_date)
            data = data.dropna()
            X, Y = self.make_x_y(data, code[0])
            if len(X) <= 1:
                continue
            code_array = [code[0]] * len(X)
            assert len(X) == len(data.loc[29:len(data) - 6, '일자'])
            if idx % split == 0:
                X_data_list[int(idx / split)] = list(X)
                Y_data_list[int(idx / split)] = list(Y)
                DATA_list[int(idx / split)] = np.array(
                    [data.loc[29:len(data) - 6, '일자'].values.tolist(),
                     code_array,
                     data.loc[29:len(data) - 6, '현재가'],
                     data.loc[34:len(data), '현재가']]).T.tolist()
            else:
                X_data_list[int(idx / split)].extend(X)
                Y_data_list[int(idx / split)].extend(Y)
                DATA_list[int(idx / split)].extend(np.array(
                    [data.loc[29:len(data) - 6, '일자'].values.tolist(),
                     code_array,
                     data.loc[29:len(data) - 6, '현재가'],
                     data.loc[34:len(data), '현재가']]).T.tolist())
            bar.numerator += 1
            print("%s | %d" % (bar, len(X_data_list[int(idx / split)])), end='\r')
            sys.stdout.flush()
            idx += 1
        print("%s" % bar)

        print("Merge split data")
        bar = ProgressBar(10, max_width=80)
        for i in range(10):
            if type(X_data_list[i]) == type(1):
                continue
            if i == 0:
                X_data = X_data_list[i]
                Y_data = Y_data_list[i]
                DATA = DATA_list[i]
            else:
                X_data.extend(X_data_list[i])
                Y_data.extend(Y_data_list[i])
                DATA.extend(DATA_list[i])
            bar.numerator = i + 1
            print("%s | %d" % (bar, len(DATA)), end='\r')
            sys.stdout.flush()
        print("%s | %d" % (bar, len(DATA)))
        return np.array(X_data), np.array(Y_data), np.array(DATA)

    def load_data(self, code, begin_date, end_date):
        con = sqlite3.connect('../data/stock.db')
        df = pd.read_sql("SELECT * from '%s'" % code, con,
                         index_col='일자').sort_index()
        data = df.loc[df.index > str(begin_date)]
        data = data.loc[data.index < str(end_date)]
        data = data.reset_index()
        return data

    def make_x_y(self, data, code):
        data_x = []
        data_y = []
        for col in data.columns:
            try:
                data.loc[:, col] = data.loc[:, col].str.replace('--', '-')
                data.loc[:, col] = data.loc[:, col].str.replace('+', '')
            except AttributeError:
                pass  # non-string columns need no replacement
        data.loc[:, 'month'] = data.loc[:, '일자'].str[4:6]
        data = data.drop(['일자', '체결강도'], axis=1)

        # normalization: fit a per-code scaler the first time, reuse it after
        data = np.array(data)
        if len(data) <= 0:
            return np.array([]), np.array([])
        if code not in self.scaler:
            self.scaler[code] = StandardScaler()
            data = self.scaler[code].fit_transform(data)
        else:
            data = self.scaler[code].transform(data)
        for i in range(self.frame_len, len(data) - self.predict_dist + 1):
            data_x.extend(np.array(data[i - self.frame_len:i, :]))
            data_y.append(data[i + self.predict_dist - 1][0])
        np_x = np.array(data_x).reshape(-1, 23 * 30)
        np_y = np.array(data_y)
        return np_x, np_y

    def train_model(self, X_train, Y_train):
        print("training model %d_%d.pkl" % (self.frame_len, self.predict_dist))
        model_name = "../model/simple_reg_model/%d_%d.pkl" % (
            self.frame_len, self.predict_dist)
        self.estimator = RandomForestRegressor(random_state=0,
                                               n_estimators=100, n_jobs=-1)
        self.estimator.fit(X_train, Y_train)
        print("finish training model")
        joblib.dump(self.estimator, model_name)

    def set_config(self):
        # TensorFlow GPU optimization
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        K.set_session(sess)

    def train_model_keras(self, X_train, Y_train, date):
        print("training model %d_%d.h5" % (self.frame_len, self.predict_dist))
        model_name = "../model/reg_keras/%d_%d_%s.h5" % (
            self.frame_len, self.predict_dist, date)
        self.estimator = KerasRegressor(build_fn=baseline_model, epochs=200,
                                        batch_size=64, verbose=1)
        self.estimator.fit(X_train, Y_train)
        print("finish training model")
        # saving model
        json_model = self.estimator.model.to_json()
        open(model_name.replace('h5', 'json'), 'w').write(json_model)
        self.estimator.model.save_weights(model_name, overwrite=True)

    def evaluate_model(self, X_test, Y_test, orig_data, s_date):
        print("Evaluate model %d_%d.pkl" % (self.frame_len, self.predict_dist))
        if MODEL_TYPE == 'random_forest':
            model_name = "../model/simple_reg_model/%d_%d.pkl" % (
                self.frame_len, self.predict_dist)
            self.estimator = joblib.load(model_name)
        elif MODEL_TYPE == 'keras':
            model_name = "../model/reg_keras/%d_%d_%s.h5" % (
                self.frame_len, self.predict_dist, s_date)
            self.estimator = model_from_json(
                open(model_name.replace('h5', 'json')).read())
            self.estimator.load_weights(model_name)
        pred = self.estimator.predict(X_test)
        res = 0
        assert len(pred) == len(Y_test)
        pred = np.array(pred).reshape(-1)
        Y_test = np.array(Y_test).reshape(-1)
        # RMSE over the test set
        score = 0
        for i in range(len(pred)):
            score += (float(pred[i]) - float(Y_test[i])) ** 2
        score = np.sqrt(score / len(pred))
        print("score: %f" % score)
        for idx in range(len(pred)):
            buy_price = int(orig_data[idx][2])
            future_price = int(orig_data[idx][3])
            date = int(orig_data[idx][0])
            pred_transform = self.scaler[orig_data[idx][1]].inverse_transform(
                [pred[idx]] + [0] * 22)[0]
            cur_transform = self.scaler[orig_data[idx][1]].inverse_transform(
                [X_test[idx][23 * 29]] + [0] * 22)[0]
            if pred_transform > buy_price * 1.01:
                res += (future_price - buy_price * 1.005) * (100000 / buy_price + 1)
                print("[%s] buy: %6d, sell: %6d, earn: %6d" % (
                    str(date), buy_price, future_price,
                    (future_price - buy_price * 1.005) * (100000 / buy_price)))
        print("result: %d" % res)

    def load_current_data(self):
        con = sqlite3.connect('../data/stock.db')
        code_list = con.execute(
            "SELECT name FROM sqlite_master WHERE type='table'").fetchall()
        X_test = []
        DATA = []
        code_list = list(map(lambda x: x[0], code_list))
        bar = ProgressBar(len(code_list), max_width=80)
        # iterate over a copy so removing codes does not skip elements
        for code in list(code_list):
            bar.numerator += 1
            print("%s | %d" % (bar, len(X_test)), end='\r')
            sys.stdout.flush()
            df = pd.read_sql("SELECT * from '%s'" % code, con,
                             index_col='일자').sort_index()
            data = df.iloc[-30:, :]
            data = data.reset_index()
            for col in data.columns:
                try:
                    data.loc[:, col] = data.loc[:, col].str.replace('--', '-')
                    data.loc[:, col] = data.loc[:, col].str.replace('+', '')
                except AttributeError:
                    pass
            data.loc[:, 'month'] = data.loc[:, '일자'].str[4:6]
            data = data.drop(['일자', '체결강도'], axis=1)
            if len(data) < 30:
                code_list.remove(code)
                continue
            # transform first so DATA, code_list and X_test stay in sync
            try:
                scaled = self.scaler[code].transform(np.array(data))
            except KeyError:
                code_list.remove(code)
                continue
            DATA.append(int(data.loc[len(data) - 1, '현재가']))
            X_test.extend(np.array(scaled))
        X_test = np.array(X_test).reshape(-1, 23 * 30)
        return X_test, code_list, DATA

    def make_buy_list(self, X_test, code_list, orig_data, s_date):
        BUY_UNIT = 10000
        print("make buy_list")
        if MODEL_TYPE == 'random_forest':
            model_name = "../model/simple_reg_model/%d_%d.pkl" % (
                self.frame_len, self.predict_dist)
            self.estimator = joblib.load(model_name)
        elif MODEL_TYPE == 'keras':
            model_name = "../model/reg_keras/%d_%d_%s.h5" % (
                self.frame_len, self.predict_dist, s_date)
            self.estimator = model_from_json(
                open(model_name.replace('h5', 'json')).read())
            self.estimator.load_weights(model_name)
        pred = self.estimator.predict(X_test)
        pred = np.array(pred).reshape(-1)

        # load code list from account
        set_account = set([])
        with open('../data/stocks_in_account.txt') as f_stocks:
            for line in f_stocks.readlines():
                data = line.split(',')
                set_account.add(data[6].replace('A', ''))

        # fields: buy/sell, code, market/current price, qty, price, order status
        buy_item = ["매수", "", "시장가", 0, 0, "매수전"]
        with open("../data/buy_list.txt", "wt") as f_buy:
            for idx in range(len(pred)):
                real_buy_price = int(orig_data[idx])
                buy_price = float(X_test[idx][23 * 29])
                try:
                    pred_transform = self.scaler[code_list[idx]].inverse_transform(
                        [pred[idx]] + [0] * 22)[0]
                except KeyError:
                    continue
                print("[BUY PREDICT] code: %s, cur: %5d, predict: %5d" % (
                    code_list[idx], real_buy_price, pred_transform))
                if pred_transform > real_buy_price * 3 and \
                        code_list[idx] not in set_account:
                    print("add to buy_list %s" % code_list[idx])
                    buy_item[1] = code_list[idx]
                    buy_item[3] = int(BUY_UNIT / real_buy_price) + 1
                    for item in buy_item:
                        f_buy.write("%s;" % str(item))
                    f_buy.write('\n')

    def load_data_in_account(self):
        # load code list from account
        DATA = []
        with open('../data/stocks_in_account.txt') as f_stocks:
            for line in f_stocks.readlines():
                data = line.split(',')
                DATA.append([data[6].replace('A', ''), data[1], data[0]])

        # load data in DATA
        con = sqlite3.connect('../data/stock.db')
        X_test = []
        idx_rm = []
        bar = ProgressBar(len(DATA), max_width=80)
        for idx, code in enumerate(DATA):
            bar.numerator += 1
            print("%s | %d" % (bar, len(X_test)), end='\r')
            sys.stdout.flush()
            try:
                df = pd.read_sql("SELECT * from '%s'" % code[0], con,
                                 index_col='일자').sort_index()
            except pd.io.sql.DatabaseError as e:
                print(e)
                idx_rm.append(idx)
                continue
            data = df.iloc[-30:, :]
            data = data.reset_index()
            for col in data.columns:
                try:
                    data.loc[:, col] = data.loc[:, col].str.replace('--', '-')
                    data.loc[:, col] = data.loc[:, col].str.replace('+', '')
                except AttributeError:
                    pass
            data.loc[:, 'month'] = data.loc[:, '일자'].str[4:6]
            DATA[idx].append(int(data.loc[len(data) - 1, '현재가']))
            data = data.drop(['일자', '체결강도'], axis=1)
            if len(data) < 30:
                idx_rm.append(idx)
                continue
            try:
                data = self.scaler[code[0]].transform(np.array(data))
            except KeyError:
                idx_rm.append(idx)
                continue
            X_test.extend(np.array(data))
        # delete back to front so earlier indices stay valid
        for i in sorted(idx_rm, reverse=True):
            del DATA[i]
        X_test = np.array(X_test).reshape(-1, 23 * 30)
        return X_test, DATA

    def make_sell_list(self, X_test, DATA, s_date):
        print("make sell_list")
        if MODEL_TYPE == 'random_forest':
            model_name = "../model/simple_reg_model/%d_%d.pkl" % (
                self.frame_len, self.predict_dist)
            self.estimator = joblib.load(model_name)
        elif MODEL_TYPE == 'keras':
            model_name = "../model/reg_keras/%d_%d_%s.h5" % (
                self.frame_len, self.predict_dist, s_date)
            self.estimator = model_from_json(
                open(model_name.replace('h5', 'json')).read())
            self.estimator.load_weights(model_name)
        pred = self.estimator.predict(X_test)
        pred = np.array(pred).reshape(-1)

        # fields: buy/sell, code, market/current price, qty, price, order status
        sell_item = ["매도", "", "시장가", 0, 0, "매도전"]
        with open("../data/sell_list.txt", "wt") as f_sell:
            for idx in range(len(pred)):
                current_price = float(X_test[idx][23 * 29])
                current_real_price = int(DATA[idx][3])
                name = DATA[idx][2]
                print("[SELL PREDICT] name: %s, code: %s, cur: %f(%d), predict: %f" % (
                    name, DATA[idx][0], current_price, current_real_price,
                    pred[idx]))
                if pred[idx] < current_price:
                    print("add to sell_list %s" % name)
                    sell_item[1] = DATA[idx][0]
                    sell_item[3] = DATA[idx][1]
                    for item in sell_item:
                        f_sell.write("%s;" % str(item))
                    f_sell.write('\n')

    def save_scaler(self, s_date):
        model_name = "../model/scaler_%s.pkl" % s_date
        joblib.dump(self.scaler, model_name)

    def load_scaler(self, s_date):
        model_name = "../model/scaler_%s.pkl" % s_date
        self.scaler = joblib.load(model_name)
def baseline_model():
    # create model
    model = Sequential()
    model.add(
        Dense(20, input_dim=train_x.shape[1], kernel_initializer='uniform',
              activation='softplus'))
    model.add(Dense(1, kernel_initializer='uniform', activation='relu'))
    # Compile model
    model.compile(loss='mse', optimizer='Nadam', metrics=['mse'])
    # model.compile(loss='mean_squared_error', optimizer='adam')
    return model


estimator = KerasRegressor(build_fn=baseline_model, verbose=1, epochs=5,
                           batch_size=55000)
estimator.fit(train_x, train_y)
pred_test = estimator.predict(test_x)
preds.append(pred_test)
run = time.perf_counter() - start
print('{} runs for {:.2f} seconds.'.format('keras', run))
cur_month_run_total = time.perf_counter() - start_cur_month
print('Total running time was {:.2f} minutes.'.format(cur_month_run_total / 60))
print('-' * 50)
def LSTM_Model(n_feat):
    return KerasRegressor(build_fn=(lambda: LSTM_Model_gen(n_feat)),
                          verbose=0, batch_size=8, epochs=50)
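# Hedged usage sketch: assumes LSTM_Model_gen builds and compiles a Keras
# model for n_feat input features, and X / y are shaped
# (samples, timesteps, n_feat) and (samples,):
reg = LSTM_Model(n_feat=X.shape[-1])
reg.fit(X, y)                 # 50 epochs, batch size 8, silent
y_pred = reg.predict(X[:16])  # scikit-learn style predict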
def get_model_from_name(model_name, training_params=None, is_hp_search=False):
    global keras_imported

    # For Keras
    epochs = 1000
    # if os.environ.get('is_test_suite', 0) == 'True' and model_name[:12] == 'DeepLearning':
    #     print('Heard that this is the test suite. Limiting number of epochs, which will increase training speed dramatically at the expense of model accuracy')
    #     epochs = 100

    all_model_params = {
        'LogisticRegression': {},
        'RandomForestClassifier': {'n_jobs': -2, 'n_estimators': 30},
        'ExtraTreesClassifier': {'n_jobs': -1},
        'AdaBoostClassifier': {},
        'SGDClassifier': {'n_jobs': -1},
        'Perceptron': {'n_jobs': -1},
        'LinearSVC': {'dual': False},
        'LinearRegression': {'n_jobs': -2},
        'RandomForestRegressor': {'n_jobs': -2, 'n_estimators': 30},
        'LinearSVR': {'dual': False, 'loss': 'squared_epsilon_insensitive'},
        'ExtraTreesRegressor': {'n_jobs': -1},
        'MiniBatchKMeans': {'n_clusters': 8},
        'GradientBoostingRegressor': {'presort': False, 'learning_rate': 0.1,
                                      'warm_start': True},
        'GradientBoostingClassifier': {'presort': False, 'learning_rate': 0.1,
                                       'warm_start': True},
        'SGDRegressor': {'shuffle': False},
        'PassiveAggressiveRegressor': {'shuffle': False},
        'AdaBoostRegressor': {},
        'LGBMRegressor': {'n_estimators': 2000, 'learning_rate': 0.15,
                          'num_leaves': 8, 'lambda_l2': 0.001,
                          'histogram_pool_size': 16384},
        'LGBMClassifier': {'n_estimators': 2000, 'learning_rate': 0.15,
                           'num_leaves': 8, 'lambda_l2': 0.001,
                           'histogram_pool_size': 16384},
        'DeepLearningRegressor': {'epochs': epochs, 'batch_size': 50, 'verbose': 2},
        'DeepLearningClassifier': {'epochs': epochs, 'batch_size': 50, 'verbose': 2},
        'CatBoostRegressor': {},
        'CatBoostClassifier': {}
    }

    model_params = all_model_params.get(model_name, None)
    if model_params is None:
        model_params = {}

    if is_hp_search:
        if model_name[:12] == 'DeepLearning':
            model_params['epochs'] = 50
        if model_name[:4] == 'LGBM':
            model_params['n_estimators'] = 500

    if training_params is not None:
        print('Now using the model training_params that you passed in:')
        print(training_params)
        # Overwrite our stock params with what the user passes in
        # (i.e., if the user wants 10,000 trees, we will let them do it)
        model_params.update(training_params)
        print('After overwriting our defaults with your values, here are the '
              'final params that will be used to initialize the model:')
        print(model_params)

    model_map = {
        # Classifiers
        'LogisticRegression': LogisticRegression(),
        'RandomForestClassifier': RandomForestClassifier(),
        'RidgeClassifier': RidgeClassifier(),
        'GradientBoostingClassifier': GradientBoostingClassifier(),
        'ExtraTreesClassifier': ExtraTreesClassifier(),
        'AdaBoostClassifier': AdaBoostClassifier(),
        'LinearSVC': LinearSVC(),
        # Regressors
        'LinearRegression': LinearRegression(),
        'RandomForestRegressor': RandomForestRegressor(),
        'Ridge': Ridge(),
        'LinearSVR': LinearSVR(),
        'ExtraTreesRegressor': ExtraTreesRegressor(),
        'AdaBoostRegressor': AdaBoostRegressor(),
        'RANSACRegressor': RANSACRegressor(),
        'GradientBoostingRegressor': GradientBoostingRegressor(),
        'Lasso': Lasso(),
        'ElasticNet': ElasticNet(),
        'LassoLars': LassoLars(),
        'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit(),
        'BayesianRidge': BayesianRidge(),
        'ARDRegression': ARDRegression(),
        # Clustering
        'MiniBatchKMeans': MiniBatchKMeans(),
    }

    # max_iter / tol only exist on newer scikit-learn versions
    try:
        model_map['SGDClassifier'] = SGDClassifier(max_iter=1000, tol=0.001)
        model_map['Perceptron'] = Perceptron(max_iter=1000, tol=0.001)
        model_map['PassiveAggressiveClassifier'] = PassiveAggressiveClassifier(
            max_iter=1000, tol=0.001)
        model_map['SGDRegressor'] = SGDRegressor(max_iter=1000, tol=0.001)
        model_map['PassiveAggressiveRegressor'] = PassiveAggressiveRegressor(
            max_iter=1000, tol=0.001)
    except TypeError:
        model_map['SGDClassifier'] = SGDClassifier()
        model_map['Perceptron'] = Perceptron()
        model_map['PassiveAggressiveClassifier'] = PassiveAggressiveClassifier()
        model_map['SGDRegressor'] = SGDRegressor()
        model_map['PassiveAggressiveRegressor'] = PassiveAggressiveRegressor()

    if xgb_installed:
        model_map['XGBClassifier'] = XGBClassifier()
        model_map['XGBRegressor'] = XGBRegressor()

    if lgb_installed:
        model_map['LGBMRegressor'] = LGBMRegressor()
        model_map['LGBMClassifier'] = LGBMClassifier()

    if catboost_installed:
        model_map['CatBoostRegressor'] = CatBoostRegressor(
            calc_feature_importance=True)
        model_map['CatBoostClassifier'] = CatBoostClassifier(
            calc_feature_importance=True)

    if model_name[:12] == 'DeepLearning':
        if not keras_imported:
            # Suppress some level of logs if TF is installed (but allow it to
            # not be installed, and use Theano instead)
            try:
                os.environ['TF_CPP_MIN_VLOG_LEVEL'] = '3'
                os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
                from tensorflow import logging
                logging.set_verbosity(logging.INFO)
            except:
                pass

            global maxnorm
            global Dense, Dropout
            global LeakyReLU, PReLU, ThresholdedReLU, ELU
            global Sequential
            global keras_load_model
            global regularizers, optimizers
            global Activation
            global KerasRegressor, KerasClassifier

            from keras.constraints import maxnorm
            from keras.layers import Activation, Dense, Dropout
            from keras.layers.advanced_activations import LeakyReLU, PReLU, ThresholdedReLU, ELU
            from keras.models import Sequential
            from keras.models import load_model as keras_load_model
            from keras import regularizers, optimizers
            from keras.wrappers.scikit_learn import KerasRegressor, KerasClassifier

            keras_imported = True

        model_map['DeepLearningClassifier'] = KerasClassifier(
            build_fn=make_deep_learning_classifier)
        model_map['DeepLearningRegressor'] = KerasRegressor(
            build_fn=make_deep_learning_model)

    try:
        model_without_params = model_map[model_name]
    except KeyError as e:
        print('It appears you are trying to use a library that is not '
              'available when we try to import it, or using a value for '
              'model_names that we do not recognize')
        raise e

    if os.environ.get('is_test_suite', False) == 'True':
        if 'n_jobs' in model_params:
            model_params['n_jobs'] = 1

    model_with_params = model_without_params.set_params(**model_params)
    return model_with_params
batch_size = 1000
print('Epochs: ', epochs)
print('Batch size: ', batch_size)

keras_callbacks = [
    # ModelCheckpoint('/tmp/keras_checkpoints/model.{epoch:02d}-{val_loss:.2f}.hdf5', monitor='val_loss', save_best_only=True, verbose=2)
    # ModelCheckpoint('/tmp/keras_checkpoints/model.{epoch:02d}.hdf5', monitor='val_loss', save_best_only=True, verbose=0)
    # TensorBoard(log_dir='/tmp/keras_logs/model_3', histogram_freq=0, write_graph=True, write_images=True, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None),
    # monitoring val_mean_absolute_error only works if fit() receives
    # validation data (e.g. a validation_split); otherwise the metric
    # never appears and early stopping is a no-op
    EarlyStopping(monitor='val_mean_absolute_error', patience=80, verbose=0)  # 20
]

print(x_train.shape)

# keras.wrappers.scikit_learn.KerasRegressor
from keras.wrappers.scikit_learn import KerasRegressor

model = KerasRegressor(build_fn=make_model, epochs=epochs,
                       batch_size=batch_size, verbose=True,
                       callbacks=keras_callbacks)
model.fit(x_train, y_train)
'''
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    shuffle=True,
                    verbose=2,  # 0; change to 2 to observe execution
                    #validation_data=(arr_x_valid, arr_y_valid),
                    callbacks=keras_callbacks)
'''
y_pred = model.predict(x_test[:20])
print(y_pred)
print(y_test[:20])
def baseline_model():
    # create model
    model = Sequential()
    model.add(
        Dense(7, input_dim=7, kernel_initializer='normal', activation='relu'))
    model.add(Dense(4, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_absolute_error', optimizer='adam')
    return model


# fix random seed for reproducibility
seed = 7
np.random.seed(seed)
# evaluate model with standardized dataset
estimator = KerasRegressor(build_fn=baseline_model, epochs=3000,
                           batch_size=8474, verbose=0)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, y, cv=kfold)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))

# example of training a final regression model
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression

# generate regression dataset
X, y = make_regression(n_samples=100, n_features=2, noise=0.1)
# fit final model
model = LinearRegression()
model.fit(X, y)
# new instances where we do not know the answer
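# A minimal continuation under the same setup (hypothetical unseen samples
# drawn with make_regression) might be:
Xnew, _ = make_regression(n_samples=3, n_features=2, noise=0.1)
ynew = model.predict(Xnew)
for i in range(len(Xnew)):
    print("X=%s, Predicted=%s" % (Xnew[i], ynew[i]))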
def baseline_model():
    model = Sequential()
    model.add(
        Dense(12, input_dim=12, kernel_initializer='normal',
              activation='relu'))
    model.add(
        Dense(12, kernel_initializer='normal', activation='relu'))
    model.add(
        Dense(12, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
x_train, x_test, y_train, y_test = train_test_split(X, Y)
# evaluate model with standardized dataset
estimator = KerasRegressor(build_fn=baseline_model, epochs=1,
                           batch_size=5, verbose=0)
kfold = KFold(n_splits=30, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, Y, cv=kfold)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))
estimator.fit(x_train, y_train)
y_pred = estimator.predict(x_test)
# print(y_pred.shape)
fnc.errors(y_test, y_pred)
def build_regressor_for_grid(nb_units=100):  # header reconstructed; the default is assumed
    regressor = Sequential()
    regressor.add(
        Dense(units=nb_units, kernel_initializer='uniform',
              activation='relu', input_dim=325))
    regressor.add(
        Dense(units=nb_units, kernel_initializer='uniform',
              activation='relu'))
    regressor.add(
        Dense(units=1, kernel_initializer='uniform', activation='linear'))
    regressor.compile(optimizer='adam', loss='mae',
                      metrics=['mse', 'mae', 'mape'])
    return regressor


grid_regressor = KerasRegressor(build_fn=build_regressor_for_grid)
# only parameters the wrapper or build_fn accepts may appear in the grid
parameters = {
    'batch_size': [30, 50, 100],
    'epochs': [10, 30],
    'nb_units': [100, 150, 200]
}
grid_search = GridSearchCV(estimator=grid_regressor, param_grid=parameters)
grid_search = grid_search.fit(X_train, y_train)
best_parameters = grid_search.best_params_
best_accuracy = grid_search.best_score_


### Build one ANN
def build_regressor():
    regressor = Sequential()
class NNNBA:
    """
    NNNBA class, which contains all the calculated information
    """
    default_model_type = "lasso"
    assumed_max_salary = 35350000.0
    __threshold_per_col = {
        "OFF_RATING": 12,
        "PIE": 0.11,
        "NET_RATING": 18,
        "GP": 50,
        "DEF_RATING": 7,
        "USG_PCT": 0.12,
        "FGA": None,
        "FGM": None,
        "FG3A": None,
        "PTS": None,
        "FTM": None,
        "REB_PCT": None,
        "AGE": 4
    }
    __outlier_cols_upper = []  # ["OFF_RATING", "PIE", "NET_RATING", "USG_PCT", "PTS"]
    __outlier_cols_lower = []  # ["DEF_RATING"]
    __ridge_init_alpha = [0.01, 0.03, 0.06, 0.1, 0.3, 0.6, 1, 3, 6, 10, 30, 60]
    __lasso_init_alpha = [0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006, 0.01,
                          0.03, 0.06, 0.1, 0.3, 0.6, 1]
    __elasticnet_init = {
        "l1_ratio": [0.1, 0.3, 0.5, 0.6, 0.7, 0.8, 0.85, 0.9, 0.95, 1],
        "alpha": [0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006, 0.01, 0.03,
                  0.06, 0.1, 0.3, 0.6, 1, 3, 6]
    }

    def __realpha__(self, alpha):
        """ Function to recalculate alpha """
        return [alpha * .6, alpha * .65, alpha * .7, alpha * .75, alpha * .8,
                alpha * .85, alpha * .9, alpha * .95, alpha, alpha * 1.05,
                alpha * 1.1, alpha * 1.15, alpha * 1.25, alpha * 1.3,
                alpha * 1.35, alpha * 1.4]

    def __reratio__(self, ratio):
        """ Function to recalculate ratio """
        return [ratio * .85, ratio * .9, ratio * .95, ratio, ratio * 1.05,
                ratio * 1.1, ratio * 1.15]

    def __baseline_model__():
        """ Base neural network model (used as build_fn, hence no self) """
        input = 39
        model = Sequential()
        model.add(Dense(input, input_dim=input, kernel_initializer='normal',
                        activation='relu'))
        model.add(Dense(int(input / 2), kernel_initializer='normal',
                        activation='relu'))
        model.add(Dense(input, kernel_initializer='normal', activation='relu'))
        model.add(Dense(int(input / 2), kernel_initializer='normal',
                        activation='relu'))
        model.add(Dense(int(input / 4), kernel_initializer='normal',
                        activation='relu'))
        model.add(Dense(1, kernel_initializer='normal'))
        model.compile(loss='mean_squared_error', optimizer='adam')
        return model

    def __idx_of_median_outlier__(self, col, threshold=None,
                                  upper_outlier=True):  # may need threshold=2
        """
        Find index of outlier based on distance from median.
        Distance from median = threshold, which is either passed in or
        calculated as a function of std from the passed-in data.
        """
        if threshold is None:
            threshold = col.std() * 2.5
        logger.debug("median: " + str(col.median()) +
                     " threshold: " + str(threshold))
        diff = col - col.median()
        if upper_outlier:
            outlier = diff > threshold
        else:
            outlier = -1 * diff > threshold
        return list(outlier.index[outlier])

    models = {
        "linear regression": linear_model.LinearRegression(fit_intercept=True),
        "ridge": linear_model.RidgeCV(alphas=__ridge_init_alpha,
                                      fit_intercept=True),
        "lasso": linear_model.LassoCV(alphas=__lasso_init_alpha,
                                      max_iter=5000, cv=10,
                                      fit_intercept=True),
        "bayes ridge": linear_model.BayesianRidge(),
        "keras regressor": KerasRegressor(build_fn=__baseline_model__,
                                          epochs=100, batch_size=5, verbose=0),
        "xgb": xgb.XGBRegressor(n_estimators=1500, max_depth=2,
                                learning_rate=0.01),
        "elasticnet": linear_model.ElasticNetCV(
            l1_ratio=__elasticnet_init["l1_ratio"],
            alphas=__elasticnet_init["alpha"],
            max_iter=1000, cv=3),
        "theilsen": linear_model.TheilSenRegressor(),
        "polynomial": Pipeline([
            ('poly', PolynomialFeatures(degree=2)),
            ('linear', linear_model.LinearRegression(fit_intercept=True))])
    }

    def __remodel__(self, model_type, regr, __X_train, __Y_train):
        """ Function to retrain certain models based on optimal alphas and/or ratios """
        if model_type == "ridge":
            alpha = regr.alpha_
            regr = linear_model.RidgeCV(alphas=self.__realpha__(alpha), cv=10)
        elif model_type == "lasso":
            alpha = regr.alpha_
            regr = linear_model.LassoCV(alphas=self.__realpha__(alpha),
                                        max_iter=5000, cv=10)
        elif model_type == "elasticnet":
            alpha = regr.alpha_
            ratio = regr.l1_ratio_
            regr = linear_model.ElasticNetCV(
                l1_ratio=self.__reratio__(ratio),
                alphas=self.__elasticnet_init["alpha"],
                max_iter=1000, cv=3)
        regr.fit(__X_train, __Y_train)
        return regr

    def __normalize_salary__(self, col, max_salary=assumed_max_salary):
        """
        Function to normalize salary so that the max is the maximum salary
        possible, as the year-over-year max salary changes. Max taken from
        https://www.hoopsrumors.com/2017/05/nba-maximum-salary-projections-for-201718.html
        """
        min_salary = min(col)
        local_max_salary = max(col)
        return max_salary - (local_max_salary - col) / \
            (local_max_salary - min_salary) * (max_salary - min_salary)

    def __init__(self, debug=False):
        logger.setLevel(logging.DEBUG if debug else logging.ERROR)
        with open("crawled_data/raw_data.json", "r") as data_file:
            raw_data = json.load(data_file)

        columns = raw_data[0]["header"]
        unique_columns = list(set(raw_data[0]["header"]))
        position_names = ["Point Guard", "Shooting Guard", "Small Forward",
                          "Power Forward", "Center"]
        positions = []
        for i, val in enumerate(position_names):
            positions.append((val, i))
        positions_convert = dict(positions)

        self.X_df = pd.DataFrame(columns=columns)
        Y_df = pd.DataFrame(columns=["SALARIES"])
        age = []
        positions_df = pd.DataFrame(columns=position_names)
        names = pd.DataFrame(columns=["NAME", "PROJECTED_SALARIES"])

        logger.debug("Processing data")
        for i, player in enumerate(raw_data):
            if "2016_17" in player["salaries"] and "2016-17" in player["stats"]:
                Y_df.loc[len(Y_df)] = player["salaries"]["2016_17"]
                self.X_df.loc[len(self.X_df)] = player["stats"]["2016-17"]
                age.append(player["age"])
                positions_df.loc[len(positions_df)] = [0, 0, 0, 0, 0]
                for position in player["positions"]:
                    positions_df[position][len(positions_df)] = 1
                projected_salaries = 0
                try:
                    projected_salaries = player["projected_salaries"][0]
                except:
                    pass
                names.loc[len(names)] = [player["name"], projected_salaries]
            else:
                continue

        for col in []:
            try:
                self.X_df[col] = np.tanh(self.X_df[col])
            except:
                pass

        self.X_df = self.X_df.T.drop_duplicates().T
        self.X_df = pd.concat(
            [self.X_df, pd.Series(age, name="AGE"), positions_df], axis=1)
        self.X_df = self.X_df.drop([
            "FGA", "L", "AGE", "PCT_TOV", "BLKA", "AST_PCT", "AST_RATIO",
            "OREB_PCT", "DREB_PCT", "REB_PCT", "TM_TOV_PCT", "PACE",
            "OPP_PTS_OFF_TOV", "OPP_PTS_FB", "OPP_PTS_PAINT",
            'OPP_PTS_2ND_CHANCE', 'PCT_FGA_2PT', 'PCT_FGA_3PT', 'PCT_PTS_2PT',
            'PCT_PTS_2PT_MR', 'PCT_PTS_3PT', 'PCT_PTS_FB', 'PCT_PTS_FT',
            'PCT_PTS_OFF_TOV', 'PCT_PTS_PAINT', 'PCT_AST_2PM', 'PCT_UAST_2PM',
            'PCT_AST_3PM', 'PCT_UAST_3PM', 'PCT_AST_FGM', 'PCT_UAST_FGM',
            'PCT_FGM', 'PCT_FGA', 'PCT_FG3M', 'PCT_FG3A', 'PCT_FTM',
            'PCT_FTA', 'PCT_OREB', 'PCT_DREB', 'PCT_REB', 'PCT_AST',
            'PCT_STL', 'PCT_BLK', 'PCT_BLKA', 'PTS_OFF_TOV', 'PTS_FB',
            'PTS_PAINT'
        ], axis=1)
        logger.debug("Columns: " + ", ".join(self.X_df.columns))

        # remove players who've played fewer than 15 games
        idx_of_lt_gp = self.X_df.index[(self.X_df["GP"] < 15)]
        self.X_df = self.X_df.drop(idx_of_lt_gp)
        Y_df = Y_df.drop(idx_of_lt_gp)
        age = pd.Series(age).drop(idx_of_lt_gp)
        positions_df = positions_df.drop(idx_of_lt_gp)
        names = names.drop(idx_of_lt_gp)

        # Remove outliers
        logger.debug("Remove outliers")
        X_train = self.X_df.copy()
        Y_train = Y_df.copy()
        logger.debug("No of rows before removing outliers: " +
                     str(X_train.shape[0]))
        to_be_dropped = []

        ## remove upper
        for col in self.__outlier_cols_upper:
            logger.debug(col)
            idx_of_median_outlier = self.__idx_of_median_outlier__(
                X_train[col], self.__threshold_per_col[col])
            logger.debug(col + " should drop " +
                         ", ".join(names["NAME"][idx_of_median_outlier].values))
            to_be_dropped = to_be_dropped + idx_of_median_outlier

        ## remove lower
        for col in self.__outlier_cols_lower:
            logger.debug(col)
            idx_of_median_outlier = self.__idx_of_median_outlier__(
                X_train[col], self.__threshold_per_col[col],
                upper_outlier=False)
            logger.debug(col + " should drop " +
                         ", ".join(names["NAME"][idx_of_median_outlier].values))
            to_be_dropped = to_be_dropped + idx_of_median_outlier

        to_be_dropped = list(set(to_be_dropped))
        logger.debug("Outliers: " +
                     ", ".join(names["NAME"][to_be_dropped].values))
        X_train = X_train.drop(to_be_dropped)
        Y_train = Y_train.drop(to_be_dropped)
        logger.debug("No of rows after removing outliers: " + str(X_train.shape))
        logger.debug("No of rows after removing outliers: " + str(Y_train.shape))

        __X_train = X_train.values  # training data only includes non-rookies
        __Y_train = np.log1p(Y_train["SALARIES"].values)  # y = log(1+y)

        self.Y_df = Y_df
        self.model_results = {}
        self.names = names

        for model_type, regr in self.models.items():
            logger.debug("Started " + model_type)
            this_results = names.copy()
            regr.fit(__X_train, __Y_train)
            regr = self.__remodel__(model_type, regr, __X_train, __Y_train)
            results = self.__normalize_salary__(
                np.expm1(regr.predict(self.X_df.values)))  # y = exp(y) - 1
            this_results['WORTH'] = results
            diffY = this_results["PROJECTED_SALARIES"].values - results
            this_results['SALARY_DIFF'] = diffY
            this_results = this_results.sort_values(by="SALARY_DIFF",
                                                    ascending=False)
            self.models[model_type] = regr
            self.model_results[model_type] = this_results
            logger.debug("Finished " + model_type)

        # get avg
        this_results = self.model_results["linear regression"].copy()
        this_results["WORTH"] = self.__normalize_salary__(
            (1. * self.model_results["bayes ridge"]["WORTH"] +
             1. * self.model_results["lasso"]["WORTH"] +
             1. * self.model_results["elasticnet"]["WORTH"]) / 3)
        diffY = this_results["PROJECTED_SALARIES"].values - this_results["WORTH"]
        this_results['SALARY_DIFF'] = diffY
        self.model_results["avg"] = this_results

    def getUndervalued(self, model_type=default_model_type):
        names = self.model_results[model_type]
        print(names.loc[(names["SALARY_DIFF"] < 0) &
                        (names["PROJECTED_SALARIES"] > 0)])

    def getPlayerValue(self, player_name, model_type=default_model_type):
        names = self.model_results[model_type]
        idx = names[names["NAME"] == player_name].index[0]
        print("\nPaid: " +
              '${:,.2f}'.format(float(self.Y_df.loc[idx]["SALARIES"])) +
              "\tFuture Salary: " +
              '${:,.2f}'.format(float(self.names["PROJECTED_SALARIES"][idx])) +
              "\tWorth: " + '${:,.2f}'.format(float(names["WORTH"][idx])) +
              "\n")
        self.getPlayerStats(player_name, trim=True)

    def getPlayerStats(self, player_name, trim=False):
        columns = self.X_df.columns
        if trim:
            columns = columns[:30]
        print(self.X_df.loc[self.names["NAME"] == player_name, columns])

    def getMostValuablePlayers(self, model_type=default_model_type):
        names = self.model_results[model_type]
        print(names.sort_values(by="WORTH"))

    def showAvailableModels(self):
        for model in self.models:
            print(model)

    def getPlayerNameByIndex(self, index):
        return self.names[self.names.index == index]

    def getCoefFromModel(self, model_type=default_model_type):
        return pd.DataFrame(self.models[model_type].coef_,
                            index=self.X_df.columns,
                            columns=["coef"]).sort_values(by="coef")

    def plotXCol(self, col_name, X=None):
        import matplotlib.pyplot as plt
        if X is None:
            X = self.X_df.sort_values(by=col_name)[col_name].values
        plt.figure()
        plt.scatter(range(len(X)), X)
        plt.show()
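# Brief usage sketch for the class above (assumes crawled_data/raw_data.json
# exists; the player name is a hypothetical example):
nnnba = NNNBA(debug=False)
nnnba.showAvailableModels()
nnnba.getUndervalued(model_type="lasso")
nnnba.getPlayerValue("Some Player")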
import predictor
import prepare_data
import model_builder

if __name__ == "__main__":
    print("Grid searching!")
    # get home path
    root_dir = os.path.dirname(os.path.realpath(__file__))
    x, y, sc_X, sc_Y = prepare_data.training(
        os.path.join(root_dir, "data", "results.csv"))

    # create model
    model = KerasRegressor(build_fn=model_builder.create_model,
                           verbose=1,
                           feature_count=len(x[0]),
                           output_count=len(y[0]))

    # grid search epochs, batch size and optimizer
    optimizers = ['rmsprop']  # , 'adam']
    init = ['glorot_uniform']  # , 'normal', 'uniform']
    epochs = [1000, 5000, 10000]
    batches = [50]
    hidden_layer_counts = [1, 2, 3]
    param_grid = dict(optimizer=optimizers,
                      epochs=epochs,
                      batch_size=batches,
                      hidden_layer_count=hidden_layer_counts,
                      init=init)
    grid = GridSearchCV(estimator=model, param_grid=param_grid)
    grid_result = grid.fit(x, y)
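    # After the search, cv_results_ is the standard place to inspect every
    # parameter combination (uses only documented GridSearchCV attributes):
    print("Best: %f using %s" % (grid_result.best_score_,
                                 grid_result.best_params_))
    for mean, params in zip(grid_result.cv_results_['mean_test_score'],
                            grid_result.cv_results_['params']):
        print("%f with %r" % (mean, params))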
class ML_TimeSeries(ML_Base):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._logger.info("{0} initializing...".format(self.__class__.__name__))
        self._out_dim1 = kwargs.get('out_dim1', 30)
        self._nb_epoch = kwargs.get('nb_epoch', 150)
        self._batch_size = kwargs.get('batch_size', 100)
        self._params = {'out_dim1': kwargs.get('out_dim1', 60),
                        'nb_epoch': kwargs.get('nb_epoch', 1000),
                        'batch_size': kwargs.get('batch_size', 100)}
        self._maxlen = kwargs.get('maxlen', 3)
        config = tf.ConfigProto(gpu_options=tf.GPUOptions(
            visible_device_list="0", allow_growth=True))
        sess = tf.Session(config=config)
        K.set_session(sess)
        self._logger.info("{0} initialized.".format(self.__class__.__name__))

    @property
    def maxlen(self):
        return self._maxlen

    def _create_ts_data(self, target_data):
        # sliding windows of length maxlen over the input frame
        ts_data = []
        for i in range(self._maxlen, len(target_data) + 1):
            ts_data.append(np.array(target_data.iloc[i - self._maxlen:i]))
        # for i in range(len(target_data) - self._maxlen):
        #     ts_data.append(np.array(target_data.iloc[i:i + self._maxlen]))
        return np.array(ts_data)

    def learn(self, training_data, training_label, tunes_param=False):
        seed = 1234
        self._input_dim = training_data.shape[1]
        np.random.seed(seed)
        ts_training_data = self._create_ts_data(training_data)
        ts_training_label = self._create_ts_data(training_label)
        if self._is_regression:
            self._model = KerasRegressor(build_fn=self._create_model,
                                         verbose=1,
                                         input_dim=training_data.shape[1],
                                         **self._params)
            hist = self._model.fit(
                ts_training_data, ts_training_label,
                callbacks=[EarlyStopping(monitor='loss', patience=1, verbose=0)],
                batch_size=self._batch_size,
                epochs=self._nb_epoch,
                validation_split=0.2)
        else:
            self._model = self._create_model(input_dim=training_data.shape[1],
                                             out_dim1=self._params['out_dim1'])
            hist = self._model.fit(
                ts_training_data, ts_training_label,
                callbacks=[EarlyStopping(monitor='loss', patience=1, verbose=0)],
                batch_size=self._batch_size,
                epochs=self._nb_epoch
                # , validation_split=0.2
            )
        # import matplotlib.pyplot as plt
        # plt.plot(hist.history['loss'])

    def predict_one(self, test_data):
        if self._is_regression:
            return float(self._model.predict(test_data)[-1])
        predicted = self._model.predict(test_data)
        return 1 if predicted[0][-1][0] > 0 else 0

    def predict(self, test_data):
        if type(test_data) == np.ndarray:
            ts_test_data = test_data
        else:
            ts_test_data = self._create_ts_data(test_data)
        if self._is_regression:
            return super().predict(ts_test_data)[:, -1]
        predicted = self._model.predict(ts_test_data)
        return [1 if predicted[i][-1] > 0.5 else 0
                for i in range(len(predicted))]

    def predict_one_proba(self, test_data):
        proba = self._model.predict_proba(test_data)[0][-1]
        return [proba, 1 - proba]

    def _encode_one_hot(self, label):
        return np.array([[1, 0] if label.Return.iloc[i] > 0.0 else [0, 1]
                         for i in range(label.shape[0])])

    def _change_label_format(self, label_data):
        return np.matrix([[1, 0] if label_data[i] == 0 else [0, 1]
                          for i in range(len(label_data))])

    def dispose(self):
        super().dispose()
        K.clear_session()
#==============================================================================
# # evaluate model with standardized dataset
# np.random.seed(seed)
# estimators = []
# estimators.append(('standardize', StandardScaler()))
# estimators.append(('mlp', KerasRegressor(build_fn=baseline_model, epochs=50, batch_size=5, verbose=0)))
# pipeline = Pipeline(estimators)
# kfold = KFold(n_splits=10, random_state=seed)
# results = cross_val_score(pipeline, X, y, cv=kfold)
# print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))
#==============================================================================

kr = KerasRegressor(build_fn=baseline_model, epochs=200, batch_size=50,
                    verbose=0)
kr.fit(X_train, y_train)

# saving the model
# model_name = './model3-ANN.joblib.pkl'
# _ = joblib.dump(model, model_name, compress=9)

# Save the weights (note: model.save() actually stores the full model,
# weights included)
kr.model.save('model_weights-2.h5')

# Save the model architecture
with open('model_architecture-2.json', 'w') as f:
    f.write(kr.model.to_json())

# Testing model
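# Natural counterpart to the save above: reload architecture plus weights
# for the testing step (assumes X_test from the surrounding script):
from keras.models import model_from_json

with open('model_architecture-2.json') as f:
    restored = model_from_json(f.read())
restored.load_weights('model_weights-2.h5')
restored.compile(loss='mean_squared_error', optimizer='adam')  # recompile before use
y_pred = restored.predict(X_test)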
def update_trainee_score(x):
    print('Updating Profile Scores ...')
    academic_score, honesty, emotionality, extraversion = [], [], [], []
    agreeableness, conscientiousness, openness, iq = [], [], [], []
    verbal_ability, score = [], []
    course_score, qa_score, project_score = [], [], []

    for p in Trainee.objects.all().exclude(pk=x.pk):
        # Skip any profile with a missing feature.
        fields = (p.academic_score, p.personality_c, p.personality_h,
                  p.personality_a, p.personality_e, p.personality_o,
                  p.personality_x, p.iq_score, p.course_score,
                  p.project_score, p.verbal_ability_score, p.qa_score,
                  p.score)
        if any(f is None for f in fields):
            continue
        academic_score.append(p.academic_score)
        honesty.append(p.personality_h)
        emotionality.append(p.personality_e)
        extraversion.append(p.personality_x)
        agreeableness.append(p.personality_a)
        conscientiousness.append(p.personality_c)
        openness.append(p.personality_o)
        iq.append(p.iq_score)
        verbal_ability.append(p.verbal_ability_score)
        score.append(p.score)
        project_score.append(p.project_score)
        course_score.append(p.course_score)
        qa_score.append(p.qa_score)

    if len(academic_score) == 0:
        x.score = 0.6
        x.save()
        return

    # Column order relies on dict insertion order (Python 3.7+ / modern
    # pandas); older pandas sorted string keys lexicographically, which would
    # scramble the iloc indices below.
    d = {'1': academic_score, '2': honesty, '3': emotionality,
         '4': extraversion, '5': agreeableness, '6': conscientiousness,
         '7': openness, '8': iq, '9': verbal_ability, '10': project_score,
         '11': course_score, '12': qa_score, '13': score}
    df = pd.DataFrame(data=d)
    X = df.iloc[:, [0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12]].values
    y = df.iloc[:, 4].values
    sc = StandardScaler()
    X = sc.fit_transform(X)

    estimator = KerasRegressor(build_fn=baseline_model, batch_size=50,
                               epochs=100, verbose=0)
    estimator.fit(X, y)

    test_pred = [[x.academic_score, x.personality_h, x.personality_e,
                  x.personality_x, x.personality_a, x.personality_c,
                  x.personality_o, x.iq_score, x.verbal_ability_score,
                  x.project_score, x.course_score, x.qa_score]]
    test_pred_1 = np.asarray(test_pred)
    # Scale the new profile with the scaler fitted on the training data;
    # the original predicted on raw features, mixing scales.
    test_pred_1 = sc.transform(test_pred_1)
    new_prediction = estimator.predict(test_pred_1)

    y = np.insert(y, y.size, new_prediction)
    X = np.concatenate((X, test_pred_1), axis=0)

    # Rescale the scores to mean 0.8 and sd 0.1, clamped just inside (0, 1).
    mn = float(np.mean(y))
    sd = float(np.std(y))
    y_final = []
    for val in y:  # do not reuse 'x' here: it would shadow the Trainee argument
        pp = (val - mn) / sd * 0.1 + 0.8
        if pp >= 1.0:
            pp = 0.9999
        if pp <= 0.6:
            pp = 0.0001
        y_final.append(pp)

    # Caution (original behavior preserved): this assumes y_final has one
    # entry per trainee; profiles skipped above for missing fields would
    # shift the indexing.
    ctr = 0
    for p in Trainee.objects.all():
        p.score = y_final[ctr]
        p.save()
        ctr += 1
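# Worked example (hypothetical numbers) of the rescaling in
# update_trainee_score: a value at the mean lands on 0.8, one standard
# deviation above lands on 0.9, and extreme values hit the clamp.
mn, sd = 70.0, 5.0
for raw in (70.0, 75.0, 85.0):      # at the mean, +1 sd, +3 sd
    pp = (raw - mn) / sd * 0.1 + 0.8
    if pp >= 1.0:
        pp = 0.9999
    if pp <= 0.6:
        pp = 0.0001
    print(raw, '->', pp)            # ~0.8, ~0.9, 0.9999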
def baseline_model():
    # Create a single-hidden-layer regression network.
    model = Sequential()
    model.add(Dense(7, input_dim=7, kernel_initializer='normal',
                    activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


seed = 7
np.random.seed(seed)

# Evaluate the model with 10-fold cross-validation. shuffle=True is required
# for random_state to take effect in recent scikit-learn. Note: without an
# explicit scoring argument, cross_val_score reports the regressor's default
# R^2 score, so the "MSE" label below is loose.
estimator = KerasRegressor(build_fn=baseline_model, epochs=100, batch_size=5,
                           verbose=0)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, Y, cv=kfold)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))

# Same evaluation on the underlying numpy arrays, restricted to a single job.
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X.values, Y.values, cv=kfold, n_jobs=1)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))

# Evaluate the model with a standardized dataset. (The original snippet was
# truncated mid-call here; completed from the standardized-pipeline pattern
# used earlier in this file.)
np.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=baseline_model,
                                         epochs=50, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))
def deepLearning(data, target, iteraNum, funNum):
    # load dataset
    global kerasModel
    X_norm = data
    print("This is X_norm: ", X_norm)
    y = target
    print("This is target: ", y)
    tempDim = len(X_norm[0])
    print("This is input dimension: ", tempDim)

    kerasList = []
    batch_size = [50, 100, 150, 200]
    epochs = [10, 20, 30, 50, 80]
    inputDim = [tempDim]
    # 'epochs' replaces the deprecated Keras 1 'nb_epoch' grid key; input_dim
    # is forwarded to the build function.
    param_grid = dict(batch_size=batch_size, epochs=epochs, input_dim=inputDim)

    # Pick the network builder for the requested architecture.
    if funNum == 1:
        kerasModel = KerasRegressor(build_fn=baseline_model, verbose=0)
    elif funNum == 2:
        kerasModel = KerasRegressor(build_fn=wider_model, verbose=0)
    elif funNum == 3:
        kerasModel = KerasRegressor(build_fn=larger_model, verbose=0)

    for j in range(iteraNum):
        X_train, X_test, y_train, y_test = train_test_split(X_norm, y,
                                                            test_size=0.2)
        print("This is X_train: ", X_train)
        print("This is y_train: ", y_train)
        grid = GridSearchCV(estimator=kerasModel, cv=5, param_grid=param_grid)
        newModel = grid.fit(X_train, y_train)
        print("Best: %f using %s" % (newModel.best_score_,
                                     newModel.best_params_))
        y_pred = newModel.predict(X_test).tolist()
        print("This is y_pred: ", y_pred)
        y_test_list = y_test.tolist()
        print("This is y_test_list: ", y_test_list)
        # RMSE on the held-out split.
        sum_erro = np.sqrt(mean_squared_error(y_test_list, y_pred))
        print("This is sum_erro: ", sum_erro)
        print("This is iteration number: ", j + 1)
        kerasList.append(sum_erro)

    # (An earlier commented-out variant rebuilt the grid search separately for
    # each funNum with a hand-rolled RMSE loop; it was superseded by the
    # unified loop above and has been removed.)
    return kerasList
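# For the grid above to work, every build function handed to KerasRegressor
# must accept input_dim as a keyword argument, because input_dim appears as a
# key in param_grid. A hypothetical sketch of such a builder (layer sizes
# invented for illustration):
from keras.models import Sequential
from keras.layers import Dense

def example_build_fn(input_dim=13):
    model = Sequential()
    model.add(Dense(input_dim, input_dim=input_dim, activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model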
def keras1(train2, y, test2, v, z):
    cname = sys._getframe().f_code.co_name
    v[cname], z[cname] = 0, 0
    scores = list()
    scaler = preprocessing.RobustScaler()
    train3 = scaler.fit_transform(train2)
    test3 = scaler.transform(test2)
    input_dims = train3.shape[1]

    def build_model():
        # Funnel-shaped MLP; widths are multiples of the input dimension.
        # (Dropout layers between blocks were tried and disabled.)
        input_ = layers.Input(shape=(input_dims,))
        model = layers.Dense(int(input_dims * 7.33),
                             kernel_initializer='Orthogonal',
                             activation=layers.advanced_activations.PReLU())(input_)
        model = layers.BatchNormalization()(model)
        model = layers.Dense(int(input_dims * 4.35),
                             kernel_initializer='Orthogonal',
                             activation=layers.advanced_activations.PReLU())(model)
        model = layers.BatchNormalization()(model)
        model = layers.Dense(int(input_dims * 2.35),
                             kernel_initializer='Orthogonal',
                             activation=layers.advanced_activations.PReLU())(model)
        model = layers.BatchNormalization()(model)
        model = layers.Dense(int(input_dims * 0.51),
                             kernel_initializer='Orthogonal',
                             activation=layers.advanced_activations.PReLU())(model)
        model = layers.BatchNormalization()(model)
        model = layers.Dense(1, activation='sigmoid')(model)
        model = models.Model(input_, model)
        model.compile(loss='binary_crossentropy',
                      optimizer=optimizers.Nadam(),
                      metrics=["accuracy"])
        return model

    np.random.seed(1234)
    # The deprecated nb_epoch constructor argument is gone; the epoch budget
    # is set in fit() and effectively governed by EarlyStopping below.
    est = KerasRegressor(build_fn=build_model, batch_size=128)
    build_model().summary(line_length=120)

    model_path = '../data/working/' + csv_name_suffix()
    model_path = model_path[:-4] + '_keras_model.h5'
    kcb = [
        callbacks.EarlyStopping(monitor='val_loss', patience=20),
        callbacks.ModelCheckpoint(model_path, monitor='val_loss',
                                  save_best_only=True, save_weights_only=True,
                                  verbose=0),
        callbacks.ReduceLROnPlateau(monitor='val_loss', min_lr=1e-7,
                                    factor=0.2, verbose=1),
    ]

    num_splits = 5
    ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=11)
    for n, (itrain, ival) in enumerate(ss.split(train3, y)):
        xtrain, xval = train3[itrain], train3[ival]
        ytrain, yval = y[itrain], y[ival]
        est.fit(xtrain, ytrain, epochs=10000,
                validation_data=(xval, yval),
                verbose=0, callbacks=kcb, shuffle=True)
        # Restore the best checkpoint before scoring the fold.
        est.model.load_weights(model_path)
        p = est.predict(xval)
        v.loc[ival, cname] += p
        score = metrics.log_loss(y[ival], p)
        print(cname, 'fold %d: ' % (n + 1), score, now())
        scores.append(score)
        z[cname] += np.log1p(est.predict(test3))
    os.remove(model_path)

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits
# (Function header restored from context: this is the build function passed
# to KerasRegressor below; the snippet was truncated mid-definition.)
def deep_learning_model():
    model = Sequential()
    model.add(Dense(135, input_dim=270, kernel_initializer='normal',
                    activation='elu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model (configure for training); 'adam' was chosen because it
    # is, on average, the speediest optimizer.
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


# Fit on the full training set and predict the test set for submission.
estimator = KerasRegressor(build_fn=deep_learning_model, epochs=100,
                           batch_size=5, verbose=0)
estimator.fit(X, Y)
y_keras_pred = estimator.predict(X_test)
create_submission(test_data, y_keras_pred, 3)

# ### Third Trial Summary -- Big improvement! Deep learning received a score
# on Kaggle of 0.207

# In[13]:

# Create build function for KerasRegressor
def deep_learning_model2():
# Variant of keras1 above: a shallower net, Nadam at lr=0.02, batch size 32,
# seven shuffle splits, and pconvert post-processing of the predictions.
def keras1(train2, y, test2, v, z):
    cname = sys._getframe().f_code.co_name
    v[cname], z[cname] = 0, 0
    scores = list()
    scaler = preprocessing.RobustScaler()
    train3 = scaler.fit_transform(train2)
    test3 = scaler.transform(test2)
    input_dims = train3.shape[1]

    def build_model():
        # (Dropout layers between blocks were tried and disabled.)
        input_ = layers.Input(shape=(input_dims,))
        model = layers.Dense(int(input_dims * 4.33),
                             kernel_initializer='Orthogonal',
                             activation=layers.advanced_activations.PReLU())(input_)
        model = layers.BatchNormalization()(model)
        model = layers.Dense(int(input_dims * 2.35),
                             kernel_initializer='Orthogonal',
                             activation=layers.advanced_activations.PReLU())(model)
        model = layers.BatchNormalization()(model)
        model = layers.Dense(int(input_dims * 0.51),
                             kernel_initializer='Orthogonal',
                             activation=layers.advanced_activations.PReLU())(model)
        model = layers.BatchNormalization()(model)
        model = layers.Dense(1, activation='sigmoid')(model)
        model = models.Model(input_, model)
        model.compile(loss='binary_crossentropy',
                      optimizer=optimizers.Nadam(lr=0.02),
                      metrics=["accuracy"])
        return model

    np.random.seed(1234)
    # The deprecated nb_epoch constructor argument is gone; the epoch budget
    # is set in fit() and effectively governed by EarlyStopping below.
    est = KerasRegressor(build_fn=build_model, batch_size=32)
    build_model().summary(line_length=120)

    model_path = '../data/working/' + csv_name_suffix()
    model_path = model_path[:-4] + '_keras_model.h5'
    kcb = [
        callbacks.EarlyStopping(monitor='val_loss', patience=20),
        callbacks.ModelCheckpoint(model_path, monitor='val_loss',
                                  save_best_only=True, save_weights_only=True,
                                  verbose=0),
        callbacks.ReduceLROnPlateau(monitor='val_loss', min_lr=1e-7,
                                    factor=0.2, verbose=1),
    ]

    num_splits = 7
    ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=11)
    for n, (itrain, ival) in enumerate(ss.split(train3, y)):
        xtrain, xval = train3[itrain], train3[ival]
        ytrain, yval = y[itrain], y[ival]
        est.fit(xtrain, ytrain, epochs=10000,
                validation_data=(xval, yval),
                verbose=0, callbacks=kcb, shuffle=True)
        # Restore the best checkpoint before scoring the fold.
        est.model.load_weights(model_path)
        p = est.predict(xval)
        v.loc[ival, cname] += pconvert(p)
        score = metrics.log_loss(y[ival], p)
        print(cname, 'fold %d: ' % (n + 1), score, now())
        scores.append(score)
        z[cname] += pconvert(est.predict(test3))
    os.remove(model_path)

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits
# (This snippet begins mid-definition: the first LSTM layer of
# build_regressor, which sets the input shape, is not shown.)
    regressor.add(LSTM(units=50, return_sequences=True))
    regressor.add(Dropout(0.2))
    regressor.add(LSTM(units=50))
    regressor.add(Dropout(0.2))
    regressor.add(Dense(units=1))
    regressor.compile(optimizer=optimizer, loss='mean_squared_error')
    return regressor


from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV

regressor = KerasRegressor(build_fn=build_regressor)
# 'epochs' replaces the deprecated Keras 1 'nb_epoch' grid key; 'optimizer'
# is forwarded to build_regressor.
parameters = {
    'batch_size': [10, 25, 32],
    'epochs': [50, 100],
    'optimizer': ['adam', 'rmsprop'],
}
grid_search = GridSearchCV(estimator=regressor,
                           param_grid=parameters,
                           scoring='neg_mean_squared_error',
                           cv=None)  # cv=None uses scikit-learn's default k-fold
grid_search.fit(X_train[:, :, -1], y_train)
best_param = grid_search.best_params_
best_score = grid_search.best_score_  # neg MSE under this scoring, not accuracy
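# Caveat sketch: X_train[:, :, -1] is 2-D (samples, timesteps), while a Keras
# LSTM expects 3-D input. Unless build_regressor reshapes internally, one
# hypothetical fix is to restore a singleton feature axis before fitting:
import numpy as np

X_seq = X_train[:, :, -1][..., np.newaxis]  # (samples, timesteps, 1)
grid_search.fit(X_seq, y_train)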