def create_scikit_keras_regressor(X, y):
    # create simple (dummy) Keras DNN model for regression
    batch_size = 500
    epochs = 10
    model_func = create_scikit_keras_model_func(X.shape[1])
    model = KerasRegressor(build_fn=model_func,
                           epochs=epochs,
                           batch_size=batch_size,
                           verbose=1)
    model.fit(X, y)
    return model
def train(data):
    X = np.asarray(data.drop(['ETA'], axis=1))
    y = np.asarray(data["ETA"])

    scaler = MinMaxScaler()
    X = scaler.fit_transform(X)
    with open("han_bike_scalers.pkl", "wb") as outfile:
        pkl.dump(scaler, outfile)
    upload_to_bucket('model/han_bike_scalers.pkl', 'han_bike_scalers.pkl', 'aha-ds-ml-pipeline')

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

    model = KerasRegressor(build_fn=baseline_model, epochs=2, batch_size=3, verbose=1)
    history = model.fit(X_train, y_train,
                        validation_data=(X_test, y_test),
                        callbacks=[telegram_callback])

    #==============================================================================
    # Predict & Evaluation
    #==============================================================================
    prediction = model.predict(X_test)
    score = mean_absolute_error(y_test, prediction)
    if score < 5:
        model.model.save('han_bike_models.h5')
        upload_to_bucket('model/han_bike_models.h5', 'han_bike_models.h5', 'aha-ds-ml-pipeline')
    return model
)

# same seed for X and y to make them index the same rows as before
np.random.seed(seed)
np.random.shuffle(X)
np.random.seed(seed)
np.random.shuffle(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

if do_load:
    regressor = load_model(f'../models/kerasregressor_{X_col}.h5')
else:
    regressor = KerasRegressor(build_fn=regression_model,
                               epochs=epochs,
                               batch_size=batch_size,
                               verbose=1)
    h = regressor.fit(X_train, y_train)
    regressor.model.save(f'../models/kerasregressor_{X_col}.h5')

_predictions = regressor.predict(X_test).round()
predictions = list(zip(_predictions, y_test))
prediction_df = pd.DataFrame(predictions, columns=['Predicted', 'Actual'])

matrix = confusion_matrix(y_test, _predictions)
sns.heatmap(matrix, cmap='coolwarm', linecolor='white', linewidths=1,
            xticklabels=CATEGORIES, yticklabels=CATEGORIES, annot=True, fmt='d',
    print('Creating LSTM...')
    regressor = create_LSTM(neurons=clf.best_params_.get('neurons'),
                            dropoutRate=clf.best_params_.get('dropoutRate'),
                            constraints=clf.best_params_.get('constraints'))
elif recurrent_type == 'GRU':
    print('Creating GRU...')
    regressor = create_GRU(neurons=clf.best_params_.get('neurons'),
                           dropoutRate=clf.best_params_.get('dropoutRate'),
                           constraints=clf.best_params_.get('constraints'))
else:
    print('Wrong recurrent type, go with LSTM anyway.')
    regressor = create_LSTM(neurons=clf.best_params_.get('neurons'),
                            dropoutRate=clf.best_params_.get('dropoutRate'),
                            constraints=clf.best_params_.get('constraints'))

regressor.fit(X_train, y_train, epochs=50, batch_size=8)
y_pred_scaled = regressor.predict(X_test)

sc_flow = MinMaxScaler(feature_range=(0, 1), copy=True)
sc_flow.fit_transform(np.array(y_train_not_scaled).reshape(-1, 1))
y_pred = sc_flow.inverse_transform(y_pred_scaled)

# Evaluation
rootMSE(y_test_not_scaled, y_pred)

# =============================================================================
# New LSTM
# =============================================================================
'''
# Setting hyperparameters automatically from grid-searching results
best_neurons = clf.best_params_.get('neurons')
best_dropoutRate = clf.best_params_.get('dropoutRate')
def build_fn():
    nn = keras.Sequential([
        keras.layers.Dense(units=15, activation='relu'),
        keras.layers.Dense(units=5, activation='relu'),
        keras.layers.Dense(units=1, activation='linear'),
    ])
    nn.compile(keras.optimizers.Adam(learning_rate=0.01), 'MAE', metrics=['MAE'])
    return nn

wrapped = KerasRegressor(build_fn=build_fn)
wrapped.fit(xtrain, ytrain, validation_data=(xval, yval), epochs=25, batch_size=32)

# %%
pdp_day = pdp.pdp_isolate(model=wrapped,
                          dataset=pd.DataFrame(xtrain, columns=x_cols),
                          model_features=x_cols,
                          feature='day_Thur day_Fri day_Sat day_Sun'.split())
pdp_size = pdp.pdp_isolate(model=wrapped,
                           dataset=pd.DataFrame(xtrain, columns=x_cols),
                           model_features=x_cols,
                           feature='size',
                           num_grid_points=6)
def regressor():
    model = Sequential()
    model.add(Dense(100, activation='relu', input_dim=input_size))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(output_size))
    model.compile(loss='mean_absolute_error', optimizer='sgd')
    return model

#model = LogisticRegression(penalty='l1', dual=True, verbose=3)
#model = SVR(kernel='poly', degree=5, max_iter=10, verbose=True)
#model = KNeighborsRegressor(n_neighbors=5)
model = KerasRegressor(build_fn=regressor, batch_size=32, epochs=200)
#model = MLPRegressor(hidden_layer_sizes=(86,100,100,10), n_iter_no_change=20, max_iter=300, verbose=True, tol=.00000001, activation='relu')

#model_pwd = pwd+"/"+func+"/"+"Models/"+str(num_samples)+"_"+str(low)+"_"+str(high)+"_"+str(n)+"_"+str(d)+"_"+str(num_updates)+"_"+str(intercept)+".h5"
#model.save(model_pwd)

model.fit(Xtrain, ytrain)
#loss = model.evaluate(Xtest, ytest)
#loss = model.score(Xtrain,ytrain)
#ypreds = model.predict(Xtest, verbose=1)
ypreds = model.predict(Xtest)
ypreds_pwd = pwd+"/"+func+"/"+"Predictions/"+str(num_samples)+"_"+str(low)+"_"+str(high)+"_"+str(n)+"_"+str(d)+"_"+str(num_updates)+"_"+str(intercept)+".csv"
ds_manager.write_dataset(ypreds, ypreds_pwd)

print("Trying to learn: " + func)
print("number samples: " + str(num_samples))
print("range: " + "["+str(low)+", "+str(high)+"]")
print("number of points in original ds: " + str(n))
print("dim: " + str(d))
print("number of updates:" + str(num_updates))
print("intercept: " + str(intercept))
#print("loss on test set: " + str(loss))
    model = Sequential()
    model.add(
        Dense(layers[0], input_dim=nFeatures, kernel_regularizer=regularizer))
    model.add(LeakyReLU(alpha=0.05))
    # hidden layer: 5 nodes by default
    for l in layers:
        #model.add(Dense(l, activation=activation, kernel_regularizer=regularizer))
        model.add(Dense(l, kernel_regularizer=regularizer))
        model.add(LeakyReLU(alpha=0.05))
        model.add(BatchNormalization())
        #model.add(Dropout(0.2))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss="binary_crossentropy", optimizer='adam', metrics=['AUC'])
    return model

from keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor

#model=KerasClassifier(build_fn=create_model, verbose=1)
model = KerasRegressor(build_fn=create_model, verbose=1)
result = model.fit(train, y_train, sample_weight=weights_train, epochs=120)
model.model.save("models/keras_model_" + outDir + ".h5")

y_train_pred = model.predict(train)
y_test_pred = model.predict(test)
make_plots('keras', result, outDir, y_train, y_test, y_train_pred,
           y_test_pred, weights_train, weights_test)
    model.add(Dense(3500, activation='relu'))
    model.add(Dense(3500, activation='relu'))
    model.add(Dense(len(y_train[0]), activation='linear'))
    # compile the keras model
    model.compile(loss='mse', optimizer='adam')
    return model

estimator = KerasRegressor(build_fn=baseline_model, epochs=20, batch_size=1000, verbose=1)
# fit the keras model on the dataset
estimator.fit(X_train, y_train, epochs=20, batch_size=1000, verbose=1)
# make class predictions with the model
predictions = estimator.predict(X_test)

top_20_count = 0
correct_count = 0
pred_list = np.zeros((len(predictions), 2))
for i in range(len(predictions)):
    pred_list[i][0] = i
    pred_list_temp = np.zeros((len(y_test), 2))
    lowest_mse_index = None
    lowest_cur_mse = 10000000000000
    for j in range(len(y_test)):
                                                    labels,
                                                    shuffle=True,
                                                    test_size=0.2)
trainX.drop(['sessionUID'], inplace=True, axis=1)
testX = testX.sort_index()
testY = testY.sort_index()
test_sessions = testX.pop('sessionUID')

# model = build_model(trainX)
model = KerasRegressor(build_fn=build_model, trainX=trainX, epochs=1)
model.fit(trainX, trainY)

perm = PermutationImportance(model, random_state=1).fit(trainX, trainY)
eli5.show_weights(perm)

# model, history = train_model(trainX, trainY, model)
#
# model = tf.keras.models.load_model('ann.h5')
#
# pred = predictions(model, testX, testY, scalers)
# pred['sessionUID'] = test_sessions.to_numpy()
# pred.to_csv('ANN_predictions.csv')
#
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

regressor = KerasRegressor(build_fn=model, batch_size=16, epochs=2000)

import tensorflow as tf
#print(tf)

# I used KerasRegressor here, saving the best weights and training for 2000 epochs,
# with mean absolute error as the loss function.
# After dropping columns there are 200 input features; the model predicts four values
# for each input: High, Low, Open, Close.
callback = tf.keras.callbacks.ModelCheckpoint(filepath='Regressor_model.h5',
                                              monitor='mean_absolute_error',
                                              verbose=0,
                                              save_best_only=True,
                                              save_weights_only=False,
                                              mode='auto')
results = regressor.fit(X_train, y_train, callbacks=[callback])

y_pred = regressor.predict(X_test)
print(y_pred)
print(y_test)

import numpy as np

y_pred_mod = []
y_test_mod = []
for i in range(0, 4):
    j = 0
    y_pred_temp = []
    y_test_temp = []
    )
    model.compile(loss='mean_absolute_error',
                  optimizer=keras.optimizers.SGD(learning_rate=learning_rate,
                                                 momentum=momentum))
    return model

# Early stopping condition
es = EarlyStopping(monitor='loss', mode='auto', verbose=1, min_delta=0.0001, patience=4)

model = KerasRegressor(build_fn=_create_mlp,
                       verbose=1,
                       callbacks=[es],
                       epochs=n_epochs,
                       batch_size=batch_size)

start_time = time.time()
with tf.device(device):
    history = model.fit(X_train, y_train)
end_time = time.time()
print('Training time: {:.2f} mins.'.format((end_time - start_time) / 60.))

# Plot the losses vs epoch here
fig = plt.figure(figsize=(8, 5))
plot1, = plt.plot(history.epoch, history.history['loss'], c='blue', label='MAE')
plt.grid(which='both', linestyle='--')
ax = fig.gca()
ax.set_xlabel(r'Epoch')
ax.set_ylabel(r'Loss')
plt.legend(bbox_to_anchor=(0.1, 0.0, 0.80, 1),
           bbox_transform=fig.transFigure,
           loc='lower center',
           ncol=3,
           mode="expand",
           borderaxespad=0.)
    model = Sequential()
    model.add(
        Dense(4, input_dim=4, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
    return model

# evaluate model
estimator = KerasRegressor(build_fn=baseline_model, epochs=10, batch_size=5, verbose=0)
estimator.fit(X, y)
prediction = estimator.predict(X)

train_error = np.abs(y - prediction)
mean_error = np.mean(train_error)
min_error = np.min(train_error)
max_error = np.max(train_error)
std_error = np.std(train_error)
print(mean_error)
print(std_error)

'''
kfold = KFold(n_splits=10)
results = cross_val_score(estimator, X, Y, cv=kfold)
print("Baseline: %.2f (%.2f) MSE" % (results.mean(), results.std()))
'''
def TrainNetwork(model,
                 modelfile,
                 x_train=None,
                 y_train=None,
                 x_valid=None,
                 y_valid=None,
                 sample_weight=None,
                 callbacks=[],
                 epochs=20,
                 batch_size=200,
                 verbose=1,
                 overwriteModel=False,
                 finishTraining=True):
    model, custom_objects = model.model, model.custom_objects

    # Set up our KerasRegressor wrapper.
    # I'm not 100% sure why we do this for our regressors (but not our classifiers),
    # but as we use this in the original training code I'll keep it for now.
    regressor = KerasRegressor(build_fn=model,
                               batch_size=batch_size,
                               epochs=epochs,
                               verbose=verbose)

    # Make the model directory if it does not already exist.
    model_dir = '/'.join(modelfile.split('/')[:-1])
    try:
        os.makedirs(model_dir)
    except:
        pass

    # Check if the model exists -- and load it if not overwriting.
    history_filename = 0
    if ('.h5' in modelfile):
        history_filename = '.'.join(modelfile.split('.')[:-1]) + '.csv'
    else:
        # if using .tf format, there won't be a file extension on the string at all.
        history_filename = modelfile + '.csv'

    initial_epoch = 0
    if (pathlib.Path(modelfile).exists() and not overwriteModel):
        regressor.model = load_model(modelfile, custom_objects=custom_objects)

        # Now we want to figure out for how many epochs the loaded model was already trained,
        # so that it's trained, in total, for the requested number of epochs.
        # keras models don't seem to hold on to an epoch attribute for whatever reason,
        # so we will figure out the current epoch based on CSVLogger output if it exists.
        if (pathlib.Path(history_filename).exists()):
            with open(history_filename) as f:
                for i, l in enumerate(f):
                    pass
                initial_epoch = i  # zero-indexing will take care of the 1st line, which has headers
        if (not finishTraining):
            initial_epoch = regressor.get_params()['epochs']
        regressor.set_params(initial_epoch=initial_epoch)

    history = 0
    # Train the model if we've specified "finishTraining", or if we don't even
    # have a model yet. Setting finishTraining=False lets one immediately skip
    # to evaluating the model, which is especially helpful if EarlyStopping was used
    # and the final model didn't reach the specified last epoch.
    if (finishTraining or not pathlib.Path(modelfile).exists()):
        history = regressor.fit(x=x_train,
                                y=y_train,
                                validation_data=(x_valid, y_valid),
                                sample_weight=sample_weight,
                                callbacks=callbacks)

    saveModel = True
    if (initial_epoch == epochs or not finishTraining):
        saveModel = False
    if (saveModel):
        print(' Saving model to {}.'.format(modelfile))
        regressor.model.save(modelfile)

    # Now get the history from the log file, if it exists.
    # This is a better method than using the results of model.fit(),
    # since this will give us the whole history (not just whatever
    # was fitted right now). However, it relies on us having passed
    # a CSVLogger as one of our callbacks, which we normally do
    # but might not do in some specific circumstances.
    # fallback
    try:
        history = history.history
    except:
        history = {}
        pass
    if (pathlib.Path(history_filename).exists()):
        df = pd.read_csv(history_filename)
        history = {}
        for key in df.keys():
            history[key] = df[key].to_numpy()
    else:
        print('Warning: No log file found for model {}.'.format(modelfile))
        print('This may result in an empty/incomplete history being returned.')
        print('Please provide a CSVLogger callback to prevent this in the future.')

    return regressor, history
              kernel_regularizer=regularizer))
    model.add(LeakyReLU(alpha=0.05))
    model.add(BatchNormalization())
    # hidden layer: 5 nodes by default
    for l in range(layers):
        #model.add(Dense(l, activation=activation, kernel_regularizer=regularizer))
        model.add(Dense(nodes, kernel_regularizer=regularizer))
        model.add(LeakyReLU(alpha=0.05))
        model.add(BatchNormalization())
        #model.add(Dropout(0.2))
    model.add(Dense(1, activation='sigmoid'))
    #model.add(Dense(1, activation=activation))
    model.compile(loss="binary_crossentropy", optimizer='adam', metrics=['AUC'])
    return model

#model=KerasClassifier(build_fn=create_model, verbose=1)
model = KerasRegressor(build_fn=create_model, verbose=1)  # build the model
#result=model.fit(train, y_train, validation_split=0.1, epochs=best_params['epochs'])
result = model.fit(train, y_train, epochs=best_params['epochs'])  # train the model
model.model.save("models/keras_model_" + outDir + ".h5")  # save the output

# plot the performance of the model
y_train_pred = model.predict(train)
y_test_pred = model.predict(test)
make_plots('keras', result, outDir, y_train, y_test, y_train_pred, y_test_pred)
# In[17]:

y = samsung['close']
y_1diff = samsung.diff().dropna()['close']

result = adfuller(y)
print(f'Original data ADF statistic : {result[0]:.4f}')
print(f'Original data p-value : {result[1]:.4f}')

result = adfuller(y_1diff)
print(f'First-difference ADF statistic : {result[0]:.4f}')
print(f'First-difference p-value : {result[1]:.4f}')

# In[18]:

model = ARIMA(samsung, order=(1, 1, 0))  # freq='D'
model_fit = model.fit()
print(model_fit.summary())

# In[19]:

# rolling forecast
history = [x for x in samsung['close']]
predictions = []
for i in range(1):
    model = ARIMA(history, order=(1, 1, 0))
    model_fit = model.fit()
    output = model_fit.forecast()
    yhat = output[0]
    predictions.append(yhat)
    history.append(yhat)
                           verbose=0)

# checkpoint
filepath = "weights_bets.hdf5"
checkpoint = ModelCheckpoint(filepath,
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             save_weights_only=True,
                             mode='auto')
callbacks_list = [checkpoint]

# Fit the model
history = estimator.fit(train_x,
                        train_y,
                        validation_split=0.33,
                        epochs=10,
                        batch_size=2,
                        callbacks=callbacks_list,
                        verbose=0)

#-----------------------------
# summarize history for loss
f, ax2 = plt.subplots(1, 1)
ax2.plot(history.history['loss'])
ax2.plot(history.history['val_loss'])
ax2.set_title('model loss')
ax2.set_ylabel('Performance')
ax2.set_xlabel('Epochs')
#ax2.set_xlim(0.,10.)
ax2.legend(['train', 'test'], loc='upper left')
plt.savefig("keras_train_test.pdf")
    return model

estimator = KerasRegressor(build_fn=model, batch_size=113, epochs=57, verbose=0)

# k-fold cross validation for less biased estimate of model performance
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, x_train, y_train, cv=kfold)
print('Results: %.2f (%.2f) MSE' % (results.mean(), results.std()))

# Fit the model
tbCallBack = keras.callbacks.TensorBoard(log_dir='./logs/train', histogram_freq=1)
filepath = 'weights_first_model.hdf5'
checkpoint = ModelCheckpoint(filepath,
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             save_weights_only=True,
                             mode='auto')
callbacks_list = [checkpoint, tbCallBack]
history = estimator.fit(x_train,
                        y_train,
                        batch_size=113,
                        epochs=57,
                        validation_split=0.33,
                        callbacks=callbacks_list,
                        verbose=2)
def createModel(self):
    X = self.df[list(self.predictor_list.get(0, tk.END))].to_numpy()
    y = self.df[self.target_list.get(0)].to_numpy().reshape(-1)

    layers = self.no_optimization_choice_var.get()
    learning_rate = self.hyperparameters[4].get()
    momentum = self.hyperparameters[5].get()
    optimizers = {
        "Adam": Adam(learning_rate=learning_rate),
        "SGD": SGD(learning_rate=learning_rate, momentum=momentum),
        "RMSprop": RMSprop(learning_rate=learning_rate, momentum=momentum)
    }

    def base_model():
        model = Sequential()
        for i in range(layers):
            neuron_number = self.neuron_numbers_var[i].get()
            activation = self.activation_var[i].get()
            if i == 0:
                model.add(
                    Dense(neuron_number, activation=activation, input_dim=X.shape[1]))
            else:
                model.add(Dense(neuron_number, activation=activation))
        model.add(Dense(1, activation="relu"))
        model.compile(optimizer=optimizers[self.hyperparameters[2].get()],
                      loss=self.hyperparameters[3].get())
        return model

    do_forecast = self.do_forecast_option.get()
    val_option = self.validation_option.get()

    if val_option == 0 or val_option == 1:
        model = base_model()
    elif val_option == 2 or val_option == 3:
        model = KerasRegressor(build_fn=base_model,
                               epochs=self.hyperparameters[0].get(),
                               batch_size=self.hyperparameters[1].get())

    if val_option == 0:
        model.fit(X, y,
                  epochs=self.hyperparameters[0].get(),
                  batch_size=self.hyperparameters[1].get())
        if do_forecast == 0:
            pred = model.predict(X).reshape(-1)
            losses = loss(y, pred)[:-1]
            self.y_test = y
            self.pred = pred
            for i, j in enumerate(losses):
                self.test_metrics_vars[i].set(j)
        self.model = model
    elif val_option == 1:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, train_size=self.random_percent_var.get() / 100)
        model.fit(X_train, y_train,
                  epochs=self.hyperparameters[0].get(),
                  batch_size=self.hyperparameters[1].get())
        if do_forecast == 0:
            pred = model.predict(X_test).reshape(-1)
            losses = loss(y_test, pred)[:-1]
            self.y_test = y_test.reshape(-1)
            self.pred = pred
            for i, j in enumerate(losses):
                self.test_metrics_vars[i].set(j)
        self.model = model
    elif val_option == 2:
        cvs = cross_validate(model, X, y, cv=self.cross_val_var.get(), scoring=skloss)
        for i, j in enumerate(list(cvs.values())[2:]):
            self.test_metrics_vars[i].set(j.mean())
    elif val_option == 3:
        cvs = cross_validate(model, X, y, cv=X.shape[0] - 1, scoring=skloss)
        for i, j in enumerate(list(cvs.values())[2:]):
            self.test_metrics_vars[i].set(j.mean())
def createModel(self):
    clear_session()
    X, y = self.getData()
    print(self.scale_var.get())

    layers = self.no_optimization_choice_var.get()
    learning_rate = self.hyperparameters[4].get()
    momentum = self.hyperparameters[5].get()
    optimizers = {
        "Adam": Adam(learning_rate=learning_rate),
        "SGD": SGD(learning_rate=learning_rate, momentum=momentum),
        "RMSprop": RMSprop(learning_rate=learning_rate, momentum=momentum)
    }

    def base_model():
        model = Sequential()
        for i in range(layers):
            neuron_number = self.neuron_numbers_var[i].get()
            activation = self.activation_var[i].get()
            if i == 0:
                model.add(Dense(neuron_number, activation=activation,
                                input_dim=X.shape[1],
                                kernel_initializer=GlorotUniform(seed=0)))
            else:
                model.add(Dense(neuron_number, activation=activation,
                                kernel_initializer=GlorotUniform(seed=0)))
        model.add(Dense(1, activation=self.output_activation.get(),
                        kernel_initializer=GlorotUniform(seed=0)))
        model.compile(optimizer=optimizers[self.hyperparameters[2].get()],
                      loss=self.hyperparameters[3].get())
        return model

    do_forecast = self.do_forecast_option.get()
    val_option = self.validation_option.get()

    if val_option == 0 or val_option == 1:
        model = base_model()
    elif val_option == 2 or val_option == 3:
        model = KerasRegressor(build_fn=base_model,
                               epochs=self.hyperparameters[0].get(),
                               batch_size=self.hyperparameters[1].get())

    if val_option == 0:
        model.fit(X, y,
                  epochs=self.hyperparameters[0].get(),
                  batch_size=self.hyperparameters[1].get())
        if do_forecast == 0:
            pred = model.predict(X).reshape(-1)
            losses = loss(y, pred)[:-1]
            self.y_test = y
            self.pred = pred
            for i, j in enumerate(losses):
                self.test_metrics_vars[i].set(j)
        self.model = model
    elif val_option == 1:
        if do_forecast == 0:
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, train_size=self.random_percent_var.get() / 100)
            model.fit(X_train, y_train,
                      epochs=self.hyperparameters[0].get(),
                      batch_size=self.hyperparameters[1].get())
            pred = model.predict(X_test).reshape(-1)
            losses = loss(y_test, pred)[:-1]
            self.y_test = y_test.reshape(-1)
            self.pred = pred
            for i, j in enumerate(losses):
                self.test_metrics_vars[i].set(j)
        else:
            size = int((self.random_percent_var.get() / 100) * len(X))
            X = X[-size:]
            y = y[-size:]
            model.fit(X, y,
                      epochs=self.hyperparameters[0].get(),
                      batch_size=self.hyperparameters[1].get())
        self.model = model
    elif val_option == 2:
        if do_forecast == 0:
            cvs = cross_validate(model, X, y, cv=self.cross_val_var.get(), scoring=skloss)
            for i, j in enumerate(list(cvs.values())[2:]):
                self.test_metrics_vars[i].set(j.mean())
    elif val_option == 3:
        if do_forecast == 0:
            cvs = cross_validate(model, X, y, cv=X.shape[0] - 1, scoring=skloss)
            for i, j in enumerate(list(cvs.values())[2:]):
                self.test_metrics_vars[i].set(j.mean())

    self.model.summary()