# NOTE(review): the opening of wider_model() was truncated in the source; the
# first Dense layer below is reconstructed from the standard Keras "wider"
# Boston-housing tutorial — confirm layer width / input_dim against the
# original.
def wider_model():
    """Build the 'wider' regression MLP: one hidden relu layer -> linear output."""
    model = Sequential()
    model.add(Dense(20, input_dim=13, kernel_initializer='normal',
                    activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


# fix random seed for reproducibility
seed = 7
# evaluate model with standardized dataset
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
model = KerasRegressor(build_fn=wider_model, epochs=100, batch_size=10,
                       verbose=0)
estimators.append(('mlp', model))
pipeline = Pipeline(estimators)
# shuffle=True is required for random_state to take effect (modern
# scikit-learn raises ValueError if random_state is set while shuffle=False).
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X_TRAIN, Y_TRAIN, cv=kfold)
print("Wider: %.2f (%.2f) MSE" % (results.mean(), results.std()))
pipeline.fit(X_TRAIN, Y_TRAIN)
pred = pipeline.predict(X_TEST)
print(pred)
# print(pred.shape)
# print (mean_absolute_error(Y_TEST,pred))
directory = os.path.dirname(os.path.realpath(__file__))
# The Keras model cannot be pickled: detach it from the pipeline, pickle the
# (scaler-only) pipeline, and persist the Keras model separately as HDF5.
model_step = pipeline.steps.pop(-1)[1]
joblib.dump(pipeline, os.path.join(directory, 'pipeline_plat.pkl'))
models.save_model(model_step.model, os.path.join(directory, 'model_plat.h5'))
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score, KFold


def ANNModel():
    """238-input MLP regressor trained on mean-squared-logarithmic error."""
    model = Sequential()
    # Keras 2 API: `units` / `kernel_initializer` replace the removed
    # Keras 1 `output_dim` / `init` keywords.
    model.add(Dense(units=238, kernel_initializer='normal',
                    activation='relu', input_dim=238))
    model.add(Dense(units=100, kernel_initializer='normal',
                    activation='relu'))
    model.add(Dense(units=1, kernel_initializer='normal'))
    model.compile(optimizer='adam', loss='mean_squared_logarithmic_error')
    return model


seed = 10
np.random.seed(seed)
ANNReg = KerasRegressor(build_fn=ANNModel, epochs=100, batch_size=5,
                        verbose=1)
# shuffle=True so that random_state takes effect (modern scikit-learn raises
# if random_state is set while shuffle=False).
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(ANNReg, X_train, Y_train, cv=kfold)
ANNReg.fit(X_train, Y_train)

# Prediction (RanReg / GBReg / XGBReg are fitted elsewhere in this file)
RanRegPred = RanReg.predict(X_val)
GBRegPred = GBReg.predict(X_val)
XGBRegPred = XGBReg.predict(X_val)
ANNRegPred = ANNReg.predict(X_val).ravel()


# Checking the RMSLE
def rmsle(y, y0):
    """Root-mean-squared-log-error between two equal-length arrays."""
    assert len(y) == len(y0)
    return np.sqrt(np.mean(np.power(np.log1p(y) - np.log1p(y0), 2)))
import predictor
import prepare_data
import model_builder

if __name__ == "__main__":
    print("Grid searching!")
    # Resolve data paths relative to this script's directory.
    root_dir = os.path.dirname(os.path.realpath(__file__))
    x, y, sc_X, sc_Y = prepare_data.training(
        os.path.join(root_dir, "data", "results.csv"))

    # Wrap the Keras builder so scikit-learn's GridSearchCV can drive it;
    # extra kwargs are forwarded to model_builder.create_model.
    model = KerasRegressor(build_fn=model_builder.create_model,
                           verbose=1,
                           feature_count=len(x[0]),
                           output_count=len(y[0]))

    # Hyper-parameter grid: optimizer, init scheme, epochs, batch size, depth.
    optimizers = ['rmsprop']  # , 'adam']
    init = ['glorot_uniform']  # , 'normal', 'uniform']
    epochs = [1000, 5000, 10000]
    batches = [50]
    hidden_layer_counts = [1, 2, 3]
    param_grid = dict(optimizer=optimizers,
                      epochs=epochs,
                      batch_size=batches,
                      hidden_layer_count=hidden_layer_counts,
                      init=init)
    grid = GridSearchCV(estimator=model, param_grid=param_grid)
    grid_result = grid.fit(x, y)
def baseline_model():
    """12 -> 6 -> 1 dense regression net.

    Sizes were found by trial and error (adding nodes, removing layers,
    adjusting epochs) while watching where the training loss flattens out.
    """
    net = Sequential()
    net.add(Dense(12, input_dim=12, kernel_initializer='normal',
                  activation='relu'))
    net.add(Dense(6, input_dim=12, kernel_initializer='normal',
                  activation='relu'))
    net.add(Dense(1, kernel_initializer='normal'))
    net.compile(loss='mean_squared_error', optimizer='adam')
    return net


estimator = KerasRegressor(build_fn=baseline_model, epochs=28)
estimator.fit(X_train, y_train)

# In[30]:

# Collect every method's predictions alongside the reference columns.
df_test = (X_test
           .join(df_sub_wtb[['normand', 'stull', 'half', 'third']])
           .assign(**{'lreg': visualizer.predict(X_test),
                      'keras': estimator.predict(X_test)}))
df_test['time'] = pd.to_datetime(df_test['year'].astype(str)
                                 + df_test['dayofyear'].astype(str)
                                 + df_test['hour'].astype(str),
                                 format='%Y%j%H')
# split into input (X) and output (Y) variables
X = dataset[:, 0:13]
Y = dataset[:, 13]


def larger_model():
    """13 -> 6 -> 1 relu MLP compiled for mean-squared-error regression."""
    net = Sequential()
    net.add(Dense(13, input_dim=13, activation='relu'))
    net.add(Dense(6, activation='relu'))
    net.add(Dense(1))
    net.compile(loss='mean_squared_error', optimizer='adam')
    return net


# Standardize inputs, then evaluate the wrapped Keras model with 10-fold CV.
estimators = [('standardize', StandardScaler()),
              ('mlp', KerasRegressor(build_fn=larger_model, epochs=50,
                                     batch_size=5, verbose=0))]
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Larger: %.2f (%.2f) MSE" % (results.mean(), results.std()))
def deepLearning(data, target, iteraNum, funNum):
    """Grid-search a Keras regressor and collect held-out RMSE per iteration.

    Parameters
    ----------
    data : 2-D array-like of normalized features.
    target : 1-D array-like of regression targets.
    iteraNum : number of random 80/20 train/test splits to evaluate.
    funNum : which builder to wrap (1=baseline_model, 2=wider_model,
        3=larger_model).

    Returns
    -------
    list of RMSE values, one per iteration.
    """
    global kerasModel
    X_norm = data
    print("This is X_norm: ", X_norm)
    y = target
    print("This is target : ", y)
    tempDim = len(X_norm[0])
    print("This is input dimension: ", tempDim)
    kerasList = []
    batch_size = [50, 100, 150, 200]
    epochs = [10, 20, 30, 50, 80]
    inputDim = [tempDim]
    # neurons = [40,50,60,100,200]
    # NOTE: the Keras 2 sklearn wrapper expects `epochs`; the old `nb_epoch`
    # key was silently ignored, so the epoch grid never took effect.
    param_grid = dict(batch_size=batch_size, epochs=epochs,
                      input_dim=inputDim)
    if funNum == 1:
        kerasModel = KerasRegressor(build_fn=baseline_model, verbose=0)
    elif funNum == 2:
        kerasModel = KerasRegressor(build_fn=wider_model, verbose=0)
    elif funNum == 3:
        kerasModel = KerasRegressor(build_fn=larger_model, verbose=0)
    for j in range(iteraNum):
        X_train, X_test, y_train, y_test = train_test_split(X_norm, y,
                                                            test_size=0.2)
        print("This is X_train: ", X_train)
        print("This is y_train: ", y_train)
        grid = GridSearchCV(estimator=kerasModel, cv=5,
                            param_grid=param_grid)
        newModel = grid.fit(X_train, y_train)
        print("Best: %f using %s" % (newModel.best_score_,
                                     newModel.best_params_))
        y_pred = newModel.predict(X_test).tolist()
        print("This is y_pred: ", y_pred)
        y_test_list = y_test.tolist()
        print("This is y_test_list: ", y_test_list)
        # RMSE on the held-out split.
        sum_erro = np.sqrt(mean_squared_error(y_test_list, y_pred))
        print("This is : sum_erro ", sum_erro)
        print("This is iteration number: ", j + 1)
        kerasList.append(sum_erro)
    return kerasList
# NOTE(review): the opening of build_regressor() (its def line, the first
# LSTM layer and its input_shape) was truncated in the source; the signature
# below is reconstructed from the grid-searched `optimizer` parameter —
# restore the missing layers from the original before running.
def build_regressor(optimizer='adam'):
    """Stacked-LSTM regressor; `optimizer` is swept by the grid search below."""
    regressor = Sequential()
    regressor.add(LSTM(units=50, return_sequences=True))
    regressor.add(Dropout(0.2))
    regressor.add(LSTM(units=50))
    regressor.add(Dropout(0.2))
    regressor.add(Dense(units=1))
    regressor.compile(optimizer=optimizer, loss='mean_squared_error')
    return regressor


from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV

regressor = KerasRegressor(build_fn=build_regressor)
# `epochs` (not the removed Keras 1 `nb_epoch`) so the epoch grid actually
# reaches model.fit().
parameters = {
    'batch_size': [10, 25, 32],
    'epochs': [50, 100],
    'optimizer': ['adam', 'rmsprop'],
}
grid_search = GridSearchCV(estimator=regressor,
                           param_grid=parameters,
                           scoring='neg_mean_squared_error',
                           cv=None)
grid_search.fit(X_train[:, :, -1], y_train)
best_param = grid_search.best_params_
best_accuracy = grid_search.best_score_
# NOTE(review): the def line and Sequential() construction of this builder
# were truncated in the source; reconstructed below from the build_fn
# reference — confirm the name matches the original.
def build_regressor():
    """7-input, 4-4-1 dense regressor (uniform init, relu throughout)."""
    model = Sequential()
    # Add First Hidden Layer
    model.add(Dense(units=4, kernel_initializer='uniform',
                    activation='relu', input_dim=7))
    # Adding the second hidden layer
    model.add(Dense(units=4, kernel_initializer='uniform',
                    activation='relu'))
    # Adding the output layer
    # NOTE(review): relu on a regression output clamps predictions at 0 and
    # can stall training; 'linear' is the usual choice — confirm intent.
    model.add(Dense(units=1, kernel_initializer='uniform',
                    activation='relu'))
    # Compiling the ANN
    model.compile(loss='mse', optimizer='adam', metrics=['mse'])
    return model


# build KerasRegressor
model = KerasRegressor(build_fn=build_regressor, batch_size=10, epochs=100)
# fitting model (fit-time kwargs override the constructor's epochs=100)
model.fit(X_train, y_train, batch_size=10, epochs=1000)
# predicting model
y_pred = model.predict(X_test)
# finding MSE
m = mean_squared_error(y_test, y_pred)
# keep retraining until MSE < 100
# NOTE(review): this loop never terminates if the model cannot reach
# MSE < 100 — consider adding a maximum retry count.
while m > 100:
    model.fit(X_train, y_train, batch_size=10, epochs=1000)
    y_pred = model.predict(X_test)
    m = mean_squared_error(y_test, y_pred)
# NOTE(review): the body of create_model() above its compile call was
# truncated in the source; the reconstruction assumes one hidden layer whose
# activation is the grid-searched parameter, and 8 inputs to match the 8
# usecols loaded below — confirm against the original.
def create_model(activation='relu'):
    """Builder for the activation grid search below; `activation` is swept."""
    model = Sequential()
    model.add(Dense(8, input_dim=8, activation=activation))
    model.add(Dense(1))
    model.compile(optimizer='Nadam', loss='mean_squared_error',
                  metrics=['mae'])
    return model


# fix random seed for reproducibility
seed = 7
np.random.seed(seed)

# load training dataset
train_data = pd.read_csv("train_data.csv",
                         usecols=['position', 'area', 'diameter', 'angleup',
                                  'angledown', 'shape', 'pdrop', 'reloc'])
train_targets = pd.read_csv("train_targets.csv", usecols=['Fr'])

# feature scaling and mean normalization for train data
mean = train_data.mean(axis=0)
train_data -= mean
std = train_data.std(axis=0)
train_data /= std
train_data = train_data.values
train_targets = train_targets.values.flatten('F')

# create model
model = KerasRegressor(build_fn=create_model, epochs=10000, batch_size=128,
                       verbose=0)

# define the grid search parameters
activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid',
              'hard_sigmoid', 'linear']
param_grid = dict(activation=activation)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1)
grid_result = grid.fit(train_data, train_targets)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_,
                             grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
def baseline_model():
    """256 -> 64 -> 1 sigmoid MLP with dropout, compiled for MSE."""
    net = Sequential()
    net.add(Dense(256, kernel_initializer='normal', activation='sigmoid'))
    net.add(Dropout(0.5))
    net.add(Dense(64, kernel_initializer='normal', activation='sigmoid'))
    net.add(Dropout(0.5))
    net.add(Dense(1, kernel_initializer='normal'))
    net.compile(loss='mean_squared_error', optimizer='adam')
    return net


# Scale features first, then fit the wrapped Keras net.
step_scaler = StandardScaler()
step_regressor = KerasRegressor(build_fn=baseline_model, epochs=100,
                                batch_size=32, verbose=2)
steps = [('step_scaler', step_scaler), ('step_regressor', step_regressor)]
pipeline = Pipeline(steps)
clf = pipeline

# kfold = KFold(n_splits=3, random_state=seed)
# cross_val_scores = cross_val_score(pipeline, X_train, y_train, cv=kfold)
# Results: -19376733.79 (49264686.37) MSE (no scaling)
# Results: -144.28 (148.91) MSE (relu, [62*1])
# Results: -59.37 (44.19) MSE (sigmoid, [62*1])
# Results: -87.21 (28.69) MSE (sigmoid, [62,32,1]) - deeper
# Results: -53.58 (32.51) MSE (sigmoid, [512,1]) - wider

clf.fit(X_train, y_train)
previsores[:, 10] = labelencoder_previsores.fit_transform(previsores[:, 10])
# NOTE(review): OneHotEncoder(categorical_features=...) was removed in
# scikit-learn 0.22; on modern versions wrap the encoder in a
# ColumnTransformer over columns [0, 1, 3, 5, 8, 9, 10] instead.
onehotencoder = OneHotEncoder(categorical_features=[0, 1, 3, 5, 8, 9, 10])
previsores = onehotencoder.fit_transform(previsores).toarray()


def criar_rede():
    """316-input MLP (two 158-unit relu layers, linear output) trained on MAE."""
    regressor = Sequential()
    regressor.add(Dense(units=158, activation='relu', input_dim=316))
    regressor.add(Dense(units=158, activation='relu'))
    regressor.add(Dense(units=1, activation='linear'))
    regressor.compile(loss='mean_absolute_error', optimizer='adam',
                      metrics=['mean_absolute_error'])
    return regressor


regressor = KerasRegressor(build_fn=criar_rede, epochs=100, batch_size=300)
# 'mean_absolute_error' is not a valid scorer name in modern scikit-learn;
# the negated variant is used (higher == better), so scores come back <= 0.
resultados = cross_val_score(estimator=regressor, X=previsores, y=preco_real,
                             cv=10, scoring='neg_mean_absolute_error')
media = resultados.mean()
desvio = resultados.std()
# NOTE(review): the def line of baseline_model() was truncated in the source;
# reconstructed from the build_fn reference below.
def baseline_model():
    """13 -> 1 normal-init MLP: Boston-housing regression baseline."""
    model = Sequential()
    model.add(Dense(13, input_dim=13, kernel_initializer='normal',
                    activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# evaluate model
estimator = KerasRegressor(build_fn=baseline_model, epochs=100, batch_size=5,
                           verbose=0)
# shuffle=True is required for random_state to take effect (modern
# scikit-learn raises ValueError if random_state is set while shuffle=False).
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, Y, cv=kfold)
print("Baseline: %.2f (%.2f) MSE" % (results.mean(), results.std()))

# Regression Example With Boston Dataset: Standardized
import numpy
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
model.add(Dropout(0.2)) model.add(Flatten()) model.add(Dense(1, kernel_initializer='normal')) model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse', rmse, r_sq]) return model #estimators = [] #estimators.append(('standardize', MinMaxScaler())) #estimators.append(('mlp',KerasRegressor(build_fn=build_model, epochs=20, batch_size=5, verbose=1))) estimators = KerasRegressor(build_fn=build_model, epochs=50, batch_size=64, verbose=1) #pipeline = Pipeline(estimators) kfold = KFold(n_splits=5) results = cross_val_score(estimators, data_scaled, target_scaled, cv=kfold) print("MSE Score: %.6f (%.6f) MSE" % (results.mean(), results.std())) train_data, test_data, train_target, test_target = train_test_split( data_scaled, target_scaled, test_size=0.2, random_state=21) ''' from keras.callbacks import ModelCheckpoint chk = ModelCheckpoint("Modelling/ibd.h5", monitor='loss', save_best_only=True, mode='min') callback_list=[chk]
adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0,
            amsgrad=True)
nadam = Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None,
              schedule_decay=0.004)

# 1. BATCH_SIZE AND EPOCHS ##########################
# Grid Search Hyperparameters


def grid_model(optim=adam):
    """30-30-1 relu MLP compiled with the given optimizer (default: adam)."""
    net = models.Sequential()
    net.add(layers.Dense(30, activation='relu',
                         input_shape=(X_train.shape[1], )))
    net.add(layers.Dense(30, activation='relu'))
    net.add(layers.Dense(1))
    net.compile(optimizer=optim, loss='mse', metrics=['mae', 'mse'])
    return net


model = KerasRegressor(build_fn=grid_model, verbose=0)

# grid search parameters
batch_size = [20, 25, 30, 35, 40]
epochs = [150, 200, 250]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model,
                    param_grid=param_grid,
                    scoring=['neg_mean_squared_error', 'r2',
                             'explained_variance'],
                    cv=5,
                    n_jobs=-1,
                    refit='neg_mean_squared_error',
                    verbose=2)
class NNNBA:
    """ NNNBA class, which contains all the calculated information """
    default_model_type = "lasso"
    assumed_max_salary = 35350000.0
    # Per-stat outlier-distance thresholds (None => derived from std inside
    # __idx_of_median_outlier__).
    # NOTE(review): "FGM" appears twice; the second entry silently
    # overwrites the first — confirm which value was intended.
    __threshold_per_col = {
        "OFF_RATING": 12,
        "PIE": 0.11,
        "NET_RATING": 18,
        "GP": 50,
        "DEF_RATING": 7,
        "USG_PCT": 0.12,
        "FGA": None,
        "FGM": None,
        "FG3A": None,
        "PTS": None,
        "FTM": None,
        "FGM": None,
        "REB_PCT": None,
        "AGE": 4
    }
    # Columns checked for upper/lower outliers (currently disabled).
    __outlier_cols_upper = [
    ]  # ["OFF_RATING", "PIE", "NET_RATING", "USG_PCT", "PTS"]
    __outlier_cols_lower = []  # ["DEF_RATING"]
    # Initial alpha / l1_ratio sweeps for the linear models below.
    __ridge_init_alpha = [0.01, 0.03, 0.06, 0.1, 0.3, 0.6, 1, 3, 6, 10, 30,
                          60]
    __lasso_init_alpha = [
        0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006, 0.01, 0.03, 0.06, 0.1,
        0.3, 0.6, 1
    ]
    __elasticnet_init = {
        "l1_ratio": [0.1, 0.3, 0.5, 0.6, 0.7, 0.8, 0.85, 0.9, 0.95, 1],
        "alpha": [
            0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006, 0.01, 0.03, 0.06,
            0.1, 0.3, 0.6, 1, 3, 6
        ]
    }

    def __realpha__(self, alpha):
        """Return a finer alpha sweep centred on a previously found alpha."""
        return [
            alpha * .6, alpha * .65, alpha * .7, alpha * .75, alpha * .8,
            alpha * .85, alpha * .9, alpha * .95, alpha, alpha * 1.05,
            alpha * 1.1, alpha * 1.15, alpha * 1.25, alpha * 1.3,
            alpha * 1.35, alpha * 1.4
        ]

    def __reratio__(self, ratio):
        """Return a finer l1_ratio sweep centred on a previously found ratio."""
        return [
            ratio * .85, ratio * .9, ratio * .95, ratio, ratio * 1.05,
            ratio * 1.1, ratio * 1.15
        ]

    # NOTE(review): defined without `self` on purpose — it is referenced at
    # class-body time as a plain function by the KerasRegressor in `models`.
    def __baseline_model__():
        """Base neural-network model: 39 -> 19 -> 39 -> 19 -> 9 -> 1 relu MLP."""
        input = 39
        model = Sequential()
        model.add(
            Dense(input,
                  input_dim=input,
                  kernel_initializer='normal',
                  activation='relu'))
        model.add(
            Dense(int(input / 2), kernel_initializer='normal',
                  activation='relu'))
        model.add(Dense(input, kernel_initializer='normal',
                        activation='relu'))
        model.add(
            Dense(int(input / 2), kernel_initializer='normal',
                  activation='relu'))
        model.add(
            Dense(int(input / 4), kernel_initializer='normal',
                  activation='relu'))
        model.add(Dense(1, kernel_initializer='normal'))
        model.compile(loss='mean_squared_error', optimizer='adam')
        return model

    def __idx_of_median_outlier__(self, col, threshold=None,
                                  upper_outlier=True):
        # may need threshold=2
        """Find indices of outliers based on distance from the median.

        The distance cutoff is `threshold` when given, otherwise
        2.5 * std of the passed-in column.
        """
        if threshold is None:
            threshold = col.std() * 2.5
        logger.debug("median: " + str(col.median()) + " threshold: " +
                     str(threshold))
        diff = col - col.median()
        if upper_outlier:
            outlier = diff > threshold
        else:
            outlier = -1 * diff > threshold
        return list(outlier.index[outlier])

    # All candidate regressors, keyed by display name.
    # NOTE(review): `nb_epoch` is the removed Keras 1 keyword — modern
    # wrappers expect `epochs`, so as written the net trains with the
    # default epoch count; confirm before relying on it.
    models = {
        "linear regression":
        linear_model.LinearRegression(fit_intercept=True),
        "ridge":
        linear_model.RidgeCV(alphas=__ridge_init_alpha, fit_intercept=True),
        "lasso":
        linear_model.LassoCV(alphas=__lasso_init_alpha, max_iter=5000, cv=10,
                             fit_intercept=True),
        "bayes ridge":
        linear_model.BayesianRidge(),
        "keras regressor":
        KerasRegressor(build_fn=__baseline_model__, nb_epoch=100,
                       batch_size=5, verbose=0),
        "xgb":
        xgb.XGBRegressor(n_estimators=1500, max_depth=2, learning_rate=0.01),
        "elasticnet":
        linear_model.ElasticNetCV(l1_ratio=__elasticnet_init["l1_ratio"],
                                  alphas=__elasticnet_init["alpha"],
                                  max_iter=1000, cv=3),
        "theilsen":
        linear_model.TheilSenRegressor(),
        "polynomial":
        Pipeline([('poly', PolynomialFeatures(degree=2)),
                  ('linear',
                   linear_model.LinearRegression(fit_intercept=True))])
    }

    def __remodel__(self, model_type, regr, __X_train, __Y_train):
        """Retrain certain models on a refined sweep around their fitted
        optimal alphas and/or l1 ratios, then fit and return the result."""
        if model_type == "ridge":
            alpha = regr.alpha_
            regr = linear_model.RidgeCV(alphas=self.__realpha__(alpha),
                                        cv=10)
        elif model_type == "lasso":
            alpha = regr.alpha_
            regr = linear_model.LassoCV(alphas=self.__realpha__(alpha),
                                        max_iter=5000, cv=10)
        elif model_type == "elasticnet":
            alpha = regr.alpha_
            ratio = regr.l1_ratio_
            regr = linear_model.ElasticNetCV(
                l1_ratio=self.__reratio__(ratio),
                alphas=self.__elasticnet_init["alpha"],
                max_iter=1000,
                cv=3)
        regr.fit(__X_train, __Y_train)
        return regr

    def __normalize_salary__(
        self, col, max_salary=assumed_max_salary
    ):  # scales out to max contract; max taken from https://www.hoopsrumors.com/2017/05/nba-maximum-salary-projections-for-201718.html
        """Rescale predicted salaries so the column max maps to the maximum
        salary possible (year-over-year max salary changes)."""
        min_salary = min(col)
        local_max_salary = max(col)
        return max_salary - (local_max_salary - col) / (
            local_max_salary - min_salary) * (max_salary - min_salary)

    def __init__(self, debug=False):
        """Load crawled player data, clean it, fit every model in `models`
        and store per-model worth/salary-diff tables in `model_results`."""
        logger.setLevel(logging.DEBUG if debug else logging.ERROR)
        with open("crawled_data/raw_data.json", "r") as data_file:
            raw_data = json.load(data_file)
        columns = raw_data[0]["header"]
        unique_columns = list(set(raw_data[0]["header"]))  # currently unused
        position_names = [
            "Point Guard", "Shooting Guard", "Small Forward", "Power Forward",
            "Center"
        ]
        positions = []
        for i, val in enumerate(position_names):
            positions.append((val, i))
        positions_convert = dict(positions)  # currently unused
        self.X_df = pd.DataFrame(columns=columns)
        Y_df = pd.DataFrame(columns=["SALARIES"])
        age = []
        positions_df = pd.DataFrame(columns=position_names)
        names = pd.DataFrame(columns=["NAME", "PROJECTED_SALARIES"])
        logger.debug("Processing data")
        # Keep only players with both a 2016-17 salary and 2016-17 stats.
        for i, player in enumerate(raw_data):
            if "2016_17" in player["salaries"] and "2016-17" in player[
                    "stats"]:
                Y_df.loc[len(Y_df)] = player["salaries"]["2016_17"]
                self.X_df.loc[len(self.X_df)] = player["stats"]["2016-17"]
                age.append(player["age"])
                positions_df.loc[len(positions_df)] = [0, 0, 0, 0, 0]
                for position in player["positions"]:
                    positions_df[position][len(positions_df)] = 1
                projected_salaries = 0
                try:
                    projected_salaries = player["projected_salaries"][0]
                except:
                    pass
                names.loc[len(names)] = [player["name"], projected_salaries]
            else:
                continue
        # Optional tanh squashing of selected columns (list empty => no-op).
        for col in []:
            try:
                self.X_df[col] = np.tanh(self.X_df[col])
            except:
                pass
        self.X_df = self.X_df.T.drop_duplicates().T
        self.X_df = pd.concat(
            [self.X_df, pd.Series(age, name="AGE"), positions_df], axis=1)
        # Drop redundant / derived stat columns before modelling.
        self.X_df = self.X_df.drop([
            "FGA", "L", "AGE", "PCT_TOV", "BLKA", "AST_PCT", "AST_RATIO",
            "OREB_PCT", "DREB_PCT", "REB_PCT", "TM_TOV_PCT", "PACE",
            "OPP_PTS_OFF_TOV", "OPP_PTS_FB", "OPP_PTS_PAINT",
            'OPP_PTS_2ND_CHANCE', 'OPP_PTS_FB', 'PCT_FGA_2PT', 'PCT_FGA_3PT',
            'PCT_PTS_2PT', 'PCT_PTS_2PT_MR', 'PCT_PTS_3PT', 'PCT_PTS_FB',
            'PCT_PTS_FT', 'PCT_PTS_OFF_TOV', 'PCT_PTS_PAINT', 'PCT_AST_2PM',
            'PCT_UAST_2PM', 'PCT_AST_3PM', 'PCT_UAST_3PM', 'PCT_AST_FGM',
            'PCT_UAST_FGM', 'PCT_FGM', 'PCT_FGA', 'PCT_FG3M', 'PCT_FG3A',
            'PCT_FTM', 'PCT_FTA', 'PCT_OREB', 'PCT_DREB', 'PCT_REB',
            'PCT_AST', 'PCT_STL', 'PCT_BLK', 'PCT_BLKA', 'PTS_OFF_TOV',
            'PTS_FB', 'PTS_PAINT'
        ], 1)
        logger.debug("Columns: " + ", ".join(self.X_df.columns))
        # remove players who's played less than 15 games
        idx_of_lt_gp = self.X_df.index[(self.X_df["GP"] < 15)]
        self.X_df = self.X_df.drop(idx_of_lt_gp)
        Y_df = Y_df.drop(idx_of_lt_gp)
        age = pd.Series(age).drop(idx_of_lt_gp)
        positions_df = positions_df.drop(idx_of_lt_gp)
        names = names.drop(idx_of_lt_gp)
        # Remove outliers
        logger.debug("Remove outliers")
        X_train = self.X_df.copy()
        Y_train = Y_df.copy()
        logger.debug("No of rows before removing outliers: " +
                     str(X_train.shape[0]))
        to_be_dropped = []
        ## remove upper
        for col in self.__outlier_cols_upper:
            logger.debug(col)
            idx_of_median_outlier = self.__idx_of_median_outlier__(
                X_train[col], self.__threshold_per_col[col])
            logger.debug(
                col + " should drop " +
                ", ".join(names["NAME"][idx_of_median_outlier].values))
            to_be_dropped = to_be_dropped + idx_of_median_outlier
        ## remove lower
        for col in self.__outlier_cols_lower:
            logger.debug(col)
            idx_of_median_outlier = self.__idx_of_median_outlier__(
                X_train[col],
                self.__threshold_per_col[col],
                upper_outlier=False)
            logger.debug(
                col + " should drop " +
                ", ".join(names["NAME"][idx_of_median_outlier].values))
            to_be_dropped = to_be_dropped + idx_of_median_outlier
        to_be_dropped = list(set(to_be_dropped))
        logger.debug("Outliers: " +
                     ", ".join(names["NAME"][to_be_dropped].values))
        X_train = X_train.drop(to_be_dropped)
        Y_train = Y_train.drop(to_be_dropped)
        logger.debug("No of rows after removing outliers: " +
                     str(X_train.shape))
        logger.debug("No of rows after removing outliers: " +
                     str(Y_train.shape))
        __X_train = X_train.values  # training data only includes non-rookies
        __Y_train = np.log1p(Y_train["SALARIES"].values)  # y = log(1+y)
        self.Y_df = Y_df
        self.model_results = {}
        self.names = names
        # Fit each model (then refit around its optimal hyper-parameters),
        # predict worth and rank players by projected-salary difference.
        for model_type, regr in self.models.items():
            logger.debug("Started " + model_type)
            this_results = names.copy()
            regr.fit(__X_train, __Y_train)
            regr = self.__remodel__(model_type, regr, __X_train, __Y_train)
            results = self.__normalize_salary__(
                np.expm1(regr.predict(self.X_df.values)))  # y = exp(y) - 1
            this_results['WORTH'] = results
            diffY = this_results["PROJECTED_SALARIES"].values - results
            this_results['SALARY_DIFF'] = diffY
            this_results = this_results.sort_values(by="SALARY_DIFF",
                                                    ascending=False)
            self.models[model_type] = regr
            self.model_results[model_type] = this_results
            logger.debug("Finished " + model_type)
        # get avg: mean worth of the three best-behaved linear models.
        this_results = self.model_results["linear regression"].copy()
        this_results["WORTH"] = self.__normalize_salary__(
            (1. * self.model_results["bayes ridge"]["WORTH"] +
             1. * self.model_results["lasso"]["WORTH"] +
             1. * self.model_results["elasticnet"]["WORTH"]) / 3)
        diffY = this_results["PROJECTED_SALARIES"].values - this_results[
            "WORTH"]
        this_results['SALARY_DIFF'] = diffY
        self.model_results["avg"] = this_results

    def getUndervalued(self, model_type=default_model_type):
        """Print players whose predicted worth exceeds projected salary."""
        names = self.model_results[model_type]
        print(names.loc[(names["SALARY_DIFF"] < 0)
                        & (names["PROJECTED_SALARIES"] > 0)])

    def getPlayerValue(self, player_name, model_type=default_model_type):
        """Print a player's paid / projected / predicted-worth salaries."""
        names = self.model_results[model_type]
        idx = names[names["NAME"] == player_name].index[0]
        print("\nPaid: " +
              '${:,.2f}'.format(float(self.Y_df.loc[idx]["SALARIES"])) +
              "\tFuture Salary: " +
              '${:,.2f}'.format(float(self.names["PROJECTED_SALARIES"][idx]))
              + "\tWorth: " + '${:,.2f}'.format(float(names["WORTH"][idx])) +
              "\n")
        self.getPlayerStats(player_name, trim=True)

    def getPlayerStats(self, player_name, trim=False):
        """Print a player's feature row (first 30 columns when trim=True)."""
        columns = self.X_df.columns
        if trim:
            columns = columns[:30]
        print(self.X_df.loc[self.names["NAME"] == player_name, columns])

    def getMostValuablePlayers(self, model_type=default_model_type):
        """Print all players sorted by predicted worth (ascending)."""
        names = self.model_results[model_type]
        print(names.sort_values(by="WORTH"))

    def showAvailableModels(self):
        """Print the name of every configured model."""
        for model in self.models:
            print(model)

    def getPlayerNameByIndex(self, index):
        # NOTE(review): `self.name` looks like a typo for `self.names` —
        # this raises AttributeError as written; confirm before relying on it.
        return self.names[self.name.index == index]

    def getCoefFromModel(self, model_type=default_model_type):
        """Return the model's coefficients indexed by feature, sorted."""
        return pd.DataFrame(self.models[model_type].coef_,
                            index=self.X_df.columns,
                            columns=["coef"]).sort_values(by="coef")

    def plotXCol(self, col_name, X=None):
        """Scatter-plot a feature column (sorted) or a supplied array X."""
        import matplotlib.pyplot as plt
        if X is None:
            X = self.X_df.sort_values(by=col_name)[col_name].values
        plt.figure()
        plt.scatter(range(len(X)), X)
        plt.show()
# NOTE(review): the def line of base_model() was truncated in the source;
# reconstructed from the build_fn reference below.
def base_model():
    """398 -> 20 -> 10 -> 1 relu MLP for log-SalePrice regression."""
    model = Sequential()
    # Keras 2 spelling: kernel_initializer replaces the removed `init`.
    model.add(Dense(20, input_dim=398, kernel_initializer='normal',
                    activation='relu'))
    model.add(Dense(10, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


seed = 7
np.random.seed(seed)

scale = StandardScaler()
X_train = scale.fit_transform(train_new)
# transform (not fit_transform): the test set must be scaled with the
# statistics learned from the training set, otherwise train and test live
# on different scales.
X_test = scale.transform(test_new)
# .values replaces the long-removed DataFrame.as_matrix().
keras_label = label_df.values
# `epochs` (Keras 2) — the old `nb_epoch` keyword is dropped by the wrapper,
# which silently left the model training for the default epoch count.
clf = KerasRegressor(build_fn=base_model, epochs=1000, batch_size=5,
                     verbose=0)
clf.fit(X_train, keras_label)

# make predictions and create the submission file
kpred = clf.predict(X_test)
kpred = np.exp(kpred)
pred_df = pd.DataFrame(kpred, index=test["Id"], columns=["SalePrice"])
pred_df.to_csv('keras1.csv', header=True, index_label='Id')

# simple average of the two other models' (log-scale) predictions
y_pred = (y_pred_xgb + y_pred_lasso) / 2
y_pred = np.exp(y_pred)
pred_df = pd.DataFrame(y_pred, index=test["Id"], columns=["SalePrice"])
pred_df.to_csv('ensemble1.csv', header=True, index_label='Id')
def update_trainee_score(x):
    """Re-fit the profile-score regressor on every other fully-populated
    trainee, predict a raw score for trainee *x*, then re-normalize all
    scores around 0.8 and persist them.

    NOTE(review): the final loop assigns ``y_final`` positionally to
    ``Trainee.objects.all()``, but ``y_final`` was built from the *filtered*
    trainees (incomplete profiles skipped) with ``x``'s prediction appended
    last -- the positional alignment looks fragile; confirm ordering.
    NOTE(review): column order of the dict-built DataFrame below depends on
    the pandas version (older pandas sorts string keys lexicographically),
    which silently changes which column becomes the target -- confirm.
    """
    print('Updating Profile Scores ...')

    # One feature list per attribute, collected from all other trainees.
    academic_score = []
    honesty = []
    emotionality = []
    extraversion = []
    agreeableness = []
    conscientiousness = []
    openness = []
    iq = []
    verbal_ability = []
    score = []
    course_score = []
    qa_score = []
    project_score = []
    for p in Trainee.objects.all().exclude(pk=x.pk):
        # Skip trainees with any missing field.
        required = (p.academic_score, p.personality_c, p.personality_h,
                    p.personality_a, p.personality_e, p.personality_o,
                    p.personality_x, p.iq_score, p.course_score,
                    p.project_score, p.verbal_ability_score, p.qa_score,
                    p.score)
        if any(v is None for v in required):
            continue
        academic_score.append(p.academic_score)
        honesty.append(p.personality_h)
        emotionality.append(p.personality_e)
        extraversion.append(p.personality_x)
        agreeableness.append(p.personality_a)
        conscientiousness.append(p.personality_c)
        openness.append(p.personality_o)
        iq.append(p.iq_score)
        verbal_ability.append(p.verbal_ability_score)
        score.append(p.score)
        project_score.append(p.project_score)
        course_score.append(p.course_score)
        qa_score.append(p.qa_score)

    if len(academic_score) == 0:
        # No complete peers to learn from: fall back to a neutral default.
        x.score = 0.6
        x.save()
        return

    d = {'1': academic_score, '2': honesty, '3': emotionality,
         '4': extraversion, '5': agreeableness, '6': conscientiousness,
         '7': openness, '8': iq, '9': verbal_ability, '10': project_score,
         '11': course_score, '12': qa_score, '13': score}
    df = pd.DataFrame(data=d)
    X = df.iloc[:, [0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12]].values
    y = df.iloc[:, 4].values
    sc = StandardScaler()
    X = sc.fit_transform(X)

    estimator = KerasRegressor(build_fn=baseline_model, batch_size=50,
                               epochs=100, verbose=0)
    estimator.fit(X, y)

    # Build the single-row feature vector for this trainee.
    # NOTE(review): this row is NOT passed through the fitted scaler before
    # prediction, unlike the training data -- confirm intentional.
    row = [x.academic_score, x.personality_h, x.personality_e,
           x.personality_x, x.personality_a, x.personality_c,
           x.personality_o, x.iq_score, x.verbal_ability_score,
           x.project_score, x.course_score, x.qa_score]
    test_pred_1 = np.asarray([row])
    new_prediction = estimator.predict(test_pred_1)
    y = np.insert(y, y.size, new_prediction)
    X = np.concatenate((X, test_pred_1), axis=0)

    # Z-score all scores, re-centered on 0.8 with a 0.1 spread, clipped
    # into (0, 1).
    # BUG FIX: the original looped ``for x in y``, shadowing the trainee
    # argument ``x``; np.mean/np.std (population std, ddof=0) reproduce the
    # hand-rolled mean/std exactly.
    y_new = list(y)
    avg = float(np.mean(y_new))
    sd = math.sqrt(float(np.mean((np.asarray(y_new) - avg) ** 2)))
    y_final = []
    for i in range(len(y_new)):
        pp = (y_new[i] - avg) / sd * 0.1 + 0.8
        if pp >= 1.0:
            pp = 0.9999
        if pp <= 0.6:
            pp = 0.0001
        y_final.append(pp)

    ctr = 0
    for p in Trainee.objects.all():
        p.score = y_final[ctr]
        p.save()
        ctr += 1
# Fix random seed for reproducibility.
np.random.seed(SEED)


def build_model():
    """Build a small funnel-shaped MLP regressor (ReLU hidden layers with
    30% dropout, single linear output) compiled for MSE with Adam."""
    net = Sequential()
    net.add(Dense(features.shape[1], activation='relu'))
    net.add(Dense(5, activation='relu'))
    net.add(Dropout(0.3))
    net.add(Dense(3, activation='relu'))
    net.add(Dropout(0.3))
    net.add(Dense(1, kernel_initializer='normal'))
    net.compile(optimizer='adam', loss='mean_squared_error')
    return net


# Evaluate the model on standardized features via 10-fold CV.
steps = [
    ('standardize', StandardScaler()),
    ('mlp', KerasRegressor(build_fn=build_model, epochs=EPOCHS,
                           batch_size=BATCH_SIZE, verbose=1)),
]
pipeline = Pipeline(steps)
kfold = KFold(n_splits=10)
results = cross_val_score(pipeline, features, labels, cv=kfold)
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))
model.add( Dense(135, input_dim=270, kernel_initializer='normal', activation='elu')) model.add(Dense(1, kernel_initializer='normal')) # Compile model (configure for training) # optimizer 'adam' was chosen because it (on average) is the speediest model.compile(loss='mean_squared_error', optimizer='adam') return model # evaluate model with standardized dataset estimator = KerasRegressor(build_fn=deep_learning_model, epochs=100, batch_size=5, verbose=0) estimator.fit(X, Y) y_keras_pred = estimator.predict(X_test) create_submission(test_data, y_keras_pred, 3) # ### Third Trial Summary -- Big improvement! Deep learning received a score on Kaggle of 0.207 # In[13]: # Create build function for KerasRegressor def deep_learning_model2():
# X_FINAL, y_FINAL = X_scaled[remove_inds,:], y_scaled[remove_inds,:] # X_scaled, y_scaled = X_scaled[keep_inds,:], y_scaled[keep_inds,:] #-------------------- #Split data to 90% train & 10% unseen X_train, X_unseen, y_train, y_unseen = train_test_split(X_scaled, y_scaled, test_size=0.10, random_state=32) kf = KFold(n_splits=4, shuffle=True) fig, ax = plt.subplots(1, 1, figsize=(8, 8)) fig2, ax2 = plt.subplots(1, 1, figsize=(8, 8)) for train_index, test_index in kf.split(X_train, y=y_train): model = KerasRegressor(build_fn=baseline_model, epochs=100) history = model.fit(X_train[train_index], y_train[train_index], validation_data=(X_train[test_index], y_train[test_index])) ax.plot(history.history['loss'], label='loss') ax.plot(history.history['val_loss'], label='validation loss') ax.set_ylabel('Loss') ax.set_xlabel('Epoch') ax.legend() ax.minorticks_on() ax.grid(which='major', ls='-', color=[0.15, 0.15, 0.15], alpha=0.15) ax.grid(which='minor', ls=':', dashes=(1, 5, 1, 5),
downcast='infer') def baseline_model(): # create model model = Sequential() model.add( Dense(5, input_dim=5, kernel_initializer='normal', activation='linear')) model.add(Dense(1, kernel_initializer='normal')) # Compile model model.compile(loss='mean_squared_error', optimizer='adam') return model estimator = KerasRegressor(build_fn=baseline_model, epochs=1000, verbose=0) t0 = time.clock() estimator.fit(X, y) t1 = time.clock() prediction = estimator.predict(X) train_error = np.abs(y - prediction) mean_error = np.mean(train_error) min_error = np.min(train_error) max_error = np.max(train_error) std_error = np.std(train_error) #print('prediction :',prediction) #print('train error :')
model.add(Activation('relu')) model.add(Dense(1)) #compile model model.compile(loss='mean_squared_error', optimizer=OPTIMIZER, metrics=['mean_squared_error']) return model # evaluate model with standardized dataset np.random.seed(seed) kreg = KerasRegressor(build_fn=baseline_model, epochs=NB_EPOCH, batch_size=BATCH_SIZE, verbose=VERBOSE) estimators = [] estimators.append(('standardize', StandardScaler())) estimators.append(('mlp', kreg)) pipeline = Pipeline(estimators) kfold = KFold(n_splits=2, random_state=seed) results = cross_val_score(pipeline, X, Y, cv=kfold) print("Error: %.4f (%.4f) MSE" % (results.mean(), results.std())) #denormalize data def denorm(min, max, input): z = (input * (max - min)) + min return z
def get_model_from_name(model_name, training_params=None, is_hp_search=False):
    """Look up an estimator class by name, merge stock defaults with any
    user-supplied training params, and return the configured (unfitted)
    estimator.

    Keras is imported lazily (and only once, tracked by the module-global
    ``keras_imported`` flag) so that non-deep-learning users never pay the
    import cost.
    """
    global keras_imported

    # For Keras
    epochs = 1000
    # if os.environ.get('is_test_suite', 0) == 'True' and model_name[:12] == 'DeepLearning':
    #     print('Heard that this is the test suite. Limiting number of epochs, which will increase training speed dramatically at the expense of model accuracy')
    #     epochs = 100

    # Stock hyperparameters per model name.
    all_model_params = {
        'LogisticRegression': {},
        'RandomForestClassifier': {'n_jobs': -2, 'n_estimators': 30},
        'ExtraTreesClassifier': {'n_jobs': -1},
        'AdaBoostClassifier': {},
        'SGDClassifier': {'n_jobs': -1},
        'Perceptron': {'n_jobs': -1},
        'LinearSVC': {'dual': False},
        'LinearRegression': {'n_jobs': -2},
        'RandomForestRegressor': {'n_jobs': -2, 'n_estimators': 30},
        'LinearSVR': {'dual': False, 'loss': 'squared_epsilon_insensitive'},
        'ExtraTreesRegressor': {'n_jobs': -1},
        'MiniBatchKMeans': {'n_clusters': 8},
        'GradientBoostingRegressor': {'presort': False, 'learning_rate': 0.1,
                                      'warm_start': True},
        'GradientBoostingClassifier': {'presort': False, 'learning_rate': 0.1,
                                       'warm_start': True},
        'SGDRegressor': {'shuffle': False},
        'PassiveAggressiveRegressor': {'shuffle': False},
        'AdaBoostRegressor': {},
        'LGBMRegressor': {'n_estimators': 2000, 'learning_rate': 0.15,
                          'num_leaves': 8, 'lambda_l2': 0.001,
                          'histogram_pool_size': 16384},
        'LGBMClassifier': {'n_estimators': 2000, 'learning_rate': 0.15,
                           'num_leaves': 8, 'lambda_l2': 0.001,
                           'histogram_pool_size': 16384},
        'DeepLearningRegressor': {'epochs': epochs, 'batch_size': 50,
                                  'verbose': 2},
        'DeepLearningClassifier': {'epochs': epochs, 'batch_size': 50,
                                   'verbose': 2},
        'CatBoostRegressor': {},
        'CatBoostClassifier': {},
    }

    # if os.environ.get('is_test_suite', 0) == 'True':
    #     all_model_params

    model_params = all_model_params.get(model_name, None)
    if model_params is None:
        model_params = {}

    # Hyperparameter search runs many fits, so trim the expensive knobs.
    if is_hp_search == True:
        if model_name[:12] == 'DeepLearning':
            model_params['epochs'] = 50
        if model_name[:4] == 'LGBM':
            model_params['n_estimators'] = 500

    if training_params is not None:
        print('Now using the model training_params that you passed in:')
        print(training_params)
        # Overwrite our stock params with what the user passes in (i.e., if the user wants 10,000 trees, we will let them do it)
        model_params.update(training_params)
        print(
            'After overwriting our defaults with your values, here are the final params that will be used to initialize the model:'
        )
        print(model_params)

    # Name -> unconfigured estimator instance.
    model_map = {
        # Classifiers
        'LogisticRegression': LogisticRegression(),
        'RandomForestClassifier': RandomForestClassifier(),
        'RidgeClassifier': RidgeClassifier(),
        'GradientBoostingClassifier': GradientBoostingClassifier(),
        'ExtraTreesClassifier': ExtraTreesClassifier(),
        'AdaBoostClassifier': AdaBoostClassifier(),
        'LinearSVC': LinearSVC(),

        # Regressors
        'LinearRegression': LinearRegression(),
        'RandomForestRegressor': RandomForestRegressor(),
        'Ridge': Ridge(),
        'LinearSVR': LinearSVR(),
        'ExtraTreesRegressor': ExtraTreesRegressor(),
        'AdaBoostRegressor': AdaBoostRegressor(),
        'RANSACRegressor': RANSACRegressor(),
        'GradientBoostingRegressor': GradientBoostingRegressor(),
        'Lasso': Lasso(),
        'ElasticNet': ElasticNet(),
        'LassoLars': LassoLars(),
        'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit(),
        'BayesianRidge': BayesianRidge(),
        'ARDRegression': ARDRegression(),

        # Clustering
        'MiniBatchKMeans': MiniBatchKMeans(),
    }

    # Older scikit-learn versions do not accept max_iter/tol on these
    # estimators; fall back to the bare constructors when they raise.
    try:
        model_map['SGDClassifier'] = SGDClassifier(max_iter=1000, tol=0.001)
        model_map['Perceptron'] = Perceptron(max_iter=1000, tol=0.001)
        model_map['PassiveAggressiveClassifier'] = PassiveAggressiveClassifier(
            max_iter=1000, tol=0.001)
        model_map['SGDRegressor'] = SGDRegressor(max_iter=1000, tol=0.001)
        model_map['PassiveAggressiveRegressor'] = PassiveAggressiveRegressor(
            max_iter=1000, tol=0.001)
    except TypeError:
        model_map['SGDClassifier'] = SGDClassifier()
        model_map['Perceptron'] = Perceptron()
        model_map['PassiveAggressiveClassifier'] = PassiveAggressiveClassifier()
        model_map['SGDRegressor'] = SGDRegressor()
        model_map['PassiveAggressiveRegressor'] = PassiveAggressiveRegressor()

    # Optional third-party libraries, registered only when installed.
    if xgb_installed:
        model_map['XGBClassifier'] = XGBClassifier()
        model_map['XGBRegressor'] = XGBRegressor()

    if lgb_installed:
        model_map['LGBMRegressor'] = LGBMRegressor()
        model_map['LGBMClassifier'] = LGBMClassifier()

    if catboost_installed:
        model_map['CatBoostRegressor'] = CatBoostRegressor(
            calc_feature_importance=True)
        model_map['CatBoostClassifier'] = CatBoostClassifier(
            calc_feature_importance=True)

    if model_name[:12] == 'DeepLearning':
        if keras_imported == False:
            # Suppress some level of logs if TF is installed (but allow it to
            # not be installed, and use Theano instead)
            try:
                os.environ['TF_CPP_MIN_VLOG_LEVEL'] = '3'
                os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
                from tensorflow import logging
                logging.set_verbosity(logging.INFO)
            except:
                pass

            # Publish the Keras names at module scope so build functions
            # defined elsewhere in this module can see them.
            global maxnorm
            global Dense, Dropout
            global LeakyReLU, PReLU, ThresholdedReLU, ELU
            global Sequential
            global keras_load_model
            global regularizers, optimizers
            global Activation
            global KerasRegressor, KerasClassifier

            from keras.constraints import maxnorm
            from keras.layers import Activation, Dense, Dropout
            from keras.layers.advanced_activations import LeakyReLU, PReLU, ThresholdedReLU, ELU
            from keras.models import Sequential
            from keras.models import load_model as keras_load_model
            from keras import regularizers, optimizers
            from keras.wrappers.scikit_learn import KerasRegressor, KerasClassifier

            keras_imported = True

        model_map['DeepLearningClassifier'] = KerasClassifier(
            build_fn=make_deep_learning_classifier)
        model_map['DeepLearningRegressor'] = KerasRegressor(
            build_fn=make_deep_learning_model)

    try:
        model_without_params = model_map[model_name]
    except KeyError as e:
        print(
            'It appears you are trying to use a library that is not available when we try to import it, or using a value for model_names that we do not recognize'
        )
        raise (e)

    # The test suite forces single-threaded fits for determinism.
    if os.environ.get('is_test_suite', False) == 'True':
        if 'n_jobs' in model_params:
            model_params['n_jobs'] = 1

    model_with_params = model_without_params.set_params(**model_params)

    return model_with_params
def LSTM_Model(n_feat):
    """Return a scikit-learn KerasRegressor that builds LSTM_Model_gen with
    *n_feat* features (silent training, batch size 8, 50 epochs)."""
    def _build():
        # Closure captures n_feat for the deferred model construction.
        return LSTM_Model_gen(n_feat)

    return KerasRegressor(build_fn=_build, verbose=0, batch_size=8, epochs=50)
# create model model = Sequential() model.add( Dense(7, input_dim=7, kernel_initializer='normal', activation='relu')) model.add(Dense(4, kernel_initializer='normal')) # Compile model model.compile(loss='mean_absolute_error', optimizer='adam') return model # fix random seed for reproducibility seed = 7 np.random.seed(seed) # evaluate model with standardized dataset estimator = KerasRegressor(build_fn=baseline_model, epochs=3000, batch_size=8474, verbose=0) kfold = KFold(n_splits=10, random_state=seed) results = cross_val_score(estimator, X, y, cv=kfold) print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std())) # example of training a final regression model from sklearn.linear_model import LinearRegression from sklearn.datasets import make_regression # generate regression dataset X, y = make_regression(n_samples=100, n_features=2, noise=0.1) # fit final model model = LinearRegression() model.fit(X, y) # new instances where we do not know the answer
batch_size = 1000
print('Epochs: ', epochs)
print('Batch size: ', batch_size)

# NOTE(review): EarlyStopping monitors 'val_mean_absolute_error', but fit()
# below is called without validation data, so the monitored metric may never
# be produced -- confirm a validation_split is configured in make_model/fit.
keras_callbacks = [
    # ModelCheckpoint('/tmp/keras_checkpoints/model.{epoch:02d}-{val_loss:.2f}.hdf5', monitor='val_loss', save_best_only=True, verbose=2)
    # ModelCheckpoint('/tmp/keras_checkpoints/model.{epoch:02d}.hdf5', monitor='val_loss', save_best_only=True, verbose=0)
    # TensorBoard(log_dir='/tmp/keras_logs/model_3', histogram_freq=0, write_graph=True, write_images=True, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None),
    EarlyStopping(monitor='val_mean_absolute_error', patience=80, verbose=0)  # 20
]

print(x_train.shape)

# keras.wrappers.scikit_learn.KerasRegressor
from keras.wrappers.scikit_learn import KerasRegressor

model = KerasRegressor(build_fn=make_model, epochs=epochs,
                       batch_size=batch_size, verbose=True,
                       callbacks=keras_callbacks)
model.fit(x_train, y_train)
'''
history = model.fit(x_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            shuffle=True,
            verbose=2,#0, # Change it to 2, if wished to observe execution
            #validation_data=(arr_x_valid, arr_y_valid),
            callbacks=keras_callbacks)
'''

# Spot-check the first 20 test predictions against the labels.
y_pred = model.predict(x_test[:20, ])
print(y_pred)
print(y_test[:20])
regressor.add( Dense(units=nb_units, kernel_initializer='uniform', activation='relu', input_dim=325)) regressor.add( Dense(units=nb_units, kernel_initializer='uniform', activation='relu')) regressor.add( Dense(units=1, kernel_initializer='uniform', activation='linear')) regressor.compile(optimizer='adam', loss='mae', metrics=['mse', 'mae', 'mape']) return regressor grid_regressor = KerasRegressor(build_fn=build_regressor_for_grid) parameters = { 'batch_size': [30, 50, 100], 'epochs': [10, 30], 'regressor': ['adam'], 'nb_units': [100, 150, 200] } grid_search = GridSearchCV(estimator=grid_regressor, param_grid=parameters) grid_search = grid_search.fit(X_train, y_train) best_parameters = grid_search.best_params_ best_accuracy = grid_search.best_score_ ### Build one ANN def build_regressor(): regressor = Sequential()
activation='relu')) model.add( Dense(12, input_dim=12, kernel_initializer='normal', activation='relu')) model.add( Dense(12, input_dim=12, kernel_initializer='normal', activation='relu')) model.add(Dense(1, kernel_initializer='normal')) # compile model model.compile(loss='mean_squared_error', optimizer='adam') return model # fix random seed for reproducibility seed = 7 numpy.random.seed(seed) x_train, x_test, y_train, y_test = train_test_split(X, Y) # evaluate model with standardized dataset estimator = KerasRegressor(build_fn=baseline_model, nb_epoch=1, batch_size=5, verbose=0) kfold = KFold(n_splits=30, random_state=seed) results = cross_val_score(estimator, X, Y, cv=kfold) print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std())) estimator.fit(x_train, y_train) y_pred = estimator.predict(x_test) # print(y_pred.shape()) fnc.errors(y_test, y_pred)
def baseline_model():
    """Build a 5-input, two-hidden-layer MLP regressor compiled for MSE
    with Adam."""
    model = Sequential()
    # FIX: use the Keras 2 argument names (units=/kernel_initializer=) that
    # the rest of this file already uses; output_dim=/init= are the removed
    # Keras 1 spellings.
    model.add(Dense(units=3, kernel_initializer='uniform', activation='relu',
                    input_dim=5))
    model.add(Dense(units=3, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(units=1, kernel_initializer='uniform'))
    model.compile(optimizer='adam', loss='mean_squared_error',
                  metrics=['mean_squared_error'])
    return model


from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import KFold, cross_val_score

# Cross-validate on the first target column, then fit and predict.
estimator = KerasRegressor(build_fn=baseline_model, epochs=1000, batch_size=5,
                           verbose=1)
# BUG FIX: random_state requires shuffle=True in recent scikit-learn.
kfold = KFold(n_splits=10, shuffle=True, random_state=1)
results = cross_val_score(estimator, x_train, y_train[:, 0], cv=kfold,
                          n_jobs=1)
estimator.fit(x_train, y_train[:, 0])
y_pred = estimator.predict(x_test)
y_pred

plt.scatter(x_train[:, 0], y_train[:, 0], color='red')
plt.plot(x_test[:, 0], y_pred, color='blue')
plt.xlabel('Product')
smoothing_window_length=5, smoothing_polyorder=3, reshape=True) #X_train, y_train, X_test, y_test = dataload.load_sin_data(seq_len, normalise_window=True) print('> Data Loaded. Compiling...') # Grid search parameters kernel_sizes = [5, 9] step_sizes = [2] single_branch = True stride = [3] lstm_units = [200, 400] branches = [3] model = KerasRegressor(build_fn=single_cnn_gru.build_model, validation_split=0.20) cnn_layers = [3] filter_nums = [128] batch_size = [32] single_lstm = [True] cat_branches = [True] param_grid = {} if (single_branch): param_grid = dict(layers=[(1, seq_len)], epochs=[epochs], cnn_layers=cnn_layers, lstm_units=lstm_units, kernel_size=kernel_sizes, stride_1=stride, filter_num=filter_nums,