def linearRegression(X_train, X_test, y_train, y_test):
    print(X_train.shape)
    print(y_train.shape)
    print(X_train)
    print(y_train)

    # build_fn must be a callable that returns a compiled model,
    # not an already-built model instance
    def build_model():
        model = Sequential()
        model.add(Dense(1, activation='linear', input_dim=21))
        model.compile(loss='mse', optimizer='rmsprop')
        return model

    estimator = KerasRegressor(build_fn=build_model, epochs=100,
                               batch_size=16, verbose=1)
    estimator.fit(X_train, y_train)

    y_test_prediction = estimator.predict(X_test)
    # mean_squared_error returns MSE, not RMSE
    mse_error = mean_squared_error(y_true=y_test, y_pred=y_test_prediction)
    r2 = r2_score(y_true=y_test, y_pred=y_test_prediction)
    print("MSE")
    print(mse_error)
    print("R2 score")
    print(r2)

    score = estimator.model.evaluate(X_test, y_test, batch_size=16)
    print("Score")
    print(score)
def train():
    load_data()
    for val_group in range(NUM_SPLITS):
        validate = GLO.group_nums == val_group
        train = ~validate
        print('Training group {} with a split of {}+{} ({:.2f})'.format(
            val_group, train.sum(), validate.sum(), train.sum() / len(train)))
        # `nb_epoch` is the deprecated Keras 1 spelling of `epochs`
        reg = KerasRegressor(build_fn=build_model, epochs=MAX_EPOCHS,
                             batch_size=128, verbose=1)
        checkpointer = ModelCheckpoint(
            filepath="model_group" + str(val_group) + "_e{epoch:04d}-{val_loss:.4f}.h5",
            monitor='val_loss', verbose=1, save_best_only=True)
        logger = CSVLogger('group{}_train.csv'.format(val_group))
        stopper = EarlyStopping(monitor='val_loss', patience=12)
        reg.fit(GLO.X[train], GLO.y[train],
                validation_data=(GLO.X[validate], GLO.y[validate]),
                callbacks=[checkpointer, logger, stopper,
                           TensorBoard(log_dir='tensorboard{}'.format(val_group))])
def Train_CV(X_train, y_train, X_test, y_test, k=5, epochs=1000,
             batchsize=200, seed=100):
    estimator = KerasRegressor(build_fn=create_model, epochs=epochs,
                               batch_size=batchsize, verbose=False)
    kfold = KFold(n_splits=k, shuffle=True, random_state=seed)  # random_state requires shuffle=True
    results = cross_val_score(estimator, X_train, y_train, cv=kfold)
    print("Results: %.2f (%.2f)" % (results.mean(), results.std()))
    estimator.fit(X_train, y_train)

    # evaluate model on test set
    prediction = estimator.predict(X_test)
    test_error = np.abs(y_test - prediction)  # `y` was undefined here; use y_test
    mean_error = np.mean(test_error)
    # min_error = np.min(test_error)
    # max_error = np.max(test_error)
    std_error = np.std(test_error)
    print('-' * 30)
    print('Evaluation Results')
    # these are absolute-error statistics, not MSE
    print("Results (mean, std): %.2f (%.2f) absolute error" % (mean_error, std_error))
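# Train_CV above assumes a create_model() builder defined elsewhere.
# A minimal hypothetical sketch of such a builder, compatible with
# KerasRegressor; the input width (input_dim=10) is an assumption and
# must match X_train's feature count.
from keras.models import Sequential
from keras.layers import Dense

def create_model(input_dim=10):
    model = Sequential()
    model.add(Dense(32, input_dim=input_dim, activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mean_absolute_error', optimizer='adam')
    return model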
def fit(is_train=True):
    global dims
    provider = KaggleProvider.from_file('train.csv')
    x_train, y_train = provider.load_data(is_train)
    dims = x_train.shape[1]
    # fix random seed for reproducibility
    seed = 42
    np.random.seed(seed)
    regressor = KerasRegressor(build_fn=baseline_model, epochs=10,
                               batch_size=5, verbose=0)
    estimators = []
    estimators.append(('standardize', StandardScaler()))
    estimators.append(('mlp', regressor))
    pipeline = Pipeline(estimators)
    kfold = KFold(n_splits=5, shuffle=True, random_state=seed)
    results = cross_val_score(pipeline, x_train, y_train, cv=kfold,
                              scoring='neg_mean_absolute_error')
    # the scoring above is negated MAE, not MSE
    print("Standardized: {} ({}) MAE".format(results.mean(), results.std()))
    if not is_train:
        regressor.fit(x_train, y_train)
        predict(regressor)
def entrenar_regresor(f, l, size):
    regressor = KerasRegressor(buildModel, epochs=1000, batch_size=size, verbose=0)
    regressor.fit(f, l)
    return regressor
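# A hypothetical smoke test for entrenar_regresor; buildModel is assumed
# to return a compiled single-output Keras model for 4 input features.
import numpy as np

features = np.random.rand(100, 4)
labels = np.random.rand(100)
reg = entrenar_regresor(features, labels, size=10)
print(reg.predict(features[:5]))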
def adam_regression(x, y):
    print(y)
    # scale features and target with separate scalers, so inverse_transform
    # on the predictions unambiguously uses the target's scaling
    sc_x = MinMaxScaler()
    sc_y = MinMaxScaler()
    x = sc_x.fit_transform(x)
    y = sc_y.fit_transform(y.reshape(-1, 1))
    print(y)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
    model = KerasRegressor(build_fn=build_regressor, batch_size=32, epochs=EPOCHS)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test).reshape(-1, 1)
    predictions = sc_y.inverse_transform(y_pred)
    print(y_pred)
    print(predictions)
    fig, ax = plt.subplots()
    ax.scatter(y_test, y_pred)
    ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=4)
    ax.set_xlabel('Measured')
    ax.set_ylabel('Predicted')
    plt.show()
def main():
    X, Y = processamentoDatabase()
    dadosEntradaTreinamento, dadosEntradaTeste, dadosSaidaTreinamento, dadosSaidaTeste = train_test_split(
        X, Y, test_size=0.25)
    Regressor = KerasRegressor(build_fn=criaRede, epochs=200, batch_size=300)
    # print(sorted(sklearn.metrics.SCORERS.keys()))
    # kfold = KFold(n_splits=10, shuffle=True, random_state=1)
    resultados = cross_val_score(estimator=Regressor,
                                 X=dadosEntradaTreinamento,
                                 y=dadosSaidaTreinamento, cv=10)
    Regressor.fit(dadosEntradaTreinamento, dadosSaidaTreinamento)
    predicao = Regressor.predict(dadosEntradaTeste)
    plt.plot(dadosSaidaTeste, "rs")
    plt.plot(predicao, "bs")
    plt.title("analysis plot")
    plt.grid(True)
    plt.show()
    print("Mean value of the cars, in euros: {}\n".format(resultados.mean()))
    print("Standard deviation: {}\n".format(resultados.std()))
    # accuracy_score is a classification metric and fails on continuous
    # targets; use a regression metric (assumes r2_score is imported
    # from sklearn.metrics)
    print("model score: {}".format(r2_score(dadosSaidaTeste, predicao)))
def KerasRegression(x_train, y_train, x_test, y_test, x_real_test, i):
    # define base model
    def base_model():
        model = Sequential()
        model.add(Dense(35, input_dim=len(x_train.columns), activation="relu",
                        kernel_initializer="normal"))
        model.add(Dense(16, activation="relu", kernel_initializer="normal"))
        model.add(Dense(1, kernel_initializer="normal"))
        model.compile(loss='mean_squared_error', optimizer='adam')
        return model

    keras_label = y_train.to_numpy()  # `as_matrix` was removed from pandas
    clf = KerasRegressor(build_fn=base_model, epochs=1000, batch_size=5, verbose=0)
    clf.fit(x_train, keras_label)

    # make predictions (the model is trained on log-scale targets, hence np.exp)
    keras_pred = np.exp(clf.predict(x_test))
    plot_roc_curve(y_test, keras_pred, 'Keras Reg. Target: ' + str(i + 1))
    # keras_real_pred = np.exp(clf.predict(x_real_test))
    return keras_pred
class LstmModel(BaseModel):
    name = "LSTM"

    def build_model(self) -> Model:
        model = Sequential()
        model.add(Reshape((1, self.x_train.shape[1])))
        model.add(LSTM(3, activation='relu'))
        model.add(RepeatVector(1))
        model.add(LSTM(5, activation='relu', return_sequences=True))
        model.add(TimeDistributed(Dense(1)))
        model.add(Flatten())
        # note: 'accuracy' is not meaningful for regression; kept for logging only
        model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
        return model

    def fit(self):
        ea = EarlyStopping(monitor='loss', patience=20, restore_best_weights=True)
        self.model = KerasRegressor(build_fn=self.build_model, epochs=500,
                                    batch_size=16, verbose=0, shuffle=True,
                                    callbacks=[ea])
        self.model.fit(self.x_train, self.y_train)
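# A hypothetical harness for LstmModel; it assumes BaseModel can be
# constructed without arguments and merely stores x_train / y_train.
import numpy as np

lstm = LstmModel()
lstm.x_train = np.random.rand(64, 10)  # 64 samples, 10 features
lstm.y_train = np.random.rand(64)
lstm.fit()
print(lstm.model.predict(lstm.x_train[:3]))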
def DeepLearningRegressor_(self):
    """Run deep-learning regression."""
    from keras.callbacks import EarlyStopping
    from keras.wrappers.scikit_learn import KerasRegressor
    import tensorflow as tf

    # configure GPU memory usage (TF1-style session config)
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 1.0
    session = tf.Session(config=config)

    estimator = None
    # build the estimator according to the grid-search flag
    if self.do_analysis_gridsearch:
        estimator = KerasRegressor(build_fn=self._make_prd_deepleaning_model)
        estimator = self.make_grid_search_estimator(estimator,
                                                    scoring='neg_mean_absolute_error')
    else:
        estimator = self._make_prd_deepleaning_model(
            self.params[PARAM_NHIDDEN][0],
            self.params[PARAM_NUNIT][0],
            self.params[PARAM_KEEPDROP][0])
        estimator.fit(np.array(self.X_train), np.array(self.y_train),
                      batch_size=self.params[PARAM_BATCHSIZE][0],
                      epochs=100000, shuffle=False,
                      validation_data=(np.array(self.X_test), np.array(self.y_test)),
                      callbacks=[EarlyStopping(patience=3)])
    # wrap in a bagging / AdaBoost estimator
    estimator = self.make_bagada_prd_estimator(estimator)
    return estimator
def DLmodel_regressor(Xtrain_in, ytrain_in, Xtest_in, ytest_in,
                      lime_flag=False, df_row=None):
    start_time = time.time()
    estimator = KerasRegressor(build_fn=DLmodel_baseline, epochs=20,
                               batch_size=5, verbose=10)
    seed = 23
    numpy.random.seed(seed)
    estimator.fit(Xtrain_in, ytrain_in)
    y_test_pred = estimator.predict(Xtest_in)
    y_train_pred = estimator.predict(Xtrain_in)
    # sklearn metrics take (y_true, y_pred) in that order
    score_test = r2_score(ytest_in, y_test_pred)
    score_train = r2_score(ytrain_in, y_train_pred)
    adj_Rscore_train = adjusted_R2score_calc(Xtrain_in, score_train)
    adj_Rscore_test = adjusted_R2score_calc(Xtest_in, score_test)
    mrs_train = mean_squared_error(ytrain_in, y_train_pred)
    mrs_test = mean_squared_error(ytest_in, y_test_pred)
    if lime_flag:
        lime_explainer(Xtrain_in, df_row, estimator, "Keras_base")
    time_end = time.time() - start_time
    log_record_result("Keras base model", time_end, score_train, score_test,
                      adj_Rscore_train, adj_Rscore_test, mrs_train, mrs_test)
    plot_residuals(Xtest_in, ytest_in, estimator, "Keras_base")  # plots residuals
    return "Keras base model", str(time_end), str(score_train), str(score_test), \
        str(adj_Rscore_train), str(adj_Rscore_test)
def explain_row_eli5():
    global map_values_eli5
    # compute explanations only once
    if map_values_eli5 is not None:
        return map_values_eli5
    copy_model = tf.keras.models.load_model('{}/lending-club.h5'.format(name),
                                            custom_objects={"f1": kr.f1})

    def base_model():
        return copy_model

    my_model = KerasRegressor(build_fn=base_model)
    my_model.fit(X_test.copy(), y_test.copy())
    perm = PermutationImportance(my_model).fit(X_test[0:1000].copy(),
                                               y_test[0:1000].copy())
    # eli5.show_weights(perm, feature_names=list(df.drop('loan_repaid', axis=1).columns))
    s = perm.feature_importances_
    sorted_indices = sorted(range(len(s)), key=lambda k: s[k], reverse=True)
    class_1 = [(a, s[a]) for a in sorted_indices if s[a] > 0]
    sorted_indices = sorted(range(len(s)), key=lambda k: s[k])
    class_0 = [(a, s[a] * -1) for a in sorted_indices if s[a] <= 0]
    # the original mapped both keys to class_1, which looks like a typo
    map_values_eli5 = {0: class_0, 1: class_1}
    return map_values_eli5
def nn_model(self):
    dataset = get_data()
    train_data = dataset[dataset['score'] > 0.0]
    test_data = dataset[dataset['score'] < 0]
    y_data = train_data['score']
    x_data = train_data.drop(columns=['id', 'score'])
    test_data.reset_index(inplace=True, drop=True)
    x_test = test_data.drop(columns=['id', 'score'])
    baseline_model = self._get_nn_base_model
    estimator = KerasRegressor(build_fn=baseline_model, epochs=100,
                               batch_size=5, verbose=1)
    # kfold = KFold(n_splits=5)
    # mae = make_scorer(mean_absolute_error)
    # res = cross_val_score(estimator, X=x_data, y=y_data, cv=kfold, scoring=mae)
    # mae_error = np.mean(res)
    estimator.fit(x_data, y_data)
    y_pred = estimator.predict(x_data)
    mae_error = mean_absolute_error(y_data, y_pred)
    print(f'mae error: {mae_error}')
    print(f'nn score: {1 / (1 + mae_error)}')
    pred = estimator.predict(x_test)
    sub = pd.DataFrame({'id': test_data['id'], 'score': pred})
    sub['score'] = sub['score'].apply(lambda item: int(round(item)))
    sub.to_csv('submittion_5.csv', index=False)
def init_model_villainous():
    global model, estimator
    dataset = loadtxt('game.txt', delimiter=',')
    X = dataset[:, 0:460]
    Y = dataset[:, 460]

    def baseline_model():
        # create model
        model = Sequential()
        model.add(Dense(460, input_dim=460, kernel_initializer='normal',
                        activation='relu'))
        model.add(Dense(1, kernel_initializer='normal'))
        # compile model
        model.compile(loss='mean_squared_error', optimizer='adam')
        return model

    estimator = KerasRegressor(build_fn=baseline_model, epochs=400,
                               batch_size=50, verbose=2)
    print(f"Type of x: {type(X)}, Y: {type(Y)}")
    estimator.fit(X, Y)
    estimator.model.save("TheModel")
    # estimator.fit(X[0:1, :], Y[0:1], epochs=1, batch_size=1)
    return

    # unreachable evaluation path, kept for reference:
    # kfold = KFold(n_splits=10)
    # results = cross_val_score(estimator, X, Y, cv=kfold)
    # print("Baseline: %.2f (%.2f) MSE" % (results.mean(), results.std()))
    # estimator.fit(X, Y)
def final_predict(X_train, y_train, X_test, company_train, company_dev, company_test):
    global X_DIM, Y_DIM
    X_DIM = X_train[0].shape[0]
    y_train = np.array(y_train)
    print(X_train.shape, y_train.shape)
    Y_DIM = 1
    # SVM alternative:
    # regressor = LinearSVR(C=0.1, verbose=1)
    regressor = KerasRegressor(build_fn=attention_imp_merge_exp,
                               epochs=NB_EPOCH, batch_size=BATCH_SIZE, verbose=1)
    print(regressor)
    regressor.fit([X_train, company_train, X_train], y_train)
    # predictions = regressor.predict(company_test)
    predictions = regressor.predict([X_test, company_test, X_test])
    print(predictions.shape)
    print(predictions[:20])
    joblib.dump(predictions,
                os.path.join(config.RESULTS_DIR, 'subtask2_hl', 'dl_predictions2.pkl'))
    print('Training result',
          cosine_similarity(y_train, regressor.predict([X_train, company_train, X_train])))
def assert_regression_predict_shape_correct(num_test):
    reg = KerasRegressor(build_fn=build_fn_reg, hidden_dims=hidden_dims,
                         batch_size=batch_size, epochs=epochs)
    reg.fit(X_train, y_train, batch_size=batch_size, epochs=epochs)
    preds = reg.predict(X_test[:num_test], batch_size=batch_size)
    assert preds.shape == (num_test,)
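# The test above assumes a build_fn_reg that accepts hidden_dims:
# KerasRegressor forwards constructor kwargs whose names match build_fn's
# signature. A minimal sketch; X_train is assumed to be the module-level
# test fixture, and all other names here are hypothetical.
from keras.models import Sequential
from keras.layers import Dense

def build_fn_reg(hidden_dims=32):
    model = Sequential()
    model.add(Dense(hidden_dims, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mse', optimizer='adam')
    return model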
def train_neural_net_with_keras(x_train, y_train):
    # mini-batches are used to speed up training;
    # `verbose` tunes how much is logged:
    #   verbose=2 prints the iteration and loss,
    #   verbose=0 prints nothing
    estimator = KerasRegressor(build_fn=baseline_model, epochs=ITERATION_COUNT,
                               batch_size=5, verbose=2)
    estimator.fit(x_train, y_train)
    return estimator
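# A hypothetical smoke test; baseline_model and ITERATION_COUNT are
# assumed to be defined at module level.
import numpy as np

x = np.random.rand(50, 8)
y = x.sum(axis=1)
est = train_neural_net_with_keras(x, y)
print(est.predict(x[:5]))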
def cross_validation_regressor(k, training, target):
    # hold-out fraction for k folds
    fold = 1.0 / k
    seed = 7
    np.random.seed(seed)

    print('building the regressor')
    k_model = KerasRegressor(build_fn=neural_network_regressor, epochs=15000,
                             batch_size=30, verbose=0)
    mse = 0
    accuracy = 0
    # for i in range(k):
    # split
    x_train, x_test, y_train, y_test = train_test_split(training, target,
                                                        test_size=fold,
                                                        random_state=seed)
    # plot
    # learning_curve(np.array(x_train), np.array(y_train), np.array(x_test), np.array(y_test), neural_network())

    print('fitting the regressor')
    # fit the model
    k_model.fit(np.array(x_train), np.array(y_train))
    # make a prediction
    y_pred = k_model.predict(np.array(x_test))

    # print comparison
    for i in range(len(y_pred)):
        print(round(y_pred[i], 1), y_test[i])

    # print('mse: ', mean_squared_error(y_test, y_pred))
    mse += mean_squared_error(toFloat(y_test), toFloat(y_pred))

    # accuracy on predictions rounded to the nearest half
    y_pred_round = nearestHalf(y_pred)
    # change data to string values
    y_pred_round = ['%.2f' % score for score in y_pred_round]
    y_test = ['%.2f' % test for test in y_test]
    accuracy += accuracy_score(y_test, y_pred_round)
    # print('accuracy: ', round(accuracy_score(y_test, y_pred_round), 3) * 100, '%')
    # print('mse: ', (mse / k))
    # print('accuracy: ', round(accuracy / k, 3) * 100, '%')
    print('mse: ', mse)
def test_keras_regressor():
    model = Sequential()
    model.add(Dense(input_dim, input_shape=(input_dim,)))
    model.add(Activation('relu'))
    model.add(Dense(1))
    # softmax over a single output unit always yields 1.0;
    # a regression head should be linear
    model.add(Activation('linear'))
    sklearn_regressor = KerasRegressor(model, optimizer=optim, loss=loss,
                                       train_batch_size=batch_size,
                                       test_batch_size=batch_size,
                                       nb_epoch=nb_epoch)
    sklearn_regressor.fit(X_train_reg, y_train_reg)
    sklearn_regressor.score(X_test_reg, y_test_reg)
def kfold_test(feature, label, epoch, batch_size):
    seed = 10
    np.random.seed(seed)
    estimator = KerasRegressor(build_fn=baseline_model, epochs=epoch,
                               batch_size=batch_size, verbose=0)
    kfold = KFold(n_splits=5, shuffle=True, random_state=seed)  # random_state requires shuffle=True
    results = cross_val_score(estimator, feature, label, cv=kfold)
    print(results)
    print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))
    estimator.fit(feature, label)
    return estimator
def _read_loto_result():
    _df_loto = pd.read_csv('C:\\Users\\hal\\Downloads\\loto6.csv', sep=",",
                           encoding='shift_jis')
    # print(_df_loto)
    for a_i in range(1, 2):
        a_num_str = '第' + str(a_i) + '数字'
        # extract the past winning numbers
        _df_loto_sub = _df_loto[['日付', a_num_str]]
        print(_df_loto_sub)
        # plt.plot(_df_loto_sub)
        # plt.show()

        # drop rows with missing values (disabled)
        # _df_loto_sub = _df_loto_sub.dropna()

        # (one-hot encoding could be added here)

        # training
        X = _df_loto_sub[['日付']]  # keep a 2-D shape for the scaler
        Y = _df_loto_sub[a_num_str]
        x_train, x_test, y_train, y_test = model_selection.train_test_split(
            X, Y, test_size=0.2)

        # standardization
        scaler = StandardScaler()
        scaler.fit(x_train)
        x_train = scaler.transform(x_train)
        x_test = scaler.transform(x_test)

        # build the Keras model
        model = KerasRegressor(build_fn=reg_model, epochs=200, batch_size=16,
                               verbose=0)
        # training
        model.fit(x_train, y_train)
        # score (for reference)
        model.score(x_test, y_test)
class NNReplicator(TransformerMixin):
    def __init__(self, embedder, layers, dropout, lr, act_func, loss_func,
                 epochs, batch_size):
        self.embedder = embedder
        self.layers = layers
        self.dropout = dropout
        self.lr = lr
        self.act_func = act_func
        self.loss_func = loss_func
        self.epochs = epochs
        self.batch_size = batch_size

    def nnConstruct(self, shape):
        # network that maps the raw features onto the embedding
        model = Sequential()
        for i, (layer, drop) in enumerate(zip(self.layers, self.dropout)):
            if i == 0:
                model.add(Dense(layer, input_shape=(shape,), activation=self.act_func))
            else:
                model.add(Dense(layer, activation=self.act_func))
            model.add(Dropout(drop))
        model.add(Dense(self.embedder.n_components, activation='linear'))
        ada = optimizers.Adagrad(lr=self.lr)
        model.compile(optimizer=ada, loss=self.loss_func)
        self.krObject = KerasRegressor(lambda: model, epochs=self.epochs,
                                       batch_size=self.batch_size)

    def fit(self, X, y=None):
        shape = X.shape[1]
        self.nnConstruct(shape)
        X_ = self.embedder.fit_transform(X)
        self.krObject.fit(X, X_)
        return self

    def transform(self, X):
        return self.krObject.predict(X)
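# A hypothetical usage of NNReplicator: train a network to replicate a
# 2-D PCA embedding, then apply it to new rows.
import numpy as np
from sklearn.decomposition import PCA

X = np.random.rand(200, 10)
rep = NNReplicator(embedder=PCA(n_components=2), layers=[32, 16],
                   dropout=[0.1, 0.1], lr=0.01, act_func='relu',
                   loss_func='mse', epochs=50, batch_size=16)
rep.fit(X)
print(rep.transform(X[:3]))  # approximate PCA coordinates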
def fit_and_predict_nn(self, TEST_YEAR):
    X, Y, xTrain, yTrain, xTest, yTest, names = self.build_data_arrays(TEST_YEAR)
    predictor = KerasRegressor(build_fn=wide_model, epochs=1000,
                               batch_size=5, verbose=0)
    scores = {}
    output = {}
    relativeError = {}
    for p in positions:
        if len(xTrain[p]) > 1 and len(xTest[p]) > 1:
            predictor.fit(np.array(xTrain[p]), np.array(yTrain[p]))
            prediction = predictor.predict(np.array(xTest[p]))
            output[p] = pd.DataFrame(zip(names[p], prediction),
                                     columns=['name', 'value']).sort_values(
                                         by=['value'], ascending=False)
            scores[p] = (mean_squared_error(np.array(yTest[p]), np.array(prediction)),
                         r2_score(np.array(yTest[p]), np.array(prediction)))
            relativeError[p] = get_relative_error(output[p], TEST_YEAR)
    return output
def model(hemi="N"):
    f = get_data("Data_Proc/%s_seaice_extent_daily_v4.0.csv" % hemi)
    X = f[["yr", "month", "doy", "day"]].to_numpy()  # `as_matrix` was removed from pandas
    y = f["Extent"].to_numpy()
    scaler = MinMaxScaler()
    X = scaler.fit_transform(X)  # the original fit the scaler but never transformed X
    m = KerasRegressor(build_fn=deep_model, epochs=50, batch_size=100, verbose=0)
    kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
    results = cross_val_score(m, X, y, cv=kfold)
    print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))
    m.fit(X, y)
    m.model.save("Data_Proc/%s_model.h5" % hemi)
    return
def wtf(self):
    kr = KerasRegressor(build_fn=self.get_model, epochs=100, batch_size=5,
                        verbose=2)
    # fit() returns a History object, not the regressor,
    # so don't overwrite `kr` with its return value
    kr.fit(self.x_train, self.y_train)
    return kr
def train_model(dataframe):
    X_train_scaled, y_train, X_test_scaled, y_test, dataframe_number = prepare_data(dataframe)
    model = KerasRegressor(build_fn=create_model, epochs=150, batch_size=100,
                           verbose=False)
    history = model.fit(X_train_scaled, y_train,
                        validation_data=(X_test_scaled, y_test),
                        verbose=True)  # , callbacks=[reduce_lr]
    cross_val = cross_val_score(model, X_train_scaled, y_train, cv=5,
                                verbose=10, scoring=make_scorer(r2_score))
    # cross_val = cross_validate(model, X_train_scaled, y_train, cv=5, verbose=10, scoring=make_scorer(r2_score))

    # Save training results
    # with open('article_networks_nozeros/poly_reg/article%s_history.pkl' % dataframe_number, 'wb') as handle:
    #     pk.dump(history.history, handle, protocol=pk.HIGHEST_PROTOCOL)
    # with open('article_networks_nozeros/poly_reg/article%s_crossval.pkl' % dataframe_number, 'wb') as handle:
    #     pk.dump(cross_val, handle, protocol=pk.HIGHEST_PROTOCOL)
    # model.model.save(r'article_networks_nozeros/poly_reg/article%s_model.h5' % dataframe_number)
    return model, history, cross_val
def evaluate_model(label, **kwargs):
    # Initialize default model parameters; overwrite them if included in kwargs
    model_params = {
        'num_epochs': 30,
        'batch_size': 100,
        'num_folds': 3,
        'activation': 'linear',
        'num_layers': 2,  # the original dict listed this key twice (3, then 2)
        'dropout': False,
        'learning_rate': 0.001,
        'beta_1': 0.9,
        'beta_2': 0.999
    }
    model_params.update(kwargs)

    kf = StratifiedKFold(n_splits=model_params['num_folds'], shuffle=True,
                         random_state=seed)
    model_history = {
        'all_acc_histories': [],
        'all_loss_histories': [],
        'all_val_acc_histories': [],
        'all_val_loss_histories': []
    }

    # Train the model on each validation split
    for index, (train_indices, val_indices) in enumerate(kf.split(X_train, original_labels)):
        print('processing fold #', index)
        xtrain, xval = X_train[train_indices], X_train[val_indices]
        ytrain, yval = Y_train[train_indices], Y_train[val_indices]

        # Build the model
        estimator = KerasRegressor(build_fn=build_model,
                                   model_params=model_params,
                                   epochs=model_params['num_epochs'],
                                   batch_size=model_params['batch_size'],
                                   verbose=1)
        # Train the model
        history = estimator.fit(xtrain, ytrain, validation_data=(xval, yval))

        # Record model metrics
        model_history['all_acc_histories'].append(history.history['accuracy'])
        model_history['all_loss_histories'].append(history.history['loss'])
        # Record model validation metrics
        model_history['all_val_acc_histories'].append(history.history['val_accuracy'])
        model_history['all_val_loss_histories'].append(history.history['val_loss'])

    # Plot model metrics
    plot_model(model_history, model_params['num_epochs'], label)
def run_model(train, test, epochs, concat, clean_split, outputPath, weightsName,
              num, X_train, X_test, Y_train, Y_test):
    reg = KerasRegressor(build_fn=inter, epochs=epochs, verbose=1,
                         validation_split=0.0)
    # kfold = KFold(n_splits=5, shuffle=True, random_state=1234)
    # results = np.sqrt(-1 * cross_val_score(reg, X_train, Y_train, scoring="neg_mean_squared_error", cv=kfold))
    # print("Training RMSE mean and std from CV: {} {}".format(results.mean(), results.std()))
    print("Testing model")
    reg.fit(X_train, Y_train)
    prediction = reg.predict(X_test)
    print("R2: ", r2_score(Y_test, prediction))

    # signed squared correlations: square the coefficient but keep its sign
    p = pearsonr(Y_test, prediction)[0]
    p = -p**2 if p < 0 else p**2
    print("Pearson's r^2: ", p)
    s = spearmanr(Y_test, prediction)[0]
    s = -s**2 if s < 0 else s**2
    print("Spearman's rank correlation rho^2: ", s)

    pred_rank = ss.rankdata(prediction)
    true_rank = ss.rankdata(Y_test)
    meanDiff = np.mean(abs(pred_rank - true_rank))
    print("Mean Index Error for " + str(len(Y_test)) + " test examples: ", meanDiff)
    print("Percent off: ", float(meanDiff) / len(Y_test) * 100)

    np.save(outputPath + "/pred_test/pred_" + train + "_" + test + str(epochs), prediction)
    np.save(outputPath + "/pred_test/test_" + train + "_" + test + str(epochs), Y_test)
    result = np.sqrt(mean_squared_error(Y_test, prediction))
    print("Testing RMSE: {}".format(result))
    print("Saving model to: ", weightsName)
    reg.model.save(weightsName)
    return p
def train(x_train, y_train, x_test, y_test):
    # y_train = to_categorical(y_train)
    def build_model():
        model = Sequential()
        # add model layers
        model.add(Conv2D(64, kernel_size=2, strides=(2, 2), activation="relu",
                         input_shape=(768, 1023, 1)))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Conv2D(128, kernel_size=2, strides=(2, 2), activation="relu"))
        model.add(Flatten())
        # model.add(Dense(10, activation="relu"))
        model.add(Dense(1))
        # compile with MSE loss, which measures regression performance
        model.compile(optimizer="adam", loss="mean_squared_error")
        return model

    estimator = KerasRegressor(build_fn=build_model, epochs=20, batch_size=5,
                               verbose=0)
    estimator.fit(x_train, y_train)

    # serialize model to JSON
    model_json = estimator.model.to_json()
    with open("sumatra_model.json", "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    estimator.model.save_weights("sumatra_model.h5")
    print("Saved model to disk")

    predictions = estimator.predict(x_test)
    print('prediction')
    for prediction in predictions:
        print(prediction)
    print('val')
    for val in y_test:
        print(val)
def train_reg(X, Y, fn, X_test, Y_test, seed=7):
    np.random.seed(seed)
    estimator = KerasRegressor(build_fn=fn, epochs=100, batch_size=128, verbose=0)
    kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
    # results = cross_val_score(pipeline, X, Y, cv=kfold)
    results = cross_val_score(estimator, X, Y, cv=kfold)
    print(results)
    print('Result: %.2f (%.2f) MSE' % (results.mean(), results.std()))
    estimator.fit(X, Y)
    netOutput = estimator.predict(X_test)
    print("Loss and metrics")
    print(rmse(netOutput, Y_test))
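# train_reg calls an rmse() helper that is not defined in this snippet;
# a minimal sketch of what it presumably computes:
import numpy as np

def rmse(predictions, targets):
    # root mean squared error between two arrays
    predictions = np.asarray(predictions).ravel()
    targets = np.asarray(targets).ravel()
    return np.sqrt(np.mean((predictions - targets) ** 2))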
def keras1(train2, y, test2, v, z):
    cname = sys._getframe().f_code.co_name
    v[cname], z[cname] = 0, 0
    scores = list()
    scaler = preprocessing.RobustScaler()
    train3 = scaler.fit_transform(train2)
    test3 = scaler.transform(test2)
    input_dims = train3.shape[1]

    def build_model():
        input_ = layers.Input(shape=(input_dims,))
        model = layers.Dense(int(input_dims * 4.33),
                             kernel_initializer='Orthogonal',
                             activation=layers.advanced_activations.PReLU())(input_)
        model = layers.BatchNormalization()(model)
        # model = layers.Dropout(0.7)(model)
        model = layers.Dense(int(input_dims * 2.35),
                             kernel_initializer='Orthogonal',
                             activation=layers.advanced_activations.PReLU())(model)
        model = layers.BatchNormalization()(model)
        # model = layers.Dropout(0.9)(model)
        model = layers.Dense(int(input_dims * 0.51),
                             kernel_initializer='Orthogonal',
                             activation=layers.advanced_activations.PReLU())(model)
        model = layers.BatchNormalization()(model)
        model = layers.Dense(1, activation='sigmoid')(model)
        model = models.Model(input_, model)
        model.compile(loss='binary_crossentropy',
                      optimizer=optimizers.Nadam(lr=0.02),
                      metrics=["accuracy"])
        # print(model.summary(line_length=120))
        return model

    np.random.seed(1234)
    # epochs are passed to fit() below; the deprecated `nb_epoch` kwarg
    # duplicated them in the constructor and has been dropped
    est = KerasRegressor(build_fn=build_model, batch_size=32)  # verbose=2
    build_model().summary(line_length=120)
    model_path = '../data/working/' + csv_name_suffix()
    model_path = model_path[:-4] + '_keras_model.h5'
    kcb = [
        callbacks.EarlyStopping(monitor='val_loss', patience=20),  # verbose=1
        callbacks.ModelCheckpoint(model_path, monitor='val_loss',
                                  save_best_only=True, save_weights_only=True,
                                  verbose=0),
        callbacks.ReduceLROnPlateau(monitor='val_loss', min_lr=1e-7,
                                    factor=0.2, verbose=1)
    ]
    num_splits = 7
    ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=11)
    for n, (itrain, ival) in enumerate(ss.split(train3, y)):
        xtrain, xval = train3[itrain], train3[ival]
        ytrain, yval = y[itrain], y[ival]
        est.fit(xtrain, ytrain, epochs=10000,
                validation_data=(xval, yval),
                verbose=0, callbacks=kcb, shuffle=True)
        est.model.load_weights(model_path)
        p = est.predict(xval)
        v.loc[ival, cname] += pconvert(p)
        score = metrics.log_loss(y[ival], p)
        print(cname, 'fold %d: ' % (n + 1), score, now())
        scores.append(score)
        z[cname] += pconvert(est.predict(test3))
    os.remove(model_path)
    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits
freq = np.absolute(np.fft.fft(m_fc[:, -32:], axis=1)[:, 0:16])

from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

def whole_fc_m_ann_ensemble():
    model = Sequential()
    model.add(Dense(units=T * 3 + 16, kernel_initializer='normal',
                    activation='relu', input_dim=T * 3 + 16))
    model.add(Dense(units=T * 3, kernel_initializer='normal', activation='relu'))
    model.add(Dense(units=T, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

X = np.hstack((h[8:], s[8:], m_fc[:-8], freq[:-8]))
Y = m_fc[8:]
seed = 7
np.random.seed(seed)
estimator = KerasRegressor(build_fn=whole_fc_m_ann_ensemble, epochs=14,
                           batch_size=10, verbose=0)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, Y, cv=kfold)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))
estimator.fit(X, Y)
print(estimator.model.summary())
predicted = estimator.predict(X)
plot_summary(h[8:], s[8:], ens[8:], m_fc[8:], predicted, 50)
plt.savefig('pics\\keras_clean_test_out.png')
class SimpleModel:
    def __init__(self):
        self.data = dict()
        self.frame_len = 30
        self.predict_dist = 5
        self.scaler = dict()

    def load_all_data(self, begin_date, end_date):
        con = sqlite3.connect('../data/stock.db')
        code_list = con.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
        X_data_list, Y_data_list, DATA_list = [0] * 10, [0] * 10, [0] * 10
        idx = 0
        split = int(len(code_list) / 9)
        bar = ProgressBar(len(code_list), max_width=80)
        for code in code_list:
            data = self.load_data(code[0], begin_date, end_date)
            data = data.dropna()
            X, Y = self.make_x_y(data, code[0])
            if len(X) <= 1:
                continue
            code_array = [code[0]] * len(X)
            assert len(X) == len(data.loc[29:len(data) - 6, '일자'])
            if idx % split == 0:
                X_data_list[int(idx / split)] = list(X)
                Y_data_list[int(idx / split)] = list(Y)
                DATA_list[int(idx / split)] = np.array(
                    [data.loc[29:len(data) - 6, '일자'].values.tolist(),
                     code_array,
                     data.loc[29:len(data) - 6, '현재가'],
                     data.loc[34:len(data), '현재가']]).T.tolist()
            else:
                X_data_list[int(idx / split)].extend(X)
                Y_data_list[int(idx / split)].extend(Y)
                DATA_list[int(idx / split)].extend(np.array(
                    [data.loc[29:len(data) - 6, '일자'].values.tolist(),
                     code_array,
                     data.loc[29:len(data) - 6, '현재가'],
                     data.loc[34:len(data), '현재가']]).T.tolist())
            bar.numerator += 1
            print("%s | %d" % (bar, len(X_data_list[int(idx / split)])), end='\r')
            sys.stdout.flush()
            idx += 1
        print("%s" % bar)

        print("Merge split data")
        bar = ProgressBar(10, max_width=80)
        for i in range(10):
            if type(X_data_list[i]) == type(1):
                continue
            if i == 0:
                X_data = X_data_list[i]
                Y_data = Y_data_list[i]
                DATA = DATA_list[i]
            else:
                X_data.extend(X_data_list[i])
                Y_data.extend(Y_data_list[i])
                DATA.extend(DATA_list[i])
            bar.numerator = i + 1
            print("%s | %d" % (bar, len(DATA)), end='\r')
            sys.stdout.flush()
        print("%s | %d" % (bar, len(DATA)))
        return np.array(X_data), np.array(Y_data), np.array(DATA)

    def load_data(self, code, begin_date, end_date):
        con = sqlite3.connect('../data/stock.db')
        df = pd.read_sql("SELECT * from '%s'" % code, con, index_col='일자').sort_index()
        data = df.loc[df.index > str(begin_date)]
        data = data.loc[data.index < str(end_date)]
        data = data.reset_index()
        return data

    def make_x_y(self, data, code):
        data_x = []
        data_y = []
        for col in data.columns:
            try:
                data.loc[:, col] = data.loc[:, col].str.replace('--', '-')
                data.loc[:, col] = data.loc[:, col].str.replace('+', '')
            except AttributeError as e:
                print(e)  # column is not a string column; nothing to replace
        data.loc[:, 'month'] = data.loc[:, '일자'].str[4:6]
        data = data.drop(['일자', '체결강도'], axis=1)

        # normalization
        data = np.array(data)
        if len(data) <= 0:
            return np.array([]), np.array([])
        if code not in self.scaler:
            self.scaler[code] = StandardScaler()
            data = self.scaler[code].fit_transform(data)
        # elif code not in self.scaler:  # unreachable: same condition as above
        #     return np.array([]), np.array([])
        else:
            data = self.scaler[code].transform(data)
        for i in range(self.frame_len, len(data) - self.predict_dist + 1):
            data_x.extend(np.array(data[i - self.frame_len:i, :]))
            data_y.append(data[i + self.predict_dist - 1][0])
        np_x = np.array(data_x).reshape(-1, 23 * 30)
        np_y = np.array(data_y)
        return np_x, np_y

    def train_model(self, X_train, Y_train):
        print("training model %d_%d.pkl" % (self.frame_len, self.predict_dist))
        model_name = "../model/simple_reg_model/%d_%d.pkl" % (self.frame_len, self.predict_dist)
        self.estimator = RandomForestRegressor(random_state=0, n_estimators=100, n_jobs=-1)
        self.estimator.fit(X_train, Y_train)
        print("finish training model")
        joblib.dump(self.estimator, model_name)

    def set_config(self):
        # TensorFlow GPU optimization (TF1-style session config)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        K.set_session(sess)

    def train_model_keras(self, X_train, Y_train, date):
        print("training model %d_%d.h5" % (self.frame_len, self.predict_dist))
        model_name = "../model/reg_keras/%d_%d_%s.h5" % (self.frame_len, self.predict_dist, date)
        # `nb_epoch` is the deprecated Keras 1 spelling of `epochs`
        self.estimator = KerasRegressor(build_fn=baseline_model, epochs=200,
                                        batch_size=64, verbose=1)
        self.estimator.fit(X_train, Y_train)
        print("finish training model")
        # save model architecture and weights
        json_model = self.estimator.model.to_json()
        open(model_name.replace('h5', 'json'), 'w').write(json_model)
        self.estimator.model.save_weights(model_name, overwrite=True)

    def evaluate_model(self, X_test, Y_test, orig_data, s_date):
        print("Evaluate model %d_%d.pkl" % (self.frame_len, self.predict_dist))
        if MODEL_TYPE == 'random_forest':
            model_name = "../model/simple_reg_model/%d_%d.pkl" % (self.frame_len, self.predict_dist)
            self.estimator = joblib.load(model_name)
        elif MODEL_TYPE == 'keras':
            model_name = "../model/reg_keras/%d_%d_%s.h5" % (self.frame_len, self.predict_dist, s_date)
            self.estimator = model_from_json(open(model_name.replace('h5', 'json')).read())
            self.estimator.load_weights(model_name)
        pred = self.estimator.predict(X_test)
        assert len(pred) == len(Y_test)
        pred = np.array(pred).reshape(-1)
        Y_test = np.array(Y_test).reshape(-1)

        # RMSE over the test set
        score = 0
        for i in range(len(pred)):
            score += (float(pred[i]) - float(Y_test[i])) ** 2
        score = np.sqrt(score / len(pred))
        print("score: %f" % score)

        res = 0
        for idx in range(len(pred)):
            buy_price = int(orig_data[idx][2])
            future_price = int(orig_data[idx][3])
            date = int(orig_data[idx][0])
            pred_transform = self.scaler[orig_data[idx][1]].inverse_transform([pred[idx]] + [0] * 22)[0]
            cur_transform = self.scaler[orig_data[idx][1]].inverse_transform([X_test[idx][23 * 29]] + [0] * 22)[0]
            if pred_transform > buy_price * 1.01:
                res += (future_price - buy_price * 1.005) * (100000 / buy_price + 1)
                print("[%s] buy: %6d, sell: %6d, earn: %6d" % (
                    str(date), buy_price, future_price,
                    (future_price - buy_price * 1.005) * (100000 / buy_price)))
        print("result: %d" % res)

    def load_current_data(self):
        con = sqlite3.connect('../data/stock.db')
        code_list = con.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
        X_test = []
        DATA = []
        code_list = list(map(lambda x: x[0], code_list))
        bar = ProgressBar(len(code_list), max_width=80)
        # iterate over a copy: removing items from code_list while iterating
        # over it directly would skip elements
        for code in list(code_list):
            bar.numerator += 1
            print("%s | %d" % (bar, len(X_test)), end='\r')
            sys.stdout.flush()
            df = pd.read_sql("SELECT * from '%s'" % code, con, index_col='일자').sort_index()
            data = df.iloc[-30:, :]
            data = data.reset_index()
            for col in data.columns:
                try:
                    data.loc[:, col] = data.loc[:, col].str.replace('--', '-')
                    data.loc[:, col] = data.loc[:, col].str.replace('+', '')
                except AttributeError:
                    pass
            data.loc[:, 'month'] = data.loc[:, '일자'].str[4:6]
            data = data.drop(['일자', '체결강도'], axis=1)
            if len(data) < 30:
                code_list.remove(code)
                continue
            current_price = int(data.loc[len(data) - 1, '현재가'])
            try:
                data = self.scaler[code].transform(np.array(data))
            except KeyError:
                code_list.remove(code)
                continue
            # append only after a successful transform so DATA stays
            # aligned with code_list
            DATA.append(current_price)
            X_test.extend(np.array(data))
        X_test = np.array(X_test).reshape(-1, 23 * 30)
        return X_test, code_list, DATA

    def make_buy_list(self, X_test, code_list, orig_data, s_date):
        BUY_UNIT = 10000
        print("make buy_list")
        if MODEL_TYPE == 'random_forest':
            model_name = "../model/simple_reg_model/%d_%d.pkl" % (self.frame_len, self.predict_dist)
            self.estimator = joblib.load(model_name)
        elif MODEL_TYPE == 'keras':
            model_name = "../model/reg_keras/%d_%d_%s.h5" % (self.frame_len, self.predict_dist, s_date)
            self.estimator = model_from_json(open(model_name.replace('h5', 'json')).read())
            self.estimator.load_weights(model_name)
        pred = self.estimator.predict(X_test)
        pred = np.array(pred).reshape(-1)

        # load the codes already held in the account
        set_account = set([])
        with open('../data/stocks_in_account.txt') as f_stocks:
            for line in f_stocks.readlines():
                data = line.split(',')
                set_account.add(data[6].replace('A', ''))

        # order fields: buy/sell, code, market/limit price, qty, price, order status
        buy_item = ["매수", "", "시장가", 0, 0, "매수전"]
        with open("../data/buy_list.txt", "wt") as f_buy:
            for idx in range(len(pred)):
                real_buy_price = int(orig_data[idx])
                buy_price = float(X_test[idx][23 * 29])
                try:
                    pred_transform = self.scaler[code_list[idx]].inverse_transform([pred[idx]] + [0] * 22)[0]
                except KeyError:
                    continue
                print("[BUY PREDICT] code: %s, cur: %5d, predict: %5d" % (
                    code_list[idx], real_buy_price, pred_transform))
                if pred_transform > real_buy_price * 3 and code_list[idx] not in set_account:
                    print("add to buy_list %s" % code_list[idx])
                    buy_item[1] = code_list[idx]
                    buy_item[3] = int(BUY_UNIT / real_buy_price) + 1
                    for item in buy_item:
                        f_buy.write("%s;" % str(item))
                    f_buy.write('\n')

    def load_data_in_account(self):
        # load code list from account
        DATA = []
        with open('../data/stocks_in_account.txt') as f_stocks:
            for line in f_stocks.readlines():
                data = line.split(',')
                DATA.append([data[6].replace('A', ''), data[1], data[0]])

        # load the price history for each code in DATA
        con = sqlite3.connect('../data/stock.db')
        X_test = []
        idx_rm = []
        bar = ProgressBar(len(DATA), max_width=80)
        for idx, code in enumerate(DATA):
            bar.numerator += 1
            print("%s | %d" % (bar, len(X_test)), end='\r')
            sys.stdout.flush()
            try:
                df = pd.read_sql("SELECT * from '%s'" % code[0], con, index_col='일자').sort_index()
            except pd.io.sql.DatabaseError as e:
                print(e)
                idx_rm.append(idx)
                continue
            data = df.iloc[-30:, :]
            data = data.reset_index()
            for col in data.columns:
                try:
                    data.loc[:, col] = data.loc[:, col].str.replace('--', '-')
                    data.loc[:, col] = data.loc[:, col].str.replace('+', '')
                except AttributeError as e:
                    print(e)  # column is not a string column
            data.loc[:, 'month'] = data.loc[:, '일자'].str[4:6]
            DATA[idx].append(int(data.loc[len(data) - 1, '현재가']))
            data = data.drop(['일자', '체결강도'], axis=1)
            if len(data) < 30:
                idx_rm.append(idx)
                continue
            try:
                data = self.scaler[code[0]].transform(np.array(data))
            except KeyError:
                idx_rm.append(idx)
                continue
            X_test.extend(np.array(data))
        # delete in reverse so earlier indices stay valid
        # (the original slice idx_rm[-1:0:-1] skipped the first entry)
        for i in reversed(idx_rm):
            del DATA[i]
        X_test = np.array(X_test).reshape(-1, 23 * 30)
        return X_test, DATA

    def make_sell_list(self, X_test, DATA, s_date):
        print("make sell_list")
        if MODEL_TYPE == 'random_forest':
            model_name = "../model/simple_reg_model/%d_%d.pkl" % (self.frame_len, self.predict_dist)
            self.estimator = joblib.load(model_name)
        elif MODEL_TYPE == 'keras':
            model_name = "../model/reg_keras/%d_%d_%s.h5" % (self.frame_len, self.predict_dist, s_date)
            self.estimator = model_from_json(open(model_name.replace('h5', 'json')).read())
            self.estimator.load_weights(model_name)
        pred = self.estimator.predict(X_test)
        pred = np.array(pred).reshape(-1)

        # order fields: buy/sell, code, market/limit price, qty, price, order status
        sell_item = ["매도", "", "시장가", 0, 0, "매도전"]
        with open("../data/sell_list.txt", "wt") as f_sell:
            for idx in range(len(pred)):
                current_price = float(X_test[idx][23 * 29])
                current_real_price = int(DATA[idx][3])
                name = DATA[idx][2]
                print("[SELL PREDICT] name: %s, code: %s, cur: %f(%d), predict: %f" % (
                    name, DATA[idx][0], current_price, current_real_price, pred[idx]))
                if pred[idx] < current_price:
                    print("add to sell_list %s" % name)
                    sell_item[1] = DATA[idx][0]
                    sell_item[3] = DATA[idx][1]
                    for item in sell_item:
                        f_sell.write("%s;" % str(item))
                    f_sell.write('\n')

    def save_scaler(self, s_date):
        model_name = "../model/scaler_%s.pkl" % s_date
        joblib.dump(self.scaler, model_name)

    def load_scaler(self, s_date):
        model_name = "../model/scaler_%s.pkl" % s_date
        self.scaler = joblib.load(model_name)
def keras_mlp1(train2, y, test2, v, z):
    cname = sys._getframe().f_code.co_name
    v[cname], z[cname] = 0, 0
    from keras import layers
    from keras import models
    from keras import optimizers
    from keras.wrappers.scikit_learn import KerasRegressor
    scores = list()
    scaler = preprocessing.RobustScaler()
    train3 = scaler.fit_transform(train2)
    test3 = scaler.transform(test2)
    input_dims = train3.shape[1]

    def build_model():
        input_ = layers.Input(shape=(input_dims,))
        model = layers.Dense(256, kernel_initializer='Orthogonal')(input_)
        # model = layers.BatchNormalization()(model)
        # model = layers.advanced_activations.PReLU()(model)
        model = layers.Activation('selu')(model)
        # model = layers.Dropout(0.7)(model)
        model = layers.Dense(64, kernel_initializer='Orthogonal')(model)
        # model = layers.BatchNormalization()(model)
        model = layers.Activation('selu')(model)
        # model = layers.Dropout(0.9)(model)
        model = layers.Dense(16, kernel_initializer='Orthogonal')(model)
        # model = layers.BatchNormalization()(model)
        model = layers.Activation('selu')(model)
        model = layers.Dense(1, activation='sigmoid')(model)
        model = models.Model(input_, model)
        model.compile(loss='binary_crossentropy', optimizer=optimizers.Nadam())
        # print(model.summary(line_length=120))
        return model

    np.random.seed(1234)
    # epochs are passed to fit() below; the deprecated `nb_epoch` kwarg
    # duplicated them in the constructor and has been dropped
    est = KerasRegressor(build_fn=build_model, batch_size=256)  # verbose=2
    build_model().summary(line_length=120)
    model_path = '../data/working/' + cname + '_keras_model.h5'
    num_splits = 9
    ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=11,
                                      test_size=1 / num_splits)
    for n, (itrain, ival) in enumerate(ss.split(train3, y)):
        xtrain, xval = train3[itrain], train3[ival]
        ytrain, yval = y[itrain], y[ival]
        est.fit(xtrain, ytrain, epochs=10000,
                validation_data=(xval, yval), verbose=0,
                callbacks=build_keras_fit_callbacks(model_path), shuffle=True)
        est.model.load_weights(model_path)
        p = est.predict(xval)
        v.loc[ival, cname] += pconvert(p)
        score = metrics.log_loss(y[ival], p)
        print(cname, 'fold %d: ' % (n + 1), score, now())
        scores.append(score)
        z[cname] += pconvert(est.predict(test3))
    os.remove(model_path)
    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits
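# keras_mlp1 relies on a build_keras_fit_callbacks() helper that is not
# shown. A plausible sketch that mirrors the inline callback list used in
# keras1 above; the patience and factor values are assumptions.
from keras import callbacks

def build_keras_fit_callbacks(model_path):
    return [
        callbacks.EarlyStopping(monitor='val_loss', patience=20),
        callbacks.ModelCheckpoint(model_path, monitor='val_loss',
                                  save_best_only=True,
                                  save_weights_only=True, verbose=0),
        callbacks.ReduceLROnPlateau(monitor='val_loss', min_lr=1e-7,
                                    factor=0.2, verbose=1),
    ]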