예제 #1
0
def DLmodel_regressor(Xtrain_in,
                      ytrain_in,
                      Xtest_in,
                      ytest_in,
                      lime_flag=False,
                      df_row=None):
    """Fit the baseline Keras regressor and record its train/test metrics.

    Args:
        Xtrain_in: Training features passed to ``estimator.fit``.
        ytrain_in: Training targets.
        Xtest_in: Test features.
        ytest_in: Test targets.
        lime_flag: When true, ``lime_explainer`` is called with ``df_row``.
        df_row: Row forwarded to ``lime_explainer``; unused otherwise.

    Returns:
        A 6-tuple of strings: the model label, elapsed seconds, train R2,
        test R2, adjusted train R2 and adjusted test R2.
    """
    start_time = time.time()
    estimator = KerasRegressor(build_fn=DLmodel_baseline,
                               epochs=20,
                               batch_size=5,
                               verbose=10)
    seed = 23
    numpy.random.seed(seed)
    estimator.fit(Xtrain_in, ytrain_in)
    y_test_pred = estimator.predict(Xtest_in)
    y_train_pred = estimator.predict(Xtrain_in)

    # r2_score is not symmetric: the ground truth must come first, otherwise
    # the score is normalised by the variance of the predictions.
    score_test = r2_score(ytest_in, y_test_pred)
    score_train = r2_score(ytrain_in, y_train_pred)
    adj_Rscore_train = adjusted_R2score_calc(Xtrain_in, score_train)
    adj_Rscore_test = adjusted_R2score_calc(Xtest_in, score_test)
    mrs_train = mean_squared_error(ytrain_in, y_train_pred)
    mrs_test = mean_squared_error(ytest_in, y_test_pred)

    if lime_flag:
        lime_explainer(Xtrain_in, df_row, estimator, "Keras_base")

    # Measured once, after the optional LIME step, as the original's final
    # measurement was.
    time_end = time.time() - start_time
    log_record_result("Keras base model", time_end, score_train, score_test,
                      adj_Rscore_train, adj_Rscore_test, mrs_train, mrs_test)
    plot_residuals(Xtest_in, ytest_in, estimator, "Keras_base")
    return ("Keras base model", str(time_end), str(score_train),
            str(score_test), str(adj_Rscore_train), str(adj_Rscore_test))
예제 #2
0
def final_predict(X_train, y_train, X_test, company_train, company_dev,
                  company_test):
    """Train the attention model, dump test predictions and print train fit.

    Sets the module globals ``X_DIM`` (from the first training sample) and
    ``Y_DIM`` (1) before the network is built, fits on the three-input list
    ``[X_train, company_train, X_train]``, writes the test predictions to
    ``<RESULTS_DIR>/subtask2_hl/dl_predictions2.pkl`` and prints the cosine
    similarity between the training targets and the training predictions.

    ``company_dev`` is accepted for interface compatibility and is not used.
    """
    global X_DIM, Y_DIM
    X_DIM = X_train[0].shape[0]
    y_train = np.array(y_train)
    print(X_train.shape, y_train.shape)
    Y_DIM = 1

    # ``epochs`` replaces the legacy ``nb_epoch`` keyword: the Keras 2
    # scikit-learn wrapper silently drops ``nb_epoch``, which would leave the
    # model training for a single epoch.
    regressor = KerasRegressor(build_fn=attention_imp_merge_exp,
                               epochs=NB_EPOCH,
                               batch_size=BATCH_SIZE,
                               verbose=1)
    print(regressor)

    train_inputs = [X_train, company_train, X_train]
    regressor.fit(train_inputs, y_train)

    predictions = regressor.predict([X_test, company_test, X_test])
    print(predictions.shape)
    print(predictions[:20])
    joblib.dump(
        predictions,
        os.path.join(config.RESULTS_DIR, 'subtask2_hl', 'dl_predictions2.pkl'))

    print('Training result',
          cosine_similarity(y_train, regressor.predict(train_inputs)))
예제 #3
0
    def nn_model(self):
        """Fit the base network on scored rows and write a submission file.

        Rows of ``get_data()`` with ``score > 0`` are used for fitting; rows
        with ``score < 0`` are predicted and written, rounded to integers, to
        ``submittion_5.csv``. The MAE on the fitting rows is printed.
        """
        dataset = get_data()
        drop_cols = ['id', 'score']

        labelled = dataset[dataset['score'] > 0.0]
        unlabelled = dataset[dataset['score'] < 0]
        unlabelled.reset_index(inplace=True, drop=True)

        targets = labelled['score']
        inputs = labelled.drop(columns=drop_cols)
        holdout_inputs = unlabelled.drop(columns=drop_cols)

        estimator = KerasRegressor(build_fn=self._get_nn_base_model,
                                   epochs=100,
                                   batch_size=5,
                                   verbose=1)
        estimator.fit(inputs, targets)

        # Error is measured on the same rows the model was fitted on.
        fitted = estimator.predict(inputs)
        mae_error = mean_absolute_error(fitted, targets)
        print(f'mae error: {mae_error}')
        print(f'nn score: {1 / (1 + mae_error)}')

        holdout_pred = estimator.predict(holdout_inputs)
        submission = pd.DataFrame({
            'id': unlabelled['id'],
            'score': holdout_pred
        })
        submission['score'] = submission['score'].apply(
            lambda item: int(round(item)))
        submission.to_csv('submittion_5.csv', index=False)
def neural_network_model(data):
    """Fit a small dense network on five descriptors and return the test R2.

    Args:
        data: Table indexable by the column names used below (the five
            feature columns and ``e_gap_alpha``) and exposing ``.values``.

    Returns:
        ``r2_score`` of the predictions on the 25% held-out test split.

    Side effects:
        Prints the final train/validation loss and draws a scatter plot of
        predicted against actual values on a new matplotlib figure.
    """
    X = data[['AXp-0d', 'AXp-1d', 'AXp-2d', 'ETA_eta_L',
              'ETA_epsilon_3']].values
    Y = data[['e_gap_alpha']].values
    X_train_pn, X_test_pn, y_train, y_test = train_test_split(
        X, Y, test_size=0.25, random_state=1234)

    # Fit the scaler on the training split only, then apply it to both splits
    # so no test-set statistics leak into training.
    X_train_scaler = StandardScaler().fit(X_train_pn)
    X_train = X_train_scaler.transform(X_train_pn)
    X_test = X_train_scaler.transform(X_test_pn)

    def neural_model():
        """Build and compile the 5-8-20-1 dense network."""
        model = Sequential()
        model.add(Dense(5, input_dim=5, kernel_initializer='normal',
                        activation='relu',
                        kernel_regularizer=regularizers.l2(0.01)))
        model.add(Dense(8, kernel_initializer='normal', activation='relu',
                        kernel_regularizer=regularizers.l2(0.01)))
        model.add(Dense(20, kernel_initializer='normal', activation='relu',
                        kernel_regularizer=regularizers.l2(0.01)))
        model.add(Dense(1, kernel_initializer='normal'))
        model.compile(loss='mean_squared_error', optimizer='adam')
        return model

    # Seed NumPy's RNG before the model is built.
    np.random.seed(1234)
    estimator = KerasRegressor(build_fn=neural_model,
                               epochs=1200, batch_size=25000, verbose=0)
    # The keyword arguments given to fit() take precedence over the
    # constructor values above.
    history = estimator.fit(X_train, y_train, validation_split=0.33,
                            epochs=1200, batch_size=10000, verbose=0)
    print("final MSE for train is %.2f and for validation is %.2f" %
          (history.history['loss'][-1], history.history['val_loss'][-1]))

    plt.figure(figsize=(5, 5))
    prediction = estimator.predict(X_test)
    plt.scatter(y_train, estimator.predict(X_train), color='blue')
    plt.scatter(y_test, prediction, color='red')
    plt.plot([0, 4], [0, 4], lw=4, color='black')
    # Raw strings: "\ " is an invalid escape sequence in a normal literal.
    # The resulting text is unchanged.
    plt.title(r'$Neural \ Network \ Model$')
    plt.xlabel(r'$<Eg> \ Actual \ [eV]$')
    plt.ylabel(r'$<Eg> \ Predict \ [eV]$')
    return r2_score(y_test, prediction)
예제 #5
0
def predictions(hemi="N", start_time=dt.datetime(2019, 1, 1), timedelta=365):
    """Predict with the saved model for a run of future calendar days.

    Args:
        hemi: Tag interpolated into the data and model file names.
        start_time: First day to predict.
        timedelta: Number of consecutive days to predict.

    Returns:
        None. The feature shapes, the predictions and the scaled inputs are
        printed.
    """
    feature_cols = ["yr", "month", "doy", "day"]
    f = get_data("Data_Proc/%s_seaice_extent_daily_v4.0.csv" % hemi)
    # ``.values`` replaces DataFrame.as_matrix(), which newer pandas removed.
    X = f[feature_cols].values

    # int(): datetime.timedelta rejects NumPy integer scalars on Python 3.
    dn = [start_time + dt.timedelta(int(x)) for x in np.arange(timedelta)]
    u = pd.DataFrame()
    u["yr"] = [x.year - 1978 for x in dn]
    u["month"] = [x.month for x in dn]
    u["day"] = [x.day for x in dn]
    u["doy"] = [(x - dt.datetime(x.year, 1, 1)).days + 1 for x in dn]
    Xp = u[feature_cols].values
    print(Xp.shape)

    # The scaler is fitted on historical and future rows together, then only
    # the future rows are transformed.
    Xn = np.concatenate((X, Xp))
    print(Xn.shape, X.shape)
    scaler = MinMaxScaler()
    scaler.fit(Xn)
    x = scaler.transform(Xp)

    # The wrapper is only a shell; the trained network is loaded from disk.
    m = KerasRegressor(build_fn=deep_model, epochs=50, batch_size=100,
                       verbose=0)
    m.model = load_model("Data_Proc/%s_model.h5" % hemi)
    y = m.predict(x)
    print(y, x)
    return
예제 #6
0
def main():
    """Cross-validate, fit and plot the regression network.

    Loads the data with ``processamentoDatabase``, keeps 25% for testing,
    runs 10-fold cross-validation on the training part, refits on all
    training data, plots actual against predicted test values and prints
    summary statistics.
    """
    X, Y = processamentoDatabase()

    (dadosEntradaTreinamento, dadosEntradaTeste, dadosSaidaTreinamento,
     dadosSaidaTeste) = train_test_split(X, Y, test_size=0.25)

    Regressor = KerasRegressor(build_fn=criaRede, epochs=200, batch_size=300)

    resultados = cross_val_score(estimator=Regressor,
                                 X=dadosEntradaTreinamento,
                                 y=dadosSaidaTreinamento,
                                 cv=10)
    Regressor.fit(dadosEntradaTreinamento, dadosSaidaTreinamento)
    predicao = Regressor.predict(dadosEntradaTeste)

    plt.plot(dadosSaidaTeste, "rs")
    plt.plot(predicao, "bs")
    plt.title("gŕafico de análise")
    plt.grid(True)
    plt.show()

    print("Média do valor dos automóveis, em euros : {}\n".format(
        resultados.mean()))
    print("Desvio Padrão : {}\n".format(resultados.std()))
    # accuracy_score is a classification metric and rejects continuous
    # predictions. The estimator's own score is used instead, which is also
    # what cross_val_score used above.
    print("scoring do modelo : {}".format(
        Regressor.score(dadosEntradaTeste, dadosSaidaTeste)))
예제 #7
0
def adam_regression(x, y):
    """Fit the regressor on min-max scaled data and plot test predictions.

    Both ``x`` and ``y`` are scaled before the 70/30 split. The scatter plot
    compares the scaled test targets with the scaled predictions; the
    inverse-transformed predictions are only printed.
    """
    print(y)

    # Independent scalers for features and target; the target scaler is the
    # one needed later for inverse_transform.
    x = MinMaxScaler().fit_transform(x)
    target_scaler = MinMaxScaler()
    y = target_scaler.fit_transform(y.reshape(-1, 1))
    print(y)

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

    model = KerasRegressor(build_fn=build_regressor,
                           batch_size=32,
                           epochs=EPOCHS)
    model.fit(x_train, y_train)

    y_pred = model.predict(x_test).reshape(-1, 1)
    predictions = target_scaler.inverse_transform(y_pred)
    print(y_pred)
    print(predictions)

    fig, ax = plt.subplots()
    ax.scatter(y_test, y_pred)
    # Dashed diagonal spanning the observed range marks a perfect prediction.
    diagonal = [y_test.min(), y_test.max()]
    ax.plot(diagonal, [y_test.min(), y_test.max()], 'k--', lw=4)
    ax.set_xlabel('Measured')
    ax.set_ylabel('Predicted')
    plt.show()
예제 #8
0
def linearRegression(X_train, X_test, y_train, y_test):
    """Fit a single-unit linear Keras model and print its test metrics.

    Prints the training shapes and data, then the test RMSE, the test R2 and
    the loss returned by ``evaluate`` on the fitted network.

    Assumes ``X_train`` is 2-D (samples x features); the input width of the
    network is taken from its second dimension.
    """
    print(X_train.shape)
    print(y_train.shape)
    print(X_train)
    print(y_train)

    n_features = X_train.shape[1]

    def build_model():
        """Return a freshly compiled linear model."""
        model = Sequential()
        model.add(Dense(1, activation='linear', input_dim=n_features))
        model.compile(loss='mse', optimizer='rmsprop')
        return model

    # build_fn must be a callable that returns a compiled model; passing a
    # model instance makes the wrapper call the model itself during fit().
    estimator = KerasRegressor(build_fn=build_model,
                               epochs=100,
                               batch_size=16,
                               verbose=1)
    estimator.fit(X_train, y_train)
    y_test_prediction = estimator.predict(X_test)

    # Square root of the MSE, so the value matches its "RMSE" label.
    rmse_error = mean_squared_error(y_true=y_test,
                                    y_pred=y_test_prediction) ** 0.5
    r2_error = r2_score(y_true=y_test, y_pred=y_test_prediction)
    print("RMSE Error")
    print(rmse_error)
    print("R2 Error")
    print(r2_error)

    # Evaluate the fitted network held by the wrapper, not an untrained one.
    score = estimator.model.evaluate(X_test, y_test, batch_size=16)
    print("Score")
    print(score)
예제 #9
0
def Train_CV(X_train,
             y_train,
             X_test,
             y_test,
             k=5,
             epochs=1000,
             batchsize=200,
             seed=100):
    """Cross-validate the model on the training set, then score the test set.

    Args:
        X_train, y_train: Data used for k-fold cross-validation and the
            final fit.
        X_test, y_test: Held-out data used for the absolute-error summary.
        k: Number of cross-validation folds.
        epochs: Training epochs per fit.
        batchsize: Mini-batch size.
        seed: Seed for the shuffled fold assignment.

    Prints the mean/std of the cross-validation scores and the mean/std of
    the absolute test error. Returns nothing.
    """
    # ``epochs`` replaces the legacy ``nb_epoch`` keyword, which the Keras 2
    # wrapper silently drops.
    estimator = KerasRegressor(build_fn=create_model,
                               epochs=epochs,
                               batch_size=batchsize,
                               verbose=False)
    # random_state only has an effect when shuffling is enabled; recent
    # scikit-learn rejects it otherwise.
    kfold = KFold(n_splits=k, shuffle=True, random_state=seed)
    results = cross_val_score(estimator, X_train, y_train, cv=kfold)
    print("Results: %.2f (%.2f) MAE" % (results.mean(), results.std()))
    estimator.fit(X_train, y_train)

    # Evaluate on the test set. The original referenced an undefined ``y``.
    # NOTE(review): assumes y_test and the predictions have matching shapes;
    # a column-vector y_test would broadcast against 1-D predictions.
    prediction = estimator.predict(X_test)
    test_error = np.abs(y_test - prediction)
    mean_error = np.mean(test_error)
    std_error = np.std(test_error)
    print('-' * 30)
    print('Evaluation Results')
    # The statistic is a mean absolute error, so it is labelled as such.
    print("Results (mean, std): %.2f (%.2f) MAE" % (mean_error, std_error))
예제 #10
0
def KerasRegression(x_train, y_train, x_test, y_test, x_real_test, i):
    """Fit a three-layer dense regressor and return exponentiated predictions.

    Args:
        x_train: Training features; must expose ``.columns`` (the column
            count sets the input width).
        y_train: Training targets; must expose ``.values``.
        x_test: Features to predict.
        y_test: Reference values passed to ``plot_roc_curve``.
        x_real_test: Unused; kept for interface compatibility.
        i: Zero-based target index used in the plot title.

    Returns:
        ``np.exp`` of the predictions for ``x_test``.
    """

    def base_model():
        """Build and compile the 35-16-1 dense network."""
        model = Sequential()
        model.add(
            Dense(35,
                  input_dim=len(x_train.columns),
                  activation="relu",
                  kernel_initializer="normal"))
        model.add(Dense(16, activation="relu", kernel_initializer="normal"))
        model.add(Dense(1, kernel_initializer="normal"))
        model.compile(loss='mean_squared_error', optimizer='adam')
        return model

    # ``.values`` replaces as_matrix(), which newer pandas removed.
    keras_label = y_train.values
    # ``epochs`` replaces the legacy ``nb_epoch`` keyword, which the Keras 2
    # wrapper silently drops.
    clf = KerasRegressor(build_fn=base_model,
                         epochs=1000,
                         batch_size=5,
                         verbose=0)
    clf.fit(x_train, keras_label)

    # NOTE(review): the exp() presumably undoes a log-transform applied to
    # the target upstream; confirm against the caller.
    keras_pred = np.exp(clf.predict(x_test))

    plot_roc_curve(y_test, keras_pred, 'Keras Reg. Target: ' + str(i + 1))

    return keras_pred
예제 #11
0
def model_keras_lstm(file):
    """Train the LSTM regressor on a CSV file and plot/report its errors.

    Args:
        file: Path of a CSV file with a target column ``n``; rows with
            missing values are dropped.

    Reads the module-level settings ``test_size``, ``random``,
    ``epochs_lstm`` and ``verbose``. Produces plots via ``plot_predict`` and
    ``plot_mixed`` and an error report via ``errors``; returns nothing.
    """
    features_basic = pd.read_csv(file)

    infos_nn(file, features_basic)

    features = features_basic.copy().dropna()

    labels = np.array(features['n'])
    mean = np.mean(labels)
    features = np.array(features.drop('n', axis=1))

    # shuffle=False keeps the rows in file order for the split.
    train_features, test_features, train_labels, test_labels = \
        train_test_split(features, labels, test_size=test_size,
                         random_state=random, shuffle=False)

    def as_sequences(matrix):
        """Reshape (samples, features) to (samples, 1, features)."""
        return matrix.reshape((matrix.shape[0], 1, matrix.shape[1]))

    train_features = as_sequences(train_features)
    test_features = as_sequences(test_features)

    print(train_labels.shape, test_labels.shape)

    estimator = KerasRegressor(build_fn=lstm_model,
                               shape=train_features.shape[2],
                               epochs=epochs_lstm,
                               batch_size=1,
                               verbose=verbose)
    estimator.fit(train_features, train_labels)

    predictions = estimator.predict(test_features)
    # The full data set needs the same 3-D layout the model was fitted on;
    # the original passed the 2-D matrix here.
    all_predictions = estimator.predict(as_sequences(features))
    predictions = np.round(predictions, decimals=0)

    plot_predict(file + '_LSTM', test_labels, predictions)
    plot_mixed(file + '_LSTM', labels, all_predictions)
    errors(test_labels, predictions, mean)
예제 #12
0
def assert_regression_predict_shape_correct(num_test):
    """Check that predicting ``num_test`` samples yields a 1-D result.

    Fits a ``KerasRegressor`` on the module-level training data and asserts
    that the prediction for the first ``num_test`` test rows has shape
    ``(num_test,)``.
    """
    training_kwargs = dict(batch_size=batch_size, epochs=epochs)
    reg = KerasRegressor(build_fn=build_fn_reg,
                         hidden_dims=hidden_dims,
                         **training_kwargs)
    reg.fit(X_train, y_train, **training_kwargs)

    subset = X_test[:num_test]
    preds = reg.predict(subset, batch_size=batch_size)
    expected_shape = (num_test, )
    assert preds.shape == expected_shape
예제 #13
0
def featureImportance(tree, importVars, cutDict):
    """Train a Keras regressor per MET bin and print train/test errors.

    Args:
        tree: ROOT tree read with ``root_numpy.tree2array``.
        importVars: Not used by the body.
            NOTE(review): the body reads the name ``inputVars`` instead;
            confirm whether this parameter was meant to be that list.
        cutDict: Mapping whose values are the selection strings applied
            when reading the tree, one per bin.

    For every bin the RMSE and mean residual are printed for both the
    training and the validation sample.
    """
    # Build the branch list once, on a copy. The original appended "genMET"
    # to the shared list inside the loop, so every further bin (and every
    # later call) requested the target branch one more time.
    branches = list(inputVars) + ["genMET"]

    for MetBin in cutDict:
        # Feature importance (requires training without TMVA)
        inputArray = root_numpy.tree2array(tree,
                                           branches=branches,
                                           selection=cutDict[MetBin])
        y = inputArray["genMET"]
        x = remove_field_name(inputArray, "genMET")

        # View the structured array as a float32 matrix and drop the last
        # column.
        x_new = x.view(np.float32).reshape(x.shape + (-1, ))
        x_new = x_new[:, :-1]

        train_x, val_x, train_y, val_y = train_test_split(x_new,
                                                          y,
                                                          random_state=1,
                                                          test_size=10000,
                                                          train_size=15000)

        my_model = KerasRegressor(build_fn=baseline_Model,
                                  epochs=40,
                                  batch_size=64,
                                  verbose=1)
        # NOTE(review): validation_split=0.8 holds out 80% of the training
        # sample for validation; confirm this is intended.
        my_model.fit(train_x, train_y, validation_split=0.8)

        y_hat_train = my_model.predict(train_x)
        y_hat_test = my_model.predict(val_x)

        # display error values
        print('Train RMSE: ',
              round(np.sqrt(((train_y - y_hat_train)**2).mean()), 2))
        print('Train MEAN: ', round(((train_y - y_hat_train).mean()), 2))
        print('Test RMSE: ', round(np.sqrt(((val_y - y_hat_test)**2).mean()),
                                   2))
        print('Test MEAN: ', round(((val_y - y_hat_test).mean()), 2))
def assert_regression_predict_shape_correct(num_test):
    """Assert that the regressor's predictions are a flat ``(num_test,)`` array.

    Uses the module-level fixtures (training/test data, ``hidden_dims``,
    ``batch_size``, ``epochs``) to fit the wrapped model first.
    """
    regressor = KerasRegressor(build_fn=build_fn_reg,
                               hidden_dims=hidden_dims,
                               batch_size=batch_size,
                               epochs=epochs)
    regressor.fit(X_train, y_train, batch_size=batch_size, epochs=epochs)

    head = X_test[:num_test]
    predicted = regressor.predict(head, batch_size=batch_size)
    assert predicted.shape == (num_test, )
예제 #15
0
def cross_validation_regressor(k, training, target):
    """Fit the regressor on one hold-out split and print its test MSE.

    A fraction ``1/k`` of the data is held out once (no loop over folds is
    performed). Each prediction is printed next to its reference value,
    followed by the mean squared error.
    """
    # Same arithmetic as a percentage round-trip, kept so the hold-out
    # fraction is bit-for-bit unchanged.
    holdout_fraction = (100 / k) / 100

    seed = 7
    np.random.seed(seed)

    print('building the regressor')
    k_model = KerasRegressor(build_fn=neural_network_regressor,
                             epochs=15000,
                             batch_size=30,
                             verbose=0)

    x_train, x_test, y_train, y_test = train_test_split(
        training, target, test_size=holdout_fraction, random_state=seed)

    print('fitting the regressor')
    k_model.fit(np.array(x_train), np.array(y_train))

    y_pred = k_model.predict(np.array(x_test))

    # Side-by-side comparison of prediction and reference value.
    for idx, predicted in enumerate(y_pred):
        print(round(predicted, 1), y_test[idx])

    mse = mean_squared_error(toFloat(y_test), toFloat(y_pred))

    # Accuracy is computed on values snapped by nearestHalf and compared as
    # two-decimal strings; the result is not reported.
    predicted_text = ['%.2f' % score for score in nearestHalf(y_pred)]
    reference_text = ['%.2f' % test for test in y_test]
    accuracy = accuracy_score(reference_text, predicted_text)

    print('mse: ', mse)
예제 #16
0
def stackedgeneralization(xtrain, ytrain, xtest, ytest):
    """Stack four base regressors under a linear meta-model and print errors.

    Half of the training data fits the base models; their predictions on
    the other half train a ``LinearRegression`` meta-model, which is then
    applied to the stacked base predictions for ``xtest``. MAE and MSE
    against ``ytest`` are printed.
    """
    x_training, x_valid, y_training, y_valid = train_test_split(
        xtrain, ytrain, test_size=0.5, random_state=42)

    # Base learners.
    ridge = KernelRidge(alpha=0.1)
    forest = RandomForestRegressor(n_estimators=60,
                                   random_state=1111,
                                   max_depth=35,
                                   max_features=2,
                                   min_samples_leaf=5)
    boosting = GradientBoostingRegressor(n_estimators=30,
                                         random_state=1111,
                                         max_depth=2,
                                         max_features=5,
                                         min_samples_leaf=8,
                                         learning_rate=0.03,
                                         subsample=0.88)
    network = KerasRegressor(build_fn=finalneuralnetwork)

    for learner in (ridge, forest, boosting):
        learner.fit(x_training, y_training)
    network.fit(x_training, y_training, verbose=2, epochs=1500)

    learners = [ridge, forest, boosting, network]

    # One column per base learner: first for the validation half, then for
    # the test set.
    valid_columns = [learner.predict(x_valid) for learner in learners]
    test_columns = [learner.predict(xtest) for learner in learners]
    stackedpredictions = np.column_stack(valid_columns)
    stackedtestpredictions = np.column_stack(test_columns)

    # Meta-model trained on the validation-half predictions.
    metamodel = LinearRegression()
    metamodel.fit(stackedpredictions, y_valid)
    final_predictions = metamodel.predict(stackedtestpredictions)
    print('MAE:', metrics.mean_absolute_error(ytest, final_predictions))
    print('MSE:', metrics.mean_squared_error(ytest, final_predictions))
예제 #17
0
 def DNN_model(x_train,y_train,x_test,model):
     """Fit a Keras regressor built by ``model`` and predict ``x_test``.

     Per the original author's note, the inputs hold the first 18 hours of
     measurements and the targets the following 8 hours.

     Args:
         x_train: Training inputs.
         y_train: Training targets.
         x_test: Inputs to predict.
         model: Callable handed to ``KerasRegressor`` as ``build_fn``.

     Returns:
         The predictions for ``x_test``.

     Side effects:
         Appends the elapsed fitting time in seconds to ``exe_time``.
     """
     # time.clock() was removed in Python 3.8; perf_counter() is the
     # replacement for interval timing.
     DNN_time_start = time.perf_counter()
     estimator = KerasRegressor(build_fn=model, epochs=20, batch_size=5, verbose=0)
     estimator.fit(x_train, y_train)
     DNN_time_elapsed = time.perf_counter() - DNN_time_start
     exe_time.append(DNN_time_elapsed)
     return estimator.predict(x_test)
예제 #18
0
class NNReplicator(TransformerMixin):
    """Transformer that trains a dense network to reproduce an embedding.

    ``fit`` computes ``embedder.fit_transform(X)`` and trains the network to
    map ``X`` onto that embedding; ``transform`` returns the network's
    prediction for new data.
    """

    def __init__(self, embedder, layers, dropout, lr, act_func, loss_func,
                 epochs, batch_size):
        # Object providing fit_transform() and n_components.
        self.embedder = embedder
        # Hidden-layer widths and the dropout rate that follows each layer.
        self.layers = layers
        self.dropout = dropout
        # Optimiser and training settings.
        self.lr = lr
        self.act_func = act_func
        self.loss_func = loss_func
        self.epochs = epochs
        self.batch_size = batch_size

    def nnConstruct(self, shape):
        """Build the network for ``shape`` input features and wrap it.

        The wrapped regressor is stored on ``self.krObject``.
        """
        network = Sequential()

        for position, (units, rate) in enumerate(
                zip(self.layers, self.dropout)):
            # Only the first layer declares the input width.
            first_layer_kwargs = ({'input_shape': (shape, )}
                                  if position == 0 else {})
            network.add(
                Dense(units, activation=self.act_func, **first_layer_kwargs))
            network.add(Dropout(rate))

        # Linear output with one unit per embedding component.
        network.add(Dense(self.embedder.n_components, activation='linear'))
        network.compile(optimizer=optimizers.Adagrad(lr=self.lr),
                        loss=self.loss_func)

        # The builder returns the already-compiled network.
        self.krObject = KerasRegressor(lambda: network,
                                       epochs=self.epochs,
                                       batch_size=self.batch_size)

    def fit(self, X, y=None):
        """Fit the embedder on ``X`` and train the network to replicate it."""
        self.nnConstruct(X.shape[1])
        embedding = self.embedder.fit_transform(X)
        self.krObject.fit(X, embedding)
        return self

    def transform(self, X):
        """Return the network's predicted embedding for ``X``."""
        return self.krObject.predict(X)
예제 #19
0
def model():
    """Train the network, show diagnostic plots and return the estimator.

    Prints the final train/validation loss, the test loss and an R-square
    computed on the test set; shows the loss curves and a predicted-versus-
    experimental scatter plot.
    """
    np.random.seed(seed)
    X_train_pn, X_test_pn, y_train, y_test = split()
    X_train, X_test = normalize(X_train_pn, X_test_pn)

    estimator = KerasRegressor(build_fn=nnmodel,
                               epochs=2000, batch_size=400, verbose=0)
    # Keyword arguments given to fit() override the constructor values.
    history = estimator.fit(X_train, y_train, validation_split=0.25,
                            epochs=2000, batch_size=200, verbose=0)

    curves = history.history
    print("final MSE for train is %.2f and for validation is %.2f" %
          (curves['loss'][-1], curves['val_loss'][-1]))

    # Loss curves.
    for key in ('loss', 'val_loss'):
        plt.plot(curves[key])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

    test_loss = estimator.model.evaluate(X_test, y_test)
    print("test set mse is %.2f" % test_loss)

    # Predicted against experimental values; the green line is the identity.
    plt.scatter(y_test, estimator.predict(X_test), s=4)
    plt.scatter(y_train, estimator.predict(X_train), s=3, c='r')
    plt.xlabel('Experimental PCE', size=15)
    plt.ylabel('Predict PCE', size=15)
    plt.legend(['Test', 'Train'])
    plt.plot(y_test, y_test, c='g')
    plt.show()

    # NOTE(review): assumes y_test and the predictions share a shape; a
    # column-vector y_test would broadcast against 1-D predictions.
    residuals = y_test - estimator.predict(X_test)
    deviations = y_test - y_test.mean()
    SSR = (residuals ** 2).sum()
    SST = (deviations ** 2).sum()
    R_square = 1 - SSR/SST
    print("R-square of prediction is ", R_square)
    return estimator
예제 #20
0
 def fit_and_predict_nn(self, TEST_YEAR):
     """Fit the wide network per position and rank the predicted values.

     Args:
         TEST_YEAR: Year forwarded to ``build_data_arrays`` and
             ``get_relative_error``.

     Returns:
         Dict mapping each position that has more than one training and
         more than one test sample to a DataFrame with columns ``name`` and
         ``value``, sorted by ``value`` in descending order.
     """
     X, Y, xTrain, yTrain, xTest, yTest, names = self.build_data_arrays(TEST_YEAR)
     # ``epochs`` replaces the legacy ``nb_epoch`` keyword, which the Keras 2
     # wrapper silently drops.
     predictor = KerasRegressor(build_fn=wide_model, epochs=1000, batch_size=5, verbose=0)
     scores = {}
     output = {}
     relativeError = {}
     for p in positions:
         # Positions with too few samples are skipped.
         if len(xTrain[p]) > 1 and len(xTest[p]) > 1:
             predictor.fit(np.array(xTrain[p]), np.array(yTrain[p]))
             prediction = predictor.predict(np.array(xTest[p]))
             # list(): give DataFrame concrete rows rather than a lazy zip.
             ranked = pd.DataFrame(list(zip(names[p], prediction)), columns=['name', 'value'])
             output[p] = ranked.sort_values(by=['value'], ascending=False)
             scores[p] = (mean_squared_error(np.array(yTest[p]), np.array(prediction)),
                          r2_score(np.array(yTest[p]), np.array(prediction)))
             relativeError[p] = get_relative_error(output[p], TEST_YEAR)
     return output
예제 #21
0
def predict_list(x):
    """Predict with the saved model and map the result back to target units.

    Loads ``output/model.please`` and ``output/y_scaler.please`` from the
    directory containing this file, prints the input, and returns the
    inverse-scaled prediction.
    """
    output_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                              "output")

    # The wrapper is only a shell around the network loaded from disk.
    estimator = KerasRegressor(build_fn=build_by_loading,
                               nb_epoch=5000,
                               batch_size=50,
                               verbose=1)
    estimator.model = load_model(os.path.join(output_dir, "model.please"))

    print("Fed to estimator")
    print(x)
    prediction = estimator.predict(x)

    y_scaler = joblib.load(os.path.join(output_dir, "y_scaler.please"))
    return y_scaler.inverse_transform(prediction)
예제 #22
0
def train(x_train, y_train, x_test, y_test):
    """Fit a small CNN regressor, save it to disk and print test predictions.

    The architecture is written to ``sumatra_model.json`` and the weights
    to ``sumatra_model.h5``. Predictions for ``x_test`` are printed,
    followed by the reference values in ``y_test``.
    """

    def build_model():
        """Build and compile the two-convolution regression network."""
        model = Sequential()

        # add model layers
        model.add(
            Conv2D(64,
                   kernel_size=2,
                   strides=(2, 2),
                   activation="relu",
                   input_shape=(768, 1023, 1)))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Conv2D(128, kernel_size=2, strides=(2, 2),
                         activation="relu"))
        model.add(Flatten())
        model.add(Dense(1))

        # compile for regression: mean squared error loss, no extra metrics
        model.compile(optimizer="adam", loss="mean_squared_error")
        return model

    estimator = KerasRegressor(build_fn=build_model,
                               epochs=20,
                               batch_size=5,
                               verbose=0)
    estimator.fit(x_train, y_train)

    # serialize model to JSON
    model_json = estimator.model.to_json()
    with open("sumatra_model.json", "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    estimator.model.save_weights("sumatra_model.h5")
    print("Saved model to disk")

    predictions = estimator.predict(x_test)
    print('prediction')
    for prediction in predictions:
        print(prediction)
    print('val')
    for val in y_test:
        print(val)
def run_model(train, test, epochs, concat, clean_split, outputPath,
              weightsName, num, X_train, X_test, Y_train, Y_test):
    """Fit the model, report test metrics, save outputs; return signed r^2.

    Prints R2, the sign-preserving squares of Pearson's r and Spearman's
    rho, the mean rank error and the RMSE. Saves predictions and reference
    values under ``<outputPath>/pred_test/`` and the model to
    ``weightsName``.

    Returns:
        Pearson's r squared, carrying the sign of r.
    """

    def signed_square(value):
        """Square ``value`` while keeping its sign."""
        return -value**2 if value < 0 else value**2

    reg = KerasRegressor(build_fn=inter,
                         epochs=epochs,
                         verbose=1,
                         validation_split=0.0)

    print("Testing model")
    reg.fit(X_train, Y_train)
    prediction = reg.predict(X_test)
    print("R2: ", r2_score(Y_test, prediction))

    p = signed_square(pearsonr(Y_test, prediction)[0])
    print("Pearson's r: ", p)
    s = signed_square(spearmanr(Y_test, prediction)[0])
    print("Spearman's rank correlation rho^2 and p: ", s)

    # Mean absolute difference between predicted and true ranks.
    pred_rank = ss.rankdata(prediction)
    true_rank = ss.rankdata(Y_test)
    meanDiff = np.mean(abs(pred_rank - true_rank))
    n_examples = len(Y_test)
    print("Mean Index Error for " + str(n_examples) + " test examples: ",
          meanDiff)
    print("Percent off: ", float(meanDiff) / n_examples * 100)

    run_tag = train + "_" + test + str(epochs)
    np.save(outputPath + "/pred_test/pred_" + run_tag, prediction)
    np.save(outputPath + "/pred_test/test_" + run_tag, Y_test)

    result = np.sqrt(mean_squared_error(Y_test, prediction))
    print("Testing RMSE: {}".format(result))

    print("Saving model to: ", weightsName)
    reg.model.save(weightsName)
    return p
예제 #24
0
def local_test(X_train,
               y_train,
               X_test,
               y_test,
               model,
               epoch=50,
               batch_size=100):
    """Fit ``model`` on standardised data and return it with its test R2.

    The scaler is fitted on the training features only and reused for the
    test features.

    Returns:
        Tuple ``(fitted regressor, r2_score on the test set)``.
    """
    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(X_train)

    reg = KerasRegressor(build_fn=model,
                         epochs=epoch,
                         batch_size=batch_size,
                         verbose=0)
    reg.fit(train_scaled, y_train)

    test_scaled = scaler.transform(X_test)
    return reg, r2_score(y_test, reg.predict(test_scaled))
예제 #25
0
def train_reg(X, Y, fn, X_test, Y_test, seed=7):
    """Cross-validate ``fn``'s model on ``(X, Y)``, refit, and print the error.

    Args:
        X, Y: Training data used for 10-fold cross-validation and the
            final fit.
        fn: Model-building callable passed to ``KerasRegressor``.
        X_test, Y_test: Held-out data scored with ``rmse``.
        seed: Seed for NumPy and for the shuffled fold assignment.
    """
    np.random.seed(seed)
    estimator = KerasRegressor(build_fn=fn,
                               epochs=100,
                               batch_size=128,
                               verbose=0)
    # random_state only has an effect when shuffling is enabled; recent
    # scikit-learn rejects it otherwise.
    kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
    results = cross_val_score(estimator, X, Y, cv=kfold)
    print(results)
    print('Result: %.2f (%.2f) MSE' % (results.mean(), results.std()))

    estimator.fit(X, Y)
    netOutput = estimator.predict(X_test)

    print("Loss and metrics")
    print(rmse(netOutput, Y_test))
예제 #26
0
파일: models.py 프로젝트: bjur27r/dash_iot
def get_caudal(gra, p_h, pot):
    """Predict with the saved ``modelBV1.h5`` network for a single input row.

    ``gra`` is divided by 1000 before being fed to the model together with
    ``p_h`` and ``pot``.

    Returns:
        The prediction as a float rounded to two decimals.
    """
    # The wrapper is only a shell around the network loaded from disk.
    model2 = KerasRegressor(build_fn=larger_model,
                            epochs=10,
                            batch_size=10,
                            verbose=1)
    model2.model = load_model('modelBV1.h5')

    scaled_gra = gra / 1000
    row = [float(scaled_gra), float(p_h), float(pot)]
    Xnew = np.array([row])

    caudl = model2.predict(Xnew)
    print(caudl)
    return round(float(caudl), 2)
예제 #27
0
def predict_list(x):
    """Predict with the persisted model and map the result back to target scale.

    The network and the target scaler are both read from the ``output``
    directory that sits next to this source file.

    Args:
        x: Feature rows handed to the estimator's ``predict``.

    Returns:
        The predictions after ``inverse_transform`` of the stored y-scaler.
    """
    output_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                              "output")
    hyperparameters = config["hyperparameters"]
    n_epochs = int(hyperparameters["epochs"])
    n_batch = int(hyperparameters["batch_size"])

    estimator = KerasRegressor(build_fn=build_by_loading,
                               nb_epoch=n_epochs,
                               batch_size=n_batch,
                               verbose=1)
    # Attach the already-trained network; no fitting happens here.
    estimator.model = load_model(os.path.join(output_dir, "model.please"))

    print("Fed to estimator")
    print(x)
    scaled_prediction = estimator.predict(x)

    y_scaler = joblib.load(os.path.join(output_dir, "y_scaler.please"))
    return y_scaler.inverse_transform(scaled_prediction)
예제 #28
0
def runANN(inputs, hp):
    """Train one baseline network per hyper-parameter pair and collect metrics.

    Args:
        inputs: Pickled dataset bytes; when falsy the dataset is created with
            ``build_dataset()`` instead.
        hp: Iterable of ``(l1_units, l2_units)`` pairs.

    Returns:
        Dict with a ``"msg"`` string (host name and elapsed time) and a
        ``"data"`` list holding, per pair, the validation score, the test MSE
        and the pair itself.
    """
    # NOTE(review): pickle.loads executes arbitrary code for crafted payloads;
    # confirm that `inputs` only ever comes from a trusted producer.
    data = pickle.loads(inputs) if inputs else build_dataset()

    started = datetime.datetime.now()

    outputs = []
    for l1_units, l2_units in hp:
        model = KerasRegressor(build_fn=create_baseline_model,
                               l1_units=l1_units,
                               l2_units=l2_units,
                               verbose=0)

        model.fit(data["x_train"],
                  data["y_train"],
                  verbose=0,
                  validation_data=(data["x_val"], data["y_val"]),
                  epochs=300)

        val_score = model.score(data["x_val"], data["y_val"])
        raw_predictions = model.predict(data["x_test"])

        # Flatten to one value per sample before comparing with the targets.
        flat_predictions = raw_predictions.reshape(len(raw_predictions))
        test_mse = mean_squared_error(data["y_test"], flat_predictions)

        outputs.append({
            'score': val_score,
            'mse': test_mse,
            'hp': [l1_units, l2_units],
        })

    elapsed = datetime.datetime.now() - started
    host = socket.gethostname()

    message = "Run ANN! [" + host + "] > elapsed time: " + str(elapsed)
    return {
        "msg": message,
        "data": outputs,
    }
예제 #29
0
def yieldPred(request):
    """Render the yield form, or the predicted yield for a valid submission.

    A valid POST looks up the data point for the chosen crop and location,
    loads the saved Keras model for that crop and renders the first
    prediction. Any other request (GET, or a POST that fails validation)
    renders a fresh, unbound form.
    """
    submitted = YieldForm(request.POST) if request.method == 'POST' else None
    if submitted is None or not submitted.is_valid():
        return render(request, 'services/yieldPred.html',
                      {'form': YieldForm()})

    crop_name = submitted.cleaned_data.get('Crop')
    location = submitted.cleaned_data.get('Location')
    features = getCropDataPoint(location, crop_name)

    regressor = KerasRegressor(build_fn=build_regressor,
                               epochs=10,
                               batch_size=10,
                               verbose=1)

    # Every crop without a dedicated model falls back to the sugarcane one.
    dedicated_models = ("Barley", "Wheat", "Maize", "Rice")
    file_prefix = crop_name if crop_name in dedicated_models else "Sugarcane"
    models_dir = ('D:\\Projects\\Clone 16 Nov Capstone\\FarmAlert\\farm'
                  '\\static\\Kerasmodels\\')
    regressor.model = load_model(models_dir + file_prefix + 'modelkeras.h5')

    yield_estimate = regressor.predict(features)[0]
    return render(request, 'services/yieldResult.html',
                  {'data': yield_estimate})
예제 #30
0
def train(x_train, y_train, x_test, y_test):
    """Fit a small CNN regressor, persist it and print the test predictions.

    The architecture is written to ``kalimantan_model.json`` and the weights
    to ``kalimantan_model.h5`` in the current working directory.

    Args:
        x_train: Training images; the network expects inputs of shape
            (756, 1211, 1).
        y_train: Training targets.
        x_test: Test images.
        y_test: Test targets, indexed by position when printing.
    """

    def build_model():
        """Build one strided Conv2D layer followed by two Dense layers."""
        model = Sequential()

        # add model layers
        model.add(
            Conv2D(32,
                   kernel_size=10,
                   strides=(5, 5),
                   activation="relu",
                   input_shape=(756, 1211, 1)))
        model.add(Flatten())
        model.add(Dense(50, activation="relu"))
        model.add(Dense(1))

        # compile for regression: mean squared error loss, no extra metrics
        model.compile(optimizer="adam", loss="mean_squared_error")
        return model

    estimator = KerasRegressor(build_fn=build_model,
                               epochs=20,
                               batch_size=5,
                               verbose=0)
    estimator.fit(x_train, y_train)

    # serialize model to JSON
    model_json = estimator.model.to_json()
    with open("kalimantan_model.json", "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    estimator.model.save_weights("kalimantan_model.h5")
    print("Saved model to disk")

    predictions = estimator.predict(x_test)
    for x, prediction in enumerate(predictions):
        # Format instead of concatenating: adding a str to a numeric
        # target/prediction raises TypeError.
        print('{} vs {}'.format(y_test[x], prediction))
    '''
예제 #31
0
class WrappedKerasRegressor(object):
    """Estimator-style wrapper around a two-hidden-layer Keras MLP regressor."""

    @staticmethod
    def parameters():
        """Return the parameter grid advertised for this model."""
        return dict(epochs=[50])

    def __init__(self, epochs=500):
        self.epochs = epochs

    def fit(self, X, y):
        """Train the network on ``X``/``y`` and save the training curves.

        The MSE and MAE histories are plotted to PNG files under
        ``visualizations/``.
        """
        n_features = X.shape[1]

        def build_model():
            # Two dropout-regularized hidden layers and a linear output unit.
            net = Sequential()
            net.add(Dense(128, input_dim=n_features, init='uniform', activation='relu'))
            net.add(Dropout(0.5))
            net.add(Dense(128, init='uniform', activation='relu'))
            net.add(Dropout(0.5))
            net.add(Dense(1, init='uniform', activation='linear'))

            net.compile(loss='mse', optimizer=Adam(), metrics=['mae', 'mse'])
            return net

        self.model = KerasRegressor(build_model, nb_epoch=self.epochs, batch_size=1024, verbose=1)
        self._history = self.model.fit(X, y)

        # TODO do this outside of the thing so we can name this better
        curves = (
            ('mean_squared_error', "visualizations/keras_mse.png"),
            ('mean_absolute_error', "visualizations/keras_mae.png"),
        )
        for metric_key, target_file in curves:
            plt.figure()
            plt.plot(self._history.history[metric_key])
            plt.savefig(target_file)

    def predict(self, X):
        """Return (and print) the fitted model's predictions for ``X``."""
        predictions = self.model.predict(X)
        print('---- yhat', predictions)
        return predictions
def keras1(train2, y, test2, v, z):
    """Train a Keras MLP over 7 shuffle splits and store its predictions.

    Validation predictions are accumulated into column ``keras1`` of ``v`` and
    test predictions, averaged over the splits, into column ``keras1`` of
    ``z``. Per-split log-loss values are printed together with their mean and
    standard deviation.

    Args:
        train2: Training features; robust-scaled before use.
        y: Training targets, indexed positionally by the split indices.
        test2: Test features; scaled with the scaler fitted on ``train2``.
        v: Container receiving validation predictions (written via ``.loc``,
            so presumably a pandas DataFrame - confirm against the caller).
        z: Container receiving the averaged test predictions.
    """
    # Name of this function; used as the output column name.
    cname = sys._getframe().f_code.co_name
    v[cname], z[cname] = 0, 0
    scores = list()
    scaler = preprocessing.RobustScaler()
    train3 = scaler.fit_transform(train2)
    test3 = scaler.transform(test2)
    input_dims = train3.shape[1]
    def build_model():
        # Three hidden Dense layers whose widths are multiples of the input
        # width, each followed by batch normalization, then a sigmoid unit.
        input_ = layers.Input(shape=(input_dims,))
        model = layers.Dense(int(input_dims * 4.33),
                             kernel_initializer='Orthogonal',
                             activation=layers.advanced_activations.PReLU())(input_)
        model = layers.BatchNormalization()(model)
        #model = layers.Dropout(0.7)(model)
        model = layers.Dense(int(input_dims * 2.35),
                             kernel_initializer='Orthogonal',
                             activation=layers.advanced_activations.PReLU())(model)
        model = layers.BatchNormalization()(model)
        #model = layers.Dropout(0.9)(model)
        model = layers.Dense(int(input_dims * 0.51),
                             kernel_initializer='Orthogonal',
                             activation=layers.advanced_activations.PReLU())(model)
        model = layers.BatchNormalization()(model)
        model = layers.Dense(1,
                             activation='sigmoid')(model)
        model = models.Model(input_, model)
        model.compile(loss = 'binary_crossentropy',
                      optimizer = optimizers.Nadam(lr=0.02),
                      metrics=["accuracy"])
        #print(model.summary(line_length=120))
        return model
    np.random.seed(1234)
    est = KerasRegressor(build_fn=build_model,
                         nb_epoch=10000,
                         batch_size=32,
                         #verbose=2
                        )
    # Builds a throwaway model only to print its layer summary.
    build_model().summary(line_length=120)
    # Checkpoint path: csv_name_suffix() (helper defined elsewhere) with its
    # last four characters replaced by '_keras_model.h5'.
    model_path = '../data/working/' + csv_name_suffix()
    model_path = model_path[:-4] + '_keras_model.h5'
    # All three callbacks monitor the validation loss: stop after 20 epochs
    # without improvement, keep only the best weights on disk, and shrink the
    # learning rate by a factor of 0.2 on plateaus.
    kcb = [
           callbacks.EarlyStopping(
                  monitor='val_loss',
                  patience=20
                  #verbose=1
                   ),
           callbacks.ModelCheckpoint(
                  model_path,
                  monitor='val_loss',
                  save_best_only=True,
                  save_weights_only=True,
                  verbose=0
                   ),
           callbacks.ReduceLROnPlateau(
                  monitor='val_loss',
                  min_lr=1e-7,
                  factor=0.2,
                  verbose=1
                   )
           ]
    num_splits = 7
    ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=11)
    for n, (itrain, ival) in enumerate(ss.split(train3, y)):
        xtrain, xval = train3[itrain], train3[ival]
        ytrain, yval = y[itrain], y[ival]
        est.fit(
                xtrain, ytrain,
                epochs=10000,
                validation_data=(xval, yval),
                verbose=0,
                callbacks=kcb,
                shuffle=True
            )
        # Restore the best checkpoint of this split before predicting.
        est.model.load_weights(model_path)
        p = est.predict(xval)
        # pconvert is defined elsewhere; presumably it post-processes the raw
        # predictions - TODO confirm.
        # NOTE(review): validation sets drawn by ShuffleSplit may overlap
        # between splits, so a row can accumulate more than one prediction
        # here without being averaged - confirm this is intended.
        v.loc[ival, cname] += pconvert(p)
        score = metrics.log_loss(y[ival], p)
        print(cname, 'fold %d: '%(n+1), score, now())
        scores.append(score)
        z[cname] += pconvert(est.predict(test3))
    # The checkpoint file is only needed while training.
    os.remove(model_path)

    cv=np.array(scores)
    print(cv, cv.mean(), cv.std())
    # Turn the summed test predictions into their mean over the splits.
    z[cname] /= num_splits
예제 #33
0
class SimpleModel:
    """Regression model over fixed-length windows of per-stock daily rows.

    Every stock code is a table in ``../data/stock.db``. A sample consists of
    ``frame_len`` consecutive rows (flattened), and the target is the first
    feature column ``predict_dist`` rows after the end of the window. Each
    code gets its own ``StandardScaler`` stored in ``self.scaler``.

    Column names used: '일자' (date), '현재가' (current price) and '체결강도'
    (dropped before scaling).
    """

    # Number of feature columns per row after dropping '일자' and '체결강도'
    # and adding 'month'.
    N_FEATURES = 23

    def __init__(self):
        self.data = dict()
        self.frame_len = 30
        self.predict_dist = 5
        self.scaler = dict()

    @staticmethod
    def _strip_sign_markers(data, report=True):
        """Normalize sign markers in string columns in place.

        '--' becomes '-' and '+' is removed. Columns that are not strings
        raise AttributeError on the ``.str`` accessor and are left untouched;
        the error is printed when ``report`` is true.
        """
        for col in data.columns:
            try:
                data.loc[:, col] = data.loc[:, col].str.replace('--', '-')
                data.loc[:, col] = data.loc[:, col].str.replace('+', '')
            except AttributeError as e:
                if report:
                    print(e)

    def _pad_inverse(self, code, scaled_value):
        """Map a scaled first-feature value back to its original scale.

        The scaler of ``code`` works on whole rows, so the value is padded
        with zeros for the remaining features and only the first component of
        the result is returned. Raises KeyError for a code without scaler.
        """
        # NOTE(review): the row is passed as a 1-D list, as in the original
        # code; recent scikit-learn versions may require a 2-D array - confirm.
        padded_row = [scaled_value] + [0] * (self.N_FEATURES - 1)
        return self.scaler[code].inverse_transform(padded_row)[0]

    def _keras_paths(self, s_date):
        """Return ``(weights_path, json_path)`` of the Keras model for a date."""
        weights_path = "../model/reg_keras/%d_%d_%s.h5" % (self.frame_len, self.predict_dist, s_date)
        # Swap only the extension; str.replace would also rewrite an 'h5'
        # occurring elsewhere in the path.
        json_path = weights_path[:-len('h5')] + 'json'
        return weights_path, json_path

    def _load_estimator(self, s_date):
        """Load ``self.estimator`` from disk according to ``MODEL_TYPE``.

        For any other ``MODEL_TYPE`` the current estimator is left unchanged.
        """
        if MODEL_TYPE == 'random_forest':
            model_name = "../model/simple_reg_model/%d_%d.pkl" % (self.frame_len, self.predict_dist)
            self.estimator = joblib.load(model_name)
        elif MODEL_TYPE == 'keras':
            weights_path, json_path = self._keras_paths(s_date)
            with open(json_path) as f_json:
                self.estimator = model_from_json(f_json.read())
            self.estimator.load_weights(weights_path)

    def load_all_data(self, begin_date, end_date):
        """Build the training set from every table in the stock database.

        Args:
            begin_date: Exclusive lower bound on the date index.
            end_date: Exclusive upper bound on the date index.

        Returns:
            Tuple ``(X, Y, DATA)`` of numpy arrays. Each ``DATA`` row is
            ``[date, code, current price, future price]`` for the sample at
            the same position in ``X``/``Y``. All three are empty when no
            table yields more than one sample.
        """
        con = sqlite3.connect('../data/stock.db')
        try:
            code_list = con.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
        finally:
            con.close()

        # Row offsets of the last window row and of the first target row.
        last_in_window = self.frame_len - 1
        first_target = self.frame_len + self.predict_dist - 1

        # Samples are appended straight to the result lists. The former
        # ten-bucket split divided by zero for fewer than nine tables and
        # could index past the last bucket.
        X_data, Y_data, DATA = [], [], []
        bar = ProgressBar(len(code_list), max_width=80)
        for row in code_list:
            code = row[0]
            data = self.load_data(code, begin_date, end_date)
            # Re-number the rows after dropping NaNs so that the label-based
            # slices below line up with the positional windows of make_x_y.
            data = data.dropna().reset_index(drop=True)
            X, Y = self.make_x_y(data, code)
            if len(X) <= 1:
                continue
            last_window_end = len(data) - self.predict_dist - 1
            dates = data.loc[last_in_window:last_window_end, '일자']
            assert len(X) == len(dates)
            meta = np.array([
                dates.values.tolist(),
                [code] * len(X),
                data.loc[last_in_window:last_window_end, '현재가'],
                data.loc[first_target:len(data), '현재가'],
            ]).T.tolist()
            X_data.extend(X)
            Y_data.extend(Y)
            DATA.extend(meta)
            bar.numerator += 1
            print("%s | %d" % (bar, len(X_data)), end='\r')
            sys.stdout.flush()
        print("%s | %d" % (bar, len(DATA)))
        return np.array(X_data), np.array(Y_data), np.array(DATA)

    def load_data(self, code, begin_date, end_date):
        """Load the rows of one stock strictly between two dates.

        Returns a DataFrame sorted by date with '일자' as a regular column.
        """
        con = sqlite3.connect('../data/stock.db')
        try:
            # Table names cannot be bound as SQL parameters; `code` is
            # expected to come from sqlite_master, not from user input.
            df = pd.read_sql("SELECT * from '%s'" % code, con, index_col='일자').sort_index()
        finally:
            con.close()
        data = df.loc[df.index > str(begin_date)]
        data = data.loc[data.index < str(end_date)]
        return data.reset_index()

    def make_x_y(self, data, code):
        """Turn one stock's rows into flattened windows and targets.

        ``data`` is cleaned in place (sign markers, added 'month' column). The
        scaler of ``code`` is fitted on first use and reused afterwards.

        Returns:
            Tuple ``(np_x, np_y)``: ``np_x`` has one row of
            ``N_FEATURES * frame_len`` values per window and ``np_y`` holds
            the scaled first feature ``predict_dist`` rows after each window.
            Two empty arrays are returned when ``data`` has no rows.
        """
        self._strip_sign_markers(data, report=True)
        data.loc[:, 'month'] = data.loc[:, '일자'].str[4:6]
        data = data.drop(['일자', '체결강도'], axis=1)

        # normalization
        data = np.array(data)
        if len(data) <= 0:
            return np.array([]), np.array([])

        if code not in self.scaler:
            self.scaler[code] = StandardScaler()
            data = self.scaler[code].fit_transform(data)
        else:
            data = self.scaler[code].transform(data)

        data_x = []
        data_y = []
        for i in range(self.frame_len, len(data) - self.predict_dist + 1):
            data_x.extend(np.array(data[i - self.frame_len:i, :]))
            data_y.append(data[i + self.predict_dist - 1][0])
        np_x = np.array(data_x).reshape(-1, self.N_FEATURES * self.frame_len)
        np_y = np.array(data_y)
        return np_x, np_y

    def train_model(self, X_train, Y_train):
        """Fit a random forest on the samples and dump it with joblib."""
        print("training model %d_%d.pkl" % (self.frame_len, self.predict_dist))
        model_name = "../model/simple_reg_model/%d_%d.pkl" % (self.frame_len, self.predict_dist)
        self.estimator = RandomForestRegressor(random_state=0, n_estimators=100, n_jobs=-1)
        self.estimator.fit(X_train, Y_train)
        print("finish training model")
        joblib.dump(self.estimator, model_name)

    def set_config(self):
        """Install a TensorFlow session whose GPU memory grows on demand."""
        #Tensorflow GPU optimization
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        K.set_session(sess)

    def train_model_keras(self, X_train, Y_train, date):
        """Fit the Keras baseline model and save architecture and weights."""
        print("training model %d_%d.h5" % (self.frame_len, self.predict_dist))
        weights_path, json_path = self._keras_paths(date)
        self.estimator = KerasRegressor(build_fn=baseline_model, nb_epoch=200, batch_size=64, verbose=1)
        self.estimator.fit(X_train, Y_train)
        print("finish training model")
        # saving model
        json_model = self.estimator.model.to_json()
        with open(json_path, 'w') as f_json:
            f_json.write(json_model)
        self.estimator.model.save_weights(weights_path, overwrite=True)

    def evaluate_model(self, X_test, Y_test, orig_data, s_date):
        """Print the RMSE of the stored model and a simple trading result.

        Args:
            X_test: Flattened windows as produced by ``make_x_y``.
            Y_test: Scaled targets for ``X_test``.
            orig_data: Rows ``[date, code, current price, future price]``
                aligned with ``X_test``.
            s_date: Date suffix identifying the Keras model files.
        """
        print("Evaluate model %d_%d.pkl" % (self.frame_len, self.predict_dist))
        self._load_estimator(s_date)
        pred = self.estimator.predict(X_test)
        assert(len(pred) == len(Y_test))
        pred = np.array(pred).reshape(-1)
        Y_test = np.array(Y_test).reshape(-1)
        errors = pred.astype(float) - Y_test.astype(float)
        score = np.sqrt(np.mean(errors ** 2))
        print("score: %f" % score)

        res = 0
        for idx, predicted in enumerate(pred):
            sample = orig_data[idx]
            buy_price = int(sample[2])
            future_price = int(sample[3])
            date = int(sample[0])
            pred_transform = self._pad_inverse(sample[1], predicted)
            if pred_transform > buy_price * 1.01:
                # NOTE(review): the accumulated result uses a quantity of
                # 100000/buy_price + 1 while the printed earning omits the
                # "+ 1" - confirm which one is intended.
                res += (future_price - buy_price*1.005)*(100000/buy_price+1)
                print("[%s] buy: %6d, sell: %6d, earn: %6d" % (str(date), buy_price, future_price, (future_price - buy_price*1.005)*(100000/buy_price)))
        print("result: %d" % res)

    def load_current_data(self):
        """Build one window from the latest rows of every known stock.

        Codes with fewer than ``frame_len`` rows or without a fitted scaler
        are skipped.

        Returns:
            Tuple ``(X_test, code_list, DATA)`` whose entries are aligned:
            row ``i`` of ``X_test`` belongs to ``code_list[i]`` and
            ``DATA[i]`` is that code's latest '현재가' as an int.
        """
        X_test = []
        DATA = []
        kept_codes = []
        con = sqlite3.connect('../data/stock.db')
        try:
            table_rows = con.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
            code_list = [table_row[0] for table_row in table_rows]
            bar = ProgressBar(len(code_list), max_width=80)
            for code in code_list:
                bar.numerator += 1
                print("%s | %d" % (bar, len(X_test)), end='\r')
                sys.stdout.flush()
                df = pd.read_sql("SELECT * from '%s'" % code, con, index_col='일자').sort_index()
                data = df.iloc[-self.frame_len:, :]
                data = data.reset_index()
                if len(data) < self.frame_len:
                    continue
                # Collect accepted codes in a new list instead of removing
                # rejected ones from the list being iterated, and look the
                # scaler up before recording the price so that the three
                # outputs stay aligned.
                try:
                    code_scaler = self.scaler[code]
                except KeyError:
                    continue
                self._strip_sign_markers(data, report=False)
                data.loc[:, 'month'] = data.loc[:, '일자'].str[4:6]
                data = data.drop(['일자', '체결강도'], axis=1)
                DATA.append(int(data.loc[len(data)-1, '현재가']))
                X_test.extend(np.array(code_scaler.transform(np.array(data))))
                kept_codes.append(code)
        finally:
            con.close()
        X_test = np.array(X_test).reshape(-1, self.N_FEATURES * self.frame_len)
        return X_test, kept_codes, DATA

    def make_buy_list(self, X_test, code_list, orig_data, s_date):
        """Write buy orders to ``../data/buy_list.txt``.

        A code is listed when its predicted price exceeds three times the
        current real price and it is not already held in the account.

        Args:
            X_test: One flattened window per code.
            code_list: Stock codes aligned with ``X_test``.
            orig_data: Current real prices aligned with ``X_test``.
            s_date: Date suffix identifying the Keras model files.
        """
        BUY_UNIT = 10000
        print("make buy_list")
        self._load_estimator(s_date)
        pred = np.array(self.estimator.predict(X_test)).reshape(-1)

        # load code list from account
        set_account = set()
        with open('../data/stocks_in_account.txt') as f_stocks:
            for line in f_stocks:
                fields = line.split(',')
                set_account.add(fields[6].replace('A', ''))

        # buy/sell, code, market price/current price, qty, price,
        # "before order/order complete"
        buy_item = ["매수", "", "시장가", 0, 0, "매수전"]
        with open("../data/buy_list.txt", "wt") as f_buy:
            for idx, predicted in enumerate(pred):
                code = code_list[idx]
                real_buy_price = int(orig_data[idx])
                try:
                    pred_transform = self._pad_inverse(code, predicted)
                except KeyError:
                    continue
                print("[BUY PREDICT] code: %s, cur: %5d, predict: %5d" % (code, real_buy_price, pred_transform))
                if pred_transform > real_buy_price * 3 and code not in set_account:
                    print("add to buy_list %s" % code)
                    buy_item[1] = code
                    buy_item[3] = int(BUY_UNIT / real_buy_price) + 1
                    f_buy.write("".join("%s;" % str(item) for item in buy_item))
                    f_buy.write('\n')

    def load_data_in_account(self):
        """Build one window for every stock listed in the account file.

        Returns:
            Tuple ``(X_test, DATA)``. Each ``DATA`` row is the account fields
            ``[code, field 1, field 0, latest '현재가']`` and is aligned with
            the row of ``X_test`` at the same position; stocks that cannot be
            loaded, are too short or have no scaler are left out of both.
        """
        # load code list from account
        DATA = []
        with open('../data/stocks_in_account.txt') as f_stocks:
            for line in f_stocks:
                fields = line.split(',')
                DATA.append([fields[6].replace('A', ''), fields[1], fields[0]])

        # load data in DATA
        X_test = []
        idx_rm = []
        con = sqlite3.connect('../data/stock.db')
        try:
            bar = ProgressBar(len(DATA), max_width=80)
            for idx, entry in enumerate(DATA):
                bar.numerator += 1
                print("%s | %d" % (bar, len(X_test)), end='\r')
                sys.stdout.flush()

                try:
                    df = pd.read_sql("SELECT * from '%s'" % entry[0], con, index_col='일자').sort_index()
                except pd.io.sql.DatabaseError as e:
                    print(e)
                    idx_rm.append(idx)
                    continue
                data = df.iloc[-self.frame_len:, :]
                data = data.reset_index()
                # Reject short histories before touching the rows; reading
                # the latest price from an empty frame would raise.
                if len(data) < self.frame_len:
                    idx_rm.append(idx)
                    continue
                try:
                    code_scaler = self.scaler[entry[0]]
                except KeyError:
                    idx_rm.append(idx)
                    continue
                self._strip_sign_markers(data, report=True)
                data.loc[:, 'month'] = data.loc[:, '일자'].str[4:6]
                entry.append(int(data.loc[len(data)-1, '현재가']))
                data = data.drop(['일자', '체결강도'], axis=1)
                X_test.extend(np.array(code_scaler.transform(np.array(data))))
        finally:
            con.close()
        # Delete from the back so earlier indices stay valid. The previous
        # slice [-1:0:-1] skipped the first rejected index and left DATA
        # misaligned with X_test.
        for i in reversed(idx_rm):
            del DATA[i]
        X_test = np.array(X_test).reshape(-1, self.N_FEATURES * self.frame_len)
        return X_test, DATA

    def make_sell_list(self, X_test, DATA, s_date):
        """Write sell orders to ``../data/sell_list.txt``.

        A held stock is listed when the (scaled) prediction is below the
        scaled current price taken from the last row of its window.

        Args:
            X_test: One flattened window per held stock.
            DATA: Rows as returned by ``load_data_in_account``.
            s_date: Date suffix identifying the Keras model files.
        """
        print("make sell_list")
        self._load_estimator(s_date)
        pred = np.array(self.estimator.predict(X_test)).reshape(-1)

        # Offset of the first feature of the last row inside a window.
        current_offset = self.N_FEATURES * (self.frame_len - 1)

        # buy/sell, code, market price/current price, qty, price,
        # "before order/order complete"
        sell_item = ["매도", "", "시장가", 0, 0, "매도전"]
        with open("../data/sell_list.txt", "wt") as f_sell:
            for idx, predicted in enumerate(pred):
                holding = DATA[idx]
                current_price = float(X_test[idx][current_offset])
                current_real_price = int(holding[3])
                name = holding[2]
                print("[SELL PREDICT] name: %s, code: %s, cur: %f(%d), predict: %f" % (name, holding[0], current_price, current_real_price, predicted))
                if predicted < current_price:
                    print("add to sell_list %s" % name)
                    sell_item[1] = holding[0]
                    sell_item[3] = holding[1]
                    f_sell.write("".join("%s;" % str(item) for item in sell_item))
                    f_sell.write('\n')

    def save_scaler(self, s_date):
        """Dump the per-code scalers to ``../model/scaler_<s_date>.pkl``."""
        model_name = "../model/scaler_%s.pkl" % s_date
        joblib.dump(self.scaler, model_name)

    def load_scaler(self, s_date):
        """Replace the per-code scalers with those stored for ``s_date``."""
        model_name = "../model/scaler_%s.pkl" % s_date
        self.scaler = joblib.load(model_name)
def keras_mlp1(train2, y, test2, v, z):
    """Train a SELU MLP over 9 shuffle splits and store its predictions.

    Validation predictions are accumulated into column ``keras_mlp1`` of
    ``v`` and test predictions, averaged over the splits, into column
    ``keras_mlp1`` of ``z``. Per-split log-loss values are printed together
    with their mean and standard deviation.

    Args:
        train2: Training features; robust-scaled before use.
        y: Training targets, indexed positionally by the split indices.
        test2: Test features; scaled with the scaler fitted on ``train2``.
        v: Container receiving validation predictions (written via ``.loc``,
            so presumably a pandas DataFrame - confirm against the caller).
        z: Container receiving the averaged test predictions.
    """
    # Name of this function; used as output column and checkpoint file name.
    cname = sys._getframe().f_code.co_name
    v[cname], z[cname] = 0, 0
    from keras import layers
    from keras import models
    from keras import optimizers
    from keras.wrappers.scikit_learn import KerasRegressor
    scores = list()
    scaler = preprocessing.RobustScaler()
    train3 = scaler.fit_transform(train2)
    test3 = scaler.transform(test2)
    input_dims = train3.shape[1]
    def build_model():
        # Dense 256 -> 64 -> 16 with SELU activations, then a sigmoid unit.
        input_ = layers.Input(shape=(input_dims,))
        model = layers.Dense(256, kernel_initializer='Orthogonal')(input_)
        #model = layers.BatchNormalization()(model)
        #model = layers.advanced_activations.PReLU()(model)
        model = layers.Activation('selu')(model)
        #model = layers.Dropout(0.7)(model)

        model = layers.Dense(64, kernel_initializer='Orthogonal')(model)
        #model = layers.BatchNormalization()(model)
        model = layers.Activation('selu')(model)
        #model = layers.advanced_activations.PReLU()(model)
        #model = layers.Dropout(0.9)(model)

        model = layers.Dense(16, kernel_initializer='Orthogonal')(model)
        #model = layers.BatchNormalization()(model)
        model = layers.Activation('selu')(model)
        #model = layers.advanced_activations.PReLU()(model)

        model = layers.Dense(1, activation='sigmoid')(model)

        model = models.Model(input_, model)
        model.compile(loss = 'binary_crossentropy', optimizer = optimizers.Nadam())
        #print(model.summary(line_length=120))
        return model
    np.random.seed(1234)
    est = KerasRegressor(build_fn=build_model,
                         nb_epoch=10000,
                         batch_size=256,
                         #verbose=2
                        )
    # Builds a throwaway model only to print its layer summary.
    build_model().summary(line_length=120)
    model_path = '../data/working/' + cname + '_keras_model.h5'
    num_splits = 9
    # Each split holds out 1/num_splits of the rows for validation.
    ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=11, test_size=1/num_splits)
    for n, (itrain, ival) in enumerate(ss.split(train3, y)):
        xtrain, xval = train3[itrain], train3[ival]
        ytrain, yval = y[itrain], y[ival]
        # build_keras_fit_callbacks is defined elsewhere; since the weights
        # are reloaded from model_path below, it presumably checkpoints to
        # that path - TODO confirm.
        est.fit(
                xtrain, ytrain,
                epochs=10000,
                validation_data=(xval, yval),
                verbose=0,
                callbacks=build_keras_fit_callbacks(model_path),
                shuffle=True
            )
        est.model.load_weights(model_path)
        p = est.predict(xval)
        # pconvert is defined elsewhere; presumably it post-processes the raw
        # predictions - TODO confirm.
        # NOTE(review): validation sets drawn by ShuffleSplit may overlap
        # between splits, so a row can accumulate more than one prediction
        # here without being averaged - confirm this is intended.
        v.loc[ival, cname] += pconvert(p)
        score = metrics.log_loss(y[ival], p)
        print(cname, 'fold %d: '%(n+1), score, now())
        scores.append(score)
        z[cname] += pconvert(est.predict(test3))
    # The checkpoint file is only needed while training.
    os.remove(model_path)

    cv=np.array(scores)
    print(cv, cv.mean(), cv.std())
    # Turn the summed test predictions into their mean over the splits.
    z[cname] /= num_splits
# Magnitudes of the first 16 FFT coefficients computed along each row over
# the last 32 columns of m_fc.
freq = np.absolute(np.fft.fft(m_fc[:,-32:], axis=1)[:,0:16])

from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

def whole_fc_m_ann_ensemble():
    """Build and compile the fully connected regression network.

    The network maps ``T*3 + 16`` inputs through two ReLU layers
    (``T*3 + 16`` and ``T*3`` units) to ``T`` linear outputs and is compiled
    with mean squared error loss and the Adam optimizer. ``T`` is read from
    module scope.
    """
    n_inputs = T*3+16
    stack = (
        Dense(units=n_inputs, kernel_initializer='normal', activation='relu', input_dim=n_inputs),
        Dense(units=T*3, kernel_initializer='normal', activation='relu'),
        Dense(units=T, kernel_initializer='normal'),
    )
    network = Sequential()
    for layer in stack:
        network.add(layer)
    network.compile(loss='mean_squared_error', optimizer='adam')
    return network
# Features: h and s from row 8 onwards, stacked with m_fc and its frequency
# magnitudes taken 8 rows earlier. Targets: m_fc from row 8 onwards.
X = np.hstack((h[8:], s[8:], m_fc[:-8], freq[:-8]))
Y = m_fc[8:]
seed = 7
np.random.seed(seed)
estimator = KerasRegressor(build_fn=whole_fc_m_ann_ensemble, epochs=14, batch_size=10, verbose=0)
# The folds are not shuffled, so a random_state has no effect; recent
# scikit-learn releases raise ValueError when it is given without
# shuffle=True. Omitting it yields the same deterministic folds.
kfold = KFold(n_splits=10)
results = cross_val_score(estimator, X, Y, cv=kfold)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))

# Refit on the complete data set for the final plot.
estimator.fit(X, Y)
# summary() prints the table itself and returns None, so it is not wrapped
# in print().
estimator.model.summary()

predicted = estimator.predict(X)
plot_summary(h[8:], s[8:], ens[8:], m_fc[8:], predicted, 50)
plt.savefig('pics\\keras_clean_test_out.png')