Example 1
def fit_naive_bayes(asset, y_col, X_col, window_size, results):
    y, X = make_data(asset,
                     response_col=y_col,
                     input_col=X_col,
                     window_size=window_size)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    Log.info("*** Gaussian Naive Bayes")

    clf = GaussianNB()
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)
    results = add_result(results, "Gaussian Naive Bayes", y_col, X_col,
                         window_size, score)
    return results
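All of these examples lean on the same project helpers (make_data, add_result, Log), which the listing does not include. Purely as orientation, here is a minimal hypothetical sketch of what they might look like; every signature and body below is inferred from the call sites, not taken from the project.

import logging

import pandas as pd

# Assumption: Log is a plain module-level logger in the original project.
Log = logging.getLogger(__name__)


def make_data(asset, response_col, input_col, window_size):
    # Hypothetical: build X from `window_size` lagged values of `input_col`
    # and align the response column with it. The real implementation may differ.
    df = asset.data  # assumption: the asset object wraps a DataFrame
    lagged = {"%s_lag%d" % (input_col, i): df[input_col].shift(i)
              for i in range(1, window_size + 1)}
    X = pd.DataFrame(lagged).dropna()
    y = df[response_col].loc[X.index]
    return y, X


def add_result(results, model_name, y_col, X_col, window_size, score):
    # Hypothetical: accumulate one scored configuration per call.
    results.append({"model": model_name, "response": y_col, "input": X_col,
                    "window": window_size, "score": score})
    return results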
Example 2
def fit_adaboost(asset, y_col, X_col, window_size, results):
    y, X = make_data(asset,
                     response_col=y_col,
                     input_col=X_col,
                     window_size=window_size)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    Log.info("*** AdaBoost")

    clf = AdaBoostClassifier(n_estimators=100, random_state=0)
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)

    results = add_result(results, "AdaBoost", y_col, X_col, window_size, score)
    return results
Example 3
def fit_KNN(asset, y_col, X_col, window_size, results):
    y, X = make_data(asset,
                     response_col=y_col,
                     input_col=X_col,
                     window_size=window_size)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    Log.info("*** KNN")

    clf = KNeighborsClassifier(n_neighbors=3)
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)

    results = add_result(results, "KNN", y_col, X_col, window_size, score)
    return results
Example 4
def fit_support_vector_machines(asset, y_col, X_col, window_size, results):
    y, X = make_data(asset,
                     response_col=y_col,
                     input_col=X_col,
                     window_size=window_size)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    Log.info("*** Support Vector Machines")

    clf = svm.SVC(gamma='scale', decision_function_shape='ovo')
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)
    results = add_result(results, "Support Vector Machines", y_col, X_col,
                         window_size, score)
    return results
Example 5
def fit_binary_logistic_regression(asset, y_col, X_col, window_size, results):
    y, X = make_data(asset,
                     response_col=y_col,
                     input_col=X_col,
                     window_size=window_size)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    Log.info("*** Binary logistic regression")

    clf = LogisticRegression(penalty='l2', solver='lbfgs')
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)

    results = add_result(results, "Binomial logistic regression", y_col, X_col,
                         window_size, score)
    return results
Example 6
def fit_decision_trees(asset, y_col, X_col, window_size, results):
    y, X = make_data(asset,
                     response_col=y_col,
                     input_col=X_col,
                     window_size=window_size)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    Log.info("*** Decision Trees")

    clf = tree.DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)

    results = add_result(results, "Decision Trees", y_col, X_col, window_size,
                         score)
    return results
Example 7
def fit_multinomial_logistic_regression(asset, y_col, X_col, window_size,
                                        results):
    y, X = make_data(asset,
                     response_col=y_col,
                     input_col=X_col,
                     window_size=window_size)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    Log.info("*** Multinomial logistic regression")

    clf = LogisticRegression(solver='lbfgs',
                             multi_class='multinomial',
                             max_iter=1000)
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)

    results = add_result(results, "Multinomial logistic regression", y_col,
                         X_col, window_size, score)
    return results
Example 8
def fit_bagging_logistic_regression(asset, y_col, X_col, window_size, results):
    y, X = make_data(asset,
                     response_col=y_col,
                     input_col=X_col,
                     window_size=window_size)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    Log.info("*** Bagging (Logistic Regression)")

    clf = BaggingClassifier(LogisticRegression(solver='lbfgs',
                                               multi_class='multinomial'),
                            n_estimators=5,
                            max_samples=0.5,
                            max_features=0.5)
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)
    results = add_result(results, "Bagging (log. regression)", y_col, X_col,
                         window_size, score)
    return results
Example 9
def fit_ANN(asset, y_col, X_col, window_size, results):
    y, X = make_data(asset,
                     response_col=y_col,
                     input_col=X_col,
                     window_size=window_size)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    Log.info("*** ANN")

    # Fit the scaler on the training set only to avoid leaking test-set statistics
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    from sklearn.neural_network import MLPClassifier

    # Small MLP: two hidden layers of 6 and 2 units; lbfgs works well on small data
    clf = MLPClassifier(solver='lbfgs',
                        alpha=1e-5,
                        hidden_layer_sizes=(6, 2),
                        random_state=1)
    clf.fit(X_train_scaled, y_train)
    score = clf.score(X_test_scaled, y_test)

    results = add_result(results, "ANN(6,2)", y_col, X_col, window_size, score)
    return results
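The scale-on-train, transform-both pattern above is the correct way to keep test-set statistics out of the scaler. In scikit-learn the same thing can be written more compactly with a pipeline; a minimal equivalent sketch, reusing the split from the function above:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier

# The pipeline fits the scaler on the training data only, then applies it
# automatically before the classifier in both fit() and score().
clf = make_pipeline(StandardScaler(),
                    MLPClassifier(solver='lbfgs',
                                  alpha=1e-5,
                                  hidden_layer_sizes=(6, 2),
                                  random_state=1))
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)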
Example 10
def fit_LSTM_model_signal(asset,
                          response_var,
                          input_vars,
                          window_size,
                          model_layers,
                          epochs=3,
                          outfile=None):
    Log.info(
        "Fitting categorical LSTM model %s",
        str((response_var, input_vars, window_size, model_layers, epochs)))

    # Create features if necessary
    input_cols = []
    for input_col in input_vars:
        asset = create_input_data(asset, input_col)
        input_cols.append(input_col)

    # Create response features if necessary
    response_col = make_response_col(response_var)
    asset = create_response_data(asset, response_var)

    # Make response variable y and input matrix X. Note that unlike the earlier
    # examples, this make_data variant returns (X, y) and takes an extra
    # config().days() argument.
    X, y = make_data(asset, response_col, input_cols, window_size,
                     config().days())

    # Convert X to numpy array
    X = np.array([x_i.values for x_i in X])

    # One-hot encode y: the +1 shift maps labels {-1, 0, +1} to classes {0, 1, 2}
    y = to_categorical(np.array(y) + 1, num_classes=3)

    # Split off a held-out test set, then carve a validation set from the rest
    X_val_train, X_test, y_val_train, y_test = split_data(X, y)

    X_train, X_validate, y_train, y_validate = train_test_split(
        X_val_train, y_val_train)

    model_name = "LSTM_Signal_" + str(int(np.random.uniform(10000, 99999)))

    # Skip degenerate splits: with only one class present, training is pointless
    if len(np.unique(np.argmax(y_validate, axis=1))) == 1 or len(
            np.unique(np.argmax(y_train, axis=1))) == 1:
        Log.error("Only one class in y_train or y_validate. Skipping training.")
        if outfile is not None:
            outfile.write("%s;%s;%s;%s;%s;%d;%d;%s;%s;%s;%s\n" %
                          (asset.symbol, response_var, str(input_vars),
                           model_name, str(model_layers), len(X_train),
                           len(X_test), "n/a", "n/a", "n/a", "n/a"))
            outfile.flush()
        return asset

    # Construct model
    model = Sequential()
    for layer in model_layers:
        model.add(LSTM(layer, return_sequences=True))
    # Final LSTM layer: return_sequences defaults to False, so the whole sequence
    # collapses to a single 3-unit output, one unit per category
    model.add(LSTM(3, activation="softmax"))

    # Compile model
    model.compile(loss='categorical_crossentropy',
                  optimizer='adagrad',
                  metrics=['categorical_accuracy'])

    # Train the model; TestCallback logs test-set metrics to the log file
    with open(os.path.join(output_path(), model_name + ".log"), "w") as logfile:
        model.fit(X_train,
                  y_train,
                  epochs=epochs,
                  validation_split=0.2,
                  callbacks=[TestCallback(X_test, y_test, logfile)])

    # Evaluate model
    loss, accuracy = model.evaluate(X_validate, y_validate)

    # Sequential.predict_classes was removed in newer Keras releases;
    # np.argmax(model.predict(X_validate), axis=1) is the modern equivalent
    y_predicted_class = model.predict_classes(X_validate)
    y_true_class = np.argmax(y_validate, axis=1)

    # Macro averaging weights all three classes equally
    precision = precision_score(y_true_class,
                                y_predicted_class,
                                average="macro")
    recall = recall_score(y_true_class, y_predicted_class, average="macro")

    # print("y true:", y_true_class)
    # print("y_predicted:", y_predicted_class)
    #
    # print("Loss: {:.2f}".format(loss))
    # print("Accuracy: {:.2f}%".format(accuracy * 100))
    # print("Precision: {:.2f}%".format(precision * 100))
    # print("Recall: {:.2f}%".format(recall * 100))

    # Write results to file
    if outfile is not None:
        outfile.write("%s;%s;%s;%s;%s;%d;%d;%.4f;%.4f;%.4f;%.4f\n" %
                      (asset.symbol, response_var, str(input_vars), model_name,
                       str(model_layers), len(X_train), len(X_test), loss,
                       accuracy, precision, recall))
        outfile.flush()

    # Save model to file
    model.save(os.path.join(output_path(), model_name + ".h5"))

    return asset
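The nine scikit-learn fitters all share the signature (asset, y_col, X_col, window_size, results), so they compose naturally into a small model-selection loop. A purely hypothetical driver sketch: the window sizes, the column names "signal" and "close", and the dict structure of `results` are assumptions, not taken from the source.

# Hypothetical driver loop over classifiers and window sizes.
fitters = [fit_naive_bayes, fit_adaboost, fit_KNN,
           fit_support_vector_machines, fit_binary_logistic_regression,
           fit_decision_trees, fit_multinomial_logistic_regression,
           fit_bagging_logistic_regression, fit_ANN]

results = []
for window_size in (5, 10, 20):  # assumed candidate window sizes
    for fit in fitters:
        results = fit(asset, "signal", "close", window_size, results)

# Assuming add_result stores dicts, as in the sketch after Example 1:
best = max(results, key=lambda r: r["score"])
Log.info("Best configuration: %s", best)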