Ejemplo n.º 1
0
            min_value = df[feature_name].min()
            result[feature_name] = (df[feature_name] -
                                    min_value) / (max_value - min_value)
    return result


for file in data_files:
    # For each data set: fit a model with md.fit_ada_boost on the training
    # CSV, predict on the matching test CSV, and write the predictions out.
    file_name = "data/" + file + training_suffix
    print("Working on: " + file_name + " - started at " + get_date_time())

    # --- Training set -----------------------------------------------------
    data = pd.read_csv(file_name)
    data[date_column] = data[date_column].apply(transform_date)
    # Keep only the rows whose transformed date is strictly positive.
    data = data[(data[date_column] > 0)]
    target = data[target_column]
    # Drop by reassignment instead of inplace=True: at this point `data` is
    # the result of a boolean-mask selection, and mutating such a selection
    # in place can raise pandas' SettingWithCopyWarning.
    data = data.drop([target_column, id_column, name_column], axis=1)
    normalized_data = normalize(data)
    ada = md.fit_ada_boost(normalized_data, target, True)

    print(ada)

    # --- Test set ---------------------------------------------------------
    test_data = pd.read_csv("data/" + file + test_suffix)
    # Copy the ids aside first so every prediction is written next to its id.
    results = pd.DataFrame()
    results[id_column] = test_data[id_column]
    test_data = test_data.drop([name_column, id_column], axis=1)
    test_data[date_column] = test_data[date_column].apply(transform_date)
    # NOTE(review): unlike the training rows, test rows are not filtered on
    # the transformed date, and normalize() is called on the test frame by
    # itself. If normalize() scales by the frame's own min/max, test features
    # end up on a different scale than the training features - confirm that
    # this is intended.
    normalized_test_data = normalize(test_data)
    results[target_column] = ada.predict(normalized_test_data)
    # Written relative to the working directory as "<file>results" (no
    # extension); to_csv's defaults apply, so the index column is included.
    results.to_csv(file + "results")
    print("end time: " + get_date_time())
Ejemplo n.º 2
0
import modelling as md
import pandas as pd
import feature_engineering as fe
import os

# Script: fit several classifiers from the project's `modelling` module on the
# same train/test split and print their evaluation results one after another.
data = pd.read_csv("data/DJI_1d_10y_signal.csv")

# Remove the "Date" column, then forward-fill missing values down each column
# (axis=0 propagates the last valid observation along the row index).
data = data.drop(["Date"], axis=1)
data = data.ffill(axis=0)
print(data)
# "Signal" is passed as the target column name.
# NOTE(review): judging by the helper's name the split presumably keeps row
# order (no shuffling) - confirm in feature_engineering.
xTrain, xTest, yTrain, yTest = fe.ordered_train_test_split(data, "Signal")

# `results` holds the true test labels plus a "prediction" column that is
# overwritten by each model in turn.
results = pd.DataFrame()
results["true_y"] = yTest

# --- AdaBoost ---
# The meaning of the third positional argument (True) is defined by
# md.fit_ada_boost and is not visible here.
ada = md.fit_ada_boost(xTrain, yTrain, True)
results["prediction"] = ada.predict(xTest)
print("Adaboost Classifier")
print(results["prediction"])
# NOTE(review): the AdaBoost predictions go through fe.generate_y before being
# scored, whereas the SVM predictions below are scored directly - confirm this
# asymmetry is intentional.
print(md.get_results(results["true_y"], fe.generate_y(results, "prediction")))

# --- SVM ---
svm = md.fit_SVM(xTrain, yTrain)
results["prediction"] = svm.predict(xTest)
print("SVM Classifier")
print(md.get_results(results["true_y"], results["prediction"]))
# Score reported by the fitted model's own score() method, to 3 decimals.
print('Accuracy of the SVM on test set: {:.3f}'.format(svm.score(xTest,
                                                                 yTest)))

# --- KNN ---
knn = md.fit_KNN(xTrain, yTrain, True)
results["prediction"] = knn.predict(xTest)
print("KNN")