# Example 1: MLP neural-network classifier
def main(tickers=None, start=None, end=None, n_steps=21):
    """Train an MLP classifier per ticker and forecast session direction.

    For each ticker, downloads the price series, labels each session as
    positive (1, adj_close > adj_open) or negative (0), fits a multi-layer
    perceptron on SMA-derived features, reports in-sample test metrics and
    returns an out-of-sample forecast.

    Args:
        tickers: iterable of ticker symbols (defaults to ['AAPL']).
        start, end: date bounds passed through to fc.get_time_series.
        n_steps: number of out-of-sample steps to forecast.

    Returns:
        OrderedDict mapping ticker -> forecast produced by
        fc.forecast_classifier.
    """
    # Avoid a mutable default argument; None is the sentinel for the default.
    if tickers is None:
        tickers = ['AAPL']

    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    for ticker in tickers:
        data[ticker] = fc.get_time_series(ticker, start, end)

        # Outcome variable: 1 if the trading session was positive
        # (close > open), 0 otherwise.
        data[ticker]['outcome'] = data[ticker].apply(
            lambda x: 1 if x['adj_close'] > x['adj_open'] else 0, axis=1)

        # SMA features have leading NaN rows; drop them before fitting.
        data[ticker] = fc.get_sma_classifier_features(data[ticker]).dropna()

        # Cross-validation testing: random train fraction in [0.60, 0.80).
        split = rand.uniform(0.60, 0.80)
        train_size = int(len(data[ticker]) * split)
        train = data[ticker][:train_size]
        test = data[ticker][train_size:]

        features = ['sma_2', 'sma_3', 'sma_4', 'sma_5', 'sma_6']

        # Feature matrix and target vector for training.
        X = list(train[features].values)
        Y = list(train['outcome'])

        # Fit a multi-layer perceptron (3 hidden layers of 100 units).
        mdl = MLPClassifier(hidden_layer_sizes=(100, 100, 100)).fit(X, Y)
        print(mdl)

        # Predict on the held-out portion.
        pred = mdl.predict(test[features].values)

        # Summarize the fit of the model. Renamed so we do not shadow
        # sklearn.metrics.classification_report / confusion_matrix.
        clf_report, conf_matrix = fc.get_classifier_metrics(
            test['outcome'].values, pred)

        print("{} Neural Network\n"
              "-------------\n"
              "Classification report: {}\n\n"
              "Confusion matrix: {}\n\n".format(ticker, clf_report,
                                                conf_matrix))

        pred_data[ticker] = pd.DataFrame(data=dict(original=test['outcome'],
                                                   prediction=pred),
                                         index=test.index)

        # Out-of-sample test.
        forecast_data[ticker] = fc.forecast_classifier(model=mdl,
                                                       sample=test,
                                                       features=features,
                                                       steps=n_steps)

    return forecast_data
# Example 2: decision-tree classifier
def main(tickers=None, start=None, end=None, n_steps=21):
    """Train a decision-tree classifier per ticker and forecast direction.

    For each ticker, downloads the price series, labels each session as
    positive (1, adj_close > adj_open) or negative (0), fits a
    DecisionTreeClassifier on SMA-derived features, prints test metrics and
    prediction probabilities, and returns an out-of-sample forecast.

    Args:
        tickers: iterable of ticker symbols (defaults to ['AAPL']).
        start, end: date bounds passed through to fc.get_time_series.
        n_steps: number of out-of-sample steps to forecast.

    Returns:
        OrderedDict mapping ticker -> forecast produced by
        fc.forecast_classifier.
    """
    # Avoid a mutable default argument; None is the sentinel for the default.
    if tickers is None:
        tickers = ['AAPL']

    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    for ticker in tickers:
        data[ticker] = fc.get_time_series(ticker, start, end)

        # Outcome variable: 1 if the trading session was positive
        # (close > open), 0 otherwise.
        data[ticker]['outcome'] = data[ticker].apply(
            lambda x: 1 if x['adj_close'] > x['adj_open'] else 0, axis=1)

        # SMA features have leading NaN rows; drop them so .fit does not
        # fail on NaN input (matches the other examples in this file).
        data[ticker] = fc.get_sma_classifier_features(data[ticker]).dropna()

        # Fixed 80/20 chronological train/test split.
        train_size = int(len(data[ticker]) * 0.80)
        train = data[ticker][:train_size]
        test = data[ticker][train_size:]

        features = ['sma_2', 'sma_3', 'sma_4', 'sma_5', 'sma_6']

        # Feature matrix and target vector for training.
        X = list(train[features].values)
        Y = list(train['outcome'])

        mdl = DecisionTreeClassifier().fit(X, Y)
        print(mdl)

        # Predictions and per-class probabilities on the held-out portion.
        pred = mdl.predict(test[features].values)
        pred_prob = mdl.predict_proba(test[features].values)

        # Summarize the fit of the model. Renamed so we do not shadow
        # sklearn.metrics.classification_report / confusion_matrix.
        clf_report, conf_matrix = fc.get_classifier_metrics(
            test['outcome'].values, pred)

        print("{} Decision Tree\n"
              "-------------\n"
              "Classification report: {}\n"
              "Confusion matrix: {}\n"
              "Prediction probability: {}\n".format(ticker,
                                                    clf_report,
                                                    conf_matrix,
                                                    pred_prob))

        pred_data[ticker] = pd.DataFrame(data=dict(original=test['outcome'],
                                                   prediction=pred),
                                         index=test.index)

        # Out-of-sample test.
        forecast_data[ticker] = fc.forecast_classifier(model=mdl,
                                                       sample=test,
                                                       features=features,
                                                       steps=n_steps)

    return forecast_data
# Example 3: hard-voting ensemble classifier
def main(tickers=None, start=None, end=None, n_steps=21):
    """Train a hard-voting ensemble per ticker and forecast direction.

    For each ticker, downloads the price series, labels each session as
    positive (1, adj_close > adj_open) or negative (-1), fits a
    VotingClassifier over ten base estimators on SMA-derived features,
    prints the test-set accuracy, and returns an out-of-sample forecast.

    Args:
        tickers: iterable of ticker symbols (defaults to ['AAPL']).
        start, end: date bounds passed through to fc.get_time_series.
        n_steps: number of out-of-sample steps to forecast.

    Returns:
        OrderedDict mapping ticker -> 'outcome' series of the forecast
        produced by fc.forecast_classifier.
    """
    # Avoid a mutable default argument; None is the sentinel for the default.
    if tickers is None:
        tickers = ['AAPL']

    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    for ticker in tickers:
        data[ticker] = fc.get_time_series(ticker, start, end)

        # Outcome variable: 1 if the trading session was positive
        # (close > open), -1 otherwise (note: -1, not 0, in this example).
        data[ticker]['outcome'] = data[ticker].apply(
            lambda x: 1 if x['adj_close'] > x['adj_open'] else -1, axis=1)

        # SMA features have leading NaN rows; drop them so .fit does not
        # fail on NaN input (matches the other examples in this file).
        data[ticker] = fc.get_sma_classifier_features(data[ticker]).dropna()

        # Cross-validation testing: random train fraction in [0.60, 0.80).
        split = rand.uniform(0.60, 0.80)
        train_size = int(len(data[ticker]) * split)
        train = data[ticker][:train_size]
        test = data[ticker][train_size:]

        features = ['sma_2', 'sma_3', 'sma_4', 'sma_5', 'sma_6']

        # Feature matrix and target vector for training.
        X = list(train[features].values)
        Y = list(train['outcome'])

        # Ten diverse base estimators combined by majority (hard) vote.
        estimators = [
            ('bt', AdaBoostClassifier()),
            ('rf', RandomForestClassifier()),
            ('dt', DecisionTreeClassifier()),
            ('knn', KNeighborsClassifier()),
            ('lgt', LogisticRegression()),
            ('sgd', SGDClassifier()),
            ('mlp', MLPClassifier()),
            ('gnb', GaussianNB()),
            ('bnb', BernoulliNB()),
            ('svm', SVC()),
        ]

        mdl = VotingClassifier(estimators=estimators, voting='hard').fit(X, Y)
        print(mdl)

        # Mean accuracy on the held-out portion.
        confidence = mdl.score(test[features].values, test['outcome'].values)

        print("{} Voting Classifier\n"
              "-------------\n"
              "Confidence: {}\n".format(ticker, confidence))

        pred_data[ticker] = mdl.predict(test[features].values)

        # Out-of-sample test; keep only the forecast outcome column.
        forecast_data[ticker] = fc.forecast_classifier(
            model=mdl, sample=test, features=features,
            steps=n_steps)['outcome']

    return forecast_data