コード例 #1
0
def select_features():
    """Select model features via XGBoost feature importances.

    Loads the interim feature set, fits a model, and either plots the raw
    importances (lookback == 1) or runs the threshold-based selection
    pipeline and persists the selected feature subsets.
    """
    features = load('../../data/interim/features.pickle')
    # NOTE(review): lookback is hard-coded, so the `else` branch below is
    # currently unreachable; it is kept for runs with lookback > 1.
    lookback = 1
    model = build_model(features, lookback)
    xgb.plot_importance(model)
    if lookback == 1:
        # (the figure handle was previously bound to an unused `fig` local)
        plt.figure(figsize=(8, 8))
        plt.xticks(rotation='vertical')
        # One bar per model feature; tick labels skip columns 0-5 and 14,
        # presumably non-feature columns -- TODO confirm against `features`.
        plt.barh(range(len(model.feature_importances_)),
                 model.feature_importances_.tolist(),
                 tick_label=features.columns[list(range(6, 14)) + list(range(15, features.shape[1]))])
        plt.title('Istotność cech')
        plt.xlim((0, 0.3))
        plt.ylabel('Cecha')
        plt.xlabel('Istotność')
        plt.show()
    else:
        feature_importances = calculate_feature_importances(features, lookback, model)
        plot_feature_importance(feature_importances)
        selected_features, selected_features_with_lookback = threshold_features(feature_importances, lookback)
        print('Selected features with trees: ', selected_features)
        not_selected_features = list(set(features.columns.to_list()).difference(set(selected_features)))
        print('Not selected features: ', not_selected_features)
        save(selected_features, '../../data/interim/selected_features_labels_trees.pickle')
        save(features[selected_features], '../../data/processed/selected_features_trees.pickle')
        save(selected_features_with_lookback, '../../data/interim/selected_features_labels_with_lookback_trees.pickle')
コード例 #2
0
def build_data_trading_plot():
    """Build the unscaled, non-binary lookback-1 split used for trading plots."""
    raw_features = load('../../data/processed/features_amazon_corr.pickle')
    raw_targets = load('../../data/processed/targets_amazon.pickle')

    # Trim the oldest rows that are not relevant for this experiment.
    not_relevant_days = 1660
    raw_features, raw_targets = drop_not_relevant(raw_features, raw_targets,
                                                  not_relevant_days)

    # 5% of the rows go to test, 15% to validation.
    n_rows = raw_features.shape[0]
    data, name = build_data(raw_features,
                            raw_targets,
                            lookback=1,
                            scaled=False,
                            encode_binary=False,
                            test_size=int(0.05 * n_rows),
                            val_size=int(0.15 * n_rows),
                            pct_change=False)
    save(data, '../../data/timeseries/' + name + '_trading_vis_amazon.pickle')
コード例 #3
0
def build_features():
    """Derive technical-indicator and Fourier features for the Amazon series."""
    stock_data = load('../../data/raw/stock_data.pickle')
    # Alternative dataset (disabled): stock_data['tesla']
    features = stock_data['amazon']
    features = get_technical_indicators(features)
    # Disabled enrichment step: get_corr_assets(features)
    features = get_fourier_transforms(features)
    # Disabled enrichment step: get_automotive_industry_close_prices(features, stock_data)
    # Alternative output path (disabled): '../../data/interim/features.pickle'
    save(features, '../../data/interim/features_amazon.pickle')
コード例 #4
0
def build_default_data():
    """Build every default train/val/test timeseries variant for Amazon.

    Iterates over all combinations of binary encoding, scaling and lookback
    window, building each dataset with build_data() and persisting it under
    the name that build_data() derives from the configuration.
    """
    # Local import so the file-level import block stays untouched.
    from itertools import product

    features = load('../../data/processed/features_amazon_corr.pickle')
    targets = load('../../data/processed/targets_amazon.pickle')
    not_relevant_days = 1660
    features, targets = drop_not_relevant(features, targets, not_relevant_days)
    # 5% of rows to test, 15% to validation.
    test_size = int(0.05 * features.shape[0])
    val_size = int(0.15 * features.shape[0])

    # One flat loop over the 2 x 2 x 2 configuration grid instead of three
    # nested loops.
    for encode_binary, scaled, lookback in product([True, False],
                                                   [True, False],
                                                   [1, 60]):
        data, name = build_data(features,
                                targets,
                                lookback=lookback,
                                scaled=scaled,
                                encode_binary=encode_binary,
                                test_size=test_size,
                                val_size=val_size,
                                pct_change=True)
        save(data, '../../data/timeseries/' + name + '_amazon.pickle')
コード例 #5
0
def save_targets():
    """Persist the Date/Close columns of the Amazon series as the target frame."""
    stock_data = load('../../data/raw/stock_data.pickle')
    targets = pd.DataFrame(stock_data['amazon'][['Date', 'Close']],
                           columns=['Date', 'Close'])
    save(targets, '../../data/processed/targets_amazon.pickle')
コード例 #6
0
from src.trading_simulation.simulation import Simulation
from src.trading_simulation.strategy import SimpleStrategy, BuyAndHold
from src.utils.io import load, save
import numpy as np
import pandas as pd

if __name__ == '__main__':
    # NOTE(review): this script continues beyond the visible chunk; only the
    # data-loading prologue is documented here.
    # Starting cash for every simulated trading strategy.
    init_investment = 5000
    # targets = load('../../data/processed/targets.pickle')
    targets = load('../../data/processed/targets_amazon.pickle')
    date = pd.DataFrame(targets['Date'])

    # Load model predictions
    dense = load('../../data/predictions/dense.pickle')
    gru = load('../../data/predictions/gru.pickle')
    # gru = load('../../data/predictions/gru_amazon.pickle')
    pseudo_random = load('../../data/predictions/pseudo_random.pickle')
    # pseudo_random = load('../../data/predictions/pseudo_random_amazon.pickle')
    lstm = load('../../data/predictions/lstm.pickle')
    conv_lstm = load('../../data/predictions/conv_lstm.pickle')
    # Parallel lists: models[i] corresponds to model_names[i].
    models = [pseudo_random, dense, gru, lstm, conv_lstm]
    model_names = ['pseudo_random', 'dense', 'gru', 'lstm', 'conv_lstm']

    # load stock return for test set
    # Index [5] presumably selects the test-set returns from the saved
    # timeseries tuple -- TODO confirm against the builder script.
    stock_returns = load(
        '../../data/timeseries/data_lookback_1_notbinary_notscaled.pickle')[5]
    # stock_returns = load('../../data/timeseries/data_lookback_1_notbinary_notscaled_amazon.pickle')[5]
    # Flatten the (n,) / (n, 1) array into a plain Python list.
    stock_returns = stock_returns.reshape(1,
                                          stock_returns.shape[0]).tolist()[0]

    # load stock prices for test set
コード例 #7
0
def plot_validation_vs_training(model):
    """Scatter-plot per-iteration RMSE for both eval sets of an XGBoost model.

    ``validation_0`` is labelled as the training error and ``validation_1``
    as the validation error, matching the order the eval sets were passed
    when the model was fitted.
    """
    history = model.evals_result()
    rounds = range(len(history['validation_0']['rmse']))
    for split, label in (('validation_0', 'Training Error'),
                         ('validation_1', 'Validation Error')):
        plt.scatter(x=rounds, y=history[split]['rmse'], label=label)
    plt.xlabel('Iterations')
    plt.ylabel('RMSE')
    plt.title('Training Vs Validation Error')
    plt.legend()
    plt.show()


def plot_feature_importance(feature_importances):
    """Horizontal bar chart of the top (at most 100) feature importances.

    Expects a two-column frame: column 0 = feature name, column 1 =
    importance, presumably sorted descending -- TODO confirm at the caller.
    """
    rc('xtick', labelsize=6)
    rc('ytick', labelsize=6)
    # (the figure handle was previously bound to an unused `fig` local)
    plt.figure(figsize=(10, 10))
    plt.xticks(rotation='vertical')
    # Slice first, then size the axes from the slice: the original paired
    # range(100) with iloc[:100], which breaks when fewer than 100 rows exist.
    top = feature_importances.iloc[:100]
    plt.barh(range(len(top)), top.iloc[:, 1])
    plt.yticks(range(len(top)), top.iloc[:, 0])
    plt.title('Feature importance')
    plt.show()


if __name__ == '__main__':
    # Visualise the technical indicators over the most recent 500 days.
    interim_features = load('../../data/interim/features.pickle')
    plot_technical_indicators(interim_features, 500)
def select_features():
    """Filter features by their correlation with the close price.

    Works on row-over-row percentage changes: keeps features whose absolute
    correlation with 'Close' exceeds a small threshold, then drops one member
    of every highly correlated (> 0.95) feature pair, and persists both the
    selected label list and the reduced, date-indexed feature frame.
    """
    features = load('../../data/interim/features_amazon.pickle')
    date = features['Date']
    features.drop('Date', inplace=True, axis=1)

    # Work on relative day-to-day changes; +/-inf (division by zero) -> NaN.
    features_diff = features.pct_change().replace([np.inf, -np.inf], np.nan)

    # Impute remaining NaN values with the column mean.
    imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')
    features_diff = pd.DataFrame(data=imp_mean.fit_transform(features_diff),
                                 columns=features.columns)

    corr_matrix = features_diff.corr().abs()

    # Absolute correlation of every feature with the close price, strongest
    # first. (The original mislabelled this as a covariance matrix.)
    close_price_corr = pd.DataFrame(
        corr_matrix['Close'].sort_values(ascending=False))

    # Keep features whose correlation with 'Close' exceeds the threshold.
    corr_threshold = 0.001
    selected_features = close_price_corr[
        close_price_corr['Close'] > corr_threshold].index.to_list()
    not_selected_features = list(
        set(features.columns.to_list()).difference(set(selected_features)))
    print('Not selected features: ', not_selected_features)

    save(selected_features,
         '../../data/interim/selected_features_labels_cov.pickle')
    # .copy() so the in-place drop/assign below act on an owned frame rather
    # than a view (avoids pandas SettingWithCopyWarning).
    features = features[selected_features].copy()

    # Upper triangle of the correlation matrix (k=1 excludes the diagonal).
    # np.bool was removed in NumPy 1.24 -- the builtin bool is the correct
    # dtype argument here.
    upper = corr_matrix.where(
        np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))

    # Drop one feature of every pair with correlation > 0.95, but never a
    # feature already excluded by the threshold step above.
    to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]
    for f in not_selected_features:
        if f in to_drop:
            to_drop.remove(f)

    features.drop(to_drop, axis=1, inplace=True)
    print(to_drop)

    # Re-attach the date column and use it as the index.
    features['Date'] = date
    features.set_index('Date', inplace=True)
    save(features, '../../data/processed/features_amazon_corr.pickle')