Code example #1
0
def build_features():
    """Build the Amazon feature set from raw stock data and persist it.

    Loads the raw price pickle, derives technical indicators and Fourier
    transforms for the 'amazon' series, and saves the result to
    data/interim.  NOTE(review): relies on module-level helpers
    (``load``, ``save``, ``get_technical_indicators``,
    ``get_fourier_transforms``) defined elsewhere in the project.
    """
    data = load('../../data/raw/stock_data.pickle')
    # Derive features from the Amazon series only; other companies in the
    # raw pickle are ignored here.
    features = get_technical_indicators(data['amazon'])
    features = get_fourier_transforms(features)
    save(features, '../../data/interim/features_amazon.pickle')
Code example #2
0
def get_raw_stock_data(api_key='CMSZIWYWAHTR01BR'):
    """Download daily OHLCV data for a fixed set of companies from Alpha Vantage.

    Parameters
    ----------
    api_key : str, optional
        Alpha Vantage API key.  Defaults to the key previously hard-coded
        into every request URL (kept for backward compatibility).
        NOTE(review): a secret like this should live in configuration or
        an environment variable, not in source control.

    Returns
    -------
    dict
        Mapping of company name -> pandas.DataFrame with columns
        ['Date', 'Close', 'Open', 'High', 'Low', 'Volume'], ordered
        oldest row first.  The same dict is also pickled to data/raw.
    """
    # Alpha Vantage JSON field name for each of our columns.
    columns = {
        'Open': "1. open",
        'High': "2. high",
        'Low': "3. low",
        'Close': "4. close",
        'Volume': "5. volume"
    }

    # Ticker symbol per company; the request URL is built from a single
    # template instead of six duplicated literals.  The 'porshe' key is
    # kept as-is (misspelled) because downstream code looks the data up
    # under that exact spelling.
    symbols = {
        'tesla': 'TSLA',
        'google': 'GOOGL',
        'bmw': 'FRA:BMW',
        'daimler': 'ETR:DAI',
        'porshe': 'ETR:PAH3',
        'amazon': 'AMZN',
    }
    url_template = (
        'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY'
        '&symbol={symbol}&interval=60min&outputsize=full&apikey={key}')

    raw_stock_data = {}

    for company, symbol in symbols.items():
        response = requests.get(url_template.format(symbol=symbol, key=api_key))
        data = response.json()

        # The API returns newest-first; collect the rows and reverse them so
        # the resulting frame is ordered oldest-first, as before.  Building
        # the frame directly avoids the NaN-prefilled array (np.NaN was
        # removed in NumPy 2.0) and the per-row .iloc writes.
        rows = [
            [date, values[columns['Close']], values[columns['Open']],
             values[columns['High']], values[columns['Low']],
             values[columns['Volume']]]
            for date, values in data['Time Series (Daily)'].items()
        ]
        df = pd.DataFrame(
            data=list(reversed(rows)),
            columns=['Date', 'Close', 'Open', 'High', 'Low', 'Volume'])

        # Numeric fields arrive as strings in the JSON payload.
        numeric_cols = ['Close', 'Open', 'High', 'Low', 'Volume']
        df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric)
        df['Date'] = pd.to_datetime(df.Date, format='%Y-%m-%d')
        raw_stock_data[company] = df
    save(raw_stock_data, '../../data/raw/stock_data.pickle')
    return raw_stock_data
Code example #3
0
def build_data_trading_plot():
    """Build the unscaled, lookback-1 dataset used for the trading plot.

    Loads the processed Amazon features/targets, trims the non-relevant
    leading days, splits off 5% test / 15% validation, and pickles the
    resulting timeseries dataset.
    """
    features = load('../../data/processed/features_amazon_corr.pickle')
    targets = load('../../data/processed/targets_amazon.pickle')

    # Drop the leading rows that are not relevant for evaluation.
    features, targets = drop_not_relevant(features, targets, 1660)

    n_samples = features.shape[0]
    data, name = build_data(features,
                            targets,
                            lookback=1,
                            scaled=False,
                            encode_binary=False,
                            test_size=int(0.05 * n_samples),
                            val_size=int(0.15 * n_samples),
                            pct_change=False)
    save(data, '../../data/timeseries/' + name + '_trading_vis_amazon.pickle')
Code example #4
0
def build_default_data():
    """Generate every default timeseries dataset variant and pickle each one.

    Produces the cross product of binary encoding on/off, scaling on/off,
    and lookback windows of 1 and 60 days, all on percentage-change data.
    """
    features = load('../../data/processed/features_amazon_corr.pickle')
    targets = load('../../data/processed/targets_amazon.pickle')

    # Trim the leading days that are not relevant for evaluation.
    features, targets = drop_not_relevant(features, targets, 1660)

    n_samples = features.shape[0]
    test_size = int(0.05 * n_samples)
    val_size = int(0.15 * n_samples)

    # All combinations, in the same order as the original nested loops.
    variants = [(encode_binary, scaled, lookback)
                for encode_binary in (True, False)
                for scaled in (True, False)
                for lookback in (1, 60)]

    for encode_binary, scaled, lookback in variants:
        data, name = build_data(features,
                                targets,
                                lookback=lookback,
                                scaled=scaled,
                                encode_binary=encode_binary,
                                test_size=test_size,
                                val_size=val_size,
                                pct_change=True)
        save(data, '../../data/timeseries/' + name + '_amazon.pickle')
Code example #5
0
def select_features(lookback=1):
    """Rank features with an XGBoost model and persist the selected subset.

    Parameters
    ----------
    lookback : int, optional
        Window length used when building the model.  For ``lookback == 1``
        the raw feature importances are plotted directly; for longer
        windows the importances are aggregated over the window,
        thresholded, and the surviving feature labels/frames are pickled.
        Defaults to 1, matching the previously hard-coded value (which
        made the ``else`` branch unreachable dead code).
    """
    features = load('../../data/interim/features.pickle')
    model = build_model(features, lookback)
    xgb.plot_importance(model)
    if lookback == 1:
        plt.figure(figsize=(8, 8))
        plt.xticks(rotation='vertical')
        # Tick labels skip column 14; NOTE(review): the skipped index is
        # assumed intentional — confirm against the feature layout.
        plt.barh([i for i in range(len(model.feature_importances_))],
                 model.feature_importances_.tolist(),
                 tick_label=features.columns[list(range(6, 14)) +
                                             list(range(15, features.shape[1]))])
        plt.title('Istotność cech')
        plt.xlim((0, 0.3))
        plt.ylabel('Cecha')
        plt.xlabel('Istotność')
        plt.show()
    else:
        # Aggregate per-feature importance across the lookback window,
        # plot it, and keep only the features above the threshold.
        feature_importances = calculate_feature_importances(features, lookback, model)
        plot_feature_importance(feature_importances)
        selected_features, selected_features_with_lookback = threshold_features(feature_importances, lookback)
        print('Selected features with trees: ', selected_features)
        not_selected_features = list(set(features.columns.to_list()).difference(set(selected_features)))
        print('Not selected features: ', not_selected_features)
        save(selected_features, '../../data/interim/selected_features_labels_trees.pickle')
        save(features[selected_features], '../../data/processed/selected_features_trees.pickle')
        save(selected_features_with_lookback, '../../data/interim/selected_features_labels_with_lookback_trees.pickle')
Code example #6
0
def save_targets():
    """Persist the Amazon Date/Close columns as the prediction targets."""
    raw = load('../../data/raw/stock_data.pickle')
    targets = pd.DataFrame(raw['amazon'][['Date', 'Close']],
                           columns=['Date', 'Close'])
    save(targets, '../../data/processed/targets_amazon.pickle')
def select_features():
    """Select low-redundancy, Close-correlated features for the Amazon set.

    Works on day-over-day percentage changes of every feature: keeps the
    features whose absolute correlation with 'Close' clears a small
    threshold, then drops one feature of every pair correlated above 0.95
    with each other.  The selected labels and the reduced, Date-indexed
    frame are pickled for downstream steps.
    """
    features = load('../../data/interim/features_amazon.pickle')
    # Keep the dates aside; they are restored as the index at the end.
    date = features['Date']
    features.drop('Date', inplace=True, axis=1)

    # Day-over-day relative change; divisions by zero become NaN.
    features_diff = features.pct_change().replace([np.inf, -np.inf], np.nan)

    # Impute NaNs (first row plus any division artefacts) with column means.
    imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')
    features_diff = pd.DataFrame(data=imp_mean.fit_transform(features_diff),
                                 columns=features.columns)

    corr_matrix = features_diff.corr().abs()

    # Absolute correlation of every feature with the close price.
    # (The original called these "covariances"; they are correlations.)
    close_corr = pd.DataFrame(
        corr_matrix['Close'].sort_values(ascending=False))

    # Keep only features correlated with 'Close' above the threshold.
    corr_threshold = 0.001
    selected_features = close_corr[
        close_corr['Close'] > corr_threshold].index.to_list()
    not_selected_features = list(
        set(features.columns.to_list()).difference(set(selected_features)))
    print('Not selected features: ', not_selected_features)

    save(selected_features,
         '../../data/interim/selected_features_labels_cov.pickle')
    features = features[selected_features]

    # Upper triangle of the correlation matrix (pairwise redundancy check).
    # np.bool was removed from NumPy; the builtin bool is the right dtype.
    upper = corr_matrix.where(
        np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))

    # Features correlated > 0.95 with another feature are redundant...
    to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]
    # ...but features already excluded above are not in the frame anymore.
    for f in not_selected_features:
        if f in to_drop:
            to_drop.remove(f)

    # Reassign instead of dropping in place on a column-selection slice,
    # which would trigger pandas' SettingWithCopy warning.
    features = features.drop(to_drop, axis=1)
    print(to_drop)

    features['Date'] = date
    features.set_index('Date', inplace=True)
    save(features, '../../data/processed/features_amazon_corr.pickle')