コード例 #1
0
ファイル: stock_fe.py プロジェクト: gubo2012/py_ref
def add_options(df, ticker):
    """Merge per-ticker options-flow features into ``df``.

    Loads the options table pickled at ``stock_io.options_all_data``, keeps
    the rows whose ``symbol`` equals ``ticker``, scales the four flow columns
    by ``Adj Close`` and ``Volume``, passes the result through
    ``ts_to_features.mongodb_format`` and ``fe_pipeline``, and left-merges it
    onto ``df`` by ``date``.

    Args:
        df: Feature frame containing a ``date`` column.
        ticker: Value matched against the ``symbol`` column of the options
            table.

    Returns:
        ``df`` left-merged with the options features. Missing values in the
        columns selected by ``util.show_cols(df, 'Flow_')`` are set to 0.
    """
    df_options = pd.read_pickle(stock_io.options_all_data)
    # Explicit copy: the column assignments below would otherwise write into
    # a boolean-filtered slice of the loaded frame, which raises
    # SettingWithCopyWarning on pandas versions without copy-on-write.
    df_options = df_options[df_options['symbol'] == ticker].copy()

    cols_options = ['LTCallFlow', 'STCallFlow', 'LTPutFlow', 'STPutFlow']

    for col in cols_options:
        # Missing flow counts as zero flow before scaling.
        df_options[col] = df_options[col].fillna(0)
        # Scale the flow by price and volume, then by 1000 * 1000.
        df_options[col] = df_options[col] / df_options[
            'Adj Close'] / df_options['Volume'] * 1000 * 1000

    df_options = df_options.drop(
        ['symbol', 'CallFlow', 'PutFlow', 'Adj Close', 'Volume'], axis=1)

    df_options = ts_to_features.mongodb_format(df_options)

    # Fill again: the division above (or the reformatting step) may have
    # left NaN values in the flow columns.
    for col in cols_options:
        df_options[col] = df_options[col].fillna(0)

    df_options = fe_pipeline(df_options,
                             cols_options,
                             scale_ma_flag=False,
                             drop_col=True)

    df = pd.merge(df, df_options, how='left', on='date')

    # Dates without options data get NaN from the left merge; treat as 0.
    odf_cols = util.show_cols(df, 'Flow_')
    for col in odf_cols:
        df[col] = df[col].fillna(0)

    return df
コード例 #2
0
ファイル: stock_ml.py プロジェクト: gubo2012/py_ref
def nth_day_fcst(df,
                 df_cdl,
                 n,
                 patt_list,
                 test_date,
                 use_cdl_patt,
                 n_total=3,
                 print_features_flag=0):
    """Run the classification and regression pipelines for an n-day horizon.

    Columns matching ``lag1d`` .. ``lag{n-1}d`` are dropped from a copy of
    ``df`` before modelling. When ``use_cdl_patt`` is truthy, candle-pattern
    features are added via ``stock_fe.add_cdl`` with ``lag=n``.

    Args:
        df: Feature frame; it is copied, not modified here.
        df_cdl: Frame passed (as a copy) to ``stock_fe.add_cdl``.
        n: Forecast horizon in days.
        patt_list: Candle-pattern names forwarded to ``stock_fe.add_cdl``.
        test_date: Forwarded to ``ml_pipeline``.
        use_cdl_patt: Whether to add candle-pattern features.
        n_total: Forwarded to ``ml_pipeline``.
        print_features_flag: When truthy, print the first 10 rows of the
            feature frame returned by each pipeline run.

    Returns:
        dict with the keys ``'{n}d_clf_acc'``, ``'{n}d_clf_fcst'``,
        ``'{n}d_reg_rmse'`` and ``'{n}d_reg_fcst'``.
    """
    print('')
    print('    {}-day Forecast:'.format(n))

    horizon_df = df.copy()
    if n > 1:
        # An n-day forecast must not use the 1 .. n-1 day lag columns.
        for lag in range(1, n):
            stale_cols = util.show_cols(df, 'lag{}d'.format(lag))
            horizon_df = horizon_df.drop(stale_cols, axis=1)

    test_util.assert_no_prior_days_data(horizon_df, n)

    if use_cdl_patt:
        horizon_df = stock_fe.add_cdl(
            horizon_df.copy(), df_cdl.copy(), patt_list, lag=n)

    # Classification run (default target).
    clf_features, clf_fcst, _, _, _, clf_score = ml_pipeline(
        horizon_df, test_date, n, n_total)
    results = {
        '{}d_clf_acc'.format(n): round(clf_score, 4),
        '{}d_clf_fcst'.format(n): clf_fcst,
    }
    if print_features_flag:
        print(clf_features.head(10))

    # Regression run on the 'target_reg' column.
    reg_features, reg_fcst, _, _, _, reg_score = ml_pipeline(
        horizon_df, test_date, n, n_total, target='target_reg')
    results['{}d_reg_rmse'.format(n)] = round(reg_score, 4)
    results['{}d_reg_fcst'.format(n)] = round(reg_fcst, 4)
    if print_features_flag:
        print(reg_features.head(10))

    return results
コード例 #3
0
def run_grid_search(ticker, params):
    """Build the feature frame for ``ticker`` and run 1- to 3-day forecasts.

    Args:
        ticker: Symbol used to select rows from the pickled stock data.
        params: Mapping with the keys ``use_pc_flag``, ``use_other_tickers``,
            ``use_cdl_patt``, ``use_short_vol_flag``, ``use_options_flag``
            and ``ticker_list``. The mapping and its list are not modified.

    Returns:
        dict: ``{'Ticker': ticker}`` updated with the dict returned by
        ``stock_ml.nth_day_fcst`` for n = 1, 2 and 3. The dict is also
        printed.
    """
    up_down_threshold = conf_man['up_down_threshold']
    total_shifts = conf_man['total_shifts']

    use_stocks_all_data = 1

    use_pc_flag = params['use_pc_flag']
    use_other_tickers = params['use_other_tickers']
    use_cdl_patt = params['use_cdl_patt']
    use_short_vol_flag = params['use_short_vol_flag']
    use_options = params['use_options_flag']

    # Work on a copy: removing the ticker from params['ticker_list'] itself
    # would shrink the caller's list a little more on every call that
    # reuses the same params object.
    ticker_list = list(params['ticker_list'])
    if ticker in ticker_list:
        ticker_list.remove(ticker)

    patt_list = [
        'CDLBELTHOLD', 'CDLCLOSINGMARUBOZU', 'CDLDOJI', 'CDLENGULFING',
        'CDLHARAMI', 'CDLHIGHWAVE', 'CDLHIKKAKE', 'CDLLONGLEGGEDDOJI',
        'CDLMARUBOZU', 'CDLRICKSHAWMAN', 'CDLSHORTLINE'
    ]

    print_features_flag = 0

    if use_stocks_all_data:
        df = pd.read_pickle(stock_io.stocks_all_data)
        df = df[df['symbol'] == ticker]
        df = ts_to_features.mongodb_format(df)
    else:
        df = pd.read_pickle(stock_io.pkl_data.format(ticker))
        df = ts_to_features.mongodb_format(df)

    df = ts_to_features.data_format(df)

    start_date = conf_man['train_start_date']
    test_date = conf_man['test_start_date']
    df = df[df.date >= start_date]

    # 'Adj Close' is not used; 'short_vol_pct' is kept only when requested.
    if use_short_vol_flag:
        df = df.drop(['Adj Close', 'ShortVolume'], axis=1)
    else:
        df = df.drop(['Adj Close', 'ShortVolume', 'short_vol_pct'], axis=1)

    df = df.sort_values(by=['date'])

    # Snapshot taken before any engineered column is added; it is the input
    # for the candle-pattern features below.
    df_raw_copy = df.copy()

    # start feature engineering
    df['CO_HL'] = (df['Close'] - df['Open']) / (df['High'] - df['Low'])
    df['HC_HL'] = (df['High'] - df['Close']) / (df['High'] - df['Low'])

    # add options
    if use_options:
        df = stock_fe.add_options(df, ticker)

    # add candle patterns
    if use_cdl_patt:
        df_cdl = df_raw_copy.copy()
        df_cdl = ta_util.add_cdl(df_cdl, patt_list)
    else:
        # Placeholder frame so nth_day_fcst always receives a DataFrame.
        df_cdl = pd.DataFrame({'empty': []})

    # add MAs
    df = ts_to_features.add_mas(df, ['Close'])
    if use_short_vol_flag:
        df = ts_to_features.add_mas(df, ['Volume', 'short_vol_pct'], [20])
    else:
        df = ts_to_features.add_mas(df, ['Volume'], [20])

    # normalize; the raw close is kept aside for building the targets
    df['Close_raw'] = df['Close']
    df = ts_to_features.add_ratio(
        df, ['Open', 'High', 'Low', 'Close', 'Close_ma10'], 'Close_ma20')
    df = ts_to_features.add_ratio(df, ['Volume'], 'Volume_ma20')
    if use_short_vol_flag:
        df = ts_to_features.add_ratio(df, ['short_vol_pct'],
                                      'short_vol_pct_ma20')

    # multi shifts
    shift_cols = ['Open', 'High', 'Low', 'Close', 'Volume', 'Close_ma10',
                  'CO_HL', 'HC_HL']
    if use_short_vol_flag:
        shift_cols.append('short_vol_pct')
    df = ts_to_features.add_multi_shifts(df, shift_cols, total_shifts)

    # add fake-date for forecasting
    df = ts_to_features.clone_last_row(df, shift_cols, days=3)

    # add target: +1 / -1 when the raw close moved at least
    # up_down_threshold (relative) versus Close_raw_lag1d, otherwise 0
    df = ts_to_features.add_shift_cols(df, ['Close_raw'], 1)
    df['target'] = 0
    df['target'] = np.where(
        df['Close_raw'] >= df['Close_raw_lag1d'] * (1 + up_down_threshold),
        1, df['target'])
    df['target'] = np.where(
        df['Close_raw'] <= df['Close_raw_lag1d'] * (1 - up_down_threshold),
        -1, df['target'])

    df['target_reg'] = df['Close_raw'] / df['Close_raw_lag1d'] - 1
    df = ts_to_features.remove_na(df, 'target_reg')

    # ML: drop the unshifted columns, the helper columns and every column
    # selected by the 'lag0d' pattern.
    drop_list = ['Open', 'High', 'Low', 'Close', 'Volume',
                 'CO_HL', 'HC_HL', 'Close_ma10', 'Close_ma20', 'Volume_ma20',
                 'Close_raw', 'Close_raw_lag1d']
    if use_short_vol_flag:
        drop_list.extend(['short_vol_pct', 'short_vol_pct_ma20'])
    lag0d_list = util.show_cols(df, 'lag0d')
    drop_list += lag0d_list

    df = df.drop(drop_list, axis=1)

    if use_pc_flag:
        df = stock_fe.add_pc_ratios(df)

    if use_other_tickers:
        df = stock_fe.add_other_tickers(df, ticker_list)

    print('Ticker: ', ticker)
    if use_short_vol_flag:
        print('Use short volume pct')

    # 1 to 3 day fcst
    output_dict = {'Ticker': ticker}
    for n in range(1, 4):
        day_output_dict = stock_ml.nth_day_fcst(
            df, df_cdl, n, patt_list, test_date, use_cdl_patt,
            print_features_flag=print_features_flag)
        output_dict.update(day_output_dict)

    print(output_dict)
    return output_dict
コード例 #4
0
# Script fragment: runs the ML pipeline for a 1-day and then a 2-day
# forecast on the module-level frame `df`, printing the top feature rows.
print('Ticker: ', ticker)

# 1-day fcst
print('1-day Forecast:')
if use_cdl_patt:
    # Candle-pattern features, requested with lag=1 for the 1-day forecast.
    df = ta_util.add_cdl(df, patt_list, lag_flag=True, lag=1)

# NOTE(review): the positional 1 and 2 are presumably the forecast horizon
# and the total number of horizons -- confirm against stock_ml.ml_pipeline.
df_features, next_date_fcst, df_test, y_test, y_pred = stock_ml.ml_pipeline(
    df, test_date, 1, 2)
print(df_features.head(10))
# Snapshot of the 1-day frame, taken before the lag1d columns are dropped.
df_1d = df.copy()

# 2-day fcst
print('2-day Forecast:')
# Drop the columns selected by the 'lag1d' pattern before the 2-day run.
cols_lag1d = util.show_cols(df, 'lag1d')
df = df.drop(cols_lag1d, axis=1)
if use_cdl_patt:
    df = ta_util.add_cdl(df, patt_list, lag_flag=True, lag=2)

# These assignments overwrite the results of the 1-day run above.
df_features, next_date_fcst, df_test, y_test, y_pred = stock_ml.ml_pipeline(
    df, test_date, 2, 2)
print(df_features.head(10))

#
#df_plot = pd.DataFrame({'date':df_test['date'][:-1], 'y_act':y_test, 'y_fcst':y_pred})
#df_plot = pd.merge(df_plot, df_close, on = 'date', how='left')
#df_plot['date'] = df_plot['date'].apply(lambda x:x[2:])
#df_plot.set_index('date', inplace=True, drop=True)
#
##%matplotlib qt
コード例 #5
0
ファイル: test_util.py プロジェクト: gubo2012/py_ref
def assert_no_prior_days_data(df, n_day):
    """Assert that ``df`` has no lag columns shorter than ``n_day`` days.

    For every lag from 1 to ``n_day - 1`` the columns selected by
    ``util.show_cols(df, 'lag{k}d')`` must be empty; otherwise an
    ``AssertionError`` is raised. Nothing is checked when ``n_day <= 1``.
    """
    for lag in range(1, n_day):
        leftover_cols = util.show_cols(df, 'lag{}d'.format(lag))
        assert len(leftover_cols) == 0
コード例 #6
0
    df['target'])

# Regression target: ratio of Close_raw to Close_raw_lag1d, minus 1.
df['target_reg'] = df['Close_raw'] / df['Close_raw_lag1d'] - 1
# NOTE(review): presumably drops rows where target_reg is NaN -- confirm
# against ts_to_features.remove_na.
df = ts_to_features.remove_na(df, 'target_reg')

# for ts debug's purpose
#df_debug = df[['date', 'Close', 'Close_lag0d', 'Close_lag1d', 'Close_lag2d', 'Close_lag3d', 'Close_raw', 'Close_raw_lag1d', 'target']]

# ML
# Columns removed before modelling: the unshifted price/volume columns, the
# moving-average and ratio helper columns, and the raw-close columns used
# for the target.
drop_list = [
    'Open', 'High', 'Low', 'Close', 'Volume', 'CO_HL', 'HC_HL', 'Close_ma10',
    'Close_ma20', 'Volume_ma20', 'Close_raw', 'Close_raw_lag1d'
]
if use_short_vol_flag:
    drop_list.extend(['short_vol_pct', 'short_vol_pct_ma20'])
# Every column selected by the 'lag0d' pattern is dropped as well.
lag0d_list = util.show_cols(df, 'lag0d')
drop_list += lag0d_list

df = df.drop(drop_list, axis=1)

# Optional feature groups, each enabled by its own flag.
if use_pc_flag:
    df = stock_fe.add_pc_ratios(df)

if use_other_tickers:
    df = stock_fe.add_other_tickers(df, ticker_list)

if use_options:
    df = stock_fe.add_options(df, ticker)

#
#if use_btc_flag:
コード例 #7
0
def analyze_ticker(ticker):
    """Rank a ticker's grid-search runs and build an ensemble forecast.

    Loads the pickled grid-search results for ``ticker``, ranks the runs by
    mean classification accuracy and mean regression RMSE over the 1- to
    3-day horizons, averages the forecasts of the top
    ``conf_man['emsemble_n']`` runs, and keeps only the "confident"
    horizons: those where the classification and regression forecasts have
    the same sign and the classification forecast exceeds 0.2 in magnitude.

    Args:
        ticker: Symbol substituted into ``stock_io.grid_search_output``.

    Returns:
        tuple: ``(final_output_confident, avg_acc, df_conf_output)`` where
        ``final_output_confident`` is a dict holding ``'Ticker'`` plus the
        confident ``'{k}d_clf_fcst'`` / ``'{k}d_reg_fcst'`` entries,
        ``avg_acc`` is the mean ``overall_acc`` of the top runs, and
        ``df_conf_output`` is a one-row DataFrame with a ``ticker`` column
        and one ``'{k}d_fcst'`` column per horizon (0 when not confident).

    Raises:
        ValueError: If the results file contains no runs.
    """
    # The context manager closes the file even if unpickling fails.
    with open(stock_io.grid_search_output.format(ticker), 'rb') as pkl_file:
        grid_search_results = pickle.load(pkl_file)

    if not grid_search_results:
        raise ValueError(
            'No grid search results found for ticker {}'.format(ticker))

    # One single-row frame per run. The ticker list is flattened into a
    # '_'-joined string; str.join also copes with an empty list, on which
    # reduce() without an initial value raises TypeError.
    rows = []
    for value in grid_search_results.values():
        row = dict(value)
        row['ticker_list'] = '_'.join(value['ticker_list'])
        rows.append(pd.DataFrame([row]))
    # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
    df = pd.concat(rows, ignore_index=True)

    n = 3
    conf_clf_threshold = 0.2
    df['overall_acc'] = 0
    df['overall_rmse'] = 0
    for day in range(1, n + 1):
        df['overall_acc'] = (df['overall_acc'] +
                             df['{}d_clf_acc'.format(day)] / n)
        df['overall_rmse'] = (df['overall_rmse'] +
                              df['{}d_reg_rmse'.format(day)] / n)

    # Higher accuracy and lower RMSE are better; the overall rank is the
    # rank of the summed ranks.
    df['overall_acc_rank'] = df['overall_acc'].rank(ascending=False)
    df['overall_rmse_rank'] = df['overall_rmse'].rank(ascending=True)
    df['ranks_sum'] = df['overall_acc_rank'] + df['overall_rmse_rank']
    df['overall_rank'] = df['ranks_sum'].rank(ascending=True)

    df.sort_values('overall_rank', inplace=True)

    # ensemble: simple average over the top-ranked runs
    emsemble_n = conf_man['emsemble_n']
    cols_fcst = util.show_cols(df, '_fcst')
    final_output = {'Ticker': ticker}
    final_output_confident = {'Ticker': ticker}
    df_top = df[:emsemble_n]  # take top emsemble_n fcsts
    # Divide by the rows actually present so the average stays correct
    # when fewer than emsemble_n runs exist.
    top_count = len(df_top)
    for col in cols_fcst:
        fcst = df_top[col].sum() / top_count
        if 'clf' in col:
            final_output[col] = round(fcst, 2)
        else:
            # Regression forecasts are reported as percentage strings.
            final_output[col] = '{}%'.format(round(fcst * 100, 1))

    # filter in only confident fcsts
    df_conf_output = pd.DataFrame([{'ticker': ticker}])

    for day in range(1, n + 1):
        col_clf = f'{day}d_clf_fcst'
        col_reg = f'{day}d_reg_fcst'
        clf_fcst = final_output[col_clf]
        # Convert back to float after removing the trailing '%'.
        reg_fcst_num = float(final_output[col_reg][:-1])
        same_direction = clf_fcst * reg_fcst_num > 0
        if same_direction and abs(clf_fcst) > conf_clf_threshold:
            final_output_confident[col_clf] = clf_fcst
            final_output_confident[col_reg] = final_output[col_reg]
            df_conf_output[f'{day}d_fcst'] = reg_fcst_num
        else:
            df_conf_output[f'{day}d_fcst'] = 0

    avg_acc = df_top['overall_acc'].mean()
    print('Avg Acc:', avg_acc, final_output)
    return final_output_confident, avg_acc, df_conf_output