Beispiel #1
0
def add_options(df, ticker):
    """Merge option-flow features for ``ticker`` into ``df``.

    Loads the options table from ``stock_io.options_all_data``, keeps the
    rows whose ``symbol`` equals ``ticker``, rescales the four flow columns,
    passes the result through ``ts_to_features.mongodb_format`` and
    ``fe_pipeline``, and left-joins it onto ``df`` by ``date``.

    Args:
        df: Feature frame containing a ``date`` column.
        ticker: Symbol used to filter the options table.

    Returns:
        ``df`` left-merged with the option features.  Missing values in the
        columns returned by ``util.show_cols(df, 'Flow_')`` are set to 0.
    """
    df_options = pd.read_pickle(stock_io.options_all_data)
    # Take an explicit copy of the filtered rows: the column assignments
    # below would otherwise write to a slice of the loaded frame and raise
    # pandas' SettingWithCopyWarning.
    df_options = df_options[df_options['symbol'] == ticker].copy()

    cols_options = ['LTCallFlow', 'STCallFlow', 'LTPutFlow', 'STPutFlow']

    for col in cols_options:
        df_options[col] = df_options[col].fillna(0)
        # Flow divided by price and by volume, then scaled by 1e6.
        # NOTE(review): rows with Volume == 0 yield inf/NaN here - confirm
        # whether that can occur in the input.
        df_options[col] = df_options[col] / df_options[
            'Adj Close'] / df_options['Volume'] * 1000 * 1000

    df_options = df_options.drop(
        ['symbol', 'CallFlow', 'PutFlow', 'Adj Close', 'Volume'], axis=1)

    df_options = ts_to_features.mongodb_format(df_options)

    # Fill again after formatting, as the original does.
    for col in cols_options:
        df_options[col] = df_options[col].fillna(0)

    df_options = fe_pipeline(df_options,
                             cols_options,
                             scale_ma_flag=False,
                             drop_col=True)

    df = pd.merge(df, df_options, how='left', on='date')

    # Dates without option data come out of the left join as NaN.
    for col in util.show_cols(df, 'Flow_'):
        df[col] = df[col].fillna(0)

    return df
Beispiel #2
0
def add_pc_ratio(file_name, col_name):
    """Build features from the ``PCRatio`` column of a pickled frame.

    Args:
        file_name: Path of the pickle to read; it must provide the columns
            ``date`` and ``PCRatio``.
        col_name: Name given to the ``PCRatio`` column before processing.

    Returns:
        The result of ``fe_pipeline`` applied to the formatted two-column
        frame (``scale_ma_flag=False``, ``drop_col=True``).
    """
    raw = pd.read_pickle(file_name)

    # Keep only date + ratio, renamed to the caller's column name.
    ratio = raw[['date', 'PCRatio']].rename(columns={'PCRatio': col_name})
    ratio = ts_to_features.mongodb_format(ratio)

    return fe_pipeline(ratio, [col_name], scale_ma_flag=False, drop_col=True)
Beispiel #3
0
def add_other_tickers(df, ticker_list):
    """Left-join features of each reference ticker onto ``df`` by ``date``.

    Args:
        df: Feature frame containing a ``date`` column.
        ticker_list: Column names to pull from the reference table loaded
            from ``stock_io.ref_data``.

    Returns:
        ``df`` after one left merge per entry of ``ticker_list``.
    """
    reference = pd.read_pickle(stock_io.ref_data)
    reference = ts_to_features.mongodb_format(reference)

    for symbol in ticker_list:
        # One (date, symbol) frame per ticker goes through the pipeline.
        symbol_features = fe_pipeline(
            reference[['date', symbol]], [symbol], drop_col=True)
        df = pd.merge(df, symbol_features, how='left', on='date')

    return df
Beispiel #4
0
import talib
#from talib import abstract

# UDF
import stock_io
import ts_to_features

# Symbol whose price history is loaded below.
ticker = 'SPY'

# Source switch: truthy reads the raw CSV, falsy reads the pickled frame.
use_yahoo_flag = 0

if not use_yahoo_flag:
    # Pickled data is passed through the project's mongodb_format helper.
    df = ts_to_features.mongodb_format(
        pd.read_pickle(stock_io.pkl_data.format(ticker)))
else:
    df = pd.read_csv(stock_io.raw_data.format(ticker))

# Simple moving average of Close using talib's default settings.
df['SMA'] = talib.SMA(df['Close'])

#df['CDL2CROWS'] = talib.CDLHAMMER(df['Open'], df['High'], df['Low'], df['Close'])
#
## not working
##df['CDL3BLACKCROWS'] = abstract.Function('CDL3BLACKCROWS')(df['Open'], df['High'], df['Low'], df['Close'])
#
## alternative
#cdl_func = eval('talib.'+'CDL3BLACKCROWS')
#df['CDL3BLACKCROWS'] = cdl_func(df['Open'], df['High'], df['Low'], df['Close'])

#print(talib.get_functions())
#print(talib.get_function_groups())
def run_grid_search(ticker, params):
    """Build the feature frame for ``ticker`` and run 1- to 3-day forecasts.

    Reads ``up_down_threshold``, ``total_shifts``, ``train_start_date`` and
    ``test_start_date`` from the module-level ``conf_man`` mapping.

    Args:
        ticker: Symbol to model.
        params: Mapping with the keys ``use_pc_flag``, ``use_other_tickers``,
            ``use_cdl_patt``, ``use_short_vol_flag``, ``use_options_flag``
            and ``ticker_list``.  The mapping and its list are not modified.

    Returns:
        dict: ``{'Ticker': ticker}`` updated with the dict returned by
        ``stock_ml.nth_day_fcst`` for n = 1, 2 and 3.
    """
    up_down_threshold = conf_man['up_down_threshold']
    total_shifts = conf_man['total_shifts']

    # Local toggle: truthy loads the combined pickle and filters by symbol,
    # falsy loads the per-ticker pickle.
    use_stocks_all_data = 1

    use_pc_flag = params['use_pc_flag']
    use_other_tickers = params['use_other_tickers']
    use_cdl_patt = params['use_cdl_patt']
    use_short_vol_flag = params['use_short_vol_flag']
    use_options = params['use_options_flag']

    # Work on a copy: removing `ticker` from the caller's list in place
    # would shrink params['ticker_list'] for every later call that reuses
    # the same `params` object.
    ticker_list = list(params['ticker_list'])
    if ticker in ticker_list:
        ticker_list.remove(ticker)

    patt_list = [
        'CDLBELTHOLD', 'CDLCLOSINGMARUBOZU', 'CDLDOJI', 'CDLENGULFING',
        'CDLHARAMI', 'CDLHIGHWAVE', 'CDLHIKKAKE', 'CDLLONGLEGGEDDOJI',
        'CDLMARUBOZU', 'CDLRICKSHAWMAN', 'CDLSHORTLINE',
    ]

    print_features_flag = 0

    # Load raw data; both sources go through the same formatting helpers.
    if use_stocks_all_data:
        df = pd.read_pickle(stock_io.stocks_all_data)
        df = df[df['symbol'] == ticker]
    else:
        df = pd.read_pickle(stock_io.pkl_data.format(ticker))
    df = ts_to_features.mongodb_format(df)
    df = ts_to_features.data_format(df)

    start_date = conf_man['train_start_date']
    test_date = conf_man['test_start_date']
    df = df[df.date >= start_date]

    # 'short_vol_pct' is kept only when short-volume features are enabled.
    raw_cols_to_drop = ['Adj Close', 'ShortVolume']
    if not use_short_vol_flag:
        raw_cols_to_drop.append('short_vol_pct')
    df = df.drop(raw_cols_to_drop, axis=1)

    df = df.sort_values(by=['date'])

    # Untouched copy of the sorted raw columns for the candle-pattern step.
    df_raw_copy = df.copy()

    # Start feature engineering.
    # Position of the close within the day's range.  Rows where
    # High == Low divide by zero and produce inf/NaN.
    hl_range = df['High'] - df['Low']
    df['CO_HL'] = (df['Close'] - df['Open']) / hl_range
    df['HC_HL'] = (df['High'] - df['Close']) / hl_range

    # Add options.
    if use_options:
        df = stock_fe.add_options(df, ticker)

    # Add candle patterns.
    if use_cdl_patt:
        df_cdl = df_raw_copy.copy()
        df_cdl = ta_util.add_cdl(df_cdl, patt_list)
    else:
        df_cdl = pd.DataFrame({'empty' : []})

    # Add moving averages.
    df = ts_to_features.add_mas(df, ['Close'])
    if use_short_vol_flag:
        df = ts_to_features.add_mas(df, ['Volume', 'short_vol_pct'], [20])
    else:
        df = ts_to_features.add_mas(df, ['Volume'], [20])

    # Normalize; keep the raw close for building the target further down.
    df['Close_raw'] = df['Close']
    df = ts_to_features.add_ratio(
        df, ['Open', 'High', 'Low', 'Close', 'Close_ma10'], 'Close_ma20')
    df = ts_to_features.add_ratio(df, ['Volume'], 'Volume_ma20')
    if use_short_vol_flag:
        df = ts_to_features.add_ratio(
            df, ['short_vol_pct'], 'short_vol_pct_ma20')

    # Multi shifts.
    shift_cols = ['Open', 'High', 'Low', 'Close', 'Volume', 'Close_ma10',
                  'CO_HL', 'HC_HL']
    if use_short_vol_flag:
        shift_cols.append('short_vol_pct')
    df = ts_to_features.add_multi_shifts(df, shift_cols, total_shifts)

    # Add fake dates for forecasting.
    df = ts_to_features.clone_last_row(df, shift_cols, days = 3)

    # Add target: +1 / -1 when the raw close moved at least the threshold
    # up / down relative to 'Close_raw_lag1d', 0 otherwise.  The down rule
    # is applied last, so it wins if both conditions hold.
    df = ts_to_features.add_shift_cols(df, ['Close_raw'], 1)
    df['target'] = 0
    df['target'] = np.where(
        df['Close_raw'] >= df['Close_raw_lag1d'] * (1+up_down_threshold),
        1, df['target'])
    df['target'] = np.where(
        df['Close_raw'] <= df['Close_raw_lag1d'] * (1-up_down_threshold),
        -1, df['target'])

    df['target_reg'] = df['Close_raw'] / df['Close_raw_lag1d'] - 1
    df = ts_to_features.remove_na(df, 'target_reg')

    # ML: drop the unshifted source columns and every 'lag0d' column.
    drop_list = ['Open', 'High', 'Low', 'Close', 'Volume',
                 'CO_HL', 'HC_HL', 'Close_ma10', 'Close_ma20', 'Volume_ma20',
                 'Close_raw', 'Close_raw_lag1d']
    if use_short_vol_flag:
        drop_list.extend(['short_vol_pct', 'short_vol_pct_ma20'])
    drop_list += util.show_cols(df, 'lag0d')

    df = df.drop(drop_list, axis=1)

    if use_pc_flag:
        df = stock_fe.add_pc_ratios(df)

    if use_other_tickers:
        df = stock_fe.add_other_tickers(df, ticker_list)

    print('Ticker: ', ticker)
    if use_short_vol_flag:
        print('Use short volume pct')

    # 1 to 3 day forecast.
    output_dict = {'Ticker':ticker}
    for n in range(1, 4):
        day_output_dict = stock_ml.nth_day_fcst(
            df, df_cdl, n, patt_list, test_date, use_cdl_patt,
            print_features_flag=print_features_flag)
        output_dict.update(day_output_dict)

    print(output_dict)
    return output_dict
Beispiel #6
0
import ts_to_features

# Run configuration.
start_date = '2018-01-01'

shift_flag = True
shifts = 15

# Load the three pickled price tables.
df_spy = pd.read_pickle('spy.pkl')
df_qqq = pd.read_pickle('qqq.pkl')
df_btc = pd.read_pickle('btc.pkl')

# Reduce each table to `date` plus one price column named after the asset,
# then pass it through the project's mongodb_format helper.
df_spy = ts_to_features.mongodb_format(
    df_spy[['date', 'Close']].rename(columns={'Close': 'SPY'}))

df_qqq = ts_to_features.mongodb_format(
    df_qqq[['date', 'Close']].rename(columns={'Close': 'QQQ'}))

df_btc = ts_to_features.mongodb_format(
    df_btc[['date', 'price']].rename(columns={'price': 'BTC'}))

# Inner-join all three on date, so only dates present in every table remain.
df_merge = pd.merge(
    pd.merge(df_spy, df_qqq, how='inner', on='date'),
    df_btc, how='inner', on='date')

# Working copy; df_merge stays untouched.
df = df_merge.copy()
Beispiel #7
0
# Run configuration.
start_date = '2018-01-01'

shift_flag = True
shifts = 15

# Column of the tickers table to merge with the two ETFs.
ticker = 'SLV'

# Load the pickled tables.
df_spy = pd.read_pickle('spy.pkl')
df_qqq = pd.read_pickle('qqq.pkl')
df_ticker = pd.read_pickle('tickers.pkl')

# ETFs: keep date + Close, renamed to the ETF symbol.
df_spy = ts_to_features.mongodb_format(
    df_spy[['date', 'Close']].rename(columns={'Close': 'SPY'}))

df_qqq = ts_to_features.mongodb_format(
    df_qqq[['date', 'Close']].rename(columns={'Close': 'QQQ'}))

# Selected ticker: its column already carries the symbol name.
df_ticker = ts_to_features.mongodb_format(df_ticker[['date', ticker]])

# Inner-join all three on date, so only dates present in every table remain.
df_merge = pd.merge(
    pd.merge(df_spy, df_qqq, how='inner', on='date'),
    df_ticker, how='inner', on='date')

# Working copy; df_merge stays untouched.
df = df_merge.copy()