예제 #1
0
def get_yahoo_data(start, end):
    """Download Dow 30 prices and return them with engineered features.

    Only tickers that have a row on every date of the download are kept, so
    every date carries the same set of tickers when the features are
    computed.

    Args:
        start: Passed to ``YahooDownloader`` as ``start_date``.
        end: Passed to ``YahooDownloader`` as ``end_date``.

    Returns:
        The result of ``FeatureEngineer.preprocess_data``, sorted by ``date``
        and ``tic`` with a fresh 0..n-1 index.

    Raises:
        AssertionError: If, on some date, the rows of the kept tickers do not
            number exactly one per ticker (e.g. duplicated rows).
    """
    df = YahooDownloader(start_date=start,
                         end_date=end,
                         ticker_list=config.DOW_30_TICKER).fetch_data()

    # sort_values returns a new frame; the result has to be re-bound.
    df = df.sort_values(['date', 'tic'], ignore_index=True)

    # Intersect the ticker sets of all dates.  ``None`` means "no date seen
    # yet", so an intersection that becomes empty stays empty instead of
    # being re-seeded from the next date.
    common = None
    for _, group in df.groupby('date'):
        tickers_today = set(group.tic.unique())
        common = tickers_today if common is None else common & tickers_today
    if common is None:
        common = set()

    # Keep only the common tickers, date by date, and check that each date
    # contributes exactly one row per kept ticker.
    frames = []
    for date, group in df.groupby('date'):
        kept = group[group.tic.isin(common)].sort_values(['date', 'tic'],
                                                         ignore_index=True)
        if len(kept) != len(common):
            # Raised explicitly so the check also runs under ``python -O``.
            raise AssertionError(
                "expected {} rows on {}, got {}".format(
                    len(common), date, len(kept)))
        frames.append(kept)

    # One concat instead of DataFrame.append inside the loop (append copies
    # the accumulated frame every time and no longer exists in pandas 2.x).
    if frames:
        df = pd.concat(frames)
    else:
        df = pd.DataFrame(columns=list(df.columns))

    fe = FeatureEngineer(use_technical_indicator=True,
                         tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
                         use_turbulence=True,
                         user_defined_feature=False)

    processed = fe.preprocess_data(df)
    processed = processed.sort_values(['date', 'tic'], ignore_index=True)

    return processed

# Notebook-style inspection cells: when this runs as a plain script the
# values of the bare expressions below are evaluated and then discarded.
config.START_DATE

config.END_DATE

print(config.DOW_30_TICKER)

# ---------------------------------------------------------------------------
# Download data
# ---------------------------------------------------------------------------

df = YahooDownloader(
    start_date='2008-01-01',
    end_date='2021-01-01',
    ticker_list=config.DOW_30_TICKER,
).fetch_data()

# Quick look at the download (only ``info()`` prints anything in a script).
df.shape
df.sort_values(by=['date', 'tic'], ignore_index=True).head()
df.info()

# ---------------------------------------------------------------------------
# Feature engineering
# ---------------------------------------------------------------------------

fe = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
    use_turbulence=True,
    user_defined_feature=False,
)

processed = fe.preprocess_data(df)

# Distinct tickers, and every calendar day between the first and the last
# processed date rendered as a string.
list_ticker = processed["tic"].unique().tolist()
list_date = pd.date_range(
    start=processed['date'].min(),
    end=processed['date'].max(),
).astype(str).tolist()
예제 #3
0
def generate_data(rollouts, data_dir, noise_type): # pylint: disable=R0914
    """Record random-action rollouts of an AAPL trading environment to disk.

    Downloads AAPL prices for 2009-01-01 .. 2021-01-01, runs them through
    ``FeatureEngineer``, builds a ``StockTradingEnv`` on the
    ``data_split(..., '2009-01-01', '2019-01-01')`` portion and plays
    ``rollouts`` episodes with pre-sampled actions.  Each finished episode is
    saved with ``np.savez`` under ``join(data_dir, 'rollout_<i>')`` with the
    arrays ``observations``, ``rewards``, ``actions`` and ``terminals``.

    Args:
        rollouts: Number of episodes to record.
        data_dir: Existing directory that receives the rollout files.
        noise_type: ``'white'`` draws every action independently with
            ``env.action_space.sample()``; ``'brown'`` delegates to
            ``sample_continuous_policy``.

    Raises:
        AssertionError: If ``data_dir`` does not exist.
        ValueError: If ``noise_type`` is neither ``'white'`` nor ``'brown'``.
    """
    assert exists(data_dir), "The data directory does not exist..."

    df = YahooDownloader(start_date='2009-01-01',
                         end_date='2021-01-01',
                         ticker_list=['AAPL']).fetch_data()

    # sort_values returns a new frame; the result has to be re-bound.
    df = df.sort_values(['date', 'tic'], ignore_index=True)

    fe = FeatureEngineer(use_technical_indicator=True,
                         tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
                         use_turbulence=True,
                         user_defined_feature=False)

    processed = fe.preprocess_data(df)

    # Build the full (calendar day x ticker) grid, left-join the processed
    # rows onto it, keep only dates that occur in the processed data and
    # fill whatever is still missing with 0.
    list_ticker = processed["tic"].unique().tolist()
    list_date = list(pd.date_range(processed['date'].min(),
                                   processed['date'].max()).astype(str))
    combination = list(itertools.product(list_date, list_ticker))

    processed_full = pd.DataFrame(combination, columns=["date", "tic"]).merge(
        processed, on=["date", "tic"], how="left")
    processed_full = processed_full[
        processed_full['date'].isin(processed['date'])]
    processed_full = processed_full.sort_values(['date', 'tic'],
                                                ignore_index=True)
    processed_full = processed_full.fillna(0)

    train = data_split(processed_full, '2009-01-01', '2019-01-01')
    # NOTE(review): the 2019-2021 split is computed but not used below.
    trade = data_split(processed_full, '2019-01-01', '2021-01-01')

    stock_dimension = len(train.tic.unique())
    # 1 + two values per stock + one value per (indicator, stock) pair.
    state_space = (1 + 2 * stock_dimension
                   + len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "transaction_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env, _ = e_train_gym.get_sb_env()

    # Number of actions sampled up front for every rollout; an episode that
    # is still running after this many steps runs out of actions.
    seq_len = 10000

    for i in range(rollouts):
        env.reset()

        if noise_type == 'white':
            a_rollout = [env.action_space.sample() for _ in range(seq_len)]
        elif noise_type == 'brown':
            a_rollout = sample_continuous_policy(env.action_space, seq_len, 1. / 50)
        else:
            # Without this branch a_rollout would be unbound on the first
            # rollout, or silently reused from the previous one.
            raise ValueError(
                "noise_type must be 'white' or 'brown', got {!r}".format(
                    noise_type))

        s_rollout = []
        r_rollout = []
        d_rollout = []

        t = 0
        while True:
            action = a_rollout[t]
            t += 1

            s, r, done, _ = env.step(action)
            s_rollout.append(s)
            r_rollout.append(r)
            d_rollout.append(done)
            if done:
                print("> End of rollout {}, {} frames...".format(i, len(s_rollout)))
                # Save only the t actions that were actually played so all
                # four arrays have the same length.
                np.savez(join(data_dir, 'rollout_{}'.format(i)),
                         observations=np.array(s_rollout),
                         rewards=np.array(r_rollout),
                         actions=np.array(a_rollout[:t]),
                         terminals=np.array(d_rollout))
                break