Example #1
def new_test():

    processed = pd.read_csv(
        os.path.abspath('./me/datasets/new_data_with_techs_turb.csv'),
        index_col=0)

    train = data_split(processed, '2009-01-01', '2018-01-01')
    trade = data_split(processed, '2018-01-01', '2021-01-01')

    stock_dimension = len(train.tic.unique())
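    # state = [cash] + [close price, shares held] per stock + one entry per indicator per stock;
    # e.g. 30 tickers with 8 indicators give 1 + 2*30 + 8*30 = 301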
    state_space = 1 + 2 * stock_dimension + len(
        config.TECHNICAL_INDICATORS_LIST) * stock_dimension
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "transaction_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    log_dir = "me/tmp/"
    os.makedirs(log_dir, exist_ok=True)

    env_train.envs[0] = Monitor(env_train.envs[0], log_dir)

    agent = DRLAgent(env=env_train)
    model_a2c = agent.get_model("a2c", verbose=0)

    trained_a2c = agent.train_model(model=model_a2c,
                                    tb_log_name='a2c',
                                    total_timesteps=100000)

    data_turbulence = processed[(processed.date < '2018-01-01')
                                & (processed.date >= '2009-01-01')]
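    # turbulence is a market-wide index repeated for each ticker, so keep one value per date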
    insample_turbulence = data_turbulence.drop_duplicates(subset=['date'])
    turbulence_threshold = np.quantile(insample_turbulence.turbulence.values,
                                       1)
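    # note: the environment below uses a hardcoded threshold of 380 rather than this computed maximum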

    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=380,
                                  **env_kwargs)
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    print("BEGIN PREDICTION")
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_a2c,
                                                           test_data=trade,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)

    print(df_account_value)

    print("END PREDICTION")
Example #2
def train_one():
    env_kwargs, processed = prepare_data()

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)

    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    model = agent.get_model(config.CURRENT_MODEL)
    model = agent.train_model(model=model, total_timesteps=80000)

    path = f"{config.TRAINED_MODEL_DIR}/model"
    model.save(path)
    model = model.load(path)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=model, environment=e_trade_gym)
    log_account(df_account_value, df_actions)
Example #3
    def train(self, origin_data):
        self.build_model()
        train = data_split(origin_data, config.START_DATE,
                           config.START_TRADE_DATE)
        trade = data_split(origin_data, config.START_TRADE_DATE,
                           config.END_DATE)

        y_train = self.cal_result(train)
        y_trade = self.cal_result(trade)

        x_train_tech = self.cal_feature(train, 5)
        x_trade_tech = self.cal_feature(trade, 5)

        x_train_idx = self.cal_index(train, 5)
        x_trade_idx = self.cal_index(trade, 5)

        print(x_train_tech.shape)

        self.model.fit([x_train_idx, x_train_tech],
                       y_train,
                       epochs=100,
                       batch_size=128)
        score = self.model.evaluate([x_trade_idx, x_trade_tech],
                                    y_trade,
                                    return_dict=True)
        print(score)
        self.model.save(self.model_path)
Example #4
def main():
    start_date = '2020-01-01'
    trade_start_date = '2020-12-01'
    end_date = '2021-01-01'
    ticker_list = stock_tickers
    numerical_df = YahooDownloader(start_date=start_date,
                                   end_date=end_date,
                                   ticker_list=ticker_list).fetch_data()
    sentiment_df = generate_sentiment_scores(start_date, end_date)
    initial_data = get_initial_data(numerical_df, sentiment_df)
    train_data = data_split(initial_data, start_date, trade_start_date)
    trade_data = data_split(initial_data, trade_start_date, end_date)
    indicator_list = config.TECHNICAL_INDICATORS_LIST + ['sentiment']
    stock_dimension = len(trade_data.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        indicator_list) * stock_dimension
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": indicator_list,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,
        "print_verbosity": 5
    }
    e_train_gym = StockTradingEnv(df=train_data, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    # print(train_data.index)
    # print(trade_data.index)
    # print(trade_data.loc[0])
    e_trade_gym = OnlineStockTradingEnv(trade_data.loc[0], **env_kwargs)
    training_agent = DRLAgent(env=env_train)
    model_a2c = training_agent.get_model("a2c")
    # print(train_data.index)
    # print(trade_data.index)
    #trained_a2c = agent.train_model(model=model_a2c, tb_log_name='a2c',total_timesteps=10000)
    feature_engineer = FeatureEngineer()
    online_stock_pred = OnlineStockPrediction(e_trade_gym, model_a2c)

    for i in range(1, trade_data.index.unique().max()):
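        # data_split re-indexes rows by day number, so trade_data.loc[i] selects all tickers for day i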
        print(trade_data.loc[i])
        online_stock_pred.add_data(trade_data.loc[i])
        action, states, next_obs, rewards = online_stock_pred.predict()
        print("Action:", action)
        print("States: ", states)
        print("Next observation: ", next_obs)
        print("Rewards: ", rewards)
Example #5
    def DRL_prediction(self,model,name,last_state,iter_num,turbulence_threshold,initial):
        ### make a prediction based on trained model###

        ## trading env
        trade_data = data_split(self.df, start=self.unique_trade_date[iter_num - self.rebalance_window], end=self.unique_trade_date[iter_num])
        trade_env = DummyVecEnv([lambda: StockTradingEnv(trade_data,
                                                        self.stock_dim,
                                                        self.hmax,
                                                        self.initial_amount,
                                                        self.buy_cost_pct,
                                                        self.sell_cost_pct,
                                                        self.reward_scaling,
                                                        self.state_space,
                                                        self.action_space,
                                                        self.tech_indicator_list,
                                                        turbulence_threshold=turbulence_threshold,
                                                        initial=initial,
                                                        previous_state=last_state,
                                                        model_name=name,
                                                        mode='trade',
                                                        iteration=iter_num,
                                                        print_verbosity=self.print_verbosity)])

        trade_obs = trade_env.reset()

        for i in range(len(trade_data.index.unique())):
            action, _states = model.predict(trade_obs)
            trade_obs, rewards, dones, info = trade_env.step(action)
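            # capture the state on the second-to-last day so it can seed the next rebalance window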
            if i == (len(trade_data.index.unique()) - 2):
                # print(env_test.render())
                last_state = trade_env.render()

        df_last_state = pd.DataFrame({'last_state': last_state})
        df_last_state.to_csv('results/last_state_{}_{}.csv'.format(name, i), index=False)
        return last_state
Example #6
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(start_date=config.START_DATE,
                         end_date=config.END_DATE,
                         ticker_list=config.DOW_30_TICKER).fetch_data()
    print("==============Start Feature Engineering===========")
    df = FeatureEngineer(df,
                         feature_number=5,
                         use_technical_indicator=True,
                         use_turbulence=True).preprocess_data()

    train = data_split(df, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(df, config.START_TRADE_DATE, config.END_DATE)
    env_setup = EnvSetup(stock_dim=len(train.tic.unique()))
    env_train = env_setup.create_env_training(data=train,
                                              env_class=StockEnvTrain)
    agent = DRLAgent(env=env_train)
    print("==============Model Training===========")
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    a2c_params_tuning = {
        'n_steps': 5,
        'ent_coef': 0.005,
        'learning_rate': 0.0007,
        'verbose': 0,
        'timesteps': 100000
    }
    model_a2c = agent.train_A2C(model_name="A2C_{}".format(now),
                                model_params=a2c_params_tuning)

    print("==============Start Trading===========")
    env_trade, obs_trade = env_setup.create_env_trading(
        data=trade, env_class=StockEnvTrade, turbulence_threshold=250)

    df_account_value = DRLAgent.DRL_prediction(model=model_a2c,
                                               test_data=trade,
                                               test_env=env_trade,
                                               test_obs=obs_trade)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/" + now + '.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = BackTestStats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" +
                          now + '.csv')
Example #7
def load_stock_trading_data():
    from finrl.config import config

    cwd = './env/FinRL'
    raw_data_path = f'{cwd}/StockTradingEnv_raw_data.df'
    processed_data_path = f'{cwd}/StockTradingEnv_processed_data.df'

    os.makedirs(cwd, exist_ok=True)

    print("==============Start Fetching Data===========")
    if os.path.exists(raw_data_path):
        raw_df = pd.read_pickle(raw_data_path)  # DataFrame of Pandas
        print('| raw_df.columns.values:', raw_df.columns.values)
    else:
        from finrl.marketdata.yahoodownloader import YahooDownloader
        raw_df = YahooDownloader(
            start_date=config.START_DATE,
            end_date=config.END_DATE,
            ticker_list=config.DOW_30_TICKER,
        ).fetch_data()
        raw_df.to_pickle(raw_data_path)

    print("==============Start Feature Engineering===========")
    if os.path.exists(processed_data_path):
        processed_df = pd.read_pickle(processed_data_path)  # DataFrame of Pandas
        print('| processed_df.columns.values:', processed_df.columns.values)
    else:
        from finrl.preprocessing.preprocessors import FeatureEngineer
        fe = FeatureEngineer(
            use_technical_indicator=True,
            tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
            use_turbulence=True,
            user_defined_feature=False,
        )
        processed_df = fe.preprocess_data(raw_df)
        processed_df.to_pickle(processed_data_path)

    # Training & Trading data split
    from finrl.preprocessing.data import data_split
    train_df = data_split(processed_df, '2008-03-19', '2016-01-01')  # 1963/3223
    eval_df = data_split(processed_df, '2016-01-01', '2021-01-01')  # 1260/3223

    return train_df, eval_df
Example #8
def predict():
    env_kwargs, processed = prepare_data()
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    e_trade_gym = StockTradingEnv(df=trade, **env_kwargs)

    path = config.TRAINED_MODEL_DIR + "/model"
    # resolve the algorithm class (e.g. SAC, A2C) by name instead of using eval()
    model_cls = globals()[config.CURRENT_MODEL.upper()]
    trained_model = model_cls.load(path)
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=trained_model, environment=e_trade_gym)
    log_account(df_account_value, df_actions)
Example #9
def test_process_data():
    start_date = '2020-11-01'
    end_date = '2021-01-01'
    ticker_list = stock_tickers
    numerical_df = YahooDownloader(start_date=start_date,
                                   end_date=end_date,
                                   ticker_list=ticker_list).fetch_data()
    sentiment_df = generate_sentiment_scores(start_date, end_date)
    initial_data = get_initial_data(numerical_df, sentiment_df)
    trade_data = data_split(initial_data, start_date, '2020-12-01')
    numerical_feed_data = numerical_df[numerical_df.date > '2020-12-01']
    sentiment_feed_data = sentiment_df[sentiment_df.date > '2020-12-01']
    data_processor = DataProcessor(FeatureEngineer(), trade_data)
    for date in numerical_feed_data.date.unique():
        new_numerical = numerical_feed_data[numerical_feed_data.date == date]
        new_sentiment = sentiment_feed_data.loc[sentiment_feed_data.date == date]
        new_df = data_processor.process_data(new_numerical, new_sentiment)
        print(new_df)
Example #10
def check_finrl():
    from FinRL import StockTradingEnv
    from numpy import random as rd
    from finrl.config import config
    from finrl.preprocessing.data import data_split
    import pandas as pd

    # df = pd.read_pickle('finrl_data.df')  # DataFrame of Pandas
    #
    # from finrl.preprocessing.preprocessors import FeatureEngineer
    # fe = FeatureEngineer(
    #     use_technical_indicator=True,
    #     tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
    #     use_turbulence=True,
    #     user_defined_feature=False,
    # )
    #
    # processed_df = fe.preprocess_data(df)
    # processed_df.to_pickle('finrl_processed_data.df')  # DataFrame of Pandas
    processed_data_path = 'StockTradingEnv_processed_data.df'
    processed_df = pd.read_pickle(processed_data_path)  # DataFrame of Pandas
    print(processed_df.columns.values)

    split_df = data_split(processed_df, start='2008-03-19', end='2021-01-01')

    env = StockTradingEnv(df=split_df,
                          tech_indicator_list=config.TECHNICAL_INDICATORS_LIST)
    action_dim = env.action_dim

    state = env.reset()
    print('state_dim', len(state))

    done = False
    step = 1
    from time import time
    timer = time()
    while not done:
        action = rd.rand(action_dim) * 2 - 1
        next_state, reward, done, _ = env.step(action)
        print(';', step, len(next_state), env.day, reward)
        step += 1

    print(';;', step, int(time() - timer))  # 44 seconds
Example #11
def test():
    trade = data_split(origin_data, config.START_TRADE_DATE, config.END_DATE)
    action_list = model.test(trade)
    # print(action_list)

    hold_map = {}
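    # hold_map: tic -> entry price; a positive value marks an open long, a negative an open short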

    asset = 10000
    hold = 0
    price = 0
    for index, vec in enumerate(action_list):
        if index == 0 or index == len(action_list) - 1:
            continue
        vec = vec[0]
        # print("%.2f" % trade.loc[index]["close"], vec)
        last_vec = action_list[index - 1][0]
        price = trade.iloc[index + 1]["open"]
        date = trade.iloc[index + 1]["date"]
        tic = trade.iloc[index + 1]["tic"]
        hold = hold_map.get(tic, 0)
        # print(date, tic, vec, last_vec)
        if vec[0] > last_vec[0] + 0.1 and vec[0] > 0.3 and hold <= 0:
            print(f"buy {tic} at {date}, price = {price}")
            if hold == 0:
                hold_map[tic] = price
            else:
                hold_map.pop(tic)
            asset -= price * 100
        elif vec[2] > last_vec[2] + 0.1 and vec[2] > 0.3 and hold >= 0:
            print(f"sell {tic} at {date}, price = {price}")
            if hold == 0:
                hold_map[tic] = -price
            else:
                hold_map.pop(tic)
            asset += price * 100

    print(asset)
    print(hold_map)
Example #12
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

df = processed[processed['tic'] == 'JPM']
df.plot(x="date", y=["turbulence", "close"])
plt.show()

# In[10]:
processed['log_volume'] = np.log(processed.volume*processed.close)
processed['change'] = (processed.close-processed.open)/processed.close
processed['daily_variance'] = (processed.high-processed.low)/processed.close
processed.head()

# ## Training data split: 2009-01-01 to 2018-12-31
# ## Trade data split: 2019-01-01 to 2020-12-31
# In[11]:
train = data_split(processed, '2009-01-01','2019-01-01')
trade = data_split(processed, '2019-01-01','2021-01-01')
print(len(train))
print(len(trade))

# In[12]:
import numpy as np
import pandas as pd
from gym.utils import seeding
import gym
from gym import spaces
import matplotlib
from copy import deepcopy

matplotlib.use("Agg")
import matplotlib.pyplot as plt
Example #13
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(start_date=config.START_DATE,
                         end_date=config.END_DATE,
                         ticker_list=config.SP_500_TICKER).fetch_data()
    print("==============Start Feature Engineering===========")
    df = FeatureEngineer(df, use_technical_indicator=True,
                         use_turbulence=True).preprocess_data()

    # Training & Trade data split
    train = data_split(df, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(df, config.START_TRADE_DATE, config.END_DATE)

    # data normalization
    #features_list = list(train.columns)
    #features_list.remove('date')
    #features_list.remove('tic')
    #features_list.remove('close')
    #print(features_list)
    #data_normaliser = preprocessing.StandardScaler()
    #train[features_list] = data_normaliser.fit_transform(train[features_list])
    #trade[features_list] = data_normaliser.fit_transform(trade[features_list])

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        config.TECHNICAL_INDICATORS_LIST) * stock_dimension

    env_setup = EnvSetup(stock_dim=stock_dimension,
                         state_space=state_space,
                         hmax=100,
                         initial_amount=3000,
                         transaction_cost_pct=0.001)

    env_train = env_setup.create_env_training(data=train,
                                              env_class=StockEnvTrain)
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

    a2c_params_tuning = {
        'n_steps': 5,
        'ent_coef': 0.005,
        'learning_rate': 0.0007,
        'verbose': 0,
        'timesteps': 80000
    }

    model = agent.train_A2C(model_name="A2C_{}".format(now),
                            model_params=a2c_params_tuning)

    print("==============Start Trading===========")
    env_trade, obs_trade = env_setup.create_env_trading(
        data=trade, env_class=StockEnvTrade, turbulence_threshold=250)

    df_account_value, df_actions = DRLAgent.DRL_prediction(model=model,
                                                           test_data=trade,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/df_account_value_" +
                            now + '.csv')
    df_actions.to_csv("./" + config.RESULTS_DIR + "/df_actions_" + now +
                      '.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = BackTestStats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" +
                          now + '.csv')
# 
# Our trading environments, based on the OpenAI Gym framework, simulate live stock markets with real market data according to the principle of time-driven simulation.
# 
# The action space describes the allowed actions through which the agent interacts with the environment. In the simplest case, an action takes one of three values {-1, 0, 1}, where -1, 0, 1 represent selling, holding, and buying one share. An action can also be carried out on multiple shares: we use an action space {-k, ..., -1, 0, 1, ..., k}, where k denotes the number of shares to buy and -k the number of shares to sell. For example, "Buy 10 shares of AAPL" is encoded as 10 and "Sell 10 shares of AAPL" as -10. The continuous action space is normalized to [-1, 1], since the policy is defined on a Gaussian distribution, which needs to be normalized and symmetric.
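
# To make the normalization concrete, here is a minimal sketch (the hmax value and the
# sample actions are illustrative assumptions) of how a normalized action in [-1, 1] is
# scaled back to an integer number of shares inside the trading environment:

# In[ ]:

import numpy as np

hmax = 100                           # maximum shares traded per stock per step
raw_action = np.array([0.35, -1.0])  # policy output for two stocks, each in [-1, 1]
shares = (raw_action * hmax).astype(int)
print(shares)  # [ 35 -100] -> buy 35 shares of the first stock, sell 100 of the second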

# <a id='4.1'></a>
# ## 5.1 Training & Trade data split
# * Training: 2009-01-01 to 2018-12-31
# * Trade: 2019-01-01 to 2020-12-31

# In[16]:


#train = data_split(data_df, start = config.START_DATE, end = config.START_TRADE_DATE)
#trade = data_split(data_df, start = config.START_TRADE_DATE, end = config.END_DATE)
train = data_split(data_df, start = '2009-01-01', end = '2019-01-01')
trade = data_split(data_df, start = '2019-01-01', end = '2021-01-01')


# In[17]:


## data normalization; this part is optional and has little impact
#features_list = list(train.columns)
#features_list.remove('date')
#features_list.remove('tic')
#features_list.remove('close')
#print(features_list)
#from sklearn import preprocessing
#data_normaliser = preprocessing.StandardScaler()
#train[features_list] = data_normaliser.fit_transform(train[features_list])
Example #15
def train_one(fetch=False):
    """
    train an agent
    """
    if fetch:
        df = fetch_and_store()
    else:
        df = load()

    counts = df[['date', 'tic']].groupby(['date']).count().tic
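    # sanity check: every trading date must have the same number of tickers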
    assert counts.min() == counts.max()

    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        # use_turbulence=False,
        user_defined_feature=False,
    )

    processed = fe.preprocess_data(df)

    # Training & Trading data split
    start_date, trade_date, end_date = calculate_split(df,
                                                       start=config.START_DATE)
    print(start_date, trade_date, end_date)
    train = data_split(processed, start_date, trade_date)
    trade = data_split(processed, trade_date, end_date)

    print(
        f'\n******\nRunning from {start_date} to {end_date} for:\n{", ".join(config.CRYPTO_TICKER)}\n******\n'
    )

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + (2 * stock_dimension) +
                   (len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension))

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 100000,
        "buy_cost_pct": 0.0026,
        "sell_cost_pct": 0.0026,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)

    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  make_plots=True,
                                  **env_kwargs)

    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime(config.DATETIME_FMT)

    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(
        model=model_sac,
        tb_log_name="sac",
        # total_timesteps=100
        total_timesteps=80000)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        # model=trained_sac, test_data=trade, test_env=env_trade, test_obs=obs_trade
        trained_sac,
        e_trade_gym)
    df_account_value.to_csv(
        f"./{config.RESULTS_DIR}/df_account_value_{now}.csv")
    df_actions.to_csv(f"./{config.RESULTS_DIR}/df_actions_{now}.csv")

    df_txns = pd.DataFrame(e_trade_gym.transactions,
                           columns=['date', 'amount', 'price', 'symbol'])
    df_txns = df_txns.set_index(pd.DatetimeIndex(df_txns['date'], tz=pytz.utc))
    df_txns.to_csv(f'./{config.RESULTS_DIR}/df_txns_{now}.csv')

    df_positions = pd.DataFrame(e_trade_gym.positions,
                                columns=['date', 'cash'] +
                                config.CRYPTO_TICKER)
    df_positions = df_positions.set_index(
        pd.DatetimeIndex(df_positions['date'],
                         tz=pytz.utc)).drop(columns=['date'])
    df_positions = df_positions.astype(
        {col: np.float64
         for col in df_positions.columns})
    df_positions.to_csv(f'./{config.RESULTS_DIR}/df_positions_{now}.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value,
                                    transactions=df_txns,
                                    positions=df_positions)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv(f"./{config.RESULTS_DIR}/perf_stats_all_{now}.csv")

    backtest_plot(df_account_value,
                  baseline_start=trade_date,
                  baseline_end=end_date,
                  positions=df_positions,
                  transactions=df_txns)
Example #16
def generate_data(rollouts, data_dir, noise_type): # pylint: disable=R0914
    """ Generates data """
    assert exists(data_dir), "The data directory does not exist..."


    df = YahooDownloader(start_date='2009-01-01',
                         end_date='2021-01-01',
                         ticker_list=['AAPL']).fetch_data()

    df = df.sort_values(['date', 'tic'], ignore_index=True)

    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False)

    processed = fe.preprocess_data(df)

    
    list_ticker = processed["tic"].unique().tolist()
    list_date = list(pd.date_range(processed['date'].min(),processed['date'].max()).astype(str))
    combination = list(itertools.product(list_date,list_ticker))

    processed_full = pd.DataFrame(combination,columns=["date","tic"]).merge(processed,on=["date","tic"],how="left")
    processed_full = processed_full[processed_full['date'].isin(processed['date'])]
    processed_full = processed_full.sort_values(['date','tic'])

    processed_full = processed_full.fillna(0)


    processed_full = processed_full.sort_values(['date', 'tic'], ignore_index=True)

    train = data_split(processed_full, '2009-01-01','2019-01-01')
    trade = data_split(processed_full, '2019-01-01','2021-01-01')
    stock_dimension = len(train.tic.unique())
    state_space = 1 + 2*stock_dimension + len(config.TECHNICAL_INDICATORS_LIST)*stock_dimension
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        # "buy_cost_pct": 0.001,
        # "sell_cost_pct": 0.001,
        "transaction_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_train_gym = StockTradingEnv(df = train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    env = env_train

#     env = gym.make("CarRacing-v0")

    seq_len = 10000

    for i in range(rollouts):

        env.reset()

#         env.env.viewer.window.dispatch_events()
        if noise_type == 'white':
            a_rollout = [env.action_space.sample() for _ in range(seq_len)]
        elif noise_type == 'brown':
            a_rollout = sample_continuous_policy(env.action_space, seq_len, 1. / 50)
        else:
            raise ValueError("unknown noise_type: {}".format(noise_type))

        s_rollout = []
        r_rollout = []
        d_rollout = []


        t = 0
        while True:
            action = a_rollout[t]
            t += 1

            s, r, done, _ = env.step(action)
#             env.env.viewer.window.dispatch_events()
            s_rollout += [s]
            r_rollout += [r]
            d_rollout += [done]
            if done:
                print("> End of rollout {}, {} frames...".format(i, len(s_rollout)))
                np.savez(join(data_dir, 'rollout_{}'.format(i)),
                         observations=np.array(s_rollout),
                         rewards=np.array(r_rollout),
                         actions=np.array(a_rollout),
                         terminals=np.array(d_rollout))
                break
Example #17
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    ).fetch_data()
    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )

    processed = fe.preprocess_data(df)

    list_ticker = processed["tic"].unique().tolist()
    list_date = list(
        pd.date_range(processed['date'].min(),
                      processed['date'].max()).astype(str))
    combination = list(itertools.product(list_date, list_ticker))

    processed_full = pd.DataFrame(combination,
                                  columns=["date",
                                           "tic"]).merge(processed,
                                                         on=["date", "tic"],
                                                         how="left")
    processed_full = processed_full[processed_full['date'].isin(
        processed['date'])]
    processed_full = processed_full.sort_values(['date', 'tic'])

    processed_full = processed_full.fillna(0)

    # Training & Trading data split
    train = data_split(processed_full, config.START_DATE,
                       config.START_TRADE_DATE)
    trade = data_split(processed_full, config.START_TRADE_DATE,
                       config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")

    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(model=model_sac,
                                    tb_log_name="sac",
                                    total_timesteps=80000)

    print("==============Start Trading===========")
    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  **env_kwargs)

    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=trained_sac, environment=e_trade_gym)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/df_account_value_" +
                            now + ".csv")
    df_actions.to_csv("./" + config.RESULTS_DIR + "/df_actions_" + now +
                      ".csv")

    print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" +
                          now + ".csv")
Example #18
def load_and_save():
    stocks_tradable = configs["stocks_tradable"]

    dataset = dict()
    for root, dirs, files in os.walk("./data/sentiments", topdown=False):
        for name in files:
            if name.split("_")[0] in stocks_tradable:
                dataset[name.split("_")[0]] = pd.read_csv(os.path.join(root, name), index_col=0).reset_index(drop=True)
                dataset[name.split("_")[0]]["date"] = pd.to_datetime(dataset[name.split("_")[0]]["date"], format="%Y-%m-%d")

    df = get_stock_data(
        configs["train"]["start_date"], configs["test"]["end_date"], configs["stocks_tradable"]
    )

    df = add_sentiments(configs["sentiments"]["days"], dataset, df)

    train = data_split(df, configs["train"]["start_date"], configs["train"]["end_date"])
    validation = data_split(df, configs["validation"]["start_date"], configs["validation"]["end_date"])
    train_for_test = data_split(df, configs["train"]["start_date"], configs["validation"]["end_date"])
    test = data_split(df, configs["test"]["start_date"], configs["test"]["end_date"])

    testing_days = pd.Series(test.date.unique())

    train.to_csv("./data/train_data.csv", index=False)
    validation.to_csv("./data/validation_data.csv", index=False)
    train_for_test.to_csv("./data/train_for_test.csv", index=False)
    test.to_csv("./data/test_data.csv", index=False)
    testing_days.to_csv("./data/testing_days.csv", index=False)

    print("train, validation, train_for_test, test files saved")
    
def train():
    train = pd.read_csv("./data/train_data.csv")
    validation = pd.read_csv("./data/validation_data.csv")
    features = [
        ["open", "high", "low", "close", "volume"],
        ["open", "high", "low", "close", "volume"] + ["sentiment_mean", "sentiment_std"],
        config.TECHNICAL_INDICATORS,
        config.TECHNICAL_INDICATORS + ["sentiment_mean", "sentiment_std"]
    ]

    model_names = [
        "OHLCV",
        "OHLCV_sentiments",
        "MACD",
        "MACD_sentiments"
    ]

    batch_sizes = [32, 64, 128]
    learning_rates = [0.0001, 0.001, 0.005, 0.01]

    repetition = 3

    for model_name, feature_set in zip(model_names, features):
        for rep in range(repetition):
            ctime = time.time()
            perf_results = dict()
            for batch_size in batch_sizes:
                for lr in learning_rates:
                    perf_stats_all, _ = train_configuration(
                        f"{model_name}_{rep}",
                        train,
                        validation,
                        feature_set,
                        batch_size,
                        lr,
                        42
                    )

                    perf_results[f"result_{batch_size}_{lr}"] = perf_stats_all.to_json()

            save_fname = f"{model_name}_{rep}"
            with open(f"{save_fname}.json", "w") as f:
                f.write(json.dumps(perf_results))
            print(f"Results saved to {save_fname}.json")

            print(f"Time taken {(time.time() - ctime)/60}")
    

def test():
    daily_risk_free_rates = fill_missing_daily_rf_rates(
        date_to_daily_risk_free_rate, [datetime.datetime.strptime(date, "%Y-%m-%d") for date in dates["0"]]
    )
    
    train = pd.read_csv("./data/train_for_test.csv")
    test = pd.read_csv("./data/test_data.csv")


if __name__ == "__main__":
    pass
Example #19
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=['FXAIX'],
    ).fetch_data()
    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )

    processed = fe.preprocess_data(df)

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    user_input = input('train model? 1 train 0 don\'t train')
    if user_input == '1':  # input() returns a string
        model_sac = agent.get_model("sac")
        trained_sac = agent.train_model(model=model_sac,
                                        tb_log_name="sac",
                                        total_timesteps=8000)
        trained_sac.save("../models/sac_8k" + df.tic[0] + "_frl")
    else:
        trained_sac = SAC.load('../models/sac_80k_msft_working')
    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        trained_sac, e_trade_gym)
    df_account_value.to_csv("../" + config.RESULTS_DIR +
                            "/SAC_df_account_value_" + df.tic[0] + "_" + now +
                            ".csv")
    df_actions.to_csv("../" + config.RESULTS_DIR + "/SAC_df_actions_" +
                      df.tic[0] + "_" + now + ".csv")

    # print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("../" + config.RESULTS_DIR + "/SAC_perf_stats_all_" +
                          df.tic[0] + "_" + now + ".csv")

    #plot acc value
    actions = df_actions['actions']
    x = np.arange(0, df_account_value['account_value'].shape[0])
    y = df_account_value['account_value']

    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)

    fig, axs = plt.subplots(2, 1, sharex=True, sharey=False)

    # plt.plot(x, y)

    # Use a boundary norm instead
    cmap = ListedColormap(['r', 'g', 'b'])
    norm = BoundaryNorm([-100, -0.1, 0.1, 100], cmap.N)
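    # action boundaries map to colors: sell (< -0.1) red, hold green, buy (> 0.1) blue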
    lc = LineCollection(segments, cmap=cmap, norm=norm)
    lc.set_array(actions)
    lc.set_linewidth(2)
    line = axs[0].add_collection(lc)
    # fig.colorbar(line, ax=axs)

    axs[1].set_xlabel('Trading Day (' + 'From ' + config.START_TRADE_DATE +
                      " to " + config.END_DATE + ')')
    axs[0].set_ylabel('Account Value (10000 of USD)')
    axs[0].set_title("Trading Test on " + df.tic[0])

    axs[0].set_xlim(x.min(), x.max())
    axs[0].set_ylim(y.min(), y.max())

    custom_lines = [
        Line2D([0], [0], color=cmap(0.), lw=4),
        Line2D([0], [0], color=cmap(.5), lw=4),
        Line2D([0], [0], color=cmap(1.), lw=4)
    ]

    # lines = ax.plot(data)
    axs[0].legend(custom_lines, ['Sell', 'Hold', 'Buy'])

    #plot stock value
    tx = np.arange(0, df_account_value['account_value'].shape[0])
    ty = trade['close']
    plt.ylabel('Price (USD)')
    plt.title(df.tic[0] + " Closing Price")
    plt.plot(tx, ty)

    plt.savefig("../" + config.RESULTS_DIR + "/plots/"
                "SAC_plot_" + df.tic[0] + "_" + now + ".png")
Example #20
    def run_ensemble_strategy(self,A2C_model_kwargs,PPO_model_kwargs,DDPG_model_kwargs,timesteps_dict):
        """Ensemble Strategy that combines PPO, A2C and DDPG"""
        print("============Start Ensemble Strategy============")
        # for ensemble model, it's necessary to feed the last state
        # of the previous model to the current model as the initial state
        last_state_ensemble = []

        ppo_sharpe_list = []
        ddpg_sharpe_list = []
        a2c_sharpe_list = []

        model_use = []
        validation_start_date_list = []
        validation_end_date_list = []
        iteration_list = []

        insample_turbulence = self.df[(self.df.date<self.train_period[1]) & (self.df.date>=self.train_period[0])]
        insample_turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, .90)

        start = time.time()
        for i in range(self.rebalance_window + self.validation_window, len(self.unique_trade_date), self.rebalance_window):
            validation_start_date = self.unique_trade_date[i - self.rebalance_window - self.validation_window]
            validation_end_date = self.unique_trade_date[i - self.rebalance_window]

            validation_start_date_list.append(validation_start_date)
            validation_end_date_list.append(validation_end_date)
            iteration_list.append(i)

            print("============================================")
            ## initial state is empty
            if i - self.rebalance_window - self.validation_window == 0:
                # initial state
                initial = True
            else:
                # previous state
                initial = False

            # Tuning turbulence index based on historical data
            # Turbulence lookback window is one quarter (63 days)
            end_date_index = self.df.index[self.df["date"] == self.unique_trade_date[i - self.rebalance_window - self.validation_window]].to_list()[-1]
            start_date_index = end_date_index - 63 + 1

            historical_turbulence = self.df.iloc[start_date_index:(end_date_index + 1), :]

            historical_turbulence = historical_turbulence.drop_duplicates(subset=['date'])

            historical_turbulence_mean = np.mean(historical_turbulence.turbulence.values)

            #print(historical_turbulence_mean)

            if historical_turbulence_mean > insample_turbulence_threshold:
                # if the mean of the historical data is greater than the 90% quantile of insample turbulence data
                # then we assume that the current market is volatile,
                # therefore we set the 90% quantile of insample turbulence data as the turbulence threshold
                # meaning the current turbulence can't exceed the 90% quantile of insample turbulence data
                turbulence_threshold = insample_turbulence_threshold
            else:
                # if the mean of the historical data is less than the 90% quantile of insample turbulence data
                # then we tune up the turbulence_threshold, meaning we lower the risk
                turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, 1)
                
            turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, 0.99) 
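            # NOTE: the line above overrides the if/else result, fixing the threshold at the 99% quantile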
            print("turbulence_threshold: ", turbulence_threshold)

            ############## Environment Setup starts ##############
            ## training env
            train = data_split(self.df, start=self.train_period[0], end=self.unique_trade_date[i - self.rebalance_window - self.validation_window])
            self.train_env = DummyVecEnv([lambda: StockTradingEnv(train,
                                                                self.stock_dim,
                                                                self.hmax,
                                                                self.initial_amount,
                                                                self.buy_cost_pct,
                                                                self.sell_cost_pct,
                                                                self.reward_scaling,
                                                                self.state_space,
                                                                self.action_space,
                                                                self.tech_indicator_list,
                                                                print_verbosity=self.print_verbosity)])

            validation = data_split(self.df, start=self.unique_trade_date[i - self.rebalance_window - self.validation_window],
                                    end=self.unique_trade_date[i - self.rebalance_window])
            ############## Environment Setup ends ##############

            ############## Training and Validation starts ##############
            print("======Model training from: ", self.train_period[0], "to ",
                  self.unique_trade_date[i - self.rebalance_window - self.validation_window])
            # print("training: ",len(data_split(df, start=20090000, end=test.datadate.unique()[i-rebalance_window]) ))
            # print("==============Model Training===========")
            print("======A2C Training========")
            model_a2c = self.get_model("a2c",self.train_env,policy="MlpPolicy",model_kwargs=A2C_model_kwargs)
            model_a2c = self.train_model(model_a2c, "a2c", tb_log_name="a2c_{}".format(i), iter_num = i, total_timesteps=timesteps_dict['a2c']) #100_000

            print("======A2C Validation from: ", validation_start_date, "to ",validation_end_date)
            val_env_a2c = DummyVecEnv([lambda: StockTradingEnv(validation,
                                                                self.stock_dim,
                                                                self.hmax,
                                                                self.initial_amount,
                                                                self.buy_cost_pct,
                                                                self.sell_cost_pct,
                                                                self.reward_scaling,
                                                                self.state_space,
                                                                self.action_space,
                                                                self.tech_indicator_list,
                                                                turbulence_threshold=turbulence_threshold,
                                                                iteration=i,
                                                                model_name='A2C',
                                                                mode='validation',
                                                                print_verbosity=self.print_verbosity)])
            val_obs_a2c = val_env_a2c.reset()
            self.DRL_validation(model=model_a2c,test_data=validation,test_env=val_env_a2c,test_obs=val_obs_a2c)
            sharpe_a2c = self.get_validation_sharpe(i,model_name="A2C")
            print("A2C Sharpe Ratio: ", sharpe_a2c)

            print("======PPO Training========")
            model_ppo = self.get_model("ppo",self.train_env,policy="MlpPolicy",model_kwargs=PPO_model_kwargs)
            model_ppo = self.train_model(model_ppo, "ppo", tb_log_name="ppo_{}".format(i), iter_num = i, total_timesteps=timesteps_dict['ppo']) #100_000
            print("======PPO Validation from: ", validation_start_date, "to ",validation_end_date)
            val_env_ppo = DummyVecEnv([lambda: StockTradingEnv(validation,
                                                                self.stock_dim,
                                                                self.hmax,
                                                                self.initial_amount,
                                                                self.buy_cost_pct,
                                                                self.sell_cost_pct,
                                                                self.reward_scaling,
                                                                self.state_space,
                                                                self.action_space,
                                                                self.tech_indicator_list,
                                                                turbulence_threshold=turbulence_threshold,
                                                                iteration=i,
                                                                model_name='PPO',
                                                                mode='validation',
                                                                print_verbosity=self.print_verbosity)])
            val_obs_ppo = val_env_ppo.reset()
            self.DRL_validation(model=model_ppo,test_data=validation,test_env=val_env_ppo,test_obs=val_obs_ppo)
            sharpe_ppo = self.get_validation_sharpe(i,model_name="PPO")
            print("PPO Sharpe Ratio: ", sharpe_ppo)

            print("======DDPG Training========")
            model_ddpg = self.get_model("ddpg",self.train_env,policy="MlpPolicy",model_kwargs=DDPG_model_kwargs)
            model_ddpg = self.train_model(model_ddpg, "ddpg", tb_log_name="ddpg_{}".format(i), iter_num = i, total_timesteps=timesteps_dict['ddpg'])  #50_000
            print("======DDPG Validation from: ", validation_start_date, "to ",validation_end_date)
            val_env_ddpg = DummyVecEnv([lambda: StockTradingEnv(validation,
                                                                self.stock_dim,
                                                                self.hmax,
                                                                self.initial_amount,
                                                                self.buy_cost_pct,
                                                                self.sell_cost_pct,
                                                                self.reward_scaling,
                                                                self.state_space,
                                                                self.action_space,
                                                                self.tech_indicator_list,
                                                                turbulence_threshold=turbulence_threshold,
                                                                iteration=i,
                                                                model_name='DDPG',
                                                                mode='validation',
                                                                print_verbosity=self.print_verbosity)])
            val_obs_ddpg = val_env_ddpg.reset()
            self.DRL_validation(model=model_ddpg,test_data=validation,test_env=val_env_ddpg,test_obs=val_obs_ddpg)
            sharpe_ddpg = self.get_validation_sharpe(i,model_name="DDPG")

            ppo_sharpe_list.append(sharpe_ppo)
            a2c_sharpe_list.append(sharpe_a2c)
            ddpg_sharpe_list.append(sharpe_ddpg)

            print("======Best Model Retraining from: ", self.train_period[0], "to ",
                  self.unique_trade_date[i - self.rebalance_window])
            # Environment setup for model retraining up to first trade date
            #train_full = data_split(self.df, start=self.train_period[0], end=self.unique_trade_date[i - self.rebalance_window])
            #self.train_full_env = DummyVecEnv([lambda: StockTradingEnv(train_full,
            #                                                    self.stock_dim,
            #                                                    self.hmax,
            #                                                    self.initial_amount,
            #                                                    self.buy_cost_pct,
            #                                                    self.sell_cost_pct,
            #                                                    self.reward_scaling,
            #                                                    self.state_space,
            #                                                    self.action_space,
            #                                                    self.tech_indicator_list,
            #                                                    print_verbosity=self.print_verbosity)])
            # Model Selection based on sharpe ratio
            if (sharpe_ppo >= sharpe_a2c) & (sharpe_ppo >= sharpe_ddpg):
                model_use.append('PPO')
                model_ensemble = model_ppo

                #model_ensemble = self.get_model("ppo",self.train_full_env,policy="MlpPolicy",model_kwargs=PPO_model_kwargs)
                #model_ensemble = self.train_model(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(i), iter_num = i, total_timesteps=timesteps_dict['ppo']) #100_000
            elif (sharpe_a2c > sharpe_ppo) & (sharpe_a2c > sharpe_ddpg):
                model_use.append('A2C')
                model_ensemble = model_a2c

                #model_ensemble = self.get_model("a2c",self.train_full_env,policy="MlpPolicy",model_kwargs=A2C_model_kwargs)
                #model_ensemble = self.train_model(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(i), iter_num = i, total_timesteps=timesteps_dict['a2c']) #100_000
            else:
                model_use.append('DDPG')
                model_ensemble = model_ddpg

                #model_ensemble = self.get_model("ddpg",self.train_full_env,policy="MlpPolicy",model_kwargs=DDPG_model_kwargs)
                #model_ensemble = self.train_model(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(i), iter_num = i, total_timesteps=timesteps_dict['ddpg']) #50_000

            ############## Training and Validation ends ##############

            ############## Trading starts ##############
            print("======Trading from: ", self.unique_trade_date[i - self.rebalance_window], "to ", self.unique_trade_date[i])
            #print("Used Model: ", model_ensemble)
            last_state_ensemble = self.DRL_prediction(model=model_ensemble, name="ensemble",
                                                     last_state=last_state_ensemble, iter_num=i,
                                                     turbulence_threshold = turbulence_threshold,
                                                     initial=initial)
            ############## Trading ends ##############

        end = time.time()
        print("Ensemble Strategy took: ", (end - start) / 60, " minutes")

        df_summary = pd.DataFrame([iteration_list,validation_start_date_list,validation_end_date_list,model_use,a2c_sharpe_list,ppo_sharpe_list,ddpg_sharpe_list]).T
        df_summary.columns = ['Iter','Val Start','Val End','Model Used','A2C Sharpe','PPO Sharpe','DDPG Sharpe']

        return df_summary
Example #21
processed = fe.preprocess_data(df)

# Build the full (date, tic) grid so every ticker has a row for every calendar
# date, then left-join the engineered features onto that grid.
list_ticker = processed["tic"].unique().tolist()
list_date = list(pd.date_range(processed['date'].min(), processed['date'].max()).astype(str))
combination = list(itertools.product(list_date, list_ticker))
processed["date"] = processed["date"].astype(str)
processed_full = pd.DataFrame(combination, columns=["date", "tic"]).merge(processed, on=["date", "tic"], how="left")
processed_full = processed_full[processed_full['date'].isin(processed['date'])]
processed_full = processed_full.sort_values(['date', 'tic'])

# Rows introduced by the left-join carry no data; zero-fill them.
processed_full = processed_full.fillna(0)

processed_full.sort_values(['date', 'tic'], ignore_index=True).tail()

train = data_split(processed_full, '2018-05-16', '2020-05-16')
trade = data_split(processed_full, '2020-05-17', '2020-10-31')
print(len(train))
print(len(trade))

train.head()

trade.head()

config["TECHNICAL_INDICATORS_LIST"]

# State: cash (1) + price and holding per stock (2*N) + one value per indicator per stock (K*N)
stock_dimension = len(train.tic.unique())
state_space = 1 + 2 * stock_dimension + len(config["TECHNICAL_INDICATORS_LIST"]) * stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "buy_cost_pct": 0.001,
    "sell_cost_pct": 0.001,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": config["TECHNICAL_INDICATORS_LIST"],
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}
# (The snippet is truncated in the source after `env_kwargs = {`; the keys above
#  are reconstructed from the identical env_kwargs pattern in the other examples.)
Beispiel #22
0
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    ).fetch_data()
    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )

    processed = fe.preprocess_data(df)

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    # Calculate state/action space: cash + (price, holding) per stock + indicators
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
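    # Only the trade env below gets a turbulence_threshold: when market turbulence
    # exceeds it, StockTradingEnv forces liquidation instead of normal trading.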

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)

    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")

    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(model=model_sac,
                                    tb_log_name="sac",
                                    total_timesteps=80000)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_sac,
                                                           test_data=trade,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/df_account_value_" +
                            now + ".csv")
    df_actions.to_csv("./" + config.RESULTS_DIR + "/df_actions_" + now +
                      ".csv")

    print("==============Get Backtest Results===========")
    perf_stats_all = BackTestStats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" +
                          now + ".csv")
Beispiel #23
0
def main():
    parser = build_parser()
    options = parser.parse_args()

    # Basic setup: disable warnings
    warnings.filterwarnings('ignore')

    # Load the saved data in a pandas DataFrame:
    data_frame = pd.read_csv("./" + config.DATA_SAVE_DIR + "/" + options.name +
                             ".csv")

    print("Data Frame shape is: ", data_frame.shape)
    print("Data Frame format is following: \n\n", data_frame.head())

    ## We store the stockstats technical indicator column names in config.py
    tech_indicator_list = config.TECHNICAL_INDICATORS_LIST
    print("Technical Indicators that are going to be calculated: ",
          tech_indicator_list)

    feature_engineering = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=tech_indicator_list,
        use_turbulence=True,
        user_defined_feature=False)

    processed = feature_engineering.preprocess_data(data_frame)

    print(processed.sort_values(['date', 'tic'], ignore_index=True).head(10))

    training_set = data_split(processed, config.START_DATE,
                              config.START_TRADE_DATE)
    testing_set = data_split(processed, config.START_TRADE_DATE,
                             config.END_DATE)
    print("Size of training set: ", len(training_set))
    print("Size of testing set: ", len(testing_set))

    print("Training set format:\n\n", training_set.head())

    print("Testing set format: \n\n", testing_set.head())

    stock_dimension = len(training_set.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        tech_indicator_list) * stock_dimension
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

    ##
    ## Save data to file, both training and trading
    ##
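    # Note: the "wb" writes below overwrite these files anyway; the explicit
    # delete just makes the refresh visible in the console output.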
    if os.path.exists("./" + config.DATA_SAVE_DIR + "/training.txt"):
        os.remove("./" + config.DATA_SAVE_DIR + "/training.txt")
        print("Deleted the existing training data file")
    else:
        print("No existing training data file to delete")

    if os.path.exists("./" + config.DATA_SAVE_DIR + "/testing.txt"):
        os.remove("./" + config.DATA_SAVE_DIR + "/testing.txt")
        print("Deleted the existing testing data file")
    else:
        print("No existing testing data file to delete")

    path_training = "./" + config.DATA_SAVE_DIR + "/training.txt"
    path_testing = "./" + config.DATA_SAVE_DIR + "/testing.txt"

    with open(path_training, "wb") as f:
        pickle.dump(training_set, f, pickle.HIGHEST_PROTOCOL)

    with open(path_testing, "wb") as f:
        pickle.dump(testing_set, f, pickle.HIGHEST_PROTOCOL)

    print("Successfully created the training and testing data files.")
Beispiel #24
0
def objective(trial: optuna.Trial):
    """ test """
    df = pd.read_csv(os.path.abspath('./me/datasets/data_with_techs_turb.csv'),
                     index_col=0)
    df.sort_values(['date', 'tic'], ignore_index=True)
    train_data = data_split(df, '2009-01-01', '2016-01-01')
    trade_data = data_split(df, '2019-01-01', '2020-12-01')

    # params
    stock_dimension = len(train_data.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        config.TECHNICAL_INDICATORS_LIST) * stock_dimension

    hmax = 100
    starting_capital = 1000000
    transaction_cost_pct = 0.001
    reward_scaling = 1e-4
    technical_indicator_list = config.TECHNICAL_INDICATORS_LIST

    # Build the environment on the training split; it doubles as the eval env below
    eval_env = gym.make(ENV_ID,
                        df=train_data,
                        stock_dim=stock_dimension,
                        hmax=hmax,
                        initial_amount=starting_capital,
                        transaction_cost_pct=transaction_cost_pct,
                        reward_scaling=reward_scaling,
                        state_space=state_space,
                        action_space=stock_dimension,
                        tech_indicator_list=technical_indicator_list,
                        turbulence_threshold=250,
                        day=0)

    # Sample hyperparameters for this trial
    kwargs = sample_a2c_params(trial)
    # Create the RL model (the env above serves for both training and evaluation)
    model = A2C('MlpPolicy', eval_env, **kwargs)
    # Create the callback that will periodically evaluate
    # and report the performance
    eval_callback = TrialEvalCallback(eval_env,
                                      trial,
                                      n_eval_episodes=N_EVAL_EPISODES,
                                      eval_freq=EVAL_FREQ,
                                      deterministic=True)

    nan_encountered = False
    try:
        model.learn(N_TIMESTEPS, callback=eval_callback)
    except AssertionError as e:
        # Sometimes, random hyperparams can generate NaN
        print(e)
        nan_encountered = True
    finally:
        # Free memory
        model.env.close()
        eval_env.close()

    # Tell the optimizer that the trial failed
    if nan_encountered:
        return float("nan")

    if eval_callback.is_pruned:
        raise optuna.exceptions.TrialPruned()

    return eval_callback.last_mean_reward
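
# A minimal sketch of the `sample_a2c_params` helper assumed above. The real
# search space is defined elsewhere in this project, so the parameter names and
# ranges here are illustrative assumptions, not the project's actual values.
def sample_a2c_params(trial: optuna.Trial) -> dict:
    """Sample A2C hyperparameters for one Optuna trial."""
    return {
        "n_steps": trial.suggest_categorical("n_steps", [5, 16, 64, 256]),
        "gamma": trial.suggest_float("gamma", 0.9, 0.9999, log=True),
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1e-3, log=True),
        "ent_coef": trial.suggest_float("ent_coef", 1e-8, 0.1, log=True),
    }

# Typical driver, assuming N_TRIALS is defined with the other constants:
# study = optuna.create_study(direction="maximize", pruner=optuna.pruners.MedianPruner())
# study.optimize(objective, n_trials=N_TRIALS)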
Beispiel #25
0
import os
import pandas as pd
from finrl.preprocessing.data import data_split
from finrl.env.env_stocktrading import StockTradingEnv
from finrl.model.models import DRLAgent
from finrl.trade.backtest import backtest_stats

os.environ["CUDA_VISIBLE_DEVICES"] = "2"
stock_id = "600570"
stock_handle_dir = "dataset/stock/stock_handle/"

df = pd.read_csv(stock_handle_dir + stock_id + ".csv")
df["tic"] = stock_id
# print(df)

train = data_split(df, "2004-02-13", "2018-09-11")
trade = data_split(df, "2018-09-11", "2021-05-25")

technical_indications_list = [
    "open", "close", "high", "low", "volume", "ma5", "ma10", "ema12", "ema26",
    "MACD", "DEA"
]
stock_dimension = len(train.tic.unique())
state_space = 1 + 2 * stock_dimension + len(
    technical_indications_list) * stock_dimension

env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "buy_cost_pct": 0.001,
    "sell_cost_pct": 0.001,