Example #1
def new_test():

    processed = pd.read_csv(
        os.path.abspath('./me/datasets/new_data_with_techs_turb.csv'),
        index_col=0)

    train = data_split(processed, '2009-01-01', '2018-01-01')
    trade = data_split(processed, '2018-01-01', '2021-01-01')

    stock_dimension = len(train.tic.unique())
    # state = [cash] + [price, holdings] per stock + one slot per technical indicator per stock
    state_space = 1 + 2 * stock_dimension + len(
        config.TECHNICAL_INDICATORS_LIST) * stock_dimension
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "transaction_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    log_dir = "me/tmp/"
    os.makedirs(log_dir, exist_ok=True)

    env_train.envs[0] = Monitor(env_train.envs[0], log_dir)

    agent = DRLAgent(env=env_train)
    model_a2c = agent.get_model("a2c", verbose=0)

    trained_a2c = agent.train_model(model=model_a2c,
                                    tb_log_name='a2c',
                                    total_timesteps=100000)

    data_turbulence = processed[(processed.date < '2018-01-01')
                                & (processed.date >= '2009-01-01')]
    insample_turbulence = data_turbulence.drop_duplicates(subset=['date'])
    # quantile 1.0 is the in-sample maximum turbulence value
    turbulence_threshold = np.quantile(insample_turbulence.turbulence.values,
                                       1)

    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=turbulence_threshold,
                                  **env_kwargs)
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    print("BEGIN PREDICTION")
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_a2c,
                                                           test_data=trade,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)

    print(df_account_value)

    print("END PREDICTION")
Example #2
def train_one():
    env_kwargs, processed = prepare_data()

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)

    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    model = agent.get_model(config.CURRENT_MODEL)
    model = agent.train_model(model=model, total_timesteps=80000)

    path = f"{config.TRAINED_MODEL_DIR}/model"
    model.save(path)
    model = model.load(path)  # load() is a classmethod; reload the saved model from disk

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=model, environment=e_trade_gym)
    log_account(df_account_value, df_actions)
Example #3
    def train(self, origin_data):
        self.build_model()
        train = data_split(origin_data, config.START_DATE,
                           config.START_TRADE_DATE)
        trade = data_split(origin_data, config.START_TRADE_DATE,
                           config.END_DATE)

        y_train = self.cal_result(train)
        y_trade = self.cal_result(trade)

        x_train_tech = self.cal_feature(train, 5)
        x_trade_tech = self.cal_feature(trade, 5)

        x_train_idx = self.cal_index(train, 5)
        x_trade_idx = self.cal_index(trade, 5)

        print(x_train_tech.shape)

        self.model.fit([x_train_idx, x_train_tech],
                       y_train,
                       epochs=100,
                       batch_size=128)
        score = self.model.evaluate([x_trade_idx, x_trade_tech],
                                    y_trade,
                                    return_dict=True)
        print(score)
        self.model.save(self.model_path)
Example #4
def main():
    start_date = '2020-01-01'
    trade_start_date = '2020-12-01'
    end_date = '2021-01-01'
    ticker_list = stock_tickers
    numerical_df = YahooDownloader(start_date=start_date,
                                   end_date=end_date,
                                   ticker_list=ticker_list).fetch_data()
    sentiment_df = generate_sentiment_scores(start_date, end_date)
    initial_data = get_initial_data(numerical_df, sentiment_df)
    train_data = data_split(initial_data, start_date, trade_start_date)
    trade_data = data_split(initial_data, trade_start_date, end_date)
    indicator_list = config.TECHNICAL_INDICATORS_LIST + ['sentiment']
    stock_dimension = len(trade_data.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        indicator_list) * stock_dimension
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": indicator_list,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,
        "print_verbosity": 5
    }
    e_train_gym = StockTradingEnv(df=train_data, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    # print(train_data.index)
    # print(trade_data.index)
    # print(trade_data.loc[0])
    e_trade_gym = OnlineStockTradingEnv(trade_data.loc[0], **env_kwargs)
    training_agent = DRLAgent(env=env_train)
    model_a2c = training_agent.get_model("a2c")
    # print(train_data.index)
    # print(trade_data.index)
    #trained_a2c = agent.train_model(model=model_a2c, tb_log_name='a2c',total_timesteps=10000)
    feature_engineer = FeatureEngineer()
    online_stock_pred = OnlineStockPrediction(e_trade_gym, model_a2c)

    for i in range(1, trade_data.index.unique().max() + 1):
        print(trade_data.loc[i])
        online_stock_pred.add_data(trade_data.loc[i])
        action, states, next_obs, rewards = online_stock_pred.predict()
        print("Action:", action)
        print("States: ", states)
        print("Next observation: ", next_obs)
        print("Rewards: ", rewards)
Example #5
    def DRL_prediction(self, model, name, last_state, iter_num, turbulence_threshold, initial):
        ### make a prediction based on trained model ###

        ## trading env
        trade_data = data_split(self.df, start=self.unique_trade_date[iter_num - self.rebalance_window], end=self.unique_trade_date[iter_num])
        trade_env = DummyVecEnv([lambda: StockTradingEnv(trade_data,
                                                        self.stock_dim,
                                                        self.hmax,
                                                        self.initial_amount,
                                                        self.buy_cost_pct,
                                                        self.sell_cost_pct,
                                                        self.reward_scaling,
                                                        self.state_space,
                                                        self.action_space,
                                                        self.tech_indicator_list,
                                                        turbulence_threshold=turbulence_threshold,
                                                        initial=initial,
                                                        previous_state=last_state,
                                                        model_name=name,
                                                        mode='trade',
                                                        iteration=iter_num,
                                                        print_verbosity=self.print_verbosity)])

        trade_obs = trade_env.reset()

        for i in range(len(trade_data.index.unique())):
            action, _states = model.predict(trade_obs)
            trade_obs, rewards, dones, info = trade_env.step(action)
            if i == (len(trade_data.index.unique()) - 2):
                # print(env_test.render())
                last_state = trade_env.render()

        df_last_state = pd.DataFrame({'last_state': last_state})
        df_last_state.to_csv('results/last_state_{}_{}.csv'.format(name, iter_num), index=False)
        return last_state
Example #6
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(start_date=config.START_DATE,
                         end_date=config.END_DATE,
                         ticker_list=config.DOW_30_TICKER).fetch_data()
    print("==============Start Feature Engineering===========")
    df = FeatureEngineer(df,
                         feature_number=5,
                         use_technical_indicator=True,
                         use_turbulence=True).preprocess_data()

    train = data_split(df, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(df, config.START_TRADE_DATE, config.END_DATE)
    env_setup = EnvSetup(stock_dim=len(train.tic.unique()))
    env_train = env_setup.create_env_training(data=train,
                                              env_class=StockEnvTrain)
    agent = DRLAgent(env=env_train)
    print("==============Model Training===========")
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    a2c_params_tuning = {
        'n_steps': 5,
        'ent_coef': 0.005,
        'learning_rate': 0.0007,
        'verbose': 0,
        'timesteps': 100000
    }
    model_a2c = agent.train_A2C(model_name="A2C_{}".format(now),
                                model_params=a2c_params_tuning)

    print("==============Start Trading===========")
    env_trade, obs_trade = env_setup.create_env_trading(
        data=trade, env_class=StockEnvTrade, turbulence_threshold=250)

    df_account_value = DRLAgent.DRL_prediction(model=model_a2c,
                                               test_data=trade,
                                               test_env=env_trade,
                                               test_obs=obs_trade)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/" + now + '.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = BackTestStats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" +
                          now + '.csv')
Example #7
def load_stock_trading_data():
    from finrl.config import config

    cwd = './env/FinRL'
    raw_data_path = f'{cwd}/StockTradingEnv_raw_data.df'
    processed_data_path = f'{cwd}/StockTradingEnv_processed_data.df'

    os.makedirs(cwd, exist_ok=True)

    print("==============Start Fetching Data===========")
    if os.path.exists(raw_data_path):
        raw_df = pd.read_pickle(raw_data_path)  # pandas DataFrame
        print('| raw_df.columns.values:', raw_df.columns.values)
    else:
        from finrl.marketdata.yahoodownloader import YahooDownloader
        raw_df = YahooDownloader(
            start_date=config.START_DATE,
            end_date=config.END_DATE,
            ticker_list=config.DOW_30_TICKER,
        ).fetch_data()
        raw_df.to_pickle(raw_data_path)

    print("==============Start Feature Engineering===========")
    if os.path.exists(processed_data_path):
        processed_df = pd.read_pickle(processed_data_path)  # pandas DataFrame
        print('| processed_df.columns.values:', processed_df.columns.values)
    else:
        from finrl.preprocessing.preprocessors import FeatureEngineer
        fe = FeatureEngineer(
            use_technical_indicator=True,
            tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
            use_turbulence=True,
            user_defined_feature=False,
        )
        processed_df = fe.preprocess_data(raw_df)
        processed_df.to_pickle(processed_data_path)

    # Training & Trading data split
    from finrl.preprocessing.data import data_split
    train_df = data_split(processed_df, '2008-03-19', '2016-01-01')  # 1963/3223
    eval_df = data_split(processed_df, '2016-01-01', '2021-01-01')  # 1260/3223

    return train_df, eval_df
Example #8
def predict():
    env_kwargs, processed = prepare_data()
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    e_trade_gym = StockTradingEnv(df=trade, **env_kwargs)

    path = config.TRAINED_MODEL_DIR + "/model"
    # resolve the configured model class (e.g. A2C, SAC) by name; assumes the class is already imported
    trained_model = eval(config.CURRENT_MODEL.upper() + ".load(path)")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=trained_model, environment=e_trade_gym)
    log_account(df_account_value, df_actions)
Example #9
def test_process_data():
    start_date = '2020-11-01'
    end_date = '2021-01-01'
    ticker_list = stock_tickers
    numerical_df = YahooDownloader(start_date=start_date,
                                   end_date=end_date,
                                   ticker_list=ticker_list).fetch_data()
    sentiment_df = generate_sentiment_scores(start_date, end_date)
    initial_data = get_initial_data(numerical_df, sentiment_df)
    trade_data = data_split(initial_data, start_date, '2020-12-01')
    numerical_feed_data = numerical_df[numerical_df.date > '2020-12-01']
    sentiment_feed_data = sentiment_df[sentiment_df.date > '2020-12-01']
    data_processor = DataProcessor(FeatureEngineer(), trade_data)
    for date in numerical_feed_data.date.unique():
        new_numerical = numerical_feed_data[numerical_feed_data.date == date]
        new_sentiment = sentiment_feed_data.loc[sentiment_feed_data.date == date]
        new_df = data_processor.process_data(new_numerical, new_sentiment)
        print(new_df)
Example #10
def check_finrl():
    from FinRL import StockTradingEnv
    from numpy import random as rd
    from finrl.config import config
    from finrl.preprocessing.data import data_split
    import pandas as pd

    # df = pd.read_pickle('finrl_data.df')  # DataFrame of Pandas
    #
    # from finrl.preprocessing.preprocessors import FeatureEngineer
    # fe = FeatureEngineer(
    #     use_technical_indicator=True,
    #     tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
    #     use_turbulence=True,
    #     user_defined_feature=False,
    # )
    #
    # processed_df = fe.preprocess_data(df)
    # processed_df.to_pickle('finrl_processed_data.df')  # DataFrame of Pandas
    processed_data_path = 'StockTradingEnv_processed_data.df'
    processed_df = pd.read_pickle(processed_data_path)  # pandas DataFrame
    print(processed_df.columns.values)

    split_df = data_split(processed_df, start='2008-03-19', end='2021-01-01')

    env = StockTradingEnv(df=split_df,
                          tech_indicator_list=config.TECHNICAL_INDICATORS_LIST)
    action_dim = env.action_dim

    state = env.reset()
    print('state_dim', len(state))

    done = False
    step = 1
    from time import time
    timer = time()
    while not done:
        action = rd.rand(action_dim) * 2 - 1
        next_state, reward, done, _ = env.step(action)
        print(';', step, len(next_state), env.day, reward)
        step += 1

    print(';;', step, int(time() - timer))  # 44 seconds
Example #11
def test():
    trade = data_split(origin_data, config.START_TRADE_DATE, config.END_DATE)
    action_list = model.test(trade)
    # print(action_list)

    hold_map = {}

    asset = 10000
    hold = 0
    price = 0
    for index, vec in enumerate(action_list):
        if index == 0 or index == len(action_list) - 1:
            continue
        vec = vec[0]
        # print("%.2f" % trade.loc[index]["close"], vec)
        last_vec = action_list[index - 1][0]
        price = trade.iloc[index + 1]["open"]
        date = trade.iloc[index + 1]["date"]
        tic = trade.iloc[index + 1]["tic"]
        hold = hold_map.get(tic, 0)
        # print(date, tic, vec, last_vec)
        # buy signal: component 0 jumped and is strong, and we are not already long
        if vec[0] > last_vec[0] + 0.1 and vec[0] > 0.3 and hold <= 0:
            print(f"buy {tic} at {date}, price = {price}")
            if hold == 0:
                hold_map[tic] = price
            else:
                hold_map.pop(tic)
            asset -= price * 100
        # sell signal: component 2 jumped and is strong, and we are not already short
        elif vec[2] > last_vec[2] + 0.1 and vec[2] > 0.3 and hold >= 0:
            print(f"sell {tic} at {date}, price = {price}")
            if hold == 0:
                hold_map[tic] = -price
            else:
                hold_map.pop(tic)
            asset += price * 100

    print(asset)
    print(hold_map)
Example #12
import pandas as pd

df = processed[processed['tic'] == 'JPM']
df.plot(x="date", y=["turbulence", "close"])
plt.show()

# In[10]:
processed['log_volume'] = np.log(processed.volume * processed.close)
processed['change'] = (processed.close - processed.open) / processed.close
processed['daily_variance'] = (processed.high - processed.low) / processed.close
processed.head()

# ## Training data split: 2009-01-01 to 2018-12-31
# ## Trade data split: 2019-01-01 to 2020-12-31 (data_split's end date is exclusive)
# In[11]:
train = data_split(processed, '2009-01-01', '2019-01-01')
trade = data_split(processed, '2019-01-01', '2021-01-01')
print(len(train))
print(len(trade))

# In[12]:
import numpy as np
import pandas as pd
from gym.utils import seeding
import gym
from gym import spaces
import matplotlib
from copy import deepcopy

matplotlib.use("Agg")
import matplotlib.pyplot as plt
Example #13
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(start_date=config.START_DATE,
                         end_date=config.END_DATE,
                         ticker_list=config.SP_500_TICKER).fetch_data()
    print("==============Start Feature Engineering===========")
    df = FeatureEngineer(df, use_technical_indicator=True,
                         use_turbulence=True).preprocess_data()

    # Training & Trade data split
    train = data_split(df, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(df, config.START_TRADE_DATE, config.END_DATE)

    # data normalization
    #features_list = list(train.columns)
    #features_list.remove('date')
    #features_list.remove('tic')
    #features_list.remove('close')
    #print(features_list)
    #data_normaliser = preprocessing.StandardScaler()
    #train[features_list] = data_normaliser.fit_transform(train[features_list])
    #trade[features_list] = data_normaliser.fit_transform(trade[features_list])

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        config.TECHNICAL_INDICATORS_LIST) * stock_dimension

    env_setup = EnvSetup(stock_dim=stock_dimension,
                         state_space=state_space,
                         hmax=100,
                         initial_amount=3000,
                         transaction_cost_pct=0.001)

    env_train = env_setup.create_env_training(data=train,
                                              env_class=StockEnvTrain)
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

    a2c_params_tuning = {
        'n_steps': 5,
        'ent_coef': 0.005,
        'learning_rate': 0.0007,
        'verbose': 0,
        'timesteps': 80000
    }

    model = agent.train_A2C(model_name="A2C_{}".format(now),
                            model_params=a2c_params_tuning)

    print("==============Start Trading===========")
    env_trade, obs_trade = env_setup.create_env_trading(
        data=trade, env_class=StockEnvTrade, turbulence_threshold=250)

    df_account_value, df_actions = DRLAgent.DRL_prediction(model=model,
                                                           test_data=trade,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/df_account_value_" +
                            now + '.csv')
    df_actions.to_csv("./" + config.RESULTS_DIR + "/df_actions_" + now +
                      '.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = BackTestStats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" +
                          now + '.csv')
# 
# Our trading environments, based on OpenAI Gym framework, simulate live stock markets with real market data according to the principle of time-driven simulation.
# 
# The action space describes the allowed actions through which the agent interacts with the environment. Normally, an action takes one of three values: {-1, 0, 1}, where -1, 0, and 1 represent selling, holding, and buying one share. An action can also be carried out on multiple shares, so we use an action space {-k, …, -1, 0, 1, …, k}, where k denotes the number of shares to buy and -k the number of shares to sell. For example, "Buy 10 shares of AAPL" and "Sell 10 shares of AAPL" are represented as 10 and -10, respectively. The continuous action space is normalized to [-1, 1], since the policy is defined on a Gaussian distribution, which needs to be normalized and symmetric.
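
# As a minimal sketch of that mapping (hypothetical action values; `hmax` mirrors
# the env_kwargs used throughout these examples), the environment rescales a
# normalized action in [-1, 1] back into a whole-share trade size:

import numpy as np

hmax = 100  # assumed per-step share cap, as in the env_kwargs above
normalized_actions = np.array([-0.35, 0.0, 1.0])  # one entry per stock, in [-1, 1]
share_counts = (normalized_actions * hmax).astype(int)
print(share_counts)  # [-35   0 100]: negative = sell, positive = buy, 0 = hold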

# <a id='4.1'></a>
# ## 5.1 Training & Trade data split
# * Training: 2009-01-01 to 2018-12-31
# * Trade: 2019-01-01 to 2020-12-31

# In[16]:


#train = data_split(data_df, start=config.START_DATE, end=config.START_TRADE_DATE)
#trade = data_split(data_df, start=config.START_TRADE_DATE, end=config.END_DATE)
train = data_split(data_df, start='2009-01-01', end='2019-01-01')
trade = data_split(data_df, start='2019-01-01', end='2021-01-01')


# In[17]:


## data normalization, this part is optional and has little impact
#features_list = list(train.columns)
#features_list.remove('date')
#features_list.remove('tic')
#features_list.remove('close')
#print(features_list)
#from sklearn import preprocessing
#data_normaliser = preprocessing.StandardScaler()
#train[features_list] = data_normaliser.fit_transform(train[features_list])
Example #15
def train_one(fetch=False):
    """
    train an agent
    """
    if fetch:
        df = fetch_and_store()
    else:
        df = load()

    counts = df[['date', 'tic']].groupby(['date']).count().tic
    assert counts.min() == counts.max()

    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        # use_turbulence=False,
        user_defined_feature=False,
    )

    processed = fe.preprocess_data(df)

    # Training & Trading data split
    start_date, trade_date, end_date = calculate_split(df,
                                                       start=config.START_DATE)
    print(start_date, trade_date, end_date)
    train = data_split(processed, start_date, trade_date)
    trade = data_split(processed, trade_date, end_date)

    print(
        f'\n******\nRunning from {start_date} to {end_date} for:\n{", ".join(config.CRYPTO_TICKER)}\n******\n'
    )

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + (2 * stock_dimension) +
                   (len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension))

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 100000,
        "buy_cost_pct": 0.0026,
        "sell_cost_pct": 0.0026,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)

    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  make_plots=True,
                                  **env_kwargs)

    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime(config.DATETIME_FMT)

    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(
        model=model_sac,
        tb_log_name="sac",
        # total_timesteps=100
        total_timesteps=80000)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        # model=trained_sac, test_data=trade, test_env=env_trade, test_obs=obs_trade
        trained_sac,
        e_trade_gym)
    df_account_value.to_csv(
        f"./{config.RESULTS_DIR}/df_account_value_{now}.csv")
    df_actions.to_csv(f"./{config.RESULTS_DIR}/df_actions_{now}.csv")

    df_txns = pd.DataFrame(e_trade_gym.transactions,
                           columns=['date', 'amount', 'price', 'symbol'])
    df_txns = df_txns.set_index(pd.DatetimeIndex(df_txns['date'], tz=pytz.utc))
    df_txns.to_csv(f'./{config.RESULTS_DIR}/df_txns_{now}.csv')

    df_positions = pd.DataFrame(e_trade_gym.positions,
                                columns=['date', 'cash'] +
                                config.CRYPTO_TICKER)
    df_positions = df_positions.set_index(
        pd.DatetimeIndex(df_positions['date'],
                         tz=pytz.utc)).drop(columns=['date'])
    df_positions = df_positions.astype(
        {col: np.float64 for col in df_positions.columns})
    df_positions.to_csv(f'./{config.RESULTS_DIR}/df_positions_{now}.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value,
                                    transactions=df_txns,
                                    positions=df_positions)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv(f"./{config.RESULTS_DIR}/perf_stats_all_{now}.csv")

    backtest_plot(df_account_value,
                  baseline_start=trade_date,
                  baseline_end=end_date,
                  positions=df_positions,
                  transactions=df_txns)
Example #16
def generate_data(rollouts, data_dir, noise_type): # pylint: disable=R0914
    """ Generates data """
    assert exists(data_dir), "The data directory does not exist..."


    df = YahooDownloader(start_date='2009-01-01',
                         end_date='2021-01-01',
                         ticker_list=['AAPL']).fetch_data()

    df = df.sort_values(['date', 'tic'], ignore_index=True)

    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False)

    processed = fe.preprocess_data(df)

    
    list_ticker = processed["tic"].unique().tolist()
    list_date = list(pd.date_range(processed['date'].min(),processed['date'].max()).astype(str))
    combination = list(itertools.product(list_date,list_ticker))

    processed_full = pd.DataFrame(combination,columns=["date","tic"]).merge(processed,on=["date","tic"],how="left")
    processed_full = processed_full[processed_full['date'].isin(processed['date'])]
    processed_full = processed_full.sort_values(['date','tic'])

    processed_full = processed_full.fillna(0)


    processed_full.sort_values(['date','tic'],ignore_index=True)

    train = data_split(processed_full, '2009-01-01', '2019-01-01')
    trade = data_split(processed_full, '2019-01-01', '2021-01-01')
    stock_dimension = len(train.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        # "buy_cost_pct": 0.001,
        # "sell_cost_pct": 0.001,
        "transaction_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    env = env_train

#     env = gym.make("CarRacing-v0")

    seq_len = 10000

    for i in range(rollouts):

        env.reset()

#         env.env.viewer.window.dispatch_events()
        if noise_type == 'white':
            a_rollout = [env.action_space.sample() for _ in range(seq_len)]
        elif noise_type == 'brown':
            a_rollout = sample_continuous_policy(env.action_space, seq_len, 1. / 50)

        s_rollout = []
        r_rollout = []
        d_rollout = []


        t = 0
        while True:
            action = a_rollout[t]
            t += 1

            s, r, done, _ = env.step(action)
#             env.env.viewer.window.dispatch_events()
            s_rollout += [s]
            r_rollout += [r]
            d_rollout += [done]
            if done:
                print("> End of rollout {}, {} frames...".format(i, len(s_rollout)))
                np.savez(join(data_dir, 'rollout_{}'.format(i)),
                         observations=np.array(s_rollout),
                         rewards=np.array(r_rollout),
                         actions=np.array(a_rollout),
                         terminals=np.array(d_rollout))
                break
Example #17
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    ).fetch_data()
    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )

    processed = fe.preprocess_data(df)

    list_ticker = processed["tic"].unique().tolist()
    list_date = list(
        pd.date_range(processed['date'].min(),
                      processed['date'].max()).astype(str))
    combination = list(itertools.product(list_date, list_ticker))

    processed_full = pd.DataFrame(combination,
                                  columns=["date",
                                           "tic"]).merge(processed,
                                                         on=["date", "tic"],
                                                         how="left")
    processed_full = processed_full[processed_full['date'].isin(
        processed['date'])]
    processed_full = processed_full.sort_values(['date', 'tic'])

    processed_full = processed_full.fillna(0)

    # Training & Trading data split
    train = data_split(processed_full, config.START_DATE,
                       config.START_TRADE_DATE)
    trade = data_split(processed_full, config.START_TRADE_DATE,
                       config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")

    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(model=model_sac,
                                    tb_log_name="sac",
                                    total_timesteps=80000)

    print("==============Start Trading===========")
    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  **env_kwargs)

    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=trained_sac, environment=e_trade_gym)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/df_account_value_" +
                            now + ".csv")
    df_actions.to_csv("./" + config.RESULTS_DIR + "/df_actions_" + now +
                      ".csv")

    print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" +
                          now + ".csv")
Example #18
def load_and_save():
    stocks_tradable = configs["stocks_tradable"]

    dataset = dict()
    for root, dirs, files in os.walk("./data/sentiments", topdown=False):
        for name in files:
            ticker = name.split("_")[0]
            if ticker in stocks_tradable:
                dataset[ticker] = pd.read_csv(os.path.join(root, name), index_col=0).reset_index(drop=True)
                dataset[ticker]["date"] = pd.to_datetime(dataset[ticker]["date"], format="%Y-%m-%d")

    df = get_stock_data(
        configs["train"]["start_date"], configs["test"]["end_date"], configs["stocks_tradable"]
    )

    df = add_sentiments(configs["sentiments"]["days"], dataset, df)

    train = data_split(df, configs["train"]["start_date"], configs["train"]["end_date"])
    validation = data_split(df, configs["validation"]["start_date"], configs["validation"]["end_date"])
    train_for_test = data_split(df, configs["train"]["start_date"], configs["validation"]["end_date"])
    test = data_split(df, configs["test"]["start_date"], configs["test"]["end_date"])

    testing_days = pd.Series(test.date.unique())

    train.to_csv("./data/train_data.csv", index=False)
    validation.to_csv("./data/validation_data.csv", index=False)
    train_for_test.to_csv("./data/train_for_test.csv", index=False)
    test.to_csv("./data/test_data.csv", index=False)
    testing_days.to_csv("./data/testing_days.csv", index=False)

    print("train, validation, train_for_test, test files saved")
    
def train():
    train = pd.read_csv("./data/train_data.csv")
    validation = pd.read_csv("./data/validation_data.csv")
    features = [
        ["open", "high", "low", "close", "volume"],
        ["open", "high", "low", "close", "volume"] + ["sentiment_mean", "sentiment_std"],
        config.TECHNICAL_INDICATORS,
        config.TECHNICAL_INDICATORS + ["sentiment_mean", "sentiment_std"]
    ]

    model_names = [
        "OHLCV",
        "OHLCV_sentiments",
        "MACD",
        "MACD_sentiments"
    ]

    batch_sizes = [32, 64, 128]
    learning_rates = [0.0001, 0.001, 0.005, 0.01]

    repetition = 3

    for model_name, feature_set in zip(model_names, features):
        for rep in range(repetition):
            perf_results = dict()
            ctime = time.time()
            for batch_size in batch_sizes:
                for lr in learning_rates:
                    perf_stats_all, _ = train_configuration(
                        f"{model_name}_{rep}",
                        train,
                        validation,
                        feature_set,
                        batch_size,
                        lr,
                        42
                    )

                    perf_results[f"result_{batch_size}_{lr}"] = perf_stats_all.to_json()

            save_fname = f"{model_name}_{rep}"
            with open(f"{save_fname}.json", "w") as f:
                json.dump(perf_results, f)
            print(f"Results saved to {save_fname}.json")

            print(f"Time taken {(time.time() - ctime) / 60} minutes")
    

def test():
    daily_risk_free_rates = fill_missing_daily_rf_rates(
        date_to_daily_risk_free_rate, [datetime.datetime.strptime(date, "%Y-%m-%d") for date in dates["0"]]
    )
    
    train = pd.read_csv("./data/train_for_test.csv")
    test = pd.read_csv("./data/test_data.csv")


if __name__ == "__main__":
    
Example #19
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=['FXAIX'],
    ).fetch_data()
    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )

    processed = fe.preprocess_data(df)

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    user_input = input('train model? 1 train 0 don\'t train')
    if user_input == '1':  # input() returns a string
        model_sac = agent.get_model("sac")
        trained_sac = agent.train_model(model=model_sac,
                                        tb_log_name="sac",
                                        total_timesteps=8000)
        trained_sac.save("../models/sac_8k" + df.tic[0] + "_frl")
    else:
        trained_sac = SAC.load('../models/sac_80k_msft_working')
    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        trained_sac, e_trade_gym)
    df_account_value.to_csv("../" + config.RESULTS_DIR +
                            "/SAC_df_account_value_" + df.tic[0] + "_" + now +
                            ".csv")
    df_actions.to_csv("../" + config.RESULTS_DIR + "/SAC_df_actions_" +
                      df.tic[0] + "_" + now + ".csv")

    # print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("../" + config.RESULTS_DIR + "/SAC_perf_stats_all_" +
                          df.tic[0] + "_" + now + ".csv")

    #plot acc value
    actions = df_actions['actions']
    x = np.arange(0, df_account_value['account_value'].shape[0])
    y = df_account_value['account_value']

    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)

    fig, axs = plt.subplots(2, 1, sharex=True, sharey=False)

    # plt.plot(x, y)

    # Use a boundary norm instead
    cmap = ListedColormap(['r', 'g', 'b'])
    norm = BoundaryNorm([-100, -0.1, 0.1, 100], cmap.N)
    lc = LineCollection(segments, cmap=cmap, norm=norm)
    lc.set_array(actions)
    lc.set_linewidth(2)
    line = axs[0].add_collection(lc)
    # fig.colorbar(line, ax=axs)

    axs[1].set_xlabel('Trading Day (' + 'From ' + config.START_TRADE_DATE +
                      " to " + config.END_DATE + ')')
    axs[0].set_ylabel('Account Value (USD)')
    axs[0].set_title("Trading Test on " + df.tic[0])

    axs[0].set_xlim(x.min(), x.max())
    axs[0].set_ylim(y.min(), y.max())

    custom_lines = [
        Line2D([0], [0], color=cmap(0.), lw=4),
        Line2D([0], [0], color=cmap(.5), lw=4),
        Line2D([0], [0], color=cmap(1.), lw=4)
    ]

    # lines = ax.plot(data)
    axs[0].legend(custom_lines, ['Sell', 'Hold', 'Buy'])

    #plot stock value
    tx = np.arange(0, df_account_value['account_value'].shape[0])
    ty = trade['close']
    plt.ylabel('Price (USD)')
    plt.title(df.tic[0] + " Closing Price")
    plt.plot(tx, ty)

    plt.savefig("../" + config.RESULTS_DIR + "/plots/"
                "SAC_plot_" + df.tic[0] + "_" + now + ".png")
Example #20
    def run_ensemble_strategy(self, A2C_model_kwargs, PPO_model_kwargs, DDPG_model_kwargs, timesteps_dict):
        """Ensemble Strategy that combines PPO, A2C and DDPG"""
        print("============Start Ensemble Strategy============")
        # for ensemble model, it's necessary to feed the last state
        # of the previous model to the current model as the initial state
        last_state_ensemble = []

        ppo_sharpe_list = []
        ddpg_sharpe_list = []
        a2c_sharpe_list = []

        model_use = []
        validation_start_date_list = []
        validation_end_date_list = []
        iteration_list = []

        insample_turbulence = self.df[(self.df.date < self.train_period[1]) & (self.df.date >= self.train_period[0])]
        insample_turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, .90)

        start = time.time()
        for i in range(self.rebalance_window + self.validation_window, len(self.unique_trade_date), self.rebalance_window):
            validation_start_date = self.unique_trade_date[i - self.rebalance_window - self.validation_window]
            validation_end_date = self.unique_trade_date[i - self.rebalance_window]

            validation_start_date_list.append(validation_start_date)
            validation_end_date_list.append(validation_end_date)
            iteration_list.append(i)

            print("============================================")
            ## initial state is empty
            if i - self.rebalance_window - self.validation_window == 0:
                # initial state
                initial = True
            else:
                # previous state
                initial = False

            # Tuning turbulence index based on historical data
            # Turbulence lookback window is one quarter (63 days)
            end_date_index = self.df.index[self.df["date"] == self.unique_trade_date[i - self.rebalance_window - self.validation_window]].to_list()[-1]
            start_date_index = end_date_index - 63 + 1

            historical_turbulence = self.df.iloc[start_date_index:(end_date_index + 1), :]

            historical_turbulence = historical_turbulence.drop_duplicates(subset=['date'])

            historical_turbulence_mean = np.mean(historical_turbulence.turbulence.values)

            #print(historical_turbulence_mean)

            if historical_turbulence_mean > insample_turbulence_threshold:
                # if the mean of the historical data is greater than the 90% quantile of insample turbulence data
                # then we assume that the current market is volatile,
                # therefore we set the 90% quantile of insample turbulence data as the turbulence threshold
                # meaning the current turbulence can't exceed the 90% quantile of insample turbulence data
                turbulence_threshold = insample_turbulence_threshold
            else:
                # if the mean of the historical data is less than the 90% quantile of insample turbulence data
                # then we tune up the turbulence_threshold, meaning we lower the risk
                turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, 1)

            print("turbulence_threshold: ", turbulence_threshold)

            ############## Environment Setup starts ##############
            ## training env
            train = data_split(self.df, start=self.train_period[0], end=self.unique_trade_date[i - self.rebalance_window - self.validation_window])
            self.train_env = DummyVecEnv([lambda: StockTradingEnv(train,
                                                                self.stock_dim,
                                                                self.hmax,
                                                                self.initial_amount,
                                                                self.buy_cost_pct,
                                                                self.sell_cost_pct,
                                                                self.reward_scaling,
                                                                self.state_space,
                                                                self.action_space,
                                                                self.tech_indicator_list,
                                                                print_verbosity=self.print_verbosity)])

            validation = data_split(self.df, start=self.unique_trade_date[i - self.rebalance_window - self.validation_window],
                                    end=self.unique_trade_date[i - self.rebalance_window])
            ############## Environment Setup ends ##############

            ############## Training and Validation starts ##############
            print("======Model training from: ", self.train_period[0], "to ",
                  self.unique_trade_date[i - self.rebalance_window - self.validation_window])
            # print("training: ",len(data_split(df, start=20090000, end=test.datadate.unique()[i-rebalance_window]) ))
            # print("==============Model Training===========")
            print("======A2C Training========")
            model_a2c = self.get_model("a2c",self.train_env,policy="MlpPolicy",model_kwargs=A2C_model_kwargs)
            model_a2c = self.train_model(model_a2c, "a2c", tb_log_name="a2c_{}".format(i), iter_num = i, total_timesteps=timesteps_dict['a2c']) #100_000

            print("======A2C Validation from: ", validation_start_date, "to ",validation_end_date)
            val_env_a2c = DummyVecEnv([lambda: StockTradingEnv(validation,
                                                                self.stock_dim,
                                                                self.hmax,
                                                                self.initial_amount,
                                                                self.buy_cost_pct,
                                                                self.sell_cost_pct,
                                                                self.reward_scaling,
                                                                self.state_space,
                                                                self.action_space,
                                                                self.tech_indicator_list,
                                                                turbulence_threshold=turbulence_threshold,
                                                                iteration=i,
                                                                model_name='A2C',
                                                                mode='validation',
                                                                print_verbosity=self.print_verbosity)])
            val_obs_a2c = val_env_a2c.reset()
            self.DRL_validation(model=model_a2c,test_data=validation,test_env=val_env_a2c,test_obs=val_obs_a2c)
            sharpe_a2c = self.get_validation_sharpe(i,model_name="A2C")
            print("A2C Sharpe Ratio: ", sharpe_a2c)

            print("======PPO Training========")
            model_ppo = self.get_model("ppo",self.train_env,policy="MlpPolicy",model_kwargs=PPO_model_kwargs)
            model_ppo = self.train_model(model_ppo, "ppo", tb_log_name="ppo_{}".format(i), iter_num = i, total_timesteps=timesteps_dict['ppo']) #100_000
            print("======PPO Validation from: ", validation_start_date, "to ",validation_end_date)
            val_env_ppo = DummyVecEnv([lambda: StockTradingEnv(validation,
                                                                self.stock_dim,
                                                                self.hmax,
                                                                self.initial_amount,
                                                                self.buy_cost_pct,
                                                                self.sell_cost_pct,
                                                                self.reward_scaling,
                                                                self.state_space,
                                                                self.action_space,
                                                                self.tech_indicator_list,
                                                                turbulence_threshold=turbulence_threshold,
                                                                iteration=i,
                                                                model_name='PPO',
                                                                mode='validation',
                                                                print_verbosity=self.print_verbosity)])
            val_obs_ppo = val_env_ppo.reset()
            self.DRL_validation(model=model_ppo,test_data=validation,test_env=val_env_ppo,test_obs=val_obs_ppo)
            sharpe_ppo = self.get_validation_sharpe(i,model_name="PPO")
            print("PPO Sharpe Ratio: ", sharpe_ppo)

            print("======DDPG Training========")
            model_ddpg = self.get_model("ddpg",self.train_env,policy="MlpPolicy",model_kwargs=DDPG_model_kwargs)
            model_ddpg = self.train_model(model_ddpg, "ddpg", tb_log_name="ddpg_{}".format(i), iter_num = i, total_timesteps=timesteps_dict['ddpg'])  #50_000
            print("======DDPG Validation from: ", validation_start_date, "to ",validation_end_date)
            val_env_ddpg = DummyVecEnv([lambda: StockTradingEnv(validation,
                                                                self.stock_dim,
                                                                self.hmax,
                                                                self.initial_amount,
                                                                self.buy_cost_pct,
                                                                self.sell_cost_pct,
                                                                self.reward_scaling,
                                                                self.state_space,
                                                                self.action_space,
                                                                self.tech_indicator_list,
                                                                turbulence_threshold=turbulence_threshold,
                                                                iteration=i,
                                                                model_name='DDPG',
                                                                mode='validation',
                                                                print_verbosity=self.print_verbosity)])
            val_obs_ddpg = val_env_ddpg.reset()
            self.DRL_validation(model=model_ddpg,test_data=validation,test_env=val_env_ddpg,test_obs=val_obs_ddpg)
            sharpe_ddpg = self.get_validation_sharpe(i,model_name="DDPG")

            ppo_sharpe_list.append(sharpe_ppo)
            a2c_sharpe_list.append(sharpe_a2c)
            ddpg_sharpe_list.append(sharpe_ddpg)

            print("======Best Model Retraining from: ", self.train_period[0], "to ",
                  self.unique_trade_date[i - self.rebalance_window])
            # Environment setup for model retraining up to first trade date
            #train_full = data_split(self.df, start=self.train_period[0], end=self.unique_trade_date[i - self.rebalance_window])
            #self.train_full_env = DummyVecEnv([lambda: StockTradingEnv(train_full,
            #                                                    self.stock_dim,
            #                                                    self.hmax,
            #                                                    self.initial_amount,
            #                                                    self.buy_cost_pct,
            #                                                    self.sell_cost_pct,
            #                                                    self.reward_scaling,
            #                                                    self.state_space,
            #                                                    self.action_space,
            #                                                    self.tech_indicator_list,
            #                                                    print_verbosity=self.print_verbosity)])
            # Model Selection based on sharpe ratio
            if (sharpe_ppo >= sharpe_a2c) & (sharpe_ppo >= sharpe_ddpg):
                model_use.append('PPO')
                model_ensemble = model_ppo

                #model_ensemble = self.get_model("ppo",self.train_full_env,policy="MlpPolicy",model_kwargs=PPO_model_kwargs)
                #model_ensemble = self.train_model(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(i), iter_num = i, total_timesteps=timesteps_dict['ppo']) #100_000
            elif (sharpe_a2c > sharpe_ppo) & (sharpe_a2c > sharpe_ddpg):
                model_use.append('A2C')
                model_ensemble = model_a2c

                #model_ensemble = self.get_model("a2c",self.train_full_env,policy="MlpPolicy",model_kwargs=A2C_model_kwargs)
                #model_ensemble = self.train_model(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(i), iter_num = i, total_timesteps=timesteps_dict['a2c']) #100_000
            else:
                model_use.append('DDPG')
                model_ensemble = model_ddpg

                #model_ensemble = self.get_model("ddpg",self.train_full_env,policy="MlpPolicy",model_kwargs=DDPG_model_kwargs)
                #model_ensemble = self.train_model(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(i), iter_num = i, total_timesteps=timesteps_dict['ddpg']) #50_000

            ############## Training and Validation ends ##############

            ############## Trading starts ##############
            print("======Trading from: ", self.unique_trade_date[i - self.rebalance_window], "to ", self.unique_trade_date[i])
            #print("Used Model: ", model_ensemble)
            last_state_ensemble = self.DRL_prediction(model=model_ensemble, name="ensemble",
                                                     last_state=last_state_ensemble, iter_num=i,
                                                     turbulence_threshold = turbulence_threshold,
                                                     initial=initial)
            ############## Trading ends ##############

        end = time.time()
        print("Ensemble Strategy took: ", (end - start) / 60, " minutes")

        df_summary = pd.DataFrame([iteration_list,validation_start_date_list,validation_end_date_list,model_use,a2c_sharpe_list,ppo_sharpe_list,ddpg_sharpe_list]).T
        df_summary.columns = ['Iter','Val Start','Val End','Model Used','A2C Sharpe','PPO Sharpe','DDPG Sharpe']

        return df_summary
Example #21
processed = fe.preprocess_data(df)

list_ticker = processed["tic"].unique().tolist()
list_date = list(pd.date_range(processed['date'].min(), processed['date'].max()).astype(str))
combination = list(itertools.product(list_date, list_ticker))
processed["date"] = processed["date"].astype(str)
processed_full = pd.DataFrame(combination, columns=["date", "tic"]).merge(processed, on=["date", "tic"], how="left")
processed_full = processed_full[processed_full['date'].isin(processed['date'])]
processed_full = processed_full.sort_values(['date', 'tic'])

processed_full = processed_full.fillna(0)

processed_full.sort_values(['date','tic'],ignore_index=True).tail()
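
# To make the reindexing above concrete, a tiny self-contained illustration
# with hypothetical data: crossing the unique dates with the ticker list
# yields one row per (date, tic) pair, the left merge leaves missing pairs
# as NaN, and fillna(0) zero-fills them.
import itertools
import pandas as pd

demo = pd.DataFrame({"date": ["2020-01-01", "2020-01-01", "2020-01-02"],
                     "tic": ["AAPL", "MSFT", "AAPL"],
                     "close": [75.0, 160.0, 74.5]})
grid = pd.DataFrame(list(itertools.product(demo["date"].unique(), ["AAPL", "MSFT"])),
                    columns=["date", "tic"])
full = grid.merge(demo, on=["date", "tic"], how="left").fillna(0)
print(full)  # the missing (2020-01-02, MSFT) row appears with close == 0.0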

train = data_split(processed_full, '2018-05-16', '2020-05-16')
trade = data_split(processed_full, '2020-05-17', '2020-10-31')
print(len(train))
print(len(trade))

train.head()

trade.head()

config["TECHNICAL_INDICATORS_LIST"]

stock_dimension = len(train.tic.unique())
state_space = 1 + 2*stock_dimension + len(config["TECHNICAL_INDICATORS_LIST"])*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "buy_cost_pct": 0.001,
    "sell_cost_pct": 0.001,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": config["TECHNICAL_INDICATORS_LIST"],
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}
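
# Quick arithmetic check of the state-space formula used throughout these
# examples (hypothetical sizes): one cash-balance entry, plus a price and a
# holding per stock, plus one slot per technical indicator per stock.
stock_dimension_demo = 30  # e.g. a Dow 30 universe (assumed)
num_indicators_demo = 8    # assumed indicator count
state_space_demo = 1 + 2 * stock_dimension_demo + num_indicators_demo * stock_dimension_demo
assert state_space_demo == 301  # 1 + 60 + 240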
Example #22
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    ).fetch_data()
    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )

    processed = fe.preprocess_data(df)

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)

    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")

    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(model=model_sac,
                                    tb_log_name="sac",
                                    total_timesteps=80000)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_sac,
                                                           test_data=trade,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)
    df_account_value.to_csv(f"./{config.RESULTS_DIR}/df_account_value_{now}.csv")
    df_actions.to_csv(f"./{config.RESULTS_DIR}/df_actions_{now}.csv")

    print("==============Get Backtest Results===========")
    perf_stats_all = BackTestStats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv(f"./{config.RESULTS_DIR}/perf_stats_all_{now}.csv")
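
    # Optional sanity check (a sketch: the "account_value" column name is
    # assumed from FinRL's DRL_prediction output): report the total return
    # over the trading window.
    total_return = (df_account_value["account_value"].iloc[-1] /
                    df_account_value["account_value"].iloc[0] - 1)
    print(f"Total return over the trading window: {total_return:.2%}")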
Example #23
def main():
    parser = build_parser()
    options = parser.parse_args()

    # Basic setup: disable warnings
    warnings.filterwarnings('ignore')

    # Load the saved data in a pandas DataFrame:
    data_frame = pd.read_csv("./" + config.DATA_SAVE_DIR + "/" + options.name +
                             ".csv")

    print("Data Frame shape is: ", data_frame.shape)
    print("Data Frame format is following: \n\n", data_frame.head())

    # We store the stockstats technical indicator column names in config.py
    tech_indicator_list = config.TECHNICAL_INDICATORS_LIST
    print("Technical Indicators that are going to be calculated: ",
          tech_indicator_list)

    feature_engineering = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=tech_indicator_list,
        use_turbulence=True,
        user_defined_feature=False)

    processed = feature_engineering.preprocess_data(data_frame)

    print(processed.sort_values(['date', 'tic'], ignore_index=True).head(10))

    training_set = data_split(processed, config.START_DATE,
                              config.START_TRADE_DATE)
    testing_set = data_split(processed, config.START_TRADE_DATE,
                             config.END_DATE)
    print("Size of training set: ", len(training_set))
    print("Size of testing set: ", len(testing_set))

    print("Training set format:\n\n", training_set.head())

    print("Testing set format: \n\n", testing_set.head())

    stock_dimension = len(training_set.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        tech_indicator_list) * stock_dimension
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

    ##
    ## Save data to file, both training and trading
    ##
    path_training = "./" + config.DATA_SAVE_DIR + "/training.txt"
    path_testing = "./" + config.DATA_SAVE_DIR + "/testing.txt"

    # Remove any stale files first (open(..., "wb") below would truncate
    # them anyway; kept for the explicit logging).
    if os.path.exists(path_training):
        os.remove(path_training)
        print("The training data file was deleted")
    else:
        print("The training data file does not exist")

    if os.path.exists(path_testing):
        os.remove(path_testing)
        print("The testing data file was deleted")
    else:
        print("The testing data file does not exist")

    with open(path_training, "wb") as f:
        pickle.dump(training_set, f, pickle.HIGHEST_PROTOCOL)

    with open(path_testing, "wb") as f:
        pickle.dump(testing_set, f, pickle.HIGHEST_PROTOCOL)

    print("Successfully created the training and testing data files.")
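
# The pickled splits can be read back later with the standard pickle API;
# a minimal sketch (paths mirror the ones written above, and `config` is
# assumed importable as in the example):
import pickle

with open("./" + config.DATA_SAVE_DIR + "/training.txt", "rb") as f:
    training_set = pickle.load(f)
with open("./" + config.DATA_SAVE_DIR + "/testing.txt", "rb") as f:
    testing_set = pickle.load(f)
print("Reloaded:", training_set.shape, testing_set.shape)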
Example #24
def objective(trial: optuna.Trial):
    """ test """
    df = pd.read_csv(os.path.abspath('./me/datasets/data_with_techs_turb.csv'),
                     index_col=0)
    df = df.sort_values(['date', 'tic'], ignore_index=True)
    train_data = data_split(df, '2009-01-01', '2016-01-01')
    trade_data = data_split(df, '2019-01-01', '2020-12-01')

    # params
    stock_dimension = len(train_data.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        config.TECHNICAL_INDICATORS_LIST) * stock_dimension

    hmax = 100
    starting_capital = 1000000
    transaction_cost_pct = 0.001
    reward_scaling = 1e-4
    technical_indicator_list = config.TECHNICAL_INDICATORS_LIST

    # Environment built on the training split; reused below for both learning
    # and periodic evaluation
    eval_env = gym.make(ENV_ID,
                        df=train_data,
                        stock_dim=stock_dimension,
                        hmax=hmax,
                        initial_amount=starting_capital,
                        transaction_cost_pct=transaction_cost_pct,
                        reward_scaling=reward_scaling,
                        state_space=state_space,
                        action_space=stock_dimension,
                        tech_indicator_list=technical_indicator_list,
                        turbulence_threshold=250,
                        day=0)

    # kwargs = DEFAULT_HYPERPARAMS.copy()
    # Sample hyperparameters for this trial
    kwargs = sample_a2c_params(trial)
    # Create the RL model with the sampled hyperparameters
    model = A2C('MlpPolicy', eval_env, **kwargs)

    # Create the callback that will periodically evaluate
    # and report the performance
    eval_callback = TrialEvalCallback(eval_env,
                                      trial,
                                      n_eval_episodes=N_EVAL_EPISODES,
                                      eval_freq=EVAL_FREQ,
                                      deterministic=True)

    nan_encountered = False
    try:
        model.learn(N_TIMESTEPS, callback=eval_callback)
    except AssertionError as e:
        # Sometimes, random hyperparams can generate NaN
        print(e)
        nan_encountered = True
    finally:
        # Free memory
        model.env.close()
        eval_env.close()

    # Tell the optimizer that the trial failed
    if nan_encountered:
        return float("nan")

    if eval_callback.is_pruned:
        raise optuna.exceptions.TrialPruned()

    return eval_callback.last_mean_reward
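
# A minimal driver for the objective above, using the standard Optuna API;
# the sampler, pruner, trial count, and timeout are illustrative choices.
import optuna

study = optuna.create_study(sampler=optuna.samplers.TPESampler(),
                            pruner=optuna.pruners.MedianPruner(n_warmup_steps=5),
                            direction="maximize")
try:
    study.optimize(objective, n_trials=50, timeout=3600)
except KeyboardInterrupt:
    pass
print("Best trial:", study.best_trial.params)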
Example #25
import os
import pandas as pd
from finrl.preprocessing.data import data_split
from finrl.env.env_stocktrading import StockTradingEnv
from finrl.model.models import DRLAgent
from finrl.trade.backtest import backtest_stats

os.environ["CUDA_VISIBLE_DEVICES"] = "2"
stock_id = "600570"
stock_handle_dir = "dataset/stock/stock_handle/"

df = pd.read_csv(stock_handle_dir + stock_id + ".csv")
df["tic"] = stock_id
# print(df)

train = data_split(df, "2004-02-13", "2018-09-11")
trade = data_split(df, "2018-09-11", "2021-05-25")

technical_indications_list = [
    "open", "close", "high", "low", "volume", "ma5", "ma10", "ema12", "ema26",
    "MACD", "DEA"
]
stock_dimension = len(train.tic.unique())
state_space = 1 + 2 * stock_dimension + len(
    technical_indications_list) * stock_dimension

env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "buy_cost_pct": 0.001,
    "sell_cost_pct": 0.001,