Example #1
def setup_model(initial_data, model_type='a2c', load_path=''):
    indicator_list = config.TECHNICAL_INDICATORS_LIST + ['sentiment']
    stock_dimension = len(initial_data.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        indicator_list) * stock_dimension
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": indicator_list,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,
        "print_verbosity": 5
    }
    cur_date = initial_data.date.unique()[-1]
    trade_data = initial_data[initial_data.date == cur_date]

    e_trade_gym = OnlineStockTradingEnv(trade_data, **env_kwargs)
    env_trade, _ = e_trade_gym.get_sb_env()
    trading_agent = DRLAgent(env=env_trade)
    model = trading_agent.get_model(model_type)
    if load_path:
        print("LOADING MODEL PARAMETERS")
        model = model.load(load_path)
    online_stock_pred = OnlineStockPrediction(e_trade_gym, model)
    print(online_stock_pred.predict())
    return online_stock_pred
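
A hypothetical usage sketch for the helper above (the DataFrame df, the checkpoint path, and next_day_rows are illustrative assumptions, not from the source; add_data/predict follow the online pattern shown in Example #4):

predictor = setup_model(df, model_type='a2c',
                        load_path='trained_models/a2c_checkpoint')  # hypothetical path
predictor.add_data(next_day_rows)  # append the next day's rows for each ticker
action, states, next_obs, rewards = predictor.predict()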
Example #2
def train_one():
    env_kwargs, processed = prepare_data()

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)

    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    model = agent.get_model(config.CURRENT_MODEL)
    model = agent.train_model(model=model, total_timesteps=80000)

    path = f"{config.TRAINED_MODEL_DIR}/model"
    model.save(path)
    model = model.load(path)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=model, environment=e_trade_gym)
    log_account(df_account_value, df_actions)
Example #3
def new_test():

    processed = pd.read_csv(
        os.path.abspath('./me/datasets/new_data_with_techs_turb.csv'),
        index_col=0)

    train = data_split(processed, '2009-01-01', '2018-01-01')
    trade = data_split(processed, '2018-01-01', '2021-01-01')

    stock_dimension = len(train.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        config.TECHNICAL_INDICATORS_LIST) * stock_dimension
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "transaction_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    log_dir = "me/tmp/"
    os.makedirs(log_dir, exist_ok=True)

    env_train.envs[0] = Monitor(env_train.envs[0], log_dir)

    agent = DRLAgent(env=env_train)
    model_a2c = agent.get_model("a2c", verbose=0)

    trained_a2c = agent.train_model(model=model_a2c,
                                    tb_log_name='a2c',
                                    total_timesteps=100000)

    data_turbulence = processed[(processed.date < '2018-01-01')
                                & (processed.date >= '2009-01-01')]
    insample_turbulence = data_turbulence.drop_duplicates(subset=['date'])
    # 100th percentile = the in-sample maximum; note this computed value is
    # not used below, where turbulence_threshold=380 is hardcoded.
    turbulence_threshold = np.quantile(insample_turbulence.turbulence.values,
                                       1)

    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=380,
                                  **env_kwargs)
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    print("BEGIN PREDICTION")
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_a2c,
                                                           test_data=trade,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)

    print(df_account_value)

    print("END PREDICTION")
Example #4
def main():
    start_date = '2020-01-01'
    trade_start_date = '2020-12-01'
    end_date = '2021-01-01'
    ticker_list = stock_tickers
    numerical_df = YahooDownloader(start_date=start_date,
                                   end_date=end_date,
                                   ticker_list=ticker_list).fetch_data()
    sentiment_df = generate_sentiment_scores(start_date, end_date)
    initial_data = get_initial_data(numerical_df, sentiment_df)
    train_data = data_split(initial_data, start_date, trade_start_date)
    trade_data = data_split(initial_data, trade_start_date, end_date)
    indicator_list = config.TECHNICAL_INDICATORS_LIST + ['sentiment']
    stock_dimension = len(trade_data.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        indicator_list) * stock_dimension
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": indicator_list,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,
        "print_verbosity": 5
    }
    e_train_gym = StockTradingEnv(df=train_data, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    # print(train_data.index)
    # print(trade_data.index)
    # print(trade_data.loc[0])
    e_trade_gym = OnlineStockTradingEnv(trade_data.loc[0], **env_kwargs)
    training_agent = DRLAgent(env=env_train)
    model_a2c = training_agent.get_model("a2c")
    # print(train_data.index)
    # print(trade_data.index)
    #trained_a2c = agent.train_model(model=model_a2c, tb_log_name='a2c',total_timesteps=10000)
    feature_engineer = FeatureEngineer()  # note: instantiated but not used below
    online_stock_pred = OnlineStockPrediction(e_trade_gym, model_a2c)

    for i in range(1, trade_data.index.unique().max() + 1):  # include the final day
        print(trade_data.loc[i])
        online_stock_pred.add_data(trade_data.loc[i])
        action, states, next_obs, rewards = online_stock_pred.predict()
        print("Action:", action)
        print("States: ", states)
        print("Next observation: ", next_obs)
        print("Rewards: ", rewards)
Example #5
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(start_date=config.START_DATE,
                         end_date=config.END_DATE,
                         ticker_list=config.DOW_30_TICKER).fetch_data()
    print("==============Start Feature Engineering===========")
    df = FeatureEngineer(df,
                         feature_number=5,
                         use_technical_indicator=True,
                         use_turbulence=True).preprocess_data()

    train = data_split(df, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(df, config.START_TRADE_DATE, config.END_DATE)
    env_setup = EnvSetup(stock_dim=len(train.tic.unique()))
    env_train = env_setup.create_env_training(data=train,
                                              env_class=StockEnvTrain)
    agent = DRLAgent(env=env_train)
    print("==============Model Training===========")
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    a2c_params_tuning = {
        'n_steps': 5,
        'ent_coef': 0.005,
        'learning_rate': 0.0007,
        'verbose': 0,
        'timesteps': 100000
    }
    model_a2c = agent.train_A2C(model_name="A2C_{}".format(now),
                                model_params=a2c_params_tuning)

    print("==============Start Trading===========")
    env_trade, obs_trade = env_setup.create_env_trading(
        data=trade, env_class=StockEnvTrade, turbulence_threshold=250)

    df_account_value = DRLAgent.DRL_prediction(model=model_a2c,
                                               test_data=trade,
                                               test_env=env_trade,
                                               test_obs=obs_trade)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/" + now + '.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = BackTestStats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" +
                          now + '.csv')
Example #6
def train_model(e_train_gym,
                tb_log_name,
                model_type="a2c",
                load_model_path='',
                save_model_path='',
                train_timesteps=80000):
    training_env, _ = e_train_gym.get_sb_env()
    training_agent = DRLAgent(training_env)
    model = training_agent.get_model(model_type)
    if load_model_path:
        print("LOADING MODEL PARAMETERS")
        model = model.load(load_model_path)
    print("=======TRAINING MODEL========")
    trained_model = training_agent.train_model(model,
                                               tb_log_name=tb_log_name,
                                               total_timesteps=train_timesteps)
    trained_model.save(save_model_path)
    return trained_model
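
A hypothetical invocation of the helper above (the run name and save path are assumptions for illustration):

trained = train_model(e_train_gym,
                      tb_log_name='a2c_run',
                      model_type='a2c',
                      save_model_path='trained_models/a2c_model')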
Example #7
def predict():
    env_kwargs, processed = prepare_data()
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    e_trade_gym = StockTradingEnv(df=trade, **env_kwargs)

    path = config.TRAINED_MODEL_DIR + "/model"
    # builds e.g. "SAC.load(path)" from the config string and evaluates it
    trained_model = eval(config.CURRENT_MODEL.upper() + ".load(path)")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=trained_model, environment=e_trade_gym)
    log_account(df_account_value, df_actions)
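
A sketch of a safer alternative to the eval() call above, assuming the Stable Baselines 3 algorithm classes are importable; a lookup table avoids evaluating a string built from config:

from stable_baselines3 import A2C, DDPG, PPO, SAC, TD3

# Map config names to classes instead of eval()'ing "<NAME>.load(path)".
MODEL_CLASSES = {"a2c": A2C, "ddpg": DDPG, "ppo": PPO, "sac": SAC, "td3": TD3}
trained_model = MODEL_CLASSES[config.CURRENT_MODEL].load(path)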
Example #8
def get_trade_results(env_setup, model):
    # note: trading_data is assumed to be defined at module scope
    # # --------------- Trading
    env_trade, obs_trade = env_setup.create_env_trading(
        data=trading_data, env_class=StockEnvTrade, turbulence_threshold=230)

    # # --------------- Predict
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=model,
        test_data=trading_data,
        test_env=env_trade,
        test_obs=obs_trade)

    return df_account_value, df_actions
Example #9
env_kwargs = {
    "hmax": 100,
    "initial_amount": 100000,
    "transaction_cost_pct": 0.001,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}

e_train_gym = StockTradingEnv(df=train, **env_kwargs)

env_train, _ = e_train_gym.get_sb_env()

agent = DRLAgent(env=env_train)

model_wm = agent.get_model("wm")

print('START TRAIN')
trained_wm = agent.train_model(model=model_wm,
                               tb_log_name='wm',
                               total_timesteps=30000)

trade = data_split(data_df, start='2019-01-01', end='2021-01-01')

e_trade_gym = StockTradingEnv(df=trade, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_wm,
                                                       test_data=trade,
Example #10
    "initial_amount": 1000000,
    "buy_cost_pct": 0.001,
    "sell_cost_pct": 0.001,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}
'''Create Train Env'''
e_train_gym = StockTradingEnv(df=train, **env_kwargs)
env_train, _ = e_train_gym.get_sb_env()

############# BUILD AGENT #############

agent = DRLAgent(env=env_train)
'''Set Agent Parameters'''
A2C_params = {
    "n_steps": 2048,
    "ent_coef": 0.01,
    "learning_rate": 0.00025,
}

model_a2c = agent.get_model("a2c", model_kwargs=A2C_params)
'''Train Agent'''
trained_a2c = agent.train_model(model=model_a2c,
                                tb_log_name='a2c',
                                total_timesteps=1000)

############# BUILD TRADING ENV #############
'''Set Turbulence Threshold'''
Example #11
def test(training_data, trading_data):
    # params
    stock_dimension = len(training_data.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        config.TECHNICAL_INDICATORS_LIST) * stock_dimension

    hmax = 100
    starting_capital = 1000000
    transaction_cost_pct = 0.001
    reward_scaling = 1e-4
    technical_indicator_list = config.TECHNICAL_INDICATORS_LIST

    env_setup = EnvSetup(stock_dim=stock_dimension,
                         state_space=state_space,
                         hmax=100,
                         initial_amount=1000000,
                         transaction_cost_pct=0.001)

    # pre-make training environment
    # env_train = StockEnvTrain(params)

    # transition to make
    # https://medium.com/@apoddar573/making-your-own-custom-environment-in-gym-c3b65ff8cdaa
    env_train = gym.make('multi-stock-train-v0',
                         df=training_data,
                         stock_dim=stock_dimension,
                         hmax=hmax,
                         initial_amount=starting_capital,
                         transaction_cost_pct=transaction_cost_pct,
                         reward_scaling=reward_scaling,
                         state_space=state_space,
                         action_space=stock_dimension,
                         tech_indicator_list=technical_indicator_list,
                         turbulence_threshold=250,
                         day=0)

    # --------------- Training

    log_dir = "me/tmp/"
    os.makedirs(log_dir, exist_ok=True)
    env_train = Monitor(env_train, log_dir)

    callback = SaveOnBestTrainingRewardCallback(check_freq=5000,
                                                log_dir=log_dir)

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    # a2c_params_tuning = {'n_steps': 512,
    #                      'ent_coef': 0.005,
    #                      'learning_rate': 0.0002,
    #                      'verbose': 0,
    #                      'timesteps': 150000}
    a2c_params_tuning = {
        "n_steps": 32,
        "gamma": 0.999304473794672,
        "gae_lambda": 0.994452346235796,
        "learning_rate": 0.00010054610987642753,
        "ent_coef": 0.00215496380633495,
        "max_grad_norm": 2.217146296318495,
        'verbose': 0,
        'timesteps': 2e5,  # 2e5
        "policy_kwargs": {
            "net_arch": 'tiny',
            "activation_fn": 'tanh',
            "ortho_init": False,
        }
    }
    model_a2c = agent.train_A2C(
        model_name="A2C_full_train_tuned{}".format(now),
        model_params=a2c_params_tuning,
        save=True,
        callback=callback)
    print("============End Model Training=========")

    # model_a2c = A2C.load(os.path.abspath('./me/tmp/best_model.zip'))

    account_value, actions = get_trade_results(env_setup, model_a2c)
Example #12
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=['FXAIX'],
    ).fetch_data()
    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )

    processed = fe.preprocess_data(df)

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    user_input = input("train model? 1 = train, 0 = don't train: ")
    if user_input == '1':
        model_sac = agent.get_model("sac")
        trained_sac = agent.train_model(model=model_sac,
                                        tb_log_name="sac",
                                        total_timesteps=8000)
        trained_sac.save("../models/sac_8k" + df.tic[0] + "_frl")
    else:
        trained_sac = SAC.load('../models/sac_80k_msft_working')
    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        trained_sac, e_trade_gym)
    df_account_value.to_csv("../" + config.RESULTS_DIR +
                            "/SAC_df_account_value_" + df.tic[0] + "_" + now +
                            ".csv")
    df_actions.to_csv("../" + config.RESULTS_DIR + "/SAC_df_actions_" +
                      df.tic[0] + "_" + now + ".csv")

    # print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("../" + config.RESULTS_DIR + "/SAC_perf_stats_all_" +
                          df.tic[0] + "_" + now + ".csv")

    #plot acc value
    actions = df_actions['actions']
    x = np.arange(0, df_account_value['account_value'].shape[0])
    y = df_account_value['account_value']

    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)

    fig, axs = plt.subplots(2, 1, sharex=True, sharey=False)

    # plt.plot(x, y)

    # Color the equity curve by the day's action: values below -0.1 map to
    # red (sell), [-0.1, 0.1] to green (hold), and above 0.1 to blue (buy).
    cmap = ListedColormap(['r', 'g', 'b'])
    norm = BoundaryNorm([-100, -0.1, 0.1, 100], cmap.N)
    lc = LineCollection(segments, cmap=cmap, norm=norm)
    lc.set_array(actions)
    lc.set_linewidth(2)
    line = axs[0].add_collection(lc)
    # fig.colorbar(line, ax=axs)

    axs[1].set_xlabel('Trading Day (' + 'From ' + config.START_TRADE_DATE +
                      " to " + config.END_DATE + ')')
    axs[0].set_ylabel('Account Value (10000 of USD)')
    axs[0].set_title("Trading Test on " + df.tic[0])

    axs[0].set_xlim(x.min(), x.max())
    axs[0].set_ylim(y.min(), y.max())

    custom_lines = [
        Line2D([0], [0], color=cmap(0.), lw=4),
        Line2D([0], [0], color=cmap(.5), lw=4),
        Line2D([0], [0], color=cmap(1.), lw=4)
    ]

    # lines = ax.plot(data)
    axs[0].legend(custom_lines, ['Sell', 'Hold', 'Buy'])

    #plot stock value
    tx = np.arange(0, df_account_value['account_value'].shape[0])
    ty = trade['close']
    plt.ylabel('Price (USD)')
    plt.title(df.tic[0] + " Closing Price")
    plt.plot(tx, ty)

    plt.savefig("../" + config.RESULTS_DIR + "/plots/"
                "SAC_plot_" + df.tic[0] + "_" + now + ".png")
Example #13
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    ).fetch_data()
    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )

    processed = fe.preprocess_data(df)

    # build the full (date, ticker) grid so every ticker has a row for every
    # calendar date in range
    list_ticker = processed["tic"].unique().tolist()
    list_date = list(
        pd.date_range(processed['date'].min(),
                      processed['date'].max()).astype(str))
    combination = list(itertools.product(list_date, list_ticker))

    processed_full = pd.DataFrame(combination,
                                  columns=["date",
                                           "tic"]).merge(processed,
                                                         on=["date", "tic"],
                                                         how="left")
    processed_full = processed_full[processed_full['date'].isin(
        processed['date'])]
    processed_full = processed_full.sort_values(['date', 'tic'])

    processed_full = processed_full.fillna(0)

    # Training & Trading data split
    train = data_split(processed_full, config.START_DATE,
                       config.START_TRADE_DATE)
    trade = data_split(processed_full, config.START_TRADE_DATE,
                       config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")

    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(model=model_sac,
                                    tb_log_name="sac",
                                    total_timesteps=80000)

    print("==============Start Trading===========")
    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  **env_kwargs)

    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=trained_sac, environment=e_trade_gym)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/df_account_value_" +
                            now + ".csv")
    df_actions.to_csv("./" + config.RESULTS_DIR + "/df_actions_" + now +
                      ".csv")

    print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" +
                          now + ".csv")
Example #14
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(start_date=config.START_DATE,
                         end_date=config.END_DATE,
                         ticker_list=config.SP_500_TICKER).fetch_data()
    print("==============Start Feature Engineering===========")
    df = FeatureEngineer(df, use_technical_indicator=True,
                         use_turbulence=True).preprocess_data()

    # Training & Trade data split
    train = data_split(df, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(df, config.START_TRADE_DATE, config.END_DATE)

    # data normalization
    #features_list = list(train.columns)
    #features_list.remove('date')
    #features_list.remove('tic')
    #features_list.remove('close')
    #print(features_list)
    #data_normaliser = preprocessing.StandardScaler()
    #train[features_list] = data_normaliser.fit_transform(train[features_list])
    #trade[features_list] = data_normaliser.fit_transform(trade[features_list])

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        config.TECHNICAL_INDICATORS_LIST) * stock_dimension

    env_setup = EnvSetup(stock_dim=stock_dimension,
                         state_space=state_space,
                         hmax=100,
                         initial_amount=3000,
                         transaction_cost_pct=0.001)

    env_train = env_setup.create_env_training(data=train,
                                              env_class=StockEnvTrain)
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

    a2c_params_tuning = {
        'n_steps': 5,
        'ent_coef': 0.005,
        'learning_rate': 0.0007,
        'verbose': 0,
        'timesteps': 80000
    }

    model = agent.train_A2C(model_name="A2C_{}".format(now),
                            model_params=a2c_params_tuning)

    print("==============Start Trading===========")
    env_trade, obs_trade = env_setup.create_env_trading(
        data=trade, env_class=StockEnvTrade, turbulence_threshold=250)

    df_account_value, df_actions = DRLAgent.DRL_prediction(model=model,
                                                           test_data=trade,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/df_account_value_" +
                            now + '.csv')
    df_actions.to_csv("./" + config.RESULTS_DIR + "/df_actions_" + now +
                      '.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = BackTestStats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" +
                          now + '.csv')
Example #15
env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))


# # Part 6: Implement DRL Algorithms
# * The implementations of the DRL algorithms are based on **OpenAI Baselines** and **Stable Baselines**. Stable Baselines is a fork of OpenAI Baselines, with major structural refactoring and code cleanups.
# * The FinRL library includes fine-tuned standard DRL algorithms such as DQN, DDPG,
# Multi-Agent DDPG, PPO, SAC, A2C, and TD3. Users can also design their own
# DRL algorithms by adapting these implementations.
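#
# For instance, each algorithm can be obtained by name from the same agent
# (a sketch following the get_model calls used throughout these examples;
# model_kwargs is optional):
#
#     agent = DRLAgent(env=env_train)
#     model = agent.get_model("ppo")   # or "a2c", "ddpg", "td3", "sac"
#     trained = agent.train_model(model=model, tb_log_name="ppo",
#                                 total_timesteps=50000)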

# In[26]:


agent = DRLAgent(env = env_train)


# ### Model Training: 5 models, A2C, DDPG, PPO, TD3, SAC
# 
# 

# ### Model 1: A2C

# In[27]:


agent = DRLAgent(env = env_train)

A2C_PARAMS = {"n_steps": 5, "ent_coef": 0.005, "learning_rate": 0.0002}
model_a2c = agent.get_model(model_name="a2c",model_kwargs = A2C_PARAMS)
Example #16
        "tech_indicator_list": features,
        "action_space": stock_dimension, 
        "reward_scaling": 1e-4,
        "model_name": model_name 
    }

    e_train_gym = StockTradingEnv(df = train, **env_kwargs)
    e_train_gym.seed(42)
    e_train_gym.action_space.seed(42)

    env_train, _ = e_train_gym.get_sb_env()
    env_train.seed(seed)
    env_train.action_space.seed(seed)
    print(type(env_train))

    agent = DRLAgent(env = env_train)
    model_ddpg = agent.get_model("ddpg", 
                                 model_kwargs={"batch_size": batch_size, 
                                                "buffer_size": 50000, 
                                                "learning_rate": lr}
                                  )
    trained_ddpg = agent.train_model(model=model_ddpg,
                                     tb_log_name='ddpg',
                                     total_timesteps=50000)

    e_trade_gym = StockTradingEnv(df = validation, **env_kwargs)
    e_trade_gym.seed(seed)
    e_trade_gym.action_space.seed(seed)
    df_account_value, df_actions = DRLAgent.DRL_prediction(
          model=trained_ddpg,
          environment = e_trade_gym
Example #17
env_kwargs = {
    "hmax": 100,
    "initial_amount": 100000,
    "transaction_cost_pct": 0.001,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}

e_train_gym = StockTradingEnv(df=train, **env_kwargs)

env_train, _ = e_train_gym.get_sb_env()

agent = DRLAgent(env=env_train)

model_wm = agent.get_model("wm")

trade = data_split(data_df, start='2019-01-01', end='2021-01-01')

e_trade_gym = StockTradingEnv(df=trade, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

df_account_value, df_actions = DRLAgent.DRL_prediction(model=model_wm,
                                                       test_data=trade,
                                                       test_env=env_trade,
                                                       test_obs=obs_trade)

print("==============Get Backtest Results===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
Example #18
    env = default.create(
        portfolio=portfolio,
        action_scheme="simple",   # alternatives: "managed-risk", "simpleBuy"
        reward_scheme="simple",   # alternative: "risk-adjusted"
        feed=feed,
        renderer="screen-log",  # ScreenLogger used with default settings
        window_size=20
    )

    # if agent is None:
    #     agent = DQNAgent(env)
    # else:
    #     agent = DQNAgent(env,policy_network=agent.policy_network)
    # agent.train(n_episodes=1, n_steps=720, render_interval=10)

    #if agent is None:
    agent = DRLAgent(env = env)
    PPO_PARAMS = {
        "n_steps": 1440,
        "ent_coef": 0.01,
        "learning_rate": 0.00025,
        "batch_size": 128,
    }
    model_ppo = agent.get_model("ppo",model_kwargs = PPO_PARAMS)

    trained_ppo = agent.train_model(model=model_ppo, 
                                tb_log_name='ppo',
                                total_timesteps=50000)
    
    pd.DataFrame(portfolio.performance).transpose()[['net_worth']].to_pickle('./tmp/result3_'+str(i)+'.pkl')
Example #19
    "initial_amount": 1000000,
    "buy_cost_pct": 0.001,
    "sell_cost_pct": 0.001,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": indicators,
    "action_space": stock_dimension,
    "reward_scaling": 0.6
}

e_trade_gym = StockTradingEnv(df=df_test, **env_kwargs)
e_train_gym = StockTradingEnv(df=df_train, **env_kwargs)

env_trade, _ = e_trade_gym.get_sb_env()
env_train, _ = e_train_gym.get_sb_env()

agent = DRLAgent(env=env_train)
model_params = config.__dict__[f"{args.model.upper()}_PARAMS"]

model = agent.get_model(args.model, model_kwargs=model_params, verbose=1)

print('Training model')

trained_model = model.learn(tb_log_name='{}_{}'.format(
    modelName, datetime.datetime.now()),
                            total_timesteps=train_steps,
                            eval_env=e_trade_gym,
                            n_eval_episodes=10)

trained_model.save(os.path.join(args.modeldir, modelName))
Example #20
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

    env_kwargs = {
        "hmax": 1,
        "initial_amount": 100000,
        "buy_cost_pct": 0,
        "sell_cost_pct": 0,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": tech_indicator_list,
        "action_space": stock_dimension,
        "reward_scaling": 1e-5
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    agent = DRLAgent(env=env_train)

    #A2C
    '''
    A2C_PARAMS = {"n_steps": 5, "ent_coef": 0.005, "learning_rate": 0.0002}
    model_a2c = agent.get_model(model_name="a2c",model_kwargs = A2C_PARAMS)
    trained_a2c = agent.train_model(model=model_a2c, 
                                tb_log_name='a2c',
                                total_timesteps=50000)
    '''
    #PPO
    PPO_PARAMS = {
        "n_steps": 2048,
        "ent_coef": 0.005,
        "learning_rate": 0.0001,
        "batch_size": 128,
Example #21
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    ).fetch_data()
    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )

    processed = fe.preprocess_data(df)

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)

    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")

    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(model=model_sac,
                                    tb_log_name="sac",
                                    total_timesteps=80000)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_sac,
                                                           test_data=trade,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/df_account_value_" +
                            now + ".csv")
    df_actions.to_csv("./" + config.RESULTS_DIR + "/df_actions_" + now +
                      ".csv")

    print("==============Get Backtest Results===========")
    perf_stats_all = BackTestStats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" +
                          now + ".csv")
Example #22
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

env_kwargs = {
    "hmax": 500,
    "initial_amount": 1000000,
    "buy_cost_pct": 0.001,
    "sell_cost_pct": 0.001,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": indicators,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}

test_gym_env = StockTradingEnv(df = df,turbulence_threshold = 329, **env_kwargs)
agent = DRLAgent(env = test_gym_env)


if model == 'ensemble':
    trained_model = EnsembleModel(test_gym_env,model_paths,'binaverage')
else:
    model_params = config.__dict__[f"{model.upper()}_PARAMS"]
    trained_model = agent.get_model(model,
                            model_kwargs = model_params,
                            verbose = 0).load(model_paths)



print('Testing...')
df_account_value, df_actions = DRLAgent.average_predict(
    model=trained_model,
Example #23
    "initial_amount": 1000000, 
    "buy_cost_pct": 0.001,
    "sell_cost_pct": 0.001,
    "state_space": state_space, 
    "stock_dim": stock_dimension, 
    "tech_indicator_list": config["TECHNICAL_INDICATORS_LIST"], 
    "action_space": stock_dimension, 
    "reward_scaling": 1e-4
    
}
e_train_gym = StockTradingEnv(df = train, **env_kwargs)

env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))

agent = DRLAgent(env = env_train)
model_a2c = agent.get_model("a2c")

trained_a2c = agent.train_model(model=model_a2c, 
                              tb_log_name='a2c',
                              total_timesteps=100000)


### Model 2: DDPG
agent = DRLAgent(env = env_train)
model_ddpg = agent.get_model("ddpg")

trained_ddpg = agent.train_model(model=model_ddpg, 
                              tb_log_name='ddpg',
Example #24
#this is our training env. It allows multiprocessing
# env_train, _ = e_train_gym.get_multiproc_env(n = n_cores)
env_train, _ = e_train_gym.get_sb_env()

#this is our observation environment. It allows full diagnostics
env_trade, _ = e_trade_gym.get_sb_env()

# # Part 6: Implement DRL Algorithms
# * The implementations of the DRL algorithms are based on **OpenAI Baselines** and **Stable Baselines**. Stable Baselines is a fork of OpenAI Baselines, with major structural refactoring and code cleanups.
# * The FinRL library includes fine-tuned standard DRL algorithms such as DQN, DDPG,
# Multi-Agent DDPG, PPO, SAC, A2C, and TD3. Users can also design their own
# DRL algorithms by adapting these implementations.

# In[53]:
agent = DRLAgent(env = env_train)

# ### Model PPO
# In[54]:
# from torch.nn import Softsign, ReLU
ppo_params ={'n_steps': 256, 
             'ent_coef': 0.01, 
             'learning_rate': 0.00009, 
             'batch_size': 1024, 
            'gamma': 0.99}

policy_kwargs = {
#     "activation_fn": ReLU,
    "net_arch": [1024, 1024,1024, 1024,  1024], 
#     "squash_output": True
}
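# Note: Stable Baselines forwards policy_kwargs to the policy constructor;
# net_arch sizes the hidden layers, so the setting above requests five fully
# connected hidden layers of 1024 units each.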
Example #25
def train_one(fetch=False):
    """
    train an agent
    """
    if fetch:
        df = fetch_and_store()
    else:
        df = load()

    # every date must have rows for the same number of tickers
    counts = df[['date', 'tic']].groupby(['date']).count().tic
    assert counts.min() == counts.max()

    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        # use_turbulence=False,
        user_defined_feature=False,
    )

    processed = fe.preprocess_data(df)

    # Training & Trading data split
    start_date, trade_date, end_date = calculate_split(df,
                                                       start=config.START_DATE)
    print(start_date, trade_date, end_date)
    train = data_split(processed, start_date, trade_date)
    trade = data_split(processed, trade_date, end_date)

    print(
        f'\n******\nRunning from {start_date} to {end_date} for:\n{", ".join(config.CRYPTO_TICKER)}\n******\n'
    )

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + (2 * stock_dimension) +
                   (len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension))

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 100000,
        "buy_cost_pct": 0.0026,
        "sell_cost_pct": 0.0026,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)

    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  make_plots=True,
                                  **env_kwargs)

    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime(config.DATETIME_FMT)

    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(
        model=model_sac,
        tb_log_name="sac",
        # total_timesteps=100
        total_timesteps=80000)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        # model=trained_sac, test_data=trade, test_env=env_trade, test_obs=obs_trade
        trained_sac,
        e_trade_gym)
    df_account_value.to_csv(
        f"./{config.RESULTS_DIR}/df_account_value_{now}.csv")
    df_actions.to_csv(f"./{config.RESULTS_DIR}/df_actions_{now}.csv")

    df_txns = pd.DataFrame(e_trade_gym.transactions,
                           columns=['date', 'amount', 'price', 'symbol'])
    df_txns = df_txns.set_index(pd.DatetimeIndex(df_txns['date'], tz=pytz.utc))
    df_txns.to_csv(f'./{config.RESULTS_DIR}/df_txns_{now}.csv')

    df_positions = pd.DataFrame(e_trade_gym.positions,
                                columns=['date', 'cash'] +
                                config.CRYPTO_TICKER)
    df_positions = df_positions.set_index(
        pd.DatetimeIndex(df_positions['date'],
                         tz=pytz.utc)).drop(columns=['date'])
    df_positions = df_positions.astype(
        {col: np.float64
         for col in df_positions.columns})
    df_positions.to_csv(f'./{config.RESULTS_DIR}/df_positions_{now}.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value,
                                    transactions=df_txns,
                                    positions=df_positions)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv(f"./{config.RESULTS_DIR}/perf_stats_all_{now}.csv")

    backtest_plot(df_account_value,
                  baseline_start=trade_date,
                  baseline_end=end_date,
                  positions=df_positions,
                  transactions=df_txns)