Esempi in Python per StockTradingEnv.get_sb_env, esempi in Python per finrl.env.env_stocktrading.StockTradingEnv.get_sb_env

Esempio n. 1

0

Mostra file

def new_test():

    processed = pd.read_csv(
        os.path.abspath('./me/datasets/new_data_with_techs_turb.csv'),
        index_col=0)

    train = data_split(processed, '2009-01-01', '2018-01-01')
    trade = data_split(processed, '2018-01-01', '2021-01-01')

    stock_dimension = len(train.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        config.TECHNICAL_INDICATORS_LIST) * stock_dimension
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "transaction_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    log_dir = "me/tmp/"
    os.makedirs(log_dir, exist_ok=True)

    env_train.envs[0] = Monitor(env_train.envs[0], log_dir)

    agent = DRLAgent(env=env_train)
    model_a2c = agent.get_model("a2c", verbose=0)

    trained_a2c = agent.train_model(model=model_a2c,
                                    tb_log_name='a2c',
                                    total_timesteps=100000)

    data_turbulence = processed[(processed.date < '2018-01-01')
                                & (processed.date >= '2009-01-01')]
    insample_turbulence = data_turbulence.drop_duplicates(subset=['date'])
    turbulence_threshold = np.quantile(insample_turbulence.turbulence.values,
                                       1)

    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=380,
                                  **env_kwargs)
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    print("BEGIN PREDICTION")
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_a2c,
                                                           test_data=trade,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)

    print(df_account_value)

    print("END PREDICTION")

Esempio n. 2

0

Mostra file

def train_one():
    env_kwargs, processed = prepare_data()

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)

    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    model = agent.get_model(config.CURRENT_MODEL)
    model = agent.train_model(model=model, total_timesteps=80000)

    path = f"{config.TRAINED_MODEL_DIR}/model"
    model.save(path)
    model = model.load(path)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=model, environment=e_trade_gym)
    log_account(df_account_value, df_actions)

Esempio n. 3

0

Mostra file

def main():
    start_date = '2020-01-01'
    trade_start_date = '2020-12-01'
    end_date = '2021-01-01'
    ticker_list = stock_tickers
    numerical_df = YahooDownloader(start_date=start_date,
                                   end_date=end_date,
                                   ticker_list=ticker_list).fetch_data()
    sentiment_df = generate_sentiment_scores(start_date, end_date)
    initial_data = get_initial_data(numerical_df, sentiment_df)
    train_data = data_split(initial_data, start_date, trade_start_date)
    trade_data = data_split(initial_data, trade_start_date, end_date)
    indicator_list = config.TECHNICAL_INDICATORS_LIST + ['sentiment']
    stock_dimension = len(trade_data.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        indicator_list) * stock_dimension
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": indicator_list,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,
        "print_verbosity": 5
    }
    e_train_gym = StockTradingEnv(df=train_data, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    # print(train_data.index)
    # print(trade_data.index)
    # print(trade_data.loc[0])
    e_trade_gym = OnlineStockTradingEnv(trade_data.loc[0], **env_kwargs)
    training_agent = DRLAgent(env=env_train)
    model_a2c = training_agent.get_model("a2c")
    # print(train_data.index)
    # print(trade_data.index)
    #trained_a2c = agent.train_model(model=model_a2c, tb_log_name='a2c',total_timesteps=10000)
    feature_engineer = FeatureEngineer()
    online_stock_pred = OnlineStockPrediction(e_trade_gym, model_a2c)

    for i in range(1, trade_data.index.unique().max()):
        print(trade_data.loc[i])
        online_stock_pred.add_data(trade_data.loc[i])
        action, states, next_obs, rewards = online_stock_pred.predict()
        print("Action:", action)
        print("States: ", states)
        print("Next observation: ", next_obs)
        print("Rewards: ", rewards)

Esempio n. 4

0

Mostra file

 def DRL_prediction(model: PPO, environment: StockTradingEnv) -> object:
     test_env, test_obs = environment.get_sb_env()
     """make a prediction"""
     account_memory = []
     actions_memory = []
     test_env.reset()
     for i in range(len(environment.df.index.unique())):
         action, _ = model.predict(test_obs, deterministic=True)
         test_obs, rewards, dones, info = test_env.step(action)
         if i == (len(environment.df.index.unique()) - 2):
             account_memory = test_env.env_method(
                 method_name="save_asset_memory")
             actions_memory = test_env.env_method(
                 method_name="save_action_memory")
         if dones[0]:
             print("hit end!")
             break
     return account_memory[0], actions_memory[0]

Esempio n. 5

0

Mostra file

File: FinRL_single_stock_trading_v3.py Progetto: Erfanit64/FinRL-Library-master

    "sell_cost_pct": 0.001, 
    "state_space": state_space, 
    "stock_dim": stock_dimension, 
    "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST, 
    "action_space": stock_dimension, 
    "reward_scaling": 1e-4
    
}

e_train_gym = StockTradingEnv(df = train, **env_kwargs)


# In[25]:


env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))


# <a id='5'></a>
# # Part 6: Implement DRL Algorithms
# * The implementation of the DRL algorithms are based on **OpenAI Baselines** and **Stable Baselines**. Stable Baselines is a fork of OpenAI Baselines, with a major structural refactoring, and code cleanups.
# * FinRL library includes fine-tuned standard DRL algorithms, such as DQN, DDPG,
# Multi-Agent DDPG, PPO, SAC, A2C and TD3. We also allow users to
# design their own DRL algorithms by adapting these DRL algorithms.

# In[26]:


agent = DRLAgent(env = env_train)

Esempio n. 6

0

Mostra file

def main():

    # Basic setup
    # Disable warnings
    warnings.filterwarnings('ignore')

    tech_indicator_list = config.TECHNICAL_INDICATORS_LIST

    # add following folders
    if not os.path.exists("./" + config.TRAINED_MODEL_DIR):
        os.makedirs("./" + config.TRAINED_MODEL_DIR)
    if not os.path.exists("./" + config.TENSORBOARD_LOG_DIR):
        os.makedirs("./" + config.TENSORBOARD_LOG_DIR)

    print()
    print("==============Load Training Data===========")
    path_training = "./" + config.DATA_SAVE_DIR + "/training.txt"

    with open(path_training, "rb") as f:
        dump = pickle.load(f)

    stock_dimension = len(dump.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        tech_indicator_list) * stock_dimension
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": tech_indicator_list,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_training_gym = StockTradingEnv(df=dump, **env_kwargs)

    print("Get Environment for Training")
    env_training, _ = e_training_gym.get_sb_env()
    print(type(env_training))

    # Implement DRL Algorithms
    #
    # The implementation of the DRL algorithms are based on OpenAI Baselines and Stable
    # Baselines.Stable Baselines is a fork of OpenAI Baselines,
    # with a major structural refactoring, and code cleanups.
    # FinRL library includes fine - tuned standard DRL algorithms, such as DQN, DDPG, Multi - Agent
    # DDPG, PPO, SAC, A2C and TD3. We also allow users to design their own
    # DRL algorithms by adapting these DRL algorithms.Instead of installing
    # FinRL lib I have included the source code and created my own version.

    agent = Agent(env=env_training)

    print("======================================================")
    print("Please select which training you want me to perform.")
    print("1. A2C - Advalntage Actor-Critic algorithm")
    print("2. DDPG - Deep Deterministic Policy Gradient algorithm")
    print("3. PPO - Proximal Policy Optimization algorithm")
    print("4. TD3 - Twin Delayed Deep Deterministic Policy Gradient algorithm")
    print("5. SAC - Soft Actor-Critic algorithm")
    print("6. All Algorithms")
    print("7. Exit")
    print("-------------------------------------------------------")
    selection = int(input("Select what you want me to do: "))

    if selection == 1:
        train_a2c(agent)

    elif selection == 2:
        train_ddpg(agent)

    elif selection == 3:
        train_ppo(agent)

    elif selection == 4:
        train_td3(agent)

    elif selection == 5:
        train_sac(agent)

    elif selection == 6:
        train_a2c(agent)
        train_ddpg(agent)
        train_ppo(agent)
        train_td3(agent)
        train_sac(agent)

    elif selection == 7:
        print("exit")
    else:
        print("Invalid option selected!")

Esempio n. 7

0

Mostra file

File: RL - Multiple_Stock_Trading.py Progetto: Manudecara/Quant-Finance-with-R

print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")
'''Set Env Parameters'''
env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "buy_cost_pct": 0.001,
    "sell_cost_pct": 0.001,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}
'''Create Train Env'''
e_train_gym = StockTradingEnv(df=train, **env_kwargs)
env_train, _ = e_train_gym.get_sb_env()

############# BUILD AGENT #############

agent = DRLAgent(env=env_train)
'''Set Agent Parameters'''
A2C_params = {
    "n_steps": 2048,
    "ent_coef": 0.01,
    "learning_rate": 0.00025,
}

model_a2c = agent.get_model("a2c", model_kwargs=A2C_params)
'''Train Agent'''
trained_a2c = agent.train_model(model=model_a2c,
                                tb_log_name='a2c',

Esempio n. 8

0

Mostra file

def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    ).fetch_data()
    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )

    processed = fe.preprocess_data(df)

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)

    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")

    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(model=model_sac,
                                    tb_log_name="sac",
                                    total_timesteps=80000)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_sac,
                                                           test_data=trade,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/df_account_value_" +
                            now + ".csv")
    df_actions.to_csv("./" + config.RESULTS_DIR + "/df_actions_" + now +
                      ".csv")

    print("==============Get Backtest Results===========")
    perf_stats_all = BackTestStats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" +
                          now + ".csv")

Esempio n. 9

0

Mostra file

File: main.py Progetto: jkeithry/RL_Stock_Trading

def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=['FXAIX'],
    ).fetch_data()
    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )

    processed = fe.preprocess_data(df)

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    user_input = input('train model? 1 train 0 don\'t train')
    if user_input == 1:
        model_sac = agent.get_model("sac")
        trained_sac = agent.train_model(model=model_sac,
                                        tb_log_name="sac",
                                        total_timesteps=8000)
        trained_sac.save("../models/sac_8k" + df.tic[0] + "_frl")
    else:
        trained_sac = SAC.load('../models/sac_80k_msft_working')
    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        trained_sac, e_trade_gym)
    df_account_value.to_csv("../" + config.RESULTS_DIR +
                            "/SAC_df_account_value_" + df.tic[0] + "_" + now +
                            ".csv")
    df_actions.to_csv("../" + config.RESULTS_DIR + "/SAC_df_actions_" +
                      df.tic[0] + "_" + now + ".csv")

    # print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("../" + config.RESULTS_DIR + "/SAC_perf_stats_all_" +
                          df.tic[0] + "_" + now + ".csv")

    #plot acc value
    actions = df_actions['actions']
    x = np.arange(0, df_account_value['account_value'].shape[0])
    y = df_account_value['account_value']

    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)

    fig, axs = plt.subplots(2, 1, sharex=True, sharey=False)

    # plt.plot(x, y)

    # Use a boundary norm instead
    cmap = ListedColormap(['r', 'g', 'b'])
    norm = BoundaryNorm([-100, -0.1, 0.1, 100], cmap.N)
    lc = LineCollection(segments, cmap=cmap, norm=norm)
    lc.set_array(actions)
    lc.set_linewidth(2)
    line = axs[0].add_collection(lc)
    # fig.colorbar(line, ax=axs)

    axs[1].set_xlabel('Trading Day (' + 'From ' + config.START_TRADE_DATE +
                      " to " + config.END_DATE + ')')
    axs[0].set_ylabel('Account Value (10000 of USD)')
    axs[0].set_title("Trading Test on " + df.tic[0])

    axs[0].set_xlim(x.min(), x.max())
    axs[0].set_ylim(y.min(), y.max())

    custom_lines = [
        Line2D([0], [0], color=cmap(0.), lw=4),
        Line2D([0], [0], color=cmap(.5), lw=4),
        Line2D([0], [0], color=cmap(1.), lw=4)
    ]

    # lines = ax.plot(data)
    axs[0].legend(custom_lines, ['Sell', 'Hold', 'Buy'])

    #plot stock value
    tx = np.arange(0, df_account_value['account_value'].shape[0])
    ty = trade['close']
    plt.ylabel('Price (USD)')
    plt.title(df.tic[0] + " Closing Price")
    plt.plot(tx, ty)

    plt.savefig("../" + config.RESULTS_DIR + "/plots/"
                "SAC_plot_" + df.tic[0] + "_" + now + ".png")

Esempio n. 10

0

Mostra file

File: training.py Progetto: yutiansut/FinRL-Library

def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    ).fetch_data()
    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )

    processed = fe.preprocess_data(df)

    list_ticker = processed["tic"].unique().tolist()
    list_date = list(
        pd.date_range(processed['date'].min(),
                      processed['date'].max()).astype(str))
    combination = list(itertools.product(list_date, list_ticker))

    processed_full = pd.DataFrame(combination,
                                  columns=["date",
                                           "tic"]).merge(processed,
                                                         on=["date", "tic"],
                                                         how="left")
    processed_full = processed_full[processed_full['date'].isin(
        processed['date'])]
    processed_full = processed_full.sort_values(['date', 'tic'])

    processed_full = processed_full.fillna(0)

    # Training & Trading data split
    train = data_split(processed_full, config.START_DATE,
                       config.START_TRADE_DATE)
    trade = data_split(processed_full, config.START_TRADE_DATE,
                       config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")

    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(model=model_sac,
                                    tb_log_name="sac",
                                    total_timesteps=80000)

    print("==============Start Trading===========")
    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  **env_kwargs)

    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=trained_sac, environment=e_trade_gym)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/df_account_value_" +
                            now + ".csv")
    df_actions.to_csv("./" + config.RESULTS_DIR + "/df_actions_" + now +
                      ".csv")

    print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" +
                          now + ".csv")

Esempio n. 11

0

Mostra file

File: training.py Progetto: barneyjackson/FinRL

def train_one(fetch=False):
    """
    train an agent
    """
    if fetch:
        df = fetch_and_store()
    else:
        df = load()

    counts = df[['date', 'tic']].groupby(['date']).count().tic
    assert counts.min() == counts.max()

    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        # use_turbulence=False,
        user_defined_feature=False,
    )

    processed = fe.preprocess_data(df)

    # Training & Trading data split
    start_date, trade_date, end_date = calculate_split(df,
                                                       start=config.START_DATE)
    print(start_date, trade_date, end_date)
    train = data_split(processed, start_date, trade_date)
    trade = data_split(processed, trade_date, end_date)

    print(
        f'\n******\nRunning from {start_date} to {end_date} for:\n{", ".join(config.CRYPTO_TICKER)}\n******\n'
    )

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + (2 * stock_dimension) +
                   (len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension))

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 100000,
        "buy_cost_pct": 0.0026,
        "sell_cost_pct": 0.0026,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)

    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  make_plots=True,
                                  **env_kwargs)

    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime(config.DATETIME_FMT)

    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(
        model=model_sac,
        tb_log_name="sac",
        # total_timesteps=100
        total_timesteps=80000)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        # model=trained_sac, test_data=trade, test_env=env_trade, test_obs=obs_trade
        trained_sac,
        e_trade_gym)
    df_account_value.to_csv(
        f"./{config.RESULTS_DIR}/df_account_value_{now}.csv")
    df_actions.to_csv(f"./{config.RESULTS_DIR}/df_actions_{now}.csv")

    df_txns = pd.DataFrame(e_trade_gym.transactions,
                           columns=['date', 'amount', 'price', 'symbol'])
    df_txns = df_txns.set_index(pd.DatetimeIndex(df_txns['date'], tz=pytz.utc))
    df_txns.to_csv(f'./{config.RESULTS_DIR}/df_txns_{now}.csv')

    df_positions = pd.DataFrame(e_trade_gym.positions,
                                columns=['date', 'cash'] +
                                config.CRYPTO_TICKER)
    df_positions = df_positions.set_index(
        pd.DatetimeIndex(df_positions['date'],
                         tz=pytz.utc)).drop(columns=['date'])
    df_positions['cash'] = df_positions.astype(
        {col: np.float64
         for col in df_positions.columns})
    df_positions.to_csv(f'./{config.RESULTS_DIR}/df_positions_{now}.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value,
                                    transactions=df_txns,
                                    positions=df_positions)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv(f"./{config.RESULTS_DIR}/perf_stats_all_{now}.csv")

    backtest_plot(df_account_value,
                  baseline_start=trade_date,
                  baseline_end=end_date,
                  positions=df_positions,
                  transactions=df_txns)

Esempio n. 12

0

Mostra file

File: carracing.py Progetto: Adwaver4157/WorldModel_for_FinRL

def generate_data(rollouts, data_dir, noise_type): # pylint: disable=R0914
    """ Generates data """
    assert exists(data_dir), "The data directory does not exist..."


    df = YahooDownloader(start_date = '2009-01-01',
                        end_date = '2021-01-01',
                       ticker_list = ['AAPL']).fetch_data()

    df.sort_values(['date','tic'],ignore_index=True)

    fe = FeatureEngineer(
                        use_technical_indicator=True,
                        tech_indicator_list = config.TECHNICAL_INDICATORS_LIST,
                        use_turbulence=True,
                        user_defined_feature = False)

    processed = fe.preprocess_data(df)

    
    list_ticker = processed["tic"].unique().tolist()
    list_date = list(pd.date_range(processed['date'].min(),processed['date'].max()).astype(str))
    combination = list(itertools.product(list_date,list_ticker))

    processed_full = pd.DataFrame(combination,columns=["date","tic"]).merge(processed,on=["date","tic"],how="left")
    processed_full = processed_full[processed_full['date'].isin(processed['date'])]
    processed_full = processed_full.sort_values(['date','tic'])

    processed_full = processed_full.fillna(0)


    processed_full.sort_values(['date','tic'],ignore_index=True)

    train = data_split(processed_full, '2009-01-01','2019-01-01')
    trade = data_split(processed_full, '2019-01-01','2021-01-01')
    stock_dimension = len(train.tic.unique())
    state_space = 1 + 2*stock_dimension + len(config.TECHNICAL_INDICATORS_LIST)*stock_dimension
    env_kwargs = {
                "hmax": 100, 
                    "initial_amount": 1000000, 
#                         "buy_cost_pct": 0.001i,
#                             "sell_cost_pct": 0.001,
                             "transaction_cost_pct": 0.001, 
                                "state_space": state_space, 
                                    "stock_dim": stock_dimension, 
                                        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST, 
                                            "action_space": stock_dimension, 
                                                "reward_scaling": 1e-4
                                                }

    e_train_gym = StockTradingEnv(df = train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    env = env_train

#     env = gym.make("CarRacing-v0")

    seq_len = 10000

    for i in range(rollouts):

        env.reset()

#         env.env.viewer.window.dispatch_events()
        if noise_type == 'white':
            a_rollout = [env.action_space.sample() for _ in range(seq_len)]
        elif noise_type == 'brown':
            a_rollout = sample_continuous_policy(env.action_space, seq_len, 1. / 50)

        s_rollout = []
        r_rollout = []
        d_rollout = []


        t = 0
        while True:
            action = a_rollout[t]
            t += 1

            s, r, done, _ = env.step(action)
#             env.env.viewer.window.dispatch_events()
            s_rollout += [s]
            r_rollout += [r]
            d_rollout += [done]
            if done:
                print("> End of rollout {}, {} frames...".format(i, len(s_rollout)))
                np.savez(join(data_dir, 'rollout_{}'.format(i)),
                         observations=np.array(s_rollout),
                         rewards=np.array(r_rollout),
                         actions=np.array(a_rollout),
                         terminals=np.array(d_rollout))
                break