Example #1
import os

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import SubprocVecEnv, VecMonitor

import custom_call_back
import trading_vix_env


def main():
    vix_env = trading_vix_env.trading_vix_env()
    num_cpu = 20  # number of parallel worker processes

    # Create the vectorized environment
    env = SubprocVecEnv([make_env(vix_env, i) for i in range(num_cpu)])

    # Create the log dir and wrap the vectorized env with a monitor
    log_dir = './ppo_data'
    os.makedirs(log_dir, exist_ok=True)
    env = VecMonitor(env, log_dir)

    # Custom callback that checks training progress every 1000 steps
    callback = custom_call_back.CustomCallback(check_freq=1000, log_dir=log_dir)

    # n_steps * num_cpu = 10000 transitions per rollout, matching batch_size
    model = PPO('MlpPolicy', env, verbose=1, n_steps=500, batch_size=10000)
    model.learn(total_timesteps=2_500_000_000, callback=callback)
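
Example #1 calls a `make_env` helper that is not shown in the snippet. Below is a minimal sketch of what it might look like, assuming `make_env(env, rank)` must return a zero-argument, picklable callable that gives each SubprocVecEnv worker its own environment instance; the copy strategy is an assumption, not part of the original code.

import copy


def make_env(env, rank):
    """Return a zero-argument callable for SubprocVecEnv.

    Each worker process calls it to build its own independent
    environment (here via a deep copy of the template env).
    The `rank` argument is kept for per-worker seeding if needed.
    """
    def _init():
        return copy.deepcopy(env)  # assumption: the env is copyable/picklable
    return _init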
Example #2
import os

import numpy as np
from stable_baselines3 import DDPG
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.noise import NormalActionNoise

import custom_call_back
import trading_vix_env


def main():
    # Create log dir
    log_dir = './ddpg_data'
    os.makedirs(log_dir, exist_ok=True)

    # Wrap the single trading environment with a monitor for logging
    vix_env = trading_vix_env.trading_vix_env()
    env = Monitor(vix_env, log_dir)

    # Add Gaussian action noise because TD3 and DDPG use a deterministic policy
    n_actions = env.action_space.shape[-1]
    action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))

    # Create the callback: check every 20000 steps
    callback = custom_call_back.CustomCallback(check_freq=20000, log_dir=log_dir)

    # Create the RL model
    model = DDPG('MlpPolicy', env, action_noise=action_noise, verbose=2, batch_size=10000)

    # Train the agent
    model.learn(total_timesteps=int(5e9), callback=callback)
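
Both examples rely on a project-specific `custom_call_back.CustomCallback` that is not shown here. A plausible sketch follows, modeled on the stable-baselines3 "save best model on training reward" callback pattern, assuming `check_freq` means "evaluate every N steps" and `log_dir` is where the Monitor files live; the body is an assumption, not the original implementation.

import os

import numpy as np
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.results_plotter import load_results, ts2xy


class CustomCallback(BaseCallback):
    """Check the Monitor logs every `check_freq` steps and save the
    best model seen so far (an assumed implementation)."""

    def __init__(self, check_freq, log_dir, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.log_dir = log_dir
        self.save_path = os.path.join(log_dir, 'best_model')
        self.best_mean_reward = -np.inf

    def _on_step(self) -> bool:
        if self.n_calls % self.check_freq == 0:
            # Mean reward over the last 100 monitored episodes
            x, y = ts2xy(load_results(self.log_dir), 'timesteps')
            if len(x) > 0:
                mean_reward = np.mean(y[-100:])
                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    self.model.save(self.save_path)
        return True  # returning False would stop training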