Training script for the custom environment (Stable Baselines 2, ACKTR):

```python
import os

import matplotlib.pyplot as plt
from stable_baselines import ACKTR, results_plotter
from stable_baselines.common.policies import MlpPolicy

env = CustomEnv(3, 6, "tcp://*:5556")
# Stable Baselines provides you with the make_vec_env() helper,
# which does exactly the previous steps for you:
# env = make_vec_env(env_id, n_envs=num_cpu, seed=0)

# Create log dir
log_dir = "Logs/Custom_env/"
os.makedirs(log_dir, exist_ok=True)

# Create the callback: check every 500 steps
callback = SaveOnBestTrainingRewardCallback(check_freq=500, log_dir=log_dir)
# env = Monitor(env, log_dir)

model = ACKTR(MlpPolicy, env, verbose=2)
# model = ACKTR.load("DQN_agent")  # load() is a classmethod returning a new model
model.learn(total_timesteps=20000, callback=callback)
model.save("temp_agent")

input("Training completed")

# Run the trained policy and print the action probabilities
obs = env.reset()
for _ in range(1000):
    action, _states = model.predict(obs, deterministic=True)
    probs = model.action_probability(obs)
    obs, rewards, dones, info = env.step(action)
    print("Observation:", obs, rewards, probs)

results_plotter.plot_results([log_dir], 1e5, results_plotter.X_TIMESTEPS, "Lane Manager")
plt.show()
```
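Both scripts rely on `SaveOnBestTrainingRewardCallback`, which is not defined in this excerpt. Below is a minimal sketch following the callback example in the Stable Baselines documentation; it assumes the environment is wrapped in `Monitor` so that episode rewards are written to `log_dir`:

```python
import os

import numpy as np
from stable_baselines.common.callbacks import BaseCallback
from stable_baselines.results_plotter import load_results, ts2xy


class SaveOnBestTrainingRewardCallback(BaseCallback):
    """Save the model whenever the mean episode reward improves.

    Reads the episode statistics that the Monitor wrapper logs to log_dir.
    """

    def __init__(self, check_freq, log_dir, verbose=1):
        super(SaveOnBestTrainingRewardCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.log_dir = log_dir
        self.save_path = os.path.join(log_dir, "best_model")
        self.best_mean_reward = -np.inf

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            x, y = ts2xy(load_results(self.log_dir), "timesteps")
            if len(x) > 0:
                # Mean reward over the last 100 episodes
                mean_reward = np.mean(y[-100:])
                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    self.model.save(self.save_path)
        return True
```

Note that the `# env = Monitor(env, log_dir)` line is commented out in both scripts; without the Monitor wrapper this callback finds no episode logs to read.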
The inference script loads the saved agent and queries it interactively. The `make_env` helper (truncated in the original) is the standard multiprocessing utility from the Stable Baselines docs:

```python
import os

import gym
from stable_baselines import ACKTR
from stable_baselines.common import set_global_seeds


def make_env(env_id, rank, seed=0):
    """Utility for a multiprocessed env (Stable Baselines docs example)."""
    def _init():
        env = gym.make(env_id)
        env.seed(seed + rank)
        return env
    set_global_seeds(seed)
    return _init


if __name__ == '__main__':
    env_id = "CartPole-v1"
    num_cpu = 4  # Number of processes to use
    # Create the vectorized environment
    # env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])
    # env = gym.make(env_id)
    env = CustomEnv(3, 6, "tcp://*:5556")
    # Stable Baselines provides you with the make_vec_env() helper,
    # which does exactly the previous steps for you:
    # env = make_vec_env(env_id, n_envs=num_cpu, seed=0)

    # Create log dir
    log_dir = "Logs/env_id/"
    os.makedirs(log_dir, exist_ok=True)

    # Create the callback: check every 1000 steps
    callback = SaveOnBestTrainingRewardCallback(check_freq=1000, log_dir=log_dir)
    # env = Monitor(env, log_dir)

    # load() is a classmethod that returns a new model, so the result must be
    # re-bound; calling model.load(...) and discarding the return value loads nothing.
    model = ACKTR.load("RL_agent", env=env)

    while True:
        user_in = input("Enter States: ").split(',')
        obs = [int(i) for i in user_in]
        print(model.action_probability(obs))
        action = model.predict(obs, deterministic=True)
        print(action)
```
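`CustomEnv` itself is also outside this excerpt; only its constructor signature, `CustomEnv(3, 6, "tcp://*:5556")`, is visible. The skeleton below is a hypothetical reconstruction, assuming the first two arguments size the observation and action spaces and the string is a ZeroMQ bind address used to talk to an external simulator:

```python
import gym
import numpy as np
import zmq
from gym import spaces


class CustomEnv(gym.Env):
    """Hypothetical skeleton: the real step/reset message protocol is not
    shown in the excerpt and would exchange data with the simulator."""

    def __init__(self, obs_size, n_actions, bind_addr):
        super(CustomEnv, self).__init__()
        # Assumed mapping: 3 -> observation length, 6 -> discrete action count
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                            shape=(obs_size,), dtype=np.float32)
        self.action_space = spaces.Discrete(n_actions)
        # Reply socket bound for the external simulator to connect to
        self.context = zmq.Context()
        self.socket = self.context.socket(zmq.REP)
        self.socket.bind(bind_addr)

    def reset(self):
        # Placeholder: the real environment would request the initial
        # state from the simulator over the socket
        return np.zeros(self.observation_space.shape, dtype=np.float32)

    def step(self, action):
        # Placeholder: the real environment would send the action and
        # receive (observation, reward, done) back from the simulator
        obs = np.zeros(self.observation_space.shape, dtype=np.float32)
        return obs, 0.0, False, {}
```

With an interface like this in place, Stable Baselines can drive the external simulator as if it were any other Gym environment.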