# NOTE(review): this chunk begins mid-statement — `agent, logger=env_loop_logger)`
# is the tail of a constructor call (presumably an acme.EnvironmentLoop) whose
# opening lies outside this view. Left untouched; confirm against the full file.
agent, logger=env_loop_logger)
# Run `num_episodes` training episodes.
# Rerun this cell until the agent has learned the given task.
env_loop.run(num_episodes=5000)


# Wrap the trained policy network behind a fixed-signature tf.function so it
# can be exported with tf.saved_model.save below.
# assumes observations are float32 vectors of length 32, batch size 1 — TODO confirm
@tf.function(input_signature=[tf.TensorSpec(shape=(1, 32), dtype=np.float32)])
def policy_inference(x):
    return policy_network(x)


# Package the inference function and the network's variables into a bare
# Sonnet module and export it as a SavedModel under ./p_save.
p_save = snt.Module()
p_save.inference = policy_inference
# Attaching the variable list keeps the weights reachable from the saved
# object graph (otherwise only the traced function would be exported).
p_save.all_variables = list(policy_network.variables)
tf.saved_model.save(p_save, "p_save")

# Roll out the trained agent on the test environment, rendering each step.
environment = TestRSEnv()
environment = wrappers.GymWrapper(environment)              # presumably adapts the Gym API to dm_env — verify
environment = wrappers.SinglePrecisionWrapper(environment)  # cast observations/rewards to float32
timestep = environment.reset()
while not timestep.last():  # Simple environment loop.
    action = agent.select_action(timestep.observation)
    timestep = environment.step(action)
    environment.render()
environment.close()
"""Resume PPO1 training on RSEnv, checkpoint, then roll out on TestRSEnv."""
import gym
from stable_baselines.common.policies import MlpPolicy
from stable_baselines import PPO1

from env.RSEnv import RSEnv
from env.TestRSEnv import TestRSEnv

# --- Training ---------------------------------------------------------------
train_env = RSEnv()

# First run: start from scratch with an MLP policy.
# model = PPO1(MlpPolicy, train_env, verbose=1)
# Subsequent runs: resume from the previous checkpoint.
model = PPO1.load("sbppov3")
model.set_env(train_env)
# reset_num_timesteps=False keeps the timestep counter (and therefore the
# logging/schedule state) continuous across resumed runs.
model.learn(total_timesteps=3000000, log_interval=10, reset_num_timesteps=False)
model.save("sbppov4")
train_env.close()  # fix: the training environment was never closed

# --- Evaluation -------------------------------------------------------------
test_env = TestRSEnv()
obs = test_env.reset()
done = False
while not done:
    action, _states = model.predict(obs)
    obs, reward, done, info = test_env.step(action)
    test_env.render()
test_env.close()