예제 #1 (Example #1)
                                            agent,
                                            logger=env_loop_logger)

# Run `num_episodes` training episodes through the Acme environment loop.
# Rerun this cell until the agent has learned the given task.
env_loop.run(num_episodes=5000)


@tf.function(input_signature=[tf.TensorSpec(shape=(1, 32), dtype=np.float32)])
def policy_inference(x):
    """Graph-compiled forward pass of the trained policy network.

    The fixed input signature (a single float32 observation batch of
    shape (1, 32) — assumed to match the environment's observation spec;
    confirm against the env) lets this function be exported as a concrete
    function inside a SavedModel.
    """
    action_logits = policy_network(x)
    return action_logits


# Export the policy as a TensorFlow SavedModel: wrap the traced inference
# function and the network's variables in a bare Sonnet module so that
# tf.saved_model.save can track and serialize them together.
p_save = snt.Module()
p_save.inference = policy_inference
# Attaching the variables explicitly ensures they are captured as
# trackable objects (the tf.function alone does not own them).
p_save.all_variables = list(policy_network.variables)
tf.saved_model.save(p_save, "p_save")

# Build the evaluation environment: adapt the gym-style TestRSEnv to the
# dm_env interface and cast observations/rewards to single precision,
# matching what the agent was trained on.
environment = wrappers.SinglePrecisionWrapper(
    wrappers.GymWrapper(TestRSEnv()))

# Roll out one episode with the trained agent, rendering each step.
timestep = environment.reset()
while not timestep.last():
    action = agent.select_action(timestep.observation)
    timestep = environment.step(action)
    environment.render()

environment.close()
예제 #2 (Example #2)
import gym

from stable_baselines.common.policies import MlpPolicy
from stable_baselines import PPO1
from env.RSEnv import RSEnv
from env.TestRSEnv import TestRSEnv

env = RSEnv()

# Resume from the previous checkpoint instead of training from scratch
# (a fresh run would be: PPO1(MlpPolicy, env, verbose=1)).
model = PPO1.load("sbppov3")
model.set_env(env)
# reset_num_timesteps=False keeps the global step counter continuous
# across restarts, so logging/schedules carry on from the checkpoint.
model.learn(
    total_timesteps=3000000,
    log_interval=10,
    reset_num_timesteps=False,
)
model.save("sbppov4")

# Evaluate the fine-tuned policy on the held-out test environment for a
# single episode, rendering every step.
env = TestRSEnv()

obs = env.reset()
done = False
while not done:
    act, _states = model.predict(obs)
    obs, _reward, done, _info = env.step(act)
    env.render()
env.close()