# Excerpt from a trainer-style class; assumes `torch.nn as nn`, `GymEnv`, and
# `GaussianPolicyModel` are imported at the top of the file.
def make_model(self, args):
    env = GymEnv(args.game)
    num_dims = env.observation_dims()["sensor"]
    action_dims = env.action_dims()["action"]
    print("Input dims: {}".format(num_dims))
    print("Action dims: {}".format(action_dims))
    print("Action space: {}".format(env.action_range()["action"]))

    # A four-layer MLP serves as the shared perception network of the policy.
    hidden_size = 256
    mlp = nn.Sequential(
        nn.Linear(num_dims[0], hidden_size), nn.ReLU(),
        nn.Linear(hidden_size, hidden_size), nn.ReLU(),
        nn.Linear(hidden_size, hidden_size), nn.ReLU(),
        nn.Linear(hidden_size, hidden_size), nn.ReLU())
    model = GaussianPolicyModel(num_dims, action_dims, perception_net=mlp)
    return model
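# Illustrative sanity check (not part of flare): assuming `num_dims` is a
# one-element shape list such as [3], the perception net built above can be
# probed with a random batch to confirm its output width before it is handed
# to GaussianPolicyModel. All sizes here are hypothetical.
import torch
import torch.nn as nn

num_dims = [3]       # hypothetical observation shape for a "sensor" input
hidden_size = 256
probe_mlp = nn.Sequential(
    nn.Linear(num_dims[0], hidden_size), nn.ReLU(),
    nn.Linear(hidden_size, hidden_size), nn.ReLU(),
    nn.Linear(hidden_size, hidden_size), nn.ReLU(),
    nn.Linear(hidden_size, hidden_size), nn.ReLU())
obs = torch.randn(32, num_dims[0])   # a fake batch of 32 observations
assert probe_mlp(obs).shape == (32, hidden_size)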
import torch.nn as nn

from flare.agent_zoo.simple_rl_agents import SimpleRNNRLAgent
from flare.framework.agent import OnlineHelper
from flare.env_zoo.gym_env import GymEnv

if __name__ == '__main__':
    """
    A demo of how to run a simple RL experiment
    """
    game = "CartPole-v0"
    num_agents = 16
    num_games = 8000

    env = GymEnv(game)
    state_shape = env.observation_dims()["sensor"]
    num_actions = env.action_dims()["action"]

    # 1. Spawn one agent for each instance of environment.
    #    An agent's behavior depends on the actual algorithm being used. Since
    #    we are using SimpleAC with a recurrent model, a proper type of Agent
    #    is SimpleRNNRLAgent.
    reward_shaping_f = lambda x: x / 100.0
    agents = []
    for _ in range(num_agents):
        agent = SimpleRNNRLAgent(num_games, reward_shaping_f=reward_shaping_f)
        agent.set_env(GymEnv, game_name=game)
        agents.append(agent)

    # 2. Construct the network and specify the algorithm.
    #    Here we use a small MLP and apply the Actor-Critic algorithm.
    hidden_size = 128
    mlp = nn.Sequential(
        nn.Linear(state_shape[0], hidden_size), nn.ReLU(),
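# The snippet above cuts off while stacking MLP layers. For illustration only:
# in a generic PyTorch actor-critic setup (hypothetical sketch, not flare's
# actual model API), the shared MLP is followed by a softmax policy head and a
# scalar value head, roughly like this. The sizes match CartPole-v0
# (4-dimensional observation, 2 discrete actions).
import torch
import torch.nn as nn

state_dim, num_actions, hidden_size = 4, 2, 128
shared = nn.Sequential(
    nn.Linear(state_dim, hidden_size), nn.ReLU())
policy_head = nn.Sequential(
    nn.Linear(hidden_size, num_actions), nn.Softmax(dim=-1))
value_head = nn.Linear(hidden_size, 1)

obs = torch.randn(1, state_dim)
h = shared(obs)
action_probs = policy_head(h)   # distribution over the two CartPole actions
state_value = value_head(h)     # critic's estimate V(s)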
    # Excerpt from the ActionNoiseAgent class: the exploration noise is reset
    # whenever the environment is reset, so episodes start uncorrelated.
    def _reset_env(self):
        self.action_noise.reset()
        return super(ActionNoiseAgent, self)._reset_env()


if __name__ == '__main__':
    """
    A demo of how to run a simple RL experiment
    """
    game = "Pendulum-v0"
    num_agents = 1
    num_games = 10000

    env = GymEnv(game)
    state_shape = env.observation_dims()["sensor"]
    action_dims = env.action_dims()["action"]

    # 1. Spawn one agent for each instance of environment.
    #    An agent's behavior depends on the actual algorithm being used. Since
    #    we are using DDPG, a proper type of Agent is ActionNoiseAgent.
    agents = []
    for _ in range(num_agents):
        agent = ActionNoiseAgent(num_games, OUNoise(action_dims))
        agent.set_env(GymEnv, game)
        agents.append(agent)

    alg = DDPG(
        model=ContinuousDeterministicModel(
            input_dims=state_shape[0],
            action_dims=action_dims),
        update_ref_interval=1,
        update_weight=0.001,
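# DDPG's policy is deterministic, so exploration comes from perturbing actions
# with temporally correlated noise; the OUNoise passed to ActionNoiseAgent
# above is presumably an Ornstein-Uhlenbeck process. Below is a minimal NumPy
# sketch of the standard OU update; the class name and the theta/sigma/dt
# parameters are assumptions for illustration, not flare's exact signature.
import numpy as np

class OUNoiseSketch(object):
    """Ornstein-Uhlenbeck process:
    x += theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)
    """
    def __init__(self, dims, mu=0.0, theta=0.15, sigma=0.2, dt=1e-2):
        self.dims = dims
        self.mu, self.theta, self.sigma, self.dt = mu, theta, sigma, dt
        self.reset()

    def reset(self):
        # Called on every environment reset (cf. _reset_env above).
        self.x = np.ones(self.dims) * self.mu

    def noise(self):
        # Mean-reverting drift toward mu plus Gaussian diffusion.
        self.x += self.theta * (self.mu - self.x) * self.dt \
            + self.sigma * np.sqrt(self.dt) * np.random.randn(self.dims)
        return self.x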