def make_model(self, args):
    env = GymEnv(args.game)
    num_dims = env.observation_dims()["sensor"]
    action_dims = env.action_dims()["action"]
    print("Input dims: {}".format(num_dims))
    print("Action dims: {}".format(action_dims))
    print("Action space: {}".format(env.action_range()["action"]))

    hidden_size = 256
    mlp = nn.Sequential(
        nn.Linear(num_dims[0], hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, hidden_size),
        nn.ReLU())
    model = GaussianPolicyModel(
        num_dims, action_dims, perception_net=mlp)
    return model
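# The method above wraps a 4-layer MLP in a GaussianPolicyModel, i.e. a policy
# that parameterizes a Gaussian distribution over actions and therefore targets
# continuous action spaces (hence the printed action range). A minimal usage
# sketch follows; the enclosing instance name `trainer` and the `game` attribute
# of `args` are illustrative assumptions, not part of the original code:
#
#     from argparse import Namespace
#     model = trainer.make_model(Namespace(game="Pendulum-v0"))
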
import torch.nn as nn
from flare.algorithm_zoo.simple_algorithms import SimpleAC  # assumed module path
from flare.model_zoo.simple_models import SimpleModelAC
from flare.agent_zoo.simple_rl_agents import SimpleRLAgent
from flare.framework.agent import OnlineHelper
from flare.env_zoo.gym_env import GymEnv

if __name__ == '__main__':
    """
    A demo of how to run a simple RL experiment
    """
    game = "CartPole-v0"

    num_agents = 16
    num_games = 8000

    # 1. Create environments
    envs = []
    for _ in range(num_agents):
        envs.append(GymEnv(game))
    state_shape = envs[-1].observation_dims()[0]
    num_actions = envs[-1].action_dims()[0]

    # 2. Construct the network and specify the algorithm.
    #    Here we use a small MLP and apply the Actor-Critic algorithm
    mlp = nn.Sequential(
        nn.Linear(state_shape[0], 128),
        nn.ReLU(),
        nn.Linear(128, 128),
        nn.ReLU(),
        nn.Linear(128, 128),
        nn.ReLU())

    alg = SimpleAC(model=SimpleModelAC(
        dims=state_shape, num_actions=num_actions, perception_net=mlp))

    # 3. Specify the settings for learning: data sampling strategy
    #    (OnlineHelper here) and other settings used by
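    # NOTE: the original snippet is cut off above. The continuation below is a
    # sketch of how such a flare demo typically proceeds; the ct_settings keys
    # ("algorithm", "agent_helper", "sample_interval"), the Manager workflow and
    # the SimpleRLAgent construction are assumptions that may differ from the
    # flare version you use (agent construction mirrors the RNN demo below).
    from flare.framework.manager import Manager  # normally placed at the top

    ct_settings = {
        "RL": dict(
            algorithm=alg,
            # data sampling strategy: on-policy, no replay buffer
            agent_helper=OnlineHelper,
            # each agent calls learn() every `sample_interval` steps
            sample_interval=5,
            num_agents=num_agents)
    }

    # 4. Create a Manager that runs the whole training pipeline
    manager = Manager(ct_settings)

    # 5. Spawn one agent per environment instance; SimpleRLAgent matches SimpleAC
    reward_shaping_f = lambda x: x / 100.0
    agents = []
    for _ in range(num_agents):
        agent = SimpleRLAgent(num_games, reward_shaping_f=reward_shaping_f)
        agent.set_env(GymEnv, game_name=game)
        agents.append(agent)

    # 6. Register the agents and start training
    manager.add_agents(agents)
    manager.start()
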
from flare.framework.manager import Manager
from flare.model_zoo.simple_models import SimpleRNNModelAC
from flare.agent_zoo.simple_rl_agents import SimpleRNNRLAgent
from flare.framework.agent import OnlineHelper
from flare.env_zoo.gym_env import GymEnv

if __name__ == '__main__':
    """
    A demo of how to run a simple RL experiment
    """
    game = "CartPole-v0"

    num_agents = 16
    num_games = 8000

    env = GymEnv(game)
    state_shape = env.observation_dims()["sensor"]
    num_actions = env.action_dims()["action"]

    # 1. Spawn one agent for each instance of environment.
    #    Agent's behavior depends on the actual algorithm being used. Since we
    #    are using SimpleAC with a recurrent model (SimpleRNNModelAC), a proper
    #    type of Agent is SimpleRNNRLAgent.
    reward_shaping_f = lambda x: x / 100.0
    agents = []
    for _ in range(num_agents):
        agent = SimpleRNNRLAgent(num_games, reward_shaping_f=reward_shaping_f)
        agent.set_env(GymEnv, game_name=game)
        agents.append(agent)

    # 2. Construct the network and specify the algorithm.
    #    Here we use a small MLP and apply the Actor-Critic algorithm
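    # NOTE: the original snippet is cut off above. A plausible continuation,
    # mirroring the MLP demo, is sketched below; the SimpleAC import path and
    # the SimpleRNNModelAC constructor arguments are assumptions and may differ
    # from the flare version you use.
    import torch.nn as nn  # normally placed at the top of the file
    from flare.algorithm_zoo.simple_algorithms import SimpleAC  # assumed path

    mlp = nn.Sequential(
        nn.Linear(state_shape[0], 128),
        nn.ReLU(),
        nn.Linear(128, 128),
        nn.ReLU())

    alg = SimpleAC(model=SimpleRNNModelAC(
        dims=state_shape, num_actions=num_actions, perception_net=mlp))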