def make_model(self, args):
        # Build a GaussianPolicyModel whose input and action sizes are read
        # from a Gym environment, using a 4-layer MLP as its perception net.
        #
        # `args` is not referenced by any line visible here; presumably it
        # feeds the environment constructor below -- confirm.
        #
        # NOTE(review): the `GymEnv(` call is never closed. Its arguments and
        # closing parenthesis appear to be missing from this listing, so the
        # block does not parse as written. Restore them before use.
        env = GymEnv(
        num_dims = env.observation_dims()["sensor"]
        action_dims = env.action_dims()["action"]

        # Log the sizes and the action range reported by the environment.
        print("Input dims: {}".format(num_dims))
        print("Action dims: {}".format(action_dims))
        print("Action space: {}".format(env.action_range()["action"]))

        # Perception network: four Linear + ReLU stages, all `hidden_size`
        # wide. Only the first entry of `num_dims` is used as the input width.
        hidden_size = 256
        mlp = nn.Sequential(nn.Linear(num_dims[0], hidden_size), nn.ReLU(),
                            nn.Linear(hidden_size, hidden_size), nn.ReLU(),
                            nn.Linear(hidden_size, hidden_size), nn.ReLU(),
                            nn.Linear(hidden_size, hidden_size), nn.ReLU())
        # The policy model receives the full dims objects plus the MLP.
        model = GaussianPolicyModel(num_dims, action_dims, perception_net=mlp)
        return model
# Example #2
from flare.agent_zoo.simple_rl_agents import SimpleRLAgent
from flare.framework.agent import OnlineHelper
from flare.env_zoo.gym_env import GymEnv

if __name__ == '__main__':
    # A demo of how to run a simple RL experiment.
    game = "CartPole-v0"

    num_agents = 16
    num_games = 8000

    # 1. Create environments: one GymEnv instance per agent.
    envs = [GymEnv(game) for _ in range(num_agents)]
    # Every environment runs the same game, so any instance can supply the
    # observation and action dimensions.
    state_shape = envs[-1].observation_dims()[0]
    num_actions = envs[-1].action_dims()[0]

    # 2. Construct the network and specify the algorithm.
    #    Here we use a small MLP and apply the Actor-Critic algorithm.
    #    Each Linear layer is followed by a ReLU: without the non-linearity,
    #    consecutive Linear layers collapse into a single linear map.
    mlp = nn.Sequential(
        nn.Linear(state_shape[0], 128), nn.ReLU(),
        nn.Linear(128, 128), nn.ReLU(),
        nn.Linear(128, 128), nn.ReLU())

    alg = SimpleAC(model=SimpleModelAC(
        dims=state_shape, num_actions=num_actions, perception_net=mlp))

    # 3. Specify the settings for learning: data sampling strategy
    #    (OnlineHelper here) and other settings.
    # NOTE(review): this example is cut off at this point; the settings and
    # the code that actually launches training are not part of this listing.
# Example #3
from flare.framework.manager import Manager
from flare.model_zoo.simple_models import SimpleRNNModelAC
from flare.agent_zoo.simple_rl_agents import SimpleRNNRLAgent
from flare.framework.agent import OnlineHelper
from flare.env_zoo.gym_env import GymEnv

if __name__ == '__main__':
    # A demo of how to run a simple RL experiment.
    game = "CartPole-v0"

    num_agents = 16
    num_games = 8000

    # In the code shown here this environment is used only to read the
    # observation and action dimensions of the game.
    env = GymEnv(game)
    state_shape = env.observation_dims()["sensor"]
    num_actions = env.action_dims()["action"]

    # 1. Spawn one agent for each instance of environment.
    #    Agent's behavior depends on the actual algorithm being used; this
    #    example instantiates SimpleRNNRLAgent.
    #    NOTE(review): the algorithm itself is set up further down, outside
    #    this listing -- confirm the agent type matches it.
    # Rewards are scaled down by a factor of 100 before being used.
    reward_shaping_f = lambda x: x / 100.0
    agents = []
    for _ in range(num_agents):
        agent = SimpleRNNRLAgent(num_games, reward_shaping_f=reward_shaping_f)
        agent.set_env(GymEnv, game_name=game)
        # Keep a reference to every agent; without this the list stays empty
        # and the agents created in this loop are lost.
        agents.append(agent)

    # 2. Construct the network and specify the algorithm.
    #    Here we use a small MLP and apply the Actor-Critic algorithm