Example #1
def test_policies(environment, policy, batch_size):
    environment = GymEnvironment(environment, SEED)

    critic = NNQFunction(
        dim_state=environment.dim_observation,
        dim_action=environment.dim_action,
        num_states=environment.num_states,
        num_actions=environment.num_actions,
        layers=LAYERS,
        tau=TARGET_UPDATE_TAU,
    )

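    # Build the policy under test on top of the critic.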
    policy = policy(critic, 0.1)

    optimizer = torch.optim.Adam(critic.parameters(), lr=LEARNING_RATE)
    criterion = torch.nn.MSELoss
    agent = SARSAAgent(
        critic=critic,
        policy=policy,
        criterion=criterion,
        optimizer=optimizer,
        batch_size=batch_size,
        target_update_frequency=TARGET_UPDATE_FREQUENCY,
        gamma=GAMMA,
    )
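    # Train the agent, then evaluate a single episode without rendering.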
    train_agent(
        agent,
        environment,
        num_episodes=NUM_EPISODES,
        max_steps=MAX_STEPS,
        plot_flag=False,
    )
    evaluate_agent(agent, environment, 1, MAX_STEPS, render=False)
    agent.logger.delete_directory()  # Cleanup directory.
Example #2
def get_default_q_function(environment, function_approximation):
    """Get default Q-Function."""
    if function_approximation == "tabular":
        q_function = TabularQFunction.default(environment)
    elif function_approximation == "linear":
        q_function = NNQFunction.default(environment, layers=[200])
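        # Freeze the hidden layers so that only the linear output head is trained.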
        freeze_hidden_layers(q_function)
    else:
        q_function = NNQFunction.default(environment)
    return q_function
Example #3
    def default(
        cls,
        environment,
        critic=None,
        policy=None,
        lr=3e-4,
        deterministic=True,
        exploration_noise=None,
        policy_update_frequency=2,
        clip_gradient_val=10,
        *args,
        **kwargs,
    ):
        """See `AbstractAgent.default'."""
        if critic is None:
            critic = NNQFunction.default(environment)
        if policy is None:
            policy = NNPolicy.default(environment, deterministic=deterministic)
        # A single Adam optimizer updates actor and critic parameters jointly.
        optimizer = Adam(chain(policy.parameters(), critic.parameters()),
                         lr=lr)
        if exploration_noise is None:
            exploration_noise = OUNoise(dim=environment.dim_action)
        return super().default(
            environment=environment,
            critic=critic,
            policy=policy,
            optimizer=optimizer,
            exploration_noise=exploration_noise,
            policy_update_frequency=policy_update_frequency,
            clip_gradient_val=clip_gradient_val,
            *args,
            **kwargs,
        )
Example #4
    def default(
        cls,
        environment,
        policy=None,
        critic=None,
        critic_lr=1e-3,
        actor_lr=3e-4,
        *args,
        **kwargs,
    ):
        """See `AbstractAgent.default'."""
        if policy is None:
            policy = NNPolicy.default(environment)
        if critic is None:
            critic = NNQFunction.default(environment)

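        # One optimizer with separate parameter groups: actor and critic learning rates.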
        optimizer = Adam(
            [
                {"params": policy.parameters(), "lr": actor_lr},
                {"params": critic.parameters(), "lr": critic_lr},
            ]
        )

        return super().default(
            environment=environment,
            policy=policy,
            critic=critic,
            optimizer=optimizer,
            *args,
            **kwargs,
        )
Example #5
    def default(
        cls,
        environment,
        critic=None,
        policy=None,
        epsilon=None,
        lr=3e-4,
        *args,
        **kwargs,
    ):
        """See `AbstractAgent.default'."""
        if critic is None:
            critic = NNQFunction.default(environment, tau=0)
        if policy is None:
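            # Default exploration: epsilon-greedy with an exponentially decaying epsilon.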
            if epsilon is None:
                epsilon = ExponentialDecay(start=1.0, end=0.01, decay=500)
            policy = EpsGreedy(critic, epsilon)
        optimizer = Adam(critic.parameters(), lr=lr)

        return super().default(
            environment,
            critic=critic,
            policy=policy,
            optimizer=optimizer,
            *args,
            **kwargs,
        )
Example #6
    def init(
        self,
        discrete_state,
        discrete_action,
        dim_state,
        dim_action,
        num_heads,
        num_samples=1,
        layers=None,
        biased_head=True,
    ):
        self.num_states, self.dim_state = (
            (dim_state, ()) if discrete_state else (-1, (dim_state,))
        )
        self.num_actions, self.dim_action = (
            (dim_action, ()) if discrete_action else (-1, (dim_action,))
        )

        layers = layers if layers is not None else [32, 32]

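        # Use a single Q-network unless an ensemble of heads is requested.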
        if num_heads is None:
            self.q_function = NNQFunction(
                dim_state=self.dim_state,
                dim_action=self.dim_action,
                num_states=self.num_states,
                num_actions=self.num_actions,
                layers=layers,
                biased_head=biased_head,
            )
        else:
            self.q_function = NNEnsembleQFunction(
                dim_state=self.dim_state,
                dim_action=self.dim_action,
                num_states=self.num_states,
                num_actions=self.num_actions,
                num_heads=num_heads,
                layers=layers,
                biased_head=biased_head,
            )

        self.policy = NNPolicy(
            dim_state=self.dim_state,
            dim_action=self.dim_action,
            num_states=self.num_states,
            num_actions=self.num_actions,
            layers=layers,
            biased_head=biased_head,
        )

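        # Derive a state-value function by integrating the Q-function over the policy.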
        self.value_function = IntegrateQValueFunction(
            q_function=self.q_function,
            policy=self.policy,
            num_samples=num_samples)
Example #7
    def test_input_transform(self, batch_size):
        q_function = NNQFunction(
            dim_state=(2,),
            dim_action=(1,),
            layers=[64, 64],
            non_linearity="Tanh",
            input_transform=StateTransform(),
        )
        value = q_function(
            random_tensor(False, 2, batch_size), random_tensor(False, 1, batch_size)
        )
        assert value.shape == torch.Size([batch_size] if batch_size else [])
        assert value.dtype is torch.get_default_dtype()
Example #8
def _get_q_function(dim_state, dim_action, params, input_transform=None):
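    # Optimistic exploration enlarges the action input by the state dimension.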
    if params.exploration == "optimistic":
        dim_action = (dim_action[0] + dim_state[0], )

    q_function = NNQFunction(
        dim_state=dim_state,
        dim_action=dim_action,
        layers=params.q_function_layers,
        biased_head=not params.q_function_unbiased_head,
        non_linearity=params.q_function_non_linearity,
        input_transform=input_transform,
        tau=params.q_function_tau,
    )

    params.update({"q_function": q_function.__class__.__name__})
    # value_function = torch.jit.script(value_function)
    return q_function
Example #9
    def test_from_q_function(self, discrete_state, discrete_action, dim_state,
                             dim_action, num_heads):
        num_states, dim_state = (
            (dim_state, ()) if discrete_state else (-1, (dim_state,))
        )
        num_actions, dim_action = (
            (dim_action, ()) if discrete_action else (-1, (dim_action,))
        )

        if not (discrete_state and not discrete_action):
            q_function = NNQFunction(
                dim_state=dim_state,
                num_states=num_states,
                dim_action=dim_action,
                num_actions=num_actions,
            )

            other = NNEnsembleQFunction.from_q_function(q_function, num_heads)

            assert q_function is not other
            assert other.num_heads == num_heads
Example #10
    def test_from_nn(
        self, discrete_state, discrete_action, dim_state, dim_action, batch_size
    ):
        if not (discrete_state and not discrete_action):
            self.init(discrete_state, discrete_action, dim_state, dim_action)
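            # Wrap a plain nn.Linear module as a Q-function via from_nn.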
            q_function = NNQFunction.from_nn(
                nn.Linear(
                    self.q_function.nn.kwargs["in_dim"][0],
                    self.q_function.nn.kwargs["out_dim"][0],
                ),
                self.dim_state,
                self.dim_action,
                num_states=self.num_states,
                num_actions=self.num_actions,
            )

            state = random_tensor(discrete_state, dim_state, batch_size)
            action = random_tensor(discrete_action, dim_action, batch_size)
            value = q_function(state, action)
            assert value.shape == torch.Size([batch_size] if batch_size else [])
            assert value.dtype is torch.get_default_dtype()
Example #11
    def default(cls,
                environment,
                policy=None,
                critic=None,
                lr=5e-4,
                *args,
                **kwargs):
        """See `AbstractAgent.default'."""
        if critic is None:
            critic = NNQFunction.default(environment)
        if policy is None:
            policy = NNPolicy.default(environment, layers=[100, 100])

        optimizer = Adam(chain(policy.parameters(), critic.parameters()),
                         lr=lr)

        return super().default(
            environment,
            policy=policy,
            critic=critic,
            optimizer=optimizer,
            *args,
            **kwargs,
        )
Example #12
    def default(cls, environment, *args, **kwargs):
        """See AbstractValueFunction.default."""
        q_function = NNQFunction.default(environment, *args, **kwargs)
        return super().default(environment, q_function=q_function, param=1)
Example #13
EPS_START = 1.0
EPS_END = 0.01
EPS_DECAY = 500
LAYERS = [64, 64]
SEED = 1
MEMORY = "ER"

torch.manual_seed(SEED)
np.random.seed(SEED)

environment = GymEnvironment(ENVIRONMENT, SEED)
q_function = NNQFunction(
    dim_state=environment.dim_state,
    dim_action=environment.dim_action,
    num_states=environment.num_states,
    num_actions=environment.num_actions,
    layers=LAYERS,
    non_linearity="ReLU",
    tau=TARGET_UPDATE_TAU,
)

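# Re-initialize the Q-network: zero the biases and reset the head weights.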
zero_bias(q_function)
init_head_weight(q_function)
# init_head_bias(q_function, offset=(1 - GAMMA ** 200) / (1 - GAMMA))
policy = EpsGreedy(q_function, ExponentialDecay(EPS_START, EPS_END, EPS_DECAY))

optimizer = torch.optim.Adam(q_function.parameters(),
                             lr=LEARNING_RATE,
                             weight_decay=WEIGHT_DECAY)
criterion = torch.nn.MSELoss
Example #14
GAMMA = 0.99
EPS_START = 1.0
EPS_END = 0.01
EPS_DECAY = 500
LAYERS = [64, 64]
SEED = 0

for name, Policy in {"eps_greedy": EpsGreedy, "softmax": SoftMax}.items():
    torch.manual_seed(SEED)
    np.random.seed(SEED)

    environment = GymEnvironment(ENVIRONMENT, SEED)
    q_function = NNQFunction(
        dim_state=environment.dim_observation,
        dim_action=environment.dim_action,
        num_states=environment.num_states,
        num_actions=environment.num_actions,
        layers=LAYERS,
    )
    policy = Policy(q_function, ExponentialDecay(start=1.0,
                                                 end=0.01,
                                                 decay=500))
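    # Separate target Q-network with update coefficient TARGET_UPDATE_TAU.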
    q_target = NNQFunction(
        dim_state=environment.dim_observation,
        dim_action=environment.dim_action,
        num_states=environment.num_states,
        num_actions=environment.num_actions,
        layers=LAYERS,
        tau=TARGET_UPDATE_TAU,
    )
Example #15
def q_function():
    return NNQFunction(num_actions=2,
                       num_states=4,
                       dim_state=(),
                       dim_action=())