    def __init__(self, kwargs):  # kwargs: a plain config dict, passed positionally
        super().__init__()

        self.env_name = str(kwargs["env_name"])
        self.device = str(kwargs["device"])
        self.state_dim = int(kwargs["state_dim"])
        self.action_dim = int(kwargs["action_dim"])
        self.solved_reward = float(kwargs["solved_reward"])

        # for gym compatibility
        self._max_episode_steps = int(kwargs["max_steps"])
        self.action_space = kwargs["action_space"]
        self.observation_space = kwargs["observation_space"]
        self.reset_env = kwargs["reset_env"]

        # three learned heads, all conditioned on the concatenated
        # (state, action) input: next-state dynamics, scalar reward,
        # and a scalar termination signal
        self.state_net = build_nn_from_config(input_dim=self.state_dim + self.action_dim,
                                              output_dim=self.state_dim,
                                              nn_config=kwargs).to(self.device)
        self.reward_net = build_nn_from_config(input_dim=self.state_dim + self.action_dim,
                                               output_dim=1,
                                               nn_config=kwargs).to(self.device)
        self.done_net = build_nn_from_config(input_dim=self.state_dim + self.action_dim,
                                             output_dim=1,
                                             nn_config=kwargs).to(self.device)

        self.state = self.reset()
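
The snippet stops at initialization; below is a minimal sketch of how the three heads might be queried in a gym-style step. The step method itself, the sigmoid threshold on the done logit, and the in-place state update are assumptions, not part of the original code.

    # hypothetical usage sketch: query the three learned heads on the
    # concatenated (state, action) input
    def step(self, action):
        with torch.no_grad():
            x = torch.cat([self.state, action], dim=-1)
            next_state = self.state_net(x)
            reward = self.reward_net(x)
            # assumption: done_net emits a logit; 0.5 is an arbitrary threshold
            done = torch.sigmoid(self.done_net(x)) > 0.5
        self.state = next_state
        return next_state, reward, done, {}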
Example #2
    def __init__(self, state_dim, action_dim, max_action, agent_name, config):
        super().__init__()

        self.net = build_nn_from_config(input_dim=state_dim,
                                        output_dim=action_dim,
                                        nn_config=config["agents"][agent_name])
        self.max_action = max_action
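
The constructor stores max_action but the snippet omits the forward pass; a plausible sketch, assuming the usual tanh squashing of a deterministic actor:

    # hypothetical forward pass: tanh-squash the raw output and rescale
    # into [-max_action, max_action]
    def forward(self, state):
        return torch.tanh(self.net(state)) * self.max_action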
Example #3
    def build_reward_net(self, kwargs):
        # 0: original reward
        # 1: potential function (exclusive)
        # 2: potential function (additive)
        # 3: potential function with additional info vector (exclusive)
        # 4: potential function with additional info vector (additive)
        # 5: non-potential function (exclusive)
        # 6: non-potential function (additive)
        # 7: non-potential function with additional info vector (exclusive)
        # 8: non-potential function with additional info vector (additive)
        # 101: weighted info vector as baseline (exclusive)
        # 102: weighted info vector as baseline (additive)

        if self.reward_env_type < 100:
            if self.reward_env_type == 0:
                input_dim = 1  # dummy dimension
            elif self.reward_env_type in (1, 2, 5, 6):
                input_dim = self.state_dim
            elif self.reward_env_type in (3, 4, 7, 8):
                input_dim = self.state_dim + self.info_dim
            else:
                raise NotImplementedError('Unknown reward_env_type: ' +
                                          str(self.reward_env_type))

            return build_nn_from_config(input_dim=input_dim,
                                        output_dim=1,
                                        nn_config=kwargs).to(self.device)
        else:
            if self.reward_env_type in (101, 102):
                return nn.Linear(self.info_dim, 1, bias=False).to(self.device)
            else:
                raise NotImplementedError('Unknown reward_env_type: ' +
                                          str(self.reward_env_type))
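
As a rough illustration of the "additive potential function" variants (types 2 and 4), the returned net can act as the potential Φ in the standard shaping formula r' = r + γΦ(s') − Φ(s). The method name, the self.reward_net attribute, and the γ default below are assumptions:

    # hypothetical shaping step for reward_env_type == 2 (potential, additive):
    # r' = r + gamma * phi(s') - phi(s)
    def shape_reward(self, reward, state, next_state, gamma=0.99):
        phi_s = self.reward_net(state)
        phi_next = self.reward_net(next_state)
        return reward + gamma * phi_next - phi_s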
Example #4
    def __init__(self, state_dim, action_dim, agent_name, config):
        super().__init__()

        self.net = build_nn_from_config(input_dim=state_dim,
                                        output_dim=action_dim,
                                        nn_config=config["agents"][agent_name])
        # learnable, state-independent per-dimension standard deviation
        self.action_std = torch.nn.Parameter(
            torch.ones(action_dim, device=config["device"]) *
            config["agents"][agent_name]["action_std"])
Example #5
    def __init__(self, state_dim, action_dim, max_action, agent_name, config):
        super().__init__()

        self.net = build_nn_from_config(input_dim=state_dim,
                                        output_dim=action_dim,
                                        nn_config=config["agents"][agent_name])
        self.max_action = max_action
        # gumbel_softmax_temp = config["agents"][agent_name]["gumbel_softmax_temp"]
        # self.gumbel_softmax_temp = torch.nn.Parameter(torch.tensor(gumbel_softmax_temp), requires_grad=True)
        self.gumbel_softmax_temp = config["agents"][agent_name]["gumbel_softmax_temp"]
        self.gumbel_softmax_hard = config["agents"][agent_name]["gumbel_softmax_hard"]
Example #6
    def __init__(self, state_dim, action_dim, max_action, agent_name, config):
        super().__init__()

        # the trunk must emit action_dim features: both output layers below
        # are nn.Linear(action_dim, action_dim), so output_dim=1 would only
        # work for one-dimensional action spaces
        self.net = build_nn_from_config(input_dim=state_dim,
                                        output_dim=action_dim,
                                        nn_config=config["agents"][agent_name])
        self.output_limit = max_action
        self.log_std_min = config["agents"][agent_name]['log_std_min']
        self.log_std_max = config["agents"][agent_name]['log_std_max']

        # Set output layers
        self.mu_layer = nn.Linear(action_dim, action_dim)
        self.log_std_layer = nn.Linear(action_dim, action_dim)
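
A sketch of the matching forward pass, assuming the usual squashed-Gaussian SAC actor; the snippet itself stops at the constructor:

    # hypothetical forward pass: clamp log-std into the configured range,
    # sample with the reparameterization trick, squash, and rescale
    def forward(self, state):
        h = self.net(state)
        mu = self.mu_layer(h)
        log_std = torch.clamp(self.log_std_layer(h),
                              self.log_std_min, self.log_std_max)
        action = mu + log_std.exp() * torch.randn_like(mu)
        return torch.tanh(action) * self.output_limit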
Example #7
    def __init__(self, state_dim, action_dim, agent_name, config):
        super().__init__()

        self.feature_stream = build_nn_from_config(
            input_dim=state_dim,
            output_dim=config["agents"][agent_name]["feature_dim"],
            nn_config=config["agents"][agent_name])

        # the two heads reuse the agent config, but with a single hidden
        # layer sized to the shared feature dimension
        heads_config = copy.copy(config["agents"][agent_name])
        heads_config["hidden_layer"] = 1
        heads_config["hidden_size"] = config["agents"][agent_name]["feature_dim"]

        self.value_stream = build_nn_from_config(
            input_dim=config["agents"][agent_name]["feature_dim"],
            output_dim=1,
            nn_config=heads_config)

        self.advantage_stream = build_nn_from_config(
            input_dim=config["agents"][agent_name]["feature_dim"],
            output_dim=action_dim,
            nn_config=heads_config)
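
The two streams are typically recombined with the mean-subtracted dueling aggregation; a minimal sketch (the forward method is not part of the snippet):

    # hypothetical forward pass: Q(s, a) = V(s) + A(s, a) - mean_a A(s, a)
    def forward(self, state):
        features = self.feature_stream(state)
        value = self.value_stream(features)
        advantage = self.advantage_stream(features)
        return value + advantage - advantage.mean(dim=-1, keepdim=True)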
Example #8
    def __init__(
        self,
        state_dim,
        action_dim,
        has_discrete_actions,
        feature_dim=64,
        hidden_size=128,
    ):
        super().__init__()

        self.state_dim = state_dim
        self.action_dim = action_dim
        self.feature_dim = feature_dim
        self.has_discrete_actions = has_discrete_actions

        nn_features_config = {
            'hidden_size': hidden_size,
            'hidden_layer': 2,
            'activation_fn': "leakyrelu"
        }
        nn_inverse_config = {
            'hidden_size': hidden_size,
            'hidden_layer': 2,
            'activation_fn': "relu"
        }
        nn_forward_pre_config = {
            'hidden_size': hidden_size,
            'hidden_layer': 2,
            'activation_fn': "leakyrelu"
        }
        nn_forward_post_config = {
            'hidden_size': hidden_size,
            'hidden_layer': 1,
            'activation_fn': "leakyrelu"
        }

        # binary discrete action spaces are encoded with a single logit
        if self.has_discrete_actions and self.action_dim == 2:
            action_dim = 1

        self.features_model = build_nn_from_config(
            input_dim=state_dim,
            output_dim=feature_dim,
            nn_config=nn_features_config)

        self.inverse_model = build_nn_from_config(input_dim=feature_dim * 2,
                                                  output_dim=action_dim,
                                                  nn_config=nn_inverse_config)

        self.forward_pre_model = build_nn_from_config(
            input_dim=action_dim + feature_dim,
            output_dim=feature_dim,
            nn_config=nn_forward_pre_config)

        class ResidualBlock(nn.Module):
            # two fully connected layers, each re-conditioned on the action,
            # wrapped in a skip connection around the input feature vector
            def __init__(self, input_dim, output_dim):
                super().__init__()
                self.fc1 = nn.Sequential(
                    nn.Linear(input_dim, output_dim),
                    nn.LeakyReLU(inplace=True),
                )
                self.fc2 = nn.Sequential(nn.Linear(input_dim, output_dim))

            def forward(self, feature, action):
                x = feature
                x = self.fc1(torch.cat([x, action], dim=1))
                x = self.fc2(torch.cat([x, action], dim=1))
                return feature + x

        # original implementation uses residual blocks:
        # https://github.com/openai/large-scale-curiosity/blob/master/dynamics.py#L55-L61
        self.residual_block1 = ResidualBlock(input_dim=action_dim + feature_dim,
                                             output_dim=feature_dim)
        self.residual_block2 = ResidualBlock(input_dim=action_dim + feature_dim,
                                             output_dim=feature_dim)
        self.residual_block3 = ResidualBlock(input_dim=action_dim + feature_dim,
                                             output_dim=feature_dim)
        self.residual_block4 = ResidualBlock(input_dim=action_dim + feature_dim,
                                             output_dim=feature_dim)

        self.forward_post_model = build_nn_from_config(
            input_dim=feature_dim,
            output_dim=feature_dim,
            nn_config=nn_forward_post_config)
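
A sketch of how these modules are usually wired in an ICM forward pass, assuming action is already a float tensor of width action_dim (a single column for binary discrete actions, per the constructor); the method is not part of the snippet:

    # hypothetical forward pass following the usual ICM wiring
    def forward(self, state, next_state, action):
        phi = self.features_model(state)
        phi_next = self.features_model(next_state)
        # inverse model: predict the action from both feature vectors
        action_pred = self.inverse_model(torch.cat([phi, phi_next], dim=1))
        # forward model: predict the next feature vector from (phi, action)
        x = self.forward_pre_model(torch.cat([phi, action], dim=1))
        for block in (self.residual_block1, self.residual_block2,
                      self.residual_block3, self.residual_block4):
            x = block(x, action)
        phi_next_pred = self.forward_post_model(x)
        return action_pred, phi_next_pred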