Code example #1
    def __init__(self, model_cfg):
        BaseModel.__init__(self, model_cfg)
        self.action_dim = self.model_cfg.action_dim
        self.num_quantiles = self.model_cfg.num_quantiles

        # set input size of fc input layer
        self.model_cfg.fc.input.params.input_size = self.get_feature_size()

        # set output size of fc output layer
        self.model_cfg.fc.output.params.output_size = (
            self.num_quantiles * self.action_dim
        )

        # initialize input layer
        self.fc_input = hydra.utils.instantiate(self.model_cfg.fc.input)

        # initialize hidden layers
        hidden_layers = []
        for layer in self.model_cfg.fc.hidden:
            layer_info = self.model_cfg.fc.hidden[layer]
            hidden_layers.append(hydra.utils.instantiate(layer_info))
        self.fc_hidden = nn.Sequential(*hidden_layers)

        # initialize output layer
        self.fc_output = hydra.utils.instantiate(self.model_cfg.fc.output)

        self.tau = torch.FloatTensor(
            (2.0 * np.arange(self.num_quantiles) + 1) / (2.0 * self.num_quantiles)
        ).view(1, -1)
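
The last three lines precompute the fixed quantile midpoints tau_i = (2i + 1) / (2N), i = 0, ..., N - 1, used by QR-DQN. For orientation, below is a minimal sketch of the kind of config this constructor expects; the `class`/`params` layout is only a guess inferred from the `.params.*` accesses above (old-style hydra.utils.instantiate), and every concrete value and target path is an assumption, not taken from RLcycle:

from omegaconf import OmegaConf

# Hypothetical config sketch; only the fc.input / fc.hidden / fc.output
# structure and the sizes filled in by __init__ are implied by the snippet.
model_cfg = OmegaConf.create(
    {
        "action_dim": 4,
        "num_quantiles": 51,
        "fc": {
            "input": {"class": "...", "params": {"output_size": 128}},
            "hidden": {
                "hidden1": {"class": "...", "params": {"input_size": 128, "output_size": 128}},
            },
            "output": {"class": "...", "params": {"input_size": 128}},
        },
    }
)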
Code example #2
File: critic.py Project: LaoKpa/RLcycle
    def __init__(self, model_cfg: DictConfig):
        BaseModel.__init__(self, model_cfg)

        # set input size of fc input layer and first hidden layer
        self.model_cfg.fc.input.params.input_size = (self.get_feature_size() +
                                                     self.model_cfg.action_dim)
        self.model_cfg.fc.hidden.hidden1.params.input_size = (
            self.model_cfg.action_dim +
            self.model_cfg.fc.input.params.output_size)

        # set output size of fc output layer
        self.model_cfg.fc.output.params.output_size = self.model_cfg.action_dim

        # initialize input layer
        self.fc_input = hydra.utils.instantiate(self.model_cfg.fc.input)

        # initialize hidden layers
        hidden_layers = []
        for layer in self.model_cfg.fc.hidden:
            layer_info = self.model_cfg.fc.hidden[layer]
            hidden_layers.append(hydra.utils.instantiate(layer_info))
        self.fc_hidden = nn.Sequential(*hidden_layers)

        # initialize output layer
        self.fc_output = hydra.utils.instantiate(self.model_cfg.fc.output)
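
The size bookkeeping above implies a DDPG/TD3-style Q-critic in which the action enters the network twice: concatenated with the state features at the input layer, and again before the first hidden layer. A forward pass consistent with those sizes might look like the following sketch (`self.features` is assumed to be the state feature extractor provided by BaseModel; this is not the file's actual forward):

    def forward(self, state: torch.Tensor, action: torch.Tensor) -> torch.Tensor:
        x = self.features(state)                            # assumed BaseModel feature extractor
        x = self.fc_input(torch.cat((x, action), dim=-1))   # features + action
        x = self.fc_hidden(torch.cat((action, x), dim=-1))  # action + input-layer output
        return self.fc_output(x)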
Code example #3
File: policy.py Project: LaoKpa/RLcycle
    def __init__(self, model_cfg: DictConfig):
        BaseModel.__init__(self, model_cfg)
        # Define requisite attributes
        self.log_std_min = self.model_cfg.log_std_min
        self.log_std_max = self.model_cfg.log_std_max
        self.model_cfg.fc.input.params.input_size = self.get_feature_size()
        self.model_cfg.fc.mu_stream.output.params.output_size = (
            self.model_cfg.fc.log_sigma_stream.output.params.output_size
        ) = self.model_cfg.action_dim

        # Initialize input layer
        self.fc_input = hydra.utils.instantiate(self.model_cfg.fc.input)

        # Initialize hidden layers
        hidden_layers = []
        for layer in self.model_cfg.fc.hidden:
            layer_info = self.model_cfg.fc.hidden[layer]
            hidden_layers.append(hydra.utils.instantiate(layer_info))
        self.fc_hidden = nn.Sequential(*hidden_layers)

        # Initialize mu stream
        mu_stream = []
        for layer in self.model_cfg.fc.mu_stream:
            layer_info = self.model_cfg.fc.mu_stream[layer]
            mu_stream.append(hydra.utils.instantiate(layer_info))
        self.mu_stream = nn.Sequential(*mu_stream)

        # Initialize log_sigma stream
        log_sigma_stream = []
        for layer in self.model_cfg.fc.log_sigma_stream:
            layer_info = self.model_cfg.fc.log_sigma_stream[layer]
            log_sigma_stream.append(hydra.utils.instantiate(layer_info))
        self.log_sigma_stream = nn.Sequential(*log_sigma_stream)
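
Given log_std_min and log_std_max, a SAC-style forward pass would clamp the log-sigma head to that range. The following is a sketch of that convention, not the file's actual forward (it assumes `state` already matches get_feature_size(); any convolutional feature extractor is omitted):

    def forward(self, state: torch.Tensor):
        x = self.fc_input(state)
        x = self.fc_hidden(x)
        mu = self.mu_stream(x)
        log_sigma = torch.clamp(
            self.log_sigma_stream(x), self.log_std_min, self.log_std_max
        )
        return mu, log_sigma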
Code example #4
    def __call__(self, policy: BaseModel, state: np.ndarray) -> int:
        if state.ndim == 1:
            state = state.reshape(1, -1)
        state = np2tensor(state, self.device)
        dist = policy.forward(state)
        categorical_dist = Categorical(dist)
        if self.exploration:
            action = categorical_dist.sample().cpu().detach().numpy()
        else:
            # greedy action: argmax over the action distribution itself,
            # not over a sampled value
            action = dist.argmax(dim=-1).cpu().detach().numpy()
        return action.item()
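
Here `Categorical` is `torch.distributions.Categorical`, so passing `dist` positionally treats it as `probs` (non-negative action probabilities, e.g. a softmax output); raw logits would need `Categorical(logits=...)`. A minimal, self-contained sanity check of the two branches:

import torch
from torch.distributions import Categorical

probs = torch.softmax(torch.randn(1, 4), dim=-1)  # stand-in for a policy output
dist = Categorical(probs)
stochastic_action = dist.sample()     # exploration: sample an action index
greedy_action = probs.argmax(dim=-1)  # evaluation: deterministic argmax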
Code example #5
File: value.py Project: sushe-shakya/RLcycle
    def __init__(self, model_cfg: DictConfig):
        BaseModel.__init__(self, model_cfg)

        # initialize feature layer and fc inputs if not using cnn
        if not self.model_cfg.use_conv:
            self.model_cfg.linear_features.params.input_size = self.get_feature_size()
            self.features = hydra.utils.instantiate(self.model_cfg.linear_features)
            self.model_cfg.advantage.fc1.params.input_size = (
                self.model_cfg.value.fc1.params.input_size
            ) = self.model_cfg.linear_features.params.output_size

        # set input sizes of fc input layer if using cnn
        if self.model_cfg.use_conv:
            self.model_cfg.advantage.fc1.params.input_size = (
                self.model_cfg.value.fc1.params.input_size
            ) = self.get_feature_size()

        # set output size of advantage fc output layer:
        output_layer_key = list(self.model_cfg.advantage.keys())[-1]
        if self.model_cfg.advantage[output_layer_key].params.output_size == "undefined":
            self.model_cfg.advantage[
                output_layer_key
            ].params.output_size = self.model_cfg.action_dim

        # initialize advantage head
        advantage_stream = []
        for layer in self.model_cfg.advantage:
            layer_info = self.model_cfg.advantage[layer]
            advantage_stream.append(hydra.utils.instantiate(layer_info))
        self.advantage_stream = nn.Sequential(*advantage_stream)

        # initialize value head
        value_stream = []
        for layer in self.model_cfg.value:
            layer_info = self.model_cfg.value[layer]
            value_stream.append(hydra.utils.instantiate(layer_info))
        self.value_stream = nn.Sequential(*value_stream)
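
These advantage and value heads follow the dueling-network decomposition (Wang et al., 2016), which is typically recombined as Q(s, a) = V(s) + A(s, a) - mean_a A(s, a). A forward pass along those lines would be (a sketch, not the file's actual code; the flatten step for the conv branch is an assumption):

    def forward(self, state: torch.Tensor) -> torch.Tensor:
        x = self.features(state)
        if self.model_cfg.use_conv:
            x = x.view(x.size(0), -1)  # flatten conv features (assumed)
        advantage = self.advantage_stream(x)
        value = self.value_stream(x)
        # subtracting the mean advantage keeps V and A identifiable
        return value + advantage - advantage.mean(dim=-1, keepdim=True)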
Code example #6
File: agent.py Project: sushe-shakya/RLcycle
    def test(
        self,
        policy: BaseModel,
        action_selector: ActionSelector,
        episode_i: int,
        update_step: int,
    ) -> float:
        """Test policy without random exploration a number of times."""
        print("====TEST START====")
        policy.eval()
        action_selector.exploration = False
        episode_rewards = []
        for test_i in range(self.experiment_info.test_num):
            state = self.env.reset()
            episode_reward = 0
            done = False
            while not done:
                if self.experiment_info.render_train:
                    self.env.render()
                action = action_selector(policy, state)
                _, _, reward, next_state, done = self.step(state, action)
                episode_reward += reward
                state = next_state

            print(
                f"episode num: {episode_i} | test num: {test_i} | episode reward: {episode_reward}"
            )
            episode_rewards.append(episode_reward)

        mean_rewards = np.mean(episode_rewards)
        print(f"EPISODE NUM: {episode_i} | UPDATE STEP: {update_step} | "
              f"MEAN REWARD: {mean_rewards}")
        action_selector.exploration = True
        print("====TEST END====")

        return mean_rewards