def __init__(self, model_cfg):
    """Build a quantile-regression Q-network from the hydra model config."""
    BaseModel.__init__(self, model_cfg)

    self.action_dim = self.model_cfg.action_dim
    self.num_quantiles = self.model_cfg.num_quantiles

    # Wire layer sizes before instantiation: the input layer matches the
    # feature extractor, the output layer emits one value per
    # (action, quantile) pair.
    self.model_cfg.fc.input.params.input_size = self.get_feature_size()
    self.model_cfg.fc.output.params.output_size = (
        self.num_quantiles * self.action_dim
    )

    # Instantiate input, hidden, and output layers from the config.
    self.fc_input = hydra.utils.instantiate(self.model_cfg.fc.input)
    self.fc_hidden = nn.Sequential(
        *(
            hydra.utils.instantiate(self.model_cfg.fc.hidden[key])
            for key in self.model_cfg.fc.hidden
        )
    )
    self.fc_output = hydra.utils.instantiate(self.model_cfg.fc.output)

    # Quantile midpoints tau_i = (2i + 1) / (2N), shaped (1, N) so they
    # broadcast against a batch of quantile outputs.
    self.tau = torch.FloatTensor(
        (2.0 * np.arange(self.num_quantiles) + 1) / (2.0 * self.num_quantiles)
    ).view(1, -1)
def __init__(self, model_cfg: DictConfig):
    """Build a state-action critic MLP from the hydra model config."""
    BaseModel.__init__(self, model_cfg)

    action_dim = self.model_cfg.action_dim

    # The action vector is concatenated both to the raw state features and
    # to the activations entering the first hidden layer, so both of those
    # input sizes are grown by action_dim.
    self.model_cfg.fc.input.params.input_size = (
        self.get_feature_size() + action_dim
    )
    self.model_cfg.fc.hidden.hidden1.params.input_size = (
        action_dim + self.model_cfg.fc.input.params.output_size
    )
    # The head emits one value per action dimension.
    self.model_cfg.fc.output.params.output_size = action_dim

    # Instantiate input, hidden, and output layers from the config.
    self.fc_input = hydra.utils.instantiate(self.model_cfg.fc.input)
    hidden_modules = [
        hydra.utils.instantiate(self.model_cfg.fc.hidden[name])
        for name in self.model_cfg.fc.hidden
    ]
    self.fc_hidden = nn.Sequential(*hidden_modules)
    self.fc_output = hydra.utils.instantiate(self.model_cfg.fc.output)
def __init__(self, model_cfg: DictConfig):
    """Build a Gaussian policy network with separate mu and log-sigma heads."""
    BaseModel.__init__(self, model_cfg)

    # Bounds applied to the predicted log standard deviation.
    self.log_std_min = self.model_cfg.log_std_min
    self.log_std_max = self.model_cfg.log_std_max

    # Input layer matches the feature extractor; both output heads emit one
    # value per action dimension.
    self.model_cfg.fc.input.params.input_size = self.get_feature_size()
    self.model_cfg.fc.mu_stream.output.params.output_size = (
        self.model_cfg.fc.log_sigma_stream.output.params.output_size
    ) = self.model_cfg.action_dim

    def _build_stream(stream_cfg):
        # Instantiate every configured layer of a stream, in config order.
        return nn.Sequential(
            *(hydra.utils.instantiate(stream_cfg[name]) for name in stream_cfg)
        )

    self.fc_input = hydra.utils.instantiate(self.model_cfg.fc.input)
    self.fc_hidden = _build_stream(self.model_cfg.fc.hidden)
    self.mu_stream = _build_stream(self.model_cfg.fc.mu_stream)
    self.log_sigma_stream = _build_stream(self.model_cfg.fc.log_sigma_stream)
def __call__(self, policy: BaseModel, state: np.ndarray) -> np.ndarray:
    """Select a discrete action from the policy's output distribution.

    Args:
        policy: model whose forward pass yields per-action probabilities.
        state: observation; a 1-D array is promoted to a batch of one.

    Returns:
        The chosen action index (a Python scalar via ``.item()``) — sampled
        from the distribution when ``self.exploration`` is set, otherwise
        the greedy (highest-probability) action.
    """
    if state.ndim == 1:
        state = state.reshape(1, -1)
    state = np2tensor(state, self.device)
    dist = policy.forward(state)
    if self.exploration:
        action = Categorical(dist).sample().cpu().detach().numpy()
    else:
        # BUG FIX: the greedy branch previously called
        # sample().cpu().argmax().numpy(), i.e. argmax over a *sampled
        # action index* — a meaningless no-op that returned the random
        # sample. Greedy selection must take the argmax over the action
        # probabilities themselves.
        action = dist.argmax(dim=-1).cpu().detach().numpy()
    return action.item()
def __init__(self, model_cfg: DictConfig):
    """Build a dueling Q-network with separate advantage and value streams."""
    BaseModel.__init__(self, model_cfg)

    if self.model_cfg.use_conv:
        # Conv trunk: both streams read the flattened conv feature size.
        self.model_cfg.advantage.fc1.params.input_size = (
            self.model_cfg.value.fc1.params.input_size
        ) = self.get_feature_size()
    else:
        # No conv trunk: a linear feature layer replaces it, and both
        # streams read that layer's output size.
        self.model_cfg.linear_features.params.input_size = self.get_feature_size()
        self.features = hydra.utils.instantiate(self.model_cfg.linear_features)
        self.model_cfg.advantage.fc1.params.input_size = (
            self.model_cfg.value.fc1.params.input_size
        ) = self.model_cfg.linear_features.params.output_size

    # Size the advantage head's final layer to the action count when the
    # config left it as the "undefined" placeholder.
    last_key = list(self.model_cfg.advantage.keys())[-1]
    if self.model_cfg.advantage[last_key].params.output_size == "undefined":
        self.model_cfg.advantage[last_key].params.output_size = (
            self.model_cfg.action_dim
        )

    # Instantiate both streams from their configs, in config order.
    self.advantage_stream = nn.Sequential(
        *(
            hydra.utils.instantiate(self.model_cfg.advantage[name])
            for name in self.model_cfg.advantage
        )
    )
    self.value_stream = nn.Sequential(
        *(
            hydra.utils.instantiate(self.model_cfg.value[name])
            for name in self.model_cfg.value
        )
    )
def test(
    self,
    policy: BaseModel,
    action_selector: ActionSelector,
    episode_i: int,
    update_step: int,
) -> float:
    """Run ``test_num`` evaluation episodes with exploration disabled.

    Args:
        policy: model to evaluate (switched to eval mode).
        action_selector: callable mapping (policy, state) -> action; its
            ``exploration`` flag is cleared for the run and restored after.
        episode_i: current training episode index, for logging only.
        update_step: current update step, for logging only.

    Returns:
        Mean episode reward over the test episodes.
    """
    print("====TEST START====")
    policy.eval()
    action_selector.exploration = False

    episode_rewards = []
    for test_i in range(self.experiment_info.test_num):
        state = self.env.reset()
        episode_reward = 0
        done = False
        while not done:
            # NOTE(review): this gates rendering on render_train even though
            # it is the test loop — confirm a render_test flag wasn't intended.
            if self.experiment_info.render_train:
                self.env.render()
            action = action_selector(policy, state)
            # step() returns a full transition; only reward, next_state and
            # done are used here (previously state/action were rebound by the
            # unpack and immediately overwritten, which obscured the flow).
            _, _, reward, next_state, done = self.step(state, action)
            episode_reward = episode_reward + reward
            state = next_state
        print(
            f"episode num: {episode_i} | test: {test_i} episode reward: {episode_reward}"
        )
        episode_rewards.append(episode_reward)

    # Compute the mean once instead of twice.
    mean_rewards = np.mean(episode_rewards)
    # Fixed log formatting: a space was missing between "|" and "MEAN REWARD".
    print(
        f"EPISODE NUM: {episode_i} | UPDATE STEP: {update_step} | "
        f"MEAN REWARD: {mean_rewards}"
    )
    action_selector.exploration = True
    print("====TEST END====")
    return mean_rewards