def __init__(self, obs_dim, action_dim, shared_layers, eps=1e-7,
             device=torch.device(CPU), normalize_obs=False,
             normalize_value=False):
    """Shared-trunk SAC-style network.

    A common linear encoder feeds a policy head emitting ``2 * action_dim``
    values (presumably per-dimension mean and raw std — confirm against the
    forward pass) and twin Q heads that each consume [features, action].

    Args:
        obs_dim: flat observation dimensionality.
        action_dim: action dimensionality.
        shared_layers: layer spec consumed by ``construct_linear_layers``;
            ``shared_layers[-1][1]`` is the trunk output width.
        eps: small constant stored for downstream numerical use.
        device: torch device the module is moved to.
        normalize_obs: forwarded to the base class.
        normalize_value: forwarded to the base class.
    """
    super().__init__(obs_dim=obs_dim,
                     norm_dim=(0,),
                     device=device,
                     normalize_obs=normalize_obs,
                     normalize_value=normalize_value)
    self._eps = eps
    self._action_dim = action_dim
    self._flatten = Flatten()

    # Trunk shared by the policy and both critics.
    self._shared_network = construct_linear_layers(shared_layers)

    trunk_width = shared_layers[-1][1]
    self._policy = nn.Sequential(
        nn.Linear(trunk_width, 256),
        nn.ReLU(),
        nn.Linear(256, action_dim * 2),
    )
    # Twin Q heads over concatenated [trunk features, action].
    self._q1 = nn.Sequential(
        nn.Linear(trunk_width + action_dim, 256),
        nn.ReLU(),
        nn.Linear(256, 1),
    )
    self._q2 = nn.Sequential(
        nn.Linear(trunk_width + action_dim, 256),
        nn.ReLU(),
        nn.Linear(256, 1),
    )
    self.to(self.device)
def __init__(self, obs_dim, hidden_state_dim, action_dim, shared_layers,
             initial_alpha=1., eps=1e-7, device=torch.device(CPU),
             normalize_obs=False, normalize_value=False):
    """Recurrent SAC-style network: shared encoder -> LSTM -> heads.

    The policy head emits ``2 * action_dim`` values from the LSTM hidden
    state (presumably mean and raw std per action dim — confirm against the
    forward pass); twin Q heads consume [hidden state, action].

    Fix: ``normalize_value`` was accepted as a parameter but hard-coded to
    ``False`` in the ``super().__init__`` call, silently disabling value
    normalization for callers that requested it. Unlike the multitask
    variant (which deliberately passes ``False`` and builds its own
    per-task RMS), this class has no substitute, so the argument is now
    forwarded.

    Args:
        obs_dim: flat observation dimensionality.
        hidden_state_dim: LSTM hidden-state size (exposed by the base class
            as ``self.hidden_state_dim``).
        action_dim: action dimensionality.
        shared_layers: layer spec for ``construct_linear_layers``;
            ``shared_layers[-1][1]`` is the encoder output width.
        initial_alpha: initial temperature, forwarded to the base class.
        eps: numerical epsilon, forwarded to the base class.
        device: torch device the module is moved to.
        normalize_obs: forwarded to the base class.
        normalize_value: forwarded to the base class (previously dropped).
    """
    super().__init__(obs_dim=obs_dim,
                     hidden_state_dim=hidden_state_dim,
                     initial_alpha=initial_alpha,
                     eps=eps,
                     norm_dim=(0,),
                     device=device,
                     normalize_obs=normalize_obs,
                     normalize_value=normalize_value)  # was hard-coded False
    self._action_dim = action_dim
    self._flatten = Flatten()
    self._shared_network = construct_linear_layers(shared_layers)
    # Recurrent core over encoder features; batch-first (batch, seq, feat).
    self.lstm_layer = nn.LSTM(input_size=shared_layers[-1][1],
                              hidden_size=self.hidden_state_dim,
                              batch_first=True)
    self._policy = nn.Sequential(
        nn.Linear(self.hidden_state_dim, 256),
        nn.ReLU(),
        nn.Linear(256, action_dim * 2))
    # Twin Q heads over concatenated [hidden state, action].
    self._q1 = nn.Sequential(
        nn.Linear(self.hidden_state_dim + action_dim, 256),
        nn.ReLU(),
        nn.Linear(256, 1))
    self._q2 = nn.Sequential(
        nn.Linear(self.hidden_state_dim + action_dim, 256),
        nn.ReLU(),
        nn.Linear(256, 1))
    self.to(self.device)
def __init__(self, obs_dim, action_dim, shared_layers, eps=1e-7,
             device=torch.device(CPU), normalize_obs=False,
             normalize_value=False):
    """GRAC network with fully separate policy and critic MLPs.

    NOTE: Separate architecture grants stable learning for GRAC. The
    ``shared_layers`` argument is accepted but not used here (no shared
    trunk is built) — presumably retained for signature compatibility with
    the shared-trunk variants; confirm before removing.

    Args:
        obs_dim: flat observation dimensionality; feeds every head directly.
        action_dim: action dimensionality.
        shared_layers: unused (see note above).
        eps: small constant stored for downstream numerical use.
        device: torch device the module is moved to.
        normalize_obs: forwarded to the base class.
        normalize_value: forwarded to the base class.
    """
    super().__init__(obs_dim=obs_dim,
                     norm_dim=(0,),
                     device=device,
                     normalize_obs=normalize_obs,
                     normalize_value=normalize_value)
    self._eps = eps
    self._action_dim = action_dim
    self._flatten = Flatten()

    def _head(in_dim, out_dim):
        # Two 256-unit hidden layers with ReLU activations.
        return nn.Sequential(
            nn.Linear(in_dim, 256), nn.ReLU(),
            nn.Linear(256, 256), nn.ReLU(),
            nn.Linear(256, out_dim),
        )

    # Policy emits 2 * action_dim values; twin critics take [obs, action].
    self._policy = _head(obs_dim, action_dim * 2)
    self._q1 = _head(obs_dim + action_dim, 1)
    self._q2 = _head(obs_dim + action_dim, 1)
    self.to(self.device)
def __init__(self, obs_dim, action_dim, initial_alpha=1., eps=1e-7,
             device=torch.device(CPU), normalize_obs=False,
             normalize_value=False, **kwargs):
    """SAC-style network with separate policy and twin-critic MLPs.

    Each head is its own two-hidden-layer (256-unit, ReLU) MLP reading the
    raw observation; no shared trunk. Extra keyword arguments are accepted
    and ignored.

    Args:
        obs_dim: flat observation dimensionality.
        action_dim: action dimensionality.
        initial_alpha: initial temperature, forwarded to the base class.
        eps: numerical epsilon, forwarded to the base class.
        device: torch device the module is moved to.
        normalize_obs: forwarded to the base class.
        normalize_value: forwarded to the base class.
    """
    super().__init__(obs_dim=obs_dim,
                     initial_alpha=initial_alpha,
                     eps=eps,
                     norm_dim=(0,),
                     device=device,
                     normalize_obs=normalize_obs,
                     normalize_value=normalize_value)
    self._action_dim = action_dim
    self._flatten = Flatten()

    def _mlp(in_dim, out_dim):
        # Two 256-unit hidden layers with ReLU activations.
        return nn.Sequential(
            nn.Linear(in_dim, 256), nn.ReLU(),
            nn.Linear(256, 256), nn.ReLU(),
            nn.Linear(256, out_dim),
        )

    # Policy emits 2 * action_dim values (presumably mean and raw std per
    # action dim — confirm against the forward pass).
    self._policy = _mlp(obs_dim, action_dim * 2)
    # Twin Q heads over concatenated [obs, action].
    self._q1 = _mlp(obs_dim + action_dim, 1)
    self._q2 = _mlp(obs_dim + action_dim, 1)
    self.to(self.device)
def __init__(self, rec_dim, batch_size, encoder, decoder, dynamics, opt,
             buffer, algo_params, reduction=c.SUM, loss_coef=1.,
             device=torch.device(c.CPU), **kwargs):
    """Wire together an encoder/decoder/dynamics reconstruction model.

    Fix: ``np.product`` was a deprecated alias removed in NumPy 2.0;
    replaced with the canonical ``np.prod`` (identical result).

    Args:
        rec_dim: image dims as (num_images, num_frames, height, width).
        batch_size: training batch size.
        encoder / decoder / dynamics: model components (stored, not built).
        opt: optimizer for the components.
        buffer: data buffer sampled during training.
        algo_params: algorithm config; must contain ``c.TRAIN_PREPROCESSING``.
        reduction: reduction mode for the MSE reconstruction loss.
        loss_coef: scaling applied to the reconstruction loss.
        device: torch device used by the trainer.
        **kwargs: accepted and ignored.
    """
    # Image dim: (num_images, num_frames, height, width)
    assert len(rec_dim) == 4
    super().__init__()
    self._flat = Flatten()
    self._rec_dim = rec_dim
    # np.prod: np.product was removed in NumPy 2.0.
    self._flatten_dim = int(np.prod(rec_dim))
    self._batch_size = batch_size
    self._buffer = buffer
    self._encoder = encoder
    self._decoder = decoder
    self._dynamics = dynamics
    self._opt = opt
    self._loss_coef = loss_coef
    self._mse = torch.nn.MSELoss(reduction=reduction)
    self.device = device
    self.algo_params = algo_params
    self.train_preprocessing = algo_params[c.TRAIN_PREPROCESSING]
def __init__(self, input_dim, output_dim, layers, device=torch.device(CPU)):
    """Network emitting parameters of a factorized Gaussian.

    A configurable linear stack is followed by a single head producing
    ``2 * output_dim`` values — two parameters per output dimension,
    treating the dimensions as independent.

    Args:
        input_dim: input dimensionality (stored for reference).
        output_dim: number of output dimensions.
        layers: layer spec for ``construct_linear_layers``;
            ``layers[-1][1]`` is the final hidden width.
        device: torch device the module is moved to.
    """
    super().__init__()
    self.device = device
    self._input_dim = input_dim
    self._output_dim = output_dim
    self._flatten = Flatten()
    self.fc_layers = construct_linear_layers(layers)
    # Assume independence between dimensions: one (2-parameter) Gaussian
    # per output dim from a single linear head.
    last_width = layers[-1][1]
    self.gaussian_parameters = nn.Linear(last_width, 2 * output_dim)
    self.to(device)
def __init__(self, obs_dim, action_dim, shared_layers,
             device=torch.device(CPU), normalize_obs=False,
             normalize_value=False):
    """Actor-critic network with a shared linear trunk.

    The trunk feeds two linear heads: one of width ``action_dim`` (action)
    and a scalar state-value head.

    Args:
        obs_dim: flat observation dimensionality.
        action_dim: action dimensionality.
        shared_layers: layer spec for ``construct_linear_layers``;
            ``shared_layers[-1][1]`` is the trunk output width.
        device: torch device the module is moved to.
        normalize_obs: forwarded to the base class.
        normalize_value: forwarded to the base class.
    """
    super().__init__(obs_dim=obs_dim,
                     norm_dim=(0,),
                     device=device,
                     normalize_obs=normalize_obs,
                     normalize_value=normalize_value)
    self._flatten = Flatten()
    self.shared_network = construct_linear_layers(shared_layers)
    trunk_width = shared_layers[-1][1]
    self.action = nn.Linear(trunk_width, action_dim)
    self.value = nn.Linear(trunk_width, 1)
    self.to(self.device)
def __init__(self, obs_dim, action_dim, output_dim, layers,
             device=torch.device(CPU)):
    """Generic (obs, action) -> output network.

    A configurable linear stack followed by one linear output head of
    width ``output_dim``.

    Args:
        obs_dim: observation dimensionality (stored for reference).
        action_dim: action dimensionality (stored for reference).
        output_dim: width of the output head.
        layers: layer spec for ``construct_linear_layers``;
            ``layers[-1][1]`` is the final hidden width.
        device: torch device the module is moved to.
    """
    super().__init__()
    self.device = device
    self._obs_dim = obs_dim
    self._action_dim = action_dim
    self._output_dim = output_dim
    self._flatten = Flatten()
    self.fc_layers = construct_linear_layers(layers)
    last_width = layers[-1][1]
    self.output = nn.Linear(last_width, output_dim)
    self.to(device)
def __init__(self, obs_dim, action_dim, task_dim, shared_layers,
             initial_alpha=1., eps=1e-7, device=torch.device(CPU),
             normalize_obs=False, normalize_value=False):
    """Multitask shared-trunk SAC-style network.

    One trunk feeds a policy head emitting ``task_dim * action_dim * 2``
    values and twin Q heads with one output per task. The temperature is a
    per-task ``log_alpha`` vector, and value normalization (when requested)
    uses a per-task ``RunningMeanStd`` built here rather than the base
    class's — hence ``normalize_value=False`` in the ``super()`` call.

    Args:
        obs_dim: flat observation dimensionality.
        action_dim: action dimensionality.
        task_dim: number of tasks.
        shared_layers: layer spec for ``construct_linear_layers``;
            ``shared_layers[-1][1]`` is the trunk output width.
        initial_alpha: initial temperature value for every task.
        eps: numerical epsilon, forwarded to the base class.
        device: torch device the module is moved to.
        normalize_obs: forwarded to the base class.
        normalize_value: enables the per-task value RMS below.
    """
    super().__init__(obs_dim=obs_dim,
                     initial_alpha=initial_alpha,
                     eps=eps,
                     norm_dim=(0,),
                     device=device,
                     normalize_obs=normalize_obs,
                     # Deliberate: per-task value RMS is created locally below.
                     normalize_value=False)
    self._task_dim = task_dim
    self._action_dim = action_dim
    self._flatten = Flatten()
    self._shared_network = construct_linear_layers(shared_layers)

    trunk_width = shared_layers[-1][1]
    # Policy emits task_dim * action_dim * 2 values (presumably per-task
    # mean/raw-std pairs — confirm against the forward pass).
    self._policy = nn.Sequential(
        nn.Linear(trunk_width, 256),
        nn.ReLU(),
        nn.Linear(256, task_dim * action_dim * 2))
    # Twin critics: one Q output per task, input [trunk features, action].
    self._q1 = nn.Sequential(
        nn.Linear(trunk_width + action_dim, 256),
        nn.ReLU(),
        nn.Linear(256, task_dim))
    self._q2 = nn.Sequential(
        nn.Linear(trunk_width + action_dim, 256),
        nn.ReLU(),
        nn.Linear(256, task_dim))
    # Per-task temperature vector (replaces whatever scalar the base class
    # may have set from initial_alpha).
    self._log_alpha = nn.Parameter(
        torch.ones(task_dim) * torch.log(torch.tensor(initial_alpha)))
    self.to(self.device)
    if normalize_value:
        # Per-task running statistics for value normalization; created
        # after .to() as in the original — order preserved.
        self.value_rms = RunningMeanStd(shape=(self._task_dim,),
                                        norm_dim=(0,))
def __init__(self, obs_dim, hidden_state_dim, action_dim, shared_layers,
             device=torch.device(CPU), normalize_obs=False,
             normalize_value=False):
    """Recurrent actor-critic: shared encoder -> LSTM -> linear heads.

    The LSTM hidden state feeds three heads: action mean, action raw std,
    and a scalar state value.

    Args:
        obs_dim: flat observation dimensionality.
        hidden_state_dim: LSTM hidden-state size (exposed by the base class
            as ``self.hidden_state_dim``).
        action_dim: action dimensionality.
        shared_layers: layer spec for ``construct_linear_layers``;
            ``shared_layers[-1][1]`` is the encoder output width.
        device: torch device the module is moved to.
        normalize_obs: forwarded to the base class.
        normalize_value: forwarded to the base class.
    """
    super().__init__(obs_dim=obs_dim,
                     hidden_state_dim=hidden_state_dim,
                     norm_dim=(0,),
                     device=device,
                     normalize_obs=normalize_obs,
                     normalize_value=normalize_value)
    self._flatten = Flatten()
    self.shared_network = construct_linear_layers(shared_layers)
    # Recurrent core over encoder features; batch-first (batch, seq, feat).
    self.lstm_layer = nn.LSTM(input_size=shared_layers[-1][1],
                              hidden_size=self.hidden_state_dim,
                              batch_first=True)
    hidden = self.hidden_state_dim
    self.action_mean = nn.Linear(hidden, action_dim)
    self.action_raw_std = nn.Linear(hidden, action_dim)
    self.value = nn.Linear(hidden, 1)
    self.to(self.device)