def __init__(self, obs_shape):
    """Build a pixel encoder and a pixel decoder on the module-level ``device``.

    Args:
        obs_shape: Observation shape, passed unchanged to both
            ``make_encoder`` and ``make_decoder``.
    """
    super().__init__()

    # Encoder and decoder share the same layer count and filter count.
    depth, width = 4, 32

    pixel_encoder = make_encoder(
        encoder_type='pixel',
        obs_shape=obs_shape,
        feature_dim=100,
        num_layers=depth,
        num_filters=width,
    )
    self.encoder = pixel_encoder.to(device)

    # NOTE(review): the encoder is built with feature_dim=100 while the
    # decoder receives 50 as its third argument -- presumably the encoder
    # output is split in two halves before decoding; confirm against the
    # forward pass.
    pixel_decoder = make_decoder('pixel', obs_shape, 50, depth, width)
    self.decoder = pixel_decoder.to(device)

    # Only the decoder goes through weight_init here.
    self.decoder.apply(weight_init)
def __init__(
    self,
    obs_shape,
    action_shape,
    device,
    hidden_dim=256,
    discount=0.99,
    init_temperature=0.01,
    alpha_lr=1e-3,
    alpha_beta=0.9,
    actor_lr=1e-3,
    actor_beta=0.9,
    actor_log_std_min=-10,
    actor_log_std_max=2,
    actor_update_freq=2,
    critic_lr=1e-3,
    critic_beta=0.9,
    critic_tau=0.005,
    critic_target_update_freq=2,
    encoder_type='pixel',
    encoder_feature_dim=50,
    encoder_lr=1e-3,
    encoder_tau=0.005,
    decoder_type='pixel',
    decoder_lr=1e-3,
    decoder_update_freq=1,
    decoder_latent_lambda=0.0,
    decoder_weight_lambda=0.0,
    num_layers=4,
    num_filters=32,
    behaviour_cloning=False
):
    """Create the agent's networks, temperature parameter and optimizers.

    Args:
        obs_shape: Observation shape, forwarded to the actor, the critics
            and the decoder.
        action_shape: Action shape, forwarded to the actor and critics;
            ``target_entropy`` is set to minus its product.
        device: Device every network and ``log_alpha`` are moved to.
        hidden_dim: Forwarded to ``Actor`` and ``Critic``.
        discount, critic_tau, encoder_tau, actor_update_freq,
        critic_target_update_freq, decoder_update_freq,
        decoder_latent_lambda: Stored on the instance unchanged.
        init_temperature: ``log_alpha`` starts at ``log(init_temperature)``.
        alpha_lr, alpha_beta: Learning rate and first Adam beta of the
            ``log_alpha`` optimizer.
        actor_lr, actor_beta: Same, for the actor optimizer.
        actor_log_std_min, actor_log_std_max: Forwarded to ``Actor``.
        critic_lr, critic_beta: Same as above, for the critic optimizer.
        encoder_type, encoder_feature_dim, num_layers, num_filters:
            Forwarded to the networks (the decoder does not receive
            ``encoder_type``).
        encoder_lr: Learning rate of the Adam optimizer over the critic's
            encoder; only created when a decoder is built.
        decoder_type: ``'identity'`` leaves ``self.decoder`` as ``None``;
            any other value is passed to ``make_decoder``.
        decoder_lr, decoder_weight_lambda: Learning rate and weight decay
            of the decoder optimizer.
        behaviour_cloning: When true, a second ``Actor`` is created as
            ``self.actor_expert``.
    """
    # Values that are only recorded for later use.
    self.device = device
    self.discount = discount
    self.critic_tau = critic_tau
    self.encoder_tau = encoder_tau
    self.actor_update_freq = actor_update_freq
    self.critic_target_update_freq = critic_target_update_freq
    self.decoder_update_freq = decoder_update_freq
    self.decoder_latent_lambda = decoder_latent_lambda
    self.behaviour_cloning = behaviour_cloning

    # Fixed settings with no constructor argument.
    self.polyak_noise = 0.0
    self.max_u = 1.0
    self.random_eps = 0.0

    # Positional arguments common to every Actor / Critic constructor call.
    head_args = (
        obs_shape, action_shape, hidden_dim, encoder_type,
        encoder_feature_dim,
    )
    conv_args = (num_layers, num_filters)

    def new_actor():
        return Actor(
            *head_args, actor_log_std_min, actor_log_std_max, *conv_args
        ).to(device)

    def new_critic():
        return Critic(*head_args, *conv_args).to(device)

    def adam_with_beta(params, lr, beta):
        return torch.optim.Adam(params, lr=lr, betas=(beta, 0.999))

    # Networks are created in a fixed order: actor, optional expert actor,
    # critic, target critic.
    self.actor = new_actor()
    if behaviour_cloning:
        self.actor_expert = new_actor()
    self.critic = new_critic()
    self.critic_target = new_critic()
    # The target critic starts as an exact copy of the critic.
    self.critic_target.load_state_dict(self.critic.state_dict())

    # tie encoders between actor and critic
    self.actor.encoder.copy_conv_weights_from(self.critic.encoder)

    self.log_alpha = torch.tensor(np.log(init_temperature)).to(device)
    self.log_alpha.requires_grad = True
    # set target entropy to -|A|
    self.target_entropy = -np.prod(action_shape)

    self.decoder = None
    if decoder_type != 'identity':
        decoder = make_decoder(
            decoder_type, obs_shape, encoder_feature_dim, *conv_args
        ).to(device)
        decoder.apply(weight_init)
        self.decoder = decoder

        # optimizer for critic encoder for reconstruction loss
        self.encoder_optimizer = torch.optim.Adam(
            self.critic.encoder.parameters(), lr=encoder_lr
        )
        # optimizer for decoder
        self.decoder_optimizer = torch.optim.Adam(
            decoder.parameters(),
            lr=decoder_lr,
            weight_decay=decoder_weight_lambda,
        )

    # Optimizers for the actor, the critic and the temperature.
    self.actor_optimizer = adam_with_beta(
        self.actor.parameters(), actor_lr, actor_beta
    )
    self.critic_optimizer = adam_with_beta(
        self.critic.parameters(), critic_lr, critic_beta
    )
    self.log_alpha_optimizer = adam_with_beta(
        [self.log_alpha], alpha_lr, alpha_beta
    )

    self.train()
    self.critic_target.train()
def __init__(
    self,
    obs_shape,
    action_shape,
    device,
    hidden_dim=256,
    discount=0.99,
    init_temperature=0.01,
    alpha_lr=1e-3,
    alpha_beta=0.9,
    actor_lr=1e-3,
    actor_beta=0.9,
    actor_log_std_min=-10,
    actor_log_std_max=2,
    actor_update_freq=2,
    critic_lr=1e-3,
    critic_beta=0.9,
    critic_tau=0.005,
    critic_target_update_freq=2,
    encoder_type='pixel',
    encoder_feature_dim=50,
    encoder_lr=1e-3,
    encoder_tau=0.005,
    decoder_type='pixel',
    decoder_lr=1e-3,
    decoder_update_freq=1,
    decoder_latent_lambda=0.0,
    decoder_weight_lambda=0.0,
    num_layers=4,
    num_filters=32,
    key_points=9,
    sigma=0.1
):
    """Create the keypoint-based agent from a pretrained IMM checkpoint.

    The IMM model is built with ``make_imm`` and its weights are restored
    from the checkpoint at ``path_pretrained_keynet_9``. Its ``KeyNet``
    attribute is handed to the actor, the critic and the target critic.

    Args:
        obs_shape: Observation shape, forwarded to the IMM model, the
            actor, the critics and the decoder.
        action_shape: Action shape, forwarded to the actor and critics;
            ``target_entropy`` is set to minus its product.
        device: Device every network and ``log_alpha`` are moved to. The
            checkpoint is also mapped onto this device while loading.
        hidden_dim: Forwarded to ``Actor`` and ``Critic``.
        discount, critic_tau, encoder_tau, actor_update_freq,
        critic_target_update_freq, decoder_update_freq,
        decoder_latent_lambda: Stored on the instance unchanged.
        init_temperature: ``log_alpha`` starts at ``log(init_temperature)``.
        alpha_lr, alpha_beta: Learning rate and first Adam beta of the
            ``log_alpha`` optimizer.
        actor_lr, actor_beta: Same, for the actor optimizer.
        actor_log_std_min, actor_log_std_max: Forwarded to ``Actor``.
        critic_lr, critic_beta: Same as above, for the critic optimizer.
        encoder_type, encoder_feature_dim, num_layers, num_filters:
            Forwarded to the networks (the IMM model and the decoder do
            not receive ``encoder_type``).
        encoder_lr: Learning rate of the Adam optimizer over the critic's
            encoder; only created when a decoder is built.
        decoder_type: ``'identity'`` leaves ``self.decoder`` as ``None``;
            any other value is passed to ``make_decoder``.
        decoder_lr, decoder_weight_lambda: Learning rate and weight decay
            of both the decoder optimizer and the IMM optimizer.
        key_points, sigma: Forwarded to ``make_imm``, ``Actor`` and
            ``Critic``.
    """
    self.device = device
    self.discount = discount
    self.critic_tau = critic_tau
    self.encoder_tau = encoder_tau
    self.actor_update_freq = actor_update_freq
    self.critic_target_update_freq = critic_target_update_freq
    self.decoder_update_freq = decoder_update_freq
    self.decoder_latent_lambda = decoder_latent_lambda

    self.imm = make_imm(
        obs_shape, encoder_feature_dim, num_layers, num_filters,
        key_points, sigma
    ).to(device)
    # map_location keeps the load from failing when the checkpoint was
    # saved on a device that does not exist on this host (e.g. a CUDA
    # checkpoint restored on a CPU-only machine).
    imm_checkpoint = torch.load(
        path_pretrained_keynet_9, map_location=torch.device(device)
    )
    self.imm.load_state_dict(imm_checkpoint["model_state_dict"])

    # The same KeyNet object is passed to all three networks.
    keynet = self.imm.KeyNet
    keypoint_args = (key_points, sigma, keynet)

    self.actor = Actor(
        obs_shape, action_shape, hidden_dim, encoder_type,
        encoder_feature_dim, actor_log_std_min, actor_log_std_max,
        num_layers, num_filters, *keypoint_args
    ).to(device)

    self.critic = Critic(
        obs_shape, action_shape, hidden_dim, encoder_type,
        encoder_feature_dim, num_layers, num_filters, *keypoint_args
    ).to(device)

    self.critic_target = Critic(
        obs_shape, action_shape, hidden_dim, encoder_type,
        encoder_feature_dim, num_layers, num_filters, *keypoint_args
    ).to(device)
    # The target critic starts as an exact copy of the critic.
    self.critic_target.load_state_dict(self.critic.state_dict())

    # tie encoders between actor and critic
    self.actor.encoder.copy_conv_weights_from(self.critic.encoder)

    self.log_alpha = torch.tensor(np.log(init_temperature)).to(device)
    self.log_alpha.requires_grad = True
    # set target entropy to -|A|
    self.target_entropy = -np.prod(action_shape)

    self.decoder = None
    if decoder_type != 'identity':
        # create decoder; it additionally receives the actor's encoder
        self.decoder = make_decoder(
            decoder_type, obs_shape, encoder_feature_dim, num_layers,
            num_filters, self.actor.encoder
        ).to(device)
        self.decoder.apply(weight_init)

        # optimizer for critic encoder for reconstruction loss
        self.encoder_optimizer = torch.optim.Adam(
            self.critic.encoder.parameters(), lr=encoder_lr
        )

        # optimizer for decoder
        self.decoder_optimizer = torch.optim.Adam(
            self.decoder.parameters(),
            lr=decoder_lr,
            weight_decay=decoder_weight_lambda
        )

    # optimizer for imm, resumed from the checkpoint's optimizer state.
    # It depends only on self.imm and the checkpoint, so it is built
    # regardless of decoder_type.
    # NOTE(review): confirm the IMM optimizer is wanted when
    # decoder_type == 'identity'.
    self.imm_optimizer = torch.optim.Adam(
        self.imm.parameters(),
        lr=decoder_lr,
        weight_decay=decoder_weight_lambda
    )
    self.imm_optimizer.load_state_dict(
        imm_checkpoint['optimizer_state_dict']
    )

    # optimizers
    self.actor_optimizer = torch.optim.Adam(
        self.actor.parameters(), lr=actor_lr, betas=(actor_beta, 0.999)
    )

    self.critic_optimizer = torch.optim.Adam(
        self.critic.parameters(), lr=critic_lr, betas=(critic_beta, 0.999)
    )

    self.log_alpha_optimizer = torch.optim.Adam(
        [self.log_alpha], lr=alpha_lr, betas=(alpha_beta, 0.999)
    )

    self.train()
    self.critic_target.train()
def __init__(
    self,
    obs_shape,
    action_shape,
    device,
    hidden_dim=256,
    discount=0.99,
    init_temperature=0.01,
    alpha_lr=1e-3,
    alpha_beta=0.9,
    actor_lr=1e-3,
    actor_beta=0.9,
    actor_log_std_min=-10,
    actor_log_std_max=2,
    actor_update_freq=2,
    encoder_stride=2,
    critic_lr=1e-3,
    critic_beta=0.9,
    critic_tau=0.005,
    critic_target_update_freq=2,
    encoder_type='pixel',
    encoder_feature_dim=50,
    encoder_lr=1e-3,
    encoder_tau=0.005,
    decoder_type='pixel',
    decoder_lr=1e-3,
    decoder_update_freq=1,
    decoder_weight_lambda=0.0,
    transition_model_type='deterministic',
    num_layers=4,
    num_filters=32,
):
    """Create the agent's networks, latent models and optimizers.

    Besides actor and critics, a transition model and a reward decoder are
    built; both are trained by ``decoder_optimizer``, together with the
    pixel decoder when one is created.

    Args:
        obs_shape: Observation shape, forwarded to the actor, the critics
            and the pixel decoder.
        action_shape: Action shape, forwarded to the actor, critics and
            transition model; ``action_shape[0]`` widens the reward
            decoder's input and ``target_entropy`` is minus its product.
        device: Device every network and ``log_alpha`` are moved to.
        hidden_dim: Forwarded to ``Actor`` and ``Critic``.
        discount, critic_tau, encoder_tau, actor_update_freq,
        critic_target_update_freq, decoder_update_freq: Stored on the
            instance unchanged.
        init_temperature: ``log_alpha`` starts at ``log(init_temperature)``.
        alpha_lr, alpha_beta: Learning rate and first Adam beta of the
            ``log_alpha`` optimizer.
        actor_lr, actor_beta: Same, for the actor optimizer.
        actor_log_std_min, actor_log_std_max: Forwarded to ``Actor``.
        encoder_stride: Forwarded to ``Actor`` and ``Critic`` as their
            last positional argument.
        critic_lr, critic_beta: Same as above, for the critic optimizer.
        encoder_type, encoder_feature_dim, num_layers, num_filters:
            Forwarded to the networks (the pixel decoder does not
            receive ``encoder_type``).
        encoder_lr: Learning rate of the Adam optimizer over the critic's
            encoder.
        decoder_type: ``'reconstruction'`` is rewritten to ``'pixel'`` and
            sets ``self.reconstruction``; a pixel decoder is built only
            when the resulting type is ``'pixel'``.
        decoder_lr, decoder_weight_lambda: Learning rate and weight decay
            of ``decoder_optimizer``.
        transition_model_type: Forwarded to ``make_transition_model``.
    """
    # 'reconstruction' is an alias for 'pixel' that also raises a flag.
    if decoder_type == 'reconstruction':
        self.reconstruction = True
        decoder_type = 'pixel'
    else:
        self.reconstruction = False

    self.device = device
    self.discount = discount
    self.critic_tau = critic_tau
    self.encoder_tau = encoder_tau
    self.actor_update_freq = actor_update_freq
    self.critic_target_update_freq = critic_target_update_freq
    self.decoder_update_freq = decoder_update_freq
    self.decoder_type = decoder_type

    # Positional arguments common to every Actor / Critic constructor call.
    head_args = (
        obs_shape, action_shape, hidden_dim, encoder_type,
        encoder_feature_dim,
    )
    conv_args = (num_layers, num_filters)

    def new_critic():
        return Critic(*head_args, *conv_args, encoder_stride).to(device)

    def adam_with_beta(params, lr, beta):
        return torch.optim.Adam(params, lr=lr, betas=(beta, 0.999))

    # Networks are created in a fixed order: actor, critic, target critic.
    self.actor = Actor(
        *head_args, actor_log_std_min, actor_log_std_max, *conv_args,
        encoder_stride
    ).to(device)
    self.critic = new_critic()
    self.critic_target = new_critic()
    # The target critic starts as an exact copy of the critic.
    self.critic_target.load_state_dict(self.critic.state_dict())

    self.transition_model = make_transition_model(
        transition_model_type, encoder_feature_dim, action_shape
    ).to(device)

    reward_hidden = 512
    self.reward_decoder = nn.Sequential(
        nn.Linear(encoder_feature_dim + action_shape[0], reward_hidden),
        nn.LayerNorm(reward_hidden),
        nn.ReLU(),
        nn.Linear(reward_hidden, 1),
    ).to(device)

    # Parameters handled by the decoder optimizer; the pixel decoder's
    # parameters are appended below when it exists.
    decoder_params = [
        *self.transition_model.parameters(),
        *self.reward_decoder.parameters(),
    ]

    # tie encoders between actor and critic
    self.actor.encoder.copy_conv_weights_from(self.critic.encoder)

    self.log_alpha = torch.tensor(np.log(init_temperature)).to(device)
    self.log_alpha.requires_grad = True
    # set target entropy to -|A|
    self.target_entropy = -np.prod(action_shape)

    self.decoder = None
    if decoder_type == 'pixel':
        pixel_decoder = make_decoder(
            decoder_type, obs_shape, encoder_feature_dim, *conv_args
        ).to(device)
        pixel_decoder.apply(weight_init)
        self.decoder = pixel_decoder
        decoder_params.extend(pixel_decoder.parameters())

    self.decoder_optimizer = torch.optim.Adam(
        decoder_params,
        lr=decoder_lr,
        weight_decay=decoder_weight_lambda,
    )

    # optimizer for critic encoder for reconstruction loss
    self.encoder_optimizer = torch.optim.Adam(
        self.critic.encoder.parameters(), lr=encoder_lr
    )

    # Optimizers for the actor, the critic and the temperature.
    self.actor_optimizer = adam_with_beta(
        self.actor.parameters(), actor_lr, actor_beta
    )
    self.critic_optimizer = adam_with_beta(
        self.critic.parameters(), critic_lr, critic_beta
    )
    self.log_alpha_optimizer = adam_with_beta(
        [self.log_alpha], alpha_lr, alpha_beta
    )

    self.train()
    self.critic_target.train()