Example #1
    def __init__(self, obs_shape, image_obs_shape, action_shape, hidden_dim,
                 encoder_type, encoder_feature_dim, log_std_min, log_std_max,
                 num_layers, num_filters):
        super().__init__()

        self.encoder = make_encoder(encoder_type,
                                    image_obs_shape,
                                    encoder_feature_dim,
                                    num_layers,
                                    num_filters,
                                    output_logits=True)
        self.obs_dim = obs_shape[0]
        print('low obs dim:', self.obs_dim, ": ", self.encoder.feature_dim)

        self.log_std_min = log_std_min
        self.log_std_max = log_std_max

        self.trunk = nn.Sequential(
            nn.Linear(self.encoder.feature_dim + self.obs_dim, hidden_dim),
            nn.ReLU(), nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),
            nn.Linear(hidden_dim, 2 * action_shape[0]))
        self.test_layer = nn.Linear(17, 17)

        self.outputs = dict()
        self.apply(weight_init)
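
Most of these examples finish with self.apply(weight_init), but the helper itself is never shown. A minimal sketch of what such an initializer commonly looks like (orthogonal weights and zero biases; the exact scheme used by these repositories is an assumption):

import torch.nn as nn

def weight_init(m):
    # Hypothetical initializer applied via self.apply(weight_init):
    # orthogonal weights, zero biases for linear and conv layers.
    if isinstance(m, (nn.Linear, nn.Conv2d, nn.ConvTranspose2d)):
        nn.init.orthogonal_(m.weight.data)
        if m.bias is not None:
            m.bias.data.fill_(0.0)
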
Example #2
    def __init__(self,
                 obs_shape,
                 encoder_feature_dim,
                 num_layers=4,
                 num_filters=32,
                 num_keypoints=10,
                 sigma=0.1):
        super().__init__()

        assert len(obs_shape) == 3

        self.num_layers = num_layers

        # keynet's image encoder. Similar to AE's image encoder
        self.image_encoder = make_encoder('pixel', obs_shape,
                                          encoder_feature_dim, num_layers,
                                          num_filters)

        self.features_to_score_maps = nn.Conv2d(in_channels=num_filters,
                                                out_channels=num_keypoints,
                                                kernel_size=1,
                                                stride=1,
                                                padding=0,
                                                dilation=1,
                                                groups=1,
                                                bias=True)
        self.num_keypoints = num_keypoints
        self.sigma = sigma

        self.outputs = dict()
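
The constructor above only builds the layers; the score maps produced by features_to_score_maps are typically turned into keypoint coordinates with a spatial soft-argmax. A sketch under that assumption (the function name and the [-1, 1] coordinate convention are not from the source):

import torch
import torch.nn.functional as F

def score_maps_to_keypoints(score_maps):
    # score_maps: (B, K, H, W) -> expected (x, y) coordinates in [-1, 1]
    b, k, h, w = score_maps.shape
    probs = F.softmax(score_maps.view(b, k, -1), dim=-1).view(b, k, h, w)
    xs = torch.linspace(-1.0, 1.0, w, device=score_maps.device)
    ys = torch.linspace(-1.0, 1.0, h, device=score_maps.device)
    expected_x = (probs.sum(dim=2) * xs).sum(dim=-1)  # marginalize over H, weight by x
    expected_y = (probs.sum(dim=3) * ys).sum(dim=-1)  # marginalize over W, weight by y
    return torch.stack([expected_x, expected_y], dim=-1)  # (B, K, 2)
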
Example #3
    def __init__(self, obs_shape, action_shape, hidden_dim, encoder_type,
                 encoder_feature_dim, log_std_min, log_std_max, num_layers,
                 num_filters, hybrid_state_shape, two_conv):
        super().__init__()

        self.encoder = make_encoder(encoder_type,
                                    obs_shape,
                                    encoder_feature_dim,
                                    num_layers,
                                    num_filters,
                                    output_logits=True,
                                    two_conv=two_conv)

        self.log_std_min = log_std_min
        self.log_std_max = log_std_max

        if hybrid_state_shape is None:
            trunk_input_dim = self.encoder.feature_dim
        else:
            trunk_input_dim = self.encoder.feature_dim + hybrid_state_shape[0]
        self.trunk = nn.Sequential(nn.Linear(trunk_input_dim,
                                             hidden_dim), nn.ReLU(),
                                   nn.Linear(hidden_dim, hidden_dim),
                                   nn.ReLU(),
                                   nn.Linear(hidden_dim, 2 * action_shape[0]))

        self.outputs = dict()
        self.apply(weight_init)
Example #4
    def __init__(
        self,
        obs_shape,
        action_shape,
        hidden_dim,
        encoder_type,
        encoder_feature_dim,
        num_layers,
        num_filters,
    ):
        super().__init__()

        self.encoder = make_encoder(
            encoder_type,
            obs_shape,
            encoder_feature_dim,
            num_layers,
            num_filters,
            output_logits=True,
        )

        self.Q1 = QFunction(self.encoder.feature_dim, action_shape[0],
                            hidden_dim)
        self.Q2 = QFunction(self.encoder.feature_dim, action_shape[0],
                            hidden_dim)

        self.outputs = dict()
        self.apply(weight_init)
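
QFunction is not shown in this snippet; a plausible definition is a small MLP over the concatenated feature vector and action (a sketch, not the source's implementation):

import torch
import torch.nn as nn

class QFunction(nn.Module):
    """Plausible Q-head: maps (features, action) to a scalar Q-value."""
    def __init__(self, obs_dim, action_dim, hidden_dim):
        super().__init__()
        self.trunk = nn.Sequential(
            nn.Linear(obs_dim + action_dim, hidden_dim), nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),
            nn.Linear(hidden_dim, 1))

    def forward(self, obs, action):
        assert obs.size(0) == action.size(0)
        return self.trunk(torch.cat([obs, action], dim=1))
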
Example #5
    def __init__(
        self, obs_shape, action_shape, hidden_dim, encoder_type,
        encoder_feature_dim, num_layers, num_filters, key_points, sigma,
        key_net
    ):
        super().__init__()


        self.encoder = make_encoder(
            encoder_type, obs_shape, encoder_feature_dim, num_layers,
            num_filters
        )

        self.key_net = key_net
        self.key_points = key_points

        self.Q1 = QFunction(
            self.encoder.feature_dim + 2 * self.key_points, action_shape[0], hidden_dim
        )
        self.Q2 = QFunction(
            self.encoder.feature_dim + 2 * self.key_points, action_shape[0], hidden_dim
        )

        self.outputs = dict()
        self.apply(weight_init)
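
A hypothetical forward pass for this critic, assuming key_net(obs) returns a (batch, key_points, 2) tensor of keypoint coordinates that is flattened and concatenated with the encoder features before the Q-heads (names and shapes are assumptions):

import torch

def critic_forward(critic, obs, action):
    h = critic.encoder(obs)                         # (B, feature_dim)
    kp = critic.key_net(obs).view(obs.size(0), -1)  # (B, 2 * key_points)
    h = torch.cat([h, kp], dim=-1)
    return critic.Q1(h, action), critic.Q2(h, action)
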
Example #6
    def __init__(
        self, obs_shape, action_shape, hidden_dim, encoder_type,
        encoder_feature_dim, log_std_min, log_std_max, num_layers, num_filters,
        key_points, sigma, key_net):
        super().__init__()

        self.encoder = make_encoder(
            encoder_type, obs_shape, encoder_feature_dim, num_layers,
            num_filters
        )

        self.key_net = key_net

        self.log_std_min = log_std_min
        self.log_std_max = log_std_max
        self.key_points = key_points

        self.trunk = nn.Sequential(
            nn.Linear(self.encoder.feature_dim + 2*self.key_points, hidden_dim), 
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim), 
            nn.ReLU(),
            nn.Linear(hidden_dim, 2 * action_shape[0])
        )

        self.outputs = dict()
        self.apply(weight_init)
Example #7
    def __init__(self, obs_shape):
        super().__init__()
        self.encoder = make_encoder(encoder_type='pixel',
                                    obs_shape=obs_shape,
                                    feature_dim=100,
                                    num_layers=4,
                                    num_filters=32).to(device)

        self.decoder = make_decoder('pixel', obs_shape, 50, 4, 32).to(device)
        self.decoder.apply(weight_init)
Example #8
    def __init__(self, obs_shape, z_dim, encoder_feature_dim, hidden_dim, downsample=True):
        super(CURL, self).__init__()

        # Need to fix the encoders since I do not plan to use the critics
        self.encoder = make_encoder( # Nawid - Encoder of critic which is also used for the contrastive loss
            obs_shape, encoder_feature_dim, downsample = downsample)

        # Encoder target required for momentum encoding
        self.encoder_target = make_encoder( # Nawid - Encoder of critic which is also used for the contrastive loss -  Momentum encoder
            obs_shape, encoder_feature_dim, downsample = downsample)

        self.encoder_target.load_state_dict(self.encoder.state_dict()) # copies the parameters of the encoder into the target encoder which is changing slowly
        self.W = nn.Parameter(torch.rand(z_dim, z_dim)) # Nawid - weight vector for the bilinear product

        # Part related to the dynamics
        self.trunk = nn.Sequential(
            # input size = encoder output + concatenated one-hot action vector
            nn.Linear(self.encoder.feature_dim + n_actions, hidden_dim), nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
        )
        self.W_skip = nn.Parameter(torch.rand(self.encoder.feature_dim, hidden_dim)) # Nawid - weight vector for the skip connection
        self.output_linear = nn.Linear(hidden_dim, z_dim)

        self.apply(weight_init)
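
The bilinear matrix W above is typically used to score anchor embeddings against momentum-encoded positives; a sketch of that contrastive-logits step (not part of the snippet) is:

import torch

def compute_logits(W, z_a, z_pos):
    # z_a, z_pos: (B, z_dim). Bilinear similarity z_a W z_pos^T, with the
    # row-wise max subtracted for numerical stability of the softmax.
    Wz = torch.matmul(W, z_pos.T)    # (z_dim, B)
    logits = torch.matmul(z_a, Wz)   # (B, B)
    return logits - torch.max(logits, dim=1, keepdim=True)[0]
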
Example #9
    def __init__(self, obs_shape, encoder_feature_dim, num_layers, num_filters,
                 num_keypoints, sigma):
        super(IMM, self).__init__()

        self.ImageEncoder = make_encoder('pixel', obs_shape,
                                         encoder_feature_dim, num_layers,
                                         num_filters)
        self.KeyNet = make_keynet(obs_shape, encoder_feature_dim, num_layers,
                                  num_filters, num_keypoints, sigma)
        self.GenNet = make_gennet(
            obs_shape,
            num_filters,
            num_keypoints,
            encoded_image_size=self.KeyNet.image_encoder.out_dim)
        self.output = None
Example #10
    def __init__(self, obs_shape, action_shape, hidden_dim, encoder_type,
                 encoder_feature_dim, num_layers, num_filters,
                 hybrid_state_shape, two_conv):
        super().__init__()

        self.encoder = make_encoder(encoder_type,
                                    obs_shape,
                                    encoder_feature_dim,
                                    num_layers,
                                    num_filters,
                                    output_logits=True,
                                    two_conv=two_conv)

        if hybrid_state_shape is None:
            trunk_input_dim = self.encoder.feature_dim
        else:
            trunk_input_dim = self.encoder.feature_dim + hybrid_state_shape[0]
        self.Q1 = QFunction(trunk_input_dim, action_shape[0], hidden_dim)
        self.Q2 = QFunction(trunk_input_dim, action_shape[0], hidden_dim)

        self.outputs = dict()
        self.apply(weight_init)
Example #11
    def __init__(
        self, obs_shape, action_shape, hidden_dim, encoder_type,
        encoder_feature_dim, log_std_min, log_std_max, num_layers, num_filters
    ):
        super().__init__()

        self.encoder = make_encoder(
            encoder_type, obs_shape, encoder_feature_dim, num_layers,
            num_filters, output_logits=True
        )

        self.log_std_min = log_std_min
        self.log_std_max = log_std_max

        self.trunk = nn.Sequential(
            nn.Linear(self.encoder.feature_dim, hidden_dim), nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),
            nn.Linear(hidden_dim, 2 * action_shape[0])
        )

        self.outputs = dict()
        self.apply(weight_init)
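
The trunk emits 2 * action_shape[0] values; in SAC-style actors these are usually split into a mean and a bounded log standard deviation. One common scheme (an assumption, not shown in the snippet):

import torch

def split_and_bound(trunk_out, log_std_min, log_std_max):
    mu, log_std = trunk_out.chunk(2, dim=-1)
    # squash log_std into [log_std_min, log_std_max] with a tanh
    log_std = torch.tanh(log_std)
    log_std = log_std_min + 0.5 * (log_std_max - log_std_min) * (log_std + 1)
    return mu, log_std
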
Example #12
    def __init__(self, obs_shape, action_shape, hidden_dim, encoder_type,
                 encoder_feature_dim, log_std_min, log_std_max, num_layers,
                 num_filters, lstm_num_layers, lstm_dropout):
        super().__init__()

        self.encoder = make_encoder(encoder_type,
                                    obs_shape,
                                    encoder_feature_dim,
                                    num_layers,
                                    num_filters,
                                    output_logits=True)

        self.log_std_min = log_std_min
        self.log_std_max = log_std_max

        self.lstm = nn.LSTM(self.encoder.feature_dim,
                            hidden_dim,
                            lstm_num_layers,
                            dropout=lstm_dropout)
        self.linear_layer = nn.Linear(hidden_dim, 2 * action_shape[0])

        self.outputs = dict()
        self.apply(weight_init)
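
A small, self-contained shape check for the recurrent head above (the dimensions are placeholders, not values from the snippet): encoded observations are stacked into a (T, B, feature_dim) sequence and the action parameters are read from the last time step.

import torch
import torch.nn as nn

feature_dim, hidden_dim, action_dim, T, B = 50, 256, 4, 10, 8
lstm = nn.LSTM(feature_dim, hidden_dim, num_layers=2, dropout=0.1)
head = nn.Linear(hidden_dim, 2 * action_dim)

seq = torch.randn(T, B, feature_dim)  # sequence of encoded observations
out, _ = lstm(seq)                    # (T, B, hidden_dim)
mu_and_log_std = head(out[-1])        # (B, 2 * action_dim)
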
Example #13
    def __init__(
        self, obs_shape, action_shape, hidden_dim, encoder_type,
        encoder_feature_dim, log_std_min, log_std_max, num_layers, num_filters,
        num_mlp_layers,
    ):
        super().__init__()

        self.encoder = make_encoder(
            encoder_type, obs_shape, encoder_feature_dim, num_layers,
            num_filters, output_logits=True
        )

        self.log_std_min = log_std_min
        self.log_std_max = log_std_max

        self.trunk = D2RLNetwork(
                self.encoder.feature_dim, 
                hidden_dim, 
                2*action_shape[0], 
                num_mlp_layers
        )

        self.outputs = dict()
        self.apply(weight_init)
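
D2RLNetwork is not defined here; a sketch of one plausible version, following the D2RL idea of re-concatenating the input features at every hidden layer (the exact architecture in the source may differ):

import torch
import torch.nn as nn

class D2RLNetwork(nn.Module):
    """Sketch of a D2RL-style MLP with dense input connections."""
    def __init__(self, in_dim, hidden_dim, out_dim, num_layers):
        super().__init__()
        self.layers = nn.ModuleList([nn.Linear(in_dim, hidden_dim)])
        for _ in range(num_layers - 1):
            # every later layer also sees the raw input features
            self.layers.append(nn.Linear(hidden_dim + in_dim, hidden_dim))
        self.out = nn.Linear(hidden_dim, out_dim)

    def forward(self, x):
        h = torch.relu(self.layers[0](x))
        for layer in self.layers[1:]:
            h = torch.relu(layer(torch.cat([h, x], dim=-1)))
        return self.out(h)
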
Example #14
    def __init__(self, obs_dim, action_dim, action_range, device, encoder_type,
                 critic_cfg, actor_cfg, discount, init_temperature, alpha_lr,
                 l2_regularizer_weight, alpha_betas, actor_lr, actor_betas,
                 actor_update_frequency, critic_lr, critic_betas, critic_tau,
                 num_envs, encoder_feature_dim, penalty_type,
                 critic_target_update_frequency, encoder_batch_size,
                 sac_batch_size, penalty_anneal_iters, penalty_weight):
        super().__init__()

        self.action_range = action_range
        self.device = torch.device(device)
        self.discount = discount
        self.critic_tau = critic_tau
        self.actor_update_frequency = actor_update_frequency
        self.critic_target_update_frequency = critic_target_update_frequency
        self.encoder_batch_size = encoder_batch_size
        self.sac_batch_size = sac_batch_size
        self.num_envs = num_envs
        self.l2_regularizer_weight = l2_regularizer_weight
        self.penalty_anneal_iters = penalty_anneal_iters
        self.penalty_weight = penalty_weight
        self.penalty_type = penalty_type

        self.encoder = make_encoder(encoder_type, obs_dim, encoder_feature_dim,
                                    2, 32).to(self.device)
        self.reward_model = nn.Sequential(
            nn.Linear(encoder_feature_dim, 200), nn.ReLU(),
            nn.Linear(200, 200), nn.ReLU(),
            nn.Linear(200, 1)).to(device)
        self.model = make_dynamics_model(encoder_feature_dim, 200,
                                         action_dim).to(device)
        self.critic = hydra.utils.instantiate(critic_cfg).to(self.device)
        self.critic.encoder = self.encoder

        self.critic_target = hydra.utils.instantiate(critic_cfg).to(
            self.device)
        self.critic_target.encoder = make_encoder(encoder_type, obs_dim,
                                                  encoder_feature_dim, 2,
                                                  32).to(self.device)
        self.critic_target.load_state_dict(self.critic.state_dict())

        self.actor = hydra.utils.instantiate(actor_cfg).to(self.device)
        self.actor.encoder = self.encoder

        self.log_alpha = torch.tensor(np.log(init_temperature)).to(self.device)
        self.log_alpha.requires_grad = True
        # set target entropy to -|A|
        self.target_entropy = -action_dim

        # optimizers
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                lr=actor_lr,
                                                betas=actor_betas)

        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 lr=critic_lr,
                                                 betas=critic_betas)

        self.log_alpha_optimizer = torch.optim.Adam([self.log_alpha],
                                                    lr=alpha_lr,
                                                    betas=alpha_betas)
        self.decoder_optimizer = torch.optim.Adam(
            list(self.model.parameters()) +
            list(self.reward_model.parameters()) +
            list(self.encoder.parameters()),
            lr=1e-4,
            weight_decay=1e-5)

        self.train()
        self.critic_target.train()
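
critic_tau and critic_target suggest the usual Polyak-averaged target update; the agent's update method is not shown here, but a minimal sketch of that step is:

def soft_update(net, target_net, tau):
    # Polyak averaging: target <- tau * online + (1 - tau) * target.
    for p, tp in zip(net.parameters(), target_net.parameters()):
        tp.data.copy_(tau * p.data + (1 - tau) * tp.data)
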
Example #15
    def __init__(
        self,
        obs_dim,
        action_dim,
        action_range,
        device,
        encoder_type,
        encoder_feature_dim,
        critic_cfg,
        actor_cfg,
        discount,
        init_temperature,
        alpha_lr,
        alpha_betas,
        actor_lr,
        actor_betas,
        actor_update_frequency,
        critic_lr,
        critic_betas,
        critic_tau,
        critic_target_update_frequency,
        batch_size,
    ):
        super().__init__()

        self.action_range = action_range
        self.device = torch.device(device)
        self.discount = discount
        self.critic_tau = critic_tau
        self.actor_update_frequency = actor_update_frequency
        self.critic_target_update_frequency = critic_target_update_frequency
        self.batch_size = batch_size
        self.encoder = make_encoder(
            encoder_type, obs_dim, encoder_feature_dim, 2, 32
        ).to(self.device)

        self.critic = hydra.utils.instantiate(critic_cfg).to(self.device)
        self.critic.encoder = self.encoder

        self.critic_target = hydra.utils.instantiate(critic_cfg).to(self.device)
        self.critic_target.encoder = self.encoder
        self.critic_target.load_state_dict(self.critic.state_dict())

        self.actor = hydra.utils.instantiate(actor_cfg).to(self.device)
        self.actor.encoder = self.encoder

        self.log_alpha = torch.tensor(np.log(init_temperature)).to(self.device)
        self.log_alpha.requires_grad = True
        # set target entropy to -|A|
        self.target_entropy = -action_dim

        # optimizers
        self.actor_optimizer = torch.optim.Adam(
            self.actor.parameters(), lr=actor_lr, betas=actor_betas
        )

        self.critic_optimizer = torch.optim.Adam(
            self.critic.parameters(), lr=critic_lr, betas=critic_betas
        )

        self.log_alpha_optimizer = torch.optim.Adam(
            [self.log_alpha], lr=alpha_lr, betas=alpha_betas
        )

        self.train()
        self.critic_target.train()
Example #16
    def __init__(
        self,
        obs_dim,
        action_dim,
        action_range,
        device,
        encoder_type,
        num_envs,
        c_ent,
        kld,
        critic_cfg,
        actor_cfg,
        discount,
        init_temperature,
        alpha_lr,
        encoder_lr,
        c_ent_iters,
        alpha_betas,
        actor_lr,
        actor_betas,
        actor_update_frequency,
        decoder_lr,
        decoder_weight_lambda,
        critic_lr,
        critic_betas,
        critic_tau,
        encoder_feature_dim,
        decoder_latent_lambda,
        critic_target_update_frequency,
        batch_size,
    ):
        super().__init__()

        self.action_range = action_range
        self.device = torch.device(device)
        self.discount = discount
        self.critic_tau = critic_tau
        self.actor_update_frequency = actor_update_frequency
        self.critic_target_update_frequency = critic_target_update_frequency
        self.batch_size = batch_size
        self.num_envs = num_envs
        self.encoder_tau = 0.005
        self.decoder_latent_lambda = decoder_latent_lambda
        self.encoder_type = encoder_type
        self.c_ent = c_ent
        self.c_ent_iters = c_ent_iters
        self.kld = kld
        self.encoder = make_encoder(
            encoder_type, obs_dim, encoder_feature_dim, 2, 32
        ).to(self.device)

        self.task_specific_encoders = [
            make_encoder(encoder_type, obs_dim, encoder_feature_dim, 2, 32).to(device)
            for i in range(self.num_envs)
        ]

        self.model = make_dynamics_model(encoder_feature_dim, 200, action_dim).to(
            device
        )
        self.task_specific_models = [
            make_dynamics_model(encoder_feature_dim, 200, action_dim).to(device)
            for i in range(self.num_envs)
        ]
        self.reward_model = nn.Sequential(
            nn.Linear(encoder_feature_dim, 200),
            nn.ReLU(),
            nn.Linear(200, 200),
            nn.ReLU(),
            nn.Linear(200, 1),
        ).to(device)
        self.decoder = nn.Sequential(
            nn.Linear(encoder_feature_dim * 2, 200),
            nn.ReLU(),
            nn.Linear(200, 200),
            nn.ReLU(),
            nn.Linear(200, obs_dim),
        ).to(device)
        self.classifier = nn.Sequential(
            nn.Linear(encoder_feature_dim, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, self.num_envs),
        ).to(device)

        self.critic = hydra.utils.instantiate(critic_cfg).to(self.device)
        self.critic.encoder = self.encoder
        self.critic_target = hydra.utils.instantiate(critic_cfg).to(self.device)
        self.critic_target.encoder = make_encoder(
            encoder_type, obs_dim, encoder_feature_dim, 2, 32
        ).to(self.device)
        self.critic_target.load_state_dict(self.critic.state_dict())

        self.actor = hydra.utils.instantiate(actor_cfg).to(self.device)
        self.actor.encoder = self.encoder

        self.log_alpha = torch.tensor(np.log(init_temperature)).to(self.device)
        self.log_alpha.requires_grad = True
        # set target entropy to -|A|
        self.target_entropy = -action_dim

        # optimizers
        self.actor_optimizer = torch.optim.Adam(
            self.actor.parameters(), lr=actor_lr, betas=actor_betas
        )

        self.critic_optimizer = torch.optim.Adam(
            list(self.critic.parameters()) + list(self.encoder.parameters()),
            lr=critic_lr,
            betas=critic_betas,
        )

        self.log_alpha_optimizer = torch.optim.Adam(
            [self.log_alpha], lr=alpha_lr, betas=alpha_betas
        )

        self.classifier_optimizer = torch.optim.Adam(
            self.classifier.parameters(), lr=actor_lr, betas=actor_betas
        )

        # optimizer for critic encoder for reconstruction loss
        self.encoder_optimizer = torch.optim.Adam(
            self.critic.encoder.parameters(), lr=encoder_lr
        )
        # optimizer for decoder
        task_specific_parameters = [
            params
            for t in (self.task_specific_encoders + self.task_specific_models)
            for params in list(t.parameters())
        ]
        self.decoder_optimizer = torch.optim.Adam(
            list(self.decoder.parameters())
            + list(self.model.parameters())
            + list(self.reward_model.parameters())
            + task_specific_parameters
            + list(self.encoder.parameters()),
            lr=decoder_lr,
            weight_decay=decoder_weight_lambda,
        )

        self.train()
        self.critic_target.train()