コード例 #1
0
    def __init__(self, ob_dim, hid_dim, learning_rate, kl_weight, device):
        """Create the two encoders, the discriminator, the prior and the optimizer.

        Args:
            ob_dim: Input size of each encoder MLP.
            hid_dim: Hidden width of every MLP. Each encoder emits
                ``hid_dim // 2`` outputs and the discriminator takes
                ``hid_dim`` inputs.
            learning_rate: Learning rate handed to Adam.
            kl_weight: Stored on the instance; not used by the constructor.
            device: Device given to every MLP and to the prior tensors.
        """
        # NOTE(review): self.parameters() below implies an nn.Module-like
        # base class -- the class header is not visible here, confirm.
        super().__init__()
        self.ob_dim = ob_dim
        self.hid_dim = hid_dim
        self.learning_rate = learning_rate
        self.kl_weight = kl_weight
        self.device = device

        # Each encoder contributes half of the hid_dim-wide discriminator input.
        latent_dim = self.hid_dim // 2

        # Both encoders share one configuration but are separate networks.
        encoder_config = dict(input_dim=self.ob_dim,
                              output_dim=latent_dim,
                              n_layers=2,
                              size=self.hid_dim,
                              device=self.device,
                              discrete=False)
        self.encoder1 = MLP(**encoder_config)
        self.encoder2 = MLP(**encoder_config)

        self.discriminator = MLP(input_dim=self.hid_dim,
                                 output_dim=1,
                                 n_layers=2,
                                 size=self.hid_dim,
                                 device=self.device,
                                 discrete=True)

        # Zero mean and identity covariance: a standard normal over one
        # encoder's output dimensions.
        zero_mean = torch.zeros(latent_dim).to(self.device)
        identity_cov = torch.eye(latent_dim).to(self.device)
        self.prior = torch.distributions.MultivariateNormal(zero_mean,
                                                            identity_cov)

        self.optimizer = torch.optim.Adam(self.parameters(),
                                          lr=self.learning_rate)
コード例 #2
0
    def __init__(self,
                 ac_dim,
                 ob_dim,
                 n_layers,
                 size,
                 device,
                 learning_rate,
                 training=True,
                 discrete=False,
                 nn_baseline=False,
                 **kwargs):
        """Build the policy network, an optional baseline network and the optimizer.

        Args:
            ac_dim, ob_dim, n_layers, size: Forwarded positionally to ``MLP``.
            device: Stored on the instance and forwarded to ``MLP``.
            learning_rate: Learning rate for Adam.
            training: When true, an Adam optimizer is created; otherwise
                ``self.optimizer`` is never set.
            discrete: Stored on the instance and forwarded to the policy MLP.
            nn_baseline: When true, a second MLP (``baseline_mlp``) is built
                and its parameters are optimized together with the policy's.
            **kwargs: Accepted and ignored.
        """
        super().__init__()

        # Plain configuration flags.
        self.device = device
        self.discrete = discrete
        self.training = training
        self.nn_baseline = nn_baseline

        # NOTE(review): positional order must match MLP's signature, which is
        # not visible from this block -- confirm before reordering.
        self.policy_mlp = MLP(ac_dim, ob_dim, n_layers, size, device, discrete)
        trainable = [*self.policy_mlp.parameters()]

        if self.nn_baseline:
            self.baseline_mlp = MLP(1, ob_dim, n_layers, size, device, True)
            trainable.extend(self.baseline_mlp.parameters())

        # One optimizer covers every network created above.
        if self.training:
            self.optimizer = torch.optim.Adam(trainable, lr=learning_rate)
コード例 #3
0
class MLPPolicy:
    """Policy backed by an MLP, with an optional MLP baseline.

    When ``discrete`` is true the network output is treated as unnormalized
    action logits; otherwise it is indexed as ``output[0]`` (mean) and
    ``output[1]`` (standard deviation) of a normal distribution.
    """

    def __init__(self,
                 ac_dim,
                 ob_dim,
                 n_layers,
                 size,
                 device,
                 learning_rate,
                 training=True,
                 discrete=False,
                 nn_baseline=False,
                 **kwargs):
        """Build the policy network, the optional baseline and the optimizer.

        Args:
            ac_dim, ob_dim, n_layers, size: Forwarded positionally to ``MLP``.
            device: Device used for the networks and for input tensors.
            learning_rate: Learning rate for Adam.
            training: When true, an Adam optimizer is created; otherwise
                ``self.optimizer`` is never set.
            discrete: Selects the categorical (true) or normal (false) head.
            nn_baseline: When true, ``baseline_mlp`` is built and trained by
                the same optimizer.
            **kwargs: Accepted and ignored.
        """
        super().__init__()

        # init vars
        self.device = device
        self.discrete = discrete
        self.training = training
        self.nn_baseline = nn_baseline

        # network architecture
        # NOTE(review): positional order must match MLP's signature, which is
        # defined outside this block -- confirm before reordering.
        self.policy_mlp = MLP(ac_dim, ob_dim, n_layers, size, device, discrete)
        params = list(self.policy_mlp.parameters())
        if self.nn_baseline:
            self.baseline_mlp = MLP(1, ob_dim, n_layers, size, device, True)
            params += list(self.baseline_mlp.parameters())

        # optimizer
        if self.training:
            self.optimizer = torch.optim.Adam(params, lr=learning_rate)

    ##################################

    def update(self, observations, actions):
        """Train the policy; subclasses must override this."""
        raise NotImplementedError

    def get_action(self, obs):
        """Sample an action for ``obs`` from the current policy.

        Args:
            obs: Observation(s) convertible with ``torch.Tensor``.

        Returns:
            A numpy value. In the discrete case this is element ``[0]`` of
            the sampled index array; in the continuous case it is a sample
            from ``Normal(output[0], output[1])``.
        """
        output = self.policy_mlp(torch.Tensor(obs).to(self.device))
        if self.discrete:
            # Normalize over the action axis explicitly; omitting ``dim``
            # relies on deprecated implicit-dimension selection.
            action_probs = nn.functional.log_softmax(output, dim=-1).exp()
            sampled = torch.multinomial(action_probs, num_samples=1)
            return sampled.cpu().detach().numpy()[0]
        return torch.normal(output[0], output[1]).cpu().detach().numpy()

    def get_log_prob(self, network_outputs, actions_taken):
        """Return the log-probability of ``actions_taken`` under the policy.

        Args:
            network_outputs: Output of ``policy_mlp``: logits when discrete,
                otherwise an object indexed as ``[0]`` mean and ``[1]`` std.
            actions_taken: Actions convertible with ``torch.Tensor``.

        Returns:
            Tensor of log-probabilities. In the continuous case the
            per-dimension log-probabilities are summed over the last axis.
        """
        actions_taken = torch.Tensor(actions_taken).to(self.device)
        if self.discrete:
            # Pass logits straight through. Exponentiating first and letting
            # Categorical take the log again clamps probabilities that
            # underflow, which distorts the log-prob of unlikely actions.
            distribution = torch.distributions.Categorical(
                logits=network_outputs)
            return distribution.log_prob(actions_taken)
        distribution = torch.distributions.Normal(network_outputs[0],
                                                  network_outputs[1])
        return distribution.log_prob(actions_taken).sum(-1)
コード例 #4
0
    def __init__(self, ob_dim, hid_dim, learning_rate, kl_weight, device):
        """Set up encoders, discriminator, latent prior and optimizer.

        Args:
            ob_dim: Input size of each encoder MLP.
            hid_dim: Hidden width of every MLP and the total number of
                latent variables (half produced by each encoder).
            learning_rate: Learning rate handed to Adam.
            kl_weight: Stored on the instance; not used by the constructor.
            device: Device given to every MLP and to the prior tensors.
        """
        # NOTE(review): self.parameters() below implies an nn.Module-like
        # base class -- the class header is not visible here, confirm.
        super().__init__()
        self.ob_dim = ob_dim
        self.hid_dim = hid_dim
        self.learning_rate = learning_rate
        self.kl_weight = kl_weight
        self.device = device

        # There are hid_dim latent variables in total, half from each encoder.
        half_latent = self.hid_dim // 2
        shared_mlp_args = {
            'n_layers': 2,
            'size': self.hid_dim,
            'device': self.device,
        }

        self.encoder1 = MLP(input_dim=self.ob_dim,
                            output_dim=half_latent,
                            discrete=False,
                            **shared_mlp_args)
        self.encoder2 = MLP(input_dim=self.ob_dim,
                            output_dim=half_latent,
                            discrete=False,
                            **shared_mlp_args)

        # Single output: 1 if s = s' and 0 if s != s'.
        self.discriminator = MLP(input_dim=self.hid_dim,
                                 output_dim=1,
                                 discrete=True,
                                 **shared_mlp_args)

        # Standard-normal prior with the same dimension as one encoder's
        # output: zero mean, identity covariance (unit variances on the
        # diagonal), both placed on the configured device.
        mean = torch.zeros(half_latent).to(self.device)
        covariance = torch.eye(half_latent).to(self.device)
        self.prior = torch.distributions.MultivariateNormal(mean, covariance)

        self.optimizer = torch.optim.Adam(self.parameters(),
                                          lr=self.learning_rate)
コード例 #5
0
    def __init__(self, hparams):
        """Copy hyperparameters from ``hparams`` and build the value network.

        Args:
            hparams: Mapping that must contain every key listed below; a
                missing key raises ``KeyError``.
        """
        # Mirror each required hyperparameter onto the instance, in order.
        required_keys = (
            'ob_dim',
            'ac_dim',
            'size',
            'n_layers',
            'device',
            'learning_rate',
            'num_target_updates',
            'num_grad_steps_per_target_update',
            'gamma',
        )
        for key in required_keys:
            setattr(self, key, hparams[key])

        # NOTE(review): arguments are positional; presumably (input_dim,
        # output_dim, n_layers, size, device, discrete) -- confirm against MLP.
        value_net = MLP(self.ob_dim, 1, self.n_layers, self.size, self.device,
                        True)
        self.value_func = value_net
        self.optimizer = torch.optim.Adam(value_net.parameters(),
                                          lr=self.learning_rate)
コード例 #6
0
    def __init__(self,
                 ac_dim,
                 ob_dim,
                 n_layers,
                 size,
                 device,
                 learning_rate=0.001):
        """Create the delta-prediction network and its Adam optimizer.

        Args:
            ac_dim: Action size; contributes to the network input width.
            ob_dim: Observation size; contributes to the input width and is
                the network output width.
            n_layers, size: Forwarded to ``MLP``.
            device: Stored on the instance and forwarded to ``MLP``.
            learning_rate: Learning rate for Adam (default 0.001).
        """
        self.device = device

        # The network sees an observation and an action side by side and
        # emits one value per observation dimension.
        network_config = dict(input_dim=ob_dim + ac_dim,
                              output_dim=ob_dim,
                              n_layers=n_layers,
                              size=size,
                              device=self.device,
                              discrete=True)
        self.delta_func = MLP(**network_config)

        trainable = self.delta_func.parameters()
        self.optimizer = torch.optim.Adam(trainable, lr=learning_rate)
コード例 #7
0
    def __init__(self,
                 ac_dim,
                 ob_dim,
                 n_layers,
                 size,
                 device,
                 learning_rate=0.001):
        """Build ``delta_func`` and the optimizer that trains it.

        Args:
            ac_dim: Action size.
            ob_dim: Observation size; also the output width of the network.
            n_layers, size: Forwarded to ``MLP``.
            device: Stored on the instance and forwarded to ``MLP``.
            learning_rate: Learning rate for Adam (default 0.001).
        """
        self.device = device

        # The input is ob_dim + ac_dim wide because the model is a function
        # of both the state and the action, f_theta(s_t, a_t).
        joint_input_dim = ob_dim + ac_dim
        self.delta_func = MLP(input_dim=joint_input_dim,
                              output_dim=ob_dim,
                              n_layers=n_layers,
                              size=size,
                              device=self.device,
                              discrete=True)

        # Adam over the delta network's parameters only.
        delta_params = self.delta_func.parameters()
        self.optimizer = torch.optim.Adam(delta_params, lr=learning_rate)
コード例 #8
0
class BootstrappedContinuousCritic:
    """Value-function critic fitted to bootstrapped one-step targets."""

    def __init__(self, hparams):
        """Copy hyperparameters from ``hparams`` and build the value network.

        Args:
            hparams: Mapping containing ``ob_dim``, ``ac_dim``, ``size``,
                ``n_layers``, ``device``, ``learning_rate``,
                ``num_target_updates``, ``num_grad_steps_per_target_update``
                and ``gamma``. A missing key raises ``KeyError``.
        """
        self.ob_dim = hparams['ob_dim']
        self.ac_dim = hparams['ac_dim']
        self.size = hparams['size']
        self.n_layers = hparams['n_layers']
        self.device = hparams['device']
        self.learning_rate = hparams['learning_rate']
        self.num_target_updates = hparams['num_target_updates']
        self.num_grad_steps_per_target_update = hparams[
            'num_grad_steps_per_target_update']
        self.gamma = hparams['gamma']

        # NOTE(review): arguments are positional; presumably (input_dim,
        # output_dim, n_layers, size, device, discrete) -- confirm against MLP.
        self.value_func = MLP(self.ob_dim, 1, self.n_layers, self.size,
                              self.device, True)
        self.optimizer = torch.optim.Adam(self.value_func.parameters(),
                                          lr=self.learning_rate)

    def update(self, ob_no, next_ob_no, re_n, terminal_n):
        """Fit the value function to bootstrapped targets.

        The target ``re_n + gamma * V(next_ob_no) * (1 - terminal_n)`` is
        recomputed ``num_target_updates`` times; after each recomputation
        ``num_grad_steps_per_target_update`` MSE gradient steps are taken
        against that fixed target.

        Args:
            ob_no: Observations, convertible with ``torch.Tensor``.
            next_ob_no: Next observations, same layout as ``ob_no``.
            re_n: Rewards, one per observation.
            terminal_n: Terminal flags (1 where the episode ended), one per
                observation.

        Returns:
            The loss tensor of the last gradient step.

        Raises:
            UnboundLocalError: If either step count is zero, because no
                gradient step runs and there is no loss to return.
        """
        ob, next_ob, rew, done = (
            torch.Tensor(array).to(self.device)
            for array in (ob_no, next_ob_no, re_n, terminal_n))

        for _ in range(self.num_target_updates):
            # The target is a constant for the regression below. Building it
            # under no_grad keeps the first gradient step after each refresh
            # from backpropagating through V(next_ob) as well.
            with torch.no_grad():
                # squeeze(-1) drops only the value axis, so a batch of one
                # keeps its batch dimension.
                next_value = self.value_func(next_ob).squeeze(-1) * (1 - done)
                target_value = rew + self.gamma * next_value

            for _ in range(self.num_grad_steps_per_target_update):
                self.optimizer.zero_grad()
                prediction = self.value_func(ob).squeeze(-1)
                loss = nn.functional.mse_loss(prediction, target_value)
                loss.backward()
                self.optimizer.step()

        return loss
コード例 #9
0
class FFModel:
    """Feed-forward dynamics model that predicts the change in observation.

    ``delta_func`` maps a concatenated (observation, action) input to an
    output of width ``ob_dim``. ``get_prediction`` un-normalizes that output
    and adds it to the observation; ``update`` fits it with an MSE loss.
    """

    def __init__(self,
                 ac_dim,
                 ob_dim,
                 n_layers,
                 size,
                 device,
                 learning_rate=0.001):
        """Create the delta network and its Adam optimizer.

        Args:
            ac_dim: Action size.
            ob_dim: Observation size; also the network output width.
            n_layers, size: Forwarded to ``MLP``.
            device: Device for the network and for input tensors.
            learning_rate: Learning rate for Adam (default 0.001).
        """
        # init vars
        self.device = device

        # The input is ob_dim + ac_dim wide because the model is a function
        # of both the state and the action, f_theta(s_t, a_t).
        self.delta_func = MLP(input_dim=ob_dim + ac_dim,
                              output_dim=ob_dim,
                              n_layers=n_layers,
                              size=size,
                              device=self.device,
                              discrete=True)

        self.optimizer = torch.optim.Adam(self.delta_func.parameters(),
                                          lr=learning_rate)

    #############################

    @staticmethod
    def _as_batch(array):
        """Drop singleton axes from ``array`` but keep it two-dimensional."""
        array = np.asarray(array)
        squeezed = np.squeeze(array)
        if squeezed.ndim >= 2:
            return squeezed
        if array.ndim >= 2:
            # squeeze removed the batch axis (one sample) or the feature
            # axis (one feature); restore a (batch, features) layout.
            return array.reshape(array.shape[0], -1)
        # A bare vector is treated as one sample, as get_prediction does.
        return array.reshape(1, -1)

    def get_prediction(self, obs, acs, data_statistics):
        """Predict the next observation for ``obs`` and ``acs``.

        Args:
            obs: Observation array; a 1-D input is treated as one sample.
            acs: Action array; a 1-D input is treated as one sample.
            data_statistics: Mapping with ``obs_mean``, ``obs_std``,
                ``acs_mean``, ``acs_std``, ``delta_mean`` and ``delta_std``.

        Returns:
            ``obs`` plus the un-normalized network output.
        """
        if len(obs.shape) == 1 or len(acs.shape) == 1:
            obs = np.squeeze(obs)[None]
            acs = np.squeeze(acs)[None]

        # Normalize inputs with the external ``normalize`` helper.
        norm_obs = normalize(obs, data_statistics['obs_mean'],
                             data_statistics['obs_std'])
        norm_acs = normalize(acs, data_statistics['acs_mean'],
                             data_statistics['acs_std'])

        norm_input = torch.Tensor(np.concatenate((norm_obs, norm_acs),
                                                 axis=1)).to(self.device)
        norm_delta = self.delta_func(norm_input).cpu().detach().numpy()

        # Map the network output back with the external ``unnormalize`` helper.
        delta = unnormalize(norm_delta, data_statistics['delta_mean'],
                            data_statistics['delta_std'])
        return obs + delta

    def update(self, observations, actions, next_observations,
               data_statistics):
        """Take one gradient step on the normalized delta-prediction loss.

        Args:
            observations: Batch of observations.
            actions: Batch of actions taken at ``observations``.
            next_observations: Batch of resulting observations.
            data_statistics: Same mapping as in ``get_prediction``.

        Returns:
            The loss of this step as a Python float.
        """
        # A bare np.squeeze would turn a batch of one (or one-dimensional
        # actions) into a 1-D array and break the axis=1 concatenate below.
        norm_obs = normalize(self._as_batch(observations),
                             data_statistics['obs_mean'],
                             data_statistics['obs_std'])
        norm_acs = normalize(self._as_batch(actions),
                             data_statistics['acs_mean'],
                             data_statistics['acs_std'])

        norm_input = torch.Tensor(np.concatenate((norm_obs, norm_acs),
                                                 axis=1)).to(self.device)
        pred_delta = self.delta_func(norm_input)

        # Give the target the same layout as the inputs so it lines up with
        # the prediction instead of broadcasting against it.
        true_delta = torch.Tensor(
            normalize(self._as_batch(next_observations - observations),
                      data_statistics['delta_mean'],
                      data_statistics['delta_std'])).to(self.device)

        # mse_loss takes (input, target): prediction first, target second.
        loss = nn.functional.mse_loss(pred_delta, true_delta)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        return loss.item()
コード例 #10
0
class FFModel:
    """Dynamics model: an MLP that predicts normalized observation deltas.

    The network input is an observation concatenated with an action; its
    output has width ``ob_dim``. Normalization is delegated to the external
    ``normalize`` / ``unnormalize`` helpers.
    """

    def __init__(self,
                 ac_dim,
                 ob_dim,
                 n_layers,
                 size,
                 device,
                 learning_rate=0.001):
        """Create the delta network and the Adam optimizer that trains it.

        Args:
            ac_dim: Action size.
            ob_dim: Observation size; also the network output width.
            n_layers, size: Forwarded to ``MLP``.
            device: Device for the network and for input tensors.
            learning_rate: Learning rate for Adam (default 0.001).
        """
        # init vars
        self.device = device

        # Input: observation and action side by side; output: one value per
        # observation dimension.
        self.delta_func = MLP(input_dim=ob_dim + ac_dim,
                              output_dim=ob_dim,
                              n_layers=n_layers,
                              size=size,
                              device=self.device,
                              discrete=True)

        self.optimizer = torch.optim.Adam(self.delta_func.parameters(),
                                          lr=learning_rate)

    #############################

    @staticmethod
    def _as_batch(array):
        """Drop singleton axes from ``array`` but keep it two-dimensional."""
        array = np.asarray(array)
        squeezed = np.squeeze(array)
        if squeezed.ndim >= 2:
            return squeezed
        if array.ndim >= 2:
            # squeeze removed the batch axis (one sample) or the feature
            # axis (one feature); restore a (batch, features) layout.
            return array.reshape(array.shape[0], -1)
        # A bare vector is treated as one sample, as get_prediction does.
        return array.reshape(1, -1)

    def get_prediction(self, obs, acs, data_statistics):
        """Return the predicted next observation for ``obs`` and ``acs``.

        Args:
            obs: Observation array; a 1-D input is treated as one sample.
            acs: Action array; a 1-D input is treated as one sample.
            data_statistics: Mapping with ``obs_mean``, ``obs_std``,
                ``acs_mean``, ``acs_std``, ``delta_mean`` and ``delta_std``.

        Returns:
            ``obs`` plus the un-normalized network output.
        """
        if len(obs.shape) == 1 or len(acs.shape) == 1:
            obs = np.squeeze(obs)[None]
            acs = np.squeeze(acs)[None]

        norm_obs = normalize(obs, data_statistics['obs_mean'],
                             data_statistics['obs_std'])
        norm_acs = normalize(acs, data_statistics['acs_mean'],
                             data_statistics['acs_std'])

        norm_input = torch.Tensor(np.concatenate((norm_obs, norm_acs),
                                                 axis=1)).to(self.device)
        norm_delta = self.delta_func(norm_input).cpu().detach().numpy()

        delta = unnormalize(norm_delta, data_statistics['delta_mean'],
                            data_statistics['delta_std'])
        return obs + delta

    def update(self, observations, actions, next_observations,
               data_statistics):
        """Run one optimizer step on the MSE between predicted and true deltas.

        Args:
            observations: Batch of observations.
            actions: Batch of actions taken at ``observations``.
            next_observations: Batch of resulting observations.
            data_statistics: Same mapping as in ``get_prediction``.

        Returns:
            The loss of this step as a Python float.
        """
        # A bare np.squeeze would collapse a batch of one (or one-dimensional
        # actions) to 1-D, and the axis=1 concatenate below would then raise.
        norm_obs = normalize(self._as_batch(observations),
                             data_statistics['obs_mean'],
                             data_statistics['obs_std'])
        norm_acs = normalize(self._as_batch(actions),
                             data_statistics['acs_mean'],
                             data_statistics['acs_std'])

        norm_input = torch.Tensor(np.concatenate((norm_obs, norm_acs),
                                                 axis=1)).to(self.device)
        pred_delta = self.delta_func(norm_input)

        # Lay the target out like the inputs so it matches the prediction
        # element for element rather than broadcasting.
        true_delta = torch.Tensor(
            normalize(self._as_batch(next_observations - observations),
                      data_statistics['delta_mean'],
                      data_statistics['delta_std'])).to(self.device)

        # Loss on normalized quantities; mse_loss takes (input, target).
        loss = nn.functional.mse_loss(pred_delta, true_delta)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        return loss.item()