Example no. 1
    def __init__(self,
                 ac_dim,
                 ob_dim,
                 n_layers,
                 size,
                 discrete=False,
                 learning_rate=1e-4,
                 training=True,
                 nn_baseline=False,
                 **kwargs):
        super().__init__(**kwargs)

        # init vars
        self.ac_dim = ac_dim
        self.ob_dim = ob_dim
        self.n_layers = n_layers
        self.discrete = discrete
        self.size = size
        self.learning_rate = learning_rate
        self.training = training
        self.nn_baseline = nn_baseline

        if self.discrete:
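            # Discrete actions: the policy outputs a vector of action logits.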
            self.logits_na = ptu.build_mlp(
                input_size=self.ob_dim,
                output_size=self.ac_dim,
                n_layers=self.n_layers,
                size=self.size,
            )
            self.logits_na.to(ptu.device)
            self.mean_net = None
            self.logstd = None
            self.optimizer = optim.Adam(self.logits_na.parameters(),
                                        self.learning_rate)
        else:
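            # Continuous actions: Gaussian policy with a mean network and a state-independent log-std parameter.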
            self.logits_na = None
            self.mean_net = ptu.build_mlp(
                input_size=self.ob_dim,
                output_size=self.ac_dim,
                n_layers=self.n_layers,
                size=self.size,
            )
            self.mean_net.to(ptu.device)
            self.logstd = nn.Parameter(
                torch.zeros(self.ac_dim,
                            dtype=torch.float32,
                            device=ptu.device))
            self.logstd.to(ptu.device)
            self.optimizer = optim.Adam(
                itertools.chain([self.logstd], self.mean_net.parameters()),
                self.learning_rate)
Example no. 2
    def __init__(self, hparams):
        super().__init__()
        self.ob_dim = hparams['ob_dim']
        self.ac_dim = hparams['ac_dim']
        self.discrete = hparams['discrete']
        self.size = hparams['size']
        self.n_layers = hparams['n_layers']
        self.learning_rate = hparams['learning_rate']

        # critic parameters
        self.num_target_updates = hparams['num_target_updates']
        self.num_grad_steps_per_target_update = hparams[
            'num_grad_steps_per_target_update']
        self.gamma = hparams['gamma']
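        # State-value critic V(s): observation in, scalar value estimate out.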
        self.critic_network = ptu.build_mlp(
            self.ob_dim,
            1,
            n_layers=self.n_layers,
            size=self.size,
        )
        self.critic_network.to(ptu.device)
        self.loss = nn.MSELoss()
        self.optimizer = optim.Adam(
            self.critic_network.parameters(),
            self.learning_rate,
        )
Example no. 3
    def __init__(self, ac_dim, ob_dim, n_layers, size, learning_rate=0.001):
        super(FFModel, self).__init__()

        self.ac_dim = ac_dim
        self.ob_dim = ob_dim
        self.n_layers = n_layers
        self.size = size
        self.learning_rate = learning_rate
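        # Dynamics model: predicts the (normalized) change in observation from a concatenated (observation, action) input.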
        self.delta_network = ptu.build_mlp(
            input_size=self.ob_dim + self.ac_dim,
            output_size=self.ob_dim,
            n_layers=self.n_layers,
            size=self.size,
        )
        self.delta_network.to(ptu.device)
        self.optimizer = optim.Adam(
            self.delta_network.parameters(),
            self.learning_rate,
        )
        self.loss = nn.MSELoss()
        self.obs_mean = None
        self.obs_std = None
        self.acs_mean = None
        self.acs_std = None
        self.delta_mean = None
        self.delta_std = None
Example no. 4
    def __init__(self, hparams):
        super().__init__()
        self.ob_dim = hparams['ob_dim']
        self.ac_dim = hparams['ac_dim']
        self.discrete = hparams['discrete']
        self.size = hparams['size']
        self.n_layers = hparams['n_layers']
        self.learning_rate = hparams['learning_rate_valuefn']

        # critic parameters
        self.num_target_updates = hparams['num_target_updates']
        self.gamma = hparams['gamma']
        self.l2_reg = hparams['l2_reg']
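        # Action-value critic: concatenated (observation, action) in, scalar out;
        # init_method is assumed to be a weight-initialization function defined at module scope.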
        self.critic_network = ptu.build_mlp(self.ob_dim + self.ac_dim,
                                            1,
                                            n_layers=self.n_layers,
                                            size=self.size,
                                            init_method=init_method,
                                            activation='relu')
        self.critic_network.to(ptu.device)
        self.loss = nn.MSELoss()
        self.optimizer = optim.Adam(
            self.critic_network.parameters(),
            self.learning_rate,
        )

        self.apply(init_method)
Example no. 5
 def __init__(self, hparams):
     super().__init__()
     self.ob_dim = 3 + 2 * MAX_CAP
     self.ac_dim = 5
     self.is_city = hparams['is_city']
     self.size = hparams['size']
     self.n_layers = hparams['n_layers']
     self.learning_rate = hparams['learning_rate']
     self.n_drivers = hparams['n_drivers']
     self.shared_exp = hparams['shared_exp']
     self.shared_exp_lambda = hparams['shared_exp_lambda']
     # critic parameters
     self.num_target_updates = hparams['num_target_updates']
     self.num_grad_steps_per_target_update = hparams[
         'num_grad_steps_per_target_update']
     self.gamma = hparams['gamma']
     self.critic_networks = []
     self.losses = []
     self.optimizers = []
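     # One critic network, MSE loss, and Adam optimizer per driver, kept in plain Python lists.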
     for i in range(self.n_drivers):
         self.critic_networks.append(
             ptu.build_mlp(
                 self.ob_dim,
                 1,
                 n_layers=self.n_layers,
                 size=self.size,
             ))
         self.critic_networks[i].to(ptu.device)
         self.losses.append(nn.MSELoss())
         self.optimizers.append(
             optim.Adam(
                 self.critic_networks[i].parameters(),
                 self.learning_rate,
             ))
Example no. 6
    def __init__(self, hparams, optimizer_spec, **kwargs):
        super().__init__(**kwargs)
        self.ob_dim = hparams['ob_dim']
        self.output_size = hparams['rnd_output_size']
        self.n_layers = hparams['rnd_n_layers']
        self.size = hparams['rnd_size']
        self.optimizer_spec = optimizer_spec

        # TODO: Create two neural networks:
        # 1) f, the random function we are trying to learn
        # 2) f_hat, the function we are using to learn f
        # WARNING: Make sure you use different types of weight 
        #          initializations for these two functions

        # HINT 1) Check out the method ptu.build_mlp
        # HINT 2) There are two weight init methods defined above

        self.f = ptu.build_mlp(
            input_size=self.ob_dim,
            output_size=self.output_size,
            n_layers=self.n_layers,
            size=self.size,
            init_method=init_method_1,
        )
        self.f_hat = ptu.build_mlp(
            input_size=self.ob_dim,
            output_size=self.output_size,
            n_layers=self.n_layers,
            size=self.size,
            init_method=init_method_2,
        )
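        # Per-sample prediction errors are kept (reduction='none'); in RND these typically serve as exploration bonuses.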
        self.loss = nn.MSELoss(reduction='none')
        
        self.optimizer = self.optimizer_spec.constructor(
            self.f_hat.parameters(),
            **self.optimizer_spec.optim_kwargs
        )
        self.learning_rate_scheduler = optim.lr_scheduler.LambdaLR(
            self.optimizer,
            self.optimizer_spec.learning_rate_schedule,
        )

        self.f.to(ptu.device)
        self.f_hat.to(ptu.device)
Example no. 7
 def __init__(self, params):
     super().__init__()
     self.n_agents = params['n_agents']
     self.ac_dim = params['ac_dim']
     self.avail_ac_dim = params['avail_ac_dim']
     self.ob_dim = params['ob_dim']
     self.encoder_n_layers = params['encoder_n_layers']
     self.decoder_n_layers = params['decoder_n_layers']
     self.size = params['layer_size_per_agent'] * self.n_agents
     self.learning_rate = params['learning_rate']
     self.ent_coef = params['entropy_coefficient']
     self.debugging = params['debugging']
     self.rnn = params['rnn']
     self.max_grad_norm = params['max_grad_norm']
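     # Actor: shared MLP encoder, optional LSTM, and an MLP decoder producing logits over the available actions.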
     self.encoder = ptu.build_mlp(input_size=self.ob_dim,
                                  output_size=self.size,
                                  n_layers=self.encoder_n_layers,
                                  size=self.size, output_activation='relu')
     if self.rnn:
         self.lstm = nn.LSTM(self.size, self.size, batch_first=True)
         self.hc = None
     else:
         self.lstm = None
     self.decoder = ptu.build_mlp(input_size=self.size,
                                  output_size=self.avail_ac_dim,
                                  n_layers=self.decoder_n_layers,
                                  size=self.size, output_activation='identity')
     self.encoder.to(ptu.device)
     if self.rnn:
         self.lstm.to(ptu.device)
     self.decoder.to(ptu.device)
     if self.debugging:
         print(self.ac_dim, self.avail_ac_dim)
     self.print_time = False
     self.t = 0
     # for name, param in self.named_parameters():
     #     print( name, param.shape)
     self.optimizer = optim.Adam(self.parameters(), self.learning_rate)
Example no. 8
 def __init__(self, hparams):
     super().__init__()
     self.n_agents = hparams['n_agents']
     self.ob_dim = hparams['ob_dim']
     self.size = hparams['layer_size_per_agent'] * self.n_agents
     self.lstm_size = hparams['lstm_layer_size_per_agent'] * self.n_agents
     self.encoder_n_layers = hparams['encoder_n_layers']
     self.decoder_n_layers = hparams['decoder_n_layers']
     self.learning_rate = hparams['learning_rate']
     self.num_target_updates = hparams['num_target_updates']
     self.num_grad_steps_per_target_update = hparams[
         'num_grad_steps_per_target_update']
     self.gamma = hparams['gamma']
     self.debugging = hparams['debugging']
     self.rnn = hparams['rnn']
     if self.rnn:
         enc_output_size = self.lstm_size
         dec_input_size = self.lstm_size
     else:
         enc_output_size = self.size
         dec_input_size = self.size
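     # Critic: MLP encoder (optionally followed by an LSTM) and an MLP decoder that outputs a single value estimate.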
     self.encoder = ptu.build_mlp(input_size=self.ob_dim,
                                  output_size=enc_output_size,
                                  n_layers=self.encoder_n_layers,
                                  size=self.size,
                                  activation="tanh",
                                  output_activation='tanh')
     self.lstm = nn.LSTM(self.lstm_size, self.lstm_size, batch_first=True)
     self.decoder = ptu.build_mlp(input_size=dec_input_size,
                                  output_size=1,
                                  n_layers=self.decoder_n_layers,
                                  size=self.size,
                                  activation="tanh",
                                  output_activation='identity')
     self.encoder.to(ptu.device)
     self.lstm.to(ptu.device)
     self.decoder.to(ptu.device)
Example no. 9
 def __init__(self, ac_dim, ob_dim, n_layers, size, **kwargs):
     super().__init__(ac_dim, ob_dim, n_layers, size, **kwargs)
     if self.nn_baseline:
         self.baseline = ptu.build_mlp(
             input_size=self.ob_dim,
             output_size=1,
             n_layers=self.n_layers,
             size=self.size,
         )
         self.baseline.to(ptu.device)
         self.baseline_optimizer = optim.Adam(
             self.baseline.parameters(),
             self.learning_rate,
         )
         self.baseline_loss = nn.MSELoss()
Example no. 10
 def __init__(self,
              ac_dim,
              ob_dim,
              n_layers,
              size,
              shared_exp=False,
              shared_exp_lambda=1.,
              is_city=True,
              learning_rate=1e-4,
              n_drivers=1,
              training=True,
              nn_baseline=False,
              **kwargs):
     super().__init__(**kwargs)
     # init vars
     self.ac_dim = 5
     self.ob_dim = 3 + 2 * MAX_CAP
     self.n_layers = n_layers
     self.is_city = is_city
     self.n_drivers = n_drivers
     self.size = size
     self.learning_rate = learning_rate
     self.training = training
     self.shared_exp = shared_exp
     self.shared_exp_lambda = shared_exp_lambda
     self.nn_baseline = nn_baseline
     if self.is_city:
         self.agent_logits_nets = []
         #self.agent_logstds = []
         self.agent_optimizers = []
         #self.logits_na = None
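         # One scalar-output network per driver over concatenated (observation, action) inputs, each with its own Adam optimizer.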
         for i in range(self.n_drivers):
             self.agent_logits_nets.append(
                 ptu.build_mlp(input_size=self.ob_dim + self.ac_dim,
                               output_size=1,
                               n_layers=self.n_layers,
                               size=self.size))
             #self.agent_logstds.append(nn.Parameter(
             #    torch.zeros(self.ac_dim, dtype=torch.float32, device=ptu.device)
             #))
             self.agent_logits_nets[i].to(ptu.device)
             #self.logstd.to(ptu.device)
             self.agent_optimizers.append(
                 optim.Adam(self.agent_logits_nets[i].parameters(),
                            self.learning_rate))
         #self.logits_na = None
         self.baseline = None
Example no. 11
    def __init__(self, hparams, optimizer_spec, **kwargs):
        super().__init__(**kwargs)
        self.ob_dim = hparams['ob_dim']
        self.output_size = hparams['rnd_output_size']
        self.n_layers = hparams['rnd_n_layers']
        self.size = hparams['rnd_size']
        self.optimizer_spec = optimizer_spec
        self.hash = hparams["hash"]

        # TODO: Create two neural networks:
        # 1) f, the random function we are trying to learn
        # 2) f_hat, the function we are using to learn f
        # WARNING: Make sure you use different types of weight
        #          initializations for these two functions

        # HINT 1) Check out the method ptu.build_mlp
        # HINT 2) There are two weight init methods defined above
        if self.hash:
            self.encoder = ptu.build_mlp(self.ob_dim, self.output_size, self.n_layers, self.size)
            self.decoder = ptu.build_mlp(self.output_size, self.ob_dim, self.n_layers, self.size)
            self.ae_loss = nn.MSELoss()

            self.f = ptu.build_mlp(self.ob_dim, self.output_size, self.n_layers, self.size, init_method=init_method_1)
            self.f_hat = ptu.build_mlp(self.ob_dim, self.output_size, self.n_layers, self.size, init_method=init_method_2)

            self.optimizer = self.optimizer_spec.constructor(
                list(self.encoder.parameters()) + list(self.decoder.parameters()),
                **self.optimizer_spec.optim_kwargs
            )
            self.learning_rate_scheduler = optim.lr_scheduler.LambdaLR(
                self.optimizer,
                self.optimizer_spec.learning_rate_schedule,
            )

            self.counts = defaultdict(int)

            self.pretrain_autoencoder()

        else:
            self.f = ptu.build_mlp(self.ob_dim, self.output_size, self.n_layers, self.size, init_method=init_method_1)
            self.f_hat = ptu.build_mlp(self.ob_dim, self.output_size, self.n_layers, self.size, init_method=init_method_2)

            self.optimizer = self.optimizer_spec.constructor(
                self.f_hat.parameters(),
                **self.optimizer_spec.optim_kwargs
            )
            self.learning_rate_scheduler = optim.lr_scheduler.LambdaLR(
                self.optimizer,
                self.optimizer_spec.learning_rate_schedule,
            )

            self.f.to(ptu.device)
            self.f_hat.to(ptu.device)
Example no. 12
 def __init__(self, hparams):
     super().__init__()
     self.n_agents = hparams['n_agents']
     self.ob_dim = hparams['ob_dim']
     self.size = hparams['layer_size_per_agent'] * self.n_agents
     self.n_layers = hparams['n_layers']
     self.learning_rate = hparams['learning_rate']
     self.num_target_updates = hparams['num_target_updates']
     self.num_grad_steps_per_target_update = hparams[
         'num_grad_steps_per_target_update']
     self.gamma = hparams['gamma']
     output_size = self.size if not isinstance(self.size,
                                               list) else self.size[-1]
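     # Value critic: observation in, one scalar out (the output_size computed above is not used below).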
     self.critic_network_logits = ptu.build_mlp(input_size=self.ob_dim,
                                                output_size=1,
                                                n_layers=self.n_layers,
                                                size=self.size,
                                                activation='relu')
     self.critic_network_logits.to(ptu.device)
Example no. 13
    def __init__(self, hparams, optimizer_spec, **kwargs):
        super().__init__(**kwargs)
        self.ob_dim = hparams['ob_dim']
        self.output_size = hparams['rnd_output_size']
        self.n_layers = hparams['rnd_n_layers']
        self.size = hparams['rnd_size']
        self.optimizer_spec = optimizer_spec

        self.f_hat = ptu.build_mlp(
                input_size=self.ob_dim,
                output_size=self.ob_dim,
                n_layers=self.n_layers,
                size=self.size)
        
        self.optimizer = self.optimizer_spec.constructor(
            self.f_hat.parameters(),
            **self.optimizer_spec.optim_kwargs
        )
        self.learning_rate_scheduler = optim.lr_scheduler.LambdaLR(
            self.optimizer,
            self.optimizer_spec.learning_rate_schedule,
        )

        self.f_hat.to(ptu.device)
Example no. 14
    def __init__(self,
                 ac_dim,
                 ob_dim,
                 n_layers,
                 size,
                 discrete=False,
                 learning_rate=1e-4,
                 training=True,
                 nn_baseline=False,
                 **kwargs):
        super().__init__(**kwargs)

        # init vars
        self.ac_dim = ac_dim
        self.ob_dim = ob_dim
        self.n_layers = n_layers
        self.discrete = discrete
        self.size = size
        self.learning_rate = learning_rate
        self.training = training
        self.nn_baseline = nn_baseline

        if self.discrete:
            self.logits_na = ptu.build_mlp(
                input_size=self.ob_dim,
                output_size=self.ac_dim,
                n_layers=self.n_layers,
                size=self.size,
            )
            self.logits_na.to(ptu.device)
            self.mean_net = None
            self.logstd = None
            self.optimizer = optim.Adam(self.logits_na.parameters(),
                                        self.learning_rate)
        else:
            self.logits_na = None
            self.mean_net = ptu.build_mlp(
                input_size=self.ob_dim,
                output_size=self.ac_dim,
                n_layers=self.n_layers,
                size=self.size,
            )
            # TODO: shouldn't logstd also be a NN?
            self.logstd = nn.Parameter(
                torch.zeros(self.ac_dim,
                            dtype=torch.float32,
                            device=ptu.device))
            self.mean_net.to(ptu.device)
            self.logstd.to(ptu.device)
            self.optimizer = optim.Adam(
                itertools.chain([self.logstd], self.mean_net.parameters()),
                self.learning_rate)
            # Standard normal helper; torch.tensor is used here because ptu.from_numpy
            # typically expects a numpy array rather than a Python float.
            self.normal_dist = distributions.Normal(
                torch.tensor(0.0, device=ptu.device),
                torch.tensor(1.0, device=ptu.device))

        if nn_baseline:
            self.baseline = ptu.build_mlp(
                input_size=self.ob_dim,
                output_size=1,
                n_layers=self.n_layers,
                size=self.size,
            )
            self.baseline.to(ptu.device)
            self.baseline_optimizer = optim.Adam(
                self.baseline.parameters(),
                self.learning_rate,
            )
        else:
            self.baseline = None