def __init__(self, ac_dim, ob_dim, n_layers, size, discrete=False,
             learning_rate=1e-4, training=True, nn_baseline=False, **kwargs):
    """Build the policy networks.

    Discrete action spaces get a logits MLP (categorical policy); continuous
    spaces get a mean MLP plus a state-independent log-std parameter
    (diagonal Gaussian policy).

    Args:
        ac_dim: action dimension (number of actions if discrete).
        ob_dim: observation dimension (MLP input size).
        n_layers: hidden layer count for ptu.build_mlp.
        size: hidden layer width for ptu.build_mlp.
        discrete: True for a categorical policy, False for Gaussian.
        learning_rate: Adam step size.
        training: stored flag; not otherwise used in this constructor.
        nn_baseline: stored flag; not otherwise used in this constructor.
    """
    super().__init__(**kwargs)

    # init vars
    self.ac_dim = ac_dim
    self.ob_dim = ob_dim
    self.n_layers = n_layers
    self.discrete = discrete
    self.size = size
    self.learning_rate = learning_rate
    self.training = training
    self.nn_baseline = nn_baseline

    if self.discrete:
        # Categorical policy: observations -> action logits.
        self.logits_na = ptu.build_mlp(
            input_size=self.ob_dim,
            output_size=self.ac_dim,
            n_layers=self.n_layers,
            size=self.size,
        )
        self.logits_na.to(ptu.device)
        self.mean_net = None
        self.logstd = None
        self.optimizer = optim.Adam(self.logits_na.parameters(),
                                    self.learning_rate)
    else:
        # Gaussian policy: observations -> mean, with a learned log-std
        # shared across states.
        self.logits_na = None
        self.mean_net = ptu.build_mlp(
            input_size=self.ob_dim,
            output_size=self.ac_dim,
            n_layers=self.n_layers,
            size=self.size,
        )
        self.mean_net.to(ptu.device)
        self.logstd = nn.Parameter(
            torch.zeros(self.ac_dim, dtype=torch.float32, device=ptu.device))
        # Fix: the original also called `self.logstd.to(ptu.device)` here.
        # Tensor.to() is not in-place and its result was discarded, and the
        # parameter is already constructed on ptu.device — the call was a
        # no-op and has been removed.
        self.optimizer = optim.Adam(
            itertools.chain([self.logstd], self.mean_net.parameters()),
            self.learning_rate)
def __init__(self, hparams):
    """Bootstrapped value critic: an MLP mapping observations to V(s).

    Args:
        hparams: dict with keys 'ob_dim', 'ac_dim', 'discrete', 'size',
            'n_layers', 'learning_rate', 'num_target_updates',
            'num_grad_steps_per_target_update', 'gamma'.
    """
    super().__init__()

    # Network / optimization hyperparameters.
    self.ob_dim = hparams['ob_dim']
    self.ac_dim = hparams['ac_dim']
    self.discrete = hparams['discrete']
    self.size = hparams['size']
    self.n_layers = hparams['n_layers']
    self.learning_rate = hparams['learning_rate']

    # Target-update schedule for bootstrapped regression.
    self.num_target_updates = hparams['num_target_updates']
    self.num_grad_steps_per_target_update = \
        hparams['num_grad_steps_per_target_update']
    self.gamma = hparams['gamma']

    # Scalar value head over observations.
    self.critic_network = ptu.build_mlp(
        self.ob_dim,
        1,
        n_layers=self.n_layers,
        size=self.size,
    )
    self.critic_network.to(ptu.device)
    self.loss = nn.MSELoss()
    self.optimizer = optim.Adam(self.critic_network.parameters(),
                                self.learning_rate)
def __init__(self, ac_dim, ob_dim, n_layers, size, learning_rate=0.001):
    """Feed-forward dynamics model predicting state deltas from (obs, ac).

    Args:
        ac_dim: action dimension.
        ob_dim: observation dimension.
        n_layers: hidden layer count for ptu.build_mlp.
        size: hidden layer width for ptu.build_mlp.
        learning_rate: Adam step size.
    """
    super(FFModel, self).__init__()

    self.ac_dim = ac_dim
    self.ob_dim = ob_dim
    self.n_layers = n_layers
    self.size = size
    self.learning_rate = learning_rate

    # Input is the concatenated (observation, action) vector; output has
    # the same dimension as the observation (a predicted delta).
    self.delta_network = ptu.build_mlp(
        input_size=self.ob_dim + self.ac_dim,
        output_size=self.ob_dim,
        n_layers=self.n_layers,
        size=self.size,
    )
    self.delta_network.to(ptu.device)
    self.optimizer = optim.Adam(self.delta_network.parameters(),
                                self.learning_rate)
    self.loss = nn.MSELoss()

    # Normalization statistics — populated later by the training code.
    self.obs_mean = self.obs_std = None
    self.acs_mean = self.acs_std = None
    self.delta_mean = self.delta_std = None
def __init__(self, hparams):
    """Q-style critic over concatenated (observation, action) inputs.

    Args:
        hparams: dict with keys 'ob_dim', 'ac_dim', 'discrete', 'size',
            'n_layers', 'learning_rate_valuefn', 'num_target_updates',
            'gamma', 'l2_reg'.
    """
    super().__init__()

    self.ob_dim = hparams['ob_dim']
    self.ac_dim = hparams['ac_dim']
    self.discrete = hparams['discrete']
    self.size = hparams['size']
    self.n_layers = hparams['n_layers']
    self.learning_rate = hparams['learning_rate_valuefn']

    # critic parameters
    self.num_target_updates = hparams['num_target_updates']
    self.gamma = hparams['gamma']
    self.l2_reg = hparams['l2_reg']

    # Scalar head over the joint (obs, action) vector.
    self.critic_network = ptu.build_mlp(
        self.ob_dim + self.ac_dim,
        1,
        n_layers=self.n_layers,
        size=self.size,
        init_method=init_method,
        activation='relu',
    )
    self.critic_network.to(ptu.device)
    self.loss = nn.MSELoss()
    self.optimizer = optim.Adam(self.critic_network.parameters(),
                                self.learning_rate)
    # NOTE(review): build_mlp already received init_method above, so this
    # re-applies the same initializer over the whole module tree —
    # presumably intentional (covers any submodules build_mlp missed);
    # confirm.
    self.apply(init_method)
def __init__(self, hparams):
    """Per-driver value critics: one V(s) MLP, loss, and optimizer per driver.

    Args:
        hparams: dict with keys 'is_city', 'size', 'n_layers',
            'learning_rate', 'n_drivers', 'shared_exp', 'shared_exp_lambda',
            'num_target_updates', 'num_grad_steps_per_target_update',
            'gamma'.
    """
    super().__init__()

    # Observation/action dimensions are fixed by the environment.
    self.ob_dim = 3 + 2 * MAX_CAP
    self.ac_dim = 5
    self.is_city = hparams['is_city']
    self.size = hparams['size']
    self.n_layers = hparams['n_layers']
    self.learning_rate = hparams['learning_rate']
    self.n_drivers = hparams['n_drivers']
    self.shared_exp = hparams['shared_exp']
    self.shared_exp_lambda = hparams['shared_exp_lambda']

    # critic parameters
    self.num_target_updates = hparams['num_target_updates']
    self.num_grad_steps_per_target_update = \
        hparams['num_grad_steps_per_target_update']
    self.gamma = hparams['gamma']

    # Fix: use nn.ModuleList instead of a plain Python list so the
    # per-driver networks are registered submodules — they now appear in
    # self.parameters(), state_dict(), and module-wide .to()/train()/eval().
    # A plain list silently excluded them from all of those.
    self.critic_networks = nn.ModuleList()
    self.losses = []
    self.optimizers = []
    for _ in range(self.n_drivers):
        net = ptu.build_mlp(
            self.ob_dim,
            1,
            n_layers=self.n_layers,
            size=self.size,
        )
        net.to(ptu.device)
        self.critic_networks.append(net)
        self.losses.append(nn.MSELoss())
        self.optimizers.append(optim.Adam(net.parameters(),
                                          self.learning_rate))
def __init__(self, hparams, optimizer_spec, **kwargs):
    """RND exploration model: a frozen random target f and a predictor f_hat.

    f is a randomly-initialized network (init_method_1); f_hat is trained
    (via optimizer_spec) to match f's outputs — prediction error serves as
    an exploration bonus. The two networks deliberately use different
    weight initializations so they start far apart.

    Args:
        hparams: dict with keys 'ob_dim', 'rnd_output_size',
            'rnd_n_layers', 'rnd_size'.
        optimizer_spec: provides the optimizer constructor, its kwargs, and
            a learning-rate schedule.
    """
    super().__init__(**kwargs)

    self.ob_dim = hparams['ob_dim']
    self.output_size = hparams['rnd_output_size']
    self.n_layers = hparams['rnd_n_layers']
    self.size = hparams['rnd_size']
    self.optimizer_spec = optimizer_spec

    # Shared architecture; only the weight initialization differs.
    mlp_kwargs = dict(
        input_size=self.ob_dim,
        output_size=self.output_size,
        n_layers=self.n_layers,
        size=self.size,
    )
    self.f = ptu.build_mlp(init_method=init_method_1, **mlp_kwargs)
    self.f_hat = ptu.build_mlp(init_method=init_method_2, **mlp_kwargs)

    # Per-element loss: callers reduce it themselves (e.g. per-state bonus).
    self.loss = nn.MSELoss(reduction='none')

    # Only f_hat is optimized; f stays at its random initialization.
    self.optimizer = self.optimizer_spec.constructor(
        self.f_hat.parameters(),
        **self.optimizer_spec.optim_kwargs
    )
    self.learning_rate_scheduler = optim.lr_scheduler.LambdaLR(
        self.optimizer,
        self.optimizer_spec.learning_rate_schedule,
    )

    self.f.to(ptu.device)
    self.f_hat.to(ptu.device)
def __init__(self, params):
    """Multi-agent actor: encoder MLP -> optional LSTM -> decoder MLP.

    The decoder emits logits over the available joint actions
    (avail_ac_dim). Hidden width scales with the number of agents.

    Args:
        params: dict with keys 'n_agents', 'ac_dim', 'avail_ac_dim',
            'ob_dim', 'encoder_n_layers', 'decoder_n_layers',
            'layer_size_per_agent', 'learning_rate', 'entropy_coefficient',
            'debugging', 'rnn', 'max_grad_norm'.
    """
    super().__init__()

    self.n_agents = params['n_agents']
    self.ac_dim = params['ac_dim']
    self.avail_ac_dim = params['avail_ac_dim']
    self.ob_dim = params['ob_dim']
    self.encoder_n_layers = params['encoder_n_layers']
    self.decoder_n_layers = params['decoder_n_layers']
    # Total hidden width grows linearly with the agent count.
    self.size = params['layer_size_per_agent'] * self.n_agents
    self.learning_rate = params['learning_rate']
    self.ent_coef = params['entropy_coefficient']
    self.debugging = params['debugging']
    self.rnn = params['rnn']
    self.max_grad_norm = params['max_grad_norm']

    # Observation encoder with a relu output so the LSTM/decoder sees a
    # non-linear embedding.
    self.encoder = ptu.build_mlp(input_size=self.ob_dim,
                                 output_size=self.size,
                                 n_layers=self.encoder_n_layers,
                                 size=self.size,
                                 output_activation='relu')

    # Optional recurrent core; hidden/cell state tracked in self.hc.
    if self.rnn:
        self.lstm = nn.LSTM(self.size, self.size, batch_first=True)
        self.hc = None
    else:
        self.lstm = None

    # Decoder emits raw logits (identity output activation).
    self.decoder = ptu.build_mlp(input_size=self.size,
                                 output_size=self.avail_ac_dim,
                                 n_layers=self.decoder_n_layers,
                                 size=self.size,
                                 output_activation='identity')

    self.encoder.to(ptu.device)
    if self.rnn:
        self.lstm.to(ptu.device)
    self.decoder.to(ptu.device)

    if self.debugging:
        print(self.ac_dim, self.avail_ac_dim)

    self.print_time = False
    self.t = 0

    # All submodules are attributes, so self.parameters() covers
    # encoder, lstm (when present), and decoder.
    self.optimizer = optim.Adam(self.parameters(), self.learning_rate)
def __init__(self, hparams):
    """Multi-agent critic: encoder MLP -> LSTM (if rnn) -> scalar decoder.

    When rnn is enabled the encoder output / decoder input widths match
    the LSTM's hidden size; otherwise they use the plain MLP width.

    Args:
        hparams: dict with keys 'n_agents', 'ob_dim',
            'layer_size_per_agent', 'lstm_layer_size_per_agent',
            'encoder_n_layers', 'decoder_n_layers', 'learning_rate',
            'num_target_updates', 'num_grad_steps_per_target_update',
            'gamma', 'debugging', 'rnn'.
    """
    super().__init__()

    self.n_agents = hparams['n_agents']
    self.ob_dim = hparams['ob_dim']
    # Widths scale linearly with the agent count.
    self.size = hparams['layer_size_per_agent'] * self.n_agents
    self.lstm_size = hparams['lstm_layer_size_per_agent'] * self.n_agents
    self.encoder_n_layers = hparams['encoder_n_layers']
    self.decoder_n_layers = hparams['decoder_n_layers']
    self.learning_rate = hparams['learning_rate']
    self.num_target_updates = hparams['num_target_updates']
    self.num_grad_steps_per_target_update = \
        hparams['num_grad_steps_per_target_update']
    self.gamma = hparams['gamma']
    self.debugging = hparams['debugging']
    self.rnn = hparams['rnn']

    # Interface widths depend on whether the recurrent core is used.
    if self.rnn:
        enc_output_size = dec_input_size = self.lstm_size
    else:
        enc_output_size = dec_input_size = self.size

    self.encoder = ptu.build_mlp(input_size=self.ob_dim,
                                 output_size=enc_output_size,
                                 n_layers=self.encoder_n_layers,
                                 size=self.size,
                                 activation="tanh",
                                 output_activation='tanh')
    # NOTE(review): the LSTM is constructed (and moved to device) even when
    # self.rnn is False — presumably unused in that mode; confirm.
    self.lstm = nn.LSTM(self.lstm_size, self.lstm_size, batch_first=True)
    # Scalar value head.
    self.decoder = ptu.build_mlp(input_size=dec_input_size,
                                 output_size=1,
                                 n_layers=self.decoder_n_layers,
                                 size=self.size,
                                 activation="tanh",
                                 output_activation='identity')

    self.encoder.to(ptu.device)
    self.lstm.to(ptu.device)
    self.decoder.to(ptu.device)
def __init__(self, ac_dim, ob_dim, n_layers, size, **kwargs):
    """Extend the base policy with an optional neural-network baseline.

    When self.nn_baseline is set by the base constructor, build an MLP
    value baseline (obs -> scalar) with its own optimizer and MSE loss.
    """
    super().__init__(ac_dim, ob_dim, n_layers, size, **kwargs)

    if not self.nn_baseline:
        return

    # Baseline value head: observations -> scalar estimate.
    self.baseline = ptu.build_mlp(
        input_size=self.ob_dim,
        output_size=1,
        n_layers=self.n_layers,
        size=self.size,
    )
    self.baseline.to(ptu.device)
    self.baseline_optimizer = optim.Adam(self.baseline.parameters(),
                                         self.learning_rate)
    self.baseline_loss = nn.MSELoss()
def __init__(self, ac_dim, ob_dim, n_layers, size, shared_exp=False,
             shared_exp_lambda=1., is_city=True, learning_rate=1e-4,
             n_drivers=1, training=True, nn_baseline=False, **kwargs):
    """Multi-driver policy: one Q-logit MLP per driver (city mode).

    Each per-driver network maps a concatenated (obs, action) vector to a
    single logit and gets its own Adam optimizer.

    Args:
        ac_dim, ob_dim: accepted for interface compatibility but
            overridden by environment constants (see note below).
        n_layers, size: MLP depth/width for ptu.build_mlp.
        shared_exp, shared_exp_lambda: shared-experience settings (stored).
        is_city: when True, build the per-driver networks.
        learning_rate: Adam step size.
        n_drivers: number of per-driver networks.
        training, nn_baseline: stored flags.
    """
    super().__init__(**kwargs)

    # init vars
    # NOTE(review): the ac_dim/ob_dim arguments are ignored; dimensions are
    # hard-coded from the environment (5 actions, 3 + 2*MAX_CAP obs) —
    # confirm this is intentional.
    self.ac_dim = 5
    self.ob_dim = 3 + 2 * MAX_CAP
    self.n_layers = n_layers
    self.is_city = is_city
    self.n_drivers = n_drivers
    self.size = size
    self.learning_rate = learning_rate
    self.training = training
    self.shared_exp = shared_exp
    self.shared_exp_lambda = shared_exp_lambda
    self.nn_baseline = nn_baseline

    if self.is_city:
        # Fix: use nn.ModuleList instead of a plain Python list so the
        # per-driver networks are registered submodules (visible to
        # self.parameters(), state_dict(), and module-wide .to()).
        self.agent_logits_nets = nn.ModuleList()
        self.agent_optimizers = []
        for _ in range(self.n_drivers):
            net = ptu.build_mlp(input_size=self.ob_dim + self.ac_dim,
                                output_size=1,
                                n_layers=self.n_layers,
                                size=self.size)
            net.to(ptu.device)
            self.agent_logits_nets.append(net)
            self.agent_optimizers.append(
                optim.Adam(net.parameters(), self.learning_rate))

    self.baseline = None
def __init__(self, hparams, optimizer_spec, **kwargs):
    """RND exploration model with an optional hash/autoencoder variant.

    In hash mode an encoder/decoder autoencoder is built and pretrained,
    and visit counts are tracked in a defaultdict; otherwise this is a
    standard RND setup (random target f, trained predictor f_hat).
    """
    super().__init__(**kwargs)
    self.ob_dim = hparams['ob_dim']
    self.output_size = hparams['rnd_output_size']
    self.n_layers = hparams['rnd_n_layers']
    self.size = hparams['rnd_size']
    self.optimizer_spec = optimizer_spec
    # Switches between the hash/count-based variant and plain RND.
    self.hash = hparams["hash"]
    # TODO: Create two neural networks:
    # 1) f, the random function we are trying to learn
    # 2) f_hat, the function we are using to learn f
    # WARNING: Make sure you use different types of weight
    # initializations for these two functions
    # HINT 1) Check out the method ptu.build_mlp
    # HINT 2) There are two weight init methods defined above
    if self.hash:
        # Autoencoder: obs -> code (output_size) -> reconstructed obs.
        self.encoder = ptu.build_mlp(self.ob_dim, self.output_size,
                                     self.n_layers, self.size)
        self.decoder = ptu.build_mlp(self.output_size, self.ob_dim,
                                     self.n_layers, self.size)
        self.ae_loss = nn.MSELoss()
        # f / f_hat are still constructed in this branch with the two
        # distinct initializers.
        self.f = ptu.build_mlp(self.ob_dim, self.output_size,
                               self.n_layers, self.size,
                               init_method=init_method_1)
        self.f_hat = ptu.build_mlp(self.ob_dim, self.output_size,
                                   self.n_layers, self.size,
                                   init_method=init_method_2)
        # NOTE(review): in this branch the optimizer covers only the
        # autoencoder's parameters — f_hat is NOT optimized here.
        # Presumably the hash variant relies on counts rather than RND
        # prediction error; confirm against the update/forward code.
        self.optimizer = self.optimizer_spec.constructor(
            list(self.encoder.parameters()) + list(self.decoder.parameters()),
            **self.optimizer_spec.optim_kwargs
        )
        self.learning_rate_scheduler = optim.lr_scheduler.LambdaLR(
            self.optimizer,
            self.optimizer_spec.learning_rate_schedule,
        )
        # Visit counts keyed by (hashed) state codes; missing keys are 0.
        self.counts = defaultdict(int)
        # NOTE(review): pretraining runs before the final .to(ptu.device)
        # calls below — confirm the autoencoder trains on the intended
        # device.
        self.pretrain_autoencoder()
    else:
        # Standard RND: frozen random target f, trained predictor f_hat.
        self.f = ptu.build_mlp(self.ob_dim, self.output_size,
                               self.n_layers, self.size,
                               init_method=init_method_1)
        self.f_hat = ptu.build_mlp(self.ob_dim, self.output_size,
                                   self.n_layers, self.size,
                                   init_method=init_method_2)
        # Only f_hat is optimized; f stays at its random initialization.
        self.optimizer = self.optimizer_spec.constructor(
            self.f_hat.parameters(),
            **self.optimizer_spec.optim_kwargs
        )
        self.learning_rate_scheduler = optim.lr_scheduler.LambdaLR(
            self.optimizer,
            self.optimizer_spec.learning_rate_schedule,
        )
    self.f.to(ptu.device)
    self.f_hat.to(ptu.device)
def __init__(self, hparams):
    """Centralized multi-agent critic: observations -> scalar value.

    Args:
        hparams: dict with keys 'n_agents', 'ob_dim',
            'layer_size_per_agent', 'n_layers', 'learning_rate',
            'num_target_updates', 'num_grad_steps_per_target_update',
            'gamma'.
    """
    super().__init__()

    self.n_agents = hparams['n_agents']
    self.ob_dim = hparams['ob_dim']
    # Hidden width scales linearly with the number of agents.
    self.size = hparams['layer_size_per_agent'] * self.n_agents
    self.n_layers = hparams['n_layers']
    self.learning_rate = hparams['learning_rate']
    self.num_target_updates = hparams['num_target_updates']
    self.num_grad_steps_per_target_update = \
        hparams['num_grad_steps_per_target_update']
    self.gamma = hparams['gamma']

    # Fix: removed the dead local `output_size` (computed from self.size
    # but never used — the network's output size is the literal 1 below).
    self.critic_network_logits = ptu.build_mlp(input_size=self.ob_dim,
                                               output_size=1,
                                               n_layers=self.n_layers,
                                               size=self.size,
                                               activation='relu')
    self.critic_network_logits.to(ptu.device)
    # NOTE(review): no loss/optimizer is created here, unlike the sibling
    # critics — presumably handled elsewhere; confirm.
def __init__(self, hparams, optimizer_spec, **kwargs):
    """Exploration model with a single trained network f_hat.

    f_hat maps observations back to observation-sized outputs and is
    trained via the supplied optimizer_spec.

    Args:
        hparams: dict with keys 'ob_dim', 'rnd_output_size',
            'rnd_n_layers', 'rnd_size'.
        optimizer_spec: provides the optimizer constructor, its kwargs,
            and a learning-rate schedule.
    """
    super().__init__(**kwargs)

    self.ob_dim = hparams['ob_dim']
    self.output_size = hparams['rnd_output_size']
    self.n_layers = hparams['rnd_n_layers']
    self.size = hparams['rnd_size']
    self.optimizer_spec = optimizer_spec

    # NOTE(review): the network's output size is self.ob_dim, not
    # self.output_size (which is stored but unused here) — presumably this
    # variant predicts observation-shaped targets; confirm.
    self.f_hat = ptu.build_mlp(
        input_size=self.ob_dim,
        output_size=self.ob_dim,
        n_layers=self.n_layers,
        size=self.size)

    self.optimizer = self.optimizer_spec.constructor(
        self.f_hat.parameters(),
        **self.optimizer_spec.optim_kwargs
    )
    self.learning_rate_scheduler = optim.lr_scheduler.LambdaLR(
        self.optimizer,
        self.optimizer_spec.learning_rate_schedule,
    )

    self.f_hat.to(ptu.device)
def __init__(self, ac_dim, ob_dim, n_layers, size, discrete=False,
             learning_rate=1e-4, training=True, nn_baseline=False, **kwargs):
    """Build the policy networks, plus an optional NN value baseline.

    Discrete action spaces get a logits MLP (categorical policy);
    continuous spaces get a mean MLP plus a state-independent log-std
    parameter (diagonal Gaussian policy), and a unit Normal helper
    distribution. When nn_baseline is set, an obs -> scalar baseline MLP
    with its own optimizer and MSE loss is also built.

    Args:
        ac_dim: action dimension (number of actions if discrete).
        ob_dim: observation dimension (MLP input size).
        n_layers: hidden layer count for ptu.build_mlp.
        size: hidden layer width for ptu.build_mlp.
        discrete: True for a categorical policy, False for Gaussian.
        learning_rate: Adam step size (shared by policy and baseline).
        training: stored flag; not otherwise used in this constructor.
        nn_baseline: when True, build the baseline network.
    """
    super().__init__(**kwargs)

    # init vars
    self.ac_dim = ac_dim
    self.ob_dim = ob_dim
    self.n_layers = n_layers
    self.discrete = discrete
    self.size = size
    self.learning_rate = learning_rate
    self.training = training
    self.nn_baseline = nn_baseline

    if self.discrete:
        # Categorical policy: observations -> action logits.
        self.logits_na = ptu.build_mlp(
            input_size=self.ob_dim,
            output_size=self.ac_dim,
            n_layers=self.n_layers,
            size=self.size,
        )
        self.logits_na.to(ptu.device)
        self.mean_net = None
        self.logstd = None
        self.optimizer = optim.Adam(self.logits_na.parameters(),
                                    self.learning_rate)
    else:
        # Gaussian policy: observations -> mean, with a learned log-std
        # shared across states.
        self.logits_na = None
        self.mean_net = ptu.build_mlp(
            input_size=self.ob_dim,
            output_size=self.ac_dim,
            n_layers=self.n_layers,
            size=self.size,
        )
        # TODO: shouldn't logstd also be a NN?
        self.logstd = nn.Parameter(
            torch.zeros(self.ac_dim, dtype=torch.float32, device=ptu.device))
        self.mean_net.to(ptu.device)
        # Fix: the original also called `self.logstd.to(ptu.device)` here.
        # Tensor.to() is not in-place and its result was discarded, and the
        # parameter is already constructed on ptu.device — the call was a
        # no-op and has been removed.
        self.optimizer = optim.Adam(
            itertools.chain([self.logstd], self.mean_net.parameters()),
            self.learning_rate)
        # Standard-normal helper (e.g. for reparameterized sampling).
        # NOTE(review): ptu.from_numpy is given Python floats, not
        # ndarrays — confirm the helper accepts scalars.
        self.normal_dist = distributions.Normal(ptu.from_numpy(0.0),
                                                ptu.from_numpy(1.0))

    if nn_baseline:
        # Baseline value head: observations -> scalar estimate.
        self.baseline = ptu.build_mlp(
            input_size=self.ob_dim,
            output_size=1,
            n_layers=self.n_layers,
            size=self.size,
        )
        self.baseline.to(ptu.device)
        self.baseline_optimizer = optim.Adam(
            self.baseline.parameters(),
            self.learning_rate,
        )
    else:
        self.baseline = None