def init_nets(self):
    '''Initialize nets with multi-task dimensions, and set net params'''
    self.state_dims = [
        body.state_dim for body in self.agent.nanflat_body_a]
    self.action_dims = [
        body.action_dim for body in self.agent.nanflat_body_a]
    self.total_state_dim = sum(self.state_dims)
    self.total_action_dim = sum(self.action_dims)
    net_spec = self.agent.spec['net']
    net_kwargs = util.compact_dict(dict(
        hid_layers_activation=_.get(net_spec, 'hid_layers_activation'),
        optim_param=_.get(net_spec, 'optim'),
        loss_param=_.get(net_spec, 'loss'),
        clamp_grad=_.get(net_spec, 'clamp_grad'),
        clamp_grad_val=_.get(net_spec, 'clamp_grad_val'),
    ))
    self.net = getattr(net, net_spec['type'])(
        self.total_state_dim, net_spec['hid_layers'], self.total_action_dim,
        **net_kwargs)
    self.target_net = getattr(net, net_spec['type'])(
        self.total_state_dim, net_spec['hid_layers'], self.total_action_dim,
        **net_kwargs)
    self.online_net = self.target_net
    self.eval_net = self.target_net
    util.set_attr(self, _.pick(net_spec, [
        'batch_size', 'update_type', 'update_frequency', 'polyak_weight',
    ]))
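# For reference, a hypothetical 'net' spec block satisfying the lookups above
# (keys mirror the _.get calls; values are illustrative, not from a real spec
# file). util.compact_dict is assumed to prune None-valued entries so that
# unset spec keys fall back to the net class defaults:
example_net_spec = {
    'type': 'MLPNet',  # resolved via getattr(net, ...)
    'hid_layers': [32],
    'hid_layers_activation': 'relu',
    'optim': {'name': 'Adam', 'lr': 0.02},
    'loss': {'name': 'mse_loss'},
    'batch_size': 32,
    'update_type': 'replace',
    'update_frequency': 1,
    'polyak_weight': 0.9,
}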
def init_nets(self):
    '''Initialize nets with multi-task dimensions, and set net params'''
    # NOTE: Separate init from MultitaskDQN despite similarities so that this
    # implementation can support arbitrary sized state and action heads
    # (e.g. multiple layers)
    net_spec = self.agent.spec['net']
    if len(net_spec['hid_layers']) > 0:
        state_head_out_d = int(net_spec['hid_layers'][0] / 4)
    else:
        state_head_out_d = 16
    self.state_dims = [
        [body.state_dim, state_head_out_d]
        for body in self.agent.nanflat_body_a]
    self.action_dims = [
        [body.action_dim] for body in self.agent.nanflat_body_a]
    self.total_state_dim = sum([s[0] for s in self.state_dims])
    self.total_action_dim = sum([a[0] for a in self.action_dims])
    logger.debug(
        f'State dims: {self.state_dims}, total: {self.total_state_dim}')
    logger.debug(
        f'Action dims: {self.action_dims}, total: {self.total_action_dim}')
    net_kwargs = util.compact_dict(dict(
        hid_layers_activation=_.get(net_spec, 'hid_layers_activation'),
        optim_param=_.get(net_spec, 'optim'),
        loss_param=_.get(net_spec, 'loss'),
        clamp_grad=_.get(net_spec, 'clamp_grad'),
        clamp_grad_val=_.get(net_spec, 'clamp_grad_val'),
    ))
    self.net = getattr(net, net_spec['type'])(
        self.state_dims, net_spec['hid_layers'], self.action_dims,
        **net_kwargs)
    self.target_net = getattr(net, net_spec['type'])(
        self.state_dims, net_spec['hid_layers'], self.action_dims,
        **net_kwargs)
    self.online_net = self.target_net
    self.eval_net = self.target_net
    util.set_attr(self, _.pick(net_spec, [
        'batch_size', 'update_type', 'update_frequency', 'polyak_weight',
    ]))
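# Worked example of the dimension bookkeeping above, assuming two hypothetical
# bodies (state dims 4 and 6, action dims 2 and 3) and hid_layers = [64, 32]:
hid_layers = [64, 32]
state_head_out_d = int(hid_layers[0] / 4)                    # -> 16
state_dims = [[4, state_head_out_d], [6, state_head_out_d]]  # -> [[4, 16], [6, 16]]
action_dims = [[2], [3]]
assert sum(s[0] for s in state_dims) == 10  # total_state_dim
assert sum(a[0] for a in action_dims) == 5  # total_action_dim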
def init_nets(self):
    '''Initialize networks'''
    body = self.agent.nanflat_body_a[0]  # single-body algo
    state_dim = body.state_dim
    action_dim = body.action_dim
    net_spec = self.agent.spec['net']
    net_kwargs = util.compact_dict(dict(
        hid_layers_activation=_.get(net_spec, 'hid_layers_activation'),
        optim_param=_.get(net_spec, 'optim'),
        loss_param=_.get(net_spec, 'loss'),
        clamp_grad=_.get(net_spec, 'clamp_grad'),
        clamp_grad_val=_.get(net_spec, 'clamp_grad_val'),
    ))
    self.net = getattr(net, net_spec['type'])(
        state_dim, net_spec['hid_layers'], action_dim, **net_kwargs)
    self.target_net = getattr(net, net_spec['type'])(
        state_dim, net_spec['hid_layers'], action_dim, **net_kwargs)
    self.online_net = self.target_net
    self.eval_net = self.target_net
    util.set_attr(self, _.pick(net_spec, [
        'batch_size',
    ]))
    # Default network update params for base
    self.update_type = 'replace'
    self.update_frequency = 1
    self.polyak_weight = 0.0
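# A minimal sketch of the two target-net update modes the params above select
# between, assuming standard PyTorch nets; these helpers are illustrative, not
# this repo's actual update API:
def replace_update(target_net, online_net):
    '''update_type 'replace': hard-copy online params every update_frequency steps'''
    target_net.load_state_dict(online_net.state_dict())

def polyak_update(target_net, online_net, polyak_weight):
    '''update_type 'polyak': blend target params toward online params'''
    for t_param, o_param in zip(target_net.parameters(), online_net.parameters()):
        t_param.data.copy_(
            polyak_weight * t_param.data + (1 - polyak_weight) * o_param.data)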
def init_nets(self):
    '''Initialize the neural network used to learn the Q function from the spec'''
    body = self.agent.nanflat_body_a[0]  # singleton algo
    state_dim = body.state_dim
    action_dim = body.action_dim
    self.is_discrete = body.is_discrete
    net_spec = self.agent.spec['net']
    net_kwargs = util.compact_dict(dict(
        hid_layers_activation=_.get(net_spec, 'hid_layers_activation'),
        optim_param=_.get(net_spec, 'optim'),
        loss_param=_.get(net_spec, 'loss'),
        clamp_grad=_.get(net_spec, 'clamp_grad'),
        clamp_grad_val=_.get(net_spec, 'clamp_grad_val'),
    ))
    # Below we automatically select an appropriate net for a discrete or
    # continuous action space if the type is 'MLPdefault'. Otherwise the
    # correct type of network is assumed to be specified in the spec.
    # Networks for continuous action spaces have two heads and return two
    # values: a tensor containing the mean of the action policy, and a tensor
    # containing the std deviation of the action policy. The distribution is
    # assumed to be Gaussian (Normal).
    # Networks for discrete action spaces have a single head and return the
    # logits for a categorical probability distribution over the discrete
    # actions.
    if net_spec['type'] == 'MLPdefault':
        if self.is_discrete:
            self.net = getattr(net, 'MLPNet')(
                state_dim, net_spec['hid_layers'], action_dim, **net_kwargs)
        else:
            self.net = getattr(net, 'MLPHeterogenousHeads')(
                state_dim, net_spec['hid_layers'], [action_dim, action_dim],
                **net_kwargs)
    else:
        self.net = getattr(net, net_spec['type'])(
            state_dim, net_spec['hid_layers'], action_dim, **net_kwargs)
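# Illustrative sketch of what the two head layouts above feed into, using
# stand-in tensors rather than real net outputs (hypothetical values):
import torch
from torch.distributions import Categorical, Normal

logits = torch.zeros(2)                    # discrete: single head of action_dim logits
action = Categorical(logits=logits).sample()

mean, std = torch.zeros(2), torch.ones(2)  # continuous: two heads -> mean, std dev
action = Normal(mean, std).sample()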
def init_nets(self):
    '''Initialize the neural network used to learn the Q function from the spec'''
    body = self.agent.nanflat_body_a[0]  # single-body algo
    state_dim = body.state_dim  # dimension of the environment state, e.g. 4
    action_dim = body.action_dim  # dimension of the environment actions, e.g. 2
    net_spec = self.agent.spec['net']
    net_kwargs = util.compact_dict(dict(
        hid_layers_activation=_.get(net_spec, 'hid_layers_activation'),
        optim_param=_.get(net_spec, 'optim'),
        loss_param=_.get(net_spec, 'loss'),
        clamp_grad=_.get(net_spec, 'clamp_grad'),
        clamp_grad_val=_.get(net_spec, 'clamp_grad_val'),
    ))
    self.net = getattr(net, net_spec['type'])(
        state_dim, net_spec['hid_layers'], action_dim, **net_kwargs)
    util.set_attr(self, _.pick(net_spec, [
        'batch_size',  # how many examples to learn per training iteration
        'decay_lr',
        'decay_lr_frequency',
        'decay_lr_min_timestep',
    ]))
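# Hypothetical sketch of how the decay params picked above could drive a
# learning-rate schedule; the actual schedule lives elsewhere in the codebase,
# so this only illustrates the apparent intent of the param names:
def maybe_decay_lr(optim, total_t, decay_lr, decay_lr_frequency, decay_lr_min_timestep, rate=0.9):
    '''Multiplicatively decay lr every decay_lr_frequency steps once past decay_lr_min_timestep'''
    if decay_lr and total_t >= decay_lr_min_timestep and total_t % decay_lr_frequency == 0:
        for group in optim.param_groups:
            group['lr'] *= rate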
def init_nets(self):
    '''Initialize the neural network used to learn the Q function from the spec'''
    body = self.agent.nanflat_body_a[0]  # single-body algo
    self.state_dim = body.state_dim  # dimension of the environment state, e.g. 4
    self.action_dim = body.action_dim  # dimension of the environment actions, e.g. 2
    net_spec = self.agent.spec['net']
    mem_spec = self.agent.spec['memory']
    net_kwargs = util.compact_dict(dict(
        hid_layers_activation=_.get(net_spec, 'hid_layers_activation'),
        optim_param=_.get(net_spec, 'optim'),
        loss_param=_.get(net_spec, 'loss'),
        clamp_grad=_.get(net_spec, 'clamp_grad'),
        clamp_grad_val=_.get(net_spec, 'clamp_grad_val'),
        gpu=_.get(net_spec, 'gpu'),
    ))
    if net_spec['type'].find('Recurrent') != -1:
        # Recurrent nets additionally need the sequence length from the memory spec
        self.net = getattr(net, net_spec['type'])(
            self.state_dim, net_spec['hid_layers'], self.action_dim,
            mem_spec['length_history'], **net_kwargs)
    else:
        self.net = getattr(net, net_spec['type'])(
            self.state_dim, net_spec['hid_layers'], self.action_dim,
            **net_kwargs)
    self.set_net_attributes()
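# Quick check of the substring dispatch above: any net type containing
# 'Recurrent' receives mem_spec['length_history'] as an extra positional arg.
assert 'RecurrentNet'.find('Recurrent') != -1  # gets length_history
assert 'MLPNet'.find('Recurrent') == -1        # does not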
def test_compact_dict(d, res_d):
    assert util.compact_dict(d) == res_d
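# Plausible cases for the test above, assuming util.compact_dict drops
# None-valued entries (consistent with how the init_nets methods use it to
# prune unset spec keys). Hypothetical data, not from the repo's fixtures:
compact_dict_cases = [
    ({'a': 1, 'b': None}, {'a': 1}),
    ({'a': None}, {}),
]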
def init_nets(self):
    '''Initialize the neural networks used to learn the actor and critic from the spec'''
    body = self.agent.nanflat_body_a[0]  # singleton algo
    state_dim = body.state_dim
    action_dim = body.action_dim
    self.is_discrete = body.is_discrete
    net_spec = self.agent.spec['net']
    mem_spec = self.agent.spec['memory']
    net_type = self.agent.spec['net']['type']
    actor_kwargs = util.compact_dict(dict(
        hid_layers_activation=_.get(net_spec, 'hid_layers_activation'),
        optim_param=_.get(net_spec, 'optim_actor'),
        loss_param=_.get(net_spec, 'loss'),  # Note: not used for training the actor
        clamp_grad=_.get(net_spec, 'clamp_grad'),
        clamp_grad_val=_.get(net_spec, 'clamp_grad_val'),
        gpu=_.get(net_spec, 'gpu'),
    ))
    if self.agent.spec['net']['use_same_optim']:
        logger.info('Using same optimizer for actor and critic')
        critic_kwargs = actor_kwargs
    else:
        logger.info('Using different optimizer for actor and critic')
        critic_kwargs = util.compact_dict(dict(
            hid_layers_activation=_.get(net_spec, 'hid_layers_activation'),
            optim_param=_.get(net_spec, 'optim_critic'),
            loss_param=_.get(net_spec, 'loss'),
            clamp_grad=_.get(net_spec, 'clamp_grad'),
            clamp_grad_val=_.get(net_spec, 'clamp_grad_val'),
            gpu=_.get(net_spec, 'gpu'),
        ))
    '''
    Below we automatically select an appropriate net based on three conditions:
    1. If the action space is discrete or continuous
        - Networks for continuous action spaces have two heads and return two values: a tensor containing the mean of the action policy, and a tensor containing the std deviation of the action policy. The distribution is assumed to be Gaussian (Normal).
        - Networks for discrete action spaces have a single head and return the logits for a categorical probability distribution over the discrete actions
    2. If the actor and critic are separate or share weights
        - If the networks share weights then the single network returns a list.
            - Continuous action spaces: the list contains 3 elements: the mean output for the actor (policy), the std dev of the policy, and the state-value estimated by the network.
            - Discrete action spaces: the list contains 2 elements: a tensor containing the logits for a categorical probability distribution over the actions, and the state-value estimated by the network.
    3. If the network type is feedforward, convolutional, or recurrent
        - Feedforward and convolutional networks take a single state as input and require an OnPolicyReplay or OnPolicyBatchReplay memory
        - Recurrent networks take n states as input and require an OnPolicyNStepReplay or OnPolicyNStepBatchReplay memory
    '''
    if net_type == 'MLPseparate':
        self.is_shared_architecture = False
        self.is_recurrent = False
        if self.is_discrete:
            self.actor = getattr(net, 'MLPNet')(
                state_dim, net_spec['hid_layers'], action_dim, **actor_kwargs)
            logger.info('Feedforward net, discrete action space, actor and critic are separate networks')
        else:
            self.actor = getattr(net, 'MLPHeterogenousHeads')(
                state_dim, net_spec['hid_layers'], [action_dim, action_dim], **actor_kwargs)
            logger.info('Feedforward net, continuous action space, actor and critic are separate networks')
        self.critic = getattr(net, 'MLPNet')(
            state_dim, net_spec['hid_layers'], 1, **critic_kwargs)
    elif net_type == 'MLPshared':
        self.is_shared_architecture = True
        self.is_recurrent = False
        if self.is_discrete:
            self.actorcritic = getattr(net, 'MLPHeterogenousHeads')(
                state_dim, net_spec['hid_layers'], [action_dim, 1], **actor_kwargs)
            logger.info('Feedforward net, discrete action space, actor and critic combined into single network, sharing params')
        else:
            self.actorcritic = getattr(net, 'MLPHeterogenousHeads')(
                state_dim, net_spec['hid_layers'], [action_dim, action_dim, 1], **actor_kwargs)
            logger.info('Feedforward net, continuous action space, actor and critic combined into single network, sharing params')
    elif net_type == 'Convseparate':
        self.is_shared_architecture = False
        self.is_recurrent = False
        if self.is_discrete:
            self.actor = getattr(net, 'ConvNet')(
                state_dim, net_spec['hid_layers'], action_dim, **actor_kwargs)
            logger.info('Convolutional net, discrete action space, actor and critic are separate networks')
        else:
            self.actor = getattr(net, 'ConvNet')(
                state_dim, net_spec['hid_layers'], [action_dim, action_dim], **actor_kwargs)
            logger.info('Convolutional net, continuous action space, actor and critic are separate networks')
        self.critic = getattr(net, 'ConvNet')(
            state_dim, net_spec['hid_layers'], 1, **critic_kwargs)
    elif net_type == 'Convshared':
        self.is_shared_architecture = True
        self.is_recurrent = False
        if self.is_discrete:
            self.actorcritic = getattr(net, 'ConvNet')(
                state_dim, net_spec['hid_layers'], [action_dim, 1], **actor_kwargs)
            logger.info('Convolutional net, discrete action space, actor and critic combined into single network, sharing params')
        else:
            self.actorcritic = getattr(net, 'ConvNet')(
                state_dim, net_spec['hid_layers'], [action_dim, action_dim, 1], **actor_kwargs)
            logger.info('Convolutional net, continuous action space, actor and critic combined into single network, sharing params')
    elif net_type == 'Recurrentseparate':
        self.is_shared_architecture = False
        self.is_recurrent = True
        if self.is_discrete:
            self.actor = getattr(net, 'RecurrentNet')(
                state_dim, net_spec['hid_layers'], action_dim,
                mem_spec['length_history'], **actor_kwargs)
            logger.info('Recurrent net, discrete action space, actor and critic are separate networks')
        else:
            self.actor = getattr(net, 'RecurrentNet')(
                state_dim, net_spec['hid_layers'], [action_dim, action_dim],
                mem_spec['length_history'], **actor_kwargs)
            logger.info('Recurrent net, continuous action space, actor and critic are separate networks')
        self.critic = getattr(net, 'RecurrentNet')(
            state_dim, net_spec['hid_layers'], 1,
            mem_spec['length_history'], **critic_kwargs)
    elif net_type == 'Recurrentshared':
        self.is_shared_architecture = True
        self.is_recurrent = True
        if self.is_discrete:
            self.actorcritic = getattr(net, 'RecurrentNet')(
                state_dim, net_spec['hid_layers'], [action_dim, 1],
                mem_spec['length_history'], **actor_kwargs)
            logger.info('Recurrent net, discrete action space, actor and critic combined into single network, sharing params')
        else:
            self.actorcritic = getattr(net, 'RecurrentNet')(
                state_dim, net_spec['hid_layers'], [action_dim, action_dim, 1],
                mem_spec['length_history'], **actor_kwargs)
            logger.info('Recurrent net, continuous action space, actor and critic combined into single network, sharing params')
    else:
        logger.warn(
            "Incorrect network type. Please use 'MLPshared', 'MLPseparate', "
            "'Convshared', 'Convseparate', 'Recurrentshared', or 'Recurrentseparate'.")
        raise NotImplementedError
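# Hedged example of a 'net' spec that would select the shared feedforward
# branch above; keys mirror this method's lookups, values are illustrative:
example_net_spec = {
    'type': 'MLPshared',
    'hid_layers': [64],
    'hid_layers_activation': 'relu',
    'optim_actor': {'name': 'Adam', 'lr': 0.001},
    'optim_critic': {'name': 'Adam', 'lr': 0.01},
    'use_same_optim': False,
    'clamp_grad': False,
}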