def __init__(self, net_spec, in_dim, out_dim):
    '''
    Q-network over an image state plus a flat action input.
    The image passes through a conv body; the fc body's input width is
    conv_out_dim + action_dim, so conv features and action are presumably
    combined in forward — confirm against the forward method.
    in_dim: tuple (state_dim, action_dim); state_dim must be an image shape (c,w,h)
    out_dim: dimension of the output layer
    '''
    state_dim, action_dim = in_dim
    assert len(state_dim) == 3  # image shape (c,w,h)
    nn.Module.__init__(self)
    Net.__init__(self, net_spec, state_dim, out_dim)
    # defaults, overridable via net_spec below
    defaults = dict(
        out_layer_activation=None,
        init_fn=None,
        normalize=False,
        batch_norm=True,
        clip_grad_val=None,
        loss_spec={'name': 'MSELoss'},
        optim_spec={'name': 'Adam'},
        lr_scheduler_spec=None,
        update_type='replace',
        update_frequency=1,
        polyak_coef=0.0,
        gpu=False,
    )
    util.set_attr(self, defaults)
    util.set_attr(self, self.net_spec, [
        'conv_hid_layers',
        'fc_hid_layers',
        'hid_layers_activation',
        'out_layer_activation',
        'init_fn',
        'normalize',
        'batch_norm',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_scheduler_spec',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ])
    # conv body over the image state
    self.conv_model = self.build_conv_layers(self.conv_hid_layers)
    self.conv_out_dim = self.get_conv_output_size()
    # fc body sized for flattened conv features plus the action vector
    fc_dims = [self.conv_out_dim + action_dim] + self.fc_hid_layers
    self.fc_model = net_util.build_fc_model(fc_dims, self.hid_layers_activation)
    # output tail: affine map from the last fc layer to out_dim
    tail_dim = self.fc_hid_layers[-1]
    self.model_tail = net_util.build_fc_model([tail_dim, self.out_dim], self.out_layer_activation)
    net_util.init_layers(self, self.init_fn)
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.to(self.device)
    self.train()
def __init__(self, net_spec, algorithm, in_dim, out_dim):
    '''
    Dueling MLP Q-network (old-style API): a shared fc body feeding two
    heads, a state-value head (v) and a raw-advantage head (adv).
    '''
    nn.Module.__init__(self)
    Net.__init__(self, net_spec, algorithm, in_dim, out_dim)
    # defaults, overridable by net_spec
    defaults = dict(
        clip_grad=False,
        clip_grad_val=1.0,
        loss_spec={'name': 'MSELoss'},
        optim_spec={'name': 'Adam'},
        lr_decay='no_decay',
        update_type='replace',
        update_frequency=1,
        polyak_coef=0.0,
        gpu=False,
    )
    util.set_attr(self, defaults)
    util.set_attr(self, self.net_spec, [
        'hid_layers',
        'hid_layers_activation',
        'clip_grad',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_decay',
        'lr_decay_frequency',
        'lr_decay_min_timestep',
        'lr_anneal_timestep',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ])
    # dueling architectures are only appropriate for Q-learning algorithms
    assert net_util.is_q_learning(algorithm)
    # shared body
    layer_dims = [self.in_dim] + self.hid_layers
    self.model_body = net_util.build_sequential(layer_dims, self.hid_layers_activation)
    # output heads
    self.v = nn.Linear(layer_dims[-1], 1)  # state value
    self.adv = nn.Linear(layer_dims[-1], out_dim)  # action dependent raw advantage
    net_util.init_layers(self.modules())
    if torch.cuda.is_available() and self.gpu:
        for m in self.modules():
            m.cuda()
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.optim = net_util.get_optim(self, self.optim_spec)
    self.lr_decay = getattr(net_util, self.lr_decay)  # resolve decay fn by name
def __init__(self, net_spec, in_dim, out_dim):
    '''
    Dueling MLP Q-network: a shared fc body feeding two heads, a
    state-value head (v) and a raw-advantage head (adv).
    net_spec keys are applied over the defaults set below.
    '''
    nn.Module.__init__(self)
    Net.__init__(self, net_spec, in_dim, out_dim)
    # set default
    util.set_attr(
        self,
        dict(
            init_fn=None,
            clip_grad_val=None,
            loss_spec={'name': 'MSELoss'},
            optim_spec={'name': 'Adam'},
            lr_scheduler_spec=None,
            update_type='replace',
            update_frequency=1,
            polyak_coef=0.0,
            gpu=False,
        ))
    util.set_attr(self, self.net_spec, [
        'shared',
        'hid_layers',
        'hid_layers_activation',
        'init_fn',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_scheduler_spec',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ])
    # Build model body
    dims = [self.in_dim] + self.hid_layers
    self.model_body = net_util.build_sequential(dims, self.hid_layers_activation)
    # output layers
    self.v = nn.Linear(dims[-1], 1)  # state value
    self.adv = nn.Linear(dims[-1], out_dim)  # action dependent raw advantage
    net_util.init_layers(self, self.init_fn)
    # nn.Module.to applies recursively to all submodules, so a single call
    # replaces the previous redundant per-module loop over self.modules()
    self.to(self.device)
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.optim = net_util.get_optim(self, self.optim_spec)
    self.lr_scheduler = net_util.get_lr_scheduler(self, self.lr_scheduler_spec)
def __init__(self, net_spec, algorithm, in_dim, out_dim):
    '''
    MLP with multiple output tails (old-style API): a shared fc body and
    one Linear tail per entry of out_dim.
    '''
    nn.Module.__init__(self)
    Net.__init__(self, net_spec, algorithm, in_dim, out_dim)
    # defaults, overridable by net_spec
    defaults = dict(
        clip_grad=False,
        clip_grad_val=1.0,
        loss_spec={'name': 'MSELoss'},
        optim_spec={'name': 'Adam'},
        lr_decay='no_decay',
        update_type='replace',
        update_frequency=1,
        polyak_coef=0.0,
        gpu=False,
    )
    util.set_attr(self, defaults)
    util.set_attr(self, self.net_spec, [
        'hid_layers',
        'hid_layers_activation',
        'clip_grad',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_decay',
        'lr_decay_frequency',
        'lr_decay_min_timestep',
        'lr_anneal_timestep',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ])
    # shared body
    layer_dims = [self.in_dim] + self.hid_layers
    self.model_body = net_util.build_sequential(layer_dims, self.hid_layers_activation)
    # multi-tail output layer with mean and std
    self.model_tails = nn.ModuleList([nn.Linear(layer_dims[-1], d) for d in out_dim])
    net_util.init_layers(self.modules())
    if torch.cuda.is_available() and self.gpu:
        for m in self.modules():
            m.cuda()
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.optim = net_util.get_optim(self, self.optim_spec)
    self.lr_decay = getattr(net_util, self.lr_decay)  # resolve decay fn by name
def __init__(self, net_spec, in_dim, out_dim):
    '''
    Q-network over a flat state and action: the fc body consumes a vector
    of width state_dim + action_dim, followed by an output tail.
    in_dim: tuple (state_dim, action_dim)
    out_dim: dimension of the output layer
    '''
    state_dim, action_dim = in_dim
    nn.Module.__init__(self)
    Net.__init__(self, net_spec, in_dim, out_dim)
    # defaults, overridable via net_spec below
    defaults = dict(
        out_layer_activation=None,
        init_fn=None,
        clip_grad_val=None,
        loss_spec={'name': 'MSELoss'},
        optim_spec={'name': 'Adam'},
        lr_scheduler_spec=None,
        update_type='replace',
        update_frequency=1,
        polyak_coef=0.0,
        gpu=False,
    )
    util.set_attr(self, defaults)
    util.set_attr(self, self.net_spec, [
        'shared',
        'hid_layers',
        'hid_layers_activation',
        'out_layer_activation',
        'init_fn',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_scheduler_spec',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ])
    # fc body over the concatenated state-action width
    body_dims = [state_dim + action_dim] + self.hid_layers
    self.model = net_util.build_fc_model(body_dims, self.hid_layers_activation)
    # output tail; default out_layer_activation is None (no activation)
    self.model_tail = net_util.build_fc_model([body_dims[-1], self.out_dim], self.out_layer_activation)
    net_util.init_layers(self, self.init_fn)
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.to(self.device)
    self.train()
def __init__(self, net_spec, in_dim, out_dim):
    '''
    net_spec:
    hid_layers: list with tuple consisting of two elements. (conv_hid, flat_hid)
        Note: tuple must contain two elements, use empty list if no such layers.
    1. conv_hid: list containing dimensions of the convolutional hidden layers. Assumed to all come before the flat layers.
        Note: a convolutional layer should specify the in_channel, out_channels, kernel_size, stride (of kernel steps), padding, and dilation (spacing between kernel points) E.g. [3, 16, (5, 5), 1, 0, (2, 2)]
        For more details, see http://pytorch.org/docs/master/nn.html#conv2d and https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
    2. flat_hid: list of dense layers following the convolutional layers
    hid_layers_activation: activation function for the hidden layers
    init_fn: weight initialization function
    batch_norm: whether to add batch normalization after each convolutional layer, excluding the input layer.
    clip_grad: whether to clip the gradient
    clip_grad_val: the clip value
    loss_spec: measure of error between model predictions and correct outputs
    optim_spec: parameters for initializing the optimizer
    lr_decay: function to decay learning rate
    lr_decay_frequency: how many total timesteps per decay
    lr_decay_min_timestep: minimum amount of total timesteps before starting decay
    lr_anneal_timestep: timestep to anneal lr decay
    update_type: method to update network weights: 'replace' or 'polyak'
    update_frequency: how many total timesteps per update
    polyak_coef: ratio of polyak weight update
    gpu: whether to train using a GPU. Note this will only work if a GPU is available, otherwise setting gpu=True does nothing
    '''
    # OpenAI gym provides images as W x H x C, pyTorch expects C x W x H
    in_dim = np.roll(in_dim, 1)
    # use generic multi-output for Convnet
    out_dim = np.reshape(out_dim, -1).tolist()
    nn.Module.__init__(self)
    Net.__init__(self, net_spec, in_dim, out_dim)
    # set default
    util.set_attr(
        self,
        dict(
            init_fn='xavier_uniform_',
            batch_norm=True,
            clip_grad=False,
            clip_grad_val=1.0,
            loss_spec={'name': 'MSELoss'},
            optim_spec={'name': 'Adam'},
            lr_decay='no_decay',
            update_type='replace',
            update_frequency=1,
            polyak_coef=0.0,
            gpu=False,
        ))
    util.set_attr(self, self.net_spec, [
        'hid_layers',
        'hid_layers_activation',
        'init_fn',
        'batch_norm',
        'clip_grad',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_decay',
        'lr_decay_frequency',
        'lr_decay_min_timestep',
        'lr_anneal_timestep',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ])
    # Guard against inappropriate algorithms and environments
    assert len(out_dim) == 1
    # Build model
    self.conv_hid_layers = self.hid_layers[0]
    self.dense_hid_layers = self.hid_layers[1]
    # conv layer
    self.conv_model = self.build_conv_layers(self.conv_hid_layers)
    # fc layer from flattened conv
    self.dense_model = self.build_dense_layers(self.dense_hid_layers)
    # tails take the last dense dim, or the flattened conv dim if no dense layers
    tail_in_dim = self.dense_hid_layers[-1] if len(self.dense_hid_layers) > 0 else self.conv_out_dim
    # output layers
    self.v = nn.Linear(tail_in_dim, 1)  # state value
    self.adv = nn.Linear(tail_in_dim, out_dim[0])  # action dependent raw advantage
    net_util.init_layers(self, self.init_fn)
    # nn.Module.to applies recursively to all submodules, so a single call
    # replaces the previous redundant per-module loop over self.modules()
    self.to(self.device)
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.optim = net_util.get_optim(self, self.optim_spec)
    self.lr_decay = getattr(net_util, self.lr_decay)  # resolve decay fn by name
def __init__(self, net_spec, algorithm, in_dim, out_dim):
    '''
    Dueling conv Q-network (old-style API).
    net_spec:
    hid_layers: tuple of two elements (conv_hid, flat_hid); use an empty
        list for an element with no such layers.
    1. conv_hid: dimensions of the convolutional hidden layers, all placed
        before the flat layers. A conv layer specifies the in_channel,
        out_channels, kernel_size, stride (of kernel steps), padding, and
        dilation (spacing between kernel points), e.g. [3, 16, (5, 5), 1, 0, (2, 2)].
        For more details, see http://pytorch.org/docs/master/nn.html#conv2d
        and https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
    2. flat_hid: dense layers following the convolutional layers
    hid_layers_activation: activation function for the hidden layers
    batch_norm: whether to add batch normalization after each convolutional layer, excluding the input layer
    clip_grad: whether to clip the gradient
    clip_grad_val: the clip value
    loss_spec: measure of error between model predictions and correct outputs
    optim_spec: parameters for initializing the optimizer
    lr_decay: function to decay learning rate
    lr_decay_frequency: how many total timesteps per decay
    lr_decay_min_timestep: minimum amount of total timesteps before starting decay
    lr_anneal_timestep: timestep to anneal lr decay
    update_type: method to update network weights: 'replace' or 'polyak'
    update_frequency: how many total timesteps per update
    polyak_coef: ratio of polyak weight update
    gpu: whether to train using a GPU; if none is available, setting gpu=True does nothing
    '''
    # OpenAI gym provides images as W x H x C, pyTorch expects C x W x H
    in_dim = np.roll(in_dim, 1)
    # use generic multi-output for Convnet
    out_dim = np.reshape(out_dim, -1).tolist()
    nn.Module.__init__(self)
    Net.__init__(self, net_spec, algorithm, in_dim, out_dim)
    # defaults, overridable by net_spec
    defaults = dict(
        batch_norm=True,
        clip_grad=False,
        clip_grad_val=1.0,
        loss_spec={'name': 'MSELoss'},
        optim_spec={'name': 'Adam'},
        lr_decay='no_decay',
        update_type='replace',
        update_frequency=1,
        polyak_coef=0.0,
        gpu=False,
    )
    util.set_attr(self, defaults)
    util.set_attr(self, self.net_spec, [
        'hid_layers',
        'hid_layers_activation',
        'batch_norm',
        'clip_grad',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_decay',
        'lr_decay_frequency',
        'lr_decay_min_timestep',
        'lr_anneal_timestep',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ])
    # dueling is only appropriate for Q-learning with a single output head
    assert net_util.is_q_learning(algorithm)
    assert len(out_dim) == 1
    # body: conv stack, then dense stack over flattened conv features
    self.conv_hid_layers = self.hid_layers[0]
    self.dense_hid_layers = self.hid_layers[1]
    self.conv_model = self.build_conv_layers(self.conv_hid_layers)
    self.dense_model = self.build_dense_layers(self.dense_hid_layers)
    # dueling heads take the last dense dim, or the conv output dim if no dense layers
    if len(self.dense_hid_layers) > 0:
        tail_in_dim = self.dense_hid_layers[-1]
    else:
        tail_in_dim = self.conv_out_dim
    self.v = nn.Linear(tail_in_dim, 1)  # state value
    self.adv = nn.Linear(tail_in_dim, out_dim[0])  # action dependent raw advantage
    net_util.init_layers(self.modules())
    if torch.cuda.is_available() and self.gpu:
        for m in self.modules():
            m.cuda()
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.optim = net_util.get_optim(self, self.optim_spec)
    self.lr_decay = getattr(net_util, self.lr_decay)  # resolve decay fn by name
def __init__(self, net_spec, in_dim, out_dim):
    '''
    Dueling conv Q-network: a conv body over an image state (c,w,h), an
    optional fc body, and two heads — state value (v) and raw advantage (adv).
    in_dim: image shape (c,w,h)
    out_dim: int width of the advantage head
    '''
    assert len(in_dim) == 3  # image shape (c,w,h)
    nn.Module.__init__(self)
    Net.__init__(self, net_spec, in_dim, out_dim)
    # defaults, overridable via net_spec below
    defaults = dict(
        init_fn=None,
        normalize=False,
        batch_norm=False,
        clip_grad_val=None,
        loss_spec={'name': 'MSELoss'},
        optim_spec={'name': 'Adam'},
        lr_scheduler_spec=None,
        update_type='replace',
        update_frequency=1,
        polyak_coef=0.0,
        gpu=False,
    )
    util.set_attr(self, defaults)
    util.set_attr(self, self.net_spec, [
        'conv_hid_layers',
        'fc_hid_layers',
        'hid_layers_activation',
        'init_fn',
        'normalize',
        'batch_norm',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_scheduler_spec',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ])
    # the advantage head needs a scalar action count
    assert isinstance(out_dim, int)
    # conv body
    self.conv_model = self.build_conv_layers(self.conv_hid_layers)
    self.conv_out_dim = self.get_conv_output_size()
    # optional fc body on the flattened conv features
    if ps.is_empty(self.fc_hid_layers):
        tail_in_dim = self.conv_out_dim
    else:
        self.fc_model = net_util.build_fc_model([self.conv_out_dim] + self.fc_hid_layers, self.hid_layers_activation)
        tail_in_dim = self.fc_hid_layers[-1]
    # heads kept as direct attributes (not only inside the ModuleList) for compute speed
    self.v = nn.Linear(tail_in_dim, 1)  # state value
    self.adv = nn.Linear(tail_in_dim, out_dim)  # action dependent raw advantage
    self.model_tails = nn.ModuleList([self.v, self.adv])
    net_util.init_layers(self, self.init_fn)
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.to(self.device)
    self.train()