Python get_lr_scheduler Examples, slm_lab.agent.net.net_util.get_lr_scheduler Python Examples

Example #1

0

Show file

    def init_nets(self, global_nets=None):
        '''
        Initialize the neural networks used to learn the actor and critic from the spec
        Below we automatically select an appropriate net based on two different conditions
        1. If the action space is discrete or continuous action
            - Networks for continuous action spaces have two heads and return two values, the first is a tensor containing the mean of the action policy, the second is a tensor containing the std deviation of the action policy. The distribution is assumed to be a Gaussian (Normal) distribution.
            - Networks for discrete action spaces have a single head and return the logits for a categorical probability distribution over the discrete actions
        2. If the actor and critic are separate or share weights
            - If the networks share weights then the single network returns a list.
            - Continuous action spaces: The return list contains 3 elements: The first element contains the mean output for the actor (policy), the second element the std dev of the policy, and the third element is the state-value estimated by the network.
            - Discrete action spaces: The return list contains 2 element. The first element is a tensor containing the logits for a categorical probability distribution over the actions. The second element contains the state-value estimated by the network.
        3. If the network type is feedforward, convolutional, or recurrent
            - Feedforward and convolutional networks take a single state as input and require an OnPolicyReplay or OnPolicyBatchReplay memory
            - Recurrent networks take n states as input and require env spec "frame_op": "concat", "frame_op_len": seq_len
        '''
        assert 'shared' in self.net_spec, 'Specify "shared" for ActorCritic network in net_spec'
        self.shared = self.net_spec['shared']

        # create actor/critic specific specs
        actor_net_spec = self.net_spec.copy()
        critic_net_spec = self.net_spec.copy()
        for k in self.net_spec:
            if 'actor_' in k:
                actor_net_spec[k.replace('actor_', '')] = actor_net_spec.pop(k)
                critic_net_spec.pop(k)
            if 'critic_' in k:
                critic_net_spec[k.replace('critic_',
                                          '')] = critic_net_spec.pop(k)
                actor_net_spec.pop(k)
        if critic_net_spec['use_same_optim']:
            critic_net_spec = actor_net_spec

        in_dim = self.body.state_dim
        out_dim = net_util.get_out_dim(self.body, add_critic=self.shared)
        # main actor network, may contain out_dim self.shared == True
        NetClass = getattr(net, actor_net_spec['type'])
        self.net = NetClass(actor_net_spec, in_dim, out_dim)
        self.net_names = ['net']
        if not self.shared:  # add separate network for critic
            critic_out_dim = 1
            CriticNetClass = getattr(net, critic_net_spec['type'])
            self.critic_net = CriticNetClass(critic_net_spec, in_dim,
                                             critic_out_dim)
            self.net_names.append('critic_net')
        # init net optimizer and its lr scheduler
        self.optim = net_util.get_optim(self.net, self.net.optim_spec)
        self.lr_scheduler = net_util.get_lr_scheduler(
            self.optim, self.net.lr_scheduler_spec)
        if not self.shared:
            self.critic_optim = net_util.get_optim(self.critic_net,
                                                   self.critic_net.optim_spec)
            self.critic_lr_scheduler = net_util.get_lr_scheduler(
                self.critic_optim, self.critic_net.lr_scheduler_spec)
        net_util.set_global_nets(self, global_nets)
        self.end_init_nets()

Example #2

0

Show file

File: sac.py Project: jianghongping/SLM-Lab

    def init_nets(self, global_nets=None):
        '''
        Networks: net(actor/policy), q1_net, target_q1_net, q2_net, target_q2_net
        All networks are separate, and have the same hidden layer architectures and optim specs, so tuning is minimal
        '''
        self.shared = False  # SAC does not share networks
        NetClass = getattr(net, self.net_spec['type'])
        # main actor network
        self.net = NetClass(self.net_spec, self.body.state_dim,
                            net_util.get_out_dim(self.body))
        self.net_names = ['net']
        # two critic Q-networks to mitigate positive bias in q_loss and speed up training, uses q_net.py with prefix Q
        QNetClass = getattr(net, 'Q' + self.net_spec['type'])
        q_in_dim = [self.body.state_dim, self.body.action_dim]
        self.q1_net = QNetClass(self.net_spec, q_in_dim, 1)
        self.target_q1_net = QNetClass(self.net_spec, q_in_dim, 1)
        self.q2_net = QNetClass(self.net_spec, q_in_dim, 1)
        self.target_q2_net = QNetClass(self.net_spec, q_in_dim, 1)
        self.net_names += [
            'q1_net', 'target_q1_net', 'q2_net', 'target_q2_net'
        ]
        net_util.copy(self.q1_net, self.target_q1_net)
        net_util.copy(self.q2_net, self.target_q2_net)
        # temperature variable to be learned, and its target entropy
        self.log_alpha = torch.zeros(1,
                                     requires_grad=True,
                                     device=self.net.device)
        self.alpha = self.log_alpha.detach().exp()
        if self.body.is_discrete:
            self.target_entropy = -self.body.action_space.n
        else:
            self.target_entropy = -np.product(self.body.action_space.shape)

        # init net optimizer and its lr scheduler
        self.optim = net_util.get_optim(self.net, self.net.optim_spec)
        self.lr_scheduler = net_util.get_lr_scheduler(
            self.optim, self.net.lr_scheduler_spec)
        self.q1_optim = net_util.get_optim(self.q1_net, self.q1_net.optim_spec)
        self.q1_lr_scheduler = net_util.get_lr_scheduler(
            self.q1_optim, self.q1_net.lr_scheduler_spec)
        self.q2_optim = net_util.get_optim(self.q2_net, self.q2_net.optim_spec)
        self.q2_lr_scheduler = net_util.get_lr_scheduler(
            self.q2_optim, self.q2_net.lr_scheduler_spec)
        self.alpha_optim = net_util.get_optim(self.log_alpha,
                                              self.net.optim_spec)
        self.alpha_lr_scheduler = net_util.get_lr_scheduler(
            self.alpha_optim, self.net.lr_scheduler_spec)
        net_util.set_global_nets(self, global_nets)
        self.post_init_nets()

Example #3

0

Show file

    def __init__(self, net_spec, in_dim, out_dim):
        '''
        net_spec:
        hid_layers: list containing dimensions of the hidden layers
        hid_layers_activation: activation function for the hidden layers
        init_fn: weight initialization function
        clip_grad_val: clip gradient norm if value is not None
        loss_spec: measure of error between model predictions and correct outputs
        optim_spec: parameters for initializing the optimizer
        lr_scheduler_spec: Pytorch optim.lr_scheduler
        update_type: method to update network weights: 'replace' or 'polyak'
        update_frequency: how many total timesteps per update
        polyak_coef: ratio of polyak weight update
        gpu: whether to train using a GPU. Note this will only work if a GPU is available, othewise setting gpu=True does nothing
        '''
        nn.Module.__init__(self)
        super(MLPNet, self).__init__(net_spec, in_dim, out_dim)
        # set default
        util.set_attr(self, dict(
            init_fn=None,
            clip_grad_val=None,
            loss_spec={'name': 'MSELoss'},
            optim_spec={'name': 'Adam'},
            lr_scheduler_spec=None,
            update_type='replace',
            update_frequency=1,
            polyak_coef=0.0,
            gpu=False,
        ))
        util.set_attr(self, self.net_spec, [
            'shared',
            'hid_layers',
            'hid_layers_activation',
            'init_fn',
            'clip_grad_val',
            'loss_spec',
            'optim_spec',
            'lr_scheduler_spec',
            'update_type',
            'update_frequency',
            'polyak_coef',
            'gpu',
        ])

        dims = [self.in_dim] + self.hid_layers
        self.model = net_util.build_fc_model(dims, self.hid_layers_activation)
        # add last layer with no activation
        # tails. avoid list for single-tail for compute speed
        if ps.is_integer(self.out_dim):
            self.model_tail = nn.Linear(dims[-1], self.out_dim)
        else:
            self.model_tails = nn.ModuleList([nn.Linear(dims[-1], out_d) for out_d in self.out_dim])

        net_util.init_layers(self, self.init_fn)
        for module in self.modules():
            module.to(self.device)
        self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
        self.optim = net_util.get_optim(self, self.optim_spec)
        self.lr_scheduler = net_util.get_lr_scheduler(self, self.lr_scheduler_spec)

Example #4

0

Show file

File: sac.py Project: liangtianxin/SLM-Lab

    def init_nets(self, global_nets=None):
        '''
        Networks: net(actor/policy), critic (value), target_critic, q1_net, q1_net
        All networks are separate, and have the same hidden layer architectures and optim specs, so tuning is minimal
        '''
        self.shared = False  # SAC does not share networks
        in_dim = self.body.state_dim
        out_dim = net_util.get_out_dim(self.body)
        NetClass = getattr(net, self.net_spec['type'])
        # main actor network
        self.net = NetClass(self.net_spec, in_dim, out_dim)
        self.net_names = ['net']
        # critic network and its target network
        val_out_dim = 1
        self.critic_net = NetClass(self.net_spec, in_dim, val_out_dim)
        self.target_critic_net = NetClass(self.net_spec, in_dim, val_out_dim)
        self.net_names += ['critic_net', 'target_critic_net']
        # two Q-networks to mitigate positive bias in q_loss and speed up training
        q_in_dim = in_dim + self.body.action_dim  # NOTE concat s, a for now
        self.q1_net = NetClass(self.net_spec, q_in_dim, val_out_dim)
        self.q2_net = NetClass(self.net_spec, q_in_dim, val_out_dim)
        self.net_names += ['q1_net', 'q2_net']

        # init net optimizer and its lr scheduler
        self.optim = net_util.get_optim(self.net, self.net.optim_spec)
        self.lr_scheduler = net_util.get_lr_scheduler(
            self.optim, self.net.lr_scheduler_spec)
        self.critic_optim = net_util.get_optim(self.critic_net,
                                               self.critic_net.optim_spec)
        self.critic_lr_scheduler = net_util.get_lr_scheduler(
            self.critic_optim, self.critic_net.lr_scheduler_spec)
        self.q1_optim = net_util.get_optim(self.q1_net, self.q1_net.optim_spec)
        self.q1_lr_scheduler = net_util.get_lr_scheduler(
            self.q1_optim, self.q1_net.lr_scheduler_spec)
        self.q2_optim = net_util.get_optim(self.q2_net, self.q2_net.optim_spec)
        self.q2_lr_scheduler = net_util.get_lr_scheduler(
            self.q2_optim, self.q2_net.lr_scheduler_spec)
        net_util.set_global_nets(self, global_nets)
        self.post_init_nets()

Example #5

0

Show file

File: sarsa.py Project: c-w-m/slm-lab

 def init_nets(self, global_nets=None):
     '''Initialize the neural network used to learn the Q function from the spec'''
     if 'Recurrent' in self.net_spec['type']:
         self.net_spec.update(seq_len=self.net_spec['seq_len'])
     in_dim = self.body.state_dim
     out_dim = net_util.get_out_dim(self.body)
     NetClass = getattr(net, self.net_spec['type'])
     self.net = NetClass(self.net_spec, in_dim, out_dim)
     self.net_names = ['net']
     # init net optimizer and its lr scheduler
     self.optim = net_util.get_optim(self.net, self.net.optim_spec)
     self.lr_scheduler = net_util.get_lr_scheduler(self.optim, self.net.lr_scheduler_spec)
     net_util.set_global_nets(self, global_nets)
     self.end_init_nets()

Example #6

0

Show file

File: mlp.py Project: wilson1yan/SLM-Lab

    def __init__(self, net_spec, in_dim, out_dim):
        nn.Module.__init__(self)
        Net.__init__(self, net_spec, in_dim, out_dim)
        # set default
        util.set_attr(
            self,
            dict(
                init_fn=None,
                clip_grad_val=None,
                loss_spec={'name': 'MSELoss'},
                optim_spec={'name': 'Adam'},
                lr_scheduler_spec=None,
                update_type='replace',
                update_frequency=1,
                polyak_coef=0.0,
                gpu=False,
            ))
        util.set_attr(self, self.net_spec, [
            'shared',
            'hid_layers',
            'hid_layers_activation',
            'init_fn',
            'clip_grad_val',
            'loss_spec',
            'optim_spec',
            'lr_scheduler_spec',
            'update_type',
            'update_frequency',
            'polyak_coef',
            'gpu',
        ])

        # Guard against inappropriate algorithms and environments
        # Build model body
        dims = [self.in_dim] + self.hid_layers
        self.model_body = net_util.build_sequential(dims,
                                                    self.hid_layers_activation)
        # output layers
        self.v = nn.Linear(dims[-1], 1)  # state value
        self.adv = nn.Linear(dims[-1],
                             out_dim)  # action dependent raw advantage
        net_util.init_layers(self, self.init_fn)
        for module in self.modules():
            module.to(self.device)
        self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
        self.optim = net_util.get_optim(self, self.optim_spec)
        self.lr_scheduler = net_util.get_lr_scheduler(self,
                                                      self.lr_scheduler_spec)

Example #7

0

Show file

 def init_nets(self, global_nets=None):
     '''
     Initialize the neural network used to learn the policy function from the spec
     Below we automatically select an appropriate net for a discrete or continuous action space if the setting is of the form 'MLPNet'. Otherwise the correct type of network is assumed to be specified in the spec.
     Networks for continuous action spaces have two heads and return two values, the first is a tensor containing the mean of the action policy, the second is a tensor containing the std deviation of the action policy. The distribution is assumed to be a Gaussian (Normal) distribution.
     Networks for discrete action spaces have a single head and return the logits for a categorical probability distribution over the discrete actions
     '''
     in_dim = self.body.state_dim
     out_dim = net_util.get_out_dim(self.body)
     NetClass = getattr(net, self.net_spec['type'])
     self.net = NetClass(self.net_spec, in_dim, out_dim)
     self.net_names = ['net']
     # init net optimizer and its lr scheduler
     self.optim = net_util.get_optim(self.net, self.net.optim_spec)
     self.lr_scheduler = net_util.get_lr_scheduler(self.optim, self.net.lr_scheduler_spec)
     net_util.set_global_nets(self, global_nets)
     self.end_init_nets()

Example #8

0

Show file

 def init_nets(self, global_nets=None):
     '''Initialize the neural network used to learn the Q function from the spec'''
     if self.algorithm_spec['name'] == 'VanillaDQN':
         assert all(
             k not in self.net_spec
             for k in ['update_type', 'update_frequency', 'polyak_coef']
         ), 'Network update not available for VanillaDQN; use DQN.'
     in_dim = self.body.state_dim
     out_dim = net_util.get_out_dim(self.body)
     NetClass = getattr(net, self.net_spec['type'])
     self.net = NetClass(self.net_spec, in_dim, out_dim)
     self.net_names = ['net']
     # init net optimizer and its lr scheduler
     self.optim = net_util.get_optim(self.net, self.net.optim_spec)
     self.lr_scheduler = net_util.get_lr_scheduler(
         self.optim, self.net.lr_scheduler_spec)
     net_util.set_global_nets(self, global_nets)
     self.post_init_nets()

Example #9

0

Show file

 def init_nets(self, global_nets=None):
     '''Initialize networks'''
     if self.algorithm_spec['name'] == 'DQNBase':
         assert all(
             k not in self.net_spec
             for k in ['update_type', 'update_frequency', 'polyak_coef']
         ), 'Network update not available for DQNBase; use DQN.'
     in_dim = self.body.state_dim
     out_dim = net_util.get_out_dim(self.body)
     NetClass = getattr(net, self.net_spec['type'])
     self.net = NetClass(self.net_spec, in_dim, out_dim)
     self.target_net = NetClass(self.net_spec, in_dim, out_dim)
     self.net_names = ['net', 'target_net']
     # init net optimizer and its lr scheduler
     self.optim = net_util.get_optim(self.net, self.net.optim_spec)
     self.lr_scheduler = net_util.get_lr_scheduler(
         self.optim, self.net.lr_scheduler_spec)
     net_util.set_global_nets(self, global_nets)
     self.post_init_nets()
     self.online_net = self.target_net
     self.eval_net = self.target_net

Example #10

0

Show file

File: test_mlp.py Project: c-w-m/slm-lab

        "name": "StepLR",
        "step_size": 30,
        "gamma": 0.1
    },
    "update_type": "replace",
    "update_frequency": 1,
    "polyak_coef": 0.9,
    "gpu": True
}
in_dim = 10
out_dim = 3
batch_size = 16
net = MLPNet(net_spec, in_dim, out_dim)
# init net optimizer and its lr scheduler
optim = net_util.get_optim(net, net.optim_spec)
lr_scheduler = net_util.get_lr_scheduler(optim, net.lr_scheduler_spec)
x = torch.rand((batch_size, in_dim))


def test_init():
    net = MLPNet(net_spec, in_dim, out_dim)
    assert isinstance(net, nn.Module)
    assert hasattr(net, 'model')
    assert hasattr(net, 'model_tail')
    assert not hasattr(net, 'model_tails')


def test_forward():
    y = net.forward(x)
    assert y.shape == (batch_size, out_dim)

Example #11

0

Show file

File: mlp.py Project: wilson1yan/SLM-Lab

    def __init__(self, net_spec, in_dim, out_dim):
        '''
        Multi state processing heads, single shared body, and multi action tails.
        There is one state and action head per body/environment
        Example:

          env 1 state       env 2 state
         _______|______    _______|______
        |    head 1    |  |    head 2    |
        |______________|  |______________|
                |                  |
                |__________________|
         ________________|_______________
        |          Shared body           |
        |________________________________|
                         |
                 ________|_______
                |                |
         _______|______    ______|_______
        |    tail 1    |  |    tail 2    |
        |______________|  |______________|
                |                |
           env 1 action      env 2 action
        '''
        nn.Module.__init__(self)
        super(HydraMLPNet, self).__init__(net_spec, in_dim, out_dim)
        # set default
        util.set_attr(
            self,
            dict(
                init_fn=None,
                clip_grad_val=None,
                loss_spec={'name': 'MSELoss'},
                optim_spec={'name': 'Adam'},
                lr_scheduler_spec=None,
                update_type='replace',
                update_frequency=1,
                polyak_coef=0.0,
                gpu=False,
            ))
        util.set_attr(self, self.net_spec, [
            'hid_layers',
            'hid_layers_activation',
            'init_fn',
            'clip_grad_val',
            'loss_spec',
            'optim_spec',
            'lr_scheduler_spec',
            'update_type',
            'update_frequency',
            'polyak_coef',
            'gpu',
        ])
        assert len(
            self.hid_layers
        ) == 3, 'Your hidden layers must specify [*heads], [body], [*tails]. If not, use MLPNet'
        assert isinstance(self.in_dim,
                          list), 'Hydra network needs in_dim as list'
        assert isinstance(self.out_dim,
                          list), 'Hydra network needs out_dim as list'
        self.head_hid_layers = self.hid_layers[0]
        self.body_hid_layers = self.hid_layers[1]
        self.tail_hid_layers = self.hid_layers[2]
        if len(self.head_hid_layers) == 1:
            self.head_hid_layers = self.head_hid_layers * len(self.in_dim)
        if len(self.tail_hid_layers) == 1:
            self.tail_hid_layers = self.tail_hid_layers * len(self.out_dim)

        self.model_heads = self.build_model_heads(in_dim)
        heads_out_dim = np.sum(
            [head_hid_layers[-1] for head_hid_layers in self.head_hid_layers])
        dims = [heads_out_dim] + self.body_hid_layers
        self.model_body = net_util.build_sequential(dims,
                                                    self.hid_layers_activation)
        self.model_tails = self.build_model_tails(out_dim)

        net_util.init_layers(self, self.init_fn)
        for module in self.modules():
            module.to(self.device)
        self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
        self.optim = net_util.get_optim(self, self.optim_spec)
        self.lr_scheduler = net_util.get_lr_scheduler(self,
                                                      self.lr_scheduler_spec)

Example #12

0

Show file

File: recurrent.py Project: vmuthuk2/SLM-Lab

    def __init__(self, net_spec, in_dim, out_dim):
        '''
        net_spec:
        cell_type: any of RNN, LSTM, GRU
        fc_hid_layers: list of fc layers preceeding the RNN layers
        hid_layers_activation: activation function for the fc hidden layers
        out_layer_activation: activation function for the output layer, same shape as out_dim
        rnn_hidden_size: rnn hidden_size
        rnn_num_layers: number of recurrent layers
        bidirectional: if RNN should be bidirectional
        seq_len: length of the history of being passed to the net
        init_fn: weight initialization function
        clip_grad_val: clip gradient norm if value is not None
        loss_spec: measure of error between model predictions and correct outputs
        optim_spec: parameters for initializing the optimizer
        lr_scheduler_spec: Pytorch optim.lr_scheduler
        update_type: method to update network weights: 'replace' or 'polyak'
        update_frequency: how many total timesteps per update
        polyak_coef: ratio of polyak weight update
        gpu: whether to train using a GPU. Note this will only work if a GPU is available, othewise setting gpu=True does nothing
        '''
        nn.Module.__init__(self)
        super(RecurrentNet, self).__init__(net_spec, in_dim, out_dim)
        # set default
        util.set_attr(
            self,
            dict(
                out_layer_activation=None,
                cell_type='GRU',
                rnn_num_layers=1,
                bidirectional=False,
                init_fn=None,
                clip_grad_val=None,
                loss_spec={'name': 'MSELoss'},
                optim_spec={'name': 'Adam'},
                lr_scheduler_spec=None,
                update_type='replace',
                update_frequency=1,
                polyak_coef=0.0,
                gpu=False,
            ))
        util.set_attr(self, self.net_spec, [
            'cell_type',
            'fc_hid_layers',
            'hid_layers_activation',
            'out_layer_activation',
            'rnn_hidden_size',
            'rnn_num_layers',
            'bidirectional',
            'seq_len',
            'init_fn',
            'clip_grad_val',
            'loss_spec',
            'optim_spec',
            'lr_scheduler_spec',
            'update_type',
            'update_frequency',
            'polyak_coef',
            'gpu',
        ])
        # fc body: state processing model
        if ps.is_empty(self.fc_hid_layers):
            self.rnn_input_dim = self.in_dim
        else:
            fc_dims = [self.in_dim] + self.fc_hid_layers
            self.fc_model = net_util.build_fc_model(fc_dims,
                                                    self.hid_layers_activation)
            self.rnn_input_dim = fc_dims[-1]

        # RNN model
        self.rnn_model = getattr(nn, net_util.get_nn_name(self.cell_type))(
            input_size=self.rnn_input_dim,
            hidden_size=self.rnn_hidden_size,
            num_layers=self.rnn_num_layers,
            batch_first=True,
            bidirectional=self.bidirectional)

        # tails. avoid list for single-tail for compute speed
        if ps.is_integer(self.out_dim):
            self.model_tail = net_util.build_fc_model(
                [self.rnn_hidden_size, self.out_dim],
                self.out_layer_activation)
        else:
            if not ps.is_list(self.out_layer_activation):
                self.out_layer_activation = [self.out_layer_activation
                                             ] * len(out_dim)
            assert len(self.out_layer_activation) == len(self.out_dim)
            tails = []
            for out_d, out_activ in zip(self.out_dim,
                                        self.out_layer_activation):
                tail = net_util.build_fc_model([self.rnn_hidden_size, out_d],
                                               out_activ)
                tails.append(tail)
            self.model_tails = nn.ModuleList(tails)

        net_util.init_layers(self, self.init_fn)
        for module in self.modules():
            module.to(self.device)
        self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
        self.optim = net_util.get_optim(self, self.optim_spec)
        self.lr_scheduler = net_util.get_lr_scheduler(self,
                                                      self.lr_scheduler_spec)

Example #13

0

Show file

    def __init__(self, net_spec, in_dim, out_dim):
        '''
        net_spec:
        conv_hid_layers: list containing dimensions of the convolutional hidden layers, each is a list representing hid_layer = out_d, kernel, stride, padding, dilation.
            Asssumed to all come before the flat layers.
            Note: a convolutional layer should specify the in_channel, out_channels, kernel_size, stride (of kernel steps), padding, and dilation (spacing between kernel points) E.g. [3, 16, (5, 5), 1, 0, (2, 2)]
            For more details, see http://pytorch.org/docs/master/nn.html#conv2d and https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
        fc_hid_layers: list of fc layers following the convolutional layers
        hid_layers_activation: activation function for the hidden layers
        out_layer_activation: activation function for the output layer, same shape as out_dim
        init_fn: weight initialization function
        batch_norm: whether to add batch normalization after each convolutional layer, excluding the input layer.
        clip_grad_val: clip gradient norm if value is not None
        loss_spec: measure of error between model predictions and correct outputs
        optim_spec: parameters for initializing the optimizer
        lr_scheduler_spec: Pytorch optim.lr_scheduler
        update_type: method to update network weights: 'replace' or 'polyak'
        update_frequency: how many total timesteps per update
        polyak_coef: ratio of polyak weight update
        gpu: whether to train using a GPU. Note this will only work if a GPU is available, othewise setting gpu=True does nothing
        '''
        assert len(in_dim) == 3  # image shape (c,w,h)
        nn.Module.__init__(self)
        super(ConvNet, self).__init__(net_spec, in_dim, out_dim)
        # set default
        util.set_attr(
            self,
            dict(
                out_layer_activation=None,
                init_fn=None,
                batch_norm=True,
                clip_grad_val=None,
                loss_spec={'name': 'MSELoss'},
                optim_spec={'name': 'Adam'},
                lr_scheduler_spec=None,
                update_type='replace',
                update_frequency=1,
                polyak_coef=0.0,
                gpu=False,
            ))
        util.set_attr(self, self.net_spec, [
            'conv_hid_layers',
            'fc_hid_layers',
            'hid_layers_activation',
            'out_layer_activation',
            'init_fn',
            'batch_norm',
            'clip_grad_val',
            'loss_spec',
            'optim_spec',
            'lr_scheduler_spec',
            'update_type',
            'update_frequency',
            'polyak_coef',
            'gpu',
        ])

        # conv body
        self.conv_model = self.build_conv_layers(self.conv_hid_layers)
        self.conv_out_dim = self.get_conv_output_size()

        # fc body
        if ps.is_empty(self.fc_hid_layers):
            tail_in_dim = self.conv_out_dim
        else:
            # fc body from flattened conv
            self.fc_model = net_util.build_fc_model([self.conv_out_dim] +
                                                    self.fc_hid_layers,
                                                    self.hid_layers_activation)
            tail_in_dim = self.fc_hid_layers[-1]

        # tails. avoid list for single-tail for compute speed
        if ps.is_integer(self.out_dim):
            self.model_tail = net_util.build_fc_model(
                [tail_in_dim, self.out_dim], self.out_layer_activation)
        else:
            if not ps.is_list(self.out_layer_activation):
                self.out_layer_activation = [self.out_layer_activation
                                             ] * len(out_dim)
            assert len(self.out_layer_activation) == len(self.out_dim)
            tails = []
            for out_d, out_activ in zip(self.out_dim,
                                        self.out_layer_activation):
                tail = net_util.build_fc_model([tail_in_dim, out_d], out_activ)
                tails.append(tail)
            self.model_tails = nn.ModuleList(tails)

        net_util.init_layers(self, self.init_fn)
        for module in self.modules():
            module.to(self.device)
        self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
        self.optim = net_util.get_optim(self, self.optim_spec)
        self.lr_scheduler = net_util.get_lr_scheduler(self,
                                                      self.lr_scheduler_spec)

Example #14

0

Show file

    def __init__(self, net_spec, in_dim, out_dim):
        assert len(in_dim) == 3  # image shape (c,w,h)
        nn.Module.__init__(self)
        Net.__init__(self, net_spec, in_dim, out_dim)
        # set default
        util.set_attr(
            self,
            dict(
                init_fn=None,
                batch_norm=False,
                clip_grad_val=None,
                loss_spec={'name': 'MSELoss'},
                optim_spec={'name': 'Adam'},
                lr_scheduler_spec=None,
                update_type='replace',
                update_frequency=1,
                polyak_coef=0.0,
                gpu=False,
            ))
        util.set_attr(self, self.net_spec, [
            'conv_hid_layers',
            'fc_hid_layers',
            'hid_layers_activation',
            'init_fn',
            'batch_norm',
            'clip_grad_val',
            'loss_spec',
            'optim_spec',
            'lr_scheduler_spec',
            'update_type',
            'update_frequency',
            'polyak_coef',
            'gpu',
        ])

        # Guard against inappropriate algorithms and environments
        assert isinstance(out_dim, int)

        # conv body
        self.conv_model = self.build_conv_layers(self.conv_hid_layers)
        self.conv_out_dim = self.get_conv_output_size()

        # fc body
        if ps.is_empty(self.fc_hid_layers):
            tail_in_dim = self.conv_out_dim
        else:
            # fc layer from flattened conv
            self.fc_model = net_util.build_fc_model([self.conv_out_dim] +
                                                    self.fc_hid_layers,
                                                    self.hid_layers_activation)
            tail_in_dim = self.fc_hid_layers[-1]

        # tails. avoid list for single-tail for compute speed
        self.v = nn.Linear(tail_in_dim, 1)  # state value
        self.adv = nn.Linear(tail_in_dim,
                             out_dim)  # action dependent raw advantage
        self.model_tails = nn.ModuleList(self.v, self.adv)

        net_util.init_layers(self, self.init_fn)
        for module in self.modules():
            module.to(self.device)
        self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
        self.optim = net_util.get_optim(self, self.optim_spec)
        self.lr_scheduler = net_util.get_lr_scheduler(self,
                                                      self.lr_scheduler_spec)