Example #1
 def init_algorithm_params(self):
     '''Initialize other algorithm parameters'''
     # set default
     util.set_attr(self,
                   dict(
                       action_pdtype='default',
                       action_policy='default',
                   ))
     util.set_attr(self, self.algorithm_spec, [
         'policy_name',
         'action_pdtype',
         'action_policy',
     ])
     self.action_policy = getattr(policy_util, self.action_policy)
     self.policy = None
     if 'word_policy' in self.algorithm_spec:
         params = deepcopy(ps.get(self.algorithm_spec, 'word_policy'))
         PolicyClass = getattr(word_policy, params.pop('name'))
     elif 'e2e' in self.algorithm_spec:
         params = deepcopy(ps.get(self.algorithm_spec, 'e2e'))
         PolicyClass = getattr(e2e, params.pop('name'))
     else:
         params = deepcopy(ps.get(self.algorithm_spec, 'policy'))
         PolicyClass = getattr(policy, params.pop('name'))
     self.policy = PolicyClass(**params)
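
All of these examples share one pattern: util.set_attr is first called with a dict of defaults, then again with the spec dict plus a whitelist of keys, so spec values override the defaults while unspecified keys keep them. A minimal sketch of a helper consistent with both call forms (an assumption; the actual SLM-Lab/ConvLab utility may differ):

def set_attr(obj, attr_dict, keys=None):
    '''Set attributes on obj from attr_dict; if keys is given, copy only those keys present in attr_dict.'''
    if keys is not None:
        # keep only whitelisted keys that exist in the source dict
        attr_dict = {k: v for k, v in attr_dict.items() if k in keys}
    for attr, val in attr_dict.items():
        setattr(obj, attr, val)
    return obj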
Example #2
 def init_algorithm_params(self):
     '''Initialize other algorithm parameters.'''
     # set default
     util.set_attr(
         self,
         dict(
             action_pdtype='default',
             action_policy='default',
             explore_var_spec=None,
         ))
     util.set_attr(
         self,
         self.algorithm_spec,
         [
             'action_pdtype',
             'action_policy',
             # explore_var is epsilon, tau, etc., depending on the action policy
             # these control the trade-off between exploration and exploitation
             'explore_var_spec',
             'gamma',  # the discount factor
             'training_frequency',  # how often to train for batch training (once every training_frequency time steps)
         ])
     self.to_train = 0
     self.action_policy = getattr(policy_util, self.action_policy)
     self.explore_var_scheduler = policy_util.VarScheduler(
         self.explore_var_spec)
     self.body.explore_var = self.explore_var_scheduler.start_val
Example #3
 def init_algorithm_params(self):
     '''Initialize other algorithm parameters'''
     # set default
     util.set_attr(
         self,
         dict(
             action_pdtype='default',
             action_policy='default',
             explore_var_spec=None,
             entropy_coef_spec=None,
             policy_loss_coef=1.0,
             val_loss_coef=1.0,
         ))
     util.set_attr(
         self,
         self.algorithm_spec,
         [
             'action_pdtype',
             'action_policy',
             # theoretically, AC does not need an explicit exploration variable (its stochastic policy already explores); this implementation offers one as an option
             'explore_var_spec',
             'gamma',  # the discount factor
             'lam',
             'num_step_returns',
             'entropy_coef_spec',
             'policy_loss_coef',
             'val_loss_coef',
             'sil_policy_loss_coef',
             'sil_val_loss_coef',
             'training_frequency',
             'training_batch_iter',
             'training_iter',
         ])
     super().init_algorithm_params()
Example #4
 def init_algorithm_params(self):
     # set default
     util.set_attr(
         self,
         dict(
             action_pdtype='Argmax',
             action_policy='epsilon_greedy',
             explore_var_spec=None,
         ))
     util.set_attr(
         self,
         self.algorithm_spec,
         [
             'action_pdtype',
             'action_policy',
             # explore_var is epsilon, tau, etc., depending on the action policy
             # these control the trade-off between exploration and exploitation
             'explore_var_spec',
             'gamma',  # the discount factor
             'training_batch_iter',  # how many gradient updates per batch
             'training_iter',  # how many batches to train each time
             'training_frequency',  # how often to train (once every few timesteps)
             'training_start_step',  # how long before starting training
         ])
     super().init_algorithm_params()
Example #5
 def init_algorithm_params(self):
     '''Initialize other algorithm parameters'''
     # set default
     util.set_attr(self, dict(
         action_pdtype='default',
         action_policy='default',
         explore_var_spec=None,
         entropy_coef_spec=None,
         policy_loss_coef=1.0,
     ))
     util.set_attr(self, self.algorithm_spec, [
         'action_pdtype',
         'action_policy',
         # theoretically, REINFORCE does not need an explicit exploration variable (its stochastic policy already explores); this implementation offers one as an option
         'explore_var_spec',
         'gamma',  # the discount factor
         'entropy_coef_spec',
         'policy_loss_coef',
         'training_frequency',
     ])
     self.to_train = 0
     self.action_policy = getattr(policy_util, self.action_policy)
     self.explore_var_scheduler = policy_util.VarScheduler(self.explore_var_spec)
     self.body.explore_var = self.explore_var_scheduler.start_val
     if self.entropy_coef_spec is not None:
         self.entropy_coef_scheduler = policy_util.VarScheduler(self.entropy_coef_spec)
         self.body.entropy_coef = self.entropy_coef_scheduler.start_val
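
For reference, algorithm_spec is a plain dict parsed from the experiment spec file. A hypothetical fragment matching the keys whitelisted above (the values are illustrative only):

algorithm_spec = {
    'name': 'Reinforce',
    'action_pdtype': 'default',
    'action_policy': 'default',
    'explore_var_spec': None,
    'gamma': 0.99,  # the discount factor
    'entropy_coef_spec': {
        'name': 'linear_decay',
        'start_val': 0.01,
        'end_val': 0.001,
        'start_step': 0,
        'end_step': 20000,
    },
    'policy_loss_coef': 1.0,
    'training_frequency': 1,
}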
Example #6
    def __init__(self, net_spec, in_dim, out_dim):
        assert len(in_dim) == 3  # image shape (c,w,h)
        nn.Module.__init__(self)
        Net.__init__(self, net_spec, in_dim, out_dim)
        # set default
        util.set_attr(self, dict(
            init_fn=None,
            normalize=False,
            batch_norm=False,
            clip_grad_val=None,
            loss_spec={'name': 'MSELoss'},
            optim_spec={'name': 'Adam'},
            lr_scheduler_spec=None,
            update_type='replace',
            update_frequency=1,
            polyak_coef=0.0,
            gpu=False,
        ))
        util.set_attr(self, self.net_spec, [
            'conv_hid_layers',
            'fc_hid_layers',
            'hid_layers_activation',
            'init_fn',
            'normalize',
            'batch_norm',
            'clip_grad_val',
            'loss_spec',
            'optim_spec',
            'lr_scheduler_spec',
            'update_type',
            'update_frequency',
            'polyak_coef',
            'gpu',
        ])

        # Guard against inappropriate algorithms and environments
        assert isinstance(out_dim, int)

        # conv body
        self.conv_model = self.build_conv_layers(self.conv_hid_layers)
        self.conv_out_dim = self.get_conv_output_size()

        # fc body
        if ps.is_empty(self.fc_hid_layers):
            tail_in_dim = self.conv_out_dim
        else:
            # fc layer from flattened conv
            self.fc_model = net_util.build_fc_model([self.conv_out_dim] + self.fc_hid_layers,
                                                    self.hid_layers_activation)
            tail_in_dim = self.fc_hid_layers[-1]

        # tails. avoid list for single-tail for compute speed
        self.v = nn.Linear(tail_in_dim, 1)  # state value
        self.adv = nn.Linear(tail_in_dim, out_dim)  # action dependent raw advantage
        self.model_tails = nn.ModuleList([self.v, self.adv])

        net_util.init_layers(self, self.init_fn)
        self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
        self.to(self.device)
        self.train()
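
The two linear tails above are dueling-network heads: v estimates the state value and adv the raw per-action advantages. A sketch of how a forward pass could combine them, following the standard dueling aggregation Q(s,a) = V(s) + A(s,a) - mean_a A(s,a) (this forward method is an assumption, not the class's actual code):

def forward(self, x):
    x = self.conv_model(x)
    x = x.view(x.size(0), -1)  # flatten conv features
    if hasattr(self, 'fc_model'):
        x = self.fc_model(x)
    v = self.v(x)      # (batch, 1)
    adv = self.adv(x)  # (batch, out_dim)
    # subtract the mean advantage for identifiability
    return v + adv - adv.mean(dim=1, keepdim=True)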
Example #7
 def __init__(self, agent, global_nets=None):
     super().__init__(agent, global_nets)
     util.set_attr(self, self.algorithm_spec, [
         'warmup_epi',
     ])
     # create the extra replay memory for warm-up
     MemoryClass = getattr(memory, self.memory_spec['warmup_name'])
     self.body.warmup_memory = MemoryClass(self.memory_spec, self.body)
Example #8
def set_global_nets(algorithm, global_nets):
    '''For Hogwild, set attr built in init_global_nets above. Use in algorithm init.'''
    # set attr first so algorithm always has self.global_{net} to pass into train_step
    for net_name in algorithm.net_names:
        setattr(algorithm, f'global_{net_name}', None)
    # set attr created in init_global_nets
    if global_nets is not None:
        util.set_attr(algorithm, global_nets)
        logger.info(f'Set global_nets attr {list(global_nets.keys())} for Hogwild')
Example #9
 def __init__(self, memory_spec, body):
     super().__init__(memory_spec, body)
     # NOTE for OnPolicy replay, frequency = episode; for other classes below frequency = frames
     util.set_attr(self, self.body.agent.agent_spec['algorithm'], ['training_frequency'])
     # don't reset the cumulative experience counters below when the memory is reset
     self.is_episodic = True
     self.size = 0  # total experiences stored
     self.seen_size = 0  # total experiences seen cumulatively
     # declare what data keys to store
     self.data_keys = ['states', 'actions', 'rewards', 'next_states', 'dones']
     self.reset()
Example #10
 def __init__(self, agent, global_nets=None):
     super().__init__(agent, global_nets)
     util.set_attr(self, self.algorithm_spec, [
         'warmup_epi',
     ])
     # create the extra replay memory for warm-up
     MemoryClass = getattr(memory, self.memory_spec['warmup_name'])
     self.body.warmup_memory = MemoryClass(self.memory_spec, self.body)
     if self.memory_spec['warmup_memory_path'] != '':
         import pickle
         with open(self.memory_spec['warmup_memory_path'], 'rb') as f:
             self.body.warmup_memory = pickle.load(f)
Example #11
 def __init__(self, var_decay_spec=None):
     self._updater_name = 'no_decay' if var_decay_spec is None else var_decay_spec['name']
     self._updater = getattr(math_util, self._updater_name)
     util.set_attr(self, dict(start_val=np.nan))
     util.set_attr(self, var_decay_spec, [
         'start_val',
         'end_val',
         'start_step',
         'end_step',
     ])
     if not getattr(self, 'end_val', None):
         self.end_val = self.start_val
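
A usage sketch for the scheduler above, assuming math_util exposes decay functions such as linear_decay and that VarScheduler has an update method driven by the environment clock (both assumptions based on the call sites in the other examples):

spec = {'name': 'linear_decay', 'start_val': 1.0, 'end_val': 0.1, 'start_step': 0, 'end_step': 10000}
scheduler = VarScheduler(spec)
explore_var = scheduler.start_val  # 1.0 at step 0
# each step, re-evaluate the decayed value, e.g.:
# explore_var = scheduler.update(algorithm, env.clock)  # hypothetical call signature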
Example #12
    def __init__(self, spec, e=None):
        super(MultiWozEnv, self).__init__(spec, e)
        self.action_dim = self.observation_dim = 0
        util.set_attr(self, self.env_spec, [
            'observation_dim',
            'action_dim',
        ])
        worker_id = int(f'{os.getpid()}{self.e+int(ps.unique_id())}'[-4:])
        self.u_env = MultiWozEnvironment(self.env_spec, worker_id, self.action_dim)
        self.evaluator = self.u_env.evaluator
        self.patch_gym_spaces(self.u_env)
        self._set_attr_from_u_env(self.u_env)

        logger.info(util.self_desc(self))
Example #13
 def init_algorithm_params(self):
     '''Initialize other algorithm parameters'''
     # set default
     util.set_attr(
         self,
         dict(
             action_pdtype='default',
             action_policy='default',
             explore_var_spec=None,
             entropy_coef_spec=None,
             policy_loss_coef=1.0,
             val_loss_coef=1.0,
         ))
     util.set_attr(
         self,
         self.algorithm_spec,
         [
             'action_pdtype',
             'action_policy',
             # theoretically, AC does not need an explicit exploration variable (its stochastic policy already explores); this implementation offers one as an option
             'explore_var_spec',
             'gamma',  # the discount factor
             'lam',
             'num_step_returns',
             'entropy_coef_spec',
             'policy_loss_coef',
             'val_loss_coef',
             'training_frequency',
         ])
     self.to_train = 0
     self.action_policy = getattr(policy_util, self.action_policy)
     self.explore_var_scheduler = policy_util.VarScheduler(
         self.explore_var_spec)
     self.body.explore_var = self.explore_var_scheduler.start_val
     if self.entropy_coef_spec is not None:
         self.entropy_coef_scheduler = policy_util.VarScheduler(
             self.entropy_coef_spec)
         self.body.entropy_coef = self.entropy_coef_scheduler.start_val
     # Select appropriate methods to calculate advs and v_targets for training
     if self.lam is not None:
         self.calc_advs_v_targets = self.calc_gae_advs_v_targets
     elif self.num_step_returns is not None:
         self.calc_advs_v_targets = self.calc_nstep_advs_v_targets
     else:
         self.calc_advs_v_targets = self.calc_ret_advs_v_targets
Example #14
    def __init__(self, memory_spec, body):
        util.set_attr(self, memory_spec, [
            'alpha',
            'epsilon',
            'batch_size',
            'max_size',
            'use_cer',
        ])
        super().__init__(memory_spec, body)

        self.epsilon = np.full((1, ), self.epsilon)
        self.alpha = np.full((1, ), self.alpha)
        # add 'priorities' to the data keys and call reset again
        self.data_keys = ['states', 'actions', 'rewards', 'next_states', 'dones', 'priorities']
        self.reset()
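
A hypothetical memory_spec matching the keys whitelisted above (values illustrative; prioritized replay commonly uses alpha around 0.6 and a small epsilon so no transition gets zero priority):

memory_spec = {
    'name': 'PrioritizedReplay',
    'alpha': 0.6,        # degree of prioritization (0 = uniform sampling)
    'epsilon': 0.0001,   # small constant added to priorities
    'batch_size': 32,
    'max_size': 10000,
    'use_cer': False,    # combined experience replay
}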
Example #15
 def __init__(self, memory_spec, body):
     super().__init__(memory_spec, body)
     util.set_attr(self, self.memory_spec, [
         'batch_size',
         'max_size',
         'use_cer',
     ])
     self.is_episodic = False
     self.batch_idxs = None
     self.size = 0  # total experiences stored
     self.seen_size = 0  # total experiences seen cumulatively
     self.head = -1  # index of most recent experience
     # generic next_state buffer to store last next_states (allow for multiple for venv)
     # self.ns_idx_offset = self.body.env.num_envs if body.env.is_venv else 1
     # self.ns_buffer = deque(maxlen=self.ns_idx_offset)
     # declare what data keys to store
     self.data_keys = ['states', 'actions', 'rewards', 'next_states', 'dones']
     self.reset()
Example #16
    def __init__(self, net_spec, in_dim, out_dim):
        nn.Module.__init__(self)
        Net.__init__(self, net_spec, in_dim, out_dim)
        # set default
        util.set_attr(
            self,
            dict(
                init_fn=None,
                clip_grad_val=None,
                loss_spec={'name': 'MSELoss'},
                optim_spec={'name': 'Adam'},
                lr_scheduler_spec=None,
                update_type='replace',
                update_frequency=1,
                polyak_coef=0.0,
                gpu=False,
            ))
        util.set_attr(self, self.net_spec, [
            'shared',
            'hid_layers',
            'hid_layers_activation',
            'init_fn',
            'clip_grad_val',
            'loss_spec',
            'optim_spec',
            'lr_scheduler_spec',
            'update_type',
            'update_frequency',
            'polyak_coef',
            'gpu',
        ])

        # Guard against inappropriate algorithms and environments
        # Build model body
        dims = [self.in_dim] + self.hid_layers
        self.model_body = net_util.build_fc_model(dims,
                                                  self.hid_layers_activation)
        # output layers
        self.v = nn.Linear(dims[-1], 1)  # state value
        self.adv = nn.Linear(dims[-1], out_dim)  # action dependent raw advantage
        net_util.init_layers(self, self.init_fn)
        self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
        self.to(self.device)
Example #17
 def init_algorithm_params(self):
     '''Initialize other algorithm parameters'''
     # set default
     util.set_attr(
         self,
         dict(
             action_pdtype='default',
             action_policy='default',
             explore_var_spec=None,
             entropy_coef_spec=None,
             minibatch_size=4,
             val_loss_coef=1.0,
         ))
     util.set_attr(
         self,
         self.algorithm_spec,
         [
             'action_pdtype',
             'action_policy',
             # theoretically, PPO does not need an explicit exploration variable (its stochastic policy already explores); this implementation offers one as an option
             'explore_var_spec',
             'gamma',
             'lam',
             'clip_eps_spec',
             'entropy_coef_spec',
             'val_loss_coef',
             'minibatch_size',
             'training_frequency',  # horizon
             'training_epoch',
         ])
     self.to_train = 0
     self.action_policy = getattr(policy_util, self.action_policy)
     self.explore_var_scheduler = policy_util.VarScheduler(
         self.explore_var_spec)
     self.body.explore_var = self.explore_var_scheduler.start_val
     # extra variable decays for PPO
     self.clip_eps_scheduler = policy_util.VarScheduler(self.clip_eps_spec)
     self.body.clip_eps = self.clip_eps_scheduler.start_val
     if self.entropy_coef_spec is not None:
         self.entropy_coef_scheduler = policy_util.VarScheduler(
             self.entropy_coef_spec)
         self.body.entropy_coef = self.entropy_coef_scheduler.start_val
     # PPO uses GAE
     self.calc_advs_v_targets = self.calc_gae_advs_v_targets
Example #18
File: base.py Project: temporaer/ConvLab
 def __init__(self, spec, e=None):
     self.e = e or 0  # for multi-env
     self.done = False
     self.env_spec = spec['env'][self.e]
     # set default
     util.set_attr(
         self,
         dict(
             log_frequency=None,  # default to log at epi done
             frame_op=None,
             frame_op_len=None,
             normalize_state=False,
             reward_scale=None,
             num_envs=None,
         ))
     util.set_attr(self, spec['meta'], [
         'log_frequency',
         'eval_frequency',
     ])
     util.set_attr(self, self.env_spec, [
         'name',
         'frame_op',
         'frame_op_len',
         'normalize_state',
         'reward_scale',
         'num_envs',
         'max_t',
         'max_frame',
     ])
     seq_len = ps.get(spec, 'agent.0.net.seq_len')
     if seq_len is not None:  # infer if using RNN
         self.frame_op = 'stack'
         self.frame_op_len = seq_len
     if util.in_eval_lab_modes():  # use singleton for eval
         self.num_envs = 1
         self.log_frequency = None
      if spec['meta']['distributed'] != False:  # divide max_frame for distributed
          self.max_frame = int(self.max_frame / spec['meta']['max_session'])
      self.is_venv = (self.num_envs is not None and self.num_envs > 1)
      if self.is_venv:
          assert self.log_frequency is not None, 'Specify log_frequency when using venv'
      self.clock_speed = 1 * (self.num_envs or 1)  # tick with a multiple of num_envs to properly count frames
     self.clock = Clock(self.max_frame, self.clock_speed)
     self.to_render = util.to_render()
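
The spec consumed here is the full experiment spec; a hypothetical fragment containing the keys read above (values illustrative only):

spec = {
    'env': [{
        'name': 'multiwoz',
        'frame_op': None,
        'frame_op_len': None,
        'normalize_state': False,
        'reward_scale': None,
        'num_envs': 4,
        'max_t': 40,
        'max_frame': 100000,
    }],
    'meta': {
        'log_frequency': 1000,
        'eval_frequency': 1000,
        'distributed': False,
        'max_session': 4,
    },
}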
Example #19
    def __init__(self, net_spec, in_dim, out_dim):
        '''
        net_spec:
        cell_type: any of RNN, LSTM, GRU
        fc_hid_layers: list of fc layers preceding the RNN layers
        hid_layers_activation: activation function for the fc hidden layers
        out_layer_activation: activation function for the output layer, same shape as out_dim
        rnn_hidden_size: rnn hidden_size
        rnn_num_layers: number of recurrent layers
        bidirectional: if RNN should be bidirectional
        seq_len: length of the state history passed to the net
        init_fn: weight initialization function
        clip_grad_val: clip gradient norm if value is not None
        loss_spec: measure of error between model predictions and correct outputs
        optim_spec: parameters for initializing the optimizer
        lr_scheduler_spec: Pytorch optim.lr_scheduler
        update_type: method to update network weights: 'replace' or 'polyak'
        update_frequency: how many total timesteps per update
        polyak_coef: ratio of polyak weight update
        gpu: whether to train using a GPU. Note this will only work if a GPU is available, otherwise setting gpu=True does nothing
        '''
        nn.Module.__init__(self)
        super().__init__(net_spec, in_dim, out_dim)
        # set default
        util.set_attr(
            self,
            dict(
                out_layer_activation=None,
                cell_type='GRU',
                rnn_num_layers=1,
                bidirectional=False,
                init_fn=None,
                clip_grad_val=None,
                loss_spec={'name': 'MSELoss'},
                optim_spec={'name': 'Adam'},
                lr_scheduler_spec=None,
                update_type='replace',
                update_frequency=1,
                polyak_coef=0.0,
                gpu=False,
            ))
        util.set_attr(self, self.net_spec, [
            'cell_type',
            'fc_hid_layers',
            'hid_layers_activation',
            'out_layer_activation',
            'rnn_hidden_size',
            'rnn_num_layers',
            'bidirectional',
            'seq_len',
            'init_fn',
            'clip_grad_val',
            'loss_spec',
            'optim_spec',
            'lr_scheduler_spec',
            'update_type',
            'update_frequency',
            'polyak_coef',
            'gpu',
        ])
        # restore proper in_dim from env stacked state_dim (stack_len, *raw_state_dim)
        self.in_dim = in_dim[1:] if len(in_dim) > 2 else in_dim[1]
        # fc body: state processing model
        if ps.is_empty(self.fc_hid_layers):
            self.rnn_input_dim = self.in_dim
        else:
            fc_dims = [self.in_dim] + self.fc_hid_layers
            self.fc_model = net_util.build_fc_model(fc_dims,
                                                    self.hid_layers_activation)
            self.rnn_input_dim = fc_dims[-1]

        # RNN model
        self.rnn_model = getattr(nn, net_util.get_nn_name(self.cell_type))(
            input_size=self.rnn_input_dim,
            hidden_size=self.rnn_hidden_size,
            num_layers=self.rnn_num_layers,
            batch_first=True,
            bidirectional=self.bidirectional)

        # tails. avoid list for single-tail for compute speed
        if ps.is_integer(self.out_dim):
            self.model_tail = net_util.build_fc_model([self.rnn_hidden_size, self.out_dim], self.out_layer_activation)
        else:
            if not ps.is_list(self.out_layer_activation):
                self.out_layer_activation = [self.out_layer_activation] * len(out_dim)
            assert len(self.out_layer_activation) == len(self.out_dim)
            tails = []
            for out_d, out_activ in zip(self.out_dim, self.out_layer_activation):
                tail = net_util.build_fc_model([self.rnn_hidden_size, out_d], out_activ)
                tails.append(tail)
            self.model_tails = nn.ModuleList(tails)

        net_util.init_layers(self, self.init_fn)
        self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
        self.to(self.device)
        self.train()
Example #20
    def __init__(self, net_spec, in_dim, out_dim):
        '''
        Multi state processing heads, single shared body, and multi action tails.
        There is one state and action head per body/environment
        Example:

          env 1 state       env 2 state
         _______|______    _______|______
        |    head 1    |  |    head 2    |
        |______________|  |______________|
                |                  |
                |__________________|
         ________________|_______________
        |          Shared body           |
        |________________________________|
                         |
                 ________|_______
                |                |
         _______|______    ______|_______
        |    tail 1    |  |    tail 2    |
        |______________|  |______________|
                |                |
           env 1 action      env 2 action
        '''
        nn.Module.__init__(self)
        super().__init__(net_spec, in_dim, out_dim)
        # set default
        util.set_attr(
            self,
            dict(
                out_layer_activation=None,
                init_fn=None,
                clip_grad_val=None,
                loss_spec={'name': 'MSELoss'},
                optim_spec={'name': 'Adam'},
                lr_scheduler_spec=None,
                update_type='replace',
                update_frequency=1,
                polyak_coef=0.0,
                gpu=False,
            ))
        util.set_attr(self, self.net_spec, [
            'hid_layers',
            'hid_layers_activation',
            'out_layer_activation',
            'init_fn',
            'clip_grad_val',
            'loss_spec',
            'optim_spec',
            'lr_scheduler_spec',
            'update_type',
            'update_frequency',
            'polyak_coef',
            'gpu',
        ])
        assert len(self.hid_layers) == 3, 'Your hidden layers must specify [*heads], [body], [*tails]. If not, use MLPNet'
        assert isinstance(self.in_dim, list), 'Hydra network needs in_dim as list'
        assert isinstance(self.out_dim, list), 'Hydra network needs out_dim as list'
        self.head_hid_layers = self.hid_layers[0]
        self.body_hid_layers = self.hid_layers[1]
        self.tail_hid_layers = self.hid_layers[2]
        if len(self.head_hid_layers) == 1:
            self.head_hid_layers = self.head_hid_layers * len(self.in_dim)
        if len(self.tail_hid_layers) == 1:
            self.tail_hid_layers = self.tail_hid_layers * len(self.out_dim)

        self.model_heads = self.build_model_heads(in_dim)
        heads_out_dim = np.sum([head_hid_layers[-1] for head_hid_layers in self.head_hid_layers])
        dims = [heads_out_dim] + self.body_hid_layers
        self.model_body = net_util.build_fc_model(dims, self.hid_layers_activation)
        self.model_tails = self.build_model_tails(self.out_dim, self.out_layer_activation)

        net_util.init_layers(self, self.init_fn)
        self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
        self.to(self.device)
        self.train()
Example #21
    def __init__(self, net_spec, in_dim, out_dim):
        '''
        net_spec:
        hid_layers: list containing dimensions of the hidden layers
        hid_layers_activation: activation function for the hidden layers
        out_layer_activation: activation function for the output layer, same shape as out_dim
        init_fn: weight initialization function
        clip_grad_val: clip gradient norm if value is not None
        loss_spec: measure of error between model predictions and correct outputs
        optim_spec: parameters for initializing the optimizer
        lr_scheduler_spec: Pytorch optim.lr_scheduler
        update_type: method to update network weights: 'replace' or 'polyak'
        update_frequency: how many total timesteps per update
        polyak_coef: ratio of polyak weight update
        gpu: whether to train using a GPU. Note this will only work if a GPU is available, otherwise setting gpu=True does nothing
        '''
        nn.Module.__init__(self)
        super().__init__(net_spec, in_dim, out_dim)
        # set default
        util.set_attr(
            self,
            dict(
                out_layer_activation=None,
                init_fn=None,
                clip_grad_val=None,
                loss_spec={'name': 'MSELoss'},
                optim_spec={'name': 'Adam'},
                lr_scheduler_spec=None,
                update_type='replace',
                update_frequency=1,
                polyak_coef=0.0,
                gpu=False,
            ))
        util.set_attr(self, self.net_spec, [
            'shared',
            'hid_layers',
            'hid_layers_activation',
            'out_layer_activation',
            'init_fn',
            'clip_grad_val',
            'loss_spec',
            'optim_spec',
            'lr_scheduler_spec',
            'update_type',
            'update_frequency',
            'polyak_coef',
            'gpu',
        ])

        dims = [self.in_dim] + self.hid_layers
        self.model = net_util.build_fc_model(dims, self.hid_layers_activation)
        # add last layer with no activation
        # tails. avoid list for single-tail for compute speed
        if ps.is_integer(self.out_dim):
            self.model_tail = net_util.build_fc_model([dims[-1], self.out_dim], self.out_layer_activation)
        else:
            if not ps.is_list(self.out_layer_activation):
                self.out_layer_activation = [self.out_layer_activation] * len(out_dim)
            assert len(self.out_layer_activation) == len(self.out_dim)
            tails = []
            for out_d, out_activ in zip(self.out_dim, self.out_layer_activation):
                tail = net_util.build_fc_model([dims[-1], out_d], out_activ)
                tails.append(tail)
            self.model_tails = nn.ModuleList(tails)

        net_util.init_layers(self, self.init_fn)
        self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
        self.to(self.device)
        self.train()
Example #22
 def __init__(self, agent, global_nets=None):
     super().__init__(agent, global_nets)
     util.set_attr(self, self.algorithm_spec, [
         'warmup_epi',
     ])
Example #23
    def __init__(self, net_spec, in_dim, out_dim):
        '''
        net_spec:
        conv_hid_layers: list containing dimensions of the convolutional hidden layers, each is a list representing hid_layer = out_d, kernel, stride, padding, dilation.
            Assumed to all come before the flat layers.
            Note: a convolutional layer should specify the in_channels, out_channels, kernel_size, stride (of kernel steps), padding, and dilation (spacing between kernel points), e.g. [3, 16, (5, 5), 1, 0, (2, 2)]
            For more details, see http://pytorch.org/docs/master/nn.html#conv2d and https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
        fc_hid_layers: list of fc layers following the convolutional layers
        hid_layers_activation: activation function for the hidden layers
        out_layer_activation: activation function for the output layer, same shape as out_dim
        init_fn: weight initialization function
        normalize: whether to divide by 255.0 to normalize image input
        batch_norm: whether to add batch normalization after each convolutional layer, excluding the input layer.
        clip_grad_val: clip gradient norm if value is not None
        loss_spec: measure of error between model predictions and correct outputs
        optim_spec: parameters for initializing the optimizer
        lr_scheduler_spec: Pytorch optim.lr_scheduler
        update_type: method to update network weights: 'replace' or 'polyak'
        update_frequency: how many total timesteps per update
        polyak_coef: ratio of polyak weight update
        gpu: whether to train using a GPU. Note this will only work if a GPU is available, otherwise setting gpu=True does nothing
        '''
        assert len(in_dim) == 3  # image shape (c,w,h)
        nn.Module.__init__(self)
        super().__init__(net_spec, in_dim, out_dim)
        # set default
        util.set_attr(
            self,
            dict(
                out_layer_activation=None,
                init_fn=None,
                normalize=False,
                batch_norm=True,
                clip_grad_val=None,
                loss_spec={'name': 'MSELoss'},
                optim_spec={'name': 'Adam'},
                lr_scheduler_spec=None,
                update_type='replace',
                update_frequency=1,
                polyak_coef=0.0,
                gpu=False,
            ))
        util.set_attr(self, self.net_spec, [
            'conv_hid_layers',
            'fc_hid_layers',
            'hid_layers_activation',
            'out_layer_activation',
            'init_fn',
            'normalize',
            'batch_norm',
            'clip_grad_val',
            'loss_spec',
            'optim_spec',
            'lr_scheduler_spec',
            'update_type',
            'update_frequency',
            'polyak_coef',
            'gpu',
        ])

        # conv body
        self.conv_model = self.build_conv_layers(self.conv_hid_layers)
        self.conv_out_dim = self.get_conv_output_size()

        # fc body
        if ps.is_empty(self.fc_hid_layers):
            tail_in_dim = self.conv_out_dim
        else:
            # fc body from flattened conv
            self.fc_model = net_util.build_fc_model([self.conv_out_dim] +
                                                    self.fc_hid_layers,
                                                    self.hid_layers_activation)
            tail_in_dim = self.fc_hid_layers[-1]

        # tails. avoid list for single-tail for compute speed
        if ps.is_integer(self.out_dim):
            self.model_tail = net_util.build_fc_model([tail_in_dim, self.out_dim], self.out_layer_activation)
        else:
            if not ps.is_list(self.out_layer_activation):
                self.out_layer_activation = [self.out_layer_activation] * len(out_dim)
            assert len(self.out_layer_activation) == len(self.out_dim)
            tails = []
            for out_d, out_activ in zip(self.out_dim, self.out_layer_activation):
                tail = net_util.build_fc_model([tail_in_dim, out_d], out_activ)
                tails.append(tail)
            self.model_tails = nn.ModuleList(tails)

        net_util.init_layers(self, self.init_fn)
        self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
        self.to(self.device)
        self.train()