Example #1
def print_grads_norms(net):
    global_norm = utils.global_grad_norm(net.parameters())
    print('Global grads norm: {:.8f}'.format(global_norm))
    for n, m in net.named_children():
        w_norm = 0. if m.weight.grad is None else utils.global_grad_norm(
            [m.weight])
        b_norm = 0. if m.bias.grad is None else utils.global_grad_norm(
            [m.bias])
        print('--' * 10, n, '--' * 10)
        print('W_grad norm: {:.8f}\nb_grad norm: {:.8f}'.format(
            w_norm, b_norm))
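A minimal, self-contained usage sketch (an assumption, not part of the original example): the utils module is not shown here, so the global_grad_norm helper below stands in for utils.global_grad_norm and simply returns the L2 norm over all gradients of the given parameters.

import types

import torch
import torch.nn as nn

def global_grad_norm(parameters):
    # L2 norm across all parameter gradients, skipping parameters without one.
    grads = [p.grad.detach().flatten() for p in parameters if p.grad is not None]
    return torch.cat(grads).norm(2).item() if grads else 0.

utils = types.SimpleNamespace(global_grad_norm=global_grad_norm)

net = nn.Sequential(nn.Linear(4, 8), nn.Linear(8, 2))
net(torch.randn(16, 4)).pow(2).mean().backward()
print_grads_norms(net)  # global norm first, then per-layer W/b grad norms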
Example #2
    def __init__(self, network, batch_env, args):
        logging.debug('PAAC init is started')
        self.checkpoint_dir = join_path(args.debugging_folder,
                                        self.CHECKPOINT_SUBDIR)
        ensure_dir(self.checkpoint_dir)

        checkpoint = self._load_latest_checkpoint(self.checkpoint_dir)
        self.last_saving_step = checkpoint['last_step'] if checkpoint else 0

        self.global_step = self.last_saving_step
        self.network = network
        self.batch_env = batch_env
        self.optimizer = optim.RMSprop(
            self.network.parameters(),
            lr=args.initial_lr,
            eps=args.e,
        )  # RMSprop defaults: momentum=0., centered=False, weight_decay=0

        if checkpoint:
            logging.info('Restoring agent variables from previous run')
            self.network.load_state_dict(checkpoint['network_state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

        self.lr_scheduler = LinearAnnealingLR(self.optimizer,
                                              args.lr_annealing_steps)
        # The PyTorch documentation recommends the CUDA_VISIBLE_DEVICES
        # environment variable for selecting a particular GPU.
        self.device = self.network._device

        self.gamma = args.gamma  # future rewards discount factor
        self.entropy_coef = args.entropy_regularisation_strength
        self.loss_scaling = args.loss_scaling  #5.
        self.critic_coef = args.critic_coef  #0.25
        self.total_steps = args.max_global_steps
        self.rollout_steps = args.rollout_steps
        self.clip_norm = args.clip_norm
        self.num_emulators = batch_env.num_emulators

        self.evaluate = None
        self.reshape_r = lambda r: np.clip(r, -1., 1.)  # clip rewards to [-1, 1]
        self.compute_returns = n_step_returns
        if args.clip_norm_type == 'global':
            self.clip_gradients = nn.utils.clip_grad_norm_
        elif args.clip_norm_type == 'local':
            self.clip_gradients = utils.clip_local_grad_norm
        elif args.clip_norm_type == 'ignore':
            self.clip_gradients = lambda params, _: utils.global_grad_norm(
                params)
        else:
            raise ValueError('Norm type({}) is not recognized'.format(
                args.clip_norm_type))
        logging.debug('PAAC init is done')

        self.curr_learning = True
        # Curriculum stages: 1. 5-10;  2. 15-20;  3. 40-50;  4. 90-100
        self.starting_length = [[5, 10], [5, 10], [5, 10], [5, 10],
                                [15, 20], [15, 20], [15, 20], [15, 20]]
        self.checking_length = [15, 20]
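All three clip_gradients branches above share the call signature clip_gradients(parameters, max_norm), so the training step (not shown in this example) can apply whichever policy was configured without branching again. A hedged sketch of that call site, using the 'global' branch and made-up values for the network and clip norm:

import torch
import torch.nn as nn

net = nn.Linear(8, 2)
optimizer = torch.optim.RMSprop(net.parameters(), lr=7e-4, eps=1e-5)
clip_gradients = nn.utils.clip_grad_norm_  # the 'global' branch

loss = net(torch.randn(4, 8)).pow(2).mean()
optimizer.zero_grad()
loss.backward()
# Rescales all gradients in place so their joint L2 norm is at most 0.5;
# returns the norm measured before clipping.
grad_norm = clip_gradients(net.parameters(), 0.5)
optimizer.step()
print('pre-clip global grad norm: {:.4f}'.format(float(grad_norm)))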
Example #3
    def __init__(self, network_creator, batch_env, args):
        logging.debug('PAAC init is started')
        self.args = copy.copy(vars(args))
        self.checkpoint_dir = join_path(self.args['debugging_folder'],
                                        self.CHECKPOINT_SUBDIR)
        ensure_dir(self.checkpoint_dir)

        checkpoint = self._load_latest_checkpoint(self.checkpoint_dir)
        self.last_saving_step = checkpoint['last_step'] if checkpoint else 0

        self.final_rewards = []
        self.global_step = self.last_saving_step
        self.network = network_creator()
        self.batch_env = batch_env
        self.optimizer = optim.RMSprop(
            self.network.parameters(),
            lr=self.args['initial_lr'],
            eps=self.args['e'],
        )  # RMSprop defaults: momentum=0., centered=False, weight_decay=0

        if checkpoint:
            logging.info('Restoring agent variables from previous run')
            self.network.load_state_dict(checkpoint['network_state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

        self.lr_scheduler = LinearAnnealingLR(self.optimizer,
                                              self.args['lr_annealing_steps'])
        # The PyTorch documentation recommends the CUDA_VISIBLE_DEVICES
        # environment variable for selecting a particular GPU.
        self.use_cuda = self.args['device'] == 'gpu'
        # get_initial_state should return the initial state of the RNN layers
        self.use_rnn = hasattr(self.network, 'get_initial_state')
        self._tensors = torch.cuda if self.use_cuda else torch

        # envs receive actions in one-hot encoding
        self.action_codes = np.eye(batch_env.num_actions)
        self.gamma = self.args['gamma']  # future rewards discount factor
        self.entropy_coef = self.args['entropy_regularisation_strength']
        self.loss_scaling = self.args['loss_scaling']  #5.
        self.critic_coef = self.args['critic_coef']  #0.25
        self.eval_func = None

        if self.args['clip_norm_type'] == 'global':
            self.clip_gradients = nn.utils.clip_grad_norm_
        elif self.args['clip_norm_type'] == 'local':
            self.clip_gradients = utils.clip_local_grad_norm
        elif self.args['clip_norm_type'] == 'ignore':
            self.clip_gradients = lambda params, _: utils.global_grad_norm(
                params)
        else:
            raise ValueError('Norm type({}) is not recognized'.format(
                self.args['clip_norm_type']))
        logging.debug('PAAC init is done')
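On the action_codes line above, np.eye(num_actions) builds the identity matrix whose row i is the one-hot vector for action i, so a whole batch of integer actions can be encoded with a single indexing operation. A small illustration with made-up values:

import numpy as np

num_actions = 4
action_codes = np.eye(num_actions)  # row i one-hot encodes action i
actions = np.array([2, 0, 3])       # e.g. actions chosen for three emulators
print(action_codes[actions])
# [[0. 0. 1. 0.]
#  [1. 0. 0. 0.]
#  [0. 0. 0. 1.]]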