def print_grads_norms(net):
    global_norm = utils.global_grad_norm(net.parameters())
    print('Global_grads norm: {:.8f}'.format(global_norm))
    for n, m in net.named_children():
        w_norm = 0. if m.weight.grad is None else utils.global_grad_norm([m.weight])
        b_norm = 0. if m.bias.grad is None else utils.global_grad_norm([m.bias])
        print('--' * 10, n, '--' * 10)
        print('W_grad norm: {:.8f}\nb_grad norm: {:.8f}'.format(w_norm, b_norm))
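
# Hedged usage sketch: print_grads_norms assumes every direct child of the network has
# `weight` and `bias` parameters (as nn.Linear and nn.Conv2d do) and that the repo's
# utils.global_grad_norm helper is available. The toy model below is an assumption made
# purely for illustration.
if __name__ == '__main__':
    import torch
    import torch.nn as nn

    toy_net = nn.Sequential(nn.Linear(4, 8), nn.Linear(8, 2))  # every child has weight and bias
    toy_loss = toy_net(torch.randn(3, 4)).sum()
    toy_loss.backward()              # populate .grad on all parameters
    print_grads_norms(toy_net)       # per-child W/b grad norms plus the global grad norm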
def __init__(self, network, batch_env, args):
    logging.debug('PAAC init is started')
    self.checkpoint_dir = join_path(args.debugging_folder, self.CHECKPOINT_SUBDIR)
    ensure_dir(self.checkpoint_dir)

    checkpoint = self._load_latest_checkpoint(self.checkpoint_dir)
    self.last_saving_step = checkpoint['last_step'] if checkpoint else 0
    self.global_step = self.last_saving_step

    self.network = network
    self.batch_env = batch_env
    self.optimizer = optim.RMSprop(
        self.network.parameters(),
        lr=args.initial_lr,
        eps=args.e,
    )  # RMSprop defaults: momentum=0., centered=False, weight_decay=0

    if checkpoint:
        logging.info('Restoring agent variables from previous run')
        self.network.load_state_dict(checkpoint['network_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    self.lr_scheduler = LinearAnnealingLR(self.optimizer, args.lr_annealing_steps)
    # The PyTorch documentation says that in most cases it's better to use the
    # CUDA_VISIBLE_DEVICES environment variable; therefore, to select a particular GPU,
    # set CUDA_VISIBLE_DEVICES.
    self.device = self.network._device

    self.gamma = args.gamma  # discount factor for future rewards
    self.entropy_coef = args.entropy_regularisation_strength
    self.loss_scaling = args.loss_scaling  # 5.
    self.critic_coef = args.critic_coef  # 0.25
    self.total_steps = args.max_global_steps
    self.rollout_steps = args.rollout_steps
    self.clip_norm = args.clip_norm
    self.num_emulators = batch_env.num_emulators

    self.evaluate = None
    self.reshape_r = lambda r: np.clip(r, -1., 1.)
    self.compute_returns = n_step_returns

    if args.clip_norm_type == 'global':
        self.clip_gradients = nn.utils.clip_grad_norm_
    elif args.clip_norm_type == 'local':
        self.clip_gradients = utils.clip_local_grad_norm
    elif args.clip_norm_type == 'ignore':
        self.clip_gradients = lambda params, _: utils.global_grad_norm(params)
    else:
        raise ValueError('Norm type ({}) is not recognized'.format(args.clip_norm_type))

    logging.debug('PAAC init is done')

    self.curr_learning = True
    self.starting_length = [[5, 10], [5, 10], [5, 10], [5, 10],
                            [15, 20], [15, 20], [15, 20], [15, 20]]
    # curriculum stages: 1. 5-10; 2. 15-20; 3. 40-50; 4. 90-100
    self.checking_length = [15, 20]
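
# A minimal sketch of the kind of bootstrapped return that self.compute_returns (n_step_returns)
# is expected to produce; this is an illustration written for this excerpt, not necessarily the
# repo's exact implementation. In the update step the resulting targets would typically be used
# alongside self.clip_gradients(self.network.parameters(), self.clip_norm) before optimizer.step().
def n_step_returns_sketch(rewards, masks, bootstrap_values, gamma):
    """rewards, masks: arrays of shape [rollout_steps, num_emulators]; masks are 0. where an
    episode terminated, 1. otherwise. bootstrap_values: critic estimates V(s_T), shape
    [num_emulators]."""
    returns = np.zeros_like(rewards)
    R = bootstrap_values
    for t in reversed(range(len(rewards))):
        # propagate the discounted return backwards, cutting the bootstrap at terminal states
        R = rewards[t] + gamma * masks[t] * R
        returns[t] = R
    return returns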
def __init__(self, network_creator, batch_env, args):
    logging.debug('PAAC init is started')
    self.args = copy.copy(vars(args))
    self.checkpoint_dir = join_path(self.args['debugging_folder'], self.CHECKPOINT_SUBDIR)
    ensure_dir(self.checkpoint_dir)

    checkpoint = self._load_latest_checkpoint(self.checkpoint_dir)
    self.last_saving_step = checkpoint['last_step'] if checkpoint else 0
    self.final_rewards = []
    self.global_step = self.last_saving_step

    self.network = network_creator()
    self.batch_env = batch_env
    self.optimizer = optim.RMSprop(
        self.network.parameters(),
        lr=self.args['initial_lr'],
        eps=self.args['e'],
    )  # RMSprop defaults: momentum=0., centered=False, weight_decay=0

    if checkpoint:
        logging.info('Restoring agent variables from previous run')
        self.network.load_state_dict(checkpoint['network_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    self.lr_scheduler = LinearAnnealingLR(self.optimizer, self.args['lr_annealing_steps'])
    # The PyTorch documentation says that in most cases it's better to use the
    # CUDA_VISIBLE_DEVICES environment variable; therefore, to select a particular GPU,
    # set CUDA_VISIBLE_DEVICES.
    self.use_cuda = self.args['device'] == 'gpu'
    self.use_rnn = hasattr(self.network, 'get_initial_state')
    # get_initial_state should return the state of the RNN layers
    self._tensors = torch.cuda if self.use_cuda else torch

    self.action_codes = np.eye(batch_env.num_actions)  # envs receive actions in one-hot encoding!
    self.gamma = self.args['gamma']  # discount factor for future rewards
    self.entropy_coef = self.args['entropy_regularisation_strength']
    self.loss_scaling = self.args['loss_scaling']  # 5.
    self.critic_coef = self.args['critic_coef']  # 0.25
    self.eval_func = None

    if self.args['clip_norm_type'] == 'global':
        self.clip_gradients = nn.utils.clip_grad_norm_
    elif self.args['clip_norm_type'] == 'local':
        self.clip_gradients = utils.clip_local_grad_norm
    elif self.args['clip_norm_type'] == 'ignore':
        self.clip_gradients = lambda params, _: utils.global_grad_norm(params)
    else:
        raise ValueError('Norm type ({}) is not recognized'.format(self.args['clip_norm_type']))

    logging.debug('PAAC init is done')
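
# Hedged illustration of the one-hot encoding set up by self.action_codes above. The sampled
# indices and variable names below are assumptions made for this sketch, and the actual
# environment-stepping call is not shown in this excerpt.
def to_one_hot_sketch(action_codes, action_idx):
    """action_codes: np.eye(num_actions); action_idx: int action indices, one per emulator.
    Returns an array of shape [num_emulators, num_actions] with a single 1. per row."""
    return action_codes[action_idx]

# e.g. to_one_hot_sketch(np.eye(6), np.array([0, 3, 5, 1])) -> four one-hot rows of length 6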