def learn(self):
    assert hasattr(self, 'q1_optim') and hasattr(self, 'q2_optim') and \
        hasattr(self, 'policy_optim') and hasattr(self, 'alpha_optim')

    self.learning_steps += 1

    # PER sampling is disabled here; minibatches come from prep_minibatch().
    # if self.use_per:
    #     batch, weights = self.memory.sample(self.batch_size)
    # else:
    #     batch = self.memory.sample(self.batch_size)
    #     # Set priority weights to 1 when we don't use PER.
    #     weights = 1.
    batch = self.prep_minibatch()
    weights = 1.

    # Critic, actor, and entropy-temperature losses.
    q1_loss, q2_loss, errors, mean_q1, mean_q2 = \
        self.calc_critic_loss(batch, weights)
    policy_loss, entropies = self.calc_policy_loss(batch, weights)
    entropy_loss = self.calc_entropy_loss(entropies, weights)

    # One gradient step per network, then refresh alpha from log_alpha.
    update_params(self.q1_optim, q1_loss)
    update_params(self.q2_optim, q2_loss)
    update_params(self.policy_optim, policy_loss)
    update_params(self.alpha_optim, entropy_loss)

    self.alpha = self.log_alpha.exp()

    if self.use_per:
        self.memory.update_priority(errors)

    if self.learning_steps % self.log_interval == 0:
        self.writer.add_scalar(
            'loss/Q1', q1_loss.detach().item(), self.learning_steps)
        self.writer.add_scalar(
            'loss/Q2', q2_loss.detach().item(), self.learning_steps)
        self.writer.add_scalar(
            'loss/policy', policy_loss.detach().item(), self.learning_steps)
        self.writer.add_scalar(
            'loss/alpha', entropy_loss.detach().item(), self.learning_steps)
        self.writer.add_scalar(
            'stats/alpha', self.alpha.detach().item(), self.learning_steps)
        self.writer.add_scalar(
            'stats/mean_Q1', mean_q1, self.learning_steps)
        self.writer.add_scalar(
            'stats/mean_Q2', mean_q2, self.learning_steps)
        self.writer.add_scalar(
            'stats/entropy', entropies.detach().mean().item(),
            self.learning_steps)
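
# --- Hedged sketch (not from the original source) ---------------------------
# `update_params` is called above but defined elsewhere in this repo. It is
# assumed to be the usual zero_grad -> backward -> step wrapper; the
# `retain_graph` keyword below is an assumption for illustration only.
def update_params(optim, loss, retain_graph=False):
    # Clear stale gradients, backpropagate this loss, take one optimizer step.
    optim.zero_grad()
    loss.backward(retain_graph=retain_graph)
    optim.step()
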
def rad_learn(self):
    assert hasattr(self, 'q1_optim') and hasattr(self, 'q2_optim') and \
        hasattr(self, 'policy_optim') and hasattr(self, 'alpha_optim')

    # Available RAD image augmentations; only random crop is used here.
    aug_to_func = {
        'crop': rad.random_crop,
        'grayscale': rad.random_grayscale,
        'cutout': rad.random_cutout,
        'cutout_color': rad.random_cutout_color,
        'flip': rad.random_flip,
        'rotate': rad.random_rotation,
        'rand_conv': rad.random_convolution,
        'color_jitter': rad.random_color_jitter,
        'translate': rad.random_translate,
        'no_aug': rad.no_aug,
    }
    aug_name = "crop"
    augs_funcs = {aug_name: aug_to_func[aug_name]}

    self.learning_steps += 1

    if self.use_per:
        batch, weights = self.memory.sample_rad(augs_funcs)
    else:
        batch = self.memory.sample_rad(augs_funcs)
        # Set priority weights to 1 when we don't use PER.
        weights = 1.

    # Critic, actor, and entropy-temperature losses.
    q1_loss, q2_loss, errors, mean_q1, mean_q2 = \
        self.calc_critic_loss(batch, weights)
    policy_loss, entropies = self.calc_policy_loss(batch, weights)
    entropy_loss = self.calc_entropy_loss(entropies, weights)

    # One gradient step per network, then refresh alpha from log_alpha.
    update_params(self.q1_optim, q1_loss)
    update_params(self.q2_optim, q2_loss)
    update_params(self.policy_optim, policy_loss)
    update_params(self.alpha_optim, entropy_loss)

    self.alpha = self.log_alpha.exp()

    if self.use_per:
        self.memory.update_priority(errors)

    if self.learning_steps % self.log_interval == 0:
        self.writer.add_scalar(
            'loss/Q1', q1_loss.detach().item(), self.learning_steps)
        self.writer.add_scalar(
            'loss/Q2', q2_loss.detach().item(), self.learning_steps)
        self.writer.add_scalar(
            'loss/policy', policy_loss.detach().item(), self.learning_steps)
        self.writer.add_scalar(
            'loss/alpha', entropy_loss.detach().item(), self.learning_steps)
        self.writer.add_scalar(
            'stats/alpha', self.alpha.detach().item(), self.learning_steps)
        self.writer.add_scalar(
            'stats/mean_Q1', mean_q1, self.learning_steps)
        self.writer.add_scalar(
            'stats/mean_Q2', mean_q2, self.learning_steps)
        self.writer.add_scalar(
            'stats/entropy', entropies.detach().mean().item(),
            self.learning_steps)
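
# --- Hedged sketch (not from the original source) ---------------------------
# `calc_entropy_loss` is called in both learn() and rad_learn() but defined
# elsewhere. It is assumed to follow the standard SAC temperature objective:
# adjust log_alpha so the policy entropy is pushed toward `self.target_entropy`.
# Attribute names mirror the calls above but are assumptions about the rest
# of this class.
def calc_entropy_loss(self, entropies, weights):
    # Gradients flow only through log_alpha; the entropy gap is detached.
    entropy_loss = -(
        self.log_alpha * (self.target_entropy - entropies).detach() * weights
    ).mean()
    return entropy_loss
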