def neural_psc(self, frame, step):
    """Return the pseudo-count style reward for ``frame`` at ``step``.

    The frame is passed through ``process_density_images`` and
    ``process_density_input`` and then scored twice by
    ``self.density_model.prob_evaluate``. The prediction gain is the sum of
    ``log(second) - log(first)`` over the returned probabilities, and the
    reward is ``(exp(0.1 * (step + 1) ** -0.5 * max(0, gain)) - 1) ** 0.5``.

    Args:
        frame: Observation handed to ``process_density_images``.
        step: Step counter; the gain is scaled by ``(step + 1) ** -0.5``.

    Returns:
        The reward computed by the formula above.
    """
    model_input = process_density_input(process_density_images(frame))

    # Keep this call order: the flagged evaluation runs first.
    # NOTE(review): the ``True`` flag presumably makes the model update on
    # this input, so the second call sees the updated model - confirm against
    # ``prob_evaluate``.
    # The 1e-8 offset keeps the logarithms below finite for zero probabilities.
    first_prob = self.density_model.prob_evaluate(
        self.sess, model_input, True) + 1e-8
    second_prob = self.density_model.prob_evaluate(
        self.sess, model_input) + 1e-8

    gain = np.sum(np.log(second_prob) - np.log(first_prob))

    # ``step + 1`` keeps the negative power well defined at step 0.
    scale = 0.1 * pow(step + 1, -0.5)
    # Negative gains are clipped to zero, which yields a reward of zero.
    return pow(exp(scale * max(0, gain)) - 1, 0.5)
def bonus(self, obs, t):
    """Return the exploration bonus for ``obs`` at step ``t`` and log it.

    The observation is resized to
    ``(self.flags.img_height, self.flags.img_width)``, preprocessed with
    ``process_density_images`` and ``process_density_input``, and scored
    twice by ``self.density_model.prob_evaluate``. The prediction gain is the
    sum of ``log(second) - log(first)``; the bonus is
    ``(exp(0.1 * (t + 1) ** -0.5 * max(0, gain)) - 1) ** 0.5``.

    Two scalars are written through ``self.writer`` at step ``t``:
    ``'data/loss'`` (negative summed log of the first probabilities) and
    ``'data/PG'`` (the prediction gain).

    Args:
        obs: Raw observation to resize and score.
        t: Step counter, used both for the decay term and as the logging step.

    Returns:
        The bonus computed by the formula above.
    """
    target_shape = (self.flags.img_height, self.flags.img_width)
    resized = resize(obs, target_shape, order=1)
    model_input = process_density_input(process_density_images(resized))

    # Keep this call order: the flagged evaluation runs first.
    # NOTE(review): the ``True`` flag presumably makes the model update on
    # this input, so the second call sees the updated model - confirm against
    # ``prob_evaluate``.
    first_prob = self.density_model.prob_evaluate(self.sess, model_input, True)
    second_prob = self.density_model.prob_evaluate(self.sess, model_input)

    # In-place offset so the logarithms below stay finite for zero
    # probabilities.
    first_prob += 1e-8
    second_prob += 1e-8

    first_log = np.log(first_prob)
    gain = np.sum(np.log(second_prob) - first_log)

    self.writer.add_scalar('data/loss', -np.sum(first_log), t)
    self.writer.add_scalar('data/PG', gain, t)

    # ``t + 1`` keeps the negative power well defined at step 0.
    scale = 0.1 * pow(t + 1, -0.5)
    # Negative gains are clipped to zero, which yields a bonus of zero.
    return pow(exp(scale * max(0, gain)) - 1, 0.5)