import numpy as np


def _step(self, action):
    # Method of a trial-based task environment (neurogym-style); actions
    # 1 and 2 rotate the agent's state estimate by a fixed angular step.
    if action == 1:
        self.state += 0.05
    elif action == 2:
        self.state -= 0.05
    # Keep the state on the circle [0, 2*pi).
    self.state = np.mod(self.state, 2 * np.pi)
    gt = self.gt_now
    if self.in_period('fixation'):
        reward = 0
    else:
        # Graded reward: the correct reward minus the circular distance
        # to the ground truth, floored at the fail reward.
        reward = max(self.rewards['correct']
                     - tasktools.circular_dist(self.state - gt),
                     self.rewards['fail'])
        # Normalize the reward to [0, 1] and accumulate it as per-step
        # performance over the duration of the decision period.
        norm_rew = (reward - self.rewards['fail']) / (
            self.rewards['correct'] - self.rewards['fail'])
        self.performance += norm_rew / self.dec_per_dur
    return self.ob_now, reward, False, {'new_trial': False}
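# tasktools.circular_dist is used above but not shown. A minimal sketch of
# the behavior the reward computation assumes: given the difference of two
# angles, return the shortest distance on the circle, in [0, pi]. This is a
# hypothetical stand-in, not the original helper.
def circular_dist(diff):
    """Shortest angular distance for an angle difference (sketch)."""
    return np.pi - np.abs(np.pi - np.mod(np.abs(diff), 2 * np.pi))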
def _step(self, action):
    ob = self.ob_now
    # Feed the current state estimate back into the observation: input
    # channels 16-31 carry the cosine tuning of the preferred angles
    # theta relative to the state. Note this writes into ob_now in place.
    ob[16:32] = np.cos(self.theta - self.state)
    if action == 1:
        self.state += 0.05
    elif action == 2:
        self.state -= 0.05
    self.state = np.mod(self.state, 2 * np.pi)
    gt = self.gt_now
    reward = 0
    if self.in_period('go1') or self.in_period('go2'):
        # Same graded reward as above, applied during the two go periods.
        reward = max(self.rewards['correct']
                     - tasktools.circular_dist(self.state - gt),
                     self.rewards['fail'])
        norm_rew = (reward - self.rewards['fail']) / (
            self.rewards['correct'] - self.rewards['fail'])
        self.performance += norm_rew / self.dec_per_dur
    return ob, reward, False, {'new_trial': False}
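# Worked example of the reward normalization used in both methods above.
# The reward values (correct=1.0, fail=-0.1) and the 0.3 rad circular
# distance are illustrative assumptions, not values from the task definitions.
rewards = {'correct': 1.0, 'fail': -0.1}
reward = max(rewards['correct'] - 0.3, rewards['fail'])   # reward = 0.7
norm_rew = (reward - rewards['fail']) / (
    rewards['correct'] - rewards['fail'])                 # ~0.727, in [0, 1]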