Example #1
0
    def _step(self, action):
        """Advance the environment by one time step.

        Actions 1 and 2 nudge the internal state angle by +/-0.05 rad;
        any other action leaves it alone. The angle is kept wrapped to
        [0, 2*pi). During fixation the reward is zero; otherwise reward
        grows with proximity of the state to the ground truth, floored
        at the failure reward, and the normalized reward is accumulated
        into performance across the decision period.
        """
        # Map action -> angular nudge; unrecognized actions contribute 0.
        nudge = {1: 0.05, 2: -0.05}.get(action, 0.0)
        self.state = np.mod(self.state + nudge, 2 * np.pi)

        target = self.gt_now
        reward = 0
        if not self.in_period('fixation'):
            correct = self.rewards['correct']
            fail = self.rewards['fail']
            # Closer state -> larger reward, never below the failure reward.
            reward = np.max((correct - tasktools.circular_dist(self.state - target),
                             fail))
            # Rescale reward into [0, 1] before spreading it over the
            # decision period duration.
            norm_rew = (reward - fail) / (correct - fail)
            self.performance += norm_rew / self.dec_per_dur

        return self.ob_now, reward, False, {'new_trial': False}
Example #2
0
    def _step(self, action):
        """Advance the environment by one time step.

        Writes the cosine tuning of the *pre-update* state into
        observation units 16..31, then applies the action (1/2 nudges
        the state angle by +/-0.05 rad, wrapped to [0, 2*pi)). During
        either go period the reward grows with proximity of the state
        to the ground truth, floored at the failure reward, and the
        normalized reward is accumulated into performance across the
        decision period.
        """
        ob = self.ob_now
        # NOTE: the observation encodes the state as it was *before*
        # this step's action is applied.
        ob[16:32] = np.cos(self.theta - self.state)

        # Map action -> angular nudge; unrecognized actions contribute 0.
        nudge = {1: 0.05, 2: -0.05}.get(action, 0.0)
        self.state = np.mod(self.state + nudge, 2 * np.pi)

        target = self.gt_now
        reward = 0
        if self.in_period('go1') or self.in_period('go2'):
            correct = self.rewards['correct']
            fail = self.rewards['fail']
            # Closer state -> larger reward, never below the failure reward.
            reward = np.max((correct - tasktools.circular_dist(self.state - target),
                             fail))
            # Rescale reward into [0, 1] before spreading it over the
            # decision period duration.
            norm_rew = (reward - fail) / (correct - fail)
            self.performance += norm_rew / self.dec_per_dur

        return ob, reward, False, {'new_trial': False}