Example #1
0
 def reset(self):
     """Restart the epoch counter and return the first feature vector.

     Sets ``self._epoch_num`` to 0, obtains a state from
     ``self._next_epoch()``, and caches both the raw state and its
     featurized form in ``self.last`` under ``'state'`` and ``'state_v'``.

     Returns:
         The value produced by ``featurizer.featurize`` for the new state.
     """
     self._epoch_num = 0

     initial_state = self._next_epoch()
     self.last['state'] = initial_state

     # NOTE(review): the featurizer is given a literal 1 here even though
     # the counter was just reset to 0 -- presumably intentional; confirm.
     features = featurizer.featurize(initial_state, 1)
     self.last['state_v'] = features
     return features
Example #2
0
    def step(self, policy):
        """Apply ``policy``, advance one epoch and report the outcome.

        The policy is handed to ``self._apply_policy``, the epoch counter is
        incremented, and the state returned by ``self._next_epoch()`` is
        cached in ``self.last`` (``'reward'``, ``'state'``, ``'state_v'``)
        before the agent is notified through ``on_ai_step()``.

        Args:
            policy: Passed unchanged to ``self._apply_policy``.

        Returns:
            A 4-tuple ``(state_v, reward, done, info)`` where ``done`` is
            true once ``self._agent.is_training()`` is false and ``info``
            is always an empty dict. (This appears to follow the Gym
            ``Env.step`` convention -- confirm against the base class.)
        """
        # Turn the policy into parameters and push them to the algorithm.
        self._apply_policy(policy)
        self._epoch_num += 1

        # Per the original note: this waits for the algorithm to run with
        # the freshly applied parameters.
        epoch_state = self._next_epoch()

        self.last['reward'] = epoch_state['reward']
        self.last['state'] = epoch_state
        self.last['state_v'] = featurizer.featurize(epoch_state, self._epoch_num)

        self._agent.on_ai_step()

        # Read the cached values only after the agent hook has run, in the
        # same order as the original return expression.
        observation = self.last['state_v']
        reward = self.last['reward']
        done = not self._agent.is_training()
        return observation, reward, done, {}