def makeObservation(self):
    returnObs = Observation()
    returnObs.doubleArray = self.pos.tolist()
    if self.fuel_loc is not None:
        returnObs.doubleArray += [self.fuel]
    returnObs.intArray = [self.pass_loc, self.pass_dest]
    return returnObs

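# Hedged usage sketch, not from the source: unpacking the mixed observation
# built above on the agent side. Assumes the standard rlglue.types.Observation,
# whose intArray/doubleArray attributes are plain parallel lists; the helper
# name and the 2-D position are illustrative assumptions.
def unpack_taxi_observation(obs):
    position = obs.doubleArray[:2]      # continuous position (assumed 2-D)
    fuel = obs.doubleArray[2:]          # [fuel] when the fuel feature is enabled, else []
    pass_loc, pass_dest = obs.intArray  # discrete passenger location/destination
    return position, fuel, pass_loc, pass_dest
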
def env_start(self):
    self.reset()
    returnObs = Observation()
    returnObs.doubleArray = ([self.cart_location, self.cart_velocity]
                             + self.pole_angle.tolist()
                             + self.pole_velocity.tolist())
    return returnObs

def env_step(self, thisAction):
    log = logging.getLogger('pyrl.environments.gridworld.env_step')
    episodeOver = 0
    intAction = thisAction.intArray[0]
    log.debug("Action to take: %d", intAction)
    theReward = self.takeAction(intAction)
    if self.isAtGoal():
        log.info("Episode completed!!")
        episodeOver = 1
    if self.reward_noise > 0:
        theReward += numpy.random.normal(scale=self.reward_noise)
    theObs = Observation()
    theObs.doubleArray = self.getState()
    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = episodeOver
    log.info("(Action - State - Reward): (%d - %s - %f)", intAction, pformat(theObs), theReward)
    return returnRO

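# Hedged sketch, not from the source: driving one step of a discrete-action
# environment such as the gridworld above. Assumes the standard RL-Glue Python
# codec's rlglue.types.Action (same parallel-list layout as Observation);
# `env` is a hypothetical instance of the environment class.
from rlglue.types import Action

def step_once(env, action_index):
    act = Action(numInts=1)
    act.intArray = [action_index]
    rot = env.env_step(act)
    # Reward_observation_terminal bundles the reward, the next observation,
    # and the terminal flag (nonzero once the goal is reached).
    return rot.r, rot.o.doubleArray, rot.terminal
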
def createObservation(self):
    # Flatten the obsSize x obsSize window of video memory anchored at obsOrg.
    obs = Observation(numDoubles=self.obsSize**2)
    tmp = self.vmem[int(self.obsOrg[0]):int(self.obsOrg[0] + self.obsSize),
                    int(self.obsOrg[1]):int(self.obsOrg[1] + self.obsSize)]
    obs.doubleArray = list(tmp.flatten())
    return obs

def env_start(self):
    log = logging.getLogger('pyrl.environments.gridworld.env_start')
    self.reset()
    log.info("Environment started")
    returnObs = Observation()
    returnObs.doubleArray = self.getState()
    log.debug("Observation to return: %s", pformat(returnObs))
    return returnObs

def getObservation(self):
    returnObs = Observation()
    features = [1.]  # constant bias feature
    if self.original_features:
        features += mdptetris.features_original()
    if self.dellacherie_features:
        features += mdptetris.features_dellacherie()
    returnObs.intArray = [mdptetris.current_piece()]
    returnObs.doubleArray = features
    return returnObs

def env_start(self): """ Instantiate a new :class:`PinballModel` environment :returns: The initial state :rtype: :class:`Observation` """ self.pinball = PinballModel(self.configuration) obs = Observation() obs.doubleArray = self.pinball.get_state() return obs
def env_step(self, thisAction):
    intAction = thisAction.intArray[0]
    theReward, episodeOver = self.takeAction(intAction)
    theObs = Observation()
    theObs.doubleArray = self.state.tolist()
    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = int(episodeOver)
    return returnRO

def env_step(self, thisAction):
    hitBoundary = self.updatePosition(thisAction.doubleArray[0])
    theObs = Observation()
    theObs.doubleArray = [self.agentRow, self.agentCol]
    returnRO = Reward_observation_terminal()
    returnRO.r = self.calculateReward(hitBoundary)
    returnRO.o = theObs
    returnRO.terminal = self.checkCurrentTerminal()
    return returnRO

def env_step(self, thisAction):
    intAction = thisAction.intArray[0]
    obs, reward = self.takeAction(intAction)
    theObs = Observation()
    theObs.doubleArray = [obs]
    returnRO = Reward_observation_terminal()
    returnRO.r = reward
    returnRO.o = theObs
    returnRO.terminal = 0  # continuing task: episodes never terminate
    return returnRO

def env_step(self, thisAction):
    # Validate the action: this environment expects two continuous components.
    assert len(thisAction.doubleArray) == 2, "Expected 2 double actions."
    self.takeAction(thisAction.doubleArray)
    theObs = Observation()
    theObs.doubleArray = self.getState().tolist()
    theReward, terminate = self.getReward()
    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = int(terminate)
    return returnRO

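# Hedged companion to the check above (assumption, not source code): continuous
# actions travel in Action.doubleArray rather than intArray; the helper name
# and arguments are illustrative.
from rlglue.types import Action

def make_continuous_action(u1, u2):
    act = Action(numDoubles=2)
    act.doubleArray = [u1, u2]  # the two components the assert above expects
    return act
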
def env_step(self, action):
    """Take a step in the environment

    :param action: The action that the agent wants to take
    :returns: The next state, reward and whether the current state is terminal
    :rtype: :class:`Reward_observation_terminal`

    """
    returnRO = Reward_observation_terminal()
    returnRO.r = self.pinball.take_action(action.intArray[0])
    obs = Observation()
    obs.doubleArray = self.pinball.get_state()
    returnRO.o = obs
    returnRO.terminal = self.pinball.episode_ended()
    return returnRO

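# Hedged sketch (assumption, not source code): a bare episode loop over the
# pinball env_start/env_step pair above. `env` and `choose_action` are
# hypothetical; a real RL-Glue run would go through the rl_glue executable
# rather than calling the environment directly.
def run_episode(env, choose_action, max_steps=1000):
    obs = env.env_start()
    total_reward = 0.0
    for _ in range(max_steps):
        rot = env.env_step(choose_action(obs))
        total_reward += rot.r
        if rot.terminal:
            break
        obs = rot.o
    return total_reward
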
def env_step(self, thisAction):
    intAction = thisAction.intArray[0]
    theReward = self.takeAction(intAction)
    episodeOver = int(self.terminate())
    if self.reward_noise > 0:
        # Optionally perturb the reward with zero-mean Gaussian noise.
        theReward += numpy.random.normal(scale=self.reward_noise)
    theObs = Observation()
    theObs.doubleArray = ([self.cart_location, self.cart_velocity]
                          + self.pole_angle.tolist()
                          + self.pole_velocity.tolist())
    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = episodeOver
    return returnRO

def env_step(self, thisAction):
    intAction = thisAction.intArray[0]
    self.step(intAction, self.noise)
    # Stimulation (action 1) carries a cost; landing in a seizure-labeled
    # state adds a further penalty.
    theReward = self.stim_penalty if intAction == 1 else 0.0
    if self.getLabel(self.current_neighbor) == self.seiz_label:
        theReward += self.seizure_penalty
    theObs = Observation()
    theObs.doubleArray = self.state.tolist()
    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = 0  # continuing task
    return returnRO

def env_step(self, thisAction):
    episodeOver = 0
    intAction = thisAction.intArray[0]
    theReward = self.takeAction(intAction)
    if self.isAtGoal():
        episodeOver = 1
    if self.reward_noise > 0:
        theReward += numpy.random.normal(scale=self.reward_noise)
    theObs = Observation()
    theObs.doubleArray = self.getState()
    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = episodeOver
    return returnRO

def env_start(self):
    self.reset()
    returnObs = Observation()
    returnObs.doubleArray = self.state.tolist()
    return returnObs

def env_start(self):
    self.restart_simulation()
    returnObs = Observation()
    returnObs.doubleArray = self.getState().tolist()
    return returnObs

def env_start(self):
    self.reset()
    returnObs = Observation()
    returnObs.doubleArray = self.pos.tolist() + [self.fuel]
    return returnObs

def env_start(self):
    self.reset()
    returnObs = Observation()
    returnObs.doubleArray = self.getState()
    return returnObs

def env_start(self):
    self.reset()
    returnObs = Observation()
    returnObs.doubleArray = [self.sampleObservation(0)]
    return returnObs

def env_start(self):
    self.setStartState()
    returnObs = Observation()
    returnObs.doubleArray = [self.agentRow, self.agentCol]
    return returnObs

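# Hedged sketch: how an environment like those above is typically served to
# RL-Glue. Assumes the standard Python codec (rlglue.environment.Environment,
# EnvironmentLoader); the tiny skeleton class is illustrative, not from the
# source, and env_init would normally return a valid task-spec string.
from rlglue.environment.Environment import Environment
from rlglue.environment import EnvironmentLoader
from rlglue.types import Observation, Reward_observation_terminal

class SkeletonEnvironment(Environment):
    def env_init(self):
        return "sample task spec"  # placeholder; real code returns a task spec

    def env_start(self):
        obs = Observation(numDoubles=1)
        obs.doubleArray = [0.0]
        return obs

    def env_step(self, action):
        rot = Reward_observation_terminal()
        rot.r = 0.0
        rot.o = self.env_start()
        rot.terminal = 0
        return rot

    def env_cleanup(self):
        pass

    def env_message(self, message):
        return ""

if __name__ == "__main__":
    # Blocks and speaks the RL-Glue network protocol on behalf of the environment.
    EnvironmentLoader.loadEnvironment(SkeletonEnvironment())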