Example #1
	def makeObservation(self):
		returnObs = Observation()
		returnObs.doubleArray = self.pos.tolist()
		if self.fuel_loc is not None:
			returnObs.doubleArray += [self.fuel]
		returnObs.intArray = [self.pass_loc, self.pass_dest]
		return returnObs
Example #2
 def env_start(self):
     self.reset()
     returnObs = Observation()
     returnObs.doubleArray = [
         self.cart_location, self.cart_velocity
     ] + self.pole_angle.tolist() + self.pole_velocity.tolist()
     return returnObs
Example #3
 def makeObservation(self):
     returnObs = Observation()
     returnObs.doubleArray = self.pos.tolist()
     if self.fuel_loc is not None:
         returnObs.doubleArray += [self.fuel]
     returnObs.intArray = [self.pass_loc, self.pass_dest]
     return returnObs
Example #4
 def env_start(self):
     self.reset()
     returnObs = Observation()
     returnObs.doubleArray = (
         [self.cart_location, self.cart_velocity] + self.pole_angle.tolist() + self.pole_velocity.tolist()
     )
     return returnObs
Example #5
    def env_step(self, thisAction):
        log = logging.getLogger('pyrl.environments.gridworld.env_step')
        episodeOver = 0
        intAction = thisAction.intArray[0]
        log.debug("Action to take: %d", intAction)
        theReward = self.takeAction(intAction)

        if self.isAtGoal():
            log.info("Episode completed!!")
            episodeOver = 1

        if self.reward_noise > 0:
            theReward += numpy.random.normal(scale=self.reward_noise)

        theObs = Observation()
        theObs.doubleArray = self.getState()

        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = episodeOver

        log.info("(Action - State - Reward): (%d - %s - %f)", intAction,
                 pformat(theObs), theReward)

        return returnRO
Example #6
 def createObservation(self):
   obs = Observation(numDoubles=self.obsSize**2)
   # Slice the obsSize x obsSize window of visual memory at the observation origin.
   tmp = self.vmem[
     int(self.obsOrg[0]):int(self.obsOrg[0]+self.obsSize),
     int(self.obsOrg[1]):int(self.obsOrg[1]+self.obsSize)
   ]
   obs.doubleArray = list(tmp.flatten())
   return obs
Example #7
 def env_start(self):
     log = logging.getLogger('pyrl.environments.gridworld.env_start')
     self.reset()
     log.info("Environment started")
     returnObs = Observation()
     returnObs.doubleArray = self.getState()
     log.debug("Observation to return: %s", pformat(returnObs))
     return returnObs
Example #8
    def getObservation(self):
        returnObs = Observation()
        features = [1.]
        if self.original_features:
            features += mdptetris.features_original()
        if self.dellacherie_features:
            features += mdptetris.features_dellacherie()

        returnObs.intArray = [mdptetris.current_piece()]
        returnObs.doubleArray = features
        return returnObs
Example #9
    def env_start(self):
        """ Instantiate a new :class:`PinballModel` environment

        :returns: The initial state
        :rtype: :class:`Observation`

        """
        self.pinball = PinballModel(self.configuration)
        obs = Observation()
        obs.doubleArray = self.pinball.get_state()
        return obs
Example #10
    def getObservation(self):
        returnObs = Observation()
        features = [1.]
        if self.original_features:
            features += mdptetris.features_original()
        if self.dellacherie_features:
            features += mdptetris.features_dellacherie()

        returnObs.intArray = [mdptetris.current_piece()]
        returnObs.doubleArray = features
        return returnObs
Example #11
    def env_start(self):
        """ Instantiate a new :class:`PinballModel` environment

        :returns: The initial state
        :rtype: :class:`Observation`

        """
        self.pinball = PinballModel(self.configuration)
        obs = Observation()
        obs.doubleArray = self.pinball.get_state()
        return obs
Example #12
    def env_step(self,thisAction):
        intAction = thisAction.intArray[0]
        theReward, episodeOver = self.takeAction(intAction)

        theObs = Observation()
        theObs.doubleArray = self.state.tolist()
        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = int(episodeOver)

        return returnRO
Example #13
    def env_step(self, thisAction):
        # print self.agentRow, self.agentCol
        hitBoundary = self.updatePosition(thisAction.doubleArray[0])

        theObs = Observation()
        theObs.doubleArray = [self.agentRow, self.agentCol]

        returnRO = Reward_observation_terminal()
        returnRO.r = self.calculateReward(hitBoundary)
        returnRO.o = theObs
        returnRO.terminal = self.checkCurrentTerminal()

        return returnRO
Example #14
	def env_step(self,thisAction):
		intAction = thisAction.intArray[0]
		obs, reward = self.takeAction(intAction)

		theObs = Observation()
		theObs.doubleArray = [obs]
		
		returnRO = Reward_observation_terminal()
		returnRO.r = reward
		returnRO.o = theObs
		returnRO.terminal = 0

		return returnRO
Example #15
    def env_step(self, thisAction):
        intAction = thisAction.intArray[0]
        obs, reward = self.takeAction(intAction)

        theObs = Observation()
        theObs.doubleArray = [obs]

        returnRO = Reward_observation_terminal()
        returnRO.r = reward
        returnRO.o = theObs
        returnRO.terminal = 0

        return returnRO
Example #16
    def env_step(self, thisAction):
        # validate the action
        assert len(thisAction.doubleArray) == 2, "Expected 2 double actions."

        self.takeAction(thisAction.doubleArray)

        theObs = Observation()
        theObs.doubleArray = self.getState().tolist()

        theReward, terminate = self.getReward()
        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = int(terminate)

        return returnRO
Example #17
    def env_step(self, thisAction):
        # validate the action
        assert len(thisAction.doubleArray) == 2, "Expected 2 double actions."

        self.takeAction(thisAction.doubleArray)

        theObs = Observation()
        theObs.doubleArray = self.getState().tolist()

        theReward, terminate = self.getReward()
        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = int(terminate)

        return returnRO
Example #18
    def env_step(self, action):
        """ Take a step in the environment

        :param action: The action that the agent wants to take
        :returns: The next state, reward and whether the current state is terminal
        :rtype: :class:`Reward_observation_terminal`

        """
        returnRO = Reward_observation_terminal()

        returnRO.r = self.pinball.take_action(action.intArray[0])

        obs = Observation()
        obs.doubleArray = self.pinball.get_state()
        returnRO.o = obs

        returnRO.terminal = self.pinball.episode_ended()
        return returnRO
Example #19
    def env_step(self, action):
        """ Take a step in the environment

        :param action: The action that the agent wants to take
        :returns: The next state, reward and whether the current state is terminal
        :rtype: :class:`Reward_observation_terminal`

        """
        returnRO = Reward_observation_terminal()

        returnRO.r = self.pinball.take_action(action.intArray[0])

        obs = Observation()
        obs.doubleArray = self.pinball.get_state()
        returnRO.o = obs

        returnRO.terminal = self.pinball.episode_ended()
        return returnRO
Example #20
    def env_step(self, thisAction):
        intAction = thisAction.intArray[0]

        theReward = self.takeAction(intAction)
        episodeOver = int(self.terminate())

        if self.reward_noise > 0:
            theReward += numpy.random.normal(scale=self.reward_noise)

        theObs = Observation()
        theObs.doubleArray = (
            [self.cart_location, self.cart_velocity] + self.pole_angle.tolist() + self.pole_velocity.tolist()
        )
        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = episodeOver

        return returnRO
Example #21
    def env_step(self, thisAction):
        intAction = thisAction.intArray[0]

        theReward = self.takeAction(intAction)
        episodeOver = int(self.terminate())

        if self.reward_noise > 0:
            theReward += numpy.random.normal(scale=self.reward_noise)

        theObs = Observation()
        theObs.doubleArray = [
            self.cart_location, self.cart_velocity
        ] + self.pole_angle.tolist() + self.pole_velocity.tolist()
        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = episodeOver

        return returnRO
Example #22
    def env_step(self, thisAction):
        intAction = thisAction.intArray[0]

        self.step(intAction, self.noise)
        theReward = self.stim_penalty if intAction == 1 else 0.0
        if self.getLabel(self.current_neighbor) == self.seiz_label:
            theReward += self.seizure_penalty

        theObs = Observation()
        theObs.doubleArray = self.state.tolist()

        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = 0

        return returnRO
Example #23
    def env_step(self,thisAction):
        episodeOver = 0
        intAction = thisAction.intArray[0]

        theReward = self.takeAction(intAction)

        if self.isAtGoal():
            episodeOver = 1

        if self.reward_noise > 0:
            theReward += numpy.random.normal(scale=self.reward_noise)

        theObs = Observation()
        theObs.doubleArray = self.getState()

        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = episodeOver

        return returnRO
Example #24
 def env_start(self):
     self.reset()
     returnObs = Observation()
     returnObs.doubleArray = self.state.tolist()
     return returnObs
Example #25
 def createObservation(self):
     obs = Observation(numDoubles=self.obsSize**2)
     tmp = self.vmem[int(self.obsOrg[0]):int(self.obsOrg[0] + self.obsSize),
                     int(self.obsOrg[1]):int(self.obsOrg[1] + self.obsSize)]
     obs.doubleArray = list(tmp.flatten())
     return obs
Example #26
 def env_start(self):
     self.restart_simulation()
     returnObs = Observation()
     returnObs.doubleArray = self.getState().tolist()
     return returnObs
Example #27
 def env_start(self):
     self.reset()
     returnObs = Observation()
     returnObs.doubleArray = self.pos.tolist() + [self.fuel]
     return returnObs
Example #28
	def env_start(self):
		self.reset()
		returnObs = Observation()
		returnObs.doubleArray = self.pos.tolist() + [self.fuel]
		return returnObs
Example #29
 def env_start(self):
     self.reset()
     returnObs = Observation()
     returnObs.doubleArray = self.state.tolist()
     return returnObs
Example #30
 def env_start(self):
     self.reset()
     returnObs = Observation()
     returnObs.doubleArray = self.getState()
     return returnObs
Example #31
	def env_start(self):
		self.reset()
		returnObs = Observation()
		returnObs.doubleArray = [self.sampleObservation(0)]
		return returnObs
Example #32
 def env_start(self):
     self.restart_simulation()
     returnObs = Observation()
     returnObs.doubleArray = self.getState().tolist()
     return returnObs
Example #33
 def env_start(self):
     self.setStartState()
     returnObs = Observation()
     returnObs.doubleArray = [self.agentRow, self.agentCol]
     return returnObs
Example #34
 def env_start(self):
     self.reset()
     returnObs = Observation()
     returnObs.doubleArray = [self.sampleObservation(0)]
     return returnObs
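
All of the snippets above are methods of RL-Glue environments written against the Python codec: `Observation` carries the state in its `intArray`/`doubleArray` fields, `env_start` returns the initial `Observation`, and `env_step` returns a `Reward_observation_terminal` bundling reward, next observation, and terminal flag. The sketch below shows how such methods fit into a complete environment. It is a minimal illustration assuming the `rlglue` Python codec is installed; the class name, dynamics, step cost, terminal threshold, and task-spec string are hypothetical placeholders, not taken from any of the projects above.

import numpy

from rlglue.environment import EnvironmentLoader
from rlglue.environment.Environment import Environment
from rlglue.types import Observation, Reward_observation_terminal


class LineWorldEnvironment(Environment):
    """Hypothetical 1-D environment illustrating the Observation pattern."""

    def env_init(self):
        # Task specification string; the exact format depends on the experiment setup.
        return "VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR 1.0"

    def env_start(self):
        self.state = numpy.zeros(1)
        returnObs = Observation()
        returnObs.doubleArray = self.state.tolist()
        return returnObs

    def env_step(self, thisAction):
        intAction = thisAction.intArray[0]   # 0 = move left, 1 = move right
        self.state += 1.0 if intAction == 1 else -1.0

        theObs = Observation()
        theObs.doubleArray = self.state.tolist()

        returnRO = Reward_observation_terminal()
        returnRO.r = -1.0                     # constant per-step cost
        returnRO.o = theObs
        returnRO.terminal = int(abs(self.state[0]) >= 10.0)
        return returnRO

    def env_cleanup(self):
        pass

    def env_message(self, message):
        return ""


if __name__ == "__main__":
    # Connects this environment to a running rl_glue core process.
    EnvironmentLoader.loadEnvironment(LineWorldEnvironment())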