def env_start(self):
    self.reset()
    returnObs = Observation()
    returnObs.doubleArray = [self.cart_location, self.cart_velocity] + \
        self.pole_angle.tolist() + self.pole_velocity.tolist()
    return returnObs
def env_step(self, thisAction):
    log = logging.getLogger('pyrl.environments.gridworld.env_step')
    episodeOver = 0
    intAction = thisAction.intArray[0]
    log.debug("Action to take: %d", intAction)

    theReward = self.takeAction(intAction)

    if self.isAtGoal():
        log.info("Episode completed!!")
        episodeOver = 1

    if self.reward_noise > 0:
        theReward += numpy.random.normal(scale=self.reward_noise)

    theObs = Observation()
    theObs.doubleArray = self.getState()

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = episodeOver
    log.info("(Action - State - Reward): (%d - %s - %f)", intAction, pformat(theObs), theReward)
    return returnRO
def env_step(self, thisAction):
    episodeOver = 0
    theReward = 0

    if thisAction.intArray[0] == 0:
        self.currentState = self.currentState - 1
    if thisAction.intArray[0] == 1:
        self.currentState = self.currentState + 1

    if self.currentState <= 0:
        self.currentState = 0
        theReward = -1
        episodeOver = 1

    if self.currentState >= 20:
        self.currentState = 20
        theReward = 1
        episodeOver = 1

    theObs = Observation()
    theObs.intArray = [self.currentState]

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = episodeOver
    return returnRO
def env_start(self):
    self.currentState = 10
    returnObs = Observation()
    returnObs.intArray = [self.currentState]
    return returnObs
def env_step(self, thisAction):
    episodeOver = 0
    theReward = 0

    if thisAction.intArray[0] == 0:
        self.currentState = self.currentState - 1
    if thisAction.intArray[0] == 1:
        self.currentState = self.currentState + 1

    if self.currentState <= 0:
        self.currentState = 0
        theReward = -1
        episodeOver = 1

    if self.currentState >= 20:
        self.currentState = 20
        theReward = 1
        episodeOver = 1

    theObs = Observation()
    theObs.intArray = [self.currentState]

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = episodeOver
    return returnRO
class test_empty_environment(Environment):
    whichEpisode = 0
    emptyObservation = Observation()
    nonEmptyObservation = Observation(2, 4, 5)

    def env_init(self):
        self.nonEmptyObservation.intArray = [0, 1]
        self.nonEmptyObservation.doubleArray = [0.0/4.0, 1.0/4.0, 2.0/4.0, 3.0/4.0]
        self.nonEmptyObservation.charArray = ['a', 'b', 'c', 'd', 'e']
        return ""

    def env_start(self):
        self.whichEpisode = self.whichEpisode + 1
        if self.whichEpisode % 2 == 0:
            return self.emptyObservation
        else:
            return self.nonEmptyObservation

    def env_step(self, action):
        ro = Reward_observation_terminal()
        if self.whichEpisode % 2 == 0:
            ro.o = self.emptyObservation
        else:
            ro.o = self.nonEmptyObservation
        return ro

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        return None
def env_start(self):
    self.setStartState()
    returnObs = Observation()
    returnObs.intArray = [self.calculateFlatState(self.agentRow, self.agentCol)]
    return returnObs
def env_step(self, thisAction):
    # Make sure the action is valid
    assert len(thisAction.intArray) == 1, "Expected 1 integer action."
    assert thisAction.intArray[0] >= 0, "Expected action to be in [0,3]"
    assert thisAction.intArray[0] < 4, "Expected action to be in [0,3]"

    self.updatePosition(thisAction.intArray[0])
    lastActionValue = thisAction.intArray[0]

    theObs = Observation()
    theObs.intArray = [self.calculateFlatState()]
    theObs.charArray = ["T", "T", "T", "T"]
    if len(self.optionsArray[self.agentRow][self.agentCol]) != 0:
        for i in range(len(self.optionsArray[self.agentRow][self.agentCol])):
            theObs.charArray[2 + self.optionsArray[self.agentRow][self.agentCol][i]] = "T"

    returnRO = Reward_observation_terminal()
    returnRO.r = self.calculateReward(lastActionValue)
    returnRO.o = theObs
    returnRO.terminal = self.checkCurrentTerminal()
    return returnRO
def env_start(self):
    if self.fixedStartState:
        stateValid = self.setAgentState(self.startRow, self.startCol)
        if not stateValid:
            print "The fixed start state was NOT valid: " + str(int(self.startRow)) + "," + str(int(self.startCol))
            self.setRandomState()
    else:
        self.setRandomState()

    returnObs = Observation()
    returnObs.intArray = [self.calculateFlatState()]
    # Up, Right, Down, Option1, Option2
    returnObs.charArray = ["T", "T", "T", "T"]
    if len(self.optionsArray[self.startRow][self.startCol]) != 0:
        for i in range(len(self.optionsArray[self.startRow][self.startCol])):
            returnObs.charArray[3 + self.optionsArray[self.startRow][self.startCol][i]] = "T"
    # print returnObs.charArray
    # Now add characters based on options present
    return returnObs
def env_start(self): """ Start the game! """ # Set up start states self.world.add_starts(*self.start_states) # Set up terminal states self.world.add_terminals(*self.terminal_states.keys()) for (row, col), reward in self.terminal_states.items(): self.world[row][col].reward = reward # Initialize state of the agent to one of start_states r = random.randrange(len(self.start_states)) self.world.agent_state = list(self.start_states[r]) # Initialize step counter self.steps = 0 self.step_out('START WORLD:') self.step_out(self.world) # Pass agent state over to the agent obs = Observation() obs.intArray = self.world.agent_state return obs
def makeObservation(self):
    returnObs = Observation()
    returnObs.doubleArray = self.pos.tolist()
    if self.fuel_loc is not None:
        returnObs.doubleArray += [self.fuel]
    returnObs.intArray = [self.pass_loc, self.pass_dest]
    return returnObs
def env_start(self): """ Start the game! """ # Set up start states self.world.add_starts(*self.start_states) # Set up terminal states self.world.add_terminals(*self.terminal_states.keys()) for (row, col), reward in self.terminal_states.items(): self.world[row][col].reward = reward # Initialize state of the agent to one of start_states r = random.randrange(len(self.start_states)) self.world.agent_state = list(self.world.expand_pos(self.start_states[r])) # Initialize step counter self.steps = 0 self.step_out('START WORLD:') self.step_out(self.world) # Pass agent state over to the agent obs = Observation() obs.intArray = self.world.agent_state return obs
def env_start(self):
    self.reset()
    returnObs = Observation()
    returnObs.doubleArray = (
        [self.cart_location, self.cart_velocity]
        + self.pole_angle.tolist()
        + self.pole_velocity.tolist()
    )
    return returnObs
def env_start(self):
    self.currentState = 10
    returnObs = Observation()
    returnObs.intArray = [self.currentState]
    return returnObs
def env_step(self, thisAction):
    # Move the player
    self.player.update(thisAction)

    # Compute the score after the move
    theReward = self.field.decision(int(self.player.x + 0.5), int(self.player.y + 0.5), thisAction.intArray[0])
    # print("Reward:%d" % theReward)
    episodeOver = self.field.get_gameover()
    # print("EdgeTracer:episodeOver %03d" % episodeOver)

    # Draw the field
    self.draw_field()

    returnObs = Observation()
    returnObs.intArray = np.append(np.zeros(128), [item for innerlist in self.img_state for item in innerlist])
    # scipy.misc.imsave('l_screen.png', img_src)
    # scipy.misc.imsave('r_screen.png', img_afn)

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = returnObs
    returnRO.terminal = episodeOver
    return returnRO
def env_start(self):
    self.seed()
    self.reset()
    # self.seps=0
    returnObs = Observation()
    returnObs.intArray = [self.s]
    return returnObs
def createObservation(self):
    obs = Observation(numDoubles=self.obsSize**2)
    tmp = self.vmem[int(self.obsOrg[0]):int(self.obsOrg[0] + self.obsSize),
                    int(self.obsOrg[1]):int(self.obsOrg[1] + self.obsSize)]
    obs.doubleArray = list(tmp.flatten())
    return obs
def env_start(self):
    log = logging.getLogger('pyrl.environments.gridworld.env_start')
    self.reset()
    log.info("Environment started")
    returnObs = Observation()
    returnObs.doubleArray = self.getState()
    log.debug("Observation to return: %s", pformat(returnObs))
    return returnObs
def env_step(self, action):
    self.steps += 1

    # Action is one of N,S,W,E
    action = action.charArray[0]
    self.step_out('ACTION:', action)

    if action not in self.valid_actions.keys():
        print 'WARNING: Invalid action %s' % (action)
        obs = Observation()
        obs.intArray = self.world.agent_state
        return Reward_observation_terminal(0, obs, False)

    # The actions might result in movement in a direction other than the one
    # intended with a probability of (1 - action_prob)
    if self.enable_stochastic_actions:
        dice = random.random()
        if dice > self.action_prob:
            # Randomness! Choose uniformly between each other action
            other_actions = list(set(self.valid_actions.keys()) - set(action))
            action = random.choice(other_actions)

    # Move the agent
    self.step_out('RESULT ACTION:', action)
    self.move_agent(self.valid_actions[action])

    # Apply wind from the new state
    if self.enable_wind:
        pstate = self.world[self.world.agent_state[0]][self.world.agent_state[1]]
        if pstate.wind:
            p, dir = pstate.wind
            dice = random.random()
            if dice <= p:
                # Fudge & crackers! Our agent gets caught by the wind!
                self.step_out('WIND IN %s!' % (dir))
                self.move_agent(dir)

    agent_state = self.world.reduce_pos(self.world.agent_state)
    pstate = self.world[agent_state[0]][agent_state[1]]

    # Return observation
    obs = Observation()
    obs.intArray = self.world.agent_state

    # print('IT\'S A NEW WORLD:')
    self.step_out(self.world)
    # self.debug('\n' + str(self.world))

    self.step_out("REWARD:", pstate.reward)

    terminal = pstate.terminal
    if self.steps > self.step_limit:
        self.debug("STEP LIMIT REACHED!")
        terminal = True

    return Reward_observation_terminal(pstate.reward, obs, terminal)
def env_start(self): """ Get the state of the environment and return it. """ self.state = [0 for i in range(9)] #self.env_play() obs = Observation() obs.intArray = self.state return obs
def env_start(self):
    State = random.randint(0, 3)
    returnObs = Observation()
    # zero for all the 4 starting states
    self.presentCol = 0
    self.presentRow = self.Start_states[State][0]
    returnObs.intArray = [self.rolloutstate()]
    return returnObs
def test_agent_step():
    print "Testing."
    color_range = 128
    size_of_observation = 128 + 210*160

    print "Setting up agent."
    agent = setup()

    color = 1
    observation = Observation()
    observation.intArray = np.ones(size_of_observation, dtype=np.uint8)
    observation.intArray *= color
    agent.agent_start(observation)
    agent.agent_train(False)

    for i in range(2, 256):
        print "Round %d" % i
        reward = float(i)
        color = i
        observation = Observation()
        observation.intArray = np.ones(size_of_observation, dtype=np.uint8)
        observation.intArray *= color
        agent.agent_step(reward, observation)
        agent.agent_train(False)

    reward = float(i)
    color = i
    observation = Observation()
    observation.intArray = np.ones(size_of_observation, dtype=np.uint8)
    observation.intArray *= color
    agent.agent_step(reward, observation)
    agent.agent_train(True)
def getObservation(self):
    returnObs = Observation()
    features = [1.]
    if self.original_features:
        features += mdptetris.features_original()
    if self.dellacherie_features:
        features += mdptetris.features_dellacherie()
    returnObs.intArray = [mdptetris.current_piece()]
    returnObs.doubleArray = features
    return returnObs
def env_start(self): """ Instantiate a new :class:`PinballModel` environment :returns: The initial state :rtype: :class:`Observation` """ self.pinball = PinballModel(self.configuration) obs = Observation() obs.doubleArray = self.pinball.get_state() return obs
def env_start(self):
    k = random.randint(0, 1)
    State = random.randint(self.states[k][0], self.states[k][1])
    returnObs = Observation()
    # zero for all the 4 starting states
    self.presentCol = random.randint(0, 999)
    # self.presentCol = 10900
    self.presentRow = State
    returnObs.intArray = [self.rolloutstate()]
    return returnObs
def env_step(self, thisAction):
    intAction = thisAction.intArray[0]
    theReward, episodeOver = self.takeAction(intAction)

    theObs = Observation()
    theObs.doubleArray = self.state.tolist()

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = int(episodeOver)
    return returnRO
def env_step(self, thisAction):
    intAction = int(thisAction.intArray[0])
    theReward = self.takeAction(intAction)

    theObs = Observation()
    theObs.intArray = self.getState()

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = 0
    return returnRO
def env_step(self, thisAction):
    intAction = int(thisAction.intArray[0])
    theReward = self.takeAction(intAction)

    theObs = Observation()
    theObs.intArray = self.getState()

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = 0
    return returnRO
def env_step(self, action):
    self.agent.botAction = action
    self.step()
    pixels = pygame.surfarray.array2d(screen)

    theObs = Observation()
    theObs.intArray = misc.imresize(pixels, (84, 84)).flatten().tolist()

    returnRO = Reward_observation_terminal()
    returnRO.r = 1  # reward goes here
    returnRO.o = theObs
    returnRO.terminal = 0
    return returnRO
def env_start(self):
    if self.fixedStartState:
        stateValid = self.setAgentState(self.startRow, self.startCol)
        if not stateValid:
            print("The fixed start state was NOT valid: " + str(int(self.startRow)) + "," + str(int(self.startCol)))
            self.setRandomState()
    else:
        self.setRandomState()

    returnObs = Observation()
    returnObs.intArray = [self.calculateFlatState()]
    return returnObs
def env_start(self): if self.fixedStartState: stateValid = self.setAgentState(self.state) if not stateValid: print "The fixed start state was NOT valid: " + str(self.state) self.setRandomState() else: self.setRandomState() returnObs = Observation() # print self.state returnObs.intArray = map(int, list(self.state)) return returnObs #return observation
def env_start(self): if self.fixedStartState: stateValid=self.setAgentState(self.startRow,self.startCol) if not stateValid: print "The fixed start state was NOT valid: "+str(int(self.startRow))+","+str(int(self.startRow)) self.setRandomState() else: self.setRandomState() returnObs=Observation() returnObs.intArray=[self.calculateFlatState()] return returnObs
def env_start(self):
    if self.fixedStartState:
        stateValid = self.setAgentState(self.state)
        if not stateValid:
            print "The fixed start state was NOT valid: " + str(self.state)
            self.setRandomState()
    else:
        self.setRandomState()

    returnObs = Observation()
    # print self.state
    returnObs.intArray = map(int, list(self.state))
    return returnObs
def env_step(self, thisAction):
    intAction = thisAction.intArray[0]
    obs, reward = self.takeAction(intAction)

    theObs = Observation()
    theObs.doubleArray = [obs]

    returnRO = Reward_observation_terminal()
    returnRO.r = reward
    returnRO.o = theObs
    returnRO.terminal = 0
    return returnRO
def env_step(self, thisAction):
    intAction = thisAction.intArray[0]
    obs, reward = self.takeAction(intAction)

    theObs = Observation()
    theObs.doubleArray = [obs]

    returnRO = Reward_observation_terminal()
    returnRO.r = reward
    returnRO.o = theObs
    returnRO.terminal = 0
    return returnRO
def env_step(self, thisAction):
    # print self.agentRow, self.agentCol
    hitBoundary = self.updatePosition(thisAction.doubleArray[0])

    theObs = Observation()
    theObs.doubleArray = [self.agentRow, self.agentCol]

    returnRO = Reward_observation_terminal()
    returnRO.r = self.calculateReward(hitBoundary)
    returnRO.o = theObs
    returnRO.terminal = self.checkCurrentTerminal()
    return returnRO
def env_start(self):
    startx, starty = self.bg.getStart()
    self.player.reset(startx, starty)
    self.bg.reset()
    self.gameover = False

    returnObs = Observation()
    arr = pygame.surfarray.array2d(self.screen)
    # returnObs.intArray = np.zeros(128)
    returnObs.intArray = np.append(np.zeros(128), [item for innerlist in arr for item in innerlist])
    scipy.misc.imsave('screen.png', arr)
    return returnObs
def agent_init(self, taskSpecString):
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecString)
    if TaskSpec.valid:
        assert len(TaskSpec.getIntObservations()) == 1, "expecting 1-dimensional discrete observations"
        assert len(TaskSpec.getDoubleObservations()) == 0, "expecting no continuous observations"
        assert not TaskSpec.isSpecial(TaskSpec.getIntObservations()[0][0]), "expecting min observation to be a number not a special value"
        assert not TaskSpec.isSpecial(TaskSpec.getIntObservations()[0][1]), "expecting max observation to be a number not a special value"
        self.numStates = TaskSpec.getIntObservations()[0][1] + 1

        assert len(TaskSpec.getIntActions()) == 1, "expecting 1-dimensional discrete actions"
        assert len(TaskSpec.getDoubleActions()) == 0, "expecting no continuous actions"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][0]), "expecting min action to be a number not a special value"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][1]), "expecting max action to be a number not a special value"
        self.numActions = TaskSpec.getIntActions()[0][1] + 1

        self.value_function = numpy.zeros([self.numStates, self.numActions])
    else:
        print "Task Spec could not be parsed: " + taskSpecString

    self.lastAction = Action()
    self.lastObservation = Observation()
def agent_init(self, spec):
    taskSpec = TaskSpecVRLGLUE3.TaskSpecParser(spec)
    if taskSpec.valid:
        self.num_actions = taskSpec.getIntActions()[0][1] + 1
    else:
        raise Exception("Invalid task spec")
    self.last_observation = Observation()

    self.batch_size = 32        # batch size for SGD
    self.ep_start = 1           # initial value of epsilon in epsilon-greedy exploration
    self.ep = self.ep_start     # exploration probability
    self.ep_end = 0.1           # final value of epsilon in epsilon-greedy exploration
    self.ep_endt = 1000000      # number of frames over which epsilon is linearly annealed
    self.episode_qvals = []
    self.all_qvals = []
    self.learn_start = 0        # number of steps after which learning starts
    self.is_testing = False
    self.replay_memory = 1000000
    self.phi_length = 4         # number of most recent frames for input to Q-function
    self.reset_after = 10000    # replace Q_hat with Q after this many steps
    self.step_counter = 0
    self.episode_counter = 0
    self.total_reward = 0
    self.qvals = []

    self.train_table = TransitionTable(self.phi_length, self.replay_memory, RESIZED_WIDTH, RESIZED_HEIGHT)
    self.test_table = TransitionTable(self.phi_length, self.phi_length, RESIZED_WIDTH, RESIZED_HEIGHT)

    if self.network_file is None:
        self.network = DeepQLearner(RESIZED_WIDTH, RESIZED_HEIGHT, self.num_actions, self.phi_length, self.batch_size)
    else:
        self.network = cPickle.load(open(self.network_file))
def agent_init(self, taskSpecString):
    self.numActions = 4
    self.numStates = 144
    self.qfunction = [self.numActions * [0.0] for i in range(self.numStates)]

    # x coordinate
    self.phi1 = np.array([i for i in range(12)])
    # y coordinate
    self.phi2 = np.array([i for i in range(12)])

    # self.theta = np.array([ for i in range(4)])
    self.thetax = np.array([[random.random(), random.random(), random.random(), random.random()]
                            for i in range(12)])
    self.thetay = np.array([[random.random(), random.random(), random.random(), random.random()]
                            for i in range(12)])
    self.thetaxy = np.array([[[random.random(), random.random(), random.random(), random.random()]
                              for i in range(12)] for j in range(12)])

    self.lastAction = Action()
    self.lastObs = Observation()
def agent_init(self, taskSpecString): print "Agent Up" # print taskSpecString TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpecString) if TaskSpec.valid: print len( TaskSpec.getDoubleActions()), ": ", TaskSpec.getDoubleActions( ), '\n', len(TaskSpec.getDoubleObservations() ), ": ", TaskSpec.getDoubleObservations() assert len(TaskSpec.getIntObservations() ) == 0, "expecting no discrete observations" assert len(TaskSpec.getDoubleObservations( )) == 12, "expecting 12-dimensional continuous observations" assert len( TaskSpec.getIntActions()) == 0, "expecting no discrete actions" assert len(TaskSpec.getDoubleActions() ) == 4, "expecting 4-dimensional continuous actions" self.obs_specs = TaskSpec.getDoubleObservations() self.actions_specs = TaskSpec.getDoubleActions() # print "Observations: ",self.obs_specs # print "actions_specs:", self.actions_specs else: print "Task Spec could not be parsed: " + taskSpecString self.lastAction = Action() self.lastObservation = Observation()
def env_step(self, thisAction):
    # Validate the action
    assert len(thisAction.doubleArray) == 2, "Expected 2 double actions."

    self.takeAction(thisAction.doubleArray)

    theObs = Observation()
    theObs.doubleArray = self.getState().tolist()

    theReward, terminate = self.getReward()

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = int(terminate)
    return returnRO
def env_step(self, thisAction):
    # Validate the action
    assert len(thisAction.doubleArray) == 2, "Expected 2 double actions."

    self.takeAction(thisAction.doubleArray)

    theObs = Observation()
    theObs.doubleArray = self.getState().tolist()

    theReward, terminate = self.getReward()

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = int(terminate)
    return returnRO
def agent_init(self, taskSpec): """Initialize the RL agent. Args: taskSpec: The RLGlue task specification string. """ # (Re)initialize parameters (incase they have been changed during a trial self.init_parameters() # Parse the task specification and set up the weights and such TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(taskSpec) if self.agent_supported(TaskSpec): self.numStates = len(TaskSpec.getDoubleObservations()) self.discStates = numpy.array(TaskSpec.getIntObservations()) self.numDiscStates = int( reduce(lambda a, b: a * (b[1] - b[0] + 1), self.discStates, 1.0)) self.numActions = TaskSpec.getIntActions()[0][1] + 1 self.model.model_init(self.numDiscStates, TaskSpec.getDoubleObservations(), \ self.numActions, TaskSpec.getRewardRange()[0]) self.planner.planner_init(self.numDiscStates, TaskSpec.getDoubleObservations(), \ self.numActions, TaskSpec.getRewardRange()[0]) else: print "Task Spec could not be parsed: " + taskSpecString self.lastAction = Action() self.lastObservation = Observation()
def env_start(self):
    self.startx, self.starty = self.field.getStart()
    # print("startx:%03d" % self.startx + " starty:%03d" % self.starty)
    self.player.reset(self.startx, self.starty)
    self.field.reset()
    self.gameover = False

    # Draw the field
    self.draw_field()
    # crop = img_src[0:41, 0:55]
    # scipy.misc.imsave('crop.png', crop)

    returnObs = Observation()
    returnObs.intArray = np.append(np.zeros(128), [item for innerlist in self.img_state for item in innerlist])
    # scipy.misc.imsave('screen.png', img_src)
    return returnObs
def env_step(self, thisAction):
    # Make sure the action is valid
    assert len(thisAction.intArray) == 1, "Expected 1 integer action."
    assert thisAction.intArray[0] >= 0, "Expected action to be in [0,3]"
    assert thisAction.intArray[0] < 4, "Expected action to be in [0,3]"

    self.updatePosition(thisAction.intArray[0])

    theObs = Observation()
    theObs.intArray = [self.calculateFlatState()]

    returnRO = Reward_observation_terminal()
    returnRO.r = self.calculateReward()
    returnRO.o = theObs
    returnRO.terminal = self.checkCurrentTerminal()
    return returnRO
def env_step(self,actions): """ Verify the actions are valid, play a move, and return the state. """ reward = 0 terminal = 0 #Change our current state to the new board self.state = actions.intArray #Check if the agent made a winning move if self.is_victory(): print "WE LOST" reward = 1 terminal = 1 #Otherwise keep on playing! elif self.is_full(): "AGENT FILLED" reward = 1 terminal = 1 elif not self.is_full(): print "PLAY" self.env_play() #Check if we won if self.is_full(): print "WE FILLED" reward = 1 terminal = 1 if self.is_victory(): print "WE WON" reward = 0 terminal = 1 #Set up the observation object and return it obs = Observation() obs.intArray = self.state reward_obs = Reward_observation_terminal() reward_obs.r = reward reward_obs.o = obs reward_obs.terminal = terminal return reward_obs
def env_step(self, thisAction):
    self.screen.fill((0, 0, 0))
    if self.gameover:
        self.center_msg("""Game Over!\nYour score: %d Press space to continue""" % self.score)
    else:
        if self.paused:
            self.center_msg("Paused")
        else:
            pygame.draw.line(self.screen, (255, 255, 255), (self.rlim + 1, 0), (self.rlim + 1, self.height - 1))
            self.disp_msg("Next:", (self.rlim + cell_size, 2))
            self.disp_msg("Score: %d\n\nLevel: %d\nLines: %d" % (self.score, self.level, self.lines),
                          (self.rlim + cell_size, cell_size * 5))
            self.draw_matrix(self.bground_grid, (0, 0))
            self.draw_matrix(self.board, (0, 0))
            self.draw_matrix(self.stone, (self.stone_x, self.stone_y))
            self.draw_matrix(self.next_stone, (cols + 1, 2))
    pygame.display.update()

    for event in pygame.event.get():
        if event.type == pygame.USEREVENT + 1:
            self.drop(False)
        elif event.type == pygame.QUIT:
            self.quit()
        elif event.type == pygame.KEYDOWN:
            for key in key_actions:
                if event.key == eval("pygame.K_" + key):
                    key_actions[key]()

    episodeOver = 0
    theReward = 0

    theObs = Observation()
    theObs.intArray = np.zeros(50816)

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = episodeOver
    return returnRO
def env_step(self, action): """ Take a step in the environment :param action: The action that the agent wants to take :returns: The next state, reward and whether the current state is terminal :rtype: :class:`Reward_observation_terminal` """ returnRO = Reward_observation_terminal() returnRO.r = self.pinball.take_action(action.intArray[0]) obs = Observation() obs.doubleArray = self.pinball.get_state() returnRO.o = obs returnRO.terminal = self.pinball.episode_ended() return returnRO
def env_step(self, thisAction):
    intAction = thisAction.intArray[0]
    theReward = self.takeAction(intAction)
    episodeOver = int(self.terminate())

    if self.reward_noise > 0:
        theReward += numpy.random.normal(scale=self.reward_noise)

    theObs = Observation()
    theObs.doubleArray = (
        [self.cart_location, self.cart_velocity]
        + self.pole_angle.tolist()
        + self.pole_velocity.tolist()
    )

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = episodeOver
    return returnRO
def env_step(self, thisAction):
    episodeOver = 0
    theReward = -1.0
    intAction = thisAction.intArray[0]

    self.step(intAction, self.noise)

    seized = 0
    theReward = self.stim_penalty if intAction == 1 else 0.0
    if self.getLabel(self.current_neighbor) == self.seiz_label:
        theReward += self.seizure_penalty

    theObs = Observation()
    theObs.doubleArray = self.state.tolist()

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = 0
    return returnRO
def env_step(self, thisAction):
    # Process action
    # self.stageIndex = thisAction.intArray[0]
    if thisAction.intArray[0] == 0:
        self.stageIndex = self.licycle.next()
    # print "stageIndex: {}".format(self.stageIndex)
    traci.trafficlights.setRedYellowGreenState("1", self.Stages[self.stageIndex])
    traci.simulationStep()
    self.simStep += 1
    # print "Simulation step: {}".format(self.simStep)

    self.currentVehList = traci.vehicle.getIDList()
    self.state.updateState(self.currentVehList)

    episodeTerminal = 0

    # Check if state is terminal
    if traci.simulation.getMinExpectedNumber() == 0:
        theObs = Observation()
        theObs.intArray = self.state.carState.flatten()
        episodeTerminal = 1
        traci.close()

    theObs = Observation()
    theObs.intArray = self.state.carState.flatten()

    returnRO = Reward_observation_terminal()
    returnRO.r = self.calculate_reward()
    # returnRO.r = self.calculate_delay()
    # print "Reward: {}".format(returnRO.r)
    returnRO.o = theObs
    returnRO.terminal = episodeTerminal

    killedVehicles = checkVehKill(self.vehicleDict)
    for vehicle in killedVehicles:
        del self.vehicleDict[vehicle]

    self.previousVehList = self.currentVehList

    return returnRO
def env_start(self):
    # Randomly generate new routes
    routeGenProcess = subprocess.Popen("python %s" % (self.routeScript), shell=True, stdout=sys.stdout)

    # Start SUMO
    sumoProcess = subprocess.Popen("%s -c %s --no-warnings" % (self.sumoBinary, self.sumoConfig),
                                   shell=True, stdout=sys.stdout)
    traci.init(self.traciPORT)

    self.state = State("1")

    # Reset these variables when the episode starts
    self.vehicleDict = {}
    self.currentVehList = []
    self.previousVehList = []
    self.totalCumWaitingTime = 0

    returnObs = Observation()
    returnObs.intArray = self.state.carState.flatten()

    self.simStep = 1

    return returnObs
def env_step(self, thisAction):
    episodeOver = 0
    intAction = thisAction.intArray[0]

    theReward = self.takeAction(intAction)

    if self.isAtGoal():
        episodeOver = 1

    if self.reward_noise > 0:
        theReward += numpy.random.normal(scale=self.reward_noise)

    theObs = Observation()
    theObs.doubleArray = self.getState()

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = episodeOver
    return returnRO