def env_step(self, action):
    self.steps += 1

    # Action is one of N, S, W, E
    action = action.charArray[0]
    self.step_out('ACTION:', action)

    if action not in self.valid_actions.keys():
        print 'WARNING: Invalid action %s' % (action)
        obs = Observation()
        obs.intArray = self.world.agent_state
        return Reward_observation_terminal(0, obs, False)

    # The action may result in movement in a direction other than the one
    # intended, with probability (1 - action_prob).
    if self.enable_stochastic_actions:
        dice = random.random()
        if dice > self.action_prob:
            # Randomness! Choose uniformly among the other actions.
            other_actions = list(set(self.valid_actions.keys()) - set(action))
            action = random.choice(other_actions)

    # Move the agent
    self.step_out('RESULT ACTION:', action)
    self.move_agent(self.valid_actions[action])

    # Apply wind from the new state
    if self.enable_wind:
        pstate = self.world[self.world.agent_state[0]][self.world.agent_state[1]]
        if pstate.wind:
            p, dir = pstate.wind
            dice = random.random()
            if dice <= p:
                # Fudge & crackers! Our agent gets caught by the wind!
                self.step_out('WIND IN %s!' % (dir))
                self.move_agent(dir)

    agent_state = self.world.reduce_pos(self.world.agent_state)
    pstate = self.world[agent_state[0]][agent_state[1]]

    # Return observation
    obs = Observation()
    obs.intArray = self.world.agent_state
    self.step_out(self.world)
    self.step_out("REWARD:", pstate.reward)

    terminal = pstate.terminal
    if self.steps > self.step_limit:
        self.debug("STEP LIMIT REACHED!")
        terminal = True

    return Reward_observation_terminal(pstate.reward, obs, terminal)
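# The stochastic-slip logic above is easy to test in isolation. A minimal,
# self-contained sketch (the action dict in the usage note is illustrative;
# the real mapping lives in self.valid_actions):
import random

def slip(action, valid_actions, action_prob):
    # With probability (1 - action_prob), replace the intended action with a
    # uniformly chosen *different* action.
    if random.random() > action_prob:
        others = [a for a in valid_actions if a != action]
        action = random.choice(others)
    return action

# e.g. slip('N', {'N': (-1, 0), 'S': (1, 0), 'W': (0, -1), 'E': (0, 1)}, 0.8)
# returns 'N' about 80% of the time and one of 'S', 'W', 'E' otherwise.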
def test_agent_step():
    print "Testing."
    color_range = 128
    size_of_observation = 128 + 210 * 160

    print "Setting up agent."
    agent = setup()

    color = 1
    observation = Observation()
    observation.intArray = np.ones(size_of_observation, dtype=np.uint8)
    observation.intArray *= color
    agent.agent_start(observation)
    agent.agent_train(False)

    for i in range(2, 256):
        print "Round %d" % i
        reward = float(i)
        color = i
        observation = Observation()
        observation.intArray = np.ones(size_of_observation, dtype=np.uint8)
        observation.intArray *= color
        agent.agent_step(reward, observation)
        agent.agent_train(False)

    reward = float(i)
    color = i
    observation = Observation()
    observation.intArray = np.ones(size_of_observation, dtype=np.uint8)
    observation.intArray *= color
    agent.agent_step(reward, observation)
    agent.agent_train(True)
def env_start(self):
    if self.fixedStartState:
        stateValid = self.setAgentState(self.startRow, self.startCol)
        if not stateValid:
            print "The fixed start state was NOT valid: " + str(int(self.startRow)) + "," + str(int(self.startCol))
            self.setRandomState()
    else:
        self.setRandomState()

    returnObs = Observation()
    returnObs.intArray = [self.calculateFlatState()]
    # Up, Right, Down, Option1, Option2
    returnObs.charArray = ["T", "T", "T", "T"]
    # Now add characters based on options present
    if len(self.optionsArray[self.startRow][self.startCol]) != 0:
        for i in range(len(self.optionsArray[self.startRow][self.startCol])):
            returnObs.charArray[3 + self.optionsArray[self.startRow][self.startCol][i]] = "T"
    return returnObs
def env_start(self):
    self.currentState = 10

    returnObs = Observation()
    returnObs.intArray = [self.currentState]

    return returnObs
def env_start(self): """ Start the game! """ # Set up start states self.world.add_starts(*self.start_states) # Set up terminal states self.world.add_terminals(*self.terminal_states.keys()) for (row, col), reward in self.terminal_states.items(): self.world[row][col].reward = reward # Initialize state of the agent to one of start_states r = random.randrange(len(self.start_states)) self.world.agent_state = list(self.start_states[r]) # Initialize step counter self.steps = 0 self.step_out('START WORLD:') self.step_out(self.world) # Pass agent state over to the agent obs = Observation() obs.intArray = self.world.agent_state return obs
def makeObservation(self):
    returnObs = Observation()
    returnObs.doubleArray = self.pos.tolist()
    if self.fuel_loc is not None:
        returnObs.doubleArray += [self.fuel]
    returnObs.intArray = [self.pass_loc, self.pass_dest]
    return returnObs
def env_start(self): """ Start the game! """ # Set up start states self.world.add_starts(*self.start_states) # Set up terminal states self.world.add_terminals(*self.terminal_states.keys()) for (row, col), reward in self.terminal_states.items(): self.world[row][col].reward = reward # Initialize state of the agent to one of start_states r = random.randrange(len(self.start_states)) self.world.agent_state = list(self.world.expand_pos(self.start_states[r])) # Initialize step counter self.steps = 0 self.step_out('START WORLD:') self.step_out(self.world) # Pass agent state over to the agent obs = Observation() obs.intArray = self.world.agent_state return obs
def env_start(self):
    self.setStartState()
    returnObs = Observation()
    returnObs.intArray = [self.calculateFlatState(self.agentRow, self.agentCol)]
    return returnObs
def env_step(self, thisAction):
    # Make sure the action is valid
    assert len(thisAction.intArray) == 1, "Expected 1 integer action."
    assert thisAction.intArray[0] >= 0, "Expected action to be in [0,3]"
    assert thisAction.intArray[0] < 4, "Expected action to be in [0,3]"

    self.updatePosition(thisAction.intArray[0])
    lastActionValue = thisAction.intArray[0]

    theObs = Observation()
    theObs.intArray = [self.calculateFlatState()]
    theObs.charArray = ["T", "T", "T", "T"]
    if len(self.optionsArray[self.agentRow][self.agentCol]) != 0:
        for i in range(len(self.optionsArray[self.agentRow][self.agentCol])):
            theObs.charArray[2 + self.optionsArray[self.agentRow][self.agentCol][i]] = "T"

    returnRO = Reward_observation_terminal()
    returnRO.r = self.calculateReward(lastActionValue)
    returnRO.o = theObs
    returnRO.terminal = self.checkCurrentTerminal()

    return returnRO
def env_step(self, thisAction):
    # Move the player
    self.player.update(thisAction)

    # Compute the score after the move
    theReward = self.field.decision(int(self.player.x + 0.5), int(self.player.y + 0.5), thisAction.intArray[0])
    episodeOver = self.field.get_gameover()

    # Draw the field
    self.draw_field()

    returnObs = Observation()
    returnObs.intArray = np.append(np.zeros(128), [item for innerlist in self.img_state for item in innerlist])

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = returnObs
    returnRO.terminal = episodeOver

    return returnRO
def env_step(self, thisAction):
    episodeOver = 0
    theReward = 0

    if thisAction.intArray[0] == 0:
        self.currentState = self.currentState - 1
    if thisAction.intArray[0] == 1:
        self.currentState = self.currentState + 1

    if self.currentState <= 0:
        self.currentState = 0
        theReward = -1
        episodeOver = 1

    if self.currentState >= 20:
        self.currentState = 20
        theReward = 1
        episodeOver = 1

    theObs = Observation()
    theObs.intArray = [self.currentState]

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = episodeOver

    return returnRO
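# For reference: the RL-Glue Python codec types used throughout these
# snippets are plain containers with attribute fields. A minimal sketch of
# how the pieces fit together (the import path is the codec's standard one):
from rlglue.types import Observation, Reward_observation_terminal

obs = Observation()
obs.intArray = [10]          # integer observation, as in the walk above
ro = Reward_observation_terminal()
ro.r = 1.0                   # reward
ro.o = obs                   # observation
ro.terminal = 0              # nonzero / True ends the episode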
def env_step(self, action):
    action = action.intArray
    if len(action) != 3:
        print action, len(action)
    assert len(action) == self.simulationParameterObj.nbrReaches, "Expected " + str(
        self.simulationParameterObj.nbrReaches) + " integer action."

    if not InvasiveUtility.is_action_allowable(action, self.state):
        theObs = Observation()
        theObs.intArray = [-1]
        returnRO = Reward_observation_terminal()
        returnRO.r = self.Bad_Action_Penalty
        returnRO.o = theObs
        return returnRO

    cost_state_unit = InvasiveUtility.get_unit_invaded_reaches(self.state,
        self.simulationParameterObj.habitatSize) * self.actionParameterObj.costPerReach
    stateCost = cost_state_unit + InvasiveUtility.get_invaded_reaches(
        self.state) * self.actionParameterObj.costPerTree
    stateCost = stateCost + InvasiveUtility.get_empty_slots(self.state) * self.actionParameterObj.emptyCost

    costAction = InvasiveUtility.get_budget_cost_actions(action, self.state, self.actionParameterObj)
    if costAction > self.actionParameterObj.budget:
        theObs = Observation()
        theObs.intArray = [-1]
        returnRO = Reward_observation_terminal()
        returnRO.r = self.Bad_Action_Penalty
        returnRO.o = theObs
        return returnRO

    nextState = simulateNextState(self.state, action, self.simulationParameterObj,
        self.actionParameterObj, self.dispertionTable, self.germinationObj)
    self.state = nextState

    theObs = Observation()
    theObs.intArray = self.state

    returnRO = Reward_observation_terminal()
    returnRO.r = -1 * (costAction + stateCost)
    returnRO.o = theObs
    return returnRO
def env_start(self):
    self.seed()
    self.reset()

    returnObs = Observation()
    returnObs.intArray = [self.s]
    return returnObs
def env_start(self):
    State = random.randint(0, 3)
    returnObs = Observation()
    # Zero for all the 4 starting states
    self.presentCol = 0
    self.presentRow = self.Start_states[State][0]
    returnObs.intArray = [self.rolloutstate()]
    return returnObs
def env_start(self): """ Get the state of the environment and return it. """ self.state = [0 for i in range(9)] #self.env_play() obs = Observation() obs.intArray = self.state return obs
def getObservation(self):
    returnObs = Observation()
    features = [1.]
    if self.original_features:
        features += mdptetris.features_original()
    if self.dellacherie_features:
        features += mdptetris.features_dellacherie()
    returnObs.intArray = [mdptetris.current_piece()]
    returnObs.doubleArray = features
    return returnObs
def env_step(self, thisAction):
    # Process the action: advance to the next stage in the cycle on action 0
    if thisAction.intArray[0] == 0:
        self.stageIndex = self.licycle.next()
    traci.trafficlights.setRedYellowGreenState("1", self.Stages[self.stageIndex])

    traci.simulationStep()
    self.simStep += 1

    self.currentVehList = traci.vehicle.getIDList()
    self.state.updateState(self.currentVehList)

    # Check if the state is terminal
    episodeTerminal = 0
    if traci.simulation.getMinExpectedNumber() == 0:
        episodeTerminal = 1
        traci.close()

    theObs = Observation()
    theObs.intArray = self.state.carState.flatten()

    returnRO = Reward_observation_terminal()
    returnRO.r = self.calculate_reward()
    returnRO.o = theObs
    returnRO.terminal = episodeTerminal

    killedVehicles = checkVehKill(self.vehicleDict)
    for vehicle in killedVehicles:
        del self.vehicleDict[vehicle]

    self.previousVehList = self.currentVehList

    return returnRO
def env_step(self, thisAction):
    intAction = int(thisAction.intArray[0])
    theReward = self.takeAction(intAction)

    theObs = Observation()
    theObs.intArray = self.getState()

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = 0

    return returnRO
def env_start(self):
    k = random.randint(0, 1)
    State = random.randint(self.states[k][0], self.states[k][1])
    returnObs = Observation()
    # Zero for all the 4 starting states
    self.presentCol = random.randint(0, 999)
    self.presentRow = State
    returnObs.intArray = [self.rolloutstate()]
    return returnObs
def env_start(self):
    if self.fixedStartState:
        stateValid = self.setAgentState(self.state)
        if not stateValid:
            print "The fixed start state was NOT valid: " + str(self.state)
            self.setRandomState()
    else:
        self.setRandomState()

    returnObs = Observation()
    returnObs.intArray = map(int, list(self.state))
    return returnObs
def env_start(self):
    if self.fixedStartState:
        stateValid = self.setAgentState(self.startRow, self.startCol)
        if not stateValid:
            print("The fixed start state was NOT valid: " + str(int(self.startRow)) + "," + str(int(self.startCol)))
            self.setRandomState()
    else:
        self.setRandomState()

    returnObs = Observation()
    returnObs.intArray = [self.calculateFlatState()]
    return returnObs
def env_step(self, action):
    self.agent.botAction = action
    self.step()

    pixels = pygame.surfarray.array2d(screen)

    theObs = Observation()
    theObs.intArray = misc.imresize(pixels, (84, 84)).flatten().tolist()

    returnRO = Reward_observation_terminal()
    returnRO.r = 1  # reward goes here
    returnRO.o = theObs
    returnRO.terminal = 0

    return returnRO
def env_step(self, action):
    action = action.intArray
    assert len(action) == self.simulationParameterObj.nbrReaches, "Expected " + str(
        self.simulationParameterObj.nbrReaches) + " integer action."

    if not InvasiveUtility.is_action_allowable(action, self.state):
        theObs = Observation()
        theObs.intArray = [-1]
        returnRO = Reward_observation_terminal()
        returnRO.r = self.Bad_Action_Penalty
        returnRO.o = theObs
        return returnRO

    cost_state_unit = InvasiveUtility.get_unit_invaded_reaches(self.state,
        self.simulationParameterObj.habitatSize) * self.actionParameterObj.costPerReach
    stateCost = cost_state_unit + InvasiveUtility.get_invaded_reaches(
        self.state) * self.actionParameterObj.costPerTree
    stateCost = stateCost + InvasiveUtility.get_empty_slots(self.state) * self.actionParameterObj.emptyCost

    costAction = InvasiveUtility.get_budget_cost_actions(action, self.state, self.actionParameterObj)
    if costAction > self.actionParameterObj.budget:
        theObs = Observation()
        theObs.intArray = [-1]
        returnRO = Reward_observation_terminal()
        returnRO.r = self.Bad_Action_Penalty
        returnRO.o = theObs
        return returnRO

    nextState = simulateNextState(self.state, action, self.simulationParameterObj,
        self.actionParameterObj, self.dispertionTable, self.germinationObj)
    self.state = nextState

    theObs = Observation()
    theObs.intArray = self.state

    returnRO = Reward_observation_terminal()
    returnRO.r = -1 * (costAction + stateCost)
    returnRO.o = theObs
    return returnRO
def env_start(self):
    startx, starty = self.bg.getStart()
    self.player.reset(startx, starty)
    self.bg.reset()
    self.gameover = False

    returnObs = Observation()
    arr = pygame.surfarray.array2d(self.screen)
    returnObs.intArray = np.append(np.zeros(128), [item for innerlist in arr for item in innerlist])
    scipy.misc.imsave('screen.png', arr)

    return returnObs
def env_step(self,actions): """ Verify the actions are valid, play a move, and return the state. """ reward = 0 terminal = 0 #Change our current state to the new board self.state = actions.intArray #Check if the agent made a winning move if self.is_victory(): print "WE LOST" reward = 1 terminal = 1 #Otherwise keep on playing! elif self.is_full(): "AGENT FILLED" reward = 1 terminal = 1 elif not self.is_full(): print "PLAY" self.env_play() #Check if we won if self.is_full(): print "WE FILLED" reward = 1 terminal = 1 if self.is_victory(): print "WE WON" reward = 0 terminal = 1 #Set up the observation object and return it obs = Observation() obs.intArray = self.state reward_obs = Reward_observation_terminal() reward_obs.r = reward reward_obs.o = obs reward_obs.terminal = terminal return reward_obs
def env_step(self, thisAction):
    # Make sure the action is valid
    assert len(thisAction.intArray) == 1, "Expected 1 integer action."
    assert thisAction.intArray[0] >= 0, "Expected action to be in [0,3]"
    assert thisAction.intArray[0] < 4, "Expected action to be in [0,3]"

    self.updatePosition(thisAction.intArray[0])

    theObs = Observation()
    theObs.intArray = [self.calculateFlatState()]

    returnRO = Reward_observation_terminal()
    returnRO.r = self.calculateReward()
    returnRO.o = theObs
    returnRO.terminal = self.checkCurrentTerminal()

    return returnRO
def env_step(self, thisAction):
    self.screen.fill((0, 0, 0))
    if self.gameover:
        self.center_msg("""Game Over!\nYour score: %d Press space to continue""" % self.score)
    else:
        if self.paused:
            self.center_msg("Paused")
        else:
            pygame.draw.line(self.screen, (255, 255, 255),
                             (self.rlim + 1, 0), (self.rlim + 1, self.height - 1))
            self.disp_msg("Next:", (self.rlim + cell_size, 2))
            self.disp_msg("Score: %d\n\nLevel: %d\nLines: %d" % (self.score, self.level, self.lines),
                          (self.rlim + cell_size, cell_size * 5))
            self.draw_matrix(self.bground_grid, (0, 0))
            self.draw_matrix(self.board, (0, 0))
            self.draw_matrix(self.stone, (self.stone_x, self.stone_y))
            self.draw_matrix(self.next_stone, (cols + 1, 2))
    pygame.display.update()

    for event in pygame.event.get():
        if event.type == pygame.USEREVENT + 1:
            self.drop(False)
        elif event.type == pygame.QUIT:
            self.quit()
        elif event.type == pygame.KEYDOWN:
            for key in key_actions:
                if event.key == eval("pygame.K_" + key):
                    key_actions[key]()

    episodeOver = 0
    theReward = 0

    theObs = Observation()
    theObs.intArray = np.zeros(50816)

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = episodeOver

    return returnRO
def env_step(self, action):
    # The board position where the agent places its piece
    int_action_agent = action.intArray[0]

    # Execute the opponent's (agent's) move and check for a win or loss.
    # If the game is undecided, choose and execute our own move, then send
    # the reward and related data back to the agent.
    # A pass is encoded as (-1, -1).
    if int_action_agent == -1:
        step_raw_col = (-1, -1)
    else:
        step_raw_col = (int_action_agent // self.n_cols, int_action_agent % self.n_cols)

    # Execute the step
    step_o, step_r, step_done = self.game.step(step_raw_col)

    rot = Reward_observation_terminal()

    # Build the map with build_map_from_game()
    self.map = self.build_map_from_game()
    observation = Observation()
    observation.intArray = self.map
    rot.o = observation

    # step_r is the reward; step_done says whether the game is over
    rot.r = step_r
    rot.terminal = step_done

    # Record the board state
    current_map = ''
    for i in range(0, len(self.map), self.n_cols):
        current_map += ' '.join(map(str, self.map[i:i + self.n_cols])) + '\n'
    self.history.append(current_map)

    # Log the game when we lose
    if rot.r == self.game.r_lose:
        f = open('history.txt', 'a')
        history = '\n'.join(self.history)
        f.writelines('# START\n' + history + '# END\n\n')
        f.close()

    # If the game is decided, the agent's agent_end follows;
    # otherwise agent_step is called next.
    return rot
def env_start(self):
    self.startx, self.starty = self.field.getStart()
    self.player.reset(self.startx, self.starty)
    self.field.reset()
    self.gameover = False

    # Draw the field
    self.draw_field()

    returnObs = Observation()
    returnObs.intArray = np.append(np.zeros(128), [item for innerlist in self.img_state for item in innerlist])

    return returnObs
def env_start(self):
    # Initialize the board
    self.map = [0] * self.n_rows * self.n_cols * self.n_heights

    # List that keeps the board states so they can be reviewed at the end
    self.history = []
    current_map = ''
    for i in range(0, len(self.map), self.n_cols):
        current_map += ' '.join(map(str, self.map[i:i + self.n_cols])) + '\n'
    self.history.append(current_map)

    # Hand the board state to the agent through RL-Glue
    observation = Observation()
    observation.intArray = self.map
    return observation
def env_step(self, thisAction):
    print thisAction.intArray[0]
    assert len(thisAction.intArray) == 1, "Expected 1 integer action."
    assert thisAction.intArray[0] >= 0, "Expected action to be in [0,3]"
    assert thisAction.intArray[0] < 4, "Expected action to be in [0,3]"

    self.updatePosition(thisAction.intArray[0])

    Obs = Observation()
    Obs.intArray = [self.rolloutstate()]

    Reward = Reward_observation_terminal()
    Reward.r = self.current_reward()
    Reward.o = Obs
    Reward.terminal = self.goalcheck()

    return Reward
def env_start(self):
    # Reset the Reversi board
    self.game.resetBoard()

    # Build the map data
    self.map = self.build_map_from_game()

    # List that keeps the board states so they can be reviewed at the end.
    # kmori: the current board is appended to self.history as text.
    self.history = []
    current_map = ''
    for i in range(0, len(self.map), self.n_cols):
        current_map += ' '.join(map(str, self.map[i:i + self.n_cols])) + '\n'
    self.history.append(current_map)

    # Hand the board state to the agent through RL-Glue
    observation = Observation()
    observation.intArray = self.map
    return observation
def env_start(self):
    # Randomly generate new routes
    routeGenProcess = subprocess.Popen("python %s" % (self.routeScript),
                                       shell=True, stdout=sys.stdout)

    # Start SUMO
    sumoProcess = subprocess.Popen("%s -c %s --no-warnings" % (self.sumoBinary, self.sumoConfig),
                                   shell=True, stdout=sys.stdout)
    traci.init(self.traciPORT)

    self.state = State("1")

    # Reset these variables when the episode starts
    self.vehicleDict = {}
    self.currentVehList = []
    self.previousVehList = []
    self.totalCumWaitingTime = 0

    returnObs = Observation()
    returnObs.intArray = self.state.carState.flatten()

    self.simStep = 1

    return returnObs
def env_start(self):
    key_actions = {
        'ESCAPE': self.quit,
        'LEFT': lambda: self.move(-1),
        'RIGHT': lambda: self.move(+1),
        'DOWN': lambda: self.drop(True),
        'UP': self.rotate_stone,
        'p': self.toggle_pause,
        'SPACE': self.start_game,
        'RETURN': self.insta_drop
    }

    self.gameover = False
    self.paused = False
    self.currentState = 10

    returnObs = Observation()
    returnObs.intArray = np.zeros(50816)
    return returnObs
def env_step(self, thisAction):
    episodeOver = 0
    theReward = 0

    self.bg.draw(self.screen)
    self.player.setAction(thisAction)
    self.player.update()
    theReward = self.bg.decision(self.player.rect.x, self.player.rect.y)
    episodeOver = self.bg.get_gameover()
    self.player.draw(self.screen)
    pygame.display.update()  # Refresh the display

    returnObs = Observation()
    arr = pygame.surfarray.array2d(self.screen)
    returnObs.intArray = np.append(np.zeros(128), [item for innerlist in arr for item in innerlist])
    scipy.misc.imsave('screen.png', arr)

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = returnObs
    returnRO.terminal = episodeOver

    # Event handling
    for event in pygame.event.get():
        if event.type == QUIT:  # Quit event
            sys.exit()

    return returnRO
def env_start(self):
    self.reset()
    returnObs = Observation()
    returnObs.intArray = self.getState()
    return returnObs
def env_start(self):
    self.start()
    returnObs = Observation()
    returnObs.intArray = [1]
    return returnObs
def env_step(self, action):
    # First, report the result of the agent's move back to the agent.

    # The spot where the agent's piece is dropped, received from the agent
    int_action_agent = self.get_drop_ball_point(action.intArray[0])

    # Place the agent's piece on the board, then collect the free cells
    self.map[int_action_agent] = self.flg_agent  # this is the board
    free_top = self.get_free_top_of_map()
    n_free = len(free_top)

    rot = Reward_observation_terminal()
    rot.r = 0.0
    rot.terminal = False

    # Check for a win/loss after the agent's piece is placed
    for line in self.lines:
        state = np.array(self.map)[line]
        point = sum(state == self.flg_agent)
        if point == self.n_rows:
            rot.r = self.r_win
            rot.terminal = True
            break
        point = sum(state == self.flg_env)
        if point == self.n_rows:
            rot.r = self.r_lose
            rot.terminal = True
            break

    # If the game is undecided, decide where to place the environment's piece
    if not rot.terminal:
        # No free cells means a draw
        if n_free == 0:
            rot.r = self.r_draw
            rot.terminal = True
        else:
            int_action_env = None
            # If exactly one cell is free, play there
            if n_free == 1:
                int_action_env = self.get_drop_ball_point(free_top[0])
                rot.terminal = True
            else:
                # Choose the environment's move deliberately 75% of the time
                if np.random.rand() < self.opp:
                    # Go for the win when one is in reach.
                    # TODO: change the algorithm: try each of the n_free drops and check.
                    for line in self.lines:
                        state = np.array(self.map)[line]
                        point = sum(state == self.flg_env)  # the environment
                        if point == self.n_rows - 1:
                            # The environment is about to win!
                            index = np.where(state == self.flg_free)[0]
                            if len(index) != 0:
                                want_to_put = line[index[0]]
                                i_top = want_to_put % 16
                                # Can the desired cell be reached by dropping from the top?
                                if want_to_put == self.get_drop_ball_point(i_top):
                                    int_action_env = want_to_put
                                    break
                    # Block when about to lose.
                    # TODO: change the algorithm: declare the loss if several losing spots exist.
                    if int_action_env is None:
                        for line in self.lines:
                            state = np.array(self.map)[line]
                            point = sum(state == self.flg_agent)  # the agent
                            if point == self.n_rows - 1:
                                index = np.where(state == self.flg_free)[0]
                                if len(index) != 0:
                                    want_to_put = line[index[0]]
                                    i_top = want_to_put % 16
                                    # Can the desired cell be reached by dropping from the top?
                                    if want_to_put == self.get_drop_ball_point(i_top):
                                        int_action_env = want_to_put
                                        break
                # Otherwise choose the environment's move at random (25% of the time)
                if int_action_env is None:
                    int_action_env = self.get_drop_ball_point(free_top[np.random.randint(n_free)])

            # Place the environment's piece
            self.map[int_action_env] = self.flg_env  # this is fine as-is
            free_top = self.get_free_top_of_map()  # searching for the free (0) cells
            n_free = len(free_top)

            # Check for a win/loss after the environment's piece is placed
            for line in self.lines:
                state = np.array(self.map)[line]
                point = sum(state == self.flg_agent)
                if point == self.n_rows:
                    rot.r = self.r_win
                    rot.terminal = True
                    break
                point = sum(state == self.flg_env)
                if point == self.n_rows:
                    rot.r = self.r_lose
                    rot.terminal = True
                    break

            if not rot.terminal and n_free == 0:
                rot.r = self.r_draw
                rot.terminal = True

    # Send the board state, the reward, and whether the game is decided to the agent
    observation = Observation()
    observation.intArray = self.map
    rot.o = observation

    current_map = 'map\n'
    for i in range(0, len(self.map), self.n_cols):
        current_map += ' '.join(map(str, self.map[i:i + self.n_cols])) + '\n'
        if i % 16 == 0:
            current_map += "\n"
    self.history.append(current_map)

    if rot.r == -1:
        f = open('history.txt', 'a')
        history = '\n'.join(self.history)
        f.writelines('# START\n' + history + '# END\n\n')
        f.close()

    # If the game is decided, the agent's agent_end follows;
    # otherwise agent_step is called next.
    return rot
def create_observation(self, state):
    observation = Observation()
    observation.intArray = state.tolist()
    return observation
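# Usage sketch for create_observation (hypothetical call; it assumes `state`
# is a NumPy array, since tolist() is invoked on it):
#   obs = env.create_observation(np.array([0, 1, 2]))
#   obs.intArray  # -> [0, 1, 2]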
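# A minimal sketch of how an environment like the ones above is hooked into
# RL-Glue via the Python codec. MyEnvironment is a hypothetical class
# implementing env_init, env_start, env_step, env_cleanup and env_message:
from rlglue.environment import EnvironmentLoader

if __name__ == "__main__":
    EnvironmentLoader.loadEnvironment(MyEnvironment())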