def run(self):
    """
    Main control loop for game play (event-queue variant).

    Initializes powers/observed variables and the display, then drains
    events from the game state until the game ends, finally informing
    learning agents of the result.
    """
    self.initializeGhostPowers()
    self.initializeAgentObservedVars()
    self.initializePacmanPowers()
    self.display.initialize(self.state.data)
    self.numMoves = 0

    # Abort immediately if any agent fails during game-start notification.
    if not self.informAgentsOfGameStart():
        return

    agentIndex = self.state.getNextAgentIndex()
    numAgents = len(self.agents)

    while not self.gameOver:
        event = self.state.getNextEvent()
        if event.isAgentMove():
            # An agent is scheduled to act; a failed action aborts the game.
            agentIndex = event.getAgentIndex()
            if not self.getAndRunAgentAction(agentIndex):
                return
            self.numMoves += 1
        else:
            # Non-move event (e.g. timed game mechanic).
            self.runEvent()

        # Refresh the display with the new state.
        self.display.update(self.state.data)

        # Allow for game specific conditions (winning, losing, etc.)
        self.rules.process(self.state, self)
        if _BOINC_ENABLED:
            boinc.set_fraction_done(self.getProgress())

    # Abort if notifying learning agents of the result fails.
    if not self.informLearningAgents():
        return
    self.display.finish()
def run(self):
    """
    Main control loop for game play (node-expansion-budget variant).

    Returns a tuple (totalScore, totalComputationTime, totalExpandedNodes).
    Illegal moves and node-expansion-budget violations are replaced by the
    agent's previous action (or STOP if that is also illegal).
    """
    self.display.initialize(self.state.data)
    self.numMoves = 0

    agentIndex = self.startingIndex
    numAgents = len(self.agents)
    previous_action = Directions.STOP
    # The move "timeout" is reused here as a node-expansion budget.
    expout = int(self.rules.getMoveTimeout(agentIndex))
    totalComputationTime = 0
    totalExpandedNodes = 0
    if expout > 0:
        pacmodule.pacman.GameState.setMaximumExpanded(expout)

    while not self.gameOver:
        agent = self.agents[agentIndex]
        move_time = 0
        skip_action = False
        # Generate an observation of the state
        observation = self.state.deepCopy()

        # Solicit an action while counting wall-clock time and expansions.
        action = None
        self.mute(agentIndex)
        pacmodule.pacman.GameState.resetNodeExpansionCounter()
        violated = False
        start = time.time()
        if expout == 0:
            action = agent.get_action(observation)
        else:
            # TODO: node expansion control through getSuccessors
            action = agent.get_action(observation)
            if pacmodule.pacman.GameState.countExpanded > expout:
                violated = True
        totalComputationTime += (time.time() - start)
        totalExpandedNodes += pacmodule.pacman.GameState.countExpanded

        # Fall back on the previous action for illegal moves / violations.
        if not self.state.isLegalAction(agentIndex, action):
            print("Illegal move !")
            action = previous_action
        elif violated:
            print("Node expansion budget violated !")
            action = previous_action
        if not self.state.isLegalAction(agentIndex, action):
            action = Directions.STOP
        self.unmute()

        # Execute the action
        self.moveHistory.append((agentIndex, action))
        previous_action = action
        self.state = self.state.generateSuccessor(agentIndex, action)

        # Change the display
        self.display.update(self.state.data)

        # Allow for game specific conditions (winning, losing, etc.)
        self.rules.process(self.state, self)

        # NOTE(review): agentIndex is always < numAgents, so this counter
        # never increments — inherited quirk of the upstream code, kept as-is.
        if agentIndex == numAgents + 1:
            self.numMoves += 1
        # Next agent
        agentIndex = (agentIndex + 1) % numAgents

        if _BOINC_ENABLED:
            boinc.set_fraction_done(self.getProgress())

    totalScore = self.state.getScore()
    self.display.finish()
    return totalScore, totalComputationTime, totalExpandedNodes
def run(self): """ Main control loop for game play. """ self.display.initialize(self.state.data) self.numMoves = 0 ###self.display.initialize(self.state.makeObservation(1).data) # inform learning agents of the game start for i in range(len(self.agents)): agent = self.agents[i] if not agent: self.mute(i) # this is a null agent, meaning it failed to load # the other team wins print >> sys.stderr, "Agent %d failed to load" % i self.unmute() self._agentCrash(i, quiet=True) return if ("registerInitialState" in dir(agent)): self.mute(i) agent.registerInitialState(self.state.deepCopy()) self.unmute() agentIndex = self.startingIndex numAgents = len(self.agents) while not self.gameOver: action = None observation = self.state.deepCopy() if agentIndex == 0: # Fetch the next agent agent = self.agents[agentIndex] move_time = 0 skip_action = False # Generate an observation of the state if 'observationFunction' in dir(agent): self.mute(agentIndex) observation = agent.observationFunction( self.state.deepCopy()) self.unmute() else: observation = self.state.deepCopy() # 'Fix' observation ppos = observation.getPacmanPosition() rpos = observation.getGhostState(1).getPosition() if manhattanDistance(ppos, rpos) > 12: observation.removeAgentState(1) vis_capsules = [] for c in observation.data.capsules: if manhattanDistance(ppos, c) <= 12: vis_capsules.append(c) observation.data.capsules = vis_capsules # print(observation) # Solicit an action self.mute(agentIndex) start = time.time() action = agent.getAction(observation) if time.time() - start > 3.1: print('Step time exceeds maximum time!') self.unmute() self.socket.send(self.packAction(action)) else: action = self.unpackAction(self.saveRead(4)) # Execute the action self.moveHistory.append((agentIndex, action)) self.state = self.state.generateSuccessor(agentIndex, action) observation.data._agentMoved = self.state.data._agentMoved # Change the display self.display.update(self.state.data) # Allow for game specific conditions (winning, 
losing, etc.) self.rules.process(self.state, self) # Track progress if agentIndex == numAgents + 1: self.numMoves += 1 # Next agent agentIndex = (agentIndex + 1) % numAgents if _BOINC_ENABLED: boinc.set_fraction_done(self.getProgress()) # inform a learning agent of the game result for agentIndex, agent in enumerate(self.agents): if "final" in dir(agent): try: self.mute(agentIndex) agent.final(self.state) self.unmute() except Exception, data: if not self.catchExceptions: raise self._agentCrash(agentIndex) self.unmute() return
class Game: """ The Game manages the control flow, soliciting actions from agents. """ def __init__( self, agents, display, rules, startingIndex=0, muteAgents=False ): self.agentCrashed = False self.agents = agents self.display = display self.rules = rules self.startingIndex = startingIndex self.gameOver = False self.muteAgents = muteAgents self.catchExceptions = _BOINC_ENABLED self.moveHistory = [] def getProgress(self): if self.gameOver: return 1.0 else: return self.rules.getProgress(self) def _agentCrash( self, agentIndex ): "Helper method for handling agent crashes" self.gameOver = True self.agentCrashed = True self.rules.agentCrash(agentIndex) OLD_STDOUT = None OLD_STDERR = None def mute(self): if not self.muteAgents: return global OLD_STDOUT, OLD_STDERR import cStringIO OLD_STDOUT = sys.stdout OLD_STDERR = sys.stderr sys.stdout = cStringIO.StringIO() sys.stderr = cStringIO.StringIO() def unmute(self): if not self.muteAgents: return global OLD_STDOUT, OLD_STDERR sys.stdout.close() sys.stderr.close() # Revert stdout/stderr to originals sys.stdout = OLD_STDOUT sys.stderr = OLD_STDERR def run( self ): """ Main control loop for game play. 
""" self.display.initialize(self.state.data) self.numMoves = 0 ###self.display.initialize(self.state.makeObservation(1).data) # inform learning agents of the game start for i in range(len(self.agents)): agent = self.agents[i] if not agent: # this is a null agent, meaning it failed to load # the other team wins self._agentCrash(i) return if ("registerInitialState" in dir(agent)): self.mute() agent.registerInitialState(self.state.deepCopy()) self.unmute() agentIndex = self.startingIndex numAgents = len( self.agents ) while not self.gameOver: # Fetch the next agent agent = self.agents[agentIndex] # Generate an observation of the state if 'observationFunction' in dir( agent ): try: self.mute() observation = agent.observationFunction(self.state.deepCopy()) self.unmute() except Exception,data: if not self.catchExceptions: raise data self.unmute() print "Exception",data self._agentCrash(agentIndex) return else: observation = self.state.deepCopy() # Solicit an action startTime = time.time() try: self.mute() action = agent.getAction( observation ) self.unmute() self.moveHistory.append( (agentIndex, action) ) except Exception,data: if not self.catchExceptions: raise data self.unmute() print "Exception", data self._agentCrash(agentIndex) return if 'checkTime' in dir(self.rules): self.rules.checkTime(time.time() - startTime) # Execute the action self.state = self.state.generateSuccessor( agentIndex, action ) # Change the display self.display.update( self.state.data ) ###idx = agentIndex - agentIndex % 2 + 1 ###self.display.update( self.state.makeObservation(idx).data ) # Allow for game specific conditions (winning, losing, etc.) self.rules.process(self.state, self) # Track progress if agentIndex == numAgents + 1: self.numMoves += 1 # Next agent agentIndex = ( agentIndex + 1 ) % numAgents if _BOINC_ENABLED: boinc.set_fraction_done(self.getProgress())
def run(self):
    """
    Main control loop for game play (per-move timeout variant).

    Wraps registerInitialState / observationFunction / getAction in
    TimeoutFunction when self.catchExceptions is set, tracking per-agent
    move-time warnings and total-time budgets; any timeout or exception
    aborts the game via _agentCrash.

    BUGFIX: the original used `except(Exception, data)` — the tuple form
    of an except clause — which evaluates the undefined name `data` and
    raises NameError whenever any exception actually fires. Rewritten as
    `except Exception as data` (and plain `except TimeoutFunctionException`).
    """
    self.display.initialize(self.state.data)
    self.numMoves = 0

    # inform learning agents of the game start
    for i in range(len(self.agents)):
        agent = self.agents[i]
        if not agent:
            # this is a null agent, meaning it failed to load
            # the other team wins
            self._agentCrash(i, quiet=True)
            return
        if ("registerInitialState" in dir(agent)):
            self.mute()
            if self.catchExceptions:
                try:
                    timed_func = TimeoutFunction(agent.registerInitialState,
                                                 int(self.rules.getMaxStartupTime(i)))
                    try:
                        start_time = time.time()
                        timed_func(self.state.deepCopy())
                        time_taken = time.time() - start_time
                        self.totalAgentTimes[i] += time_taken
                    except TimeoutFunctionException:
                        print("Agent %d ran out of time on startup!" % i)
                        self.unmute()
                        self.agentTimeout = True
                        self._agentCrash(i, quiet=True)
                        return
                except Exception as data:
                    self.unmute()
                    self._agentCrash(i, quiet=True)
                    return
            else:
                agent.registerInitialState(self.state.deepCopy())
            ## TODO: could this exceed the total time
            self.unmute()

    agentIndex = self.startingIndex
    numAgents = len(self.agents)

    while not self.gameOver:
        # Fetch the next agent
        agent = self.agents[agentIndex]
        move_time = 0
        skip_action = False

        # Generate an observation of the state
        if 'observationFunction' in dir(agent):
            self.mute()
            if self.catchExceptions:
                try:
                    timed_func = TimeoutFunction(agent.observationFunction,
                                                 int(self.rules.getMoveTimeout(agentIndex)))
                    try:
                        start_time = time.time()
                        observation = timed_func(self.state.deepCopy())
                    except TimeoutFunctionException:
                        # A timed-out observation forfeits the move below.
                        skip_action = True
                    move_time += time.time() - start_time
                    self.unmute()
                except Exception as data:
                    self.unmute()
                    self._agentCrash(agentIndex, quiet=True)
                    return
            else:
                observation = agent.observationFunction(self.state.deepCopy())
            self.unmute()
        else:
            observation = self.state.deepCopy()

        # Solicit an action
        action = None
        self.mute()
        if self.catchExceptions:
            try:
                # Remaining budget = move timeout minus time already spent
                # in observationFunction.
                timed_func = TimeoutFunction(agent.getAction,
                                             int(self.rules.getMoveTimeout(agentIndex)) - int(move_time))
                try:
                    start_time = time.time()
                    if skip_action:
                        raise TimeoutFunctionException()
                    action = timed_func(observation)
                except TimeoutFunctionException:
                    print("Agent %d timed out on a single move!" % agentIndex)
                    self.agentTimeout = True
                    self.unmute()
                    self._agentCrash(agentIndex, quiet=True)
                    return

                move_time += time.time() - start_time
                if move_time > self.rules.getMoveWarningTime(agentIndex):
                    self.totalAgentTimeWarnings[agentIndex] += 1
                    print("Agent %d took too long to make a move! This is warning %d"
                          % (agentIndex, self.totalAgentTimeWarnings[agentIndex]))
                    if self.totalAgentTimeWarnings[agentIndex] > self.rules.getMaxTimeWarnings(agentIndex):
                        print("Agent %d exceeded the maximum number of warnings: %d"
                              % (agentIndex, self.totalAgentTimeWarnings[agentIndex]))
                        self.agentTimeout = True
                        self.unmute()
                        self._agentCrash(agentIndex, quiet=True)

                self.totalAgentTimes[agentIndex] += move_time
                #print("Agent: %d, time: %f, total: %f" % (agentIndex, move_time, self.totalAgentTimes[agentIndex]))
                if self.totalAgentTimes[agentIndex] > self.rules.getMaxTotalTime(agentIndex):
                    print("Agent %d ran out of time! (time: %1.2f)"
                          % (agentIndex, self.totalAgentTimes[agentIndex]))
                    self.agentTimeout = True
                    self.unmute()
                    self._agentCrash(agentIndex, quiet=True)
                    return
                self.unmute()
            except Exception as data:
                self.unmute()
                self._agentCrash(agentIndex)
                return
        else:
            action = agent.getAction(observation)
        self.unmute()

        # Execute the action
        self.moveHistory.append((agentIndex, action))
        if self.catchExceptions:
            try:
                self.state = self.state.generateSuccessor(agentIndex, action)
            except Exception as data:
                self._agentCrash(agentIndex)
                return
        else:
            self.state = self.state.generateSuccessor(agentIndex, action)

        # Change the display
        self.display.update(self.state.data)

        # Allow for game specific conditions (winning, losing, etc.)
        self.rules.process(self.state, self)

        # Track progress
        if agentIndex == numAgents + 1:
            self.numMoves += 1
        # Next agent
        agentIndex = (agentIndex + 1) % numAgents

        if _BOINC_ENABLED:
            boinc.set_fraction_done(self.getProgress())

    # inform a learning agent of the game result
    for agent in self.agents:
        if "final" in dir(agent):
            try:
                self.mute()
                agent.final(self.state)
                self.unmute()
            except Exception as data:
                if not self.catchExceptions:
                    raise
                self.unmute()
                print("Exception", data)
                self._agentCrash(agent.index)
                return
    self.display.finish()
def run(self):
    """
    Main control loop for game play (arff data-collection variant).

    In addition to the normal timeout-guarded game loop, this variant logs
    one training row per turn of the starting agent to all_data_pacman.arff
    via BasicAgentAA.printLineData, writing the header on first creation.

    Fixes applied: `path.exists(...) is True` -> truth test, `i == None`
    -> `i is None`, removed the unused local `d`, and converted the no-op
    bare-string statements into comments. Runtime strings are unchanged.
    """
    self.display.initialize(self.state.data)
    self.numMoves = 0

    # inform learning agents of the game start
    for i in range(len(self.agents)):
        agent = self.agents[i]
        if not agent:
            self.mute(i)
            # this is a null agent, meaning it failed to load
            # the other team wins
            print("Agent %d failed to load" % i, file=sys.stderr)
            self.unmute()
            self._agentCrash(i, quiet=True)
            return
        if ("registerInitialState" in dir(agent)):
            self.mute(i)
            if self.catchExceptions:
                try:
                    timed_func = TimeoutFunction(
                        agent.registerInitialState,
                        int(self.rules.getMaxStartupTime(i)))
                    try:
                        start_time = time.time()
                        timed_func(self.state.deepCopy())
                        time_taken = time.time() - start_time
                        self.totalAgentTimes[i] += time_taken
                    except TimeoutFunctionException:
                        print("Agent %d ran out of time on startup!" % i,
                              file=sys.stderr)
                        self.unmute()
                        self.agentTimeout = True
                        self._agentCrash(i, quiet=True)
                        return
                except Exception as data:
                    self._agentCrash(i, quiet=False)
                    self.unmute()
                    return
            else:
                agent.registerInitialState(self.state.deepCopy())
            ## TODO: could this exceed the total time
            self.unmute()

    # Import classes from bustersAgents (deferred to avoid circular imports).
    from bustersAgents import BasicAgentAA
    from bustersAgents import BustersAgent
    from keyboardAgents import KeyboardAgent
    import os.path
    import arff
    from os import path

    # Check if .arff file exists; if yes just open it, if no create it
    # (header written once; both branches append).
    if path.exists("all_data_pacman.arff"):
        print('Results file exists')
        f = open("all_data_pacman.arff", "a")
    else:
        f = open("all_data_pacman.arff", "a")
        f.write(
            "@relation Example1 \n@attribute PosX numeric \n@attribute PosY numeric \n@attribute isNorthLegal {True, False} \n@attribute isEastLegal {True, False} \n@attribute isSouthLegal {True, False} \n@attribute isWestLegal {True, False} \n@attribute Direction {Stop, North, South, East, West} \n@attribute Ghost1X numeric \n@attribute Ghost1Y numeric \n@attribute Ghost1Distance numeric \n@attribute Ghost2X numeric \n@attribute Ghost2Y numeric \n@attribute Ghost2Distance numeric \n@attribute Ghost3X numeric \n@attribute Ghost3Y numeric \n@attribute Ghost3Distance numeric \n@attribute Ghost4X numeric \n@attribute Ghost4Y numeric \n@attribute Ghost4Distance numeric \n@attribute NumberOfDots numeric \n@attribute NearestDotDistance numeric \n@attribute CurrentScore numeric \n@attribute FutureScore numeric \n@attribute FutureDirection {Stop, North, South, East, West} \n@data \n"
        )
        print('Created new results file')

    agentIndex = self.startingIndex
    numAgents = len(self.agents)
    step = 0

    while not self.gameOver:
        # Fetch the next agent
        agent = self.agents[agentIndex]
        move_time = 0
        skip_action = False

        # Generate an observation of the state
        if 'observationFunction' in dir(agent):
            self.mute(agentIndex)
            if self.catchExceptions:
                try:
                    timed_func = TimeoutFunction(
                        agent.observationFunction,
                        int(self.rules.getMoveTimeout(agentIndex)))
                    try:
                        start_time = time.time()
                        observation = timed_func(self.state.deepCopy())
                    except TimeoutFunctionException:
                        skip_action = True
                    move_time += time.time() - start_time
                    self.unmute()
                except Exception as data:
                    self._agentCrash(agentIndex, quiet=False)
                    self.unmute()
                    return
            else:
                observation = agent.observationFunction(
                    self.state.deepCopy())
            self.unmute()
        else:
            observation = self.state.deepCopy()

        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Change it
        # Log one data row per turn of the starting agent; the score /
        # direction of the PREVIOUS step finish the previous row first.
        if agentIndex == self.startingIndex:
            if step >= 1:
                f.write(str(observation.getScore()) + ',')
                f.write(
                    str(observation.data.agentStates[0].getDirection()) + '\n')
            g = BasicAgentAA.printLineData(self, observation)
            if g:
                for i in g:
                    if i is None:
                        i = '?'  # arff missing-value marker
                    f.write(str(i) + ',')

        # Solicit an action
        action = None
        step += 1
        self.mute(agentIndex)
        if self.catchExceptions:
            try:
                timed_func = TimeoutFunction(
                    agent.getAction,
                    int(self.rules.getMoveTimeout(agentIndex)) - int(move_time))
                try:
                    start_time = time.time()
                    if skip_action:
                        raise TimeoutFunctionException()
                    action = timed_func(observation)
                except TimeoutFunctionException:
                    print("Agent %d timed out on a single move!" % agentIndex,
                          file=sys.stderr)
                    self.agentTimeout = True
                    self._agentCrash(agentIndex, quiet=True)
                    self.unmute()
                    return

                move_time += time.time() - start_time
                if move_time > self.rules.getMoveWarningTime(agentIndex):
                    self.totalAgentTimeWarnings[agentIndex] += 1
                    print(
                        "Agent %d took too long to make a move! This is warning %d"
                        % (agentIndex, self.totalAgentTimeWarnings[agentIndex]),
                        file=sys.stderr)
                    if self.totalAgentTimeWarnings[
                            agentIndex] > self.rules.getMaxTimeWarnings(
                                agentIndex):
                        print(
                            "Agent %d exceeded the maximum number of warnings: %d"
                            % (agentIndex, self.totalAgentTimeWarnings[agentIndex]),
                            file=sys.stderr)
                        self.agentTimeout = True
                        self._agentCrash(agentIndex, quiet=True)
                        self.unmute()
                        return

                self.totalAgentTimes[agentIndex] += move_time
                #print "Agent: %d, time: %f, total: %f" % (agentIndex, move_time, self.totalAgentTimes[agentIndex])
                if self.totalAgentTimes[
                        agentIndex] > self.rules.getMaxTotalTime(agentIndex):
                    print("Agent %d ran out of time! (time: %1.2f)"
                          % (agentIndex, self.totalAgentTimes[agentIndex]),
                          file=sys.stderr)
                    self.agentTimeout = True
                    self._agentCrash(agentIndex, quiet=True)
                    self.unmute()
                    return
                self.unmute()
            except Exception as data:
                self._agentCrash(agentIndex)
                self.unmute()
                return
        else:
            action = agent.getAction(observation)
        self.unmute()

        # Execute the action
        self.moveHistory.append((agentIndex, action))
        if self.catchExceptions:
            try:
                self.state = self.state.generateSuccessor(agentIndex, action)
            except Exception as data:
                self.mute(agentIndex)
                self._agentCrash(agentIndex)
                self.unmute()
                return
        else:
            self.state = self.state.generateSuccessor(agentIndex, action)

        # Change the display
        self.display.update(self.state.data)

        # Allow for game specific conditions (winning, losing, etc.)
        self.rules.process(self.state, self)

        # Track progress
        if agentIndex == numAgents + 1:
            self.numMoves += 1
        # Next agent
        agentIndex = (agentIndex + 1) % numAgents

        if _BOINC_ENABLED:
            boinc.set_fraction_done(self.getProgress())

    # inform a learning agent of the game result
    for agentIndex, agent in enumerate(self.agents):
        if "final" in dir(agent):
            try:
                self.mute(agentIndex)
                agent.final(self.state)
                self.unmute()
            except Exception as data:
                if not self.catchExceptions:
                    raise
                self._agentCrash(agentIndex)
                self.unmute()
                return

    # Terminate the last data row and close the log.
    f.write("0" + ',' + 'Stop' + '\n')
    f.close()
    self.display.finish()
class Game: """ The Game manages the control flow, soliciting actions from agents. """ def __init__(self, agents, display, rules, startingIndex=0, muteAgents=False, catchExceptions=False): self.agentCrashed = False self.agents = agents self.display = display self.rules = rules self.startingIndex = startingIndex self.gameOver = False self.muteAgents = muteAgents self.catchExceptions = catchExceptions self.moveHistory = [] self.totalAgentTimes = [0 for agent in agents] self.totalAgentTimeWarnings = [0 for agent in agents] self.agentTimeout = False import cStringIO self.agentOutput = [cStringIO.StringIO() for agent in agents] def getProgress(self): if self.gameOver: return 1.0 else: return self.rules.getProgress(self) def _agentCrash(self, agentIndex, quiet=False): "Helper method for handling agent crashes" if not quiet: traceback.print_exc() self.gameOver = True self.agentCrashed = True self.rules.agentCrash(self, agentIndex) OLD_STDOUT = None OLD_STDERR = None def mute(self, agentIndex): if not self.muteAgents: return global OLD_STDOUT, OLD_STDERR import cStringIO OLD_STDOUT = sys.stdout OLD_STDERR = sys.stderr sys.stdout = self.agentOutput[agentIndex] sys.stderr = self.agentOutput[agentIndex] def unmute(self): if not self.muteAgents: return global OLD_STDOUT, OLD_STDERR # Revert stdout/stderr to originals sys.stdout = OLD_STDOUT sys.stderr = OLD_STDERR def run(self): """ Main control loop for game play. 
""" self.display.initialize(self.state.data) self.numMoves = 0 ###self.display.initialize(self.state.makeObservation(1).data) # inform learning agents of the game start for i in range(len(self.agents)): agent = self.agents[i] if not agent: self.mute(i) # this is a null agent, meaning it failed to load # the other team wins print >> sys.stderr, "Agent %d failed to load" % i self.unmute() self._agentCrash(i, quiet=True) return if ("registerInitialState" in dir(agent)): self.mute(i) if self.catchExceptions: try: timed_func = TimeoutFunction( agent.registerInitialState, int(self.rules.getMaxStartupTime(i))) try: start_time = time.time() timed_func(self.state.deepCopy()) time_taken = time.time() - start_time self.totalAgentTimes[i] += time_taken except TimeoutFunctionException: print >> sys.stderr, "Agent %d ran out of time on startup!" % i self.unmute() self.agentTimeout = True self._agentCrash(i, quiet=True) return except Exception, data: self._agentCrash(i, quiet=False) self.unmute() return else: agent.registerInitialState(self.state.deepCopy()) ## TODO: could this exceed the total time self.unmute() agentIndex = self.startingIndex numAgents = len(self.agents) while not self.gameOver: # Fetch the next agent oneStepActionsList = [] for i in range(4): agent = self.agents[agentIndex] move_time = 0 skip_action = False # Generate an observation of the state if 'observationFunction' in dir(agent): self.mute(agentIndex) if self.catchExceptions: try: timed_func = TimeoutFunction( agent.observationFunction, int(self.rules.getMoveTimeout(agentIndex))) try: start_time = time.time() observation = timed_func(self.state.deepCopy()) except TimeoutFunctionException: skip_action = True move_time += time.time() - start_time self.unmute() except Exception, data: self._agentCrash(agentIndex, quiet=False) self.unmute() return else: observation = agent.observationFunction( self.state.deepCopy()) self.unmute() else: observation = self.state.deepCopy() # Solicit an action action = None 
self.mute(agentIndex) if self.catchExceptions: try: timed_func = TimeoutFunction( agent.getAction, int(self.rules.getMoveTimeout(agentIndex)) - int(move_time)) try: start_time = time.time() if skip_action: raise TimeoutFunctionException() action = timed_func(observation) except TimeoutFunctionException: print >> sys.stderr, "Agent %d timed out on a single move!" % agentIndex self.agentTimeout = True self._agentCrash(agentIndex, quiet=True) self.unmute() return move_time += time.time() - start_time if move_time > self.rules.getMoveWarningTime( agentIndex): self.totalAgentTimeWarnings[agentIndex] += 1 print >> sys.stderr, "Agent %d took too long to make a move! This is warning %d" % ( agentIndex, self.totalAgentTimeWarnings[agentIndex]) if self.totalAgentTimeWarnings[ agentIndex] > self.rules.getMaxTimeWarnings( agentIndex): print >> sys.stderr, "Agent %d exceeded the maximum number of warnings: %d" % ( agentIndex, self.totalAgentTimeWarnings[agentIndex]) self.agentTimeout = True self._agentCrash(agentIndex, quiet=True) self.unmute() return self.totalAgentTimes[agentIndex] += move_time #print "Agent: %d, time: %f, total: %f" % (agentIndex, move_time, self.totalAgentTimes[agentIndex]) if self.totalAgentTimes[ agentIndex] > self.rules.getMaxTotalTime( agentIndex): print >> sys.stderr, "Agent %d ran out of time! 
(time: %1.2f)" % ( agentIndex, self.totalAgentTimes[agentIndex]) self.agentTimeout = True self._agentCrash(agentIndex, quiet=True) self.unmute() return self.unmute() except Exception, data: self._agentCrash(agentIndex) self.unmute() return else: action = agent.getAction(observation) self.unmute() # Execute the action self.moveHistory.append((agentIndex, action)) oneStepActionsList.append((agentIndex, action)) ''' if self.catchExceptions: try: self.state = self.state.generateSuccessor( agentIndex, action ) except Exception,data: self.mute(agentIndex) self._agentCrash(agentIndex) self.unmute() return else: self.state = self.state.generateSuccessor( agentIndex, action ) ''' # Change the display # self.display.update( self.state.data ) ###idx = agentIndex - agentIndex % 2 + 1 ###self.display.update( self.state.makeObservation(idx).data ) # Allow for game specific conditions (winning, losing, etc.) #self.rules.process(self.state, self) # Track progress if agentIndex == numAgents + 1: self.numMoves += 1 # Next agent agentIndex = (agentIndex + 1) % numAgents if _BOINC_ENABLED: boinc.set_fraction_done(self.getProgress())
def run(self):
    """Main control loop for game play (snake_case variant).

    Registers agents, then alternates agents until the game is over,
    enforcing per-move and total time budgets when catch_exceptions is set;
    finally notifies learning agents of the result.
    """
    self.display.initialize(self.state.data)
    self.num_moves = 0

    # Inform learning agents of the game start.
    for i in range(len(self.agents)):
        agent = self.agents[i]
        if not agent:
            # Null agent: it failed to load, so the other side wins.
            self.mute(i)
            sys.stderr.write("Agent %d failed to load" % i)
            self.unmute()
            self._agent_crash(i, quiet=True)
            return
        if "register_initial_state" in dir(agent):
            self.mute(i)
            if self.catch_exceptions:
                try:
                    timed_func = TimeoutFunction(
                        agent.register_initial_state,
                        int(self.rules.get_max_startup_time(i)))
                    try:
                        start_time = time.time()
                        timed_func(self.state.deep_copy())
                        time_taken = time.time() - start_time
                        self.total_agent_times[i] += time_taken
                    except TimeoutFunctionException:
                        sys.stderr.write(
                            "Agent %d ran out of time on startup!\n" % i)
                        self.unmute()
                        self.agent_timeout = True
                        self._agent_crash(i, quiet=True)
                        return
                except Exception:
                    self._agent_crash(i, quiet=False)
                    self.unmute()
                    return
            else:
                agent.register_initial_state(self.state.deep_copy())
            self.unmute()

    agent_index = self.starting_index
    num_agents = len(self.agents)

    while not self.game_over:
        # Fetch the next agent.
        agent = self.agents[agent_index]
        move_time = 0
        skip_action = False

        # Generate an observation of the state.
        if 'observation_function' in dir(agent):
            self.mute(agent_index)
            if self.catch_exceptions:
                try:
                    timed_func = TimeoutFunction(
                        agent.observation_function,
                        int(self.rules.get_move_timeout(agent_index)))
                    try:
                        start_time = time.time()
                        observation = timed_func(self.state.deep_copy())
                    except TimeoutFunctionException:
                        # The timed-out observation forfeits the move below.
                        skip_action = True
                    move_time += time.time() - start_time
                    self.unmute()
                except Exception:
                    self._agent_crash(agent_index, quiet=False)
                    self.unmute()
                    return
            else:
                observation = agent.observation_function(
                    self.state.deep_copy())
            self.unmute()
        else:
            observation = self.state.deep_copy()

        # Solicit an action.
        action = None
        self.mute(agent_index)
        if self.catch_exceptions:
            try:
                # Remaining budget after observation time is deducted.
                timed_func = TimeoutFunction(
                    agent.get_action,
                    int(self.rules.get_move_timeout(agent_index)) - int(move_time))
                try:
                    start_time = time.time()
                    if skip_action:
                        raise TimeoutFunctionException()
                    action = timed_func(observation)
                except TimeoutFunctionException:
                    sys.stderr.write(
                        "Agent %d timed out on a single move!\n" % agent_index)
                    self.agent_timeout = True
                    self._agent_crash(agent_index, quiet=True)
                    self.unmute()
                    return

                move_time += time.time() - start_time
                if move_time > self.rules.get_move_warning_time(agent_index):
                    self.total_agent_time_warnings[agent_index] += 1
                    sys.stderr.write(
                        "Agent %d took too long to make a move! This is warning %d\n"
                        % (agent_index, self.total_agent_time_warnings[agent_index]))
                    if self.total_agent_time_warnings[
                            agent_index] > self.rules.get_max_time_warnings(
                                agent_index):
                        sys.stderr.write(
                            "Agent %d exceeded the maximum number of warnings: %d\n"
                            % (agent_index, self.total_agent_time_warnings[agent_index]))
                        self.agent_timeout = True
                        self._agent_crash(agent_index, quiet=True)
                        self.unmute()
                        return

                self.total_agent_times[agent_index] += move_time
                if self.total_agent_times[
                        agent_index] > self.rules.get_max_total_time(
                            agent_index):
                    sys.stderr.write(
                        "Agent %d ran out of time! (time: %1.2f)\n"
                        % (agent_index, self.total_agent_times[agent_index]))
                    self.agent_timeout = True
                    self._agent_crash(agent_index, quiet=True)
                    self.unmute()
                    return
                self.unmute()
            except Exception as data:
                self._agent_crash(agent_index)
                self.unmute()
                return
        else:
            action = agent.get_action(observation)
        self.unmute()

        # Execute the action.
        self.move_history.append((agent_index, action))
        if self.catch_exceptions:
            try:
                self.state = self.state.generate_successor(agent_index, action)
            except Exception:
                self.mute(agent_index)
                self._agent_crash(agent_index)
                self.unmute()
                return
        else:
            self.state = self.state.generate_successor(agent_index, action)

        # Change the display.
        self.display.update(self.state.data)

        # Allow for game specific conditions (winning, losing, etc).
        self.rules.process(self.state, self)

        # Track progress (never true: index < num_agents; kept as-is).
        if agent_index == num_agents + 1:
            self.num_moves += 1
        # Next agent.
        agent_index = (agent_index + 1) % num_agents

        if _BOINC_ENABLED:
            boinc.set_fraction_done(self.get_progress())

    # Inform a learning agent of the game result.
    for agent_index, agent in enumerate(self.agents):
        if "final" in dir(agent):
            try:
                self.mute(agent_index)
                agent.final(self.state)
                self.unmute()
            except Exception:
                if not self.catch_exceptions:
                    raise
                self._agent_crash(agent_index)
                self.unmute()
                return
    self.display.finish()
def run(self):
    """
    Main control loop for game play (multiprocessing-Pool variant).

    All agents' actions for a turn are requested in parallel via a worker
    pool (choose_action over a PublicGameState DTO); agents that exceed the
    1-second wait or return unparseable JSON get a random legal action.
    Ends by announcing the winner in a messagebox.

    BUGFIXES: replaced the Python 2 statement `print >> sys.stderr, ...`
    (a syntax error here, since this code already uses the Python 3-only
    `except ... as` form) with `print(..., file=sys.stderr)`, and removed
    a verbatim copy-paste duplication of the rules.process / numMoves
    block that processed every turn twice.
    """
    self.inform_agent_about_start()
    self.display.initialize(self.state.data)
    self.numMoves = 0

    for i in range(len(self.agents)):
        agent = self.agents[i]
        if not agent:
            self.mute(i)
            # this is a null agent, meaning it failed to load
            # the other team wins
            print("Agent %d failed to load" % i, file=sys.stderr)
            self.unmute()
            self._agentCrash(i, quiet=True)
            return

    agentIndex = self.startingIndex
    numAgents = len(self.agents)
    pool = Pool(processes=4)

    while not self.gameOver:
        # Solicit all agents' actions concurrently.
        action = None
        self.mute(agentIndex)
        gameStateDTO = PublicGameState(self.state.deepCopy())
        myFutureResults = pool.imap_unordered(
            choose_action,
            [(x, gameStateDTO, self.agents[x].ip_address)
             for x in range(numAgents)])

        myResults = []
        for agentIndex in range(numAgents):
            try:
                # Wait at most 1 second per result.
                myResults.append(myFutureResults.next(1))
            except Exception as e:
                print("Agent %d reached timeout" % agentIndex)
                myResults.append(
                    (agentIndex,
                     json.dumps(
                         random.choice(
                             self.state.getLegalActions(agentIndex)))))

        for myResult in myResults:
            agentIndex = myResult[0]
            try:
                action = json.loads(myResult[1])
            except Exception as dat:
                print("Have to choose a random action for agent %d"
                      % agentIndex)
                action = random.choice(
                    self.state.getLegalActions(agentIndex))

            # Execute the action
            self.moveHistory.append((agentIndex, action))
            if self.catchExceptions:
                try:
                    self.state = self.state.generateSuccessor(
                        agentIndex, action)
                except Exception as data:
                    self.mute(agentIndex)
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
            else:
                self.state = self.state.generateSuccessor(
                    agentIndex, action)

            # Change the display
            self.display.update(self.state.data)

            # Allow for game specific conditions (winning, losing, etc.)
            self.rules.process(self.state, self)

            # Track progress
            self.numMoves += 1

        if _BOINC_ENABLED:
            boinc.set_fraction_done(self.getProgress())

    # inform a learning agent of the game result
    for agentIndex, agent in enumerate(self.agents):
        if "final" in dir(agent):
            try:
                self.mute(agentIndex)
                agent.final(self.state)
                self.unmute()
            except Exception as data:
                if not self.catchExceptions:
                    raise
                self._agentCrash(agentIndex)
                self.unmute()
                return

    # Announce the outcome: positive score = Red, negative = Blue.
    score = self.state.data.score
    if score == 0:
        messagebox.showinfo("No Winner", "There is no Winner")
    if score > 0:
        messagebox.showinfo("Red Wins",
                            "Team Red wins with score: " + str(score))
    if score < 0:
        messagebox.showinfo(
            "Blue Wins", "Team Blue wins with score: " + str(score * -1))
    self.display.finish()
# game.py
def run(self):
    """
    Main control loop for game play.

    Policy-creation variant: when ``self.createPolicy`` is set and the
    display exposes ``initPolicy``, the method short-circuits and returns
    ``(policy, self)`` instead of playing out the game.  Otherwise it runs
    the standard turn loop, polling each agent until it yields an action.
    """
    self.display.initialize(self.state.data)
    self.numMoves = 0

    # NOTE(review): imported but never used — kept in case importing
    # graphicsDisplay has side effects; confirm before removing.
    from graphicsDisplay import PacmanGraphics
    if self.createPolicy and 'initPolicy' in dir(self.display):
        policy = self.display.initPolicy(self, 0)  # assume 0 is player
        return policy, self

    ###self.display.initialize(self.state.makeObservation(1).data)
    # inform learning agents of the game start
    for i in range(len(self.agents)):
        agent = self.agents[i]
        if not agent:
            self.mute(i)
            # this is a null agent, meaning it failed to load
            # the other team wins
            # fixed: was Python-2 `print >> sys.stderr, ...` (SyntaxError on Py3)
            print("Agent %d failed to load" % i, file=sys.stderr)
            self.unmute()
            self._agentCrash(i, quiet=True)
            return
        if ("registerInitialState" in dir(agent)):
            agent.registerInitialState(self.state.deepCopy())

    agentIndex = self.startingIndex
    numAgents = len(self.agents)
    while not self.gameOver:
        # Fetch the next agent
        agent = self.agents[agentIndex]
        move_time = 0

        # Generate an observation of the state
        if 'observationFunction' in dir(agent):
            observation = agent.observationFunction(self.state.deepCopy())
        else:
            observation = self.state.deepCopy()

        # Solicit an action; poll until the agent supplies one.
        action = None
        while action is None:  # fixed idiom: was `action == None`
            action = agent.getAction(observation)
            # NOTE(review): sleeps even after an action arrives —
            # presumably paces a human/UI agent; confirm.
            time.sleep(0.1)

        # Execute the action
        self.moveHistory.append((agentIndex, action))
        self.state = self.state.generateSuccessor(agentIndex, action)

        # Change the display
        self.display.update(self.state.data)
        ###idx = agentIndex - agentIndex % 2 + 1
        ###self.display.update( self.state.makeObservation(idx).data )

        # Allow for game specific conditions (winning, losing, etc.)
        self.rules.process(self.state, self)

        # Track progress
        # NOTE(review): agentIndex can never equal numAgents + 1, so
        # numMoves is never incremented (quirk inherited upstream).
        if agentIndex == numAgents + 1:
            self.numMoves += 1

        # Next agent
        agentIndex = (agentIndex + 1) % numAgents
        if _BOINC_ENABLED:
            boinc.set_fraction_done(self.getProgress())

    # inform a learning agent of the game result
    for agentIndex, agent in enumerate(self.agents):
        if "final" in dir(agent):
            try:
                self.mute(agentIndex)
                agent.final(self.state)
                self.unmute()
            except Exception as data:  # fixed: Python-2 `except Exception, data`
                # Re-raises unconditionally, as the original did; the crash
                # handling that followed the bare `raise` was unreachable
                # dead code and has been removed.
                raise
def run( self ):
    """
    Main control loop for game play.

    Per turn: optionally run a timed observationFunction, solicit a timed
    getAction (when catchExceptions is set), apply the action, update the
    display, let the rules process win/lose, then advance to the next
    agent.  Agents that time out or raise are reported via _agentCrash.

    NOTE(review): with catchExceptions the per-move budget is
    int(self.state.data.score) seconds (see below), not the rules' move
    timeout — presumably score doubles as a compute budget; confirm.
    """
    self.display.initialize(self.state.data)
    self.numMoves = 0
    ###self.display.initialize(self.state.makeObservation(1).data)

    # inform learning agents of the game start
    for i in range(len(self.agents)):
        agent = self.agents[i]
        if not agent:
            self.mute(i)
            # this is a null agent, meaning it failed to load
            # the other team wins
            print("Agent %d failed to load" % i, file=sys.stderr)
            self.unmute()
            self._agentCrash(i, quiet=True)
            return
        if ("registerInitialState" in dir(agent)):
            self.mute(i)
            if self.catchExceptions:
                try:
                    # Registration must finish within the startup budget.
                    timed_func = TimeoutFunction(agent.registerInitialState, int(self.rules.getMaxStartupTime(i)))
                    try:
                        start_time = time.time()
                        timed_func(self.state.deepCopy())
                        time_taken = time.time() - start_time
                        self.totalAgentTimes[i] += time_taken
                    except TimeoutFunctionException:
                        print("Agent %d ran out of time on startup!" % i, file=sys.stderr)
                        self.unmute()
                        self.agentTimeout = True
                        self._agentCrash(i, quiet=True)
                        return
                except Exception as data:
                    self._agentCrash(i, quiet=False)
                    self.unmute()
                    return
            else:
                agent.registerInitialState(self.state.deepCopy())
            ## TODO: could this exceed the total time
            self.unmute()

    agentIndex = self.startingIndex
    numAgents = len( self.agents )
    maihoonnaa=1 #whoami - debug step counter used by (commented) trace prints
    while not self.gameOver:
        # Fetch the next agent
        agent = self.agents[agentIndex]
        move_time = 0
        skip_action = False

        # Generate an observation of the state
        if 'observationFunction' in dir( agent ):
            self.mute(agentIndex)
            if self.catchExceptions:
                try:
                    timed_func = TimeoutFunction(agent.observationFunction, int(self.rules.getMoveTimeout(agentIndex)))
                    try:
                        start_time = time.time()
                        observation = timed_func(self.state.deepCopy())
                    except TimeoutFunctionException:
                        # Too slow to observe: this agent's move is skipped.
                        skip_action = True
                    move_time += time.time() - start_time
                    self.unmute()
                except Exception as data:
                    self._agentCrash(agentIndex, quiet=False)
                    self.unmute()
                    return
            else:
                observation = agent.observationFunction(self.state.deepCopy())
            self.unmute()
        else:
            observation = self.state.deepCopy()

        # Solicit an action
        action = None
        self.mute(agentIndex)
        if self.catchExceptions:
            try:
                # The stock budget was replaced by a score-based one:
                # timed_func = TimeoutFunction(agent.getAction, int(self.rules.getMoveTimeout(agentIndex)) - int(move_time))
                timed_func = TimeoutFunction(agent.getAction, int(self.state.data.score))
                try:
                    start_time = time.time()
                    if skip_action:
                        raise TimeoutFunctionException()
                    action = timed_func( observation )
                except TimeoutFunctionException:
                    print("Agent %d timed out on a single move!" % agentIndex, file=sys.stderr)
                    self.agentTimeout = True
                    self._agentCrash(agentIndex, quiet=True)
                    self.unmute()
                    return
                move_time += time.time() - start_time
                # (The warning-count and max-total-time enforcement blocks
                # are commented out in this variant; only the per-move
                # timeout above is active.)
                self.totalAgentTimes[agentIndex] += move_time
                self.unmute()
            except Exception as data:
                self._agentCrash(agentIndex)
                self.unmute()
                return
        else:
            # (A score-scaled TimeoutFunction variant was tried here and
            # left commented out in the original.)
            action = agent.getAction(self.state.deepCopy())
            assert(action in self.state.getLegalActions(agentIndex)), str(self.state) + " "+ str(self.state.getLegalActions(agentIndex)) +" "+str(action) +" " + str(agentIndex) #whoami
        self.unmute()
        self.state.data.score += 0 #whoami max(0,-1 * SCALING_FACTOR) - no-op; score penalty disabled

        # Execute the action
        self.moveHistory.append( (agentIndex, action) )
        if self.catchExceptions:
            try:
                self.state = self.state.generateSuccessor( agentIndex, action )
            except Exception as data:
                self.mute(agentIndex)
                self._agentCrash(agentIndex)
                self.unmute()
                return
        else:
            self.state = self.state.generateSuccessor( agentIndex, action )

        # Change the display
        self.display.update( self.state.data )
        ###idx = agentIndex - agentIndex % 2 + 1
        ###self.display.update( self.state.makeObservation(idx).data )

        # Allow for game specific conditions (winning, losing, etc.)
        self.rules.process(self.state, self)
        maihoonnaa+=1#whoami

        # Track progress
        # NOTE(review): agentIndex never equals numAgents + 1, so numMoves
        # stays 0 (quirk inherited from the upstream pacman code).
        if agentIndex == numAgents + 1:
            self.numMoves += 1

        # Next agent
        agentIndex = ( agentIndex + 1 ) % numAgents

        if _BOINC_ENABLED:
            boinc.set_fraction_done(self.getProgress())

    # inform a learning agent of the game result
    for agentIndex, agent in enumerate(self.agents):
        if "final" in dir( agent ) :
            try:
                self.mute(agentIndex)
                agent.final( self.state )
                self.unmute()
            except Exception as data:
                if not self.catchExceptions:
                    raise data
                self._agentCrash(agentIndex)
                self.unmute()
                return
    self.display.finish()
def run(self):
    """
    Main control loop for game play.

    CSV-logging variant: opens (or creates) "Results.csv" before the game
    loop and, each step, writes the row produced by
    BasicAgentAA.printLineData for the current observation.

    NOTE(review): the csv file handle is never closed, and printLineData
    is called with this Game instance as its first argument — confirm
    both against bustersAgents before relying on this code.
    """
    self.display.initialize(self.state.data)
    self.numMoves = 0
    ###self.display.initialize(self.state.makeObservation(1).data)

    # inform learning agents of the game start
    for i in range(len(self.agents)):
        agent = self.agents[i]
        if not agent:
            self.mute(i)
            # this is a null agent, meaning it failed to load
            # the other team wins
            print("Agent %d failed to load" % i, file=sys.stderr)
            self.unmute()
            self._agentCrash(i, quiet=True)
            return
        if ("registerInitialState" in dir(agent)):
            self.mute(i)
            if self.catchExceptions:
                try:
                    timed_func = TimeoutFunction(
                        agent.registerInitialState,
                        int(self.rules.getMaxStartupTime(i)))
                    try:
                        start_time = time.time()
                        timed_func(self.state.deepCopy())
                        time_taken = time.time() - start_time
                        self.totalAgentTimes[i] += time_taken
                    except TimeoutFunctionException:
                        print("Agent %d ran out of time on startup!" % i,
                              file=sys.stderr)
                        self.unmute()
                        self.agentTimeout = True
                        self._agentCrash(i, quiet=True)
                        return
                except Exception as data:
                    self._agentCrash(i, quiet=False)
                    self.unmute()
                    return
            else:
                agent.registerInitialState(self.state.deepCopy())
            ## TODO: could this exceed the total time
            self.unmute()

    "Import classes from busteresAgents"
    from bustersAgents import BasicAgentAA
    from bustersAgents import BustersAgent
    import os.path
    from os import path
    "Check if csv file exists, if yes than just open it, if no, than create"
    # assign header columns
    headerList = [
        'PositionX', 'PositionY', 'is North legal', 'is East legal',
        'is South legal', 'is West legal', 'is Stop legal', 'Direction',
        'Position', 'Ghost1 X', 'Ghost1 Y', 'Ghost2 X', 'Ghost2 Y',
        'Ghost3 X', 'Ghost3 Y', 'Ghost4 X', 'Ghost4 Y'
    ]
    if path.exists("Results.csv") is True:
        # Append to an existing results file, separated by a blank row.
        print('Results file exists')
        csvfile = open('Results.csv', 'a', newline='')
        spamwriter = csv.writer(csvfile, delimiter=',', quotechar='|',
                                quoting=csv.QUOTE_MINIMAL)
        spamwriter.writerow(['', '', '', '', '', '', '', ''])
    else:
        # open CSV file and assign header
        csvfile = open("Results.csv", 'w')
        dw = csv.DictWriter(csvfile, delimiter=',', fieldnames=headerList)
        dw.writeheader()
        print('Created new results file')

    agentIndex = self.startingIndex
    numAgents = len(self.agents)
    step = 0
    while not self.gameOver:
        # Fetch the next agent
        agent = self.agents[agentIndex]
        move_time = 0
        skip_action = False

        # Generate an observation of the state
        if 'observationFunction' in dir(agent):
            self.mute(agentIndex)
            if self.catchExceptions:
                try:
                    timed_func = TimeoutFunction(
                        agent.observationFunction,
                        int(self.rules.getMoveTimeout(agentIndex)))
                    try:
                        start_time = time.time()
                        observation = timed_func(self.state.deepCopy())
                    except TimeoutFunctionException:
                        skip_action = True
                    move_time += time.time() - start_time
                    self.unmute()
                except Exception as data:
                    self._agentCrash(agentIndex, quiet=False)
                    self.unmute()
                    return
            else:
                observation = agent.observationFunction(
                    self.state.deepCopy())
            self.unmute()
        else:
            observation = self.state.deepCopy()

        # Solicit an action
        action = None
        step += 1
        self.mute(agentIndex)
        if self.catchExceptions:
            try:
                timed_func = TimeoutFunction(
                    agent.getAction,
                    int(self.rules.getMoveTimeout(agentIndex)) -
                    int(move_time))
                try:
                    start_time = time.time()
                    if skip_action:
                        raise TimeoutFunctionException()
                    action = timed_func(observation)
                except TimeoutFunctionException:
                    print("Agent %d timed out on a single move!" % agentIndex,
                          file=sys.stderr)
                    self.agentTimeout = True
                    self._agentCrash(agentIndex, quiet=True)
                    self.unmute()
                    return
                move_time += time.time() - start_time
                # Warn on slow moves; too many warnings ends the game.
                if move_time > self.rules.getMoveWarningTime(agentIndex):
                    self.totalAgentTimeWarnings[agentIndex] += 1
                    print(
                        "Agent %d took too long to make a move! This is warning %d"
                        % (agentIndex, self.totalAgentTimeWarnings[agentIndex]),
                        file=sys.stderr)
                    if self.totalAgentTimeWarnings[
                            agentIndex] > self.rules.getMaxTimeWarnings(
                                agentIndex):
                        print(
                            "Agent %d exceeded the maximum number of warnings: %d"
                            % (agentIndex,
                               self.totalAgentTimeWarnings[agentIndex]),
                            file=sys.stderr)
                        self.agentTimeout = True
                        self._agentCrash(agentIndex, quiet=True)
                        self.unmute()
                        return
                self.totalAgentTimes[agentIndex] += move_time
                #print "Agent: %d, time: %f, total: %f" % (agentIndex, move_time, self.totalAgentTimes[agentIndex])
                if self.totalAgentTimes[
                        agentIndex] > self.rules.getMaxTotalTime(
                            agentIndex):
                    print("Agent %d ran out of time! (time: %1.2f)" %
                          (agentIndex, self.totalAgentTimes[agentIndex]),
                          file=sys.stderr)
                    self.agentTimeout = True
                    self._agentCrash(agentIndex, quiet=True)
                    self.unmute()
                    return
                self.unmute()
            except Exception as data:
                self._agentCrash(agentIndex)
                self.unmute()
                return
        else:
            action = agent.getAction(observation)
        self.unmute()

        # Execute the action
        self.moveHistory.append((agentIndex, action))
        if self.catchExceptions:
            try:
                self.state = self.state.generateSuccessor(
                    agentIndex, action)
            except Exception as data:
                self.mute(agentIndex)
                self._agentCrash(agentIndex)
                self.unmute()
                return
        else:
            self.state = self.state.generateSuccessor(agentIndex, action)

        # Change the display
        self.display.update(self.state.data)
        ###idx = agentIndex - agentIndex % 2 + 1
        ###self.display.update( self.state.makeObservation(idx).data )

        # Allow for game specific conditions (winning, losing, etc.)
        self.rules.process(self.state, self)
        # Track progress
        # NOTE(review): agentIndex never equals numAgents + 1, so numMoves
        # stays 0 (quirk inherited from the upstream pacman code).
        if agentIndex == numAgents + 1:
            self.numMoves += 1
        # Next agent
        agentIndex = (agentIndex + 1) % numAgents

        if _BOINC_ENABLED:
            boinc.set_fraction_done(self.getProgress())

        #USE MY FUNCTION PRINT LINE FROM BASIC AGENT
        # NOTE(review): placed inside the game loop (one CSV row per step);
        # the collapsed original's indentation was ambiguous — confirm.
        g = BasicAgentAA.printLineData(self, observation)
        "Writing results to file"
        spamwriter = csv.writer(csvfile, delimiter=',', quotechar='|',
                                quoting=csv.QUOTE_MINIMAL)
        spamwriter.writerow(g)

    # inform a learning agent of the game result
    for agentIndex, agent in enumerate(self.agents):
        if "final" in dir(agent):
            try:
                self.mute(agentIndex)
                agent.final(self.state)
                self.unmute()
            except Exception as data:
                if not self.catchExceptions:
                    raise
                self._agentCrash(agentIndex)
                self.unmute()
                return
    self.display.finish()
def run(self):
    """
    Main control loop for game play.

    Broadcast variant: in "phase 2" (exactly two agents, i.e. no ghost)
    agent 0's ``toInitialBroadcast`` is handed to agent 1 before its
    registerInitialState, and each turn a pacman's ``toBroadcast`` is
    copied to its teammate's ``receivedBroadcast``.  Broadcast contents
    are asserted to be legal directions.
    """
    self.display.initialize(self.state.data)
    self.numMoves = 0

    # Simple way to determine whether we are in phase 3
    # as there will be a ghost
    phase3 = len(self.agents) > 2
    initialBroadcast = None

    # Inform learning agents of the game start
    for i in range(len(self.agents)):
        agent = self.agents[i]
        #agent = None
        if not agent:
            self.mute(i)
            # If null agent, it failed to load. Terminate the game
            print("Agent %d failed to load" % i, file=sys.stderr)
            self.unmute()
            self._agentCrash(i, quiet=True)
            return

        # Hack to get broadcast across to index 1 agent
        # during registerInitialState for phase 2
        if not phase3 and initialBroadcast is not None:
            agent.receivedInitialBroadcast = initialBroadcast
            initialBroadcast = None

        if ("registerInitialState" in dir(agent)):
            self.mute(i)
            # Timed register initial state
            if self.catchExceptions:
                try:
                    timed_func = TimeoutFunction(
                        agent.registerInitialState,
                        int(self.rules.getMaxStartupTime(i)))
                    try:
                        start_time = time.time()
                        timed_func(self.state.deepCopy())
                        time_taken = time.time() - start_time
                        self.totalAgentTimes[i] += time_taken
                    except TimeoutFunctionException:
                        print("Agent %d ran out of time on startup!" % i,
                              file=sys.stderr)
                        self.unmute()
                        self.agentTimeout = True
                        self._agentCrash(i, quiet=True)
                        return
                    # Agent initialization times
                    # print(self.totalAgentTimes)
                except Exception as data:
                    self._agentCrash(i, quiet=False)
                    self.unmute()
                    return
            else:
                agent.registerInitialState(self.state.deepCopy())
            # Part 2 of hack to get agent 0 to broadcast path
            # to agent 1 during registerInitialState for phase 2
            # NOTE(review): placed after both registration branches; the
            # collapsed original's indentation was ambiguous — confirm.
            if not phase3 and i == 0:
                initialBroadcast = agent.toInitialBroadcast
                assert initialBroadcast is None or all(
                    [a in LEGAL_DIRECTIONS for a in initialBroadcast])
            self.unmute()

    agentIndex = self.startingIndex
    numAgents = len(self.agents)
    while not self.gameOver:
        # Fetch the next agent
        agent = self.agents[agentIndex]
        move_time = 0
        skip_action = False

        # Generate an observation of the state
        if 'observationFunction' in dir(agent):
            self.mute(agentIndex)
            if self.catchExceptions:
                try:
                    timed_func = TimeoutFunction(
                        agent.observationFunction,
                        int(self.rules.getMoveTimeout(agentIndex)))
                    try:
                        start_time = time.time()
                        observation = timed_func(self.state.deepCopy())
                    except TimeoutFunctionException:
                        skip_action = True
                    move_time += time.time() - start_time
                    self.unmute()
                except Exception as data:
                    self._agentCrash(agentIndex, quiet=False)
                    self.unmute()
                    return
            else:
                observation = agent.observationFunction(
                    self.state.deepCopy())
            self.unmute()
        else:
            observation = self.state.deepCopy()

        # Solicit an action
        action = None
        self.mute(agentIndex)
        if self.catchExceptions:
            try:
                timed_func = TimeoutFunction(
                    agent.getAction,
                    int(self.rules.getMoveTimeout(agentIndex)) -
                    int(move_time))
                try:
                    start_time = time.time()
                    if skip_action:
                        raise TimeoutFunctionException()
                    action = timed_func(observation)
                except TimeoutFunctionException:
                    print("Agent %d timed out on a single move!" % agentIndex,
                          file=sys.stderr)
                    self.agentTimeout = True
                    self._agentCrash(agentIndex, quiet=True)
                    self.unmute()
                    return
                move_time += time.time() - start_time
                # Warn on slow moves; too many warnings ends the game.
                if move_time > self.rules.getMoveWarningTime(agentIndex):
                    self.totalAgentTimeWarnings[agentIndex] += 1
                    print(
                        "Agent %d took too long to make a move! This is warning %d"
                        % (agentIndex, self.totalAgentTimeWarnings[agentIndex]),
                        file=sys.stderr)
                    if self.totalAgentTimeWarnings[
                            agentIndex] > self.rules.getMaxTimeWarnings(
                                agentIndex):
                        print(
                            "Agent %d exceeded the maximum number of warnings: %d"
                            % (agentIndex,
                               self.totalAgentTimeWarnings[agentIndex]),
                            file=sys.stderr)
                        self.agentTimeout = True
                        self._agentCrash(agentIndex, quiet=True)
                        self.unmute()
                        return
                self.totalAgentTimes[agentIndex] += move_time
                #print "Agent: %d, time: %f, total: %f" % (agentIndex, move_time, self.totalAgentTimes[agentIndex])
                if self.totalAgentTimes[
                        agentIndex] > self.rules.getMaxTotalTime(
                            agentIndex):
                    print("Agent %d ran out of time! (time: %1.2f)" %
                          (agentIndex, self.totalAgentTimes[agentIndex]),
                          file=sys.stderr)
                    self.agentTimeout = True
                    self._agentCrash(agentIndex, quiet=True)
                    self.unmute()
                    return
                self.unmute()
            except Exception as data:
                self._agentCrash(agentIndex)
                self.unmute()
                return
        else:
            action = agent.getAction(observation)
            # Get other pacman on the agent's team
            teammateList = [
                self.agents[i] for i in agent.getTeam(observation)
                if i != agent.index
            ]
            # Only broadcast/receive broadcast if has teammates, i.e.
            # is not a ghost
            if len(teammateList) > 0:
                otherPacman = teammateList[0]
                broadcast = agent.toBroadcast
                assert broadcast is None or all(
                    [a in LEGAL_DIRECTIONS for a in broadcast])
                otherPacman.receivedBroadcast = broadcast
        self.unmute()

        # Execute the action
        self.moveHistory.append((agentIndex, action))
        if self.catchExceptions:
            try:
                self.state = self.state.generateSuccessor(
                    agentIndex, action)
            except Exception as data:
                self.mute(agentIndex)
                self._agentCrash(agentIndex)
                self.unmute()
                return
        else:
            self.state = self.state.generateSuccessor(agentIndex, action)

        # Change the display
        self.display.update(self.state.data)

        # Allow for game specific conditions (winning, losing, etc.)
        self.rules.process(self.state, self)
        # Track progress
        # NOTE(review): agentIndex never equals numAgents + 1, so numMoves
        # stays 0 (quirk inherited from the upstream pacman code).
        if agentIndex == numAgents + 1:
            self.numMoves += 1
        # Next agent
        agentIndex = (agentIndex + 1) % numAgents

        if _BOINC_ENABLED:
            boinc.set_fraction_done(self.getProgress())

    # Inform a learning agent of the game result
    for agentIndex, agent in enumerate(self.agents):
        if "final" in dir(agent):
            try:
                self.mute(agentIndex)
                agent.final(self.state)
                self.unmute()
            except Exception as data:
                if not self.catchExceptions:
                    raise
                self._agentCrash(agentIndex)
                self.unmute()
                return
    self.display.finish()
def run(self):
    """
    Main control loop for game play.

    Canonical timed variant: each agent may run a timed
    observationFunction and a timed getAction (when catchExceptions is
    set); slow moves accumulate warnings, and exceeding the warning
    limit, the per-move timeout, or the total-time budget crashes the
    agent and ends the game.  After the game, agents exposing final()
    are notified of the result.
    """
    self.display.initialize(self.state.data)
    self.numMoves = 0
    # self.display.initialize(self.state.makeObservation(1).data)

    # inform learning agents of the game start
    for i in range(len(self.agents)):
        agent = self.agents[i]
        if not agent:
            self.mute(i)
            # this is a null agent, meaning it failed to load
            # the other team wins
            print("Agent %d failed to load" % i, file=sys.stderr)
            self.unmute()
            self._agentCrash(i, quiet=True)
            return
        if ("registerInitialState" in dir(agent)):
            self.mute(i)
            if self.catchExceptions:
                try:
                    # Registration must finish within the startup budget.
                    timed_func = TimeoutFunction(
                        agent.registerInitialState,
                        int(self.rules.getMaxStartupTime(i)))
                    try:
                        start_time = time.time()
                        timed_func(self.state.deepCopy())
                        time_taken = time.time() - start_time
                        self.totalAgentTimes[i] += time_taken
                    except TimeoutFunctionException:
                        print("Agent %d ran out of time on startup!" % i,
                              file=sys.stderr)
                        self.unmute()
                        self.agentTimeout = True
                        self._agentCrash(i, quiet=True)
                        return
                except Exception as data:
                    self._agentCrash(i, quiet=False)
                    self.unmute()
                    return
            else:
                agent.registerInitialState(self.state.deepCopy())
            # TODO: could this exceed the total time
            self.unmute()

    agentIndex = self.startingIndex
    numAgents = len(self.agents)
    while not self.gameOver:
        # Fetch the next agent
        agent = self.agents[agentIndex]
        move_time = 0
        skip_action = False

        # Generate an observation of the state
        if 'observationFunction' in dir(agent):
            self.mute(agentIndex)
            if self.catchExceptions:
                try:
                    timed_func = TimeoutFunction(
                        agent.observationFunction,
                        int(self.rules.getMoveTimeout(agentIndex)))
                    try:
                        start_time = time.time()
                        observation = timed_func(self.state.deepCopy())
                    except TimeoutFunctionException:
                        # Too slow to observe: this agent's move is skipped.
                        skip_action = True
                    move_time += time.time() - start_time
                    self.unmute()
                except Exception as data:
                    self._agentCrash(agentIndex, quiet=False)
                    self.unmute()
                    return
            else:
                observation = agent.observationFunction(
                    self.state.deepCopy())
            self.unmute()
        else:
            observation = self.state.deepCopy()

        # Solicit an action
        action = None
        self.mute(agentIndex)
        if self.catchExceptions:
            try:
                # Remaining budget: move timeout minus observation time.
                timed_func = TimeoutFunction(
                    agent.getAction,
                    int(self.rules.getMoveTimeout(agentIndex)) -
                    int(move_time))
                try:
                    start_time = time.time()
                    if skip_action:
                        raise TimeoutFunctionException()
                    action = timed_func(observation)
                except TimeoutFunctionException:
                    print("Agent %d timed out on a single move!" % agentIndex,
                          file=sys.stderr)
                    self.agentTimeout = True
                    self._agentCrash(agentIndex, quiet=True)
                    self.unmute()
                    return
                move_time += time.time() - start_time
                # Warn on slow moves; too many warnings ends the game.
                if move_time > self.rules.getMoveWarningTime(agentIndex):
                    self.totalAgentTimeWarnings[agentIndex] += 1
                    print(
                        "Agent %d took too long to make a move! This is warning %d"
                        % (agentIndex, self.totalAgentTimeWarnings[agentIndex]),
                        file=sys.stderr)
                    if self.totalAgentTimeWarnings[
                            agentIndex] > self.rules.getMaxTimeWarnings(
                                agentIndex):
                        print(
                            "Agent %d exceeded the maximum number of warnings: %d"
                            % (agentIndex,
                               self.totalAgentTimeWarnings[agentIndex]),
                            file=sys.stderr)
                        self.agentTimeout = True
                        self._agentCrash(agentIndex, quiet=True)
                        self.unmute()
                        return
                self.totalAgentTimes[agentIndex] += move_time
                # print "Agent: %d, time: %f, total: %f" % (agentIndex, move_time, self.totalAgentTimes[agentIndex])
                if self.totalAgentTimes[
                        agentIndex] > self.rules.getMaxTotalTime(
                            agentIndex):
                    print("Agent %d ran out of time! (time: %1.2f)" %
                          (agentIndex, self.totalAgentTimes[agentIndex]),
                          file=sys.stderr)
                    self.agentTimeout = True
                    self._agentCrash(agentIndex, quiet=True)
                    self.unmute()
                    return
                self.unmute()
            except Exception as data:
                self._agentCrash(agentIndex)
                self.unmute()
                return
        else:
            action = agent.getAction(observation)
        self.unmute()

        # Execute the action
        self.moveHistory.append((agentIndex, action))
        if self.catchExceptions:
            try:
                self.state = self.state.generateSuccessor(
                    agentIndex, action)
            except Exception as data:
                self.mute(agentIndex)
                self._agentCrash(agentIndex)
                self.unmute()
                return
        else:
            self.state = self.state.generateSuccessor(agentIndex, action)

        # Change the display
        self.display.update(self.state.data)
        ###idx = agentIndex - agentIndex % 2 + 1
        ###self.display.update( self.state.makeObservation(idx).data )

        # Allow for game specific conditions (winning, losing, etc.)
        self.rules.process(self.state, self)
        # Track progress
        # NOTE(review): agentIndex never equals numAgents + 1, so numMoves
        # stays 0 (quirk inherited from the upstream pacman code).
        if agentIndex == numAgents + 1:
            self.numMoves += 1
        # Next agent
        agentIndex = (agentIndex + 1) % numAgents

        if _BOINC_ENABLED:
            boinc.set_fraction_done(self.getProgress())

    # inform a learning agent of the game result
    for agentIndex, agent in enumerate(self.agents):
        if "final" in dir(agent):
            try:
                self.mute(agentIndex)
                agent.final(self.state)
                self.unmute()
            except Exception as data:
                if not self.catchExceptions:
                    raise
                self._agentCrash(agentIndex)
                self.unmute()
                return
    self.display.finish()
def run(self, EPISODES, callbacks=None, log_dir=""):
    """
    Main control loop for game play (reinforcement-learning variant).

    Plays one episode, maintaining a running average of agent 0's
    lastReward, and after the game calls each agent's final(); unless
    agent 0 is in evaluation mode (``agent.prueba``), its secondary
    policy is updated with the given callbacks.

    Returns:
        (lastReward, epsilon, phi) of agent 0.
    """
    # fixed: `callbacks=[]` was a mutable default argument shared across calls.
    if callbacks is None:
        callbacks = []
    self.agents[0].last_episode = EPISODES
    self.display.initialize(self.state.data)
    self.numMoves = 0

    # inform learning agents of the game start
    for i in range(len(self.agents)):
        agent = self.agents[i]
        if not agent:
            self.mute(i)
            # this is a null agent, meaning it failed to load
            # the other team wins
            # fixed: was Python-2 `print >> sys.stderr, ...` syntax
            print("Agent %d failed to load" % i, file=sys.stderr)
            self.unmute()
            self._agentCrash(i, quiet=True)
            return
        if ("registerInitialState" in dir(agent)):
            self.mute(i)
            if self.catchExceptions:
                try:
                    timed_func = TimeoutFunction(
                        agent.registerInitialState,
                        int(self.rules.getMaxStartupTime(i)))
                    try:
                        start_time = time.time()
                        timed_func(self.state.deepCopy())
                        time_taken = time.time() - start_time
                        self.totalAgentTimes[i] += time_taken
                    except TimeoutFunctionException:
                        print("Agent %d ran out of time on startup!" % i,
                              file=sys.stderr)
                        self.unmute()
                        self.agentTimeout = True
                        self._agentCrash(i, quiet=True)
                        return
                except Exception:  # there used to be a `data` variable here
                    self._agentCrash(i, quiet=False)
                    self.unmute()
                    return
            else:
                agent.registerInitialState(self.state.deepCopy())
            ## TODO: could this exceed the total time
            self.unmute()

    agentIndex = self.startingIndex
    numAgents = len(self.agents)
    # Incremental mean of agent 0's reward over the episode's moves.
    self.agents[0].n = 0
    res = 0
    m = 0
    while not self.gameOver:
        # Fetch the next agent
        agent = self.agents[agentIndex]
        # fixed: was `mov = 0`, leaving `move_time` undefined and raising
        # NameError on every catchExceptions path below.
        move_time = 0
        skip_action = False

        # Generate an observation of the state
        if 'observationFunction' in dir(agent):
            self.mute(agentIndex)
            if self.catchExceptions:
                try:
                    timed_func = TimeoutFunction(
                        agent.observationFunction,
                        int(self.rules.getMoveTimeout(agentIndex)))
                    try:
                        start_time = time.time()
                        observation = timed_func(self.state.deepCopy())
                    except TimeoutFunctionException:
                        skip_action = True
                    move_time += time.time() - start_time
                    self.unmute()
                except Exception:
                    self._agentCrash(agentIndex, quiet=False)
                    self.unmute()
                    return
            else:
                observation = agent.observationFunction(self.state.deepCopy())
            self.unmute()
        else:
            observation = self.state.deepCopy()

        # Solicit an action
        action = None
        self.mute(agentIndex)
        if self.catchExceptions:
            try:
                timed_func = TimeoutFunction(
                    agent.getAction,
                    int(self.rules.getMoveTimeout(agentIndex)) - int(move_time))
                try:
                    start_time = time.time()
                    if skip_action:
                        raise TimeoutFunctionException()
                    action = timed_func(observation)
                except TimeoutFunctionException:
                    print("Agent %d timed out on a single move!" % agentIndex,
                          file=sys.stderr)
                    self.agentTimeout = True
                    self._agentCrash(agentIndex, quiet=True)
                    self.unmute()
                    return
                move_time += time.time() - start_time
                # Warn on slow moves; too many warnings ends the game.
                if move_time > self.rules.getMoveWarningTime(agentIndex):
                    self.totalAgentTimeWarnings[agentIndex] += 1
                    print("Agent %d took too long to make a move! This is warning %d"
                          % (agentIndex, self.totalAgentTimeWarnings[agentIndex]),
                          file=sys.stderr)
                    if self.totalAgentTimeWarnings[agentIndex] > self.rules.getMaxTimeWarnings(agentIndex):
                        print("Agent %d exceeded the maximum number of warnings: %d"
                              % (agentIndex, self.totalAgentTimeWarnings[agentIndex]),
                              file=sys.stderr)
                        self.agentTimeout = True
                        self._agentCrash(agentIndex, quiet=True)
                        self.unmute()
                        return
                self.totalAgentTimes[agentIndex] += move_time
                #print "Agent: %d, time: %f, total: %f" % (agentIndex, move_time, self.totalAgentTimes[agentIndex])
                if self.totalAgentTimes[agentIndex] > self.rules.getMaxTotalTime(agentIndex):
                    print("Agent %d ran out of time! (time: %1.2f)"
                          % (agentIndex, self.totalAgentTimes[agentIndex]),
                          file=sys.stderr)
                    self.agentTimeout = True
                    self._agentCrash(agentIndex, quiet=True)
                    self.unmute()
                    return
                self.unmute()
            except Exception:
                self._agentCrash(agentIndex)
                self.unmute()
                return
        else:
            action = agent.getAction(observation)
        self.unmute()

        # Execute the action
        self.moveHistory.append((agentIndex, action))
        if self.catchExceptions:
            try:
                self.state = self.state.generateSuccessor(agentIndex, action)
            except Exception:
                self.mute(agentIndex)
                self._agentCrash(agentIndex)
                self.unmute()
                return
        else:
            self.state = self.state.generateSuccessor(agentIndex, action)

        # Change the display
        self.display.update(self.state.data)
        ###idx = agentIndex - agentIndex % 2 + 1
        ###self.display.update( self.state.makeObservation(idx).data )

        # Allow for game specific conditions (winning, losing, etc.)
        self.rules.process(self.state, self)
        # Track progress
        # NOTE(review): agentIndex never equals numAgents + 1, so numMoves
        # stays 0 (quirk inherited from the upstream pacman code).
        if agentIndex == numAgents + 1:
            self.numMoves += 1
        # Next agent
        agentIndex = (agentIndex + 1) % numAgents

        if _BOINC_ENABLED:
            boinc.set_fraction_done(self.getProgress())

        # Incremental running mean of agent 0's last reward.
        res += 1 / (m + 1) * (self.agents[0].lastReward - res)
        m += 1
        # print(f"Episodio: {EPISODES:d}")

    # inform a learning agent of the game result
    # (this is where the per-game wrap-up happens)
    for agentIndex, agent in enumerate(self.agents):
        if "final" in dir(agent):
            try:
                self.mute(agentIndex)
                agent.final(self.state)
                # Skip the policy update in evaluation ("prueba") mode.
                if not agent.prueba:
                    agent.policy_second.update_policy(agent, callbacks,
                                                      log_dir=log_dir)
                self.unmute()
            except Exception:
                if not self.catchExceptions:
                    raise
                self._agentCrash(agentIndex)
                self.unmute()
                return
    self.display.finish()
    return self.agents[0].lastReward, self.agents[0].epsilon, self.agents[0].phi
self.state = self.state.generateSuccessor(agentIndex, action) # Change the display self.display.update(self.state.data) ###idx = agentIndex - agentIndex % 2 + 1 ###self.display.update( self.state.makeObservation(idx).data ) # Allow for game specific conditions (winning, losing, etc.) self.rules.process(self.state, self) # Track progress if agentIndex == numAgents + 1: self.numMoves += 1 # Next agent agentIndex = (agentIndex + 1) % numAgents if _BOINC_ENABLED: boinc.set_fraction_done(self.getProgress()) # inform a learning agent of the game result for agentIndex, agent in enumerate(self.agents): if "final" in dir(agent): try: self.mute(agentIndex) agent.final(self.state) self.unmute() except Exception, data: if not self.catchExceptions: raise self._agentCrash(agentIndex) self.unmute() return self.display.finish()
def run(self, replay, minibatch_size):
    """Main control loop for game play with experience replay.

    Each iteration of the game loop:
      1. solicits one action per agent against the current state,
      2. applies the actions in agent order, accumulating a per-pacman
         reward vector from score deltas (a ghost move shares its score
         delta with any pacman within COLLISION_TOLERANCE of it; a
         terminal state rewards every pacman +/-500),
      3. records the (old_state, actions, rewards, new_state) transition
         in `replay` and appends a textual log entry to "actions.txt",
      4. for every agent, samples a minibatch of transitions from `replay`,
         feeds each through the agent's observationFunction, and lets
         pacman agents update their parameters.

    After the game ends, every agent exposing `final` gets one last
    minibatch pass before the display is shut down.

    Args:
        replay: dict used as an ordered set of transition tuples
            (old_state, actions, rewards, new_state) -> 1; it is both
            appended to and sampled from.
        minibatch_size: maximum number of transitions per sampled batch.

    Returns:
        The (grown) replay dict, or None if an agent crashed.
    """
    self.display.initialize(self.state.data)
    self.numMoves = 0

    # inform learning agents of the game start
    for i in range(len(self.agents)):
        agent = self.agents[i]
        if not agent:
            self.mute(i)
            # this is a null agent, meaning it failed to load
            # the other team wins
            print("Agent %d failed to load" % i, file=sys.stderr)
            self.unmute()
            self._agentCrash(i, quiet=True)
            return
        if "registerInitialState" in dir(agent):
            self.mute(i)
            if self.catchExceptions:
                try:
                    timed_func = TimeoutFunction(
                        agent.registerInitialState,
                        int(self.rules.getMaxStartupTime(i)))
                    try:
                        start_time = time.time()
                        timed_func(self.state.deepCopy())
                        time_taken = time.time() - start_time
                        self.totalAgentTimes[i] += time_taken
                    except TimeoutFunctionException:
                        print("Agent %d ran out of time on startup!" % i,
                              file=sys.stderr)
                        self.unmute()
                        self.agentTimeout = True
                        self._agentCrash(i, quiet=True)
                        return
                except Exception:
                    self._agentCrash(i, quiet=False)
                    self.unmute()
                    return
            else:
                # TODO: could this exceed the total time
                agent.registerInitialState(self.state.deepCopy())
            self.unmute()

    # Baseline for score deltas; the first reward measures the change
    # relative to an assumed starting score of 100.
    old_score = 100
    while not self.gameOver:
        act_vect = []     # actions taken by pacman agents only
        action_list = []  # one action per agent, in agent order
        reward_vect = []  # one reward per pacman agent

        # 1. Solicit an action from every agent against the current state.
        for i in range(len(self.agents)):
            action = self.agents[i].getAction(self.state.deepCopy())
            assert action in self.state.getLegalActions(i), (
                str(self.state) + " " + str(self.state.getLegalActions(i))
                + " " + str(action) + " " + str(i))
            action_list.append(action)
            if self.state.data.agentStates[i].isPacman:
                act_vect.append(action)

        old_state = self.state.deepCopy()

        # 2. Apply the actions one agent at a time, accumulating rewards.
        for i in range(len(self.agents)):
            if self.state.data._win or self.state.data._lose:
                # Terminal state reached mid-round: every pacman gets the
                # terminal reward, overwriting any partial rewards and
                # padding the vector up to numPacmanAgents entries.
                terminal_reward = 500 if self.state.data._win else -500
                for o in range(len(reward_vect)):
                    reward_vect[o] = terminal_reward
                for o in range(self.state.data.numPacmanAgents - len(reward_vect)):
                    reward_vect.append(terminal_reward)
                break
            try:
                self.moveHistory.append((i, action_list[i]))
                self.state = self.state.generateSuccessor(i, action_list[i])
            except Exception as e:
                # "Illegal ghost action " can legitimately occur because the
                # action was chosen against a now-stale state; anything else
                # is a genuine error and is re-raised.
                if str(e) == 'Can\'t generate a successor of a terminal state.':
                    print("this shouldnt happen")
                    raise Exception(str(e))
                elif str(e) != "Illegal ghost action ":
                    print("this too shouldnt happen")
                    raise Exception(str(e))
            self.display.update(self.state.data)
            # Allow for game specific conditions (winning, losing, etc.)
            self.rules.process(self.state, self)
            if self.state.data.agentStates[i].isPacman:
                reward_vect.append(self.state.data.score - old_score)
            else:
                # A ghost move only affects pacman rewards when the ghost
                # ends up within collision range of a pacman.
                r = self.state.data.score - old_score
                ghostState = self.state.data.agentStates[i]
                ghostPosition = ghostState.configuration.getPosition()
                for j in range(self.state.data.numPacmanAgents):
                    pacmanPosition = self.state.getPacmanPosition(j)
                    if manhattanDistance(ghostPosition, pacmanPosition) <= COLLISION_TOLERANCE:
                        reward_vect[j] += r / self.state.data.numPacmanAgents
            old_score = self.state.data.score

        assert len(reward_vect) == self.state.data.numPacmanAgents, \
            "reward vector flawed" + str(reward_vect)

        # 3. Log and record the transition.
        # BUG FIX: use a context manager so the file handle is closed even
        # if a write raises (was a bare open()/close() pair).
        with open("actions.txt", "a") as f:
            f.write("replay ENTRY:\n")
            f.write(str(old_state.deepCopy()) + "\n")
            f.write(str(tuple(copy.deepcopy(act_vect))) + " "
                    + str(tuple(copy.deepcopy(reward_vect))) + "\n")
            f.write(str(self.state.deepCopy()) + "\n")
        replay[(old_state.deepCopy(),
                tuple(copy.deepcopy(act_vect)),
                tuple(copy.deepcopy(reward_vect)),
                self.state.deepCopy())] = 1

        # 4. Per-agent minibatch update sampled from the replay pool.
        for i in range(len(self.agents)):
            agent = self.agents[i]
            keys = list(replay.keys())
            # Sample without replacement; never more than the pool size.
            points = np.random.choice(len(keys),
                                      min(minibatch_size, len(keys)), False)
            samples = [keys[p] for p in points]
            # BUG FIX: the timeout/crash paths below previously referenced
            # the undefined names `agentIndex`, `move_time` and
            # `skip_action` (NameError when catchExceptions is set); use
            # the loop index `i` and initialize the timing bookkeeping.
            move_time = 0
            skip_action = False
            for k in range(len(samples)):
                (s, a, r, s_n) = samples[k]
                a = list(a)
                r = list(r)
                # Generate an observation of the sampled transition
                if 'observationFunction' in dir(agent):
                    self.mute(i)
                    if self.catchExceptions:
                        try:
                            timed_func = TimeoutFunction(
                                agent.observationFunction,
                                int(self.rules.getMoveTimeout(i)))
                            try:
                                start_time = time.time()
                                observation = timed_func(
                                    s_n.deepCopy(), s.deepCopy(),
                                    copy.deepcopy(r), copy.deepcopy(a), k)
                            except TimeoutFunctionException:
                                skip_action = True
                            move_time += time.time() - start_time
                            self.unmute()
                        except Exception:
                            self._agentCrash(i, quiet=False)
                            self.unmute()
                            return
                    else:
                        observation = agent.observationFunction(
                            s_n.deepCopy(), s.deepCopy(),
                            copy.deepcopy(r), copy.deepcopy(a), k)
                    self.unmute()
                else:
                    observation = self.state.deepCopy()
                if self.state.data.agentStates[i].isPacman:
                    agent.update_the_params(len(samples))

        if _BOINC_ENABLED:
            boinc.set_fraction_done(self.getProgress())

    # inform a learning agent of the game result: one last minibatch pass
    for agentIndex, agent in enumerate(self.agents):
        if "final" in dir(agent):
            try:
                self.mute(agentIndex)
                keys = list(replay.keys())
                points = np.random.choice(len(keys),
                                          min(minibatch_size, len(keys)), False)
                samples = [keys[p] for p in points]
                for k in range(len(samples)):
                    (s, a, r, s_n) = samples[k]
                    a = list(a)
                    r = list(r)
                    observation = agent.observationFunction(
                        s_n.deepCopy(), s.deepCopy(),
                        copy.deepcopy(r), copy.deepcopy(a), k)
                    if self.state.data.agentStates[agentIndex].isPacman:
                        agent.final(s_n.deepCopy(), len(samples))
                self.unmute()
            except Exception as data:
                if not self.catchExceptions:
                    raise data
                self._agentCrash(agentIndex)
                self.unmute()
                return
    self.display.finish()
    return replay
self.state = self.state.generateSuccessor( agentIndex, action ) # Change the display self.display.update( self.state.data ) ###idx = agentIndex - agentIndex % 2 + 1 ###self.display.update( self.state.makeObservation(idx).data ) # Allow for game specific conditions (winning, losing, etc.) self.rules.process(self.state, self) # Track progress if agentIndex == numAgents + 1: self.numMoves += 1 # Next agent agentIndex = ( agentIndex + 1 ) % numAgents if _BOINC_ENABLED: boinc.set_fraction_done(self.getProgress()) # inform a learning agent of the game result for agent in self.agents: if "final" in dir( agent ) : try: self.mute(agent.index) agent.final( self.state ) self.unmute() except Exception,data: if not self.catchExceptions: raise self._agentCrash(agent.index) self.unmute() return self.display.finish()
def run(self, total_pacmen, pacman_types_corresponding_indexes, graphics,
        pacmen, stillTraining, is_training, numGames, evalGraphics,
        currentRound, numTraining):
    """
    Main control loop for game play.

    Multi-pacman variant: dead pacmen are skipped in the turn order, each
    dead pacman is given a `final` call as soon as it dies, and per-pacman
    score changes are mirrored into `pacmenScoreChanges` after each move.
    Ghost agents receive `final` only after the game ends; pacmen that never
    finished training are force-finished on the last training round.

    # NOTE(review): `pacmenScoreChanges` is read/written here but never
    # defined in this method — presumably a module-level dict; verify.
    """
    self.display.initialize(self.state.data, total_pacmen,
                            pacman_types_corresponding_indexes)
    self.numMoves = 0
    # self.display.initialize(self.state.makeObservation(1).data)

    # inform learning agents of the game start
    for i in range(len(self.agents)):
        agent = self.agents[i]
        if not agent:
            self.mute(i)
            # this is a null agent, meaning it failed to load
            # the other team wins
            print("Agent %d failed to load" % i, file=sys.stderr)
            self.unmute()
            self._agentCrash(i, quiet=True)
            return
        if ("registerInitialState" in dir(agent)):
            self.mute(i)
            if self.catchExceptions:
                # Run startup under a hard time budget from the rules.
                try:
                    timed_func = TimeoutFunction(
                        agent.registerInitialState,
                        int(self.rules.getMaxStartupTime(i)))
                    try:
                        start_time = time.time()
                        timed_func(self.state.deepCopy())
                        time_taken = time.time() - start_time
                        self.totalAgentTimes[i] += time_taken
                    except TimeoutFunctionException:
                        print("Agent %d ran out of time on startup!"
                              % i, file=sys.stderr)
                        self.unmute()
                        self.agentTimeout = True
                        self._agentCrash(i, quiet=True)
                        return
                except Exception as data:
                    self._agentCrash(i, quiet=False)
                    self.unmute()
                    return
            else:
                agent.registerInitialState(
                    self.state.deepCopy(), i, type(agent).__name__,
                    pacman_types_corresponding_indexes, graphics,
                    is_training, evalGraphics, numGames)
            # TODO: could this exceed the total time
            self.unmute()

    agentIndex = self.startingIndex
    numAgents = len(self.agents)
    if graphics or evalGraphics:
        self.display.updateEpochInfo(currentRound, numTraining)

    while not self.gameOver:
        # Fetch the next agent
        agent = self.agents[agentIndex]
        # if this is a dead pacman, skip loading it
        if agent.isPacman == True:
            if agent.isDead == True:
                # Next agent
                agentIndex = (agentIndex + 1) % numAgents
                continue
            # reset the per-move score delta for this (live) pacman
            agent.scoreChange = 0
        move_time = 0
        skip_action = False

        # Generate an observation of the state
        if 'observationFunction' in dir(agent):
            self.mute(agentIndex)
            if self.catchExceptions:
                try:
                    timed_func = TimeoutFunction(
                        agent.observationFunction,
                        int(self.rules.getMoveTimeout(agentIndex)))
                    try:
                        start_time = time.time()
                        observation = timed_func(self.state.deepCopy())
                    except TimeoutFunctionException:
                        # A timed-out observation forfeits the move below.
                        skip_action = True
                    move_time += time.time() - start_time
                    self.unmute()
                except Exception as data:
                    self._agentCrash(agentIndex, quiet=False)
                    self.unmute()
                    return
            else:
                observation = agent.observationFunction(
                    self.state.deepCopy(), total_pacmen, agentIndex,
                    stillTraining)
            self.unmute()
        else:
            observation = self.state.deepCopy()

        # Solicit an action
        action = None
        self.mute(agentIndex)
        if self.catchExceptions:
            try:
                # Remaining budget = move timeout minus observation time.
                timed_func = TimeoutFunction(
                    agent.getAction,
                    int(self.rules.getMoveTimeout(agentIndex))
                    - int(move_time))
                try:
                    start_time = time.time()
                    if skip_action:
                        raise TimeoutFunctionException()
                    action = timed_func(observation)
                except TimeoutFunctionException:
                    print("Agent %d timed out on a single move!"
                          % agentIndex, file=sys.stderr)
                    self.agentTimeout = True
                    self._agentCrash(agentIndex, quiet=True)
                    self.unmute()
                    return

                move_time += time.time() - start_time
                # Slow-but-not-timed-out moves accumulate warnings; too many
                # warnings or exceeding the total budget crashes the agent.
                if move_time > self.rules.getMoveWarningTime(agentIndex):
                    self.totalAgentTimeWarnings[agentIndex] += 1
                    print("Agent %d took too long to make a move! This is warning %d"
                          % (agentIndex, self.totalAgentTimeWarnings[agentIndex]),
                          file=sys.stderr)
                    if self.totalAgentTimeWarnings[agentIndex] > self.rules.getMaxTimeWarnings(agentIndex):
                        print("Agent %d exceeded the maximum number of warnings: %d"
                              % (agentIndex, self.totalAgentTimeWarnings[agentIndex]),
                              file=sys.stderr)
                        self.agentTimeout = True
                        self._agentCrash(agentIndex, quiet=True)
                        self.unmute()
                        return

                self.totalAgentTimes[agentIndex] += move_time
                # print "Agent: %d, time: %f, total: %f" % (agentIndex, move_time, self.totalAgentTimes[agentIndex])
                if self.totalAgentTimes[agentIndex] > self.rules.getMaxTotalTime(agentIndex):
                    print("Agent %d ran out of time! (time: %1.2f)"
                          % (agentIndex, self.totalAgentTimes[agentIndex]),
                          file=sys.stderr)
                    self.agentTimeout = True
                    self._agentCrash(agentIndex, quiet=True)
                    self.unmute()
                    return
                self.unmute()
            except Exception as data:
                self._agentCrash(agentIndex)
                self.unmute()
                return
        else:
            action = agent.getAction(observation, total_pacmen, agentIndex)
        self.unmute()

        # Execute the action
        self.moveHistory.append((agentIndex, action))
        if self.catchExceptions:
            try:
                self.state = self.state.generateSuccessor(
                    agentIndex, action)
            except Exception as data:
                self.mute(agentIndex)
                self._agentCrash(agentIndex)
                self.unmute()
                return
        else:
            # extended successor signature used by this multi-pacman variant
            self.state = self.state.generateSuccessor(
                agentIndex, action, total_pacmen, pacmen, agent)

        # check if there's dead pacman in the current round
        deadPacman = None
        deadPacmanIndex = None
        if self.state.data.deadPacmanIndex != None:
            # inform this learning agent of the game result
            deadPacmanIndex = self.state.data.deadPacmanIndex
            deadPacman = pacmen[deadPacmanIndex]
            if "final" in dir(deadPacman):
                try:
                    self.mute(deadPacmanIndex)
                    forceFinish = False
                    deadPacman.final(self.state, total_pacmen,
                                     deadPacmanIndex, stillTraining,
                                     forceFinish)
                    # update its death "reward"
                    pacmenScoreChanges[deadPacmanIndex] = deadPacman.scoreChange
                    if 'observationFunction' in dir(deadPacman):
                        observation = deadPacman.observationFunction(
                            self.state.deepCopy(), total_pacmen,
                            deadPacmanIndex, stillTraining)
                    self.unmute()
                except Exception as data:
                    if not self.catchExceptions:
                        raise
                    self._agentCrash(deadPacmanIndex)
                    self.unmute()
                    return

        # Allow for game specific conditions (winning, losing, etc.)
        self.rules.process(self.state, self)
        # Change the display
        # now also remove the dead pacman from the screen
        self.display.update(self.state.data, total_pacmen, agent,
                            agentIndex, deadPacman, deadPacmanIndex,
                            currentRound, numTraining)
        ###idx = agentIndex - agentIndex % 2 + 1
        ###self.display.update( self.state.makeObservation(idx).data )

        # update pacman scoreChange
        if self.state.data.deadPacmanIndex == None:
            if agent.isPacman == True or self.state.data.collidedPacman != None:
                if agentIndex < total_pacmen:
                    # current agent is pacman
                    pacmenScoreChanges[agentIndex] = agent.scoreChange
                else:
                    # a ghost move collided with a pacman: credit that pacman
                    collidedPacmanIndex = self.state.data.collidedPacman
                    pacmenScoreChanges[collidedPacmanIndex] = self.agents[collidedPacmanIndex].scoreChange
                    self.state.data.collidedPacman = None
        # NOTE(review): this sets an attribute on the GameState itself, but
        # the reads above use self.state.data.deadPacmanIndex — possibly
        # intended to be self.state.data.deadPacmanIndex = None; confirm.
        self.state.deadPacmanIndex = None

        # Track progress
        if agentIndex == numAgents + 1:
            self.numMoves += 1
        # Next agent
        agentIndex = (agentIndex + 1) % numAgents

        if _BOINC_ENABLED:
            boinc.set_fraction_done(self.getProgress())

    # Only used for ghosts after changing to end the game iff all pacmen die
    for agentIndex, agent in enumerate(self.agents):
        if agent.isPacman == False:
            if "final" in dir(agent):
                try:
                    self.mute(agentIndex)
                    agent.final(self.state, total_pacmen, agentIndex)
                    self.unmute()
                except Exception as data:
                    if not self.catchExceptions:
                        raise
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
        else:
            continue

    # if we already reached the end of the training episodes, finish training
    # for those pacmen who didn't get a chance to be trained (usually this
    # happens if -x is too small)
    if currentRound == numTraining:
        for pacman in pacmen:
            if pacman.hasFinishedTraining == False:
                forceFinish = True
                pacman.final(self.state, total_pacmen, pacman.index,
                             stillTraining, forceFinish)
    self.display.finish()