def __init__(self, xsize, ysize, occlusions=None):
    # Use None instead of a shared mutable default for the occlusion list.
    self.AgentHome = COORD(int(floor((xsize - 1) / 2)), 0)
    self.PredatorNum = 1
    self.PredatorHome = COORD(0, 0)
    self.ChaseProbability = 0.75
    self.MoveProbability = 0.5
    self.Discount = 0.95
    self.GoalPos = COORD(int(floor((xsize - 1) / 2)), ysize - 1)
    self.Grid = Grid(xsize, ysize)
    self.NumActions = 4
    self.NumObservations = 2
    Simulator.__init__(self, self.NumActions, self.NumObservations, self.Discount)
    self.Occlusions = occlusions if occlusions is not None else []
    self.RewardClearLevel = 1000
    self.RewardDefault = -1
    self.RewardDie = -100
    self.RewardHitWall = -25
    self.RewardRange = 100
    self.State = GameState()
    self.State.AgentPos = self.AgentHome
    self.State.PredatorPos = self.PredatorHome
def __init__(self):
    self.AgentPos = COORD(5, 0)
    self.AgentDir = -1
    self.PredatorPos = COORD(0, 0)
    self.PredatorDir = -1
    self.PredatorSpeedMult = 2
    self.PredatorBeliefState = None
def __init__(self, xsize, ysize, visualrange=1, visualcone=None):
    self.AgentHome = COORD(int(floor((xsize - 1) / 2)), 0)
    self.PredatorNum = 1
    self.PredatorHome = COORD(0, 0)
    self.ChaseProbability = 0.75
    self.MoveProbability = 0.5
    self.GoalPos = COORD(int(floor((xsize - 1) / 2)), ysize - 1)
    self.NumActions = 4
    self.VisualRange = visualrange
    self.State = GameState()
    self.State.AgentPos = self.AgentHome
    self.State.PredatorPos = self.PredatorHome
    self.XSize = xsize
    self.YSize = ysize
    self.RewardDefault = -1
    self.RewardHitWall = -25
    self.RewardClearLevel = 1000
    self.RewardDie = -100
    self.Grid = Grid(xsize, ysize)
def CreatePredatorLocations(self, spawnArea, agentCoord, goalCoord, occlusions=None):
    occlusions = occlusions if occlusions is not None else []
    # Collect every cell the predator must NOT spawn in: a Manhattan-distance
    # diamond around the agent, the goal cell, and all occlusions.
    agentSurroundCoordinates = []
    for x in range(agentCoord.X - spawnArea, agentCoord.X + spawnArea + 1):
        for y in range(agentCoord.Y - spawnArea, agentCoord.Y + spawnArea + 1):
            if ManhattanDistance(COORD(x, y), agentCoord) <= 2 * spawnArea:
                agentSurroundCoordinates.append(COORD(x, y))
    agentSurroundCoordinates.append(goalCoord)
    agentSurroundCoordinates.extend(occlusions)
    predatorLocations = []
    for y in range(self.YSize):
        for x in range(self.XSize):
            newLocation = COORD(x, y)
            if newLocation in agentSurroundCoordinates:
                continue
            predatorLocations.append(newLocation)
    return predatorLocations
def GetPredatorLocations():
    # np.random.seed(1)
    tempGame = Game(XSize, YSize)
    # Exclude the spawn-protected zone around the agent's home and the goal.
    agentSurroundCoordinates = []
    for x in range(tempGame.AgentHome.X - ExperimentParams.SpawnArea,
                   tempGame.AgentHome.X + ExperimentParams.SpawnArea + 1):
        for y in range(tempGame.AgentHome.Y - ExperimentParams.SpawnArea,
                       tempGame.AgentHome.Y + ExperimentParams.SpawnArea + 1):
            if ManhattanDistance(COORD(x, y), tempGame.AgentHome) <= 2 * ExperimentParams.SpawnArea:
                agentSurroundCoordinates.append(COORD(x, y))
    agentSurroundCoordinates.append(tempGame.GoalPos)
    allPredatorLocations = [COORD(x, y) for x in range(0, XSize) for y in range(0, YSize)]
    validSpawnLocations = list(set(allPredatorLocations) - set(agentSurroundCoordinates))
    # Sample one distinct spawn location per run.
    predatorIndices = random.sample(range(0, len(validSpawnLocations)), ExperimentParams.NumRuns)
    predatorLocations = [validSpawnLocations[ind] for ind in predatorIndices]
    return predatorLocations
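# Standalone sketch (assumption: illustrates the spawn-exclusion rule above
# with plain tuples, so it runs without the project's coord module).
def spawn_exclusion_zone(agent, spawn_area):
    """Cells within Manhattan distance 2*spawn_area of the agent."""
    ax, ay = agent
    return {
        (x, y)
        for x in range(ax - spawn_area, ax + spawn_area + 1)
        for y in range(ay - spawn_area, ay + spawn_area + 1)
        if abs(x - ax) + abs(y - ay) <= 2 * spawn_area
    }

# With spawn_area = 1 the zone is the full 3x3 square around the agent,
# since every cell in that square is within Manhattan distance 2.
assert spawn_exclusion_zone((7, 0), 1) == {
    (x, y) for x in range(6, 9) for y in range(-1, 2)
}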
def ValidOcclusion(self, coord, agentCoord, goalCoord, occlusionCoords):
    # An occlusion may not touch the 3x3 neighborhoods of the origin, the
    # agent, or the goal, and may not duplicate an existing occlusion.
    aroundOrigin = [COORD(0, 0) + COORD(i, j) for i in range(-1, 2) for j in range(-1, 2)]
    aroundAgent = [agentCoord + COORD(i, j) for i in range(-1, 2) for j in range(-1, 2)]
    aroundGoal = [goalCoord + COORD(i, j) for i in range(-1, 2) for j in range(-1, 2)]
    return coord not in aroundOrigin and \
        coord not in aroundAgent and \
        coord not in aroundGoal and \
        coord not in occlusionCoords
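# Minimal sketch (assumption: plain-tuple rendering of the 3x3 "keep clear"
# neighborhoods that ValidOcclusion enforces around origin, agent, and goal).
def neighborhood(center):
    cx, cy = center
    return {(cx + i, cy + j) for i in range(-1, 2) for j in range(-1, 2)}

def valid_occlusion(coord, agent, goal, existing):
    keep_clear = neighborhood((0, 0)) | neighborhood(agent) | neighborhood(goal)
    return coord not in keep_clear and coord not in existing

assert not valid_occlusion((1, 1), (7, 0), (7, 14), [])  # adjacent to origin
assert valid_occlusion((5, 5), (7, 0), (7, 14), [])      # clear of all zones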
def __init__(self, xsize, ysize, occlusions=None, visualrange=None, verbose=False):
    occlusions = occlusions if occlusions is not None else []
    self.AgentHome = COORD(int(floor((xsize - 1) / 2)), 0)
    self.PredatorNum = 1
    self.PredatorHome = COORD(0, 0)
    self.ChaseProbability = 0.75
    self.MoveProbability = 0.5
    self.Discount = 0.95
    self.GoalPos = COORD(int(floor((xsize - 1) / 2)), ysize - 1)
    self.Grid = Grid(xsize, ysize)
    self.NumActions = 4
    self.VisualRange = visualrange
    self.Occlusions = occlusions
    if not visualrange:
        # Binary observation model: predator seen / not seen.
        self.NumObservations = 2
    else:
        # One observation per cell in the agent's visual area.
        self.VisionObservationBit = len(self.Grid.VisualArea(COORD(0, 0), 0, self.VisualRange))
        self.NumObservations = self.VisionObservationBit
    Simulator.__init__(self, self.NumActions, self.NumObservations, self.GoalPos,
                       occlusions, self.Discount)
    self.RewardClearLevel = 1000
    self.RewardDefault = -1
    self.RewardDie = -100
    self.RewardHitWall = -25
    self.RewardRange = 100
    self.State = GameState()
    self.State.AgentPos = self.AgentHome
    self.State.PredatorPos = self.PredatorHome
    self.XSize = xsize
    self.YSize = ysize
    if verbose:
        self.InitializeDisplay()
def GetValidPredatorLocations(self):
    # A predator may start anywhere that is not the goal, the agent's home,
    # an occlusion, or a cell visible from the agent's home.
    allPredatorLocations = [COORD(x, y) for x in range(self.XSize) for y in range(self.YSize)]
    invalidPredatorLocations = [self.GoalPos, self.AgentHome] + self.Occlusions
    for predator in allPredatorLocations:
        if self.Grid.VisualRay(self.AgentHome.Copy(), predator.Copy(), self.Occlusions)[0]:
            invalidPredatorLocations.append(predator)
    self.StartPredatorLocations = list(set(allPredatorLocations) - set(invalidPredatorLocations))
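# Minimal sketch (assumption: a stubbed line-of-sight test stands in for
# Grid.VisualRay, whose first return element is read as a visibility flag).
def valid_predator_starts(cells, goal, agent_home, occlusions, visible_from):
    invalid = {goal, agent_home} | set(occlusions)
    invalid |= {c for c in cells if visible_from(agent_home, c)}
    return [c for c in cells if c not in invalid]

cells = [(x, y) for x in range(3) for y in range(3)]
same_row = lambda a, b: a[1] == b[1]  # toy visibility: same row is visible
starts = valid_predator_starts(cells, (1, 2), (1, 0), [(0, 1)], same_row)
assert (2, 0) not in starts                      # visible from the agent's home
assert (2, 1) in starts and (0, 1) not in starts  # occlusions are excluded too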
def CreateRandomStartState(self):
    state = GameState()
    state = self.NewLevel(state)
    self.GetValidPredatorLocations()
    predLocation = state.PredatorPos.Copy()
    if self.PredatorObservation(state):
        # The predator sees the agent: its belief collapses to the true position.
        state.PredatorBeliefState = [state.AgentPos]
    else:
        # Otherwise the predator believes the agent could be in any
        # non-occluded cell it cannot see. VisualRay returns a tuple whose
        # first element is the visibility flag (as used elsewhere); the
        # original truth-tested the whole tuple, which is always truthy.
        allAgentLocations = [COORD(x, y) for x in range(self.XSize) for y in range(self.YSize)]
        validAgentLocations = list(set(allAgentLocations) - set(self.Occlusions))
        invisibleAgentLocations = [
            coord for coord in validAgentLocations
            if not self.Grid.VisualRay(coord, predLocation, self.Occlusions)[0]
        ]
        state.PredatorBeliefState = invisibleAgentLocations
    # Keep this state only if the agent can observe the predator from at
    # least one scan direction; otherwise resample the predator's position.
    agentObservation = np.zeros(self.NumActions)
    for scan in range(self.NumActions):
        agentObservation[scan] = self.MakeObservation(state, scan)
    if agentObservation.any():
        return state
    state.PredatorPos = self.StartPredatorLocations[Random(0, len(self.StartPredatorLocations))]
    return state
def CreateRandomOcclusions(self, value, agentCoord, goalCoord):
    allOcclusionCoords = []
    for i in range(value):
        while True:
            coord = COORD(Random(0, self.XSize), Random(0, self.YSize))
            if self.ValidOcclusion(coord, agentCoord, goalCoord, allOcclusionCoords):
                break
        allOcclusionCoords.append(coord)
    return allOcclusionCoords
def CreateOcclusions(self, value, agentCoord, goalCoord):
    occlusionCoords = []
    if value == 0:
        return occlusionCoords
    # Stage 1: split the requested number of occluded cells into contiguous
    # patch sizes of at most 10 that sum exactly to `value`.
    areaSizes = []
    while True:
        limit = value if value <= 10 else 10
        size = Random(1, limit)
        if size + np.sum(np.asarray(areaSizes)) <= value:
            areaSizes.append(size)
        if np.sum(np.asarray(areaSizes)) == value:
            break
    # Stage 2: grow each patch cell by cell from a valid start coordinate,
    # retrying (up to 100 attempts per step) when growth gets stuck.
    allOcclusionCoords = []
    for area in areaSizes:
        triedCoordinates = []
        while True:
            areaOcclusionCoords = []
            while True:
                startCoord = COORD(Random(0, self.XSize), Random(0, self.YSize))
                if (self.ValidOcclusion(startCoord, agentCoord, goalCoord, allOcclusionCoords)
                        and startCoord not in triedCoordinates):
                    break
            areaOcclusionCoords.append(startCoord)
            n = 0
            for i in range(area - 1):
                if n > 100:
                    break
                n = 0
                while True:
                    if n > 100:
                        break
                    adjacentCell = areaOcclusionCoords[-1] + Compass[Random(0, 4)]
                    if self.Inside(adjacentCell) and self.ValidOcclusion(
                            adjacentCell, agentCoord, goalCoord, allOcclusionCoords):
                        areaOcclusionCoords.append(adjacentCell)
                        break
                    n += 1
            if n > 100:
                # This start coordinate led to a dead end; try another one.
                triedCoordinates.append(startCoord)
                continue
            allOcclusionCoords.extend(areaOcclusionCoords)
            break
    return allOcclusionCoords
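# Standalone sketch of stage 1 above (assumption: the project's Random(a, b)
# samples like random.randrange, i.e. with an exclusive upper bound).
import random

def partition_occlusion_area(value, max_patch=10):
    """Split `value` occluded cells into patch sizes summing exactly to it."""
    sizes = []
    while sum(sizes) != value:
        limit = min(value, max_patch)
        size = random.randrange(1, limit)  # mirrors Random(1, limit)
        if sum(sizes) + size <= value:
            sizes.append(size)
    return sizes

sizes = partition_occlusion_area(23)
assert sum(sizes) == 23 and all(1 <= s <= 10 for s in sizes)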
def VisualArea(self, coord, observationDirection, visualRange, pureVision=False):
    # Gather the (2*visualRange + 1)^2 square of cells centred on `coord`.
    RadiusCoordinates = []
    for x in range(coord.X - visualRange, coord.X + visualRange + 1):
        for y in range(coord.Y - visualRange, coord.Y + visualRange + 1):
            if ManhattanDistance(COORD(x, y), coord) <= 2 * visualRange:
                RadiusCoordinates.append(COORD(x, y))
    RangeCoordinates = []
    # Arrange the square as a 2-D array, then re-orient it so VisualCone can
    # always slice the same forward-facing wedge.
    RadiusCoordinates = np.flipud(
        np.reshape(np.array(RadiusCoordinates),
                   (2 * visualRange + 1, 2 * visualRange + 1)).transpose())
    if observationDirection == COMPASS.NORTH:
        RangeCoordinates = self.VisualCone(RadiusCoordinates, visualRange)
    elif observationDirection == COMPASS.EAST:
        RadiusCoordinates = np.flipud(RadiusCoordinates.transpose())
        RangeCoordinates = self.VisualCone(RadiusCoordinates, visualRange)
    elif observationDirection == COMPASS.WEST:
        RadiusCoordinates = RadiusCoordinates.transpose()
        RangeCoordinates = self.VisualCone(RadiusCoordinates, visualRange)
    elif observationDirection == COMPASS.SOUTH:
        RadiusCoordinates = np.flipud(RadiusCoordinates)
        RangeCoordinates = self.VisualCone(RadiusCoordinates, visualRange)
    assert RangeCoordinates
    if pureVision:
        return RangeCoordinates
    # The four directly adjacent cells are always observable.
    for a in range(4):
        sidePos = coord + Compass[a]
        if sidePos not in RangeCoordinates:
            RangeCoordinates.append(sidePos)
    return RangeCoordinates
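# Standalone numpy demo (assumption: integer cell ids stand in for COORD
# objects) of the flip/transpose re-orientations used above: each heading
# rearranges the same cells so VisualCone can slice one fixed wedge shape.
import numpy as np

r = 1
square = np.arange((2 * r + 1) ** 2).reshape(2 * r + 1, 2 * r + 1)
north = np.flipud(square.transpose())
east = np.flipud(north.transpose())
west = north.transpose()
south = np.flipud(north)
for oriented in (north, east, west, south):
    assert sorted(oriented.flatten()) == sorted(square.flatten())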
def getPredatorLocations(directory):
    parentDirectory = os.path.dirname(directory)
    predatorLocations = [None for simulation in range(NumSimulations)]
    for simulationInd in range(NumSimulations):
        episodeFolder = parentDirectory + '/planning/Data/Simulation_%d/Vision_1/Depth_5000/Episode_*.csv' % (simulationInd)
        episodeFiles = glob.glob(episodeFolder)
        episode = pd.read_csv(episodeFiles[0], header=0)
        predatorLocations[simulationInd] = COORD(episode['Predator X'].iloc[0],
                                                 episode['Predator Y'].iloc[0])
    return predatorLocations
def LocalMove(self, state, history, stepObs, status):
    # Teleport the predator to a random non-occluded cell and accept the
    # move only if it is consistent with the last observation.
    allPredatorLocations = [COORD(x, y) for x in range(self.XSize) for y in range(self.YSize)]
    possiblePredatorLocations = list(set(allPredatorLocations) - set(self.Occlusions))
    state.PredatorPos = possiblePredatorLocations[Random(0, len(possiblePredatorLocations))]
    if history.Size() == 0:
        return True, state
    observation = self.MakeObservation(state, state.AgentDir)
    # Return the state alongside the flag so both branches unpack alike
    # (the original returned a bare bool here).
    return history.Back().Observation == observation, state
def SelectASTARRandom(self, state, history, status):
    # With probability 0.5 follow the first step of an A* path to the goal;
    # otherwise fall back to a uniformly random action.
    if Bernoulli(0.5):
        self.ASTAR.__init__(self.XSize, self.YSize, self.Occlusions)
        self.ASTAR.InitializeGrid(state.AgentPos.Copy(), self.GoalPos.Copy())
        path = self.ASTAR.Solve()
        pathPos = COORD(path[1][0], path[1][1])
        for action in range(self.NumActions):
            agentPos = state.AgentPos.Copy()
            nextAgentPos = agentPos + Compass[action]
            if nextAgentPos == pathPos:
                break
        return action
    return self.SelectRandom(state, history, status)
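# Minimal sketch (assumption: Compass maps actions 0-3 to unit steps; the
# N/E/S/W ordering below is illustrative, not taken from the project).
COMPASS_STEPS = [(0, 1), (1, 0), (0, -1), (-1, 0)]

def action_towards(agent, next_cell):
    """Return the action whose unit step moves `agent` onto `next_cell`."""
    for action, (dx, dy) in enumerate(COMPASS_STEPS):
        if (agent[0] + dx, agent[1] + dy) == next_cell:
            return action
    return None

assert action_towards((3, 3), (3, 4)) == 0  # one step "north" under this order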
def getOcclusions(directory):
    parentDirectory = os.path.dirname(directory)
    occlusionList = [[[] for occlusion in range(NumEntropy)]
                     for simulation in range(NumSimulations)]
    for simulationInd in range(NumSimulations):
        for occlusionInd in range(NumEntropy):
            occlusionFile = parentDirectory + "/planning/Data/Simulation_%d/Occlusion_%d/OcclusionCoordinates.csv" % (simulationInd, occlusionInd)
            occlusion = pd.read_csv(occlusionFile, header=0)
            occlusions = [COORD(x[0], x[1]) for x in occlusion[['X', 'Y']].values]
            occlusionList[simulationInd][occlusionInd] = occlusions
    return occlusionList
def getEnvironmentPolicies(directory):
    parentDirectory = os.path.dirname(directory)
    print("Importing successful trajectories...")
    environmentPolicies = [[[[] for predator in range(NumPredators)]
                            for occlusion in range(NumEntropy)]
                           for simulation in range(NumSimulations)]
    predatorHomes = np.full((NumPredators, NumSimulations, NumEntropy), np.nan, dtype='object')
    for simulationInd in range(NumSimulations):
        for occlusionInd in range(NumEntropy):
            sys.stdout.write('\r' + "Simulation #: %d, Entropy: %d" % (simulationInd, occlusionInd))
            for predatorInd in range(NumPredators):
                if not os.path.exists(parentDirectory +
                                      '/planning/Data/Simulation_%d/Occlusion_%d/Predator_%d/' %
                                      (simulationInd, occlusionInd, predatorInd)):
                    continue
                episodeFolder = parentDirectory + '/planning/Data/Simulation_%d/Occlusion_%d/Predator_%d/Depth_5000/Episode_*.csv' % (simulationInd, occlusionInd, predatorInd)
                episodeFiles = glob.glob(episodeFolder)
                for i, episodeFile in enumerate(episodeFiles):
                    episode = pd.read_csv(episodeFile, header=0)
                    if i == 0:
                        predatorHomes[predatorInd, simulationInd, occlusionInd] = COORD(
                            episode['Predator X'].iloc[0], episode['Predator Y'].iloc[0])
                    # Keep only successful episodes (non-negative final reward).
                    if episode['Reward'].iloc[-1] < 0:
                        continue
                    policies = environmentPolicies[simulationInd][occlusionInd][predatorInd]
                    policy = episode[['Action']].values.flatten()
                    policies.append(policy[1:])
                    environmentPolicies[simulationInd][occlusionInd][predatorInd] = policies
    return environmentPolicies, predatorHomes
def CreateStartState(self):
    state = GameState()
    state = self.NewLevel(state)
    predLocation = state.PredatorPos.Copy()
    if self.PredatorObservation(state):
        # The predator sees the agent, so its belief is exact.
        state.PredatorBeliefState = [state.AgentPos]
    else:
        # Belief spans every non-occluded cell hidden from the predator
        # (the visibility flag is the first element of VisualRay's return;
        # the original truth-tested the whole tuple).
        allAgentLocations = [COORD(x, y) for x in range(self.XSize) for y in range(self.YSize)]
        validAgentLocations = list(set(allAgentLocations) - set(self.Occlusions))
        invisibleAgentLocations = [
            coord for coord in validAgentLocations
            if not self.Grid.VisualRay(coord, predLocation, self.Occlusions)[0]
        ]
        state.PredatorBeliefState = invisibleAgentLocations
    return state
def Run(self, policy):
    undiscountedReturn = 0.0
    discountedReturn = 0.0
    discount = 1.0
    state = self.Real.CreateStartState()
    t = 0
    while True:
        try:
            # Follow the stored policy while it lasts...
            action = policy[t]
        except IndexError:
            # ...then fall back to the first step of an A* path to the goal.
            self.ASTAR.__init__(self.Real.XSize, self.Real.YSize, self.Real.Occlusions)
            self.ASTAR.InitializeGrid(state.AgentPos.Copy(), self.Real.GoalPos.Copy())
            path = self.ASTAR.Solve()
            pathPos = COORD(path[1][0], path[1][1])
            for action in range(self.Real.NumActions):
                agentPos = state.AgentPos.Copy()
                nextAgentPos = self.Real.NextPos(agentPos, action)
                if nextAgentPos == pathPos:
                    break
        terminal, state, observation, reward = self.Real.Step(state, action)
        undiscountedReturn += reward
        discountedReturn += reward * discount
        discount *= self.Real.GetDiscount()
        if terminal:
            return reward
        t += 1
def MovePredator(self, state, move, previousPredatorLocation):
    observation = self.PredatorObservation(self.Copy(state))
    try:
        believedAgentPosition = state.PredatorBeliefState[0]
    except (IndexError, TypeError):
        # Belief is empty (or still None, hence the TypeError guard added
        # here): rebuild it from what the predator can currently see.
        if observation:
            believedAgentPosition = state.AgentPos.Copy()
            state.PredatorBeliefState = [state.AgentPos.Copy()]
        else:
            # Visibility flag is the first element of VisualRay's return.
            allAgentLocations = [COORD(x, y) for x in range(self.XSize) for y in range(self.YSize)]
            invisibleAgentLocations = [
                coord for coord in allAgentLocations
                if not self.Grid.VisualRay(coord, state.PredatorPos.Copy(), self.Occlusions)[0]
            ]
            validAgentLocations = list(set(invisibleAgentLocations) - set(self.Occlusions))
            state.PredatorBeliefState = validAgentLocations
    if len(state.PredatorBeliefState) > 1:
        # Sample one hypothesis from the belief.
        believedAgentPosition = state.PredatorBeliefState[Random(0, len(state.PredatorBeliefState))]
    numberOfMoves = state.PredatorSpeedMult if move else 1
    believedState = self.Copy(state)
    believedState.AgentPos = believedAgentPosition
    # NOTE: the original immediately overwrote the believed position with the
    # true agent position on the next line; that behavior is preserved here.
    believedState.AgentPos = state.AgentPos.Copy()
    for i in range(numberOfMoves):
        if Bernoulli(self.ChaseProbability) or (i > 0 and len(self.Occlusions) > 15):
            believedState = self.MovePredatorAggressive(believedState, previousPredatorLocation)
        else:
            believedState = self.MovePredatorRandom(believedState)
    state.PredatorPos = believedState.PredatorPos
    return state, (state.AgentPos == state.PredatorPos)
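# Belief-update sketch (assumption: a standalone, plain-tuple rendering of the
# predator's belief rule used above; visibility is passed in as a flag rather
# than computed via Grid.VisualRay).
def update_predator_belief(agent_pos, predator_sees_agent, hidden_cells):
    if predator_sees_agent:
        # A sighting collapses the belief to the true agent position.
        return [agent_pos]
    # Otherwise any hidden (non-occluded, out-of-sight) cell is possible.
    return list(hidden_cells)

assert update_predator_belief((3, 3), True, [(0, 0), (1, 1)]) == [(3, 3)]
assert update_predator_belief((3, 3), False, [(0, 0), (1, 1)]) == [(0, 0), (1, 1)]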
def __init__(self):
    self.AgentPos = COORD(7, 0)
    self.PredatorPos = COORD(0, 0)
    self.PredatorDir = -1
    self.PredatorSpeedMult = 2
def NextPos(self, fromCoord, dir):
    nextPos = COORD(fromCoord.X, fromCoord.Y) + Compass[dir]
    if self.Inside(nextPos):
        return nextPos
    else:
        return Compass[COMPASS.NAA]
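# Minimal sketch (assumption: a plain-tuple version of the clamped step above;
# None stands in for the Compass[COMPASS.NAA] "no move" sentinel).
def next_pos(from_cell, step, xsize=15, ysize=15):
    nx, ny = from_cell[0] + step[0], from_cell[1] + step[1]
    if 0 <= nx < xsize and 0 <= ny < ysize:
        return (nx, ny)
    return None

assert next_pos((0, 0), (-1, 0)) is None   # stepping off-grid is rejected
assert next_pos((7, 0), (0, 1)) == (7, 1)  # a legal one-cell step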
from grid import Grid
from entropy import Entropy
from astar import AStar
from math import floor
import os, sys, pickle, itertools
from pathlib2 import Path
import numpy as np
from mpi4py import MPI

# COORD is referenced below but was not imported in the original header;
# it comes from the project's coord module (as imported elsewhere here).
from coord import COORD
# NOTE: SearchParams is also referenced below without an import; it
# presumably comes from the project's MCTS/planning module.

SearchParams.Verbose = 0

XSize = 15
YSize = 15
AgentHome = COORD(7, 0)
GoalPos = COORD(floor(XSize / 2), YSize - 1)
visualRange = None
treeknowlege = 2  # 0 = pure, 1 = legal, 2 = smart
rolloutknowledge = 2  # 0 = pure, 1 = legal, 2 = smart
smarttreecount = 1.0  # prior count for preferred actions in smart tree search
smarttreevalue = 1.0  # prior value for preferred actions during smart tree search


def GetPath(occlusions):
    ASTAR = AStar(XSize, YSize, occlusions)
    ASTAR.InitializeGrid(AgentHome, GoalPos)
    path = ASTAR.Solve()
    return path
def Coordinate(self, index):
    # Inverse of Index() below: row 0 of the flat layout is the top row
    # (y = YSize - 1). The original returned COORD(quotient, remainder),
    # which does not invert Index().
    assert index < self.XSize * self.YSize
    row, x = divmod(index, self.XSize)
    return COORD(x, self.YSize - 1 - row)
def Index(self, x, y):
    assert self.Inside(COORD(x, y))
    return (self.XSize * (self.YSize - 1 - y)) + x
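# Round-trip check (assumption: a standalone plain-int re-implementation of
# Index/Coordinate above, confirming the top-row-first flat layout inverts).
def index(x, y, xsize, ysize):
    return xsize * (ysize - 1 - y) + x

def coordinate(idx, xsize, ysize):
    row, x = divmod(idx, xsize)
    return x, ysize - 1 - row

assert all(
    coordinate(index(x, y, 15, 15), 15, 15) == (x, y)
    for x in range(15) for y in range(15)
)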
def GetPossiblePredatorLocations(self):
    allPredatorLocations = [COORD(x, y) for x in range(self.XSize) for y in range(self.YSize)]
    invalidPredatorLocations = [self.GoalPos, self.AgentHome] + self.Occlusions
    return list(set(allPredatorLocations) - set(invalidPredatorLocations))
import os, sys, glob
import numpy as np
from PolicyReuseQ import PRQLearningAgent
from coord import COORD, Opposite
from grid import Grid
import warnings
warnings.filterwarnings("ignore")

XSize = 15
YSize = 15
NumSimulations = 20
NumVisualRange = 5
AgentHome = COORD(7, 0)


def getEnvironmentPolicies(directory):
    parentDirectory = os.path.dirname(directory)
    print("Importing successful trajectories...")
    # Indexed as [simulation][visual range]; the original comprehension had
    # the two dimensions swapped relative to its own loop variables.
    environmentPolicies = [[[] for vision in range(NumVisualRange)]
                           for simulation in range(NumSimulations)]
    for simulationInd in range(NumSimulations):
        for visrange in range(NumVisualRange):
            sys.stdout.write('\r' + "Simulation #: %d, Visual range: %d" % (simulationInd, visrange + 1))
            episodeFolder = parentDirectory + '/planning/Data/Simulation_%d/Vision_%d/Depth_5000/Episode_*.csv' % (simulationInd, visrange + 1)
            episodeFiles = glob.glob(episodeFolder)
            for episodeFile in episodeFiles: