def iterateTrainOneCondition(parameterOneCondition):
    """Iteratively train the sheep and the centrally controlled wolves networks.

    For every iteration the function asks ``GenerateTrajectoriesParallel`` for
    new trajectories, merges them into the replay buffer, trains each
    trainable agent with ``trainOneAgent`` and saves the updated network.

    Args:
        parameterOneCondition: mapping that provides
            ``'numTrainStepEachIteration'`` and
            ``'numTrajectoriesPerIteration'``; both are converted with
            ``int()``.

    Side effects:
        Creates the trajectory / model directories under
        ``data/iterTrain2wolves1sheepMADDPGEnv``, restores the pre-trained
        networks, writes trajectories and network checkpoints, and prints
        progress and the elapsed wall-clock time.
    """
    numTrainStepEachIteration = int(parameterOneCondition['numTrainStepEachIteration'])
    numTrajectoriesPerIteration = int(parameterOneCondition['numTrajectoriesPerIteration'])
    dirName = os.path.dirname(__file__)

    # Two learners: index 0 is the sheep, index 1 the wolves' central controller.
    numOfAgent = 2
    agentIds = list(range(numOfAgent))

    maxRunningSteps = 50
    numSimulations = 250
    killzoneRadius = 50
    fixedParameters = {
        'maxRunningSteps': maxRunningSteps,
        'numSimulations': numSimulations,
        'killzoneRadius': killzoneRadius,
    }

    # env MDP
    # NOTE(review): apart from numEntities, none of the environment objects
    # built in this section are consumed further down in this function. The
    # constructor calls are kept because their side effects are not visible
    # here -- confirm and delete if they are plain value holders.
    sheepsID = [0]
    wolvesID = [1, 2]
    blocksID = []

    numSheeps = len(sheepsID)
    numWolves = len(wolvesID)
    numBlocks = len(blocksID)
    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks

    sheepSize = 0.05
    wolfSize = 0.075
    blockSize = 0.2

    sheepMaxSpeed = 1.3 * 1
    wolfMaxSpeed = 1.0 * 1
    blockMaxSpeed = None

    entitiesSizeList = ([sheepSize] * numSheeps + [wolfSize] * numWolves
                        + [blockSize] * numBlocks)
    entityMaxSpeedList = ([sheepMaxSpeed] * numSheeps + [wolfMaxSpeed] * numWolves
                          + [blockMaxSpeed] * numBlocks)
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    centralControlId = 1
    centerControlIndexList = [centralControlId]
    reshapeAction = UnpackCenterControlAction(centerControlIndexList)
    getCollisionForce = GetCollisionForce()
    applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
    applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList,
                                          entitiesSizeList, getCollisionForce,
                                          getPosFromAgentState)
    integrateState = IntegrateState(numEntities, entitiesMovableList, massList,
                                    entityMaxSpeedList, getVelFromAgentState,
                                    getPosFromAgentState)
    interpolateState = TransitMultiAgentChasing(numEntities, reshapeAction,
                                                applyActionForce, applyEnvironForce,
                                                integrateState)

    isCollision = IsCollision(getPosFromAgentState)
    collisonRewardWolf = 1
    punishForOutOfBound = PunishForOutOfBound()
    rewardWolf = RewardCentralControlPunishBond(
        wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState,
        isCollision, punishForOutOfBound, collisonRewardWolf)
    collisonRewardSheep = -1
    rewardSheep = RewardCentralControlPunishBond(
        sheepsID, wolvesID, entitiesSizeList, getPosFromAgentState,
        isCollision, punishForOutOfBound, collisonRewardSheep)
    resetState = ResetMultiAgentChasing(numAgents, numBlocks)

    # policy
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0),
                   (-7, -7), (0, -10), (7, -7), (0, 0)]
    wolfActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0),
                       (-7, -7), (0, -10), (7, -7), (0, 0)]

    preyPowerRatio = 0.5
    sheepActionSpace = list(map(tuple, np.array(actionSpace) * preyPowerRatio))

    predatorPowerRatio = 0.5
    wolfActionOneSpace = list(map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))
    wolfActionTwoSpace = list(map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))
    # The central controller picks one action per wolf, i.e. a pair.
    wolvesActionSpace = list(it.product(wolfActionOneSpace, wolfActionTwoSpace))

    # neural network init
    numStateSpace = 4 * numEntities
    numSheepActionSpace = len(sheepActionSpace)
    numWolvesActionSpace = len(wolvesActionSpace)

    regularizationFactor = 1e-4
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]

    generateSheepModel = GenerateModel(numStateSpace, numSheepActionSpace,
                                       regularizationFactor)
    generateWolvesModel = GenerateModel(numStateSpace, numWolvesActionSpace,
                                        regularizationFactor)
    generateModelList = [generateSheepModel, generateWolvesModel]

    sheepDepth = 9
    wolfDepth = 9
    depthList = [sheepDepth, wolfDepth]
    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    # Built exactly once. The same list used to be constructed a second time
    # further down with identical arguments, discarding this first result.
    multiAgentNNmodel = [
        generateModel(sharedWidths * depth, actionLayerWidths, valueLayerWidths,
                      resBlockSize, initializationMethod, dropoutRate)
        for depth, generateModel in zip(depthList, generateModelList)
    ]

    # replay buffer
    bufferSize = 20000
    saveToBuffer = SaveToBuffer(bufferSize)

    def getUniformSamplingProbabilities(buffer):
        """Return one equal probability per buffer entry."""
        return [(1 / len(buffer)) for _ in buffer]

    miniBatchSize = 512
    sampleBatchFromBuffer = SampleBatchFromBuffer(miniBatchSize,
                                                  getUniformSamplingProbabilities)

    # pre-process the trajectory for replayBuffer
    decay = 1
    accumulateMultiAgentRewards = AccumulateMultiAgentRewards(decay)
    addMultiAgentValuesToTrajectory = AddValuesToTrajectory(accumulateMultiAgentRewards)
    actionIndex = 1

    def getTerminalActionFromTrajectory(trajectory):
        """Return the action entry of the last tuple in the trajectory."""
        return trajectory[-1][actionIndex]

    removeTerminalTupleFromTrajectory = RemoveTerminalTupleFromTrajectory(
        getTerminalActionFromTrajectory)

    # pre-process the trajectory for NNTraining
    sheepActionToOneHot = ActionToOneHot(sheepActionSpace)
    wolvesActionToOneHot = ActionToOneHot(wolvesActionSpace)
    actionToOneHotList = [sheepActionToOneHot, wolvesActionToOneHot]
    processTrajectoryForPolicyValueNets = [
        ProcessTrajectoryForPolicyValueNetMultiAgentReward(actionToOneHotList[agentId],
                                                           agentId)
        for agentId in agentIds
    ]

    # function to train NN model
    terminalThreshold = 1e-6
    lossHistorySize = 10
    initActionCoeff = 1
    initValueCoeff = 1
    initCoeff = (initActionCoeff, initValueCoeff)
    afterActionCoeff = 1
    afterValueCoeff = 1
    afterCoeff = (afterActionCoeff, afterValueCoeff)

    terminalController = TrainTerminalController(lossHistorySize, terminalThreshold)
    coefficientController = CoefficientCotroller(initCoeff, afterCoeff)

    reportInterval = 10000
    trainReporter = TrainReporter(numTrainStepEachIteration, reportInterval)

    learningRateDecay = 1
    learningRateDecayStep = 1
    learningRate = 0.0001
    learningRateModifier = LearningRateModifier(learningRate, learningRateDecay,
                                                learningRateDecayStep)

    trainNN = Train(numTrainStepEachIteration, miniBatchSize, sampleData,
                    learningRateModifier, terminalController, coefficientController,
                    trainReporter)

    # load save dir
    trajectorySaveExtension = '.pickle'
    NNModelSaveExtension = ''
    trajectoriesSaveDirectory = os.path.join(
        dirName, '..', '..', 'data', 'iterTrain2wolves1sheepMADDPGEnv', 'trajectories')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(trajectoriesSaveDirectory, exist_ok=True)

    NNModelSaveDirectory = os.path.join(
        dirName, '..', '..', 'data', 'iterTrain2wolves1sheepMADDPGEnv', 'NNModelRes')
    os.makedirs(NNModelSaveDirectory, exist_ok=True)

    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory,
                                             trajectorySaveExtension, fixedParameters)
    generateNNModelSavePath = GetSavePath(NNModelSaveDirectory, NNModelSaveExtension,
                                          fixedParameters)

    startTime = time.time()

    preprocessMultiAgentTrajectories = PreprocessTrajectoriesForBuffer(
        addMultiAgentValuesToTrajectory, removeTerminalTupleFromTrajectory)
    numTrajectoriesToStartTrain = 1024

    trainOneAgent = TrainOneAgent(numTrainStepEachIteration, numTrajectoriesToStartTrain,
                                  processTrajectoryForPolicyValueNets,
                                  sampleBatchFromBuffer, trainNN)

    # restorePretrainModel
    sheepPreTrainModelPath = os.path.join(
        dirName, '..', '..', 'data', 'MADDPG2wolves1sheep',
        'trainSheepWithPretrrainWolves', 'trainedResNNModels',
        'agentId=0_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations=250_trainSteps=50000'
    )
    wolvesPreTrainModelPath = os.path.join(
        dirName, '..', '..', 'data', 'MADDPG2wolves1sheep',
        'trainWolvesTwoCenterControlAction', 'trainedResNNModels',
        'agentId=1_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations=250_trainSteps=50000'
    )
    pretrainModelPathList = [sheepPreTrainModelPath, wolvesPreTrainModelPath]

    sheepId, wolvesId = [0, 1]
    trainableAgentIds = [sheepId, wolvesId]

    # Load the pre-trained weights and store them as the iteration-0 checkpoint.
    for agentId in trainableAgentIds:
        multiAgentNNmodel[agentId] = restoreVariables(multiAgentNNmodel[agentId],
                                                      pretrainModelPathList[agentId])
        NNModelPathParameters = {
            'iterationIndex': 0,
            'agentId': agentId,
            'numTrajectoriesPerIteration': numTrajectoriesPerIteration,
            'numTrainStepEachIteration': numTrainStepEachIteration,
        }
        NNModelSavePath = generateNNModelSavePath(NNModelPathParameters)
        saveVariables(multiAgentNNmodel[agentId], NNModelSavePath)

    fuzzySearchParameterNames = ['sampleIndex']
    loadTrajectoriesForParallel = LoadTrajectories(generateTrajectorySavePath,
                                                   loadFromPickle,
                                                   fuzzySearchParameterNames)
    loadTrajectoriesForTrainBreak = LoadTrajectories(generateTrajectorySavePath,
                                                     loadFromPickle)

    # initRreplayBuffer
    replayBuffer = []
    trajectoryBeforeTrainIndex = 0
    beforeTrainPathParameters = {'iterationIndex': trajectoryBeforeTrainIndex}
    trajectoriesBeforeTrain = loadTrajectoriesForParallel(beforeTrainPathParameters)
    preProcessedTrajectoriesBeforeTrain = preprocessMultiAgentTrajectories(
        trajectoriesBeforeTrain)
    replayBuffer = saveToBuffer(replayBuffer, preProcessedTrajectoriesBeforeTrain)

    # delete used model for disk space
    fixedParametersForDelete = {
        'maxRunningSteps': maxRunningSteps,
        'numSimulations': numSimulations,
        'killzoneRadius': killzoneRadius,
        'numTrajectoriesPerIteration': numTrajectoriesPerIteration,
        'numTrainStepEachIteration': numTrainStepEachIteration,
    }
    toDeleteNNModelExtensionList = ['.meta', '.index', '.data-00000-of-00001']
    generatetoDeleteNNModelPathList = [
        GetSavePath(NNModelSaveDirectory, toDeleteNNModelExtension,
                    fixedParametersForDelete)
        for toDeleteNNModelExtension in toDeleteNNModelExtensionList
    ]

    # restore model
    restoredIteration = 0
    for agentId in trainableAgentIds:
        modelPathForRestore = generateNNModelSavePath({
            'iterationIndex': restoredIteration,
            'agentId': agentId,
            'numTrajectoriesPerIteration': numTrajectoriesPerIteration,
            'numTrainStepEachIteration': numTrainStepEachIteration,
        })
        multiAgentNNmodel[agentId] = restoreVariables(multiAgentNNmodel[agentId],
                                                      modelPathForRestore)

    # restore buffer
    bufferTrajectoryPathParameters = {
        'numTrajectoriesPerIteration': numTrajectoriesPerIteration,
        'numTrainStepEachIteration': numTrainStepEachIteration,
    }
    # With restoredIteration == 0 this range is empty.
    restoredIterationIndexRange = range(restoredIteration)
    restoredTrajectories = loadTrajectoriesForTrainBreak(
        parameters=bufferTrajectoryPathParameters,
        parametersWithSpecificValues={
            'iterationIndex': list(restoredIterationIndexRange)
        })
    preProcessedRestoredTrajectories = preprocessMultiAgentTrajectories(
        restoredTrajectories)
    print(len(preProcessedRestoredTrajectories))
    replayBuffer = saveToBuffer(replayBuffer, preProcessedRestoredTrajectories)

    modelMemorySize = 5
    modelSaveFrequency = 50
    deleteUsedModel = DeleteUsedModel(modelMemorySize, modelSaveFrequency,
                                      generatetoDeleteNNModelPathList)

    numIterations = 10000
    for iterationIndex in range(restoredIteration + 1, numIterations):
        print('iterationIndex: ', iterationIndex)

        numCpuToUseWhileTrain = int(16)
        numCmdList = min(numTrajectoriesPerIteration, numCpuToUseWhileTrain)
        sampleTrajectoryFileName = 'sampleMultiMCTSAgentCenterControlResNetTrajCondtion.py'
        generateTrajectoriesParallelWhileTrain = GenerateTrajectoriesParallel(
            sampleTrajectoryFileName, numTrajectoriesPerIteration, numCmdList)

        trajectoryPathParameters = {
            'iterationIndex': iterationIndex,
            'numTrajectoriesPerIteration': numTrajectoriesPerIteration,
            'numTrainStepEachIteration': numTrainStepEachIteration,
        }
        # Return value is not needed; the trajectories are read back from disk.
        generateTrajectoriesParallelWhileTrain(trajectoryPathParameters)

        trajectories = loadTrajectoriesForParallel(trajectoryPathParameters)
        trajectorySavePath = generateTrajectorySavePath(trajectoryPathParameters)
        saveToPickle(trajectories, trajectorySavePath)

        preProcessedTrajectories = preprocessMultiAgentTrajectories(trajectories)
        replayBuffer = saveToBuffer(replayBuffer, preProcessedTrajectories)

        for agentId in trainableAgentIds:
            updatedAgentNNModel = trainOneAgent(agentId, multiAgentNNmodel, replayBuffer)

            NNModelPathParameters = {
                'iterationIndex': iterationIndex,
                'agentId': agentId,
                'numTrajectoriesPerIteration': numTrajectoriesPerIteration,
                'numTrainStepEachIteration': numTrainStepEachIteration,
            }
            NNModelSavePath = generateNNModelSavePath(NNModelPathParameters)
            saveVariables(updatedAgentNNModel, NNModelSavePath)
            multiAgentNNmodel[agentId] = updatedAgentNNModel

            deleteUsedModel(iterationIndex, agentId)

    endTime = time.time()
    print("Time taken for {} iterations: {} seconds".format(
        numIterations, (endTime - startTime)))
def main():
    """Sample trajectories with an MCTS wolves controller against a trained sheep.

    Outside DEBUG mode the command line supplies a JSON dict of path
    parameters (``sys.argv[1]``, must contain ``'agentId'``) and the start /
    end sample indices (``sys.argv[2]``, ``sys.argv[3]``). Nothing is sampled
    when the target pickle file already exists; otherwise
    ``endSampleIndex - startSampleIndex`` trajectories are generated, their
    lengths printed, and the list written with ``saveToPickle``.
    """
    DEBUG = 0
    renderOn = 0
    if DEBUG:
        parametersForTrajectoryPath = {}
        startSampleIndex = 5
        endSampleIndex = 7
        agentId = 1
    else:
        parametersForTrajectoryPath = json.loads(sys.argv[1])
        startSampleIndex = int(sys.argv[2])
        endSampleIndex = int(sys.argv[3])
        agentId = int(parametersForTrajectoryPath['agentId'])
    parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex, endSampleIndex)

    # check file exists or not
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(
        dirName, '..', '..', '..', '..', 'data', 'NoPhysics2wolves1sheep',
        'trainWolvesTwoCenterControlAction88', 'trajectories')
    # exist_ok avoids the check-then-create race of exists() + makedirs(),
    # which matters when several sampler processes start at once.
    os.makedirs(trajectoriesSaveDirectory, exist_ok=True)

    trajectorySaveExtension = '.pickle'
    maxRunningSteps = 50
    numSimulations = 250
    killzoneRadius = 150
    fixedParameters = {
        'agentId': agentId,
        'maxRunningSteps': maxRunningSteps,
        'numSimulations': numSimulations,
        'killzoneRadius': killzoneRadius,
    }

    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory,
                                             trajectorySaveExtension, fixedParameters)
    trajectorySavePath = generateTrajectorySavePath(parametersForTrajectoryPath)

    if os.path.isfile(trajectorySavePath):
        # This sample range has already been produced.
        return

    numOfAgent = 3
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    resetState = Reset(xBoundary, yBoundary, numOfAgent)

    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(xBoundary, yBoundary)
    interpolateOneFrame = InterpolateOneFrame(stayInBoundaryByReflectVelocity)

    def chooseInterpolatedNextState(interpolatedStates):
        """Pick the last of the interpolated states."""
        return interpolatedStates[-1]

    sheepId = 0
    wolvesId = 1
    centerControlIndexList = [wolvesId]
    unpackCenterControlAction = UnpackCenterControlAction(centerControlIndexList)
    numFramesToInterpolate = 0
    transit = TransitWithInterpolation(numFramesToInterpolate, interpolateOneFrame,
                                       chooseInterpolatedNextState,
                                       unpackCenterControlAction)

    # MCTS child selection
    cInit = 1
    cBase = 100
    calculateScore = ScoreChild(cInit, cBase)
    selectChild = SelectChild(calculateScore)

    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0),
                   (-7, -7), (0, -10), (7, -7), (0, 0)]
    # The wolves' per-wolf action set has no (0, 0) entry.
    wolfActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0),
                       (-7, -7), (0, -10), (7, -7)]

    preyPowerRatio = 10
    sheepActionSpace = list(map(tuple, np.array(actionSpace) * preyPowerRatio))

    predatorPowerRatio = 8
    wolfActionOneSpace = list(map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))
    wolfActionTwoSpace = list(map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))
    # Central control: one joint action is a pair (wolf one, wolf two).
    wolvesActionSpace = list(product(wolfActionOneSpace, wolfActionTwoSpace))

    # neural network init
    numStateSpace = 2 * numOfAgent
    numSheepActionSpace = len(sheepActionSpace)
    numWolvesActionSpace = len(wolvesActionSpace)

    regularizationFactor = 1e-4
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]
    generateSheepModel = GenerateModel(numStateSpace, numSheepActionSpace,
                                       regularizationFactor)

    # load save dir
    NNModelSaveExtension = ''
    sheepNNModelSaveDirectory = os.path.join(
        dirName, '..', '..', '..', '..', 'data', 'NoPhysics2wolves1sheep',
        'trainSheepWithTwoHeatSeekingWolves', 'trainedResNNModels')
    sheepNNModelFixedParameters = {
        'agentId': 0,
        'maxRunningSteps': 50,
        'numSimulations': 110,
        'miniBatchSize': 256,
        'learningRate': 0.0001,
    }
    getSheepNNModelSavePath = GetSavePath(sheepNNModelSaveDirectory,
                                          NNModelSaveExtension,
                                          sheepNNModelFixedParameters)

    depth = 9
    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    initSheepNNModel = generateSheepModel(sharedWidths * depth, actionLayerWidths,
                                          valueLayerWidths, resBlockSize,
                                          initializationMethod, dropoutRate)

    sheepTrainedModelPath = getSheepNNModelSavePath({'trainSteps': 50000, 'depth': depth})
    sheepTrainedModel = restoreVariables(initSheepNNModel, sheepTrainedModelPath)
    sheepPolicy = ApproximatePolicy(sheepTrainedModel, sheepActionSpace)

    # prior
    def getActionPrior(state):
        """Return a uniform prior over the joint wolves actions."""
        return {action: 1 / len(wolvesActionSpace) for action in wolvesActionSpace}

    # Inside the search tree the sheep's action is drawn from its policy.
    chooseActionInMCTS = sampleFromDistribution

    def wolvesTransit(state, action):
        """Transit with a sampled sheep action and the given wolves action."""
        return transit(state, [chooseActionInMCTS(sheepPolicy(state)), action])

    # reward function
    wolfOneId = 1
    wolfTwoId = 2
    xPosIndex = [0, 1]
    getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
    getWolfOneXPos = GetAgentPosFromState(wolfOneId, xPosIndex)
    getWolfTwoXPos = GetAgentPosFromState(wolfTwoId, xPosIndex)

    isCollidedOne = IsTerminal(getWolfOneXPos, getSheepXPos, killzoneRadius)
    isCollidedTwo = IsTerminal(getWolfTwoXPos, getSheepXPos, killzoneRadius)

    def calCollisionTimes(state):
        """Sum the two wolves' collision flags for this state.

        The alternative noted in the original was np.max over the two flags
        (collision counted as a boolean for any wolf).
        """
        return np.sum([isCollidedOne(state), isCollidedTwo(state)])

    calTerminationSignals = calCollisionTimes
    chooseInterpolatedStateByEarlyTermination = ChooseInterpolatedStateByEarlyTermination(
        calTerminationSignals)

    numFramesToInterpolateInReward = 3
    interpolateStateInReward = TransitWithInterpolation(
        numFramesToInterpolateInReward, interpolateOneFrame,
        chooseInterpolatedStateByEarlyTermination, unpackCenterControlAction)

    aliveBonus = -1 / maxRunningSteps * 10
    deathPenalty = 1
    rewardFunction = RewardFunctionCompeteWithStateInterpolation(
        aliveBonus, deathPenalty, calCollisionTimes, interpolateStateInReward)

    # initialize children; expand
    initializeChildren = InitializeChildren(wolvesActionSpace, wolvesTransit,
                                            getActionPrior)

    def isTerminal(state):
        """Episodes never terminate early in this setup."""
        return False

    expand = Expand(isTerminal, initializeChildren)

    # random rollout policy
    def rolloutPolicy(state):
        """Sampled sheep action plus a uniformly random joint wolves action."""
        return [
            sampleFromDistribution(sheepPolicy(state)),
            wolvesActionSpace[np.random.choice(range(numWolvesActionSpace))],
        ]

    # rollout (the distance heuristic is disabled: constant 0)
    def rolloutHeuristic(state):
        return 0

    maxRolloutSteps = 15
    rollout = RollOut(rolloutPolicy, maxRolloutSteps, transit, rewardFunction,
                      isTerminal, rolloutHeuristic)

    wolfPolicy = MCTS(numSimulations, selectChild, expand, rollout, backup,
                      establishSoftmaxActionDist)

    # All agents' policies
    chooseActionList = [maxFromDistribution, maxFromDistribution]

    def sampleAction(state):
        """Return [sheep action, wolves action] chosen from each distribution."""
        actionDists = [sheepPolicy(state), wolfPolicy(state)]
        return [
            chooseAction(actionDist)
            for actionDist, chooseAction in zip(actionDists, chooseActionList)
        ]

    render = None
    if renderOn:
        # pygame is only needed (and only imported) when rendering.
        import pygame as pg
        from pygame.color import THECOLORS
        screenColor = THECOLORS['black']
        circleColorList = [THECOLORS['green'], THECOLORS['yellow'], THECOLORS['red']]
        circleSize = 10

        saveImage = False
        saveImageDir = os.path.join(dirName, '..', '..', '..', '..', 'data', 'demoImg')
        os.makedirs(saveImageDir, exist_ok=True)

        screen = pg.display.set_mode([max(xBoundary), max(yBoundary)])
        render = Render(numOfAgent, xPosIndex, screen, screenColor, circleColorList,
                        circleSize, saveImage, saveImageDir)

    forwardOneStep = ForwardOneStep(transit, rewardFunction)
    sampleTrajectory = SampleTrajectoryWithRender(maxRunningSteps, isTerminal, resetState,
                                                  forwardOneStep, render, renderOn)

    trajectories = [
        sampleTrajectory(sampleAction)
        for sampleIndex in range(startSampleIndex, endSampleIndex)
    ]
    print([len(traj) for traj in trajectories])
    saveToPickle(trajectories, trajectorySavePath)
def main():
    """Sample evaluation trajectories with trained sheep and wolves networks.

    Command line: ``sys.argv[1]`` is a JSON dict of path parameters that must
    contain ``'trainSteps'``, ``'depth'`` and ``'dataSize'`` (they select the
    wolves checkpoint); ``sys.argv[2]`` / ``sys.argv[3]`` are the start / end
    sample indices. Nothing is sampled when the target pickle file already
    exists; otherwise the trajectories are written with ``saveToPickle``.
    """
    parametersForTrajectoryPath = json.loads(sys.argv[1])
    startSampleIndex = int(sys.argv[2])
    endSampleIndex = int(sys.argv[3])

    trainSteps = int(parametersForTrajectoryPath['trainSteps'])
    depth = int(parametersForTrajectoryPath['depth'])
    dataSize = int(parametersForTrajectoryPath['dataSize'])

    killzoneRadius = 25
    numSimulations = 200
    maxRunningSteps = 100
    fixedParameters = {
        'maxRunningSteps': maxRunningSteps,
        'numSimulations': numSimulations,
        'killzoneRadius': killzoneRadius,
    }

    trajectorySaveExtension = '.pickle'
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(
        dirName, '..', '..', '..', 'data', 'evaluateSupervisedLearning',
        'multiMCTSAgentResNetNoPhysicsCenterControl',
        'evaluateCenterControlTrajByCondition')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(trajectoriesSaveDirectory, exist_ok=True)

    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory,
                                             trajectorySaveExtension, fixedParameters)
    trajectorySavePath = generateTrajectorySavePath(parametersForTrajectoryPath)

    if os.path.isfile(trajectorySavePath):
        # Already evaluated for this condition.
        return

    numOfAgent = 3
    sheepId = 0
    wolfOneId = 1
    wolfTwoId = 2
    xPosIndex = [0, 1]
    xBoundary = [0, 600]
    yBoundary = [0, 600]

    reset = Reset(xBoundary, yBoundary, numOfAgent)

    getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
    getWolfOneXPos = GetAgentPosFromState(wolfOneId, xPosIndex)
    getWolfTwoXPos = GetAgentPosFromState(wolfTwoId, xPosIndex)

    isTerminalOne = IsTerminal(getWolfOneXPos, getSheepXPos, killzoneRadius)
    isTerminalTwo = IsTerminal(getWolfTwoXPos, getSheepXPos, killzoneRadius)

    def isTerminal(state):
        """Terminal as soon as either wolf's terminal check fires."""
        return isTerminalOne(state) or isTerminalTwo(state)

    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(xBoundary, yBoundary)
    transit = TransiteForNoPhysics(stayInBoundaryByReflectVelocity)

    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0),
                   (-7, -7), (0, -10), (7, -7), (0, 0)]

    preyPowerRatio = 3
    sheepActionSpace = list(map(tuple, np.array(actionSpace) * preyPowerRatio))

    predatorPowerRatio = 2
    wolfActionOneSpace = list(map(tuple, np.array(actionSpace) * predatorPowerRatio))
    wolfActionTwoSpace = list(map(tuple, np.array(actionSpace) * predatorPowerRatio))
    # Central control: one joint action is a pair (wolf one, wolf two).
    wolvesActionSpace = list(it.product(wolfActionOneSpace, wolfActionTwoSpace))

    # neural network init
    numStateSpace = 6
    numSheepActionSpace = len(sheepActionSpace)
    numWolvesActionSpace = len(wolvesActionSpace)

    regularizationFactor = 1e-4
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]
    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    NNModelSaveExtension = ''

    # --- sheep: fixed, previously trained checkpoint ---
    generateSheepModel = GenerateModel(numStateSpace, numSheepActionSpace,
                                       regularizationFactor)
    sheepNNModelSaveDirectory = os.path.join(
        dirName, '..', '..', '..', 'data', 'evaluateEscapeMultiChasingNoPhysics',
        'trainedResNNModelsMultiStillAction')
    sheepNNModelFixedParameters = {
        'agentId': 0,
        'maxRunningSteps': 150,
        'numSimulations': 200,
        'miniBatchSize': 256,
        'learningRate': 0.0001,
    }
    getSheepNNModelSavePath = GetSavePath(sheepNNModelSaveDirectory,
                                          NNModelSaveExtension,
                                          sheepNNModelFixedParameters)
    os.makedirs(sheepNNModelSaveDirectory, exist_ok=True)

    # The sheep network depth is fixed and independent of the wolves' depth.
    sheepDepth = 5
    initSheepNNModel = generateSheepModel(sharedWidths * sheepDepth, actionLayerWidths,
                                          valueLayerWidths, resBlockSize,
                                          initializationMethod, dropoutRate)
    sheepTrainedModelPath = getSheepNNModelSavePath({'trainSteps': 50000,
                                                     'depth': sheepDepth})
    sheepTrainedModel = restoreVariables(initSheepNNModel, sheepTrainedModelPath)
    sheepPolicy = ApproximatePolicy(sheepTrainedModel, sheepActionSpace)

    # --- wolves: checkpoint selected by the command-line condition ---
    generateWolvesModel = GenerateModel(numStateSpace, numWolvesActionSpace,
                                        regularizationFactor)
    initWolvesNNModel = generateWolvesModel(sharedWidths * depth, actionLayerWidths,
                                            valueLayerWidths, resBlockSize,
                                            initializationMethod, dropoutRate)
    wolvesNNModelSaveDirectory = os.path.join(
        dirName, '..', '..', '..', 'data', 'evaluateSupervisedLearning',
        'multiMCTSAgentResNetNoPhysicsCenterControl', 'trainedResNNModels')
    wolfId = 1
    NNModelFixedParametersWolves = {
        'agentId': wolfId,
        'maxRunningSteps': maxRunningSteps,
        'numSimulations': numSimulations,
        'miniBatchSize': 256,
        'learningRate': 0.0001,
    }
    getWolvesNNModelSavePath = GetSavePath(wolvesNNModelSaveDirectory,
                                           NNModelSaveExtension,
                                           NNModelFixedParametersWolves)
    wolvesTrainedModelPath = getWolvesNNModelSavePath({
        'trainSteps': trainSteps,
        'depth': depth,
        'dataSize': dataSize,
    })
    wolvesTrainedModel = restoreVariables(initWolvesNNModel, wolvesTrainedModelPath)
    wolfPolicy = ApproximatePolicy(wolvesTrainedModel, wolvesActionSpace)

    renderOn = False
    render = None
    if renderOn:
        # Rendering dependencies are imported lazily so that headless runs do
        # not need pygame and do not create the image directory.
        from exec.evaluateNoPhysicsEnvWithRender import Render
        import pygame as pg
        from pygame.color import THECOLORS
        screenColor = THECOLORS['black']
        circleColorList = [THECOLORS['green'], THECOLORS['red'], THECOLORS['orange']]
        circleSize = 10

        saveImage = False
        saveImageDir = os.path.join(dirName, '..', '..', '..', 'data', 'demoImg')
        os.makedirs(saveImageDir, exist_ok=True)

        screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
        render = Render(numOfAgent, xPosIndex, screen, screenColor, circleColorList,
                        circleSize, saveImage, saveImageDir)

    chooseActionList = [chooseGreedyAction, chooseGreedyAction]
    sampleTrajectory = SampleTrajectoryWithRender(maxRunningSteps, transit, isTerminal,
                                                  reset, chooseActionList, render,
                                                  renderOn)

    # All agents' policies
    def policy(state):
        """Return [sheep action distribution, wolves action distribution]."""
        return [sheepPolicy(state), wolfPolicy(state)]

    trajectories = [
        sampleTrajectory(policy)
        for sampleIndex in range(startSampleIndex, endSampleIndex)
    ]
    saveToPickle(trajectories, trajectorySavePath)
def main():
    """Sample trajectories with NN-guided MCTS policies for both agent groups.

    The networks saved for ``iterationIndex - 1`` are restored for the sheep
    and for the wolves' central controller, a multi-agent policy is built
    from them, and ``endSampleIndex - startSampleIndex`` trajectories are
    sampled, their lengths printed, and the list written with
    ``saveToPickle``. Nothing is sampled when the target pickle file already
    exists.

    Outside DEBUG mode ``sys.argv[1]`` is a JSON dict that must contain
    ``'iterationIndex'``, ``'numTrainStepEachIteration'`` and
    ``'numTrajectoriesPerIteration'``; ``sys.argv[2]`` / ``sys.argv[3]`` are
    the start / end sample indices.
    """
    # NOTE(review): DEBUG is hard-coded on, so the command-line arguments are
    # ignored and the fixed values below are used. renderOn is on as well,
    # although the render callback further down is a no-op. Confirm both are
    # intended before launching this script from a batch job.
    DEBUG = 1
    renderOn = 1
    if DEBUG:
        parametersForTrajectoryPath = {}
        startSampleIndex = 1
        endSampleIndex = 2
        iterationIndex = 2
        numTrainStepEachIteration = 1
        numTrajectoriesPerIteration = 1
    else:
        parametersForTrajectoryPath = json.loads(sys.argv[1])
        startSampleIndex = int(sys.argv[2])
        endSampleIndex = int(sys.argv[3])
        iterationIndex = int(parametersForTrajectoryPath['iterationIndex'])
        numTrainStepEachIteration = int(
            parametersForTrajectoryPath['numTrainStepEachIteration'])
        numTrajectoriesPerIteration = int(
            parametersForTrajectoryPath['numTrajectoriesPerIteration'])
    parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex, endSampleIndex)

    # check file exists or not
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(
        dirName, '..', '..', 'data', 'iterTrain2wolves1sheepMADDPGEnv', 'trajectories')
    # exist_ok avoids the check-then-create race of exists() + makedirs(),
    # which matters when several sampler processes start at once.
    os.makedirs(trajectoriesSaveDirectory, exist_ok=True)

    trajectorySaveExtension = '.pickle'
    maxRunningSteps = 50
    numSimulations = 250
    killzoneRadius = 50
    numTree = 2
    fixedParameters = {
        'maxRunningSteps': maxRunningSteps,
        'numSimulations': numSimulations,
        'killzoneRadius': killzoneRadius,
    }

    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory,
                                             trajectorySaveExtension, fixedParameters)
    trajectorySavePath = generateTrajectorySavePath(parametersForTrajectoryPath)

    if os.path.isfile(trajectorySavePath):
        # This sample range has already been produced.
        return

    # env MDP
    sheepsID = [0]
    wolvesID = [1, 2]
    blocksID = []

    numSheeps = len(sheepsID)
    numWolves = len(wolvesID)
    numBlocks = len(blocksID)
    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks

    sheepSize = 0.05
    wolfSize = 0.075
    blockSize = 0.2

    sheepMaxSpeed = 1.3 * 1
    wolfMaxSpeed = 1.0 * 1
    blockMaxSpeed = None

    entitiesSizeList = ([sheepSize] * numSheeps + [wolfSize] * numWolves
                        + [blockSize] * numBlocks)
    entityMaxSpeedList = ([sheepMaxSpeed] * numSheeps + [wolfMaxSpeed] * numWolves
                          + [blockMaxSpeed] * numBlocks)
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    centralControlId = 1
    centerControlIndexList = [centralControlId]
    reshapeAction = UnpackCenterControlAction(centerControlIndexList)
    getCollisionForce = GetCollisionForce()
    applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
    applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList,
                                          entitiesSizeList, getCollisionForce,
                                          getPosFromAgentState)
    integrateState = IntegrateState(numEntities, entitiesMovableList, massList,
                                    entityMaxSpeedList, getVelFromAgentState,
                                    getPosFromAgentState)
    interpolateState = TransitMultiAgentChasing(numEntities, reshapeAction,
                                                applyActionForce, applyEnvironForce,
                                                integrateState)

    numFramesToInterpolate = 1

    def transit(state, action):
        """Apply the action on the first frame, then zero actions afterwards."""
        for frameIndex in range(numFramesToInterpolate):
            nextState = interpolateState(state, action)
            action = np.array([(0, 0)] * numAgents)
            state = nextState
        return nextState

    def isTerminal(state):
        """Episodes never terminate early in this setup."""
        return False

    isCollision = IsCollision(getPosFromAgentState)
    collisonRewardWolf = 1
    punishForOutOfBound = PunishForOutOfBound()
    rewardWolf = RewardCentralControlPunishBond(
        wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState,
        isCollision, punishForOutOfBound, collisonRewardWolf)
    collisonRewardSheep = -1
    rewardSheep = RewardCentralControlPunishBond(
        sheepsID, wolvesID, entitiesSizeList, getPosFromAgentState,
        isCollision, punishForOutOfBound, collisonRewardSheep)
    terminalRewardList = [collisonRewardSheep, collisonRewardWolf]
    rewardMultiAgents = [rewardSheep, rewardWolf]

    resetState = ResetMultiAgentChasing(numAgents, numBlocks)

    # policy
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0),
                   (-7, -7), (0, -10), (7, -7), (0, 0)]
    wolfActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0),
                       (-7, -7), (0, -10), (7, -7), (0, 0)]

    preyPowerRatio = 0.5
    sheepActionSpace = list(map(tuple, np.array(actionSpace) * preyPowerRatio))

    predatorPowerRatio = 0.5
    wolfActionOneSpace = list(map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))
    wolfActionTwoSpace = list(map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))
    # Central control: one joint action is a pair (wolf one, wolf two).
    wolvesActionSpace = list(product(wolfActionOneSpace, wolfActionTwoSpace))
    actionSpaceList = [sheepActionSpace, wolvesActionSpace]

    # neural network init
    numStateSpace = 4 * numEntities
    numSheepActionSpace = len(sheepActionSpace)
    numWolvesActionSpace = len(wolvesActionSpace)

    regularizationFactor = 1e-4
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]

    generateSheepModel = GenerateModel(numStateSpace, numSheepActionSpace,
                                       regularizationFactor)
    generateWolvesModel = GenerateModel(numStateSpace, numWolvesActionSpace,
                                        regularizationFactor)
    generateModelList = [generateSheepModel, generateWolvesModel]

    sheepDepth = 9
    wolfDepth = 9
    depthList = [sheepDepth, wolfDepth]
    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'

    sheepId, wolvesId = [0, 1]
    trainableAgentIds = [sheepId, wolvesId]

    multiAgentNNmodel = [
        generateModel(sharedWidths * depth, actionLayerWidths, valueLayerWidths,
                      resBlockSize, initializationMethod, dropoutRate)
        for depth, generateModel in zip(depthList, generateModelList)
    ]

    def approximateSheepPolicy(NNmodel):
        """Wrap a network as a policy over the sheep action space."""
        return ApproximatePolicy(NNmodel, sheepActionSpace)

    def approximateWolvesPolicy(NNmodel):
        """Wrap a network as a policy over the joint wolves action space."""
        return ApproximatePolicy(NNmodel, wolvesActionSpace)

    def approximateValue(NNmodel):
        """Wrap a network as a state-value estimator."""
        return ApproximateValue(NNmodel)

    otherAgentApproximatePolicy = [approximateSheepPolicy, approximateWolvesPolicy]

    # NNGuidedMCTS init
    cInit = 1
    cBase = 100
    calculateScore = ScoreChild(cInit, cBase)
    selectChild = SelectChild(calculateScore)

    getApproximatePolicy = [approximateSheepPolicy, approximateWolvesPolicy]
    getApproximateValue = [approximateValue, approximateValue]

    def getStateFromNode(node):
        """Return the first value stored in the node's ``id`` mapping."""
        return list(node.id.values())[0]

    chooseActionInMCTS = sampleFromDistribution

    composeMultiAgentTransitInSingleAgentMCTS = ComposeMultiAgentTransitInSingleAgentMCTS(
        chooseActionInMCTS)
    composeSingleAgentGuidedMCTS = ComposeSingleAgentGuidedMCTS(
        numTree, numSimulations, actionSpaceList, terminalRewardList, selectChild,
        isTerminal, transit, getStateFromNode, getApproximatePolicy,
        getApproximateValue, composeMultiAgentTransitInSingleAgentMCTS)
    prepareMultiAgentPolicy = PrepareMultiAgentPolicy(composeSingleAgentGuidedMCTS,
                                                      otherAgentApproximatePolicy,
                                                      trainableAgentIds)

    # load model
    NNModelSaveExtension = ''
    NNModelSaveDirectory = os.path.join(
        dirName, '..', '..', 'data', 'iterTrain2wolves1sheepMADDPGEnv', 'NNModelRes')
    os.makedirs(NNModelSaveDirectory, exist_ok=True)

    generateNNModelSavePath = GetSavePath(NNModelSaveDirectory, NNModelSaveExtension,
                                          fixedParameters)

    # Sampling for iteration k uses the networks saved at iteration k - 1.
    for agentId in trainableAgentIds:
        modelPath = generateNNModelSavePath({
            'iterationIndex': iterationIndex - 1,
            'agentId': agentId,
            'numTrajectoriesPerIteration': numTrajectoriesPerIteration,
            'numTrainStepEachIteration': numTrainStepEachIteration,
        })
        multiAgentNNmodel[agentId] = restoreVariables(multiAgentNNmodel[agentId],
                                                      modelPath)

    multiAgentPolicy = prepareMultiAgentPolicy(multiAgentNNmodel)
    chooseActionList = [maxFromDistribution, maxFromDistribution]

    def sampleAction(state):
        """Return one chosen action per policy in multiAgentPolicy."""
        actionDists = multiAgentPolicy(state)
        return [
            chooseAction(actionDist)
            for actionDist, chooseAction in zip(actionDists, chooseActionList)
        ]

    def render(state):
        """No-op render callback."""
        return None

    forwardOneStep = ForwardMultiAgentsOneStep(transit, rewardMultiAgents)
    sampleTrajectory = SampleTrajectoryWithRender(maxRunningSteps, isTerminal, resetState,
                                                  forwardOneStep, render, renderOn)

    trajectories = [
        sampleTrajectory(sampleAction)
        for sampleIndex in range(startSampleIndex, endSampleIndex)
    ]
    print([len(traj) for traj in trajectories])
    saveToPickle(trajectories, trajectorySavePath)
def main():
    """Sample trajectories with an MCTS wolf team and pickle them.

    In DEBUG mode a fixed sample range and agent id are used. Otherwise
    ``sys.argv[1]`` is a JSON object of save-path parameters (it must
    contain ``'agentId'``) and ``sys.argv[2]`` / ``sys.argv[3]`` are the
    start and end of the range of sample indices to generate.

    The sheep follows a fixed distribution with all mass on ``(0, 0)``;
    the two wolves are driven as one centrally controlled agent whose
    joint action is chosen by MCTS with random rollouts. Nothing is
    sampled when the target pickle file already exists.
    """
    DEBUG = 1
    renderOn = 1
    if DEBUG:
        # Fixed settings for a quick local run.
        parametersForTrajectoryPath = {}
        startSampleIndex = 5
        endSampleIndex = 8
        agentId = 1
    else:
        parametersForTrajectoryPath = json.loads(sys.argv[1])
        startSampleIndex = int(sys.argv[2])
        endSampleIndex = int(sys.argv[3])
        agentId = int(parametersForTrajectoryPath['agentId'])
    parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex,
                                                  endSampleIndex)

    # Resolve the output path; skip all work if the file already exists.
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(
        dirName, '..', '..', '..', '..', 'data', 'MADDPG2wolves1sheep',
        'trainWolvesTwoCenterControlAction', 'trajectories')
    # exist_ok avoids the check-then-create race when several sampling
    # processes start at the same time.
    os.makedirs(trajectoriesSaveDirectory, exist_ok=True)

    trajectorySaveExtension = '.pickle'
    maxRunningSteps = 50
    numSimulations = 250
    fixedParameters = {
        'agentId': agentId,
        'maxRunningSteps': maxRunningSteps,
        'numSimulations': numSimulations,
    }
    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory,
                                             trajectorySaveExtension,
                                             fixedParameters)
    trajectorySavePath = generateTrajectorySavePath(
        parametersForTrajectoryPath)
    if os.path.isfile(trajectorySavePath):
        return

    # env MDP
    sheepsID = [0]
    wolvesID = [1, 2]
    blocksID = []

    numSheeps = len(sheepsID)
    numWolves = len(wolvesID)
    numBlocks = len(blocksID)
    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks

    sheepSize = 0.05
    wolfSize = 0.075
    blockSize = 0.2

    sheepMaxSpeed = 1.3 * 1
    wolfMaxSpeed = 1.0 * 1
    blockMaxSpeed = None

    # Per-entity lists are ordered sheep, then wolves, then blocks.
    entitiesSizeList = ([sheepSize] * numSheeps + [wolfSize] * numWolves +
                        [blockSize] * numBlocks)
    entityMaxSpeedList = ([sheepMaxSpeed] * numSheeps +
                          [wolfMaxSpeed] * numWolves +
                          [blockMaxSpeed] * numBlocks)
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    centralControlId = 1
    centerControlIndexList = [centralControlId]
    reshapeAction = UnpackCenterControlAction(centerControlIndexList)
    getCollisionForce = GetCollisionForce()
    applyActionForce = ApplyActionForce(wolvesID, sheepsID,
                                        entitiesMovableList)
    applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList,
                                          entitiesSizeList,
                                          getCollisionForce,
                                          getPosFromAgentState)
    integrateState = IntegrateState(numEntities, entitiesMovableList,
                                    massList, entityMaxSpeedList,
                                    getVelFromAgentState,
                                    getPosFromAgentState)
    interpolateState = TransitMultiAgentChasing(numEntities, reshapeAction,
                                                applyActionForce,
                                                applyEnvironForce,
                                                integrateState)

    numFramesToInterpolate = 1

    def transit(state, action):
        """Advance the state; the action is applied on the first frame only."""
        for _ in range(numFramesToInterpolate):
            nextState = interpolateState(state, action)
            # Any further interpolation frames use a zero action.
            action = np.array([(0, 0)] * numAgents)
            state = nextState
        return nextState

    # Never terminal: no state ends an episode early.
    isTerminal = lambda state: False

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    collisonRewardWolf = 1
    rewardWolf = RewardCentralControlPunishBond(
        wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState,
        isCollision, punishForOutOfBound, collisonRewardWolf)
    # NOTE(review): rewardSheep is never used below; kept only because the
    # constructor's side effects are not visible from here - confirm and drop.
    collisonRewardSheep = -1
    rewardSheep = RewardCentralControlPunishBond(
        sheepsID, wolvesID, entitiesSizeList, getPosFromAgentState,
        isCollision, punishForOutOfBound, collisonRewardSheep)

    resetState = ResetMultiAgentChasing(numAgents, numBlocks)

    # policy
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7),
                   (0, -10), (7, -7), (0, 0)]
    wolfActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0),
                       (-7, -7), (0, -10), (7, -7), (0, 0)]

    preyPowerRatio = 0.5
    sheepActionSpace = list(
        map(tuple, np.array(actionSpace) * preyPowerRatio))

    predatorPowerRatio = 0.5
    wolfActionOneSpace = list(
        map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))
    wolfActionTwoSpace = list(
        map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))
    # Joint action of the centrally controlled wolves: one pair per combination.
    wolvesActionSpace = list(product(wolfActionOneSpace, wolfActionTwoSpace))

    # neural network init
    numStateSpace = 4 * numEntities
    numSheepActionSpace = len(sheepActionSpace)
    numWolvesActionSpace = len(wolvesActionSpace)
    regularizationFactor = 1e-4
    # NOTE(review): generateSheepModel is never used below; kept only because
    # the constructor's side effects are not visible from here - confirm and
    # drop.
    generateSheepModel = GenerateModel(numStateSpace, numSheepActionSpace,
                                       regularizationFactor)

    # The sheep's action distribution puts all probability on (0, 0).
    sheepPolicy = lambda state: {(0, 0): 1}

    # MCTS
    cInit = 1
    cBase = 100
    calculateScore = ScoreChild(cInit, cBase)
    selectChild = SelectChild(calculateScore)

    # Uniform prior over the joint wolves actions (a fresh dict per call).
    uniformPrior = 1 / len(wolvesActionSpace)
    getActionPrior = lambda state: {
        action: uniformPrior for action in wolvesActionSpace
    }

    chooseActionInMCTS = sampleFromDistribution

    def wolvesTransit(state, action):
        """Transit with the sheep action sampled from its own policy."""
        return transit(state,
                       [chooseActionInMCTS(sheepPolicy(state)), action])

    # initialize children; expand
    initializeChildren = InitializeChildren(wolvesActionSpace, wolvesTransit,
                                            getActionPrior)
    expand = Expand(isTerminal, initializeChildren)

    def rolloutPolicy(state):
        """Joint rollout action: sheep from its policy, wolves at random."""
        return [
            sampleFromDistribution(sheepPolicy(state)),
            wolvesActionSpace[np.random.choice(range(numWolvesActionSpace))]
        ]

    rolloutHeuristic = lambda state: 0
    maxRolloutSteps = 15
    rollout = RollOut(rolloutPolicy, maxRolloutSteps, transit, rewardWolf,
                      isTerminal, rolloutHeuristic)

    wolfPolicy = MCTS(numSimulations, selectChild, expand, rollout, backup,
                      establishSoftmaxActionDist)

    chooseActionList = [maxFromDistribution, maxFromDistribution]

    def sampleAction(state):
        """Pick one action per agent from the agents' action distributions."""
        actionDists = [sheepPolicy(state), wolfPolicy(state)]
        return [
            chooseAction(actionDist)
            for actionDist, chooseAction in zip(actionDists, chooseActionList)
        ]

    # No drawing backend is set up in this function. A no-op callable is
    # passed instead of None so that renderOn = 1 cannot end in a call on
    # None (presumably the sampler invokes render when renderOn is truthy).
    render = lambda *args, **kwargs: None

    forwardOneStep = ForwardOneStep(transit, rewardWolf)
    sampleTrajectory = SampleTrajectoryWithRender(maxRunningSteps,
                                                  isTerminal, resetState,
                                                  forwardOneStep, render,
                                                  renderOn)

    trajectories = [
        sampleTrajectory(sampleAction)
        for sampleIndex in range(startSampleIndex, endSampleIndex)
    ]
    print([len(traj) for traj in trajectories])
    saveToPickle(trajectories, trajectorySavePath)
def main():
    """Sample trajectories with a multi-tree MCTS wolf team and pickle them.

    In DEBUG mode a fixed sample range and agent id are used. Otherwise
    ``sys.argv[1]`` is a JSON object of save-path parameters (it must
    contain ``'agentId'``) and ``sys.argv[2]`` / ``sys.argv[3]`` are the
    start and end of the range of sample indices to generate.

    The sheep acts through a neural-network policy restored from disk; the
    two wolves are driven as one centrally controlled agent whose joint
    action is chosen by ``StochasticMCTS`` over several trees. Nothing is
    sampled when the target pickle file already exists.
    """
    DEBUG = 0
    renderOn = 0
    if DEBUG:
        # Fixed settings for a quick local run.
        parametersForTrajectoryPath = {}
        startSampleIndex = 0
        endSampleIndex = 10
        agentId = 1
    else:
        parametersForTrajectoryPath = json.loads(sys.argv[1])
        startSampleIndex = int(sys.argv[2])
        endSampleIndex = int(sys.argv[3])
        agentId = int(parametersForTrajectoryPath['agentId'])
    parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex,
                                                  endSampleIndex)

    # Resolve the output path; skip all work if the file already exists.
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(
        dirName, '..', '..', '..', '..', 'data', '2wolves1sheep',
        'trainWolvesTwoCenterControlMultiTrees', 'trajectories')
    # exist_ok avoids the check-then-create race when several sampling
    # processes start at the same time.
    os.makedirs(trajectoriesSaveDirectory, exist_ok=True)

    trajectorySaveExtension = '.pickle'
    maxRunningSteps = 50
    numSimulations = 500
    killzoneRadius = 50
    fixedParameters = {
        'agentId': agentId,
        'maxRunningSteps': maxRunningSteps,
        'numSimulations': numSimulations,
        'killzoneRadius': killzoneRadius,
    }
    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory,
                                             trajectorySaveExtension,
                                             fixedParameters)
    trajectorySavePath = generateTrajectorySavePath(
        parametersForTrajectoryPath)
    if os.path.isfile(trajectorySavePath):
        return

    # Environment: one sheep and two wolves.
    numOfAgent = 3
    sheepId = 0
    wolvesId = 1
    wolfOneId = 1
    wolfTwoId = 2
    xPosIndex = [0, 1]
    xBoundary = [0, 600]
    yBoundary = [0, 600]

    getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
    getWolfOneXPos = GetAgentPosFromState(wolfOneId, xPosIndex)
    getWolfTwoXPos = GetAgentPosFromState(wolfTwoId, xPosIndex)

    reset = Reset(xBoundary, yBoundary, numOfAgent)

    # Terminal as soon as either wolf's terminal check fires.
    isTerminalOne = IsTerminal(getWolfOneXPos, getSheepXPos, killzoneRadius)
    isTerminalTwo = IsTerminal(getWolfTwoXPos, getSheepXPos, killzoneRadius)
    isTerminal = lambda state: isTerminalOne(state) or isTerminalTwo(state)

    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)
    centerControlIndexList = [wolvesId]
    unpackCenterControlAction = UnpackCenterControlAction(
        centerControlIndexList)
    transitionFunction = TransiteForNoPhysicsWithCenterControlAction(
        stayInBoundaryByReflectVelocity)

    numFramesToInterpolate = 3
    transit = TransitWithInterpolateStateWithCenterControlAction(
        numFramesToInterpolate, transitionFunction, isTerminal,
        unpackCenterControlAction)

    # Action spaces
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7),
                   (0, -10), (7, -7), (0, 0)]
    wolfActionSpace = actionSpace

    preyPowerRatio = 12
    sheepActionSpace = list(
        map(tuple, np.array(actionSpace) * preyPowerRatio))

    predatorPowerRatio = 8
    wolfActionOneSpace = list(
        map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))
    wolfActionTwoSpace = list(
        map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))
    # Joint action of the centrally controlled wolves: one pair per combination.
    wolvesActionSpace = list(product(wolfActionOneSpace, wolfActionTwoSpace))

    # neural network init
    numStateSpace = 2 * numOfAgent
    numSheepActionSpace = len(sheepActionSpace)
    numWolvesActionSpace = len(wolvesActionSpace)
    regularizationFactor = 1e-4
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]
    generateSheepModel = GenerateModel(numStateSpace, numSheepActionSpace,
                                       regularizationFactor)

    # Locate and restore the sheep network.
    NNModelSaveExtension = ''
    sheepNNModelSaveDirectory = os.path.join(
        dirName, '..', '..', '..', '..', 'data', '2wolves1sheep',
        'trainSheepWithTwoHeatSeekingWolves', 'trainedResNNModels')
    sheepNNModelFixedParameters = {
        'agentId': 0,
        'maxRunningSteps': 50,
        'numSimulations': 110,
        'miniBatchSize': 256,
        'learningRate': 0.0001,
    }
    getSheepNNModelSavePath = GetSavePath(sheepNNModelSaveDirectory,
                                          NNModelSaveExtension,
                                          sheepNNModelFixedParameters)

    depth = 9
    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    initSheepNNModel = generateSheepModel(sharedWidths * depth,
                                          actionLayerWidths,
                                          valueLayerWidths, resBlockSize,
                                          initializationMethod, dropoutRate)

    sheepTrainedModelPath = getSheepNNModelSavePath({
        'trainSteps': 50000,
        'depth': depth
    })
    sheepTrainedModel = restoreVariables(initSheepNNModel,
                                         sheepTrainedModelPath)
    sheepPolicy = ApproximatePolicy(sheepTrainedModel, sheepActionSpace)

    # MCTS
    cInit = 1
    cBase = 100
    calculateScore = ScoreChild(cInit, cBase)
    selectChild = SelectChild(calculateScore)

    # Uniform prior over the joint wolves actions (a fresh dict per call).
    uniformPrior = 1 / len(wolvesActionSpace)
    getActionPrior = lambda state: {
        action: uniformPrior for action in wolvesActionSpace
    }

    temperatureInMCTS = 1
    chooseActionInMCTS = SampleAction(temperatureInMCTS)

    def wolvesTransit(state, action):
        """Transit with the sheep action drawn from the sheep policy."""
        return transit(state,
                       [chooseActionInMCTS(sheepPolicy(state)), action])

    # reward function
    aliveBonus = -1 / maxRunningSteps
    deathPenalty = 1
    rewardFunction = reward.RewardFunctionCompete(aliveBonus, deathPenalty,
                                                  isTerminal)

    # initialize children; expand
    initializeChildren = InitializeChildren(wolvesActionSpace, wolvesTransit,
                                            getActionPrior)
    expand = Expand(isTerminal, initializeChildren)

    def rolloutPolicy(state):
        """Pick a joint wolves action uniformly at random."""
        return wolvesActionSpace[np.random.choice(
            range(numWolvesActionSpace))]

    # rollout
    rolloutHeuristicWeight = 0
    minDistance = 400
    rolloutHeuristic1 = reward.HeuristicDistanceToTarget(
        rolloutHeuristicWeight, getWolfOneXPos, getSheepXPos, minDistance)
    rolloutHeuristic2 = reward.HeuristicDistanceToTarget(
        rolloutHeuristicWeight, getWolfTwoXPos, getSheepXPos, minDistance)
    # Average of the two per-wolf heuristics.
    rolloutHeuristic = lambda state: (rolloutHeuristic1(state) +
                                      rolloutHeuristic2(state)) / 2

    maxRolloutSteps = 15
    rollout = RollOut(rolloutPolicy, maxRolloutSteps, wolvesTransit,
                      rewardFunction, isTerminal, rolloutHeuristic)

    # Split the simulation budget evenly across the trees.
    numTree = 4
    numSimulationsPerTree = numSimulations // numTree
    wolfPolicy = StochasticMCTS(
        numTree, numSimulationsPerTree, selectChild, expand, rollout, backup,
        establishSoftmaxActionDistFromMultipleTrees)

    # All agents' policies
    policy = lambda state: [sheepPolicy(state), wolfPolicy(state)]
    chooseActionList = [chooseGreedyAction, chooseGreedyAction]

    render = None
    if renderOn:
        # pygame is imported only when rendering is requested.
        import pygame as pg
        from pygame.color import THECOLORS
        screenColor = THECOLORS['black']
        circleColorList = [
            THECOLORS['green'], THECOLORS['red'], THECOLORS['red']
        ]
        circleSize = 10

        saveImage = False
        saveImageDir = os.path.join(dirName, '..', '..', '..', '..', 'data',
                                    'demoImg')
        os.makedirs(saveImageDir, exist_ok=True)

        screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
        render = Render(numOfAgent, xPosIndex, screen, screenColor,
                        circleColorList, circleSize, saveImage, saveImageDir)

    sampleTrajectory = SampleTrajectoryWithRender(maxRunningSteps, transit,
                                                  isTerminal, reset,
                                                  chooseActionList, render,
                                                  renderOn)

    trajectories = [
        sampleTrajectory(policy)
        for sampleIndex in range(startSampleIndex, endSampleIndex)
    ]
    print([len(traj) for traj in trajectories])
    saveToPickle(trajectories, trajectorySavePath)