def setUp(self):
        # Env param
        bound_low = 0
        bound_high = 7
        self.transition = TransitionFunction(bound_low, bound_high)

        self.action_space = [-1, 1]
        self.num_action_space = len(self.action_space)
        self.uniformActionPrior = {
            action: 1 / self.num_action_space
            for action in self.action_space
        }

        step_penalty = -1
        catch_reward = 1
        self.target_state = bound_high
        self.isTerminal = Terminal(self.target_state)

        self.c_init = 0
        self.c_base = 1
        self.scoreChild = ScoreChild(self.c_init, self.c_base)

        self.selectChild = SelectChild(self.scoreChild)

        init_state = 3
        level1_0_state = self.transition(init_state, action=self.action_space[0])
        level1_1_state = self.transition(init_state, action=self.action_space[1])
        self.default_actionPrior = 1 / self.num_action_space

        self.root = Node(id={1: init_state},
                         numVisited=1,
                         sumValue=0,
                         actionPrior=self.default_actionPrior,
                         isExpanded=True)
        self.level1_0 = Node(parent=self.root,
                             id={self.action_space[0]: level1_0_state},
                             numVisited=2,
                             sumValue=5,
                             actionPrior=self.default_actionPrior,
                             isExpanded=False)
        self.level1_1 = Node(parent=self.root,
                             id={self.action_space[1]: level1_1_state},
                             numVisited=3,
                             sumValue=10,
                             actionPrior=self.default_actionPrior,
                             isExpanded=False)

        self.getActionPrior = lambda state: self.uniformActionPrior
        self.initializeChildren = InitializeChildren(self.action_space,
                                                     self.transition,
                                                     self.getActionPrior)
        self.expand = Expand(self.isTerminal, self.initializeChildren)
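The fixture above wires a ScoreChild(c_init, c_base) into SelectChild. As a rough, hedged sketch (not taken from the project, and the real ScoreChild signature may differ), the PUCT-style score such a class typically computes from a child's visit count, summed value, and action prior looks like this:

import math

def puctScoreSketch(parentVisits, childVisits, childSumValue, actionPrior, cInit, cBase):
    # Mean value of the child; unvisited children default to zero.
    meanValue = childSumValue / childVisits if childVisits > 0 else 0
    # Exploration coefficient grows slowly with the parent's visit count.
    explorationRate = math.log((1 + parentVisits + cBase) / cBase) + cInit
    # The bonus favours children with a high prior and few visits.
    explorationBonus = explorationRate * actionPrior * math.sqrt(parentVisits) / (1 + childVisits)
    return meanValue + explorationBonus
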
def mctsPolicy():
    lowerBound = 0
    gridSize = 10
    upperBound = [gridSize - 1, gridSize - 1]

    maxRunningSteps = 50

    actionSpace = [(-1, 0), (1, 0), (0, 1), (0, -1)]
    numActionSpace = len(actionSpace)

    sheepSpeedRatio = 1
    sheepActionSpace = list(map(tuple,
                                np.array(actionSpace) * sheepSpeedRatio))

    numOfAgent = 5  # 1 hunter, 1 stag with high value, 3 rabbits with low value
    hunterId = [0]
    targetIds = [1, 2, 3, 4]
    stagId = [1]
    rabbitId = [2, 3, 4]

    positionIndex = [0, 1]

    getHunterPos = GetAgentPosFromState(hunterId, positionIndex)
    getTargetsPos = GetAgentPosFromState(targetIds, positionIndex)

    stayWithinBoundary = env.StayWithinBoundary(upperBound, lowerBound)
    isTerminal = env.IsTerminal(getHunterPos, getTargetsPos)
    transitionFunction = env.Transition(stayWithinBoundary)
    reset = env.Reset(upperBound, lowerBound, numOfAgent)

    # stagPolicy = RandomPolicy(sheepActionSpace)
    stagPolicy = stationaryAgentPolicy
    rabbitPolicies = [stationaryAgentPolicy] * len(rabbitId)

    cInit = 1
    cBase = 100
    calculateScore = ScoreChild(cInit, cBase)
    selectChild = SelectChild(calculateScore)
    getActionPrior = lambda state: {
        action: 1 / len(actionSpace)
        for action in actionSpace
    }

    def wolfTransit(state, action):
        return transitionFunction(
            state, [action, maxFromDistribution(stagPolicy(state))] + [
                maxFromDistribution(rabbitPolicy(state))
                for rabbitPolicy in rabbitPolicies
            ])

    stepPenalty = -1 / maxRunningSteps
    catchBonus = 1
    highRewardRatio = 10
    rewardFunction = reward.RewardFunction(highRewardRatio, stepPenalty,
                                           catchBonus, isTerminal)

    initializeChildren = InitializeChildren(actionSpace, wolfTransit,
                                            getActionPrior)
    expand = Expand(isTerminal, initializeChildren)

    def rolloutPolicy(state):
        return actionSpace[np.random.choice(range(numActionSpace))]

    rolloutHeuristicWeight = 0
    rolloutHeuristic = reward.HeuristicDistanceToTarget(
        rolloutHeuristicWeight, getHunterPos, getTargetsPos)
    # rolloutHeuristic = lambda state: 0

    maxRolloutSteps = 20
    rollout = RollOut(rolloutPolicy, maxRolloutSteps, wolfTransit,
                      rewardFunction, isTerminal, rolloutHeuristic)
    numSimulations = 600
    wolfPolicy = MCTS(numSimulations, selectChild, expand, rollout, backup,
                      establishSoftmaxActionDist)

    # All agents' policies
    policy = lambda state: [
        wolfPolicy(state), stagPolicy(state)
    ] + [rabbitPolicy(state) for rabbitPolicy in rabbitPolicies]
    # Rendering setup (numOfAgent, gridSize, and maxRunningSteps defined above are reused)

    screenWidth = 600
    screenHeight = 600
    fullScreen = False

    initializeScreen = InitializeScreen(screenWidth, screenHeight, fullScreen)
    screen = initializeScreen()
    # pg.mouse.set_visible(False)

    leaveEdgeSpace = 2
    lineWidth = 1
    backgroundColor = [205, 255, 204]
    lineColor = [0, 0, 0]
    distractorColor = [255, 255, 0]
    targetColor = [221, 160, 221]
    playerColor = [50, 50, 255]
    targetRadius = 10
    playerRadius = 10
    textColorTuple = (255, 50, 50)

    drawBackground = DrawBackground(screen, gridSize, leaveEdgeSpace,
                                    backgroundColor, lineColor, lineWidth,
                                    textColorTuple)
    drawNewState = DrawNewState(screen, drawBackground, targetColor,
                                playerColor, targetRadius, playerRadius)

    chooseAction = [maxFromDistribution] * numOfAgent

    renderOn = True
    sampleTrajectory = SampleTrajectory(maxRunningSteps, transitionFunction,
                                        isTerminal, reset, chooseAction,
                                        renderOn, drawNewState)

    startTime = time.time()
    numOfEpisodes = 10
    trajectories = [sampleTrajectory(policy) for _ in range(numOfEpisodes)]
    finishedTime = time.time() - startTime

    print('length:', len(trajectories[0]))
    print('time:', finishedTime)
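Both this example and the ones below treat a policy's output as a plain {action: probability} dict. A minimal sketch of the two selection helpers they pass around, maxFromDistribution and sampleFromDistribution; the project's actual implementations may differ in detail:

import numpy as np

def maxFromDistribution(actionDist):
    # Greedy selection: return the action with the highest probability.
    return max(actionDist, key=actionDist.get)

def sampleFromDistribution(actionDist):
    # Stochastic selection: sample an action according to its probability.
    actions = list(actionDist)
    probabilities = list(actionDist.values())
    index = np.random.choice(len(actions), p=probabilities)
    return actions[index]
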
Example #3
def main():
    DEBUG = 0
    renderOn = 0
    if DEBUG:
        parametersForTrajectoryPath = {}
        startSampleIndex = 5
        endSampleIndex = 7
        agentId = 1
        parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex,
                                                      endSampleIndex)
    else:
        parametersForTrajectoryPath = json.loads(sys.argv[1])
        startSampleIndex = int(sys.argv[2])
        endSampleIndex = int(sys.argv[3])
        agentId = int(parametersForTrajectoryPath['agentId'])
        parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex,
                                                      endSampleIndex)

    # check whether the trajectory file already exists
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(
        dirName, '..', '..', '..', '..', 'data', 'NoPhysics2wolves1sheep',
        'trainWolvesTwoCenterControlAction88', 'trajectories')
    if not os.path.exists(trajectoriesSaveDirectory):
        os.makedirs(trajectoriesSaveDirectory)

    trajectorySaveExtension = '.pickle'
    maxRunningSteps = 50
    numSimulations = 250
    killzoneRadius = 150
    fixedParameters = {
        'agentId': agentId,
        'maxRunningSteps': maxRunningSteps,
        'numSimulations': numSimulations,
        'killzoneRadius': killzoneRadius
    }

    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory,
                                             trajectorySaveExtension,
                                             fixedParameters)

    trajectorySavePath = generateTrajectorySavePath(
        parametersForTrajectoryPath)

    if not os.path.isfile(trajectorySavePath):
        numOfAgent = 3
        xBoundary = [0, 600]
        yBoundary = [0, 600]
        resetState = Reset(xBoundary, yBoundary, numOfAgent)

        stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
            xBoundary, yBoundary)
        interpolateOneFrame = InterpolateOneFrame(
            stayInBoundaryByReflectVelocity)

        chooseInterpolatedNextState = lambda interpolatedStates: interpolatedStates[-1]

        sheepId = 0
        wolvesId = 1
        centerControlIndexList = [wolvesId]
        unpackCenterControlAction = UnpackCenterControlAction(
            centerControlIndexList)

        numFramesToInterpolate = 0
        transit = TransitWithInterpolation(numFramesToInterpolate,
                                           interpolateOneFrame,
                                           chooseInterpolatedNextState,
                                           unpackCenterControlAction)

        # NNGuidedMCTS init
        cInit = 1
        cBase = 100
        calculateScore = ScoreChild(cInit, cBase)
        selectChild = SelectChild(calculateScore)

        actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7),
                       (0, -10), (7, -7), (0, 0)]
        wolfActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0),
                           (-7, -7), (0, -10), (7, -7)]

        preyPowerRatio = 10
        sheepActionSpace = list(
            map(tuple,
                np.array(actionSpace) * preyPowerRatio))

        predatorPowerRatio = 8
        wolfActionOneSpace = list(
            map(tuple,
                np.array(wolfActionSpace) * predatorPowerRatio))
        wolfActionTwoSpace = list(
            map(tuple,
                np.array(wolfActionSpace) * predatorPowerRatio))

        wolvesActionSpace = list(
            product(wolfActionOneSpace, wolfActionTwoSpace))

        actionSpaceList = [sheepActionSpace, wolvesActionSpace]

        # neural network init
        numStateSpace = 2 * numOfAgent
        numSheepActionSpace = len(sheepActionSpace)
        numWolvesActionSpace = len(wolvesActionSpace)

        regularizationFactor = 1e-4
        sharedWidths = [128]
        actionLayerWidths = [128]
        valueLayerWidths = [128]
        generateSheepModel = GenerateModel(numStateSpace, numSheepActionSpace,
                                           regularizationFactor)

        # load save dir
        NNModelSaveExtension = ''
        sheepNNModelSaveDirectory = os.path.join(
            dirName, '..', '..', '..', '..', 'data', 'NoPhysics2wolves1sheep',
            'trainSheepWithTwoHeatSeekingWolves', 'trainedResNNModels')
        sheepNNModelFixedParameters = {
            'agentId': 0,
            'maxRunningSteps': 50,
            'numSimulations': 110,
            'miniBatchSize': 256,
            'learningRate': 0.0001,
        }
        getSheepNNModelSavePath = GetSavePath(sheepNNModelSaveDirectory,
                                              NNModelSaveExtension,
                                              sheepNNModelFixedParameters)

        depth = 9
        resBlockSize = 2
        dropoutRate = 0.0
        initializationMethod = 'uniform'
        initSheepNNModel = generateSheepModel(sharedWidths * depth,
                                              actionLayerWidths,
                                              valueLayerWidths, resBlockSize,
                                              initializationMethod,
                                              dropoutRate)

        sheepTrainedModelPath = getSheepNNModelSavePath({
            'trainSteps': 50000,
            'depth': depth
        })
        sheepTrainedModel = restoreVariables(initSheepNNModel,
                                             sheepTrainedModelPath)
        sheepPolicy = ApproximatePolicy(sheepTrainedModel, sheepActionSpace)

        wolfOneId = 1
        wolfTwoId = 2
        xPosIndex = [0, 1]
        getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
        getWolfOneXPos = GetAgentPosFromState(wolfOneId, xPosIndex)
        speed = 120
        #sheepPolicy = HeatSeekingContinuesDeterministicPolicy(getWolfOneXPos, getSheepXPos, speed)

        # MCTS
        cInit = 1
        cBase = 100
        calculateScore = ScoreChild(cInit, cBase)
        selectChild = SelectChild(calculateScore)

        # prior
        getActionPrior = lambda state: {
            action: 1 / len(wolvesActionSpace)
            for action in wolvesActionSpace
        }

        # load chase nn policy
        chooseActionInMCTS = sampleFromDistribution

        def wolvesTransit(state, action):
            return transit(state,
                           [chooseActionInMCTS(sheepPolicy(state)), action])

        # reward function
        wolfOneId = 1
        wolfTwoId = 2
        xPosIndex = [0, 1]
        getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
        getWolfOneXPos = GetAgentPosFromState(wolfOneId, xPosIndex)
        getWolfTwoXPos = GetAgentPosFromState(wolfTwoId, xPosIndex)
        isCollidedOne = IsTerminal(getWolfOneXPos, getSheepXPos,
                                   killzoneRadius)
        isCollidedTwo = IsTerminal(getWolfTwoXPos, getSheepXPos,
                                   killzoneRadius)

        calCollisionTimes = lambda state: np.sum([
            isCollidedOne(state), isCollidedTwo(state)
        ])  # collisionTimeByAddingCollisionInAllWolves
        #calCollisionTimes = lambda state: np.max([isCollidedOne(state), isCollidedTwo(state)]) # collisionTimeByBooleanCollisionForAnyWolf

        calTerminationSignals = calCollisionTimes
        chooseInterpolatedStateByEarlyTermination = ChooseInterpolatedStateByEarlyTermination(
            calTerminationSignals)

        numFramesToInterpolateInReward = 3
        interpolateStateInReward = TransitWithInterpolation(
            numFramesToInterpolateInReward, interpolateOneFrame,
            chooseInterpolatedStateByEarlyTermination,
            unpackCenterControlAction)

        aliveBonus = -1 / maxRunningSteps * 10
        deathPenalty = 1
        rewardFunction = RewardFunctionCompeteWithStateInterpolation(
            aliveBonus, deathPenalty, calCollisionTimes,
            interpolateStateInReward)

        # initialize children; expand
        initializeChildren = InitializeChildren(wolvesActionSpace,
                                                wolvesTransit, getActionPrior)
        isTerminal = lambda state: False
        expand = Expand(isTerminal, initializeChildren)

        # random rollout policy
        def rolloutPolicy(state):
            return [
                sampleFromDistribution(sheepPolicy(state)),
                wolvesActionSpace[np.random.choice(
                    range(numWolvesActionSpace))]
            ]

        # rollout
        #rolloutHeuristicWeight = 0
        #minDistance = 400
        #rolloutHeuristic1 = HeuristicDistanceToTarget(
        #    rolloutHeuristicWeight, getWolfOneXPos, getSheepXPos, minDistance)
        #rolloutHeuristic2 = HeuristicDistanceToTarget(
        #    rolloutHeuristicWeight, getWolfTwoXPos, getSheepXPos, minDistance)

        #rolloutHeuristic = lambda state: (rolloutHeuristic1(state) + rolloutHeuristic2(state)) / 2

        rolloutHeuristic = lambda state: 0
        maxRolloutSteps = 15
        rollout = RollOut(rolloutPolicy, maxRolloutSteps, transit,
                          rewardFunction, isTerminal, rolloutHeuristic)

        wolfPolicy = MCTS(numSimulations, selectChild, expand, rollout, backup,
                          establishSoftmaxActionDist)

        # All agents' policies
        policy = lambda state: [sheepPolicy(state), wolfPolicy(state)]
        chooseActionList = [maxFromDistribution, maxFromDistribution]

        def sampleAction(state):
            actionDists = [sheepPolicy(state), wolfPolicy(state)]
            action = [
                chooseAction(actionDist) for actionDist, chooseAction in zip(
                    actionDists, chooseActionList)
            ]
            return action

        render = None
        if renderOn:
            import pygame as pg
            from pygame.color import THECOLORS
            screenColor = THECOLORS['black']
            circleColorList = [
                THECOLORS['green'], THECOLORS['yellow'], THECOLORS['red']
            ]
            circleSize = 10
            saveImage = False
            saveImageDir = os.path.join(dirName, '..', '..', '..', '..',
                                        'data', 'demoImg')
            if not os.path.exists(saveImageDir):
                os.makedirs(saveImageDir)
            screen = pg.display.set_mode([max(xBoundary), max(yBoundary)])
            render = Render(numOfAgent, xPosIndex, screen, screenColor,
                            circleColorList, circleSize, saveImage,
                            saveImageDir)

        forwardOneStep = ForwardOneStep(transit, rewardFunction)
        sampleTrajectory = SampleTrajectoryWithRender(maxRunningSteps,
                                                      isTerminal, resetState,
                                                      forwardOneStep, render,
                                                      renderOn)

        trajectories = [
            sampleTrajectory(sampleAction)
            for sampleIndex in range(startSampleIndex, endSampleIndex)
        ]
        print([len(traj) for traj in trajectories])
        saveToPickle(trajectories, trajectorySavePath)
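For reference, a hedged sketch of the rollout step the MCTS above relies on: simulate with the random rollout policy for at most maxRolloutSteps, accumulate rewards, and bootstrap the truncated tail with the heuristic. The project's actual RollOut class may differ; the reward-function signature used here is an assumption.

class RollOutSketch:
    def __init__(self, rolloutPolicy, maxRolloutStep, transit, rewardFunction,
                 isTerminal, rolloutHeuristic):
        self.rolloutPolicy = rolloutPolicy
        self.maxRolloutStep = maxRolloutStep
        self.transit = transit
        self.rewardFunction = rewardFunction
        self.isTerminal = isTerminal
        self.rolloutHeuristic = rolloutHeuristic

    def __call__(self, state):
        totalReward = 0
        for _ in range(self.maxRolloutStep):
            action = self.rolloutPolicy(state)
            nextState = self.transit(state, action)
            totalReward += self.rewardFunction(state, action)
            if self.isTerminal(nextState):
                return totalReward
            state = nextState
        # Estimate the value of the truncated tail with the heuristic.
        return totalReward + self.rolloutHeuristic(state)
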
Example #4
def main():
    DEBUG = 1
    renderOn = 1

    if DEBUG:
        parametersForTrajectoryPath = {}
        startSampleIndex = 1
        endSampleIndex = 2
        parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex, endSampleIndex)
        iterationIndex = 2
        numTrainStepEachIteration = 1
        numTrajectoriesPerIteration = 1

    else:
        parametersForTrajectoryPath = json.loads(sys.argv[1])
        startSampleIndex = int(sys.argv[2])
        endSampleIndex = int(sys.argv[3])
        parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex, endSampleIndex)
        iterationIndex = int(parametersForTrajectoryPath['iterationIndex'])
        numTrainStepEachIteration = int(parametersForTrajectoryPath['numTrainStepEachIteration'])
        numTrajectoriesPerIteration = int(parametersForTrajectoryPath['numTrajectoriesPerIteration'])

    # check whether the trajectory file already exists
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(dirName, '..', '..',  'data', 'iterTrain2wolves1sheepMADDPGEnv', 'trajectories')
    if not os.path.exists(trajectoriesSaveDirectory):
        os.makedirs(trajectoriesSaveDirectory)

    trajectorySaveExtension = '.pickle'

    maxRunningSteps = 50
    numSimulations = 250
    killzoneRadius = 50
    numTree = 2
    fixedParameters = {'maxRunningSteps': maxRunningSteps, 'numSimulations': numSimulations, 'killzoneRadius': killzoneRadius}
    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory, trajectorySaveExtension, fixedParameters)
    trajectorySavePath = generateTrajectorySavePath(parametersForTrajectoryPath)

    if not os.path.isfile(trajectorySavePath):
        # env MDP
        sheepsID = [0]
        wolvesID = [1, 2]
        blocksID = []

        numSheeps = len(sheepsID)
        numWolves = len(wolvesID)
        numBlocks = len(blocksID)

        numAgents = numWolves + numSheeps
        numEntities = numAgents + numBlocks

        sheepSize = 0.05
        wolfSize = 0.075
        blockSize = 0.2

        sheepMaxSpeed = 1.3 * 1
        wolfMaxSpeed = 1.0 * 1
        blockMaxSpeed = None

        entitiesSizeList = [sheepSize] * numSheeps + [wolfSize] * numWolves + [blockSize] * numBlocks
        entityMaxSpeedList = [sheepMaxSpeed] * numSheeps + [wolfMaxSpeed] * numWolves + [blockMaxSpeed] * numBlocks
        entitiesMovableList = [True] * numAgents + [False] * numBlocks
        massList = [1.0] * numEntities

        centralControlId = 1
        centerControlIndexList = [centralControlId]
        reshapeAction = UnpackCenterControlAction(centerControlIndexList)
        getCollisionForce = GetCollisionForce()
        applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
        applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList,
                                              getCollisionForce, getPosFromAgentState)
        integrateState = IntegrateState(numEntities, entitiesMovableList, massList,
                                        entityMaxSpeedList, getVelFromAgentState, getPosFromAgentState)
        interpolateState = TransitMultiAgentChasing(numEntities, reshapeAction, applyActionForce, applyEnvironForce, integrateState)

        numFramesToInterpolate = 1

        def transit(state, action):
            # Apply the chosen actions on the first frame, then let the agents
            # coast with zero actions for any remaining interpolation frames.
            for frameIndex in range(numFramesToInterpolate):
                nextState = interpolateState(state, action)
                action = np.array([(0, 0)] * numAgents)
                state = nextState
            return nextState

        isTerminal = lambda state: False

        isCollision = IsCollision(getPosFromAgentState)
        collisonRewardWolf = 1
        punishForOutOfBound = PunishForOutOfBound()
        rewardWolf = RewardCentralControlPunishBond(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBound, collisonRewardWolf)
        collisonRewardSheep = -1
        rewardSheep = RewardCentralControlPunishBond(sheepsID, wolvesID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBound, collisonRewardSheep)
        terminalRewardList = [collisonRewardSheep, collisonRewardWolf]
        rewardMultiAgents = [rewardSheep, rewardWolf]

        resetState = ResetMultiAgentChasing(numAgents, numBlocks)

        observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState, getVelFromAgentState)
        observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]

        # policy
        actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7), (0, 0)]
        wolfActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7), (0, 0)]

        preyPowerRatio = 0.5
        sheepActionSpace = list(map(tuple, np.array(actionSpace) * preyPowerRatio))

        predatorPowerRatio = 0.5
        wolfActionOneSpace = list(map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))
        wolfActionTwoSpace = list(map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))

        wolvesActionSpace = list(product(wolfActionOneSpace, wolfActionTwoSpace))

        actionSpaceList = [sheepActionSpace, wolvesActionSpace]

        # neural network init
        numStateSpace = 4 * numEntities
        numSheepActionSpace = len(sheepActionSpace)
        numWolvesActionSpace = len(wolvesActionSpace)

        regularizationFactor = 1e-4
        sharedWidths = [128]
        actionLayerWidths = [128]
        valueLayerWidths = [128]
        generateSheepModel = GenerateModel(numStateSpace, numSheepActionSpace, regularizationFactor)
        generateWolvesModel = GenerateModel(numStateSpace, numWolvesActionSpace, regularizationFactor)
        generateModelList = [generateSheepModel, generateWolvesModel]

        sheepDepth = 9
        wolfDepth = 9
        depthList = [sheepDepth, wolfDepth]
        resBlockSize = 2
        dropoutRate = 0.0
        initializationMethod = 'uniform'
        sheepId, wolvesId = 0, 1
        trainableAgentIds = [sheepId, wolvesId]

        multiAgentNNmodel = [generateModel(sharedWidths * depth, actionLayerWidths, valueLayerWidths, resBlockSize, initializationMethod, dropoutRate) for depth, generateModel in zip(depthList, generateModelList)]

        otherAgentApproximatePolicy = [lambda NNmodel: ApproximatePolicy(NNmodel, sheepActionSpace), lambda NNmodel: ApproximatePolicy(NNmodel, wolvesActionSpace)]
        # NNGuidedMCTS init
        cInit = 1
        cBase = 100
        calculateScore = ScoreChild(cInit, cBase)
        selectChild = SelectChild(calculateScore)

        getApproximatePolicy = [lambda NNmodel: ApproximatePolicy(NNmodel, sheepActionSpace), lambda NNmodel: ApproximatePolicy(NNmodel, wolvesActionSpace)]
        getApproximateValue = [lambda NNmodel: ApproximateValue(NNmodel), lambda NNmodel: ApproximateValue(NNmodel)]

        def getStateFromNode(node): return list(node.id.values())[0]

        chooseActionInMCTS = sampleFromDistribution

        composeMultiAgentTransitInSingleAgentMCTS = ComposeMultiAgentTransitInSingleAgentMCTS(chooseActionInMCTS)
        composeSingleAgentGuidedMCTS = ComposeSingleAgentGuidedMCTS(numTree, numSimulations, actionSpaceList, terminalRewardList, selectChild, isTerminal, transit, getStateFromNode, getApproximatePolicy, getApproximateValue, composeMultiAgentTransitInSingleAgentMCTS)
        prepareMultiAgentPolicy = PrepareMultiAgentPolicy(composeSingleAgentGuidedMCTS, otherAgentApproximatePolicy, trainableAgentIds)

        # load model
        NNModelSaveExtension = ''
        NNModelSaveDirectory = os.path.join(dirName, '..', '..',  'data', 'iterTrain2wolves1sheepMADDPGEnv', 'NNModelRes')
        if not os.path.exists(NNModelSaveDirectory):
            os.makedirs(NNModelSaveDirectory)

        generateNNModelSavePath = GetSavePath(NNModelSaveDirectory, NNModelSaveExtension, fixedParameters)

        for agentId in trainableAgentIds:
            modelPath = generateNNModelSavePath({'iterationIndex': iterationIndex - 1, 'agentId': agentId, 'numTrajectoriesPerIteration': numTrajectoriesPerIteration, 'numTrainStepEachIteration': numTrainStepEachIteration})
            restoredNNModel = restoreVariables(multiAgentNNmodel[agentId], modelPath)
            multiAgentNNmodel[agentId] = restoredNNModel

        multiAgentPolicy = prepareMultiAgentPolicy(multiAgentNNmodel)
        chooseActionList = [maxFromDistribution, maxFromDistribution]

        def sampleAction(state):
            actionDists = multiAgentPolicy(state)
            action = [chooseAction(actionDist) for actionDist, chooseAction in zip(actionDists, chooseActionList)]
            return action

        render = lambda state: None
        forwardOneStep = ForwardMultiAgentsOneStep(transit, rewardMultiAgents)
        sampleTrajectory = SampleTrajectoryWithRender(maxRunningSteps, isTerminal, resetState, forwardOneStep, render, renderOn)

        trajectories = [sampleTrajectory(sampleAction) for sampleIndex in range(startSampleIndex, endSampleIndex)]
        print([len(traj) for traj in trajectories])
        saveToPickle(trajectories, trajectorySavePath)
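The wolves in this example are driven by a single center-control policy, so their joint action arrives as one tuple inside the per-agent action list. A hedged sketch of what UnpackCenterControlAction presumably does before the physics transition (the project's real class may differ):

class UnpackCenterControlActionSketch:
    def __init__(self, centerControlIndexList):
        self.centerControlIndexList = centerControlIndexList

    def __call__(self, actionList):
        unpackedActions = []
        for agentIndex, action in enumerate(actionList):
            if agentIndex in self.centerControlIndexList:
                # The centrally controlled entry is a tuple of per-agent actions.
                unpackedActions.extend(list(action))
            else:
                unpackedActions.append(action)
        return unpackedActions
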
Example #5
def main():
    startTime = time.time()

    DEBUG = 1
    renderOn = 1
    if DEBUG:
        parametersForTrajectoryPath = {}
        startSampleIndex = 5
        endSampleIndex = 8
        agentId = 1
        parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex,
                                                      endSampleIndex)
    else:
        parametersForTrajectoryPath = json.loads(sys.argv[1])
        startSampleIndex = int(sys.argv[2])
        endSampleIndex = int(sys.argv[3])
        agentId = int(parametersForTrajectoryPath['agentId'])
        parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex,
                                                      endSampleIndex)

    # check whether the trajectory file already exists
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(
        dirName, '..', '..', '..', '..', 'data', 'MADDPG2wolves1sheep',
        'trainWolvesTwoCenterControlAction', 'trajectories')
    if not os.path.exists(trajectoriesSaveDirectory):
        os.makedirs(trajectoriesSaveDirectory)

    trajectorySaveExtension = '.pickle'
    maxRunningSteps = 50
    numSimulations = 250
    fixedParameters = {
        'agentId': agentId,
        'maxRunningSteps': maxRunningSteps,
        'numSimulations': numSimulations
    }

    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory,
                                             trajectorySaveExtension,
                                             fixedParameters)

    trajectorySavePath = generateTrajectorySavePath(
        parametersForTrajectoryPath)

    if not os.path.isfile(trajectorySavePath):

        # env MDP
        sheepsID = [0]
        wolvesID = [1, 2]
        blocksID = []

        numSheeps = len(sheepsID)
        numWolves = len(wolvesID)
        numBlocks = len(blocksID)

        numAgents = numWolves + numSheeps
        numEntities = numAgents + numBlocks

        sheepSize = 0.05
        wolfSize = 0.075
        blockSize = 0.2

        sheepMaxSpeed = 1.3 * 1
        wolfMaxSpeed = 1.0 * 1
        blockMaxSpeed = None

        entitiesSizeList = [sheepSize] * numSheeps + [wolfSize] * numWolves + [
            blockSize
        ] * numBlocks
        entityMaxSpeedList = [sheepMaxSpeed] * numSheeps + [
            wolfMaxSpeed
        ] * numWolves + [blockMaxSpeed] * numBlocks
        entitiesMovableList = [True] * numAgents + [False] * numBlocks
        massList = [1.0] * numEntities

        centralControlId = 1
        centerControlIndexList = [centralControlId]
        reshapeAction = UnpackCenterControlAction(centerControlIndexList)
        getCollisionForce = GetCollisionForce()
        applyActionForce = ApplyActionForce(wolvesID, sheepsID,
                                            entitiesMovableList)
        applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList,
                                              entitiesSizeList,
                                              getCollisionForce,
                                              getPosFromAgentState)
        integrateState = IntegrateState(numEntities, entitiesMovableList,
                                        massList, entityMaxSpeedList,
                                        getVelFromAgentState,
                                        getPosFromAgentState)
        interpolateState = TransitMultiAgentChasing(numEntities, reshapeAction,
                                                    applyActionForce,
                                                    applyEnvironForce,
                                                    integrateState)

        numFramesToInterpolate = 1

        def transit(state, action):
            # Apply the chosen actions on the first frame, then let the agents
            # coast with zero actions for any remaining interpolation frames.
            for frameIndex in range(numFramesToInterpolate):
                nextState = interpolateState(state, action)
                action = np.array([(0, 0)] * numAgents)
                state = nextState
            return nextState

        isTerminal = lambda state: False

        isCollision = IsCollision(getPosFromAgentState)
        collisonRewardWolf = 1
        punishForOutOfBound = PunishForOutOfBound()
        rewardWolf = RewardCentralControlPunishBond(
            wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState,
            isCollision, punishForOutOfBound, collisonRewardWolf)
        collisonRewardSheep = -1
        rewardSheep = RewardCentralControlPunishBond(
            sheepsID, wolvesID, entitiesSizeList, getPosFromAgentState,
            isCollision, punishForOutOfBound, collisonRewardSheep)

        resetState = ResetMultiAgentChasing(numAgents, numBlocks)

        observeOneAgent = lambda agentID: Observe(
            agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState,
            getVelFromAgentState)
        observe = lambda state: [
            observeOneAgent(agentID)(state) for agentID in range(numAgents)
        ]

        # policy
        actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7),
                       (0, -10), (7, -7), (0, 0)]
        wolfActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0),
                           (-7, -7), (0, -10), (7, -7), (0, 0)]

        preyPowerRatio = 0.5
        sheepActionSpace = list(
            map(tuple,
                np.array(actionSpace) * preyPowerRatio))

        predatorPowerRatio = 0.5
        wolfActionOneSpace = list(
            map(tuple,
                np.array(wolfActionSpace) * predatorPowerRatio))
        wolfActionTwoSpace = list(
            map(tuple,
                np.array(wolfActionSpace) * predatorPowerRatio))

        wolvesActionSpace = list(
            product(wolfActionOneSpace, wolfActionTwoSpace))

        actionSpaceList = [sheepActionSpace, wolvesActionSpace]

        # neural network init
        numStateSpace = 4 * numEntities
        numSheepActionSpace = len(sheepActionSpace)
        numWolvesActionSpace = len(wolvesActionSpace)

        regularizationFactor = 1e-4
        sharedWidths = [128]
        actionLayerWidths = [128]
        valueLayerWidths = [128]
        generateSheepModel = GenerateModel(numStateSpace, numSheepActionSpace,
                                           regularizationFactor)

        sheepPolicy = lambda state: {(0, 0): 1}

        # MCTS
        cInit = 1
        cBase = 100
        calculateScore = ScoreChild(cInit, cBase)
        selectChild = SelectChild(calculateScore)

        # prior
        getActionPrior = lambda state: {
            action: 1 / len(wolvesActionSpace)
            for action in wolvesActionSpace
        }

        # load chase nn policy
        chooseActionInMCTS = sampleFromDistribution

        def wolvesTransit(state, action):
            return transit(state,
                           [chooseActionInMCTS(sheepPolicy(state)), action])

        # initialize children; expand
        initializeChildren = InitializeChildren(wolvesActionSpace,
                                                wolvesTransit, getActionPrior)
        isTerminal = lambda state: False
        expand = Expand(isTerminal, initializeChildren)

        # random rollout policy
        def rolloutPolicy(state):
            return [
                sampleFromDistribution(sheepPolicy(state)),
                wolvesActionSpace[np.random.choice(
                    range(numWolvesActionSpace))]
            ]

        rolloutHeuristic = lambda state: 0
        maxRolloutSteps = 15
        rollout = RollOut(rolloutPolicy, maxRolloutSteps, transit, rewardWolf,
                          isTerminal, rolloutHeuristic)

        wolfPolicy = MCTS(numSimulations, selectChild, expand, rollout, backup,
                          establishSoftmaxActionDist)

        # All agents' policies
        policy = lambda state: [sheepPolicy(state), wolfPolicy(state)]
        chooseActionList = [maxFromDistribution, maxFromDistribution]

        def sampleAction(state):
            actionDists = [sheepPolicy(state), wolfPolicy(state)]
            action = [
                chooseAction(actionDist) for actionDist, chooseAction in zip(
                    actionDists, chooseActionList)
            ]
            return action

        render = None
        forwardOneStep = ForwardOneStep(transit, rewardWolf)
        sampleTrajectory = SampleTrajectoryWithRender(maxRunningSteps,
                                                      isTerminal, resetState,
                                                      forwardOneStep, render,
                                                      renderOn)

        trajectories = [
            sampleTrajectory(sampleAction)
            for sampleIndex in range(startSampleIndex, endSampleIndex)
        ]
        print([len(traj) for traj in trajectories])
        saveToPickle(trajectories, trajectorySavePath)

    endTime = time.time()
    print('time taken:', endTime - startTime)
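The MCTS policies in these examples turn the finished search tree into an action distribution via establishSoftmaxActionDist. A hedged sketch of what that conversion might look like; the node attributes used here (children, id, numVisited) mirror the test fixture at the top and are assumptions about the real API:

import numpy as np

def establishSoftmaxActionDistSketch(root):
    # Each child of the root is keyed by the action that leads to it.
    actions = [list(child.id.keys())[0] for child in root.children]
    visits = np.array([child.numVisited for child in root.children], dtype=float)
    # Softmax over visit counts (shifted by the max for numerical stability).
    expVisits = np.exp(visits - np.max(visits))
    probabilities = expVisits / np.sum(expVisits)
    return dict(zip(actions, probabilities))
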
Example #6
def main():
    DEBUG = 0
    renderOn = 0
    if DEBUG:
        parametersForTrajectoryPath = {}
        startSampleIndex = 0
        endSampleIndex = 10
        agentId = 1
        parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex,
                                                      endSampleIndex)
    else:
        parametersForTrajectoryPath = json.loads(sys.argv[1])
        startSampleIndex = int(sys.argv[2])
        endSampleIndex = int(sys.argv[3])
        agentId = int(parametersForTrajectoryPath['agentId'])
        parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex,
                                                      endSampleIndex)

    # check whether the trajectory file already exists
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(
        dirName, '..', '..', '..', '..', 'data', '2wolves1sheep',
        'trainWolvesTwoCenterControlMultiTrees', 'trajectories')
    if not os.path.exists(trajectoriesSaveDirectory):
        os.makedirs(trajectoriesSaveDirectory)

    trajectorySaveExtension = '.pickle'
    maxRunningSteps = 50
    numSimulations = 500
    killzoneRadius = 50
    fixedParameters = {
        'agentId': agentId,
        'maxRunningSteps': maxRunningSteps,
        'numSimulations': numSimulations,
        'killzoneRadius': killzoneRadius
    }

    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory,
                                             trajectorySaveExtension,
                                             fixedParameters)

    trajectorySavePath = generateTrajectorySavePath(
        parametersForTrajectoryPath)

    if not os.path.isfile(trajectorySavePath):
        numOfAgent = 3
        sheepId = 0
        wolvesId = 1

        wolfOneId = 1
        wolfTwoId = 2

        xPosIndex = [0, 1]
        xBoundary = [0, 600]
        yBoundary = [0, 600]

        getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
        getWolfOneXPos = GetAgentPosFromState(wolfOneId, xPosIndex)
        getWolfTwoXPos = GetAgentPosFromState(wolfTwoId, xPosIndex)

        reset = Reset(xBoundary, yBoundary, numOfAgent)

        isTerminalOne = IsTerminal(getWolfOneXPos, getSheepXPos,
                                   killzoneRadius)
        isTerminalTwo = IsTerminal(getWolfTwoXPos, getSheepXPos,
                                   killzoneRadius)

        isTerminal = lambda state: isTerminalOne(state) or isTerminalTwo(state)

        stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
            xBoundary, yBoundary)

        centerControlIndexList = [wolvesId]
        unpackCenterControlAction = UnpackCenterControlAction(
            centerControlIndexList)
        transitionFunction = TransiteForNoPhysicsWithCenterControlAction(
            stayInBoundaryByReflectVelocity)

        numFramesToInterpolate = 3
        transit = TransitWithInterpolateStateWithCenterControlAction(
            numFramesToInterpolate, transitionFunction, isTerminal,
            unpackCenterControlAction)

        # NNGuidedMCTS init
        cInit = 1
        cBase = 100
        calculateScore = ScoreChild(cInit, cBase)
        selectChild = SelectChild(calculateScore)

        actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7),
                       (0, -10), (7, -7), (0, 0)]
        wolfActionSpace = actionSpace
        # wolfActionSpace = [(10, 0), (0, 10), (-10, 0), (0, -10), (0, 0)]

        preyPowerRatio = 12
        sheepActionSpace = list(
            map(tuple,
                np.array(actionSpace) * preyPowerRatio))

        predatorPowerRatio = 8
        wolfActionOneSpace = list(
            map(tuple,
                np.array(wolfActionSpace) * predatorPowerRatio))
        wolfActionTwoSpace = list(
            map(tuple,
                np.array(wolfActionSpace) * predatorPowerRatio))

        wolvesActionSpace = list(
            product(wolfActionOneSpace, wolfActionTwoSpace))

        actionSpaceList = [sheepActionSpace, wolvesActionSpace]

        # neural network init
        numStateSpace = 2 * numOfAgent
        numSheepActionSpace = len(sheepActionSpace)
        numWolvesActionSpace = len(wolvesActionSpace)

        regularizationFactor = 1e-4
        sharedWidths = [128]
        actionLayerWidths = [128]
        valueLayerWidths = [128]
        generateSheepModel = GenerateModel(numStateSpace, numSheepActionSpace,
                                           regularizationFactor)

        # load save dir
        NNModelSaveExtension = ''
        sheepNNModelSaveDirectory = os.path.join(
            dirName, '..', '..', '..', '..', 'data', '2wolves1sheep',
            'trainSheepWithTwoHeatSeekingWolves', 'trainedResNNModels')
        sheepNNModelFixedParameters = {
            'agentId': 0,
            'maxRunningSteps': 50,
            'numSimulations': 110,
            'miniBatchSize': 256,
            'learningRate': 0.0001,
        }
        getSheepNNModelSavePath = GetSavePath(sheepNNModelSaveDirectory,
                                              NNModelSaveExtension,
                                              sheepNNModelFixedParameters)

        depth = 9
        resBlockSize = 2
        dropoutRate = 0.0
        initializationMethod = 'uniform'
        initSheepNNModel = generateSheepModel(sharedWidths * depth,
                                              actionLayerWidths,
                                              valueLayerWidths, resBlockSize,
                                              initializationMethod,
                                              dropoutRate)

        sheepTrainedModelPath = getSheepNNModelSavePath({
            'trainSteps': 50000,
            'depth': depth
        })
        sheepTrainedModel = restoreVariables(initSheepNNModel,
                                             sheepTrainedModelPath)
        sheepPolicy = ApproximatePolicy(sheepTrainedModel, sheepActionSpace)

        # MCTS
        cInit = 1
        cBase = 100
        calculateScore = ScoreChild(cInit, cBase)
        selectChild = SelectChild(calculateScore)

        # prior
        getActionPrior = lambda state: {
            action: 1 / len(wolvesActionSpace)
            for action in wolvesActionSpace
        }

        # load chase nn policy
        temperatureInMCTS = 1
        chooseActionInMCTS = SampleAction(temperatureInMCTS)

        def wolvesTransit(state, action):
            return transit(state,
                           [chooseActionInMCTS(sheepPolicy(state)), action])

        # reward function
        aliveBonus = -1 / maxRunningSteps
        deathPenalty = 1
        rewardFunction = reward.RewardFunctionCompete(aliveBonus, deathPenalty,
                                                      isTerminal)

        # initialize children; expand
        initializeChildren = InitializeChildren(wolvesActionSpace,
                                                wolvesTransit, getActionPrior)
        expand = Expand(isTerminal, initializeChildren)

        # random rollout policy
        def rolloutPolicy(state):
            return wolvesActionSpace[np.random.choice(
                range(numWolvesActionSpace))]

        # rollout
        rolloutHeuristicWeight = 0
        minDistance = 400
        rolloutHeuristic1 = reward.HeuristicDistanceToTarget(
            rolloutHeuristicWeight, getWolfOneXPos, getSheepXPos, minDistance)
        rolloutHeuristic2 = reward.HeuristicDistanceToTarget(
            rolloutHeuristicWeight, getWolfTwoXPos, getSheepXPos, minDistance)

        rolloutHeuristic = lambda state: (rolloutHeuristic1(state) +
                                          rolloutHeuristic2(state)) / 2

        maxRolloutSteps = 15
        rollout = RollOut(rolloutPolicy, maxRolloutSteps, wolvesTransit,
                          rewardFunction, isTerminal, rolloutHeuristic)

        numTree = 4
        numSimulationsPerTree = int(numSimulations / numTree)
        wolfPolicy = StochasticMCTS(
            numTree, numSimulationsPerTree, selectChild, expand, rollout,
            backup, establishSoftmaxActionDistFromMultipleTrees)

        # All agents' policies
        policy = lambda state: [sheepPolicy(state), wolfPolicy(state)]
        chooseActionList = [chooseGreedyAction, chooseGreedyAction]

        render = None
        if renderOn:
            import pygame as pg
            from pygame.color import THECOLORS
            screenColor = THECOLORS['black']
            circleColorList = [
                THECOLORS['green'], THECOLORS['red'], THECOLORS['red']
            ]
            circleSize = 10

            saveImage = False
            saveImageDir = os.path.join(dirName, '..', '..', '..', '..',
                                        'data', 'demoImg')
            if not os.path.exists(saveImageDir):
                os.makedirs(saveImageDir)

            screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
            render = Render(numOfAgent, xPosIndex, screen, screenColor,
                            circleColorList, circleSize, saveImage,
                            saveImageDir)

        sampleTrajectory = SampleTrajectoryWithRender(maxRunningSteps, transit,
                                                      isTerminal, reset,
                                                      chooseActionList, render,
                                                      renderOn)
        trajectories = [
            sampleTrajectory(policy)
            for sampleIndex in range(startSampleIndex, endSampleIndex)
        ]
        print([len(traj) for traj in trajectories])
        saveToPickle(trajectories, trajectorySavePath)
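This last example splits the simulation budget across numTree independent trees and merges their results with establishSoftmaxActionDistFromMultipleTrees. A hedged sketch of that combination idea; runSingleTreeMCTS and the averaging rule are illustrative assumptions, not the project's exact code:

import numpy as np

def multiTreeActionDistSketch(state, runSingleTreeMCTS, numTree):
    # Run several small, independent searches from the same state.
    actionDists = [runSingleTreeMCTS(state) for _ in range(numTree)]
    actions = list(actionDists[0].keys())
    # Average the per-tree probabilities and renormalize.
    averaged = {action: np.mean([dist.get(action, 0.0) for dist in actionDists])
                for action in actions}
    total = sum(averaged.values())
    return {action: prob / total for action, prob in averaged.items()}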