def main():
    lowerBound = 0
    gridSize = 10
    upperBound = [gridSize - 1, gridSize - 1]

    actionSpace = [(-1, 0), (1, 0), (0, 1), (0, -1)]
    numActionSpace = len(actionSpace)

    sheepSpeedRatio = 1
    sheepActionSpace = list(map(tuple,
                                np.array(actionSpace) * sheepSpeedRatio))

    numOfAgent = 5  # 1 hunter, 1 stag with high value, 3 rabbits with low value
    hunterId = [0]
    targetIds = [1, 2, 3, 4]
    stagId = [1]
    rabbitId = [2, 3, 4]

    positionIndex = [0, 1]

    getHunterPos = GetAgentPosFromState(hunterId, positionIndex)
    getTargetsPos = GetAgentPosFromState(targetIds, positionIndex)

    stayWithinBoundary = env.StayWithinBoundary(upperBound, lowerBound)
    isTerminal = env.IsTerminal(getHunterPos, getTargetsPos)
    transitionFunction = env.Transition(stayWithinBoundary)
    reset = env.Reset(upperBound, lowerBound, numOfAgent)

    # stagPolicy = RandomPolicy(sheepActionSpace)  # alternative: a randomly moving stag
    stagPolicy = stationaryAgentPolicy

    rabbitPolicies = [stationaryAgentPolicy] * len(rabbitId)
Example 2
    def __call__(self, parameters):
        numOfAgent = parameters['numOfAgent']
        trajectories = []
        for trajectoryId in range(self.numTrajectories):

            forwardOneStep = self.composeFowardOneTimeStepWithRandomSubtlety(
                numOfAgent)

            sheepId = 0
            wolfId = 1
            distractorsIds = list(range(2, numOfAgent))
            distanceToVisualDegreeRatio = 20
            minInitSheepWolfDistance = 9 * distanceToVisualDegreeRatio
            minInitSheepDistractorDistance = 2.5 * distanceToVisualDegreeRatio  # keep distractors out of the kill zone at initialization
            isLegalInitPositions = IsLegalInitPositions(
                sheepId, wolfId, distractorsIds, minInitSheepWolfDistance,
                minInitSheepDistractorDistance)
            xBoundary = [0, 600]
            yBoundary = [0, 600]
            resetState = ResetState(xBoundary, yBoundary, numOfAgent,
                                    isLegalInitPositions,
                                    transPolarToCartesian)

            killzoneRadius = 2.5 * distanceToVisualDegreeRatio
            isTerminal = IsTerminal(sheepId, wolfId, killzoneRadius)

            numMDPTimeStepPerSecond = 5
            maxRunningSteps = 25 * numMDPTimeStepPerSecond
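            # presumably 25 seconds of simulated time at numMDPTimeStepPerSecond = 5, i.e. 125 MDP steps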
            sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal,
                                                resetState, forwardOneStep)

            numActionDirections = 8
            actionSpace = [
                (np.cos(directionId * 2 * math.pi / numActionDirections),
                 np.sin(directionId * 2 * math.pi / numActionDirections))
                for directionId in range(numActionDirections)
            ]
            randomPolicy = RandomPolicy(actionSpace)
            sampleAction = lambda state: sampleFromDistribution(
                randomPolicy(state))

            trajectory = sampleTrajectory(sampleAction)

            trajectories.append(trajectory)
        return trajectories
Example 3
def main():

    # MDP Env
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    xSwamp = [300, 400]
    ySwamp = [300, 400]
    swamp = [[xSwamp, ySwamp]]

    noise = [50, 50]
    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)
    transitionWithNoise = TransitionWithNoise(noise)

    minDistance = 50
    target = [600, 600]
    isTerminal = IsTerminal(minDistance, target)

    isInSwamp = IsInSwamp(swamp)

    singleAgentTransit = MovingAgentTransitionInSwampWorld(
        transitionWithNoise, stayInBoundaryByReflectVelocity, isTerminal)

    transitionFunctionPack = [singleAgentTransit, static]
    multiAgentTransition = MultiAgentTransitionInGeneral(
        transitionFunctionPack)
    twoAgentTransit = MultiAgentTransitionInSwampWorld(multiAgentTransition,
                                                       target)

    numOfAgent = 2
    xBoundaryReset = [500, 600]  # defined but not used below
    yBoundaryReset = [0, 100]
    resetState = Reset([0, 0], [0, 0], numOfAgent, target)  # both reset ranges are [0, 0], so the moving agent presumably starts at the origin

    actionSpace = [(100, 0), (-100, 0), (0, 100), (0, -100)]

    actionCost = -1
    swampPenalty = -100
    terminalReward = 1000
    rewardFunction = RewardFunction(actionCost, terminalReward, swampPenalty,
                                    isTerminal, isInSwamp)

    maxRunningSteps = 100

    oneStepSampleTrajectory = OneStepSampleTrajectory(twoAgentTransit,
                                                      rewardFunction)
    sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, resetState,
                                        oneStepSampleTrajectory)
    randomPolicy = RandomPolicy(actionSpace)
    actionDistribution = randomPolicy()
    #numSimulation, selectAction, selectNextState, expand, estimateValue, backup, outputDistribution
    numSimulation = 5
    cInit = 100
    cBase = 1
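    # ScoreChild is assumed to compute a UCT/PUCT-style selection score for each child,
    # with cInit and cBase shaping the exploration constant as in AlphaZero's pUCT rule.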
    scoreChild = ScoreChild(cInit, cBase)
    selectAction = SelectAction(scoreChild)
    selectNextState = SelectNextState(selectAction)
    uniformActionPrior = {action: 1 / len(actionSpace) for action in actionSpace}
    getActionPrior = lambda state: uniformActionPrior
    initializeChildren = InitializeChildren(actionSpace, twoAgentTransit,
                                            getActionPrior)
    expand = Expand(isTerminal, initializeChildren)
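    # Progressive widening (an assumption about PWidening's role): cap the number of
    # sampled child states per node at roughly C * n**alpha; with alpha = 0 and C = 3,
    # each node keeps at most 3 sampled next states despite the noisy transition.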
    alpha = 0
    C = 3
    pWidening = PWidening(alpha, C)
    expandNewState = ExpandNextState(twoAgentTransit, pWidening)

    rolloutPolicy = lambda state: random.choice(actionSpace)
    rolloutHeuristic = lambda state: 0  # zero heuristic: short rollouts may otherwise return no reward signal
    maxRolloutStep = 10
    estimateValue = RollOut(rolloutPolicy, maxRolloutStep, twoAgentTransit,
                            rewardFunction, isTerminal, rolloutHeuristic)
    mctsSelectAction = MCTS(numSimulation, selectAction, selectNextState,
                            expand, expandNewState, estimateValue, backup,
                            establishPlainActionDist)

    def sampleAction(state):
        actionDist = mctsSelectAction(state)
        action = maxFromDistribution(actionDist)
        return action

    trajectories = [sampleTrajectory(sampleAction) for _ in range(1)]
    #print(findCumulativeReward(trajectories))
    print(trajectories)
    DIRNAME = os.path.dirname(__file__)
    trajectoryDirectory = os.path.join(DIRNAME, '..', '..', 'data',
                                       'evaluateObstacle2', 'trajectories')
    if not os.path.exists(trajectoryDirectory):
        os.makedirs(trajectoryDirectory)

    # generate demo image
    screenWidth = 600
    screenHeight = 600
    screen = pg.display.set_mode((screenWidth, screenHeight))
    screenColor = THECOLORS['black']
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    lineColor = THECOLORS['white']
    lineWidth = 4
    xSwamp = [300, 400]
    ySwamp = [300, 400]
    drawBackground = DrawBackground(screen, screenColor, xBoundary, yBoundary,
                                    lineColor, lineWidth, xSwamp, ySwamp)

    fps = 40
    circleColorSpace = np.array([[0, 0, 255], [0, 255, 255]])
    circleSize = 10
    positionIndex = [0, 1]

    saveImage = True
    imageSavePath = os.path.join(trajectoryDirectory, 'picMovingSheep')
    if not os.path.exists(imageSavePath):
        os.makedirs(imageSavePath)
    trajectoryParameters = 'obstacle'
    imageFolderName = str(trajectoryParameters)

    saveImageDir = os.path.join(imageSavePath, imageFolderName)
    if not os.path.exists(saveImageDir):
        os.makedirs(saveImageDir)
    agentIdsToDraw = list(range(2))
    drawState = DrawState(fps, screen, circleColorSpace, circleSize,
                          agentIdsToDraw, positionIndex, saveImage,
                          saveImageDir, drawBackground)

    numFramesToInterpolate = 3
    interpolateState = InterpolateState(numFramesToInterpolate,
                                        twoAgentTransit)
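    # InterpolateState presumably inserts numFramesToInterpolate intermediate frames
    # between consecutive states so the saved demo animation looks smoother.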

    stateIndexInTimeStep = 0
    actionIndexInTimeStep = 1

    chaseTrial = ChaseTrialWithTraj(stateIndexInTimeStep, drawState,
                                    interpolateState, actionIndexInTimeStep)

    for trajectory in trajectories:
        chaseTrial(trajectory)
    pg.quit()
Example 4
def main():

    # MDP Env
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    xSwamp = [300, 400]
    ySwamp = [300, 400]
    swamp = [[xSwamp, ySwamp]]

    noise = [1, 1]
    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)
    transitionWithNoise = TransitionWithNoise(noise)

    minDistance = 50
    target = [200, 200]
    isTerminal = IsTerminal(minDistance, target)


    isInSwamp = IsInSwamp(swamp)
    
    singleAgentTransit = MovingAgentTransitionInSwampWorld(transitionWithNoise, stayInBoundaryByReflectVelocity, isTerminal)
    

    transitionFunctionPack = [singleAgentTransit, static]
    multiAgentTransition = MultiAgentTransitionInGeneral(transitionFunctionPack)
    twoAgentTransit = MultiAgentTransitionInSwampWorld(multiAgentTransition, target)


    numOfAgent = 2
    xBoundaryReset = [500, 600]
    yBoundaryReset = [0, 100]
    resetState = Reset(xBoundaryReset, yBoundaryReset, numOfAgent, target)

    actionSpace = [[10, 0], [-10, 0], [-10, -10], [10, 10], [0, 10], [0, -10], [-10, 10], [10, -10]]

    
    actionCost = -1
    swampPenalty = -10
    terminalReward = 10
    rewardFunction = RewardFunction(actionCost, terminalReward, swampPenalty, isTerminal, isInSwamp)
    
    maxRunningSteps = 100

    oneStepSampleTrajectory = OneStepSampleTrajectory(twoAgentTransit, rewardFunction)
    sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, resetState, oneStepSampleTrajectory)
    randomPolicy = RandomPolicy(actionSpace)
    actionDistribution = randomPolicy()
    sampleAction = SampleFromDistribution(actionDistribution)

    trajectories = [sampleTrajectory(sampleAction) for _ in range(10)]

    DIRNAME = os.path.dirname(__file__)
    trajectoryDirectory = os.path.join(DIRNAME, '..', '..', 'data', 'evaluateObstacle',
                                    'trajectories')
    if not os.path.exists(trajectoryDirectory):
        os.makedirs(trajectoryDirectory)
    

    # generate demo image
    screenWidth = 600
    screenHeight = 600
    screen = pg.display.set_mode((screenWidth, screenHeight))
    screenColor = THECOLORS['black']
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    lineColor = THECOLORS['white']
    lineWidth = 4
    xSwamp = [300, 400]
    ySwamp = [300, 400]
    drawBackground = DrawBackground(screen, screenColor, xBoundary, yBoundary,
                                    lineColor, lineWidth, xSwamp, ySwamp)

    fps = 40
    circleColorSpace = np.array([[0, 0, 255], [0, 255, 255]])
    circleSize = 10
    positionIndex = [0, 1]
    
    saveImage = True
    imageSavePath = os.path.join(trajectoryDirectory, 'picMovingSheep')
    if not os.path.exists(imageSavePath):
        os.makedirs(imageSavePath)
    trajectoryParameters = 'obstacle'
    imageFolderName = str(trajectoryParameters)

    saveImageDir = os.path.join(imageSavePath, imageFolderName)
    if not os.path.exists(saveImageDir):
        os.makedirs(saveImageDir)
    agentIdsToDraw = list(range(2))
    drawState = DrawState(fps, screen, circleColorSpace, circleSize,
                          agentIdsToDraw, positionIndex, saveImage,
                          saveImageDir, drawBackground)
    
    numFramesToInterpolate = 3
    interpolateState = InterpolateState(numFramesToInterpolate, twoAgentTransit)

    stateIndexInTimeStep = 0
    actionIndexInTimeStep = 1
    
    chaseTrial = ChaseTrialWithTraj(stateIndexInTimeStep, drawState, interpolateState, actionIndexInTimeStep)
   
    for trajectory in trajectories:
        chaseTrial(trajectory)
    pg.quit()
Example 5
    def __call__(self, parameters):
        print(parameters)
        numSimulation = parameters['numSimulation']
        xBoundary = [0, 600]
        yBoundary = [0, 600]
        xSwamp = [300, 400]
        ySwamp = [300, 400]
        swamp = [[xSwamp, ySwamp]]

        noise = parameters['noise']
        stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
            xBoundary, yBoundary)
        transitionWithNoise = TransitionWithNoise(noise)

        minDistance = 50
        target = [600, 600]
        isTerminal = IsTerminal(minDistance, target)

        isInSwamp = IsInSwamp(swamp)

        singleAgentTransit = MovingAgentTransitionInSwampWorld(
            transitionWithNoise, stayInBoundaryByReflectVelocity, isTerminal)

        transitionFunctionPack = [singleAgentTransit, static]
        multiAgentTransition = MultiAgentTransitionInGeneral(
            transitionFunctionPack)
        twoAgentTransit = MultiAgentTransitionInSwampWorld(
            multiAgentTransition, target)

        numOfAgent = 2
        xBoundaryReset = [500, 600]  # defined but not used below
        yBoundaryReset = [0, 100]
        resetState = Reset([0, 0], [0, 0], numOfAgent, target)  # both reset ranges are [0, 0], so the moving agent presumably starts at the origin

        actionSpace = [(100, 0), (-100, 0), (0, 100), (0, -100)]

        actionCost = -1
        swampPenalty = -100
        terminalReward = 1000
        rewardFunction = RewardFunction(actionCost, terminalReward,
                                        swampPenalty, isTerminal, isInSwamp)

        maxRunningSteps = 100

        oneStepSampleTrajectory = OneStepSampleTrajectory(
            twoAgentTransit, rewardFunction)
        sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal,
                                            resetState, oneStepSampleTrajectory)
        randomPolicy = RandomPolicy(actionSpace)
        actionDistribution = randomPolicy()
        #numSimulation, selectAction, selectNextState, expand, estimateValue, backup, outputDistribution
        # numSimulation = 50
        cInit = 100
        cBase = 1
        scoreChild = ScoreChild(cInit, cBase)
        selectAction = SelectAction(scoreChild)
        selectNextState = SelectNextState(selectAction)
        uniformActionPrior = {action: 1 / len(actionSpace) for action in actionSpace}
        getActionPrior = lambda state: uniformActionPrior
        initializeChildren = InitializeChildren(actionSpace, twoAgentTransit,
                                                getActionPrior)
        expand = Expand(isTerminal, initializeChildren)
        expandNewState = ExpandNextState(twoAgentTransit)

        rolloutPolicy = lambda state: random.choice(actionSpace)
        rolloutHeuristic = lambda state: 0  # zero heuristic: rollouts may otherwise return no reward signal
        maxRolloutStep = 100
        estimateValue = RollOut(rolloutPolicy, maxRolloutStep, twoAgentTransit,
                                rewardFunction, isTerminal, rolloutHeuristic)
        mctsSelectAction = MCTS(numSimulation, selectAction, selectNextState,
                                expand, expandNewState, estimateValue, backup,
                                establishPlainActionDist)

        def sampleAction(state):
            actionDist = mctsSelectAction(state)
            action = maxFromDistribution(actionDist)
            return action

        trajectoriesWithIntentionDists = []
        for trajectoryId in range(self.numTrajectories):
            trajectory = sampleTrajectory(sampleAction)
            trajectoriesWithIntentionDists.append(trajectory)
            print(trajectoriesWithIntentionDists)
        trajectoryFixedParameters = {'Algorithm': "MCTS"}
        self.saveTrajectoryByParameters(trajectoriesWithIntentionDists,
                                        trajectoryFixedParameters, parameters)
Example 6
def qLearningPolicy():
    lowerBound = 0
    gridSize = 10
    upperBound = [gridSize - 1, gridSize - 1]

    actionSpace = [(-1, 0), (1, 0), (0, 1), (0, -1)]
    numActionSpace = len(actionSpace)

    sheepSpeedRatio = 1
    sheepActionSpace = list(map(tuple, np.array(actionSpace) * sheepSpeedRatio))

    numOfAgent = 5  # 1 hunter, 1 stag with high value, 3 rabbits with low value
    hunterId = [0]
    targetIds = [1, 2, 3, 4]
    stagId = [1]
    rabbitId = [2, 3, 4]

    positionIndex = [0, 1]

    getHunterPos = GetAgentPosFromState(hunterId, positionIndex)
    getTargetsPos = GetAgentPosFromState(targetIds, positionIndex)

    stayWithinBoundary = env.StayWithinBoundary(upperBound, lowerBound)
    isTerminal = env.IsTerminal(getHunterPos, getTargetsPos)
    transitionFunction = env.Transition(stayWithinBoundary)
    reset = env.Reset(upperBound, lowerBound, numOfAgent)

    # stagPolicy = RandomPolicy(sheepActionSpace)  # alternative: a randomly moving stag
    stagPolicy = stationaryAgentPolicy

    rabbitPolicies = [stationaryAgentPolicy] * len(rabbitId)

    def wolfTransit(state, action):
        return transitionFunction(
            state,
            [action, maxFromDistribution(stagPolicy(state))]
            + [maxFromDistribution(rabbitPolicy(state)) for rabbitPolicy in rabbitPolicies])

    maxRunningSteps = 100
    stepPenalty = -1 / maxRunningSteps
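    # stepPenalty is scaled so the per-step costs of a full-length episode sum to -1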
    catchBonus = 1
    highRewardRatio = 1
    rewardFunction = RewardFunction(highRewardRatio, stepPenalty, catchBonus, isTerminal)

    # Q-learning
    qTable = initQtable(actionSpace)
    discountFactor = 0.9
    learningRate = 0.01
    updateQTable = UpdateQTable(discountFactor, learningRate)
    epsilon = 0.1
    getAction = GetAction(epsilon, actionSpace, argMax)
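    # The loop below is tabular epsilon-greedy Q-learning; UpdateQTable is assumed to apply
    # Q(s, a) <- Q(s, a) + learningRate * (reward + discountFactor * max_a' Q(s', a') - Q(s, a)).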

    startTime = time.time()
    for episode in range(1000):
        state = reset()
        for step in range(maxRunningSteps):
            wolfActionIndex = getAction(qTable, str(state))
            wolfAction = actionSpace[wolfActionIndex]
            nextState = wolfTransit(state, wolfAction)
            reward = rewardFunction(nextState, wolfAction)
            done = isTerminal(nextState)

            qTable = updateQTable(qTable, str(state), wolfActionIndex, reward,
                                  str(nextState))
            state = nextState
            if done:
                break
    QDict = qTable
    softmaxBeta = 5
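    # SoftmaxPolicy is assumed to return a Boltzmann distribution over actions,
    # pi(a | s) proportional to exp(softmaxBeta * Q(s, a)); a larger beta is closer to greedy.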
    wolfPolicy = SoftmaxPolicy(softmaxBeta, QDict, actionSpace)
    # All agents' policies
    policy = lambda state: ([wolfPolicy(state), stagPolicy(state)]
                            + [rabbitPolicy(state) for rabbitPolicy in rabbitPolicies])

    return policy
Example 7
    def __call__(self, parameters):
        print(parameters)
        numSimulation = parameters['numSimulation']
        rolloutHeuristic = parameters['rolloutHeuristic']  # note: overridden by the local definition further below
        xBoundary = [0, 400]
        yBoundary = [0, 400]
        xSwamp = [300, 400]  # not used in this example; IsInSwamp uses the region below
        ySwamp = [300, 400]
        swamp = [[[175, 225], [175, 225]]]

        noise = parameters['noise']
        cBase = parameters['cBase']
        maxRolloutStep = parameters['maxRolloutStep']
        stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
            xBoundary, yBoundary)
        transitionWithNoise = TransitionWithNoise(noise)

        minDistance = 40
        target = [400, 400]
        isTerminal = IsTerminal(minDistance, target)

        isInSwamp = IsInSwamp(swamp)

        singleAgentTransit = MovingAgentTransitionInSwampWorld(
            transitionWithNoise, stayInBoundaryByReflectVelocity, isTerminal)

        transitionFunctionPack = [singleAgentTransit, static]
        multiAgentTransition = MultiAgentTransitionInGeneral(
            transitionFunctionPack)
        twoAgentTransit = MultiAgentTransitionInSwampWorld(
            multiAgentTransition, target)

        maxRolloutStep = 15  # overrides the value taken from parameters above

        numOfAgent = 2
        xBoundaryReset = [500, 600]  # defined but not used below
        yBoundaryReset = [0, 100]
        resetState = Reset([0, 400], [0, 400], numOfAgent, target)

        actionSpace = [(50, 0), (-50, 0), (0, 50), (0, -50)]

        actionCost = -0.02
        swampPenalty = -0.5
        terminalReward = 1
        rewardFunction = RewardFunction(actionCost, terminalReward,
                                        swampPenalty, isTerminal, isInSwamp)

        maxRunningSteps = 50

        oneStepSampleTrajectory = OneStepSampleTrajectory(
            twoAgentTransit, rewardFunction)
        sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal,
                                            resetState, oneStepSampleTrajectory)
        randomPolicy = RandomPolicy(actionSpace)
        actionDistribution = randomPolicy()

        cInit = 1
        #cBase =100
        scoreChild = ScoreChild(cInit, cBase)
        selectAction = SelectAction(scoreChild)
        selectNextState = SelectNextState(selectAction)
        uniformActionPrior = {action: 1 / len(actionSpace) for action in actionSpace}
        getActionPrior = lambda state: uniformActionPrior
        initializeChildren = InitializeChildren(actionSpace, twoAgentTransit,
                                                getActionPrior)
        expand = Expand(isTerminal, initializeChildren)
        expandNewState = ExpandNextState(twoAgentTransit)

        rolloutPolicy = lambda state: random.choice(actionSpace)
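        # The rolloutHeuristic defined below (replacing the one taken from parameters) maps
        # the Euclidean distance to the target into (-1, 1]: near 1 on the target,
        # approaching -1 far away.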

        def rolloutHeuristic(allState):
            [state, terminalPosition] = allState
            distanceToTerminal = np.linalg.norm(np.array(target) -
                                                np.array(state),
                                                ord=2)
            return (2 * np.exp(-distanceToTerminal / 100) - 1)

        # rolloutHeuristic = lambda state: 0  # alternative: zero heuristic
        # maxRolloutStep = 15

        estimateValue = RollOut(rolloutPolicy, maxRolloutStep, twoAgentTransit,
                                rewardFunction, isTerminal, rolloutHeuristic)
        mctsSelectAction = MCTS(numSimulation, selectAction, selectNextState,
                                expand, expandNewState, estimateValue, backup,
                                establishPlainActionDist)

        def sampleAction(state):
            actionDist = mctsSelectAction(state)
            action = maxFromDistribution(actionDist)
            return action

        trajectoriesWithIntentionDists = []
        for trajectoryId in range(self.numTrajectories):
            trajectory = sampleTrajectory(sampleAction)
            trajectoriesWithIntentionDists.append(trajectory)
            #print(trajectoriesWithIntentionDists)
        trajectoryFixedParameters = {'Algorithm': "MCTSNew"}
        self.saveTrajectoryByParameters(trajectoriesWithIntentionDists,
                                        trajectoryFixedParameters, parameters)
Example 8
def main():

    # MDP Env
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    xSwamp = [300, 400]
    ySwamp = [300, 400]
    swamp = [[xSwamp, ySwamp]]

    noise = [50, 50]
    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)
    transitionWithNoise = TransitionWithNoise(noise)

    minDistance = 50
    target = [600, 600]
    isTerminal = IsTerminal(minDistance, target)

    isInSwamp = IsInSwamp(swamp)

    singleAgentTransit = MovingAgentTransitionInSwampWorld(
        transitionWithNoise, stayInBoundaryByReflectVelocity, isTerminal)

    transitionFunctionPack = [singleAgentTransit, static]
    multiAgentTransition = MultiAgentTransitionInGeneral(
        transitionFunctionPack)
    twoAgentTransit = MultiAgentTransitionInSwampWorld(multiAgentTransition,
                                                       target)

    numOfAgent = 2
    xBoundaryReset = [500, 600]  # defined but not used below
    yBoundaryReset = [0, 100]
    resetState = Reset([0, 0], [0, 0], numOfAgent, target)  # both reset ranges are [0, 0], so the moving agent presumably starts at the origin

    actionSpace = [(100, 0), (-100, 0), (0, 100), (0, -100)]

    actionCost = -1
    swampPenalty = -100
    terminalReward = 1000
    rewardFunction = RewardFunction(actionCost, terminalReward, swampPenalty,
                                    isTerminal, isInSwamp)

    maxRunningSteps = 100

    oneStepSampleTrajectory = OneStepSampleTrajectory(twoAgentTransit,
                                                      rewardFunction)
    sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, resetState,
                                        oneStepSampleTrajectory)
    randomPolicy = RandomPolicy(actionSpace)
    actionDistribution = randomPolicy()
    #numSimulation, selectAction, selectNextState, expand, estimateValue, backup, outputDistribution
    numSimulation = 50
    cInit = 1
    cBase = 100
    scoreChild = ScoreChild(cInit, cBase)
    selectAction = SelectAction(scoreChild)
    selectNextState = SelectNextState(selectAction)
    uniformActionPrior = {action: 1 / len(actionSpace) for action in actionSpace}
    getActionPrior = lambda state: uniformActionPrior
    initializeChildren = InitializeChildren(actionSpace, twoAgentTransit,
                                            getActionPrior)
    expand = Expand(isTerminal, initializeChildren)
    expandNewState = ExpandNextState(twoAgentTransit)

    rolloutPolicy = lambda state: random.choice(actionSpace)
    rolloutHeuristic = lambda state: 0  # zero heuristic: rollouts may otherwise return no reward signal
    maxRolloutStep = 100
    estimateValue = RollOut(rolloutPolicy, maxRolloutStep, twoAgentTransit,
                            rewardFunction, isTerminal, rolloutHeuristic)
    mctsSelectAction = MCTS(numSimulation, selectAction, selectNextState,
                            expand, expandNewState, estimateValue, backup,
                            establishPlainActionDist)

    def sampleAction(state):
        actionDist = mctsSelectAction(state)
        action = maxFromDistribution(actionDist)
        return action

    trajectories = [sampleTrajectory(sampleAction) for _ in range(1)]
    DIRNAME = os.path.dirname(__file__)
    trajectoryDirectory = os.path.join(DIRNAME, '..', '..', 'data',
                                       'evaluateCompeteDetection',
                                       'trajectories')
    if not os.path.exists(trajectoryDirectory):
        os.makedirs(trajectoryDirectory)

    print(trajectories)
    print(findCumulativeReward(trajectories))