def setUp(self):
        # Environment parameters
        bound_low = 0
        bound_high = 7
        self.transition = TransitionFunction(bound_low, bound_high)

        self.action_space = [-1, 1]
        self.num_action_space = len(self.action_space)
        self.actionPrior_func = GetActionPrior(self.action_space)

        step_penalty = -1
        catch_reward = 1
        self.target_state = bound_high
        self.isTerminal = Terminal(self.target_state)

        self.c_init = 0
        self.c_base = 1
        self.calculateScore = CalculateScore(self.c_init, self.c_base)

        self.selectChild = SelectChild(self.calculateScore)
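        # Assumption: CalculateScore presumably implements a PUCT-style score used by
        # SelectChild, something like
        #   exploration = (log((N_parent + c_base + 1) / c_base) + c_init)
        #                 * actionPrior * sqrt(N_parent) / (1 + N_child)
        #   score = Q_child + exploration, with Q_child = sumValue / numVisited
        # The exact form lives in CalculateScore; c_init and c_base only set the
        # exploration constant.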

        init_state = 3
        level1_0_state = self.transition(init_state, action=0)
        level1_1_state = self.transition(init_state, action=1)
        self.default_actionPrior = 0.5

        self.root = Node(id={1: init_state},
                         numVisited=1,
                         sumValue=0,
                         actionPrior=self.default_actionPrior,
                         isExpanded=True)
        self.level1_0 = Node(parent=self.root,
                             id={0: level1_0_state},
                             numVisited=2,
                             sumValue=5,
                             actionPrior=self.default_actionPrior,
                             isExpanded=False)
        self.level1_1 = Node(parent=self.root,
                             id={1: level1_1_state},
                             numVisited=3,
                             sumValue=10,
                             actionPrior=self.default_actionPrior,
                             isExpanded=False)

        self.initializeChildren = InitializeChildren(self.action_space,
                                                     self.transition,
                                                     self.actionPrior_func)
        self.expand = Expand(self.isTerminal, self.initializeChildren)
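        # Assumption: InitializeChildren presumably creates one child node per action in
        # self.action_space, using self.transition for the child state and
        # self.actionPrior_func for its prior; Expand applies it to a non-terminal leaf
        # and marks that leaf as expanded.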

# Example 2

def main():
    # experiment conditions
    maxRunningSteps = 15
    numTrials = 50
    manipulatedVariables = OrderedDict()
    manipulatedVariables['qPosInit'] = [(0.3, 0, -0.3, 0), (9.75, 0, 9.15, 0),
                                        (9.75, 9.75, 9.3, 9.3)]
    manipulatedVariables['sheepPolicyName'] = ['mcts', 'random']
    manipulatedVariables['numSimulations'] = [5, 25, 50, 100, 250, 400]

    levelNames = list(manipulatedVariables.keys())
    levelValues = list(manipulatedVariables.values())
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)

    # reset function
    envModelName = 'twoAgents'
    qVelInit = [0, 0, 0, 0]
    numAgents = 2
    qPosInitNoise = 0
    qVelInitNoise = 0

    # isTerminal
    killzoneRadius = 0.5
    isTerminal = IsTerminal(killzoneRadius)

    # transit for multiplayer transition; sheepTransit for sheep's MCTS simulation (only takes sheep's action as input)
    renderOn = False
    numSimulationFrames = 20
    transit = TransitionFunction(envModelName, isTerminal, renderOn,
                                 numSimulationFrames)
    sheepTransit = lambda state, action: transit(
        state, [action, stationaryWolfPolicy(state)])

    # reward function
    aliveBonus = 0.05
    deathPenalty = -1
    rewardFunction = reward.RewardFunctionCompete(aliveBonus, deathPenalty,
                                                  isTerminal)

    # select child
    cInit = 1
    cBase = 100
    calculateScore = CalculateScore(cInit, cBase)
    selectChild = SelectChild(calculateScore)

    # prior
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7),
                   (0, -10), (7, -7)]
    getActionPrior = GetActionPrior(actionSpace)

    # initialize children; expand
    initializeChildren = InitializeChildren(actionSpace, sheepTransit,
                                            getActionPrior)
    expand = Expand(isTerminal, initializeChildren)

    # random rollout policy
    numActionSpace = len(actionSpace)
    rolloutPolicy = lambda state: actionSpace[np.random.choice(
        range(numActionSpace))]

    # select next action
    selectNextAction = SelectNextAction(sheepTransit)

    sheepId = 0
    wolfId = 1
    xPosIndex = 2
    numXPosEachAgent = 2

    getSheepXPos = GetAgentPos(sheepId, xPosIndex, numXPosEachAgent)
    getWolfXPos = GetAgentPos(wolfId, xPosIndex, numXPosEachAgent)

    # rollout
    rolloutHeuristicWeight = 0
    maxRolloutSteps = 10
    rolloutHeuristic = HeuristicDistanceToTarget(rolloutHeuristicWeight,
                                                 getWolfXPos, getSheepXPos)
    rollout = RollOut(rolloutPolicy, maxRolloutSteps, sheepTransit,
                      rewardFunction, isTerminal, rolloutHeuristic)
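    # Assumption: RollOut presumably estimates a leaf's value by simulating at most
    # maxRolloutSteps with rolloutPolicy under sheepTransit, summing
    # rewardFunction(state, action) along the way and adding rolloutHeuristic(state)
    # when the rollout is cut off before a terminal state (with
    # rolloutHeuristicWeight = 0 the heuristic contributes nothing here).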

    # wrapper function
    getReset = lambda qPosInit: Reset(envModelName, qPosInit, qVelInit,
                                      numAgents, qPosInitNoise, qVelInitNoise)

    # All agents' policies
    getMCTS = lambda numSimulations: MCTS(numSimulations, selectChild, expand,
                                          rollout, backup, selectNextAction)
    sheepPolicyRandom = lambda state: actionSpace[np.random.choice(
        range(numActionSpace))]
    getSheepPolicy = GetSheepPolicy(getMCTS, sheepPolicyRandom)
    wolfActionMagnitude = 5
    wolfPolicyDirectlyTowardsSheep = WolfPolicyForceDirectlyTowardsSheep(
        getSheepXPos, getWolfXPos, wolfActionMagnitude)
    prepareAllAgentsPolicy = PrepareAllAgentsPolicy(
        getSheepPolicy, wolfPolicyDirectlyTowardsSheep)

    # sample trajectory
    getSampleTrajectory = lambda reset: SampleTrajectory(
        maxRunningSteps, transit, isTerminal, reset)

    # function to generate and save trajectories
    dataDirectory = '../data/testMCTSvsRandomInSheepEscape/trajectories'
    if not os.path.exists(dataDirectory):
        os.makedirs(dataDirectory)

    getSaveFileName = GetSaveFileName(dataDirectory, tupleToString)
    generateTrajectoriesAndComputeStatistics = GenerateTrajectoriesAndComputeStatistics(
        getReset, getSampleTrajectory, prepareAllAgentsPolicy, numTrials,
        getSaveFileName)
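    # Assumption: GenerateTrajectoriesAndComputeStatistics presumably samples numTrials
    # trajectories for each experimental condition, saves them under the name built by
    # getSaveFileName, and returns per-condition summary statistics for plotting below.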

    # run all trials and save trajectories
    statisticsDf = toSplitFrame.groupby(levelNames).apply(
        generateTrajectoriesAndComputeStatistics)

    # plot the statistics
    fig = plt.figure()

    plotRowNum = 1
    plotColNum = 3
    plotCounter = 1

    for (key, dataDf) in statisticsDf.groupby('qPosInit'):
        dataDf.index = dataDf.index.droplevel('qPosInit')
        axForDraw = fig.add_subplot(plotRowNum, plotColNum, plotCounter)
        drawPerformanceLine(dataDf, axForDraw, str(key))
        plotCounter += 1

    plt.legend(loc='best')

    plt.show()

# Example 3

def evaluate(cInit, cBase):
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7),
                   (0, -10), (7, -7)]
    numActionSpace = len(actionSpace)
    getActionPrior = GetActionPrior(actionSpace)
    numStateSpace = 4

    initSheepPosition = np.array([90, 90])
    initWolfPosition = np.array([90, 90])
    initSheepVelocity = np.array([0, 0])
    initWolfVelocity = np.array([0, 0])
    initSheepPositionNoise = np.array([40, 60])
    initWolfPositionNoise = np.array([0, 20])
    sheepPositionReset = ag.SheepPositionReset(initSheepPosition,
                                               initSheepPositionNoise)
    wolfPositionReset = ag.WolfPositionReset(initWolfPosition,
                                             initWolfPositionNoise)

    numOneAgentState = 2
    positionIndex = [0, 1]
    xBoundary = [0, 180]
    yBoundary = [0, 180]
    checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(xBoundary, yBoundary)
    sheepPositionTransition = ag.SheepPositionTransition(
        numOneAgentState, positionIndex, checkBoundaryAndAdjust)
    wolfSpeed = 7
    wolfPositionTransition = ag.WolfPositionTransition(numOneAgentState,
                                                       positionIndex,
                                                       checkBoundaryAndAdjust,
                                                       wolfSpeed)

    numAgent = 2
    sheepId = 0
    wolfId = 1
    transition = env.TransitionFunction(sheepId, wolfId, sheepPositionReset,
                                        wolfPositionReset,
                                        sheepPositionTransition,
                                        wolfPositionTransition)
    minDistance = 10
    isTerminal = env.IsTerminal(sheepId, wolfId, numOneAgentState,
                                positionIndex, minDistance)

    screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
    screenColor = [255, 255, 255]
    circleColorList = [[50, 255, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50],
                       [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50],
                       [50, 50, 50]]
    circleSize = 8
    saveImage = True
    saveImageFile = 'image'
    render = env.Render(numAgent, numOneAgentState, positionIndex, screen,
                        screenColor, circleColorList, circleSize, saveImage,
                        saveImageFile)

    aliveBonus = 0.05
    deathPenalty = -1
    rewardFunction = reward.RewardFunctionTerminalPenalty(
        sheepId, wolfId, numOneAgentState, positionIndex, aliveBonus,
        deathPenalty, isTerminal)

    # Hyper-parameters
    numSimulations = 600
    maxRunningSteps = 70

    # MCTS algorithm
    # Select child
    calculateScore = CalculateScore(cInit, cBase)
    selectChild = SelectChild(calculateScore)

    # expand
    initializeChildren = InitializeChildren(actionSpace, transition,
                                            getActionPrior)
    expand = Expand(transition, isTerminal, initializeChildren)

    # Rollout
    rolloutPolicy = lambda state: actionSpace[np.random.choice(
        range(numActionSpace))]
    maxRollOutSteps = 50
    rollout = RollOut(rolloutPolicy, maxRollOutSteps, transition,
                      rewardFunction, isTerminal)

    mcts = MCTS(numSimulations, selectChild, expand, rollout, backup,
                selectNextRoot)

    runMCTS = RunMCTS(mcts, maxRunningSteps, isTerminal, render)
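    # Assumption: RunMCTS presumably plays one episode, repeatedly running the search
    # from the current root, stepping the environment with the chosen action, and
    # rendering each state, until isTerminal or maxRunningSteps; it returns the
    # episode length.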

    rootAction = actionSpace[np.random.choice(range(numActionSpace))]
    numTestingIterations = 70
    episodeLengths = []
    import datetime
    for step in range(numTestingIterations):
        print(datetime.datetime.now())  # timestamp the start of each iteration
        state, action = None, None
        initState = transition(state, action)  # a None state presumably makes transition reset and return an initial state
        #optimal = math.ceil((np.sqrt(np.sum(np.power(initState[0:2] - initState[2:4], 2))) - minDistance )/10)
        rootNode = Node(id={rootAction: initState},
                        num_visited=0,
                        sum_value=0,
                        is_expanded=True)
        episodeLength = runMCTS(rootNode)
        episodeLengths.append(episodeLength)
    meanEpisodeLength = np.mean(episodeLengths)
    print("mean episode length is", meanEpisodeLength)
    return [meanEpisodeLength]


def main():  # comments for explanation
    startTime = datetime.now()

    # experiment conditions
    maxRunningSteps = 1
    numTrials = 200
    numSimulations = 200
    manipulatedVariables = OrderedDict()
    manipulatedVariables['yCoordinate'] = [-9.75, -5, 0, 5, 9.75]
    manipulatedVariables['trialIndex'] = list(
        range(numTrials))  # trialIndex only enumerates repetitions; it is not an experimental manipulation

    levelNames = list(manipulatedVariables.keys())
    levelValues = list(manipulatedVariables.values())
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)

    # reset function
    envModelName = 'twoAgents'
    qVelInit = [0, 0, 0, 0]
    numAgents = 2
    qPosInitNoise = 0
    qVelInitNoise = 0
    getQPosFromYCoordinate = lambda yCoordinate: [
        -9.75, yCoordinate, -9.15, yCoordinate
    ]

    # isTerminal
    killzoneRadius = 0.5
    isTerminal = IsTerminal(killzoneRadius)

    # transit for multiplayer transition
    # sheepTransit for sheep's MCTS simulation (only takes sheep's action as input)
    renderOn = False
    numSimulationFrames = 20
    transit = TransitionFunction(envModelName, isTerminal, renderOn,
                                 numSimulationFrames)
    sheepTransit = lambda state, action: transit(
        state, [action, stationaryWolfPolicy(state)])

    # reward function
    aliveBonus = 0.05
    deathPenalty = -1
    rewardFunction = reward.RewardFunctionCompete(aliveBonus, deathPenalty,
                                                  isTerminal)

    # select child
    cInit = 1
    cBase = 100
    calculateScore = CalculateScore(cInit, cBase)
    selectChild = SelectChild(calculateScore)

    # prior
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7),
                   (0, -10), (7, -7)]
    getActionPrior = GetActionPrior(actionSpace)

    # initialize children; expand
    initializeChildren = InitializeChildren(actionSpace, sheepTransit,
                                            getActionPrior)
    expand = Expand(isTerminal, initializeChildren)

    # random rollout policy
    numActionSpace = len(actionSpace)
    rolloutPolicy = lambda state: actionSpace[np.random.choice(
        range(numActionSpace))]

    # select next action
    selectNextAction = SelectNextAction(sheepTransit)

    sheepId = 0
    wolfId = 1
    xPosIndex = 2
    numXPosEachAgent = 2

    getSheepXPos = GetAgentPos(sheepId, xPosIndex, numXPosEachAgent)
    getWolfXPos = GetAgentPos(wolfId, xPosIndex, numXPosEachAgent)

    # rollout
    rolloutHeuristicWeight = 0
    maxRolloutSteps = 10
    rolloutHeuristic = HeuristicDistanceToTarget(rolloutHeuristicWeight,
                                                 getWolfXPos, getSheepXPos)
    rollout = RollOut(rolloutPolicy, maxRolloutSteps, sheepTransit,
                      rewardFunction, isTerminal, rolloutHeuristic)

    # wrapper function
    getReset = lambda yCoordinate: Reset(
        envModelName, getQPosFromYCoordinate(
            yCoordinate), qVelInit, numAgents, qPosInitNoise, qVelInitNoise)

    # All agents' policies
    sheepPolicyMCTS = MCTS(numSimulations, selectChild, expand, rollout,
                           backup, selectNextAction)
    wolfActionMagnitude = 5
    wolfPolicyDirectlyTowardsSheep = WolfPolicyForceDirectlyTowardsSheep(
        getSheepXPos, getWolfXPos, wolfActionMagnitude)
    allAgentsPolicies = [sheepPolicyMCTS, wolfPolicyDirectlyTowardsSheep]
    policy = lambda state: [
        agentPolicy(state) for agentPolicy in allAgentsPolicies
    ]
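    # policy(state) returns the joint action [sheepAction, wolfAction], which is the
    # format transit expects when stepping both agents.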

    # sample trajectory
    getSampleTrajectory = lambda reset: SampleTrajectory(
        maxRunningSteps, transit, isTerminal, reset)

    runTrial = RunTrial(getReset, getSampleTrajectory, policy)

    # run all trials and generate trajectories
    trialDataDf = toSplitFrame.groupby(levelNames).apply(
        runTrial)  # TODO: rename to a 'save trajectory' function that also takes the number of trials for each condition
    pickle_out = open('trajectories/testSheepFirstStepEscape.pickle', 'wb')
    # TODO: save as csv instead of pickle; save each condition in a separate file and keep appending
    pickle.dump(trialDataDf, pickle_out)
    pickle_out.close()

    print('trialDataDf')
    print(trialDataDf)

    # measurement function
    timeStep = 0
    getSheepActionFromAllAgentsActions = lambda allAgentActions: allAgentActions[
        sheepId]
    getFirstTrajectoryFromDf = lambda trajectoryDf: trajectoryDf.values[0][0]
    getAllAgentActionsFromTrajectory = lambda trajectory, timeStep: trajectory[
        timeStep][1]
    getSheepFirstActionFromTrajectoryDf = GetAgentActionFromTrajectoryDf(
        getFirstTrajectoryFromDf, timeStep, getSheepActionFromAllAgentsActions,
        getAllAgentActionsFromTrajectory)
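    # Assumed data layout: a trajectory is a list of (state, allAgentActions) tuples, so
    # trajectory[timeStep][1][sheepId] is the sheep's action at that step; the chain of
    # getters above extracts the sheep's first action from the first trajectory of each
    # condition.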

    # make measurements on individual trial
    measurementDf = trialDataDf.groupby(levelNames).apply(
        getSheepFirstActionFromTrajectoryDf)
    pickle_out = open('measurements/testQEffectOnMCTSMujoco.pickle', 'wb')
    pickle.dump(measurementDf, pickle_out)
    pickle_out.close()

    print('measurementDf')
    print(measurementDf)

    # group the measurements by yCoordinate for per-condition plotting
    dataStatisticsDf = measurementDf.groupby('yCoordinate')
    # pickle_out = open('statistics/testSheepFirstStepEscape.pickle', 'wb')
    # pickle.dump(dataStatisticsDf, pickle_out)
    # pickle_out.close()

    # plot the statistics
    fig = plt.figure()

    plotRowNum = 5
    plotColNum = 1
    plotCounter = 1

    for (key, dataDf) in dataStatisticsDf:
        axForDraw = fig.add_subplot(plotRowNum, plotColNum, plotCounter)
        drawHistogram(dataDf, axForDraw)
        plotCounter += 1

    plt.legend(loc='best')

    plt.show()

    endTime = datetime.now()
    print("Time taken: ", endTime - startTime)

# Example 5

def main():

    # MDP Env
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    xSwamp = [300, 300]
    ySwamp = [300, 300]
    swamp = [[[300, 300], [300, 300]]]

    noise = [1, 1]
    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)
    transitionWithNoise = TransitionWithNoise(noise)

    minDistance = 50
    target = [200, 200]
    isTerminal = IsTerminal(minDistance, target)

    isInSwamp = IsInSwamp(swamp)

    singleAgentTransit = MovingAgentTransitionInSwampWorld(
        transitionWithNoise, stayInBoundaryByReflectVelocity, isTerminal)

    transitionFunctionPack = [singleAgentTransit, static]
    multiAgentTransition = MultiAgentTransitionInGeneral(
        transitionFunctionPack)
    twoAgentTransit = MultiAgentTransitionInSwampWorld(multiAgentTransition,
                                                       target)

    numOfAgent = 2
    xBoundaryReset = [500, 600]
    yBoundaryReset = [0, 100]
    resetState = Reset(xBoundaryReset, yBoundaryReset, numOfAgent, target)

    actionSpace = [(100, 0), (-100, 0), (0, 100), (0, -100)]

    actionCost = -1
    swampPenalty = -10
    terminalReward = 100
    rewardFunction = RewardFunction(actionCost, terminalReward, swampPenalty,
                                    isTerminal, isInSwamp)

    maxRunningSteps = 50

    oneStepSampleTrajectory = OneStepSampleTrajectory(twoAgentTransit,
                                                      rewardFunction)
    sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, resetState,
                                        oneStepSampleTrajectory)
    randomPolicy = RandomPolicy(actionSpace)
    actionDistribution = randomPolicy()
    #numSimulation, selectChild, expand, estimateValue, backup, outputDistribution
    numSimulation = 200
    cInit = 0
    cBase = 1
    scoreChild = ScoreChild(cInit, cBase)
    selectChild = SelectChild(scoreChild)
    uniformActionPrior = {action: 1 / len(actionSpace) for action in actionSpace}
    getActionPrior = lambda state: uniformActionPrior
    initializeChildren = InitializeChildren(actionSpace, twoAgentTransit,
                                            getActionPrior)
    expand = Expand(isTerminal, initializeChildren)
    rolloutPolicy = lambda state: random.choice(actionSpace)
    rolloutHeuristic = lambda state: 1
    maxRolloutStep = 10
    estimateValue = RollOut(rolloutPolicy, maxRolloutStep, twoAgentTransit,
                            rewardFunction, isTerminal, rolloutHeuristic)
    mctsSelectAction = MCTS(numSimulation, selectChild, expand, estimateValue,
                            backup, establishPlainActionDist)
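    # Assumption: each MCTS call presumably runs numSimulation iterations of
    # selectChild -> expand -> estimateValue (rollout) -> backup, and
    # establishPlainActionDist then turns the root children's visit counts into the
    # action distribution consumed by sampleAction below.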

    #sampleAction = SampleFromDistribution(actionDictionary)
    def sampleAction(state):
        actionDist = mctsSelectAction(state)
        action = maxFromDistribution(actionDist)
        return action

    trajectories = [sampleTrajectory(sampleAction) for _ in range(1)]
    print(trajectories)

    DIRNAME = os.path.dirname(__file__)
    trajectoryDirectory = os.path.join(DIRNAME, '..', '..', 'data',
                                       'evaluateObstacle', 'trajectories')
    if not os.path.exists(trajectoryDirectory):
        os.makedirs(trajectoryDirectory)

    # generate demo image
    screenWidth = 600
    screenHeight = 600
    screen = pg.display.set_mode((screenWidth, screenHeight))
    screenColor = THECOLORS['black']
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    lineColor = THECOLORS['white']
    lineWidth = 4
    xSwamp = [300, 300]
    ySwamp = [300, 300]
    drawBackground = DrawBackground(screen, screenColor, xBoundary, yBoundary,
                                    lineColor, lineWidth, xSwamp, ySwamp)

    fps = 40
    circleColorSpace = np.array([[0, 0, 255], [0, 255, 255]])
    circleSize = 10
    positionIndex = [0, 1]

    saveImage = True
    imageSavePath = os.path.join(trajectoryDirectory, 'picMovingSheep')
    if not os.path.exists(imageSavePath):
        os.makedirs(imageSavePath)
    trajectoryParameters = 'obstacle'
    imageFolderName = str(trajectoryParameters)

    saveImageDir = os.path.join(imageSavePath, imageFolderName)
    if not os.path.exists(saveImageDir):
        os.makedirs(saveImageDir)
    agentIdsToDraw = list(range(2))
    drawState = DrawState(fps, screen, circleColorSpace, circleSize,
                          agentIdsToDraw, positionIndex, saveImage,
                          saveImageDir, drawBackground)

    numFramesToInterpolate = 3
    interpolateState = InterpolateState(numFramesToInterpolate,
                                        twoAgentTransit)

    stateIndexInTimeStep = 0
    actionIndexInTimeStep = 1

    chaseTrial = ChaseTrialWithTraj(stateIndexInTimeStep, drawState,
                                    interpolateState, actionIndexInTimeStep)
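    # Assumption: ChaseTrialWithTraj presumably replays each saved trajectory, reading
    # the state at stateIndexInTimeStep and the action at actionIndexInTimeStep, using
    # interpolateState to generate numFramesToInterpolate intermediate states between
    # steps, and drawing every frame with drawState (images are saved since saveImage
    # is True).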

    for trajectory in trajectories:
        chaseTrial(trajectory)
    pg.quit()


def main():  # comments for explanation
    startTime = datetime.now()

    # experiment conditions
    numTrials = 2
    manipulatedVariables = OrderedDict()
    manipulatedVariables['numSimulations'] = [5, 6, 7]
    manipulatedVariables['rolloutHeuristicWeight'] = [0.1, 0]
    manipulatedVariables['maxRolloutSteps'] = [10, 0]
    manipulatedVariables['trialIndex'] = list(range(numTrials))

    levelNames = list(manipulatedVariables.keys())
    levelValues = list(manipulatedVariables.values())
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)
    # keep only the two informative conditions: heuristic only (weight 0.1, 0 rollout
    # steps) and rollout only (weight 0, 10 rollout steps)
    keepRows = [(weight == 0.1 and steps == 0) or (weight == 0 and steps == 10)
                for (_, weight, steps, _) in toSplitFrame.index]
    toSplitFrame = toSplitFrame[keepRows]

    # reset function
    envModelName = 'twoAgents'
    qPosInit = [-9.8, 9.8, 8, 0]
    qVelInit = [0, 0, 0, 0]
    numAgents = 2
    qPosInitNoise = 0
    qVelInitNoise = 0
    reset = Reset(envModelName, qPosInit, qVelInit, numAgents, qPosInitNoise,
                  qVelInitNoise)

    # isTerminal
    killzoneRadius = 0.5
    isTerminal = IsTerminal(killzoneRadius)

    # transit for multiplayer transition
    # sheepTransit for sheep's MCTS simulation (only takes sheep's action as input)
    renderOn = False
    numSimulationFrames = 20
    transit = TransitionFunction(envModelName, isTerminal, renderOn,
                                 numSimulationFrames)
    sheepTransit = lambda state, action: transit(
        state, [action, stationaryWolfPolicy(state)])

    # reward function (aliveBonus is negative and deathPenalty is positive because sheep is chasing wolf).
    aliveBonus = -0.05
    deathPenalty = 1
    rewardFunction = reward.RewardFunctionCompete(aliveBonus, deathPenalty,
                                                  isTerminal)

    # select child
    cInit = 1
    cBase = 100
    calculateScore = CalculateScore(cInit, cBase)
    selectChild = SelectChild(calculateScore)

    # prior
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7),
                   (0, -10), (7, -7)]
    getActionPrior = GetActionPrior(actionSpace)

    # initialize children; expand
    initializeChildren = InitializeChildren(actionSpace, sheepTransit,
                                            getActionPrior)
    expand = Expand(isTerminal, initializeChildren)

    # random rollout policy
    numActionSpace = len(actionSpace)
    rolloutPolicy = lambda state: actionSpace[np.random.choice(
        range(numActionSpace))]

    # select next action
    selectNextAction = SelectNextAction(sheepTransit)

    sheepId = 0
    wolfId = 1
    xPosIndex = 2
    numXPosEachAgent = 2
    maxRunningSteps = 5

    getSheepXPos = GetAgentPos(sheepId, xPosIndex, numXPosEachAgent)
    getWolfXPos = GetAgentPos(wolfId, xPosIndex, numXPosEachAgent)

    # wrapper functions
    getRolloutHeuristic = lambda rolloutHeuristicWeight: HeuristicDistanceToTarget(
        rolloutHeuristicWeight, getWolfXPos, getSheepXPos
    )  # change the interface of heuristic function
    getRollout = lambda maxRolloutSteps, rolloutHeuristic: RollOut(
        rolloutPolicy, maxRolloutSteps, sheepTransit, rewardFunction,
        isTerminal, rolloutHeuristic)

    getMCTS = lambda numSimulations, rollout: MCTS(
        numSimulations, selectChild, expand, rollout, backup, selectNextAction)

    # sample trajectory
    sampleTrajectory = SampleTrajectory(maxRunningSteps, transit, isTerminal,
                                        reset)

    runTrial = RunTrial(getRolloutHeuristic, getRollout, getMCTS,
                        sampleTrajectory)

    # run all trials and generate trajectories
    trialDataDf = toSplitFrame.groupby(levelNames).apply(runTrial)
    print(trialDataDf.values[0][0])
    pickle_out = open('trajectories/testQEffectOnMCTSMujoco.pickle', 'wb')
    pickle.dump(trialDataDf, pickle_out)
    pickle_out.close()

    # measurement function
    initState = reset()
    allAgentsOptimalActions = [(10, 0),
                               (0, 0)]  # sheep directly moves towards wolf
    optimalNextState = transit(initState, allAgentsOptimalActions)
    optimalNextPosition = getSheepXPos(optimalNextState)
    nextPositionIndex = 1  # we are interested in the state at 2nd time step
    stateIndexInTuple = 0  # tuple = (state, action). State is the first element.
    firstTrialIndex = 0
    getSheepXPosAtNextStepFromTrajectory = GetPosFromTrajectory(
        nextPositionIndex, stateIndexInTuple, sheepId, xPosIndex,
        numXPosEachAgent)
    getFirstTrajectoryFromDf = GetTrialTrajectoryFromDf(firstTrialIndex)
    measurementFunction = DistanceBetweenActualAndOptimalNextPosition(
        optimalNextPosition, getSheepXPosAtNextStepFromTrajectory,
        getFirstTrajectoryFromDf)

    # make measurements on individual trial
    measurementDf = trialDataDf.groupby(levelNames).apply(measurementFunction)
    pickle_out = open('measurements/testQEffectOnMCTSMujoco.pickle', 'wb')
    pickle.dump(measurementDf, pickle_out)
    pickle_out.close()

    # compute statistics on the measurements (mean and standard deviation)
    manipulatedVariablesExceptTrialIndex = copy.deepcopy(levelNames)
    manipulatedVariablesExceptTrialIndex.remove('trialIndex')
    dataStatisticsDf = measurementDf.groupby(
        manipulatedVariablesExceptTrialIndex).distance.agg(['mean', 'std'])
    print(dataStatisticsDf)
    pickle_out = open('statistics/testQEffectOnMCTSMujoco.pickle', 'wb')
    pickle.dump(dataStatisticsDf, pickle_out)
    pickle_out.close()

    # plot the statistics
    fig = plt.figure()

    plotRowNum = 1
    plotColNum = 1
    plotCounter = 1

    legendLabels = {(0, 10): 'Rollout', (0.1, 0): 'No Rollout'}
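    # legendLabels keys are (rolloutHeuristicWeight, maxRolloutSteps) pairs kept by the
    # filter above: (0, 10) uses the random rollout, (0.1, 0) relies on the heuristic
    # only.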

    axForDraw = fig.add_subplot(plotRowNum, plotColNum, plotCounter)
    drawPerformanceLine(dataStatisticsDf, axForDraw, 'numSimulations',
                        legendLabels)

    plt.legend(loc='best')

    plt.show()

    endTime = datetime.now()
    print("Time taken: ", endTime - startTime)