    def __call__(self, condition):

        getSavePath = self.getTrajectorySavePathByCondition(condition)
        attentionType = condition['attType']
        alpha = condition['alpha']
        C = condition['C']
        minAttentionDistance = condition['minAttDist']
        rangeAttention = condition['rangeAtt']
        numTree = condition['numTrees']
        numSimulations = condition['numSim']
        actionRatio = condition['actRatio']
        cBase = condition['cBase']
        burnTime = condition['burnTime']
        softParaForIdentity = condition['softId']
        softParaForSubtlety = condition['softSubtlety']
        damp = condition['damp']
        actionCost = condition['actCost']

        numSub = 10
        allIdentityResults = []
        allPerceptionResults = []
        allActionResults = []
        allVelDiffResults = []
        allResults = []
        possibleTrialSubtleties = [3.3, 1.83, 0.01]  # [500.0, 3.3, 1.83, 0.92, 0.01]
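        # For each simulated subject, run every subtlety condition and record
        # per-condition means for identity accuracy, perception accuracy, action
        # deviation, velocity difference, and escape rate.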
        for subIndex in range(numSub):
            meanIdentityOnConditions = {}
            meanPerceptionOnConditions = {}
            meanActionOnConditions = {}
            meanVelDiffOnConditions = {}
            meanEscapeOnConditions = {}
            for chasingSubtlety in possibleTrialSubtleties:

                print(numTree, chasingSubtlety, numSimulations, attentionType)
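                # One sheep (id 0) plus 24 suspects; on each reset one suspect is
                # chosen as the wolf with the current chasing subtlety.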
                numAgent = 25
                sheepId = 0
                suspectorIds = list(range(1, numAgent))

                resetWolfIdAndSubtlety = ag.ResetWolfIdAndSubtlety(suspectorIds, [chasingSubtlety])
                distanceToVisualDegreeRatio = 20
                minInitSheepWolfDistance = 9 * distanceToVisualDegreeRatio
                minInitSheepDistractorDistance = 2.5 * distanceToVisualDegreeRatio  # no distractor in killzone when init
                isLegalInitPositions = ag.IsLegalInitPositions(sheepId, minInitSheepWolfDistance, minInitSheepDistractorDistance)
                xBoundary = [0, 640]
                yBoundary = [0, 480]
                resetAgentPositions = ag.ResetAgentPositions(xBoundary, yBoundary, numAgent, isLegalInitPositions)
                resetPhysicalState = ag.ResetPhysicalState(sheepId, numAgent, resetAgentPositions, resetWolfIdAndSubtlety)

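                # Motion is simulated at 20 frames per second while the planner
                # acts at 5 MDP steps per second, so each action is held for 4
                # frames.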
                numFramePerSecond = 20
                numMDPTimeStepPerSecond = 5
                numFrameWithoutActionChange = int(numFramePerSecond/numMDPTimeStepPerSecond)

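                # Speeds are specified in visual degrees per second and converted
                # to pixels per frame via distanceToVisualDegreeRatio.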
                sheepActionUpdateFrequency = 1
                minSheepSpeed = int(17.4 * distanceToVisualDegreeRatio/numFramePerSecond)
                maxSheepSpeed = int(23.2 * distanceToVisualDegreeRatio/numFramePerSecond)
                warmUpTimeSteps = int(10 * numMDPTimeStepPerSecond)
                sheepPolicy = ag.SheepPolicy(sheepActionUpdateFrequency, minSheepSpeed, maxSheepSpeed, warmUpTimeSteps, burnTime, damp)

                wolfActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
                minWolfSpeed = int(8.7 * distanceToVisualDegreeRatio/numFramePerSecond)
                maxWolfSpeed = int(14.5 * distanceToVisualDegreeRatio/numFramePerSecond)
                wolfPolicy = ag.WolfPolicy(wolfActionUpdateFrequency, minWolfSpeed, maxWolfSpeed, warmUpTimeSteps)
                distractorActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
                minDistractorSpeed = int(8.7 * distanceToVisualDegreeRatio/numFramePerSecond)
                maxDistractorSpeed = int(14.5 * distanceToVisualDegreeRatio/numFramePerSecond)
                distractorPolicy = ag.DistractorPolicy(distractorActionUpdateFrequency, minDistractorSpeed, maxDistractorSpeed, warmUpTimeSteps)
                preparePolicy = ag.PreparePolicy(sheepId, numAgent, sheepPolicy, wolfPolicy, distractorPolicy)
                updatePhysicalState = ag.UpdatePhysicalState(sheepId, numAgent, preparePolicy)

                xBoundary = [0, 640]
                yBoundary = [0, 480]
                checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(xBoundary, yBoundary)
                transiteMultiAgentMotion = ag.TransiteMultiAgentMotion(checkBoundaryAndAdjust)

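                # An episode terminates once the wolf comes within 2.5 visual
                # degrees of the sheep.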
                minDistance = 2.5 * distanceToVisualDegreeRatio
                isTerminal = env.IsTerminal(sheepId, minDistance)
                # screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
                # screenColor = np.array([0, 0, 0])
                # sheepColor = np.array([0, 255, 0])
                # wolfColor = np.array([255, 0, 0])
                # circleSize = 10
                # saveImage = True
                # saveImageFile = 'image3'
                # render = env.Render(numAgent, screen, xBoundary[1], yBoundary[1], screenColor, sheepColor, wolfColor, circleSize, saveImage, saveImageFile, isTerminal)
                render = None
                renderOnInSimulation = False
                transiteStateWithoutActionChangeInSimulation = env.TransiteStateWithoutActionChange(numFrameWithoutActionChange, isTerminal, transiteMultiAgentMotion, render, renderOnInSimulation)
                renderOnInPlay = False
                transiteStateWithoutActionChangeInPlay = env.TransiteStateWithoutActionChange(numFrameWithoutActionChange, isTerminal, transiteMultiAgentMotion, render, renderOnInPlay)

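                # Attention-model presets: each type fixes the number of attention
                # slots, the observation precision for tracked vs. untracked
                # agents, and the corresponding memory rates.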
                if attentionType == 'idealObserver':
                    attentionLimitation = 1
                    precisionPerSlot = 500.0
                    precisionForUntracked = 500.0
                    memoryratePerSlot = 1.0
                    memoryrateForUntracked = 1.0
                elif attentionType == 'preAttention':
                    attentionLimitation = 1
                    precisionPerSlot = 2.5
                    precisionForUntracked = 2.5
                    memoryratePerSlot = 0.45
                    memoryrateForUntracked = 0.45
                elif attentionType == 'attention3':
                    attentionLimitation = 3
                    precisionPerSlot = 8.0
                    precisionForUntracked = 0.01
                    memoryratePerSlot = 0.7
                    memoryrateForUntracked = 0.01
                elif attentionType == 'hybrid3':
                    attentionLimitation = 3
                    precisionPerSlot = 8.0
                    precisionForUntracked = 2.5
                    memoryratePerSlot = 0.7
                    memoryrateForUntracked = 0.45
                elif attentionType == 'attention4':
                    attentionLimitation = 4
                    precisionPerSlot = 8.0
                    precisionForUntracked = 0.01
                    memoryratePerSlot = 0.7
                    memoryrateForUntracked = 0.01
                elif attentionType == 'hybrid4':
                    attentionLimitation = 4
                    precisionPerSlot = 8.0
                    precisionForUntracked = 2.5
                    memoryratePerSlot = 0.7
                    memoryrateForUntracked = 0.45
                elif attentionType == 'preAttentionMem0.25':
                    attentionLimitation = 1
                    precisionPerSlot = 2.5
                    precisionForUntracked = 2.5
                    memoryratePerSlot = 0.25
                    memoryrateForUntracked = 0.25
                elif attentionType == 'preAttentionMem0.65':
                    attentionLimitation = 1
                    precisionPerSlot = 2.5
                    precisionForUntracked = 2.5
                    memoryratePerSlot = 0.65
                    memoryrateForUntracked = 0.65
                elif attentionType == 'preAttentionPre0.5':
                    attentionLimitation = 1
                    precisionPerSlot = 0.5
                    precisionForUntracked = 0.5
                    memoryratePerSlot = 0.45
                    memoryrateForUntracked = 0.45
                elif attentionType == 'preAttentionPre4.5':
                    attentionLimitation = 1
                    precisionPerSlot = 4.5
                    precisionForUntracked = 4.5
                    memoryratePerSlot = 0.45
                    memoryrateForUntracked = 0.45

                attention = Attention.AttentionToPrecisionAndDecay(precisionPerSlot, precisionForUntracked, memoryratePerSlot, memoryrateForUntracked)
                transferMultiAgentStatesToPositionDF = ba.TransferMultiAgentStatesToPositionDF(numAgent)
                possibleSubtleties = [500.0, 11.0, 3.3, 1.83, 0.92, 0.31, 0.01]
                resetBeliefAndAttention = ba.ResetBeliefAndAttention(sheepId, suspectorIds, possibleSubtleties, attentionLimitation, transferMultiAgentStatesToPositionDF, attention)

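                # Attention slots receive a distance-based prior over a window
                # given in visual degrees and converted to pixels below.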
                maxAttentionDistance = minAttentionDistance + rangeAttention
                attentionMinDistance = minAttentionDistance * distanceToVisualDegreeRatio
                attentionMaxDistance = maxAttentionDistance * distanceToVisualDegreeRatio
                numStandardErrorInDistanceRange = 4
                calDistancePriorOnAttentionSlot = Attention.CalDistancePriorOnAttentionSlot(attentionMinDistance, attentionMaxDistance, numStandardErrorInDistanceRange)
                attentionSwitch = Attention.AttentionSwitch(attentionLimitation, calDistancePriorOnAttentionSlot)
                computePosterior = calPosterior.CalPosteriorLog(minDistance)

                print(attentionLimitation, attentionMinDistance/distanceToVisualDegreeRatio, attentionMaxDistance/distanceToVisualDegreeRatio)

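                # Belief and attention are frozen inside tree-search simulations
                # (update frequency = inf) but refreshed every 0.6 s during play.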
                attentionSwitchFrequencyInSimulation = np.inf
                beliefUpdateFrequencyInSimulation = np.inf
                updateBeliefAndAttentionInSimulation = ba.UpdateBeliefAndAttentionState(attention, computePosterior, attentionSwitch, transferMultiAgentStatesToPositionDF,
                        attentionSwitchFrequencyInSimulation, beliefUpdateFrequencyInSimulation, burnTime)

                attentionSwitchFrequencyInPlay = int(0.6 * numMDPTimeStepPerSecond)
                beliefUpdateFrequencyInPlay = int(0.6 * numMDPTimeStepPerSecond)
                updateBeliefAndAttentionInPlay = ba.UpdateBeliefAndAttentionState(attention, computePosterior, attentionSwitch, transferMultiAgentStatesToPositionDF,
                        attentionSwitchFrequencyInPlay, beliefUpdateFrequencyInPlay, burnTime)

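                # At each simulation root the imagined physical state is resampled
                # from the current belief; the "re-update" variant applies no
                # softening (both soft parameters fixed to 1).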
                updatePhysicalStateByBeliefFrequencyInSimulationRoot = int(0.2 * numMDPTimeStepPerSecond)
                updatePhysicalStateByBeliefInSimulationRoot = ba.UpdatePhysicalStateImagedByBelief(updatePhysicalStateByBeliefFrequencyInSimulationRoot,
                        softParaForIdentity, softParaForSubtlety)
                reUpdatePhysicalStateByBeliefInSimulationRoot = ba.UpdatePhysicalStateImagedByBelief(updatePhysicalStateByBeliefFrequencyInSimulationRoot,
                        softParaForIdentity = 1, softParaForSubtlety = 1)
                updatePhysicalStateByBeliefFrequencyInSimulation = np.inf
                #updatePhysicalStateByBeliefInSimulation = ba.UpdatePhysicalStateImagedByBelief(updatePhysicalStateByBeliefFrequencyInSimulation,
                #        softParaForIdentity, softParaForSubtlety)
                updatePhysicalStateByBeliefInSimulation = lambda state: state

                updatePhysicalStateByBeliefFrequencyInPlay = np.inf
                #updatePhysicalStateByBeliefInPlay = ba.UpdatePhysicalStateImagedByBelief(updatePhysicalStateByBeliefFrequencyInPlay,
                #        softParaForIdentity, softParaForSubtlety)
                updatePhysicalStateByBeliefInPlay = lambda state: state

                transitionFunctionInSimulation = env.TransitionFunction(resetPhysicalState, resetBeliefAndAttention, updatePhysicalState, transiteStateWithoutActionChangeInSimulation,
                        updateBeliefAndAttentionInSimulation, updatePhysicalStateByBeliefInSimulation)

                transitionFunctionInPlay = env.TransitionFunction(resetPhysicalState, resetBeliefAndAttention, updatePhysicalState, transiteStateWithoutActionChangeInPlay,
                        updateBeliefAndAttentionInPlay, updatePhysicalStateByBeliefInPlay)

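                # Discrete action space: a stay action plus 8 directions at 45
                # degree intervals; magnitude scales with the sheep's minimum
                # speed (the half-magnitude set is currently unused).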
                numActionSpace = 8
                actionInterval = int(360/(numActionSpace))
                actionMagnitude = actionRatio * minSheepSpeed * numFramePerSecond
                actionSpaceFull = [(np.cos(degreeInPolar) * actionMagnitude, np.sin(degreeInPolar) * actionMagnitude)
                        for degreeInPolar in np.arange(0, 360, actionInterval)/180 * math.pi]
                actionSpaceHalf = [(np.cos(degreeInPolar) * actionMagnitude * 0.5, np.sin(degreeInPolar) * actionMagnitude * 0.5)
                        for degreeInPolar in np.arange(0, 360, actionInterval)/180 * math.pi]
                actionSpace = [(0, 0)] + actionSpaceFull  # + actionSpaceHalf
                getActionPrior = lambda state: {action: 1 / len(actionSpace) for action in actionSpace}

                maxRollOutSteps = 5
                aliveBouns = 0.2 * 0
                deathPenalty = -1
                rewardFunction = reward.RewardFunctionTerminalPenalty(sheepId, aliveBouns, actionCost, deathPenalty, isTerminal, actionSpace)
                rewardRollout = lambda state, action, nextState: rewardFunction(state, action)


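                # MCTS with progressive widening: UCB-style child scoring
                # (cInit, cBase) plus a widening rule (alpha, C) that limits how
                # many sampled next states each node expands.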
                cInit = 1
                #cBase = 50
                scoreChild = ScoreChild(cInit, cBase)
                selectAction = SelectAction(scoreChild)
                selectNextState = SelectNextState(selectAction)

                initializeChildren = InitializeChildren(actionSpace, transitionFunctionInSimulation, getActionPrior)
                expand = Expand(isTerminal, initializeChildren)
                pWidening = PWidening(alpha, C)
                expandNewState = ExpandNextState(transitionFunctionInSimulation, pWidening)

                rolloutPolicy = lambda state: actionSpace[np.random.choice(range(numActionSpace))]
                rolloutHeuristic = lambda state: 0
                estimateValue = RollOut(rolloutPolicy, maxRollOutSteps, transitionFunctionInSimulation, rewardRollout, isTerminal, rolloutHeuristic)

                numActionPlaned = 1
                outputAction = OutputAction(numActionPlaned, actionSpace)
                #numSimulations = int(numTotalSimulationTimes/numTree)

                #sheepColorInMcts = np.array([0, 255, 0])
                #wolfColorInMcts = np.array([255, 0, 0])
                #distractorColorInMcts = np.array([255, 255, 255])
                #saveImageMCTS = True
                #mctsRender = env.MctsRender(numAgent, screen, xBoundary[1], yBoundary[1], screenColor, sheepColorInMcts, wolfColorInMcts, distractorColorInMcts, circleSize, saveImageMCTS, saveImageFile)
                #mctsRenderOn = False
                #mctsRender = None
                #pg.init()
                #mcts = MCTS(numSimulations, selectChild, expand, rollout, backup, selectAction, mctsRender, mctsRenderOn)
                pwMultipleTrees = PWMultipleTrees(numSimulations, selectAction, selectNextState, expand, expandNewState, estimateValue, backup, outputAction)

                maxRunningSteps = int(25 * numMDPTimeStepPerSecond)
                makeDiffSimulationRoot = MakeDiffSimulationRoot(isTerminal, updatePhysicalStateByBeliefInSimulationRoot, reUpdatePhysicalStateByBeliefInSimulationRoot)
                runMCTSTrjactory = RunMCTSTrjactory(maxRunningSteps, numTree, numActionPlaned, sheepActionUpdateFrequency, transitionFunctionInPlay, isTerminal, makeDiffSimulationRoot, render)

                rootAction = actionSpace[np.random.choice(range(numActionSpace))]
                numTrial = 10
                trajectories = [runMCTSTrjactory(pwMultipleTrees) for trial in range(numTrial)]

                savePath = getSavePath({'chasingSubtlety': chasingSubtlety, 'subIndex': subIndex})
                tsl.saveToPickle(trajectories, savePath)
                getCSVSavePath = self.getCSVSavePathByCondition(condition)

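                # The statistics below index into the saved time-step tuples;
                # apparent layout: [0] state (true wolf id and subtlety at
                # state[0][3]), [1] executed action, [2]/[3] states under the
                # true/opposite action, [4] action computed on the true state,
                # [5] sampled (id, subtlety) hypotheses. The first step is
                # skipped via startStatsIndex.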
                startStatsIndex = 1
                def getTrueWolfIdAcc(trajectory):
                    AccTrial = []
                    for timeStepIndex in range(len(trajectory) - 2):
                        timeStep = trajectory[timeStepIndex]
                        wolfId = timeStep[0][0][3][0]
                        wolfSubtlety = timeStep[0][0][3][1]
                        #print(wolfId, '**', wolfIdInEach)
                        if timeStepIndex >= startStatsIndex:
                            IdAcc = np.mean([int(IdAndSubtlety[0] == wolfId) for IdAndSubtlety in timeStep[5]])
                            AccTrial.append(IdAcc)
                    meanAcc = np.mean(AccTrial)
                    return meanAcc
                meanIdentity = np.mean([getTrueWolfIdAcc(trajectory) for trajectory in trajectories])
                meanIdentityOnConditions.update({chasingSubtlety: meanIdentity})

                def getTrueWolfIdSubtletyAcc(trajectory):
                    AccTrial = []
                    for timeStepIndex in range(len(trajectory) - 2):
                        timeStep = trajectory[timeStepIndex]
                        wolfId = timeStep[0][0][3][0]
                        wolfSubtlety = timeStep[0][0][3][1]
                        #print(wolfId, '**', wolfIdInEach)
                        if timeStepIndex >= startStatsIndex:
                            IdAndSubtletyAcc = np.mean([int((IdAndSubtlety[0] == wolfId) and (IdAndSubtlety[1] == wolfSubtlety)) for IdAndSubtlety in timeStep[5]])
                            AccTrial.append(IdAndSubtletyAcc)
                    meanAcc = np.mean(AccTrial)
                    return meanAcc
                meanPerception = np.mean([getTrueWolfIdSubtletyAcc(trajectory) for trajectory in trajectories])
                meanPerceptionOnConditions.update({chasingSubtlety: meanPerception})

                def getActionDeviationLevel(trajectory):
                    AccTrial = []
                    for timeStepIndex in range(len(trajectory) - 2):
                        timeStep = trajectory[timeStepIndex]
                        actionReal = np.array(timeStep[1])
                        actionOnTruth = np.array(timeStep[4])
                        if timeStepIndex >= startStatsIndex:
                            deviateLevel = round(agf.computeAngleBetweenVectors(actionReal, actionOnTruth) / (math.pi / 4))
                            AccTrial.append(deviateLevel)
                    meanAcc = np.mean(AccTrial)
                    return meanAcc
                meanAction = np.mean([getActionDeviationLevel(trajectory) for trajectory in trajectories])
                meanActionOnConditions.update({chasingSubtlety: meanAction})

                def getVelocityDiff(trajectory):
                    AccTrial = []
                    for timeStepIndex in range(len(trajectory) - 2):
                        timeStep = trajectory[timeStepIndex]
                        velReal = np.array(timeStep[0][0][0][1][0])
                        velWithActionOnTruth = np.array(timeStep[2][1][0])
                        velWithActionOppo = np.array(timeStep[3][1][0])
                        if timeStepIndex >= startStatsIndex:
                            velDiffNormWithActionOnTruth = np.linalg.norm((velReal - velWithActionOnTruth))
                            velDiffNormWithActionOppo = np.linalg.norm((velReal - velWithActionOppo))
                            velDiffRatio = 1.0 * velDiffNormWithActionOnTruth / velDiffNormWithActionOppo
                            AccTrial.append(velDiffRatio)
                    meanAcc = np.mean(AccTrial)
                    return meanAcc
                meanVelDiff = np.mean([getVelocityDiff(trajectory) for trajectory in trajectories])
                meanVelDiffOnConditions.update({chasingSubtlety: meanVelDiff})

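                # A trial counts as an escape if the trajectory runs (nearly) to
                # the maximum episode length.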
                getEscapeAcc = lambda trajectory: int(len(trajectory) >= (maxRunningSteps - 2))
                meanEscape = np.mean([getEscapeAcc(trajectory) for trajectory in trajectories])
                meanEscapeOnConditions.update({chasingSubtlety: meanEscape})


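            # Append this subject's per-condition means and rewrite the cumulative
            # CSV for each measure.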
            allResults.append(meanEscapeOnConditions)
            results = pd.DataFrame(allResults)
            escapeCSVSavePath = getCSVSavePath({'measure': 'escape'})
            results.to_csv(escapeCSVSavePath)

            allIdentityResults.append(meanIdentityOnConditions)
            identityResults = pd.DataFrame(allIdentityResults)
            identityCSVSavePath = getCSVSavePath({'measure': 'identity'})
            identityResults.to_csv(identityCSVSavePath)

            allPerceptionResults.append(meanPerceptionOnConditions)
            perceptionResults = pd.DataFrame(allPerceptionResults)
            perceptionCSVSavePath = getCSVSavePath({'measure': 'perception'})
            perceptionResults.to_csv(perceptionCSVSavePath)

            allActionResults.append(meanActionOnConditions)
            actionResults = pd.DataFrame(allActionResults)
            actionCSVSavePath = getCSVSavePath({'measure': 'action'})
            actionResults.to_csv(actionCSVSavePath)

            allVelDiffResults.append(meanVelDiffOnConditions)
            velDiffResults = pd.DataFrame(allVelDiffResults)
            velDiffCSVSavePath = getCSVSavePath({'measure': 'velDiff'})
            velDiffResults.to_csv(velDiffCSVSavePath)


def evaluate(numTree, chasingSubtlety, numTotalSimulationTimes, cInit, cBase):
    print(numTree, chasingSubtlety, numTotalSimulationTimes, cInit, cBase)
    numActionSpace = 8
    actionInterval = int(360 / numActionSpace)
    actionSpace = [
        (np.cos(degreeInPolar), np.sin(degreeInPolar))
        for degreeInPolar in np.arange(0, 360, actionInterval) / 180 * math.pi
    ]
    getActionPrior = GetActionPrior(actionSpace)

    # 2D Env
    initSheepPosition = np.array([320, 240])
    initSheepPositionNoise = np.array([0, 0])
    resetSheepState = ag.ResetAgentState(initSheepPosition,
                                         initSheepPositionNoise)
    initWolfOrDistractorPosition = np.array([320, 240])
    initWolfOrDistractorPositionNoise = np.array([125, 230])
    resetWolfOrDistractorState = ag.ResetAgentState(
        initWolfOrDistractorPosition, initWolfOrDistractorPositionNoise)

    numAgent = 25
    sheepId = 0
    suspectorIds = list(range(1, numAgent))

    resetWolfIdAndSubtlety = ag.ResetWolfIdAndSubtlety(suspectorIds,
                                                       [chasingSubtlety])
    resetPhysicalState = ag.ResetPhysicalState(sheepId, numAgent,
                                               resetSheepState,
                                               resetWolfOrDistractorState,
                                               resetWolfIdAndSubtlety)

    numFramePerSecond = 60
    numMDPTimeStepPerSecond = 5
    numFrameWithoutActionChange = int(numFramePerSecond /
                                      numMDPTimeStepPerSecond)

    sheepActionUpdateFrequency = 1
    distanceToVisualDegreeRatio = 20
    minSheepSpeed = int(17.4 * distanceToVisualDegreeRatio / numFramePerSecond)
    maxSheepSpeed = int(23.2 * distanceToVisualDegreeRatio / numFramePerSecond)
    warmUpTimeSteps = int(10 * numMDPTimeStepPerSecond)
    sheepPolicy = ag.SheepPolicy(sheepActionUpdateFrequency, minSheepSpeed,
                                 maxSheepSpeed, warmUpTimeSteps)

    wolfActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
    minWolfSpeed = int(8.7 * distanceToVisualDegreeRatio / numFramePerSecond)
    maxWolfSpeed = int(14.5 * distanceToVisualDegreeRatio / numFramePerSecond)
    wolfPolicy = ag.WolfPolicy(wolfActionUpdateFrequency, minWolfSpeed,
                               maxWolfSpeed, warmUpTimeSteps)
    distractorActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
    minDistractorSpeed = int(8.7 * distanceToVisualDegreeRatio /
                             numFramePerSecond)
    maxDistractorSpeed = int(14.5 * distanceToVisualDegreeRatio /
                             numFramePerSecond)
    distractorPolicy = ag.DistractorPolicy(distractorActionUpdateFrequency,
                                           minDistractorSpeed,
                                           maxDistractorSpeed, warmUpTimeSteps)
    preparePolicy = ag.PreparePolicy(sheepId, numAgent, sheepPolicy,
                                     wolfPolicy, distractorPolicy)
    updatePhysicalState = ag.UpdatePhysicalState(numAgent, preparePolicy)

    xBoundary = [0, 640]
    yBoundary = [0, 480]
    checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(xBoundary, yBoundary)
    transiteMultiAgentMotion = ag.TransiteMultiAgentMotion(
        checkBoundaryAndAdjust)

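    # Same 2.5 visual-degree kill zone; rendering is enabled during play and
    # for the MCTS visualisation below, but not inside simulations.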
    minDistance = 2.5 * distanceToVisualDegreeRatio
    isTerminal = env.IsTerminal(sheepId, minDistance)
    screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
    #screen = None
    screenColor = np.array([255, 255, 255])
    sheepColor = np.array([0, 255, 0])
    wolfColor = np.array([255, 0, 0])
    circleSize = 10
    saveImage = True
    saveImageFile = 'image1'
    render = env.Render(numAgent, screen, screenColor, sheepColor, wolfColor,
                        circleSize, saveImage, saveImageFile)
    renderOnInSimulation = False
    transiteStateWithoutActionChangeInSimulation = env.TransiteStateWithoutActionChange(
        numFrameWithoutActionChange, isTerminal, transiteMultiAgentMotion,
        render, renderOnInSimulation)
    renderOnInPlay = True
    transiteStateWithoutActionChangeInPlay = env.TransiteStateWithoutActionChange(
        numFrameWithoutActionChange, isTerminal, transiteMultiAgentMotion,
        render, renderOnInPlay)

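    # Fixed hybrid attention: 4 slots, with higher precision and higher memory
    # rate for tracked agents than for untracked ones.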
    attentionLimitation = 4
    precisionPerSlot = 8.0
    precisionForUntracked = 2.5
    memoryratePerSlot = 0.7
    memoryrateForUntracked = 0.45
    attention = Attention.AttentionToPrecisionAndDecay(precisionPerSlot,
                                                       precisionForUntracked,
                                                       memoryratePerSlot,
                                                       memoryrateForUntracked)
    transferMultiAgentStatesToPositionDF = ba.TransferMultiAgentStatesToPositionDF(
        numAgent)
    possibleSubtleties = [500, 11, 3.3, 1.83, 0.92, 0.31]
    resetBeliefAndAttention = ba.ResetBeliefAndAttention(
        sheepId, suspectorIds, possibleSubtleties, attentionLimitation,
        transferMultiAgentStatesToPositionDF, attention)

    maxDistance = 7.5 * distanceToVisualDegreeRatio
    numStandardErrorInDistanceRange = 2
    calDistancePriorOnAttentionSlot = Attention.CalDistancePriorOnAttentionSlot(
        minDistance, maxDistance, numStandardErrorInDistanceRange)
    attentionSwitch = Attention.AttentionSwitch(
        attentionLimitation, calDistancePriorOnAttentionSlot)
    computePosterior = calPosterior.CalPosteriorLog(minDistance)

    attentionSwitchFrequencyInSimulation = np.inf
    beliefUpdateFrequencyInSimulation = np.inf
    updateBeliefAndAttentionInSimulation = ba.UpdateBeliefAndAttentionState(
        attention, computePosterior, attentionSwitch,
        transferMultiAgentStatesToPositionDF,
        attentionSwitchFrequencyInSimulation,
        beliefUpdateFrequencyInSimulation)

    attentionSwitchFrequencyInPlay = int(0.6 * numMDPTimeStepPerSecond)
    beliefUpdateFrequencyInPlay = int(0.2 * numMDPTimeStepPerSecond)
    updateBeliefAndAttentionInPlay = ba.UpdateBeliefAndAttentionState(
        attention, computePosterior, attentionSwitch,
        transferMultiAgentStatesToPositionDF, attentionSwitchFrequencyInPlay,
        beliefUpdateFrequencyInPlay)

    updatePhysicalStateByBeliefFrequencyInSimulationRoot = int(
        0.2 * numMDPTimeStepPerSecond)
    updatePhysicalStateByBeliefInSimulationRoot = ba.UpdatePhysicalStateImagedByBelief(
        updatePhysicalStateByBeliefFrequencyInSimulationRoot)
    updatePhysicalStateByBeliefFrequencyInSimulation = np.inf
    updatePhysicalStateByBeliefInSimulation = ba.UpdatePhysicalStateImagedByBelief(
        updatePhysicalStateByBeliefFrequencyInSimulation)

    updatePhysicalStateByBeliefFrequencyInPlay = np.inf
    updatePhysicalStateByBeliefInPlay = ba.UpdatePhysicalStateImagedByBelief(
        updatePhysicalStateByBeliefFrequencyInPlay)

    transitionFunctionInSimulation = env.TransitionFunction(
        resetPhysicalState, resetBeliefAndAttention, updatePhysicalState,
        transiteStateWithoutActionChangeInSimulation,
        updateBeliefAndAttentionInSimulation,
        updatePhysicalStateByBeliefInSimulation)

    transitionFunctionInPlay = env.TransitionFunction(
        resetPhysicalState, resetBeliefAndAttention, updatePhysicalState,
        transiteStateWithoutActionChangeInPlay, updateBeliefAndAttentionInPlay,
        updatePhysicalStateByBeliefInPlay)

    maxRollOutSteps = 5
    aliveBouns = 1 / maxRollOutSteps
    deathPenalty = -1
    rewardFunction = reward.RewardFunctionTerminalPenalty(
        sheepId, aliveBouns, deathPenalty, isTerminal)

    # MCTS algorithm
    # Select child
    calculateScore = CalculateScore(cInit, cBase)
    selectChild = SelectChild(calculateScore)

    # expand
    initializeChildren = InitializeChildren(actionSpace,
                                            transitionFunctionInSimulation,
                                            getActionPrior)
    expand = Expand(isTerminal, initializeChildren)

    # Rollout
    rolloutPolicy = lambda state: actionSpace[np.random.choice(
        range(numActionSpace))]
    rollout = RollOut(rolloutPolicy, maxRollOutSteps,
                      transitionFunctionInSimulation, rewardFunction,
                      isTerminal)

    numActionPlaned = 1
    selectAction = SelectAction(numActionPlaned, actionSpace)
    numSimulations = int(numTotalSimulationTimes / numTree)

    sheepColorInMcts = np.array([0, 255, 0])
    wolfColorInMcts = np.array([255, 0, 0])
    distractorColorInMcts = np.array([0, 0, 0])
    mctsRender = env.MctsRender(numAgent, screen, xBoundary[1], yBoundary[1],
                                screenColor, sheepColorInMcts, wolfColorInMcts,
                                distractorColorInMcts, circleSize, saveImage,
                                saveImageFile)
    mctsRenderOn = True

    mcts = MCTS(numSimulations, selectChild, expand, rollout, backup,
                selectAction, mctsRender, mctsRenderOn)

    maxRunningSteps = int(25 * numMDPTimeStepPerSecond)
    makeDiffSimulationRoot = MakeDiffSimulationRoot(
        isTerminal, updatePhysicalStateByBeliefInSimulationRoot)
    runMCTS = RunMCTS(maxRunningSteps, numTree, numActionPlaned,
                      transitionFunctionInPlay, isTerminal,
                      makeDiffSimulationRoot, render)

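    # Run test episodes; episodes shorter than one second are discarded and
    # re-run, and an episode counts as an escape if it nearly reaches the
    # maximum number of running steps.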
    rootAction = actionSpace[np.random.choice(range(numActionSpace))]
    numTestingIterations = 1
    episodeLengths = []
    escape = 0
    step = 1
    import datetime
    while step <= numTestingIterations:
        print(datetime.datetime.now())
        episodeLength = runMCTS(mcts)
        if episodeLength >= 1 * numMDPTimeStepPerSecond:
            step = step + 1
            episodeLengths.append(episodeLength)
            if episodeLength >= maxRunningSteps - 10:
                escape = escape + 1
    meanEpisodeLength = np.mean(episodeLengths)
    print("mean episode length is", meanEpisodeLength,
          escape / numTestingIterations)
    return [meanEpisodeLength, escape / numTestingIterations]
    def __call__(self, condition):

        getSavePath = self.getTrajectorySavePathByCondition(condition)
        attentionType = condition['attentionType']
        alpha = condition['alphaForStateWidening']
        C = condition['CForStateWidening']
        cBase = condition['cBase']
        numTree = condition['numTrees']
        numSimulations = condition['numSimulationTimes']
        actionRatio = condition['actionRatio']
        burnTime = condition['burnTime']
        damp = condition['damp']
        actionCost = condition['actionCost']

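        # Same per-subject evaluation loop as above, but only the escape rate is
        # recorded per condition.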
        numSub = 10
        allResults = []
        possibleTrialSubtleties = [500.0, 3.3, 1.83, 0.92, 0.001]
        for subIndex in range(numSub):
            meanEscapeOnConditions = {}
            for chasingSubtlety in possibleTrialSubtleties:

                print(numTree, chasingSubtlety, numSimulations, attentionType)
                numAgent = 25
                sheepId = 0
                suspectorIds = list(range(1, numAgent))

                resetWolfIdAndSubtlety = ag.ResetWolfIdAndSubtlety(
                    suspectorIds, [chasingSubtlety])
                distanceToVisualDegreeRatio = 20
                minInitSheepWolfDistance = 9 * distanceToVisualDegreeRatio
                minInitSheepDistractorDistance = 2.5 * distanceToVisualDegreeRatio  # no distractor in killzone when init
                isLegalInitPositions = ag.IsLegalInitPositions(
                    sheepId, minInitSheepWolfDistance,
                    minInitSheepDistractorDistance)
                xBoundary = [0, 640]
                yBoundary = [0, 480]
                resetAgentPositions = ag.ResetAgentPositions(
                    xBoundary, yBoundary, numAgent, isLegalInitPositions)
                resetPhysicalState = ag.ResetPhysicalState(
                    sheepId, numAgent, resetAgentPositions,
                    resetWolfIdAndSubtlety)

                numFramePerSecond = 20
                numMDPTimeStepPerSecond = 5
                numFrameWithoutActionChange = int(numFramePerSecond /
                                                  numMDPTimeStepPerSecond)

                sheepActionUpdateFrequency = 1
                minSheepSpeed = int(17.4 * distanceToVisualDegreeRatio /
                                    numFramePerSecond)
                maxSheepSpeed = int(23.2 * distanceToVisualDegreeRatio /
                                    numFramePerSecond)
                warmUpTimeSteps = int(10 * numMDPTimeStepPerSecond)
                sheepPolicy = ag.SheepPolicy(sheepActionUpdateFrequency,
                                             minSheepSpeed, maxSheepSpeed,
                                             warmUpTimeSteps, burnTime, damp)

                wolfActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
                minWolfSpeed = int(8.7 * distanceToVisualDegreeRatio /
                                   numFramePerSecond)
                maxWolfSpeed = int(14.5 * distanceToVisualDegreeRatio /
                                   numFramePerSecond)
                wolfPolicy = ag.WolfPolicy(wolfActionUpdateFrequency,
                                           minWolfSpeed, maxWolfSpeed,
                                           warmUpTimeSteps)
                distractorActionUpdateFrequency = int(0.2 *
                                                      numMDPTimeStepPerSecond)
                minDistractorSpeed = int(8.7 * distanceToVisualDegreeRatio /
                                         numFramePerSecond)
                maxDistractorSpeed = int(14.5 * distanceToVisualDegreeRatio /
                                         numFramePerSecond)
                distractorPolicy = ag.DistractorPolicy(
                    distractorActionUpdateFrequency, minDistractorSpeed,
                    maxDistractorSpeed, warmUpTimeSteps)
                preparePolicy = ag.PreparePolicy(sheepId, numAgent,
                                                 sheepPolicy, wolfPolicy,
                                                 distractorPolicy)
                updatePhysicalState = ag.UpdatePhysicalState(
                    sheepId, numAgent, preparePolicy)

                xBoundary = [0, 640]
                yBoundary = [0, 480]
                checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(
                    xBoundary, yBoundary)
                transiteMultiAgentMotion = ag.TransiteMultiAgentMotion(
                    checkBoundaryAndAdjust)

                minDistance = 2.5 * distanceToVisualDegreeRatio
                isTerminal = env.IsTerminal(sheepId, minDistance)
                # screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
                # screenColor = np.array([0, 0, 0])
                # sheepColor = np.array([0, 255, 0])
                # wolfColor = np.array([255, 0, 0])
                # circleSize = 10
                # saveImage = True
                # saveImageFile = 'image3'
                # render = env.Render(numAgent, screen, xBoundary[1], yBoundary[1], screenColor, sheepColor, wolfColor, circleSize, saveImage, saveImageFile, isTerminal)
                render = None
                renderOnInSimulation = False
                transiteStateWithoutActionChangeInSimulation = env.TransiteStateWithoutActionChange(
                    numFrameWithoutActionChange, isTerminal,
                    transiteMultiAgentMotion, render, renderOnInSimulation)
                renderOnInPlay = False
                transiteStateWithoutActionChangeInPlay = env.TransiteStateWithoutActionChange(
                    numFrameWithoutActionChange, isTerminal,
                    transiteMultiAgentMotion, render, renderOnInPlay)

                if attentionType == 'idealObserver':
                    attentionLimitation = 1
                    precisionPerSlot = 500.0
                    precisionForUntracked = 500.0
                    memoryratePerSlot = 1.0
                    memoryrateForUntracked = 1.0
                if attentionType == 'preAttention':
                    attentionLimitation = 1
                    precisionPerSlot = 2.5
                    precisionForUntracked = 2.5
                    memoryratePerSlot = 0.45
                    memoryrateForUntracked = 0.45
                if attentionType == 'attention3':
                    attentionLimitation = 3
                    precisionPerSlot = 8.0
                    precisionForUntracked = 0.01
                    memoryratePerSlot = 0.7
                    memoryrateForUntracked = 0.01
                if attentionType == 'hybrid3':
                    attentionLimitation = 3
                    precisionPerSlot = 8.0
                    precisionForUntracked = 2.5
                    memoryratePerSlot = 0.7
                    memoryrateForUntracked = 0.45
                if attentionType == 'attention4':
                    attentionLimitation = 4
                    precisionPerSlot = 8.0
                    precisionForUntracked = 0.01
                    memoryratePerSlot = 0.7
                    memoryrateForUntracked = 0.01
                if attentionType == 'hybrid4':
                    attentionLimitation = 4
                    precisionPerSlot = 8.0
                    precisionForUntracked = 2.5
                    memoryratePerSlot = 0.7
                    memoryrateForUntracked = 0.45
                attention = Attention.AttentionToPrecisionAndDecay(
                    precisionPerSlot, precisionForUntracked, memoryratePerSlot,
                    memoryrateForUntracked)
                transferMultiAgentStatesToPositionDF = ba.TransferMultiAgentStatesToPositionDF(
                    numAgent)
                possibleSubtleties = [
                    500.0, 11.0, 3.3, 1.83, 0.92, 0.31, 0.001
                ]
                resetBeliefAndAttention = ba.ResetBeliefAndAttention(
                    sheepId, suspectorIds, possibleSubtleties,
                    attentionLimitation, transferMultiAgentStatesToPositionDF,
                    attention)

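                # Attention distance window fixed at 40-50 visual degrees here
                # rather than taken from the condition dictionary.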
                minAttentionDistance = 40.0
                rangeAttention = 10.0
                maxAttentionDistance = minAttentionDistance + rangeAttention
                attentionMinDistance = minAttentionDistance * distanceToVisualDegreeRatio
                attentionMaxDistance = maxAttentionDistance * distanceToVisualDegreeRatio
                numStandardErrorInDistanceRange = 4
                calDistancePriorOnAttentionSlot = Attention.CalDistancePriorOnAttentionSlot(
                    attentionMinDistance, attentionMaxDistance,
                    numStandardErrorInDistanceRange)
                attentionSwitch = Attention.AttentionSwitch(
                    attentionLimitation, calDistancePriorOnAttentionSlot)
                computePosterior = calPosterior.CalPosteriorLog(minDistance)

                attentionSwitchFrequencyInSimulation = np.inf
                beliefUpdateFrequencyInSimulation = np.inf
                updateBeliefAndAttentionInSimulation = ba.UpdateBeliefAndAttentionState(
                    attention, computePosterior, attentionSwitch,
                    transferMultiAgentStatesToPositionDF,
                    attentionSwitchFrequencyInSimulation,
                    beliefUpdateFrequencyInSimulation, burnTime)

                attentionSwitchFrequencyInPlay = int(0.6 *
                                                     numMDPTimeStepPerSecond)
                beliefUpdateFrequencyInPlay = int(0.2 *
                                                  numMDPTimeStepPerSecond)
                updateBeliefAndAttentionInPlay = ba.UpdateBeliefAndAttentionState(
                    attention, computePosterior, attentionSwitch,
                    transferMultiAgentStatesToPositionDF,
                    attentionSwitchFrequencyInPlay,
                    beliefUpdateFrequencyInPlay, burnTime)

                updatePhysicalStateByBeliefFrequencyInSimulationRoot = int(
                    0.6 * numMDPTimeStepPerSecond)
                updatePhysicalStateByBeliefInSimulationRoot = ba.UpdatePhysicalStateImagedByBelief(
                    updatePhysicalStateByBeliefFrequencyInSimulationRoot)
                updatePhysicalStateByBeliefFrequencyInSimulation = np.inf
                updatePhysicalStateByBeliefInSimulation = ba.UpdatePhysicalStateImagedByBelief(
                    updatePhysicalStateByBeliefFrequencyInSimulation)

                updatePhysicalStateByBeliefFrequencyInPlay = np.inf
                updatePhysicalStateByBeliefInPlay = ba.UpdatePhysicalStateImagedByBelief(
                    updatePhysicalStateByBeliefFrequencyInPlay)

                transitionFunctionInSimulation = env.TransitionFunction(
                    resetPhysicalState, resetBeliefAndAttention,
                    updatePhysicalState,
                    transiteStateWithoutActionChangeInSimulation,
                    updateBeliefAndAttentionInSimulation,
                    updatePhysicalStateByBeliefInSimulation)

                transitionFunctionInPlay = env.TransitionFunction(
                    resetPhysicalState, resetBeliefAndAttention,
                    updatePhysicalState,
                    transiteStateWithoutActionChangeInPlay,
                    updateBeliefAndAttentionInPlay,
                    updatePhysicalStateByBeliefInPlay)

                maxRollOutSteps = 5
                aliveBouns = 1 / maxRollOutSteps
                deathPenalty = -1
                rewardFunction = reward.RewardFunctionTerminalPenalty(
                    sheepId, aliveBouns, actionCost, deathPenalty, isTerminal)
                rewardRollout = lambda state, action, nextState: rewardFunction(
                    state, action)

                numActionSpace = 8
                actionInterval = int(360 / (numActionSpace))
                actionMagnitude = actionRatio * minSheepSpeed
                actionSpace = [
                    (0, 0)
                ] + [(np.cos(degreeInPolar) * actionMagnitude,
                      np.sin(degreeInPolar) * actionMagnitude)
                     for degreeInPolar in np.arange(0, 360, actionInterval) /
                     180 * math.pi]
                getActionPrior = lambda state: {
                    action: 1 / len(actionSpace)
                    for action in actionSpace
                }

                cInit = 1
                #cBase = 50
                scoreChild = ScoreChild(cInit, cBase)
                selectAction = SelectAction(scoreChild)
                selectNextState = SelectNextState(selectAction)

                initializeChildren = InitializeChildren(
                    actionSpace, transitionFunctionInSimulation,
                    getActionPrior)
                expand = Expand(isTerminal, initializeChildren)
                pWidening = PWidening(alpha, C)
                expandNewState = ExpandNextState(
                    transitionFunctionInSimulation, pWidening)

                rolloutPolicy = lambda state: actionSpace[np.random.choice(
                    range(numActionSpace))]
                rolloutHeuristic = lambda state: 0
                estimateValue = RollOut(rolloutPolicy, maxRollOutSteps,
                                        transitionFunctionInSimulation,
                                        rewardRollout, isTerminal,
                                        rolloutHeuristic)

                numActionPlaned = 1
                outputAction = OutputAction(numActionPlaned, actionSpace)
                #numSimulations = int(numTotalSimulationTimes/numTree)

                #sheepColorInMcts = np.array([0, 255, 0])
                #wolfColorInMcts = np.array([255, 0, 0])
                #distractorColorInMcts = np.array([255, 255, 255])
                #saveImageMCTS = True
                #mctsRender = env.MctsRender(numAgent, screen, xBoundary[1], yBoundary[1], screenColor, sheepColorInMcts, wolfColorInMcts, distractorColorInMcts, circleSize, saveImageMCTS, saveImageFile)
                #mctsRenderOn = False
                #mctsRender = None
                #pg.init()
                #mcts = MCTS(numSimulations, selectChild, expand, rollout, backup, selectAction, mctsRender, mctsRenderOn)
                pwMultipleTrees = PWMultipleTrees(numSimulations, selectAction,
                                                  selectNextState, expand,
                                                  expandNewState,
                                                  estimateValue, backup,
                                                  outputAction)

                maxRunningSteps = int(25 * numMDPTimeStepPerSecond)
                makeDiffSimulationRoot = MakeDiffSimulationRoot(
                    isTerminal, updatePhysicalStateByBeliefInSimulationRoot)
                runMCTSTrjactory = RunMCTSTrjactory(
                    maxRunningSteps, numTree, numActionPlaned,
                    sheepActionUpdateFrequency, transitionFunctionInPlay,
                    isTerminal, makeDiffSimulationRoot, render)

                rootAction = actionSpace[np.random.choice(
                    range(numActionSpace))]
                numTrial = 15
                print(attentionLimitation,
                      attentionMinDistance / distanceToVisualDegreeRatio,
                      attentionMaxDistance / distanceToVisualDegreeRatio)
                trajectories = [
                    runMCTSTrjactory(pwMultipleTrees)
                    for trial in range(numTrial)
                ]

                savePath = getSavePath({
                    'chasingSubtlety': chasingSubtlety,
                    'subIndex': subIndex
                })
                tsl.saveToPickle(trajectories, savePath)

                meanEscape = np.mean([
                    1 if len(trajectory) >= (maxRunningSteps - 1) else 0
                    for trajectory in trajectories
                ])
                meanEscapeOnConditions.update({chasingSubtlety: meanEscape})
                print(meanEscapeOnConditions)
            allResults.append(meanEscapeOnConditions)
            results = pd.DataFrame(allResults)
            getCSVSavePath = self.getCSVSavePathByCondition(condition)
            csvSavePath = getCSVSavePath({})
            results.to_csv(csvSavePath)
Example #4
    def testCalPosterior(self):
        calPosteriorLog = targetCode.CalPosteriorLog(0.99)
        posteriorLogDf = calPosteriorLog(self.beforePosterior, self.beforeData,
                                         self.nowData)
        self.assertEqual(posteriorLogDf['logP'].idxmax(), (0, 1, 50))