def main():

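    # Eight movement actions of magnitude ~10, spaced every 45 degrees around the compass.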
    actionSpace = [[10, 0], [7, 7], [0, 10], [-7, 7], [-10, 0], [-7, -7],
                   [0, -10], [7, -7]]
    numActionSpace = len(actionSpace)
    numStateSpace = 4

    initSheepPosition = np.array([180, 180])
    initWolfPosition = np.array([180, 180])
    initSheepVelocity = np.array([0, 0])
    initWolfVelocity = np.array([0, 0])
    initSheepPositionNoise = np.array([90, 150])
    initWolfPositionNoise = np.array([0, 60])
    sheepPositionReset = ag.SheepPositionReset(initSheepPosition,
                                               initSheepPositionNoise)
    wolfPositionReset = ag.WolfPositionReset(initWolfPosition,
                                             initWolfPositionNoise)

    numOneAgentState = 2
    positionIndex = [0, 1]
    xBoundary = [0, 360]
    yBoundary = [0, 360]
    checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(xBoundary, yBoundary)
    sheepPositionTransition = ag.SheepPositionTransition(
        numOneAgentState, positionIndex, checkBoundaryAndAdjust)
    wolfPositionTransition = ag.WolfPositionTransition(numOneAgentState,
                                                       positionIndex,
                                                       checkBoundaryAndAdjust)

    numAgent = 2
    sheepId = 0
    wolfId = 1
    transitionFunction = env.TransitionFunction(sheepId, wolfId,
                                                sheepPositionReset,
                                                wolfPositionReset,
                                                sheepPositionTransition,
                                                wolfPositionTransition)
    minDistance = 15
    isTerminal = env.IsTerminal(sheepId, wolfId, numOneAgentState,
                                positionIndex, minDistance)

    screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
    screenColor = [255, 255, 255]
    circleColorList = [[50, 255, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50],
                       [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50],
                       [50, 50, 50]]
    circleSize = 8
    saveImage = False
    saveImageFile = 'image'
    render = env.Render(numAgent, numOneAgentState, positionIndex, screen,
                        screenColor, circleColorList, circleSize, saveImage,
                        saveImageFile)

    aliveBouns = -1
    deathPenalty = 20
    rewardDecay = 0.99
    rewardFunction = reward.RewardFunctionTerminalPenalty(
        sheepId, wolfId, numOneAgentState, positionIndex, aliveBouns,
        deathPenalty, isTerminal)
    accumulateReward = AccumulateReward(rewardDecay, rewardFunction)
    def __call__(self, condition):

        getSavePath = self.getTrajectorySavePathByCondition(condition)
        attentionType = condition['attType']
        alpha = condition['alpha']
        C = condition['C']
        minAttentionDistance = condition['minAttDist']
        rangeAttention = condition['rangeAtt']
        numTree = condition['numTrees']
        numSimulations = condition['numSim']
        actionRatio = condition['actRatio']
        cBase = condition['cBase']
        burnTime = condition['burnTime']
        softParaForIdentity = condition['softId']
        softParaForSubtlety = condition['softSubtlety']
        damp = condition['damp']
        actionCost = condition['actCost']

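        # Sweep: for each of numSub simulated subjects, run every chasing-subtlety condition and
        # collect identity, perception, action-deviation, velocity-difference, and escape measures.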
        numSub = 10
        allIdentityResults = []
        allPerceptionResults = []
        allActionResults = []
        allVelDiffResults = []
        allResults = []
                possibleTrialSubtleties = [3.3, 1.83, 0.01]  # full set: [500.0, 3.3, 1.83, 0.92, 0.01]
        for subIndex in range(numSub):
            meanIdentiyOnConditions = {}
            meanPerceptionOnConditions = {}
            meanActionOnConditions = {}
            meanVelDiffOnConditions = {}
            meanEscapeOnConditions = {}
            for chasingSubtlety in possibleTrialSubtleties:

                print(numTree, chasingSubtlety, numSimulations, attentionType)
                numAgent = 25
                sheepId = 0
                suspectorIds = list(range(1, numAgent))

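                # Reset: one of the 24 suspects is assigned as the wolf with the current chasing subtlety,
                # and initial positions are sampled subject to the minimum-distance constraints below.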
                resetWolfIdAndSubtlety = ag.ResetWolfIdAndSubtlety(suspectorIds, [chasingSubtlety])
                distanceToVisualDegreeRatio = 20
                minInitSheepWolfDistance = 9 * distanceToVisualDegreeRatio
                minInitSheepDistractorDistance = 2.5 * distanceToVisualDegreeRatio  # no distractor in killzone when init
                isLegalInitPositions = ag.IsLegalInitPositions(sheepId, minInitSheepWolfDistance, minInitSheepDistractorDistance)
                xBoundary = [0, 640]
                yBoundary = [0, 480]
                resetAgentPositions = ag.ResetAgentPositions(xBoundary, yBoundary, numAgent, isLegalInitPositions)
                resetPhysicalState = ag.ResetPhysicalState(sheepId, numAgent, resetAgentPositions, resetWolfIdAndSubtlety)

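                # Timing: 20 rendering frames per second and 5 MDP decision steps per second, i.e. 4 frames per action.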
                numFramePerSecond = 20
                numMDPTimeStepPerSecond = 5
                numFrameWithoutActionChange = int(numFramePerSecond/numMDPTimeStepPerSecond)

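                # Agent speeds appear to be specified in visual degrees per second and are converted to
                # per-frame displacements via distanceToVisualDegreeRatio.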
                sheepActionUpdateFrequency = 1
                minSheepSpeed = int(17.4 * distanceToVisualDegreeRatio/numFramePerSecond)
                maxSheepSpeed = int(23.2 * distanceToVisualDegreeRatio/numFramePerSecond)
                warmUpTimeSteps = int(10 * numMDPTimeStepPerSecond)
                sheepPolicy = ag.SheepPolicy(sheepActionUpdateFrequency, minSheepSpeed, maxSheepSpeed, warmUpTimeSteps, burnTime, damp)

                wolfActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
                minWolfSpeed = int(8.7 * distanceToVisualDegreeRatio/numFramePerSecond)
                maxWolfSpeed = int(14.5 * distanceToVisualDegreeRatio/numFramePerSecond)
                wolfPolicy = ag.WolfPolicy(wolfActionUpdateFrequency, minWolfSpeed, maxWolfSpeed, warmUpTimeSteps)
                distractorActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
                minDistractorSpeed = int(8.7 * distanceToVisualDegreeRatio/numFramePerSecond)
                maxDistractorSpeed = int(14.5 * distanceToVisualDegreeRatio/numFramePerSecond)
                distractorPolicy = ag.DistractorPolicy(distractorActionUpdateFrequency, minDistractorSpeed, maxDistractorSpeed, warmUpTimeSteps)
                preparePolicy = ag.PreparePolicy(sheepId, numAgent, sheepPolicy, wolfPolicy, distractorPolicy)
                updatePhysicalState = ag.UpdatePhysicalState(sheepId, numAgent, preparePolicy)

                xBoundary = [0, 640]
                yBoundary = [0, 480]
                checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(xBoundary, yBoundary)
                transiteMultiAgentMotion = ag.TransiteMultiAgentMotion(checkBoundaryAndAdjust)

                minDistance = 2.5 * distanceToVisualDegreeRatio
                isTerminal = env.IsTerminal(sheepId, minDistance)
                # screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
                # screenColor = np.array([0, 0, 0])
                # sheepColor = np.array([0, 255, 0])
                # wolfColor = np.array([255, 0, 0])
                # circleSize = 10
                # saveImage = True
                # saveImageFile = 'image3'
                # render = env.Render(numAgent, screen, xBoundary[1], yBoundary[1], screenColor, sheepColor, wolfColor, circleSize, saveImage, saveImageFile, isTerminal)
                render = None
                renderOnInSimulation = False
                transiteStateWithoutActionChangeInSimulation = env.TransiteStateWithoutActionChange(numFrameWithoutActionChange, isTerminal, transiteMultiAgentMotion, render, renderOnInSimulation)
                renderOnInPlay = False
                transiteStateWithoutActionChangeInPlay = env.TransiteStateWithoutActionChange(numFrameWithoutActionChange, isTerminal, transiteMultiAgentMotion, render, renderOnInPlay)

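                # Map the attention condition to its observation precision and memory decay rate,
                # for attended slots vs. untracked objects.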
                if attentionType == 'idealObserver':
                    attentionLimitation = 1
                    precisionPerSlot = 500.0
                    precisionForUntracked = 500.0
                    memoryratePerSlot = 1.0
                    memoryrateForUntracked = 1.0
                elif attentionType == 'preAttention':
                    attentionLimitation = 1
                    precisionPerSlot = 2.5
                    precisionForUntracked = 2.5
                    memoryratePerSlot = 0.45
                    memoryrateForUntracked = 0.45
                elif attentionType == 'attention3':
                    attentionLimitation = 3
                    precisionPerSlot = 8.0
                    precisionForUntracked = 0.01
                    memoryratePerSlot = 0.7
                    memoryrateForUntracked = 0.01
                elif attentionType == 'hybrid3':
                    attentionLimitation = 3
                    precisionPerSlot = 8.0
                    precisionForUntracked = 2.5
                    memoryratePerSlot = 0.7
                    memoryrateForUntracked = 0.45
                elif attentionType == 'attention4':
                    attentionLimitation = 4
                    precisionPerSlot = 8.0
                    precisionForUntracked = 0.01
                    memoryratePerSlot = 0.7
                    memoryrateForUntracked = 0.01
                elif attentionType == 'hybrid4':
                    attentionLimitation = 4
                    precisionPerSlot = 8.0
                    precisionForUntracked = 2.5
                    memoryratePerSlot = 0.7
                    memoryrateForUntracked = 0.45
                elif attentionType == 'preAttentionMem0.25':
                    attentionLimitation = 1
                    precisionPerSlot = 2.5
                    precisionForUntracked = 2.5
                    memoryratePerSlot = 0.25
                    memoryrateForUntracked = 0.25
                elif attentionType == 'preAttentionMem0.65':
                    attentionLimitation = 1
                    precisionPerSlot = 2.5
                    precisionForUntracked = 2.5
                    memoryratePerSlot = 0.65
                    memoryrateForUntracked = 0.65
                elif attentionType == 'preAttentionPre0.5':
                    attentionLimitation = 1
                    precisionPerSlot = 0.5
                    precisionForUntracked = 0.5
                    memoryratePerSlot = 0.45
                    memoryrateForUntracked = 0.45
                elif attentionType == 'preAttentionPre4.5':
                    attentionLimitation = 1
                    precisionPerSlot = 4.5
                    precisionForUntracked = 4.5
                    memoryratePerSlot = 0.45
                    memoryrateForUntracked = 0.45

                attention = Attention.AttentionToPrecisionAndDecay(precisionPerSlot, precisionForUntracked, memoryratePerSlot, memoryrateForUntracked)
                transferMultiAgentStatesToPositionDF = ba.TransferMultiAgentStatesToPositionDF(numAgent)
                possibleSubtleties = [500.0, 11.0, 3.3, 1.83, 0.92, 0.31, 0.01]
                resetBeliefAndAttention = ba.ResetBeliefAndAttention(sheepId, suspectorIds, possibleSubtleties, attentionLimitation, transferMultiAgentStatesToPositionDF, attention)

                maxAttentionDistance = minAttentionDistance + rangeAttention
                attentionMinDistance = minAttentionDistance * distanceToVisualDegreeRatio
                attentionMaxDistance = maxAttentionDistance * distanceToVisualDegreeRatio
                numStandardErrorInDistanceRange = 4
                calDistancePriorOnAttentionSlot = Attention.CalDistancePriorOnAttentionSlot(attentionMinDistance, attentionMaxDistance, numStandardErrorInDistanceRange)
                attentionSwitch = Attention.AttentionSwitch(attentionLimitation, calDistancePriorOnAttentionSlot)
                computePosterior = calPosterior.CalPosteriorLog(minDistance)

                print(attentionLimitation, attentionMinDistance/distanceToVisualDegreeRatio, attentionMaxDistance/distanceToVisualDegreeRatio)

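                # Belief and attention are frozen inside MCTS simulations (update frequency = inf)
                # but updated every 0.6 s of MDP time during play.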
                attentionSwitchFrequencyInSimulation = np.inf
                beliefUpdateFrequencyInSimulation = np.inf
                updateBeliefAndAttentionInSimulation = ba.UpdateBeliefAndAttentionState(attention, computePosterior, attentionSwitch, transferMultiAgentStatesToPositionDF,
                        attentionSwitchFrequencyInSimulation, beliefUpdateFrequencyInSimulation, burnTime)

                attentionSwitchFrequencyInPlay = int(0.6 * numMDPTimeStepPerSecond)
                beliefUpdateFrequencyInPlay = int(0.6 * numMDPTimeStepPerSecond)
                updateBeliefAndAttentionInPlay = ba.UpdateBeliefAndAttentionState(attention, computePosterior, attentionSwitch, transferMultiAgentStatesToPositionDF,
                        attentionSwitchFrequencyInPlay, beliefUpdateFrequencyInPlay, burnTime)

                updatePhysicalStateByBeliefFrequencyInSimulationRoot = int(0.2 * numMDPTimeStepPerSecond)
                updatePhysicalStateByBeliefInSimulationRoot = ba.UpdatePhysicalStateImagedByBelief(updatePhysicalStateByBeliefFrequencyInSimulationRoot,
                        softParaForIdentity, softParaForSubtlety)
                reUpdatePhysicalStateByBeliefInSimulationRoot = ba.UpdatePhysicalStateImagedByBelief(updatePhysicalStateByBeliefFrequencyInSimulationRoot,
                        softParaForIdentity = 1, softParaForSubtlety = 1)
                updatePhysicalStateByBeliefFrequencyInSimulation = np.inf
                #updatePhysicalStateByBeliefInSimulation = ba.UpdatePhysicalStateImagedByBelief(updatePhysicalStateByBeliefFrequencyInSimulation,
                #        softParaForIdentity, softParaForSubtlety)
                updatePhysicalStateByBeliefInSimulation = lambda state: state

                updatePhysicalStateByBeliefFrequencyInPlay = np.inf
                #updatePhysicalStateByBeliefInPlay = ba.UpdatePhysicalStateImagedByBelief(updatePhysicalStateByBeliefFrequencyInPlay,
                #        softParaForIdentity, softParaForSubtlety)
                updatePhysicalStateByBeliefInPlay = lambda state: state

                transitionFunctionInSimulation = env.TransitionFunction(resetPhysicalState, resetBeliefAndAttention, updatePhysicalState, transiteStateWithoutActionChangeInSimulation,
                        updateBeliefAndAttentionInSimulation, updatePhysicalStateByBeliefInSimulation)

                transitionFunctionInPlay = env.TransitionFunction(resetPhysicalState, resetBeliefAndAttention, updatePhysicalState, transiteStateWithoutActionChangeInPlay,
                        updateBeliefAndAttentionInPlay, updatePhysicalStateByBeliefInPlay)

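                # Sheep action space: a stop action plus eight full-magnitude directions
                # (the half-magnitude set is currently disabled); MCTS uses a uniform action prior.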
                numActionSpace = 8
                actionInterval = int(360/(numActionSpace))
                actionMagnitude = actionRatio * minSheepSpeed * numFramePerSecond
                actionSpaceFull = [(np.cos(degreeInPolar) * actionMagnitude, np.sin(degreeInPolar) * actionMagnitude)
                        for degreeInPolar in np.arange(0, 360, actionInterval)/180 * math.pi]
                actionSpaceHalf = [(np.cos(degreeInPolar) * actionMagnitude * 0.5, np.sin(degreeInPolar) * actionMagnitude * 0.5)
                        for degreeInPolar in np.arange(0, 360, actionInterval)/180 * math.pi]
                actionSpace = [(0, 0)] + actionSpaceFull# + actionSpaceHalf
                getActionPrior = lambda state : {action: 1/len(actionSpace) for action in actionSpace}

                maxRollOutSteps = 5
                aliveBouns = 0.2 * 0
                deathPenalty = -1
                rewardFunction = reward.RewardFunctionTerminalPenalty(sheepId, aliveBouns, actionCost, deathPenalty, isTerminal, actionSpace)
                rewardRollout = lambda state, action, nextState: rewardFunction(state, action)


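                # Progressive-widening MCTS: UCB-style child scoring, expansion with state widening,
                # and short random rollouts for value estimates.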
                cInit = 1
                #cBase = 50
                scoreChild = ScoreChild(cInit, cBase)
                selectAction = SelectAction(scoreChild)
                selectNextState = SelectNextState(selectAction)

                initializeChildren = InitializeChildren(actionSpace, transitionFunctionInSimulation, getActionPrior)
                expand = Expand(isTerminal, initializeChildren)
                pWidening = PWidening(alpha, C)
                expandNewState = ExpandNextState(transitionFunctionInSimulation, pWidening)

                rolloutPolicy = lambda state: actionSpace[np.random.choice(range(numActionSpace))]
                rolloutHeuristic = lambda state: 0
                estimateValue = RollOut(rolloutPolicy, maxRollOutSteps, transitionFunctionInSimulation, rewardRollout, isTerminal, rolloutHeuristic)

                numActionPlaned = 1
                outputAction = OutputAction(numActionPlaned, actionSpace)
                #numSimulations = int(numTotalSimulationTimes/numTree)

                #sheepColorInMcts = np.array([0, 255, 0])
                #wolfColorInMcts = np.array([255, 0, 0])
                #distractorColorInMcts = np.array([255, 255, 255])
                #saveImageMCTS = True
                #mctsRender = env.MctsRender(numAgent, screen, xBoundary[1], yBoundary[1], screenColor, sheepColorInMcts, wolfColorInMcts, distractorColorInMcts, circleSize, saveImageMCTS, saveImageFile)
                #mctsRenderOn = False
                #mctsRender = None
                #pg.init()
                #mcts = MCTS(numSimulations, selectChild, expand, rollout, backup, selectAction, mctsRender, mctsRenderOn)
                pwMultipleTrees = PWMultipleTrees(numSimulations, selectAction, selectNextState, expand, expandNewState, estimateValue, backup, outputAction)

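                # Each trial runs at most 25 s of MDP time; 10 trials per (subtlety, subject) cell
                # are simulated and saved to pickle.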
                maxRunningSteps = int(25 * numMDPTimeStepPerSecond)
                makeDiffSimulationRoot = MakeDiffSimulationRoot(isTerminal, updatePhysicalStateByBeliefInSimulationRoot, reUpdatePhysicalStateByBeliefInSimulationRoot)
                runMCTSTrjactory = RunMCTSTrjactory(maxRunningSteps, numTree, numActionPlaned, sheepActionUpdateFrequency, transitionFunctionInPlay, isTerminal, makeDiffSimulationRoot, render)

                rootAction = actionSpace[np.random.choice(range(numActionSpace))]
                numTrial = 10
                trajectories = [runMCTSTrjactory(pwMultipleTrees) for trial in range(numTrial)]

                savePath = getSavePath({'chasingSubtlety': chasingSubtlety, 'subIndex': subIndex})
                tsl.saveToPickle(trajectories, savePath)
                getCSVSavePath = self.getCSVSavePathByCondition(condition)

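                # Per-trajectory statistics, computed from startStatsIndex onward. The nested indexing
                # below assumes the time-step layout produced by RunMCTSTrjactory (e.g. the true wolf id
                # and subtlety live at timeStep[0][0][3], and timeStep[5] holds the sampled (id, subtlety) beliefs).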
                startStatsIndex = 1
                def getTrueWolfIdAcc(trajectory):
                    AccTrial = []
                    for timeStepIndex in range(len(trajectory) - 2):
                        timeStep = trajectory[timeStepIndex]
                        wolfId = timeStep[0][0][3][0]
                        wolfSubtlety = timeStep[0][0][3][1]
                        #print(wolfId, '**', wolfIdInEach)
                        if timeStepIndex >= startStatsIndex:
                            IdAcc = np.mean([int(IdAndSubtlety[0] == wolfId) for IdAndSubtlety in timeStep[5]])
                            AccTrial.append(IdAcc)
                    meanAcc = np.mean(AccTrial)
                    return meanAcc
                meanIdentiy = np.mean([getTrueWolfIdAcc(trajectory) for trajectory in trajectories])
                meanIdentiyOnConditions.update({chasingSubtlety: meanIdentiy})

                def getTrueWolfIdSubtletyAcc(trajectory):
                    AccTrial = []
                    for timeStepIndex in range(len(trajectory) - 2):
                        timeStep = trajectory[timeStepIndex]
                        wolfId = timeStep[0][0][3][0]
                        wolfSubtlety = timeStep[0][0][3][1]
                        #print(wolfId, '**', wolfIdInEach)
                        if timeStepIndex >= startStatsIndex:
                            IdAndSubtletyAcc = np.mean([int((IdAndSubtlety[0] == wolfId) and (IdAndSubtlety[1] == wolfSubtlety)) for IdAndSubtlety in timeStep[5]])
                            AccTrial.append(IdAndSubtletyAcc)
                    meanAcc = np.mean(AccTrial)
                    return meanAcc
                meanPerception = np.mean([getTrueWolfIdSubtletyAcc(trajectory) for trajectory in trajectories])
                meanPerceptionOnConditions.update({chasingSubtlety: meanPerception})

                def getActionDeviationLevel(trajectory):
                    AccTrial = []
                    for timeStepIndex in range(len(trajectory) - 2):
                        timeStep = trajectory[timeStepIndex]
                        actionReal = np.array(timeStep[1])
                        actionOnTruth = np.array(timeStep[4])
                        if timeStepIndex >= startStatsIndex:
                            deviateLevel = round(agf.computeAngleBetweenVectors(actionReal, actionOnTruth) / (math.pi / 4))
                            AccTrial.append(deviateLevel)
                    meanAcc = np.mean(AccTrial)
                    return meanAcc
                meanAction = np.mean([getActionDeviationLevel(trajectory) for trajectory in trajectories])
                meanActionOnConditions.update({chasingSubtlety: meanAction})

                def getVelocityDiff(trajectory):
                    AccTrial = []
                    for timeStepIndex in range(len(trajectory) - 2):
                        timeStep = trajectory[timeStepIndex]
                        velReal = np.array(timeStep[0][0][0][1][0])
                        velWithActionOnTruth = np.array(timeStep[2][1][0])
                        velWithActionOppo = np.array(timeStep[3][1][0])
                        if timeStepIndex >= startStatsIndex:
                            velDiffNormWithActionOnTruth = np.linalg.norm((velReal - velWithActionOnTruth))
                            velDiffNormWithActionOppo = np.linalg.norm((velReal - velWithActionOppo))
                            velDiffRatio = 1.0 * velDiffNormWithActionOnTruth / velDiffNormWithActionOppo
                            AccTrial.append(velDiffRatio)
                    meanAcc = np.mean(AccTrial)
                    return meanAcc
                meanVelDiff = np.mean([getVelocityDiff(trajectory) for trajectory in trajectories])
                meanVelDiffOnConditions.update({chasingSubtlety: meanVelDiff})

                getEscapeAcc = lambda trajectory: int(len(trajectory) >= (maxRunningSteps - 2))
                meanEscape = np.mean([getEscapeAcc(trajectory) for trajectory in trajectories])
                meanEscapeOnConditions.update({chasingSubtlety: meanEscape})


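            # After finishing all subtlety conditions for this subject, append the per-condition means
            # and rewrite the cumulative CSV files.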
            allResults.append(meanEscapeOnConditions)
            results = pd.DataFrame(allResults)
            escapeCSVSavePath = getCSVSavePath({'measure': 'escape'})
            results.to_csv(escapeCSVSavePath)

            allIdentityResults.append(meanIdentiyOnConditions)
            identityResults = pd.DataFrame(allIdentityResults)
            identityCSVSavePath = getCSVSavePath({'measure': 'identity'})
            identityResults.to_csv(identityCSVSavePath)

            allPerceptionResults.append(meanPerceptionOnConditions)
            perceptionResults = pd.DataFrame(allPerceptionResults)
            perceptionCSVSavePath = getCSVSavePath({'measure': 'perception'})
            perceptionResults.to_csv(perceptionCSVSavePath)

            allActionResults.append(meanActionOnConditions)
            actionResults = pd.DataFrame(allActionResults)
            actionCSVSavePath = getCSVSavePath({'measure': 'action'})
            actionResults.to_csv(actionCSVSavePath)

            allVelDiffResults.append(meanVelDiffOnConditions)
            velDiffResults = pd.DataFrame(allVelDiffResults)
            velDiffCSVSavePath = getCSVSavePath({'measure': 'velDiff'})
            velDiffResults.to_csv(velDiffCSVSavePath)

# Example 3

def main():
    # action space
    actionSpace = [[10, 0], [7, 7], [0, 10], [-7, 7], [-10, 0], [-7, -7], [0, -10], [7, -7]]
    numActionSpace = len(actionSpace)

    # state space
    numStateSpace = 4
    xBoundary = [0, 360]
    yBoundary = [0, 360]
    checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(xBoundary, yBoundary)

    initSheepPositionMean = np.array([180, 180])
    initWolfPositionMean = np.array([180, 180])
    initSheepPositionNoise = np.array([120, 120])
    initWolfPositionNoise = np.array([60, 60])
    sheepPositionReset = ag.SheepPositionReset(initSheepPositionMean, initSheepPositionNoise, checkBoundaryAndAdjust)
    wolfPositionReset = ag.WolfPositionReset(initWolfPositionMean, initWolfPositionNoise, checkBoundaryAndAdjust)

    numOneAgentState = 2
    positionIndex = [0, 1]

    sheepPositionTransition = ag.SheepPositionTransition(numOneAgentState, positionIndex, checkBoundaryAndAdjust)
    wolfPositionTransition = ag.WolfPositionTransition(numOneAgentState, positionIndex, checkBoundaryAndAdjust)

    numAgent = 2
    sheepId = 0
    wolfId = 1
    transitionFunction = env.TransitionFunction(sheepId, wolfId, sheepPositionReset, wolfPositionReset,
                                                sheepPositionTransition, wolfPositionTransition)
    minDistance = 15
    isTerminal = env.IsTerminal(sheepId, wolfId, numOneAgentState, positionIndex, minDistance)

    screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
    screenColor = [255, 255, 255]
    circleColorList = [[50, 255, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50],
                       [50, 50, 50], [50, 50, 50], [50, 50, 50]]
    circleSize = 8
    saveImage = False
    saveImageFile = 'image'
    render = env.Render(numAgent, numOneAgentState, positionIndex, screen, screenColor, circleColorList, circleSize,
                        saveImage, saveImageFile)

    aliveBouns = -1
    deathPenalty = 20
    rewardDecay = 0.99
    rewardFunction = reward.TerminalPenalty(sheepId, wolfId, numOneAgentState, positionIndex, aliveBouns, deathPenalty, isTerminal)
    accumulateRewards = PG.AccumulateRewards(rewardDecay, rewardFunction)

    maxTimeStep = 150
    sampleTrajectory = PG.SampleTrajectory(maxTimeStep, transitionFunction, isTerminal)

    approximatePolicy = PG.ApproximatePolicy(actionSpace)
    trainPG = PG.TrainTensorflow(actionSpace)

    numTrajectory = 20
    maxEpisode = 1000

    # Generate models.
    learningRate = 1e-4
    hiddenNeuronNumbers = [128, 256, 512, 1024]
    hiddenDepths = [2, 4, 8]
    # hiddenNeuronNumbers = [128]
    # hiddenDepths = [2]
    generateModel = GeneratePolicyNet(numStateSpace, numActionSpace, learningRate)
    models = {(n, d): generateModel(d, round(n / d)) for n, d in it.product(hiddenNeuronNumbers, hiddenDepths)}
    print("Models generated")

    # Train.
    policyGradient = PG.PolicyGradient(numTrajectory, maxEpisode, render)
    trainModel = lambda model: policyGradient(model, approximatePolicy,
                                                             sampleTrajectory,
                                                             accumulateRewards,
                                                             trainPG)
    trainedModels = {key: trainModel(model) for key, model in models.items()}
    print("Finished training")

    # Evaluate
    modelEvaluate = Evaluate(numTrajectory, approximatePolicy, sampleTrajectory, rewardFunction)
    meanEpisodeRewards = {key: modelEvaluate(model) for key, model in trainedModels.items()}
    print("Finished evaluating")
    # print(meanEpisodeRewards)

    # Visualize
    independentVariableNames = ['NeuroTotalNumber', 'layerNumber']
    draw(meanEpisodeRewards, independentVariableNames)
    print("Finished visualizing", meanEpisodeRewards)
    def __call__(self, condition):

        getSavePath = self.getTrajectorySavePathByCondition(condition)
        attentionType = condition['attentionType']
        alpha = condition['alphaForStateWidening']
        C = condition['CForStateWidening']
        cBase = condition['cBase']
        numTree = condition['numTrees']
        numSimulations = condition['numSimulationTimes']
        actionRatio = condition['actionRatio']
        burnTime = condition['burnTime']
        damp = condition['damp']
        actionCost = condition['actionCost']

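        # Same condition sweep as above, but only the escape rate is recorded per (subtlety, subject) cell.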
        numSub = 10
        allResults = []
        possibleTrialSubtleties = [500.0, 3.3, 1.83, 0.92, 0.001]
        for subIndex in range(numSub):
            meanEscapeOnConditions = {}
            for chasingSubtlety in possibleTrialSubtleties:

                print(numTree, chasingSubtlety, numSimulations, attentionType)
                numAgent = 25
                sheepId = 0
                suspectorIds = list(range(1, numAgent))

                resetWolfIdAndSubtlety = ag.ResetWolfIdAndSubtlety(
                    suspectorIds, [chasingSubtlety])
                distanceToVisualDegreeRatio = 20
                minInitSheepWolfDistance = 9 * distanceToVisualDegreeRatio
                minInitSheepDistractorDistance = 2.5 * distanceToVisualDegreeRatio  # no distractor in killzone when init
                isLegalInitPositions = ag.IsLegalInitPositions(
                    sheepId, minInitSheepWolfDistance,
                    minInitSheepDistractorDistance)
                xBoundary = [0, 640]
                yBoundary = [0, 480]
                resetAgentPositions = ag.ResetAgentPositions(
                    xBoundary, yBoundary, numAgent, isLegalInitPositions)
                resetPhysicalState = ag.ResetPhysicalState(
                    sheepId, numAgent, resetAgentPositions,
                    resetWolfIdAndSubtlety)

                numFramePerSecond = 20
                numMDPTimeStepPerSecond = 5
                numFrameWithoutActionChange = int(numFramePerSecond /
                                                  numMDPTimeStepPerSecond)

                sheepActionUpdateFrequency = 1
                minSheepSpeed = int(17.4 * distanceToVisualDegreeRatio /
                                    numFramePerSecond)
                maxSheepSpeed = int(23.2 * distanceToVisualDegreeRatio /
                                    numFramePerSecond)
                warmUpTimeSteps = int(10 * numMDPTimeStepPerSecond)
                sheepPolicy = ag.SheepPolicy(sheepActionUpdateFrequency,
                                             minSheepSpeed, maxSheepSpeed,
                                             warmUpTimeSteps, burnTime, damp)

                wolfActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
                minWolfSpeed = int(8.7 * distanceToVisualDegreeRatio /
                                   numFramePerSecond)
                maxWolfSpeed = int(14.5 * distanceToVisualDegreeRatio /
                                   numFramePerSecond)
                wolfPolicy = ag.WolfPolicy(wolfActionUpdateFrequency,
                                           minWolfSpeed, maxWolfSpeed,
                                           warmUpTimeSteps)
                distractorActionUpdateFrequency = int(0.2 *
                                                      numMDPTimeStepPerSecond)
                minDistractorSpeed = int(8.7 * distanceToVisualDegreeRatio /
                                         numFramePerSecond)
                maxDistractorSpeed = int(14.5 * distanceToVisualDegreeRatio /
                                         numFramePerSecond)
                distractorPolicy = ag.DistractorPolicy(
                    distractorActionUpdateFrequency, minDistractorSpeed,
                    maxDistractorSpeed, warmUpTimeSteps)
                preparePolicy = ag.PreparePolicy(sheepId, numAgent,
                                                 sheepPolicy, wolfPolicy,
                                                 distractorPolicy)
                updatePhysicalState = ag.UpdatePhysicalState(
                    sheepId, numAgent, preparePolicy)

                xBoundary = [0, 640]
                yBoundary = [0, 480]
                checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(
                    xBoundary, yBoundary)
                transiteMultiAgentMotion = ag.TransiteMultiAgentMotion(
                    checkBoundaryAndAdjust)

                minDistance = 2.5 * distanceToVisualDegreeRatio
                isTerminal = env.IsTerminal(sheepId, minDistance)
                # screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
                # screenColor = np.array([0, 0, 0])
                # sheepColor = np.array([0, 255, 0])
                # wolfColor = np.array([255, 0, 0])
                # circleSize = 10
                # saveImage = True
                # saveImageFile = 'image3'
                # render = env.Render(numAgent, screen, xBoundary[1], yBoundary[1], screenColor, sheepColor, wolfColor, circleSize, saveImage, saveImageFile, isTerminal)
                render = None
                renderOnInSimulation = False
                transiteStateWithoutActionChangeInSimulation = env.TransiteStateWithoutActionChange(
                    numFrameWithoutActionChange, isTerminal,
                    transiteMultiAgentMotion, render, renderOnInSimulation)
                renderOnInPlay = False
                transiteStateWithoutActionChangeInPlay = env.TransiteStateWithoutActionChange(
                    numFrameWithoutActionChange, isTerminal,
                    transiteMultiAgentMotion, render, renderOnInPlay)

                if attentionType == 'idealObserver':
                    attentionLimitation = 1
                    precisionPerSlot = 500.0
                    precisionForUntracked = 500.0
                    memoryratePerSlot = 1.0
                    memoryrateForUntracked = 1.0
                elif attentionType == 'preAttention':
                    attentionLimitation = 1
                    precisionPerSlot = 2.5
                    precisionForUntracked = 2.5
                    memoryratePerSlot = 0.45
                    memoryrateForUntracked = 0.45
                elif attentionType == 'attention3':
                    attentionLimitation = 3
                    precisionPerSlot = 8.0
                    precisionForUntracked = 0.01
                    memoryratePerSlot = 0.7
                    memoryrateForUntracked = 0.01
                elif attentionType == 'hybrid3':
                    attentionLimitation = 3
                    precisionPerSlot = 8.0
                    precisionForUntracked = 2.5
                    memoryratePerSlot = 0.7
                    memoryrateForUntracked = 0.45
                elif attentionType == 'attention4':
                    attentionLimitation = 4
                    precisionPerSlot = 8.0
                    precisionForUntracked = 0.01
                    memoryratePerSlot = 0.7
                    memoryrateForUntracked = 0.01
                elif attentionType == 'hybrid4':
                    attentionLimitation = 4
                    precisionPerSlot = 8.0
                    precisionForUntracked = 2.5
                    memoryratePerSlot = 0.7
                    memoryrateForUntracked = 0.45
                attention = Attention.AttentionToPrecisionAndDecay(
                    precisionPerSlot, precisionForUntracked, memoryratePerSlot,
                    memoryrateForUntracked)
                transferMultiAgentStatesToPositionDF = ba.TransferMultiAgentStatesToPositionDF(
                    numAgent)
                possibleSubtleties = [
                    500.0, 11.0, 3.3, 1.83, 0.92, 0.31, 0.001
                ]
                resetBeliefAndAttention = ba.ResetBeliefAndAttention(
                    sheepId, suspectorIds, possibleSubtleties,
                    attentionLimitation, transferMultiAgentStatesToPositionDF,
                    attention)

                minAttentionDistance = 40.0
                rangeAttention = 10.0
                maxAttentionDistance = minAttentionDistance + rangeAttention
                attentionMinDistance = minAttentionDistance * distanceToVisualDegreeRatio
                attentionMaxDistance = maxAttentionDistance * distanceToVisualDegreeRatio
                numStandardErrorInDistanceRange = 4
                calDistancePriorOnAttentionSlot = Attention.CalDistancePriorOnAttentionSlot(
                    attentionMinDistance, attentionMaxDistance,
                    numStandardErrorInDistanceRange)
                attentionSwitch = Attention.AttentionSwitch(
                    attentionLimitation, calDistancePriorOnAttentionSlot)
                computePosterior = calPosterior.CalPosteriorLog(minDistance)

                attentionSwitchFrequencyInSimulation = np.inf
                beliefUpdateFrequencyInSimulation = np.inf
                updateBeliefAndAttentionInSimulation = ba.UpdateBeliefAndAttentionState(
                    attention, computePosterior, attentionSwitch,
                    transferMultiAgentStatesToPositionDF,
                    attentionSwitchFrequencyInSimulation,
                    beliefUpdateFrequencyInSimulation, burnTime)

                attentionSwitchFrequencyInPlay = int(0.6 *
                                                     numMDPTimeStepPerSecond)
                beliefUpdateFrequencyInPlay = int(0.2 *
                                                  numMDPTimeStepPerSecond)
                updateBeliefAndAttentionInPlay = ba.UpdateBeliefAndAttentionState(
                    attention, computePosterior, attentionSwitch,
                    transferMultiAgentStatesToPositionDF,
                    attentionSwitchFrequencyInPlay,
                    beliefUpdateFrequencyInPlay, burnTime)

                updatePhysicalStateByBeliefFrequencyInSimulationRoot = int(
                    0.6 * numMDPTimeStepPerSecond)
                updatePhysicalStateByBeliefInSimulationRoot = ba.UpdatePhysicalStateImagedByBelief(
                    updatePhysicalStateByBeliefFrequencyInSimulationRoot)
                updatePhysicalStateByBeliefFrequencyInSimulation = np.inf
                updatePhysicalStateByBeliefInSimulation = ba.UpdatePhysicalStateImagedByBelief(
                    updatePhysicalStateByBeliefFrequencyInSimulation)

                updatePhysicalStateByBeliefFrequencyInPlay = np.inf
                updatePhysicalStateByBeliefInPlay = ba.UpdatePhysicalStateImagedByBelief(
                    updatePhysicalStateByBeliefFrequencyInPlay)

                transitionFunctionInSimulation = env.TransitionFunction(
                    resetPhysicalState, resetBeliefAndAttention,
                    updatePhysicalState,
                    transiteStateWithoutActionChangeInSimulation,
                    updateBeliefAndAttentionInSimulation,
                    updatePhysicalStateByBeliefInSimulation)

                transitionFunctionInPlay = env.TransitionFunction(
                    resetPhysicalState, resetBeliefAndAttention,
                    updatePhysicalState,
                    transiteStateWithoutActionChangeInPlay,
                    updateBeliefAndAttentionInPlay,
                    updatePhysicalStateByBeliefInPlay)

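                # Rollout reward: a small per-step alive bonus (1/maxRollOutSteps) plus the terminal death penalty.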
                maxRollOutSteps = 5
                aliveBouns = 1 / maxRollOutSteps
                deathPenalty = -1
                rewardFunction = reward.RewardFunctionTerminalPenalty(
                    sheepId, aliveBouns, actionCost, deathPenalty, isTerminal)
                rewardRollout = lambda state, action, nextState: rewardFunction(
                    state, action)

                numActionSpace = 8
                actionInterval = int(360 / (numActionSpace))
                actionMagnitude = actionRatio * minSheepSpeed
                actionSpace = [
                    (0, 0)
                ] + [(np.cos(degreeInPolar) * actionMagnitude,
                      np.sin(degreeInPolar) * actionMagnitude)
                     for degreeInPolar in np.arange(0, 360, actionInterval) /
                     180 * math.pi]
                getActionPrior = lambda state: {
                    action: 1 / len(actionSpace)
                    for action in actionSpace
                }

                cInit = 1
                #cBase = 50
                scoreChild = ScoreChild(cInit, cBase)
                selectAction = SelectAction(scoreChild)
                selectNextState = SelectNextState(selectAction)

                initializeChildren = InitializeChildren(
                    actionSpace, transitionFunctionInSimulation,
                    getActionPrior)
                expand = Expand(isTerminal, initializeChildren)
                pWidening = PWidening(alpha, C)
                expandNewState = ExpandNextState(
                    transitionFunctionInSimulation, pWidening)

                rolloutPolicy = lambda state: actionSpace[np.random.choice(
                    range(numActionSpace))]
                rolloutHeuristic = lambda state: 0
                estimateValue = RollOut(rolloutPolicy, maxRollOutSteps,
                                        transitionFunctionInSimulation,
                                        rewardRollout, isTerminal,
                                        rolloutHeuristic)

                numActionPlaned = 1
                outputAction = OutputAction(numActionPlaned, actionSpace)
                #numSimulations = int(numTotalSimulationTimes/numTree)

                #sheepColorInMcts = np.array([0, 255, 0])
                #wolfColorInMcts = np.array([255, 0, 0])
                #distractorColorInMcts = np.array([255, 255, 255])
                #saveImageMCTS = True
                #mctsRender = env.MctsRender(numAgent, screen, xBoundary[1], yBoundary[1], screenColor, sheepColorInMcts, wolfColorInMcts, distractorColorInMcts, circleSize, saveImageMCTS, saveImageFile)
                #mctsRenderOn = False
                #mctsRender = None
                #pg.init()
                #mcts = MCTS(numSimulations, selectChild, expand, rollout, backup, selectAction, mctsRender, mctsRenderOn)
                pwMultipleTrees = PWMultipleTrees(numSimulations, selectAction,
                                                  selectNextState, expand,
                                                  expandNewState,
                                                  estimateValue, backup,
                                                  outputAction)

                maxRunningSteps = int(25 * numMDPTimeStepPerSecond)
                makeDiffSimulationRoot = MakeDiffSimulationRoot(
                    isTerminal, updatePhysicalStateByBeliefInSimulationRoot)
                runMCTSTrjactory = RunMCTSTrjactory(
                    maxRunningSteps, numTree, numActionPlaned,
                    sheepActionUpdateFrequency, transitionFunctionInPlay,
                    isTerminal, makeDiffSimulationRoot, render)

                rootAction = actionSpace[np.random.choice(
                    range(numActionSpace))]
                numTrial = 15
                print(attentionLimitation,
                      attentionMinDistance / distanceToVisualDegreeRatio,
                      attentionMaxDistance / distanceToVisualDegreeRatio)
                trajectories = [
                    runMCTSTrjactory(pwMultipleTrees)
                    for trial in range(numTrial)
                ]

                savePath = getSavePath({
                    'chasingSubtlety': chasingSubtlety,
                    'subIndex': subIndex
                })
                tsl.saveToPickle(trajectories, savePath)

                meanEscape = np.mean([
                    1 if len(trajectory) >= (maxRunningSteps - 1) else 0
                    for trajectory in trajectories
                ])
                meanEscapeOnConditions.update({chasingSubtlety: meanEscape})
                print(meanEscapeOnConditions)
            allResults.append(meanEscapeOnConditions)
            results = pd.DataFrame(allResults)
            getCSVSavePath = self.getCSVSavePathByCondition(condition)
            csvSavePath = getCSVSavePath({})
            results.to_csv(csvSavePath)
def main():
    #tf.set_random_seed(123)
    #np.random.seed(123)

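    # Policy-gradient actor-critic on the chasing task: 8 discrete actions and a
    # 4-dimensional state (the 2D positions of sheep and wolf).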
    actionSpace = [[10, 0], [7, 7], [0, 10], [-7, 7], [-10, 0], [-7, -7],
                   [0, -10], [7, -7]]
    numActionSpace = len(actionSpace)
    numStateSpace = 4

    numActorFC1Unit = 50
    numActorFC2Unit = 50
    numActorFC3Unit = 50
    numActorFC4Unit = 50
    numCriticFC1Unit = 100
    numCriticFC2Unit = 100
    numCriticFC3Unit = 100
    numCriticFC4Unit = 100
    learningRateActor = 1e-4
    learningRateCritic = 3e-4

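    # Actor graph: three hidden ReLU layers feeding a softmax over actions,
    # trained by minimizing advantage-weighted cross-entropy.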
    actorGraph = tf.Graph()
    with actorGraph.as_default():
        with tf.name_scope("inputs"):
            state_ = tf.placeholder(tf.float32, [None, numStateSpace],
                                    name="state_")
            actionLabel_ = tf.placeholder(tf.int32, [None, numActionSpace],
                                          name="actionLabel_")
            advantages_ = tf.placeholder(tf.float32, [
                None,
            ],
                                         name="advantages_")

        with tf.name_scope("hidden"):
            initWeight = tf.random_uniform_initializer(-0.03, 0.03)
            initBias = tf.constant_initializer(0.01)
            fullyConnected1_ = tf.layers.dense(inputs=state_,
                                               units=numActorFC1Unit,
                                               activation=tf.nn.relu,
                                               kernel_initializer=initWeight,
                                               bias_initializer=initBias)
            fullyConnected2_ = tf.layers.dense(inputs=fullyConnected1_,
                                               units=numActorFC2Unit,
                                               activation=tf.nn.relu,
                                               kernel_initializer=initWeight,
                                               bias_initializer=initBias)
            fullyConnected3_ = tf.layers.dense(inputs=fullyConnected2_,
                                               units=numActorFC3Unit,
                                               activation=tf.nn.relu,
                                               kernel_initializer=initWeight,
                                               bias_initializer=initBias)
            allActionActivation_ = tf.layers.dense(
                inputs=fullyConnected3_,
                units=numActionSpace,
                activation=None,
                kernel_initializer=initWeight,
                bias_initializer=initBias)

        with tf.name_scope("outputs"):
            actionDistribution_ = tf.nn.softmax(allActionActivation_,
                                                name='actionDistribution_')
            actionEntropy_ = tf.multiply(tfp.distributions.Categorical(
                probs=actionDistribution_).entropy(),
                                         1,
                                         name='actionEntropy_')
            negLogProb_ = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=allActionActivation_,
                labels=actionLabel_,
                name='negLogProb_')
            loss_ = tf.reduce_mean(tf.multiply(negLogProb_, advantages_),
                                   name='loss_')
            actorLossSummary = tf.summary.scalar("ActorLoss", loss_)

        with tf.name_scope("train"):
            trainOpt_ = tf.train.AdamOptimizer(learningRateActor,
                                               name='adamOpt_').minimize(loss_)

        # Saver is assumed here so that actorSaver.save below has a definition within the actor graph.
        actorSaver = tf.train.Saver()
        actorInit = tf.global_variables_initializer()

    actorModel = tf.Session(graph=actorGraph)
    actorModel.run(actorInit)

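    # Critic graph: four hidden ReLU layers regressing the state value with a squared-error loss.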
    criticGraph = tf.Graph()
    with criticGraph.as_default():
        with tf.name_scope("inputs"):
            state_ = tf.placeholder(tf.float32, [None, numStateSpace],
                                    name="state_")
            valueTarget_ = tf.placeholder(tf.float32, [None, 1],
                                          name="valueTarget_")

        with tf.name_scope("hidden"):
            initWeight = tf.random_uniform_initializer(-0.03, 0.03)
            initBias = tf.constant_initializer(0.001)
            fullyConnected1_ = tf.layers.dense(inputs=state_,
                                               units=numCriticFC1Unit,
                                               activation=tf.nn.relu,
                                               kernel_initializer=initWeight,
                                               bias_initializer=initBias)
            fullyConnected2_ = tf.layers.dense(inputs=fullyConnected1_,
                                               units=numCriticFC2Unit,
                                               activation=tf.nn.relu,
                                               kernel_initializer=initWeight,
                                               bias_initializer=initBias)
            fullyConnected3_ = tf.layers.dense(inputs=fullyConnected2_,
                                               units=numCriticFC3Unit,
                                               activation=tf.nn.relu,
                                               kernel_initializer=initWeight,
                                               bias_initializer=initBias)
            fullyConnected4_ = tf.layers.dense(inputs=fullyConnected3_,
                                               units=numCriticFC4Unit,
                                               activation=tf.nn.relu,
                                               kernel_initializer=initWeight,
                                               bias_initializer=initBias)

        with tf.name_scope("outputs"):
            value_ = tf.layers.dense(inputs=fullyConnected4_,
                                     units=1,
                                     activation=None,
                                     name='value_',
                                     kernel_initializer=initWeight,
                                     bias_initializer=initBias)
            diff_ = tf.subtract(valueTarget_, value_, name='diff_')
            loss_ = tf.reduce_mean(tf.square(diff_), name='loss_')
        criticLossSummary = tf.summary.scalar("CriticLoss", loss_)

        with tf.name_scope("train"):
            trainOpt_ = tf.train.AdamOptimizer(learningRateCritic,
                                               name='adamOpt_').minimize(loss_)

        # Saver is assumed here so that criticSaver.save below has a definition within the critic graph.
        criticSaver = tf.train.Saver()
        criticInit = tf.global_variables_initializer()

    criticModel = tf.Session(graph=criticGraph)
    criticModel.run(criticInit)

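    # Chasing environment: a 360x360 arena where the episode terminates once sheep and wolf
    # are within minDistance of each other.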
    xBoundary = [0, 360]
    yBoundary = [0, 360]
    checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(xBoundary, yBoundary)

    initSheepPosition = np.array([180, 180])
    initWolfPosition = np.array([180, 180])
    initSheepVelocity = np.array([0, 0])
    initWolfVelocity = np.array([0, 0])
    initSheepPositionNoise = np.array([60, 120])
    initWolfPositionNoise = np.array([0, 60])
    sheepPositionReset = ag.SheepPositionReset(initSheepPosition,
                                               initSheepPositionNoise)
    wolfPositionReset = ag.WolfPositionReset(initWolfPosition,
                                             initWolfPositionNoise)

    numOneAgentState = 2
    positionIndex = [0, 1]

    sheepPositionTransition = ag.SheepPositionTransition(
        numOneAgentState, positionIndex, checkBoundaryAndAdjust)
    wolfPositionTransition = ag.WolfPositionTransition(numOneAgentState,
                                                       positionIndex,
                                                       checkBoundaryAndAdjust)

    numAgent = 2
    sheepId = 0
    wolfId = 1
    transitionFunction = env.TransitionFunction(sheepId, wolfId,
                                                sheepPositionReset,
                                                wolfPositionReset,
                                                sheepPositionTransition,
                                                wolfPositionTransition)
    minDistance = 15
    isTerminal = env.IsTerminal(sheepId, wolfId, numOneAgentState,
                                positionIndex, minDistance)

    screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
    screenColor = [255, 255, 255]
    circleColorList = [[50, 255, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50],
                       [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50],
                       [50, 50, 50]]
    circleSize = 8
    saveImage = False
    saveImageFile = 'image'
    render = env.Render(numAgent, numOneAgentState, positionIndex, screen,
                        screenColor, circleColorList, circleSize, saveImage,
                        saveImageFile)

    aliveBouns = -1
    deathPenalty = 20
    rewardDecay = 0.99
    rewardFunction = reward.RewardFunctionTerminalPenalty(
        sheepId, wolfId, numOneAgentState, positionIndex, aliveBouns,
        deathPenalty, isTerminal)
    accumulateReward = AccumulateReward(rewardDecay, rewardFunction)

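    # Training loop: 50 sampled trajectories per training episode for up to 602 episodes,
    # after which both models are checkpointed.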
    maxTimeStep = 150
    sampleTrajectory = SampleTrajectory(maxTimeStep, transitionFunction,
                                        isTerminal)

    approximatePolicy = ApproximatePolicy(actionSpace)
    trainCritic = TrainCriticMonteCarloTensorflow(accumulateReward)
    estimateAdvantage = EstimateAdvantageMonteCarlo(accumulateReward)
    trainActor = TrainActorMonteCarloTensorflow(actionSpace)

    numTrajectory = 50
    maxEpisode = 602
    actorCritic = OfflineAdvantageActorCritic(numTrajectory, maxEpisode,
                                              render)

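    # approximateValue (the critic's value approximator) is assumed to be defined elsewhere in the original module.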
    trainedActorModel, trainedCriticModel = actorCritic(
        actorModel, criticModel, approximatePolicy, sampleTrajectory,
        trainCritic, approximateValue, estimateAdvantage, trainActor)

    savePathActor = 'data/tmpModelActor.ckpt'
    savePathCritic = 'data/tmpModelCritic.ckpt'
    with actorModel.as_default():
        actorSaver.save(trainedActorModel, savePathActor)
    with criticModel.as_default():
        criticSaver.save(trainedCriticModel, savePathCritic)

# Example 6

def evaluate(cInit, cBase):
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7),
                   (0, -10), (7, -7)]
    numActionSpace = len(actionSpace)
    getActionPrior = GetActionPrior(actionSpace)
    numStateSpace = 4

    initSheepPosition = np.array([90, 90])
    initWolfPosition = np.array([90, 90])
    initSheepVelocity = np.array([0, 0])
    initWolfVelocity = np.array([0, 0])
    initSheepPositionNoise = np.array([40, 60])
    initWolfPositionNoise = np.array([0, 20])
    sheepPositionReset = ag.SheepPositionReset(initSheepPosition,
                                               initSheepPositionNoise)
    wolfPositionReset = ag.WolfPositionReset(initWolfPosition,
                                             initWolfPositionNoise)

    numOneAgentState = 2
    positionIndex = [0, 1]
    xBoundary = [0, 180]
    yBoundary = [0, 180]
    checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(xBoundary, yBoundary)
    sheepPositionTransition = ag.SheepPositionTransition(
        numOneAgentState, positionIndex, checkBoundaryAndAdjust)
    wolfSpeed = 7
    wolfPositionTransition = ag.WolfPositionTransition(numOneAgentState,
                                                       positionIndex,
                                                       checkBoundaryAndAdjust,
                                                       wolfSpeed)

    numAgent = 2
    sheepId = 0
    wolfId = 1
    transition = env.TransitionFunction(sheepId, wolfId, sheepPositionReset,
                                        wolfPositionReset,
                                        sheepPositionTransition,
                                        wolfPositionTransition)
    minDistance = 10
    isTerminal = env.IsTerminal(sheepId, wolfId, numOneAgentState,
                                positionIndex, minDistance)

    screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
    screenColor = [255, 255, 255]
    circleColorList = [[50, 255, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50],
                       [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50],
                       [50, 50, 50]]
    circleSize = 8
    saveImage = True
    saveImageFile = 'image'
    render = env.Render(numAgent, numOneAgentState, positionIndex, screen,
                        screenColor, circleColorList, circleSize, saveImage,
                        saveImageFile)

    aliveBouns = 0.05
    deathPenalty = -1
    rewardFunction = reward.RewardFunctionTerminalPenalty(
        sheepId, wolfId, numOneAgentState, positionIndex, aliveBouns,
        deathPenalty, isTerminal)

    # Hyper-parameters
    numSimulations = 600
    maxRunningSteps = 70

    # MCTS algorithm
    # Select child
    calculateScore = CalculateScore(cInit, cBase)
    selectChild = SelectChild(calculateScore)

    # expand
    initializeChildren = InitializeChildren(actionSpace, transition,
                                            getActionPrior)
    expand = Expand(transition, isTerminal, initializeChildren)
    #selectNextRoot = selectNextRoot

    # Rollout
    rolloutPolicy = lambda state: actionSpace[np.random.choice(
        range(numActionSpace))]
    maxRollOutSteps = 50
    rollout = RollOut(rolloutPolicy, maxRollOutSteps, transition,
                      rewardFunction, isTerminal)

    mcts = MCTS(numSimulations, selectChild, expand, rollout, backup,
                selectNextRoot)

    runMCTS = RunMCTS(mcts, maxRunningSteps, isTerminal, render)

    rootAction = actionSpace[np.random.choice(range(numActionSpace))]
    numTestingIterations = 70
    episodeLengths = []
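    # Run numTestingIterations MCTS planning episodes and record each episode's length.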
    import datetime
    for step in range(numTestingIterations):
        print(datetime.datetime.now())
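        # With (state, action) = (None, None) the transition falls back to the
        # reset functions wired in above, producing a fresh initial state.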
        state, action = None, None
        initState = transition(state, action)
        #optimal = math.ceil((np.sqrt(np.sum(np.power(initState[0:2] - initState[2:4], 2))) - minDistance )/10)
        rootNode = Node(id={rootAction: initState},
                        num_visited=0,
                        sum_value=0,
                        is_expanded=True)
        episodeLength = runMCTS(rootNode)
        episodeLengths.append(episodeLength)
    meanEpisodeLength = np.mean(episodeLengths)
    print("mean episode length is", meanEpisodeLength)
    return [meanEpisodeLength]
Example #7
def evaluate(numTree, chasingSubtlety, numTotalSimulationTimes, cInit, cBase):
    print(numTree, chasingSubtlety, numTotalSimulationTimes, cInit, cBase)
    numActionSpace = 8
    actionInterval = int(360 / numActionSpace)
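    # Eight unit-length actions spaced evenly around the circle (every 45 degrees).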
    actionSpace = [
        (np.cos(degreeInPolar), np.sin(degreeInPolar))
        for degreeInPolar in np.arange(0, 360, actionInterval) / 180 * math.pi
    ]
    getActionPrior = GetActionPrior(actionSpace)

    # 2D Env
    initSheepPosition = np.array([320, 240])
    initSheepPositionNoise = np.array([0, 0])
    resetSheepState = ag.ResetAgentState(initSheepPosition,
                                         initSheepPositionNoise)
    initWolfOrDistractorPosition = np.array([320, 240])
    initWolfOrDistractorPositionNoise = np.array([125, 230])
    resetWolfOrDistractorState = ag.ResetAgentState(
        initWolfOrDistractorPosition, initWolfOrDistractorPositionNoise)

    numAgent = 25
    sheepId = 0
    suspectorIds = list(range(1, numAgent))

    resetWolfIdAndSubtlety = ag.ResetWolfIdAndSubtlety(suspectorIds,
                                                       [chasingSubtlety])
    resetPhysicalState = ag.ResetPhysicalState(sheepId, numAgent,
                                               resetSheepState,
                                               resetWolfOrDistractorState,
                                               resetWolfIdAndSubtlety)

    numFramePerSecond = 60
    numMDPTimeStepPerSecond = 5
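    # 60 rendered frames per second / 5 MDP steps per second = 12 frames per action.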
    numFrameWithoutActionChange = int(numFramePerSecond /
                                      numMDPTimeStepPerSecond)

    sheepActionUpdateFrequency = 1
    distanceToVisualDegreeRatio = 20
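    # Speeds below are given in visual degrees per second and converted to
    # pixels per rendered frame (the ratio is pixels per visual degree).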
    minSheepSpeed = int(17.4 * distanceToVisualDegreeRatio / numFramePerSecond)
    maxSheepSpeed = int(23.2 * distanceToVisualDegreeRatio / numFramePerSecond)
    warmUpTimeSteps = int(10 * numMDPTimeStepPerSecond)
    sheepPolicy = ag.SheepPolicy(sheepActionUpdateFrequency, minSheepSpeed,
                                 maxSheepSpeed, warmUpTimeSteps)

    wolfActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
    minWolfSpeed = int(8.7 * distanceToVisualDegreeRatio / numFramePerSecond)
    maxWolfSpeed = int(14.5 * distanceToVisualDegreeRatio / numFramePerSecond)
    wolfPolicy = ag.WolfPolicy(wolfActionUpdateFrequency, minWolfSpeed,
                               maxWolfSpeed, warmUpTimeSteps)
    distractorActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
    minDistractorSpeed = int(8.7 * distanceToVisualDegreeRatio /
                             numFramePerSecond)
    maxDistractorSpeed = int(14.5 * distanceToVisualDegreeRatio /
                             numFramePerSecond)
    distractorPolicy = ag.DistractorPolicy(distractorActionUpdateFrequency,
                                           minDistractorSpeed,
                                           maxDistractorSpeed, warmUpTimeSteps)
    preparePolicy = ag.PreparePolicy(sheepId, numAgent, sheepPolicy,
                                     wolfPolicy, distractorPolicy)
    updatePhysicalState = ag.UpdatePhysicalState(numAgent, preparePolicy)

    xBoundary = [0, 640]
    yBoundary = [0, 480]
    checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(xBoundary, yBoundary)
    transiteMultiAgentMotion = ag.TransiteMultiAgentMotion(
        checkBoundaryAndAdjust)

    minDistance = 2.5 * distanceToVisualDegreeRatio
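    # Terminal check: the sheep counts as caught within 2.5 visual degrees (50 px here).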
    isTerminal = env.IsTerminal(sheepId, minDistance)
    screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
    #screen = None
    screenColor = np.array([255, 255, 255])
    sheepColor = np.array([0, 255, 0])
    wolfColor = np.array([255, 0, 0])
    circleSize = 10
    saveImage = True
    saveImageFile = 'image1'
    render = env.Render(numAgent, screen, screenColor, sheepColor, wolfColor,
                        circleSize, saveImage, saveImageFile)
    renderOnInSimulation = False
    transiteStateWithoutActionChangeInSimulation = env.TransiteStateWithoutActionChange(
        numFrameWithoutActionChange, isTerminal, transiteMultiAgentMotion,
        render, renderOnInSimulation)
    renderOnInPlay = True
    transiteStateWithoutActionChangeInPlay = env.TransiteStateWithoutActionChange(
        numFrameWithoutActionChange, isTerminal, transiteMultiAgentMotion,
        render, renderOnInPlay)

    attentionLimitation = 4
    precisionPerSlot = 8.0
    precisionForUntracked = 2.5
    memoryratePerSlot = 0.7
    memoryrateForUntracked = 0.45
    attention = Attention.AttentionToPrecisionAndDecay(precisionPerSlot,
                                                       precisionForUntracked,
                                                       memoryratePerSlot,
                                                       memoryrateForUntracked)
    transferMultiAgentStatesToPositionDF = ba.TransferMultiAgentStatesToPositionDF(
        numAgent)
    possibleSubtleties = [500, 11, 3.3, 1.83, 0.92, 0.31]
    resetBeliefAndAttention = ba.ResetBeliefAndAttention(
        sheepId, suspectorIds, possibleSubtleties, attentionLimitation,
        transferMultiAgentStatesToPositionDF, attention)

    maxDistance = 7.5 * distanceToVisualDegreeRatio
    numStandardErrorInDistanceRange = 2
    calDistancePriorOnAttentionSlot = Attention.CalDistancePriorOnAttentionSlot(
        minDistance, maxDistance, numStandardErrorInDistanceRange)
    attentionSwitch = Attention.AttentionSwitch(
        attentionLimitation, calDistancePriorOnAttentionSlot)
    computePosterior = calPosterior.CalPosteriorLog(minDistance)

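    # In simulation (planning) the attention switch and belief update never fire
    # (np.inf frequency); the play-time frequencies are set below.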
    attentionSwitchFrequencyInSimulation = np.inf
    beliefUpdateFrequencyInSimulation = np.inf
    updateBeliefAndAttentionInSimulation = ba.UpdateBeliefAndAttentionState(
        attention, computePosterior, attentionSwitch,
        transferMultiAgentStatesToPositionDF,
        attentionSwitchFrequencyInSimulation,
        beliefUpdateFrequencyInSimulation)

    attentionSwitchFrequencyInPlay = int(0.6 * numMDPTimeStepPerSecond)
    beliefUpdateFrequencyInPlay = int(0.2 * numMDPTimeStepPerSecond)
    updateBeliefAndAttentionInPlay = ba.UpdateBeliefAndAttentionState(
        attention, computePosterior, attentionSwitch,
        transferMultiAgentStatesToPositionDF, attentionSwitchFrequencyInPlay,
        beliefUpdateFrequencyInPlay)

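    # Only the simulation root re-imagines agent positions from the current belief;
    # within simulations and in play this update is switched off (np.inf frequency).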
    updatePhysicalStateByBeliefFrequencyInSimulationRoot = int(
        0.2 * numMDPTimeStepPerSecond)
    updatePhysicalStateByBeliefInSimulationRoot = ba.UpdatePhysicalStateImagedByBelief(
        updatePhysicalStateByBeliefFrequencyInSimulationRoot)
    updatePhysicalStateByBeliefFrequencyInSimulation = np.inf
    updatePhysicalStateByBeliefInSimulation = ba.UpdatePhysicalStateImagedByBelief(
        updatePhysicalStateByBeliefFrequencyInSimulation)

    updatePhysicalStateByBeliefFrequencyInPlay = np.inf
    updatePhysicalStateByBeliefInPlay = ba.UpdatePhysicalStateImagedByBelief(
        updatePhysicalStateByBeliefFrequencyInPlay)

    transitionFunctionInSimulation = env.TransitionFunction(
        resetPhysicalState, resetBeliefAndAttention, updatePhysicalState,
        transiteStateWithoutActionChangeInSimulation,
        updateBeliefAndAttentionInSimulation,
        updatePhysicalStateByBeliefInSimulation)

    transitionFunctionInPlay = env.TransitionFunction(
        resetPhysicalState, resetBeliefAndAttention, updatePhysicalState,
        transiteStateWithoutActionChangeInPlay, updateBeliefAndAttentionInPlay,
        updatePhysicalStateByBeliefInPlay)

    maxRollOutSteps = 5
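    # Per-step alive bonus scaled so a maximal-length rollout sums to at most 1.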
    aliveBouns = 1 / maxRollOutSteps
    deathPenalty = -1
    rewardFunction = reward.RewardFunctionTerminalPenalty(
        sheepId, aliveBouns, deathPenalty, isTerminal)

    # MCTS algorithm
    # Select child
    calculateScore = CalculateScore(cInit, cBase)
    selectChild = SelectChild(calculateScore)

    # expand
    initializeChildren = InitializeChildren(actionSpace,
                                            transitionFunctionInSimulation,
                                            getActionPrior)
    expand = Expand(isTerminal, initializeChildren)

    # Rollout
    rolloutPolicy = lambda state: actionSpace[np.random.choice(
        range(numActionSpace))]
    rollout = RollOut(rolloutPolicy, maxRollOutSteps,
                      transitionFunctionInSimulation, rewardFunction,
                      isTerminal)

    numActionPlaned = 1
    selectAction = SelectAction(numActionPlaned, actionSpace)
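    # Split the total simulation budget evenly across the numTree search trees.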
    numSimulations = int(numTotalSimulationTimes / numTree)

    sheepColorInMcts = np.array([0, 255, 0])
    wolfColorInMcts = np.array([255, 0, 0])
    distractorColorInMcts = np.array([0, 0, 0])
    mctsRender = env.MctsRender(numAgent, screen, xBoundary[1], yBoundary[1],
                                screenColor, sheepColorInMcts, wolfColorInMcts,
                                distractorColorInMcts, circleSize, saveImage,
                                saveImageFile)
    mctsRenderOn = True

    mcts = MCTS(numSimulations, selectChild, expand, rollout, backup,
                selectAction, mctsRender, mctsRenderOn)

    maxRunningSteps = int(25 * numMDPTimeStepPerSecond)
    makeDiffSimulationRoot = MakeDiffSimulationRoot(
        isTerminal, updatePhysicalStateByBeliefInSimulationRoot)
    runMCTS = RunMCTS(maxRunningSteps, numTree, numActionPlaned,
                      transitionFunctionInPlay, isTerminal,
                      makeDiffSimulationRoot, render)

    rootAction = actionSpace[np.random.choice(range(numActionSpace))]
    numTestingIterations = 1
    episodeLengths = []
    escape = 0
    step = 1
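    # Episodes that end within the first second of play are discarded and re-run;
    # an escape is counted when the sheep survives to within 10 steps of the cap.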
    import datetime
    while step <= numTestingIterations:
        print(datetime.datetime.now())
        episodeLength = runMCTS(mcts)
        if episodeLength >= 1 * numMDPTimeStepPerSecond:
            step = step + 1
            episodeLengths.append(episodeLength)
            if episodeLength >= maxRunningSteps - 10:
                escape = escape + 1
    meanEpisodeLength = np.mean(episodeLengths)
    print("mean episode length is", meanEpisodeLength,
          escape / numTestingIterations)
    return [meanEpisodeLength, escape / numTestingIterations]
Example #8
def main():
    #tf.set_random_seed(123)
    #np.random.seed(123)

    actionSpace = [[10, 0], [7, 7], [0, 10], [-7, 7], [-10, 0], [-7, -7],
                   [0, -10], [7, -7]]
    numActionSpace = len(actionSpace)
    numStateSpace = 4

    xBoundary = [0, 360]
    yBoundary = [0, 360]
    checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(xBoundary, yBoundary)

    initSheepPosition = np.array([180, 180])
    initWolfPosition = np.array([180, 180])
    initSheepVelocity = np.array([0, 0])
    initWolfVelocity = np.array([0, 0])
    initSheepPositionNoise = np.array([120, 120])
    initWolfPositionNoise = np.array([60, 60])
    sheepPositionReset = ag.SheepPositionReset(initSheepPosition,
                                               initSheepPositionNoise,
                                               checkBoundaryAndAdjust)
    wolfPositionReset = ag.WolfPositionReset(initWolfPosition,
                                             initWolfPositionNoise,
                                             checkBoundaryAndAdjust)

    numOneAgentState = 2
    positionIndex = [0, 1]

    sheepPositionTransition = ag.SheepPositionTransition(
        numOneAgentState, positionIndex, checkBoundaryAndAdjust)
    wolfPositionTransition = ag.WolfPositionTransition(numOneAgentState,
                                                       positionIndex,
                                                       checkBoundaryAndAdjust)

    numAgent = 2
    sheepId = 0
    wolfId = 1
    transitionFunction = env.TransitionFunction(sheepId, wolfId,
                                                sheepPositionReset,
                                                wolfPositionReset,
                                                sheepPositionTransition,
                                                wolfPositionTransition)
    minDistance = 15
    isTerminal = env.IsTerminal(sheepId, wolfId, numOneAgentState,
                                positionIndex, minDistance)

    screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
    screenColor = [255, 255, 255]
    circleColorList = [[50, 255, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50],
                       [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50],
                       [50, 50, 50]]
    circleSize = 8
    saveImage = False
    saveImageFile = 'image'
    render = env.Render(numAgent, numOneAgentState, positionIndex, screen,
                        screenColor, circleColorList, circleSize, saveImage,
                        saveImageFile)

    aliveBouns = -1
    deathPenalty = 20
    rewardDecay = 0.99
    rewardFunction = reward.RewardFunctionTerminalPenalty(
        sheepId, wolfId, numOneAgentState, positionIndex, aliveBouns,
        deathPenalty, isTerminal)
    accumulateReward = A2CMC.AccumulateReward(rewardDecay, rewardFunction)

    maxTimeStep = 150
    sampleTrajectory = A2CMC.SampleTrajectory(maxTimeStep, transitionFunction,
                                              isTerminal)

    approximatePolicy = A2CMC.ApproximatePolicy(actionSpace)
    approximateValue = A2CMC.approximateValue
    trainCritic = A2CMC.TrainCriticMonteCarloTensorflow(accumulateReward)
    estimateAdvantage = A2CMC.EstimateAdvantageMonteCarlo(accumulateReward)
    trainActor = A2CMC.TrainActorMonteCarloTensorflow(actionSpace)

    numTrajectory = 5
    maxEpisode = 1

    # Generate models.
    learningRateActor = 1e-4
    learningRateCritic = 3e-4
    hiddenNeuronNumbers = [128, 256, 512, 1024]
    hiddenDepths = [2, 4, 8]
    generateModel = GenerateActorCriticModel(numStateSpace, numActionSpace,
                                             learningRateActor,
                                             learningRateCritic)
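    # Build one actor-critic pair per (total hidden neurons, depth) combination;
    # each layer gets round(n / d) units.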
    modelDict = {(n, d): generateModel(d, round(n / d))
                 for n, d in it.product(hiddenNeuronNumbers, hiddenDepths)}

    print("Generated graphs")
    # Train.
    actorCritic = A2CMC.OfflineAdvantageActorCritic(numTrajectory, maxEpisode,
                                                    render)
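    # Train each generated model configuration with the same offline actor-critic routine.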
    modelTrain = lambda actorModel, criticModel: actorCritic(
        actorModel, criticModel, approximatePolicy, sampleTrajectory,
        trainCritic, approximateValue, estimateAdvantage, trainActor)
    trainedModelDict = {
        key: modelTrain(model[0], model[1])
        for key, model in modelDict.items()
    }

    print("Finished training")
    # Evaluate
    modelEvaluate = Evaluate(numTrajectory, approximatePolicy,
                             sampleTrajectory, rewardFunction)
    meanEpisodeRewards = {
        key: modelEvaluate(model[0], model[1])
        for key, model in trainedModelDict.items()
    }

    print("Finished evaluating")
    # Visualize
    independentVariableNames = ['NeuroTotalNumber', 'layerNumber']
    draw(meanEpisodeRewards, independentVariableNames)