def __call__(self, condition):
    getSavePath = self.getTrajectorySavePathByCondition(condition)
    getCSVSavePath = self.getCSVSavePathByCondition(condition)
    attentionType = condition['attentionType']
    alpha = condition['alphaForStateWidening']
    C = condition['CForStateWidening']
    minAttentionDistance = condition['minAttentionDistance']
    rangeAttention = condition['rangeAttention']
    numTree = condition['numTrees']
    numSimulations = condition['numSimulationTimes']
    actionRatio = condition['actionRatio']
    cBase = condition['cBase']
    burnTime = condition['burnTime']

    numSub = 5
    allResultsIdentity = []
    allResultsAttention = []
    allResultsIdentitySampled = []
    possibleTrialSubtleties = [500.0, 11.0, 3.3, 1.83, 0.92, 0.31, 0.001]

    for subIndex in range(numSub):
        meanIdentityPerceptionOnConditions = {}
        meanAttentionPerceptionOnConditions = {}
        meanIdentitySampledOnConditions = {}

        for chasingSubtlety in possibleTrialSubtleties:
            print(numTree, chasingSubtlety, numSimulations, attentionType)

            numAgent = 25
            sheepId = 0
            suspectorIds = list(range(1, numAgent))
            resetWolfIdAndSubtlety = ag.ResetWolfIdAndSubtlety(suspectorIds, [chasingSubtlety])

            distanceToVisualDegreeRatio = 20
            minInitSheepWolfDistance = 9 * distanceToVisualDegreeRatio
            minInitSheepDistractorDistance = 2.5 * distanceToVisualDegreeRatio  # no distractor in killzone when init
            isLegalInitPositions = ag.IsLegalInitPositions(sheepId, minInitSheepWolfDistance,
                                                           minInitSheepDistractorDistance)
            xBoundary = [0, 640]
            yBoundary = [0, 480]
            resetAgentPositions = ag.ResetAgentPositions(xBoundary, yBoundary, numAgent, isLegalInitPositions)
            resetPhysicalState = ag.ResetPhysicalState(sheepId, numAgent, resetAgentPositions,
                                                       resetWolfIdAndSubtlety)

            numFramePerSecond = 20
            numMDPTimeStepPerSecond = 5
            numFrameWithoutActionChange = int(numFramePerSecond / numMDPTimeStepPerSecond)

            sheepActionUpdateFrequency = 1
            minSheepSpeed = int(17.4 * distanceToVisualDegreeRatio / numFramePerSecond)
            maxSheepSpeed = int(23.2 * distanceToVisualDegreeRatio / numFramePerSecond)
            warmUpTimeSteps = int(10 * numMDPTimeStepPerSecond)
            sheepPolicy = ag.SheepPolicy(sheepActionUpdateFrequency, minSheepSpeed, maxSheepSpeed,
                                         warmUpTimeSteps, burnTime)

            wolfActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
            minWolfSpeed = int(8.7 * distanceToVisualDegreeRatio / numFramePerSecond)
            maxWolfSpeed = int(14.5 * distanceToVisualDegreeRatio / numFramePerSecond)
            wolfPolicy = ag.WolfPolicy(wolfActionUpdateFrequency, minWolfSpeed, maxWolfSpeed, warmUpTimeSteps)

            distractorActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
            minDistractorSpeed = int(8.7 * distanceToVisualDegreeRatio / numFramePerSecond)
            maxDistractorSpeed = int(14.5 * distanceToVisualDegreeRatio / numFramePerSecond)
            distractorPolicy = ag.DistractorPolicy(distractorActionUpdateFrequency, minDistractorSpeed,
                                                   maxDistractorSpeed, warmUpTimeSteps)

            preparePolicy = ag.PreparePolicy(sheepId, numAgent, sheepPolicy, wolfPolicy, distractorPolicy)
            updatePhysicalState = ag.UpdatePhysicalState(sheepId, numAgent, preparePolicy)

            xBoundary = [0, 640]
            yBoundary = [0, 480]
            checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(xBoundary, yBoundary)
            transiteMultiAgentMotion = ag.TransiteMultiAgentMotion(checkBoundaryAndAdjust)

            minDistance = 0.0 * distanceToVisualDegreeRatio
            isTerminal = env.IsTerminal(sheepId, minDistance)

            # screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
            # screenColor = np.array([0, 0, 0])
            # sheepColor = np.array([0, 255, 0])
            # wolfColor = np.array([255, 0, 0])
            # circleSize = 10
            # saveImage = True
            # saveImageFile = 'image3'
            # render = env.Render(numAgent, screen, xBoundary[1], yBoundary[1], screenColor, sheepColor,
            #                     wolfColor, circleSize, saveImage, saveImageFile, isTerminal)
            render = None
            renderOnInSimulation = False
            transiteStateWithoutActionChangeInSimulation = env.TransiteStateWithoutActionChange(
                numFrameWithoutActionChange, isTerminal, transiteMultiAgentMotion, render, renderOnInSimulation)
            renderOnInPlay = False
            transiteStateWithoutActionChangeInPlay = env.TransiteStateWithoutActionChange(
                numFrameWithoutActionChange, isTerminal, transiteMultiAgentMotion, render, renderOnInPlay)

            # attention-model settings per attentionType:
            # (attentionLimitation, precisionPerSlot, precisionForUntracked, memoryratePerSlot, memoryrateForUntracked)
            attentionParameters = {
                'idealObserver': (4, 500.0, 500.0, 1.0, 1.0),
                'preAttention': (4, 2.5, 2.5, 0.45, 0.45),
                'attention3': (3, 16.0, 0.01, 0.7, 0.01),
                'hybrid3': (3, 16.0, 2.5, 0.7, 0.45),
                'attention4': (4, 16.0, 0.01, 0.7, 0.01),
                'hybrid4': (4, 16.0, 2.5, 0.7, 0.45),
                'preAttentionMem0.25': (4, 2.5, 2.5, 0.25, 0.25),
                'preAttentionMem0.65': (4, 2.5, 2.5, 0.65, 0.65),
                'preAttentionPre0.5': (4, 0.5, 0.5, 0.45, 0.45),
                'preAttentionPre4.5': (4, 4.5, 4.5, 0.45, 0.45),
            }
            (attentionLimitation, precisionPerSlot, precisionForUntracked,
             memoryratePerSlot, memoryrateForUntracked) = attentionParameters[attentionType]

            attention = Attention.AttentionToPrecisionAndDecay(precisionPerSlot, precisionForUntracked,
                                                               memoryratePerSlot, memoryrateForUntracked)
            transferMultiAgentStatesToPositionDF = ba.TransferMultiAgentStatesToPositionDF(numAgent)
            possibleSubtleties = [500.0, 11.0, 3.3, 1.83, 0.92, 0.31, 0.001]
            resetBeliefAndAttention = ba.ResetBeliefAndAttention(sheepId, suspectorIds, possibleSubtleties,
                                                                 attentionLimitation,
                                                                 transferMultiAgentStatesToPositionDF, attention)

            maxAttentionDistance = minAttentionDistance + rangeAttention
            attentionMinDistance = minAttentionDistance * distanceToVisualDegreeRatio
            attentionMaxDistance = maxAttentionDistance * distanceToVisualDegreeRatio
            numStandardErrorInDistanceRange = 4
            calDistancePriorOnAttentionSlot = Attention.CalDistancePriorOnAttentionSlot(
                attentionMinDistance, attentionMaxDistance, numStandardErrorInDistanceRange)
            attentionSwitch = Attention.AttentionSwitch(attentionLimitation, calDistancePriorOnAttentionSlot)
            computePosterior = calPosterior.CalPosteriorLog(minDistance)
            print(attentionLimitation, attentionMinDistance / distanceToVisualDegreeRatio,
                  attentionMaxDistance / distanceToVisualDegreeRatio)

            attentionSwitchFrequencyInSimulation = np.inf
            beliefUpdateFrequencyInSimulation = np.inf
            updateBeliefAndAttentionInSimulation = ba.UpdateBeliefAndAttentionState(
                attention, computePosterior, attentionSwitch, transferMultiAgentStatesToPositionDF,
                attentionSwitchFrequencyInSimulation, beliefUpdateFrequencyInSimulation, burnTime)

            attentionSwitchFrequencyInPlay = int(0.6 * numMDPTimeStepPerSecond)
            beliefUpdateFrequencyInPlay = int(0.2 * numMDPTimeStepPerSecond)
            updateBeliefAndAttentionInPlay = ba.UpdateBeliefAndAttentionState(
                attention, computePosterior, attentionSwitch, transferMultiAgentStatesToPositionDF,
                attentionSwitchFrequencyInPlay, beliefUpdateFrequencyInPlay, burnTime)

            updatePhysicalStateByBeliefFrequencyInSimulationRoot = int(0.2 * numMDPTimeStepPerSecond)
            updatePhysicalStateByBeliefInSimulationRoot = ba.UpdatePhysicalStateImagedByBelief(
                updatePhysicalStateByBeliefFrequencyInSimulationRoot)
            updatePhysicalStateByBeliefFrequencyInSimulation = np.inf
            updatePhysicalStateByBeliefInSimulation = ba.UpdatePhysicalStateImagedByBelief(
                updatePhysicalStateByBeliefFrequencyInSimulation)
            updatePhysicalStateByBeliefFrequencyInPlay = np.inf
            updatePhysicalStateByBeliefInPlay = ba.UpdatePhysicalStateImagedByBelief(
                updatePhysicalStateByBeliefFrequencyInPlay)

            transitionFunctionInSimulation = env.TransitionFunction(
                resetPhysicalState, resetBeliefAndAttention, updatePhysicalState,
                transiteStateWithoutActionChangeInSimulation, updateBeliefAndAttentionInSimulation,
                updatePhysicalStateByBeliefInSimulation)
            transitionFunctionInPlay = env.TransitionFunction(
                resetPhysicalState, resetBeliefAndAttention, updatePhysicalState,
                transiteStateWithoutActionChangeInPlay, updateBeliefAndAttentionInPlay,
                updatePhysicalStateByBeliefInPlay)

            maxRollOutSteps = 5
            aliveBouns = 1 / maxRollOutSteps
            deathPenalty = -1
            rewardFunction = reward.RewardFunctionTerminalPenalty(sheepId, aliveBouns, deathPenalty, isTerminal)
            rewardRollout = lambda state, action, nextState: rewardFunction(state, action)

            numActionSpace = 8
            actionInterval = int(360 / numActionSpace)
            actionMagnitude = actionRatio * minSheepSpeed
            actionSpace = [(np.cos(degreeInPolar) * actionMagnitude, np.sin(degreeInPolar) * actionMagnitude)
                           for degreeInPolar in np.arange(0, 360, actionInterval) / 180 * math.pi]
            getActionPrior = lambda state: {action: 1 / len(actionSpace) for action in actionSpace}

            cInit = 1
            # cBase = 50
            scoreChild = ScoreChild(cInit, cBase)
            selectAction = SelectAction(scoreChild)
            selectNextState = SelectNextState(selectAction)
            initializeChildren = InitializeChildren(actionSpace, transitionFunctionInSimulation, getActionPrior)
            expand = Expand(isTerminal, initializeChildren)
            pWidening = PWidening(alpha, C)
            expandNewState = ExpandNextState(transitionFunctionInSimulation, pWidening)

            rolloutPolicy = lambda state: actionSpace[np.random.choice(range(numActionSpace))]
            rolloutHeuristic = lambda state: 0
            estimateValue = RollOut(rolloutPolicy, maxRollOutSteps, transitionFunctionInSimulation,
                                    rewardRollout, isTerminal, rolloutHeuristic)
            numActionPlaned = 1
            outputAction = OutputAction(numActionPlaned, actionSpace)

            # numSimulations = int(numTotalSimulationTimes / numTree)
            # sheepColorInMcts = np.array([0, 255, 0])
            # wolfColorInMcts = np.array([255, 0, 0])
            # distractorColorInMcts = np.array([255, 255, 255])
            # saveImageMCTS = True
            # mctsRender = env.MctsRender(numAgent, screen, xBoundary[1], yBoundary[1], screenColor,
            #                             sheepColorInMcts, wolfColorInMcts, distractorColorInMcts, circleSize,
            #                             saveImageMCTS, saveImageFile)
            # mctsRenderOn = False
            # mctsRender = None
            # pg.init()
            # mcts = MCTS(numSimulations, selectChild, expand, rollout, backup, selectAction, mctsRender, mctsRenderOn)
            pwMultipleTrees = PWMultipleTrees(numSimulations, selectAction, selectNextState, expand,
                                              expandNewState, estimateValue, backup, outputAction)

            maxRunningSteps = int(50 * numMDPTimeStepPerSecond)
            makeDiffSimulationRoot = MakeDiffSimulationRoot(isTerminal, updatePhysicalStateByBeliefInSimulationRoot)
            runMCTSTrjactory = RunMCTSTrjactory(maxRunningSteps, numTree, numActionPlaned,
                                                sheepActionUpdateFrequency, transitionFunctionInPlay, isTerminal,
                                                makeDiffSimulationRoot, render)

            rootAction = actionSpace[np.random.choice(range(numActionSpace))]
            numTrial = 10
            trajectories = [runMCTSTrjactory(pwMultipleTrees) for trial in range(numTrial)]
            savePath = getSavePath({'chasingSubtlety': chasingSubtlety, 'subIndex': subIndex})
            # tsl.saveToPickle(trajectories, savePath)

            # def getTrueWolfIndentityAcc(trajectory):
            #     AccTrial = []
            #     for timeStepIndex in range(len(trajectory)):
            #         timeStep = trajectory[timeStepIndex]
            #         wolfId = trajectory[0][0][0][3][0]
            #         wolfIdInEach = timeStep[0][0][3][0]
            #         # print(wolfId, '**', wolfIdInEach)
            #         if (timeStepIndex % 3 == 0) and timeStepIndex >= 11:
            #             AccTrial.append(timeStep[0][1][int(wolfIdInEach) - 1])
            #     meanIdentityAcc = np.mean(AccTrial)
            #     # meanIdentityAcc = np.mean(np.array([timeStep[0][1][int(timeStep[0][0][3][0] - 1)]
            #     #                                     for timeStep in trajectory])[11:])
            #     return meanIdentityAcc

            getTrueWolfIndentityAcc = lambda trajectory: np.array(
                [timeStep[0][1][int(timeStep[0][0][3][0] - 1)] for timeStep in trajectory])[:]
            identityPerceptionTimeSeries = np.mean(
                [getTrueWolfIndentityAcc(trajectory) for trajectory in trajectories], axis=0)
            resultsTimeSeries = pd.DataFrame([identityPerceptionTimeSeries],
                                             columns=list(range(len(identityPerceptionTimeSeries))))
            savePathIdentitySeries = getCSVSavePath({'chasingSubtlety': chasingSubtlety, 'measure': 'identity'})
            if subIndex == 0:
                resultsTimeSeries.to_csv(savePathIdentitySeries, mode='a')
            else:
                resultsTimeSeries.to_csv(savePathIdentitySeries, mode='a', header=False)

            meanIdentityPerception = np.mean(
                [getTrueWolfIndentityAcc(trajectory) for trajectory in trajectories])
            meanIdentityPerceptionOnConditions.update({chasingSubtlety: meanIdentityPerception})
            print(meanIdentityPerceptionOnConditions)

            getTrueWolfAttentionNumber = lambda trajectory: np.array(
                [timeStep[0][2][int(timeStep[0][0][3][0] - 1)] for timeStep in trajectory])[:]
            attentionNumberTimeSeries = np.mean(
                [getTrueWolfAttentionNumber(trajectory) for trajectory in trajectories], axis=0)
            resultsAttentionTimeSeries = pd.DataFrame([attentionNumberTimeSeries],
                                                      columns=list(range(len(attentionNumberTimeSeries))))
            savePathAttentionSeries = getCSVSavePath({'chasingSubtlety': chasingSubtlety,
                                                      'measure': 'attentionNumber'})
            if subIndex == 0:
                resultsAttentionTimeSeries.to_csv(savePathAttentionSeries, mode='a')
            else:
                resultsAttentionTimeSeries.to_csv(savePathAttentionSeries, mode='a', header=False)

            meanAttentionPerception = np.mean(
                [getTrueWolfAttentionNumber(trajectory) for trajectory in trajectories])
            meanAttentionPerceptionOnConditions.update({chasingSubtlety: meanAttentionPerception})

            getSampledWolfIndentityAcc = lambda trajectory: np.array(
                [int(int(timeStep[0][3][0][0]) == int(timeStep[0][0][3][0])) for timeStep in trajectory])[:]
            identitySampledTimeSeries = np.mean(
                [getSampledWolfIndentityAcc(trajectory) for trajectory in trajectories], axis=0)
            resultsSampledTimeSeries = pd.DataFrame([identitySampledTimeSeries],
                                                    columns=list(range(len(identitySampledTimeSeries))))
            savePathIdentitySampledSeries = getCSVSavePath({'chasingSubtlety': chasingSubtlety,
                                                            'measure': 'identitySampled'})
            if subIndex == 0:
                resultsSampledTimeSeries.to_csv(savePathIdentitySampledSeries, mode='a')
            else:
                resultsSampledTimeSeries.to_csv(savePathIdentitySampledSeries, mode='a', header=False)

            meanIdentitySampled = np.mean(
                [getSampledWolfIndentityAcc(trajectory) for trajectory in trajectories])
            meanIdentitySampledOnConditions.update({chasingSubtlety: meanIdentitySampled})

        allResultsIdentity.append(meanIdentityPerceptionOnConditions)
        resultsIdentity = pd.DataFrame(allResultsIdentity)
        csvSavePathIdentity = getCSVSavePath({'measure': 'identity'})
        resultsIdentity.to_csv(csvSavePathIdentity)

        allResultsAttention.append(meanAttentionPerceptionOnConditions)
        resultsAttention = pd.DataFrame(allResultsAttention)
        csvSavePathAttention = getCSVSavePath({'measure': 'attentionNumber'})
        resultsAttention.to_csv(csvSavePathAttention)

        allResultsIdentitySampled.append(meanIdentitySampledOnConditions)
        resultsIdentitySampled = pd.DataFrame(allResultsIdentitySampled)
        csvSavePathIdentitySampled = getCSVSavePath({'measure': 'identitySampled'})
        resultsIdentitySampled.to_csv(csvSavePathIdentitySampled)
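# --- Hedged illustration (not part of the original experiment code) ---
# The per-subtlety time-series blocks above append one row per subject index to a
# shared CSV, writing the column header only when subIndex == 0. The helper below is
# a minimal, self-contained sketch of that accumulation pattern; the function name,
# file name, and toy data are hypothetical, and it assumes the module-level
# numpy/pandas imports (np, pd) used throughout this file.
def _demoIncrementalCSVAppend(toyCSVPath='toyTimeSeries.csv', numSub=3):
    for subIndex in range(numSub):
        toySeries = np.random.rand(5)  # stands in for identityPerceptionTimeSeries
        row = pd.DataFrame([toySeries], columns=list(range(len(toySeries))))
        if subIndex == 0:
            row.to_csv(toyCSVPath, mode='a')                # first subject keeps the header
        else:
            row.to_csv(toyCSVPath, mode='a', header=False)  # later subjects append rows only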
def __call__(self, condition):
    getSavePath = self.getTrajectorySavePathByCondition(condition)
    attentionType = condition['attentionType']
    alpha = condition['alphaForStateWidening']
    C = condition['CForStateWidening']
    minAttentionDistance = condition['minAttentionDistance']
    rangeAttention = condition['rangeAttention']
    numTree = condition['numTrees']
    numSimulations = condition['numSimulationTimes']
    actionRatio = condition['actionRatio']
    cBase = condition['cBase']
    burnTime = condition['burnTime']
    softParaForIdentity = condition['softId']
    softParaForSubtlety = condition['softSubtlety']

    numSub = 5
    allIdentityResults = []
    allPerceptionResults = []
    allActionResults = []
    allVelDiffResults = []
    allResults = []
    possibleTrialSubtleties = [0.92, 0.01]

    for subIndex in range(numSub):
        meanIdentiyOnConditions = {}
        meanPerceptionOnConditions = {}
        meanActionOnConditions = {}
        meanVelDiffOnConditions = {}
        meanEscapeOnConditions = {}

        for chasingSubtlety in possibleTrialSubtleties:
            print(numTree, chasingSubtlety, numSimulations, attentionType)

            numAgent = 25
            sheepId = 0
            suspectorIds = list(range(1, numAgent))
            resetWolfIdAndSubtlety = ag.ResetWolfIdAndSubtlety(suspectorIds, [chasingSubtlety])

            distanceToVisualDegreeRatio = 20
            minInitSheepWolfDistance = 9 * distanceToVisualDegreeRatio
            minInitSheepDistractorDistance = 2.5 * distanceToVisualDegreeRatio  # no distractor in killzone when init
            isLegalInitPositions = ag.IsLegalInitPositions(sheepId, minInitSheepWolfDistance,
                                                           minInitSheepDistractorDistance)
            xBoundary = [0, 640]
            yBoundary = [0, 480]
            resetAgentPositions = ag.ResetAgentPositions(xBoundary, yBoundary, numAgent, isLegalInitPositions)
            resetPhysicalState = ag.ResetPhysicalState(sheepId, numAgent, resetAgentPositions,
                                                       resetWolfIdAndSubtlety)

            numFramePerSecond = 20
            numMDPTimeStepPerSecond = 5
            numFrameWithoutActionChange = int(numFramePerSecond / numMDPTimeStepPerSecond)

            sheepActionUpdateFrequency = 1
            minSheepSpeed = int(17.4 * distanceToVisualDegreeRatio / numFramePerSecond)
            maxSheepSpeed = int(23.2 * distanceToVisualDegreeRatio / numFramePerSecond)
            warmUpTimeSteps = int(10 * numMDPTimeStepPerSecond)
            sheepPolicy = ag.SheepPolicy(sheepActionUpdateFrequency, minSheepSpeed, maxSheepSpeed,
                                         warmUpTimeSteps, burnTime)

            wolfActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
            minWolfSpeed = int(8.7 * distanceToVisualDegreeRatio / numFramePerSecond)
            maxWolfSpeed = int(14.5 * distanceToVisualDegreeRatio / numFramePerSecond)
            wolfPolicy = ag.WolfPolicy(wolfActionUpdateFrequency, minWolfSpeed, maxWolfSpeed, warmUpTimeSteps)

            distractorActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
            minDistractorSpeed = int(8.7 * distanceToVisualDegreeRatio / numFramePerSecond)
            maxDistractorSpeed = int(14.5 * distanceToVisualDegreeRatio / numFramePerSecond)
            distractorPolicy = ag.DistractorPolicy(distractorActionUpdateFrequency, minDistractorSpeed,
                                                   maxDistractorSpeed, warmUpTimeSteps)

            preparePolicy = ag.PreparePolicy(sheepId, numAgent, sheepPolicy, wolfPolicy, distractorPolicy)
            updatePhysicalState = ag.UpdatePhysicalState(sheepId, numAgent, preparePolicy)

            xBoundary = [0, 640]
            yBoundary = [0, 480]
            checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(xBoundary, yBoundary)
            transiteMultiAgentMotion = ag.TransiteMultiAgentMotion(checkBoundaryAndAdjust)

            minDistance = 2.5 * distanceToVisualDegreeRatio
            isTerminal = env.IsTerminal(sheepId, minDistance)

            # screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
            # screenColor = np.array([0, 0, 0])
            # sheepColor = np.array([0, 255, 0])
            # wolfColor = np.array([255, 0, 0])
            # circleSize = 10
            # saveImage = True
            # saveImageFile = 'image3'
            # render = env.Render(numAgent, screen, xBoundary[1], yBoundary[1], screenColor, sheepColor,
            #                     wolfColor, circleSize, saveImage, saveImageFile, isTerminal)
            render = None
            renderOnInSimulation = False
            transiteStateWithoutActionChangeInSimulation = env.TransiteStateWithoutActionChange(
                numFrameWithoutActionChange, isTerminal, transiteMultiAgentMotion, render, renderOnInSimulation)
            renderOnInPlay = False
            transiteStateWithoutActionChangeInPlay = env.TransiteStateWithoutActionChange(
                numFrameWithoutActionChange, isTerminal, transiteMultiAgentMotion, render, renderOnInPlay)

            # attention-model settings per attentionType:
            # (attentionLimitation, precisionPerSlot, precisionForUntracked, memoryratePerSlot, memoryrateForUntracked)
            attentionParameters = {
                'idealObserver': (1, 500.0, 500.0, 1.0, 1.0),
                'preAttention': (1, 2.5, 2.5, 0.45, 0.45),
                'attention3': (3, 8.0, 0.01, 0.7, 0.01),
                'hybrid3': (3, 8.0, 2.5, 0.7, 0.45),
                'attention4': (4, 8.0, 0.01, 0.7, 0.01),
                'hybrid4': (4, 8.0, 2.5, 0.7, 0.45),
                'preAttentionMem0.25': (1, 2.5, 2.5, 0.25, 0.25),
                'preAttentionMem0.65': (1, 2.5, 2.5, 0.65, 0.65),
                'preAttentionPre0.5': (1, 0.5, 0.5, 0.45, 0.45),
                'preAttentionPre4.5': (1, 4.5, 4.5, 0.45, 0.45),
            }
            (attentionLimitation, precisionPerSlot, precisionForUntracked,
             memoryratePerSlot, memoryrateForUntracked) = attentionParameters[attentionType]

            attention = Attention.AttentionToPrecisionAndDecay(precisionPerSlot, precisionForUntracked,
                                                               memoryratePerSlot, memoryrateForUntracked)
            transferMultiAgentStatesToPositionDF = ba.TransferMultiAgentStatesToPositionDF(numAgent)
            possibleSubtleties = [500.0, 11.0, 3.3, 1.83, 0.92, 0.31, 0.01]
            resetBeliefAndAttention = ba.ResetBeliefAndAttention(sheepId, suspectorIds, possibleSubtleties,
                                                                 attentionLimitation,
                                                                 transferMultiAgentStatesToPositionDF, attention)

            maxAttentionDistance = minAttentionDistance + rangeAttention
            attentionMinDistance = minAttentionDistance * distanceToVisualDegreeRatio
            attentionMaxDistance = maxAttentionDistance * distanceToVisualDegreeRatio
            numStandardErrorInDistanceRange = 4
            calDistancePriorOnAttentionSlot = Attention.CalDistancePriorOnAttentionSlot(
                attentionMinDistance, attentionMaxDistance, numStandardErrorInDistanceRange)
            attentionSwitch = Attention.AttentionSwitch(attentionLimitation, calDistancePriorOnAttentionSlot)
            computePosterior = calPosterior.CalPosteriorLog(minDistance)
            print(attentionLimitation, attentionMinDistance / distanceToVisualDegreeRatio,
                  attentionMaxDistance / distanceToVisualDegreeRatio)

            attentionSwitchFrequencyInSimulation = np.inf
            beliefUpdateFrequencyInSimulation = np.inf
            updateBeliefAndAttentionInSimulation = ba.UpdateBeliefAndAttentionState(
                attention, computePosterior, attentionSwitch, transferMultiAgentStatesToPositionDF,
                attentionSwitchFrequencyInSimulation, beliefUpdateFrequencyInSimulation, burnTime)

            attentionSwitchFrequencyInPlay = int(0.6 * numMDPTimeStepPerSecond)
            beliefUpdateFrequencyInPlay = int(0.2 * numMDPTimeStepPerSecond)
            updateBeliefAndAttentionInPlay = ba.UpdateBeliefAndAttentionState(
                attention, computePosterior, attentionSwitch, transferMultiAgentStatesToPositionDF,
                attentionSwitchFrequencyInPlay, beliefUpdateFrequencyInPlay, burnTime)

            updatePhysicalStateByBeliefFrequencyInSimulationRoot = int(0.2 * numMDPTimeStepPerSecond)
            updatePhysicalStateByBeliefInSimulationRoot = ba.UpdatePhysicalStateImagedByBelief(
                updatePhysicalStateByBeliefFrequencyInSimulationRoot, softParaForIdentity, softParaForSubtlety)
            reUpdatePhysicalStateByBeliefInSimulationRoot = ba.UpdatePhysicalStateImagedByBelief(
                updatePhysicalStateByBeliefFrequencyInSimulationRoot, softParaForIdentity=1, softParaForSubtlety=1)
            updatePhysicalStateByBeliefFrequencyInSimulation = np.inf
            updatePhysicalStateByBeliefInSimulation = ba.UpdatePhysicalStateImagedByBelief(
                updatePhysicalStateByBeliefFrequencyInSimulation, softParaForIdentity, softParaForSubtlety)
            updatePhysicalStateByBeliefFrequencyInPlay = np.inf
            updatePhysicalStateByBeliefInPlay = ba.UpdatePhysicalStateImagedByBelief(
                updatePhysicalStateByBeliefFrequencyInPlay, softParaForIdentity, softParaForSubtlety)

            transitionFunctionInSimulation = env.TransitionFunction(
                resetPhysicalState, resetBeliefAndAttention, updatePhysicalState,
                transiteStateWithoutActionChangeInSimulation, updateBeliefAndAttentionInSimulation,
                updatePhysicalStateByBeliefInSimulation)
            transitionFunctionInPlay = env.TransitionFunction(
                resetPhysicalState, resetBeliefAndAttention, updatePhysicalState,
                transiteStateWithoutActionChangeInPlay, updateBeliefAndAttentionInPlay,
                updatePhysicalStateByBeliefInPlay)

            maxRollOutSteps = 5
            aliveBouns = 1 / maxRollOutSteps
            deathPenalty = -1
            rewardFunction = reward.RewardFunctionTerminalPenalty(sheepId, aliveBouns, deathPenalty, isTerminal)
            rewardRollout = lambda state, action, nextState: rewardFunction(state, action)

            numActionSpace = 8
            actionInterval = int(360 / numActionSpace)
            actionMagnitude = actionRatio * minSheepSpeed * numFramePerSecond
            actionSpace = [(np.cos(degreeInPolar) * actionMagnitude, np.sin(degreeInPolar) * actionMagnitude)
                           for degreeInPolar in np.arange(0, 360, actionInterval) / 180 * math.pi]
            getActionPrior = lambda state: {action: 1 / len(actionSpace) for action in actionSpace}

            cInit = 1
            # cBase = 50
            scoreChild = ScoreChild(cInit, cBase)
            selectAction = SelectAction(scoreChild)
            selectNextState = SelectNextState(selectAction)
            initializeChildren = InitializeChildren(actionSpace, transitionFunctionInSimulation, getActionPrior)
            expand = Expand(isTerminal, initializeChildren)
            pWidening = PWidening(alpha, C)
            expandNewState = ExpandNextState(transitionFunctionInSimulation, pWidening)

            rolloutPolicy = lambda state: actionSpace[np.random.choice(range(numActionSpace))]
            rolloutHeuristic = lambda state: 0
            estimateValue = RollOut(rolloutPolicy, maxRollOutSteps, transitionFunctionInSimulation,
                                    rewardRollout, isTerminal, rolloutHeuristic)
            numActionPlaned = 1
            outputAction = OutputAction(numActionPlaned, actionSpace)

            # numSimulations = int(numTotalSimulationTimes / numTree)
            # sheepColorInMcts = np.array([0, 255, 0])
            # wolfColorInMcts = np.array([255, 0, 0])
            # distractorColorInMcts = np.array([255, 255, 255])
            # saveImageMCTS = True
            # mctsRender = env.MctsRender(numAgent, screen, xBoundary[1], yBoundary[1], screenColor,
            #                             sheepColorInMcts, wolfColorInMcts, distractorColorInMcts, circleSize,
            #                             saveImageMCTS, saveImageFile)
            # mctsRenderOn = False
            # mctsRender = None
            # pg.init()
            # mcts = MCTS(numSimulations, selectChild, expand, rollout, backup, selectAction, mctsRender, mctsRenderOn)
            pwMultipleTrees = PWMultipleTrees(numSimulations, selectAction, selectNextState, expand,
                                              expandNewState, estimateValue, backup, outputAction)

            maxRunningSteps = int(25 * numMDPTimeStepPerSecond)
            makeDiffSimulationRoot = MakeDiffSimulationRoot(isTerminal, updatePhysicalStateByBeliefInSimulationRoot,
                                                            reUpdatePhysicalStateByBeliefInSimulationRoot)
            runMCTSTrjactory = RunMCTSTrjactory(maxRunningSteps, numTree, numActionPlaned,
                                                sheepActionUpdateFrequency, transitionFunctionInPlay, isTerminal,
                                                makeDiffSimulationRoot, render)

            rootAction = actionSpace[np.random.choice(range(numActionSpace))]
            numTrial = 10
            trajectories = [runMCTSTrjactory(pwMultipleTrees) for trial in range(numTrial)]
            savePath = getSavePath({'chasingSubtlety': chasingSubtlety, 'subIndex': subIndex})
            tsl.saveToPickle(trajectories, savePath)

            getCSVSavePath = self.getCSVSavePathByCondition(condition)
            startStatsIndex = 1

            def getTrueWolfIdAcc(trajectory):
                AccTrial = []
                for timeStepIndex in range(len(trajectory) - 2):
                    timeStep = trajectory[timeStepIndex]
                    wolfId = timeStep[0][0][3][0]
                    wolfSubtlety = timeStep[0][0][3][1]
                    # print(wolfId, '**', wolfIdInEach)
                    if timeStepIndex >= startStatsIndex:
                        IdAcc = [int(IdAndSubtlety[0] == wolfId) for IdAndSubtlety in timeStep[5]]
                        AccTrial.append(IdAcc)
                meanAcc = np.mean(AccTrial)
                return meanAcc

            meanIdentiy = np.mean([getTrueWolfIdAcc(trajectory) for trajectory in trajectories])
            meanIdentiyOnConditions.update({chasingSubtlety: meanIdentiy})

            def getTrueWolfIdSubtletyAcc(trajectory):
                AccTrial = []
                for timeStepIndex in range(len(trajectory) - 2):
                    timeStep = trajectory[timeStepIndex]
                    wolfId = timeStep[0][0][3][0]
                    wolfSubtlety = timeStep[0][0][3][1]
                    if timeStepIndex >= startStatsIndex:
                        IdAndSubtletyAcc = [int((IdAndSubtlety[0] == wolfId) and (IdAndSubtlety[1] == wolfSubtlety))
                                            for IdAndSubtlety in timeStep[5]]
                        AccTrial.append(IdAndSubtletyAcc)
                meanAcc = np.mean(AccTrial)
                return meanAcc

            meanPerception = np.mean([getTrueWolfIdSubtletyAcc(trajectory) for trajectory in trajectories])
            meanPerceptionOnConditions.update({chasingSubtlety: meanPerception})

            def getActionDeviationLevel(trajectory):
                AccTrial = []
                for timeStepIndex in range(len(trajectory) - 2):
                    timeStep = trajectory[timeStepIndex]
                    actionReal = np.array(timeStep[1])
                    actionOnTruth = np.array(timeStep[4])
                    if timeStepIndex >= startStatsIndex:
                        deviateLevel = round(agf.computeAngleBetweenVectors(actionReal, actionOnTruth)
                                             / (math.pi / 4))
                        AccTrial.append(deviateLevel)
                meanAcc = np.mean(AccTrial)
                return meanAcc

            meanAction = np.mean([getActionDeviationLevel(trajectory) for trajectory in trajectories])
            meanActionOnConditions.update({chasingSubtlety: meanAction})

            def getVelocityDiff(trajectory):
                AccTrial = []
                for timeStepIndex in range(len(trajectory) - 2):
                    timeStep = trajectory[timeStepIndex]
                    velReal = np.array(timeStep[0][0][0][1][0])
                    velWithActionOnTruth = np.array(timeStep[2][1][0])
                    velWithActionOppo = np.array(timeStep[3][1][0])
                    if timeStepIndex >= startStatsIndex:
                        velDiffNormWithActionOnTruth = np.linalg.norm(velReal - velWithActionOnTruth)
                        velDiffNormWithActionOppo = np.linalg.norm(velReal - velWithActionOppo)
                        velDiffRatio = 1.0 * velDiffNormWithActionOnTruth / velDiffNormWithActionOppo
                        AccTrial.append(velDiffRatio)
                meanAcc = np.mean(AccTrial)
                return meanAcc

            meanVelDiff = np.mean([getVelocityDiff(trajectory) for trajectory in trajectories])
            meanVelDiffOnConditions.update({chasingSubtlety: meanVelDiff})

            getEscapeAcc = lambda trajectory: int(len(trajectory) >= (maxRunningSteps - 2))
            meanEscape = np.mean([getEscapeAcc(trajectory) for trajectory in trajectories])
            meanEscapeOnConditions.update({chasingSubtlety: meanEscape})
            print(meanEscapeOnConditions)

        allResults.append(meanEscapeOnConditions)
        results = pd.DataFrame(allResults)
        escapeCSVSavePath = getCSVSavePath({'measure': 'escape'})
        results.to_csv(escapeCSVSavePath)

        allIdentityResults.append(meanIdentiyOnConditions)
        identityResults = pd.DataFrame(allIdentityResults)
        identityCSVSavePath = getCSVSavePath({'measure': 'identity'})
        identityResults.to_csv(identityCSVSavePath)

        allPerceptionResults.append(meanPerceptionOnConditions)
        perceptionResults = pd.DataFrame(allPerceptionResults)
        perceptionCSVSavePath = getCSVSavePath({'measure': 'percetion'})
        perceptionResults.to_csv(perceptionCSVSavePath)

        allActionResults.append(meanActionOnConditions)
        actionResults = pd.DataFrame(allActionResults)
        actionCSVSavePath = getCSVSavePath({'measure': 'action'})
        actionResults.to_csv(actionCSVSavePath)

        allVelDiffResults.append(meanVelDiffOnConditions)
        velDiffResults = pd.DataFrame(allVelDiffResults)
        velDiffCSVSavePath = getCSVSavePath({'measure': 'velDiff'})
        velDiffResults.to_csv(velDiffCSVSavePath)
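# --- Hedged illustration (not part of the original experiment code) ---
# Both __call__ methods above discretise the sheep's action space into
# numActionSpace = 8 evenly spaced headings and place a uniform prior over them.
# The helper below reproduces just that construction in isolation so it can be
# sanity-checked; the function name and the example magnitude are hypothetical,
# and it assumes the module-level math/numpy imports used throughout this file.
def _demoActionSpaceAndPrior(actionMagnitude=17, numActionSpace=8):
    actionInterval = int(360 / numActionSpace)  # 45 degrees between headings
    actionSpace = [(np.cos(degreeInPolar) * actionMagnitude,
                    np.sin(degreeInPolar) * actionMagnitude)
                   for degreeInPolar in np.arange(0, 360, actionInterval) / 180 * math.pi]
    getActionPrior = lambda state: {action: 1 / len(actionSpace) for action in actionSpace}
    assert len(actionSpace) == numActionSpace
    assert abs(sum(getActionPrior(None).values()) - 1.0) < 1e-9
    return actionSpace, getActionPrior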