def testHeatSeekingDiscreteDeterministicPolicyOnChangableIntention(self, imaginedWeId, intentionPrior, state, groundTruthCentralControlAction):
    """A changeable-intention policy built from the fixture's components must
    produce the ground-truth central-control action for the given state."""
    stateGetter = GetStateForPolicyGivenIntention(imaginedWeId)
    policy = PolicyOnChangableIntention(
        self.perceptAction,
        intentionPrior,
        self.updateIntentionDistribution,
        self.chooseIntention,
        stateGetter,
        self.policyGivenIntention,
    )
    chosenAction = maxFromDistribution(policy(state))
    self.assertTrue(np.allclose(chosenAction, groundTruthCentralControlAction))
def sampleAction(state):
    """Return the greedy (max-probability) action from the MCTS action distribution."""
    return maxFromDistribution(mctsSelectAction(state))
def wolfTransit(state, action):
    """Advance the state with the wolf's action joined to every other agent's
    greedy action: stag first, then each rabbit in order."""
    stagAction = maxFromDistribution(stagPolicy(state))
    rabbitActions = [maxFromDistribution(policy(state)) for policy in rabbitPolicies]
    jointAction = [action, stagAction] + rabbitActions
    return transitionFunction(state, jointAction)
def testHeatSeekingDiscreteDeterministicPolicy(self, state, groundTruthAction):
    """The discrete heat-seeking policy's greedy action must match the ground truth."""
    policy = HeatSeekingDiscreteDeterministicPolicy(
        self.actionSpace,
        self.getPredatorPos,
        self.getPreyPos,
        computeAngleBetweenVectors,
    )
    chosenAction = maxFromDistribution(policy(state))
    self.assertTrue(np.allclose(chosenAction, groundTruthAction))
def testHeatSeekingContinuesDeterministicPolicy(self, state, actionMagnitude, groundTruthWolfAction):
    """The continuous heat-seeking policy's greedy action must match the ground-truth wolf action."""
    policy = HeatSeekingContinuesDeterministicPolicy(
        self.getSheepXPos,
        self.getWolfXPos,
        actionMagnitude,
    )
    chosenAction = maxFromDistribution(policy(state))
    self.assertTrue(np.allclose(chosenAction, groundTruthWolfAction))
def testStationaryAgentPolicy(self, state, groundTruthAction):
    """The stationary agent's greedy action must equal the ground truth exactly."""
    chosenAction = maxFromDistribution(stationaryAgentPolicy(state))
    self.assertTrue(np.array_equal(chosenAction, groundTruthAction))
def testMaximumFromDistribution(self, actionDist, groundTruthAction):
    """maxFromDistribution must pick the ground-truth action out of the distribution."""
    self.assertEqual(maxFromDistribution(actionDist), groundTruthAction)