예제 #1
0
 def testHeatSeekingDiscreteDeterministicPolicyOnChangableIntention(self, imaginedWeId, intentionPrior, state, groundTruthCentralControlAction):
     """Check the action chosen under a PolicyOnChangableIntention for ``state``.

     Builds the policy from the fixture's components plus ``intentionPrior``
     and a state getter created for ``imaginedWeId``, evaluates it on
     ``state``, reduces the resulting distribution with
     ``maxFromDistribution`` and asserts the outcome is numerically close
     to ``groundTruthCentralControlAction``.
     """
     stateGetterForIntention = GetStateForPolicyGivenIntention(imaginedWeId)
     changableIntentionPolicy = PolicyOnChangableIntention(
         self.perceptAction,
         intentionPrior,
         self.updateIntentionDistribution,
         self.chooseIntention,
         stateGetterForIntention,
         self.policyGivenIntention,
     )
     chosenAction = maxFromDistribution(changableIntentionPolicy(state))
     actionsMatch = np.allclose(chosenAction, groundTruthCentralControlAction)
     self.assertTrue(actionsMatch)
예제 #2
0
 def sampleAction(state):
     """Return what ``maxFromDistribution`` picks from ``mctsSelectAction(state)``."""
     return maxFromDistribution(mctsSelectAction(state))
예제 #3
0
 def wolfTransit(state, action):
     """Apply ``transitionFunction`` to ``state`` with a joint action list.

     The list is ordered as: the given ``action``, then the stag policy's
     selected action, then one selected action per entry of
     ``rabbitPolicies`` (each reduced via ``maxFromDistribution``).
     """
     stagAction = maxFromDistribution(stagPolicy(state))
     rabbitActions = [
         maxFromDistribution(rabbitPolicy(state))
         for rabbitPolicy in rabbitPolicies
     ]
     jointAction = [action, stagAction] + rabbitActions
     return transitionFunction(state, jointAction)
예제 #4
0
 def testHeatSeekingDiscreteDeterministicPolicy(self, state, groundTruthAction):
     """Assert the discrete heat-seeking policy's selected action for ``state``.

     The policy's output is reduced with ``maxFromDistribution`` and
     compared to ``groundTruthAction`` using ``np.allclose``.
     """
     policy = HeatSeekingDiscreteDeterministicPolicy(
         self.actionSpace,
         self.getPredatorPos,
         self.getPreyPos,
         computeAngleBetweenVectors,
     )
     actionDist = policy(state)
     chosenAction = maxFromDistribution(actionDist)
     self.assertTrue(np.allclose(chosenAction, groundTruthAction))
예제 #5
0
 def testHeatSeekingContinuesDeterministicPolicy(self, state, actionMagnitude, groundTruthWolfAction):
     """Assert the continuous heat-seeking policy's selected action for ``state``.

     The policy is built from the fixture's position getters and
     ``actionMagnitude``; its output is reduced with
     ``maxFromDistribution`` and compared to ``groundTruthWolfAction``
     using ``np.allclose``.
     """
     policy = HeatSeekingContinuesDeterministicPolicy(
         self.getSheepXPos, self.getWolfXPos, actionMagnitude
     )
     actionDist = policy(state)
     chosenAction = maxFromDistribution(actionDist)
     self.assertTrue(np.allclose(chosenAction, groundTruthWolfAction))
예제 #6
0
    def testStationaryAgentPolicy(self, state, groundTruthAction):
        """Assert ``stationaryAgentPolicy``'s selected action equals the expected one.

        The comparison is exact (``np.array_equal``), not tolerance-based.
        """
        actionDist = stationaryAgentPolicy(state)
        chosenAction = maxFromDistribution(actionDist)
        self.assertTrue(np.array_equal(chosenAction, groundTruthAction))
예제 #7
0
 def testMaximumFromDistribution(self, actionDist, groundTruthAction):
     """Assert ``maxFromDistribution(actionDist)`` equals ``groundTruthAction``."""
     pickedAction = maxFromDistribution(actionDist)
     self.assertEqual(pickedAction, groundTruthAction)