Beispiel #1
0
 def evaluate_state_action(self, s, a):
     """Create a one-hot vector for the given state-action pair.

     The pair is flattened to the single index ``self.num_features * a + s``
     and passed, together with ``self.size``, to ``one_hot_vector``
     (presumably ``(length, hot_index)`` -- confirm against the helper).

     Args:
         s: State index; must be an int with ``0 <= s < self.num_features``.
         a: Action index used as the block offset. It is not validated here.

     Returns:
         Whatever ``one_hot_vector(self.size, self.num_features * a + s)``
         returns.

     Raises:
         AssertionError: If ``s`` is not an int or lies outside the valid range.
     """
     assert isinstance(s, int), 'The input sample must be an int'
     # The upper bound is strict: s == num_features would produce the same
     # flattened index as state 0 of action a + 1 and go unnoticed.
     assert 0 <= s < self.num_features, \
         'Given state {} with num_states {} is not possible'.format(s, self.num_features)
     return one_hot_vector(self.size, self.num_features * a + s)
Beispiel #2
0
 def exhaustive_states(self):
     """Return one one-hot vector per state index, in ascending index order.

     Returns:
         A list with ``self.num_states()`` entries, where entry ``i`` is
         ``one_hot_vector(self.num_states(), i)``.
     """
     # Query the state count once instead of once per generated vector.
     total = self.num_states()
     return [one_hot_vector(total, index) for index in range(total)]
Beispiel #3
0
 def evaluate(self, s):
     """Create a one-hot vector for the given state index.

     Args:
         s: State index; must be an int with ``0 <= s < self.num_features``.

     Returns:
         Whatever ``one_hot_vector(self.num_features, s)`` returns.

     Raises:
         AssertionError: If ``s`` is not an int or lies outside the valid range.
     """
     assert isinstance(s, int), 'The input sample must be an int'
     # The upper bound is strict: with num_features entries the last valid
     # index is num_features - 1.
     assert 0 <= s < self.num_features, \
         'Given state {} with num_states {} is not possible'.format(s, self.num_features)
     return one_hot_vector(self.num_features, s)