def evaluate_state_action(self, s, a):
    """Create a one-hot vector for the given state-action pair.

    The pair is flattened to the single index ``self.num_features * a + s``,
    so each action owns a contiguous run of ``self.num_features`` slots in a
    vector of length ``self.size``.

    Args:
        s: State index; must be an ``int`` with ``0 <= s < self.num_features``.
        a: Action index used as the block offset. It is not validated here.

    Returns:
        Whatever ``one_hot_vector(self.size, index)`` produces (presumably a
        length-``self.size`` vector with a single hot entry at ``index``).

    Raises:
        AssertionError: If ``s`` is not an ``int`` or is out of range.
    """
    assert isinstance(s, int), 'The input sample must be an int'
    # The upper bound is strict: s == num_features would alias state 0 of
    # action a + 1 (or overrun the vector for the last action).
    assert self.num_features > s >= 0, \
        'Given state {} with num_states {} is not possible'.format(s, self.num_features)
    return one_hot_vector(self.size, self.num_features * a + s)
def exhaustive_states(self):
    """Return one one-hot vector per state index, in increasing index order.

    For every ``i`` in ``range(self.num_states())`` the result holds
    ``one_hot_vector(self.num_states(), i)``; the list is empty when
    ``self.num_states()`` is 0.
    """
    vectors = []
    for index in range(self.num_states()):
        # num_states() is re-queried for each vector, as the size argument.
        vectors.append(one_hot_vector(self.num_states(), index))
    return vectors
def evaluate(self, s):
    """Create a one-hot vector for the given state.

    Args:
        s: State index; must be an ``int`` with ``0 <= s < self.num_features``.

    Returns:
        Whatever ``one_hot_vector(self.num_features, s)`` produces (presumably
        a length-``self.num_features`` vector with a single hot entry at ``s``).

    Raises:
        AssertionError: If ``s`` is not an ``int`` or is out of range.
    """
    assert isinstance(s, int), 'The input sample must be an int'
    # The upper bound is strict: valid indices into a vector of length
    # num_features are 0 .. num_features - 1.
    assert self.num_features > s >= 0, \
        'Given state {} with num_states {} is not possible'.format(s, self.num_features)
    return one_hot_vector(self.num_features, s)