def policy_observation(self, policy):
    """Roll out *policy* on the helicopter simulator and return the summed
    feature vector of the visited states (used as an IRL policy observation).

    Parameters
    ----------
    policy : object
        Controller exposing ``propagate(observation, 1)`` -> action.

    Returns
    -------
    list
        Element-wise sum of ``self.features_from_state`` over the rollout,
        including the features of the initial reset state.

    Notes
    -----
    A previously pasted-in copy of a separate error-based evaluation routine
    (bare ``heli``, ``featuresFromState``, ``theta``) was removed here: it was
    unreachable-as-intended dead code that would raise ``NameError`` and it
    shadowed the intended ``return featureSum``.
    """
    # Reset the simulator; reset() returns (observation, accumulated_error) —
    # the error term is unused by this feature-sum rollout.
    observation, _ = self.heli.reset()
    featureSum = self.features_from_state(observation)
    while not self.heli.terminal:
        # Propagate the policy one step on the current raw observation.
        action = policy.propagate(observation, 1)
        observation, _err = self.heli.update(action)
        # extract_sa is a helper defined elsewhere in this file (evaluator);
        # it splits the observation into (state, q, action) components.
        state, q, a = extract_sa(observation)
        state = state + q
        # Drop the trailing element before featurization, matching the
        # original slicing convention.
        feat = self.features_from_state(state[:-1])
        # Element-wise accumulation of the feature vector.
        featureSum = [s + f for s, f in zip(featureSum, feat)]
    return featureSum