コード例 #1
0
 def policy_observation(self,policy):
   """Roll out `policy` on `self.heli` and return the summed feature vector.

   The helicopter is reset, then stepped with actions from
   `policy.propagate` until `self.heli.terminal` becomes true. Features are
   computed for the reset observation and for every subsequent step, and
   are added together element by element.

   Args:
     policy: object exposing `propagate(observation, 1)` that yields the
       action passed to `self.heli.update`.

   Returns:
     The running feature sum. If the loop body never runs this is the raw
     result of `self.features_from_state` for the reset observation;
     otherwise it is a list.
   """
   # reset() hands back a pair; only the observation is used here.
   observation, _ = self.heli.reset()
   running_total = self.features_from_state(observation)

   while not self.heli.terminal:
     observation, _ = self.heli.update(policy.propagate(observation, 1))

     # extract_sa (from the evaluator) splits the observation in three; the
     # first two parts are joined and the final element is dropped before
     # the features are computed.
     base, q_part, _ = extract_sa(observation)
     combined = base + q_part
     step_features = self.features_from_state(combined[:-1])

     # Element-wise accumulation; zip stops at the shorter sequence.
     running_total = [sum(pair) for pair in zip(running_total, step_features)]

   return running_total
コード例 #2
0
 def hoverIRL(policy,thet):
   """Helicopter alternative evaluation function.

   Resets the module-level `heli`, then steps it with actions produced by
   `policy` until `heli.terminal` is true, accumulating the error returned
   by each `heli.update` call. For every step the error estimated from the
   weights (`numpy.dot(thet, features)`) is printed next to the error
   reported by `heli` so the two can be compared.

   Args:
     policy: object exposing `propagate(state, 1)` that yields the action
       passed to `heli.update`.
     thet: weights combined with the per-step features via `numpy.dot`.

   Returns:
     `1 / math.log(sum_error)` for the accumulated error.
   """
   state, sum_error = heli.reset()
   while not heli.terminal:
     action = policy.propagate(state, 1)
     state, error = heli.update(action)

     # extract_sa splits the state in three; the first two parts are joined
     # and the final element is dropped before the features are computed.
     st, q, _ = extract_sa(state)
     feat = featuresFromState((st + q)[:-1])

     # The weights are the `thet` parameter; the previous body read `theta`,
     # a name this function never defines.
     errorTest = numpy.dot(thet, feat)

     # The estimate used to be added only when it was equal to `error`, so
     # adding `error` unconditionally yields the same total.
     sum_error += error

     # Single-argument print(...) behaves the same on Python 2 and Python 3.
     print("-----------")
     print(errorTest)
     print(error)
     print("-----------")

   # NOTE(review): math.log raises ValueError for a non-positive total and
   # the division fails when the total is exactly 1 - confirm heli's error
   # range rules both out.
   return 1 / math.log(sum_error)