def __init__(self, cmp, rewardSet, initialPhi, queryType, gamma, qi=False):
    """
    Initialise the QTPAgent base part, then decide how policies get evaluated.

    cmp, rewardSet, initialPhi, queryType, gamma: passed through unchanged
        to QTPAgent.__init__.
    qi: query iteration flag, stored as self.qi.
    """
    QTPAgent.__init__(self, cmp, rewardSet, initialPhi, queryType, gamma)

    # whether query iteration should be performed
    self.qi = qi
    self.m = 1

    def evaluateWithPiValue(pi, S, A, r, horizon):
        # S and A are accepted only to keep a uniform signature
        return self.computePiValue(pi, r, horizon)

    def evaluateWithLp(pi, S, A, r, horizon):
        # horizon is accepted only to keep a uniform signature
        return lp.computeValue(pi, r, S, A)

    # a policy gradient agent provides its own computePiValue; any other
    # agent falls back to the lp based evaluation
    if hasattr(self, 'computePiValue'):
        self.computeV = evaluateWithPiValue
    else:
        self.computeV = evaluateWithLp
def __init__(self, mdp, k):
    """
    Keep the mdp and k on the instance and select the policy evaluator.

    mdp: stored as self.mdp.
    k: stored as self.k.

    self.computeV always takes (pi, S, A, r, horizon), whichever evaluator
    ends up behind it.
    """
    self.mdp, self.k = mdp, k

    if not hasattr(self, 'computePiValue'):
        # no agent specific evaluator available: go through lp
        self.computeV = lambda pi, S, A, r, horizon: lp.computeValue(
            pi, r, S, A)
        return

    # policy gradient agent has its own way to compute values
    self.computeV = lambda pi, S, A, r, horizon: self.computePiValue(
        pi, r, horizon)
def __init__(self, cmp, rewardSet, initialPhi, queryType, gamma, qi=False):
    """
    Run QTPAgent.__init__ and wire up the value computation for this agent.

    cmp, rewardSet, initialPhi, queryType, gamma: forwarded as-is to
        QTPAgent.__init__.
    qi: query iteration switch, kept on the instance as self.qi.
    """
    QTPAgent.__init__(self, cmp, rewardSet, initialPhi, queryType, gamma)

    # do query iteration?
    self.qi = qi
    self.m = 1

    # policy gradient agents expose computePiValue and are evaluated through
    # it; all other agents are evaluated through lp.computeValue
    hasOwnEvaluator = hasattr(self, 'computePiValue')
    self.computeV = (
        (lambda pi, S, A, r, horizon: self.computePiValue(pi, r, horizon))
        if hasOwnEvaluator else
        (lambda pi, S, A, r, horizon: lp.computeValue(pi, r, S, A))
    )
def computeValue(self, x):
    """
    Evaluate x against this object's mdp.

    Delegates to the computeValue function looked up by its bare name,
    handing it x together with the mdp's r, S and A.
    NOTE(review): assumes this def sits inside a class body, so the bare
    name resolves to a module-level function rather than to this method.
    """
    mdp = self.mdp
    return computeValue(x, mdp.r, mdp.S, mdp.A)
def computeValue(self, x):
    """
    Return the value of policy x.

    The work is done by the computeValue function looked up by its bare
    name, which receives x plus the mdp's r, S and A. According to the
    original note this amounts to the dot product between x and r.
    """
    model = self.mdp
    reward, states, actions = model.r, model.S, model.A
    return computeValue(x, reward, states, actions)