def __init__(self, cmp, rewardSet, initialPhi, queryType, gamma, qi=False):
   """
   Initialise the QTPAgent base and choose how policy values are computed.

   cmp, rewardSet, initialPhi, queryType, gamma: passed through unchanged
     to QTPAgent.__init__
   qi: whether to do query iteration; stored as self.qi

   After construction self.computeV accepts (pi, S, A, r, horizon) no matter
   which backend was picked, so callers never need to know which one is used.
   """
   QTPAgent.__init__(self, cmp, rewardSet, initialPhi, queryType, gamma)
   self.qi, self.m = qi, 1

   def valueFromAgent(pi, S, A, r, horizon):
     # policy gradient agents bring their own evaluation routine
     return self.computePiValue(pi, r, horizon)

   def valueFromLp(pi, S, A, r, horizon):
     # everyone else defers to lp.computeValue; horizon is not passed on
     return lp.computeValue(pi, r, S, A)

   self.computeV = valueFromAgent if hasattr(self, 'computePiValue') else valueFromLp
Esempio n. 2
0
    def __init__(self, mdp, k):
        """
        Remember the MDP and k, then choose the routine used to evaluate policies.

        mdp: stored as self.mdp
        k: stored as self.k

        self.computeV always takes (pi, S, A, r, horizon); which of those
        arguments are actually used depends on the backend selected below.
        """
        self.mdp, self.k = mdp, k

        def valueFromAgent(pi, S, A, r, horizon):
            # policy gradient agents supply their own computePiValue
            return self.computePiValue(pi, r, horizon)

        def valueFromLp(pi, S, A, r, horizon):
            # otherwise defer to lp.computeValue; horizon is not passed on
            return lp.computeValue(pi, r, S, A)

        self.computeV = valueFromAgent if hasattr(self, 'computePiValue') else valueFromLp
Esempio n. 3
0
    def __init__(self, cmp, rewardSet, initialPhi, queryType, gamma, qi=False):
        """
        Construct the agent via QTPAgent and install self.computeV.

        cmp, rewardSet, initialPhi, queryType, gamma: forwarded as-is to
            QTPAgent.__init__
        qi: flag for doing query iteration, kept on self.qi
        """
        QTPAgent.__init__(self, cmp, rewardSet, initialPhi, queryType, gamma)

        self.m = 1
        self.qi = qi

        # Default evaluator: hand the policy to lp.computeValue (horizon unused).
        def evaluate(pi, S, A, r, horizon):
            return lp.computeValue(pi, r, S, A)

        if hasattr(self, 'computePiValue'):
            # A policy gradient agent evaluates policies its own way, so its
            # computePiValue takes over (S and A are ignored in that case).
            def evaluate(pi, S, A, r, horizon):
                return self.computePiValue(pi, r, horizon)

        self.computeV = evaluate
Esempio n. 4
0
 def computeValue(self, x):
   """
   Evaluate x by delegating to the module-level computeValue, supplying
   r, S and A from self.mdp.
   """
   mdp = self.mdp
   return computeValue(x, mdp.r, mdp.S, mdp.A)
Esempio n. 5
0
 def computeValue(self, x):
     """
     Return the value of policy x, i.e. the dot product between x and r.

     The actual work is done by the module-level computeValue; this method
     only fills in r, S and A from self.mdp.
     """
     model = self.mdp
     rewards, states, actions = model.r, model.S, model.A
     return computeValue(x, rewards, states, actions)
Esempio n. 6
0
 def computeValue(self, x):
     """
     Forward x to the module-level computeValue together with the r, S and A
     attributes of self.mdp, and return its result.
     """
     env = self.mdp
     mdpArgs = (env.r, env.S, env.A)
     return computeValue(x, *mdpArgs)