Esempio n. 1
0
    def epsilon_greedy(self, sensation, applicable_actions):
        """
        Return an epsilon-greedy distribution over applicable_actions.

        The result is an array holding one probability per action, in
        the same order as applicable_actions.  The actions whose
        self.Q(sensation, action) equals the maximum share a total mass
        of 1 - self.epsilon() equally, and all remaining actions share
        self.epsilon() equally.  When every action ties for the maximum
        there is no other action to receive the exploration mass, so
        the result is uniform.  The probabilities always sum to 1.
        """
        Q = array([self.Q(sensation, action) for action in applicable_actions])

        # simple epsilon-greedy policy
        # get a vector with a 1 where each max element is, zero elsewhere
        mask = (Q == mmax(Q))

        # Count the maxima by walking the mask.  len(nonzero(mask)) is
        # only a count when nonzero returns a flat index array; NumPy's
        # nonzero returns a tuple, whose length is the number of
        # dimensions instead.
        num_maxes = len([1 for is_max in mask if is_max])
        num_others = len(mask) - num_maxes

        if num_others == 0:
            # Every action is greedy: split the whole mass evenly.
            return zeros(len(mask)) + 1.0 / num_maxes

        # float() avoids integer division if epsilon is given as an int.
        eps = float(self.epsilon())

        # The greedy actions split 1 - eps between them (not 1 - eps each),
        # so the total stays 1 even when several actions tie for the max.
        greedy_mass = (1.0 - eps) / num_maxes
        other_mass = eps / num_others

        result = zeros(len(mask)) + 0.0
        putmask(result, mask, greedy_mass)
        putmask(result, mask == 0, other_mass)
        return result
    def epsilon_greedy(self, sensation, applicable_actions):
        """
        Return an epsilon-greedy distribution over applicable_actions.

        The result is an array holding one probability per action, in
        the same order as applicable_actions.  The actions whose
        self.Q(sensation, action) equals the maximum share a total mass
        of 1 - self.epsilon() equally, and all remaining actions share
        self.epsilon() equally.  When every action ties for the maximum
        there is no other action to receive the exploration mass, so
        the result is uniform.  The probabilities always sum to 1.
        """
        Q = array([self.Q(sensation, action) for action in applicable_actions])

        # simple epsilon-greedy policy
        # get a vector with a 1 where each max element is, zero elsewhere
        mask = (Q == mmax(Q))

        # Count the maxima by walking the mask.  len(nonzero(mask)) is
        # only a count when nonzero returns a flat index array; NumPy's
        # nonzero returns a tuple, whose length is the number of
        # dimensions instead.
        num_maxes = len([1 for is_max in mask if is_max])
        num_others = len(mask) - num_maxes

        if num_others == 0:
            # Every action is greedy: split the whole mass evenly.
            return zeros(len(mask)) + 1.0 / num_maxes

        # float() avoids integer division if epsilon is given as an int.
        eps = float(self.epsilon())

        # The greedy actions split 1 - eps between them (not 1 - eps each),
        # so the total stays 1 even when several actions tie for the max.
        greedy_mass = (1.0 - eps) / num_maxes
        other_mass = eps / num_others

        result = zeros(len(mask)) + 0.0
        putmask(result, mask, greedy_mass)
        putmask(result, mask == 0, other_mass)
        return result
Esempio n. 3
0
def softmax(ar, temp):
    """
    Return the Boltzmann distribution over an array at a temperature.

    For an array X and temperature T, the result is a new array whose
    i-th element is

        exp(X[i]/T) / sum_j exp(X[j]/T)

    If temp == 0 or any value in the array is inf, the function
    returns the limit value as T -> 0 instead: the mass is divided
    equally among the maximal elements and all other elements get 0.
    """
    if temp == 0 or inf in ar:
        v = (ar == mmax(ar))
        return v / float(sum(v))

    scaled = ar / float(temp)
    # Subtract the largest exponent before calling exp().  The common
    # factor cancels in the ratio, so the result is unchanged, but the
    # largest term becomes exp(0) == 1 and large X/T can no longer
    # overflow into inf/inf.
    numer = Numeric.exp(scaled - mmax(scaled))
    denom = Numeric.sum(numer)
    return numer / denom
def softmax(ar, temp):
    """
    Return the Boltzmann distribution over an array at a temperature.

    For an array X and temperature T, the result is a new array whose
    i-th element is

        exp(X[i]/T) / sum_j exp(X[j]/T)

    If temp == 0 or any value in the array is inf, the function
    returns the limit value as T -> 0 instead: the mass is divided
    equally among the maximal elements and all other elements get 0.
    """
    if temp == 0 or inf in ar:
        v = (ar == mmax(ar))
        return v / float(sum(v))

    scaled = ar / float(temp)
    # Subtract the largest exponent before calling exp().  The common
    # factor cancels in the ratio, so the result is unchanged, but the
    # largest term becomes exp(0) == 1 and large X/T can no longer
    # overflow into inf/inf.
    numer = Numeric.exp(scaled - mmax(scaled))
    denom = Numeric.sum(numer)
    return numer / denom