Beispiel #1
0
    def _forwardImplementation(self, inbuf, outbuf):
        """ Choose an action by sampling over the module's action values.

            The action values of the stored state (self._state) are handed
            to drawGibbs together with the current temperature self.tau,
            and the drawn action is written into outbuf. After every draw
            the temperature is multiplied by self.decay. inbuf is not read
            here.

            NOTE(review): drawGibbs is defined outside this block; it
            presumably performs softmax (Gibbs/Boltzmann) sampling, so a
            smaller tau would make the draw greedier -- confirm.
        """
        assert self.module

        # The draw uses the temperature as it stands *before* this step's
        # decay is applied.
        chosen = drawGibbs(self.module.getActionValues(self._state), self.tau)
        self.tau *= self.decay

        outbuf[:] = array([chosen])
Beispiel #2
0
 def _legalizeIt(self, a):
     """ Draw a move from an array of values, filtering out illegal moves.

     Every index that does not belong to a legal move (as reported by
     self.game.getLegals(self.color)) is given a strongly negative score
     before the array is passed to drawGibbs, and the drawn index is
     converted back to a board position.

     :param a: sequence of move values, indexed like the result of
         self._convertPosToIndex; the minimum must be >= 0.
     :return: the drawn position, which is one of the legal moves.
     :raises ValueError: if the minimum of ``a`` is negative or NaN. The
         message carries the offending values and the module's params and
         buffers for debugging.
     :raises AssertionError: if the drawn position is not a legal move
         (only when assertions are enabled).
     """
     lowest = min(a)
     # Deliberately "not >= 0" rather than "< 0": the former is also true
     # when the minimum is NaN, so corrupted outputs are rejected as well.
     if not lowest >= 0:
         raise ValueError(
             'Negative (or NaN) value in array: min=%r, values=%r; '
             'module params=%r, inputbuffer=%r, outputbuffer=%r'
             % (lowest, a, self.module.params,
                self.module.inputbuffer, self.module.outputbuffer))

     legals = self.game.getLegals(self.color)

     # Start every index at -100 * (1 + temperature) and overwrite only the
     # legal ones with their (non-negative) value.
     # NOTE(review): presumably the temperature scaling keeps illegal moves
     # negligible under drawGibbs at any temperature -- confirm.
     vals = ones(len(a)) * (-100) * (1 + self.temperature)
     for i in map(self._convertPosToIndex, legals):
         vals[i] = a[i]

     drawn = self._convertIndexToPos(drawGibbs(vals, self.temperature))
     # Internal sanity check: the penalty above should make this impossible.
     assert drawn in legals
     return drawn