def _forwardImplementation(self, inbuf, outbuf):
    """ Pick an action by sampling over the module's action values, then
        anneal the temperature.

        The action values are read from
        ``self.module.getActionValues(self._state)`` and handed to
        ``drawGibbs`` together with the current temperature ``self.tau``.
        After the draw, ``self.tau`` is multiplied by ``self.decay``.

        NOTE(review): ``drawGibbs`` is defined outside this block; presumably
        it samples an index from a Gibbs/Boltzmann distribution over the
        values at the given temperature -- confirm against its definition.

        :arg inbuf: input buffer; not read by this method.
        :arg outbuf: output buffer; overwritten with the drawn action
            (wrapped in a one-element array).
        :raises AssertionError: if ``self.module`` is falsy.
    """
    assert self.module

    chosen = drawGibbs(self.module.getActionValues(self._state), self.tau)

    # Decay only after the draw, so this step still used the old temperature.
    self.tau *= self.decay

    outbuf[:] = array([chosen])
def _legalizeIt(self, a):
    """ Draw a move from an array of values, filtering out illegal moves.

        Every entry of ``a`` that does not correspond to a legal move is
        replaced by a large negative sentinel, ``-100 * (1 + temperature)``,
        before the values are passed to ``drawGibbs`` (presumably low enough
        that such entries are effectively never drawn -- confirm against
        ``drawGibbs``). The drawn index is converted back to a position with
        ``self._convertIndexToPos``.

        :arg a: sequence of values, one per board index; all entries must be
            non-negative.
        :return: the drawn position, as returned by
            ``self._convertIndexToPos``.
        :raises Exception: if the minimum of ``a`` is not ``>= 0``; the
            offending array and the module's params and buffers are printed
            first.
        :raises AssertionError: if the drawn position is not a legal move.
    """
    lowest = min(a)
    # Written as `not (x >= 0)` so that a NaN minimum is rejected as well.
    if not lowest >= 0:
        # Dump the module state before failing, to help diagnose how the
        # module came to produce a negative output.
        print(a)
        print(lowest)
        print(self.module.params)
        print(self.module.inputbuffer)
        print(self.module.outputbuffer)
        raise Exception('Non-positive value in array?')

    # The legal moves must be a separate object from the value array:
    # aliasing the two (`legals = vals = ...`) made the loop below iterate
    # over sentinel floats instead of positions, so nothing was filtered.
    # NOTE(review): assumes the enclosing player exposes the legal positions
    # as `self.game.getLegals(self.color)` -- confirm against the class.
    legals = self.game.getLegals(self.color)

    # Start with every index at the sentinel, then restore the real value
    # only for indices that belong to a legal move.
    vals = ones(len(a)) * (-100) * (1 + self.temperature)
    for i in map(self._convertPosToIndex, legals):
        vals[i] = a[i]

    drawn = self._convertIndexToPos(drawGibbs(vals, self.temperature))
    # Sanity check: the sentinel should have kept illegal moves from winning.
    assert drawn in legals
    return drawn