def propensity(self, x, action):
    """Probability that Thompson sampling selects each given action.

    For every arm, an independent normal sampling distribution is obtained
    from the per-arm regressors. The propensity of an arm is then the
    probability that its sampled value exceeds the sampled value of every
    other arm, computed as a product of pairwise Gaussian probabilities.

    :param x: input features, shape ``(batch, d)`` (numpy or cupy array).
    :param action: chosen action indices, one per row of ``x``.
    :return: chainer Variable with the propensity of each chosen action,
        reshaped to ``action.shape``.
    """
    xp = cuda.get_array_module(x)

    # Per-arm Thompson sampling distributions: one mean and one std per
    # (sample, arm) pair.
    z_means = xp.zeros((x.shape[0], self.k))
    z_std = xp.zeros((x.shape[0], self.k))
    for arm in range(self.k):
        m, s = self.regressors[arm].thompson_distribution(x)
        z_means[:, arm] = m.data
        z_std[:, arm] = s.data

    # Tile means/stds into (i, j) grids and drop the diagonal, yielding
    # for every ordered pair i != j the mean difference and combined std.
    m_i, m_j = _tiles(z_means)
    s_i, s_j = _tiles(z_std)
    c_m = _cut_diagonals(m_i - m_j).data
    c_s = _cut_diagonals(s_i + s_j).data

    # 0.5 * (1 + erf(mu / (sqrt(2) * sigma))) is the Gaussian CDF at 0 of
    # -(z_i - z_j), i.e. P(z_i > z_j); the product over all j != i treats
    # the pairwise events as independent to approximate P(arm i wins).
    # NOTE(review): combined spread is s_i + s_j rather than
    # sqrt(s_i**2 + s_j**2) — presumably a deliberate approximation;
    # confirm against the derivation used elsewhere in the project.
    # (Removed dead code: `opts = factorial(self.k - 1)` was never used.)
    res = xp.prod(0.5 * (1 + F.erf(c_m / (xp.sqrt(2) * c_s)).data), axis=2)

    # Pick, per row, the propensity of the action actually taken.
    chosen = F.reshape(action, (action.shape[0], 1))
    res = select_items_per_row(as_variable(res), chosen)
    return F.reshape(res, action.shape)
def _ndtr(a):
    """CDF of the standard normal distribution.

    Differentiable chainer port of the cephes implementation.
    See https://github.com/scipy/scipy/blob/master/scipy/special/cephes/ndtr.c
    """
    if not isinstance(a, chainer.Variable):
        a = chainer.Variable(a)
    scaled = a * NPY_SQRT1_2
    magnitude = abs(scaled)
    # Tail branch (|x| large): erfc is numerically stabler than erf here;
    # mirror the result depending on the sign of the argument.
    tail = 0.5 * F.erfc(magnitude)
    far_from_zero = F.where(scaled.data > 0, 1.0 - tail, tail)
    # Central branch (|x| small): evaluate erf directly.
    near_zero = 0.5 + 0.5 * F.erf(scaled)
    return F.where(magnitude.data < NPY_SQRT1_2, near_zero, far_from_zero)
def gelu(input_tensor):
    """Gaussian Error Linear Unit.

    This is a smoother version of the RELU.
    Original paper: https://arxiv.org/abs/1606.08415

    Args:
        input_tensor: float Tensor to perform activation.

    Returns:
        `input_tensor` with the GELU activation applied.
    """
    # GELU(x) = x * Phi(x), where Phi is the standard normal CDF,
    # evaluated exactly via erf.
    standardized = input_tensor / 2.0 ** 0.5
    normal_cdf = 0.5 * (1.0 + F.erf(standardized))
    return input_tensor * normal_cdf