def forward_gpu(self, x):
    """Apply dropout to ``x[0]`` on the GPU.

    A buffer shaped like ``x[0]`` is filled by the CUDA generator's
    ``fill_uniform``. Every element whose random value is below
    ``self.dropout_ratio`` is set to zero; the remaining elements are
    multiplied by ``1 / (1 - self.dropout_ratio)``.

    Side effects: stores the random buffer in ``self.rand``, the scaling
    factor in ``self.scale`` and the elementwise kernel in ``self.kernel``
    (presumably so a backward pass can reuse them -- confirm with caller).

    Args:
        x: Sequence whose first item is the input GPU array.

    Returns:
        A one-element tuple holding the output array.
    """
    source = x[0]
    self.rand = cuda.empty_like(source)
    out = cuda.empty_like(source)
    cuda.get_generator().fill_uniform(self.rand)

    keep_fraction = 1 - self.dropout_ratio
    self.scale = 1. / keep_fraction

    # The kernel is kept on the instance, not just in a local.
    self.kernel = cuda.elementwise(
        'float* y, const float* x, const float* rand, float dropout_ratio, '
        'float scale',
        'y[i] = rand[i] < dropout_ratio ? 0 : scale * x[i]',
        'dropout')
    self.kernel(out, source, self.rand, self.dropout_ratio, self.scale)
    return (out,)
def forward_gpu(self, inputs):
    """Add log-variance-scaled noise to ``mean`` on the GPU.

    Computes ``mean + exp(ln_var * 0.5) * eps`` elementwise. ``eps`` is
    taken from ``self.eps``; when that attribute is ``None`` a float32
    buffer shaped like ``ln_var`` is allocated, filled through the CUDA
    generator's ``fill_normal`` (presumably standard normal -- confirm
    against the generator's defaults) and cached in ``self.eps``.

    Side effects: may set ``self.eps``; always sets ``self.noise`` to the
    scaled noise array.

    Args:
        inputs: Pair ``(mean, ln_var)`` of GPU arrays.

    Returns:
        A one-element tuple holding ``mean + noise``.
    """
    mean, ln_var = inputs

    # Sample eps only once; an eps that is already set is reused as-is.
    if self.eps is None:
        self.eps = cuda.empty(ln_var.shape, numpy.float32)
        cuda.get_generator().fill_normal(self.eps)

    scaled = cuda.empty_like(ln_var)
    kernel = cuda.elementwise(
        'float* noise, const float* v, const float* e',
        'noise[i] = __expf(v[i] * 0.5f) * e[i];',
        'gaussian_forward')
    kernel(scaled, ln_var, self.eps)

    self.noise = scaled
    return (mean + scaled,)
def sample_gpu(self, shape):
    """Draw an int32 GPU array of the given shape from the lookup tables.

    For each output element a uniform random number ``p`` is scaled to
    ``pb = p * b`` with ``b = len(self.threshold)``. ``index`` is ``pb``
    rounded down (reset to 0 when it reaches ``b``), and the result is
    ``self.values[index * 2 + lr]`` where ``lr`` is 1 if
    ``self.threshold[index] < pb - index`` and 0 otherwise.

    Args:
        shape: Shape of the array of samples to produce.

    Returns:
        GPU int32 array of shape ``shape`` holding the sampled values.
    """
    uniform = cuda.empty(shape, numpy.float32)
    cuda.get_generator().fill_uniform(uniform)
    drawn = cuda.empty(shape, numpy.int32)

    kernel = cuda.elementwise(
        'int* vs, const float* ps, const float* threshold, const int* values, '
        'int b',
        '''
        float pb = ps[i] * b;
        int index = __float2int_rd(pb);
        // fill_uniform sometimes returns 1.0, so we need to check index
        if (index >= b) {
          index = 0;
        }
        int lr = threshold[index] < pb - index;
        vs[i] = values[index * 2 + lr];
        ''',
        'walker_alias_sample')

    table_size = len(self.threshold)
    kernel(drawn, uniform, self.threshold, self.values, table_size)
    return drawn
def sample_gpu(self, shape):
    """Sample values on the GPU using ``self.threshold`` and ``self.values``.

    A float32 buffer of shape ``shape`` is filled by the CUDA generator's
    ``fill_uniform``. Each entry ``p`` selects a slot
    ``index = floor(p * b)``, with ``b = len(self.threshold)``; an index
    equal to or beyond ``b`` is mapped back to slot 0. The fractional
    remainder ``p * b - index`` is compared with ``self.threshold[index]``
    to choose between ``self.values[index * 2]`` and
    ``self.values[index * 2 + 1]``.

    Args:
        shape: Shape of the array of samples to produce.

    Returns:
        GPU int32 array of shape ``shape`` holding the sampled values.
    """
    probs = cuda.empty(shape, numpy.float32)
    cuda.get_generator().fill_uniform(probs)
    samples = cuda.empty(shape, numpy.int32)

    arg_spec = (
        'int* vs, const float* ps, const float* threshold, '
        'const int* values, int b'
    )
    body = '''
        float pb = ps[i] * b;
        int index = __float2int_rd(pb);
        // fill_uniform sometimes returns 1.0, so we need to check index
        if (index >= b) {
          index = 0;
        }
        int lr = threshold[index] < pb - index;
        vs[i] = values[index * 2 + lr];
        '''
    sampler = cuda.elementwise(arg_spec, body, 'walker_alias_sample')
    sampler(samples, probs, self.threshold, self.values, len(self.threshold))
    return samples