def numerical_grad_gpu(f, inputs, grad_outputs, eps=1e-3):
    grads = tuple(cuda.zeros_like(x) for x in inputs)
    for x, gx in zip(inputs, grads):
        x = x.ravel()
        gx = gx.ravel()
        x_cpu = x.get()
        gx_cpu = gx.get()
        for i in six.moves.range(x_cpu.size):
            orig = x_cpu[i]
            x_cpu[i] = orig + eps
            x.set(x_cpu)
            ys1 = f()
            x_cpu[i] = orig - eps
            x.set(x_cpu)
            ys2 = f()
            x_cpu[i] = orig
            x.set(x_cpu)

            for y1, y2, gy in zip(ys1, ys2, grad_outputs):
                if gy is not None:
                    dot = sum(((y1 - y2) * gy).ravel()).get()
                    gx_cpu[i] += dot / (2 * eps)
        gx.set(gx_cpu)

    return grads
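This helper does two-sided (central) numerical differentiation on the GPU: each scalar entry of every input is perturbed by ±eps on a host copy, pushed back to the device, and the directional derivative against grad_outputs is accumulated. A minimal CPU-only sketch of the same scheme for a single input, in plain numpy (the function name is ours, not Chainer's):

import numpy as np

def numerical_grad_cpu(f, x, gy, eps=1e-3):
    # Central differences:
    # gx[i] ~= sum((f(x + eps*e_i) - f(x - eps*e_i)) * gy) / (2*eps)
    gx = np.zeros_like(x)
    flat_x, flat_gx = x.ravel(), gx.ravel()  # views; assumes x is contiguous
    for i in range(flat_x.size):
        orig = flat_x[i]
        flat_x[i] = orig + eps
        y1 = f(x)
        flat_x[i] = orig - eps
        y2 = f(x)
        flat_x[i] = orig
        flat_gx[i] = ((y1 - y2) * gy).sum() / (2 * eps)
    return gx

# e.g. f(x) = x**2 has gradient 2*x when gy is all ones:
x = np.random.randn(3, 4)
print(np.allclose(numerical_grad_cpu(np.square, x, np.ones_like(x)),
                  2 * x, atol=1e-4))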
Example #2
    def backward_gpu(self, inputs, loss):
        x, t = inputs
        gloss, = loss

        n_in = x.shape[1]
        gx = cuda.zeros_like(x)
        cuda.elementwise(
            '''T wxy, raw T x, raw T w, raw int32 ts, raw int32 paths,
            raw T codes, raw int32 begins, raw T gloss,
            int32 c, int32 max_length''',
            'raw T gx, raw T gw',
            '''
            int ind = i / max_length;
            int offset = i - ind * max_length;
            int t = ts[ind];

            int begin = begins[t];
            int length = begins[t + 1] - begins[t];

            if (offset < length) {
              int p = begin + offset;
              int node = paths[p];
              T code = codes[p];

              T g = -gloss[0] * code / (1.0 + exp(wxy));
              for (int j = 0; j < c; ++j) {
                atomicAdd(&gx[ind * c + j], g * w[node * c + j]);
                atomicAdd(&gw[node * c + j], g * x[ind * c + j]);
              }
            }
            ''',
            'binary_hierarchical_softmax_bwd'
        )(self.wxy, x, self.W, t, self.paths, self.codes,
          self.begins, gloss, n_in, self.max_length, gx, self.gW)
        return gx, None
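The kernel above launches one thread per (sample, path position) pair: each thread handles one node on the sample's Huffman path, computes the binary logistic gradient g = -gloss * code / (1 + exp(wxy)), and scatters it into both gx and gW with atomicAdd. A plain-numpy rendering of the same loop may help in reading the kernel (array names mirror the kernel parameters; wxy is assumed to have shape (batch, max_length)):

import numpy as np

def hsm_backward_ref(wxy, x, W, ts, paths, codes, begins, gloss):
    # One loop iteration per (sample, node-on-path), like one kernel thread.
    gx = np.zeros_like(x)
    gW = np.zeros_like(W)
    for ind, t in enumerate(ts):
        begin, end = begins[t], begins[t + 1]
        for offset in range(end - begin):
            p = begin + offset
            node, code = paths[p], codes[p]
            g = -gloss * code / (1.0 + np.exp(wxy[ind, offset]))
            gx[ind] += g * W[node]   # the atomicAdd into gx
            gW[node] += g * x[ind]   # the atomicAdd into gW
    return gx, gW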
Example #3
def numerical_grad_gpu(f, inputs, grad_outputs, eps=1e-3):
    grads = tuple(cuda.zeros_like(x) for x in inputs)
    for x, gx in zip(inputs, grads):
        x = x.ravel()
        gx = gx.ravel()
        x_cpu = x.get()
        gx_cpu = gx.get()
        for i in six.moves.range(x_cpu.size):
            orig = x_cpu[i]
            x_cpu[i] = orig + eps
            x.set(x_cpu)
            # copy the outputs: f() can return views of device buffers that
            # the next x.set(...) would overwrite
            ys1 = _copy_arrays(f())
            x_cpu[i] = orig - eps
            x.set(x_cpu)
            ys2 = _copy_arrays(f())
            x_cpu[i] = orig
            x.set(x_cpu)

            for y1, y2, gy in zip(ys1, ys2, grad_outputs):
                if gy is not None:
                    dot = sum(((y1 - y2) * gy).ravel()).get()
                    gx_cpu[i] += dot / (2 * eps)
        gx.set(gx_cpu)

    return grads
Example #4
    def backward(self, x, gy):
        if isinstance(x[0], cuda.GPUArray):
            gx = cuda.zeros_like(x[0])
        else:
            gx = numpy.zeros_like(x[0])
        gys = split_axis.SplitAxis(self.split_inds, axis=1).forward(gy)
        for pooler, gy in zip(self.poolers, gys):
            gy = gy.reshape(pooler.out_shape)
            gx += pooler.backward(x, (gy, ))[0]

        return gx,
Example #5
 def create_linear_chain(self, length, gpu):
     if gpu:
         x = chainer.Variable(cuda.to_gpu(self.x))
     else:
         x = chainer.Variable(self.x)
     ret = [x]
     for i in six.moves.range(length):
         ret.append(constant((ret[i], ), (self.a, )))
     if gpu:
         ret[-1].grad = cuda.zeros_like(ret[-1].data)
     else:
         ret[-1].grad = np.zeros_like(ret[-1].data)
     return ret
Example #6
 def backward_gpu(self, x, gys):
     gx = cuda.zeros_like(x[0])
     coffset = 0
     kernel = cuda.elementwise(
         _args, 'COPY(x[idx] = y[i])', 'split_bwd', preamble=_preamble)
     for gy in gys:
         if gy is None:
             continue
         cdimy = gy.shape[self.axis]
         if cdimy != 0:
             kernel(gy, gx, cdimy, self.cdimx, self.rdim, coffset)
         coffset += cdimy
     return gx,
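The backward of a split is a concatenation: each section's output gradient is copied back into its slice of gx, and sections whose gradient is None stay zero. A numpy equivalent of what the kernel scatters (note the snippet above advances coffset only for non-None gradients, so this sketch assumes every gradient is present):

import numpy as np

def split_backward_ref(x_shape, gys, axis):
    # Scatter each section's gradient back into its slice of gx.
    gx = np.zeros(x_shape, dtype=gys[0].dtype)
    offset = 0
    for gy in gys:
        width = gy.shape[axis]
        index = [slice(None)] * gx.ndim
        index[axis] = slice(offset, offset + width)
        gx[tuple(index)] = gy
        offset += width
    return gx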
Example #7
    def backward_gpu(self, inputs, grads):
        x, t = inputs
        gloss, = grads

        n_in = x.shape[1]
        g = cuda.empty_like(self.wx)
        cuda.elementwise(
            'float* g, const float* wx, const float* gloss, int m',
            '''
            float y;
            if (i % m == 0) {
              y = 1;
            } else {
              y = -1;
            }

            g[i] = -y * *gloss / (1.0f + __expf(wx[i] * y));
            ''',
            'negative_sampling_calculate_g'
        )(g, self.wx, gloss, self.sample_size + 1)
        gx = cuda.zeros_like(x)
        cuda.elementwise(
            '''float* gx, const float* g, const float* W, const int* k, int c,
            int m''',
            '''
            int d = i / c;
            g = &g[d * m];
            k = &k[d * m];
            float w = 0;
            for (int j = 0; j < m; ++j) {
              w += g[j] * W[k[j] * c + i % c];
            }
            gx[i] = w;
            ''',
            'negative_sampling_calculate_gx'
        )(gx, g, self.W, self.samples, n_in, self.sample_size + 1)
        cuda.elementwise(
            '''const float * g, const float* x, const int* k, float* gW, int c,
            int m''',
            '''
            x = &x[(i / m) * c];
            gW = &gW[k[i] * c];
            float gi = g[i];
            for (int j = 0; j < c; ++j) {
              atomicAdd(gW + j, gi * x[j]);
            }
            ''',
            'negative_sampling_calculate_gw'
        )(g, x, self.samples, self.gW, n_in, self.sample_size + 1)
        return gx, None
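Three kernels run in sequence: the first computes the logistic gradient g for every (sample, drawn word) pair, giving the positive word (column 0) sign +1 and the negatives sign -1; the second contracts g with the sampled rows of W to form gx; the third scatter-adds g * x into gW. A hedged numpy rendering of all three (shapes assumed: wx and samples are (batch, sample_size + 1), gloss is a scalar):

import numpy as np

def negative_sampling_backward_ref(x, W, samples, wx, gloss):
    batch, m = wx.shape                            # m == sample_size + 1
    y = np.where(np.arange(m) == 0, 1.0, -1.0)     # +1 for the positive word
    g = -y * gloss / (1.0 + np.exp(wx * y))        # kernel 1: dloss/dwx
    gx = np.einsum('bm,bmc->bc', g, W[samples])    # kernel 2
    gW = np.zeros_like(W)
    np.add.at(gW, samples.ravel(),                 # kernel 3: the atomicAdds
              (g[:, :, None] * x[:, None, :]).reshape(-1, x.shape[1]))
    return gx, gW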
Example #10
    def backward_gpu(self, inputs, grads):
        x, t = inputs
        gloss, = grads

        n_in = x.shape[1]
        g = cuda.elementwise(
            'T wx, raw T gloss, int32 m', 'T g',
            '''
            T y;
            if (i % m == 0) {
              y = 1;
            } else {
              y = -1;
            }

            g = -y * gloss[0] / (1.0f + __expf(wx * y));
            ''',
            'negative_sampling_calculate_g'
        )(self.wx, gloss, self.sample_size + 1)
        gx = cuda.zeros_like(x)
        cuda.elementwise(
            'raw T g, raw T W, raw S k, int32 c, int32 m', 'T gx',
            '''
            int d = i / c;
            T w = 0;
            for (int j = 0; j < m; ++j) {
              w += g[d * m + j] * W[k[d * m + j] * c + i % c];
            }
            gx = w;
            ''',
            'negative_sampling_calculate_gx'
        )(g, self.W, self.samples, n_in, self.sample_size + 1, gx)
        cuda.elementwise(
            'T g, raw T x, S k, int32 c, int32 m', 'raw T gW',
            '''
            T gi = g;
            for (int j = 0; j < c; ++j) {
              atomicAdd(&gW[k * c + j], gi * x[(i / m) * c + j]);
            }
            ''',
            'negative_sampling_calculate_gw'
        )(g, x, self.samples, n_in, self.sample_size + 1, self.gW)
        return gx, None
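This is the same backward pass written against the newer elementwise interface, where Chainer's cuda.elementwise wraps CuPy's ElementwiseKernel: arguments declared without raw are indexed automatically by the element index i, while raw arguments are passed whole and indexed manually in the kernel body. A minimal standalone CuPy illustration of the raw specifier (kernel name and arrays are made up for this example):

import cupy as cp

# 'idx' is consumed per element; 'raw T table' is a whole array that the
# body indexes explicitly, like the raw W / k / gW arguments above.
lookup = cp.ElementwiseKernel(
    'S idx, raw T table', 'T out',
    'out = table[idx]',
    'raw_lookup')

table = cp.asarray([10.0, 20.0, 30.0])
print(lookup(cp.asarray([2, 0, 1]), table))   # [30. 10. 20.]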
Example #12
    def forward_gpu(self, inputs):
        x, t = inputs
        n_in = x.shape[1]
        self._make_samples(t)

        wx = cuda.empty((x.shape[0], self.sample_size + 1))
        cuda.elementwise(
            '''float* wx, const float* W, const float* x, const int* k, int c,
            int m''',
            '''
            x = &x[(i / m) * c];
            W = &W[k[i] * c];
            float f = 0;
            for (int j = 0; j < c; ++j) {
              f += x[j] * W[j];
            }
            wx[i] = f;
            ''',
            'negative_sampling_wx'
        )(wx, self.W, x, self.samples, n_in, self.sample_size + 1)
        self.wx = wx

        y = cuda.zeros_like(wx)
        cuda.elementwise(
            'float* y, const float* wx, int c, int m',
            '''
            float f = wx[i];
            if (i % m == 0) {
              f = -f;
            }
            float loss;
            if (f < 0) {
              loss = __logf(1 + __expf(f));
            } else {
              loss = f + __logf(1 + __expf(-f));
            }
            y[i] = loss;
            ''',
            'negative_sampling_forward'
        )(y, wx, n_in, self.sample_size + 1)
        loss = cuda.gpuarray.sum(y)
        return loss,
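The branch in the forward kernel is the standard numerically stable softplus: log(1 + exp(f)) overflows for large f, while f + log(1 + exp(-f)) is algebraically identical and safe. The same trick in numpy, next to numpy's built-in logaddexp:

import numpy as np

def softplus_stable(f):
    # Same two-branch form as the kernel; the clamps keep np.exp from
    # overflowing in the branch that np.where ultimately discards.
    return np.where(f < 0,
                    np.log1p(np.exp(np.minimum(f, 0.0))),
                    f + np.log1p(np.exp(-np.maximum(f, 0.0))))

f = np.array([-800.0, 0.0, 800.0])
print(softplus_stable(f))       # [  0.          0.69314718 800.        ]
print(np.logaddexp(0.0, f))     # numpy built-in, same values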
Example #14
    def backward_gpu(self, inputs, loss):
        x, t = inputs
        gloss, = loss

        n_in = x.shape[1]
        gx = cuda.zeros_like(x)
        cuda.elementwise(
            '''const float* wxy, float* gx, float* gw, const float* x,
            const float* w, const int* ts, const int* paths,
            const float* codes, const int* begins,
            const float* gloss, int c, int max_length''',
            '''
            int ind = i / max_length;
            int offset = i - ind * max_length;
            int t = ts[ind];

            int begin = begins[t];
            int length = begins[t + 1] - begins[t];

            if (offset < length) {
              int p = begin + offset;
              int node = paths[p];
              float code = codes[p];
              gx = &gx[ind * c];
              x = &x[ind * c];

              float g = -*gloss * code / (1.0 + exp(wxy[i]));
              for (int j = 0; j < c; ++j) {
                atomicAdd(gx + j, g * w[node * c + j]);
                atomicAdd(gw + node * c + j, g * x[j]);
              }
            }
            ''',
            'binary_hierarchical_softmax_bwd'
        )(self.wxy, gx, self.gW, x, self.W, t, self.paths, self.codes,
          self.begins, gloss, n_in, self.max_length)
        return gx, None
Example #16
 def init_state_gpu(self, param, grad):
     return cuda.zeros_like(param), cuda.zeros_like(param)
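Optimizer state is allocated on the device as one zero buffer per slot, shaped like the parameter. The snippet does not show which optimizer owns these two slots; as one concrete use, AdaDelta keeps exactly two such accumulators per parameter. A minimal numpy sketch of an AdaDelta-style step over buffers like these (hyperparameters illustrative):

import numpy as np

def adadelta_update(param, grad, msg, msdx, rho=0.95, eps=1e-6):
    # msg / msdx: running averages of squared gradients / squared updates,
    # allocated like the two zero buffers returned above.
    msg += (1 - rho) * (grad * grad - msg)
    dx = np.sqrt((msdx + eps) / (msg + eps)) * grad
    msdx += (1 - rho) * (dx * dx - msdx)
    param -= dx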
Example #17
 def init_state_gpu(self, param, grad):
     return cuda.zeros_like(param)
Example #18
def _zeros_like(x):
    if isinstance(x, numpy.ndarray):
        return numpy.zeros_like(x)
    else:
        return cuda.zeros_like(x)
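The same CPU/GPU dispatch can be written against CuPy's array-module protocol instead of an explicit isinstance check; a hedged equivalent (hypothetical helper, not part of the snippet above; falls back to numpy when cupy is absent):

import numpy

def _zeros_like_xp(x):
    # cupy.get_array_module returns cupy for device arrays, numpy otherwise.
    try:
        import cupy
        xp = cupy.get_array_module(x)
    except ImportError:
        xp = numpy
    return xp.zeros_like(x)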
Example #20
 def init_state_gpu(self, param, grad):
     n = cuda.zeros_like(param)
     g = cuda.zeros_like(param)
     delta = cuda.zeros_like(param)
     return n, g, delta