def forward(self, inputs):
    """Compute the bilinear product y[i, l] = sum_{j,k} e1[i,j] * e2[i,k] * W[j,k,l].

    ``inputs`` is ``(e1, e2, W)`` or ``(e1, e2, W, V1, V2, b)``; with the
    optional linear terms present, ``y += e1.V1 + e2.V2 + b``.
    Returns a one-element tuple ``(y,)``.
    """
    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]
    # Consistency fix: the sibling forward implementations in this file
    # reject mixed numpy/cupy inputs up front; this one did not.
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together\n'
                         'type(W): {0}, type(e1): {1}, type(e2): {2}'
                         .format(type(W), type(e1), type(e2)))
    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        y = numpy.einsum('ij,ik,jkl->il', e1, e2, W)
    else:
        i_len, j_len = e1.shape
        k_len = e2.shape[1]
        # 'ij,ik->ijk'
        e1e2 = e1[:, :, None] * e2[:, None, :]
        # ijk->i[jk]
        e1e2 = e1e2.reshape(i_len, j_len * k_len)
        # jkl->[jk]l
        W_mat = W.reshape(-1, W.shape[2])
        # 'i[jk],[jk]l->il'
        y = e1e2.dot(W_mat)
    if len(inputs) == 6:
        V1, V2, b = inputs[3:]
        y += e1.dot(V1)
        y += e2.dot(V2)
        y += b
    return y,
def forward(self, inputs):
    """Bilinear forward: y = einsum('ij,ik,jkl->il', e1, e2, W) plus optional linear terms."""
    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together\n'
                         'type(W): {0}, type(e1): {1}, type(e2): {2}'
                         .format(type(W), type(e1), type(e2)))
    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        y = numpy.einsum('ij,ik,jkl->il', e1, e2, W)
    else:
        batch, dim1 = e1.shape
        dim2 = e2.shape[1]
        # Row-wise outer product 'ij,ik->ijk', flattened to i[jk].
        outer = (e1[:, :, None] * e2[:, None, :]).reshape(batch, dim1 * dim2)
        # jkl -> [jk]l, so 'i[jk],[jk]l->il' becomes one matmul.
        y = outer.dot(W.reshape(-1, W.shape[2]))
    if len(inputs) == 6:
        V1, V2, b = inputs[3:]
        # Linear terms are added in the same order as the original.
        y += e1.dot(V1)
        y += e2.dot(V2)
        y += b
    return y,
def backward(self, inputs, grad_outputs):
    """Gradients of the bilinear product w.r.t. e1, e2, W (and V1, V2, b).

    Returns ``(ge1, ge2, gW)`` or ``(ge1, ge2, gW, gV1, gV2, gb)`` when the
    linear terms are present (``len(inputs) == 6``).
    """
    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]
    gy = grad_outputs[0]

    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        gW = numpy.einsum('ij,ik,il->jkl', e1, e2, gy)
        ge1 = numpy.einsum('ik,jkl,il->ij', e2, W, gy)
        ge2 = numpy.einsum('ij,jkl,il->ik', e1, W, gy)
    else:
        # Generic reduction kernel: elementwise triple product summed over
        # the reduction axes chosen at call time (emulates the einsums).
        kern = cuda.reduce('T in0, T in1, T in2', 'T out',
                           'in0 * in1 * in2', 'a + b', 'out = a', 0,
                           'bilinear_product')

        e1_b = e1[:, :, None, None]  # ij
        e2_b = e2[:, None, :, None]  # ik
        gy_b = gy[:, None, None, :]  # il
        W_b = W[None, :, :, :]  # jkl

        gW = kern(e1_b, e2_b, gy_b, axis=0)  # 'ij,ik,il->jkl'
        ge1 = kern(e2_b, W_b, gy_b, axis=(2, 3))  # 'ik,jkl,il->ij'
        ge2 = kern(e1_b, W_b, gy_b, axis=(1, 3))  # 'ij,jkl,il->ik'

    ret = ge1.reshape(inputs[0].shape), ge2.reshape(inputs[1].shape), gW

    if len(inputs) == 6:
        # Linear terms present: also produce gradients for V1, V2 and b.
        V1, V2, b = inputs[3:]
        gV1 = e1.T.dot(gy)
        gV2 = e2.T.dot(gy)
        gb = gy.sum(0)
        ge1 += gy.dot(V1.T)
        ge2 += gy.dot(V2.T)
        ret += gV1, gV2, gb

    return ret
def forward(self, inputs):
    """Bilinear forward pass.

    Rejects mixed numpy/cupy inputs, then computes
    ``y = einsum('ij,ik,jkl->il', e1, e2, W)`` (the GPU path uses an
    explicit flattened outer product instead of einsum) and adds the
    optional linear terms ``e1.V1 + e2.V2 + b`` when present.
    """
    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]

    if not type_check.same_types(*inputs):
        raise ValueError(
            'numpy and cupy must not be used together\n'
            'type(W): {0}, type(e1): {1}, type(e2): {2}'.format(
                type(W), type(e1), type(e2)))

    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        y = numpy.einsum('ij,ik,jkl->il', e1, e2, W)
    else:
        i_len, j_len = e1.shape
        k_len = e2.shape[1]
        # 'ij,ik->ijk'
        e1e2 = e1[:, :, None] * e2[:, None, :]
        # ijk->i[jk]
        e1e2 = e1e2.reshape(i_len, j_len * k_len)
        # jkl->[jk]l
        W_mat = W.reshape(-1, W.shape[2])
        # 'i[jk],[jk]l->il'
        y = e1e2.dot(W_mat)

    if len(inputs) == 6:
        V1, V2, b = inputs[3:]
        y += e1.dot(V1)
        y += e2.dot(V2)
        y += b
    return y,
def backward(self, inputs, grad_outputs):
    """Backward pass of the bilinear function.

    Returns ``(ge1, ge2, gW)``, extended with ``(gV1, gV2, gb)`` when the
    linear terms were supplied (``len(inputs) == 6``).
    """
    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]
    gy = grad_outputs[0]

    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        gW = numpy.einsum("ij,ik,il->jkl", e1, e2, gy)
        ge1 = numpy.einsum("ik,jkl,il->ij", e2, W, gy)
        ge2 = numpy.einsum("ij,jkl,il->ik", e1, W, gy)
    else:
        # Reduction kernel emulating the three einsums above; the reduction
        # axes passed at each call select which contraction is performed.
        kern = cuda.reduce(
            "T in0, T in1, T in2", "T out",
            "in0 * in1 * in2", "a + b", "out = a", 0,
            "bilinear_product"
        )

        e1_b = e1[:, :, None, None]  # ij
        e2_b = e2[:, None, :, None]  # ik
        gy_b = gy[:, None, None, :]  # il
        W_b = W[None, :, :, :]  # jkl

        gW = kern(e1_b, e2_b, gy_b, axis=0)  # 'ij,ik,il->jkl'
        ge1 = kern(e2_b, W_b, gy_b, axis=(2, 3))  # 'ik,jkl,il->ij'
        ge2 = kern(e1_b, W_b, gy_b, axis=(1, 3))  # 'ij,jkl,il->ik'

    ret = ge1.reshape(inputs[0].shape), ge2.reshape(inputs[1].shape), gW

    if len(inputs) == 6:
        # Gradients for the optional linear terms.
        V1, V2, b = inputs[3:]
        gV1 = e1.T.dot(gy)
        gV2 = e2.T.dot(gy)
        gb = gy.sum(0)
        ge1 += gy.dot(V1.T)
        ge2 += gy.dot(V2.T)
        ret += gV1, gV2, gb

    return ret
def forward(self, inputs):
    """Bilinear forward; the GPU path avoids einsum via a flattened outer product."""
    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]
    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        out = numpy.einsum("ij,ik,jkl->il", e1, e2, W)
    else:
        rows, d1 = e1.shape
        d2 = e2.shape[1]
        # 'ij,ik->ijk' flattened to i[jk]
        pair = (e1[:, :, None] * e2[:, None, :]).reshape(rows, d1 * d2)
        # 'i[jk],[jk]l->il' as a single matmul against [jk]l
        out = pair.dot(W.reshape(-1, W.shape[2]))
    if len(inputs) == 6:
        V1, V2, b = inputs[3:]
        out += e1.dot(V1)
        out += e2.dot(V2)
        out += b
    return (out,)
def forward(self, inputs): e1 = array.as_mat(inputs[0]) #print 'e1.shape', #print e1.shape e2 = array.as_mat(inputs[1]) #print 'e2.shape', #print e2.shape W = inputs[2] #print 'W.shape', #print W.shape #modified algorythm y = e1 + e2 - e2.sum(1).reshape(len(e2), 1) / len(e2[0]) #print 'y.dtype', #print y.dtype print 'cupy.max(e1) = ', print cupy.max(e1) print 'cupy.min(e1) = ', print cupy.min(e1) print 'cupy.max(e2) = ', print cupy.max(e2) print 'cupy.min(e2) = ', print cupy.min(e2) print 'cupy.max(y) = ', print cupy.max(y) print 'cupy.min(y) = ', print cupy.min(y) #sum_e1e2.astype(dtype=e1.dtype, copy=False) #print 'y.shape', #print y.shape #print 'e2.sum(1).reshape(len(e2), 1).shape', #print e2.sum(1).reshape(len(e2), 1).shape ''' xp = cuda.get_array_module(*inputs) if xp is numpy: y = numpy.einsum('ij,ik,jkl->il', e1, e2, W) else: i_len, j_len = e1.shape k_len = e2.shape[1] # 'ij,ik->ijk' e1e2 = e1[:, :, None] * e2[:, None, :] # ijk->i[jk] e1e2 = e1e2.reshape(i_len, j_len * k_len) # jkl->[jk]l W_mat = W.reshape(-1, W.shape[2]) # 'i[jk],[jk]l->il' y = e1e2.dot(W_mat) if len(inputs) == 6: V1, V2, b = inputs[3:] y += e1.dot(V1) y += e2.dot(V2) y += b ''' #print 'y.shape', #print y.shape return y,
def backward_gpu(self, x, gy):
    """GPU backward: accumulates parameter grads in-place, returns input grads.

    ``self.gW`` (and, unless ``self.nobias``, ``self.gV1``/``gV2``/``gb``)
    are accumulated with ``+=`` semantics; the return value is
    ``(ge1, ge2)`` reshaped to the original input shapes.
    """
    e1 = array.as_mat(x[0])
    e2 = array.as_mat(x[1])
    gy, = gy

    # kern_add accumulates into an existing buffer ('out += a'),
    # kern writes a fresh result ('out = a').
    kern_add = cuda.reduce('T in0, T in1, T in2', 'T out',
                           'in0 * in1 * in2', 'a + b', 'out += a', 0,
                           'bilinear_product_add')
    kern = cuda.reduce('T in0, T in1, T in2', 'T out',
                       'in0 * in1 * in2', 'a + b', 'out = a', 0,
                       'bilinear_product')

    e1_b = e1[:, :, numpy.newaxis, numpy.newaxis]  # ij
    e2_b = e2[:, numpy.newaxis, :, numpy.newaxis]  # ik
    gy_b = gy[:, numpy.newaxis, numpy.newaxis, :]  # il
    W_b = self.W[numpy.newaxis, :, :, :]  # jkl

    # 'ij,ik,il->jkl'
    kern_add(e1_b, e2_b, gy_b, self.gW, axis=0)

    if not self.nobias:
        self.gV1 += e1.T.dot(gy)
        self.gV2 += e2.T.dot(gy)
        self.gb += gy.sum(axis=0)

    # 'ik,jkl,il->ij'
    ge1 = kern(e2_b, W_b, gy_b, axis=(2, 3))
    # 'ij,jkl,il->ik'
    ge2 = kern(e1_b, W_b, gy_b, axis=(1, 3))

    if not self.nobias:
        ge1 += gy.dot(self.V1.T)
        ge2 += gy.dot(self.V2.T)

    return (ge1.reshape(x[0].shape), ge2.reshape(x[1].shape))
def forward_cpu(self, x):
    """CPU forward: bilinear term via einsum, plus optional linear terms."""
    mat1 = array.as_mat(x[0])
    mat2 = array.as_mat(x[1])
    y = numpy.einsum('ij,ik,jkl->il', mat1, mat2, self.W)
    if not self.nobias:
        # Accumulate the linear terms in the same order as before.
        for term in (mat1.dot(self.V1), mat2.dot(self.V2), self.b):
            y += term
    return y,
def _matmul(a, b, transa=False, transb=False, transout=False):
    """Matrix product of (flattened) ``a`` and ``b`` with optional transposes."""
    lhs = array.as_mat(a)
    rhs = array.as_mat(b)
    lhs = lhs.T if transa else lhs
    rhs = rhs.T if transb else rhs
    if transout:
        # (A B)^T = B^T A^T
        lhs, rhs = rhs.T, lhs.T
    return lhs.dot(rhs)
def setUp(self):
    """Build inputs with extra singleton/split axes and the expected output."""
    super(TestBilinearWOBias2, self).setUp()
    assert self.in_shape[1] % 2 == 0
    self.e1 = _uniform(self.batch_size, 1, self.in_shape[0])
    self.e2 = _uniform(self.batch_size, 2, self.in_shape[1] // 2)
    self.gy = _uniform(self.batch_size, self.out_size)
    # Expected output uses the flattened (matrix) views of e1/e2.
    m1, m2 = array.as_mat(self.e1), array.as_mat(self.e2)
    self.y = numpy.einsum('ij,ik,jkl->il', m1, m2, self.W)
def setUp(self):
    """Prepare multi-axis e1/e2, upstream gradient, and reference output."""
    super(TestBilinearWOBias2, self).setUp()
    assert self.in_shape[1] % 2 == 0
    half = self.in_shape[1] // 2
    self.e1 = _uniform(self.batch_size, 1, self.in_shape[0])
    self.e2 = _uniform(self.batch_size, 2, half)
    self.gy = _uniform(self.batch_size, self.out_size)
    flat1 = array.as_mat(self.e1)
    flat2 = array.as_mat(self.e2)
    self.y = numpy.einsum("ij,ik,jkl->il", flat1, flat2, self.W)
def setUp(self):
    """Prepare inputs and the expected output including the linear terms."""
    super(TestBilinear2, self).setUp()
    assert self.in_shape[1] % 2 == 0
    self.e1 = _uniform(self.batch_size, 1, self.in_shape[0])
    self.e2 = _uniform(self.batch_size, self.in_shape[1] // 2, 2)
    self.gy = _uniform(self.batch_size, self.out_size)
    e1 = array.as_mat(self.e1)
    e2 = array.as_mat(self.e2)
    # bilinear term first, then V1/V2/b in the same addition order.
    bilinear = numpy.einsum('ij,ik,jkl->il', e1, e2, self.W)
    self.y = bilinear + e1.dot(self.V1) + e2.dot(self.V2) + self.b
def forward_gpu(self, x):
    """GPU forward using the legacy cuda.elementwise / culinalg API.

    Computes the row-wise outer product of e1 and e2 with a custom kernel,
    then one matmul against the flattened W; adds V1/V2/b unless nobias.
    """
    i_len, j_len = array.as_mat(x[0]).shape
    k_len = array.as_mat(x[1]).shape[1]
    l_len = self.W.shape[2]

    # When indices are enclosed with [], they are 'flatten'
    # (i.e. linealized as 1-D array)

    # ij->[ij]
    e1 = array.as_vec(x[0])
    # ik->[ik]
    e2 = array.as_vec(x[1])

    e1e2 = cuda.empty(i_len * j_len * k_len, dtype=numpy.float32)
    # '[ij],[ik]->[ijk]'
    cuda.elementwise(
        'float* y, float* e1, float* e2, int e1c, int e2c',
        '''
           int I = i / e1c / e2c;
           int J = (i - I * e1c * e2c) / e2c;
           int K = i % e2c;
           y[i] = e1[I * e1c + J] * e2[I * e2c + K];
        ''',
        'row_wise_outer_product')(
            e1e2, e1, e2, j_len, k_len)

    # [ijk]->i[jk]
    e1e2 = e1e2.reshape(i_len, j_len * k_len)
    # jkl->[jk]l
    W_mat = self.W.reshape(
        self.W.shape[0] * self.W.shape[1], self.W.shape[2])
    y = cuda.empty((i_len, l_len), dtype=numpy.float32)
    with cuda.using_cumisc():
        # 'i[jk],[jk]l->il'
        cuda.culinalg.dot(e1e2, W_mat, out=y)

    if not self.nobias:
        e1 = array.as_mat(x[0])
        e2 = array.as_mat(x[1])
        with cuda.using_cumisc():
            # ij,jl->il
            cuda.culinalg.add_dot(e1, self.V1, y)
            # ik,kl->il
            cuda.culinalg.add_dot(e2, self.V2, y)
        cuda.elementwise(
            'float* y, float* b, int n_channel',
            'y[i] += b[i % n_channel]',
            'linear_bias')(y, self.b, self.b.size)
    return y,
def backward(self, indexes, grad_outputs):
    """Double backward of the bilinear function.

    Combines the partial second-order terms (each computed by an
    ``_ij_..._to_...`` einsum helper) into gradients for e1, e2, W,
    optionally V1/V2, and the original upstream gradient gy.
    """
    inputs = self.get_retained_inputs()
    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W, gy = inputs[2], inputs[-1]

    gge1 = array.as_mat(grad_outputs[0])
    gge2 = array.as_mat(grad_outputs[1])
    ggW = grad_outputs[2]

    # d(ge1)/d{e2, W, gy}
    dge1_de2 = _ij_il_jkl_to_ik(gge1, gy, W)
    dge1_dW = _ij_ik_il_to_jkl(gge1, e2, gy)
    dge1_dgy = _ij_ik_jkl_to_il(gge1, e2, W)

    # d(ge2)/d{e1, W, gy}
    dge2_de1 = _ik_il_jkl_to_ij(gge2, gy, W)
    dge2_dW = _ij_ik_il_to_jkl(e1, gge2, gy)
    dge2_dgy = _ij_ik_jkl_to_il(e1, gge2, W)

    # d(gW)/d{e1, e2, gy}
    dgW_de1 = _ik_il_jkl_to_ij(e2, gy, ggW)
    dgW_de2 = _ij_il_jkl_to_ik(e1, gy, ggW)
    dgW_dgy = _ij_ik_jkl_to_il(e1, e2, ggW)

    ge1 = dgW_de1 + dge2_de1
    ge2 = dgW_de2 + dge1_de2
    gW = dge1_dW + dge2_dW
    ggy = dgW_dgy + dge1_dgy + dge2_dgy

    if len(inputs) == 6:
        # Linear terms present (inputs are e1, e2, W, V1, V2, gy).
        V1, V2 = inputs[3], inputs[4]
        ggV1, ggV2, ggb = grad_outputs[3:]

        gV1 = chainer.functions.matmul(gge1, gy, transa=True)
        gV2 = chainer.functions.matmul(gge2, gy, transa=True)

        ge1 += chainer.functions.matmul(gy, ggV1, transb=True)
        ge2 += chainer.functions.matmul(gy, ggV2, transb=True)
        ggy += chainer.functions.matmul(gge1, V1)
        ggy += chainer.functions.matmul(gge2, V2)
        ggy += chainer.functions.matmul(e1, ggV1)
        ggy += chainer.functions.matmul(e2, ggV2)
        ggy += chainer.functions.broadcast_to(ggb, ggy.shape)

    ge1 = ge1.reshape(inputs[0].shape)
    ge2 = ge2.reshape(inputs[1].shape)

    if len(inputs) == 6:
        return ge1, ge2, gW, gV1, gV2, ggy
    return ge1, ge2, gW, ggy
def backward_cpu(self, x, gy):
    """CPU backward for the three-input linear combination.

    Accumulates parameter gradients into ``self.gV1/gV2/gV3/gb`` (unless
    ``self.nobias``) and returns the input gradients ``(ge1, ge2, ge3)``.
    """
    e1 = array.as_mat(x[0])
    e2 = array.as_mat(x[1])
    e3 = array.as_mat(x[2])
    gy, = gy
    if not self.nobias:
        self.gV1 += e1.T.dot(gy)
        self.gV2 += e2.T.dot(gy)
        self.gV3 += e3.T.dot(gy)
        self.gb += gy.sum(0)
    # Fix: input gradients are needed regardless of the bias flag — the
    # original computed them only under `not self.nobias` (NameError
    # otherwise) and reshaped ge3 with x[1]'s shape instead of x[2]'s.
    ge1 = gy.dot(self.V1.T)
    ge2 = gy.dot(self.V2.T)
    ge3 = gy.dot(self.V3.T)
    return (ge1.reshape(x[0].shape),
            ge2.reshape(x[1].shape),
            ge3.reshape(x[2].shape))
def backward(self, x, gy):
    """Backward for the categorynumber-way linear combination.

    Accumulates ``self.gV[i]`` and ``self.gb`` (unless ``self.nobias``)
    and returns one input gradient per category.
    """
    e = [array.as_mat(x[i]) for i in range(self.categorynumber)]
    gy, = gy
    if not self.nobias:
        for i in range(self.categorynumber):
            self.gV[i] += e[i].T.dot(gy)
        self.gb += gy.sum(0)
    # Fix: input gradients must be returned even when nobias is set; the
    # original built them only inside `if not self.nobias` and raised
    # NameError otherwise.
    return tuple(gy.dot(self.V[i].T).reshape(x[i].shape)
                 for i in range(self.categorynumber))
def get_norm(W, expand=False):
    """Row-wise L2 norm of W (flattened to 2-D), stabilized by 1e-12."""
    xp = cuda.get_array_module(W)
    norm = xp.linalg.norm(array.as_mat(W), axis=1) + 1e-12
    if not expand:
        return norm
    # Reinstate singleton axes so the norm broadcasts against W itself.
    return norm.reshape((W.shape[0],) + (1,) * (W.ndim - 1))
def forward_gpu(self, inputs):
    """GPU forward: squared L2 norm along axis 1; retains x for backward."""
    self.retain_inputs((0,))
    mat = array.as_mat(inputs[0])
    sq_kernel = cuda.cupy.ReductionKernel(
        'T x', 'T y', 'x * x', 'a + b', 'y = a', '0', 'l2normsquared')
    return sq_kernel(mat, axis=1),
def backward_cpu(self, x, gy):
    """CPU backward for the three-input linear combination.

    Accumulates ``self.gV[i]`` and ``self.gb`` (unless ``self.nobias``)
    and returns the three input gradients.
    """
    e = [array.as_mat(x[i]) for i in range(3)]
    gy, = gy
    if not self.nobias:
        for i in range(3):
            self.gV[i] += e[i].T.dot(gy)
        self.gb += gy.sum(0)
    # Fix: the original had an unbalanced parenthesis in the append call
    # (SyntaxError) and only computed the input gradients when bias terms
    # were enabled (NameError otherwise). Compute and return them always.
    return tuple(gy.dot(self.V[i].T).reshape(x[i].shape) for i in range(3))
def backward_cpu(self, x, gy):
    """CPU backward of the bilinear function via einsum contractions."""
    e1, e2 = array.as_mat(x[0]), array.as_mat(x[1])
    gy, = gy
    # Accumulate the weight gradient: 'ij,ik,il->jkl'.
    self.gW += numpy.einsum('ij,ik,il->jkl', e1, e2, gy)
    if not self.nobias:
        self.gV1 += e1.T.dot(gy)
        self.gV2 += e2.T.dot(gy)
        self.gb += gy.sum(0)
    # Input gradients from the bilinear term.
    ge1 = numpy.einsum('ik,jkl,il->ij', e2, self.W, gy)
    ge2 = numpy.einsum('ij,jkl,il->ik', e1, self.W, gy)
    if not self.nobias:
        # Plus the contribution of the linear terms.
        ge1 += gy.dot(self.V1.T)
        ge2 += gy.dot(self.V2.T)
    return ge1.reshape(x[0].shape), ge2.reshape(x[1].shape)
def forward_gpu(self, x):
    """GPU forward via the legacy cuda.elementwise / culinalg API.

    Builds the row-wise outer product of e1 and e2 with a custom kernel,
    multiplies by the flattened W, then adds V1/V2/b unless nobias.
    """
    i_len, j_len = array.as_mat(x[0]).shape
    k_len = array.as_mat(x[1]).shape[1]
    l_len = self.W.shape[2]

    # When indices are enclosed with [], they are 'flatten'
    # (i.e. linealized as 1-D array)

    # ij->[ij]
    e1 = array.as_vec(x[0])
    # ik->[ik]
    e2 = array.as_vec(x[1])

    e1e2 = cuda.empty(i_len * j_len * k_len, dtype=numpy.float32)
    # '[ij],[ik]->[ijk]'
    cuda.elementwise(
        'float* y, float* e1, float* e2, int e1c, int e2c',
        '''
           int I = i / e1c / e2c;
           int J = (i - I * e1c * e2c) / e2c;
           int K = i % e2c;
           y[i] = e1[I * e1c + J] * e2[I * e2c + K];
        ''',
        'row_wise_outer_product')(e1e2, e1, e2, j_len, k_len)

    # [ijk]->i[jk]
    e1e2 = e1e2.reshape(i_len, j_len * k_len)
    # jkl->[jk]l
    W_mat = self.W.reshape(self.W.shape[0] * self.W.shape[1],
                           self.W.shape[2])
    y = cuda.empty((i_len, l_len), dtype=numpy.float32)
    with cuda.using_cumisc():
        # 'i[jk],[jk]l->il'
        cuda.culinalg.dot(e1e2, W_mat, out=y)

    if not self.nobias:
        e1 = array.as_mat(x[0])
        e2 = array.as_mat(x[1])
        with cuda.using_cumisc():
            # ij,jl->il
            cuda.culinalg.add_dot(e1, self.V1, y)
            # ik,kl->il
            cuda.culinalg.add_dot(e2, self.V2, y)
        cuda.elementwise('float* y, float* b, int n_channel',
                         'y[i] += b[i % n_channel]',
                         'linear_bias')(y, self.b, self.b.size)
    return y,
def forward_gpu(self, inputs):
    """GPU forward: each row of x divided by its eps-stabilized L2 norm."""
    x = array.as_mat(inputs[0])
    norm_kernel = cuda.cupy.ReductionKernel(
        'T x, float32 eps', 'T y', 'x * x', 'a + b',
        'y = sqrt(a) + eps', '0', 'l2norm')
    row_norm = norm_kernel(x, self.eps, axis=1).reshape(-1, 1)
    return x / cuda.cupy.broadcast_to(row_norm, x.shape),
def backward(self, inputs, grad_outputs):
    """Backward of the bilinear function with a numpy/cupy mixing guard.

    Returns ``(ge1, ge2, gW)``, extended with ``(gV1, gV2, gb)`` when the
    linear terms were supplied.
    """
    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]

    if not type_check.same_types(*inputs):
        raise ValueError(
            'numpy and cupy must not be used together\n'
            'type(W): {0}, type(e1): {1}, type(e2): {2}'.format(
                type(W), type(e1), type(e2)))

    gy = grad_outputs[0]

    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        gW = numpy.einsum('ij,ik,il->jkl', e1, e2, gy)
        ge1 = numpy.einsum('ik,jkl,il->ij', e2, W, gy)
        ge2 = numpy.einsum('ij,jkl,il->ik', e1, W, gy)
    else:
        # Reduction kernel emulating the einsums; the axis argument at each
        # call selects the contraction.
        kern = cuda.reduce('T in0, T in1, T in2', 'T out',
                           'in0 * in1 * in2', 'a + b', 'out = a', 0,
                           'bilinear_product')

        e1_b = e1[:, :, None, None]  # ij
        e2_b = e2[:, None, :, None]  # ik
        gy_b = gy[:, None, None, :]  # il
        W_b = W[None, :, :, :]  # jkl

        gW = kern(e1_b, e2_b, gy_b, axis=0)  # 'ij,ik,il->jkl'
        ge1 = kern(e2_b, W_b, gy_b, axis=(2, 3))  # 'ik,jkl,il->ij'
        ge2 = kern(e1_b, W_b, gy_b, axis=(1, 3))  # 'ij,jkl,il->ik'

    ret = ge1.reshape(inputs[0].shape), ge2.reshape(inputs[1].shape), gW

    if len(inputs) == 6:
        V1, V2, b = inputs[3:]
        gV1 = e1.T.dot(gy)
        gV2 = e2.T.dot(gy)
        gb = gy.sum(0)
        ge1 += gy.dot(V1.T)
        ge2 += gy.dot(V2.T)
        ret += gV1, gV2, gb

    return ret
def backward(self, inputs, grad_outputs):
    """Backward of the bilinear function; rejects mixed numpy/cupy inputs.

    Returns ``(ge1, ge2, gW)`` plus ``(gV1, gV2, gb)`` when linear terms
    are present.
    """
    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]

    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together\n'
                         'type(W): {0}, type(e1): {1}, type(e2): {2}'
                         .format(type(W), type(e1), type(e2)))

    gy = grad_outputs[0]

    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        gW = numpy.einsum('ij,ik,il->jkl', e1, e2, gy)
        ge1 = numpy.einsum('ik,jkl,il->ij', e2, W, gy)
        ge2 = numpy.einsum('ij,jkl,il->ik', e1, W, gy)
    else:
        # Generic triple-product reduction kernel (see einsums above).
        kern = cuda.reduce('T in0, T in1, T in2', 'T out',
                           'in0 * in1 * in2', 'a + b', 'out = a', 0,
                           'bilinear_product')

        e1_b = e1[:, :, None, None]  # ij
        e2_b = e2[:, None, :, None]  # ik
        gy_b = gy[:, None, None, :]  # il
        W_b = W[None, :, :, :]  # jkl

        gW = kern(e1_b, e2_b, gy_b, axis=0)  # 'ij,ik,il->jkl'
        ge1 = kern(e2_b, W_b, gy_b, axis=(2, 3))  # 'ik,jkl,il->ij'
        ge2 = kern(e1_b, W_b, gy_b, axis=(1, 3))  # 'ij,jkl,il->ik'

    ret = ge1.reshape(inputs[0].shape), ge2.reshape(inputs[1].shape), gW

    if len(inputs) == 6:
        V1, V2, b = inputs[3:]
        gV1 = e1.T.dot(gy)
        gV2 = e2.T.dot(gy)
        gb = gy.sum(0)
        ge1 += gy.dot(V1.T)
        ge2 += gy.dot(V2.T)
        ret += gV1, gV2, gb

    return ret
def forward(self, inputs):
    """Bilinear forward via xp.einsum; retains all inputs for backward."""
    self.retain_inputs(tuple(range(len(inputs))))

    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]

    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together\n'
                         'type(W): {0}, type(e1): {1}, type(e2): {2}'
                         .format(type(W), type(e1), type(e2)))

    xp = cuda.get_array_module(*inputs)
    y = xp.einsum('ij,ik,jkl->il', e1, e2, W)

    if len(inputs) == 6:
        V1, V2, b = inputs[3:]
        # Add the linear terms in the original order.
        for term in (e1.dot(V1), e2.dot(V2), b):
            y += term
    return y,
def forward(self, inputs):
    """Forward of the bilinear function (einsum path), retaining all inputs."""
    self.retain_inputs(tuple(range(len(inputs))))
    e1, e2 = array.as_mat(inputs[0]), array.as_mat(inputs[1])
    W = inputs[2]
    if not type_check.same_types(*inputs):
        raise ValueError(
            'numpy and cupy must not be used together\n'
            'type(W): {0}, type(e1): {1}, type(e2): {2}'.format(
                type(W), type(e1), type(e2)))
    xp = cuda.get_array_module(*inputs)
    out = xp.einsum('ij,ik,jkl->il', e1, e2, W)
    if len(inputs) == 6:
        # Optional linear terms: e1.V1 + e2.V2 + b.
        V1, V2, b = inputs[3:]
        out += e1.dot(V1)
        out += e2.dot(V2)
        out += b
    return out,
def forward_gpu(self, x):
    """GPU forward via an explicit row-wise outer product and one matmul."""
    e1 = array.as_mat(x[0])
    e2 = array.as_mat(x[1])
    n_rows, dim1 = e1.shape
    dim2 = e2.shape[1]
    # 'ij,ik->ijk', immediately flattened to i[jk].
    outer = (e1[:, :, numpy.newaxis] * e2[:, numpy.newaxis, :]).reshape(
        n_rows, dim1 * dim2)
    # jkl -> [jk]l, so 'i[jk],[jk]l->il' is one dot product.
    w_flat = self.W.reshape(self.W.shape[0] * self.W.shape[1],
                            self.W.shape[2])
    y = outer.dot(w_flat)
    if not self.nobias:
        y += e1.dot(self.V1)
        y += e2.dot(self.V2)
        y += self.b
    return y,
def forward(self, inputs):
    """Forward of the bilinear gradient function.

    Returns ``(ge1, ge2, gW)``, extended with ``(gV1, gV2, gb)`` when the
    linear-term parameters were supplied (``len(inputs) == 6``).
    """
    self.retain_inputs(tuple(range(len(inputs))))
    mat1 = array.as_mat(inputs[0])
    mat2 = array.as_mat(inputs[1])
    W, gy = inputs[2], inputs[-1]
    xp = cuda.get_array_module(*inputs)

    grad_e1 = xp.einsum('ik,jkl,il->ij', mat2, W, gy)
    grad_e2 = xp.einsum('ij,jkl,il->ik', mat1, W, gy)
    grad_W = xp.einsum('ij,ik,il->jkl', mat1, mat2, gy)

    result = (grad_e1.reshape(inputs[0].shape),
              grad_e2.reshape(inputs[1].shape),
              grad_W)
    if len(inputs) == 6:
        V1, V2 = inputs[3], inputs[4]
        grad_V1 = mat1.T.dot(gy)
        grad_V2 = mat2.T.dot(gy)
        grad_b = gy.sum(0)
        # In-place adds update the arrays already placed in `result`.
        grad_e1 += gy.dot(V1.T)
        grad_e2 += gy.dot(V2.T)
        result += grad_V1, grad_V2, grad_b
    return result
def backward_gpu(self, x, gy):
    """GPU backward: accumulate parameter grads in-place, return input grads.

    ``self.gW`` (and ``gV1``/``gV2``/``gb`` unless ``self.nobias``) are
    updated with ``+=`` semantics; returns ``(ge1, ge2)``.
    """
    e1 = array.as_mat(x[0])
    e2 = array.as_mat(x[1])
    gy, = gy

    # kern_add accumulates into an existing buffer ('out += a'),
    # kern overwrites ('out = a').
    kern_add = cuda.reduce(
        'T in0, T in1, T in2', 'T out',
        'in0 * in1 * in2', 'a + b', 'out += a', 0,
        'bilinear_product_add')
    kern = cuda.reduce(
        'T in0, T in1, T in2', 'T out',
        'in0 * in1 * in2', 'a + b', 'out = a', 0,
        'bilinear_product')

    e1_b = e1[:, :, numpy.newaxis, numpy.newaxis]  # ij
    e2_b = e2[:, numpy.newaxis, :, numpy.newaxis]  # ik
    gy_b = gy[:, numpy.newaxis, numpy.newaxis, :]  # il
    W_b = self.W[numpy.newaxis, :, :, :]  # jkl

    # 'ij,ik,il->jkl'
    kern_add(e1_b, e2_b, gy_b, self.gW, axis=0)

    if not self.nobias:
        self.gV1 += e1.T.dot(gy)
        self.gV2 += e2.T.dot(gy)
        self.gb += gy.sum(axis=0)

    # 'ik,jkl,il->ij'
    ge1 = kern(e2_b, W_b, gy_b, axis=(2, 3))
    # 'ij,jkl,il->ik'
    ge2 = kern(e1_b, W_b, gy_b, axis=(1, 3))

    if not self.nobias:
        ge1 += gy.dot(self.V1.T)
        ge2 += gy.dot(self.V2.T)

    return (ge1.reshape(x[0].shape), ge2.reshape(x[1].shape))
def forward(self, inputs):
    # NOTE(review): `inputs` is unpacked into exactly (x, V, b); x and V are
    # presumably sequences of length self.categorynumber — confirm against
    # the caller. When nobias is set, only the first category contributes.
    x, V, b = inputs
    e = {}
    for i in range(0, self.categorynumber):
        e[i] = array.as_mat(x[i])
    #y = numpy.einsum('ij,ik,jkl->il', e1, e2,e3, self.W)
    #y = e1.dot(self.V1)
    y = e[0].dot(V[0])
    if not self.nobias:
        #y += e1.dot(self.V1)
        #y += e2.dot(self.V2)
        #y += e3.dot(self.V3)
        for i in range(1, self.categorynumber):
            y += e[i].dot(V[i])
        y += b
    return y,
def forward_gpu(self, inputs):
    """GPU forward: L2-normalize each row of x, stabilized by self.eps."""
    mat = array.as_mat(inputs[0])
    kernel = cuda.cupy.ReductionKernel(
        'T x, float32 eps', 'T y',
        'x * x', 'a + b', 'y = sqrt(a) + eps', '0',
        'l2norm')
    norms = kernel(mat, self.eps, axis=1).reshape(-1, 1)
    normalized = mat / cuda.cupy.broadcast_to(norms, mat.shape)
    return normalized,
def forward_cpu(self, x):
    """CPU forward: sum of three per-category linear maps plus optional bias.

    With nobias set, only the first category's term is returned.
    """
    mats = [array.as_mat(x[i]) for i in range(3)]
    y = mats[0].dot(self.V[0])
    if not self.nobias:
        for i in (1, 2):
            y += mats[i].dot(self.V[i])
        y += self.b
    return y,
def forward(self, x):
    """Forward: sum of per-category linear maps (self.V) plus optional bias.

    With nobias set, only the first category's term is returned.
    """
    mats = [array.as_mat(x[i]) for i in range(self.categorynumber)]
    y = mats[0].dot(self.V[0])
    if not self.nobias:
        for i in range(1, self.categorynumber):
            y += mats[i].dot(self.V[i])
        y += self.b
    return y,
def backward(self, inputs, grad_outputs):
    """Backward for the categorynumber-way linear combination.

    Accumulates ``self.gV[i]`` and ``self.gb`` (unless ``self.nobias``)
    and returns one input gradient per category.
    """
    x, V, b = inputs
    e = [array.as_mat(x[i]) for i in range(self.categorynumber)]
    # Fix: grad_outputs is a tuple of arrays; the original wrote
    # `gy, = grad_outputs[0]`, which unpacks the array itself (taking its
    # first row) instead of taking the tuple element.
    gy = grad_outputs[0]
    if not self.nobias:
        for i in range(self.categorynumber):
            self.gV[i] += e[i].T.dot(gy)
        self.gb += gy.sum(0)
    # Fix: input gradients were only built under `not self.nobias`,
    # raising NameError otherwise. Compute and return them always.
    return tuple(gy.dot(V[i].T).reshape(x[i].shape)
                 for i in range(self.categorynumber))
def forward_gpu(self, inputs):
    """GPU forward: squared L2 norm of each row of x."""
    mat = array.as_mat(inputs[0])
    sqnorm = cuda.cupy.ReductionKernel(
        'T x', 'T y',
        'x * x', 'a + b', 'y = a', '0',
        'l2normsquared')
    return sqnorm(mat, axis=1),
def _ij_ik_jkl_to_il(a, b, c):
    """einsum('ij,ik,jkl->il') built from two differentiable matmuls."""
    outer = chainer.functions.matmul(a[:, :, None], b[:, None, :])  # ijk
    c_flat = c.reshape(-1, c.shape[-1])  # [jk]l
    return chainer.functions.matmul(array.as_mat(outer), c_flat)
def backward(self, inputs, grad_outputs):
    # NOTE(review): experimental backward — the standard bilinear gradient
    # is disabled in the triple-quoted block; gW is returned as zeros so W
    # never receives a gradient; Python 2 prints remain. `time_span` and
    # `cupy` are not defined in this function — presumably module-level
    # names; verify.
    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]
    gy = grad_outputs[0]
    #print 'L7 gy.shape',
    #print gy.shape
    '''
    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        gW = numpy.einsum('ij,ik,il->jkl', e1, e2, gy)
        ge1 = numpy.einsum('ik,jkl,il->ij', e2, W, gy)
        ge2 = numpy.einsum('ij,jkl,il->ik', e1, W, gy)
    else:
        kern = cuda.reduce('T in0, T in1, T in2', 'T out',
                           'in0 * in1 * in2', 'a + b', 'out = a', 0,
                           'bilinear_product')

        e1_b = e1[:, :, None, None]  # ij
        e2_b = e2[:, None, :, None]  # ik
        gy_b = gy[:, None, None, :]  # il
        W_b = W[None, :, :, :]  # jkl

        gW = kern(e1_b, e2_b, gy_b, axis=0)  # 'ij,ik,il->jkl'
        ge1 = kern(e2_b, W_b, gy_b, axis=(2, 3))  # 'ik,jkl,il->ij'
        ge2 = kern(e1_b, W_b, gy_b, axis=(1, 3))  # 'ij,jkl,il->ik'

    ret = ge1.reshape(inputs[0].shape), ge2.reshape(inputs[1].shape), gW

    if len(inputs) == 6:
        V1, V2, b = inputs[3:]
        gV1 = e1.T.dot(gy)
        gV2 = e2.T.dot(gy)
        gb = gy.sum(0)

        ge1 += gy.dot(V1.T)
        ge2 += gy.dot(V2.T)
        ret += gV1, gV2, gb
    '''
    #modified backward calculation
    #calculate ge1
    gy_cube = gy.reshape(len(gy), time_span,
                         -1).astype(dtype=gy.dtype, copy=False)
    #print 'L7 gy_cube.shape=',
    #print gy_cube.shape
    gy_tile = cupy.tile(gy_cube,
                        (1, 1, len(e1[0]))).astype(dtype=gy.dtype,
                                                   copy=False)
    #print 'L7 gy_tile.shape=',
    #print gy_tile.shape
    e2_cube = e2.reshape(len(e2), time_span,
                         -1).astype(dtype=gy.dtype, copy=False)
    #print 'L7 e2_cube.shape=',
    #print e2_cube.shape
    ge1_cube = gy_tile * e2_cube
    #print 'L7 ge1_cube.shape=',
    #print ge1_cube.shape
    #print 'L7 ge1_cube.dtype=',
    #print ge1_cube.dtype
    ge1_sum = cupy.sum(ge1_cube, axis=1).astype(dtype=gy.dtype, copy=False)
    #print 'L7 ge1_sum.shape=',
    #print ge1_sum.shape
    ge1 = ge1_sum.reshape(len(gy), -1).astype(dtype=gy.dtype, copy=False)
    #print 'L7 ge1.shape=',
    #print ge1.shape
    #calculate ge2
    e1_cube = e1.reshape(len(e1), 1, -1).astype(dtype=gy.dtype, copy=False)
    #print 'L7 e1_cube.shape=',
    #print e1_cube.shape
    e1_tile = cupy.tile(e1_cube,
                        (1, time_span, 1)).astype(dtype=gy.dtype,
                                                  copy=False)
    #print 'L7 e1_tile.shape=',
    #print e1_tile.shape
    ge2_cube = e1_tile * gy_tile
    #print 'L7 ge2_cube.shape=',
    #print ge2_cube.shape
    #print 'L7 ge2_cube.dtype=',
    #print ge2_cube.dtype
    ge2 = ge2_cube.reshape(len(gy), -1).astype(dtype=gy.dtype, copy=False)
    #print 'L7 ge2.shape=',
    #print ge2.shape
    #print 'L7 W.shape=',
    #print W.shape
    gW = cupy.zeros((len(W), len(W[0]), len(W[0][0])), dtype=gy.dtype)
    #print 'L7 gW.shape=',
    #print gW.shape
    ret = ge1.reshape(inputs[0].shape), ge2.reshape(inputs[1].shape), gW
    return ret
def forward_cpu(self, inputs):
    """CPU forward: rows of x divided by their eps-stabilized L2 norms."""
    mat = array.as_mat(inputs[0])
    row_norms = numpy.linalg.norm(mat, axis=1) + self.eps
    return mat / row_norms[:, numpy.newaxis],
def _ij_ik_il_to_jkl(a, b, c):
    """einsum('ij,ik,il->jkl') built from two differentiable matmuls."""
    outer = chainer.functions.matmul(a[:, :, None], b[:, None, :])  # ijk
    flat = chainer.functions.matmul(array.as_mat(outer).T, c)  # [jk]l
    return flat.reshape(a.shape[1], b.shape[1], c.shape[1])
def backward_gpu(self, x, gy):
    """GPU backward using legacy cuda.elementwise / culinalg kernels.

    Accumulates self.gW (and gV1/gV2/gb unless nobias) in place and
    returns (ge1, ge2) reshaped to the input shapes.
    """
    i_len, j_len = array.as_mat(x[0]).shape
    k_len = array.as_mat(x[1]).shape[1]
    l_len = gy[0].shape[1]

    # ij->[ij]
    e1 = array.as_vec(x[0])
    # ik->[ik]
    e2 = array.as_vec(x[1])
    gy, = gy
    # il->[il]
    gy_vec = array.as_vec(gy)
    # jkl->[jkl]
    W_vec = array.as_vec(self.W)

    dgW = cuda.empty((j_len * k_len * l_len,), dtype=numpy.float32)
    # '[ij],[ik],[il]->[jkl]'
    cuda.elementwise(
        '''
           float* y, float* e1, float* e2, float* gy, int r,
           int e1c, int e2c, int gyc
        ''',
        '''
           int J = i / e2c / gyc;
           int K = (i - J * e2c * gyc) / gyc;
           int L = i % gyc;
           float yval = 0;
           for (int I = 0; I < r; ++I) {
             int e1idx = I * e1c + J;
             int e2idx = I * e2c + K;
             int gyidx = I * gyc + L;
             yval += e1[e1idx] * e2[e2idx] * gy[gyidx];
           }
           y[i] = yval;
        ''',
        'sum_of_three_ary_tensor_product')(
            dgW, e1, e2, gy_vec, i_len, j_len, k_len, l_len)
    # [jkl]->jkl
    self.gW += dgW.reshape((j_len, k_len, l_len))

    if not self.nobias:
        e1 = array.as_mat(x[0])
        e2 = array.as_mat(x[1])
        with cuda.using_cumisc():
            # ij,il->jl
            cuda.culinalg.add_dot(e1, gy, self.gV1, transa='T')
            # ik,il->kl
            cuda.culinalg.add_dot(e2, gy, self.gV2, transa='T')
        self.gb += cuda.cumisc.sum(gy, 0)

    ge1 = cuda.empty((i_len * j_len,), dtype=numpy.float32)
    # '[ik],[jkl],[il]->[ij]'
    cuda.elementwise(
        '''
           float* y, float* e, float* W, float* gy,
           int ec, int gyc, int gec
        ''',
        '''
           int I = i / gec;
           int J = i % gec;
           float yval = 0;
           for (int K = 0; K < ec; ++K) {
             for (int L = 0; L < gyc; ++L) {
               int eidx = I * ec + K;
               int Widx = J * ec * gyc + K * gyc + L;
               int gyidx = I * gyc + L;
               yval += e[eidx] * W[Widx] * gy[gyidx];
             }
           }
           y[i] = yval;
        ''',
        'ge_kernel')(ge1, e2, W_vec, gy_vec, k_len, l_len, j_len)
    # [ij]->ij
    ge1 = ge1.reshape(i_len, j_len)

    ge2 = cuda.empty((i_len * k_len,), dtype=numpy.float32)
    # '[ij],[jkl],[il]->[ik]'
    cuda.elementwise(
        '''
           float* y, float* e, float* W, float* gy,
           int ec, int gyc, int gec
        ''',
        '''
           int I = i / gec;
           int K = i % gec;
           float yval = 0;
           for (int J = 0; J < ec; ++J) {
             for (int L = 0; L < gyc; ++L) {
               int eidx = I * ec + J;
               int Widx = J * gec * gyc + K * gyc + L;
               int gyidx = I * gyc + L;
               yval += e[eidx] * W[Widx] * gy[gyidx];
             }
           }
           y[i] = yval;
        ''',
        'ge_kernel2')(ge2, e1, W_vec, gy_vec, j_len, l_len, k_len)
    # [ik]->ik
    ge2 = ge2.reshape(i_len, k_len)

    if not self.nobias:
        with cuda.using_cumisc():
            # il,jl->ij
            cuda.culinalg.add_dot(gy, self.V1, ge1, transb='T')
            # il,kl->ik
            cuda.culinalg.add_dot(gy, self.V2, ge2, transb='T')

    return (ge1.reshape(x[0].shape), ge2.reshape(x[1].shape))
def _ij_ik_il_to_jkl(a, b, c):
    """einsum('ij,ik,il->jkl') expressed as a batched outer product and a matmul."""
    ab = chainer.functions.matmul(a[:, :, None], b[:, None, :])  # ijk
    ab_t = array.as_mat(ab).T  # [jk]i
    jkl = chainer.functions.matmul(ab_t, c)  # [jk]l
    return jkl.reshape(a.shape[1], b.shape[1], c.shape[1])
def forward_cpu(self, inputs):
    """CPU forward: squared L2 norm of each row."""
    mat = array.as_mat(inputs[0])
    squared = mat * mat
    return squared.sum(axis=1),
def backward_gpu(self, x, gy):
    """GPU backward via legacy cuda.elementwise / culinalg kernels.

    Accumulates self.gW (and gV1/gV2/gb unless nobias) in place and
    returns (ge1, ge2).
    """
    i_len, j_len = array.as_mat(x[0]).shape
    k_len = array.as_mat(x[1]).shape[1]
    l_len = gy[0].shape[1]

    # ij->[ij]
    e1 = array.as_vec(x[0])
    # ik->[ik]
    e2 = array.as_vec(x[1])
    gy, = gy
    # il->[il]
    gy_vec = array.as_vec(gy)
    # jkl->[jkl]
    W_vec = array.as_vec(self.W)

    dgW = cuda.empty((j_len * k_len * l_len, ), dtype=numpy.float32)
    # '[ij],[ik],[il]->[jkl]'
    cuda.elementwise(
        '''
           float* y, float* e1, float* e2, float* gy, int r,
           int e1c, int e2c, int gyc
        ''',
        '''
           int J = i / e2c / gyc;
           int K = (i - J * e2c * gyc) / gyc;
           int L = i % gyc;
           float yval = 0;
           for (int I = 0; I < r; ++I) {
             int e1idx = I * e1c + J;
             int e2idx = I * e2c + K;
             int gyidx = I * gyc + L;
             yval += e1[e1idx] * e2[e2idx] * gy[gyidx];
           }
           y[i] = yval;
        ''',
        'sum_of_three_ary_tensor_product')(dgW, e1, e2, gy_vec,
                                           i_len, j_len, k_len, l_len)
    # [jkl]->jkl
    self.gW += dgW.reshape((j_len, k_len, l_len))

    if not self.nobias:
        e1 = array.as_mat(x[0])
        e2 = array.as_mat(x[1])
        with cuda.using_cumisc():
            # ij,il->jl
            cuda.culinalg.add_dot(e1, gy, self.gV1, transa='T')
            # ik,il->kl
            cuda.culinalg.add_dot(e2, gy, self.gV2, transa='T')
        self.gb += cuda.cumisc.sum(gy, 0)

    ge1 = cuda.empty((i_len * j_len, ), dtype=numpy.float32)
    # '[ik],[jkl],[il]->[ij]'
    cuda.elementwise(
        '''
           float* y, float* e, float* W, float* gy,
           int ec, int gyc, int gec
        ''',
        '''
           int I = i / gec;
           int J = i % gec;
           float yval = 0;
           for (int K = 0; K < ec; ++K) {
             for (int L = 0; L < gyc; ++L) {
               int eidx = I * ec + K;
               int Widx = J * ec * gyc + K * gyc + L;
               int gyidx = I * gyc + L;
               yval += e[eidx] * W[Widx] * gy[gyidx];
             }
           }
           y[i] = yval;
        ''',
        'ge_kernel')(ge1, e2, W_vec, gy_vec, k_len, l_len, j_len)
    # [ij]->ij
    ge1 = ge1.reshape(i_len, j_len)

    ge2 = cuda.empty((i_len * k_len, ), dtype=numpy.float32)
    # '[ij],[jkl],[il]->[ik]'
    cuda.elementwise(
        '''
           float* y, float* e, float* W, float* gy,
           int ec, int gyc, int gec
        ''',
        '''
           int I = i / gec;
           int K = i % gec;
           float yval = 0;
           for (int J = 0; J < ec; ++J) {
             for (int L = 0; L < gyc; ++L) {
               int eidx = I * ec + J;
               int Widx = J * gec * gyc + K * gyc + L;
               int gyidx = I * gyc + L;
               yval += e[eidx] * W[Widx] * gy[gyidx];
             }
           }
           y[i] = yval;
        ''',
        'ge_kernel2')(ge2, e1, W_vec, gy_vec, j_len, l_len, k_len)
    # [ik]->ik
    ge2 = ge2.reshape(i_len, k_len)

    if not self.nobias:
        with cuda.using_cumisc():
            # il,jl->ij
            cuda.culinalg.add_dot(gy, self.V1, ge1, transb='T')
            # il,kl->ik
            cuda.culinalg.add_dot(gy, self.V2, ge2, transb='T')

    return (ge1.reshape(x[0].shape), ge2.reshape(x[1].shape))
def forward_cpu(self, inputs):
    """CPU forward: squared L2 norm per row; retains x for backward."""
    self.retain_inputs((0,))
    mat = array.as_mat(inputs[0])
    return (mat * mat).sum(axis=1),
def backward(self, inputs, grad_outputs):
    """Backward pass for the modified (W-free) forward.

    These gradient formulas do not follow the standard bilinear form; they
    mirror a custom forward.  NOTE(review): ge1 sums gy over the output
    axis and ge2 subtracts the per-row mean of gy -- confirm these match
    the paired forward before reuse.

    Fixes over the previous revision: removed the Python-2-only debug
    ``print`` statements (a SyntaxError under Python 3) and the large
    commented-out dead-code blocks; gW is now allocated directly with the
    target dtype instead of ``zeros() + astype()``.
    """
    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]  # unused: the modified forward ignores W
    gy = grad_outputs[0]

    # Gradient w.r.t. e1: sum of gy over the output axis, one value per row.
    ge1 = cupy.sum(gy, axis=1).reshape(len(gy), 1).astype(
        dtype=gy.dtype, copy=False)

    # Gradient w.r.t. e2: gy minus the per-row mean of gy broadcast over
    # the output axis.
    gy_sum = cupy.sum(gy, axis=1).reshape(len(gy), 1).astype(
        dtype=gy.dtype, copy=False)
    gy_tile = cupy.tile(gy_sum, len(gy[0])).astype(dtype=gy.dtype, copy=False)
    ge2 = (gy - gy_tile / len(gy[0])).astype(dtype=gy.dtype, copy=False)

    # W receives no gradient, so return all zeros.  NOTE(review): the shape
    # uses e2's width for both trailing axes -- confirm it matches W.shape.
    gW = cupy.zeros((len(e1[0]), len(e2[0]), len(e2[0])), dtype=gy.dtype)

    ret = ge1.reshape(inputs[0].shape), ge2.reshape(inputs[1].shape), gW
    if len(inputs) == 6:
        V1, V2, b = inputs[3:]
        gV1 = e1.T.dot(gy)
        gV2 = e2.T.dot(gy)
        gb = gy.sum(0)
        # NOTE(review): these in-place updates also flow into ret[0]/ret[1]
        # when reshape returned a view (original code had the same order).
        ge1 += gy.dot(V1.T)
        ge2 += gy.dot(V2.T)
        ret += gV1, gV2, gb
    return ret
def forward(self, inputs):
    """Modified forward: per-time-step dot products between e1 and e2.

    e1 is broadcast against e2 viewed as (batch, time_span, features); the
    output is the (batch, time_span) matrix of feature-axis dot products.
    W is accepted for interface compatibility but unused by this variant.
    NOTE(review): ``time_span`` is defined at module level elsewhere --
    confirm it matches e2's layout.

    Fixes over the previous revision: removed the dead commented-out
    einsum implementation, replaced the explicit ``cupy.tile`` (which
    materialized a full copy of e1 per time step) with broadcasting, and
    dropped the redundant intermediate ``astype`` calls.
    """
    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]  # unused; kept so the input signature stays the same

    batch = len(e1)
    # View e2 as (batch, time_span, features).
    e2_cube = e2.reshape(batch, time_span, -1).astype(
        dtype=e1.dtype, copy=False)
    # Broadcast e1 over the time axis: (batch, 1, f) * (batch, t, f).
    y_cube = e1.reshape(batch, 1, -1) * e2_cube
    # Reduce the feature axis -> (batch, time_span).
    y_sum = cupy.sum(y_cube, axis=2).astype(dtype=e1.dtype, copy=False)
    return y_sum.reshape(batch, -1),
def backward(self, inputs, grad_outputs):
    """Backward pass for the modified (W-free) forward.

    These gradient formulas do not follow the standard bilinear form; they
    mirror a custom forward.  NOTE(review): ge1 sums gy over the output
    axis and ge2 subtracts the per-row mean of gy -- confirm these match
    the paired forward before reuse.

    Fixes over the previous revision: removed the Python-2-only debug
    ``print`` statements (a SyntaxError under Python 3) and the large
    commented-out dead-code blocks; gW is now allocated directly with the
    target dtype instead of ``zeros() + astype()``.
    """
    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]  # unused: the modified forward ignores W
    gy = grad_outputs[0]

    # Gradient w.r.t. e1: sum of gy over the output axis, one value per row.
    ge1 = cupy.sum(gy, axis=1).reshape(len(gy), 1).astype(
        dtype=gy.dtype, copy=False)

    # Gradient w.r.t. e2: gy minus the per-row mean of gy broadcast over
    # the output axis.
    gy_sum = cupy.sum(gy, axis=1).reshape(len(gy), 1).astype(
        dtype=gy.dtype, copy=False)
    gy_tile = cupy.tile(gy_sum, len(gy[0])).astype(dtype=gy.dtype, copy=False)
    ge2 = (gy - gy_tile / len(gy[0])).astype(dtype=gy.dtype, copy=False)

    # W receives no gradient, so return all zeros.  NOTE(review): the shape
    # uses e2's width for both trailing axes -- confirm it matches W.shape.
    gW = cupy.zeros((len(e1[0]), len(e2[0]), len(e2[0])), dtype=gy.dtype)

    ret = ge1.reshape(inputs[0].shape), ge2.reshape(inputs[1].shape), gW
    if len(inputs) == 6:
        V1, V2, b = inputs[3:]
        gV1 = e1.T.dot(gy)
        gV2 = e2.T.dot(gy)
        gb = gy.sum(0)
        # NOTE(review): these in-place updates also flow into ret[0]/ret[1]
        # when reshape returned a view (original code had the same order).
        ge1 += gy.dot(V1.T)
        ge2 += gy.dot(V2.T)
        ret += gV1, gV2, gb
    return ret