def test_bias_invalid_shape(self):
    x1 = chainer.Variable(numpy.zeros((3, 2, 3), numpy.float32))
    x2 = chainer.Variable(numpy.zeros((2,), numpy.float32))
    axis = 0
    with chainer.DebugMode(True):
        with self.assertRaises(AssertionError):
            functions.bias(x1, x2, axis)
def deconv(self, variable):
    v = variable
    if v.creator is not None:
        # Convert Convolution -> Deconvolution
        if v.creator.label == 'Convolution2DFunction':
            print(v.creator.label, v.rank)
            convW = v.creator.inputs[1].data
            in_cn, out_cn = convW.shape[0], convW.shape[1]  # in/out channels
            kh, kw = convW.shape[2], convW.shape[3]         # kernel size
            sx, sy = v.creator.sx, v.creator.sy             # stride
            pw, ph = v.creator.pw, v.creator.ph             # padding
            name = 'conv' + str(v.rank)                     # temporary layer name
            super(DeconvNet, self).add_link(
                name, L.Deconvolution2D(in_cn, out_cn, (kh, kw),
                                        stride=(sy, sx), pad=(ph, pw),
                                        nobias=True, initialW=convW))
            self.forwards[name] = self[name]
            # If the convolution layer has a bias, register it as well
            if len(v.creator.inputs) == 3:
                b = v.creator.inputs[2].data
                bname = 'convb' + str(v.rank)
                super(DeconvNet, self).add_link(bname, L.Bias(shape=b.shape))
                self[bname].b.data = b
                self.depends[bname] = parent
                self.depends[name] = bname
                self.forwards[bname] = self[bname]
                self.layers.append((bname, [parent], name))
            else:
                self.depends[name] = parent
        elif v.creator.label == 'ReLU':
            name = parent
        elif v.creator.label == 'MaxPooling2D':
            kw, kh = v.creator.kw, v.creator.kh
            sx, sy = v.creator.sx, v.creator.sy
            pw, ph = v.creator.pw, v.creator.ph
            name = 'maxpool' + str(v.rank)
            self.depends[name] = parent
            self.forwards[name] = lambda x: F.unpooling_2d(
                x, (kh, kw), stride=(sy, sx), pad=(ph, pw))
        self.register_inv_layer(v.creator.inputs[0], name)
    else:
        self.depends['output'] = parent
def dropout_convolution_2d(self, x):
    train = configuration.config.train
    W, b = self.W, self.b
    log_alpha = VDF.calculate_log_alpha(self.W, self.log_sigma2, eps=1e-8,
                                        thresholds=(-8., 8.))
    clip_mask = (log_alpha.data > self.loga_threshold)
    if train:
        W = (1. - clip_mask) * W
        mu = F.convolution_2d(x, (1. - clip_mask) * W, b=None,
                              stride=self.stride, pad=self.pad,
                              deterministic=self.deterministic)
        si = F.sqrt(
            F.convolution_2d(x * x, F.exp(log_alpha) * W * W, b=None,
                             stride=self.stride, pad=self.pad,
                             deterministic=self.deterministic) + 1e-8)
        normal_noise = self.xp.random.normal(0., 1., mu.shape).astype('f')
        activation = mu + si * normal_noise
        return F.bias(activation, b)
    else:
        return F.convolution_2d(x, (1. - clip_mask) * W, b,
                                stride=self.stride, pad=self.pad,
                                deterministic=self.deterministic)
def __call__(self, inputs, W, b):
    """
    Perform the LSTM op

    Args:
        inputs (float[][]): input tensor containing "x" to transform
    """
    x = inputs
    x = self.norm_x(x)
    if self.h is not None:
        x += F.bias(self.norm_h(self.h_x(self.h, W)), b)
    if self.c is None:
        self.c = variable.Variable(
            self.xp.zeros((len(inputs), self.n_units), dtype=self.xp.float32))

    # Compute the LSTM using Chainer's function to be able to use LayerNormalization
    def extract_gates(x):
        r = F.reshape(x, (x.shape[0], x.shape[1] // 4, 4) + x.shape[2:])
        return F.split_axis(r, 4, axis=2)

    a, i, f, o = extract_gates(x)

    # Remove unused dimension and apply transformation
    a = F.tanh(F.squeeze(a, axis=2))
    i = F.sigmoid(F.squeeze(i, axis=2))
    f = F.sigmoid(F.squeeze(f, axis=2) + self.forget_bias)
    o = F.sigmoid(F.squeeze(o, axis=2))

    # Transform
    c = a * i + f * self.c

    # Apply LayerNormalization
    h = o * F.tanh(self.norm_c(c))

    self.c, self.h = c, h
    return self.h
def decov_loss(tensor, xp=None, axis=1):
    """
    Decov loss as described in https://arxiv.org/pdf/1511.06068.pdf
    'Reducing Overfitting In Deep Networks by Decorrelating Representation'.
    This version implements the loss in the variable format.

    Args:
        axis (int, optional): If the tensor is 4-dim, it is reshaped into a
            2-dim matrix whose first dimension is taken from this axis.
    """
    if xp is None:
        # Get the xp module (numpy or cupy) if not provided.
        xp = chainer.cuda.get_array_module(tensor.data)
    if tensor.ndim == 4:
        # Reshape to a 2D matrix.
        matr = F.reshape(tensor, (tensor.shape[axis], -1))
    elif tensor.ndim == 2:
        matr = tensor
    # Subtract the mean.
    centered = F.bias(matr, -F.mean(matr))
    # Compute the covariance.
    cov = F.matmul(centered, F.transpose(centered))
    # Compute the Frobenius norm.
    frob_norm = F.sum(F.square(cov))
    # Get the norm of the diagonal elements.
    # In Chainer 5.x this should work:
    #   corr_diag_sqr = F.sum(F.square(F.diagonal(cov)))
    corr_diag_sqr = F.sum(F.square(cov * xp.eye(cov.shape[0], dtype=cov.dtype)))
    loss = 0.5 * (frob_norm - corr_diag_sqr)
    return loss
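# Hedged usage sketch (not from the original source): one way decov_loss above
# might be called. Assumes the function is in scope together with its module's
# imports (chainer, numpy, chainer.functions as F); shapes are illustrative only.
import numpy
import chainer

activations = chainer.Variable(
    numpy.random.randn(8, 16).astype(numpy.float32))  # (batch, features)
loss = decov_loss(activations)  # scalar Variable: 0.5 * (||C||_F^2 - ||diag(C)||^2)
print(float(loss.data))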
def __call__(self, x):
    # Chainer requires explicit broadcasting to avoid latent bugs
    u = F.mean(x, -1, keepdims=True)
    u = F.broadcast_to(u, x.shape)
    s = F.mean((x - u) ** 2, -1, keepdims=True)
    s = F.broadcast_to(s, x.shape)
    x = (x - u) / F.sqrt(s + self.e)
    return F.bias(F.scale(x, self.g, axis=2), self.b, axis=2)
def __call__(self, batch):
    """
    Input
    -----
    batch: Input Variable of shape [N, hidden_dim]
    """
    # Compute the mini-batch mean of squares
    nu = F.mean(F.square(batch), axis=0)
    # Normalize
    sig_hat = F.rsqrt(F.bias(nu, self.eps))
    activated = F.scale(batch, sig_hat)
    # Shift
    shift = F.bias(F.scale(activated, self.gamma), self.beta)
    # TRU
    return F.maximum(shift, self.tau)
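# Hedged sketch (not part of the original source): the F.scale / F.bias pair used
# above is an affine transform y = gamma * x + beta broadcast along `axis`
# (axis=1 by default). Shapes below are illustrative only.
import numpy
import chainer.functions as F

batch = numpy.random.randn(4, 3).astype(numpy.float32)  # [N, hidden_dim]
gamma = numpy.full(3, 2.0, dtype=numpy.float32)
beta = numpy.full(3, 0.5, dtype=numpy.float32)

y = F.bias(F.scale(batch, gamma), beta)  # gamma * batch + beta, per feature
numpy.testing.assert_allclose(y.data, gamma * batch + beta, rtol=1e-6)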
def __call__(self, x):
    if self.s is None:
        self.initialize_state(x.shape)
    if self.rec:
        s = F.elu(
            self.Wx(x) + self.Wy(self.y) + self.l_tau * self.s * (1 - self.y))
    else:
        s = F.elu(self.Wx(x) + self.l_tau * self.s * (1 - self.y))
    if self.soft:
        y = F.sigmoid(F.bias(s, self.b))
    else:
        y = step_func.step(F.bias(s, self.b))
        # y = F.relu(F.sign(F.bias(s, self.b)))
    self.s = s
    self.y = y
    return y
def forward(self, x, scaled_laplacian):
    """
    x: (batchsize, N, in_channels)
    scaled_laplacian: (batchsize, N, N)
    output: (batchsize, N, out_channels)
    """
    batchsize, N, _ = x.shape
    chebyshev_poly = []

    # Each Chebyshev polynomial term: (batchsize, N, in_channels)
    cheby_k_minus1 = F.matmul(scaled_laplacian, x)  # (batchsize, N, in_channels)
    cheby_k_minus2 = x                              # (batchsize, N, in_channels)
    chebyshev_poly.append(cheby_k_minus2)
    chebyshev_poly.append(cheby_k_minus1)
    for i in range(2, self.chebyshev_order):
        cheby_k = 2 * F.matmul(scaled_laplacian, cheby_k_minus1) - cheby_k_minus2
        chebyshev_poly.append(cheby_k)
        cheby_k_minus2 = cheby_k_minus1
        cheby_k_minus1 = cheby_k

    # Chebyshev loop
    for j, (chebyshev, cheby_weight) in enumerate(
            zip(chebyshev_poly, self.chebyshev_coeff)):
        chebyshev = F.reshape(chebyshev, (-1, self.in_channels))
        output = F.matmul(chebyshev, cheby_weight)
        output = F.reshape(output, (-1, N, self.out_channels))
        if j == 0:
            y = output
        else:
            y = F.bias(y, output, axis=0)
    y = F.bias(y, self.bias, axis=2)
    y = F.relu(y)
    return y
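# Hedged sketch (not from the original source): the loop above implements the
# Chebyshev recurrence T_0 = I, T_1 = L, T_k = 2 L T_{k-1} - T_{k-2}, applied to x.
# A minimal NumPy check of the recurrence on a toy scaled Laplacian:
import numpy

L = numpy.array([[0.0, 0.5], [0.5, 0.0]], dtype=numpy.float32)
t0 = numpy.eye(2, dtype=numpy.float32)
t1 = L
t2 = 2 * (L @ t1) - t0                # T_2(L) = 2 L^2 - I
t3 = 2 * (L @ t2) - t1                # T_3(L) = 4 L^3 - 3 L
numpy.testing.assert_allclose(t3, 4 * (L @ L @ L) - 3 * L, rtol=1e-5)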
def __call__(self, x): """Applies the linear layer. Args: x (~chainer.Variable): Batch of input vectors. Returns: ~chainer.Variable: Output of the linear layer. """ if self.W.data is None: in_size = functools.reduce(operator.mul, x.shape[1:], 1) self._initialize_params(in_size) if self.scale_param.data[0, 0] < 0: self._initialize_scale() #return linear.linear(x, self.W, self.b) y = self.scale_param.data * linear.linear(x, self.W, None) return bias(y, self.b)
def invert_linear(variable, guided=True, ignore_bias=True, rms=0.02, gamma=1.0):
    """
    Reconstructs the state before a fully connected layer from the
    ~chainer.Variable obtained after it.

    Args:
        variable (~chainer.Variable): Intermediate layer after the fully
            connected layer.
        guided (bool): True to perform guided backpropagation, False otherwise.
        ignore_bias: True to ignore the bias term, False to take it into account.
        rms (float): If positive, the weights are normalized so that their RMS
            equals this value.
    Returns:
        data (ndarray): Reconstructed data of the intermediate layer before the
            fully connected layer (note that an ndarray is returned, not a
            ~chainer.Variable).
    """
    assert variable.creator is not None
    assert variable.creator.label == 'LinearFunction', \
        'variable.creator should be LinearFunction.'
    v = variable
    bottom_blob = v.creator.inputs[0]
    bshape = bottom_blob.data.shape

    W = v.creator.inputs[1].data.copy()
    scale = rms / get_RMS(W) if rms > 0 else 1
    W = W * scale

    # If the bias of the fully connected layer is taken into account,
    # subtract it beforehand.
    if not ignore_bias and len(v.creator.inputs) == 3:
        in_data = F.bias(v, -v.creator.inputs[2] * scale)
    else:
        in_data = v

    inv_data = F.linear(in_data, W.T)

    # guided backpropagation
    if guided:
        # Do not propagate where the forward-pass value was not positive.
        switch = bottom_blob.data > 0
        inv_data.data *= switch.reshape(inv_data.data.shape)

    return inv_data.data.reshape(bshape)
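# Hedged sketch (not part of the original source): the core of invert_linear is
# the transpose mapping F.linear(y, W.T), which sends activations of shape
# (n, out) back to the input space of shape (n, in). Shapes are illustrative only.
import numpy
import chainer.functions as F

x = numpy.random.randn(2, 5).astype(numpy.float32)  # (n, in)
W = numpy.random.randn(3, 5).astype(numpy.float32)  # (out, in)

y = F.linear(x, W)         # (n, out): y = x @ W.T
x_back = F.linear(y, W.T)  # (n, in):  x_back = y @ W
assert x_back.shape == x.shape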
def __call__(self, h, adj):
    # h: (mb, atoms, hidden_dim)
    mb, num_edge_type, atoms, _ = adj.shape

    # (mb, atoms, atoms, num_edge_type)
    adj_in_one_hot = functions.transpose(adj, axes=(0, 2, 3, 1))
    adj_reshape_in = functions.reshape(
        adj_in_one_hot, shape=(mb * atoms * atoms, num_edge_type))
    adj_nn = adj_reshape_in
    for i in range(self.n_hidden_layers):
        # layer_dim = num_edge_type
        adj_nn = self.hidden_layers[i](adj_nn)
        adj_nn = self.activation(adj_nn)
        if self.dropout != 0.0:
            adj_nn = functions.dropout(adj_nn, ratio=self.dropout)
    # (mb * atoms * atoms, out_dim) = (mb * atoms * atoms, node_dim * node_dim)
    adj_output = self.output_layer(adj_nn)
    adj_tmp = functions.reshape(
        adj_output, shape=(mb, atoms, atoms, self.node_dim, self.node_dim))
    a = functions.reshape(
        functions.transpose(adj_tmp, axes=(0, 1, 3, 2, 4)),
        shape=(-1, atoms * self.node_dim, atoms * self.node_dim))
    # a: (mb, atoms * hidden_dim, atoms * hidden_dim)
    # h_flat: (mb, atoms * hidden_dim, 1)
    h_flat = functions.reshape(h, shape=(mb, atoms * self.node_dim, 1))
    a_mul = functions.reshape(functions.matmul(a, h_flat),
                              shape=(mb * atoms, self.node_dim))
    message_bias = self.xp.zeros(shape=(self.node_dim,), dtype=self.xp.float32)
    message_bias = chainer.Variable(data=message_bias, name='message_bias')
    a_t = functions.bias(a_mul, message_bias, axis=1)
    messages = functions.reshape(a_t, shape=(mb, atoms, self.node_dim))
    return messages
def __call__(self, x): """Applies the convolution layer. Args: x (~chainer.Variable): Input image. Returns: ~chainer.Variable: Output of the convolution. """ if self.W.data is None: self._initialize_params(x.shape[1]) if self.scale_param.data[0, 0] < 0: self._initialize_scale() #print(self.scale_param.data) y = self.scale_param.data * convolution_2d.convolution_2d( x, self.W, None, self.stride, self.pad, dilate=self.dilate, groups=self.groups) return bias(y, self.b)
def check_backward(self, x1_data, x2_data, axis, y_grad):
    x = (x1_data, x2_data)
    gradient_check.check_backward(
        lambda x, y: functions.bias(x, y, axis), x, y_grad)
def check_forward(self, x1_data, x2_data, axis, y_expected):
    x1 = chainer.Variable(x1_data)
    x2 = chainer.Variable(x2_data)
    y = functions.bias(x1, x2, axis)
    testing.assert_allclose(y_expected, y.data)
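# Hedged sketch (not part of the original tests): what functions.bias computes.
# With axis=1 and the shapes used in these tests, the result equals
# x1 + x2[None, :, None] (x2 broadcast over the leading and trailing axes of x1).
import numpy
from chainer import functions

x1 = numpy.random.randn(3, 2, 3).astype(numpy.float32)
x2 = numpy.random.randn(2).astype(numpy.float32)

y = functions.bias(x1, x2, axis=1)
numpy.testing.assert_allclose(y.data, x1 + x2[None, :, None], rtol=1e-6)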
b.cleargrad()
x.cleargrad()
log_sigma2.cleargrad()
xp.random.seed(777)
start = time.time()
log_alpha = F.clip(log_sigma2 - F.log(W * W + 1e-8), -8., 8.)
clip_mask = (log_alpha.data > loga_threshold)
_W = (1. - clip_mask) * W
mu = F.linear(x, _W)
si = F.sqrt(F.linear(x * x, F.exp(log_alpha) * _W * _W) + 1e-8)
normal_noise = xp.random.standard_normal(mu.shape).astype('f')
y = mu + si * normal_noise
if b is not None:
    y = F.bias(y, b)
F.sum(y).backward()
vs2 = [y.data, W.grad, b.grad, x.grad, log_sigma2.grad]
times.append(time.time() - start)
print('composition', numpy.mean(times[5:]))
for v1, v2 in zip(vs1, vs2):
    testing.assert_allclose(v1, v2, rtol=0.001)

print('### KL ###')
times = []
def forward(self, inputs, device):
    x1, x2 = inputs
    axis = 1
    return functions.bias(x1, x2, axis),
def invert_convolution(variable, guided=True, ignore_bias=True, rms=0.02,
                       rms_axis=None, gamma=1.0):
    """
    Reconstructs the state before a convolution from the ~chainer.Variable
    obtained after it.

    Args:
        variable (~chainer.Variable): Intermediate layer after the convolution.
        guided (bool): True to perform guided backpropagation, False otherwise.
        ignore_bias: True to ignore the bias term, False to take it into account.
        rms (float): If positive, the convolution filter weights are normalized
            so that their RMS equals this value.
    Returns:
        data (ndarray): Reconstructed data of the intermediate layer before the
            convolution (note that an ndarray is returned, not a
            ~chainer.Variable).
    """
    assert variable.creator is not None
    assert variable.creator.label == 'Convolution2DFunction', \
        'variable.creator should be Convolution2DFunction.'
    v = variable
    bottom_blob = v.creator.inputs[0]

    # Normalize the convolution filters so that their RMS equals the given value
    convW = v.creator.inputs[1].data.copy()
    xp = cuda.get_array_module(convW)
    if rms > 0:
        scale = (rms / get_RMS(convW, axis=rms_axis)).reshape(-1, 1, 1, 1)
    else:
        scale = 1
    convW = convW * scale

    # If the bias of the convolution layer is taken into account,
    # subtract it beforehand.
    if not ignore_bias and len(v.creator.inputs) == 3:
        in_data = F.bias(v, -v.creator.inputs[2] * scale)
    else:
        in_data = v

    in_cn, out_cn = convW.shape[0], convW.shape[1]  # in/out channels
    kh, kw = convW.shape[2], convW.shape[3]         # kernel size
    sx, sy = v.creator.sx, v.creator.sy             # stride
    pw, ph = v.creator.pw, v.creator.ph             # padding
    outsize = (bottom_blob.data.shape[2], bottom_blob.data.shape[3])

    # Deconvolution (transposed convolution)
    deconv_data = F.deconvolution_2d(in_data, convW, stride=(sy, sx),
                                     pad=(ph, pw), outsize=outsize)

    # guided backpropagation
    if guided and v.rank > 1:
        # Do not propagate where the pre-convolution value was not positive.
        switch = bottom_blob.data > 0
        deconv_data.data *= switch

    return deconv_data.data
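# Hedged sketch (not part of the original source): the reconstruction above relies
# on F.deconvolution_2d acting as the transpose of F.convolution_2d for the same
# filter, stride and padding. Shapes are illustrative; weights are random.
import numpy
import chainer.functions as F

x = numpy.random.randn(1, 3, 8, 8).astype(numpy.float32)  # (N, in_ch, H, W)
W = numpy.random.randn(4, 3, 3, 3).astype(numpy.float32)  # (out_ch, in_ch, kh, kw)

y = F.convolution_2d(x, W, stride=2, pad=1)  # (1, 4, 4, 4)
x_back = F.deconvolution_2d(y, W, stride=2, pad=1, outsize=x.shape[2:])
assert x_back.shape == x.shape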