def addgrad(self, var):
    """Add the gradient of ``var`` onto the gradient of this variable.

    Nothing happens when ``var`` carries no gradient. If this variable's
    data array is still ``None`` it is first initialized with
    ``var.shape``. When the source gradient and this variable's data are
    on different devices, the source gradient is copied to this
    variable's device before it is accumulated.

    Args:
        var (Variable): Source variable.

    """
    incoming = var._grad_var
    if incoming is None:
        return

    if self.data is None:
        self.initialize(var.shape)
    current = self._grad_var

    source_device = cuda.get_device_from_array(incoming.data)
    target_device = cuda.get_device_from_array(self.data)
    if source_device.id != target_device.id:
        # Bring the incoming gradient onto the device holding our data.
        incoming = chainer.functions.copy(incoming, target_device.id)

    if current is None:
        self._grad_var = incoming
    else:
        self._grad_var = incoming + current
def check_forward(self, x1_data, x2_data, x3_data):
    """Check three consecutive forward calls of the stateful link.

    After each call the output and the state kept on ``self.link`` are
    compared with values recomputed through ``functions.lstm``.

    Args:
        x1_data: Raw input array for the first call.
        x2_data: Raw input array for the second call. Its length is used
            as the split point of the hidden state kept by the link.
        x3_data: Raw input array for the third call.

    """
    xp = self.link.xp

    # First call: the state starts from zeros.
    x1 = chainer.Variable(x1_data) if self.input_variable else x1_data
    h1 = self.link(x1)
    with cuda.get_device_from_array(x1_data):
        c0 = chainer.Variable(
            xp.zeros((len(self.x1), self.out_size), dtype=self.x1.dtype))
        c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x1))
    testing.assert_allclose(h1.data, h1_expect.data)
    testing.assert_allclose(self.link.h.data, h1_expect.data)
    testing.assert_allclose(self.link.c.data, c1_expect.data)

    # Second call: only the first ``batch`` rows of the state are updated.
    batch = len(x2_data)
    x2 = chainer.Variable(x2_data) if self.input_variable else x2_data
    h1_in, h1_rest = functions.split_axis(
        self.link.h.data, [batch], axis=0)
    y2 = self.link(x2)
    # Use the raw array here: ``x1`` may be a Variable, which is not an
    # array and would not select the device of the data.
    with cuda.get_device_from_array(x1_data):
        c2_expect, y2_expect = functions.lstm(
            c1_expect, self.link.upward(x2) + self.link.lateral(h1_in))
    testing.assert_allclose(y2.data, y2_expect.data)
    testing.assert_allclose(self.link.h.data[:batch], y2_expect.data)
    testing.assert_allclose(self.link.h.data[batch:], h1_rest.data)

    # Third call.
    x3 = chainer.Variable(x3_data) if self.input_variable else x3_data
    h2_rest = self.link.h
    y3 = self.link(x3)
    c3_expect, y3_expect = functions.lstm(c2_expect, self.link.upward(x3))
    testing.assert_allclose(y3.data, y3_expect.data)
    testing.assert_allclose(self.link.h.data, h2_rest.data)
def addgrad(self, var):
    """Accumulate the gradient of ``var`` into this variable's gradient.

    Returns immediately when ``var`` has no gradient. If this variable's
    data array is ``None`` it is initialized with ``var.shape`` first.
    A source gradient living on a different device than this variable's
    data is copied to that device before the addition.

    Args:
        var (Variable): Source variable.

    """
    src_grad = var._grad_var
    if src_grad is None:
        return

    if self.data is None:
        self.initialize(var.shape)
    own_grad = self._grad_var

    src_id = cuda.get_device_from_array(src_grad.data).id
    own_id = cuda.get_device_from_array(self.data).id
    if src_id != own_id:
        src_grad = chainer.functions.copy(src_grad, own_id)

    self._grad_var = src_grad if own_grad is None else src_grad + own_grad
def main():
    """Run training as configured by the command-line arguments.

    Unless ``args.silent`` is set, the output directories are created
    under ``script_path + args.save_path`` and the loss history, preview
    images and model snapshots are written there. In silent mode nothing
    is written to disk.
    """
    args = get_args()

    save_path = None
    preview_path = None
    if not args.silent:
        save_path = os.path.abspath(script_path + args.save_path)
        save_path = os.path.abspath(save_path + "/" + args.name)
        preview_path = os.path.abspath(save_path + "/preview")
        # One call creates the root, run and preview directories and
        # tolerates directories that already exist.
        os.makedirs(preview_path, exist_ok=True)

    dataset = Dataset(args)

    if args.max_epoch is not None:
        # Iterations per epoch, rounded up to cover a final partial batch.
        epoch_iter = dataset.train_data_len // args.batch_size
        if dataset.train_data_len % args.batch_size != 0:
            epoch_iter += 1
        args.max_iter = args.max_epoch * epoch_iter

    progress = print_progress(
        args.max_iter, args.batch_size, dataset.train_data_len)

    if args.gpu_num != 0:
        # NOTE(review): the device is looked up from an array of indices
        # rather than from a device id; kept as written, confirm intent.
        cuda.get_device_from_array(
            xp.array([i for i in range(args.gpu_num)])).use()

    model = make_model(args, dataset)
    netG_opt = make_optimizer(
        model.netG_0, args.adam_alpha, args.adam_beta1, args.adam_beta2)
    netD_opt = make_optimizer(
        model.netD_0, args.adam_alpha, args.adam_beta1, args.adam_beta2)
    updater = Updater(
        model, netG_opt, netD_opt, args.n_dis, args.batch_size,
        args.gpu_num, args.KL_loss_iter, args.KL_loss_conf,
        args.epoch_decay, args.max_iter)

    def save_snapshot():
        # Write a preview image and both model snapshots for the current
        # iteration. Only called when not in silent mode.
        data = dataset.sampling(args.sample_size)
        sample = sample_generate(
            model.netG_0, data, args.noise_dim, args.noise_dist)
        Image.fromarray(sample).save(
            preview_path + f"/image_{dataset.now_iter:08d}.png")
        serializers.save_npz(
            save_path + f"/Generator_{dataset.now_iter:08d}.npz",
            model.netG_0)
        serializers.save_npz(
            save_path + f"/Discriminator_{dataset.now_iter:08d}.npz",
            model.netD_0)

    print("==========================================")
    print("Info:start train")
    start = time.time()
    for _ in range(args.max_iter):
        data = toGPU(dataset.next(), args.gpu_num)
        updater.update(data, dataset.now_epoch)

        if dataset.now_iter % args.display_interval == 0:
            elapsed = time.time() - start
            progress(elapsed, dataset.get_state)
            # save_path only exists when output is enabled; writing the
            # history unconditionally failed in silent mode.
            if not args.silent:
                np.save(save_path + "/loss_hist.npy", updater.loss_hist)
            start = time.time()

        if (dataset.now_iter % args.snapshot_interval == 0
                and not args.silent):
            save_snapshot()

    if not args.silent:
        save_snapshot()
    print("\n\n\n\n==========================================")
    print("Info:finish train")
def __call__(self, atom_array, adj):
    """Forward propagation.

    Args:
        atom_array (numpy.ndarray): Minibatch of molecules represented
            with atom IDs (representing C, O, S, ...).
            ``atom_array[mol_index, atom_index]`` is the atomic number of
            the ``atom_index``-th atom of the ``mol_index``-th molecule.
            An input whose dtype is not int32 is used as-is instead of
            being embedded.
        adj (numpy.ndarray): Minibatch of adjacency matrices with
            edge-type information.

    Returns:
        ~chainer.Variable: Minibatch of fingerprints.

    """
    if atom_array.dtype == self.xp.int32:
        hidden = self.embed(atom_array)
    else:
        hidden = atom_array
    # Keep a copy of the initial features for the readout layers.
    device_id = cuda.get_device_from_array(hidden.data).id
    initial_hidden = functions.copy(hidden, device_id)

    readouts = []
    for step in range(self.n_layers):
        # With weight tying every step reuses the first update layer.
        layer_index = 0 if self.weight_tying else step
        hidden = self.update_layers[layer_index](hidden, adj)
        if self.concat_hidden:
            readouts.append(self.readout_layers[step](hidden, initial_hidden))

    if self.concat_hidden:
        return functions.concat(readouts, axis=1)
    return self.readout_layers[0](hidden, initial_hidden)
def init_state(self, param):
    """Create zero-filled state arrays shaped like ``param.data``.

    ``'m'`` and ``'v'`` are always created; ``'vhat'`` is created only
    when ``self.hyperparam.amsgrad`` is truthy.

    Args:
        param: Parameter whose ``data`` array is used as the template.

    """
    data = param.data
    xp = backend.get_array_module(data)
    with cuda.get_device_from_array(data):
        for key in ('m', 'v'):
            self.state[key] = xp.zeros_like(data)
        if self.hyperparam.amsgrad:
            self.state['vhat'] = xp.zeros_like(data)
def init_state(self, param):
    """Extend the parent state with ``'u'`` and ``'nzu'``.

    After the parent's ``init_state`` has run, ``'u'`` is set to a
    zero-filled array shaped like ``param.data`` and ``'nzu'`` to ``0``.

    Args:
        param: Parameter whose ``data`` array is used as the template.

    """
    super(_ObserveZeroRule, self).init_state(param)
    data = param.data
    xp = backend.get_array_module(data)
    with cuda.get_device_from_array(data):
        zeros = xp.zeros_like(data)
        self.state['u'] = zeros
        self.state['nzu'] = 0
def zerograd(self):
    """Fill the gradient array with zeros.

    An existing gradient variable is unchained from the computational
    graph and zero-filled in place, because this operation breaks the
    backprop validity. Without an existing gradient, a new zero array
    shaped like the data is assigned. Nothing is done when the data
    array is ``None``. A ``DeprecationWarning`` is always emitted.

    .. deprecated:: v1.15
       Use :meth:`cleargrad` instead.

    """
    warnings.warn(
        'Variable.zerograd is deprecated. Use Variable.cleargrad instead.',
        DeprecationWarning)

    if self.data is None:
        return

    with cuda.get_device_from_array(self.data) as dev:
        grad_var = self._grad_var
        if grad_var is not None:
            grad_var.unchain()
            grad_var.data.fill(0)
        else:
            # Device id -1 is the CPU; anything else is a GPU device.
            xp = numpy if dev.id == -1 else cuda.cupy
            self.grad = xp.zeros_like(self.data)
def forward_cudnn(self, inputs):
    """Forward pass using cuDNN batch normalization.

    The input is reshaped to ``(1, batch_size * groups, -1, 1)`` and
    normalized with a gamma of ones and a beta of zeros; the real
    ``gamma`` and ``beta`` are then applied with an elementwise kernel.
    ``self.dummy_gamma``, ``self.mean`` and ``self.inv_std`` are stored
    on the instance.

    Args:
        inputs: Tuple ``(x, gamma, beta)``.

    Returns:
        tuple: One-element tuple holding the output in the shape of ``x``.

    Raises:
        RuntimeError: If ``self.eps`` is below the cuDNN minimum epsilon.

    """
    if self.eps < libcudnn.CUDNN_BN_MIN_EPSILON:
        raise RuntimeError(
            'cuDNN does not allow an eps value '
            'less than {}.'.format(libcudnn.CUDNN_BN_MIN_EPSILON))
    self.retain_inputs((0, 1))

    x, gamma, beta = inputs
    xp = cuda.cupy
    input_shape = x.shape
    batch_size, channels = input_shape[:2]
    n_instances = batch_size * self.groups
    x = x.reshape((1, n_instances, -1, 1))

    with cuda.get_device_from_array(x):
        zeros = xp.zeros(n_instances, dtype=x.dtype)
        self.dummy_gamma = xp.ones_like(zeros)
    # The zero array is reused for beta and for both running statistics.
    normalized, self.mean, self.inv_std = \
        cudnn.batch_normalization_forward_training(
            x, self.dummy_gamma, zeros, zeros, zeros,
            self.eps, 1.0, True, libcudnn.CUDNN_BATCHNORM_SPATIAL,
            configuration.config.debug)

    y = normalized.reshape((batch_size, channels, -1))
    scale_shift = cuda.elementwise(
        'T gamma, T beta', 'T y',
        'y = y * gamma + beta',
        'groupnorm_y')
    scale_shift(gamma[:, None], beta[:, None], y)

    return y.reshape(input_shape),
def zerograd(self):
    """Initialize the gradient array with zeros.

    Emits a ``DeprecationWarning`` on every call. If the data array is
    ``None`` nothing else happens. An existing gradient variable is
    unchained from the computational graph and filled with zeros in
    place; otherwise a fresh zero array shaped like the data is assigned
    as the gradient.

    .. deprecated:: v1.15
       Use :meth:`cleargrad` instead.

    """
    warnings.warn(
        'Variable.zerograd is deprecated. Use Variable.cleargrad instead.',
        DeprecationWarning)

    if self.data is None:
        return

    with cuda.get_device_from_array(self.data) as dev:
        existing = self._grad_var
        if existing is None:
            if dev.id == -1:
                xp = numpy
            else:
                xp = cuda.cupy
            self.grad = xp.zeros_like(self.data)
        else:
            existing.unchain()
            existing.data.fill(0)
def _prepare(self, param):
    """Ensure the state exists and sits on the device of ``param.data``.

    A missing state is created as an empty dict and filled by
    ``init_state``. Every array-valued state entry found on a different
    device than the parameter data is transferred to that device.

    Args:
        param: Parameter the state belongs to.

    """
    with cuda.get_device_from_array(param.data) as device:
        state = self.state
        if state is None:
            state = self._state = {}
            self.init_state(param)

        for name, value in six.iteritems(state):
            if isinstance(value, (numpy.ndarray, cuda.ndarray)):
                if cuda.get_device_from_array(value).id != device.id:
                    # A non-negative id means the parameter is on a GPU.
                    if device.id >= 0:
                        state[name] = cuda.to_gpu(value)
                    else:
                        state[name] = cuda.to_cpu(value)
def init_state(self, param):
    """Create zero-filled state arrays shaped like ``param.data``.

    ``'m'`` and ``'v'`` are always created; ``'vhat'`` is added only
    when ``self.hyperparam.amsgrad`` is truthy.

    Args:
        param: Parameter whose ``data`` array is used as the template.

    """
    template = param.data
    xp = cuda.get_array_module(template)
    with cuda.get_device_from_array(template):
        self.state['m'] = xp.zeros_like(template)
        self.state['v'] = xp.zeros_like(template)
        if self.hyperparam.amsgrad:
            self.state['vhat'] = xp.zeros_like(template)
def _prepare(self, param):
    """Create the state if needed and move it next to ``param.data``.

    When no state exists yet, an empty dict is installed and
    ``init_state`` is called. Array-valued entries whose device differs
    from the device of the parameter data are copied to that device;
    all other entries are left alone.

    Args:
        param: Parameter the state belongs to.

    """
    with cuda.get_device_from_array(param.data) as device:
        state = self.state
        if state is None:
            state = self._state = {}
            self.init_state(param)

        for key, entry in six.iteritems(state):
            if not isinstance(entry, (numpy.ndarray, cuda.ndarray)):
                continue
            if cuda.get_device_from_array(entry).id == device.id:
                continue
            if device.id >= 0:
                state[key] = cuda.to_gpu(entry)
            else:
                state[key] = cuda.to_cpu(entry)
def forward(self, inputs):
    """Copy the single input array to ``self.out_device``.

    The id of the input's device is stored in ``self._in_device``. An
    output device of ``-1`` means a copy to the CPU; any other value is
    passed to ``cuda.to_gpu``.

    Args:
        inputs: One-element sequence holding the input array.

    Returns:
        tuple: One-element tuple holding the copied array.

    """
    (x,) = inputs
    self._in_device = cuda.get_device_from_array(x).id
    if int(self.out_device) != -1:
        copied = cuda.to_gpu(x, self.out_device)
    else:
        copied = cuda.to_cpu(x)
    return (copied,)
def forward(self, inputs):
    """Transfer the single input array to ``self.out_device``.

    Records the id of the input's device in ``self._in_device``. When
    ``self.out_device`` converts to ``-1`` the array is copied to the
    CPU, otherwise to the given GPU device.

    Args:
        inputs: One-element sequence holding the input array.

    Returns:
        tuple: One-element tuple holding the transferred array.

    """
    array = inputs[0] if len(inputs) == 1 else None
    if array is None:
        # Preserve the unpacking error for inputs of the wrong length.
        array, = inputs
    self._in_device = cuda.get_device_from_array(array).id
    to_host = int(self.out_device) == -1
    if to_host:
        return cuda.to_cpu(array),
    return cuda.to_gpu(array, self.out_device),
def forward_cudnn(self, inputs):
    """Forward pass using cuDNN batch normalization.

    The input is viewed as ``(1, batch_size * groups, -1, 1)`` and
    normalized with a gamma of ones and a beta of zeros; the real
    ``gamma`` and ``beta`` are applied afterwards with an elementwise
    kernel. ``self.dummy_gamma``, ``self.mean`` and ``self.inv_std`` are
    stored on the instance.

    Args:
        inputs: Tuple ``(x, gamma, beta)``.

    Returns:
        tuple: One-element tuple holding the output in the shape of ``x``.

    Raises:
        RuntimeError: If ``self.eps`` is below the cuDNN minimum epsilon.

    """
    if self.eps < libcudnn.CUDNN_BN_MIN_EPSILON:
        raise RuntimeError(
            'cuDNN does not allow an eps value '
            'less than {}.'.format(libcudnn.CUDNN_BN_MIN_EPSILON))
    self.retain_inputs((0, 1))

    x, gamma, beta = inputs
    xp = cuda.cupy
    input_shape = x.shape
    batch_size, channels = input_shape[:2]
    n_instances = batch_size * self.groups
    x = x.reshape((1, n_instances, -1, 1))

    with cuda.get_device_from_array(x):
        zeros = xp.zeros(n_instances, dtype=x.dtype)
        self.dummy_gamma = xp.ones_like(zeros)
    # The zero array is reused for beta and for both running statistics.
    normalized, self.mean, self.inv_std = \
        cudnn.batch_normalization_forward_training(
            x, self.dummy_gamma, zeros, zeros, zeros, None, None,
            self.eps, 1.0, True, libcudnn.CUDNN_BATCHNORM_SPATIAL,
            configuration.config.debug)

    y = normalized.reshape((batch_size, channels, -1))
    scale_shift = cuda.elementwise(
        'T gamma, T beta', 'T y',
        'y = y * gamma + beta',
        'groupnorm_y')
    scale_shift(gamma[:, None], beta[:, None], y)

    return y.reshape(input_shape),
def __call__(self, rule, param):
    """Clip the gradient of ``param`` in place.

    Values are limited to ``[self.lower_bound, self.upper_bound]``.
    Parameters without a gradient are skipped.

    Args:
        rule: Unused by this hook.
        param: Parameter whose gradient is clipped.

    """
    g = param.grad
    if g is not None:
        xp = cuda.get_array_module(g)
        with cuda.get_device_from_array(g):
            # ``out=g`` writes the clipped values back into the gradient.
            xp.clip(g, self.lower_bound, self.upper_bound, out=g)
def __call__(self, rule, param):
    """Limit the gradient of ``param`` to the configured bounds in place.

    Does nothing when the parameter has no gradient.

    Args:
        rule: Unused by this hook.
        param: Parameter whose gradient is clipped to
            ``[self.lower_bound, self.upper_bound]``.

    """
    gradient = param.grad
    if gradient is None:
        return
    array_module = cuda.get_array_module(gradient)
    with cuda.get_device_from_array(gradient):
        array_module.clip(
            gradient, self.lower_bound, self.upper_bound, out=gradient)
def __call__(self, atom_array, adj):
    """Forward propagation.

    Args:
        atom_array (numpy.ndarray): Minibatch of molecules represented
            with atom IDs (representing C, O, S, ...).
            ``atom_array[mol_index, atom_index]`` is the atomic number of
            the ``atom_index``-th atom of the ``mol_index``-th molecule.
            An input whose dtype is not int32 is used without embedding.
        adj (numpy.ndarray): Minibatch of adjacency matrices with
            edge-type information.

    Returns:
        ~chainer.Variable: Minibatch of fingerprints.

    """
    needs_embedding = atom_array.dtype == self.xp.int32
    h = self.embed(atom_array) if needs_embedding else atom_array
    h0 = functions.copy(h, cuda.get_device_from_array(h.data).id)

    collected = []
    for step in range(self.n_layers):
        if self.weight_tying:
            update = self.update_layers[0]
        else:
            update = self.update_layers[step]
        h = update(h, adj)
        if self.concat_hidden:
            collected.append(self.readout_layers[step](h, h0))

    if not self.concat_hidden:
        # Without concatenation only the first readout layer is used.
        return self.readout_layers[0](h, h0)
    return functions.concat(collected, axis=1)
def _sum_sqnorm(arr):
    """Return the sum of squared entries over all arrays in ``arr``.

    Partial sums are accumulated per device id and converted to Python
    floats only when the final total is formed.

    Args:
        arr: Iterable of arrays.

    Returns:
        Sum of the per-device partial sums as floats.

    """
    per_device = collections.defaultdict(float)
    for array in arr:
        with cuda.get_device_from_array(array) as dev:
            flat = array.ravel()
            per_device[int(dev)] += flat.dot(flat)
    return sum([float(partial) for partial in six.itervalues(per_device)])
def _sum_sqnorm(arr):
    """Sum the squared L2 norms of the given arrays.

    Each array is flattened and dotted with itself on its own device;
    the results are grouped by device id before being added up as
    floats.

    Args:
        arr: Iterable of arrays.

    Returns:
        Total of all squared entries.

    """
    totals = collections.defaultdict(float)
    for item in arr:
        with cuda.get_device_from_array(item) as dev:
            vec = item.ravel()
            squared = vec.dot(vec)
            totals[int(dev)] += squared
    as_floats = [float(value) for value in six.itervalues(totals)]
    return sum(as_floats)
def __call__(self, x):
    """Subtract the pixel mean and apply two conv/ReLU/pooling stages.

    Args:
        x: Input batch.

    Returns:
        The features reshaped to ``(-1, 7*7*48)``.

    """
    with cuda.get_device_from_array(x):
        mean = cuda.to_gpu(self.pixel_mean)
    centered = x - mean

    stage1 = F.relu(self.conv1(centered))
    stage1 = F.max_pooling_2d(stage1, 2, stride=2)
    stage2 = F.relu(self.conv2(stage1))
    stage2 = F.max_pooling_2d(stage2, 2, stride=2)
    return F.reshape(stage2, (-1, 7*7*48))
def init_state(self, param):
    """Create the zero-filled ``'v'`` state shaped like ``param.data``.

    If the parameter data is an ``intel64.mdarray`` the state is
    converted to an iDeep weight array.

    Args:
        param: Parameter whose ``data`` array is used as the template.

    """
    data = param.data
    xp = backend.get_array_module(data)
    with cuda.get_device_from_array(data):
        self.state['v'] = xp.zeros_like(data)

    # For iDeep
    if isinstance(data, intel64.mdarray):
        self.state['v'] = intel64.ideep.array(
            self.state['v'], itype=intel64.ideep.wgt_array)
def init_state(self, param):
    """Create the zero-filled ``'v'`` state shaped like ``param.data``.

    If ``intel64.inputs_all_ready`` accepts the new state array, it is
    converted to an iDeep weight array.

    Args:
        param: Parameter whose ``data`` array is used as the template.

    """
    data = param.data
    xp = cuda.get_array_module(data)
    with cuda.get_device_from_array(data):
        self.state['v'] = xp.zeros_like(data)

    # For iDeep
    if intel64.inputs_all_ready((self.state['v'],)):
        self.state['v'] = intel64.ideep.array(
            self.state['v'], itype=intel64.ideep.wgt_array)
def init_state(self, param):
    """Initialize ``'v'`` with zeros in the shape of ``param.data``.

    For parameter data of type ``intel64.mdarray`` the state is replaced
    by an iDeep weight array built from it.

    Args:
        param: Parameter whose ``data`` array is used as the template.

    """
    template = param.data
    array_module = backend.get_array_module(template)
    with cuda.get_device_from_array(template):
        self.state['v'] = array_module.zeros_like(template)

    # For iDeep
    if not isinstance(template, intel64.mdarray):
        return
    self.state['v'] = intel64.ideep.array(
        self.state['v'], itype=intel64.ideep.wgt_array)
def __call__(self, rule, param):
    """Subtract ``self.rate * sign(param.array)`` from the gradient.

    Returns without doing anything when either the parameter array or
    its gradient variable is ``None``.

    Args:
        rule: Unused by this hook.
        param: Parameter providing ``array`` and ``grad_var``.

    """
    p, g = param.array, param.grad_var
    if p is None or g is None:
        return
    xp = cuda.get_array_module(p)
    with cuda.get_device_from_array(p) as dev:
        sign = xp.sign(p)
        # A CPU-only branch used to be here; the update now runs for
        # every device and ``dev`` is no longer read.
        # if int(dev) == -1:
        # NOTE(review): ``g`` comes from ``param.grad_var``. If that
        # object has no in-place subtraction, this only rebinds the local
        # name and leaves the parameter's gradient unchanged; confirm.
        g -= self.rate * sign
def __call__(self, opt):
    """Rescale all gradients when their global L2 norm is too large.

    The norm is taken over the gradients of all parameters of
    ``opt.target``. When ``self.threshold / norm`` is below one, every
    gradient is multiplied by that ratio in place.

    Args:
        opt: Optimizer whose target's parameters are processed.

    """
    grads = [p.grad for p in opt.target.params(False)]
    norm = numpy.sqrt(_sum_sqnorm(grads))
    rate = self.threshold / norm
    if rate < 1:
        for param in opt.target.params(False):
            g = param.grad
            with cuda.get_device_from_array(g):
                g *= rate
def __call__(self, opt):
    """Scale gradients down so their global L2 norm meets the threshold.

    Computes the L2 norm over the gradients of all parameters of
    ``opt.target`` and, if ``self.threshold / norm`` is smaller than
    one, multiplies each gradient by that factor in place.

    Args:
        opt: Optimizer whose target's parameters are processed.

    """
    squared_norm = _sum_sqnorm(
        [param.grad for param in opt.target.params(False)])
    scale = self.threshold / numpy.sqrt(squared_norm)
    if not (scale < 1):
        return
    for param in opt.target.params(False):
        grad = param.grad
        with cuda.get_device_from_array(grad):
            grad *= scale
def _check_array(array, name):
    """Return ``array`` as a contiguous float16/float32 array.

    An array of any other dtype is cast to float32, and an array that is
    neither C- nor F-contiguous is made contiguous. A warning naming
    ``name`` is issued for each conversion.

    Args:
        array: Array to check.
        name: Label used in the warning messages.

    Returns:
        The original array, or a converted copy.

    """
    xp = cuda.get_array_module(array)
    with cuda.get_device_from_array(array):
        is_supported = (array.dtype == xp.float32
                        or array.dtype == xp.float16)
        if not is_supported:
            warnings.warn('non FP32 or FP16 dtype detected in {}'.format(name))
            array = array.astype(xp.float32)

        flags = array.flags
        if not flags.c_contiguous and not flags.f_contiguous:
            warnings.warn('non contiguous array detected in {}'.format(name))
            array = xp.ascontiguousarray(array)
    return array
def init_state(self, param):
    """Create the zero-filled ``'v'`` state shaped like ``param.data``.

    The state is converted to an iDeep weight array only when the
    parameter data is an ``intel64.mdarray`` and
    ``intel64.inputs_all_ready`` accepts the new state array.

    Args:
        param: Parameter whose ``data`` array is used as the template.

    """
    data = param.data
    xp = cuda.get_array_module(data)
    with cuda.get_device_from_array(data):
        self.state['v'] = xp.zeros_like(data)

    # For iDeep
    if isinstance(data, intel64.mdarray):
        if intel64.inputs_all_ready((self.state['v'],)):
            self.state['v'] = intel64.ideep.array(
                self.state['v'], itype=intel64.ideep.wgt_array)
def __call__(self, rule, param):
    """Add ``self.rate * param.data`` to the gradient in place.

    Skipped when the data or the gradient is ``None``. On the CPU the
    update is a plain array operation, otherwise an elementwise kernel.

    Args:
        rule: Unused by this hook.
        param: Parameter whose gradient is updated.

    """
    data, grad = param.data, param.grad
    if data is None or grad is None:
        return
    with cuda.get_device_from_array(data) as dev:
        if int(dev) != -1:
            decay_kernel = cuda.elementwise(
                'T p, T decay', 'T g', 'g += decay * p', 'weight_decay')
            decay_kernel(data, self.rate, grad)
        else:
            grad += self.rate * data
def __call__(self, rule, param):
    """Apply weight decay to the gradient of ``param`` in place.

    The gradient is increased by ``self.rate`` times the parameter
    data. Nothing is done if the data or the gradient is ``None``.

    Args:
        rule: Unused by this hook.
        param: Parameter whose gradient is updated.

    """
    weights = param.data
    gradient = param.grad
    if weights is None or gradient is None:
        return
    with cuda.get_device_from_array(weights) as dev:
        on_cpu = int(dev) == -1
        if on_cpu:
            gradient += self.rate * weights
            return
        kernel = cuda.elementwise(
            'T p, T decay', 'T g', 'g += decay * p', 'weight_decay')
        kernel(weights, self.rate, gradient)
def _concat_arrays(arrays, padding):
    """Stack ``arrays`` along a new leading axis.

    Args:
        arrays: Sequence of arrays, or of built-in values such as int or
            float, which are converted to a NumPy array first.
        padding: If not ``None``, the work is delegated to
            ``_concat_arrays_with_padding`` with this value.

    Returns:
        The stacked array.

    """
    # Built-in element types (int, float, ...) become a numpy.ndarray.
    if not isinstance(arrays[0], (numpy.ndarray, cuda.ndarray)):
        arrays = numpy.asarray(arrays)

    if padding is not None:
        return _concat_arrays_with_padding(arrays, padding)

    head = arrays[0]
    xp = cuda.get_array_module(head)
    with cuda.get_device_from_array(head):
        return xp.concatenate([item[None] for item in arrays])
def _concat_arrays(arrays, padding):
    """Concatenate ``arrays`` into one batch along a new first axis.

    Args:
        arrays: Sequence of arrays. When the first element is neither a
            NumPy nor a CUDA array, the whole sequence is converted with
            ``numpy.asarray``.
        padding: Padding value; when given, the padded variant
            ``_concat_arrays_with_padding`` is used instead.

    Returns:
        The batched array.

    """
    first = arrays[0]
    is_array = (isinstance(first, numpy.ndarray)
                or isinstance(first, cuda.ndarray))
    if not is_array:
        arrays = numpy.asarray(arrays)

    if padding is None:
        xp = cuda.get_array_module(arrays[0])
        with cuda.get_device_from_array(arrays[0]):
            expanded = [array[None] for array in arrays]
            return xp.concatenate(expanded)
    return _concat_arrays_with_padding(arrays, padding)
def backward(self, target_input_indexes, grad_outputs):
    """Run the wrapped function's ``backward`` and wrap its results.

    Raw arrays of the retained inputs and of the output gradients are
    handed to ``self._function.backward``. In ChainerX fallback mode the
    arrays are converted from ChainerX before the call and the resulting
    gradients are converted back afterwards.

    Args:
        target_input_indexes: Indexes of the inputs whose gradients are
            returned.
        grad_outputs: Gradient variables w.r.t. the outputs; entries may
            be ``None``.

    Returns:
        tuple: One entry per requested index, either ``None`` or a
        ``Variable`` wrapping the gradient array.

    """
    retained_inputs = self.get_retained_inputs()
    in_data = [None] * len(self.inputs)
    for retained, index in six.moves.zip(
            retained_inputs, self._input_indexes_to_retain):
        in_data[index] = None if retained is None else retained.array
    in_data = tuple(in_data)

    grad_out_data = tuple(
        [None if grad is None else grad.data for grad in grad_outputs])

    fallback = self._is_chainerx_fallback_mode
    if fallback:
        # Convert input and output gradients to numpy/cupy
        in_data = backend.from_chainerx(in_data)
        grad_out_data = backend.from_chainerx(grad_out_data)

    # Call Function.backward
    with cuda.get_device_from_array(*(in_data + grad_out_data)):
        if fallback:
            # Enable attribute fallback
            with function_node._chainerx_attribute_fallback(
                    self._function, self.chainerx_device):
                gxs = self._function.backward(in_data, grad_out_data)
        else:
            gxs = self._function.backward(in_data, grad_out_data)

    # Check gradients
    for x, gx in six.moves.zip(self.inputs, gxs):
        if gx is not None:
            variable._check_grad_type(self, x, True, gx)

    # Convert input gradients back to ChainerX
    if fallback:
        gxs = backend.to_chainerx(gxs)

    results = []
    for index in target_input_indexes:
        if gxs[index] is None:
            results.append(None)
            continue
        # Intentionally not passing requires_grad=False so that
        # backprop routines can raise an error when a further backprop
        # is attempted against this gradient variable.
        grad_var = variable.Variable(gxs[index])
        if grad_var.xp is not chainerx:
            grad_var.node._old_style_grad_generator = self._function.label
        results.append(grad_var)

    return tuple(results)
def backward(self, target_input_indexes, grad_outputs):
    """Run the wrapped function's ``backward`` and wrap its results.

    Raw arrays of the retained inputs and of the output gradients are
    handed to ``self._function.backward``. In ChainerX fallback mode the
    arrays are converted from ChainerX before the call and the resulting
    gradients are converted back afterwards.

    Args:
        target_input_indexes: Indexes of the inputs whose gradients are
            returned.
        grad_outputs: Gradient variables w.r.t. the outputs; entries may
            be ``None``.

    Returns:
        tuple: One entry per requested index, either ``None`` or a
        ``Variable`` wrapping the gradient array.

    """
    retained_inputs = self.get_retained_inputs()
    in_data = [None] * len(self.inputs)
    for retained, index in six.moves.zip(
            retained_inputs, self._input_indexes_to_retain):
        in_data[index] = None if retained is None else retained.array
    in_data = tuple(in_data)

    grad_out_data = tuple(
        [None if grad is None else grad.data for grad in grad_outputs])

    fallback = self._is_chainerx_fallback_mode
    if fallback:
        # Convert input and output gradients to numpy/cupy
        in_data = backend.from_chx(in_data)
        grad_out_data = backend.from_chx(grad_out_data)

    # Call Function.backward
    with cuda.get_device_from_array(*(in_data + grad_out_data)):
        if fallback:
            # Enable attribute fallback
            with function_node._chainerx_attribute_fallback(
                    self._function, self.chainerx_device):
                gxs = self._function.backward(in_data, grad_out_data)
        else:
            gxs = self._function.backward(in_data, grad_out_data)

    # Check gradients
    for x, gx in six.moves.zip(self.inputs, gxs):
        if gx is not None:
            variable._check_grad_type(self, x, True, gx)

    # Convert input gradients back to ChainerX
    if fallback:
        gxs = backend.to_chx(gxs)

    results = []
    for index in target_input_indexes:
        if gxs[index] is None:
            results.append(None)
            continue
        # Intentionally not passing requires_grad=False so that
        # backprop routines can raise an error when a further backprop
        # is attempted against this gradient variable.
        grad_var = variable.Variable(gxs[index])
        if grad_var.xp is not chainerx:
            grad_var.node._old_style_grad_generator = self._function.label
        results.append(grad_var)

    return tuple(results)
def __call__(self, rule, param):
    """Add noise from ``self.noise_func`` to the gradient in place.

    Parameters without a gradient are skipped. The noise is generated
    with the gradient's array module, shape and dtype.

    Args:
        rule: Update rule, forwarded to the noise function.
        param: Parameter whose gradient receives the noise.

    """
    grad = param.grad
    if grad is None:
        return
    xp = cuda.get_array_module(grad)
    with cuda.get_device_from_array(grad) as dev:
        noise = self.noise_func(xp, grad.shape, grad.dtype, self, rule)
        if int(dev) != -1:
            add_noise = cuda.elementwise(
                'T noise', 'T g', 'g += noise', 'gradient_noise')
            add_noise(noise, grad)
        else:
            grad += noise
def check_backward(self, src_id, dst_id):
    """Check that ``functions.copy`` sends the gradient back to the source.

    Args:
        src_id: Device id the input is placed on.
        dst_id: Device id the input is copied to.

    """
    x = chainer.Variable(_to_gpu(self.x_data, src_id))
    y = functions.copy(x, dst_id)
    y.grad = _to_gpu(self.gy, dst_id)
    y.backward()

    grad = x.grad
    # The gradient must sit on the source device and equal ``gy``.
    self.assertEqual(cuda.get_device_from_array(grad).id, src_id)
    numpy.testing.assert_array_equal(cuda.to_cpu(grad), self.gy)
def forward(self, inputs):
    """Run ``self.forward_func`` on host copies of the inputs.

    When the inputs are not NumPy arrays, the outputs are transferred
    to the device the inputs came from.

    Args:
        inputs: Sequence of input arrays.

    Returns:
        tuple: Outputs of ``self.forward_func``.

    """
    gpu = backend.get_array_module(*inputs) is not numpy
    # The device must be read before the inputs are replaced by host
    # copies; afterwards no array carries device information any more.
    device = cuda.get_device_from_array(*inputs) if gpu else None
    inputs = [cuda.to_cpu(x) for x in inputs]

    outputs = self.forward_func(*inputs)

    if gpu:
        # TODO(unno): We can remove redundant gpu-cpu copy using
        # theano.sandbox.cuda.CudaNdarray.gpudata
        outputs = [cuda.to_gpu(x, device) for x in outputs]

    return tuple(outputs)
def forward(self, inputs):
    """Evaluate ``self.forward_func`` on CPU copies of the inputs.

    If the inputs are not NumPy arrays, the results are moved to the
    device of the original inputs before being returned.

    Args:
        inputs: Sequence of input arrays.

    Returns:
        tuple: Outputs of ``self.forward_func``.

    """
    gpu = backend.get_array_module(*inputs) is not numpy
    # Capture the source device first: once the inputs are host copies
    # the original device can no longer be recovered from them.
    device = cuda.get_device_from_array(*inputs) if gpu else None
    host_inputs = [cuda.to_cpu(x) for x in inputs]

    outputs = self.forward_func(*host_inputs)

    if gpu:
        # TODO(unno): We can remove redundant gpu-cpu copy using
        # theano.sandbox.cuda.CudaNdarray.gpudata
        outputs = [cuda.to_gpu(y, device) for y in outputs]

    return tuple(outputs)
def __call__(self, rule, param):
    """Perturb the gradient of ``param`` in place with generated noise.

    Does nothing when the parameter has no gradient. The noise comes
    from ``self.noise_func`` and is added directly on the CPU or with
    an elementwise kernel on other devices.

    Args:
        rule: Update rule, forwarded to the noise function.
        param: Parameter whose gradient receives the noise.

    """
    g = param.grad
    if g is None:
        return
    array_module = cuda.get_array_module(g)
    with cuda.get_device_from_array(g) as dev:
        noise = self.noise_func(array_module, g.shape, g.dtype, self, rule)
        on_cpu = int(dev) == -1
        if on_cpu:
            g += noise
            return
        kernel = cuda.elementwise(
            'T noise', 'T g', 'g += noise', 'gradient_noise')
        kernel(noise, g)
def __call__(self, rule, param):
    """Add ``self.rate * sign(param.data)`` to the gradient in place.

    Skipped when the data or the gradient is ``None``.

    Args:
        rule: Unused by this hook.
        param: Parameter whose gradient is updated.

    """
    data, grad = param.data, param.grad
    if data is None or grad is None:
        return
    xp = cuda.get_array_module(data)
    with cuda.get_device_from_array(data) as dev:
        signs = xp.sign(data)
        if int(dev) != -1:
            lasso_kernel = cuda.elementwise(
                'T s, T decay', 'T g', 'g += decay * s', 'lasso')
            lasso_kernel(signs, self.rate, grad)
        else:
            grad += self.rate * signs
def __call__(self, rule, param):
    """Apply the lasso update to the gradient of ``param`` in place.

    The gradient is increased by ``self.rate`` times the sign of the
    parameter data. Nothing is done if the data or the gradient is
    ``None``.

    Args:
        rule: Unused by this hook.
        param: Parameter whose gradient is updated.

    """
    weights = param.data
    gradient = param.grad
    if weights is None or gradient is None:
        return
    array_module = cuda.get_array_module(weights)
    with cuda.get_device_from_array(weights) as dev:
        sign = array_module.sign(weights)
        on_cpu = int(dev) == -1
        if on_cpu:
            gradient += self.rate * sign
            return
        kernel = cuda.elementwise(
            'T s, T decay', 'T g', 'g += decay * s', 'lasso')
        kernel(sign, self.rate, gradient)
def check_backward(self, src_id, dst_id):
    """Verify the backward pass of ``functions.copy`` across devices.

    The input is placed on ``src_id``, copied to ``dst_id`` and
    backpropagated with ``self.gy`` as the output gradient.

    Args:
        src_id: Device id of the input.
        dst_id: Device id of the copy.

    """
    source_data = _to_gpu(self.x_data, src_id)
    source = chainer.Variable(source_data)
    copied = functions.copy(source, dst_id)
    upstream = _to_gpu(self.gy, dst_id)
    copied.grad = upstream
    copied.backward()

    source_grad = source.grad
    device_id = cuda.get_device_from_array(source_grad).id
    self.assertEqual(device_id, src_id)
    numpy.testing.assert_array_equal(
        cuda.to_cpu(source_grad), self.gy)
def reallocate_cleared_grads(self):
    """Reallocate gradients cleared by :meth:`~chainer.Variable.cleargrad`.

    Every parameter of the target whose gradient is :obj:`None` gets a
    zero-filled gradient array shaped like its data.

    This method is called before and after every optimizer hook.
    If an inheriting optimizer does not require this allocation,
    the optimizer can override this method with a blank function.

    """
    for _, param in self.target.namedparams(False):
        if param.grad is not None:
            continue
        data = param.data
        with cuda.get_device_from_array(data):
            xp = cuda.get_array_module(data)
            param.grad = xp.zeros_like(data)
def reallocate_cleared_grads(self):
    """Reallocate gradients cleared by :meth:`~chainer.Variable.cleargrad`.

    This method allocates zero arrays for all gradients which are
    :obj:`None`; gradients that are already set stay untouched.

    This method is called before and after every optimizer hook.
    If an inheriting optimizer does not require this allocation,
    the optimizer can override this method with a blank function.

    """
    missing = (
        param for _name, param in self.target.namedparams(False)
        if param.grad is None)
    for param in missing:
        with cuda.get_device_from_array(param.data):
            array_module = cuda.get_array_module(param.data)
            param.grad = array_module.zeros_like(param.data)
def _contains_nan(x):
    """Returns whether the input array has NaN values.

    Only floating-point and complex arrays are inspected; arrays of any
    other dtype kind yield ``False`` without being scanned.

    Args:
        x (numpy.ndarray or cupy.ndarray): Array to be checked.

    Returns:
        bool: True if the input has NaN values.

    """
    if x.dtype.kind not in ('f', 'c'):
        return False
    with cuda.get_device_from_array(x):
        xp = get_array_module(x)
        return xp.isnan(x).any()
def update_core(self, param):
    """Updates the parameter.

    Dispatches to :meth:`update_core_cpu` when the parameter data is on
    the CPU (device id ``-1``) and to :meth:`update_core_gpu` otherwise.
    Implementations of UpdateRule should override this method or both
    of those methods.

    Args:
        param (~chainer.Variable): Variable to be updated.

    """
    with cuda.get_device_from_array(param.data) as dev:
        on_cpu = int(dev) == -1
        update = self.update_core_cpu if on_cpu else self.update_core_gpu
        update(param)
def update_core(self, param):
    """Updates the parameter.

    The work is delegated to :meth:`update_core_gpu` unless the
    parameter data lives on the CPU (device id ``-1``), in which case
    :meth:`update_core_cpu` is used. Implementations of UpdateRule
    should override this method or both of those methods.

    Args:
        param (~chainer.Variable): Variable to be updated.

    """
    with cuda.get_device_from_array(param.data) as dev:
        if int(dev) != -1:
            self.update_core_gpu(param)
        else:
            self.update_core_cpu(param)
def _contains_nan(x):
    """Returns whether the input array has NaN values.

    Arrays whose dtype kind is neither float (``'f'``) nor complex
    (``'c'``) are reported as NaN-free without being scanned.

    Args:
        x (numpy.ndarray or cupy.ndarray): Array to be checked.

    Returns:
        bool: True if the input has NaN values.

    """
    can_hold_nan = x.dtype.kind in ('f', 'c')
    if not can_hold_nan:
        return False
    with cuda.get_device_from_array(x):
        nan_mask = cuda.get_array_module(x).isnan(x)
        return nan_mask.any()
def get_loss(self, x, tbox, tlabel, input_size):
    """Compute the summed squared-error loss of the predictions.

    The network output is split into x, y, w, h, confidence and class
    probability parts. Each part is compared with the target obtained
    from ``get_t_and_scale``; the four box terms are weighted by the
    box learning scale. The total is divided by the batch size.

    Args:
        x: Raw network output.
        tbox: Target boxes, forwarded to ``get_t_and_scale``.
        tlabel: Target labels, forwarded to ``get_t_and_scale``.
        input_size: Input size, forwarded to ``get_t_and_scale``.

    Returns:
        The loss divided by ``x.shape[0]`` of the split x part.

    """
    x, y, w, h, conf, prob = self._split(x)

    with cuda.get_device_from_array(x.data):
        targets = self.get_t_and_scale(
            x, y, w, h, conf, prob, tbox, tlabel, input_size)
        tx, ty, tw, th, tconf, tprob, box_learning_scale = targets

        # Box terms are weighted by the per-box learning scale.
        x_loss = F.sum((tx - x)**2 * box_learning_scale) / 2
        y_loss = F.sum((ty - y)**2 * box_learning_scale) / 2
        w_loss = F.sum((tw - w)**2 * box_learning_scale) / 2
        h_loss = F.sum((th - h)**2 * box_learning_scale) / 2
        c_loss = F.sum((tconf - conf)**2) / 2
        p_loss = F.sum((tprob - prob)**2) / 2

        total = x_loss + y_loss + w_loss + h_loss + c_loss + p_loss
        loss = total / x.shape[0]
    return loss
def _concat_arrays_with_padding(arrays, padding):
    """Stack arrays of different shapes into one padded batch.

    The result has a leading axis of length ``len(arrays)`` followed by
    the elementwise maximum of all array shapes. Each array is written
    into the leading corner of its slot; the rest keeps ``padding``.

    Args:
        arrays: Sequence of arrays.
        padding: Fill value for positions not covered by an array.

    Returns:
        The padded batch, with the dtype of the first array.

    """
    first = arrays[0]
    max_shape = numpy.array(first.shape, dtype=int)
    for array in arrays[1:]:
        if numpy.any(max_shape != array.shape):
            # In-place elementwise maximum over the shapes seen so far.
            numpy.maximum(max_shape, array.shape, max_shape)
    batch_shape = tuple(numpy.insert(max_shape, 0, len(arrays)))

    xp = cuda.get_array_module(first)
    with cuda.get_device_from_array(first):
        result = xp.full(batch_shape, padding, dtype=first.dtype)
        for i in six.moves.range(len(arrays)):
            src = arrays[i]
            region = tuple(slice(dim) for dim in src.shape)
            result[(i, ) + region] = src

    return result
def _concat_arrays_with_padding(arrays, padding):
    """Batch arrays of possibly different shapes, filling with padding.

    A result array of shape ``(len(arrays),) + max_shape`` is created,
    where ``max_shape`` is the elementwise maximum over all array
    shapes. It is pre-filled with ``padding`` and each array is copied
    into the leading corner of its own slot.

    Args:
        arrays: Sequence of arrays.
        padding: Fill value for positions not covered by an array.

    Returns:
        The padded batch, with the dtype of the first array.

    """
    shape = numpy.array(arrays[0].shape, dtype=int)
    for other in arrays[1:]:
        if numpy.any(shape != other.shape):
            numpy.maximum(shape, other.shape, shape)
    shape = tuple(numpy.insert(shape, 0, len(arrays)))

    xp = cuda.get_array_module(arrays[0])
    with cuda.get_device_from_array(arrays[0]):
        batch = xp.full(shape, padding, dtype=arrays[0].dtype)
        for index in six.moves.range(len(arrays)):
            item = arrays[index]
            corner = (index,) + tuple(slice(dim) for dim in item.shape)
            batch[corner] = item

    return batch
def check_forward(self, x_data):
    """Check two consecutive calls of the link against ``functions.lstm``.

    The first call passes ``None`` for both states; the second call
    feeds the first call's outputs back in with the same input.

    Args:
        x_data: Raw input array.

    """
    xp = self.link.xp
    x = chainer.Variable(x_data) if self.input_variable else x_data

    # First step: no previous cell or hidden state.
    c1, h1 = self.link(None, None, x)
    with cuda.get_device_from_array(x_data):
        zeros = xp.zeros((len(self.x), self.out_size), dtype=self.x.dtype)
        c0 = chainer.Variable(zeros)
        c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x))
    testing.assert_allclose(h1.data, h1_expect.data)
    testing.assert_allclose(c1.data, c1_expect.data)

    # Second step: the previous outputs are fed back in.
    c2, h2 = self.link(c1, h1, x)
    lstm_in = self.link.upward(x) + self.link.lateral(h1)
    c2_expect, h2_expect = functions.lstm(c1_expect, lstm_in)
    testing.assert_allclose(h2.data, h2_expect.data)
    testing.assert_allclose(c2.data, c2_expect.data)
def _nary_tree_lstm(func, *inputs):
    """Compute an N-ary tree LSTM step directly on arrays.

    Args:
        func: Link providing ``W_x``, ``W_h1`` .. ``W_h<n_ary>`` and
            ``n_ary``.
        *inputs: Cell states, then hidden states, then the input ``x``
            as the last element.

    Returns:
        tuple: ``(c_next, y)``.

    """
    half = len(inputs) // 2
    cs = inputs[:half]
    hs = inputs[half:-1]
    x = inputs[-1]
    xp = backend.get_array_module(x)
    with cuda.get_device_from_array(x):
        # Gates: a, i, o and one forget gate per child.
        n_gates = 3 + func.n_ary
        W_x = func.W_x.W.data.T
        b_x = func.W_x.b.data
        W_h_list = [getattr(func, 'W_h{}'.format(i)).W.data.T
                    for i in range(1, func.n_ary + 1)]

        W_xs = xp.split(W_x, n_gates, 1)
        W_xa, W_xi, W_xo, W_xfs = W_xs[0], W_xs[1], W_xs[2], W_xs[3:]
        b_xs = xp.split(b_x[None, ], n_gates, 1)
        b_a, b_i, b_o, b_fs = b_xs[0], b_xs[1], b_xs[2], b_xs[3:]

        W_h_parts = [xp.split(W_h, n_gates, 1) for W_h in W_h_list]
        W_ha_list = [parts[0] for parts in W_h_parts]
        W_hi_list = [parts[1] for parts in W_h_parts]
        W_ho_list = [parts[2] for parts in W_h_parts]
        W_hfs_list = [parts[3:] for parts in W_h_parts]
        assert(all(len(W_hfs_list) == len(W_hfs) for W_hfs in W_hfs_list))

        def pre_activation(W_x_part, b_part, W_h_parts_for_gate):
            # Input term, bias, then the contribution of every child.
            return x.dot(W_x_part) + b_part + \
                sum(h.dot(W_h) for h, W_h in zip(hs, W_h_parts_for_gate))

        a = pre_activation(W_xa, b_a, W_ha_list)
        i = pre_activation(W_xi, b_i, W_hi_list)
        o = pre_activation(W_xo, b_o, W_ho_list)
        f_list = [pre_activation(W_xf, b_f, W_hf_list)
                  for W_xf, b_f, W_hf_list
                  in zip(W_xfs, b_fs, zip(*W_hfs_list))]

        a = xp.tanh(a)
        i = _sigmoid(i)
        o = _sigmoid(o)
        f_list = [_sigmoid(f) for f in f_list]

        c_next = a * i + sum(f * c for f, c in zip(f_list, cs))
        y = o * xp.tanh(c_next)
    return c_next, y
def _zoneoutlstm(func, c, h, x, c_creator, h_creator):
    """Compute a zoneout LSTM step directly on arrays.

    Args:
        func: Link providing the ``upward`` and ``lateral`` weights.
        c: Previous cell state.
        h: Previous hidden state.
        x: Input array.
        c_creator: Object providing ``flag_h`` and ``flag_x`` masks for
            the cell state.
        h_creator: Object providing ``flag_h`` and ``flag_x`` masks for
            the hidden state.

    Returns:
        tuple: ``(c_next, h_next)``.

    """
    xp = backend.get_array_module(x)
    with cuda.get_device_from_array(x):
        lstm_in = x.dot(func.upward.W.data.T)
        lstm_in += h.dot(func.lateral.W.data.T)
        lstm_in = xp.reshape(
            lstm_in, (len(lstm_in), lstm_in.shape[1] // 4, 4))

        # Split the last axis into the four gates and drop that axis.
        a, i, f, o = [
            xp.reshape(gate, (len(gate), gate.shape[1]))
            for gate in xp.split(lstm_in, 4, 2)]

        c_tmp = xp.tanh(a) * _sigmoid(i) + _sigmoid(f) * c
        # flag_h keeps the previous value, flag_x takes the new one.
        c_next = c * c_creator.flag_h + c_tmp * c_creator.flag_x
        h_tmp = _sigmoid(o) * xp.tanh(c_tmp)
        h_next = h * h_creator.flag_h + h_tmp * h_creator.flag_x
    return c_next, h_next
def debug_print(self):
    """Return a text summary of the stored data and its location.

    The summary lists device, backend type, shape, dtype, mean/std
    statistics of the data and of the gradient. Entries are ``None``
    when the data (or the gradient) is ``None``; an all-zero gradient is
    reported as ``0``.

    Returns:
        str: The formatted summary.

    """
    template = """{summary} - device: {device} - backend: {backend} - shape: {shape} - dtype: {dtype} - statistics: {stats} - grad: {grad}"""
    stats_template = 'mean={0:.8f}, std={1:.8f}'

    data = self.data
    with cuda.get_device_from_array(data) as dev:
        xp = numpy if int(dev) == -1 else cuda.cupy

        if data is not None:
            device = getattr(data, 'device', 'CPU')
            backend_type = type(data)
            stats = stats_template.format(
                float(xp.mean(data)), float(xp.std(data)))
        else:
            # `data` can be `None` if constructed without any arguments
            device = None
            backend_type = None
            stats = None
        shape = getattr(data, 'shape', None)
        dtype = getattr(data, 'dtype', None)

        if self.grad is None:
            grad = None
        elif xp.all(self.grad == 0):
            grad = 0
        else:
            grad = stats_template.format(
                float(xp.mean(self.grad)), float(xp.std(self.grad)))

    return template.format(
        summary=self.summary(), device=device, backend=backend_type,
        shape=shape, dtype=dtype, stats=stats, grad=grad)
def _child_sum_tree_lstm(func, *inputs):
    """Compute a child-sum tree LSTM step directly on arrays.

    Args:
        func: Link providing ``W_x``, ``W_h_aio`` and ``W_h_f``.
        *inputs: Cell states, then hidden states, then the input ``x``
            as the last element.

    Returns:
        tuple: ``(c_next, y)``.

    """
    half = len(inputs) // 2
    cs = inputs[:half]
    hs = inputs[half:-1]
    x = inputs[-1]
    xp = backend.get_array_module(x)
    with cuda.get_device_from_array(x):
        W_x = func.W_x.W.data.T
        b_x = func.W_x.b.data
        W_h_aio = func.W_h_aio.W.data.T
        W_h_f = func.W_h_f.W.data.T

        W_xa, W_xi, W_xo, W_xf = xp.split(W_x, 4, 1)
        b_a, b_i, b_o, b_f = xp.split(b_x[None, ], 4, 1)
        W_ha, W_hi, W_ho = xp.split(W_h_aio, 3, 1)
        W_hf = W_h_f

        has_children = len(hs) >= 1
        if has_children:
            sum_h = sum(hs)
            a = x.dot(W_xa) + sum_h.dot(W_ha) + b_a
            i = x.dot(W_xi) + sum_h.dot(W_hi) + b_i
            o = x.dot(W_xo) + sum_h.dot(W_ho) + b_o
            f_list = [x.dot(W_xf) + h.dot(W_hf) + b_f for h in hs]
        else:
            a = x.dot(W_xa) + b_a
            i = x.dot(W_xi) + b_i
            o = x.dot(W_xo) + b_o

        a = xp.tanh(a)
        i = _sigmoid(i)
        o = _sigmoid(o)

        if has_children:
            f_list = [_sigmoid(f) for f in f_list]
            # Start from a * i and add one forget term per child.
            c_next = sum([f * c for f, c in zip(f_list, cs)], a * i)
        else:
            c_next = a * i
        y = o * xp.tanh(c_next)
    return c_next, y
def backward(self, target_input_indexes, grad_outputs):
    """Run the wrapped function's ``backward`` and wrap its results.

    Args:
        target_input_indexes: Indexes of the inputs whose gradients are
            returned.
        grad_outputs: Gradient variables w.r.t. the outputs; entries may
            be ``None``.

    Returns:
        tuple: One entry per requested index, either ``None`` or a
        ``Variable`` wrapping the gradient array.

    """
    in_data = tuple([x.data for x in self.inputs])
    grad_out_data = tuple(
        [None if g is None else g.data for g in grad_outputs])

    with cuda.get_device_from_array(*(in_data + grad_out_data)):
        gxs = self._function.backward(in_data, grad_out_data)
    for x, gx in six.moves.zip(self.inputs, gxs):
        variable._check_grad_type(self, x, gx)

    results = []
    for index in target_input_indexes:
        if gxs[index] is None:
            results.append(None)
            continue
        # Intentionally not passing requires_grad=False so that
        # backprop routines can raise an error when a further backprop
        # is attempted against this gradient variable.
        grad_var = variable.Variable(gxs[index])
        grad_var.node._old_style_grad_generator = self._function.label
        results.append(grad_var)
    return tuple(results)