def forward(self, x, finetune=False):
    """Apply batch renormalization to ``x``.

    Args:
        x: Input handed to the normalization function.
        finetune (bool): In training mode, when true ``self.N`` is
            incremented and ``1 - 1 / N`` is used as the decay; otherwise
            ``self.decay`` is used.

    Returns:
        The output of ``batch_renormalization`` in training mode, or of
        ``fixed_batch_normalization`` with the stored averages otherwise.
    """
    scale = self.gamma
    if scale is None:
        # No scale parameter: substitute ones.
        with chainer.using_device(self.device):
            scale = self.xp.ones(self.avg_mean.shape, dtype=x.dtype)

    shift = self.beta
    if shift is None:
        # No shift parameter: substitute zeros.
        with chainer.using_device(self.device):
            shift = self.xp.zeros(self.avg_mean.shape, dtype=x.dtype)

    if not configuration.config.train:
        # Use running average statistics or fine-tuned statistics.
        return batch_normalization.fixed_batch_normalization(
            x, scale, shift, self.avg_mean, self.avg_var, self.eps)

    if finetune:
        self.N += 1
        decay = 1. - 1. / self.N
    else:
        decay = self.decay

    return batch_renormalization.batch_renormalization(
        x, scale, shift, self.rmax, self.dmax, self.eps,
        self.avg_mean, self.avg_var, decay, update_statistics=True)
def check_forward(self, x1_data, x2_data, x3_data):
    """Check three successive calls of ``self.link`` against ``functions.lstm``.

    The expected values are rebuilt from ``self.link.upward`` and
    ``self.link.lateral`` and compared with the link outputs and with the
    ``h`` / ``c`` state stored on the link after each call.

    Args:
        x1_data: Array for the first call.
        x2_data: Array for the second call; ``len(x2_data)`` selects how many
            leading rows of the stored hidden state are fed back.
        x3_data: Array for the third call.
            NOTE(review): the expectations for this step do not add a
            lateral term and require ``h`` to stay unchanged, so this is
            presumably an empty batch -- confirm against the test fixture.
    """
    xp = self.link.xp

    def as_input(data):
        # Wrap in a Variable only when the test case asks for it.
        return chainer.Variable(data) if self.input_variable else data

    # Resolve the device from the raw array. The inputs handed to the link
    # may be Variables, which are not arrays, so the lookup must not be done
    # on them.
    device = backend.get_device_from_array(x1_data)

    # Step 1: fresh state.
    x1 = as_input(x1_data)
    h1 = self.link(x1)
    with chainer.using_device(device):
        c0 = chainer.Variable(
            xp.zeros((len(self.x1), self.out_size), dtype=self.x1.dtype))
        c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x1))
    testing.assert_allclose(h1.data, h1_expect.data)
    testing.assert_allclose(self.link.h.data, h1_expect.data)
    testing.assert_allclose(self.link.c.data, c1_expect.data)

    # Step 2: only the first `batch` rows of the state are updated.
    batch = len(x2_data)
    x2 = as_input(x2_data)
    h1_in, h1_rest = functions.split_axis(
        self.link.h.data, [batch], axis=0)
    y2 = self.link(x2)
    with chainer.using_device(device):
        c2_expect, y2_expect = functions.lstm(
            c1_expect, self.link.upward(x2) + self.link.lateral(h1_in))
    testing.assert_allclose(y2.data, y2_expect.data)
    testing.assert_allclose(self.link.h.data[:batch], y2_expect.data)
    testing.assert_allclose(self.link.h.data[batch:], h1_rest.data)

    # Step 3: the stored hidden state must be left as it was.
    x3 = as_input(x3_data)
    h2_rest = self.link.h
    y3 = self.link(x3)
    c3_expect, y3_expect = functions.lstm(c2_expect, self.link.upward(x3))
    testing.assert_allclose(y3.data, y3_expect.data)
    testing.assert_allclose(self.link.h.data, h2_rest.data)
def normalize_weight(self, link):
    """Return the target weight of ``link`` divided by its estimated sigma.

    Outside of recomputation the approximate vectors are refreshed, ``v`` is
    cached on the hook, and in training mode the updated ``u`` is written
    back to the link. During recomputation the cached ``self.v`` is reused
    and nothing is written.
    """
    vector_name = self.vector_name
    W = getattr(link, self.weight_name)
    u = getattr(link, vector_name)
    weight_matrix = self.reshape_W(W)
    recomputing = configuration.config.in_recomputing

    if recomputing:
        v = self.v
    else:
        with chainer.using_device(link.device):
            u, v = update_approximate_vectors(
                weight_matrix, u, self.n_power_iteration, self.eps)

    sigma = calculate_max_singular_value(weight_matrix, u, v)
    if self.factor is not None:
        sigma /= self.factor

    W = link.gamma * W / sigma if self.use_gamma else W / sigma

    if recomputing:
        return W

    self.v = v
    with chainer.using_device(link.device):
        if configuration.config.train:
            target = getattr(link, vector_name)
            if link.xp is chainerx:
                # TODO(crcrpar): Remove this when
                # chainerx supports `copyto`.
                target[:] = u
            else:
                backend.copyto(target, u)
    return W
def forward(self, c, h, x):
    """Compute one LSTM step without keeping state on the link.

    Args:
        c (~chainer.Variable): Cell states of LSTM units, or ``None`` to
            start from zeros.
        h (~chainer.Variable): Output at the previous time step, or ``None``
            to skip the lateral connection.
        x (~chainer.Variable): A new batch from the input sequence.

    Returns:
        tuple of ~chainer.Variable: ``(c_new, h_new)``, the new cell state
        and the updated output of the LSTM units.
    """
    if self.upward.W.array is None:
        # Lazy initialization: infer the input size from the first batch.
        in_size = x.size // x.shape[0]
        with chainer.using_device(self.device):
            self.upward._initialize_params(in_size)
            self._initialize_params()

    gate_input = self.upward(x)
    if h is not None:
        gate_input += self.lateral(h)

    if c is None:
        xp = self.xp
        zero_shape = (x.shape[0], self.state_size)
        with chainer.using_device(self.device):
            c = variable.Variable(xp.zeros(zero_shape, dtype=x.dtype))

    return lstm.lstm(c, gate_input)
def check_equal_memory_shared(self, arr1, arr2):
    """Assert that ``arr1`` and ``arr2`` share the internal memory.

    The arrays are compared on the CPU, ``arr1`` is shifted in place by 2,
    the comparison is repeated, and the shift is undone.
    """
    def assert_same_on_cpu():
        lhs = backend.CpuDevice().send(arr1)
        rhs = backend.CpuDevice().send(arr2)
        numpy.testing.assert_array_equal(lhs, rhs)

    assert_same_on_cpu()

    # An in-place change of arr1 must be visible through arr2.
    with chainer.using_device(backend.get_device_from_array(arr1)):
        arr1 += 2
    assert_same_on_cpu()

    # Restore the original contents.
    with chainer.using_device(backend.get_device_from_array(arr1)):
        arr1 -= 2
def check_equal_memory_shared(self, arr1, arr2):
    """Assert that ``arr1`` and ``arr2`` share the internal memory.

    Both arrays are sent to the CPU and compared before and after an
    in-place ``+= 2`` on ``arr1``; the modification is reverted afterwards.
    """
    def cpu_copies():
        return (backend.CpuDevice().send(arr1),
                backend.CpuDevice().send(arr2))

    numpy.testing.assert_array_equal(*cpu_copies())

    # Mutate arr1 in place; arr2 has to follow if the memory is shared.
    with chainer.using_device(backend.get_device_from_array(arr1)):
        arr1 += 2
    numpy.testing.assert_array_equal(*cpu_copies())

    # Undo the mutation.
    with chainer.using_device(backend.get_device_from_array(arr1)):
        arr1 -= 2
def forward(self, x, finetune=False):
    """Apply batch renormalization to ``x``, aware of recomputation.

    Args:
        x: Input handed to the normalization function.
        finetune (bool): In training mode, when true ``self.N`` is
            incremented and ``1 - 1 / N`` is used as the decay; otherwise
            ``self.decay`` is used.

    Returns:
        The output of ``batch_renormalization`` in training mode, or of
        ``fixed_batch_normalization`` with the stored averages otherwise.
    """
    scale = self.gamma
    if scale is None:
        with chainer.using_device(self.device):
            scale = self.xp.ones(self.avg_mean.shape, dtype=x.dtype)

    shift = self.beta
    if shift is None:
        with chainer.using_device(self.device):
            shift = self.xp.zeros(self.avg_mean.shape, dtype=x.dtype)

    if not configuration.config.train:
        # Use running average statistics or fine-tuned statistics.
        return batch_normalization.fixed_batch_normalization(
            x, scale, shift, self.avg_mean, self.avg_var, self.eps)

    if finetune:
        self.N += 1
        decay = 1. - 1. / self.N
    else:
        decay = self.decay

    running_mean = self.avg_mean
    running_var = self.avg_var
    update_statistics = True

    if chainer.config.in_recomputing:
        # Do not update statistics when extra forward computation is
        # called.
        if finetune:
            self.N -= 1  # Revert the count
        running_mean = self._prev_avg_mean
        running_var = self._prev_avg_var
        update_statistics = False
    elif chainer.config._will_recompute:
        # Snapshot the averages so the later recomputation can reuse them.
        self._prev_avg_mean = running_mean.copy()
        self._prev_avg_var = running_var.copy()

    return batch_renormalization.batch_renormalization(
        x, scale, shift, self.rmax, self.dmax, self.eps,
        running_mean, running_var, decay,
        update_statistics=update_statistics)
def generate_array(initializer, shape, xp, dtype=None, device=None):
    # type: (types.AbstractInitializer, types.ShapeSpec, types.Xp, types.DTypeSpec, types.DeviceSpec) -> types.NdArray # NOQA
    """Allocate an array and fill it with ``initializer``.

    How the values are produced depends on the concrete initializer. When
    the initializer has a ``dtype`` attribute it is used to construct the
    array. Otherwise, ``chainer.config.dtype`` is used instead.
    See :ref:`configuration` for the dtype config.

    Args:
        initializer: A callable object that takes :ref:`ndarray` and edits
            its value.
        shape (int or tuple of int): Shape of the initialized array.
        xp (module): :mod:`cupy`, :mod:`numpy`, or :mod:`chainerx`.
        dtype: Dtype specifier. If omitted, ``initializer.dtype`` is used.
        device: Target device specifier. If omitted, the current device is
            used for :mod:`cupy`, and the default device is used for
            :mod:`chainerx`.

    Returns:
        :ref:`ndarray`: An initialized array.

    Raises:
        ValueError: If ``dtype`` disagrees with ``initializer.dtype``, or if
            ``xp`` and ``device`` are inconsistent.
    """
    initializer_dtype = getattr(initializer, 'dtype', None)
    if dtype is None:
        dtype = initializer_dtype
    elif (initializer_dtype is not None
          and numpy.dtype(dtype) != numpy.dtype(initializer_dtype)):
        raise ValueError(
            'dtype mismatch: {} != {}'.format(dtype, initializer_dtype))
    dtype = chainer.get_dtype(dtype)

    if device is None:
        backend_device = backend._guess_device_from_array_module(xp)
    else:
        backend_device = chainer.get_device(device)
        if xp != backend_device.xp:
            raise ValueError('xp and device arguments are inconsistent.')

    if xp is not chainerx:
        with chainer.using_device(backend_device):
            array = xp.empty(shape, dtype=dtype)
            initializer(array)
        return array

    # Initialize with NumPy/CuPy array that shares memory with the
    # ChainerX array.
    # TODO(sonots): Directly use initializer after ChainerX
    # supports random.
    array = chainerx.empty(shape, dtype=dtype, device=backend_device.device)
    fallback_device = backend_device.fallback_device
    with chainer.using_device(fallback_device):
        initializer(fallback_device.send(array))
    return array
def __call__(self, x, **kwargs):
    """Normalize ``x`` per instance via ``functions.batch_normalization``.

    The leading two axes ``(B, C)`` of ``x`` are folded into a single axis
    of length ``B * C`` behind a singleton axis, batch normalization is
    applied, and the result is reshaped back to the input shape. The stored
    averages are replaced by the mean over ``B`` of the tiled running
    statistics.

    Args:
        x: Input with at least two axes.
        finetune (bool): Keyword-only. When true, ``self.N`` is incremented
            and ``1 - 1 / N`` is used as the decay.

    Returns:
        The normalized input with the shape of ``x``.
    """
    argument.check_unexpected_kwargs(
        kwargs,
        test='test argument is not supported anymore. '
             'Use chainer.using_config')
    finetune, = argument.parse_kwargs(kwargs, ('finetune', False))

    # reshape input x for instance normalization
    original_shape = x.shape
    B, C = original_shape[:2]
    folded = functions.reshape(x, (1, B * C) + original_shape[2:])

    scale = self.gamma
    if scale is None:
        with chainer.using_device(self.device):
            scale = self.xp.ones(self.avg_mean.shape, dtype=self._dtype)

    shift = self.beta
    if shift is None:
        with chainer.using_device(self.device):
            shift = self.xp.zeros(self.avg_mean.shape, dtype=self._dtype)

    # Repeat the per-channel quantities once per sample.
    repeats = (B, )
    scale = functions.tile(scale, repeats)
    shift = functions.tile(shift, repeats)
    mean = self.xp.tile(self.avg_mean, repeats)
    var = self.xp.tile(self.avg_var, repeats)

    # instance normalization is always done in training mode
    if finetune:
        self.N += 1
        decay = 1. - 1. / self.N
    else:
        decay = self.decay

    normalized = functions.batch_normalization(
        folded, scale, shift, eps=self.eps,
        running_mean=mean, running_var=var, decay=decay)

    self.avg_mean = mean.reshape(B, C).mean(axis=0)
    self.avg_var = var.reshape(B, C).mean(axis=0)

    # the result is the normalized input x
    return functions.reshape(normalized, original_shape)
def predict(self, img_feats, bos, eos, max_caption_length):
    """Greedily decode captions for a batch of image features.

    Every caption starts with ``bos``. At most ``max_caption_length`` tokens
    are appended, and decoding stops early once every prediction of a step
    equals ``eos``.

    Returns:
        An int32 array with one caption per row.
    """
    hx, cx, _ = self.reset(img_feats)

    with chainer.using_device(self.device):
        xp = self.xp
        n_captions = img_feats.shape[0]
        captions = xp.full((n_captions, 1), bos, dtype=np.int32)

        for _step in range(max_caption_length):
            # The last token of each caption is the next input.
            previous_tokens = [
                xp.atleast_1d(caption[-1]) for caption in captions]

            # Get the predictions `ys`
            hx, cx, ys = self.step(hx, cx, previous_tokens)

            # Take the index with the highest confidence as the predicted
            # token.
            #
            # Note that this is a greedy approach and that it can be
            # replaced by e.g. beam search
            pred = ys.array.argmax(axis=1).astype(np.int32)
            captions = xp.hstack((captions, pred[:, None]))

            if (pred == eos).all():
                break

    return captions
def _as_array(data):
    """Return ``data`` as an array.

    Objects that already are one of ``chainer.get_array_types()`` are
    returned unchanged. Anything else is converted with ``asarray`` of the
    array module belonging to the device of ``data[0]``.
    """
    if isinstance(data, chainer.get_array_types()):
        return data

    # Pick the array module from the first element.
    device = chainer.backend.get_device_from_array(data[0])
    with chainer.using_device(device):
        return device.xp.asarray(data)
def forward(self, x):
    """Advance the peephole LSTM by one step and return its output.

    Args:
        x (~chainer.Variable): A new batch from the input sequence.

    Returns:
        ~chainer.Variable: Outputs of updated LSTM units.
    """
    gate_input = self.upward(x)
    if self.h is not None:
        gate_input += self.lateral(self.h)

    if self.c is None:
        # First step: start from a zero cell state.
        xp = self.xp
        with chainer.using_device(self.device):
            self.c = variable.Variable(
                xp.zeros((len(x), self.state_size), dtype=x.dtype))

    # Split the stacked pre-activations into the four gates.
    n_rows = len(gate_input)
    n_units = gate_input.shape[1] // 4
    gate_input = reshape.reshape(gate_input, (n_rows, n_units, 4))
    a, i, f, o = (
        reshape.reshape(gate, gate.shape[:2])
        for gate in split_axis.split_axis(gate_input, 4, 2))

    # Input and forget gates peek at the previous cell state.
    peep_in_i = self.peep_i(self.c)
    peep_in_f = self.peep_f(self.c)
    a = tanh.tanh(a)
    i = sigmoid.sigmoid(i + peep_in_i)
    f = sigmoid.sigmoid(f + peep_in_f)
    self.c = a * i + f * self.c

    # The output gate peeks at the updated cell state.
    o = sigmoid.sigmoid(o + self.peep_o(self.c))
    self.h = o * tanh.tanh(self.c)
    return self.h
def forward(self, x):
    """Update the stored state and return the peephole LSTM output.

    Args:
        x (~chainer.Variable): A new batch from the input sequence.

    Returns:
        ~chainer.Variable: Outputs of updated LSTM units.
    """
    pre_activation = self.upward(x)
    if self.h is not None:
        pre_activation += self.lateral(self.h)

    if self.c is None:
        # No cell state yet: initialize it with zeros.
        xp = self.xp
        with chainer.using_device(self.device):
            self.c = variable.Variable(
                xp.zeros((len(x), self.state_size), dtype=x.dtype))

    # Separate the four gates stored along the last axis.
    stacked_shape = (len(pre_activation), pre_activation.shape[1] // 4, 4)
    pre_activation = reshape.reshape(pre_activation, stacked_shape)
    a, i, f, o = (
        reshape.reshape(part, part.shape[:2])
        for part in split_axis.split_axis(pre_activation, 4, 2))

    # Peephole terms for the input and forget gates use the old cell state.
    peep_in_i = self.peep_i(self.c)
    peep_in_f = self.peep_f(self.c)
    a = tanh.tanh(a)
    i = sigmoid.sigmoid(i + peep_in_i)
    f = sigmoid.sigmoid(f + peep_in_f)
    self.c = a * i + f * self.c

    # The peephole term for the output gate uses the new cell state.
    peep_in_o = self.peep_o(self.c)
    o = sigmoid.sigmoid(o + peep_in_o)
    self.h = o * tanh.tanh(self.c)
    return self.h
def __init__(self, prediction_config_path="prediction_config.json", gpu=-1):
    """Build the classifier described by a JSON prediction config.

    Args:
        prediction_config_path: Path of a JSON file providing the keys
            ``classes``, ``input_image_size``, ``resnet_size`` and
            ``model_path``.
        gpu: Device id; the models are moved to this device when
            ``int(gpu) >= 0``.
    """
    self.gpu = gpu

    with open(prediction_config_path) as config_file:
        config = json.load(config_file)

    # Short class names from the config mapped to display names.
    display_names = {
        "alpha_num": "Alphanumeric",
        "alphanum": "Alphanumeric",
        "date": "Date",
        "num": "Number",
        "plz": "Zip Code",
        "text": "Word"
    }
    class_names = sorted(config["classes"])
    self.idx_to_label_map = {
        index: display_names[class_name]
        for index, class_name in enumerate(class_names)
    }

    self.input_image_size = config["input_image_size"]
    self.base_model = PooledResNet(config["resnet_size"])
    self.model = CrossEntropyClassifier(self.base_model, len(class_names))

    # Restore the trained weights.
    with numpy.load(config["model_path"]) as weights:
        deserializer = chainer.serializers.NpzDeserializer(
            weights, strict=True)
        deserializer.load(self.model)

    if int(self.gpu) >= 0:
        with chainer.using_device(chainer.get_device(self.gpu)):
            self.base_model.to_device(self.gpu)
            self.model.to_device(self.gpu)
def backward(self, indexes, flat_gys):
    """Run the backward graph and return the input gradients.

    The output gradients and the values retained by the forward pass are
    fed to ``self.bwd``. A result named ``'grad_out@' + name`` is collected
    for every forward input; inputs without such a result get ``None``.
    The retained values and nested outputs stored on ``self`` are deleted.

    Returns:
        tuple: One entry per flattened forward input, each either ``None``
        or a ``chainer.Variable``.
    """
    device = chainer.backend.get_device_from_array(flat_gys[0].array)

    gys, _ = _unflatten(flat_gys, self.nested_outputs)
    retained = self.retained
    values = [self._to_var(gy) for gy in gys] + retained

    # The stored forward results are no longer needed.
    del self.retained
    del self.nested_outputs

    assert len(self.bwd_input_names) == len(values)
    bwd_inputs = dict(zip(self.bwd_input_names, values))

    with chainer.using_device(self.chainerx_device_name):
        outputs = self.bwd.run(bwd_inputs)

    grads = []
    assert len(self.input_tmpl) == len(self.fwd_input_names)
    for name, tmpl in zip(self.fwd_input_names, self.input_tmpl):
        grad_name = 'grad_out@' + name
        if grad_name not in outputs:
            # No gradient was produced for this input.
            grads.extend([None] * len(_flatten(tmpl)))
            continue

        gx = _from_var(outputs[grad_name], device)
        if _is_array(tmpl):
            grads.append(gx)
        else:
            assert len(gx) == len(tmpl)
            grads.extend(_flatten_structured(gx, tmpl))

    return tuple(
        None if gx is None else chainer.Variable(gx) for gx in grads)
def evaluate_image(
        self, image: numpy.ndarray,
        return_boxes: bool = False) -> Union[bool, Tuple[bool, list]]:
    """Run the network on ``image`` and report which patches predict class 1.

    When ``self.needs_patches`` is set the image is split by
    ``self.prediction_helper.create_sliding_window``; otherwise the whole
    image is treated as a single patch.

    Args:
        image: Image to evaluate.
        return_boxes: When true, also return the boxes produced by the
            sliding window. Only valid if ``self.needs_patches`` is set.

    Returns:
        ``contains_handwriting.any()`` when ``return_boxes`` is false,
        otherwise the pair ``(contains_handwriting, bboxes)``.
    """
    # NOTE(review): the extent of the `with` body below was reconstructed
    # from a whitespace-collapsed source -- confirm against the original.
    if self.needs_patches:
        patches, bboxes = self.prediction_helper.create_sliding_window(
            image)
    else:
        patches = image[numpy.newaxis, ...]

    network = self.network
    device = chainer.get_device(network.device)
    # Array module used for stacking if no patch is processed at all.
    xp = numpy

    with chainer.using_device(device):
        with chainer.configuration.using_config('train', False):
            per_patch_classes = []
            for patch in patches:
                batch = concat_examples([{'image': patch}], device)
                xp = get_array_module(batch['image'])
                predictions = network(**batch)
                per_patch_classes.append(
                    xp.argmax(predictions.array, axis=1))

            stacked = xp.stack(per_patch_classes, axis=0)
            contains_handwriting = chainer.backends.cuda.to_cpu(
                stacked == 1)

    if not return_boxes:
        return contains_handwriting.any()

    assert self.needs_patches, "Can not return boxes if we do not need patches"
    return contains_handwriting, bboxes
def __call__(self, rule, param):
    """Clip the gradient of ``param`` in place.

    The gradient is limited to ``[self.lower_bound, self.upper_bound]``.
    Parameters without a gradient are left untouched.
    """
    gradient = param.grad
    if gradient is None:
        return

    device = param.device
    with chainer.using_device(device):
        # Writing into `out` makes the clipping happen in place.
        device.xp.clip(
            gradient, self.lower_bound, self.upper_bound, out=gradient)
def forward(self, args):
    """Run the forward graph on the flattened inputs and parameters.

    The first ``self.num_inputs`` entries of ``args`` are the flattened
    inputs and the rest are parameter values. Results beyond the first
    ``self.num_outputs`` are kept in ``self.retained``.

    Returns:
        tuple: The flattened outputs.
    """
    n_inputs = self.num_inputs
    flat_inputs, param_values = args[:n_inputs], args[n_inputs:]
    device = chainer.backend.get_device_from_array(*flat_inputs)

    inputs, consumed = _unflatten(flat_inputs, self.input_tmpl)
    assert consumed == len(flat_inputs)

    # Gather everything the graph consumes under its declared name.
    graph_inputs = {}
    assert len(self.fwd_input_names) == len(inputs)
    graph_inputs.update(
        (name, self._to_var(value))
        for name, value in zip(self.fwd_input_names, inputs))
    assert len(self.param_names) == len(param_values)
    graph_inputs.update(
        (name, self._to_var(value))
        for name, value in zip(self.param_names, param_values))

    with chainer.using_device(self.chainerx_device_name):
        outputs = self.fwd.run(graph_inputs, **self.runtime_kwargs)

    ordered = [outputs[name] for name in self.fwd_output_names]
    n_outputs = self.num_outputs
    self.retained = ordered[n_outputs:]

    # TODO(hamaji): Do not hold actual arrays.
    self.nested_outputs = []
    for output in ordered[:n_outputs]:
        self.nested_outputs.append(_from_var(output, device))

    return tuple(_flatten(self.nested_outputs))
def forward(self, x):
    """Updates the internal state and returns the LSTM outputs.

    The batch of ``x`` may be smaller than the stored hidden state. In that
    case only the leading ``batch`` rows of the state take part in the step
    and the remaining rows are carried over unchanged.

    Args:
        x (~chainer.Variable): A new batch from the input sequence.

    Returns:
        ~chainer.Variable: Outputs of updated LSTM units.

    Raises:
        TypeError: If the batch size of ``x`` is larger than the size of
            the previous state ``h``.
    """
    if self.upward.W.array is None:
        # Lazy initialization: the input size is taken from the first batch.
        with chainer.using_device(self.device):
            in_size = utils.size_of_shape(x.shape[1:])
            self.upward._initialize_params(in_size)
            self._initialize_params()

    batch = x.shape[0]
    lstm_in = self.upward(x)

    # Rows of the previous state that are not updated by this step.
    h_rest = None
    if self.h is not None:
        h_size = self.h.shape[0]
        if batch == 0:
            h_rest = self.h
        elif h_size < batch:
            # The adjacent literals need an explicit space between them.
            msg = ('The batch size of x must be equal to or less than '
                   'the size of the previous state h.')
            raise TypeError(msg)
        elif h_size > batch:
            h_update, h_rest = split_axis.split_axis(
                self.h, [batch], axis=0)
            lstm_in += self.lateral(h_update)
        else:
            lstm_in += self.lateral(self.h)

    if self.c is None:
        with chainer.using_device(self.device):
            self.c = variable.Variable(
                self.xp.zeros((batch, self.state_size), dtype=x.dtype))

    self.c, y = lstm.lstm(self.c, lstm_in)

    if h_rest is None:
        self.h = y
    elif len(y.array) == 0:
        self.h = h_rest
    else:
        # Put the untouched rows back behind the updated ones.
        self.h = concat.concat([y, h_rest], axis=0)
    return y
def forward(self, x):
    """Updates the internal state and returns the LSTM outputs.

    When ``x`` has fewer rows than the stored hidden state, only the first
    ``batch`` rows of the state are fed back and updated; the other rows
    are kept as they are.

    Args:
        x (~chainer.Variable): A new batch from the input sequence.

    Returns:
        ~chainer.Variable: Outputs of updated LSTM units.

    Raises:
        TypeError: If the batch size of ``x`` is larger than the size of
            the previous state ``h``.
    """
    if self.upward.W.array is None:
        # Parameters are created on first use from the shape of `x`.
        with chainer.using_device(self.device):
            in_size = utils.size_of_shape(x.shape[1:])
            self.upward._initialize_params(in_size)
            self._initialize_params()

    batch = x.shape[0]
    lstm_in = self.upward(x)

    # Part of the previous hidden state that this step leaves alone.
    h_rest = None
    if self.h is not None:
        h_size = self.h.shape[0]
        if batch == 0:
            h_rest = self.h
        elif h_size < batch:
            # Keep the trailing space: the two literals are concatenated.
            msg = ('The batch size of x must be equal to or less than '
                   'the size of the previous state h.')
            raise TypeError(msg)
        elif h_size > batch:
            h_update, h_rest = split_axis.split_axis(self.h, [batch], axis=0)
            lstm_in += self.lateral(h_update)
        else:
            lstm_in += self.lateral(self.h)

    if self.c is None:
        with chainer.using_device(self.device):
            self.c = variable.Variable(
                self.xp.zeros((batch, self.state_size), dtype=x.dtype))

    self.c, y = lstm.lstm(self.c, lstm_in)

    if h_rest is None:
        self.h = y
    elif len(y.array) == 0:
        self.h = h_rest
    else:
        # Updated rows first, untouched rows after them.
        self.h = concat.concat([y, h_rest], axis=0)
    return y
def init_state(self, param):
    """Create the state entry ``'v'`` as zeros shaped like ``param.data``.

    When ``param.data`` is an ``intel64.mdarray`` the entry is converted
    with ``intel64.ideep.array``.
    """
    device = param.device
    with chainer.using_device(device):
        self.state['v'] = device.xp.zeros_like(param.data)

    # For iDeep
    if not isinstance(param.data, intel64.mdarray):
        return
    self.state['v'] = intel64.ideep.array(
        self.state['v'], itype=intel64.ideep.wgt_array)
def update_core(self):
    """Run one update with the batch split across all models.

    Every model gets a strided slice of the batch, converted for its own
    device. The losses are computed and back-propagated, the gradients of
    the other models are added to the main model, the optimizer is stepped,
    and the updated parameters are copied back to the other models.
    """
    optimizer = self.get_optimizer('main')
    model_main = optimizer.target
    models_others = {
        key: model for key, model in self._models.items()
        if model is not model_main
    }

    iterator = self.get_iterator('main')
    batch = iterator.next()

    #
    # Split the batch to sub-batches.
    #
    n_models = len(self._models)
    in_arrays_list = {
        key: self.converter(batch[index::n_models], self._devices[key])
        for index, key in enumerate(six.iterkeys(self._models))
    }

    # For reducing memory
    for model in six.itervalues(self._models):
        model.cleargrads()

    losses = []
    for model_key, model in six.iteritems(self._models):
        in_arrays = in_arrays_list[model_key]
        loss_func = self.loss_func or model

        with function.force_backprop_mode(), \
                chainer.using_device(self._devices[model_key]):
            # Unpack the converted batch according to its container type.
            if isinstance(in_arrays, tuple):
                loss = loss_func(*in_arrays)
            elif isinstance(in_arrays, dict):
                loss = loss_func(**in_arrays)
            else:
                loss = loss_func(in_arrays)

        losses.append(loss)

    # For _uninitialized_params
    for model in six.itervalues(self._models):
        model.cleargrads()

    for loss in losses:
        loss.backward(loss_scale=self.loss_scale)

    # Accumulate all gradients in the main model before the update.
    for model in six.itervalues(models_others):
        model_main.addgrads(model)

    optimizer.update()

    # Bring the other models back in sync with the main model.
    for model in six.itervalues(models_others):
        model.copyparams(model_main)

    if self.auto_new_epoch and iterator.is_new_epoch:
        optimizer.new_epoch(auto=True)
def update_core(self):
    """Update the localizer on ``self.device`` and log its loss."""
    with chainer.using_device(self.device):
        loss_localizer = self.update_localizer()
        # loss_recognizer = self.update_recognizer()

        results = {
            "loss/localizer": loss_localizer,
            # "loss/recognizer": loss_recognizer
        }
        self.log_results(results)
def forward(self, x, finetune=False):
    """Apply batch renormalization to ``x``.

    During recomputation the stored averages are shielded: zero-filled
    arrays are handed to the function in their place and the ``finetune``
    counter increment is reverted.

    Args:
        x: Input handed to the normalization function.
        finetune (bool): In training mode, when true ``self.N`` is
            incremented and ``1 - 1 / N`` is used as the decay; otherwise
            ``self.decay`` is used.

    Returns:
        The output of ``batch_renormalization`` in training mode, or of
        ``fixed_batch_normalization`` with the stored averages otherwise.
    """
    scale = self.gamma
    if scale is None:
        with chainer.using_device(self.device):
            scale = self.xp.ones(self.avg_mean.shape, dtype=x.dtype)

    shift = self.beta
    if shift is None:
        with chainer.using_device(self.device):
            shift = self.xp.zeros(self.avg_mean.shape, dtype=x.dtype)

    if not configuration.config.train:
        # Use running average statistics or fine-tuned statistics.
        return batch_normalization.fixed_batch_normalization(
            x, scale, shift, self.avg_mean, self.avg_var, self.eps)

    if finetune:
        self.N += 1
        decay = 1. - 1. / self.N
    else:
        decay = self.decay

    running_mean = self.avg_mean
    running_var = self.avg_var

    if chainer.config.in_recomputing:
        # Do not update statistics when extra forward computation is
        # called.
        if finetune:
            self.N -= 1  # Revert the count
        running_mean = self.xp.zeros_like(self.avg_mean)
        running_var = self.xp.zeros_like(self.avg_var)

    return batch_renormalization.batch_renormalization(
        x, scale, shift, self.rmax, self.dmax, self.eps,
        running_mean, running_var, decay, update_statistics=True)
def get_embeddings(model, dataset, batch_size, xp):
    """Embed ``dataset`` with ``model`` in chunks of ``batch_size``.

    Each chunk is converted with ``xp.array``, passed through the model and
    copied to the CPU.

    Returns:
        A NumPy array built from all collected embeddings.
    """
    collected = []
    with chainer.using_device(model.device):
        for start in range(0, len(dataset), batch_size):
            chunk = list(dataset[start:start + batch_size])
            output = model(xp.array(chunk))
            collected.extend(cuda.to_cpu(output.array))
    return np.array(collected)
def __call__(self, rule, param):
    """Add ``self.rate * param.data`` to the gradient of ``param`` in place.

    Nothing happens when the parameter has no data or no gradient.
    """
    p, g = param.data, param.grad
    if p is None or g is None:
        return

    device = param.device
    with chainer.using_device(device):
        if device.xp is not cuda.cupy:
            g += self.rate * p
            return

        # CuPy arrays are updated through an elementwise kernel.
        kernel = cuda.elementwise(
            'T p, T decay', 'T g', 'g += decay * p', 'weight_decay')
        kernel(p, self.rate, g)
def initialize(self):
    """Load the network once; later calls return immediately.

    A non-negative ``self.device_id`` loads the network for that device,
    otherwise it is loaded for ``'@numpy'``.
    """
    if self.initialized:
        return
    self.initialized = True

    if self.device_id < 0:
        self.network = self.load_network('@numpy')
        return

    with chainer.using_device(chainer.get_device(self.device_id)):
        self.network = self.load_network(self.device_id)
def backward(self, target_input_indexes, grad_outputs):
    """Call the wrapped ``Function.backward`` and wrap its gradients.

    The raw arrays of the retained inputs and of ``grad_outputs`` are
    passed to ``self._function.backward``. In ChainerX fallback mode they
    are converted from ChainerX first and the resulting gradients are
    converted back.

    Returns:
        tuple: One entry per index in ``target_input_indexes``, each either
        ``None`` or a ``variable.Variable``.
    """
    n_inputs = len(self.inputs)
    inputs = [None] * n_inputs
    in_data = [None] * n_inputs

    # Only retained inputs are available; all other slots stay None.
    retained_inputs = self.get_retained_inputs()
    for retained, i_in in six.moves.zip(
            retained_inputs, self._input_indexes_to_retain):
        inputs[i_in] = retained
        in_data[i_in] = None if retained is None else retained.array
    in_data = tuple(in_data)

    grad_out_data = tuple(
        None if grad is None else grad.array for grad in grad_outputs)

    fallback = self._is_chainerx_fallback_mode
    if fallback:
        # Convert input and output gradients to numpy/cupy
        in_data = backend.from_chx(in_data)
        grad_out_data = backend.from_chx(grad_out_data)

    # Call Function.backward
    device = backend.get_device_from_array(*(in_data + grad_out_data))
    with chainer.using_device(device):
        if fallback:
            # Enable attribute fallback
            with function_node._chainerx_attribute_fallback(
                    self._function, self.chainerx_device):
                gxs = self._function.backward(in_data, grad_out_data)
        else:
            gxs = self._function.backward(in_data, grad_out_data)

    # Check gradients
    for x, gx in six.moves.zip(self.inputs, gxs):
        if gx is not None:
            variable._check_grad_type(self, x, True, gx)

    # Convert input gradients back to ChainerX
    if fallback:
        gxs = backend.to_chx(gxs)

    ret = []
    for index in target_input_indexes:
        gx = gxs[index]
        if gx is None:
            ret.append(None)
            continue
        # Intentionally not passing requires_grad=False so that
        # backprop routines can raise an error when a further backprop
        # is attempted against this gradient variable.
        g = variable.Variable(gx)
        if g.xp is not chainerx:
            g.node._old_style_grad_generator = self._function.label
        ret.append(g)

    return tuple(ret)
def forward(self, link, inputs, device):
    """Call ``link`` on GPU 1 data while GPU 0 is the current device.

    Returns:
        tuple: A one-element tuple holding the link output.
    """
    (x,) = inputs

    # Put both the link and its input on GPU 1 ...
    gpu_1 = backend.GpuDevice.from_device_id(1)
    link.to_device(gpu_1)
    x.to_device(gpu_1)

    # ... and run the link with GPU 0 selected.
    gpu_0 = backend.GpuDevice.from_device_id(0)
    with chainer.using_device(gpu_0), \
            chainer.using_config('train', not self.test):
        y = link(x, finetune=self.finetune)
    return (y,)
def __enter__(self):
    """Enter the config and device contexts described by this object.

    The created context managers are stored in ``self._contexts``.

    Returns:
        ``self``.
    """
    # Config entries share their name with the attribute holding the value.
    config_names = (
        'use_cudnn', 'cudnn_deterministic', 'autotune', 'use_ideep')
    contexts = [
        chainer.using_config(name, getattr(self, name))
        for name in config_names
    ]
    contexts.append(chainer.using_device(self.device))

    self._contexts = contexts
    for context in self._contexts:
        context.__enter__()
    return self
def _prepare(self, param):
    """Make sure the state exists and its arrays are sent to ``param.device``.

    A missing state is created as an empty dict and filled by
    ``init_state``. Every state value that is an array is then replaced by
    the result of ``device.send``; other values are left as they are.
    """
    device = param.device
    with chainer.using_device(device):
        state = self.state
        if state is None:
            state = self._state = {}
            self.init_state(param)

        for name, value in six.iteritems(state):
            if isinstance(value, chainer.get_array_types()):
                state[name] = device.send(value)
def __call__(self, **kwargs):
    """Localize and recognize words in ``image`` and record word accuracy.

    Reads the keyword arguments ``image``, ``words`` and
    ``return_predictions``. ``image`` is unpacked as a five-axis array
    ``(batch_size, images_per_image, num_channels, height, width)`` and
    flattened over its first two axes before prediction.

    NOTE(review): this block was recovered from a whitespace-collapsed
    source. The nesting of the statements after ``calc_word_accuracy`` and
    of the final ``return`` is a reconstruction -- confirm against the
    original file.
    """
    image = kwargs.pop('image', None)
    words = kwargs.pop('words', None)
    return_predictions = kwargs.pop('return_predictions', False)

    batch_size, images_per_image, num_channels, height, width = image.shape
    # Merge the batch axis and the per-image axis for the predictors.
    image = self.xp.reshape(image, (-1, num_channels, height, width))

    with chainer.using_device(self.device):
        rois, bboxes = self.localizer.predict(image)[:2]
        predicted_words, raw_classification_result = self.recognizer.predict(
            rois, return_raw_classification_result=True)

        # Restore the (batch_size, images_per_image) leading axes.
        predicted_words = F.reshape(predicted_words, (batch_size, images_per_image) + predicted_words.shape[1:])
        raw_classification_result = F.reshape(
            raw_classification_result, (batch_size, images_per_image) + raw_classification_result.shape[1:])

        best_indices, scores = self.determine_best_prediction_indices(
            raw_classification_result)
        # Remember the selection before `best_indices` may be replaced below.
        chosen_indices = best_indices

        self.calc_word_accuracy(
            self.xp.concatenate([
                predicted_words[i, best_indices[i]].array for i in range(batch_size)
            ], axis=0),
            words,
            self.strip_non_alphanumeric_predictions,
        )

        if not self.only_return_best_result:
            # Select every candidate instead of only the best one.
            best_indices = self.xp.arange(images_per_image)[None, ...]
            best_indices = self.xp.tile(best_indices, (batch_size, 1))

        predicted_words = self.xp.stack([
            predicted_words[i, best_indices[i]].array for i in range(batch_size)
        ], axis=0)

        if return_predictions:
            rois = F.reshape(rois, (batch_size, images_per_image) + rois.shape[1:])
            bboxes = F.reshape(bboxes, (batch_size, images_per_image) + bboxes.shape[1:])
            rois = self.xp.stack(
                [rois[i, best_indices[i]].array for i in range(batch_size)], axis=0)
            bboxes = self.xp.stack(
                [bboxes[i, best_indices[i]].array for i in range(batch_size)], axis=0)
            return rois, bboxes, predicted_words, best_indices, chosen_indices, scores
def check_multi_devices_forward(self, device_0, device_1):
    """Check that the hooked layer runs on ``device_1`` under ``device_0``.

    The layer and its input are placed on ``device_1`` and the layer is
    called while ``device_0`` is the current device. No exception may be
    raised by the call.
    """
    layer = self.layer
    layer.add_hook(self.hook)
    layer.to_device(device_1)
    x = device_1.send(self.x)

    raised = None
    with chainer.using_device(device_0):
        try:
            layer(x)
        except Exception as e:
            raised = e
    assert raised is None
def forward(self, x):
    """Advance the zoneout LSTM by one step and return its output.

    Args:
        x (~chainer.Variable): A new batch from the input sequence.

    Returns:
        ~chainer.Variable: Outputs of updated LSTM units.
    """
    def zero_state():
        # Zero state with one row per sample of `x`.
        xp = self.xp
        with chainer.using_device(self.device):
            return variable.Variable(
                xp.zeros((len(x), self.state_size), dtype=x.dtype))

    gate_input = self.upward(x)
    if self.h is None:
        self.h = zero_state()
    else:
        gate_input += self.lateral(self.h)
    if self.c is None:
        self.c = zero_state()

    # Split the stacked pre-activations into the four gates.
    gate_input = reshape.reshape(
        gate_input, (len(gate_input), gate_input.shape[1] // 4, 4))
    a, i, f, o = (
        reshape.reshape(gate, (len(gate), self.state_size))
        for gate in split_axis.split_axis(gate_input, 4, 2))

    # Candidate cell state, before zoneout mixes it with the old one.
    c_tmp = tanh.tanh(a) * sigmoid.sigmoid(i) + sigmoid.sigmoid(f) * self.c
    h_tmp = sigmoid.sigmoid(o) * tanh.tanh(c_tmp)

    self.c = zoneout.zoneout(self.c, c_tmp, self.c_ratio)
    self.h = zoneout.zoneout(self.h, h_tmp, self.h_ratio)
    return self.h
def add_noise(device, h, sigma=0.2):
    """Add ``sigma``-scaled ``randn`` noise to ``h`` in training mode.

    Outside of training mode ``h`` is returned unchanged. For a ChainerX
    device the noise is drawn on the fallback device and then sent to
    ``device``.
    """
    if not chainer.config.train:
        return h

    xp = device.xp
    # TODO(niboshi): Support random.randn in ChainerX
    if xp is chainerx:
        fallback_device = device.fallback_device
        with chainer.using_device(fallback_device):
            noise = device.send(fallback_device.xp.random.randn(*h.shape))
    else:
        noise = xp.random.randn(*h.shape)
    return h + sigma * noise
def __call__(self, rule, param):
    """Add ``self.rate * sign(param.data)`` to the gradient in place.

    Nothing happens when the parameter has no data or no gradient.
    """
    p, g = param.data, param.grad
    if p is None or g is None:
        return

    device = param.device
    with chainer.using_device(device):
        xp = device.xp
        sign = xp.sign(p)
        if xp is not cuda.cupy:
            g += self.rate * sign
            return

        # CuPy arrays are updated through an elementwise kernel.
        kernel = cuda.elementwise(
            'T s, T decay', 'T g', 'g += decay * s', 'lasso')
        kernel(sign, self.rate, g)
def forward(self, x):
    """Update the stored state and return the zoneout LSTM output.

    Args:
        x (~chainer.Variable): A new batch from the input sequence.

    Returns:
        ~chainer.Variable: Outputs of updated LSTM units.
    """
    state_shape = (len(x), self.state_size)

    pre_activation = self.upward(x)
    if self.h is None:
        # No hidden state yet: start from zeros.
        xp = self.xp
        with chainer.using_device(self.device):
            self.h = variable.Variable(xp.zeros(state_shape, dtype=x.dtype))
    else:
        pre_activation += self.lateral(self.h)

    if self.c is None:
        # No cell state yet: start from zeros.
        xp = self.xp
        with chainer.using_device(self.device):
            self.c = variable.Variable(xp.zeros(state_shape, dtype=x.dtype))

    # Separate the four gates stored along the last axis.
    stacked_shape = (len(pre_activation), pre_activation.shape[1] // 4, 4)
    pre_activation = reshape.reshape(pre_activation, stacked_shape)
    a, i, f, o = (
        reshape.reshape(part, (len(part), self.state_size))
        for part in split_axis.split_axis(pre_activation, 4, 2))

    # Regular LSTM candidate; zoneout then mixes it with the old state.
    c_tmp = tanh.tanh(a) * sigmoid.sigmoid(i) + sigmoid.sigmoid(f) * self.c
    self.c = zoneout.zoneout(self.c, c_tmp, self.c_ratio)
    self.h = zoneout.zoneout(
        self.h, sigmoid.sigmoid(o) * tanh.tanh(c_tmp), self.h_ratio)
    return self.h
def reallocate_cleared_grads(self):
    """Reallocate gradients cleared by :meth:`~chainer.Variable.cleargrad`.

    Every parameter of the target whose gradient is :obj:`None` gets a
    zero-filled gradient shaped like its data.
    This method is called before and after every optimizer hook.
    If an inheriting optimizer does not require this allocation, the
    optimizer can override this method with a blank function.

    """
    for _name, param in self.target.namedparams(False):
        if param.grad is not None:
            continue
        device = param.device
        with chainer.using_device(device):
            param.grad = device.xp.zeros_like(param.data)
def _concat_arrays(arrays, padding):
    """Stack ``arrays`` along a new leading axis.

    With ``padding`` given the work is delegated to
    ``_concat_arrays_with_padding``; otherwise the arrays are concatenated
    on the device of the first one.
    """
    # Convert `arrays` to numpy.ndarray if `arrays` consists of the built-in
    # types such as int, float or list.
    if not isinstance(arrays[0], chainer.get_array_types()):
        arrays = numpy.asarray(arrays)

    if padding is not None:
        return _concat_arrays_with_padding(arrays, padding)

    device = backend.get_device_from_array(arrays[0])
    with chainer.using_device(device):
        # `array[None]` adds the leading axis that the arrays are joined on.
        expanded = [array[None] for array in arrays]
        return device.xp.concatenate(expanded)
def check_deleted(self, backend_config):
    """Check that deleting the hook changes the layer output.

    The layer is evaluated with the hook in the current mode (``y1``), with
    the hook under ``train=False`` (``y2``) and after the hook was deleted
    (``y3``). Both hooked outputs must differ from ``y3``, and the hook's
    vector attribute must be gone from the layer.
    """
    # NOTE(review): the extent of the `with` body below was reconstructed
    # from a whitespace-collapsed source -- confirm against the original.
    layer, hook = self.layer, self.hook
    layer.add_hook(hook)
    device = backend_config.device
    layer.to_device(device)
    x = backend_config.get_array(self.x)

    with chainer.using_device(device):
        y1 = layer(x).array
        with chainer.using_config('train', False):
            y2 = layer(x).array

        layer.delete_hook(hook.name)
        assert not hasattr(layer, hook.vector_name)
        y3 = layer(x).array

    y1, y2, y3 = (_cpu._to_cpu(y) for y in (y1, y2, y3))
    assert not numpy.array_equal(y1, y3)
    assert not numpy.array_equal(y2, y3)
def _concat_arrays_with_padding(arrays, padding):
    """Stack ``arrays`` along a new leading axis, filling gaps with ``padding``.

    The result has the element-wise maximum of all input shapes behind the
    new leading axis. Each input is copied into the leading corner of its
    slot; the remainder keeps the ``padding`` value.
    """
    # Element-wise maximum over all shapes, accumulated in place.
    max_shape = numpy.array(arrays[0].shape, dtype=int)
    for array in arrays[1:]:
        if numpy.any(max_shape != array.shape):
            numpy.maximum(max_shape, array.shape, out=max_shape)
    full_shape = tuple(numpy.insert(max_shape, 0, len(arrays)))

    first = arrays[0]
    device = backend.get_device_from_array(first)
    with chainer.using_device(device):
        result = device.xp.full(full_shape, padding, dtype=first.dtype)
        for index, src in enumerate(arrays):
            # Region of slot `index` that is covered by `src`.
            region = tuple(slice(dim) for dim in src.shape)
            result[(index,) + region] = src

    return result
def __enter__(self):
    """Enter the configuration scopes and the device scope of this object.

    Four ``chainer.using_config`` scopes (``use_cudnn``,
    ``cudnn_deterministic``, ``autotune`` and ``use_ideep``) and one
    ``chainer.using_device`` scope are entered in that order. On success
    the list of entered scopes is appended to ``self._contexts``.

    If entering one of the scopes raises, the scopes that were already
    entered are exited in reverse order with the active exception, as
    nested ``with`` statements would do, so that no configuration override
    is left behind. Nothing is appended to ``self._contexts`` in that case
    and the exception is re-raised.

    Returns:
        This object itself.
    """
    import sys

    contexts = [
        chainer.using_config('use_cudnn', self.use_cudnn),
        chainer.using_config(
            'cudnn_deterministic', self.cudnn_deterministic),
        chainer.using_config('autotune', self.autotune),
        chainer.using_config('use_ideep', self.use_ideep),
        chainer.using_device(self.device),
    ]

    entered = []
    try:
        for c in contexts:
            c.__enter__()
            entered.append(c)
    except BaseException:
        # Undo the partially applied scopes before propagating the error.
        exc_info = sys.exc_info()
        for c in reversed(entered):
            c.__exit__(*exc_info)
        raise

    self._contexts.append(contexts)
    return self
def accuracy(self, backend_config, loss_scaling=False):
    """Train the linear classifier on the configured backend.

    Sets up ``self.optimizer`` for ``self.model``, applies
    ``_optimizer_loss_scaling`` with ``loss_scaling``, moves the model to
    ``backend_config.device`` and returns the result of
    ``self._train_linear_classifier`` run with that device as the current
    one.

    Raises:
        Skipped: If ``backend_config.use_ideep`` is ``'always'`` but iDeep
            is not available.
    """
    model, optimizer = self.model, self.optimizer
    optimizer.setup(model)
    _optimizer_loss_scaling(optimizer, loss_scaling)

    ideep_forced = backend_config.use_ideep == 'always'
    if ideep_forced and not intel64.is_ideep_available():
        # TODO(niboshi): This is temporary workaround.
        # See the comment on Skipped.
        raise Skipped('ideep is required to run this test.')

    device = backend_config.device
    model.to_device(device)
    with chainer.using_device(device):
        return self._train_linear_classifier(
            model, optimizer, backend_config)
def update_core(self, param):
    """Updates the parameter.

    Dispatches on the array module of the parameter's device:
    :meth:`update_core_chainerx` for ChainerX, :meth:`update_core_cpu` for
    NumPy and :meth:`update_core_gpu` otherwise. The selected method runs
    with the parameter's device as the current device.

    Implementation of UpdateRule should override this method or both of
    :meth:`update_core_cpu` and :meth:`update_core_gpu`.

    Args:
        param (~chainer.Variable): Variable to be updated.

    """
    device = param.device
    xp = device.xp

    if xp is chainerx:
        update = self.update_core_chainerx
    elif xp is numpy:
        update = self.update_core_cpu
    else:
        update = self.update_core_gpu

    with chainer.using_device(device):
        update(param)
def sample(self, shape):
    """Generates a random sample based on given probabilities.

    The sampling runs with the sampler's device as the current device.
    ``sample_gpu`` is used when the device's array module is CuPy,
    ``sample_xp`` otherwise.

    Args:
        shape (tuple of int): Shape of a return value.

    Returns:
        Returns a generated array with the given shape. If a sampler is in
        CPU mode the return value is a :class:`numpy.ndarray` object, and
        if it is in GPU mode the return value is a :class:`cupy.ndarray`
        object.

    """
    device = self._device
    xp = device.xp
    with chainer.using_device(device):
        if xp is not cuda.cupy:
            return self.sample_xp(xp, shape)
        return self.sample_gpu(shape)
def _backward_chainerx(self, target_input_indexes, grad_outputs,
                       retained_inputs, retained_outputs):
    """Run backward for a node that was applied to ChainerX arrays.

    Backward wrapper that is called from C++ via a Python binding in case
    ``self.apply`` was called with ``chainerx.ndarray`` inputs.

    The retained arrays are wrapped into variables and stored on the node,
    the output gradients are wrapped into variables (``None`` entries are
    kept as ``None``), and ``self._backward_target_inputs`` is called with
    the device of the given arrays as the current device.

    Returns:
        list: The first data array of every returned gradient variable.
    """
    assert self._is_chainex_fallback_mode
    assert len(target_input_indexes) > 0

    # The number of retained arrays must agree with what was requested.
    in_retain = self._input_indexes_to_retain
    out_retain = self._output_indexes_to_retain
    assert (
        (in_retain is None and len(retained_inputs) == 0)
        or (len(in_retain) == len(retained_inputs)))
    assert (
        (out_retain is None and len(retained_outputs) == 0)
        or (len(out_retain) == len(retained_outputs)))
    assert all(
        gy is None or isinstance(gy, chainerx.ndarray)
        for gy in grad_outputs)

    self._chainerx_retained_inputs = tuple(
        variable.Variable(arr, requires_grad=arr.is_backprop_required())
        for arr in retained_inputs)
    # A retained output may be None, in which case no gradient is required.
    self._chainerx_retained_outputs = tuple(
        variable.Variable(
            arr,
            requires_grad=(
                False if arr is None else arr.is_backprop_required()))
        for arr in retained_outputs)

    all_arrays = retained_inputs + retained_outputs + grad_outputs
    device = backend.get_device_from_array(*all_arrays)
    with chainer.using_device(device):
        grad_vars = tuple(
            None if gy is None else chainer.Variable(
                gy, requires_grad=gy.is_backprop_required())
            for gy in grad_outputs)
        gxs = self._backward_target_inputs(
            tuple(target_input_indexes), grad_vars)

    gx_arrs = [gx._data[0] for gx in gxs]
    assert all(isinstance(gx, chainerx.ndarray) for gx in gx_arrs)
    return gx_arrs
def accuracy(self, backend_config):
    """Train the linear classifier on the configured backend.

    Sets up ``self.optimizer`` for ``self.model``, moves the model to
    ``backend_config.device`` and returns the result of
    ``self._train_linear_classifier`` run with that device as the current
    one.

    Raises:
        unittest.SkipTest: If ChainerX is used with ``numpy.float16``.
        Skipped: If ``backend_config.use_ideep`` is ``'always'`` but iDeep
            is not available.
    """
    # TODO(niboshi): Support it
    is_half = self.dtype == numpy.float16
    if backend_config.use_chainerx and is_half:
        raise unittest.SkipTest('ChainerX does not support float16')

    model, optimizer = self.model, self.optimizer
    optimizer.setup(model)

    ideep_forced = backend_config.use_ideep == 'always'
    if ideep_forced and not intel64.is_ideep_available():
        # TODO(niboshi): This is temporary workaround.
        # See the comment on Skipped.
        raise Skipped('ideep is required to run this test.')

    device = backend_config.device
    model.to_device(device)
    with chainer.using_device(device):
        return self._train_linear_classifier(
            model, optimizer, backend_config)
def as_noncontiguous_array(a):
    """Return an array holding the values of ``a`` in a strided layout.

    ``None`` and arrays with at most one element are returned unchanged.
    Otherwise a buffer with every dimension doubled is allocated, ``a`` is
    written to every second position along each axis, and the same strided
    view of the buffer is returned. The result is asserted to be
    non-contiguous.

    Args:
        a: Array to convert, or ``None``.

    Returns:
        ``a`` itself for the trivial cases, otherwise the strided view.
    """
    if a is None or a.size <= 1:
        return a

    device = backend.get_device_from_array(a)
    xp = device.xp
    every_other = (slice(None, None, 2),) * a.ndim
    doubled_shape = tuple(2 * n for n in a.shape)

    with chainer.using_device(device):
        buf = xp.empty(doubled_shape, dtype=a.dtype)
        buf[every_other] = a
        ret = buf[every_other]

    # ChainerX exposes contiguity differently from NumPy-like arrays.
    if xp is chainerx:
        contiguous = ret.is_contiguous
    else:
        contiguous = ret.flags.c_contiguous
    assert not contiguous

    return ret
def as_noncontiguous_array(a):
    """Return an array holding the values of ``a``, strided on axis 0.

    ``None`` and arrays with at most one element are returned unchanged.
    Otherwise a buffer with a doubled first dimension is allocated, ``a``
    is written to every second row, and the same strided view of the buffer
    is returned. The result is asserted to be non-contiguous.

    Args:
        a: Array to convert, or ``None``.

    Returns:
        ``a`` itself for the trivial cases, otherwise the strided view.
    """
    if a is None or a.size <= 1:
        return a

    device = backend.get_device_from_array(a)
    xp = device.xp
    buf_shape = (2 * a.shape[0],) + a.shape[1:]

    with chainer.using_device(device):
        buf = xp.empty(buf_shape, dtype=a.dtype)
        buf[::2] = a
        ret = buf[::2]

    # ChainerX exposes contiguity differently from NumPy-like arrays.
    if xp is chainerx:
        contiguous = ret.is_contiguous
    else:
        contiguous = ret.flags.c_contiguous
    assert not contiguous

    return ret
def _prepare_parameters(self, link, input_variable=None):
    """Prepare one buffer and one parameter.

    A normally distributed vector whose length is the size of the weight
    along ``self.axis`` is stored on ``link`` under ``self.vector_name``
    and registered as a persistent value. When ``self.use_gamma`` is set,
    ``link.gamma`` is additionally created as a scalar parameter holding
    the first singular value of the reshaped weight. Finally
    ``self._initialized`` is set to ``True``.

    Args:
        link (:class:`~chainer.Link`): Link to normalize spectrally.
        input_variable (:class:`~chainer.Variable`):
            The first minibatch to initialize weight.

    Raises:
        ValueError: If the weight has size zero along ``self.axis``.

    """
    weight_missing = getattr(link, self.weight_name).array is None
    if weight_missing and input_variable is not None:
        link._initialize_params(input_variable.shape[1])

    weight = getattr(link, self.weight_name)
    vector_size = weight.shape[self.axis]
    if vector_size == 0:
        raise ValueError(
            'Expect {}.shape[{}] > 0'.format(self.weight_name, self.axis)
        )

    random_vector = link.xp.random.normal(size=(vector_size,))
    u = random_vector.astype(dtype=weight.dtype)
    setattr(link, self.vector_name, u)
    link.register_persistent(self.vector_name)

    if self.use_gamma:
        # Initialize the scaling parameter with the max singular value.
        weight_matrix = self.reshape_W(weight.array)
        if link.xp is not chainerx:
            singular_values = link.xp.linalg.svd(weight_matrix)[1]
        else:
            # TODO(crcrpar): Remove this when chainerx supports SVD.
            xp, device, array = fallback._from_chx(weight_matrix)
            if xp is numpy:
                singular_values = numpy.linalg.svd(array)[1]
            else:
                with chainer.using_device(device):
                    singular_values = xp.linalg.svd(array)[1]
        with link.init_scope():
            link.gamma = variable.Parameter(singular_values[0], ())

    self._initialized = True
def forward(self, x, y):
    """Record the call and report the sum of both inputs as ``'loss'``.

    The pair ``(x, y)`` is appended to ``self.args``. Then
    ``x.sum() + y.sum()`` is reported under the key ``'loss'`` with this
    object as the observer, while the device of the inputs is the current
    device. Nothing is returned.
    """
    self.args.append((x, y))
    device = backend.get_device_from_array(x, y)
    with chainer.using_device(device):
        observation = {'loss': x.sum() + y.sum()}
        chainer.report(observation, self)
def backward(self, indexes, grad_outputs):
    """Compute gradients w.r.t. the retained inputs ``x``, ``W`` and ``gy``.

    The gradients are accumulated sample by sample, visiting only the
    sample indices selected by ``self.ignore_mask``.

    Args:
        indexes: Collection of input positions to differentiate; ``0``
            selects ``x``, ``1`` selects ``W`` and ``2`` selects ``gy``.
        grad_outputs: Triple unpacked as ``(ggx, _, ggW)``; the middle
            entry is not used.

    Returns:
        list: The accumulated gradients for the requested positions, in
        the order ``x``, ``W``, ``gy``.
    """
    x, W, gy = self.get_retained_inputs()

    device = backend.get_device_from_array(x.data)
    xp = device.xp

    # Zero-initialised accumulators, created only for requested positions.
    if 0 in indexes:
        gx = chainer.Variable(xp.zeros_like(x.data))
    if 1 in indexes:
        gW = chainer.Variable(xp.zeros_like(W.data))
    if 2 in indexes:
        ggy = chainer.Variable(xp.zeros_like(gy.data))

    ggx, _, ggW = grad_outputs

    # Sign mask of length ``sample_size + 1``: -1 for the first entry and
    # +1 for all remaining entries.
    pos_neg_mask = xp.ones(self.sample_size + 1)
    pos_neg_mask[0] *= -1

    with chainer.using_device(device):
        arange = xp.arange(len(self.ignore_mask))
    # Iterate over the sample indices selected by ``ignore_mask``.
    for i in arange[self.ignore_mask]:
        # Partial forward pass to obtain intermediate `Variable`s
        ix = x[i]
        k = self.samples[i]

        # With 'sum' reduction ``gy`` is used as a whole; otherwise the
        # entry belonging to this sample is used.
        if self.reduce == 'sum':
            igy = gy
        else:
            igy = gy[i]

        w = W[k]
        f = chainer.functions.flatten(
            chainer.functions.matmul(w, ix[:, None])) * pos_neg_mask
        sigf = chainer.functions.sigmoid(f)
        g = chainer.functions.broadcast_to(igy, f.shape) * sigf \
            * pos_neg_mask

        dgW_dg = chainer.functions.flatten(
            chainer.functions.matmul(ggW[k], ix[:, None])) * pos_neg_mask
        dgW_df = chainer.functions.broadcast_to(igy, f.shape) \
            * _sigmoid_grad(f, sigf, dgW_dg) * pos_neg_mask
        dgx_dg = chainer.functions.flatten(
            chainer.functions.matmul(ggx[i][None, :], w, transb=True))
        dgx_df = chainer.functions.broadcast_to(igy, f.shape) \
            * _sigmoid_grad(f, sigf, dgx_dg)

        if 0 in indexes:
            # derivative of gx
            dgx = chainer.functions.matmul(w, dgx_df[:, None], transa=True)

            # derivative of gW
            dgx += chainer.functions.matmul(g[None, :], ggW[k]).T
            dgx += chainer.functions.matmul(
                w, dgW_df[:, None], transa=True)

            # Add this sample's contribution at position ``i`` of ``gx``.
            gx = chainer.functions.scatter_add(
                gx, i, chainer.functions.flatten(dgx))

        if 1 in indexes:
            # derivative of gx
            shape = ggx[i].shape
            # One scatter per sampled row of ``W``.
            for ik, ig, idgx_df in six.moves.zip(k, g, dgx_df):
                ig = chainer.functions.broadcast_to(ig, shape)
                idgx_df = chainer.functions.broadcast_to(idgx_df, shape)
                gW = chainer.functions.scatter_add(
                    gW, ik, ig * ggx[i] + idgx_df * ix)

            # derivative of gW
            gW = chainer.functions.scatter_add(
                gW, k,
                chainer.functions.matmul(dgW_df[:, None], ix[None, :]))

        if 2 in indexes:
            # ``dgx_dg`` is multiplied by the sign mask only here; the uses
            # above see the unmasked value.
            dgx_dg *= pos_neg_mask
            dggy = chainer.functions.sum((dgx_dg + dgW_dg) * sigf)
            if self.reduce == 'sum':
                ggy += dggy
            else:
                ggy = chainer.functions.scatter_add(ggy, i, dggy)

    # Return only the requested gradients, in input order.
    ret = []
    if 0 in indexes:
        ret.append(gx)
    if 1 in indexes:
        ret.append(gW)
    if 2 in indexes:
        ret.append(ggy)
    return ret
def run(self):
    """Call ``self._run()`` with ``self.device`` as the current device."""
    device_scope = chainer.using_device(self.device)
    with device_scope:
        self._run()
def generate_array(initializer, shape, xp, dtype=None, device=None):
    # type: (types.AbstractInitializer, types.ShapeSpec, types.Xp, types.DTypeSpec, types.DeviceSpec) -> types.NdArray # NOQA
    """Return initialized array.

    An empty array of the requested shape is allocated and handed to
    ``initializer``, which fills it in place. The algorithms used to make
    the new values depend on the concrete derived classes.

    If the initializer has the ``dtype`` attribute, it is used to construct
    the array. Otherwise, ``chainer.config.dtype`` is used instead.
    See :ref:`configuration` for the dtype config.

    Args:
        initializer: A callable object that takes :ref:`ndarray` and edits
            its value.
        shape (tuple): Shape of a return array.
        xp (module): :mod:`cupy`, :mod:`numpy`, or :mod:`chainerx`.
        dtype: Dtype specifier. If omitted, ``initializer.dtype`` is used.
        device: Target device specifier. If omitted, the current device is
            used for :mod:`cupy`, and the default device is used for
            :mod:`chainerx`.

    Returns:
        :ref:`ndarray`: An initialized array.

    Raises:
        ValueError: If both ``dtype`` and ``initializer.dtype`` are given
            and differ, or if ``xp`` does not match the array module of
            ``device``.
        RuntimeError: If the ChainerX backend of the device is neither
            ``'native'`` nor ``'cuda'``.

    """
    initializer_dtype = getattr(initializer, 'dtype', None)
    if dtype is None:
        dtype = initializer_dtype
    elif (initializer_dtype is not None
            and numpy.dtype(dtype) != numpy.dtype(initializer_dtype)):
        raise ValueError(
            'dtype mismatch: {} != {}'.format(dtype, initializer_dtype))
    dtype = chainer.get_dtype(dtype)

    if device is not None:
        backend_device = chainer.get_device(device)
        if xp != backend_device.xp:
            raise ValueError('xp and device arguments are inconsistent.')
    else:
        backend_device = backend._guess_device_from_array_module(xp)

    if xp is not chainerx:
        with chainer.using_device(backend_device):
            array = xp.empty(shape, dtype=dtype)
            initializer(array)
        return array

    # Initialize with NumPy/CuPy array that shares memory with the
    # ChainerX array.

    # TODO(sonots): Directly use initializer after ChainerX
    # supports random.
    chx_device = backend_device.device  # type: ignore
    # TODO(okapies): remove 'type: ignore' when chainerx implements sequence support for empty() # NOQA
    array = chainerx.empty(shape, dtype=dtype, device=chx_device)  # type: ignore # NOQA

    backend_name = chx_device.backend.name
    if backend_name == 'native':
        temp_array = _cpu._to_cpu(array)
        temp_device = cuda.DummyDevice  # type: cuda.Device
    elif backend_name == 'cuda':
        temp_array = cuda.to_gpu(array, chx_device.index)
        temp_device = cuda.Device(chx_device.index)
    else:
        raise RuntimeError('ChainerX backend: {} is not supported.'.format(
            backend_name))

    with temp_device:
        initializer(temp_array)
    return array
def forward(self, x, **kwargs):
    """forward(self, x, finetune=False)

    Invokes the forward propagation of BatchNormalization.

    In training mode, the BatchNormalization computes moving averages of
    mean and variance for evaluation during training, and normalizes the
    input using batch statistics. Outside training mode, the stored
    averages are used for normalization.

    If ``self.avg_mean`` is still ``None``, the parameters are first
    initialized with the shape made of the dimensions of ``x`` that are not
    listed in ``self.axis``. A missing ``gamma`` is replaced by ones and a
    missing ``beta`` by zeros.

    When ``chainer.config.in_recomputing`` is set, the running statistics
    are not handed to the normalization function and the increment of the
    fine-tuning counter is reverted.

    Args:
        x (Variable): Input variable.
        finetune (bool): If it is in the training mode and ``finetune`` is
            ``True``, BatchNormalization runs in fine-tuning mode; it
            accumulates the input array to compute population statistics
            for normalization, and normalizes the input using batch
            statistics.

    """
    finetune, = argument.parse_kwargs(
        kwargs, ('finetune', False),
        test='test argument is not supported anymore. '
             'Use chainer.using_config')

    if self.avg_mean is None:
        param_shape = tuple(
            dim for i, dim in enumerate(x.shape) if i not in self.axis)
        self._initialize_params(param_shape)

    gamma = self.gamma
    if gamma is None:
        with chainer.using_device(self.device):
            gamma = self.xp.ones(
                self.avg_mean.shape, dtype=self._highprec_dtype)

    beta = self.beta
    if beta is None:
        with chainer.using_device(self.device):
            beta = self.xp.zeros(
                self.avg_mean.shape, dtype=self._highprec_dtype)

    if not configuration.config.train:
        # Use running average statistics or fine-tuned statistics.
        return functions.fixed_batch_normalization(
            x, gamma, beta, self.avg_mean, self.avg_var, self.eps,
            axis=self.axis)

    if finetune:
        self.N += 1
        decay = 1. - 1. / self.N
    else:
        decay = self.decay

    if chainer.config.in_recomputing:
        # Do not update statistics when extra forward computation is
        # called.
        if finetune:
            self.N -= 1  # Revert the count
        running_mean = None
        running_var = None
    else:
        running_mean = self.avg_mean
        running_var = self.avg_var

    return functions.batch_normalization(
        x, gamma, beta, eps=self.eps, running_mean=running_mean,
        running_var=running_var, decay=decay, axis=self.axis)
def init_hx(self, xs):
    """Create a zero-filled variable to be used as the hidden state.

    The array has shape
    ``(self.n_layers * self.direction, len(xs), self.out_size)`` and the
    dtype of the first element of ``xs``. It is created with
    ``self.device`` as the current device.

    Args:
        xs: Sequence of input arrays; only its length and the dtype of its
            first element are used.

    Returns:
        The zero-filled variable.
    """
    n_states = self.n_layers * self.direction
    state_shape = (n_states, len(xs), self.out_size)
    with chainer.using_device(self.device):
        zeros = self.xp.zeros(state_shape, dtype=xs[0].dtype)
        hx = variable.Variable(zeros)
    return hx