def normalize_weight(self, link):
    """Normalize target weight before every single forward computation.

    The weight stored on ``link`` under ``self.weight_name`` is reshaped
    with ``self.reshape_W`` and divided by ``sigma``, the value returned
    by ``calculate_max_singular_value`` for the current ``u`` and ``v``
    vectors. Unless ``configuration.config.in_recomputing`` is set, the
    vectors are refreshed first, ``v`` is cached on ``self`` and, in train
    mode, the refreshed ``u`` is written back to ``link``.

    Args:
        link: Link holding the weight (``self.weight_name``) and the
            vector (``self.vector_name``).

    Returns:
        The weight divided by ``sigma``; it is additionally multiplied by
        ``link.gamma`` when ``self.use_gamma`` is true.

    """
    weight_name = self.weight_name
    vector_name = self.vector_name
    weight = getattr(link, weight_name)
    left_vector = getattr(link, vector_name)
    matrix = self.reshape_W(weight)

    if configuration.config.in_recomputing:
        # Reuse the ``v`` cached on this object instead of updating it.
        right_vector = self.v
    else:
        with chainer.using_device(link.device):
            left_vector, right_vector = update_approximate_vectors(
                matrix, left_vector, self.n_power_iteration, self.eps)

    sigma = calculate_max_singular_value(matrix, left_vector, right_vector)
    if self.factor is not None:
        sigma /= self.factor

    normalized = (
        link.gamma * weight / sigma if self.use_gamma else weight / sigma)

    if configuration.config.in_recomputing:
        return normalized

    self.v = right_vector
    with chainer.using_device(link.device):
        if configuration.config.train:
            if link.xp is chainerx:
                # TODO(crcrpar): Remove this when
                # chainerx supports `copyto`.
                getattr(link, vector_name)[:] = left_vector
            else:
                backend.copyto(getattr(link, vector_name), left_vector)
    return normalized
def copyparams(self, link, copy_persistent=True):
    """Copies all parameters from given link.

    The data array of every parameter in the hierarchy is copied from the
    same-named parameter of ``link``; the copy works across the host and
    devices. Gradient arrays are not copied.

    *From v5.0.0:* persistent values (e.g. the moving statistics of
    :class:`~chainer.links.BatchNormalization`) are copied as well. When
    both the destination and the source value are ndarrays the elements
    are copied; otherwise the value is replaced by a
    :func:`copy.deepcopy` of the source. Pass ``copy_persistent=False``
    to get the old behavior of not copying persistent values.

    Args:
        link (Link): Source link object.
        copy_persistent (bool): If ``True``, persistent values are also
            copied. ``True`` by default.

    """
    source_attrs = link.__dict__
    own_attrs = self.__dict__

    for param_name in self._params:
        own_attrs[param_name].copydata(source_attrs[param_name])

    if not copy_persistent:
        return

    array_types = chainer.get_array_types()
    for persistent_name in self._persistent:
        current = own_attrs[persistent_name]
        incoming = source_attrs[persistent_name]
        both_arrays = (
            isinstance(current, array_types)
            and isinstance(incoming, array_types))
        if both_arrays:
            backend.copyto(current, incoming)
        else:
            own_attrs[persistent_name] = copy.deepcopy(incoming)
def copydata(self, var):
    """Copies the data array from given source variable.

    The data array of ``var`` is copied into this variable, even when the
    two arrays reside on different devices (including host and GPU).

    An uninitialized side is initialized from the other one: if this
    variable has no data array it is initialized with the shape of the
    source array, and if ``var`` has no data array it is initialized with
    the shape of this variable (``self``). If both are uninitialized,
    this method does nothing.

    Args:
        var (Variable): Source variable.

    """
    source = var.array
    target = self.array

    if source is None and target is None:
        return

    if source is None:
        var.initialize(self.shape)
        source = var.array
    elif target is None:
        self.initialize(source.shape)
        target = self.array

    backend.copyto(target, source)
def copydata(self, var):
    """Copies the data array from given source variable.

    Data flows from ``var`` into this variable; the copy is performed even
    if the arrays live on different devices, host and GPU included.

    When exactly one side is uninitialized, that side is initialized with
    the shape of the other before copying. When neither side has a data
    array, nothing happens.

    Args:
        var (Variable): Source variable.

    """
    incoming = var.array
    current = self.array

    if incoming is None and current is None:
        return

    if incoming is None:
        var.initialize(self.shape)
        incoming = var.array
    elif current is None:
        self.initialize(incoming.shape)
        current = self.array

    backend.copyto(current, incoming)
def copyparams(self, link, copy_persistent=True):
    # type: ('Link', bool) -> None
    """Copies all parameters from given link.

    The data array of every parameter in the hierarchy is copied from the
    same-named parameter of ``link``; the copy works across the host and
    devices. Gradient arrays are not copied.

    *From v5.0.0:* persistent values (e.g. the moving statistics of
    :class:`~chainer.links.BatchNormalization`) are copied as well. When
    both the destination and the source value are ndarrays the elements
    are copied; otherwise the value is replaced by a
    :func:`copy.deepcopy` of the source. Pass ``copy_persistent=False``
    to get the old behavior of not copying persistent values.

    Args:
        link (Link): Source link object.
        copy_persistent (bool): If ``True``, persistent values are also
            copied. ``True`` by default.

    """
    source_attrs = link.__dict__
    own_attrs = self.__dict__

    for param_name in self._params:
        own_attrs[param_name].copydata(source_attrs[param_name])

    if not copy_persistent:
        return

    array_types = chainer.get_array_types()
    for persistent_name in self._persistent:
        current = own_attrs[persistent_name]
        incoming = source_attrs[persistent_name]
        both_arrays = (
            isinstance(current, array_types)
            and isinstance(incoming, array_types))
        if both_arrays:
            backend.copyto(current, incoming)
        else:
            own_attrs[persistent_name] = copy.deepcopy(incoming)
def __call__(self, array):
    """Fills ``array`` in place with ``self.fill_value``.

    Args:
        array: Array to be filled. When ``self.dtype`` is set, the dtype
            of ``array`` must be equal to it.

    Raises:
        AssertionError: If ``self.dtype`` is set and differs from
            ``array.dtype``. The message reports both dtypes.

    """
    if self.dtype is not None:
        # Report both dtypes on mismatch.
        assert array.dtype == self.dtype,\
            '{} != {}'.format(array.dtype, self.dtype)

    # Calling copyto ensures that the fill_value array
    # is moved to the device where array resides
    if isinstance(self.fill_value, chainer.get_array_types()):
        backend.copyto(array, self.fill_value)
    else:
        device = backend.get_device_from_array(array)
        array[...] = device.xp.asarray(self.fill_value)
def __call__(self, array):
    """Fills ``array`` in place with uniformly distributed samples.

    Samples are drawn with ``low=-self.scale`` and ``high=self.scale``.
    Without ``self.rng`` they come from the ``random`` module of the
    array's own backend; otherwise they are drawn from ``self.rng``, cast
    to the dtype of ``array`` and copied into it.

    Args:
        array: Array to be initialized. When ``self.dtype`` is set, the
            dtype of ``array`` must be equal to it.

    """
    if self.dtype is not None:
        assert array.dtype == self.dtype,\
            '{} != {}'.format(array.dtype, self.dtype)

    if self.rng is not None:
        samples = self.rng.uniform(
            low=-self.scale, high=self.scale, size=array.shape)
        backend.copyto(array, samples.astype(array.dtype, copy=False))
        return

    xp = backend.get_device_from_array(array).xp
    array[...] = xp.random.uniform(
        low=-self.scale, high=self.scale, size=array.shape)
def __call__(self, array):
    """Fills ``array`` in place with normally distributed samples.

    Samples are drawn with ``loc=0.0`` and ``scale=self.scale``. Without
    ``self.rng`` they come from the ``random`` module of the array's own
    backend; otherwise they are drawn from ``self.rng``, cast to the dtype
    of ``array`` and copied into it.

    Args:
        array: Array to be initialized. When ``self.dtype`` is set, the
            dtype of ``array`` must be equal to it.

    """
    if self.dtype is not None:
        assert array.dtype == self.dtype,\
            '{} != {}'.format(array.dtype, self.dtype)

    if self.rng is not None:
        samples = self.rng.normal(
            loc=0.0, scale=self.scale, size=array.shape)
        backend.copyto(array, samples.astype(array.dtype, copy=False))
        return

    xp = backend.get_device_from_array(array).xp
    kwargs = dict(loc=0.0, scale=self.scale, size=array.shape)
    # Only CuPy supports dtype option;
    # float16 is not supported in cuRAND
    if xp is cuda.cupy and (
            self.dtype == numpy.float32 or self.dtype == numpy.float16):
        kwargs['dtype'] = numpy.float32
    array[...] = xp.random.normal(**kwargs)
def __call__(self, array):
    """Fills ``array`` in place using the QR factor of a random matrix.

    A 0-dim array receives ``self.scale`` times a random sign. Any other
    array is interpreted as ``len(array)`` outputs by
    ``size_of_shape(array.shape[1:])`` inputs: a normally distributed
    matrix of that shape is drawn on the host, factorized with
    ``numpy.linalg.qr`` and the ``q`` factor, scaled by ``self.scale``
    with the signs of ``diag(r)``, is copied into ``array``.

    Args:
        array: Array to be initialized. When ``self.dtype`` is set, the
            dtype of ``array`` must be equal to it.

    Raises:
        ValueError: If ``array`` is empty, or if the input/output
            dimensions are rejected by ``self._checks``.

    """
    if self.dtype is not None:
        assert array.dtype == self.dtype,\
            '{} != {}'.format(array.dtype, self.dtype)

    if not array.shape:
        # 0-dim case
        sampler = numpy.random if self.rng is None else self.rng
        sign_bit = int(sampler.randint(2))
        array[...] = self.scale * (2 * sign_bit - 1)
        return

    if not array.size:
        raise ValueError('Array to be initialized must be non-empty.')

    # numpy.prod returns float value when the argument is empty.
    out_dim = len(array)
    in_dim = utils.size_of_shape(array.shape[1:])
    more_inputs = in_dim > out_dim
    more_outputs = in_dim < out_dim
    if (more_inputs and self._checks[0]) or (
            more_outputs and self._checks[1]):
        raise ValueError(
            'Cannot make orthogonal {}. '
            'shape = {}, interpreted as '
            '{}-dim input and {}-dim output.'.format(
                self.mode, array.shape, in_dim, out_dim))

    if self.rng is None:
        gaussian = numpy.random.normal(size=(out_dim, in_dim))
    else:
        drawn = self.rng.normal(size=(out_dim, in_dim))
        gaussian = numpy.empty(drawn.shape, dtype=drawn.dtype)
        backend.copyto(gaussian, drawn)

    if more_inputs:
        gaussian = gaussian.T
    # cupy.linalg.qr requires cusolver in CUDA 8+
    q, r = numpy.linalg.qr(gaussian)
    q *= numpy.copysign(self.scale, numpy.diag(r))
    if more_inputs:
        q = q.T

    orthogonal = q.reshape(array.shape).astype(array.dtype, copy=False)
    backend.copyto(array, orthogonal)
def test_from_ideep(self):
    """Checks ``backend.copyto`` with an ``intel64.mdarray`` source."""
    source = intel64.ideep.array(self.src_data)
    target = self._get_dst()
    assert isinstance(source, intel64.mdarray)

    backend.copyto(target, source)

    copied = self._to_cpu(target)
    numpy.testing.assert_array_equal(copied, self.src_data)
def test_fail_on_invalid_dst(self):
    """Checks that a ``None`` destination makes ``copyto`` raise."""
    source = numpy.zeros(1)
    with self.assertRaises(TypeError):
        backend.copyto(None, source)
def test_from_ideep(self):
    """Checks ``backend.copyto`` with an ``intel64.mdarray`` source."""
    source = intel64.ideep.array(self.src_data)
    target = self._get_dst()
    assert isinstance(source, intel64.mdarray)

    backend.copyto(target, source)

    copied = cuda.to_cpu(target)
    numpy.testing.assert_array_equal(copied, self.src_data)
def test_gpu_to_another_gpu(self):
    """Checks ``backend.copyto`` with a destination made under device 1.

    The source is a CuPy array built from ``self.src_data``; the
    destination is obtained inside the ``cuda.get_device_from_id(1)``
    context.
    """
    source = cuda.cupy.array(self.src_data)
    with cuda.get_device_from_id(1):
        target = self._get_dst()

    backend.copyto(target, source)

    cuda.cupy.testing.assert_array_equal(target, source)
def test_from_gpu(self):
    """Checks ``backend.copyto`` with a CuPy array as the source."""
    source = cuda.cupy.array(self.src_data)
    target = self._get_dst()

    backend.copyto(target, source)

    copied = cuda.to_cpu(target)
    numpy.testing.assert_array_equal(copied, self.src_data)
def test_gpu_to_another_gpu(self):
    """Checks ``backend.copyto`` with a destination made under device 1.

    The source is a CuPy array built from ``self.src_data``; the
    destination is obtained inside the ``cuda.get_device_from_id(1)``
    context.
    """
    gpu_source = cuda.cupy.array(self.src_data)
    with cuda.get_device_from_id(1):
        gpu_target = self._get_dst()

    backend.copyto(gpu_target, gpu_source)

    cuda.cupy.testing.assert_array_equal(gpu_target, gpu_source)
def test_from_chx_cuda(self):
    """Checks ``backend.copyto`` with a ChainerX ``cuda:0`` source."""
    source = chainerx.array(self.src_data, device='cuda:0')
    target = self._get_dst()

    backend.copyto(target, source)

    copied = self._to_cpu(target)
    numpy.testing.assert_array_equal(copied, self.src_data)
def test_fail_on_invalid_dst(self):
    """Checks that ``copyto`` rejects ``None`` as the destination."""
    valid_source = numpy.zeros(1)
    with self.assertRaises(TypeError):
        backend.copyto(None, valid_source)
def test_from_gpu(self):
    """Checks ``backend.copyto`` with a CuPy array as the source."""
    source = cuda.cupy.array(self.src_data)
    target = self._get_dst()

    backend.copyto(target, source)

    copied = self._to_cpu(target)
    numpy.testing.assert_array_equal(copied, self.src_data)