def test_get_device_from_id(self):
    self.assertEqual(cuda.get_device_from_id(0), cuda.Device(0))
def test_numpy_array_async3(self):
    with cuda.Device(1):
        y = cuda.to_gpu(self.x, stream=cuda.Stream.null)
    self.assertIsInstance(y, cuda.ndarray)
    cuda.cupy.testing.assert_array_equal(self.x, y)
    self.assertEqual(int(y.device), 1)
def test_get_device_from_id_for_numpy_int(self):
    self.assertEqual(
        cuda.get_device_from_id(numpy.int64(0)), cuda.Device(0))
def to_gpu(self, device=None):
    super(Parameter, self).to_gpu(device)
    if self.data is None:
        if device is None:
            device = cuda.Device().id
        self._initial_device = device
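# Illustrative usage sketch for the deferred-initialization path above (an
# assumption-laden example, not part of the original snippet): a Parameter
# created without a shape has ``data is None``, so ``to_gpu()`` only records
# the target device, and the array is allocated there once ``initialize``
# runs. Assumes Chainer with a CUDA-capable GPU.
import chainer

p = chainer.Parameter(chainer.initializers.Normal(0.05))  # data is still None
p.to_gpu(0)            # no array yet; device 0 is remembered
p.initialize((3, 4))   # the array is created on the remembered device
print(type(p.data))    # -> cupy ndarray on device 0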
def backward(self, retain_grad=False):
    """Runs error backpropagation (a.k.a. backprop) from this variable.

    On backprop, :meth:`FunctionNode.backward` is called on each
    :class:`FunctionNode` object appearing in the backward graph starting
    from this variable. The backward graph is represented by backward
    references from variable nodes to their creators, and from function
    nodes to their input variable nodes. The backprop stops at all root
    nodes. Some function nodes set ``None`` as gradients of some inputs,
    where further backprop does not take place at such inputs.

    This method uses :data:`grad` as the initial error array. User can
    manually set a gradient array before calling this method. If
    :data:`data` contains only one element (i.e., it is scalar) and
    :data:`grad` is ``None``, then this method automatically complements
    1.0 as the initial error. This is useful on starting backprop from
    some scalar loss value.

    Note that this method does not support *differentiable backprop*. Use
    :func:`grad` to compute the gradient of gradients.

    Args:
        retain_grad (bool): If ``True``, the gradient arrays of all
            intermediate variables are kept. Otherwise, :data:`grad` of the
            intermediate variables are set to ``None`` on appropriate
            timing, which may reduce the maximum memory consumption.

            In most cases of training some models, the purpose of backprop
            is to compute gradients of parameters, not of all variables,
            and therefore it is recommended to set this flag ``False``.

    """
    self._node._check_old_style_gradient()
    if self.creator_node is None:
        return
    initial_device = None
    if cuda.available and isinstance(self.data, cuda.cupy.ndarray):
        try:
            initial_device = cuda.Device()
        except cuda.cupy.cuda.runtime.CUDARuntimeError as e:
            if e.status != 38:  # cudaErrorNoDevice
                raise

    is_debug = chainer.is_debug()

    cand_funcs = []
    seen_set = set()
    grads = {}

    # Initialize error by 1, if this is a loss variable
    if self.data.size == 1 and self._grad_var is None:
        with cuda.get_device_from_array(self.data) as device:
            if device is cuda.DummyDevice:
                self.grad = numpy.ones_like(self.data)
            else:
                self.grad = cuda.cupy.ones_like(self.data)
    grads[self._node] = self._grad_var

    def add_cand(cand):
        if cand not in seen_set:
            # Negate since heapq is min-heap
            heapq.heappush(cand_funcs, (-cand.rank, len(seen_set), cand))
            seen_set.add(cand)

    add_cand(self.creator_node)

    def get_grad(node):
        if node is None:
            return None
        if node in grads:
            return grads[node]
        return node.grad_var

    while cand_funcs:
        _, _, func = heapq.heappop(cand_funcs)
        inputs = func.inputs
        outputs = [y() for y in func.outputs]  # access via weak ref

        in_data = tuple([x.data for x in inputs])
        out_grad = tuple([get_grad(y) for y in outputs])
        out_grad_data = tuple(
            [None if g is None else g.data for g in out_grad])
        hooks = chainer.get_function_hooks()
        if func._n_local_function_hooks != 0:
            hooks = collections.OrderedDict(hooks)
            hooks.update(func.local_function_hooks)
        hooks = hooks.values()  # avoid six for performance

        cuda.get_device_from_array(*in_data).use()
        for hook in hooks:
            hook.backward_preprocess(func, in_data, out_grad_data)

        # Collect the current input gradients.
        #
        # Note (Tokui): When the same variable is passed to multiple input
        # slots (e.g. an expression like ``f(x, x)``), it makes the
        # gradient accumulation complicated since the back-propagated
        # gradients w.r.t. the first and second argument should be
        # accumulated to the current gradient w.r.t. the same variable.
        # In this case, the current implementation passes the current
        # gradient only to the first occurrence of the variable in the
        # input tuple and passes ``None`` to the rest of the occurrences.
        # For example, when the input variables are ``(x, x)``, the
        # input gradient passed to the ``backward_accumulate`` method is
        # ``(gx, None)`` where ``gx`` is the current gradient of ``x``.
        # See also the docstring of ``FunctionNode.backward_accumulate``.
        target_input_indexes = [
            i for i, x in enumerate(inputs) if x.requires_grad
        ]
        target_inputs = [inputs[i] for i in target_input_indexes]
        in_grad = []
        for i, index_i in enumerate(target_input_indexes):
            x = inputs[index_i]
            if x in target_inputs[:i]:
                # Pass ``None`` for duplicated input variables except for
                # the first occurrence (see the comment above).
                gx = None
            elif x in grads:
                gx = grads[x]
            elif x.creator_node is None:
                x._check_old_style_gradient()
                # accumulate the gradient only if the node is a leaf
                gx = x.grad_var
            else:
                gx = None
            in_grad.append(gx)

        gxs = func.backward_accumulate(
            target_input_indexes, out_grad, in_grad)

        assert len(gxs) == len(in_grad)
        for hook in hooks:
            hook.backward_postprocess(func, in_data, out_grad_data)

        if is_debug:
            for gx in gxs:
                if gx is None:
                    continue
                gx_data = gx.data
                cuda.get_device_from_array(gx_data).use()
                if cuda.get_array_module(gx_data).isnan(gx_data).any():
                    msg = ('NaN is detected on backward computation of '
                           '{}'.format(func.label))
                    raise RuntimeError(msg)

        if not retain_grad:
            for y in outputs:
                if y is not None and y is not self.node:
                    grads[y] = None
                    y_var = y.get_variable()
                    if y_var is not None:
                        y_var._grad_var = None

        for i, gx in enumerate(gxs):
            if gx is None:
                continue

            x = target_inputs[i]
            if not x.requires_grad:
                continue

            _check_grad_type(func, x, gx.data)

            if x in target_inputs[:i]:
                # Accumulate the duplicated gradients here. See the comment
                # above the code that builds ``in_grad``.
                cur_gx = grads[x]
                grads[x] = gx if cur_gx is None else gx + cur_gx
            else:
                grads[x] = gx

            x_var = x.get_variable()
            if x_var is not None:
                x_var._grad_var = grads[x]

            if x.creator_node is not None:
                add_cand(x.creator_node)

        del gxs  # to reduce memory usage

    if initial_device is not None:
        initial_device.use()
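# Illustrative usage sketch of ``backward`` (an assumption-based example, not
# part of the original code): for a single-element loss the initial error is
# complemented with ones automatically, and intermediate gradients are
# discarded unless ``retain_grad=True`` is passed.
import numpy
import chainer
import chainer.functions as F

x = chainer.Variable(numpy.array([1., 2., 3.], dtype=numpy.float32))
h = x * x
loss = F.sum(h)      # single-element output
loss.backward()      # loss.grad is filled with ones automatically
print(x.grad)        # -> [2. 4. 6.]
print(h.grad)        # -> None: intermediate grads are dropped by default

x.cleargrad()
h = x * x
loss = F.sum(h)
loss.backward(retain_grad=True)
print(h.grad)        # -> [1. 1. 1.]: intermediate grads are kept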
def __call__(self, batch):
    with cuda.Device(self._device_id):
        with chainer.using_config('train', True):
            return self.train(batch)
##############################################################################
# condition
condTxt = 'DNN:' + DNNmode \
    + ' STFTchNum:' + str(chNum) \
    + ' winLen:' + str(winLen) \
    + ' shift:' + str(shiftLen) \
    + ' Nfft:' + str(fftLen) \
    + ' winMode:' + winMode \
    + ' loss:' + lossMode \
    + ' stateSize:' + str(stateSize) \
    + ' hiddenSize:' + str(hiddenSize)

##############################################################################
# DGT setting
with cuda.Device(DEVICE_INFO):
    window = mm.hannWin(winLen)
    windowD = mm.calcCanonicalDualWindow(window, shiftLen)
    dgt = dgt.dgtOnGPU(window, shiftLen, fftLen)
print(condTxt)

##############################################################################
# DNN setting
inputSize = chNum
# Look up the model class by name instead of building code with exec().
dnnEst = getattr(dnnModel, DNNmode)(
    inputSize, hiddenSize, stateSize).to_gpu(DEVICE_INFO)
print("params: " + str(sum(p.data.size for p in dnnEst.params())))

# Optimizer
optm_dnn = optimizers.Adam(alpha=lr, beta1=0.9, beta2=0.999, eps=1e-8)
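# Typical next step (an assumption about the rest of this script, not shown
# in the excerpt above): attach the optimizer to the model so that
# ``optm_dnn.update()`` can modify the parameters of ``dnnEst``.
optm_dnn.setup(dnnEst)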
def fit(self, content_image, style_image, epoch_num, callback=None):
    device_id = None
    if self.device_id >= 0:
        device_id = self.device_id
    with cuda.Device(device_id):
        return self.__fit(content_image, style_image, epoch_num, callback)
def test_copy_parameters_from_gpu_to_gpu(self):
    device_id = cuda.Device().id
    self.check_copy_parameters_from(device_id, device_id)
def test_copy_parameters_from_gpu_to_cpu(self):
    self.check_copy_parameters_from(cuda.Device().id, -1)
def test_linear_model_multi_gpu(self):
    with cuda.Device(0):
        self.assertGreater(
            cuda.to_cpu(self.model.accuracy_gpu(1).data), 0.9)
def backward(self, retain_grad=False):
    """Runs error backpropagation (a.k.a. backprop) from this variable.

    On backprop, :meth:`Function.backward` is called on each
    :class:`Function` object appearing in the backward graph starting from
    this variable. The backward graph is represented by backward
    references from variables to their creators, and from functions to
    their inputs. The backprop stops at all root variables. Some functions
    set ``None`` as gradients of some inputs, where further backprop does
    not take place at such input variables.

    This method uses :data:`grad` as the initial error array. User can
    manually set a gradient array before calling this method. If
    :data:`data` contains only one element (i.e., it is scalar) and
    :data:`grad` is ``None``, then this method automatically complements
    1.0 as the initial error. This is useful on starting backprop from
    some scalar loss value.

    Args:
        retain_grad (bool): If ``True``, the gradient arrays of all
            intermediate variables are kept. Otherwise, :data:`grad` of the
            intermediate variables are set to ``None`` on appropriate
            timing, which may reduce the maximum memory consumption.

            In most cases of training some models, the purpose of backprop
            is to compute gradients of parameters, not of variables, so it
            is recommended to set this flag ``False``.

    """
    if self.creator is None:
        return
    initial_device = None
    if cuda.available and isinstance(self.data, cuda.cupy.ndarray):
        try:
            initial_device = cuda.Device()
        except cuda.cupy.cuda.runtime.CUDARuntimeError as e:
            if e.status != 38:  # cudaErrorNoDevice
                raise

    is_debug = chainer.is_debug()

    cand_funcs = []
    seen_set = set()
    seen_vars = set()
    need_copy = set()

    # Initialize error by 1, if this is a loss variable
    if self.data.size == 1 and self.grad is None:
        with cuda.get_device(self.data) as device:
            if device is cuda.DummyDevice:
                self.grad = numpy.ones_like(self.data)
            else:
                self.grad = cuda.cupy.ones_like(self.data)

    def add_cand(cand):
        if cand not in seen_set:
            # Negate since heapq is min-heap
            heapq.heappush(cand_funcs, (-cand.rank, len(seen_set), cand))
            seen_set.add(cand)

    add_cand(self.creator)

    while cand_funcs:
        _, _, func = heapq.heappop(cand_funcs)
        outputs = [y() for y in func.outputs]  # access via weak ref

        in_data = tuple([x.data for x in func.inputs])
        out_grad = tuple([None if y is None else y.grad for y in outputs])
        hooks = chainer.get_function_hooks()
        if func._n_local_function_hooks != 0:
            hooks = collections.OrderedDict(hooks)
            hooks.update(func.local_function_hooks)

        cuda.get_device(*(in_data + out_grad)).use()
        for hook in six.itervalues(hooks):
            hook.backward_preprocess(func, in_data, out_grad)
        gxs = func.backward(in_data, out_grad)
        assert len(gxs) == len(in_data)
        for hook in six.itervalues(hooks):
            hook.backward_postprocess(func, in_data, out_grad)

        if is_debug:
            for gx in gxs:
                if gx is None:
                    continue
                cuda.get_device(gx).use()
                if cuda.get_array_module(gx).isnan(gx).any():
                    msg = 'NaN is detected on backward computation'
                    raise RuntimeError(msg)

        if not retain_grad:
            for y in outputs:
                if y is not None and y is not self:
                    y.grad = None
        for x, gx in zip(func.inputs, gxs):
            if gx is None:
                continue

            _check_grad_type(func, x, gx)

            # Accumulate the gradient to x. It is a bit tricky to handle
            # branches and parameter gradient accumulation correctly.
            id_x = id(x)
            if x.creator is None:  # leaf
                if x._grad is None:
                    x.grad = gx
                    need_copy.add(id_x)
                else:
                    cuda.get_device(gx).use()
                    if id_x in need_copy:
                        x.grad = utils.force_array(x.grad + gx)  # copy
                        need_copy.remove(id_x)
                    else:
                        x._grad += gx
            else:  # not a leaf
                add_cand(x.creator)
                if id_x not in seen_vars:  # 1st visit
                    x.grad = gx
                    seen_vars.add(id_x)
                    need_copy.add(id_x)
                else:
                    cuda.get_device(gx).use()
                    if id_x in need_copy:  # 2nd visit
                        x._grad = utils.force_array(gx + x._grad)  # copied
                        need_copy.remove(id_x)
                    else:  # 3rd or later visit
                        x._grad += gx
        del gxs  # to reduce memory usage

    if initial_device is not None:
        initial_device.use()
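# Illustrative sketch of the branch accumulation handled above (an
# assumption-based example, not a test from the library): when a variable
# feeds two branches of the graph, the gradients coming back along both
# branches are summed into ``x.grad``.
import numpy
import chainer
import chainer.functions as F

x = chainer.Variable(numpy.array([1., 2.], dtype=numpy.float32))
y = F.sum(x * 2.) + F.sum(x * 3.)   # ``x`` is used on two branches
y.backward()
print(x.grad)                       # -> [5. 5.]: 2 + 3 accumulated per element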