def create_multi_node_checkpointer(name, comm, cp_interval=5, gc_interval=5,
                                   path=None):
    '''Create a multi-node checkpointer object.

    Generational snapshot extension to allow fault tolerance; it keeps
    several old snapshots so that each MPI process can roll back to a
    synchronized snapshot.

    Snapshot files are identified as '<name>.<rank>.<iteration>'.

    - <name> ... identifier of the run the snapshot belongs to
    - <rank> ... rank of the MPI process that owns the model
    - <iteration> ... iteration number

    This extension keeps several files for each execution and allows users
    to resume the whole job from the latest snapshot of each MPI process,
    at the iteration where all snapshots agree.

    As this object is a usual Chainer extension, users can simply create it
    and pass it to the trainer as an extension::

        checkpointer = create_multi_node_checkpointer(name=run_id, comm=comm)
        trainer.extend(checkpointer, trigger=(25, 'iteration'))

    To run recovery at startup, before the first iteration, call
    ``checkpointer.maybe_load(trainer, optimizer)`` before ``trainer.run()``.
    If nothing is recovered (i.e. no snapshot is found),
    ``trainer.updater.iteration`` remains ``0``. Otherwise it takes the
    value stored in the snapshot and training resumes from that iteration.
    ``optimizer`` is optional, but passing it lets the multi-node optimizer
    skip the initial broadcast when the snapshot data are already in sync
    across all nodes.

    After training finishes without errors, all those temporary checkpoints
    are cleaned up on all nodes.

    Another example, using the checkpointer *without* a trainer::

        checkpointer = create_multi_node_checkpointer(name=run_id, comm=comm)
        checkpointer.maybe_load(obj_you_want_to_snap, optimizer)

        while True:  ## Training loop
            ...
            updater.update()
            ...
            checkpointer.save(obj_you_want_to_snap)  # Make a checkpoint

    Args:
        name (str): unique id of the run
        comm: communicator in ChainerMN
        cp_interval (int): minimum number of checkpoints to preserve
        gc_interval (int): interval to collect non-preserved checkpoints

    '''
    experimental('chainermn.extensions.create_multi_node_checkpointer')
    return _MultiNodeCheckpointer(name, comm, cp_interval, gc_interval, path)
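# Hedged usage sketch for the trainer-based workflow described in the
# docstring above. `run_id`, `comm`, `trainer`, and `optimizer` are
# illustrative names assumed to be prepared by the caller; this is not part
# of the library itself.
from chainermn.extensions import create_multi_node_checkpointer


def train_with_checkpointing(run_id, comm, trainer, optimizer):
    checkpointer = create_multi_node_checkpointer(name=run_id, comm=comm)
    trainer.extend(checkpointer, trigger=(25, 'iteration'))
    # Recover the latest consistent snapshot (if any) before the first
    # iteration; otherwise trainer.updater.iteration stays at 0.
    checkpointer.maybe_load(trainer, optimizer)
    trainer.run()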
def __init__(self, inputs, outputs):
    utils.experimental('chainer.links.TheanoFunction')
    if not _available:
        msg = '''theano is not installed on your environment.
Please install theano to activate theano function.

$ pip install theano'''
        raise RuntimeError(msg)

    inputs = _to_var_tuple(inputs)
    outputs = _to_var_tuple(outputs)

    # TODO(unno): We can remove redundant gpu-cpu copy using
    # theano.sandbox.cuda.basic_ops.gpu_from_host
    self.forward_func = theano.function(inputs=inputs, outputs=outputs)

    gs = tuple(
        o.type('g_{}'.format(i)) for i, o in enumerate(outputs))
    known_grads = collections.OrderedDict(zip(outputs, gs))

    grad = theano.tensor.grad(
        cost=None, wrt=inputs, known_grads=known_grads,
        disconnected_inputs='ignore')

    self.backward_func = theano.function(
        inputs=inputs + gs,
        outputs=grad,
        on_unused_input='ignore')
def __init__(self, pipeline, repeat=True):
    utils.experimental('DaliIterator')
    self.pipeline = pipeline
    self._repeat = repeat
    self._is_build = False
    self.epoch_size = 1  # dummy

    self.reset()
def __init__(self, inputs, outputs):
    utils.experimental('chainer.links.TheanoFunction')

    try:
        # When the Theano library is imported, it executes a lot of
        # initialization process. To minimize its side effect,
        # we need to import theano here.
        import theano
    except ImportError:
        msg = '''theano is not installed on your environment.
Please install theano to activate theano function.

$ pip install theano'''
        raise RuntimeError(msg)

    inputs = _to_var_tuple(inputs)
    outputs = _to_var_tuple(outputs)

    # TODO(unno): We can remove redundant gpu-cpu copy using
    # theano.sandbox.cuda.basic_ops.gpu_from_host
    self.forward_func = theano.function(inputs=inputs, outputs=outputs)

    gs = tuple(
        o.type('g_{}'.format(i)) for i, o in enumerate(outputs))
    known_grads = collections.OrderedDict(zip(outputs, gs))

    grad = theano.tensor.grad(
        cost=None, wrt=inputs, known_grads=known_grads,
        disconnected_inputs='ignore')

    self.backward_func = theano.function(
        inputs=inputs + gs,
        outputs=grad,
        on_unused_input='ignore')
def __init__(self, inputs, outputs):
    utils.experimental('chainer.links.TheanoFunction')

    try:
        # When the Theano library is imported, it executes a lot of
        # initialization process. To minimize its side effect,
        # we need to import theano here.
        import theano
    except ImportError:
        msg = '''theano is not installed on your environment.
Please install theano to activate theano function.

$ pip install theano'''
        raise RuntimeError(msg)

    inputs = _to_var_tuple(inputs)
    outputs = _to_var_tuple(outputs)

    # TODO(unno): We can remove redundant gpu-cpu copy using
    # theano.sandbox.cuda.basic_ops.gpu_from_host
    self.forward_func = theano.function(inputs=inputs, outputs=outputs)

    gs = tuple(
        o.type('g_{}'.format(i)) for i, o in enumerate(outputs))
    known_grads = collections.OrderedDict(zip(outputs, gs))

    grad = theano.tensor.grad(
        cost=None, wrt=inputs, known_grads=known_grads,
        disconnected_inputs='ignore')

    self.backward_func = theano.function(
        inputs=inputs + gs,
        outputs=grad,
        on_unused_input='ignore')
def __init__(self, ndim, ksize, stride=None, pad=0, cover_all=True):
    utils.experimental('chainer.functions.pooling.MaxPoolingND')
    super(MaxPoolingND, self).__init__(
        ndim, ksize, stride=stride, pad=pad, cover_all=cover_all)
def __init__(self, in_size, out_size):
    super(ChildSumTreeLSTM, self).__init__(
        W_x=linear.Linear(in_size, 4 * out_size),
        W_h_aio=linear.Linear(out_size, 3 * out_size, nobias=True),
        W_h_f=linear.Linear(out_size, out_size, nobias=True),
    )
    self.in_size = in_size
    self.state_size = out_size
    utils.experimental('chainer.links.tree_lstm.py')
def __init__(self, ndim, ksize, stride=None, pad=0, cover_all=False):
    utils.experimental('chainer.functions.pooling.AveragePoolingND')

    # TODO(takagi) Support cover_all mode.
    if cover_all is True:
        raise ValueError('`cover_all` mode is not supported yet.')

    super(AveragePoolingND, self).__init__(
        ndim, ksize, stride=stride, pad=pad, cover_all=cover_all)
def __init__(self, ndim, ksize, stride=None, pad=0, outsize=None,
             cover_all=True):
    super(UnpoolingND, self).__init__(ndim, ksize, stride, pad, cover_all)
    self.outs = None if outsize is None else outsize
    utils.experimental('chainer.functions.pooling.UnpoolingND')
def __init__(self, ndim, ksize, stride=None, pad=0, outsize=None,
             cover_all=True):
    utils.experimental('chainer.functions.pooling.UnpoolingND')

    if stride is None:
        stride = ksize

    self.ndim = ndim
    self.ksize = conv_nd.as_tuple(ksize, ndim)
    self.stride = conv_nd.as_tuple(stride, ndim)
    self.pad = conv_nd.as_tuple(pad, ndim)
    self.outs = outsize
    self.cover_all = cover_all
def __init__(self, in_size, out_size, n_ary=2):
    assert n_ary >= 2
    super(NaryTreeLSTM, self).__init__(
        W_x=linear.Linear(in_size, (3 + n_ary) * out_size),
    )
    for i in range(1, n_ary + 1):
        self.add_link(
            'W_h{}'.format(i),
            linear.Linear(out_size, (3 + n_ary) * out_size, nobias=True))
    self.in_size = in_size
    self.state_size = out_size
    self.n_ary = n_ary
    utils.experimental('chainer.links.tree_lstm.py')
def __init__(self, in_size, out_size, n_ary=2):
    assert n_ary >= 2
    super(NaryTreeLSTM, self).__init__(
        W_x=linear.Linear(in_size, (3 + n_ary) * out_size),
    )
    for i in range(1, n_ary + 1):
        self.add_link(
            'W_h{}'.format(i),
            linear.Linear(out_size, (3 + n_ary) * out_size, nobias=True))
    self.in_size = in_size
    self.state_size = out_size
    self.n_ary = n_ary
    utils.experimental('chainer.links.tree_lstm.py')
def __init__(self, eps=1e-6, initial_gamma=None, initial_beta=None):
    super(LayerNormalization, self).__init__()
    self.add_uninitialized_param('gamma')
    self.add_uninitialized_param('beta')
    if initial_gamma is None:
        initial_gamma = initializers.One()
    self._gamma_initializer = initial_gamma
    if initial_beta is None:
        initial_beta = initializers.Zero()
    self._beta_initializer = initial_beta
    self.eps = eps

    utils.experimental(
        'chainer.links.normalization.layer_normalization.py')
def __init__(self, in_size, out_size, n_ary=2):
    assert n_ary >= 1
    super(NaryTreeLSTM, self).__init__()
    with self.init_scope():
        self.W_x = linear.Linear(in_size, (3 + n_ary) * out_size)

        for i in range(1, n_ary + 1):
            l = linear.Linear(
                out_size, (3 + n_ary) * out_size, nobias=True)
            setattr(self, 'W_h{}'.format(i), l)

    self.in_size = in_size
    self.state_size = out_size
    self.n_ary = n_ary
    utils.experimental('chainer.links.tree_lstm.py')
def __init__(self, in_size, out_size, n_ary=2):
    assert n_ary >= 1
    super(NaryTreeLSTM, self).__init__()
    with self.init_scope():
        self.W_x = linear.Linear(in_size, (3 + n_ary) * out_size)

        for i in range(1, n_ary + 1):
            l = linear.Linear(
                out_size, (3 + n_ary) * out_size, nobias=True)
            setattr(self, 'W_h{}'.format(i), l)

    self.in_size = in_size
    self.state_size = out_size
    self.n_ary = n_ary
    utils.experimental('chainer.links.tree_lstm.py')
def __init__(self, size=None, eps=1e-6, initial_gamma=None,
             initial_beta=None):
    super(LayerNormalization, self).__init__()
    self.add_uninitialized_param('gamma')
    self.add_uninitialized_param('beta')
    if initial_gamma is None:
        initial_gamma = initializers.One()
    self._gamma_initializer = initial_gamma
    if initial_beta is None:
        initial_beta = initializers.Zero()
    self._beta_initializer = initial_beta
    self.eps = eps

    if size is not None:
        self._initialize_params(size)

    utils.experimental(
        'chainer.links.normalization.layer_normalization.py')
def __init__(self, size=None, eps=1e-6, initial_gamma=None,
             initial_beta=None):
    super(LayerNormalization, self).__init__()
    if initial_gamma is None:
        initial_gamma = 1
    if initial_beta is None:
        initial_beta = 0

    with self.init_scope():
        self.gamma = variable.Parameter(initial_gamma)
        self.beta = variable.Parameter(initial_beta)

    self.eps = eps
    if size is not None:
        self._initialize_params(size)

    utils.experimental(
        'chainer.links.normalization.layer_normalization.py')
def __init__(self, ndim, ksize, stride=None, pad=0, cover_all=False,
             pad_value=0):
    if not (pad_value is None or pad_value == 0):
        raise ValueError(
            'pad_value must be either 0 or None, not {}.'.format(
                pad_value))

    utils.experimental('chainer.functions.pooling.AveragePoolingND')

    # TODO(takagi) Support cover_all mode.
    if cover_all is True:
        raise ValueError('`cover_all` mode is not supported yet.')

    super(AveragePoolingND, self).__init__(
        ndim, ksize, stride=stride, pad=pad, cover_all=cover_all)

    self.pad_value = pad_value
def f():
    utils.experimental('f')
def __init__(self, forward_func, backward_func):
    utils.experimental('chainer.functions.TheanoFunction')
    self.forward_func = forward_func
    self.backward_func = backward_func
def f(self):
    utils.experimental('C.f')
def export(model, args, directory=None,
           export_params=True, graph_name='Graph'):
    """(Experimental) Export a computational graph as Caffe format.

    Args:
        model (~chainer.Chain): The model object you want to export in Caffe
            format. It should have a :meth:`__call__` method because the
            second argument ``args`` is directly given to the model via the
            ``()`` accessor.
        args (list of ~chainer.Variable): The arguments which are given to
            the model directly.
        directory (str): The directory used for saving the resulting Caffe
            model. If None, nothing is saved to the disk.
        export_params (bool): If True, this function exports all the
            parameters included in the given model at the same time. If
            False, the exported Caffe model doesn't include any parameter
            values.
        graph_name (str): A string to be used for the ``name`` field of the
            graph in the exported Caffe model.

    .. note::
        Currently, this function supports networks created by the following
        layer functions.

        - :func:`~chainer.functions.linear`
        - :func:`~chainer.functions.convolution_2d`
        - :func:`~chainer.functions.deconvolution_2d`
        - :func:`~chainer.functions.max_pooling_2d`
        - :func:`~chainer.functions.average_pooling_2d`
        - :func:`~chainer.functions.batch_normalization`
        - :func:`~chainer.functions.local_response_normalization`
        - :func:`~chainer.functions.relu`
        - :func:`~chainer.functions.concat`
        - :func:`~chainer.functions.softmax`
        - :func:`~chainer.functions.reshape`
        - :func:`~chainer.functions.add`

        This function can export at least the following networks.

        - GoogLeNet
        - ResNet
        - VGG

        Also, this function uses testing (evaluation) mode.

    .. admonition:: Example

       >>> from chainer.exporters import caffe
       >>>
       >>> class Model(chainer.Chain):
       ...     def __init__(self):
       ...         super(Model, self).__init__()
       ...         with self.init_scope():
       ...             self.l1 = L.Convolution2D(None, 1, 1, 1, 0)
       ...             self.b2 = L.BatchNormalization(1)
       ...             self.l3 = L.Linear(None, 1)
       ...
       ...     def __call__(self, x):
       ...         h = F.relu(self.l1(x))
       ...         h = self.b2(h)
       ...         return self.l3(h)
       ...
       >>> x = chainer.Variable(np.zeros((1, 10, 10, 10), np.float32))
       >>> caffe.export(Model(), [x], None, True, 'test')

    """
    utils.experimental('chainer.exporters.caffe.export')
    assert isinstance(args, (tuple, list))
    if len(args) != 1:
        raise NotImplementedError()
    for i in args:
        assert isinstance(i, variable.Variable)
    with function.force_backprop_mode(), chainer.using_config('train', False):
        output = model(*args)

    if isinstance(output, variable.Variable):
        output = [output]
    assert isinstance(output, (tuple, list))
    for i in output:
        assert isinstance(i, variable.Variable)

    prototxt = None
    caffemodel = None
    if directory is not None:
        prototxt = os.path.join(directory, 'chainer_model.prototxt')
        if export_params:
            caffemodel = os.path.join(directory, 'chainer_model.caffemodel')
    retriever = _RetrieveAsCaffeModel(prototxt, caffemodel)
    retriever(graph_name, args, output)
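# Hedged sketch complementing the docstring example above: the same toy model
# exported to a directory so that 'chainer_model.prototxt' and
# 'chainer_model.caffemodel' are actually written to disk. The './caffe_out'
# path is an illustrative choice, not part of the library.
import os

import numpy as np

import chainer
import chainer.functions as F
import chainer.links as L
from chainer.exporters import caffe


class Model(chainer.Chain):
    def __init__(self):
        super(Model, self).__init__()
        with self.init_scope():
            self.l1 = L.Convolution2D(None, 1, 1, 1, 0)
            self.b2 = L.BatchNormalization(1)
            self.l3 = L.Linear(None, 1)

    def __call__(self, x):
        h = F.relu(self.l1(x))
        h = self.b2(h)
        return self.l3(h)


x = chainer.Variable(np.zeros((1, 10, 10, 10), np.float32))
out_dir = './caffe_out'  # assumed output directory
os.makedirs(out_dir, exist_ok=True)
caffe.export(Model(), [x], out_dir, export_params=True, graph_name='test')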
def apply(self, inputs):
    """Computes output variables and grows the computational graph.

    Basic behavior is expressed in the documentation of
    :class:`FunctionNode`.

    .. note::

       If the :data:`~Variable.data` attributes of the input variables exist
       on a GPU device, that device is made current before calling
       :meth:`forward`, so implementors do not need to take care of device
       selection in most cases.

    Args:
        inputs: Tuple of input variables. Each element can be either
            :class:`~chainer.Variable`, :class:`numpy.ndarray`, or
            :class:`cupy.ndarray`. If the element is an ndarray, it is
            automatically wrapped with :class:`~chainer.Variable`.

    Returns:
        A tuple of output :class:`~chainer.Variable` objects.

    """
    input_vars = [chainer.as_variable(x) for x in inputs]
    in_data = tuple([x.data for x in input_vars])
    requires_grad = any([x.requires_grad for x in input_vars])

    # Check for input array types
    if not chainer.is_arrays_compatible(in_data):
        raise ValueError(
            'incompatible array types are mixed in the forward input '
            '({}).\n'
            '{}'.format(
                self.label,
                ', '.join(str(type(x)) for x in in_data)))

    is_debug = chainer.is_debug()
    if is_debug:
        # Keep stack trace for debug
        self.stack = traceback.extract_stack()

    if configuration.config.type_check:
        self._check_data_type_forward(in_data)

    hooks = chainer.get_function_hooks()
    if self._n_local_function_hooks > 0:
        hooks = collections.OrderedDict(hooks)
        hooks.update(self.local_function_hooks)
    hooks = hooks.values()  # avoid six for performance

    for hook in hooks:
        hook.forward_preprocess(self, in_data)

    # Forward propagation
    with cuda.get_device_from_array(*in_data):
        self._input_indexes_to_retain = None
        self._output_indexes_to_retain = None
        outputs = self.forward(in_data)

    # Check for output array types
    if not isinstance(outputs, tuple):
        raise TypeError(
            'forward output must be a tuple ({})\n'
            'Actual: {}'.format(self.label, type(outputs)))

    if not chainer.is_arrays_compatible(outputs):
        raise ValueError(
            'incompatible array types are mixed in the forward output '
            '({}).\n'
            '{}'.format(
                self.label,
                ', '.join(str(type(x)) for x in outputs)))

    for hook in hooks:
        hook.forward_postprocess(self, in_data)

    # NaN check of output values
    if is_debug:
        if any(out.dtype.kind == 'f' and
               cuda.get_array_module(out).isnan(out).any()
               for out in outputs):
            msg = ('NaN is detected on forward computation of '
                   '{}'.format(self.label))
            raise RuntimeError(msg)

    ret = tuple([variable.Variable(y, requires_grad=requires_grad)
                 for y in outputs])

    if configuration.config.enable_backprop:
        # Topological ordering
        self.rank = max([x.rank for x in input_vars]) if input_vars else 0
        # Add backward edges
        for y in ret:
            y.creator_node = self
        self.inputs = tuple([x.node for x in input_vars])
        # Add forward edges (must be weak references)
        self.outputs = tuple([weakref.ref(y.node) for y in ret])

        if self._input_indexes_to_retain is not None:
            for index in self._input_indexes_to_retain:
                input_vars[index].retain_data()

        if self._output_indexes_to_retain is not None:
            retained_data = []
            for index in self._output_indexes_to_retain:
                ret[index].retain_data()
                retained_data.append(outputs[index])
            self._retained_output_data = tuple(retained_data)

        self.lazy_grad_sum = configuration.config.lazy_grad_sum
        if self.lazy_grad_sum:
            experimental('config.lazy_grad_sum')

    return ret
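# Hedged sketch of how `apply` is used from user code: a toy elementwise
# square FunctionNode that retains its input for the backward pass. The class
# and names are illustrative only, not part of the library.
import numpy as np

import chainer
from chainer import function_node


class Square(function_node.FunctionNode):
    def forward(self, inputs):
        x, = inputs
        self.retain_inputs((0,))  # keep x for the backward pass
        return x * x,

    def backward(self, indexes, grad_outputs):
        x, = self.get_retained_inputs()
        gy, = grad_outputs
        return 2 * x * gy,  # d/dx x^2 = 2x


x = chainer.Variable(np.arange(3, dtype=np.float32))
y, = Square().apply((x,))  # grows the graph; returns a tuple of Variables
y.grad = np.ones(3, dtype=np.float32)
y.backward()  # x.grad becomes [0., 2., 4.]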
def create_multi_node_checkpointer(name, comm, cp_interval=5, gc_interval=5,
                                   path=None):
    '''Create a multi-node checkpointer object.

    Generational snapshot extension to allow fault tolerance; it keeps
    several old snapshots so that each MPI process can roll back to a
    synchronized snapshot.

    Snapshot files are identified as '<name>.<rank>.<iteration>'.

    - <name> ... identifier of the run the snapshot belongs to
    - <rank> ... rank of the MPI process that owns the model
    - <iteration> ... iteration number

    This extension keeps several files for each execution and allows users
    to resume the whole job from the latest snapshot of each MPI process,
    at the iteration where all snapshots agree.

    As this object is a usual Chainer extension, users can simply create it
    and pass it to the trainer as an extension::

        checkpointer = create_multi_node_checkpointer(name=run_id, comm=comm)
        trainer.extend(checkpointer, trigger=(25, 'iteration'))

    To run recovery at startup, before the first iteration, call
    ``checkpointer.maybe_load(trainer, optimizer)`` before ``trainer.run()``.
    If nothing is recovered (i.e. no snapshot is found),
    ``trainer.updater.iteration`` remains ``0``. Otherwise it takes the
    value stored in the snapshot and training resumes from that iteration.
    ``optimizer`` is optional, but passing it lets the multi-node optimizer
    skip the initial broadcast when the snapshot data are already in sync
    across all nodes.

    .. note::
        Make sure that ``checkpointer.maybe_load`` is called *after* all
        extensions with states, such as ``ExponentialShift``, are set to
        the trainer.

    After training finishes without errors, all those temporary checkpoints
    are cleaned up on all nodes.

    Another example, using the checkpointer *without* a trainer::

        checkpointer = create_multi_node_checkpointer(name=run_id, comm=comm)
        checkpointer.maybe_load(obj_you_want_to_snap, optimizer)

        while True:  ## Training loop
            ...
            updater.update()
            ...
            checkpointer.save(obj_you_want_to_snap)  # Make a checkpoint

    Args:
        name (str): unique id of the run
        comm: communicator in ChainerMN
        cp_interval (int): minimum number of checkpoints to preserve
        gc_interval (int): interval to collect non-preserved checkpoints

    '''
    experimental('chainermn.extensions.create_multi_node_checkpointer')
    return _MultiNodeCheckpointer(name, comm, cp_interval, gc_interval, path)
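# Hedged sketch of the trainer-less loop described in the docstring above.
# `run_id`, `comm`, `updater`, and `optimizer` are illustrative names assumed
# to be prepared by the caller, and `max_iteration` is a hypothetical stopping
# condition; this is not part of the library.
from chainermn.extensions import create_multi_node_checkpointer


def train_without_trainer(run_id, comm, updater, optimizer, max_iteration):
    checkpointer = create_multi_node_checkpointer(name=run_id, comm=comm)
    # Recover the latest consistent snapshot of the updater, if one exists.
    checkpointer.maybe_load(updater, optimizer)
    while updater.iteration < max_iteration:
        updater.update()
        checkpointer.save(updater)  # make a checkpoint after every update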
def class_method(cls):
    utils.experimental('C.class_method')
def __init__(self):
    utils.experimental('C')
def apply(self, inputs):
    """Computes output variables and grows the computational graph.

    Basic behavior is expressed in the documentation of
    :class:`FunctionNode`.

    .. note::

       If the :data:`~Variable.data` attributes of the input variables exist
       on a GPU device, that device is made current before calling
       :meth:`forward`, so implementors do not need to take care of device
       selection in most cases.

    Args:
        inputs: Tuple of input variables. Each element can be either
            :class:`~chainer.Variable`, :class:`numpy.ndarray`, or
            :class:`cupy.ndarray`. If the element is an ndarray, it is
            automatically wrapped with :class:`~chainer.Variable`.

    Returns:
        A tuple of output :class:`~chainer.Variable` objects.

    """
    input_vars = [chainer.as_variable(x) for x in inputs]
    in_data = tuple([x.data for x in input_vars])
    requires_grad = any([x.requires_grad for x in input_vars])

    # Check for input array types
    if not chainer.is_arrays_compatible(in_data):
        raise ValueError(
            'incompatible array types are mixed in the forward input '
            '({}).\n'
            '{}'.format(
                self.label,
                ', '.join(str(type(x)) for x in in_data)))

    is_debug = chainer.is_debug()
    if is_debug:
        # Keep stack trace for debug
        self.stack = traceback.extract_stack()

    if configuration.config.type_check:
        self._check_data_type_forward(in_data)

    hooks = chainer.get_function_hooks()
    if self._n_local_function_hooks > 0:
        hooks = collections.OrderedDict(hooks)
        hooks.update(self.local_function_hooks)
    hooks = hooks.values()  # avoid six for performance

    for hook in hooks:
        hook.forward_preprocess(self, in_data)

    # Forward propagation
    with cuda.get_device_from_array(*in_data):
        self._input_indexes_to_retain = None
        self._output_indexes_to_retain = None
        outputs = self.forward(in_data)

    # Check for output array types
    if not isinstance(outputs, tuple):
        raise TypeError(
            'forward output must be a tuple ({})\n'
            'Actual: {}'.format(self.label, type(outputs)))

    if not chainer.is_arrays_compatible(outputs):
        raise ValueError(
            'incompatible array types are mixed in the forward output '
            '({}).\n'
            '{}'.format(
                self.label,
                ', '.join(str(type(x)) for x in outputs)))

    for hook in hooks:
        hook.forward_postprocess(self, in_data)

    # NaN check of output values
    if is_debug:
        if any(out.dtype.kind == 'f' and
               cuda.get_array_module(out).isnan(out).any()
               for out in outputs):
            msg = ('NaN is detected on forward computation of '
                   '{}'.format(self.label))
            raise RuntimeError(msg)

    ret = tuple([variable.Variable(y, requires_grad=requires_grad)
                 for y in outputs])

    if configuration.config.enable_backprop:
        # Topological ordering
        self.rank = max([x.rank for x in input_vars]) if input_vars else 0
        # Add backward edges
        for y in ret:
            y.creator_node = self
        self.inputs = tuple([x.node for x in input_vars])
        # Add forward edges (must be weak references)
        self.outputs = tuple([weakref.ref(y.node) for y in ret])

        if self._input_indexes_to_retain is not None:
            for index in self._input_indexes_to_retain:
                input_vars[index].retain_data()

        if self._output_indexes_to_retain is not None:
            retained_data = []
            for index in self._output_indexes_to_retain:
                ret[index].retain_data()
                retained_data.append(outputs[index])
            self._retained_output_data = tuple(retained_data)

        self.lazy_grad_sum = configuration.config.lazy_grad_sum
        if self.lazy_grad_sum:
            experimental('config.lazy_grad_sum')

    return ret
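# Another hedged sketch of using `apply`, this time with output retention: a
# toy CPU-only exp node that keeps its *output* for the backward pass
# (illustrative only, not a library API).
import numpy as np

import chainer
from chainer import function_node


class Exp(function_node.FunctionNode):
    def forward(self, inputs):
        x, = inputs
        y = np.exp(x)
        self.retain_outputs((0,))  # keep y = exp(x) for the backward pass
        return y,

    def backward(self, indexes, grad_outputs):
        y, = self.get_retained_outputs()
        gy, = grad_outputs
        return y * gy,  # d/dx exp(x) = exp(x)


x = chainer.Variable(np.zeros(3, dtype=np.float32))
y, = Exp().apply((x,))
y.grad = np.ones(3, dtype=np.float32)
y.backward()  # x.grad becomes exp(0) * 1 = [1., 1., 1.]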
def static_method():
    utils.experimental('static_method')
def __init__(self, ndim, ksize, stride=None, pad=0, cover_all=True):
    utils.experimental('chainer.functions.pooling.MaxPoolingND')
    super(MaxPoolingND, self).__init__(
        ndim, ksize, stride=stride, pad=pad, cover_all=cover_all)