    def update_core(self):
        optimizer = self.get_optimizer('main')
        model_main = optimizer.target
        models_others = {
            k: v
            for k, v in self._models.items() if v is not model_main
        }

        iterator = self.get_iterator('main')
        batch = iterator.next()

        #
        # Split the batch into sub-batches.
        #
        n = len(self._models)
        in_arrays_list = {}
        for i, key in enumerate(six.iterkeys(self._models)):
            in_arrays_list[key] = self.converter(batch[i::n],
                                                 self._devices[key])

        # Clear all gradients first to reduce peak memory usage
        for model in six.itervalues(self._models):
            model.cleargrads()

        losses = []
        for model_key, model in six.iteritems(self._models):
            in_arrays = in_arrays_list[model_key]
            loss_func = self.loss_func or model

            with function.force_backprop_mode():
                dev_id = self._devices[model_key]
                dev_id = dev_id if 0 <= dev_id else None
                with cuda.get_device_from_id(dev_id):
                    if isinstance(in_arrays, tuple):
                        loss = loss_func(*in_arrays)
                    elif isinstance(in_arrays, dict):
                        loss = loss_func(**in_arrays)
                    else:
                        loss = loss_func(in_arrays)

            losses.append(loss)

        # Clear gradients again: parameters in _uninitialized_params may have
        # been initialized during the forward pass above
        for model in six.itervalues(self._models):
            model.cleargrads()

        for loss in losses:
            loss.backward(loss_scale=self.loss_scale)

        for model in six.itervalues(models_others):
            model_main.addgrads(model)

        optimizer.update()

        for model in six.itervalues(models_others):
            model.copyparams(model_main)

        if self.auto_new_epoch and iterator.is_new_epoch:
            optimizer.new_epoch(auto=True)
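
For orientation: every snippet on this page hinges on chainer.function.force_backprop_mode(), which re-enables construction of the computational graph even inside an enclosing no_backprop_mode() block. A minimal, self-contained sketch of that behavior (illustrative code, not taken from any example here):

import numpy as np
import chainer
from chainer import function

x = chainer.Variable(np.ones((2, 3), dtype=np.float32))
with chainer.no_backprop_mode():
    y = x * 2                        # no graph recorded: y.creator is None
    with function.force_backprop_mode():
        z = x * 2                    # graph recorded despite the outer block
assert y.creator is None
assert z.creator is not None         # z supports backpropagation
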
Example no. 2
    def backward(self, indexes, grad_outputs):
        inputs = self.get_retained_inputs()
        with function.force_backprop_mode():
            outs = _call_func(self.func, inputs)
        # Return gradients that are further backpropagatable
        return chainer.grad(
            outs, inputs, grad_outputs=grad_outputs,
            enable_double_backprop=True)
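
Because the gradients above are computed with enable_double_backprop=True, they can themselves be differentiated again. A toy sketch of chainer.grad used this way (illustrative values, not from the source):

import numpy as np
import chainer

x = chainer.Variable(np.array([3.0], dtype=np.float32))
y = x ** 2
gx, = chainer.grad([y], [x], enable_double_backprop=True)  # dy/dx = 2x -> [6.]
ggx, = chainer.grad([gx], [x])                             # d2y/dx2 = 2 -> [2.]
print(gx.array, ggx.array)
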
Example no. 3
    def update_core(self):
        optimizer = self.get_optimizer('main')
        # optimizer.target is the main wrapper chain, which contains
        # au_rcnn_train_chain and space_time_rnn
        model_main = optimizer.target
        loss_head_module = model_main.loss_head_module

        models_others = {k: v for k, v in self._models.items()
                         if v is not model_main.au_rcnn_train_chain}

        batch = self.get_iterator('main').next()
        in_arrays = self.converter(batch, -1)
        images, bboxes, labels = in_arrays
        batch_size, T, channel, height, width = images.shape
        images = images.reshape(batch_size * T, channel, height, width)  # B*T, C, H, W
        bboxes = bboxes.reshape(batch_size * T, config.BOX_NUM[self.database], 4)  # B*T, 9, 4
        labels = chainer.cuda.to_gpu(labels, device=self._devices["main"])
        # labels = labels.reshape(batch_size * T, config.BOX_NUM[self.database], -1)  # B*T, 9, 12/22

        # For reducing memory
        for model in six.itervalues(models_others):
            model.cleargrads()
        model_main.cleargrads()
        #
        # Split the batch to sub-batches.
        #
        n = len(self._models)
        in_arrays_list = {}
        sub_index = self.split_list(list(range(batch_size * T)), n)
        for i, key in enumerate(sorted(self._models.keys(), key=lambda e: str(e))):  # self._models holds the au_rcnn_train_chain replicas, including the main GPU's
            in_arrays_list[key] = (F.copy(images[sub_index[i]], self._devices.get(key, self._devices["main"])),
                                   F.copy(bboxes[sub_index[i]], self._devices.get(key, self._devices["main"])))

        # Run the per-GPU feature extractors, then gather the features on the main device
        with function.force_backprop_mode():
            roi_feature_multi_gpu = []
            for model_key, au_rcnn_train_chain in sorted(self._models.items(), key=lambda e: str(e[0])):
                images, bboxes = in_arrays_list[model_key]
                assert int(images.data.device) == au_rcnn_train_chain._device_id
                roi_feature = au_rcnn_train_chain(images, bboxes)  # shape =(B*T//n, F, D)
                roi_feature_multi_gpu.append(F.copy(roi_feature, self._devices["main"]))
            roi_feature = F.concat(roi_feature_multi_gpu, axis=0)  # multiple batch combine
            roi_feature = roi_feature.reshape(batch_size, T, config.BOX_NUM[self.database], roi_feature.shape[-1])
            loss = loss_head_module(roi_feature, labels)

        model_main.cleargrads()
        for model in six.itervalues(self._models):
            model.cleargrads()
        loss.backward()
        for model in six.itervalues(models_others):
            model_main.au_rcnn_train_chain.addgrads(model)
        optimizer.update()
        for model in six.itervalues(models_others):
            model.copyparams(model_main.au_rcnn_train_chain)  # only the main model's parameters are updated, so copy them back to every replica
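
The split_list helper used above is project-specific and not shown here; a plausible stand-in (an assumption, not the project's actual code) that splits a list into n roughly equal contiguous chunks:

def split_list(lst, n):
    # Hypothetical reimplementation: divide lst into n contiguous chunks
    # whose sizes differ by at most one element.
    q, r = divmod(len(lst), n)
    chunks, start = [], 0
    for i in range(n):
        end = start + q + (1 if i < r else 0)
        chunks.append(lst[start:end])
        start = end
    return chunks
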
Example no. 4
    def backward(self, indexes, grad_outputs):
        # Double backprop is not allowed
        if chainer.config.enable_backprop:
            raise RuntimeError('double backpropagation in functions.forget is '
                               'not allowed.')

        inputs = self.get_retained_inputs()
        # Create new variables that have no creators
        dummy_inputs = tuple([variable.Variable(inp.array) for inp in inputs])

        with function.force_backprop_mode(),\
                chainer.using_config('in_recomputing', True):
            outs = _call_func(self.func, dummy_inputs)
            assert len(outs) == len(grad_outputs)

        for out, grad_output in zip(outs, grad_outputs):
            out.grad_var = grad_output
        # TODO(kataoka): use outer backward's `retain_grad` and `loss_scale`
        chainer.variable._backprop_to_all(outs, False, None)

        return tuple([inp.grad_var for inp in dummy_inputs])
Example no. 5
    def backward(self, indexes, grad_outputs):
        # Double backprop is not allowed
        if chainer.config.enable_backprop:
            raise RuntimeError('double backpropagation in functions.forget is '
                               'not allowed.')

        inputs = self.get_retained_inputs()
        # Create new variables that have no creators
        dummy_inputs = tuple([variable.Variable(inp.array) for inp in inputs])

        with function.force_backprop_mode():
            outs = _call_func(self.func, dummy_inputs)
            assert len(outs) == len(grad_outputs)
            if len(outs) > 1:
                # Avoid doing backward multiple times when `outs` is a tuple
                outs = chainer.functions.identity(*outs)

        for out, grad_output in zip(outs, grad_outputs):
            out.grad_var = grad_output
        outs[0].backward()

        return tuple([inp.grad_var for inp in dummy_inputs])
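
The identity trick in the comment above works because chainer.functions.identity(*outs) gives all outputs one shared creator node, so seeding every output gradient and calling backward() once propagates everything in a single pass. A toy sketch (illustrative values, not from the source):

import numpy as np
import chainer
import chainer.functions as F

a = chainer.Variable(np.array([1.0], dtype=np.float32))
y1, y2 = a + 1, a * 3
y1, y2 = F.identity(y1, y2)         # y1 and y2 now share one creator node
y1.grad = np.array([1.0], np.float32)
y2.grad = np.array([1.0], np.float32)
y1.backward()                       # one call propagates both branches
print(a.grad)                       # [4.] = 1*1 + 1*3
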
Example no. 6
def export(model,
           args,
           directory=None,
           export_params=True,
           graph_name='Graph'):
    """(Experimental) Export a computational graph as Caffe format.

    Args:
        model (~chainer.Chain): The model object you want to export in Caffe
            format. It should have a :meth:`__call__` method because the
            second argument ``args`` is directly given to the model by the
            ``()`` accessor.
        args (list of ~chainer.Variable): The arguments which are given to the
            model directly.
        directory (str): The directory used for saving the resulting Caffe
            model. If None, nothing is saved to the disk.
        export_params (bool): If True, this function exports all the parameters
            included in the given model at the same time. If False, the
            exported Caffe model doesn't include any parameter values.
        graph_name (str): A string to be used for the ``name`` field of the
            graph in the exported Caffe model.

    .. note::
        Currently, this function supports networks created by the following
        layer functions.

        - :func:`~chainer.functions.linear`
        - :func:`~chainer.functions.convolution_2d`
        - :func:`~chainer.functions.deconvolution_2d`
        - :func:`~chainer.functions.max_pooling_2d`
        - :func:`~chainer.functions.average_pooling_2d`
        - :func:`~chainer.functions.batch_normalization`
        - :func:`~chainer.functions.local_response_normalization`
        - :func:`~chainer.functions.relu`
        - :func:`~chainer.functions.leaky_relu`
        - :func:`~chainer.functions.concat`
        - :func:`~chainer.functions.softmax`
        - :func:`~chainer.functions.reshape`
        - :func:`~chainer.functions.add`

        This function can export at least the following networks.

        - GoogLeNet
        - ResNet
        - VGG

        Also, this function uses testing (evaluation) mode.

    .. admonition:: Example

       >>> from chainer.exporters import caffe
       >>>
       >>> class Model(chainer.Chain):
       ...    def __init__(self):
       ...        super(Model, self).__init__()
       ...        with self.init_scope():
       ...            self.l1 = L.Convolution2D(None, 1, 1, 1, 0)
       ...            self.b2 = L.BatchNormalization(1)
       ...            self.l3 = L.Linear(None, 1)
       ...
       ...    def __call__(self, x):
       ...        h = F.relu(self.l1(x))
       ...        h = self.b2(h)
       ...        return self.l3(h)
       ...
       >>> x = chainer.Variable(np.zeros((1, 10, 10, 10), np.float32))
       >>> caffe.export(Model(), [x], None, True, 'test')

    """

    assert isinstance(args, (tuple, list))
    if len(args) != 1:
        raise NotImplementedError()
    for i in args:
        assert isinstance(i, variable.Variable)
    with function.force_backprop_mode(), chainer.using_config('train', False):
        output = model(*args)

    if isinstance(output, variable.Variable):
        output = [output]
    assert isinstance(output, (tuple, list))
    for i in output:
        assert isinstance(i, variable.Variable)

    prototxt = None
    caffemodel = None
    if directory is not None:
        prototxt = os.path.join(directory, 'chainer_model.prototxt')
        if export_params:
            caffemodel = os.path.join(directory, 'chainer_model.caffemodel')
    retriever = _RetrieveAsCaffeModel(prototxt, caffemodel)
    retriever(graph_name, args, output)
Example no. 7
    def backward(self, inputs, grads):
        with function.force_backprop_mode():
            xs = [variable.Variable(x) for x in inputs]
            outs = self._call_func(xs)
            _DummyFunction(grads)(*outs).backward()
        return tuple(x.grad for x in xs)
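
_DummyFunction is an internal helper of this (older) Forget implementation; roughly, and reconstructed from memory rather than quoted verbatim, it is a Function whose forward emits a throwaway output so it can attach to the graph, and whose backward hands back the gradients captured from the outer backward pass:

from chainer import cuda, function

class _DummyFunction(function.Function):
    # Sketch of the helper assumed above, not the exact source.
    def __init__(self, grads):
        self.grads = grads

    def forward(self, inputs):
        xp = cuda.get_array_module(*inputs)
        return xp.array(0),

    def backward(self, inputs, outputs):
        return self.grads
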
Example no. 8
def export(model, args, directory=None,
           export_params=True, graph_name='Graph'):
    """(Experimental) Export a computational graph as Caffe format.

    Args:
        model (~chainer.Chain): The model object you want to export in Caffe
            format. It should have a :meth:`__call__` method because the
            second argument ``args`` is directly given to the model by the
            ``()`` accessor.
        args (list of ~chainer.Variable): The arguments which are given to the
            model directly.
        directory (str): The directory used for saving the resulting Caffe
            model. If None, nothing is saved to the disk.
        export_params (bool): If True, this function exports all the parameters
            included in the given model at the same time. If False, the
            exported Caffe model doesn't include any parameter values.
        graph_name (str): A string to be used for the ``name`` field of the
            graph in the exported Caffe model.

    .. note::
        Currently, this function supports networks created by the following
        layer functions.

        - :func:`~chainer.functions.linear`
        - :func:`~chainer.functions.convolution_2d`
        - :func:`~chainer.functions.deconvolution_2d`
        - :func:`~chainer.functions.max_pooling_2d`
        - :func:`~chainer.functions.average_pooling_2d`
        - :func:`~chainer.functions.batch_normalization`
        - :func:`~chainer.functions.local_response_normalization`
        - :func:`~chainer.functions.relu`
        - :func:`~chainer.functions.concat`
        - :func:`~chainer.functions.softmax`
        - :func:`~chainer.functions.reshape`
        - :func:`~chainer.functions.add`

        This function can export at least the following networks.

        - GoogLeNet
        - ResNet
        - VGG

        Also, this function uses testing (evaluation) mode.

    .. admonition:: Example

       >>> from chainer.exporters import caffe
       >>>
       >>> class Model(chainer.Chain):
       ...    def __init__(self):
       ...        super(Model, self).__init__()
       ...        with self.init_scope():
       ...            self.l1 = L.Convolution2D(None, 1, 1, 1, 0)
       ...            self.b2 = L.BatchNormalization(1)
       ...            self.l3 = L.Linear(None, 1)
       ...
       ...    def __call__(self, x):
       ...        h = F.relu(self.l1(x))
       ...        h = self.b2(h)
       ...        return self.l3(h)
       ...
       >>> x = chainer.Variable(np.zeros((1, 10, 10, 10), np.float32))
       >>> caffe.export(Model(), [x], None, True, 'test')

    """

    utils.experimental('chainer.exporters.caffe.export')
    assert isinstance(args, (tuple, list))
    if len(args) != 1:
        raise NotImplementedError()
    for i in args:
        assert isinstance(i, variable.Variable)
    with function.force_backprop_mode(), chainer.using_config('train', False):
        output = model(*args)

    if isinstance(output, variable.Variable):
        output = [output]
    assert isinstance(output, (tuple, list))
    for i in output:
        assert isinstance(i, variable.Variable)

    prototxt = None
    caffemodel = None
    if directory is not None:
        prototxt = os.path.join(directory, 'chainer_model.prototxt')
        if export_params:
            caffemodel = os.path.join(directory, 'chainer_model.caffemodel')
    retriever = _RetrieveAsCaffeModel(prototxt, caffemodel)
    retriever(graph_name, args, output)
Example no. 9
    def update_core(self):
        optimizer = self.get_optimizer("main")
        model_main = optimizer.target
        models_others = {
            k: v
            for k, v in self._models.items() if v is not model_main
        }

        iterator = self.get_iterator("main")
        batch = iterator.next()

        # -- split the batch into sub-batches -- #
        n = len(self._models)
        in_arrays_lists = {}
        for i, key in enumerate(six.iterkeys(self._models)):
            in_arrays_lists[key] = self.converter(batch[i::n],
                                                  self._devices[key])

        # clear all gradients first to reduce peak memory usage
        for model in six.itervalues(self._models):
            model.cleargrads()

        losses = []
        for model_key, model in six.iteritems(self._models):
            x, adj = in_arrays_lists[model_key]

            with function.force_backprop_mode():
                with chainer.using_device(self._devices[model_key]):
                    z, sum_log_det_jacs = model(x, adj)
                    nll = model.log_prob(z, sum_log_det_jacs)

                    if self.two_step:
                        loss = self.h_nll_weight * nll[0] + nll[1]
                    else:
                        loss = nll
                    #loss += F.square(F.exp(model.ln_var) + F.exp(-model.ln_var))
            losses.append(loss)

        for model in six.itervalues(self._models):
            model.cleargrads()

        for loss in losses:
            loss.backward(loss_scale=self.loss_scale)

        for model in six.itervalues(models_others):
            model_main.addgrads(model)

        total_loss = 0.0
        for loss in losses:
            loss_in_cpu = F.copy(loss, -1)
            total_loss += loss_in_cpu
        average_losses = total_loss / len(losses)
        chainer.report({
            "neg_log_likelihood": average_losses,
            "z_var": model_main.z_var
        })

        optimizer.update()

        for model in six.itervalues(models_others):
            model.copyparams(model_main)

        if self.auto_new_epoch and iterator.is_new_epoch:
            optimizer.new_epoch(auto=True)
Example no. 10
    def update_core(self):
        names = list(six.iterkeys(self.devices))
        gen_optimizer = self.get_optimizer('opt_gen')
        dis_optimizer = self.get_optimizer('opt_dis')
        for i in range(self.n_dis):
            # clear the gradients first
            for model in six.itervalues(self.models):
                model['gen'].cleargrads()
                model['dis'].cleargrads()
            # update D
            # first calculate the gradients
            for accumulation_index in range(self.n_accumulation):
                for name in names:
                    with function.force_backprop_mode():
                        dev_id = self.devices[name]
                        dev_id = dev_id if 0 <= dev_id else None
                        with cuda.get_device_from_id(dev_id):
                            gen = self.models[name]['gen']
                            dis = self.models[name]['dis']
                            xp = gen.xp
                            x_real, y_real = self.get_batch(xp)
                            batchsize = len(x_real)
                            dis_real = dis(x_real, y=y_real)
                            x_fake, y_fake = self._generete_samples(
                                gen=gen, n_gen_samples=batchsize)
                            dis_fake = dis(x_fake, y=y_fake)
                            x_fake.unchain_backward()
                            loss_dis = self.loss_dis(
                                dis_fake=dis_fake, dis_real=dis_real) / float(
                                    self.n_accumulation)
                            chainer.reporter.report({'loss_dis': loss_dis})
                            loss_dis.backward()

            for name in names:
                if name != 'main':
                    self.models['main']['dis'].addgrads(
                        self.models[name]['dis'])
            dis_optimizer.update()
            if self.iteration % self.n_SR == 0:
                SR(self.models['main']['dis'])

            for name in names:
                if name != 'main':
                    self.models[name]['dis'].copyparams(
                        self.models['main']['dis'])
            # update G

            if i == 0:
                for model in six.itervalues(self.models):
                    model['gen'].cleargrads()
                    model['dis'].cleargrads()
                for accumulation_index in range(self.n_accumulation):
                    for name in names:
                        with function.force_backprop_mode():
                            dev_id = self.devices[name]
                            dev_id = dev_id if 0 <= dev_id else None
                            with cuda.get_device_from_id(dev_id):
                                gen = self.models[name]['gen']
                                dis = self.models[name]['dis']
                                x_fake, y_fake = self._generete_samples(
                                    gen=gen)
                                dis_fake = dis(x_fake, y=y_fake)
                                loss_gen = self.loss_gen(
                                    dis_fake=dis_fake) / float(
                                        self.n_accumulation)
                                chainer.reporter.report({'loss_gen': loss_gen})
                                loss_gen.backward()

                for name in names:
                    if name != 'main':
                        self.models['main']['gen'].addgrads(
                            self.models[name]['gen'])
                gen_optimizer.update()
                for name in names:
                    if name != 'main':
                        self.models[name]['gen'].copyparams(
                            self.models['main']['gen'])
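
A final note on the n_accumulation loops above: dividing each sub-batch loss by n_accumulation and calling backward() repeatedly accumulates an averaged gradient, after which a single optimizer.update() applies one step for the whole effective batch. A self-contained toy sketch of that pattern (single device, illustrative names):

import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L

model = L.Linear(4, 1)
optimizer = chainer.optimizers.SGD(lr=0.1)
optimizer.setup(model)

n_accumulation = 4
model.cleargrads()
for _ in range(n_accumulation):
    x = np.random.rand(8, 4).astype(np.float32)
    t = np.zeros((8, 1), dtype=np.float32)
    loss = F.mean_squared_error(model(x), t) / float(n_accumulation)
    loss.backward()              # gradients accumulate across iterations
optimizer.update()               # one parameter update for the full batch
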