Example #1
def affine_backward(inputs, base_axis=1):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    w0 = inputs[2]

    base_axis += inputs[0].ndim * (base_axis < 0)

    ctx = nn.get_current_context()
    dfx = AffineDataGrad(ctx, base_axis)
    dfw = AffineFilterGrad(ctx, base_axis)
    dfx.xshape = x0.shape
    dfw.wshape = w0.shape

    dx0 = dfx(dy, w0)
    dw0 = dfw(dy, x0)

    if len(inputs) == 4:
        axes = [i for i in range(0, base_axis)]
        db0 = F.sum(dy, axes, keepdims=False)
        return dx0, dw0, db0
    else:
        return dx0, dw0
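A minimal usage sketch (not part of the original example): backward functions such as affine_backward above are normally invoked indirectly through nn.grad; the shapes and the parameter scope name below are illustrative assumptions.

import numpy as np
import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF

# Build a small affine graph; nn.grad expands it into a gradient graph by
# dispatching to the registered *_backward functions (e.g. affine_backward).
x = nn.Variable.from_numpy_array(np.random.randn(8, 16).astype(np.float32))
with nn.parameter_scope("affine_example"):
    y = PF.affine(x, 4)
loss = F.mean(y ** 2)
params = list(nn.get_parameters().values())
grads = nn.grad([loss], params)
F.sink(*grads).forward()  # evaluate the gradient graph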
Example #2
def fused_batch_normalization_backward(inputs,
                                       axes=(1, ),
                                       decay_rate=0.9,
                                       eps=1e-05,
                                       batch_stat=True,
                                       nonlinearity='relu'):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    if nonlinearity not in ["", "relu"]:
        raise ValueError("nonlinearity must be either '' or 'relu'.")
    ctx = nn.get_current_context()
    df = FusedBatchNormalizationBackward(ctx, axes, decay_rate, eps,
                                         batch_stat, nonlinearity)
    dy = inputs[0]
    x0 = inputs[1]
    b0 = inputs[2]
    g0 = inputs[3]
    rm = inputs[4]
    rv = inputs[5]
    z0 = inputs[6] if len(inputs) == 7 else None
    df.is_add = z0 is not None
    y0 = get_output(x0, "FusedBatchNormalization")
    if df.is_add:
        dx0, db0, dg0, dz0 = df(dy, x0, b0, g0, rm, rv, y0, z0)
        return dx0, db0, dg0, None, None, dz0
    else:
        dx0, db0, dg0 = df(dy, x0, b0, g0, rm, rv, y0)
        return dx0, db0, dg0, None, None
Example #3
    def backward_impl(self, inputs, outputs, prop_down, accum):
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        # Args
        kernel = self.forward_func.info.args["kernel"]
        stride = self.forward_func.info.args["stride"]
        ignore_border = self.forward_func.info.args["ignore_border"]
        pad = self.forward_func.info.args["pad"]
        channel_last = self.forward_func.info.args["channel_last"]

        # Inputs
        x0 = inputs[0].data
        dy = inputs[1].data
        # Outputs
        dx0 = outputs[0].data
        # Grads of inputs
        g_x0 = inputs[0].grad
        g_dy = inputs[1].grad
        # Grads of outputs
        g_dx0 = outputs[0].grad

        # Compute
        ctx = nn.get_current_context()
        backward_func = nn.function.MaxPoolingBackward(
            ctx, kernel, stride, ignore_border, pad, channel_last)

        if prop_down[1]:
            x0_ = nn.Variable(x0.shape).apply(
                data=x0, grad=g_x0, need_grad=True)
            dy_ = nn.Variable(dy.shape).apply(
                data=dy, grad=g_dy, need_grad=True)
            dx0_ = nn.Variable(dx0.shape).apply(data=dx0, grad=g_dx0)
            backward_func.setup([x0_, dy_], [dx0_])
            backward_func.backward([x0_, dy_], [dx0_], accum=accum)
Example #4
def convolution_data_grad_backward(inputs,
                                   base_axis=1,
                                   pad=None,
                                   stride=None,
                                   dilation=None,
                                   group=1,
                                   channel_last=False):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    gdx = inputs[0]
    dy = inputs[1]
    w0 = inputs[2]

    ctx = nn.get_current_context()
    dfw = ConvolutionFilterGrad(ctx, base_axis, pad, stride, dilation, group,
                                channel_last)
    dfw.wshape = w0.shape

    gdy = F.convolution(gdx, w0, None, base_axis, pad, stride, dilation, group,
                        channel_last)
    gw0 = dfw(dy, gdx)
    return gdy, gw0
Example #5
 def _call_function(self, type_name, inputs, args):
     import nnabla.function_bases as FB
     function_expr = "FB.F.{type_name}(nn.{ctx}, **{args})".format(
         type_name=type_name, ctx=nn.get_current_context(), args=args)
     function = eval(function_expr)
     o = function(*inputs)
     return o
Example #6
def deconvolution_filter_grad_backward(inputs,
                                       base_axis=1,
                                       pad=None,
                                       stride=None,
                                       dilation=None,
                                       group=1,
                                       channel_last=False,
                                       output_padding=None):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    gdw = inputs[0]
    dy = inputs[1]
    x0 = inputs[2]

    ctx = nn.get_current_context()
    dfx = DeconvolutionDataGrad(ctx, base_axis, pad, stride, dilation, group,
                                channel_last, output_padding)
    dfx.xshape = x0.shape

    gdy = F.deconvolution(x0, gdw, None, base_axis, pad, stride, dilation,
                          group, channel_last, output_padding)
    gx0 = dfx(dy, gdw)
    return gdy, gx0
Example #7
def _create_function(inputs, f, batch_size):
    ctx = nn.get_current_context()
    function_proto = f

    # todo: arrange weight name for NNC

    if function_proto.type == "Reshape":  # if batch_size == -1, something may be wrong here
        reshape_shape = resolve_reshape_params(
            inputs, function_proto, batch_size)
        function_instance = F.Reshape(
            ctx, shape=reshape_shape, inplace=function_proto.reshape_param.inplace)
    elif function_proto.type == 'Broadcast':
        shape = resolve_broadcast_params(inputs, function_proto, batch_size)
        function_instance = F.Broadcast(ctx, shape=shape)
    elif function_proto.type == "RepeatStart":
        # Would create F.Identity(ctx); repeat is not supported.
        raise NotImplementedError("Repeat not supported.")
    elif function_proto.type == "RepeatEnd":
        # Would create F.Identity(ctx); repeat is not supported.
        raise NotImplementedError("Repeat not supported.")
    elif function_proto.type == "RecurrentOutput":
        # Would create F.Stack(ctx, axis=function_proto.recurrent_param.axis).
        raise NotImplementedError("Recurrent not supported.")
    elif function_proto.type == "RecurrentInput":
        # Would create F.Split(ctx, axis=function_proto.recurrent_param.axis).
        raise NotImplementedError("Recurrent not supported.")
    elif function_proto.type == "Delay":
        # Would create F.Identity(ctx); recurrent is not supported.
        raise NotImplementedError("Recurrent not supported.")
    else:
        function_instance = _create_function_instance(ctx, function_proto)

    return function_instance
Example #8
def scope_function():
    # turn off auto forward mode
    nn.set_auto_forward(False)

    # clear all parameters
    nn.clear_parameters()

    # keep context
    ctx = nn.get_current_context()

    yield

    # restore context
    nn.set_default_context(ctx)
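The generator above follows the set-up / yield / tear-down pattern of a pytest fixture; a minimal sketch of how such a fixture might be registered and consumed, assuming pytest (the decorator and the test are not part of the original snippet).

import pytest
import nnabla as nn

@pytest.fixture(autouse=True)  # hypothetical registration
def scope_function():
    nn.set_auto_forward(False)      # turn off auto forward mode
    nn.clear_parameters()           # clear all parameters
    ctx = nn.get_current_context()  # keep context
    yield
    nn.set_default_context(ctx)     # restore context

def test_runs_in_clean_scope():
    # Each test starts with an empty parameter scope.
    assert len(nn.get_parameters()) == 0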
Example #9
def batch_normalization_backward(inputs, axes=(1,), decay_rate=0.9, eps=1e-05,
                                 batch_stat=True, no_scale=False, no_bias=False):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    ctx = nn.get_current_context()
    df = BatchNormalizationBackward(
        ctx, axes, decay_rate, eps, batch_stat, no_scale, no_bias)
    d_inputs = df(*inputs)
    return force_tuple(d_inputs) + (None, None)
Example #10
def concatenate_backward(inputs, axis=None):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    axis = axis if axis is not None else len(dy.shape) - 1
    ctx = nn.get_current_context()
    df = ConcatenateDataGrad(ctx, axis=axis)
    df.xshapes = [x.shape for x in inputs[1:]]
    dx0 = df(dy)
    return dx0
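A minimal sketch of calling concatenate_backward directly (assuming the function above is in scope); the shapes are illustrative, and one gradient is returned per concatenated input.

import numpy as np
import nnabla as nn
import nnabla.functions as F

a = nn.Variable.from_numpy_array(np.random.randn(2, 3).astype(np.float32))
b = nn.Variable.from_numpy_array(np.random.randn(2, 5).astype(np.float32))
y = F.concatenate(a, b, axis=1)
dy = nn.Variable.from_numpy_array(np.ones(y.shape, dtype=np.float32))
da, db = concatenate_backward([dy, a, b], axis=1)  # gradients w.r.t. a and b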
Example #11
def unpooling_backward(inputs, kernel, channel_last=False):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    ctx = nn.get_current_context()
    df = UnpoolingDataGrad(ctx, kernel, channel_last)
    df.xshape = x0.shape
    dx0 = df(dy)
    return dx0
Example #12
def global_average_pooling_backward(inputs):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    ctx = nn.get_current_context()
    pool = GlobalAveragePoolingDataGrad(ctx)
    pool.xshape = x0.shape
    dx0 = pool(dy)
    return dx0
Example #13
def slice_backward(inputs, start=None, stop=None, step=None):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    ctx = nn.get_current_context()
    df = SliceDataGrad(ctx, start, stop, step)
    df.xshape = x0.shape
    dx0 = df(dy)
    return dx0
Example #14
def deconvolution_backward(inputs,
                           base_axis=1,
                           pad=None,
                           stride=None,
                           dilation=None,
                           group=1,
                           channel_last=False,
                           output_padding=None):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    w0 = inputs[2]

    base_axis += x0.ndim * (base_axis < 0)
    # base_axis += inputs[0].ndim*(base_axis < 0)

    ctx = nn.get_current_context()
    dfx = DeconvolutionDataGrad(ctx, base_axis, pad, stride, dilation, group,
                                channel_last, output_padding)
    dfw = DeconvolutionFilterGrad(ctx, base_axis, pad, stride, dilation, group,
                                  channel_last, output_padding)
    dfx.xshape = x0.shape
    dfw.wshape = w0.shape

    dx0 = dfx(dy, w0)
    dw0 = dfw(dy, x0)

    if len(inputs) == 4:
        if channel_last:
            axes = [i for i in range(dy.ndim - 1)]
        else:
            axes = [i for i in range(0, base_axis)] + \
                [i for i in range(base_axis + 1, dy.ndim)]
        db0 = F.sum(dy, axes, keepdims=False)
        return dx0, dw0, db0
    else:
        return dx0, dw0
Example #15
    def _create_function(self, function_proto):
        inputs = self._create_inputs(function_proto.input)

        function_instance = _create_function(
            nn.get_current_context(), inputs, function_proto, self.batch_size)

        outputs = function_instance(*inputs)
        if not isinstance(outputs, tuple):
            outputs = (outputs,)

        for i, name in enumerate(function_proto.output):
            try:
                var, _ = self.vseen[name]
            except:
                self.vseen[name] = (outputs[i], [0])
                continue
            var.rewire_on(outputs[i])
Example #16
def measure_cpu_gpu_instant_load():
    # Get current cpu gpu load, as
    # load = [rank, cpu_load, nvidia_device_id, gpu_load]
    # result_arr: [load, load, ...]

    gpu_load = []
    if gpu_load_backend_ok:
        global gpu_a_load
        global gpu_m_count

        gpu_m_count += 1
        try:
            comm = current_communicator()
            if comm:
                index = comm.local_rank
            elif 'cuda' in str(nn.get_current_context().backend):
                index = 0
            else:
                raise Exception
            handler = pynvml.nvmlDeviceGetHandleByIndex(index)
            gpu_load = [[
                index,
                pynvml.nvmlDeviceGetUtilizationRates(handler).gpu
            ]]

            if index in gpu_a_load.keys():
                gpu_a_load[index]['name'] = pynvml.nvmlDeviceGetName(
                    handler).decode("utf-8")
                o_load = gpu_a_load[index]['load']
                n_load = gpu_load[0][1]
                gpu_a_load[index]['load'] = (
                    (gpu_m_count - 1) * o_load + n_load) / gpu_m_count
            else:
                gpu_a_load[index] = {
                    'name': pynvml.nvmlDeviceGetName(handler).decode("utf-8"),
                    'load': gpu_load[0][1]
                }

        except Exception:
            gpu_load = []

    if cpu_load_backend_ok:
        global p_handler
        cpu_load = p_handler.cpu_percent()
        callback.update_status(
            ('cpu_gpu_load', collect_and_shape_result(cpu_load, gpu_load)))
Example #17
def interpolate_backward(inputs, output_size, mode, align_corners=True,
                         half_pixel=False, half_pixel_for_nn=False, channel_last=False):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    ctx = nn.get_current_context()
    df = InterpolateDataGrad(ctx, output_size, mode, align_corners,
                             half_pixel, half_pixel_for_nn, channel_last)
    df.xshape = x0.shape
    dx0 = df(dy)
    return dx0
Example #18
def embed_backward(inputs):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    w0 = inputs[2]

    ctx = nn.get_current_context()
    dfw = EmbedFilterGrad(ctx)
    dfw.wshape = w0.shape

    dw0 = dfw(dy, x0)
    return None, dw0
Example #19
def affine_filter_grad_backward(inputs, base_axis=1):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    gdw = inputs[0]
    dy = inputs[1]
    x0 = inputs[2]

    ctx = nn.get_current_context()
    dfx = AffineDataGrad(ctx, base_axis)
    dfx.xshape = x0.shape

    gdy = F.affine(x0, gdw, None, base_axis)
    gx0 = dfx(dy, gdw)
    return gdy, gx0
Example #20
def scope_function():
    # turn off auto forward mode
    nn.set_auto_forward(False)

    # clear all parameters
    nn.clear_parameters()

    # keep context
    ctx = nn.get_current_context()

    # use cached array
    nn.prefer_cached_array(True)

    # turn off re-computation
    nn.set_global_recompute(False)

    yield

    # restore context
    nn.set_default_context(ctx)
Example #21
def affine_data_grad_backward(inputs, base_axis=1):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    gdx = inputs[0]
    dy = inputs[1]
    w0 = inputs[2]

    ctx = nn.get_current_context()
    dfw = AffineFilterGrad(ctx, base_axis)
    dfw.wshape = w0.shape

    gdy = F.affine(gdx, w0, None, base_axis)
    gw0 = dfw(dy, gdx)
    return gdy, gw0
Example #22
def pad_backward(inputs, pad_width, mode='constant', constant_value=0):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    if mode != "constant":
        raise NotImplementedError(
            "pad_backward (mode != 'constant') is not implemented.")
    dy = inputs[0]
    x0 = inputs[1]
    ctx = nn.get_current_context()
    # constant value is always zero after 1st-order derivative
    df = PadDataGrad(ctx, pad_width, mode, constant_value=0)
    df.xshape = x0.shape
    dx0 = df(dy)
    return dx0
Example #23
    def _connect_on_gradient_graph(self, grad_vars, f):
        # 1. accumulate variables that are used more than once, or do nothing
        vf_vb_map = grad_vars.pop(f)  # {VO_fwd: [VI_bwd]}
        grad_inputs = []
        for o in f.outputs:
            # address `floating` variables; no function takes them as input.
            # e.g., when dx, db, dg = dBN(...), (db, dg) are not used afterwards.
            #v = vf_vb_map[o] if o in vf_vb_map else [None]
            v = vf_vb_map[o] if o in vf_vb_map else [0]
            if len(v) > 1:
                grad_inputs += [sum(v)]
                #grad_inputs += [F.add_n(v)]
            else:
                grad_inputs += v

        # 2. lookup the backward function
        f_fwd_name = f.info.type_name
        if f_fwd_name not in registry:
            raise ValueError(
                "{} is not in the backward function registry".format(
                    f_fwd_name))
        backward_func = registry[f_fwd_name]

        # 3. connect
        grad_inputs = grad_inputs + f.inputs
        ctx = nn.get_current_context()
        with nn.context_scope(ctx):
            grad_outputs = backward_func(grad_inputs, **f.info.args)
        grad_outputs = self._force_list(grad_outputs)

        # 4. put grad_output as grad_input to a corresponding function
        for inp, grad_out in zip(f.inputs, grad_outputs):
            if grad_out is None:
                continue
            if inp.parent not in grad_vars:
                grad_vars[inp.parent] = OrderedDict()
            if inp not in grad_vars[inp.parent]:
                grad_vars[inp.parent][inp] = [grad_out]
            else:
                grad_vars[inp.parent][inp] += [grad_out]
        return grad_outputs
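Step 2 above resolves a backward implementation from the forward function's type name; a toy sketch of that registry pattern, using hypothetical entries drawn from the examples in this listing.

# Hypothetical mini-registry; the real one is populated by the library.
registry = {
    "Affine": affine_backward,            # see Example #1
    "Concatenate": concatenate_backward,  # see Example #10
}

def lookup_backward(f_fwd_name):
    if f_fwd_name not in registry:
        raise ValueError(
            "{} is not in the backward function registry".format(f_fwd_name))
    return registry[f_fwd_name]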
Example #24
def max_pooling_backward_backward(inputs,
                                  kernel,
                                  stride=None,
                                  ignore_border=True,
                                  pad=None,
                                  channel_last=False):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    gdx = inputs[0]
    dy = inputs[1]
    x0 = inputs[2]
    ctx = nn.get_current_context()
    df = MaxPoolingBackwardDataGrad(ctx, kernel, stride, ignore_border, pad,
                                    channel_last)
    df.yshape = dy.shape
    gdy = df(gdx, x0)
    return gdy, None
Example #25
def average_pooling_backward(inputs,
                             kernel,
                             stride=None,
                             ignore_border=True,
                             pad=None,
                             channel_last=False,
                             including_pad=True):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    ctx = nn.get_current_context()
    df = AveragePoolingDataGrad(ctx, kernel, stride, ignore_border, pad,
                                channel_last, including_pad)
    df.xshape = x0.shape
    dx0 = df(dy)
    return dx0
Example #26
# Now we can access the internals of mnist-collection/dcgan.py. The
# hyperparameters are already defined there, so this example follows them.

source = inspect.getsource(I)
print(source[source.index("if __name__"):])

max_iter = 20000
learning_rate = 0.0002
batch_size = 64
weight_decay = 0.0001

# Set the context.
context = get_extension_context("cudnn", device_id=0, type_config="float")
nn.set_default_context(context)
nn.get_current_context()

# Set up the fake path
z = nn.Variable([batch_size, 100, 1, 1])
fake = I.generator(z)
fake.persistent = True  # do not clear this buffer during backward
pred_fake = I.discriminator(fake)
loss_gen = F.mean(
    F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
fake_dis = fake.get_unlinked_variable(need_grad=True)
fake_dis.need_grad = True  # TODO: Workaround until v1.0.2
pred_fake_dis = I.discriminator(fake_dis)
loss_dis = F.mean(
    F.sigmoid_cross_entropy(pred_fake_dis, F.constant(0, pred_fake_dis.shape)))

# Set up the real path
Example #27
 def __init__(self):
     ctx = nn.get_current_context()
     if "half" in [x.split(":")[-1] for x in ctx.backend]:
         raise ValueError(
             "Half is not supported up to now, context = {}".format(ctx))
Example #28
def abs_max_recorder(x, M, training=True):
    ctx = nn.get_current_context()
    func = AbsMaxRecorder(ctx, training)
    return func(x, M)
Example #29
def max_mva_recorder(x, M, decay=0.99, training=True):
    ctx = nn.get_current_context()
    func = MaxMvaRecorder(ctx, decay, training)
    return func(x, M)
Example #30
def minmax_minmax_recorder(x, m, M, training=True):
    ctx = nn.get_current_context()
    func = MinMaxMinMaxRecorder(ctx, training)
    return func(x, m, M)