Example #1
    def __init__(self, in_ch):
        self.in_ch = in_ch

        self.gamma = nn.Tensor((in_ch, ), init=nn.initializer.Scalar(1.0))
        self.beta = nn.Tensor((in_ch, ), init=nn.initializer.Scalar(0.0))

        super().__init__(saveables=['gamma', 'beta'])
Example #2
    def __init__(self, in_ch, out_ch, use_bias=True, weight_initializer=None, bias_initializer=None):

        self.in_ch = in_ch
        self.out_ch = out_ch
        self.use_bias = use_bias
        
        if weight_initializer is None:
            weight_initializer = nc.Cacheton.get_var('Dense_default_weight_initializer')
        if weight_initializer is None:
            weight_initializer = nn.initializer.GlorotUniform()            
        if weight_initializer.has_fan_in():
            if weight_initializer.fan_in is None:
                weight_initializer.fan_in = in_ch
        if weight_initializer.has_fan_out():
            if weight_initializer.fan_out is None:
                weight_initializer.fan_out = out_ch

        self.weight = nn.Tensor( (in_ch, out_ch), init=weight_initializer )
        
        self.bias = None
        if self.use_bias:
            if bias_initializer is None:
                bias_initializer = nn.initializer.Scalar(0.0)
            self.bias_initializer = bias_initializer    
            self.bias = nn.Tensor( (self.out_ch,), init=bias_initializer)
            
        super().__init__(saveables=['weight','bias'])
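
    # Only the constructor is shown above; the matching forward pass is not part of
    # the snippet. A minimal sketch of what it could look like, reusing nc.op.matmul
    # as in Examples #8 and #9. The bias addition via `+` with broadcasting is an
    # assumption, not taken from the source.
    def forward(self, x):
        # (batch, in_ch) x (in_ch, out_ch) -> (batch, out_ch)
        x = nc.op.matmul(x, self.weight)
        if self.use_bias:
            x = x + self.bias  # assumes elementwise + broadcasts over the batch axis
        return x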
Example #3
def backward_test():
    inp_t = nn.Tensor((2, 2, 8, 8), init=nn.initializer.Scalar(1.0))
    kernel_t = nn.Tensor((4, 2, 3, 3), init=nn.initializer.Scalar(0.0))

    kernel_2_t = nn.Tensor((4, 2, 3, 3), init=nn.initializer.Scalar(0.0))

    opt = nn.optimizer.RMSprop([inp_t, kernel_t, kernel_2_t])
    opt.zero_grad()

    with nn.optimizer.freeze():
        r = nn.conv2D(inp_t, kernel_2_t)
    r.backward()

    if kernel_2_t.has_grad():
        raise Exception(
            "kernel_2_t has grad, but used inside nn.optimizer.freeze()")

    r = nn.conv2D(inp_t, kernel_t)
    r.backward()

    if not kernel_t.has_grad():
        raise Exception("kernel_t has no grad")

    kernel_grad_t = kernel_t.get_grad()
    if all(np.ndarray.flatten(kernel_grad_t.np()) == 0):
        raise Exception("kernel_grad_t is not changed after backward step.")

    opt.step()

    if all(np.ndarray.flatten(kernel_t.np()) == 0):
        raise Exception("kernel_t is not changed after optimization step.")
Example #4
    def __init__(self, in_ch):
        self.in_ch = in_ch

        self.weight = nn.Tensor((in_ch, ), init=nn.initializer.Scalar(1.0))
        self.bias = nn.Tensor((in_ch, ), init=nn.initializer.Scalar(0.0))
        self.eps = nn.Tensor((1, ), init=nn.initializer.Scalar(1e-6))

        super().__init__(saveables=['weight', 'bias', 'eps'])
Example #5
def BatchNorm2D_test():
    module = BatchNorm2D(4)
    module.set_training(True)

    x = nn.Tensor((2, 4, 8, 8))
    y = module(x)
    y.backward(grad_for_non_trainables=True)

    if not x.has_grad():
        raise Exception('x has no grad')

    module.set_training(False)
    x = nn.Tensor((2, 4, 8, 8))
    y = module(x)
    y.backward(grad_for_non_trainables=True)
Example #6
def Dropout_test():
    module = Dropout(0.3)

    module.set_training(True)
    x = nn.Tensor((2, 4, 4, 4))
    y = module(x)
    y.backward(grad_for_non_trainables=True)

    if not x.has_grad():
        raise Exception('x has no grad')

    module.set_training(False)
    x = nn.Tensor((2, 4, 4, 4))
    y = module(x)
    y.backward(grad_for_non_trainables=True)
Example #7
def resize2D_bilinear(input_t, size_or_output_hw):
    """
    resize2D_bilinear operator
    
    arguments
    
     size_or_output_hw  int / float : scale factor applied to (height, width)
                        Iterable of (height, width) : explicit output size
    
    """
    N, C, H, W = input_t.shape

    if isinstance(size_or_output_hw, Iterable):
        OH, OW = int(size_or_output_hw[0]), int(size_or_output_hw[1])
    elif isinstance(size_or_output_hw, (int, float)):
        OH = int(H * size_or_output_hw)
        OW = int(W * size_or_output_hw)
    else:
        raise ValueError(
            f'Unknown type of size_or_output_hw : {size_or_output_hw.__class__.__name__}'
        )

    OH = max(1, OH)
    OW = max(1, OW)

    coords_shape = nc.TensorShape((OH, OW, 2))

    coords_t = nn.Tensor(coords_shape,
                         nn.initializer.CoordsArange(0, H - 1, 0, W - 1))
    output_t = nn.spatial_transform2D(input_t, coords_t, grad_to_coords=False)

    return output_t
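
# A minimal usage sketch of the operator above. The output shapes follow from the
# OH/OW computation in the code; nn.initializer.RandomUniform is the one used in
# Example #20.
x = nn.Tensor((1, 3, 32, 32), init=nn.initializer.RandomUniform())
y_half = resize2D_bilinear(x, 0.5)      # scale factor -> expected shape (1, 3, 16, 16)
y_hw = resize2D_bilinear(x, (64, 48))   # explicit (height, width) -> expected shape (1, 3, 64, 48)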
Example #8
def spatial_affine_transform2D(input_t, affine_t, output_size=None):
    """
    arguments

        input_t     Tensor(NCHW)

        affine_t    Tensor(N,2,3)
                    affine matrix

                    example of identity affine matrix
                    [1,0,0],
                    [0,1,0]

        output_size(None)

                    tuple of 2 ints (H,W) for the output size.
                    If None, the size is not changed.

    Reference:

    Spatial Transformer Networks https://arxiv.org/abs/1506.02025
    """

    op = nc.Cacheton.get(_SpatialAffineTransform2DOp, input_t.shape,
                         affine_t.shape, output_size)

    affine_t = affine_t.transpose((0, 2, 1))

    coords = nn.Tensor(op.coords_shape,
                       init=op.coords_init).reshape(op.coords_reshape)
    coords = nc.op.matmul(coords, affine_t).reshape(op.coords_affined_shape)

    output_t = nc.op.spatial_transform2D(input_t, coords)
    return output_t
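
# A minimal call sketch; only the shapes matter here. How affine_t gets its
# per-sample [[1,0,0],[0,1,0]] identity values is left out, since no value-setting
# API is shown in these examples.
img_t = nn.Tensor((1, 3, 64, 64), init=nn.initializer.RandomUniform())
affine_t = nn.Tensor((1, 2, 3))   # per-sample 2x3 affine matrices (values not set here)
out_t = spatial_affine_transform2D(img_t, affine_t, output_size=(32, 32))
# out_t is expected to have shape (1, 3, 32, 32)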
Example #9
def conv2DTranspose(input_t, kernel_t, stride=2, dilation=1, padding='same'):
    """
    conv2DTranspose operator.

     input_t     Tensor          shape must be
                                 (batch, in_ch, height,  width)

     kernel_t    Tensor          shape must be
                                 (out_ch,in_ch, k_height,k_width)

     stride(2)       int

     dilation(1)     int

     padding(same)   'valid'
                     'same'
    """

    op = nc.Cacheton.get(_Conv2DTransposeOp, input_t.shape, kernel_t.shape,
                         int(stride), int(dilation), padding)

    output_t = nn.Tensor(op.output_shape)
    output_t._set_op_name('conv2DTranspose')
    output_t._assign_gradfn(
        input_t, lambda O_t, dO_t: conv2DTranspose_dI_gradfn(
            op, input_t, kernel_t, O_t, dO_t))
    output_t._assign_gradfn(
        kernel_t, lambda O_t, dO_t: conv2DTranspose_dK_gradfn(
            op, input_t, kernel_t, O_t, dO_t))

    out = nc.op.matmul(kernel_t.reshape((kernel_t.shape[0], -1)),
                       op.im2colT(input_t))
    nc.op.transpose(out.reshape(op.OC_N_OH_OW), (1, 0, 2, 3),
                    output_t=output_t)
    return output_t
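
# A minimal call sketch matching the shapes in the docstring. The doubling of the
# spatial size with stride=2 and 'same' padding is the expected behaviour, not
# verified here.
inp_t = nn.Tensor((1, 4, 8, 8), init=nn.initializer.Scalar(1.0))
kernel_t = nn.Tensor((8, 4, 3, 3), init=nn.initializer.Scalar(1.0))   # (out_ch, in_ch, kH, kW)
out_t = conv2DTranspose(inp_t, kernel_t, stride=2, padding='same')
# expected output shape: (1, 8, 16, 16)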
Example #10
def depthwise_conv2D(input_t, kernel_t, stride=1, dilation=1, padding='same'):
    """
    Depthwise Conv2D operator.
   
     input_t     Tensor          shape must be 
                                 (batch, in_ch,  height,  width)
     
     kernel_t    Tensor          shape must be 
                                 (in_ch,k_height,k_width)
 
     stride(1)       int
     
     dilation(1)     int
     
     padding(same)   'valid'         no padding
                     'same'          output spatial size equals the input size
                                     (divided by stride when stride > 1)
                     int             padding value for all sides
                     Iterable of 4 ints 
                                paddings for left,top,right,bottom sides
    """
    
    op = nc.Cacheton.get(_DepthwiseConv2DOp, input_t.shape, kernel_t.shape, int(stride), int(dilation), padding)

    output_t = nn.Tensor( op.output_shape )
    output_t._set_op_name('depthwise_conv2D')
    output_t._assign_gradfn (input_t,  lambda O_t, dO_t: conv2D_dI_gradfn(op, input_t, kernel_t, dO_t) )
    output_t._assign_gradfn (kernel_t, lambda O_t, dO_t: conv2D_dK_gradfn(op, input_t, kernel_t, dO_t) )

    op.O_depthwise_krn.run(output_t, input_t, kernel_t)

    return output_t
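
# A minimal call sketch; the kernel holds one k_height x k_width filter per input
# channel.
inp_t = nn.Tensor((1, 4, 16, 16), init=nn.initializer.Scalar(1.0))
kernel_t = nn.Tensor((4, 3, 3), init=nn.initializer.Scalar(1.0))   # (in_ch, kH, kW)
out_t = depthwise_conv2D(inp_t, kernel_t, stride=1, padding='same')
# expected output shape with stride=1 and 'same' padding: (1, 4, 16, 16)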
Example #11
def InstanceNorm2D_test():
    module = InstanceNorm2D(4)
    x = nn.Tensor((2, 4, 8, 8))
    y = module(x)
    y.backward(grad_for_non_trainables=True)
    if not x.has_grad():
        raise Exception('x has no grad')
Example #12
def DepthwiseConv2D_test():
    module = DepthwiseConv2D(4, 3)
    x = nn.Tensor((2, 4, 8, 8))
    y = module(x)
    y.backward(grad_for_non_trainables=True)
    if not x.has_grad():
        raise Exception('x has no grad')
Example #13
def slice_op(input_t, slices, output_t=None, is_add_to_output=False):
    """
    arguments:

        output_t            compute the result into this Tensor.
                            It may have a different shape, but its total size must match.
                            gradfn will not be set.

        is_add_to_output    add result to output_t if output_t is set.
    """
    is_add_to_output = False if output_t is None else is_add_to_output

    op = nc.Cacheton.get(_SliceOp, input_t.shape, hashable_slices(slices),
                         is_add_to_output)

    if output_t is None:
        if op.output_is_reshaped:
            return input_t.reshape(op.output_shape)
        else:
            output_t = nn.Tensor(op.output_shape)
            output_t._set_op_name('slice')
            output_t._assign_gradfn(
                input_t, lambda O_t, dO_t: slice_dI_gradfn(op, input_t, dO_t))

    elif output_t.shape.size != op.output_shape.size:
        raise ValueError(f'output_t must have size {op.output_shape.size}')

    op.forward_krn.run(input_t, output_t)

    return output_t
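
# A minimal call sketch. That `slices` accepts a tuple of ordinary Python slice
# objects, one per axis, is an assumption here; only its hashing via
# hashable_slices is visible above.
inp_t = nn.Tensor((2, 4, 8, 8), init=nn.initializer.Scalar(1.0))
# assumed format: one slice per axis, here taking the first two channels
out_t = slice_op(inp_t, (slice(None), slice(0, 2), slice(None), slice(None)))
# expected output shape: (2, 2, 8, 8)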
Example #14
def matmulc_op(a_t, b_t, output_t=None, is_add_to_output=False):
    """
    matmul operator in col-major format

        A(...,K,M) x B(...,N,K) = (...,N,M)

    arguments

        output_t            compute the result into this Tensor.
                            It may have a different shape,
                            but its total size must match.
                            gradfn will not be set.

        is_add_to_output    add result to output_t if output_t is set.
    """
    is_add_to_output = False if output_t is None else is_add_to_output

    op = nc.Cacheton.get(_MatmulOp, a_t.shape, b_t.shape, is_add_to_output)

    if output_t is None:
        output_t = nn.Tensor(op.output_shape)
        output_t._set_op_name('matmul')
        output_t._assign_gradfn(
            a_t, lambda O_t, dO_t: matmul_a_grad(a_t, b_t, dO_t))
        output_t._assign_gradfn(
            b_t, lambda O_t, dO_t: matmul_b_grad(a_t, b_t, dO_t))
    elif output_t.shape.size != op.output_shape.size:
        raise ValueError(f'output_t must have size {op.output_shape.size}')

    op.forward_krn.run(a_t, b_t, output_t)

    return output_t
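
# A minimal call sketch of the col-major convention A(...,K,M) x B(...,N,K) = (...,N,M).
a_t = nn.Tensor((1, 4, 8), init=nn.initializer.Scalar(1.0))   # K=4, M=8
b_t = nn.Tensor((1, 2, 4), init=nn.initializer.Scalar(1.0))   # N=2, K=4
c_t = matmulc_op(a_t, b_t)
# expected output shape: (1, 2, 8)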
Example #15
def Dense_test():
    module = Dense(4, 8)
    x = nn.Tensor((2, 4))
    y = module(x)
    y.backward(grad_for_non_trainables=True)
    
    if not x.has_grad():
        raise Exception('x has no grad')
Example #16
def BlurPool_test():
    module = BlurPool()
    x = nn.Tensor((2, 3, 64, 64))
    y = module(x)
    y.backward(grad_for_non_trainables=True)

    if not x.has_grad():
        raise Exception('x has no grad')
Example #17
def FRNorm2D_test():
    in_ch = 3
    module = FRNorm2D(in_ch)
    x = nn.Tensor((2, in_ch, 64, 64))
    y = module(x)
    y.backward(grad_for_non_trainables=True)

    if not x.has_grad():
        raise Exception('x has no grad')
Example #18
def Conv2DTranspose_test():
    module = Conv2DTranspose(4, 8, 3)

    x = nn.Tensor((2, 4, 8, 8))
    y = module(x)
    y.backward(grad_for_non_trainables=True)

    if not x.has_grad():
        raise Exception('x has no grad')
Example #19
def SGD_test():
    weight_t = nn.Tensor((16, ), init=nn.initializer.Scalar(0.0))
    weight_t.get_grad().fill(1.0)

    opt = nn.optimizer.SGD([weight_t],
                           lr_decay=0.1,
                           lr_dropout=0.7,
                           clipnorm=0.1)
    opt.step()
Example #20
def Initializers_test():
    nn.Tensor((128, ), init=nn.initializer.Scalar(1.0)).np()
    nn.Tensor((128, ), init=nn.initializer.GlorotUniform(1.0, 1.0, 1.0)).np()
    nn.Tensor((128, ), init=nn.initializer.GlorotNormal(1.0, 1.0, 1.0)).np()
    nn.Tensor((128, ), init=nn.initializer.HeUniform(1.0, 1.0)).np()
    nn.Tensor((128, ), init=nn.initializer.HeNormal(1.0, 1.0)).np()
    nn.Tensor((128, ), init=nn.initializer.RandomNormal()).np()
    nn.Tensor((128, ), init=nn.initializer.RandomUniform()).np()
Example #21
def spatial_transform2D(input_t, coords_t, grad_to_coords=True):
    """
    spatial_transform2D operator
    
    Transforms input_t in spatial axes using coords_t.

    arguments

        input_t     Tensor(NCHW)

        coords_t    Tensor(NCHWD)
                    N is 1 or input_t.N
                    C is 1 or input_t.C
                    H - output height
                    W - output width
                    D is 2 : the (x, y) coordinates

        grad_to_coords(True)

                if True, coords_t is broadcast to input_t's N and C axes so
                gradients can flow back to it; if spatial_transform2D is used
                only for resizing, backprop to coords_t is not needed

    Reference:

    Spatial Transformer Networks https://arxiv.org/abs/1506.02025
    """

    op = nc.Cacheton.get(_SpatialTransform2DOp, input_t.shape, coords_t.shape)

    output_t = nn.Tensor(op.output_shape)
    output_t._set_op_name('spatial_transform2D')
    output_t._assign_gradfn(
        input_t, lambda O_t, dO_t: spatial_transform2D_dI_gradfn(
            op, input_t, coords_t, dO_t))

    if grad_to_coords:
        dK_coords_t = coords_t

        diff_rank = 5 - dK_coords_t.shape.rank

        if diff_rank != 0:
            dK_coords_t = dK_coords_t.reshape(diff_rank * (1, ) +
                                              dK_coords_t.shape)

        if op.coords_N_tile != 1 or op.coords_C_tile != 1:
            dK_coords_t = nc.op.tile(
                dK_coords_t, (op.coords_N_tile, op.coords_C_tile, 1, 1, 1))

        output_t._assign_gradfn(
            dK_coords_t, lambda O_t, dO_t: spatial_transform2D_dK_gradfn(
                op, input_t, dK_coords_t, dO_t))

    op.O_forward_krn.run(output_t, input_t, coords_t)

    return output_t
Example #22
    def __init__(self,
                 in_ch,
                 out_ch,
                 kernel_size,
                 stride=2,
                 dilation=1,
                 padding='same',
                 use_bias=True,
                 kernel_initializer=None,
                 bias_initializer=None):

        self.in_ch = in_ch
        self.out_ch = out_ch
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.use_bias = use_bias

        if kernel_initializer is None:
            kernel_initializer = nc.Cacheton.get_var(
                'Conv2DTranspose_default_kernel_initializer')
        if kernel_initializer is None:
            kernel_initializer = nn.initializer.GlorotUniform()
        if kernel_initializer.has_fan_in():
            if kernel_initializer.fan_in is None:
                kernel_initializer.fan_in = in_ch * kernel_size * kernel_size
        if kernel_initializer.has_fan_out():
            if kernel_initializer.fan_out is None:
                kernel_initializer.fan_out = out_ch * kernel_size * kernel_size

        self.kernel_initializer = kernel_initializer
        self.kernel = nn.Tensor((out_ch, in_ch, kernel_size, kernel_size),
                                init=kernel_initializer)

        self.bias = None
        if self.use_bias:
            if bias_initializer is None:
                bias_initializer = nn.initializer.Scalar(0.0)
            self.bias_initializer = bias_initializer
            self.bias = nn.Tensor((self.out_ch, ), init=bias_initializer)

        super().__init__(saveables=['kernel', 'bias'])
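
    # As in Example #2, only the constructor is shown. A hedged sketch of a matching
    # forward pass, assuming the operator from Example #9 is exposed as
    # nn.conv2DTranspose (like nn.conv2D in Example #3) and that `+` broadcasts the
    # reshaped bias; both are assumptions, not taken from the source.
    def forward(self, x):
        x = nn.conv2DTranspose(x, self.kernel, stride=self.stride,
                               dilation=self.dilation, padding=self.padding)
        if self.use_bias:
            # assumed broadcast of (1, out_ch, 1, 1) over N, H, W
            x = x + self.bias.reshape((1, self.out_ch, 1, 1))
        return x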
Example #23
    def __init__(self, in_ch, momentum=0.99):
        self.in_ch = in_ch

        if momentum < 0 or momentum > 1.0:
            raise ValueError(
                f'momentum {momentum} must be in range [0 .. 1.0]')
        self.momentum = momentum

        self.gamma = nn.Tensor((in_ch, ), init=nn.initializer.Scalar(1.0))
        self.beta = nn.Tensor((in_ch, ), init=nn.initializer.Scalar(0.0))

        self.running_mean = nn.Tensor((in_ch, ),
                                      init=nn.initializer.Scalar(0.0))
        self.running_var = nn.Tensor((in_ch, ),
                                     init=nn.initializer.Scalar(1.0))

        super().__init__(
            saveables=['gamma', 'beta', 'running_mean', 'running_var'],
            trainables=['gamma', 'beta'])
Example #24
def MultiGPU_test_():
    devices_count = len(nn.devices.get_current())

    x = nn.Tensor_sliced_from_value(
        np.arange(0, devices_count).reshape((devices_count, )))

    weight_t = nn.Tensor((1, ), init=nn.initializer.Scalar(0.0))
    weight_t.get_grad().set(x)

    opt = nn.optimizer.RMSprop([weight_t], rho=1)
    opt.step()
    if all(np.ndarray.flatten(weight_t.np(0)) == \
           np.ndarray.flatten(weight_t.np(1)) ):
        raise Exception("weight_t is equal on both GPUs")

    weight_t = nn.Tensor((1, ), init=nn.initializer.Scalar(0.0))
    weight_t.get_grad().set(x)
    opt = nn.optimizer.RMSprop([weight_t], rho=1)
    opt.step(multi_gpu_step=True)
    if all(np.ndarray.flatten(weight_t.np(0)) != \
           np.ndarray.flatten(weight_t.np(1)) ):
        raise Exception("weight_t is not equal on both GPUs")
Example #25
def pool2D_op(op_type, input_t, pool_size=2, stride=2, padding='same'):
    """
    arguments
    
        op_type     'avg','min','max'
    """
    op = nc.Cacheton.get(_Pool2DOp, op_type, input_t.shape, int(pool_size),
                         int(stride), padding)

    output_t = nn.Tensor(op.output_shape)
    output_t._set_op_name(f'{op_type}_pool2D')
    output_t._assign_gradfn(
        input_t, lambda O_t, dO_t: pool2D_dI_gradfn(op, input_t, dO_t, O_t))

    op.O_forward_krn.run(output_t, input_t)
    return output_t
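
# A minimal call sketch; op_type selects the reduction.
inp_t = nn.Tensor((1, 3, 8, 8), init=nn.initializer.Scalar(1.0))
avg_t = pool2D_op('avg', inp_t, pool_size=2, stride=2, padding='same')
max_t = pool2D_op('max', inp_t, pool_size=2, stride=2, padding='same')
# with stride=2 the spatial size is expected to halve: (1, 3, 4, 4)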
Example #26
def stack_op(tensor_list, axis, output_t=None, is_add_to_output=False):
    """
    arguments:

        output_t            compute the result into this Tensor.
                            It may have a different shape, but its total size must match.
                            gradfn will not be set.

        is_add_to_output    add result to output_t if output_t is set.
    """
    is_add_to_output = False if output_t is None else is_add_to_output

    tensor_list = tuple(tensor_list)

    if not all(isinstance(tensor, nn.Tensor) for tensor in tensor_list):
        raise ValueError('All values must be of type Tensor')

    stack_count = len(tensor_list)
    if stack_count == 0:
        raise ValueError('tensor_list is empty')

    input_shape = tensor_list[0].shape

    if not all(tensor.shape == input_shape for tensor in tensor_list):
        raise ValueError('All tensors must have the same shape')

    op = nc.Cacheton.get(_StackOp, input_shape, int(axis), stack_count,
                         is_add_to_output)

    if output_t is None:
        output_t = nn.Tensor(op.info.output_shape)
        output_t._set_op_name('stack')
        for n in range(stack_count):
            output_t._assign_gradfn(tensor_list[n],
                                    lambda O_t, dO_t, n=n: stack_gradfn(
                                        op, tensor_list[n], dO_t, n))

    elif output_t.shape.size != op.info.output_shape.size:
        raise ValueError(
            f'output_t must have size {op.info.output_shape.size}')

    for i in range(stack_count):
        op.forward_krn.run(output_t, tensor_list[i], np.int64(i))

    return output_t
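
# A minimal call sketch; all inputs must share one shape and a new axis is inserted
# at `axis`.
a_t = nn.Tensor((2, 4), init=nn.initializer.Scalar(1.0))
b_t = nn.Tensor((2, 4), init=nn.initializer.Scalar(2.0))
s_t = stack_op([a_t, b_t], axis=0)
# expected output shape: (2, 2, 4)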
Example #27
def shallow_mode_test():
    class Module1(nn.Module):
        def __init__(self, include_self=True):
            self.conv = nn.Conv2D(1, 1, 3, stride=2)

        def forward(self, x):
            x = self.conv(x)
            return x

    m = Module1()
    m.set_training(True)

    out = m.shallow_forward(nn.Tensor((1, 1, 16, 16)))

    dev = nn.devices.get_current()[0]

    if dev.get_used_memory() != 0:
        raise Exception('Memory is allocated in shallow_forward')
Example #28
def dual_wise_op(DualWiseOpKernel_cls,
                 DualWiseOpKernel_args,
                 a_t,
                 b_t,
                 output_t=None,
                 is_add_to_output=False):
    """
    operator for DualWiseOpKernel ops with two inputs
    
    arguments

        DualWiseOpKernel_cls     class derived from DualWiseOpKernel

        DualWiseOpKernel_args    args to construct DualWiseOpKernel_cls

        output_t            compute the result into this Tensor.
                            It may have a different shape, but its total size must match.
                            gradfn will not be set.

        is_add_to_output    add result to output_t if output_t is set.
    """
    is_add_to_output = False if output_t is None else is_add_to_output

    op = nc.Cacheton.get(_DualWiseOp, DualWiseOpKernel_cls,
                         DualWiseOpKernel_args, a_t.shape, b_t.shape,
                         is_add_to_output)

    if output_t is None:
        output_t = nn.Tensor(op.info.output_shape)
        output_t._set_op_name(f'{op.kernel.get_op_name()}')
        output_t._assign_gradfn(
            a_t,
            lambda O_t, dO_t: dual_wise_op_A_gradfn(op, a_t, b_t, O_t, dO_t))
        output_t._assign_gradfn(
            b_t,
            lambda O_t, dO_t: dual_wise_op_B_gradfn(op, a_t, b_t, O_t, dO_t))
    elif output_t.shape.size != op.info.output_shape.size:
        raise ValueError(
            f'output_t must have size { op.info.output_shape.size }')

    op.forward_krn.run(output_t, a_t, b_t)

    return output_t
Example #29
def concat_op(tensor_list, axis, output_t=None, is_add_to_output=False):
    """
    arguments

        output_t            compute the result into this Tensor.
                            It may have a different shape,
                            but its total size must match.
                            gradfn will not be set.

        is_add_to_output    add result to output_t if output_t is set.
    """
    is_add_to_output = False if output_t is None else is_add_to_output

    tensor_list = tuple(tensor_list)

    if not all(isinstance(tensor, nn.Tensor) for tensor in tensor_list):
        raise ValueError('All values must be of type Tensor')
    if len(tensor_list) == 0:
        raise ValueError('empty tensor_list')

    op = nc.Cacheton.get(_ConcatOp, tuple(t.shape for t in tensor_list),
                         int(axis), is_add_to_output)

    if output_t is None:
        output_t = nn.Tensor(op.info.output_shape)
        output_t._set_op_name('concat')
        for n in range(len(tensor_list)):
            output_t._assign_gradfn(tensor_list[n],
                                    lambda O_t, dO_t, n=n: concat_gradfn(
                                        op, tensor_list[n], dO_t, n))

    elif output_t.shape.size != op.info.output_shape.size:
        raise ValueError(
            f'output_t must have size {op.info.output_shape.size}')

    for i, t in enumerate(tensor_list):
        op.forward_krn.run(output_t,
                           t,
                           np.int64(op.info.axis_offsets[i]),
                           np.int64(op.info.axis_sizes[i]),
                           global_shape=(t.shape.size, ))

    return output_t
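
# A minimal call sketch; unlike stack_op, the inputs may differ along the concat axis.
a_t = nn.Tensor((2, 3, 8, 8), init=nn.initializer.Scalar(1.0))
b_t = nn.Tensor((2, 5, 8, 8), init=nn.initializer.Scalar(1.0))
c_t = concat_op([a_t, b_t], axis=1)
# expected output shape: (2, 8, 8, 8)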
Example #30
def dropout_op(input_t,
               rate,
               seed=None,
               output_t=None,
               is_add_to_output=False):
    """
    Dropout operator
    
    arguments
    
        input_t     Tensor
        
        rate        float [0 .. 1.0)
                    probability
                    
        seed(None)  int value
                    if None, a random seed is used
    
        output_t            compute the result into this Tensor.
                            It may have a different shape, but its total size must match.
                            gradfn will not be set.

        is_add_to_output    add result to output_t if output_t is set.
        
    reference
    
    Dropout: A Simple Way to Prevent Neural Networks from Overfitting
    """

    op = nc.Cacheton.get(_DropoutOp, input_t.shape, float(rate),
                         int(seed) if seed is not None else None,
                         is_add_to_output)
    if output_t is None:
        output_t = nn.Tensor(op.output_shape)
        output_t._set_op_name('dropout')
        output_t._assign_gradfn(
            input_t, lambda O_t, dO_t: dropout_dI_gradfn(op, input_t, dO_t))
    elif output_t.shape.size != op.output_shape.size:
        raise ValueError(f'output_t must have size {op.output_shape.size}')

    op.krn.run(output_t, input_t, np.uint32(op.seed))
    return output_t
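
# A minimal call sketch; passing a seed makes the dropout mask reproducible, per the
# docstring above.
inp_t = nn.Tensor((2, 4, 8, 8), init=nn.initializer.Scalar(1.0))
out_t = dropout_op(inp_t, rate=0.3, seed=123)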