Example #1
    def backward(ctx, grad_output):
        a, b = ctx.saved_tensors
        # calculate gradient of output w.r.t. each input:
        # d(a / b)/da = 1 / b,  d(a / b)/db = -a / b**2
        temp_a = (1. / b.data)
        grad_a = temp_a * grad_output.data
        temp_b = -(a.data / (b.data**2))
        grad_b = temp_b * grad_output.data

        if a.shape != b.shape:
            if a.shape != grad_a.shape:
                grad_a = unbroadcast(grad_a, a.shape)
            if b.shape != grad_b.shape:
                grad_b = unbroadcast(grad_b, b.shape)

        # the order of gradients returned should match the order of the arguments
        return tensor.Tensor(grad_a), tensor.Tensor(grad_b)
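Several of the backward passes on this page call an unbroadcast helper that is not reproduced here. Note two calling conventions appear in the snippets (some pass the target shape, some pass the tensor itself); the following is only a minimal sketch of the shape-based variant, assuming it collapses a broadcast gradient back to the original input shape:

def unbroadcast(grad, shape):
    # Sketch: sum out the extra leading axes that broadcasting added...
    while grad.ndim > len(shape):
        grad = grad.sum(axis=0)
    # ...then sum over axes that were originally size 1.
    for axis, size in enumerate(shape):
        if size == 1:
            grad = grad.sum(axis=axis, keepdims=True)
    return grad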
Example #2
 def forward(ctx, a, shape):
     if not type(a).__name__ == 'Tensor':
         raise Exception("Arg for Reshape must be tensor: {}".format(type(a).__name__))
     ctx.shape = a.shape
     requires_grad = a.requires_grad
     c = tensor.Tensor(a.data.reshape(shape), requires_grad=requires_grad,
                                              is_leaf=not requires_grad)
     return c
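The matching backward for this Reshape is not listed. Since the forward stores the original shape in ctx.shape, a plausible sketch (not the snippet author's code) simply reshapes the incoming gradient back and returns None for the shape argument:

 def backward(ctx, grad_output):
     # Sketch: undo the reshape by restoring the saved input shape.
     return tensor.Tensor(grad_output.data.reshape(ctx.shape)), None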
Example #3
 def backward(ctx, grad_output):
     predicted, target = ctx.saved_tensors
     Softmax = ((np.exp(predicted.data)) /
                (np.sum(np.exp(predicted.data), axis=1).reshape(-1, 1)))
     one_hot = to_one_hot(target.data, Softmax.shape[-1])
     grad_predicted = ((Softmax - one_hot.data) *
                       grad_output.data) / predicted.data.shape[0]
     return (tensor.Tensor(grad_predicted), )
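Both cross-entropy snippets rely on a to_one_hot helper that is not shown on this page. A rough sketch consistent with how it is used above (its return value must expose a .data attribute, so it is assumed to return a Tensor):

def to_one_hot(labels, num_classes):
    # Sketch: turn integer class labels into a one-hot matrix.
    out = np.zeros((labels.shape[0], num_classes))
    out[np.arange(labels.shape[0]), labels.astype(int)] = 1.0
    return tensor.Tensor(out)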
Example #4
    def backward(ctx, grad_output):
        # TODO: Finish Conv1d backward pass. It's surprisingly similar to the forward pass.
        
        x,weight,stride = ctx.saved_tensors
        batch_size, in_channel, input_size = x.shape
        out_channel, _, kernel_size = weight.shape
        stride = stride.data.item()
        output_size = grad_output.shape[-1]   

        # Gradients have the same shapes as the forward inputs
        # (equivalently: grad_w = np.zeros_like(weight.data)).
        grad_w = np.zeros(shape=(out_channel, in_channel, kernel_size))
        grad_y = np.zeros(shape=(batch_size, in_channel, input_size))
        grad_b = np.zeros(shape=(out_channel,))

        # grad_b: sum grad_output over the batch and time axes
        grad_b = np.einsum('ijk->j', grad_output.data)

        # grad_w: upsample grad_output (insert stride - 1 zeros between entries)
        new_shape = (grad_output.shape[2] - 1) * stride + 1
        out_z = np.zeros((grad_output.shape[0], grad_output.shape[1], new_shape))
        out_z[:, :, ::stride] = grad_output.data

        for t in range(input_size-out_z.shape[2]+1):
            if t < grad_w.shape[2]:
                seg_b = x.data[:,:,t:t+out_z.shape[2]]
                grad_w[:,:,t] = np.einsum('ijk,ilk->lj',seg_b,out_z)

        ### grad_x
        #For strided z from y|upsample
        new_k = (grad_output.shape[2]-1) * stride + 1 
        out = np.zeros((grad_output.shape[0],grad_output.shape[1],new_k))
        out[:,:,::stride] = grad_output.data

        # Pad with kernel_size - 1 zeros on both sides; after upsampling we can
        # treat the stride as 1.
        npad = ((0, 0), (0, 0), (kernel_size - 1, kernel_size - 1))
        out_2 = np.pad(out, pad_width=npad, mode='constant', constant_values=0)
        flip_weight = np.flip(weight.data, axis=2)
        for t in range(out_2.shape[2] - kernel_size + 1):
            seg_a = out_2[:, :, t:t + kernel_size]
            grad_y[:, :, t] = np.einsum('ijk,jlk->il', seg_a, flip_weight)

        
        # Gradient tensors should not themselves require grad.
        return (tensor.Tensor(grad_y),
                tensor.Tensor(grad_w),
                tensor.Tensor(grad_b))
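For reference, the forward pass that this backward mirrors (as the TODO comment above notes) slides the kernel across the input with the given stride. The following is only a rough sketch under the same shape conventions; the argument names, in particular the bias tensor b, are assumed:

    def forward(ctx, x, weight, b, stride):
        # Rough sketch only; not the graded solution.
        ctx.save_for_backward(x, weight, stride)
        batch_size, in_channel, input_size = x.shape
        out_channel, _, kernel_size = weight.shape
        s = stride.data.item()
        output_size = (input_size - kernel_size) // s + 1
        out = np.zeros((batch_size, out_channel, output_size))
        for t in range(output_size):
            seg = x.data[:, :, t * s:t * s + kernel_size]
            # (batch, in, k) x (out, in, k) -> (batch, out)
            out[:, :, t] = np.einsum('bik,oik->bo', seg, weight.data) + b.data
        requires_grad = x.requires_grad or weight.requires_grad or b.requires_grad
        return tensor.Tensor(out, requires_grad=requires_grad,
                             is_leaf=not requires_grad)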
Example #5
    def backward(ctx, grad_output):
        a = ctx.saved_tensors[0]

        # d(a**2)/da = 2a
        grad_a = tensor.Tensor(2 * a.data * grad_output.data)
        return grad_a, None
Example #6
 def forward(ctx, a):
     if not type(a).__name__ == 'Tensor':
         raise Exception("Arg for Log must be tensor: {}".format(type(a).__name__))
     ctx.save_for_backward(a)
     requires_grad = a.requires_grad
     c = tensor.Tensor(np.log(a.data), requires_grad=requires_grad,
                                       is_leaf=not requires_grad)
     return c
Example #7
 def backward(ctx, grad_output):
     a = ctx.saved_tensors
     # ReLU'(x) = 0 for x <= 0, 1 otherwise
     mask = np.where(a[0].data <= 0, 0, 1)
     grad_a = tensor.Tensor(mask * grad_output.data)
     return grad_a, None
Example #8
    def backward(ctx, grad_output):
        a, b = ctx.saved_tensors
        # calculate gradient of output w.r.t. each input
        grad_a = np.matmul(grad_output.data, b.T().data)
        grad_b = np.matmul(a.T().data, grad_output.data)

        if b.shape != grad_b.shape:
            grad_b = unbroadcast(grad_b, b)

        if a.shape != grad_a.shape:
            grad_a = unbroadcast(grad_a, a)
        grad_a = tensor.Tensor(grad_a)
        grad_b = tensor.Tensor(grad_b)
        # TODO: Implement more Functions below
        #print('grad_a and grad_b from Matmul :',grad_a.shape,grad_b.shape)
        return grad_a, grad_b
Example #9
 def forward(ctx, a):
     if not len(a.shape) == 2:
         raise Exception("Arg for Transpose must be 2D tensor: {}".format(
             a.shape))
     requires_grad = a.requires_grad
     b = tensor.Tensor(a.data.T,
                       requires_grad=requires_grad,
                       is_leaf=not requires_grad)
     return b
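The backward for this Transpose is not listed; a minimal sketch would just transpose the gradient back:

 def backward(ctx, grad_output):
     # Sketch: the gradient of a transpose is the transposed gradient.
     return tensor.Tensor(grad_output.data.T)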
Example #10
 def backward(ctx, grad_output):
     x, = ctx.saved_tensors
     # calculate gradient of output w.r.t. each input
     grad_x = np.where(x.data >= 0, np.ones(x.data.shape),
                       np.zeros(x.data.shape))
     grad_x = grad_x * grad_output.data
     #grad_x = unbroadcast(grad_x, grad_x.shape[1])
     # the order of gradients returned should match the order of the arguments
     return tensor.Tensor(grad_x)
Example #11
    def backward(ctx, grad_output):
        a = ctx.saved_tensors[0]

        # d(exp(a))/da = exp(a)
        grad_a = tensor.Tensor(np.exp(a.data) * grad_output.data)
        return grad_a, None
Example #12
    def backward(ctx, grad_output):
        a = ctx.saved_tensors[0]

        # d(sqrt(a))/da = 0.5 * a**(-0.5)
        grad_a = tensor.Tensor(0.5 * (a.data ** (-0.5)) * grad_output.data)
        return grad_a, None
Example #13
    def backward(ctx, grad_output):

        x, weight, output_size, bias, stride = ctx.saved_tensors
        batch_size, in_channel, input_size = x.shape
        out_channel, _, kernel_size = weight.shape

        dz = grad_output.data
        if stride.data > 1:
            dz = upsample(dz, stride.data, input_size, kernel_size)
        dzpad = np.zeros(
            (batch_size, out_channel, (input_size + 2 * (kernel_size - 1))))

        # Flip each kernel along its time axis; copy first so the saved
        # weights are not modified in place.
        Wflip = weight.data.copy()
        for i in range(weight.shape[0]):
            Wflip[i] = np.fliplr(weight.data[i])

        for i in range(batch_size):
            for j in range(out_channel):
                dzpad[i, j, kernel_size - 1:input_size] = dz[i,
                                                             j, :]  #center map

        dy = np.zeros((batch_size, in_channel, input_size))
        for i in range(batch_size):
            for j in range(in_channel):
                for k in range(input_size):
                    segment = dzpad[:, :, k:k + kernel_size]
                    dy[i, j, k] = np.sum(
                        Wflip[:, j, :] *
                        segment[i, :, :])  #np.tensordot(Wflip, segment)

        dw = np.zeros((out_channel, in_channel, kernel_size))
        for i in range(out_channel):
            for j in range(in_channel):
                for k in range(kernel_size):
                    dw[i, j, k] = np.sum(dz[:, i, :] *
                                         x.data[:, j, k:(k + input_size) -
                                                (kernel_size - 1)])

        grad_b = np.sum(grad_output.data, axis=(0, 2))
        return tensor.Tensor(dy), tensor.Tensor(dw), tensor.Tensor(
            grad_b), None
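This version delegates the zero-insertion to an upsample helper that is not shown. A hedged sketch matching the call upsample(dz, stride, input_size, kernel_size) above, assuming it inserts stride - 1 zeros between entries and pads the result to the stride-1 output length that the slice into dzpad expects:

def upsample(dz, stride, input_size, kernel_size):
    # Sketch: spread the strided gradient out so the rest of the backward
    # can treat the convolution as if it had stride 1.
    batch_size, out_channel, output_size = dz.shape
    target = input_size - kernel_size + 1
    up = np.zeros((batch_size, out_channel, target))
    up[:, :, ::stride] = dz
    return up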
Example #14
    def forward(ctx, a):        
        # Save inputs to access later in backward pass.
        ctx.save_for_backward(a)

        # Create the ReLU output and set `requires_grad` and `is_leaf`
        # (see appendix A for info on those params)
        requires_grad = a.requires_grad
        c = tensor.Tensor(np.maximum(a.data,0), requires_grad=requires_grad,
                                           is_leaf=not requires_grad)
        return c
Example #15
    def backward(ctx, grad_output):
        a = ctx.saved_tensors[0]

        # Broadcast the incoming gradient back to the input's shape
        grad_a = tensor.Tensor(np.ones(a.shape) * grad_output.data)

        return grad_a, None
Example #16
    def backward(ctx, grad_output):
        a, b = ctx.saved_tensors

        # calculate gradient of output w.r.t. each input:
        # d(a / b)/da = 1 / b,  d(a / b)/db = -a / b**2
        grad_a = (1 / b.data) * grad_output.data
        grad_b = -(a.data / (b.data ** 2)) * grad_output.data

        if b.shape != grad_b.shape:
            grad_b = unbroadcast(grad_b, b)

        if a.shape != grad_a.shape:
            grad_a = unbroadcast(grad_a, a)

        # Wrap both gradients so the return types are consistent
        return tensor.Tensor(grad_a), tensor.Tensor(grad_b)
Example #17
    def backward(ctx, grad_output):
        # retrieve forward inputs that we stored
        a, b = ctx.saved_tensors
        # calculate gradient of output w.r.t. each input
        """  
        grad_output := dl/dc i.e. gradient of loss w.r.t output (c)
        grad_a := dl/da i.e. gradient of loss w.r.t a
        dl/da = (dl/dc) . (dc/da)
        dc/da = d/da(a + b) ==> 1
        """
        grad_a = np.ones(a.shape) * grad_output.data
        grad_b = np.ones(b.shape) * grad_output.data
        if a.shape != b.shape:
            if a.shape != grad_a.shape:
                grad_a = unbroadcast(grad_a, a.shape)
            if b.shape != grad_b.shape:
                grad_b = unbroadcast(grad_b, b.shape)

        # the order of gradients returned should match the order of the arguments
        return tensor.Tensor(grad_a), tensor.Tensor(grad_b)
Example #18
 def forward(ctx, a, b):
     if not (type(a).__name__ == 'Tensor' and type(b).__name__ == 'Tensor'):
         raise Exception("Both args must be Tensors:{}, {}".format(type(a).__name__, type(b).__name__))
     ctx.save_for_backward(a, b)
     requires_grad = a.requires_grad or b.requires_grad
     # If either operand has requires_grad=True, the result's requires_grad
     # is also True; newly created tensors are leaves (is_leaf=True).
     c = tensor.Tensor(a.data * b.data, requires_grad=requires_grad,
                       is_leaf=not requires_grad)
     return c
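Only the forward for elementwise multiplication is shown. A backward consistent with the other snippets on this page might look like the sketch below; it reuses the unbroadcast helper assumed earlier:

 def backward(ctx, grad_output):
     a, b = ctx.saved_tensors
     # d(a * b)/da = b,  d(a * b)/db = a
     grad_a = b.data * grad_output.data
     grad_b = a.data * grad_output.data
     if a.shape != grad_a.shape:
         grad_a = unbroadcast(grad_a, a.shape)
     if b.shape != grad_b.shape:
         grad_b = unbroadcast(grad_b, b.shape)
     return tensor.Tensor(grad_a), tensor.Tensor(grad_b)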
Example #19
    def forward(ctx, a, b):
        # Check that inputs are tensors of same shape
        if not (type(a).__name__ == 'Tensor' and type(b).__name__ == 'Tensor') or \
            a.data.shape != b.data.shape:
            raise Exception("Both args must be Tensors: {}, {}".format(type(a).__name__, type(b).__name__))

        ctx.save_for_backward(a, b)
        requires_grad = a.requires_grad or b.requires_grad
        c = tensor.Tensor(a.data - b.data, requires_grad=requires_grad,
                          is_leaf=not requires_grad)
        return c
Example #20
    def backward(ctx, grad_output):
        # retrieve forward inputs that we stored
        a = ctx.saved_tensors[0]

        # calculate gradient of output w.r.t. each input
        # dL/da = dout/da * dL/dout      
        grad_relu = grad_output.data * np.where(a.data<0,0,1)

        # the order of gradients returned should match the order of the arguments
        grad_relu = tensor.Tensor(unbroadcast(grad_relu, a.shape))
        return grad_relu
Example #21
 def forward(ctx, predicted, target):
     ctx.save_for_backward(predicted, target)
     LogSoftmax = np.log(
         (np.exp(predicted.data)) /
         (np.sum(np.exp(predicted.data), axis=1).reshape(-1, 1)))
     one_hot = to_one_hot(target.data, LogSoftmax.shape[-1])
     N = LogSoftmax.shape[0]
     LLLoss = np.sum(LogSoftmax * one_hot.data) / N
     requires_grad = predicted.requires_grad
     return tensor.Tensor(-LLLoss,
                          requires_grad=requires_grad,
                          is_leaf=not requires_grad)
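Note that np.exp(predicted.data) can overflow for large logits. A numerically safer variant of the LogSoftmax line uses the log-sum-exp trick; this is only a sketch of a drop-in replacement, not the snippet author's code:

     x = predicted.data
     m = np.max(x, axis=1, keepdims=True)
     LogSoftmax = (x - m) - np.log(np.sum(np.exp(x - m), axis=1, keepdims=True))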
Example #22
    def backward(ctx, grad_output):

        # retrieve forward inputs that we stored
        a, b = ctx.saved_tensors

        # calculate gradient of output w.r.t. each input
        grad_a = np.ones(a.shape) * grad_output.data
        grad_b = np.ones(b.shape) * grad_output.data

        if b.shape != grad_b.shape:
            grad_b = unbroadcast(grad_b, b)

        if a.shape != grad_a.shape:
            grad_a = unbroadcast(grad_a, a)
        # the order of gradients returned should match the order of the arguments

        grad_a = tensor.Tensor(grad_a)
        grad_b = tensor.Tensor(grad_b)
        # TODO: Implement more Functions below
        #print('grad_a and grad_b from Add :',grad_a.shape,grad_b.shape)
        return grad_a, grad_b
Example #23
    def backward(ctx, grad_output):
        a, b = ctx.saved_tensors

        # calculate gradient of output w.r.t. each input:
        # d(a - b)/da = 1,  d(a - b)/db = -1
        grad_a = np.ones(a.shape) * grad_output.data
        grad_b = -1 * np.ones(b.shape) * grad_output.data

        if len(b.shape) == 1:
            grad_b = unbroadcast(grad_b, b)

        if len(a.shape) == 1:
            grad_a = unbroadcast(grad_a, a)

        grad_a = tensor.Tensor(grad_a)
        grad_b = tensor.Tensor(grad_b)
        return grad_a, grad_b
Example #24
 def forward(ctx, a, axis, keepdims):
     if not type(a).__name__ == 'Tensor':
         raise Exception("Only log of tensor is supported")
     ctx.axis = axis
     ctx.shape = a.shape
     if axis is not None:
         ctx.len = a.shape[axis]
     ctx.keepdims = keepdims
     requires_grad = a.requires_grad
     c = tensor.Tensor(a.data.sum(axis = axis, keepdims = keepdims), \
                       requires_grad=requires_grad, is_leaf=not requires_grad)
     return c
Example #25
    def backward(ctx, grad_output):
        # retrieve forward inputs that we stored
        z = ctx.saved_tensors

        # calculate gradient of output w.r.t. each input
        # dL/da = dout/da * dL/dout
        
        grad = tensor.Tensor(np.heaviside(z[0].data,0)*grad_output.data)
        
        # the order of gradients returned should match the order of the arguments
        
        return grad,
Example #26
    def backward(ctx, grad_output):
        if (ctx.axis is not None) and (not ctx.keepdims):
            grad_out = np.expand_dims(grad_output.data, axis=ctx.axis)
        else:
            grad_out = grad_output.data.copy()

        grad = np.ones(ctx.shape) * grad_out

        assert grad.shape == ctx.shape
        # Take note that gradient tensors SHOULD NEVER have requires_grad = True.
        return tensor.Tensor(grad), None, None
Example #27
    def backward(ctx, grad_output):
        # retrieve forward inputs that we stored
        x = ctx.saved_tensors

        # calculate gradient of output w.r.t. each input
        # dL/da = dout/da * dL/dout
        # d(log(x))/dx = 1/x
        grad_a = grad_output.data * (1 / x[0].data)

        grad_a = tensor.Tensor(grad_a)
        # the order of gradients returned should match the order of the arguments
        return grad_a, 
Example #28
    def forward(ctx, a, b):
        # Check that both inputs are tensors (broadcasting is handled in backward)
        if not (type(a).__name__ == 'Tensor' and type(b).__name__ == 'Tensor'):
            raise Exception("Both args must be Tensors: {}, {}".format(type(a).__name__, type(b).__name__))
        # Save inputs to access later in backward pass.
        ctx.save_for_backward(a, b)

        # Create the division output and set `requires_grad` and `is_leaf`
        # (see appendix A for info on those params)
        requires_grad = a.requires_grad or b.requires_grad
        c = tensor.Tensor(a.data / b.data, requires_grad=requires_grad,
                                           is_leaf=not requires_grad)
        return c
Example #29
 def forward(ctx, x, indices):
     '''
     Args:
         x (tensor): Tensor object that we need to slice
         indices (int,list,Slice): This is the key passed to the __getitem__ function of the Tensor object when it is sliced using [ ] notation.
     '''
     # raise NotImplementedError('Implemented Slice.forward')
     ctx.x = x
     ctx.indices = indices
     requires_grad = x.requires_grad
     sliced = x.data[indices]
     return tensor.Tensor(sliced,
                          requires_grad=requires_grad,
                          is_leaf=not requires_grad)
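The matching Slice backward is not included. Since the forward stashes the input tensor and the indices on ctx, a plausible sketch scatters the gradient back into a zero array of the input's shape:

 def backward(ctx, grad_output):
     # Sketch: gradient is zero everywhere except at the sliced positions.
     grad = np.zeros(ctx.x.shape)
     grad[ctx.indices] = grad_output.data
     return tensor.Tensor(grad), None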
Example #30
    def forward(ctx, x, p):

        if not (type(x).__name__ == 'Tensor'):
            raise Exception("Args must be Tensors: {}".format(
                type(x).__name__))

        ctx.save_for_backward(x, p)
        result = np.float_power(x.data, p.data)
        requires_grad = x.requires_grad
        z = tensor.Tensor(result,
                          requires_grad=requires_grad,
                          is_leaf=not requires_grad)

        return z
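The backward for this general power function is not shown (the x**2 special case appears earlier on this page). Since the forward saves both x and the exponent p, a sketch of the power rule would be:

    def backward(ctx, grad_output):
        x, p = ctx.saved_tensors
        # d(x**p)/dx = p * x**(p - 1)
        grad_x = p.data * np.float_power(x.data, p.data - 1) * grad_output.data
        return tensor.Tensor(grad_x), None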