def backward(ctx, grad_output):
    a, b = ctx.saved_tensors
    # calculate gradient of output w.r.t. each input:
    # d(a / b)/da = 1/b, d(a / b)/db = -a / b**2
    temp_a = 1. / b.data
    grad_a = temp_a * grad_output.data
    temp_b = -(a.data / (b.data ** 2))
    grad_b = temp_b * grad_output.data
    if a.shape != b.shape:
        if a.shape != grad_a.shape:
            grad_a = unbroadcast(grad_a, a.shape)
        if b.shape != grad_b.shape:
            grad_b = unbroadcast(grad_b, b.shape)
    # the order of gradients returned should match the order of the arguments
    return tensor.Tensor(grad_a), tensor.Tensor(grad_b)
def forward(ctx, a, shape):
    if not type(a).__name__ == 'Tensor':
        raise Exception("Arg for Reshape must be tensor: {}".format(type(a).__name__))
    ctx.shape = a.shape
    requires_grad = a.requires_grad
    c = tensor.Tensor(a.data.reshape(shape), requires_grad=requires_grad,
                      is_leaf=not requires_grad)
    return c
def backward(ctx, grad_output):
    predicted, target = ctx.saved_tensors
    # softmax of the logits; the gradient of the batch-averaged NLL loss is
    # (softmax - one_hot) / batch_size
    softmax = np.exp(predicted.data) / np.sum(np.exp(predicted.data), axis=1).reshape(-1, 1)
    one_hot = to_one_hot(target.data, softmax.shape[-1])
    grad_predicted = (softmax - one_hot.data) * grad_output.data / predicted.data.shape[0]
    return (tensor.Tensor(grad_predicted),)
def backward(ctx, grad_output):
    x, weight, stride = ctx.saved_tensors
    batch_size, in_channel, input_size = x.shape
    out_channel, _, kernel_size = weight.shape
    stride = stride.data.item()
    output_size = grad_output.shape[-1]

    # grad_w could equivalently be np.zeros_like(weight): the weight gradient keeps
    # PyTorch's (out_channel, in_channel, kernel_size) layout rather than the
    # transposed mathematical convention.
    grad_w = np.zeros(shape=(out_channel, in_channel, kernel_size))
    grad_x = np.zeros(shape=(batch_size, in_channel, input_size))

    # grad_b: sum the output gradient over the batch and time axes
    grad_b = np.einsum('ijk->j', grad_output.data)

    # grad_w: upsample (dilate) the output gradient by the stride, then correlate it
    # with the input
    new_shape = (grad_output.shape[2] - 1) * stride + 1
    out_z = np.zeros((grad_output.shape[0], grad_output.shape[1], new_shape))
    out_z[:, :, ::stride] = grad_output.data
    for t in range(input_size - out_z.shape[2] + 1):
        if t < grad_w.shape[2]:
            seg_b = x.data[:, :, t:t + out_z.shape[2]]
            grad_w[:, :, t] = np.einsum('ijk,ilk->lj', seg_b, out_z)

    # grad_x: upsample the output gradient again, zero-pad by kernel_size - 1 on both
    # sides (after upsampling the stride behaves as 1), and convolve with the flipped kernel
    new_k = (grad_output.shape[2] - 1) * stride + 1
    out = np.zeros((grad_output.shape[0], grad_output.shape[1], new_k))
    out[:, :, ::stride] = grad_output.data
    npad = ((0, 0), (0, 0), (kernel_size - 1, kernel_size - 1))
    out_2 = np.pad(out, pad_width=npad, mode='constant', constant_values=0)
    flip_weight = np.flip(weight.data, axis=2)
    for t in range(out_2.shape[2] - kernel_size + 1):
        seg_a = out_2[:, :, t:t + kernel_size]
        grad_x[:, :, t] = np.einsum('ijk,jlk->il', seg_a, flip_weight)

    return (tensor.Tensor(grad_x, requires_grad=True, is_leaf=True),
            tensor.Tensor(grad_w, requires_grad=True, is_leaf=True),
            tensor.Tensor(grad_b, requires_grad=True, is_leaf=True))
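# Quick check of the bias-gradient einsum above in plain NumPy: 'ijk->j' sums the
# output gradient over the batch (i) and time (k) axes, i.e. it is a plain sum over
# those axes. Standalone sketch; `g` is a random stand-in for grad_output.data.
import numpy as np

g = np.random.randn(4, 6, 10)   # (batch_size, out_channel, output_size)
assert np.allclose(np.einsum('ijk->j', g), g.sum(axis=(0, 2)))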
def backward(ctx, grad_output):
    a = ctx.saved_tensors[0]
    # d(a**2)/da = 2a
    grad_a = tensor.Tensor(2 * a.data * grad_output.data)
    return grad_a, None
def forward(ctx, a): if not type(a).__name__ == 'Tensor': raise Exception("Arg for Log must be tensor: {}".format(type(a).__name__)) ctx.save_for_backward(a) requires_grad = a.requires_grad c = tensor.Tensor(np.log(a.data), requires_grad=requires_grad, is_leaf=not requires_grad) return c
def backward(ctx, grad_output):
    a = ctx.saved_tensors[0]
    # ReLU gradient: pass grad_output through where the input was positive, zero elsewhere
    grad_a = tensor.Tensor(np.where(a.data <= 0, 0, 1) * grad_output.data)
    return grad_a, None
def backward(ctx, grad_output):
    a, b = ctx.saved_tensors
    # dL/da = dL/dc @ b^T, dL/db = a^T @ dL/dc
    grad_a = np.matmul(grad_output.data, b.T().data)
    grad_b = np.matmul(a.T().data, grad_output.data)
    if b.shape != grad_b.shape:
        grad_b = unbroadcast(grad_b, b)
    if a.shape != grad_a.shape:
        grad_a = unbroadcast(grad_a, a)
    # the order of gradients returned should match the order of the arguments
    return tensor.Tensor(grad_a), tensor.Tensor(grad_b)
def forward(ctx, a):
    if not len(a.shape) == 2:
        raise Exception("Arg for Transpose must be 2D tensor: {}".format(a.shape))
    requires_grad = a.requires_grad
    b = tensor.Tensor(a.data.T, requires_grad=requires_grad,
                      is_leaf=not requires_grad)
    return b
def backward(ctx, grad_output):
    x, = ctx.saved_tensors
    # calculate gradient of output w.r.t. each input
    grad_x = np.where(x.data >= 0, np.ones(x.data.shape), np.zeros(x.data.shape))
    grad_x = grad_x * grad_output.data
    # the order of gradients returned should match the order of the arguments
    return tensor.Tensor(grad_x)
def backward(ctx, grad_output):
    a = ctx.saved_tensors[0]
    # d(exp(a))/da = exp(a)
    grad_a = tensor.Tensor(np.exp(a.data) * grad_output.data)
    return grad_a, None
def backward(ctx, grad_output):
    a = ctx.saved_tensors[0]
    # d(sqrt(a))/da = 0.5 * a**(-0.5)
    grad_a = tensor.Tensor(0.5 * (a.data ** (-0.5)) * grad_output.data)
    return grad_a, None
def backward(ctx, grad_output):
    x, weight, output_size, bias, stride = ctx.saved_tensors
    batch_size, in_channel, input_size = x.shape
    out_channel, _, kernel_size = weight.shape

    dz = grad_output.data
    if stride.data > 1:
        # dilate the output gradient with zeros so that stride > 1 behaves like stride 1
        dz = upsample(dz, stride.data, input_size, kernel_size)

    # zero-pad the (dilated) output gradient by kernel_size - 1 on both sides
    dzpad = np.zeros((batch_size, out_channel, input_size + 2 * (kernel_size - 1)))
    # flip each kernel along the time axis; copy first so the saved weights are not mutated
    Wflip = weight.data.copy()
    for i in range(weight.shape[0]):
        Wflip[i] = np.fliplr(weight.data[i])
    for i in range(batch_size):
        for j in range(out_channel):
            dzpad[i, j, kernel_size - 1:input_size] = dz[i, j, :]  # center map

    # gradient w.r.t. the input: full convolution of the padded gradient with the flipped kernels
    dy = np.zeros((batch_size, in_channel, input_size))
    for i in range(batch_size):
        for j in range(in_channel):
            for k in range(input_size):
                segment = dzpad[:, :, k:k + kernel_size]
                dy[i, j, k] = np.sum(Wflip[:, j, :] * segment[i, :, :])

    # gradient w.r.t. the weights: correlate the output gradient with the input
    dw = np.zeros((out_channel, in_channel, kernel_size))
    for i in range(out_channel):
        for j in range(in_channel):
            for k in range(kernel_size):
                dw[i, j, k] = np.sum(dz[:, i, :] * x.data[:, j, k:(k + input_size) - (kernel_size - 1)])

    # gradient w.r.t. the bias: sum the original output gradient over batch and time
    grad_b = np.sum(grad_output.data, axis=(0, 2))
    return tensor.Tensor(dy), tensor.Tensor(dw), tensor.Tensor(grad_b), None
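# The `upsample` helper called above is not shown in this section. A minimal sketch
# of what it is assumed to do (dilate the strided output gradient with zeros so the
# rest of the backward pass can treat the convolution as stride 1); the real helper's
# signature and edge handling may differ.
def upsample_sketch(dz, stride, input_size, kernel_size):
    batch_size, out_channel, output_size = dz.shape
    dilated_size = input_size - kernel_size + 1   # length of the equivalent stride-1 output
    dz_up = np.zeros((batch_size, out_channel, dilated_size))
    dz_up[:, :, ::stride] = dz
    return dz_up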
def forward(ctx, a):
    # Save inputs to access later in backward pass.
    ctx.save_for_backward(a)
    # Create the ReLU output and set `requires_grad` and `is_leaf`
    # (see appendix A for info on those params)
    requires_grad = a.requires_grad
    c = tensor.Tensor(np.maximum(a.data, 0), requires_grad=requires_grad,
                      is_leaf=not requires_grad)
    return c
def backward(ctx, grad_output): a = ctx.saved_tensors[0] #print("Shape of a:",a.shape) #print("Shape of grad_ouput :",grad_output.shape) grad_a = tensor.Tensor(np.ones(a.shape) * grad_output.data) #print('grad_output shape in Sum_column:',grad_a.shape) return grad_a, None
def backward(ctx, grad_output):
    a, b = ctx.saved_tensors
    # calculate gradient of output w.r.t. each input:
    # d(a / b)/da = 1/b, d(a / b)/db = -a / b**2
    grad_a = np.multiply(np.divide(1, b.data), grad_output.data)
    grad_b = -np.multiply(np.multiply(a.data, np.divide(1, b.data ** 2)), grad_output.data)
    if b.shape != grad_b.shape:
        grad_b = unbroadcast(grad_b, b)
    if a.shape != grad_a.shape:
        grad_a = unbroadcast(grad_a, a)
    return tensor.Tensor(grad_a), tensor.Tensor(grad_b)
def backward(ctx, grad_output):
    # retrieve forward inputs that we stored
    a, b = ctx.saved_tensors
    # calculate gradient of output w.r.t. each input
    """
    grad_output := dL/dc, i.e. gradient of loss w.r.t. output (c)
    grad_a      := dL/da, i.e. gradient of loss w.r.t. a
    dL/da = (dL/dc) * (dc/da), and dc/da = d/da(a + b) = 1
    """
    grad_a = np.ones(a.shape) * grad_output.data
    grad_b = np.ones(b.shape) * grad_output.data
    if a.shape != b.shape:
        if a.shape != grad_a.shape:
            grad_a = unbroadcast(grad_a, a.shape)
        if b.shape != grad_b.shape:
            grad_b = unbroadcast(grad_b, b.shape)
    # the order of gradients returned should match the order of the arguments
    return tensor.Tensor(grad_a), tensor.Tensor(grad_b)
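# The `unbroadcast` helper used throughout these backward passes is not shown. A
# minimal sketch of what it is assumed to do (sum the gradient over the axes that
# NumPy broadcasting added or expanded, so it matches the operand's original shape);
# note that some snippets pass the operand tensor instead of its shape, so the real
# helper's signature evidently varies.
def unbroadcast_sketch(grad, shape):
    # collapse leading axes that broadcasting prepended
    while grad.ndim > len(shape):
        grad = grad.sum(axis=0)
    # sum over axes that were size 1 in the original operand
    for axis, dim in enumerate(shape):
        if dim == 1:
            grad = grad.sum(axis=axis, keepdims=True)
    return grad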
def forward(ctx, a, b):
    if not (type(a).__name__ == 'Tensor' and type(b).__name__ == 'Tensor'):
        raise Exception("Both args must be Tensors: {}, {}".format(type(a).__name__, type(b).__name__))
    ctx.save_for_backward(a, b)
    # If either operand has requires_grad=True, the result of the operation also has
    # requires_grad=True. Tensors created directly at the start are all leaves
    # (is_leaf=True), hence is_leaf = not requires_grad here.
    requires_grad = a.requires_grad or b.requires_grad
    c = tensor.Tensor(a.data * b.data, requires_grad=requires_grad,
                      is_leaf=not requires_grad)
    return c
def forward(ctx, a, b):
    # Check that inputs are tensors of same shape
    if not (type(a).__name__ == 'Tensor' and type(b).__name__ == 'Tensor') or \
            a.data.shape != b.data.shape:
        raise Exception("Both args must be Tensors: {}, {}".format(type(a).__name__, type(b).__name__))
    ctx.save_for_backward(a, b)
    requires_grad = a.requires_grad or b.requires_grad
    c = tensor.Tensor(a.data - b.data, requires_grad=requires_grad,
                      is_leaf=not requires_grad)
    return c
def backward(ctx, grad_output):
    # retrieve forward inputs that we stored
    a = ctx.saved_tensors[0]
    # calculate gradient of output w.r.t. each input
    # dL/da = dout/da * dL/dout
    grad_relu = grad_output.data * np.where(a.data < 0, 0, 1)
    # the order of gradients returned should match the order of the arguments
    grad_relu = tensor.Tensor(unbroadcast(grad_relu, a.shape))
    return grad_relu
def forward(ctx, predicted, target):
    ctx.save_for_backward(predicted, target)
    # log-softmax of the logits, row-wise
    LogSoftmax = np.log(np.exp(predicted.data) /
                        np.sum(np.exp(predicted.data), axis=1).reshape(-1, 1))
    one_hot = to_one_hot(target.data, LogSoftmax.shape[-1])
    N = LogSoftmax.shape[0]
    # negative log-likelihood averaged over the batch
    LLLoss = np.sum(LogSoftmax * one_hot.data) / N
    requires_grad = predicted.requires_grad
    return tensor.Tensor(-LLLoss, requires_grad=requires_grad,
                         is_leaf=not requires_grad)
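# Quick sanity check of the loss math above on a tiny example, in plain NumPy and
# outside the autograd framework (the arrays here are made up for illustration):
import numpy as np

logits = np.array([[2.0, 0.5, -1.0],
                   [0.1, 1.2, 0.3]])
labels = np.array([0, 1])

log_softmax = np.log(np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True))
nll = -np.mean(log_softmax[np.arange(len(labels)), labels])
# `nll` equals -sum(LogSoftmax * one_hot) / N from the forward pass above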
def backward(ctx, grad_output):
    # retrieve forward inputs that we stored
    a, b = ctx.saved_tensors
    # calculate gradient of output w.r.t. each input
    grad_a = np.ones(a.shape) * grad_output.data
    grad_b = np.ones(b.shape) * grad_output.data
    if b.shape != grad_b.shape:
        grad_b = unbroadcast(grad_b, b)
    if a.shape != grad_a.shape:
        grad_a = unbroadcast(grad_a, a)
    # the order of gradients returned should match the order of the arguments
    return tensor.Tensor(grad_a), tensor.Tensor(grad_b)
def backward(ctx, grad_output):
    a, b = ctx.saved_tensors
    # calculate gradient of output w.r.t. each input:
    # d(a - b)/da = 1, d(a - b)/db = -1
    grad_a = np.ones(a.shape) * grad_output.data
    grad_b = -1 * np.ones(b.shape) * grad_output.data
    if a.shape != grad_a.shape:
        grad_a = unbroadcast(grad_a, a)
    if b.shape != grad_b.shape:
        grad_b = unbroadcast(grad_b, b)
    return tensor.Tensor(grad_a), tensor.Tensor(grad_b)
def forward(ctx, a, axis, keepdims):
    if not type(a).__name__ == 'Tensor':
        raise Exception("Only sum of tensor is supported")
    ctx.axis = axis
    ctx.shape = a.shape
    if axis is not None:
        ctx.len = a.shape[axis]
    ctx.keepdims = keepdims
    requires_grad = a.requires_grad
    c = tensor.Tensor(a.data.sum(axis=axis, keepdims=keepdims),
                      requires_grad=requires_grad, is_leaf=not requires_grad)
    return c
def backward(ctx, grad_output):
    # retrieve forward inputs that we stored
    z = ctx.saved_tensors
    # calculate gradient of output w.r.t. each input
    # dL/da = dout/da * dL/dout
    grad = tensor.Tensor(np.heaviside(z[0].data, 0) * grad_output.data)
    # the order of gradients returned should match the order of the arguments
    return grad,
def backward(ctx, grad_output):
    if (ctx.axis is not None) and (not ctx.keepdims):
        # the reduced axis was dropped in the forward pass, so restore it before broadcasting
        grad_out = np.expand_dims(grad_output.data, axis=ctx.axis)
    else:
        grad_out = grad_output.data.copy()
    grad = np.ones(ctx.shape) * grad_out
    assert grad.shape == ctx.shape
    # Take note that gradient tensors SHOULD NEVER have requires_grad = True.
    return tensor.Tensor(grad), None, None
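# Small NumPy illustration of the keepdims handling above: when the forward pass
# summed over axis=1 without keepdims, the incoming gradient is missing that axis,
# so it is re-inserted before broadcasting back to the input's shape. Standalone
# sketch with made-up shapes:
import numpy as np

x_shape = (4, 3)
g = np.ones(4)                    # gradient of a sum over axis=1, keepdims=False
g = np.expand_dims(g, axis=1)     # shape (4, 1)
full = np.ones(x_shape) * g       # shape (4, 3): each input element gets its row's gradient
assert full.shape == x_shape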
def backward(ctx, grad_output):
    # retrieve forward inputs that we stored
    x = ctx.saved_tensors
    # calculate gradient of output w.r.t. the input:
    # dL/da = dout/da * dL/dout, with d(log a)/da = 1/a
    grad_a = grad_output.data * (1 / x[0].data)
    grad_a = tensor.Tensor(grad_a)
    # the order of gradients returned should match the order of the arguments
    return grad_a,
def forward(ctx, a, b):
    # Note: unlike Add/Sub above, no same-shape check is enforced here; division is
    # allowed to broadcast, and the backward pass unbroadcasts the gradients.
    # Save inputs to access later in backward pass.
    ctx.save_for_backward(a, b)
    # Create the division output and set `requires_grad` and `is_leaf`
    # (see appendix A for info on those params)
    requires_grad = a.requires_grad or b.requires_grad
    c = tensor.Tensor(a.data / b.data, requires_grad=requires_grad,
                      is_leaf=not requires_grad)
    return c
def forward(ctx, x, indices):
    '''
    Args:
        x (tensor): Tensor object that we need to slice
        indices (int, list, Slice): The key passed to the __getitem__ function of the
            Tensor object when it is sliced using [ ] notation.
    '''
    ctx.x = x
    ctx.indices = indices
    requires_grad = x.requires_grad
    sliced = x.data[indices]
    return tensor.Tensor(sliced, requires_grad=requires_grad,
                         is_leaf=not requires_grad)
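# A matching Slice.backward is not shown in this section. A minimal sketch under the
# same conventions (scatter the incoming gradient into zeros of the input's shape at
# the saved indices); the real implementation and its return signature may differ:
def slice_backward_sketch(ctx, grad_output):
    grad = np.zeros(ctx.x.shape)
    grad[ctx.indices] = grad_output.data
    return tensor.Tensor(grad), None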
def forward(ctx, x, p):
    if not (type(x).__name__ == 'Tensor'):
        raise Exception("Args must be Tensors: {}".format(type(x).__name__))
    ctx.save_for_backward(x, p)
    result = np.float_power(x.data, p.data)
    requires_grad = x.requires_grad
    z = tensor.Tensor(result, requires_grad=requires_grad,
                      is_leaf=not requires_grad)
    return z
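# A matching backward for Pow is not shown here. Treating the exponent p as a
# constant, the standard derivative d(x**p)/dx = p * x**(p - 1) gives a sketch like
# the following (hypothetical; the real backward may handle p's gradient differently):
def pow_backward_sketch(ctx, grad_output):
    x, p = ctx.saved_tensors
    grad_x = p.data * np.float_power(x.data, p.data - 1) * grad_output.data
    return tensor.Tensor(grad_x), None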