def forward(self, input1, requires_grad=False):
    """Project the flow field `input1` via the FlowProjectionLayer C extension.

    Accumulates homography projections into a per-pixel counter (`self.count`)
    and the projected flow (`self.output`), both lazily allocated on first call
    and cached on `self` for reuse.

    Args:
        input1: contiguous flow tensor of size (N, C, H, W) — presumably
            C == 2 (x/y flow); TODO confirm against the C kernel.
        requires_grad: when False (inference), the kernel is asked to fill
            unprojected holes (`fillhole=1`); when True it is not, since the
            hole-filling step is not differentiable here.

    Returns:
        self.output — the projected flow tensor (same size as input1).
    """
    assert input1.is_contiguous()
    # Fill holes only at inference time (no gradient needed).
    fillhole = 1 if not requires_grad else 0

    if input1.is_cuda:
        if self.count is None:
            # Lazily allocate cached buffers matching the input's dtype
            # (float32 or float16). count has a single channel: it counts
            # how many source pixels project onto each target pixel.
            if isinstance(input1, torch.cuda.FloatTensor):
                self.count = torch.cuda.FloatTensor().resize_(
                    input1.size(0), 1, input1.size(2), input1.size(3)).zero_()
                self.output = torch.cuda.FloatTensor().resize_(
                    input1.size()).zero_()
            else:
                self.count = torch.cuda.HalfTensor().resize_(
                    input1.size(0), 1, input1.size(2), input1.size(3)).zero_()
                self.output = torch.cuda.HalfTensor().resize_(
                    input1.size()).zero_()
        # NOTE(review): cached buffers are only zeroed at allocation, not per
        # call — presumably the kernel overwrites/zeroes them itself; confirm.
        err = my_lib.FlowProjectionLayer_gpu_forward(
            input1, self.count, self.output, fillhole)
    else:
        # Bug fix: the original allocated a torch.cuda.FloatTensor on the CPU
        # path, passed a never-initialized self.count (None) into the C call,
        # and wrote into a local `output` while returning self.output.
        # Allocate CPU tensors into the cached attributes instead.
        if self.count is None:
            self.count = torch.FloatTensor(
                input1.size(0), 1, input1.size(2), input1.size(3)).zero_()
            self.output = torch.FloatTensor(input1.size()).zero_()
        err = my_lib.FlowProjectionLayer_cpu_forward(
            input1, self.count, self.output, fillhole)

    if err != 0:
        print(err)
    return self.output
def forward(ctx, input1, requires_grad):
    """Project the flow field `input1` via the FlowProjectionLayer C extension.

    ctx-style autograd forward: allocates fresh `count` (per-pixel projection
    counter, one channel) and `output` (projected flow) buffers, invokes the
    GPU/CPU kernel, and saves `input1` and `count` for the backward pass.

    Args:
        ctx: autograd context used for save_for_backward / fillhole stashing.
        input1: contiguous flow tensor of size (N, C, H, W).
        requires_grad: when False, ask the kernel to fill unprojected holes
            (`fillhole=1`); hole filling is skipped when gradients are needed.

    Returns:
        output — the projected flow tensor (same size as input1).
    """
    assert input1.is_contiguous()
    # Fill holes only at inference time (no gradient needed).
    fillhole = 1 if not requires_grad else 0

    if input1.is_cuda:
        count = torch.cuda.FloatTensor().resize_(
            input1.size(0), 1, input1.size(2), input1.size(3)).zero_()
        output = torch.cuda.FloatTensor().resize_(input1.size()).zero_()
        err = my_lib.FlowProjectionLayer_gpu_forward(
            input1, count, output, fillhole)
    else:
        # Bug fix: the original allocated a torch.cuda.FloatTensor on the CPU
        # path and never defined `count`, raising NameError before the kernel
        # call (and again at save_for_backward). Allocate CPU tensors.
        count = torch.FloatTensor(
            input1.size(0), 1, input1.size(2), input1.size(3)).zero_()
        output = torch.FloatTensor(input1.size()).zero_()
        err = my_lib.FlowProjectionLayer_cpu_forward(
            input1, count, output, fillhole)

    if err != 0:
        print(err)
    # Cache what backward needs: the input flow and the projection counter.
    ctx.save_for_backward(input1, count)
    ctx.fillhole = fillhole
    return output