def bind_buffers(self):
    """
    Swap tensor-description placeholders in the kernel parameters for
    device pointers.

    Any entry of ``self.params`` that is still a ``TensorDescription`` is
    replaced, in place, by the result of ``pointer_from_td`` for that
    description. Entries of any other type are left untouched. The parent
    class's ``bind_buffers`` is invoked afterwards.

    Per the original documentation, this must be called before the first
    kernel run, because tensors are not yet allocated when the parameter
    list is first generated.
    """
    params = self.params
    for slot, entry in enumerate(params):
        # Only placeholders need resolving; already-bound values stay as-is.
        if isinstance(entry, TensorDescription):
            params[slot] = pointer_from_td(entry)

    super(ElementWiseKernel, self).bind_buffers()
def bind_buffers(self):
    """
    Swap tensor-description placeholders for device pointers in every
    kernel's parameter list.

    Each element of ``self.kernels`` is indexed at position 1 to obtain its
    parameter sequence. Any ``TensorDescription`` found in that sequence is
    replaced, in place, by the result of ``pointer_from_td``. Other entries
    are left untouched. The parent class's ``bind_buffers`` is invoked
    afterwards.
    """
    for kernel_entry in self.kernels:
        # Index 1 of each kernel entry holds its parameter sequence.
        kernel_params = kernel_entry[1]
        for slot, entry in enumerate(kernel_params):
            if isinstance(entry, TensorDescription):
                kernel_params[slot] = pointer_from_td(entry)

    super(LUTBpropKernel, self).bind_buffers()
def bind_buffers(self):
    """
    Resolve the destination tensor, the optional source value, and any
    kernel parameters from their tensor descriptions.

    * ``self.tensor``: if it is a ``TensorDescription`` it is replaced by
      the description's ``value``.
    * ``self.value``: if it is a ``TensorDescription`` it is replaced by the
      description's ``value.tensor``.
    * ``self.params``: only when ``self.kernel`` is not ``None``, every
      ``TensorDescription`` entry is replaced in place by the result of
      ``pointer_from_td``.

    The parent class's ``bind_buffers`` is invoked afterwards.
    """
    # NOTE(review): the destination keeps the `.value` wrapper whereas the
    # source is unwrapped one level further to `.value.tensor`; this looks
    # intentional -- confirm against the code that consumes both attributes.
    destination = self.tensor
    if isinstance(destination, TensorDescription):
        self.tensor = destination.value

    source = self.value
    if isinstance(source, TensorDescription):
        self.value = source.value.tensor

    # Without a kernel there is no parameter list to bind.
    if self.kernel is not None:
        params = self.params
        for slot, entry in enumerate(params):
            if isinstance(entry, TensorDescription):
                params[slot] = pointer_from_td(entry)

    super(SetItemKernel, self).bind_buffers()
def bind_buffers(self):
    """
    Resolve the source/destination tensors and any kernel parameters from
    their tensor descriptions.

    ``self.out_sliced``, ``self.x`` and ``self.out`` are handled the same
    way, in that order: when the attribute is a ``TensorDescription`` it is
    replaced by the description's ``value.tensor``. When ``self.kernel`` is
    not ``None``, every ``TensorDescription`` entry in ``self.params`` is
    replaced in place by the result of ``pointer_from_td``.

    The parent class's ``bind_buffers`` is invoked afterwards.
    """
    # All three attributes follow the same resolution rule.
    for attr_name in ("out_sliced", "x", "out"):
        current = getattr(self, attr_name)
        if isinstance(current, TensorDescription):
            setattr(self, attr_name, current.value.tensor)

    # Without a kernel there is no parameter list to bind.
    if self.kernel is not None:
        params = self.params
        for slot, entry in enumerate(params):
            if isinstance(entry, TensorDescription):
                params[slot] = pointer_from_td(entry)

    super(UnsliceKernel, self).bind_buffers()