def compile(self):
    """Prepare the forget-mult kernels for the current CUDA device.

    The kernel source is compiled once and the result is stored on
    ``GPUForgetMult.ptx``. For every device index not yet present in
    ``GPUForgetMult.configured_gpus`` a module is loaded from that PTX,
    the forward and backward functions are looked up by name, and they
    are cached together with a ``Stream(ptr=...)`` wrapper around the
    current stream's ``cuda_stream`` value. Finally the cached triple for
    the current device is bound to ``self.forget_mult``,
    ``self.bwd_forget_mult`` and ``self.stream``.
    """
    # Compile the source only while no PTX has been stored yet.
    if self.ptx is None:
        nvrtc_program = _NVRTCProgram(
            kernel.encode(),
            "recurrent_forget_mult.cu".encode(),
        )
        GPUForgetMult.ptx = nvrtc_program.compile()

    device_index = torch.cuda.current_device()
    per_device = GPUForgetMult.configured_gpus

    if device_index not in per_device:
        module = function.Module()
        module.load(bytes(self.ptx.encode()))

        self.forget_mult = module.get_function("recurrent_forget_mult")
        self.bwd_forget_mult = module.get_function("bwd_recurrent_forget_mult")

        # Lightweight holder exposing the raw stream handle as ``.ptr``.
        stream_holder = namedtuple("Stream", ["ptr"])
        self.stream = stream_holder(ptr=torch.cuda.current_stream().cuda_stream)

        # NOTE(review): the stream wrapper is captured only here, so later
        # calls on the same device reuse this one -- confirm that is intended.
        per_device[device_index] = (
            self.forget_mult,
            self.bwd_forget_mult,
            self.stream,
        )

    # Always bind from the cache so every instance sees the same handles.
    cached_handles = per_device[device_index]
    self.forget_mult, self.bwd_forget_mult, self.stream = cached_handles
def compile(self):
    """Compile the kernel source if needed and bind per-device handles.

    Steps, in order:

    1. When ``self.ptx`` is ``None``, build an ``_NVRTCProgram`` from the
       encoded ``kernel`` source and store its ``compile()`` result on
       ``GPUForgetMult.ptx``.
    2. When the current CUDA device index is not a key of
       ``GPUForgetMult.configured_gpus``, load the PTX into a new module,
       fetch the forward and backward functions by name, wrap the current
       stream's ``cuda_stream`` value in a ``Stream`` namedtuple, and
       cache the three objects under that device index.
    3. Assign the cached triple for the current device to
       ``self.forget_mult``, ``self.bwd_forget_mult`` and ``self.stream``.
    """
    if self.ptx is None:
        source_bytes = kernel.encode()
        file_name = 'recurrent_forget_mult.cu'.encode()
        GPUForgetMult.ptx = _NVRTCProgram(source_bytes, file_name).compile()

    gpu = torch.cuda.current_device()
    configured = GPUForgetMult.configured_gpus

    if gpu not in configured:
        loaded = function.Module()
        ptx_bytes = bytes(self.ptx.encode())
        loaded.load(ptx_bytes)

        self.forget_mult = loaded.get_function('recurrent_forget_mult')
        self.bwd_forget_mult = loaded.get_function('bwd_recurrent_forget_mult')

        # Wrap the raw stream handle so it is reachable as ``.ptr``.
        StreamPtr = namedtuple('Stream', ['ptr'])
        raw_stream = torch.cuda.current_stream().cuda_stream
        self.stream = StreamPtr(ptr=raw_stream)

        # NOTE(review): this wrapper is cached with the functions, so the
        # stream seen on the first call per device is reused afterwards --
        # confirm that is the desired behaviour.
        configured[gpu] = (self.forget_mult, self.bwd_forget_mult, self.stream)

    # Read back from the cache on both the first and later calls.
    forward_fn, backward_fn, stream = configured[gpu]
    self.forget_mult = forward_fn
    self.bwd_forget_mult = backward_fn
    self.stream = stream