def tvm_callback_cuda_compile(code):
    """Compile `code` via `nvcc.compile_cuda` and return its output.

    The call requests the 'ptx' target for arch 'sm_70' and forwards a
    register cap ('--maxrregcount', '128') plus one include-directory
    option.

    Parameters
    ----------
    code
        Source handed unchanged to `nvcc.compile_cuda`.

    Returns
    -------
    Whatever `nvcc.compile_cuda` returns for the 'ptx' target.
    """
    # NOTE(review): the include option is a single token containing a space
    # and an absolute path that appears specific to one machine -- confirm
    # that nvcc accepts it in this form and that the directory exists.
    nvcc_options = [
        '--maxrregcount', '128',
        '-I /home/tusimple/Desktop/tvm_ir_test',
    ]
    return nvcc.compile_cuda(
        code,
        target='ptx',
        arch='sm_70',
        options=nvcc_options,
    )
def tvm_callback_cuda_compile(code):
    """Compile `code` via `nvcc.compile_cuda` and return its output.

    The call requests the 'ptx' target for arch 'sm_70', limits registers
    with '--maxrregcount' '128', and adds the wmma_gemm_f16 include
    directory option.

    Parameters
    ----------
    code
        Source handed unchanged to `nvcc.compile_cuda`.

    Returns
    -------
    Whatever `nvcc.compile_cuda` returns for the 'ptx' target.
    """
    # NOTE(review): the include option is one token with an embedded space
    # and an absolute path under /root -- presumably only valid on the
    # author's machine; confirm before reuse.
    include_flag = '-I /root/zhoujq/gemmf16/wmma_gemm_f16/include'
    extra_flags = ['--maxrregcount', '128', include_flag]
    return nvcc.compile_cuda(code, target='ptx', arch='sm_70', options=extra_flags)
def tvm_callback_cuda_compile(code):
    """Use nvcc to generate compiled code for better optimization.

    The configured CUDA target arch is read once from
    `AutotvmGlobalScope.current.cuda_target_arch`. That single value
    decides the output target and is also passed to `nvcc.compile_cuda`
    as `arch`, so the two can never disagree.

    Parameters
    ----------
    code
        Source handed unchanged to `nvcc.compile_cuda`.

    Returns
    -------
    The result of `nvcc.compile_cuda`: requested as "fatbin" when the
    configured arch is a list, otherwise as "ptx".
    """
    cuda_arch = AutotvmGlobalScope.current.cuda_target_arch

    # e.g., target arch could be [
    #   "-gencode", "arch=compute_52,code=sm_52",
    #   "-gencode", "arch=compute_70,code=sm_70"
    # ]
    # A list of flags selects "fatbin"; anything else selects "ptx".
    if isinstance(cuda_arch, list):
        target = "fatbin"
    else:
        target = "ptx"

    return nvcc.compile_cuda(code, target=target, arch=cuda_arch)
def tvm_callback_cuda_compile(code):
    """Pass `code` to `nvcc.compile_cuda` with target "ptx" and return the result."""
    return nvcc.compile_cuda(code, target="ptx")
def tvm_callback_cuda_compile(code):
    """Compile with nvcc for better performance.

    Parameters
    ----------
    code
        Source handed unchanged to `nvcc.compile_cuda`.

    Returns
    -------
    The output of `nvcc.compile_cuda` for the "ptx" target.
    """
    compiled = nvcc.compile_cuda(code, target="ptx")
    return compiled
def tvm_callback_cuda_compile(code):
    """Compile `code` via `nvcc.compile_cuda` to the "ptx" target.

    The architecture is supplied as the raw option "-arch=sm_37" rather
    than through a dedicated keyword argument.

    Returns
    -------
    Whatever `nvcc.compile_cuda` returns.
    """
    arch_flags = ["-arch=sm_37"]
    return nvcc.compile_cuda(code, target="ptx", options=arch_flags)
def tvm_callback_cuda_compile(code):
    """Compile with nvcc for better performance.

    Parameters
    ----------
    code
        Source handed unchanged to `nvcc.compile_cuda`.

    Returns
    -------
    The output of `nvcc.compile_cuda` for the "ptx" target, arch 'sm_52'.
    """
    # An old arch is chosen on purpose so this also works on old GPUs.
    return nvcc.compile_cuda(code, target="ptx", arch='sm_52')
def tvm_callback_cuda_compile(code):  # pylint: disable=unused-variable
    """Forward `code` to `nvcc.compile_cuda` with its default settings.

    Returns
    -------
    Whatever `nvcc.compile_cuda` returns when given only the source.
    """
    return nvcc.compile_cuda(code)
def tvm_callback_cuda_compile(code):
    """Compile `code` via `nvcc.compile_cuda` to the "ptx" target.

    Returns
    -------
    Whatever `nvcc.compile_cuda` returns.
    """
    # sm_37 is the arch for the K80 (EC2 instance).
    k80_flags = ["-arch=sm_37"]
    return nvcc.compile_cuda(code, target="ptx", options=k80_flags)
def tvm_callback_cuda_compile(code):
    """Compile with nvcc for better performance.

    The architecture is supplied as the raw option "-arch=sm_52".

    Parameters
    ----------
    code
        Source handed unchanged to `nvcc.compile_cuda`.

    Returns
    -------
    The output of `nvcc.compile_cuda` for the "ptx" target.
    """
    compile_kwargs = {"target": "ptx", "options": ["-arch=sm_52"]}
    return nvcc.compile_cuda(code, **compile_kwargs)