def _generate_file(self, ext: str):
    """Compile the test kernel source to a cubin or PTX file by calling nvcc.

    The source text ``_test_source5`` is written to ``test_load_cubin.cu``
    inside ``_test_cache_dir`` and compiled for the architecture reported by
    ``compiler._get_arch()``.

    Args:
        ext: Requested output format, either ``'cubin'`` or ``'ptx'``.

    Returns:
        Path of the generated file.

    Raises:
        ValueError: If ``ext`` is neither ``'cubin'`` nor ``'ptx'``.
    """
    # Reject unsupported formats up front so that a bad argument neither
    # looks up nvcc nor leaves a stray source file behind.
    if ext == 'cubin':
        flag = '-cubin'
    elif ext == 'ptx':
        flag = '-ptx'
    else:
        raise ValueError('unsupported file type: {!r}'.format(ext))

    # split() is needed because nvcc could come from the env var NVCC
    cmd = cupy.cuda.get_nvcc_path().split()
    arch = '-gencode=arch=compute_{cc},code=sm_{cc}'.format(
        cc=compiler._get_arch())

    # Build the output path the same way as the source path. Plain string
    # concatenation omits the separator, which puts the output next to the
    # cache directory (not inside it) whenever the directory name has no
    # trailing separator.
    source = '{}/test_load_cubin.cu'.format(_test_cache_dir)
    file_path = '{}/test_load_cubin.{}'.format(_test_cache_dir, ext)
    with open(source, 'w') as f:
        f.write(_test_source5)

    cmd += [arch, flag, source, '-o', file_path]
    compiler._run_nvcc(cmd, _test_cache_dir)
    return file_path
def _generate_file(self, ext: str):
    """Compile the test kernel source to a binary/PTX file with nvcc or hipcc.

    When ``cupy.cuda.runtime.is_hip`` is false, nvcc is used on
    ``_test_source5`` as is; otherwise hipcc is used on the output of
    ``compiler._convert_to_hip_source``. The source is written to
    ``test_load_cubin.cu`` inside ``self.cache_dir`` before compiling.

    Args:
        ext: Requested output format. ``'cubin'`` maps to the ``-cubin``
            flag, ``'ptx'`` to ``-ptx`` and ``'hsaco'`` to ``--genco``.

    Returns:
        Path of the generated file.

    Raises:
        ValueError: If ``ext`` is not one of the formats listed above.
    """
    # Reject unsupported formats up front so that a bad argument neither
    # looks up a compiler nor leaves a stray source file behind.
    if ext == 'cubin':
        flag = '-cubin'
    elif ext == 'ptx':
        flag = '-ptx'
    elif ext == 'hsaco':
        flag = '--genco'
    else:
        raise ValueError('unsupported file type: {!r}'.format(ext))

    # generate cubin/ptx by calling nvcc/hipcc
    is_hip = cupy.cuda.runtime.is_hip
    if not is_hip:
        cc_name = 'nvcc'
        cc_path = cupy.cuda.get_nvcc_path()
        arch = '-gencode=arch=compute_{CC},code=sm_{CC}'.format(
            CC=compiler._get_arch())
        code = _test_source5
    else:
        cc_name = 'hipcc'
        # TODO(leofang): expose get_hipcc_path() to cupy.cuda?
        cc_path = cupy._environment.get_hipcc_path()
        arch = '-v'  # dummy
        code = compiler._convert_to_hip_source(_test_source5, None, False)

    # split() is needed because nvcc could come from the env var NVCC
    cmd = cc_path.split()

    # Build the output path the same way as the source path. Plain string
    # concatenation omits the separator, which puts the output next to the
    # cache directory (not inside it) whenever the directory name has no
    # trailing separator.
    source = '{}/test_load_cubin.cu'.format(self.cache_dir)
    file_path = '{}/test_load_cubin.{}'.format(self.cache_dir, ext)
    with open(source, 'w') as f:
        f.write(code)

    cmd += [arch, flag, source, '-o', file_path]
    compiler._run_cc(cmd, self.cache_dir, cc_name)
    return file_path
def _check_get_arch(self, device_cc, expected_arch):
    """Assert that ``compiler._get_arch()`` yields ``expected_arch``.

    ``cupy.cuda.device.Device`` is replaced by a mock for the duration of
    the check, and the object returned by calling it reports ``device_cc``
    as its ``compute_capability``.

    Args:
        device_cc: Value exposed as the mocked device's compute capability.
        expected_arch: Value ``compiler._get_arch()`` is expected to return.
    """
    with mock.patch('cupy.cuda.device.Device') as fake_device_cls:
        # Every Device() call made while patched returns this one mock
        # instance.
        fake_device = fake_device_cls.return_value
        fake_device.compute_capability = device_cc
        actual_arch = compiler._get_arch()
        assert actual_arch == expected_arch