def createHsaTopology(options):
    """Populate a minimal one-node KFD topology tree in the output directory.

    The tree is rooted at ``fs/sys/devices/virtual/kfd/kfd/topology`` below
    ``m5.options.outdir`` and holds the ``generation_id`` and
    ``system_properties`` files plus a single ``nodes/0`` directory with
    ``gpu_id``, ``name`` and ``properties`` files.

    Args:
        options: Parsed simulation options. The attributes read here are
            num_cpus, num_compute_units, simds_per_cu, wfs_per_simd,
            lds_size, wf_size, sa_per_complex, cu_per_sa, gpu_clock and
            CPUClock.
    """
    topo_root = joinpath(m5.options.outdir,
                         'fs/sys/devices/virtual/kfd/kfd/topology')
    remake_dir(topo_root)

    # Values ripped from a real Kaveri platform to appease kmt version checks.
    file_append((topo_root, 'generation_id'), 1)

    system_lines = [
        'platform_oem 2314885673410447169\n',
        'platform_id 35322352389441\n',
        'platform_rev 1\n',
    ]
    file_append((topo_root, 'system_properties'), ''.join(system_lines))

    # Populate the topology tree.
    # TODO: Just the bare minimum to pass for now
    gpu_node = joinpath(topo_root, 'nodes/0')
    remake_dir(gpu_node)

    # Must show a valid kaveri gpu id or massive meltdown.
    file_append((gpu_node, 'gpu_id'), 2765)

    # A marketing name is mandatory.
    file_append((gpu_node, 'name'), 'Carrizo\n')

    # Global node properties, one "name value" pair per line.
    # NOTE: SIMD count triggers a valid GPU agent creation.
    node_lines = [
        'cpu_cores_count %s\n' % options.num_cpus,
        'simd_count %s\n'
        % (options.num_compute_units * options.simds_per_cu),
        'mem_banks_count 0\n',
        'caches_count 0\n',
        'io_links_count 0\n',
        'cpu_core_id_base 16\n',
        'simd_id_base 2147483648\n',
        'max_waves_per_simd %s\n' % options.wfs_per_simd,
        'lds_size_in_kb %s\n' % int(options.lds_size / 1024),
        'gds_size_in_kb 0\n',
        'wave_front_size %s\n' % options.wf_size,
        'array_count 1\n',
        'simd_arrays_per_engine %s\n' % options.sa_per_complex,
        'cu_per_simd_array %s\n' % options.cu_per_sa,
        'simd_per_cu %s\n' % options.simds_per_cu,
        'max_slots_scratch_cu 32\n',
        'vendor_id 4098\n',
        'device_id 39028\n',
        'location_id 8\n',
        # Clock values are divided by 1e6 before being written.
        'max_engine_clk_fcompute %s\n'
        % int(toFrequency(options.gpu_clock) / 1e6),
        'local_mem_size 0\n',
        'fw_version 699\n',
        'capability 4738\n',
        'max_engine_clk_ccompute %s\n'
        % int(toFrequency(options.CPUClock) / 1e6),
    ]
    file_append((gpu_node, 'properties'), ''.join(node_lines))
def createCarrizoTopology(options):
    """Populate a one-node APU KFD topology tree in the output directory.

    The tree is rooted at ``fs/sys/devices/virtual/kfd/kfd/topology`` below
    ``m5.options.outdir``. It holds ``generation_id`` and
    ``system_properties`` files and a single ``nodes/0`` directory with
    ``gpu_id``, ``name`` and ``properties`` files, plus one
    ``mem_banks/<i>/properties`` file per memory bank.

    Args:
        options: Parsed simulation options. The attributes read here are
            gfx_version, num_cpus, num_compute_units, simds_per_cu,
            wfs_per_simd, lds_size, wf_size, sa_per_complex, cu_per_sa,
            gpu_clock, CPUClock and mem_size.

    Raises:
        KeyError: If ``options.gfx_version`` is neither "gfx801" nor
            "gfx902".
    """
    topology_dir = joinpath(m5.options.outdir,
                            'fs/sys/devices/virtual/kfd/kfd/topology')
    remake_dir(topology_dir)

    # Ripped from real Kaveri platform to appease kmt version checks
    # Set up generation_id
    file_append((topology_dir, 'generation_id'), 1)

    # Set up system properties
    sys_prop = ''.join([
        'platform_oem 2314885673410447169\n',
        'platform_id 35322352389441\n',
        'platform_rev 1\n',
    ])
    file_append((topology_dir, 'system_properties'), sys_prop)

    # Populate the topology tree
    # TODO: Just the bare minimum to pass for now
    node_dir = joinpath(topology_dir, 'nodes/0')
    remake_dir(node_dir)

    # must show valid kaveri gpu id or massive meltdown
    file_append((node_dir, 'gpu_id'), 2765)

    gfx_dict = {
        "gfx801": {"name": "Carrizo\n", "id": 39028},
        "gfx902": {"name": "Raven\n", "id": 5597},
    }
    # A single lookup; an unsupported gfx_version raises KeyError here.
    gfx_info = gfx_dict[options.gfx_version]

    # must have marketing name
    file_append((node_dir, 'name'), gfx_info["name"])

    mem_banks_cnt = 1

    # Should be the same as the render driver filename
    # (dri/renderDN, where N is drm_num)
    drm_num = 128

    device_id = gfx_info["id"]

    # populate global node properties
    # NOTE: SIMD count triggers a valid GPU agent creation
    node_prop = ''.join([
        'cpu_cores_count %s\n' % options.num_cpus,
        'simd_count %s\n'
        % (options.num_compute_units * options.simds_per_cu),
        'mem_banks_count %s\n' % mem_banks_cnt,
        'caches_count 0\n',
        'io_links_count 0\n',
        'cpu_core_id_base 16\n',
        'simd_id_base 2147483648\n',
        'max_waves_per_simd %s\n' % options.wfs_per_simd,
        'lds_size_in_kb %s\n' % int(options.lds_size / 1024),
        'gds_size_in_kb 0\n',
        'wave_front_size %s\n' % options.wf_size,
        'array_count 1\n',
        'simd_arrays_per_engine %s\n' % options.sa_per_complex,
        'cu_per_simd_array %s\n' % options.cu_per_sa,
        'simd_per_cu %s\n' % options.simds_per_cu,
        'max_slots_scratch_cu 32\n',
        'vendor_id 4098\n',
        'device_id %s\n' % device_id,
        'location_id 8\n',
        'drm_render_minor %s\n' % drm_num,
        'max_engine_clk_fcompute %s\n'
        % int(toFrequency(options.gpu_clock) / 1e6),
        'local_mem_size 0\n',
        'fw_version 699\n',
        'capability 4738\n',
        'max_engine_clk_ccompute %s\n'
        % int(toFrequency(options.CPUClock) / 1e6),
    ])
    file_append((node_dir, 'properties'), node_prop)

    # The reported size does not depend on the bank index, so compute it once.
    mem_size_bytes = toMemorySize(options.mem_size)

    for i in range(mem_banks_cnt):
        mem_dir = joinpath(node_dir, f'mem_banks/{i}')
        remake_dir(mem_dir)

        # Heap type value taken from real system, heap type values:
        # https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/blob/roc-4.0.x/include/hsakmttypes.h#L317
        # Every entry is newline-terminated so that each "name value" pair
        # sits on its own line; without the newline after size_in_bytes the
        # value runs straight into the following "flags" entry.
        mem_prop = ''.join([
            'heap_type 0\n',
            f'size_in_bytes {mem_size_bytes}\n',
            'flags 0\n',
            'width 64\n',
            'mem_clk_max 1600\n',
        ])
        file_append((mem_dir, 'properties'), mem_prop)
def createFijiTopology(options):
    """Populate a two-node (CPU + dGPU) KFD topology tree in the output dir.

    Two directories are (re)created below ``m5.options.outdir``:
    ``fs/sys/devices/virtual/kfd/kfd/topology`` for the topology itself and
    ``fs/sys/module/amdgpu/parameters`` for the ``vm_size`` parameter.
    Node 0 is registered as a CPU and node 1 as a Fiji dGPU; each node gets
    one io_link, one memory bank and a ``properties`` file.

    Args:
        options: Parsed simulation options. The attributes read here are
            num_cpus, num_compute_units, simds_per_cu, wfs_per_simd,
            lds_size, wf_size, sa_per_complex, cu_per_sa, gpu_clock and
            CPUClock.
    """
    topo_root = joinpath(m5.options.outdir,
                         'fs/sys/devices/virtual/kfd/kfd/topology')
    remake_dir(topo_root)

    params_root = joinpath(m5.options.outdir,
                           'fs/sys/module/amdgpu/parameters')
    remake_dir(params_root)

    # Fiji reported VM size in GB. Used to reserve an allocation from CPU
    # to implement SVM (i.e. GPUVM64 pointers and X86 pointers agree).
    file_append((params_root, 'vm_size'), 256)

    # Ripped from real Fiji platform to appease KMT version checks.
    file_append((topo_root, 'generation_id'), 2)

    # Set up system properties. Register as ast-rocm server.
    system_lines = [
        'platform_oem 35498446626881\n',
        'platform_id 71791775140929\n',
        'platform_rev 2\n',
    ]
    file_append((topo_root, 'system_properties'), ''.join(system_lines))

    # ------------------------------------------------------------------
    # Node 0: the CPU. Our dGPU system is two nodes; node 1 is the dGPU.
    # ------------------------------------------------------------------
    cpu_node = joinpath(topo_root, 'nodes/0')
    remake_dir(cpu_node)

    # Register as a CPU.
    file_append((cpu_node, 'gpu_id'), 0)
    file_append((cpu_node, 'name'), '')

    # CPU links. Only thing that matters is we tell the runtime that GPU is
    # connected through PCIe to CPU socket 0.
    cpu_link_count = 1
    cpu_link_dir = joinpath(cpu_node, 'io_links/0')
    remake_dir(cpu_link_dir)
    cpu_link_lines = [
        'type 2\n',
        'version_major 0\n',
        'version_minor 0\n',
        'node_from 0\n',
        'node_to 1\n',
        'weight 20\n',
        'min_latency 0\n',
        'max_latency 0\n',
        'min_bandwidth 0\n',
        'max_bandwidth 0\n',
        'recommended_transfer_size 0\n',
        'flags 13\n',
    ]
    file_append((cpu_link_dir, 'properties'), ''.join(cpu_link_lines))

    # Populate CPU node properties.
    cpu_node_lines = [
        'cpu_cores_count %s\n' % options.num_cpus,
        'simd_count 0\n',
        'mem_banks_count 1\n',
        'caches_count 0\n',
        'io_links_count %s\n' % cpu_link_count,
        'cpu_core_id_base 0\n',
        'simd_id_base 0\n',
        'max_waves_per_simd 0\n',
        'lds_size_in_kb 0\n',
        'gds_size_in_kb 0\n',
        'wave_front_size 64\n',
        'array_count 0\n',
        'simd_arrays_per_engine 0\n',
        'cu_per_simd_array 0\n',
        'simd_per_cu 0\n',
        'max_slots_scratch_cu 0\n',
        'vendor_id 0\n',
        'device_id 0\n',
        'location_id 0\n',
        'drm_render_minor 0\n',
        'max_engine_clk_ccompute 3400\n',
    ]
    file_append((cpu_node, 'properties'), ''.join(cpu_node_lines))

    # CPU memory reporting.
    cpu_mem_dir = joinpath(cpu_node, 'mem_banks/0')
    remake_dir(cpu_mem_dir)

    # Heap type value taken from real system, heap type values:
    # https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/blob/roc-4.0.x/include/hsakmttypes.h#L317
    cpu_mem_lines = [
        'heap_type 0\n',
        'size_in_bytes 33704329216\n',
        'flags 0\n',
        'width 72\n',
        'mem_clk_max 2400\n',
    ]
    file_append((cpu_mem_dir, 'properties'), ''.join(cpu_mem_lines))

    # ------------------------------------------------------------------
    # Node 1: the GPU.
    # ------------------------------------------------------------------
    gpu_node = joinpath(topo_root, 'nodes/1')
    remake_dir(gpu_node)

    # Register as a Fiji.
    file_append((gpu_node, 'gpu_id'), 50156)
    file_append((gpu_node, 'name'), 'Fiji\n')

    # Should be the same as the render driver filename
    # (dri/renderDN, where N is this number).
    render_minor = 128

    # Real Fiji shows 96, but building that topology is complex and doesn't
    # appear to be required for anything.
    cache_count = 0

    # GPU links. Only thing that matters is we tell the runtime that GPU is
    # connected through PCIe to CPU socket 0.
    gpu_link_count = 1
    gpu_link_dir = joinpath(gpu_node, 'io_links/0')
    remake_dir(gpu_link_dir)
    gpu_link_lines = [
        'type 2\n',
        'version_major 0\n',
        'version_minor 0\n',
        'node_from 1\n',
        'node_to 0\n',
        'weight 20\n',
        'min_latency 0\n',
        'max_latency 0\n',
        'min_bandwidth 0\n',
        'max_bandwidth 0\n',
        'recommended_transfer_size 0\n',
        'flags 1\n',
    ]
    file_append((gpu_link_dir, 'properties'), ''.join(gpu_link_lines))

    # Populate GPU node properties.
    gpu_node_lines = [
        'cpu_cores_count 0\n',
        'simd_count %s\n'
        % (options.num_compute_units * options.simds_per_cu),
        'mem_banks_count 1\n',
        'caches_count %s\n' % cache_count,
        'io_links_count %s\n' % gpu_link_count,
        'cpu_core_id_base 0\n',
        'simd_id_base 2147487744\n',
        'max_waves_per_simd %s\n' % options.wfs_per_simd,
        'lds_size_in_kb %s\n' % int(options.lds_size / 1024),
        'gds_size_in_kb 0\n',
        'wave_front_size %s\n' % options.wf_size,
        'array_count 4\n',
        'simd_arrays_per_engine %s\n' % options.sa_per_complex,
        'cu_per_simd_array %s\n' % options.cu_per_sa,
        'simd_per_cu %s\n' % options.simds_per_cu,
        'max_slots_scratch_cu 32\n',
        'vendor_id 4098\n',
        'device_id 29440\n',
        'location_id 512\n',
        'drm_render_minor %s\n' % render_minor,
        # Clock values are divided by 1e6 before being written.
        'max_engine_clk_fcompute %s\n'
        % int(toFrequency(options.gpu_clock) / 1e6),
        'local_mem_size 4294967296\n',
        'fw_version 730\n',
        'capability 4736\n',
        'max_engine_clk_ccompute %s\n'
        % int(toFrequency(options.CPUClock) / 1e6),
    ]
    file_append((gpu_node, 'properties'), ''.join(gpu_node_lines))

    # Fiji HBM reporting.
    # TODO: Extract size, clk, and width from sim parameters
    gpu_mem_dir = joinpath(gpu_node, 'mem_banks/0')
    remake_dir(gpu_mem_dir)

    # Heap type value taken from real system, heap type values:
    # https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/blob/roc-4.0.x/include/hsakmttypes.h#L317
    gpu_mem_lines = [
        'heap_type 1\n',
        'size_in_bytes 4294967296\n',
        'flags 0\n',
        'width 4096\n',
        'mem_clk_max 500\n',
    ]
    file_append((gpu_mem_dir, 'properties'), ''.join(gpu_mem_lines))