Exemple #1
0
def createHsaTopology(options):
    """Populate a minimal single-node KFD topology tree in the output dir.

    Recreates ``fs/sys/devices/virtual/kfd/kfd/topology`` beneath
    ``m5.options.outdir`` and fills in ``generation_id``,
    ``system_properties`` and a single node (``nodes/0``) with its
    ``gpu_id``, ``name`` and ``properties`` files.

    NOTE(review): ``remake_dir`` and ``file_append`` are defined outside this
    function; presumably they recreate a directory and write a value to the
    file named by the (directory, filename) pair -- confirm against their
    definitions.

    Args:
        options: parsed simulation options.  Reads ``num_cpus``,
            ``num_compute_units``, ``simds_per_cu``, ``wfs_per_simd``,
            ``lds_size``, ``wf_size``, ``sa_per_complex``, ``cu_per_sa``,
            ``gpu_clock`` and ``CPUClock``.
    """
    def render(entries):
        # One "<key> <value>" line per entry, each newline-terminated.
        return ''.join('%s %s\n' % (key, value) for key, value in entries)

    topo_root = joinpath(m5.options.outdir,
                         'fs/sys/devices/virtual/kfd/kfd/topology')
    remake_dir(topo_root)

    # Values ripped from a real Kaveri platform to appease kmt version checks
    file_append((topo_root, 'generation_id'), 1)

    # System-wide properties
    platform_entries = [
        ('platform_oem', 2314885673410447169),
        ('platform_id', 35322352389441),
        ('platform_rev', 1),
    ]
    file_append((topo_root, 'system_properties'), render(platform_entries))

    # Topology tree.  TODO: just the bare minimum to pass for now
    gpu_node = joinpath(topo_root, 'nodes/0')
    remake_dir(gpu_node)

    # must show valid kaveri gpu id or massive meltdown
    file_append((gpu_node, 'gpu_id'), 2765)

    # must have marketing name
    file_append((gpu_node, 'name'), 'Carrizo\n')

    # Global node properties.
    # NOTE: SIMD count triggers a valid GPU agent creation
    node_entries = [
        ('cpu_cores_count', options.num_cpus),
        ('simd_count', options.num_compute_units * options.simds_per_cu),
        ('mem_banks_count', 0),
        ('caches_count', 0),
        ('io_links_count', 0),
        ('cpu_core_id_base', 16),
        ('simd_id_base', 2147483648),
        ('max_waves_per_simd', options.wfs_per_simd),
        ('lds_size_in_kb', int(options.lds_size / 1024)),
        ('gds_size_in_kb', 0),
        ('wave_front_size', options.wf_size),
        ('array_count', 1),
        ('simd_arrays_per_engine', options.sa_per_complex),
        ('cu_per_simd_array', options.cu_per_sa),
        ('simd_per_cu', options.simds_per_cu),
        ('max_slots_scratch_cu', 32),
        ('vendor_id', 4098),
        ('device_id', 39028),
        ('location_id', 8),
        # Clocks are divided by 1e6 and truncated to an integer
        ('max_engine_clk_fcompute',
         int(toFrequency(options.gpu_clock) / 1e6)),
        ('local_mem_size', 0),
        ('fw_version', 699),
        ('capability', 4738),
        ('max_engine_clk_ccompute',
         int(toFrequency(options.CPUClock) / 1e6)),
    ]

    file_append((gpu_node, 'properties'), render(node_entries))
Exemple #2
0
def createCarrizoTopology(options):
    """Populate a single-node KFD topology tree for a Carrizo/Raven APU.

    Recreates ``fs/sys/devices/virtual/kfd/kfd/topology`` beneath
    ``m5.options.outdir`` and fills in ``generation_id``,
    ``system_properties``, one node (``nodes/0``) with its ``gpu_id``,
    ``name`` and ``properties`` files, and one ``mem_banks/<i>/properties``
    file per memory bank.

    NOTE(review): ``remake_dir`` and ``file_append`` are defined outside this
    function; presumably they recreate a directory and write a value to the
    file named by the (directory, filename) pair -- confirm against their
    definitions.

    Args:
        options: parsed simulation options.  Reads ``gfx_version``
            (``"gfx801"`` or ``"gfx902"``), ``num_cpus``,
            ``num_compute_units``, ``simds_per_cu``, ``wfs_per_simd``,
            ``lds_size``, ``wf_size``, ``sa_per_complex``, ``cu_per_sa``,
            ``gpu_clock``, ``CPUClock`` and ``mem_size``.

    Raises:
        KeyError: if ``options.gfx_version`` is not one of the known versions.
    """
    topology_dir = joinpath(m5.options.outdir,
                            'fs/sys/devices/virtual/kfd/kfd/topology')
    remake_dir(topology_dir)

    # Ripped from real Kaveri platform to appease kmt version checks
    # Set up generation_id
    file_append((topology_dir, 'generation_id'), 1)

    # Set up system properties
    sys_prop = ('platform_oem 2314885673410447169\n'
                'platform_id 35322352389441\n'
                'platform_rev 1\n')
    file_append((topology_dir, 'system_properties'), sys_prop)

    # Populate the topology tree
    # TODO: Just the bare minimum to pass for now
    node_dir = joinpath(topology_dir, 'nodes/0')
    remake_dir(node_dir)

    # must show valid kaveri gpu id or massive meltdown
    file_append((node_dir, 'gpu_id'), 2765)

    # Marketing name and device id for each supported gfx version
    gfx_dict = {
        "gfx801": {
            "name": "Carrizo\n",
            "id": 39028
        },
        "gfx902": {
            "name": "Raven\n",
            "id": 5597
        }
    }
    # Single lookup; an unknown gfx_version raises KeyError here
    gfx_info = gfx_dict[options.gfx_version]

    # must have marketing name
    file_append((node_dir, 'name'), gfx_info["name"])

    mem_banks_cnt = 1

    # Should be the same as the render driver filename (dri/renderD<drm_num>)
    drm_num = 128

    device_id = gfx_info["id"]

    # populate global node properties
    # NOTE: SIMD count triggers a valid GPU agent creation
    node_prop = (
        f'cpu_cores_count {options.num_cpus}\n'
        f'simd_count {options.num_compute_units * options.simds_per_cu}\n'
        f'mem_banks_count {mem_banks_cnt}\n'
        'caches_count 0\n'
        'io_links_count 0\n'
        'cpu_core_id_base 16\n'
        'simd_id_base 2147483648\n'
        f'max_waves_per_simd {options.wfs_per_simd}\n'
        f'lds_size_in_kb {int(options.lds_size / 1024)}\n'
        'gds_size_in_kb 0\n'
        f'wave_front_size {options.wf_size}\n'
        'array_count 1\n'
        f'simd_arrays_per_engine {options.sa_per_complex}\n'
        f'cu_per_simd_array {options.cu_per_sa}\n'
        f'simd_per_cu {options.simds_per_cu}\n'
        'max_slots_scratch_cu 32\n'
        'vendor_id 4098\n'
        f'device_id {device_id}\n'
        'location_id 8\n'
        f'drm_render_minor {drm_num}\n'
        # Clocks are divided by 1e6 and truncated to an integer
        f'max_engine_clk_fcompute {int(toFrequency(options.gpu_clock) / 1e6)}\n'
        'local_mem_size 0\n'
        'fw_version 699\n'
        'capability 4738\n'
        f'max_engine_clk_ccompute {int(toFrequency(options.CPUClock) / 1e6)}\n'
    )

    file_append((node_dir, 'properties'), node_prop)

    for i in range(mem_banks_cnt):
        mem_dir = joinpath(node_dir, f'mem_banks/{i}')
        remake_dir(mem_dir)

        # Heap type value taken from real system, heap type values:
        # https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/blob/roc-4.0.x/include/hsakmttypes.h#L317
        # Every property, including size_in_bytes, is newline-terminated so
        # that each "<key> <value>" pair sits on its own line.
        mem_prop = (
            'heap_type 0\n'
            f'size_in_bytes {toMemorySize(options.mem_size)}\n'
            'flags 0\n'
            'width 64\n'
            'mem_clk_max 1600\n'
        )
        file_append((mem_dir, 'properties'), mem_prop)
Exemple #3
0
def createFijiTopology(options):
    """Populate a two-node (CPU + Fiji dGPU) KFD topology tree.

    Recreates ``fs/sys/devices/virtual/kfd/kfd/topology`` and
    ``fs/sys/module/amdgpu/parameters`` beneath ``m5.options.outdir``.
    Node 0 is registered as a CPU and node 1 as a Fiji dGPU; each node gets
    ``gpu_id``, ``name``, ``io_links/0/properties``, ``properties`` and
    ``mem_banks/0/properties`` files.

    NOTE(review): ``remake_dir`` and ``file_append`` are defined outside this
    function; presumably they recreate a directory and write a value to the
    file named by the (directory, filename) pair -- confirm against their
    definitions.

    Args:
        options: parsed simulation options.  Reads ``num_cpus``,
            ``num_compute_units``, ``simds_per_cu``, ``wfs_per_simd``,
            ``lds_size``, ``wf_size``, ``sa_per_complex``, ``cu_per_sa``,
            ``gpu_clock`` and ``CPUClock``.
    """
    def render(entries):
        # One "<key> <value>" line per entry, each newline-terminated.
        return ''.join('%s %s\n' % (key, value) for key, value in entries)

    def write_io_link(node, src, dst, link_flags):
        # Describe the single link of `node` in io_links/0/properties.
        link_dir = joinpath(node, 'io_links/0')
        remake_dir(link_dir)
        file_append((link_dir, 'properties'), render([
            ('type', 2),
            ('version_major', 0),
            ('version_minor', 0),
            ('node_from', src),
            ('node_to', dst),
            ('weight', 20),
            ('min_latency', 0),
            ('max_latency', 0),
            ('min_bandwidth', 0),
            ('max_bandwidth', 0),
            ('recommended_transfer_size', 0),
            ('flags', link_flags),
        ]))

    def write_mem_bank(node, heap_type, size_in_bytes, width, mem_clk_max):
        # Describe the single memory bank of `node` in
        # mem_banks/0/properties.
        # Heap type value taken from real system, heap type values:
        # https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/blob/roc-4.0.x/include/hsakmttypes.h#L317
        bank_dir = joinpath(node, 'mem_banks/0')
        remake_dir(bank_dir)
        file_append((bank_dir, 'properties'), render([
            ('heap_type', heap_type),
            ('size_in_bytes', size_in_bytes),
            ('flags', 0),
            ('width', width),
            ('mem_clk_max', mem_clk_max),
        ]))

    topo_root = joinpath(m5.options.outdir,
                         'fs/sys/devices/virtual/kfd/kfd/topology')
    remake_dir(topo_root)

    amdgpu_params = joinpath(m5.options.outdir,
                             'fs/sys/module/amdgpu/parameters')
    remake_dir(amdgpu_params)

    # Fiji reported VM size in GB.  Used to reserve an allocation from CPU
    # to implement SVM (i.e. GPUVM64 pointers and X86 pointers agree)
    file_append((amdgpu_params, 'vm_size'), 256)

    # Ripped from real Fiji platform to appease KMT version checks
    file_append((topo_root, 'generation_id'), 2)

    # Set up system properties.  Register as ast-rocm server
    file_append((topo_root, 'system_properties'), render([
        ('platform_oem', 35498446626881),
        ('platform_id', 71791775140929),
        ('platform_rev', 2),
    ]))

    # ---- Node 0: the CPU -------------------------------------------------
    # Our dGPU system is two nodes.  Node 0 is a CPU and Node 1 is a dGPU
    cpu_node = joinpath(topo_root, 'nodes/0')
    remake_dir(cpu_node)

    # Register as a CPU
    file_append((cpu_node, 'gpu_id'), 0)
    file_append((cpu_node, 'name'), '')

    # CPU links.  Only thing that matters is we tell the runtime that GPU is
    # connected through PCIe to CPU socket 0.
    cpu_link_count = 1
    write_io_link(cpu_node, 0, 1, 13)

    # Populate CPU node properties
    cpu_entries = [
        ('cpu_cores_count', options.num_cpus),
        ('simd_count', 0),
        ('mem_banks_count', 1),
        ('caches_count', 0),
        ('io_links_count', cpu_link_count),
        ('cpu_core_id_base', 0),
        ('simd_id_base', 0),
        ('max_waves_per_simd', 0),
        ('lds_size_in_kb', 0),
        ('gds_size_in_kb', 0),
        ('wave_front_size', 64),
        ('array_count', 0),
        ('simd_arrays_per_engine', 0),
        ('cu_per_simd_array', 0),
        ('simd_per_cu', 0),
        ('max_slots_scratch_cu', 0),
        ('vendor_id', 0),
        ('device_id', 0),
        ('location_id', 0),
        ('drm_render_minor', 0),
        ('max_engine_clk_ccompute', 3400),
    ]
    file_append((cpu_node, 'properties'), render(cpu_entries))

    # CPU memory reporting
    write_mem_bank(cpu_node, 0, 33704329216, 72, 2400)

    # ---- Node 1: the GPU -------------------------------------------------
    gpu_node = joinpath(topo_root, 'nodes/1')
    remake_dir(gpu_node)

    # Register as a Fiji
    file_append((gpu_node, 'gpu_id'), 50156)
    file_append((gpu_node, 'name'), 'Fiji\n')

    # Should be the same as the render driver filename (dri/renderD<drm_num>)
    render_minor = 128

    # Real Fiji shows 96, but building that topology is complex and doesn't
    # appear to be required for anything.
    cache_count = 0

    # GPU links.  Only thing that matters is we tell the runtime that GPU is
    # connected through PCIe to CPU socket 0.
    gpu_link_count = 1
    write_io_link(gpu_node, 1, 0, 1)

    # Populate GPU node properties
    gpu_entries = [
        ('cpu_cores_count', 0),
        ('simd_count', options.num_compute_units * options.simds_per_cu),
        ('mem_banks_count', 1),
        ('caches_count', cache_count),
        ('io_links_count', gpu_link_count),
        ('cpu_core_id_base', 0),
        ('simd_id_base', 2147487744),
        ('max_waves_per_simd', options.wfs_per_simd),
        ('lds_size_in_kb', int(options.lds_size / 1024)),
        ('gds_size_in_kb', 0),
        ('wave_front_size', options.wf_size),
        ('array_count', 4),
        ('simd_arrays_per_engine', options.sa_per_complex),
        ('cu_per_simd_array', options.cu_per_sa),
        ('simd_per_cu', options.simds_per_cu),
        ('max_slots_scratch_cu', 32),
        ('vendor_id', 4098),
        ('device_id', 29440),
        ('location_id', 512),
        ('drm_render_minor', render_minor),
        # Clocks are divided by 1e6 and truncated to an integer
        ('max_engine_clk_fcompute',
         int(toFrequency(options.gpu_clock) / 1e6)),
        ('local_mem_size', 4294967296),
        ('fw_version', 730),
        ('capability', 4736),
        ('max_engine_clk_ccompute',
         int(toFrequency(options.CPUClock) / 1e6)),
    ]
    file_append((gpu_node, 'properties'), render(gpu_entries))

    # Fiji HBM reporting
    # TODO: Extract size, clk, and width from sim parameters
    write_mem_bank(gpu_node, 1, 4294967296, 4096, 500)