Esempio n. 1
0
# Left disabled. Presumably silences Brian2's own log output when enabled --
# confirm against the helper's definition before relying on it.
#suppress_brian2_logs()
# Debug-level logging is switched on here to get brian2cuda logs; comment the
# call below out for quieter runs.
# (Set log_level_diagnostic() for DynamicConfigCreator diagnostic messages)
BrianLogger.log_level_debug()


# The configuration classes are defined in
# `brian2cuda/tests/features/cuda_configuration.py`
configurations = [
    # C++ standalone baselines (single precision, serial and OpenMP)
    CPPStandaloneConfigurationSinglePrecision,
    CPPStandaloneConfigurationOpenMPMaxThreadsSinglePrecision,
] + [
    # Brian2CUDA: one single-precision configuration per (name, preferences)
    # row below, created in the order listed.
    DynamicConfigCreator(config_name, prefs=config_prefs)
    for config_name, config_prefs in (
        # `parallel_blocks` not set: number of partitions equals the number
        # of SMs on the GPU (108 on A100 GPU)
        ('CUDA standalone (single precision, max blocks, atomics)',
         {'core.default_float_dtype': float32}),
        # 1 partition
        ('CUDA standalone (single precision, 1 block, atomics)',
         {'core.default_float_dtype': float32,
          'devices.cuda_standalone.parallel_blocks': 1}),
        # 64 partitions
        ('CUDA standalone (single precision, 64 blocks, atomics)',
         {'core.default_float_dtype': float32,
          'devices.cuda_standalone.parallel_blocks': 64}),
    )
]

# The `benchmark` classes are defined in brian2cuda/tests/features/speed.py. The
# `n_slice` parameter indexes the `n_range` class attribute of the respective benchmark
    #WeaveConfiguration,
    #LocalConfiguration,
    #    CPPStandaloneConfiguration,
    #    #CPPStandaloneConfigurationSinglePrecision,
    #    CPPStandaloneConfigurationOpenMPMaxThreads,
    #    #CPPStandaloneConfigurationOpenMPMaxThreadsSinglePrecision,
    #
    #    # max blocks
    #    DynamicConfigCreator('CUDA standalone (max blocks, atomics)'),
    #
    #    #DynamicConfigCreator('CUDA standalone (single precision, max blocks, atomics)',
    #    #                     prefs={'core.default_float_dtype': float32}),
    #
    #
    # 1 block
    DynamicConfigCreator('CUDA standalone (1 block, atomics)',
                         prefs={'devices.cuda_standalone.parallel_blocks': 1}),

    #DynamicConfigCreator('CUDA standalone (single precision, 1 block, atomics)',
    #                     prefs={'core.default_float_dtype': float32,
    #                            'devices.cuda_standalone.parallel_blocks': 1}),

    ## 20 blocks
    #DynamicConfigCreator('CUDA standalone (20 blocks, atomics)',
    #                     prefs={'devices.cuda_standalone.parallel_blocks': 20}),

    #DynamicConfigCreator('CUDA standalone (single precision, 20 blocks, atomics)',
    #                     prefs={'core.default_float_dtype': float32,
    #                            'devices.cuda_standalone.parallel_blocks': 20}),

    ## 40 blocks
    #DynamicConfigCreator('CUDA standalone (40 blocks, atomics)',
    #(WeaveConfiguration,                     None),
    #(LocalConfiguration,                     None),

    #(DynamicConfigCreator('CUDA standalone'),
    # 'cuda_standalone'),

    #(DynamicConfigCreator('CUDA standalone bundles',
    #                      git_commit='nemo_bundles'),
    # 'cuda_standalone'),

    #(DynamicConfigCreator("CUDA standalone (profile='blocking')",
    #                      set_device_kwargs={'profile': 'blocking'}),
    # 'cuda_standalone'),
    (CUDAStandaloneConfiguration, 'cuda_standalone'),
    (DynamicConfigCreator(
        "CUDA standalone (no bundles)",
        prefs={'devices.cuda_standalone.push_synapse_bundles':
               False}), 'cuda_standalone'),
    (DynamicConfigCreator("CUDA standalone (no atomics)",
                          prefs={'codegen.generators.cuda.use_atomics':
                                 False}), 'cuda_standalone'),
    (DynamicConfigCreator("CUDA standalone (1 post block)",
                          prefs={'devices.cuda_standalone.parallel_blocks':
                                 1}), 'cuda_standalone'),

    #(DynamicConfigCreator("CUDA standalone (no atomics, no bundles)",
    #                      prefs={'codegen.generators.cuda.use_atomics': False,
    #                             'devices.cuda_standalone.push_synapse_bundles': False}),
    # 'cuda_standalone'),

    #(DynamicConfigCreator("CUDA standalone (no atomics, 1 post block)",
    #                      prefs={'codegen.generators.cuda.use_atomics': False,
Esempio n. 4
0
    additional_dir_name = '_' + sys.argv[1]
else:
    additional_dir_name = ''

# Default `make` arguments for C++ standalone builds (12 parallel jobs).
prefs['devices.cpp_standalone.extra_make_args_unix'] = ['-j12']

# host specific settings
if socket.gethostname() == 'elnath':
    # This host overrides the default with 24 parallel make jobs.
    prefs['devices.cpp_standalone.extra_make_args_unix'] = ['-j24']
    # Compile for -arch=sm_20 on this host. Every existing -arch= flag is
    # dropped first: list.remove('-arch=sm_35') raises ValueError when that
    # exact flag is absent, and keeping some other -arch= flag would pass
    # nvcc two architecture flags. The list is modified in place rather than
    # reassigned, so the preference keeps referring to the same list object.
    nvcc_args = prefs['codegen.cuda.extra_compile_args_nvcc']
    nvcc_args[:] = [arg for arg in nvcc_args if not arg.startswith('-arch=')]
    nvcc_args.append('-arch=sm_20')

configs = [  # configuration                          project_directory
    (NumpyConfiguration, None), (WeaveConfiguration, None),
    (LocalConfiguration, None),
    (DynamicConfigCreator('CUDA standalone'), 'cuda_standalone'),
    (DynamicConfigCreator('CUDA standalone bundles',
                          git_commit='nemo_bundles'), 'cuda_standalone'),
    (DynamicConfigCreator("CUDA standalone (profile='blocking')",
                          set_device_kwargs={'profile':
                                             'blocking'}), 'cuda_standalone'),
    (DynamicConfigCreator("CUDA standalone with 2 blocks per SM",
                          prefs={'devices.cuda_standalone.SM_multiplier':
                                 2}), 'cuda_standalone'),
    (CUDAStandaloneConfiguration, 'cuda_standalone'),
    (CUDAStandaloneConfigurationExtraThresholdKernel, 'cuda_standalone'),
    (CUDAStandaloneConfigurationNoAssert, 'cuda_standalone'),
    (CUDAStandaloneConfigurationCurandDouble, 'cuda_standalone'),
    (CUDAStandaloneConfigurationNoCudaOccupancyAPI, 'cuda_standalone'),
    (CUDAStandaloneConfigurationNoCudaOccupancyAPIProfileCPU,
     'cuda_standalone'),
Esempio n. 5
0
    #(WeaveConfiguration,                     None),
    #(LocalConfiguration,                     None),
    (CUDAStandaloneConfiguration, 'cuda_standalone'),

    #(DynamicConfigCreator('CUDA standalone'),
    # 'cuda_standalone'),

    #(DynamicConfigCreator('CUDA standalone bundles',
    #                      git_commit='nemo_bundles'),
    # 'cuda_standalone'),

    #(DynamicConfigCreator("CUDA standalone (profile='blocking')",
    #                      set_device_kwargs={'profile': 'blocking'}),
    # 'cuda_standalone'),
    (DynamicConfigCreator("CUDA standalone single precision",
                          prefs={'core.default_float_dtype':
                                 float32}), 'cuda_standalone'),
    (DynamicConfigCreator("CUDA standalone 1 post block",
                          prefs={'devices.cuda_standalone.parallel_blocks':
                                 1}), 'cuda_standalone'),

    #(CUDAStandaloneConfigurationExtraThresholdKernel,             'cuda_standalone'),
    #(CUDAStandaloneConfigurationNoAssert,             'cuda_standalone'),
    #(CUDAStandaloneConfigurationNoCudaOccupancyAPI,      'cuda_standalone'),
    #(CUDAStandaloneConfigurationNoCudaOccupancyAPIProfileCPU,    'cuda_standalone'),
    #(CUDAStandaloneConfiguration2BlocksPerSM, 'cuda_standalone'),
    #(CUDAStandaloneConfiguration2BlocksPerSMLaunchBounds, 'cuda_standalone'),
    #(CUDAStandaloneConfigurationSynLaunchBounds,     'cuda_standalone'),
    #(CUDAStandaloneConfiguration2BlocksPerSMSynLaunchBounds, 'cuda_standalone'),
    #(CUDAStandaloneConfigurationProfileGPU,   'cuda_standalone'),
    #(CUDAStandaloneConfigurationProfileCPU,   'cuda_standalone'),
Esempio n. 6
0
]

# Brian2CUDA configurations: append one single-precision configuration per
# partition count. The special value 'max' passes None as the
# `parallel_blocks` preference instead of a number.
for partitions in [1, 16, 32, 48, 64, 80, 96, 'max']:
    if partitions == 'max':
        block_name, partition_pref = 'max blocks', None
    elif partitions == 1:
        # Singular label for the single-block case
        block_name, partition_pref = '1 block', partitions
    else:
        block_name, partition_pref = f'{partitions} blocks', partitions

    config = DynamicConfigCreator(
        f'CUDA standalone (single precision, {block_name}, atomics)',
        prefs={'core.default_float_dtype': float32,
               'devices.cuda_standalone.parallel_blocks': partition_pref},
    )
    configurations.append(config)

# The `benchmark` classes are defined in brian2cuda/tests/features/speed.py.
# Each entry of `speed_tests` is a `(benchmark, n_slice)` pair; `n_slice`
# indexes the `n_range` class attribute of the respective benchmark class to
# determine the network sizes for which this benchmark should be run.
# XXX: Only run largest network size: slice(-1, None)
speed_tests = [
    (benchmark, slice(-1, None))
    for benchmark in (
        # LIF benchmark with heterogeneous delays
        BrunelHakimHeterogDelays,
        # STDP benchmark with heterogeneous delays
        STDPCUDARandomConnectivityHeterogeneousDelays,
    )
]
Esempio n. 7
0
    prefs['devices.cpp_standalone.extra_make_args_unix'] = ['-j24']
    prefs['codegen.cuda.extra_compile_args_nvcc'].remove('-arch=sm_35')
    prefs['codegen.cuda.extra_compile_args_nvcc'].extend(['-arch=sm_20'])

configs = [  # configuration                          project_directory
    #(NumpyConfiguration,                     None),
    #(WeaveConfiguration,                     None),
    #(LocalConfiguration,                     None),
    (CPPStandaloneConfiguration, 'cpp_standalone'),
    (CPPStandaloneConfigurationSinglePrecision, 'cpp_standalone'),
    (CPPStandaloneConfigurationOpenMPMaxThreads, 'cpp_standalone'),
    (CPPStandaloneConfigurationOpenMPMaxThreadsSinglePrecision,
     'cpp_standalone'),

    # max blocks
    (DynamicConfigCreator('CUDA standalone (max blocks, atomics)'),
     'cuda_standalone'),
    (DynamicConfigCreator(
        'CUDA standalone (single precision, max blocks, atomics)',
        prefs={'core.default_float_dtype': float32}), 'cuda_standalone'),

    # 1 block
    (DynamicConfigCreator('CUDA standalone (1 block, atomics)',
                          prefs={'devices.cuda_standalone.parallel_blocks':
                                 1}), 'cuda_standalone'),
    (DynamicConfigCreator(
        'CUDA standalone (single precision, 1 block, atomics)',
        prefs={
            'core.default_float_dtype': float32,
            'devices.cuda_standalone.parallel_blocks': 1
        }), 'cuda_standalone'),
    #(WeaveConfiguration,                     None),
    #(LocalConfiguration,                     None),
    #(CUDAStandaloneConfiguration,             'cuda_standalone'),

    #(DynamicConfigCreator('CUDA standalone'),
    # 'cuda_standalone'),

    #(DynamicConfigCreator('CUDA standalone bundles',
    #                      git_commit='nemo_bundles'),
    # 'cuda_standalone'),

    #(DynamicConfigCreator("CUDA standalone (profile='blocking')",
    #                      set_device_kwargs={'profile': 'blocking'}),
    # 'cuda_standalone'),
    (DynamicConfigCreator(
        "CUDA standalone (TITAN Xp, Pascal)",
        prefs={'devices.cpp_standalone.run_environment_variables':
               gpu_0_env}), 'cuda_standalone'),

    #(DynamicConfigCreator("CUDA standalone (GeForce GTX TITAN X, Maxwell)",
    #                      prefs={'devices.cpp_standalone.run_environment_variables': gpu_1_env}),
    # 'cuda_standalone'),
    (DynamicConfigCreator(
        "CUDA standalone single precision (TITAN Xp, Pascal)",
        prefs={
            'core.default_float_dtype': float32,
            'devices.cpp_standalone.run_environment_variables': gpu_0_env
        }), 'cuda_standalone'),

    #(DynamicConfigCreator("CUDA standalone single precision (GeForce GTX TITAN X, Maxwell)",
    #                      prefs={'core.default_float_dtype': float32,
    #                             'devices.cpp_standalone.run_environment_variables': gpu_1_env}),