#suppress_brian2_logs()

# brian2cuda logging is switched on at debug level here; comment the call
# below out to silence it.
# (Set log_level_diagnostic() for DynamicConfigCreator diagnostic messages)
BrianLogger.log_level_debug()

# Configurations to benchmark. The configuration classes are defined in
# `brian2cuda/tests/features/cuda_configuration.py`
configurations = [
    # C++
    CPPStandaloneConfigurationSinglePrecision,
    CPPStandaloneConfigurationOpenMPMaxThreadsSinglePrecision,

    # Brian2CUDA
    ## number of partitions equal number of SMs on GPU (108 on A100 GPU)
    DynamicConfigCreator(
        'CUDA standalone (single precision, max blocks, atomics)',
        prefs={
            'core.default_float_dtype': float32,
        },
    ),
    ## 1 partition
    DynamicConfigCreator(
        'CUDA standalone (single precision, 1 block, atomics)',
        prefs={
            'core.default_float_dtype': float32,
            'devices.cuda_standalone.parallel_blocks': 1,
        },
    ),
    ## 64 partitions
    DynamicConfigCreator(
        'CUDA standalone (single precision, 64 blocks, atomics)',
        prefs={
            'core.default_float_dtype': float32,
            'devices.cuda_standalone.parallel_blocks': 64,
        },
    ),
]

# The `benchmark` classes are defined in brian2cuda/tests/features/speed.py. The
# `n_slice` parameter indexes the `n_range` class attribute of the respective benchmark
#WeaveConfiguration, #LocalConfiguration, # CPPStandaloneConfiguration, # #CPPStandaloneConfigurationSinglePrecision, # CPPStandaloneConfigurationOpenMPMaxThreads, # #CPPStandaloneConfigurationOpenMPMaxThreadsSinglePrecision, # # # max blocks # DynamicConfigCreator('CUDA standalone (max blocks, atomics)'), # # #DynamicConfigCreator('CUDA standalone (single precision, max blocks, atomics)', # # prefs={'core.default_float_dtype': float32}), # # # 1 block DynamicConfigCreator('CUDA standalone (1 block, atomics)', prefs={'devices.cuda_standalone.parallel_blocks': 1}), #DynamicConfigCreator('CUDA standalone (single precision, 1 block, atomics)', # prefs={'core.default_float_dtype': float32, # 'devices.cuda_standalone.parallel_blocks': 1}), ## 20 blocks #DynamicConfigCreator('CUDA standalone (20 blocks, atomics)', # prefs={'devices.cuda_standalone.parallel_blocks': 20}), #DynamicConfigCreator('CUDA standalone (single precision, 20 blocks, atomics)', # prefs={'core.default_float_dtype': float32, # 'devices.cuda_standalone.parallel_blocks': 20}), ## 40 blocks #DynamicConfigCreator('CUDA standalone (40 blocks, atomics)',
#(WeaveConfiguration, None), #(LocalConfiguration, None), #(DynamicConfigCreator('CUDA standalone'), # 'cuda_standalone'), #(DynamicConfigCreator('CUDA standalone bundles', # git_commit='nemo_bundles'), # 'cuda_standalone'), #(DynamicConfigCreator("CUDA standalone (profile='blocking')", # set_device_kwargs={'profile': 'blocking'}), # 'cuda_standalone'), (CUDAStandaloneConfiguration, 'cuda_standalone'), (DynamicConfigCreator( "CUDA standalone (no bundles)", prefs={'devices.cuda_standalone.push_synapse_bundles': False}), 'cuda_standalone'), (DynamicConfigCreator("CUDA standalone (no atomics)", prefs={'codegen.generators.cuda.use_atomics': False}), 'cuda_standalone'), (DynamicConfigCreator("CUDA standalone (1 post block)", prefs={'devices.cuda_standalone.parallel_blocks': 1}), 'cuda_standalone'), #(DynamicConfigCreator("CUDA standalone (no atomics, no bundles)", # prefs={'codegen.generators.cuda.use_atomics': False, # 'devices.cuda_standalone.push_synapse_bundles': False}), # 'cuda_standalone'), #(DynamicConfigCreator("CUDA standalone (no atomics, 1 post block)", # prefs={'codegen.generators.cuda.use_atomics': False,
additional_dir_name = '_' + sys.argv[1] else: additional_dir_name = '' prefs['devices.cpp_standalone.extra_make_args_unix'] = ['-j12'] # host specific settings if socket.gethostname() == 'elnath': prefs['devices.cpp_standalone.extra_make_args_unix'] = ['-j24'] prefs['codegen.cuda.extra_compile_args_nvcc'].remove('-arch=sm_35') prefs['codegen.cuda.extra_compile_args_nvcc'].extend(['-arch=sm_20']) configs = [ # configuration project_directory (NumpyConfiguration, None), (WeaveConfiguration, None), (LocalConfiguration, None), (DynamicConfigCreator('CUDA standalone'), 'cuda_standalone'), (DynamicConfigCreator('CUDA standalone bundles', git_commit='nemo_bundles'), 'cuda_standalone'), (DynamicConfigCreator("CUDA standalone (profile='blocking')", set_device_kwargs={'profile': 'blocking'}), 'cuda_standalone'), (DynamicConfigCreator("CUDA standalone with 2 blocks per SM", prefs={'devices.cuda_standalone.SM_multiplier': 2}), 'cuda_standalone'), (CUDAStandaloneConfiguration, 'cuda_standalone'), (CUDAStandaloneConfigurationExtraThresholdKernel, 'cuda_standalone'), (CUDAStandaloneConfigurationNoAssert, 'cuda_standalone'), (CUDAStandaloneConfigurationCurandDouble, 'cuda_standalone'), (CUDAStandaloneConfigurationNoCudaOccupancyAPI, 'cuda_standalone'), (CUDAStandaloneConfigurationNoCudaOccupancyAPIProfileCPU, 'cuda_standalone'),
#(WeaveConfiguration, None), #(LocalConfiguration, None), (CUDAStandaloneConfiguration, 'cuda_standalone'), #(DynamicConfigCreator('CUDA standalone'), # 'cuda_standalone'), #(DynamicConfigCreator('CUDA standalone bundles', # git_commit='nemo_bundles'), # 'cuda_standalone'), #(DynamicConfigCreator("CUDA standalone (profile='blocking')", # set_device_kwargs={'profile': 'blocking'}), # 'cuda_standalone'), (DynamicConfigCreator("CUDA standalone single precision", prefs={'core.default_float_dtype': float32}), 'cuda_standalone'), (DynamicConfigCreator("CUDA standalone 1 post block", prefs={'devices.cuda_standalone.parallel_blocks': 1}), 'cuda_standalone'), #(CUDAStandaloneConfigurationExtraThresholdKernel, 'cuda_standalone'), #(CUDAStandaloneConfigurationNoAssert, 'cuda_standalone'), #(CUDAStandaloneConfigurationNoCudaOccupancyAPI, 'cuda_standalone'), #(CUDAStandaloneConfigurationNoCudaOccupancyAPIProfileCPU, 'cuda_standalone'), #(CUDAStandaloneConfiguration2BlocksPerSM, 'cuda_standalone'), #(CUDAStandaloneConfiguration2BlocksPerSMLaunchBounds, 'cuda_standalone'), #(CUDAStandaloneConfigurationSynLaunchBounds, 'cuda_standalone'), #(CUDAStandaloneConfiguration2BlocksPerSMSynLaunchBounds, 'cuda_standalone'), #(CUDAStandaloneConfigurationProfileGPU, 'cuda_standalone'), #(CUDAStandaloneConfigurationProfileCPU, 'cuda_standalone'),
] # Brian2CUDA configurations for partitions in [1, 16, 32, 48, 64, 80, 96, 'max']: partition_pref = partitions if partitions == 1: block_name = '1 block' elif partitions == 'max': block_name = f'max blocks' partition_pref = None else: block_name = f'{partitions} blocks' config = DynamicConfigCreator( f'CUDA standalone (single precision, {block_name}, atomics)', prefs={ 'core.default_float_dtype': float32, 'devices.cuda_standalone.parallel_blocks': partition_pref }) configurations.append(config) # The `benchmark` classes are defined in brian2cuda/tests/features/speed.py. The # `n_slice` parameter indexes the `n_range` class attribute of the respective benchmark # class to determine the network sizes for which this benchmark should be run. speed_tests = [ # benchmark n_slice # XXX: Only run largest network size: slice(-1, None) # LIF benchmark with heterogeneous delays (BrunelHakimHeterogDelays, slice(-1, None)), # STDP benchmark with heterogeneous delays (STDPCUDARandomConnectivityHeterogeneousDelays, slice(-1, None)), ]
prefs['devices.cpp_standalone.extra_make_args_unix'] = ['-j24'] prefs['codegen.cuda.extra_compile_args_nvcc'].remove('-arch=sm_35') prefs['codegen.cuda.extra_compile_args_nvcc'].extend(['-arch=sm_20']) configs = [ # configuration project_directory #(NumpyConfiguration, None), #(WeaveConfiguration, None), #(LocalConfiguration, None), (CPPStandaloneConfiguration, 'cpp_standalone'), (CPPStandaloneConfigurationSinglePrecision, 'cpp_standalone'), (CPPStandaloneConfigurationOpenMPMaxThreads, 'cpp_standalone'), (CPPStandaloneConfigurationOpenMPMaxThreadsSinglePrecision, 'cpp_standalone'), # max blocks (DynamicConfigCreator('CUDA standalone (max blocks, atomics)'), 'cuda_standalone'), (DynamicConfigCreator( 'CUDA standalone (single precision, max blocks, atomics)', prefs={'core.default_float_dtype': float32}), 'cuda_standalone'), # 1 block (DynamicConfigCreator('CUDA standalone (1 block, atomics)', prefs={'devices.cuda_standalone.parallel_blocks': 1}), 'cuda_standalone'), (DynamicConfigCreator( 'CUDA standalone (single precision, 1 block, atomics)', prefs={ 'core.default_float_dtype': float32, 'devices.cuda_standalone.parallel_blocks': 1 }), 'cuda_standalone'),
#(WeaveConfiguration, None), #(LocalConfiguration, None), #(CUDAStandaloneConfiguration, 'cuda_standalone'), #(DynamicConfigCreator('CUDA standalone'), # 'cuda_standalone'), #(DynamicConfigCreator('CUDA standalone bundles', # git_commit='nemo_bundles'), # 'cuda_standalone'), #(DynamicConfigCreator("CUDA standalone (profile='blocking')", # set_device_kwargs={'profile': 'blocking'}), # 'cuda_standalone'), (DynamicConfigCreator( "CUDA standalone (TITAN Xp, Pascal)", prefs={'devices.cpp_standalone.run_environment_variables': gpu_0_env}), 'cuda_standalone'), #(DynamicConfigCreator("CUDA standalone (GeForce GTX TITAN X, Maxwell)", # prefs={'devices.cpp_standalone.run_environment_variables': gpu_1_env}), # 'cuda_standalone'), (DynamicConfigCreator( "CUDA standalone single precision (TITAN Xp, Pascal)", prefs={ 'core.default_float_dtype': float32, 'devices.cpp_standalone.run_environment_variables': gpu_0_env }), 'cuda_standalone'), #(DynamicConfigCreator("CUDA standalone single precicion (GeForce GTX TITAN X, Maxwell)", # prefs={'core.default_float_dtype': float32, # 'devices.cpp_standalone.run_environment_variables': gpu_1_env}),