def vertical_advection_bandwidth(output, executions, option):
    """Benchmark the vertical advection variants and write the result as CSV.

    Args:
        output: Destination handed unchanged to ``table.to_csv``.
        executions: Forwarded to ``run_scaling_benchmark``.
        option: Forwarded to ``common_kwargs`` to build the keyword
            arguments shared by every configuration.
    """
    shared = common_kwargs(option)

    # One row per benchmarked variant: (stencil, block size, unroll factor).
    variants = (
        (vadv.Classic, (512, 1), 8),
        (vadv.LocalMem, (128, 1), 28),
        (vadv.SharedMem, (64, 1), 0),
        (vadv.LocalMemMerged, (512, 1), 2),
    )
    configurations = [
        Configuration(stencil,
                      block_size=block_size,
                      unroll_factor=unroll_factor,
                      **shared)
        for stencil, block_size, unroll_factor in variants
    ]

    table = run_scaling_benchmark(
        configurations,
        executions,
        preprocess_args=truncate_block_size_to_domain,
    )
    table.to_csv(output)
def horizontal_diffusion_bandwidth(output, executions, option):
    """Benchmark the horizontal diffusion variants and write the result as CSV.

    Args:
        output: Destination handed unchanged to ``table.to_csv``.
        executions: Forwarded to ``run_scaling_benchmark``.
        option: Forwarded to ``common_kwargs`` to build the keyword
            arguments shared by every configuration.
    """
    shared = common_kwargs(option)

    # One row per variant: (stencil, block size, extra keyword arguments).
    variants = (
        (hdiff.Classic, (64, 8, 1), {}),
        (hdiff.OnTheFly, (128, 4, 1), {'loop': '3D'}),
        (hdiff.OnTheFlyIncache, (32, 8, 4), {}),
        (hdiff.JScanSharedMem, (512, 16, 1), {}),
        (hdiff.JScanOtfIncache, (128, 4, 1), {}),
        (hdiff.JScanOtf, (256, 4, 1), {}),
        (hdiff.JScanShuffleIncache, (60, 4, 1), {}),
        (hdiff.JScanShuffle, (60, 3, 1), {}),
        (hdiff.JScanShuffleSystolic, (60, 4, 1), {}),
    )
    configurations = [
        Configuration(stencil, block_size=block_size, **extra, **shared)
        for stencil, block_size, extra in variants
    ]

    def truncate_block_size_to_domain_if_possible(**run_args):
        # Arguments whose block size starts with 60 are passed through
        # untouched; all others go through truncate_block_size_to_domain.
        # NOTE(review): 60 matches the JScanShuffle* block sizes above,
        # presumably because those need that exact width -- confirm.
        if run_args['block_size'][0] == 60:
            return run_args
        return truncate_block_size_to_domain(**run_args)

    table = run_scaling_benchmark(
        configurations,
        executions,
        preprocess_args=truncate_block_size_to_domain_if_possible,
    )
    table.to_csv(output)
def horizontal_diffusion_bandwidth(output, executions, dtype, option):
    """Benchmark the vectorized horizontal diffusion variants into a CSV.

    Args:
        output: Destination handed unchanged to ``table.to_csv``.
        executions: Forwarded to ``run_scaling_benchmark``.
        dtype: Element type; anything ``np.dtype`` accepts. It determines
            the vector size and is forwarded to ``common_kwargs``.
        option: Forwarded to ``common_kwargs``.
    """
    # Number of elements of ``dtype`` that fit into 64 bytes.
    element_bytes = np.dtype(dtype).itemsize
    vector_size = 64 // element_bytes

    shared = common_kwargs(option,
                           dtype=dtype,
                           alignment=64,
                           vector_size=vector_size)

    # One row per variant: (stencil, block size).
    variants = (
        (hdiff.ClassicVec, (1024, 16, 1)),
        (hdiff.OnTheFlyVec, (1024, 8, 1)),
        (hdiff.MinimumMem, (1024, 64, 1)),
    )
    configurations = [
        Configuration(stencil, **shared, block_size=block_size)
        for stencil, block_size in variants
    ]

    table = run_scaling_benchmark(configurations,
                                  executions,
                                  preprocess_args=scale_domain)
    table.to_csv(output)
def horizontal_diffusion_bandwidth(output, executions, dtype, option):
    """Benchmark the vectorized horizontal diffusion variants into a CSV.

    All variants share one block size and use streaming stores.

    Args:
        output: Destination handed unchanged to ``table.to_csv``.
        executions: Forwarded to ``run_scaling_benchmark``.
        dtype: Element type; anything ``np.dtype`` accepts. It determines
            the vector size and is forwarded to ``common_kwargs``.
        option: Forwarded to ``common_kwargs``.
    """
    # Number of elements of ``dtype`` that fit into 32 bytes.
    element_bytes = np.dtype(dtype).itemsize
    vector_size = 32 // element_bytes

    shared = common_kwargs(option,
                           dtype=dtype,
                           vector_size=vector_size,
                           streaming_stores=True,
                           block_size=(1024, 16, 1))

    stencils = (hdiff.ClassicVec, hdiff.OnTheFlyVec, hdiff.MinimumMem)
    configurations = [Configuration(stencil, **shared) for stencil in stencils]

    table = run_scaling_benchmark(
        configurations,
        executions,
        preprocess_args=truncate_block_size_to_domain,
    )
    table.to_csv(output)
def basic_bandwidth(output, executions, option):
    """Benchmark the basic stencils and write the resulting table as CSV.

    Args:
        output: Destination handed unchanged to ``table.to_csv``.
        executions: Forwarded to ``run_scaling_benchmark``.
        option: Forwarded to ``common_kwargs``.
    """
    shared = common_kwargs(option, loop='3D', block_size=(32, 8, 1), halo=1)

    # The 'stream' configuration overrides loop, block size and halo of the
    # shared settings and leaves everything else untouched.
    stream_kwargs = shared.copy()
    stream_kwargs.update({
        'loop': '1D',
        'block_size': (1024, 1, 1),
        'halo': 0,
    })

    # Averaging stencils, one per axis: (name, stencil, axis).
    averages = (
        ('avg-i', basic.OnesidedAverage, 0),
        ('avg-j', basic.OnesidedAverage, 1),
        ('avg-k', basic.OnesidedAverage, 2),
        ('sym-avg-i', basic.SymmetricAverage, 0),
        ('sym-avg-j', basic.SymmetricAverage, 1),
        ('sym-avg-k', basic.SymmetricAverage, 2),
    )

    configurations = [
        Configuration(basic.Copy, name='stream', **stream_kwargs),
        Configuration(basic.Empty, name='empty', **shared),
        Configuration(basic.Copy, name='copy', **shared),
    ]
    configurations.extend(
        Configuration(stencil, name=name, axis=axis, **shared)
        for name, stencil, axis in averages)
    configurations.append(
        Configuration(basic.Laplacian,
                      name='lap-ij',
                      along_x=True,
                      along_y=True,
                      along_z=False,
                      **shared))

    table = run_scaling_benchmark(configurations, executions)
    table.to_csv(output)
def vertical_advection_bandwidth(output, executions, dtype, option):
    """Benchmark the vectorized vertical advection variants into a CSV.

    Args:
        output: Destination handed unchanged to ``table.to_csv``.
        executions: Forwarded to ``run_scaling_benchmark``.
        dtype: Element type; anything ``np.dtype`` accepts. It determines
            the vector size and is forwarded to ``common_kwargs``.
        option: Forwarded to ``common_kwargs``.
    """
    # Number of elements of ``dtype`` that fit into 32 bytes.
    element_bytes = np.dtype(dtype).itemsize
    vector_size = 32 // element_bytes

    shared = common_kwargs(option, dtype=dtype, vector_size=vector_size)

    # One row per variant: (stencil, variant-specific keyword arguments).
    variants = (
        (vadv.KMiddleVec,
         {'block_size': (128, 1), 'streaming_stores': True}),
        (vadv.KInnermostVec,
         {'block_size': (64, 1)}),
        (vadv.KInnermostBlockVec,
         {'block_size': (16, 1), 'prefetch_distance': 4,
          'streaming_stores': True}),
    )
    configurations = [
        Configuration(stencil, **shared, **specific)
        for stencil, specific in variants
    ]

    table = run_scaling_benchmark(
        configurations,
        executions,
        preprocess_args=truncate_block_size_to_domain,
    )
    table.to_csv(output)
def basic_bandwidth(output, executions, dtype, option):
    """Benchmark the vectorized basic stencils and write the table as CSV.

    Args:
        output: Destination handed unchanged to ``table.to_csv``.
        executions: Forwarded to ``run_scaling_benchmark``.
        dtype: Element type; anything ``np.dtype`` accepts. It determines
            the vector size and is forwarded to ``common_kwargs``.
        option: Forwarded to ``common_kwargs``.
    """
    # Number of elements of ``dtype`` that fit into 32 bytes.
    element_bytes = np.dtype(dtype).itemsize
    vector_size = 32 // element_bytes

    shared = common_kwargs(option,
                           dtype=dtype,
                           vector_size=vector_size,
                           loop='3D-blocked-vec',
                           halo=1,
                           block_size=(1024, 16, 1),
                           streaming_stores=True)

    # The 'stream' configuration only overrides the loop kind and the halo.
    stream_kwargs = shared.copy()
    stream_kwargs.update({'loop': '1D-vec', 'halo': 0})

    # Averaging stencils, one per axis: (name, stencil, axis).
    averages = (
        ('avg-i', basic.OnesidedAverage, 0),
        ('avg-j', basic.OnesidedAverage, 1),
        ('avg-k', basic.OnesidedAverage, 2),
        ('sym-avg-i', basic.SymmetricAverage, 0),
        ('sym-avg-j', basic.SymmetricAverage, 1),
        ('sym-avg-k', basic.SymmetricAverage, 2),
    )

    configurations = [
        Configuration(basic.Copy, name='stream', **stream_kwargs),
        Configuration(basic.Copy, name='copy', **shared),
    ]
    configurations.extend(
        Configuration(stencil, name=name, axis=axis, **shared)
        for name, stencil, axis in averages)
    configurations.append(
        Configuration(basic.Laplacian,
                      name='lap-ij',
                      along_x=True,
                      along_y=True,
                      along_z=False,
                      **shared))

    table = run_scaling_benchmark(
        configurations,
        executions,
        preprocess_args=truncate_block_size_to_domain,
    )
    table.to_csv(output)
def vertical_advection_bandwidth(output, executions, dtype, option):
    """Benchmark the vectorized vertical advection variants into a CSV.

    Args:
        output: Destination handed unchanged to ``table.to_csv``.
        executions: Forwarded to ``run_scaling_benchmark``.
        dtype: Element type; anything ``np.dtype`` accepts. It determines
            the vector size and is forwarded to ``common_kwargs``.
        option: Forwarded to ``common_kwargs``.
    """
    # Number of elements of ``dtype`` that fit into 64 bytes.
    element_bytes = np.dtype(dtype).itemsize
    vector_size = 64 // element_bytes

    shared = common_kwargs(option,
                           dtype=dtype,
                           layout=(2, 0, 1),
                           vector_size=vector_size)

    # One row per variant: (stencil, variant-specific keyword arguments).
    variants = (
        (vadv.KMiddleVec,
         {'block_size': (1024, 1)}),
        (vadv.KInnermostVec,
         {'block_size': (64, 1), 'prefetch_distance': 4}),
        (vadv.KInnermostBlockVec,
         {'block_size': (64, 1), 'prefetch_distance': 2}),
    )
    configurations = [
        Configuration(stencil, **shared, **specific)
        for stencil, specific in variants
    ]

    table = run_scaling_benchmark(configurations,
                                  executions,
                                  preprocess_args=scale_domain)
    table.to_csv(output)