def fill(arch, dtype, dsize, repeat=10):
    """Benchmark a kernel that stores a constant into every field element.

    Args:
        arch: Forwarded to ``scaled_repeat_times`` together with ``dsize``.
        dtype: Element type of the field; also the key into ``dtype_size``.
        dsize: Total size budget for the field (presumably in bytes, given
            the division by ``dtype_size[dtype]`` -- confirm with callers).
        repeat: Base repetition count, adjusted by ``scaled_repeat_times``.

    Returns:
        Whatever ``membound_benchmark`` returns for the fill kernel.
    """
    run_count = scaled_repeat_times(arch, dsize, repeat)
    element_count = dsize // dtype_size[dtype]
    target = ti.field(dtype, shape=element_count)

    @ti.kernel
    def fill_const(n: ti.i32):
        # Store-only workload: each of the first n elements gets 0.7 cast
        # to the field's element type.
        for idx in range(n):
            target[idx] = ti.cast(0.7, dtype)

    return membound_benchmark(fill_const, element_count, run_count)
def _save_suite_info_as_json(self, suite_path='./'):
    """Write a JSON description of this suite to ``<suite_path>/_info.json``.

    The description lists the test case names, the dtype names, the data
    sizes, the per-size repeat counts (via ``scaled_repeat_times``) and the
    evaluator names.

    Args:
        suite_path: Directory in which ``_info.json`` is created.
    """
    case_names = [case.__name__ for case in self.test_cases]
    dtype_names = [dtype2str(dt) for dt in self.test_dtype_list]
    dsizes = list(self.test_dsize_list)
    repeats = [
        scaled_repeat_times(self._arch, dsize, self.basic_repeat_times)
        for dsize in self.test_dsize_list
    ]
    evaluator_names = [evaluate.__name__ for evaluate in self.evaluator]

    info_dict = {
        'cases': case_names,
        'dtype': dtype_names,
        'dsize': dsizes,
        'repeat': repeats,
        'evaluator': evaluator_names,
    }

    info_path = os.path.join(suite_path, '_info.json')
    with open(info_path, 'w') as f:
        # print() appends the trailing newline after the JSON text.
        print(dump2json(info_dict), file=f)
def get_results_dict(self):
    """Build a per-data-size summary of the recorded timings.

    Returns:
        A dict keyed by ``size2str(dsize)`` with any ``'.0'`` removed. Each
        value is a dict holding the data size, the repeat count computed by
        ``scaled_repeat_times`` and the matching entry of
        ``self._min_time_in_us``.
    """
    results = {}
    for idx, dsize in enumerate(self._test_dsize_list):
        repeat = scaled_repeat_times(self._arch, dsize,
                                     MemoryBound.basic_repeat_times)
        elapsed_time = self._min_time_in_us[idx]
        item_name = size2str(dsize).replace('.0', '')
        # NOTE(review): the key says "ms" but the value is read from
        # ``_min_time_in_us`` -- confirm which unit consumers expect.
        results[item_name] = {
            'dsize_byte': dsize,
            'repeat': repeat,
            'elapsed_time_ms': elapsed_time,
        }
    return results
def reduction(arch, dtype, dsize, repeat=10):
    """Benchmark a kernel that sums a 1-D field into a 0-D field.

    Args:
        arch: Forwarded to ``scaled_repeat_times`` together with ``dsize``.
        dtype: Element type of both fields; also the key into ``dtype_size``.
        dsize: Total size budget for the input field (presumably in bytes,
            given the division by ``dtype_size[dtype]`` -- confirm with
            callers).
        repeat: Base repetition count, adjusted by ``scaled_repeat_times``.

    Returns:
        Whatever ``membound_benchmark`` returns for the reduction kernel.
    """
    run_count = scaled_repeat_times(arch, dsize, repeat)
    element_count = dsize // dtype_size[dtype]
    source = ti.field(dtype, shape=element_count)
    total = ti.field(dtype, shape=())
    total[None] = 0

    @ti.kernel
    def reduction(n: ti.i32):
        # Accumulate with ``+=`` exactly as written; do not expand it into
        # a separate read and write of ``total``.
        for idx in range(n):
            total[None] += source[idx]

    # Populate the input before timing; the accumulator is only zeroed once,
    # above, and is not reset between runs here.
    init_const(source, dtype, element_count)
    return membound_benchmark(reduction, element_count, run_count)
def saxpy(arch, dtype, dsize, repeat=10):
    """Benchmark the element-wise kernel ``z[i] = 17 * x[i] + y[i]``.

    Args:
        arch: Forwarded to ``scaled_repeat_times`` together with ``dsize``.
        dtype: Element type of the three fields; also the key into
            ``dtype_size``.
        dsize: Total size budget shared by the three fields (presumably in
            bytes, given the division by ``dtype_size[dtype]`` -- confirm
            with callers).
        repeat: Base repetition count, adjusted by ``scaled_repeat_times``.

    Returns:
        Whatever ``membound_benchmark`` returns for the saxpy kernel.
    """
    run_count = scaled_repeat_times(arch, dsize, repeat)
    # The budget is split across three equally sized fields: two inputs and
    # one output.
    element_count = dsize // dtype_size[dtype] // 3
    lhs = ti.field(dtype, shape=element_count)
    rhs = ti.field(dtype, shape=element_count)
    out = ti.field(dtype, shape=element_count)

    @ti.kernel
    def saxpy(n: ti.i32):
        for idx in range(n):
            out[idx] = 17 * lhs[idx] + rhs[idx]

    # Only the two inputs are initialised; the output is written by the kernel.
    init_const(lhs, dtype, element_count)
    init_const(rhs, dtype, element_count)
    return membound_benchmark(saxpy, element_count, run_count)