def _sch_rules() -> List[ScheduleRule]: from tvm.meta_schedule import schedule_rule as M return [ M.AutoInline( into_producer=False, into_consumer=True, inline_const_tensor=True, disallow_if_then_else=True, require_injective=True, require_ordered=True, disallow_op=["tir.exp"], ), M.AddRFactor(max_jobs_per_core=16, max_innermost_factor=64), M.MultiLevelTiling( structure="SSRSRS", tile_binds=None, max_innermost_factor=64, vector_load_lens=None, reuse_read=None, reuse_write=M.ReuseType( req="may", levels=[1, 2], scope="global", ), ), M.ParallelizeVectorizeUnroll( max_jobs_per_core=16, max_vectorize_extent=64, unroll_max_steps=[0, 16, 64, 512], unroll_explicit=True, ), M.RandomComputeLocation(), ]
num_trials_per_iter=32, max_trials_per_task=32, max_trials_global=20000, ) sch_rules_for_vnni = [ schedule_rule.AutoInline( into_producer=False, into_consumer=True, inline_const_tensor=True, disallow_if_then_else=True, require_injective=True, require_ordered=True, disallow_op=["tir.exp"], ), schedule_rule.AddRFactor(max_jobs_per_core=16, max_innermost_factor=64), schedule_rule.MultiLevelTilingWithIntrin( VNNI_INTRIN, structure="SSRSRS", tile_binds=None, max_innermost_factor=64, vector_load_lens=None, reuse_read=None, reuse_write=schedule_rule.ReuseType( req="may", levels=[1, 2], scope="global", ), ), schedule_rule.ParallelizeVectorizeUnroll( max_jobs_per_core=16,