class SoftmaxKernel(AutotilerKernel):
    """Emits the Autotiler generator call for a softmax node."""

    def __init__(self, cname, params, qrec, gen_ctrl=None, at_ver=3):
        """Collect the values needed to emit the softmax generator.

        Args:
            cname: C name of the kernel; also written onto the control object.
            params: node parameters; ``in_dims[0]`` and ``name`` are read.
            qrec: quantization record; not used by this kernel and discarded.
            gen_ctrl: optional existing control object. When omitted a new
                ``GenCtrl`` is created for ``cname``.
            at_ver: version value forwarded to the generator helper.
        """
        del qrec  # accepted for interface parity only

        if gen_ctrl is not None:
            gen_ctrl.cname = cname
            self.gen_ctrl = gen_ctrl
        else:
            self.gen_ctrl = GenCtrl(None, cname=cname)

        self.at_softmax_params = gen_softmax_at_params(params)
        self.in_dim = params.in_dims[0]
        self.cname = cname
        self.node_name = params.name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A new ``CodeBlock`` is created when none is supplied.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)

        # Only declare the control structure when it differs from defaults.
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(block)

        gen_at_softmax(
            block,
            self.cname,
            self.in_dim,
            self.at_softmax_params.SoftMaxOper,
            at_ver=self.at_ver,
        )
        return block
class TwoDTransposeKernelSq8(AutotilerKernel):
    """Emits the Autotiler generator call for a 2D transpose node."""

    def __init__(self, cname, params, real_in_shape, real_transpose, qrec,
                 gen_ctrl=None, at_ver=3):
        """Collect the values needed to emit the 2D transpose generator.

        Args:
            cname: C name of the kernel; also written onto the control object.
            params: node parameters; ``in_dims[0]``, ``out_dims[0]`` and
                ``name`` are read.
            real_in_shape: input shape handed to the generator helper.
            real_transpose: transpose description; only used in the emitted
                comment.
            qrec: quantization record; first input/output entries are kept.
            gen_ctrl: optional existing control object. When omitted a new
                ``GenCtrl`` is created for ``cname``.
            at_ver: version value stored on the instance.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
            self.gen_ctrl = gen_ctrl
        else:
            self.gen_ctrl = GenCtrl(None, cname=cname)

        self.in_q = qrec.in_qs[0]
        self.out_q = qrec.out_qs[0]
        self.in_shape = real_in_shape
        self.in_dim = params.in_dims[0]
        self.out_dim = params.out_dims[0]
        self.real_transpose = real_transpose
        self.cname = cname
        self.node_name = params.name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A new ``CodeBlock`` is created when none is supplied.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)
        block.comment("transpose from {} to {} ({})",
                      self.in_dim, self.out_dim, self.real_transpose)

        # Only declare the control structure when it differs from defaults.
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(block)

        gen_at_2d_transpose(block, self.cname, self.in_shape)
        return block
class GlobalPoolKernel(AutotilerKernel):
    """Emits the Autotiler generator call for a global pooling node."""

    def __init__(self, node_name, cname, params, qrec, gen_ctrl=None, at_ver=3):
        """Collect the values needed to emit the global pool generator.

        Args:
            node_name: name used in the emitted comment.
            cname: C name of the kernel; also written onto the control object.
            params: node parameters; ``in_dims[0]`` and ``out_dims[0]`` are read.
            qrec: quantization record; first input/output entries are kept.
            gen_ctrl: optional existing control object. When omitted a new
                ``GenCtrl`` is created for ``cname``.
            at_ver: version value forwarded to the generator helper.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
            self.gen_ctrl = gen_ctrl
        else:
            self.gen_ctrl = GenCtrl(None, cname=cname)

        self.at_globalpool_params = gen_globalpool_at_params(params)
        self.in_dim = params.in_dims[0]
        self.out_dim = params.out_dims[0]
        self.in_q = qrec.in_qs[0]
        self.out_q = qrec.out_qs[0]
        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A new ``CodeBlock`` is created when none is supplied.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)

        # Only declare the control structure when it differs from defaults.
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(block)

        gen_at_globalpool(
            block,
            self.cname,
            self.in_q,
            self.out_q,
            self.in_dim,
            self.out_dim,
            self.at_globalpool_params,
            at_ver=self.at_ver,
        )
        return block
class SSDPostProcessKernel(AutotilerKernel):
    """Emits the Autotiler generator call for an SSD post-processing node."""

    def __init__(self, cname, params, qrec, gen_ctrl=None, at_ver=3):
        """Collect the values needed to emit the SSD post-process generator.

        Args:
            cname: C name of the kernel; also written onto the control object.
            params: node parameters; input/output dims, ``max_bb_before_nms``
                and ``name`` are read.
            qrec: quantization record; not used by this kernel and discarded.
            gen_ctrl: optional existing control object. When omitted a new
                ``GenCtrl`` is created for ``cname``.
            at_ver: version value forwarded to the generator helper.
        """
        del qrec  # accepted for interface parity only

        if gen_ctrl is not None:
            gen_ctrl.cname = cname
            self.gen_ctrl = gen_ctrl
        else:
            self.gen_ctrl = GenCtrl(None, cname=cname)

        boxes_dim = params.in_dims[0]    # num_boxes x 4
        self.num_anchors = boxes_dim.shape[0]
        scores_dim = params.in_dims[1]   # num_boxes x num_classes
        self.num_classes = scores_dim.shape[1]
        result_dim = params.out_dims[0]  # out_boxes x 4
        self.out_boxes = result_dim.shape[0]
        self.max_bb_before_nms = params.max_bb_before_nms
        self.cname = cname
        self.node_name = params.name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A new ``CodeBlock`` is created when none is supplied.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)

        # Only declare the control structure when it differs from defaults.
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(block)

        gen_at_ssd_parameter(
            block,
            self.cname,
            self.num_anchors,
            self.num_classes,
            self.out_boxes,
            self.max_bb_before_nms,
            at_ver=self.at_ver,
        )
        return block
class GlobalPoolKernel(AutotilerKernel):
    """Emits the Autotiler generator call for a global pooling node.

    The input is described to the generator as three sizes ``c``, ``h`` and
    ``w`` derived from the input dimension and the pooled axes.
    """

    def __init__(self, node_name, cname, params, qrec, gen_ctrl=None, at_ver=3):
        """Collect the values needed to emit the global pool generator.

        Args:
            node_name: name used in the emitted comment.
            cname: C name of the kernel; also written onto the control object.
            params: node parameters; ``in_dims[0]`` and ``axis`` are read.
            qrec: quantization record; first input/output entries are kept.
            gen_ctrl: optional existing control object. When omitted a new
                ``GenCtrl`` is created for ``cname``.
            at_ver: version value forwarded to the generator helper.
        """
        if gen_ctrl is None:
            self.gen_ctrl = GenCtrl(None, cname=cname)
        else:
            gen_ctrl.cname = cname
            self.gen_ctrl = gen_ctrl

        self.at_globalpool_params = gen_globalpool_at_params(params)
        in_dim = params.in_dims[0]

        # Product of the sizes of every axis that is NOT listed in
        # params.axis. The explicit initial value 1 matters: without it
        # reduce() raises TypeError on an empty sequence, which happens when
        # every axis of the input is listed in params.axis.
        reduce_sz = reduce(
            lambda x, y: x * y,
            (sz for idx, sz in enumerate(in_dim.shape) if idx not in params.axis),
            1)
        self.c = reduce_sz
        # The remaining element count is split into two factors by
        # balanced_divisors (helper defined elsewhere).
        (self.h, self.w) = balanced_divisors(in_dim.size()/reduce_sz)

        self.in_q = qrec.in_qs[0]
        self.out_q = qrec.out_qs[0]
        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A new ``CodeBlock`` is created when none is supplied.
        """
        if code_block is None:
            code_block = CodeBlock()

        code_block.comment("generator for {}", self.node_name)

        # Only declare the control structure when it differs from defaults.
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(code_block)

        gen_at_globalpool(code_block, self.cname, self.in_q, self.out_q,
                          self.c, self.h, self.w, self.at_globalpool_params,
                          at_ver=self.at_ver)
        return code_block
class PoolKernel(AutotilerKernel):
    """Emits the generator call for a pooling node with optional activation."""

    def __init__(self, node_name, cname, pool_params, act_params, qrec,
                 act_q=None, gen_ctrl=None, at_ver=3, force_relu=True):
        """Collect the values needed to emit the pool/activation generator.

        Args:
            node_name: name used in the emitted comment.
            cname: C name of the kernel; also written onto the control object.
            pool_params: pooling parameters; ``ker_in_order``, ``in_dims[0]``
                and ``out_dims[0]`` are read.
            act_params: activation parameters, or ``None`` for no activation.
            qrec: quantization record of the pooling node.
            act_q: quantization record of the activation; its first input's
                ``zero_point`` is read whenever ``act_params`` is given.
            gen_ctrl: optional existing control object. When omitted a new
                ``GenCtrl`` is created for ``cname``.
            at_ver: version value stored on the instance.
            force_relu: forwarded to ``gen_active_at_params``.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
            self.gen_ctrl = gen_ctrl
        else:
            self.gen_ctrl = GenCtrl(None, cname=cname)

        kernel_order = pool_params.ker_in_order
        if kernel_order and kernel_order[0] == ["h", "w", "c"]:
            self.gen_ctrl.hwc = 1

        # Unsigned tensors are flagged with a negated byte size.
        out_q = qrec.out_qs[0]
        if not out_q.signed:
            self.gen_ctrl.output_datasize = (-out_q.dtype_bits) // 8
        in_q = qrec.in_qs[0]
        if not in_q.signed:
            self.gen_ctrl.input_datasize = (-in_q.dtype_bits) // 8

        if act_params is None:
            self.at_act_params = NO_ACTIVATION
        else:
            is_asymmetric = act_q.in_qs[0].zero_point != 0
            self.at_act_params = gen_active_at_params(
                act_params,
                force_relu=force_relu,
                asymmetric=is_asymmetric)

        # The helper receives a list to fill in; it is not consulted afterwards.
        pad_compatibilities = []
        self.at_pool_params = gen_pool_at_params(pool_params, pad_compatibilities)
        self.in_dim = pool_params.in_dims[0]
        self.out_dim = pool_params.out_dims[0]
        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A new ``CodeBlock`` is created when none is supplied.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)

        # "0" is passed as the control argument unless a control structure
        # has to be declared.
        ctrl_ref = "0"
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(block)
            ctrl_ref = self.gen_ctrl.ctrl_name

        gen_cnn_pool_act_sq8(
            block,
            self.cname,
            ctrl_ref,
            self.in_dim.c,
            self.in_dim.w,
            self.in_dim.h,
            self.at_pool_params,
            self.at_act_params.ReLUOper,
        )
        return block
class GlobalPoolKernel(AutotilerKernel):
    """Emits the generator call for a global pooling node with optional activation."""

    def __init__(self, node_name, cname, pool_params, act_params, act_q=None,
                 gen_ctrl=None, at_ver=3, force_relu=True):
        """Collect the values needed to emit the global pool generator.

        Args:
            node_name: name used in the emitted comment.
            cname: C name of the kernel; also written onto the control object.
            pool_params: pooling parameters; ``ker_in_order``, ``in_dims[0]``
                and ``axis`` are read.
            act_params: activation parameters, or ``None`` for no activation.
            act_q: quantization record of the activation; its first input's
                ``zero_point`` is read whenever ``act_params`` is given.
            gen_ctrl: optional existing control object. When omitted a new
                ``GenCtrl`` is created for ``cname``.
            at_ver: version value stored on the instance.
            force_relu: forwarded to ``gen_active_at_params``.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
            self.gen_ctrl = gen_ctrl
        else:
            self.gen_ctrl = GenCtrl(None, cname=cname)

        kernel_order = pool_params.ker_in_order
        if kernel_order and kernel_order[0] == ["h", "w", "c"]:
            self.gen_ctrl.hwc = 1

        if act_params is None:
            self.at_act_params = NO_ACTIVATION
        else:
            is_asymmetric = act_q.in_qs[0].zero_point != 0
            self.at_act_params = gen_active_at_params(
                act_params,
                force_relu=force_relu,
                asymmetric=is_asymmetric)

        self.at_globalpool_params = gen_globalpool_at_params(pool_params)

        in_dim = pool_params.in_dims[0]
        # Sizes of the axes that are not listed in pool_params.axis.
        kept_sizes = [
            size for axis, size in enumerate(in_dim.shape)
            if axis not in pool_params.axis
        ]
        kept_product = reduce(lambda acc, size: acc * size, kept_sizes, 1)
        self.c = kept_product
        # The remaining element count is split into two factors.
        self.h, self.w = balanced_divisors(in_dim.size() / kept_product)

        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A new ``CodeBlock`` is created when none is supplied.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)

        # "0" is passed as the control argument unless a control structure
        # has to be declared.
        ctrl_ref = "0"
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(block)
            ctrl_ref = self.gen_ctrl.ctrl_name

        gen_cnn_globalpool_sq8(
            block,
            self.cname,
            ctrl_ref,
            self.c,
            self.h,
            self.w,
            self.at_globalpool_params.GlobalPoolOper,
            self.at_act_params.ReLUOper,
        )
        return block
class LinearReluKernel(AutotilerKernel):
    """Emits the generator call for a linear layer with optional activation."""

    def __init__(self, node_name, cname, linear_params, linear_q, act_params,
                 act_q, at_ver=3, gen_ctrl=None):
        """Collect the values needed to emit the linear/activation generator.

        Args:
            node_name: name used in the emitted comment.
            cname: C name of the kernel; also written onto the control object.
            linear_params: linear layer parameters; must not be ``None``.
            linear_q: quantization record of the linear layer; inputs 0, 1
                and 2 are taken as input, filter and bias respectively.
            act_params: activation parameters, or ``None`` for no activation.
            act_q: quantization record of the activation; its first output
                replaces the linear output when an activation is present.
            at_ver: version value; selects how relu6/relun adjust the control
                object.
            gen_ctrl: optional existing control object. When omitted a new
                ``GenCtrl`` is created for ``cname``.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
            self.gen_ctrl = gen_ctrl
        else:
            self.gen_ctrl = GenCtrl(None, cname=cname)

        assert linear_params is not None, "linear should always be included"

        self.at_linear_params = gen_linear_at_params(linear_params)
        self.in_dim = linear_params.in_dims[0]
        self.out_dim = linear_params.out_dims[0]
        self.filter_q = linear_q.in_qs[1]
        self.in_q = linear_q.in_qs[0]
        result_q = linear_q.out_qs[0]
        self.bias_q = linear_q.in_qs[2]

        if act_params is None:
            activation = NO_ACTIVATION
        else:
            activation = gen_active_at_params(act_params)
            # With a fused activation the kernel output is the activation's.
            result_q = act_q.out_qs[0]
            if at_ver < 3:
                if act_params.activation == "relu6" and result_q.q != 0:
                    self.gen_ctrl.ReluN = 6 << result_q.q
                    self.gen_ctrl.ReluNNoNorm = 1
            elif act_params.activation == "relun":
                self.gen_ctrl.ReluN = act_params.activation_params

        self.out_q = result_q
        self.at_act_params = activation
        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A new ``CodeBlock`` is created when none is supplied.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)

        # Only declare the control structure when it differs from defaults.
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(block)

        gen_at_linear_relu(
            block,
            self.cname,
            self.in_q,
            self.out_q,
            self.filter_q,
            self.bias_q,
            self.in_dim,
            self.out_dim,
            self.at_linear_params,
            self.at_act_params,
            at_ver=self.at_ver,
            gen_ctrl=self.gen_ctrl,
        )
        return block
class MatrixScaleKernel(AutotilerKernel):
    """Emits the Autotiler generator call for a matrix scaling node."""

    def __init__(self, cname, params, qrec, gen_ctrl=None, at_ver=3):
        """Collect the values needed to emit the matrix scale generator.

        Args:
            cname: C name of the kernel; also written onto the control object.
            params: node parameters; ``in_dims[0]``, ``out_dims[0]``,
                ``fusion_type`` and ``name`` are read.
            qrec: quantization record; which inputs are used depends on
                ``params.fusion_type``.
            gen_ctrl: optional existing control object. When omitted a new
                ``GenCtrl`` is created for ``cname``.
            at_ver: version value stored on the instance.

        Raises:
            NotImplementedError: if ``params.fusion_type`` is not one of
                ``"vec_scalar"``, ``"vector"`` or ``"scalar"``.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
            self.gen_ctrl = gen_ctrl
        else:
            self.gen_ctrl = GenCtrl(None, cname=cname)

        self.at_matscale_params = gen_matscale_at_params(params)
        self.in_dim = params.in_dims[0]
        self.out_dim = params.out_dims[0]
        assert self.in_dim.shape[0] == self.out_dim.shape[0]

        # fusion type -> indices into qrec.in_qs for (other, vector, scalar);
        # None means that operand is absent for the fusion type.
        # NOTE(review): "vector" starts at input 1 while the other two start
        # at input 0 - confirm this is intentional.
        operand_slots = {
            "vec_scalar": (0, 1, 2),
            "vector": (1, 2, None),
            "scalar": (0, None, 1),
        }
        if params.fusion_type not in operand_slots:
            raise NotImplementedError("unknown fusion type %s" % params.fusion_type)
        other_idx, vector_idx, scalar_idx = operand_slots[params.fusion_type]

        self.otherq = qrec.in_qs[other_idx]
        self.vectorq = None if vector_idx is None else qrec.in_qs[vector_idx]
        self.scalarq = None if scalar_idx is None else qrec.in_qs[scalar_idx]
        self.out_q = qrec.out_qs[0]
        self.cname = cname
        self.node_name = params.name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A new ``CodeBlock`` is created when none is supplied.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)

        # Only declare the control structure when it differs from defaults.
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(block)

        gen_at_matscale(
            block,
            self.cname,
            self.otherq,
            self.vectorq,
            self.scalarq,
            self.out_q,
            self.in_dim,
            self.out_dim,
            self.at_matscale_params,
        )
        return block
class MatrixAddKernel(AutotilerKernel):
    """Emits the generator call for a matrix add node with optional activation."""

    def __init__(self, cname, matrixadd_params, matrixadd_q, act_params, act_q,
                 gen_ctrl=None, at_ver=3):
        """Collect the values needed to emit the matrix add generator.

        Args:
            cname: C name of the kernel; also written onto the control object.
            matrixadd_params: add node parameters; ``in_dims[0]``,
                ``out_dims[0]`` and ``name`` are read.
            matrixadd_q: quantization record of the add node.
            act_params: activation parameters, or ``None`` for no activation.
            act_q: quantization record of the activation; its first output
                replaces the add output when an activation is present.
            gen_ctrl: optional existing control object. When omitted a new
                ``GenCtrl`` is created for ``cname``.
            at_ver: version value; selects how relu6/relun adjust the control
                object.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
            self.gen_ctrl = gen_ctrl
        else:
            self.gen_ctrl = GenCtrl(None, cname=cname)

        self.at_matrixadd_params = gen_matrixadd_at_params(matrixadd_params)
        self.in_dim = matrixadd_params.in_dims[0]
        self.out_dim = matrixadd_params.out_dims[0]
        self.in_q1 = matrixadd_q.in_qs[0]
        self.in_q2 = matrixadd_q.in_qs[1]
        result_q = matrixadd_q.out_qs[0]

        if act_params is None:
            activation = NO_ACTIVATION
        else:
            activation = gen_active_at_params(act_params)
            # With a fused activation the kernel output is the activation's.
            result_q = act_q.out_qs[0]
            if at_ver < 3:
                if act_params.activation == "relu6" and result_q.q != 0:
                    self.gen_ctrl.ReluN = 6 << result_q.q
                    self.gen_ctrl.ReluNNoNorm = 1
            elif act_params.activation == "relun":
                self.gen_ctrl.ReluN = act_params.activation_params

        self.out_q = result_q
        self.at_act_params = activation
        self.cname = cname
        self.node_name = matrixadd_params.name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A new ``CodeBlock`` is created when none is supplied.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)

        # Only declare the control structure when it differs from defaults.
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(block)

        gen_at_matrixadd(
            block,
            self.cname,
            self.in_q1,
            self.in_q2,
            self.out_q,
            self.in_dim,
            self.out_dim,
            self.at_matrixadd_params,
            at_ver=self.at_ver,
            gen_ctrl=self.gen_ctrl,
        )
        return block
class ThreeDTransposeKernel(AutotilerKernel):
    """Emits the Autotiler generator call for a 3D transpose node."""

    def __init__(self, cname, params, real_in_shape, real_transpose, qrec,
                 gen_ctrl=None, at_ver=3):
        """Collect the values needed to emit the 3D transpose generator.

        Args:
            cname: C name of the kernel; also written onto the control object.
            params: node parameters; ``in_dims[0]``, ``out_dims[0]`` and
                ``name`` are read.
            real_in_shape: input shape handed to the generator helper.
            real_transpose: sequence of indices into ``C``, ``H``, ``W`` from
                which the permutation operator name is built.
            qrec: quantization record; first input/output entries are kept.
            gen_ctrl: optional existing control object. When omitted a new
                ``GenCtrl`` is created for ``cname``.
            at_ver: version value stored on the instance.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
            self.gen_ctrl = gen_ctrl
        else:
            self.gen_ctrl = GenCtrl(None, cname=cname)

        if qrec.out_qs[0].is_floating:
            self.gen_ctrl.float_dump = 1

        self.in_shape = real_in_shape

        # Operator name is the fixed prefix followed by the permuted letters.
        dim_names = ['C', 'H', 'W']
        permuted_letters = "".join(dim_names[axis] for axis in real_transpose)
        self.permop = "KOP_MATPERM_CHW2{}".format(permuted_letters)

        self.real_transpose = real_transpose
        self.in_q = qrec.in_qs[0]
        self.out_q = qrec.out_qs[0]
        self.in_dim = params.in_dims[0]
        self.out_dim = params.out_dims[0]
        self.cname = cname
        self.node_name = params.name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A new ``CodeBlock`` is created when none is supplied.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)
        block.comment("transpose from {} to {} ({})",
                      self.in_dim, self.out_dim, self.real_transpose)

        # "0" is passed as the control argument unless a control structure
        # has to be declared.
        ctrl_ref = "0"
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(block)
            ctrl_ref = self.gen_ctrl.ctrl_name

        input_bits = abs(at_bits(self.in_q))
        gen_at_3d_transpose(
            block,
            self.cname,
            input_bits,
            self.in_shape,
            self.permop,
            gen_ctrl=ctrl_ref,
        )
        return block
class RNNKernel(AutotilerKernel):
    """Emits the generator call for an fp16 RNN, LSTM or GRU stack."""

    def __init__(self, node_name, cname, rnn_params, gen_ctrl=None, at_ver=3):
        """Collect the values needed to emit the fp16 RNN generator.

        Args:
            node_name: name used in the emitted comment and in error messages.
            cname: C name of the kernel; also written onto the control object.
            rnn_params: ``RNNParameters``, ``LSTMParameters`` or
                ``GRUParameters`` instance describing the stack.
            gen_ctrl: optional existing control object. When omitted a new
                ``GenCtrl`` is created for ``cname``.
            at_ver: version value stored on the instance.

        Raises:
            ValueError: if ``rnn_params`` is of an unknown type, or is a GRU
                with ``linear_before_reset`` unset.
        """
        if gen_ctrl is None:
            self.gen_ctrl = GenCtrl(None, cname=cname)
        else:
            gen_ctrl.cname = cname
            self.gen_ctrl = gen_ctrl

        if isinstance(rnn_params, RNNParameters):
            self.kname = "RNN_Stack_fp16"
        elif isinstance(rnn_params, LSTMParameters):
            self.kname = "LSTM_Stack_fp16"
        elif isinstance(rnn_params, GRUParameters):
            self.kname = "GRU_Stack_fp16"
            if not rnn_params.linear_before_reset:
                # The message placeholder is now filled with the node name;
                # previously the literal "{}" was emitted.
                raise ValueError(
                    "In {} linear_before_reset == 0 not supported by the Autotiler kernels".format(
                        node_name))
        else:
            raise ValueError("unknown RNN parameter type")

        self.n_cells = rnn_params.n_cells
        self.n_states = rnn_params.n_states
        self.n_inputs = rnn_params.n_inputs
        self.n_input_cells = rnn_params.n_input_cells
        self.n_output_cells = rnn_params.n_output_cells
        self.revert = rnn_params.revert

        # Write the flags to self.gen_ctrl, not the gen_ctrl argument: the
        # argument is None by default, which used to raise AttributeError
        # here even though a control object had just been created above.
        if not rnn_params.hard_act:
            self.gen_ctrl.rnn_use_hardact = 0
        if not rnn_params.rnn_same_inout_scale:
            self.gen_ctrl.rnn_same_inout_scale = 0

        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A new ``CodeBlock`` is created when none is supplied.
        """
        if code_block is None:
            code_block = CodeBlock()

        code_block.comment("generator for {}", self.node_name)

        # "0" is passed as the control argument unless a control structure
        # has to be declared.
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(code_block)
            gen_ctrl = self.gen_ctrl.ctrl_name
        else:
            gen_ctrl = "0"

        gen_rnn_fp16(code_block, self.kname, self.cname, gen_ctrl,
                     self.n_cells, self.n_input_cells, self.n_output_cells,
                     self.n_states, self.n_inputs,
                     "1" if self.revert else "0")
        return code_block
class PoolKernel(AutotilerKernel):
    """Emits the generator call for a pooling node with optional activation."""

    def __init__(self, node_name, cname, pool_params, act_params,
                 gen_ctrl=None, at_ver=3, force_relu=True):
        """Collect the values needed to emit the pool/activation generator.

        Args:
            node_name: name used in the emitted comment.
            cname: C name of the kernel; also written onto the control object.
            pool_params: pooling parameters; ``in_dims[0]`` and
                ``out_dims[0]`` are read.
            act_params: activation parameters, or ``None`` for no activation.
            gen_ctrl: optional existing control object. When omitted a new
                ``GenCtrl`` is created for ``cname``.
            at_ver: version value stored on the instance.
            force_relu: forwarded to ``gen_active_at_params``.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
            self.gen_ctrl = gen_ctrl
        else:
            self.gen_ctrl = GenCtrl(None, cname=cname)

        if act_params is None:
            self.at_act_params = NO_ACTIVATION
        else:
            self.at_act_params = gen_active_at_params(
                act_params, force_relu=force_relu)

        # The helper receives a list to fill in; it is not consulted afterwards.
        pad_compatibilities = []
        self.at_pool_params = gen_pool_at_params(pool_params, pad_compatibilities)
        self.in_dim = pool_params.in_dims[0]
        self.out_dim = pool_params.out_dims[0]
        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A new ``CodeBlock`` is created when none is supplied.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)

        # "0" is passed as the control argument unless a control structure
        # has to be declared.
        ctrl_ref = "0"
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(block)
            ctrl_ref = self.gen_ctrl.ctrl_name

        gen_cnn_pool_act_sq8(
            block,
            self.cname,
            ctrl_ref,
            self.in_dim.c,
            self.in_dim.w,
            self.in_dim.h,
            self.at_pool_params,
            self.at_act_params.ReLUOper,
        )
        return block
class RNNKernel(AutotilerKernel):
    """Emits the generator call for an SQ8 RNN or LSTM stack."""

    def __init__(self, node_name, cname, rnn_params, gen_ctrl=None, at_ver=3):
        """Collect the values needed to emit the SQ8 RNN generator.

        Args:
            node_name: name used in the emitted comment.
            cname: C name of the kernel; also written onto the control object.
            rnn_params: ``RNNParameters`` or ``LSTMParameters`` instance
                describing the stack.
            gen_ctrl: optional existing control object. When omitted a new
                ``GenCtrl`` is created for ``cname``.
            at_ver: version value stored on the instance.

        Raises:
            ValueError: if ``rnn_params`` is neither of the supported types.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
            self.gen_ctrl = gen_ctrl
        else:
            self.gen_ctrl = GenCtrl(None, cname=cname)

        # Guard-clause form: reject unsupported types, then pick the name.
        if isinstance(rnn_params, RNNParameters):
            stack_name = "RNN_Stack_SQ8"
        elif isinstance(rnn_params, LSTMParameters):
            stack_name = "LSTM_Stack_SQ8"
        else:
            raise ValueError("unknown RNN parameter type")
        self.kname = stack_name

        self.n_cells = rnn_params.n_cells
        self.n_states = rnn_params.n_states
        self.n_inputs = rnn_params.n_inputs
        self.n_input_cells = rnn_params.n_input_cells
        self.n_output_cells = rnn_params.n_output_cells
        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A new ``CodeBlock`` is created when none is supplied.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)

        # "0" is passed as the control argument unless a control structure
        # has to be declared.
        ctrl_ref = "0"
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(block)
            ctrl_ref = self.gen_ctrl.ctrl_name

        gen_rnn_sq8(
            block,
            self.kname,
            self.cname,
            ctrl_ref,
            self.n_cells,
            self.n_input_cells,
            self.n_output_cells,
            self.n_states,
            self.n_inputs,
        )
        return block
class LinearReluKernel(AutotilerKernel):
    """Emits the generator call for an SQ8 linear layer with optional activation."""

    def __init__(self, node_name, cname, linear_params, linear_q, act_params,
                 act_q, at_ver=3, gen_ctrl=None, force_relu=True):
        """Collect the values needed to emit the linear/activation generator.

        Args:
            node_name: name used in the emitted comment.
            cname: C name of the kernel; also written onto the control object.
            linear_params: linear layer parameters; must not be ``None``.
            linear_q: quantization record of the linear layer; ``weights_q``,
                ``biases_q`` and ``mul_biases_q`` are read.
            act_params: activation parameters, or ``None`` for no activation.
            act_q: quantization record of the activation; its first output
                replaces the linear output when an activation is present.
            at_ver: version value forwarded to the generator helper.
            gen_ctrl: optional existing control object. When omitted a new
                ``GenCtrl`` is created for ``cname``.
            force_relu: forwarded to ``gen_active_at_params``.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
            self.gen_ctrl = gen_ctrl
        else:
            self.gen_ctrl = GenCtrl(None, cname=cname)

        assert linear_params is not None, "linear should always be included"

        self.at_linear_params = gen_linear_at_params(linear_params)
        source_dim = linear_params.in_dims[0]
        result_dim = linear_params.out_dims[0]
        self.filter_q = linear_q.weights_q
        source_q = linear_q.in_qs[0]
        result_q = linear_q.out_qs[0]
        self.bias_q = linear_q.biases_q
        self.mulbiases_q = linear_q.mul_biases_q

        if act_params is None:
            self.at_act_params = NO_ACTIVATION
        else:
            self.at_act_params = gen_active_at_params(
                act_params, force_relu=force_relu)
            # Fall back to the activation's description for anything the
            # linear layer left as None.
            if source_dim is None:
                source_dim = act_params.in_dims[0]
            if result_dim is None:
                result_dim = act_params.out_dims[0]
            if source_q is None:
                source_q = act_q.in_qs[0]
            result_q = act_q.out_qs[0]

        # Only the element counts are kept, not the dimension objects.
        self.in_dim = source_dim.size()
        self.out_dim = result_dim.size()
        self.in_q = source_q
        self.out_q = result_q
        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A new ``CodeBlock`` is created when none is supplied.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)

        # "0" is passed as the control argument unless a control structure
        # has to be declared.
        ctrl_ref = "0"
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(block)
            ctrl_ref = self.gen_ctrl.ctrl_name

        bias_bytes = self.bias_q.bits // 8
        mulbias_bytes = self.mulbiases_q.bits // 8
        gen_at_linear_relu(
            block,
            self.cname,
            bias_bytes,
            mulbias_bytes,
            self.in_dim,
            self.out_dim,
            self.at_linear_params.LinearOper,
            self.at_act_params.ReLUOper,
            at_ver=self.at_ver,
            gen_ctrl=ctrl_ref,
        )
        return block
class PoolReluKernel(AutotilerKernel):
    """Emits the generator call for a pooling layer with optional activation."""

    def __init__(self, node_name, cname, pool_params, pool_q, act_params, act_q,
                 code_block=None, at_ver=3, gen_ctrl=None):
        """Collect the values needed to emit the pool/activation generator.

        Args:
            node_name: name used in the emitted comment.
            cname: C name of the kernel; also written onto the control object.
            pool_params: pooling parameters, or ``None``.
            pool_q: quantization record of the pooling layer.
            act_params: activation parameters, or ``None`` for no activation.
            act_q: quantization record of the activation.
            code_block: accepted by the constructor; a ``CodeBlock`` is
                created when it is ``None`` but nothing is written to it here.
            at_ver: version value; selects how relu6/relun adjust the control
                object.
            gen_ctrl: optional existing control object. When omitted a new
                ``GenCtrl`` is created for ``cname``.

        Raises:
            NotImplementedError: if input and output bit widths differ, or if
                no pooling parameters were supplied.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
            self.gen_ctrl = gen_ctrl
        else:
            self.gen_ctrl = GenCtrl(None, cname=cname)

        source_q = result_q = None
        source_dim = result_dim = None
        pad_compatibilities = []

        if pool_params is None:
            pool_desc = NO_POOL
        else:
            pool_desc = gen_pool_at_params(pool_params, pad_compatibilities)
            # Nothing has been assigned yet, so pooling always provides the
            # dimensions and quantization at this point.
            source_dim = pool_params.in_dims[0]
            result_dim = pool_params.out_dims[0]
            source_q = pool_q.in_qs[0]
            result_q = pool_q.out_qs[0]

        if act_params is None:
            act_desc = NO_ACTIVATION
        else:
            act_desc = gen_active_at_params(act_params)
            # The activation only fills in what pooling did not provide,
            # except for the output quantization which it always overrides.
            if source_dim is None:
                source_dim = act_params.in_dims[0]
            if result_dim is None:
                result_dim = act_params.out_dims[0]
            if source_q is None:
                source_q = act_q.in_qs[0]
            result_q = act_q.out_qs[0]
            if at_ver < 3:
                if act_params.activation == "relu6" and result_q.q != 0:
                    self.gen_ctrl.ReluN = 6 << result_q.q
                    self.gen_ctrl.ReluNNoNorm = 1
            elif act_params.activation == "relun":
                self.gen_ctrl.ReluN = act_params.activation_params

        if code_block is None:
            code_block = CodeBlock()

        if pad_compatibilities:
            reduction = PadDim.pad_compatibility_reduce(
                *pad_compatibilities,
                "convolution padding is not compatible with pool padding")
            if not reduction[2]:
                # default is balanced pad left
                self.gen_ctrl.PadType = next(
                    pos for pos, flag in enumerate(reduction) if flag)

        if source_q.bits != result_q.bits:
            raise NotImplementedError("only homogenious operations are supported at present")
        if pool_desc == NO_POOL:
            raise NotImplementedError(
                "activation layer on its own should not be matched by this kernel")

        self.at_pool_params = pool_desc
        self.in_dim = source_dim
        self.out_dim = result_dim
        self.in_q = source_q
        self.out_q = result_q
        self.at_act_params = act_desc
        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A new ``CodeBlock`` is created when none is supplied.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)

        # Only declare the control structure when it differs from defaults.
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(block)

        gen_at_pool_relu(
            block,
            self.cname,
            self.in_q,
            self.out_q,
            self.in_dim,
            self.out_dim,
            self.at_pool_params,
            self.at_act_params,
            gen_ctrl=self.gen_ctrl,
            at_ver=self.at_ver,
        )
        return block