Beispiel #1
0
class SoftmaxKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_at_softmax`` generator call for a node."""

    def __init__(self, cname, params, qrec, gen_ctrl=None, at_ver=3):
        """Capture the values ``code`` needs.

        Args:
            cname: name stored on the control object and passed to the generator.
            params: node parameters; ``in_dims[0]`` and ``name`` are read and
                the object is handed to ``gen_softmax_at_params``.
            qrec: accepted for signature compatibility; not used.
            gen_ctrl: optional existing control object. When supplied, its
                ``cname`` is overwritten; otherwise a new ``GenCtrl`` is built.
            at_ver: value forwarded to the generator as ``at_ver``.
        """
        del qrec
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = gen_ctrl

        self.cname = cname
        self.at_ver = at_ver
        self.at_softmax_params = gen_softmax_at_params(params)
        self.in_dim = params.in_dims[0]
        self.node_name = params.name

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` (created if None) and return it."""
        block = CodeBlock() if code_block is None else code_block
        block.comment("generator for {}", self.node_name)

        ctrl = self.gen_ctrl
        # The control declaration is only emitted when the control was modified.
        if not ctrl.is_unmodified:
            ctrl.gen_ctrl_decl(block)

        gen_at_softmax(
            block,
            self.cname,
            self.in_dim,
            self.at_softmax_params.SoftMaxOper,
            at_ver=self.at_ver,
        )
        return block
class TwoDTransposeKernelSq8(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_at_2d_transpose`` generator call."""

    def __init__(self, cname, params, real_in_shape, real_transpose, qrec, gen_ctrl=None, at_ver=3):
        """Capture the values ``code`` needs.

        Args:
            cname: name stored on the control object and passed to the generator.
            params: node parameters; ``in_dims[0]``, ``out_dims[0]`` and
                ``name`` are read.
            real_in_shape: shape passed to the generator.
            real_transpose: transpose description, only used in the emitted comment.
            qrec: quantization record; ``in_qs[0]`` and ``out_qs[0]`` are stored.
            gen_ctrl: optional existing control object. When supplied, its
                ``cname`` is overwritten; otherwise a new ``GenCtrl`` is built.
            at_ver: stored on the instance.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = gen_ctrl

        self.in_q = qrec.in_qs[0]
        self.out_q = qrec.out_qs[0]
        self.in_dim = params.in_dims[0]
        self.out_dim = params.out_dims[0]
        self.node_name = params.name
        self.in_shape = real_in_shape
        self.real_transpose = real_transpose
        self.cname = cname
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` (created if None) and return it."""
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)
        block.comment(
            "transpose from {} to {} ({})",
            self.in_dim,
            self.out_dim,
            self.real_transpose,
        )

        ctrl = self.gen_ctrl
        # The control declaration is only emitted when the control was modified.
        if not ctrl.is_unmodified:
            ctrl.gen_ctrl_decl(block)

        gen_at_2d_transpose(block, self.cname, self.in_shape)
        return block
Beispiel #3
0
class GlobalPoolKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_at_globalpool`` generator call."""

    def __init__(self, node_name, cname, params, qrec, gen_ctrl=None, at_ver=3):
        """Capture the values ``code`` needs.

        Args:
            node_name: name used in the emitted comment.
            cname: name stored on the control object and passed to the generator.
            params: node parameters; ``in_dims[0]`` and ``out_dims[0]`` are read
                and the object is handed to ``gen_globalpool_at_params``.
            qrec: quantization record; ``in_qs[0]`` and ``out_qs[0]`` are stored.
            gen_ctrl: optional existing control object. When supplied, its
                ``cname`` is overwritten; otherwise a new ``GenCtrl`` is built.
            at_ver: value forwarded to the generator as ``at_ver``.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = gen_ctrl

        self.node_name = node_name
        self.cname = cname
        self.at_ver = at_ver
        self.at_globalpool_params = gen_globalpool_at_params(params)
        self.in_dim = params.in_dims[0]
        self.out_dim = params.out_dims[0]
        self.in_q = qrec.in_qs[0]
        self.out_q = qrec.out_qs[0]

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` (created if None) and return it."""
        block = CodeBlock() if code_block is None else code_block
        block.comment("generator for {}", self.node_name)

        ctrl = self.gen_ctrl
        # The control declaration is only emitted when the control was modified.
        if not ctrl.is_unmodified:
            ctrl.gen_ctrl_decl(block)

        gen_at_globalpool(
            block,
            self.cname,
            self.in_q,
            self.out_q,
            self.in_dim,
            self.out_dim,
            self.at_globalpool_params,
            at_ver=self.at_ver,
        )
        return block
class SSDPostProcessKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_at_ssd_parameter`` generator call."""

    def __init__(self, cname, params, qrec, gen_ctrl=None, at_ver=3):
        """Capture the values ``code`` needs.

        Args:
            cname: name stored on the control object and passed to the generator.
            params: node parameters; the shapes of ``in_dims[0]``, ``in_dims[1]``
                and ``out_dims[0]`` are read along with ``max_bb_before_nms``
                and ``name``.
            qrec: accepted for signature compatibility; not used.
            gen_ctrl: optional existing control object. When supplied, its
                ``cname`` is overwritten; otherwise a new ``GenCtrl`` is built.
            at_ver: value forwarded to the generator as ``at_ver``.
        """
        del qrec
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = gen_ctrl

        boxes_dim = params.in_dims[0]  # num_boxes x 4
        self.num_anchors = boxes_dim.shape[0]
        scores_dim = params.in_dims[1]  # num_boxes x num_classes
        self.num_classes = scores_dim.shape[1]
        result_dim = params.out_dims[0]  # out_boxes x 4
        self.out_boxes = result_dim.shape[0]
        self.max_bb_before_nms = params.max_bb_before_nms
        self.node_name = params.name
        self.cname = cname
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` (created if None) and return it."""
        block = CodeBlock() if code_block is None else code_block
        block.comment("generator for {}", self.node_name)

        ctrl = self.gen_ctrl
        # The control declaration is only emitted when the control was modified.
        if not ctrl.is_unmodified:
            ctrl.gen_ctrl_decl(block)

        gen_at_ssd_parameter(block, self.cname, self.num_anchors,
                             self.num_classes, self.out_boxes,
                             self.max_bb_before_nms, at_ver=self.at_ver)
        return block
Beispiel #5
0
class GlobalPoolKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_at_globalpool`` generator call.

    The input is described to the generator as three numbers: ``c`` (product of
    the sizes of the axes not listed in ``params.axis``) and ``h``/``w`` (the
    pair returned by ``balanced_divisors`` for the remaining element count).
    """

    def __init__(self, node_name, cname, params, qrec, gen_ctrl=None, at_ver=3):
        """Capture the values ``code`` needs.

        Args:
            node_name: name used in the emitted comment.
            cname: name stored on the control object and passed to the generator.
            params: node parameters; ``in_dims[0]`` and ``axis`` are read and
                the object is handed to ``gen_globalpool_at_params``.
            qrec: quantization record; ``in_qs[0]`` and ``out_qs[0]`` are stored.
            gen_ctrl: optional existing control object. When supplied, its
                ``cname`` is overwritten; otherwise a new ``GenCtrl`` is built.
            at_ver: value forwarded to the generator as ``at_ver``.
        """
        if gen_ctrl is None:
            self.gen_ctrl = GenCtrl(None, cname=cname)
        else:
            gen_ctrl.cname = cname
            self.gen_ctrl = gen_ctrl

        self.at_globalpool_params = gen_globalpool_at_params(params)
        in_dim = params.in_dims[0]
        # The initial value 1 keeps reduce() from raising TypeError when every
        # axis is listed in params.axis and the generator is therefore empty.
        reduce_sz = reduce(lambda x, y: x * y,
                           (sz for idx, sz in enumerate(in_dim.shape)
                            if idx not in params.axis), 1)
        self.c = reduce_sz
        # NOTE(review): true division hands balanced_divisors a float - confirm
        # that helper accepts non-int input.
        (self.h, self.w) = balanced_divisors(in_dim.size()/reduce_sz)
        self.in_q = qrec.in_qs[0]
        self.out_q = qrec.out_qs[0]
        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` (created if None) and return it."""
        if code_block is None:
            code_block = CodeBlock()

        code_block.comment("generator for {}", self.node_name)

        # The control declaration is only emitted when the control was modified.
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(code_block)

        gen_at_globalpool(code_block, self.cname, self.in_q, self.out_q,
                          self.c, self.h, self.w, self.at_globalpool_params,
                          at_ver=self.at_ver)
        return code_block
class PoolKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_cnn_pool_act_sq8`` generator call."""

    def __init__(self,
                 node_name,
                 cname,
                 pool_params,
                 act_params,
                 qrec,
                 act_q=None,
                 gen_ctrl=None,
                 at_ver=3,
                 force_relu=True):
        """Capture the values ``code`` needs and configure the control object.

        Args:
            node_name: name used in the emitted comment.
            cname: name stored on the control object and passed to the generator.
            pool_params: pooling parameters; ``ker_in_order``, ``in_dims[0]``
                and ``out_dims[0]`` are read.
            act_params: optional activation parameters; ``NO_ACTIVATION`` is
                used when None.
            qrec: quantization record; signedness and ``dtype_bits`` of the
                first input and output are read.
            act_q: activation quantization record; ``in_qs[0].zero_point`` is
                read whenever ``act_params`` is not None.
            gen_ctrl: optional existing control object. When supplied, its
                ``cname`` is overwritten; otherwise a new ``GenCtrl`` is built.
            at_ver: stored on the instance.
            force_relu: forwarded to ``gen_active_at_params``.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = ctrl = gen_ctrl

        if pool_params.ker_in_order and pool_params.ker_in_order[0] == ["h", "w", "c"]:
            ctrl.hwc = 1
        # Unsigned tensors are flagged with a negated byte size.
        if not qrec.out_qs[0].signed:
            ctrl.output_datasize = -qrec.out_qs[0].dtype_bits // 8
        if not qrec.in_qs[0].signed:
            ctrl.input_datasize = -qrec.in_qs[0].dtype_bits // 8

        if act_params is None:
            self.at_act_params = NO_ACTIVATION
        else:
            is_asymmetric = act_q.in_qs[0].zero_point != 0
            self.at_act_params = gen_active_at_params(act_params,
                                                      force_relu=force_relu,
                                                      asymmetric=is_asymmetric)

        compat = []
        self.at_pool_params = gen_pool_at_params(pool_params, compat)
        self.in_dim = pool_params.in_dims[0]
        self.out_dim = pool_params.out_dims[0]
        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` (created if None) and return it."""
        block = CodeBlock() if code_block is None else code_block
        block.comment("generator for {}", self.node_name)

        # An unmodified control is passed to the generator as the literal "0".
        if self.gen_ctrl.is_unmodified:
            ctrl_ref = "0"
        else:
            self.gen_ctrl.gen_ctrl_decl(block)
            ctrl_ref = self.gen_ctrl.ctrl_name

        dim = self.in_dim
        gen_cnn_pool_act_sq8(block, self.cname, ctrl_ref, dim.c, dim.w, dim.h,
                             self.at_pool_params, self.at_act_params.ReLUOper)
        return block
Beispiel #7
0
class GlobalPoolKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_cnn_globalpool_sq8`` generator call.

    The input is described to the generator as three numbers: ``c`` (product of
    the sizes of the axes not listed in ``pool_params.axis``) and ``h``/``w``
    (the pair returned by ``balanced_divisors`` for the remaining element count).
    """

    def __init__(self,
                 node_name,
                 cname,
                 pool_params,
                 act_params,
                 act_q=None,
                 gen_ctrl=None,
                 at_ver=3,
                 force_relu=True):
        """Capture the values ``code`` needs and configure the control object.

        Args:
            node_name: name used in the emitted comment.
            cname: name stored on the control object and passed to the generator.
            pool_params: pooling parameters; ``ker_in_order``, ``in_dims[0]``
                and ``axis`` are read.
            act_params: optional activation parameters; ``NO_ACTIVATION`` is
                used when None.
            act_q: activation quantization record; ``in_qs[0].zero_point`` is
                read whenever ``act_params`` is not None.
            gen_ctrl: optional existing control object. When supplied, its
                ``cname`` is overwritten; otherwise a new ``GenCtrl`` is built.
            at_ver: stored on the instance.
            force_relu: forwarded to ``gen_active_at_params``.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = gen_ctrl

        if pool_params.ker_in_order and pool_params.ker_in_order[0] == ["h", "w", "c"]:
            self.gen_ctrl.hwc = 1

        if act_params is None:
            self.at_act_params = NO_ACTIVATION
        else:
            is_asymmetric = act_q.in_qs[0].zero_point != 0
            self.at_act_params = gen_active_at_params(act_params,
                                                      force_relu=force_relu,
                                                      asymmetric=is_asymmetric)

        self.at_globalpool_params = gen_globalpool_at_params(pool_params)
        in_dim = pool_params.in_dims[0]
        # Multiply together the sizes of every axis that is not pooled over.
        kept_size = 1
        for axis_idx, extent in enumerate(in_dim.shape):
            if axis_idx not in pool_params.axis:
                kept_size = kept_size * extent
        self.c = kept_size
        height, width = balanced_divisors(in_dim.size() / kept_size)
        self.h = height
        self.w = width
        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` (created if None) and return it."""
        block = CodeBlock() if code_block is None else code_block
        block.comment("generator for {}", self.node_name)

        # An unmodified control is passed to the generator as the literal "0".
        if self.gen_ctrl.is_unmodified:
            ctrl_ref = "0"
        else:
            self.gen_ctrl.gen_ctrl_decl(block)
            ctrl_ref = self.gen_ctrl.ctrl_name

        gen_cnn_globalpool_sq8(
            block,
            self.cname,
            ctrl_ref,
            self.c,
            self.h,
            self.w,
            self.at_globalpool_params.GlobalPoolOper,
            self.at_act_params.ReLUOper,
        )
        return block
class LinearReluKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_at_linear_relu`` generator call."""

    def __init__(self, node_name, cname, linear_params, linear_q, act_params, act_q, at_ver=3, gen_ctrl=None):
        """Capture the values ``code`` needs and configure the control object.

        Args:
            node_name: name used in the emitted comment.
            cname: name stored on the control object and passed to the generator.
            linear_params: linear layer parameters (must not be None);
                ``in_dims[0]`` and ``out_dims[0]`` are read.
            linear_q: quantization record; ``in_qs[0]`` is the input,
                ``in_qs[1]`` the filter, ``in_qs[2]`` the bias and
                ``out_qs[0]`` the output.
            act_params: optional activation parameters; ``NO_ACTIVATION`` is
                used when None.
            act_q: activation quantization record; its ``out_qs[0]`` replaces
                the output quantization when an activation is present.
            at_ver: value forwarded to the generator; also selects which
                ``ReluN`` control setting is applied.
            gen_ctrl: optional existing control object. When supplied, its
                ``cname`` is overwritten; otherwise a new ``GenCtrl`` is built.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = gen_ctrl

        assert linear_params is not None, "linear should always be included"
        self.at_linear_params = gen_linear_at_params(linear_params)
        self.in_dim = linear_params.in_dims[0]
        self.out_dim = linear_params.out_dims[0]
        self.filter_q = linear_q.in_qs[1]
        self.in_q = linear_q.in_qs[0]
        result_q = linear_q.out_qs[0]
        self.bias_q = linear_q.in_qs[2]

        if act_params is None:
            self.at_act_params = NO_ACTIVATION
        else:
            self.at_act_params = gen_active_at_params(act_params)
            # With a fused activation the activation's output quantization wins.
            result_q = act_q.out_qs[0]
            if at_ver < 3:
                if act_params.activation == "relu6" and result_q.q != 0:
                    self.gen_ctrl.ReluN = 6 << result_q.q
                    self.gen_ctrl.ReluNNoNorm = 1
            elif act_params.activation == "relun":
                self.gen_ctrl.ReluN = act_params.activation_params

        self.out_q = result_q
        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` (created if None) and return it."""
        block = CodeBlock() if code_block is None else code_block
        block.comment("generator for {}", self.node_name)

        ctrl = self.gen_ctrl
        # The control declaration is only emitted when the control was modified.
        if not ctrl.is_unmodified:
            ctrl.gen_ctrl_decl(block)

        gen_at_linear_relu(
            block,
            self.cname,
            self.in_q,
            self.out_q,
            self.filter_q,
            self.bias_q,
            self.in_dim,
            self.out_dim,
            self.at_linear_params,
            self.at_act_params,
            at_ver=self.at_ver,
            gen_ctrl=ctrl,
        )
        return block
class MatrixScaleKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_at_matscale`` generator call."""

    def __init__(self, cname, params, qrec, gen_ctrl=None, at_ver=3):
        """Capture the values ``code`` needs.

        Args:
            cname: name stored on the control object and passed to the generator.
            params: node parameters; ``in_dims[0]``, ``out_dims[0]``,
                ``fusion_type`` and ``name`` are read.
            qrec: quantization record; which ``in_qs`` entries are used depends
                on ``params.fusion_type``.
            gen_ctrl: optional existing control object. When supplied, its
                ``cname`` is overwritten; otherwise a new ``GenCtrl`` is built.
            at_ver: stored on the instance.

        Raises:
            NotImplementedError: ``params.fusion_type`` is not one of
                ``"vec_scalar"``, ``"vector"`` or ``"scalar"``.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = gen_ctrl

        matscale_params = gen_matscale_at_params(params)
        first_in = params.in_dims[0]
        first_out = params.out_dims[0]
        assert first_in.shape[0] == first_out.shape[0]

        # Positions in qrec.in_qs of (other, vector, scalar); None means absent.
        fusion = params.fusion_type
        if fusion == "vec_scalar":
            slots = (0, 1, 2)
        elif fusion == "vector":
            slots = (1, 2, None)
        elif fusion == "scalar":
            slots = (0, None, 1)
        else:
            raise NotImplementedError("unknown fusion type %s" %
                                      params.fusion_type)
        other_q, vector_q, scalar_q = (
            None if slot is None else qrec.in_qs[slot] for slot in slots
        )

        self.at_matscale_params = matscale_params
        self.in_dim = first_in
        self.out_dim = first_out
        self.otherq = other_q
        self.vectorq = vector_q
        self.scalarq = scalar_q
        self.out_q = qrec.out_qs[0]
        self.cname = cname
        self.node_name = params.name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` (created if None) and return it."""
        block = CodeBlock() if code_block is None else code_block
        block.comment("generator for {}", self.node_name)

        ctrl = self.gen_ctrl
        # The control declaration is only emitted when the control was modified.
        if not ctrl.is_unmodified:
            ctrl.gen_ctrl_decl(block)

        gen_at_matscale(
            block,
            self.cname,
            self.otherq,
            self.vectorq,
            self.scalarq,
            self.out_q,
            self.in_dim,
            self.out_dim,
            self.at_matscale_params,
        )
        return block
class MatrixAddKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_at_matrixadd`` generator call."""

    def __init__(self, cname, matrixadd_params, matrixadd_q, act_params, act_q, gen_ctrl=None, at_ver=3):
        """Capture the values ``code`` needs and configure the control object.

        Args:
            cname: name stored on the control object and passed to the generator.
            matrixadd_params: node parameters; ``in_dims[0]``, ``out_dims[0]``
                and ``name`` are read.
            matrixadd_q: quantization record; ``in_qs[0]``, ``in_qs[1]`` and
                ``out_qs[0]`` are read.
            act_params: optional activation parameters; ``NO_ACTIVATION`` is
                used when None.
            act_q: activation quantization record; its ``out_qs[0]`` replaces
                the output quantization when an activation is present.
            gen_ctrl: optional existing control object. When supplied, its
                ``cname`` is overwritten; otherwise a new ``GenCtrl`` is built.
            at_ver: value forwarded to the generator; also selects which
                ``ReluN`` control setting is applied.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = gen_ctrl

        self.at_matrixadd_params = gen_matrixadd_at_params(matrixadd_params)
        self.in_dim = matrixadd_params.in_dims[0]
        self.out_dim = matrixadd_params.out_dims[0]
        self.in_q1 = matrixadd_q.in_qs[0]
        self.in_q2 = matrixadd_q.in_qs[1]
        result_q = matrixadd_q.out_qs[0]

        if act_params is None:
            self.at_act_params = NO_ACTIVATION
        else:
            self.at_act_params = gen_active_at_params(act_params)
            # With a fused activation the activation's output quantization wins.
            result_q = act_q.out_qs[0]
            if at_ver < 3:
                if act_params.activation == "relu6" and result_q.q != 0:
                    self.gen_ctrl.ReluN = 6 << result_q.q
                    self.gen_ctrl.ReluNNoNorm = 1
            elif act_params.activation == "relun":
                self.gen_ctrl.ReluN = act_params.activation_params

        self.out_q = result_q
        self.cname = cname
        self.node_name = matrixadd_params.name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` (created if None) and return it."""
        block = CodeBlock() if code_block is None else code_block
        block.comment("generator for {}", self.node_name)

        ctrl = self.gen_ctrl
        # The control declaration is only emitted when the control was modified.
        if not ctrl.is_unmodified:
            ctrl.gen_ctrl_decl(block)

        gen_at_matrixadd(
            block,
            self.cname,
            self.in_q1,
            self.in_q2,
            self.out_q,
            self.in_dim,
            self.out_dim,
            self.at_matrixadd_params,
            at_ver=self.at_ver,
            gen_ctrl=ctrl,
        )
        return block
Beispiel #11
0
class ThreeDTransposeKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_at_3d_transpose`` generator call."""

    def __init__(self,
                 cname,
                 params,
                 real_in_shape,
                 real_transpose,
                 qrec,
                 gen_ctrl=None,
                 at_ver=3):
        """Capture the values ``code`` needs and configure the control object.

        Args:
            cname: name stored on the control object and passed to the generator.
            params: node parameters; ``in_dims[0]``, ``out_dims[0]`` and
                ``name`` are read.
            real_in_shape: shape passed to the generator.
            real_transpose: sequence of indices into ``C``, ``H``, ``W`` used
                to build the permutation operation name.
            qrec: quantization record; ``in_qs[0]`` and ``out_qs[0]`` are
                stored, and ``out_qs[0].is_floating`` enables ``float_dump``.
            gen_ctrl: optional existing control object. When supplied, its
                ``cname`` is overwritten; otherwise a new ``GenCtrl`` is built.
            at_ver: stored on the instance.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = gen_ctrl

        if qrec.out_qs[0].is_floating:
            self.gen_ctrl.float_dump = 1

        self.in_shape = real_in_shape
        # Spell the target layout, e.g. (2, 0, 1) -> "WCH".
        axis_letters = ('C', 'H', 'W')
        target_layout = "".join([axis_letters[axis] for axis in real_transpose])
        self.permop = "KOP_MATPERM_CHW2{}".format(target_layout)
        self.real_transpose = real_transpose

        self.in_q = qrec.in_qs[0]
        self.out_q = qrec.out_qs[0]
        self.in_dim = params.in_dims[0]
        self.out_dim = params.out_dims[0]
        self.cname = cname
        self.node_name = params.name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` (created if None) and return it."""
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)
        block.comment(
            "transpose from {} to {} ({})",
            self.in_dim,
            self.out_dim,
            self.real_transpose,
        )

        # An unmodified control is passed to the generator as the literal "0".
        if self.gen_ctrl.is_unmodified:
            ctrl_ref = "0"
        else:
            self.gen_ctrl.gen_ctrl_decl(block)
            ctrl_ref = self.gen_ctrl.ctrl_name

        item_bits = abs(at_bits(self.in_q))
        gen_at_3d_transpose(block, self.cname, item_bits, self.in_shape,
                            self.permop, gen_ctrl=ctrl_ref)
        return block
Beispiel #12
0
class RNNKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_rnn_fp16`` generator call.

    The kernel name is chosen from the parameter type: ``RNN_Stack_fp16``,
    ``LSTM_Stack_fp16`` or ``GRU_Stack_fp16``.
    """

    def __init__(self, node_name, cname, rnn_params, gen_ctrl=None, at_ver=3):
        """Capture the values ``code`` needs and configure the control object.

        Args:
            node_name: name used in the emitted comment and in error messages.
            cname: name stored on the control object and passed to the generator.
            rnn_params: an ``RNNParameters``, ``LSTMParameters`` or
                ``GRUParameters`` instance.
            gen_ctrl: optional existing control object. When supplied, its
                ``cname`` is overwritten; otherwise a new ``GenCtrl`` is built.
            at_ver: stored on the instance.

        Raises:
            ValueError: ``rnn_params`` is of an unknown type, or it is a
                ``GRUParameters`` with a falsy ``linear_before_reset``.
        """
        if gen_ctrl is None:
            gen_ctrl = GenCtrl(None, cname=cname)
        else:
            gen_ctrl.cname = cname
        self.gen_ctrl = gen_ctrl

        if isinstance(rnn_params, RNNParameters):
            self.kname = "RNN_Stack_fp16"
        elif isinstance(rnn_params, LSTMParameters):
            self.kname = "LSTM_Stack_fp16"
        elif isinstance(rnn_params, GRUParameters):
            self.kname = "GRU_Stack_fp16"
            if not rnn_params.linear_before_reset:
                # The message placeholder is filled with the node name so the
                # failing node can be identified.
                raise ValueError(
                    "In {} linear_before_reset == 0 not supported by the Autotiler kernels"
                    .format(node_name)
                )
        else:
            raise ValueError("unknown RNN parameter type")
        self.n_cells = rnn_params.n_cells
        self.n_states = rnn_params.n_states
        self.n_inputs = rnn_params.n_inputs
        self.n_input_cells = rnn_params.n_input_cells
        self.n_output_cells = rnn_params.n_output_cells
        self.revert = rnn_params.revert
        # Write through self.gen_ctrl: the gen_ctrl argument may have been None,
        # in which case the control object was created above.
        if not rnn_params.hard_act:
            self.gen_ctrl.rnn_use_hardact = 0
        if not rnn_params.rnn_same_inout_scale:
            self.gen_ctrl.rnn_same_inout_scale = 0
        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` (created if None) and return it."""
        if code_block is None:
            code_block = CodeBlock()

        code_block.comment("generator for {}", self.node_name)

        # An unmodified control is passed to the generator as the literal "0".
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(code_block)
            gen_ctrl = self.gen_ctrl.ctrl_name
        else:
            gen_ctrl = "0"

        revert_flag = "1" if self.revert else "0"
        gen_rnn_fp16(code_block, self.kname, self.cname, gen_ctrl,
                     self.n_cells, self.n_input_cells, self.n_output_cells,
                     self.n_states, self.n_inputs, revert_flag)
        return code_block
Beispiel #13
0
class PoolKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_cnn_pool_act_sq8`` generator call."""

    def __init__(self,
                 node_name,
                 cname,
                 pool_params,
                 act_params,
                 gen_ctrl=None,
                 at_ver=3,
                 force_relu=True):
        """Capture the values ``code`` needs.

        Args:
            node_name: name used in the emitted comment.
            cname: name stored on the control object and passed to the generator.
            pool_params: pooling parameters; ``in_dims[0]`` and ``out_dims[0]``
                are read and the object is handed to ``gen_pool_at_params``.
            act_params: optional activation parameters; ``NO_ACTIVATION`` is
                used when None.
            gen_ctrl: optional existing control object. When supplied, its
                ``cname`` is overwritten; otherwise a new ``GenCtrl`` is built.
            at_ver: stored on the instance.
            force_relu: forwarded to ``gen_active_at_params``.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = gen_ctrl

        if act_params is None:
            self.at_act_params = NO_ACTIVATION
        else:
            self.at_act_params = gen_active_at_params(act_params,
                                                      force_relu=force_relu)

        compat = []
        self.at_pool_params = gen_pool_at_params(pool_params, compat)
        self.in_dim = pool_params.in_dims[0]
        self.out_dim = pool_params.out_dims[0]
        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` (created if None) and return it."""
        block = CodeBlock() if code_block is None else code_block
        block.comment("generator for {}", self.node_name)

        # An unmodified control is passed to the generator as the literal "0".
        if self.gen_ctrl.is_unmodified:
            ctrl_ref = "0"
        else:
            self.gen_ctrl.gen_ctrl_decl(block)
            ctrl_ref = self.gen_ctrl.ctrl_name

        dim = self.in_dim
        gen_cnn_pool_act_sq8(block, self.cname, ctrl_ref, dim.c, dim.w, dim.h,
                             self.at_pool_params, self.at_act_params.ReLUOper)
        return block
class RNNKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_rnn_sq8`` generator call.

    The kernel name is chosen from the parameter type: ``RNN_Stack_SQ8`` or
    ``LSTM_Stack_SQ8``.
    """

    def __init__(self, node_name, cname, rnn_params, gen_ctrl=None, at_ver=3):
        """Capture the values ``code`` needs.

        Args:
            node_name: name used in the emitted comment.
            cname: name stored on the control object and passed to the generator.
            rnn_params: an ``RNNParameters`` or ``LSTMParameters`` instance.
            gen_ctrl: optional existing control object. When supplied, its
                ``cname`` is overwritten; otherwise a new ``GenCtrl`` is built.
            at_ver: stored on the instance.

        Raises:
            ValueError: ``rnn_params`` is neither of the supported types.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = gen_ctrl

        if isinstance(rnn_params, RNNParameters):
            kernel_name = "RNN_Stack_SQ8"
        elif isinstance(rnn_params, LSTMParameters):
            kernel_name = "LSTM_Stack_SQ8"
        else:
            raise ValueError("unknown RNN parameter type")
        self.kname = kernel_name

        self.n_cells = rnn_params.n_cells
        self.n_states = rnn_params.n_states
        self.n_inputs = rnn_params.n_inputs
        self.n_input_cells = rnn_params.n_input_cells
        self.n_output_cells = rnn_params.n_output_cells
        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` (created if None) and return it."""
        block = CodeBlock() if code_block is None else code_block
        block.comment("generator for {}", self.node_name)

        # An unmodified control is passed to the generator as the literal "0".
        if self.gen_ctrl.is_unmodified:
            ctrl_ref = "0"
        else:
            self.gen_ctrl.gen_ctrl_decl(block)
            ctrl_ref = self.gen_ctrl.ctrl_name

        gen_rnn_sq8(
            block,
            self.kname,
            self.cname,
            ctrl_ref,
            self.n_cells,
            self.n_input_cells,
            self.n_output_cells,
            self.n_states,
            self.n_inputs,
        )
        return block
class LinearReluKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_at_linear_relu`` generator call."""

    def __init__(self, node_name, cname, linear_params, linear_q, act_params, act_q, at_ver=3, gen_ctrl=None, force_relu=True):
        """Capture the values ``code`` needs.

        Args:
            node_name: name used in the emitted comment.
            cname: name stored on the control object and passed to the generator.
            linear_params: linear layer parameters (must not be None);
                ``in_dims[0]`` and ``out_dims[0]`` are read.
            linear_q: quantization record; ``weights_q``, ``biases_q``,
                ``mul_biases_q``, ``in_qs[0]`` and ``out_qs[0]`` are read.
            act_params: optional activation parameters; ``NO_ACTIVATION`` is
                used when None. Its dims fill in any dim that is None.
            act_q: activation quantization record; its ``out_qs[0]`` replaces
                the output quantization when an activation is present.
            at_ver: value forwarded to the generator as ``at_ver``.
            gen_ctrl: optional existing control object. When supplied, its
                ``cname`` is overwritten; otherwise a new ``GenCtrl`` is built.
            force_relu: forwarded to ``gen_active_at_params``.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = gen_ctrl

        assert linear_params is not None, "linear should always be included"
        self.at_linear_params = gen_linear_at_params(linear_params)
        source_dim = linear_params.in_dims[0]
        target_dim = linear_params.out_dims[0]
        self.filter_q = linear_q.weights_q
        source_q = linear_q.in_qs[0]
        target_q = linear_q.out_qs[0]
        self.bias_q = linear_q.biases_q
        self.mulbiases_q = linear_q.mul_biases_q

        if act_params is None:
            self.at_act_params = NO_ACTIVATION
        else:
            self.at_act_params = gen_active_at_params(act_params, force_relu=force_relu)
            # Fall back to the activation's values for anything still missing.
            if source_dim is None:
                source_dim = act_params.in_dims[0]
            if target_dim is None:
                target_dim = act_params.out_dims[0]
            if source_q is None:
                source_q = act_q.in_qs[0]
            target_q = act_q.out_qs[0]

        # Only the element counts are kept, not the dim objects.
        self.in_dim = source_dim.size()
        self.out_dim = target_dim.size()
        self.in_q = source_q
        self.out_q = target_q
        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` (created if None) and return it."""
        block = CodeBlock() if code_block is None else code_block
        block.comment("generator for {}", self.node_name)

        # An unmodified control is passed to the generator as the literal "0".
        if self.gen_ctrl.is_unmodified:
            ctrl_ref = "0"
        else:
            self.gen_ctrl.gen_ctrl_decl(block)
            ctrl_ref = self.gen_ctrl.ctrl_name

        bias_bytes = self.bias_q.bits//8
        mulbias_bytes = self.mulbiases_q.bits//8
        gen_at_linear_relu(
            block,
            self.cname,
            bias_bytes,
            mulbias_bytes,
            self.in_dim,
            self.out_dim,
            self.at_linear_params.LinearOper,
            self.at_act_params.ReLUOper,
            at_ver=self.at_ver,
            gen_ctrl=ctrl_ref,
        )
        return block
class PoolReluKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_at_pool_relu`` generator call."""

    def __init__(self, node_name, cname, pool_params, pool_q,
                 act_params, act_q, code_block=None, at_ver=3, gen_ctrl=None):
        """Capture the values ``code`` needs and configure the control object.

        Args:
            node_name: name used in the emitted comment.
            cname: name stored on the control object and passed to the generator.
            pool_params: pooling parameters; required, see Raises.
            pool_q: pooling quantization record; ``in_qs[0]`` and
                ``out_qs[0]`` are read.
            act_params: optional activation parameters; ``NO_ACTIVATION`` is
                used when None.
            act_q: activation quantization record; its ``out_qs[0]`` replaces
                the output quantization when an activation is present.
            code_block: unused; kept only so existing callers keep working.
            at_ver: value forwarded to the generator; also selects which
                ``ReluN`` control setting is applied.
            gen_ctrl: optional existing control object. When supplied, its
                ``cname`` is overwritten; otherwise a new ``GenCtrl`` is built.

        Raises:
            NotImplementedError: there is no pooling to generate, or the input
                and output quantizations differ in ``bits``.
        """
        if gen_ctrl is None:
            self.gen_ctrl = GenCtrl(None, cname=cname)
        else:
            gen_ctrl.cname = cname
            self.gen_ctrl = gen_ctrl

        pad_compatibilities = []

        at_pool_params = (NO_POOL if pool_params is None else
                          gen_pool_at_params(pool_params, pad_compatibilities))
        # Reject the activation-only case before anything dereferences the
        # pooling dims/quantization; otherwise a missing pool surfaces as an
        # AttributeError on None further down.
        if at_pool_params == NO_POOL:
            raise NotImplementedError(
                "activation layer on its own should not be matched by this kernel")

        in_dim = pool_params.in_dims[0]
        out_dim = pool_params.out_dims[0]
        in_q = pool_q.in_qs[0]
        out_q = pool_q.out_qs[0]

        if act_params is not None:
            at_act_params = gen_active_at_params(act_params)
            # Fall back to the activation's values for anything still missing.
            if in_dim is None:
                in_dim = act_params.in_dims[0]
            if out_dim is None:
                out_dim = act_params.out_dims[0]
            if in_q is None:
                in_q = act_q.in_qs[0]
            out_q = act_q.out_qs[0]
            if at_ver < 3:
                if act_params.activation == "relu6" and out_q.q != 0:
                    self.gen_ctrl.ReluN = 6 << out_q.q
                    self.gen_ctrl.ReluNNoNorm = 1
            else:
                if act_params.activation == "relun":
                    self.gen_ctrl.ReluN = act_params.activation_params
        else:
            at_act_params = NO_ACTIVATION

        if pad_compatibilities:
            reduction = PadDim.pad_compatibility_reduce(*pad_compatibilities,
                                                        "convolution padding is not compatible with pool padding")
            if not reduction[2]:  # default is balanced pad left
                at_pad_ctrl = next(i for i, v in enumerate(reduction) if v)
                self.gen_ctrl.PadType = at_pad_ctrl

        if in_q.bits != out_q.bits:
            raise NotImplementedError("only homogenious operations are supported at present")

        self.at_pool_params = at_pool_params
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.in_q = in_q
        self.out_q = out_q
        self.at_act_params = at_act_params
        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` (created if None) and return it."""
        if code_block is None:
            code_block = CodeBlock()

        code_block.comment("generator for {}", self.node_name)

        # The control declaration is only emitted when the control was modified.
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(code_block)

        gen_at_pool_relu(code_block, self.cname, self.in_q, self.out_q,
                         self.in_dim, self.out_dim, self.at_pool_params,
                         self.at_act_params, gen_ctrl=self.gen_ctrl,
                         at_ver=self.at_ver)

        return code_block