Code Example #1
 def lowering(self):
     """
     Create the loops required to express this node in ANSI C code without SIMD and replace this node.
     These loops will stay in the graph to provide meta information.
     :return: None.
     """
     t_var = Allocation.allocate_var('float', 'flat_x', np.prod(self.out_dim))
     t_var_idx = IndexedVariable(t_var)
     n = AssignmentNode(t_var, self.in_var)
     sum_var = Allocation.allocate_var('float', 'sum', [])
     sum_loop = LoopNode(t_var.dim)
     sum_exp = Expression('{sum_var} += expf({t_var_idx});', sum_var=sum_var, t_var_idx=t_var_idx)
     sum_node = ExpressionNode(sum_exp)
     sum_loop.add_edge('content', sum_node)
     t_var_idx.set_indices([sum_loop.get_node('var')])
     out_var_idx = IndexedVariable(self.out_var)
     loops, idxs = LoopNode.create_loops(self.in_var.dim)
     out_var_idx.set_indices(idxs)
     in_var_idx = IndexedVariable(self.in_var)
     in_var_idx.set_indices(idxs)
     exp = Expression('{out_var_idx} = expf({in_var_idx}) / {sum_var};',
                      out_var_idx=out_var_idx, in_var_idx=in_var_idx, sum_var=sum_var)
     node = ExpressionNode(exp)
     loops[-1].add_edge('content', node)
     sum_loop.add_edge('next', loops[0])
     n.add_edge('next', sum_loop)
     CHeaderNode.instance().pointer_decls.append(t_var)
     CHeaderNode.instance().var_decls.append(self.out_var)
     CHeaderNode.instance().var_decls.append(sum_var)
     CHeaderNode.instance().math_required = True
     # Metadata is not required yet, so remove this node
     self.replace_self_with_path(n, loops[0])
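
A minimal sketch of the ANSI C this lowering would emit, assuming a flat input of length N = np.prod(out_dim), a zero-initialized sum, and the buffers declared through CHeaderNode above (buffer names x_in/x_out are illustrative):

    /* flat_x aliases or copies the input buffer (the AssignmentNode n above) */
    for (int i = 0; i < N; i++)
        sum += expf(flat_x[i]);           /* sum_loop */
    for (int i = 0; i < N; i++)
        x_out[i] = expf(x_in[i]) / sum;   /* normalization loops */

Setting math_required presumably tells the generator to include <math.h> and link the math library for expf.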
Code Example #2
File: cnn.py Project: iml130/nncg
    def lowering(self):
        """
        Create the loops required to express this node in ANSI C code without SIMD and attach them to
        this node via a 'content' edge. These loops will stay in the graph to provide meta information.
        :return: None.
        """
        b_var = Allocation.allocate_var('float',
                                        'b',
                                        self.b.shape,
                                        init_data=self.b)
        b_var_idx = IndexedVariable(b_var)

        # Make sure that e.g. Flatten has been applied before. In Keras it is not required but it makes
        # things easier.
        assert len(self.in_dim) == 1

        # Assign bias to output variable
        out_var_idx = IndexedVariable(self.out_var)
        b_loop = LoopNode(self.out_dim)
        out_var_idx.set_indices([b_loop.get_node('var')])
        b_var_idx.set_indices([b_loop.get_node('var')])
        set_bias = AssignmentNode(out_var_idx, b_var_idx)
        b_loop.add_edge('content', set_bias)

        # Loops for multiplication
        out_var_idx = IndexedVariable(self.out_var)
        in_loop = LoopNode(self.in_dim)
        out_loop = LoopNode(self.out_dim)
        out_var_idx.set_indices([out_loop.get_node('var')])
        w_var = Allocation.allocate_var('float',
                                        'w',
                                        self.w.shape,
                                        init_data=self.w)
        in_var_idx = IndexedVariable(self.in_var, False)
        w_var_idx = IndexedVariable(w_var, False)
        in_var_idx.set_indices([in_loop.get_node('var')])
        w_var_idx.set_indices(
            [in_loop.get_node('var'),
             out_loop.get_node('var')])
        mac_node = MACNode(out_var_idx, in_var_idx, w_var_idx)

        b_loop.add_edge('next', in_loop)
        in_loop.add_edge('content', out_loop)
        out_loop.add_edge('content', mac_node)

        self.var_decls.append(self.out_var)
        self.const_decls.append(w_var)
        self.const_decls.append(b_var)

        # Keep this node in the graph and attach the generated loops as its content
        self.add_edge('content', b_loop)
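
Assuming MACNode renders as a multiply-accumulate statement (out += in * w), the lowering above corresponds roughly to this ANSI C; IN/OUT stand for in_dim/out_dim and the buffer names are illustrative:

    for (int j = 0; j < OUT; j++)           /* b_loop */
        x_out[j] = b[j];                    /* set_bias */
    for (int i = 0; i < IN; i++)            /* in_loop */
        for (int j = 0; j < OUT; j++)       /* out_loop */
            x_out[j] += x_in[i] * w[i][j];  /* mac_node */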
Code Example #3
 def __init__(self, id, in_dim, weights_method):
     """
     Initialize the node.
     :param id: An identifier that is added to the function name, see func_def.
     :param in_dim: The three dimensional length of the input: H x W x C
     :param weights_method: How the weights are stored and initialized:
                            'direct': The weights are written into the C file.
                            'stdio': The weights are read using ANSI C stdio.
     """
     super().__init__()
     self.id = id
     self.in_dim = in_dim
     self.out_var = Allocation.allocate_var('float', 'x', in_dim)
     self.out_var.decl_written = True
     self.out_dim = in_dim
     self.weights_method = weights_method
     if weights_method == 'stdio':
         self.direct = False
         self.stdio = True
     elif weights_method == 'direct':
         self.direct = True
         self.stdio = False
     else:
         raise Exception('Unknown weights method.')
     CHeaderNode.__instance = self
     self.reset()
Code Example #4
 def __init__(self, H, W, C_OUT):
     """
     Initialize the node. It immediately creates the Nodes for writing C code, as this node is
     applied after general lowering.
     :param H: Height.
     :param W: Width.
     :param C_OUT: Number of output channels.
     """
     super().__init__()
     loop_descr = [
         [0, H, 1],
         [0, W, 1],
         [0, C_OUT, 1]
     ]
     l = LoopNode.create_loops_by_description(loop_descr)
     self.add_edge('content', l[0])
     self.sse_var = Allocation.allocate_var('__m128i', 'cx', [H, W, C_OUT])
     sse_var_idx = IndexedVariable(self.sse_var)
     h_idx = l[0].get_node('var')
     w_idx = l[1].get_node('var')
     c_idx = l[2].get_node('var')
     sse_var_idx.set_indices([h_idx, w_idx, c_idx])
     # _mm_setzero_si128() matches the __m128i accumulator; _mm_setzero_ps() would return a __m128
     an = AssignmentNode(sse_var_idx, Expression('_mm_setzero_si128()'))
     l[2].add_edge('content', an)
     self.var_decls.append(self.sse_var)
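
The generated loops clear the SSE accumulator before any quantized MACs run; schematically (names mirror the allocations above):

    __m128i cx[H][W][C_OUT];
    for (int h = 0; h < H; h++)
        for (int w = 0; w < W; w++)
            for (int c = 0; c < C_OUT; c++)
                cx[h][w][c] = _mm_setzero_si128();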
Code Example #5
 def __init__(self, res_var, sse_var, H, W, C_OUT):
     """
     Initialize the node. It immediately creates the Nodes for writing C code, as this node is
     applied after general lowering.
     :param res_var: The Variable that is the output of the original Node that was quantized.
     :param sse_var: The Variable for storing the intermediate quantized results.
     :param H: Output height.
     :param W: Output width.
     :param C_OUT: Channels out.
     """
     super().__init__()
     loop_descr = [
         [0, H, 1],
         [0, W, 1],
         [0, C_OUT, 1]
     ]
     l = LoopNode.create_loops_by_description(loop_descr)
     self.add_edge('content', l[0])
     sse_var_idx = IndexedVariable(sse_var)
     res_var_idx = IndexedVariable(res_var)
     h_idx = l[0].get_node('var')
     w_idx = l[1].get_node('var')
     c_idx = l[2].get_node('var')
     sse_var_idx.set_indices([h_idx, w_idx, c_idx])
     res_var_idx.set_indices([h_idx, w_idx, c_idx])
     lo_var = Allocation.allocate_var('__m128i', 'lo')
     l1 = AssignmentNode(lo_var, Expression('_mm_srai_epi32(_mm_unpacklo_epi16({qx}, {qx}), 16);', qx=sse_var_idx))
     hi_var = Allocation.allocate_var('__m128i', 'hi')
     l2 = AssignmentNode(hi_var, Expression('_mm_srai_epi32(_mm_unpackhi_epi16({qx}, {qx}), 16);', qx=sse_var_idx), l1)
     sum1_var = Allocation.allocate_var('__m128i', 'sum1')
     l3 = AssignmentNode(sum1_var, Expression('_mm_hadd_epi32({hi}, {lo});', lo=lo_var, hi=hi_var), l2)
     sum2_var = Allocation.allocate_var('__m128i', 'sum2')
     l4 = AssignmentNode(sum2_var, Expression('_mm_hadd_epi32({sum1}, {sum1});', sum1=sum1_var), l3)
     temp_var = Allocation.allocate_var('int', 'temp_res', [4])
     l5 = FuncCallNode(Expression('_mm_store_si128((__m128i*)&{res}, {sum2});', res=temp_var, sum2=sum2_var), l4)
     temp_var_idx_0 = IndexedVariable(temp_var)
     temp_var_idx_0.set_indices([Constant('0')])
     temp_var_idx_1 = IndexedVariable(temp_var)
     temp_var_idx_1.set_indices([Constant('1')])
     l6 = AddNode(res_var_idx, res_var_idx, temp_var_idx_0, l5)
     l7 = AddNode(res_var_idx, res_var_idx, temp_var_idx_1, l6)
     l[2].add_edge('content', l1)
     self.var_decls.append(lo_var)
     self.var_decls.append(hi_var)
     self.var_decls.append(sum1_var)
     self.var_decls.append(sum2_var)
     self.var_decls.append(temp_var)
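
Stitched together, the Expressions above emit the following SSSE3 sequence (requiring <tmmintrin.h>), which sign-extends the eight packed 16-bit values to 32 bits, reduces them horizontally, and accumulates the result into the output element; qx and res stand for the indexed sse_var and res_var:

    __m128i lo = _mm_srai_epi32(_mm_unpacklo_epi16(qx, qx), 16);
    __m128i hi = _mm_srai_epi32(_mm_unpackhi_epi16(qx, qx), 16);
    __m128i sum1 = _mm_hadd_epi32(hi, lo);
    __m128i sum2 = _mm_hadd_epi32(sum1, sum1);
    int temp_res[4];
    _mm_store_si128((__m128i *)&temp_res, sum2);
    res += temp_res[0];  /* after the two horizontal adds, lanes 0 and 1 hold */
    res += temp_res[1];  /* the partial sums of the high and low four lanes   */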
Code Example #6
 def __init__(self, prev_node):
     """
     Initialize this node.
     :param prev_node: The previous node.
     """
     super().__init__(prev_node)
     self.in_dim = prev_node.out_dim
     self.out_dim = np.prod(self.in_dim)
     self.in_var = prev_node.out_var
     self.out_var = Allocation.allocate_var('float', 'x', self.out_dim)
Code Example #7
 def __init__(self, mean, prev_node):
     """
     Initialize the node.
     :param mean: The mean to be subtracted, as a scalar.
     :param prev_node: The previous node.
     """
     super().__init__(prev_node)
     self.in_dim = prev_node.out_dim
     self.out_dim = self.in_dim
     self.in_var = prev_node.out_var
     self.out_var = Allocation.allocate_var('float', 'x', self.out_dim)
     self.mean = mean
Code Example #8
 def __init__(self, alpha, prev_node):
     """
     Initialize the LeakyReLUNode.
     :param alpha: The leakiness of this node; 0 yields a standard (non-leaky) ReLU.
     :param prev_node: The previous node.
     """
     super().__init__(prev_node)
     self.alpha = alpha
     self.in_var = prev_node.out_var
     self.in_dim = prev_node.out_dim
     self.out_dim = self.in_dim
     self.out_var = Allocation.allocate_var('float', 'x', self.out_dim)
Code Example #9
 def __init__(self, w, b, prev_node):
     """
     Initialize the DenseNode.
     :param w: The weights in two dimensions (channels in, channels out), compatible with Keras.
     :param b: The bias in one dimension (channels out), compatible with Keras.
     :param prev_node: The previous node.
     """
     super().__init__(prev_node)
     self.w = w
     self.b = b
     self.out_dim = w.shape[1]
     self.in_dim = prev_node.out_dim
     self.in_var = prev_node.out_var
     self.out_var = Allocation.allocate_var('float', 'x', self.out_dim)
Code Example #10
File: quantization.py Project: iml130/nncg
 def __init__(self, x_scale, prev_node, dtype):
     """
     Init this Node.
     :param x_scale: The scale previously determined with quantize_scale().
     :param prev_node: The previous node.
     :param dtype: The target data type for quantization.
     """
     super().__init__()
     self.in_var = prev_node.out_var
     self.in_dim = prev_node.out_dim
     self.out_dim = self.in_dim
     self.out_var = Allocation.allocate_var(dtype, 'x', self.out_dim)
     self.out_var.change_padding(self.in_var.pads)
     self.x_scale = x_scale
Code Example #11
 def __init__(self, prev_node):
     """
     Initialize the node.
     :param prev_node: The previous node.
     """
     super().__init__(prev_node)
     self.in_dim = prev_node.out_dim
     if type(self.in_dim) is list:
         # The input must be effectively one-dimensional: exactly one dimension may be greater than 1.
         c = 0
         for d in self.in_dim:
             if d > 1:
                 c += 1
         assert c == 1
     self.out_dim = self.in_dim
     self.in_var = prev_node.out_var
     self.out_var = Allocation.allocate_var('float', 'x', self.out_dim)
Code Example #12
File: cnn.py Project: iml130/nncg
    def __init__(self, w: np.ndarray, b: np.ndarray, stride: tuple,
                 padding: str, prev_node):
        """
        Initialize the Conv2DNode.
        :param w: Weights as a NumPy ndarray of shape kernel height x kernel width x channels in x
                  channels out (number of filters), so the weights of a Keras Conv2D layer can be
                  passed without prior conversion.
        :param b: Bias as a NumPy ndarray of length "channels out".
        :param stride: Tuple of two ints: the stride along height and width.
        :param padding: 'same' or 'valid', as in TensorFlow.
        :param prev_node: The previous node.
        """
        self.in_var = prev_node.out_var
        x = self.in_var
        assert self.in_var.dim[2] == w.shape[2]
        assert w.shape[3] == b.shape[0]
        super().__init__(prev_node)
        self.in_dim = prev_node.out_dim
        self.w = w
        self.b = b
        self.stride = stride
        self.padding = padding
        self.H, self.W, self.C_IN = x.dim
        self.KH, self.KW, _, self.C_OUT = w.shape
        self.SH, self.SW = stride

        if padding == 'valid':
            H_OUT = int(np.ceil((self.H - self.KH + 1) / self.SH))
            W_OUT = int(np.ceil((self.W - self.KW + 1) / self.SW))
            self.pad_top = self.pad_bottom = self.pad_left = self.pad_right = 0
        elif padding == 'same':
            H_OUT = int(np.ceil(float(self.H) / float(self.SH)))
            W_OUT = int(np.ceil(float(self.W) / float(self.SW)))
            self.pad_along_height = max(
                (H_OUT - 1) * self.SH + self.KH - self.H, 0)
            self.pad_along_width = max(
                (W_OUT - 1) * self.SW + self.KW - self.W, 0)
            self.pad_top = int(self.pad_along_height // 2)
            self.pad_bottom = int(self.pad_along_height - self.pad_top)
            self.pad_left = int(self.pad_along_width // 2)
            self.pad_right = int(self.pad_along_width - self.pad_left)
        else:
            raise Exception("Unknown padding.")
        self.in_var.change_padding([[self.pad_top, self.pad_bottom],
                                    [self.pad_left, self.pad_right], [0, 0]])
        self.out_dim = (H_OUT, W_OUT, self.C_OUT)
        self.out_var = Allocation.allocate_var('float', 'x', self.out_dim)
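
For example, with H = 28, KH = 3, SH = 1 and padding 'same': H_OUT = ceil(28 / 1) = 28 and pad_along_height = max((28 - 1) * 1 + 3 - 28, 0) = 2, so pad_top = pad_bottom = 1, matching TensorFlow's 'same' rule.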
Code Example #13
 def __init__(self, size, stride, prev_node):
     """
     Initialize the node.
     :param size: The size of the max filter in two dimensions (H x W), Keras-compatible.
     :param stride: The stride in two dimensions (H x W), Keras-compatible.
     :param prev_node: The previous Node.
     """
     super().__init__(prev_node)
     self.size = size
     self.stride = stride
     self.in_dim = prev_node.out_dim
     self.in_var = prev_node.out_var
     self.h_loop_end = self.in_dim[0] - size[0] + 1
     self.w_loop_end = self.in_dim[1] - size[1] + 1
     x_res = int(np.ceil(self.h_loop_end / stride[0]))
     y_res = int(np.ceil(self.w_loop_end / stride[1]))
     self.out_dim = (x_res, y_res, self.in_dim[2])
     self.out_var = Allocation.allocate_var('float', 'x', self.out_dim)
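
For example, with in_dim = (24, 24, 8), size = (2, 2) and stride = (2, 2): h_loop_end = 24 - 2 + 1 = 23 and x_res = ceil(23 / 2) = 12, so out_dim = (12, 12, 8), which is what Keras produces for this pooling configuration.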
Code Example #14
 def __init__(self, stop, content=None, start=0, step=1, var_name='i'):
     """
     Initialize the LoopNode.
     :param stop: Upper limit of the loop.
     :param content: The first Node of a 'next' path to be executed within the loop.
     :param start: Initial value.
     :param step: Step size for each iteration.
     :param var_name: Optional variable name.
     """
     super().__init__()
     self.start = start
     self.stop = stop
     self.step = step
     if content is not None:
         self.add_edge('content', content)
     self.type = 'int'
     var = Allocation.allocate_var(self.type, var_name, [])
     self.add_edge('var', var)
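
Assuming the usual rendering of start/stop/step, a LoopNode emits a plain counted loop whose body is the 'content' path:

    for (int i = start; i < stop; i += step) {
        /* nodes reachable via the 'content' edge are emitted here */
    }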
Code Example #15
    def lowering(self):
        """
        Create the loops required to express this node in ANSI C code without SIMD and connect this node
        with the new nodes via a 'content' edge. These loops will stay in the graph to provide meta information.
        :return: None.
        """

        # Create loops for setting the bias.
        b_var = Allocation.allocate_var('float', 'b', self.b.shape, init_data=self.b)
        out_var_idx = IndexedVariable(self.out_var)
        b_var_idx = IndexedVariable(b_var)

        # Create the loops using a descriptor.
        bias_loop_descr = [
            [0, self.out_dim[0], 1],
            [0, self.out_dim[1], 1],
            [0, self.out_dim[2], 1]
        ]
        bias_loops = LoopNode.create_loops_by_description(bias_loop_descr)
        b_h_loop = bias_loops[0]
        b_w_loop = bias_loops[1]
        b_c_loop = bias_loops[2]

        set_bias = AssignmentNode(out_var_idx, b_var_idx)
        b_c_loop.add_edge('content', set_bias)
        out_var_idx.set_indices([b_h_loop.get_node('var'), b_w_loop.get_node('var'), b_c_loop.get_node('var')])
        b_var_idx.set_indices([b_c_loop.get_node('var')])

        # Create the loops for convolution, again with descriptors
        conv_loop_descr = [
            [0, self.out_dim[0] * self.SH, self.stride[0]],
            [0, self.out_dim[1] * self.SW, self.stride[1]],
            [0, self.KH, 1],
            [0, self.KW, 1],
            [0, self.C_IN, 1],
            [0, self.C_OUT, 1]
        ]
        conv_loops = LoopNode.create_loops_by_description(conv_loop_descr)
        h_loop = conv_loops[0]
        w_loop = conv_loops[1]
        kh_loop = conv_loops[2]
        kw_loop = conv_loops[3]
        c_in_loop = conv_loops[4]
        c_out_loop = conv_loops[5]

        b_h_loop.add_edge('next', h_loop)

        w_var = Allocation.allocate_var('float', 'w', self.w.shape, init_data=self.w)
        out_var_idx = IndexedVariable(self.out_var)
        in_var_idx = IndexedVariable(self.in_var, False)
        w_var_idx = IndexedVariable(w_var, False)

        # Indices of IndexedVariables must respect the stride
        exp1 = Expression('{var} / {stride0}',
                          var=h_loop.get_node('var'),
                          stride0=Constant(self.stride[0]))
        exp2 = Expression('{var} / {stride1}',
                          var=w_loop.get_node('var'),
                          stride1=Constant(self.stride[1]))
        # Accesses to the image start at the upper-left corner of the window, so add the current filter offset.
        exp3 = Expression('{var1} + {var2}',
                          var1=h_loop.get_node('var'),
                          var2=kh_loop.get_node('var'))
        exp4 = Expression('{var1} + {var2}',
                          var1=w_loop.get_node('var'),
                          var2=kw_loop.get_node('var'))
        out_var_idx.set_indices([exp1, exp2, c_out_loop.get_node('var')])
        in_var_idx.set_indices([exp3, exp4, c_in_loop.get_node('var')])
        w_var_idx.set_indices(
            [kh_loop.get_node('var'), kw_loop.get_node('var'), c_in_loop.get_node('var'), c_out_loop.get_node('var')])
        mac_node = MACNode(out_var_idx, w_var_idx, in_var_idx)
        c_out_loop.add_edge('content', mac_node)

        # These variables must be declared (some with initial data) at the beginning of the function
        CHeaderNode.instance().var_decls.append(self.out_var)
        CHeaderNode.instance().const_decls.append(w_var)
        CHeaderNode.instance().const_decls.append(b_var)

        # Don't remove this node; just attach everything as its content.
        self.add_edge('content', b_h_loop)
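
Under the same multiply-accumulate reading of MACNode as above, the bias and convolution loop nests sketch out to this ANSI C (illustrative names; the weight array is called weights here to avoid clashing with the loop variable w, and the padding computed in __init__ is already folded into x_in via change_padding):

    for (int h = 0; h < H_OUT; h++)             /* bias loops */
        for (int w = 0; w < W_OUT; w++)
            for (int c = 0; c < C_OUT; c++)
                x_out[h][w][c] = b[c];
    for (int h = 0; h < H_OUT * SH; h += SH)    /* conv loops */
        for (int w = 0; w < W_OUT * SW; w += SW)
            for (int kh = 0; kh < KH; kh++)
                for (int kw = 0; kw < KW; kw++)
                    for (int ci = 0; ci < C_IN; ci++)
                        for (int co = 0; co < C_OUT; co++)
                            x_out[h / SH][w / SW][co] +=
                                weights[kh][kw][ci][co] * x_in[h + kh][w + kw][ci];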