def lowering(self): """ Create the loops required to express this node in ANSI C code without SIMD and replace this node. This loop will stay in graph to provide meta information. :return: None. """ t_var = Allocation.allocate_var('float', 'flat_x', np.prod(self.out_dim)) t_var_idx = IndexedVariable(t_var) n = AssignmentNode(t_var, self.in_var) sum_var = Allocation.allocate_var('float', 'sum', []) sum_loop = LoopNode(t_var.dim) sum_exp = Expression('{sum_var} += expf({t_var_idx});', sum_var=sum_var, t_var_idx=t_var_idx) sum_node = ExpressionNode(sum_exp) sum_loop.add_edge('content', sum_node) t_var_idx.set_indices([sum_loop.get_node('var')]) out_var_idx = IndexedVariable(self.out_var) loops, idxs = LoopNode.create_loops(self.in_var.dim) out_var_idx.set_indices(idxs) in_var_idx = IndexedVariable(self.in_var) in_var_idx.set_indices(idxs) exp = Expression('{out_var_idx} = expf({in_var_idx}) / {sum_var};', out_var_idx=out_var_idx, in_var_idx=in_var_idx, sum_var=sum_var) node = ExpressionNode(exp) loops[-1].add_edge('content', node) sum_loop.add_edge('next', loops[0]) n.add_edge('next', sum_loop) CHeaderNode.instance().pointer_decls.append(t_var) CHeaderNode.instance().var_decls.append(self.out_var) CHeaderNode.instance().var_decls.append(sum_var) CHeaderNode.instance().math_required = True # Meta data not required yet so remove this node self.replace_self_with_path(n, loops[0])
def lowering(self): """ Create the loops required to express this node in ANSI C code without SIMD and replace this node. This loop will stay in graph to provide meta information. :return: None. """ b_var = Allocation.allocate_var('float', 'b', self.b.shape, init_data=self.b) b_var_idx = IndexedVariable(b_var) # Make sure that e.g. Flatten has been applied before. In Keras it is not required but it makes # things easier. assert _len(self.in_dim) == 1 # Assign bias to output variable out_var_idx = IndexedVariable(self.out_var) b_loop = LoopNode(self.out_dim) out_var_idx.set_indices([b_loop.get_node('var')]) b_var_idx.set_indices([b_loop.get_node('var')]) set_bias = AssignmentNode(out_var_idx, b_var_idx) b_loop.add_edge('content', set_bias) # Loops for multiplication out_var_idx = IndexedVariable(self.out_var) in_loop = LoopNode(self.in_dim) out_loop = LoopNode(self.out_dim) out_var_idx.set_indices([out_loop.get_node('var')]) w_var = Allocation.allocate_var('float', 'w', self.w.shape, init_data=self.w) in_var_idx = IndexedVariable(self.in_var, False) w_var_idx = IndexedVariable(w_var, False) in_var_idx.set_indices([in_loop.get_node('var')]) w_var_idx.set_indices( [in_loop.get_node('var'), out_loop.get_node('var')]) mac_node = MACNode(out_var_idx, in_var_idx, w_var_idx) b_loop.add_edge('next', in_loop) in_loop.add_edge('content', out_loop) out_loop.add_edge('content', mac_node) self.var_decls.append(self.out_var) self.const_decls.append(w_var) self.const_decls.append(b_var) # Meta data not required yet so remove this node self.add_edge('content', b_loop)
def __init__(self, id, in_dim, weights_method):
    """
    Initialize the node.
    :param id: An identifier that is added to the function name, see func_def.
    :param in_dim: The three dimensional length of the input: H x W x C.
    :param weights_method: How the weights are stored and initialized.
                           'direct': The weights are written into the C file.
                           'stdio': The weights are read using ANSI C stdio.
    """
    super().__init__()
    self.id = id
    self.in_dim = in_dim
    self.out_var = Allocation.allocate_var('float', 'x', in_dim)
    self.out_var.decl_written = True
    self.out_dim = in_dim
    self.weights_method = weights_method
    if weights_method == 'stdio':
        self.direct = False
        self.stdio = True
    elif weights_method == 'direct':
        self.direct = True
        self.stdio = False
    else:
        raise Exception('Unknown weights method.')
    CHeaderNode.__instance = self
    self.reset()
def __init__(self, H, W, C_OUT):
    """
    Init the Node. Immediately creates Nodes for writing C code as this node is applied after
    general lowering.
    :param H: Height.
    :param W: Width.
    :param C_OUT: Number of output channels.
    """
    super().__init__()
    loop_descr = [
        [0, H, 1],
        [0, W, 1],
        [0, C_OUT, 1]
    ]
    l = LoopNode.create_loops_by_description(loop_descr)
    self.add_edge('content', l[0])
    self.sse_var = Allocation.allocate_var('__m128i', 'cx', [H, W, C_OUT])
    sse_var_idx = IndexedVariable(self.sse_var)
    h_idx = l[0].get_node('var')
    w_idx = l[1].get_node('var')
    c_idx = l[2].get_node('var')
    sse_var_idx.set_indices([h_idx, w_idx, c_idx])
    # Use the integer variant of setzero because the accumulator is of type __m128i.
    an = AssignmentNode(sse_var_idx, Expression('_mm_setzero_si128()'))
    l[2].add_edge('content', an)
    self.var_decls.append(self.sse_var)
def __init__(self, res_var, sse_var, H, W, C_OUT):
    """
    Init the Node. Immediately creates Nodes for writing C code as this node is applied after
    general lowering.
    :param res_var: The Variable that is the output of the original Node that was quantized.
    :param sse_var: The Variable for storing the intermediate quantized results.
    :param H: Output height.
    :param W: Output width.
    :param C_OUT: Channels out.
    """
    super().__init__()
    loop_descr = [
        [0, H, 1],
        [0, W, 1],
        [0, C_OUT, 1]
    ]
    l = LoopNode.create_loops_by_description(loop_descr)
    self.add_edge('content', l[0])
    sse_var_idx = IndexedVariable(sse_var)
    res_var_idx = IndexedVariable(res_var)
    h_idx = l[0].get_node('var')
    w_idx = l[1].get_node('var')
    c_idx = l[2].get_node('var')
    sse_var_idx.set_indices([h_idx, w_idx, c_idx])
    res_var_idx.set_indices([h_idx, w_idx, c_idx])
    lo_var = Allocation.allocate_var('__m128i', 'lo')
    l1 = AssignmentNode(lo_var, Expression('_mm_srai_epi32(_mm_unpacklo_epi16({qx}, {qx}), 16);',
                                           qx=sse_var_idx))
    hi_var = Allocation.allocate_var('__m128i', 'hi')
    l2 = AssignmentNode(hi_var, Expression('_mm_srai_epi32(_mm_unpackhi_epi16({qx}, {qx}), 16);',
                                           qx=sse_var_idx), l1)
    sum1_var = Allocation.allocate_var('__m128i', 'sum1')
    l3 = AssignmentNode(sum1_var, Expression('_mm_hadd_epi32({hi}, {lo});', lo=lo_var, hi=hi_var), l2)
    sum2_var = Allocation.allocate_var('__m128i', 'sum2')
    l4 = AssignmentNode(sum2_var, Expression('_mm_hadd_epi32({sum1}, {sum1});', sum1=sum1_var), l3)
    temp_var = Allocation.allocate_var('int', 'temp_res', [4])
    l5 = FuncCallNode(Expression('_mm_store_si128((__m128i*)&{res}, {sum2});',
                                 res=temp_var, sum2=sum2_var), l4)
    temp_var_idx_0 = IndexedVariable(temp_var)
    temp_var_idx_0.set_indices([Constant('0')])
    temp_var_idx_1 = IndexedVariable(temp_var)
    temp_var_idx_1.set_indices([Constant('1')])
    l6 = AddNode(res_var_idx, res_var_idx, temp_var_idx_0, l5)
    l7 = AddNode(res_var_idx, res_var_idx, temp_var_idx_1, l6)
    l[2].add_edge('content', l1)
    self.var_decls.append(lo_var)
    self.var_decls.append(hi_var)
    self.var_decls.append(sum1_var)
    self.var_decls.append(sum2_var)
    self.var_decls.append(temp_var)
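
# Per (h, w, c) position, the SSE sequence emitted by the constructor above roughly reads as
# follows (qx, temp_res and res are illustrative names for the quantized accumulator, the
# temporary array and the result variable):
#
#     lo   = _mm_srai_epi32(_mm_unpacklo_epi16(qx, qx), 16);  /* sign-extend low 4 int16 lanes  */
#     hi   = _mm_srai_epi32(_mm_unpackhi_epi16(qx, qx), 16);  /* sign-extend high 4 int16 lanes */
#     sum1 = _mm_hadd_epi32(hi, lo);                          /* pairwise horizontal sums       */
#     sum2 = _mm_hadd_epi32(sum1, sum1);
#     _mm_store_si128((__m128i*)&temp_res, sum2);
#     res[h][w][c] += temp_res[0];                            /* sum of the high half           */
#     res[h][w][c] += temp_res[1];                            /* sum of the low half            */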
def __init__(self, prev_node):
    """
    Initialize this node.
    :param prev_node: The previous node.
    """
    super().__init__(prev_node)
    self.in_dim = prev_node.out_dim
    self.out_dim = np.prod(self.in_dim)
    self.in_var = prev_node.out_var
    self.out_var = Allocation.allocate_var('float', 'x', self.out_dim)
def __init__(self, mean, prev_node):
    """
    Initialize the node.
    :param mean: The mean to be subtracted as scalar.
    :param prev_node: The previous node.
    """
    super().__init__(prev_node)
    self.in_dim = prev_node.out_dim
    self.out_dim = self.in_dim
    self.in_var = prev_node.out_var
    self.out_var = Allocation.allocate_var('float', 'x', self.out_dim)
    self.mean = mean
def __init__(self, alpha, prev_node):
    """
    Initialize the LeakyReLUNode.
    :param alpha: The leakiness of this node. 0 for a non-leaky (normal) ReLU.
    :param prev_node: The previous node.
    """
    super().__init__(prev_node)
    self.alpha = alpha
    self.in_var = prev_node.out_var
    self.in_dim = prev_node.out_dim
    self.out_dim = self.in_dim
    self.out_var = Allocation.allocate_var('float', 'x', self.out_dim)
def __init__(self, w, b, prev_node):
    """
    Initialize the DenseNode.
    :param w: The weights in two dimensions: channels in, channels out. Compatible with Keras.
    :param b: The bias with one dimension: channels out. Compatible with Keras.
    :param prev_node: The previous node.
    """
    super().__init__(prev_node)
    self.w = w
    self.b = b
    self.out_dim = w.shape[1]
    self.in_dim = prev_node.out_dim
    self.in_var = prev_node.out_var
    self.out_var = Allocation.allocate_var('float', 'x', self.out_dim)
def __init__(self, x_scale, prev_node, dtype):
    """
    Init this Node.
    :param x_scale: The scale previously determined with quantize_scale().
    :param prev_node: The previous node.
    :param dtype: The target data type for quantization.
    """
    super().__init__()
    self.in_var = prev_node.out_var
    self.in_dim = prev_node.out_dim
    self.out_dim = self.in_dim
    self.out_var = Allocation.allocate_var(dtype, 'x', self.out_dim)
    self.out_var.change_padding(self.in_var.pads)
    self.x_scale = x_scale
def __init__(self, prev_node):
    """
    Initialize the node.
    :param prev_node: The previous node.
    """
    super().__init__(prev_node)
    self.in_dim = prev_node.out_dim
    if type(self.in_dim) is list:
        # The input must effectively be one-dimensional: exactly one dimension may be larger than 1.
        c = 0
        for d in self.in_dim:
            if d > 1:
                c += 1
        assert c == 1
    self.out_dim = self.in_dim
    self.in_var = prev_node.out_var
    self.out_var = Allocation.allocate_var('float', 'x', self.out_dim)
def __init__(self, w: np.ndarray, b: np.ndarray, stride: tuple, padding: str, prev_node):
    """
    Initialize the Conv2DNode.
    :param w: Weights. Shape must be: kernel height, kernel width, channels in, channels out
              (number of filters) as NumPy ndarray. Thus the weights from a Keras Conv2D can be
              passed without prior conversion.
    :param b: Bias. NumPy ndarray with length "channels out".
    :param stride: Tuple of 2.
    :param padding: Like in TensorFlow: 'same' or 'valid'.
    :param prev_node: The previous node.
    """
    self.in_var = prev_node.out_var
    x = self.in_var
    assert self.in_var.dim[2] == w.shape[2]
    assert w.shape[3] == b.shape[0]
    super().__init__(prev_node)
    self.in_dim = prev_node.out_dim
    self.w = w
    self.b = b
    self.stride = stride
    self.padding = padding
    self.H, self.W, self.C_IN = x.dim
    self.KH, self.KW, _, self.C_OUT = w.shape
    self.SH, self.SW = stride
    if padding == 'valid':
        H_OUT = int(np.ceil((self.H - self.KH + 1) / self.SH))
        W_OUT = int(np.ceil((self.W - self.KW + 1) / self.SW))
        self.pad_top = self.pad_bottom = self.pad_left = self.pad_right = 0
    elif padding == 'same':
        H_OUT = int(np.ceil(float(self.H) / float(self.SH)))
        W_OUT = int(np.ceil(float(self.W) / float(self.SW)))
        self.pad_along_height = max((H_OUT - 1) * self.SH + self.KH - self.H, 0)
        self.pad_along_width = max((W_OUT - 1) * self.SW + self.KW - self.W, 0)
        self.pad_top = int(self.pad_along_height // 2)
        self.pad_bottom = int(self.pad_along_height - self.pad_top)
        self.pad_left = int(self.pad_along_width // 2)
        self.pad_right = int(self.pad_along_width - self.pad_left)
    else:
        raise Exception("Unknown padding.")
    self.in_var.change_padding([[self.pad_top, self.pad_bottom],
                                [self.pad_left, self.pad_right],
                                [0, 0]])
    self.out_dim = (H_OUT, W_OUT, self.C_OUT)
    self.out_var = Allocation.allocate_var('float', 'x', self.out_dim)
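
# Worked example for the padding arithmetic above, with hypothetical values:
# H = 5, KH = 3, SH = 2 and 'same' padding give
#     H_OUT = ceil(5 / 2) = 3,
#     pad_along_height = max((3 - 1) * 2 + 3 - 5, 0) = 2, so pad_top = 1 and pad_bottom = 1.
# The same input with 'valid' padding gives H_OUT = ceil((5 - 3 + 1) / 2) = 2 and no padding.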
def __init__(self, size, stride, prev_node):
    """
    Initialize the node.
    :param size: The size of the max filter with two dimensions: H x W. It is Keras compatible.
    :param stride: The stride with two dimensions: H x W. It is Keras compatible.
    :param prev_node: The previous Node.
    """
    super().__init__(prev_node)
    self.size = size
    self.stride = stride
    self.in_dim = prev_node.out_dim
    self.in_var = prev_node.out_var
    self.h_loop_end = self.in_dim[0] - size[0] + 1
    self.w_loop_end = self.in_dim[1] - size[1] + 1
    x_res = int(np.ceil(self.h_loop_end / stride[0]))
    y_res = int(np.ceil(self.w_loop_end / stride[1]))
    self.out_dim = (x_res, y_res, self.in_dim[2])
    self.out_var = Allocation.allocate_var('float', 'x', self.out_dim)
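
# Worked example for the output size computed above, with hypothetical values:
# in_dim = (6, 6, 8), size = (2, 2), stride = (2, 2) give
#     h_loop_end = 6 - 2 + 1 = 5, x_res = ceil(5 / 2) = 3 (and likewise y_res = 3),
# so out_dim = (3, 3, 8).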
def __init__(self, stop, content=None, start=0, step=1, var_name='i'):
    """
    Initialize the LoopNode.
    :param stop: Upper limit of the loop.
    :param content: The first Node of a 'next' path to be executed within the loop.
    :param start: Initial value.
    :param step: Step size for each iteration.
    :param var_name: Optional variable name.
    """
    super().__init__()
    self.start = start
    self.stop = stop
    self.step = step
    if content is not None:
        self.add_edge('content', content)
    self.type = 'int'
    var = Allocation.allocate_var(self.type, var_name, [])
    self.add_edge('var', var)
def lowering(self): """ Create the loops required to express this node in ANSI C code without SIMD and connect this node with the new nodes via 'content' edge. This loop will stay in graph to provide meta information. :return: None. """ # Create loops for settings the bias. b_var = Allocation.allocate_var('float', 'b', self.b.shape, init_data=self.b) out_var_idx = IndexedVariable(self.out_var) b_var_idx = IndexedVariable(b_var) # Create the loops using a descriptor. bias_loop_descr = [ [0, self.out_dim[0], 1], [0, self.out_dim[1], 1], [0, self.out_dim[2], 1] ] bias_loops = LoopNode.create_loops_by_description(bias_loop_descr) b_h_loop = bias_loops[0] b_w_loop = bias_loops[1] b_c_loop = bias_loops[2] set_bias = AssignmentNode(out_var_idx, b_var_idx) b_c_loop.add_edge('content', set_bias) out_var_idx.set_indices([b_h_loop.get_node('var'), b_w_loop.get_node('var'), b_c_loop.get_node('var')]) b_var_idx.set_indices([b_c_loop.get_node('var')]) # Create the loops for convolution, again with descriptors conv_loop_descr = [ [0, self.out_dim[0] * self.SH, self.stride[0]], [0, self.out_dim[1] * self.SW, self.stride[1]], [0, self.KH, 1], [0, self.KW, 1], [0, self.C_IN, 1], [0, self.C_OUT, 1] ] conv_loops = LoopNode.create_loops_by_description(conv_loop_descr) h_loop = conv_loops[0] w_loop = conv_loops[1] kh_loop = conv_loops[2] kw_loop = conv_loops[3] c_in_loop = conv_loops[4] c_out_loop = conv_loops[5] b_h_loop.add_edge('next', h_loop) w_var = Allocation.allocate_var('float', 'w', self.w.shape, init_data=self.w) out_var_idx = IndexedVariable(self.out_var) in_var_idx = IndexedVariable(self.in_var, False) w_var_idx = IndexedVariable(w_var, False) # Indices of IndexedVariables must respect the stride exp1 = Expression('{var} / {stride0}', var=h_loop.get_node('var'), stride0=Constant(self.stride[0])) exp2 = Expression('{var} / {stride1}', var=w_loop.get_node('var'), stride1=Constant(self.stride[1])) # And access to the image start at the upper left corner. But we have to add the current offset of the filter. exp3 = Expression('{var1} + {var2}', var1=h_loop.get_node('var'), var2=kh_loop.get_node('var')) exp4 = Expression('{var1} + {var2}', var1=w_loop.get_node('var'), var2=kw_loop.get_node('var')) out_var_idx.set_indices([exp1, exp2, c_out_loop.get_node('var')]) in_var_idx.set_indices([exp3, exp4, c_in_loop.get_node('var')]) w_var_idx.set_indices( [kh_loop.get_node('var'), kw_loop.get_node('var'), c_in_loop.get_node('var'), c_out_loop.get_node('var')]) mac_node = MACNode(out_var_idx, w_var_idx, in_var_idx) c_out_loop.add_edge('content', mac_node) # These variables must be declared (partially with initial data) at the beginning of the function CHeaderNode.instance().var_decls.append(self.out_var) CHeaderNode.instance().const_decls.append(w_var) CHeaderNode.instance().const_decls.append(b_var) # Don't remove this node, just put everything as content to this node. self.add_edge('content', b_h_loop)