def lowering(self):
    """
    Create the loops required to express this node in ANSI C code without SIMD
    and replace this node. The loops stay in the graph to provide meta
    information.

    :return: None.
    """
    # Flattened copy of the input; softmax operates on the whole tensor.
    flat_var = Allocation.allocate_var('float', 'flat_x', np.prod(self.out_dim))
    flat_var_idx = IndexedVariable(flat_var)
    copy_node = AssignmentNode(flat_var, self.in_var)

    # One loop accumulates sum(exp(x)) over the flattened input.
    sum_var = Allocation.allocate_var('float', 'sum', [])
    sum_loop = LoopNode(flat_var.dim)
    sum_exp = Expression('{sum_var} += expf({t_var_idx});', sum_var=sum_var, t_var_idx=flat_var_idx)
    sum_loop.add_edge('content', ExpressionNode(sum_exp))
    flat_var_idx.set_indices([sum_loop.get_node('var')])

    # A second loop nest normalizes every element by the accumulated sum.
    loops, idxs = LoopNode.create_loops(self.in_var.dim)
    out_var_idx = IndexedVariable(self.out_var)
    in_var_idx = IndexedVariable(self.in_var)
    out_var_idx.set_indices(idxs)
    in_var_idx.set_indices(idxs)
    norm_exp = Expression('{out_var_idx} = expf({in_var_idx}) / {sum_var};',
                          out_var_idx=out_var_idx, in_var_idx=in_var_idx, sum_var=sum_var)
    loops[-1].add_edge('content', ExpressionNode(norm_exp))

    # Chain: copy -> sum loop -> normalization loops.
    sum_loop.add_edge('next', loops[0])
    copy_node.add_edge('next', sum_loop)

    # Declarations for the generated C function; expf needs <math.h>.
    header = CHeaderNode.instance()
    header.pointer_decls.append(flat_var)
    header.var_decls.append(self.out_var)
    header.var_decls.append(sum_var)
    header.math_required = True

    # Meta data not required yet so remove this node.
    self.replace_self_with_path(copy_node, loops[0])
def __init__(self, H, W, C_OUT):
    """
    Init the Node. Immediately creates Nodes for writing C code as this node
    is applied after general lowering.

    :param H: Height.
    :param W: Width.
    :param C_OUT: Number of output channels.
    """
    super().__init__()
    # Three nested loops over the output volume (H x W x C_OUT).
    loops = LoopNode.create_loops_by_description([
        [0, H, 1],
        [0, W, 1],
        [0, C_OUT, 1],
    ])
    self.add_edge('content', loops[0])

    # SSE accumulator variable, zero-initialized in the innermost loop.
    self.sse_var = Allocation.allocate_var('__m128i', 'cx', [H, W, C_OUT])
    acc_idx = IndexedVariable(self.sse_var)
    acc_idx.set_indices([loop.get_node('var') for loop in loops])
    zero_node = AssignmentNode(acc_idx, Expression('_mm_setzero_ps()'))
    loops[2].add_edge('content', zero_node)

    self.var_decls.append(self.sse_var)
def lowering(self):
    """
    Create the loops required to express this node in ANSI C code without SIMD
    and replace this node. The loops stay in the graph to provide meta
    information.

    :return: None.
    """
    out_idx = IndexedVariable(self.out_var)
    in_idx = IndexedVariable(self.in_var)
    # Innermost statement: out = in - mean.
    node = SubNode(out_idx, in_idx, Constant(self.mean))

    # Wrap a loop around the node for every output dimension, innermost first.
    loop_vars = []
    for dim in reversed(self.out_dim):
        node = LoopNode(dim, node)
        loop_vars.append(node.get_node('var'))
    # Collected inner-to-outer; index order must be outer-to-inner.
    loop_vars.reverse()
    in_idx.set_indices(loop_vars)
    out_idx.set_indices(loop_vars)

    # Meta data not required yet so remove this node.
    self.replace_self_with_path(node, node)
    CHeaderNode.instance().var_decls.append(self.out_var)
def lowering(self):
    """
    Create the Nodes required to express this node in ANSI C code. It creates
    loops that convert all quantized values back to floats. The loops stay in
    the graph to provide meta information.

    :return: None.
    """
    loops, indices = LoopNode.create_loops(self.in_var.dim)
    src_idx = IndexedVariable(self.in_var)
    dst_idx = IndexedVariable(self.out_var)
    src_idx.set_indices(indices)
    dst_idx.set_indices(indices)
    # Dequantization is a single multiply with the combined scale factor.
    scale_node = MultNode(dst_idx, src_idx, Constant(self.x_scale * self.const_scale))
    loops[-1].add_edge('content', scale_node)
    self.add_edge('content', loops[0])
def __init__(self, res_var, sse_var, H, W, C_OUT):
    """
    Init the Node. Immediately creates Nodes for writing C code as this node
    is applied after general lowering.

    :param res_var: The Variable that is the output of the original Node that was quantized.
    :param sse_var: The Variable for storing the intermediate quantized results.
    :param H: Output height.
    :param W: Output width.
    :param C_OUT: Channels out.
    """
    super().__init__()
    # Loop over every output position (H x W x C_OUT).
    loops = LoopNode.create_loops_by_description([
        [0, H, 1],
        [0, W, 1],
        [0, C_OUT, 1],
    ])
    self.add_edge('content', loops[0])

    sse_idx = IndexedVariable(sse_var)
    res_idx = IndexedVariable(res_var)
    idx_vars = [loop.get_node('var') for loop in loops]
    sse_idx.set_indices(idx_vars)
    res_idx.set_indices(idx_vars)

    # Sign-extend the packed 16-bit lanes into two 32-bit vectors ...
    lo_var = Allocation.allocate_var('__m128i', 'lo')
    n1 = AssignmentNode(lo_var,
                        Expression('_mm_srai_epi32(_mm_unpacklo_epi16({qx}, {qx}), 16);', qx=sse_idx))
    hi_var = Allocation.allocate_var('__m128i', 'hi')
    n2 = AssignmentNode(hi_var,
                        Expression('_mm_srai_epi32(_mm_unpackhi_epi16({qx}, {qx}), 16);', qx=sse_idx),
                        n1)
    # ... then reduce horizontally in two hadd steps ...
    sum1_var = Allocation.allocate_var('__m128i', 'sum1')
    n3 = AssignmentNode(sum1_var,
                        Expression('_mm_hadd_epi32({hi}, {lo});', lo=lo_var, hi=hi_var),
                        n2)
    sum2_var = Allocation.allocate_var('__m128i', 'sum2')
    n4 = AssignmentNode(sum2_var,
                        Expression('_mm_hadd_epi32({sum1}, {sum1});', sum1=sum1_var),
                        n3)
    # ... store to a scratch array and accumulate the two partial sums.
    temp_var = Allocation.allocate_var('int', 'temp_res', [4])
    n5 = FuncCallNode(Expression('_mm_store_si128((__m128i*)&{res}, {sum2});',
                                 res=temp_var, sum2=sum2_var),
                      n4)
    temp_idx_0 = IndexedVariable(temp_var)
    temp_idx_0.set_indices([Constant('0')])
    temp_idx_1 = IndexedVariable(temp_var)
    temp_idx_1.set_indices([Constant('1')])
    n6 = AddNode(res_idx, res_idx, temp_idx_0, n5)
    AddNode(res_idx, res_idx, temp_idx_1, n6)

    loops[2].add_edge('content', n1)

    for var in (lo_var, hi_var, sum1_var, sum2_var, temp_var):
        self.var_decls.append(var)
def lowering(self):
    """
    Create the Nodes required to express this node in ANSI C code. It creates
    loops that convert all floats to the desired data type applying the given
    scale. The loops stay in the graph to provide meta information.

    :return: None.
    """
    loops, indices = LoopNode.create_loops(self.in_var.dim)
    src_idx = IndexedVariable(self.in_var)
    dst_idx = IndexedVariable(self.out_var)
    src_idx.set_indices(indices)
    dst_idx.set_indices(indices)
    # Quantization multiplies by the reciprocal of the scale.
    scale_node = MultNode(dst_idx, src_idx, Constant(1 / self.x_scale))
    loops[-1].add_edge('content', scale_node)
    self.add_edge('content', loops[0])
    self.var_decls.append(self.out_var)
def lowering(self):
    """
    Create the loops required to express this node in ANSI C code without SIMD.
    The loops stay in the graph to provide meta information.

    :return: None.
    """
    loops, idxs = LoopNode.create_loops(self.in_var.dim)
    in_var_idx = IndexedVariable(self.in_var)
    out_var_idx = IndexedVariable(self.out_var)
    in_var_idx.set_indices(idxs)
    out_var_idx.set_indices(idxs)
    condition = Expression('{t_var_idx} < 0', t_var_idx=in_var_idx)
    if self.alpha == 0:
        # Plain ReLU: negative inputs map to 0.
        false_exp = Constant(0)
    else:
        # Leaky ReLU: negative inputs are scaled by alpha.
        # BUG FIX: the '{alpha}' placeholder was previously unbound, so
        # rendering this Expression via str.format would raise KeyError.
        # Bind it as a Constant, matching the style used for strides elsewhere.
        false_exp = Expression('{alpha} * {t_var_idx}',
                               alpha=Constant(self.alpha), t_var_idx=in_var_idx)
    cond_node = ConditionalNode(out_var_idx, condition, false_exp, in_var_idx)
    loops[-1].add_edge('content', cond_node)
    CHeaderNode.instance().var_decls.append(self.out_var)
    # Meta information of this node not required yet, so delete this node and
    # replace it with the loops.
    self.replace_self_with_path(loops[0], loops[0])
def lowering(self):
    """
    Create the loops required to express this node in ANSI C code without SIMD
    and connect this node with the new nodes via 'content' edge. The loops stay
    in the graph to provide meta information.

    :return: None.
    """
    # Phase 1: copy the bias into every output position.
    b_var = Allocation.allocate_var('float', 'b', self.b.shape, init_data=self.b)
    out_var_idx = IndexedVariable(self.out_var)
    b_var_idx = IndexedVariable(b_var)
    bias_loops = LoopNode.create_loops_by_description([
        [0, self.out_dim[0], 1],
        [0, self.out_dim[1], 1],
        [0, self.out_dim[2], 1],
    ])
    b_h_loop, b_w_loop, b_c_loop = bias_loops
    set_bias = AssignmentNode(out_var_idx, b_var_idx)
    b_c_loop.add_edge('content', set_bias)
    out_var_idx.set_indices([b_h_loop.get_node('var'),
                             b_w_loop.get_node('var'),
                             b_c_loop.get_node('var')])
    b_var_idx.set_indices([b_c_loop.get_node('var')])

    # Phase 2: the six nested convolution loops.
    conv_loops = LoopNode.create_loops_by_description([
        [0, self.out_dim[0] * self.SH, self.stride[0]],
        [0, self.out_dim[1] * self.SW, self.stride[1]],
        [0, self.KH, 1],
        [0, self.KW, 1],
        [0, self.C_IN, 1],
        [0, self.C_OUT, 1],
    ])
    h_loop, w_loop, kh_loop, kw_loop, c_in_loop, c_out_loop = conv_loops
    b_h_loop.add_edge('next', h_loop)

    w_var = Allocation.allocate_var('float', 'w', self.w.shape, init_data=self.w)
    out_var_idx = IndexedVariable(self.out_var)
    in_var_idx = IndexedVariable(self.in_var, False)
    w_var_idx = IndexedVariable(w_var, False)

    # Output indices must respect the stride ...
    exp1 = Expression('{var} / {stride0}', var=h_loop.get_node('var'), stride0=Constant(self.stride[0]))
    exp2 = Expression('{var} / {stride1}', var=w_loop.get_node('var'), stride1=Constant(self.stride[1]))
    # ... while image access starts at the upper left corner plus the current
    # offset of the filter.
    exp3 = Expression('{var1} + {var2}', var1=h_loop.get_node('var'), var2=kh_loop.get_node('var'))
    exp4 = Expression('{var1} + {var2}', var1=w_loop.get_node('var'), var2=kw_loop.get_node('var'))

    out_var_idx.set_indices([exp1, exp2, c_out_loop.get_node('var')])
    in_var_idx.set_indices([exp3, exp4, c_in_loop.get_node('var')])
    w_var_idx.set_indices([kh_loop.get_node('var'), kw_loop.get_node('var'),
                           c_in_loop.get_node('var'), c_out_loop.get_node('var')])
    mac_node = MACNode(out_var_idx, w_var_idx, in_var_idx)
    c_out_loop.add_edge('content', mac_node)

    # These variables must be declared (partially with initial data) at the
    # beginning of the generated C function.
    header = CHeaderNode.instance()
    header.var_decls.append(self.out_var)
    header.const_decls.append(w_var)
    header.const_decls.append(b_var)

    # Don't remove this node, just put everything as content to this node.
    self.add_edge('content', b_h_loop)
def lowering(self):
    """
    Create the loops required to express this node in ANSI C code without SIMD
    and replace this node. The loops stay in the graph to provide meta
    information.

    :return: None.
    """
    # Outer loops walk the input plane with the pooling stride.
    h_loop = LoopNode(stop=self.h_loop_end, step=self.stride[0])
    w_loop = LoopNode(stop=self.w_loop_end, step=self.stride[1])
    c_loop = LoopNode(self.in_dim[2])
    h_loop.add_edge('content', w_loop)
    w_loop.add_edge('content', c_loop)

    # Output indices must respect the stride.
    exp1 = Expression('{var} / {stride0}', var=h_loop.get_node('var'), stride0=Constant(self.stride[0]))
    exp2 = Expression('{var} / {stride1}', var=w_loop.get_node('var'), stride1=Constant(self.stride[1]))

    # Seed each output cell with the window's first input element.
    seed_out = IndexedVariable(self.out_var)
    seed_in = IndexedVariable(self.in_var, False)
    seed_out.set_indices([exp1, exp2, c_loop.get_node('var')])
    seed_in.set_indices([h_loop.get_node('var'), w_loop.get_node('var'), c_loop.get_node('var')])
    init = AssignmentNode(seed_out, seed_in)
    c_loop.add_edge('content', init)

    # Inner loops scan the pooling window.
    kh_loop = LoopNode(self.size[0])
    kw_loop = LoopNode(self.size[1])
    init.add_edge('next', kh_loop)
    kh_loop.add_edge('content', kw_loop)

    # Window access: upper left corner plus the current kernel offset.
    exp3 = Expression('{var1} + {var2}', var1=h_loop.get_node('var'), var2=kh_loop.get_node('var'))
    exp4 = Expression('{var1} + {var2}', var1=w_loop.get_node('var'), var2=kw_loop.get_node('var'))
    max_out = IndexedVariable(self.out_var)
    max_in = IndexedVariable(self.in_var, False)
    max_out.set_indices([exp1, exp2, c_loop.get_node('var')])
    max_in.set_indices([exp3, exp4, c_loop.get_node('var')])

    # Keep the larger of the current maximum and the window element.
    condition = Expression('{var_in} > {var_out}', var_in=max_in, var_out=max_out)
    update = ConditionalNode(max_out, condition, max_in, max_out)
    kw_loop.add_edge('content', update)

    # Meta data not required yet so remove this node.
    self.replace_self_with_path(h_loop, h_loop)
    CHeaderNode.instance().var_decls.append(self.out_var)
def lowering(self):
    """
    Create the loops required to express this node in ANSI C code without SIMD
    and replace this node. The loops stay in the graph to provide meta
    information.

    :return: None.
    """
    b_var = Allocation.allocate_var('float', 'b', self.b.shape, init_data=self.b)
    b_var_idx = IndexedVariable(b_var)
    # Make sure that e.g. Flatten has been applied before. In Keras it is not
    # required but it makes things easier.
    # BUG FIX: `_len` was undefined and raised NameError; the builtin `len`
    # is what was intended here.
    assert len(self.in_dim) == 1

    # Assign bias to output variable.
    out_var_idx = IndexedVariable(self.out_var)
    b_loop = LoopNode(self.out_dim)
    out_var_idx.set_indices([b_loop.get_node('var')])
    b_var_idx.set_indices([b_loop.get_node('var')])
    set_bias = AssignmentNode(out_var_idx, b_var_idx)
    b_loop.add_edge('content', set_bias)

    # Loops for the matrix-vector multiplication.
    out_var_idx = IndexedVariable(self.out_var)
    in_loop = LoopNode(self.in_dim)
    out_loop = LoopNode(self.out_dim)
    out_var_idx.set_indices([out_loop.get_node('var')])
    w_var = Allocation.allocate_var('float', 'w', self.w.shape, init_data=self.w)
    in_var_idx = IndexedVariable(self.in_var, False)
    w_var_idx = IndexedVariable(w_var, False)
    in_var_idx.set_indices([in_loop.get_node('var')])
    w_var_idx.set_indices([in_loop.get_node('var'), out_loop.get_node('var')])
    mac_node = MACNode(out_var_idx, in_var_idx, w_var_idx)
    b_loop.add_edge('next', in_loop)
    in_loop.add_edge('content', out_loop)
    out_loop.add_edge('content', mac_node)

    # Declarations (partially with initial data) for the generated C function.
    CHeaderNode.instance().var_decls.append(self.out_var)
    CHeaderNode.instance().const_decls.append(w_var)
    CHeaderNode.instance().const_decls.append(b_var)

    # Meta data not required yet so remove this node.
    self.replace_self_with_path(b_loop, in_loop)