def generate_c_code(self, **kwargs):
    """Generate the C source for a constant-output operator.

    Emits the include lines, the param-type definition, and one C function
    that declares a local ``int`` array initialized with this op's
    precomputed output values and copies it element-wise into ``output``.

    Returns:
        str: the complete C source fragment for this operator.
    """
    res = ''
    # include header
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    # 1: nested for-loop skeleton over the output shape; contains a
    # '[statements]' placeholder filled below.
    TemplateArrayAddLoop = c_helper.generate_ndim_for_loop(
        np.ones(self.output_tensor_shapes[0]))
    # 2: local constant-array declaration.  (Local renamed from the
    # misspelled 'lcoal_var_name'.)
    local_var_name = '_output_val'
    indent = ' ' * 4
    c_array = ', '.join([str(v) for v in self.output_tensor_values[0]])
    # NOTE(review): str(self.input_tensor_ndims) renders a Python list such
    # as "[1]", which doubles as the C array-size bracket.  That is only a
    # correct array size when the rank value happens to match the element
    # count -- TODO confirm against the ops routed through this generator.
    TemplateConstValue = indent + 'int ' + local_var_name + str(
        self.input_tensor_ndims) + '={' + c_array + '};'
    # Copy statement; uses only the innermost loop index 'i'.
    # NOTE(review): for outputs of rank > 1 the generated loop nest defines
    # more indices than 'output[i]' consumes -- TODO confirm this op only
    # ever sees rank-1 outputs.
    # (A dead `mapping` dict and a no-op `.format(**mapping)` call were
    # removed: this statement template contains no replacement fields.)
    TemplateStatements = 'output[i] =' + local_var_name + '[i];'
    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param,{tx} input{XDims} , {ty} output{YDims}, void *inputs_params, void* outputs_params)
        {{
        {statements1}
        {statements2}
        }}
        ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    mappingf.update({
        'XDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[0])
    })
    mappingf.update({
        'YDims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    mappingf.update({'tx': data_type.np2c(self.input_tensor_dtypes[0])})
    mappingf.update({'ty': data_type.np2c(self.output_tensor_dtypes[0])})
    mappingf.update({'statements1': TemplateConstValue})
    mappingf.update({
        'statements2':
        TemplateArrayAddLoop.replace('[statements]', TemplateStatements)
    })
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs):
    """Generate the C source for a uniform-random fill operator.

    The emitted C function fills every output element with a pseudo-random
    value drawn from ``[low, high]`` via ``rand()/RAND_MAX``, where the
    bounds come from the ``high``/``low`` attributes (defaults 1.0 / 0.0).

    Returns:
        str: the complete C source fragment for this operator.
    """
    res = ''
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    # 1: nested for-loop skeleton over the output shape.
    TemplateArrayRandomLoop = c_helper.generate_ndim_for_loop(
        np.ones(self.output_tensor_shapes[0]))
    # BUGFIX: the affine map previously ended in '- low', which yields
    # values in [-low, high - 2*low] instead of [low, high].  Scaling a
    # uniform draw r in [0, 1] into [low, high] is low + (high - low) * r.
    TemplateStatements = '''
        output{dims} = ((high - low) * rand()/RAND_MAX ) + low;
    '''
    mapping = {}
    mapping.update({
        'dims':
        ''.join([
            '[' + v + ']' for v in
            string.ascii_lowercase[8:8 + self.output_tensor_ndims[0]]
        ])
    })
    # 3
    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {t_in} data{dims}, {t_out} output{dims}, void *inputs_params, void* outputs_params)
        {{
        const float high = {high};
        const float low = {low};
        {statements}
        }}
        ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    mappingf.update({
        'dims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    mappingf.update({'t_in': data_type.np2c(self.input_tensor_dtypes[0])})
    mappingf.update(
        {'t_out': data_type.np2c(self.output_tensor_dtypes[0])})
    mappingf.update({'high': self.attrs.get('high', 1.0)})
    mappingf.update({'low': self.attrs.get('low', 0.0)})
    mappingf.update({
        'statements':
        TemplateArrayRandomLoop.replace(
            '[statements]', TemplateStatements.format(**mapping))
    })
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs):
    """Generate the C source for an element-wise Abs kernel.

    Chooses ``fabs``/``fabsf`` from the output C type and splices one
    ``Y[...] = abs(X[...])`` statement into a loop nest over the output
    shape.

    Raises:
        ValueError: if the output C type is not double/float/int-like.

    Returns:
        str: the complete C source fragment for this operator.
    """
    res = ''
    # include header
    res += '\n'.join([c_helper.generate_local_include(h)
                      for h in self.get_c_op_include_header()])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    # 1: nested for-loop skeleton over the output shape.
    TemplateArrayAddLoop = c_helper.generate_ndim_for_loop(
        np.ones(self.output_tensor_shapes[0]))
    # 2: one abs() call per element.
    TemplateStatements = '{Y}{StatementDims} = {c_abs}({X}{StatementDims});'
    mapping = {}
    mapping.update({'X': self.input_tensor_names[0]})
    mapping.update({'Y': self.output_tensor_names[0]})
    StatementDims = ''
    # Build the [i][j]... subscript right-to-left over the trailing axes.
    for _, step in zip_longest(
            self.input_tensor[0].shape[::-1],
            reversed(string.ascii_lowercase[8:8 +
                                            self.output_tensor_ndims[0]])):
        StatementDims = '[{0}]'.format(step) + StatementDims
    mapping.update({'StatementDims': StatementDims})
    out_c_type = data_type.np2c(self.output_tensor_dtypes[0])
    if out_c_type.startswith('double'):
        mapping.update({'c_abs': 'fabs'})
    elif out_c_type.startswith('float'):
        mapping.update({'c_abs': 'fabsf'})
    elif out_c_type.startswith('int'):
        # NOTE(review): integer outputs are also routed through fabsf(),
        # which round-trips via float and loses precision for |x| > 2^24.
        # Switching to abs()/labs() would require <stdlib.h> in the
        # generated includes, so it is only flagged here -- TODO confirm.
        mapping.update({'c_abs': 'fabsf'})
    else:
        raise ValueError('{0} is not supported'.format(out_c_type))
    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param,{t} {X}{Dims} , {t} {Y}{Dims}, void *inputs_params, void* outputs_params)
        {{
        {statements}
        }}
        ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    mappingf.update({'X': self.input_tensor_names[0]})
    mappingf.update({'Y': self.output_tensor_names[0]})
    # BUGFIX(dead code): 'Dims' was assigned twice -- first from the input
    # shape, then immediately overwritten with the output shape.  Only the
    # output-shape value was ever used, so the dead first assignment is
    # removed (behavior unchanged).
    mappingf.update({
        'Dims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    mappingf.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    mappingf.update({
        'statements':
        TemplateArrayAddLoop.replace('[statements]',
                                     TemplateStatements.format(**mapping))
    })
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs):
    """Emit the C source for an element-wise Exp kernel.

    Produces the include block, the param-type definition, and one C
    function computing Y = exp(X) over every element, optionally prefixed
    with an OpenMP pragma.
    """
    res = ''
    res += '\n'.join([
        c_helper.generate_local_include(header)
        for header in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'

    # Loop-nest skeleton over the output shape; '[pragma]' and
    # '[statements]' are placeholders filled below.
    loop_skeleton = c_helper.generate_ndim_for_loop(
        np.ones(self.output_tensor_shapes[0]), pragma=self.OpenMP)
    if self.OpenMP:
        loop_skeleton = loop_skeleton.replace('[pragma]', self.PRAGMA_OMP)

    # Innermost statement: one exp() per element.
    body_template = '''
        Y{dims} = exp(X{dims});
    '''
    rank = self.output_tensor_ndims[0]
    subscript = ''.join(
        '[' + letter + ']' for letter in string.ascii_lowercase[8:8 + rank])
    body = body_template.format(dims=subscript)

    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {t} X{dims}, {t} Y{dims}, void *inputs_params, void* outputs_params)
        {{
        {statements}
        }}
        ''')

    func_fields = {
        'op_func_name': self.get_func_name(),
        'dims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
        't': data_type.np2c(self.output_tensor_dtypes[0]),
        'statements': loop_skeleton.replace('[statements]', body),
    }

    res += '\n\n'
    res += TemplateFunction.format(**func_fields)
    return res
def generate_c_code(self, **kwargs):
    """Emit the C source for this operator.

    The element-wise kernel body comes from self.generate_kernel_code()
    and is spliced into a loop nest over the output shape; an OpenMP
    pragma is inserted when enabled.
    """
    # Loop-nest skeleton over the output shape.
    loop_nest = c_helper.generate_ndim_for_loop(
        np.ones(self.output_tensor_shapes[0]), pragma=self.OpenMP)
    if self.OpenMP:
        loop_nest = loop_nest.replace('[pragma]', self.PRAGMA_OMP)

    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {t} {X}{XDims}, {t} {C}{CDims}, void *inputs_params, void* outputs_params)
        {{
        {statements}
        }}
        ''')

    fields = {
        'op_func_name': self.get_func_name(),
        'X': self.input_tensor_names[0],
        'C': self.output_tensor_names[0],
        'XDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[0]),
        'CDims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
        't': data_type.np2c(self.output_tensor_dtypes[0]),
        'statements': loop_nest.replace('[statements]',
                                        self.generate_kernel_code()),
    }

    includes = '\n'.join(
        c_helper.generate_local_include(header)
        for header in self.get_c_op_include_header())
    # Sections separated by blank lines: includes, param type, then (after
    # an extra blank gap) the generated function itself.
    return '\n\n'.join([includes, self.get_c_param_type(), '',
                        TemplateFunction.format(**fields)])
def generate_c_code(self, **kwargs):
    """Emit the C source for a LeakyRelu kernel.

    The generated function declares a constant 'alpha' taken from the op's
    attributes and wraps the kernel body from generate_kernel_code() in a
    loop nest over the output shape (OpenMP pragma optional).
    """
    includes = '\n'.join(
        c_helper.generate_local_include(header)
        for header in self.get_c_op_include_header())

    # Loop-nest skeleton over the output shape.
    loop_nest = c_helper.generate_ndim_for_loop(
        np.ones(self.output_tensor_shapes[0]), pragma=self.OpenMP)
    if self.OpenMP:
        loop_nest = loop_nest.replace('[pragma]', self.PRAGMA_OMP)

    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {t} {X}{dims}, {t} {Y}{dims}, void *inputs_params, void* outputs_params)
        {{
        LeakyReluOpParam *param_ptr = (LeakyReluOpParam *)op_param;
        const {t} alpha = {alpha};
        {statements}
        }}
        ''')

    fields = {
        'op_func_name': self.get_func_name(),
        'dims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
        't': data_type.np2c(self.output_tensor_dtypes[0]),
        'alpha': self.attrs['alpha'],
        'X': self.input_tensor_names[0],
        'Y': self.output_tensor_names[0],
        'statements': loop_nest.replace(
            '[statements]',
            self.generate_kernel_code(alpha_variable_value=False)),
    }

    # Sections separated by blank lines: includes, param type, then (after
    # an extra blank gap) the generated function itself.
    return '\n\n'.join([includes, self.get_c_param_type(), '',
                        TemplateFunction.format(**fields)])
def generate_c_code(self, **kwargs):
    """Emit the C source for this reduction operator.

    The function body is assembled from two pieces produced elsewhere on
    this op: initialization statements (generate_c_code_init_output)
    followed by the main reduction statements (generate_c_code_reduce).
    """
    includes = '\n'.join(
        c_helper.generate_local_include(header)
        for header in self.get_c_op_include_header())

    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param,{t} input{XDims}, {t} output{CDims}, void *inputs_params, void* outputs_params)
        {{
        {init_statements}\n
        {main_statements}
        }}
        ''')

    fields = {
        'op_func_name': self.get_func_name(),
        'XDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[0]),
        'CDims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
        't': data_type.np2c(self.output_tensor_dtypes[0]),
        'init_statements': self.generate_c_code_init_output(),
        'main_statements': self.generate_c_code_reduce(),
    }

    # Sections separated by blank lines: includes, param type, then (after
    # an extra blank gap) the generated function itself.
    return '\n\n'.join([includes, self.get_c_param_type(), '',
                        TemplateFunction.format(**fields)])
def generate_c_code(self, **kwargs):
    """Generate the C source for a MaxPool operator (1D/2D/3D).

    Selects a C body template by output rank:
      * ndim == 3: pooling over [N][C][W]
      * ndim == 4: pooling over [N][C][H][W], with an unrolled fast path
        for the padding-free, dilation-free, stride-aligned 2x2 kernel
      * ndim == 5: pooling over [N][C][D][H][W]

    The output buffer is memset to zero first; each element is only written
    when at least one in-bounds tap was seen (max_flag).

    Raises:
        ValueError: if the output rank is not 3, 4 or 5.

    Returns:
        str: the complete C source fragment for this operator.
    """
    res = ''
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    ndim = self.output_tensor_ndims[0]
    if (ndim != 3 and ndim != 4 and ndim != 5):
        raise ValueError()
    kernel_shape = self.attrs['kernel_shape']
    pads = self.attrs['pads']
    storage_order = self.attrs['storage_order']
    strides = self.attrs['strides']
    dilations = self.attrs['dilations']
    if (ndim == 3):
        # 1D pooling.  Window index kw runs over [-pad_begin,
        # kernel - pad_begin); out-of-range taps are skipped.
        TemplateStatements = '''
            const int X_n = {X_d0};
            const int X_c = {X_d1};
            const int X_w = {X_d2};
            const int Y_n = {Y_d0};
            const int Y_c = {Y_d1};
            const int Y_w = {Y_d2};
            const int kernel_shape_w = {kernel_shape_w};
            const int pad_w_begin = {pad_w_begin};
            const int pad_w_end = {pad_w_end};
            const int stride_w = {stride_w};
            const int dilation_w = {dilation_w};
            const int storage_order = {storage_order};
            const int kernel_shape_w_min = -pad_w_begin;
            const int kernel_shape_w_max = (kernel_shape_w - pad_w_begin);
            memset( (void *)Y, 0, sizeof(Y[0][0][0]) * Y_n * Y_c * Y_w );
            for (int n=0; n<Y_n; n++) {{
            {pragma}
                for (int c=0; c<Y_c; c++) {{
                    for (int w=0; w<Y_w; w++) {{
                        {t} pool;
                        int max_flag;
                        pool = {MIN_VAL};
                        max_flag = 0;
                        for (int kw=kernel_shape_w_min; kw<kernel_shape_w_max; kw++) {{
                            if ((w*stride_w+kw*dilation_w < 0) || (w*stride_w+kw*dilation_w >= X_w)) {{ continue; }}
                            if (pool < X[n][c][w*stride_w+kw*dilation_w]) {{
                                pool = X[n][c][w*stride_w+kw*dilation_w];
                                max_flag = 1;
                            }}
                        }}
                        if (max_flag) {{
                            Y[n][c][w] = pool;
                        }}
                    }}
                }}
            }}
        '''
        mapping = {}
        mapping.update({'X_d0': self.input_tensor_shapes[0][0]})
        mapping.update({'X_d1': self.input_tensor_shapes[0][1]})
        mapping.update({'X_d2': self.input_tensor_shapes[0][2]})
        mapping.update({'Y_d0': self.output_tensor_shapes[0][0]})
        mapping.update({'Y_d1': self.output_tensor_shapes[0][1]})
        mapping.update({'Y_d2': self.output_tensor_shapes[0][2]})
        mapping.update({'kernel_shape_w': kernel_shape[0]})
        mapping.update({'pad_w_begin': pads[0]})
        mapping.update({'pad_w_end': pads[1]})
        mapping.update({'stride_w': strides[0]})
        mapping.update({'dilation_w': dilations[0]})
        mapping.update({'storage_order': storage_order})
        mapping.update({'pragma': self.PRAGMA_OMP if self.OpenMP else ''})
        mapping.update({'MIN_VAL': get_min(self.input_tensor[0].dtype)})
        mapping.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    elif (ndim == 4):
        # 2D pooling: shared constant/memset prologue, then either the
        # unrolled 2x2 fast path or the general windowed loop.
        TemplateStatements = '''
            const int X_n = {X_d0};
            const int X_c = {X_d1};
            const int X_h = {X_d2};
            const int X_w = {X_d3};
            const int Y_n = {Y_d0};
            const int Y_c = {Y_d1};
            const int Y_h = {Y_d2};
            const int Y_w = {Y_d3};
            const int kernel_shape_h = {kernel_shape_h};
            const int kernel_shape_w = {kernel_shape_w};
            const int pad_h_begin = {pad_h_begin};
            const int pad_w_begin = {pad_w_begin};
            const int pad_h_end = {pad_h_end};
            const int pad_w_end = {pad_w_end};
            const int stride_h = {stride_h};
            const int stride_w = {stride_w};
            const int dilation_h = {dilation_h};
            const int dilation_w = {dilation_w};
            const int storage_order = {storage_order};
            const int kernel_shape_h_min = -pad_h_begin;
            const int kernel_shape_h_max = (kernel_shape_h - pad_h_begin);
            const int kernel_shape_w_min = -pad_w_begin;
            const int kernel_shape_w_max = (kernel_shape_w - pad_w_begin);
            memset( (void *)Y, 0, sizeof(Y[0][0][0][0]) * Y_n * Y_c * Y_h * Y_w );
        '''
        # Fast path: no padding, no dilation, 2x2 kernel, and the input
        # extents divide evenly by the strides, so every window is fully
        # in-bounds and can be unrolled into four taps.
        if ((pads[0] == 0) and (pads[1] == 0) and (pads[2] == 0)
                and (pads[3] == 0) and (dilations[0] == 1)
                and (dilations[1] == 1) and (kernel_shape[0] == 2)
                and (kernel_shape[1] == 2)
                and (self.input_tensor_shapes[0][2] % strides[0] == 0)
                and (self.input_tensor_shapes[0][3] % strides[1] == 0)):
            # NOTE(review): this path hard-codes fmaxf(), which is the
            # float variant; for double or integer tensors the taps are
            # converted through float -- confirm the dtypes that can
            # reach this branch.
            TemplateStatements += '''
            for (int n=0; n<Y_n; n++) {{
            {pragma}
                for (int c=0; c<Y_c; c++) {{
                    if (storage_order == 0) {{
                        for (int h=0; h<Y_h; h++) {{
                            for (int w=0; w<Y_w; w++) {{
                                Y[n][c][h][w] = fmaxf( fmaxf( X[n][c][h*stride_h+0][w*stride_w+0], X[n][c][h*stride_h+0][w*stride_w+1] ),
                                                       fmaxf( X[n][c][h*stride_h+1][w*stride_w+0], X[n][c][h*stride_h+1][w*stride_w+1] ));
                            }}
                        }}
                    }} else {{
                        for (int w=0; w<Y_w; w++) {{
                            for (int h=0; h<Y_h; h++) {{
                                Y[n][c][h][w] = fmaxf( fmaxf( X[n][c][h*stride_h+0][w*stride_w+0], X[n][c][h*stride_h+0][w*stride_w+1] ),
                                                       fmaxf( X[n][c][h*stride_h+1][w*stride_w+0], X[n][c][h*stride_h+1][w*stride_w+1] ));
                            }}
                        }}
                    }}
                }}
            }}
            '''
        else:
            # General path: scan the (possibly padded/dilated) window,
            # skipping out-of-range taps; storage_order only swaps the
            # h/w loop nesting order.
            TemplateStatements += '''
            for (int n=0; n<Y_n; n++) {{
            {pragma}
                for (int c=0; c<Y_c; c++) {{
                    if (storage_order == 0) {{
                        for (int h=0; h<Y_h; h++) {{
                            for (int w=0; w<Y_w; w++) {{
                                {t} pool;
                                int max_flag;
                                pool = {MIN_VAL};
                                max_flag = 0;
                                for (int kh=kernel_shape_h_min; kh<kernel_shape_h_max; kh++) {{
                                    if ((h*stride_h+kh*dilation_h < 0) || (h*stride_h+kh*dilation_h >= X_h)) {{ continue; }}
                                    for (int kw=kernel_shape_w_min; kw<kernel_shape_w_max; kw++) {{
                                        if ((w*stride_w+kw*dilation_w < 0) || (w*stride_w+kw*dilation_w >= X_w)) {{ continue; }}
                                        if (pool < X[n][c][h*stride_h+kh*dilation_h][w*stride_w+kw*dilation_w]) {{
                                            pool = X[n][c][h*stride_h+kh*dilation_h][w*stride_w+kw*dilation_w];
                                            max_flag = 1;
                                        }}
                                    }}
                                }}
                                if (max_flag) {{
                                    Y[n][c][h][w] = pool;
                                }}
                            }}
                        }}
                    }} else {{
                        for (int w=0; w<Y_w; w++) {{
                            for (int h=0; h<Y_h; h++) {{
                                {t} pool;
                                int max_flag;
                                pool = {MIN_VAL};
                                max_flag = 0;
                                for (int kh=kernel_shape_h_min; kh<kernel_shape_h_max; kh++) {{
                                    if ((h*stride_h+kh*dilation_h < 0) || (h*stride_h+kh*dilation_h >= X_h)) {{ continue; }}
                                    for (int kw=kernel_shape_w_min; kw<kernel_shape_w_max; kw++) {{
                                        if ((w*stride_w+kw*dilation_w < 0) || (w*stride_w+kw*dilation_w >= X_w)) {{ continue; }}
                                        if (pool < X[n][c][h*stride_h+kh*dilation_h][w*stride_w+kw*dilation_w]) {{
                                            pool = X[n][c][h*stride_h+kh*dilation_h][w*stride_w+kw*dilation_w];
                                            max_flag = 1;
                                        }}
                                    }}
                                }}
                                if (max_flag) {{
                                    Y[n][c][h][w] = pool;
                                }}
                            }}
                        }}
                    }}
                }}
            }}
            '''
        mapping = {}
        mapping.update({'X_d0': self.input_tensor_shapes[0][0]})
        mapping.update({'X_d1': self.input_tensor_shapes[0][1]})
        mapping.update({'X_d2': self.input_tensor_shapes[0][2]})
        mapping.update({'X_d3': self.input_tensor_shapes[0][3]})
        mapping.update({'Y_d0': self.output_tensor_shapes[0][0]})
        mapping.update({'Y_d1': self.output_tensor_shapes[0][1]})
        mapping.update({'Y_d2': self.output_tensor_shapes[0][2]})
        mapping.update({'Y_d3': self.output_tensor_shapes[0][3]})
        mapping.update({'kernel_shape_h': kernel_shape[0]})
        mapping.update({'kernel_shape_w': kernel_shape[1]})
        # ONNX pads layout is [x1_begin, x2_begin, x1_end, x2_end].
        mapping.update({'pad_h_begin': pads[0]})
        mapping.update({'pad_h_end': pads[2]})
        mapping.update({'pad_w_begin': pads[1]})
        mapping.update({'pad_w_end': pads[3]})
        mapping.update({'stride_h': strides[0]})
        mapping.update({'stride_w': strides[1]})
        mapping.update({'dilation_h': dilations[0]})
        mapping.update({'dilation_w': dilations[1]})
        mapping.update({'storage_order': storage_order})
        mapping.update({'pragma': self.PRAGMA_OMP if self.OpenMP else ''})
        mapping.update({'MIN_VAL': get_min(self.input_tensor[0].dtype)})
        mapping.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    elif (ndim == 5):
        # 3D pooling over [N][C][D][H][W]; same skip-out-of-range window
        # scan as the general 2D path, with a depth axis added.
        TemplateStatements = '''
            const int X_n = {X_d0};
            const int X_c = {X_d1};
            const int X_d = {X_d2};
            const int X_h = {X_d3};
            const int X_w = {X_d4};
            const int Y_n = {Y_d0};
            const int Y_c = {Y_d1};
            const int Y_d = {Y_d2};
            const int Y_h = {Y_d3};
            const int Y_w = {Y_d4};
            const int kernel_shape_d = {kernel_shape_d};
            const int kernel_shape_h = {kernel_shape_h};
            const int kernel_shape_w = {kernel_shape_w};
            const int pad_d_begin = {pad_d_begin};
            const int pad_h_begin = {pad_h_begin};
            const int pad_w_begin = {pad_w_begin};
            const int pad_d_end = {pad_d_end};
            const int pad_h_end = {pad_h_end};
            const int pad_w_end = {pad_w_end};
            const int stride_d = {stride_d};
            const int stride_h = {stride_h};
            const int stride_w = {stride_w};
            const int dilation_d = {dilation_d};
            const int dilation_h = {dilation_h};
            const int dilation_w = {dilation_w};
            const int storage_order = {storage_order};
            const int kernel_shape_d_min = -pad_d_begin;
            const int kernel_shape_d_max = (kernel_shape_d - pad_d_begin);
            const int kernel_shape_h_min = -pad_h_begin;
            const int kernel_shape_h_max = (kernel_shape_h - pad_h_begin);
            const int kernel_shape_w_min = -pad_w_begin;
            const int kernel_shape_w_max = (kernel_shape_w - pad_w_begin);
            memset( (void *)Y, 0, sizeof(Y[0][0][0][0][0]) * Y_n * Y_c * Y_d * Y_h * Y_w );
            for (int n=0; n<Y_n; n++) {{
            {pragma}
                for (int c=0; c<Y_c; c++) {{
                    for (int d=0; d<Y_d; d++) {{
                        for (int h=0; h<Y_h; h++) {{
                            for (int w=0; w<Y_w; w++) {{
                                {t} pool;
                                int max_flag;
                                pool = {MIN_VAL};
                                max_flag = 0;
                                for (int kd=kernel_shape_d_min; kd<kernel_shape_d_max; kd++) {{
                                    if ((d*stride_d+kd*dilation_d < 0) || (d*stride_d+kd*dilation_d >= X_d)) {{ continue; }}
                                    for (int kh=kernel_shape_h_min; kh<kernel_shape_h_max; kh++) {{
                                        if ((h*stride_h+kh*dilation_h < 0) || (h*stride_h+kh*dilation_h >= X_h)) {{ continue; }}
                                        for (int kw=kernel_shape_w_min; kw<kernel_shape_w_max; kw++) {{
                                            if ((w*stride_w+kw*dilation_w < 0) || (w*stride_w+kw*dilation_w >= X_w)) {{ continue; }}
                                            if (pool < X[n][c][d*stride_d+kd*dilation_d][h*stride_h+kh*dilation_h][w*stride_w+kw*dilation_w]) {{
                                                pool = X[n][c][d*stride_d+kd*dilation_d][h*stride_h+kh*dilation_h][w*stride_w+kw*dilation_w];
                                                max_flag = 1;
                                            }}
                                        }}
                                    }}
                                }}
                                if (max_flag) {{
                                    Y[n][c][d][h][w] = pool;
                                }}
                            }}
                        }}
                    }}
                }}
            }}
        '''
        mapping = {}
        mapping.update({'X_d0': self.input_tensor_shapes[0][0]})
        mapping.update({'X_d1': self.input_tensor_shapes[0][1]})
        mapping.update({'X_d2': self.input_tensor_shapes[0][2]})
        mapping.update({'X_d3': self.input_tensor_shapes[0][3]})
        mapping.update({'X_d4': self.input_tensor_shapes[0][4]})
        mapping.update({'Y_d0': self.output_tensor_shapes[0][0]})
        mapping.update({'Y_d1': self.output_tensor_shapes[0][1]})
        mapping.update({'Y_d2': self.output_tensor_shapes[0][2]})
        mapping.update({'Y_d3': self.output_tensor_shapes[0][3]})
        mapping.update({'Y_d4': self.output_tensor_shapes[0][4]})
        mapping.update({'kernel_shape_d': kernel_shape[0]})
        mapping.update({'kernel_shape_h': kernel_shape[1]})
        mapping.update({'kernel_shape_w': kernel_shape[2]})
        # ONNX pads layout: three begins followed by three ends.
        mapping.update({'pad_d_begin': pads[0]})
        mapping.update({'pad_d_end': pads[3]})
        mapping.update({'pad_h_begin': pads[1]})
        mapping.update({'pad_h_end': pads[4]})
        mapping.update({'pad_w_begin': pads[2]})
        mapping.update({'pad_w_end': pads[5]})
        mapping.update({'stride_d': strides[0]})
        mapping.update({'stride_h': strides[1]})
        mapping.update({'stride_w': strides[2]})
        mapping.update({'dilation_d': dilations[0]})
        mapping.update({'dilation_h': dilations[1]})
        mapping.update({'dilation_w': dilations[2]})
        mapping.update({'storage_order': storage_order})
        mapping.update({'pragma': self.PRAGMA_OMP if self.OpenMP else ''})
        mapping.update({'MIN_VAL': get_min(self.input_tensor[0].dtype)})
        mapping.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    # 3
    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {t} X{dims_X}, {t} Y{dims}, void *inputs_params, void* outputs_params)
        {{
        {statements}
        }}
        ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    # 'X'/'Y' below are not referenced by TemplateFunction; kept as-is.
    mappingf.update({'X': self.input_tensor_names[0]})
    mappingf.update({
        'dims_X':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[0])
    })
    mappingf.update({'Y': self.output_tensor_names[0]})
    mappingf.update({
        'dims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    mappingf.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    mappingf.update({'statements': TemplateStatements.format(**mapping)})
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs):
    """Generate C source for the quantized ``X / Y + Z`` kernel.

    The generated function rounds X / Y, adds the zero-point input Z and
    saturates the result to the output type's range:
      * ``uint8_t`` outputs clamp to [0, 255]
      * other (int8) outputs clamp to [-128, 127]

    Broadcasting: any size-1 trailing axis of X, Y or Z is indexed with
    ``[0]`` while the output index runs over the full extent.

    Returns:
        str: the complete C source fragment for this operator.
    """
    res = ''
    # include header
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    res += self.get_op_variale_def()  # (sic -- method name defined elsewhere)
    res += '\n\n'
    # 1: loop nest over the output shape; '[pragma]'/'[statements]' are
    # placeholders filled below.
    TemplateArrayAddLoop = c_helper.generate_ndim_for_loop(
        np.empty(self.output_tensor_shapes[0]), pragma=self.OpenMP)
    if self.OpenMP:
        TemplateArrayAddLoop = TemplateArrayAddLoop.replace(
            '[pragma]', self.PRAGMA_OMP)
    # 2: per-element round / add-zero-point / saturate statement.
    mapping = {}
    if data_type.np2c(self.output_tensor_dtypes[0]) == 'uint8_t':
        TemplateStatements = '{C}{CStatementDims} = CLAMP(ROUND({X}{XStatementDims} / {Y}{YStatementDims}) + {Z}{ZStatementDims}, 0, 255);\n'
    else:
        # BUGFIX: bounds were (-127, 128); the int8 range is [-128, 127].
        # The old bounds let 128 through (overflows int8) and rejected -128.
        TemplateStatements = '{C}{CStatementDims} = CLAMP(ROUND({X}{XStatementDims} / {Y}{YStatementDims}) + {Z}{ZStatementDims}, -128, 127);\n'
    mapping.update({'X': self.input_tensor_names[0]})
    mapping.update({'Y': self.input_tensor_names[1]})
    mapping.update({'Z': self.input_tensor_names[2]})
    mapping.update({'C': self.output_tensor_names[0]})
    XStatementDims = ''
    YStatementDims = ''
    ZStatementDims = ''
    CStatementDims = ''
    X, Y, Z = self.input_tensor_values
    # Walk axes right-to-left so broadcasting lines up on trailing dims;
    # a size-1 axis is pinned to index 0.
    for element_num_x, element_num_y, element_num_z, step in zip_longest(
            X.shape[::-1], Y.shape[::-1], Z.shape[::-1],
            reversed(string.ascii_lowercase[8:8 +
                                            self.output_tensor_ndims[0]])):
        if element_num_x is not None:
            if element_num_x == 1:
                XStatementDims = '[0]' + XStatementDims
            else:
                XStatementDims = '[{0}]'.format(step) + XStatementDims
        if element_num_y is not None:
            if element_num_y == 1:
                YStatementDims = '[0]' + YStatementDims
            else:
                YStatementDims = '[{0}]'.format(step) + YStatementDims
        if element_num_z is not None:
            if element_num_z == 1:
                ZStatementDims = '[0]' + ZStatementDims
            else:
                ZStatementDims = '[{0}]'.format(step) + ZStatementDims
        CStatementDims = '[{0}]'.format(step) + CStatementDims
    mapping.update({'XStatementDims': XStatementDims})
    mapping.update({'YStatementDims': YStatementDims})
    mapping.update({'ZStatementDims': ZStatementDims})
    mapping.update({'CStatementDims': CStatementDims})
    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param,{x_type} {X}{XDims} , {y_type} {Y}{YDims}, {z_type} {Z}{ZDims}, {c_type} {C}{CDims}, void *inputs_params, void* outputs_params)
        {{
        {statements}
        }}
        ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    mappingf.update({'X': self.input_tensor_names[0]})
    mappingf.update({'Y': self.input_tensor_names[1]})
    mappingf.update({'Z': self.input_tensor_names[2]})
    mappingf.update({'C': self.output_tensor_names[0]})
    mappingf.update({
        'XDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[0])
    })
    mappingf.update({
        'YDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[1])
    })
    mappingf.update({
        'ZDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[2])
    })
    mappingf.update({
        'CDims':
        c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    mappingf.update(
        {'x_type': data_type.np2c(self.node.input_tensor_values[0].dtype)})
    mappingf.update(
        {'y_type': data_type.np2c(self.node.input_tensor_values[1].dtype)})
    mappingf.update(
        {'z_type': data_type.np2c(self.node.input_tensor_values[2].dtype)})
    mappingf.update(
        {'c_type': data_type.np2c(self.output_tensor_dtypes[0])})
    mappingf.update({
        'statements':
        TemplateArrayAddLoop.replace('[statements]',
                                     TemplateStatements.format(**mapping))
    })
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs):
    """Emit the C source for an element-wise Xor kernel with broadcasting.

    A size-1 trailing axis of either input is indexed with [0]; the output
    subscript always runs over the full extent.
    """
    res = ''
    # include header
    res += '\n'.join([
        c_helper.generate_local_include(header)
        for header in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'

    # Loop-nest skeleton over the output shape.
    loop_skeleton = c_helper.generate_ndim_for_loop(
        np.ones(self.output_tensor_shapes[0]))

    stmt_template = '{C}{CStatementDims} = {X}{XStatementDims} ^ {Y}{YStatementDims};'

    lhs, rhs = self.input_tensor_values
    index_letters = string.ascii_lowercase[8:8 + self.output_tensor_ndims[0]]

    x_subscript = ''
    y_subscript = ''
    out_subscript = ''
    # Walk dimensions right-to-left so broadcasting lines up on the
    # trailing axes; a size-1 axis is pinned to index 0.
    for x_extent, y_extent, idx in zip_longest(lhs.shape[::-1],
                                               rhs.shape[::-1],
                                               reversed(index_letters)):
        if x_extent is not None:
            x_subscript = ('[0]' if x_extent == 1 else
                           '[{0}]'.format(idx)) + x_subscript
        if y_extent is not None:
            y_subscript = ('[0]' if y_extent == 1 else
                           '[{0}]'.format(idx)) + y_subscript
        out_subscript = '[{0}]'.format(idx) + out_subscript

    stmt_fields = {
        'X': self.input_tensor_names[0],
        'Y': self.input_tensor_names[1],
        'C': self.output_tensor_names[0],
        'XStatementDims': x_subscript,
        'YStatementDims': y_subscript,
        'CStatementDims': out_subscript,
    }

    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param,{t} {X}{XDims} , {t} {Y}{YDims}, {t} {C}{CDims}, void *inputs_params, void* outputs_params)
        {{
        {statements}
        }}
        ''')

    func_fields = {
        'op_func_name': self.get_func_name(),
        'X': self.input_tensor_names[0],
        'Y': self.input_tensor_names[1],
        'C': self.output_tensor_names[0],
        'XDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[0]),
        'YDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[1]),
        'CDims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
        't': data_type.np2c(self.output_tensor_dtypes[0]),
        'statements': loop_skeleton.replace(
            '[statements]', stmt_template.format(**stmt_fields)),
    }

    res += '\n\n'
    res += TemplateFunction.format(**func_fields)
    return res
def generate_c_code(self, **kwargs): res = '' # include header res += '\n'.join([ c_helper.generate_local_include(h) for h in self.get_c_op_include_header() ]) res += '\n\n' # param type res += self.get_c_param_type() res += '\n\n' ndim = len(self.c_starts) indent = [' ' * 4] * ndim alpha_iter = reversed(string.ascii_lowercase[8:8 + ndim]) TemplateArrayLoop = ''.join(indent) + '[statements]' input_dim = '' output_dim = '' for s, e in zip(self.c_starts[::-1], self.c_ends[::-1]): var = next(alpha_iter) params = {} params.update({'var': var}) params.update({'start': str(0)}) params.update({'end': str(e)}) loop_start = ''.join( indent ) + 'for(int {var}={start};{var}<{end};{var}++ ){{'.format( **params) loop_end = ''.join(indent) + '}' TemplateArrayLoop = loop_start + '\n' + TemplateArrayLoop + '\n' + loop_end input_dim = '[' + str(var) + ('' if s == 0 else '+' + str(s)) + ']' + input_dim output_dim = '[' + str(var) + ']' + output_dim indent.pop() statements = TemplateArrayLoop.replace( '[statements]', ''.join(indent) + 'output' + output_dim + '=' + 'input' + input_dim + ';') TemplateFunction = cleandoc(''' void {op_func_name}(void *op_param,{t} input{XDims}, {t} output{CDims}, void *inputs_params, void* outputs_params) {{ {statements} }} ''') mappingf = {} mappingf.update({'op_func_name': self.get_func_name()}) mappingf.update({ 'XDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[0]) }) mappingf.update({ 'CDims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]) }) mappingf.update({'t': data_type.np2c(self.output_tensor_dtypes[0])}) mappingf.update({'statements': statements}) res += '\n\n' res += TemplateFunction.format(**mappingf) return res
def generate_c_code(self, **kwargs):
    """Emit C source that copies the input buffer into the output buffer
    through flat pointers, zero-filling any excess output elements.
    """
    includes = '\n'.join(
        c_helper.generate_local_include(header)
        for header in self.get_c_op_include_header())

    # Flat element-by-element copy; when the output holds more elements
    # than the input, the tail is filled with zeros of the output type.
    kernel_template = '''
        {t} *_data = ({t} *)data;
        {t} *_squeezed = ({t} *)squeezed;
        int data_elements = {data_elements};
        int shape_elements = {shape_elements};
        int i;
        if (data_elements >= shape_elements) {{
            for (i=0; i<shape_elements; i++) {{
                *(_squeezed +i) = *(_data +i);
            }}
        }} else {{
            for (i=0; i<data_elements; i++) {{
                *(_squeezed +i) = *(_data +i);
            }}
            for (; i<shape_elements; i++) {{
                *(_squeezed +i) = ({t})0.0;
            }}
        }}
    '''
    scalar_type = data_type.np2c(self.output_tensor_dtypes[0])
    kernel = kernel_template.format(
        t=scalar_type,
        data_elements=self.input_tensor[0].size,
        shape_elements=self.output_tensor[0].size,
    )

    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {t} data{dims_data}, {t} squeezed{dims_squeezed}, void *inputs_params, void* outputs_params)
        {{
        {statements}
        }}
        ''')

    fields = {
        'op_func_name': self.get_func_name(),
        'dims_data':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[0]),
        'dims_squeezed':
        c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
        't': scalar_type,
        'statements': kernel,
    }

    # Sections separated by blank lines: includes, param type, then (after
    # an extra blank gap) the generated function itself.
    return '\n\n'.join([includes, self.get_c_param_type(), '',
                        TemplateFunction.format(**fields)])
def generate_c_code(self, **kwargs):
    """Emit C source for TopK: a selection sort of indices along `axis`,
    then a gather of the top values/indices into the outputs.

    The body is assembled from four templates: declarations, an index
    initialization loop, the selection-sort core (wrapped by loops over
    the dimensions left/right of `axis`), and a post-process gather.
    Returns the generated C source as a string.
    """
    axis = self.attrs['axis']
    value_ndims = self.output_tensor_ndims[0]
    value_shapes = self.output_tensor_shapes[0]
    res = ''
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    # Declarations emitted at the top of the generated function body.
    TemplateStatements = '''
        const int axis = {axis};
        const int largest = {largest};
        const int sorted = {sorted};
        const int X_shape[] = {X_shape};
        const int K_val = K[0];
        const int Values_shape[] = {Values_shape};
        const int Indices_shape[] = {Indices_shape};
        int sorted_indices{dims_X};
    '''
    mapping = {}
    mapping.update({'op_func_name': self.get_func_name()})
    mapping.update({'axis': self.attrs['axis']})
    mapping.update({'largest': self.attrs['largest']})
    mapping.update({'sorted': self.attrs['sorted']})
    mapping.update({
        'dims_X':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[0])
    })
    # Python tuple repr -> C brace initializer, e.g. (2, 3) -> {2, 3}.
    mapping.update({
        'X_shape':
        str(self.input_tensor_shapes[0]).replace('(', '{').replace(')', '}')
    })
    mapping.update({
        'Values_shape':
        str(self.output_tensor_shapes[0]).replace('(', '{').replace(')', '}')
    })
    mapping.update({
        'Indices_shape':
        str(self.output_tensor_shapes[1]).replace('(', '{').replace(')', '}')
    })
    mapping.update(
        {'target_range': self.input_tensor_shapes[0][self.attrs['axis']]})
    mapping.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    # Initialize sorted_indices[...] = index along `axis` for every element.
    TemplatePreProcessLoop = c_helper.generate_ndim_for_loop(
        np.ones(self.input_tensor_shapes[0]))
    TemplatePreProcessCore = '''
        sorted_indices{dims_all} = {target_rank};
    '''
    # Loops over the dimensions to the left / right of `axis`; when a side
    # has no dimensions the bare placeholder is used instead of a loop.
    TemplateArrayLoopLeft = c_helper.generate_ndim_for_loop(
        np.ones(self.output_tensor_shapes[0][0:axis + 1])) if (
            axis >= 0) else '[statements]'
    TemplateArrayLoopRight = c_helper.generate_ndim_for_loop(
        np.ones(self.output_tensor_shapes[0][axis + 1:]),
        gen=iter(string.ascii_lowercase[9 + axis:])) if (
            axis < value_ndims - 1) else '[statements]'
    # Selection-sort core: keep swapping so the first K slots along `axis`
    # hold the largest (or smallest) elements' indices.
    TemplateLoopCore = '''
        for (int z={target_rank}+1; z<{target_range}; z++) {{
    '''
    if (self.attrs['largest'] == 1):
        TemplateLoopCore += '''
            if (X{dims_left}[sorted_indices{dims_left}[{target_rank}]{dims_right}]{dims_right} < X{dims_left}[sorted_indices{dims_left}[z]{dims_right}]{dims_right}) {{
        '''
    else:
        TemplateLoopCore += '''
            if (X{dims_left}[sorted_indices{dims_left}[{target_rank}]{dims_right}]{dims_right} > X{dims_left}[sorted_indices{dims_left}[z]{dims_right}]{dims_right}) {{
        '''
    TemplateLoopCore += '''
                int tmp_idx = sorted_indices{dims_all};
                sorted_indices{dims_all} = sorted_indices{dims_left}[z]{dims_right};
                sorted_indices{dims_left}[z]{dims_right} = tmp_idx;
            }}
        }}
    '''
    mapping_loop = {}
    # Subscript strings: all dims, dims left of axis, dims right of axis.
    mapping_loop.update({
        'dims_all':
        ''.join([
            '[' + v + ']'
            for v in string.ascii_lowercase[8:8 + value_ndims]
        ])
    })
    mapping_loop.update({
        'dims_left':
        ''.join(
            ['[' + v + ']' for v in string.ascii_lowercase[8:8 + axis]])
    })
    mapping_loop.update({
        'dims_right':
        ''.join([
            '[' + v + ']'
            for v in string.ascii_lowercase[9 + axis:8 + value_ndims]
        ])
    })
    mapping_loop.update(
        {'target_range': self.input_tensor_shapes[0][axis]})
    # Loop-variable letter that runs along `axis`.
    mapping_loop.update({'target_rank': string.ascii_lowercase[8 + axis]})
    # Gather sorted values/indices into the output tensors.
    TemplatePostProcessLoop = c_helper.generate_ndim_for_loop(
        np.ones(self.output_tensor_shapes[0]))
    TemplatePostProcessCore = '''
        Values{dims_all} = X{dims_left}[sorted_indices{dims_left}[{target_rank}]{dims_right}]{dims_right};
        Indices{dims_all} = sorted_indices{dims_left}[{target_rank}]{dims_right};
    '''
    # 3
    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {t} X{dims_X}, long long int K[], {t} Values{dims_Values}, long long int Indices{dims_Indices}, void *inputs_params, void* outputs_params) {{
        {pre_statements}
        {preloop_statements}
        {loop_statements}
        {postloop_statements}
        }}
        ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    mappingf.update({
        'dims_X':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[0])
    })
    mappingf.update({
        'dims_Values':
        c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    mappingf.update({
        'dims_Indices':
        c_helper.generate_dim_bracket(self.output_tensor_shapes[1])
    })
    mappingf.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    mappingf.update(
        {'pre_statements': TemplateStatements.format(**mapping)})
    mappingf.update({
        'preloop_statements':
        TemplatePreProcessLoop.replace(
            '[statements]', TemplatePreProcessCore.format(**mapping_loop))
    })
    # Nest: left loops -> right loops -> selection-sort core.
    mappingf.update({
        'loop_statements':
        TemplateArrayLoopLeft.replace(
            '[statements]',
            TemplateArrayLoopRight.replace(
                '[statements]', TemplateLoopCore.format(**mapping_loop)))
    })
    mappingf.update({
        'postloop_statements':
        TemplatePostProcessLoop.replace(
            '[statements]', TemplatePostProcessCore.format(**mapping_loop))
    })
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs):
    """Emit C source for a fused pair of nodes (prev -> post).

    The post node's kernel template is formatted with the prev node's
    input mapping spliced in, then wrapped in an n-dim loop over the
    output shape (optionally OpenMP-parallelized). Supports post nodes
    with one or two input tensors; raises ValueError otherwise.

    Bug fix: the original appended ``self.get_c_param_type()`` to the
    output twice, duplicating the parameter-type declarations in the
    generated C source; it is now emitted exactly once.
    """
    res = ""
    # include header
    res += "\n".join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += "\n\n"

    # param type (emitted once)
    res += self.get_c_param_type()
    res += "\n\n"

    # n-dim loop over the fused output shape.
    TemplateArrayFuseLoop = c_helper.generate_ndim_for_loop(
        np.empty(self.output_tensor_shapes[0]), pragma=self.OpenMP)
    if self.OpenMP:
        TemplateArrayFuseLoop = TemplateArrayFuseLoop.replace(
            "[pragma]", self.PRAGMA_OMP)

    # Compose the fused statement: the post node's kernel, but reading the
    # prev node's input in place of its own.
    premap = self.prev_node.op.generate_kernel_map()
    # transpose
    postmap = self.post_node.op.generate_kernel_map()
    postmap.update({"X": premap["X"]})
    postmap.update({"XStatementDims": premap["XStatementDims"]})
    template = self.post_node.op.generate_kernel_template()
    statements = template.format(**postmap)

    post_input_count = len(self.post_node.input_tensor)
    if post_input_count == 1:
        TemplateFunction = cleandoc("""
            void {op_func_name}(void *op_param, {t} {X}{dims_i}, {t} {C}{dims_o}, void *inputs_params, void* outputs_params) {{
            {statements}
            }}
            """)
        mappingf = {}
        mappingf.update({"op_func_name": self.get_func_name()})
        mappingf.update({
            "dims_i":
            c_helper.generate_dim_bracket(self.input_tensor_shapes[0])
        })
        mappingf.update({
            "dims_o":
            c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
        })
        mappingf.update(
            {"t": data_type.np2c(self.output_tensor_dtypes[0])})
        mappingf.update({"X": self.input_tensor_names[0]})
        mappingf.update({"C": self.output_tensor_names[0]})
        mappingf.update({
            "statements":
            TemplateArrayFuseLoop.replace("[statements]", statements)
        })
        res += "\n\n"
        res += TemplateFunction.format(**mappingf)
    elif post_input_count == 2:
        TemplateFunction = cleandoc("""
            void {op_func_name}(void *op_param, {t} {X1}{dims_i1}, {t} {X2}{dims_i2}, {t} {C}{dims_o}, void *inputs_params, void* outputs_params) {{
            {statements}
            }}
            """)
        mappingf = {}
        mappingf.update({"op_func_name": self.get_func_name()})
        mappingf.update({
            "dims_i1":
            c_helper.generate_dim_bracket(self.input_tensor_shapes[0])
        })
        mappingf.update({
            "dims_i2":
            c_helper.generate_dim_bracket(self.input_tensor_shapes[1])
        })
        mappingf.update({
            "dims_o":
            c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
        })
        mappingf.update(
            {"t": data_type.np2c(self.output_tensor_dtypes[0])})
        mappingf.update({"X1": self.input_tensor_names[0]})
        mappingf.update({"X2": self.input_tensor_names[1]})
        mappingf.update({"C": self.output_tensor_names[0]})
        mappingf.update({
            "statements":
            TemplateArrayFuseLoop.replace("[statements]", statements)
        })
        res += "\n\n"
        res += TemplateFunction.format(**mappingf)
    else:
        raise ValueError()
    return res
def generate_c_code(self, **kwargs):
    """Emit C source for AveragePool over a 1-D (ndim==3), 2-D (ndim==4)
    or 3-D (ndim==5) spatial input laid out as N, C, spatial dims.

    Selects one of three hand-written C templates by rank and fills it
    with shapes, kernel/pad/stride attributes and the C element type.
    Raises ValueError for any other rank. Returns the C source string.
    """
    res = ''
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    ndim = self.output_tensor_ndims[0]
    if (ndim != 3 and ndim != 4 and ndim != 5):
        raise ValueError()
    kernel_shape = self.attrs['kernel_shape']
    pads = self.attrs['pads']
    storage_order = self.attrs['storage_order']
    strides = self.attrs['strides']
    count_include_pad = self.attrs['count_include_pad']
    if (ndim == 3):
        # 1-D pooling. NOTE(review): unlike the 2-D template, this one
        # ignores count_include_pad when averaging — confirm intended.
        TemplateStatements = '''
            int  X_n = {X_d0};
            int  X_c = {X_d1};
            int  X_w = {X_d2};
            int  Y_n = {Y_d0};
            int  Y_c = {Y_d1};
            int  Y_w = {Y_d2};
            int  kernel_shape_w = {kernel_shape_w};
            int  pad_w_begin = {pad_w_begin};
            int  pad_w_end = {pad_w_end};
            int  stride_w = {stride_w};
            int  storage_order = {storage_order};
            int  count_include_pad = {count_include_pad};

            int  n;
            int  c;
            int  w;
            int  kw;
            int  kernel_shape_w_min;
            int  kernel_shape_w_max;
            {t} pool;
            int  data_cnt;

            kernel_shape_w_min = -pad_w_begin;
            kernel_shape_w_max = (kernel_shape_w - pad_w_begin);

            memset( (void *)Y, 0.0, sizeof(Y[0][0][0]) * Y_n * Y_c * Y_w );
            for (n=0; n<Y_n; n++) {{
                for (c=0; c<Y_c; c++) {{
                    for (w=0; w<Y_w; w++) {{
                        pool = 0.0;
                        data_cnt = 0;
                        for (kw=kernel_shape_w_min; kw<kernel_shape_w_max; kw++) {{
                            if ((w*stride_w+kw < 0) || (w*stride_w+kw >= X_w)) {{
                                continue;
                            }}
                            pool += X[n][c][w*stride_w+kw];
                            data_cnt++;
                        }}
                        if (data_cnt > 0) {{
                            Y[n][c][w] = pool / data_cnt;
                        }}
                    }}
                }}
            }}
        '''
        mapping = {}
        mapping.update({'X_d0': self.input_tensor_shapes[0][0]})
        mapping.update({'X_d1': self.input_tensor_shapes[0][1]})
        mapping.update({'X_d2': self.input_tensor_shapes[0][2]})
        mapping.update({'Y_d0': self.output_tensor_shapes[0][0]})
        mapping.update({'Y_d1': self.output_tensor_shapes[0][1]})
        mapping.update({'Y_d2': self.output_tensor_shapes[0][2]})
        mapping.update({'kernel_shape_w': kernel_shape[0]})
        mapping.update({'pad_w_begin': pads[0]})
        mapping.update({'pad_w_end': pads[1]})
        mapping.update({'stride_w': strides[0]})
        mapping.update({'storage_order': storage_order})
        mapping.update({'count_include_pad': count_include_pad})
        mapping.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    elif (ndim == 4):
        # 2-D pooling; honors storage_order (row-major vs column-major
        # traversal of the output) and count_include_pad.
        TemplateStatements = '''
            int  X_n = {X_d0};
            int  X_c = {X_d1};
            int  X_h = {X_d2};
            int  X_w = {X_d3};
            int  Y_n = {Y_d0};
            int  Y_c = {Y_d1};
            int  Y_h = {Y_d2};
            int  Y_w = {Y_d3};
            int  kernel_shape_h = {kernel_shape_h};
            int  kernel_shape_w = {kernel_shape_w};
            int  pad_h_begin = {pad_h_begin};
            int  pad_w_begin = {pad_w_begin};
            int  pad_h_end = {pad_h_end};
            int  pad_w_end = {pad_w_end};
            int  stride_h = {stride_h};
            int  stride_w = {stride_w};
            int  storage_order = {storage_order};
            int  count_include_pad = {count_include_pad};

            int  n;
            int  c;
            int  h, w;
            int  kh, kw;
            int  kernel_shape_h_min;
            int  kernel_shape_h_max;
            int  kernel_shape_w_min;
            int  kernel_shape_w_max;
            {t} pool;
            int  data_cnt;

            kernel_shape_h_min = -pad_h_begin;
            kernel_shape_h_max = (kernel_shape_h - pad_h_begin);
            kernel_shape_w_min = -pad_w_begin;
            kernel_shape_w_max = (kernel_shape_w - pad_w_begin);

            memset( (void *)Y, 0.0, sizeof(Y[0][0][0][0]) * Y_n * Y_c * Y_h * Y_w );
            for (n=0; n<Y_n; n++) {{
                for (c=0; c<Y_c; c++) {{
                    if (storage_order == 0) {{
                        for (h=0; h<Y_h; h++) {{
                            for (w=0; w<Y_w; w++) {{
                                pool = 0.0;
                                data_cnt = 0;
                                for (kh=kernel_shape_h_min; kh<kernel_shape_h_max; kh++) {{
                                    if ((h*stride_h+kh < 0) || (h*stride_h+kh >= X_h)) {{
                                        if (count_include_pad != 0) {{
                                            data_cnt += kernel_shape_w;
                                        }}
                                        continue;
                                    }}
                                    for (kw=kernel_shape_w_min; kw<kernel_shape_w_max; kw++) {{
                                        if ((w*stride_w+kw < 0) || (w*stride_w+kw >= X_w)) {{
                                            if (count_include_pad != 0) {{
                                                data_cnt++;
                                            }}
                                        }} else {{
                                            pool += X[n][c][h*stride_h+kh][w*stride_w+kw];
                                            data_cnt++;
                                        }}
                                    }}
                                }}
                                if (data_cnt > 0) {{
                                    Y[n][c][h][w] = pool / data_cnt;
                                }}
                            }}
                        }}
                    }} else {{
                        for (w=0; w<Y_w; w++) {{
                            for (h=0; h<Y_h; h++) {{
                                pool = 0.0;
                                data_cnt = 0;
                                for (kh=kernel_shape_h_min; kh<kernel_shape_h_max; kh++) {{
                                    if ((h*stride_h+kh < 0) || (h*stride_h+kh >= X_h)) {{
                                        if (count_include_pad != 0) {{
                                            data_cnt++;
                                        }}
                                        continue;
                                    }}
                                    for (kw=kernel_shape_w_min; kw<kernel_shape_w_max; kw++) {{
                                        if ((w*stride_w+kw < 0) || (w*stride_w+kw >= X_w)) {{
                                            if (count_include_pad != 0) {{
                                                data_cnt++;
                                            }}
                                        }} else {{
                                            pool += X[n][c][h*stride_h+kh][w*stride_w+kw];
                                            data_cnt++;
                                        }}
                                    }}
                                }}
                                if (data_cnt > 0) {{
                                    Y[n][c][h][w] = pool / data_cnt;
                                }}
                            }}
                        }}
                    }}
                }}
            }}
        '''
        mapping = {}
        mapping.update({'X_d0': self.input_tensor_shapes[0][0]})
        mapping.update({'X_d1': self.input_tensor_shapes[0][1]})
        mapping.update({'X_d2': self.input_tensor_shapes[0][2]})
        mapping.update({'X_d3': self.input_tensor_shapes[0][3]})
        mapping.update({'Y_d0': self.output_tensor_shapes[0][0]})
        mapping.update({'Y_d1': self.output_tensor_shapes[0][1]})
        mapping.update({'Y_d2': self.output_tensor_shapes[0][2]})
        mapping.update({'Y_d3': self.output_tensor_shapes[0][3]})
        mapping.update({'kernel_shape_h': kernel_shape[0]})
        mapping.update({'kernel_shape_w': kernel_shape[1]})
        # ONNX pads layout for 2-D: [h_begin, w_begin, h_end, w_end].
        mapping.update({'pad_h_begin': pads[0]})
        mapping.update({'pad_h_end': pads[2]})
        mapping.update({'pad_w_begin': pads[1]})
        mapping.update({'pad_w_end': pads[3]})
        mapping.update({'stride_h': strides[0]})
        mapping.update({'stride_w': strides[1]})
        mapping.update({'storage_order': storage_order})
        mapping.update({'count_include_pad': count_include_pad})
        mapping.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    elif (ndim == 5):
        # 3-D pooling. NOTE(review): like the 1-D template, this one
        # ignores count_include_pad — confirm intended.
        TemplateStatements = '''
            int  X_n = {X_d0};
            int  X_c = {X_d1};
            int  X_d = {X_d2};
            int  X_h = {X_d3};
            int  X_w = {X_d4};
            int  Y_n = {Y_d0};
            int  Y_c = {Y_d1};
            int  Y_d = {Y_d2};
            int  Y_h = {Y_d3};
            int  Y_w = {Y_d4};
            int  kernel_shape_d = {kernel_shape_d};
            int  kernel_shape_h = {kernel_shape_h};
            int  kernel_shape_w = {kernel_shape_w};
            int  pad_d_begin = {pad_d_begin};
            int  pad_h_begin = {pad_h_begin};
            int  pad_w_begin = {pad_w_begin};
            int  pad_d_end = {pad_d_end};
            int  pad_h_end = {pad_h_end};
            int  pad_w_end = {pad_w_end};
            int  stride_d = {stride_d};
            int  stride_h = {stride_h};
            int  stride_w = {stride_w};
            int  storage_order = {storage_order};
            int  count_include_pad = {count_include_pad};

            int  n;
            int  c;
            int  d, h, w;
            int  kd, kh, kw;
            int  kernel_shape_d_min;
            int  kernel_shape_d_max;
            int  kernel_shape_h_min;
            int  kernel_shape_h_max;
            int  kernel_shape_w_min;
            int  kernel_shape_w_max;
            {t} pool;
            int  data_cnt;

            kernel_shape_d_min = -pad_d_begin;
            kernel_shape_d_max = (kernel_shape_d - pad_d_begin);
            kernel_shape_h_min = -pad_h_begin;
            kernel_shape_h_max = (kernel_shape_h - pad_h_begin);
            kernel_shape_w_min = -pad_w_begin;
            kernel_shape_w_max = (kernel_shape_w - pad_w_begin);

            memset( (void *)Y, 0.0, sizeof(Y[0][0][0][0][0]) * Y_n * Y_c * Y_d * Y_h * Y_w );
            for (n=0; n<Y_n; n++) {{
                for (c=0; c<Y_c; c++) {{
                    for (d=0; d<Y_d; d++) {{
                        for (h=0; h<Y_h; h++) {{
                            for (w=0; w<Y_w; w++) {{
                                pool = 0.0;
                                data_cnt = 0;
                                for (kd=kernel_shape_d_min; kd<kernel_shape_d_max; kd++) {{
                                    if ((d*stride_d+kd < 0) || (d*stride_d+kd >= X_d)) {{
                                        continue;
                                    }}
                                    for (kh=kernel_shape_h_min; kh<kernel_shape_h_max; kh++) {{
                                        if ((h*stride_h+kh < 0) || (h*stride_h+kh >= X_h)) {{
                                            continue;
                                        }}
                                        for (kw=kernel_shape_w_min; kw<kernel_shape_w_max; kw++) {{
                                            if ((w*stride_w+kw < 0) || (w*stride_w+kw >= X_w)) {{
                                                continue;
                                            }}
                                            pool += X[n][c][d*stride_d+kd][h*stride_h+kh][w*stride_w+kw];
                                            data_cnt++;
                                        }}
                                    }}
                                }}
                                if (data_cnt > 0) {{
                                    Y[n][c][d][h][w] = pool / data_cnt;
                                }}
                            }}
                        }}
                    }}
                }}
            }}
        '''
        mapping = {}
        mapping.update({'X_d0': self.input_tensor_shapes[0][0]})
        mapping.update({'X_d1': self.input_tensor_shapes[0][1]})
        mapping.update({'X_d2': self.input_tensor_shapes[0][2]})
        mapping.update({'X_d3': self.input_tensor_shapes[0][3]})
        mapping.update({'X_d4': self.input_tensor_shapes[0][4]})
        mapping.update({'Y_d0': self.output_tensor_shapes[0][0]})
        mapping.update({'Y_d1': self.output_tensor_shapes[0][1]})
        mapping.update({'Y_d2': self.output_tensor_shapes[0][2]})
        mapping.update({'Y_d3': self.output_tensor_shapes[0][3]})
        mapping.update({'Y_d4': self.output_tensor_shapes[0][4]})
        mapping.update({'kernel_shape_d': kernel_shape[0]})
        mapping.update({'kernel_shape_h': kernel_shape[1]})
        mapping.update({'kernel_shape_w': kernel_shape[2]})
        # ONNX pads layout for 3-D: [d_begin, h_begin, w_begin, d_end, h_end, w_end].
        mapping.update({'pad_d_begin': pads[0]})
        mapping.update({'pad_d_end': pads[3]})
        mapping.update({'pad_h_begin': pads[1]})
        mapping.update({'pad_h_end': pads[4]})
        mapping.update({'pad_w_begin': pads[2]})
        mapping.update({'pad_w_end': pads[5]})
        mapping.update({'stride_d': strides[0]})
        mapping.update({'stride_h': strides[1]})
        mapping.update({'stride_w': strides[2]})
        mapping.update({'storage_order': storage_order})
        mapping.update({'count_include_pad': count_include_pad})
        mapping.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    # 3
    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {t} X{dims_X}, {t} Y{dims}, void *inputs_params, void* outputs_params) {{
        {statements}
        }}
        ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    mappingf.update({'X': self.input_tensor_names[0]})
    mappingf.update({
        'dims_X':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[0])
    })
    mappingf.update({'Y': self.output_tensor_names[0]})
    mappingf.update({
        'dims':
        c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    mappingf.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    mappingf.update({'statements': TemplateStatements.format(**mapping)})
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs):
    """Emit C source for BatchNormalization on a 4-D (N, C, H, W) input.

    The generated C normalizes X with the supplied `mean`/`var` and
    applies `scale`/`B`; a `#if {spatial}` preprocessor switch selects
    between per-channel (spatial) and per-element statistics tensors.

    Bug fix: the original emitted ``ave /= (h * w);`` which read the
    uninitialized local ``ave``; it now emits ``ave = sum / (h * w);``
    (``h``/``w`` equal Y_h/Y_w after the summation loops) in both
    branches.

    NOTE(review): in the non-spatial branch the running-average line
    ``mean[c][h][w]`` is evaluated after the loops, where h==Y_h and
    w==Y_w — an out-of-range read. The computed ave/sigma2 are never
    used by the normalization below, so this looks like leftover
    training-statistics code; confirm and consider removing it.
    """
    res = ''
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    TemplateStatements = '''
        const int Y_n = {d1};
        const int Y_c = {d2};
        const int Y_h = {d3};
        const int Y_w = {d4};
        const double epsilon = {epsilon};
        const double momentum = {momentum};
        const int spatial = {spatial};

        int n;
        int c, h, w;
        double sum;
        double ave;
        double sigma2;
        double norm;

    #if {spatial} // spatial is true
        for (n=0; n<Y_n; n++) {{
            for (c=0; c<Y_c; c++) {{
                sum = 0.0;
                for (h=0; h<Y_h; h++) {{
                    for (w=0; w<Y_w; w++) {{
                        sum += X[n][c][h][w];
                    }}
                }}
                ave = sum / (h * w);
                ave = momentum * ave + (1-momentum) * mean[c];
                sigma2 = 0.0;
                for (h=0; h<Y_h; h++) {{
                    for (w=0; w<Y_w; w++) {{
                        sigma2 += pow((X[n][c][h][w] - ave), 2);
                    }}
                }}
                sigma2 /= (h * w);
                for (h=0; h<Y_h; h++) {{
                    for (w=0; w<Y_w; w++) {{
                        norm = (X[n][c][h][w] - mean[c]) / sqrt(var[c]+epsilon);
                        Y[n][c][h][w] = scale[c] * norm + B[c];
                    }}
                }}
            }}
        }}
    #else // spatial is false
        for (n=0; n<Y_n; n++) {{
            for (c=0; c<Y_c; c++) {{
                sum = 0.0;
                for (h=0; h<Y_h; h++) {{
                    for (w=0; w<Y_w; w++) {{
                        sum += X[n][c][h][w];
                    }}
                }}
                ave = sum / (h * w);
                ave = momentum * ave + (1-momentum) * mean[c][h][w];
                sigma2 = 0.0;
                for (h=0; h<Y_h; h++) {{
                    for (w=0; w<Y_w; w++) {{
                        sigma2 += pow((X[n][c][h][w] - ave), 2);
                    }}
                }}
                sigma2 /= (h * w);
                for (h=0; h<Y_h; h++) {{
                    for (w=0; w<Y_w; w++) {{
                        norm = (X[n][c][h][w] - mean[c][h][w]) / sqrt(var[c][h][w]+epsilon);
                        Y[n][c][h][w] = scale[c][h][w] * norm + B[c][h][w];
                    }}
                }}
            }}
        }}
    #endif // spatial
    '''
    mapping = {}
    mapping.update({'d1': self.input_tensor_shapes[0][0]})
    mapping.update({'d2': self.input_tensor_shapes[0][1]})
    mapping.update({'d3': self.input_tensor_shapes[0][2]})
    mapping.update({'d4': self.input_tensor_shapes[0][3]})
    mapping.update({'epsilon': self.attrs['epsilon']})
    mapping.update({'momentum': self.attrs['momentum']})
    mapping.update({'spatial': self.attrs['spatial']})
    # 3
    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {t} X{dims_X}, {t} scale{dims_scale}, {t} B{dims_B}, {t} mean{dims_mean}, {t} var{dims_var}, {t} Y{dims}, void *inputs_params, void* outputs_params) {{
        {statements}
        }}
        ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    mappingf.update({
        'dims_X':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[0])
    })
    mappingf.update({
        'dims_scale':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[1])
    })
    mappingf.update({
        'dims_B':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[2])
    })
    mappingf.update({
        'dims_mean':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[3])
    })
    mappingf.update({
        'dims_var':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[4])
    })
    mappingf.update({
        'dims':
        c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    mappingf.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    mappingf.update({'statements': TemplateStatements.format(**mapping)})
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs):
    """Emit C source for Dropout.

    Each output element keeps its input value with probability
    ``1 - ratio`` (via ``random()`` against ``RAND_MAX * ratio``) and is
    zeroed otherwise. ``ratio`` defaults to 0.5 when the attribute is
    absent. Returns the generated C source as a string.
    """
    # Includes + parameter-type declarations come first.
    header = '\n'.join([
        c_helper.generate_local_include(hdr)
        for hdr in self.get_c_op_include_header()
    ])
    parts = [header, '\n\n', self.get_c_param_type(), '\n\n']

    drop_ratio = self.attrs.get('ratio', 0.5)

    # N-dim loop over the output shape wrapping the per-element statement.
    loop_template = c_helper.generate_ndim_for_loop(
        np.ones(self.output_tensor_shapes[0]))

    TemplateStatements = '''
        if (random() > RAND_MAX * ratio) {{
            output{dims} = data{dims};
        }} else {{
            output{dims} = 0.0;
        }}
    '''

    # Subscript string [i][j]... matching the helper's loop variables.
    rank = self.output_tensor_ndims[0]
    subscript = ''.join(
        '[' + letter + ']'
        for letter in string.ascii_lowercase[8:8 + rank])
    element_stmt = TemplateStatements.format(**{'dims': subscript})

    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {t} data{dims}, {t} output{dims}, void *inputs_params, void* outputs_params) {{
            const float ratio = {ratio};
        {statements}
        }}
        ''')

    func_mapping = {
        'op_func_name': self.get_func_name(),
        'dims':
        c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
        't': data_type.np2c(self.output_tensor_dtypes[0]),
        'ratio': drop_ratio,
        'statements': loop_template.replace('[statements]', element_stmt),
    }

    parts.append('\n\n')
    parts.append(TemplateFunction.format(**func_mapping))
    return ''.join(parts)
def generate_c_code(self, **kwargs):
    """Emit C source for MatMul with broadcasting, normalized to 5-D.

    Both inputs are padded with leading 1-dims up to rank 5 (a 1-D
    second operand gets a trailing 1 as well, treating it as a column
    vector); the generated C then performs a batched matrix multiply
    over flat pointers. Returns the generated C source as a string.
    """
    res = ''
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    input_shapes = []
    input_shapes.append(self.input_tensor_shapes[0])
    input_shapes.append(self.input_tensor_shapes[1])
    input_mod_shapes = []
    max_dim = 5
    # Pad input A to rank 5 with leading singleton dims.
    if (len(input_shapes[0]) == 1):
        input_mod_shapes.append(((1, ) * (max_dim - 1) + input_shapes[0]))
    else:
        input_mod_shapes.append((1, ) * (max_dim - len(input_shapes[0])) +
                                input_shapes[0])
    # Pad input B likewise; a 1-D B also gets a trailing singleton dim
    # so it acts as a (k, 1) matrix.
    if (len(input_shapes[1]) == 1):
        input_mod_shapes.append(
            ((1, ) * (max_dim - 2) + input_shapes[1] + (1, )))
    else:
        input_mod_shapes.append((1, ) * (max_dim - len(input_shapes[1])) +
                                input_shapes[1])
    # Broadcast the three leading (batch) dims; trailing dims are (m, n).
    outputs_shape = (max(input_mod_shapes[0][0], input_mod_shapes[1][0]),
                     max(input_mod_shapes[0][1], input_mod_shapes[1][1]),
                     max(input_mod_shapes[0][2], input_mod_shapes[1][2]),
                     input_mod_shapes[0][3], input_mod_shapes[1][4])
    output_names = self.output_tensor_names[0]
    ndim = self.output_tensor_ndims[0]
    # Batched GEMM over flat pointers: Y[h][i][j][m][n] += A[...][m][k] * B[...][k][n].
    TemplateStatements = '''
        int A_h = {A_d0};
        int A_i = {A_d1};
        int A_j = {A_d2};
        int A_m = {A_d3};
        int A_k = {A_d4};
        int B_h = {B_d0};
        int B_i = {B_d1};
        int B_j = {B_d2};
        int B_k = {B_d3};
        int B_n = {B_d4};
        int Y_h = {Y_d0};
        int Y_i = {Y_d1};
        int Y_j = {Y_d2};
        int Y_m = {Y_d3};
        int Y_n = {Y_d4};
        {t} *_A = ({t} *)A;
        {t} *_B = ({t} *)B;
        {t} *_Y = ({t} *)Y;
        {t} tmpA, tmpB;
        int h, i, j;
        int k;
        int m;
        int n;

        memset( Y, ({t})0.0, sizeof(*_Y)*Y_h*Y_i*Y_j*Y_m*Y_n );
        for (h=0; h < Y_h; h++) {{
            for (i=0; i < Y_i; i++) {{
                for (j=0; j < Y_j; j++) {{
                    for (m=0; m < Y_m; m++) {{
                        for (n=0; n < Y_n; n++) {{
                            for (k=0; k < B_k; k++) {{
                                tmpA = *(_A + h*(Y_i*Y_j*Y_m*B_k) + i*(Y_j*Y_m*B_k) + j*(Y_m*B_k) + m*(B_k) + k);
                                tmpB = *(_B + h*(Y_i*Y_j*B_k*Y_n) + i*(Y_j*B_k*Y_n) + j*(B_k*Y_n) + k*(Y_n) + n);
                                *(_Y + h*(Y_i*Y_j*Y_m*Y_n) + i*(Y_j*Y_m*Y_n) + j*(Y_m*Y_n) + m*(Y_n) + n) += tmpA * tmpB;
                                // Y[h][i][j][m][n] += A[h][i][j][m][k] * B[h][i][j][k][n];
                            }}
                        }}
                    }}
                }}
            }}
        }}
    '''
    mapping = {}
    mapping.update({'A_d0': input_mod_shapes[0][0]})
    mapping.update({'A_d1': input_mod_shapes[0][1]})
    mapping.update({'A_d2': input_mod_shapes[0][2]})
    mapping.update({'A_d3': input_mod_shapes[0][3]})
    mapping.update({'A_d4': input_mod_shapes[0][4]})
    mapping.update({'B_d0': input_mod_shapes[1][0]})
    mapping.update({'B_d1': input_mod_shapes[1][1]})
    mapping.update({'B_d2': input_mod_shapes[1][2]})
    mapping.update({'B_d3': input_mod_shapes[1][3]})
    mapping.update({'B_d4': input_mod_shapes[1][4]})
    mapping.update({'Y_d0': outputs_shape[0]})
    mapping.update({'Y_d1': outputs_shape[1]})
    mapping.update({'Y_d2': outputs_shape[2]})
    mapping.update({'Y_d3': outputs_shape[3]})
    mapping.update({'Y_d4': outputs_shape[4]})
    mapping.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    # 3
    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {t} A{dims_A}, {t} B{dims_B}, {t} Y{dims}, void *inputs_params, void* outputs_params) {{
        {statements}
        }}
        ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    mappingf.update({'A': self.input_tensor_names[0]})
    mappingf.update(
        {'dims_A': c_helper.generate_dim_bracket(input_shapes[0])})
    mappingf.update({'B': self.input_tensor_names[1]})
    mappingf.update(
        {'dims_B': c_helper.generate_dim_bracket(input_shapes[1])})
    mappingf.update({'Y': self.output_tensor_names[0]})
    mappingf.update({
        'dims':
        c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    mappingf.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    mappingf.update({'statements': TemplateStatements.format(**mapping)})
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs):
    """Emit C source for NonMaxSuppression.

    The generated C decodes boxes (corner or center format per
    `center_point_box`), filters by score threshold, sorts candidate
    indices by score (via the external `nonmaxsuppression_idx_sort`
    helper), greedily suppresses boxes whose IoU (via
    `nonmaxsuppression_compute_iou`) exceeds the threshold, and writes
    [batch, class, box] triples into `selected_indices` (-1 padding for
    unused slots). Returns the generated C source as a string.
    """
    res = ''
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    TemplateStatements = '''
        const int center_point_box = {center_point_box};
        const int boxes_batch = {boxes_batch};
        const int boxes_spatial = {boxes_spatial};
        const int boxes_params = {boxes_params};
        const int scores_batch = {scores_batch};
        const int scores_class = {scores_class};
        const int scores_spatial = {scores_spatial};
        const long long int max_output_boxes_p_class = max_output_boxes_per_class[0];
        const float iou_th = iou_threshold[0];
        const float score_th = score_threshold[0];
        const int selected_indices_num = {selected_indices_num};
        const int selected_indices_index = {selected_indices_index};

//        printf("max_output: %ld\\n", max_output_boxes_per_class[0]);
//        printf("iou_th: %f\\n", iou_threshold[0]);
//        printf("score_th: %f\\n", score_threshold[0]);
//        printf("max_output: %ld\\n", max_output_boxes_p_class);
//        printf("iou_th: %f\\n", iou_th);
//        printf("score_th: %f\\n", score_th);

        memset( (void *)selected_indices, 0, sizeof({t}) * selected_indices_num * selected_indices_index );
        if (max_output_boxes_p_class == 0) {{
            return;
        }}
        for (int n=0; n<scores_batch; n++) {{
            for (int c=0; c<scores_class; c++) {{
                float decoded_boxes[boxes_spatial][4];
                // decode boxes
                if (center_point_box == 0) {{
                    for (int i=0; i<boxes_spatial; i++) {{
                        if (boxes[n][i][0] < boxes[n][i][2]) {{
                            decoded_boxes[i][0] = boxes[n][i][0];   // ymin
                            decoded_boxes[i][2] = boxes[n][i][2];   // ymax
                        }} else {{
                            decoded_boxes[i][0] = boxes[n][i][2];   // ymin
                            decoded_boxes[i][2] = boxes[n][i][0];   // ymax
                        }}
                        if (boxes[n][i][1] < boxes[n][i][3]) {{
                            decoded_boxes[i][1] = boxes[n][i][1];   // xmin
                            decoded_boxes[i][3] = boxes[n][i][3];   // xmax
                        }} else {{
                            decoded_boxes[i][1] = boxes[n][i][3];   // xmin
                            decoded_boxes[i][3] = boxes[n][i][1];   // xmax
                        }}
                    }}
                }} else {{
                    for (int i=0; i<boxes_spatial; i++) {{
                        decoded_boxes[i][0] = boxes[n][i][1] - boxes[n][i][3]/2;   // ymin
                        decoded_boxes[i][1] = boxes[n][i][0] - boxes[n][i][2]/2;   // xmin
                        decoded_boxes[i][2] = boxes[n][i][1] + boxes[n][i][3]/2;   // ymax
                        decoded_boxes[i][3] = boxes[n][i][0] + boxes[n][i][2]/2;   // xmax
                    }}
                }}
                //
                // SelectDetectionsAboveScoreThreshold(scores, non_max_suppression_score_threshold, &keep_scores, &keep_indices);
                //
                float keep_scores[scores_spatial];
                int keep_indices[scores_spatial];
                int num_scores_kept = 0;
                for (int i=0; i<scores_spatial; i++) {{
                    keep_scores[i] = 0.0;
                    keep_indices[i] = 0;
                }}
                for (int i=0; i<scores_spatial; i++) {{
//                    printf("scores(%f)[%d:%d:%d]:th(%f) -> %d\\n", scores[n][c][i], n, c, i, score_th, num_scores_kept);
                    if (scores[n][c][i] >= score_th) {{
                        keep_scores[num_scores_kept] = scores[n][c][i];
                        keep_indices[num_scores_kept] = i;
                        num_scores_kept++;
                    }}
                }}
//                for (int i=0; i<num_scores_kept; i++) {{
//                    printf("keep_indices[%d] = %d\\n", i, keep_indices[i]);
//                }}
                //
                // DecreasingPartialArgSort(keep_scores.data(), num_scores_kept, num_scores_kept, sorted_indices.data());
                //
                int sorted_indices[num_scores_kept];
                for (int i=0; i<num_scores_kept; i++) {{
                    sorted_indices[i] = keep_indices[i];
//                    printf("keep_score[%d] = %f\\n", sorted_indices[i], keep_scores[i]);
                }}
//                for (int i=0; i<num_scores_kept; i++) {{
//                    printf("pre_keep_score[%d] = %f\\n", i, keep_scores[i]);
//                }}
//                for (int i=0; i<num_scores_kept; i++) {{
//                    printf("pre_keep_indices[%d] = %d\\n", i, keep_indices[i]);
//                }}
//                for (int i=0; i<num_scores_kept; i++) {{
//                    printf("pre_sorted_indices[%d] = %d\\n", i, sorted_indices[i]);
//                }}
//                qsort( keep_scores, num_scores_kept, sizeof(float), nonmaxsuppression_num_cmp );
                nonmaxsuppression_idx_sort( keep_scores, sorted_indices, num_scores_kept);
//                for (int i=0; i<num_scores_kept; i++) {{
//                    printf("keep_score[%d] = %f\\n", i, keep_scores[i]);
//                }}
//                for (int i=0; i<num_scores_kept; i++) {{
//                    printf("keep_indices[%d] = %d\\n", i, keep_indices[i]);
//                }}
//                for (int i=0; i<num_scores_kept; i++) {{
//                    printf("sorted_indices[%d] = %d\\n", i, sorted_indices[i]);
//                }}
                const int num_boxes_kept = num_scores_kept;
                const int output_size = (num_scores_kept < max_output_boxes_p_class) ? num_scores_kept: max_output_boxes_p_class;
                int num_active_candidate = num_boxes_kept;
//                printf("output_size: %d\\n", output_size);
//                printf("selected_indices_num: %d\\n", selected_indices_num);
//                printf("num_active_candidate: %d\\n", num_active_candidate);
                int active_box_candidate[num_scores_kept];
                for (int row=0; row<num_boxes_kept; row++) {{
                    active_box_candidate[row] = 1;
                }}
                int selected[scores_spatial];
                int selected_box_cnt = 0;
                for (int i=0; i<num_boxes_kept; i++) {{
                    selected[i] = -1;
                }}
                for (int i=0; i<num_boxes_kept; i++) {{
                    if (num_active_candidate == 0 || selected_box_cnt >= output_size) break;
                    if (active_box_candidate[i] == 1) {{
//                        selected[selected_box_cnt] = keep_indices[sorted_indices[i]];
                        selected[selected_box_cnt] = sorted_indices[i];
//                        printf("%d: selected[%d] = %d\\n", i, selected_box_cnt, selected[selected_box_cnt]);
                        selected_box_cnt++;
                        active_box_candidate[i] = 0;
                        num_active_candidate--;
                    }} else {{
                        continue;
                    }}
                    for (int j = i + 1; j < num_boxes_kept; ++j) {{
                        if (active_box_candidate[j] == 1) {{
//                            float iou = nonmaxsuppression_compute_iou((float *)decoded_boxes, keep_indices[sorted_indices[i]], keep_indices[sorted_indices[j]]);
                            float iou = nonmaxsuppression_compute_iou((float *)decoded_boxes, sorted_indices[i], sorted_indices[j]);
//                            printf("%d:%d: iou = [%f:%f:%f:%f] [%f:%f:%f:%f] %f(th:%f)\\n", i, j,
//                                    decoded_boxes[i][0], decoded_boxes[i][1], decoded_boxes[i][2], decoded_boxes[i][3],
//                                    decoded_boxes[j][0], decoded_boxes[j][1], decoded_boxes[j][2], decoded_boxes[j][3],
//                                    iou, iou_th);
                            if (iou > iou_th) {{
                                active_box_candidate[j] = 0;
                                num_active_candidate--;
                            }}
                        }}
                    }}
                }}
//                for (int i=0; i<num_boxes_kept; i++) {{
//                    printf("sorted[%d] = %d : keep_indices = %d\\n", i, sorted_indices[i], keep_indices[sorted_indices[i]]);
//                }}
//                for (int i=0; i<num_boxes_kept; i++) {{
//                    printf("selected[%d] = %d\\n", i, selected[i]);
//                }}
                int num_batch_elements = selected_indices_num/scores_batch;
                for (int i=0; i<num_batch_elements/scores_class; i++) {{
                    if (selected[i] == -1) {{
                        selected_indices[n*num_batch_elements+c*num_batch_elements/scores_class+i][0] = -1;
                        selected_indices[n*num_batch_elements+c*num_batch_elements/scores_class+i][1] = -1;
                        selected_indices[n*num_batch_elements+c*num_batch_elements/scores_class+i][2] = -1;
                    }} else {{
                        selected_indices[n*num_batch_elements+c*num_batch_elements/scores_class+i][0] = n;
                        selected_indices[n*num_batch_elements+c*num_batch_elements/scores_class+i][1] = c;
                        selected_indices[n*num_batch_elements+c*num_batch_elements/scores_class+i][2] = selected[i];
                    }}
//                    printf("NMS_result: %d:%d:%d\\n", n, c, selected[i]);
                }}
            }}
        }}
    '''
    mapping = {}
    mapping.update({'op_func_name': self.get_func_name()})
    mapping.update({'center_point_box': self.attrs['center_point_box']})
    # boxes: (batch, spatial, 4); scores: (batch, class, spatial).
    mapping.update({'boxes_batch': self.input_tensor_shapes[0][0]})
    mapping.update({'boxes_spatial': self.input_tensor_shapes[0][1]})
    mapping.update({'boxes_params': self.input_tensor_shapes[0][2]})
    mapping.update({'scores_batch': self.input_tensor_shapes[1][0]})
    mapping.update({'scores_class': self.input_tensor_shapes[1][1]})
    mapping.update({'scores_spatial': self.input_tensor_shapes[1][2]})
    mapping.update(
        {'max_output_boxes_per_class': self.input_tensor_shapes[2][0]})
    mapping.update({'iou_threshold': self.input_tensor_shapes[3][0]})
    mapping.update({'score_threshold': self.input_tensor_shapes[4][0]})
    mapping.update(
        {'selected_indices_num': self.output_tensor_shapes[0][0]})
    mapping.update(
        {'selected_indices_index': self.output_tensor_shapes[0][1]})
    mapping.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    # 3
    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, float boxes{dims_boxes}, float scores{dims_scores}, long long int max_output_boxes_per_class[], float iou_threshold[], float score_threshold[], {t} selected_indices{dims_selected_indices}, void *inputs_params, void* outputs_params) {{
        {statements}
        }}
        ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    mappingf.update({'boxes': self.input_tensor_names[0]})
    mappingf.update({
        'dims_boxes':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[0])
    })
    mappingf.update({'scores': self.input_tensor_names[1]})
    mappingf.update({
        'dims_scores':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[1])
    })
    mappingf.update(
        {'max_output_boxes_per_class': self.input_tensor_names[2]})
    mappingf.update({'iou_threshold': self.input_tensor_names[3]})
    mappingf.update({'score_threshold': self.input_tensor_names[4]})
    mappingf.update({'selected_indices': self.output_tensor_names[0]})
    mappingf.update({
        'dims_selected_indices':
        c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    mappingf.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    mappingf.update({'statements': TemplateStatements.format(**mapping)})
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs):
    """Generate the C implementation of the Concat operator.

    Emits one loop nest over the output shape; inside it, an if/else-if
    chain selects which input tensor each output element is copied from,
    based on the output index along the concatenation axis.

    Returns:
        str: complete C source (includes, param type, function body).
    """
    # Half-open boundaries [b_min, b_max) of each input along the concat
    # axis, in input order: (input_name, lower_bound, upper_bound).
    b_min = 0
    b_max = 0
    input_array_boundary = []
    for idx, d in enumerate(self.input_tensor_shapes):
        b_max += d[self.attrs['axis']]
        input_array_boundary.append(
            (self.input_tensor_names[idx], b_min, b_max))
        b_min = b_max

    # Loop index letters 'i', 'j', 'k', ... — one per output dimension;
    # concat_idx is the letter that iterates the concat axis.
    loop_idx = string.ascii_lowercase[8:8 + self.output_tensor_ndims[0]]
    concat_idx = loop_idx[self.attrs['axis']]

    res = ''
    # include header
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'

    # N-dimensional for-loop skeleton over the output shape (only the
    # array's shape is used by the helper).
    TemplateArrayConcatLoop = c_helper.generate_ndim_for_loop(np.ones(
        self.output_tensor_shapes[0]), indent=0)

    # Element index strings (e.g. "[i][j][k]") for each input and for the
    # output, all expressed in output coordinates.
    input_vals = OrderedDict({
        k: self._gen_array_element_val(self.output_tensor_ndims[0], v)
        for k, v in self.input_tensor_dict.items()
    })
    output_vals = {
        self.output_tensor_names[0]:
        self._gen_array_element_val(self.output_tensor_ndims[0],
                                    self.output_tensor_values[0])
    }

    ConcatStatement = ''
    TemplateConditionElement = cleandoc('''
        {if} ({concat_idx} < {concat_boundary}) {{
        {indent}    {outputVal} = {inputVal};
        {indent}}}
        ''')
    for i, boundary in enumerate(input_array_boundary):
        mapping_cond_element = {}
        mapping_cond_element.update({'concat_idx': concat_idx})
        mapping_cond_element.update(
            {'indent': ' ' * 4 * (self.output_tensor_ndims[0] + 1)})
        # Look the element-index string up by the boundary's input name so
        # emission order follows input_array_boundary; iterating
        # input_vals.keys() directly was observed to give an unstable order.
        input_axis_idx = input_vals[boundary[0]]
        # Shift the concat-axis index into this input's local coordinates
        # by subtracting the input's lower boundary.
        input_axis_idx = input_axis_idx.replace(
            concat_idx, concat_idx + '-' + str(boundary[1]))
        mapping_cond_element.update(
            {'inputVal': boundary[0] + input_axis_idx})
        mapping_cond_element.update({
            'outputVal':
            list(output_vals.keys())[0] + list(output_vals.values())[0]
        })
        mapping_cond_element.update({'concat_boundary': boundary[2]})
        # The first branch is a plain "if" (indented to loop depth); the
        # rest chain on as "else if".
        if i == 0:
            mapping_cond_element.update(
                {'if': ' ' * 4 * (self.output_tensor_ndims[0] + 1) + 'if'})
        else:
            mapping_cond_element.update({'if': 'else if'})
        ConcatStatement += TemplateConditionElement.format(
            **mapping_cond_element)

    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param,{InputsParamSignature}, {OutputsParamSignature}, void *inputs_params, void* outputs_params) {{
        {statements}
        }}
        ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    input_sigs = []
    for name, value in self.input_tensor_dict.items():
        input_sigs.append(self.gen_param_signature(name, value))
    mappingf.update({'InputsParamSignature': ','.join(input_sigs)})
    mappingf.update({
        'OutputsParamSignature':
        self.gen_param_signature(self.output_tensor_names[0],
                                 self.output_tensor_values[0])
    })
    mappingf.update({
        'statements':
        TemplateArrayConcatLoop.replace('[statements]', ConcatStatement)
    })
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs):
    """Generate the C implementation of the Gather operator.

    Builds a loop nest over the output shape; each iteration reads
    indices[...] and, when the index is non-negative, copies the addressed
    element of data into output (negative indices write 0.0).

    Returns:
        str: complete C source (includes, param type, function body).
    """
    axis = self.attrs['axis']
    data_ndims = self.input_tensor_ndims[0]
    output_ndims = self.output_tensor_ndims[0]
    indices_ndims = self.input_tensor_ndims[1]
    res = ''
    # include header
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    # NOTE: a stray no-op "textwrap.TextWrapper()" statement was removed
    # here; the object was constructed and immediately discarded.
    TemplateKernel = textwrap.indent("""
    if ({array_indices}>= 0) {{
        {array_output}= {array_data};
    }} else {{
        {array_output} = 0.0;
    }}
    """, prefix=" " * 4 * output_ndims)
    # N-dimensional for-loop skeleton over the output shape.
    TemplateLoop = c_helper.generate_ndim_for_loop(
        np.ones(self.output_tensor_shapes[0]))
    # data dimensions before the gather axis use the leading output index
    # letters ('i', 'j', ...).
    array_right_data = ''.join(
        ['[' + v + ']' for v in string.ascii_lowercase[8:8 + axis]])
    # data dimensions after the gather axis use the letters that follow the
    # indices-index letters.
    array_left_data = ''.join([
        '[' + v + ']' for v in string.ascii_lowercase[
            9 + axis + indices_ndims - 1:8 + data_ndims + indices_ndims - 1]
    ])
    # indices is addressed with the output letters that replace the axis.
    array_indices = "indices" + "".join([
        '[' + v + ']'
        for v in string.ascii_lowercase[8 + axis:8 + axis + indices_ndims]
    ])
    # Full element expression: data[pre...][indices[...]][post...]
    array_data = "data" + array_right_data + '[' + array_indices + ']' + array_left_data
    array_output = "output" + ''.join([
        '[' + v + ']' for v in string.ascii_lowercase[8:8 + output_ndims]
    ])
    mapping_kernel = {}
    mapping_kernel.update({"array_data": array_data})
    mapping_kernel.update({"array_indices": array_indices})
    mapping_kernel.update({"array_output": array_output})
    # 3
    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {t1} data{dims_data}, {t2} indices{dims_indices}, {t1} output{dims_output}, void *inputs_params, void* outputs_params) {{
        {loop_statements}
        }}
        ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    mappingf.update({
        'dims_data':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[0])
    })
    mappingf.update({
        'dims_indices':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[1])
    })
    mappingf.update({
        'dims_output':
        c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    mappingf.update({'t1': data_type.np2c(self.output_tensor_dtypes[0])})
    mappingf.update({'t2': data_type.np2c(self.input_tensor_dtypes[1])})
    mappingf.update({
        'loop_statements':
        TemplateLoop.replace('[statements]',
                             TemplateKernel.format(**mapping_kernel))
    })
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs):
    """Generate the C implementation of the element-wise Sum operator.

    Produces one loop nest over the output shape whose body assigns the
    '+'-joined expression of every input element to the output element.

    Returns:
        str: complete C source (includes, param type, function body).
    """
    # Header includes required by this op.
    includes = '\n'.join(
        [c_helper.generate_local_include(h)
         for h in self.get_c_op_include_header()])

    # N-dimensional for-loop skeleton over the output shape.
    loop_skeleton = c_helper.generate_ndim_for_loop(
        np.ones(self.output_tensor_shapes[0]), indent=0)

    out_ndims = self.output_tensor_ndims[0]
    # Element index strings (e.g. "[i][j]") per input, keyed by name.
    input_vals = OrderedDict({
        name: self._gen_array_element_val(out_ndims, value)
        for name, value in self.input_tensor_dict.items()
    })
    out_name = self.output_tensor_names[0]
    out_index = self._gen_array_element_val(out_ndims,
                                            self.output_tensor_values[0])

    # Assignment placed at the innermost loop level.
    assign_template = cleandoc('''
        {indent}{outputVal} = {sum_expression};
        ''')
    assign_stmt = assign_template.format(
        indent=' ' * 4 * (out_ndims + 1),
        outputVal=out_name + out_index,
        sum_expression='+'.join(k + v for k, v in input_vals.items()))

    func_template = cleandoc('''
        void {op_func_name}(void *op_param,{InputsParamSignature}, {OutputsParamSignature}, void *inputs_params, void* outputs_params) {{
        {statements}
        }}
        ''')
    signatures = [
        self.gen_param_signature(name, value)
        for name, value in self.input_tensor_dict.items()
    ]
    function_body = func_template.format(
        op_func_name=self.get_func_name(),
        InputsParamSignature=','.join(signatures),
        OutputsParamSignature=self.gen_param_signature(
            out_name, self.output_tensor_values[0]),
        statements=loop_skeleton.replace('[statements]', assign_stmt))

    # includes \n\n param-type \n\n \n\n function — same layout as before.
    return '\n\n'.join([includes, self.get_c_param_type(), '', function_body])
def generate_c_code(self, **kwargs):
    """Generate C source for integer matrix multiplication with optional
    zero points (MatMulInteger-style: 2 inputs = plain, 4 inputs = with
    a_zero_point / b_zero_point subtracted before each multiply).

    Both operands are padded up to a fixed 5-D layout
    A[h][i][j][m][k], B[h][i][j][k][n], Y[h][i][j][m][n]; B is first
    transposed into a temporary BT so the inner product walks contiguous
    memory.  Returns the complete C source as a string.
    """
    res = ''
    # include header
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    input_shapes = []
    input_shapes.append(self.input_tensor_shapes[0])
    input_shapes.append(self.input_tensor_shapes[1])
    # Four inputs means the optional a/b zero-point tensors are present.
    if (len(self.input_tensor) == 4):
        input_shapes.append(self.input_tensor_shapes[2])
        input_shapes.append(self.input_tensor_shapes[3])
    # mod shapes: padded with 1s to max_dim — the extents actually looped.
    # org shapes: padded with 0s — a 0 marks a dimension absent from the
    # original tensor, consumed by the *_o "broadcast or not" flags below.
    input_mod_shapes = []
    input_org_shapes = []
    max_dim = 5
    if (len(input_shapes[0]) == 1):
        # 1-D A becomes a row vector: (1,1,1,1,k).
        input_mod_shapes.append(((1, ) * (max_dim - 1) + input_shapes[0]))
        input_org_shapes.append(((0, ) * (max_dim - 1) + input_shapes[0]))
    else:
        input_mod_shapes.append((1, ) * (max_dim - len(input_shapes[0])) +
                                input_shapes[0])
        input_org_shapes.append((0, ) * (max_dim - len(input_shapes[0])) +
                                input_shapes[0])
    if (len(input_shapes[1]) == 1):
        # 1-D B becomes a column vector: (1,1,1,k,1).
        input_mod_shapes.append(
            ((1, ) * (max_dim - 2) + input_shapes[1] + (1, )))
        input_org_shapes.append(
            ((0, ) * (max_dim - 2) + input_shapes[1] + (1, )))
    else:
        input_mod_shapes.append((1, ) * (max_dim - len(input_shapes[1])) +
                                input_shapes[1])
        input_org_shapes.append((0, ) * (max_dim - len(input_shapes[1])) +
                                input_shapes[1])
    outputs_shape = ((1, ) * (max_dim - len(self.output_tensor_shapes[0]))
                     ) + self.output_tensor_shapes[0]
    output_names = self.output_tensor_names[0]  # NOTE(review): unused
    ndim = self.output_tensor_ndims[0]  # NOTE(review): unused
    # First extent of each zero-point tensor; 1 means a scalar zero point
    # broadcast over all rows/columns.  [0, 0] is a placeholder when the
    # zero-point inputs are absent (it is never read in that case).
    if (len(self.input_tensor) == 2):
        zero_point_shapes = [0, 0]
    else:
        zero_point_shapes = [
            self.input_tensor_shapes[2][0], self.input_tensor_shapes[3][0]
        ]
    # C-side constants and working storage: dimension extents, broadcast
    # flags (*_o), flat-pointer aliases, and the BT transpose buffer.
    TemplateStatements = '''
    const int A_h = {A_d0};
    const int A_i = {A_d1};
    const int A_j = {A_d2};
    const int A_m = {A_d3};
    const int A_k = {A_d4};
    const int B_h = {B_d0};
    const int B_i = {B_d1};
    const int B_j = {B_d2};
    const int B_k = {B_d3};
    const int B_n = {B_d4};
    const int Y_h = {Y_d0};
    const int Y_i = {Y_d1};
    const int Y_j = {Y_d2};
    const int Y_m = {Y_d3};
    const int Y_n = {Y_d4};
    const int A_h_o = {A_d0_o};
    const int A_i_o = {A_d1_o};
    const int A_j_o = {A_d2_o};
    const int B_h_o = {B_d0_o};
    const int B_i_o = {B_d1_o};
    const int B_j_o = {B_d2_o};
    {ta} *_A = ({ta} *)A;
    {tb} *_B = ({tb} *)B;
    int *_Y = (int *)Y;
    int tmpA, tmpB, tmpY;
    {tb} BT [{B_d0}][{B_d1}][{B_d2}][{B_d3}][{B_d4}];
    {tb} *_BT = ({tb} *)BT;
    '''
    if (len(self.input_tensor) == 4):
        TemplateStatements += '''
    {taz} a_zero_point_mod[{A_d3}];
    {tbz} b_zero_point_mod[{B_d4}];
        '''
    TemplateStatements += '''
    int h, i, j;
    int ah, ai, aj;
    int bh, bi, bj;
    int k;
    int m;
    int n;
    int tmpA_pos_h, tmpA_pos_i, tmpA_pos;
    int tmpB_pos_h, tmpB_pos_i, tmpB_pos;
    int tmpY_pos_h, tmpY_pos_i, tmpY_pos;

    memset( Y, (int)0, sizeof(*_Y)*Y_h*Y_i*Y_j*Y_m*Y_n );
    '''
    # Expand a scalar zero point to one entry per row/column so the hot
    # loop can index it uniformly.
    if (len(self.input_tensor) == 4):
        if (zero_point_shapes[0] == 1):
            TemplateStatements += '''
    #pragma omp parallel for
    for (m=0; m < A_m; m++) {{
        a_zero_point_mod[m] = a_zero_point[0];
    }}
            '''
        else:
            TemplateStatements += '''
    #pragma omp parallel for
    for (m=0; m < A_m; m++) {{
        a_zero_point_mod[m] = a_zero_point[m];
    }}
            '''
        if (zero_point_shapes[1] == 1):
            TemplateStatements += '''
    #pragma omp parallel for
    for (n=0; n < B_n; n++) {{
        b_zero_point_mod[n] = b_zero_point[0];
    }}
            '''
        else:
            TemplateStatements += '''
    #pragma omp parallel for
    for (n=0; n < B_n; n++) {{
        b_zero_point_mod[n] = b_zero_point[n];
    }}
            '''
    # First loop nest: transpose B's trailing [k][n] plane into BT as
    # [n][k].  Second nest: the matmul proper; the (X_o > 1) ? idx : 0
    # pattern re-reads index 0 for broadcast (size-1 or absent) dims.
    # NOTE(review): "reduction(+:tmpY)" looks wrong — tmpY is reset and
    # stored per (m,n), so private(tmpY) seems intended; as written each
    # thread's private copy still produces correct stores, but confirm.
    TemplateStatements += '''
    for (h=0; h < B_h; h++) {{
        bh = (B_h_o > 1) ? h : 0;
        tmpB_pos_h = bh*(B_i*B_j*B_k*B_n);
        for (i=0; i < B_i; i++) {{
            bi = (B_i_o > 1) ? i : 0;
            tmpB_pos_i = tmpB_pos_h + bi*(B_j*B_k*B_n);
            for (j=0; j < B_j; j++) {{
                bj = (B_j_o > 1) ? j : 0;
                tmpB_pos = tmpB_pos_i + bj*(B_k*B_n);
                #pragma omp parallel for private(n,k)
                for (n=0; n < B_n; n++) {{
                    for (k=0; k < B_k; k++) {{
                        *(_BT + tmpB_pos + n*(B_k) + k) = *(_B + tmpB_pos + k*(B_n) + n);
                    }}
                }}
            }}
        }}
    }}

    for (h=0; h < Y_h; h++) {{
        ah = (A_h_o > 1) ? h : 0;
        bh = (B_h_o > 1) ? h : 0;
        tmpA_pos_h = ah*(A_i*A_j*A_m*A_k);
        tmpB_pos_h = bh*(B_i*B_j*B_k*B_n);
        tmpY_pos_h = h*(Y_i*Y_j*Y_m*Y_n);
        for (i=0; i < Y_i; i++) {{
            ai = (A_i_o > 1) ? i : 0;
            bi = (B_i_o > 1) ? i : 0;
            tmpA_pos_i = tmpA_pos_h + ai*(A_j*A_m*A_k);
            tmpB_pos_i = tmpB_pos_h + bi*(B_j*B_k*B_n);
            tmpY_pos_i = tmpY_pos_h + i*(Y_j*Y_m*Y_n);
            for (j=0; j < Y_j; j++) {{
                aj = (A_j_o > 1) ? j : 0;
                bj = (B_j_o > 1) ? j : 0;
                tmpA_pos = tmpA_pos_i + aj*(A_m*A_k);
                tmpB_pos = tmpB_pos_i + bj*(B_k*B_n);
                tmpY_pos = tmpY_pos_i + j*(Y_m*Y_n);
                #pragma omp parallel for private(m,n,k,tmpA,tmpB) reduction(+:tmpY)
                for (m=0; m < Y_m; m++) {{
                    for (n=0; n < Y_n; n++) {{
                        tmpY = 0;
                        for (k=0; k < B_k; k++) {{
    '''
    # Inner-product operands: raw values (2 inputs) or zero-point-adjusted
    # values (4 inputs).
    if (len(self.input_tensor) == 2):
        TemplateStatements += '''
                            tmpA = *(_A + tmpA_pos + m*(A_k) + k);
                            tmpB = *(_BT + tmpB_pos + n*(B_k) + k);
        '''
    else:
        TemplateStatements += '''
                            tmpA = *(_A + tmpA_pos + m*(A_k) + k) - a_zero_point_mod[m];
                            tmpB = *(_BT + tmpB_pos + n*(B_k) + k) - b_zero_point_mod[n];
        '''
    TemplateStatements += '''
                            tmpY += tmpA * tmpB;
                        }}
                        *(_Y + tmpY_pos + m*(Y_n) + n) = tmpY;
                    }}
                }}
            }}
        }}
    }}
    '''
    # Substitution values for the statement template above.
    mapping = {}
    mapping.update({'A_d0': input_mod_shapes[0][0]})
    mapping.update({'A_d1': input_mod_shapes[0][1]})
    mapping.update({'A_d2': input_mod_shapes[0][2]})
    mapping.update({'A_d3': input_mod_shapes[0][3]})
    mapping.update({'A_d4': input_mod_shapes[0][4]})
    mapping.update({'B_d0': input_mod_shapes[1][0]})
    mapping.update({'B_d1': input_mod_shapes[1][1]})
    mapping.update({'B_d2': input_mod_shapes[1][2]})
    mapping.update({'B_d3': input_mod_shapes[1][3]})
    mapping.update({'B_d4': input_mod_shapes[1][4]})
    mapping.update({'Y_d0': outputs_shape[0]})
    mapping.update({'Y_d1': outputs_shape[1]})
    mapping.update({'Y_d2': outputs_shape[2]})
    mapping.update({'Y_d3': outputs_shape[3]})
    mapping.update({'Y_d4': outputs_shape[4]})
    mapping.update({'ta': data_type.np2c(self.input_tensor_dtypes[0])})
    mapping.update({'tb': data_type.np2c(self.input_tensor_dtypes[1])})
    if (len(self.input_tensor) == 4):
        mapping.update(
            {'taz': data_type.np2c(self.input_tensor_dtypes[2])})
        mapping.update(
            {'tbz': data_type.np2c(self.input_tensor_dtypes[3])})
    mapping.update({'A_d0_o': input_org_shapes[0][0]})
    mapping.update({'A_d1_o': input_org_shapes[0][1]})
    mapping.update({'A_d2_o': input_org_shapes[0][2]})
    mapping.update({'B_d0_o': input_org_shapes[1][0]})
    mapping.update({'B_d1_o': input_org_shapes[1][1]})
    mapping.update({'B_d2_o': input_org_shapes[1][2]})
    # 3
    # Function signature differs with/without the zero-point parameters.
    if (len(self.input_tensor) == 4):
        TemplateFunction = cleandoc('''
            void {op_func_name}(void *op_param, {ta} A{dims_A}, {tb} B{dims_B}, {taz} a_zero_point{dims_az}, {tbz} b_zero_point{dims_bz}, int Y{dims}, void *inputs_params, void* outputs_params) {{
            {statements}
            }}
            ''')
    else:
        TemplateFunction = cleandoc('''
            void {op_func_name}(void *op_param, {ta} A{dims_A}, {tb} B{dims_B}, int Y{dims}, void *inputs_params, void* outputs_params) {{
            {statements}
            }}
            ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    mappingf.update({'A': self.input_tensor_names[0]})
    mappingf.update(
        {'dims_A': c_helper.generate_dim_bracket(input_shapes[0])})
    mappingf.update({'B': self.input_tensor_names[1]})
    mappingf.update(
        {'dims_B': c_helper.generate_dim_bracket(input_shapes[1])})
    mappingf.update({'Y': self.output_tensor_names[0]})
    mappingf.update({
        'dims':
        c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    mappingf.update({'ta': data_type.np2c(self.input_tensor_dtypes[0])})
    mappingf.update({'tb': data_type.np2c(self.input_tensor_dtypes[1])})
    if (len(self.input_tensor) == 4):
        mappingf.update(
            {'dims_az': c_helper.generate_dim_bracket(input_shapes[2])})
        mappingf.update(
            {'dims_bz': c_helper.generate_dim_bracket(input_shapes[3])})
        mappingf.update(
            {'taz': data_type.np2c(self.input_tensor_dtypes[2])})
        mappingf.update(
            {'tbz': data_type.np2c(self.input_tensor_dtypes[3])})
    mappingf.update({'statements': TemplateStatements.format(**mapping)})
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs):
    """Generate the C implementation of the Softmax operator.

    The input is flattened to a 2-D view [batch_size, num] around the
    softmax axis; the C body does the numerically-stable max-subtract,
    exponentiate, and normalize passes per batch row.

    Returns:
        str: complete C source (includes, param type, function body).
    """
    res = ''
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    axis = self.attrs['axis']
    # Normalize any negative axis (the previous code only handled -1).
    if axis < 0:
        axis += self.input_tensor_ndims[0]
    # Dims before the axis collapse into the batch; the axis and every
    # later dim collapse into the softmax length.
    batch_size = 1
    for d in range(0, axis):
        batch_size *= self.input_tensor_shapes[0][d]
    num = 1
    for d in range(axis, self.input_tensor_ndims[0]):
        num *= self.input_tensor_shapes[0][d]
    TemplateStatements = '''
    {t} *_input = ({t} *)input;
    {t} *_output = ({t} *)output;
    int batch_size = {batch_size};
    int num = {num};
    int i;
    int batch;
    {t} max, sum;

    for (batch=0; batch<batch_size; batch++) {{
        sum = 0.0;
        max = -HUGE_VAL;
        for (i=0; i<num; i++) {{
            if (*(_input + batch*num +i) > max) {{
                max = *(_input + batch*num +i);
            }}
        }}
        for (i=0; i<num; i++) {{
            *(_output + batch*num +i) = {exp}(*(_input + batch*num +i) - max);
            sum += *(_output + batch*num +i);
        }}
        for (i=0; i<num; i++) {{
            *(_output + batch*num +i) /= sum;
        }}
    }}
    '''
    mapping = {}
    mapping.update({'batch_size': batch_size})
    mapping.update({'num': num})
    # (removed dead 'd1'/'d2' entries: they were never referenced by the
    # template and raised IndexError for 1-D outputs)
    mapping.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    # Pick the exp flavor matching the C scalar type; expf is the default
    # for everything that is not double precision.
    if (self.output_tensor_dtypes[0] == 'float64'):
        mapping.update({'exp': 'exp'})
    else:
        mapping.update({'exp': 'expf'})
    # 3
    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {t} input{dims_input}, {t} output{dims}, void *inputs_params, void* outputs_params) {{
        {statements}
        }}
        ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    mappingf.update({
        'dims_input':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[0])
    })
    mappingf.update({
        'dims':
        c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    mappingf.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    mappingf.update({'statements': TemplateStatements.format(**mapping)})
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs):
    """Generate the C implementation of the element-wise Mean operator.

    Emits one loop nest over the output shape; at each element it
    accumulates every input value while counting them, then divides once
    by the count.

    Returns:
        str: complete C source (includes, param type, function body).
    """
    res = ''
    # include header
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    # N-dimensional for-loop skeleton over the output shape.  np.ones
    # instead of the previous np.empty for consistency with the other ops;
    # only the array's shape is meaningful to the helper.
    TemplateArrayAddLoop = c_helper.generate_ndim_for_loop(
        np.ones(self.output_tensor_shapes[0]), indent=0)
    # Element index strings (e.g. "[i][j]") per input / for the output.
    input_vals = OrderedDict({
        k: self._gen_array_element_val(self.output_tensor_ndims[0], v)
        for k, v in self.input_tensor_dict.items()
    })
    output_vals = {
        self.output_tensor_names[0]:
        self._gen_array_element_val(self.output_tensor_ndims[0],
                                    self.output_tensor_values[0])
    }
    Conditions = ''
    MeanStatement = ''
    TemplateCondition = cleandoc('''
        {t} sum = 0.0f;
        int num = 0;
        {conditions}
        {indent}{outputVal} = sum/num;
        ''')
    TemplateCompare = cleandoc('''
        {indent}num++;
        {indent}sum += {input};
        ''')
    # One count-and-accumulate statement per input tensor.
    for k, v in input_vals.items():
        Conditions += TemplateCompare.format(**{
            'input': k + v,
            'indent': ' ' * 4 * (self.input_tensor_ndims[0] + 1)
        })
        Conditions += '\n'
    # Assemble the per-element mean body once, after all inputs are in.
    # (Previously attached to the loop via a no-op for/else clause.)
    mapping_cond = {'t': data_type.np2c(self.input_tensor_dtypes[0])}
    mapping_cond.update({'conditions': Conditions})
    mapping_cond.update({
        'outputVal':
        list(output_vals.keys())[0] + list(output_vals.values())[0]
    })
    mapping_cond.update(
        {'indent': ' ' * 4 * (self.output_tensor_ndims[0] + 1)})
    MeanStatement += TemplateCondition.format(**mapping_cond)
    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param,{InputsParamSignature}, {OutputsParamSignature}, void *inputs_params, void* outputs_params) {{
        {statements}
        }}
        ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    input_sigs = []
    for name, value in self.input_tensor_dict.items():
        input_sigs.append(self.gen_param_signature(name, value))
    mappingf.update({'InputsParamSignature': ','.join(input_sigs)})
    mappingf.update({
        'OutputsParamSignature':
        self.gen_param_signature(self.output_tensor_names[0],
                                 self.output_tensor_values[0])
    })
    mappingf.update({
        'statements':
        TemplateArrayAddLoop.replace('[statements]', MeanStatement)
    })
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs):
    """Generate the C implementation of the ImageScaler operator.

    Emits four nested NCHW loops computing
    Y[n][c][h][w] = scale * X[n][c][h][w] + bias[c].

    Returns:
        str: complete C source (includes, param type, function body).
    """
    # Header includes required by this op.
    include_lines = [
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ]
    res = '\n'.join(include_lines)
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'

    TemplateStatements = '''
    int Y_n = {d1};
    int Y_c = {d2};
    int Y_h = {d3};
    int Y_w = {d4};
    const double bias[] = {bias};
    const double scale = {scale};

    int n;
    int c, h, w;
    for (n=0; n<Y_n; n++) {{
        for (c=0; c<Y_c; c++) {{
            for (h=0; h<Y_h; h++) {{
                for (w=0; w<Y_w; w++) {{
                    Y[n][c][h][w] = scale * X[n][c][h][w] + bias[c];
                }}
            }}
        }}
    }}
    '''
    x_shape = self.input_tensor_shapes[0]
    # Rewrite the Python list literal of biases into a C array initializer.
    bias_literal = str(self.attrs['bias']).replace('[', '{').replace(']', '}')
    mapping = {
        'd1': x_shape[0],
        'd2': x_shape[1],
        'd3': x_shape[2],
        'd4': x_shape[3],
        'bias': bias_literal,
        'scale': self.attrs['scale'],
    }

    # 3
    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {t} X{dims_X}, {t} Y{dims}, void *inputs_params, void* outputs_params) {{
        {statements}
        }}
        ''')
    mappingf = {
        'op_func_name': self.get_func_name(),
        'dims_X': c_helper.generate_dim_bracket(self.input_tensor_shapes[0]),
        'dims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
        't': data_type.np2c(self.output_tensor_dtypes[0]),
    }
    mappingf['statements'] = TemplateStatements.format(**mapping)

    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res