Ejemplo n.º 1
0
    def generate_c_code(self, **kwargs):
        """Assemble the C source text emitted for this operator.

        The text consists of the local include lines, the operator's
        parameter type, and one C function. That function declares a constant
        ``int`` array initialised from ``self.output_tensor_values[0]`` and
        runs the generated loop nest with ``output[i] =_output_val[i];`` as
        its body.

        Args:
            **kwargs: accepted for interface compatibility; not read here.

        Returns:
            str: the generated C code.
        """
        include_lines = [
            c_helper.generate_local_include(header)
            for header in self.get_c_op_include_header()
        ]
        code = '\n'.join(include_lines) + '\n\n'
        code += self.get_c_param_type() + '\n\n'

        # Loop nest built from the output shape; its '[statements]' marker is
        # substituted below.
        loop_code = c_helper.generate_ndim_for_loop(
            np.ones(self.output_tensor_shapes[0]))

        const_name = '_output_val'
        values_csv = ', '.join(str(v) for v in self.output_tensor_values[0])
        # The array suffix is simply the textual form of input_tensor_ndims.
        const_decl = ''.join([
            '  ' * 4, 'int ', const_name, str(self.input_tensor_ndims),
            '={', values_csv, '};',
        ])

        # The copy statement has no '{...}' fields, so formatting it with the
        # name mapping leaves it unchanged.
        copy_stmt = 'output[i] ={0}[i];'.format(const_name)
        names = {'X': const_name, 'Y': self.output_tensor_names[0]}

        function_template = cleandoc('''
        void {op_func_name}(void *op_param,{tx} input{XDims} , {ty} output{YDims}, void *inputs_params, void* outputs_params)
        {{
        {statements1}
        {statements2}
        }}
        ''')
        fields = {
            'op_func_name': self.get_func_name(),
            'XDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[0]),
            'YDims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
            'tx': data_type.np2c(self.input_tensor_dtypes[0]),
            'ty': data_type.np2c(self.output_tensor_dtypes[0]),
            'statements1': const_decl,
            'statements2': loop_code.replace('[statements]',
                                             copy_stmt.format(**names)),
        }
        return code + '\n\n' + function_template.format(**fields)
Ejemplo n.º 2
0
    def generate_c_code(self, **kwargs):
        """Assemble the C source text for the element-wise absolute value op.

        The text consists of the local include lines, the operator's
        parameter type, and one C function whose loop nest assigns
        ``Y[...] = <abs>(X[...])`` for every output element.

        Args:
            **kwargs: accepted for interface compatibility; not read here.

        Returns:
            str: the generated C code.

        Raises:
            ValueError: if the output C type does not start with ``double``,
                ``float`` or ``int``.
        """
        res = '\n'.join(
            c_helper.generate_local_include(h)
            for h in self.get_c_op_include_header())
        res += '\n\n'
        res += self.get_c_param_type()
        res += '\n\n'

        # Loop nest built from the output shape; '[statements]' is replaced
        # with the assignment below.
        loop_template = c_helper.generate_ndim_for_loop(
            np.ones(self.output_tensor_shapes[0]))

        # Build the "[i][j]..." index suffix, one loop variable per output
        # dimension (letters start at 'i').
        index_names = string.ascii_lowercase[8:8 + self.output_tensor_ndims[0]]
        statement_dims = ''
        for _, step in zip_longest(self.input_tensor[0].shape[::-1],
                                   reversed(index_names)):
            statement_dims = '[{0}]'.format(step) + statement_dims

        out_c_type = data_type.np2c(self.output_tensor_dtypes[0])
        if out_c_type.startswith('double'):
            c_abs = 'fabs'
        elif out_c_type.startswith('float'):
            c_abs = 'fabsf'
        elif out_c_type.startswith('int'):
            # fabsf() converts its argument to float, which cannot represent
            # every integer above 2**24. fabs() works in double, is exact for
            # all 32-bit values, and comes from the same header as fabsf().
            c_abs = 'fabs'
        else:
            raise ValueError('{0} is not supported'.format(out_c_type))

        statement = '{Y}{StatementDims} = {c_abs}({X}{StatementDims});'.format(
            X=self.input_tensor_names[0],
            Y=self.output_tensor_names[0],
            StatementDims=statement_dims,
            c_abs=c_abs)

        TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param,{t} {X}{Dims} , {t} {Y}{Dims}, void *inputs_params, void* outputs_params)
        {{
        {statements}
        }}
        ''')
        mappingf = {
            'op_func_name': self.get_func_name(),
            'X': self.input_tensor_names[0],
            'Y': self.output_tensor_names[0],
            # Input and output share one bracket string; the output shape is
            # the one that was effectively used before.
            'Dims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
            't': out_c_type,
            'statements': loop_template.replace('[statements]', statement),
        }
        res += '\n\n'
        res += TemplateFunction.format(**mappingf)

        return res
Ejemplo n.º 3
0
    def generate_c_code(self, **kwargs):
        """Assemble the C source text that fills ``output`` with random values.

        The text consists of the local include lines, the operator's
        parameter type, and one C function. The function defines ``high`` and
        ``low`` from the ``high``/``low`` attributes (defaults 1.0 and 0.0)
        and assigns ``low + (high - low) * rand()/RAND_MAX`` to every output
        element, i.e. a value between ``low`` and ``high``.

        Args:
            **kwargs: accepted for interface compatibility; not read here.

        Returns:
            str: the generated C code.
        """
        res = '\n'.join(
            c_helper.generate_local_include(h)
            for h in self.get_c_op_include_header())
        res += '\n\n'

        # param type
        res += self.get_c_param_type()
        res += '\n\n'

        # Loop nest built from the output shape; '[statements]' is replaced
        # with the assignment below.
        loop_template = c_helper.generate_ndim_for_loop(
            np.ones(self.output_tensor_shapes[0]))

        # The scaled random term spans [0, high - low]; adding `low` shifts it
        # onto [low, high]. (The previous code subtracted `low`, which is only
        # correct when low == 0.)
        TemplateStatements = '''
                    output{dims} = low + ((high - low) * rand()/RAND_MAX );
        '''

        # "[i][j]..." index suffix, one loop variable per output dimension.
        index_names = string.ascii_lowercase[8:8 + self.output_tensor_ndims[0]]
        mapping = {'dims': ''.join('[' + v + ']' for v in index_names)}

        TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {t_in} data{dims}, {t_out} output{dims}, void *inputs_params, void* outputs_params) {{
            const float high = {high};
            const float low = {low};
            {statements}
        }}
        ''')

        mappingf = {
            'op_func_name': self.get_func_name(),
            'dims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
            't_in': data_type.np2c(self.input_tensor_dtypes[0]),
            't_out': data_type.np2c(self.output_tensor_dtypes[0]),
            'high': self.attrs.get('high', 1.0),
            'low': self.attrs.get('low', 0.0),
            'statements': loop_template.replace(
                '[statements]', TemplateStatements.format(**mapping)),
        }
        res += '\n\n'
        res += TemplateFunction.format(**mappingf)

        return res
Ejemplo n.º 4
0
    def gen_param_signature(self, name, value):
        """Return the C declaration text ``<type> <name><dims>`` for ``value``.

        Args:
            name: identifier placed in the declaration.
            value: object exposing ``dtype`` and ``shape``; the C type comes
                from ``data_type.np2c`` and the bracket suffix from
                ``c_helper.generate_dim_bracket``.

        Returns:
            str: the formatted declaration (no trailing semicolon).
        """
        c_type = data_type.np2c(value.dtype)
        brackets = c_helper.generate_dim_bracket(value.shape)
        return '{type} {name}{dim_bracket}'.format(
            type=c_type, name=name, dim_bracket=brackets)
Ejemplo n.º 5
0
    def generate_c_code(self, **kwargs):
        """Assemble the C source text for a function that copies input to output.

        The text is the operator's parameter type followed by one C function
        that ``memcpy``s ``sizeof(type) * element_count`` bytes from ``Input``
        to ``Output``.

        Args:
            **kwargs: accepted for interface compatibility; not read here.

        Returns:
            str: the generated C code.
        """
        identity_template = cleandoc('''
        void {op_func_name}(void *op_param, {t} Input{InputDims}, {t} Output{OutputDims}, void *inputs_params, void* outputs_params){{        
            memcpy(Output, Input, sizeof({t}) * {cumdim});
        }}
        ''')

        res = ''
        res += self.get_c_param_type()  # call only once
        res += '\n\n\n'

        input_shape = self.input_tensor_shapes[0]
        # np.prod replaces np.cumproduct(...)[-1]: cumproduct no longer exists
        # in NumPy 2.0 and indexing [-1] failed on an empty shape. The integer
        # dtype plus int() keeps the rendered value a plain integer (an empty
        # shape yields 1).
        element_count = int(np.prod(input_shape, dtype=np.int64))

        # Both parameters are declared with the input shape, as before.
        # NOTE(review): presumably input and output shapes always match for
        # this op - confirm.
        dims = c_helper.generate_dim_bracket
        mapping = {
            'op_func_name': self.get_func_name(),
            't': data_type.np2c(self.input_tensor_dtypes[0]),
            'cumdim': element_count,
            'InputDims': dims(input_shape),
            'OutputDims': dims(input_shape),
        }

        res += identity_template.format(**mapping)

        return res
Ejemplo n.º 6
0
    def generate_c_code(self, **kwargs):
        """Assemble the C source text for the element-wise ``exp`` function.

        The text consists of the local include lines, the operator's
        parameter type, and one C function whose loop nest assigns
        ``Y[...] = exp(X[...])``. When ``self.OpenMP`` is truthy, the
        ``[pragma]`` marker of the loop nest is replaced with
        ``self.PRAGMA_OMP``.

        Args:
            **kwargs: accepted for interface compatibility; not read here.

        Returns:
            str: the generated C code.
        """
        include_lines = [
            c_helper.generate_local_include(header)
            for header in self.get_c_op_include_header()
        ]
        code = '\n'.join(include_lines) + '\n\n'
        code += self.get_c_param_type() + '\n\n'

        # Loop nest built from the output shape.
        loop_code = c_helper.generate_ndim_for_loop(
            np.ones(self.output_tensor_shapes[0]), pragma=self.OpenMP)
        if self.OpenMP:
            loop_code = loop_code.replace('[pragma]', self.PRAGMA_OMP)

        # Loop body; {dims} becomes the "[i][j]..." index suffix.
        body_template = '''
            Y{dims} = exp(X{dims});
        '''
        index_names = string.ascii_lowercase[8:8 + self.output_tensor_ndims[0]]
        body_fields = {
            'dims': ''.join('[' + letter + ']' for letter in index_names),
        }

        function_template = cleandoc('''
        void {op_func_name}(void *op_param, {t} X{dims}, {t} Y{dims}, void *inputs_params, void* outputs_params) {{
            {statements}
        }}
        ''')
        function_fields = {
            'op_func_name': self.get_func_name(),
            'dims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
            't': data_type.np2c(self.output_tensor_dtypes[0]),
            'statements': loop_code.replace(
                '[statements]', body_template.format(**body_fields)),
        }
        return code + '\n\n' + function_template.format(**function_fields)
Ejemplo n.º 7
0
    def generate_c_code(self, **kwargs):
        """Assemble the C source text for this operator's function.

        The text consists of the local include lines, the operator's
        parameter type, and one C function whose loop nest (built from the
        output shape) contains whatever ``self.generate_kernel_code()``
        returns. When ``self.OpenMP`` is truthy, the ``[pragma]`` marker is
        replaced with ``self.PRAGMA_OMP``.

        Args:
            **kwargs: accepted for interface compatibility; not read here.

        Returns:
            str: the generated C code.
        """
        include_lines = [
            c_helper.generate_local_include(header)
            for header in self.get_c_op_include_header()
        ]
        code = '\n'.join(include_lines) + '\n\n'
        code += self.get_c_param_type() + '\n\n'

        loop_code = c_helper.generate_ndim_for_loop(
            np.ones(self.output_tensor_shapes[0]), pragma=self.OpenMP)
        if self.OpenMP:
            loop_code = loop_code.replace('[pragma]', self.PRAGMA_OMP)

        function_template = cleandoc('''
        void {op_func_name}(void *op_param, {t} {X}{XDims}, {t} {C}{CDims}, void *inputs_params, void* outputs_params)
        {{
        {statements}
        }}
        ''')
        # Both parameters are declared with the output element type.
        fields = {
            'op_func_name': self.get_func_name(),
            'X': self.input_tensor_names[0],
            'C': self.output_tensor_names[0],
            'XDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[0]),
            'CDims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
            't': data_type.np2c(self.output_tensor_dtypes[0]),
            'statements': loop_code.replace('[statements]',
                                            self.generate_kernel_code()),
        }
        return code + '\n\n' + function_template.format(**fields)
Ejemplo n.º 8
0
 def _generate_initializers_c(self):
     """Return C definitions, with values, for every initializer tensor.

     Each name in ``self.initializers`` is looked up in ``self.tensor_dict``
     and rendered as ``<type> <name><dims> = <values>;``. Definitions are
     separated by a blank line; an empty initializer list gives ``""``.
     """
     definition = cleandoc("{t} {name}{dim_bracket} = {values};")

     def render(name):
         tensor = self.tensor_dict[name]
         return definition.format(
             name=name,
             t=data_type.np2c(tensor.dtype),
             dim_bracket=c_helper.generate_dim_bracket(tensor.shape),
             values=c_helper.generate_c_array(tensor))

     return "\n\n".join([render(name) for name in self.initializers])
Ejemplo n.º 9
0
def generate_param_signature(param_dict, override_param_name=None):
    """Build C parameter declarations for every entry of ``param_dict``.

    Args:
        param_dict: mapping of parameter name to a value exposing ``dtype``
            and ``shape``.
        override_param_name: when not ``None``, used as the name in every
            declaration instead of the dictionary key.

    Returns:
        list[str]: one `` <type> <name><dims>`` string (with a leading space)
        per entry, in the dictionary's iteration order.
    """
    param_signature = " {type} {name}{dim}"

    param_signatures = []
    for param_name, param_val in param_dict.items():
        # Identity test: `== None` would invoke a custom __eq__ on the
        # override object.
        name = param_name if override_param_name is None else override_param_name
        param_signatures.append(
            param_signature.format(
                type=data_type.np2c(param_val.dtype),
                dim=generate_dim_bracket(param_val.shape),
                name=name))
    return param_signatures
Ejemplo n.º 10
0
    def generate_c_code(self, **kwargs):
        """Assemble the C source text for the LeakyRelu function.

        The text consists of the local include lines, the operator's
        parameter type, and one C function that defines a constant ``alpha``
        and runs a loop nest containing
        ``self.generate_kernel_code(alpha_variable_value=False)``. When
        ``self.OpenMP`` is truthy, the ``[pragma]`` marker is replaced with
        ``self.PRAGMA_OMP``.

        Args:
            **kwargs: accepted for interface compatibility; not read here.

        Returns:
            str: the generated C code.
        """
        res = '\n'.join(
            c_helper.generate_local_include(h)
            for h in self.get_c_op_include_header())
        res += '\n\n'

        # param type
        res += self.get_c_param_type()
        res += '\n\n'

        # Loop nest built from the output shape.
        loop_template = c_helper.generate_ndim_for_loop(
            np.ones(self.output_tensor_shapes[0]), pragma=self.OpenMP)
        if self.OpenMP:
            loop_template = loop_template.replace('[pragma]', self.PRAGMA_OMP)

        TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {t} {X}{dims}, {t} {Y}{dims}, void *inputs_params, void* outputs_params) {{
            LeakyReluOpParam *param_ptr = (LeakyReluOpParam *)op_param;
            const {t} alpha = {alpha};
        
            {statements}
        }}
        ''')

        mappingf = {
            'op_func_name': self.get_func_name(),
            'dims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
            't': data_type.np2c(self.output_tensor_dtypes[0]),
            # `alpha` is optional in ONNX LeakyRelu with a default of 0.01;
            # indexing attrs directly raised KeyError when it was omitted.
            'alpha': self.attrs.get('alpha', 0.01),
            'X': self.input_tensor_names[0],
            'Y': self.output_tensor_names[0],
            'statements': loop_template.replace(
                '[statements]',
                self.generate_kernel_code(alpha_variable_value=False)),
        }
        res += '\n\n'
        res += TemplateFunction.format(**mappingf)

        return res
Ejemplo n.º 11
0
    def _generate_inititializers_def(self):
        """Return C declarations (no values) for every initializer tensor.

        The result starts with a ``// define initializers`` comment line. For
        each name in ``self.initializers`` it adds the array declaration plus
        a ``NUMPY_HEADER`` variable whose name is the tensor name with
        ``vi_`` replaced by ``nph_``. Lines are joined with newlines.
        """
        declaration = cleandoc("""
            {t} {name}{dim_bracket};
            NUMPY_HEADER {nph_name};
            """)

        def render(name):
            tensor = self.tensor_dict[name]
            return declaration.format(
                name=name,
                nph_name=name.replace("vi_", "nph_"),
                t=data_type.np2c(tensor.dtype),
                dim_bracket=c_helper.generate_dim_bracket(tensor.shape))

        lines = ["// define initializers"]
        lines.extend(render(name) for name in self.initializers)
        return "\n".join(lines)
Ejemplo n.º 12
0
    def _generate_node_outputs_def(self):
        """Return C declarations for the output tensors of every graph node.

        The result starts with a comment line, followed by one
        ``<type> <name><dims>;`` line per output of each node in
        ``self.graph``. Type and shape come from the node's ``op``; the name
        comes from the node's own ``output_tensor_names``.
        """
        declaration = "{t} {name}{shape};"
        lines = ["// Define Outptus"]
        lines.extend(
            declaration.format(
                t=data_type.np2c(node.op.output_tensor_dtypes[idx]),
                name=node.output_tensor_names[idx],
                shape=c_helper.generate_dim_bracket(
                    node.op.output_tensor_shapes[idx]))
            for node in self.graph
            for idx in range(len(node.op.output_tensor)))
        return "\n".join(lines)
Ejemplo n.º 13
0
    def _gen_c_code_value(self):
        """Return the C definition of the array holding the ``value`` attribute.

        The ``value`` attribute is converted with ``numpy_helper.to_array``
        and rendered as ``<type> <name><dims> = <values>;``. Type and
        dimensions are taken from the first output tensor; the name comes
        from ``self.get_name()``.
        """
        definition = cleandoc('''
            {t} {name}{dim_bracket} = {values};
            ''')

        array = numpy_helper.to_array(self.attrs['value'])

        return definition.format(
            name=self.get_name(),
            t=data_type.np2c(self.output_tensor_dtypes[0]),
            dim_bracket=c_helper.generate_dim_bracket(
                self.output_tensor_shapes[0]),
            values=c_helper.generate_c_array(array))
Ejemplo n.º 14
0
    def generate_c_code(self, **kwargs):
        """Assemble the C source text for this operator's function.

        The text consists of the local include lines, the operator's
        parameter type, and one C function whose body is the result of
        ``self.generate_c_code_init_output()`` followed by the result of
        ``self.generate_c_code_reduce()``.

        Args:
            **kwargs: accepted for interface compatibility; not read here.

        Returns:
            str: the generated C code.
        """
        include_lines = [
            c_helper.generate_local_include(header)
            for header in self.get_c_op_include_header()
        ]
        code = '\n'.join(include_lines) + '\n\n'
        code += self.get_c_param_type() + '\n\n'

        function_template = cleandoc('''
        void {op_func_name}(void *op_param,{t} input{XDims}, {t} output{CDims}, void *inputs_params, void* outputs_params)
        {{
        {init_statements}\n
        {main_statements}
        }}
        ''')

        # The reduce code is generated before the init code, matching the
        # order in which the two helpers have always been called.
        fields = {
            'op_func_name': self.get_func_name(),
            'XDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[0]),
            'CDims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
            't': data_type.np2c(self.output_tensor_dtypes[0]),
            'main_statements': self.generate_c_code_reduce(),
            'init_statements': self.generate_c_code_init_output(),
        }
        return code + '\n\n' + function_template.format(**fields)
Ejemplo n.º 15
0
    def generate_c_code(self, **kwargs):
        """Assemble the C source text for the constant operator.

        The text has three sections separated by two blank lines: the
        operator's parameter type, the constant array definition from
        ``self._gen_c_code_value()``, and a C function that copies
        ``p->value`` into ``outputs`` using the element count computed from
        ``p->shape``.

        Args:
            **kwargs: accepted for interface compatibility; not read here.

        Returns:
            str: the generated C code.
        """
        function_template = cleandoc('''
        void {op_func_name}(void *op_param, void *outputs, void* outputs_params){{
            ConstantOpParam *p = (ConstantOpParam *)op_param;
            int ndim;
            int* shape;
            void * value;
        
            ndim = p->ndim;
            shape = p->shape;
            value =({type} *) p->value;
            
            int len = 1;
            for(int i=0;i< ndim;i++){{
                len *=shape[i];
            }}
        
            memcpy(outputs, value, sizeof({type}) * len);
        }}
        ''')

        sections = [
            self.get_c_param_type(),  # call only once
            self._gen_c_code_value(),
        ]

        # 'op_name' has no field in the template; it is still computed so the
        # sequence of calls stays as it was.
        fields = {
            'op_name': self.get_name(),
            'op_func_name': self.get_func_name(),
            'type': data_type.np2c(self.output_tensor_dtypes[0]),
        }
        sections.append(function_template.format(**fields))

        return '\n\n\n'.join(sections)
Ejemplo n.º 16
0
    def generate_c_code(self, **kwargs):
        """Assemble the C source text for the quantizing function.

        The text consists of the local include lines, the operator's
        parameter type, its variable definitions, and one C function whose
        loop nest assigns ``C = CLAMP(ROUND(X / Y) + Z, lo, hi)`` per output
        element. Inputs of extent 1 along a dimension are indexed with
        ``[0]`` so they broadcast against the output. When ``self.OpenMP`` is
        truthy, the ``[pragma]`` marker is replaced with ``self.PRAGMA_OMP``.

        The clamp range is ``0..255`` for ``uint8_t`` outputs and
        ``-128..127`` otherwise.

        Args:
            **kwargs: accepted for interface compatibility; not read here.

        Returns:
            str: the generated C code.
        """
        res = '\n'.join(
            c_helper.generate_local_include(h)
            for h in self.get_c_op_include_header())
        res += '\n\n'

        # param type
        res += self.get_c_param_type()
        res += '\n\n'
        res += self.get_op_variale_def()
        res += '\n\n'

        # Loop nest built from the output shape.
        loop_template = c_helper.generate_ndim_for_loop(
            np.empty(self.output_tensor_shapes[0]), pragma=self.OpenMP)
        if self.OpenMP:
            loop_template = loop_template.replace('[pragma]', self.PRAGMA_OMP)

        # Saturation bounds. The signed range used to be (-127, 128), whose
        # upper bound does not fit in an 8-bit signed value.
        # NOTE(review): every non-uint8 output is treated as int8 - confirm
        # no other output type reaches this branch.
        if data_type.np2c(self.output_tensor_dtypes[0]) == 'uint8_t':
            clamp_lo, clamp_hi = 0, 255
        else:
            clamp_lo, clamp_hi = -128, 127

        TemplateStatements = (
            '{C}{CStatementDims} = CLAMP(ROUND({X}{XStatementDims} / '
            '{Y}{YStatementDims}) + {Z}{ZStatementDims}, {lo}, {hi});\n')

        def prepend_index(dims, extent, step):
            """Prepend one index to ``dims``: '[0]' for extent 1, else the loop variable."""
            if extent is None:
                # This input has fewer dimensions than the longest operand.
                return dims
            index = '[0]' if extent == 1 else '[{0}]'.format(step)
            return index + dims

        X, Y, Z = self.input_tensor_values
        index_names = string.ascii_lowercase[8:8 + self.output_tensor_ndims[0]]

        x_dims = y_dims = z_dims = c_dims = ''
        # Walk the dimensions from innermost to outermost so shorter shapes
        # line up on their trailing axes.
        for extent_x, extent_y, extent_z, step in zip_longest(
                X.shape[::-1], Y.shape[::-1], Z.shape[::-1],
                reversed(index_names)):
            x_dims = prepend_index(x_dims, extent_x, step)
            y_dims = prepend_index(y_dims, extent_y, step)
            z_dims = prepend_index(z_dims, extent_z, step)
            c_dims = '[{0}]'.format(step) + c_dims

        mapping = {
            'X': self.input_tensor_names[0],
            'Y': self.input_tensor_names[1],
            'Z': self.input_tensor_names[2],
            'C': self.output_tensor_names[0],
            'XStatementDims': x_dims,
            'YStatementDims': y_dims,
            'ZStatementDims': z_dims,
            'CStatementDims': c_dims,
            'lo': clamp_lo,
            'hi': clamp_hi,
        }

        TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param,{x_type} {X}{XDims} , {y_type} {Y}{YDims}, {z_type} {Z}{ZDims}, {c_type} {C}{CDims}, void *inputs_params, void* outputs_params)
        {{      
        {statements}
        }}
        ''')
        dims = c_helper.generate_dim_bracket
        input_values = self.node.input_tensor_values
        mappingf = {
            'op_func_name': self.get_func_name(),
            'X': self.input_tensor_names[0],
            'Y': self.input_tensor_names[1],
            'Z': self.input_tensor_names[2],
            'C': self.output_tensor_names[0],
            'XDims': dims(self.input_tensor_shapes[0]),
            'YDims': dims(self.input_tensor_shapes[1]),
            'ZDims': dims(self.input_tensor_shapes[2]),
            'CDims': dims(self.output_tensor_shapes[0]),
            'x_type': data_type.np2c(input_values[0].dtype),
            'y_type': data_type.np2c(input_values[1].dtype),
            'z_type': data_type.np2c(input_values[2].dtype),
            'c_type': data_type.np2c(self.output_tensor_dtypes[0]),
            'statements': loop_template.replace(
                '[statements]', TemplateStatements.format(**mapping)),
        }
        res += '\n\n'
        res += TemplateFunction.format(**mappingf)

        return res
Ejemplo n.º 17
0
    def generate_c_code(self, **kwargs):
        """Assemble the C source text for the LSTM function.

        The text consists of the standard include lines, the operator's
        parameter type, one helper per distinct activation function, and the
        LSTM function itself.

        As written, the generated C code:
          * reads only index 0 of ``vi_W`` / ``vi_R`` (a single direction);
          * starts ``prevH`` / ``prevC`` at zero and never reads an initial
            state input;
          * uses a zero-filled local ``P`` unless exactly eight inputs are
            present, in which case it reads the eighth input's first row.

        Gate blocks in ``vi_W`` / ``vi_R`` are addressed in the order input,
        output, forget, cell. Peephole weights are addressed as three
        ``hidden_size``-long blocks in the order input, output, forget, which
        is the ONNX ``P[iof]`` layout.

        Args:
            **kwargs: accepted for interface compatibility; not read here.

        Returns:
            str: the generated C code.
        """
        hidden_size = self.attrs['hidden_size']
        sequence_lens = self.input_tensor_shapes[0][0]
        batch_size = self.input_tensor_shapes[0][1]
        input_size = self.input_tensor_shapes[0][-1]
        # Half the last extent of the fourth input when it is present.
        b_half_index = (self.input_tensor_shapes[3][-1] // 2
                        if 3 < len(self.input_tensor) else 0)

        res = '\n'.join(
            c_helper.generate_std_include(h)
            for h in self.get_c_op_include_header())
        res += '\n\n'

        # param type
        res += self.get_c_param_type()
        res += '\n\n\n'

        # Activation functions: defaults, overridden position by position by
        # whatever the attributes provide.
        activations = ['Sigmoid', 'Tanh', 'Tanh']
        activation_alpha = ["0", "0", "0"]
        activation_beta = ["0", "0", "0"]

        func = list(self.attrs.get('activations', []))
        alpha = list(self.attrs.get('activation_alpha', []))
        beta = list(self.attrs.get('activation_beta', []))

        activations[:len(func)] = func
        activation_alpha[:len(alpha)] = alpha
        activation_beta[:len(beta)] = beta

        # Emit each distinct activation helper once, in first-use order.
        # Iterating a set made the order of the emitted helpers vary from
        # run to run.
        emitted = set()
        for act in activations:
            if act in emitted:
                continue
            emitted.add(act)
            res += self.get_activate_func(act) + '\n\n'

        res += '\n\n'

        mappingf = {
            "signature": self.get_signature(),
            'input_size': input_size,
            'batch_size': batch_size,
            'hidden_size': hidden_size,
            'sequence_lens': sequence_lens,
            "PLen": str(3 * hidden_size),
            "P": (self.input_tensor_names[7] + "[0]"
                  if len(self.input_tensor) == 8 else "P"),
            "HDim_last2": c_helper.generate_dim_bracket(
                [batch_size, hidden_size]),
            "bias_code": self.gen_c_bias(b_half_index),
            "act_f": LSTMActivate[activations[0]].value,
            "act_g": LSTMActivate[activations[1]].value,
            "act_h": LSTMActivate[activations[2]].value,
            "act_alpha_f": activation_alpha[0],
            "act_alpha_g": activation_alpha[1],
            "act_alpha_h": activation_alpha[2],
            "act_beta_f": activation_beta[0],
            "act_beta_g": activation_beta[1],
            "act_beta_h": activation_beta[2],
            # Copy statements stay empty for outputs that are not requested.
            "Y_set_code": "",
            "Y_h_set_code": "",
            "Y_c_set_code": "",
        }

        # (template key, copy statement) per output position, in the fixed
        # ONNX output order Y, Y_h, Y_c. Y_h / Y_c are written at index 0
        # only (single direction).
        output_specs = (
            ('Y', "memcpy(&vi_Y[i], &(y), sizeof(y));"),
            ('Y_h', "memcpy(&vi_Y_h[0], &(y), sizeof(y));"),
            ('Y_c', "memcpy(&vi_Y_c[0], &(tc), sizeof(tc));"),
        )
        # Omitted outputs are "" in node.outputs and have no entry in the
        # output_tensor_* lists, hence the separate running slot index.
        slot = 0
        for index, output_name in enumerate(self.node.outputs):
            if index >= len(output_specs) or output_name == "":
                continue
            key, copy_stmt = output_specs[index]
            mappingf.update({
                'yt': data_type.np2c(self.output_tensor_dtypes[slot]),
                key: "vi_" + key,
                key + 'Dims': c_helper.generate_dim_bracket(
                    self.output_tensor_shapes[slot]),
                key + "_set_code": copy_stmt,
            })
            slot += 1

        TemplateFunction = cleandoc('''
        {signature}
        {{
            const int hidden_size={hidden_size};
            const {yt} P[{PLen}] ={{0.0}};

            {yt} prevH{HDim_last2} = {{0.0}};
            {yt} prevC{HDim_last2} = {{0.0}};

            for(int i =0;i<{sequence_lens};i++)
            {{
                float y[{batch_size}][{hidden_size}] = {{0.0}};
                float ti[{batch_size}][{hidden_size}] = {{0.0}};
                float to[{batch_size}][{hidden_size}] = {{0.0}};
                float tf[{batch_size}][{hidden_size}] = {{0.0}};
                float tc[{batch_size}][{hidden_size}] = {{0.0}};            

                for(int j=0;j<{batch_size};j++){{
                    for(int k=0;k<{hidden_size};k++){{

                        for(int l=0;l<{input_size};l++){{
                            ti[j][k] +=(vi_X[i][j][l] * vi_W[0][k + hidden_size * 0][l]);
                            to[j][k] +=(vi_X[i][j][l] * vi_W[0][k + hidden_size * 1][l]);
                            tf[j][k] +=(vi_X[i][j][l] * vi_W[0][k + hidden_size * 2][l]);
                            tc[j][k] +=(vi_X[i][j][l] * vi_W[0][k + hidden_size * 3][l]);
                        }}
        
                        for(int l=0;l<{hidden_size}; l++){{
                            ti[j][k] +=(prevH[j][l] * vi_R[0][k + hidden_size * 0][l]);
                            to[j][k] +=(prevH[j][l] * vi_R[0][k + hidden_size * 1][l]);
                            tf[j][k] +=(prevH[j][l] * vi_R[0][k + hidden_size * 2][l]);
                            tc[j][k] +=(prevH[j][l] * vi_R[0][k + hidden_size * 3][l]);
                        }}
                        {bias_code}
                    }}
                }}

                for(int j=0;j<{batch_size};j++){{
                    for(int k=0;k<{hidden_size};k++){{
                        ti[j][k] = {act_f}(ti[j][k] + {P}[k + hidden_size * 0] * prevC[j][k], {act_alpha_f}, {act_beta_f});
                        tf[j][k] = {act_f}(tf[j][k] + {P}[k + hidden_size * 2] * prevC[j][k], {act_alpha_f}, {act_beta_f});
                        tc[j][k] = {act_g}(tc[j][k], {act_alpha_g}, {act_beta_g});
                        tc[j][k] = tf[j][k] * prevC[j][k] + ti[j][k] * tc[j][k];
                        to[j][k] = {act_f}(to[j][k] + {P}[k + hidden_size * 1]* tc[j][k], {act_alpha_f}, {act_beta_f});
                        y[j][k] = to[j][k] * {act_h}(tc[j][k], {act_alpha_h}, {act_beta_h});
                    }}
                }}

                {Y_set_code}
                {Y_h_set_code}
                {Y_c_set_code}
                memcpy(&prevH, &(y), sizeof(y));
                memcpy(&prevC, &(tc), sizeof(tc));

            }}
        }}
        ''')

        res += TemplateFunction.format(**mappingf)

        return res
Ejemplo n.º 18
0
    def get_signature(self):
        """Return the C prototype of the function generated for this node.

        The prototype has the form ``void <func>(void *op_param, <inputs>,
        <outputs>, void *inputs_params, void* outputs_params)``.

        * The first three inputs (placeholders X, W, R) are always emitted.
          X is always named ``vi_X``; W and R use the tensor name with
          ``_lstm_`` replaced by ``_``.
        * Inputs at positions 3 to 7 (placeholders B, sl, initial_h,
          initial_c, P) are emitted only when ``self.input_tensor`` has an
          entry at that position.
        * For each of the first three entries of ``self.node.outputs`` that
          is not the empty string, a parameter named ``vi_Y``, ``vi_Y_h`` or
          ``vi_Y_c`` is emitted.  Outputs that are present are numbered
          consecutively when indexing ``self.output_tensor_dtypes`` and
          ``self.output_tensor_shapes``.

        Returns:
            str: the formatted prototype, without a trailing semicolon.
        """

        def short_name(tensor_name):
            # e.g. vi_lstm_W -> vi_W
            return tensor_name.replace('_lstm_', '_')

        # The prototype is assembled as format-template pieces and rendered
        # once at the end with the values collected in `fields`.
        pieces = [
            "void {op_func_name}(void *op_param,{xt} {X}{XDims},{wt} {W}{WDims},{rt} {R}{RDims},"
        ]
        fields = {
            'op_func_name': self.get_func_name(),
            'xt': data_type.np2c(self.input_tensor_dtypes[0]),
            'X': "vi_X",
            'XDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[0]),
            'wt': data_type.np2c(self.input_tensor_dtypes[1]),
            'W': short_name(self.input_tensor_names[1]),
            'WDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[1]),
            'rt': data_type.np2c(self.input_tensor_dtypes[2]),
            'R': short_name(self.input_tensor_names[2]),
            'RDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[2]),
        }

        # (input position, type placeholder, name placeholder, strip "_lstm_")
        optional_inputs = (
            (3, 'bt', 'B', True),
            (4, 'slt', 'sl', False),
            (5, 'initial_ht', 'initial_h', False),
            (6, 'initial_ct', 'initial_c', False),
            (7, 'pt', 'P', False),
        )
        for position, type_key, name_key, strip in optional_inputs:
            if len(self.input_tensor) <= position:
                continue
            dims_key = name_key + 'Dims'
            tensor_name = self.input_tensor_names[position]
            fields[type_key] = data_type.np2c(self.input_tensor_dtypes[position])
            fields[name_key] = short_name(tensor_name) if strip else tensor_name
            fields[dims_key] = c_helper.generate_dim_bracket(
                self.input_tensor_shapes[position])
            pieces.append('{%s} {%s}{%s},' % (type_key, name_key, dims_key))

        # Outputs follow the node's declared order; an empty name means the
        # output is absent and takes no slot in the output_tensor_* lists.
        output_slots = (
            ('yt', 'Y', "vi_Y"),
            ('y_ht', 'Y_h', "vi_Y_h"),
            ('y_ct', 'Y_c', "vi_Y_c"),
        )
        node_outputs = self.node.outputs
        present = 0  # index into output_tensor_dtypes / output_tensor_shapes
        for slot, (type_key, name_key, c_name) in enumerate(output_slots):
            if slot >= len(node_outputs):
                break
            if node_outputs[slot] == "":
                continue
            dims_key = name_key + 'Dims'
            fields[type_key] = data_type.np2c(self.output_tensor_dtypes[present])
            fields[name_key] = c_name
            fields[dims_key] = c_helper.generate_dim_bracket(
                self.output_tensor_shapes[present])
            present += 1
            pieces.append('{%s} {%s}{%s},' % (type_key, name_key, dims_key))

        pieces.append(" void *inputs_params, void* outputs_params)")

        return ''.join(pieces).format(**fields)
Ejemplo n.º 19
0
    def generate_c_code(self, **kwargs):
        """Build the C source that applies ``^`` element-wise to two inputs.

        The result consists of the local include lines, the parameter type
        definition and one C function.  The function body is a loop nest over
        the output shape whose single statement assigns
        ``X[...] ^ Y[...]`` to the output element.

        Input subscripts are aligned to the output's trailing axes: an input
        axis of extent 1 is always indexed with ``[0]``, any other axis with
        the loop variable of the matching output axis, and axes an input does
        not have get no subscript at all.

        Args:
            **kwargs: accepted for interface compatibility; not used here.

        Returns:
            str: the generated C source.
        """
        include_lines = '\n'.join(
            c_helper.generate_local_include(header)
            for header in self.get_c_op_include_header())
        param_type = self.get_c_param_type()

        # Loop nest over the output shape; its '[statements]' marker is
        # replaced with the assignment further down.
        loop_nest = c_helper.generate_ndim_for_loop(
            np.ones(self.output_tensor_shapes[0]))

        x_name = self.input_tensor_names[0]
        y_name = self.input_tensor_names[1]
        c_name = self.output_tensor_names[0]

        lhs, rhs = self.input_tensor_values
        loop_vars = string.ascii_lowercase[8:8 + self.output_tensor_ndims[0]]

        def subscript(extent, var):
            # An axis of extent 1 is pinned to element 0.
            return '[0]' if extent == 1 else '[{0}]'.format(var)

        # Subscripts are collected innermost axis first and reversed on join.
        x_parts, y_parts, c_parts = [], [], []
        for x_extent, y_extent, var in zip_longest(
                lhs.shape[::-1], rhs.shape[::-1], reversed(loop_vars)):
            if x_extent is not None:
                x_parts.append(subscript(x_extent, var))
            if y_extent is not None:
                y_parts.append(subscript(y_extent, var))
            c_parts.append('[{0}]'.format(var))

        assignment = '{C}{CStatementDims} = {X}{XStatementDims} ^ {Y}{YStatementDims};'.format(
            X=x_name,
            Y=y_name,
            C=c_name,
            XStatementDims=''.join(reversed(x_parts)),
            YStatementDims=''.join(reversed(y_parts)),
            CStatementDims=''.join(reversed(c_parts)),
        )

        function_template = cleandoc('''
        void {op_func_name}(void *op_param,{t} {X}{XDims} , {t} {Y}{YDims}, {t} {C}{CDims}, void *inputs_params, void* outputs_params)
        {{
        {statements}
        }}
        ''')
        fields = {
            'op_func_name': self.get_func_name(),
            'X': self.input_tensor_names[0],
            'Y': self.input_tensor_names[1],
            'C': self.output_tensor_names[0],
            'XDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[0]),
            'YDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[1]),
            'CDims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
            # every parameter is declared with the output's C type
            't': data_type.np2c(self.output_tensor_dtypes[0]),
            'statements': loop_nest.replace('[statements]', assignment),
        }

        return ''.join([
            include_lines,
            '\n\n',
            param_type,
            '\n\n',
            '\n\n',
            function_template.format(**fields),
        ])
Ejemplo n.º 20
0
    def generate_c_code(self, **kwargs):
        res = ''
        res += '\n'.join([
            c_helper.generate_local_include(h)
            for h in self.get_c_op_include_header()
        ])
        res += '\n\n'

        # param type
        res += self.get_c_param_type()
        res += '\n\n'

        input_shapes = []
        input_shapes.append(self.input_tensor_shapes[0])
        input_shapes.append(self.input_tensor_shapes[1])
        if (len(self.input_tensor) == 4):
            input_shapes.append(self.input_tensor_shapes[2])
            input_shapes.append(self.input_tensor_shapes[3])
        input_mod_shapes = []
        input_org_shapes = []

        max_dim = 5
        if (len(input_shapes[0]) == 1):
            input_mod_shapes.append(((1, ) * (max_dim - 1) + input_shapes[0]))
            input_org_shapes.append(((0, ) * (max_dim - 1) + input_shapes[0]))
        else:
            input_mod_shapes.append((1, ) * (max_dim - len(input_shapes[0])) +
                                    input_shapes[0])
            input_org_shapes.append((0, ) * (max_dim - len(input_shapes[0])) +
                                    input_shapes[0])
        if (len(input_shapes[1]) == 1):
            input_mod_shapes.append(
                ((1, ) * (max_dim - 2) + input_shapes[1] + (1, )))
            input_org_shapes.append(
                ((0, ) * (max_dim - 2) + input_shapes[1] + (1, )))
        else:
            input_mod_shapes.append((1, ) * (max_dim - len(input_shapes[1])) +
                                    input_shapes[1])
            input_org_shapes.append((0, ) * (max_dim - len(input_shapes[1])) +
                                    input_shapes[1])

        outputs_shape = ((1, ) * (max_dim - len(self.output_tensor_shapes[0]))
                         ) + self.output_tensor_shapes[0]
        output_names = self.output_tensor_names[0]

        ndim = self.output_tensor_ndims[0]

        if (len(self.input_tensor) == 2):
            zero_point_shapes = [0, 0]
        else:
            zero_point_shapes = [
                self.input_tensor_shapes[2][0], self.input_tensor_shapes[3][0]
            ]

        TemplateStatements = '''
            const int   A_h = {A_d0};
            const int   A_i = {A_d1};
            const int   A_j = {A_d2};
            const int   A_m = {A_d3};
            const int   A_k = {A_d4};
            const int   B_h = {B_d0};
            const int   B_i = {B_d1};
            const int   B_j = {B_d2};
            const int   B_k = {B_d3};
            const int   B_n = {B_d4};
            const int   Y_h = {Y_d0};
            const int   Y_i = {Y_d1};
            const int   Y_j = {Y_d2};
            const int   Y_m = {Y_d3};
            const int   Y_n = {Y_d4};

            const int   A_h_o = {A_d0_o};
            const int   A_i_o = {A_d1_o};
            const int   A_j_o = {A_d2_o};
            const int   B_h_o = {B_d0_o};
            const int   B_i_o = {B_d1_o};
            const int   B_j_o = {B_d2_o};

            {ta} *_A = ({ta} *)A;
            {tb} *_B = ({tb} *)B;
            int *_Y = (int *)Y;
            int tmpA, tmpB, tmpY;
            {tb} BT [{B_d0}][{B_d1}][{B_d2}][{B_d3}][{B_d4}];
            {tb} *_BT = ({tb} *)BT;
        '''

        if (len(self.input_tensor) == 4):
            TemplateStatements += '''
            {taz} a_zero_point_mod[{A_d3}];
            {tbz} b_zero_point_mod[{B_d4}];
            '''

        TemplateStatements += '''
            int   h, i, j;
            int   ah, ai, aj;
            int   bh, bi, bj;
            int   k;
            int   m;
            int   n;

            int   tmpA_pos_h, tmpA_pos_i, tmpA_pos;
            int   tmpB_pos_h, tmpB_pos_i, tmpB_pos;
            int   tmpY_pos_h, tmpY_pos_i, tmpY_pos;

            memset( Y, (int)0, sizeof(*_Y)*Y_h*Y_i*Y_j*Y_m*Y_n );
        '''
        if (len(self.input_tensor) == 4):
            if (zero_point_shapes[0] == 1):
                TemplateStatements += '''
#pragma omp parallel for
            for (m=0; m < A_m; m++) {{
                a_zero_point_mod[m] = a_zero_point[0];
            }}
                '''
            else:
                TemplateStatements += '''
#pragma omp parallel for
            for (m=0; m < A_m; m++) {{
                a_zero_point_mod[m] = a_zero_point[m];
            }}
                '''
            if (zero_point_shapes[1] == 1):
                TemplateStatements += '''
#pragma omp parallel for
            for (n=0; n < B_n; n++) {{
                b_zero_point_mod[n] = b_zero_point[0];
            }}
                '''
            else:
                TemplateStatements += '''
#pragma omp parallel for
            for (n=0; n < B_n; n++) {{
                b_zero_point_mod[n] = b_zero_point[n];
            }}
                '''

        TemplateStatements += '''
            for (h=0; h < B_h; h++) {{
                bh = (B_h_o > 1) ? h : 0;
                tmpB_pos_h = bh*(B_i*B_j*B_k*B_n);
                for (i=0; i < B_i; i++) {{
                    bi = (B_i_o > 1) ? i : 0;
                    tmpB_pos_i = tmpB_pos_h + bi*(B_j*B_k*B_n);
                    for (j=0; j < B_j; j++) {{
                        bj =  (B_j_o > 1) ? j : 0;
                        tmpB_pos = tmpB_pos_i + bj*(B_k*B_n);
#pragma omp parallel for private(n,k)
                        for (n=0; n < B_n; n++) {{
                            for (k=0; k < B_k; k++) {{
                                *(_BT + tmpB_pos + n*(B_k) + k) = *(_B + tmpB_pos + k*(B_n) + n);
                            }}
                        }}

                    }}
                }}
            }}

            for (h=0; h < Y_h; h++) {{
                ah = (A_h_o > 1) ? h : 0;
                bh = (B_h_o > 1) ? h : 0;
                tmpA_pos_h = ah*(A_i*A_j*A_m*A_k);
                tmpB_pos_h = bh*(B_i*B_j*B_k*B_n);
                tmpY_pos_h =  h*(Y_i*Y_j*Y_m*Y_n);
                for (i=0; i < Y_i; i++) {{
                    ai = (A_i_o > 1) ? i : 0;
                    bi = (B_i_o > 1) ? i : 0;
                    tmpA_pos_i = tmpA_pos_h + ai*(A_j*A_m*A_k);
                    tmpB_pos_i = tmpB_pos_h + bi*(B_j*B_k*B_n);
                    tmpY_pos_i = tmpY_pos_h +  i*(Y_j*Y_m*Y_n);
                    for (j=0; j < Y_j; j++) {{
                        aj =  (A_j_o > 1) ? j : 0;
                        bj =  (B_j_o > 1) ? j : 0;
                        tmpA_pos = tmpA_pos_i + aj*(A_m*A_k);
                        tmpB_pos = tmpB_pos_i + bj*(B_k*B_n);
                        tmpY_pos = tmpY_pos_i +  j*(Y_m*Y_n);
#pragma omp parallel for private(m,n,k,tmpA,tmpB) reduction(+:tmpY)
                        for (m=0; m < Y_m; m++) {{
                            for (n=0; n < Y_n; n++) {{
                                tmpY = 0;
                                for (k=0; k < B_k; k++) {{
        '''

        if (len(self.input_tensor) == 2):
            TemplateStatements += '''
                                    tmpA = *(_A  + tmpA_pos + m*(A_k) + k);
                                    tmpB = *(_BT + tmpB_pos + n*(B_k) + k);
            '''
        else:
            TemplateStatements += '''
                                    tmpA = *(_A  + tmpA_pos + m*(A_k) + k) - a_zero_point_mod[m];
                                    tmpB = *(_BT + tmpB_pos + n*(B_k) + k) - b_zero_point_mod[n];
            '''
        TemplateStatements += '''
                                    tmpY += tmpA * tmpB;
                                }}
                                *(_Y + tmpY_pos + m*(Y_n) + n) = tmpY;
                            }}
                        }}
                    }}
                }}
            }}
        '''

        mapping = {}
        mapping.update({'A_d0': input_mod_shapes[0][0]})
        mapping.update({'A_d1': input_mod_shapes[0][1]})
        mapping.update({'A_d2': input_mod_shapes[0][2]})
        mapping.update({'A_d3': input_mod_shapes[0][3]})
        mapping.update({'A_d4': input_mod_shapes[0][4]})
        mapping.update({'B_d0': input_mod_shapes[1][0]})
        mapping.update({'B_d1': input_mod_shapes[1][1]})
        mapping.update({'B_d2': input_mod_shapes[1][2]})
        mapping.update({'B_d3': input_mod_shapes[1][3]})
        mapping.update({'B_d4': input_mod_shapes[1][4]})
        mapping.update({'Y_d0': outputs_shape[0]})
        mapping.update({'Y_d1': outputs_shape[1]})
        mapping.update({'Y_d2': outputs_shape[2]})
        mapping.update({'Y_d3': outputs_shape[3]})
        mapping.update({'Y_d4': outputs_shape[4]})
        mapping.update({'ta': data_type.np2c(self.input_tensor_dtypes[0])})
        mapping.update({'tb': data_type.np2c(self.input_tensor_dtypes[1])})
        if (len(self.input_tensor) == 4):
            mapping.update(
                {'taz': data_type.np2c(self.input_tensor_dtypes[2])})
            mapping.update(
                {'tbz': data_type.np2c(self.input_tensor_dtypes[3])})
        mapping.update({'A_d0_o': input_org_shapes[0][0]})
        mapping.update({'A_d1_o': input_org_shapes[0][1]})
        mapping.update({'A_d2_o': input_org_shapes[0][2]})
        mapping.update({'B_d0_o': input_org_shapes[1][0]})
        mapping.update({'B_d1_o': input_org_shapes[1][1]})
        mapping.update({'B_d2_o': input_org_shapes[1][2]})

        # 3
        if (len(self.input_tensor) == 4):
            TemplateFunction = cleandoc('''
            void {op_func_name}(void *op_param, {ta} A{dims_A}, {tb} B{dims_B}, {taz} a_zero_point{dims_az}, {tbz} b_zero_point{dims_bz}, int Y{dims}, void *inputs_params, void* outputs_params)
            {{
                {statements}
            }}
            ''')
        else:
            TemplateFunction = cleandoc('''
            void {op_func_name}(void *op_param, {ta} A{dims_A}, {tb} B{dims_B}, int Y{dims}, void *inputs_params, void* outputs_params)
            {{
                {statements}
            }}
            ''')

        mappingf = {}
        mappingf.update({'op_func_name': self.get_func_name()})
        mappingf.update({'A': self.input_tensor_names[0]})
        mappingf.update(
            {'dims_A': c_helper.generate_dim_bracket(input_shapes[0])})
        mappingf.update({'B': self.input_tensor_names[1]})
        mappingf.update(
            {'dims_B': c_helper.generate_dim_bracket(input_shapes[1])})
        mappingf.update({'Y': self.output_tensor_names[0]})
        mappingf.update({
            'dims':
            c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
        })
        mappingf.update({'ta': data_type.np2c(self.input_tensor_dtypes[0])})
        mappingf.update({'tb': data_type.np2c(self.input_tensor_dtypes[1])})
        if (len(self.input_tensor) == 4):
            mappingf.update(
                {'dims_az': c_helper.generate_dim_bracket(input_shapes[2])})
            mappingf.update(
                {'dims_bz': c_helper.generate_dim_bracket(input_shapes[3])})
            mappingf.update(
                {'taz': data_type.np2c(self.input_tensor_dtypes[2])})
            mappingf.update(
                {'tbz': data_type.np2c(self.input_tensor_dtypes[3])})
        mappingf.update({'statements': TemplateStatements.format(**mapping)})
        res += '\n\n'
        res += TemplateFunction.format(**mappingf)

        return res
Ejemplo n.º 21
0
    def generate_c_code(self, **kwargs):
        """Emit C code that copies an offset region of ``input`` to ``output``.

        One ``for`` loop is generated per entry of ``self.c_starts``.  Each
        loop counts its variable from 0 up to (not including) the matching
        value of ``self.c_ends``.  The output is indexed with the loop
        variables; the input is indexed with the loop variable plus the
        matching start value (the ``+start`` part is omitted when it is 0).

        The result consists of the local include lines, the parameter type
        definition and the C function.

        Args:
            **kwargs: accepted for interface compatibility; not used here.

        Returns:
            str: the generated C source.
        """
        include_lines = '\n'.join(
            c_helper.generate_local_include(header)
            for header in self.get_c_op_include_header())
        param_type = self.get_c_param_type()

        rank = len(self.c_starts)
        # Loop variables i, j, k, ... ; the last axis consumes the last letter.
        var_names = reversed(string.ascii_lowercase[8:8 + rank])

        # The nest is built from the innermost loop outwards; `depth` is the
        # indentation level (in units of four spaces) of the current loop.
        depth = rank
        loop_nest = ' ' * (4 * depth) + '[statements]'
        src_index = ''
        dst_index = ''
        for offset, bound in zip(self.c_starts[::-1], self.c_ends[::-1]):
            var = next(var_names)
            pad = ' ' * (4 * depth)

            opening = pad + 'for(int {var}={start};{var}<{end};{var}++ ){{'.format(
                var=var, start=str(0), end=str(bound))
            closing = pad + '}'
            loop_nest = '\n'.join((opening, loop_nest, closing))

            shift = '' if offset == 0 else '+' + str(offset)
            src_index = '[' + str(var) + shift + ']' + src_index
            dst_index = '[' + str(var) + ']' + dst_index
            depth -= 1

        copy_statement = (' ' * (4 * depth) + 'output' + dst_index + '=' +
                          'input' + src_index + ';')
        statements = loop_nest.replace('[statements]', copy_statement)

        function_template = cleandoc('''
        void {op_func_name}(void *op_param,{t} input{XDims}, {t} output{CDims}, void *inputs_params, void* outputs_params)
        {{
        {statements}
        }}
        ''')
        fields = {
            'op_func_name': self.get_func_name(),
            'XDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[0]),
            'CDims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
            # input and output are both declared with the output's C type
            't': data_type.np2c(self.output_tensor_dtypes[0]),
            'statements': statements,
        }

        return ''.join([
            include_lines,
            '\n\n',
            param_type,
            '\n\n',
            '\n\n',
            function_template.format(**fields),
        ])
Ejemplo n.º 22
0
    def generate_c_code(self, **kwargs):
        res = ''
        res += '\n'.join([
            c_helper.generate_local_include(h)
            for h in self.get_c_op_include_header()
        ])
        res += '\n\n'

        # param type
        res += self.get_c_param_type()
        res += '\n\n'

        TemplateStatements = '''
            const int  center_point_box = {center_point_box};
            const int  boxes_batch = {boxes_batch};
            const int  boxes_spatial = {boxes_spatial};
            const int  boxes_params = {boxes_params};
            const int  scores_batch = {scores_batch};
            const int  scores_class = {scores_class};
            const int  scores_spatial = {scores_spatial};
            const long long int  max_output_boxes_p_class = max_output_boxes_per_class[0];
            const float  iou_th = iou_threshold[0];
            const float  score_th = score_threshold[0];
            const int  selected_indices_num = {selected_indices_num};
            const int  selected_indices_index = {selected_indices_index};
//            printf("max_output: %ld\\n", max_output_boxes_per_class[0]);
//            printf("iou_th: %f\\n", iou_threshold[0]);
//            printf("score_th: %f\\n", score_threshold[0]);
//            printf("max_output: %ld\\n", max_output_boxes_p_class);
//            printf("iou_th: %f\\n", iou_th);
//            printf("score_th: %f\\n", score_th);

            memset( (void *)selected_indices, 0, sizeof({t}) * selected_indices_num * selected_indices_index );
            if (max_output_boxes_p_class == 0) {{ return; }}

            for (int n=0; n<scores_batch; n++) {{
                for (int c=0; c<scores_class; c++) {{

                    float   decoded_boxes[boxes_spatial][4];

//  decode boxes
                    if (center_point_box == 0) {{
                        for (int i=0; i<boxes_spatial; i++) {{
                            if (boxes[n][i][0] < boxes[n][i][2]) {{
                                decoded_boxes[i][0] = boxes[n][i][0];  // ymin
                                decoded_boxes[i][2] = boxes[n][i][2];  // ymax
                            }} else {{
                                decoded_boxes[i][0] = boxes[n][i][2];  // ymin
                                decoded_boxes[i][2] = boxes[n][i][0];  // ymax
                            }}
                            if (boxes[n][i][1] < boxes[n][i][3]) {{
                                decoded_boxes[i][1] = boxes[n][i][1];  // xmin
                                decoded_boxes[i][3] = boxes[n][i][3];  // xmax
                            }} else {{
                                decoded_boxes[i][1] = boxes[n][i][3];  // xmin
                                decoded_boxes[i][3] = boxes[n][i][1];  // xmax
                            }}
                        }}
                    }} else {{
                        for (int i=0; i<boxes_spatial; i++) {{
                            decoded_boxes[i][0] = boxes[n][i][1] - boxes[n][i][3]/2;  // ymin
                            decoded_boxes[i][1] = boxes[n][i][0] - boxes[n][i][2]/2;  // xmin
                            decoded_boxes[i][2] = boxes[n][i][1] + boxes[n][i][3]/2;  // ymax
                            decoded_boxes[i][3] = boxes[n][i][0] + boxes[n][i][2]/2;  // xmax
                        }}
                    }}

//
//      SelectDetectionsAboveScoreThreshold(scores, non_max_suppression_score_threshold, &keep_scores, &keep_indices);
//
                    float   keep_scores[scores_spatial];
                    int     keep_indices[scores_spatial];

                    int  num_scores_kept = 0;
                    for (int i=0; i<scores_spatial; i++) {{
                        keep_scores[i] = 0.0;
                        keep_indices[i] = 0;
                    }}
                    for (int i=0; i<scores_spatial; i++) {{
//                        printf("scores(%f)[%d:%d:%d]:th(%f) -> %d\\n", scores[n][c][i], n, c, i, score_th, num_scores_kept);
                        if (scores[n][c][i] >= score_th) {{
                            keep_scores[num_scores_kept] = scores[n][c][i];
                            keep_indices[num_scores_kept] = i;
                            num_scores_kept++;
                        }}
                    }}
//                    for (int i=0; i<num_scores_kept; i++) {{
//                        printf("keep_indices[%d] = %d\\n", i, keep_indices[i]);
//                    }}

//
//      DecreasingPartialArgSort(keep_scores.data(), num_scores_kept, num_scores_kept, sorted_indices.data());
//
                    int   sorted_indices[num_scores_kept];

                    for (int i=0; i<num_scores_kept; i++) {{
                        sorted_indices[i] = keep_indices[i];
//                        printf("keep_score[%d] = %f\\n", sorted_indices[i], keep_scores[i]);
                    }}
//                    for (int i=0; i<num_scores_kept; i++) {{
//                        printf("pre_keep_score[%d] = %f\\n", i, keep_scores[i]);
//                    }}
//                    for (int i=0; i<num_scores_kept; i++) {{
//                        printf("pre_keep_indices[%d] = %d\\n", i, keep_indices[i]);
//                    }}
//                    for (int i=0; i<num_scores_kept; i++) {{
//                        printf("pre_sorted_indices[%d] = %d\\n", i, sorted_indices[i]);
//                    }}
//                    qsort( keep_scores, num_scores_kept, sizeof(float), nonmaxsuppression_num_cmp );
                    nonmaxsuppression_idx_sort( keep_scores, sorted_indices, num_scores_kept);
//                    for (int i=0; i<num_scores_kept; i++) {{
//                        printf("keep_score[%d] = %f\\n", i, keep_scores[i]);
//                    }}
//                    for (int i=0; i<num_scores_kept; i++) {{
//                        printf("keep_indices[%d] = %d\\n", i, keep_indices[i]);
//                    }}
//                    for (int i=0; i<num_scores_kept; i++) {{
//                        printf("sorted_indices[%d] = %d\\n", i, sorted_indices[i]);
//                    }}
                    const int num_boxes_kept = num_scores_kept;
                    const int output_size = (num_scores_kept < max_output_boxes_p_class) ? num_scores_kept: max_output_boxes_p_class;
                    int num_active_candidate = num_boxes_kept;
//                    printf("output_size: %d\\n", output_size);
//                    printf("selected_indices_num: %d\\n", selected_indices_num);
//                    printf("num_active_candidate: %d\\n", num_active_candidate);

                    int     active_box_candidate[num_scores_kept];

                    for (int row=0; row<num_boxes_kept; row++) {{
                        active_box_candidate[row] = 1;
                    }}

                    int     selected[scores_spatial];
                    int     selected_box_cnt = 0;

                    for (int i=0; i<num_boxes_kept; i++) {{
                        selected[i] = -1;
                    }}
                    for (int i=0; i<num_boxes_kept; i++) {{
                        if (num_active_candidate == 0 || selected_box_cnt >= output_size) break;
                        if (active_box_candidate[i] == 1) {{
//                            selected[selected_box_cnt] = keep_indices[sorted_indices[i]];
                            selected[selected_box_cnt] = sorted_indices[i];
//                            printf("%d: selected[%d] = %d\\n", i, selected_box_cnt, selected[selected_box_cnt]);
                            selected_box_cnt++;
                            active_box_candidate[i] = 0;
                            num_active_candidate--;
                        }} else {{
                            continue;
                        }}
                        for (int j = i + 1; j < num_boxes_kept; ++j) {{
                            if (active_box_candidate[j] == 1) {{
//                                float iou = nonmaxsuppression_compute_iou((float *)decoded_boxes, keep_indices[sorted_indices[i]], keep_indices[sorted_indices[j]]);
                                float iou = nonmaxsuppression_compute_iou((float *)decoded_boxes, sorted_indices[i], sorted_indices[j]);
//                                printf("%d:%d: iou = [%f:%f:%f:%f] [%f:%f:%f:%f] %f(th:%f)\\n", i, j,
//                                    decoded_boxes[i][0], decoded_boxes[i][1], decoded_boxes[i][2], decoded_boxes[i][3],
//                                    decoded_boxes[j][0], decoded_boxes[j][1], decoded_boxes[j][2], decoded_boxes[j][3],
//                                    iou, iou_th);
                                if (iou > iou_th) {{
                                    active_box_candidate[j] = 0;
                                    num_active_candidate--;
                                }}
                            }}
                        }}
                    }}
//                    for (int i=0; i<num_boxes_kept; i++) {{
//                        printf("sorted[%d] = %d : keep_indices = %d\\n", i, sorted_indices[i], keep_indices[sorted_indices[i]]);
//                    }}
//                    for (int i=0; i<num_boxes_kept; i++) {{
//                        printf("selected[%d] = %d\\n", i, selected[i]);
//                    }}
                    int num_batch_elements = selected_indices_num/scores_batch;
                    for (int i=0; i<num_batch_elements/scores_class; i++) {{
                        if (selected[i] == -1) {{
                            selected_indices[n*num_batch_elements+c*num_batch_elements/scores_class+i][0] = -1;
                            selected_indices[n*num_batch_elements+c*num_batch_elements/scores_class+i][1] = -1;
                            selected_indices[n*num_batch_elements+c*num_batch_elements/scores_class+i][2] = -1;
                        }} else {{
                            selected_indices[n*num_batch_elements+c*num_batch_elements/scores_class+i][0] = n;
                            selected_indices[n*num_batch_elements+c*num_batch_elements/scores_class+i][1] = c;
                            selected_indices[n*num_batch_elements+c*num_batch_elements/scores_class+i][2] = selected[i];
                        }}
//                        printf("NMS_result: %d:%d:%d\\n", n, c, selected[i]);
                    }}
                }}
            }}
        '''
        mapping = {}
        mapping.update({'op_func_name': self.get_func_name()})
        mapping.update({'center_point_box': self.attrs['center_point_box']})
        mapping.update({'boxes_batch': self.input_tensor_shapes[0][0]})
        mapping.update({'boxes_spatial': self.input_tensor_shapes[0][1]})
        mapping.update({'boxes_params': self.input_tensor_shapes[0][2]})
        mapping.update({'scores_batch': self.input_tensor_shapes[1][0]})
        mapping.update({'scores_class': self.input_tensor_shapes[1][1]})
        mapping.update({'scores_spatial': self.input_tensor_shapes[1][2]})
        mapping.update(
            {'max_output_boxes_per_class': self.input_tensor_shapes[2][0]})
        mapping.update({'iou_threshold': self.input_tensor_shapes[3][0]})
        mapping.update({'score_threshold': self.input_tensor_shapes[4][0]})
        mapping.update(
            {'selected_indices_num': self.output_tensor_shapes[0][0]})
        mapping.update(
            {'selected_indices_index': self.output_tensor_shapes[0][1]})
        mapping.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})

        # 3
        TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, float boxes{dims_boxes}, float scores{dims_scores}, long long int max_output_boxes_per_class[], float iou_threshold[], float score_threshold[], {t} selected_indices{dims_selected_indices}, void *inputs_params, void* outputs_params) {{
            {statements}
        }}
        ''')

        mappingf = {}
        mappingf.update({'op_func_name': self.get_func_name()})
        mappingf.update({'boxes': self.input_tensor_names[0]})
        mappingf.update({
            'dims_boxes':
            c_helper.generate_dim_bracket(self.input_tensor_shapes[0])
        })
        mappingf.update({'scores': self.input_tensor_names[1]})
        mappingf.update({
            'dims_scores':
            c_helper.generate_dim_bracket(self.input_tensor_shapes[1])
        })
        mappingf.update(
            {'max_output_boxes_per_class': self.input_tensor_names[2]})
        mappingf.update({'iou_threshold': self.input_tensor_names[3]})
        mappingf.update({'score_threshold': self.input_tensor_names[4]})
        mappingf.update({'selected_indices': self.output_tensor_names[0]})
        mappingf.update({
            'dims_selected_indices':
            c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
        })
        mappingf.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
        mappingf.update({'statements': TemplateStatements.format(**mapping)})
        res += '\n\n'
        res += TemplateFunction.format(**mappingf)

        return res
Ejemplo n.º 23
0
    def generate_c_code(self, **kwargs):
        """Assemble the C source for this operator and return it as a string.

        The generated function views ``data`` and ``squeezed`` as flat
        buffers and copies elements one by one.  When the output holds more
        elements than the input, the surplus output elements are set to zero.

        Args:
            **kwargs: Accepted for interface compatibility; not read here.

        Returns:
            str: The local include lines, the parameter type text and the
            operator function, separated by blank lines.
        """
        include_block = '\n'.join(
            c_helper.generate_local_include(header)
            for header in self.get_c_op_include_header())
        param_type_block = self.get_c_param_type()

        c_type = data_type.np2c(self.output_tensor_dtypes[0])

        # Body of the C function; both element counts are baked in as
        # literals at generation time.
        body_template = '''
            {t} *_data = ({t} *)data;
            {t} *_squeezed = ({t} *)squeezed;

            int     data_elements = {data_elements};
            int     shape_elements = {shape_elements};
            int     i;

            if (data_elements >= shape_elements) {{
                for (i=0; i<shape_elements; i++) {{
                    *(_squeezed +i) = *(_data +i);
                }}
            }} else {{
                for (i=0; i<data_elements; i++) {{
                    *(_squeezed +i) = *(_data +i);
                }}
                for (; i<shape_elements; i++) {{
                    *(_squeezed +i) = ({t})0.0;
                }}
            }}
        '''
        body = body_template.format(
            t=c_type,
            data_elements=self.input_tensor[0].size,
            shape_elements=self.output_tensor[0].size,
        )

        # Function wrapper; array parameters carry their full dimensions.
        function_template = cleandoc('''
        void {op_func_name}(void *op_param, {t} data{dims_data}, {t} squeezed{dims_squeezed}, void *inputs_params, void* outputs_params) {{
            {statements}
        }}
        ''')
        function_code = function_template.format(
            op_func_name=self.get_func_name(),
            dims_data=c_helper.generate_dim_bracket(
                self.input_tensor_shapes[0]),
            dims_squeezed=c_helper.generate_dim_bracket(
                self.output_tensor_shapes[0]),
            t=c_type,
            statements=body,
        )

        sections = [
            include_block, '\n\n',
            param_type_block, '\n\n',
            '\n\n', function_code,
        ]
        return ''.join(sections)
Ejemplo n.º 24
0
    def generate_c_code(self, **kwargs):
        """Assemble the C source of the dropout kernel as a single string.

        The generated function walks every output element.  For each one it
        copies the matching ``data`` element when ``random()`` exceeds
        ``RAND_MAX * ratio`` and writes ``0.0`` otherwise.  ``ratio`` comes
        from the ``ratio`` attribute and falls back to ``0.5``.

        Args:
            **kwargs: Accepted for interface compatibility; not read here.

        Returns:
            str: The local include lines, the parameter type text and the
            operator function, separated by blank lines.
        """
        include_block = '\n'.join(
            c_helper.generate_local_include(header)
            for header in self.get_c_op_include_header())
        param_type_block = self.get_c_param_type()

        drop_ratio = self.attrs.get('ratio', 0.5)

        # Nested for-loops over the output shape; the placeholder
        # '[statements]' marks where the per-element code goes.
        loop_skeleton = c_helper.generate_ndim_for_loop(
            np.ones(self.output_tensor_shapes[0]))

        element_template = '''
                if (random() > RAND_MAX * ratio) {{
                    output{dims} = data{dims};
                }} else {{
                    output{dims} = 0.0;
                }}
        '''
        # Element subscript built from the letters 'i', 'j', ... (one letter
        # per output dimension), e.g. "[i][j]".
        index_letters = string.ascii_lowercase[8:8 +
                                               self.output_tensor_ndims[0]]
        element_subscript = ''.join(
            '[' + letter + ']' for letter in index_letters)
        element_code = element_template.format(dims=element_subscript)

        function_template = cleandoc('''
        void {op_func_name}(void *op_param, {t} data{dims}, {t} output{dims}, void *inputs_params, void* outputs_params) {{
            const float  ratio = {ratio};

            {statements}
        }}
        ''')
        function_code = function_template.format(
            op_func_name=self.get_func_name(),
            dims=c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
            t=data_type.np2c(self.output_tensor_dtypes[0]),
            ratio=drop_ratio,
            statements=loop_skeleton.replace('[statements]', element_code),
        )

        sections = [
            include_block, '\n\n',
            param_type_block, '\n\n',
            '\n\n', function_code,
        ]
        return ''.join(sections)
Ejemplo n.º 25
0
    def generate_c_code(self, **kwargs):
        res = ''
        res += '\n'.join([
            c_helper.generate_local_include(h)
            for h in self.get_c_op_include_header()
        ])
        res += '\n\n'

        # param type
        res += self.get_c_param_type()
        res += '\n\n'

        TemplateStatements = '''
            const int Y_n = {d1};
            const int Y_c = {d2};
            const int Y_h = {d3};
            const int Y_w = {d4};

            const double epsilon =  {epsilon};
            const double momentum = {momentum};
            const int    spatial =  {spatial};

            int n;
            int c, h, w;
            double sum;
            double ave;
            double sigma2;
            double norm;

#if {spatial} // spatial is true
            for (n=0; n<Y_n; n++) {{
                for (c=0; c<Y_c; c++) {{
                    sum = 0.0;
                    for (h=0; h<Y_h; h++) {{
                        for (w=0; w<Y_w; w++) {{
                            sum += X[n][c][h][w];
                        }}
                    }}
                    ave /= (h * w);
                    ave = momentum * ave + (1-momentum) * mean[c];
                    sigma2 = 0.0;
                    for (h=0; h<Y_h; h++) {{
                        for (w=0; w<Y_w; w++) {{
                            sigma2 += pow((X[n][c][h][w] - ave), 2);
                        }}
                    }}
                    sigma2 /= (h * w);
                    for (h=0; h<Y_h; h++) {{
                        for (w=0; w<Y_w; w++) {{
                            norm = (X[n][c][h][w] - mean[c]) / sqrt(var[c]+epsilon);
                            Y[n][c][h][w] = scale[c] * norm + B[c];
                        }}
                    }}
                }}
            }}
#else // spatial is false
            for (n=0; n<Y_n; n++) {{
                for (c=0; c<Y_c; c++) {{
                    sum = 0.0;
                    for (h=0; h<Y_h; h++) {{
                        for (w=0; w<Y_w; w++) {{
                            sum += X[n][c][h][w];
                        }}
                    }}
                    ave /= (h * w);
                    ave = momentum * ave + (1-momentum) * mean[c][h][w];
                    sigma2 = 0.0;
                    for (h=0; h<Y_h; h++) {{
                        for (w=0; w<Y_w; w++) {{
                            sigma2 += pow((X[n][c][h][w] - ave), 2);
                        }}
                    }}
                    sigma2 /= (h * w);
                    for (h=0; h<Y_h; h++) {{
                        for (w=0; w<Y_w; w++) {{
                            norm = (X[n][c][h][w] - mean[c][h][w]) / sqrt(var[c][h][w]+epsilon);
                            Y[n][c][h][w] = scale[c][h][w] * norm + B[c][h][w];
                        }}
                    }}
                }}
            }}
#endif // spatial
        '''

        mapping = {}
        mapping.update({'d1': self.input_tensor_shapes[0][0]})
        mapping.update({'d2': self.input_tensor_shapes[0][1]})
        mapping.update({'d3': self.input_tensor_shapes[0][2]})
        mapping.update({'d4': self.input_tensor_shapes[0][3]})
        mapping.update({'epsilon': self.attrs['epsilon']})
        mapping.update({'momentum': self.attrs['momentum']})
        mapping.update({'spatial': self.attrs['spatial']})

        # 3
        TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {t} X{dims_X}, {t} scale{dims_scale}, {t} B{dims_B}, {t} mean{dims_mean}, {t} var{dims_var}, {t} Y{dims}, void *inputs_params, void* outputs_params) {{
            {statements}
        }}
        ''')

        mappingf = {}
        mappingf.update({'op_func_name': self.get_func_name()})
        mappingf.update({
            'dims_X':
            c_helper.generate_dim_bracket(self.input_tensor_shapes[0])
        })
        mappingf.update({
            'dims_scale':
            c_helper.generate_dim_bracket(self.input_tensor_shapes[1])
        })
        mappingf.update({
            'dims_B':
            c_helper.generate_dim_bracket(self.input_tensor_shapes[2])
        })
        mappingf.update({
            'dims_mean':
            c_helper.generate_dim_bracket(self.input_tensor_shapes[3])
        })
        mappingf.update({
            'dims_var':
            c_helper.generate_dim_bracket(self.input_tensor_shapes[4])
        })
        mappingf.update({
            'dims':
            c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
        })
        mappingf.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
        mappingf.update({'statements': TemplateStatements.format(**mapping)})
        res += '\n\n'
        res += TemplateFunction.format(**mappingf)

        return res
Ejemplo n.º 26
0
    def generate_c_code(self, **kwargs):
        res = ''
        res += '\n'.join([
            c_helper.generate_local_include(h)
            for h in self.get_c_op_include_header()
        ])
        res += '\n\n'

        # param type
        res += self.get_c_param_type()
        res += '\n\n'

        input_shapes = []
        input_shapes.append(self.input_tensor_shapes[0])
        input_shapes.append(self.input_tensor_shapes[1])
        input_mod_shapes = []

        max_dim = 5
        if (len(input_shapes[0]) == 1):
            input_mod_shapes.append(((1, ) * (max_dim - 1) + input_shapes[0]))
        else:
            input_mod_shapes.append((1, ) * (max_dim - len(input_shapes[0])) +
                                    input_shapes[0])
        if (len(input_shapes[1]) == 1):
            input_mod_shapes.append(
                ((1, ) * (max_dim - 2) + input_shapes[1] + (1, )))
        else:
            input_mod_shapes.append((1, ) * (max_dim - len(input_shapes[1])) +
                                    input_shapes[1])
        outputs_shape = (max(input_mod_shapes[0][0], input_mod_shapes[1][0]),
                         max(input_mod_shapes[0][1], input_mod_shapes[1][1]),
                         max(input_mod_shapes[0][2], input_mod_shapes[1][2]),
                         input_mod_shapes[0][3], input_mod_shapes[1][4])

        output_names = self.output_tensor_names[0]

        ndim = self.output_tensor_ndims[0]

        TemplateStatements = '''
            int   A_h = {A_d0};
            int   A_i = {A_d1};
            int   A_j = {A_d2};
            int   A_m = {A_d3};
            int   A_k = {A_d4};
            int   B_h = {B_d0};
            int   B_i = {B_d1};
            int   B_j = {B_d2};
            int   B_k = {B_d3};
            int   B_n = {B_d4};
            int   Y_h = {Y_d0};
            int   Y_i = {Y_d1};
            int   Y_j = {Y_d2};
            int   Y_m = {Y_d3};
            int   Y_n = {Y_d4};

            {t} *_A = ({t} *)A;
            {t} *_B = ({t} *)B;
            {t} *_Y = ({t} *)Y;
            {t} tmpA, tmpB;

            int   h, i, j;  
            int   k;
            int   m;
            int   n;

            memset( Y, ({t})0.0, sizeof(*_Y)*Y_h*Y_i*Y_j*Y_m*Y_n );

            for (h=0; h < Y_h; h++) {{
                for (i=0; i < Y_i; i++) {{
                    for (j=0; j < Y_j; j++) {{
                        for (m=0; m < Y_m; m++) {{
                            for (n=0; n < Y_n; n++) {{
                                for (k=0; k < B_k; k++) {{
                                    tmpA = *(_A + h*(Y_i*Y_j*Y_m*B_k) + i*(Y_j*Y_m*B_k) + j*(Y_m*B_k) + m*(B_k) + k);
                                    tmpB = *(_B + h*(Y_i*Y_j*B_k*Y_n) + i*(Y_j*B_k*Y_n) + j*(B_k*Y_n) + k*(Y_n) + n);
                                    *(_Y + h*(Y_i*Y_j*Y_m*Y_n) + i*(Y_j*Y_m*Y_n) + j*(Y_m*Y_n) + m*(Y_n) + n) += tmpA * tmpB;
//                                    Y[h][i][j][m][n] += A[h][i][j][m][k] * B[h][i][j][k][n];
                                }}
                            }}
                        }}
                    }}
                }}
            }}
        '''

        mapping = {}
        mapping.update({'A_d0': input_mod_shapes[0][0]})
        mapping.update({'A_d1': input_mod_shapes[0][1]})
        mapping.update({'A_d2': input_mod_shapes[0][2]})
        mapping.update({'A_d3': input_mod_shapes[0][3]})
        mapping.update({'A_d4': input_mod_shapes[0][4]})
        mapping.update({'B_d0': input_mod_shapes[1][0]})
        mapping.update({'B_d1': input_mod_shapes[1][1]})
        mapping.update({'B_d2': input_mod_shapes[1][2]})
        mapping.update({'B_d3': input_mod_shapes[1][3]})
        mapping.update({'B_d4': input_mod_shapes[1][4]})
        mapping.update({'Y_d0': outputs_shape[0]})
        mapping.update({'Y_d1': outputs_shape[1]})
        mapping.update({'Y_d2': outputs_shape[2]})
        mapping.update({'Y_d3': outputs_shape[3]})
        mapping.update({'Y_d4': outputs_shape[4]})
        mapping.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})

        # 3
        TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {t} A{dims_A}, {t} B{dims_B}, {t} Y{dims}, void *inputs_params, void* outputs_params)
        {{
            {statements}
        }}
        ''')

        mappingf = {}
        mappingf.update({'op_func_name': self.get_func_name()})
        mappingf.update({'A': self.input_tensor_names[0]})
        mappingf.update(
            {'dims_A': c_helper.generate_dim_bracket(input_shapes[0])})
        mappingf.update({'B': self.input_tensor_names[1]})
        mappingf.update(
            {'dims_B': c_helper.generate_dim_bracket(input_shapes[1])})
        mappingf.update({'Y': self.output_tensor_names[0]})
        mappingf.update({
            'dims':
            c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
        })
        mappingf.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
        mappingf.update({'statements': TemplateStatements.format(**mapping)})
        res += '\n\n'
        res += TemplateFunction.format(**mappingf)

        return res
Ejemplo n.º 27
0
    def generate_c_code(self, **kwargs):
        """Assemble the C source of the concatenation kernel as a string.

        The generated function loops over every output element.  An
        ``if`` / ``else if`` chain on the index of the concatenation axis
        picks the input whose range contains that index, and the element is
        copied from that input with the index shifted by the input's start
        offset.

        Args:
            **kwargs: Accepted for interface compatibility; not read here.

        Returns:
            str: The local include lines, the parameter type text and the
            operator function, separated by blank lines.
        """
        axis = self.attrs['axis']
        out_ndims = self.output_tensor_ndims[0]

        # (input name, first index, one-past-last index) along the concat
        # axis, accumulated in input order.
        boundaries = []
        lower = 0
        upper = 0
        for position, shape in enumerate(self.input_tensor_shapes):
            upper += shape[axis]
            boundaries.append(
                (self.input_tensor_names[position], lower, upper))
            lower = upper

        # Loop variables are the letters 'i', 'j', ...; one per output axis.
        loop_letters = string.ascii_lowercase[8:8 + out_ndims]
        concat_idx = loop_letters[axis]

        include_block = '\n'.join(
            c_helper.generate_local_include(header)
            for header in self.get_c_op_include_header())
        param_type_block = self.get_c_param_type()

        loop_skeleton = c_helper.generate_ndim_for_loop(
            np.ones(self.output_tensor_shapes[0]), indent=0)

        input_vals = OrderedDict({
            name: self._gen_array_element_val(out_ndims, value)
            for name, value in self.input_tensor_dict.items()
        })
        output_ref = (self.output_tensor_names[0] +
                      self._gen_array_element_val(
                          out_ndims, self.output_tensor_values[0]))

        branch_template = cleandoc('''
        {if} ({concat_idx} < {concat_boundary}) {{
        {indent}    {outputVal} = {inputVal};
        {indent}}}
        ''')
        body_indent = ' ' * 4 * (out_ndims + 1)

        # Inputs are looked up by the names recorded in `boundaries` rather
        # than by position in `input_vals`, because the key order of
        # `input_vals` was found to be unstable.
        branches = []
        for order, (name, start, stop) in enumerate(boundaries):
            shifted_subscript = input_vals[name].replace(
                concat_idx, concat_idx + '-' + str(start))
            keyword = body_indent + 'if' if order == 0 else 'else if'
            branch_fields = {
                't': data_type.np2c(self.input_tensor_dtypes[0]),
                'concat_idx': concat_idx,
                'indent': body_indent,
                'inputVal': name + shifted_subscript,
                'outputVal': output_ref,
                'concat_boundary': stop,
                'if': keyword,
            }
            branches.append(branch_template.format(**branch_fields))
        concat_statement = ''.join(branches)

        function_template = cleandoc('''
        void {op_func_name}(void *op_param,{InputsParamSignature}, {OutputsParamSignature}, void *inputs_params, void* outputs_params)
        {{
        {statements}
        }}
        ''')
        func_name = self.get_func_name()
        input_signatures = [
            self.gen_param_signature(name, value)
            for name, value in self.input_tensor_dict.items()
        ]
        output_signature = self.gen_param_signature(
            self.output_tensor_names[0], self.output_tensor_values[0])
        function_code = function_template.format(
            op_func_name=func_name,
            InputsParamSignature=','.join(input_signatures),
            OutputsParamSignature=output_signature,
            statements=loop_skeleton.replace('[statements]',
                                             concat_statement),
        )

        sections = [
            include_block, '\n\n',
            param_type_block, '\n\n',
            '\n\n', function_code,
        ]
        return ''.join(sections)
Ejemplo n.º 28
0
    def generate_c_code(self, **kwargs):
        """Generate the C source of the gather kernel.

        The emitted function loops over every output element.  The output
        subscript is split into three groups of loop variables: those before
        ``axis``, those that index ``indices``, and those after.  The element
        is read from ``data`` with the looked-up index substituted at
        ``axis``; when the looked-up index is negative the output element is
        set to ``0.0`` instead.

        A negative ``axis`` attribute is interpreted relative to the rank of
        ``data`` (``axis + data_ndims``).

        Args:
            **kwargs: Accepted for interface compatibility; not read here.

        Returns:
            str: The local include lines, the parameter type text and the
            operator function, separated by blank lines.
        """
        data_ndims = self.input_tensor_ndims[0]
        output_ndims = self.output_tensor_ndims[0]
        indices_ndims = self.input_tensor_ndims[1]

        axis = self.attrs['axis']
        if axis < 0:
            # The letter slices below assume a non-negative axis; a negative
            # one would select the wrong loop variables.
            axis += data_ndims

        res = ''

        # include header
        res += '\n'.join([
            c_helper.generate_local_include(h)
            for h in self.get_c_op_include_header()
        ])
        res += '\n\n'

        # param type
        res += self.get_c_param_type()
        res += '\n\n'

        TemplateKernel = textwrap.indent("""
        if ({array_indices}>= 0) {{
            {array_output}= {array_data};
        }} else {{
            {array_output} = 0.0;
        }}
        """,
                                         prefix=" " * 4 * output_ndims)

        TemplateLoop = c_helper.generate_ndim_for_loop(
            np.ones(self.output_tensor_shapes[0]))

        # Loop variables are the letters 'i', 'j', ...; one per output axis.
        letters = string.ascii_lowercase[8:]

        def subscript(names):
            """Return "[a][b]..." for the given loop-variable letters."""
            return ''.join('[' + v + ']' for v in names)

        leading = subscript(letters[:axis])
        trailing = subscript(
            letters[axis + indices_ndims:data_ndims + indices_ndims - 1])

        array_indices = "indices" + subscript(
            letters[axis:axis + indices_ndims])
        array_data = "data" + leading + '[' + array_indices + ']' + trailing
        array_output = "output" + subscript(letters[:output_ndims])

        mapping_kernel = {
            "array_data": array_data,
            "array_indices": array_indices,
            "array_output": array_output,
        }

        # 3
        TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {t1} data{dims_data}, {t2} indices{dims_indices}, {t1} output{dims_output}, void *inputs_params, void* outputs_params) {{
            {loop_statements}
        }}

        ''')

        mappingf = {
            'op_func_name': self.get_func_name(),
            'dims_data':
            c_helper.generate_dim_bracket(self.input_tensor_shapes[0]),
            'dims_indices':
            c_helper.generate_dim_bracket(self.input_tensor_shapes[1]),
            'dims_output':
            c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
            't1': data_type.np2c(self.output_tensor_dtypes[0]),
            't2': data_type.np2c(self.input_tensor_dtypes[1]),
            'loop_statements':
            TemplateLoop.replace('[statements]',
                                 TemplateKernel.format(**mapping_kernel)),
        }
        res += '\n\n'
        res += TemplateFunction.format(**mappingf)

        return res
Ejemplo n.º 29
0
    def generate_c_code(self, **kwargs):
        """Assemble the C source of the element-wise mean kernel as a string.

        The generated function loops over every output element.  For each
        one it adds up the matching element of every input while counting
        the inputs, then stores ``sum/num`` in the output.

        Args:
            **kwargs: Accepted for interface compatibility; not read here.

        Returns:
            str: The local include lines, the parameter type text and the
            operator function, separated by blank lines.
        """
        include_block = '\n'.join(
            c_helper.generate_local_include(header)
            for header in self.get_c_op_include_header())
        param_type_block = self.get_c_param_type()

        # Nested for-loops over the output shape; '[statements]' marks the
        # spot where the per-element code is inserted.
        loop_skeleton = c_helper.generate_ndim_for_loop(
            np.empty(self.output_tensor_shapes[0]), indent=0)

        out_ndims = self.output_tensor_ndims[0]
        input_vals = OrderedDict({
            name: self._gen_array_element_val(out_ndims, value)
            for name, value in self.input_tensor_dict.items()
        })
        output_ref = (self.output_tensor_names[0] +
                      self._gen_array_element_val(
                          out_ndims, self.output_tensor_values[0]))

        element_template = cleandoc('''
        {t} sum = 0.0f;
        int num = 0;
        {conditions}
        {indent}{outputVal} = sum/num;
        ''')

        accumulate_template = cleandoc('''
        {indent}num++;
        {indent}sum += {input};
        ''')

        # One "num++ / sum += ..." pair per input, each followed by a newline.
        accumulate_lines = []
        for name, element in input_vals.items():
            accumulate_lines.append(
                accumulate_template.format(
                    input=name + element,
                    indent=' ' * 4 * (self.input_tensor_ndims[0] + 1)))
            accumulate_lines.append('\n')

        mean_statement = element_template.format(
            t=data_type.np2c(self.input_tensor_dtypes[0]),
            conditions=''.join(accumulate_lines),
            outputVal=output_ref,
            indent=' ' * 4 * (out_ndims + 1),
        )

        function_template = cleandoc('''
        void {op_func_name}(void *op_param,{InputsParamSignature}, {OutputsParamSignature}, void *inputs_params, void* outputs_params)
        {{
        {statements}
        }}
        ''')
        func_name = self.get_func_name()
        input_signatures = [
            self.gen_param_signature(name, value)
            for name, value in self.input_tensor_dict.items()
        ]
        output_signature = self.gen_param_signature(
            self.output_tensor_names[0], self.output_tensor_values[0])
        function_code = function_template.format(
            op_func_name=func_name,
            InputsParamSignature=','.join(input_signatures),
            OutputsParamSignature=output_signature,
            statements=loop_skeleton.replace('[statements]', mean_statement),
        )

        sections = [
            include_block, '\n\n',
            param_type_block, '\n\n',
            '\n\n', function_code,
        ]
        return ''.join(sections)
Ejemplo n.º 30
0
    def generate_c_code(self, **kwargs):
        """Generate the C source of the softmax kernel.

        The input is treated as a 2-D buffer of ``batch_size`` rows by
        ``num`` columns: ``batch_size`` is the product of the dimensions
        before ``axis`` and ``num`` the product of the dimensions from
        ``axis`` onward.  For every row the emitted code subtracts the row
        maximum, exponentiates, and divides by the row sum.

        Any negative ``axis`` is interpreted relative to the input rank
        (``axis + ndims``).

        Args:
            **kwargs: Accepted for interface compatibility; not read here.

        Returns:
            str: The local include lines, the parameter type text and the
            operator function, separated by blank lines.
        """
        res = ''
        res += '\n'.join([
            c_helper.generate_local_include(h)
            for h in self.get_c_op_include_header()
        ])
        res += '\n\n'

        # param type
        res += self.get_c_param_type()
        res += '\n\n'

        ndims = self.input_tensor_ndims[0]
        shape = self.input_tensor_shapes[0]

        axis = self.attrs['axis']
        if axis < 0:
            # Normalize every negative axis, not just -1; otherwise the
            # ranges below would pick up negative (wrapped) indices.
            axis += ndims

        batch_size = 1
        for d in range(0, axis):
            batch_size *= shape[d]
        num = 1
        for d in range(axis, ndims):
            num *= shape[d]

        TemplateStatements = '''
            {t}   *_input = ({t} *)input;
            {t}   *_output = ({t} *)output;
            int    batch_size = {batch_size};
            int    num = {num};

            int    i;
            int    batch;
            {t}  max, sum;

            for (batch=0; batch<batch_size; batch++) {{
                sum = 0.0;
                max = -HUGE_VAL;
                for (i=0; i<num; i++) {{
                    if (*(_input + batch*num +i) > max) {{
                        max = *(_input + batch*num +i);
                    }}
                }}
                for (i=0; i<num; i++) {{
                    *(_output + batch*num +i) = {exp}(*(_input + batch*num +i) - max);
                    sum += *(_output + batch*num +i);
                }}
                for (i=0; i<num; i++) {{
                    *(_output + batch*num +i) /= sum;
                }}
            }}
        '''

        # Only the placeholders the template actually uses are supplied.
        # Looking up fixed output dimensions here would raise IndexError for
        # a 1-D tensor although the template never needs them.
        mapping = {
            'batch_size': batch_size,
            'num': num,
            't': data_type.np2c(self.output_tensor_dtypes[0]),
            # Double precision uses exp(); everything else uses expf().
            'exp':
            'exp' if self.output_tensor_dtypes[0] == 'float64' else 'expf',
        }

        # 3
        TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {t} input{dims_input}, {t} output{dims}, void *inputs_params, void* outputs_params)
        {{
            {statements}
        }}
        ''')

        mappingf = {
            'op_func_name': self.get_func_name(),
            'dims_input':
            c_helper.generate_dim_bracket(self.input_tensor_shapes[0]),
            'dims':
            c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
            't': data_type.np2c(self.output_tensor_dtypes[0]),
            'statements': TemplateStatements.format(**mapping),
        }
        res += '\n\n'
        res += TemplateFunction.format(**mappingf)

        return res