def GenerateInternalFunctions(emitter):
    """Emit every function that lives in the internal namespace.

    Runs the zip, multiply and quantize NEON generator modules, then emits
    each multi-quantize and gemm variant. A newline is emitted through
    ``emitter`` after every generation step.

    Args:
      emitter: object passed to GenerateMultiQuantize / GenerateGemm and
        used for the EmitNewline() separators.
    """
    # Each generator module call is handed a freshly constructed NeonEmitter.
    zip_Nx8_neon.GenerateFunctions(neon_emitter.NeonEmitter())
    emitter.EmitNewline()

    # Trailing positional arguments for the three multiply generator runs.
    mul_variants = (
        ('int32', False, True),
        ('int32', True, True),
        ('float', True, True),
    )
    for variant in mul_variants:
        mul_Nx8_Mx8_neon.GenerateFunctions(neon_emitter.NeonEmitter(),
                                           *variant)
        emitter.EmitNewline()

    qnt_Nx8_neon.GenerateFunctions(neon_emitter.NeonEmitter())
    emitter.EmitNewline()

    # Flattened (aligned, rows) combinations, in the nested-loop order.
    quantize_variants = [(aligned, rows)
                         for aligned in (True, False)
                         for rows in range(1, 4)]
    for aligned, rows in quantize_variants:
        GenerateMultiQuantize(emitter, aligned, rows)
        emitter.EmitNewline()

    # Flattened gemm combinations; the leftmost factor varies slowest.
    gemm_variants = [
        (output_type, aligned, rows, cols, leftover)
        for output_type in (_QUANTIZED_8BIT, _FULL_32BIT, _FULL_FLOAT)
        for aligned in (True, False)
        for rows in range(0, 3)
        for cols in range(0, 3)
        for leftover in range(0, 8)
    ]
    for output_type, aligned, rows, cols, leftover in gemm_variants:
        GenerateGemm(emitter, output_type, aligned, rows, cols, leftover)
        emitter.EmitNewline()
def Main():
    """Drive generation of the 'gemmlowp_meta_single_thread_gemm' header.

    All output goes through one cc_emitter.CCEmitter. The generated code
    sits between an 'ifdef' GEMMLOWP_NEON_32 directive and an 'else'
    directive whose branch carries a 'warning' directive.
    """
    out = cc_emitter.CCEmitter()

    # Prologue: copyright text, header opening, ifdef and the include.
    out.EmitCodeNoSemicolon(_HEADER_COPYRIGHT)
    out.EmitHeaderBegin('gemmlowp_meta_single_thread_gemm')
    out.EmitPreprocessor1('ifdef', 'GEMMLOWP_NEON_32')
    out.EmitNewline()
    out.EmitInclude('<cassert>')
    out.EmitNewline()

    for namespace in ('gemmlowp', 'meta', 'internal'):
        out.EmitNamespaceBegin(namespace)
    out.EmitNewline()

    # Every NEON generator module receives its own NeonEmitter instance.
    for generator in (zip_Nx8_neon, mul_Nx8_Mx8_neon, qnt_Nx8_neon):
        generator.GenerateFunctions(neon_emitter.NeonEmitter())
        out.EmitNewline()

    GenerateFunctions(out)
    out.EmitNewline()

    # First namespace end comes before the main and wrapper gemm functions
    # (presumably it closes 'internal' -- confirm against the emitter).
    out.EmitNamespaceEnd()
    out.EmitNewline()

    for generate in (GenerateMainGemmFunction, GenerateWrapperGemmFunctions):
        generate(out)
        out.EmitNewline()

    out.EmitNamespaceEnd()
    out.EmitNamespaceEnd()
    out.EmitNewline()

    # Epilogue: else branch with the warning, endif, header end.
    out.EmitPreprocessor('else')
    out.EmitPreprocessor1(
        'warning', '"Meta gemm fast-path requires GEMMLOWP_NEON_32!"')
    out.EmitPreprocessor('endif')
    out.EmitNewline()
    out.EmitHeaderEnd()
def Main():
    """Drive generation of 'gemmlowp_meta_streams_arm_32'.

    Emits the common header, opens the 'gemmlowp' and 'meta' namespaces,
    asks streams_common for the UInt8x8 streams using a NeonEmitter, then
    closes both namespaces and emits the common footer.
    """
    out = cc_emitter.CCEmitter()
    namespaces = ('gemmlowp', 'meta')

    common.GenerateHeader(out, 'gemmlowp_meta_streams_arm_32',
                          'GEMMLOWP_NEON_32')

    for namespace in namespaces:
        out.EmitNamespaceBegin(namespace)
    out.EmitNewline()

    streams_common.GenerateUInt8x8Streams(out, neon_emitter.NeonEmitter(), 8)

    # One EmitNamespaceEnd() per namespace opened above.
    for _ in namespaces:
        out.EmitNamespaceEnd()
    out.EmitNewline()

    common.GenerateFooter(out,
                          'Meta gemm for arm32 requires: GEMMLOWP_NEON_32!')
def Main():
    """Drive generation of 'gemmlowp_meta_transform_kernels_arm_32'.

    Emits the common header, opens the 'gemmlowp' and 'meta' namespaces,
    asks transform_kernels_common for kernels described by the pairs
    (16, 0) through (16, 15), then closes both namespaces and emits the
    common footer.
    """
    out = cc_emitter.CCEmitter()
    namespaces = ('gemmlowp', 'meta')

    common.GenerateHeader(out, 'gemmlowp_meta_transform_kernels_arm_32',
                          'GEMMLOWP_NEON_32')

    for namespace in namespaces:
        out.EmitNamespaceBegin(namespace)
    out.EmitNewline()

    # Sixteen (16, n) pairs, n = 0..15, forwarded as-is to the generator.
    kernel_specs = [(16, second) for second in range(16)]
    transform_kernels_common.GenerateKernels(out, neon_emitter.NeonEmitter(),
                                             kernel_specs)

    # One EmitNamespaceEnd() per namespace opened above.
    for _ in namespaces:
        out.EmitNamespaceEnd()
    out.EmitNewline()

    common.GenerateFooter(out,
                          'Meta gemm for arm32 requires: GEMMLOWP_NEON_32!')
def Main():
    """Produce the single threaded meta gemm library for arm32.

    Emits the shared header for 'gemmlowp_meta_single_thread_gemm_arm32',
    opens the 'gemmlowp', 'meta' and 'internal' namespaces, delegates the
    internal functions to meta_arm_common with a NeonEmitter, then closes
    the namespaces and emits the shared footer.
    """
    out = cc_emitter.CCEmitter()
    namespaces = ('gemmlowp', 'meta', 'internal')

    meta_arm_common.GenerateHeader(
        out, 'gemmlowp_meta_single_thread_gemm_arm32', 'GEMMLOWP_NEON_32')

    for namespace in namespaces:
        out.EmitNamespaceBegin(namespace)
    out.EmitNewline()

    meta_arm_common.GenerateInternalFunctions(out, neon_emitter.NeonEmitter())

    # One EmitNamespaceEnd() per namespace opened above.
    for _ in namespaces:
        out.EmitNamespaceEnd()
    out.EmitNewline()

    meta_arm_common.GenerateFooter(
        out, 'Meta gemm for arm32 requires: GEMMLOWP_NEON_32!')
def GenerateInternalFunctions(emitter):
    """Emit every function that lives in the internal namespace.

    Runs, in order: the zip generator, both multiply generator modules
    (three variants each), the quantize generator, and the gemm and gemv
    internal-function generators. A newline is emitted through ``emitter``
    after each step.

    Args:
      emitter: object used for the EmitNewline() separators and passed on
        to the quantize, gemm and gemv generators.
    """
    # Each NEON generator call is handed a freshly constructed NeonEmitter.
    zip_Nx8_neon.GenerateFunctions(neon_emitter.NeonEmitter())
    emitter.EmitNewline()

    # Trailing positional arguments shared by both multiply modules.
    mul_variants = (
        ('int32', False, True),
        ('int32', True, True),
        ('float', True, True),
    )
    for mul_module in (mul_Nx8_Mx8_neon, mul_1x8_Mx8_neon):
        for variant in mul_variants:
            mul_module.GenerateFunctions(neon_emitter.NeonEmitter(), *variant)
            emitter.EmitNewline()

    # Unlike the calls above, this one also receives ``emitter`` itself.
    qnt_Nx8_neon.GenerateFunctions(neon_emitter.NeonEmitter(), emitter)
    emitter.EmitNewline()

    for top_level_module in (gemm_NxMxK_neon, gemv_1xMxK_neon):
        top_level_module.GenerateInternalFunctions(emitter)
        emitter.EmitNewline()
def Main():
    """Drive generation of 'gemmlowp_meta_quantized_mul_kernels_arm_32'.

    Emits the common header, opens the 'gemmlowp' and 'meta' namespaces,
    asks quantized_mul_kernels_common for kernels over a fixed list of
    shape pairs, then closes both namespaces and emits the common footer.
    """
    out = cc_emitter.CCEmitter()
    namespaces = ('gemmlowp', 'meta')

    common.GenerateHeader(out, 'gemmlowp_meta_quantized_mul_kernels_arm_32',
                          'GEMMLOWP_NEON_32')

    for namespace in namespaces:
        out.EmitNamespaceBegin(namespace)
    out.EmitNewline()

    # Pairs (1, 1)..(1, 8), then (2, 1)..(2, 4), then (3, 1)..(3, 3).
    shapes = [(first, second)
              for first, last_second in ((1, 8), (2, 4), (3, 3))
              for second in range(1, last_second + 1)]
    quantized_mul_kernels_common.GenerateKernels(
        out, neon_emitter.NeonEmitter(), shapes)

    # One EmitNamespaceEnd() per namespace opened above.
    for _ in namespaces:
        out.EmitNamespaceEnd()
    out.EmitNewline()

    common.GenerateFooter(out,
                          'Meta gemm for arm32 requires: GEMMLOWP_NEON_32!')
lhs = registers.MapParameter('lhs') rhs_1 = registers.MapParameter('rhs_1') rhs_2 = registers.MapParameter('rhs_2') emitter.EmitPld(lhs) emitter.EmitPld(rhs_1) emitter.EmitPld(rhs_2) aggregators = GenerateAndClearAggregators(emitter, registers, lanes_count + 4) GenerateLoadMultiplyAggregate(emitter, registers, lanes_count, aggregators, count, lhs, rhs_1, rhs_2) GenerateAggregatorReduceStore(emitter, registers, lanes_count, aggregators, result_type, lhs_add, rhs_add, lhs, rhs_1, rhs_2, registers.MapParameter('result')) emitter.EmitAsmEnd(registers.MappedParameters(), [], registers.Clobbers() + ['cc', 'memory']) emitter.EmitFunctionEnd() def GenerateFunctions(emitter, result_type, lhs_add, rhs_add): for lanes in range(1, 5): GenerateMul1x8Mx8(emitter, result_type, lhs_add, rhs_add, lanes) emitter.EmitNewline() if __name__ == '__main__': GenerateFunctions(neon_emitter.NeonEmitter(), 'int32', True, True)