def GenerateMulCols(emitter, result_type, lhs_add, rhs_add, aligned, cols,
                    leftovers):
  """Emits the multiplication code for one horizontal lhs strip.

  The emitted code has two parts:

  1. A `for (int i = 0; i < col_chunks; ++i)` loop. Each iteration zips two
     4-row rhs chunks into `zipped_rhs_1` and `zipped_rhs_2` (advancing
     `rhs_chunk` by `chunk_size` after each), calls the 8-column
     mul_1x8_Mx8 routine and advances `mul_result_chunk` by 8.
  2. A tail for the leftover columns: for `cols > 4` a 4-row zip plus a
     `cols - 4`-row zip followed by the mul_1x8_Mx8 routine for `cols`
     columns; for `0 < cols <= 4` a single `cols`-row zip followed by the
     mul_Nx8_Mx8 routine for 1 row and `cols` columns. Nothing is emitted
     for the tail when `cols <= 0`.

  Args:
    emitter: object providing EmitOpenBracket, EmitCloseBracket, EmitCall
      and EmitAssignIncrement.
    result_type: forwarded to the mul BuildName functions and to
      GetMul2Params / GetMulParams.
    lhs_add: forwarded to the mul BuildName functions.
    rhs_add: forwarded to the mul BuildName functions.
    aligned: forwarded to zip_Nx8_neon.BuildName.
    cols: number of leftover columns handled after the main loop.
    leftovers: forwarded to zip_Nx8_neon.BuildName.
  """

  def EmitRhsZip(rows, destination):
    # A fresh argument list is built for every call, as each call site in
    # the generator passes its own list to the emitter.
    emitter.EmitCall(
        zip_Nx8_neon.BuildName(rows, leftovers, aligned),
        ['rhs_chunk', 'k', 'k', destination, 'lhs_offset', 'const_offset'])

  # Main loop: two 4-row zips feed one 8-column multiplication.
  emitter.EmitOpenBracket('for (int i = 0; i < col_chunks; ++i)')
  for destination in ('zipped_rhs_1', 'zipped_rhs_2'):
    EmitRhsZip(4, destination)
    emitter.EmitAssignIncrement('rhs_chunk', 'chunk_size')
  emitter.EmitCall(
      mul_1x8_Mx8_neon.BuildName(result_type, lhs_add, rhs_add, 8),
      GetMul2Params(result_type))
  emitter.EmitAssignIncrement('mul_result_chunk', 8)
  emitter.EmitCloseBracket()

  # Tail: leftover columns that do not fill a whole 8-column chunk.
  if cols > 4:
    EmitRhsZip(4, 'zipped_rhs_1')
    emitter.EmitAssignIncrement('rhs_chunk', 'chunk_size')
    EmitRhsZip(cols - 4, 'zipped_rhs_2')
    emitter.EmitCall(
        mul_1x8_Mx8_neon.BuildName(result_type, lhs_add, rhs_add, cols),
        GetMul2Params(result_type))
    return

  if cols > 0:
    EmitRhsZip(cols, 'zipped_rhs_1')
    emitter.EmitCall(
        mul_Nx8_Mx8_neon.BuildName(result_type, lhs_add, rhs_add, 1, cols),
        GetMulParams(result_type))
def ZipName(rows, leftovers, aligned):
  """Returns the name zip_Nx8_neon.BuildName yields for these arguments.

  Args:
    rows: forwarded to zip_Nx8_neon.BuildName as its first argument.
    leftovers: forwarded to zip_Nx8_neon.BuildName as its second argument.
    aligned: forwarded to zip_Nx8_neon.BuildName as its third argument.

  Returns:
    Whatever zip_Nx8_neon.BuildName(rows, leftovers, aligned) returns.
  """
  zip_function_name = zip_Nx8_neon.BuildName(rows, leftovers, aligned)
  return zip_function_name
def GenerateZipVector(emitter, aligned, leftovers):
  """Emits a call to the single-row zip routine on `lhs`.

  The emitted call targets the name built by zip_Nx8_neon.BuildName for 1 row
  and passes `lhs`, `k`, `k`, `zipped_lhs`, `rhs_offset` and the constant 0.

  Args:
    emitter: object providing EmitCall.
    aligned: forwarded to zip_Nx8_neon.BuildName.
    leftovers: forwarded to zip_Nx8_neon.BuildName.
  """
  zip_function = zip_Nx8_neon.BuildName(1, leftovers, aligned)
  zip_arguments = ['lhs', 'k', 'k', 'zipped_lhs', 'rhs_offset', 0]
  emitter.EmitCall(zip_function, zip_arguments)