def gen_naive_fmm(coeff_filename_mix, dims_mix, level_mix, outfile):
    """Write the C source of the naive multi-level Strassen-style dgemm driver.

    Args:
        coeff_filename_mix: Iterable of coefficient file names; each is
            loaded with ``read_coeffs``.
        dims_mix: Iterable of dimension entries, one per algorithm in the mix.
        level_mix: Indexable sequence of repeat counts. Entry ``i`` says how
            many consecutive levels use the ``i``-th coefficient set and the
            ``i``-th dimension entry.
        outfile: Path of the file to create (opened in ``'w'`` mode).

    Raises:
        IndexError: If ``level_mix`` has fewer entries than
            ``coeff_filename_mix`` or ``dims_mix``.
    """
    # Expand the per-algorithm inputs into flat per-level lists.
    coeffs_mix = []
    for pos, coeff_path in enumerate(coeff_filename_mix):
        loaded = read_coeffs(coeff_path)
        coeffs_mix.extend(loaded for _ in range(level_mix[pos]))

    dims_level_mix = []
    for pos, dims in enumerate(dims_mix):
        dims_level_mix.extend(dims for _ in range(level_mix[pos]))

    signature = (
        'void bl_dgemm_strassen_naive( int m, int n, int k, double *XA, int lda, double *XB, int ldb, double *XC, int ldc )'
    )
    peeling_fmt = (
        'bl_dynamic_peeling( m, n, k, XA, lda, XB, ldb, XC, ldc, %d * DGEMM_MR, %d, %d * DGEMM_NR );'
    )

    with open(outfile, 'w') as out:
        write_line(out, 0, '#include "bl_dgemm.h"')
        write_break(out)

        cur_coeffs = generateCoeffs(coeffs_mix)
        num_multiplies = len(cur_coeffs[0][0])

        # Helper routines are emitted ahead of the entry point that uses them.
        create_add_functions(out, cur_coeffs)
        create_straprim_naive_functions(
            out, cur_coeffs, dims_level_mix, num_multiplies
        )

        # Entry point: header, partitioning, then the primitive calls.
        write_line(out, 0, signature)
        write_line(out, 0, '{')
        write_naive_strassen_header(out)
        writePartition(out, dims_level_mix)
        write_break(out)
        create_straprim_caller(out, cur_coeffs, dims_level_mix, num_multiplies)
        write_break(out)

        # The three entries become the multipliers in the peeling call.
        level_dim = exp_dim_mix(dims_level_mix)
        peeling_call = peeling_fmt % (level_dim[0], level_dim[1], level_dim[2])
        write_line(out, 1, peeling_call)
        write_line(out, 0, '}')
def gen_abc_fmm( coeff_filename_mix, dims_mix, level_mix, outfilename, micro_kernel_filename, kernel_header_filename ):
    """Write the C sources of the "ABC" multi-level Strassen-style dgemm variant.

    Three files are produced: the main driver (``outfilename``), the
    micro-kernel source (``micro_kernel_filename``) and the kernel header
    (``kernel_header_filename``).

    Args:
        coeff_filename_mix: Iterable of coefficient file names; each is
            loaded with ``read_coeffs``.
        dims_mix: Iterable of dimension entries, one per algorithm in the mix.
        level_mix: Indexable sequence of repeat counts. Entry ``i`` says how
            many consecutive levels use the ``i``-th coefficient set and the
            ``i``-th dimension entry.
        outfilename: Path of the main driver file to create.
        micro_kernel_filename: Path of the micro-kernel file to create.
        kernel_header_filename: Path of the kernel header file to create. The
            text after its first 10 characters is used as the ``#include``
            name.

    Raises:
        IndexError: If ``level_mix`` has fewer entries than
            ``coeff_filename_mix`` or ``dims_mix``.
    """
    # Expand the per-algorithm inputs into flat per-level lists.
    coeffs_mix = []
    for idx, coeff_file in enumerate( coeff_filename_mix ):
        coeffs = read_coeffs( coeff_file )
        coeffs_mix.extend( coeffs for _ in range( level_mix[idx] ) )

    dims_level_mix = []
    for idx, dims in enumerate( dims_mix ):
        dims_level_mix.extend( dims for _ in range( level_mix[idx] ) )

    # NOTE(review): the [10:] slice presumably strips a fixed-length directory
    # prefix from the header path -- confirm against the callers.
    header_include = kernel_header_filename[10:]

    with open( outfilename, 'w' ) as myfile:
        write_line( myfile, 0, '#include "%s"' % header_include )
        write_line( myfile, 0, '#include "bl_dgemm.h"' )
        write_break( myfile )

        cur_coeffs = generateCoeffs( coeffs_mix )
        num_multiplies = len( cur_coeffs[0][0] )

        create_packm_functions( myfile, cur_coeffs )

        # The auxiliary outputs are opened with context managers so they are
        # flushed and closed deterministically, even if a generator raises.
        with open( micro_kernel_filename, 'w' ) as my_micro_file:
            create_micro_functions( my_micro_file, cur_coeffs, header_include )

        with open( kernel_header_filename, 'w' ) as my_kernel_header:
            create_kernel_header( my_kernel_header, cur_coeffs )

        create_macro_functions( myfile, cur_coeffs )
        create_straprim_abc_functions( myfile, cur_coeffs, dims_level_mix )

        write_line( myfile, 0, 'void bl_dgemm_strassen_abc( int m, int n, int k, double *XA, int lda, double *XB, int ldb, double *XC, int ldc )' )
        write_line( myfile, 0, '{' )

        write_abc_strassen_header( myfile )
        writePartition( myfile, dims_level_mix )
        write_break( myfile )

        # The primitive calls sit in a block that becomes an OpenMP parallel
        # region when _PARALLEL_ is defined.
        write_line( myfile, 0, '#ifdef _PARALLEL_' )
        write_line( myfile, 1, '#pragma omp parallel num_threads( bl_ic_nt )' )
        write_line( myfile, 0, '#endif' )
        write_line( myfile, 1, '{' )
        create_straprim_caller( myfile, cur_coeffs, dims_level_mix, num_multiplies )
        write_line( myfile, 1, '}' )
        write_break( myfile )

        # The three entries become the multipliers in the peeling call.
        level_dim = exp_dim_mix( dims_level_mix )
        write_line( myfile, 1, 'bl_dynamic_peeling( m, n, k, XA, lda, XB, ldb, XC, ldc, %d * DGEMM_MR, %d, %d * DGEMM_NR );' % ( level_dim[0], level_dim[1], level_dim[2] ) )
        write_break( myfile )

        # These lines are written as C comments, so no free() runs.
        write_line( myfile, 1, '//free( packA );' )
        write_line( myfile, 1, '//free( packB );' )
        write_line( myfile, 0, '}' )