예제 #1
0
def gen_naive_fmm(coeff_filename_mix, dims_mix, level_mix, outfile):
    """Write the C source of the naive Strassen-style driver to *outfile*.

    The generated file includes ``bl_dgemm.h``, the helper routines emitted
    by ``create_add_functions`` / ``create_straprim_naive_functions``, and a
    ``bl_dgemm_strassen_naive`` entry point.

    Args:
        coeff_filename_mix: Iterable of coefficient sources; each entry is
            handed to ``read_coeffs``.
        dims_mix: Iterable of per-algorithm dimension entries, in the same
            order as ``coeff_filename_mix`` (passed through opaquely here).
        level_mix: Indexable of ints; ``level_mix[i]`` is how many times the
            i-th coefficient set and the i-th dims entry are repeated.
        outfile: Path of the file to create or overwrite.
    """
    # Flatten the mix: entry i appears level_mix[i] times in a row.
    coeffs_mix = []
    for pos, coeff_path in enumerate(coeff_filename_mix):
        loaded = read_coeffs(coeff_path)
        coeffs_mix.extend(loaded for _ in range(level_mix[pos]))

    # Same expansion for the dimensions, so both lists stay aligned.
    dims_level_mix = []
    for pos, dims in enumerate(dims_mix):
        dims_level_mix.extend(dims for _ in range(level_mix[pos]))

    signature = 'void bl_dgemm_strassen_naive( int m, int n, int k, double *XA, int lda, double *XB, int ldb, double *XC, int ldc )'
    peeling_fmt = 'bl_dynamic_peeling( m, n, k, XA, lda, XB, ldb, XC, ldc, %d * DGEMM_MR, %d, %d * DGEMM_NR );'

    with open(outfile, 'w') as out:
        # Preamble.
        write_line(out, 0, '#include "bl_dgemm.h"')
        write_break(out)

        combined = generateCoeffs(coeffs_mix)
        # Length of the first row of the first coefficient matrix.
        num_multiplies = len(combined[0][0])

        # Helper routines that the driver below relies on.
        create_add_functions(out, combined)
        create_straprim_naive_functions(
            out, combined, dims_level_mix, num_multiplies)

        # Driver function: header, partitioning, primitive calls, peeling.
        write_line(out, 0, signature)
        write_line(out, 0, '{')
        write_naive_strassen_header(out)
        writePartition(out, dims_level_mix)
        write_break(out)

        create_straprim_caller(
            out, combined, dims_level_mix, num_multiplies)
        write_break(out)

        level_dim = exp_dim_mix(dims_level_mix)
        m_factor, k_factor, n_factor = (
            level_dim[0], level_dim[1], level_dim[2])
        write_line(out, 1, peeling_fmt % (m_factor, k_factor, n_factor))

        write_line(out, 0, '}')
예제 #2
0
def gen_abc_fmm( coeff_filename_mix, dims_mix, level_mix, outfilename, micro_kernel_filename, kernel_header_filename ):
    """Generate the C sources for the "ABC" Strassen-style implementation.

    Three files are written:

    * ``outfilename`` -- packing routines, macro kernels, the straprim
      functions and the ``bl_dgemm_strassen_abc`` entry point;
    * ``micro_kernel_filename`` -- output of ``create_micro_functions``;
    * ``kernel_header_filename`` -- output of ``create_kernel_header``.

    Args:
        coeff_filename_mix: Iterable of coefficient sources; each entry is
            handed to ``read_coeffs``.
        dims_mix: Iterable of per-algorithm dimension entries, in the same
            order as ``coeff_filename_mix`` (passed through opaquely here).
        level_mix: Indexable of ints; ``level_mix[i]`` is how many times the
            i-th coefficient set and the i-th dims entry are repeated.
        outfilename: Path of the main generated source file.
        micro_kernel_filename: Path of the generated micro-kernel file.
        kernel_header_filename: Path of the generated kernel header. The
            ``#include`` name is this string with its first 10 characters
            removed.
    """
    # Repeat each coefficient set once per level of its algorithm.
    coeffs_mix = []
    for idx, coeff_file in enumerate( coeff_filename_mix ):
        coeffs = read_coeffs( coeff_file )
        coeffs_mix.extend( [ coeffs ] * level_mix[idx] )

    # Repeat the dims the same way so both lists stay aligned.
    dims_level_mix = []
    for idx, dims in enumerate( dims_mix ):
        dims_level_mix.extend( [ dims ] * level_mix[idx] )

    with open( outfilename, 'w' ) as myfile:
        # NOTE(review): the [10:] slice presumably strips a fixed directory
        # prefix from the header path -- confirm against the callers.
        kernel_header_include = kernel_header_filename[10:]

        write_line( myfile, 0, '#include "%s"' % kernel_header_include )
        write_line( myfile, 0, '#include "bl_dgemm.h"' )
        write_break( myfile )

        cur_coeffs = generateCoeffs( coeffs_mix )

        # Length of the first row of the first coefficient matrix.
        num_multiplies = len( cur_coeffs[0][0] )

        create_packm_functions( myfile, cur_coeffs )

        # The side files are opened with `with` so they are flushed and
        # closed deterministically, even if a later generator step raises.
        with open( micro_kernel_filename, 'w' ) as my_micro_file:
            create_micro_functions( my_micro_file, cur_coeffs, kernel_header_include )

        with open( kernel_header_filename, 'w' ) as my_kernel_header:
            create_kernel_header( my_kernel_header, cur_coeffs )

        create_macro_functions( myfile, cur_coeffs )

        create_straprim_abc_functions( myfile, cur_coeffs, dims_level_mix )

        write_line( myfile, 0, 'void bl_dgemm_strassen_abc( int m, int n, int k, double *XA, int lda, double *XB, int ldb, double *XC, int ldc )' )
        write_line( myfile, 0, '{' )

        write_abc_strassen_header( myfile )

        writePartition( myfile, dims_level_mix )

        write_break( myfile )

        # The straprim calls are wrapped in a block that becomes an OpenMP
        # parallel region when _PARALLEL_ is defined.
        write_line( myfile, 0, '#ifdef _PARALLEL_' )
        write_line( myfile, 1, '#pragma omp parallel num_threads( bl_ic_nt )' )
        write_line( myfile, 0, '#endif' )
        write_line( myfile, 1, '{' )
        create_straprim_caller( myfile, cur_coeffs, dims_level_mix, num_multiplies )
        write_line( myfile, 1, '}' )

        write_break( myfile )
        level_dim = exp_dim_mix( dims_level_mix )
        write_line( myfile, 1, 'bl_dynamic_peeling( m, n, k, XA, lda, XB, ldb, XC, ldc, %d * DGEMM_MR, %d, %d * DGEMM_NR );' % ( level_dim[0], level_dim[1], level_dim[2] ) )

        write_break( myfile )
        write_line( myfile, 1, '//free( packA );' )
        write_line( myfile, 1, '//free( packB );' )

        write_line( myfile, 0, '}' )