Ejemplo n.º 1
0
def write_stra_matC(myfile, coeff_idx, coeffs, idx, dim_name, dims, level):
    """Emit the block-pointer and coefficient ``std::array`` lines for one operand.

    Two C++ declarations are written at indent level 1:

    * ``std::array<T*,N> <X><idx>_list`` holding one ``const_cast<T*>(...)``
      entry per nonzero coefficient, named through ``getBlockName``;
    * ``std::array<T,N> <X><idx>_coeff_list`` holding those coefficients.

    ``<X>`` is the first element of ``getName(coeff_idx)`` and ``N`` is the
    number of entries of ``coeffs`` accepted by ``is_nonzero``.
    ``dim_name`` is accepted but not used by this function.
    """
    tag = (getName(coeff_idx))[0] + str(idx)
    nnz = str(len([value for value in coeffs if is_nonzero(value)]))

    # Pointer list: one entry per nonzero coefficient, in coefficient order.
    block_ptrs = []
    for position, value in enumerate(coeffs):
        if is_nonzero(value):
            block_ptrs.append('const_cast<T*>(%s)' %
                              (getBlockName(coeff_idx, position, dims, level)))
    write_line(
        myfile, 1, 'std::array<T*,' + nnz + '> ' + tag + '_list = {' +
        ', '.join(block_ptrs) + '};')

    # Matching coefficient values, in the same order as the pointers above.
    coeff_values = []
    for value in coeffs:
        if is_nonzero(value):
            coeff_values.append(str(value))
    write_line(
        myfile, 1, 'std::array<T,' + nnz + '> ' + tag + '_coeff_list = {' +
        ', '.join(coeff_values) + '};')
Ejemplo n.º 2
0
def write_neighbor_visualization_kml(known, k=6, verbose=False):
    """Write a KML file showing k nearest neighbors among known locations.

    For every row of ``known`` a ``<Folder>`` named after the row's
    ``ptol_id`` is written to ``../Data/visualize_neighbors.kml``.  The folder
    holds the row's ``XCOLS`` and ``YCOLS`` points in red, the same two points
    of each neighbor in yellow, and one line per neighbor for each of the two
    point sets.

    Args:
        known: frame providing the ``XCOLS``/``YCOLS`` columns and ``ptol_id``.
        k: number of neighbors requested from ``NearestNeighbors``.  Column 0
            of the result is skipped (presumably the query row itself --
            confirm), so ``k - 1`` neighbors are drawn per row.
        verbose: when true, print each row and its neighbors while writing.
    """
    X = known.loc[:, XCOLS]
    neighbors = NearestNeighbors(n_neighbors=k).fit(X)
    _, indices = neighbors.kneighbors(X)
    colors = 'red,orange,yellow,green,cyan,purple'.split(',')
    # NOTE(review): the file is opened in binary mode but receives ``str``
    # data, which only works on Python 2 -- confirm before porting.
    with open('../Data/visualize_neighbors.kml', 'wb') as kml:
        kml.write('<?xml version="1.0" encoding="UTF-8"?>\n')
        kml.write('<kml xmlns="http://www.opengis.net/kml/2.2">\n')
        kml.write('<Document>\n')
        common.write_styles(kml)
        for i in range(len(known)):
            # Look the label up once instead of once per use.
            alabel = known.ix[i].ptol_id
            kml.write('  <Folder id="%s">\n' % (alabel, ))
            kml.write('      <name>%s</name>\n' % (alabel, ))
            ax = known.ix[i, XCOLS].values
            ay = known.ix[i, YCOLS].values
            common.write_point(kml, 'red', ax, alabel)
            common.write_point(kml, 'red', ay, alabel)
            if verbose:
                # Single pre-formatted argument: same text on Python 2 and 3.
                print('%s %s %s' % (alabel, ax, ay))
            for m, j in enumerate(indices[i, 1:k]):
                bx = known.ix[j, XCOLS].values
                by = known.ix[j, YCOLS].values
                blabel = known.ix[j].ptol_id
                common.write_point(kml, 'yellow', bx, blabel)
                common.write_point(kml, 'yellow', by, blabel)
                # Cycle through the palette so k > 7 no longer raises
                # IndexError; the first six neighbors keep their colors.
                line_color = colors[m % len(colors)]
                common.write_line(kml, ax, bx, line_color)
                common.write_line(kml, ay, by, line_color)
                if verbose:
                    print('   %s %s %s' % (blabel, bx, by))
            kml.write('  </Folder>\n')
        kml.write('</Document>\n')
        kml.write('</kml>\n')
Ejemplo n.º 3
0
def write_straprim_caller(myfile,
                          index,
                          a_coeffs,
                          b_coeffs,
                          c_coeffs,
                          dims,
                          num_multiplies,
                          level=1):
    """Emit the ``bl_dgemm_straprim_abc<index>`` call for one product.

    Two lines are written at indent level 1: a ``//`` comment spelling out
    ``M<index>`` as a product of two linear combinations of blocks plus the C
    updates it contributes to, and the call itself, which receives the block
    names of the nonzero A, B and C coefficients in that order.

    ``num_multiplies`` is accepted but not used by this function.
    """

    def weighted_terms(mat_idx, coeffs):
        # "<coeff> * <block>" for every nonzero coefficient, in order.
        terms = []
        for pos, value in enumerate(coeffs):
            if is_nonzero(value):
                terms.append(
                    str(value) +
                    ' * %s' % getBlockName(mat_idx, pos, dims, level))
        return terms

    def block_names(mat_idx, coeffs):
        # Bare block names for every nonzero coefficient, in order.
        names = []
        for pos, value in enumerate(coeffs):
            if is_nonzero(value):
                names.append('%s' % getBlockName(mat_idx, pos, dims, level))
        return names

    summary = ['// M%d = (' % (index)]
    summary.append(' + '.join(weighted_terms(0, a_coeffs)))
    summary.append(') * (')
    summary.append(' + '.join(weighted_terms(1, b_coeffs)))
    summary.append('); ')
    updates = []
    for pos, value in enumerate(c_coeffs):
        if is_nonzero(value):
            updates.append(' %s += %s * M%d' %
                           (getBlockName(2, pos, dims, level), value, index))
    summary.append('; '.join(updates))
    summary.append(';')
    write_line(myfile, 1, ''.join(summary))

    call = ['bl_dgemm_straprim_abc%d( ms, ns, ks, ' % index]
    call.append(', '.join(block_names(0, a_coeffs)))
    call.append(', lda, ')
    call.append(', '.join(block_names(1, b_coeffs)))
    call.append(', ldb, ')
    call.append(', '.join(block_names(2, c_coeffs)))
    call.append(', ldc, packA, packB, bl_ic_nt );')
    write_line(myfile, 1, ''.join(call))
Ejemplo n.º 4
0
def create_micro_functions(myfile, coeffs, kernel_header_filename):
    """Write the micro-kernel source: include, shared macros, then kernels.

    After the ``#include`` of ``kernel_header_filename`` and the common
    rank-k and initialise macros, one micro-kernel is generated for every
    non-empty entry of ``transpose(coeffs[2])`` whose nonzero coefficient
    count is at most 23.  A break is written after every non-empty entry,
    whether or not a kernel was generated for it.
    """
    write_line(myfile, 0, '#include "%s"' % kernel_header_filename)
    write_break(myfile)
    abc_micro_kernel_gen.write_common_rankk_macro_assembly(myfile)
    write_break(myfile)
    abc_micro_kernel_gen.macro_initialize_assembly(myfile)
    # The xor0 / loopkiter / loopkleft / postaccum macro emitters of
    # abc_micro_kernel_gen are currently not invoked here.
    write_break(myfile)

    for kernel_idx, column in enumerate(transpose(coeffs[2])):
        if len(column) == 0:
            continue

        kept = []
        for value in column:
            if is_nonzero(value):
                kept.append(value)

        # NOTE(review): the 23 limit is unexplained in this file -- confirm.
        if len(kept) <= 23:
            abc_micro_kernel_gen.generate_micro_kernel(
                myfile, kept, kernel_idx)

        write_break(myfile)
Ejemplo n.º 5
0
def write_updatec_two_assembly( myfile ):
    """Emit inline-assembly text that updates two C blocks from ymm8-ymm15.

    The first emitted line loads asm operand ``%8`` (``nnz + 6``) into
    ``%rax``; the emitted comment calls it the address of ``alpha_list``.
    Then, for ``j`` in ``0, 1``, a fixed template is written that

    * broadcasts the double at ``(%rax)`` into an AVX register,
    * loads asm operand ``%<j+6>`` into a general-purpose register used as
      the address of the block,
    * for four columns, loads the two 32-byte vectors at that address, adds
      the broadcast value times the matching accumulator (ymm9/ymm8,
      ymm11/ymm10, ymm13/ymm12, ymm15/ymm14) and stores them back, advancing
      the address by ``%rdi`` between columns,
    * finally advances ``%rax`` by 8 bytes.

    Args:
        myfile: writable text stream receiving the generated source.
    """
    # The block count is fixed at two here instead of being derived from a
    # list of nonzero coefficients.
    nnz = 2

    write_line( myfile, 1, '"movq         %{0}, %%rax                      \\n\\t" // load address of alpha_list'.format(nnz+6) )

    for j in range( nnz ):
        # Register that will hold the broadcast alpha for this block.
        alpha_avx_reg = get_avx_reg()
        # Placeholders: {0} = asm operand number of the block address,
        # {1} = block index used in the emitted comments, {2} = register from
        # get_reg(), {3} = alpha register, and every placeholder from {4}
        # onward is filled by its own get_avx_reg( alpha_avx_reg ) call.
        # NOTE(review): the allocator calls are presumably stateful, so the
        # order of the format arguments matters -- confirm before reordering.
        myfile.write( \
'''\
    "                                            \\n\\t"
	"vbroadcastsd    (%%rax), %%{3}             \\n\\t" // load alpha_list[ i ] and duplicate
    "movq                   %{0}, %%{2}            \\n\\t" // load address of c
    "                                            \\n\\t"
    "vmovapd    0 * 32(%%{2}),  %%{4}           \\n\\t" // {4} = c{1}( 0:3, 0 )
	"vmulpd            %%{3},  %%ymm9,  %%{5}  \\n\\t" // scale by alpha, {5} = {3}( alpha ) * ymm9( c{1}( 0:3, 0 ) )
    "vaddpd            %%{4},  %%{5},  %%{4}  \\n\\t" // {4} += {5}
    "vmovapd           %%{4},  0(%%{2})         \\n\\t" // c{1}( 0:3, 0 ) = {4}
    "vmovapd    1 * 32(%%{2}),  %%{6}           \\n\\t" // {6} = c{1}( 4:7, 0 )
	"vmulpd            %%{3},  %%ymm8,  %%{7}  \\n\\t" // scale by alpha, {7} = {3}( alpha ) * ymm8( c{1}( 4:7, 0 ) )
    "vaddpd            %%{6},  %%{7},  %%{6}  \\n\\t" // {6} += {7}
    "vmovapd           %%{6},  32(%%{2})        \\n\\t" // c{1}( 4:7, 0 ) = {6}
    "addq              %%rdi,   %%{2}            \\n\\t"
    "vmovapd    0 * 32(%%{2}),  %%{8}           \\n\\t" // {8} = c{1}( 0:3, 1 )
	"vmulpd            %%{3},  %%ymm11,  %%{9}  \\n\\t" // scale by alpha, {5} = {3}( alpha ) * ymm11( c{1}( 0:3, 1 ) )
    "vaddpd            %%{8}, %%{9},  %%{8}  \\n\\t" // {8} += {7}
    "vmovapd           %%{8},  0(%%{2})         \\n\\t" // c{1}( 0:3, 1 ) = {8}
    "vmovapd    1 * 32(%%{2}),  %%{10}           \\n\\t" // {10} = c{1}( 4:7, 1 )
	"vmulpd            %%{3},  %%ymm10,  %%{11}  \\n\\t" // scale by alpha, {5} = {3}( alpha ) * ymm10( c{1}( 4:7, 1 ) )
    "vaddpd            %%{10}, %%{11},  %%{10}  \\n\\t" // {10} += {9}
    "vmovapd           %%{10},  32(%%{2})        \\n\\t" // c{1}( 4:7, 1 ) = {10}
    "addq              %%rdi,   %%{2}            \\n\\t"
    "vmovapd    0 * 32(%%{2}),  %%{12}           \\n\\t" // {12} = c{1}( 0:3, 2 )
	"vmulpd            %%{3},  %%ymm13,  %%{13}  \\n\\t" // scale by alpha, {5} = {3}( alpha ) * ymm13( c{1}( 0:3, 2 ) )
    "vaddpd            %%{12}, %%{13},  %%{12}  \\n\\t" // {12} += {11}
    "vmovapd           %%{12},  0(%%{2})         \\n\\t" // c{1}( 0:3, 2 ) = {12}
    "vmovapd    1 * 32(%%{2}),  %%{14}           \\n\\t" // {14} = c{1}( 4:7, 2 )
	"vmulpd            %%{3},  %%ymm12,  %%{15}  \\n\\t" // scale by alpha, {5} = {3}( alpha ) * ymm12( c{1}( 4:7, 2 ) )
    "vaddpd            %%{14}, %%{15},  %%{14}  \\n\\t" // {14} += {13}
    "vmovapd           %%{14},  32(%%{2})        \\n\\t" // c{1}( 4:7, 2 ) = {14}
    "addq              %%rdi,   %%{2}            \\n\\t"
    "vmovapd    0 * 32(%%{2}),  %%{16}           \\n\\t" // {16} = c{1}( 0:3, 3 )
	"vmulpd            %%{3},  %%ymm15,  %%{17}  \\n\\t" // scale by alpha, {5} = {3}( alpha ) * ymm15( c{1}( 0:3, 3 ) )
    "vaddpd            %%{16}, %%{17},  %%{16}  \\n\\t" // {16} += {15}
    "vmovapd           %%{16},  0(%%{2})         \\n\\t" // c{1}( 0:3, 3 ) = {16}
    "vmovapd    1 * 32(%%{2}),  %%{18}           \\n\\t" // {18} = c{1}( 4:7, 3 )
	"vmulpd            %%{3},  %%ymm14,  %%{19}  \\n\\t" // scale by alpha, {5} = {3}( alpha ) * ymm14( c{1}( 4:7, 3 ) )
    "vaddpd            %%{18}, %%{19},  %%{18}  \\n\\t" // {18} +={17}
    "vmovapd           %%{18}, 32(%%{2})         \\n\\t" // c{1}( 4:7, 3 ) = {18}
    "addq              $1 * 8,  %%rax            \\n\\t" // alpha_list += 8
    "                                            \\n\\t"
'''.format( str(j+6), str(j), get_reg(), alpha_avx_reg, get_avx_reg( alpha_avx_reg ), get_avx_reg( alpha_avx_reg ), get_avx_reg( alpha_avx_reg ), get_avx_reg( alpha_avx_reg ), get_avx_reg( alpha_avx_reg ), get_avx_reg( alpha_avx_reg ), get_avx_reg( alpha_avx_reg ), get_avx_reg( alpha_avx_reg ), get_avx_reg( alpha_avx_reg ), get_avx_reg( alpha_avx_reg ), get_avx_reg( alpha_avx_reg ), get_avx_reg( alpha_avx_reg ), get_avx_reg( alpha_avx_reg ), get_avx_reg( alpha_avx_reg ), get_avx_reg( alpha_avx_reg ), get_avx_reg( alpha_avx_reg ) ) )
Ejemplo n.º 6
0
def write_stra_mat(myfile, coeff_idx, coeffs, idx, dim_name, dims, level):
    """Emit one ``stra_matrix_view<T,N>`` declaration for an operand.

    The view is named ``<X>v<idx>``, where ``<X>`` is the first element of
    ``getName(coeff_idx)``.  It is constructed from ``{dim_name}``, the
    ``const_cast<T*>`` block names of the nonzero coefficients, those
    coefficient values, and the ``{rs_<X>, cs_<X>}`` stride pair.
    ``dim_name`` is spliced in by string concatenation.
    """
    prefix = (getName(coeff_idx))[0]
    nnz = len([value for value in coeffs if is_nonzero(value)])

    pieces = ['stra_matrix_view<T,' + str(nnz) + '> ']
    pieces.append(prefix + 'v' + str(idx) + '({' + dim_name + '}, {')

    # Pointers to the blocks that take part, in coefficient order.
    block_ptrs = []
    for position, value in enumerate(coeffs):
        if is_nonzero(value):
            block_ptrs.append('const_cast<T*>(%s)' %
                              (getBlockName(coeff_idx, position, dims, level)))
    pieces.append(', '.join(block_ptrs))
    pieces.append('}, {')

    # The coefficient attached to each of those blocks.
    coeff_values = []
    for value in coeffs:
        if is_nonzero(value):
            coeff_values.append(str(value))
    pieces.append(', '.join(coeff_values))
    pieces.append('}, {rs_' + prefix + ', cs_' + prefix + '});')

    write_line(myfile, 1, ''.join(pieces))
Ejemplo n.º 7
0
def write_straprim_caller(myfile,
                          index,
                          a_coeffs,
                          b_coeffs,
                          c_coeffs,
                          dims,
                          num_multiplies,
                          level=1):
    """Emit the ``stra_gemm`` invocation for product ``M<index>``.

    Output, in order: a ``//`` comment describing the product and the C
    updates it feeds; the three operand views via ``write_stra_mat``; a C++
    ``if``/``else`` that transposes the views and swaps A/B when
    ``Cv<index>.stride(!row_major) == 1``; a ``comm.barrier();``; two
    commented-out debug statements; and a break.

    ``num_multiplies`` is accepted but not used by this function.
    """
    summary = ['// M%d = (' % (index)]

    lhs_terms = []
    for pos, coeff in enumerate(a_coeffs):
        if is_nonzero(coeff):
            lhs_terms.append(
                str(coeff) + ' * %s' % getBlockName(0, pos, dims, level))
    summary.append(' + '.join(lhs_terms))
    summary.append(') * (')

    rhs_terms = []
    for pos, coeff in enumerate(b_coeffs):
        if is_nonzero(coeff):
            rhs_terms.append(
                str(coeff) + ' * %s' % getBlockName(1, pos, dims, level))
    summary.append(' + '.join(rhs_terms))
    summary.append('); ')

    out_terms = []
    for pos, coeff in enumerate(c_coeffs):
        if is_nonzero(coeff):
            out_terms.append(' %s += %s * M%d' %
                             (getBlockName(2, pos, dims, level), coeff, index))
    summary.append('; '.join(out_terms))
    summary.append(';')
    write_line(myfile, 1, ''.join(summary))

    # One view per operand: A, then B, then C.
    operands = (
        (0, a_coeffs, ['AC', 'AB']),
        (1, b_coeffs, ['AB', 'BC']),
        (2, c_coeffs, ['AC', 'BC']),
    )
    for mat_idx, mat_coeffs, dim_pair in operands:
        write_stra_mat(myfile, mat_idx, mat_coeffs, index, dim_pair, dims,
                       level)

    dispatch_template = '''\
    if (Cv{0}.stride(!row_major) == 1)
    {{
        Av{0}.transpose();
        Bv{0}.transpose();
        Cv{0}.transpose();
        stra_gemm(comm, cfg, alpha, Bv{0}, Av{0}, beta, Cv{0});
    }} else {{
        stra_gemm(comm, cfg, alpha, Av{0}, Bv{0}, beta, Cv{0});
    }}
'''
    myfile.write(dispatch_template.format(index))

    write_line(myfile, 1, 'comm.barrier();')

    debug_header = '//std::cout << "stra_internal/stra_mult_M{0}:" << std::endl;'
    write_line(myfile, 1, debug_header.format(index))
    write_line(myfile, 1, '//print_tensor_matrix( ct );')

    write_break(myfile)
Ejemplo n.º 8
0
def write_common_start_assembly(myfile, nnz):
    """Emit the kernel prologue up to the start of the inline-asm body.

    Written in order: the C declarations of ``b_next``, ``k_iter`` and
    ``k_left``; one ``double`` declaration defining ``coeff<i>`` as
    ``&coeff_list[i]`` and one defining ``c<i>`` as ``c_list[i]`` for ``i``
    in ``range(nnz)``; a break; and the opening of the ``__asm__ volatile``
    statement, which loads the ``a``/``b``/``b_next`` operands, pre-loads
    the first vectors of a and b, and turns ``cs_c`` into a byte stride.
    """
    c_prologue = '''\
    void*   b_next = bli_auxinfo_next_b( data );

    uint64_t k_iter = k / 4;
    uint64_t k_left = k % 4;
'''
    myfile.write(c_prologue)

    coeff_decls = []
    for i in range(nnz):
        coeff_decls.append('*coeff%d = &coeff_list[%d]' % (i, i))
    write_line(myfile, 1, 'double ' + ', '.join(coeff_decls) + ';')

    c_decls = []
    for i in range(nnz):
        c_decls.append('*c%d = c_list[%d]' % (i, i))
    write_line(myfile, 1, 'double ' + ', '.join(c_decls) + ';')

    write_break(myfile)

    asm_prologue = '''\
	__asm__ volatile
	(
	"                                            \\n\\t"
	"                                            \\n\\t"
    "movq                %[a], %%rax             \\n\\t" // load address of a.              ( v )
    "movq                %[b], %%rbx             \\n\\t" // load address of b.              ( v )
    "movq                %[b_next], %%r15        \\n\\t" // load address of b_next.         ( v )
    "addq          $-4 * 64, %%r15               \\n\\t" //                                 ( ? )
    "                                            \\n\\t"
    "vmovapd   0 * 32(%%rax), %%ymm0             \\n\\t" // initialize loop by pre-loading
    "vmovapd   0 * 32(%%rbx), %%ymm2             \\n\\t" // elements of a and b.
    "vpermilpd  $0x5, %%ymm2, %%ymm3             \\n\\t"
    "                                            \\n\\t"
    "                                            \\n\\t"
    "movq                %[cs_c], %%rdi          \\n\\t" // load cs_c
    "leaq        (,%%rdi,8), %%rdi               \\n\\t" // cs_c * sizeof(double)
'''
    myfile.write(asm_prologue)
Ejemplo n.º 9
0
def write_straprim_caller(myfile,
                          index,
                          a_coeffs,
                          b_coeffs,
                          c_coeffs,
                          dims,
                          num_multiplies,
                          level=1):
    """Emit the ``straprim_ab`` invocation for product ``M<index>``.

    Output, in order: a ``//`` comment describing the product and the C
    updates it feeds; the A, B and C views via ``write_stra_mat`` (sized
    ``ms, ks`` / ``ks, ns`` / ``ms, ns``); the ``straprim_ab(...)`` call;
    a ``comm.barrier();``; and a break.

    ``num_multiplies`` is accepted but not used by this function.
    """

    def linear_combination(mat_idx, coeffs):
        # "c0 * blk0 + c1 * blk1 + ..." over the nonzero coefficients.
        terms = []
        for pos, coeff in enumerate(coeffs):
            if is_nonzero(coeff):
                terms.append(
                    str(coeff) +
                    ' * %s' % getBlockName(mat_idx, pos, dims, level))
        return ' + '.join(terms)

    text = '// M%d = (' % (index)
    text = text + linear_combination(0, a_coeffs) + ') * ('
    text = text + linear_combination(1, b_coeffs) + '); '
    updates = []
    for pos, coeff in enumerate(c_coeffs):
        if is_nonzero(coeff):
            updates.append(' %s += %s * M%d' %
                           (getBlockName(2, pos, dims, level), coeff, index))
    text = text + '; '.join(updates) + ';'
    write_line(myfile, 1, text)

    for mat_idx, mat_coeffs, extent in ((0, a_coeffs, 'ms, ks'),
                                        (1, b_coeffs, 'ks, ns'),
                                        (2, c_coeffs, 'ms, ns')):
        write_stra_mat(myfile, mat_idx, mat_coeffs, index, extent, dims,
                       level)

    call_template = 'straprim_ab(comm, cfg, alpha, Av{0}, Bv{0}, beta, Cv{0});'
    write_line(myfile, 1, call_template.format(index))

    write_line(myfile, 1, 'comm.barrier();')

    write_break(myfile)
Ejemplo n.º 10
0
def write_stra_matAB(myfile, coeff_idx, coeffs, idx, dim_name, dims, level):
    """Emit the sub-block ids, coefficients and tensor view of one operand.

    Three C++ lines are written at indent level 1, all keyed by ``<X><idx>``
    where ``<X>`` is the first element of ``getName(coeff_idx)``:

    * ``std::array<unsigned, N> <X><idx>_subid`` with the value of
      ``getActualBlockIndex`` for every nonzero coefficient;
    * ``std::array<T,N> <X><idx>_coeff_list`` with those coefficients;
    * ``stra_tensor_view<T,N> <X>v<idx>(...)`` built from the
      ``my_len_*``/``my_stride_<X>_*`` names of ``dim_name[0]`` and
      ``dim_name[1]``, ``<X>_divisor``, the base pointer and the two arrays.
    """
    mat = (getName(coeff_idx))[0]
    count = str(len([value for value in coeffs if is_nonzero(value)]))
    tag = mat + str(idx)

    # Indices of the sub-blocks that carry a nonzero coefficient.
    sub_ids = []
    for pos, value in enumerate(coeffs):
        if is_nonzero(value):
            sub_ids.append('%s' %
                           getActualBlockIndex(coeff_idx, pos, dims, level))
    write_line(
        myfile, 1, 'std::array<unsigned, ' + count + '> ' + tag +
        '_subid = {' + ', '.join(sub_ids) + '};')

    # The coefficients themselves, in the same order.
    coeff_values = []
    for value in coeffs:
        if is_nonzero(value):
            coeff_values.append(str(value))
    write_line(
        myfile, 1, 'std::array<T,' + count + '> ' + tag + '_coeff_list = {' +
        ', '.join(coeff_values) + '};')

    first_dim = dim_name[0]
    second_dim = dim_name[1]
    view = 'stra_tensor_view<T,' + count + '> ' + mat + 'v' + str(idx)
    view += '(my_len_' + first_dim + ', my_len_' + second_dim + ', '
    view += mat + '_divisor, const_cast<T*>(' + mat + '), '
    view += tag + '_subid, ' + tag + '_coeff_list, '
    view += 'my_stride_' + mat + '_' + first_dim + ', '
    view += 'my_stride_' + mat + '_' + second_dim + ');'

    write_line(myfile, 1, view)
Ejemplo n.º 11
0
def write_straprim_caller(myfile, index, a_coeffs, b_coeffs, c_coeffs, dims, num_multiplies, level=1):
    """Emit the ``straprim_naive`` invocation for product ``M<index>``.

    Output, in order: a ``//`` comment describing the product and the C
    updates it feeds; the A, B and C operands via ``write_stra_mat``; the
    ``straprim_naive<T,na,nb,nc>(...)`` call, where the template arguments
    come from ``getNNZ`` of each coefficient list (the transposed variant is
    emitted only as commented-out C++); a ``comm.barrier();``; two
    commented-out debug statements; and a break.

    ``num_multiplies`` is accepted but not used by this function.
    """
    header = ['// M%d = (' % (index)]
    for mat_idx, mat_coeffs, closing in ((0, a_coeffs, ') * ('),
                                         (1, b_coeffs, '); ')):
        terms = []
        for pos, coeff in enumerate(mat_coeffs):
            if is_nonzero(coeff):
                terms.append(str(coeff) + ' * %s' % getBlockName(mat_idx, pos, dims, level))
        header.append(' + '.join(terms))
        header.append(closing)

    updates = []
    for pos, coeff in enumerate(c_coeffs):
        if is_nonzero(coeff):
            updates.append(' %s += %s * M%d' % (getBlockName(2, pos, dims, level), coeff, index))
    header.append('; '.join(updates))
    header.append(';')
    write_line(myfile, 1, ''.join(header))

    write_stra_mat(myfile, 0, a_coeffs, index, ['AC', 'AB'], dims, level)
    write_stra_mat(myfile, 1, b_coeffs, index, ['AB', 'BC'], dims, level)
    write_stra_mat(myfile, 2, c_coeffs, index, ['AC', 'BC'], dims, level)

    call_template = '''\
    //if (ct.stride(!row_major) == 1)
    //{{
    //    Av{0}.transpose();
    //    Bv{0}.transpose();
    //    Cv{0}.transpose();
    //    straprim_naive<T,{1},{2},{3}>(comm, cfg, my_sub_len_AB, my_sub_len_AC, my_sub_len_BC,
    //             alpha,
    //             B{0}_list, B{0}_coeff_list, my_stride_B_AB, my_stride_B_BC,
    //             A{0}_list, A{0}_coeff_list, my_stride_A_AB, my_stride_A_AC,
    //             beta,
    //             C{0}_list, C{0}_coeff_list, my_stride_C_AC, my_stride_C_BC);
    //}} else {{
        straprim_naive<T,{1},{2},{3}>(comm, cfg, my_sub_len_AB, my_sub_len_AC, my_sub_len_BC,
                 alpha,
                 A{0}_list, A{0}_coeff_list, my_stride_A_AB, my_stride_A_AC,
                 B{0}_list, B{0}_coeff_list, my_stride_B_AB, my_stride_B_BC,
                 beta,
                 C{0}_list, C{0}_coeff_list, my_stride_C_AC, my_stride_C_BC);
    //}}
'''
    myfile.write(call_template.format(index, getNNZ(a_coeffs), getNNZ(b_coeffs), getNNZ(c_coeffs)))

    write_line(myfile, 1, 'comm.barrier();')

    debug_header = '//std::cout << "stra_internal/stra_mult_M{0}:" << std::endl;'
    write_line(myfile, 1, debug_header.format(index))
    write_line(myfile, 1, '//print_tensor_matrix( ct );')

    write_break(myfile)
Ejemplo n.º 12
0
def gen_micro_kernel(outfile, nnz):
    """Generate the source of one micro-kernel and write it to ``outfile``.

    The file is (re)created and filled, in order, with the function header
    named after ``getNumberName(nnz)``, the common start, prefetch, rank-k,
    C-update and common end assembly sections, and the closing brace.

    Args:
        outfile: path of the file to write; an existing file is overwritten.
        nnz: number of C blocks / coefficients handled by the kernel.
    """
    # The context manager closes (and therefore flushes) the file even when
    # one of the writers raises; the handle used to be left open.
    with open(outfile, 'w') as myfile:
        write_function_name(myfile, getNumberName(nnz))
        write_common_start_assembly(myfile, nnz)

        write_prefetch_assembly(myfile, nnz)

        write_common_rankk_assembly(myfile)

        write_updatec_assembly(myfile, nnz)

        write_common_end_assembly(myfile, nnz)

        write_line(myfile, 0, '}')
Ejemplo n.º 13
0
def write_neighbor_visualization_kml(known, k=6, verbose=False):
    """Write a KML file showing k nearest neighbors among known locations.

    For every row of ``known`` a ``<Folder>`` named after the row's
    ``ptol_id`` is written to ``../Data/visualize_neighbors.kml``.  The folder
    holds the row's ``XCOLS`` and ``YCOLS`` points in red, the same two points
    of each neighbor in yellow, and one line per neighbor for each of the two
    point sets.

    Args:
        known: frame providing the ``XCOLS``/``YCOLS`` columns and ``ptol_id``.
        k: number of neighbors requested from ``NearestNeighbors``.  Column 0
            of the result is skipped (presumably the query row itself --
            confirm), so ``k - 1`` neighbors are drawn per row.
        verbose: when true, print each row and its neighbors while writing.
    """
    X = known.loc[:, XCOLS]
    neighbors = NearestNeighbors(n_neighbors=k).fit(X)
    _, indices = neighbors.kneighbors(X)
    colors = "red,orange,yellow,green,cyan,purple".split(",")
    # NOTE(review): the file is opened in binary mode but receives ``str``
    # data, which only works on Python 2 -- confirm before porting.
    with open("../Data/visualize_neighbors.kml", "wb") as kml:
        kml.write('<?xml version="1.0" encoding="UTF-8"?>\n')
        kml.write('<kml xmlns="http://www.opengis.net/kml/2.2">\n')
        kml.write("<Document>\n")
        common.write_styles(kml)
        for i in range(len(known)):
            # Look the label up once instead of once per use.
            alabel = known.ix[i].ptol_id
            kml.write('  <Folder id="%s">\n' % (alabel,))
            kml.write("      <name>%s</name>\n" % (alabel,))
            ax = known.ix[i, XCOLS].values
            ay = known.ix[i, YCOLS].values
            common.write_point(kml, "red", ax, alabel)
            common.write_point(kml, "red", ay, alabel)
            if verbose:
                # Single pre-formatted argument: same text on Python 2 and 3.
                print("%s %s %s" % (alabel, ax, ay))
            for m, j in enumerate(indices[i, 1:k]):
                bx = known.ix[j, XCOLS].values
                by = known.ix[j, YCOLS].values
                blabel = known.ix[j].ptol_id
                common.write_point(kml, "yellow", bx, blabel)
                common.write_point(kml, "yellow", by, blabel)
                # Cycle through the palette so k > 7 no longer raises
                # IndexError; the first six neighbors keep their colors.
                line_color = colors[m % len(colors)]
                common.write_line(kml, ax, bx, line_color)
                common.write_line(kml, ay, by, line_color)
                if verbose:
                    print("   %s %s %s" % (blabel, bx, by))
            kml.write("  </Folder>\n")
        kml.write("</Document>\n")
        kml.write("</kml>\n")
Ejemplo n.º 14
0
def write_divisor_initializer(myfile, dims, level):
    """Emit the ``A_divisor``/``B_divisor``/``C_divisor`` array constants.

    ``exp_dim(dims, level)`` supplies three partition counts.  A is split by
    entries (0, 1) and B by entries (1, 2); entry 1 is the dimension they
    share, so C is split by the two remaining entries (0, 2).

    Args:
        myfile: writable text stream receiving the generated source.
        dims: base partition dimensions handed to ``exp_dim``.
        level: recursion level handed to ``exp_dim``.
    """
    level_dim = exp_dim(dims, level)
    # C previously reused B's (1, 2) pair, which is only correct when
    # level_dim[0] == level_dim[1]; it must pair A's first divisor with B's
    # second one.
    divisors = (
        ('A', level_dim[0], level_dim[1]),
        ('B', level_dim[1], level_dim[2]),
        ('C', level_dim[0], level_dim[2]),
    )
    for mat_name, first, second in divisors:
        write_line(
            myfile, 1, 'const std::array<unsigned,2> %s_divisor={%d,%d};' %
            (mat_name, first, second))
    write_break(myfile)
Ejemplo n.º 15
0
def write_updatec_colstored_assembly(myfile, nnz):
    """Emit the ``.DCOLSTORED`` section of the inline-assembly C update.

    After the label, one group of instructions is written per block ``j`` in
    ``range(nnz)``: asm operand ``[coeff<j>]`` is loaded into a
    general-purpose register and the double it points to is broadcast into
    an AVX register.  Then, four times, the two 32-byte vectors at the
    address in register ``get_reg.c2reg[j]`` are loaded, incremented by the
    coefficient times the matching accumulator pair (ymm9/ymm8, ymm11/ymm10,
    ymm13/ymm12, ymm15/ymm14) and stored back.  Between iterations that
    address register is advanced by ``%rdi``.

    Args:
        myfile: writable text stream receiving the generated source.
        nnz: number of C blocks (and coefficients) to update.
    """
    write_line(myfile, 1,
               '".DCOLSTORED:                                \\n\\t"')
    write_line(myfile, 1,
               '"                                            \\n\\t"')
    for j in range(nnz):
        # AVX register that holds the broadcast coefficient for block j.
        coeff_avx_reg = get_avx_reg()

        # {0} = block index, {1} = scratch register from get_reg() for the
        # coefficient address, {2} = broadcast destination.
        myfile.write( \
'''\
    "                                            \\n\\t"
    "movq         %[coeff{0}], %%{1}               \\n\\t" // load address of coeff{0}
    "                                            \\n\\t"
	"vbroadcastsd    (%%{1}), %%{2}             \\n\\t" // load coeff{0} and duplicate
    "                                            \\n\\t"
'''.format( j, get_reg(), coeff_avx_reg ) )

        # Accumulator registers used per iteration; the emitted comments
        # describe the first list as rows 0:3 and the second as rows 4:7.
        c03_ymm_list = ['ymm9', 'ymm11', 'ymm13', 'ymm15']
        c47_ymm_list = ['ymm8', 'ymm10', 'ymm12', 'ymm14']

        for idx in range(4):
            # {3} = address register of block j, {4} = coefficient register,
            # {5}..{8} = one get_avx_reg(coeff_avx_reg) result each.
            # NOTE(review): the emitted comments always name column 0, even
            # for idx 1..3; the allocator calls are presumably stateful, so
            # keep the argument order -- confirm.
            myfile.write( \
'''\
    "vmovapd    0 * 32(%%{3}),  %%{5}           \\n\\t" // {5} = c{0}( 0:3, 0 )
	"vmulpd            %%{4},  %%{1},  %%{6}  \\n\\t" // scale by coeff{0}, {6} = {4}( coeff{0} ) * {1}( c{0}( 0:3, 0 ) )
    "vaddpd            %%{5},  %%{6},  %%{5}  \\n\\t" // {5} += {6}
    "vmovapd           %%{5},  0(%%{3})         \\n\\t" // c{0}( 0:3, 0 ) = {5}
    "vmovapd    1 * 32(%%{3}),  %%{7}           \\n\\t" // {7} = c{0}( 4:7, 0 )
	"vmulpd            %%{4},  %%{2},  %%{8}  \\n\\t" // scale by coeff{0}, {8} = {4}( coeff{0} ) * {2}( c{0}( 4:7, 0 ) )
    "vaddpd            %%{7},  %%{8},  %%{7}  \\n\\t" // {7} += {8}
    "vmovapd           %%{7},  32(%%{3})        \\n\\t" // c{0}( 4:7, 0 ) = {7}
'''.format(j, c03_ymm_list[idx], c47_ymm_list[idx], get_reg.c2reg[j], coeff_avx_reg, get_avx_reg(coeff_avx_reg), get_avx_reg(coeff_avx_reg), get_avx_reg(coeff_avx_reg), get_avx_reg(coeff_avx_reg) ) )
            # Advance the block address by %rdi, except after the last pass.
            if (idx != 3):
                write_line(
                    myfile, 1,
                    '"addq              %%rdi,   %%{0}            \\n\\t"'.
                    format(get_reg.c2reg[j]))
Ejemplo n.º 16
0
def write_triangle(kml, name, colors, points):
    """Draw the three edges of a triangle through ``common.write_line``.

    Edge ``i`` runs from ``points[i]`` to the next vertex (wrapping back to
    the first) and uses ``colors[i]``.  ``name`` is accepted but not used.
    """
    for vertex in (0, 1, 2):
        following = (vertex + 1) % 3
        common.write_line(kml, points[vertex], points[following],
                          colors[vertex])
Ejemplo n.º 17
0
def write_common_end_assembly(myfile, nnz):
    """Emit the tail of the inline-asm statement: label, operands, clobbers.

    Written in order: the ``.DDONE`` label, the (empty) output operand
    section, the input operands (seven fixed ones followed by ``[c<i>]`` and
    ``[coeff<i>]`` for ``i`` in ``range(nnz)``), the register clobber list
    and the closing ``);``.

    Args:
        myfile: writable text stream receiving the generated source.
        nnz: number of ``c``/``coeff`` operand pairs to declare.
    """
    blank = '"                                            \\n\\t"'
    write_line(myfile, 1, blank)
    write_line(myfile, 1,
               '".DDONE:                                    \\n\\t"')
    write_line(myfile, 1, blank)

    fixed_operands = (
        ': // output operands (none)',
        ': // input operands',
        '  [k_iter]     "m" (k_iter),      // 0',
        '  [k_left]     "m" (k_left),      // 1',
        '  [a]          "m" (a),           // 2',
        '  [b]          "m" (b),           // 3',
        '  [b_next]     "m" (b_next),      // 4',
        '  [rs_c]       "m" (rs_c),        // 5',
        '  [cs_c]       "m" (cs_c),        // 6',
    )
    for operand_line in fixed_operands:
        write_line(myfile, 1, operand_line)

    # Extended-asm operands must be comma separated.  The generated entries
    # used to be emitted without any separator, which is not valid C; every
    # entry except the very last one now carries a comma (it replaces one
    # padding space, so the column layout is unchanged).
    # NOTE(review): with nnz == 0 the comma after [cs_c] would end the
    # operand list; callers presumably always pass nnz >= 1 -- confirm.
    c_operands = [
        '  [c%d]         "m" (c%d),          // %d' % (i, i, i + 7)
        for i in range(nnz)
    ]
    coeff_operands = [
        '  [coeff%d]     "m" (coeff%d)%s      // %d' %
        (i, i, ',' if i < nnz - 1 else ' ', i + 7 + nnz) for i in range(nnz)
    ]

    add = '\n    '.join(c_operands)
    add += '\n    '
    add += '\n    '.join(coeff_operands)
    write_line(myfile, 1, add)

    clobber_lines = (
        ': // register clobber list',
        '  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",',
        '  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",',
        '  "xmm0", "xmm1", "xmm2", "xmm3",',
        '  "xmm4", "xmm5", "xmm6", "xmm7",',
        '  "xmm8", "xmm9", "xmm10", "xmm11",',
        '  "xmm12", "xmm13", "xmm14", "xmm15",',
        '  "memory"',
        ');',
    )
    for clobber_line in clobber_lines:
        write_line(myfile, 1, clobber_line)
Ejemplo n.º 18
0
def write_updatec_genstored_assembly(myfile, nnz):
    """Emit the ``.DGENSTORED`` section of the inline-assembly C update.

    After the label, ``%r12`` and ``%r13`` are set to two and three times
    ``%rsi`` (the emitted comments call ``%rsi`` ``rs_c``).  Then, for each
    block ``j`` in ``range(nnz)``:

    * asm operand ``[coeff<j>]`` is loaded and the double it points to is
      broadcast into an AVX register;
    * a second address register is set to ``get_reg.c2reg[j] + 4 * %rsi``;
    * four passes over ymm9/ymm11/ymm13/ymm15 update the elements reached
      from ``get_reg.c2reg[j]``, and four passes over
      ymm8/ymm10/ymm12/ymm14 update the elements reached from the second
      address register.  Each pass extracts the upper 128 bits of the
      accumulator, gathers two doubles at a time with ``vmovlpd``/``vmovhpd``
      (offsets 0, ``%rsi``, ``%r12``, ``%r13``), adds the coefficient times
      the accumulator half and scatters the result back.  Between passes the
      address register is advanced by ``%rdi``.

    The section ends with ``jmp .DDONE``.

    Args:
        myfile: writable text stream receiving the generated source.
        nnz: number of C blocks (and coefficients) to update.
    """

    write_line(myfile, 1,
               '".DGENSTORED:                                \\n\\t"')
    write_line(myfile, 1,
               '"                                            \\n\\t"')
    write_line(
        myfile, 1,
        '"leaq        (,%%rsi,2), %%r12               \\n\\t" // r12 = 2*rs_c;'
    )
    write_line(
        myfile, 1,
        '"leaq   (%%r12,%%rsi,1), %%r13               \\n\\t" // r13 = 3*rs_c;'
    )
    write_line(myfile, 1,
               '"                                            \\n\\t"')

    for j in range(nnz):

        # Address register for the second group of four passes.
        c47_reg = get_reg()

        # AVX register that holds the broadcast coefficient for block j
        # (an earlier revision hard-coded ymm6 here).
        coeff_avx_reg = get_avx_reg()

        # {0} = block index, {1} = address register of block j,
        # {2} = c47_reg, {3} = scratch register from a further get_reg()
        # call, {4} = broadcast destination.
        myfile.write( \
'''\
    "movq         %[coeff{0}], %%{3}               \\n\\t" // load address of coeff{0}
    "vbroadcastsd    (%%{3}), %%{4}             \\n\\t" // load coeff{0} and duplicate
    "leaq   (%%{1},%%rsi,4), %%{2}               \\n\\t" // load address of c{0} + 4*rs_c;'
    "                                            \\n\\t"
'''.format( j, get_reg.c2reg[j], c47_reg, get_reg(), coeff_avx_reg  ) )

        # Accumulator registers consumed by the first and second group of
        # passes respectively.
        c03_ymm_list = ['ymm9', 'ymm11', 'ymm13', 'ymm15']
        c47_ymm_list = ['ymm8', 'ymm10', 'ymm12', 'ymm14']

        # First group.  An earlier template for this loop hard-coded
        # xmm0/xmm1/xmm2 as scratch registers; the live version takes them
        # from get_avx_reg instead.
        # {0} = accumulator, {1} = pass number, {2} = address register,
        # {3}/{5} = numeric suffix of the accumulator / coefficient register,
        # {4} = block index, {6}..{8} = numeric suffixes of three
        # get_avx_reg(avoid_reg=coeff_avx_reg) results.
        # NOTE(review): the allocator calls are presumably stateful, so the
        # order of the format arguments matters -- confirm.
        for idx in range(4):
            myfile.write( \
'''\
    "vextractf128 $1, %%{0},  %%xmm{7}            \\n\\t"
    "vmovlpd    (%%{2}),       %%xmm{6},  %%xmm{6}   \\n\\t" // load c{4}_0{1} and c{4}_1{1},
    "vmovhpd    (%%{2},%%rsi), %%xmm{6},  %%xmm{6}   \\n\\t"
    "vmulpd           %%xmm{5},  %%xmm{3},  %%xmm{8}   \\n\\t" // scale by coeff{4},
    "vaddpd           %%xmm{8},  %%xmm{6},  %%xmm{8}   \\n\\t" // add the gemm result,
    "vmovlpd          %%xmm{8},  (%%{2})           \\n\\t" // and store back to memory.
    "vmovhpd          %%xmm{8},  (%%{2},%%rsi)     \\n\\t"
    "vmovlpd    (%%{2},%%r12), %%xmm{6},  %%xmm{6}   \\n\\t" // load c{4}_2{1} and c{4}_3{1},
    "vmovhpd    (%%{2},%%r13), %%xmm{6},  %%xmm{6}   \\n\\t"
    "vmulpd           %%xmm{5},  %%xmm{7},  %%xmm{8}   \\n\\t" // scale by coeff{4},
    "vaddpd           %%xmm{8},  %%xmm{6},  %%xmm{8}   \\n\\t" // add the gemm result,
    "vmovlpd          %%xmm{8},  (%%{2},%%r12)     \\n\\t" // and store back to memory.
    "vmovhpd          %%xmm{8},  (%%{2},%%r13)     \\n\\t"
    "                                            \\n\\t"
'''.format( c03_ymm_list[idx], str(idx), get_reg.c2reg[j], c03_ymm_list[idx][3:], j, coeff_avx_reg[3:], (get_avx_reg(avoid_reg=coeff_avx_reg))[3:], (get_avx_reg(avoid_reg=coeff_avx_reg))[3:], (get_avx_reg(avoid_reg=coeff_avx_reg))[3:], ) )
            # Advance the address by %rdi, except after the last pass.
            if (idx != 3):
                write_line(
                    myfile, 1,
                    '"addq      %%rdi, %%{0}                      \\n\\t" // c += cs_c;'
                    .format(get_reg.c2reg[j]))

        # Second group: same template shape, but driven by c47_ymm_list and
        # addressed through c47_reg.  As above, the earlier hard-coded
        # xmm0/xmm1/xmm2 template was replaced by allocator-chosen registers.
        for idx in range(4):
            myfile.write( \
'''\
    "vextractf128 $1, %%{0},  %%xmm{7}            \\n\\t"
    "vmovlpd    (%%{2}),       %%xmm{6},  %%xmm{6}   \\n\\t" // load c{4}_4{1} and c{4}_5{1},
    "vmovhpd    (%%{2},%%rsi), %%xmm{6},  %%xmm{6}   \\n\\t"
    "vmulpd           %%xmm{5},  %%xmm{3},  %%xmm{8}   \\n\\t" // scale by coeff{4},
    "vaddpd           %%xmm{8},  %%xmm{6},  %%xmm{8}   \\n\\t" // add the gemm result,
    "vmovlpd          %%xmm{8},  (%%{2})           \\n\\t" // and store back to memory.
    "vmovhpd          %%xmm{8},  (%%{2},%%rsi)     \\n\\t"
    "vmovlpd    (%%{2},%%r12), %%xmm{6},  %%xmm{6}   \\n\\t" // load c{4}_6{1} and c{4}_7{1},
    "vmovhpd    (%%{2},%%r13), %%xmm{6},  %%xmm{6}   \\n\\t"
    "vmulpd           %%xmm{5},  %%xmm{7},  %%xmm{8}   \\n\\t" // scale by coeff{4},
    "vaddpd           %%xmm{8},  %%xmm{6},  %%xmm{8}   \\n\\t" // add the gemm result,
    "vmovlpd          %%xmm{8},  (%%{2},%%r12)     \\n\\t" // and store back to memory.
    "vmovhpd          %%xmm{8},  (%%{2},%%r13)     \\n\\t"
    "                                            \\n\\t"
'''.format( c47_ymm_list[idx], str(idx), c47_reg, c47_ymm_list[idx][3:], j, coeff_avx_reg[3:], (get_avx_reg(avoid_reg=coeff_avx_reg))[3:], (get_avx_reg(avoid_reg=coeff_avx_reg))[3:], (get_avx_reg(avoid_reg=coeff_avx_reg))[3:],  ) )
            # Advance the address by %rdi, except after the last pass.
            if (idx != 3):
                write_line(
                    myfile, 1,
                    '"addq      %%rdi, %%{0}                      \\n\\t" // c += cs_c;'
                    .format(c47_reg))

    write_line(myfile, 1,
               '"                                            \\n\\t"')
    write_line(
        myfile, 1,
        '"jmp    .DDONE                               \\n\\t" // jump to end.')
    write_line(myfile, 1,
               '"                                            \\n\\t"')
Ejemplo n.º 19
0
def write_mulstrassen_kernel_caller(myfile, nonzero_coeffs):
    '''Emit the C statements that invoke bl_dgemm_asm_8x4_mulstrassen.

    Everything is written at indentation level 3.  The generated code
    declares alpha_list, c_list and len_c sized by the number of entries in
    nonzero_coeffs, fills alpha_list with the coefficients (each cast to
    double), points c_list[j] at &Cj[ j * ldC + i ], and finally emits the
    kernel call itself.

    myfile         -- output handle passed through to write_line
    nonzero_coeffs -- sequence of coefficients, one per output block
    '''
    count = len(nonzero_coeffs)

    # Array / length declarations, all sized by the coefficient count.
    for declaration in ('double alpha_list[%d];',
                        'double *c_list[%d];',
                        'unsigned long long len_c=%d;'):
        write_line(myfile, 3, declaration % count)

    # One assignment per coefficient, all placed on a single output line.
    alpha_stmts = []
    for pos, value in enumerate(nonzero_coeffs):
        alpha_stmts.append('alpha_list[%d]= (double)(%s)' % (pos, value))
    write_line(myfile, 3, '; '.join(alpha_stmts) + ';')

    # One output-block pointer per coefficient, again on a single line.
    pointer_stmts = [
        'c_list[%d] = &C%d[ j * ldC + i ]' % (pos, pos)
        for pos in range(count)
    ]
    write_line(myfile, 3, '; '.join(pointer_stmts) + ';')

    kernel_call = '( bl_dgemm_asm_8x4_mulstrassen ) ( k, &packA[ i * k ], &packB[ j * k ], (unsigned long long) len_c, (unsigned long long) ldC, c_list, alpha_list, &aux );'
    write_line(myfile, 3, kernel_call)
Ejemplo n.º 20
0
def write_triangle(kml, name, colors, points):
    """Draw the three edges of a triangle via common.write_line.

    Edge n runs from points[n] to the next vertex (wrapping back to
    points[0]) and is drawn with colors[n].  The name argument is accepted
    but not used here.
    """
    for start, end in ((0, 1), (1, 2), (2, 0)):
        common.write_line(kml, points[start], points[end], colors[start])
Ejemplo n.º 21
0
def write_macro_func(myfile, coeffs, index, mat_name):
    '''Emit the C macro-kernel bl_macro_kernel_stra_abc<index>.

    The generated function loops over DGEMM_NR x DGEMM_MR tiles and, per
    tile, calls a micro-kernel.  How the call is emitted depends on the
    nonzero coefficients:

      * at most 23 of them and contain_nontrivial() is false: a plain call
        to bl_dgemm_micro_kernel_stra_abc<index>;
      * at most 23 of them and contain_nontrivial() is true: the same call
        with an additional alpha_list argument filled with the coefficients;
      * more than 23: the call is delegated to
        write_mulstrassen_kernel_caller().

    myfile   -- output handle passed through to write_line
    coeffs   -- coefficients for this multiply; zero entries are dropped
    index    -- number appended to the generated function names
    mat_name -- prefix used for the output pointer parameters and for ld<name>
    '''
    nonzero_coeffs = []
    for coeff in coeffs:
        if is_nonzero(coeff):
            nonzero_coeffs.append(coeff)
    nnz = len(nonzero_coeffs)

    # Function signature: one pointer parameter per nonzero coefficient.
    pointer_params = ', '.join(
        'double *%s%d' % (mat_name, n) for n in range(nnz))
    signature = ''.join([
        'inline void bl_macro_kernel_stra_abc%d( int m, int n, int k, double *packA, double *packB, ' % index,
        pointer_params,
        ', int ld%s ) {' % mat_name,
    ])
    write_line(myfile, 0, signature)

    # Fixed prologue; the nested indentation is baked into the strings, so
    # every line is written at level 1.
    prologue = (
        'int i, j;',
        'aux_t aux;',
        'aux.b_next = packB;',
        'for ( j = 0; j < n; j += DGEMM_NR ) {',
        '    aux.n  = min( n - j, DGEMM_NR );',
        '    for ( i = 0; i < m; i += DGEMM_MR ) {',
        '        aux.m = min( m - i, DGEMM_MR );',
        '        if ( i + DGEMM_MR >= m ) {',
        '            aux.b_next += DGEMM_NR * k;',
        '        }',
    )
    for text in prologue:
        write_line(myfile, 1, text)

    if nnz > 23:
        write_mulstrassen_kernel_caller(myfile, nonzero_coeffs)
    else:
        needs_alpha = contain_nontrivial(nonzero_coeffs)
        if needs_alpha:
            # Nontrivial coefficients are handed to the kernel in an array.
            write_line(myfile, 3, 'double alpha_list[%d];' % nnz)
            alpha_stmts = [
                'alpha_list[%d]= (double)(%s)' % (pos, value)
                for pos, value in enumerate(nonzero_coeffs)
            ]
            write_line(myfile, 3, '; '.join(alpha_stmts) + ';')

        # The call is identical in both cases except for its trailing args.
        block_args = ', '.join(
            '&%s%d[ j * ld%s + i ]' % (mat_name, n, mat_name)
            for n in range(nnz))
        call = '( bl_dgemm_micro_kernel_stra_abc%d ) ( k, &packA[ i * k ], &packB[ j * k ], ' % index
        call += '(unsigned long long) ld%s, ' % mat_name
        call += block_args
        call += ', alpha_list , &aux );' if needs_alpha else ', &aux );'
        write_line(myfile, 3, call)

    # Close the i loop, the j loop and the function body.
    for depth in (2, 1, 0):
        write_line(myfile, depth, '}')
Ejemplo n.º 22
0
def gen_abc_fmm(coeff_filename, dims, level, outfilename,
                micro_kernel_filename, kernel_header_filename):
    '''Generate the C sources for an ABC fast-matrix-multiply routine.

    Three files are written:
      * outfilename           -- packing functions, macro kernels, the
                                 straprim functions and the top-level
                                 bl_dgemm_strassen_abc() driver;
      * micro_kernel_filename -- output of create_micro_functions();
      * kernel_header_filename -- output of create_kernel_header().

    coeff_filename -- file read with read_coeffs()
    dims, level    -- forwarded to generateCoeffs / writePartition /
                      exp_dim and the straprim generators

    The two auxiliary files are opened in `with` blocks so they are closed
    (and flushed) even if a generator raises.
    '''
    coeffs = read_coeffs(coeff_filename)

    # NOTE(review): the [10:] slice drops the first 10 characters of the
    # header path before it is used in #include lines; presumably a fixed
    # directory prefix -- confirm against the callers.
    header_include = kernel_header_filename[10:]

    with open(outfilename, 'w') as myfile:
        write_line(myfile, 0, '#include "%s"' % header_include)
        write_line(myfile, 0, '#include "bl_dgemm.h"')
        write_break(myfile)

        cur_coeffs = generateCoeffs(coeffs, level)
        num_multiplies = len(cur_coeffs[0][0])

        create_packm_functions(myfile, cur_coeffs)

        with open(micro_kernel_filename, 'w') as my_micro_file:
            create_micro_functions(my_micro_file, cur_coeffs, header_include)

        with open(kernel_header_filename, 'w') as my_kernel_header:
            create_kernel_header(my_kernel_header, cur_coeffs)

        create_macro_functions(myfile, cur_coeffs)

        create_straprim_abc_functions(myfile, cur_coeffs, dims, level)

        # Top-level driver function.
        write_line(
            myfile, 0,
            'void bl_dgemm_strassen_abc( int m, int n, int k, double *XA, int lda, double *XB, int ldb, double *XC, int ldc )'
        )
        write_line(myfile, 0, '{')

        write_abc_strassen_header(myfile)

        writePartition(myfile, dims, level)

        write_break(myfile)

        # The straprim calls sit inside an (optionally) parallel region.
        write_line(myfile, 0, '#ifdef _PARALLEL_')
        write_line(myfile, 1, '#pragma omp parallel num_threads( bl_ic_nt )')
        write_line(myfile, 0, '#endif')
        write_line(myfile, 1, '{')
        create_straprim_caller(myfile, cur_coeffs, dims, num_multiplies, level)
        write_line(myfile, 1, '}')

        write_break(myfile)
        level_dim = exp_dim(dims, level)
        write_line(
            myfile, 1,
            'bl_dynamic_peeling( m, n, k, XA, lda, XB, ldb, XC, ldc, %d * DGEMM_MR, %d, %d * DGEMM_NR );'
            % (level_dim[0], level_dim[1], level_dim[2]))

        write_break(myfile)
        write_line(myfile, 1, '//free( packA );')
        write_line(myfile, 1, '//free( packB );')

        write_line(myfile, 0, '}')
Ejemplo n.º 23
0
def write_abc_strassen_header(myfile):
    '''Emit the opening declarations of the generated driver function.

    Writes the packA/packB pointer declaration, the bl_ic_nt lookup from the
    BLISLAB_IC_NT environment variable, two commented-out aligned-allocation
    lines, and the bl_malloc_packing_pool call.  All lines go out at
    indentation level 1, separated by blank lines from write_break.
    '''
    # None marks a blank line (write_break); strings are level-1 lines.
    layout = (
        'double *packA, *packB;',
        None,
        'int bl_ic_nt = bl_read_nway_from_env( "BLISLAB_IC_NT" );',
        None,
        '//// Allocate packing buffers',
        '//packA  = bl_malloc_aligned( DGEMM_KC, ( DGEMM_MC + 1 ) * bl_ic_nt, sizeof(double) );',
        '//packB  = bl_malloc_aligned( DGEMM_KC, ( DGEMM_NC + 1 )           , sizeof(double) );',
        'bl_malloc_packing_pool( &packA, &packB, n, bl_ic_nt );',
        None,
    )
    for entry in layout:
        if entry is None:
            write_break(myfile)
        else:
            write_line(myfile, 1, entry)
Ejemplo n.º 24
0
def write_straprim_abc_function(myfile, index, a_coeffs, b_coeffs, c_coeffs,
                                dims, level):
    '''Emit the C function bl_dgemm_straprim_abc<index>.

    A leading C comment spells out the product M<index> and the updates it
    contributes to the C blocks.  The generated function loops over jc/pc
    panels; inside, it packs B through packB_add_stra_abc<index>, then for
    each ic panel packs A through packA_add_stra_abc<index> and calls
    bl_macro_kernel_stra_abc<index>.  "#pragma omp barrier" directives,
    guarded by _PARALLEL_, are emitted after the B packing, after the A
    packing / macro-kernel section, and at the end of the function.

    myfile   -- output handle passed through to write_line / write_break
    index    -- number of this multiply, appended to generated names
    a_coeffs, b_coeffs, c_coeffs -- coefficient rows; getNNZ() of each
                decides how many a<i>/b<i>/c<i> pointer arguments appear
    dims, level -- forwarded to getBlockName for the leading comment
    '''

    def emit(depth, text):
        write_line(myfile, depth, text)

    def emit_barrier():
        # Barrier directives are always written at column 0.
        for directive in ('#ifdef _PARALLEL_', '#pragma omp barrier',
                          '#endif'):
            emit(0, directive)

    def linear_combo(mat_id, coeff_row):
        # "c0 * X0 + c1 * X1 ..." over the nonzero entries of coeff_row.
        terms = []
        for pos, coeff in enumerate(coeff_row):
            if is_nonzero(coeff):
                terms.append(
                    str(coeff) +
                    ' * %s' % getBlockName(mat_id, pos, dims, level))
        return ' + '.join(terms)

    def operands(template, coeff_row):
        # One formatted operand per nonzero coefficient, comma separated.
        return ', '.join(
            [template % pos for pos in range(getNNZ(coeff_row))])

    # ---- descriptive comment -------------------------------------------
    description = '// M%d = (' % index
    description += linear_combo(0, a_coeffs)
    description += ') * ('
    description += linear_combo(1, b_coeffs)
    description += '); '
    updates = []
    for pos, coeff in enumerate(c_coeffs):
        if is_nonzero(coeff):
            updates.append(' %s += %s * M%d' %
                           (getBlockName(2, pos, dims, level), coeff, index))
    description += '; '.join(updates)
    description += ';'
    emit(0, description)

    # ---- signature -------------------------------------------------------
    signature = 'void bl_dgemm_straprim_abc%d( int m, int n, int k, ' % index
    signature += operands('double* a%d', a_coeffs) + ', int lda, '
    signature += operands('double* b%d', b_coeffs) + ', int ldb, '
    signature += operands('double* c%d', c_coeffs)
    signature += ', int ldc, double *packA, double *packB, int bl_ic_nt ) {'
    emit(0, signature)

    # ---- outer jc / pc loops and B packing -------------------------------
    for depth, text in (
        (1, 'int i, j, p, ic, ib, jc, jb, pc, pb;'),
        (1, 'for ( jc = 0; jc < n; jc += DGEMM_NC ) {'),
        (2, 'jb = min( n - jc, DGEMM_NC );'),
        (2, 'for ( pc = 0; pc < k; pc += DGEMM_KC ) {'),
        (3, 'pb = min( k - pc, DGEMM_KC );'),
        (3, '{'),
        (4, 'int tid = omp_get_thread_num();'),
        (4, 'int my_start;'),
        (4, 'int my_end;'),
        (4, 'bl_get_range( jb, DGEMM_NR, &my_start, &my_end );'),
        (4, 'for ( j = my_start; j < my_end; j += DGEMM_NR ) {'),
    ):
        emit(depth, text)

    pack_b_call = 'packB_add_stra_abc%d( min( jb - j, DGEMM_NR ), pb, ' % index
    pack_b_call += operands('&b%d[ pc + (jc+j)*ldb ]', b_coeffs)
    pack_b_call += ', ldb, &packB[ j * pb ] );'
    emit(5, pack_b_call)
    emit(4, '}')
    emit(3, '}')

    emit_barrier()

    # ---- ic loop: A packing followed by the macro kernel -----------------
    for depth, text in (
        (3, '{'),
        (4, 'int tid = omp_get_thread_num();'),
        (4, 'int my_start;'),
        (4, 'int my_end;'),
        (4, 'bl_get_range( m, DGEMM_MR, &my_start, &my_end );'),
        (4, 'for ( ic = my_start; ic < my_end; ic += DGEMM_MC ) {'),
        (5, 'ib = min( my_end - ic, DGEMM_MC );'),
        (5, 'for ( i = 0; i < ib; i += DGEMM_MR ) {'),
    ):
        emit(depth, text)

    pack_a_call = 'packA_add_stra_abc%d( min( ib - i, DGEMM_MR ), pb, ' % index
    pack_a_call += operands('&a%d[ pc*lda + (ic+i) ]', a_coeffs)
    pack_a_call += ', lda, &packA[ tid * DGEMM_MC * pb + i * pb ] );'
    emit(6, pack_a_call)

    emit(5, '}')

    macro_call = 'bl_macro_kernel_stra_abc%d( ib, jb, pb, packA + tid * DGEMM_MC * pb, packB, ' % index
    macro_call += operands('&c%d[ jc * ldc + ic ]', c_coeffs)
    macro_call += ', ldc );'
    emit(5, macro_call)

    emit(4, '}')
    emit(3, '}')
    emit_barrier()

    # ---- close pc and jc loops, final barrier, end of function -----------
    emit(2, '}')
    emit(1, '}')

    emit_barrier()
    emit(0, '}')
    write_break(myfile)
Ejemplo n.º 25
0
def write_packm_func(myfile, coeffs, index, mat_name):
    ''' Write the C packing function pack<mat_name>_add_stra_abc<index>.

    The generated function takes one source pointer per nonzero coefficient
    and, for every j in [0, n), writes DGEMM_MR (for 'A') or DGEMM_NR (for
    'B') entries of the pack buffer.  Each entry is the expression built by
    arith_expression_pntr() over the nonzero coefficients.

    myfile   -- output handle passed through to write_line
    coeffs   -- coefficients used for the add; zero entries are dropped
    index    -- number appended to the generated function name
    mat_name -- 'A' or 'B'; selects the pack width and the source strides

    Raises ValueError for any other mat_name.  The check runs before
    anything is written, so a bad name never leaves a half-written function
    in the output file.
    '''
    if mat_name == 'A':
        ldp = 'DGEMM_MR'  # entries written per j iteration
        ldm = 'ld%s' % mat_name  # source offset factor applied to j
        incm = '1'  # handed to arith_expression_pntr
    elif mat_name == 'B':
        ldp = 'DGEMM_NR'
        ldm = '1'
        incm = 'ld%s' % mat_name
    else:
        raise ValueError(
            "Wrong mat_name! Expected 'A' or 'B', got %r" % (mat_name, ))

    nonzero_coeffs = [coeff for coeff in coeffs if is_nonzero(coeff)]
    nnz = len(nonzero_coeffs)
    # TODO(arbenson): put in a code-generated comment here
    add = 'inline void pack%s_add_stra_abc%d( int m, int n, ' % (mat_name,
                                                                 index)
    add += ', '.join(['double *%s%d' % (mat_name, i) for i in range(nnz)])
    add += ', int ld%s, double *pack%s ' % (mat_name, mat_name)
    add += ') {'
    write_line(myfile, 0, add)

    write_line(myfile, 1, 'int i, j;')

    # Working pointers: one per source block plus one into the pack buffer.
    add = 'double '
    add += ', '.join(['*%s%d_pntr' % (mat_name, i) for i in range(nnz)])
    add += ', *pack%s_pntr;' % mat_name
    write_line(myfile, 1, add)

    write_line(myfile, 1, 'for ( j = 0; j < n; ++j ) {')
    write_line(myfile, 2,
               'pack%s_pntr = &pack%s[ %s * j ];' % (mat_name, mat_name, ldp))

    # A unit factor is emitted as a plain "[ j ]" rather than "[ 1 * j ]".
    if ldm == '1':
        add = ''.join([
            '%s%d_pntr = &%s%d[ j ]; ' % (mat_name, i, mat_name, i)
            for i in range(nnz)
        ])
    else:
        add = ''.join([
            '%s%d_pntr = &%s%d[ %s * j ]; ' % (mat_name, i, mat_name, i, ldm)
            for i in range(nnz)
        ])
    write_line(myfile, 2, add)

    write_line(myfile, 2, 'for ( i = 0; i < %s; ++i ) {' % ldp)

    add = 'pack%s_pntr[ i ]' % mat_name + ' ='
    for ind, coeff in enumerate(nonzero_coeffs):
        add += arith_expression_pntr(coeff, mat_name, ind, incm)
    add += ';'
    write_line(myfile, 3, add)

    write_line(myfile, 2, '}')
    write_line(myfile, 1, '}')

    write_line(myfile, 0, '}')  # end of function