Ejemplo n.º 1
0
def gen_all_traces(array_h=4,
                   array_w=4,
                   ifmap_h=7,
                   ifmap_w=7,
                   filt_h=3,
                   filt_w=3,
                   num_channels=3,
                   strides=1,
                   num_filt=8,
                   data_flow='os',
                   word_size_bytes=1,
                   filter_sram_size=64,
                   ifmap_sram_size=64,
                   ofmap_sram_size=64,
                   filt_base=1000000,
                   ifmap_base=0,
                   ofmap_base=2000000,
                   sram_read_trace_file="sram_read.csv",
                   sram_write_trace_file="sram_write.csv",
                   dram_filter_trace_file="dram_filter_read.csv",
                   dram_ifmap_trace_file="dram_ifmap_read.csv",
                   dram_ofmap_trace_file="dram_ofmap_write.csv"):
    """Generate SRAM and DRAM trace files and return bandwidth statistics.

    The SRAM traces are produced by the traffic generator matching
    ``data_flow`` ('os' -> ``sram``, 'ws' -> ``sram_ws``, 'is' -> ``sram_is``).
    For any other value no SRAM generator runs and the reported cycle count
    and utilization stay at 0; the DRAM passes and ``gen_bw_numbers`` are
    still executed.

    Args:
        array_h, array_w: forwarded as ``dimension_rows`` / ``dimension_cols``.
        ifmap_h, ifmap_w, filt_h, filt_w, num_channels, strides, num_filt:
            forwarded unchanged to the SRAM traffic generator.
        data_flow: dataflow selector; 'os', 'ws' and 'is' are recognised.
        word_size_bytes: forwarded to the DRAM trace generators as the
            word / data width in bytes.
        filter_sram_size, ifmap_sram_size, ofmap_sram_size: SRAM sizes
            forwarded to the matching DRAM trace generator.
        filt_base, ifmap_base, ofmap_base: base addresses forwarded to the
            SRAM generator; they also serve as the ``min_addr`` /
            ``max_addr`` bounds of the two DRAM read passes.
        sram_read_trace_file, sram_write_trace_file: SRAM trace file names.
        dram_filter_trace_file, dram_ifmap_trace_file, dram_ofmap_trace_file:
            DRAM trace file names.

    Returns:
        Tuple ``(bw_numbers, detailed_log, util, sram_cycles)``.
    """
    print("Generating traces and bw numbers")

    # Select the SRAM traffic generator for the requested dataflow.
    if data_flow == 'os':
        traffic_fn = sram.sram_traffic
    elif data_flow == 'ws':
        traffic_fn = sram_ws.sram_traffic
    elif data_flow == 'is':
        traffic_fn = sram_is.sram_traffic
    else:
        traffic_fn = None

    if traffic_fn is None:
        # Unrecognised dataflow: nothing is simulated, stats remain zero.
        sram_cycles, util = 0, 0
    else:
        sram_cycles, util = traffic_fn(
            dimension_rows=array_h,
            dimension_cols=array_w,
            ifmap_h=ifmap_h,
            ifmap_w=ifmap_w,
            filt_h=filt_h,
            filt_w=filt_w,
            num_channels=num_channels,
            strides=strides,
            num_filt=num_filt,
            filt_base=filt_base,
            ifmap_base=ifmap_base,
            ofmap_base=ofmap_base,
            sram_read_trace_file=sram_read_trace_file,
            sram_write_trace_file=sram_write_trace_file,
        )

    # Both DRAM read traces are derived from the same SRAM read trace; each
    # pass is bounded by a different (min_addr, max_addr) pair.
    read_passes = (
        (ifmap_sram_size, ifmap_base, filt_base, dram_ifmap_trace_file),
        (filter_sram_size, filt_base, ofmap_base, dram_filter_trace_file),
    )
    for buffer_size, low_addr, high_addr, dram_file in read_passes:
        dram.dram_trace_read_v2(
            sram_sz=buffer_size,
            word_sz_bytes=word_size_bytes,
            min_addr=low_addr,
            max_addr=high_addr,
            sram_trace_file=sram_read_trace_file,
            dram_trace_file=dram_file,
        )

    # The DRAM write trace is derived from the SRAM write trace.
    dram.dram_trace_write(
        ofmap_sram_size=ofmap_sram_size,
        data_width_bytes=word_size_bytes,
        sram_write_trace_file=sram_write_trace_file,
        dram_write_trace_file=dram_ofmap_trace_file,
    )

    print("Average utilization : \t" + str(util) + " %")
    print("Cycles for compute  : \t" + str(sram_cycles) + " cycles")

    bw_numbers, detailed_log = gen_bw_numbers(
        dram_ifmap_trace_file,
        dram_filter_trace_file,
        dram_ofmap_trace_file,
        sram_write_trace_file,
        sram_read_trace_file,
    )

    return bw_numbers, detailed_log, util, sram_cycles
Ejemplo n.º 2
0
def gen_all_traces_dramsim_format(
        array_h=4,
        array_w=4,
        ifmap_h=7,
        ifmap_w=7,
        filt_h=3,
        filt_w=3,
        num_channels=3,
        strides=1,
        num_filt=8,
        data_flow='os',

        # word_size_bytes=1,
        dram_read_bw=8,
        dram_write_bw=8,
        dram_read_wsb=1,
        dram_write_wsb=1,
        filter_sram_size=64,
        ifmap_sram_size=64,
        ofmap_sram_size=64,
        filt_base=1000000,
        ifmap_base=0,
        ofmap_base=2000000,
        sram_read_trace_file="sram_read.csv",
        sram_write_trace_file="sram_write.csv",
        dram_filter_trace_file="dram_filter_read.csv",
        dram_ifmap_trace_file="dram_ifmap_read.csv",
        dram_ofmap_trace_file="dram_ofmap_write.csv"):
    """Generate SRAM traces and DRAM traces via the ``dramsim_format`` writers.

    The SRAM traces are produced by the traffic generator matching
    ``data_flow`` ('os' -> ``sram``, 'ws' -> ``sram_ws``, 'is' -> ``sram_is``).
    For any other value no SRAM generator runs and the returned stats stay
    at 0.  Unlike the plain trace flow, no bandwidth numbers are computed
    here; a warning saying so is printed instead.

    Args:
        array_h, array_w: forwarded as ``dimension_rows`` / ``dimension_cols``.
        ifmap_h, ifmap_w, filt_h, filt_w, num_channels, strides, num_filt:
            forwarded unchanged to the SRAM traffic generator.
        data_flow: dataflow selector; 'os', 'ws' and 'is' are recognised.
        dram_read_bw, dram_write_bw: forwarded as ``default_read_bw`` /
            ``default_write_bw`` to the DRAM trace writers.
        dram_read_wsb, dram_write_wsb: forwarded as ``word_sz_bytes`` /
            ``data_width_bytes`` to the DRAM trace writers.
        filter_sram_size, ifmap_sram_size, ofmap_sram_size: SRAM sizes
            forwarded to the matching DRAM trace writer.
        filt_base, ifmap_base, ofmap_base: base addresses forwarded to the
            SRAM generator; they also serve as the ``min_addr`` /
            ``max_addr`` bounds of the two DRAM read passes.
        sram_read_trace_file, sram_write_trace_file: SRAM trace file names.
        dram_filter_trace_file, dram_ifmap_trace_file, dram_ofmap_trace_file:
            DRAM trace file names.

    Returns:
        Tuple ``(util, sram_cycles)``.
    """
    print("Generating traces and bw numbers")

    # Select the SRAM traffic generator for the requested dataflow.
    if data_flow == 'os':
        traffic_fn = sram.sram_traffic
    elif data_flow == 'ws':
        traffic_fn = sram_ws.sram_traffic
    elif data_flow == 'is':
        traffic_fn = sram_is.sram_traffic
    else:
        traffic_fn = None

    if traffic_fn is None:
        # Unrecognised dataflow: nothing is simulated, stats remain zero.
        sram_cycles, util = 0, 0
    else:
        sram_cycles, util = traffic_fn(
            dimension_rows=array_h,
            dimension_cols=array_w,
            ifmap_h=ifmap_h,
            ifmap_w=ifmap_w,
            filt_h=filt_h,
            filt_w=filt_w,
            num_channels=num_channels,
            strides=strides,
            num_filt=num_filt,
            filt_base=filt_base,
            ifmap_base=ifmap_base,
            ofmap_base=ofmap_base,
            sram_read_trace_file=sram_read_trace_file,
            sram_write_trace_file=sram_write_trace_file,
        )

    # Both DRAM read traces are derived from the same SRAM read trace; each
    # pass is bounded by a different (min_addr, max_addr) pair.
    read_passes = (
        (ifmap_sram_size, ifmap_base, filt_base, dram_ifmap_trace_file),
        (filter_sram_size, filt_base, ofmap_base, dram_filter_trace_file),
    )
    for buffer_size, low_addr, high_addr, dram_file in read_passes:
        dramsim_format.dram_trace_read_dramsim(
            sram_sz=buffer_size,
            word_sz_bytes=dram_read_wsb,
            default_read_bw=dram_read_bw,
            min_addr=low_addr,
            max_addr=high_addr,
            sram_trace_file=sram_read_trace_file,
            dram_trace_file=dram_file,
        )

    # The DRAM write trace is derived from the SRAM write trace.
    dramsim_format.dram_trace_write_dramsim(
        ofmap_sram_size=ofmap_sram_size,
        data_width_bytes=dram_write_wsb,
        default_write_bw=dram_write_bw,
        sram_write_trace_file=sram_write_trace_file,
        dram_write_trace_file=dram_ofmap_trace_file,
    )

    print("Average utilization : \t" + str(util) + " %")
    print("Cycles for compute  : \t" + str(sram_cycles) + " cycles")
    # Bandwidth statistics are not produced for dramsim-format traces.
    print(
        "Fred [WARNING]: The bw stats part are not taken care, yet to be correct by parsing dramsim format dram "
        "traces")

    return util, sram_cycles
Ejemplo n.º 3
0
def gen_all_traces(array_h=4,
                   array_w=4,
                   ifmap_h=7,
                   ifmap_w=7,
                   filt_h=3,
                   filt_w=3,
                   num_channels=3,
                   strides=1,
                   num_filt=8,
                   data_flow='os',
                   layer_tag="Conv1",
                   word_size_bytes=1,
                   filter_sram_size=64,
                   ifmap_sram_size=64,
                   ofmap_sram_size=64,
                   filt_base=1000000,
                   ifmap_base=0,
                   ofmap_base=2000000,
                   sram_read_trace_file="sram_read.csv",
                   sram_write_trace_file="sram_write.csv",
                   dram_filter_trace_file="dram_filter_read.csv",
                   dram_ifmap_trace_file="dram_ifmap_read.csv",
                   dram_ofmap_trace_file="dram_ofmap_write.csv"):
    """Generate (or fetch from a lookup table) traces and bandwidth stats.

    The work is done in five stages: SRAM traces, IFMAP DRAM reads, filter
    DRAM reads, OFMAP DRAM writes and the bandwidth log.  For each stage,
    when ``lookup_flag`` is truthy a tag is built and looked up through
    ``lut.lookup``; on a match the stored result is fetched into the current
    working directory, otherwise the stage is generated from scratch.
    ``lookup_flag`` and ``lut`` are not defined in this function and must be
    supplied by the enclosing module.  This function only reads from the
    lookup table; it never creates new entries.

    The SRAM stage runs only when ``data_flow`` is 'os', 'ws' or 'is'; for
    any other value the returned cycle count and utilization stay at 0.
    The function prints nothing.

    Args:
        array_h, array_w: forwarded as ``dimension_rows`` / ``dimension_cols``
            and also embedded in the lookup tag.
        ifmap_h, ifmap_w, filt_h, filt_w, num_channels, strides, num_filt:
            forwarded unchanged to the SRAM traffic generator.
        data_flow: dataflow selector; also embedded in the lookup tags.
        layer_tag: prefix of every lookup tag.
        word_size_bytes: forwarded to the DRAM trace generators as the
            word / data width in bytes.
        filter_sram_size, ifmap_sram_size, ofmap_sram_size: SRAM sizes
            forwarded to the DRAM trace generators and embedded in the
            DRAM lookup tags.
        filt_base, ifmap_base, ofmap_base: base addresses forwarded to the
            SRAM generator; ``ifmap_base`` and ``filt_base`` also bound the
            DRAM read passes.
        sram_read_trace_file, sram_write_trace_file: SRAM trace file names.
        dram_filter_trace_file, dram_ifmap_trace_file, dram_ofmap_trace_file:
            DRAM trace file names.

    Returns:
        Tuple ``(bw_numbers, detailed_log, util, sram_cycles)``.
    """
    sram_cycles = 0
    util = 0

    tag = layer_tag + "_" + str(array_h) + "_" + str(array_w)

    def dram_tag_for(sram_size, suffix):
        """Build the lookup tag of one DRAM stage (only called on lookups)."""
        return tag + "_" + data_flow + "_" + str(sram_size) + suffix

    # ---- Stage 1: SRAM traces -------------------------------------------
    known_flow = None
    for candidate in ('os', 'ws', 'is'):
        if data_flow == candidate:
            known_flow = candidate
            break

    if known_flow is not None:
        sram_hit = False

        if lookup_flag:
            sram_hit, dir_name = lut.lookup(tag + "_" + known_flow)

        if sram_hit:
            here = os.getcwd()
            sram_cycles, util = lut.get_sram_stats(dir_name, dest_dir=here)
        else:
            # Resolve the generator only on a miss, so the traffic modules
            # are not touched when the cached stats are used.
            if known_flow == 'os':
                traffic_fn = sram.sram_traffic
            elif known_flow == 'ws':
                traffic_fn = sram_ws.sram_traffic
            else:
                traffic_fn = sram_is.sram_traffic

            sram_cycles, util = traffic_fn(
                dimension_rows=array_h,
                dimension_cols=array_w,
                ifmap_h=ifmap_h,
                ifmap_w=ifmap_w,
                filt_h=filt_h,
                filt_w=filt_w,
                num_channels=num_channels,
                strides=strides,
                num_filt=num_filt,
                filt_base=filt_base,
                ifmap_base=ifmap_base,
                ofmap_base=ofmap_base,
                sram_read_trace_file=sram_read_trace_file,
                sram_write_trace_file=sram_write_trace_file,
            )

    # ---- Stage 2: IFMAP DRAM read trace ---------------------------------
    ifmap_hit = False

    if lookup_flag:
        ifmap_hit, dir_name = lut.lookup(
            dram_tag_for(ifmap_sram_size, "_ifmap"))

    if ifmap_hit:
        here = os.getcwd()
        lut.get_dram_trace(dir_name, here, cat='ifmap')
    else:
        dram.dram_trace_read_v2(
            sram_sz=ifmap_sram_size,
            word_sz_bytes=word_size_bytes,
            min_addr=ifmap_base,
            max_addr=filt_base,
            sram_trace_file=sram_read_trace_file,
            dram_trace_file=dram_ifmap_trace_file,
        )

    # ---- Stage 3: filter DRAM read trace --------------------------------
    filter_hit = False

    if lookup_flag:
        filter_hit, dir_name = lut.lookup(
            dram_tag_for(filter_sram_size, "_filter"))

    if filter_hit:
        here = os.getcwd()
        lut.get_dram_trace(dir_name, here, cat='filter')
    else:
        # NOTE(review): the upper bound is filt_base * 10000, not
        # ofmap_base -- confirm this is the intended filter address range.
        dram.dram_trace_read_v2(
            sram_sz=filter_sram_size,
            word_sz_bytes=word_size_bytes,
            min_addr=filt_base,
            max_addr=(filt_base * 10000),
            sram_trace_file=sram_read_trace_file,
            dram_trace_file=dram_filter_trace_file,
        )

    # ---- Stage 4: OFMAP DRAM write trace --------------------------------
    ofmap_hit = False

    if lookup_flag:
        ofmap_hit, dir_name = lut.lookup(
            dram_tag_for(ofmap_sram_size, "_ofmap"))

    if ofmap_hit:
        here = os.getcwd()
        lut.get_dram_trace(dir_name, here, cat='ofmap')
    else:
        dram.dram_trace_write(
            ofmap_sram_size=ofmap_sram_size,
            data_width_bytes=word_size_bytes,
            sram_write_trace_file=sram_write_trace_file,
            dram_write_trace_file=dram_ofmap_trace_file,
        )

    # ---- Stage 5: bandwidth numbers and detailed log --------------------
    log_hit = False

    if lookup_flag:
        # The log tag combines all three SRAM sizes.
        log_tag = dram_tag_for(ifmap_sram_size, "_ifmap")
        log_tag += "_" + str(filter_sram_size) + "_filter"
        log_tag += "_" + str(ofmap_sram_size) + "_ofmap"
        log_hit, dir_name = lut.lookup(log_tag)

    if log_hit:
        here = os.getcwd()
        bw_numbers, detailed_log = lut.get_log_entries(dir_name, here)
    else:
        bw_numbers, detailed_log = gen_bw_numbers(
            dram_ifmap_trace_file,
            dram_filter_trace_file,
            dram_ofmap_trace_file,
            sram_write_trace_file,
            sram_read_trace_file,
        )

    return bw_numbers, detailed_log, util, sram_cycles
Ejemplo n.º 4
0
def gen_all_traces(array_h_first=4,
                   array_w_first=4,
                   array_h_second=4,
                   array_w_second=4,
                   single_array=1,
                   ifmap_h=7,
                   ifmap_w=7,
                   filt_h=3,
                   filt_w=3,
                   num_channels=3,
                   strides=1,
                   num_filt=8,
                   data_flow='os',
                   word_size_bytes=1,
                   filter_sram_size_first=64,
                   ifmap_sram_size_first=64,
                   ofmap_sram_size_first=64,
                   filter_sram_size_second=64,
                   ifmap_sram_size_second=64,
                   ofmap_sram_size_second=64,
                   filt_base=1000000,
                   ifmap_base=0,
                   ofmap_base=2000000,
                   sram_read_trace_file_first="sram0_read.csv",
                   sram_read_trace_file_second="sram1_read.csv",
                   sram_write_trace_file_first="sram0_write.csv",
                   sram_write_trace_file_second="sram1_write.csv",
                   dram_filter_trace_file="dram_filter_read.csv",
                   dram_ifmap_trace_file="dram_ifmap_read.csv",
                   dram_ofmap_trace_file="dram_ofmap_write.csv"):

    sram_cycles = 0
    sram_cycles_first = 0
    sram_cycles_second = 0
    array_one_used = 0
    array_two_used = 0
    util_first = 0
    util_second = 0

    dram_filter_trace_file_first = "dram_sram0_filter_read.csv"
    dram_ifmap_trace_file_first = "dram_sram0_ifmap_read.csv"
    dram_ofmap_trace_file_first = "dram_sram0_ofmap_write.csv"

    dram_filter_trace_file_second = "dram_sram1_filter_read.csv"
    dram_ifmap_trace_file_second = "dram_sram1_ifmap_read.csv"
    dram_ofmap_trace_file_second = "dram_sram1_ofmap_write.csv"

    print("Generating traces and bw numbers")
    if data_flow == 'os':

        num_filt_first = 0
        num_filt_second = 0

        i = 1

        no_of_filt_px = filt_h * filt_w * num_channels

        max_parallel_window_first = 1  #Since OS can't have more than one filter in a column given er vertical fold
        max_parallel_window_second = 1

        avail_filt_per_fold = (array_w_first * max_parallel_window_first) + (
            array_w_second * max_parallel_window_second)

        while True:
            filt_processing = i * avail_filt_per_fold

            if num_filt <= filt_processing:
                filt_pend = num_filt - ((i - 1) * avail_filt_per_fold)

                if filt_pend <= (array_w_first *
                                 max_parallel_window_first) and filt_pend > (
                                     array_w_second *
                                     max_parallel_window_second
                                 ):  ##Accomodating the last fold in systolic 1
                    num_filt_first = num_filt_first + filt_pend

                elif filt_pend > (
                        array_w_first *
                        max_parallel_window_first) and filt_pend <= (
                            array_w_second * max_parallel_window_second
                        ):  ## Accomodating the last fold in systolic 2
                    num_filt_second = num_filt_second + filt_pend

                elif filt_pend == avail_filt_per_fold:
                    num_filt_first = num_filt_first + (
                        array_w_first * max_parallel_window_first)
                    num_filt_second = num_filt_second + (
                        array_w_second * max_parallel_window_second)

                elif filt_pend <= (
                        array_w_first *
                        max_parallel_window_first) and filt_pend <= (
                            array_w_second * max_parallel_window_second):
                    col_ratio_first = float(
                        filt_pend /
                        (array_w_first * max_parallel_window_first))
                    col_ratio_second = float(
                        filt_pend /
                        (array_w_second * max_parallel_window_second))

                    if (col_ratio_first >= col_ratio_second):
                        num_filt_first = num_filt_first + filt_pend

                    else:
                        num_filt_second = num_filt_second + filt_pend

                else:
                    col_ratio_first = float(
                        (filt_pend -
                         (array_w_second * max_parallel_window_second)) /
                        (array_w_first * max_parallel_window_first))
                    col_ratio_second = float(
                        (filt_pend -
                         (array_w_first * max_parallel_window_first)) /
                        (array_w_second * max_parallel_window_second))

                    if (col_ratio_first > col_ratio_second):
                        num_filt_second = num_filt_second + (
                            array_w_second * max_parallel_window_second)
                        num_filt_first = num_filt_first + (
                            filt_pend -
                            (array_w_second * max_parallel_window_second))

                    else:
                        num_filt_first = num_filt_first + (
                            array_w_first * max_parallel_window_first)
                        num_filt_second = num_filt_second + (
                            filt_pend -
                            (array_w_first * max_parallel_window_first))

                col_idx_base = num_filt_first  ##Starting from systolic 1 and taking the systolic 1 filter count as the beginning for the next systolic as base addresss

                break

            else:

                num_filt_first = num_filt_first + (array_w_first *
                                                   max_parallel_window_first)
                num_filt_second = num_filt_second + (
                    array_w_second * max_parallel_window_second)

                i = i + 1

        if single_array == 1:
            num_filt_first = num_filt
            num_filt_second = 0

        if num_filt_first > 0:
            array_one_used = 1

            sram_cycles_first, util_first = \
                sram.sram_traffic(
                    dimension_rows= array_h_first,
                    dimension_cols= array_w_first,
                    ifmap_h=ifmap_h, ifmap_w=ifmap_w,
                    filt_h=filt_h, filt_w=filt_w,
                    num_channels=num_channels,
                    strides=strides, num_filt=num_filt_first, total_num_filt = num_filt,
                    filt_base=filt_base, ifmap_base=ifmap_base, col_idx_base = 0,
                    ofmap_base = ofmap_base,
                    sram_read_trace_file=sram_read_trace_file_first,
                    sram_write_trace_file=sram_write_trace_file_first
                )
        else:
            sram_cycles_first = 0

        if num_filt_second > 0:
            array_two_used = 1

            sram_cycles_second, util_second = \
                sram.sram_traffic(
                    dimension_rows= array_h_second,
                    dimension_cols= array_w_second,
                    ifmap_h=ifmap_h, ifmap_w=ifmap_w,
                    filt_h=filt_h, filt_w=filt_w,
                    num_channels=num_channels,
                    strides=strides, num_filt=num_filt_second, total_num_filt = num_filt,
                    filt_base=filt_base, ifmap_base=ifmap_base, col_idx_base = col_idx_base,
                    ofmap_base = ofmap_base,
                    sram_read_trace_file=sram_read_trace_file_second,
                    sram_write_trace_file=sram_write_trace_file_second
                )
        else:
            sram_cycles_second = 0

    elif data_flow == 'ws':

        num_filt_first = 0
        num_filt_second = 0

        i = 1

        no_of_filt_px = filt_h * filt_w * num_channels

        if array_h_first < no_of_filt_px:
            max_parallel_window_first = 1
        else:
            max_parallel_window_first = math.floor(array_h_first /
                                                   no_of_filt_px)

        if array_h_second < no_of_filt_px:
            max_parallel_window_second = 1
        else:
            max_parallel_window_second = math.floor(array_h_second /
                                                    no_of_filt_px)

        avail_filt_per_fold = (array_w_first * max_parallel_window_first) + (
            array_w_second * max_parallel_window_second)

        while True:
            filt_processing = i * avail_filt_per_fold

            if num_filt <= filt_processing:
                filt_pend = num_filt - ((i - 1) * avail_filt_per_fold)

                if filt_pend <= (array_w_first *
                                 max_parallel_window_first) and filt_pend > (
                                     array_w_second *
                                     max_parallel_window_second
                                 ):  ##Accomodating the last fold in systolic 1
                    num_filt_first = num_filt_first + filt_pend

                elif filt_pend > (
                        array_w_first *
                        max_parallel_window_first) and filt_pend <= (
                            array_w_second * max_parallel_window_second
                        ):  ## Accomodating the last fold in systolic 2
                    num_filt_second = num_filt_second + filt_pend

                elif filt_pend == avail_filt_per_fold:
                    num_filt_first = num_filt_first + (
                        array_w_first * max_parallel_window_first)
                    num_filt_second = num_filt_second + (
                        array_w_second * max_parallel_window_second)

                elif filt_pend <= (
                        array_w_first *
                        max_parallel_window_first) and filt_pend <= (
                            array_w_second * max_parallel_window_second):
                    col_ratio_first = float(
                        filt_pend /
                        (array_w_first * max_parallel_window_first))
                    col_ratio_second = float(
                        filt_pend /
                        (array_w_second * max_parallel_window_second))

                    if (col_ratio_first >= col_ratio_second):
                        num_filt_first = num_filt_first + filt_pend

                    else:
                        num_filt_second = num_filt_second + filt_pend

                else:
                    col_ratio_first = float(
                        (filt_pend -
                         (array_w_second * max_parallel_window_second)) /
                        (array_w_first * max_parallel_window_first))
                    col_ratio_second = float(
                        (filt_pend -
                         (array_w_first * max_parallel_window_first)) /
                        (array_w_second * max_parallel_window_second))

                    if (col_ratio_first > col_ratio_second):
                        num_filt_second = num_filt_second + (
                            array_w_second * max_parallel_window_second)
                        num_filt_first = num_filt_first + (
                            filt_pend -
                            (array_w_second * max_parallel_window_second))

                    else:
                        num_filt_first = num_filt_first + (
                            array_w_first * max_parallel_window_first)
                        num_filt_second = num_filt_second + (
                            filt_pend -
                            (array_w_first * max_parallel_window_first))

                col_idx_base = num_filt_first  ##Starting from systolic 1 and taking the systolic 1 filter count as the beginning for the next systolic as base addresss

                break

            else:

                num_filt_first = num_filt_first + (array_w_first *
                                                   max_parallel_window_first)
                num_filt_second = num_filt_second + (
                    array_w_second * max_parallel_window_second)

                i = i + 1

        if single_array == 1:
            num_filt_first = num_filt
            num_filt_second = 0

        if num_filt_first > 0:
            array_one_used = 1

            sram_cycles_first, util_first = \
                sram_ws.sram_traffic(
                    dimension_rows = array_h_first,
                    dimension_cols = array_w_first,
                    ifmap_h = ifmap_h, ifmap_w = ifmap_w,
                    filt_h = filt_h, filt_w = filt_w,
                    num_channels = num_channels,
                    col_idx_base = 0, total_num_filt = num_filt,
                    strides = strides, num_filt = num_filt_first,
                    ofmap_base = ofmap_base, filt_base = filt_base, ifmap_base = ifmap_base,
                    sram_read_trace_file = sram_read_trace_file_first,
                    sram_write_trace_file = sram_write_trace_file_first
                )
        else:
            sram_cycles_first = 0

        if num_filt_second > 0:
            array_two_used = 1

            sram_cycles_second, util_second = \
               sram_ws.sram_traffic(
                   dimension_rows = array_h_second,
                   dimension_cols = array_w_second,
                   ifmap_h = ifmap_h, ifmap_w = ifmap_w,
                   filt_h = filt_h, filt_w = filt_w,
                   num_channels = num_channels,
                   col_idx_base = col_idx_base, total_num_filt = num_filt,
                   strides = strides, num_filt = num_filt_second,
                   ofmap_base = ofmap_base, filt_base = filt_base, ifmap_base = ifmap_base,
                   sram_read_trace_file = sram_read_trace_file_second,
                   sram_write_trace_file = sram_write_trace_file_second
               )
        else:
            sram_cycles_second = 0

    elif data_flow == 'is':
        ofmap_h = (ifmap_h - filt_h) / strides + 1
        ofmap_w = (ifmap_w - filt_w) / strides + 1

        num_ofmap = ofmap_h * ofmap_w

        num_ofmap_first = 0
        num_ofmap_second = 0

        i = 1

        no_of_filt_px = filt_h * filt_w * num_channels

        if array_h_first < no_of_filt_px:
            max_parallel_window_first = 1
        else:
            max_parallel_window_first = math.floor(array_h_first /
                                                   no_of_filt_px)

        if array_h_second < no_of_filt_px:
            max_parallel_window_second = 1
        else:
            max_parallel_window_second = math.floor(array_h_second /
                                                    no_of_filt_px)

        avail_ofmap_per_fold = (array_w_first * max_parallel_window_first) + (
            array_w_second * max_parallel_window_second)

        while True:
            ofmap_processing = i * avail_ofmap_per_fold

            if num_ofmap <= ofmap_processing:
                ofmap_pend = num_ofmap - ((i - 1) * avail_ofmap_per_fold)

                if ofmap_pend <= (
                        array_w_first *
                        max_parallel_window_first) and ofmap_pend > (
                            array_w_second * max_parallel_window_second
                        ):  ##Accomodating the last fold in systolic 1
                    num_ofmap_first = num_ofmap_first + ofmap_pend

                elif ofmap_pend > (
                        array_w_first *
                        max_parallel_window_first) and ofmap_pend <= (
                            array_w_second * max_parallel_window_second
                        ):  ## Accomodating the last fold in systolic 2
                    num_ofmap_second = num_ofmap_second + ofmap_pend

                elif ofmap_pend == avail_ofmap_per_fold:
                    num_ofmap_first = num_ofmap_first + (
                        array_w_first * max_parallel_window_first)
                    num_ofmap_second = num_ofmap_second + (
                        array_w_second * max_parallel_window_second)

                elif ofmap_pend <= (
                        array_w_first *
                        max_parallel_window_first) and ofmap_pend <= (
                            array_w_second * max_parallel_window_second):
                    col_ratio_first = float(
                        ofmap_pend /
                        (array_w_first * max_parallel_window_first))
                    col_ratio_second = float(
                        ofmap_pend /
                        (array_w_second * max_parallel_window_second))

                    if (col_ratio_first >= col_ratio_second):
                        num_ofmap_first = num_ofmap_first + ofmap_pend

                    else:
                        num_ofmap_second = num_ofmap_second + ofmap_pend

                else:
                    col_ratio_first = float(
                        (ofmap_pend -
                         (array_w_second * max_parallel_window_second)) /
                        (array_w_first * max_parallel_window_first))
                    col_ratio_second = float(
                        (ofmap_pend -
                         (array_w_first * max_parallel_window_first)) /
                        (array_w_second * max_parallel_window_second))

                    if (col_ratio_first > col_ratio_second):
                        num_ofmap_second = num_ofmap_second + (
                            array_w_second * max_parallel_window_second)
                        num_ofmap_first = num_ofmap_first + (
                            ofmap_pend -
                            (array_w_second * max_parallel_window_second))

                    else:
                        num_ofmap_first = num_ofmap_first + (
                            array_w_first * max_parallel_window_first)
                        num_ofmap_second = num_ofmap_second + (
                            ofmap_pend -
                            (array_w_first * max_parallel_window_first))

                col_idx_base = num_ofmap_first  ##Starting from systolic 1 and taking the systolic 1 filter count as the beginning for the next systolic as base addresss

                break

            else:

                num_ofmap_first = num_ofmap_first + (array_w_first *
                                                     max_parallel_window_first)
                num_ofmap_second = num_ofmap_second + (
                    array_w_second * max_parallel_window_second)

                i = i + 1

        if single_array == 1:
            num_ofmap_first = num_ofmap
            num_ofmap_second = 0

        if num_ofmap_first > 0:
            array_one_used = 1

            sram_cycles_first, util_first = \
                sram_is.sram_traffic(
                    dimension_rows = array_h_first,
                    dimension_cols = array_w_first,
                    ifmap_h = ifmap_h, ifmap_w = ifmap_w,
                    filt_h = filt_h, filt_w = filt_w,
                    num_channels = num_channels, num_ofmap = num_ofmap_first,
                    strides = strides, num_filt = num_filt,
                    col_idx_base = 0,
                    ofmap_base = ofmap_base, filt_base = filt_base, ifmap_base = ifmap_base,
                    sram_read_trace_file = sram_read_trace_file_first,
                    sram_write_trace_file = sram_write_trace_file_first
                )
        else:
            sram_cycles_first = 0

        if num_ofmap_second > 0:
            array_two_used = 1

            sram_cycles_second, util_second = \
                sram_is.sram_traffic(
                    dimension_rows = array_h_second,
                    dimension_cols = array_w_second,
                    ifmap_h = ifmap_h, ifmap_w = ifmap_w,
                    filt_h = filt_h, filt_w = filt_w,
                    num_channels = num_channels, num_ofmap = num_ofmap_second,
                    strides = strides, num_filt = num_filt,
                    col_idx_base = col_idx_base,
                    ofmap_base = ofmap_base, filt_base = filt_base, ifmap_base = ifmap_base,
                    sram_read_trace_file = sram_read_trace_file_second,
                    sram_write_trace_file = sram_write_trace_file_second
                )
        else:
            sram_cycles_second = 0

    #print("Generating DRAM traffic")
    if array_one_used == 1:
        if single_array == 1 or array_two_used == 0:
            dram.dram_trace_read_v2(sram_sz=ifmap_sram_size_first,
                                    word_sz_bytes=word_size_bytes,
                                    min_addr=ifmap_base,
                                    max_addr=filt_base,
                                    sram_trace_file=sram_read_trace_file_first,
                                    dram_trace_file=dram_ifmap_trace_file)

            dram.dram_trace_read_v2(sram_sz=filter_sram_size_first,
                                    word_sz_bytes=word_size_bytes,
                                    min_addr=filt_base,
                                    max_addr=ofmap_base,
                                    sram_trace_file=sram_read_trace_file_first,
                                    dram_trace_file=dram_filter_trace_file)

            dram.dram_trace_write(
                ofmap_sram_size=ofmap_sram_size_first,
                data_width_bytes=word_size_bytes,
                sram_write_trace_file=sram_write_trace_file_first,
                dram_write_trace_file=dram_ofmap_trace_file)
        else:
            dram.dram_trace_read_v2(
                sram_sz=ifmap_sram_size_first,
                word_sz_bytes=word_size_bytes,
                min_addr=ifmap_base,
                max_addr=filt_base,
                sram_trace_file=sram_read_trace_file_first,
                dram_trace_file=dram_ifmap_trace_file_first)

            dram.dram_trace_read_v2(
                sram_sz=filter_sram_size_first,
                word_sz_bytes=word_size_bytes,
                min_addr=filt_base,
                max_addr=ofmap_base,
                sram_trace_file=sram_read_trace_file_first,
                dram_trace_file=dram_filter_trace_file_first)

            dram.dram_trace_write(
                ofmap_sram_size=ofmap_sram_size_first,
                data_width_bytes=word_size_bytes,
                sram_write_trace_file=sram_write_trace_file_first,
                dram_write_trace_file=dram_ofmap_trace_file_first)

    if array_two_used == 1:
        if array_one_used == 0:
            dram.dram_trace_read_v2(
                sram_sz=ifmap_sram_size_second,
                word_sz_bytes=word_size_bytes,
                min_addr=ifmap_base,
                max_addr=filt_base,
                sram_trace_file=sram_read_trace_file_second,
                dram_trace_file=dram_ifmap_trace_file)

            dram.dram_trace_read_v2(
                sram_sz=filter_sram_size_second,
                word_sz_bytes=word_size_bytes,
                min_addr=filt_base,
                max_addr=ofmap_base,
                sram_trace_file=sram_read_trace_file_second,
                dram_trace_file=dram_filter_trace_file)

            dram.dram_trace_write(
                ofmap_sram_size=ofmap_sram_size_second,
                data_width_bytes=word_size_bytes,
                sram_write_trace_file=sram_write_trace_file_second,
                dram_write_trace_file=dram_ofmap_trace_file)
        else:
            dram.dram_trace_read_v2(
                sram_sz=ifmap_sram_size_second,
                word_sz_bytes=word_size_bytes,
                min_addr=ifmap_base,
                max_addr=filt_base,
                sram_trace_file=sram_read_trace_file_second,
                dram_trace_file=dram_ifmap_trace_file_second)

            dram.dram_trace_read_v2(
                sram_sz=filter_sram_size_second,
                word_sz_bytes=word_size_bytes,
                min_addr=filt_base,
                max_addr=ofmap_base,
                sram_trace_file=sram_read_trace_file_second,
                dram_trace_file=dram_filter_trace_file_second)

            dram.dram_trace_write(
                ofmap_sram_size=ofmap_sram_size_second,
                data_width_bytes=word_size_bytes,
                sram_write_trace_file=sram_write_trace_file_second,
                dram_write_trace_file=dram_ofmap_trace_file_second)

    # Selvaraj: Merge both DRAM traffic CSV's for BW calculations
    if array_one_used == 1 and array_two_used == 1:
        sram_controller(dram_ifmap_trace_file_first,
                        dram_ifmap_trace_file_second, dram_ifmap_trace_file)
        sram_controller(dram_filter_trace_file_first,
                        dram_filter_trace_file_second, dram_filter_trace_file)
        sram_controller(dram_ofmap_trace_file_first,
                        dram_ofmap_trace_file_second, dram_ofmap_trace_file)

    if array_one_used == 1 and array_two_used == 0:
        util = util_first
        power_metric = (int(sram_cycles_first) * int(array_h_first) *
                        int(array_w_first)) / 1000000
    elif array_one_used == 0 and array_two_used == 1:
        util = util_second
        power_metric = (int(sram_cycles_second) * int(array_h_second) *
                        int(array_w_second)) / 1000000
    else:
        util = (util_first +
                util_second) / 2  #Equally weighted from two systolic arrays
        power_metric = ((int(sram_cycles_first) * int(array_h_first) *
                         int(array_w_first)) +
                        (int(sram_cycles_second) * int(array_h_second) *
                         int(array_w_second))) / 1000000

    sram_cycles = max(int(sram_cycles_first), int(sram_cycles_second))

    print("Average utilization : \t" + str(util) + " %")
    print("Cycles for compute  : \t" + str(sram_cycles) + " cycles")
    print("Power consumed      : \t" + str(power_metric) + " Mega-units")

    if single_array == 1:  # SCALE-Sim used as a single compute array simulator
        bw_numbers, detailed_log = gen_bw_numbers(
            both_array_used=0,
            array_one_idle=0,
            array_two_idle=0,
            dram_ifmap_trace_file=dram_ifmap_trace_file,
            dram_filter_trace_file=
            dram_filter_trace_file,  #Selvaraj: Add support for two SRAM based BW generation after DRAM merge
            dram_ofmap_trace_file=dram_ofmap_trace_file,
            sram_write_trace_file_first=sram_write_trace_file_first,
            sram_read_trace_file_first=sram_read_trace_file_first)
        #array_h, array_w)

    elif (array_one_used == 1
          and array_two_used == 0):  ## Second array not powered on at all
        bw_numbers, detailed_log = gen_bw_numbers(
            both_array_used=1,
            array_one_idle=0,
            array_two_idle=1,
            dram_ifmap_trace_file=dram_ifmap_trace_file,
            dram_filter_trace_file=dram_filter_trace_file,
            dram_ofmap_trace_file=dram_ofmap_trace_file,
            sram_write_trace_file_first=sram_write_trace_file_first,
            sram_read_trace_file_first=sram_read_trace_file_first)

    elif (array_one_used == 0
          and array_two_used == 1):  ## First array not powered on at all
        bw_numbers, detailed_log = gen_bw_numbers(
            both_array_used=1,
            array_one_idle=1,
            array_two_idle=0,
            dram_ifmap_trace_file=dram_ifmap_trace_file,
            dram_filter_trace_file=dram_filter_trace_file,
            dram_ofmap_trace_file=dram_ofmap_trace_file,
            sram_write_trace_file_second=sram_write_trace_file_second,
            sram_read_trace_file_second=sram_read_trace_file_second)

    elif array_one_used == 1 and array_two_used == 1:
        bw_numbers, detailed_log = gen_bw_numbers(
            both_array_used=1,
            array_one_idle=0,
            array_two_idle=0,
            dram_ifmap_trace_file=dram_ifmap_trace_file,
            dram_filter_trace_file=dram_filter_trace_file,
            dram_ofmap_trace_file=dram_ofmap_trace_file,
            sram_write_trace_file_first=sram_write_trace_file_first,
            sram_read_trace_file_first=sram_read_trace_file_first,
            sram_write_trace_file_second=sram_write_trace_file_second,
            sram_read_trace_file_second=sram_read_trace_file_second)
        #array_h, array_w)

    return bw_numbers, detailed_log, util, str(
        sram_cycles), array_one_used, array_two_used, power_metric
Ejemplo n.º 5
0
def gen_all_traces(array_h=4,
                   array_w=4,
                   ifmap_h=7,
                   ifmap_w=7,
                   filt_h=3,
                   filt_w=3,
                   num_channels=3,
                   strides=1,
                   num_filt=8,
                   data_flow='os',
                   word_size_bytes=1,
                   filter_sram_size=64,
                   ifmap_sram_size=64,
                   ofmap_sram_size=64,
                   filt_base=1000000,
                   ifmap_base=0,
                   ofmap_base=2000000,
                   sram_read_trace_file="sram_read.csv",
                   sram_write_trace_file="sram_write.csv",
                   dram_filter_trace_file="dram_filter_read.csv",
                   dram_ifmap_trace_file="dram_ifmap_read.csv",
                   dram_ofmap_trace_file="dram_ofmap_write.csv"):
    """Generate SRAM and DRAM traces for one layer and summarise bandwidth.

    The function runs three stages in order:

    1. ``sram_traffic`` of the module selected by ``data_flow`` produces the
       SRAM read/write trace files.
    2. ``dram.dram_trace_read_v2`` (ifmap, then filter) and
       ``dram.dram_trace_write`` (ofmap) turn the SRAM traces into DRAM
       trace files.
    3. ``gen_bw_numbers`` is called on the five trace files.

    Args:
        array_h, array_w: forwarded to ``sram_traffic`` as
            ``dimension_rows`` / ``dimension_cols``.
        ifmap_h, ifmap_w, filt_h, filt_w, num_channels, strides, num_filt:
            layer parameters forwarded unchanged to ``sram_traffic``.
        data_flow: ``'os'`` selects ``sram``, ``'ws'`` selects ``sram_ws``
            and ``'is'`` selects ``sram_is`` (presumably output-, weight-
            and input-stationary -- confirm against those modules).
        word_size_bytes: forwarded to the DRAM trace generators.
        filter_sram_size, ifmap_sram_size, ofmap_sram_size: SRAM sizes
            forwarded to the matching DRAM trace generator.
        filt_base, ifmap_base, ofmap_base: base addresses. The ifmap DRAM
            trace uses the range ``ifmap_base``..``filt_base`` and the
            filter DRAM trace uses ``filt_base``..``ofmap_base``.
        sram_read_trace_file, sram_write_trace_file: SRAM trace paths,
            written by stage 1 and consumed by stages 2 and 3.
        dram_filter_trace_file, dram_ifmap_trace_file,
        dram_ofmap_trace_file: DRAM trace paths, written by stage 2 and
            consumed by stage 3.

    Returns:
        A tuple ``(bw_numbers, detailed_log, util, sram_cycles)``.
        ``bw_numbers`` and ``detailed_log`` come from ``gen_bw_numbers``;
        ``detailed_log`` additionally has the three DRAM generator return
        values appended as ``",\\t"``-separated fields with a trailing
        comma. ``util`` and ``sram_cycles`` come from ``sram_traffic``.

    Raises:
        ValueError: if ``data_flow`` is not ``'os'``, ``'ws'`` or ``'is'``.
            Previously such a value silently skipped SRAM trace generation
            and the later stages ran on missing or stale trace files.
    """
    # Fail fast: without a recognised dataflow no SRAM trace is produced and
    # everything downstream would operate on missing or stale files.
    if data_flow not in ('os', 'ws', 'is'):
        raise ValueError(
            "Unsupported data_flow %r; expected 'os', 'ws' or 'is'" %
            (data_flow,))

    print("Generating traces and bw numbers")

    # Pick the SRAM traffic model lazily so only the module that is actually
    # needed gets looked up.
    if data_flow == 'os':
        traffic_model = sram
    elif data_flow == 'ws':
        traffic_model = sram_ws
    else:
        traffic_model = sram_is

    sram_cycles, util = traffic_model.sram_traffic(
        dimension_rows=array_h,
        dimension_cols=array_w,
        ifmap_h=ifmap_h,
        ifmap_w=ifmap_w,
        filt_h=filt_h,
        filt_w=filt_w,
        num_channels=num_channels,
        strides=strides,
        num_filt=num_filt,
        ofmap_base=ofmap_base,
        filt_base=filt_base,
        ifmap_base=ifmap_base,
        sram_read_trace_file=sram_read_trace_file,
        sram_write_trace_file=sram_write_trace_file,
    )

    # Use the SRAM timing to back-derive the DRAM execution time.
    # The three generators are still invoked one after another, which
    # implicitly treats the three streams as able to proceed concurrently;
    # in practice the available bandwidth is not that large.
    # Open question: how should the latency between compute and memory
    # access be covered (hidden)?
    ifmap_dram_acc_times = dram.dram_trace_read_v2(
        sram_sz=ifmap_sram_size,
        word_sz_bytes=word_size_bytes,
        min_addr=ifmap_base,
        max_addr=filt_base,
        sram_trace_file=sram_read_trace_file,
        dram_trace_file=dram_ifmap_trace_file,
    )

    filt_dram_acc_times = dram.dram_trace_read_v2(
        sram_sz=filter_sram_size,
        word_sz_bytes=word_size_bytes,
        min_addr=filt_base,
        max_addr=ofmap_base,
        sram_trace_file=sram_read_trace_file,
        dram_trace_file=dram_filter_trace_file,
    )

    ofmap_dram_acc_times = dram.dram_trace_write(
        ofmap_sram_size=ofmap_sram_size,
        data_width_bytes=word_size_bytes,
        sram_write_trace_file=sram_write_trace_file,
        dram_write_trace_file=dram_ofmap_trace_file,
    )

    print("Average utilization : \t" + str(util) + " %")
    print("Cycles for compute  : \t" + str(sram_cycles) + " cycles")

    bw_numbers, detailed_log = gen_bw_numbers(dram_ifmap_trace_file,
                                              dram_filter_trace_file,
                                              dram_ofmap_trace_file,
                                              sram_write_trace_file,
                                              sram_read_trace_file)

    # Append the values returned by the DRAM generators (presumably access
    # counts -- confirm against the dram module) as extra log fields.
    dram_acc_times = (ifmap_dram_acc_times, filt_dram_acc_times,
                      ofmap_dram_acc_times)
    detailed_log += str(ifmap_dram_acc_times) + ",\t" + str(
        filt_dram_acc_times) + ",\t" + str(ofmap_dram_acc_times) + ","

    print(dram_acc_times)
    return bw_numbers, detailed_log, util, sram_cycles