# NOTE: an earlier revision of process_weights; it is redefined by the later
# revision further below.
def process_weights(weights, weight_idx, lookaside, lookahead, out_limit, in_limit):
    chunk_n, chunk_i = weight_idx
    #print "chunk:", chunk_n, chunk_i
    zero_rows = 0

    # recalculate global index
    (R, Tn, Ti) = weights.shape
    ind = np.indices((R, Tn, Ti)).swapaxes(0, 3).swapaxes(0, 2).swapaxes(0, 1)

    dup_map = map_duplicates(weights, True)

    out_per_row = [0] * (Ti + 1)
    in_per_row = [0] * (Tn + 1)
    out_res_per_row = [0] * (Ti + 1)
    in_res_per_row = [0] * (Tn + 1)

    zero_rm = 0         # number of zeros removed
    dup_rm = 0          # number of dups removed
    dup_bubble = 0      # ignore
    dup_bubble_pop = 0  # ignore

    global glob_dups
    global removed_dups
    global forwarded_dups
    global buffer
    global glob_max_buffer_size
    global next_c_dict
    global n_sets
    global Tii
    global Tnn
    global buffer_size

    # iterate in chunk order and save duplicate values
    for r in range(R):
        for n in range(Tn):
            for i in range(Ti):
                w = map_weight(weights[r, n, i])

                #if (i/Tii == 0 and n/Tn == 0):
                #    # start of new partial sum calculation
                #    for tw in range(n_ways):
                #        for ts in range(n_sets):
                #            for key in buffer[ts][tw].keys():
                #                for tn in buffer[ts][tw][key]:
                #                    if tn/Tn == n/Tn:
                #                        buffer[ts][tw][key].remove(tn)
                #                if len(buffer[ts][tw][key]) == 0:
                #                    # get rid of this entry in the buffer
                #                    #print "deleting", w, gi
                #                    del buffer[ts][tw][key]
                #                    del next_c_dict[ts][tw][key]
                #                else:
                #                    next_c_dict[ts][tw][key] = calc_buffer_next_reuse(buffer[ts][tw], key)

                if (w == 0):
                    continue

                # which set/way does this weight map to?
                set_idx = i % n_sets
                way = n % n_ways

                assert len(buffer[set_idx][way].keys()) == len(next_c_dict[set_idx][way].keys())

                (gn, gi) = get_global_weight_idx(chunk_n, chunk_i, r, n, i)

                # is this a duplicate?
                if ((w, gi) in glob_dups and len(glob_dups[(w, gi)]) > 1):
                    #if gi == 0:
                    #    print "dup: ", gn, gi, w

                    # is the product already in the buffer?
                    found_way = -1
                    for tw in range(n_ways):
                        if (w, gi) in buffer[set_idx][tw]:
                            found_way = tw

                    if found_way >= 0:
                        if (gn not in buffer[set_idx][found_way][(w, gi)]):
                            continue  # this product was forwarded by a previous operation
                        if gn != buffer[set_idx][found_way][(w, gi)][0]:
                            print "gn = %d but list[0] = %d" % (gn, buffer[set_idx][found_way][(w, gi)][0])

                        # remove current key
                        buffer[set_idx][found_way][(w, gi)].remove(gn)
                        removed_dups += 1
                        # print "removed",w,gn,gi

                        # have all the duplicates been forwarded?
                        if len(buffer[set_idx][found_way][(w, gi)]) == 0:
                            # get rid of this entry in the buffer
                            #print "deleting", w, gi
                            del buffer[set_idx][found_way][(w, gi)]
                            del next_c_dict[set_idx][found_way][(w, gi)]
                        else:
                            next_c_dict[set_idx][found_way][(w, gi)] = calc_buffer_next_reuse(buffer[set_idx][found_way], (w, gi))
                    else:
                        # product is not stored in the buffer
                        # will this product be reused?
                        nidx = glob_dups[(w, gi)].index(gn)
                        if (nidx == len(glob_dups[(w, gi)]) - 1):
                            # last duplicate in list, don't save
                            continue

                        # get the remaining duplicates
                        dups = list(glob_dups[(w, gi)][nidx + 1:])

                        # can the duplicates be forwarded this cycle?
                        dups_copy = list(dups)
                        for d in dups_copy:
                            # duplicates issued this cycle:
                            if gn/Tn == d/Tn:
                                forwarded_dups += 1
                                removed_dups += 1
                                # remove from global dups list
                                glob_dups[(w, gi)].remove(d)
                                dups.remove(d)
                                #print 'forward', w, gi, d
                                weights[r, d % Tn, i] = 0
                                #print 'forward', w, gi, gn, '->', d

                        if (len(dups) == 0):
                            # all duplicates forwarded
                            continue

                        #continue # no buffering

                        # if there are still duplicates in the future,
                        # add to buffer
                        keys = buffer[set_idx][way].keys()
                        set_size = buffer_size / n_sets / n_ways
                        if (len(keys) >= set_size):
                            # buffer is full
                            #continue # don't evict ever
                            # find an eviction candidate
                            # policy: longest next reuse
                            victim_c = -1
                            victim_key = []
                            for key in keys:
                                (kn, ki) = (buffer[set_idx][way][key][0], key[1])
                                next_c = next_c_dict[set_idx][way][key]
                                if next_c > victim_c:
                                    victim_c = next_c
                                    victim_key = key
                            # print "n =",gn, "evicting", buffer[victim_key]

                            # if the victim has a longer reuse distance than the current dup, replace it
                            replacement_c = chunk.n_i_to_cycle(dups[0], gi, Nn, Ni, Tnn, Tii, Tn, Ti)
                            if (victim_c > replacement_c):
                                #print "deleting", victim_key[0], victim_key[1]
                                del buffer[set_idx][way][victim_key]
                                del next_c_dict[set_idx][way][victim_key]
                            else:
                                continue  # don't add the replacement to the list

                        # add buffer entry
                        #print "adding", w, gi
                        #dups.pop(0)
                        buffer[set_idx][way][(w, gi)] = dups
                        #if gi == 0:
                        #    print "adding dups to buffer", dups
                        next_c_dict[set_idx][way][(w, gi)] = calc_buffer_next_reuse(buffer[set_idx][way], (w, gi))
                        glob_max_buffer_size = max(glob_max_buffer_size, len(buffer[set_idx][way].keys()))
    return

    #############################################################################
    # Dead code below: the earlier row-based rewrite (bubble filling plus
    # in-row duplicate removal); unreachable after the return above.

    for r in range(0, R-1):
        # print "C:", weights[r,n,:]
        # print "N:", weights[r+1,n,:]
        rmax = min(r + lookahead, R-1)
        # print r, "##############################"
        # for tr in range(r, rmax + 1):
        #     print_row(weights,tr)

        # check for all zeros
        if (is_zero(weights[r,:,:])):
            # print r # print all lines that are all zeroes
            zero_rows += 1
            continue

        # counters for the limits
        in_ctr = [in_limit] * Tn    # input limit per filter (n), max inputs to adder tree
        out_ctr = [out_limit] * Ti  # number of products that can be broadcast for an input i
        ictr = 0  # number of products reused
        octr = 0  # number of products broadcast
        ires = 0  # ignore
        ores = 0  # ignore

        changed = True
        dup_found = set()

        # fill bubbles
        while changed:
            changed = False
            dup_found_iter = []

            # look for duplicates
            for n in range(0, Tn):
                for i in range(0, Ti):
                    # look for duplicates only if we haven't looked at it before
                    key = (ind[r,n,i][0], ind[r,n,i][2], weights[r,n,i])
                    if (key not in dup_found and not is_zero(weights[r,n,i])):
                        # dup_index is the list of duplicates for (r,n,i)
                        dup_index = look_for_duplicates(r, n, i, weights, ind, dup_map, lookahead)
                        dup_found.add(key)
                        if (dup_index):
                            dup_index.append((r,n,i))
                            dup_found_iter.append(dup_index)
                            #print "A ", dup_index, "W ", weights[r,n,i]

            # prioritize removal here: reorder the duplicate removal order,
            # doing the ones with more duplicates first
            dup_found_iter.sort(key=len, reverse=True)

            # pick the filter with the least number of duplicates to
            # send first; this should reduce input dependences
            n_ctr = {}
            for dup_list in dup_found_iter:
                for element in dup_list:
                    n_ctr[element[1]] = n_ctr.get(element[1], 0) + 1
            for tmp in dup_found_iter:
                tmp.sort(key=lambda fn: n_ctr.get(fn[1], Tn*Ti + 1))

            # remove duplicates in list order, checking for constraints
            for dup_list in dup_found_iter:
                # for each set of duplicates, the first dup that can be issued
                # (in the current row) will be issued; the rest will be removed
                for index in dup_list:
                    (rr, nn, ii) = index
                    # only output if the index is on the current row
                    if (r != rr):
                        continue
                    # remove all other duplicates if possible
                    (weights, ind, out_ctr, in_ctr, stat) = remove_duplicates(rr, nn, ii, weights, ind, dup_list, out_ctr, in_ctr, lookaside, lookahead, nn)
                    dup_rm += stat[0]
                    dup_bubble += stat[1]
                    dup_bubble_pop += stat[2]
                    ictr += stat[3]
                    octr += stat[4]
                    # exit when a remove succeeded
                    if (stat[0]):
                        break

            # remove all the bubbles in the row
            for n in range(0, Tn):
                for i in range(0, Ti):
                    # fill in the bubble
                    if (is_zero(weights[r,n,i])):
                        # found a zero to fill, look for a replacement
                        (weights, ind, tmp) = re.look_for_replacement(r, n, i, weights, ind, lookaside, lookahead)
                        zero_rm += tmp
                        changed = changed or tmp
        # end of change loop

        #out_per_row[octr/max(1,out_limit)] += 1
        #in_per_row[ictr/max(1,in_limit)] += 1
        #out_res_per_row[ores/max(1,out_limit)] += 1
        #in_res_per_row[ires/max(1,in_limit)/Tn] += 1

        # print "--------------------------------"
        # for tr in range(r, rmax + 1):
        #     print_row(weights,tr)
        #     print_filter(weights,n)

    #print_weights(weights)

    # check if the last row is zero
    if (is_zero(weights[R-1,:,:])):
        zero_rows += 1

    #print "row reduction = ", R-zero_rows, "/", R
    #print "Output Counter: ", out_per_row
    #print "Input Counter: ", in_per_row
    #print "Output Res: ", out_res_per_row
    #print "Input Res: ", in_res_per_row
    #print "Bubble/Dup/B+D/B+D+P: ", (zero_rm, dup_rm, dup_bubble, dup_bubble_pop)

    global total_reduced_rows
    total_reduced_rows += R - zero_rows
    global total_rows
    total_rows += R
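# ----------------------------------------------------------------------------
# Illustrative sketch only (toy_fill_bubble is hypothetical, not the
# simulator's re.look_for_replacement): the row-based pass above fills a zero
# "bubble" in lane (n, i) by promoting a weight from one of the next
# `lookahead` rows of the same lane; lookaside works the same way but steals
# from adjacent lanes in the same row.
def toy_fill_bubble(col, r, lookahead):
    # col: weights of one (n, i) lane across rows; fill the bubble at row r
    if col[r] != 0:
        return False
    for rr in range(r + 1, min(r + lookahead, len(col) - 1) + 1):
        if col[rr] != 0:
            # promote the weight into the bubble and leave a zero behind
            col[r], col[rr] = col[rr], 0
            return True
    return False

# Example: toy_fill_bubble([0, 0, 5, 7], 0, 2) promotes the 5 -> [5, 0, 0, 7].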
def calc_buffer_next_reuse(buffer, key):
    # cycle at which the earliest pending consumer (the head of this entry's
    # duplicate list) will be issued
    (kn, ki) = (buffer[key][0], key[1])
    return chunk.n_i_to_cycle(kn, ki, Nn, Ni, Tnn, Tii, Tn, Ti)
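# ----------------------------------------------------------------------------
# Sketch of what the next-reuse value means. A buffer entry keeps the pending
# consumers (global filter indices) of a product, earliest first, and its
# priority is the cycle at which the head of that list is issued.
# chunk.n_i_to_cycle is the real cycle model; the stand-in below, which
# assumes one row of `tn` filters issues per cycle, is only an assumption to
# make the snippet self-contained.
def toy_next_reuse(pending, tn):
    # pending: sorted global filter indices still waiting for this product
    return pending[0] // tn

# Example: with tn=16, toy_next_reuse([17, 40], 16) -> cycle 1, so this entry
# is reused sooner than one whose head consumer is filter 40 (cycle 2).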
#    add += 1
#    #print "add",weight,n,i
#
#    # should I append time instead?
#    glob_dups[(weight,i)].append(n)

# sort each n_list by reuse time
diff_list = []
for k in glob_dups.keys():
    (kw, ki) = k
    n_list = glob_dups[k]
    if (len(n_list) == 1):
        del glob_dups[k]
        continue
    reuse_cycle = [chunk.n_i_to_cycle(n, ki, Nn, Ni, Tnn, Tii, Tn, Ti) for n in n_list]
    for rc in range(0, len(reuse_cycle) - 1):
        if reuse_cycle[rc] != reuse_cycle[rc+1]:
            diff = reuse_cycle[rc+1] - reuse_cycle[rc]
            diff_list.append(diff)
    # print ki, n_list
    # print reuse_cycle
    # glob_dups[k] = [n for (c,n) in sorted(zip(reuse_cycle,n_list), key=lambda pair: pair[0])]

#print "mean buffer time", sum(diff_list)/float(len(diff_list))

for key in glob_dups:
    # print key, len(glob_dups[key]), glob_dups[key]
    total_dups += len(glob_dups[key]) - 1
    # print key, glob_dups[key]

#print "break into chunks"
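# ----------------------------------------------------------------------------
# Standalone sketch of the statistic gathered above: for each duplicated
# product, measure the cycle gap between consecutive reuses; the mean gap
# estimates how long a forwarded product has to sit in the buffer. The dict
# layout mirrors glob_dups ({(weight, input): [filter indices]}), but the
# cycle model (one row of `tn` filters per cycle) is an assumption.
def toy_mean_buffer_time(dups, tn=16):
    gaps = []
    for k in sorted(dups.keys()):
        cycles = sorted(n // tn for n in dups[k])
        for a, b in zip(cycles, cycles[1:]):
            if b != a:
                gaps.append(b - a)
    return sum(gaps) / float(max(1, len(gaps)))

# Example: toy_mean_buffer_time({(0.5, 3): [2, 40, 200]}) sees reuse cycles
# [0, 2, 12], gaps [2, 10], and reports a mean buffer time of 6.0.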
def process_weights(weights, weight_idx, lookaside, lookahead, out_limit, in_limit):
    chunk_n, chunk_i = weight_idx
    #print "chunk:", chunk_n, chunk_i
    zero_rows = 0

    # recalculate global index
    (R, Tn, Ti) = weights.shape

    global total_rows
    total_rows += R

    ind = np.indices((R, Tn, Ti)).swapaxes(0, 3).swapaxes(0, 2).swapaxes(0, 1)

    dup_map = map_duplicates(weights)

    out_per_row = [0] * (Ti + 1)
    in_per_row = [0] * (Tn + 1)
    out_res_per_row = [0] * (Ti + 1)
    in_res_per_row = [0] * (Tn + 1)

    zero_rm = 0         # number of zeros removed
    dup_rm = 0          # number of dups removed
    dup_bubble = 0      # ignore
    dup_bubble_pop = 0  # ignore

    global glob_dups
    global removed_dups
    global forwarded_dups
    global buffer
    global glob_max_buffer_size
    global next_c_dict
    global total_dups_per_row
    global buffer_size

    # iterate in chunk order and save duplicate values
    for r in range(R):
        for n in range(Tn):
            for i in range(Ti):
                w = map_weight(weights[r, n, i])

                # forward buffered products at the beginning of a new output tile
                #if (i/Tii == 0 and n/Tn == 0):
                #    # start of new partial sum calculation
                #    for tw in range(n_ways):
                #        for ts in range(n_sets):
                #            for key in buffer[ts][tw].keys():
                #                for tn in buffer[ts][tw][key]:
                #                    if tn/Tn == n/Tn:
                #                        buffer[ts][tw][key].remove(tn)
                #                if len(buffer[ts][tw][key]) == 0:
                #                    # get rid of this entry in the buffer
                #                    #print "deleting", w, gi
                #                    del buffer[ts][tw][key]
                #                    del next_c_dict[ts][tw][key]
                #                else:
                #                    next_c_dict[ts][tw][key] = calc_buffer_next_reuse(buffer[ts][tw], key)

                if (w == 0):
                    continue

                # which set/way does this weight map to?
                set_idx = i % n_sets
                way = n % n_ways

                assert len(buffer[set_idx][way].keys()) == len(next_c_dict[set_idx][way].keys())

                (gn, gi) = get_global_weight_idx(chunk_n, chunk_i, r, n, i)

                # is this a duplicate?
                if ((w, gi) in glob_dups and len(glob_dups[(w, gi)]) > 1):
                    #if gi == 0:
                    #    print "dup: ", gn, gi, w

                    # is the product already in the buffer?
                    found_way = -1
                    for tw in range(n_ways):
                        if (w, gi) in buffer[set_idx][tw]:
                            found_way = tw

                    if found_way >= 0:
                        if (gn not in buffer[set_idx][found_way][(w, gi)]):
                            continue  # this product was forwarded by a previous operation
                        if gn != buffer[set_idx][found_way][(w, gi)][0]:
                            print "gn = %d but list[0] = %d" % (gn, buffer[set_idx][found_way][(w, gi)][0])

                        # remove current key
                        buffer[set_idx][found_way][(w, gi)].remove(gn)
                        removed_dups += 1
                        # print "removed",w,gn,gi

                        # have all the duplicates been forwarded?
                        if len(buffer[set_idx][found_way][(w, gi)]) == 0:
                            # get rid of this entry in the buffer
                            #print "deleting", w, gi
                            del buffer[set_idx][found_way][(w, gi)]
                            del next_c_dict[set_idx][found_way][(w, gi)]
                        else:
                            next_c_dict[set_idx][found_way][(w, gi)] = calc_buffer_next_reuse(buffer[set_idx][found_way], (w, gi))
                    else:
                        # product is not stored in the buffer
                        # will this product be reused?
                        nidx = glob_dups[(w, gi)].index(gn)
                        if (nidx == len(glob_dups[(w, gi)]) - 1):
                            # last duplicate in list, don't save
                            continue

                        # get the remaining duplicates
                        dups = list(glob_dups[(w, gi)][nidx + 1:])

                        # can the duplicates be forwarded this cycle?
                        dups_copy = list(dups)
                        dups_this_row = 0
                        for d in dups_copy:
                            # duplicates issued this cycle:
                            if gn/Tn == d/Tn:
                                forwarded_dups += 1
                                removed_dups += 1
                                dups_this_row += 1
                                # remove from global dups list
                                glob_dups[(w, gi)].remove(d)
                                dups.remove(d)
                                #print 'forward', w, gi, d
                                weights[r, d % Tn, i] = 0
                                #print 'forward', w, gi, gn, '->', d
                        total_dups_per_row += dups_this_row

                        if (len(dups) == 0):
                            # all duplicates forwarded
                            continue

                        #continue # no buffering

                        # if there are still duplicates in the future,
                        # add to buffer
                        keys = buffer[set_idx][way].keys()
                        set_size = buffer_size / n_sets / n_ways
                        if (len(keys) >= set_size):
                            # buffer is full
                            #continue # don't evict ever
                            # find an eviction candidate
                            # policy: longest next reuse
                            victim_c = -1
                            victim_key = []
                            for key in keys:
                                (kn, ki) = (buffer[set_idx][way][key][0], key[1])
                                next_c = next_c_dict[set_idx][way][key]
                                if next_c > victim_c:
                                    victim_c = next_c
                                    victim_key = key
                            # print "n =",gn, "evicting", buffer[victim_key]

                            # if the victim has a longer reuse distance than the current dup, replace it
                            replacement_c = chunk.n_i_to_cycle(dups[0], gi, Nn, Ni, Tnn, Tii, Tn, Ti)
                            if (victim_c > replacement_c):
                                #print "deleting", victim_key[0], victim_key[1]
                                del buffer[set_idx][way][victim_key]
                                del next_c_dict[set_idx][way][victim_key]
                            else:
                                continue  # don't add the replacement to the list

                        # add buffer entry
                        #print "adding", w, gi
                        #dups.pop(0)
                        buffer[set_idx][way][(w, gi)] = dups
                        #if gi == 0:
                        #    print "adding dups to buffer", dups
                        next_c_dict[set_idx][way][(w, gi)] = calc_buffer_next_reuse(buffer[set_idx][way], (w, gi))
                        glob_max_buffer_size = max(glob_max_buffer_size, len(buffer[set_idx][way].keys()))
    return
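# ----------------------------------------------------------------------------
# Sketch of the forwarding test above (toy_split_dups is hypothetical):
# global filter indices in the same tile row (gn/Tn == d/Tn) are issued in
# the same cycle, so their duplicate products can be forwarded immediately;
# only the remainder is worth buffering.
def toy_split_dups(gn, dups, tn):
    # returns (forwarded_now, must_buffer)
    now = [d for d in dups if d // tn == gn // tn]
    later = [d for d in dups if d // tn != gn // tn]
    return (now, later)

# Example: with tn=16 and gn=3, toy_split_dups(3, [7, 12, 40], 16) returns
# ([7, 12], [40]): filters 7 and 12 are served this cycle, 40 must wait.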
def buffer_insert(w, gi, gn, n):
    global glob_dups
    global removed_dups
    global forwarded_dups
    global buffer
    global glob_max_buffer_size
    global reuse_cycle
    global total_dups_per_row
    global buffer_size

    set_idx = gi % n_sets
    way = n % n_ways

    # will this product be reused?
    if (not (w, gi) in glob_dups):
        print "buffer_insert(%f,%d) not in glob_dups" % (w, gi)
        sys.exit()
    try:
        nidx = glob_dups[(w, gi)].index(gn)
    except ValueError:
        return False
    if (nidx == len(glob_dups[(w, gi)]) - 1):
        # last duplicate in list, don't save
        return False

    # get the remaining duplicates
    dups = list(glob_dups[(w, gi)][nidx + 1:])

    # drop the duplicates that can be forwarded this cycle
    dups_this_row = 0
    for d in dups[:]:
        # duplicates issued this chunk:
        if gn/Tn == d/Tn:
            dups.remove(d)

    if (len(dups) == 0):
        # all duplicates forwarded
        return False

    # if there are still duplicates in the future, add to buffer
    keys = buffer[set_idx][way].keys()
    set_size = buffer_size / n_sets / n_ways
    if (len(keys) >= set_size):
        # buffer is full
        #continue # don't evict ever
        # find an eviction candidate
        # policy: longest next reuse
        victim_c = -1
        victim_key = []
        for key in keys:
            (kn, ki) = (buffer[set_idx][way][key][0], key[1])
            next_c = reuse_cycle[set_idx][way][key]
            if next_c > victim_c:
                victim_c = next_c
                victim_key = key

        # if the victim has a longer reuse distance than the current dup, replace it
        replacement_c = chunk.n_i_to_cycle(dups[0], gi, Nn, Ni, Tnn, Tii, Tn, Ti)
        if (victim_c > replacement_c):
            #print "deleting", victim_key[0], victim_key[1]
            del buffer[set_idx][way][victim_key]
            del reuse_cycle[set_idx][way][victim_key]
        else:
            return False  # don't add the replacement to the list

    # update buffer
    buffer[set_idx][way][(w, gi)] = dups
    reuse_cycle[set_idx][way][(w, gi)] = calc_buffer_next_reuse(buffer[set_idx][way], (w, gi))
    glob_max_buffer_size = max(glob_max_buffer_size, len(buffer[set_idx][way].keys()))
    return True
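# ----------------------------------------------------------------------------
# The eviction policy in buffer_insert is Belady-flavored: pick as victim the
# entry whose next reuse is farthest in the future, and admit the newcomer
# only if it is reused sooner than that victim. A minimal standalone sketch
# of the admission decision, assuming the next-reuse cycles are already known:
def toy_admit(entries, new_key, new_cycle, set_size):
    # entries: {key: next_reuse_cycle} for one set/way; returns True on admit
    if len(entries) >= set_size:
        victim = max(entries, key=lambda k: entries[k])
        if entries[victim] <= new_cycle:
            return False  # newcomer is reused no sooner than the victim
        del entries[victim]
    entries[new_key] = new_cycle
    return True

# Example: with set_size=2, toy_admit({'a': 10, 'b': 30}, 'c', 20, 2) evicts
# 'b' (next reused at cycle 30) and admits 'c' (next reused at cycle 20).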