Code Example #1
def process_weights(weights, weight_idx, lookaside, lookahead, out_limit, in_limit):
    chunk_n, chunk_i = weight_idx
    #print "chunk:", chunk_n, chunk_i
    zero_rows = 0


    # recalculate global index
    (R,Tn,Ti) = weights.shape

    # store the original indices of each weight in weights
    ind = np.indices((R,Tn,Ti)).swapaxes(0,3).swapaxes(0,2).swapaxes(0,1)

    # count the duplicates for each weight key within the chunk
    dup_map = map_duplicates(weights, True)

    out_per_row = [0] * (Ti+1)
    in_per_row = [0] * (Tn+1)
    out_res_per_row = [0] * (Ti+1)
    in_res_per_row = [0] * (Tn+1)

    zero_rm = 0 # number of zeros removed
    dup_rm = 0 # number of dups removed
    dup_bubble = 0 # ignore
    dup_bubble_pop = 0 # ignore

    global glob_dups
    global removed_dups
    global forwarded_dups
    global buffer
    global buffer_size
    global glob_max_buffer_size
    global next_c_dict
    global n_sets
    global Tii
    global Tnn

    # iterate in chunk order and save duplicate values
    for r in range(R):
        for n in range(Tn):
            for i in range(Ti):
                w = map_weight(weights[r,n,i])

                #if (i/Tii == 0 and n/Tn == 0):
                #    # start of new partial sum calculation
                #    for tw in range(n_ways):
                #        for ts in range(n_sets):
                #            for key in buffer[ts][tw].keys():
                #                for tn in buffer[ts][tw][key]:
                #                    if tn/Tn == n/Tn:
                #                        buffer[ts][tw][key].remove(tn)
                #                if len(buffer[ts][tw][key]) == 0:
                #                    # get rid of this entry in the buffer
                #                    #print "deleting", w, gi
                #                    del buffer[ts][tw][key]
                #                    del next_c_dict[ts][tw][key]
                #                else:
                #                    next_c_dict[ts][tw][key] = calc_buffer_next_reuse(buffer[ts][tw], key)
                #            

                if (w == 0):
                    continue

                # which set and way does this weight map to?
                set = i % n_sets   # NB: 'set' shadows the builtin set()
                way = n % n_ways
                assert len(buffer[set][way].keys()) == len(next_c_dict[set][way].keys())

                (gn,gi) = get_global_weight_idx(chunk_n, chunk_i, r, n, i)
                # is this a duplicate?
                if ( (w,gi) in glob_dups and len(glob_dups[(w,gi)]) > 1):
                    #if gi == 0:
                    #    print "dup: ", gn, gi, w
                    # is the product already in the buffer
                    found_way = -1
                    for tw in range(n_ways):
                        if (w,gi) in buffer[set][tw]:
                            found_way = tw
                            break

                    if found_way >= 0:
                        if (gn not in buffer[set][found_way][(w,gi)]):
                            continue # this product was forwarded by a previous operation
                        
                        if gn != buffer[set][found_way][(w,gi)][0]:
                            print "gn = %d but list[0] = %d" % (gn , buffer[set][found_way][(w,gi)][0])

                        # remove current key
                        buffer[set][found_way][(w,gi)].remove(gn)
                        removed_dups += 1
                        # print "removed",w,gn,gi
                        # have all the duplicates been forwarded?
                        if len(buffer[set][found_way][(w,gi)]) == 0:
                            # get rid of this entry in the buffer
                            #print "deleting", w, gi
                            del buffer[set][found_way][(w,gi)]
                            del next_c_dict[set][found_way][(w,gi)]
                        else:
                            next_c_dict[set][found_way][(w,gi)] = calc_buffer_next_reuse(buffer[set][found_way], (w,gi))
                    else:
                        # product is not stored in the buffer

                        # will this product be reused?
                        nidx = glob_dups[(w,gi)].index(gn) 
                        if ( nidx == len(glob_dups[(w,gi)])-1 ):
                            # last duplicate in list, don't save
                            continue

                        # get the remaining duplicates
                        dups = list(glob_dups[(w,gi)][nidx+1:])
                        # can the duplicates be forwarded this cycle?
                        dups_copy = list(dups)
                        for d in dups_copy:
                            # duplicates issued this cycle:
                            if gn/Tn == d/Tn:
                                forwarded_dups += 1
                                removed_dups += 1
                                # remove from global dups list 
                                glob_dups[(w,gi)].remove(d)
                                dups.remove(d)
                                #print 'forward', w, gi, d 
                                weights[r, d % Tn ,i] = 0
                                #print 'forward', w, gi, gn, '->', d

                        if ( len(dups) == 0 ):
                            # all duplicates forwarded
                            continue

                        #continue # no buffering
                        # there are still duplicates in the future:
                        # add the product to the buffer
                        keys = buffer[set][way].keys()
                        set_size = buffer_size/n_sets/n_ways
                        if (len(keys) >= set_size):
                            # buffer is full
                            #continue # dont evict ever
                            
                            # find an eviction candidate
                            # policy: longest next reuse
                            victim_c = -1
                            victim_key = None

                            for key in keys:
                                next_c = next_c_dict[set][way][key]
                                if next_c > victim_c:
                                    victim_c = next_c
                                    victim_key = key
                            # print "n =", gn, "evicting", buffer[set][way][victim_key]

                            # if the victim has a longer reuse distance than the current dup, replace it
                            replacement_c = chunk.n_i_to_cycle(dups[0], gi, Nn, Ni, Tnn, Tii, Tn, Ti)
                            if (victim_c > replacement_c):
                                #print "deleting", victim_key[0], victim_key[1]
                                del buffer[set][way][victim_key]
                                # key next_c_dict by the raw (w,gi) tuple, matching the
                                # lookups and deletes above (was my_hash(victim_key))
                                del next_c_dict[set][way][victim_key]
                            else:
                                continue # don't add the replacement to the buffer

                        # add buffer entry
                        #print "adding", w, gi
                        #dups.pop(0)
                        buffer[set][way][(w,gi)] = dups
                        #if gi == 0:
                        #    print "adding dups to buffer", dups
                        next_c_dict[set][way][(w,gi)] = calc_buffer_next_reuse(buffer[set][way], (w,gi))
                        glob_max_buffer_size = max(glob_max_buffer_size, len(buffer[set][way]))

    return

#################################################################################
# NOTE: the early return above makes everything below unreachable; it appears to
# be an earlier version of the per-row duplicate-removal loop left in the file.

    for r in range(0,R-1):
    #    print "C:", weights[r,n,:]
    #    print "N:", weights[r+1,n,:]
        rmax = min(r + lookahead , R-1 )

        # print r, "##############################"
        # for tr in range(r, rmax + 1):
            # print_row(weights,tr)

        # check for all zeros
        if (is_zero( weights[r,:,:] ) ):
            # print r # print all lines that are all zeroes
            zero_rows += 1
            continue

        # counter for the limits
        in_ctr = [in_limit] * Tn # input limit per filter (m), max inputs to adder tree
        out_ctr = [out_limit] * Ti # number of products that can be broadcast for an input i
        ictr = 0 # number of products reused
        octr = 0 # number of products broadcast
        ires = 0 # ignore
        ores = 0 # ignore
        changed = True
        dup_found = set()

        # fill bubbles
        while changed:
            changed = False
            dup_found_iter = []

            # look for duplicates
            for n in range(0,Tn):
                for i in range(0,Ti):
                    # look for duplicates only if we haven't looked at it before
                    key = (ind[r,n,i][0], ind[r,n,i][2], weights[r,n,i])
                    if (key not in dup_found and not is_zero(weights[r,n,i])):

                        # dup_index is list of duplicates for (r,n,i)
                        dup_index = look_for_duplicates(r, n, i, weights, ind, dup_map, lookahead)
                        dup_found.add(key)
                        if (dup_index):
                            dup_index.append((r,n,i))
                            dup_found_iter.append(dup_index)
                            #print "A ", dup_index, "W ", weights[r,n,i]

            # prioritize removal here

            # reorder the duplicate removal order
            # do the ones with more duplicates first
            dup_found_iter.sort(key=len, reverse=True)

            # pick the filter with the least number of duplicates to
            # send first, this should reduce input dependences
            n_ctr = {}
            for dup_list in dup_found_iter:
                for element in dup_list:
                    n_ctr[element[1]] = n_ctr.get(element[1],0) + 1
            
            for tmp in dup_found_iter:
                tmp.sort(key=lambda fn: n_ctr.get(fn[1], Tn*Ti+1))

            # remove duplicates in list order
            #   checking for constraints
            for dup_list in dup_found_iter:
                # for each set of duplicates
                # first dup that can be issued (in the current row) will be issued, rest will be removed

                for index in dup_list:

                    (rr,nn,ii) = index 

                    # only output if the index is on the current row
                    if (r != rr):
                        continue
 
                    # remove all other duplicates if possible
                    (weights, ind, out_ctr, in_ctr, stat) = remove_duplicates(rr, nn, ii,
                          weights, ind, dup_list, out_ctr, in_ctr, lookaside, lookahead, nn)
                    dup_rm += stat[0]
                    dup_bubble += stat[1]
                    dup_bubble_pop += stat[2]
                    ictr += stat[3]
                    octr += stat[4]

                    # exit when a remove succeeded
                    if (stat[0]):
                        break


            # remove all the bubbles in the row
            for n in range(0,Tn):
                for i in range(0,Ti):
    
                    # fill in the bubble
                    if (is_zero( weights[r,n,i] )):
                        # found a zero to fill, look for replacement
                        (weights, ind, tmp) = re.look_for_replacement(
                                    r, n, i, weights, ind, lookaside, lookahead)
                        zero_rm += tmp
                        changed = changed or tmp
   
   
        # end of change loop

        #out_per_row[octr/max(1,out_limit)] += 1
        #in_per_row[ictr/max(1,in_limit)] += 1
        #out_res_per_row[ores/max(1,out_limit)] += 1
        #in_res_per_row[ires/max(1,in_limit)/Tn] += 1

        # print "--------------------------------"
        # for tr in range(r, rmax + 1):
            # print_row(weights,tr)

    # print_filter(weights,n)
    #print_weights(weights)

    # check if the last row is zero
    if (is_zero( weights[R-1,:,:] ) ):
        zero_rows += 1

    #print "row reduction = ", R-zero_rows , "/", R
    #print "Output Counter: ", out_per_row
    #print "Input Counter: ", in_per_row
    #print "Output Res: ", out_res_per_row
    #print "Input Res: ", in_res_per_row
    #print "Bubble/Dup/B+D/B+D+P: ", (zero_rm, dup_rm, dup_bubble, dup_bubble_pop)

    global total_reduced_rows 
    total_reduced_rows += R - zero_rows
    global total_rows 
    total_rows += R
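
The buffering scheme in Example #1 behaves like a small set-associative cache of products, keyed by (weight value, global input index) and evicted by longest next reuse, a Belady-style policy. Below is a minimal, self-contained sketch of that policy; the class and method names are illustrative, not part of the MIsim source.

# Hypothetical sketch of a set-associative product buffer with
# longest-next-reuse eviction; names are illustrative.
class ProductBuffer(object):
    def __init__(self, n_sets, n_ways, entries_per_way):
        self.n_sets = n_sets
        self.n_ways = n_ways
        self.entries = entries_per_way
        # buf[s][w] maps key -> sorted list of future consumer cycles
        self.buf = [[{} for _ in range(n_ways)] for _ in range(n_sets)]

    def lookup(self, key, gi):
        # return the way holding key, or -1 on a miss
        s = gi % self.n_sets
        for w in range(self.n_ways):
            if key in self.buf[s][w]:
                return w
        return -1

    def insert(self, key, gi, way, next_cycles):
        # insert a product; on a full set, evict the entry whose next
        # reuse is farthest away, unless the newcomer's is even farther
        s = gi % self.n_sets
        d = self.buf[s][way]
        if len(d) >= self.entries:
            victim = max(d, key=lambda k: d[k][0])
            if d[victim][0] <= next_cycles[0]:
                return False  # the newcomer would be the eviction victim
            del d[victim]
        d[key] = list(next_cycles)
        return True

pb = ProductBuffer(n_sets=4, n_ways=2, entries_per_way=1)
pb.insert((3, 17), gi=17, way=0, next_cycles=[5, 9])
assert pb.lookup((3, 17), gi=17) == 0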
Code Example #2
File: new-burd.py  Project: iniverno/MIsim
def process_chunk(weights, weight_idx, lookaside, lookahead, out_limit, in_limit):

    chunk_n, chunk_i = weight_idx
    #print "chunk:", chunk_n, chunk_i
    zero_rows = 0

    # recalculate global index
    (R,Tn,Ti) = weights.shape

    # store the original indices of each weight in weights
    ind = np.indices((R,Tn,Ti)).swapaxes(0,3).swapaxes(0,2).swapaxes(0,1)

    # this generates a count of the duplicates for each key within the chunk
    dup_map = map_duplicates(weights)

    out_per_row =       [0] * (Ti+1)
    in_per_row =        [0] * (Tn+1)
    out_res_per_row =   [0] * (Ti+1)
    in_res_per_row =    [0] * (Tn+1)

    zero_rm = 0 # number of zeros removed
    dup_rm = 0 # number of dups removed
    dup_bubble = 0 # ignore
    dup_bubble_pop = 0 # ignore

    global out_b
    global group_size

    # for each row
    #   while changes
    #       remove duplicates
    #       fill zeros
    for r in range(0,R-1):
        rmax = min(r + lookahead , R-1 )

        # check for all zeros
        if ( is_zero( weights[r,:,:] ) ):
            # print r # print all lines that are all zeroes
            zero_rows += 1
            continue

        # counter for the limits
        in_ctr = [in_limit] * Tn # input limit per filter (m), max inputs to adder tree
        out_ctr = [out_limit] * Ti # number of products that can be broadcast for an input i
        group_out_ctr = [out_b] * (Tn / group_size) # output broadcast budget per group of filters
        ictr = 0 # number of products reused
        octr = 0 # number of products broadcast
        ires = 0 # ignore
        ores = 0 # ignore
        changed = True
        dup_found = set()

        # fill bubbles
        # how are stats maintained across iterations?
        # are we potentially double promoting beyond the lookahead window?
        while changed:
            changed = False

            # list of lists of duplicate indices
            # [[ (r,n,i) ]]
            dup_found_iter = []
            # look for duplicates
            for n in range(0,Tn):
                for i in range(0,Ti):
                    # look for duplicates only if we haven't looked at it before
                    w = map_weight(weights[r,n,i])
                    key = (ind[r,n,i][0], ind[r,n,i][2], w)
                    if ( key not in dup_found and not is_zero(weights[r,n,i]) ):

                        # dup_index is list of duplicates for (r,n,i) (not including producer)
                        dup_index = look_for_duplicates(r, n, i, weights, ind, dup_map, lookahead)
                        dup_found.add(key)
                        if ( len(dup_index) > 0 ):
                            dup_index.append((r,n,i))
                            dup_found_iter.append(dup_index)
                            #print "A ", dup_index, "W ", weights[r,n,i]

            # prioritize removal here
            # reorder the duplicate removal order
            # do the ones with more duplicates first
            dup_found_iter.sort(key=len, reverse=True)

            # pick the filter with the least number of duplicates to
            # send first, this should reduce input dependences
            n_ctr = {}
            for dup_list in dup_found_iter:
                for element in dup_list:
                    n_ctr[element[1]] = n_ctr.get(element[1],0) + 1
            
            for tmp in dup_found_iter:
                tmp.sort(key=lambda fn: n_ctr.get(fn[1], Tn*Ti+1))

            # remove duplicates in list order
            #   checking for constraints
            for dup_list in dup_found_iter:
                # for each set of duplicates
                # first dup that can be issued (in the current row) will be issued, rest will be removed
                for index in dup_list:

                    # this is the producer
                    (rr,nn,ii) = index 

                    # only output if the index is on the current row
                    if (r != rr):
                        continue

                    # make sure the group's output limit has not been exceeded
                    if ( group_out_ctr[nn/group_size] == 0 ):
                        continue
 
                    # remove all other duplicates if possible
                    stats = remove_duplicates(rr, nn, ii, weights, ind, dup_list, out_ctr, in_ctr)
                    [dup_rm_i, dup_bubble_i, dup_bubble_pop_i, ictr_i, octr_i] = stats
                    dup_rm          += dup_rm_i
                    dup_bubble      += dup_bubble_i
                    dup_bubble_pop  += dup_bubble_pop_i
                    ictr            += ictr_i
                    octr            += octr_i

                    # exit when a remove succeeded
                    if (dup_rm_i):
                        group_out_ctr[nn/group_size] -= 1
                        break

            # remove duplicates from buffer here
            # when we forward a buffered duplicate, remove the corresponding weight
            # this gets lower priority since we can do it any time within the chunk
            
            # this may create a zero row, but we can't skip it since we've used this cycle to do all this stuff




            # NOTE: we don't have to check for a row of zeros again, since we still have producers in this row
            # remove all the bubbles in the row
            for n in range(0,Tn):
                for i in range(0,Ti):
    
                    # fill in the bubble
                    if ( is_zero( weights[r,n,i] )):
                        # found a zero to fill, look for replacement
                        (weights, ind, tmp) = re.look_for_replacement( r, n, i, weights, ind, lookaside, lookahead)
                        zero_rm += tmp
                        changed = changed or tmp
            
            # add producers to buffer here
   
   
        # end of change loop

        #out_per_row[octr/max(1,out_limit)] += 1
        #in_per_row[ictr/max(1,in_limit)] += 1
        #out_res_per_row[ores/max(1,out_limit)] += 1
        #in_res_per_row[ires/max(1,in_limit)/Tn] += 1

        # print "--------------------------------"
        # for tr in range(r, rmax + 1):
            # print_row(weights,tr)

    # print_filter(weights,n)
    #print_weights(weights)

    # check if the last row is zero
    if (is_zero( weights[R-1,:,:] ) ):
        zero_rows += 1

    #print "row reduction = ", R-zero_rows , "/", R
    #print "Output Counter: ", out_per_row
    #print "Input Counter: ", in_per_row
    #print "Output Res: ", out_res_per_row
    #print "Input Res: ", in_res_per_row
    #print "Bubble/Dup/B+D/B+D+P: ", (zero_rm, dup_rm, dup_bubble, dup_bubble_pop)

    global total_reduced_rows 
    total_reduced_rows += R - zero_rows
    global total_rows 
    total_rows += R


    # print weights.any(axis=(1,2)) # print out false if a row is all zero
    #wa = [weights[i,:,:].any() for i in range(weights.shape[0])] # changed for 1.6.1 compatibility

    #ind = ind[wa,:,:]
    #weights = weights[wa,:,:]

    return (zero_rm, dup_rm)
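
Before removing duplicates, Example #2 orders the work in two passes: duplicate sets with more members go first (they save the most products), and within each set, candidates from filters carrying fewer duplicates are preferred as producers, which should reduce contention on each filter's adder-tree inputs. A small standalone sketch of that ordering, with illustrative names:

# Sketch of the two-level ordering applied to dup_found_iter, a list of
# duplicate sets, each a list of (r, n, i) tuples.
def order_duplicates(dup_found_iter, default_rank):
    # larger duplicate sets first
    dup_found_iter.sort(key=len, reverse=True)

    # count how many duplicates each filter n carries overall
    n_ctr = {}
    for dup_list in dup_found_iter:
        for (_, n, _) in dup_list:
            n_ctr[n] = n_ctr.get(n, 0) + 1

    # within each set, prefer producers from lightly loaded filters
    for dup_list in dup_found_iter:
        dup_list.sort(key=lambda rni: n_ctr.get(rni[1], default_rank))
    return dup_found_iter

dups = [[(0, 1, 2), (0, 0, 5)], [(0, 1, 3)]]
print(order_duplicates(dups, default_rank=99))
# [[(0, 0, 5), (0, 1, 2)], [(0, 1, 3)]] -- filter 0 carries fewer dups, so it leads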
Code Example #3
def process_chunk(weights, weight_idx, lookaside, lookahead, out_limit, in_limit):

    chunk_n, chunk_i = weight_idx

    zero_rows = 0

    # recalculate global index
    (R,Tn,Ti) = weights.shape

    # store the original indices of each weight in weights
    ind = np.indices((R,Tn,Ti)).swapaxes(0,3).swapaxes(0,2).swapaxes(0,1)

    # this generates a count of the duplicates for each key within the chunk
    dup_map = map_duplicates(weights)

    dup_bubble = 0 # ignore
    dup_bubble_pop = 0 # ignore

    global out_b
    global group_size
    global glob_dups
    global zero_rm

    # for each row
    #   while changes
    #       remove duplicates
    #       fill zeros
    for r in range(0,R):

        # check for all zeros
        if ( is_zero( weights[r,:,:] ) ):
            # print r # print all lines that are all zeroes
            zero_rows += 1
            continue

        # counter for the limits
        in_ctr = [in_limit] * Tn # input limit per filter (m), max inputs to adder tree
        out_ctr = [out_limit] * Ti # number of products that can be broadcast for an input i
        group_out_ctr = [out_b] * (Tn / group_size) # output broadcast budget per group of filters
        ictr = 0 # number of products reused
        octr = 0 # number of products broadcast
        changed = True
        dup_found = set() # track the duplicates found so we don't double count them

        # fill bubbles
        # how are stats maintained across iterations?
        # are we potentially double promoting beyond the lookahead window?
        while changed:
            changed = False

            # look for buffered duplicates broadcasting to this row

            # list of lists of duplicate indices
            # [[ (r,n,i) ]]
            dup_found_iter = look_for_live_dups(weights, ind, r, dup_map, dup_found)
            # add duplicate products already stored in the buffer

            # for testing
            # buffer[0][0][(map_weight(weights[0,0,0]),0)] = [0]
            for dup_set in dup_found_iter[:]:
                (cr,cn,ci) = dup_set[0]
                w = weights[cr,cn,ci]
            
                (orig_r, orig_n, orig_i) = ind[cr,cn,ci]
                (gn,gi) = get_global_weight_idx(chunk_n, chunk_i, orig_r, orig_n, orig_i)
                way = buffer_check(w, gi)

                # add buffered duplicates to list as (-1,way,set) 
                if ( way >= 0 ):
                    s = gi % n_sets
                    dup_set.insert( 0, (-1,cn,ci) ) #FIXME: cn,ci are placeholders, we need to do something different for the buffer config

                # remove singletons 
                if (len(dup_set) == 1):
                    dup_found_iter.remove(dup_set) 
            
            # now we have a list of list of duplicates in the current row and buffer
            # if a duplicate is in the buffer then it is stored as (-1,n,i)
            
            # choose a producer for each set of duplicates and put it at the front of the set

            # simple heuristic to choose producer:
            #   1. choose a buffered product
            #   2. otherwise choose the first live dup
            #   this happens naturally since buffered products are added to the front of the list

            # prioritize removal here
            # reorder the duplicate removal order
            # do the ones with more duplicates first
            dup_found_iter.sort(key=len, reverse=True)

            # pick the filter with the least number of duplicates to
            # send first, this should reduce input dependences
            n_ctr = {}
            for dup_list in dup_found_iter:
                for element in dup_list:
                    n_ctr[element[1]] = n_ctr.get(element[1],0) + 1
            
            for tmp in dup_found_iter:
                tmp.sort(key=lambda fn: n_ctr.get(fn[1], Tn*Ti+1))

            # remove duplicates in list order
            #   checking for constraints
            remove_dups(weights, ind, r, dup_found_iter, in_ctr, out_ctr, group_out_ctr)

            # this may create a zero row, but we can't skip it since we've used this cycle to do all this stuff

            # remove all the zeros in the row
            for n in range(0,Tn):
                for i in range(0,Ti):
    
                    # fill in the bubble
                    if ( is_zero( weights[r,n,i] )):
                        orig_zero = ( (r,n,i) == ind[r,n,i] ).all()
                        # found a zero to fill, look for replacement
                        zero_removed = re.look_for_replacement( r, n, i, weights, ind, lookaside, lookahead)
                        # only count zeros that were zeros in the original layout
                        if orig_zero:
                            zero_rm += zero_removed
                        changed = changed or zero_removed
   
        # end of change loop

        # now we know which products will be calculated this cycle

        # for all buffered dups that were not forwarded, remove them from the buffer list

        # update the buffer with new products produced in this cycle (row)
        buffer_update_for_row(weights, weight_idx, r)

    # end of row loop

    global total_reduced_rows 
    total_reduced_rows += R - zero_rows
    global total_rows 
    total_rows += R
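
All of these functions rely on the same indexing trick: np.indices((R, Tn, Ti)) followed by the three swapaxes calls produces an array where ind[r, n, i] == [r, n, i], so when a weight is promoted into a bubble its original coordinates travel with it. Example #3 uses this to compute orig_zero, i.e. whether a zero slot is an original zero or one left behind by a move. A quick demonstration:

import numpy as np

R, Tn, Ti = 2, 3, 4
ind = np.indices((R, Tn, Ti)).swapaxes(0, 3).swapaxes(0, 2).swapaxes(0, 1)
assert (ind[1, 2, 3] == np.array([1, 2, 3])).all()  # identity at the start

# moving a weight to fill a bubble moves its index record too
ind[0, 0, 0] = ind[1, 2, 3]
# (r,n,i) == ind[r,n,i] now fails: this slot no longer holds an original value
print((np.array([0, 0, 0]) == ind[0, 0, 0]).all())  # False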
Code Example #4
def process_weights(weights, lookaside, lookahead):

    # gather stats about data
    # ones = np.count_nonzero(weights.count('1'))
    # print "ones  = ", ones
    # zeros = np.count_nonzero(weights.count('0'))
    # print "zeros = ", zeros
    # percent = (ones + 0.0)/(ones+zeros)
    # print "percent ones = ", percent
    # rows = ( (ones + 0.0)/(ones+zeros) * 64 )
    # print "rows of ones = ", rows

    # for n in range(0,Tn):
    # col = weights[:,n,:]
    # ones = np.count_nonzero(col.count('1'))
    # zeros = np.count_nonzero(col.count('0'))
    # rows = ( (ones + 0.0)/(ones+zeros) * 64 )
    # print n, "rows of ones = ", rows

    # print_weights(weights)
    # print_filter(weights,n)

    zero_rows = 0

    (R, Tn, Ti) = weights.shape
    ind = np.indices((R, Tn, Ti)).swapaxes(0, 3).swapaxes(0, 2).swapaxes(0, 1)

    # iterate to the end to detect zero row
    for r in range(0, R):
        #    print "C:", weights[r,n,:]
        #    print "N:", weights[r+1,n,:]
        rmax = min(r + lookahead, R - 1)

        # print r, "##############################"
        # for tr in range(r, rmax + 1):
        # print_row(weights,tr)

        # check for all zeros
        if (is_zero(weights[r, :, :])):
            # print r # print all lines that are all zeroes
            zero_rows += 1
            continue

        # fill bubbles
        for n in range(0, Tn):
            for i in range(0, Ti):

                if (is_zero(weights[r, n, i])):
                    # found a zero to fill, look for replacement
                    weights, ind, _ = re.look_for_replacement(
                        r, n, i, weights, ind, lookaside, lookahead)

        # print "--------------------------------"
        # for tr in range(r, rmax + 1):
        # print_row(weights,tr)

    # print_filter(weights,n)
    # print_weights(weights)

    # print "row reduction = ", R-zero_rows , "/", R
    global total_reduced_rows
    total_reduced_rows += R - zero_rows
    global total_rows
    total_rows += R

    # wa = weights.any(axis=(1,2)) # print out false if a row is all zero
    wa = [weights[i, :, :].any() for i in range(weights.shape[0])]  # changed for 1.6.1 compatibility

    ind = ind[wa, :, :]
    weights = weights[wa, :, :]

    return (R - zero_rows, ind, weights)
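
The compaction step at the end of process_weights keeps only rows whose Tn x Ti slice contains a nonzero weight. weights.any(axis=(1,2)) would express this directly, but tuple axes require NumPy >= 1.7, hence the per-row list comprehension kept for 1.6.1 compatibility. A small sketch of the same masking:

import numpy as np

weights = np.zeros((3, 2, 2))
weights[1, 0, 1] = 5             # only row 1 has a nonzero weight

wa = [weights[r, :, :].any() for r in range(weights.shape[0])]
print(wa)                        # [False, True, False]
weights = weights[wa, :, :]      # boolean mask drops the all-zero rows
print(weights.shape)             # (1, 2, 2)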
Code Example #5
def process_chunk(weights, weight_idx, lookaside, lookahead, out_limit,
                  in_limit):

    chunk_n, chunk_i = weight_idx
    #print "chunk:", chunk_n, chunk_i
    zero_rows = 0

    # recalculate global index
    (R, Tn, Ti) = weights.shape

    # store the original indices of each weight in weights
    ind = np.indices((R, Tn, Ti)).swapaxes(0, 3).swapaxes(0, 2).swapaxes(0, 1)

    # this generates a count of the duplicates for each key within the chunk
    dup_map = map_duplicates(weights)

    out_per_row = [0] * (Ti + 1)
    in_per_row = [0] * (Tn + 1)
    out_res_per_row = [0] * (Ti + 1)
    in_res_per_row = [0] * (Tn + 1)

    zero_rm = 0  # number of zeros removed
    dup_rm = 0  # number of dups removed
    dup_bubble = 0  # ignore
    dup_bubble_pop = 0  # ignore

    global out_b
    global group_size

    # for each row
    #   while changes
    #       remove duplicates
    #       fill zeros
    for r in range(0, R - 1):
        rmax = min(r + lookahead, R - 1)

        # check for all zeros
        if (is_zero(weights[r, :, :])):
            # print r # print all lines that are all zeroes
            zero_rows += 1
            continue

        # counters for the limits
        in_ctr = [in_limit] * Tn    # input limit per filter (m), max inputs to adder tree
        out_ctr = [out_limit] * Ti  # number of products that can be broadcast for an input i
        group_out_ctr = [out_b] * (Tn / group_size)  # output broadcast budget per group of filters
        ictr = 0  # number of products reused
        octr = 0  # number of products broadcast
        ires = 0  # ignore
        ores = 0  # ignore
        changed = True
        dup_found = set()

        # fill bubbles
        # how are stats maintained across iterations?
        # are we potentially double promoting beyond the lookahead window?
        while changed:
            changed = False

            # list of lists of duplicate indices
            # [[ (r,n,i) ]]
            dup_found_iter = []
            # look for duplicates
            for n in range(0, Tn):
                for i in range(0, Ti):
                    # look for duplicates only if we haven't looked at it before
                    w = map_weight(weights[r, n, i])
                    key = (ind[r, n, i][0], ind[r, n, i][2], w)
                    if (key not in dup_found
                            and not is_zero(weights[r, n, i])):

                        # dup_index is list of duplicates for (r,n,i) (not including producer)
                        dup_index = look_for_duplicates(
                            r, n, i, weights, ind, dup_map, lookahead)
                        dup_found.add(key)
                        if (len(dup_index) > 0):
                            dup_index.append((r, n, i))
                            dup_found_iter.append(dup_index)
                            #print "A ", dup_index, "W ", weights[r,n,i]

            # prioritize removal here
            # reorder the duplicate removal order
            # do the ones with more duplicates first
            dup_found_iter.sort(key=len, reverse=True)

            # pick the filter with the least number of duplicates to
            # send first, this should reduce input dependences
            n_ctr = {}
            for dup_list in dup_found_iter:
                for element in dup_list:
                    n_ctr[element[1]] = n_ctr.get(element[1], 0) + 1

            for tmp in dup_found_iter:
                tmp.sort(key=lambda fn: n_ctr.get(fn[1], Tn * Ti + 1))

            # remove duplicates in list order
            #   checking for constraints
            for dup_list in dup_found_iter:
                # for each set of duplicates
                # first dup that can be issued (in the current row) will be issued, rest will be removed
                for index in dup_list:

                    # this is the producer
                    (rr, nn, ii) = index

                    # only output if the index is on the current row
                    if (r != rr):
                        continue

                    # make sure it has not exceeded group's output limit
                    if (group_out_ctr[nn / group_size] == 0):
                        continue

                    # remove all other duplicates if possible
                    stats = remove_duplicates(rr, nn, ii, weights, ind,
                                              dup_list, out_ctr, in_ctr)
                    [dup_rm_i, dup_bubble_i, dup_bubble_pop_i, ictr_i,
                     octr_i] = stats
                    dup_rm += dup_rm_i
                    dup_bubble += dup_bubble_i
                    dup_bubble_pop += dup_bubble_pop_i
                    ictr += ictr_i
                    octr += octr_i

                    # exit when a remove succeeded
                    if (dup_rm_i):
                        group_out_ctr[nn / group_size] -= 1
                        break

            # remove duplicates from buffer here
            # when we forward a buffered duplicate, remove the corresponding weight
            # this gets lower priority since we can do it any time within the chunk

            # this may create a zero row, but we can't skip it since we've used this cycle to do all this stuff

            # NOTE: we don't have to check for a row of zeros again, since we still have producers in this row
            # remove all the bubbles in the row
            for n in range(0, Tn):
                for i in range(0, Ti):

                    # fill in the bubble
                    if (is_zero(weights[r, n, i])):
                        # found a zero to fill, look for replacement
                        (weights, ind, tmp) = re.look_for_replacement(
                            r, n, i, weights, ind, lookaside, lookahead)
                        zero_rm += tmp
                        changed = changed or tmp

            # add producers to buffer here

        # end of change loop

        #out_per_row[octr/max(1,out_limit)] += 1
        #in_per_row[ictr/max(1,in_limit)] += 1
        #out_res_per_row[ores/max(1,out_limit)] += 1
        #in_res_per_row[ires/max(1,in_limit)/Tn] += 1

        # print "--------------------------------"
        # for tr in range(r, rmax + 1):
        # print_row(weights,tr)

    # print_filter(weights,n)
    #print_weights(weights)

    # check if the last row is zero
    if (is_zero(weights[R - 1, :, :])):
        zero_rows += 1

    #print "row reduction = ", R-zero_rows , "/", R
    #print "Output Counter: ", out_per_row
    #print "Input Counter: ", in_per_row
    #print "Output Res: ", out_res_per_row
    #print "Input Res: ", in_res_per_row
    #print "Bubble/Dup/B+D/B+D+P: ", (zero_rm, dup_rm, dup_bubble, dup_bubble_pop)

    global total_reduced_rows
    total_reduced_rows += R - zero_rows
    global total_rows
    total_rows += R

    # print weights.any(axis=(1,2)) # print out false if a row is all zero
    #wa = [weights[i,:,:].any() for i in range(weights.shape[0])] # changed for 1.6.1 compatibility

    #ind = ind[wa,:,:]
    #weights = weights[wa,:,:]

    return (zero_rm, dup_rm)
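
Examples #2 and #5 add one constraint over Example #1's flow: a per-group output budget. Filters are partitioned into groups of group_size, and each group may broadcast at most out_b forwarded products per row (group_out_ctr is decremented on each successful removal). A standalone sketch with illustrative values:

# per-group broadcast budget; the values here are made up for illustration
out_b, group_size, Tn = 2, 4, 8
group_out_ctr = [out_b] * (Tn // group_size)   # one budget per filter group

def try_issue(nn):
    # issue a producer in filter nn only if its group still has budget
    g = nn // group_size
    if group_out_ctr[g] == 0:
        return False               # group's output ports are exhausted this row
    group_out_ctr[g] -= 1
    return True

print([try_issue(n) for n in (0, 1, 2, 5)])  # [True, True, False, True]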
Code Example #6
def process_weights(weights, lookaside, lookahead):

    # gather stats about data
    # ones = np.count_nonzero(weights.count('1'))
    # print "ones  = ", ones
    # zeros = np.count_nonzero(weights.count('0')) 
    # print "zeros = ", zeros
    # percent = (ones + 0.0)/(ones+zeros)
    # print "percent ones = ", percent
    # rows = ( (ones + 0.0)/(ones+zeros) * 64 )
    # print "rows of ones = ", rows

    # for n in range(0,Tn):
        # col = weights[:,n,:]
        # ones = np.count_nonzero(col.count('1'))
        # zeros = np.count_nonzero(col.count('0')) 
        # rows = ( (ones + 0.0)/(ones+zeros) * 64 )
        # print n, "rows of ones = ", rows

    # print_weights(weights)
    # print_filter(weights,n)

    zero_rows = 0

    (R,Tn,Ti) = weights.shape
    ind = np.indices((R,Tn,Ti)).swapaxes(0,3).swapaxes(0,2).swapaxes(0,1)

    # iterate to the end to detect zero row 
    for r in range(0,R):
    #    print "C:", weights[r,n,:]
    #    print "N:", weights[r+1,n,:]
        rmax = min(r + lookahead , R-1 )

        # print r, "##############################"
        # for tr in range(r, rmax + 1):
            # print_row(weights,tr)

        # check for all zeros
        if (is_zero( weights[r,:,:] ) ):
            # print r # print all lines that are all zeroes
            zero_rows += 1
            continue

        # fill bubbles
        for n in range(0,Tn):
            for i in range(0,Ti):

                if (is_zero( weights[r,n,i] )):
                    # found a zero to fill, look for replacement
                    weights, ind, _ = re.look_for_replacement(r,n,i,weights,ind,
                                                 lookaside,lookahead)
                    
        # print "--------------------------------"
        # for tr in range(r, rmax + 1):
            # print_row(weights,tr)

    # print_filter(weights,n)
    # print_weights(weights)

    # print "row reduction = ", R-zero_rows , "/", R
    global total_reduced_rows 
    total_reduced_rows += R - zero_rows
    global total_rows 
    total_rows += R

    # wa = weights.any(axis=(1,2)) # print out false if a row is all zero
    wa = [weights[i,:,:].any() for i in range(weights.shape[0])] # changed for 1.6.1 compatibility

    ind = ind[wa,:,:]
    weights = weights[wa,:,:]

    return (R-zero_rows,ind,weights)
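
re.look_for_replacement is defined elsewhere in the project; as a rough illustration of the lookahead half of such a search, the hypothetical helper below promotes the next nonzero weight in the same (n, i) lane from up to lookahead rows ahead, swapping the ind records so origins stay attached. The real routine also searches neighboring lanes (lookaside).

import numpy as np

# hypothetical sketch, not the MIsim implementation
def fill_bubble(weights, ind, r, n, i, lookahead):
    R = weights.shape[0]
    for rr in range(r + 1, min(r + lookahead, R - 1) + 1):
        if weights[rr, n, i] != 0:
            weights[r, n, i] = weights[rr, n, i]   # promote into the bubble
            weights[rr, n, i] = 0                  # leave a zero behind
            # swap index records so each value keeps its original coords
            tmp = ind[r, n, i].copy()
            ind[r, n, i] = ind[rr, n, i]
            ind[rr, n, i] = tmp
            return 1                               # one zero filled
    return 0

R, Tn, Ti = 3, 1, 1
w = np.array([[[0.]], [[0.]], [[7.]]])
ind = np.indices((R, Tn, Ti)).swapaxes(0, 3).swapaxes(0, 2).swapaxes(0, 1)
print(fill_bubble(w, ind, 0, 0, 0, lookahead=2), w[:, 0, 0])  # 1 [7. 0. 0.]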