def process_weights(weights, weight_idx, lookaside, lookahead, out_limit, in_limit):
    chunk_n, chunk_i = weight_idx
    #print "chunk:", chunk_n, chunk_i
    zero_rows = 0

    # recalculate global index
    (R, Tn, Ti) = weights.shape
    ind = np.indices((R, Tn, Ti)).swapaxes(0, 3).swapaxes(0, 2).swapaxes(0, 1)

    dup_map = map_duplicates(weights, True)

    out_per_row = [0] * (Ti + 1)
    in_per_row = [0] * (Tn + 1)
    out_res_per_row = [0] * (Ti + 1)
    in_res_per_row = [0] * (Tn + 1)

    zero_rm = 0         # number of zeros removed
    dup_rm = 0          # number of dups removed
    dup_bubble = 0      # ignore
    dup_bubble_pop = 0  # ignore

    global glob_dups
    global removed_dups
    global forwarded_dups
    global buffer
    global glob_max_buffer_size
    global next_c_dict
    global n_sets
    global Tii
    global Tnn

    # iterate in chunk order and save duplicate values
    for r in range(R):
        for n in range(Tn):
            for i in range(Ti):
                w = map_weight(weights[r, n, i])

                #if (i/Tii == 0 and n/Tn == 0):
                #    # start of new partial sum calculation
                #    for tw in range(n_ways):
                #        for ts in range(n_sets):
                #            for key in buffer[ts][tw].keys():
                #                for tn in buffer[ts][tw][key]:
                #                    if tn/Tn == n/Tn:
                #                        buffer[ts][tw][key].remove(tn)
                #                if len(buffer[ts][tw][key]) == 0:
                #                    # get rid of this entry in the buffer
                #                    #print "deleting", w, gi
                #                    del buffer[ts][tw][key]
                #                    del next_c_dict[ts][tw][key]
                #                else:
                #                    next_c_dict[ts][tw][key] = calc_buffer_next_reuse(buffer[ts][tw], key)

                if (w == 0):
                    continue

                # which set and way does this weight map to?
                # (set_idx avoids shadowing the set() builtin)
                set_idx = i % n_sets
                way = n % n_ways

                assert len(buffer[set_idx][way].keys()) == len(next_c_dict[set_idx][way].keys())

                (gn, gi) = get_global_weight_idx(chunk_n, chunk_i, r, n, i)

                # is this a duplicate?
                if ((w, gi) in glob_dups and len(glob_dups[(w, gi)]) > 1):
                    #if gi == 0:
                    #    print "dup: ", gn, gi, w

                    # is the product already in the buffer?
                    found_way = -1
                    for tw in range(n_ways):
                        if (w, gi) in buffer[set_idx][tw]:
                            found_way = tw

                    if found_way >= 0:
                        if (gn not in buffer[set_idx][found_way][(w, gi)]):
                            continue  # this product was forwarded by a previous operation
                        if gn != buffer[set_idx][found_way][(w, gi)][0]:
                            print "gn = %d but list[0] = %d" % (gn, buffer[set_idx][found_way][(w, gi)][0])

                        # remove current key
                        buffer[set_idx][found_way][(w, gi)].remove(gn)
                        removed_dups += 1
                        # print "removed", w, gn, gi

                        # have all the duplicates been forwarded?
                        if len(buffer[set_idx][found_way][(w, gi)]) == 0:
                            # get rid of this entry in the buffer
                            #print "deleting", w, gi
                            del buffer[set_idx][found_way][(w, gi)]
                            del next_c_dict[set_idx][found_way][(w, gi)]
                        else:
                            next_c_dict[set_idx][found_way][(w, gi)] = calc_buffer_next_reuse(buffer[set_idx][found_way], (w, gi))
                    else:
                        # product is not stored in the buffer
                        # will this product be reused?
                        nidx = glob_dups[(w, gi)].index(gn)
                        if (nidx == len(glob_dups[(w, gi)]) - 1):
                            # last duplicate in the list, don't save
                            continue

                        # get the remaining duplicates
                        dups = list(glob_dups[(w, gi)][nidx + 1:])

                        # can the duplicates be forwarded this cycle?
                        dups_copy = list(dups)
                        for d in dups_copy:
                            # duplicates issued this cycle:
                            if gn / Tn == d / Tn:
                                forwarded_dups += 1
                                removed_dups += 1
                                # remove from global dups list
                                glob_dups[(w, gi)].remove(d)
                                dups.remove(d)
                                #print 'forward', w, gi, d
                                weights[r, d % Tn, i] = 0
                                #print 'forward', w, gi, gn, '->', d

                        if (len(dups) == 0):
                            # all duplicates forwarded
                            continue

                        #continue  # no buffering

                        # there are still duplicates in the future: add to buffer
                        global buffer_size
                        keys = buffer[set_idx][way].keys()
                        set_size = buffer_size / n_sets / n_ways
                        if (len(keys) >= set_size):
                            # buffer is full
                            #continue  # don't ever evict

                            # find an eviction candidate
                            # policy: longest next reuse
                            victim_c = -1
                            victim_key = None
                            for key in keys:
                                (kn, ki) = (buffer[set_idx][way][key][0], key[1])
                                next_c = next_c_dict[set_idx][way][key]
                                if next_c > victim_c:
                                    victim_c = next_c
                                    victim_key = key
                            # print "n =", gn, "evicting", buffer[victim_key]

                            # if the victim has a longer reuse distance than the current dup, replace it
                            replacement_c = chunk.n_i_to_cycle(dups[0], gi, Nn, Ni, Tnn, Tii, Tn, Ti)
                            if (victim_c > replacement_c):
                                #print "deleting", victim_key[0], victim_key[1]
                                # next_c_dict must be keyed by the raw (w,gi) tuple: every read
                                # above and the eviction scan use the raw key, so wrapping the
                                # key in my_hash() here would KeyError on the next lookup
                                del buffer[set_idx][way][victim_key]
                                del next_c_dict[set_idx][way][victim_key]
                            else:
                                continue  # don't add the replacement to the list

                        # add buffer entry
                        #print "adding", w, gi
                        #dups.pop(0)
                        buffer[set_idx][way][(w, gi)] = dups
                        #if gi == 0:
                        #    print "adding dups to buffer", dups
                        next_c_dict[set_idx][way][(w, gi)] = calc_buffer_next_reuse(buffer[set_idx][way], (w, gi))
                        glob_max_buffer_size = max(glob_max_buffer_size, len(buffer[set_idx][way].keys()))
    return

    #############################################################################
    # NOTE: everything below is unreachable because of the early return above.
    for r in range(0, R - 1):
        # print "C:", weights[r,n,:]
        # print "N:", weights[r+1,n,:]
        rmax = min(r + lookahead, R - 1)
        # print r, "##############################"
        # for tr in range(r, rmax + 1):
        #     print_row(weights, tr)

        # check for all zeros
        if (is_zero(weights[r, :, :])):
            # print r  # print all lines that are all zeroes
            zero_rows += 1
            continue

        # counters for the limits
        in_ctr = [in_limit] * Tn    # input limit per filter (m), max inputs to adder tree
        out_ctr = [out_limit] * Ti  # number of products that can be broadcast for an input i
        ictr = 0  # number of products reused
        octr = 0  # number of products broadcast
        ires = 0  # ignore
        ores = 0  # ignore

        changed = True
        dup_found = set()

        # fill bubbles
        while changed:
            changed = False
            dup_found_iter = []

            # look for duplicates
            for n in range(0, Tn):
                for i in range(0, Ti):
                    # look for duplicates only if we haven't looked at this weight before
                    key = (ind[r, n, i][0], ind[r, n, i][2], weights[r, n, i])
                    if (key not in dup_found and not is_zero(weights[r, n, i])):
                        # dup_index is a list of duplicates for (r,n,i)
                        dup_index = look_for_duplicates(r, n, i, weights, ind, dup_map, lookahead)
                        dup_found.add(key)
                        if (dup_index):
                            dup_index.append((r, n, i))
                            dup_found_iter.append(dup_index)
                            #print "A ", dup_index, "W ", weights[r,n,i]

            # prioritize removal here: reorder the duplicate removal order and
            # do the sets with more duplicates first
            dup_found_iter.sort(key=len, reverse=True)

            # pick the filter with the least number of duplicates to send first;
            # this should reduce input dependences
            n_ctr = {}
            for dup_list in dup_found_iter:
                for element in dup_list:
                    n_ctr[element[1]] = n_ctr.get(element[1], 0) + 1
            for tmp in dup_found_iter:
                tmp.sort(key=lambda fn: n_ctr.get(fn[1], Tn * Ti + 1))

            # remove duplicates in list order, checking for constraints
            for dup_list in dup_found_iter:
                # for each set of duplicates: the first dup that can be issued
                # (in the current row) is issued, the rest are removed
                for index in dup_list:
                    (rr, nn, ii) = index
                    # only output if the index is on the current row
                    if (r != rr):
                        continue
                    # remove all other duplicates if possible
                    (weights, ind, out_ctr, in_ctr, stat) = remove_duplicates(
                        rr, nn, ii, weights, ind, dup_list, out_ctr, in_ctr,
                        lookaside, lookahead, nn)
                    dup_rm += stat[0]
                    dup_bubble += stat[1]
                    dup_bubble_pop += stat[2]
                    ictr += stat[3]
                    octr += stat[4]
                    # exit when a remove succeeded
                    if (stat[0]):
                        break

            # remove all the bubbles in the row
            for n in range(0, Tn):
                for i in range(0, Ti):
                    # fill in the bubble
                    if (is_zero(weights[r, n, i])):
                        # found a zero to fill, look for a replacement
                        (weights, ind, tmp) = re.look_for_replacement(
                            r, n, i, weights, ind, lookaside, lookahead)
                        zero_rm += tmp
                        changed = changed or tmp
        # end of change loop

        #out_per_row[octr/max(1,out_limit)] += 1
        #in_per_row[ictr/max(1,in_limit)] += 1
        #out_res_per_row[ores/max(1,out_limit)] += 1
        #in_res_per_row[ires/max(1,in_limit)/Tn] += 1

        # print "--------------------------------"
        # for tr in range(r, rmax + 1):
        #     print_row(weights, tr)
        # print_filter(weights, n)

    #print_weights(weights)

    # check if the last row is zero
    if (is_zero(weights[R - 1, :, :])):
        zero_rows += 1

    #print "row reduction = ", R-zero_rows, "/", R
    #print "Output Counter: ", out_per_row
    #print "Input Counter: ", in_per_row
    #print "Output Res: ", out_res_per_row
    #print "Input Res: ", in_res_per_row
    #print "Bubble/Dup/B+D/B+D+P: ", (zero_rm, dup_rm, dup_bubble, dup_bubble_pop)

    global total_reduced_rows
    total_reduced_rows += R - zero_rows
    global total_rows
    total_rows += R
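# The buffer above is set-associative: a product is placed by input index
# (set_idx = i % n_sets) and filter index (way = n % n_ways), and eviction
# picks the resident entry whose next reuse is furthest in the future, a
# Belady-style policy. Below is a minimal standalone sketch of that victim
# selection, assuming the same layout as above -- buf maps (weight, input_idx)
# keys to pending consumers, next_c maps the same keys to next-use cycles.
# The names here are illustrative, not part of this module's API.
def _evict_longest_reuse(buf, next_c):
    victim_key = None
    victim_c = -1
    for key in buf.keys():
        # keep the key with the largest (latest) next-reuse cycle
        if next_c[key] > victim_c:
            victim_c = next_c[key]
            victim_key = key
    return (victim_key, victim_c)

# usage sketch, one set/way holding two products:
#   buf = {('w1', 3): [4, 6], ('w2', 0): [5]}
#   next_c = {('w1', 3): 12, ('w2', 0): 7}
#   _evict_longest_reuse(buf, next_c) -> (('w1', 3), 12)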
def process_chunk(weights, weight_idx, lookaside, lookahead, out_limit, in_limit):
    chunk_n, chunk_i = weight_idx
    #print "chunk:", chunk_n, chunk_i
    zero_rows = 0

    # recalculate global index
    (R, Tn, Ti) = weights.shape
    # store the original indices of each weight in weights
    ind = np.indices((R, Tn, Ti)).swapaxes(0, 3).swapaxes(0, 2).swapaxes(0, 1)

    # this generates a count of the duplicates for each key within the chunk
    dup_map = map_duplicates(weights)

    out_per_row = [0] * (Ti + 1)
    in_per_row = [0] * (Tn + 1)
    out_res_per_row = [0] * (Ti + 1)
    in_res_per_row = [0] * (Tn + 1)

    zero_rm = 0         # number of zeros removed
    dup_rm = 0          # number of dups removed
    dup_bubble = 0      # ignore
    dup_bubble_pop = 0  # ignore

    global out_b
    global group_size

    # for each row:
    #   while changes:
    #     remove duplicates
    #     fill zeros
    for r in range(0, R - 1):
        rmax = min(r + lookahead, R - 1)

        # check for all zeros
        if (is_zero(weights[r, :, :])):
            # print r  # print all lines that are all zeroes
            zero_rows += 1
            continue

        # counters for the limits
        in_ctr = [in_limit] * Tn    # input limit per filter (m), max inputs to adder tree
        out_ctr = [out_limit] * Ti  # number of products that can be broadcast for an input i
        group_out_ctr = [out_b] * (Tn / group_size)  # broadcast limit per group of filters

        ictr = 0  # number of products reused
        octr = 0  # number of products broadcast
        ires = 0  # ignore
        ores = 0  # ignore

        changed = True
        dup_found = set()

        # fill bubbles
        # how are stats maintained across iterations?
        # are we potentially double promoting beyond the lookahead window?
        while changed:
            changed = False

            # list of lists of duplicate indices: [[ (r,n,i) ]]
            dup_found_iter = []

            # look for duplicates
            for n in range(0, Tn):
                for i in range(0, Ti):
                    # look for duplicates only if we haven't looked at this weight before
                    w = map_weight(weights[r, n, i])
                    key = (ind[r, n, i][0], ind[r, n, i][2], w)
                    if (key not in dup_found and not is_zero(weights[r, n, i])):
                        # dup_index is a list of duplicates for (r,n,i) (not including the producer)
                        dup_index = look_for_duplicates(r, n, i, weights, ind, dup_map, lookahead)
                        dup_found.add(key)
                        if (len(dup_index) > 0):
                            dup_index.append((r, n, i))
                            dup_found_iter.append(dup_index)
                            #print "A ", dup_index, "W ", weights[r,n,i]

            # prioritize removal here: reorder the duplicate removal order and
            # do the sets with more duplicates first
            dup_found_iter.sort(key=len, reverse=True)

            # pick the filter with the least number of duplicates to send first;
            # this should reduce input dependences
            n_ctr = {}
            for dup_list in dup_found_iter:
                for element in dup_list:
                    n_ctr[element[1]] = n_ctr.get(element[1], 0) + 1
            for tmp in dup_found_iter:
                tmp.sort(key=lambda fn: n_ctr.get(fn[1], Tn * Ti + 1))

            # remove duplicates in list order, checking for constraints
            for dup_list in dup_found_iter:
                # for each set of duplicates: the first dup that can be issued
                # (in the current row) is issued, the rest are removed
                for index in dup_list:
                    # this is the producer
                    (rr, nn, ii) = index
                    # only output if the index is on the current row
                    if (r != rr):
                        continue
                    # make sure it has not exceeded the group's output limit
                    if (group_out_ctr[nn / group_size] == 0):
                        continue
                    # remove all other duplicates if possible
                    stats = remove_duplicates(rr, nn, ii, weights, ind, dup_list, out_ctr, in_ctr)
                    [dup_rm_i, dup_bubble_i, dup_bubble_pop_i, ictr_i, octr_i] = stats
                    dup_rm += dup_rm_i
                    dup_bubble += dup_bubble_i
                    dup_bubble_pop += dup_bubble_pop_i
                    ictr += ictr_i
                    octr += octr_i
                    # exit when a remove succeeded
                    if (dup_rm_i):
                        group_out_ctr[nn / group_size] -= 1
                        break

            # remove duplicates from buffer here:
            # when we forward a buffered duplicate, remove the corresponding weight;
            # this gets lower priority since we can do it any time within the chunk

            # this may create a zero row, but we can't skip it since we've used
            # this cycle to do all this stuff
            # NOTE: we don't have to check for a row of zeros again, since we at
            # least have producers in this row

            # remove all the bubbles in the row
            for n in range(0, Tn):
                for i in range(0, Ti):
                    # fill in the bubble
                    if (is_zero(weights[r, n, i])):
                        # found a zero to fill, look for a replacement
                        (weights, ind, tmp) = re.look_for_replacement(
                            r, n, i, weights, ind, lookaside, lookahead)
                        zero_rm += tmp
                        changed = changed or tmp

            # add producers to buffer here
        # end of change loop

        #out_per_row[octr/max(1,out_limit)] += 1
        #in_per_row[ictr/max(1,in_limit)] += 1
        #out_res_per_row[ores/max(1,out_limit)] += 1
        #in_res_per_row[ires/max(1,in_limit)/Tn] += 1

        # print "--------------------------------"
        # for tr in range(r, rmax + 1):
        #     print_row(weights, tr)
        # print_filter(weights, n)

    #print_weights(weights)

    # check if the last row is zero
    if (is_zero(weights[R - 1, :, :])):
        zero_rows += 1

    #print "row reduction = ", R-zero_rows, "/", R
    #print "Output Counter: ", out_per_row
    #print "Input Counter: ", in_per_row
    #print "Output Res: ", out_res_per_row
    #print "Input Res: ", in_res_per_row
    #print "Bubble/Dup/B+D/B+D+P: ", (zero_rm, dup_rm, dup_bubble, dup_bubble_pop)

    global total_reduced_rows
    total_reduced_rows += R - zero_rows
    global total_rows
    total_rows += R

    # print weights.any(axis=(1,2))  # prints False for a row that is all zero
    #wa = [weights[i,:,:].any() for i in range(weights.shape[0])]  # changed for 1.6.1 compatibility
    #ind = ind[wa,:,:]
    #weights = weights[wa,:,:]

    return (zero_rm, dup_rm)
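# process_chunk orders duplicate sets before issuing: bigger sets first, then,
# within each set, the entries whose filter (n) participates in fewer
# duplicates overall, which spreads forwarding work across adder trees. Below
# is a small standalone sketch of that two-level ordering, assuming sets are
# lists of (r, n, i) tuples; 'fallback' plays the role of Tn*Ti+1 above, and
# the helper name and data are illustrative only.
def _prioritize_dup_sets(dup_sets, fallback):
    # larger duplicate sets are handled first
    dup_sets.sort(key=len, reverse=True)
    # count how often each filter index appears across all sets
    n_ctr = {}
    for dup_list in dup_sets:
        for (_, n, _) in dup_list:
            n_ctr[n] = n_ctr.get(n, 0) + 1
    # within each set, try the least-contended filters first (stable sort)
    for dup_list in dup_sets:
        dup_list.sort(key=lambda rni: n_ctr.get(rni[1], fallback))
    return dup_sets

# usage sketch:
#   sets = [[(0, 1, 2)], [(0, 0, 0), (0, 2, 3), (0, 0, 5)]]
#   _prioritize_dup_sets(sets, fallback=999)
#   -> the 3-element set moves to the front, and inside it (0, 2, 3)
#      (filter 2, one occurrence) precedes the two filter-0 entries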
def process_chunk(weights, weight_idx, lookaside, lookahead, out_limit, in_limit):
    chunk_n, chunk_i = weight_idx
    zero_rows = 0

    # recalculate global index
    (R, Tn, Ti) = weights.shape
    # store the original indices of each weight in weights
    ind = np.indices((R, Tn, Ti)).swapaxes(0, 3).swapaxes(0, 2).swapaxes(0, 1)

    # this generates a count of the duplicates for each key within the chunk
    dup_map = map_duplicates(weights)

    dup_bubble = 0      # ignore
    dup_bubble_pop = 0  # ignore

    global out_b
    global group_size
    global glob_dups
    global zero_rm

    # for each row:
    #   while changes:
    #     remove duplicates
    #     fill zeros
    for r in range(0, R):
        # check for all zeros
        if (is_zero(weights[r, :, :])):
            # print r  # print all lines that are all zeroes
            zero_rows += 1
            continue

        # counters for the limits
        in_ctr = [in_limit] * Tn    # input limit per filter (m), max inputs to adder tree
        out_ctr = [out_limit] * Ti  # number of products that can be broadcast for an input i
        group_out_ctr = [out_b] * (Tn / group_size)  # broadcast limit per group of filters

        ictr = 0  # number of products reused
        octr = 0  # number of products broadcast

        changed = True
        dup_found = set()  # track the duplicates found so we don't double count them

        # fill bubbles
        # how are stats maintained across iterations?
        # are we potentially double promoting beyond the lookahead window?
        while changed:
            changed = False

            # look for live duplicates in this row, as a list of lists of
            # duplicate indices: [[ (r,n,i) ]]
            dup_found_iter = look_for_live_dups(weights, ind, r, dup_map, dup_found)

            # add duplicate products already stored in the buffer
            # for testing:
            # buffer[0][0][(map_weight(weights[0,0,0]),0)] = [0]
            for dup_set in dup_found_iter[:]:
                (cr, cn, ci) = dup_set[0]
                w = weights[cr, cn, ci]
                (orig_r, orig_n, orig_i) = ind[cr, cn, ci]
                (gn, gi) = get_global_weight_idx(chunk_n, chunk_i, orig_r, orig_n, orig_i)
                way = buffer_check(w, gi)
                # add buffered duplicates to the list as (-1,way,set)
                if (way >= 0):
                    s = gi % n_sets
                    dup_set.insert(0, (-1, cn, ci))  # FIXME: cn,ci are placeholders; we need to do something different for the buffer config
                # remove singletons
                if (len(dup_set) == 1):
                    dup_found_iter.remove(dup_set)

            # now we have a list of lists of duplicates in the current row and buffer;
            # if a duplicate is in the buffer then it is stored as (-1,n,i)

            # choose a producer for each set of duplicates and put it at the front
            # of the set; simple heuristic to choose the producer:
            #   1. choose a buffered product
            #   2. choose the first live dup
            # this happens naturally since buffered products are added to the front of the list

            # prioritize removal here: reorder the duplicate removal order and
            # do the sets with more duplicates first
            dup_found_iter.sort(key=len, reverse=True)

            # pick the filter with the least number of duplicates to send first;
            # this should reduce input dependences
            n_ctr = {}
            for dup_list in dup_found_iter:
                for element in dup_list:
                    n_ctr[element[1]] = n_ctr.get(element[1], 0) + 1
            for tmp in dup_found_iter:
                tmp.sort(key=lambda fn: n_ctr.get(fn[1], Tn * Ti + 1))

            # remove duplicates in list order, checking for constraints
            remove_dups(weights, ind, r, dup_found_iter, in_ctr, out_ctr, group_out_ctr)

            # this may create a zero row, but we can't skip it since we've used
            # this cycle to do all this stuff

            # remove all the zeros in the row
            for n in range(0, Tn):
                for i in range(0, Ti):
                    # fill in the bubble
                    if (is_zero(weights[r, n, i])):
                        orig_zero = ((r, n, i) == ind[r, n, i]).all()
                        # found a zero to fill, look for a replacement
                        zero_removed = re.look_for_replacement(r, n, i, weights, ind, lookaside, lookahead)
                        if orig_zero:
                            zero_rm += zero_removed
                        changed = changed or zero_removed
        # end of change loop

        # now we know which products will be calculated this cycle:
        # for all buffered dups that were not forwarded, remove them from the
        # buffer list, then update the buffer with the new products produced in
        # this cycle (row)
        buffer_update_for_row(weights, weight_idx, r)
    # end of row loop

    global total_reduced_rows
    total_reduced_rows += R - zero_rows
    global total_rows
    total_rows += R
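# The buffered-duplicate pass above mutates dup_found_iter while looping over
# it, which is only safe because it iterates over a slice copy
# (dup_found_iter[:]). A minimal sketch of that pattern with toy data, using a
# hypothetical helper name:
def _drop_singletons(list_of_lists):
    for lst in list_of_lists[:]:       # iterate over a copy...
        if len(lst) == 1:
            list_of_lists.remove(lst)  # ...so removing from the original is safe
    return list_of_lists

# _drop_singletons([[1], [2, 3], [4]]) -> [[2, 3]]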
def process_weights(weights, lookaside, lookahead):
    # gather stats about the data
    # ones = np.count_nonzero(weights.count('1'))
    # print "ones = ", ones
    # zeros = np.count_nonzero(weights.count('0'))
    # print "zeros = ", zeros
    # percent = (ones + 0.0)/(ones+zeros)
    # print "percent ones = ", percent
    # rows = ( (ones + 0.0)/(ones+zeros) * 64 )
    # print "rows of ones = ", rows
    # for n in range(0,Tn):
    #     col = weights[:,n,:]
    #     ones = np.count_nonzero(col.count('1'))
    #     zeros = np.count_nonzero(col.count('0'))
    #     rows = ( (ones + 0.0)/(ones+zeros) * 64 )
    #     print n, "rows of ones = ", rows
    # print_weights(weights)
    # print_filter(weights,n)

    zero_rows = 0
    (R, Tn, Ti) = weights.shape
    ind = np.indices((R, Tn, Ti)).swapaxes(0, 3).swapaxes(0, 2).swapaxes(0, 1)

    # iterate to the end to detect zero rows
    for r in range(0, R):
        # print "C:", weights[r,n,:]
        # print "N:", weights[r+1,n,:]
        rmax = min(r + lookahead, R - 1)
        # print r, "##############################"
        # for tr in range(r, rmax + 1):
        #     print_row(weights, tr)

        # check for all zeros
        if (is_zero(weights[r, :, :])):
            # print r  # print all lines that are all zeroes
            zero_rows += 1
            continue

        # fill bubbles
        for n in range(0, Tn):
            for i in range(0, Ti):
                if (is_zero(weights[r, n, i])):
                    # found a zero to fill, look for a replacement
                    weights, ind, _ = re.look_for_replacement(
                        r, n, i, weights, ind, lookaside, lookahead)

        # print "--------------------------------"
        # for tr in range(r, rmax + 1):
        #     print_row(weights, tr)
        # print_filter(weights, n)

    # print_weights(weights)
    # print "row reduction = ", R-zero_rows, "/", R

    global total_reduced_rows
    total_reduced_rows += R - zero_rows
    global total_rows
    total_rows += R

    # wa = weights.any(axis=(1,2))  # False entries mark all-zero rows
    wa = [weights[i, :, :].any() for i in range(weights.shape[0])]  # changed for numpy 1.6.1 compatibility
    ind = ind[wa, :, :]
    weights = weights[wa, :, :]
    return (R - zero_rows, ind, weights)
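# A minimal numpy demo (a hypothetical helper, not part of the simulator) of
# the row-compaction step at the end of process_weights: rows whose weights are
# all zero are dropped from both the weight array and the parallel index array,
# using the same per-row .any() mask as above.
def _demo_row_compaction():
    w = np.zeros((3, 2, 2))
    w[1, 0, 1] = 5.0  # only row 1 holds a nonzero weight
    idx = np.indices(w.shape).swapaxes(0, 3).swapaxes(0, 2).swapaxes(0, 1)
    wa = [w[r, :, :].any() for r in range(w.shape[0])]  # [False, True, False]
    w, idx = w[wa, :, :], idx[wa, :, :]
    return w.shape  # (1, 2, 2): the two all-zero rows are gone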