def _conv2d_per_channel(param, num_ch, default):
    """Expand ``param`` to one value per output channel.

    ``None`` becomes ``default`` in every channel; a value that is not an
    ``np.ndarray`` and a 1-D array of length 1 are replicated ``num_ch``
    times as int64.  Any other ndarray is returned unchanged.
    """
    if param is None:
        return np.full([num_ch], default, dtype=np.int64)
    if not isinstance(param, np.ndarray):
        return np.full([num_ch], param, dtype=np.int64)
    if len(param.shape) == 1 and param.shape[0] == 1:
        return np.full([num_ch], param[0], dtype=np.int64)
    return param


def _conv2d_round_offset(rshift):
    """Return ``2 ** (rshift - 1)`` where ``rshift > 0`` and 0 elsewhere."""
    positive = rshift > np.zeros_like(rshift, dtype=np.int64)
    # Clamp the exponent to 0 for non-positive shifts so that the integer
    # power below never sees a negative exponent.
    exponent = np.where(positive, rshift - 1, np.zeros_like(rshift))
    half = np.power(np.full_like(rshift, 2, dtype=np.int64), exponent)
    return np.where(positive, half, np.zeros_like(rshift, dtype=np.int64))


def conv2d(input, filter, strides,
           bias=None, scale=None,
           rshift_mul=None, rshift_sum=None, rshift_out=None,
           act_func=None, padding='SAME',
           dtype=None, mul_dtype=None, sum_dtype=None,
           name=None,
           par_ich=1, par_och=1, par_col=1, par_row=1,
           concur_och=None, stationary='filter',
           input_ram_size=None, filter_ram_size=None,
           bias_ram_size=None, scale_ram_size=None,
           vshamt_mul_ram_size=None, vshamt_sum_ram_size=None,
           vshamt_out_ram_size=None,
           out_ram_size=None,
           disable_keep_input=False,
           input_shape=None, filter_shape=None, out_shape=None,
           input_dtype=None, filter_dtype=None,
           bias_dtype=None, scale_dtype=None,
           vshamt_mul_dtype=None, vshamt_sum_dtype=None,
           vshamt_out_dtype=None):
    """Integer reference model of a 2-D convolution.

    ``input`` is indexed as (batch, row, col, ...) and ``filter`` as
    (out_channel, row, col, ...).  For every output pixel the flattened
    input window is multiplied with each flattened filter and summed, then
    the per-channel result goes through, in this order: left shift by the
    bias alignment, rounding add and right shift by ``rshift_sum``, bias
    add, multiplication by ``scale``, right shift by ``rshift_out``,
    saturation, and finally ``act_func``.

    Args:
        input: 4-D ndarray.
        filter: 4-D ndarray; axis 0 selects the output channel.
        strides: sequence of four strides; indices 1 and 2 are the row and
            column strides, indices 0 and 3 divide the batch size and the
            number of filters when sizing the output.
        bias, scale, rshift_mul, rshift_sum, rshift_out: ``None``, a scalar,
            a length-1 array or a per-output-channel array.  Defaults are
            0 for ``bias`` and the shifts and 1 for ``scale``.
        act_func: ``None`` (identity), a subclass of ``leaky_relu_base``,
            or a class whose ``__name__`` names a function in
            ``nngen.verify``.
        padding: ``'SAME'``, ``'VALID'``, an int applied to all four sides,
            or a sequence ``(top, bottom, left, right)``.
        dtype: optional output type object providing ``point`` and
            ``width``; it sets the saturation range.
        input_dtype, filter_dtype, bias_dtype, scale_dtype: optional type
            objects whose ``point`` attribute gives the fixed-point
            position of the corresponding operand (0 when omitted).

        The remaining parameters are accepted but not read by this
        function.

    Returns:
        int64 ndarray of shape (batch, out_rows, out_cols, out_channels).

    Raises:
        ValueError: if ``padding`` is none of the supported forms.
    """
    in_rows, in_cols = input.shape[1], input.shape[2]
    flt_rows, flt_cols = filter.shape[1], filter.shape[2]
    stride_row, stride_col = strides[1], strides[2]

    if isinstance(padding, str) and padding == 'SAME':
        # opposite order to pool
        _, pad_col_right, pad_col_left = util.pad_size_split(
            in_cols, flt_cols, stride_col)
        _, pad_row_bottom, pad_row_top = util.pad_size_split(
            in_rows, flt_rows, stride_row)
        out_rows = util.pix_size(in_rows, flt_rows, stride_row, 'SAME')
        out_cols = util.pix_size(in_cols, flt_cols, stride_col, 'SAME')
    else:
        if isinstance(padding, str) and padding == 'VALID':
            pad_row_top = pad_row_bottom = 0
            pad_col_left = pad_col_right = 0
        elif isinstance(padding, int):
            pad_row_top = pad_row_bottom = padding
            pad_col_left = pad_col_right = padding
        elif isinstance(padding, (tuple, list)):
            pad_row_top, pad_row_bottom = padding[0], padding[1]
            pad_col_left, pad_col_right = padding[2], padding[3]
        else:
            raise ValueError(
                "padding options must be 'SAME', 'VALID', int, tuple, or list.")
        # Explicit padding is modelled as a 'VALID' convolution over the
        # already enlarged image.
        out_rows = util.pix_size(in_rows + pad_row_top + pad_row_bottom,
                                 flt_rows, stride_row, 'VALID')
        out_cols = util.pix_size(in_cols + pad_col_left + pad_col_right,
                                 flt_cols, stride_col, 'VALID')

    num_och = int(math.ceil(filter.shape[0] / strides[3]))
    shape = (int(math.ceil(input.shape[0] / strides[0])),
             out_rows, out_cols, num_och)

    out = np.zeros(shape, dtype=np.int64)
    input = np.pad(input, [(0, 0),
                           (pad_row_top, pad_row_bottom),
                           (pad_col_left, pad_col_right),
                           (0, 0)], 'constant')

    bias = _conv2d_per_channel(bias, num_och, 0)
    scale = _conv2d_per_channel(scale, num_och, 1)
    rshift_mul = _conv2d_per_channel(rshift_mul, num_och, 0)
    rshift_sum = _conv2d_per_channel(rshift_sum, num_och, 0)
    rshift_out = _conv2d_per_channel(rshift_out, num_och, 0)

    rshift_mul_round = _conv2d_round_offset(rshift_mul)
    rshift_sum_round = _conv2d_round_offset(rshift_sum)

    input_point = 0 if input_dtype is None else input_dtype.point
    filter_point = 0 if filter_dtype is None else filter_dtype.point
    bias_point = 0 if bias_dtype is None else bias_dtype.point
    scale_point = 0 if scale_dtype is None else scale_dtype.point
    out_point = (max(input_point, filter_point) if dtype is None else
                 dtype.point)
    out_width = 32 if dtype is None else dtype.width

    mul_point = max(input_point, filter_point)
    mul_shift = min(input_point, filter_point)
    sum_point = mul_point
    add_point = max(sum_point, bias_point)

    sum_shift = add_point - sum_point
    bias_shift = add_point - bias_point
    shifted_bias = np.left_shift(bias, bias_shift)

    # Mirrors mul_shift: shift by the smaller of the two operand points.
    # The former min(sum_point, max(sum_point, scale_point)) always reduced
    # to sum_point and therefore never looked at scale_point.
    scl_shift = min(sum_point, scale_point)
    shifted_scale = np.right_shift(scale, scl_shift)

    # Symmetric saturation range of the output type.
    p_th = (1 << (out_width - 1)) - 1
    n_th = -1 * p_th
    p_th = p_th >> out_point
    n_th = n_th >> out_point

    def my_matmul_by_matmul(a, w):
        return np.matmul(a, w.T)

    def my_matmul_by_multiply(a, w):
        mul = np.multiply(a, w)
        mul = np.right_shift(mul, mul_shift)
        mul = np.add(mul,
                     rshift_mul_round.reshape([rshift_mul_round.shape[-1], 1]))
        mul = np.right_shift(mul,
                             rshift_mul.reshape([rshift_mul.shape[-1], 1]))
        return np.add.reduce(mul, axis=1)

    # The plain matmul is only equivalent when no product is shifted at
    # all, i.e. when every per-channel shift is zero.  (`x.all() == 0`
    # would already be true if a single channel had a zero shift.)
    if (mul_shift == 0 and
            not rshift_mul_round.any() and not rshift_mul.any()):
        my_matmul = my_matmul_by_matmul
    else:
        my_matmul = my_matmul_by_multiply

    if act_func is None:
        def act_op(x):
            return x
    elif issubclass(act_func, leaky_relu_base):
        act_op = get_leaky_relu_op(act_func.slope, act_func.rshift, dtype)
    else:
        import nngen.verify as verify
        act_op = getattr(verify, act_func.__name__)

    # One flattened filter per row; identical for every batch and pixel.
    w = filter.reshape([num_och, -1])

    for bat in range(shape[0]):
        for oy in range(out.shape[1]):
            ys = oy * stride_row
            ye = ys + flt_rows
            for ox in range(out.shape[2]):
                xs = ox * stride_col
                xe = xs + flt_cols

                a = input[bat, ys:ye, xs:xe].reshape([-1])

                acc = my_matmul(a, w)
                acc = np.left_shift(acc, sum_shift)
                acc = np.add(acc, rshift_sum_round)
                acc = np.right_shift(acc, rshift_sum)
                acc = np.add(acc, shifted_bias)
                acc = np.multiply(acc, shifted_scale)
                acc = np.right_shift(acc, rshift_out)
                acc = np.clip(acc, n_th, p_th)

                out[bat, oy, ox, :] = act_op(acc)

    return out
def max_pool(value, ksize, stride, padding='SAME',
             dtype=None, name=None, par=1,
             value_ram_size=None, out_ram_size=None,
             value_dtype=None):
    """Integer reference model of 2-D max pooling.

    ``value`` is indexed as (batch, row, col, channel).  The image is
    padded with the most negative value of a signed integer of
    ``value_dtype.width`` bits (falling back to ``dtype.width``, then 32),
    so padding never wins the maximum over real data of that width.

    Args:
        value: 4-D ndarray.
        ksize: sequence whose items 1 and 2 are the window height and width.
        stride: sequence whose items 1 and 2 are the row and column strides.
        padding: ``'SAME'``, ``'VALID'``, an int applied to all four sides,
            or a sequence ``(top, bottom, left, right)``.
        dtype: optional output type object providing ``point`` and
            ``width``.
        value_dtype: optional input type object providing ``point`` and
            ``width``.
        name, par, value_ram_size, out_ram_size: accepted but not read by
            this function.

    Returns:
        int64 ndarray of shape (batch, out_rows, out_cols, channel).  When
        ``dtype.point`` differs from the input point the maxima are shifted
        left or right by the difference.

    Raises:
        ValueError: if ``padding`` is none of the supported forms.
    """
    ksize_row = ksize[1]
    ksize_col = ksize[2]
    # Rows use stride[1]; reading stride[2] for both axes gave wrong results
    # whenever the two strides differ.
    stride_row = stride[1]
    stride_col = stride[2]

    in_rows, in_cols = value.shape[1], value.shape[2]

    if isinstance(padding, str) and padding == 'SAME':
        _, pad_col_left, pad_col_right = util.pad_size_split(
            in_cols, ksize_col, stride_col)
        _, pad_row_top, pad_row_bottom = util.pad_size_split(
            in_rows, ksize_row, stride_row)
        out_rows = util.pix_size(in_rows, ksize_row, stride_row, padding)
        out_cols = util.pix_size(in_cols, ksize_col, stride_col, padding)
    else:
        if isinstance(padding, str) and padding == 'VALID':
            pad_row_top = pad_row_bottom = 0
            pad_col_left = pad_col_right = 0
        elif isinstance(padding, int):
            pad_row_top = pad_row_bottom = padding
            pad_col_left = pad_col_right = padding
        elif isinstance(padding, (tuple, list)):
            pad_row_top, pad_row_bottom = padding[0], padding[1]
            pad_col_left, pad_col_right = padding[2], padding[3]
        else:
            raise ValueError(
                "padding options must be 'SAME', 'VALID', int, tuple, or list.")
        # Explicit padding is modelled as 'VALID' pooling over the already
        # enlarged image.
        out_rows = util.pix_size(in_rows + pad_row_top + pad_row_bottom,
                                 ksize_row, stride_row, 'VALID')
        out_cols = util.pix_size(in_cols + pad_col_left + pad_col_right,
                                 ksize_col, stride_col, 'VALID')

    out_shape = (value.shape[0], out_rows, out_cols, value.shape[3])
    out = np.zeros(out_shape, dtype=np.int64)

    if value_dtype is not None:
        pad_value_shift = value_dtype.width
    elif dtype is not None:
        pad_value_shift = dtype.width
    else:
        pad_value_shift = 32

    pad_value = (-1) * (1 << (pad_value_shift - 1))

    value = np.pad(value, [(0, 0),
                           (pad_row_top, pad_row_bottom),
                           (pad_col_left, pad_col_right),
                           (0, 0)],
                   'constant', constant_values=pad_value)

    value_point = 0 if value_dtype is None else value_dtype.point
    out_point = value_point if dtype is None else dtype.point
    max_shift = out_point - value_point

    # Re-align the result to the output point.  A zero shift is a true
    # no-op so that inputs which do not support bit shifts keep working.
    if max_shift > 0:
        def max_op(x):
            return x << max_shift
    elif max_shift < 0:
        def max_op(x):
            return x >> -max_shift
    else:
        def max_op(x):
            return x

    for bat in range(value.shape[0]):
        for oy in range(out.shape[1]):
            ys = oy * stride_row
            ye = ys + ksize_row
            for ox in range(out.shape[2]):
                xs = ox * stride_col
                xe = xs + ksize_col

                window = value[bat, ys:ye, xs:xe]
                max_val = np.max(window, axis=(0, 1))

                # The shift used to be computed but never applied.
                out[bat, oy, ox, :] = max_op(max_val)

    return out
def avg_pool(value, ksize, stride, padding='SAME',
             dtype=None, sum_dtype=None, name=None, par=1,
             force_div=False,
             value_ram_size=None, out_ram_size=None,
             value_dtype=None):
    """Integer reference model of 2-D average pooling.

    ``value`` is indexed as (batch, row, col, channel) and zero padded.
    Each window is summed, ``(ksize_row * ksize_col) // 2`` is added for
    rounding, and the sum is divided by the full window size, so padded
    zeros count towards the average.

    Args:
        value: 4-D ndarray.
        ksize: sequence whose items 1 and 2 are the window height and width.
        stride: sequence whose items 1 and 2 are the row and column strides.
        padding: ``'SAME'``, ``'VALID'``, an int applied to all four sides,
            or a sequence ``(top, bottom, left, right)``.
        dtype: optional output type object providing ``point``.
        force_div: when true, always use true division followed by a cast
            to int64.  That path is also taken for window sizes that are
            not a power of two.  Otherwise floor division is used.
        value_dtype: optional input type object providing ``point``.
        sum_dtype, name, par, value_ram_size, out_ram_size: accepted but
            not read by this function.

    Returns:
        int64 ndarray of shape (batch, out_rows, out_cols, channel).  When
        ``dtype.point`` differs from the input point the averages are
        shifted left or right by the difference.

    Raises:
        ValueError: if ``padding`` is none of the supported forms.
    """
    ksize_row = ksize[1]
    ksize_col = ksize[2]
    # Rows use stride[1]; reading stride[2] for both axes gave wrong results
    # whenever the two strides differ.
    stride_row = stride[1]
    stride_col = stride[2]

    in_rows, in_cols = value.shape[1], value.shape[2]

    if isinstance(padding, str) and padding == 'SAME':
        _, pad_col_left, pad_col_right = util.pad_size_split(
            in_cols, ksize_col, stride_col)
        _, pad_row_top, pad_row_bottom = util.pad_size_split(
            in_rows, ksize_row, stride_row)
        out_rows = util.pix_size(in_rows, ksize_row, stride_row, padding)
        out_cols = util.pix_size(in_cols, ksize_col, stride_col, padding)
    else:
        if isinstance(padding, str) and padding == 'VALID':
            pad_row_top = pad_row_bottom = 0
            pad_col_left = pad_col_right = 0
        elif isinstance(padding, int):
            pad_row_top = pad_row_bottom = padding
            pad_col_left = pad_col_right = padding
        elif isinstance(padding, (tuple, list)):
            pad_row_top, pad_row_bottom = padding[0], padding[1]
            pad_col_left, pad_col_right = padding[2], padding[3]
        else:
            raise ValueError(
                "padding options must be 'SAME', 'VALID', int, tuple, or list.")
        # Explicit padding is modelled as 'VALID' pooling over the already
        # enlarged image.
        out_rows = util.pix_size(in_rows + pad_row_top + pad_row_bottom,
                                 ksize_row, stride_row, 'VALID')
        out_cols = util.pix_size(in_cols + pad_col_left + pad_col_right,
                                 ksize_col, stride_col, 'VALID')

    out_shape = (value.shape[0], out_rows, out_cols, value.shape[3])
    out = np.zeros(out_shape, dtype=np.int64)

    value = np.pad(value, [(0, 0),
                           (pad_row_top, pad_row_bottom),
                           (pad_col_left, pad_col_right),
                           (0, 0)], 'constant')

    value_point = 0 if value_dtype is None else value_dtype.point
    out_point = value_point if dtype is None else dtype.point
    div_shift = out_point - value_point

    # The former one-line form
    #   lambda x: x << s if s >= 0 else lambda x: x >> -s
    # is a single lambda whose else-branch returns another lambda, so a
    # negative shift yielded a function object instead of shifted data.
    # A zero shift is a true no-op.
    if div_shift > 0:
        def div_op(x):
            return x << div_shift
    elif div_shift < 0:
        def div_op(x):
            return x >> -div_shift
    else:
        def div_op(x):
            return x

    num_vars = ksize_col * ksize_row
    half = num_vars // 2

    if force_div or num_vars & (num_vars - 1) != 0:
        # True division then cast: truncates toward zero.
        def divider(x):
            return (x / num_vars).astype(np.int64)
    else:
        # Power-of-two window: floor division.
        def divider(x):
            return x // num_vars

    for bat in range(value.shape[0]):
        for oy in range(out.shape[1]):
            ys = oy * stride_row
            ye = ys + ksize_row
            for ox in range(out.shape[2]):
                xs = ox * stride_col
                xe = xs + ksize_col

                window = value[bat, ys:ye, xs:xe]
                total = np.add.reduce(window, axis=(0, 1))
                total += half

                out[bat, oy, ox, :] = div_op(divider(total))

    return out
def get_control_param_values(self):
    """Build the ordered mapping of control parameter values.

    The values are derived from the aligned shape of the first argument
    (``self.args[0]``), the aligned shape of this object, ``self.ksize``,
    ``self.strides``, ``self.padding`` and ``self.par``.

    Side effect: the resolved left/right/top/bottom padding amounts are
    stored on ``self`` as ``pad_col_left_value``, ``pad_col_right_value``,
    ``pad_row_top_value`` and ``pad_row_bottom_value`` (used by
    ``__str__`` according to the original comment).

    Returns:
        OrderedDict mapping parameter names to ints or lists of ints/bools.
    """
    act = self.args[0]

    # Index 0 and 3 (batch and channel window sizes) are read but unused.
    _, ksize_row, ksize_col, _ = (
        self.ksize[-4], self.ksize[-3], self.ksize[-2], self.ksize[-1])

    act_shape = act.get_aligned_shape()
    act_num_bat, act_num_row, act_num_col, act_num_ch = (
        act_shape[-4], act_shape[-3], act_shape[-2], act_shape[-1])

    # self.strides[-1] (channel stride) is always 1 and is not read.
    stride_bat = self.strides[-4]  # always 1
    stride_row = self.strides[-3]  # height
    stride_col = self.strides[-2]  # width

    out_shape = self.get_aligned_shape()
    _, out_num_row, out_num_col, out_num_ch = (
        out_shape[-4], out_shape[-3], out_shape[-2], out_shape[-1])

    # Resolve padding; any value other than 'SAME', an int or a
    # tuple/list (e.g. 'VALID') means no padding.
    padding = self.padding
    if isinstance(padding, str) and padding == 'SAME':
        pad_col, pad_col_left, pad_col_right = util.pad_size_split(
            act_num_col, ksize_col, stride_col)
        pad_row, pad_row_top, pad_row_bottom = util.pad_size_split(
            act_num_row, ksize_row, stride_row)
    elif isinstance(padding, int):
        pad_col_left = pad_col_right = padding
        pad_row_top = pad_row_bottom = padding
        pad_col = padding * 2
        pad_row = padding * 2
    elif isinstance(padding, (tuple, list)):
        pad_row_top, pad_row_bottom = padding[0], padding[1]
        pad_col_left, pad_col_right = padding[2], padding[3]
        pad_col = padding[2] + padding[3]
        pad_row = padding[0] + padding[1]
    else:
        pad_col = pad_col_left = pad_col_right = 0
        pad_row = pad_row_top = pad_row_bottom = 0

    # for __str__
    self.pad_col_left_value = pad_col_left
    self.pad_col_right_value = pad_col_right
    self.pad_row_top_value = pad_row_top
    self.pad_row_bottom_value = pad_row_bottom

    # Counter limits, clamped at zero.
    max_col_count = max(
        act_num_col + pad_col + 1 - ksize_col - stride_col, 0)
    max_row_count = max(
        act_num_row + pad_row + 1 - ksize_row - stride_row, 0)
    max_bat_count = max(act_num_bat - stride_bat, 0)

    # One flag per kernel row: true when the row index equals
    # i % ksize_row for some i below stride_row.
    dma_flag_conds = [
        any(row_select == (i % ksize_row) for i in range(stride_row))
        for row_select in range(ksize_row)
    ]

    # Byte strides and transfer sizes on the input side.
    aligned_act_num_ch = bt.align_word(act_num_ch,
                                       act.get_word_alignment())
    act_step = bt.to_byte(aligned_act_num_ch * act.get_ram_width())

    act_offset_values = [act_num_col * (y - pad_row_top) * act_step
                         for y in range(ksize_row)]

    act_row_step = act_step * act_num_col * stride_row
    act_bat_step = act_step * act_num_col * act_num_row

    act_read_block = int(math.ceil(aligned_act_num_ch / self.par))
    act_read_size = act_read_block * act_num_col

    # Byte strides and transfer sizes on the output side.
    out_step = bt.to_byte(
        bt.align_word(out_num_ch, self.get_word_alignment()) *
        self.get_ram_width())

    out_row_step = out_step * out_num_col
    out_bat_step = out_step * out_num_col * out_num_row

    inc_out_laddr = int(math.ceil(out_num_ch / self.par))
    out_write_size = inc_out_laddr * out_num_col

    stream_size = int(math.ceil(act_num_ch / self.par))

    col_select_initval = (
        0 if pad_col_left == 0
        else (ksize_col - pad_col_left) % ksize_col)

    stride_col_mod_ksize = stride_col % ksize_col
    ksize_col_minus_stride_col_mod = ksize_col - stride_col_mod_ksize

    # Flattened in (y, x, col_select) order; the value does not depend on y.
    inc_act_laddr_conds = []
    for _y in range(ksize_row):
        for x in range(ksize_col):
            for col_select in range(ksize_col):
                inc_act_laddr_conds.append(any(
                    col_select == ((x + ksize_col - i) % ksize_col)
                    for i in range(stride_col_mod_ksize)))

    inc_act_laddr_small = (int(math.floor(stride_col / ksize_col)) *
                           act_read_block)
    inc_act_laddr_large = (int(math.ceil(stride_col / ksize_col)) *
                           act_read_block)

    stream_act_local_small_offset = (
        -1 * int(math.floor(pad_col_left / ksize_col)) * act_read_block)
    stream_act_local_large_offset = (
        -1 * int(math.ceil(pad_col_left / ksize_col)) * act_read_block)

    stream_act_local_small_flags = []
    stream_act_local_large_flags = []
    for x in range(ksize_col):
        distance = ksize_col - x
        small_flag = distance <= pad_col_left
        large_flag = distance <= (pad_col_left % ksize_col)
        stream_act_local_small_flags.append(small_flag)
        stream_act_local_large_flags.append(small_flag and large_flag)

    # Insertion order below is the order of the returned mapping.
    params = OrderedDict()
    params['act_num_col'] = act_num_col
    params['act_num_row'] = act_num_row
    params['stride_col'] = stride_col
    params['stride_row'] = stride_row
    params['out_num_col'] = out_num_col
    params['out_num_row'] = out_num_row
    params['pad_col_left'] = pad_col_left
    params['pad_row_top'] = pad_row_top
    params['max_col_count'] = max_col_count
    params['max_row_count'] = max_row_count
    params['max_bat_count'] = max_bat_count
    params['dma_flag_conds'] = dma_flag_conds
    params['act_offset_values'] = act_offset_values
    params['act_row_step'] = act_row_step
    params['act_bat_step'] = act_bat_step
    params['act_read_size'] = act_read_size
    params['act_read_block'] = act_read_block
    params['out_row_step'] = out_row_step
    params['out_bat_step'] = out_bat_step
    params['out_write_size'] = out_write_size
    params['stream_size'] = stream_size
    params['col_select_initval'] = col_select_initval
    params['stride_col_mod_ksize'] = stride_col_mod_ksize
    params['ksize_col_minus_stride_col_mod'] = ksize_col_minus_stride_col_mod
    params['inc_act_laddr_conds'] = inc_act_laddr_conds
    params['inc_act_laddr_small'] = inc_act_laddr_small
    params['inc_act_laddr_large'] = inc_act_laddr_large
    params['inc_out_laddr'] = inc_out_laddr
    params['stream_act_local_small_offset'] = stream_act_local_small_offset
    params['stream_act_local_large_offset'] = stream_act_local_large_offset
    params['stream_act_local_small_flags'] = stream_act_local_small_flags
    params['stream_act_local_large_flags'] = stream_act_local_large_flags
    return params