def affine_backward_variables(x_shape, w_shape):
    """Tests the gradients of the variables that are updated in backward."""
    x = tensor.create_gauss(x_shape)
    w = tensor.create_gauss(w_shape)
    b = tensor.create_gauss([w_shape[-1]])
    x_t = tensor.create_transpose(x)
    forward_out = tensor.create_matrix_product(x, w)
    forward_out = tensor.create_gauss(forward_out.shape)  # random upstream gradient with the forward output's shape
    standard_dw = tensor.create_zeros(w.shape, float)
    new_dw = tensor.create_zeros(w.shape, float)
    compare_dw = tensor.create_zeros(w.shape, bool)
    standard_db = tensor.create_zeros(b.shape, float)
    new_db = tensor.create_zeros(b.shape, float)
    compare_db = tensor.create_zeros(b.shape, float)
    # well-known (reference) method
    tensor.transpose(x, x_t)
    tensor.matmul(x_t, forward_out, standard_dw)
    tensor.sum_axis(forward_out, 0, standard_db)
    # new method
    affine.backward_variables(x.array, forward_out.array, new_dw.array, new_db.array)
    tensor.function_element_wise(standard_dw, new_dw, isSame, compare_dw)
    tensor.function_element_wise(standard_db, new_db, isSame, compare_db)
    print(compare_dw)
    print(compare_db)
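# A usage sketch for the test above. The shapes are illustrative assumptions only:
# a batch of 4 samples with 3 features, and a 3x2 weight matrix, so the gradients
# being compared have shapes (3, 2) for dW and (2,) for db.
affine_backward_variables([4, 3], [3, 2])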
def backward(self, dout):
    tensor.transpose(self.W, self.W_t)
    tensor.matmul(dout, self.W_t, self.dout)  # the out kept from forward is lost (its buffer is overwritten)
    tensor.transpose(self.x, self.x_t)
    tensor.matmul(self.x_t, dout, self.dW)
    tensor.sum_axis(dout, 0, self.db)
    return self.dout
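# The in-place sequence above follows the standard affine backward pass. A minimal
# NumPy sketch of the same math, assuming x, W, and dout are plain arrays (the
# shapes are illustrative, not the layer's actual configuration):
import numpy as np

x = np.random.randn(4, 3)     # input batch saved during forward
W = np.random.randn(3, 2)     # weight matrix
dout = np.random.randn(4, 2)  # upstream gradient
dx = dout @ W.T               # gradient passed to the previous layer
dW = x.T @ dout               # gradient w.r.t. W
db = dout.sum(axis=0)         # gradient w.r.t. b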
def test_axis(shape, axis, error=0.001):
    # note: `error` is currently unused by this test
    t1 = tensor.create_randomly(shape)
    n1 = parseNumpy(t1)
    t = tensor.sum_axis(t1, axis, tensor.create_sum(t1, axis))  # sum with the tensor library
    n = np.sum(n1, axis)                                        # reference sum with NumPy
    print(t1)
    print(t)
    print(n)
    return compare(t, n)
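# Usage sketch (shape and axis are illustrative assumptions): sum a 2x3x4 tensor
# along axis 1 and compare the result against np.sum.
test_axis([2, 3, 4], 1)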
def accuracy(self, table):
    """forward must have been called first, and this must be used before backward."""
    out = self.layers[-1].out
    out_argmax = tensor.argmax(out, -1, tensor.create_sum(out, -1))
    table_argmax = tensor.argmax(table, -1, tensor.create_sum(table, -1))
    eq = tensor.function_elment_wise(
        out_argmax, table_argmax, Layers._equal,
        tensor.create_element_wise_product(out_argmax, table_argmax, int))
    reduce_sum = tensor.sum_axis(eq, 0, tensor.Tensor([1], [1]))
    return reduce_sum.array[0] / len(out_argmax.array)
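# For reference, the accuracy above amounts to comparing the argmax indices of the
# network output and the one-hot labels. A minimal NumPy sketch (`out` and `table`
# are illustrative stand-ins for the last layer's output and the label tensor):
import numpy as np

out = np.array([[0.1, 0.8, 0.1], [0.7, 0.2, 0.1]])  # network scores
table = np.array([[0, 1, 0], [1, 0, 0]])            # one-hot labels
acc = np.mean(np.argmax(out, axis=-1) == np.argmax(table, axis=-1))
print(acc)  # 1.0 for this example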
def backward(self, dout):
    # Helper functions used to minimize the number of element-wise loop passes.
    def comput_new_dxc(dxc, xc_dvar):
        return dxc + (2.0 / self.batch_size) * xc_dvar

    def comput_dx(dxc, dmu):
        return dxc - dmu / self.batch_size

    tensor.mul(self.gamma, dout, self.out)              # reuse out as dxn (the forward out is lost)
    tensor.mul(self.out, self.xc, self.tmp_out_shape)   # intermediate step toward dstd
    # Minimizes loop passes.
    tensor.function_elment_wise(
        self.tmp_out_shape, self.std, BatchNormalization.comput_dstd,
        self.tmp_out_shape)  # tmp_out_shape now holds dstd (before the np.sum)
    tensor.function_elment_wise(
        self.tmp_out_shape, self.std, BatchNormalization.comput_dvar,
        self.tmp_out_shape)  # tmp_out_shape now holds dvar (before the np.sum)
    tensor.sum_axis(self.tmp_out_shape, 0, self.dbeta)  # reuse self.dbeta as dvar (the old dbeta value is lost)
    tensor.mul(self.xc, self.dbeta, self.xc)            # reuse xc as xc * dvar (old xc is lost; dvar no longer needed)
    tensor.div(self.out, self.std, self.out)            # reuse out as dxc (dxn is lost; dxn no longer needed)
    tensor.function_elment_wise(self.out, self.xc, comput_new_dxc, self.out)
    tensor.sum_axis(self.out, 0, self.dbeta)            # reuse dbeta as dmu (the dvar value is lost)
    tensor.function_elment_wise(self.out, self.dbeta, comput_dx, self.out)  # final backward dx (the dxc held in out is overwritten)
    tensor.sum_axis(dout, 0, self.dbeta)
    tensor.mul(self.xn, dout, self.tmp_out_shape)       # reuse tmp_out_shape as a scratch buffer for dgamma (the pre-sum dvar is lost)
    tensor.sum_axis(self.tmp_out_shape, 0, self.dgamma)
    return self.out
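# For reference, the in-place buffer reuse above corresponds to the usual (non
# in-place) batch-normalization backward pass. A minimal NumPy sketch under the
# assumption that xc = x - mean, std, xn = xc / std, gamma, and batch_size were
# saved during forward (a reference sketch, not part of the library):
import numpy as np

def batchnorm_backward_reference(dout, xc, std, xn, gamma, batch_size):
    dbeta = dout.sum(axis=0)
    dgamma = (xn * dout).sum(axis=0)
    dxn = gamma * dout
    dxc = dxn / std
    dstd = -((dxn * xc) / (std * std)).sum(axis=0)
    dvar = 0.5 * dstd / std
    dxc += (2.0 / batch_size) * xc * dvar
    dmu = dxc.sum(axis=0)
    dx = dxc - dmu / batch_size
    return dx, dgamma, dbeta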
def test_norm_backward(x_shape, h=0.001):
    def process_no_zero(x):
        return x + 10e-7  # small epsilon to avoid division by zero

    def mul_minus(x):
        return -x

    x = tensor.create_randomly(x_shape, -3, 4)
    #x = tensor.Tensor([1.,2.,3.,5.],[1,4])
    mean = tensor.create_sum(x, 0)
    d_mean = mean.copy()
    #d_mean2 = mean.copy()
    deviation = tensor.create_element_wise_product(x, mean)
    jegop = tensor.create_element_wise_product(deviation, deviation)
    print(jegop.shape)
    dispersion = tensor.create_sum(jegop, 0)
    print(dispersion.shape)
    dispersion2 = dispersion.copy()
    d_dispersion = dispersion.copy()
    d_deviation = deviation.copy()
    batch_size = tensor.Tensor([x_shape[0]], [1])
    tmp_x = x.copy()
    forward_out = x.copy()
    forward_new_out = x.copy()
    backward_out = x.copy()
    backward_new_out = x.copy()
    compare_out = x.copy()
    #dx = tensor.create_ones(x.shape)
    dx = tensor.create_randomly(x.shape)
    print(x)

    # well-known (reference) method
    # forward
    tensor.mean_axis(x, 0, mean)
    print(mean)
    tensor.sub(x, mean, deviation)
    tensor.mul(deviation, deviation, jegop)
    tensor.mean_axis(jegop, 0, dispersion)
    tensor.function(dispersion, process_no_zero, dispersion)
    tensor.function(dispersion, math.sqrt, dispersion)  # dispersion now holds the standard deviation
    tensor.div(deviation, dispersion, forward_out)

    # backward
    tensor.div(dx, dispersion, d_deviation)
    tensor.mul(dx, deviation, tmp_x)
    tensor.div(tmp_x, dispersion, tmp_x)
    tensor.sum_axis(tmp_x, 0, d_dispersion)
    tensor.div(tmp_x, dispersion, tmp_x)
    tensor.sum_axis(tmp_x, 0, d_dispersion)  # overwrites the previous sum: d_dispersion = sum(dx * xc / std^2)
    tensor.function(d_dispersion, mul_minus, d_dispersion)
    tensor.div(deviation, dispersion, tmp_x)
    tensor.mul(tmp_x, d_dispersion, tmp_x)
    tensor.div(tmp_x, batch_size, tmp_x)
    tensor.add(tmp_x, d_deviation, d_deviation)
    tensor.sum_axis(d_deviation, 0, d_mean)
    tensor.div(d_mean, batch_size, d_mean)
    tensor.sub(d_deviation, d_mean, backward_out)

    # new method
    norm.forward(x.array, x.shape, dispersion2.array, forward_new_out.array)
    backward_new_out = forward_new_out.copy()
    norm.backward(dx.array, dispersion2.array, backward_new_out.array)

    tensor.function_element_wise(backward_out, backward_new_out, isSame, compare_out)
    print('back = ')
    print(compare_out)
    tensor.function_element_wise(forward_out, forward_new_out, isSame, compare_out)
    print('forward = ')
    print(compare_out)
    dispersion_compare = dispersion.copy()
    tensor.function_element_wise(dispersion, dispersion2, isSame, dispersion_compare)
    print('dispersion = ')
    print(dispersion_compare)
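# The test above checks the plain normalization step xn = (x - mean) / std and its
# gradient, without gamma/beta. A minimal NumPy sketch of the reference computation
# being verified (names and shapes are illustrative assumptions only):
import numpy as np

x = np.random.uniform(-3, 4, size=(4, 3))
dx_upstream = np.random.uniform(-1, 1, size=x.shape)
batch_size = x.shape[0]

mean = x.mean(axis=0)
xc = x - mean
std = np.sqrt((xc ** 2).mean(axis=0) + 10e-7)
xn = xc / std                                          # forward output

dxc = dx_upstream / std
dstd = -((dx_upstream * xc) / (std * std)).sum(axis=0)
dxc += (xc / std) * dstd / batch_size
dmu = dxc.sum(axis=0)
dx = dxc - dmu / batch_size                            # backward output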