def backward_impl(self, inputs, outputs, prop_down, accum):
    # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
    # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

    # Args
    epsilon = self.forward_func.info.args["epsilon"]

    # Inputs
    x0 = inputs[0].data
    x1 = inputs[1].data
    dy = inputs[2].data
    # Outputs
    dx0 = outputs[0].data
    dx1 = outputs[1].data
    # Grads of inputs
    g_x0 = inputs[0].grad
    g_x1 = inputs[1].grad
    g_dy = inputs[2].grad
    # Grads of outputs
    g_dx0 = outputs[0].grad
    g_dx1 = outputs[1].grad

    # Computation
    if prop_down[2]:
        # Simply dividing by dy would be numerically unstable
        diff = x0 - x1
        mask = F.greater_scalar(F.abs(diff), epsilon)
        maskp = F.greater_scalar(diff, 0.0)
        maskn = 1.0 - maskp
        g_dy_ = (g_dx0 - g_dx1) * (maskp - maskn) * mask
        if accum[2]:
            g_dy += g_dy_
        else:
            g_dy.copy_from(g_dy_)

def prelu_backward(inputs, base_axis=1):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    w0 = inputs[2]
    base_axis += x0.ndim * (base_axis < 0)
    m0 = F.greater_scalar(x0, 0)
    m1 = 1 - m0
    m0 = no_grad(m0)
    m1 = no_grad(m1)
    if w0.shape == ():  # shared
        reshape = [1 for i in range(len(x0.shape))]
        w0 = F.reshape(w0, reshape, inplace=False)
        dw0 = F.sum(dy * x0 * m1)
    else:
        reshape = [w0.shape[0] if i == base_axis else 1
                   for i in range(len(x0.shape))]
        w0 = F.reshape(w0, reshape, inplace=False)
        raxes = [i for i in range(len(x0.shape)) if i != base_axis]
        dw0 = F.sum(dy * x0 * m1, raxes, keepdims=False)
    dx0 = dy * (m0 + w0 * m1)
    return dx0, dw0

def celu_backward(inputs, alpha=1.0, axis=1):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    fstart, fstop, fstep = create_slice(dy.shape, axis, True)
    bstart, bstop, bstep = create_slice(dy.shape, axis, False)
    dy0 = F.slice(dy, fstart, fstop, fstep)
    dy1 = F.slice(dy, bstart, bstop, bstep)
    aep = alpha * F.exp(x0)
    aen = alpha * F.exp(-x0)
    m0 = F.greater_scalar(x0, 0)
    m1 = 1 - m0
    m0 = no_grad(m0)
    m1 = no_grad(m1)
    dx00 = dy0 * (m0 + aep * m1)
    dx01 = dy1 * (m1 + aen * m0)
    dx = dx00 - dx01
    return dx

def epsilon_insensitive_loss_backward(inputs, epsilon):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    x1 = inputs[2]
    d = x0 - x1
    m0 = F.greater_scalar(F.abs(d), epsilon)
    m1 = 1 - m0
    mg = F.greater(x0, x1)
    ml = 1 - mg
    m0 = no_grad(m0)
    mg = no_grad(mg)
    ml = no_grad(ml)
    t0 = m0 * mg
    t1 = -m0 * ml
    dx0 = dy * (t0 + t1)
    dx1 = -dx0
    return dx0, dx1

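# Illustrative numpy sketch (not part of the library code above): the
# epsilon-insensitive gradient is 0 inside the |x0 - x1| <= epsilon tube and
# sign(x0 - x1) outside it, which is what the masked expression above computes
# when dy = 1. Values below are made up for the example.
import numpy as np

x0 = np.array([0.0, 2.0, -3.0])
x1 = np.array([0.1, 0.0, 0.0])
epsilon = 1.0
d = x0 - x1
dx0 = (np.abs(d) > epsilon) * np.sign(d)   # expected [0., 1., -1.]
dx1 = -dx0
print(dx0, dx1)
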
def sigmas_coef(ctx, log_var0, log_var1):
    v0 = F.exp(log_var0)
    v1 = F.exp(log_var1)
    v0_g = F.greater_scalar(v0, 1.)
    v0_l = F.logical_not(v0_g)
    v1_g = F.greater_scalar(v1, 1.)
    v1_l = F.logical_not(v1_g)
    v0_g_and_v1_g = F.logical_and(v0_g, v1_g)
    v0_g_and_v1_l = F.logical_and(v0_g, v1_l)
    v0_l_and_v1_g = F.logical_and(v0_l, v1_g)
    v0_l_and_v1_l = F.logical_and(v0_l, v1_l)
    c = v0_g_and_v1_g \
        + v0_g_and_v1_l * v1 \
        + v0_l_and_v1_g / v0 \
        + v0_l_and_v1_l * v1 / v0
    return c

def ray_march(self, camloc, raydir, t0, t1, N, n_chunks, t_argmin=False):
    # Points computation
    BR, _ = t0.shape
    t0 = F.reshape(t0, (BR, 1, 1))
    t1 = F.reshape(t1, (BR, 1, 1))
    camloc = F.reshape(camloc, (BR, 1, 3))
    raydir = F.reshape(raydir, (BR, 1, 3))
    step = (t1 - t0) / (N - 1)
    intervals = F.reshape(F.arange(0, N), (1, N, 1))
    ts = t0 + step * intervals
    points = camloc + ts * raydir
    points = F.reshape(points, (BR * N, 3))

    # SDF computation
    sdf_points = []
    batch = (BR * N) // n_chunks
    for r in range(0, BR * N, batch):
        sdf_points.append(self.sdf(points[r:r + batch, :]))
    sdf_points = F.reshape(F.concatenate(*sdf_points, axis=0), (BR, N, 1)) if n_chunks != 1 else \
        F.reshape(sdf_points[0], (BR, N, 1))

    # t_argmin computation
    if t_argmin:
        idx_min = F.min(sdf_points, axis=1, keepdims=True, only_index=True)
        t_argmin = F.reshape(F.gather(ts, idx_min, axis=1, batch_dims=1), (BR, 1))
        return t_argmin

    # Intersection check
    points = F.reshape(points, (BR, N, 3))
    sdf_pos = F.greater_equal_scalar(sdf_points[:, :-1, :], 0)
    sdf_neg = F.less_equal_scalar(sdf_points[:, 1:, :], 0)
    mask_hit = sdf_pos * sdf_neg
    decreasing_consts = F.reshape(F.arange(N, 1, -1), (1, N - 1, 1))
    vals = mask_hit * decreasing_consts
    idx_max = F.max(vals, axis=1, only_index=True)

    points = points[:, :-1, :]
    x_hit = F.gather(points, idx_max, axis=1, batch_dims=1)
    x_hit = F.reshape(x_hit, (BR, 3))
    mask_hit = F.greater_scalar(F.sum(mask_hit, axis=1), 0)
    mask_hit = F.reshape(mask_hit, (BR, 1))

    x_hit_rm0 = x_hit
    step = F.reshape(step, (BR, 1))
    raydir = F.reshape(raydir, (BR, 3))
    x_hit_rm1 = x_hit_rm0 + step * raydir

    return x_hit_rm0, x_hit_rm1, mask_hit

def maximum_scalar_backward(inputs, val=1.0):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    m0 = F.greater_scalar(x0, val)
    m0 = no_grad(m0)
    dx0 = dy * m0
    return dx0

def loss_dis_real(logits, rec_imgs, part, img, lmd=1.0):
    # loss = 0.0

    # Hinge loss (following the official implementation)
    loss = F.mean(F.relu(0.2 * F.rand(shape=logits.shape) + 0.8 - logits))

    # Reconstruction loss for rec_img_big (reconstructed from 8x8 features of the original image)
    # Reconstruction loss for rec_img_small (reconstructed from 8x8 features of the resized image)
    # Reconstruction loss for rec_img_part (reconstructed from a part of 16x16 features of the original image)
    if lmd > 0.0:
        # Ground-truth
        img_128 = F.interpolate(img, output_size=(128, 128))
        img_256 = F.interpolate(img, output_size=(256, 256))
        img_half = F.where(F.greater_scalar(part[0], 0.5),
                           img_256[:, :, :128, :], img_256[:, :, 128:, :])
        img_part = F.where(F.greater_scalar(part[1], 0.5),
                           img_half[:, :, :, :128], img_half[:, :, :, 128:])

        # Integrated perceptual loss
        loss = loss + lmd * \
            reconstruction_loss_lpips(rec_imgs, [img_128, img_part])

    return loss

def backward_impl(self, inputs, outputs, prop_down, accum):
    # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
    # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

    # Args
    delta = self.forward_func.info.args["delta"]

    # Inputs
    x0 = inputs[0].data
    x1 = inputs[1].data
    dy = inputs[2].data
    # Outputs
    dx0 = outputs[0].data
    dx1 = outputs[1].data
    # Grads of inputs
    g_x0 = inputs[0].grad
    g_x1 = inputs[1].grad
    g_dy = inputs[2].grad
    # Grads of outputs
    g_dx0 = outputs[0].grad
    g_dx1 = outputs[1].grad

    # Computation
    if prop_down[0] or prop_down[1] or prop_down[2]:
        mask = F.less_scalar(F.abs(x0 - x1), delta)

    if prop_down[0]:
        if accum[0]:
            g_x0 += mask * 2 * dy * (g_dx0 - g_dx1)
        else:
            g_x0.copy_from(mask * 2 * dy * (g_dx0 - g_dx1))
    if prop_down[1]:
        if accum[1]:
            g_x1 += mask * 2 * dy * (g_dx1 - g_dx0)
        else:
            g_x1.copy_from(mask * 2 * dy * (g_dx1 - g_dx0))
    if prop_down[2]:
        # Simply dividing by dy would be numerically unstable
        diff = x0 - x1
        pmask = F.greater_scalar(diff, 0.0)
        nmask = (1.0 - pmask)
        omask = (1.0 - mask)
        g_dx_diff = g_dx0 - g_dx1
        g_dy_ = 2.0 * g_dx_diff * \
            (diff * mask + delta * omask * (pmask - nmask))
        if accum[2]:
            g_dy += g_dy_
        else:
            g_dy.copy_from(g_dy_)

def relu_backward(inputs, inplace=False):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    x0 = get_output(x0, "ReLU")
    m0 = F.greater_scalar(x0, 0)  # result is the same whether inplace or not
    m0 = no_grad(m0)
    dx0 = dy * m0
    return dx0

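# Illustrative sketch (not library code): the mask idiom shared by the backward
# helpers above. F.greater_scalar builds a {0, 1} mask which is then treated as
# a constant so no gradient flows through it. Variable names are made up for
# this example.
import numpy as np
import nnabla as nn
import nnabla.functions as F

x = nn.Variable.from_numpy_array(np.array([-1.0, 0.5, 2.0], dtype=np.float32))
m = F.greater_scalar(x, 0)      # 1 where x > 0, else 0
m = m.apply(need_grad=False)    # stop gradients through the mask
y = x * m                       # same forward result as ReLU(x)
y.forward()
print(y.d)                      # [0.  0.5 2. ]
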
def gaussian_log_likelihood(x, mean, logstd, orig_max_val=255):
    """
    Compute the log-likelihood of a Gaussian distribution for given data `x`.

    Args:
        x (nn.Variable): Target data. The values are assumed to be in [-1, 1],
                         rescaled from the original range [0, orig_max_val].
        mean (nn.Variable): Gaussian mean. Must be the same shape as x.
        logstd (nn.Variable): Gaussian log standard deviation. Must be the same shape as x.
        orig_max_val (int): The maximum value that x originally has before being rescaled.

    Return:
        Log probabilities of x in nats.
    """
    assert x.shape == mean.shape == logstd.shape
    centered_x = x - mean
    inv_std = F.exp(-logstd)
    half_bin = 1.0 / orig_max_val

    def clamp(val):
        # Here we don't need to clip max
        return F.clip_by_value(val, min=1e-12, max=1e8)

    # x + 0.5 (in original scale)
    plus_in = inv_std * (centered_x + half_bin)
    cdf_plus = approx_standard_normal_cdf(plus_in)
    log_cdf_plus = F.log(clamp(cdf_plus))

    # x - 0.5 (in original scale)
    minus_in = inv_std * (centered_x - half_bin)
    cdf_minus = approx_standard_normal_cdf(minus_in)
    log_one_minus_cdf_minus = F.log(clamp(1.0 - cdf_minus))

    log_cdf_delta = F.log(clamp(cdf_plus - cdf_minus))

    log_probs = F.where(
        F.less_scalar(x, -0.999),
        log_cdf_plus,  # Edge case for 0. It uses cdf for -inf as cdf_minus.
        F.where(F.greater_scalar(x, 0.999),
                # Edge case for orig_max_val. It uses cdf for +inf as cdf_plus.
                log_one_minus_cdf_minus,
                log_cdf_delta  # otherwise
                )
    )

    assert log_probs.shape == x.shape
    return log_probs

def elu_backward(inputs, alpha=1.0):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    m0 = F.greater_scalar(x0, 0)
    m1 = 1 - m0
    m0 = no_grad(m0)
    m1 = no_grad(m1)
    dx = dy * (m0 + alpha * F.exp(x0) * m1)
    return dx

def hard_sigmoid_backward(inputs):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    m0 = F.greater_scalar(x0, -2.5)
    m1 = F.less_scalar(x0, 2.5)
    m01 = m0 * m1
    m01 = no_grad(m01)
    dx0 = dy * 0.2 * m01
    return dx0

def leaky_relu_backward(inputs, alpha=0.1, inplace=False):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    x0 = get_output(x0, "LeakyReLU") if inplace else x0
    m0 = F.greater_scalar(x0, 0)  # result is the same whether inplace or not
    m1 = 1 - m0
    m0 = no_grad(m0)
    m1 = no_grad(m1)
    dx0 = dy * (m0 + alpha * m1)
    return dx0

def lab2rgb(input):
    input_trans = F.split(input, axis=1)
    L, a, b = F.split(input, axis=1)
    y = (L + 16.0) / 116.0
    x = (a / 500.0) + y
    z = y - (b / 200.0)
    neg_mask = F.less_scalar(z, 0).apply(need_grad=False)
    z = z * F.logical_not(neg_mask)

    mask_Y = F.greater_scalar(y, 0.2068966).apply(need_grad=False)
    mask_X = F.greater_scalar(x, 0.2068966).apply(need_grad=False)
    mask_Z = F.greater_scalar(z, 0.2068966).apply(need_grad=False)

    Y_1 = (y ** 3) * mask_Y
    Y_2 = L / (116. * 7.787) * F.logical_not(mask_Y)
    var_Y = Y_1 + Y_2

    X_1 = (x ** 3) * mask_X
    X_2 = (x - 16. / 116.) / 7.787 * F.logical_not(mask_X)
    var_X = X_1 + X_2

    Z_1 = (z ** 3) * mask_Z
    Z_2 = (z - 16. / 116.) / 7.787 * F.logical_not(mask_Z)
    var_Z = Z_1 + Z_2

    X = 0.95047 * var_X
    Y = 1.00000 * var_Y
    Z = 1.08883 * var_Z

    var_R = X * 3.2406 + Y * -1.5372 + Z * -0.4986
    var_G = X * -0.9689 + Y * 1.8758 + Z * 0.0415
    var_B = X * 0.0557 + Y * -0.2040 + Z * 1.0570

    mask_R = F.greater_scalar(var_R, 0.0031308).apply(need_grad=False)
    n_mask_R = F.logical_not(mask_R)
    R_1 = (1.055 * (F.maximum2(var_R, n_mask_R) ** (1 / 2.4)) - 0.055) * mask_R
    R_2 = (12.92 * var_R) * n_mask_R
    var_R = R_1 + R_2

    mask_G = F.greater_scalar(var_G, 0.0031308).apply(need_grad=False)
    n_mask_G = F.logical_not(mask_G)
    G_1 = (1.055 * (F.maximum2(var_G, n_mask_G) ** (1 / 2.4)) - 0.055) * mask_G
    G_2 = (12.92 * var_G) * n_mask_G
    var_G = G_1 + G_2

    mask_B = F.greater_scalar(var_B, 0.0031308).apply(need_grad=False)
    n_mask_B = F.logical_not(mask_B)
    B_1 = (1.055 * (F.maximum2(var_B, n_mask_B) ** (1 / 2.4)) - 0.055) * mask_B
    B_2 = (12.92 * var_B) * n_mask_B
    var_B = B_1 + B_2

    return F.stack(var_R, var_G, var_B, axis=1)

def secant(x0, x1, implicit_function, max_post_itr, eps=1e-16):
    f0 = implicit_function(x0)  # > 0
    f1 = implicit_function(x1)  # < 0
    for i in range(max_post_itr):
        nu = f0 * (x1 - x0)
        de = f1 - f0
        mask0 = F.greater_scalar(F.abs(de), eps)
        mask1 = 1 - mask0
        nu = mask0 * nu + mask1 * 0
        de = mask0 * de + mask1 * 1
        xm = x0 - nu / de
        fm = implicit_function(xm)

        mp = F.greater_equal_scalar(fm, 0)
        mn = 1 - mp
        x0 = mp * xm + mn * x0
        f0 = mp * fm + mn * f0
        x1 = mn * xm + mp * x1
        f1 = mn * fm + mp * f1
    return x0, x1

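# Illustrative numpy sketch (not library code) of the secant update used above:
# x_m = x0 - f0 * (x1 - x0) / (f1 - f0), with the division guarded when
# |f1 - f0| < eps. The toy implicit_function below is made up for the example.
import numpy as np

def toy_implicit_function(x):
    return 1.0 - x              # root at x = 1

x0, x1 = np.array([0.0]), np.array([2.0])   # f(x0) > 0, f(x1) < 0
f0, f1 = toy_implicit_function(x0), toy_implicit_function(x1)
xm = x0 - f0 * (x1 - x0) / (f1 - f0)
print(xm)                       # [1.] -- exact after one step for a linear function
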
def unit_sphere_intersection(self, camloc, raydir):
    BR, _ = raydir.shape
    a = 1.0  # raydir is already normalized
    b = 2.0 * F.batch_matmul(F.reshape(camloc, (BR, 1, 3)),
                             F.reshape(raydir, (BR, 3, 1)))
    c = F.batch_matmul(F.reshape(camloc, (BR, 1, 3)),
                       F.reshape(camloc, (BR, 3, 1))) - 1.0

    D = b ** 2 - 4 * a * c
    mask = F.reshape(F.greater_scalar(D, 0.0), (BR, 1))
    b = F.reshape(b, (BR, 1))
    D = F.reshape(D, (BR, 1))
    D = mask * D
    D_sqrt = D ** 0.5
    t_start = -(b + D_sqrt) / (2 * a)
    t_finish = -(b - D_sqrt) / (2 * a)

    t_start = t_start * mask + self.t_near * (1 - mask)
    t_finish = t_finish * mask + self.t_far * (1 - mask)

    return t_start, t_finish, mask

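# Illustrative numpy cross-check (not library code) of the ray/unit-sphere
# quadratic used above: for ||camloc + t * raydir|| = 1 with a unit-length
# raydir, the roots are t = (-b +/- sqrt(b^2 - 4c)) / 2 with
# b = 2 * camloc.raydir and c = ||camloc||^2 - 1. Numbers are made up.
import numpy as np

camloc = np.array([0.0, 0.0, 2.0])
raydir = np.array([0.0, 0.0, -1.0])   # unit length, pointing at the origin
b = 2.0 * camloc.dot(raydir)
c = camloc.dot(camloc) - 1.0
D = b ** 2 - 4.0 * c
t0, t1 = (-b - np.sqrt(D)) / 2.0, (-b + np.sqrt(D)) / 2.0
print(t0, t1)                         # 1.0 and 3.0: entry at z = 1, exit at z = -1
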
def double_backward(g_dx0, g_db0, g_dg0, g_dz0, dy, x0, b0, g0, rm, rv, y0, z0,
                    axes, decay_rate, eps, nonlinearity, batch_stat):
    # Factorized forward graph looks like
    # [x0, b0, g0, rm, rv] -> BN -> [u(, z0)] -> Add -> [v] -> ReLU -> [y]
    # Factorized backward graph looks like
    # [dy] -> d(ReLU) -> [dv] -> d(Add) -> [du(, dz0)] -> d(BN) -> [dx0, db0, dg0, drm, drv]

    # 1ST-ORDER
    # d(Activation)
    if nonlinearity == "relu":
        m0 = F.greater_scalar(y0, 0)
        m0 = no_grad(m0)
        dv = dy * m0
    elif nonlinearity == "":
        dv = dy
    # d(Add)
    du = dv

    # 2ND-ORDER
    # dd(BN)
    bn_double_backward = bn_double_backward_for_batch if batch_stat else \
        bn_double_backward_for_global
    g_du, g_x0, g_b0, g_g0 = bn_double_backward(g_dx0, g_db0, g_dg0,
                                                du, x0, b0, g0, rm, rv,
                                                axes, decay_rate, eps)
    # dd(Add)
    g_dv = g_du
    if g_dz0:
        g_dv += g_dz0
    # dd(Activation)
    if nonlinearity == "relu":
        g_dy = g_dv * m0
    elif nonlinearity == "":
        g_dy = g_dv

    return g_dy, g_x0, g_b0, g_g0

def backward_impl(self, inputs, outputs, prop_down, accum):
    # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
    # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

    # Args
    val = self.forward_func.info.args["val"]

    # Inputs
    x0 = inputs[0].data
    dy = inputs[1].data
    # Outputs
    dx0 = outputs[0].data
    # Grads of inputs
    g_x0 = inputs[0].grad
    g_dy = inputs[1].grad
    # Grads of outputs
    g_dx0 = outputs[0].grad

    # Computation
    if prop_down[1]:
        mask = F.greater_scalar(x0, val)
        if accum[1]:
            g_dy += g_dx0 * mask
        else:
            g_dy.copy_from(g_dx0 * mask)

def main():
    random.seed(args.seed)
    np.random.seed(args.seed)

    # Prepare for CUDA.
    ctx = get_extension_context('cudnn', device_id=args.gpus)
    nn.set_default_context(ctx)

    start_full_time = time.time()
    from iterator import data_iterator

    # Data lists for the KITTI dataset
    train_list = "./dataset/kitti_train.csv"
    test_list = "./dataset/kitti_test.csv"
    train = True
    validation = False

    # Set monitor path.
    monitor_path = './nnmonitor' + str(datetime.now().strftime("%Y%m%d%H%M%S"))

    img_left, img_right, disp_img = read_csv(train_list)
    img_left_test, img_right_test, disp_img_test = read_csv(test_list)
    train_samples = len(img_left)
    test_samples = len(img_left_test)
    train_size = int(len(img_left) / args.batchsize_train)
    test_size = int(len(img_left_test) / args.batchsize_test)

    # Create data iterator.
    data_iterator_train = data_iterator(
        train_samples, args.batchsize_train, img_left, img_right, disp_img,
        train, shuffle=True, dataset=args.dataset)
    data_iterator_test = data_iterator(
        test_samples, args.batchsize_test, img_left_test, img_right_test,
        disp_img_test, validation, shuffle=False, dataset=args.dataset)

    # Set data size
    print(train_size, test_size)

    # Clear parameters
    nn.clear_parameters()

    # Define data shape for training.
    var_left = nn.Variable(
        (args.batchsize_train, 3, args.crop_height, args.crop_width))
    var_right = nn.Variable(
        (args.batchsize_train, 3, args.crop_height, args.crop_width))
    var_disp = nn.Variable(
        (args.batchsize_train, 1, args.crop_height, args.crop_width))

    # Define data shape for testing.
    var_left_test = nn.Variable(
        (args.batchsize_test, 3, args.im_height, args.im_width))
    var_right_test = nn.Variable(
        (args.batchsize_test, 3, args.im_height, args.im_width))
    var_disp_test = nn.Variable(
        (args.batchsize_test, 1, args.im_height, args.im_width))

    if args.loadmodel is not None:
        # Loading CNN pretrained parameters.
        nn.load_parameters(args.loadmodel)

    # === for Training ===
    # Definition of pred
    pred1, pred2, pred3 = psm_net(var_left, var_right, args.maxdisp, True)
    mask_train = F.greater_scalar(var_disp, 0)
    sum_mask = F.maximum_scalar(F.sum(mask_train), 1)
    print(sum_mask.d, "sum_mask_first")

    # Definition of loss
    loss = 0.5 * (0.5 * F.sum(F.huber_loss(pred1, var_disp) * mask_train) / sum_mask
                  + 0.7 * F.sum(F.huber_loss(pred2, var_disp) * mask_train) / sum_mask
                  + F.sum(F.huber_loss(pred3, var_disp) * mask_train) / sum_mask)

    # === for Testing ===
    # Definition of pred
    pred_test = psm_net(var_left_test, var_right_test, args.maxdisp, False)
    var_gt = var_disp_test + F.less_equal_scalar(var_disp_test, 0) * -1
    var_pred = pred_test + F.less_equal_scalar(pred_test, 0) * -1
    E = F.abs(var_pred - var_gt)
    n_err = F.sum(F.logical_and(F.logical_and(F.greater_scalar(var_gt, 0.0),
                                              F.greater_scalar(E, 3.0)),
                                F.greater_scalar(F.div2(E, F.abs(var_gt)), 0.05)))
    n_total = F.sum(F.greater_scalar(var_gt, 0))
    test_loss = F.div2(n_err, n_total)

    # Prepare monitors.
    monitor = Monitor(monitor_path)
    monitor_train = MonitorSeries('Training loss', monitor, interval=1)
    monitor_test = MonitorSeries('Validation loss', monitor, interval=1)
    monitor_time_train = MonitorTimeElapsed(
        "Training time/epoch", monitor, interval=1)

    # Create a solver (parameter updater)
    solver = S.Adam(alpha=0.001, beta1=0.9, beta2=0.999)

    # Set Parameters
    params = nn.get_parameters()
    solver.set_parameters(params)
    params2 = nn.get_parameters(grad_only=False)
    solver.set_parameters(params2)

    for epoch in range(1, args.epochs + 1):
        print('This is %d-th epoch' % (epoch))
        total_train_loss = 0
        index = 0
        lr = adjust_learning_rate(epoch)

        ### Training ###
        while index < train_size:
            # Get mini batch / preprocess
            var_left.d, var_right.d, var_disp.d = data_iterator_train.next()
            loss.forward(clear_no_need_grad=True)
            # Initialize gradients
            solver.zero_grad()
            # Backward execution
            loss.backward(clear_buffer=True)
            # Update parameters by computed gradients
            solver.set_learning_rate(lr)
            solver.update()
            print('Iter %d training loss = %.3f' % (index, loss.d))
            total_train_loss += loss.d
            index += 1
        train_error = total_train_loss / train_size
        print('epoch %d total training loss = %.3f' % (epoch, train_error))
        monitor_time_train.add(epoch)

        ### Testing ###
        total_test_loss = 0
        max_acc = 0
        index_test = 0
        while index_test < test_size:
            var_left_test.d, var_right_test.d, var_disp_test.d = data_iterator_test.next()
            test_loss.forward(clear_buffer=True)
            total_test_loss += test_loss.d
            print('Iter %d test loss = %.3f' % (index_test, test_loss.d * 100))
            index_test += 1
        test_error = total_test_loss / test_size
        print('epoch %d total 3-px error in val = %.3f' % (epoch, test_error * 100))

        if test_error > max_acc:
            max_acc = test_error * 100
            print('MAX epoch %d total test error = %.3f' % (epoch, max_acc))

        # Pass validation loss to a monitor.
        monitor_test.add(epoch, test_error * 100)
        # Pass training loss to a monitor.
        monitor_train.add(epoch, train_error)

    print('full training time = %.2f HR' %
          ((time.time() - start_full_time) / 3600))

    # Save Parameter
    out_param_file = os.path.join(
        args.savemodel, 'psmnet_trained_param_' + str(epoch) + '.h5')
    nn.save_parameters(out_param_file)

    batch_size, shuffle=True, with_file_cache=False)

x = nn.Variable((batch_size, sentence_length))
t = nn.Variable((batch_size, sentence_length, 1))

with nn.parameter_scope('embedding'):
    h = PF.embed(x, vocab_size, embedding_size)
with nn.parameter_scope('rnn1'):
    h = simple_rnn(h, hidden_size, return_sequences=True)
with nn.parameter_scope('hidden'):
    h = time_distributed(PF.affine)(h, hidden_size)
with nn.parameter_scope('output'):
    y = time_distributed(PF.affine)(h, vocab_size)

mask = F.sum(F.greater_scalar(t, 0), axis=2)  # do not predict 'pad'.
# mask = F.sum(F.sign(t), axis=2)  # do not predict 'pad'.
entropy = time_distributed_softmax_cross_entropy(y, t) * mask
count = F.sum(mask, axis=1)
loss = F.mean(F.div2(F.sum(entropy, axis=1), count))

# Create solver.
solver = S.Momentum(1e-2, momentum=0.9)
solver.set_parameters(nn.get_parameters())

# Create monitor.
from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
monitor = Monitor('./tmp-rnnlm')
monitor_perplexity = MonitorSeries('perplexity', monitor, interval=1)
monitor_perplexity_valid = MonitorSeries('perplexity_valid', monitor,

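# Illustrative numpy sketch (not library code) of the padding mask built above:
# assuming token id 0 is the pad symbol, greater_scalar(t, 0) summed over the
# trailing size-1 axis yields a (batch, time) mask that is 1 for real tokens
# and 0 for padding, so padded steps contribute nothing to the loss.
import numpy as np

t = np.array([[[5], [3], [0], [0]]])            # one sentence, two pad steps
mask = (t > 0).astype(np.float32).sum(axis=2)   # shape (1, 4)
print(mask)                                     # [[1. 1. 0. 0.]]
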
def srwu_coef(ctx, log_var):
    v = F.exp(log_var)
    v0_g = F.greater_scalar(v, 1.)
    v0_l = F.logical_not(v0_g)
    c = v0_g + v * v0_l
    return c

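# Illustrative check (not library code) of srwu_coef's piecewise behaviour:
# c = 1 where exp(log_var) > 1, otherwise c = exp(log_var). The `ctx` argument
# is unused in the computation itself, so None is passed here; the input values
# are made up for the example.
import numpy as np
import nnabla as nn
import nnabla.functions as F

log_var = nn.Variable.from_numpy_array(
    np.array([-1.0, 0.0, 1.0], dtype=np.float32))
c = srwu_coef(None, log_var)
c.forward()
print(c.d)  # approx [exp(-1), 1., 1.]
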
def bidirectional_sphere_trace(self, camloc, raydir, t_start, t_finish):
    t_f = F.identity(t_start)
    x_f = camloc + t_f * raydir
    s_f = self.sdf(x_f)
    mask_hit_eps_f = 0 * F.identity(t_f)

    t_b = F.identity(t_finish)
    x_b = camloc + t_b * raydir
    s_b = self.sdf(x_b)
    mask_hit_eps_b = 0 * F.identity(t_b)

    for i in range(self.sphere_trace_itr - 1):
        # Forward direction
        mask_hit_eps_f_i = F.less_equal_scalar(F.abs(s_f), self.eps)
        mask_hit_eps_f += (1 - mask_hit_eps_f) * mask_hit_eps_f_i
        t_f += (1 - mask_hit_eps_f) * s_f
        x_f = camloc + t_f * raydir

        s_f_prev = F.identity(s_f)
        s_f = self.sdf(x_f)
        mask_pos_f_prev = (1 - mask_hit_eps_f) * \
            F.greater_scalar(s_f_prev, 0)
        mask_neg_f = (1 - mask_hit_eps_f) * F.less_scalar(s_f, 0)
        mask_revert_f = mask_pos_f_prev * mask_neg_f
        t_f -= mask_revert_f * s_f_prev
        s_f = mask_revert_f * s_f_prev + (1 - mask_revert_f) * s_f

        # Backward direction
        mask_hit_eps_b_i = F.less_equal_scalar(F.abs(s_b), self.eps)
        mask_hit_eps_b += (1 - mask_hit_eps_b) * mask_hit_eps_b_i
        t_b -= (1 - mask_hit_eps_b) * s_b
        x_b = camloc + t_b * raydir

        s_b_prev = F.identity(s_b)
        s_b = self.sdf(x_b)
        mask_pos_b_prev = (1 - mask_hit_eps_b) * \
            F.greater_scalar(s_b_prev, 0)
        mask_neg_b = (1 - mask_hit_eps_b) * F.less_scalar(s_b, 0)
        mask_revert_b = mask_pos_b_prev * mask_neg_b
        t_b += mask_revert_b * s_b_prev
        s_b = mask_revert_b * s_b_prev + (1 - mask_revert_b) * s_b

    ## print("s_f neg", np.sum(s_f.data < 0))
    ## print("s_b neg", np.sum(s_b.data < 0))

    # Fine grained start/finish points
    t_f0 = t_f
    t_f1 = t_f + mask_revert_f * s_f_prev
    x_hit_st0 = camloc + t_f0 * raydir
    ## x0, x1 = self.post_method(x_hit_st0, camloc + t_f1 * raydir)
    ## t_f0 = F.norm((x0 - camloc), axis=(x0.ndim - 1), keepdims=True)
    ## t_f1 = F.norm((x1 - camloc), axis=(x1.ndim - 1), keepdims=True)
    mask_hit_f1b = mask_revert_f * F.less(t_f1, t_b)
    t_b = t_f1 * mask_hit_f1b + t_b * (1 - mask_hit_f1b)

    # Reverse the opposite case
    mask_fb = F.less(t_f, t_b)
    t_f = t_f * mask_fb + t_start * (1 - mask_fb)
    t_b = t_b * mask_fb + t_finish * (1 - mask_fb)

    return x_hit_st0, t_f, t_b, mask_hit_eps_f

def Discriminator(img, label="real", scope_name="Discriminator", ndf=64):
    with nn.parameter_scope(scope_name):
        if type(img) is not list:
            img_small = F.interpolate(img, output_size=(128, 128))
        else:
            img_small = img[1]
            img = img[0]

        def sn_w(w):
            return PF.spectral_norm(w, dim=0)

        # InitLayer: -> 256x256
        with nn.parameter_scope("init"):
            h = img
            if img.shape[2] == 1024:
                h = PF.convolution(h, ndf // 8, (4, 4), stride=(2, 2), pad=(1, 1),
                                   apply_w=sn_w, with_bias=False, name="conv1")
                h = F.leaky_relu(h, 0.2)
                h = PF.convolution(h, ndf // 4, (4, 4), stride=(2, 2), pad=(1, 1),
                                   apply_w=sn_w, with_bias=False, name="conv2")
                h = PF.batch_normalization(h)
                h = F.leaky_relu(h, 0.2)
            elif img.shape[2] == 512:
                h = PF.convolution(h, ndf // 4, (4, 4), stride=(2, 2), pad=(1, 1),
                                   apply_w=sn_w, with_bias=False, name="conv2")
                h = F.leaky_relu(h, 0.2)
            else:
                h = PF.convolution(h, ndf // 4, (3, 3), pad=(1, 1),
                                   apply_w=sn_w, with_bias=False, name="conv3")
                h = F.leaky_relu(h, 0.2)

        # Calc base features
        f_256 = h
        f_128 = DownsampleComp(f_256, ndf // 2, "down256->128")
        f_64 = DownsampleComp(f_128, ndf * 1, "down128->64")
        f_32 = DownsampleComp(f_64, ndf * 2, "down64->32")

        # Apply SLE
        f_32 = SLE(f_32, f_256, "sle256->32")
        f_16 = DownsampleComp(f_32, ndf * 4, "down32->16")
        f_16 = SLE(f_16, f_128, "sle128->16")
        f_8 = DownsampleComp(f_16, ndf * 16, "down16->8")
        f_8 = SLE(f_8, f_64, "sle64->8")

        # Conv + BN + LeakyReLU + Conv -> logits (5x5)
        with nn.parameter_scope("last"):
            h = PF.convolution(f_8, ndf * 16, (1, 1),
                               apply_w=sn_w, with_bias=False, name="conv1")
            h = PF.batch_normalization(h)
            h = F.leaky_relu(h, 0.2)
            logit_large = PF.convolution(
                h, 1, (4, 4), apply_w=sn_w, with_bias=False, name="conv2")

        # Another path: "down_from_small" in the official code
        with nn.parameter_scope("down_from_small"):
            h_s = PF.convolution(img_small, ndf // 2, (4, 4), stride=(2, 2), pad=(1, 1),
                                 apply_w=sn_w, with_bias=False, name="conv1")
            h_s = F.leaky_relu(h_s, 0.2)
            h_s = Downsample(h_s, ndf * 1, "dfs64->32")
            h_s = Downsample(h_s, ndf * 2, "dfs32->16")
            h_s = Downsample(h_s, ndf * 4, "dfs16->8")
            fea_dec_small = h_s
            logit_small = PF.convolution(
                h_s, 1, (4, 4), apply_w=sn_w, with_bias=False, name="conv2")

        # Concatenate logits
        logits = F.concatenate(logit_large, logit_small, axis=1)

        # Reconstruct images
        rec_img_big = SimpleDecoder(f_8, "dec_big")
        rec_img_small = SimpleDecoder(fea_dec_small, "dec_small")
        part_ax2 = F.rand(shape=(img.shape[0],))
        part_ax3 = F.rand(shape=(img.shape[0],))
        f_16_ax2 = F.where(F.greater_scalar(part_ax2, 0.5),
                           f_16[:, :, :8, :], f_16[:, :, 8:, :])
        f_16_part = F.where(F.greater_scalar(part_ax3, 0.5),
                            f_16_ax2[:, :, :, :8], f_16_ax2[:, :, :, 8:])
        rec_img_part = SimpleDecoder(f_16_part, "dec_part")

    if label == "real":
        return logits, [rec_img_big, rec_img_small, rec_img_part], [part_ax2, part_ax3]
    else:
        return logits
