def test_DAUConvMemtest(self):
    # Memory/leak test: repeatedly re-initialize and run the forward and
    # backward pass of a single DAU convolution, printing per-iteration
    # wall-clock time. A steady slowdown or OOM over the 10000 iterations
    # indicates a leak in the op.
    N = 32
    W = 6
    H = 6
    input_channels = 128
    num_output = 256
    sigma = 0.5

    x_rand = np.random.rand(N, input_channels, H, W)
    x = tf.placeholder(tf.float32, shape=x_rand.shape)

    op = DAUConv2d(filters=num_output,
                   dau_units=(2, 1),
                   max_kernel_size=9,
                   use_bias=False,
                   weight_initializer=tf.random_normal_initializer(stddev=0.1, dtype=np.float32),
                   mu1_initializer=tf.random_uniform_initializer(minval=-10, maxval=10, dtype=tf.float32),
                   mu2_initializer=tf.random_uniform_initializer(minval=-10, maxval=10, dtype=tf.float32),
                   sigma_initializer=tf.constant_initializer(sigma),
                   dau_unit_border_bound=0.1,
                   unit_testing=False)

    result = op(x)

    # Random upstream gradient for the backward pass.
    result_error = tf.random_normal([np.int32(x.shape[0]), num_output,
                                     np.int32(x.shape[2]), np.int32(x.shape[3])],
                                    dtype=tf.float32)

    var_grad = tf.gradients(result,
                            [x, op.dau_weights, op.dau_mu1, op.dau_mu2],
                            grad_ys=result_error)

    init = tf.global_variables_initializer()

    c = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
    c.gpu_options.visible_device_list = '0'
    c.gpu_options.allow_growth = True

    with tf.Session(config=c) as s:
        for nn in range(10000):
            s.run(init)

            t_start = time.time()
            r, r_error, r_grad, w, mu1, mu2 = s.run(
                [result, result_error, var_grad,
                 op.dau_weights, op.dau_mu1, op.dau_mu2],
                feed_dict={x: x_rand})
            t_end = time.time()

            print(t_end - t_start)
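# A minimal sketch (not part of the original tests) of how the memtest loop
# above could additionally track host memory, assuming the optional `psutil`
# package is available. It runs an arbitrary fetch repeatedly and reports
# resident-set-size growth, which is the signal the 10000-iteration loop in
# test_DAUConvMemtest probes for indirectly via timing and OOM behavior.
def _report_rss_growth(self, session, fetches, feed_dict=None, iterations=100):
    import psutil  # assumed optional dependency, not used by the original suite
    proc = psutil.Process()
    rss_before = proc.memory_info().rss
    for _ in range(iterations):
        session.run(fetches, feed_dict=feed_dict)
    rss_after = proc.memory_info().rss
    # Steadily growing RSS across repeated identical runs suggests a leak.
    print("RSS growth over %d iterations: %.2f MiB"
          % (iterations, (rss_after - rss_before) / 2.0**20))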
def test_DAUConvSpeedTest(self):
    # Speed test: benchmark a standard 3x3 convolution against a DAU
    # convolution with the same input/output sizes (forward pass plus
    # gradients w.r.t. input and parameters) and report average
    # steady-state iteration times.
    N = 32
    W = 16
    H = 16
    S = 128  # input channels
    F = 32   # output channels
    dau_units = (2, 1)
    max_kernel_size = 9
    max_offset_init = 3

    dau_times = []
    conv_times = []

    # Baseline: standard tf.layers.Conv2D.
    input_channels = S
    num_output = F

    # A constant input is used instead of a fed placeholder so that
    # host-to-device copies do not skew the timings.
    x = tf.constant(0, shape=(N, input_channels, H, W), dtype=tf.float32)

    tmp = []

    op = tf.layers.Conv2D(filters=num_output,
                          kernel_size=3,
                          use_bias=False,
                          padding='same',
                          data_format='channels_first',
                          kernel_initializer=tf.random_normal_initializer(stddev=0.1, dtype=np.float32))

    result = op.apply(x)
    tmp.append(tf.reduce_max(result))

    # Random upstream gradient for the backward pass.
    result_error = tf.random_normal([np.int32(x.shape[0]), num_output,
                                     np.int32(x.shape[2]), np.int32(x.shape[3])],
                                    dtype=tf.float32)

    var_grad = tf.gradients(result, [x] + op.weights, grad_ys=result_error)

    # Reduce each fetched tensor to a scalar so that mainly the compute
    # time, not the device-to-host transfer, is measured.
    tmp.append(tf.reduce_max(var_grad[0]))
    tmp.append(tf.reduce_max(var_grad[1:]))

    init = tf.global_variables_initializer()

    c = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
    c.gpu_options.visible_device_list = '0'
    c.gpu_options.allow_growth = True

    with tf.Session(config=c) as s:
        s.run(init)
        for i in range(100):
            t_start = time.time()
            s.run(tmp)
            t_end = time.time()
            conv_times.append(t_end - t_start)

    # DAU convolution with the same input/output sizes.
    mu_learning_rate_factor = 1000
    sigma = 0.5

    x = tf.constant(0, shape=(N, input_channels, H, W), dtype=tf.float32)

    tmp = []

    op = DAUConv2d(filters=num_output,
                   dau_units=dau_units,
                   max_kernel_size=max_kernel_size,
                   use_bias=False,
                   weight_initializer=tf.random_normal_initializer(stddev=0.1, dtype=np.float32),
                   mu1_initializer=tf.random_uniform_initializer(minval=-max_offset_init, maxval=max_offset_init, dtype=tf.float32),
                   mu2_initializer=tf.random_uniform_initializer(minval=-max_offset_init, maxval=max_offset_init, dtype=tf.float32),
                   sigma_initializer=tf.constant_initializer(sigma),
                   mu_learning_rate_factor=mu_learning_rate_factor,
                   unit_testing=False)

    result = op(x)
    tmp.append(tf.reduce_max(result))

    result_error = tf.random_normal([np.int32(x.shape[0]), num_output,
                                     np.int32(x.shape[2]), np.int32(x.shape[3])],
                                    dtype=tf.float32)

    var_grad = tf.gradients(result,
                            [x, op.dau_weights, op.dau_mu1, op.dau_mu2],
                            grad_ys=result_error)

    tmp.append(tf.reduce_max(var_grad[0]))
    tmp.append(tf.reduce_max(var_grad[1:]))

    init = tf.global_variables_initializer()

    c = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
    c.gpu_options.visible_device_list = '0'
    c.gpu_options.allow_growth = True

    with tf.Session(config=c) as s:
        s.run(init)
        for i in range(100):
            t_start = time.time()
            s.run(tmp)
            t_end = time.time()
            dau_times.append(t_end - t_start)

    print("dau times: ", dau_times)
    print("conv times: ", conv_times)
    # Skip the first 20 iterations as warm-up.
    print("dau avg time: %f\n" % np.mean(dau_times[20:]))
    print("conv avg time: %f\n" % np.mean(conv_times[20:]))
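# A minimal sketch (not part of the original tests) of a timing summary
# helper. It mirrors the warm-up handling in test_DAUConvSpeedTest above
# (the first 20 of 100 runs are discarded before averaging) and adds median
# and 95th-percentile statistics, which are typically more robust than the
# mean for wall-clock benchmarks.
def _summarize_times(self, times, label, warmup=20):
    steady = np.asarray(times[warmup:])
    print("%s: mean=%.6fs median=%.6fs p95=%.6fs (n=%d, %d warm-up runs discarded)"
          % (label, steady.mean(), np.median(steady),
             np.percentile(steady, 95), steady.size, warmup))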
def _run_DAUConv_forward_and_backward(self, repeat, N, W, H, S, F,
                                      dau_units, max_kernel_size,
                                      max_offset_init, plot_diff=True):
    # Correctness test: run the op forward and backward on random input
    # and compare the results against the reference CPU implementation in
    # DAUConvPython.
    for i in range(repeat):
        mu_learning_rate_factor = 1000
        input_channels = S
        num_output = F
        sigma = 0.5

        x_rand = np.random.rand(N, input_channels, H, W)
        x = tf.placeholder(tf.float32, shape=x_rand.shape)

        op = DAUConv2d(filters=num_output,
                       dau_units=dau_units,
                       max_kernel_size=max_kernel_size,
                       use_bias=False,
                       weight_initializer=tf.random_normal_initializer(stddev=0.1, dtype=np.float32),
                       mu1_initializer=tf.random_uniform_initializer(minval=-max_offset_init, maxval=max_offset_init, dtype=tf.float32),
                       mu2_initializer=tf.random_uniform_initializer(minval=-max_offset_init, maxval=max_offset_init, dtype=tf.float32),
                       sigma_initializer=tf.constant_initializer(sigma),
                       mu_learning_rate_factor=mu_learning_rate_factor,
                       unit_testing=True)

        result = op(x)

        # Random upstream gradient for the backward pass.
        result_error = tf.random_normal([np.int32(x.shape[0]), num_output,
                                         np.int32(x.shape[2]), np.int32(x.shape[3])],
                                        dtype=tf.float32)

        var_grad = tf.gradients(result,
                                [x, op.dau_weights, op.dau_mu1, op.dau_mu2, op.dau_sigma],
                                grad_ys=result_error)

        init = tf.global_variables_initializer()

        c = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
        c.gpu_options.visible_device_list = '0'
        c.gpu_options.allow_growth = True

        with tf.Session(config=c) as s:
            s.run(init)

            t_start = time.time()
            r, r_error, r_grad, w, mu1, mu2 = s.run(
                [result, result_error, var_grad,
                 op.dau_weights, op.dau_mu1, op.dau_mu2],
                feed_dict={x: x_rand})
            t_end = time.time()

            print(t_end - t_start)

            # Ground-truth forward and backward values from the reference
            # CPU implementation.
            gt_fwd_vals = DAUConvPython().forward_cpu(
                x=x_rand, w=w, mu1=mu1, mu2=mu2, sigma=[sigma],
                num_dau_units_ignore=op.num_dau_units_ignore)

            gt_bwd_vals = DAUConvPython().backward_cpu(
                x=x_rand, error=r_error, w=w, mu1=mu1, mu2=mu2,
                sigma=[sigma],
                num_dau_units_ignore=op.num_dau_units_ignore,
                unit_testing=True)

            # Interpolation in the C++ code excludes one pixel at the right
            # edge, so ignore that column in the comparison.
            last_idx = -1 if r.shape[-1] > 1 else r.shape[-1]
            r = r[:, :, :, :last_idx]
            r_grad[0] = r_grad[0][:, :, :, :last_idx]
            gt_fwd_vals = gt_fwd_vals[:, :, :, :last_idx]
            # The op scales the mu gradients by mu_learning_rate_factor, so
            # scale the reference values to match.
            gt_bwd_vals = (gt_bwd_vals[0][:, :, :, :last_idx],
                           gt_bwd_vals[1],
                           gt_bwd_vals[2] * mu_learning_rate_factor,
                           gt_bwd_vals[3] * mu_learning_rate_factor,
                           gt_bwd_vals[4])

            self._assertMatrix(r, gt_fwd_vals, 'fwd_output',
                               rel_tolerance=0.01, plot_difference=plot_diff)
            self._assertMatrix(r_grad[0], gt_bwd_vals[0], 'bwd_error',
                               rel_tolerance=0.01, plot_difference=plot_diff)
            self._assertMatrix(r_grad[1], gt_bwd_vals[1], 'bwd_w_grad',
                               rel_tolerance=0.01, plot_difference=plot_diff)
            self._assertMatrix(r_grad[2], gt_bwd_vals[2], 'bwd_mu1_grad',
                               rel_tolerance=0.01, plot_difference=plot_diff)
            self._assertMatrix(r_grad[3], gt_bwd_vals[3], 'bwd_mu2_grad',
                               rel_tolerance=0.01, plot_difference=plot_diff)
            self._assertMatrix(r_grad[4], gt_bwd_vals[4], 'bwd_sigma_grad',
                               rel_tolerance=0.01, plot_difference=plot_diff)
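# A minimal sketch (not part of the original tests) of how the helper above
# could be invoked. The parameter values here are illustrative assumptions,
# not taken from the original suite: a batch of 16 inputs of size 32x32
# with 64 channels, 32 output channels, 2x1 DAU units, a 9x9 maximum kernel
# and offsets initialized within +/-3 pixels.
def test_DAUConvForwardBackwardSmall(self):
    self._run_DAUConv_forward_and_backward(repeat=1, N=16, W=32, H=32,
                                           S=64, F=32, dau_units=(2, 1),
                                           max_kernel_size=9,
                                           max_offset_init=3,
                                           plot_diff=False)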