def test020_bon_fprop_vs_on(self, alpha_fwd=ALPHA_FWD, alpha_bkw=ALPHA_BKW):
    """
    Test the Batch Online Normalization Layer's forward pass against the
    tf implementation of the Online Normalization Layer (b_size=1).

    NOTE:
        - The layer's mu and var are randomly initialized as well.
          A zero-mean, unit-variance normalization transformation would do
          nothing, so the test would otherwise be uninformative.
    """
    input_data, _ = gen_data()  # generate the data

    # Instantiate the tf implementation of the batched online norm layer
    in_shape = input_data[0:B_SIZE].shape
    b_inputs = tf.placeholder(tf.float32, shape=in_shape)
    bon_tf = batch_online_norm(b_inputs,
                               alpha_fwd=alpha_fwd,
                               alpha_bkw=alpha_bkw,
                               axis=1,
                               training=True,
                               b_size=B_SIZE)

    # Instantiate the tf implementation of the online layer
    in_shape = input_data[0:1].shape
    inputs = tf.placeholder(tf.float32, shape=in_shape)
    on_tf = online_norm(inputs,
                        alpha_fwd=alpha_fwd,
                        alpha_bkw=alpha_bkw,
                        axis=1,
                        training=True)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Iterate over generated data
        for i in range(len(input_data)):
            idx = i % B_SIZE

            if idx == 0:
                # get the output of the batched version of the layer
                feed_dict = {b_inputs: input_data[i:i + B_SIZE]}
                bon_tf_out = sess.run([bon_tf], feed_dict=feed_dict)
                bon_tf_out = np.array(bon_tf_out[0])

            # get the output of the single-sample version of the layer
            on_tf_out = sess.run([on_tf],
                                 feed_dict={inputs: input_data[i:i + 1]})
            out = np.array(on_tf_out[0])

            f_err_str = 'fwd output divergence on itr {}'.format(i)
            np.testing.assert_allclose(out, bon_tf_out[idx:idx + 1],
                                       rtol=RTOL, atol=ATOL,
                                       err_msg=f_err_str)
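# For reference, a schematic numpy sketch (an illustration under simplifying
# assumptions, not the library's exact update rule) of the forward statistics
# an online normalization layer maintains: each incoming sample is normalized
# with the current running estimates of mu and var, which are then decayed
# toward the new sample with alpha_fwd.
def _online_fwd_sketch(x_stream, alpha_fwd=ALPHA_FWD, eps=1e-5):
    mu, var = 0.0, 1.0  # running estimates (randomly initialized in the layer under test)
    outs = []
    for x in x_stream:  # one sample at a time
        outs.append((x - mu) / np.sqrt(var + eps))  # normalize with old stats
        mu_new = mu + (1. - alpha_fwd) * (x - mu)   # decay mean toward sample
        var = alpha_fwd * var + (1. - alpha_fwd) * (x - mu) * (x - mu_new)
        mu = mu_new
    return outs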
def template_numerical_comparison_on_vs_np(
        self,
        np_inputs,
        np_grad_out=None,
        axis=1,
        alpha_fwd=0.99,
        alpha_bkw=0.99,
        itrs=2,
        dtype=None,
):
    in_shape = np_inputs.shape
    batch_size = in_shape[0]
    NpOnlineNorm = NpOnlineNorm2d if len(in_shape) == 4 else NpOnlineNorm1d

    # Instantiate the numpy reference layer
    np_norm = NpOnlineNorm(
        in_shape[1],
        alpha_fwd=alpha_fwd,
        alpha_bkw=alpha_bkw,
        affine=False,
        ecm='',
    )

    # Instantiate the tf implementation of the online norm layer
    # without batch acceleration
    if dtype is None:
        tf_inputs = tf.placeholder(tf.float32, shape=in_shape)
    else:
        tf_inputs = tf.placeholder(tf.float16, shape=in_shape)
    tf_norm = online_norm(
        tf_inputs,
        alpha_fwd=alpha_fwd,
        alpha_bkw=alpha_bkw,
        axis=axis,
        training=True,
        center=False,
        scale=False,
        ecm='',
        dtype=dtype,
    )

    if np_grad_out is not None:
        # set up tf_norm's gradient functionality
        if dtype is None:
            tf_grad_ys = tf.placeholder(tf.float32, shape=in_shape)
        else:
            tf_grad_ys = tf.placeholder(tf.float16, shape=in_shape)
        tf_norm_grad = tf.gradients(ys=tf_norm,
                                    xs=tf_inputs,
                                    grad_ys=tf_grad_ys)

    rtol = 1e-4 if dtype is None else 1e-2
    atol = 1e-5 if dtype is None else 1e-3

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Iterate, feeding the same data through both layers each iteration
        for itr in range(itrs):
            # fprop through the numpy Online Norm class
            np_out = np_norm(np_inputs)
            if np_grad_out is not None:
                # bprop through the numpy Online Norm class
                np_grad_in = np_norm.backward(np_grad_out)

            if np_grad_out is None:
                # get the output of the tf layer
                on_tf_out = sess.run([tf_norm],
                                     feed_dict={tf_inputs: np_inputs})
                out = np.array(on_tf_out[0])

                for n in range(batch_size):
                    # numerically compare output
                    err_msg = f'output comparison failed on itr: {itr}, n: {n}'
                    np.testing.assert_allclose(out[n], np_out[n],
                                               rtol=rtol, atol=atol,
                                               err_msg=err_msg)

            if np_grad_out is not None:
                # get the deltas of the tf layer
                grad_dict = {tf_grad_ys: np_grad_out, tf_inputs: np_inputs}
                tf_grad_xs = np.array(
                    sess.run([tf_norm_grad], feed_dict=grad_dict)[0][0])

                for n in range(batch_size):
                    # numerically compare deltas
                    err_msg = f'grad comparison failed on itr: {itr}, n: {n}'
                    np.testing.assert_allclose(tf_grad_xs[n], np_grad_in[n],
                                               rtol=rtol, atol=atol,
                                               err_msg=err_msg)
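# A minimal usage sketch (a hypothetical test, not part of the original
# suite; the shapes, seed, and method name are illustrative assumptions)
# showing how the comparison template above can be driven for a 4D
# (N, C, H, W) input with both forward and backward checks.
def test_example_on_vs_np_4d(self, alpha_fwd=0.99, alpha_bkw=0.99):
    np.random.seed(0)
    np_inputs = np.random.randn(8, 4, 5, 5).astype(np.float32)    # (N, C, H, W)
    np_grad_out = np.random.randn(8, 4, 5, 5).astype(np.float32)  # upstream deltas
    self.template_numerical_comparison_on_vs_np(np_inputs,
                                                np_grad_out=np_grad_out,
                                                axis=1,
                                                alpha_fwd=alpha_fwd,
                                                alpha_bkw=alpha_bkw,
                                                itrs=2)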
def test050_bon_vs_on_Dense(self, alpha_fwd=ALPHA_FWD, alpha_bkw=ALPHA_BKW):
    """
    Test the Batch Online Normalization Layer's fprop and bprop against the
    tf implementation of the Online Normalization Layer (b_size=1) on dense
    (fully connected) activations.

    NOTE:
        - The layer's mu and var are randomly initialized as well.
          A zero-mean, unit-variance normalization transformation would do
          nothing, so the test would otherwise be uninformative.
    """
    # generate the data
    input_data, deltas_in = gen_data(fc_output=True)

    # Instantiate the tensorflow implementation of the batched online layer
    in_shape = input_data[0:B_SIZE].shape
    b_inputs = tf.placeholder(tf.float32, shape=in_shape)
    b_deltas = tf.placeholder(tf.float32, shape=in_shape)
    bon_tf = batch_online_norm(b_inputs,
                               alpha_fwd=alpha_fwd,
                               alpha_bkw=alpha_bkw,
                               axis=-1,
                               training=True,
                               b_size=B_SIZE,
                               layer_scaling=False)

    # set up bon_tf's gradient functionality
    def grad_func(b_d_in, b_inputs):
        return tf.gradients(ys=bon_tf, xs=b_inputs, grad_ys=b_d_in)

    bon_grad = grad_func(b_deltas, b_inputs)
    grad_in = np.empty(in_shape)

    # Instantiate the tensorflow implementation of the online layer
    in_shape = input_data[0:1].shape
    inputs = tf.placeholder(tf.float32, shape=in_shape)
    deltas = tf.placeholder(tf.float32, shape=in_shape)
    on_tf = online_norm(inputs,
                        alpha_fwd=alpha_fwd,
                        alpha_bkw=alpha_bkw,
                        axis=-1,
                        training=True,
                        layer_scaling=False)

    # set up on_tf's gradient functionality
    def grad_func(d_in, inputs):
        return tf.gradients(ys=on_tf, xs=inputs, grad_ys=d_in)

    on_grad = grad_func(deltas, inputs)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Iterate over generated data
        for i in range(len(input_data)):
            idx = i % B_SIZE

            # forward check
            if idx == 0:
                # get the output of the batched version of the layer
                feed_dict = {b_inputs: input_data[i:i + B_SIZE]}
                bon_tf_out = sess.run([bon_tf], feed_dict=feed_dict)
                bon_tf_out = np.array(bon_tf_out[0])

            # get the output of the single-sample version of the layer
            on_tf_out = sess.run([on_tf],
                                 feed_dict={inputs: input_data[i:i + 1]})
            out = np.array(on_tf_out[0])

            f_err_str = 'fwd output divergence on itr {}'.format(i)
            np.testing.assert_allclose(out, bon_tf_out[idx:idx + 1],
                                       rtol=RTOL, atol=ATOL,
                                       err_msg=f_err_str)

            # backward check
            if idx == 0:
                # get the deltas of the batched version of the layer
                grad_dict = {b_deltas: deltas_in[i:i + B_SIZE],
                             b_inputs: input_data[i:i + B_SIZE]}
                bon_tf_grad_out = np.array(sess.run([bon_grad],
                                                    feed_dict=grad_dict)[0][0])

            # get the deltas of the tf single batch layer
            grad_dict = {deltas: deltas_in[i:i + 1],
                         inputs: input_data[i:i + 1]}
            grad_in = np.array(sess.run([on_grad],
                                        feed_dict=grad_dict)[0][0])

            b_err_str = 'bkw delta divergence on itr {}'.format(i)
            bon_grad_idx = bon_tf_grad_out[idx:idx + 1]
            np.testing.assert_allclose(grad_in, bon_grad_idx,
                                       rtol=RTOL, atol=ATOL,
                                       err_msg=b_err_str)
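# For reference, a standalone sketch of the tf.gradients(ys, xs, grad_ys)
# pattern used in the tests above to inject upstream deltas into a layer and
# read back the gradient w.r.t. its input. The dense layer and shapes here
# are arbitrary placeholders chosen only for illustration.
def _tf_gradients_pattern_sketch():
    x = tf.placeholder(tf.float32, shape=(1, 8))
    y = tf.layers.dense(x, 8)                        # any differentiable layer
    g_in = tf.placeholder(tf.float32, shape=(1, 8))  # upstream deltas (grad_ys)
    g_out = tf.gradients(ys=y, xs=x, grad_ys=g_in)   # gradient w.r.t. the input

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        return sess.run(g_out,
                        feed_dict={x: np.ones((1, 8), np.float32),
                                   g_in: np.ones((1, 8), np.float32)})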