    def test020_bon_fprop_vs_on(self, alpha_fwd=ALPHA_FWD, alpha_bkw=ALPHA_BKW):
        """
        Test the Batch Online Normalization Layer's forward pass against the
        tf's Online Normalization Layer (b_size=1) implementation of the layer

        NOTE:
            - layer's mu and var are randomly initialized as well
            A zero mean unit variance normalization transformation would do
            nothing therefore the test would be uninformative
        """
        input_data, _ = gen_data()  # generate the data

        # Instantiate the tf implementation of batched online norm layer
        in_shape = input_data[0:B_SIZE].shape

        b_inputs = tf.placeholder(tf.float32, shape=in_shape)
        bon_tf = batch_online_norm(b_inputs,
                                   alpha_fwd=alpha_fwd, alpha_bkw=alpha_bkw,
                                   axis=1, training=True, b_size=B_SIZE)

        # Instantiate tf implementation of the online layer
        in_shape = input_data[0:1].shape
        inputs = tf.placeholder(tf.float32, shape=in_shape)
        on_tf = online_norm(inputs, alpha_fwd=alpha_fwd, alpha_bkw=alpha_bkw,
                            axis=1, training=True)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            # Iterate over generated data
            for i in range(len(input_data)):
                idx = i % B_SIZE

                if idx == 0:
                    # get the output of the tf batched layer
                    feed_dict = {b_inputs: input_data[i:i + B_SIZE]}
                    bon_tf_out = sess.run([bon_tf], feed_dict=feed_dict)
                    bon_tf_out = np.array(bon_tf_out[0])

                # get the output of the tf single batch layer
                on_tf_out = sess.run([on_tf],
                                     feed_dict={inputs: input_data[i:i + 1]})
                out = np.array(on_tf_out[0])

                f_err_str = 'fwd output divergence on itr {}'.format(i)
                np.testing.assert_allclose(out, bon_tf_out[idx:idx + 1],
                                           rtol=RTOL, atol=ATOL,
                                           err_msg=f_err_str)
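
# ---------------------------------------------------------------------------
# The tests in this suite rely on module-level fixtures (ALPHA_FWD, ALPHA_BKW,
# B_SIZE, RTOL, ATOL, and gen_data) defined elsewhere. The sketch below is
# only an illustration of what such fixtures could look like; the constant
# values, shapes, and the data generator itself are assumptions, not the
# suite's actual definitions.

import numpy as np

ALPHA_FWD = 0.999          # decay of the forward (normalization) statistics
ALPHA_BKW = 0.99           # decay of the backward (gradient control) statistics
B_SIZE = 8                 # batch size used by the batched layer
RTOL, ATOL = 1e-4, 1e-5    # tolerances for np.testing.assert_allclose

def gen_data(fc_output=False, n=64, c=16, h=5, w=5, seed=0):
    """Random inputs and deltas: (N, C) if fc_output else (N, C, H, W)."""
    rng = np.random.RandomState(seed)
    shape = (n, c) if fc_output else (n, c, h, w)
    inputs = rng.randn(*shape).astype(np.float32)
    deltas = rng.randn(*shape).astype(np.float32)
    return inputs, deltas
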
    def template_numerical_comparison_on_vs_np(
        self,
        np_inputs,
        np_grad_out=None,
        axis=1,
        alpha_fwd=0.99,
        alpha_bkw=0.99,
        itrs=2,
        dtype=None,
    ):
        in_shape = np_inputs.shape
        batch_size = in_shape[0]
        # use the 2d numpy reference for 4D (N, C, H, W) inputs,
        # otherwise the 1d (dense) reference
        NpOnlineNorm = NpOnlineNorm2d if len(in_shape) == 4 else NpOnlineNorm1d
        # Instantiate the numpy layer
        np_norm = NpOnlineNorm(
            in_shape[1],
            alpha_fwd=alpha_fwd,
            alpha_bkw=alpha_bkw,
            affine=False,
            ecm='',
        )

        # Instantiate the tf implementation of the online norm layer
        # without batch acceleration
        if dtype is None:
            tf_inputs = tf.placeholder(tf.float32, shape=in_shape)
        else:
            tf_inputs = tf.placeholder(tf.float16, shape=in_shape)
        tf_norm = online_norm(
            tf_inputs,
            alpha_fwd=alpha_fwd,
            alpha_bkw=alpha_bkw,
            axis=axis,
            training=True,
            center=False,
            scale=False,
            ecm='',
            dtype=dtype,
        )

        if np_grad_out is not None:
            # set up tf_norm's gradient functionality
            if dtype is None:
                tf_grad_ys = tf.placeholder(tf.float32, shape=in_shape)
            else:
                tf_grad_ys = tf.placeholder(tf.float16, shape=in_shape)
            tf_norm_grad = tf.gradients(ys=tf_norm,
                                        xs=tf_inputs,
                                        grad_ys=tf_grad_ys)

        # fp16 runs get looser comparison tolerances than fp32
        rtol = 1e-4 if dtype is None else 1e-2
        atol = 1e-5 if dtype is None else 1e-3

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            # Iterate over generated data
            for itr in range(itrs):

                # fprop through numpy Online Norm class
                np_out = np_norm(np_inputs)
                if np_grad_out is not None:
                    # bprop through numpy Online Norm class
                    np_grad_in = np_norm.backward(np_grad_out)

                if np_grad_out is None:
                    # get the output of the tf layer
                    on_tf_out = sess.run([tf_norm],
                                         feed_dict={tf_inputs: np_inputs})
                    out = np.array(on_tf_out[0])

                    for n in range(batch_size):
                        # numerically compare output
                        err_msg = f'output comparison failed on itr: {itr}, n: {n}'
                        np.testing.assert_allclose(out[n],
                                                   np_out[n],
                                                   rtol=rtol,
                                                   atol=atol,
                                                   err_msg=err_msg)

                if np_grad_out is not None:
                    # get the deltas of the tf layer
                    grad_dict = {tf_grad_ys: np_grad_out, tf_inputs: np_inputs}
                    tf_grad_xs = np.array(
                        sess.run([tf_norm_grad], feed_dict=grad_dict)[0][0])

                    for n in range(batch_size):
                        # numerically compare deltas
                        err_msg = f'grad comparison failed on itr: {itr}, n: {n}'
                        np.testing.assert_allclose(tf_grad_xs[n],
                                                   np_grad_in[n],
                                                   rtol=rtol,
                                                   atol=atol,
                                                   err_msg=err_msg)
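
    # A minimal sketch of how the comparison template above could be driven by
    # a concrete test case. The method name, shapes, and random seed below are
    # assumptions for illustration; the real suite defines its own cases.
    def test_on_vs_np_conv_sketch(self):
        rng = np.random.RandomState(0)
        np_inputs = rng.randn(8, 16, 5, 5).astype(np.float32)
        np_grad_out = rng.randn(8, 16, 5, 5).astype(np.float32)
        self.template_numerical_comparison_on_vs_np(
            np_inputs,
            np_grad_out=np_grad_out,
            axis=1,
            alpha_fwd=0.999,
            alpha_bkw=0.99,
            itrs=2,
        )
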
    def test050_bon_vs_on_Dense(self, alpha_fwd=ALPHA_FWD, alpha_bkw=ALPHA_BKW):
        """
        Test the Online Normalization Layer's fprop and bprop

        NOTE:
            - layer's mu and var are randomly initialized as well
            A zero mean unit variance normalization transformation would do
            nothing therefore the test would be uninformative
        """
        # generate the data
        input_data, deltas_in = gen_data(fc_output=True)

        # Instantiate the tensorflow implementation of the batched online norm layer
        in_shape = input_data[0:B_SIZE].shape
        b_inputs = tf.placeholder(tf.float32, shape=in_shape)
        b_deltas = tf.placeholder(tf.float32, shape=in_shape)
        bon_tf = batch_online_norm(b_inputs,
                                   alpha_fwd=alpha_fwd, alpha_bkw=alpha_bkw,
                                   axis=-1, training=True, b_size=B_SIZE,
                                   layer_scaling=False)
        # set up bon_tf's gradient functionality
        def grad_func(b_d_in, b_inputs):
            return tf.gradients(ys=bon_tf, xs=b_inputs, grad_ys=b_d_in)
        bon_grad = grad_func(b_deltas, b_inputs)

        grad_in = np.empty(in_shape)  # overwritten inside the session loop below
        # Instantiate tensorflow implementation of the online layer
        in_shape = input_data[0:1].shape
        inputs = tf.placeholder(tf.float32, shape=in_shape)
        deltas = tf.placeholder(tf.float32, shape=in_shape)
        on_tf = online_norm(inputs,
                            alpha_fwd=alpha_fwd, alpha_bkw=alpha_bkw,
                            axis=-1, training=True,
                            layer_scaling=False)
        # set up on_tf's gradient functionality
        def grad_func(d_in, inputs):
            return tf.gradients(ys=on_tf, xs=inputs, grad_ys=d_in)
        on_grad = grad_func(deltas, inputs)


        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            # Iterate over generated data
            for i in range(len(input_data)):
                idx = i % B_SIZE
                
                # forward check
                if idx == 0:
                    # get the output of the tf batched layer
                    feed_dict = {b_inputs: input_data[i:i + B_SIZE]}
                    bon_tf_out = sess.run([bon_tf], feed_dict=feed_dict)
                    bon_tf_out = np.array(bon_tf_out[0])

                # get the output of the tf single batch layer
                on_tf_out = sess.run([on_tf],
                                     feed_dict={inputs: input_data[i:i + 1]})
                out = np.array(on_tf_out[0])

                f_err_str = 'fwd output divergence on itr {}'.format(i)
                np.testing.assert_allclose(out, bon_tf_out[idx:idx + 1],
                                           rtol=RTOL, atol=ATOL,
                                           err_msg=f_err_str)

                # backward check
                if idx == 0:
                    # get the deltas of the tf batched layer
                    grad_dict = {b_deltas: deltas_in[i:i + B_SIZE],
                                 b_inputs: input_data[i:i + B_SIZE]}

                    bon_tf_grad_out = np.array(sess.run([bon_grad],
                                               feed_dict=grad_dict)[0][0])

                # get the deltas of the tf single batch layer
                grad_dict = {deltas: deltas_in[i:i + 1],
                             inputs: input_data[i:i + 1]}
                grad_in = np.array(sess.run([on_grad],
                                            feed_dict=grad_dict)[0][0])

                b_err_str = 'bkw delta divergence on itr {}'.format(i)
                bon_grad_idx = bon_tf_grad_out[idx:idx + 1]
                np.testing.assert_allclose(grad_in, bon_grad_idx,
                                           rtol=RTOL, atol=ATOL,
                                           err_msg=b_err_str)
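
    # The tests above build placeholders directly in the TF1 default graph, so
    # a setUp() hook that clears the graph keeps each test independent. This
    # sketch assumes the enclosing class is a unittest.TestCase; it is not
    # part of the original suite.
    def setUp(self):
        tf.reset_default_graph()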