def test_naf_layer_diag():
    batch_size = 2
    for nb_actions in (1, 3):
        # Construct a single model with NAF as the only layer, hence it is fully deterministic
        # since no weights are used, which would be randomly initialized.
        L_flat_input = Input(shape=(nb_actions,))
        mu_input = Input(shape=(nb_actions,))
        action_input = Input(shape=(nb_actions,))
        x = NAFLayer(nb_actions, mode='diag')([L_flat_input, mu_input, action_input])
        model = Model(inputs=[L_flat_input, mu_input, action_input], outputs=x)
        model.compile(loss='mse', optimizer='sgd')

        # Create random test data.
        L_flat = np.random.random((batch_size, nb_actions)).astype('float32')
        mu = np.random.random((batch_size, nb_actions)).astype('float32')
        action = np.random.random((batch_size, nb_actions)).astype('float32')

        # Perform reference computations in numpy since these are much easier to verify.
        # In 'diag' mode, P is a diagonal matrix whose entries are taken directly from L_flat.
        P = np.zeros((batch_size, nb_actions, nb_actions)).astype('float32')
        for p, l_flat in zip(P, L_flat):
            p[np.diag_indices(nb_actions)] = l_flat
        A_ref = np.array([
            np.dot(np.dot(a - m, p), a - m) for a, m, p in zip(action, mu, P)
        ]).astype('float32')
        A_ref *= -.5

        # Finally, compute the output of the net, which should be identical to the previously
        # computed reference.
        A_net = model.predict([L_flat, mu, action]).flatten()
        assert_allclose(A_net, A_ref, rtol=1e-5)
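

# Illustrative sketch (not part of the original tests): in 'diag' mode the NAF advantage
# A(a) = -0.5 * (a - mu)^T * P * (a - mu) uses a diagonal P whose entries come directly
# from L_flat, so the per-sample reference value reduces to a weighted squared distance.
# The helper name below is hypothetical and only mirrors the reference computation above.
def _naf_diag_advantage_reference(l_flat, mu, action):
    # Equivalent to -0.5 * (a - mu)^T diag(l_flat) (a - mu) for a single sample.
    delta = action - mu
    return -0.5 * np.sum(l_flat * delta ** 2)
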
def test_naf_layer_full():
    batch_size = 2
    for nb_actions in (1, 3):
        # Construct a single model with NAF as the only layer, hence it is fully deterministic
        # since no weights are used, which would be randomly initialized.
        L_flat_input = Input(shape=((nb_actions * nb_actions + nb_actions) // 2,))
        mu_input = Input(shape=(nb_actions,))
        action_input = Input(shape=(nb_actions,))
        x = NAFLayer(nb_actions, mode='full')([L_flat_input, mu_input, action_input])
        model = Model(inputs=[L_flat_input, mu_input, action_input], outputs=x)
        model.compile(loss='mse', optimizer='sgd')

        # Create random test data.
        L_flat = np.random.random(
            (batch_size, (nb_actions * nb_actions + nb_actions) // 2)).astype('float32')
        mu = np.random.random((batch_size, nb_actions)).astype('float32')
        action = np.random.random((batch_size, nb_actions)).astype('float32')

        # Perform reference computations in numpy since these are much easier to verify.
        # L is lower triangular with an exponentiated diagonal, and P = L * L^T.
        L = np.zeros((batch_size, nb_actions, nb_actions)).astype('float32')
        LT = np.copy(L)
        for l, l_T, l_flat in zip(L, LT, L_flat):
            l[np.tril_indices(nb_actions)] = l_flat
            l[np.diag_indices(nb_actions)] = np.exp(l[np.diag_indices(nb_actions)])
            l_T[:, :] = l.T
        P = np.array([np.dot(l, l_T) for l, l_T in zip(L, LT)]).astype('float32')
        A_ref = np.array([
            np.dot(np.dot(a - m, p), a - m) for a, m, p in zip(action, mu, P)
        ]).astype('float32')
        A_ref *= -.5

        # Finally, compute the output of the net, which should be identical to the previously
        # computed reference.
        A_net = model.predict([L_flat, mu, action]).flatten()
        assert_allclose(A_net, A_ref, rtol=1e-5)
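

# Illustrative sketch (not part of the original tests): in 'full' mode, L_flat packs the
# lower triangle of L row by row, and the diagonal is exponentiated so that P = L * L^T is
# positive definite. The helper name below is hypothetical and simply mirrors the
# per-sample reference computation used in the test above.
def _naf_full_advantage_reference(l_flat, mu, action, nb_actions):
    # Rebuild the lower-triangular L from its flattened form.
    L = np.zeros((nb_actions, nb_actions), dtype='float32')
    L[np.tril_indices(nb_actions)] = l_flat
    L[np.diag_indices(nb_actions)] = np.exp(L[np.diag_indices(nb_actions)])
    # P = L * L^T is the matrix of the quadratic advantage term.
    P = np.dot(L, L.T)
    delta = action - mu
    return -0.5 * np.dot(np.dot(delta, P), delta)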