def huber_loss(y_true, y_pred, clip_value=1):
    # Huber loss, see https://en.wikipedia.org/wiki/Huber_loss and
    # https://medium.com/@karpathy/yes-you-should-understand-backprop-e2f06eab496b
    # for details.
    assert clip_value > 0.

    x = y_true - y_pred
    if np.isinf(clip_value):
        # Special case for infinity since TensorFlow has problems
        # if we compare `K.abs(x) < np.inf`.
        return .5 * K.square(x)

    condition = K.abs(x) < clip_value
    squared_loss = .5 * K.square(x)
    linear_loss = clip_value * (K.abs(x) - .5 * clip_value)
    if K.backend() == 'tensorflow':
        import tensorflow as tf
        if hasattr(tf, 'select'):
            return tf.select(condition, squared_loss, linear_loss)  # condition, true, false
        else:
            return tf.where(condition, squared_loss, linear_loss)  # condition, true, false
    elif K.backend() == 'theano':
        from theano import tensor as T
        return T.switch(condition, squared_loss, linear_loss)
    else:
        raise RuntimeError('Unknown backend "{}".'.format(K.backend()))
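# A minimal usage sketch of `huber_loss` (an illustration added here, not part
# of the original code); it assumes the same `np` and `K` imports the function
# above already requires. With `clip_value=1`, residuals inside the clip are
# penalized quadratically and residuals outside it linearly.
def example_huber_loss():
    y_true = K.variable(np.array([0., 0., 0.]))
    y_pred = K.variable(np.array([.5, 1., 3.]))
    loss = K.eval(huber_loss(y_true, y_pred, clip_value=1.))
    # |x| = .5 -> .5 * .5 ** 2 = .125; |x| = 1. -> boundary, both branches
    # give .5; |x| = 3. -> 1. * (3. - .5) = 2.5
    assert np.allclose(loss, [.125, .5, 2.5])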
def test_compare_original_nade():
    """
    Compare output computation with github.com/MarcCote/NADE

    This test uses weights learned with the reference implementation.
    The following parameters were used:

    orderlessNADE.py --theano --form MoG --dataset simple.hdf5
        --samples_name 0 --hlayers 1 --n_components 1 --epoch_size 10000
        --momentum 0.0 --units 16 --training_route training
        --no_validation --batch_size 5

    Training data consisted of 10000 samples drawn from normal(mean=0, sigma=1).

    The architecture here is the same as the one created by the reference
    implementation.
    """
    import h5py
    masked_input_layer, input_layer, mask_layer = create_input_layers(2)
    mog = Container(inputs=[masked_input_layer, input_layer, mask_layer],
                    outputs=mog_layer(
                        input_layer,
                        Activation("relu")(add([
                            Dense(16)(Lambda(lambda x: x[:, :2])(masked_input_layer)),
                            Dense(16, use_bias=False)(mask_layer)])),
                        1))
    inner_model = Container(inputs=[masked_input_layer, input_layer, mask_layer],
                            outputs=mog([masked_input_layer, input_layer, mask_layer]))
    model = training_model(inner_model, mask_seed=1)
    model.compile(loss=utils.maximize_prediction, optimizer="sgd")
    with h5py.File("tests/original_nade_weights.hdf5") as h:
        model.set_weights([
            h["final_model/parameters/W1"][()].astype(np.float32),
            h["final_model/parameters/b1"][()].astype(np.float32),
            h["final_model/parameters/Wflags"][()].astype(np.float32),
            h["final_model/parameters/V_alpha"][()].T.reshape((16, 2)).astype(np.float32),
            h["final_model/parameters/b_alpha"][()].reshape(2).astype(np.float32),
            h["final_model/parameters/V_sigma"][()].T.reshape((16, 2)).astype(np.float32),
            h["final_model/parameters/b_sigma"][()].reshape(2).astype(np.float32),
            h["final_model/parameters/V_mu"][()].T.reshape((16, 2)).astype(np.float32),
            h["final_model/parameters/b_mu"][()].reshape(2).astype(np.float32)
        ])
    np.random.seed(1)
    output = model.predict(np.random.normal(size=(5, 2)))
    # Different random generation leads to different masks per backend.
    if K.backend() == "tensorflow":
        assert np.allclose(np.array([-2.20870864, -2.12633744, -4.85813326,
                                     -3.63397837, -1.89778014]), output)
    elif K.backend() == "theano":
        assert np.allclose(np.array([-3.33089394, -2.55555928, -4.85813281,
                                     -4.85442475, -1.92244674]), output)
    else:
        raise NotImplementedError()
def dot_product(x, kernel):
    """
    Wrapper for a dot product operation, in order to be compatible with both
    Theano and TensorFlow.

    Args:
        x: input tensor.
        kernel: weight vector.

    Returns:
        The dot product of `x` and `kernel`, with the trailing singleton
        dimension squeezed away on the TensorFlow backend.
    """
    if K.backend() == 'tensorflow':
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel)
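# A small shape sketch for `dot_product` (illustrative values, not part of the
# original code): with a 3-D input and a 1-D kernel (the typical attention-layer
# use case), both backends should produce one scalar score per timestep.
def example_dot_product():
    x = K.variable(np.arange(12, dtype=np.float32).reshape((2, 3, 2)))
    kernel = K.variable(np.array([1., 2.], dtype=np.float32))
    scores = K.eval(dot_product(x, kernel))  # shape: (batch, timesteps)
    assert scores.shape == (2, 3)
    assert np.allclose(scores, [[2., 8., 14.], [20., 26., 32.]])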
def test_compare_original_nade_reg():
    """
    Same as test_compare_original_nade, but with regularization of activities
    in the MoG layer enabled. This should not influence the output.
    """
    import h5py
    masked_input_layer, input_layer, mask_layer = create_input_layers(2)
    mog = Container(inputs=[masked_input_layer, input_layer, mask_layer],
                    outputs=mog_layer(
                        input_layer,
                        Activation("relu")(add([
                            Dense(16)(Lambda(lambda x: x[:, :2])(masked_input_layer)),
                            Dense(16, use_bias=False)(mask_layer)])),
                        1, True))
    inner_model = Container(inputs=[masked_input_layer, input_layer, mask_layer],
                            outputs=mog([masked_input_layer, input_layer, mask_layer]))
    model = training_model(inner_model, mask_seed=1)
    model.compile(loss=utils.maximize_prediction, optimizer="sgd")
    with h5py.File("tests/original_nade_weights.hdf5") as h:
        model.set_weights([
            h["final_model/parameters/W1"][()].astype(np.float32),
            h["final_model/parameters/b1"][()].astype(np.float32),
            h["final_model/parameters/Wflags"][()].astype(np.float32),
            h["final_model/parameters/V_alpha"][()].T.reshape((16, 2)).astype(np.float32),
            h["final_model/parameters/b_alpha"][()].reshape(2).astype(np.float32),
            h["final_model/parameters/V_sigma"][()].T.reshape((16, 2)).astype(np.float32),
            h["final_model/parameters/b_sigma"][()].reshape(2).astype(np.float32),
            h["final_model/parameters/V_mu"][()].T.reshape((16, 2)).astype(np.float32),
            h["final_model/parameters/b_mu"][()].reshape(2).astype(np.float32)
        ])
    np.random.seed(1)
    output = model.predict(np.random.normal(size=(5, 2)))
    # Different random generation leads to different masks per backend.
    if K.backend() == "tensorflow":
        assert np.allclose(np.array([-2.20870864, -2.12633744, -4.85813326,
                                     -3.63397837, -1.89778014]), output)
    elif K.backend() == "theano":
        assert np.allclose(np.array([-3.33089394, -2.55555928, -4.85813281,
                                     -4.85442475, -1.92244674]), output)
    else:
        raise NotImplementedError()
def _time_distributed_dense(x, w, b=None, dropout=None,
                            input_dim=None, output_dim=None,
                            timesteps=None, training=None):
    """Apply `y . w + b` for every temporal slice y of x.

    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: whether to apply dropout (same
            dropout mask for every temporal slice of the input).
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.

    # Returns
        Output tensor.
    """
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.int_shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    if b is not None:
        x = K.bias_add(x, b)
    # reshape to 3D tensor
    if K.backend() == 'tensorflow':
        x = K.reshape(x, K.stack([-1, timesteps, output_dim]))
        x.set_shape([None, None, output_dim])
    else:
        x = K.reshape(x, (-1, timesteps, output_dim))
    return x
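# A minimal usage sketch of `_time_distributed_dense` (illustrative values,
# not part of the original code): the same (w, b) pair is applied to every
# timestep, so the helper acts like a Dense(output_dim) layer per slice.
def example_time_distributed_dense():
    x = K.variable(np.ones((4, 5, 3), dtype=np.float32))  # (batch, timesteps, input_dim)
    w = K.variable(np.ones((3, 2), dtype=np.float32))     # (input_dim, output_dim)
    b = K.variable(np.zeros(2, dtype=np.float32))
    y = K.eval(_time_distributed_dense(x, w, b))
    assert y.shape == (4, 5, 2)
    assert np.allclose(y, 3.)  # each output is the sum of the 3 ones in a slice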