def test_Bidirectional_state_reuse():
    rnn = layers.LSTM
    samples = 2
    dim = 5
    timesteps = 3
    units = 3

    input1 = Input((timesteps, dim))
    layer = wrappers.Bidirectional(
        rnn(units, return_state=True, return_sequences=True))
    state = layer(input1)[1:]

    # test passing invalid initial_state: passing a tensor
    input2 = Input((timesteps, dim))
    with pytest.raises(ValueError):
        output = wrappers.Bidirectional(rnn(units))(input2,
                                                    initial_state=state[0])

    # test valid usage: passing a list
    output = wrappers.Bidirectional(rnn(units))(input2, initial_state=state)
    model = Model([input1, input2], output)
    assert len(model.layers) == 4
    assert isinstance(model.layers[-1].input, list)
    inputs = [np.random.rand(samples, timesteps, dim),
              np.random.rand(samples, timesteps, dim)]
    outputs = model.predict(inputs)

def test_Bidirectional_dropout(merge_mode):
    rnn = layers.LSTM
    samples = 2
    dim = 5
    timesteps = 3
    units = 3
    X = [np.random.rand(samples, timesteps, dim)]

    inputs = Input((timesteps, dim))
    wrapped = wrappers.Bidirectional(rnn(units, dropout=0.2,
                                         recurrent_dropout=0.2),
                                     merge_mode=merge_mode)
    outputs = to_list(wrapped(inputs, training=True))
    assert all(not getattr(x, '_uses_learning_phase') for x in outputs)

    inputs = Input((timesteps, dim))
    wrapped = wrappers.Bidirectional(rnn(units, dropout=0.2,
                                         return_state=True),
                                     merge_mode=merge_mode)
    outputs = to_list(wrapped(inputs))
    assert all(x._uses_learning_phase for x in outputs)

    model = Model(inputs, outputs)
    assert model.uses_learning_phase
    y1 = to_list(model.predict(X))
    y2 = to_list(model.predict(X))
    for x1, x2 in zip(y1, y2):
        assert_allclose(x1, x2, atol=1e-5)

def test_Bidirectional():
    rnn = layers.SimpleRNN
    samples = 2
    dim = 2
    timesteps = 2
    output_dim = 2
    dropout_rate = 0.2
    for mode in ['sum', 'concat']:
        x = np.random.random((samples, timesteps, dim))
        target_dim = 2 * output_dim if mode == 'concat' else output_dim
        y = np.random.random((samples, target_dim))

        # test with Sequential model
        model = Sequential()
        model.add(wrappers.Bidirectional(rnn(output_dim,
                                             dropout=dropout_rate,
                                             recurrent_dropout=dropout_rate),
                                         merge_mode=mode,
                                         input_shape=(timesteps, dim)))
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, epochs=1, batch_size=1)

        # test config
        model.get_config()
        model = model_from_json(model.to_json())
        model.summary()

        # test stacked bidirectional layers
        model = Sequential()
        model.add(wrappers.Bidirectional(rnn(output_dim,
                                             return_sequences=True),
                                         merge_mode=mode,
                                         input_shape=(timesteps, dim)))
        model.add(wrappers.Bidirectional(rnn(output_dim), merge_mode=mode))
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, epochs=1, batch_size=1)

        # test with functional API
        inputs = Input((timesteps, dim))
        outputs = wrappers.Bidirectional(rnn(output_dim,
                                             dropout=dropout_rate,
                                             recurrent_dropout=dropout_rate),
                                         merge_mode=mode)(inputs)
        if dropout_rate and K.backend() == 'tensorflow':
            # Dropout is disabled with CNTK/Theano.
            assert outputs._uses_learning_phase
        model = Model(inputs, outputs)
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, epochs=1, batch_size=1)

        # Bidirectional and stateful
        inputs = Input(batch_shape=(1, timesteps, dim))
        outputs = wrappers.Bidirectional(rnn(output_dim, stateful=True),
                                         merge_mode=mode)(inputs)
        model = Model(inputs, outputs)
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, epochs=1, batch_size=1)

def test_Bidirectional_merged_value(merge_mode):
    rnn = layers.LSTM
    samples = 2
    dim = 5
    timesteps = 3
    units = 3
    X = [np.random.rand(samples, timesteps, dim)]

    if merge_mode == 'sum':
        merge_func = lambda y, y_rev: y + y_rev
    elif merge_mode == 'mul':
        merge_func = lambda y, y_rev: y * y_rev
    elif merge_mode == 'ave':
        merge_func = lambda y, y_rev: (y + y_rev) / 2
    elif merge_mode == 'concat':
        merge_func = lambda y, y_rev: np.concatenate((y, y_rev), axis=-1)
    else:
        merge_func = lambda y, y_rev: [y, y_rev]

    # basic case
    inputs = Input((timesteps, dim))
    layer = wrappers.Bidirectional(rnn(units, return_sequences=True),
                                   merge_mode=merge_mode)
    f_merged = K.function([inputs], to_list(layer(inputs)))
    f_forward = K.function([inputs], [layer.forward_layer.call(inputs)])
    f_backward = K.function([inputs],
                            [K.reverse(layer.backward_layer.call(inputs), 1)])

    y_merged = f_merged(X)
    y_expected = to_list(merge_func(f_forward(X)[0], f_backward(X)[0]))
    assert len(y_merged) == len(y_expected)
    for x1, x2 in zip(y_merged, y_expected):
        assert_allclose(x1, x2, atol=1e-5)

    # test return_state
    inputs = Input((timesteps, dim))
    layer = wrappers.Bidirectional(rnn(units, return_state=True),
                                   merge_mode=merge_mode)
    f_merged = K.function([inputs], layer(inputs))
    f_forward = K.function([inputs], layer.forward_layer.call(inputs))
    f_backward = K.function([inputs], layer.backward_layer.call(inputs))
    n_states = len(layer.layer.states)

    y_merged = f_merged(X)
    y_forward = f_forward(X)
    y_backward = f_backward(X)
    y_expected = to_list(merge_func(y_forward[0], y_backward[0]))
    assert len(y_merged) == len(y_expected) + n_states * 2
    for x1, x2 in zip(y_merged, y_expected):
        assert_allclose(x1, x2, atol=1e-5)

    # test if the state of a BiRNN is the concatenation of the underlying RNNs
    y_merged = y_merged[-n_states * 2:]
    y_forward = y_forward[-n_states:]
    y_backward = y_backward[-n_states:]
    for state_birnn, state_inner in zip(y_merged, y_forward + y_backward):
        assert_allclose(state_birnn, state_inner, atol=1e-5)

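# For reference, a minimal NumPy sketch of the merge behaviour checked by
# test_Bidirectional_merged_value above: 'concat' doubles the last axis,
# while 'sum'/'mul'/'ave' preserve it. The names y_fw/y_bw and the toy
# shapes are illustrative assumptions, not part of the test.
import numpy as np

y_fw = np.ones((2, 3))       # forward output, shape (batch, units)
y_bw = np.full((2, 3), 2.)   # backward output, same shape

assert np.concatenate((y_fw, y_bw), axis=-1).shape == (2, 6)  # 'concat'
assert (y_fw + y_bw).shape == (2, 3)                          # 'sum'
assert (y_fw * y_bw).shape == (2, 3)                          # 'mul'
assert ((y_fw + y_bw) / 2).shape == (2, 3)                    # 'ave'
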
def test_Bidirectional():
    rnn = recurrent.SimpleRNN
    nb_sample = 2
    dim = 2
    timesteps = 2
    output_dim = 2
    for mode in ['sum', 'concat']:
        x = np.random.random((nb_sample, timesteps, dim))
        target_dim = 2 * output_dim if mode == 'concat' else output_dim
        y = np.random.random((nb_sample, target_dim))

        # test with Sequential model
        model = Sequential()
        model.add(wrappers.Bidirectional(rnn(output_dim),
                                         merge_mode=mode,
                                         input_shape=(timesteps, dim)))
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, nb_epoch=1, batch_size=1)

        # test config
        model.get_config()
        model = model_from_json(model.to_json())
        model.summary()

        # test stacked bidirectional layers
        model = Sequential()
        model.add(wrappers.Bidirectional(rnn(output_dim,
                                             return_sequences=True),
                                         merge_mode=mode,
                                         input_shape=(timesteps, dim)))
        model.add(wrappers.Bidirectional(rnn(output_dim), merge_mode=mode))
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, nb_epoch=1, batch_size=1)

        # test with functional API
        input = Input((timesteps, dim))
        output = wrappers.Bidirectional(rnn(output_dim),
                                        merge_mode=mode)(input)
        model = Model(input, output)
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, nb_epoch=1, batch_size=1)

        # Bidirectional and stateful
        input = Input(batch_shape=(1, timesteps, dim))
        output = wrappers.Bidirectional(rnn(output_dim, stateful=True),
                                        merge_mode=mode)(input)
        model = Model(input, output)
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, nb_epoch=1, batch_size=1)

def smallLSTM(self, inputdim):
    self.model = Sequential()
    # Working code DO NOT CHANGE
    # self.model.add(wrappers.TimeDistributed(
    #     Dense(32, activation='relu'), input_shape=inputdim))
    # self.model.add(Convolution1D(32, 3, border_mode='valid',
    #     subsample_length=1, activation='relu', input_shape=inputdim))
    self.model.add(
        wrappers.Bidirectional(LSTM(128, return_sequences=True),
                               input_shape=inputdim))
    # self.model.add(Dropout(.2))
    # self.model.add(wrappers.Bidirectional(
    #     LSTM(128, return_sequences=False)))
    # self.model.add(LSTM(64))
    time_distributed_merge_layer = Lambda(
        function=lambda x: K.mean(x, axis=1),
        output_shape=lambda shape: (shape[0],) + shape[2:])
    self.model.add(time_distributed_merge_layer)
    # self.model.add(LSTM(32))
    self.model.add(Dense(NUM_CHAR))
    # self.model.add(Dropout(.25))
    self.model.add(Activation('softmax'))
    self.model.compile(loss='categorical_crossentropy',
                       optimizer='Adam',
                       metrics=['accuracy'])

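# A hedged usage sketch for smallLSTM: the Harness class, the input shape
# (10 timesteps x 64 features), and the NUM_CHAR value are assumptions for
# illustration only; the original module defines NUM_CHAR elsewhere.
import numpy as np

NUM_CHAR = 50  # placeholder; assumed to be defined by the original module

class Harness(object):
    pass

Harness.smallLSTM = smallLSTM   # attach the method defined above
h = Harness()
h.smallLSTM(inputdim=(10, 64))  # (timesteps, features)

x = np.random.rand(4, 10, 64)   # (batch, timesteps, features)
y = np.eye(NUM_CHAR)[np.random.randint(NUM_CHAR, size=4)]  # one-hot targets
h.model.fit(x, y, epochs=1, batch_size=2)
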
def test_Bidirectional_updates():
    x = Input(shape=(3, 2))
    layer = wrappers.Bidirectional(layers.SimpleRNN(3))
    layer.forward_layer.add_update(0, inputs=x)
    layer.forward_layer.add_update(1, inputs=None)
    layer.backward_layer.add_update(0, inputs=x)
    layer.backward_layer.add_update(1, inputs=None)

def test_Bidirectional_losses():
    x = Input(shape=(3, 2))
    layer = wrappers.Bidirectional(
        layers.SimpleRNN(3, kernel_regularizer='l1', bias_regularizer='l1'))
    _ = layer(x)
    layer.forward_layer.add_loss(lambda: 0)
    layer.forward_layer.add_loss(lambda: 1)
    layer.backward_layer.add_loss(lambda: 0)
    layer.backward_layer.add_loss(lambda: 1)

def test_Bidirectional_trainable():
    # test layers that need learning_phase to be set
    x = Input(shape=(3, 2))
    layer = wrappers.Bidirectional(layers.SimpleRNN(3))
    _ = layer(x)
    assert len(layer.trainable_weights) == 6
    layer.trainable = False
    assert len(layer.trainable_weights) == 0
    layer.trainable = True
    assert len(layer.trainable_weights) == 6

def test_Bidirectional_updates():
    x = Input(shape=(3, 2))
    layer = wrappers.Bidirectional(layers.SimpleRNN(3))
    assert len(layer.updates) == 0
    assert len(layer.get_updates_for(None)) == 0
    assert len(layer.get_updates_for(x)) == 0
    layer.forward_layer.add_update(0, inputs=x)
    layer.forward_layer.add_update(1, inputs=None)
    layer.backward_layer.add_update(0, inputs=x)
    layer.backward_layer.add_update(1, inputs=None)
    assert len(layer.updates) == 4
    assert len(layer.get_updates_for(None)) == 2
    assert len(layer.get_updates_for(x)) == 2

def test_Bidirectional_state_reuse():
    rnn = layers.LSTM
    samples = 2
    dim = 5
    timesteps = 3
    units = 3

    inputs = Input((timesteps, dim))
    layer = wrappers.Bidirectional(
        rnn(units, return_state=True, return_sequences=True))
    outputs = layer(inputs)
    output, state = outputs[0], outputs[1:]

    # test passing invalid initial_state: passing a tensor
    with pytest.raises(ValueError):
        output = wrappers.Bidirectional(rnn(units))(output,
                                                    initial_state=state[0])

    # test valid usage: passing a list
    output = wrappers.Bidirectional(rnn(units))(output, initial_state=state)
    model = Model(inputs, output)
    inputs = np.random.rand(samples, timesteps, dim)
    outputs = model.predict(inputs)

def test_Bidirectional_losses():
    x = Input(shape=(3, 2))
    layer = wrappers.Bidirectional(
        layers.SimpleRNN(3, kernel_regularizer='l1', bias_regularizer='l1'))
    _ = layer(x)
    assert len(layer.losses) == 4
    assert len(layer.get_losses_for(None)) == 4
    assert len(layer.get_losses_for(x)) == 0
    layer.forward_layer.add_loss(0, inputs=x)
    layer.forward_layer.add_loss(1, inputs=None)
    layer.backward_layer.add_loss(0, inputs=x)
    layer.backward_layer.add_loss(1, inputs=None)
    assert len(layer.losses) == 8
    assert len(layer.get_losses_for(None)) == 6
    assert len(layer.get_losses_for(x)) == 2

def build_model(embedding_dim, max_word_count, embedding_matrix,
                max_sequence_length):
    embedding_layer = Embedding(max_word_count + 1,
                                embedding_dim,
                                weights=[embedding_matrix],
                                input_length=max_sequence_length,
                                trainable=True)
    sequence_input = Input(shape=(max_sequence_length,), dtype='int32')
    embedded_sequences = embedding_layer(sequence_input)
    embedded_sequences = Dropout(0.2)(embedded_sequences)
    lstm_out = wrappers.Bidirectional(
        LSTM(embedding_dim, return_sequences=False))(embedded_sequences)
    lstm_out = Dropout(0.2)(lstm_out)
    output = Dense(2, activation='softmax')(lstm_out)
    model = Model(inputs=[sequence_input], outputs=output)
    return model

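# A hedged usage sketch for build_model: the vocabulary size, embedding
# matrix, and training data below are dummy placeholders. build_model
# returns an uncompiled model, so it is compiled here before fitting.
import numpy as np

max_words, emb_dim, seq_len = 100, 16, 20
emb_matrix = np.random.rand(max_words + 1, emb_dim)
model = build_model(emb_dim, max_words, emb_matrix, seq_len)
model.compile(loss='categorical_crossentropy', optimizer='adam')

x = np.random.randint(0, max_words + 1, size=(8, seq_len))  # token ids
y = np.eye(2)[np.random.randint(2, size=8)]                 # one-hot labels
model.fit(x, y, epochs=1, batch_size=4)
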
def create_feature_extraction_model(sentiment_pretrained=True,
                                    max_sequence_length=30,
                                    embedding_dim=300,
                                    word_index=None):
    word2id = word_index
    new_model = Sequential()
    if sentiment_pretrained:
        model, word2id, embedding_matrix = \
            word_lstm_sentiment_model.load_model()
        for i in range(0, len(model.layers) - 1):
            new_model.add(model.layers[i])
            new_model.layers[-1].trainable = False
    else:
        embedding_layer = Embedding(len(word_index) + 1,
                                    embedding_dim,
                                    input_length=max_sequence_length,
                                    trainable=True)
        new_model.add(embedding_layer)
        new_model.add(Dropout(0.2))
        new_model.add(wrappers.Bidirectional(
            LSTM(embedding_dim, return_sequences=False)))
        new_model.add(Dropout(0.2))
    new_model.summary()
    return new_model, word2id

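# A hedged usage sketch for create_feature_extraction_model with
# sentiment_pretrained=False (the pretrained path needs the external
# word_lstm_sentiment_model checkpoint). The toy word_index is a
# placeholder; with the default concat merge, the Bidirectional LSTM
# yields 2 * embedding_dim features per sequence.
import numpy as np

word_index = {'good': 1, 'bad': 2}
feature_model, word2id = create_feature_extraction_model(
    sentiment_pretrained=False,
    max_sequence_length=30,
    embedding_dim=300,
    word_index=word_index)
features = feature_model.predict(np.zeros((1, 30)))
assert features.shape == (1, 600)  # 2 * 300 from the concat merge
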
def test_Bidirectional_unknown_timestamps():
    # test with functional API with unknown length
    rnn = layers.SimpleRNN
    samples = 2
    dim = 2
    timesteps = 2
    output_dim = 2
    dropout_rate = 0.2
    for mode in ['sum', 'concat']:
        x = np.random.random((samples, timesteps, dim))
        target_dim = 2 * output_dim if mode == 'concat' else output_dim
        y = np.random.random((samples, target_dim))

        inputs = Input((None, dim))
        outputs = wrappers.Bidirectional(rnn(output_dim,
                                             dropout=dropout_rate,
                                             recurrent_dropout=dropout_rate),
                                         merge_mode=mode)(inputs)
        model = Model(inputs, outputs)
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, epochs=1, batch_size=1)

model.add(Attention(
    recurrent.LSTM(output_dim,
                   input_dim=embedding_dim,
                   return_sequences=False,
                   consume_less='mem')))
model.add(core.Activation('relu'))
model.compile(optimizer='rmsprop', loss='mse')
model.fit(x, y[:, -1, :], nb_epoch=1, batch_size=nb_samples)

# with bidirectional encoder
model = Sequential()
model.add(InputLayer(batch_input_shape=(nb_samples, timesteps,
                                        embedding_dim)))
model.add(wrappers.Bidirectional(
    recurrent.LSTM(embedding_dim,
                   input_dim=embedding_dim,
                   return_sequences=True)))
model.add(Attention(
    recurrent.LSTM(output_dim,
                   input_dim=embedding_dim,
                   return_sequences=True,
                   consume_less='mem')))
model.add(core.Activation('relu'))
model.compile(optimizer='rmsprop', loss='mse')
model.fit(x, y, nb_epoch=1, batch_size=nb_samples)

# test config
model.get_config()

# test to and from json

def test_Bidirectional_with_constants_layer_passing_initial_state():
    class RNNCellWithConstants(Layer):
        def __init__(self, units, **kwargs):
            self.units = units
            self.state_size = units
            super(RNNCellWithConstants, self).__init__(**kwargs)

        def build(self, input_shape):
            if not isinstance(input_shape, list):
                raise TypeError('expects constants shape')
            [input_shape, constant_shape] = input_shape
            # will (and should) raise if more than one constant passed
            self.input_kernel = self.add_weight(
                shape=(input_shape[-1], self.units),
                initializer='uniform',
                name='kernel')
            self.recurrent_kernel = self.add_weight(
                shape=(self.units, self.units),
                initializer='uniform',
                name='recurrent_kernel')
            self.constant_kernel = self.add_weight(
                shape=(constant_shape[-1], self.units),
                initializer='uniform',
                name='constant_kernel')
            self.built = True

        def call(self, inputs, states, constants):
            [prev_output] = states
            [constant] = constants
            h_input = K.dot(inputs, self.input_kernel)
            h_state = K.dot(prev_output, self.recurrent_kernel)
            h_const = K.dot(constant, self.constant_kernel)
            output = h_input + h_state + h_const
            return output, [output]

        def get_config(self):
            config = {'units': self.units}
            base_config = super(RNNCellWithConstants, self).get_config()
            return dict(list(base_config.items()) + list(config.items()))

    # Test basic case.
    x = Input((5, 5))
    c = Input((3,))
    s_for = Input((32,))
    s_bac = Input((32,))
    cell = RNNCellWithConstants(32)
    custom_objects = {'RNNCellWithConstants': RNNCellWithConstants}
    with CustomObjectScope(custom_objects):
        layer = wrappers.Bidirectional(RNN(cell))
    y = layer(x, initial_state=[s_for, s_bac], constants=c)
    model = Model([x, s_for, s_bac, c], y)
    model.compile(optimizer='rmsprop', loss='mse')
    model.train_on_batch(
        [np.zeros((6, 5, 5)),
         np.zeros((6, 32)),
         np.zeros((6, 32)),
         np.zeros((6, 3))],
        np.zeros((6, 64)))

    # Test basic case serialization.
    x_np = np.random.random((6, 5, 5))
    s_fw_np = np.random.random((6, 32))
    s_bk_np = np.random.random((6, 32))
    c_np = np.random.random((6, 3))
    y_np = model.predict([x_np, s_fw_np, s_bk_np, c_np])
    weights = model.get_weights()
    config = layer.get_config()

    with CustomObjectScope(custom_objects):
        layer = wrappers.Bidirectional.from_config(copy.deepcopy(config))
    y = layer(x, initial_state=[s_for, s_bac], constants=c)
    model = Model([x, s_for, s_bac, c], y)
    model.set_weights(weights)
    y_np_2 = model.predict([x_np, s_fw_np, s_bk_np, c_np])
    assert_allclose(y_np, y_np_2, atol=1e-4)

    # verify that state is used
    y_np_2_different_s = model.predict(
        [x_np, s_fw_np + 10., s_bk_np + 10., c_np])
    with pytest.raises(AssertionError):
        assert_allclose(y_np, y_np_2_different_s, atol=1e-4)

    # test flat list inputs
    with CustomObjectScope(custom_objects):
        layer = wrappers.Bidirectional.from_config(copy.deepcopy(config))
    y = layer([x, s_for, s_bac, c])
    model = Model([x, s_for, s_bac, c], y)
    model.set_weights(weights)
    y_np_3 = model.predict([x_np, s_fw_np, s_bk_np, c_np])
    assert_allclose(y_np, y_np_3, atol=1e-4)

class LayerCorrectnessTest(keras_parameterized.TestCase):
    def setUp(self):
        super(LayerCorrectnessTest, self).setUp()
        # Set two virtual CPUs to test MirroredStrategy with multiple devices
        cpus = tf.config.list_physical_devices('CPU')
        tf.config.set_logical_device_configuration(cpus[0], [
            tf.config.LogicalDeviceConfiguration(),
            tf.config.LogicalDeviceConfiguration(),
        ])

    def _create_model_from_layer(self, layer, input_shapes):
        inputs = [layers.Input(batch_input_shape=s) for s in input_shapes]
        if len(inputs) == 1:
            inputs = inputs[0]
        y = layer(inputs)
        model = models.Model(inputs, y)
        model.compile('sgd', 'mse')
        return model

    @parameterized.named_parameters(
        ('LeakyReLU', advanced_activations.LeakyReLU, (2, 2)),
        ('PReLU', advanced_activations.PReLU, (2, 2)),
        ('ELU', advanced_activations.ELU, (2, 2)),
        ('ThresholdedReLU', advanced_activations.ThresholdedReLU, (2, 2)),
        ('Softmax', advanced_activations.Softmax, (2, 2)),
        ('ReLU', advanced_activations.ReLU, (2, 2)),
        ('Conv1D', lambda: convolutional.Conv1D(2, 2), (2, 2, 1)),
        ('Conv2D', lambda: convolutional.Conv2D(2, 2), (2, 2, 2, 1)),
        ('Conv3D', lambda: convolutional.Conv3D(2, 2), (2, 2, 2, 2, 1)),
        ('Conv2DTranspose', lambda: convolutional.Conv2DTranspose(2, 2),
         (2, 2, 2, 2)),
        ('SeparableConv2D', lambda: convolutional.SeparableConv2D(2, 2),
         (2, 2, 2, 1)),
        ('DepthwiseConv2D', lambda: convolutional.DepthwiseConv2D(2, 2),
         (2, 2, 2, 1)),
        ('UpSampling2D', convolutional.UpSampling2D, (2, 2, 2, 1)),
        ('ZeroPadding2D', convolutional.ZeroPadding2D, (2, 2, 2, 1)),
        ('Cropping2D', convolutional.Cropping2D, (2, 3, 3, 1)),
        ('ConvLSTM2D',
         lambda: convolutional_recurrent.ConvLSTM2D(4, kernel_size=(2, 2)),
         (4, 4, 4, 4, 4)),
        ('Dense', lambda: core.Dense(2), (2, 2)),
        ('Dropout', lambda: core.Dropout(0.5), (2, 2)),
        ('SpatialDropout2D', lambda: core.SpatialDropout2D(0.5),
         (2, 2, 2, 2)),
        ('Activation', lambda: core.Activation('sigmoid'), (2, 2)),
        ('Reshape', lambda: core.Reshape((1, 4, 1)), (2, 2, 2)),
        ('Permute', lambda: core.Permute((2, 1)), (2, 2, 2)),
        ('Attention', dense_attention.Attention,
         [(2, 2, 3), (2, 3, 3), (2, 3, 3)]),
        ('AdditiveAttention', dense_attention.AdditiveAttention,
         [(2, 2, 3), (2, 3, 3), (2, 3, 3)]),
        ('Embedding', lambda: embeddings.Embedding(4, 4), (2, 4),
         2e-3, 2e-3, np.random.randint(4, size=(2, 4))),
        ('LocallyConnected1D', lambda: local.LocallyConnected1D(2, 2),
         (2, 2, 1)),
        ('LocallyConnected2D', lambda: local.LocallyConnected2D(2, 2),
         (2, 2, 2, 1)),
        ('Add', merge.Add, [(2, 2), (2, 2)]),
        ('Subtract', merge.Subtract, [(2, 2), (2, 2)]),
        ('Multiply', merge.Multiply, [(2, 2), (2, 2)]),
        ('Average', merge.Average, [(2, 2), (2, 2)]),
        ('Maximum', merge.Maximum, [(2, 2), (2, 2)]),
        ('Minimum', merge.Minimum, [(2, 2), (2, 2)]),
        ('Concatenate', merge.Concatenate, [(2, 2), (2, 2)]),
        ('Dot', lambda: merge.Dot(1), [(2, 2), (2, 2)]),
        ('GaussianNoise', lambda: noise.GaussianNoise(0.5), (2, 2)),
        ('GaussianDropout', lambda: noise.GaussianDropout(0.5), (2, 2)),
        ('AlphaDropout', lambda: noise.AlphaDropout(0.5), (2, 2)),
        ('BatchNormalization', normalization_v2.BatchNormalization,
         (2, 2), 1e-2, 1e-2),
        ('LayerNormalization', normalization.LayerNormalization, (2, 2)),
        ('LayerNormalizationUnfused',
         lambda: normalization.LayerNormalization(axis=1), (2, 2, 2)),
        ('MaxPooling2D', pooling.MaxPooling2D, (2, 2, 2, 1)),
        ('AveragePooling2D', pooling.AveragePooling2D, (2, 2, 2, 1)),
        ('GlobalMaxPooling2D', pooling.GlobalMaxPooling2D, (2, 2, 2, 1)),
        ('GlobalAveragePooling2D', pooling.GlobalAveragePooling2D,
         (2, 2, 2, 1)),
        ('SimpleRNN', lambda: recurrent.SimpleRNN(units=4), (4, 4, 4),
         1e-2, 1e-2),
        ('GRU', lambda: recurrent.GRU(units=4), (4, 4, 4)),
        ('LSTM', lambda: recurrent.LSTM(units=4), (4, 4, 4)),
        ('GRUV2', lambda: recurrent_v2.GRU(units=4), (4, 4, 4)),
        ('LSTMV2', lambda: recurrent_v2.LSTM(units=4), (4, 4, 4)),
        ('TimeDistributed', lambda: wrappers.TimeDistributed(core.Dense(2)),
         (2, 2, 2)),
        ('Bidirectional',
         lambda: wrappers.Bidirectional(recurrent.SimpleRNN(units=4)),
         (2, 2, 2)),
        ('AttentionLayerCausal',
         lambda: dense_attention.Attention(causal=True),
         [(2, 2, 3), (2, 3, 3), (2, 3, 3)]),
        ('AdditiveAttentionLayerCausal',
         lambda: dense_attention.AdditiveAttention(causal=True),
         [(2, 3, 4), (2, 3, 4), (2, 3, 4)]),
    )
    def test_layer(self, f32_layer_fn, input_shape, rtol=2e-3, atol=2e-3,
                   input_data=None):
        """Tests a layer by comparing the float32 and mixed precision weights.

        A float32 layer, a mixed precision layer, and a distributed mixed
        precision layer are run. The three layers are identical other than
        their dtypes and distribution strategies. The outputs after predict()
        and the weights after fit() are asserted to be close.

        Args:
          f32_layer_fn: A function returning a float32 layer. The other two
            layers will automatically be created from this.
          input_shape: The shape of the input to the layer, including the
            batch dimension. Or a list of shapes if the layer takes multiple
            inputs.
          rtol: The relative tolerance to be asserted.
          atol: The absolute tolerance to be asserted.
          input_data: A Numpy array with the data of the input. If None,
            input data will be randomly generated.
        """
        if f32_layer_fn == convolutional.ZeroPadding2D and \
                tf.test.is_built_with_rocm():
            return
        if isinstance(input_shape[0], int):
            input_shapes = [input_shape]
        else:
            input_shapes = input_shape
        strategy = create_mirrored_strategy()
        f32_layer = f32_layer_fn()

        # Create the layers
        assert f32_layer.dtype == f32_layer._compute_dtype == 'float32'
        config = f32_layer.get_config()
        config['dtype'] = policy.Policy('mixed_float16')
        mp_layer = f32_layer.__class__.from_config(config)
        distributed_mp_layer = f32_layer.__class__.from_config(config)

        # Compute per_replica_input_shapes for the distributed model
        global_batch_size = input_shapes[0][0]
        assert global_batch_size % strategy.num_replicas_in_sync == 0, (
            'The number of replicas, %d, does not divide the global batch '
            'size of %d' %
            (strategy.num_replicas_in_sync, global_batch_size))
        per_replica_batch_size = (
            global_batch_size // strategy.num_replicas_in_sync)
        per_replica_input_shapes = [(per_replica_batch_size,) + s[1:]
                                    for s in input_shapes]

        # Create the models
        f32_model = self._create_model_from_layer(f32_layer, input_shapes)
        mp_model = self._create_model_from_layer(mp_layer, input_shapes)
        with strategy.scope():
            distributed_mp_model = self._create_model_from_layer(
                distributed_mp_layer, per_replica_input_shapes)

        # Set all model weights to the same values
        f32_weights = f32_model.get_weights()
        mp_model.set_weights(f32_weights)
        distributed_mp_model.set_weights(f32_weights)

        # Generate input data
        if input_data is None:
            # Cast inputs to float16 to avoid measuring error from having
            # f16 layers cast to float16.
            input_data = [np.random.normal(size=s).astype('float16')
                          for s in input_shapes]
            if len(input_data) == 1:
                input_data = input_data[0]

        # Assert all models have close outputs.
        f32_output = f32_model.predict(input_data)
        mp_output = mp_model.predict(input_data)
        self.assertAllClose(mp_output, f32_output, rtol=rtol, atol=atol)
        self.assertAllClose(distributed_mp_model.predict(input_data),
                            f32_output, rtol=rtol, atol=atol)

        # Run fit() on models
        output = np.random.normal(
            size=f32_model.outputs[0].shape).astype('float16')
        for model in f32_model, mp_model, distributed_mp_model:
            model.fit(input_data, output, batch_size=global_batch_size)

        # Assert all models have close weights
        f32_weights = f32_model.get_weights()
        self.assertAllClose(mp_model.get_weights(), f32_weights,
                            rtol=rtol, atol=atol)
        self.assertAllClose(distributed_mp_model.get_weights(), f32_weights,
                            rtol=rtol, atol=atol)

def create_prediction_model(
        sentiment_features_len,
        lexicon_features_len,
        num_class,
        vocab_size,
        embedding_dim=50,
        max_sequence_length=word_lstm_sentiment_model.MAX_SEQUENCE_LENGTH,
        embedding_matrix=None):
    if embedding_matrix is not None:
        embedding_dim = word_lstm_sentiment_model.EMBEDDING_DIM

    if lexicon_features_len > 0 and sentiment_features_len > 0:
        word_inputs = Input(shape=(max_sequence_length,),
                            name="word_features")
        if embedding_matrix is not None:
            embedding_layer = Embedding(vocab_size + 1,
                                        embedding_dim,
                                        trainable=True,
                                        weights=[embedding_matrix])
        else:
            embedding_layer = Embedding(vocab_size + 1,
                                        embedding_dim,
                                        trainable=True)
        word_embeddings = embedding_layer(word_inputs)
        word_embeddingsd = Dropout(0.2)(word_embeddings)
        word_lstm_outputs = wrappers.Bidirectional(
            LSTM(embedding_dim, return_sequences=False))(word_embeddingsd)
        word_lstm_outputsd = Dropout(0.2)(word_lstm_outputs)

        lexicon_inputs = Input(shape=(lexicon_features_len,),
                               name="lexicon_features")
        sentiment_inputs = Input(shape=(sentiment_features_len,),
                                 name="sentiment_features")
        sentiment_inputsd = Dropout(0.2)(sentiment_inputs)
        sentiment_inputsdd = Dense(128, activation="relu")(sentiment_inputsd)

        merged_layer = Concatenate()(
            [word_lstm_outputsd, sentiment_inputsdd, lexicon_inputs])
        merged_layerd = Dropout(0.2)(merged_layer)
        merged_layerdd = Dense(64, activation="relu")(merged_layerd)
        regression_output = Dense(1, activation="sigmoid",
                                  name="regression_output")(merged_layerdd)
        classification_output = Dense(
            num_class, activation="softmax",
            name="classification_output")(merged_layerdd)
        prediction_model = Model(
            inputs=[word_inputs, sentiment_inputs, lexicon_inputs],
            outputs=[regression_output, classification_output])
    elif sentiment_features_len > 0:
        word_inputs = Input(shape=(max_sequence_length,),
                            name="word_features")
        if embedding_matrix is not None:
            embedding_layer = Embedding(vocab_size + 1,
                                        embedding_dim,
                                        trainable=True,
                                        weights=[embedding_matrix])
        else:
            embedding_layer = Embedding(vocab_size + 1,
                                        embedding_dim,
                                        trainable=True)
        word_embeddings = embedding_layer(word_inputs)
        word_embeddingsd = Dropout(0.2)(word_embeddings)
        word_lstm_outputs = wrappers.Bidirectional(
            LSTM(embedding_dim, return_sequences=False))(word_embeddingsd)
        word_lstm_outputsd = Dropout(0.2)(word_lstm_outputs)

        sentiment_inputs = Input(shape=(sentiment_features_len,),
                                 name="sentiment_features")
        sentiment_inputsd = Dropout(0.2)(sentiment_inputs)
        sentiment_inputsdd = Dense(128, activation="relu")(sentiment_inputsd)

        merged_layer = Concatenate()([word_lstm_outputsd, sentiment_inputsdd])
        merged_layerd = Dropout(0.2)(merged_layer)
        merged_layerdd = Dense(64, activation="relu")(merged_layerd)
        regression_output = Dense(1, activation="sigmoid",
                                  name="regression_output")(merged_layerdd)
        classification_output = Dense(
            num_class, activation="softmax",
            name="classification_output")(merged_layerdd)
        prediction_model = Model(
            inputs=[word_inputs, sentiment_inputs],
            outputs=[regression_output, classification_output])
    elif lexicon_features_len > 0:
        word_inputs = Input(shape=(max_sequence_length,),
                            name="word_features")
        if embedding_matrix is not None:
            embedding_layer = Embedding(vocab_size + 1,
                                        embedding_dim,
                                        trainable=True,
                                        weights=[embedding_matrix])
        else:
            embedding_layer = Embedding(vocab_size + 1,
                                        embedding_dim,
                                        trainable=True)
        word_embeddings = embedding_layer(word_inputs)
        word_embeddingsd = Dropout(0.2)(word_embeddings)
        word_lstm_outputs = wrappers.Bidirectional(
            LSTM(embedding_dim, return_sequences=False))(word_embeddingsd)
        word_lstm_outputsd = Dropout(0.2)(word_lstm_outputs)

        lexicon_inputs = Input(shape=(lexicon_features_len,),
                               name="lexicon_features")
        merged_layer = Concatenate()([word_lstm_outputsd, lexicon_inputs])
        merged_layerd = Dense(64, activation="relu")(merged_layer)
        regression_output = Dense(1, activation="sigmoid",
                                  name="regression_output")(merged_layerd)
        classification_output = Dense(
            num_class, activation="softmax",
            name="classification_output")(merged_layerd)
        prediction_model = Model(
            inputs=[word_inputs, lexicon_inputs],
            outputs=[regression_output, classification_output])
    else:
        word_inputs = Input(shape=(max_sequence_length,),
                            name="word_features")
        if embedding_matrix is not None:
            embedding_layer = Embedding(vocab_size + 1,
                                        embedding_dim,
                                        trainable=True,
                                        weights=[embedding_matrix])
        else:
            embedding_layer = Embedding(vocab_size + 1,
                                        embedding_dim,
                                        trainable=True)
        word_embeddings = embedding_layer(word_inputs)
        word_embeddingsd = Dropout(0.2)(word_embeddings)
        word_lstm_outputs = wrappers.Bidirectional(
            LSTM(embedding_dim, return_sequences=False))(word_embeddingsd)
        word_lstm_outputsd = Dropout(0.2)(word_lstm_outputs)

        merged_layerd = Dense(64, activation="relu")(word_lstm_outputsd)
        regression_output = Dense(1, activation="sigmoid",
                                  name="regression_output")(merged_layerd)
        classification_output = Dense(
            num_class, activation="softmax",
            name="classification_output")(merged_layerd)
        prediction_model = Model(
            inputs=word_inputs,
            outputs=[regression_output, classification_output])

    prediction_model.compile(
        loss={"regression_output": "mean_squared_error",
              "classification_output": "categorical_crossentropy"},
        optimizer='adam',
        metrics={"regression_output": metrics.mae,
                 "classification_output": "accuracy"})
    return prediction_model

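# A hedged usage sketch for create_prediction_model with both sentiment and
# lexicon features enabled; every size and array below is a dummy
# placeholder. The function compiles the model itself, so fit() can be
# called directly.
import numpy as np

model = create_prediction_model(sentiment_features_len=10,
                                lexicon_features_len=5,
                                num_class=3,
                                vocab_size=100,
                                max_sequence_length=30)
words = np.random.randint(0, 101, size=(4, 30))   # token ids
sentiment = np.random.rand(4, 10)
lexicon = np.random.rand(4, 5)
targets = {
    "regression_output": np.random.rand(4, 1),
    "classification_output": np.eye(3)[np.random.randint(3, size=4)],
}
model.fit([words, sentiment, lexicon], targets, epochs=1, batch_size=2)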