def kernel_regularizer(self): """Returns the kernel_regularizer to be used. Any subclass should override this method if they want a kernel_regularizer (if required for the loss function to be StronglyConvex. """ return L1L2(l2=self.reg_lambda)
def conv_batch_prelu(name,
                     tensor,
                     num_filters,
                     kernel_size=(3, 3),
                     strides=(1, 1),
                     padding="same"):
    """Combines a Conv2D layer, batch normalization and a PReLU activation.

    Args:
        name (str): layer name ('conv_', 'batchnorm_' and 'prelu_' are inserted
            between the module-level prefix and this name)
        tensor (tf.Tensor): the input tensor
        num_filters (int): number of filters used in the convolution layer
        kernel_size (tuple or list): size of each kernel in the convolution
        strides (tuple or list): strides used in the convolution
        padding (str): one of 'same' or 'valid'

    Returns:
        tensor (tf.Tensor): the output tensor
    """
    # `prefix` and `regularizers` (the l1 and l2 coefficients) are expected to
    # be defined in the enclosing module scope.
    tensor = Conv2D(filters=num_filters,
                    kernel_size=kernel_size,
                    strides=strides,
                    kernel_initializer="he_uniform",
                    bias_initializer="zeros",
                    kernel_regularizer=L1L2(regularizers[0], regularizers[1]),
                    padding=padding,
                    name=f"{prefix}_conv_{name}")(tensor)
    tensor = BatchNormalization(momentum=0.1,
                                name=f"{prefix}_batchnorm_{name}")(tensor)
    tensor = PReLU(shared_axes=[1, 2], name=f"{prefix}_prelu_{name}")(tensor)
    return tensor
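# Usage sketch for conv_batch_prelu (not part of the original source). It
# assumes the module defines `prefix` and `regularizers` (the l1/l2
# coefficients the function reads from the enclosing scope) and that Conv2D,
# BatchNormalization, PReLU and L1L2 are imported from tensorflow.keras.
import tensorflow as tf

prefix = "stage1"            # assumed module-level name prefix
regularizers = (0.0, 1e-4)   # assumed (l1, l2) coefficients

inputs = tf.keras.Input(shape=(64, 64, 3))
x = conv_batch_prelu("block1", inputs, num_filters=32)
x = conv_batch_prelu("block2", x, num_filters=64, strides=(2, 2))
model = tf.keras.Model(inputs, x)
model.summary()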
def kernel_regularizer(self): """Return l2 loss using 0.5*reg_lambda as the l2 term (as desired). L2 regularization is required for this loss function to be strongly convex. Returns: The L2 regularizer layer for this loss function, with regularizer constant set to half the 0.5 * reg_lambda. """ return L1L2(l2=self.reg_lambda / 2)
def __init__(self, n_outputs=2, input_shape=(16,), init_value=2):
  """Constructor.

  Args:
    n_outputs: number of output neurons.
    input_shape: shape of a single input sample (excluding the batch
      dimension).
    init_value: constant used to initialize the output layer's kernel.
  """
  super(TestModel, self).__init__(name='bolton', dynamic=False)
  self.n_outputs = n_outputs
  self.layer_input_shape = input_shape
  self.output_layer = tf.keras.layers.Dense(
      self.n_outputs,
      input_shape=self.layer_input_shape,
      kernel_regularizer=L1L2(l2=1),
      kernel_initializer=constant(init_value),
  )
def Base_Model(self, nodes=100, reg=1e-8, dropout=0.5, batch_size=16):
    # Note: L1L2's first positional argument is the l1 coefficient.
    reg = L1L2(reg)

    encoder_input = Input(shape=(self.max_sequence_length, self.N_words),
                          dtype='float32')
    mask = Masking(mask_value=0.0)(encoder_input)
    mask = BatchNormalization()(mask)

    l_lstm = LSTM(nodes,
                  input_shape=(self.max_sequence_length, self.N_words),
                  kernel_regularizer=reg)(mask)
    l_lstm = Dense(self.max_sequence_length * self.N_words)(l_lstm)
    l_lstm = Reshape((self.max_sequence_length, self.N_words))(l_lstm)

    decoded_sequence = TimeDistributed(
        Dense(self.N_words, activation='softmax'))(l_lstm)

    full_model = Model(encoder_input, decoded_sequence)
    self.full_model = full_model
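# Hedged usage sketch (not from the source): compiling and fitting the model
# assembled by Base_Model. `builder` stands for an instance of the surrounding
# class and `x_train` for one-hot sequences shaped
# (n_samples, max_sequence_length, N_words); since the model reproduces its
# input shape with a softmax per timestep, it is trained here as a sequence
# autoencoder, which is an assumption about the intended targets.
builder.Base_Model(nodes=128, reg=1e-6)
builder.full_model.compile(optimizer='adam', loss='categorical_crossentropy')
builder.full_model.fit(x_train, x_train, batch_size=16, epochs=10)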
def Attention_Model(self, nodes=100, reg=0.0, dropout=0.0, batch_size=16):
    reg = L1L2(reg)

    # Encoder
    encoder_input = Input(batch_shape=(batch_size, self.max_sequence_length,
                                       self.N_words),
                          dtype='float32')
    #encoder_batch_norm = BatchNormalization()
    #mask = encoder_batch_norm(encoder_input)
    encoder = Bidirectional(
        GRU(
            nodes,
            #stateful=True,
            return_state=True,
            return_sequences=True,
            recurrent_dropout=dropout,
            kernel_regularizer=reg,
            kernel_initializer='he_normal',
            recurrent_initializer='he_normal',
            name='encoder_gru'),
        #merge_mode="concat",
        name='bidirectional_encoder')
    encoder_out, encoder_fwd_state, encoder_back_state = encoder(encoder_input)

    # Decoder
    decoder_input = Input(batch_shape=(batch_size, self.max_sequence_length - 1,
                                       self.N_words),
                          dtype='float32')
    #decoder_batch_norm = BatchNormalization()
    #mask = decoder_batch_norm(decoder_input)
    decoder = Bidirectional(
        GRU(
            nodes,
            #stateful=True,
            return_state=True,
            return_sequences=True,
            recurrent_dropout=dropout,
            kernel_regularizer=reg,
            kernel_initializer='he_normal',
            recurrent_initializer='he_normal',
            name='decoder_gru'),
        #merge_mode="concat",
        name='bidirectional_decoder')
    decoder_out, decoder_fwd_state, decoder_back_state = decoder(
        decoder_input,
        initial_state=[encoder_fwd_state, encoder_back_state])

    # Attention
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_out, decoder_out])
    decoder_combined_context = Concatenate(
        axis=-1, name='concat_layer')([decoder_out, attn_out])

    # Dense
    dense_1 = Dense(nodes, activation="tanh")
    dense_time_1 = TimeDistributed(dense_1)
    dense_2 = Dense(self.N_words, activation='softmax')
    dense_time_2 = TimeDistributed(dense_2)
    decoded_sequence = dense_time_2(decoder_combined_context)

    full_model = Model([encoder_input, decoder_input], decoded_sequence)

    """ Encoder (Inference) model """
    encoder_inf_inputs = Input(batch_shape=(1, self.max_sequence_length,
                                            self.N_words),
                               name='encoder_inf_inputs')
    #encoder_inf_inputs_masked = encoder_batch_norm(encoder_inf_inputs)
    encoder_inf_out, encoder_inf_fwd_state, encoder_inf_back_state = encoder(
        encoder_inf_inputs)
    encoder_model = Model(inputs=encoder_inf_inputs,
                          outputs=[encoder_inf_out,
                                   encoder_inf_fwd_state,
                                   encoder_inf_back_state])

    """ Decoder (Inference) model """
    decoder_inf_inputs = Input(batch_shape=(1, 1, self.N_words),
                               name='decoder_word_inputs')
    #decoder_inf_inputs_masked = decoder_batch_norm(decoder_inf_inputs)
    encoder_inf_states = Input(batch_shape=(1, self.max_sequence_length, 2 * nodes),
                               name='encoder_inf_states')
    decoder_init_fwd_state = Input(batch_shape=(1, nodes), name='decoder_fwd_init')
    decoder_init_back_state = Input(batch_shape=(1, nodes), name='decoder_back_init')
    decoder_inf_out, decoder_inf_fwd_state, decoder_inf_back_state = decoder(
        decoder_inf_inputs,
        initial_state=[decoder_init_fwd_state, decoder_init_back_state])

    # Attention
    attn_inf_out, attn_inf_states = attn_layer(
        [encoder_inf_states, decoder_inf_out])
    decoder_inf_concat = Concatenate(
        axis=-1, name='concat')([decoder_inf_out, attn_inf_out])

    # Output
    decoder_inf_pred = TimeDistributed(dense_2)(decoder_inf_concat)
    decoder_model = Model(
        inputs=[encoder_inf_states,
                decoder_init_fwd_state,
                decoder_init_back_state,
                decoder_inf_inputs],
        outputs=[decoder_inf_pred,
                 attn_inf_states,
                 decoder_inf_fwd_state,
                 decoder_inf_back_state])

    self.full_model = full_model
    self.encoder_model = encoder_model
    self.decoder_model = decoder_model
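# Hedged training sketch (not from the source): full_model is trained with
# teacher forcing, so the decoder input is the target sequence without its last
# step and the label is the target without its first step. `builder`, `enc_in`
# (shape: batch, max_sequence_length, N_words) and `target_onehot` (same shape)
# are assumptions; only Attention_Model comes from the source.
builder.Attention_Model(nodes=100, batch_size=16)
builder.full_model.compile(optimizer='adam', loss='categorical_crossentropy')
builder.full_model.fit(
    [enc_in, target_onehot[:, :-1, :]],   # decoder sees tokens 0..T-2
    target_onehot[:, 1:, :],              # and is trained to predict tokens 1..T-1
    batch_size=16,                        # must match the fixed batch_shape above
    epochs=10)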
tfrecord = os.path.join(base_dir, 'datasets', 'tfrecord', 'snake_all.tfrecord')
training_set = tfdata_generator(filename=tfrecord, batch_size=32, aug=True)
validation_set = tfdata_generator(filename=tfrecord, batch_size=32)

inputs = inception_resnet_v2.InceptionResNetV2(include_top=False)
for layer in inputs.layers:
    layer.trainable = False

x = keras.layers.GlobalAveragePooling2D(name='avg_pool')(inputs.output)
x = keras.layers.Dropout(0.2)(x)
# x = keras.layers.Dense(units=4096, activation='relu', name='final_dense',
#                        kernel_regularizer=L1L2(l2=0.001))(x)
# x = keras.layers.Dropout(0.2)(x)
outputs = keras.layers.Dense(7,
                             activation='softmax',
                             name='predictions',
                             kernel_regularizer=L1L2(l2=0.001))(x)

with tf.device('/cpu:0'):
    model = keras.models.Model(inputs.input, outputs)
parallel_model = multi_gpu_model(model, gpus=2)
# parallel_model = model

# learning_rate = tf.keras.optimizers.
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
parallel_model.compile(optimizer=optimizer,
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])

save_path = os.path.join(
    base_dir, 'ckpt',
    'weights-epoch-{epoch:02d}-val_loss-{val_loss:.4f}-val_acc-{val_acc:.2f}.hdf5'
)
ckpt = ParallelModelCheckpoint(model,
class HuberTests(keras_parameterized.TestCase):
  """Tests for the StrongConvexHuber loss."""

  @parameterized.named_parameters([
      {
          'testcase_name': 'normal',
          'reg_lambda': 1,
          'c': 1,
          'radius_constant': 1,
          'delta': 1,
      },
  ])
  def test_init_params(self, reg_lambda, c, radius_constant, delta):
    """Test initialization for given arguments.

    Args:
      reg_lambda: initialization value for reg_lambda arg
      c: initialization value for C arg
      radius_constant: initialization value for radius_constant arg
      delta: the delta parameter for the huber loss
    """
    # test valid domains for each variable
    loss = StrongConvexHuber(reg_lambda, c, radius_constant, delta)
    self.assertIsInstance(loss, StrongConvexHuber)

  @parameterized.named_parameters([
      {
          'testcase_name': 'negative c',
          'reg_lambda': 1,
          'c': -1,
          'radius_constant': 1,
          'delta': 1
      },
      {
          'testcase_name': 'negative radius',
          'reg_lambda': 1,
          'c': 1,
          'radius_constant': -1,
          'delta': 1
      },
      {
          'testcase_name': 'negative lambda',
          'reg_lambda': -1,
          'c': 1,
          'radius_constant': 1,
          'delta': 1
      },
      {
          'testcase_name': 'negative delta',
          'reg_lambda': 1,
          'c': 1,
          'radius_constant': 1,
          'delta': -1
      },
  ])
  def test_bad_init_params(self, reg_lambda, c, radius_constant, delta):
    """Test invalid domain for given params. Should raise ValueError.

    Args:
      reg_lambda: initialization value for reg_lambda arg
      c: initialization value for C arg
      radius_constant: initialization value for radius_constant arg
      delta: the delta parameter for the huber loss
    """
    # test invalid domains for each variable
    with self.assertRaises(ValueError):
      StrongConvexHuber(reg_lambda, c, radius_constant, delta)

  # test the bounds and test varied deltas
  @test_util.run_all_in_graph_and_eager_modes
  @parameterized.named_parameters([
      {
          'testcase_name': 'delta=1,y_true=1 z>1+h decision boundary',
          'logits': 2.1,
          'y_true': 1,
          'delta': 1,
          'result': 0,
      },
      {
          'testcase_name': 'delta=1,y_true=1 z<1+h decision boundary',
          'logits': 1.9,
          'y_true': 1,
          'delta': 1,
          'result': 0.01 * 0.25,
      },
      {
          'testcase_name': 'delta=1,y_true=1 1-z< h decision boundary',
          'logits': 0.1,
          'y_true': 1,
          'delta': 1,
          'result': 1.9**2 * 0.25,
      },
      {
          'testcase_name': 'delta=1,y_true=1 z < 1-h decision boundary',
          'logits': -0.1,
          'y_true': 1,
          'delta': 1,
          'result': 1.1,
      },
      {
          'testcase_name': 'delta=2,y_true=1 z>1+h decision boundary',
          'logits': 3.1,
          'y_true': 1,
          'delta': 2,
          'result': 0,
      },
      {
          'testcase_name': 'delta=2,y_true=1 z<1+h decision boundary',
          'logits': 2.9,
          'y_true': 1,
          'delta': 2,
          'result': 0.01 * 0.125,
      },
      {
          'testcase_name': 'delta=2,y_true=1 1-z < h decision boundary',
          'logits': 1.1,
          'y_true': 1,
          'delta': 2,
          'result': 1.9**2 * 0.125,
      },
      {
          'testcase_name': 'delta=2,y_true=1 z < 1-h decision boundary',
          'logits': -1.1,
          'y_true': 1,
          'delta': 2,
          'result': 2.1,
      },
      {
          'testcase_name': 'delta=1,y_true=-1 z>1+h decision boundary',
          'logits': -2.1,
          'y_true': -1,
          'delta': 1,
          'result': 0,
      },
  ])
  def test_calculation(self, logits, y_true, delta, result):
    """Test the call method to ensure it returns the correct value.

    Args:
      logits: unscaled output of model
      y_true: label
      delta: delta value for StrongConvexHuber loss.
      result: correct loss calculation value
    """
    logits = tf.Variable(logits, False, dtype=tf.float32)
    y_true = tf.Variable(y_true, False, dtype=tf.float32)
    loss = StrongConvexHuber(0.00001, 1, 1, delta)
    loss = loss(y_true, logits)
    self.assertAllClose(loss.numpy(), result)

  @parameterized.named_parameters([
      {
          'testcase_name': 'beta',
          'init_args': [1, 1, 1, 1],
          'fn': 'beta',
          'args': [1],
          'result': tf.Variable(1.5, dtype=tf.float32)
      },
      {
          'testcase_name': 'gamma',
          'fn': 'gamma',
          'init_args': [1, 1, 1, 1],
          'args': [],
          'result': tf.Variable(1, dtype=tf.float32),
      },
      {
          'testcase_name': 'lipchitz constant',
          'fn': 'lipchitz_constant',
          'init_args': [1, 1, 1, 1],
          'args': [1],
          'result': tf.Variable(2, dtype=tf.float32),
      },
      {
          'testcase_name': 'kernel regularizer',
          'fn': 'kernel_regularizer',
          'init_args': [1, 1, 1, 1],
          'args': [],
          'result': L1L2(l2=0.5),
      },
  ])
  def test_fns(self, init_args, fn, args, result):
    """Test that fn of the Huber loss returns the correct result.

    Args:
      init_args: init values for loss instance
      fn: the fn to test
      args: the arguments to above function
      result: the correct result from the fn
    """
    loss = StrongConvexHuber(*init_args)
    expected = getattr(loss, fn, lambda: 'fn not found')(*args)
    if hasattr(expected, 'numpy') and hasattr(result, 'numpy'):  # both tensor
      expected = expected.numpy()
      result = result.numpy()
    if hasattr(expected, 'l2') and hasattr(result, 'l2'):  # both l2 regularizer
      expected = expected.l2
      result = result.l2
    self.assertEqual(expected, result)
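# The expected values above follow a huberized hinge loss. The reference
# implementation below is reconstructed from the test cases themselves, not
# copied from the StrongConvexHuber source: with z = y_true * logits and
# h = delta,
#   loss = 0                        if z > 1 + h
#   loss = (1 + h - z)^2 / (4 * h)  if |1 - z| <= h
#   loss = 1 - z                    if z < 1 - h
def huberized_hinge(logits, y_true, delta):
  z = y_true * logits
  if z > 1 + delta:
    return 0.0
  if z >= 1 - delta:
    return (1 + delta - z)**2 / (4 * delta)
  return 1 - z

# Reproduces e.g. 'delta=1,y_true=1 z<1+h': (1 + 1 - 1.9)^2 / 4 = 0.01 * 0.25.
assert abs(huberized_hinge(1.9, 1, 1) - 0.01 * 0.25) < 1e-9
assert abs(huberized_hinge(-1.1, 1, 2) - 2.1) < 1e-9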
class CategoricalCrossentropyTests(keras_parameterized.TestCase):
  """Tests for the StrongConvexCategoricalCrossentropy loss."""

  @parameterized.named_parameters([
      {
          'testcase_name': 'normal',
          'reg_lambda': 1,
          'C': 1,
          'radius_constant': 1
      },
  ])
  def test_init_params(self, reg_lambda, C, radius_constant):  # pylint: disable=invalid-name
    """Test initialization for given arguments.

    Args:
      reg_lambda: initialization value for reg_lambda arg
      C: initialization value for C arg
      radius_constant: initialization value for radius_constant arg
    """
    # test valid domains for each variable
    loss = StrongConvexCategoricalCrossentropy(reg_lambda, C, radius_constant)
    self.assertIsInstance(loss, StrongConvexCategoricalCrossentropy)

  @parameterized.named_parameters([
      {
          'testcase_name': 'negative c',
          'reg_lambda': 1,
          'C': -1,
          'radius_constant': 1
      },
      {
          'testcase_name': 'negative radius',
          'reg_lambda': 1,
          'C': 1,
          'radius_constant': -1
      },
      {
          'testcase_name': 'negative lambda',
          'reg_lambda': -1,
          'C': 1,
          'radius_constant': 1
      },
  ])
  def test_bad_init_params(self, reg_lambda, C, radius_constant):  # pylint: disable=invalid-name
    """Test invalid domain for given params. Should raise ValueError.

    Args:
      reg_lambda: initialization value for reg_lambda arg
      C: initialization value for C arg
      radius_constant: initialization value for radius_constant arg
    """
    # test invalid domains for each variable
    with self.assertRaises(ValueError):
      StrongConvexCategoricalCrossentropy(reg_lambda, C, radius_constant)

  @test_util.run_all_in_graph_and_eager_modes
  @parameterized.named_parameters([
      # [] for compatibility with tensorflow loss calculation
      {
          'testcase_name': 'both positive',
          'logits': [[10000, 0]],
          'y_true': [[1, 0]],
          'result': 0,
      },
      {
          'testcase_name': 'negative gradient positive logits',
          'logits': [[-10000, 0]],
          'y_true': [[1, 0]],
          'result': 10000,
      },
      {
          'testcase_name': 'positive gradient negative logits',
          'logits': [[10000, 0]],
          'y_true': [[0, 1]],
          'result': 10000,
      },
      {
          'testcase_name': 'both negative',
          'logits': [[-10000, 0]],
          'y_true': [[0, 1]],
          'result': 0
      },
  ])
  def test_calculation(self, logits, y_true, result):
    """Test the call method to ensure it returns the correct value.

    Args:
      logits: unscaled output of model
      y_true: label
      result: correct loss calculation value
    """
    logits = tf.Variable(logits, False, dtype=tf.float32)
    y_true = tf.Variable(y_true, False, dtype=tf.float32)
    loss = StrongConvexCategoricalCrossentropy(0.00001, 1, 1)
    loss = loss(y_true, logits)
    self.assertEqual(loss.numpy(), result)

  @parameterized.named_parameters([
      {
          'testcase_name': 'beta',
          'init_args': [1, 1, 1],
          'fn': 'beta',
          'args': [1],
          'result': tf.constant(2, dtype=tf.float32)
      },
      {
          'testcase_name': 'gamma',
          'fn': 'gamma',
          'init_args': [1, 1, 1],
          'args': [],
          'result': tf.constant(1, dtype=tf.float32),
      },
      {
          'testcase_name': 'lipchitz constant',
          'fn': 'lipchitz_constant',
          'init_args': [1, 1, 1],
          'args': [1],
          'result': tf.constant(2, dtype=tf.float32),
      },
      {
          'testcase_name': 'kernel regularizer',
          'fn': 'kernel_regularizer',
          'init_args': [1, 1, 1],
          'args': [],
          'result': L1L2(l2=0.5),
      },
  ])
  def test_fns(self, init_args, fn, args, result):
    """Test that fn of CategoricalCrossentropy loss returns the correct result.

    Args:
      init_args: init values for loss instance
      fn: the fn to test
      args: the arguments to above function
      result: the correct result from the fn
    """
    loss = StrongConvexCategoricalCrossentropy(*init_args)
    expected = getattr(loss, fn, lambda: 'fn not found')(*args)
    if hasattr(expected, 'numpy') and hasattr(result, 'numpy'):  # both tensor
      expected = expected.numpy()
      result = result.numpy()
    if hasattr(expected, 'l2') and hasattr(result, 'l2'):  # both l2 regularizer
      expected = expected.l2
      result = result.l2
    self.assertEqual(expected, result)

  @parameterized.named_parameters([
      {
          'testcase_name': 'label_smoothing',
          'init_args': [1, 1, 1, True, 0.1],
          'fn': None,
          'args': None,
          'print_res': 'The impact of label smoothing on privacy is unknown.'
      },
  ])
  def test_prints(self, init_args, fn, args, print_res):
    """Test logger warning from StrongConvexCategoricalCrossentropy.

    Args:
      init_args: arguments to init the object with.
      fn: function to test
      args: arguments to above function
      print_res: print result that should have been printed.
    """
    with captured_output() as (out, err):  # pylint: disable=unused-variable
      loss = StrongConvexCategoricalCrossentropy(*init_args)
      if fn is not None:
        getattr(loss, fn, lambda *arguments: print('error'))(*args)
    self.assertRegexMatch(err.getvalue().strip(), [print_res])
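# Hedged check (not from the source): the expected values above are exactly
# what plain softmax cross-entropy over logits gives for these inputs, so the
# stock Keras loss reproduces them.
import tensorflow as tf

ce = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
print(ce([[1., 0.]], [[10000., 0.]]).numpy())    # ~0      ('both positive')
print(ce([[1., 0.]], [[-10000., 0.]]).numpy())   # ~10000  ('negative gradient positive logits')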
def Embedding_Model(self, nodes=100, reg=0.0, dropout=0.0, batch_size=16,
                    embedding_dim=1000):
    reg = L1L2(reg)

    # Encoder
    encoder_input = Input(
        batch_shape=(batch_size, self.max_sequence_length_enc),
        dtype='float32')
    encoder_embedding = Embedding(
        self.N_words,
        embedding_dim,
        input_length=self.max_sequence_length_enc,
        #mask_zero=True,
        name='encoder_embedding')
    encoder = GRU(
        nodes,
        return_state=True,
        return_sequences=True,
        recurrent_dropout=dropout,
        kernel_regularizer=reg,
        kernel_initializer='he_normal',
        recurrent_initializer='he_normal',
        name='encoder_gru')
    encoder_out, encoder_state = encoder(encoder_embedding(encoder_input))

    # Decoder
    decoder_input = Input(
        batch_shape=(batch_size, self.max_sequence_length_dec - 1),
        dtype='float32')
    decoder_embedding = Embedding(
        self.N_words,
        embedding_dim,
        input_length=self.max_sequence_length_dec - 1,
        #mask_zero=True,
        name='decoder_embedding')
    decoder = GRU(
        nodes,
        return_state=True,
        return_sequences=True,
        recurrent_dropout=dropout,
        kernel_regularizer=reg,
        kernel_initializer='he_normal',
        recurrent_initializer='he_normal',
        name='decoder_gru')
    decoder_out, decoder_state = decoder(
        decoder_embedding(decoder_input), initial_state=encoder_state)

    # Attention
    #attn_layer = LuongAttention(nodes)
    #attn_out, attn_states = attn_layer(decoder_out, encoder_out)
    #decoder_with_context = Concatenate(axis=-1, name='concat_layer')([attn_out, decoder_out])
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_out, decoder_out])  # context, alignment
    decoder_with_context = Concatenate(
        axis=-1, name='concat_layer')([decoder_out, attn_out])

    dense_1 = Dense(nodes, activation="tanh", kernel_regularizer=reg)
    dense_time_1 = TimeDistributed(dense_1, name='time_1')
    decoder_with_context = dense_time_1(decoder_with_context)
    dense_2 = Dense(self.N_words, activation='softmax')
    dense_time_2 = TimeDistributed(dense_2, name='time_2')
    decoded_sequence = dense_time_2(decoder_with_context)

    full_model = Model([encoder_input, decoder_input], decoded_sequence)

    """ Encoder (Inference) model """
    encoder_inf_inputs = Input(
        batch_shape=(1, self.max_sequence_length_enc),
        name='encoder_inf_inputs')
    encoder_inf_out, encoder_inf_state = encoder(
        encoder_embedding(encoder_inf_inputs))
    encoder_model = Model(
        inputs=encoder_inf_inputs,
        outputs=[encoder_inf_out, encoder_inf_state])

    """ Decoder (Inference) model """
    decoder_inf_inputs = Input(batch_shape=(1, 1), name='decoder_word_inputs')
    encoder_inf_states = Input(
        batch_shape=(1, self.max_sequence_length_enc, nodes),
        name='encoder_inf_states')
    decoder_init_state = Input(batch_shape=(1, nodes), name='decoder_init')
    decoder_inf_out, decoder_inf_state = decoder(
        decoder_embedding(decoder_inf_inputs),
        initial_state=decoder_init_state)

    # Attention
    attn_inf_out, attn_inf_states = attn_layer(
        [encoder_inf_states, decoder_inf_out])
    decoder_inf_concat = Concatenate(
        axis=-1, name='concat')([decoder_inf_out, attn_inf_out])
    #attn_inf_out, attn_inf_states = attn_layer(decoder_inf_out,
    #                                           encoder_inf_states)
    #decoder_inf_concat = Concatenate(
    #    axis=-1, name='concat_layer')([attn_inf_out, decoder_inf_out])

    # Output
    decoder_inf_concat = TimeDistributed(dense_1)(decoder_inf_concat)
    decoder_inf_pred = TimeDistributed(dense_2)(decoder_inf_concat)
    decoder_model = Model(
        inputs=[encoder_inf_states, decoder_init_state, decoder_inf_inputs],
        outputs=[decoder_inf_pred, attn_inf_states, decoder_inf_state])

    self.full_model = full_model
    self.encoder_model = encoder_model
    self.decoder_model = decoder_model
def build(self, input_layer):
    downsampling_factor = int(np.prod(self.downsample_factors))
    last_layer = input_layer
    input_shape = K.int_shape(last_layer)
    if len(input_shape) == 3:
        # Add channel dimension if not already present.
        last_layer = Reshape(input_shape[1:] + (1,))(last_layer)

    # Contracting path.
    per_stage_before_pool = []
    for layer_idx in range(self.num_layers + 1):
        cur_num_units = int(np.rint(self.num_units * 2**layer_idx))

        last_layer = Conv2D(cur_num_units, 3, padding='same',
                            kernel_initializer='he_normal',
                            kernel_regularizer=L1L2(l2=self.l2_weight),
                            bias_regularizer=L1L2(l2=self.l2_weight),
                            use_bias=not self.with_bn and self.with_bias)(last_layer)
        if self.with_bn:
            last_layer = BatchNormalization(
                beta_regularizer=L1L2(l2=self.l2_weight),
                gamma_regularizer=L1L2(l2=self.l2_weight))(last_layer)
        last_layer = Activation(self.activation)(last_layer)

        last_layer = Conv2D(cur_num_units, 3, padding='same',
                            kernel_initializer='he_normal',
                            kernel_regularizer=L1L2(l2=self.l2_weight),
                            bias_regularizer=L1L2(l2=self.l2_weight),
                            use_bias=not self.with_bn and self.with_bias)(last_layer)
        if self.with_bn:
            last_layer = BatchNormalization(
                beta_regularizer=L1L2(l2=self.l2_weight),
                gamma_regularizer=L1L2(l2=self.l2_weight))(last_layer)
        last_layer = Activation(self.activation)(last_layer)

        per_stage_before_pool.append(last_layer)

        if layer_idx != self.num_layers:  # Last layer doesn't require max pooling.
            last_layer = MaxPooling2D(pool_size=(2, 2))(last_layer)
            if self.p_dropout != 0.0:
                last_layer = Dropout(self.p_dropout)(last_layer)

    # Expanding path.
    start_idx = 0 if downsampling_factor == 1 else int(np.log2(self.downsample_factors[0]))
    for layer_idx in reversed(range(start_idx, self.num_layers)):
        cur_num_units = int(np.rint(self.num_units * 2**layer_idx))

        last_layer = UpSampling2D(size=(2, 2))(last_layer)
        last_layer = Conv2D(cur_num_units, 2, padding='same',
                            kernel_initializer='he_normal',
                            kernel_regularizer=L1L2(l2=self.l2_weight),
                            bias_regularizer=L1L2(l2=self.l2_weight),
                            use_bias=not self.with_bn and self.with_bias)(last_layer)
        if self.with_bn:
            last_layer = BatchNormalization(
                beta_regularizer=L1L2(l2=self.l2_weight),
                gamma_regularizer=L1L2(l2=self.l2_weight))(last_layer)
        last_layer = Activation(self.activation)(last_layer)

        last_layer = concatenate([per_stage_before_pool[layer_idx], last_layer], axis=3)

        last_layer = Conv2D(cur_num_units, 3, padding='same',
                            kernel_initializer='he_normal',
                            kernel_regularizer=L1L2(l2=self.l2_weight),
                            bias_regularizer=L1L2(l2=self.l2_weight),
                            use_bias=not self.with_bn and self.with_bias)(last_layer)
        if self.with_bn:
            last_layer = BatchNormalization(
                beta_regularizer=L1L2(l2=self.l2_weight),
                gamma_regularizer=L1L2(l2=self.l2_weight))(last_layer)
        last_layer = Activation(self.activation)(last_layer)

        last_layer = Conv2D(cur_num_units, 3, padding='same',
                            kernel_initializer='he_normal',
                            kernel_regularizer=L1L2(l2=self.l2_weight),
                            bias_regularizer=L1L2(l2=self.l2_weight),
                            use_bias=not self.with_bn and self.with_bias)(last_layer)
        if self.with_bn:
            last_layer = BatchNormalization(
                beta_regularizer=L1L2(l2=self.l2_weight),
                gamma_regularizer=L1L2(l2=self.l2_weight))(last_layer)
        last_layer = Activation(self.activation)(last_layer)

    last_layer = Conv2D(self.num_output_channels, 3,
                        activation="linear" if self.skip_last_dense else self.activation,
                        padding='same',
                        kernel_initializer='he_normal',
                        kernel_regularizer=L1L2(l2=self.l2_weight),
                        bias_regularizer=L1L2(l2=self.l2_weight),
                        use_bias=not self.with_bn and self.with_bias)(last_layer)
    return last_layer
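# Minimal usage sketch (not from the source): wiring the block returned by
# build() into a standalone Keras model. `unet_builder` stands for an instance
# of the surrounding class, configured elsewhere with num_layers, num_units,
# downsample_factors, l2_weight, with_bn, etc.; the optimizer and loss are
# placeholders.
image = Input(shape=(256, 256, 1))
prediction = unet_builder.build(image)
model = Model(inputs=image, outputs=prediction)
model.compile(optimizer='adam', loss='mse')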
def Attention_Model(self, nodes=100, reg=0.0, dropout=0.0, batch_size=16):
    reg = L1L2(reg)

    # Encoder
    encoder_input = Input(batch_shape=(batch_size, self.max_sequence_length_enc,
                                       self.N_words),
                          dtype='float32')
    #conv = Conv1D(filters=10 * nodes, kernel_size=5, activation="relu")
    #pool = MaxPooling1D(pool_size=self.N_words, strides=2)
    encoder = Bidirectional(GRU(nodes,
                                return_state=True,
                                return_sequences=True,
                                recurrent_dropout=dropout,
                                kernel_regularizer=reg,
                                kernel_initializer='he_normal',
                                recurrent_initializer='he_normal',
                                name='encoder_gru'),
                            merge_mode="sum",
                            name='bidirectional_encoder')
    encoder_out, encoder_fwd_state, encoder_back_state = encoder(encoder_input)
    combined_encoder_state = Add()([encoder_fwd_state, encoder_back_state])

    # Decoder
    decoder_input = Input(batch_shape=(batch_size, self.max_sequence_length_dec - 1,
                                       self.N_words),
                          dtype='float32')
    decoder = GRU(nodes,
                  return_state=True,
                  return_sequences=True,
                  recurrent_dropout=dropout,
                  kernel_regularizer=reg,
                  kernel_initializer='he_normal',
                  recurrent_initializer='he_normal',
                  name='decoder_gru')
    decoder_out, decoder_state = decoder(decoder_input,
                                         initial_state=combined_encoder_state)

    # Attention
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_out, decoder_out])
    decoder_combined_context = Concatenate(
        axis=-1, name='concat_layer')([decoder_out, attn_out])

    # Dense
    dense_1 = Dense(nodes, activation="tanh", kernel_regularizer=reg)
    dense_time_1 = TimeDistributed(dense_1)
    decoder_combined_context = dense_time_1(decoder_combined_context)
    dense_2 = Dense(self.N_words, activation='softmax')
    dense_time_2 = TimeDistributed(dense_2)
    decoded_sequence = dense_time_2(decoder_combined_context)

    full_model = Model([encoder_input, decoder_input], decoded_sequence)

    """ Encoder (Inference) model """
    encoder_inf_inputs = Input(batch_shape=(1, self.max_sequence_length_enc,
                                            self.N_words),
                               name='encoder_inf_inputs')
    encoder_inf_out, encoder_inf_fwd_state, encoder_inf_back_state = encoder(
        encoder_inf_inputs)
    encoder_model = Model(inputs=encoder_inf_inputs,
                          outputs=[encoder_inf_out,
                                   encoder_inf_fwd_state,
                                   encoder_inf_back_state])

    """ Decoder (Inference) model """
    decoder_inf_inputs = Input(batch_shape=(1, 1, self.N_words),
                               name='decoder_word_inputs')
    encoder_inf_states = Input(batch_shape=(1, self.max_sequence_length_enc, nodes),
                               name='encoder_inf_states')
    decoder_init_state = Input(batch_shape=(1, nodes), name='decoder_init')
    decoder_inf_out, decoder_inf_state = decoder(decoder_inf_inputs,
                                                 initial_state=decoder_init_state)

    # Attention
    attn_inf_out, attn_inf_states = attn_layer(
        [encoder_inf_states, decoder_inf_out])
    decoder_inf_concat = Concatenate(
        axis=-1, name='concat')([decoder_inf_out, attn_inf_out])

    # Output
    decoder_inf_concat = TimeDistributed(dense_1)(decoder_inf_concat)
    decoder_inf_pred = TimeDistributed(dense_2)(decoder_inf_concat)
    decoder_model = Model(
        inputs=[encoder_inf_states, decoder_init_state, decoder_inf_inputs],
        outputs=[decoder_inf_pred, attn_inf_states, decoder_inf_state])

    self.full_model = full_model
    self.encoder_model = encoder_model
    self.decoder_model = decoder_model
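# Hedged sketch (not from the source) of step-by-step greedy decoding with the
# inference models built above. `builder` is an instance of the surrounding
# class after calling Attention_Model; `word_to_onehot` and `onehot_to_word`
# are hypothetical helpers mapping a token to a one-hot vector of length
# N_words and back (e.g. via argmax). Input/output orderings follow the Model
# definitions in Attention_Model.
import numpy as np

def greedy_decode(builder, input_seq, start_token='<start>', end_token='<end>'):
    # Encode the whole input sequence once (input_seq shape: (1, T_enc, N_words)).
    enc_out, enc_fwd, enc_back = builder.encoder_model.predict(input_seq)
    dec_state = enc_fwd + enc_back   # mirrors the Add() merge used at training time
    target = word_to_onehot(start_token)[np.newaxis, np.newaxis, :]
    decoded = []
    for _ in range(builder.max_sequence_length_dec - 1):
        pred, _, dec_state = builder.decoder_model.predict(
            [enc_out, dec_state, target])
        word = onehot_to_word(pred[0, -1])
        if word == end_token:
            break
        decoded.append(word)
        target = word_to_onehot(word)[np.newaxis, np.newaxis, :]
    return decoded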