Example #1
    def kernel_regularizer(self):
        """Returns the kernel_regularizer to be used.

    Any subclass should override this method if it wants a kernel_regularizer
    (as required for the loss function to be StronglyConvex).
    """
        return L1L2(l2=self.reg_lambda)
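A minimal sketch of how the regularizer returned by such a method would be attached to a Keras layer; the loss class and reg_lambda value below are illustrative assumptions, not part of the original example:

import tensorflow as tf
from tensorflow.keras.regularizers import L1L2

class ExampleStrongConvexLoss:  # hypothetical stand-in exposing kernel_regularizer()
    def __init__(self, reg_lambda):
        self.reg_lambda = reg_lambda

    def kernel_regularizer(self):
        return L1L2(l2=self.reg_lambda)

loss = ExampleStrongConvexLoss(reg_lambda=0.1)
# The regularizer supplied by the loss is wired into the model's output layer.
layer = tf.keras.layers.Dense(2, kernel_regularizer=loss.kernel_regularizer())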
Example #2
    def conv_batch_prelu(name,
                         tensor,
                         num_filters,
                         kernel_size=(3, 3),
                         strides=(1, 1),
                         padding="same"):
        """
        This function combines conv2d layer, batch normalization layer and prelu activation.

        Args:
            name (str): layer's name ('conv_', 'batchnorm' and 'prelu' are added to the name)
            tensor (tf.Tensor): the input tensor
            num_filters (int): number of filters used in the convolution layer
            kernel_size (tuple or list): size of each kernel in the convolution
            strides (tuple or list): strides used in the convolution
            padding (str): one of 'same' or 'valid'

        Returns:
            tensor (tf.Tensor): the output tensor
        """
        tensor = Conv2D(filters=num_filters,
                        kernel_size=kernel_size,
                        strides=strides,
                        kernel_initializer="he_uniform",
                        bias_initializer="zeros",
                        kernel_regularizer=L1L2(regularizers[0],
                                                regularizers[1]),
                        padding=padding,
                        name=f"{prefix}_conv_{name}")(tensor)
        tensor = BatchNormalization(momentum=0.1,
                                    name=f"{prefix}_batchnorm_{name}")(tensor)
        tensor = PReLU(shared_axes=[1, 2],
                       name=f"{prefix}_prelu_{name}")(tensor)
        return tensor
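conv_batch_prelu reads `prefix` and `regularizers` from its enclosing scope, so neither appears in its signature. A minimal usage sketch, assuming the helper is available at module level and using illustrative values for both names:

from tensorflow.keras.layers import Input

prefix = "block1"            # assumed naming prefix used inside the helper
regularizers = (0.0, 1e-4)   # assumed (l1, l2) constants passed to L1L2

inputs = Input(shape=(64, 64, 3))
x = conv_batch_prelu("a", inputs, num_filters=32)              # block1_conv_a / block1_batchnorm_a / block1_prelu_a
x = conv_batch_prelu("b", x, num_filters=64, strides=(2, 2))   # downsampled stage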
Example #3
    def kernel_regularizer(self):
        """Return l2 loss using 0.5*reg_lambda as the l2 term (as desired).

    L2 regularization is required for this loss function to be strongly convex.

    Returns:
      The L2 regularizer for this loss function, with its regularization constant
      set to half of reg_lambda (i.e. 0.5 * reg_lambda).
    """
        return L1L2(l2=self.reg_lambda / 2)
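The factor of 1/2 comes from how Keras computes the L2 penalty: L1L2(l2=c) adds c * sum(w**2) to the loss, so reaching the strongly convex term 0.5 * reg_lambda * ||w||^2 requires c = reg_lambda / 2. A quick check, assuming TF 2.x eager execution:

import numpy as np
from tensorflow.keras.regularizers import L1L2

reg_lambda = 2.0
w = np.array([1.0, -3.0, 0.5], dtype="float32")

keras_penalty = L1L2(l2=reg_lambda / 2)(w).numpy()    # c * sum(w**2) with c = reg_lambda / 2
target_penalty = 0.5 * reg_lambda * np.sum(w ** 2)    # 0.5 * reg_lambda * ||w||^2
assert np.isclose(keras_penalty, target_penalty)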
Example #4
    def __init__(self, n_outputs=2, input_shape=(16, ), init_value=2):
        """Constructor.

    Args:
      n_outputs: number of output neurons
      input_shape: shape of a single input sample, excluding the batch dimension
      init_value: constant used to initialize the output layer's kernel
    """
        super(TestModel, self).__init__(name='bolton', dynamic=False)
        self.n_outputs = n_outputs
        self.layer_input_shape = input_shape
        self.output_layer = tf.keras.layers.Dense(
            self.n_outputs,
            input_shape=self.layer_input_shape,
            kernel_regularizer=L1L2(l2=1),
            kernel_initializer=constant(init_value),
        )
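Because the kernel is filled with a constant, the layer's behaviour is fully deterministic, which is what makes this model useful in tests. A small sketch, assuming TestModel as defined above is importable:

import tensorflow as tf

model = TestModel(n_outputs=2, input_shape=(16,), init_value=2)
x = tf.ones((1, 16))
y = model.output_layer(x)   # every entry is 16 * 2 = 32 (constant kernel, zero bias)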
Example #5
    def Base_Model(self, nodes=100, reg=1e-8, dropout=0.5, batch_size=16):
        reg = L1L2(reg)

        encoder_input = Input(shape=(self.max_sequence_length, self.N_words),
                              dtype='float32')

        mask = Masking(mask_value=0.0)(encoder_input)
        mask = BatchNormalization()(mask)

        l_lstm = LSTM(nodes,
                      input_shape=(self.max_sequence_length, self.N_words),
                      kernel_regularizer=reg)(mask)
        l_lstm = Dense(self.max_sequence_length * self.N_words)(l_lstm)
        l_lstm = Reshape((self.max_sequence_length, self.N_words))(l_lstm)
        decoded_sequence = TimeDistributed(
            Dense(self.N_words, activation='softmax'))(l_lstm)

        full_model = Model(encoder_input, decoded_sequence)
        self.full_model = full_model
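Base_Model only assembles self.full_model; compiling and training are left to the caller. A minimal sketch with an assumed owner class and dummy data (the class name, attribute values, and optimizer are illustrative):

import numpy as np

seq2seq = SequenceAutoencoder()                            # hypothetical owner of Base_Model
seq2seq.max_sequence_length, seq2seq.N_words = 20, 500     # assumed attributes used above
seq2seq.Base_Model(nodes=128)

seq2seq.full_model.compile(optimizer="adam", loss="categorical_crossentropy")
X = np.random.rand(32, 20, 500).astype("float32")          # dummy one-hot-like sequences
seq2seq.full_model.fit(X, X, batch_size=16, epochs=1)      # autoencoder: reconstruct the input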
Example #6
    def Attention_Model(self, nodes=100, reg=0.0, dropout=0.0, batch_size=16):
        reg = L1L2(reg)

        # Encoder

        encoder_input = Input(batch_shape=(batch_size,
                                           self.max_sequence_length,
                                           self.N_words),
                              dtype='float32')
        #encoder_batch_norm = BatchNormalization()
        #mask = encoder_batch_norm(encoder_input)

        encoder = Bidirectional(
            GRU(
                nodes,
                #stateful=True,
                return_state=True,
                return_sequences=True,
                recurrent_dropout=dropout,
                kernel_regularizer=reg,
                kernel_initializer='he_normal',
                recurrent_initializer='he_normal',
                name='encoder_gru'),
            #merge_mode="concat",
            name='bidirectional_encoder')

        encoder_out, encoder_fwd_state, encoder_back_state = encoder(
            encoder_input)

        # Decoder

        decoder_input = Input(batch_shape=(batch_size,
                                           self.max_sequence_length - 1,
                                           self.N_words),
                              dtype='float32')
        #decoder_batch_norm = BatchNormalization()
        #mask = decoder_batch_norm(decoder_input)

        decoder = Bidirectional(
            GRU(
                nodes,
                #stateful=True,
                return_state=True,
                return_sequences=True,
                recurrent_dropout=dropout,
                kernel_regularizer=reg,
                kernel_initializer='he_normal',
                recurrent_initializer='he_normal',
                name='decoder_gru'),
            #merge_mode="concat",
            name='bidirectional_decoder')

        decoder_out, decoder_fwd_state, decoder_back_state = decoder(
            decoder_input,
            initial_state=[encoder_fwd_state, encoder_back_state])

        # Attention
        attn_layer = AttentionLayer(name='attention_layer')
        attn_out, attn_states = attn_layer([encoder_out, decoder_out])

        decoder_combined_context = Concatenate(
            axis=-1, name='concat_layer')([decoder_out, attn_out])

        # Dense
        dense_1 = Dense(nodes, activation="tanh")
        dense_time_1 = TimeDistributed(dense_1)
        dense_2 = Dense(self.N_words, activation='softmax')
        dense_time_2 = TimeDistributed(dense_2)
        decoded_sequence = dense_time_2(decoder_combined_context)

        full_model = Model([encoder_input, decoder_input], decoded_sequence)
        """ Encoder (Inference) model """
        encoder_inf_inputs = Input(batch_shape=(1, self.max_sequence_length,
                                                self.N_words),
                                   name='encoder_inf_inputs')
        #encoder_inf_inputs_masked = encoder_batch_norm(encoder_inf_inputs)

        encoder_inf_out, encoder_inf_fwd_state, encoder_inf_back_state = encoder(
            encoder_inf_inputs)

        encoder_model = Model(inputs=encoder_inf_inputs,
                              outputs=[
                                  encoder_inf_out, encoder_inf_fwd_state,
                                  encoder_inf_back_state
                              ])
        """ Decoder (Inference) model """
        decoder_inf_inputs = Input(batch_shape=(1, 1, self.N_words),
                                   name='decoder_word_inputs')
        #decoder_inf_inputs_masked = decoder_batch_norm(decoder_inf_inputs)

        encoder_inf_states = Input(batch_shape=(1, self.max_sequence_length,
                                                2 * nodes),
                                   name='encoder_inf_states')

        decoder_init_fwd_state = Input(batch_shape=(1, nodes),
                                       name='decoder_fwd_init')

        decoder_init_back_state = Input(batch_shape=(1, nodes),
                                        name='decoder_back_init')

        decoder_inf_out, decoder_inf_fwd_state, decoder_inf_back_state = decoder(
            decoder_inf_inputs,
            initial_state=[decoder_init_fwd_state, decoder_init_back_state])

        # Attention
        attn_inf_out, attn_inf_states = attn_layer(
            [encoder_inf_states, decoder_inf_out])
        decoder_inf_concat = Concatenate(
            axis=-1, name='concat')([decoder_inf_out, attn_inf_out])

        # Output
        decoder_inf_pred = TimeDistributed(dense_2)(decoder_inf_concat)

        decoder_model = Model(inputs=[
            encoder_inf_states, decoder_init_fwd_state,
            decoder_init_back_state, decoder_inf_inputs
        ],
                              outputs=[
                                  decoder_inf_pred, attn_inf_states,
                                  decoder_inf_fwd_state, decoder_inf_back_state
                              ])

        self.full_model = full_model
        self.encoder_model = encoder_model
        self.decoder_model = decoder_model
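Attention_Model builds the training model plus separate inference models but compiles nothing. A teacher-forcing training sketch, again with an assumed owner class and dummy shapes:

import numpy as np

m = Seq2SeqWithAttention()                      # hypothetical owner of Attention_Model
m.max_sequence_length, m.N_words = 12, 300      # assumed attributes used above
m.Attention_Model(nodes=64, batch_size=16)

m.full_model.compile(optimizer="adam", loss="categorical_crossentropy")

# Teacher forcing: the decoder input is the target sequence shifted by one step.
enc_in = np.random.rand(16, 12, 300).astype("float32")
dec_in = np.random.rand(16, 11, 300).astype("float32")
targets = np.random.rand(16, 11, 300).astype("float32")
m.full_model.fit([enc_in, dec_in], targets, batch_size=16, epochs=1)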
Example #7
tfrecord = os.path.join(base_dir, 'datasets', 'tfrecord', 'snake_all.tfrecord')
training_set = tfdata_generator(filename=tfrecord, batch_size=32, aug=True)
validation_set = tfdata_generator(filename=tfrecord, batch_size=32)

inputs = inception_resnet_v2.InceptionResNetV2(include_top=False)
for layer in inputs.layers:
    layer.trainable = False
x = keras.layers.GlobalAveragePooling2D(name='avg_pool')(inputs.output)
x = keras.layers.Dropout(0.2)(x)
# x = keras.layers.Dense(units=4096, activation='relu', name='final_dense', kernel_regularizer=L1L2(l2=0.001))(x)
# x = keras.layers.Dropout(0.2)(x)
outputs = keras.layers.Dense(7,
                             activation='softmax',
                             name='predictions',
                             kernel_regularizer=L1L2(l2=0.001))(x)
with tf.device('/cpu:0'):
    model = keras.models.Model(inputs.input, outputs)
    parallel_model = multi_gpu_model(model, gpus=2)
# parallel_model = model
# learning_rate = tf.keras.optimizers.
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)

parallel_model.compile(optimizer=optimizer,
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])
save_path = os.path.join(
    base_dir, 'ckpt',
    'weights-epoch-{epoch:02d}-val_loss-{val_loss:.4f}-val_acc-{val_acc:.2f}.hdf5'
)
ckpt = ParallelModelCheckpoint(model,
Example #8
class HuberTests(keras_parameterized.TestCase):
    """tests for CategoricalCrossesntropy StrongConvex loss."""
    @parameterized.named_parameters([
        {
            'testcase_name': 'normal',
            'reg_lambda': 1,
            'c': 1,
            'radius_constant': 1,
            'delta': 1,
        },
    ])
    def test_init_params(self, reg_lambda, c, radius_constant, delta):
        """Test initialization for given arguments.

    Args:
      reg_lambda: initialization value for reg_lambda arg
      c: initialization value for C arg
      radius_constant: initialization value for radius_constant arg
      delta: the delta parameter for the huber loss
    """
        # test valid domains for each variable
        loss = StrongConvexHuber(reg_lambda, c, radius_constant, delta)
        self.assertIsInstance(loss, StrongConvexHuber)

    @parameterized.named_parameters([
        {
            'testcase_name': 'negative c',
            'reg_lambda': 1,
            'c': -1,
            'radius_constant': 1,
            'delta': 1
        },
        {
            'testcase_name': 'negative radius',
            'reg_lambda': 1,
            'c': 1,
            'radius_constant': -1,
            'delta': 1
        },
        {
            'testcase_name': 'negative lambda',
            'reg_lambda': -1,
            'c': 1,
            'radius_constant': 1,
            'delta': 1
        },
        {
            'testcase_name': 'negative delta',
            'reg_lambda': 1,
            'c': 1,
            'radius_constant': 1,
            'delta': -1
        },
    ])
    def test_bad_init_params(self, reg_lambda, c, radius_constant, delta):
        """Test invalid domain for given params. Should return ValueError.

    Args:
      reg_lambda: initialization value for reg_lambda arg
      c: initialization value for C arg
      radius_constant: initialization value for radius_constant arg
      delta: the delta parameter for the huber loss
    """
        # test valid domains for each variable
        with self.assertRaises(ValueError):
            StrongConvexHuber(reg_lambda, c, radius_constant, delta)

    # test the bounds and test varied delta's
    @test_util.run_all_in_graph_and_eager_modes
    @parameterized.named_parameters([
        {
            'testcase_name': 'delta=1,y_true=1 z>1+h decision boundary',
            'logits': 2.1,
            'y_true': 1,
            'delta': 1,
            'result': 0,
        },
        {
            'testcase_name': 'delta=1,y_true=1 z<1+h decision boundary',
            'logits': 1.9,
            'y_true': 1,
            'delta': 1,
            'result': 0.01 * 0.25,
        },
        {
            'testcase_name': 'delta=1,y_true=1 1-z < h decision boundary',
            'logits': 0.1,
            'y_true': 1,
            'delta': 1,
            'result': 1.9**2 * 0.25,
        },
        {
            'testcase_name': 'delta=1,y_true=1 z < 1-h decision boundary',
            'logits': -0.1,
            'y_true': 1,
            'delta': 1,
            'result': 1.1,
        },
        {
            'testcase_name': 'delta=2,y_true=1 z>1+h decision boundary',
            'logits': 3.1,
            'y_true': 1,
            'delta': 2,
            'result': 0,
        },
        {
            'testcase_name': 'delta=2,y_true=1 z<1+h decision boundary',
            'logits': 2.9,
            'y_true': 1,
            'delta': 2,
            'result': 0.01 * 0.125,
        },
        {
            'testcase_name': 'delta=2,y_true=1 1-z < h decision boundary',
            'logits': 1.1,
            'y_true': 1,
            'delta': 2,
            'result': 1.9**2 * 0.125,
        },
        {
            'testcase_name': 'delta=2,y_true=1 z < 1-h decision boundary',
            'logits': -1.1,
            'y_true': 1,
            'delta': 2,
            'result': 2.1,
        },
        {
            'testcase_name': 'delta=1,y_true=-1 z>1+h decision boundary',
            'logits': -2.1,
            'y_true': -1,
            'delta': 1,
            'result': 0,
        },
    ])
    def test_calculation(self, logits, y_true, delta, result):
        """Test the call method to ensure it returns the correct value.

    Args:
      logits: unscaled output of model
      y_true: label
      delta: delta value for StrongConvexHuber loss.
      result: correct loss calculation value
    """
        logits = tf.Variable(logits, False, dtype=tf.float32)
        y_true = tf.Variable(y_true, False, dtype=tf.float32)
        loss = StrongConvexHuber(0.00001, 1, 1, delta)
        loss = loss(y_true, logits)
        self.assertAllClose(loss.numpy(), result)

    @parameterized.named_parameters([
        {
            'testcase_name': 'beta',
            'init_args': [1, 1, 1, 1],
            'fn': 'beta',
            'args': [1],
            'result': tf.Variable(1.5, dtype=tf.float32)
        },
        {
            'testcase_name': 'gamma',
            'fn': 'gamma',
            'init_args': [1, 1, 1, 1],
            'args': [],
            'result': tf.Variable(1, dtype=tf.float32),
        },
        {
            'testcase_name': 'lipchitz constant',
            'fn': 'lipchitz_constant',
            'init_args': [1, 1, 1, 1],
            'args': [1],
            'result': tf.Variable(2, dtype=tf.float32),
        },
        {
            'testcase_name': 'kernel regularizer',
            'fn': 'kernel_regularizer',
            'init_args': [1, 1, 1, 1],
            'args': [],
            'result': L1L2(l2=0.5),
        },
    ])
    def test_fns(self, init_args, fn, args, result):
        """Test that fn of BinaryCrossentropy loss returns the correct result.

    Args:
      init_args: init values for loss instance
      fn: the fn to test
      args: the arguments to above function
      result: the correct result from the fn
    """
        loss = StrongConvexHuber(*init_args)
        expected = getattr(loss, fn, lambda: 'fn not found')(*args)
        if hasattr(expected, 'numpy') and hasattr(result,
                                                  'numpy'):  # both tensor
            expected = expected.numpy()
            result = result.numpy()
        if hasattr(expected, 'l2') and hasattr(result,
                                               'l2'):  # both l2 regularizer
            expected = expected.l2
            result = result.l2
        self.assertEqual(expected, result)
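The expected values in test_calculation follow the smoothed-hinge (Huber) form of the loss: zero above the margin, a quadratic transition band of width delta around it, and a linear branch below. A small check of a few tabled cases; the branch structure is reconstructed from the test expectations rather than copied from the loss implementation, and the tiny reg_lambda=0.00001 term is ignored:

def huber_smoothed_hinge(z, y, h):
    """Smoothed hinge loss consistent with the expectations above (z = logit, y in {-1, 1})."""
    yz = y * z
    if yz > 1 + h:
        return 0.0
    if abs(1 - yz) <= h:
        return (1 + h - yz) ** 2 / (4 * h)
    return 1 - yz

assert abs(huber_smoothed_hinge(1.9, 1, 1) - 0.01 * 0.25) < 1e-6    # 'z<1+h' case
assert abs(huber_smoothed_hinge(-0.1, 1, 1) - 1.1) < 1e-6           # 'z < 1-h' case
assert abs(huber_smoothed_hinge(2.9, 1, 2) - 0.01 * 0.125) < 1e-6   # delta=2 transition band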
Example #9
class CategoricalCrossentropyTests(keras_parameterized.TestCase):
    """Tests for the StrongConvexCategoricalCrossentropy loss."""
    @parameterized.named_parameters([
        {
            'testcase_name': 'normal',
            'reg_lambda': 1,
            'C': 1,
            'radius_constant': 1
        },  # pylint: disable=invalid-name
    ])
    def test_init_params(self, reg_lambda, C, radius_constant):
        """Test initialization for given arguments.
    Args:
      reg_lambda: initialization value for reg_lambda arg
      C: initialization value for C arg
      radius_constant: initialization value for radius_constant arg
    """
        # test valid domains for each variable
        loss = StrongConvexCategoricalCrossentropy(reg_lambda, C,
                                                   radius_constant)
        self.assertIsInstance(loss, StrongConvexCategoricalCrossentropy)

    @parameterized.named_parameters([
        {
            'testcase_name': 'negative c',
            'reg_lambda': 1,
            'C': -1,
            'radius_constant': 1
        },
        {
            'testcase_name': 'negative radius',
            'reg_lambda': 1,
            'C': 1,
            'radius_constant': -1
        },
        {
            'testcase_name': 'negative lambda',
            'reg_lambda': -1,
            'C': 1,
            'radius_constant': 1
        },  # pylint: disable=invalid-name
    ])
    def test_bad_init_params(self, reg_lambda, C, radius_constant):
        """Test invalid domain for given params. Should return ValueError.
    Args:
      reg_lambda: initialization value for reg_lambda arg
      C: initialization value for C arg
      radius_constant: initialization value for radius_constant arg
    """
        # test valid domains for each variable
        with self.assertRaises(ValueError):
            StrongConvexCategoricalCrossentropy(reg_lambda, C, radius_constant)

    @test_util.run_all_in_graph_and_eager_modes
    @parameterized.named_parameters([
        # [] for compatibility with tensorflow loss calculation
        {
            'testcase_name': 'both positive',
            'logits': [[10000, 0]],
            'y_true': [[1, 0]],
            'result': 0,
        },
        {
            'testcase_name': 'negative gradient positive logits',
            'logits': [[-10000, 0]],
            'y_true': [[1, 0]],
            'result': 10000,
        },
        {
            'testcase_name': 'positive gradient negative logits',
            'logits': [[10000, 0]],
            'y_true': [[0, 1]],
            'result': 10000,
        },
        {
            'testcase_name': 'both negative',
            'logits': [[-10000, 0]],
            'y_true': [[0, 1]],
            'result': 0
        },
    ])
    def test_calculation(self, logits, y_true, result):
        """Test the call method to ensure it returns the correct value.
    Args:
      logits: unscaled output of model
      y_true: label
      result: correct loss calculation value
    """
        logits = tf.Variable(logits, False, dtype=tf.float32)
        y_true = tf.Variable(y_true, False, dtype=tf.float32)
        loss = StrongConvexCategoricalCrossentropy(0.00001, 1, 1)
        loss = loss(y_true, logits)
        self.assertEqual(loss.numpy(), result)

    @parameterized.named_parameters([
        {
            'testcase_name': 'beta',
            'init_args': [1, 1, 1],
            'fn': 'beta',
            'args': [1],
            'result': tf.constant(2, dtype=tf.float32)
        },
        {
            'testcase_name': 'gamma',
            'fn': 'gamma',
            'init_args': [1, 1, 1],
            'args': [],
            'result': tf.constant(1, dtype=tf.float32),
        },
        {
            'testcase_name': 'lipchitz constant',
            'fn': 'lipchitz_constant',
            'init_args': [1, 1, 1],
            'args': [1],
            'result': tf.constant(2, dtype=tf.float32),
        },
        {
            'testcase_name': 'kernel regularizer',
            'fn': 'kernel_regularizer',
            'init_args': [1, 1, 1],
            'args': [],
            'result': L1L2(l2=0.5),
        },
    ])
    def test_fns(self, init_args, fn, args, result):
        """Test that fn of CategoricalCrossentropy loss returns the correct result.
    Args:
      init_args: init values for loss instance
      fn: the fn to test
      args: the arguments to above function
      result: the correct result from the fn
    """
        loss = StrongConvexCategoricalCrossentropy(*init_args)
        expected = getattr(loss, fn, lambda: 'fn not found')(*args)
        if hasattr(expected, 'numpy') and hasattr(result,
                                                  'numpy'):  # both tensor
            expected = expected.numpy()
            result = result.numpy()
        if hasattr(expected, 'l2') and hasattr(result,
                                               'l2'):  # both l2 regularizer
            expected = expected.l2
            result = result.l2
        self.assertEqual(expected, result)

    @parameterized.named_parameters([
        {
            'testcase_name': 'label_smoothing',
            'init_args': [1, 1, 1, True, 0.1],
            'fn': None,
            'args': None,
            'print_res': 'The impact of label smoothing on privacy is unknown.'
        },
    ])
    def test_prints(self, init_args, fn, args, print_res):
        """Test logger warning from StrongConvexCategoricalCrossentropy.
    Args:
      init_args: arguments to init the object with.
      fn: function to test
      args: arguments to above function
      print_res: print result that should have been printed.
    """
        with captured_output() as (out, err):  # pylint: disable=unused-variable
            loss = StrongConvexCategoricalCrossentropy(*init_args)
            if fn is not None:
                getattr(loss, fn, lambda *arguments: print('error'))(*args)
            self.assertRegexMatch(err.getvalue().strip(), [print_res])
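The expectations in test_calculation match plain softmax cross-entropy at these extreme logits (the 0.00001 regularization term is negligible at this scale). A quick standalone check with TF:

import tensorflow as tf

logits = tf.constant([[-10000.0, 0.0]])
y_true = tf.constant([[1.0, 0.0]])
# -log(softmax(logits)[0]) = logsumexp(logits) - logits[0] ~= 0 - (-10000) = 10000
ce = tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=logits)
print(float(ce[0]))   # ~10000.0, the 'negative gradient positive logits' case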
Example #10
    def Embedding_Model(self, nodes=100, reg=0.0, dropout=0.0, batch_size=16, embedding_dim=1000):
        reg = L1L2(reg)

        # Encoder

        encoder_input = Input(
            batch_shape=(batch_size, self.max_sequence_length_enc),
            dtype='float32')

        encoder_embedding = Embedding(
            self.N_words,
            embedding_dim,
            input_length=self.max_sequence_length_enc,
            #mask_zero=True,
            name='encoder_embedding')

        encoder = GRU(
            nodes,
            return_state=True,
            return_sequences=True,
            recurrent_dropout=dropout,
            kernel_regularizer=reg,
            kernel_initializer='he_normal',
            recurrent_initializer='he_normal',
            name='encoder_gru')

        encoder_out, encoder_state = encoder(encoder_embedding(encoder_input))

        # Decoder

        decoder_input = Input(
            batch_shape=(batch_size, self.max_sequence_length_dec - 1),
            dtype='float32')

        decoder_embedding = Embedding(
            self.N_words,
            embedding_dim,
            input_length=self.max_sequence_length_dec - 1,
            #mask_zero = True,
            name='decoder_embedding')

        decoder = GRU(
            nodes,
            return_state=True,
            return_sequences=True,
            recurrent_dropout=dropout,
            kernel_regularizer=reg,
            kernel_initializer='he_normal',
            recurrent_initializer='he_normal',
            name='decoder_gru')

        decoder_out, decoder_state = decoder(
            decoder_embedding(decoder_input),
            initial_state=encoder_state)

        # Attention
        #attn_layer = LuongAttention(nodes)
        #attn_out, attn_states = attn_layer(decoder_out, encoder_out)
        #decoder_with_context = Concatenate(axis=-1, name='concat_layer')([attn_out, decoder_out])

        attn_layer = AttentionLayer(name='attention_layer')
        attn_out, attn_states = attn_layer(
            [encoder_out, decoder_out])  # context, alignment
        decoder_with_context = Concatenate(
            axis=-1, name='concat_layer')([decoder_out, attn_out])

        dense_1 = Dense(nodes, activation="tanh", kernel_regularizer=reg)
        dense_time_1 = TimeDistributed(dense_1, name='time_1')
        decoder_with_context = dense_time_1(decoder_with_context)

        dense_2 = Dense(self.N_words, activation='softmax')
        dense_time_2 = TimeDistributed(dense_2, name='time_2')
        decoded_sequence = dense_time_2(decoder_with_context)

        full_model = Model([encoder_input, decoder_input], decoded_sequence)

        """ Encoder (Inference) model """
        encoder_inf_inputs = Input(
            batch_shape=(1, self.max_sequence_length_enc),
            name='encoder_inf_inputs')

        encoder_inf_out, encoder_inf_state = encoder(
            encoder_embedding(encoder_inf_inputs))

        encoder_model = Model(
            inputs=encoder_inf_inputs,
            outputs=[encoder_inf_out, encoder_inf_state])

        """ Decoder (Inference) model """
        decoder_inf_inputs = Input(
            batch_shape=(1, 1), name='decoder_word_inputs')

        encoder_inf_states = Input(
            batch_shape=(1, self.max_sequence_length_enc, nodes),
            name='encoder_inf_states')

        decoder_init_state = Input(batch_shape=(1, nodes), name='decoder_init')

        decoder_inf_out, decoder_inf_state = decoder(
            decoder_embedding(decoder_inf_inputs),
            initial_state=decoder_init_state)

        # Attention
        attn_inf_out, attn_inf_states = attn_layer(
            [encoder_inf_states, decoder_inf_out])
        decoder_inf_concat = Concatenate(
            axis=-1, name='concat')([decoder_inf_out, attn_inf_out])

        #attn_inf_out, attn_inf_states = attn_layer(decoder_inf_out,
        #                                           encoder_inf_states)
        #decoder_inf_concat = Concatenate(
        #    axis=-1, name='concat_layer')([attn_inf_out, decoder_inf_out])

        # Output
        decoder_inf_concat = TimeDistributed(dense_1)(decoder_inf_concat)
        decoder_inf_pred = TimeDistributed(dense_2)(decoder_inf_concat)

        decoder_model = Model(
            inputs=[
                encoder_inf_states, decoder_init_state, decoder_inf_inputs
            ],
            outputs=[decoder_inf_pred, attn_inf_states, decoder_inf_state])

        self.full_model = full_model
        self.encoder_model = encoder_model
        self.decoder_model = decoder_model
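The encoder_model / decoder_model pair is meant for step-by-step inference. A minimal greedy-decoding sketch over those two models; the token ids, sequence padding, and helper name are assumptions:

import numpy as np

def greedy_decode(encoder_model, decoder_model, source_ids, start_id, end_id, max_len=50):
    """Decode one sequence token by token with the inference models built above."""
    enc_out, state = encoder_model.predict(source_ids[np.newaxis, :])   # batch of one
    token = np.array([[start_id]], dtype="float32")                     # decoder_inf_inputs is (1, 1)
    decoded = []
    for _ in range(max_len):
        probs, _, state = decoder_model.predict([enc_out, state, token])
        next_id = int(np.argmax(probs[0, -1]))
        if next_id == end_id:
            break
        decoded.append(next_id)
        token = np.array([[next_id]], dtype="float32")
    return decoded

# source_ids: float32 array of token ids, padded to max_sequence_length_enc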
Example #11
    def build(self, input_layer):
        downsampling_factor = int(np.prod(self.downsample_factors))
        last_layer = input_layer

        input_shape = K.int_shape(last_layer)
        if len(input_shape) == 3:
            # Add channel dimension if not already present.
            last_layer = Reshape(input_shape[1:] + (1,))(last_layer)

        per_stage_before_pool = []
        for layer_idx in range(self.num_layers + 1):
            cur_num_units = int(np.rint(self.num_units*2**layer_idx))
            last_layer = Conv2D(cur_num_units, 3,
                                padding='same',
                                kernel_initializer='he_normal',
                                kernel_regularizer=L1L2(l2=self.l2_weight),
                                bias_regularizer=L1L2(l2=self.l2_weight),
                                use_bias=not self.with_bn and self.with_bias)(last_layer)
            if self.with_bn:
                last_layer = BatchNormalization(beta_regularizer=L1L2(l2=self.l2_weight),
                                                gamma_regularizer=L1L2(l2=self.l2_weight))(last_layer)
            last_layer = Activation(self.activation)(last_layer)
            last_layer = Conv2D(cur_num_units, 3,
                                padding='same',
                                kernel_initializer='he_normal',
                                kernel_regularizer=L1L2(l2=self.l2_weight),
                                bias_regularizer=L1L2(l2=self.l2_weight),
                                use_bias=not self.with_bn and self.with_bias)(last_layer)
            if self.with_bn:
                last_layer = BatchNormalization(beta_regularizer=L1L2(l2=self.l2_weight),
                                                gamma_regularizer=L1L2(l2=self.l2_weight))(last_layer)
            last_layer = Activation(self.activation)(last_layer)
            per_stage_before_pool.append(last_layer)

            if layer_idx != self.num_layers:  # Last layer doesn't require max pooling.
                last_layer = MaxPooling2D(pool_size=(2, 2))(last_layer)

            if self.p_dropout != 0.0:
                last_layer = Dropout(self.p_dropout)(last_layer)

        start_idx = 0 if downsampling_factor == 1 else int(np.log2(self.downsample_factors[0]))
        for layer_idx in reversed(range(start_idx, self.num_layers)):
            cur_num_units = int(np.rint(self.num_units*2**layer_idx))

            last_layer = UpSampling2D(size=(2, 2))(last_layer)
            last_layer = Conv2D(cur_num_units, 2,
                                padding='same', kernel_initializer='he_normal',
                                kernel_regularizer=L1L2(l2=self.l2_weight),
                                bias_regularizer=L1L2(l2=self.l2_weight),
                                use_bias=not self.with_bn and self.with_bias)(last_layer)
            if self.with_bn:
                last_layer = BatchNormalization(beta_regularizer=L1L2(l2=self.l2_weight),
                                                gamma_regularizer=L1L2(l2=self.l2_weight))(last_layer)
            last_layer = Activation(self.activation)(last_layer)
            last_layer = concatenate([per_stage_before_pool[layer_idx], last_layer], axis=3)
            last_layer = Conv2D(cur_num_units, 3,
                                padding='same', kernel_initializer='he_normal',
                                kernel_regularizer=L1L2(l2=self.l2_weight),
                                bias_regularizer=L1L2(l2=self.l2_weight),
                                use_bias=not self.with_bn and self.with_bias)(last_layer)
            if self.with_bn:
                last_layer = BatchNormalization(beta_regularizer=L1L2(l2=self.l2_weight),
                                                gamma_regularizer=L1L2(l2=self.l2_weight))(last_layer)
            last_layer = Activation(self.activation)(last_layer)
            last_layer = Conv2D(cur_num_units, 3,
                                padding='same', kernel_initializer='he_normal',
                                kernel_regularizer=L1L2(l2=self.l2_weight),
                                bias_regularizer=L1L2(l2=self.l2_weight),
                                use_bias=not self.with_bn and self.with_bias)(last_layer)
            if self.with_bn:
                last_layer = BatchNormalization(beta_regularizer=L1L2(l2=self.l2_weight),
                                                gamma_regularizer=L1L2(l2=self.l2_weight))(last_layer)
            last_layer = Activation(self.activation)(last_layer)

        last_layer = Conv2D(self.num_output_channels, 3,
                            activation="linear" if self.skip_last_dense else self.activation,
                            padding='same', kernel_initializer='he_normal',
                            kernel_regularizer=L1L2(l2=self.l2_weight),
                            bias_regularizer=L1L2(l2=self.l2_weight),
                            use_bias=not self.with_bn and self.with_bias)(last_layer)
        return last_layer
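build() only wires layers onto an existing tensor, so turning it into a trainable model is up to the caller. A sketch, where the class name, constructor signature, and input size are assumptions chosen to be consistent with the attributes the method reads:

from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

unet = UNetBuilder(num_layers=3, num_units=16, downsample_factors=(1, 1, 1),   # hypothetical class
                   num_output_channels=1, l2_weight=1e-4, activation="relu",
                   with_bn=True, with_bias=False, p_dropout=0.0, skip_last_dense=True)

inp = Input(shape=(128, 128, 1))     # spatial size divisible by 2**num_layers
out = unet.build(inp)
model = Model(inp, out)
model.compile(optimizer="adam", loss="mse")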
Example #12
    def Attention_Model(self, nodes=100, reg=0.0, dropout=0.0, batch_size=16):
        reg = L1L2(reg)

        # Encoder

        encoder_input = Input(batch_shape=(batch_size,
                                           self.max_sequence_length_enc,
                                           self.N_words),
                              dtype='float32')

        #conv = Conv1D(filters=10 * nodes, kernel_size=5, activation="relu")
        #pool = MaxPooling1D(pool_size=self.N_words, strides=2)

        encoder = Bidirectional(GRU(nodes,
                                    return_state=True,
                                    return_sequences=True,
                                    recurrent_dropout=dropout,
                                    kernel_regularizer=reg,
                                    kernel_initializer='he_normal',
                                    recurrent_initializer='he_normal',
                                    name='encoder_gru'),
                                merge_mode="sum",
                                name='bidirectional_encoder')

        encoder_out, encoder_fwd_state, encoder_back_state = encoder(
            encoder_input)
        combined_encoder_state = Add()([encoder_fwd_state, encoder_back_state])

        # Decoder

        decoder_input = Input(batch_shape=(batch_size,
                                           self.max_sequence_length_dec - 1,
                                           self.N_words),
                              dtype='float32')

        decoder = GRU(nodes,
                      return_state=True,
                      return_sequences=True,
                      recurrent_dropout=dropout,
                      kernel_regularizer=reg,
                      kernel_initializer='he_normal',
                      recurrent_initializer='he_normal',
                      name='decoder_gru')

        decoder_out, decoder_state = decoder(
            decoder_input, initial_state=combined_encoder_state)

        # Attention
        attn_layer = AttentionLayer(name='attention_layer')
        attn_out, attn_states = attn_layer([encoder_out, decoder_out])

        decoder_combined_context = Concatenate(
            axis=-1, name='concat_layer')([decoder_out, attn_out])

        # Dense
        dense_1 = Dense(nodes, activation="tanh", kernel_regularizer=reg)
        dense_time_1 = TimeDistributed(dense_1)
        decoder_combined_context = dense_time_1(decoder_combined_context)

        dense_2 = Dense(self.N_words, activation='softmax')
        dense_time_2 = TimeDistributed(dense_2)
        decoded_sequence = dense_time_2(decoder_combined_context)

        full_model = Model([encoder_input, decoder_input], decoded_sequence)
        """ Encoder (Inference) model """
        encoder_inf_inputs = Input(batch_shape=(1,
                                                self.max_sequence_length_enc,
                                                self.N_words),
                                   name='encoder_inf_inputs')

        encoder_inf_out, encoder_inf_fwd_state, encoder_inf_back_state = encoder(
            encoder_inf_inputs)

        encoder_model = Model(inputs=encoder_inf_inputs,
                              outputs=[
                                  encoder_inf_out, encoder_inf_fwd_state,
                                  encoder_inf_back_state
                              ])
        """ Decoder (Inference) model """
        decoder_inf_inputs = Input(batch_shape=(1, 1, self.N_words),
                                   name='decoder_word_inputs')

        encoder_inf_states = Input(batch_shape=(1,
                                                self.max_sequence_length_enc,
                                                nodes),
                                   name='encoder_inf_states')

        decoder_init_state = Input(batch_shape=(1, nodes), name='decoder_init')

        decoder_inf_out, decoder_inf_state = decoder(
            decoder_inf_inputs, initial_state=decoder_init_state)

        # Attention
        attn_inf_out, attn_inf_states = attn_layer(
            [encoder_inf_states, decoder_inf_out])
        decoder_inf_concat = Concatenate(
            axis=-1, name='concat')([decoder_inf_out, attn_inf_out])

        # Output
        decoder_inf_concat = TimeDistributed(dense_1)(decoder_inf_concat)
        decoder_inf_pred = TimeDistributed(dense_2)(decoder_inf_concat)

        decoder_model = Model(
            inputs=[
                encoder_inf_states, decoder_init_state, decoder_inf_inputs
            ],
            outputs=[decoder_inf_pred, attn_inf_states, decoder_inf_state])

        self.full_model = full_model
        self.encoder_model = encoder_model
        self.decoder_model = decoder_model