def _testWithMaybeMultiAttention(self,
                                   is_multi,
                                   create_attention_mechanisms,
                                   expected_final_output,
                                   expected_final_state,
                                   attention_mechanism_depths,
                                   alignment_history=False,
                                   expected_final_alignment_history=None,
                                   attention_layer_sizes=None,
                                   attention_layers=None,
                                   create_query_layer=False,
                                   create_memory_layer=True,
                                   create_attention_kwargs=None):
    """Decodes through an AttentionWrapper and checks shapes and summaries.

    Builds one attention mechanism per creator, wraps an LSTMCell with
    `wrapper.AttentionWrapper`, runs a `BasicDecoderV2` with a
    `TrainingSampler` over random inputs, asserts the static shapes of the
    outputs and final state, and finally compares `get_result_summary` of the
    evaluated results with the expected structures.

    Args:
      is_multi: If True, the mechanisms (and attention layer sizes / layers) are
        handed to the wrapper as lists; otherwise only their first element is
        passed. Must be True when more than one creator is given.
      create_attention_mechanisms: List of callables. Each is called with
        `units`, `memory`, `memory_sequence_length` and the extra kwargs.
      expected_final_output: Structure compared (via `assertAllCloseOrEqual`)
        with the summary of the decoder's final outputs.
      expected_final_state: Structure compared with the summary of the final
        state. Its `time` field is also used as the expected static time
        dimension when executing eagerly.
      attention_mechanism_depths: Depths zipped with the creators; each is used
        as `units` and as the size of the generated query/memory Dense layers.
      alignment_history: Forwarded to the wrapper. When True the stacked
        alignment history is shape-checked and compared as well.
      expected_final_alignment_history: Expected summary of the alignment
        history; only used when `alignment_history` is True.
      attention_layer_sizes: Optional list forwarded as `attention_layer_size`.
      attention_layers: Optional list of layers forwarded as `attention_layer`.
      create_query_layer: If True, a ones-initialized, bias-free Dense layer is
        passed to each creator as `query_layer`.
      create_memory_layer: If True, a ones-initialized, bias-free Dense layer is
        passed to each creator as `memory_layer`.
      create_attention_kwargs: Extra keyword arguments forwarded to every
        creator. NOTE: a non-empty dict passed by the caller is updated in
        place when query/memory layers are created.
    """
    # Allow is_multi to be True with a single mechanism to enable test for
    # passing in a single mechanism in a list.
    assert len(create_attention_mechanisms) == 1 or is_multi
    encoder_sequence_length = [3, 2, 3, 1, 1]
    decoder_sequence_length = [2, 0, 1, 2, 3]
    batch_size = 5
    encoder_max_time = 8
    decoder_max_time = 4
    input_depth = 7
    encoder_output_depth = 10
    cell_depth = 9
    create_attention_kwargs = create_attention_kwargs or {}

    # attention_depth is the last dimension that the shape assertions below
    # expect for both rnn_output and final_state.attention.
    if attention_layer_sizes is not None:
      # Compute sum of attention_layer_sizes. Use encoder_output_depth if None.
      attention_depth = sum(attention_layer_size or encoder_output_depth
                            for attention_layer_size in attention_layer_sizes)
    elif attention_layers is not None:
      # Compute sum of attention_layers output depth.
      attention_depth = sum(
          attention_layer.compute_output_shape(
              [batch_size, cell_depth + encoder_output_depth]).dims[-1].value
          for attention_layer in attention_layers)
    else:
      attention_depth = encoder_output_depth * len(create_attention_mechanisms)

    decoder_inputs = np.random.randn(batch_size, decoder_max_time,
                                     input_depth).astype(np.float32)
    encoder_outputs = np.random.randn(batch_size, encoder_max_time,
                                      encoder_output_depth).astype(np.float32)

    attention_mechanisms = []
    for creator, depth in zip(create_attention_mechanisms,
                              attention_mechanism_depths):
      # Create a memory layer with deterministic initializer to avoid randomness
      # in the test between graph and eager.
      # The kwargs dict is shared across iterations, so each mechanism
      # overwrites the layers created for the previous one before being built.
      if create_query_layer:
        create_attention_kwargs["query_layer"] = keras.layers.Dense(
            depth, kernel_initializer="ones", use_bias=False)
      if create_memory_layer:
        create_attention_kwargs["memory_layer"] = keras.layers.Dense(
            depth, kernel_initializer="ones", use_bias=False)

      attention_mechanisms.append(
          creator(
              units=depth,
              memory=encoder_outputs,
              memory_sequence_length=encoder_sequence_length,
              **create_attention_kwargs))

    with self.cached_session(use_gpu=True):
      attention_layer_size = attention_layer_sizes
      attention_layer = attention_layers
      # In the single-mechanism case the wrapper is given scalars, not lists.
      if not is_multi:
        if attention_layer_size is not None:
          attention_layer_size = attention_layer_size[0]
        if attention_layer is not None:
          attention_layer = attention_layer[0]
      cell = keras.layers.LSTMCell(cell_depth,
                                   recurrent_activation="sigmoid",
                                   kernel_initializer="ones",
                                   recurrent_initializer="ones")
      cell = wrapper.AttentionWrapper(
          cell,
          attention_mechanisms if is_multi else attention_mechanisms[0],
          attention_layer_size=attention_layer_size,
          alignment_history=alignment_history,
          attention_layer=attention_layer)
      # Give any attention layer without a kernel initializer a seeded one.
      if cell._attention_layers is not None:
        for layer in cell._attention_layers:
          if getattr(layer, "kernel_initializer") is None:
            layer.kernel_initializer = initializers.glorot_uniform(seed=1337)

      sampler = sampler_py.TrainingSampler()
      my_decoder = basic_decoder.BasicDecoderV2(cell=cell, sampler=sampler)
      initial_state = cell.get_initial_state(
          dtype=dtypes.float32, batch_size=batch_size)
      final_outputs, final_state, _ = my_decoder(
          decoder_inputs,
          initial_state=initial_state,
          sequence_length=decoder_sequence_length)

      self.assertIsInstance(final_outputs, basic_decoder.BasicDecoderOutput)
      self.assertIsInstance(final_state, wrapper.AttentionWrapperState)

      # Outside eager execution the static time dimension is expected to be
      # unknown (None); in eager mode it must equal the expected final time.
      expected_time = (
          expected_final_state.time if context.executing_eagerly() else None)
      self.assertEqual((batch_size, expected_time, attention_depth),
                       tuple(final_outputs.rnn_output.get_shape().as_list()))
      self.assertEqual((batch_size, expected_time),
                       tuple(final_outputs.sample_id.get_shape().as_list()))

      self.assertEqual((batch_size, attention_depth),
                       tuple(final_state.attention.get_shape().as_list()))
      self.assertEqual((batch_size, cell_depth),
                       tuple(final_state.cell_state[0].get_shape().as_list()))
      self.assertEqual((batch_size, cell_depth),
                       tuple(final_state.cell_state[1].get_shape().as_list()))

      if alignment_history:
        if is_multi:
          # One history per mechanism; stack each and collect them in a tuple.
          state_alignment_history = []
          for history_array in final_state.alignment_history:
            history = history_array.stack()
            self.assertEqual((expected_time, batch_size, encoder_max_time),
                             tuple(history.get_shape().as_list()))
            state_alignment_history.append(history)
          state_alignment_history = tuple(state_alignment_history)
        else:
          state_alignment_history = final_state.alignment_history.stack()
          self.assertEqual((expected_time, batch_size, encoder_max_time),
                           tuple(state_alignment_history.get_shape().as_list()))
        nest.assert_same_structure(cell.state_size,
                                   cell.zero_state(batch_size, dtypes.float32))
        # Remove the history from final_state for purposes of the
        # remainder of the tests.
        final_state = final_state._replace(alignment_history=())  # pylint: disable=protected-access
      else:
        state_alignment_history = ()

      self.evaluate(variables.global_variables_initializer())
      eval_result = self.evaluate({
          "final_outputs": final_outputs,
          "final_state": final_state,
          "state_alignment_history": state_alignment_history,
      })

      # Expectations are expressed as summaries, so summarize the results with
      # the same helper before comparing. The prints show the actual summaries
      # in the test log.
      final_output_info = nest.map_structure(get_result_summary,
                                             eval_result["final_outputs"])
      final_state_info = nest.map_structure(get_result_summary,
                                            eval_result["final_state"])
      print("final_output_info: ", final_output_info)
      print("final_state_info: ", final_state_info)

      nest.map_structure(self.assertAllCloseOrEqual, expected_final_output,
                         final_output_info)
      nest.map_structure(self.assertAllCloseOrEqual, expected_final_state,
                         final_state_info)
      if alignment_history:  # by default, the wrapper emits attention as output
        final_alignment_history_info = nest.map_structure(
            get_result_summary, eval_result["state_alignment_history"])
        print("final_alignment_history_info: ", final_alignment_history_info)
        nest.map_structure(
            self.assertAllCloseOrEqual,
            # outputs are batch major but the stacked TensorArray is time major
            expected_final_alignment_history,
            final_alignment_history_info)
Example #2
0
    def build(self, input_shape):
        """Create the layer's trainable weights from the input shape.

        Reads `self.num_capsule`, `self.dim_capsule` and `self.seed`, which
        must already be set on the instance, and records
        `input_num_capsule` / `input_dim_capsule` from axes 1 and 2 of
        `input_shape`.

        Arguments:
            input_shape: shape of the layer input; must have at least three
                entries, with axes 1 and 2 convertible to `int`.

        Raises:
            AssertionError: if `input_shape` has fewer than three entries.
        """
        assert len(
            input_shape
        ) >= 3, "The input Tensor should have shape=[None, input_num_capsule, input_dim_capsule]"
        self.input_num_capsule = int(input_shape[1])
        self.input_dim_capsule = int(input_shape[2])

        # transmission matrix
        self.reweight_W = self.add_weight(
            shape=[self.input_dim_capsule, self.num_capsule, self.dim_capsule],
            initializer=glorot_uniform(seed=self.seed),
            name='reweight_W')

        # One weight per unordered pair of fields: n * (n - 1) / 2 rows.
        self.num_fields = self.num_capsule
        self.kernel_mf = self.add_weight(
            name='kernel_mf',
            shape=(int(self.num_fields * (self.num_fields - 1) / 2), 1),
            initializer=tf.keras.initializers.Ones(),
            regularizer=None,
            trainable=True)

        self.kernel_fm = self.add_weight(
            name='kernel_fm',
            shape=(self.num_fields, 1),
            initializer=tf.keras.initializers.Constant(value=0.5),
            regularizer=None,
            trainable=True)

        # self-attention
        self.kernel_highint = self.add_weight(
            name='kernel_highint',
            shape=(self.num_fields, 1),
            initializer=tf.keras.initializers.Constant(value=0.5),
            regularizer=None,
            trainable=True)

        self.self_attention_factor = self.dim_capsule
        self.self_attention_layer = 1
        self.head_num = 2
        # embedding_size=self.self_attention_factor * self.head_num
        embedding_size = self.dim_capsule
        # Bias shapes are written as explicit 1-tuples; a parenthesised int
        # such as `(n)` is just the int `n`, not a shape tuple.
        self.bias_mf = self.add_weight(name='bias_mf',
                                       shape=(embedding_size,),
                                       initializer=Zeros())
        self.bias_fm = self.add_weight(name='bias_fm',
                                       shape=(embedding_size,),
                                       initializer=Zeros())

        self.routing_init = self.add_weight(
            name="routing_init",
            shape=(self.num_capsule, self.input_num_capsule),
            initializer=tf.keras.initializers.TruncatedNormal(seed=self.seed,
                                                              stddev=10))

        self.bias_highint = self.add_weight(
            name='bias_highint',
            shape=(self.self_attention_factor * self.head_num,),
            initializer=Zeros())
        self.built = True
        # Be sure to call this somewhere!
        super(CapsuleLayer, self).build(input_shape)
Example #3
0
 def load_model(self):
     """Load the model stored at `self.model_path` and remember the default graph.

     The model is deserialized inside a `CustomObjectScope` that maps the
     name 'GlorotUniform' to a `glorot_uniform()` object. The result is
     stored in `self.model`, and the graph returned by
     `get_default_graph()` afterwards in `self.graph`.
     """
     custom_objects = {'GlorotUniform': glorot_uniform()}
     # Inside this method `load_model` resolves to the module-level loader,
     # not to this method.
     with CustomObjectScope(custom_objects):
         loaded = load_model(self.model_path)
         self.model = loaded
     self.graph = get_default_graph()
Example #4
0
def network_model(inputs, num_classes):
    """Stack a residual network on `inputs` and return the softmax output tensor.

    The network is a 7x7 stem convolution followed by four residual stages,
    each built from one `convolutional_block` and several `identity_block`
    calls, then 7x7 average pooling and a dense softmax layer.

    Arguments:
    inputs -- input tensor the first convolution is applied to
    num_classes -- number of units of the final softmax layer

    Returns:
    The output tensor of the final Dense layer (not a Model instance).
    """
    # (stage, filters, stride of the convolutional block, identity block names)
    stage_specs = (
        (2, (64, 64, 256), 1, 'bc'),
        (3, (128, 128, 512), 2, 'bcd'),
        (4, (256, 256, 1024), 2, 'bcdef'),
        (5, (512, 512, 2048), 2, 'bc'),
    )

    # Stage 1
    stem = Conv2D(64, (7, 7),
                  strides=(1, 1),
                  padding='same',
                  name='conv1',
                  kernel_initializer=glorot_uniform(seed=0))
    X = stem(inputs)
    X = BatchNormalization(axis=3, name='bn_conv1')(X)
    X = Activation('relu')(X)
    X = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(X)

    # Stages 2-5: one convolutional block ('a') then the identity blocks.
    for stage, stage_filters, stride, identity_names in stage_specs:
        X = convolutional_block(X,
                                f=3,
                                filters=list(stage_filters),
                                stage=stage,
                                block='a',
                                s=stride)
        for block_name in identity_names:
            X = identity_block(X, 3, list(stage_filters),
                               stage=stage, block=block_name)

    # Average pooling
    X = AveragePooling2D(pool_size=(7, 7), strides=(1, 1), name='avg_pool')(X)

    # output layer
    X = Flatten()(X)
    classifier = Dense(num_classes,
                       activation='softmax',
                       name='fc' + str(num_classes),
                       kernel_initializer=glorot_uniform(seed=0))
    return classifier(X)
Example #5
0
def convolutional_block(X, f, filters, stage, block, s=2):
    """
    Residual block whose shortcut branch is itself a strided convolution.

    Main path: 1x1 conv (stride s) -> BN -> ReLU -> fxf 'same' conv -> BN ->
    ReLU -> 1x1 conv -> BN. Shortcut path: 1x1 conv (stride s) -> BN. The two
    branches are added and passed through a final ReLU.

    Arguments:
    X -- input tensor of shape (m, n_H_prev, n_W_prev, n_C_prev)
    f -- integer, window size of the middle convolution of the main path
    filters -- sequence of three integers, the filter counts of the three
               main-path convolutions (the last one is also used by the shortcut)
    stage -- integer, used to build the layer names
    block -- string/character, used to build the layer names
    s -- integer, stride of the first main-path convolution and of the shortcut

    Returns:
    X -- output tensor of the block
    """
    n_first, n_middle, n_last = filters

    # Layer names share a prefix per (stage, block).
    conv_prefix = 'res' + str(stage) + block + '_branch'
    bn_prefix = 'bn' + str(stage) + block + '_branch'

    # Keep a handle on the block input for the shortcut branch.
    block_input = X

    # --- main path, first component ---
    main = Conv2D(n_first, (1, 1),
                  strides=(s, s),
                  name=conv_prefix + '2a',
                  kernel_initializer=glorot_uniform(seed=0))(block_input)
    main = BatchNormalization(axis=3, name=bn_prefix + '2a')(main)
    main = Activation('relu')(main)

    # --- main path, second component ---
    main = Conv2D(filters=n_middle,
                  kernel_size=(f, f),
                  strides=(1, 1),
                  padding='same',
                  name=conv_prefix + '2b',
                  kernel_initializer=glorot_uniform(seed=0))(main)
    main = BatchNormalization(axis=3, name=bn_prefix + '2b')(main)
    main = Activation('relu')(main)

    # --- main path, third component (no activation before the merge) ---
    main = Conv2D(filters=n_last,
                  kernel_size=(1, 1),
                  strides=(1, 1),
                  padding='valid',
                  name=conv_prefix + '2c',
                  kernel_initializer=glorot_uniform(seed=0))(main)
    main = BatchNormalization(axis=3, name=bn_prefix + '2c')(main)

    # --- shortcut path ---
    shortcut = Conv2D(n_last, (1, 1),
                      strides=(s, s),
                      padding='valid',
                      name=conv_prefix + '1',
                      kernel_initializer=glorot_uniform(seed=0))(block_input)
    shortcut = BatchNormalization(axis=3, name=bn_prefix + '1')(shortcut)

    # Merge the branches, then apply the final ReLU.
    merged = Add()([shortcut, main])
    return Activation('relu')(merged)
Example #6
0
def ResNet50(input_shape=(32, 32, 3), classes=10):
    """Build a ResNet-50 style Keras model.

    The model is zero padding, a strided 7x7 stem convolution with batch
    norm, ReLU and max pooling, then four residual stages (one
    `convolutional_block` plus several `identity_block` calls each), and
    finally a flatten and a dense softmax classifier.

    Arguments:
    input_shape -- shape passed to `Input`
    classes -- number of units of the softmax output layer

    Returns:
    A `Model` named 'ResNet50' mapping the input tensor to the class scores.
    """
    # (stage, filters, stride of the convolutional block, identity block names)
    stage_specs = (
        (2, (64, 64, 256), 1, 'bc'),
        (3, (128, 128, 512), 2, 'bcd'),
        (4, (256, 256, 1024), 2, 'bcdef'),
        (5, (512, 512, 2048), 2, 'bc'),
    )

    X_input = Input(input_shape)

    # Stem
    padded = ZeroPadding2D((3, 3))(X_input)
    stem = Conv2D(64, (7, 7),
                  strides=(2, 2),
                  name='conv1',
                  kernel_initializer=glorot_uniform(seed=0))
    X = stem(padded)
    X = BatchNormalization(axis=3, name='bn_conv1')(X)
    X = Activation('relu')(X)
    X = MaxPooling2D((3, 3), strides=(2, 2))(X)

    # Residual stages: block 'a' changes the shape, the rest are identity blocks.
    for stage, stage_filters, stride, identity_names in stage_specs:
        X = convolutional_block(X,
                                f=3,
                                filters=list(stage_filters),
                                stage=stage,
                                block='a',
                                s=stride)
        for block_name in identity_names:
            X = identity_block(X,
                               f=3,
                               filters=list(stage_filters),
                               stage=stage,
                               block=block_name)

    # Classifier head
    X = Flatten()(X)
    scores = Dense(classes,
                   activation='softmax',
                   name='fc' + str(classes),
                   kernel_initializer=glorot_uniform(seed=0))(X)

    return Model(inputs=X_input, outputs=scores, name='ResNet50')
Example #7
0
 def build(self, input_shape):
     """Create the weight `S` and finish building the layer.

     `S` has shape [1, self.num_seeds, self.dim_V] and is initialized with
     `glorot_uniform()`.
     """
     seed_shape = [1, self.num_seeds, self.dim_V]
     self.S = self.add_weight(name='S',
                              shape=seed_shape,
                              initializer=glorot_uniform())
     super(PMA, self).build(input_shape)
Example #8
0
def get_cin_output(inputs, layer_size=(128, 128), split_half=True):
    """Build a stack of feature-interaction layers and return their pooled output.

    NOTE(review): the name suggests this is the Compressed Interaction Network
    (CIN) of xDeepFM; that is inferred from the name only, confirm.

    For every entry of `layer_size` the function creates a `filter<i>` and a
    `bias<i>` variable with `tf.get_variable`. Each layer takes the products
    between slices of the original input and slices of the previous hidden
    layer, runs them through a 1-D convolution, adds the bias and applies ReLU.
    The per-layer "direct" outputs are concatenated on axis 1 and summed over
    the last axis.

    Arguments:
    inputs -- input tensor; expanded with trailing axes until it has rank 3.
              Axis 1 is used as the number of fields and the last axis as the
              embedding size. The static shape of both must be known.
    layer_size -- sequence of integers, the number of feature maps of each
                  layer. With `split_half=True` every entry but the last must
                  be even.
    split_half -- if True, each non-final layer output is split in two halves:
                  one feeds the next layer, the other goes to the result. If
                  False the full output is used for both.

    Returns:
    The tensor obtained by concatenating the direct outputs on axis 1 and
    reducing the last axis with a sum.

    Raises:
    ValueError -- if `split_half` is True and a non-final entry of
                  `layer_size` is odd. Raised before any variable is created.
    """
    layer_size = list(layer_size)
    last_idx = len(layer_size) - 1

    # Validate first so a bad configuration does not leave half-created
    # variables in the graph.
    if split_half:
        for i, size in enumerate(layer_size):
            if i != last_idx and size % 2 > 0:
                raise ValueError(
                    "layer_size must be even number except for the last layer when split_half=True"
                )

    while len(inputs.shape) < 3:
        inputs = tf.expand_dims(inputs, -1)
    input_shape = inputs.get_shape().as_list()

    # --- weight initialization ---
    # field_nums[k] is the number of feature maps fed into layer k.
    field_nums = [input_shape[1]]
    filters = []
    bias = []
    for i, size in enumerate(layer_size):
        filters.append(
            tf.get_variable(
                name='filter' + str(i),
                shape=[1, field_nums[-1] * field_nums[0], size],
                dtype=tf.float32,
                initializer=glorot_uniform(seed=i),
            ))
        bias.append(
            tf.get_variable(name='bias' + str(i),
                            shape=[size],
                            dtype=tf.float32,
                            initializer=tf.initializers.zeros()))
        # With split_half only half of the maps are passed on to the next layer.
        field_nums.append(size // 2 if split_half else size)

    # --- forward pass ---
    dim = input_shape[-1]
    hidden_nn_layers = [inputs]
    final_result = []
    # Split along the last axis into `dim` slices of width 1.
    split_tensor0 = tf.split(hidden_nn_layers[0], dim * [1], 2)
    for idx, size in enumerate(layer_size):
        split_tensor = tf.split(hidden_nn_layers[-1], dim * [1], 2)

        # Products between input slices and previous-layer slices.
        dot_result_m = tf.matmul(split_tensor0, split_tensor, transpose_b=True)
        dot_result_o = tf.reshape(
            dot_result_m, shape=[dim, -1, field_nums[0] * field_nums[idx]])
        dot_result = tf.transpose(dot_result_o, perm=[1, 0, 2])

        curr_out = tf.nn.conv1d(dot_result,
                                filters=filters[idx],
                                stride=1,
                                padding='VALID')
        curr_out = tf.nn.bias_add(curr_out, bias[idx])
        curr_out = tf.nn.relu(curr_out)
        curr_out = tf.transpose(curr_out, perm=[0, 2, 1])

        if split_half:
            if idx != last_idx:
                next_hidden, direct_connect = tf.split(curr_out,
                                                       2 * [size // 2], 1)
            else:
                # The last layer contributes entirely to the result; the
                # placeholder hidden value is never read.
                direct_connect = curr_out
                next_hidden = 0
        else:
            direct_connect = curr_out
            next_hidden = curr_out

        final_result.append(direct_connect)
        hidden_nn_layers.append(next_hidden)

    result = tf.concat(final_result, axis=1)
    # The reduced axis is dropped by default; the deprecated `keep_dims`
    # keyword is not passed so this also works where it has been removed.
    result = tf.reduce_sum(result, -1)

    return result
Example #9
0
    def layers(self):
        """Assemble the encoder and return its input plus the mean/stddev heads.

        The graph is zero padding, a time-distributed 7x7 stem convolution
        with batch norm and ReLU, then five stages. Each stage applies two
        `encoder_convolutional_block` calls ('a' then 'b') and derives a mean
        and a stddev head from the stage output with `encoder_residual_block`.
        Stage 1 uses a dropout rate of 0.0, the others `self.dropout`.

        Returns:
            A tuple `(input_layer, heads)` where `heads` lists the mean and
            stddev tensors from the last stage to the first.
        """

        def conv_stage(tensor, filters, stage, stride, dropout_rate):
            # Block 'a' uses the stage stride, block 'b' always uses s=1.
            for block, block_stride in (('a', stride), ('b', 1)):
                tensor = encoder_convolutional_block(tensor,
                                                     f=3,
                                                     filters=list(filters),
                                                     stage=stage,
                                                     block=block,
                                                     s=block_stride,
                                                     dropout_rate=dropout_rate)
            return tensor

        input_layer = Input(self.real_input_shape, self.batch_size)

        # Zero-Padding
        net = TimeDistributed(ZeroPadding2D((3, 3)))(input_layer)

        # Stage 0
        stem = Conv2D(16, (7, 7),
                      strides=(2, 2),
                      name='conv0',
                      kernel_initializer=glorot_uniform(seed=0))
        net = TimeDistributed(stem)(net)
        net = BatchNormalization(axis=-1, name='bn_conv0')(net)
        net = Activation('relu')(net)

        # Stage 1 (no dropout)
        net = conv_stage(net, (16, 16, 32), stage=1, stride=1,
                         dropout_rate=0.0)
        self.mean_64x64x32 = encoder_residual_block(net, depth=32)
        self.stddev_64x64x32 = encoder_residual_block(net, depth=32)

        # Stage 2
        net = conv_stage(net, (32, 32, 64), stage=2, stride=2,
                         dropout_rate=self.dropout)
        self.mean_32x32x64 = encoder_residual_block(net, depth=64)
        self.stddev_32x32x64 = encoder_residual_block(net, depth=64)

        # Stage 3
        net = conv_stage(net, (64, 64, 128), stage=3, stride=2,
                         dropout_rate=self.dropout)
        self.mean_16x16x128 = encoder_residual_block(net, depth=128)
        self.stddev_16x16x128 = encoder_residual_block(net, depth=128)

        # Stage 4
        net = conv_stage(net, (128, 128, 256), stage=4, stride=2,
                         dropout_rate=self.dropout)
        self.mean_8x8x256 = encoder_residual_block(net, depth=256)
        self.stddev_8x8x256 = encoder_residual_block(net, depth=256)

        # Stage 5
        net = conv_stage(net, (256, 256, 512), stage=5, stride=2,
                         dropout_rate=self.dropout)
        self.mean_4x4x512 = encoder_residual_block(net, depth=512)
        self.stddev_4x4x512 = encoder_residual_block(net, depth=512)

        # Heads are returned from the last stage to the first.
        heads = [
            self.mean_4x4x512, self.stddev_4x4x512,
            self.mean_8x8x256, self.stddev_8x8x256,
            self.mean_16x16x128, self.stddev_16x16x128,
            self.mean_32x32x64, self.stddev_32x32x64,
            self.mean_64x64x32, self.stddev_64x64x32,
        ]
        return input_layer, heads
Example #10
0
                               num_words=num_words)

## to suppress warning
tf.logging.set_verbosity(tf.logging.ERROR)

# A custom session config can be supplied here via tf.Session(config=...).
sess = tf.Session()
graph = tf.get_default_graph()
set_session(sess)


def _load_inference_model(path):
    """Load an uncompiled Keras model from `path` and build its predict function."""
    model = tf.keras.models.load_model(
        path,
        compile=False,
        custom_objects={'GlorotUniform': glorot_uniform()})
    model._make_predict_function()
    return model


## loading pre-trained image processing model VGG16 model
model_transferred = _load_inference_model(
    "preloaded_files/model_transferred.h5")

## loading pre-defined rnn model (2GRU layers, embedding size=512, etc)
decoder_model_reddit = _load_inference_model(
    "preloaded_files/decoder_model_reddit.h5")

decoder_model_coco = _load_inference_model(
    "preloaded_files/decoder_model_coco.h5")
Example #11
0
def convolutional_block(X, f, filters, stage, block, s=2):
  """Residual block with a convolutional (strided) shortcut branch.

  Main path: 1x1 conv (stride s) -> BN -> ReLU -> fxf 'same' conv -> BN ->
  ReLU -> 1x1 conv -> BN. Shortcut: 1x1 conv (stride s) -> BN. The branches
  are added and passed through a ReLU.

  Arguments:
  X -- input tensor (batch norm is applied on axis 3)
  f -- window size of the middle convolution
  filters -- three integers, filter counts of the main-path convolutions
  stage, block -- used to build the layer names
  s -- stride of the first main-path convolution and of the shortcut

  Returns:
  The output tensor of the block.
  """
  n_first, n_middle, n_last = filters
  conv_prefix = 'res' + str(stage) + block + '_branch'
  bn_prefix = 'bn' + str(stage) + block + '_branch'

  block_input = X

  # Main path, first component.
  main = Conv2D(n_first, (1, 1),
                strides=(s, s),
                name=conv_prefix + '2a',
                kernel_initializer=glorot_uniform(seed=0))(block_input)
  main = BatchNormalization(axis=3, name=bn_prefix + '2a')(main)
  main = Activation('relu')(main)

  # Main path, second component.
  main = Conv2D(n_middle, (f, f),
                strides=(1, 1),
                name=conv_prefix + '2b',
                padding='same',
                kernel_initializer=glorot_uniform(seed=0))(main)
  main = BatchNormalization(axis=3, name=bn_prefix + '2b')(main)
  main = Activation('relu')(main)

  # Main path, third component (activation comes after the merge).
  main = Conv2D(n_last, (1, 1),
                strides=(1, 1),
                name=conv_prefix + '2c',
                padding='valid',
                kernel_initializer=glorot_uniform(seed=0))(main)
  main = BatchNormalization(axis=3, name=bn_prefix + '2c')(main)

  # Shortcut path.
  shortcut = Conv2D(n_last, (1, 1),
                    strides=(s, s),
                    name=conv_prefix + '1',
                    padding='valid',
                    kernel_initializer=glorot_uniform(seed=0))(block_input)
  shortcut = BatchNormalization(axis=3, name=bn_prefix + '1')(shortcut)

  merged = Add()([main, shortcut])
  return Activation('relu')(merged)
Example #12
0
def identity_block(X, f, filters, stage, block):
  """Residual block whose shortcut is the unmodified input.

  Main path:  1x1 conv -> BN -> ReLU
              -> f x f conv ('same' padding) -> BN -> ReLU
              -> 1x1 conv -> BN
  The input X is then added to the main-path output and the sum goes through
  a final ReLU. All convolutions use stride 1.

  Args:
    X: input tensor. Batch normalisation is applied on axis 3
      (assumes channels-last layout -- TODO confirm).
    f: kernel size of the middle convolution.
    filters: three filter counts, for the first, middle and last
      convolutions of the main path.
    stage: value converted with str() and embedded in the layer names.
    block: string embedded in the layer names after the stage.

  Returns:
    Output tensor of the block.
  """
  tag = str(stage) + block
  conv_prefix = 'res' + tag + '_branch'
  bn_prefix = 'bn' + tag + '_branch'

  squeeze_ch, mid_ch, out_ch = filters
  unit_stride = (1, 1)

  # Component 2a: 1x1 convolution.
  branch = Conv2D(squeeze_ch, (1, 1), strides=unit_stride, padding='valid',
                  name=conv_prefix + '2a',
                  kernel_initializer=glorot_uniform(seed=0))(X)
  branch = BatchNormalization(axis=3, name=bn_prefix + '2a')(branch)
  branch = Activation('relu')(branch)

  # Component 2b: f x f convolution that keeps the spatial size.
  branch = Conv2D(mid_ch, (f, f), strides=unit_stride, padding='same',
                  name=conv_prefix + '2b',
                  kernel_initializer=glorot_uniform(seed=0))(branch)
  branch = BatchNormalization(axis=3, name=bn_prefix + '2b')(branch)
  branch = Activation('relu')(branch)

  # Component 2c: 1x1 convolution, no activation before the sum.
  branch = Conv2D(out_ch, (1, 1), strides=unit_stride, padding='valid',
                  name=conv_prefix + '2c',
                  kernel_initializer=glorot_uniform(seed=0))(branch)
  branch = BatchNormalization(axis=3, name=bn_prefix + '2c')(branch)

  # Identity shortcut: the block input is added back as-is.
  summed = Add()([branch, X])
  return Activation('relu')(summed)
  def _testWithMaybeMultiAttention(self,
                                   is_multi,
                                   create_attention_mechanisms,
                                   expected_final_output,
                                   expected_final_state,
                                   attention_mechanism_depths,
                                   alignment_history=False,
                                   expected_final_alignment_history=None,
                                   attention_layer_sizes=None,
                                   attention_layers=None,
                                   create_query_layer=False,
                                   create_memory_layer=True,
                                   create_attention_kwargs=None):
    """Decodes with an attention-wrapped LSTM cell and checks the results.

    Builds one attention mechanism per creator, wraps a `keras.layers.LSTMCell`
    in `wrapper.AttentionWrapper`, runs a `BasicDecoderV2` with a
    `TrainingSampler` over random inputs, and then asserts on the static
    shapes and on summaries (`get_result_summary`) of the evaluated outputs,
    final state and, optionally, alignment history.

    Args:
      is_multi: If true, the mechanisms, `attention_layer_sizes` and
        `attention_layers` are handed to the wrapper as lists; otherwise only
        the first element of each is used.
      create_attention_mechanisms: Callables, each invoked with `units`,
        `memory`, `memory_sequence_length` and `create_attention_kwargs`.
        Must have exactly one entry unless `is_multi` is true.
      expected_final_output: Structure compared against the summary of the
        decoder outputs.
      expected_final_state: Structure compared against the summary of the
        final state. Its `time` attribute supplies the expected time dimension
        when executing eagerly.
      attention_mechanism_depths: One `units` value per creator.
      alignment_history: Forwarded to the wrapper; when true the alignment
        history is stacked, shape-checked and compared.
      expected_final_alignment_history: Structure compared against the summary
        of the stacked alignment history; only used with `alignment_history`.
      attention_layer_sizes: Optional list of attention layer sizes. A falsy
        entry counts as `encoder_output_depth` when computing the expected
        attention depth.
      attention_layers: Optional list of layers; consulted only when
        `attention_layer_sizes` is None, in which case their
        `compute_output_shape` gives the expected attention depth.
      create_query_layer: If true, pass a ones-initialized bias-free Dense
        `query_layer` to every creator.
      create_memory_layer: If true, pass a ones-initialized bias-free Dense
        `memory_layer` to every creator.
      create_attention_kwargs: Optional dict of extra keyword arguments for
        every creator. It is copied, so the caller's dict is left untouched.
    """
    # Allow is_multi to be True with a single mechanism to enable test for
    # passing in a single mechanism in a list.
    assert len(create_attention_mechanisms) == 1 or is_multi
    encoder_sequence_length = [3, 2, 3, 1, 1]
    decoder_sequence_length = [2, 0, 1, 2, 3]
    batch_size = 5
    encoder_max_time = 8
    decoder_max_time = 4
    input_depth = 7
    encoder_output_depth = 10
    cell_depth = 9
    # Work on a copy: the loop below stores layers in this dict, and that must
    # not leak back into a dict owned by the caller.
    create_attention_kwargs = dict(create_attention_kwargs or {})

    if attention_layer_sizes is not None:
      # Compute sum of attention_layer_sizes. Use encoder_output_depth if None.
      attention_depth = sum(attention_layer_size or encoder_output_depth
                            for attention_layer_size in attention_layer_sizes)
    elif attention_layers is not None:
      # Compute sum of attention_layers output depth.
      attention_depth = sum(
          attention_layer.compute_output_shape(
              [batch_size, cell_depth + encoder_output_depth]).dims[-1].value
          for attention_layer in attention_layers)
    else:
      attention_depth = encoder_output_depth * len(create_attention_mechanisms)

    # NOTE(review): no seed is set in this method; presumably the test fixture
    # seeds numpy so the expected summaries stay reproducible -- confirm.
    decoder_inputs = np.random.randn(batch_size, decoder_max_time,
                                     input_depth).astype(np.float32)
    encoder_outputs = np.random.randn(batch_size, encoder_max_time,
                                      encoder_output_depth).astype(np.float32)

    attention_mechanisms = []
    for creator, depth in zip(create_attention_mechanisms,
                              attention_mechanism_depths):
      # Create the query/memory layers with a deterministic initializer to
      # avoid randomness in the test between graph and eager.
      if create_query_layer:
        create_attention_kwargs["query_layer"] = keras.layers.Dense(
            depth, kernel_initializer="ones", use_bias=False)
      if create_memory_layer:
        create_attention_kwargs["memory_layer"] = keras.layers.Dense(
            depth, kernel_initializer="ones", use_bias=False)

      attention_mechanisms.append(
          creator(
              units=depth,
              memory=encoder_outputs,
              memory_sequence_length=encoder_sequence_length,
              **create_attention_kwargs))

    with self.cached_session(use_gpu=True):
      attention_layer_size = attention_layer_sizes
      attention_layer = attention_layers
      if not is_multi:
        # The single-mechanism API takes scalars rather than lists.
        if attention_layer_size is not None:
          attention_layer_size = attention_layer_size[0]
        if attention_layer is not None:
          attention_layer = attention_layer[0]
      cell = keras.layers.LSTMCell(cell_depth,
                                   recurrent_activation="sigmoid",
                                   kernel_initializer="ones",
                                   recurrent_initializer="ones")
      cell = wrapper.AttentionWrapper(
          cell,
          attention_mechanisms if is_multi else attention_mechanisms[0],
          attention_layer_size=attention_layer_size,
          alignment_history=alignment_history,
          attention_layer=attention_layer)
      if cell._attention_layers is not None:
        # Give attention layers that have no initializer a seeded one.
        for layer in cell._attention_layers:
          if layer.kernel_initializer is None:
            layer.kernel_initializer = initializers.glorot_uniform(seed=1337)

      sampler = sampler_py.TrainingSampler()
      my_decoder = basic_decoder.BasicDecoderV2(cell=cell, sampler=sampler)
      initial_state = cell.get_initial_state(
          dtype=dtypes.float32, batch_size=batch_size)
      final_outputs, final_state, _ = my_decoder(
          decoder_inputs,
          initial_state=initial_state,
          sequence_length=decoder_sequence_length)

      self.assertIsInstance(final_outputs, basic_decoder.BasicDecoderOutput)
      self.assertIsInstance(final_state, wrapper.AttentionWrapperState)

      # The static time dimension is only expected to be known in eager mode.
      expected_time = (
          expected_final_state.time if context.executing_eagerly() else None)
      self.assertEqual((batch_size, expected_time, attention_depth),
                       tuple(final_outputs.rnn_output.get_shape().as_list()))
      self.assertEqual((batch_size, expected_time),
                       tuple(final_outputs.sample_id.get_shape().as_list()))

      self.assertEqual((batch_size, attention_depth),
                       tuple(final_state.attention.get_shape().as_list()))
      self.assertEqual((batch_size, cell_depth),
                       tuple(final_state.cell_state[0].get_shape().as_list()))
      self.assertEqual((batch_size, cell_depth),
                       tuple(final_state.cell_state[1].get_shape().as_list()))

      if alignment_history:
        if is_multi:
          state_alignment_history = []
          for history_array in final_state.alignment_history:
            history = history_array.stack()
            self.assertEqual((expected_time, batch_size, encoder_max_time),
                             tuple(history.get_shape().as_list()))
            state_alignment_history.append(history)
          state_alignment_history = tuple(state_alignment_history)
        else:
          state_alignment_history = final_state.alignment_history.stack()
          self.assertEqual((expected_time, batch_size, encoder_max_time),
                           tuple(state_alignment_history.get_shape().as_list()))
        nest.assert_same_structure(cell.state_size,
                                   cell.zero_state(batch_size, dtypes.float32))
        # Remove the history from final_state for purposes of the
        # remainder of the tests.
        final_state = final_state._replace(alignment_history=())  # pylint: disable=protected-access
      else:
        state_alignment_history = ()

      self.evaluate(variables.global_variables_initializer())
      eval_result = self.evaluate({
          "final_outputs": final_outputs,
          "final_state": final_state,
          "state_alignment_history": state_alignment_history,
      })

      final_output_info = nest.map_structure(get_result_summary,
                                             eval_result["final_outputs"])
      final_state_info = nest.map_structure(get_result_summary,
                                            eval_result["final_state"])
      print("final_output_info: ", final_output_info)
      print("final_state_info: ", final_state_info)

      nest.map_structure(self.assertAllCloseOrEqual, expected_final_output,
                         final_output_info)
      nest.map_structure(self.assertAllCloseOrEqual, expected_final_state,
                         final_state_info)
      # The history was stripped from final_state above, so it is compared
      # separately here.
      if alignment_history:
        final_alignment_history_info = nest.map_structure(
            get_result_summary, eval_result["state_alignment_history"])
        print("final_alignment_history_info: ", final_alignment_history_info)
        nest.map_structure(
            self.assertAllCloseOrEqual,
            # outputs are batch major but the stacked TensorArray is time major
            expected_final_alignment_history,
            final_alignment_history_info)
Example #14
0
def encoder_convolutional_block(net,
                                f,
                                filters,
                                stage,
                                block,
                                s=2,
                                dropout_rate=DROPOUT_RATE):
    """Encoder residual block with a convolutional shortcut.

    Every convolution is a ConvSN2D wrapped in TimeDistributed.

    Main path:  1x1 conv (stride s) -> BN -> Swish
                -> dropout -> f x f conv ('same') -> BN -> Swish
                -> dropout -> 1x1 conv -> BN
    Shortcut:   1x1 conv (stride s) -> BN -> Swish
                -> 3x3 conv -> BN -> Swish
                -> 3x3 conv -> SELayer
    Both paths are summed and passed through a final SwishLayer.

    Args:
        net: input tensor, fed to both the main path and the shortcut.
        f: kernel size of the middle main-path convolution.
        filters: three filter counts (f1, f2, f3). The main path uses them in
            order; the shortcut uses f1 for its first convolution and f3 for
            the other two and for the SELayer depth.
        stage: value converted with str() and embedded in the layer names.
        block: string embedded in the layer names after the stage.
        s: stride of the first convolution on each path.
        dropout_rate: rate for the two main-path Dropout layers.

    Returns:
        Output tensor of the block.
    """
    stage_tag = str(stage) + block
    conv_prefix = 'enc_res' + stage_tag + '_branch'
    bn_prefix = 'enc_bn' + stage_tag + '_branch'

    n_first, n_mid, n_last = filters

    def seeded_conv(inputs, suffix, n_filters, kernel, stride, pad):
        """Per-timestep ConvSN2D with a seed-0 Glorot uniform initializer."""
        conv = ConvSN2D(filters=n_filters,
                        kernel_size=kernel,
                        strides=stride,
                        padding=pad,
                        name=conv_prefix + suffix,
                        kernel_initializer=glorot_uniform(seed=0))
        return TimeDistributed(conv)(inputs)

    def shortcut_conv3(inputs, suffix):
        """Per-timestep bias-free 3x3 ConvSN2D with n_last filters."""
        conv = ConvSN2D(filters=n_last,
                        kernel_size=3,
                        name=conv_prefix + suffix,
                        use_bias=False,
                        data_format='channels_last',
                        padding='same')
        return TimeDistributed(conv)(inputs)

    # ---- Main path ----
    # Component 2a: strided 1x1 convolution.
    trunk = seeded_conv(net, '2a', n_first, (1, 1), (s, s), 'valid')
    trunk = BatchNormalization(axis=-1, name=bn_prefix + '2a')(trunk)
    trunk = SwishLayer()(trunk)

    # Component 2b: dropout, then the f x f convolution.
    trunk = TimeDistributed(Dropout(dropout_rate))(trunk)
    trunk = seeded_conv(trunk, '2b', n_mid, (f, f), (1, 1), 'same')
    trunk = BatchNormalization(axis=-1, name=bn_prefix + '2b')(trunk)
    trunk = SwishLayer()(trunk)

    # Component 2c: dropout, then 1x1 convolution; no activation before the sum.
    trunk = TimeDistributed(Dropout(dropout_rate))(trunk)
    trunk = seeded_conv(trunk, '2c', n_last, (1, 1), (1, 1), 'valid')
    trunk = BatchNormalization(axis=-1, name=bn_prefix + '2c')(trunk)

    # ---- Shortcut path ----
    # Labelled "nVAE implementation" by the original author. It stands in for
    # the plain projection shortcut (a single strided 1x1 convolution with
    # n_last filters followed by batch normalisation).
    skip = seeded_conv(net, '1a', n_first, (1, 1), (s, s), 'valid')
    skip = BatchNormalization()(skip)
    skip = SwishLayer()(skip)
    skip = shortcut_conv3(skip, "1b")
    skip = BatchNormalization()(skip)
    skip = SwishLayer()(skip)
    skip = shortcut_conv3(skip, "1c")
    skip = TimeDistributed(SELayer(depth=n_last))(skip)

    # Merge the two paths and apply the final Swish activation.
    merged = Add()([trunk, skip])
    return SwishLayer()(merged)