Ejemplo n.º 1
0
 def __init__(self, d_model, dff, ff_dropout, init):
     """Create the sub-layers of the point-wise feed-forward network.

     Args:
         d_model: Number of units of the linear output projection.
         dff: Number of units of the ReLU layer.
         ff_dropout: Rate handed to the dropout layer.
         init: Initializer specification forwarded to ``mh.get_init``.
     """
     super(PointWiseFeedForwardNetwork, self).__init__()

     dense = tf.keras.layers.Dense

     # ReLU layer with `dff` units; it gets its own initializer instance.
     relu_options = {
         'activation': 'relu',
         'use_bias': True,
         'kernel_initializer': mh.get_init(init),
     }
     self.ff_relu = dense(dff, **relu_options)

     self.ff_dropout = tf.keras.layers.Dropout(rate=ff_dropout)

     # Linear projection with `d_model` units; again a fresh initializer.
     proj_options = {
         'activation': "linear",
         'use_bias': True,
         'kernel_initializer': mh.get_init(init),
     }
     self.ff_proj = dense(d_model, **proj_options)
Ejemplo n.º 2
0
    def __init__(self, d_model, num_heads, init):
        """Set up the dense projections used by the attention layer.

        Args:
            d_model: Number of units of every dense projection; must be a
                multiple of ``num_heads``.
            num_heads: Number of attention heads.
            init: Initializer specification forwarded to ``mh.get_init``.

        Raises:
            AssertionError: If ``d_model`` is not divisible by ``num_heads``.
        """
        super(MultiHeadAttention, self).__init__()

        self.num_heads = num_heads
        self.d_model = d_model

        assert d_model % num_heads == 0

        # Per-head width: d_model split evenly across the heads.
        self.depth = d_model // num_heads

        def _projection(use_bias):
            # Every call asks mh.get_init for a new initializer, so no two
            # layers share an initializer object.
            return tf.keras.layers.Dense(
                d_model,
                use_bias=use_bias,
                kernel_initializer=mh.get_init(init))

        # Bias-free projections for query, key and value.
        self.dense_layer_for_query = _projection(use_bias=False)
        self.dense_layer_for_key = _projection(use_bias=False)
        self.dense_layer_for_value = _projection(use_bias=False)

        # Additional d_model-wide dense layer, this one with a bias.
        self.dense = _projection(use_bias=True)
Ejemplo n.º 3
0
  def __init__(self, num_layers, d_model, num_heads, dff, feat_dim,
               input_dropout, inner_dropout, residual_dropout,
               attention_dropout, nfilt, cnn_n, init,
               vocab_n):
    """Create the convolutional front-end, encoder blocks and output layer.

    Args:
      num_layers: Number of ``block.EncoderBlock`` instances to create.
      d_model: Units of the linear projection; also passed to every block.
      num_heads: Passed to every encoder block.
      dff: Passed to every encoder block.
      feat_dim: Input feature dimension, used to size the reshape layer.
      input_dropout: Rate of the input dropout layer.
      inner_dropout: Passed to every encoder block.
      residual_dropout: Passed to every encoder block.
      attention_dropout: Passed to every encoder block.
      nfilt: Filter count handed to ``ConvLayer``; also sizes the reshape.
      cnn_n: Layer count handed to ``ConvLayer``.
      init: Initializer specification forwarded to ``mh.get_init``.
      vocab_n: Units of the final dense layer.
    """
    super().__init__()

    self.d_model = d_model
    self.dff = dff
    self.num_layers = num_layers
    self.attention_dropout = attention_dropout
    self.num_heads = num_heads
    self.residual_dropout = residual_dropout

    # All encoder blocks are built from the same argument tuple.
    block_args = (d_model, num_heads, dff, inner_dropout, residual_dropout,
                  attention_dropout, init)
    self.enc_layers = [block.EncoderBlock(*block_args)
                       for _ in range(num_layers)]

    self.layernorm = tf.keras.layers.LayerNormalization(epsilon=1e-6)

    # pylint: disable=fixme
    # TODO: stride and filter number are hard coded.
    self.stride = 2
    conv_kernel = 3
    self.cnn_n = cnn_n
    self.mask2_layer = tf.keras.layers.Lambda(mh.feat_mask2)

    self.conv = ConvLayer(cnn_n, nfilt, conv_kernel, self.stride, init)

    # NOTE(review): the divisor is the literal 2 * 2, not derived from
    # self.stride or cnn_n -- presumably two stride-2 layers; confirm.
    flat_width = math.ceil(feat_dim / (2 * 2)) * nfilt
    self.reshape_to_ffwd = tf.keras.layers.Reshape(
        (-1, flat_width), name="reshape_to_ffwd")

    self.linear_projection = tf.keras.layers.Dense(
        d_model,
        activation='linear',
        kernel_initializer=mh.get_init(init))

    # Shape note kept from the original: (batch_size, input_seq_len, d_model)
    self.input_dropout = tf.keras.layers.Dropout(rate=input_dropout)
    # Shape note kept from the original: (batch_size, input_seq_len, vocab)
    self.proj = tf.keras.layers.Dense(vocab_n)
Ejemplo n.º 4
0
    def __init__(self, cnn_n, nfilt, kernel_size, stride, init, **kwargs):
        """Create the masking, convolution, dropout and batch-norm layers.

        For each of the ``cnn_n`` stages one Masking layer, a pair of Conv2D
        layers and a pair of Dropout layers are created; afterwards one
        BatchNormalization layer per stage and a single Lambda mask layer.

        Args:
            cnn_n: Number of stages.
            nfilt: Number of filters of every Conv2D layer.
            kernel_size: Kernel size of every Conv2D layer.
            stride: Stride of every Conv2D layer.
            init: Initializer specification forwarded to ``mh.get_init``.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        super(CapsulationLayer, self).__init__(**kwargs)

        self.cnn_n = cnn_n
        self.stride = stride
        self.conv_layers = []
        self.dropouts = []
        self.maskings = []

        def _conv():
            # Linear, 'same'-padded convolution with its own initializer.
            return tf.keras.layers.Conv2D(
                filters=nfilt,
                kernel_size=kernel_size,
                activation='linear',
                padding='same',
                strides=stride,
                kernel_initializer=mh.get_init(init))

        def _dropout():
            return tf.keras.layers.Dropout(rate=0.2)

        # Layers are created stage by stage: mask, conv pair, dropout pair.
        for _ in range(cnn_n):
            self.maskings.append(tf.keras.layers.Masking(mask_value=0))
            self.conv_layers.append([_conv(), _conv()])
            self.dropouts.append([_dropout(), _dropout()])

        self.bn_layers = [tf.keras.layers.BatchNormalization(axis=-1)
                          for _ in range(cnn_n)]
        self.mask_layer = tf.keras.layers.Lambda(mh.feat_mask)
Ejemplo n.º 5
0
    def __init__(self, d_model, num_heads, dff, inner_dropout,
                 residual_dropout, attention_dropout, init):
        """Create the attention, feed-forward, norm and dropout sub-layers.

        Args:
            d_model: Passed to both attention layers and the feed-forward
                network; also the units of the extra dense layer.
            num_heads: Passed to both attention layers.
            dff: Passed to the feed-forward network.
            inner_dropout: Passed to the feed-forward network.
            residual_dropout: Rate of the three residual dropout layers.
            attention_dropout: Stored on the instance unchanged.
            init: Initializer specification forwarded to the sub-layers and
                to ``mh.get_init``.
        """
        super(EncoderMFBlock, self).__init__()

        def _layer_norm():
            return tf.keras.layers.LayerNormalization(epsilon=1e-6)

        def _residual_dropout():
            return tf.keras.layers.Dropout(residual_dropout)

        # Two attention layers followed by the feed-forward network.
        self.mha1 = att.MultiHeadAttention(d_model, num_heads, init)
        self.mha2 = att.MultiHeadAttention(d_model, num_heads, init)
        self.ffn = PointWiseFeedForwardNetwork(
            d_model, dff, inner_dropout, init)

        # Four independent layer normalizations.
        self.layernorm1 = _layer_norm()
        self.layernorm2 = _layer_norm()
        self.layernorm3 = _layer_norm()
        self.layernorm_raw = _layer_norm()

        # Three independent dropouts sharing the residual rate.
        self.res_dropout1 = _residual_dropout()
        self.res_dropout2 = _residual_dropout()
        self.res_dropout3 = _residual_dropout()

        self.attention_dropout = attention_dropout

        # Bias-free dense layer of width d_model.
        self.dense = tf.keras.layers.Dense(
            d_model,
            use_bias=False,
            kernel_initializer=mh.get_init(init))
Ejemplo n.º 6
0
    def __init__(self, config, logger, class_n):
        """Build the capsule front-end, routing variables and helper layers.

        Args:
            config: Configuration object. The attributes read here are
                ``model_initializer``, ``feat_dim``, ``model_conv_layer_num``,
                ``model_conv_filter_num``, ``model_encoder_num``,
                ``model_caps_window_lpad``, ``model_caps_window_rpad``,
                ``model_caps_context``, ``model_caps_primary_num``,
                ``model_caps_primary_dim``, ``model_caps_convolution_num``,
                ``model_caps_convolution_dim``, ``model_caps_class_dim``,
                ``model_caps_iter``, ``train_inp_dropout`` and
                ``train_inn_dropout``.
            logger: Logger-like object; only ``info`` is called on it.
            class_n: Number of output capsules of the last routing layer.

        Raises:
            ValueError: If ``config.model_encoder_num`` is smaller than 1, in
                which case no routing weight shapes can be derived.
        """
        # pylint: disable=too-many-statements
        super(SequenceRouter, self).__init__()

        init, kernel_size, self.stride = config.model_initializer, 3, 2
        # NOTE(review): the divisor is stride * layer count; if every conv
        # layer downsamples by `stride` the factor would be
        # stride ** layer count -- confirm which one is intended.
        self.feat_dim = math.ceil(config.feat_dim /
                                  (self.stride * config.model_conv_layer_num))
        self.nfilt = config.model_conv_filter_num
        self.enc_num = config.model_encoder_num
        self.lpad = config.model_caps_window_lpad
        self.rpad = config.model_caps_window_rpad
        # Window covers the left pad, the right pad and the centre frame.
        self.window = self.lpad + self.rpad + 1
        self.is_context = is_context = config.model_caps_context

        self.ph = config.model_caps_primary_num
        caps_inp_in_n = self.ph * self.window
        self.pd = config.model_caps_primary_dim
        self.caps_cov_n = caps_cov_n = config.model_caps_convolution_num
        caps_cov_in_n = caps_cov_n * self.window
        self.caps_cov_d = caps_cov_d = config.model_caps_convolution_dim
        self.caps_cls_n = caps_cls_n = class_n
        self.caps_cls_d = caps_cls_d = config.model_caps_class_dim
        self.iter = config.model_caps_iter

        # Capsulation w/ bottleneck projection layers.
        self.conv = CapsulationLayer(config.model_conv_layer_num, self.nfilt,
                                     kernel_size, self.stride, init)
        self.proj_pe = tf.keras.layers.Dense(
            self.ph,
            activation='linear',
            kernel_initializer=mh.get_init(init),
            name="flatten")
        self.mask = tf.keras.layers.Lambda(mh.feat_mask, name="pad_mask")
        self.ecs = [
            tf.keras.layers.Conv2D(filters=config.model_caps_primary_dim,
                                   kernel_size=3,
                                   activation='linear',
                                   padding='same',
                                   strides=1,
                                   kernel_initializer=mh.get_init(init),
                                   name="encaps%d" % (i + 1)) for i in range(2)
        ]
        self.ecd = [
            tf.keras.layers.Dropout(rate=0.2, name="do_encaps%d" % (i + 1))
            for i in range(2)
        ]
        self.inp_dropout = tf.keras.layers.Dropout(
            rate=config.train_inp_dropout, name="do_input")

        # Dynamic Routing variables.
        # Each shape tuple is (in_n, out_n, out_dim, in_dim).
        if self.enc_num > 1:
            # First layer: primary capsules -> convolutional capsules.
            shape = [(caps_inp_in_n, caps_cov_n, caps_cov_d, self.pd)]
            # Middle layers: convolutional -> convolutional capsules.
            for _ in range(1, self.enc_num - 1):
                shape.append(
                    (caps_cov_in_n, caps_cov_n, caps_cov_d, caps_cov_d))
            # Last layer: convolutional capsules -> class capsules.
            shape.append((caps_cov_in_n, caps_cls_n, caps_cls_d, caps_cov_d))
        elif self.enc_num == 1:
            # Single layer: primary capsules -> class capsules.
            shape = [(caps_inp_in_n, caps_cls_n, caps_cls_d, self.pd)]
        else:
            # Previously `shape` stayed None here and the comprehension
            # below failed with "'NoneType' object is not iterable".
            raise ValueError(
                "config.model_encoder_num must be at least 1, got %r" %
                (self.enc_num,))

        # One transformation matrix per routing layer.
        self.wgt = [
            tf.Variable(tf.random.normal(shape=s, stddev=0.1,
                                         dtype=tf.float32),
                        trainable=True,
                        name="W%d" % i) for i, s in enumerate(shape)
        ]
        # One bias per routing layer, shaped (1, 1, in_n, out_n, out_dim).
        self.bias = [
            tf.Variable(tf.random.normal(shape=(1, 1, s[0], s[1], s[2]),
                                         stddev=0.1,
                                         dtype=tf.float32),
                        trainable=True,
                        name="b%d" % i) for i, s in enumerate(shape)
        ]

        self.ln_i = tf.keras.layers.LayerNormalization(name="ln_input")
        self.ln_m = [
            tf.keras.layers.LayerNormalization(name="ln_mid%d" % (i + 1))
            for i in range(self.enc_num)
        ]
        self.ln_o = tf.keras.layers.LayerNormalization(name="ln_output")
        self.mid_dropout = [
            tf.keras.layers.Dropout(rate=config.train_inn_dropout,
                                    name="dropout_mid_%d" % i)
            for i in range(self.enc_num)
        ]
        self.mask_layer = tf.keras.layers.Masking(mask_value=0)

        # Log the configuration and the size of every transformation matrix.
        # Arguments are passed to the logger so formatting happens lazily.
        logger.info(
            "Layer x %d, Iter x %d, Init %s, Win %d (l:%d, r:%d)",
            self.enc_num, self.iter, "CONTEXT" if is_context else "ZERO",
            self.window, self.lpad, self.rpad)
        logger.info("Transformation matrix size")
        size = 0
        for i, w in enumerate(self.wgt):
            logger.info("L=%d->%d", i, i + 1)
            logger.info(tf.size(w))
            size += tf.size(w)
        logger.info("Total: %d", size)
Ejemplo n.º 7
0
    def __init__(self, config, logger, class_n):
        """Build the conv front-end, inner conv stack and projection layers.

        Args:
            config: Configuration object. The attributes read here are
                ``model_initializer``, ``model_encoder_num``,
                ``model_conv_inp_nfilt``, ``model_conv_inn_nfilt``,
                ``model_conv_proj_num``, ``model_conv_proj_dim``,
                ``model_conv_stride``, ``model_conv_layer_num``,
                ``feat_dim``, ``model_conv_filter_num``,
                ``train_inn_dropout`` and ``train_inp_dropout``.
            logger: Logger-like object; only ``info`` is called on it.
            class_n: The last projection has ``class_n * 2`` units.

        Raises:
            AssertionError: If ``config.model_conv_layer_num`` is 4 or more.
        """
        super().__init__()

        init = config.model_initializer
        self.enc_num = enc_num = config.model_encoder_num
        self.nfilt_inp = config.model_conv_inp_nfilt
        self.nfilt_inn = config.model_conv_inn_nfilt
        self.proj_layers = config.model_conv_proj_num
        self.proj_dim = config.model_conv_proj_dim
        self.mask_layer = tf.keras.layers.Masking(mask_value=0.0)
        # NOTE(review): this value is replaced by the literal 2 further
        # down -- confirm which stride is intended.
        self.stride = config.model_conv_stride
        # filter : [time, frequency]

        self.mask = tf.keras.layers.Lambda(mh.feat_mask, name="pad_mask1")
        self.mask2 = tf.keras.layers.Lambda(mh.feat_mask2, name="pad_mask2")
        assert config.model_conv_layer_num < 4

        # Maxout Conv layers
        kernel_size = 3
        self.stride = stride = 2
        self.cnn_n = cnn_n = config.model_conv_layer_num
        # Feature width after dividing by `stride` once per conv layer.
        feat_dim = math.ceil(config.feat_dim / (stride**cnn_n))
        nfilt = config.model_conv_filter_num
        self.cnn_fe = CapsulationLayer(
            cnn_n, nfilt, kernel_size, stride, init, name="conv_feat")

        def _inner_conv(filters, layer_no):
            # Linear, stride-1, 'same'-padded, bias-free (5, 3) convolution.
            return tf.keras.layers.Conv2D(
                filters=filters,
                kernel_size=(5, 3),
                activation='linear',
                padding='same',
                strides=1,
                kernel_initializer=mh.get_init(init),
                name="inn_conv1_%d" % layer_no,
                use_bias=False)

        # NOTE(review): the first four layers are numbered 3..6 and the
        # following ones start at 5, so names can repeat -- confirm.
        self.enc_layers = [
            _inner_conv(self.nfilt_inp, idx + 3) for idx in range(4)
        ]
        for idx in range(4, enc_num - 1):
            self.enc_layers.append(_inner_conv(self.nfilt_inn, idx + 1))

        # The closing convolution gets its filter count from proj_dim.
        last_filt = (self.proj_dim // feat_dim) * 2
        self.enc_layers.append(_inner_conv(last_filt, enc_num - 1))

        # NOTE(review): `dropouts` and `dropouts_cnn` use the same layer
        # names -- confirm this is intended.
        self.dropouts = [
            tf.keras.layers.Dropout(rate=0.2, name="inn_drop1_%d" % layer_no)
            for layer_no in range(1, enc_num + 1)
        ]
        self.dropouts_cnn = [
            tf.keras.layers.Dropout(rate=config.train_inn_dropout,
                                    name="inn_drop1_%d" % layer_no)
            for layer_no in range(1, enc_num + 1)
        ]
        self.layernorms = [
            tf.keras.layers.LayerNormalization(epsilon=1e-6,
                                               name="inn_ln_%d" % layer_no)
            for layer_no in range(1, enc_num + 1)
        ]

        # Maxout Projection layers
        self.reshape_to_maxout = tf.keras.layers.Reshape(
            (-1, feat_dim * (last_filt // 2)), name="reshape_to_ffwd")

        # Every projection list below holds proj_layers - 1 entries.
        self.proj = [
            tf.keras.layers.TimeDistributed(
                tf.keras.layers.Dense(self.proj_dim,
                                      kernel_initializer=mh.get_init(init),
                                      name="proj1_%d" % layer_no,
                                      use_bias=False))
            for layer_no in range(1, self.proj_layers)
        ]
        # NOTE(review): `dropproj` and `dropouts_proj` use the same layer
        # names -- confirm this is intended.
        self.dropproj = [
            tf.keras.layers.Dropout(rate=0.2, name="proj_drop1_%d" % layer_no)
            for layer_no in range(1, self.proj_layers)
        ]
        self.dropouts_proj = [
            tf.keras.layers.Dropout(rate=config.train_inn_dropout,
                                    name="proj_drop1_%d" % layer_no)
            for layer_no in range(1, self.proj_layers)
        ]
        self.layernorms_proj = [
            tf.keras.layers.LayerNormalization(epsilon=1e-6,
                                               name="proj_ln_%d" % layer_no)
            for layer_no in range(1, self.proj_layers)
        ]
        self.input_dropout = tf.keras.layers.Dropout(
            rate=config.train_inp_dropout, name="inp_dropout")

        # Maxout Last Projection layers
        self.projv = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Dense(class_n * 2,
                                  kernel_initializer=mh.get_init(init),
                                  use_bias=False))
        self.dropprojv = tf.keras.layers.Dropout(rate=config.train_inn_dropout)
        self.layernorms_projv = tf.keras.layers.LayerNormalization(
            epsilon=1e-6)
        #self.pool = tf.keras.layers.MaxPooling2D((1, 3))
        logger.info(
            "CNN CTC model, please check config model_conv_*, "
            "last_filt:%d", last_filt)