def __init__(self, d_model, dff, ff_dropout, init):
    super(PointWiseFeedForwardNetwork, self).__init__()
    # Expansion layer: d_model -> dff with ReLU.
    self.ff_relu = tf.keras.layers.Dense(
        dff,
        activation='relu',
        use_bias=True,
        kernel_initializer=mh.get_init(init))
    self.ff_dropout = tf.keras.layers.Dropout(rate=ff_dropout)
    # Projection layer: dff -> d_model, linear.
    self.ff_proj = tf.keras.layers.Dense(
        d_model,
        activation='linear',
        use_bias=True,
        kernel_initializer=mh.get_init(init))
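
# The forward wiring of this block is not part of this excerpt; the
# helper below is a hypothetical sketch of how the three layers above
# are conventionally composed (Dense+ReLU -> Dropout -> Dense).
def ffn_call_sketch(ffn, x, training=False):
    """Expand to dff with ReLU, apply dropout, project back to d_model."""
    out = ffn.ff_relu(x)                          # (batch, seq_len, dff)
    out = ffn.ff_dropout(out, training=training)
    return ffn.ff_proj(out)                       # (batch, seq_len, d_model)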
def __init__(self, d_model, num_heads, init):
    super(MultiHeadAttention, self).__init__()
    self.num_heads = num_heads
    self.d_model = d_model
    # d_model must split evenly across the attention heads.
    assert self.d_model % self.num_heads == 0
    self.depth = d_model // self.num_heads
    self.dense_layer_for_query = tf.keras.layers.Dense(
        d_model, use_bias=False, kernel_initializer=mh.get_init(init))
    self.dense_layer_for_key = tf.keras.layers.Dense(
        d_model, use_bias=False, kernel_initializer=mh.get_init(init))
    self.dense_layer_for_value = tf.keras.layers.Dense(
        d_model, use_bias=False, kernel_initializer=mh.get_init(init))
    # Output projection after the heads are concatenated.
    self.dense = tf.keras.layers.Dense(
        d_model, use_bias=True, kernel_initializer=mh.get_init(init))
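
# split_heads() and the attention kernel are outside this excerpt. The
# sketches below show the standard computation these layers imply; both
# function names are hypothetical.
def split_heads_sketch(x, batch_size, num_heads, depth):
    """(batch, seq, d_model) -> (batch, num_heads, seq, depth)."""
    x = tf.reshape(x, (batch_size, -1, num_heads, depth))
    return tf.transpose(x, perm=[0, 2, 1, 3])

def scaled_dot_product_attention_sketch(q, k, v, mask=None):
    """softmax(Q K^T / sqrt(d_k)) V with an optional additive mask."""
    logits = tf.matmul(q, k, transpose_b=True)
    dk = tf.cast(tf.shape(k)[-1], tf.float32)
    logits = logits / tf.math.sqrt(dk)
    if mask is not None:
        logits += mask * -1e9  # drive masked positions to ~0 weight
    weights = tf.nn.softmax(logits, axis=-1)
    return tf.matmul(weights, v), weights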
def __init__(self, num_layers, d_model, num_heads, dff, feat_dim,
             input_dropout, inner_dropout, residual_dropout,
             attention_dropout, nfilt, cnn_n, init, vocab_n):
    super().__init__()
    self.d_model = d_model
    self.dff = dff
    self.num_layers = num_layers
    self.attention_dropout = attention_dropout
    self.num_heads = num_heads
    self.residual_dropout = residual_dropout
    self.enc_layers = [
        block.EncoderBlock(self.d_model, self.num_heads, dff,
                           inner_dropout, residual_dropout,
                           attention_dropout, init)
        for _ in range(num_layers)
    ]
    self.layernorm = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    # pylint: disable=fixme
    # TODO: stride and filter number are hard-coded.
    self.stride = 2
    kernel_size = 3
    self.cnn_n = cnn_n
    self.mask2_layer = tf.keras.layers.Lambda(mh.feat_mask2)
    self.conv = ConvLayer(cnn_n, nfilt, kernel_size, self.stride, init)
    # Flatten the conv output; the 2 * 2 factor is hard-coded for two
    # stride-2 conv layers (see TODO above).
    self.reshape_to_ffwd = tf.keras.layers.Reshape(
        (-1, math.ceil(feat_dim / (2 * 2)) * nfilt),
        name="reshape_to_ffwd")
    self.linear_projection = tf.keras.layers.Dense(
        d_model, activation='linear', kernel_initializer=mh.get_init(init))
    # (batch_size, input_seq_len, d_model)
    self.input_dropout = tf.keras.layers.Dropout(rate=input_dropout)
    # (batch_size, input_seq_len, vocab)
    self.proj = tf.keras.layers.Dense(vocab_n)
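
# The encoder's call() is not shown here. A hypothetical sketch of the
# forward path the layers above suggest (mask handling and any
# positional encoding are omitted; ConvLayer's signature is assumed):
def encoder_call_sketch(enc, feats, training=False):
    x = enc.conv(feats, training=training)        # strided conv frontend
    x = enc.reshape_to_ffwd(x)                    # flatten freq x filters
    x = enc.linear_projection(x)                  # (batch, seq', d_model)
    x = enc.input_dropout(x, training=training)
    for enc_layer in enc.enc_layers:              # transformer encoder stack
        x = enc_layer(x, training=training)
    x = enc.layernorm(x)
    return enc.proj(x)                            # (batch, seq', vocab_n)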
def __init__(self, cnn_n, nfilt, kernel_size, stride, init, **kwargs):
    super(CapsulationLayer, self).__init__(**kwargs)
    self.cnn_n = cnn_n
    self.stride = stride
    self.conv_layers, self.dropouts = [], []
    self.maskings = []
    for _ in range(self.cnn_n):
        self.maskings.append(tf.keras.layers.Masking(mask_value=0))
        # Two parallel convolutions per block.
        self.conv_layers.append([
            tf.keras.layers.Conv2D(filters=nfilt,
                                   kernel_size=kernel_size,
                                   activation='linear',
                                   padding='same',
                                   strides=stride,
                                   kernel_initializer=mh.get_init(init))
            for _ in range(2)
        ])
        self.dropouts.append(
            [tf.keras.layers.Dropout(rate=0.2) for _ in range(2)])
    self.bn_layers = [
        tf.keras.layers.BatchNormalization(axis=-1) for _ in range(cnn_n)
    ]
    self.mask_layer = tf.keras.layers.Lambda(mh.feat_mask)
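
# The paired Conv2D/Dropout lists above suggest maxout convolutions.
# A hypothetical sketch of one block, assuming the element-wise max of
# the two parallel convolutions is taken before batch normalization:
def maxout_conv_block_sketch(caps, x, i, training=False):
    a = caps.dropouts[i][0](caps.conv_layers[i][0](x), training=training)
    b = caps.dropouts[i][1](caps.conv_layers[i][1](x), training=training)
    out = tf.maximum(a, b)                        # maxout over the pair
    return caps.bn_layers[i](out, training=training)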
def __init__(self, d_model, num_heads, dff, inner_dropout,
             residual_dropout, attention_dropout, init):
    super(EncoderMFBlock, self).__init__()
    self.mha1 = att.MultiHeadAttention(d_model, num_heads, init)
    self.mha2 = att.MultiHeadAttention(d_model, num_heads, init)
    self.ffn = PointWiseFeedForwardNetwork(d_model, dff, inner_dropout, init)
    self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.layernorm3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.layernorm_raw = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    # One residual dropout per sub-layer (two attentions + FFN).
    self.res_dropout1 = tf.keras.layers.Dropout(residual_dropout)
    self.res_dropout2 = tf.keras.layers.Dropout(residual_dropout)
    self.res_dropout3 = tf.keras.layers.Dropout(residual_dropout)
    self.attention_dropout = attention_dropout
    self.dense = tf.keras.layers.Dense(
        d_model, use_bias=False, kernel_initializer=mh.get_init(init))
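
# How mha1/mha2, layernorm_raw, and dense are wired is not visible in
# this excerpt. The sketch below is one plausible pre-LN arrangement,
# assuming each attention is called as mha(q, k, v, mask) and returns
# (output, weights); layernorm_raw and dense are omitted because their
# role cannot be inferred from the constructor alone.
def encoder_mf_block_sketch(blk, x, mask=None, training=False):
    y = blk.layernorm1(x)
    attn, _ = blk.mha1(y, y, y, mask)
    x = x + blk.res_dropout1(attn, training=training)
    y = blk.layernorm2(x)
    attn, _ = blk.mha2(y, y, y, mask)
    x = x + blk.res_dropout2(attn, training=training)
    y = blk.layernorm3(x)
    return x + blk.res_dropout3(blk.ffn(y), training=training)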
def __init__(self, config, logger, class_n):  # pylint: disable=too-many-statements
    super(SequenceRouter, self).__init__()
    init, kernel_size, self.stride = config.model_initializer, 3, 2
    # Each stride-2 conv layer halves the feature dimension, so the
    # overall reduction is stride ** layer_num. (The flattened original
    # used stride * layer_num, which only agrees when both equal 2.)
    self.feat_dim = math.ceil(
        config.feat_dim / (self.stride**config.model_conv_layer_num))
    self.nfilt = config.model_conv_filter_num
    self.enc_num = config.model_encoder_num
    self.lpad = config.model_caps_window_lpad
    self.rpad = config.model_caps_window_rpad
    self.window = self.lpad + self.rpad + 1
    self.is_context = is_context = config.model_caps_context
    self.ph = config.model_caps_primary_num
    caps_inp_in_n = self.ph * self.window
    self.pd = config.model_caps_primary_dim
    self.caps_cov_n = caps_cov_n = config.model_caps_convolution_num
    caps_cov_in_n = caps_cov_n * self.window
    self.caps_cov_d = caps_cov_d = config.model_caps_convolution_dim
    self.caps_cls_n = caps_cls_n = class_n
    self.caps_cls_d = caps_cls_d = config.model_caps_class_dim
    self.iter = config.model_caps_iter

    # Capsulation w/ bottleneck projection layers.
    self.conv = CapsulationLayer(config.model_conv_layer_num, self.nfilt,
                                 kernel_size, self.stride, init)
    self.proj_pe = tf.keras.layers.Dense(self.ph,
                                         activation='linear',
                                         kernel_initializer=mh.get_init(init),
                                         name="flatten")
    self.mask = tf.keras.layers.Lambda(mh.feat_mask, name="pad_mask")
    self.ecs = [
        tf.keras.layers.Conv2D(filters=config.model_caps_primary_dim,
                               kernel_size=3,
                               activation='linear',
                               padding='same',
                               strides=1,
                               kernel_initializer=mh.get_init(init),
                               name="encaps%d" % (i + 1)) for i in range(2)
    ]
    self.ecd = [
        tf.keras.layers.Dropout(rate=0.2, name="do_encaps%d" % (i + 1))
        for i in range(2)
    ]
    self.inp_dropout = tf.keras.layers.Dropout(
        rate=config.train_inp_dropout, name="do_input")

    # Dynamic routing variables: one transformation matrix per layer.
    shape = None
    if self.enc_num > 1:
        # in_n, out_n, out_dim, in_dim
        shape = [(caps_inp_in_n, caps_cov_n, caps_cov_d, self.pd)]
        for _ in range(1, self.enc_num - 1):
            shape.append(
                (caps_cov_in_n, caps_cov_n, caps_cov_d, caps_cov_d))
        shape.append((caps_cov_in_n, caps_cls_n, caps_cls_d, caps_cov_d))
    elif self.enc_num == 1:
        shape = [(caps_inp_in_n, caps_cls_n, caps_cls_d, self.pd)]
    self.wgt = [
        tf.Variable(tf.random.normal(shape=s, stddev=0.1, dtype=tf.float32),
                    trainable=True,
                    name="W%d" % i) for i, s in enumerate(shape)
    ]
    self.bias = [
        tf.Variable(tf.random.normal(shape=(1, 1, s[0], s[1], s[2]),
                                     stddev=0.1,
                                     dtype=tf.float32),
                    trainable=True,
                    name="b%d" % i) for i, s in enumerate(shape)
    ]
    self.ln_i = tf.keras.layers.LayerNormalization(name="ln_input")
    self.ln_m = [
        tf.keras.layers.LayerNormalization(name="ln_mid%d" % (i + 1))
        for i in range(self.enc_num)
    ]
    self.ln_o = tf.keras.layers.LayerNormalization(name="ln_output")
    self.mid_dropout = [
        tf.keras.layers.Dropout(rate=config.train_inn_dropout,
                                name="dropout_mid_%d" % i)
        for i in range(self.enc_num)
    ]
    self.mask_layer = tf.keras.layers.Masking(mask_value=0)

    logger.info(
        "Layer x %d, Iter x %d, Init %s, Win %d (l:%d, r:%d)" %
        (self.enc_num, self.iter, "CONTEXT" if is_context else "ZERO",
         self.window, self.lpad, self.rpad))
    logger.info("Transformation matrix size")
    size = 0
    for i, w in enumerate(self.wgt):
        logger.info("L=%d->%d" % (i, i + 1))
        logger.info(tf.size(w))
        size += tf.size(w)
    logger.info("Total: %d" % size)
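
# The routing loop itself is outside this excerpt. Below is a
# hypothetical sketch of the squash nonlinearity and the dynamic-routing
# pass implied by the W/b variables above (after Sabour et al., 2017),
# with u_hat shaped (batch, time, in_n, out_n, out_dim):
def squash(s, axis=-1, eps=1e-7):
    """Scale a vector's norm into [0, 1) while keeping its direction."""
    sq_norm = tf.reduce_sum(tf.square(s), axis=axis, keepdims=True)
    return (sq_norm / (1.0 + sq_norm)) * s / tf.sqrt(sq_norm + eps)

def dynamic_routing_sketch(u_hat, iterations):
    b = tf.zeros_like(u_hat[..., 0])              # logits: (..., in_n, out_n)
    v = None
    for _ in range(iterations):
        c = tf.nn.softmax(b, axis=-1)             # coupling coefficients
        s = tf.reduce_sum(c[..., None] * u_hat, axis=2)
        v = squash(s)                             # (batch, time, out_n, out_dim)
        b += tf.reduce_sum(u_hat * v[:, :, None], axis=-1)  # agreement update
    return v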
def __init__(self, config, logger, class_n):
    super().__init__()
    init = config.model_initializer
    self.enc_num = enc_num = config.model_encoder_num
    self.nfilt_inp = config.model_conv_inp_nfilt
    self.nfilt_inn = config.model_conv_inn_nfilt
    self.proj_layers = config.model_conv_proj_num
    self.proj_dim = config.model_conv_proj_dim
    self.mask_layer = tf.keras.layers.Masking(mask_value=0.0)
    self.stride = config.model_conv_stride
    # filter : [time, frequency]
    self.mask = tf.keras.layers.Lambda(mh.feat_mask, name="pad_mask1")
    self.mask2 = tf.keras.layers.Lambda(mh.feat_mask2, name="pad_mask2")
    assert config.model_conv_layer_num < 4

    # Maxout conv layers. Note: the stride below overrides the config
    # value assigned above.
    kernel_size = 3
    self.stride = stride = 2
    self.cnn_n = cnn_n = config.model_conv_layer_num
    feat_dim = math.ceil(config.feat_dim / (stride**cnn_n))
    nfilt = config.model_conv_filter_num
    self.cnn_fe = CapsulationLayer(cnn_n,
                                   nfilt,
                                   kernel_size,
                                   stride,
                                   init,
                                   name="conv_feat")
    # Inner conv stack; layer names are numbered consecutively here (the
    # flattened original mixed "%d" % (i + 3) and "%d" % (i + 1)
    # offsets, which produced duplicate layer names).
    self.enc_layers = []
    for i in range(4):
        self.enc_layers.append(
            tf.keras.layers.Conv2D(filters=self.nfilt_inp,
                                   kernel_size=(5, 3),
                                   activation='linear',
                                   padding='same',
                                   strides=1,
                                   kernel_initializer=mh.get_init(init),
                                   name="inn_conv1_%d" % (i + 1),
                                   use_bias=False))
    for i in range(4, enc_num - 1):
        self.enc_layers.append(
            tf.keras.layers.Conv2D(filters=self.nfilt_inn,
                                   kernel_size=(5, 3),
                                   activation='linear',
                                   padding='same',
                                   strides=1,
                                   kernel_initializer=mh.get_init(init),
                                   name="inn_conv1_%d" % (i + 1),
                                   use_bias=False))
    # The last conv emits maxout pairs: twice the channels needed to
    # reach proj_dim after reshaping.
    last_filt = (self.proj_dim // feat_dim) * 2
    self.enc_layers.append(
        tf.keras.layers.Conv2D(filters=last_filt,
                               kernel_size=(5, 3),
                               activation='linear',
                               padding='same',
                               strides=1,
                               kernel_initializer=mh.get_init(init),
                               name="inn_conv1_%d" % enc_num,
                               use_bias=False))
    self.dropouts = [
        tf.keras.layers.Dropout(rate=0.2, name="inn_drop1_%d" % (i + 1))
        for i in range(enc_num)
    ]
    self.dropouts_cnn = [
        tf.keras.layers.Dropout(rate=config.train_inn_dropout,
                                name="inn_drop1_%d" % (i + 1))
        for i in range(enc_num)
    ]
    self.layernorms = [
        tf.keras.layers.LayerNormalization(epsilon=1e-6,
                                           name="inn_ln_%d" % (i + 1))
        for i in range(enc_num)
    ]

    # Maxout projection layers.
    self.reshape_to_maxout = tf.keras.layers.Reshape(
        (-1, feat_dim * (last_filt // 2)), name="reshape_to_ffwd")
    self.proj = [
        tf.keras.layers.TimeDistributed(
            tf.keras.layers.Dense(self.proj_dim,
                                  kernel_initializer=mh.get_init(init),
                                  name="proj1_%d" % (i + 1),
                                  use_bias=False))
        for i in range(self.proj_layers - 1)
    ]
    self.dropproj = [
        tf.keras.layers.Dropout(rate=0.2, name="proj_drop1_%d" % (i + 1))
        for i in range(self.proj_layers - 1)
    ]
    self.dropouts_proj = [
        tf.keras.layers.Dropout(rate=config.train_inn_dropout,
                                name="proj_drop1_%d" % (i + 1))
        for i in range(self.proj_layers - 1)
    ]
    self.layernorms_proj = [
        tf.keras.layers.LayerNormalization(epsilon=1e-6,
                                           name="proj_ln_%d" % (i + 1))
        for i in range(self.proj_layers - 1)
    ]
    self.input_dropout = tf.keras.layers.Dropout(
        rate=config.train_inp_dropout, name="inp_dropout")

    # Maxout last projection layer: emits class_n * 2 units (maxout pairs).
    self.projv = tf.keras.layers.TimeDistributed(
        tf.keras.layers.Dense(class_n * 2,
                              kernel_initializer=mh.get_init(init),
                              use_bias=False))
    self.dropprojv = tf.keras.layers.Dropout(rate=config.train_inn_dropout)
    self.layernorms_projv = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    # self.pool = tf.keras.layers.MaxPooling2D((1, 3))
    logger.info(
        "CNN CTC model, please check config model_conv_*, last_filt:%d",
        last_filt)
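
# The "* 2" filter/unit counts above imply a pairwise-max (maxout)
# reduction in the forward pass, which is not shown in this excerpt. A
# hypothetical sketch of that reduction over the channel axis, assuming
# adjacent channels form the pairs:
def maxout_sketch(x):
    """Split the last axis into pairs and keep the element-wise max."""
    shape = tf.shape(x)
    x = tf.reshape(x, tf.concat([shape[:-1], [shape[-1] // 2, 2]], axis=0))
    return tf.reduce_max(x, axis=-1)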