Ejemplo n.º 1
0
def decode2(inputs, is_training=True, scope="decoder2", reuse=None):
    '''Second-stage decoder: pre-net, CBHG-style stack, then a dense projection.

    Args:
      inputs: Tensor handed to the pre-net. The original annotation describes
        it as a float32 tensor of shape [N, T', C'] with C'=hp.n_mels*hp.r.
      is_training: Training-mode flag forwarded to the pre-net, the conv bank
        and both normalization layers.
      scope: Name passed to `tf.variable_scope`.
      reuse: Reuse flag passed to `tf.variable_scope`.

    Returns:
      Output of the final dense layer; its last dimension is
      (1 + hp.n_fft // 2) * hp.r.
    '''
    half_embed = hp.embed_size // 2

    with tf.variable_scope(scope, reuse=reuse):
        # Pre-net; its output is also the residual added back further down.
        residual = mod.prenet(inputs, is_training=is_training)

        # --- CBHG post-processing stack ---
        # Bank of 1-D convolutions.
        net = mod.conv1d_banks(residual,
                               K=hp.decoder_num_banks,
                               is_training=is_training)

        # Max pooling with pool size 2 and stride 1 ("same" padding).
        net = tf.layers.max_pooling1d(net, 2, 1, padding="same")

        # Two conv projections, each followed by normalization. Only the
        # first normalization applies an activation (ReLU).
        net = mod.conv1d(net, hp.embed_size, 3, scope="conv1d_1")
        net = mod.normalize(net,
                            type=hp.norm_type,
                            is_training=is_training,
                            activation_fn=tf.nn.relu,
                            scope="norm1")
        net = mod.conv1d(net, half_embed, 3, scope="conv1d_2")
        net = mod.normalize(net,
                            type=hp.norm_type,
                            is_training=is_training,
                            activation_fn=None,
                            scope="norm2")

        # Residual connection with the pre-net output.
        net = net + residual

        # Four stacked highway layers, each in its own scope.
        for layer_idx in range(4):
            net = mod.highwaynet(net,
                                 num_units=half_embed,
                                 scope='highwaynet_{}'.format(layer_idx))

        # GRU; the original comment labels this call (third argument True)
        # as bidirectional.
        net = mod.gru(net, half_embed, True)

        # Project to (1 + hp.n_fft // 2) * hp.r output channels.
        out_dim = (1 + hp.n_fft // 2) * hp.r
        outputs = tf.layers.dense(net, out_dim)

    return outputs
Ejemplo n.º 2
0
    def __init__(self, sess, n_mid, embedding_dim, hidden_size, batch_size, num_interest, dropout_rate=0.2,
                 seq_len=256, num_blocks=2):
        """Assemble the Model_SAKmeans graph.

        Adds a positional encoding to the embedded history, runs it through
        `num_blocks` self-attention / feed-forward blocks (re-applying the
        sequence mask after each block), obtains per-user vectors from
        `getKVector`, and builds the sampled-softmax loss on the vector that
        scores highest against the target item embedding.
        """
        super(Model_SAKmeans, self).__init__(n_mid, embedding_dim, hidden_size,
                                             batch_size, seq_len, flag="Model_SAKmeans")

        with tf.variable_scope("Model_SAKmeans", reuse=tf.AUTO_REUSE):

            # Positional encoding, given a leading axis so it broadcasts
            # against the embedded history.
            pos_enc = tf.expand_dims(positional_encoding(embedding_dim, seq_len), axis=0)
            self.mid_his_batch_embedded += pos_enc

            # Dropout is always active here: `training` is hard-wired to True.
            self.seq = tf.layers.dropout(self.mid_his_batch_embedded,
                                         rate=dropout_rate,
                                         training=tf.convert_to_tensor(True))
            # Re-apply the mask, reshaped to (-1, seq_len, 1).
            seq_mask = tf.reshape(self.mask, (-1, seq_len, 1))
            self.seq = self.seq * seq_mask

            # Stack of self-attention + feed-forward blocks.
            for block_idx in range(num_blocks):
                with tf.variable_scope("num_blocks_%d" % block_idx):

                    # Self-attention: normalized sequence as queries, raw
                    # sequence as keys.
                    attn_queries = normalize(self.seq)
                    self.seq = multihead_attention(queries=attn_queries,
                                                   keys=self.seq,
                                                   num_units=hidden_size,
                                                   num_heads=num_interest,
                                                   dropout_rate=dropout_rate,
                                                   is_training=True,
                                                   causality=True,
                                                   scope="self_attention")

                    # Feed-forward over the normalized attention output.
                    ffn_input = normalize(self.seq)
                    self.seq = feedforward(ffn_input,
                                           num_units=[hidden_size, hidden_size],
                                           dropout_rate=dropout_rate,
                                           is_training=True)
                    block_mask = tf.reshape(self.mask, (-1, seq_len, 1))
                    self.seq = self.seq * block_mask

            # Final normalization; original annotation: (b, seq_len, dim).
            self.seq = normalize(self.seq)

            self.user_eb = getKVector(sess, self.seq, num_interest)
            self.dim = embedding_dim
            item_list_emb = tf.reshape(self.seq, [-1, seq_len, embedding_dim])

            # Score every user vector against the target item:
            # (batch, num_heads, dim) x (batch, dim, 1) -> (batch, num_heads, 1)
            target_column = tf.reshape(self.item_eb, [get_shape(item_list_emb)[0], self.dim, 1])
            scores = tf.matmul(self.user_eb, target_column)
            scores = tf.reshape(scores, [get_shape(item_list_emb)[0], num_interest])
            # The exponent is 1, so tf.pow leaves the scores unchanged.
            atten = tf.nn.softmax(tf.pow(scores, 1))

            # Pick the user interest vector most similar to the target item.
            # user_eb is flattened to rows of size dim, so row b * num_heads + k
            # holds vector k of batch entry b.
            flat_user_eb = tf.reshape(self.user_eb, [-1, self.dim])
            best_head = tf.argmax(atten, axis=1, output_type=tf.int32)
            row_offsets = tf.range(tf.shape(item_list_emb)[0]) * num_interest
            readout = tf.gather(flat_user_eb, best_head + row_offsets)

            self.build_sampled_softmax_loss(self.item_eb, readout)
Ejemplo n.º 3
0
    def embedding(self, x, is_training=False):
        """
        Map a sequence of frames to a single embedding vector.

        The frames go through a dense layer, a conv bank, max pooling, two
        conv projections with a residual connection, `self.num_highway`
        highway layers and a GRU. The GRU output at the last time step is
        then projected to `self.num_classes` units.

        :param x: shape=(n, t, n_mels)
        :param is_training: forwarded to the conv bank and normalization layer
        :return: embedding. shape=(n, e)
        """
        # frame-level embedding
        x = tf.layers.dense(x, units=self.hidden_units, activation=tf.nn.relu)  # (n, t, h)

        out = conv1d_banks(x, K=self.num_banks, num_units=self.hidden_units, norm_type=self.norm_type,
                           is_training=is_training)  # (n, t, k * h)

        out = tf.layers.max_pooling1d(out, 2, 1, padding="same")  # (n, t, k * h)

        out = conv1d(out, self.hidden_units, 3, scope="conv1d_1")  # (n, t, h)
        out = normalize(out, type=self.norm_type, is_training=is_training, activation_fn=tf.nn.relu)
        out = conv1d(out, self.hidden_units, 3, scope="conv1d_2")  # (n, t, h)
        out += x  # (n, t, h) # residual connections

        for i in range(self.num_highway):
            out = highwaynet(out, num_units=self.hidden_units, scope='highwaynet_{}'.format(i))  # (n, t, h)

        out = gru(out, self.hidden_units, False)  # (n, t, h)

        # take the last output: index the time axis (axis 1) and keep all
        # hidden units. Indexing with `[..., -1]` would instead pick the last
        # hidden unit at every time step and yield (n, t).
        out = out[:, -1, :]  # (n, h)

        # embedding
        out = tf.layers.dense(out, self.num_classes, name='projection')  # (n, e)
        out = tf.identity(out, name="embedding")

        return out
Ejemplo n.º 4
0
    def __init__(self, n_mid, embedding_dim, hidden_size, batch_size, num_interest, dropout_rate=0.2,
                 seq_len=256, num_blocks=2):
        """Assemble the Model_SASRec graph.

        Adds a positional encoding to the embedded history, runs it through
        `num_blocks` self-attention / feed-forward blocks (re-applying the
        sequence mask after each block), sum-pools over the sequence axis and
        maps the result through four dense ReLU layers to the user embedding
        used for the sampled-softmax loss.
        """
        super(Model_SASRec, self).__init__(n_mid, embedding_dim, hidden_size,
                                           batch_size, seq_len, flag="Model_SASRec")

        with tf.variable_scope("Model_SASRec", reuse=tf.AUTO_REUSE):

            # Positional encoding, given a leading axis so it broadcasts
            # against the embedded history.
            pos_enc = tf.expand_dims(positional_encoding(embedding_dim, seq_len), axis=0)
            self.mid_his_batch_embedded += pos_enc

            # Dropout is always active here: `training` is hard-wired to True.
            self.seq = tf.layers.dropout(self.mid_his_batch_embedded,
                                         rate=dropout_rate,
                                         training=tf.convert_to_tensor(True))
            # Re-apply the mask, reshaped to (-1, seq_len, 1).
            seq_mask = tf.reshape(self.mask, (-1, seq_len, 1))
            self.seq = self.seq * seq_mask

            # Stack of self-attention + feed-forward blocks.
            for block_idx in range(num_blocks):
                with tf.variable_scope("num_blocks_%d" % block_idx):

                    # Self-attention: normalized sequence as queries, raw
                    # sequence as keys.
                    attn_queries = normalize(self.seq)
                    self.seq = multihead_attention(queries=attn_queries,
                                                   keys=self.seq,
                                                   num_units=hidden_size,
                                                   num_heads=num_interest,
                                                   dropout_rate=dropout_rate,
                                                   is_training=True,
                                                   causality=True,
                                                   scope="self_attention")

                    # Feed-forward over the normalized attention output.
                    ffn_input = normalize(self.seq)
                    self.seq = feedforward(ffn_input,
                                           num_units=[hidden_size, hidden_size],
                                           dropout_rate=dropout_rate,
                                           is_training=True)
                    block_mask = tf.reshape(self.mask, (-1, seq_len, 1))
                    self.seq = self.seq * block_mask

            # Final normalization; original annotation: (b, seq_len, dim).
            self.seq = normalize(self.seq)

            # Collapse the sequence axis by summation.
            self.sum_pooling = tf.reduce_sum(self.seq, 1)

            # Dense ReLU tower: 1024 -> 512 -> 256 -> hidden_size.
            hidden = tf.layers.dense(self.sum_pooling, 1024, activation=tf.nn.relu)
            hidden = tf.layers.dense(hidden, 512, activation=tf.nn.relu)
            hidden = tf.layers.dense(hidden, 256, activation=tf.nn.relu)
            self.user_eb = tf.layers.dense(hidden, hidden_size, activation=tf.nn.relu)

            self.build_sampled_softmax_loss(self.item_eb, self.user_eb)
    def network(self, ppgs, is_training):
        """Predict mixture parameters for every time step and spectrogram bin.

        `ppgs` goes through a pre-net, a dense layer and a CBHG module. A final
        dense layer emits 3 * n_bins * n_mixtures values per step, which are
        split into three equal slices and reshaped to
        (-1, n_timesteps, n_bins, n_mixtures), where n_timesteps and n_bins
        are read from the static shape of `self.y_spec`.

        Returns:
            (mu, log_var, log_pi): `mu` is sigmoid-squashed, `log_var` is
            clipped from below at -7.0, and `log_pi` is normalized and passed
            through log-softmax.
        """
        half_units = hp.train2.hidden_units // 2

        # Pre-net
        prenet_out = prenet(ppgs,
                            num_units=[hp.train2.hidden_units, half_units],
                            dropout_rate=hp.train2.dropout_rate,
                            is_training=is_training)

        # CBHG1 (mel-scale) is disabled; the pre-net output stands in for the
        # mel prediction.
        # pred_mel = cbhg(prenet_out, hp.train2.num_banks, hp.train2.hidden_units // 2,
        #                 hp.train2.num_highway_blocks, hp.train2.norm_type, is_training,
        #                 scope="cbhg_mel")
        # pred_mel = tf.layers.dense(pred_mel, self.y_mel.shape[-1])  # (N, T, n_mels)
        pred_mel = prenet_out

        # CBHG2: linear-scale
        hidden = tf.layers.dense(pred_mel, half_units)
        hidden = cbhg(hidden,
                      hp.train2.num_banks,
                      half_units,
                      hp.train2.num_highway_blocks,
                      hp.train2.norm_type,
                      is_training,
                      scope="cbhg_linear")

        _, n_timesteps, n_bins = self.y_spec.get_shape().as_list()
        n_units = n_bins * hp.train2.n_mixtures

        # One dense layer produces all three parameter groups; biases start
        # uniformly in [-3, 3].
        bias_init = tf.random_uniform_initializer(minval=-3., maxval=3.)
        params = tf.layers.dense(hidden, n_units * 3, bias_initializer=bias_init)

        # Target shape shared by all three outputs.
        mixture_shape = (-1, n_timesteps, n_bins, hp.train2.n_mixtures)

        # First slice: means, squashed by a sigmoid.
        mu = tf.reshape(tf.nn.sigmoid(params[..., :n_units]),
                        shape=mixture_shape)

        # Second slice: log-variances, clipped from below at -7.0.
        log_var = tf.reshape(tf.maximum(params[..., n_units:2 * n_units], -7.0),
                             shape=mixture_shape)

        # Third slice: mixture logits, normalized then log-softmaxed.
        log_pi = tf.reshape(params[..., 2 * n_units:3 * n_units],
                            shape=mixture_shape)
        # The training flag here comes from the tower context, not from the
        # `is_training` argument.
        log_pi = normalize(log_pi,
                           type='ins',
                           is_training=get_current_tower_context().is_training,
                           scope='normalize_pi')
        log_pi = tf.nn.log_softmax(log_pi)

        return mu, log_var, log_pi
Ejemplo n.º 6
0
    def _upsample_cond(self, melspec, is_training, strides):
        """Upsample the mel-spectrogram condition along the time axis.

        The method is chosen by `hp.model.cond_upsample_method`:

        * 'transposed_conv': one transposed convolution per entry of
          `strides`, each followed by ReLU and normalization.
        * 'repeat': a width-1 convolution plus ReLU, then every frame is
          repeated `hp.signal.hop_length` times.

        In both cases the result is trimmed along the time axis before being
        returned. Any other method yields None.

        The product of `strides` must equal `hp.signal.hop_length` (asserted).
        """
        hop = hp.signal.hop_length
        assert (np.prod(np.array(strides)) == hop)

        method = hp.model.cond_upsample_method

        # option1) Upsample melspec to fit to shape of waveform.
        # Original annotation: (n, t_mel, n_mel) => (n, t, h)
        if method == 'transposed_conv':
            out_channels = hp.model.condition_channels
            in_channels = hp.signal.n_mels
            length = self.t_mel

            # Add a height-1 axis so the 2-D transposed convolution can be used.
            cond = tf.expand_dims(melspec, 1)
            for idx, stride in enumerate(strides):
                kernel = tf.get_variable(
                    'transposed_conv_{}_weights'.format(idx),
                    shape=(1, stride, out_channels, in_channels))
                # Every layer after the first consumes condition_channels inputs.
                in_channels = out_channels
                length = length * stride
                cond = tf.nn.conv2d_transpose(
                    cond,
                    kernel,
                    output_shape=(self.batch_size, 1, length, out_channels),
                    strides=[1, 1, stride, 1])
                cond = tf.nn.relu(cond)
                cond = normalize(cond,
                                 method=hp.model.normalize_cond,
                                 is_training=is_training,
                                 name='normalize_transposed_conv_{}'.format(idx))
            cond = tf.squeeze(cond, 1)
            # Trim both ends. The upper bound is (-hop) // 2: unary minus
            # binds tighter than floor division.
            return cond[:, hop // 2:(-hop) // 2, :]  # (n, t, h)

        # option2) just copy value and expand dim of time step
        if method == 'repeat':
            kernel = tf.get_variable(
                'dense', [1, hp.signal.n_mels, hp.model.condition_channels])
            cond = tf.nn.conv1d(melspec, kernel, stride=1, padding="SAME")
            cond = tf.nn.relu(cond)
            # Tile the channel axis hop times, then fold the copies into the
            # time axis.
            tiled = tf.tile(cond, [1, 1, hop])
            cond = tf.reshape(tiled,
                              shape=[-1, self.t_mel * hop,
                                     hp.model.condition_channels])
            return cond[:, hop // 2:(-hop) // 2, :]

        # Unknown method: no conditioning tensor.
        return None
Ejemplo n.º 7
0
    def __call__(self,
                 wav,
                 melspec,
                 is_training,
                 name='iaf_vocoder'):  # network
        """Build the IAF vocoder graph and return its output tensor.

        A sample from a standard logistic distribution of shape
        (self.batch_size, self.length, 1) is passed through `hp.model.n_iaf`
        LinearIAFLayer flows. Each flow owns two WaveNets (a scaler and a
        shifter) and is followed by a normalization step.

        Args:
            wav: Not used by this method.
            melspec: Mel-spectrogram that is upsampled into the conditioning
                tensor via `self._upsample_cond`.
            is_training: Training-mode flag forwarded to the normalization
                layers and the WaveNets.
            name: Variable scope wrapping the whole network.

        Returns:
            The output of the last flow after normalization.
        """
        with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
            with tf.variable_scope('cond'):
                condition = self._upsample_cond(melspec,
                                                is_training=is_training,
                                                strides=[4, 4, 5])  # (n, t, h)
                if hp.model.normalize_cond:
                    with tf.variable_scope('normalize'):
                        condition = normalize(condition,
                                              method=hp.model.normalize_cond,
                                              is_training=is_training)

            # Sample from logistic dist.
            logstic_dist = tf.contrib.distributions.Logistic(loc=0., scale=1.)
            # Named `flow` rather than `input` so the builtin is not shadowed.
            flow = logstic_dist.sample([self.batch_size, self.length, 1])
            for i in range(hp.model.n_iaf):
                with tf.variable_scope('iaf{}'.format(i)):
                    # Scaler and shifter are configured identically apart
                    # from their names.
                    wavenet_kwargs = dict(
                        batch_size=self.batch_size,
                        dilations=hp.model.dilations[i],
                        filter_width=hp.model.filter_width,
                        residual_channels=hp.model.residual_channels,
                        dilation_channels=hp.model.dilation_channels,
                        quantization_channels=1,
                        skip_channels=hp.model.skip_channels,
                        use_biases=hp.model.use_biases,
                        condition_channels=hp.model.condition_channels,
                        use_skip_connection=hp.model.use_skip_connection,
                        is_training=is_training,
                        normalize=hp.model.normalize_wavenet,
                    )
                    # The scope name 'scalar' is kept verbatim because it is
                    # passed to WaveNet as a runtime name.
                    scaler = WaveNet(name='scalar', **wavenet_kwargs)
                    shifter = WaveNet(name='shifter', **wavenet_kwargs)
                    # NOTE(review): this uses hp.train.batch_size while the
                    # WaveNets use self.batch_size — confirm they agree.
                    iaf = LinearIAFLayer(batch_size=hp.train.batch_size,
                                         scaler=scaler,
                                         shifter=shifter)
                    # Condition every flow when condition_all_iaf is set,
                    # otherwise only the first. Compare by value (`==`);
                    # `is` on an int literal relies on interpreter caching.
                    use_condition = hp.model.condition_all_iaf or i == 0
                    flow = iaf(flow,
                               condition if use_condition else None)  # (n, t, h)

                # normalization
                flow = normalize(flow,
                                 method=hp.model.normalize,
                                 is_training=is_training,
                                 name='normalize{}'.format(i))

        if hp.train.use_ema:
            self.ema = tf.train.ExponentialMovingAverage(
                decay=hp.train.ema_decay)
            # NOTE(review): the scope filter is hard-coded to 'iaf_vocoder'
            # and does not follow a non-default `name` — confirm intent.
            var_class = tf.trainable_variables('iaf_vocoder')
            ema_op = self.ema.apply(var_class)
            tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, ema_op)

        return flow
Ejemplo n.º 8
0
    def init_inference(self, config, is_training=False):
        """Build the forward graph and store its endpoints on `self`.

        Creates the input and sequence-length placeholders and the
        non-trainable `mean` / `std` variables, standardizes the inputs, then
        runs them through a conv layer, a conv bank, max pooling, two conv
        projections with a residual connection, highway layers and a GRU.
        The GRU output is projected to `alphabet_size` logits and
        probabilities, stored with the leading axes reshaped to
        (-1, batch_size).

        Args:
            config: Mapping with the keys 'num_banks', 'hidden_units',
                'num_highway', 'norm_type', 'batch_size', 'num_rnn_layer',
                'input_dim' and 'alphabet_size'.
            is_training: Forwarded to the conv bank, normalization and GRU.
        """
        num_banks = config['num_banks']
        hidden_units = config['hidden_units']
        num_highway = config['num_highway']
        norm_type = config['norm_type']
        batch_size = config['batch_size']
        num_rnn_layer = config['num_rnn_layer']
        self._input_dim = input_dim = config['input_dim']
        self._output_dim = output_dim = config['alphabet_size']

        self._inputs = tf.placeholder(tf.float32,
                                      [batch_size, None, input_dim])
        self._seq_lens = tf.placeholder(tf.int32, shape=batch_size)
        # No layer below changes the number of time steps (pooling has
        # stride 1 with "same" padding), so output lengths equal input lengths.
        self._out_lens = self._seq_lens

        # TODO, awni, for now on the client to remember to initialize these.
        self._mean = tf.get_variable("mean", shape=input_dim, trainable=False)
        self._std = tf.get_variable("std", shape=input_dim, trainable=False)

        std_inputs = (self._inputs - self._mean) / self._std

        # Feed the standardized inputs into the network. Previously the raw
        # placeholder was passed here, which left `std_inputs` (and with it
        # the mean/std variables) unused.
        x = conv1d(std_inputs, hidden_units, 1, scope="conv1d")

        out = conv1d_banks(x,
                           K=num_banks,
                           num_units=hidden_units,
                           norm_type=norm_type,
                           is_training=is_training)  # (n, t, k * h)

        out = tf.layers.max_pooling1d(out, 2, 1,
                                      padding="same")  # (n, t, k * h)

        out = conv1d(out, hidden_units, 3, scope="conv1d_1")  # (n, t, h)
        out = normalize(out,
                        type=norm_type,
                        is_training=is_training,
                        activation_fn=tf.nn.relu)
        out = conv1d(out, hidden_units, 3, scope="conv1d_2")  # (n, t, h)

        out += x  # (n, t, h) # residual connections

        for i in range(num_highway):
            out = highwaynet(out,
                             num_units=hidden_units,
                             scope='highwaynet_{}'.format(i))  # (n, t, h)

        rnn_out, state, initial_state = gru(
            out,
            hidden_units,
            False,
            seqlens=self._seq_lens,
            num_layers=num_rnn_layer,
            is_training=is_training)  # (n, t, h)

        self._initial_state = initial_state
        self._rnn_state = state
        # Swap the first two axes so time comes before batch.
        rnn_out = tf.transpose(rnn_out, [1, 0, 2])

        # Collapse time and batch dims pre softmax.
        rnn_out = tf.reshape(rnn_out, (-1, hidden_units))
        logits, probas = _add_softmax_linear(
            rnn_out,
            hidden_units,
            output_dim,
            initializer=tf.contrib.layers.xavier_initializer())
        # Reshape to time-major.
        self._logits = tf.reshape(logits, (-1, batch_size, output_dim))
        self._probas = tf.reshape(probas, (-1, batch_size, output_dim))

        self._init_inference = True
Ejemplo n.º 9
0
    def __init__(self, n_mid, embedding_dim, hidden_size, batch_size, num_interest, dropout_rate=0.2,
                 seq_len=256, num_blocks=2):
        """Assemble the Model_MSARec graph.

        Adds a positional encoding to the embedded history, runs it through
        `num_blocks` self-attention / feed-forward blocks (re-applying the
        sequence mask after each block), derives `num_interest` interest
        vectors with three dense layers, and builds the sampled-softmax loss
        on the interest vector that scores highest against the target item.
        """
        super(Model_MSARec, self).__init__(n_mid, embedding_dim, hidden_size,
                                           batch_size, seq_len, flag="MSARec")

        with tf.variable_scope("MSARec", reuse=tf.AUTO_REUSE):

            # Positional encoding, given a leading axis so it broadcasts
            # against the embedded history.
            pos_enc = tf.expand_dims(positional_encoding(embedding_dim, seq_len), axis=0)
            self.mid_his_batch_embedded += pos_enc

            # Dropout is always active here: `training` is hard-wired to True.
            self.seq = tf.layers.dropout(self.mid_his_batch_embedded,
                                         rate=dropout_rate,
                                         training=tf.convert_to_tensor(True))
            # Re-apply the mask, reshaped to (-1, seq_len, 1).
            seq_mask = tf.reshape(self.mask, (-1, seq_len, 1))
            self.seq = self.seq * seq_mask

            # Stack of self-attention + feed-forward blocks.
            for block_idx in range(num_blocks):
                with tf.variable_scope("num_blocks_%d" % block_idx):

                    # Self-attention: normalized sequence as queries, raw
                    # sequence as keys.
                    attn_queries = normalize(self.seq)
                    self.seq = multihead_attention(queries=attn_queries,
                                                   keys=self.seq,
                                                   num_units=hidden_size,
                                                   num_heads=num_interest,
                                                   dropout_rate=dropout_rate,
                                                   is_training=True,
                                                   causality=True,
                                                   scope="self_attention")

                    # Feed-forward over the normalized attention output.
                    ffn_input = normalize(self.seq)
                    self.seq = feedforward(ffn_input,
                                           num_units=[hidden_size, hidden_size],
                                           dropout_rate=dropout_rate,
                                           is_training=True)
                    block_mask = tf.reshape(self.mask, (-1, seq_len, 1))
                    self.seq = self.seq * block_mask

            # Final normalization; original annotation: (b, seq_len, dim).
            self.seq = normalize(self.seq)

            self.dim = embedding_dim

            item_list_emb = tf.reshape(self.seq, [-1, seq_len, embedding_dim])
            # t = tf.expand_dims(positional_encoding(embedding_dim, seq_len), axis=0)
            # item_list_add_pos = item_list_emb + t

            # Interest extraction: dense (ReLU) -> dense (tanh) with one unit
            # per interest -> swap the last two axes -> dense (ReLU) to
            # embedding_dim units.
            hidden = tf.layers.dense(item_list_emb, hidden_size * 4, activation=tf.nn.relu)
            head_scores = tf.layers.dense(hidden, num_interest, activation=tf.nn.tanh)
            # (b, num_heads, seq_len)
            head_scores = tf.transpose(head_scores, [0, 2, 1])
            interest_emb = tf.layers.dense(head_scores, embedding_dim, activation=tf.nn.relu)

            # Disabled alternative: masked softmax attention over the items.
            # with tf.variable_scope("multi_interest", reuse=tf.AUTO_REUSE) as scope:
            #     # item_list_add_pos: (b, seq_len, embedding_dim)
            #     # item_hidden: (b, sql_len, hidden_size * 4)
            #     # item_hidden = tf.layers.dense(item_list_add_pos, hidden_size * 4, activation=tf.nn.tanh)
            #     item_hidden = tf.layers.dense(item_list_emb, hidden_size * 4, activation=tf.nn.tanh)
            #     # item_att_w: (b, sql_len, num_heads)
            #     item_att_w = tf.layers.dense(item_hidden, num_heads, activation=tf.nn.tanh)
            #     # item_att_w: (b, num_heads, sql_len)
            #     item_att_w = tf.transpose(item_att_w, [0, 2, 1])
            #
            #     # atten_mask: (b, num_heads, sql_len)
            #     atten_mask = tf.tile(tf.expand_dims(self.mask, axis=1), [1, num_heads, 1])
            #     paddings = tf.ones_like(atten_mask) * (-2 ** 32 + 1)
            #
            #     # Assign a very small value to the padded positions
            #     item_att_w = tf.where(tf.equal(atten_mask, 0), paddings, item_att_w)
            #     item_att_w = tf.nn.softmax(item_att_w)
            #
            #     # item_att_w [batch, num_heads, seq_len]
            #     # item_list_emb [batch, seq_len, embedding_dim]
            #     # interest_emb (batch, num_heads, embedding_dim)
            #     interest_emb = tf.matmul(item_att_w, item_list_emb)

            self.user_eb = interest_emb

            # Score every interest vector against the target item:
            # (batch, num_heads, dim) x (batch, dim, 1) -> (batch, num_heads, 1)
            target_column = tf.reshape(self.item_eb, [get_shape(item_list_emb)[0], self.dim, 1])
            scores = tf.matmul(self.user_eb, target_column)
            scores = tf.reshape(scores, [get_shape(item_list_emb)[0], num_interest])
            # The exponent is 1, so tf.pow leaves the scores unchanged.
            atten = tf.nn.softmax(tf.pow(scores, 1))

            # Pick the user interest vector most similar to the target item.
            # user_eb is flattened to rows of size dim, so row b * num_heads + k
            # holds interest k of batch entry b.
            flat_user_eb = tf.reshape(self.user_eb, [-1, self.dim])
            best_head = tf.argmax(atten, axis=1, output_type=tf.int32)
            row_offsets = tf.range(tf.shape(item_list_emb)[0]) * num_interest
            readout = tf.gather(flat_user_eb, best_head + row_offsets)

            self.build_sampled_softmax_loss(self.item_eb, readout)
Ejemplo n.º 10
0
def encode(inputs, is_training=True, scope="encoder", reuse=None):
    '''Encoder: spectrogram front-end, pre-net and a CBHG-style stack.

    Args:
      inputs: Tensor handed to `mod.pre_spectro`. Its expected shape and
        dtype are not visible here (TODO confirm against the caller).
      is_training: Training-mode flag forwarded to the front-end, the
        pre-net, the conv bank and both normalization layers.
      scope: Name passed to `tf.variable_scope`.
      reuse: Reuse flag passed to `tf.variable_scope`.

    Returns:
      The GRU output ("memory"). The original annotation gives its shape as
      (N, T, E): N sequences of T steps, each encoded as an E-dimensional
      latent vector.
    '''
    half_embed = hp.embed_size // 2

    with tf.variable_scope(scope, reuse=reuse):
        # The character-embedding path is disabled; inputs go through the
        # spectrogram front-end instead.
        #char2idx, idx2char = load_vocab()
        #inputs = mod.embed(inputs, len(char2idx), hp.embed_size) # (N, T, E) shape=(32, ?, 256)
        #ipdb.set_trace()
        features = mod.pre_spectro(inputs, is_training=is_training)

        # Pre-net; its output is also the residual added back further down.
        residual = mod.prenet(features, is_training=is_training)

        # --- CBHG stack ---
        # Bank of 1-D convolutions.
        net = mod.conv1d_banks(residual,
                               K=hp.encoder_num_banks,
                               is_training=is_training)

        # Max pooling with pool size 2 and stride 1 ("same" padding).
        net = tf.layers.max_pooling1d(net, 2, 1, padding="same")

        # Two conv projections, each followed by normalization. Only the
        # first normalization applies an activation (ReLU).
        net = mod.conv1d(net, half_embed, 3, scope="conv1d_1")
        net = mod.normalize(net,
                            type=hp.norm_type,
                            is_training=is_training,
                            activation_fn=tf.nn.relu,
                            scope="norm1")
        net = mod.conv1d(net, half_embed, 3, scope="conv1d_2")
        net = mod.normalize(net,
                            type=hp.norm_type,
                            is_training=is_training,
                            activation_fn=None,
                            scope="norm2")

        # Residual connection with the pre-net output.
        net = net + residual

        # hp.num_highwaynet_blocks stacked highway layers, each in its own scope.
        for layer_idx in range(hp.num_highwaynet_blocks):
            net = mod.highwaynet(net,
                                 num_units=half_embed,
                                 scope='highwaynet_{}'.format(layer_idx))

        # GRU over the highway output.
        # NOTE(review): the original comments call this "Bidirectional GRU"
        # with an (N, T, E) output, yet the third argument is False —
        # confirm against mod.gru's signature.
        memory = mod.gru(net, half_embed, False)

    return memory