# Example 1 (score: 0)
def _gru_layer(h_prev, x, name='gru', x_dim=256, y_dim=512, reuse=None):
    """
    Single GRU step; used for both encoder layers.

    Args:
        h_prev: previous hidden state, presumably shape (batch, y_dim) — TODO confirm.
        x: current input, presumably shape (batch, x_dim) — TODO confirm.
        name: variable scope wrapping the whole layer.
        x_dim: input dimensionality.
        y_dim: hidden-state dimensionality.
        reuse: forwarded to the per-gate variable scopes for weight sharing.

    Returns:
        The updated hidden state tensor.
    """

    def _gate_params(scope):
        # Create the input weight, hidden weight and bias for one gate,
        # inside its own sub-scope so names are 'name/scope/weight_input' etc.
        with tf.variable_scope(scope, reuse=reuse):
            w_in = tf.get_variable(
                name='weight_input',
                shape=(x_dim, y_dim),
                initializer=tf.random_normal_initializer(stddev=0.01))
            w_hid = tf.get_variable(
                name='weight_hidden',
                shape=(y_dim, y_dim),
                initializer=initializer.orthogonal_initializer(0.01))
            bias = tf.get_variable(
                name='bias',
                shape=(y_dim, ),
                initializer=tf.constant_initializer(0.0))
        return w_in, w_hid, bias

    with tf.variable_scope(name):

        # Reset gate: decides how much of h_prev feeds the candidate.
        Wi_r, Wh_r, b_r = _gate_params('reset_gate')
        r = tf.sigmoid(tf.matmul(x, Wi_r) + tf.matmul(h_prev, Wh_r) + b_r)

        # Update gate: interpolation weight between h_prev and the candidate.
        Wi_z, Wh_z, b_z = _gate_params('update_gate')
        z = tf.sigmoid(tf.matmul(x, Wi_z) + tf.matmul(h_prev, Wh_z) + b_z)

        # Candidate update: tanh over the input and the reset-scaled state.
        Wi_h_tilde, Wh_h_tilde, b_h_tilde = _gate_params('candidate_update')
        h_tilde = tf.tanh(
            tf.matmul(x, Wi_h_tilde) + tf.matmul(r * h_prev, Wh_h_tilde) +
            b_h_tilde)

        # Final update: convex combination of previous state and candidate.
        h = tf.subtract(np.float32(1.0), z) * h_prev + z * h_tilde

    return h
# Example 2 (score: 0)
def _rnn_layer(h_prev, x, name='rnn', x_dim=256, y_dim=512, reuse=None):
    """
    Single vanilla-RNN step; used for both encoder layers.
    Kept around for debugging purposes.

    Args:
        h_prev: previous hidden state, presumably shape (batch, y_dim) — TODO confirm.
        x: current input, presumably shape (batch, x_dim) — TODO confirm.
        name: variable scope for the layer's parameters.
        x_dim: input dimensionality.
        y_dim: hidden-state dimensionality.
        reuse: forwarded to the variable scope for weight sharing.

    Returns:
        The updated hidden state tensor.
    """

    with tf.variable_scope(name, reuse=reuse):

        input_weights = tf.get_variable(
            name='weight_input',
            shape=(x_dim, y_dim),
            initializer=tf.random_normal_initializer(stddev=0.01))
        hidden_weights = tf.get_variable(
            name='weight_hidden',
            shape=(y_dim, y_dim),
            initializer=initializer.orthogonal_initializer(0.01))
        # NOTE(review): unlike the GRU layers, the bias here is drawn from a
        # truncated normal rather than initialised to a constant.
        bias = tf.get_variable(
            name='bias',
            shape=(y_dim, ),
            initializer=tf.truncated_normal_initializer(mean=0.0,
                                                        stddev=1.0))

        pre_activation = (tf.matmul(x, input_weights) +
                          tf.matmul(h_prev, hidden_weights) + bias)
        h = tf.tanh(pre_activation)

    return h
def _gru_layer_with_state_bias(h_prev, x, state, name='gru', x_dim=256, y_dim=1024, s_dim=512, reuse=None):
    """
    Single GRU step with an additional state bias; used for the decoder.

    In this GRU the state of the session encoder layer is fed into all three
    gate computations ("full" state bias in the decoder), which we did not
    use in the end.

    Args:
        h_prev: previous hidden state, presumably shape (batch, y_dim) — TODO confirm.
        x: current input, presumably shape (batch, x_dim) — TODO confirm.
        state: session-encoder state, presumably shape (batch, s_dim) — TODO confirm.
        name: variable scope wrapping the whole layer.
        x_dim: input dimensionality.
        y_dim: hidden-state dimensionality.
        s_dim: session-encoder state dimensionality.
        reuse: forwarded to the per-gate variable scopes for weight sharing.

    Returns:
        The updated hidden state tensor.
    """

    with tf.variable_scope(name):

        # Reset gate (with extra state term)
        with tf.variable_scope('reset_gate', reuse=reuse):
            Wi_r = tf.get_variable(name='weight_input', shape=(x_dim, y_dim), initializer=tf.random_normal_initializer(stddev=0.01))
            Wh_r = tf.get_variable(name='weight_hidden', shape=(y_dim, y_dim), initializer=initializer.orthogonal_initializer(0.01))
            Ws_r = tf.get_variable(name='weight_state', shape=(s_dim, y_dim), initializer=tf.random_normal_initializer(stddev=0.01))
            b_r = tf.get_variable(name='bias', shape=(y_dim,), initializer=tf.constant_initializer(0.0))
            r = tf.sigmoid(tf.matmul(x, Wi_r) + tf.matmul(h_prev, Wh_r) + tf.matmul(state, Ws_r) + b_r)

        # Update gate (with extra state term)
        with tf.variable_scope('update_gate', reuse=reuse):
            Wi_z = tf.get_variable(name='weight_input', shape=(x_dim, y_dim), initializer=tf.random_normal_initializer(stddev=0.01))
            Wh_z = tf.get_variable(name='weight_hidden', shape=(y_dim, y_dim), initializer=initializer.orthogonal_initializer(0.01))
            Ws_z = tf.get_variable(name='weight_state', shape=(s_dim, y_dim), initializer=tf.random_normal_initializer(stddev=0.01))
            b_z = tf.get_variable(name='bias', shape=(y_dim,), initializer=tf.constant_initializer(0.0))
            z = tf.sigmoid(tf.matmul(x, Wi_z) + tf.matmul(h_prev, Wh_z) + tf.matmul(state, Ws_z) + b_z)

        # Candidate update (reset-scaled previous state plus state term)
        with tf.variable_scope('candidate_update', reuse=reuse):
            Wi_h_tilde = tf.get_variable(name='weight_input', shape=(x_dim, y_dim), initializer=tf.random_normal_initializer(stddev=0.01))
            Wh_h_tilde = tf.get_variable(name='weight_hidden', shape=(y_dim, y_dim), initializer=initializer.orthogonal_initializer(0.01))
            Ws_h_tilde = tf.get_variable(name='weight_state', shape=(s_dim, y_dim), initializer=tf.random_normal_initializer(stddev=0.01))
            b_h_tilde = tf.get_variable(name='bias', shape=(y_dim,), initializer=tf.constant_initializer(0.0))
            h_tilde = tf.tanh(tf.matmul(x, Wi_h_tilde) + \
                      tf.matmul(r * h_prev, Wh_h_tilde) + \
                      tf.matmul(state, Ws_h_tilde) + \
                      b_h_tilde)

        # Final update: convex combination of previous state and candidate.
        # FIX: tf.sub was removed in TensorFlow 1.0 (renamed tf.subtract);
        # use tf.subtract, consistent with _gru_layer.
        h = tf.subtract(np.float32(1.0), z) * h_prev + z * h_tilde

    return h