Example #1
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']

    # embedding to gates transformation weights, biases
    W = numpy.concatenate(
        [norm_weight(nin, dim), norm_weight(nin, dim)], axis=1)
    params[pp(prefix, 'W')] = W
    params[pp(prefix, 'b')] = numpy.zeros((2 * dim, )).astype('float32')

    # recurrent transformation weights for gates
    U = numpy.concatenate([ortho_weight(dim), ortho_weight(dim)], axis=1)
    params[pp(prefix, 'U')] = U

    # embedding to hidden state proposal weights, biases
    Wx = norm_weight(nin, dim)
    params[pp(prefix, 'Wx')] = Wx
    params[pp(prefix, 'bx')] = numpy.zeros((dim, )).astype('float32')

    # recurrent transformation weights for hidden state proposal
    Ux = ortho_weight(dim)
    params[pp(prefix, 'Ux')] = Ux

    return params
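
Examples #1 and #5 are Theano-style initializers that rely on three helpers not shown on this page: norm_weight, ortho_weight, and pp. Below is a minimal sketch of how these are typically defined in dl4mt/Nematus-style code; the exact definitions live elsewhere in the project, so treat this as an assumption rather than the project's own code.

# Assumed sketch of the helpers used by param_init_gru / param_init_gru_cond
# (standard dl4mt/Nematus-style definitions; not copied from this page).
import numpy

def ortho_weight(ndim):
    # random orthogonal matrix obtained via SVD of a Gaussian matrix
    W = numpy.random.randn(ndim, ndim)
    u, s, v = numpy.linalg.svd(W)
    return u.astype('float32')

def norm_weight(nin, nout=None, scale=0.01, ortho=True):
    # scaled Gaussian matrix; falls back to an orthogonal one when square
    if nout is None:
        nout = nin
    if nout == nin and ortho:
        W = ortho_weight(nin)
    else:
        W = scale * numpy.random.randn(nin, nout)
    return W.astype('float32')

def pp(prefix, name):
    # builds parameter names such as 'gru_W', 'gru_U', ...
    return '%s_%s' % (prefix, name)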
Example #2
    def __init__(self,
                 input_size,
                 state_size,
                 batch_size,
                 use_layer_norm=False,
                 nematus_compat=False,
                 dropout_input=None,
                 dropout_state=None):
        self.state_to_gates = tf.Variable(
            numpy.concatenate([ortho_weight(state_size),
                               ortho_weight(state_size)], axis=1),
            name='state_to_gates')
        self.input_to_gates = tf.Variable(
            numpy.concatenate([norm_weight(input_size, state_size),
                               norm_weight(input_size, state_size)], axis=1),
            name='input_to_gates')
        self.gates_bias = tf.Variable(
            numpy.zeros((2 * state_size,)).astype('float32'),
            name='gates_bias')

        self.state_to_proposal = tf.Variable(ortho_weight(state_size),
                                             name='state_to_proposal')
        self.input_to_proposal = tf.Variable(norm_weight(input_size, state_size),
                                             name='input_to_proposal')
        self.proposal_bias = tf.Variable(
            numpy.zeros((state_size,)).astype('float32'),
            name='proposal_bias')
        self.nematus_compat = nematus_compat
        self.use_layer_norm = use_layer_norm

        if self.use_layer_norm:
            with tf.name_scope('gates_x_norm'):
                self.gates_x_norm = LayerNormLayer(2 * state_size)
            with tf.name_scope('gates_state_norm'):
                self.gates_state_norm = LayerNormLayer(2 * state_size)
            with tf.name_scope('proposal_x_norm'):
                self.proposal_x_norm = LayerNormLayer(state_size)
            with tf.name_scope('proposal_state_norm'):
                self.proposal_state_norm = LayerNormLayer(state_size)

        # Create dropout masks for input values (reused at every timestep).
        if dropout_input is None:
            self.dropout_mask_input_to_gates = None
            self.dropout_mask_input_to_proposal = None
        else:
            ones = tf.ones([batch_size, input_size])
            self.dropout_mask_input_to_gates = dropout_input(ones)
            self.dropout_mask_input_to_proposal = dropout_input(ones)

        # Create dropout masks for state values (reused at every timestep).
        if dropout_state is None:
            self.dropout_mask_state_to_gates = None
            self.dropout_mask_state_to_proposal = None
        else:
            ones = tf.ones([batch_size, state_size])
            self.dropout_mask_state_to_gates = dropout_state(ones)
            self.dropout_mask_state_to_proposal = dropout_state(ones)
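
In Examples #2 through #4, dropout_input and dropout_state are expected to be callables that map a tensor of ones to a dropout mask of the same shape; the mask is sampled once in the constructor and reused at every timestep. A hypothetical way to construct them follows; the keep probability and the TF1-style tf.nn.dropout call are illustrative assumptions, not taken from this page.

# Hypothetical construction of the dropout callables passed to __init__.
# Uses the TF1-style tf.nn.dropout signature to match the surrounding code.
import tensorflow as tf

def make_dropout_fn(keep_prob):
    def apply_dropout(ones):
        # Sampling dropout over a ones tensor yields a rescaled binary mask
        # that can be multiplied into the inputs at every timestep.
        return tf.nn.dropout(ones, keep_prob=keep_prob)
    return apply_dropout

dropout_input = make_dropout_fn(0.8)   # keep 80% of input units
dropout_state = make_dropout_fn(0.8)   # keep 80% of state units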
Example #3
    def __init__(self,
                 input_size,
                 state_size,
                 batch_size,
                 use_layer_norm=False,
                 legacy_bias_type=LegacyBiasType.NEMATUS_COMPAT_FALSE,
                 dropout_input=None,
                 dropout_state=None):
        init = tf.concat([initializers.ortho_weight(state_size),
                          initializers.ortho_weight(state_size)],
                         axis=1)
        self.state_to_gates = tf.get_variable('state_to_gates',
                                              initializer=init)
        if input_size > 0:
            init = tf.concat([initializers.norm_weight(input_size, state_size),
                              initializers.norm_weight(input_size, state_size)],
                             axis=1)
            self.input_to_gates = tf.get_variable('input_to_gates',
                                                  initializer=init)

        if input_size == 0 and legacy_bias_type == LegacyBiasType.NEMATUS_COMPAT_FALSE:
            self.gates_bias = None
        else:
            self.gates_bias = tf.get_variable('gates_bias', [2 * state_size],
                                              initializer=tf.zeros_initializer)

        init = initializers.ortho_weight(state_size)
        self.state_to_proposal = tf.get_variable('state_to_proposal',
                                                 initializer=init)
        if input_size > 0:
            init = initializers.norm_weight(input_size, state_size)
            self.input_to_proposal = tf.get_variable('input_to_proposal',
                                                     initializer=init)

        if input_size == 0 and legacy_bias_type == LegacyBiasType.NEMATUS_COMPAT_FALSE:
            self.proposal_bias = None
        else:
            self.proposal_bias = tf.get_variable(
                'proposal_bias', [state_size],
                initializer=tf.zeros_initializer)

        self.legacy_bias_type = legacy_bias_type
        self.use_layer_norm = use_layer_norm

        self.gates_state_norm = None
        self.proposal_state_norm = None
        self.gates_x_norm = None
        self.proposal_x_norm = None
        if self.use_layer_norm:
            with tf.variable_scope('gates_state_norm'):
                self.gates_state_norm = LayerNormLayer(2 * state_size)
            with tf.variable_scope('proposal_state_norm'):
                self.proposal_state_norm = LayerNormLayer(state_size)
            if input_size > 0:
                with tf.variable_scope('gates_x_norm'):
                    self.gates_x_norm = LayerNormLayer(2 * state_size)
                with tf.variable_scope('proposal_x_norm'):
                    self.proposal_x_norm = LayerNormLayer(state_size)

        # Create dropout masks for input values (reused at every timestep).
        if dropout_input is None:
            self.dropout_mask_input_to_gates = None
            self.dropout_mask_input_to_proposal = None
        else:
            ones = tf.ones([batch_size, input_size])
            self.dropout_mask_input_to_gates = dropout_input(ones)
            self.dropout_mask_input_to_proposal = dropout_input(ones)

        # Create dropout masks for state values (reused at every timestep).
        if dropout_state is None:
            self.dropout_mask_state_to_gates = None
            self.dropout_mask_state_to_proposal = None
        else:
            ones = tf.ones([batch_size, state_size])
            self.dropout_mask_state_to_gates = dropout_state(ones)
            self.dropout_mask_state_to_proposal = dropout_state(ones)
Example #4
File: layers.py Project: rsennrich/nematus
    def __init__(self, 
                 input_size, 
                 state_size,
                 batch_size,
                 use_layer_norm=False,
                 legacy_bias_type=LegacyBiasType.NEMATUS_COMPAT_FALSE,
                 dropout_input=None,
                 dropout_state=None):
        init = tf.concat([initializers.ortho_weight(state_size),
                          initializers.ortho_weight(state_size)],
                         axis=1)
        self.state_to_gates = tf.get_variable('state_to_gates',
                                              initializer=init)
        if input_size > 0:
            init = tf.concat([initializers.norm_weight(input_size, state_size),
                              initializers.norm_weight(input_size, state_size)],
                             axis=1)
            self.input_to_gates = tf.get_variable('input_to_gates',
                                                  initializer=init)

        if input_size > 0 or legacy_bias_type == LegacyBiasType.THEANO_A:
            self.gates_bias = tf.get_variable('gates_bias', [2*state_size],
                                              initializer=tf.zeros_initializer)
        else:
            self.gates_bias = None

        init = initializers.ortho_weight(state_size)
        self.state_to_proposal = tf.get_variable('state_to_proposal',
                                                 initializer=init)
        if input_size > 0:
            init = initializers.norm_weight(input_size, state_size)
            self.input_to_proposal = tf.get_variable('input_to_proposal',
                                                     initializer=init)

        if input_size > 0 or legacy_bias_type == LegacyBiasType.THEANO_A:
            self.proposal_bias = tf.get_variable('proposal_bias', [state_size],
                                                 initializer=tf.zeros_initializer)
        else:
            self.proposal_bias = None

        self.legacy_bias_type = legacy_bias_type
        self.use_layer_norm = use_layer_norm

        self.gates_state_norm = None
        self.proposal_state_norm = None
        self.gates_x_norm = None
        self.proposal_x_norm = None
        if self.use_layer_norm:
            with tf.variable_scope('gates_state_norm'):
                self.gates_state_norm = LayerNormLayer(2*state_size)
            with tf.variable_scope('proposal_state_norm'):
                self.proposal_state_norm = LayerNormLayer(state_size)
            if input_size > 0:
                with tf.variable_scope('gates_x_norm'):
                    self.gates_x_norm = LayerNormLayer(2*state_size)
                with tf.variable_scope('proposal_x_norm'):
                    self.proposal_x_norm = LayerNormLayer(state_size)

        # Create dropout masks for input values (reused at every timestep).
        if dropout_input is None:
            self.dropout_mask_input_to_gates = None
            self.dropout_mask_input_to_proposal = None
        else:
            ones = tf.ones([batch_size, input_size])
            self.dropout_mask_input_to_gates = dropout_input(ones)
            self.dropout_mask_input_to_proposal = dropout_input(ones)

        # Create dropout masks for state values (reused at every timestep).
        if dropout_state is None:
            self.dropout_mask_state_to_gates = None
            self.dropout_mask_state_to_proposal = None
        else:
            ones = tf.ones([batch_size, state_size])
            self.dropout_mask_state_to_gates = dropout_state(ones)
            self.dropout_mask_state_to_proposal = dropout_state(ones)
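
Examples #3 and #4 reference a LegacyBiasType enumeration that is defined elsewhere in layers.py. Only two of its members appear in the snippets above; the sketch below is an assumption about its shape, not the project's actual definition.

# Assumed sketch of the enumeration referenced above; the real definition
# lives elsewhere in layers.py and may contain further members.
import enum

class LegacyBiasType(enum.Enum):
    NEMATUS_COMPAT_FALSE = 1  # gates/proposal biases omitted when input_size == 0
    THEANO_A = 2              # biases always created, matching the Theano layout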
Example #5
def param_init_gru_cond(options,
                        params,
                        prefix='gru_cond',
                        nin=None,
                        dim=None,
                        dimctx=None,
                        nin_nonlin=None,
                        dim_nonlin=None):
    if nin is None:
        nin = options['dim']
    if dim is None:
        dim = options['dim']
    if dimctx is None:
        dimctx = options['dim']
    if nin_nonlin is None:
        nin_nonlin = nin
    if dim_nonlin is None:
        dim_nonlin = dim

    W = numpy.concatenate(
        [norm_weight(nin, dim), norm_weight(nin, dim)], axis=1)
    params[pp(prefix, 'W')] = W
    params[pp(prefix, 'b')] = numpy.zeros((2 * dim, )).astype('float32')
    U = numpy.concatenate([ortho_weight(dim_nonlin),
                           ortho_weight(dim_nonlin)],
                          axis=1)
    params[pp(prefix, 'U')] = U

    Wx = norm_weight(nin_nonlin, dim_nonlin)
    params[pp(prefix, 'Wx')] = Wx
    Ux = ortho_weight(dim_nonlin)
    params[pp(prefix, 'Ux')] = Ux
    params[pp(prefix, 'bx')] = numpy.zeros((dim_nonlin, )).astype('float32')

    U_nl = numpy.concatenate(
        [ortho_weight(dim_nonlin),
         ortho_weight(dim_nonlin)], axis=1)
    params[pp(prefix, 'U_nl')] = U_nl
    params[pp(prefix, 'b_nl')] = numpy.zeros(
        (2 * dim_nonlin, )).astype('float32')

    Ux_nl = ortho_weight(dim_nonlin)
    params[pp(prefix, 'Ux_nl')] = Ux_nl
    params[pp(prefix, 'bx_nl')] = numpy.zeros((dim_nonlin, )).astype('float32')

    # context to GRU gates and hidden state proposal
    Wc = norm_weight(dimctx, dim * 2)
    params[pp(prefix, 'Wc')] = Wc

    Wcx = norm_weight(dimctx, dim)
    params[pp(prefix, 'Wcx')] = Wcx

    # attention: combined -> hidden
    W_comb_att = norm_weight(dim, dimctx)
    params[pp(prefix, 'W_comb_att')] = W_comb_att

    # attention: context -> hidden
    Wc_att = norm_weight(dimctx)
    params[pp(prefix, 'Wc_att')] = Wc_att

    # attention: hidden bias
    b_att = numpy.zeros((dimctx, )).astype('float32')
    params[pp(prefix, 'b_att')] = b_att

    # attention: hidden -> alignment score
    U_att = norm_weight(dimctx, 1)
    params[pp(prefix, 'U_att')] = U_att
    c_att = numpy.zeros((1, )).astype('float32')
    params[pp(prefix, 'c_tt')] = c_att

    return params
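
A quick, hypothetical usage sketch for Example #5 follows; the prefix, the options dictionary, and the sizes are made up for illustration, and it assumes the helper definitions sketched after Example #1.

# Hypothetical call to param_init_gru_cond; sizes chosen only to show shapes.
options = {'dim': 4}
params = param_init_gru_cond(options, {}, prefix='decoder',
                             nin=3, dim=4, dimctx=6)

assert params['decoder_W'].shape == (3, 8)             # input -> gates
assert params['decoder_U'].shape == (4, 8)             # state -> gates
assert params['decoder_Wc'].shape == (6, 8)            # context -> gates
assert params['decoder_W_comb_att'].shape == (4, 6)    # hidden -> attention space
assert params['decoder_U_att'].shape == (6, 1)         # attention space -> score
assert params['decoder_c_tt'].shape == (1,)            # note the 'c_tt' key name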