Example #1
    def __init__(self, model, args):
        self.pc = model.add_subcollection()
        self.args = args
        self.num_layers = args[0]
        self.num_input = args[1]
        self.num_hidden = args[2]
        self.num_attention = args[3]
        self.num_output = args[4]
        self.act = args[5]
        self.model = model
        self.num_embeddings = self.num_output
        self.loss_fn = dy.l2_norm
        # Lookup parameter to store the end of sentence symbol
        self.M = self.args[6]
        self.spec = (self.num_layers, self.num_input, self.num_hidden, self.num_attention, self.num_output, self.act)

        # Add the LSTMs
        self.fwd_lstm_builder = dy.CompactVanillaLSTMBuilder(self.num_layers, self.num_input, self.num_hidden, model)
        self.bwd_lstm_builder = dy.CompactVanillaLSTMBuilder(self.num_layers, self.num_input, self.num_hidden, model)
        self.decoder_lstm_builder = dy.CompactVanillaLSTMBuilder(self.num_layers, self.num_hidden*2 + self.num_embeddings, self.num_hidden, model)
        self.w_decoder = self.model.add_parameters((self.num_output, self.num_hidden))
        self.b_decoder = self.model.add_parameters((self.num_output))

        # Attention MLP parameters
        self.attention_w1 = self.model.add_parameters((self.num_attention, self.num_hidden*2))
        self.attention_w2 = self.model.add_parameters((self.num_attention, self.num_hidden*2*self.num_layers))
        self.attention_v = self.model.add_parameters((1, self.num_attention))
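
A minimal sketch (not part of the snippet above; the function name is illustrative) of how the two encoder builders set up here are typically used: transduce the embedded input in both directions and concatenate the per-position outputs into vectors of size num_hidden*2, which is the input size expected by attention_w1.

import dynet as dy

def bi_encode(fwd_lstm_builder, bwd_lstm_builder, embedded_sentence):
    # run the forward and backward LSTMs over the sequence
    fwd_out = fwd_lstm_builder.initial_state().transduce(embedded_sentence)
    bwd_out = bwd_lstm_builder.initial_state().transduce(
        list(reversed(embedded_sentence)))
    bwd_out = list(reversed(bwd_out))
    # one (2 * num_hidden)-dimensional vector per input position
    return [dy.concatenate([f, b]) for f, b in zip(fwd_out, bwd_out)]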
Example #2
def build_model(input_vocabulary, output_vocabulary, input_dim, hidden_dim,
                layers):
    # define all model parameters
    # TODO: add logic for "smart" parameter allocation according to the user's chosen architecture
    print('creating model...')

    model = dn.ParameterCollection()

    params = {}

    # input embeddings
    params['input_lookup'] = model.add_lookup_parameters(
        (len(input_vocabulary), input_dim))

    # init vector for input feeding
    params['init_lookup'] = model.add_lookup_parameters((1, 3 * hidden_dim))

    # output embeddings
    params['output_lookup'] = model.add_lookup_parameters(
        (len(output_vocabulary), input_dim))

    # used in softmax output
    params['readout'] = model.add_parameters(
        (len(output_vocabulary), 3 * hidden_dim))
    params['bias'] = model.add_parameters(len(output_vocabulary))

    # rnn's -- `arguments` is assumed to be a module-level dict of parsed
    # command-line options (e.g. from docopt); it is not defined in this snippet
    if bool(arguments['--compact']):
        params['encoder_frnn'] = dn.CompactVanillaLSTMBuilder(
            layers, input_dim, hidden_dim, model)
        params['encoder_rrnn'] = dn.CompactVanillaLSTMBuilder(
            layers, input_dim, hidden_dim, model)
    else:
        params['encoder_frnn'] = dn.LSTMBuilder(layers, input_dim, hidden_dim,
                                                model)
        params['encoder_rrnn'] = dn.LSTMBuilder(layers, input_dim, hidden_dim,
                                                model)

    # attention MLPs - Luong-style scoring with an extra v_a from Bahdanau:
    # score(h_input, h_output) = v_a * tanh(w_a * h_output + u_a * h_input)

    # projects the concatenation of decoder state h (hidden_dim) and
    # context vector c (2 * hidden_dim) to the attentional vector (3 * hidden_dim)
    params['w_c'] = model.add_parameters((3 * hidden_dim, 3 * hidden_dim))

    # applied to the decoder hidden state h_output (hidden_dim)
    params['w_a'] = model.add_parameters((hidden_dim, hidden_dim))

    # applied to the bidirectional encoder state h_input (2 * hidden_dim)
    params['u_a'] = model.add_parameters((hidden_dim, 2 * hidden_dim))

    # scoring vector that reduces the attention hidden layer to a scalar energy
    params['v_a'] = model.add_parameters((1, hidden_dim))

    # decoder input is the previous attentional vector (3 * hidden_dim)
    # concatenated with the fed-back output embedding (input_dim): the "input feeding" approach
    params['decoder_rnn'] = dn.LSTMBuilder(layers, 3 * hidden_dim + input_dim,
                                           hidden_dim, model)

    print('finished creating model')

    return model, params
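
A hypothetical call, just to show the expected argument types; the vocabularies and dimensions below are made up, and `arguments` stands in for the parsed command-line flags the snippet reads at module level.

input_vocab = ['<s>', '</s>', 'a', 'b', 'c']
output_vocab = ['<s>', '</s>', 'x', 'y']
arguments = {'--compact': False}  # normally produced by the CLI parser
model, params = build_model(input_vocab, output_vocab,
                            input_dim=64, hidden_dim=128, layers=2)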
Example #3
    def __init__(self, config, model):
        self.char_emb_size = config.char_emb_size
        self.char2idx = config.char2idx
        self.chars = config.idx2char
        self.char_size = len(self.chars)
        self.model = model
        self.char_emb = model.add_lookup_parameters(
            (self.char_size, self.char_emb_size))
        # self.bilstm = dy.BiRNNBuilder(1, self.char_emb_size, config.charlstm_hidden_dim, self.model, dy.LSTMBuilder)

        # each direction gets half of the character-LSTM hidden size (integer division)
        self.fw_lstm = dy.CompactVanillaLSTMBuilder(
            1, self.char_emb_size, config.charlstm_hidden_dim // 2, self.model)
        self.bw_lstm = dy.CompactVanillaLSTMBuilder(
            1, self.char_emb_size, config.charlstm_hidden_dim // 2, self.model)

        print("char embedding size: %d" % (self.char_emb_size))
        print("char hidden size: %d" % (config.charlstm_hidden_dim))
Example #4
def rnn_from_spec(spec, num_layers, input_dim, hidden_dim, model, residual_to_output):
  decoder_type = spec.lower()
  if decoder_type == "lstm":
    return dy.CompactVanillaLSTMBuilder(num_layers, input_dim, hidden_dim, model)
  elif decoder_type == "residuallstm":
    return residual.ResidualRNNBuilder(num_layers, input_dim, hidden_dim,
                                       model, residual_to_output)
  else:
    raise RuntimeError("Unknown decoder type {}".format(spec))
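
Example call (a sketch; the dimensions are arbitrary):

model = dy.ParameterCollection()
decoder = rnn_from_spec("lstm", num_layers=2, input_dim=128,
                        hidden_dim=256, model=model, residual_to_output=False)
state = decoder.initial_state()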
Example #5
def orthonormal_CompactVanillaLSTMBuilder(n_layers, x_dim, h_dim, pc):
    builder = dy.CompactVanillaLSTMBuilder(n_layers, x_dim, h_dim, pc)

    # get_parameters() returns, per layer, the parameter list [W_x, W_h, b]
    for layer, params in enumerate(builder.get_parameters()):
        # layers above the first take the previous layer's h_dim-sized output as input
        W = get_orthogonal_matrix(
            h_dim, h_dim + (h_dim if layer > 0 else x_dim))
        W_h, W_x = W[:, :h_dim], W[:, h_dim:]
        # reuse the same orthogonal block for all four gates
        params[0].set_value(np.concatenate([W_x] * 4, 0))
        params[1].set_value(np.concatenate([W_h] * 4, 0))
        b = np.zeros(4 * h_dim, dtype=np.float32)
        # bias of -1 on the second gate block (the forget gate in DyNet's ordering)
        b[h_dim:2 * h_dim] = -1.0
        params[2].set_value(b)
    return builder
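
get_orthogonal_matrix is not defined in the snippet. One common way to obtain such a (semi-)orthogonal matrix, shown here only as a sketch of what the helper is assumed to do, is the SVD of a random Gaussian matrix:

import numpy as np

def get_orthogonal_matrix(rows, cols):
    a = np.random.randn(rows, cols).astype(np.float32)
    u, _, vt = np.linalg.svd(a, full_matrices=False)
    # u @ vt has orthonormal rows when rows <= cols
    return (u @ vt).astype(np.float32)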