Example #1
def param_init_lstm(options, params, prefix='lstm', nin=None, dim=None):
    """
    Stack the weight matrices for all the gates
    for much cleaner code and slightly faster dot-prods.
    """
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']
    # input weights (four gate blocks stacked along axis 1)
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    # recurrent weights for the previous hidden activation
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U
    params[_p(prefix, 'b')] = numpy.zeros((4 * dim, )).astype('float32')

    return params
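These snippets come from Theano-era codebases and lean on a few helpers defined elsewhere (_p, norm_weight, ortho_weight). For reference, here is a minimal sketch matching how they are usually written in that lineage; the scale default is an assumption and may differ per repo:

import numpy

def _p(pp, name):
    # build a prefixed parameter key, e.g. _p('lstm', 'W') -> 'lstm_W'
    return '%s_%s' % (pp, name)

def ortho_weight(ndim):
    # square orthogonal matrix from the SVD of a Gaussian draw
    W = numpy.random.randn(ndim, ndim)
    u, s, v = numpy.linalg.svd(W)
    return u.astype('float32')

def norm_weight(nin, nout=None, scale=0.01, ortho=True):
    # scaled Gaussian init; square matrices default to orthogonal
    if nout is None:
        nout = nin
    if nout == nin and ortho:
        W = ortho_weight(nin)
    else:
        W = scale * numpy.random.randn(nin, nout)
    return W.astype('float32')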
Example #2
def param_init_multi_lstm_s(options,
                            params,
                            prefix='multi_lstm_s',
                            in_dim=None,
                            out_dim=None):
    if in_dim is None:
        in_dim = options['wdim']
    if out_dim is None:
        out_dim = options['edim']

    # input weights: three stacked blocks per pipe when gated, two otherwise
    list_w = []
    for i in xrange(options['pipe_num']):
        if options['with_gate']:
            list_w.append(
                numpy.concatenate([glorot_uniform(in_dim, out_dim),
                                   glorot_uniform(in_dim, out_dim),
                                   glorot_uniform(in_dim, out_dim, 4.)], axis=1))
        else:
            list_w.append(
                numpy.concatenate([glorot_uniform(in_dim, out_dim),
                                   glorot_uniform(in_dim, out_dim, 4.)], axis=1))
    params[_p(prefix, 'W')] = numpy.concatenate(list_w, axis=1)

    # recurrent weights: each pipe reads the concatenation of all pipes' hidden states
    list_U = []
    for i in xrange(options['pipe_num']):
        if options['with_gate']:
            list_U.append(
                numpy.concatenate([ortho_weight(options['pipe_num'] * out_dim, out_dim),
                                   ortho_weight(options['pipe_num'] * out_dim, out_dim),
                                   ortho_weight(options['pipe_num'] * out_dim, out_dim)], axis=1))
        else:
            list_U.append(
                numpy.concatenate([ortho_weight(options['pipe_num'] * out_dim, out_dim),
                                   ortho_weight(options['pipe_num'] * out_dim, out_dim)], axis=1))
    U = numpy.concatenate(list_U, axis=1)
    params[_p(prefix, 'U')] = U

    # biases: one zero vector per stacked block across all pipes
    if options['with_gate']:
        b = numpy.zeros((3 * options['pipe_num'] * out_dim, )).astype(config.floatX)
    else:
        b = numpy.zeros((2 * options['pipe_num'] * out_dim, )).astype(config.floatX)
    params[_p(prefix, 'b')] = b

    return params
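Example #2's repo evidently uses different helpers: its ortho_weight takes two arguments (a rectangular variant of the sketch above), and it adds glorot_uniform. A plausible glorot_uniform, assuming the optional third argument scales the Glorot limit (4. being the usual correction for sigmoid units), would be:

import numpy
from theano import config

def glorot_uniform(nin, nout, scale=1.):
    # uniform init with the Glorot & Bengio (2010) limit, optionally scaled
    limit = scale * numpy.sqrt(6. / (nin + nout))
    return numpy.random.uniform(-limit, limit,
                                (nin, nout)).astype(config.floatX)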
Example #3
def param_init_lstm_cond(options, params, prefix='lstm_cond', nin=None, dim=None, dimctx=None):
    if nin is None:
        nin = options['dim']
    if dim is None:
        dim = options['dim']
    if dimctx is None:
        dimctx = options['dim']
    # input to LSTM: as above, we stack the matrices for compactness, do one
    # dot product, and use the slice function below to get the activations for each "gate"
    W = numpy.concatenate([norm_weight(nin,dim),
                           norm_weight(nin,dim),
                           norm_weight(nin,dim),
                           norm_weight(nin,dim)], axis=1)
    params[_p(prefix,'W')] = W

    # LSTM to LSTM
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix,'U')] = U

    # bias to LSTM
    params[_p(prefix,'b')] = numpy.zeros((4 * dim,)).astype('float32')

    # context to LSTM
    Wc = norm_weight(dimctx,dim*4)
    params[_p(prefix,'Wc')] = Wc

    # attention: context -> hidden
    Wc_att = norm_weight(dimctx, ortho=False)
    params[_p(prefix,'Wc_att')] = Wc_att
    
    # attention: last context -> hidden
    Wct_att = norm_weight(dimctx, ortho=False)
    params[_p(prefix,'Wct_att')] = Wct_att

    # attention: LSTM -> hidden
    Wd_att = norm_weight(dim,dimctx)
    params[_p(prefix,'Wd_att')] = Wd_att

    # attention: hidden bias
    b_att = numpy.zeros((dimctx,)).astype('float32')
    params[_p(prefix,'b_att')] = b_att

    # optional "deep" attention
    if options['n_layers_att'] > 1:
        for lidx in xrange(1, options['n_layers_att']):
            params[_p(prefix,'W_att_%d'%lidx)] = ortho_weight(dimctx)
            params[_p(prefix,'b_att_%d'%lidx)] = numpy.zeros((dimctx,)).astype('float32')

    # attention: hidden -> scalar energy
    U_att = norm_weight(dimctx,1)
    params[_p(prefix,'U_att')] = U_att
    c_att = numpy.zeros((1,)).astype('float32')
    params[_p(prefix, 'c_tt')] = c_att  # NB: stored under the key 'c_tt', not 'c_att'; the layer code must look it up by the same key

    return params
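The stacking comment in param_init_lstm_cond refers to a slice helper used in the step function to carve the single pre-activation matrix back into per-gate blocks. In this code lineage it is typically written as:

def _slice(x, n, dim):
    # return the n-th width-`dim` block along the last axis
    if x.ndim == 3:
        return x[:, :, n * dim:(n + 1) * dim]
    return x[:, n * dim:(n + 1) * dim]

So _slice(preact, 0, dim) recovers whichever gate block was concatenated first, _slice(preact, 1, dim) the second, and so on.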
Example #4
def param_init_lstm_cond_nox(options,
                             params,
                             prefix='lstm_cond_nox',
                             dim=None,
                             dimctx=None):
    if dim is None:
        dim = options['dim']
    if dimctx is None:
        dimctx = options['dim']

    # LSTM to LSTM
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    # bias to LSTM
    params[_p(prefix, 'b')] = numpy.zeros((4 * dim, )).astype('float32')

    # from context to gates
    Wc = norm_weight(dimctx, dim * 4)
    params[_p(prefix, 'Wc')] = Wc

    Wc_att = norm_weight(dimctx, ortho=False)
    params[_p(prefix, 'Wc_att')] = Wc_att

    # attention: last context -> hidden (the registration below is commented
    # out in this variant, so Wct_att is computed but unused)
    Wct_att = norm_weight(dimctx, ortho=False)
    # params[_p(prefix, 'Wct_att')] = Wct_att

    Wd_att = norm_weight(dim, dimctx)
    params[_p(prefix, 'Wd_att')] = Wd_att

    # attention: hidden bias
    b_att = numpy.zeros((dimctx, )).astype('float32')
    params[_p(prefix, 'b_att')] = b_att

    # attention: hidden -> scalar energy
    U_att = norm_weight(dimctx, 1)
    params[_p(prefix, 'U_att')] = U_att
    c_att = numpy.zeros((1, )).astype('float32')
    params[_p(prefix, 'c_att')] = c_att

    return params
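Unlike param_init_lstm_cond, the nox variant allocates no input-to-LSTM W, so the recurrence is driven by the context alone. A quick shape check, assuming the helpers sketched earlier and a hypothetical dim of 256:

from collections import OrderedDict

options = {'dim': 256}
params = param_init_lstm_cond_nox(options, OrderedDict())
print(params['lstm_cond_nox_U'].shape)   # (256, 1024): four gate blocks
print(params['lstm_cond_nox_Wc'].shape)  # (256, 1024): context to all gates
print('lstm_cond_nox_W' in params)       # False: no input projection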
Example #5
def param_init_lstm(options, params, prefix='lstm', nin=None, dim=None):
    """
    Stack the weight matrices for all the gates
    for much cleaner code and slightly faster dot-prods.
    """
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']
    # input weights
    W = numpy.concatenate([norm_weight(nin,dim),
                           norm_weight(nin,dim),
                           norm_weight(nin,dim),
                           norm_weight(nin,dim)], axis=1)
    params[_p(prefix,'W')] = W
    # for the previous hidden activation
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix,'U')] = U
    params[_p(prefix,'b')] = numpy.zeros((4 * dim,)).astype('float32')

    return params
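A minimal usage sketch for the plain initializer, assuming the helper definitions above and an arbitrary dim_proj of 128:

from collections import OrderedDict

options = {'dim_proj': 128}
params = param_init_lstm(options, OrderedDict())
print(params['lstm_W'].shape)  # (128, 512): four gates stacked on axis 1
print(params['lstm_U'].shape)  # (128, 512)
print(params['lstm_b'].shape)  # (512,)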
Example #6
def param_init_lstm_cond(options, params, prefix='lstm_cond', nin=None, dim=None, dimctx=None):
    if nin is None:
        nin = options['dim']
    if dim is None:
        dim = options['dim']
    if dimctx is None:
        dimctx = options['dim']
    # input to LSTM: as above, we stack the matrices for compactness, do one
    # dot product, and use the slice function below to get the activations for each "gate"
    W = numpy.concatenate([norm_weight(nin,dim),
                           norm_weight(nin,dim),
                           norm_weight(nin,dim),
                           norm_weight(nin,dim)], axis=1)
    params[_p(prefix,'W')] = W

    # LSTM to LSTM
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix,'U')] = U

    # bias to LSTM
    params[_p(prefix,'b')] = numpy.zeros((4 * dim,)).astype('float32')

    # context to LSTM
    Wc = norm_weight(dimctx,dim*4)
    params[_p(prefix,'Wc')] = Wc

    # attention: context -> hidden
    Wc_att = norm_weight(dimctx, ortho=False)
    params[_p(prefix,'Wc_att')] = Wc_att

    # attention: LSTM -> hidden
    Wd_att = norm_weight(dim,dimctx)
    params[_p(prefix,'Wd_att')] = Wd_att

    # attention: hidden bias
    b_att = numpy.zeros((dimctx,)).astype('float32')
    params[_p(prefix,'b_att')] = b_att

    # optional "deep" attention
    if options['n_layers_att'] > 1:
        for lidx in xrange(1, options['n_layers_att']):
            params[_p(prefix,'W_att_%d'%lidx)] = ortho_weight(dimctx)
            params[_p(prefix,'b_att_%d'%lidx)] = numpy.zeros((dimctx,)).astype('float32')

    # attention: hidden -> scalar energy
    U_att = norm_weight(dimctx,1)
    params[_p(prefix,'U_att')] = U_att
    c_att = numpy.zeros((1,)).astype('float32')
    params[_p(prefix, 'c_tt')] = c_att  # NB: stored under the key 'c_tt', not 'c_att'; the layer code must look it up by the same key

    if options['selector']:
        # attention: selector
        W_sel = norm_weight(dim, 1)
        params[_p(prefix, 'W_sel')] = W_sel
        b_sel = numpy.float32(0.)
        params[_p(prefix, 'b_sel')] = b_sel

    return params
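The selector pair (W_sel, b_sel) is typically used inside the step function as a scalar gate on the attended context: a sigmoid of the previous hidden state decides how much of the context enters the LSTM. A hedged sketch of that use, with h_ and ctx_ standing for the step's hidden state and context (names assumed, not from this snippet):

import theano.tensor as tensor

def apply_selector(h_, ctx_, tparams, prefix='lstm_cond'):
    # beta gate in (0, 1): how strongly the attended context feeds the gates
    sel = tensor.nnet.sigmoid(tensor.dot(h_, tparams[_p(prefix, 'W_sel')])
                              + tparams[_p(prefix, 'b_sel')])
    sel = sel.reshape([sel.shape[0]])  # (batch, 1) -> (batch,)
    return sel[:, None] * ctx_         # scale each context vector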