Example #1
def param_init_lstm(options, params, prefix='lstm', nin=None, dim=None):
    """
    Stack the weight matrices for all the gates
    for much cleaner code and slightly faster dot products.
    """
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']
    # input weights
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    # for the previous hidden activation
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U
    params[_p(prefix, 'b')] = numpy.zeros((4 * dim, )).astype('float32')

    return params
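These excerpts rely on a few helpers they never define (_p, norm_weight, ortho_weight). A minimal sketch of the usual definitions, following the arctic-captions conventions these snippets appear to share:

import numpy

def _p(pp, name):
    # build a prefixed parameter name, e.g. _p('lstm', 'W') -> 'lstm_W'
    return '%s_%s' % (pp, name)

def ortho_weight(ndim):
    # square matrix with orthonormal columns, via SVD of a Gaussian draw
    W = numpy.random.randn(ndim, ndim)
    u, s, v = numpy.linalg.svd(W)
    return u.astype('float32')

def norm_weight(nin, nout=None, scale=0.01, ortho=True):
    # scaled Gaussian init; falls back to orthogonal init for square matrices
    if nout is None:
        nout = nin
    if nout == nin and ortho:
        W = ortho_weight(nin)
    else:
        W = scale * numpy.random.randn(nin, nout)
    return W.astype('float32')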
Example #2
def param_init_attention(options, params, prefix='attention'):
    dim_word = options['dim_word']
    params[_p(prefix, 'Wm')] = norm_weight(dim_word)
    params[_p(prefix, 'b')] = numpy.zeros((dim_word, ), dtype='float32')
    params[_p(prefix, 'W_att')] = norm_weight(dim_word)
    params[_p(prefix, 'U_att')] = norm_weight(dim_word, 1)
    params[_p(prefix, 'c_att')] = numpy.zeros((1, ), dtype='float32')
    return params
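For illustration, one plausible (hypothetical, not from the source) way the parameters registered above could combine into unnormalized attention scores, with m a query vector of shape (dim_word,) and ctx an annotation matrix of shape (n_annotations, dim_word):

def attention_scores(params, m, ctx, prefix='attention'):
    # project the annotations and the query, mix through tanh,
    # then map each annotation to a single unnormalized score
    hidden = numpy.tanh(numpy.dot(ctx, params[_p(prefix, 'W_att')]) +
                        numpy.dot(m, params[_p(prefix, 'Wm')]) +
                        params[_p(prefix, 'b')])
    return numpy.dot(hidden, params[_p(prefix, 'U_att')])[:, 0] + params[_p(prefix, 'c_att')][0]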
def param_init_lstm_cond(options, params, prefix='lstm_cond', nin=None, dim=None, dimctx=None):
    if nin is None:
        nin = options['dim']
    if dim is None:
        dim = options['dim']
    if dimctx is None:
        dimctx = options['dim']
    # input to LSTM; as above, we stack the matrices for compactness, do one
    # dot product, and use the slice function below to get the activations for each "gate"
    W = numpy.concatenate([norm_weight(nin,dim),
                           norm_weight(nin,dim),
                           norm_weight(nin,dim),
                           norm_weight(nin,dim)], axis=1)
    params[_p(prefix,'W')] = W

    # LSTM to LSTM
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix,'U')] = U

    # bias to LSTM
    params[_p(prefix,'b')] = numpy.zeros((4 * dim,)).astype('float32')

    # context to LSTM
    Wc = norm_weight(dimctx,dim*4)
    params[_p(prefix,'Wc')] = Wc

    # attention: context -> hidden
    Wc_att = norm_weight(dimctx, ortho=False)
    params[_p(prefix,'Wc_att')] = Wc_att
    
    # attention: last context -> hidden
    Wct_att = norm_weight(dimctx, ortho=False)
    params[_p(prefix,'Wct_att')] = Wct_att

    # attention: LSTM -> hidden
    Wd_att = norm_weight(dim,dimctx)
    params[_p(prefix,'Wd_att')] = Wd_att

    # attention: hidden bias
    b_att = numpy.zeros((dimctx,)).astype('float32')
    params[_p(prefix,'b_att')] = b_att

    # optional "deep" attention
    if options['n_layers_att'] > 1:
        for lidx in xrange(1, options['n_layers_att']):
            params[_p(prefix,'W_att_%d'%lidx)] = ortho_weight(dimctx)
            params[_p(prefix,'b_att_%d'%lidx)] = numpy.zeros((dimctx,)).astype('float32')

    # attention:
    U_att = norm_weight(dimctx,1)
    params[_p(prefix,'U_att')] = U_att
    c_att = numpy.zeros((1,)).astype('float32')
    params[_p(prefix, 'c_tt')] = c_att

    return params
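The comment at the top of param_init_lstm_cond refers to a slice helper used at apply time to cut each gate's pre-activation out of the stacked dot product. A minimal sketch of the usual definition:

def _slice(_x, n, dim):
    # n-th dim-wide block along the last axis (handles 2-D and 3-D tensors)
    if _x.ndim == 3:
        return _x[:, :, n * dim:(n + 1) * dim]
    return _x[:, n * dim:(n + 1) * dim]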
def param_init_lstm_cond_nox(options,
                             params,
                             prefix='lstm_cond_nox',
                             dim=None,
                             dimctx=None):
    if dim is None:
        dim = options['dim']
    if dimctx is None:
        dimctx = options['dim']

    # LSTM to LSTM
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    # bias to LSTM
    params[_p(prefix, 'b')] = numpy.zeros((4 * dim, )).astype('float32')

    # from context to gates
    Wc = norm_weight(dimctx, dim * 4)
    params[_p(prefix, 'Wc')] = Wc

    Wc_att = norm_weight(dimctx, ortho=False)
    params[_p(prefix, 'Wc_att')] = Wc_att

    # attention: last context -> hidden (not registered in this variant)
    # Wct_att = norm_weight(dimctx, ortho=False)
    # params[_p(prefix, 'Wct_att')] = Wct_att

    Wd_att = norm_weight(dim, dimctx)
    params[_p(prefix, 'Wd_att')] = Wd_att

    # attention: hidden bias
    b_att = numpy.zeros((dimctx, )).astype('float32')
    params[_p(prefix, 'b_att')] = b_att

    # attention:
    U_att = norm_weight(dimctx, 1)
    params[_p(prefix, 'U_att')] = U_att
    c_att = numpy.zeros((1, )).astype('float32')
    params[_p(prefix, 'c_att')] = c_att

    return params
Example #5
def param_init_fflayer(options,
                       params,
                       prefix='ff',
                       nin=None,
                       nout=None,
                       ortho=True,
                       flag=False):

    if nin is None:
        nin = options['dim_proj']
    if nout is None:
        nout = options['dim_proj']
    params[_p(prefix, 'W')] = norm_weight(nin, nout, scale=0.01, ortho=ortho)
    if flag:
        # seed the biases with the log-odds of the training-data marginals,
        # so sigmoid outputs start near the empirical pixel means
        import gzip
        import pickle
        with gzip.open('mnist.pkl.gz', 'rb') as f:
            train_set, _, _ = pickle.load(f)
            train_x, train_y = train_set
            marginals = np.clip(train_x.mean(axis=0), 1e-7, 1 - 1e-7)
            initial_biases = np.log(marginals / (1 - marginals))
            params[_p(prefix, 'b')] = initial_biases.astype('float32')

    else:
        params[_p(prefix, 'b')] = np.zeros((nout, )).astype('float32')

    return params
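A quick check of the bias trick in the flag branch above: with zero input, a sigmoid unit whose bias is the log-odds of the marginal p outputs exactly p.

import numpy as np

p = 0.2
b = np.log(p / (1 - p))            # log-odds of the marginal
assert abs(1.0 / (1.0 + np.exp(-b)) - p) < 1e-12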
Example #6
def param_init_mlp(options, params, prefix='predictor'):
    dims = options['dims']
    layer_num = len(dims)
    assert layer_num >= 3  # need at least input, hidden, and output sizes
    for i in range(layer_num - 1):
        W = norm_weight(dims[i], dims[i + 1])
        params[_p(prefix, i)] = W
    return params
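For context, a hypothetical forward pass over the bias-free weights registered above (not from the source; the tanh activation is an assumption):

def mlp_forward(params, x, options, prefix='predictor'):
    # chain the layers registered by param_init_mlp
    h = x
    for i in range(len(options['dims']) - 1):
        h = numpy.tanh(numpy.dot(h, params[_p(prefix, i)]))
    return h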
Example #7
def param_init_fflayer(options, params, prefix='ff', nin=None, nout=None):
    if nin is None:
        nin = options['dim_proj']
    if nout is None:
        nout = options['dim_proj']
    params[_p(prefix, 'W')] = norm_weight(nin, nout, scale=0.01)
    params[_p(prefix, 'b')] = numpy.zeros((nout, )).astype('float32')

    return params
Example #8
def param_init_fflayer(options, params, prefix='ff', nin=None, nout=None):
    if nin is None:
        nin = options['dim_proj']
    if nout is None:
        nout = options['dim_proj']
    params[_p(prefix, 'W')] = norm_weight(nin, nout, scale=0.01)
    params[_p(prefix, 'b')] = numpy.zeros((nout,)).astype('float32')

    return params
Example #9
def param_init_fflayer(options, params, prefix='ff', prefix_bnorm='bnorm', nin=None, nout=None, ortho=True, batch_norm=False):

    if _p(prefix, 'W') in params:
        # keys are stored under prefixed names (e.g. 'ff_W'), so test the full key
        print 'this layer is already present'
    else:
        params[_p(prefix, 'W')] = norm_weight(nin, nout)
        params[_p(prefix, 'b')] = np.zeros((nout,)).astype('float32')

    return params
def param_init_fflayer(options,
                       params,
                       prefix='ff',
                       nin=None,
                       nout=None,
                       ortho=True,
                       flag=False):
    params[_p(prefix, 'W')] = norm_weight(nin, nout, scale=0.01, ortho=ortho)
    params[_p(prefix, 'b')] = np.zeros((nout, )).astype('float32')
    return params
Example #11
def param_init_lstm(options, params, prefix='lstm', nin=None, dim=None):
    """
    Stack the weight matrices for all the gates
    for much cleaner code and slightly faster dot products.
    """
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']
    # input weights
    W = numpy.concatenate([norm_weight(nin,dim),
                           norm_weight(nin,dim),
                           norm_weight(nin,dim),
                           norm_weight(nin,dim)], axis=1)
    params[_p(prefix,'W')] = W
    # for the previous hidden activation
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix,'U')] = U
    params[_p(prefix,'b')] = numpy.zeros((4 * dim,)).astype('float32')

    return params
Example #12
def init_params(options):
    params = OrderedDict()
    # embedding: [matrix E in paper]
    params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])
    ctx_dim = options['ctx_dim']
    if options['lstm_encoder']: # potential feature that runs an LSTM over the annotation vectors
        # encoder: LSTM
        params = get_layer('lstm')[0](options, params, prefix='encoder',
                                      nin=options['ctx_dim'], dim=options['dim'])
        params = get_layer('lstm')[0](options, params, prefix='encoder_rev',
                                      nin=options['ctx_dim'], dim=options['dim'])
        ctx_dim = options['dim'] * 2
    # init_state, init_cell: [top right on page 4]
    for lidx in xrange(1, options['n_layers_init']):
        params = get_layer('ff')[0](options, params, prefix='ff_init_%d'%lidx, nin=ctx_dim, nout=ctx_dim)
    params = get_layer('ff')[0](options, params, prefix='ff_state', nin=ctx_dim, nout=options['dim'])
    params = get_layer('ff')[0](options, params, prefix='ff_memory', nin=ctx_dim, nout=options['dim'])
    # decoder: LSTM: [equation (1)/(2)/(3)]
    params = get_layer('lstm_cond')[0](options, params, prefix='decoder',
                                       nin=options['dim_word'], dim=options['dim'],
                                       dimctx=ctx_dim)
    # potentially deep decoder (warning: should work but somewhat untested)
    if options['n_layers_lstm'] > 1:
        for lidx in xrange(1, options['n_layers_lstm']):
            params = get_layer('ff')[0](options, params, prefix='ff_state_%d'%lidx, nin=options['ctx_dim'], nout=options['dim'])
            params = get_layer('ff')[0](options, params, prefix='ff_memory_%d'%lidx, nin=options['ctx_dim'], nout=options['dim'])
            params = get_layer('lstm_cond')[0](options, params, prefix='decoder_%d'%lidx,
                                               nin=options['dim'], dim=options['dim'],
                                               dimctx=ctx_dim)
    # readout: [equation (7)]
    params = get_layer('ff')[0](options, params, prefix='ff_logit_lstm', nin=options['dim'], nout=options['dim_word'])
    if options['ctx2out']:
        params = get_layer('ff')[0](options, params, prefix='ff_logit_ctx', nin=ctx_dim, nout=options['dim_word'])
    if options['n_layers_out'] > 1:
        for lidx in xrange(1, options['n_layers_out']):
            params = get_layer('ff')[0](options, params, prefix='ff_logit_h%d'%lidx, nin=options['dim_word'], nout=options['dim_word'])
    params = get_layer('ff')[0](options, params, prefix='ff_logit', nin=options['dim_word'], nout=options['n_words'])

    return params
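As a usage sketch, a minimal options dict covering the keys init_params reads above; every value is an illustrative placeholder, and it assumes the usual get_layer registry mapping 'ff', 'lstm', and 'lstm_cond' to (init, apply) pairs:

options = {
    'n_words': 10000,     # vocabulary size
    'dim_word': 512,      # word-embedding size
    'ctx_dim': 2048,      # annotation (context) vector size
    'dim': 1000,          # LSTM hidden size
    'lstm_encoder': False,
    'n_layers_init': 1,   # xrange(1, 1) adds no extra init layers
    'n_layers_att': 1,    # read by the conditional LSTM's initializer
    'n_layers_lstm': 1,
    'n_layers_out': 1,
    'ctx2out': True,
}
params = init_params(options)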
Example #13
def init_params(options):
    params = OrderedDict()
    # embedding: [matrix E in paper]
    params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])
    ctx_dim = options['ctx_dim']
    # potential feature that runs an LSTM over the annotation vectors
    if options['lstm_encoder']:
        # encoder: LSTM
        params = get_layer('lstm')[0](options,
                                      params,
                                      prefix='encoder',
                                      nin=options['ctx_dim'],
                                      dim=options['dim'])
        params = get_layer('lstm')[0](options,
                                      params,
                                      prefix='encoder_rev',
                                      nin=options['ctx_dim'],
                                      dim=options['dim'])
        ctx_dim = options['dim'] * 2
    # init_state, init_cell: [top right on page 4]
    for lidx in xrange(1, options['n_layers_init']):
        params = get_layer('ff')[0](options,
                                    params,
                                    prefix='ff_init_%d' % lidx,
                                    nin=ctx_dim,
                                    nout=ctx_dim)
    params = get_layer('ff')[0](options,
                                params,
                                prefix='ff_state',
                                nin=ctx_dim,
                                nout=options['dim'])
    params = get_layer('ff')[0](options,
                                params,
                                prefix='ff_memory',
                                nin=ctx_dim,
                                nout=options['dim'])
    # decoder: LSTM: [equation (1)/(2)/(3)]
    params = get_layer('lstm_cond')[0](options,
                                       params,
                                       prefix='decoder',
                                       nin=options['dim_word'],
                                       dim=options['dim'],
                                       dimctx=ctx_dim)
    # potentially deep decoder (warning: should work but somewhat untested)
    if options['n_layers_lstm'] > 1:
        for lidx in xrange(1, options['n_layers_lstm']):
            params = get_layer('ff')[0](options,
                                        params,
                                        prefix='ff_state_%d' % lidx,
                                        nin=options['ctx_dim'],
                                        nout=options['dim'])
            params = get_layer('ff')[0](options,
                                        params,
                                        prefix='ff_memory_%d' % lidx,
                                        nin=options['ctx_dim'],
                                        nout=options['dim'])
            params = get_layer('lstm_cond')[0](options,
                                               params,
                                               prefix='decoder_%d' % lidx,
                                               nin=options['dim'],
                                               dim=options['dim'],
                                               dimctx=ctx_dim)
    # readout: [equation (7)]
    params = get_layer('ff')[0](options,
                                params,
                                prefix='ff_logit_lstm',
                                nin=options['dim'],
                                nout=options['dim_word'])
    if options['ctx2out']:
        params = get_layer('ff')[0](options,
                                    params,
                                    prefix='ff_logit_ctx',
                                    nin=ctx_dim,
                                    nout=options['dim_word'])
    if options['n_layers_out'] > 1:
        for lidx in xrange(1, options['n_layers_out']):
            params = get_layer('ff')[0](options,
                                        params,
                                        prefix='ff_logit_h%d' % lidx,
                                        nin=options['dim_word'],
                                        nout=options['dim_word'])
    params = get_layer('ff')[0](options,
                                params,
                                prefix='ff_logit',
                                nin=options['dim_word'],
                                nout=options['n_words'])

    return params
Example #14
def param_init_lstm_cond(options, params, prefix='lstm_cond', nin=None, dim=None, dimctx=None):
    if nin is None:
        nin = options['dim']
    if dim is None:
        dim = options['dim']
    if dimctx is None:
        dimctx = options['dim']
    # input to LSTM; as above, we stack the matrices for compactness, do one
    # dot product, and use the slice function below to get the activations for each "gate"
    W = numpy.concatenate([norm_weight(nin,dim),
                           norm_weight(nin,dim),
                           norm_weight(nin,dim),
                           norm_weight(nin,dim)], axis=1)
    params[_p(prefix,'W')] = W

    # LSTM to LSTM
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix,'U')] = U

    # bias to LSTM
    params[_p(prefix,'b')] = numpy.zeros((4 * dim,)).astype('float32')

    # context to LSTM
    Wc = norm_weight(dimctx,dim*4)
    params[_p(prefix,'Wc')] = Wc

    # attention: context -> hidden
    Wc_att = norm_weight(dimctx, ortho=False)
    params[_p(prefix,'Wc_att')] = Wc_att

    # attention: LSTM -> hidden
    Wd_att = norm_weight(dim,dimctx)
    params[_p(prefix,'Wd_att')] = Wd_att

    # attention: hidden bias
    b_att = numpy.zeros((dimctx,)).astype('float32')
    params[_p(prefix,'b_att')] = b_att

    # optional "deep" attention
    if options['n_layers_att'] > 1:
        for lidx in xrange(1, options['n_layers_att']):
            params[_p(prefix,'W_att_%d'%lidx)] = ortho_weight(dimctx)
            params[_p(prefix,'b_att_%d'%lidx)] = numpy.zeros((dimctx,)).astype('float32')

    # attention:
    U_att = norm_weight(dimctx,1)
    params[_p(prefix,'U_att')] = U_att
    c_att = numpy.zeros((1,)).astype('float32')
    params[_p(prefix, 'c_tt')] = c_att

    if options['selector']:
        # attention: selector
        W_sel = norm_weight(dim, 1)
        params[_p(prefix, 'W_sel')] = W_sel
        b_sel = numpy.float32(0.)
        params[_p(prefix, 'b_sel')] = b_sel

    return params
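The selector block above registers the scalar gate from Show, Attend and Tell that scales the attended context. A numpy sketch of the apply-time computation (the original is Theano; h is the previous hidden state of shape (dim,)):

def selector_gate(params, h, prefix='lstm_cond'):
    # beta = sigmoid(h . W_sel + b_sel); multiplies the attention-weighted
    # context vector before it enters the LSTM update
    z = numpy.dot(h, params[_p(prefix, 'W_sel')]) + params[_p(prefix, 'b_sel')]
    return 1.0 / (1.0 + numpy.exp(-z))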
def init_params(options):
    params = OrderedDict()

    # Visual concept embedding
    if not options['with_glove']:
        params['VCemb'] = norm_weight(options['n_words'], options['dim_word'])
    # embedding: [matrix E in paper]
    params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])
    #params = get_layer('ff')[0](options, params, prefix='CNNTrans', nin=options['cnn_dim'], nout=options['dim'])
    ctx_dim = options['ctx_dim']

    # potential feature that runs an LSTM over the annotation vectors
    if options['lstm_encoder']:
        # use input attentive encoder
        params = get_layer('lstm_cond_nox')[0](options,
                                               params,
                                               prefix='encoder',
                                               dim=ctx_dim,
                                               dimctx=options['semantic_dim'])

    # potentially deep decoder (warning: should work but somewhat untested)
    for lidx in range(options['n_layers_lstm']):
        ff_state_prefix = 'CNNTrans_%d' % lidx if lidx > 0 else 'CNNTrans'
        ff_memory_prefix = 'CNN_memory_%d' % lidx if lidx > 0 else 'CNN_memory'
        lstm_prefix = 'decoder_%d' % lidx if lidx > 0 else 'decoder'
        nin_lstm = options['dim'] if lidx > 0 else options['dim_word']
        params = get_layer('ff')[0](options,
                                    params,
                                    prefix=ff_state_prefix,
                                    nin=options['cnn_dim'],
                                    nout=options['dim'])
        params = get_layer('ff')[0](options,
                                    params,
                                    prefix=ff_memory_prefix,
                                    nin=options['cnn_dim'],
                                    nout=options['dim'])
        params = get_layer('lstm_cond')[0](options,
                                           params,
                                           prefix=lstm_prefix,
                                           nin=nin_lstm,
                                           dim=options['dim'],
                                           dimctx=ctx_dim)

    # readout: [equation (7)]
    params = get_layer('ff')[0](options,
                                params,
                                prefix='ff_logit_lstm',
                                nin=options['dim'],
                                nout=options['dim_word'])
    if options['ctx2out']:
        params = get_layer('ff')[0](options,
                                    params,
                                    prefix='ff_logit_ctx',
                                    nin=ctx_dim,
                                    nout=options['dim_word'])
    if options['n_layers_out'] > 1:
        for lidx in xrange(1, options['n_layers_out']):
            params = get_layer('ff')[0](options,
                                        params,
                                        prefix='ff_logit_h%d' % lidx,
                                        nin=options['dim_word'],
                                        nout=options['dim_word'])
    params = get_layer('ff')[0](options,
                                params,
                                prefix='ff_logit',
                                nin=options['dim_word'],
                                nout=options['n_words'])

    return params