# Example 1
def param_init_gru(options,
                   params,
                   prefix='gru',
                   nin=None,
                   dim=None,
                   hiero=False):
    """Register GRU layer parameters in *params* under *prefix*.

    Adds the gate weights W (input -> reset/update), U (recurrent ->
    reset/update), the candidate-state weights Wx/Ux, and the biases
    b/bx.  With ``hiero=True`` the input-side gate parameters (W, b)
    are omitted.  Defaults for *nin*/*dim* come from
    ``options['dim_proj']``.  Returns the (mutated) *params* dict.
    """
    nin = options['dim_proj'] if nin is None else nin
    dim = options['dim_proj'] if dim is None else dim

    if not hiero:
        # input -> gates: two (nin, dim) blocks stacked along columns
        params[prfx(prefix, 'W')] = numpy.concatenate(
            [norm_weight(nin, dim), norm_weight(nin, dim)], axis=1)
        params[prfx(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')

    # recurrent -> gates: two orthogonal (dim, dim) blocks side by side
    params[prfx(prefix, 'U')] = numpy.concatenate(
        [ortho_weight(dim), ortho_weight(dim)], axis=1)

    # candidate hidden-state transform and its bias
    params[prfx(prefix, 'Wx')] = norm_weight(nin, dim)
    params[prfx(prefix, 'Ux')] = ortho_weight(dim)
    params[prfx(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')
    return params
# Example 2
def param_init_lstm_tied(options,
                         params,
                         prefix='lstm_tied',
                         nin=None,
                         dim=None):
    """Register tied-gate LSTM parameters in *params* under *prefix*.

    Only three gate blocks are created (rather than the usual four),
    matching the "tied" variant of the layer.  W maps the input to the
    three gates, U is the recurrent transform, and b is the shared
    bias.  Defaults for *nin*/*dim* come from ``options['dim_proj']``.
    Returns the (mutated) *params* dict.
    """
    nin = options['dim_proj'] if nin is None else nin
    dim = options['dim_proj'] if dim is None else dim

    # input -> gates: three (nin, dim) blocks along the column axis
    params[prfx(prefix, 'W')] = numpy.concatenate(
        [norm_weight(nin, dim),
         norm_weight(nin, dim),
         norm_weight(nin, dim)], axis=1)

    # recurrent -> gates: three orthogonal (dim, dim) blocks
    params[prfx(prefix, 'U')] = numpy.concatenate(
        [ortho_weight(dim),
         ortho_weight(dim),
         ortho_weight(dim)], axis=1)

    params[prfx(prefix, 'b')] = numpy.zeros((3 * dim,)).astype('float32')
    return params
# Example 3
def param_init_fflayer(options,
                       params,
                       prefix='ff',
                       nin=None,
                       nout=None,
                       ortho=True,
                       use_bias=True):
    """Register a feed-forward layer: weight W of shape (nin, nout)
    plus an optional zero bias b.

    *nin*/*nout* default to ``options['dim_proj']``; *ortho* is passed
    through to ``norm_weight``.  Returns the (mutated) *params* dict.
    """
    nin = options['dim_proj'] if nin is None else nin
    nout = options['dim_proj'] if nout is None else nout

    params[prfx(prefix, 'W')] = norm_weight(nin, nout, scale=0.01, ortho=ortho)
    if use_bias:
        params[prfx(prefix, 'b')] = numpy.zeros((nout,)).astype('float32')
    return params
# Example 4
def param_init_gru_cond(options,
                        params,
                        prefix='gru_cond',
                        nin=None,
                        dim=None,
                        dimctx=None):
    """Register a conditional (attention-equipped) GRU layer.

    First initializes a plain GRU under *prefix*, then adds the
    context projections (Wc, Wcx) and the attention parameters
    (Wi_att, Wc_att, Wd_att, b_att, U_att, c_tt).  All of *nin*,
    *dim*, *dimctx* default to ``options['dim']``.  Returns the
    (mutated) *params* dict.
    """
    nin = options['dim'] if nin is None else nin
    dim = options['dim'] if dim is None else dim
    dimctx = options['dim'] if dimctx is None else dimctx

    # base GRU parameters (W, b, U, Wx, Ux, bx)
    params = param_init_gru(options,
                            params,
                            prefix,
                            nin=nin,
                            dim=dim)

    # context -> gates and context -> candidate state
    params[prfx(prefix, 'Wc')] = norm_weight(dimctx, dim*2)
    params[prfx(prefix, 'Wcx')] = norm_weight(dimctx, dim)

    # attention: previous input -> hidden
    params[prfx(prefix, 'Wi_att')] = norm_weight(nin, dimctx)

    # attention: context -> hidden (square, dimctx x dimctx)
    params[prfx(prefix, 'Wc_att')] = norm_weight(dimctx)

    # attention: decoder state -> hidden
    params[prfx(prefix, 'Wd_att')] = norm_weight(dim, dimctx)

    # attention: hidden bias
    params[prfx(prefix, 'b_att')] = numpy.zeros((dimctx,)).astype('float32')

    # attention: scoring vector and scalar bias.
    # NOTE(review): the key 'c_tt' (not 'c_att') is kept as-is — the
    # companion layer code in this codebase lineage looks it up under
    # exactly this name, so renaming it would break loading.
    params[prfx(prefix, 'U_att')] = norm_weight(dimctx, 1)
    params[prfx(prefix, 'c_tt')] = numpy.zeros((1,)).astype('float32')

    return params
# Example 5
def init_params(options):
    """Assemble the full model parameter dict from *options*.

    Creates word embeddings, the (optionally bidirectional and
    multi-layer) description/question encoders, the attention
    projections, and the readout / logit layers.  Layer constructors
    are dispatched through ``get_layer(name)[0]``.  Returns an
    OrderedDict mapping parameter names to numpy arrays.
    """
    params = OrderedDict()

    # embedding
    params['Wemb_word'] = norm_weight(options['n_words_q'],
                                      options['dim_word_desc'])

    # `mult` scales the context dimensionality: x2 for a plain
    # bidirectional encoder; for multi-layer 'lstm_ms' variants it is
    # the layer count (again doubled if bidirectional).
    mult = 2
    if options['ms_nlayers'] > 1 and (options['encoder_desc'] == 'lstm_ms' or \
            options['encoder_desc'] == 'lstm_max_ms'):

        mult = options['ms_nlayers']
        if options['use_bidir']:
            mult *= 2

    # optional bilinear doc/question similarity matrix
    if options['use_dq_sims']:
        params['ff_att_bi_dq'] = \
                norm_weight(mult * options['dim'],
                            mult * options['dim'])

    # attention scoring vector
    params['ff_att_proj'] = norm_vec(options['dim'])

    # encoder: bidirectional RNN
    params = get_layer(options['encoder_desc_word'])[0](options,
                                                        params,
                                                        prefix='encoder_desc_word',
                                                        nin=options['dim_word_desc'],
                                                        dim=options['dim'])
    params = get_layer(options['encoder_q'])[0](options,
                                                params,
                                                prefix='encoder_q',
                                                nin=options['dim_word_q'],
                                                dim=options['dim'])

    # reverse-direction encoders (suffix '_r') for the bidirectional case
    if options['use_bidir']:
        params = get_layer(options['encoder_desc_word'])[0](options,
                                                            params,
                                                            prefix='encoder_desc_word_r',
                                                            nin=options['dim_word_desc'],
                                                            dim=options['dim'])
        params = get_layer(options['encoder_q'])[0](options,
                                                    params,
                                                    prefix='encoder_q_r',
                                                    nin=options['dim_word_q'],
                                                    dim=options['dim'])


    # optional sentence-level representation: embed word-encoder output
    # and run a second (optionally bidirectional) encoder over sentences
    if options['use_sent_reps']:
        params['Wemb_sent'] = norm_weight(mult * options['dim'],
                                          mult * options['dim'])
        # encoder: bidirectional RNN
        params = get_layer(options['encoder_desc_sent'])[0](options,
                                                            params,
                                                            prefix='encoder_desc_sent',
                                                            nin=mult * options['dim'],
                                                            dim=options['dim'])
        if options['use_bidir']:
            params = get_layer(options['encoder_desc_sent'])[0](options,
                                                                params,
                                                                prefix='encoder_desc_sent_r',
                                                                nin=mult * options['dim'],
                                                                dim=options['dim'])
    ctxdim = mult * options['dim']
    logger.info("context dimensions is %d" % ctxdim)
    # attention projections: context and question into the shared space
    params = get_layer('ff')[0](options, params,
                                prefix='ff_att_ctx',
                                nin=ctxdim,
                                nout=options['dim'])

    # readout
    params = get_layer('ff')[0](options, params,
                                prefix='ff_att_q',
                                nin=ctxdim,
                                nout=options['dim'],
                                use_bias=False,
                                ortho=False)


    if options['use_desc_skip_c_g']:
        # readout for mean pooled desc
        params = get_layer('ff')[0](options, params,
                                    prefix='ff_out_mean_d',
                                    nin=ctxdim,
                                    nout=options['dim_word_ans'],
                                    use_bias=False,
                                    ortho=False)


    # readout from the question and attended context into answer space
    params = get_layer('ff')[0](options, params,
                                prefix='ff_out_q',
                                nin=ctxdim,
                                nout=options['dim_word_ans'],
                                ortho=False)

    params = get_layer('ff')[0](options, params,
                                prefix='ff_out_ctx',
                                nin=ctxdim,
                                nout=options['dim_word_ans'],
                                use_bias=False,
                                ortho=False)

    # final softmax logit layer over the answer vocabulary
    params = get_layer('ff')[0](options, params,
                                prefix='ff_logit',
                                nin=options['dim_word_ans'],
                                nout=options['n_words_ans'],
                                ortho=False)
    return params