import logging
from collections import OrderedDict

import numpy

logger = logging.getLogger(__name__)


def param_init_gru(options, params, prefix='gru', nin=None, dim=None,
                   hiero=False):
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']

    if not hiero:
        # input-to-hidden weights and bias for the reset and update gates
        W = numpy.concatenate([norm_weight(nin, dim),
                               norm_weight(nin, dim)], axis=1)
        params[prfx(prefix, 'W')] = W
        params[prfx(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')

    # recurrent weights for the reset and update gates
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[prfx(prefix, 'U')] = U

    # input-to-hidden and recurrent weights for the candidate state
    Wx = norm_weight(nin, dim)
    params[prfx(prefix, 'Wx')] = Wx
    Ux = ortho_weight(dim)
    params[prfx(prefix, 'Ux')] = Ux
    params[prfx(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')

    return params
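
# The initializers in this file rely on helpers (prfx, norm_weight,
# ortho_weight, norm_vec) defined elsewhere in the codebase. Below is a
# minimal sketch of what they are assumed to do, following the usual
# GroundHog/dl4mt conventions; the real definitions may differ, so the
# sketches carry a _sketch suffix to avoid shadowing them.

def prfx_sketch(pp, name):
    # joins a layer prefix and a parameter name, e.g. 'gru' + 'W' -> 'gru_W'
    return '%s_%s' % (pp, name)


def ortho_weight_sketch(ndim):
    # square matrix with orthonormal columns, via SVD of a Gaussian matrix
    W = numpy.random.randn(ndim, ndim)
    u, _, _ = numpy.linalg.svd(W)
    return u.astype('float32')


def norm_weight_sketch(nin, nout=None, scale=0.01, ortho=True):
    # scaled Gaussian weights; falls back to an orthogonal init when square
    if nout is None:
        nout = nin
    if nout == nin and ortho:
        return ortho_weight_sketch(nin)
    return (scale * numpy.random.randn(nin, nout)).astype('float32')


def norm_vec_sketch(nin, scale=0.01):
    # scaled Gaussian vector, used for ff_att_proj below
    return (scale * numpy.random.randn(nin)).astype('float32')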
def param_init_lstm_tied(options, params, prefix='lstm_tied', nin=None,
                         dim=None):
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']

    # three gate blocks rather than the usual four, consistent with tying
    # the forget gate to the input gate
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[prfx(prefix, 'W')] = W
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[prfx(prefix, 'U')] = U
    params[prfx(prefix, 'b')] = numpy.zeros((3 * dim,)).astype('float32')

    return params
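
# A quick shape check of the tied-LSTM initializer (hypothetical options
# dict, chosen only for this example): with dim_proj = 4 it should yield
# lstm_tied_W of shape (4, 12), lstm_tied_U of shape (4, 12) and
# lstm_tied_b of shape (12,), i.e. three dim-wide blocks each.
def _demo_lstm_tied_shapes():
    opts = {'dim_proj': 4}
    p = param_init_lstm_tied(opts, OrderedDict())
    for name, value in p.items():
        print(name, value.shape)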
def param_init_fflayer(options, params, prefix='ff', nin=None, nout=None,
                       ortho=True, use_bias=True):
    if nin is None:
        nin = options['dim_proj']
    if nout is None:
        nout = options['dim_proj']

    params[prfx(prefix, 'W')] = norm_weight(nin, nout, scale=0.01, ortho=ortho)
    if use_bias:
        params[prfx(prefix, 'b')] = numpy.zeros((nout,)).astype('float32')

    return params
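
# At run time a feed-forward layer built from these parameters computes
# activ(x.dot(W) + b). The real codebase applies this through its
# fflayer/get_layer machinery in Theano; the numpy sketch below is a
# hypothetical stand-in using tanh as the activation.
def _demo_fflayer_apply(x, params, prefix='ff'):
    pre = x.dot(params[prefix + '_W'])
    if (prefix + '_b') in params:
        pre = pre + params[prefix + '_b']
    return numpy.tanh(pre)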
def param_init_gru_cond(options, params, prefix='gru_cond',
                        nin=None, dim=None, dimctx=None):
    if nin is None:
        nin = options['dim']
    if dim is None:
        dim = options['dim']
    if dimctx is None:
        dimctx = options['dim']

    params = param_init_gru(options, params, prefix, nin=nin, dim=dim)

    # context to the GRU gates
    Wc = norm_weight(dimctx, dim * 2)
    params[prfx(prefix, 'Wc')] = Wc
    # context to the GRU candidate state
    Wcx = norm_weight(dimctx, dim)
    params[prfx(prefix, 'Wcx')] = Wcx

    # attention: prev -> hidden
    Wi_att = norm_weight(nin, dimctx)
    params[prfx(prefix, 'Wi_att')] = Wi_att
    # attention: context -> hidden
    Wc_att = norm_weight(dimctx)
    params[prfx(prefix, 'Wc_att')] = Wc_att
    # attention: decoder state -> hidden
    Wd_att = norm_weight(dim, dimctx)
    params[prfx(prefix, 'Wd_att')] = Wd_att
    # attention: hidden bias
    b_att = numpy.zeros((dimctx,)).astype('float32')
    params[prfx(prefix, 'b_att')] = b_att

    # attention: hidden -> scalar energy
    U_att = norm_weight(dimctx, 1)
    params[prfx(prefix, 'U_att')] = U_att
    c_att = numpy.zeros((1,)).astype('float32')
    # stored under the historical key 'c_tt' (not 'c_att') so that lookups
    # elsewhere in the codebase keep matching
    params[prfx(prefix, 'c_tt')] = c_att

    return params
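
# For reference, these parameters implement the usual dl4mt-style alignment
# model: e_j = U_att^T tanh(Wc_att c_j + Wd_att h + Wi_att y_prev + b_att)
# + c_tt, followed by a softmax over positions j. The sketch below is a
# hypothetical numpy rendering with simplified shapes (ctx: (n_positions,
# dimctx), h: (dim,), y_prev: (nin,)); the real layer runs inside a Theano
# scan.
def _demo_attention_weights(ctx, h, y_prev, p, prefix='gru_cond'):
    pctx = ctx.dot(p[prefix + '_Wc_att']) + p[prefix + '_b_att']
    pstate = h.dot(p[prefix + '_Wd_att']) + y_prev.dot(p[prefix + '_Wi_att'])
    energies = numpy.tanh(pctx + pstate[None, :]).dot(p[prefix + '_U_att'])
    energies = energies[:, 0] + p[prefix + '_c_tt'][0]
    # softmax over positions, shifted for numerical stability
    exp = numpy.exp(energies - energies.max())
    return exp / exp.sum()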
def init_params(options):
    params = OrderedDict()

    # embedding
    params['Wemb_word'] = norm_weight(options['n_words_q'],
                                      options['dim_word_desc'])

    mult = 2
    if options['ms_nlayers'] > 1 and (options['encoder_desc'] == 'lstm_ms' or
                                      options['encoder_desc'] == 'lstm_max_ms'):
        mult = options['ms_nlayers']

    if options['use_bidir']:
        mult *= 2

    if options['use_dq_sims']:
        params['ff_att_bi_dq'] = norm_weight(mult * options['dim'],
                                             mult * options['dim'])

    params['ff_att_proj'] = norm_vec(options['dim'])

    # encoder: bidirectional RNN
    params = get_layer(options['encoder_desc_word'])[0](options, params,
                                                        prefix='encoder_desc_word',
                                                        nin=options['dim_word_desc'],
                                                        dim=options['dim'])
    params = get_layer(options['encoder_q'])[0](options, params,
                                                prefix='encoder_q',
                                                nin=options['dim_word_q'],
                                                dim=options['dim'])
    if options['use_bidir']:
        params = get_layer(options['encoder_desc_word'])[0](options, params,
                                                            prefix='encoder_desc_word_r',
                                                            nin=options['dim_word_desc'],
                                                            dim=options['dim'])
        params = get_layer(options['encoder_q'])[0](options, params,
                                                    prefix='encoder_q_r',
                                                    nin=options['dim_word_q'],
                                                    dim=options['dim'])

    if options['use_sent_reps']:
        params['Wemb_sent'] = norm_weight(mult * options['dim'],
                                          mult * options['dim'])
        # encoder: bidirectional RNN over sentence representations
        params = get_layer(options['encoder_desc_sent'])[0](options, params,
                                                            prefix='encoder_desc_sent',
                                                            nin=mult * options['dim'],
                                                            dim=options['dim'])
        if options['use_bidir']:
            params = get_layer(options['encoder_desc_sent'])[0](options, params,
                                                                prefix='encoder_desc_sent_r',
                                                                nin=mult * options['dim'],
                                                                dim=options['dim'])

    ctxdim = mult * options['dim']
    logger.info("context dimension is %d" % ctxdim)

    params = get_layer('ff')[0](options, params, prefix='ff_att_ctx',
                                nin=ctxdim, nout=options['dim'])

    # readout
    params = get_layer('ff')[0](options, params, prefix='ff_att_q',
                                nin=ctxdim, nout=options['dim'],
                                use_bias=False, ortho=False)
    if options['use_desc_skip_c_g']:
        # readout for the mean-pooled description
        params = get_layer('ff')[0](options, params, prefix='ff_out_mean_d',
                                    nin=ctxdim, nout=options['dim_word_ans'],
                                    use_bias=False, ortho=False)

    params = get_layer('ff')[0](options, params, prefix='ff_out_q',
                                nin=ctxdim, nout=options['dim_word_ans'],
                                ortho=False)
    params = get_layer('ff')[0](options, params, prefix='ff_out_ctx',
                                nin=ctxdim, nout=options['dim_word_ans'],
                                use_bias=False, ortho=False)
    params = get_layer('ff')[0](options, params, prefix='ff_logit',
                                nin=options['dim_word_ans'],
                                nout=options['n_words_ans'],
                                ortho=False)
    return params
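
# init_params dispatches through get_layer(name)[0], which this codebase is
# assumed to resolve via a registry mapping layer names to (param
# initializer, layer builder) pairs, in the usual GroundHog/dl4mt style.
# A hypothetical sketch; the builder slots are left as None since only the
# initializers appear in this file.
_layers_sketch = {
    'ff': (param_init_fflayer, None),
    'gru': (param_init_gru, None),
    'gru_cond': (param_init_gru_cond, None),
    'lstm_tied': (param_init_lstm_tied, None),
}


def _get_layer_sketch(name):
    # returns the (initializer, builder) pair registered for a layer name
    return _layers_sketch[name]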