def param_init_lstm(options, params, prefix='lstm', nin=None, dim=None):
    """
    Initialize LSTM parameters.

    The four per-gate weight matrices are stacked along axis 1 so that a
    single dot product per step covers all gates (sliced apart at run
    time).

    Parameters
    ----------
    options : dict
        Model options; supplies 'dim_proj' as the default size.
    params : dict
        Parameter dictionary, updated in place.
    prefix : str
        Key prefix for this layer's parameters.
    nin : int or None
        Input dimensionality (defaults to options['dim_proj']).
    dim : int or None
        Hidden dimensionality (defaults to options['dim_proj']).

    Returns
    -------
    dict
        The same `params` dict, with 'W', 'U' and 'b' entries added.
    """
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']

    # Input-to-hidden weights: one normally-initialized block per gate.
    params[_p(prefix, 'W')] = numpy.concatenate(
        [norm_weight(nin, dim) for _ in range(4)], axis=1)

    # Hidden-to-hidden (recurrent) weights, orthogonally initialized.
    params[_p(prefix, 'U')] = numpy.concatenate(
        [ortho_weight(dim) for _ in range(4)], axis=1)

    # Gate biases start at zero.
    params[_p(prefix, 'b')] = numpy.zeros((4 * dim,)).astype('float32')
    return params
def param_init_multi_lstm_s(options, params, prefix='multi_lstm_s',
                            in_dim=None, out_dim=None):
    """
    Initialize parameters for a multi-pipe LSTM variant.

    Per pipe, stacks several weight blocks along axis 1: with
    options['with_gate'] set, three blocks (two gate blocks plus a
    candidate block with glorot scale 4.); otherwise two (one gate block
    plus the scaled candidate). All pipes are then concatenated.

    Fixes over the original: the repeated `== True` comparisons are
    replaced with plain truthiness, the duplicated gated/ungated
    branches are collapsed via a block count, and the dead commented-out
    debug prints are removed. The sequence and arguments of the random
    draws are unchanged.

    Parameters
    ----------
    options : dict
        Must provide 'wdim', 'edim', 'pipe_num', 'with_gate'.
    params : dict
        Parameter dictionary, updated in place.
    prefix : str
        Key prefix for this layer's parameters.
    in_dim, out_dim : int or None
        Input/output sizes; default to options['wdim'] / options['edim'].

    Returns
    -------
    dict
        The same `params` dict, with 'W', 'U' and 'b' entries added.
    """
    if in_dim is None:
        in_dim = options['wdim']
    if out_dim is None:
        out_dim = options['edim']

    pipe_num = options['pipe_num']
    # NOTE(review): any truthy value now enables gating (the original
    # compared `== True`); config flags here appear to be plain bools.
    with_gate = bool(options['with_gate'])
    # 3 stacked blocks per pipe when gated, otherwise 2.
    n_blocks = 3 if with_gate else 2

    # Input-to-hidden weights: the final block of each pipe uses the
    # larger glorot scale (4.), matching the original initialization.
    list_w = []
    for _ in range(pipe_num):
        blocks = [glorot_uniform(in_dim, out_dim)
                  for _ in range(n_blocks - 1)]
        blocks.append(glorot_uniform(in_dim, out_dim, 4.))
        list_w.append(numpy.concatenate(blocks, axis=1))
    params[_p(prefix, 'W')] = numpy.concatenate(list_w, axis=1)

    # Recurrent weights: each block maps the full concatenated hidden
    # state (pipe_num * out_dim) down to out_dim.
    list_u = []
    for _ in range(pipe_num):
        blocks = [ortho_weight(pipe_num * out_dim, out_dim)
                  for _ in range(n_blocks)]
        list_u.append(numpy.concatenate(blocks, axis=1))
    params[_p(prefix, 'U')] = numpy.concatenate(list_u, axis=1)

    # One zero-initialized bias entry per stacked block.
    params[_p(prefix, 'b')] = numpy.zeros(
        (n_blocks * pipe_num * out_dim,)).astype(config.floatX)
    return params
def param_init_lstm_cond(options, params, prefix='lstm_cond',
                         nin=None, dim=None, dimctx=None):
    """
    Initialize parameters for a conditional (attentive) LSTM.

    Adds stacked gate weights (W, U, b), a context-to-gates projection
    (Wc), and the attention parameters (Wc_att, Wct_att, Wd_att, b_att,
    optional deep-attention layers, U_att, and the scalar bias stored
    under 'c_tt').

    Parameters
    ----------
    options : dict
        Supplies 'dim' as the default size and 'n_layers_att'.
    params : dict
        Parameter dictionary, updated in place.
    prefix : str
        Key prefix for this layer's parameters.
    nin, dim, dimctx : int or None
        Input / hidden / context sizes; each defaults to options['dim'].

    Returns
    -------
    dict
        The same `params` dict with all entries added.
    """
    if nin is None:
        nin = options['dim']
    if dim is None:
        dim = options['dim']
    if dimctx is None:
        dimctx = options['dim']

    # Input-to-LSTM weights: per-gate matrices stacked along axis 1 so
    # one dot product serves all gates (sliced apart downstream).
    params[_p(prefix, 'W')] = numpy.concatenate(
        [norm_weight(nin, dim) for _ in range(4)], axis=1)

    # LSTM-to-LSTM (recurrent) weights, orthogonal per gate.
    params[_p(prefix, 'U')] = numpy.concatenate(
        [ortho_weight(dim) for _ in range(4)], axis=1)

    # LSTM gate bias.
    params[_p(prefix, 'b')] = numpy.zeros((4 * dim,)).astype('float32')

    # Context-to-gates projection.
    params[_p(prefix, 'Wc')] = norm_weight(dimctx, dim * 4)

    # Attention: context -> hidden.
    params[_p(prefix, 'Wc_att')] = norm_weight(dimctx, ortho=False)

    # Attention: last context -> hidden.
    params[_p(prefix, 'Wct_att')] = norm_weight(dimctx, ortho=False)

    # Attention: LSTM hidden state -> context space.
    params[_p(prefix, 'Wd_att')] = norm_weight(dim, dimctx)

    # Attention hidden bias.
    params[_p(prefix, 'b_att')] = numpy.zeros((dimctx,)).astype('float32')

    # Optional "deep" attention layers (range is empty when
    # n_layers_att <= 1, matching the original guard).
    for lidx in xrange(1, options['n_layers_att']):
        params[_p(prefix, 'W_att_%d' % lidx)] = ortho_weight(dimctx)
        params[_p(prefix, 'b_att_%d' % lidx)] = \
            numpy.zeros((dimctx,)).astype('float32')

    # Attention readout to a scalar energy.
    params[_p(prefix, 'U_att')] = norm_weight(dimctx, 1)
    # NOTE(review): the bias key is 'c_tt' (not 'c_att') — kept as-is,
    # since callers elsewhere look the parameter up under this name.
    params[_p(prefix, 'c_tt')] = numpy.zeros((1,)).astype('float32')
    return params
def param_init_lstm_cond_nox(options, params, prefix='lstm_cond_nox',
                             dim=None, dimctx=None):
    """
    Initialize parameters for a conditional LSTM with no direct input
    (context-driven only): recurrent weights U, bias b, the
    context-to-gates projection Wc, and the attention parameters.

    Fix over the original: a 'Wct_att' matrix was drawn with
    norm_weight but its storage line was commented out, so the draw was
    pure dead work — it is removed here.
    NOTE(review): removing the draw shifts subsequent RNG output for
    seeded runs; confirm before comparing against old checkpoints.

    Parameters
    ----------
    options : dict
        Supplies 'dim' as the default size.
    params : dict
        Parameter dictionary, updated in place.
    prefix : str
        Key prefix for this layer's parameters.
    dim, dimctx : int or None
        Hidden / context sizes; each defaults to options['dim'].

    Returns
    -------
    dict
        The same `params` dict with all entries added.
    """
    if dim is None:
        dim = options['dim']
    if dimctx is None:
        dimctx = options['dim']

    # LSTM-to-LSTM recurrent weights, four gates stacked along axis 1.
    params[_p(prefix, 'U')] = numpy.concatenate(
        [ortho_weight(dim) for _ in range(4)], axis=1)

    # LSTM gate bias.
    params[_p(prefix, 'b')] = numpy.zeros((4 * dim,)).astype('float32')

    # Context-to-gates projection.
    params[_p(prefix, 'Wc')] = norm_weight(dimctx, dim * 4)

    # Attention: context -> hidden.
    params[_p(prefix, 'Wc_att')] = norm_weight(dimctx, ortho=False)

    # Attention: LSTM hidden state -> context space.
    params[_p(prefix, 'Wd_att')] = norm_weight(dim, dimctx)

    # Attention hidden bias.
    params[_p(prefix, 'b_att')] = numpy.zeros((dimctx,)).astype('float32')

    # Attention readout to a scalar energy.
    params[_p(prefix, 'U_att')] = norm_weight(dimctx, 1)
    params[_p(prefix, 'c_att')] = numpy.zeros((1,)).astype('float32')
    return params
def param_init_lstm(options, params, prefix='lstm', nin=None, dim=None):
    """
    Initialize LSTM parameters: stacked input weights W, recurrent
    weights U, and bias b, with all four gates concatenated along
    axis 1 for a single dot product per step.

    NOTE(review): this redefines the `param_init_lstm` declared earlier
    in the file; being later, this definition is the one that takes
    effect. Consider removing one of the two copies.

    Returns the same `params` dict with 'W', 'U' and 'b' entries added.
    """
    nin = options['dim_proj'] if nin is None else nin
    dim = options['dim_proj'] if dim is None else dim

    gate_count = 4  # input, forget, output, cell-candidate blocks

    # Input weights: one normally-initialized block per gate.
    w_blocks = []
    for _ in range(gate_count):
        w_blocks.append(norm_weight(nin, dim))
    params[_p(prefix, 'W')] = numpy.concatenate(w_blocks, axis=1)

    # Recurrent weights: one orthogonal block per gate.
    u_blocks = []
    for _ in range(gate_count):
        u_blocks.append(ortho_weight(dim))
    params[_p(prefix, 'U')] = numpy.concatenate(u_blocks, axis=1)

    # Gate biases start at zero.
    params[_p(prefix, 'b')] = numpy.zeros(
        (gate_count * dim,)).astype('float32')
    return params
def param_init_lstm_cond(options, params, prefix='lstm_cond',
                         nin=None, dim=None, dimctx=None):
    """
    Initialize parameters for a conditional (attentive) LSTM, with an
    optional scalar gating "selector" when options['selector'] is set.

    NOTE(review): this redefines the `param_init_lstm_cond` declared
    earlier in the file; being later, this definition takes effect.
    Consider removing one of the two copies.

    Parameters
    ----------
    options : dict
        Supplies 'dim' as the default size, plus 'n_layers_att' and
        'selector'.
    params : dict
        Parameter dictionary, updated in place.
    prefix : str
        Key prefix for this layer's parameters.
    nin, dim, dimctx : int or None
        Input / hidden / context sizes; each defaults to options['dim'].

    Returns
    -------
    dict
        The same `params` dict with all entries added.
    """
    nin = options['dim'] if nin is None else nin
    dim = options['dim'] if dim is None else dim
    dimctx = options['dim'] if dimctx is None else dimctx

    n_gates = 4

    # Input-to-LSTM weights: per-gate matrices stacked along axis 1 so
    # one dot product serves all gates (sliced apart downstream).
    params[_p(prefix, 'W')] = numpy.concatenate(
        [norm_weight(nin, dim) for _ in range(n_gates)], axis=1)

    # LSTM-to-LSTM (recurrent) weights, orthogonal per gate.
    params[_p(prefix, 'U')] = numpy.concatenate(
        [ortho_weight(dim) for _ in range(n_gates)], axis=1)

    # LSTM gate bias.
    params[_p(prefix, 'b')] = numpy.zeros(
        (n_gates * dim,)).astype('float32')

    # Context-to-gates projection.
    params[_p(prefix, 'Wc')] = norm_weight(dimctx, dim * n_gates)

    # Attention: context -> hidden, LSTM hidden -> context space, bias.
    params[_p(prefix, 'Wc_att')] = norm_weight(dimctx, ortho=False)
    params[_p(prefix, 'Wd_att')] = norm_weight(dim, dimctx)
    params[_p(prefix, 'b_att')] = numpy.zeros((dimctx,)).astype('float32')

    # Optional "deep" attention layers (range is empty when
    # n_layers_att <= 1, matching the original guard).
    for lidx in xrange(1, options['n_layers_att']):
        params[_p(prefix, 'W_att_%d' % lidx)] = ortho_weight(dimctx)
        params[_p(prefix, 'b_att_%d' % lidx)] = \
            numpy.zeros((dimctx,)).astype('float32')

    # Attention readout to a scalar energy. The bias is stored under
    # 'c_tt' — kept byte-for-byte, since callers look it up by that key.
    params[_p(prefix, 'U_att')] = norm_weight(dimctx, 1)
    params[_p(prefix, 'c_tt')] = numpy.zeros((1,)).astype('float32')

    if options['selector']:
        # Scalar gate ("selector") applied over the context.
        params[_p(prefix, 'W_sel')] = norm_weight(dim, 1)
        params[_p(prefix, 'b_sel')] = numpy.float32(0.)
    return params