def param_init_decoder(options, params, prefix='decoder_gru'):
    """Initialize GRU decoder parameters and store them under *prefix*."""
    n_x = options['n_x']
    n_h = options['n_h']

    # Gate (reset/update) input projection: two uniform blocks side by side.
    params[_p(prefix, 'W')] = np.concatenate(
        [uniform_weight(n_x, n_h), uniform_weight(n_x, n_h)], axis=1)
    # Gate recurrent projection: two orthogonal blocks side by side.
    params[_p(prefix, 'U')] = np.concatenate(
        [ortho_weight(n_h), ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'b')] = zero_bias(2 * n_h)

    # Candidate-state projections.
    params[_p(prefix, 'Wx')] = uniform_weight(n_x, n_h)
    params[_p(prefix, 'Ux')] = ortho_weight(n_h)
    params[_p(prefix, 'bx')] = zero_bias(n_h)
    # Extra bias, presumably used to form the initial hidden state
    # — confirm against the decoder's forward pass.
    params[_p(prefix, 'b0')] = zero_bias(n_h)

    return params
def param_init_decoder(options, params, prefix='decoder_gru'):
    """Add GRU decoder weights to *params*, keyed by *prefix*."""
    n_x = options['n_x']
    n_h = options['n_h']

    gate_W = np.concatenate([uniform_weight(n_x, n_h),
                             uniform_weight(n_x, n_h)], axis=1)
    params[_p(prefix, 'W')] = gate_W

    gate_U = np.concatenate([ortho_weight(n_h), ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'U')] = gate_U
    # One bias vector covers both gates.
    params[_p(prefix, 'b')] = zero_bias(2 * n_h)

    # Candidate hidden-state projections.
    params[_p(prefix, 'Wx')] = uniform_weight(n_x, n_h)
    params[_p(prefix, 'Ux')] = ortho_weight(n_h)
    params[_p(prefix, 'bx')] = zero_bias(n_h)
    # Additional bias, presumably for the initial state — confirm in caller.
    params[_p(prefix, 'b0')] = zero_bias(n_h)

    return params
def init_params(options, W):
    """Build the full model parameter dict; word embeddings come from *W*."""
    n_words = options['n_words']
    n_x = options['n_x']
    n_h = options['n_h']
    n_z = options['n_z']
    n_s = options['n_s']

    params = OrderedDict()
    # Word embedding loaded from the pretrained matrix W
    # (random alternative would be uniform_weight(n_words, n_x)).
    params['Wemb'] = W.astype(config.floatX)

    # Record the LSTM's dimensions under its prefix, then initialize it.
    options[_p('lstm', 'n_x')] = n_x
    options[_p('lstm', 'n_h')] = n_h
    options[_p('lstm', 'n_g')] = n_s
    params = param_init_lstm(options, params, 'lstm')

    # Output projection back to the vocabulary, plus begin-of-sequence bias.
    params['Vhid'] = uniform_weight(n_h, n_x)
    params['bhid'] = zero_bias(n_words)
    params['bos'] = zero_bias(n_x)

    # Context projections into the hidden space.
    params['Tv'] = uniform_weight(n_z, n_h)
    params['Ts'] = uniform_weight(n_x, n_h)
    params['Ta'] = uniform_weight(n_s, n_h)

    # Latent-code regression head (re-z).
    params['task_W'] = uniform_weight(n_h, n_z)
    params['task_b'] = zero_bias(n_z)

    return params
def param_init_encoder(options, params, prefix='encoder_lstm'):
    """Initialize the factored LSTM encoder's weights under *prefix*.

    Each weight family (Wa..Cc) gets one uniform matrix per gate
    (i, f, o, c), mapping its fan-in dimension to n_f; the per-gate
    biases are zero vectors of size n_h.
    """
    n_x = options['n_x']
    n_h = options['n_h']
    n_f = options['n_f']
    n_y = options['n_y']
    n_z = options['n_z']

    gates = ('i', 'f', 'o', 'c')
    # (name stem, fan-in) pairs; every matrix maps fan-in -> n_f.
    # Order matters: it fixes both dict insertion order and the RNG
    # call sequence, matching the original assignments exactly.
    families = (('Wa', n_x), ('Wb', n_y), ('Wc', n_h),
                ('Ua', n_h), ('Ub', n_y), ('Uc', n_h),
                ('Ca', n_z), ('Cb', n_y), ('Cc', n_h))
    for stem, fan_in in families:
        for gate in gates:
            params[_p(prefix, '%s_%s' % (stem, gate))] = \
                uniform_weight(fan_in, n_f)

    for gate in gates:
        params[_p(prefix, 'b_%s' % gate)] = zero_bias(n_h)

    return params
def init_params(options):
    """Two-layer character encoder parameters."""
    n_chars = options['n_chars']
    n_h = options['n_h']

    params = OrderedDict()
    # First layer: character embedding / projection.
    params['W1'] = uniform_weight(n_chars, n_h)
    params['b1'] = zero_bias(n_h)
    # Second layer: hidden-to-hidden.
    params['W2'] = uniform_weight(n_h, n_h)
    params['b2'] = zero_bias(n_h)

    return params
def init_params(options):
    """MLP classification head: n_z -> 512 -> n_y."""
    n_y = options['n_y']
    n_z = options['n_z']

    params = OrderedDict()
    # Hidden layer of width 512.
    params['Wy1'] = uniform_weight(n_z, 512)
    params['by1'] = zero_bias(512)
    # Output layer to n_y classes.
    params['Wy2'] = uniform_weight(512, n_y)
    params['by2'] = zero_bias(n_y)

    return params
def param_init_encoder(options, params, prefix='lstm_encoder'):
    """LSTM encoder parameters (gates stacked column-wise as i, f, o, c)."""
    n_x = options['n_x']
    n_h = options['n_h']

    # Input-to-hidden weights for the four gates.
    params[_p(prefix, 'W')] = np.concatenate(
        [uniform_weight(n_x, n_h) for _ in range(4)], axis=1)
    # Recurrent weights, orthogonally initialized.
    params[_p(prefix, 'U')] = np.concatenate(
        [ortho_weight(n_h) for _ in range(4)], axis=1)
    params[_p(prefix, 'b')] = zero_bias(4 * n_h)

    # It is observed that setting a high initial forget gate bias for LSTMs
    # can give slightly better results (Le et al., 2015). Hence, the
    # forget-gate slice of the bias is set to 3.
    params[_p(prefix, 'b')][n_h:2 * n_h] = \
        3 * np.ones((n_h,)).astype(theano.config.floatX)

    return params
def param_init_encoder(options, params, prefix='lstm_encoder'):
    """Create LSTM encoder weights under *prefix* (gate order i, f, o, c)."""
    n_x = options['n_x']
    n_h = options['n_h']

    input_blocks = [uniform_weight(n_x, n_h),
                    uniform_weight(n_x, n_h),
                    uniform_weight(n_x, n_h),
                    uniform_weight(n_x, n_h)]
    params[_p(prefix, 'W')] = np.concatenate(input_blocks, axis=1)

    recurrent_blocks = [ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h)]
    params[_p(prefix, 'U')] = np.concatenate(recurrent_blocks, axis=1)

    bias = zero_bias(4 * n_h)
    params[_p(prefix, 'b')] = bias
    # A high initial forget-gate bias can give slightly better results
    # (Le et al., 2015); set that slice (second n_h chunk) to 3.
    bias[n_h:2 * n_h] = 3 * np.ones((n_h,)).astype(theano.config.floatX)

    return params
def param_init_decoder(options, params, prefix='decoder_lstm'):
    """LSTM decoder parameters; forget-gate bias initialized high (3)."""
    n_x = options['n_x']
    n_h = options['n_h']

    # Input-to-hidden weights, one uniform block per gate (i, f, o, c).
    params[_p(prefix, 'W')] = np.concatenate(
        [uniform_weight(n_x, n_h) for _ in range(4)], axis=1)
    # Recurrent weights, one orthogonal block per gate.
    params[_p(prefix, 'U')] = np.concatenate(
        [ortho_weight(n_h) for _ in range(4)], axis=1)
    params[_p(prefix, 'b')] = zero_bias(4 * n_h)
    # Forget-gate slice of the bias starts at 3 (cf. Le et al., 2015).
    params[_p(prefix, 'b')][n_h:2 * n_h] = \
        3 * np.ones((n_h,)).astype(theano.config.floatX)

    return params
def param_init_decoder(options, params, prefix='decoder'):
    """Conditional LSTM decoder parameters.

    Input (W), recurrent (U) and context (C) projections for the four
    gates, plus an initial-state projection (C0, b0) from the latent code.
    """
    n_x = options['n_x']
    n_h = options['n_h']
    n_z = options['n_z']

    params[_p(prefix, 'W')] = np.concatenate(
        [uniform_weight(n_x, n_h) for _ in range(4)], axis=1)
    params[_p(prefix, 'U')] = np.concatenate(
        [ortho_weight(n_h) for _ in range(4)], axis=1)
    # Latent context enters every gate through C.
    params[_p(prefix, 'C')] = np.concatenate(
        [uniform_weight(n_z, n_h) for _ in range(4)], axis=1)

    params[_p(prefix, 'b')] = zero_bias(4 * n_h)
    # Forget-gate slice of the bias starts at 3 (cf. Le et al., 2015).
    params[_p(prefix, 'b')][n_h:2 * n_h] = \
        3 * np.ones((n_h,)).astype(theano.config.floatX)

    # Initial hidden state is a projection of the latent code.
    params[_p(prefix, 'C0')] = uniform_weight(n_z, n_h)
    params[_p(prefix, 'b0')] = zero_bias(n_h)

    return params
def init_params(options):
    """Decoder parameters plus output projection back to the input space."""
    n_x = options['n_x']
    n_h = options['n_h']

    params = param_init_decoder(options, OrderedDict())
    # Hidden-to-input projection and its bias.
    params['Vhid'] = uniform_weight(n_h, n_x)
    params['bhid'] = zero_bias(n_x)

    return params
def param_init_lstm(options, params, prefix):
    """LSTM cell parameters: orthogonal W/U per gate, zero biases.

    Dimensions are looked up in *options* under the same *prefix*.
    """
    n_x = options[_p(prefix, 'n_x')]
    n_h = options[_p(prefix, 'n_h')]
    n_g = options[_p(prefix, 'n_g')]

    gates = ('i', 'f', 'o', 'c')
    for gate in gates:
        params[_p(prefix, 'W_' + gate)] = ortho_weight(n_h)
    for gate in gates:
        params[_p(prefix, 'U_' + gate)] = ortho_weight(n_h)
    for gate in gates:
        params[_p(prefix, 'b_' + gate)] = zero_bias(n_h)

    return params
def param_init_decoder(options, params, prefix='decoder_vanilla'):
    """Vanilla (single-gate) RNN decoder parameters."""
    n_x = options['n_x']
    n_h = options['n_h']

    # Input, recurrent and bias parameters of the plain RNN cell.
    params[_p(prefix, 'W')] = uniform_weight(n_x, n_h)
    params[_p(prefix, 'U')] = ortho_weight(n_h)
    params[_p(prefix, 'b')] = zero_bias(n_h)

    return params
def init_params(options, W):
    """Decoder with pretrained word embeddings and vocabulary projection."""
    n_words = options['n_words']
    n_x = options['n_x']
    n_h = options['n_h']

    params = OrderedDict()
    # Word embedding from the pretrained matrix W
    # (random alternative: uniform_weight(n_words, n_x)).
    params['Wemb'] = W.astype(config.floatX)
    params = param_init_decoder(options, params)

    # Projection from hidden state back to the vocabulary.
    params['Vhid'] = uniform_weight(n_h, n_x)
    params['bhid'] = zero_bias(n_words)

    return params
def init_params(options):
    """Two-layer decoder stack with a vocabulary softmax on top."""
    n_words = options['n_words']
    n_x = options['n_x']
    n_h = options['n_h']

    params = OrderedDict()
    params['Wemb'] = uniform_weight(n_words, n_x)

    # First decoder layer consumes word embeddings (size n_x).
    params = param_init_decoder(options, params, prefix='decoder_h1')
    # Second layer consumes the first layer's hidden state: temporarily
    # point n_x at n_h, then restore the original value.
    options['n_x'] = n_h
    params = param_init_decoder(options, params, prefix='decoder_h2')
    options['n_x'] = n_x

    params['Vhid'] = uniform_weight(n_h, n_words)
    params['bhid'] = zero_bias(n_words)

    return params
def init_params(options, W):
    """CNN text classifier parameters; embeddings come from pretrained *W*."""
    params = OrderedDict()
    # W is initialized by the pretrained word embedding; otherwise it could
    # be drawn randomly via uniform_weight(n_words, n_x).
    params['Wemb'] = W.astype(config.floatX)

    # One CNN encoder per filter shape.
    n_filters = len(options['filter_shapes'])
    for idx in range(n_filters):
        params = param_init_encoder(options['filter_shapes'][idx], params,
                                    prefix=_p('cnn_encoder', idx))

    # Classifier on top of the concatenated feature maps.
    n_h = options['feature_maps'] * n_filters
    params['Wy'] = uniform_weight(n_h, options['n_y'])
    params['by'] = zero_bias(options['n_y'])

    return params
def init_params(options, W):
    """Initialize CNN classifier parameters from pretrained embeddings *W*."""
    params = OrderedDict()
    # Pretrained word embedding (a random alternative would be
    # uniform_weight(n_words, n_x)).
    params['Wemb'] = W.astype(config.floatX)

    # Build one CNN encoder per filter shape.
    for idx, shape in enumerate(options['filter_shapes']):
        params = param_init_encoder(shape, params,
                                    prefix=_p('cnn_encoder', idx))

    # Softmax over the concatenated feature maps.
    total_features = options['feature_maps'] * len(options['filter_shapes'])
    params['Wy'] = uniform_weight(total_features, options['n_y'])
    params['by'] = zero_bias(options['n_y'])

    return params
def init_params(options, W):
    """Bidirectional LSTM classifier; embeddings come from pretrained *W*."""
    n_h = options['n_h']
    n_y = options['n_y']

    params = OrderedDict()
    # Pretrained word embedding (a random alternative would be
    # uniform_weight(n_words, n_x)).
    params['Wemb'] = W.astype(config.floatX)

    # Forward and reverse halves of the bidirectional LSTM.
    params = param_init_encoder(options, params, prefix="lstm_encoder")
    params = param_init_encoder(options, params, prefix="lstm_encoder_rev")

    # Softmax over the concatenated (2 * n_h) representation.
    params['Wy'] = uniform_weight(2 * n_h, n_y)
    params['by'] = zero_bias(n_y)

    return params
def init_params(options):
    """CNN-encoder / LSTM-decoder (autoencoder-style) parameters."""
    n_words = options['n_words']
    n_x = options['n_x']
    n_h = options['n_h']

    params = OrderedDict()
    # Randomly initialized word embedding; the last row is zeroed
    # (presumably the padding token — confirm against the caller).
    params['Wemb'] = uniform_weight(n_words, n_x)
    params['Wemb'][-1] = np.zeros((n_x,)).astype(theano.config.floatX)

    # Encode words into sentences: one CNN encoder per filter shape.
    n_filters = len(options['filter_shapes'])
    for idx in range(n_filters):
        params = param_init_encoder(options['filter_shapes'][idx], params,
                                    prefix=_p('cnn_encoder', idx))

    # Decoder is conditioned on the concatenated CNN features.
    options['n_z'] = options['feature_maps'] * n_filters
    params = param_init_decoder(options, params, prefix='decoder')

    # Projection from hidden state back to the vocabulary.
    params['Vhid'] = uniform_weight(n_h, n_x)
    params['bhid'] = zero_bias(n_words)

    return params
def init_params(options):
    """Initialize discriminator (d), generator (g) and ss (s) parameters.

    Returns the tuple (d_params, g_params, s_params) of OrderedDicts.
    When saved checkpoints ('./disent_result_g.npz', './disent_result_d.npz',
    './disent_result_cnn.npz') exist, matching-shape entries are loaded
    in place of the random initialization.

    Fixes: the original mixed Python 2 iteration (`xrange`, `.iteritems()`)
    with Python 3 `print(...)` calls; loops now use `range` and direct key
    iteration.  Also restores the missing space in the 'for D' log message.
    """
    n_words = options['n_words']
    n_x = options['n_x']
    n_h = options['n_h']  # number of hidden units in D

    d_params = OrderedDict()
    g_params = OrderedDict()
    s_params = OrderedDict()

    # Word embedding, shared between D and G (same ndarray object).
    emb_params = uniform_weight(n_words, n_x)
    length = len(options['filter_shapes'])
    d_params['Wemb'] = emb_params  # (d)
    g_params['Wemb'] = emb_params
    s_params = param_init_ss(length * options['feature_maps'], s_params)

    # Encoding words into sentences (CNN) for both D and G.
    # NOTE(review): both encoders use the 'cnn_d' prefix — confirm whether
    # G was meant to share D's encoder parameters by name.
    for idx in range(length):
        d_params = param_init_encoder(options['filter_shapes'][idx],
                                      d_params, prefix=_p('cnn_d', idx))
        g_params = param_init_encoder(options['filter_shapes'][idx],
                                      g_params, prefix=_p('cnn_d', idx))
    options['n_z'] = options['feature_maps'] * length

    # Discriminative head (d).
    d_params = param_init_mlp_layer(options['input_shape'],
                                    options['pred_shape'],
                                    d_params, prefix='dis_d')
    if options['feature_match'] == 'mmd_ld':
        d_params = param_init_mlp_layer(options['input_shape'],
                                        options['mmd_shape'],
                                        d_params, prefix='dis_mmd')
    # Discriminative head (q).
    g_params = param_init_mlp_layer(options['input_shape'],
                                    options['propose_shape'],
                                    g_params, prefix='dis_q')
    # Reconstruction head.
    d_params = param_init_mlp_layer(options['input_recon_shape'],
                                    options['recon_shape'],
                                    d_params, prefix='recon')

    # Batch normalization for real/fake branches.
    if options['batch_norm']:
        d_params = param_init_batch_norm(options['input_shape'], d_params,
                                         prefix='real')
        d_params = param_init_batch_norm(options['input_shape'], d_params,
                                         prefix='fake')

    # Generator LSTM decoder(s): one shared, or one per latent code.
    if options['shareLSTM']:
        g_params = param_init_decoder(options, g_params, prefix='decoder_0')
    else:
        for idx in range(options['n_codes']):
            g_params = param_init_decoder(options, g_params,
                                          prefix=_p('decoder', idx))
    g_params['Vhid'] = uniform_weight(n_h, n_x)
    g_params['bhid'] = zero_bias(n_words)

    # Warm-start from saved checkpoints when present; only entries whose
    # shapes match the fresh initialization are copied over.
    try:
        data = np.load('./disent_result_g.npz')
        print('Use saved initialization: disent_result_g.npz ...')
        for kk in list(g_params):
            if kk in data.files and g_params[kk].shape == data[kk].shape:
                print('Use ' + kk + ' for G')
                g_params[kk] = data[kk]
    except IOError:
        print('Use random initialization for G...')

    try:
        data_d = np.load('./disent_result_d.npz')
        print('Use saved initialization: disent_result_d.npz ...')
        for kk in list(d_params):
            if kk in data_d.files and d_params[kk].shape == data_d[kk].shape:
                print('Use ' + kk + ' for D')
                d_params[kk] = data_d[kk]
    except IOError:
        print('Use random initialization for D...')

    try:
        data_cnn = np.load('./disent_result_cnn.npz')
        print('Use pre-trained CNN for D...')
        for kk in list(d_params):
            if (kk in data_cnn.files
                    and d_params[kk].shape == data_cnn[kk].shape):
                print('Use ' + kk + ' for D')
                d_params[kk] = data_cnn[kk]
    except IOError:
        print(' No pre-trained CNN...')

    return d_params, g_params, s_params