Example #1
def param_init_decoder(options, params, prefix='decoder_gru'):

    n_x = options['n_x']
    n_h = options['n_h']

    W = np.concatenate([uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h)],
                       axis=1)
    params[_p(prefix, 'W')] = W

    U = np.concatenate([ortho_weight(n_h), ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'U')] = U

    params[_p(prefix, 'b')] = zero_bias(2 * n_h)

    Wx = uniform_weight(n_x, n_h)
    params[_p(prefix, 'Wx')] = Wx

    Ux = ortho_weight(n_h)
    params[_p(prefix, 'Ux')] = Ux

    params[_p(prefix, 'bx')] = zero_bias(n_h)

    params[_p(prefix, 'b0')] = zero_bias(n_h)

    return params
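Every example on this page calls the same four helpers, which are defined elsewhere in each repository. A minimal sketch of plausible implementations, inferred from the call sites (the exact uniform range is an assumption and may differ from the original code):

import numpy as np
from theano import config

def _p(prefix, name):
    # Build a prefixed parameter name, e.g. _p('decoder_gru', 'W') -> 'decoder_gru_W'.
    return '%s_%s' % (prefix, name)

def uniform_weight(n_in, n_out, scale=0.1):
    # Dense (n_in, n_out) matrix drawn uniformly from [-scale, scale];
    # the scale value here is assumed, not taken from the original code.
    return np.random.uniform(-scale, scale, (n_in, n_out)).astype(config.floatX)

def ortho_weight(ndim):
    # Square orthogonal matrix: the left singular vectors of a random
    # Gaussian matrix. A common recurrent-weight initialization.
    u, _, _ = np.linalg.svd(np.random.randn(ndim, ndim))
    return u.astype(config.floatX)

def zero_bias(ndim):
    # Zero-initialized bias vector of length ndim.
    return np.zeros((ndim,)).astype(config.floatX)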
Example #2
def param_init_decoder(options, params, prefix='decoder_gru'):
    
    n_x = options['n_x']
    n_h = options['n_h']
    
    W = np.concatenate([uniform_weight(n_x,n_h),
                        uniform_weight(n_x,n_h)], axis=1)
    params[_p(prefix,'W')] = W
    
    U = np.concatenate([ortho_weight(n_h),
                        ortho_weight(n_h)], axis=1)
    params[_p(prefix,'U')] = U
    
    params[_p(prefix,'b')] = zero_bias(2*n_h)

    Wx = uniform_weight(n_x, n_h)
    params[_p(prefix,'Wx')] = Wx
    
    Ux = ortho_weight(n_h)
    params[_p(prefix,'Ux')] = Ux
    
    params[_p(prefix,'bx')] = zero_bias(n_h)
    
    params[_p(prefix,'b0')] = zero_bias(n_h)

    return params   
Example #3
def init_params(options, W):

    n_words = options['n_words']
    n_x = options['n_x']
    n_h = options['n_h']
    n_z = options['n_z']
    n_s = options['n_s']

    params = OrderedDict()
    # word embedding init or load
    # params['Wemb'] = uniform_weight(n_words,n_x)
    params['Wemb'] = W.astype(config.floatX)

    options[_p('lstm', 'n_x')] = n_x
    options[_p('lstm', 'n_h')] = n_h
    options[_p('lstm', 'n_g')] = n_s

    params = param_init_lstm(options, params, 'lstm')

    params['Vhid'] = uniform_weight(n_h, n_x)
    params['bhid'] = zero_bias(n_words)

    params['bos'] = zero_bias(n_x)
    params['Tv'] = uniform_weight(n_z, n_h)
    params['Ts'] = uniform_weight(n_x, n_h)
    params['Ta'] = uniform_weight(n_s, n_h)

    # task head: map the hidden state back to z ("re-z")
    params['task_W'] = uniform_weight(n_h, n_z)
    params['task_b'] = zero_bias(n_z)

    return params
Example #4
def param_init_encoder(options, params, prefix='encoder_lstm'):

    n_x = options['n_x']
    n_h = options['n_h']
    n_f = options['n_f']
    n_y = options['n_y']
    n_z = options['n_z']

    params[_p(prefix, 'Wa_i')] = uniform_weight(n_x, n_f)
    params[_p(prefix, 'Wa_f')] = uniform_weight(n_x, n_f)
    params[_p(prefix, 'Wa_o')] = uniform_weight(n_x, n_f)
    params[_p(prefix, 'Wa_c')] = uniform_weight(n_x, n_f)

    params[_p(prefix, 'Wb_i')] = uniform_weight(n_y, n_f)
    params[_p(prefix, 'Wb_f')] = uniform_weight(n_y, n_f)
    params[_p(prefix, 'Wb_o')] = uniform_weight(n_y, n_f)
    params[_p(prefix, 'Wb_c')] = uniform_weight(n_y, n_f)

    params[_p(prefix, 'Wc_i')] = uniform_weight(n_h, n_f)
    params[_p(prefix, 'Wc_f')] = uniform_weight(n_h, n_f)
    params[_p(prefix, 'Wc_o')] = uniform_weight(n_h, n_f)
    params[_p(prefix, 'Wc_c')] = uniform_weight(n_h, n_f)

    params[_p(prefix, 'Ua_i')] = uniform_weight(n_h, n_f)
    params[_p(prefix, 'Ua_f')] = uniform_weight(n_h, n_f)
    params[_p(prefix, 'Ua_o')] = uniform_weight(n_h, n_f)
    params[_p(prefix, 'Ua_c')] = uniform_weight(n_h, n_f)

    params[_p(prefix, 'Ub_i')] = uniform_weight(n_y, n_f)
    params[_p(prefix, 'Ub_f')] = uniform_weight(n_y, n_f)
    params[_p(prefix, 'Ub_o')] = uniform_weight(n_y, n_f)
    params[_p(prefix, 'Ub_c')] = uniform_weight(n_y, n_f)

    params[_p(prefix, 'Uc_i')] = uniform_weight(n_h, n_f)
    params[_p(prefix, 'Uc_f')] = uniform_weight(n_h, n_f)
    params[_p(prefix, 'Uc_o')] = uniform_weight(n_h, n_f)
    params[_p(prefix, 'Uc_c')] = uniform_weight(n_h, n_f)

    params[_p(prefix, 'Ca_i')] = uniform_weight(n_z, n_f)
    params[_p(prefix, 'Ca_f')] = uniform_weight(n_z, n_f)
    params[_p(prefix, 'Ca_o')] = uniform_weight(n_z, n_f)
    params[_p(prefix, 'Ca_c')] = uniform_weight(n_z, n_f)

    params[_p(prefix, 'Cb_i')] = uniform_weight(n_y, n_f)
    params[_p(prefix, 'Cb_f')] = uniform_weight(n_y, n_f)
    params[_p(prefix, 'Cb_o')] = uniform_weight(n_y, n_f)
    params[_p(prefix, 'Cb_c')] = uniform_weight(n_y, n_f)

    params[_p(prefix, 'Cc_i')] = uniform_weight(n_h, n_f)
    params[_p(prefix, 'Cc_f')] = uniform_weight(n_h, n_f)
    params[_p(prefix, 'Cc_o')] = uniform_weight(n_h, n_f)
    params[_p(prefix, 'Cc_c')] = uniform_weight(n_h, n_f)

    params[_p(prefix, 'b_i')] = zero_bias(n_h)
    params[_p(prefix, 'b_f')] = zero_bias(n_h)
    params[_p(prefix, 'b_o')] = zero_bias(n_h)
    params[_p(prefix, 'b_c')] = zero_bias(n_h)

    return params
Example #5
def init_params(options):

    n_chars = options['n_chars']
    n_h = options['n_h']

    params = OrderedDict()
    # character embedding
    params['W1'] = uniform_weight(n_chars, n_h)
    params['b1'] = zero_bias(n_h)
    params['W2'] = uniform_weight(n_h, n_h)
    params['b2'] = zero_bias(n_h)

    return params
Example #6
def init_params(options):

    n_y = options['n_y']
    n_z = options['n_z']

    params = OrderedDict()

    params['Wy1'] = uniform_weight(n_z, 512)
    params['by1'] = zero_bias(512)

    params['Wy2'] = uniform_weight(512, n_y)
    params['by2'] = zero_bias(n_y)

    return params
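These four parameters describe a two-layer classifier mapping a code z (n_z-dimensional) through a 512-unit hidden layer to n_y outputs. A sketch of how they would plausibly be consumed (the tanh/softmax choices are assumptions; the model code is not shown here):

import theano.tensor as T

def mlp_predict(tparams, z):
    # Hypothetical forward pass for Wy1/by1/Wy2/by2: a tanh hidden layer
    # followed by a softmax over the n_y classes.
    h = T.tanh(T.dot(z, tparams['Wy1']) + tparams['by1'])
    return T.nnet.softmax(T.dot(h, tparams['Wy2']) + tparams['by2'])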
Example #7
def param_init_encoder(options, params, prefix='lstm_encoder'):
    
    n_x = options['n_x']
    n_h = options['n_h']
    
    W = np.concatenate([uniform_weight(n_x,n_h),
                        uniform_weight(n_x,n_h),
                        uniform_weight(n_x,n_h),
                        uniform_weight(n_x,n_h)], axis=1)
    params[_p(prefix, 'W')] = W
    
    U = np.concatenate([ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'U')] = U
    
    params[_p(prefix,'b')] = zero_bias(4*n_h)
    
    # It is observed that setting a high initial forget gate bias for LSTMs can
    # give slightly better results (Le et al., 2015). Hence, the initial forget
    # gate bias is set to 3.
    params[_p(prefix, 'b')][n_h:2*n_h] = 3*np.ones((n_h,)).astype(theano.config.floatX)

    return params
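The slice [n_h:2*n_h] picks out the forget-gate bias only by convention: it assumes the gate blocks were concatenated in a fixed order. Assuming the usual input/forget/output/cell ordering, the column layout of the concatenated parameters is:

# Assumed column layout of W, U, and b (the gate order is an inference,
# not shown explicitly in this snippet):
#   [:, 0*n_h : 1*n_h]  input gate
#   [:, 1*n_h : 2*n_h]  forget gate   (bias set to 3 above)
#   [:, 2*n_h : 3*n_h]  output gate
#   [:, 3*n_h : 4*n_h]  candidate cell state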
Example #8
def param_init_encoder(options, params, prefix='lstm_encoder'):

    n_x = options['n_x']
    n_h = options['n_h']

    W = np.concatenate([
        uniform_weight(n_x, n_h),
        uniform_weight(n_x, n_h),
        uniform_weight(n_x, n_h),
        uniform_weight(n_x, n_h)
    ],
                       axis=1)
    params[_p(prefix, 'W')] = W

    U = np.concatenate([
        ortho_weight(n_h),
        ortho_weight(n_h),
        ortho_weight(n_h),
        ortho_weight(n_h)
    ],
                       axis=1)
    params[_p(prefix, 'U')] = U

    params[_p(prefix, 'b')] = zero_bias(4 * n_h)

    # It is observed that setting a high initial forget gate bias for LSTMs can
    # give slightly better results (Le et al., 2015). Hence, the initial forget
    # gate bias is set to 3.
    params[_p(prefix, 'b')][n_h:2 * n_h] = 3 * np.ones(
        (n_h, )).astype(theano.config.floatX)

    return params
Example #9
def param_init_decoder(options, params, prefix='decoder_lstm'):

    n_x = options['n_x']
    n_h = options['n_h']

    W = np.concatenate([
        uniform_weight(n_x, n_h),
        uniform_weight(n_x, n_h),
        uniform_weight(n_x, n_h),
        uniform_weight(n_x, n_h)
    ],
                       axis=1)
    params[_p(prefix, 'W')] = W

    U = np.concatenate([
        ortho_weight(n_h),
        ortho_weight(n_h),
        ortho_weight(n_h),
        ortho_weight(n_h)
    ],
                       axis=1)
    params[_p(prefix, 'U')] = U

    params[_p(prefix, 'b')] = zero_bias(4 * n_h)
    params[_p(prefix, 'b')][n_h:2 * n_h] = 3 * np.ones(
        (n_h, )).astype(theano.config.floatX)

    return params
Example #10
def param_init_decoder(options, params, prefix='decoder'):

    n_x = options['n_x']
    n_h = options['n_h']
    n_z = options['n_z']

    W = np.concatenate([
        uniform_weight(n_x, n_h),
        uniform_weight(n_x, n_h),
        uniform_weight(n_x, n_h),
        uniform_weight(n_x, n_h)
    ],
                       axis=1)
    params[_p(prefix, 'W')] = W

    U = np.concatenate([
        ortho_weight(n_h),
        ortho_weight(n_h),
        ortho_weight(n_h),
        ortho_weight(n_h)
    ],
                       axis=1)
    params[_p(prefix, 'U')] = U

    C = np.concatenate([
        uniform_weight(n_z, n_h),
        uniform_weight(n_z, n_h),
        uniform_weight(n_z, n_h),
        uniform_weight(n_z, n_h)
    ],
                       axis=1)
    params[_p(prefix, 'C')] = C

    params[_p(prefix, 'b')] = zero_bias(4 * n_h)
    params[_p(prefix, 'b')][n_h:2 * n_h] = 3 * np.ones(
        (n_h, )).astype(theano.config.floatX)

    C0 = uniform_weight(n_z, n_h)
    params[_p(prefix, 'C0')] = C0

    params[_p(prefix, 'b0')] = zero_bias(n_h)

    #params[_p(prefix,'b_y')] = zero_bias(n_x)  # 48

    return params
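For orientation, and inferring only from the shapes (the decoder step function is not shown here): C projects the conditioning vector z into all four concatenated gate pre-activations at every step, while C0 and b0 are typically used once, to form the initial hidden state, along these lines:

# Hypothetical use of C0/b0, inferred from their (n_z, n_h) and (n_h,) shapes:
#   h0 = tanh(np.dot(z, params[_p(prefix, 'C0')]) + params[_p(prefix, 'b0')])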
Example #11
def init_params(options):
    
    n_x = options['n_x']  
    n_h = options['n_h']
    
    params = OrderedDict()
    params = param_init_decoder(options,params)
    
    params['Vhid'] = uniform_weight(n_h,n_x)
    params['bhid'] = zero_bias(n_x)                                     

    return params
Example #12
def param_init_lstm(options, params, prefix):

    n_x = options[_p(prefix, 'n_x')]
    n_h = options[_p(prefix, 'n_h')]
    n_g = options[_p(prefix, 'n_g')]
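    # Note: every matrix below is square (n_h x n_h); n_x and n_g are read
    # above but never used in this snippet, so this init implicitly assumes
    # the layer's input already has dimension n_h.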

    params[_p(prefix, 'W_i')] = ortho_weight(n_h)
    params[_p(prefix, 'W_f')] = ortho_weight(n_h)
    params[_p(prefix, 'W_o')] = ortho_weight(n_h)
    params[_p(prefix, 'W_c')] = ortho_weight(n_h)

    params[_p(prefix, 'U_i')] = ortho_weight(n_h)
    params[_p(prefix, 'U_f')] = ortho_weight(n_h)
    params[_p(prefix, 'U_o')] = ortho_weight(n_h)
    params[_p(prefix, 'U_c')] = ortho_weight(n_h)

    params[_p(prefix, 'b_i')] = zero_bias(n_h)
    params[_p(prefix, 'b_f')] = zero_bias(n_h)
    params[_p(prefix, 'b_o')] = zero_bias(n_h)
    params[_p(prefix, 'b_c')] = zero_bias(n_h)

    return params
Example #13
def param_init_decoder(options, params, prefix='decoder_vanilla'):

    n_x = options['n_x']
    n_h = options['n_h']

    W = uniform_weight(n_x, n_h)
    params[_p(prefix, 'W')] = W

    U = ortho_weight(n_h)
    params[_p(prefix, 'U')] = U

    params[_p(prefix, 'b')] = zero_bias(n_h)

    return params
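All of these init functions follow the same pattern: fill an OrderedDict with numpy arrays, then wrap each array in a Theano shared variable before building the graph. A usage sketch (init_tparams is a common idiom in this family of code, written out here as an assumption):

import theano
from collections import OrderedDict

def init_tparams(params):
    # Wrap each numpy array in a theano shared variable under the same key.
    tparams = OrderedDict()
    for kk, pp in params.items():
        tparams[kk] = theano.shared(pp, name=kk)
    return tparams

options = {'n_x': 300, 'n_h': 500}  # example sizes, chosen arbitrarily
params = param_init_decoder(options, OrderedDict(), prefix='decoder_vanilla')
tparams = init_tparams(params)      # ready to use in a theano graph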
Example #14
def init_params(options, W):

    n_words = options['n_words']
    n_x = options['n_x']
    n_h = options['n_h']

    params = OrderedDict()
    # word embedding
    # params['Wemb'] = uniform_weight(n_words,n_x)
    params['Wemb'] = W.astype(config.floatX)
    params = param_init_decoder(options, params)

    params['Vhid'] = uniform_weight(n_h, n_x)
    params['bhid'] = zero_bias(n_words)

    return params
Example #15
def init_params(options):

    n_words = options['n_words']
    n_x = options['n_x']
    n_h = options['n_h']

    params = OrderedDict()
    params['Wemb'] = uniform_weight(n_words, n_x)
    params = param_init_decoder(options, params, prefix='decoder_h1')

    options['n_x'] = n_h
    params = param_init_decoder(options, params, prefix='decoder_h2')
    options['n_x'] = n_x

    params['Vhid'] = uniform_weight(n_h, n_words)
    params['bhid'] = zero_bias(n_words)

    return params
Example #16
def init_params(options,W):
    
    params = OrderedDict()
    # W is initialized by the pretrained word embedding
    params['Wemb'] = W.astype(config.floatX)
    # otherwise, W will be initialized randomly
    # n_words = options['n_words']
    # n_x = options['n_x'] 
    # params['Wemb'] = uniform_weight(n_words,n_x)
    
    length = len(options['filter_shapes'])
    for idx in range(length):
        params = param_init_encoder(options['filter_shapes'][idx],params,prefix=_p('cnn_encoder',idx))
    
    n_h = options['feature_maps'] * length
    params['Wy'] = uniform_weight(n_h,options['n_y'])
    params['by'] = zero_bias(options['n_y'])                                     

    return params
Example #17
def init_params(options, W):

    params = OrderedDict()
    # W is initialized by the pretrained word embedding
    params['Wemb'] = W.astype(config.floatX)
    # otherwise, W will be initialized randomly
    # n_words = options['n_words']
    # n_x = options['n_x']
    # params['Wemb'] = uniform_weight(n_words,n_x)

    length = len(options['filter_shapes'])
    for idx in range(length):
        params = param_init_encoder(options['filter_shapes'][idx],
                                    params,
                                    prefix=_p('cnn_encoder', idx))

    n_h = options['feature_maps'] * length
    params['Wy'] = uniform_weight(n_h, options['n_y'])
    params['by'] = zero_bias(options['n_y'])

    return params
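filter_shapes and feature_maps are not defined on this page. In comparable CNN sentence classifiers they hold one 4-D convolution filter shape per window size and the number of feature maps per size, which is why n_h above is feature_maps * len(filter_shapes) once the pooled features are concatenated. A hypothetical configuration (all values assumed):

# One (num_maps, 1, window, emb_dim) shape per filter window size; this
# layout is an assumption based on common Theano CNN text classifiers.
options['feature_maps'] = 100
options['filter_shapes'] = [(100, 1, w, 300) for w in (3, 4, 5)]
# n_h = 100 * 3 = 300 concatenated features feeding Wy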
Example #18
def init_params(options,W):
    
    n_h = options['n_h']
    n_y = options['n_y']
    
    params = OrderedDict()
    # W is initialized by the pretrained word embedding
    params['Wemb'] = W.astype(config.floatX)
    # otherwise, W will be initialized randomly
    # n_words = options['n_words']
    # n_x = options['n_x'] 
    # params['Wemb'] = uniform_weight(n_words,n_x)
    
    # bidirectional LSTM
    params = param_init_encoder(options,params,prefix="lstm_encoder")
    params = param_init_encoder(options,params,prefix="lstm_encoder_rev")
    
    params['Wy'] = uniform_weight(2*n_h,n_y)
    params['by'] = zero_bias(n_y)                                     

    return params
Example #19
def init_params(options):
    
    n_words = options['n_words']
    n_x = options['n_x']  
    n_h = options['n_h']
    
    params = OrderedDict()
    # word embedding 
    params['Wemb'] = uniform_weight(n_words,n_x)
    #params['Wemb'] = W.astype(config.floatX)
    params['Wemb'][-1] = np.zeros((n_x,)).astype(theano.config.floatX)
    # encoding words into sentences
    length = len(options['filter_shapes'])
    for idx in range(length):
        params = param_init_encoder(options['filter_shapes'][idx],params,prefix=_p('cnn_encoder',idx))
    
    options['n_z'] = options['feature_maps'] * length
    params = param_init_decoder(options,params,prefix='decoder')
    
    params['Vhid'] = uniform_weight(n_h,n_x)
    params['bhid'] = zero_bias(n_words)                                    

    return params
Example #20
def init_params(options):

    n_words = options['n_words']
    n_x = options['n_x']
    n_h = options['n_h']  # number of hidden units in D

    d_params = OrderedDict()
    g_params = OrderedDict()
    s_params = OrderedDict()
    # word embedding
    emb_params = uniform_weight(n_words, n_x)
    length = len(options['filter_shapes'])

    d_params['Wemb'] = emb_params  # (d)
    g_params['Wemb'] = emb_params
    s_params = param_init_ss(length * options['feature_maps'], s_params)
    #params['Wemb'] = W.astype(config.floatX)
    # encoding words into sentences (cnn, d)

    for idx in range(length):
        d_params = param_init_encoder(options['filter_shapes'][idx],
                                      d_params,
                                      prefix=_p('cnn_d', idx))
        g_params = param_init_encoder(options['filter_shapes'][idx],
                                      g_params,
                                      prefix=_p('cnn_d', idx))

    options['n_z'] = options['feature_maps'] * length

    # discriminative (d)
    d_params = param_init_mlp_layer(options['input_shape'],
                                    options['pred_shape'],
                                    d_params,
                                    prefix='dis_d')

    if options['feature_match'] == 'mmd_ld':
        d_params = param_init_mlp_layer(options['input_shape'],
                                        options['mmd_shape'],
                                        d_params,
                                        prefix='dis_mmd')

    # discriminative (q)
    g_params = param_init_mlp_layer(options['input_shape'],
                                    options['propose_shape'],
                                    g_params,
                                    prefix='dis_q')

    #if options['reverse']:
    d_params = param_init_mlp_layer(options['input_recon_shape'],
                                    options['recon_shape'],
                                    d_params,
                                    prefix='recon')

    # batch norm

    if options['batch_norm']:
        d_params = param_init_batch_norm(options['input_shape'],
                                         d_params,
                                         prefix='real')
        d_params = param_init_batch_norm(options['input_shape'],
                                         d_params,
                                         prefix='fake')

    # lstm (gen)
    if options['shareLSTM']:
        g_params = param_init_decoder(options, g_params, prefix='decoder_0')
    else:
        for idx in range(options['n_codes']):  # don't use xrange
            g_params = param_init_decoder(options,
                                          g_params,
                                          prefix=_p('decoder', idx))

    g_params['Vhid'] = uniform_weight(n_h, n_x)
    g_params['bhid'] = zero_bias(n_words)
    try:
        data = np.load('./disent_result_g.npz')
        print('Use saved initialization: disent_result_g.npz ...')
        for kk, pp in g_params.items():
            if kk in data.keys() and g_params[kk].shape == data[kk].shape:
                print('Use ' + kk + ' for G')
                g_params[kk] = data[kk]
    except IOError:
        print('Use random initialization for G...')

    try:
        data_d = np.load('./disent_result_d.npz')
        print('Use saved initialization: disent_result_d.npz ...')
        for kk, pp in d_params.items():
            if kk in data_d.keys() and d_params[kk].shape == data_d[kk].shape:
                print('Use ' + kk + ' for D')
                d_params[kk] = data_d[kk]
    except IOError:
        print('Use random initialization for D...')

    try:
        data_cnn = np.load('./disent_result_cnn.npz')
        print('Use pre-trained CNN for D...')
        for kk, pp in d_params.items():
            if kk in data_cnn.keys() and d_params[kk].shape == data_cnn[kk].shape:
                print('Use ' + kk + ' for D')
                d_params[kk] = data_cnn[kk]
    except IOError:
        print('No pre-trained CNN...')

    return d_params, g_params, s_params
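The three load blocks above expect flat .npz archives keyed by parameter name, with shapes matching the freshly initialized arrays. The matching save side is presumably the standard idiom (an assumption, since the saving code is not shown):

import numpy as np

# Save each parameter under its dict key so the shape-checked reload works;
# if the values live in theano shared variables, call .get_value() first.
np.savez('./disent_result_g.npz', **g_params)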