def __init__(self, y_vocab, dim_word, dim, dim_ctx):
    """Allocate the weights of an LSTM whose state is seeded from a context vector.

    Args:
        y_vocab: Number of columns of the prediction layer.
        dim_word: Number of rows of the LSTM input weight ``W_lstm``.
        dim: Recurrent state width.
        dim_ctx: Width of the context projected into the initial
            hidden and memory states.
    """
    self.y_vocab = y_vocab  # 430
    self.dim_word = dim_word  # 1024
    self.dim = dim  # 512
    self.dim_ctx = dim_ctx  # 512

    # Initial state: context (image embedding) -> hidden / memory state.
    self.W_hidden_init = initializations.uniform((self.dim_ctx, self.dim))
    self.b_hidden_init = initializations.zero((self.dim,))
    self.W_memory_init = initializations.uniform((self.dim_ctx, self.dim))
    self.b_memory_init = initializations.zero((self.dim,))

    # LSTM weights (4 * dim columns; presumably the stacked gate
    # pre-activations -- confirm against the step function).
    self.W_lstm = initializations.uniform((self.dim_word, self.dim * 4))
    self.U_lstm = initializations.uniform((self.dim, self.dim * 4))
    self.b_lstm = initializations.zero((self.dim * 4,))

    # Prediction layer.
    self.W_pred = initializations.uniform((self.dim * 2, self.y_vocab))
    self.b_pred = initializations.zero((self.y_vocab,))

    # Same order as before; code that saves/loads by position may rely on it.
    self.params = [
        self.W_hidden_init, self.b_hidden_init,
        self.W_memory_init, self.b_memory_init,
        self.W_lstm, self.U_lstm, self.b_lstm,
        self.W_pred, self.b_pred,
    ]
def __init__(self, n_words, embedding_dim, hidden_dim):
    """Allocate embedding, encoder, decoder and output weights.

    Args:
        n_words: Vocabulary size (rows of ``emb``, columns of ``word_W``).
        embedding_dim: Word-embedding width.
        hidden_dim: Recurrent state width; the recurrent weights have
            ``4 * hidden_dim`` columns.
    """
    self.n_words = n_words
    self.embedding_dim = embedding_dim
    self.hidden_dim = hidden_dim

    gate_dim = hidden_dim * 4

    self.emb = initializations.uniform((n_words, embedding_dim))

    # Encoder.
    self.encode_W = initializations.uniform((embedding_dim, gate_dim))  # input -> hidden
    self.encode_U = initializations.uniform((hidden_dim, gate_dim))  # last hidden -> hidden (recurrent)
    self.encode_b = initializations.zero((gate_dim,))

    # Decoder.
    self.decode_W = initializations.uniform((embedding_dim, gate_dim))  # last word -> hidden
    self.decode_U = initializations.uniform((hidden_dim, gate_dim))  # last hidden -> hidden
    self.decode_V = initializations.uniform((hidden_dim, gate_dim))  # context -> hidden
    self.decode_b = initializations.zero((gate_dim,))

    # Hidden state -> embedding space -> vocabulary.
    self.output_W = initializations.uniform((hidden_dim, embedding_dim))
    self.output_b = initializations.zero((embedding_dim,))
    self.word_W = initializations.uniform((embedding_dim, n_words))
    self.word_b = initializations.zero((n_words,))

    # Same order as before; positional save/load code may rely on it.
    self.params = [
        self.emb,
        self.encode_W, self.encode_U, self.encode_b,
        self.decode_W, self.decode_U, self.decode_V, self.decode_b,
        self.output_W, self.output_b,
        self.word_W, self.word_b,
    ]
def __init__(self, n_vocab, y_vocab, dim_word, dim, dim_img=4096):
    """Allocate image-embedding, word-embedding, GRU and prediction weights.

    Args:
        n_vocab: Rows of the word-embedding matrix.
        y_vocab: Columns of the prediction layer.
        dim_word: Word-embedding width.
        dim: GRU state width.
        dim_img: Width of the raw image feature projected by
            ``W_img_emb``. Defaults to 4096, the value that used to be
            hard-coded.
    """
    self.n_vocab = n_vocab  # 12047
    self.y_vocab = y_vocab  # 430
    self.dim_word = dim_word  # 1024
    self.dim = dim  # 512
    self.dim_img = dim_img

    # Image embedding.
    self.W_img_emb = initializations.uniform((self.dim_img, self.dim))
    self.b_img_emb = initializations.zero((self.dim,))

    # Word embedding.
    self.W_emb = initializations.uniform((self.n_vocab, self.dim_word))

    # GRU: 2 * dim columns for the gate weights, dim for the candidate (cdd).
    self.W_gru = initializations.uniform((self.dim_word, self.dim * 2))
    self.U_gru = initializations.uniform((self.dim, self.dim * 2))
    self.b_gru = initializations.zero((self.dim * 2,))
    self.W_gru_cdd = initializations.uniform((self.dim_word, self.dim))
    self.U_gru_cdd = initializations.uniform((self.dim, self.dim))
    self.b_gru_cdd = initializations.zero((self.dim,))

    # Prediction layer.
    self.W_pred = initializations.uniform((self.dim, self.y_vocab))
    self.b_pred = initializations.zero((self.y_vocab,))

    # Same order as before; positional save/load code may rely on it.
    self.params = [
        self.W_img_emb, self.b_img_emb,
        self.W_emb,
        self.W_gru, self.U_gru, self.b_gru,
        self.W_gru_cdd, self.U_gru_cdd, self.b_gru_cdd,
        self.W_pred, self.b_pred,
    ]
def __init__(self, y_vocab, dim_word, dim, dim_ctx, emb_dim=512):
    """Allocate Glorot-initialised embedding, GRU and prediction weights.

    Args:
        y_vocab: Columns of the prediction layer.
        dim_word: Rows of ``W_fr_emb``.
        dim: GRU state width.
        dim_ctx: Rows of ``W_img_emb``.
        emb_dim: Shared width both embeddings project into, and the GRU
            input width. Defaults to 512, the value that used to be
            hard-coded.
    """
    self.y_vocab = y_vocab  # 430
    self.dim_word = dim_word  # 1024
    self.dim = dim  # 512
    self.dim_ctx = dim_ctx  # 512
    self.emb_dim = emb_dim

    # Embeddings into the shared emb_dim space.
    self.W_img_emb = initializations.glorot_uniform((self.dim_ctx, self.emb_dim))
    self.b_img_emb = initializations.zero((self.emb_dim,))
    self.W_fr_emb = initializations.glorot_uniform((self.dim_word, self.emb_dim))
    self.b_fr_emb = initializations.zero((self.emb_dim,))

    # GRU: 2 * dim columns for the gate weights, dim for the candidate (cdd).
    self.W_gru = initializations.glorot_uniform((self.emb_dim, self.dim * 2))
    self.U_gru = initializations.glorot_uniform((self.dim, self.dim * 2))
    self.b_gru = initializations.zero((self.dim * 2,))
    self.W_gru_cdd = initializations.glorot_uniform((self.emb_dim, self.dim))
    self.U_gru_cdd = initializations.glorot_uniform((self.dim, self.dim))
    self.b_gru_cdd = initializations.zero((self.dim,))

    # Prediction layer.
    self.W_pred = initializations.glorot_uniform((self.dim * 2, self.y_vocab))
    self.b_pred = initializations.zero((self.y_vocab,))

    # Same order as before (note: both W embeddings precede both biases).
    self.params = [
        self.W_img_emb, self.W_fr_emb, self.b_img_emb, self.b_fr_emb,
        self.W_gru, self.U_gru, self.b_gru,
        self.W_gru_cdd, self.U_gru_cdd, self.b_gru_cdd,
        self.W_pred, self.b_pred,
    ]
def __init__(self, n_vocab, y_vocab, dim_word, dim, dim_img=4096):
    """Create the image-embedding, word-embedding, GRU and prediction weights.

    Args:
        n_vocab: Rows of the word-embedding matrix.
        y_vocab: Columns of the prediction layer.
        dim_word: Word-embedding width.
        dim: GRU state width.
        dim_img: Width of the raw image feature fed to ``W_img_emb``.
            Defaults to 4096, which was previously hard-coded.
    """
    self.n_vocab = n_vocab  # 12047
    self.y_vocab = y_vocab  # 430
    self.dim_word = dim_word  # 1024
    self.dim = dim  # 512
    self.dim_img = dim_img

    uniform = initializations.uniform
    zero = initializations.zero

    # Image embedding.
    self.W_img_emb = uniform((self.dim_img, self.dim))
    self.b_img_emb = zero((self.dim,))

    # Word embedding.
    self.W_emb = uniform((self.n_vocab, self.dim_word))

    # GRU: 2 * dim columns for the gate weights, dim for the candidate (cdd).
    self.W_gru = uniform((self.dim_word, self.dim * 2))
    self.U_gru = uniform((self.dim, self.dim * 2))
    self.b_gru = zero((self.dim * 2,))
    self.W_gru_cdd = uniform((self.dim_word, self.dim))
    self.U_gru_cdd = uniform((self.dim, self.dim))
    self.b_gru_cdd = zero((self.dim,))

    # Prediction layer.
    self.W_pred = uniform((self.dim, self.y_vocab))
    self.b_pred = zero((self.y_vocab,))

    # Same order as before; positional save/load code may rely on it.
    self.params = [
        self.W_img_emb, self.b_img_emb,
        self.W_emb,
        self.W_gru, self.U_gru, self.b_gru,
        self.W_gru_cdd, self.U_gru_cdd, self.b_gru_cdd,
        self.W_pred, self.b_pred,
    ]
def __init__(self, n_words, dim_embed, dim_hidden, dim_image, bias_init_vector=None):
    """Allocate word-embedding, LSTM, image-encoder and output weights.

    Args:
        n_words: Vocabulary size.
        dim_embed: Word-embedding width.
        dim_hidden: LSTM state width.
        dim_image: Width of the image feature projected by
            ``encode_img_W``.
        bias_init_vector: Optional array used as the initial value of
            the output bias ``emb_word_b``; it is cast to float32 and
            wrapped in a Theano shared variable. When ``None`` the bias
            is drawn with ``initializations.uniform`` instead.
    """
    self.n_words = n_words
    self.dim_embed = dim_embed
    self.dim_hidden = dim_hidden
    self.dim_image = dim_image

    # Word embedding.
    self.Wemb = initializations.uniform((n_words, dim_embed), scale=0.1)
    self.bemb = initializations.zero((dim_embed,))

    # Single LSTM matrix over [1, embedding, hidden]; the leading 1 is
    # presumably a constant bias input -- confirm against the step function.
    self.lstm_W = initializations.uniform(
        (1 + dim_embed + dim_hidden, dim_hidden * 4), scale=0.1)

    # Image feature -> hidden space.
    self.encode_img_W = initializations.uniform((dim_image, dim_hidden), scale=0.1)
    self.encode_img_b = initializations.zero((dim_hidden,))

    # Hidden state -> vocabulary.
    self.emb_word_W = initializations.uniform((dim_hidden, n_words), scale=0.1)
    if bias_init_vector is None:
        self.emb_word_b = initializations.uniform((n_words,))
    else:
        self.emb_word_b = theano.shared(
            bias_init_vector.astype(np.float32), borrow=True)

    # Same order as before; positional save/load code may rely on it.
    self.params = [
        self.Wemb, self.bemb,
        self.lstm_W,
        self.encode_img_W, self.encode_img_b,
        self.emb_word_W, self.emb_word_b,
    ]
def __init__(self, n_words, dim_embed, dim_hidden, dim_image, bias_init_vector=None):
    """Create the word-embedding, LSTM, image-encoder and output weights.

    Args:
        n_words: Vocabulary size.
        dim_embed: Word-embedding width.
        dim_hidden: LSTM state width.
        dim_image: Width of the image feature fed to ``encode_img_W``.
        bias_init_vector: Optional initial value for the output bias
            ``emb_word_b`` (cast to float32 and stored as a Theano
            shared variable). If ``None``, the bias is drawn with
            ``initializations.uniform``.
    """
    self.n_words = n_words
    self.dim_embed = dim_embed
    self.dim_hidden = dim_hidden
    self.dim_image = dim_image

    uniform = initializations.uniform
    zero = initializations.zero

    # Word embedding.
    self.Wemb = uniform((n_words, dim_embed), scale=0.1)
    self.bemb = zero((dim_embed,))

    # One LSTM matrix over [1, embedding, hidden]; the extra row is
    # presumably for a constant bias input -- confirm with the caller.
    self.lstm_W = uniform((1 + dim_embed + dim_hidden, dim_hidden * 4), scale=0.1)

    # Image feature -> hidden space.
    self.encode_img_W = uniform((dim_image, dim_hidden), scale=0.1)
    self.encode_img_b = zero((dim_hidden,))

    # Hidden state -> vocabulary.
    self.emb_word_W = uniform((dim_hidden, n_words), scale=0.1)
    if bias_init_vector is None:
        self.emb_word_b = uniform((n_words,))
    else:
        self.emb_word_b = theano.shared(
            bias_init_vector.astype(np.float32), borrow=True)

    # Same order as before; positional save/load code may rely on it.
    self.params = [
        self.Wemb, self.bemb,
        self.lstm_W,
        self.encode_img_W, self.encode_img_b,
        self.emb_word_W, self.emb_word_b,
    ]
def __init__(self, n_vocab, y_vocab, dim_word, dim, dim_ctx):
    """Allocate attention, word-embedding, GRU and prediction weights.

    Args:
        n_vocab: Rows of the word-embedding matrix.
        y_vocab: Columns of the prediction layer.
        dim_word: Word-embedding width.
        dim: GRU state width.
        dim_ctx: Context (attention) width.
    """
    self.n_vocab = n_vocab  # 12047
    self.y_vocab = y_vocab  # 430
    self.dim_word = dim_word  # 1024
    self.dim = dim  # 1024
    self.dim_ctx = dim_ctx  # 512

    # Initial context -> state.
    self.W_ctx_init = initializations.uniform((self.dim_ctx, self.dim))
    self.b_ctx_init = initializations.zero((self.dim,))

    # Attention: image context -> context space.
    self.W_ctx_att = initializations.uniform((self.dim_ctx, self.dim_ctx))
    self.b_ctx_att = initializations.zero((self.dim_ctx,))

    # Attention: hidden state -> context space.
    self.W_dim_att = initializations.uniform((self.dim, self.dim_ctx))

    # Attention energy (one scalar per context position).
    self.U_att = initializations.uniform((self.dim_ctx, 1))
    self.c_att = initializations.zero((1,))

    # Word embedding.
    self.W_emb = initializations.uniform((self.n_vocab, self.dim_word))

    # GRU with additional context inputs (cdd = candidate).
    self.W_gru_ctx = initializations.uniform((self.dim_word, self.dim_ctx))
    self.b_gru_ctx = initializations.zero((self.dim_ctx,))
    self.W_gru = initializations.uniform((self.dim_word, self.dim * 2))
    self.U_gru = initializations.uniform((self.dim, self.dim * 2))
    self.b_gru = initializations.zero((self.dim * 2,))
    self.U_gru_ctx = initializations.uniform((self.dim_ctx, self.dim * 2))
    self.W_gru_cdd = initializations.uniform((self.dim_word, self.dim))
    self.U_gru_cdd = initializations.uniform((self.dim, self.dim))
    self.b_gru_cdd = initializations.zero((self.dim,))
    self.U_gru_cdd_ctx = initializations.uniform((self.dim_ctx, self.dim))

    # Prediction layer.
    self.W_pred = initializations.uniform((self.dim * 2, self.y_vocab))
    self.b_pred = initializations.zero((self.y_vocab,))

    # Same order as before; positional save/load code may rely on it.
    self.params = [
        self.W_ctx_init, self.b_ctx_init,
        self.W_ctx_att, self.b_ctx_att,
        self.W_dim_att,
        self.U_att, self.c_att,
        self.W_emb,
        self.W_gru_ctx, self.b_gru_ctx,
        self.W_gru, self.U_gru, self.b_gru, self.U_gru_ctx,
        self.W_gru_cdd, self.U_gru_cdd, self.b_gru_cdd, self.U_gru_cdd_ctx,
        self.W_pred, self.b_pred,
    ]
def __init__(self, n_vocab, y_vocab, dim_word, dim, dim_ctx):
    """Create the attention, word-embedding, GRU and prediction weights.

    Args:
        n_vocab: Rows of the word-embedding matrix.
        y_vocab: Columns of the prediction layer.
        dim_word: Word-embedding width.
        dim: GRU state width.
        dim_ctx: Context (attention) width.
    """
    self.n_vocab = n_vocab  # 12047
    self.y_vocab = y_vocab  # 430
    self.dim_word = dim_word  # 1024
    self.dim = dim  # 1024
    self.dim_ctx = dim_ctx  # 512

    uniform = initializations.uniform
    zero = initializations.zero

    # Initial context -> state.
    self.W_ctx_init = uniform((self.dim_ctx, self.dim))
    self.b_ctx_init = zero((self.dim,))

    # Attention: image context -> context space.
    self.W_ctx_att = uniform((self.dim_ctx, self.dim_ctx))
    self.b_ctx_att = zero((self.dim_ctx,))

    # Attention: hidden state -> context space.
    self.W_dim_att = uniform((self.dim, self.dim_ctx))

    # Attention energy (one scalar per context position).
    self.U_att = uniform((self.dim_ctx, 1))
    self.c_att = zero((1,))

    # Word embedding.
    self.W_emb = uniform((self.n_vocab, self.dim_word))

    # GRU with additional context inputs (cdd = candidate).
    self.W_gru_ctx = uniform((self.dim_word, self.dim_ctx))
    self.b_gru_ctx = zero((self.dim_ctx,))
    self.W_gru = uniform((self.dim_word, self.dim * 2))
    self.U_gru = uniform((self.dim, self.dim * 2))
    self.b_gru = zero((self.dim * 2,))
    self.U_gru_ctx = uniform((self.dim_ctx, self.dim * 2))
    self.W_gru_cdd = uniform((self.dim_word, self.dim))
    self.U_gru_cdd = uniform((self.dim, self.dim))
    self.b_gru_cdd = zero((self.dim,))
    self.U_gru_cdd_ctx = uniform((self.dim_ctx, self.dim))

    # Prediction layer.
    self.W_pred = uniform((self.dim * 2, self.y_vocab))
    self.b_pred = zero((self.y_vocab,))

    # Same order as before; positional save/load code may rely on it.
    self.params = [
        self.W_ctx_init, self.b_ctx_init,
        self.W_ctx_att, self.b_ctx_att,
        self.W_dim_att,
        self.U_att, self.c_att,
        self.W_emb,
        self.W_gru_ctx, self.b_gru_ctx,
        self.W_gru, self.U_gru, self.b_gru, self.U_gru_ctx,
        self.W_gru_cdd, self.U_gru_cdd, self.b_gru_cdd, self.U_gru_cdd_ctx,
        self.W_pred, self.b_pred,
    ]
def __init__(self, n_vocab, y_vocab, dim_word, dim, dim_ctx=4096):
    """Allocate image-init, word-embedding, GRU and prediction weights.

    Args:
        n_vocab: Rows of the word-embedding matrix.
        y_vocab: Columns of the prediction layer.
        dim_word: Word-embedding width.
        dim: GRU state width.
        dim_ctx: Width of the image feature projected by ``W_img_init``.
            Defaults to 4096, the value that used to be hard-coded.
    """
    self.n_vocab = n_vocab  # 12047
    self.y_vocab = y_vocab  # 430
    self.dim_word = dim_word  # 1024
    self.dim = dim  # 1024
    self.dim_ctx = dim_ctx  # 4096

    # Initial context: image feature -> state.
    self.W_img_init = initializations.uniform((self.dim_ctx, self.dim))
    self.b_img_init = initializations.zero((self.dim,))

    # Word embedding.
    self.W_emb = initializations.uniform((self.n_vocab, self.dim_word))

    # GRU: 2 * dim columns for the gate weights, dim for the candidate (cdd).
    self.W_gru = initializations.uniform((self.dim_word, self.dim * 2))
    self.U_gru = initializations.uniform((self.dim, self.dim * 2))
    self.b_gru = initializations.zero((self.dim * 2,))
    self.W_gru_cdd = initializations.uniform((self.dim_word, self.dim))
    self.U_gru_cdd = initializations.uniform((self.dim, self.dim))
    self.b_gru_cdd = initializations.zero((self.dim,))

    # Prediction layer.
    self.W_pred = initializations.uniform((self.dim * 2, self.y_vocab))
    self.b_pred = initializations.zero((self.y_vocab,))

    # Same order as before; positional save/load code may rely on it.
    self.params = [
        self.W_img_init, self.b_img_init,
        self.W_emb,
        self.W_gru, self.U_gru, self.b_gru,
        self.W_gru_cdd, self.U_gru_cdd, self.b_gru_cdd,
        self.W_pred, self.b_pred,
    ]
def __init__(self, name, latent_dim, depth, k, hidden_dim,
             exploration_probability, exploration_decay_rate):
    """
    Create the named parameters of the recurrent model.

    Notation (from the original author):
    z = input (n, latent_dim)
    o = hidden representation (n, depth, hidden_dim)
    x = output (n,depth) (int)
    h = hidden input representation z*W o*U x*V

    Args:
        name: Prefix used for the name of every variable created here.
        latent_dim: Rows of ``W_h``.
        depth: Stored on the instance; not used in this method.
        k: ``V_h`` has ``k + 2`` rows; the value head has ``k + 1`` outputs.
        hidden_dim: Hidden width shared by all gate weights.
        exploration_probability: Initial value of the shared float32
            ``exploration_probability`` variable.
        exploration_decay_rate: Stored as a float32 scalar.
    """
    self.latent_dim = latent_dim
    self.depth = depth
    self.k = k
    self.hidden_dim = hidden_dim
    self.exploration_probability = theano.shared(
        np.float32(exploration_probability),
        "{}_exploration_probability".format(name))
    self.exploration_decay_rate = np.float32(exploration_decay_rate)

    # Hidden representation
    self.W_h = glorot_uniform((latent_dim, hidden_dim), "{}_W_h".format(name))  # z, (latent_dim, hidden_dim)
    self.U_h = glorot_uniform((hidden_dim, hidden_dim), "{}_U_h".format(name))  # h, (hidden_dim, hidden_dim)
    self.V_h = glorot_uniform((k + 2, hidden_dim), "{}_V_h".format(name))  # x, (k+2, hidden_dim)
    self.b_h = zero((hidden_dim,), "{}_b_h".format(name))  # (hidden_dim,)
    # Forget gate
    self.W_f = glorot_uniform((hidden_dim, hidden_dim), "{}_W_f".format(name))  # (hidden_dim, hidden_dim)
    self.b_f = zero((hidden_dim,), "{}_b_f".format(name))  # (hidden_dim,)
    # Input gate
    self.W_i = glorot_uniform((hidden_dim, hidden_dim), "{}_W_i".format(name))  # (hidden_dim, hidden_dim)
    self.b_i = zero((hidden_dim,), "{}_b_i".format(name))  # (hidden_dim,)
    # Write gate
    self.W_w = glorot_uniform((hidden_dim, hidden_dim), "{}_W_w".format(name))  # (hidden_dim, hidden_dim)
    self.b_w = zero((hidden_dim,), "{}_b_w".format(name))  # (hidden_dim,)
    # Output -- previously created under the input gate's names
    # ("{}_W_i" / "{}_b_i"), which made the two pairs indistinguishable
    # by name.
    self.W_o = glorot_uniform((hidden_dim, hidden_dim), "{}_W_o".format(name))  # (hidden_dim, hidden_dim)
    self.b_o = zero((hidden_dim,), "{}_b_o".format(name))  # (hidden_dim,)
    # Hidden state
    self.W_j = glorot_uniform((hidden_dim, hidden_dim), "{}_W_j".format(name))  # (hidden_dim, hidden_dim)
    self.b_j = zero((hidden_dim,), "{}_b_j".format(name))  # (hidden_dim,)
    # Value predictions
    self.W_v = glorot_uniform((hidden_dim, k + 1), "{}_W_v".format(name))  # (hidden_dim, k+1)
    self.b_v = zero((k + 1,), "{}_b_v".format(name))  # (k+1,)

    # Same order as before; positional save/load code may rely on it.
    self.params = [
        self.W_h, self.U_h, self.V_h, self.b_h,
        self.W_f, self.b_f,
        self.W_i, self.b_i,
        self.W_w, self.b_w,
        self.W_o, self.b_o,
        self.W_j, self.b_j,
        self.W_v, self.b_v,
    ]
def __init__(self, name, depth, k, hidden_dim):
    """Create the named parameters of the recurrent model.

    Args:
        name: Prefix used for the name of every variable created here.
        depth: Stored on the instance; not used in this method.
        k: ``U_h`` has ``k + 1`` rows.
        hidden_dim: Hidden width shared by all gate weights.
    """
    self.depth = depth
    self.k = k
    self.hidden_dim = hidden_dim

    # Hidden representation
    self.W_h = glorot_uniform((hidden_dim, hidden_dim), "{}_W_h".format(name))  # h, (hidden_dim, hidden_dim)
    self.U_h = glorot_uniform((k + 1, hidden_dim), "{}_U_h".format(name))  # x, (k+1, hidden_dim)
    self.b_h = zero((hidden_dim,), "{}_b_h".format(name))  # (hidden_dim,)
    # Forget gate
    self.W_f = glorot_uniform((hidden_dim, hidden_dim), "{}_W_f".format(name))  # (hidden_dim, hidden_dim)
    self.b_f = zero((hidden_dim,), "{}_b_f".format(name))  # (hidden_dim,)
    # Input gate
    self.W_i = glorot_uniform((hidden_dim, hidden_dim), "{}_W_i".format(name))  # (hidden_dim, hidden_dim)
    self.b_i = zero((hidden_dim,), "{}_b_i".format(name))  # (hidden_dim,)
    # Write gate
    self.W_w = glorot_uniform((hidden_dim, hidden_dim), "{}_W_w".format(name))  # (hidden_dim, hidden_dim)
    self.b_w = zero((hidden_dim,), "{}_b_w".format(name))  # (hidden_dim,)
    # Output -- previously created under the input gate's names
    # ("{}_W_i" / "{}_b_i"), which made the two pairs indistinguishable
    # by name.
    self.W_o = glorot_uniform((hidden_dim, hidden_dim), "{}_W_o".format(name))  # (hidden_dim, hidden_dim)
    self.b_o = zero((hidden_dim,), "{}_b_o".format(name))  # (hidden_dim,)
    # Hidden state
    self.W_j = glorot_uniform((hidden_dim, hidden_dim), "{}_W_j".format(name))  # (hidden_dim, hidden_dim)
    self.b_j = zero((hidden_dim,), "{}_b_j".format(name))  # (hidden_dim,)
    # y predictions
    self.W_y = glorot_uniform((hidden_dim, 1), "{}_W_y".format(name))  # (hidden_dim, 1)
    self.b_y = zero((1,), "{}_b_y".format(name))  # (1,)

    # Weight matrices only (no biases); the name suggests these are the
    # ones subject to clipping elsewhere -- confirm with the caller.
    self.clip_params = [
        self.W_h, self.U_h,
        self.W_f, self.W_i, self.W_w, self.W_o, self.W_j,
        self.W_y,
    ]
    # Same order as before; positional save/load code may rely on it.
    self.params = [
        self.W_h, self.U_h, self.b_h,
        self.W_f, self.b_f,
        self.W_i, self.b_i,
        self.W_w, self.b_w,
        self.W_o, self.b_o,
        self.W_j, self.b_j,
        self.W_y, self.b_y,
    ]
def build_model(dp, word_count_threshold, word_embedding_dim, image_embedding_dim,
                hidden_size, batch_size, num_vocab):
    """Build the image-captioning LSTM graph and compile its Theano functions.

    The image feature is embedded, prepended to the embedded sentence as
    the first time step, and the sequence is run through a masked LSTM.
    ``regularization_ratio``, ``dropout``, ``RMSprop`` and
    ``numpy_floatX`` are read from the enclosing module.

    Args:
        dp: Unused in this function.
        word_count_threshold: Unused in this function.
        word_embedding_dim: Word-embedding width.
        image_embedding_dim: Image-embedding width. Must equal
            ``word_embedding_dim`` because both embeddings are
            concatenated along the time axis.
        hidden_size: LSTM state width.
        batch_size: Batch size baked into the constant bias input and
            the initial states.
        num_vocab: Vocabulary size.

    Returns:
        Tuple ``(train_function, params, Y_function, Hout_function)``.
        ``train_function`` returns the loss and applies the RMSprop
        updates; ``Y_function`` and ``Hout_function`` return the decoder
        outputs and hidden states without modifying the parameters.
    """
    # Image encoder (4096 -> embedding dim) and text encoder
    # (vocab dim -> embedding dim).
    We = initializations.uniform((4096, image_embedding_dim))
    be = initializations.zero((image_embedding_dim,))
    Ws = initializations.uniform((num_vocab, word_embedding_dim))

    # Text decoder (hidden dim -> vocab dim).
    Wd = initializations.uniform((hidden_size, num_vocab))
    bd = initializations.zero((num_vocab,))

    # Image (batch) -> image_embedding_dim, with a length-1 time axis.
    image = T.matrix()
    embedded_image = T.dot(image, We) + be
    embedded_image = embedded_image.dimshuffle(0, 'x', 1)

    # Sentence.
    sentence = T.matrix(dtype='int32')
    mask = T.matrix()
    embedded_sentence = Ws[sentence]  # (batch, sentence length, embedding_dim)

    # Prepend the image to the front of the sentence, then go time-major.
    X = T.concatenate([embedded_image, embedded_sentence], axis=1)
    X = X.dimshuffle(1, 0, 2)
    X = dropout(X, 0.5)

    # LSTM weights (for i, f, o, g). The step input is [1, x_t, h_{t-1}],
    # so the row count is 1 + word_embedding_dim + hidden_size. The
    # previous `1 + word_embedding_dim * 2` was only right when
    # hidden_size == word_embedding_dim.
    WLSTM = initializations.uniform(
        (1 + word_embedding_dim + hidden_size, 4 * hidden_size))
    bias = T.alloc(numpy_floatX(1.), batch_size, 1)

    def _step(b, x_t, h_t_1, m_, c_, weight):
        Hin = T.concatenate([b, x_t, h_t_1], axis=1)  # concat 1, x[t], h[t-1]
        IFOG = T.dot(Hin, weight)
        ifo = T.nnet.sigmoid(IFOG[:, :3 * hidden_size])
        g = T.tanh(IFOG[:, 3 * hidden_size:])
        IFOGf = T.concatenate([ifo, g], axis=1)
        # c = i * g + c_prev * f
        c = (IFOGf[:, :hidden_size] * IFOGf[:, 3 * hidden_size:]
             + c_ * IFOGf[:, hidden_size:2 * hidden_size])
        # Where the mask is 0, carry the previous cell / hidden state over.
        c = c * m_[:, None] + c_ * (1. - m_)[:, None]
        Hout = IFOGf[:, 2 * hidden_size:3 * hidden_size] * c
        Hout = Hout * m_[:, None] + h_t_1 * (1. - m_)[:, None]
        return Hout, c

    # The scan update dictionaries were always discarded (overwritten
    # below); bind them to `_` to make that explicit.
    (Houts, cells), _ = theano.scan(
        fn=lambda x, m, h, c, b, weight: _step(b, x, h, m, c, weight),
        sequences=[X, mask.T],
        outputs_info=[
            T.alloc(numpy_floatX(0.), batch_size, hidden_size),
            T.alloc(numpy_floatX(0.), batch_size, hidden_size),
        ],
        non_sequences=[bias, WLSTM])
    Houts = Houts.dimshuffle(1, 0, 2)

    # NOTE(review): the softmax was already commented out in the original
    # (T.nnet.softmax(T.dot(hout, wd)+dd)), so the log below is taken of
    # the raw decoder output -- confirm this is intended.
    Y, _ = theano.scan(
        fn=lambda hout, wd, dd: T.dot(hout, wd) + dd,
        sequences=[Houts],
        non_sequences=[Wd, bd])
    Y = Y[:, 1:, :]
    n_timestep = Y.shape[1]

    # NOTE(review): the lambda receives the per-sample mask row `m` but
    # indexes with the outer `mask` matrix -- kept as-is, confirm intent.
    losses, _ = theano.scan(
        fn=lambda y, m, sen: -T.log(
            1e-20 + y[T.arange(n_timestep), sen[1:]][mask != 0.0]),
        sequences=[Y, mask, sentence])
    loss = T.sum(losses) / Y.shape[0]
    loss += regularization_ratio * 0.5 * T.sum(WLSTM * WLSTM)
    loss += regularization_ratio * 0.5 * T.sum(Wd * Wd)

    params = [We, be, Ws, WLSTM, Wd, bd]
    updates = RMSprop(cost=loss, params=params)

    train_function = theano.function(
        inputs=[image, sentence, mask], outputs=loss,
        updates=updates, allow_input_downcast=True)
    # The inference functions must not carry the optimizer updates;
    # previously every call to them also performed a training step.
    Y_function = theano.function(
        inputs=[image, sentence, mask], outputs=Y,
        allow_input_downcast=True)
    Hout_function = theano.function(
        inputs=[image, sentence, mask], outputs=Houts,
        allow_input_downcast=True)
    return train_function, params, Y_function, Hout_function
from keras.datasets import mnist
from keras.utils.np_utils import to_categorical
from keras.metrics import categorical_accuracy
from keras.initializations import glorot_uniform, zero
import numpy as np

# inputs and targets are placeholders
input_dim = 28 * 28
output_dim = 10
x = K.placeholder(name="x", shape=(None, input_dim))
ytrue = K.placeholder(name="y", shape=(None, output_dim))

# model parameters are variables
hidden_dim = 128
W1 = glorot_uniform((input_dim, hidden_dim))
b1 = zero((hidden_dim, ))
W2 = glorot_uniform((hidden_dim, output_dim))
b2 = zero((output_dim, ))
params = [W1, b1, W2, b2]

# two-layer model
hidden = K.sigmoid(K.dot(x, W1) + b1)
ypred = K.softmax(K.dot(hidden, W2) + b2)

# categorical cross entropy loss
# The Keras 1.x backend (the generation that ships `keras.initializations`)
# declares categorical_crossentropy(output, target): predictions first,
# targets second. The arguments were previously passed the other way round.
loss = K.mean(K.categorical_crossentropy(ypred, ytrue), axis=None)

# categorical accuracy (metric signature is (y_true, y_pred))
accuracy = categorical_accuracy(ytrue, ypred)

# Train function
def build(self, input_shape):
    """Create the ``gamma`` and ``beta`` weights and register them as trainable.

    Both weights take the input shape with its first axis dropped;
    ``gamma`` is created with ``initializations.one`` and ``beta`` with
    ``initializations.zero``.
    """
    # Everything after the first axis of the input shape.
    weight_shape = input_shape[1:]
    self.gamma = initializations.one(weight_shape, name='gamma')
    self.beta = initializations.zero(weight_shape, name='beta')
    self.trainable_weights = [self.gamma, self.beta]
    super(LayerNorm, self).build(input_shape)
def __init__(self, n_words, dim_emb, dim_img):
    """Allocate the word embedding and four three-layer weight stacks.

    The stacks (``word``, ``phs``, ``phl``, ``st``) share the same
    layout: layer 1 outputs 200 units, layers 2 and 3 output 300. Each
    layer's input width is three times the previous width (presumably a
    window of three positions -- confirm with the forward pass).
    ``dim_img`` is added to the input width of layer 1 in ``word``,
    layer 2 in ``phs``, layer 3 in ``phl``, and nowhere in ``st``.

    Args:
        n_words: Rows of the word-embedding matrix.
        dim_emb: Word-embedding width.
        dim_img: Image-feature width.
    """
    self.n_words = n_words
    self.dim_emb = dim_emb
    self.dim_img = dim_img

    self.emb_W = initializations.uniform((n_words, dim_emb))

    # "word" stack: image joins at layer 1.
    self.cnn_word_W1 = initializations.uniform((dim_emb * 3 + dim_img, 200))
    self.cnn_word_b1 = initializations.zero((200,))
    self.cnn_word_W2 = initializations.uniform((200 * 3, 300))
    self.cnn_word_b2 = initializations.zero((300,))
    self.cnn_word_W3 = initializations.uniform((300 * 3, 300))
    self.cnn_word_b3 = initializations.zero((300,))

    # "phs" stack: image joins at layer 2.
    self.cnn_phs_W1 = initializations.uniform((dim_emb * 3, 200))
    self.cnn_phs_b1 = initializations.zero((200,))
    self.cnn_phs_W2 = initializations.uniform((200 * 3 + dim_img, 300))
    self.cnn_phs_b2 = initializations.zero((300,))
    self.cnn_phs_W3 = initializations.uniform((300 * 3, 300))
    self.cnn_phs_b3 = initializations.zero((300,))

    # "phl" stack: image joins at layer 3.
    self.cnn_phl_W1 = initializations.uniform((dim_emb * 3, 200))
    self.cnn_phl_b1 = initializations.zero((200,))
    self.cnn_phl_W2 = initializations.uniform((200 * 3, 300))
    self.cnn_phl_b2 = initializations.zero((300,))
    self.cnn_phl_W3 = initializations.uniform((300 * 3 + dim_img, 300))
    self.cnn_phl_b3 = initializations.zero((300,))

    # "st" stack: no image input in any layer.
    self.cnn_st_W1 = initializations.uniform((dim_emb * 3, 200))
    self.cnn_st_b1 = initializations.zero((200,))
    self.cnn_st_W2 = initializations.uniform((200 * 3, 300))
    self.cnn_st_b2 = initializations.zero((300,))
    self.cnn_st_W3 = initializations.uniform((300 * 3, 300))
    self.cnn_st_b3 = initializations.zero((300,))
def __init__(self, n_words, dim_emb, dim_img):
    """Create the word embedding and four three-layer weight stacks.

    All four stacks (``word``, ``phs``, ``phl``, ``st``) output 200
    units at layer 1 and 300 units at layers 2 and 3. Each layer's input
    width is three times the previous width (presumably a window of
    three positions -- confirm with the forward pass). ``dim_img`` is
    added to the input width of layer 1 in ``word``, layer 2 in ``phs``,
    layer 3 in ``phl``, and is not used in ``st``.

    Args:
        n_words: Rows of the word-embedding matrix.
        dim_emb: Word-embedding width.
        dim_img: Image-feature width.
    """
    self.n_words = n_words
    self.dim_emb = dim_emb
    self.dim_img = dim_img

    uniform = initializations.uniform
    zero = initializations.zero

    self.emb_W = uniform((n_words, dim_emb))

    # "word" stack: image joins at layer 1.
    self.cnn_word_W1 = uniform((dim_emb * 3 + dim_img, 200))
    self.cnn_word_b1 = zero((200,))
    self.cnn_word_W2 = uniform((200 * 3, 300))
    self.cnn_word_b2 = zero((300,))
    self.cnn_word_W3 = uniform((300 * 3, 300))
    self.cnn_word_b3 = zero((300,))

    # "phs" stack: image joins at layer 2.
    self.cnn_phs_W1 = uniform((dim_emb * 3, 200))
    self.cnn_phs_b1 = zero((200,))
    self.cnn_phs_W2 = uniform((200 * 3 + dim_img, 300))
    self.cnn_phs_b2 = zero((300,))
    self.cnn_phs_W3 = uniform((300 * 3, 300))
    self.cnn_phs_b3 = zero((300,))

    # "phl" stack: image joins at layer 3.
    self.cnn_phl_W1 = uniform((dim_emb * 3, 200))
    self.cnn_phl_b1 = zero((200,))
    self.cnn_phl_W2 = uniform((200 * 3, 300))
    self.cnn_phl_b2 = zero((300,))
    self.cnn_phl_W3 = uniform((300 * 3 + dim_img, 300))
    self.cnn_phl_b3 = zero((300,))

    # "st" stack: no image input in any layer.
    self.cnn_st_W1 = uniform((dim_emb * 3, 200))
    self.cnn_st_b1 = zero((200,))
    self.cnn_st_W2 = uniform((200 * 3, 300))
    self.cnn_st_b2 = zero((300,))
    self.cnn_st_W3 = uniform((300 * 3, 300))
    self.cnn_st_b3 = zero((300,))
def __init__(self, n_vocab, dim_word, dimctx, dim):
    """Allocate the weights of a bidirectional-GRU encoder / attention decoder.

    Args:
        n_vocab: Vocabulary size shared by both embeddings and the
            output layer.
        dim_word: Word-embedding width.
        dimctx: Context width used by the attention and the decoder's
            context inputs.
        dim: GRU state width.
    """
    self.n_vocab = n_vocab  # 30000
    self.dim_word = dim_word  # 384
    self.dimctx = dimctx  # 1024
    self.dim = dim  # 512

    uniform = initializations.uniform
    zero = initializations.zero

    # Word embeddings.
    self.W_enc_emb = uniform((self.n_vocab, self.dim_word))
    self.W_dec_emb = uniform((self.n_vocab, self.dim_word))

    # Encoder, forward GRU (cdd = candidate).
    self.W_enc_f_gru = uniform((self.dim_word, self.dim * 2))
    self.U_enc_f_gru = uniform((self.dim, self.dim * 2))
    self.b_enc_f_gru = zero((self.dim * 2,))
    self.W_enc_f_gru_cdd = uniform((self.dim_word, self.dim))
    self.U_enc_f_gru_cdd = uniform((self.dim, self.dim))
    self.b_enc_f_gru_cdd = zero((self.dim,))

    # Encoder, backward GRU.
    self.W_enc_b_gru = uniform((self.dim_word, self.dim * 2))
    self.U_enc_b_gru = uniform((self.dim, self.dim * 2))
    self.b_enc_b_gru = zero((self.dim * 2,))
    self.W_enc_b_gru_cdd = uniform((self.dim_word, self.dim))
    self.U_enc_b_gru_cdd = uniform((self.dim, self.dim))
    self.b_enc_b_gru_cdd = zero((self.dim,))

    # Context -> decoder initial state (s0).
    self.W_dec_init = uniform((self.dimctx, self.dim))
    self.b_dec_init = zero((self.dim,))

    # Decoder GRU, with extra context inputs.
    self.W_dec_gru = uniform((self.dim_word, self.dim * 2))
    self.U_dec_gru = uniform((self.dim, self.dim * 2))
    self.b_dec_gru = zero((self.dim * 2,))
    self.W_dec_gru_cdd = uniform((self.dim_word, self.dim))
    self.U_dec_gru_cdd = uniform((self.dim, self.dim))
    self.b_dec_gru_cdd = zero((self.dim,))
    self.W_dec_gru_ctx = uniform((self.dimctx, self.dim * 2))
    self.W_dec_gru_ctx_cdd = uniform((self.dimctx, self.dim))

    # Encoder-decoder attention.
    self.W_att_y2c = uniform((self.dim_word, self.dimctx))
    self.W_att_h2c = uniform((self.dimctx, self.dimctx))
    self.W_att_s2c = uniform((self.dim, self.dimctx))
    self.b_att = zero((self.dimctx,))
    self.U_att_energy = uniform((self.dimctx, 1))
    self.b_att_energy = zero((1,))

    # Prediction: state / previous word / context -> word space -> vocab.
    self.W_dec_pred_s2y = uniform((self.dim, self.dim_word))
    self.b_dec_pred_s2y = zero((self.dim_word,))
    self.W_dec_pred_y2y = uniform((self.dim_word, self.dim_word))
    self.b_dec_pred_y2y = zero((self.dim_word,))
    # NOTE(review): the context input here is sized dim * 2 rather than
    # dimctx like the other context weights -- confirm they always match.
    self.W_dec_pred_c2y = uniform((self.dim * 2, self.dim_word))
    self.b_dec_pred_c2y = zero((self.dim_word,))
    self.W_dec_pred = uniform((self.dim_word, self.n_vocab))
    self.b_dec_pred = zero((self.n_vocab,))

    # Same order as before; positional save/load code may rely on it.
    self.params = [
        self.W_enc_emb, self.W_dec_emb,
        self.W_enc_f_gru, self.U_enc_f_gru, self.b_enc_f_gru,
        self.W_enc_f_gru_cdd, self.U_enc_f_gru_cdd, self.b_enc_f_gru_cdd,
        self.W_enc_b_gru, self.U_enc_b_gru, self.b_enc_b_gru,
        self.W_enc_b_gru_cdd, self.U_enc_b_gru_cdd, self.b_enc_b_gru_cdd,
        self.W_dec_init, self.b_dec_init,
        self.W_dec_gru, self.U_dec_gru, self.b_dec_gru,
        self.W_dec_gru_cdd, self.U_dec_gru_cdd, self.b_dec_gru_cdd,
        self.W_dec_gru_ctx, self.W_dec_gru_ctx_cdd,
        self.W_att_y2c, self.W_att_h2c, self.W_att_s2c, self.b_att,
        self.U_att_energy, self.b_att_energy,
        self.W_dec_pred_s2y, self.b_dec_pred_s2y,
        self.W_dec_pred_y2y, self.b_dec_pred_y2y,
        self.W_dec_pred_c2y, self.b_dec_pred_c2y,
        self.W_dec_pred, self.b_dec_pred,
    ]