Esempio n. 1
0
    def __init__(self, y_vocab, dim_word, dim, dim_ctx):
        """Allocate the model parameters and register them in ``self.params``.

        Args:
            y_vocab: number of output classes (columns of ``W_pred``).
            dim_word: input feature size (rows of ``W_lstm``).
            dim: recurrent state size.
            dim_ctx: context size (rows of the two ``*_init`` projections).
        """
        uniform = initializations.uniform
        zero = initializations.zero

        self.y_vocab = y_vocab  # e.g. 430
        self.dim_word = dim_word  # e.g. 1024
        self.dim = dim  # e.g. 512
        self.dim_ctx = dim_ctx  # e.g. 512

        # Every shape below is an explicit tuple: ``(n)`` is just the int ``n``,
        # whereas ``(n,)`` is the 1-D shape that is actually meant.

        ### initial context - image embedding: dim_ctx -> dim projections
        ### for the initial hidden state and the initial memory state.
        self.W_hidden_init = uniform((self.dim_ctx, self.dim))
        self.b_hidden_init = zero((self.dim,))
        self.W_memory_init = uniform((self.dim_ctx, self.dim))
        self.b_memory_init = zero((self.dim,))

        ### LSTM weights; the last axis is dim * 4
        ### (presumably the four gates stacked side by side -- confirm in the step function).
        self.W_lstm = uniform((self.dim_word, self.dim * 4))
        self.U_lstm = uniform((self.dim, self.dim * 4))
        self.b_lstm = zero((self.dim * 4,))

        ### prediction: reads a dim * 2 wide input ###
        self.W_pred = uniform((self.dim * 2, self.y_vocab))
        self.b_pred = zero((self.y_vocab,))

        self.params = [
            self.W_hidden_init, self.b_hidden_init,
            self.W_memory_init, self.b_memory_init,
            self.W_lstm, self.U_lstm, self.b_lstm,
            self.W_pred, self.b_pred,
        ]
    def __init__(self, n_words, embedding_dim,  hidden_dim):
        """Allocate embedding, encoder, decoder and output parameters.

        Args:
            n_words: vocabulary size (rows of ``emb``, columns of ``word_W``).
            embedding_dim: word embedding width.
            hidden_dim: recurrent state size; recurrent weights are
                ``hidden_dim * 4`` wide.
        """
        uniform = initializations.uniform
        zero = initializations.zero

        self.n_words = n_words
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        gates = hidden_dim * 4

        self.emb = uniform((n_words, embedding_dim))

        self.encode_W = uniform((embedding_dim, gates))  # input -> hidden
        self.encode_U = uniform((hidden_dim, gates))  # last hidden -> hidden (recurrent)
        self.encode_b = zero((gates,))

        self.decode_W = uniform((embedding_dim, gates))  # last word -> hidden
        self.decode_U = uniform((hidden_dim, gates))  # last hidden -> hidden
        self.decode_V = uniform((hidden_dim, gates))  # context -> hidden
        # 1-D shapes are written as real tuples; ``(n)`` is only the int ``n``.
        self.decode_b = zero((gates,))

        self.output_W = uniform((hidden_dim, embedding_dim))
        self.output_b = zero((embedding_dim,))

        self.word_W = uniform((embedding_dim, n_words))
        self.word_b = zero((n_words,))

        self.params = [
            self.emb,
            self.encode_W, self.encode_U, self.encode_b,
            self.decode_W, self.decode_U, self.decode_V, self.decode_b,
            self.output_W, self.output_b,
            self.word_W, self.word_b,
        ]
Esempio n. 3
0
    def __init__(self, n_vocab, y_vocab, dim_word, dim):
        """Allocate image-embedding, word-embedding, GRU and prediction weights.

        Args:
            n_vocab: input vocabulary size (rows of ``W_emb``).
            y_vocab: number of output classes (columns of ``W_pred``).
            dim_word: word embedding width.
            dim: recurrent state size.
        """
        uniform = initializations.uniform
        zero = initializations.zero

        self.n_vocab = n_vocab  # e.g. 12047
        self.y_vocab = y_vocab  # e.g. 430
        self.dim_word = dim_word  # e.g. 1024
        self.dim = dim  # e.g. 512

        # 1-D shapes are passed as real tuples; ``(n)`` is only the int ``n``.

        ### image embedding: hard-coded 4096 input features -> dim
        self.W_img_emb = uniform((4096, self.dim))
        self.b_img_emb = zero((self.dim,))

        ### word embedding ###
        self.W_emb = uniform((self.n_vocab, self.dim_word))

        ### GRU: the dim * 2 wide weights presumably hold two stacked gates ###
        self.W_gru = uniform((self.dim_word, self.dim * 2))
        self.U_gru = uniform((self.dim, self.dim * 2))
        self.b_gru = zero((self.dim * 2,))
        # cdd : candidate
        self.W_gru_cdd = uniform((self.dim_word, self.dim))
        self.U_gru_cdd = uniform((self.dim, self.dim))
        self.b_gru_cdd = zero((self.dim,))

        ### prediction ###
        self.W_pred = uniform((self.dim, self.y_vocab))
        self.b_pred = zero((self.y_vocab,))

        self.params = [
            self.W_img_emb, self.b_img_emb, self.W_emb, self.W_gru, self.U_gru,
            self.b_gru, self.W_gru_cdd, self.U_gru_cdd, self.b_gru_cdd,
            self.W_pred, self.b_pred,
        ]
Esempio n. 4
0
    def __init__(self, y_vocab, dim_word, dim, dim_ctx):
        """Allocate Glorot-initialised embedding, GRU and prediction weights.

        Args:
            y_vocab: number of output classes (columns of ``W_pred``).
            dim_word: input width of the ``W_fr_emb`` projection.
            dim: recurrent state size.
            dim_ctx: input width of the ``W_img_emb`` projection.
        """
        glorot = initializations.glorot_uniform
        zero = initializations.zero

        self.y_vocab = y_vocab  # e.g. 430
        self.dim_word = dim_word  # e.g. 1024
        self.dim = dim  # e.g. 512
        self.dim_ctx = dim_ctx  # e.g. 512
        self.emb_dim = 512  # fixed width shared by both embeddings

        # 1-D shapes are passed as real tuples; ``(n)`` is only the int ``n``.

        ### image embedding: dim_ctx -> emb_dim
        self.W_img_emb = glorot((self.dim_ctx, self.emb_dim))
        self.b_img_emb = zero((self.emb_dim,))

        ### second embedding: dim_word -> emb_dim
        self.W_fr_emb = glorot((self.dim_word, self.emb_dim))
        self.b_fr_emb = zero((self.emb_dim,))

        ### GRU operating on emb_dim wide inputs ###
        self.W_gru = glorot((self.emb_dim, self.dim * 2))
        self.U_gru = glorot((self.dim, self.dim * 2))
        self.b_gru = zero((self.dim * 2,))
        # cdd : candidate
        self.W_gru_cdd = glorot((self.emb_dim, self.dim))
        self.U_gru_cdd = glorot((self.dim, self.dim))
        self.b_gru_cdd = zero((self.dim,))

        ### prediction: reads a dim * 2 wide input ###
        self.W_pred = glorot((self.dim * 2, self.y_vocab))
        self.b_pred = zero((self.y_vocab,))

        # The order (both W first, then both b) is kept as in the original list.
        self.params = [
            self.W_img_emb, self.W_fr_emb, self.b_img_emb, self.b_fr_emb,
            self.W_gru, self.U_gru, self.b_gru,
            self.W_gru_cdd, self.U_gru_cdd, self.b_gru_cdd,
            self.W_pred, self.b_pred,
        ]
Esempio n. 5
0
    def __init__(self, n_vocab, y_vocab, dim_word, dim):
        """Create the image-embedding, word-embedding, GRU and output parameters.

        Args:
            n_vocab: input vocabulary size (rows of ``W_emb``).
            y_vocab: number of output classes (columns of ``W_pred``).
            dim_word: word embedding width.
            dim: recurrent state size.
        """
        uniform = initializations.uniform
        zero = initializations.zero

        self.n_vocab = n_vocab  # e.g. 12047
        self.y_vocab = y_vocab  # e.g. 430
        self.dim_word = dim_word  # e.g. 1024
        self.dim = dim  # e.g. 512

        # Bias shapes use explicit 1-tuples: ``(n)`` evaluates to the int ``n``.

        ### image embedding (4096 input features are hard-coded)
        self.W_img_emb = uniform((4096, self.dim))
        self.b_img_emb = zero((self.dim,))

        ### word embedding ###
        self.W_emb = uniform((self.n_vocab, self.dim_word))

        ### GRU ###
        self.W_gru = uniform((self.dim_word, self.dim * 2))
        self.U_gru = uniform((self.dim, self.dim * 2))
        self.b_gru = zero((self.dim * 2,))
        # cdd : candidate
        self.W_gru_cdd = uniform((self.dim_word, self.dim))
        self.U_gru_cdd = uniform((self.dim, self.dim))
        self.b_gru_cdd = zero((self.dim,))

        ### prediction ###
        self.W_pred = uniform((self.dim, self.y_vocab))
        self.b_pred = zero((self.y_vocab,))

        self.params = [
            self.W_img_emb, self.b_img_emb,
            self.W_emb,
            self.W_gru, self.U_gru, self.b_gru,
            self.W_gru_cdd, self.U_gru_cdd, self.b_gru_cdd,
            self.W_pred, self.b_pred,
        ]
    def __init__(self,
                 n_words,
                 dim_embed,
                 dim_hidden,
                 dim_image,
                 bias_init_vector=None):
        """Allocate word-embedding, LSTM, image-encoder and word-decoder weights.

        Args:
            n_words: vocabulary size.
            dim_embed: word embedding width.
            dim_hidden: LSTM state size.
            dim_image: image feature width.
            bias_init_vector: optional array used as the initial value of
                ``emb_word_b``; it is cast to float32 and wrapped in a Theano
                shared variable. When ``None`` the bias is drawn from
                ``initializations.uniform``.
        """
        uniform = initializations.uniform
        zero = initializations.zero

        self.n_words = n_words
        self.dim_embed = dim_embed
        self.dim_hidden = dim_hidden
        self.dim_image = dim_image

        # 1-D shapes are passed as real tuples; ``(n)`` is only the int ``n``.
        self.Wemb = uniform((n_words, dim_embed), scale=0.1)
        self.bemb = zero((dim_embed,))

        # A single LSTM matrix with 1 + dim_embed + dim_hidden rows
        # (presumably bias column + input + previous state -- confirm in the step function).
        self.lstm_W = uniform(
            (1 + dim_embed + dim_hidden, dim_hidden * 4), scale=0.1)

        self.encode_img_W = uniform((dim_image, dim_hidden), scale=0.1)
        self.encode_img_b = zero((dim_hidden,))

        self.emb_word_W = uniform((dim_hidden, n_words), scale=0.1)
        if bias_init_vector is None:
            self.emb_word_b = uniform((n_words,))
        else:
            self.emb_word_b = theano.shared(
                bias_init_vector.astype(np.float32), borrow=True)

        self.params = [
            self.Wemb, self.bemb, self.lstm_W, self.encode_img_W,
            self.encode_img_b, self.emb_word_W, self.emb_word_b,
        ]
    def __init__(self, n_words, dim_embed, dim_hidden, dim_image, bias_init_vector=None):
        """Create the caption model parameters and collect them in ``self.params``.

        Args:
            n_words: vocabulary size.
            dim_embed: word embedding width.
            dim_hidden: LSTM state size.
            dim_image: image feature width.
            bias_init_vector: optional initial value for ``emb_word_b``; cast
                to float32 and stored as a Theano shared variable. If omitted,
                the bias comes from ``initializations.uniform``.
        """
        uniform = initializations.uniform
        zero = initializations.zero

        self.n_words = n_words
        self.dim_embed = dim_embed
        self.dim_hidden = dim_hidden
        self.dim_image = dim_image

        # Bias shapes use explicit 1-tuples: ``(n)`` evaluates to the int ``n``.
        self.Wemb = uniform((n_words, dim_embed), scale=0.1)
        self.bemb = zero((dim_embed,))

        lstm_rows = 1 + dim_embed + dim_hidden
        self.lstm_W = uniform((lstm_rows, dim_hidden * 4), scale=0.1)

        self.encode_img_W = uniform((dim_image, dim_hidden), scale=0.1)
        self.encode_img_b = zero((dim_hidden,))

        self.emb_word_W = uniform((dim_hidden, n_words), scale=0.1)
        if bias_init_vector is None:
            self.emb_word_b = uniform((n_words,))
        else:
            self.emb_word_b = theano.shared(bias_init_vector.astype(np.float32), borrow=True)

        self.params = [
            self.Wemb, self.bemb,
            self.lstm_W,
            self.encode_img_W, self.encode_img_b,
            self.emb_word_W, self.emb_word_b,
        ]
Esempio n. 8
0
    def __init__(self, n_vocab, y_vocab, dim_word, dim, dim_ctx):
        """Allocate the attention, embedding, GRU and prediction parameters.

        Args:
            n_vocab: input vocabulary size (rows of ``W_emb``).
            y_vocab: number of output classes (columns of ``W_pred``).
            dim_word: word embedding width.
            dim: recurrent state size.
            dim_ctx: context vector width.
        """
        uniform = initializations.uniform
        zero = initializations.zero

        self.n_vocab = n_vocab  # e.g. 12047
        self.y_vocab = y_vocab  # e.g. 430
        self.dim_word = dim_word  # e.g. 1024
        self.dim = dim  # e.g. 1024
        self.dim_ctx = dim_ctx  # e.g. 512

        # 1-D shapes are passed as real tuples; ``(n)`` is only the int ``n``.

        ### initial context: dim_ctx -> dim
        self.W_ctx_init = uniform((self.dim_ctx, self.dim))
        self.b_ctx_init = zero((self.dim,))

        ### forward : img_dim to context
        self.W_ctx_att = uniform((self.dim_ctx, self.dim_ctx))
        self.b_ctx_att = zero((self.dim_ctx,))

        ### forward : hidden_dim to context
        self.W_dim_att = uniform((self.dim, self.dim_ctx))

        ### context energy: dim_ctx -> 1
        self.U_att = uniform((self.dim_ctx, 1))
        self.c_att = zero((1,))

        ### word embedding ###
        self.W_emb = uniform((self.n_vocab, self.dim_word))

        ### GRU ###
        self.W_gru_ctx = uniform((self.dim_word, self.dim_ctx))
        self.b_gru_ctx = zero((self.dim_ctx,))

        self.W_gru = uniform((self.dim_word, self.dim * 2))
        self.U_gru = uniform((self.dim, self.dim * 2))
        self.b_gru = zero((self.dim * 2,))
        self.U_gru_ctx = uniform((self.dim_ctx, self.dim * 2))

        # cdd : candidate
        self.W_gru_cdd = uniform((self.dim_word, self.dim))
        self.U_gru_cdd = uniform((self.dim, self.dim))
        self.b_gru_cdd = zero((self.dim,))
        self.U_gru_cdd_ctx = uniform((self.dim_ctx, self.dim))

        ### prediction: reads a dim * 2 wide input ###
        self.W_pred = uniform((self.dim * 2, self.y_vocab))
        self.b_pred = zero((self.y_vocab,))

        self.params = [
            self.W_ctx_init, self.b_ctx_init,
            self.W_ctx_att, self.b_ctx_att,
            self.W_dim_att,
            self.U_att, self.c_att,
            self.W_emb,
            self.W_gru_ctx, self.b_gru_ctx,
            self.W_gru, self.U_gru, self.b_gru, self.U_gru_ctx,
            self.W_gru_cdd, self.U_gru_cdd, self.b_gru_cdd, self.U_gru_cdd_ctx,
            self.W_pred, self.b_pred,
        ]
Esempio n. 9
0
    def __init__(self, n_vocab, y_vocab, dim_word, dim, dim_ctx):
        """Create attention, word-embedding, GRU and output weights.

        Args:
            n_vocab: input vocabulary size (rows of ``W_emb``).
            y_vocab: number of output classes (columns of ``W_pred``).
            dim_word: word embedding width.
            dim: recurrent state size.
            dim_ctx: context vector width.
        """
        uniform = initializations.uniform
        zero = initializations.zero

        self.n_vocab = n_vocab  # e.g. 12047
        self.y_vocab = y_vocab  # e.g. 430
        self.dim_word = dim_word  # e.g. 1024
        self.dim = dim  # e.g. 1024
        self.dim_ctx = dim_ctx  # e.g. 512

        # Bias shapes use explicit 1-tuples: ``(n)`` evaluates to the int ``n``.

        ### initial context
        self.W_ctx_init = uniform((self.dim_ctx, self.dim))
        self.b_ctx_init = zero((self.dim,))

        ### forward : img_dim to context
        self.W_ctx_att = uniform((self.dim_ctx, self.dim_ctx))
        self.b_ctx_att = zero((self.dim_ctx,))

        ### forward : hidden_dim to context
        self.W_dim_att = uniform((self.dim, self.dim_ctx))

        ### context energy
        self.U_att = uniform((self.dim_ctx, 1))
        self.c_att = zero((1,))

        ### word embedding ###
        self.W_emb = uniform((self.n_vocab, self.dim_word))

        ### GRU ###
        self.W_gru_ctx = uniform((self.dim_word, self.dim_ctx))
        self.b_gru_ctx = zero((self.dim_ctx,))

        self.W_gru = uniform((self.dim_word, self.dim * 2))
        self.U_gru = uniform((self.dim, self.dim * 2))
        self.b_gru = zero((self.dim * 2,))
        self.U_gru_ctx = uniform((self.dim_ctx, self.dim * 2))

        # cdd : candidate
        self.W_gru_cdd = uniform((self.dim_word, self.dim))
        self.U_gru_cdd = uniform((self.dim, self.dim))
        self.b_gru_cdd = zero((self.dim,))
        self.U_gru_cdd_ctx = uniform((self.dim_ctx, self.dim))

        ### prediction ###
        self.W_pred = uniform((self.dim * 2, self.y_vocab))
        self.b_pred = zero((self.y_vocab,))

        self.params = [
            self.W_ctx_init, self.b_ctx_init, self.W_ctx_att, self.b_ctx_att,
            self.W_dim_att, self.U_att, self.c_att, self.W_emb, self.W_gru_ctx,
            self.b_gru_ctx, self.W_gru, self.U_gru, self.b_gru, self.U_gru_ctx,
            self.W_gru_cdd, self.U_gru_cdd, self.b_gru_cdd, self.U_gru_cdd_ctx,
            self.W_pred, self.b_pred,
        ]
Esempio n. 10
0
    def __init__(self, n_vocab, y_vocab, dim_word, dim):
        """Allocate image-init, word-embedding, GRU and prediction weights.

        Args:
            n_vocab: input vocabulary size (rows of ``W_emb``).
            y_vocab: number of output classes (columns of ``W_pred``).
            dim_word: word embedding width.
            dim: recurrent state size.
        """
        uniform = initializations.uniform
        zero = initializations.zero

        self.n_vocab = n_vocab  # e.g. 12047
        self.y_vocab = y_vocab  # e.g. 430
        self.dim_word = dim_word  # e.g. 1024
        self.dim = dim  # e.g. 1024
        self.dim_ctx = 4096  # fixed here, not a constructor argument

        # 1-D shapes are passed as real tuples; ``(n)`` is only the int ``n``.

        ### initial context: dim_ctx -> dim
        self.W_img_init = uniform((self.dim_ctx, self.dim))
        self.b_img_init = zero((self.dim,))

        ### word embedding ###
        self.W_emb = uniform((self.n_vocab, self.dim_word))

        self.W_gru = uniform((self.dim_word, self.dim * 2))
        self.U_gru = uniform((self.dim, self.dim * 2))
        self.b_gru = zero((self.dim * 2,))

        # cdd : candidate
        self.W_gru_cdd = uniform((self.dim_word, self.dim))
        self.U_gru_cdd = uniform((self.dim, self.dim))
        self.b_gru_cdd = zero((self.dim,))

        ### prediction: reads a dim * 2 wide input ###
        self.W_pred = uniform((self.dim * 2, self.y_vocab))
        self.b_pred = zero((self.y_vocab,))

        self.params = [
            self.W_img_init, self.b_img_init,
            self.W_emb,
            self.W_gru, self.U_gru, self.b_gru,
            self.W_gru_cdd, self.U_gru_cdd, self.b_gru_cdd,
            self.W_pred, self.b_pred,
        ]
    def __init__(self, name, latent_dim, depth, k, hidden_dim, exploration_probability, exploration_decay_rate):
        """
        Create the parameters, all named with the ``name`` prefix.

        z = input (n, latent_dim)
        o = hidden representation (n, depth, hidden_dim)
        x = output (n,depth) (int)
        h = hidden input representation
        z*W
        o*U
        x*V

        Args:
            name: prefix used for every variable name ("<name>_W_h", ...).
            latent_dim: width of the latent input z.
            depth: stored on the instance; not used for any shape here.
            k: V_h has k + 2 rows and the value head has k + 1 outputs.
            hidden_dim: width of the hidden representation.
            exploration_probability: initial value of the shared float32
                variable ``self.exploration_probability``.
            exploration_decay_rate: stored as a float32 scalar.
        """
        def tag(suffix):
            # Builds the per-variable name, e.g. "<name>_W_h".
            return "{}_{}".format(name, suffix)

        self.latent_dim = latent_dim
        self.depth = depth
        self.k = k
        self.hidden_dim = hidden_dim
        # z = T.fmatrix("z")  # input latent samples (n, latent_dim)
        self.exploration_probability = theano.shared(np.float32(exploration_probability),
                                                     tag("exploration_probability"))
        self.exploration_decay_rate = np.float32(exploration_decay_rate)

        square = (hidden_dim, hidden_dim)
        vector = (hidden_dim,)

        # Hidden representation
        self.W_h = glorot_uniform((latent_dim, hidden_dim), tag("W_h"))  # z, (latent_dim, hidden_dim)
        self.U_h = glorot_uniform(square, tag("U_h"))  # h, (hidden_dim, hidden_dim)
        self.V_h = glorot_uniform((k + 2, hidden_dim), tag("V_h"))  # x, (k+2, hidden_dim)
        self.b_h = zero(vector, tag("b_h"))  # (hidden_dim,)

        # Forget gate
        self.W_f = glorot_uniform(square, tag("W_f"))  # (hidden_dim, hidden_dim)
        self.b_f = zero(vector, tag("b_f"))  # (hidden_dim,)
        # Input gate
        self.W_i = glorot_uniform(square, tag("W_i"))  # (hidden_dim, hidden_dim)
        self.b_i = zero(vector, tag("b_i"))  # (hidden_dim,)
        # Write gate
        self.W_w = glorot_uniform(square, tag("W_w"))  # (hidden_dim, hidden_dim)
        self.b_w = zero(vector, tag("b_w"))  # (hidden_dim,)
        # Output -- these two used to be created under the input gate's names
        # ("_W_i"/"_b_i"), which gave two different variables the same name.
        self.W_o = glorot_uniform(square, tag("W_o"))  # (hidden_dim, hidden_dim)
        self.b_o = zero(vector, tag("b_o"))  # (hidden_dim,)
        # Hidden state
        self.W_j = glorot_uniform(square, tag("W_j"))  # (hidden_dim, hidden_dim)
        self.b_j = zero(vector, tag("b_j"))  # (hidden_dim,)
        # Value predictions
        self.W_v = glorot_uniform((hidden_dim, k + 1), tag("W_v"))  # (hidden_dim, k+1)
        self.b_v = zero((k + 1,), tag("b_v"))  # (k+1,)
        self.params = [self.W_h, self.U_h, self.V_h, self.b_h,
                       self.W_f, self.b_f,
                       self.W_i, self.b_i,
                       self.W_w, self.b_w,
                       self.W_o, self.b_o,
                       self.W_j, self.b_j,
                       self.W_v, self.b_v]
    def __init__(self, name, depth, k, hidden_dim):
        """Create the parameters, all named with the ``name`` prefix.

        Args:
            name: prefix used for every variable name ("<name>_W_h", ...).
            depth: stored on the instance; not used for any shape here.
            k: U_h has k + 1 rows.
            hidden_dim: width of the hidden representation.

        ``self.clip_params`` lists the weight matrices only (no biases);
        ``self.params`` lists every parameter.
        """
        def tag(suffix):
            # Builds the per-variable name, e.g. "<name>_W_h".
            return "{}_{}".format(name, suffix)

        self.depth = depth
        self.k = k
        self.hidden_dim = hidden_dim

        square = (hidden_dim, hidden_dim)
        vector = (hidden_dim,)

        # Hidden representation
        self.W_h = glorot_uniform(square, tag("W_h"))  # h, (hidden_dim, hidden_dim)
        self.U_h = glorot_uniform((k + 1, hidden_dim), tag("U_h"))  # x, (k+1, hidden_dim)
        self.b_h = zero(vector, tag("b_h"))  # (hidden_dim,)

        # Forget gate
        self.W_f = glorot_uniform(square, tag("W_f"))  # (hidden_dim, hidden_dim)
        self.b_f = zero(vector, tag("b_f"))  # (hidden_dim,)
        # Input gate
        self.W_i = glorot_uniform(square, tag("W_i"))  # (hidden_dim, hidden_dim)
        self.b_i = zero(vector, tag("b_i"))  # (hidden_dim,)
        # Write gate
        self.W_w = glorot_uniform(square, tag("W_w"))  # (hidden_dim, hidden_dim)
        self.b_w = zero(vector, tag("b_w"))  # (hidden_dim,)
        # Output -- these two used to be created under the input gate's names
        # ("_W_i"/"_b_i"), which gave two different variables the same name.
        self.W_o = glorot_uniform(square, tag("W_o"))  # (hidden_dim, hidden_dim)
        self.b_o = zero(vector, tag("b_o"))  # (hidden_dim,)
        # Hidden state
        self.W_j = glorot_uniform(square, tag("W_j"))  # (hidden_dim, hidden_dim)
        self.b_j = zero(vector, tag("b_j"))  # (hidden_dim,)
        # y predictions
        self.W_y = glorot_uniform((hidden_dim, 1), tag("W_y"))  # (hidden_dim, 1)
        self.b_y = zero((1,), tag("b_y"))  # (1,)
        self.clip_params = [self.W_h, self.U_h, self.W_f, self.W_i, self.W_w, self.W_o, self.W_j, self.W_y]
        self.params = [self.W_h, self.U_h, self.b_h,
                       self.W_f, self.b_f,
                       self.W_i, self.b_i,
                       self.W_w, self.b_w,
                       self.W_o, self.b_o,
                       self.W_j, self.b_j,
                       self.W_y, self.b_y]
def build_model(dp, word_count_threshold, word_embedding_dim, image_embedding_dim, hidden_size, batch_size, num_vocab):
    """Build the Theano graph and compiled functions of an image-conditioned LSTM.

    The image is embedded and prepended to the embedded sentence; the resulting
    sequence is run through an LSTM and every LSTM output is projected to
    ``num_vocab`` scores.

    Args:
        dp: not used in this function; kept for interface compatibility.
        word_count_threshold: not used in this function; kept for interface
            compatibility.
        word_embedding_dim: width of the word embedding table.
        image_embedding_dim: width of the image embedding. The image embedding
            is concatenated with the word embeddings along the time axis, so it
            has to equal ``word_embedding_dim``.
        hidden_size: LSTM state size.
        batch_size: batch size baked into the bias column and initial states.
        num_vocab: vocabulary size.

    Returns:
        ``(train_function, params, Y_function, Hout_function)``. Only
        ``train_function`` applies the RMSprop updates; the other two are
        read-only and return the scores ``Y`` and the LSTM outputs ``Houts``.

    Relies on ``regularization_ratio``, ``dropout``, ``numpy_floatX`` and
    ``RMSprop`` being available in the enclosing module.
    """
    # First define the image encoder (4096 -> embedding dim) and the
    # text encoder (vocab dim -> embedding dim).
    We = initializations.uniform((4096, image_embedding_dim))
    be = initializations.zero((image_embedding_dim,))
    Ws = initializations.uniform((num_vocab, word_embedding_dim))

    # Text decoder (hidden dim -> vocab dim).
    Wd = initializations.uniform((hidden_size, num_vocab))
    bd = initializations.zero((num_vocab,))

    # Image (batch) -> image_embedding_dim, with a length-1 axis inserted in the middle.
    image = T.matrix()
    embedded_image = T.dot(image, We) + be
    embedded_image = embedded_image.dimshuffle(0, 'x', 1)

    # Sentence of word indices and its mask.
    sentence = T.matrix(dtype='int32')
    mask = T.matrix()
    embedded_sentence = Ws[sentence]  # (batch, sentence length, embedding_dim)

    # Prepend the image to the front of the sentence, then swap the first two axes.
    X = T.concatenate([embedded_image, embedded_sentence], axis=1)
    X = X.dimshuffle(1, 0, 2)
    # NOTE(review): dropout is part of the shared graph, so it is presumably
    # also active in Y_function / Hout_function -- confirm against `dropout`.
    X = dropout(X, 0.5)

    # LSTM weights (the weights for i, f, o, g in one matrix).
    # Rows must match the step input [1, x_t, h_{t-1}], i.e.
    # 1 + word_embedding_dim + hidden_size. The previous 1 + 2*word_embedding_dim
    # only matched when hidden_size == word_embedding_dim.
    WLSTM = initializations.uniform((1 + word_embedding_dim + hidden_size, 4 * hidden_size))
    bias = T.alloc(numpy_floatX(1.), batch_size, 1)

    def _step(b, x_t, h_t_1, m_, c_, weight):
        """Advance the LSTM by one time step; returns (new hidden, new cell)."""
        Hin = T.concatenate([b, x_t, h_t_1], axis=1)  # concat 1, x[t], h[t-1]

        IFOG = T.dot(Hin, weight)

        # First 3*hidden_size columns go through sigmoid, the rest through tanh.
        ifo = T.nnet.sigmoid(IFOG[:, :3*hidden_size])
        g = T.tanh(IFOG[:, 3*hidden_size:])

        IFOGf = T.concatenate([ifo, g], axis=1)

        c = IFOGf[:, :hidden_size] * IFOGf[:, 3*hidden_size:] + c_ * IFOGf[:, hidden_size:2*hidden_size]
        # Where the mask is 0 the previous cell / hidden state is carried over.
        c = c * m_[:, None] + c_ * (1. - m_)[:, None]

        Hout = IFOGf[:, 2*hidden_size:3*hidden_size] * c
        Hout = Hout * m_[:, None] + h_t_1 * (1. - m_)[:, None]
        return Hout, c

    (Houts, cells), _ = theano.scan(fn=lambda x, m, h, c, b, weight: _step(b, x, h, m, c, weight),
                                    sequences=[X, mask.T],
                                    outputs_info=[
                                        T.alloc(numpy_floatX(0.), batch_size, hidden_size),
                                        T.alloc(numpy_floatX(0.), batch_size, hidden_size)
                                    ],
                                    non_sequences=[bias, WLSTM])

    Houts = Houts.dimshuffle(1, 0, 2)
    Y, _ = theano.scan(fn=lambda hout, wd, dd: T.dot(hout, wd) + dd,  # T.nnet.softmax(T.dot(hout, wd)+dd),
                       sequences=[Houts],
                       non_sequences=[Wd, bd])

    # Drop the first time step (the one fed with the image).
    Y = Y[:, 1:, :]
    n_timestep = Y.shape[1]

    # NOTE(review): this lambda ignores its own `m` argument and closes over the
    # full `mask` instead, and Y holds raw scores because the softmax above is
    # commented out -- the loss expression is left unchanged; confirm the intent.
    losses, _ = theano.scan(fn=lambda y, m, sen: -T.log(1e-20 + y[T.arange(n_timestep), sen[1:]][mask != 0.0]),
                            sequences=[Y, mask, sentence])

    loss = T.sum(losses) / Y.shape[0]
    loss += regularization_ratio * 0.5 * T.sum(WLSTM * WLSTM)
    loss += regularization_ratio * 0.5 * T.sum(Wd * Wd)

    params = [We, be, Ws, WLSTM, Wd, bd]
    updates = RMSprop(cost=loss, params=params)
    train_function = theano.function(inputs=[image, sentence, mask], outputs=loss, updates=updates, allow_input_downcast=True)
    # The inspection functions must not receive the RMSprop updates: passing them
    # made every call to Y_function / Hout_function perform a training step.
    Y_function = theano.function(inputs=[image, sentence, mask], outputs=Y, allow_input_downcast=True)
    Hout_function = theano.function(inputs=[image, sentence, mask], outputs=Houts, allow_input_downcast=True)

    return train_function, params, Y_function, Hout_function
Esempio n. 14
0
from keras import backend as K
from keras.datasets import mnist
from keras.utils.np_utils import to_categorical
from keras.metrics import categorical_accuracy
from keras.initializations import glorot_uniform, zero
import numpy as np

# Two-layer classifier written directly against the Keras backend `K`
# (imported above; the script uses it for every tensor operation).

# inputs and targets are placeholders
input_dim = 28 * 28
output_dim = 10
x = K.placeholder(name="x", shape=(None, input_dim))
ytrue = K.placeholder(name="y", shape=(None, output_dim))

# model parameters are variables
hidden_dim = 128
W1 = glorot_uniform((input_dim, hidden_dim))
b1 = zero((hidden_dim, ))
W2 = glorot_uniform((hidden_dim, output_dim))
b2 = zero((output_dim, ))
params = [W1, b1, W2, b2]

# two-layer model
hidden = K.sigmoid(K.dot(x, W1) + b1)
ypred = K.softmax(K.dot(hidden, W2) + b2)

# categorical cross entropy loss
# The positional order of (target, output) differs between Keras major
# versions, so the arguments are passed by keyword to keep predictions and
# targets in the right slots.
loss = K.mean(K.categorical_crossentropy(output=ypred, target=ytrue), axis=None)

# categorical accuracy
accuracy = categorical_accuracy(ytrue, ypred)

# Train function
Esempio n. 15
0
    def build(self, input_shape):
        """Create the ``gamma`` and ``beta`` weights for this layer.

        Both take the input shape without its leading axis. ``gamma`` comes
        from ``initializations.one`` and ``beta`` from ``initializations.zero``;
        the pair is registered as the layer's trainable weights before the
        parent ``build`` is invoked.
        """
        param_shape = input_shape[1:]
        gamma = initializations.one(param_shape, name='gamma')
        beta = initializations.zero(param_shape, name='beta')

        self.gamma = gamma
        self.beta = beta
        self.trainable_weights = [gamma, beta]

        super(LayerNorm, self).build(input_shape)
Esempio n. 16
0
    def __init__(self, n_words, dim_emb, dim_img):
        """Allocate the word embedding and four three-layer weight stacks.

        The stacks are named ``word``, ``phs``, ``phl`` and ``st``. Layer widths
        are fixed at 200, 300 and 300. The image feature width ``dim_img`` is
        added to the input of layer 1 in ``word``, layer 2 in ``phs`` and
        layer 3 in ``phl``; ``st`` takes no image input.

        Args:
            n_words: vocabulary size (rows of ``emb_W``).
            dim_emb: word embedding width.
            dim_img: image feature width.
        """
        uniform = initializations.uniform
        zero = initializations.zero

        self.n_words = n_words
        self.dim_emb = dim_emb
        self.dim_img = dim_img

        self.emb_W = uniform((n_words, dim_emb))

        # Input widths are 3x the previous layer width (presumably a window of
        # three positions -- confirm in the forward pass). Bias shapes are
        # explicit 1-tuples: ``(200)`` is just the int 200.

        # word stack: image joins at layer 1
        self.cnn_word_W1 = uniform((dim_emb * 3 + dim_img, 200))
        self.cnn_word_b1 = zero((200,))
        self.cnn_word_W2 = uniform((200 * 3, 300))
        self.cnn_word_b2 = zero((300,))
        self.cnn_word_W3 = uniform((300 * 3, 300))
        self.cnn_word_b3 = zero((300,))

        # phs stack: image joins at layer 2
        self.cnn_phs_W1 = uniform((dim_emb * 3, 200))
        self.cnn_phs_b1 = zero((200,))
        self.cnn_phs_W2 = uniform((200 * 3 + dim_img, 300))
        self.cnn_phs_b2 = zero((300,))
        self.cnn_phs_W3 = uniform((300 * 3, 300))
        self.cnn_phs_b3 = zero((300,))

        # phl stack: image joins at layer 3
        self.cnn_phl_W1 = uniform((dim_emb * 3, 200))
        self.cnn_phl_b1 = zero((200,))
        self.cnn_phl_W2 = uniform((200 * 3, 300))
        self.cnn_phl_b2 = zero((300,))
        self.cnn_phl_W3 = uniform((300 * 3 + dim_img, 300))
        self.cnn_phl_b3 = zero((300,))

        # st stack: no image input
        self.cnn_st_W1 = uniform((dim_emb * 3, 200))
        self.cnn_st_b1 = zero((200,))
        self.cnn_st_W2 = uniform((200 * 3, 300))
        self.cnn_st_b2 = zero((300,))
        self.cnn_st_W3 = uniform((300 * 3, 300))
        self.cnn_st_b3 = zero((300,))
Esempio n. 17
0
    def __init__(self, n_words, dim_emb, dim_img):
        """Create the embedding table and the ``word``/``phs``/``phl``/``st`` weights.

        Each of the four groups has three weight/bias pairs with output widths
        200, 300 and 300. ``dim_img`` widens the input of the first layer of
        ``word``, the second layer of ``phs`` and the third layer of ``phl``.

        Args:
            n_words: vocabulary size (rows of ``emb_W``).
            dim_emb: word embedding width.
            dim_img: image feature width.
        """
        uniform = initializations.uniform
        zero = initializations.zero

        self.n_words = n_words
        self.dim_emb = dim_emb
        self.dim_img = dim_img

        self.emb_W = uniform((n_words, dim_emb))

        # Bias shapes are written as 1-tuples; ``(200)`` evaluates to the int 200.

        # word group (image features enter at layer 1)
        self.cnn_word_W1 = uniform((dim_emb*3 + dim_img, 200))
        self.cnn_word_b1 = zero((200,))
        self.cnn_word_W2 = uniform((200*3, 300))
        self.cnn_word_b2 = zero((300,))
        self.cnn_word_W3 = uniform((300*3, 300))
        self.cnn_word_b3 = zero((300,))

        # phs group (image features enter at layer 2)
        self.cnn_phs_W1 = uniform((dim_emb*3, 200))
        self.cnn_phs_b1 = zero((200,))
        self.cnn_phs_W2 = uniform((200*3 + dim_img, 300))
        self.cnn_phs_b2 = zero((300,))
        self.cnn_phs_W3 = uniform((300*3, 300))
        self.cnn_phs_b3 = zero((300,))

        # phl group (image features enter at layer 3)
        self.cnn_phl_W1 = uniform((dim_emb*3, 200))
        self.cnn_phl_b1 = zero((200,))
        self.cnn_phl_W2 = uniform((200*3, 300))
        self.cnn_phl_b2 = zero((300,))
        self.cnn_phl_W3 = uniform((300*3 + dim_img, 300))
        self.cnn_phl_b3 = zero((300,))

        # st group (no image features)
        self.cnn_st_W1 = uniform((dim_emb*3, 200))
        self.cnn_st_b1 = zero((200,))
        self.cnn_st_W2 = uniform((200*3, 300))
        self.cnn_st_b2 = zero((300,))
        self.cnn_st_W3 = uniform((300*3, 300))
        self.cnn_st_b3 = zero((300,))
Esempio n. 18
0
    def __init__(self, n_vocab, dim_word, dimctx, dim):
        """Allocate every parameter of the encoder/decoder model with attention.

        Args:
            n_vocab: vocabulary size, shared by the encoder and decoder tables.
            dim_word: word embedding width.
            dimctx: context vector width.
            dim: recurrent state size.
        """
        uniform = initializations.uniform
        zero = initializations.zero

        self.n_vocab = n_vocab  # e.g. 30000
        self.dim_word = dim_word  # e.g. 384
        self.dimctx = dimctx  # e.g. 1024
        self.dim = dim  # e.g. 512

        # 1-D shapes are passed as real tuples; ``(n)`` is only the int ``n``.

        ### word embedding ###
        self.W_enc_emb = uniform((self.n_vocab, self.dim_word))
        self.W_dec_emb = uniform((self.n_vocab, self.dim_word))

        ### enc forward GRU ###
        self.W_enc_f_gru = uniform((self.dim_word, self.dim * 2))
        self.U_enc_f_gru = uniform((self.dim, self.dim * 2))
        self.b_enc_f_gru = zero((self.dim * 2,))
        # cdd : candidate
        self.W_enc_f_gru_cdd = uniform((self.dim_word, self.dim))
        self.U_enc_f_gru_cdd = uniform((self.dim, self.dim))
        self.b_enc_f_gru_cdd = zero((self.dim,))

        ### enc backward GRU ###
        self.W_enc_b_gru = uniform((self.dim_word, self.dim * 2))
        self.U_enc_b_gru = uniform((self.dim, self.dim * 2))
        self.b_enc_b_gru = zero((self.dim * 2,))
        self.W_enc_b_gru_cdd = uniform((self.dim_word, self.dim))
        self.U_enc_b_gru_cdd = uniform((self.dim, self.dim))
        self.b_enc_b_gru_cdd = zero((self.dim,))

        ### context to decoder init state (s0)
        self.W_dec_init = uniform((self.dimctx, self.dim))
        self.b_dec_init = zero((self.dim,))

        ### dec GRU ###
        self.W_dec_gru = uniform((self.dim_word, self.dim * 2))
        self.U_dec_gru = uniform((self.dim, self.dim * 2))
        self.b_dec_gru = zero((self.dim * 2,))
        self.W_dec_gru_cdd = uniform((self.dim_word, self.dim))
        self.U_dec_gru_cdd = uniform((self.dim, self.dim))
        self.b_dec_gru_cdd = zero((self.dim,))
        self.W_dec_gru_ctx = uniform((self.dimctx, self.dim * 2))
        self.W_dec_gru_ctx_cdd = uniform((self.dimctx, self.dim))

        ### enc-dec attention ###
        self.W_att_y2c = uniform((self.dim_word, self.dimctx))
        self.W_att_h2c = uniform((self.dimctx, self.dimctx))
        self.W_att_s2c = uniform((self.dim, self.dimctx))
        self.b_att = zero((self.dimctx,))

        self.U_att_energy = uniform((self.dimctx, 1))
        self.b_att_energy = zero((1,))

        ### enc-dec prediction ###
        self.W_dec_pred_s2y = uniform((self.dim, self.dim_word))
        self.b_dec_pred_s2y = zero((self.dim_word,))
        self.W_dec_pred_y2y = uniform((self.dim_word, self.dim_word))
        self.b_dec_pred_y2y = zero((self.dim_word,))
        # NOTE(review): the context projection uses dim * 2 rows rather than
        # dimctx -- presumably the two encoder directions concatenated; confirm.
        self.W_dec_pred_c2y = uniform((self.dim * 2, self.dim_word))
        self.b_dec_pred_c2y = zero((self.dim_word,))
        self.W_dec_pred = uniform((self.dim_word, self.n_vocab))
        self.b_dec_pred = zero((self.n_vocab,))

        self.params = [
            self.W_enc_emb, self.W_dec_emb,
            self.W_enc_f_gru, self.U_enc_f_gru, self.b_enc_f_gru,
            self.W_enc_f_gru_cdd, self.U_enc_f_gru_cdd, self.b_enc_f_gru_cdd,
            self.W_enc_b_gru, self.U_enc_b_gru, self.b_enc_b_gru,
            self.W_enc_b_gru_cdd, self.U_enc_b_gru_cdd, self.b_enc_b_gru_cdd,
            self.W_dec_init, self.b_dec_init,
            self.W_dec_gru, self.U_dec_gru, self.b_dec_gru,
            self.W_dec_gru_cdd, self.U_dec_gru_cdd, self.b_dec_gru_cdd,
            self.W_dec_gru_ctx, self.W_dec_gru_ctx_cdd,
            self.W_att_y2c, self.W_att_h2c, self.W_att_s2c, self.b_att,
            self.U_att_energy, self.b_att_energy,
            self.W_dec_pred_s2y, self.b_dec_pred_s2y,
            self.W_dec_pred_y2y, self.b_dec_pred_y2y,
            self.W_dec_pred_c2y, self.b_dec_pred_c2y,
            self.W_dec_pred, self.b_dec_pred,
        ]