Ejemplo n.º 1
0
    def __init__(self, y_vocab, dim_word, dim, dim_ctx):
        """Allocate the trainable parameters of the LSTM model.

        Args:
            y_vocab: Number of output classes (width of the prediction layer).
            dim_word: First dimension of the LSTM input weight matrix.
            dim: Number of LSTM hidden units.
            dim_ctx: First dimension of the matrices that produce the initial
                hidden and memory states.
        """
        self.y_vocab = y_vocab  # e.g. 430
        self.dim_word = dim_word  # e.g. 1024
        self.dim = dim  # e.g. 512
        self.dim_ctx = dim_ctx  # e.g. 512

        # 1-D shapes are written as real 1-tuples ``(n,)``: ``(n)`` is just the
        # int ``n`` and only works with initialisers that accept an int shape.

        ### initial context - image embedding -> initial hidden / memory ###
        self.W_hidden_init = initializations.uniform((self.dim_ctx, self.dim))
        self.b_hidden_init = initializations.zero((self.dim,))
        self.W_memory_init = initializations.uniform((self.dim_ctx, self.dim))
        self.b_memory_init = initializations.zero((self.dim,))

        ### LSTM (weights are 4 * dim wide) ###
        self.W_lstm = initializations.uniform((self.dim_word, self.dim * 4))
        self.U_lstm = initializations.uniform((self.dim, self.dim * 4))
        self.b_lstm = initializations.zero((self.dim * 4,))

        ### prediction ###
        self.W_pred = initializations.uniform((self.dim * 2, self.y_vocab))
        self.b_pred = initializations.zero((self.y_vocab,))

        # Flat list of every parameter created above, in creation order.
        self.params = [
            self.W_hidden_init, self.b_hidden_init,
            self.W_memory_init, self.b_memory_init,
            self.W_lstm, self.U_lstm, self.b_lstm,
            self.W_pred, self.b_pred,
        ]
    def __init__(self,
                 n_words,
                 dim_embed,
                 dim_hidden,
                 dim_image,
                 bias_init_vector=None):
        """Allocate the parameters of the image-conditioned LSTM model.

        Args:
            n_words: Vocabulary size.
            dim_embed: Width of the word embedding.
            dim_hidden: Number of LSTM hidden units.
            dim_image: Width of the image feature vector.
            bias_init_vector: Optional array used as the initial value of the
                output bias ``emb_word_b`` (cast to float32). When ``None``
                the bias is drawn from the uniform initialiser instead.
        """
        self.n_words = n_words
        self.dim_embed = dim_embed
        self.dim_hidden = dim_hidden
        self.dim_image = dim_image

        # 1-D shapes are written as real 1-tuples ``(n,)``: ``(n)`` is just the
        # int ``n`` and only works with initialisers that accept an int shape.

        # Word embedding.
        self.Wemb = initializations.uniform((n_words, dim_embed), scale=0.1)
        self.bemb = initializations.zero((dim_embed,))

        # Single LSTM weight matrix with 1 + dim_embed + dim_hidden rows
        # (presumably bias row + input + recurrent state; confirm against the
        # code that uses it).
        self.lstm_W = initializations.uniform(
            (1 + dim_embed + dim_hidden, dim_hidden * 4), scale=0.1)

        # Image features -> hidden space.
        self.encode_img_W = initializations.uniform((dim_image, dim_hidden),
                                                    scale=0.1)
        self.encode_img_b = initializations.zero((dim_hidden,))

        # Hidden state -> vocabulary scores.
        self.emb_word_W = initializations.uniform((dim_hidden, n_words),
                                                  scale=0.1)
        if bias_init_vector is None:
            self.emb_word_b = initializations.uniform((n_words,))
        else:
            self.emb_word_b = theano.shared(
                bias_init_vector.astype(np.float32), borrow=True)

        self.params = [
            self.Wemb, self.bemb, self.lstm_W, self.encode_img_W,
            self.encode_img_b, self.emb_word_W, self.emb_word_b
        ]
    def __init__(self, n_words, dim_embed, dim_hidden, dim_image, bias_init_vector=None):
        """Allocate the parameters of the image-conditioned LSTM model.

        Args:
            n_words: Vocabulary size.
            dim_embed: Width of the word embedding.
            dim_hidden: Number of LSTM hidden units.
            dim_image: Width of the image feature vector.
            bias_init_vector: Optional array used as the initial value of the
                output bias ``emb_word_b`` (cast to float32). When ``None``
                the bias is drawn from the uniform initialiser instead.
        """
        self.n_words = n_words
        self.dim_embed = dim_embed
        self.dim_hidden = dim_hidden
        self.dim_image = dim_image

        # 1-D shapes are written as real 1-tuples ``(n,)``: ``(n)`` is just the
        # int ``n`` and only works with initialisers that accept an int shape.

        # Word embedding.
        self.Wemb = initializations.uniform((n_words, dim_embed), scale=0.1)
        self.bemb = initializations.zero((dim_embed,))

        # Single LSTM weight matrix with 1 + dim_embed + dim_hidden rows
        # (presumably bias row + input + recurrent state; confirm against the
        # code that uses it).
        self.lstm_W = initializations.uniform((1 + dim_embed + dim_hidden, dim_hidden*4), scale=0.1)

        # Image features -> hidden space.
        self.encode_img_W = initializations.uniform((dim_image, dim_hidden), scale=0.1)
        self.encode_img_b = initializations.zero((dim_hidden,))

        # Hidden state -> vocabulary scores.
        self.emb_word_W = initializations.uniform((dim_hidden, n_words), scale=0.1)
        if bias_init_vector is None:
            self.emb_word_b = initializations.uniform((n_words,))
        else:
            self.emb_word_b = theano.shared(bias_init_vector.astype(np.float32), borrow=True)

        self.params = [
                self.Wemb, self.bemb,
                self.lstm_W,
                self.encode_img_W, self.encode_img_b,
                self.emb_word_W, self.emb_word_b
            ]
Ejemplo n.º 4
0
    def __init__(self, n_vocab, dim_word, dim_ctx, dim):
        """Allocate the parameters of the attention LSTM model.

        Args:
            n_vocab: Vocabulary size.
            dim_word: Width of the word embedding.
            dim_ctx: Width of one image context (feature) vector.
            dim: Number of LSTM hidden units.
        """
        self.n_vocab = n_vocab
        self.dim_word = dim_word
        self.dim_ctx = dim_ctx
        self.dim = dim

        # 1-D shapes are written as real 1-tuples ``(n,)``: ``(n)`` is just the
        # int ``n``, which hides the intent even where it happens to work.

        ### Word Embedding ###
        self.Wemb = initializations.uniform((n_vocab, self.dim_word))

        ### LSTM initialization NN ###
        self.Init_state_W = initializations.uniform((self.dim_ctx, self.dim))
        self.Init_state_b = shared_zeros((self.dim,))

        self.Init_memory_W = initializations.uniform((self.dim_ctx, self.dim))
        self.Init_memory_b = shared_zeros((self.dim,))

        ### Main LSTM ###
        self.lstm_W = initializations.uniform((self.dim_word, self.dim * 4))
        # Recurrent weights: four orthogonal (dim x dim) blocks side by side.
        self.lstm_U = sharedX(np.concatenate(
            [ortho_weight(dim) for _ in range(4)], axis=1))

        self.lstm_b = shared_zeros((self.dim * 4,))

        self.Wc = initializations.uniform((self.dim_ctx, self.dim*4))  # image -> LSTM hidden
        self.Wc_att = initializations.uniform((self.dim_ctx, self.dim_ctx))  # image -> passed once through a neural-net layer
        self.Wd_att = initializations.uniform((self.dim, self.dim_ctx))  # LSTM hidden -> influence on the image
        self.b_att = shared_zeros((self.dim_ctx,))

        self.U_att = initializations.uniform((self.dim_ctx, 1))  # reduces the image features (512 in the original setup) to one dimension
        self.c_att = shared_zeros((1,))

        ### Decoding NeuralNets ###
        self.decode_lstm_W = initializations.uniform((self.dim, self.dim_word))
        self.decode_lstm_b = shared_zeros((self.dim_word,))

        self.decode_word_W = initializations.uniform((self.dim_word, n_vocab))
        self.decode_word_b = shared_zeros((n_vocab,))

        self.param_names = ['Wemb', 'Init_state_W', 'Init_state_b',
                            'Init_memory_W', 'Init_memory_b',
                            'lstm_W', 'lstm_U', 'lstm_b',
                            'Wc', 'Wc_att', 'Wd_att', 'b_att',
                            'U_att', 'c_att',
                            'decode_lstm_W', 'decode_lstm_b',
                            'decode_word_W', 'decode_word_b']

        # Build ``params`` from ``param_names`` so the two lists cannot drift
        # apart; the order is the same as in the names list.
        self.params = [getattr(self, name) for name in self.param_names]
Ejemplo n.º 5
0
    def __init__(self, n_words, embedding_dim,  hidden_dim):
        """Allocate the parameters of the encoder/decoder model.

        Args:
            n_words: Vocabulary size.
            embedding_dim: Width of the word embedding.
            hidden_dim: Number of hidden units of the encoder and decoder.
        """
        self.n_words = n_words
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim

        # Every 1-D shape below is a real 1-tuple ``(n,)``. The original mixed
        # ``(n,)`` with ``(n)``, which is just the int ``n``.

        self.emb = initializations.uniform((n_words, embedding_dim))

        self.encode_W = initializations.uniform((embedding_dim, hidden_dim*4))  # input -> hidden
        self.encode_U = initializations.uniform((hidden_dim, hidden_dim*4))  # last hidden -> hidden (recurrent)
        self.encode_b = initializations.zero((hidden_dim*4,))

        self.decode_W = initializations.uniform((embedding_dim, hidden_dim*4))  # last word -> hidden
        self.decode_U = initializations.uniform((hidden_dim, hidden_dim*4))  # last hidden -> hidden
        self.decode_V = initializations.uniform((hidden_dim, hidden_dim*4))  # context -> hidden
        self.decode_b = initializations.zero((hidden_dim*4,))

        # Hidden state -> embedding space.
        self.output_W = initializations.uniform((hidden_dim, embedding_dim))
        self.output_b = initializations.zero((embedding_dim,))

        # Embedding space -> vocabulary scores.
        self.word_W = initializations.uniform((embedding_dim, n_words))
        self.word_b = initializations.zero((n_words,))

        self.params = [
            self.emb,
            self.encode_W, self.encode_U, self.encode_b,
            self.decode_W, self.decode_U, self.decode_V, self.decode_b,
            self.output_W, self.output_b,
            self.word_W, self.word_b
        ]
Ejemplo n.º 6
0
    def create_actor_network(self, state_size, action_dim):
        """Build the actor: a small dense network mapping state -> action.

        Two ReLU layers (50 and 25 units) feed a tanh output layer of width
        ``action_dim`` whose weights start from a narrow uniform range.

        Args:
            state_size: Shape passed to the Keras ``Input`` layer.
            action_dim: Number of units in the output layer.

        Returns:
            ``(model, model.trainable_weights, state_input_tensor)``.
        """
        print("Now we build the actor cnn model")

        state_input = Input(shape=state_size)

        # Hidden stack; layers are created in order (50 then 25 units).
        features = state_input
        for width in (50, 25):
            features = Dense(width, activation='relu', init='he_uniform')(features)

        # Output layer with a small-scale uniform initialisation.
        action_layer = Dense(
            action_dim,
            activation='tanh',
            init=lambda shape, name: uniform(shape, scale=3e-4, name=name))
        actions = action_layer(features)

        actor = Model(input=state_input, output=actions)
        return actor, actor.trainable_weights, state_input
Ejemplo n.º 7
0
def glorot_uniform_3d(shape):
    """Glorot-style uniform initialiser for a 3-D weight tensor.

    Only the last two axes count as fans (``shape[1]`` in, ``shape[2]`` out).
    The leading axis is left out because its slices are treated as
    independent of one another.
    """
    fan_sum = shape[1] + shape[2]
    limit = np.sqrt(6. / fan_sum)
    return uniform(shape, limit)
Ejemplo n.º 8
0
    def __init__(self, n_vocab, y_vocab, dim_word, dim):
        """Allocate the parameters of the GRU model.

        Args:
            n_vocab: Input vocabulary size (rows of the word embedding).
            y_vocab: Number of output classes.
            dim_word: Width of the word embedding.
            dim: Number of GRU hidden units.
        """
        self.n_vocab = n_vocab  # e.g. 12047
        self.y_vocab = y_vocab  # e.g. 430
        self.dim_word = dim_word  # e.g. 1024
        self.dim = dim  # e.g. 512

        # 1-D shapes are written as real 1-tuples ``(n,)``: ``(n)`` is just the
        # int ``n`` and only works with initialisers that accept an int shape.

        ### image Embedding (input feature width is fixed at 4096) ###
        self.W_img_emb = initializations.uniform((4096, self.dim))
        self.b_img_emb = initializations.zero((self.dim,))

        ### Word Embedding ###
        self.W_emb = initializations.uniform((self.n_vocab, self.dim_word))

        ### enc forward GRU ###
        # Weights of width 2 * dim (presumably the two gates; confirm against
        # the step function that uses them).
        self.W_gru = initializations.uniform((self.dim_word, self.dim * 2))
        self.U_gru = initializations.uniform((self.dim, self.dim * 2))
        self.b_gru = initializations.zero((self.dim * 2,))
        # cdd : candidate
        self.W_gru_cdd = initializations.uniform(
            (self.dim_word, self.dim))
        self.U_gru_cdd = initializations.uniform((self.dim, self.dim))
        self.b_gru_cdd = initializations.zero((self.dim,))

        ### prediction ###
        self.W_pred = initializations.uniform((self.dim, self.y_vocab))
        self.b_pred = initializations.zero((self.y_vocab,))

        self.params = [
            self.W_img_emb, self.b_img_emb, self.W_emb, self.W_gru, self.U_gru,
            self.b_gru, self.W_gru_cdd, self.U_gru_cdd, self.b_gru_cdd,
            self.W_pred, self.b_pred
        ]
Ejemplo n.º 9
0
    def __init__(self, n_vocab, y_vocab, dim_word, dim):
        """Allocate the parameters of the GRU model.

        Args:
            n_vocab: Input vocabulary size (rows of the word embedding).
            y_vocab: Number of output classes.
            dim_word: Width of the word embedding.
            dim: Number of GRU hidden units.
        """
        self.n_vocab = n_vocab  # e.g. 12047
        self.y_vocab = y_vocab  # e.g. 430
        self.dim_word = dim_word  # e.g. 1024
        self.dim = dim  # e.g. 512

        # 1-D shapes are written as real 1-tuples ``(n,)``: ``(n)`` is just the
        # int ``n`` and only works with initialisers that accept an int shape.

        ### image Embedding (input feature width is fixed at 4096) ###
        self.W_img_emb = initializations.uniform((4096, self.dim))
        self.b_img_emb = initializations.zero((self.dim,))

        ### Word Embedding ###
        self.W_emb = initializations.uniform((self.n_vocab, self.dim_word))

        ### enc forward GRU ###
        # Weights of width 2 * dim (presumably the two gates; confirm against
        # the step function that uses them).
        self.W_gru = initializations.uniform((self.dim_word, self.dim * 2))
        self.U_gru = initializations.uniform((self.dim, self.dim * 2))
        self.b_gru = initializations.zero((self.dim * 2,))
        self.W_gru_cdd = initializations.uniform((self.dim_word, self.dim))  # cdd : candidate
        self.U_gru_cdd = initializations.uniform((self.dim, self.dim))
        self.b_gru_cdd = initializations.zero((self.dim,))

        ### prediction ###
        self.W_pred = initializations.uniform((self.dim, self.y_vocab))
        self.b_pred = initializations.zero((self.y_vocab,))

        self.params = [self.W_img_emb, self.b_img_emb,
                       self.W_emb,
                       self.W_gru, self.U_gru, self.b_gru,
                       self.W_gru_cdd, self.U_gru_cdd, self.b_gru_cdd,
                       self.W_pred, self.b_pred]
Ejemplo n.º 10
0
    def __init__(self, n_channels, batch_size=30):
        """Allocate weights and biases for five convolution layers.

        Args:
            n_channels: Second dimension of the first layer's weight tensor
                (presumably the number of input image channels).
            batch_size: Stored on the instance; not used here.
        """
        self.n_channels = n_channels
        self.batch_size = batch_size

        # (attribute prefix, dim 0, dim 1, square kernel size) for each layer.
        layer_specs = (
            ('conv1', 96, n_channels, 7),
            ('conv2', 256, 96, 5),
            ('conv3', 512, 256, 3),
            ('conv4', 512, 512, 3),
            ('conv5', 512, 512, 3),
        )
        # Creates ``convN_W`` then ``convN_b`` for N = 1..5, in that order.
        for prefix, n_out, n_in, ksize in layer_specs:
            setattr(self, prefix + '_W',
                    initializations.uniform((n_out, n_in, ksize, ksize)))
            setattr(self, prefix + '_b', shared_zeros((n_out,)))
Ejemplo n.º 11
0
 def create_actor_network(self, state_size,action_dim):
     """Build the actor network: state -> tanh-bounded outputs.

     Two 100-unit ReLU layers feed a tanh output layer whose weights start
     from a narrow uniform range.

     Args:
         state_size: Length of the state vector (wrapped in a list for
             ``Input``).
         action_dim: Currently unused; see the note on the output layer.

     Returns:
         ``(model, model.trainable_weights, state_input_tensor)``.
     """
     print("Now we build the model")
     # The original created a ``Sequential()`` here that was overwritten by
     # the functional ``Model`` below before any use; that dead code is gone.
     S = Input(shape=[state_size])
     h0 = Dense(100, init='he_uniform', activation='relu')(S)
     h1 = Dense(100, init='he_uniform', activation='relu')(h0)
     # NOTE(review): the output width is hard-coded to 8 and ``action_dim`` is
     # ignored. Confirm whether 8 should be ``action_dim``.
     V = Dense(8,
               init=lambda shape, name: uniform(shape, scale=3e-3, name=name),
               activation='tanh')(h1)
     model = Model(input=S, output=V)
     return model, model.trainable_weights, S
Ejemplo n.º 12
0
def reset_model(model):
    """
    Re-initialise, in place, the trainable weights of a Keras model.

    Supports MoleculeConv and Dense layers; RandomMask and InputLayer layers
    are left untouched. Resetting saves rebuilding the model between CV tests.

    Raises ValueError for any other layer type. Returns the same model object.
    """

    for layer in model.layers:
        # Layer types are told apart by a substring of the layer's string
        # form; each type needs its own reset recipe.
        layer_id = str(layer)

        if '.MoleculeConv' in layer_id:
            n_stacked = layer.depth + 1
            tile_reps = (n_stacked, 1, 1)

            # Inner weights: a tiled template plus fresh uniform noise.
            inner_W_template = layer.init_inner((layer.inner_dim, layer.inner_dim))
            inner_b_template = np.zeros((1, layer.inner_dim))

            inner_W = (T.tile(inner_W_template, tile_reps).eval() +
                       initializations.uniform((n_stacked, layer.inner_dim, layer.inner_dim)).eval())
            layer.W_inner.set_value(inner_W.astype(np.float32))

            inner_b = (T.tile(inner_b_template, tile_reps).eval() +
                       initializations.uniform((n_stacked, 1, layer.inner_dim)).eval())
            layer.b_inner.set_value(inner_b.astype(np.float32))

            # Outer weights: the tiled template only, no extra noise.
            outer_W_template = layer.init_output((layer.inner_dim, layer.units),
                                                 scale=layer.scale_output)
            outer_b_template = np.zeros((1, layer.units))
            layer.W_output.set_value(
                T.tile(outer_W_template, tile_reps).eval().astype(np.float32))
            layer.b_output.set_value(
                T.tile(outer_b_template, tile_reps).eval().astype(np.float32))
            logging.info('graphFP layer reset')
            continue

        if '.Dense' in layer_id:
            # Redraw the kernel with the layer's own initialiser; zero the bias.
            fresh_W = layer.init(layer.W.shape.eval()).eval()
            layer.W.set_value(fresh_W.astype(np.float32))
            layer.b.set_value(np.zeros(layer.b.shape.eval(), dtype=np.float32))
            logging.info('dense layer reset')
            continue

        if '.RandomMask' in layer_id:
            logging.info('RandomMask unchanged')
            continue

        if '.InputLayer' in layer_id:
            logging.info('InputLayer unchanged')
            continue

        raise ValueError('Unknown layer {}, cannot reset weights'.format(layer_id))

    logging.info('Reset model weights')
    return model
    def build(self, input_shape):
        '''Create the layer's weight variables and register them as parameters.

        ``input_shape`` is accepted for the Keras API but not used here.
        '''
        # NOTE: templates are tiled and evaluated to concrete arrays so that the
        # params can be variables; otherwise K.get_value() does not work.
        n_stacked = self.depth + 1
        tile_reps = (n_stacked, 1, 1)

        # Inner weights (F x F): a tiled template plus uniform noise. Index
        # [d, :, :] of the stacked tensor belongs to layer/depth d, so
        # different depths can have different weights.
        inner_W_template = self.init_inner((self.inner_dim, self.inner_dim))
        inner_b_template = K.zeros((1, self.inner_dim))

        inner_W_value = T.tile(inner_W_template, tile_reps).eval() + \
            initializations.uniform((n_stacked, self.inner_dim, self.inner_dim)).eval()
        self.W_inner = K.variable(inner_W_value)
        self.W_inner.name = 'T:W_inner'

        inner_b_value = T.tile(inner_b_template, tile_reps).eval() + \
            initializations.uniform((n_stacked, 1, self.inner_dim)).eval()
        self.b_inner = K.variable(inner_b_value)
        self.b_inner.name = 'T:b_inner'

        # Output weights (F x L): the tiled template only, no extra noise.
        outer_W_template = self.init_output((self.inner_dim, self.output_dim),
                                            scale=self.scale_output)
        outer_b_template = K.zeros((1, self.output_dim))

        self.W_output = K.variable(T.tile(outer_W_template, tile_reps).eval())
        self.W_output.name = 'T:W_output'
        self.b_output = K.variable(T.tile(outer_b_template, tile_reps).eval())
        self.b_output.name = 'T:b_output'

        # Pack params: the same four variables go into both lists.
        self.trainable_weights = [
            self.W_inner, self.b_inner, self.W_output, self.b_output
        ]
        self.params = [
            self.W_inner, self.b_inner, self.W_output, self.b_output
        ]
Ejemplo n.º 14
0
def reset(model):
    '''Re-initialise, in place, the trainable weights of a Keras model.

    Supports GraphFP and Dense layers; Dropout layers are left untouched.
    Resetting saves rebuilding the model between CV tests.

    Raises ValueError for any other layer type. Returns the same model object.
    '''

    for layer in model.layers:
        # Layer types are told apart by a substring of the layer's string
        # form; each type needs its own reset recipe.
        layer_id = str(layer)

        if '.GraphFP' in layer_id:
            n_stacked = layer.depth + 1
            tile_reps = (n_stacked, 1, 1)

            # Inner weights: a tiled template plus fresh uniform noise.
            inner_W_template = layer.init_inner((layer.inner_dim, layer.inner_dim))
            inner_b_template = np.zeros((1, layer.inner_dim))

            inner_W = (T.tile(inner_W_template, tile_reps).eval() +
                       initializations.uniform((n_stacked, layer.inner_dim, layer.inner_dim)).eval())
            layer.W_inner.set_value(inner_W.astype(np.float32))

            inner_b = (T.tile(inner_b_template, tile_reps).eval() +
                       initializations.uniform((n_stacked, 1, layer.inner_dim)).eval())
            layer.b_inner.set_value(inner_b.astype(np.float32))

            # Outer weights: the tiled template only, no extra noise.
            outer_W_template = layer.init_output((layer.inner_dim, layer.output_dim),
                                                 scale=layer.scale_output)
            outer_b_template = np.zeros((1, layer.output_dim))
            layer.W_output.set_value(
                T.tile(outer_W_template, tile_reps).eval().astype(np.float32))
            layer.b_output.set_value(
                T.tile(outer_b_template, tile_reps).eval().astype(np.float32))
            print('graphFP layer reset')
            continue

        if '.Dense' in layer_id:
            # Redraw the kernel with the layer's own initialiser; zero the bias.
            fresh_W = layer.init(layer.W.shape.eval()).eval()
            layer.W.set_value(fresh_W.astype(np.float32))
            layer.b.set_value(np.zeros(layer.b.shape.eval(), dtype=np.float32))
            print('dense layer reset')
            continue

        if '.Dropout' in layer_id:
            print('dropout unchanged')
            continue

        raise ValueError('Unknown layer {}, cannot reset weights'.format(layer_id))

    print('Reset model weights')
    return model
Ejemplo n.º 15
0
def glorot_uniform_sigm(shape):
    """
    Glorot-style uniform weight initialiser scaled for sigmoid activations.

    The Deeplearning.net MLP tutorial recommends drawing weights from
      +/- sqrt(6 / (fan_in + fan_out))      for tanh units (the interval of
                                            Keras' glorot_uniform()), and
      +/- 4 * sqrt(6 / (fan_in + fan_out))  for sigmoid units.
    This function uses the second, wider interval.
    See: http://deeplearning.net/tutorial/mlp.html#going-from-logistic-regression-to-mlp
    """
    n_in, n_out = get_fans(shape)
    tanh_limit = np.sqrt(6. / (n_in + n_out))
    # Sigmoid units get four times the tanh interval.
    return uniform(shape, 4. * tanh_limit)
Ejemplo n.º 16
0
def glorot_uniform_sigm(shape, name=None, dim_ordering='th'):
    """
    Glorot-style uniform weight initialiser scaled for sigmoid activations.

    The Deeplearning.net MLP tutorial recommends drawing weights from
      +/- sqrt(6 / (fan_in + fan_out))      for tanh units (the interval of
                                            Keras' glorot_uniform()), and
      +/- 4 * sqrt(6 / (fan_in + fan_out))  for sigmoid units.
    This function uses the second, wider interval. ``name`` is forwarded to
    the created variable and ``dim_ordering`` to the fan computation.
    See: http://deeplearning.net/tutorial/mlp.html#going-from-logistic-regression-to-mlp
    """
    n_in, n_out = get_fans(shape, dim_ordering=dim_ordering)
    tanh_limit = np.sqrt(6. / (n_in + n_out))
    # Sigmoid units get four times the tanh interval.
    return uniform(shape, 4. * tanh_limit, name=name)
Ejemplo n.º 17
0
 def create_critic_network(self, state_size, action_dim):
     """Build and compile the critic network over (state, action) inputs.

     The state goes through one ReLU layer and is concatenated with the raw
     action. A second ReLU layer and a linear output of width ``action_dim``
     follow. The model is compiled with MSE loss and Adam at
     ``self.LEARNING_RATE``.

     Returns:
         ``(model, action_input_tensor, state_input_tensor)``.
     """
     print("Now we build the model")

     state_in = Input(shape=[state_size])
     action_in = Input(shape=[action_dim], name='action2')

     state_hidden = Dense(HIDDEN1_UNITS, init='he_uniform', activation='relu')(state_in)
     joint = merge([state_hidden, action_in], mode='concat')
     joint_hidden = Dense(HIDDEN2_UNITS, init='he_uniform', activation='relu')(joint)

     # Linear output with a small-scale uniform initialisation.
     output_layer = Dense(
         action_dim,
         init=lambda shape, name: uniform(shape, scale=3e-3, name=name),
         activation='linear')
     q_out = output_layer(joint_hidden)

     critic = Model(input=[state_in, action_in], output=q_out)
     critic.compile(loss='mse', optimizer=Adam(lr=self.LEARNING_RATE))
     return critic, action_in, state_in
Ejemplo n.º 18
0
def unitary_ASB2016_init(shape, name=None):
    """Create the parameters of an "ASB2016" unitary matrix and the matrix itself.

    Args:
        shape: Shape of the square matrix; ``shape[0]`` must equal ``shape[1]``.
        name: Prefix for the names of the created variables.

    Returns:
        ``(Uaug, theta, reflection, idxpermaug)``: the result of applying the
        parametrised transform to an augmented identity, followed by the
        three parameters that define the transform.
    """
    assert shape[0] == shape[1]
    size = shape[1]

    # Trainable parameters, created in this order: angles, then reflections.
    angles = initializations.uniform(
        (3, size), scale=np.pi, name='{}_theta'.format(name))
    reflections = initializations.glorot_uniform(
        (2, 2 * size), name='{}_reflection'.format(name))

    # Random permutation, repeated with an offset of ``size`` for the second
    # half of the augmented representation.
    perm = np.random.permutation(size)
    perm_aug = np.concatenate((perm, size + perm))

    # Augmented identity: an identity stacked on a zero block, passed
    # through ``augLeft``.
    eye_over_zeros = np.concatenate(
        (np.eye(size), np.zeros((size, size))), axis=0)
    identity_aug = augLeft(eye_over_zeros, module=np).astype(np.float32)

    unitary_aug = times_unitary_ASB2016(
        identity_aug, size, [angles, reflections, perm_aug])

    return unitary_aug, angles, reflections, perm_aug
Ejemplo n.º 19
0
    def build(self, input_shape):
        """Create the positional and per-channel weights of the layer.

        ``input_shape[1]`` is used as the input length and ``input_shape[2]``
        as the number of channels. When ``self.symmetric`` is not ``False``,
        only half of the positions (rounded up) get their own weight.
        """
        import numpy as np

        n_positions = input_shape[1]
        self.original_length = n_positions
        # The exact ``== False`` comparison is kept on purpose: other falsy
        # values (e.g. None) must still take the symmetric branch.
        if self.symmetric == False:
            self.length = n_positions
        else:
            self.odd_input_length = n_positions % 2.0 == 1
            self.length = int(n_positions / 2.0 + 0.5)
        self.num_channels = input_shape[2]

        def _limit():
            # Evaluated lazily, with the attribute values at call time.
            fan = self.length * self.num_channels + self.output_dim
            return np.sqrt(np.sqrt(2.0 / fan))

        # Fix bug in Keras 2: the initialiser ignores the shape it is given
        # and always draws a (output_dim, length) tensor. It is stored on the
        # layer, but the weights below use 'random_uniform' instead.
        self.init = lambda shape=None: initializations.uniform(
            (self.output_dim, self.length), -_limit(), _limit())

        # Optional constraint / regulariser for the positional weights.
        if self.curvature_constraint is None:
            pos_constraint = None
        else:
            pos_constraint = constraints.CurvatureConstraint(
                self.curvature_constraint)
        if self.smoothness_penalty is None:
            pos_regularizer = None
        else:
            pos_regularizer = regularizers.SepFCSmoothnessRegularizer(
                self.smoothness_penalty, self.smoothness_l1,
                self.smoothness_second_diff)

        self.W_pos = self.add_weight(
            shape=(self.output_dim, self.length),
            name='{}_W_pos'.format(self.name),
            initializer='random_uniform',
            constraint=pos_constraint,
            regularizer=pos_regularizer)
        self.W_chan = self.add_weight(
            shape=(self.output_dim, self.num_channels),
            name='{}_W_chan'.format(self.name),
            initializer='random_uniform',
            trainable=True)
        self.built = True
Ejemplo n.º 20
0
    def create_critic_network(self, state_size, action_dim):
        """Build and compile the critic: (state, action) -> one linear value.

        State and action each pass through their own ReLU layer (50 and 25
        units) and are concatenated. Two more ReLU layers (50 and 25 units)
        and a single linear output unit follow. The model is compiled with
        MSE loss and Adam at ``self.LEARNING_RATE``.

        Returns:
            ``(model, action_input_tensor, state_input_tensor)``.
        """
        print("Now we build the critic cnn model")

        # State branch.
        state_input = Input(shape=state_size)
        state_branch = Dense(50, activation='relu', init='he_uniform')(state_input)

        # Action branch.
        action_input = Input(shape=[action_dim])
        action_branch = Dense(25, activation='relu', init='he_uniform')(action_input)

        # Joint trunk; layers are created in order (50 then 25 units).
        joint = merge([state_branch, action_branch], mode='concat')
        for width in (50, 25):
            joint = Dense(width, activation='relu', init='he_uniform')(joint)

        # Differs from the TORCS version, but fine for this use.
        value_layer = Dense(
            1,
            activation='linear',
            init=lambda shape, name: uniform(shape, scale=3e-4, name=name))
        value = value_layer(joint)

        critic = Model(input=[state_input, action_input], output=value)
        critic.compile(loss='mse', optimizer=Adam(lr=self.LEARNING_RATE))
        return critic, action_input, state_input
Ejemplo n.º 21
0
def build_model(corpora, params, filename=None):
    """Build and compile the Keras ``Graph`` model with one softmax per target position.

    The source sequence is embedded, flattened and passed through a shared
    tanh layer. Each target position then gets its own tanh layer and softmax
    over the target vocabulary, and the softmaxes are concatenated into the
    single output.

    Args:
        corpora: Provides ``train_src_idxs``, ``train_trg_idxs``,
            ``src_vocab`` and ``trg_vocab``.
        params: Mapping with the layer widths ``"embedding"``, ``"hidden1"``
            and ``"hidden2"``.
        filename: Unused.

    Returns:
        The compiled model.
    """
    _init = lambda shape : uniform(shape, scale=0.1)

    src_seq_len = corpora.train_src_idxs.shape[1]
    trg_seq_len = corpora.train_trg_idxs.shape[1]

    graph = Graph()
    graph.add_input(name='input', input_shape=(src_seq_len, ), dtype=int)

    # Shared front end: embedding -> flatten -> tanh hidden layer.
    embedding_layer = Embedding(input_dim=len(corpora.src_vocab)+1,
                                output_dim=params["embedding"],
                                init=_init,
                                mask_zero=True,
                                input_length=src_seq_len)
    graph.add_node(embedding_layer, name='embedding', input='input')
    graph.add_node(Flatten(), name='flatten', input='embedding')
    graph.add_node(Dense(params["hidden1"], init=_init, activation='tanh'),
                   name='hidden1', input='flatten')

    # Target word predictor machines in CSTM: one pair of nodes per position.
    trg_mach_names = []
    for position in range(trg_seq_len):
        mach_name = "target_mach_%d" % position
        trg_mach_names.append(mach_name)

    for mach_name in trg_mach_names:
        presoftmax_name = '%s-presoftmax' % mach_name
        graph.add_node(Dense(params["hidden2"], init=_init, activation='tanh'),
                       name=presoftmax_name, input='hidden1')
        graph.add_node(Dense(len(corpora.trg_vocab), init=_init, activation='softmax'),
                       name=mach_name,
                       input=presoftmax_name)

    # Gather the per-position softmaxes into one concatenated tensor.
    graph.add_output(name='output',
                     inputs=trg_mach_names,
                     merge_mode='concat')

    # Optimizer and compilation.
    optimizer = Adagrad()
    graph.compile(optimizer=optimizer, loss={'output' : custom_loss})

    return graph
Ejemplo n.º 22
0
    def __init__(self, n_vocab, y_vocab, dim_word, dim):
        """Allocate the parameters of the GRU model with an image-based initial state.

        Args:
            n_vocab: Input vocabulary size (rows of the word embedding).
            y_vocab: Number of output classes.
            dim_word: Width of the word embedding.
            dim: Number of GRU hidden units.
        """
        self.n_vocab = n_vocab  # e.g. 12047
        self.y_vocab = y_vocab  # e.g. 430
        self.dim_word = dim_word  # e.g. 1024
        self.dim = dim  # e.g. 1024
        self.dim_ctx = 4096  # context width is fixed, not a constructor argument

        # 1-D shapes are written as real 1-tuples ``(n,)``: ``(n)`` is just the
        # int ``n`` and only works with initialisers that accept an int shape.

        ### initial context ###
        self.W_img_init = initializations.uniform((self.dim_ctx, self.dim))
        self.b_img_init = initializations.zero((self.dim,))

        ### Word Embedding ###
        self.W_emb = initializations.uniform((self.n_vocab, self.dim_word))

        ### GRU weights of width 2 * dim ###
        self.W_gru = initializations.uniform((self.dim_word, self.dim * 2))
        self.U_gru = initializations.uniform((self.dim, self.dim * 2))
        self.b_gru = initializations.zero((self.dim * 2,))

        ### GRU candidate (cdd) ###
        self.W_gru_cdd = initializations.uniform((self.dim_word, self.dim))
        self.U_gru_cdd = initializations.uniform((self.dim, self.dim))
        self.b_gru_cdd = initializations.zero((self.dim,))

        ### prediction (input width is 2 * dim) ###
        self.W_pred = initializations.uniform((self.dim * 2, self.y_vocab))
        self.b_pred = initializations.zero((self.y_vocab,))

        self.params = [self.W_img_init, self.b_img_init,
                       self.W_emb,
                       self.W_gru, self.U_gru, self.b_gru,
                       self.W_gru_cdd, self.U_gru_cdd, self.b_gru_cdd,
                       self.W_pred, self.b_pred]
Ejemplo n.º 23
0
 def norm(scale):
     """Return an initialiser drawing from ``initializations.uniform`` with the given ``scale``.

     The returned callable takes ``(shape, name=None)``.
     """
     # Kept as a lambda so the returned callable is unchanged for callers.
     scaled_uniform = lambda shape, name=None: initializations.uniform(
         shape, scale=scale, name=name)
     return scaled_uniform
Ejemplo n.º 24
0
    def __init__(self, n_vocab, y_vocab, dim_word, dim, dim_ctx):
        """Allocate the parameters of the attention GRU model.

        Args:
            n_vocab: Input vocabulary size (rows of the word embedding).
            y_vocab: Number of output classes.
            dim_word: Width of the word embedding.
            dim: Number of GRU hidden units.
            dim_ctx: Width of one context vector.
        """
        self.n_vocab = n_vocab  # e.g. 12047
        self.y_vocab = y_vocab  # e.g. 430
        self.dim_word = dim_word  # e.g. 1024
        self.dim = dim  # e.g. 1024
        self.dim_ctx = dim_ctx  # e.g. 512

        # 1-D shapes are written as real 1-tuples ``(n,)``: ``(n)`` is just the
        # int ``n`` and only works with initialisers that accept an int shape.

        ### initial context ###
        self.W_ctx_init = initializations.uniform((self.dim_ctx, self.dim))
        self.b_ctx_init = initializations.zero((self.dim,))

        ### forward : img_dim to context ###
        self.W_ctx_att = initializations.uniform((self.dim_ctx, self.dim_ctx))
        self.b_ctx_att = initializations.zero((self.dim_ctx,))

        ### forward : hidden_dim to context ###
        self.W_dim_att = initializations.uniform((self.dim, self.dim_ctx))

        ### context energy ###
        self.U_att = initializations.uniform((self.dim_ctx, 1))
        self.c_att = initializations.zero((1,))

        ### Word Embedding ###
        self.W_emb = initializations.uniform((self.n_vocab, self.dim_word))

        ### enc forward GRU ###
        self.W_gru_ctx = initializations.uniform((self.dim_word, self.dim_ctx))
        self.b_gru_ctx = initializations.zero((self.dim_ctx,))

        # Weights of width 2 * dim, with an extra input from the context.
        self.W_gru = initializations.uniform((self.dim_word, self.dim * 2))
        self.U_gru = initializations.uniform((self.dim, self.dim * 2))
        self.b_gru = initializations.zero((self.dim * 2,))
        self.U_gru_ctx = initializations.uniform((self.dim_ctx, self.dim * 2))

        # cdd : candidate
        self.W_gru_cdd = initializations.uniform((self.dim_word, self.dim))
        self.U_gru_cdd = initializations.uniform((self.dim, self.dim))
        self.b_gru_cdd = initializations.zero((self.dim,))
        self.U_gru_cdd_ctx = initializations.uniform((self.dim_ctx, self.dim))

        ### prediction (input width is 2 * dim) ###
        self.W_pred = initializations.uniform((self.dim * 2, self.y_vocab))
        self.b_pred = initializations.zero((self.y_vocab,))

        self.params = [self.W_ctx_init, self.b_ctx_init,
                       self.W_ctx_att, self.b_ctx_att,
                       self.W_dim_att,
                       self.U_att, self.c_att,
                       self.W_emb,
                       self.W_gru_ctx, self.b_gru_ctx,
                       self.W_gru, self.U_gru, self.b_gru, self.U_gru_ctx,
                       self.W_gru_cdd, self.U_gru_cdd, self.b_gru_cdd, self.U_gru_cdd_ctx,
                       self.W_pred, self.b_pred]
Ejemplo n.º 25
0
def get_embedding_matrix(word_index, extra_vocab, force=False):
    """Build the embedding matrix for ``word_index``, or load it from cache.

    The matrix is cached as ``embedding_matrix.pickle`` under ``CACHE_DIR``.
    When the cache file exists and ``force`` is false it is returned as is;
    otherwise the matrix is rebuilt from ``embeddings-scaled.50.txt`` under
    ``EMBEDDING_DIR`` and the cache is rewritten.

    Args:
        word_index: dict mapping word -> index. It is MUTATED: words from
            ``extra_vocab`` that have a pretrained vector and are not yet
            present are appended with index ``len(word_index)``.
        extra_vocab: set of additional (dev/test) words; it is intersected
            with the set of pretrained words using ``&``.
        force: rebuild the matrix even if a cached pickle exists.

    Returns:
        A ``(len(word_index) + 1, EMBEDDING_DIM)`` numpy array.

    Raises:
        Any exception raised while writing the cache file is printed and
        re-raised.
    """
    picklefile = os.path.join(CACHE_DIR, 'embedding_matrix.pickle')
    if not force and os.path.isfile(picklefile):
        print('Loading embedding matrix from pickle...')
        # NOTE: pickle must only be used on trusted files; this cache is
        # expected to have been written by this very function.
        with open(picklefile, 'rb') as f:
            return pickle.load(f)

    print('\nLoading embeddings...')
    embeddings_index = {}
    with open(os.path.join(EMBEDDING_DIR, 'embeddings-scaled.50.txt')) as f:
        for line in f:
            values = line.split()
            if not values:
                # Tolerate blank lines instead of failing on values[0].
                continue
            embeddings_index[values[0]] = np.asarray(values[1:], dtype='float32')

    # Count the pretrained vectors before the special tokens are mixed in.
    n_pretrained = len(embeddings_index)
    special_tokens = (START_OF_SENTENCE, END_OF_SENTENCE,
                      UNKNOWN_UPPERCASE_ALNUM, UNKNOWN_LOWERCASE_ALNUM,
                      UNKNOWN_NON_ALNUM)
    for token in special_tokens:
        embeddings_index[token] = initializations.uniform(EMBEDDING_DIM, scale=2.0).eval()

    print('\nFound {} word vectors.'.format(n_pretrained))

    print('\nAdding dev/test vocab into word_index')
    # add dev and test vocabulary to word_index
    extra_vocab = list(set(embeddings_index.keys()) & extra_vocab)
    print('\nExtra vocab: {}.'.format(len(extra_vocab)))
    for word in extra_vocab:
        if word_index.get(word) is None:
            word_index[word] = len(word_index)
    print('\nCurrent vocab size: {}'.format(len(word_index)))

    oov = 0
    embedding_matrix = np.zeros((len(word_index) + 1, EMBEDDING_DIM))
    # NOTE(review): rows are filled by enumeration position, not by
    # word_index[word] -- confirm this matches how indices are consumed.
    for i, word in enumerate(word_index):
        # Try the word itself, then the part after the last '-', then the
        # part after the last escaped slash ('\/').
        candidates = [word]
        if '-' in word:
            candidates.append(word.split('-')[-1])
        if r'\/' in word:
            candidates.append(word.split(r'\/')[-1])

        for candidate in candidates:
            embedding_vector = embeddings_index.get(candidate)
            if embedding_vector is not None:
                break
        else:
            # No pretrained vector under any spelling: count the word as OOV
            # and give it a random vector. (Previously this only happened for
            # words containing '\/'; every other unknown word silently kept
            # an all-zero row and was left out of the OOV statistics.)
            oov += 1
            embedding_vector = initializations.uniform(EMBEDDING_DIM, scale=2.0).eval()
        embedding_matrix[i] = embedding_vector

    oov_ratio = float(oov) / len(word_index) if word_index else 0.0
    print('OOV number is {}. Total number is {}. Embedding OOV ratio is {}.'.format(oov, len(word_index), oov_ratio))

    # save to pickle file
    try:
        with open(picklefile, 'wb') as f:
            pickle.dump(embedding_matrix, f, pickle.HIGHEST_PROTOCOL)
    except Exception as e:
        print('Unable to save data to', picklefile, ':', e)
        raise
    return embedding_matrix
Ejemplo n.º 26
0
    def __init__(self, n_vocab, dim_word, dimctx, dim):
        """Create every parameter of the attention encoder/decoder.

        Weights come from ``initializations.uniform`` and biases from
        ``initializations.zero``; all of them are gathered in ``self.params``.
        The creation order is kept fixed on purpose, since the initialisers
        may consume random state.

        Args:
            n_vocab: vocabulary size (e.g. 30000).
            dim_word: word-embedding width (e.g. 384).
            dimctx: context width (e.g. 1024).
            dim: hidden-state width (e.g. 512).
        """
        uniform = initializations.uniform
        zero = initializations.zero
        dim2 = dim * 2

        self.n_vocab = n_vocab
        self.dim_word = dim_word
        self.dimctx = dimctx
        self.dim = dim

        # --- word embeddings (encoder side, decoder side) ---
        self.W_enc_emb = uniform((n_vocab, dim_word))
        self.W_dec_emb = uniform((n_vocab, dim_word))

        # --- encoder, forward GRU ("cdd" = candidate) ---
        self.W_enc_f_gru = uniform((dim_word, dim2))
        self.U_enc_f_gru = uniform((dim, dim2))
        self.b_enc_f_gru = zero(dim2)
        self.W_enc_f_gru_cdd = uniform((dim_word, dim))
        self.U_enc_f_gru_cdd = uniform((dim, dim))
        self.b_enc_f_gru_cdd = zero(dim)

        # --- encoder, backward GRU ---
        self.W_enc_b_gru = uniform((dim_word, dim2))
        self.U_enc_b_gru = uniform((dim, dim2))
        self.b_enc_b_gru = zero(dim2)
        self.W_enc_b_gru_cdd = uniform((dim_word, dim))
        self.U_enc_b_gru_cdd = uniform((dim, dim))
        self.b_enc_b_gru_cdd = zero(dim)

        # --- context -> initial decoder state (s0) ---
        self.W_dec_init = uniform((dimctx, dim))
        self.b_dec_init = zero(dim)

        # --- decoder GRU, including its context inputs ---
        self.W_dec_gru = uniform((dim_word, dim2))
        self.U_dec_gru = uniform((dim, dim2))
        self.b_dec_gru = zero(dim2)
        self.W_dec_gru_cdd = uniform((dim_word, dim))
        self.U_dec_gru_cdd = uniform((dim, dim))
        self.b_dec_gru_cdd = zero(dim)
        self.W_dec_gru_ctx = uniform((dimctx, dim2))
        self.W_dec_gru_ctx_cdd = uniform((dimctx, dim))

        # --- attention: projections into the context space, then energy ---
        self.W_att_y2c = uniform((dim_word, dimctx))
        self.W_att_h2c = uniform((dimctx, dimctx))
        self.W_att_s2c = uniform((dim, dimctx))
        self.b_att = zero(dimctx)

        self.U_att_energy = uniform((dimctx, 1))
        self.b_att_energy = zero((1,))

        # --- prediction layers ---
        self.W_dec_pred_s2y = uniform((dim, dim_word))
        self.b_dec_pred_s2y = zero(dim_word)
        self.W_dec_pred_y2y = uniform((dim_word, dim_word))
        self.b_dec_pred_y2y = zero(dim_word)
        self.W_dec_pred_c2y = uniform((dim2, dim_word))
        self.b_dec_pred_c2y = zero(dim_word)
        self.W_dec_pred = uniform((dim_word, n_vocab))
        self.b_dec_pred = zero(n_vocab)

        embeddings = [self.W_enc_emb, self.W_dec_emb]
        enc_forward = [self.W_enc_f_gru, self.U_enc_f_gru, self.b_enc_f_gru,
                       self.W_enc_f_gru_cdd, self.U_enc_f_gru_cdd, self.b_enc_f_gru_cdd]
        enc_backward = [self.W_enc_b_gru, self.U_enc_b_gru, self.b_enc_b_gru,
                        self.W_enc_b_gru_cdd, self.U_enc_b_gru_cdd, self.b_enc_b_gru_cdd]
        dec_init = [self.W_dec_init, self.b_dec_init]
        dec_gru = [self.W_dec_gru, self.U_dec_gru, self.b_dec_gru,
                   self.W_dec_gru_cdd, self.U_dec_gru_cdd, self.b_dec_gru_cdd,
                   self.W_dec_gru_ctx, self.W_dec_gru_ctx_cdd]
        attention = [self.W_att_y2c, self.W_att_h2c, self.W_att_s2c, self.b_att,
                     self.U_att_energy, self.b_att_energy]
        prediction = [self.W_dec_pred_s2y, self.b_dec_pred_s2y,
                      self.W_dec_pred_y2y, self.b_dec_pred_y2y,
                      self.W_dec_pred_c2y, self.b_dec_pred_c2y,
                      self.W_dec_pred, self.b_dec_pred]

        # Same element order as the individual groups above, concatenated.
        self.params = (embeddings + enc_forward + enc_backward + dec_init
                       + dec_gru + attention + prediction)
Ejemplo n.º 27
0
def emb_init(shape, name=None):
    """Uniform initializer whose scale is 0.6 divided by ``shape[1]``."""
    n_cols = shape[1]
    return initializations.uniform(shape, scale=0.6 / n_cols, name=name)
Ejemplo n.º 28
0
    def build(self, input_shape):
        """Create the layer's weights for the given ``input_shape``.

        Sets ``input_spec``, the recurrent state list, the input matrix ``W``,
        the bias ``b`` (plus its tiled copy ``baug``), the initial state
        ``h0`` and the recurrence matrix ``Uaug`` according to
        ``self.unitary_impl`` / ``self.inner_init``. Finally fills
        ``trainable_weights`` and applies ``self.initial_weights`` if present.

        Args:
            input_shape: shape of the layer input; index 2 is used as the
                input feature dimension.
        """
        self.input_spec = [InputSpec(shape=input_shape)]
        if self.stateful:
            self.reset_states()
        else:
            # initial states: all-zero tensor of shape (output_dim)
            self.states = [None]
        input_dim = input_shape[2]
        self.input_dim = input_dim

        self.W = self.init((input_dim, self.output_dim),
                           name='{}_W'.format(self.name))
        # The bias is drawn from a small uniform range rather than zeros
        # (the zero-initialised variant is kept below for reference).
        #self.b = K.zeros((self.N,), name='{}_b'.format(self.name))
        self.b = initializations.uniform((self.N, ),
                                         scale=0.01,
                                         name='{}_b'.format(self.name))
        # Two copies of the length-N bias laid end to end.
        self.baug = K.tile(self.b, [2])

        # Initial state of length 2*N: h0_mean plus small uniform noise.
        h0 = self.h0_mean + initializations.uniform(
            (2 * self.N, ), scale=0.01).get_value()
        self.h0 = K.variable(h0, name='{}_h0'.format(self.name))

        if ('full' in self.unitary_impl):
            # we're using a full unitary recurrence matrix

            # NOTE(review): if inner_init is neither 'svd' nor 'ASB2016',
            # self.U is not assigned in this method before augRight reads it.
            if (self.inner_init == 'svd'):
                # use SVD to initialize U
                self.U = unitary_svd_init((self.N, self.N),
                                          name='{}_U'.format(self.name))
            elif (self.inner_init == 'ASB2016'):
                # use parameterization of [ASB2016] to initialize U
                Uaug, _, _, _ = unitary_ASB2016_init((self.N, self.N))
                Uaug = Uaug.eval()
                # Stack the left and right N-column halves of the first N
                # rows of Uaug on top of each other -> a (2N, N) variable.
                self.U = K.variable(np.concatenate(
                    (Uaug[:self.N, :self.N], Uaug[:self.N, self.N:]), axis=0),
                                    name='{}_U'.format(self.name))

            self.Uaug = augRight(self.U, module=K)

        elif (self.unitary_impl == 'ASB2016'):
            # we're using the parameterization of [Arjovsky, Shah, Bengio 2016]
            self.Uaug, self.theta, self.reflection, _ = unitary_ASB2016_init(
                (self.N, self.N), name=self.name)

        # set the trainable weights
        if ('full' in self.unitary_impl):
            self.trainable_weights = [self.W, self.U, self.b, self.h0]
        elif (self.unitary_impl == 'ASB2016'):
            self.trainable_weights = [
                self.W, self.theta, self.reflection, self.b, self.h0
            ]

        # Regularizers are currently disabled; the list is left empty.
        self.regularizers = []
        #if self.W_regularizer:
        #    self.W_regularizer.set_param(self.W)
        #    self.regularizers.append(self.W_regularizer)
        #if self.U_regularizer:
        #    self.U_regularizer.set_param(self.U)
        #    self.regularizers.append(self.U_regularizer)
        #if self.b_regularizer:
        #    self.b_regularizer.set_param(self.b)
        #    self.regularizers.append(self.b_regularizer)

        # Apply weights supplied at construction time once, then drop them.
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
Ejemplo n.º 29
0
def custom_init(shape):
    """Initialise a tensor of ``shape`` through ``uniform`` with scale 0.1."""
    fixed_scale = 0.1
    return uniform(shape, scale=fixed_scale)
Ejemplo n.º 30
0
 def init_uniform(self, shape, name=None):
   """
   Uniform initializer used for the input embedding.

   Delegates to ``init.uniform`` with ``scale=1``; per the original
   author's note the resulting values lie between -1 and 1.
   """
   unit_scale = 1
   return init.uniform(shape=shape, scale=unit_scale, name=name)
def my_init(shape, name=None):
    """Uniform initializer with a fixed scale of 0.08."""
    fixed_scale = 0.08
    return initializations.uniform(shape, scale=fixed_scale, name=name)
Ejemplo n.º 32
0
    def __init__(self, n_words, dim_emb, dim_img):
        """Create the embedding table and four three-layer weight stacks.

        Each stack (``word``, ``phs``, ``phl``, ``st``) gets weights
        ``cnn_<stack>_W1..W3`` from ``initializations.uniform`` and biases
        ``cnn_<stack>_b1..b3`` from ``initializations.zero``. Layer outputs
        are 200, 300 and 300 wide; each layer's input is three times the
        previous width, and in one layer per stack ``dim_img`` is added to
        the input width.

        Args:
            n_words: number of rows of the embedding table.
            dim_emb: embedding width.
            dim_img: image-feature width added to selected layer inputs.
        """
        self.n_words = n_words
        self.dim_emb = dim_emb
        self.dim_img = dim_img

        make_weight = initializations.uniform
        make_bias = initializations.zero

        self.emb_W = make_weight((n_words, dim_emb))

        # Input width of layers 1..3 before any image features are added,
        # and the matching output widths.
        base_inputs = (dim_emb * 3, 200 * 3, 300 * 3)
        outputs = (200, 300, 300)

        # (attribute prefix, 1-based layer whose input also includes dim_img)
        stacks = (('word', 1), ('phs', 2), ('phl', 3), ('st', None))

        # Creation order is W1, b1, W2, b2, W3, b3 for each stack in turn.
        for prefix, img_layer in stacks:
            for layer, (n_in, n_out) in enumerate(zip(base_inputs, outputs), 1):
                if layer == img_layer:
                    n_in = n_in + dim_img
                setattr(self, 'cnn_{}_W{}'.format(prefix, layer),
                        make_weight((n_in, n_out)))
                setattr(self, 'cnn_{}_b{}'.format(prefix, layer),
                        make_bias(n_out))
Ejemplo n.º 33
0
def emb_init(shape, name=None):
    """Uniform initializer; the scale is 0.6 divided by ``shape[1]``."""
    scaled = 0.6 / shape[1]
    return initializations.uniform(shape, scale=scaled, name=name)
Ejemplo n.º 34
0
    def __init__(self, n_words, dim_emb, dim_img):
        """Create the embedding table and the four ``cnn_*`` weight stacks.

        Weights are created with ``initializations.uniform`` and biases with
        ``initializations.zero``, in a fixed order.

        Args:
            n_words: number of rows of the embedding table.
            dim_emb: embedding width.
            dim_img: image-feature width; it widens the input of layer 1 of
                the ``word`` stack, layer 2 of ``phs`` and layer 3 of ``phl``.
        """
        uniform = initializations.uniform
        zero = initializations.zero

        self.n_words = n_words
        self.dim_emb = dim_emb
        self.dim_img = dim_img

        # Input widths of the three layers without image features, and the
        # two distinct output widths.
        in1 = dim_emb * 3
        in2 = 200 * 3
        in3 = 300 * 3
        out1 = 200
        out23 = 300

        self.emb_W = uniform((n_words, dim_emb))

        # "word" stack: image features enter at layer 1.
        self.cnn_word_W1 = uniform((in1 + dim_img, out1))
        self.cnn_word_b1 = zero(out1)
        self.cnn_word_W2 = uniform((in2, out23))
        self.cnn_word_b2 = zero(out23)
        self.cnn_word_W3 = uniform((in3, out23))
        self.cnn_word_b3 = zero(out23)

        # "phs" stack: image features enter at layer 2.
        self.cnn_phs_W1 = uniform((in1, out1))
        self.cnn_phs_b1 = zero(out1)
        self.cnn_phs_W2 = uniform((in2 + dim_img, out23))
        self.cnn_phs_b2 = zero(out23)
        self.cnn_phs_W3 = uniform((in3, out23))
        self.cnn_phs_b3 = zero(out23)

        # "phl" stack: image features enter at layer 3.
        self.cnn_phl_W1 = uniform((in1, out1))
        self.cnn_phl_b1 = zero(out1)
        self.cnn_phl_W2 = uniform((in2, out23))
        self.cnn_phl_b2 = zero(out23)
        self.cnn_phl_W3 = uniform((in3 + dim_img, out23))
        self.cnn_phl_b3 = zero(out23)

        # "st" stack: no image features.
        self.cnn_st_W1 = uniform((in1, out1))
        self.cnn_st_b1 = zero(out1)
        self.cnn_st_W2 = uniform((in2, out23))
        self.cnn_st_b2 = zero(out23)
        self.cnn_st_W3 = uniform((in3, out23))
        self.cnn_st_b3 = zero(out23)
Ejemplo n.º 35
0
 def init_uniform(self, shape, name=None):
     """
     Uniform initializer used for the input embedding.

     Delegates to ``init.uniform`` with ``scale=1``; per the original
     author's note the resulting values lie between -1 and 1.
     """
     unit_scale = 1
     return init.uniform(shape=shape, scale=unit_scale, name=name)
Ejemplo n.º 36
0
 def init(shape, name=None, dim_ordering='th'):
     """Forward to ``uniform`` with a ``scale`` taken from outside this function.

     ``scale`` is not a parameter here; it is presumably captured from an
     enclosing scope that is not visible in this snippet -- verify.
     """
     return uniform(shape, scale, name, dim_ordering)
Ejemplo n.º 37
0
    def __init__(self, n_vocab, y_vocab, dim_word, dim, dim_ctx):
        """Create every parameter of the attention GRU model.

        Weights come from ``initializations.uniform`` and biases from
        ``initializations.zero``; all of them are gathered in ``self.params``.
        The creation order is kept fixed on purpose, since the initialisers
        may consume random state.

        Args:
            n_vocab: input vocabulary size (e.g. 12047).
            y_vocab: output vocabulary size (e.g. 430).
            dim_word: word-embedding width (e.g. 1024).
            dim: hidden-state width (e.g. 1024).
            dim_ctx: context width (e.g. 512).
        """
        uniform = initializations.uniform
        zero = initializations.zero
        dim2 = dim * 2

        self.n_vocab = n_vocab
        self.y_vocab = y_vocab
        self.dim_word = dim_word
        self.dim = dim
        self.dim_ctx = dim_ctx

        # --- initial context ---
        self.W_ctx_init = uniform((dim_ctx, dim))
        self.b_ctx_init = zero(dim)

        # --- attention: image context -> context space ---
        self.W_ctx_att = uniform((dim_ctx, dim_ctx))
        self.b_ctx_att = zero(dim_ctx)

        # --- attention: hidden state -> context space ---
        self.W_dim_att = uniform((dim, dim_ctx))

        # --- attention: context energy ---
        self.U_att = uniform((dim_ctx, 1))
        self.c_att = zero(1)

        # --- word embedding ---
        self.W_emb = uniform((n_vocab, dim_word))

        # --- GRU: word -> context projection ---
        self.W_gru_ctx = uniform((dim_word, dim_ctx))
        self.b_gru_ctx = zero(dim_ctx)

        # --- GRU: dim * 2 wide block ---
        self.W_gru = uniform((dim_word, dim2))
        self.U_gru = uniform((dim, dim2))
        self.b_gru = zero(dim2)
        self.U_gru_ctx = uniform((dim_ctx, dim2))

        # --- GRU: candidate ("cdd") block ---
        self.W_gru_cdd = uniform((dim_word, dim))
        self.U_gru_cdd = uniform((dim, dim))
        self.b_gru_cdd = zero(dim)
        self.U_gru_cdd_ctx = uniform((dim_ctx, dim))

        # --- prediction ---
        self.W_pred = uniform((dim2, y_vocab))
        self.b_pred = zero(y_vocab)

        attention = [self.W_ctx_init, self.b_ctx_init,
                     self.W_ctx_att, self.b_ctx_att,
                     self.W_dim_att,
                     self.U_att, self.c_att]
        recurrent = [self.W_emb,
                     self.W_gru_ctx, self.b_gru_ctx,
                     self.W_gru, self.U_gru, self.b_gru, self.U_gru_ctx,
                     self.W_gru_cdd, self.U_gru_cdd, self.b_gru_cdd,
                     self.U_gru_cdd_ctx]
        prediction = [self.W_pred, self.b_pred]

        # Same element order as the individual groups above, concatenated.
        self.params = attention + recurrent + prediction
Ejemplo n.º 38
0
    # for word in sorted(word_index, key=word_index.get):
    #     print('{}\t{}'.format(word, word_index[word]))
    # print('\nTag list:')
    # for tag in sorted(tag_index, key=tag_index.get):
    #     print('{}\t{}'.format(tag, tag_index[tag]))

    print('\nLoading embeddings...')
    # Maps each word in the embeddings file to its float32 vector.
    embeddings_index = {}
    with open(os.path.join(EMBEDDING_DIR, 'embeddings-scaled.50.txt')) as f:
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs
    # The five special tokens have not been inserted yet at this point, so
    # the dict size already is the number of pretrained vectors; subtracting
    # 5 here under-reported the count.
    print('Found {} word vectors.'.format(len(embeddings_index)))
    # Random vectors for the sentence-boundary and unknown-word tokens.
    for special_token in (START_OF_SENTENCE, END_OF_SENTENCE,
                          UNKNOWN_UPPERCASE_ALNUM, UNKNOWN_LOWERCASE_ALNUM,
                          UNKNOWN_NON_ALNUM):
        embeddings_index[special_token] = initializations.uniform(50, scale=2.0).eval()

    # add dev and test vocabulary into word_index
    print('\nDev vocab:')
    dev_tokenizer = Tokenizer(lower=True, cutoff=0, nb_unknowns=3)
    dev_tokenizer.fit_on_texts(X_dev, verbose=True)
    print(len(dev_tokenizer.word_index.keys()))

    print('\nTest vocab:')
    test_tokenizer = Tokenizer(lower=True, cutoff=0, nb_unknowns=3)
    test_tokenizer.fit_on_texts(X_test, verbose=True)
    print(len(test_tokenizer.word_index.keys()))
Ejemplo n.º 39
0
 def norm(scale):
   """Return an initializer ``f(shape, name=None)`` bound to ``scale``.

   The returned callable forwards to ``initializations.uniform`` with the
   given ``scale``.
   """
   # Kept as a lambda so the returned callable's __name__ is unchanged.
   bound_init = lambda shape, name=None: initializations.uniform(
       shape, scale=scale, name=name)
   return bound_init
def build_model(dp, word_count_threshold, word_embedding_dim, image_embedding_dim, hidden_size, batch_size, num_vocab):
    """Build the Theano graph and compiled functions of an image-captioning LSTM.

    The image (4096 features) is projected to ``image_embedding_dim`` and
    prepended to the embedded sentence along the time axis; the sequence is
    run through a masked LSTM with ``theano.scan`` and every hidden state is
    projected to ``num_vocab`` scores.

    Args:
        dp: not used inside this function.
        word_count_threshold: not used inside this function.
        word_embedding_dim: width of the word-embedding table ``Ws``.
        image_embedding_dim: width of the projected image. It is concatenated
            with the word embeddings along axis 1, so it has to match
            ``word_embedding_dim``.
        hidden_size: LSTM hidden/cell width.
        batch_size: batch size baked into the bias column and initial states.
        num_vocab: vocabulary size.

    Returns:
        ``(train_function, params, Y_function, Hout_function)``.
        ``train_function`` returns the loss and applies the RMSprop updates;
        ``Y_function`` and ``Hout_function`` return the scores ``Y`` and the
        hidden states ``Houts`` without touching the parameters.

    Note:
        ``regularization_ratio`` is read from outside this function.
    """
    # Image encoder (4096 -> embedding dim) and text encoder
    # (vocab dim -> embedding dim).
    We = initializations.uniform((4096, image_embedding_dim))
    be = initializations.zero((image_embedding_dim,))
    Ws = initializations.uniform((num_vocab, word_embedding_dim))

    # Text decoder (hidden dim -> vocab dim).
    Wd = initializations.uniform((hidden_size, num_vocab))
    bd = initializations.zero((num_vocab,))

    # Image (batch) -> image_embedding_dim, with a broadcastable axis
    # inserted in the middle so it can be concatenated with the sentence.
    image = T.matrix()
    embedded_image = T.dot(image, We) + be
    embedded_image = embedded_image.dimshuffle(0,'x',1)

    # Sentence (word indices) and its mask.
    sentence = T.matrix(dtype='int32')
    mask = T.matrix()
    embedded_sentence = Ws[sentence] # (batch, sentence length, embedding_dim)

    # Prepend the image to the front of the sentence, then move the time
    # axis first for scan.
    X = T.concatenate([embedded_image, embedded_sentence], axis=1)
    X = X.dimshuffle(1,0,2)
    # NOTE(review): dropout is part of the graph shared by the inference
    # functions too -- confirm that is intended.
    X = dropout(X, 0.5)

    # LSTM weights for all four gates at once. The step input is the
    # concatenation [1, x_t, h_{t-1}], hence 1 + word_embedding_dim +
    # hidden_size rows (the former 1 + word_embedding_dim*2 was only right
    # when hidden_size == word_embedding_dim).
    WLSTM = initializations.uniform((1 + word_embedding_dim + hidden_size, 4*hidden_size))
    bias = T.alloc(numpy_floatX(1.), batch_size, 1)

    def _step(b, x_t, h_t_1, m_, c_, weight):
        # One masked LSTM step; returns the new hidden state and cell.

        Hin = T.concatenate([b, x_t, h_t_1], axis=1) # concat 1, x[t], h[t-1]

        IFOG = T.dot(Hin, weight)

        # First three hidden_size-wide slices go through a sigmoid, the last
        # one through tanh.
        ifo = T.nnet.sigmoid(IFOG[:, :3*hidden_size])
        g = T.tanh(IFOG[:, 3*hidden_size:])

        IFOGf = T.concatenate([ifo, g], axis=1)

        # c = slice0 * slice3 + c_prev * slice1
        c = IFOGf[:, :hidden_size] * IFOGf[:, 3*hidden_size:] + c_ * IFOGf[:,hidden_size:2*hidden_size]
        # Where the mask is 0 the previous cell / hidden state is carried over.
        c = c * m_[:,None] + c_ * (1. - m_)[:,None]

        # h = slice2 * c
        Hout = IFOGf[:, 2*hidden_size:3*hidden_size] * c
        Hout = Hout * m_[:,None] + h_t_1*(1. - m_)[:,None]
        return Hout, c

    (Houts, cells), _ = theano.scan(fn = lambda x, m, h, c, b, weight: _step(b,x,h, m, c, weight),
                       sequences=[X, mask.T],
                       outputs_info=
                        [
                            T.alloc(numpy_floatX(0.),batch_size, hidden_size),
                            T.alloc(numpy_floatX(0.),batch_size, hidden_size)
                        ],
                       non_sequences=[bias, WLSTM])

    # Back to batch-first, then project every hidden state to vocab scores.
    Houts = Houts.dimshuffle(1,0,2)
    Y, _ = theano.scan(fn=lambda hout, wd,dd: T.dot(hout, wd) + dd, #T.nnet.softmax(T.dot(hout, wd)+dd),
                       sequences=[Houts],
                       non_sequences=[Wd,bd])

    # Drop the first time step.
    Y = Y[:,1:,:]
    n_timestep=Y.shape[1]

    # NOTE(review): the lambda indexes with the full `mask` matrix instead of
    # its per-sample argument `m`, and takes the log of un-normalised scores
    # (the softmax above is commented out) -- confirm both are intended.
    losses,_ = theano.scan(fn=lambda y, m, sen: -T.log(1e-20 + y[T.arange(n_timestep), sen[1:]][mask != 0.0]),
                           sequences=[Y, mask, sentence])

    loss = T.sum(losses) / Y.shape[0]
    # L2 penalties on the LSTM and decoder weights.
    loss += regularization_ratio * 0.5 * T.sum(WLSTM * WLSTM)
    loss += regularization_ratio * 0.5 * T.sum(Wd * Wd)

    params = [We, be, Ws, WLSTM, Wd, bd]
    updates = RMSprop(cost=loss, params=params)
    train_function = theano.function(inputs=[image, sentence, mask], outputs=loss, updates=updates, allow_input_downcast=True)
    # The inference functions are compiled WITHOUT the optimizer updates:
    # passing `updates` here made every prediction call also perform an
    # RMSprop step on whatever data was being scored.
    Y_function = theano.function(inputs=[image, sentence, mask], outputs=Y, allow_input_downcast=True)
    Hout_function = theano.function(inputs=[image, sentence, mask], outputs=Houts, allow_input_downcast=True)


    return train_function, params, Y_function, Hout_function