def __init__(self, y_vocab, dim_word, dim, dim_ctx):
    """Allocate parameters for an LSTM answer-prediction model.

    y_vocab  -- answer vocabulary size (e.g. 430)
    dim_word -- word-embedding dimension (e.g. 1024)
    dim      -- recurrent hidden dimension (e.g. 512)
    dim_ctx  -- image-context dimension (e.g. 512)
    """
    self.y_vocab = y_vocab
    self.dim_word = dim_word
    self.dim = dim
    self.dim_ctx = dim_ctx

    # Initial hidden/memory states are linear projections of the image context.
    self.W_hidden_init = initializations.uniform((self.dim_ctx, self.dim))
    self.b_hidden_init = initializations.zero((self.dim))
    self.W_memory_init = initializations.uniform((self.dim_ctx, self.dim))
    self.b_memory_init = initializations.zero((self.dim))

    # Recurrent weights; the 4*dim width presumably packs the four LSTM gates.
    self.W_lstm = initializations.uniform((self.dim_word, self.dim * 4))
    self.U_lstm = initializations.uniform((self.dim, self.dim * 4))
    self.b_lstm = initializations.zero((self.dim * 4))

    # Output projection onto the answer vocabulary (input is 2*dim wide).
    self.W_pred = initializations.uniform((self.dim * 2, self.y_vocab))
    self.b_pred = initializations.zero((self.y_vocab))

    self.params = [
        self.W_hidden_init, self.b_hidden_init,
        self.W_memory_init, self.b_memory_init,
        self.W_lstm, self.U_lstm, self.b_lstm,
        self.W_pred, self.b_pred,
    ]
def __init__(self, n_words, dim_embed, dim_hidden, dim_image, bias_init_vector=None):
    """Allocate parameters for an image-captioning LSTM.

    n_words    -- vocabulary size
    dim_embed  -- word-embedding dimension
    dim_hidden -- LSTM hidden dimension
    dim_image  -- image feature dimension
    bias_init_vector -- optional precomputed output bias; when omitted the
                        bias is drawn uniformly at random.
    """
    self.n_words = n_words
    self.dim_embed = dim_embed
    self.dim_hidden = dim_hidden
    self.dim_image = dim_image

    # Word embedding table plus bias.
    self.Wemb = initializations.uniform((n_words, dim_embed), scale=0.1)
    self.bemb = initializations.zero((dim_embed))

    # One packed LSTM matrix over [1 + embedding + hidden] -> 4*hidden
    # (the leading 1 presumably acts as a built-in bias input -- confirm).
    self.lstm_W = initializations.uniform(
        (1 + dim_embed + dim_hidden, dim_hidden * 4), scale=0.1)

    # Image feature -> hidden-space encoder.
    self.encode_img_W = initializations.uniform((dim_image, dim_hidden), scale=0.1)
    self.encode_img_b = initializations.zero((dim_hidden))

    # Hidden state -> vocabulary logits.
    self.emb_word_W = initializations.uniform((dim_hidden, n_words), scale=0.1)
    if bias_init_vector is None:
        self.emb_word_b = initializations.uniform((n_words))
    else:
        self.emb_word_b = theano.shared(
            bias_init_vector.astype(np.float32), borrow=True)

    self.params = [
        self.Wemb, self.bemb,
        self.lstm_W,
        self.encode_img_W, self.encode_img_b,
        self.emb_word_W, self.emb_word_b,
    ]
def __init__(self, n_words, dim_embed, dim_hidden, dim_image, bias_init_vector=None):
    """Set up captioning-model weights.

    Args mirror the model hyper-parameters: vocabulary size, embedding
    size, LSTM hidden size and image-feature size.  ``bias_init_vector``,
    if given, seeds the word-prediction bias; otherwise that bias is
    random uniform.
    """
    self.n_words = n_words
    self.dim_embed = dim_embed
    self.dim_hidden = dim_hidden
    self.dim_image = dim_image

    # Embedding lookup and its bias.
    self.Wemb = initializations.uniform((n_words, dim_embed), scale=0.1)
    self.bemb = initializations.zero((dim_embed))

    # Single LSTM weight matrix: rows cover a constant-1 slot, the word
    # embedding and the previous hidden state; columns pack 4 gates.
    self.lstm_W = initializations.uniform(
        (1 + dim_embed + dim_hidden, dim_hidden * 4), scale=0.1)

    # Projection of the raw image feature into the hidden space.
    self.encode_img_W = initializations.uniform((dim_image, dim_hidden), scale=0.1)
    self.encode_img_b = initializations.zero((dim_hidden))

    # Decoder: hidden state to vocabulary scores.
    self.emb_word_W = initializations.uniform((dim_hidden, n_words), scale=0.1)
    if bias_init_vector is None:
        self.emb_word_b = initializations.uniform((n_words))
    else:
        self.emb_word_b = theano.shared(
            bias_init_vector.astype(np.float32), borrow=True)

    self.params = [
        self.Wemb,
        self.bemb,
        self.lstm_W,
        self.encode_img_W,
        self.encode_img_b,
        self.emb_word_W,
        self.emb_word_b,
    ]
def __init__(self, n_vocab, dim_word, dim_ctx, dim):
    """Allocate parameters for an attention-based captioning LSTM
    (show-attend-and-tell style).

    n_vocab  -- vocabulary size
    dim_word -- word-embedding dimension
    dim_ctx  -- image-context (annotation) dimension
    dim      -- LSTM hidden dimension
    """
    self.n_vocab = n_vocab
    self.dim_word = dim_word
    self.dim_ctx = dim_ctx
    self.dim = dim

    # --- word embedding ---
    self.Wemb = initializations.uniform((n_vocab, self.dim_word))

    # --- networks producing the initial LSTM state/memory from the context ---
    self.Init_state_W = initializations.uniform((self.dim_ctx, self.dim))
    self.Init_state_b = shared_zeros((self.dim))
    self.Init_memory_W = initializations.uniform((self.dim_ctx, self.dim))
    self.Init_memory_b = shared_zeros((self.dim))

    # --- main LSTM; recurrent matrix is four orthogonal blocks side by side ---
    self.lstm_W = initializations.uniform((self.dim_word, self.dim * 4))
    self.lstm_U = sharedX(np.concatenate(
        [ortho_weight(dim), ortho_weight(dim), ortho_weight(dim), ortho_weight(dim)],
        axis=1))
    self.lstm_b = shared_zeros((self.dim * 4))
    self.Wc = initializations.uniform((self.dim_ctx, self.dim * 4))      # image -> LSTM gates
    self.Wc_att = initializations.uniform((self.dim_ctx, self.dim_ctx))  # image after one projection
    self.Wd_att = initializations.uniform((self.dim, self.dim_ctx))      # LSTM hidden -> attention over image
    self.b_att = shared_zeros((self.dim_ctx))
    self.U_att = initializations.uniform((self.dim_ctx, 1))              # squashes the context features to one energy
    self.c_att = shared_zeros((1))

    # --- decoding networks: hidden -> word space -> vocabulary ---
    self.decode_lstm_W = initializations.uniform((self.dim, self.dim_word))
    self.decode_lstm_b = shared_zeros((self.dim_word))
    self.decode_word_W = initializations.uniform((self.dim_word, n_vocab))
    self.decode_word_b = shared_zeros((n_vocab))

    self.params = [
        self.Wemb,
        self.Init_state_W, self.Init_state_b,
        self.Init_memory_W, self.Init_memory_b,
        self.lstm_W, self.lstm_U, self.lstm_b,
        self.Wc, self.Wc_att, self.Wd_att, self.b_att,
        self.U_att, self.c_att,
        self.decode_lstm_W, self.decode_lstm_b,
        self.decode_word_W, self.decode_word_b,
    ]
    self.param_names = [
        'Wemb',
        'Init_state_W', 'Init_state_b',
        'Init_memory_W', 'Init_memory_b',
        'lstm_W', 'lstm_U', 'lstm_b',
        'Wc', 'Wc_att', 'Wd_att', 'b_att',
        'U_att', 'c_att',
        'decode_lstm_W', 'decode_lstm_b',
        'decode_word_W', 'decode_word_b',
    ]
def __init__(self, n_words, embedding_dim, hidden_dim):
    """Allocate parameters for an encoder-decoder LSTM (seq2seq).

    n_words       -- vocabulary size
    embedding_dim -- word-embedding dimension
    hidden_dim    -- recurrent hidden dimension
    """
    self.n_words = n_words
    self.embedding_dim = embedding_dim
    self.hidden_dim = hidden_dim

    self.emb = initializations.uniform((n_words, embedding_dim))

    # Encoder recurrence: input and previous hidden into 4*hidden gates.
    self.encode_W = initializations.uniform((embedding_dim, hidden_dim * 4))  # input -> hidden
    self.encode_U = initializations.uniform((hidden_dim, hidden_dim * 4))    # previous hidden -> hidden
    self.encode_b = initializations.zero((hidden_dim * 4,))

    # Decoder recurrence; also conditioned on the encoder context.
    self.decode_W = initializations.uniform((embedding_dim, hidden_dim * 4))  # previous word -> hidden
    self.decode_U = initializations.uniform((hidden_dim, hidden_dim * 4))     # previous hidden -> hidden
    self.decode_V = initializations.uniform((hidden_dim, hidden_dim * 4))     # context -> hidden
    self.decode_b = initializations.zero((hidden_dim * 4))

    # Readout: hidden -> embedding space -> vocabulary logits.
    self.output_W = initializations.uniform((hidden_dim, embedding_dim))
    self.output_b = initializations.zero((embedding_dim, ))
    self.word_W = initializations.uniform((embedding_dim, n_words))
    self.word_b = initializations.zero((n_words))

    self.params = [
        self.emb,
        self.encode_W, self.encode_U, self.encode_b,
        self.decode_W, self.decode_U, self.decode_V, self.decode_b,
        self.output_W, self.output_b,
        self.word_W, self.word_b,
    ]
def create_actor_network(self, state_size, action_dim):
    """Build the actor network: two relu hidden layers and a tanh output.

    Returns (model, trainable_weights, input_tensor) as expected by the
    DDPG training loop.
    """
    print("Now we build the actor cnn model")
    state_input = Input(shape=state_size)
    hidden = Dense(50, activation='relu', init='he_uniform')(state_input)
    hidden = Dense(25, activation='relu', init='he_uniform')(hidden)
    # Tiny uniform init keeps the initial tanh actions near zero.
    action_output = Dense(
        action_dim,
        activation='tanh',
        init=lambda shape, name: uniform(shape, scale=3e-4, name=name))(hidden)
    model = Model(input=state_input, output=action_output)
    return model, model.trainable_weights, state_input
def glorot_uniform_3d(shape):
    """Glorot-style uniform initializer for a 3-D tensor.

    Axis 0 is treated as an independent stack, so fan-in/fan-out are
    taken from axes 1 and 2 only.
    """
    fan_in, fan_out = shape[1], shape[2]
    limit = np.sqrt(6. / (fan_in + fan_out))
    return uniform(shape, limit)
def __init__(self, n_vocab, y_vocab, dim_word, dim):
    """Allocate parameters for a GRU-based VQA model.

    n_vocab  -- question vocabulary size (e.g. 12047)
    y_vocab  -- answer vocabulary size (e.g. 430)
    dim_word -- word-embedding dimension (e.g. 1024)
    dim      -- GRU hidden dimension (e.g. 512)
    """
    self.n_vocab = n_vocab
    self.y_vocab = y_vocab
    self.dim_word = dim_word
    self.dim = dim

    # Image embedding: 4096-d CNN feature -> hidden space.
    self.W_img_emb = initializations.uniform((4096, self.dim))
    self.b_img_emb = initializations.zero((self.dim))

    # Question word embedding.
    self.W_emb = initializations.uniform((self.n_vocab, self.dim_word))

    # GRU update/reset gates packed into a 2*dim matrix.
    self.W_gru = initializations.uniform((self.dim_word, self.dim * 2))
    self.U_gru = initializations.uniform((self.dim, self.dim * 2))
    self.b_gru = initializations.zero((self.dim * 2))

    # Candidate (cdd) hidden state.
    self.W_gru_cdd = initializations.uniform((self.dim_word, self.dim))
    self.U_gru_cdd = initializations.uniform((self.dim, self.dim))
    self.b_gru_cdd = initializations.zero((self.dim))

    # Answer prediction head.
    self.W_pred = initializations.uniform((self.dim, self.y_vocab))
    self.b_pred = initializations.zero((self.y_vocab))

    self.params = [
        self.W_img_emb, self.b_img_emb,
        self.W_emb,
        self.W_gru, self.U_gru, self.b_gru,
        self.W_gru_cdd, self.U_gru_cdd, self.b_gru_cdd,
        self.W_pred, self.b_pred,
    ]
def __init__(self, n_vocab, y_vocab, dim_word, dim):
    """Set up GRU VQA weights (duplicate of the sibling model above).

    n_vocab/y_vocab are the question/answer vocabulary sizes; dim_word
    and dim are the embedding and hidden dimensions.
    """
    self.n_vocab = n_vocab
    self.y_vocab = y_vocab
    self.dim_word = dim_word
    self.dim = dim

    # 4096-d image feature projected into the hidden space.
    self.W_img_emb = initializations.uniform((4096, self.dim))
    self.b_img_emb = initializations.zero((self.dim))

    # Word embedding table.
    self.W_emb = initializations.uniform((self.n_vocab, self.dim_word))

    # Packed GRU gate weights (update + reset).
    self.W_gru = initializations.uniform((self.dim_word, self.dim * 2))
    self.U_gru = initializations.uniform((self.dim, self.dim * 2))
    self.b_gru = initializations.zero((self.dim * 2))

    # Candidate-state weights ("cdd" = candidate).
    self.W_gru_cdd = initializations.uniform((self.dim_word, self.dim))
    self.U_gru_cdd = initializations.uniform((self.dim, self.dim))
    self.b_gru_cdd = initializations.zero((self.dim))

    # Prediction layer over answers.
    self.W_pred = initializations.uniform((self.dim, self.y_vocab))
    self.b_pred = initializations.zero((self.y_vocab))

    self.params = [
        self.W_img_emb,
        self.b_img_emb,
        self.W_emb,
        self.W_gru,
        self.U_gru,
        self.b_gru,
        self.W_gru_cdd,
        self.U_gru_cdd,
        self.b_gru_cdd,
        self.W_pred,
        self.b_pred,
    ]
def __init__(self, n_channels, batch_size=30):
    """Allocate a five-layer convolutional stack.

    n_channels -- number of input channels
    batch_size -- minibatch size (default 30)
    """
    self.n_channels = n_channels
    self.batch_size = batch_size

    # (out_channels, in_channels, kh, kw) filter banks with zero biases.
    self.conv1_W = initializations.uniform((96, n_channels, 7, 7))
    self.conv1_b = shared_zeros((96,))
    self.conv2_W = initializations.uniform((256, 96, 5, 5))
    self.conv2_b = shared_zeros((256,))
    self.conv3_W = initializations.uniform((512, 256, 3, 3))
    self.conv3_b = shared_zeros((512,))
    self.conv4_W = initializations.uniform((512, 512, 3, 3))
    self.conv4_b = shared_zeros((512,))
    self.conv5_W = initializations.uniform((512, 512, 3, 3))
    self.conv5_b = shared_zeros((512,))
def create_actor_network(self, state_size, action_dim):
    """Build the actor network: two 100-unit relu layers and a tanh output.

    Returns (model, trainable_weights, input_tensor) for the DDPG loop.
    """
    print("Now we build the model")
    # Fix: the original instantiated an unused `Sequential()` that was
    # immediately shadowed by the functional `Model` below.
    S = Input(shape=[state_size])
    h0 = Dense(100, init='he_uniform', activation='relu')(S)
    h1 = Dense(100, init='he_uniform', activation='relu')(h0)
    # Small uniform init keeps initial tanh actions near zero.
    # NOTE(review): output width is hard-coded to 8 and ignores
    # `action_dim` -- confirm this is intentional.
    V = Dense(8,
              init=lambda shape, name: uniform(shape, scale=3e-3, name=name),
              activation='tanh')(h1)
    model = Model(input=S, output=V)
    return model, model.trainable_weights, S
def reset_model(model):
    """
    Given a Keras model consisting only of MoleculeConv, Dense, and Dropout layers,
    this function will reset the trainable weights to save time for CV tests.
    """
    # Layers are dispatched on their class name embedded in str(layer)
    # because these are custom layer types.
    for layer in model.layers:
        # Note: these are custom depending on the layer type
        if '.MoleculeConv' in str(layer):
            # Fresh template weights; the per-depth tensor is built by tiling
            # the template (depth+1) times and adding uniform noise so each
            # depth starts at a different point.
            W_inner = layer.init_inner((layer.inner_dim, layer.inner_dim))
            b_inner = np.zeros((1, layer.inner_dim))
            # Inner weights
            layer.W_inner.set_value((T.tile(W_inner, (layer.depth + 1, 1, 1)).eval() + initializations.uniform((layer.depth + 1, layer.inner_dim, layer.inner_dim)).eval()).astype(np.float32))
            layer.b_inner.set_value((T.tile(b_inner, (layer.depth + 1, 1, 1)).eval() + initializations.uniform((layer.depth + 1, 1, layer.inner_dim)).eval()).astype(np.float32))
            # Outer weights
            W_output = layer.init_output((layer.inner_dim, layer.units), scale=layer.scale_output)
            b_output = np.zeros((1, layer.units))
            # Initialize weights tensor
            # (no added noise here: every depth shares the same output init)
            layer.W_output.set_value((T.tile(W_output, (layer.depth + 1, 1, 1)).eval()).astype(np.float32))
            layer.b_output.set_value((T.tile(b_output, (layer.depth + 1, 1, 1)).eval()).astype(np.float32))
            logging.info('graphFP layer reset')
        elif '.Dense' in str(layer):
            # Re-run the layer's own initializer; biases go back to zero.
            layer.W.set_value((layer.init(layer.W.shape.eval()).eval()).astype(np.float32))
            layer.b.set_value(np.zeros(layer.b.shape.eval(), dtype=np.float32))
            logging.info('dense layer reset')
        elif '.RandomMask' in str(layer):
            # Stateless layers: nothing to reset.
            logging.info('RandomMask unchanged')
        elif '.InputLayer' in str(layer):
            logging.info('InputLayer unchanged')
        else:
            # Fail loudly rather than silently leaving stale weights.
            raise ValueError('Unknown layer {}, cannot reset weights'.format(str(layer)))
    logging.info('Reset model weights')
    return model
def build(self, input_shape): '''Builds internal weights and paramer attribute''' # NOTE: NEED TO TILE AND EVALUATE SO THAT PARAMS CAN BE VARIABLES # OTHERWISE K.GET_VALUE() DOES NOT WORK # Define template weights for inner FxF W_inner = self.init_inner((self.inner_dim, self.inner_dim)) b_inner = K.zeros((1, self.inner_dim)) # Initialize weights tensor self.W_inner = K.variable(T.tile(W_inner, (self.depth + 1, 1, 1)).eval() + \ initializations.uniform((self.depth + 1, self.inner_dim, self.inner_dim)).eval()) self.W_inner.name = 'T:W_inner' self.b_inner = K.variable(T.tile(b_inner, (self.depth + 1, 1, 1)).eval() + \ initializations.uniform((self.depth + 1, 1, self.inner_dim)).eval()) self.b_inner.name = 'T:b_inner' # # Concatenate third dimension (depth) so different layers can have # # different weights. Now, self.W_inner[#,:,:] corresponds to the # # weight matrix for layer/depth #. # Define template weights for output FxL W_output = self.init_output((self.inner_dim, self.output_dim), scale=self.scale_output) b_output = K.zeros((1, self.output_dim)) # Initialize weights tensor self.W_output = K.variable( T.tile(W_output, (self.depth + 1, 1, 1)).eval()) self.W_output.name = 'T:W_output' self.b_output = K.variable( T.tile(b_output, (self.depth + 1, 1, 1)).eval()) self.b_output.name = 'T:b_output' # # Concatenate third dimension (depth) so different layers can have # # different weights. Now, self.W_output[#,:,:] corresponds to the # # weight matrix for layer/depth #. # Pack params self.trainable_weights = [ self.W_inner, self.b_inner, self.W_output, self.b_output ] self.params = [ self.W_inner, self.b_inner, self.W_output, self.b_output ]
def reset(model):
    '''Given a Keras model consisting only of GraphFP, Dense, and Dropout layers,
    this function will reset the trainable weights to save time for CV tests.'''
    # Dispatch on the layer's class name as it appears in str(layer),
    # since these are custom layer types.
    for layer in model.layers:
        # Note: these are custom depending on the layer type
        if '.GraphFP' in str(layer):
            # Fresh inner templates, tiled over depth+1 slices with added
            # uniform noise so each depth gets distinct starting weights.
            W_inner = layer.init_inner((layer.inner_dim, layer.inner_dim))
            b_inner = np.zeros((1, layer.inner_dim))
            # Inner weights
            layer.W_inner.set_value((T.tile(W_inner, (layer.depth + 1, 1, 1)).eval() + \
                initializations.uniform((layer.depth + 1, layer.inner_dim, layer.inner_dim)).eval()).astype(np.float32))
            layer.b_inner.set_value((T.tile(b_inner, (layer.depth + 1, 1, 1)).eval() + \
                initializations.uniform((layer.depth + 1, 1, layer.inner_dim)).eval()).astype(np.float32))
            # Outer weights
            W_output = layer.init_output((layer.inner_dim, layer.output_dim), scale=layer.scale_output)
            b_output = np.zeros((1, layer.output_dim))
            # Initialize weights tensor (output weights are identical across depths)
            layer.W_output.set_value(
                (T.tile(W_output, (layer.depth + 1, 1, 1)).eval()).astype(np.float32))
            layer.b_output.set_value(
                (T.tile(b_output, (layer.depth + 1, 1, 1)).eval()).astype(np.float32))
            print('graphFP layer reset')
        elif '.Dense' in str(layer):
            # Re-run the layer's own initializer; zero the bias.
            layer.W.set_value(
                (layer.init(layer.W.shape.eval()).eval()).astype(np.float32))
            layer.b.set_value(np.zeros(layer.b.shape.eval(), dtype=np.float32))
            print('dense layer reset')
        elif '.Dropout' in str(layer):
            # Dropout holds no trainable state.
            print('dropout unchanged')
        else:
            # Unknown layer types abort the reset rather than silently
            # leaving stale weights in place.
            raise ValueError('Unknown layer {}, cannot reset weights'.format(
                str(layer)))
    print('Reset model weights')
    return model
def glorot_uniform_sigm(shape):
    """
    Glorot style weight initializer for sigmoid activations.

    Like keras.initializations.glorot_uniform(), but with the uniform
    random interval used in the Deeplearning.net tutorials: the interval
    should be +/- sqrt(6 / (fan_in + fan_out)) for tanh activations
    (Keras' glorot_uniform()) and four times that for sigmoid.

    See:
    http://deeplearning.net/tutorial/mlp.html#going-from-logistic-regression-to-mlp
    """
    fan_in, fan_out = get_fans(shape)
    limit = 4. * np.sqrt(6. / (fan_in + fan_out))
    return uniform(shape, limit)
def glorot_uniform_sigm(shape, name=None, dim_ordering='th'):
    """
    Glorot style weight initializer for sigmoid activations.

    Same interval convention as the Deeplearning.net tutorials: use
    +/- sqrt(6 / (fan_in + fan_out)) for tanh activations (this is what
    Keras' glorot_uniform() does) and 4x that interval for sigmoid.

    See:
    http://deeplearning.net/tutorial/mlp.html#going-from-logistic-regression-to-mlp
    """
    fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering)
    limit = 4. * np.sqrt(6. / (fan_in + fan_out))
    return uniform(shape, limit, name=name)
def create_critic_network(self, state_size, action_dim):
    """Build the critic Q-network over (state, action) pairs.

    Returns (model, action_input, state_input); the model is compiled
    with MSE loss for TD-error regression.
    """
    print("Now we build the model")
    state_input = Input(shape=[state_size])
    action_input = Input(shape=[action_dim], name='action2')
    state_hidden = Dense(HIDDEN1_UNITS, init='he_uniform', activation='relu')(state_input)
    # Action enters the network after the first state layer.
    joined = merge([state_hidden, action_input], mode='concat')
    joined_hidden = Dense(HIDDEN2_UNITS, init='he_uniform', activation='relu')(joined)
    # Tiny uniform init for the linear Q-value head.
    q_values = Dense(
        action_dim,
        init=lambda shape, name: uniform(shape, scale=3e-3, name=name),
        activation='linear')(joined_hidden)
    model = Model(input=[state_input, action_input], output=q_values)
    adam = Adam(lr=self.LEARNING_RATE)
    model.compile(loss='mse', optimizer=adam)
    return model, action_input, state_input
def unitary_ASB2016_init(shape, name=None):
    """Initialize a unitary matrix via the [Arjovsky, Shah, Bengio 2016]
    parameterization.

    shape must be square (N x N).  Returns the augmented-real unitary
    matrix Uaug together with its generating parameters
    (theta, reflection, idxpermaug).
    """
    assert shape[0] == shape[1]
    N = shape[1]
    # Three rows of N phase angles in [-pi, pi].
    theta = initializations.uniform((3, N), scale=np.pi, name='{}_theta'.format(name))
    # Two Householder-style reflection vectors over the augmented (2N) space.
    reflection = initializations.glorot_uniform(
        (2, 2 * N), name='{}_reflection'.format(name))
    # Fixed random permutation, duplicated for the augmented representation.
    idxperm = np.random.permutation(N)
    idxpermaug = np.concatenate((idxperm, N + idxperm))
    # Augmented identity: real part eye(N) stacked over zero imaginary part.
    Iaug = augLeft(np.concatenate((np.eye(N), np.zeros((N, N))), axis=0),
                   module=np).astype(np.float32)
    # Apply the parameterized unitary transform to the identity to
    # materialize the initial matrix.
    Uaug = times_unitary_ASB2016(Iaug, N, [theta, reflection, idxpermaug])
    return Uaug, theta, reflection, idxpermaug
def build(self, input_shape):
    """Create the positional and channel weight matrices.

    input_shape is (batch, length, channels).  When self.symmetric is
    set, positional weights cover only half the input (rounded up).
    """
    import numpy as np
    self.original_length = input_shape[1]
    if (self.symmetric == False):
        self.length = input_shape[1]
    else:
        # Remember whether the length is odd so the symmetric half can be
        # unfolded correctly later.
        self.odd_input_length = input_shape[1] % 2.0 == 1
        self.length = int(input_shape[1] / 2.0 + 0.5)
    self.num_channels = input_shape[2]
    #self.init = (lambda shape, name: initializations.uniform(
    #    shape, np.sqrt(
    #        np.sqrt(2.0/(self.length*self.num_channels+self.output_dim))),
    #    name))
    # Fix bug in Keras 2: the initializer signature changed, so the
    # bounds are passed explicitly as (low, high).
    self.init = lambda shape=None: initializations.uniform(
        (self.output_dim, self.length),
        -np.sqrt(
            np.sqrt(2.0 / (self.length * self.num_channels + self.output_dim))),
        np.sqrt(
            np.sqrt(2.0 / (self.length * self.num_channels + self.output_dim))))
    # Positional weights, optionally curvature-constrained and
    # smoothness-regularized.
    self.W_pos = self.add_weight(
        shape=(self.output_dim, self.length),
        name='{}_W_pos'.format(self.name),
        #initializer=self.init,
        initializer='random_uniform',
        constraint=(None if self.curvature_constraint is None else
                    constraints.CurvatureConstraint(
                        self.curvature_constraint)),
        regularizer=(None if self.smoothness_penalty is None else
                     regularizers.SepFCSmoothnessRegularizer(
                         self.smoothness_penalty,
                         self.smoothness_l1,
                         self.smoothness_second_diff)))
    # Per-channel weights (unconstrained).
    self.W_chan = self.add_weight(
        shape=(self.output_dim, self.num_channels),
        name='{}_W_chan'.format(self.name),
        #initializer=self.init,
        initializer='random_uniform',
        trainable=True)
    self.built = True
def create_critic_network(self, state_size, action_dim):
    """Build the critic Q-network with separate state and action branches.

    Returns (model, action_input, state_input); the model is compiled
    with MSE loss.
    """
    print("Now we build the critic cnn model")
    state_input = Input(shape=state_size)
    state_branch = Dense(50, activation='relu', init='he_uniform')(state_input)

    action_input = Input(shape=[action_dim])
    action_branch = Dense(25, activation='relu', init='he_uniform')(action_input)

    # Merge the two branches, then two more hidden layers before the
    # scalar Q-value head (tiny uniform init, linear activation).
    merged = merge([state_branch, action_branch], mode='concat')
    hidden = Dense(50, activation='relu', init='he_uniform')(merged)
    hidden = Dense(25, activation='relu', init='he_uniform')(hidden)
    q_value = Dense(
        1,
        activation='linear',
        init=lambda shape, name: uniform(shape, scale=3e-4, name=name))(hidden)

    model = Model(input=[state_input, action_input], output=q_value)
    adam = Adam(lr=self.LEARNING_RATE)
    model.compile(loss='mse', optimizer=adam)
    return model, action_input, state_input
def build_model(corpora, params, filename=None):
    """Build a CSTM-style Graph model: a shared source-side encoder feeding
    one softmax "machine" per target position, concatenated at the output.

    corpora  -- object holding train_src_idxs/train_trg_idxs index matrices
                and src_vocab/trg_vocab mappings
    params   -- dict with "embedding", "hidden1", "hidden2" layer sizes
    filename -- unused here (presumably for checkpointing elsewhere -- confirm)
    """
    # All layers share one small uniform initializer.
    _init = lambda shape : uniform(shape, scale=0.1)
    src_seq_len = corpora.train_src_idxs.shape[1]
    trg_seq_len = corpora.train_trg_idxs.shape[1]
    model = Graph()
    model.add_input(name='input', input_shape=(src_seq_len, ), dtype=int)
    # +1 on input_dim reserves index 0 for masking (mask_zero=True).
    model.add_node(
        Embedding(input_dim=len(corpora.src_vocab)+1,
                  output_dim=params["embedding"],
                  init=_init, mask_zero=True,
                  input_length=src_seq_len),
        name='embedding', input='input')
    model.add_node(Flatten(), name='flatten', input='embedding')
    model.add_node(Dense(params["hidden1"], init=_init, activation='tanh'),
                   name='hidden1', input='flatten')
    # Target word predictor machines in CSTM: one hidden+softmax pair per
    # target position, all fed from the shared 'hidden1' representation.
    trg_mach_names = ["target_mach_%d" % i for i in range(trg_seq_len)]
    for mach_name in trg_mach_names:
        model.add_node(Dense(params["hidden2"], init=_init, activation='tanh'),
                       name='%s-presoftmax' % mach_name, input='hidden1')
        model.add_node(Dense(len(corpora.trg_vocab), init=_init,
                             activation='softmax'),
                       name=mach_name, input='%s-presoftmax' % mach_name)
    # This should hopefully gather N softmaxes into a concatenated tensor
    model.add_output(name='output', inputs=trg_mach_names, merge_mode='concat')
    # Setup optimizer
    opt = Adagrad()
    #opt = SGD(0.03, decay=5e-8)
    # Compile the model
    model.compile(optimizer=opt, loss={'output' : custom_loss})
    return model
def __init__(self, n_vocab, y_vocab, dim_word, dim):
    """Allocate GRU VQA parameters with an image-conditioned initial state.

    n_vocab  -- question vocabulary size (e.g. 12047)
    y_vocab  -- answer vocabulary size (e.g. 430)
    dim_word -- word-embedding dimension (e.g. 1024)
    dim      -- GRU hidden dimension (e.g. 1024)
    """
    self.n_vocab = n_vocab
    self.y_vocab = y_vocab
    self.dim_word = dim_word
    self.dim = dim
    self.dim_ctx = 4096  # fixed CNN feature size

    # Image feature initializes the recurrent state.
    self.W_img_init = initializations.uniform((self.dim_ctx, self.dim))
    self.b_img_init = initializations.zero((self.dim))

    # Word embedding.
    self.W_emb = initializations.uniform((self.n_vocab, self.dim_word))

    # GRU gates packed as 2*dim (update + reset).
    self.W_gru = initializations.uniform((self.dim_word, self.dim * 2))
    self.U_gru = initializations.uniform((self.dim, self.dim * 2))
    self.b_gru = initializations.zero((self.dim * 2))

    # Candidate hidden state ("cdd" = candidate).
    self.W_gru_cdd = initializations.uniform((self.dim_word, self.dim))
    self.U_gru_cdd = initializations.uniform((self.dim, self.dim))
    self.b_gru_cdd = initializations.zero((self.dim))

    # Answer prediction over a 2*dim input.
    self.W_pred = initializations.uniform((self.dim * 2, self.y_vocab))
    self.b_pred = initializations.zero((self.y_vocab))

    self.params = [
        self.W_img_init, self.b_img_init,
        self.W_emb,
        self.W_gru, self.U_gru, self.b_gru,
        self.W_gru_cdd, self.U_gru_cdd, self.b_gru_cdd,
        self.W_pred, self.b_pred,
    ]
def norm(scale):
    """Return a Keras-style initializer drawing uniformly with the
    given scale."""
    def _init(shape, name=None):
        return initializations.uniform(shape, scale=scale, name=name)
    return _init
def __init__(self, n_vocab, y_vocab, dim_word, dim, dim_ctx):
    """Allocate parameters for a GRU VQA model with soft attention over
    image-context features.

    n_vocab  -- question vocabulary size (e.g. 12047)
    y_vocab  -- answer vocabulary size (e.g. 430)
    dim_word -- word-embedding dimension (e.g. 1024)
    dim      -- GRU hidden dimension (e.g. 1024)
    dim_ctx  -- image-context dimension (e.g. 512)
    """
    self.n_vocab = n_vocab
    self.y_vocab = y_vocab
    self.dim_word = dim_word
    self.dim = dim
    self.dim_ctx = dim_ctx

    # Context -> initial recurrent state.
    self.W_ctx_init = initializations.uniform((self.dim_ctx, self.dim))
    self.b_ctx_init = initializations.zero((self.dim))

    # Attention: projected image context.
    self.W_ctx_att = initializations.uniform((self.dim_ctx, self.dim_ctx))
    self.b_ctx_att = initializations.zero((self.dim_ctx))
    # Attention: hidden state projected into the context space.
    self.W_dim_att = initializations.uniform((self.dim, self.dim_ctx))
    # Attention: context energy (scalar score per location).
    self.U_att = initializations.uniform((self.dim_ctx, 1))
    self.c_att = initializations.zero((1))

    # Word embedding.
    self.W_emb = initializations.uniform((self.n_vocab, self.dim_word))

    # Word -> context-space projection used by the GRU.
    self.W_gru_ctx = initializations.uniform((self.dim_word, self.dim_ctx))
    self.b_gru_ctx = initializations.zero((self.dim_ctx))

    # GRU gates (update + reset packed as 2*dim), with a context term.
    self.W_gru = initializations.uniform((self.dim_word, self.dim * 2))
    self.U_gru = initializations.uniform((self.dim, self.dim * 2))
    self.b_gru = initializations.zero((self.dim * 2))
    self.U_gru_ctx = initializations.uniform((self.dim_ctx, self.dim * 2))

    # Candidate hidden state ("cdd" = candidate), also context-conditioned.
    self.W_gru_cdd = initializations.uniform((self.dim_word, self.dim))
    self.U_gru_cdd = initializations.uniform((self.dim, self.dim))
    self.b_gru_cdd = initializations.zero((self.dim))
    self.U_gru_cdd_ctx = initializations.uniform((self.dim_ctx, self.dim))

    # Answer prediction over a 2*dim input.
    self.W_pred = initializations.uniform((self.dim * 2, self.y_vocab))
    self.b_pred = initializations.zero((self.y_vocab))

    self.params = [
        self.W_ctx_init, self.b_ctx_init,
        self.W_ctx_att, self.b_ctx_att,
        self.W_dim_att,
        self.U_att, self.c_att,
        self.W_emb,
        self.W_gru_ctx, self.b_gru_ctx,
        self.W_gru, self.U_gru, self.b_gru, self.U_gru_ctx,
        self.W_gru_cdd, self.U_gru_cdd, self.b_gru_cdd, self.U_gru_cdd_ctx,
        self.W_pred, self.b_pred,
    ]
def get_embedding_matrix(word_index, extra_vocab, force=False):
    """Build (or load from cache) the word-embedding matrix.

    word_index  -- dict word -> row index; MUTATED: dev/test words found in
                   the embedding file are appended to it
    extra_vocab -- set of dev/test words to consider adding
    force       -- when True, ignore the pickle cache and rebuild

    Returns a (len(word_index)+1, EMBEDDING_DIM) numpy matrix.
    """
    picklefile = os.path.join(CACHE_DIR, 'embedding_matrix.pickle')
    # Fast path: reuse the cached matrix unless forced to rebuild.
    if not force and os.path.isfile(picklefile):
        print('Loading embedding matrix from pickle...')
        embedding_matrix = pickle.load(open(picklefile, 'rb'))
        return embedding_matrix
    print('\nLoading embeddings...')
    # Parse the text embedding file: one word followed by its vector per line.
    embeddings_index = {}
    with open(os.path.join(EMBEDDING_DIR, 'embeddings-scaled.50.txt')) as f:
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs
    # Five special tokens get random vectors (hence the -5 in the count below).
    embeddings_index[START_OF_SENTENCE] = initializations.uniform(EMBEDDING_DIM, scale=2.0).eval()
    embeddings_index[END_OF_SENTENCE] = initializations.uniform(EMBEDDING_DIM, scale=2.0).eval()
    embeddings_index[UNKNOWN_UPPERCASE_ALNUM] = initializations.uniform(EMBEDDING_DIM, scale=2.0).eval()
    embeddings_index[UNKNOWN_LOWERCASE_ALNUM] = initializations.uniform(EMBEDDING_DIM, scale=2.0).eval()
    embeddings_index[UNKNOWN_NON_ALNUM] = initializations.uniform(EMBEDDING_DIM, scale=2.0).eval()
    print('\nFound {} word vectors.'.format(len(embeddings_index)-5))
    print('\nAdding dev/test vocab into word_index')
    # add dev and test vocabulary to word_index (only words that actually
    # have an embedding)
    extra_vocab = list(set(embeddings_index.keys()) & extra_vocab)
    print('\nExtra vocab: {}.'.format(len(extra_vocab)))
    for word in extra_vocab:
        if word_index.get(word) is None:
            word_index[word] = len(word_index)
    print('\nCurrent vocab size: {}'.format(len(word_index)))
    oov = 0
    embedding_matrix = np.zeros((len(word_index) + 1, EMBEDDING_DIM))
    for i, word in enumerate(word_index):
        # Exact match first.
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
            continue
        # Fall back to the last hyphen-separated component.
        if '-' in word:
            embedding_vector = embeddings_index.get(word.split('-')[-1])
            if embedding_vector is not None:
                embedding_matrix[i] = embedding_vector
                continue
        # Fall back to the last escaped-slash component.
        if '\/' in word:
            embedding_vector = embeddings_index.get(word.split('\/')[-1])
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
        else:
            # True OOV: random vector, counted for the ratio report.
            oov += 1
            embedding_matrix[i] = initializations.uniform(EMBEDDING_DIM, scale=2.0).eval()
    print('OOV number is {}. Total number is {}. Embedding OOV ratio is {}.'.format(oov, len(word_index), oov/len(word_index)))
    # save to pickle file
    try:
        f = open(picklefile, 'wb')
        pickle.dump(embedding_matrix, f, pickle.HIGHEST_PROTOCOL)
        f.close()
    except Exception as e:
        print('Unable to save data to', picklefile, ':', e)
        raise
    return embedding_matrix
def __init__(self, n_vocab, dim_word, dimctx, dim):
    """Allocate parameters for an attentional encoder-decoder GRU
    (NMT-style: bidirectional encoder, attention, deep-output decoder).

    n_vocab  -- vocabulary size (e.g. 30000)
    dim_word -- word-embedding dimension (e.g. 384)
    dimctx   -- context (annotation) dimension (e.g. 1024)
    dim      -- recurrent hidden dimension (e.g. 512)
    """
    self.n_vocab = n_vocab
    self.dim_word = dim_word
    self.dimctx = dimctx
    self.dim = dim

    # --- word embeddings, separate tables for encoder and decoder ---
    self.W_enc_emb = initializations.uniform((self.n_vocab, self.dim_word))
    self.W_dec_emb = initializations.uniform((self.n_vocab, self.dim_word))

    # --- encoder forward GRU (gates packed 2*dim; "cdd" = candidate) ---
    self.W_enc_f_gru = initializations.uniform((self.dim_word, self.dim * 2))
    self.U_enc_f_gru = initializations.uniform((self.dim, self.dim * 2))
    self.b_enc_f_gru = initializations.zero((self.dim * 2))
    self.W_enc_f_gru_cdd = initializations.uniform((self.dim_word, self.dim))
    self.U_enc_f_gru_cdd = initializations.uniform((self.dim, self.dim))
    self.b_enc_f_gru_cdd = initializations.zero((self.dim))

    # --- encoder backward GRU ---
    self.W_enc_b_gru = initializations.uniform((self.dim_word, self.dim * 2))
    self.U_enc_b_gru = initializations.uniform((self.dim, self.dim * 2))
    self.b_enc_b_gru = initializations.zero((self.dim * 2))
    self.W_enc_b_gru_cdd = initializations.uniform((self.dim_word, self.dim))
    self.U_enc_b_gru_cdd = initializations.uniform((self.dim, self.dim))
    self.b_enc_b_gru_cdd = initializations.zero((self.dim))

    # --- context -> initial decoder state (s0) ---
    self.W_dec_init = initializations.uniform((self.dimctx, dim))
    self.b_dec_init = initializations.zero((dim))

    # --- decoder GRU, conditioned on the attention context ---
    self.W_dec_gru = initializations.uniform((self.dim_word, self.dim * 2))
    self.U_dec_gru = initializations.uniform((self.dim, self.dim * 2))
    self.b_dec_gru = initializations.zero((self.dim * 2))
    self.W_dec_gru_cdd = initializations.uniform((self.dim_word, self.dim))
    self.U_dec_gru_cdd = initializations.uniform((self.dim, self.dim))
    self.b_dec_gru_cdd = initializations.zero((self.dim))
    self.W_dec_gru_ctx = initializations.uniform((self.dimctx, self.dim * 2))
    self.W_dec_gru_ctx_cdd = initializations.uniform((self.dimctx, self.dim))

    # --- attention: previous word (y), annotations (h) and decoder state
    #     (s) are all projected into the context space, then scored ---
    self.W_att_y2c = initializations.uniform((self.dim_word, self.dimctx))
    self.W_att_h2c = initializations.uniform((self.dimctx, self.dimctx))
    self.W_att_s2c = initializations.uniform((self.dim, self.dimctx))
    self.b_att = initializations.zero((self.dimctx))
    self.U_att_energy = initializations.uniform((self.dimctx, 1))
    self.b_att_energy = initializations.zero((1,))

    # --- deep-output prediction: state, previous word and context each
    #     map into word space before the final vocabulary softmax ---
    self.W_dec_pred_s2y = initializations.uniform((self.dim, self.dim_word))
    self.b_dec_pred_s2y = initializations.zero((self.dim_word))
    self.W_dec_pred_y2y = initializations.uniform((self.dim_word, self.dim_word))
    self.b_dec_pred_y2y = initializations.zero((self.dim_word))
    self.W_dec_pred_c2y = initializations.uniform((self.dim * 2, self.dim_word))
    self.b_dec_pred_c2y = initializations.zero((self.dim_word))
    self.W_dec_pred = initializations.uniform((self.dim_word, self.n_vocab))
    self.b_dec_pred = initializations.zero((self.n_vocab))

    self.params = [
        self.W_enc_emb, self.W_dec_emb,
        self.W_enc_f_gru, self.U_enc_f_gru, self.b_enc_f_gru,
        self.W_enc_f_gru_cdd, self.U_enc_f_gru_cdd, self.b_enc_f_gru_cdd,
        self.W_enc_b_gru, self.U_enc_b_gru, self.b_enc_b_gru,
        self.W_enc_b_gru_cdd, self.U_enc_b_gru_cdd, self.b_enc_b_gru_cdd,
        self.W_dec_init, self.b_dec_init,
        self.W_dec_gru, self.U_dec_gru, self.b_dec_gru,
        self.W_dec_gru_cdd, self.U_dec_gru_cdd, self.b_dec_gru_cdd,
        self.W_dec_gru_ctx, self.W_dec_gru_ctx_cdd,
        self.W_att_y2c, self.W_att_h2c, self.W_att_s2c, self.b_att,
        self.U_att_energy, self.b_att_energy,
        self.W_dec_pred_s2y, self.b_dec_pred_s2y,
        self.W_dec_pred_y2y, self.b_dec_pred_y2y,
        self.W_dec_pred_c2y, self.b_dec_pred_c2y,
        self.W_dec_pred, self.b_dec_pred,
    ]
def emb_init(shape, name=None):
    """Uniform embedding initializer whose scale shrinks with the
    embedding width (0.6 / columns)."""
    limit = 0.6 / shape[1]
    return initializations.uniform(shape, scale=limit, name=name)
def build(self, input_shape):
    """Create the weights of this unitary-RNN recurrent layer.

    input_shape is (samples, timesteps, input_dim).  Builds the input
    projection W, the bias b, the initial hidden state h0, and the unitary
    recurrence: either a full matrix U ('full' in self.unitary_impl) or the
    factored [ASB2016] parameterization (theta + reflection).
    """
    self.input_spec = [InputSpec(shape=input_shape)]

    if self.stateful:
        self.reset_states()
    else:
        # initial states: all-zero tensor of shape (output_dim)
        self.states = [None]

    input_dim = input_shape[2]
    self.input_dim = input_dim

    # Input-to-hidden projection.
    self.W = self.init((input_dim, self.output_dim), name='{}_W'.format(self.name))
    #self.b = K.zeros((self.N,), name='{}_b'.format(self.name))
    # Small random bias instead of zeros (original zero init left disabled above).
    self.b = initializations.uniform((self.N, ), scale=0.01, name='{}_b'.format(self.name))
    # "aug" vectors have length 2*N -- presumably the real and imaginary
    # halves stacked; TODO confirm against the unitary_* helpers.
    self.baug = K.tile(self.b, [2])

    # Initial hidden state: mean plus small uniform perturbation.
    h0 = self.h0_mean + initializations.uniform((2 * self.N, ), scale=0.01).get_value()
    self.h0 = K.variable(h0, name='{}_h0'.format(self.name))

    if ('full' in self.unitary_impl):
        # we're using a full unitary recurrence matrix
        if (self.inner_init == 'svd'):
            # use SVD to initialize U
            self.U = unitary_svd_init((self.N, self.N), name='{}_U'.format(self.name))
        elif (self.inner_init == 'ASB2016'):
            # use parameterization of [ASB2016] to initialize U
            Uaug, _, _, _ = unitary_ASB2016_init((self.N, self.N))
            Uaug = Uaug.eval()
            # Stack the two N x N halves of the augmented matrix vertically.
            self.U = K.variable(np.concatenate(
                (Uaug[:self.N, :self.N], Uaug[:self.N, self.N:]), axis=0),
                name='{}_U'.format(self.name))

        self.Uaug = augRight(self.U, module=K)

    elif (self.unitary_impl == 'ASB2016'):
        # we're using the parameterization of [Arjovsky, Shah, Bengio 2016]
        self.Uaug, self.theta, self.reflection, _ = unitary_ASB2016_init(
            (self.N, self.N), name=self.name)

    # set the trainable weights (the two branches expose different params)
    if ('full' in self.unitary_impl):
        self.trainable_weights = [self.W, self.U, self.b, self.h0]
    elif (self.unitary_impl == 'ASB2016'):
        self.trainable_weights = [
            self.W, self.theta, self.reflection, self.b, self.h0
        ]

    # NOTE(review): regularizers list is created but never populated; the
    # support code below was left disabled in the original.
    self.regularizers = []
    #if self.W_regularizer:
    #    self.W_regularizer.set_param(self.W)
    #    self.regularizers.append(self.W_regularizer)
    #if self.U_regularizer:
    #    self.U_regularizer.set_param(self.U)
    #    self.regularizers.append(self.U_regularizer)
    #if self.b_regularizer:
    #    self.b_regularizer.set_param(self.b)
    #    self.regularizers.append(self.b_regularizer)

    # Apply any weights supplied at construction time, then free them.
    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def custom_init(shape):
    """Draw initial weights uniformly with scale 0.1."""
    weights = uniform(shape, scale=0.1)
    return weights
def init_uniform(self, shape, name=None):
    """Uniform initializer for the input embedding, values in [-1, 1]."""
    embedding = init.uniform(shape=shape, scale=1, name=name)
    return embedding
def my_init(shape, name=None):
    """Uniform weight initializer with scale 0.08."""
    result = initializations.uniform(shape, scale=0.08, name=name)
    return result
def __init__(self, n_words, dim_emb, dim_img):
    """Allocate the embedding and four three-layer CNN branches.

    The image feature (dim_img) is concatenated at a different depth per
    branch: word at layer 1, phs at layer 2, phl at layer 3, st never.

    Args:
        n_words: vocabulary size.
        dim_emb: word-embedding dimensionality.
        dim_img: image-feature dimensionality.
    """
    self.n_words = n_words
    self.dim_emb = dim_emb
    self.dim_img = dim_img

    self.emb_W = initializations.uniform((n_words, dim_emb))

    # (branch, [layer shapes]).  Creation order matches the original flat
    # assignments so every weight receives the same RNG draws.
    branches = [
        ('word', [(dim_emb * 3 + dim_img, 200), (200 * 3, 300), (300 * 3, 300)]),
        ('phs', [(dim_emb * 3, 200), (200 * 3 + dim_img, 300), (300 * 3, 300)]),
        ('phl', [(dim_emb * 3, 200), (200 * 3, 300), (300 * 3 + dim_img, 300)]),
        ('st', [(dim_emb * 3, 200), (200 * 3, 300), (300 * 3, 300)]),
    ]
    for branch, shapes in branches:
        for depth, shape in enumerate(shapes, 1):
            setattr(self, 'cnn_{}_W{}'.format(branch, depth),
                    initializations.uniform(shape))
            setattr(self, 'cnn_{}_b{}'.format(branch, depth),
                    initializations.zero((shape[1])))
def emb_init(shape, name=None):
    """Width-scaled uniform initializer for embedding matrices (scale 0.6/shape[1])."""
    width = shape[1]
    return initializations.uniform(shape, scale=0.6 / width, name=name)
def __init__(self, n_words, dim_emb, dim_img):
    """Allocate the word embedding plus four three-layer CNN branches.

    Each branch has three weight/bias pairs; the image feature (dim_img)
    enters the word branch at layer 1, phs at layer 2, phl at layer 3,
    and the st branch not at all.
    """
    self.n_words = n_words
    self.dim_emb = dim_emb
    self.dim_img = dim_img

    # Aliases; allocation order below is unchanged from the original.
    uniform = initializations.uniform
    zero = initializations.zero

    self.emb_W = uniform((n_words, dim_emb))

    # word branch: image feature concatenated at the first layer.
    self.cnn_word_W1 = uniform((dim_emb * 3 + dim_img, 200))
    self.cnn_word_b1 = zero((200))
    self.cnn_word_W2 = uniform((200 * 3, 300))
    self.cnn_word_b2 = zero((300))
    self.cnn_word_W3 = uniform((300 * 3, 300))
    self.cnn_word_b3 = zero((300))

    # phs branch: image feature concatenated at the second layer.
    self.cnn_phs_W1 = uniform((dim_emb * 3, 200))
    self.cnn_phs_b1 = zero((200))
    self.cnn_phs_W2 = uniform((200 * 3 + dim_img, 300))
    self.cnn_phs_b2 = zero((300))
    self.cnn_phs_W3 = uniform((300 * 3, 300))
    self.cnn_phs_b3 = zero((300))

    # phl branch: image feature concatenated at the third layer.
    self.cnn_phl_W1 = uniform((dim_emb * 3, 200))
    self.cnn_phl_b1 = zero((200))
    self.cnn_phl_W2 = uniform((200 * 3, 300))
    self.cnn_phl_b2 = zero((300))
    self.cnn_phl_W3 = uniform((300 * 3 + dim_img, 300))
    self.cnn_phl_b3 = zero((300))

    # st branch: text only, no image feature.
    self.cnn_st_W1 = uniform((dim_emb * 3, 200))
    self.cnn_st_b1 = zero((200))
    self.cnn_st_W2 = uniform((200 * 3, 300))
    self.cnn_st_b2 = zero((300))
    self.cnn_st_W3 = uniform((300 * 3, 300))
    self.cnn_st_b3 = zero((300))
def init(shape, name=None, dim_ordering='th'):
    """Uniform initializer delegating to `uniform` with the enclosing `scale`.

    NOTE(review): `scale` and `uniform` are free names resolved from the
    enclosing scope at call time -- confirm they are defined where used.
    """
    weights = uniform(shape, scale, name, dim_ordering)
    return weights
def __init__(self, n_vocab, y_vocab, dim_word, dim, dim_ctx):
    """Allocate parameters for an attention-conditioned GRU classifier.

    Args:
        n_vocab: input vocabulary size (e.g. 12047).
        y_vocab: output label vocabulary size (e.g. 430).
        dim_word: word-embedding dimensionality (e.g. 1024).
        dim: GRU hidden-state dimensionality (e.g. 1024).
        dim_ctx: image-context dimensionality (e.g. 512).
    """
    self.n_vocab = n_vocab  # 12047
    self.y_vocab = y_vocab  # 430
    self.dim_word = dim_word  # 1024
    self.dim = dim  # 1024
    self.dim_ctx = dim_ctx  # 512

    # Shorthands; allocation order below is identical to the original so
    # each weight receives the same RNG draws.
    uniform = initializations.uniform
    zero = initializations.zero
    wdim, hdim, cdim = self.dim_word, self.dim, self.dim_ctx

    ### initial context
    self.W_ctx_init = uniform((cdim, hdim))
    self.b_ctx_init = zero((hdim))
    ### forward : img_dim to context
    self.W_ctx_att = uniform((cdim, cdim))
    self.b_ctx_att = zero((cdim))
    ### forward : hidden_dim to context
    self.W_dim_att = uniform((hdim, cdim))
    ### context energy
    self.U_att = uniform((cdim, 1))
    self.c_att = zero((1))
    ### Word Embedding ###
    self.W_emb = uniform((self.n_vocab, wdim))
    ### enc forward GRU ###
    self.W_gru_ctx = uniform((wdim, cdim))
    self.b_gru_ctx = zero((cdim))
    self.W_gru = uniform((wdim, hdim * 2))
    self.U_gru = uniform((hdim, hdim * 2))
    self.b_gru = zero((hdim * 2))
    self.U_gru_ctx = uniform((cdim, hdim * 2))
    self.W_gru_cdd = uniform((wdim, hdim))  # cdd : candidate
    self.U_gru_cdd = uniform((hdim, hdim))
    self.b_gru_cdd = zero((hdim))
    self.U_gru_cdd_ctx = uniform((cdim, hdim))
    ### prediction ###
    self.W_pred = uniform((hdim * 2, self.y_vocab))
    self.b_pred = zero((self.y_vocab))

    # All trainable parameters, in declaration order.
    self.params = [getattr(self, name) for name in (
        'W_ctx_init', 'b_ctx_init', 'W_ctx_att', 'b_ctx_att', 'W_dim_att',
        'U_att', 'c_att', 'W_emb', 'W_gru_ctx', 'b_gru_ctx', 'W_gru',
        'U_gru', 'b_gru', 'U_gru_ctx', 'W_gru_cdd', 'U_gru_cdd',
        'b_gru_cdd', 'U_gru_cdd_ctx', 'W_pred', 'b_pred')]
# for word in sorted(word_index, key=word_index.get): # print('{}\t{}'.format(word, word_index[word])) # print('\nTag list:') # for tag in sorted(tag_index, key=tag_index.get): # print('{}\t{}'.format(tag, tag_index[tag])) print('\nLoading embeddings...') embeddings_index = {} with open(os.path.join(EMBEDDING_DIR, 'embeddings-scaled.50.txt')) as f: for line in f: values = line.split() word = values[0] coefs = np.asarray(values[1:], dtype='float32') embeddings_index[word] = coefs print('Found {} word vectors.'.format(len(embeddings_index)-5)) embeddings_index[START_OF_SENTENCE] = initializations.uniform(50, scale=2.0).eval() embeddings_index[END_OF_SENTENCE] = initializations.uniform(50, scale=2.0).eval() embeddings_index[UNKNOWN_UPPERCASE_ALNUM] = initializations.uniform(50, scale=2.0).eval() embeddings_index[UNKNOWN_LOWERCASE_ALNUM] = initializations.uniform(50, scale=2.0).eval() embeddings_index[UNKNOWN_NON_ALNUM] = initializations.uniform(50, scale=2.0).eval() # add dev and test vocabulary into word_index print('\nDev vocab:') dev_tokenizer = Tokenizer(lower=True, cutoff=0, nb_unknowns=3) dev_tokenizer.fit_on_texts(X_dev, verbose=True) print(len(dev_tokenizer.word_index.keys())) print('\nTest vocab:') test_tokenizer = Tokenizer(lower=True, cutoff=0, nb_unknowns=3) test_tokenizer.fit_on_texts(X_test, verbose=True) print(len(test_tokenizer.word_index.keys()))
def norm(scale):
    """Return a uniform initializer bound to the given scale."""
    def _init(shape, name=None):
        return initializations.uniform(shape, scale=scale, name=name)
    return _init
def build_model(dp, word_count_threshold, word_embedding_dim,
                image_embedding_dim, hidden_size, batch_size, num_vocab):
    """Build a NeuralTalk-style image-captioning LSTM as compiled Theano functions.

    Returns (train_function, params, Y_function, Hout_function).
    `dp` and `word_count_threshold` are accepted but not used in this body.

    NOTE(review): relies on module-level names `dropout`, `numpy_floatX`,
    `regularization_ratio` and `RMSprop` -- confirm they are defined.
    """
    # Image encoder (4096 -> embedding dim) and word embedding
    # (vocab -> embedding dim).
    ''' 일단 image encoder ( 4096 -> embedding dim )와 text encoder ( vocab dim -> embedding dim)을 정의하자 '''
    We = initializations.uniform((4096, image_embedding_dim))
    be = initializations.zero((image_embedding_dim,))
    Ws = initializations.uniform((num_vocab, word_embedding_dim))

    # Text decoder (hidden dim -> vocab dim).
    ''' text decoder (hidden dim -> vocab dim)을 정의하자 '''
    Wd = initializations.uniform((hidden_size, num_vocab))
    bd = initializations.zero((num_vocab,))

    # Project the image batch into the embedding space and add a singleton
    # time axis: (batch, 1, embedding_dim).
    ''' 이미지(batch) -> image_embedding_dim '''
    image = T.matrix()
    embedded_image = T.dot(image, We) + be
    embedded_image = embedded_image.dimshuffle(0,'x',1)

    # Sentence word ids plus the 0/1 validity mask.
    ''' sentence '''
    sentence = T.matrix(dtype='int32')
    mask = T.matrix()
    embedded_sentence = Ws[sentence] # (batch, sentence length, embedding_dim)

    # Prepend the image embedding as the first "word", then reorder to
    # time-major (time, batch, embedding_dim) for scan, with dropout.
    ''' 이미지를 sentence의 맨 앞에 붙임 '''
    X = T.concatenate([embedded_image, embedded_sentence], axis=1)
    X = X.dimshuffle(1,0,2)
    X = dropout(X, 0.5)

    # One fused weight matrix for the i/f/o/g gates; the leading constant-1
    # column acts as the bias row.  NOTE(review): the input-side width
    # 1 + word_embedding_dim*2 implies hidden_size == word_embedding_dim
    # (Hin is [1 | x_t | h_t_1]) -- confirm the caller's dims satisfy this.
    ''' LSTM weight ( i, f, c, o에 대한 weight들 ) 을 정의하자 '''
    WLSTM = initializations.uniform((1+word_embedding_dim*2, 4*hidden_size))
    bias = T.alloc(numpy_floatX(1.), batch_size, 1)

    def _step(b, x_t, h_t_1, m_, c_, weight):
        # Single LSTM timestep.  b: constant-1 bias column, m_: mask column
        # for this step, c_: previous cell state.
        Hin = T.concatenate([b, x_t, h_t_1], axis=1) # concat 1, x[t], h[t-1]
        IFOG = T.dot(Hin, weight)
        # sigmoid on the i/f/o gates, tanh on the candidate g.
        ifo = T.nnet.sigmoid(IFOG[:, :3*hidden_size])
        g = T.tanh(IFOG[:, 3*hidden_size:])
        IFOGf = T.concatenate([ifo, g], axis=1)
        # c = i * g + f * c_prev
        c = IFOGf[:, :hidden_size] * IFOGf[:, 3*hidden_size:] + c_ * IFOGf[:,hidden_size:2*hidden_size]
        # Masked-out steps carry the previous state through unchanged.
        c = c * m_[:,None] + c_ * (1. - m_)[:,None]
        # NOTE(review): output gate multiplies raw c (no tanh(c)) -- confirm
        # this matches the intended LSTM variant.
        Hout = IFOGf[:, 2*hidden_size:3*hidden_size] * c
        Hout = Hout * m_[:,None] + h_t_1*(1. - m_)[:,None]
        return Hout, c

    # Scan over time; the lambda remaps scan's (sequences, outputs_info,
    # non_sequences) argument order into _step's order.
    (Houts, cells), updates = theano.scan(
        fn = lambda x, m, h, c, b, weight: _step(b,x,h, m, c, weight),
        sequences=[X, mask.T],
        outputs_info= [ T.alloc(numpy_floatX(0.),batch_size, hidden_size),
                        T.alloc(numpy_floatX(0.),batch_size, hidden_size) ],
        non_sequences=[bias, WLSTM])
    # Back to batch-major: (batch, time, hidden).
    Houts = Houts.dimshuffle(1,0,2)

    # Per-timestep unnormalized vocabulary scores (softmax left disabled).
    Y, updates = theano.scan(fn=lambda hout, wd,dd: T.dot(hout, wd) + dd, #T.nnet.softmax(T.dot(hout, wd)+dd),
                             sequences=[Houts],
                             non_sequences=[Wd,bd])
    # Drop the image timestep; predictions align with sentence[1:].
    Y = Y[:,1:,:]
    n_timestep=Y.shape[1]

    # Negative log-likelihood of the next word at unmasked positions.
    # NOTE(review): the lambda's per-row `m` is unused and the full `mask`
    # matrix is closed over instead -- verify the boolean indexing is the
    # intended behavior.
    losses,_ = theano.scan(
        fn=lambda y, m, sen: -T.log(1e-20 + y[T.arange(n_timestep), sen[1:]][mask != 0.0]),
        sequences=[Y, mask, sentence])
    loss = T.sum(losses) / Y.shape[0]
    # L2 penalty on the recurrent and decoder weights.
    loss += regularization_ratio * 0.5 * T.sum(WLSTM * WLSTM)
    loss += regularization_ratio * 0.5 * T.sum(Wd * Wd)

    params = [We, be, Ws, WLSTM, Wd, bd]
    updates = RMSprop(cost=loss, params=params)

    # NOTE(review): Y_function and Hout_function also pass the RMSprop
    # updates, so calling them for inference mutates the parameters --
    # confirm this is intended.
    train_function = theano.function(inputs=[image, sentence, mask],
                                     outputs=loss, updates=updates,
                                     allow_input_downcast=True)
    Y_function = theano.function(inputs=[image, sentence, mask],
                                 outputs=Y, updates=updates,
                                 allow_input_downcast=True)
    Hout_function = theano.function(inputs=[image, sentence, mask],
                                    outputs=Houts, updates=updates,
                                    allow_input_downcast=True)

    return train_function, params, Y_function, Hout_function