def createKerasModel(self, state_size):
    """Build the Q-network: a JZS1 recurrent layer with a linear readout.

    Args:
        state_size: dimensionality of a single state observation fed to
            the recurrent layer.

    Returns:
        An uncompiled Keras ``Sequential`` model mapping a state history
        to one linear output per action (``self.nb_actions``).
    """
    model = Sequential()
    # Recurrent encoder; BPTT is truncated to the configured history
    # window to keep gradient propagation bounded.
    model.add(JZS1(state_size,
                   self.hidden_neurons,
                   activation='tanh',
                   inner_activation='tanh',
                   truncate_gradient=self.history_length))
    # Linear head: Q-values are unbounded, so no output squashing.
    model.add(Dense(self.hidden_neurons, self.nb_actions,
                    activation='linear'))
    return model
### Load VGGNet (CNN)
# Pretrained weights (553MB):
# https://gist.github.com/baraldilorenzo/07d7802847aaad0a35d3
vggnet = VGG_16('vgg16_weights.h5')
# Drop the final Dense layer and the Dropout before it so the image
# branch ends at fc7; layers and params are parallel lists, so both are
# popped in lockstep.
for _ in range(2):
    vggnet.layers.pop()
    vggnet.params.pop()
# Project the 4096-d fc7 features to HIDDEN_SIZE so the image branch
# matches the question branch at the Merge layer.
vggnet.add(Dense(4096, HIDDEN_SIZE, activation="relu"))

### Build Question RNN
qnet = Sequential()
qnet.add(Embedding(vocab_size + 1, EMBED_SIZE, mask_zero=True))
qnet.add(JZS1(EMBED_SIZE, HIDDEN_SIZE))
qnet.add(Dense(HIDDEN_SIZE, HIDDEN_SIZE, activation="relu"))

### Merged model
model = Sequential()
# Concatenate image + question features: output_dim = 2*HIDDEN_SIZE.
model.add(Merge([vggnet, qnet], mode="concat", concat_axis=1))
#model.add(Dense(2*HIDDEN_SIZE, HIDDEN_SIZE, activation="relu"))
# Decode a fixed-length answer: repeat the fused vector once per output
# token, then emit a softmax over the vocabulary at every timestep.
model.add(RepeatVector(ans_maxlen))
model.add(JZS1(2 * HIDDEN_SIZE, HIDDEN_SIZE, return_sequences=True))
model.add(TimeDistributedDense(HIDDEN_SIZE, vocab_size,
                               activation="softmax"))

print('Model compiling ...')
lr = 0.01
opt = Adam(lr=lr)
print(X_train.shape, 'train sequences')
print("YS Shape: ", ys.shape)

embedding_size = 64
hidden_size = 512

print('Build model...')
# Sequence-to-sequence model: embed tokens, encode with a JZS1 RNN into a
# single summary vector, repeat it MAX_LEN times, and decode a softmax
# distribution over max_features tokens at every output timestep.
model = Sequential()
model.add(Embedding(max_features, embedding_size))
model.add(JZS1(embedding_size, hidden_size))  # GRU-like recurrent encoder
model.add(Dense(hidden_size, hidden_size))
model.add(Activation('relu'))
model.add(RepeatVector(MAX_LEN))
model.add(JZS1(hidden_size, hidden_size, return_sequences=True))
model.add(TimeDistributedDense(hidden_size, max_features,
                               activation="softmax"))

# FIX: the output is a per-timestep softmax over max_features classes, so
# train with categorical cross-entropy instead of mean squared error —
# mse yields weak, poorly-scaled gradients behind a softmax.
# NOTE(review): assumes ys is one-hot encoded per timestep — verify how
# ys is built upstream before relying on this.
model.compile(loss='categorical_crossentropy', optimizer='adam')

print("Train...")
print('Build model ...')

'''
### Load VGGNet (CNN)
vggnet = VGG_16('vgg16_weights.h5')  # download(553MB) site: https://gist.github.com/baraldilorenzo/07d7802847aaad0a35d3 --> weights
vggnet.layers.pop()  # pop last Dense layer to connect to fc7
vggnet.layers.pop()  # pop Dropout layer
vggnet.params.pop()
vggnet.params.pop()
vggnet.add(Dense(4096, HIDDEN_SIZE, activation="relu"))  # to match the dimensions of image-question features (in Merge layer)
'''

### Question RNN only — the VGG image branch above is disabled.
model = Sequential()
# Skip zero-padded timesteps in the (presumably one-hot) question input.
model.add(Masking(mask_value=0))
model.add(JZS1(vocab_size, HIDDEN_SIZE))

### Decoder (the two-branch Merge variant is kept for reference)
# model = Sequential()
# model.add(Merge([vggnet, qnet], mode="concat", concat_axis=1))  # output_dim = 2*HIDDEN_SIZE
# model.add(Dense(HIDDEN_SIZE, HIDDEN_SIZE, activation="relu"))
# Repeat the question encoding once per answer token and emit a softmax
# over the vocabulary at each timestep.
model.add(RepeatVector(ans_maxlen))
model.add(JZS1(HIDDEN_SIZE, HIDDEN_SIZE, return_sequences=True))
model.add(TimeDistributedDense(HIDDEN_SIZE, vocab_size,
                               activation="softmax"))

print('Model compiling ...')
opt = Adam(lr=0.000125)
# Alternatives tried: mean_squared_error, categorical_crossentropy.
model.compile(optimizer=opt, loss='categorical_crossentropy')
#def reverse(lst): # return lst[::-1] #X_train, X_test = np.asarray( map(reverse, X_train) ), np.asarray( map(reverse, X_test)) print('X_train shape:', X_train.shape) print('X_test shape:', X_test.shape) embedding_size = 64 print('Build model...') model = Sequential() model.add(Embedding(max_features, embedding_size)) #model.add(LSTM(embedding_size, 64)) # try using a GRU instead, for fun #model.add(GRU(embedding_size, embedding_size)) # try using a GRU instead, for fun model.add(JZS1(embedding_size, 64)) # try using a GRU instead, for fun #JSZ1, embedding = 64, 64 hidden = 0.708 #model.add(Dropout(0.2)) model.add(Dense(64, 1)) model.add(Activation('sigmoid')) # try using different optimizers and different optimizer configs model.compile(loss='binary_crossentropy', optimizer='adam', class_mode="binary") #model.compile(loss='hinge', optimizer='adagrad', class_mode="binary") print("Train...") last_accuracy = 0 iterations = 0 decreases = 0 def test(epochs = 1):