def keras_skip_gram(trainlist1, weight1, weight2):
    N, d = weight1.shape
    negative_num = trainlist1[0][2].shape[1]
    # shared_layer1 is the output layer, shared_layer2 is the hidden layer
    shared_layer1 = Embedding(input_dim=N, output_dim=d, weights=[weight1])
    shared_layer2 = Embedding(input_dim=N, output_dim=d, weights=[weight2])
    input_target = Input(shape=(1,), dtype='int32', name='input_1')
    input_source = Input(shape=(1,), dtype='int32', name='input_2')
    input_negative = Input(shape=(negative_num,), dtype='int32', name='input_3')
    target = shared_layer1(input_target)
    source = shared_layer2(input_source)
    negative = shared_layer1(input_negative)
    positive_dot = dot([source, target], axes=2, normalize=False)
    negative_dot = dot([source, negative], axes=2, normalize=False)
    all_dot = concatenate([positive_dot, negative_dot], axis=2)
    sigmoid_sample = Activation('sigmoid')(all_dot)
    model = Model(inputs=[input_target, input_source, input_negative],
                  outputs=[sigmoid_sample])
    sgd2 = optimizers.SGD(lr=0.025, nesterov=True)
    model.compile(loss='binary_crossentropy', optimizer=sgd2)
    for [a1, a2, a4, y1] in trainlist1:
        loss = model.train_on_batch([a1, a2, a4], y1)
    embed_output = shared_layer1.get_weights()[0]
    embed_hidden = shared_layer2.get_weights()[0]
    return embed_output, embed_hidden
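# --- Hedged usage sketch (not from the source) ---
# A minimal illustration of the batch format keras_skip_gram() above appears to expect:
# each entry of trainlist1 is [targets, sources, negatives, labels]. The positive and negative
# dot products are concatenated to shape (batch, 1, negative_num + 1), so the labels must have
# that shape with a 1 in the first slot (the positive pair). All names below are toy data.
import numpy as np

N, d, negative_num, batch = 100, 16, 5, 8
weight1 = np.random.uniform(-0.5 / d, 0.5 / d, (N, d))  # output-layer weights
weight2 = np.random.uniform(-0.5 / d, 0.5 / d, (N, d))  # hidden-layer weights

def make_batch(pairs, num_neg=negative_num):
    """pairs: list of (target_index, source_index) tuples for one batch."""
    a1 = np.array([[t] for t, _ in pairs], dtype='int32')                # targets, shape (batch, 1)
    a2 = np.array([[s] for _, s in pairs], dtype='int32')                # sources, shape (batch, 1)
    a4 = np.random.randint(0, N, (len(pairs), num_neg)).astype('int32')  # negative samples
    y1 = np.zeros((len(pairs), 1, num_neg + 1), dtype='float32')
    y1[:, 0, 0] = 1.0  # only the positive dot product is labelled 1
    return [a1, a2, a4, y1]

trainlist1 = [make_batch([(i, (i + 1) % N) for i in range(b, b + batch)])
              for b in range(0, 40, batch)]
embed_output, embed_hidden = keras_skip_gram(trainlist1, weight1, weight2)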
class SkipModelNS(Model):
    def __init__(self, vocab_size, embedding_dim, num_ns=4):
        super(SkipModelNS, self).__init__()
        self.target_embedding = Embedding(
            vocab_size,
            embedding_dim,
            input_length=1,
            name="skip_embedding",
        )
        self.context_embedding = Embedding(
            vocab_size,
            embedding_dim,
            input_length=num_ns + 1,
        )
        self.dots = Dot(axes=(3, 2))
        self.flatten = Flatten()

    def call(self, inputs, **kwargs):
        target, context = inputs
        targets = self.target_embedding(target)
        contexts = self.context_embedding(context)
        d = self.dots([contexts, targets])
        fl = self.flatten(d)
        return fl

    def get_embedding_matrix(self):
        weights = np.array(self.target_embedding.get_weights())
        return weights
class _OptimizerEmbedding(_OptimizerParametrical): patience = 25 steps = 20 def __init__(self, **kwargs): super(_OptimizerEmbedding, self).__init__(**kwargs) def _store_weights(self): with open(self.weightpath, "wb") as handle: _pickle.dump(self.embedding.get_weights()[0], handle) def _prep(self): super(_OptimizerEmbedding, self)._prep() self.orig_emb = K.constant(self.weights["emb"][0]) self.embedding = Embedding(\ input_dim = self.length, output_dim = self.configs["emb"]["output_dim"], name = "emb_exp") if self.calc_hard: self.snap = SnapToClosestLayer(self.orig_emb, mode = "max", name = "snap") self.cosine = PairwiseCosinesLayer(self.orig_emb, name = "cosine") self.max = Lambda(lambda x:K.max(x, axis = -1), output_shape = lambda shape:shape[:-1], name = "max") def _get_output(self, embedded): encoded = self.encoder(embedded) if self.with_projection: encoded = self.projection(encoded) return self.selector(encoded) def _get_best_ngram(self): similarities = cosine_similarity(self.embedding.get_weights()[0], self.weights["emb"][0]) return similarities.argmax(-1)
class CbowModelNS(Model):
    def __init__(self, vocab_size, embedding_dim, num_ns, window):
        super(CbowModelNS, self).__init__()
        self.embedding_layer = Embedding(vocab_size, embedding_dim,
                                         input_length=window * 2,
                                         name="cbow_embedding")
        self.target_layer = Embedding(vocab_size, embedding_dim,
                                      input_length=num_ns + 1)
        self.dot = Dot(axes=(3, 2))
        self.flatten = Flatten()

    def call(self, inputs, training=None, mask=None):
        context, target = inputs
        ce = self.embedding_layer(context)
        s = reduce_sum(ce, 1, keepdims=True)
        te = self.target_layer(target)
        dot = self.dot([te, s])
        return self.flatten(dot)

    def get_embedding_matrix(self):
        weights = np.array(self.embedding_layer.get_weights())
        return weights
def test_on_masked_input(self):
    # Average over a dimension in which some elements are masked, and
    # check that they are masked correctly in the average.
    dimension_to_average = 1
    num_dimensions = 3
    sentence_length = 5
    embedding_dim = 10
    vocabulary_size = 15
    input_layer = Input(shape=(sentence_length,), dtype='int32')
    # Embedding masks zeros
    embedding = Embedding(input_dim=vocabulary_size, output_dim=embedding_dim, mask_zero=True)
    encoder = AveragedBOWEncoder(dimension_to_average, num_dimensions)
    embedded_input = embedding(input_layer)
    encoded_input = encoder(embedded_input)
    encoder_mask = OutputMask()(encoded_input)
    model = Model(inputs=input_layer, outputs=[encoded_input, encoder_mask])
    test_input = numpy.asarray([[0, 3, 1, 7, 10]], dtype='int32')
    embedding_weights = embedding.get_weights()[0]  # get_weights returns a list with one element.
    # Don't take the first element because it should be masked.
    expected_output = numpy.mean(embedding_weights[test_input[:, 1:]],
                                 axis=dimension_to_average)
    actual_output, actual_mask = model.predict(test_input)
    # The mask should now be a single value per instance, since the masked
    # (sequence) dimension was averaged away.
    numpy.testing.assert_array_equal(actual_mask, numpy.array([True]))
    numpy.testing.assert_array_almost_equal(expected_output, actual_output)
def test_mask_is_propagated_if_required(self):
    # Here we test averaging over a dimension which is not masked, but in which the
    # output still requires a mask.
    dimension_to_average = 2
    num_dimensions = 3
    sentence_length = 5
    embedding_dim = 10
    vocabulary_size = 15
    input_layer = Input(shape=(sentence_length,), dtype='int32')
    # Embedding masks zeros
    embedding = Embedding(input_dim=vocabulary_size, output_dim=embedding_dim, mask_zero=True)
    encoder = AveragedBOWEncoder(dimension_to_average, num_dimensions)
    embedded_input = embedding(input_layer)
    encoded_input = encoder(embedded_input)
    encoder_mask = OutputMask()(encoded_input)
    model = Model(inputs=input_layer, outputs=[encoded_input, encoder_mask])
    test_input = numpy.asarray([[0, 3, 1, 7, 10]], dtype='int32')
    embedding_weights = embedding.get_weights()[0]  # get_weights returns a list with one element.
    # Here, the dimension we are reducing is the embedding dimension. In this case,
    # the actual value of the returned output should be equal to averaging without masking
    # (as there is nothing to mask in a dimension not covered by the mask), but the mask should
    # be propagated through the layer, still masking the correct index.
    expected_output = numpy.mean(embedding_weights[test_input], axis=dimension_to_average)
    actual_output, actual_mask = model.predict(test_input)
    # First index should still be masked.
    numpy.testing.assert_array_equal(actual_mask,
                                     numpy.array([[False, True, True, True, True]]))
    numpy.testing.assert_array_almost_equal(expected_output, actual_output)
def run_training(num_classes, X, y):
    """
    Perform the training run

    Args:
        num_classes - number of classes for the labels
        X - ground truth data
        y - ground truth labels
    """
    inputs = Input((window_size * 2,))
    ##TODO##: Complete embedding_layer code
    embedding_layer = Embedding(num_classes, embedding_size,
                                input_length=2 * window_size,
                                name='embedding_layer')
    ##TODO##: Complete mean_layer code
    mean_layer = Lambda(lambda x: K.mean(x, axis=1))
    ##TODO##: Complete output layer code
    output_layer = Dense(num_classes, activation='softmax')
    output = embedding_layer(inputs)
    output = mean_layer(output)
    output = output_layer(output)
    model = Model(inputs=[inputs], outputs=output)
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(lr=0.1, rho=0.99),
                  metrics=['accuracy'])
    model.fit(X, y, batch_size=16, epochs=170, validation_split=0.1, verbose=2)
    return embedding_layer.get_weights()[0]
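# --- Hedged usage sketch (not from the source) ---
# run_training() above reads window_size and embedding_size from the enclosing scope and treats
# num_classes as the vocabulary size. Below is one plausible way to build the (X, y) pairs it
# consumes for CBOW training: each row of X holds the 2 * window_size context word ids and y is
# the one-hot encoded centre word. make_cbow_pairs and corpus_ids are hypothetical helpers.
import numpy as np
from keras.utils import to_categorical

def make_cbow_pairs(token_ids, vocab_size, window):
    X, y = [], []
    for i in range(window, len(token_ids) - window):
        context = token_ids[i - window:i] + token_ids[i + 1:i + window + 1]
        X.append(context)
        y.append(token_ids[i])
    return np.array(X), to_categorical(y, num_classes=vocab_size)

corpus_ids = [1, 4, 2, 7, 3, 5, 2, 9, 1, 6]   # toy corpus already mapped to integer ids
window_size, embedding_size = 2, 8            # globals assumed by run_training
X, y = make_cbow_pairs(corpus_ids, vocab_size=10, window=window_size)
embeddings = run_training(num_classes=10, X=X, y=y)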
class _OptimizerProbabilities(_OptimizerParametrical): steps = 200 def __init__(self, **kwargs): super(_OptimizerProbabilities, self).__init__(**kwargs) def _prep(self): super(_OptimizerProbabilities, self)._prep() self.argmax = Argmax(name = "argmax") tmp = Dense(units = self.configs["emb"]["output_dim"], activation = "linear", use_bias = False) self.embedding = TimeDistributed(tmp, name = "emb_exp", trainable = False, weights = self.weights["emb"]) self.logits = Embedding(input_dim = self.length, output_dim = len(self.dictionary), name = "logits") def _store_weights(self): with open(self.weightpath, "wb") as handle: _pickle.dump(self.logits.get_weights()[0], handle) def _get_output(self, probabilities): encoded = self.encoder(self.embedding(probabilities)) if self.with_projection: encoded = self.projection(encoded) return self.selector(encoded) def _build(self): logits = self.logits(self.dummy_input) selection_soft = self._get_output(self._get_probabilities(logits)) self.outputs.append(NameLayer("s")(selection_soft)) self.outputs.append(NameLayer("L")(selection_soft)) self.outputs.append(NameLayer("o_logits")(logits)) if self.calc_hard: argmax = self.argmax(logits) self.outputs.append(NameLayer("o_argmax")(argmax)) selection_hard = self._get_output(argmax) self.outputs.append(NameLayer("h")(selection_hard)) def _get_best_ngram(self): return self.logits.get_weights()[0].argmax(axis = -1)
def build(self): question, answer = self._get_inputs() # add embedding layers embedding = Embedding(self.config['n_words'], self.model_params.get('n_embed_dims', 141)) question_embedding = embedding(question) a_embedding = Embedding(self.config['n_words'], self.model_params.get('n_embed_dims', 141)) answer_embedding = embedding(answer) a_embedding.set_weights(embedding.get_weights()) # dropout dropout = Dropout(0.5) question_dropout = dropout(question_embedding) answer_dropout = dropout(answer_embedding) # rnn forward_lstm = LSTM(self.config.get('n_lstm_dims', 141), consume_less='mem', return_sequences=True) backward_lstm = LSTM(self.config.get('n_lstm_dims', 141), consume_less='mem', return_sequences=True) question_lstm = merge([forward_lstm(question_dropout), backward_lstm(question_dropout)], mode='concat', concat_axis=-1) # dropout question_dropout = dropout(question_lstm) # maxpooling maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2])) question_pool = maxpool(question_dropout) # activation activation = Activation('tanh') question_output = activation(question_pool) question_model = Model(input=[question], output=[question_output]) # attentional rnn forward_lstm = AttentionLSTM(self.config.get('n_lstm_dims', 141), question_output, consume_less='mem', return_sequences=True) backward_lstm = AttentionLSTM(self.config.get('n_lstm_dims', 141), question_output, consume_less='mem', return_sequences=True) answer_lstm = merge([forward_lstm(answer_dropout), backward_lstm(answer_dropout)], mode='concat', concat_axis=-1) # dropout answer_dropout = dropout(answer_lstm) # maxpooling maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2])) answer_pool = maxpool(answer_dropout) # activation activation = Activation('tanh') answer_output = activation(answer_pool) answer_model = Model(input=[question, answer], output=[answer_output]) return question_model, answer_model
def keras_multiclass(trainlist, weight1, weight2):
    N, d = weight1.shape
    Nc, d = weight2.shape
    shared_layer1 = Embedding(input_dim=N, output_dim=d, weights=[weight1])
    shared_layer2 = Embedding(input_dim=Nc, output_dim=d, weights=[weight2])
    input_target = Input(shape=(1,), dtype='int32', name='input_target')
    input_negative = Input(shape=(Nc,), dtype='int32', name='input_beta')
    target = shared_layer1(input_target)
    beta = shared_layer2(input_negative)
    score_dot = dot([target, beta], axes=2, normalize=False)
    softmax_out = Activation('softmax')(score_dot)
    model = Model(inputs=[input_target, input_negative], outputs=[softmax_out])
    sgd = optimizers.SGD(lr=0.025, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd)
    for [a1, a2, y1] in trainlist:
        loss2 = model.train_on_batch([a1, a2], y1)
    embed_emb = shared_layer1.get_weights()[0]
    embed_beta = shared_layer2.get_weights()[0]
    return embed_emb, embed_beta
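# --- Hedged usage sketch (not from the source) ---
# One plausible batch layout for keras_multiclass() above: a1 holds a single target row index
# per sample (shape (batch, 1)), a2 enumerates all Nc class rows of weight2 so the dot product
# yields (batch, 1, Nc) class scores, and y1 is the matching one-hot label tensor. Toy values only.
import numpy as np

N, Nc, d, batch = 50, 6, 16, 4
weight1 = np.random.randn(N, d)
weight2 = np.random.randn(Nc, d)

a1 = np.random.randint(0, N, (batch, 1)).astype('int32')   # target indices
a2 = np.tile(np.arange(Nc, dtype='int32'), (batch, 1))     # every class index, per sample
true_class = np.random.randint(0, Nc, batch)
y1 = np.zeros((batch, 1, Nc), dtype='float32')
y1[np.arange(batch), 0, true_class] = 1.0                  # one-hot labels, shape (batch, 1, Nc)

trainlist = [[a1, a2, y1]]
embed_emb, embed_beta = keras_multiclass(trainlist, weight1, weight2)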
def keras_sg_embedding(trainlist, weight1, weight2):  # weight1, weight2 are N x d numpy matrices
    """The initial weights are weight1 (output weights) and weight2 (hidden weights);
    the training input updates the weights by gradient descent."""
    N, d = weight1.shape
    negative_num = trainlist[0][2].shape[1]
    # emb_target is the output layer, emb_source is the hidden layer
    emb_target = Embedding(input_dim=N, output_dim=d, name='emb_target', weights=[weight1])
    emb_source = Embedding(input_dim=N, output_dim=d, name='emb_source', weights=[weight2])
    input_target = Input(shape=(1,), dtype='int32', name='input_target')
    input_source = Input(shape=(1,), dtype='int32', name='input_source')
    input_negative = Input(shape=(negative_num,), dtype='int32', name='input_negative')
    target = emb_target(input_target)
    source = emb_source(input_source)
    negative = emb_target(input_negative)
    positive_dot = dot([source, target], axes=2, normalize=False)
    negative_dot = dot([source, negative], axes=2, normalize=False)
    all_dot = concatenate([positive_dot, negative_dot], axis=2)
    # Negative sampling scores each pair independently, so use a sigmoid
    # (not a softmax) to match the binary cross-entropy loss below.
    sigmoid_sample = Activation('sigmoid')(all_dot)
    model = Model(inputs=[input_target, input_source, input_negative],
                  outputs=[sigmoid_sample])
    model.summary()
    sgd = optimizers.SGD(lr=0.025, nesterov=True)
    model.compile(loss='binary_crossentropy', optimizer=sgd)
    ind = 0
    batch_size = len(trainlist) // 20
    for [a1, a2, a4, y1] in trainlist:
        loss = model.train_on_batch([a1, a2, a4], y1)
        # print("Epoch %2d batch %4d: loss = %.4f" % (ind/batch_size + 1, ind%batch_size + 1, loss))
        ind += 1
    emb_target1 = emb_target.get_weights()[0]
    emb_source1 = emb_source.get_weights()[0]
    return emb_target1, emb_source1
def Keras_skip_gram(G, walks, iteration):
    """
    Keras to run word2vec algorithm with skip_gram model.
    """
    walks_sentences = [list(np.array(walk)) for walk in walks]
    embedding1 = np.random.uniform(-1 / G.embedding_size, 1 / G.embedding_size,
                                   (G.vocabulary, G.embedding_size))
    embedding2 = np.random.uniform(-1 / G.embedding_size, 1 / G.embedding_size,
                                   (G.vocabulary, G.embedding_size))
    shared_layer1 = Embedding(input_dim=G.vocabulary, output_dim=G.embedding_size,
                              weights=[embedding1])
    shared_layer2 = Embedding(input_dim=G.vocabulary, output_dim=G.embedding_size,
                              weights=[embedding2])
    input_target = Input(shape=(1,), dtype='int32', name='input_1')
    input_source = Input(shape=(1,), dtype='int32', name='input_2')
    input_negative = Input(shape=(G.negative,), dtype='int32', name='input_3')
    target = shared_layer1(input_target)
    source = shared_layer2(input_source)
    negative = shared_layer1(input_negative)
    positive_dot = dot([source, target], axes=2, normalize=False)
    negative_dot = dot([source, negative], axes=2, normalize=False)
    all_dot = concatenate([positive_dot, negative_dot], axis=2)
    sigmoid_sample = Activation('sigmoid')(all_dot)
    model = Model(inputs=[input_target, input_source, input_negative],
                  outputs=[sigmoid_sample])
    sgd2 = optimizers.SGD(lr=0.025, nesterov=True)
    model.compile(loss='binary_crossentropy', optimizer=sgd2)
    train_list = skip_train(walks_sentences, G.window_size)
    for i in range(iteration):
        for [a1, a2, a4, y1] in train_list:
            loss = model.train_on_batch([a1, a2, a4], y1)
    embed = shared_layer2.get_weights()[0]
    return embed
def test_on_masked_input(self):
    sentence_length = 5
    embedding_size = 10
    vocabulary_size = 15
    input_layer = Input(shape=(sentence_length,), dtype='int32')
    # Embedding masks zeros
    embedding = Embedding(input_dim=vocabulary_size, output_dim=embedding_size, mask_zero=True)
    encoder = BOWEncoder()
    embedded_input = embedding(input_layer)
    encoded_input = encoder(embedded_input)
    model = Model(input=input_layer, output=encoded_input)
    model.compile(loss="mse", optimizer="sgd")  # Will not train this model
    test_input = numpy.asarray([[0, 3, 1, 7, 10]], dtype='int32')
    embedding_weights = embedding.get_weights()[0]  # get_weights returns a list with one element.
    # Omitting the first element (0), because that is supposed to be masked in the model.
    expected_output = numpy.mean(embedding_weights[test_input[:, 1:]], axis=1)
    actual_output = model.predict(test_input)
    # The following comparison is to the sixth decimal.
    numpy.testing.assert_array_almost_equal(expected_output, actual_output)
def test_on_unmasked_input(self):
    sentence_length = 5
    embedding_dim = 10
    vocabulary_size = 15
    input_layer = Input(shape=(sentence_length,), dtype='int32')
    # Embedding does not mask zeros
    embedding = Embedding(input_dim=vocabulary_size, output_dim=embedding_dim)
    encoder = BOWEncoder()
    embedded_input = embedding(input_layer)
    encoded_input = encoder(embedded_input)
    model = Model(inputs=input_layer, outputs=encoded_input)
    model.compile(loss="mse", optimizer="sgd")  # Will not train this model
    test_input = numpy.asarray([[0, 3, 1, 7, 10]], dtype='int32')
    embedding_weights = embedding.get_weights()[0]  # get_weights returns a list with one element.
    expected_output = numpy.mean(embedding_weights[test_input], axis=1)
    actual_output = model.predict(test_input)
    numpy.testing.assert_array_almost_equal(expected_output, actual_output)
def test_on_unmasked_input(self):
    sentence_length = 5
    embedding_size = 10
    vocabulary_size = 15
    input_layer = Input(shape=(sentence_length,), dtype='int32')
    # Embedding does not mask zeros
    embedding = Embedding(input_dim=vocabulary_size, output_dim=embedding_size)
    encoder = BOWEncoder()
    embedded_input = embedding(input_layer)
    encoded_input = encoder(embedded_input)
    model = Model(input=input_layer, output=encoded_input)
    model.compile(loss="mse", optimizer="sgd")  # Will not train this model
    test_input = numpy.asarray([[0, 3, 1, 7, 10]], dtype='int32')
    embedding_weights = embedding.get_weights()[0]  # get_weights returns a list with one element.
    expected_output = numpy.mean(embedding_weights[test_input], axis=1)
    actual_output = model.predict(test_input)
    # Exact comparison of the two arrays may break because theano's floating point operations
    # usually have an epsilon. The following comparison is done to the sixth decimal, hence good enough.
    numpy.testing.assert_array_almost_equal(expected_output, actual_output)
def test_weight_initialization():
    # Three ways to initialize the embedding weights; each copies the original matrix.
    weights = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
    embedding_info1 = {
        "input_dim": 4,
        "output_dim": 3,
        "input_length": 10,
        "trainable": True
    }

    def weight_init(shape, dtype=None):
        return weights

    inputs = Input(shape=(4,))
    embedding_layer = Embedding(weights=[np.array(weights)], **embedding_info1)
    # embedding_layer = Embedding(embeddings_initializer=weight_init, **embedding_info1)
    # embedding_layer = Embedding(embeddings_initializer=Constant(weights), **embedding_info1)
    res = embedding_layer(inputs)
    model = Model(inputs=inputs, outputs=res)
    model.compile(optimizer="adam", loss="mse")
    print(embedding_layer.get_weights())
class EmbeddingTranier(object): """ A class to train the word2vec using the skip-gram approach in keras with negative sampling. """ def __init__(self, vocab_size, embedding_size, window_size=3): """ Constructor method for embedding trainer. :param vocab_size: int; the number of words in the vocabulary :param embedding_size: int; the size of embeddings to train :param window_size: int; size of the skip-gram context window """ self.vocab_size = vocab_size self.embedding_size = embedding_size self.window_size = window_size self.model = None self.embeddings = None def get_skips(self, docs): """ Formats the data and generates negative samples. :param docs: list; a list of documents; each document is a list of sentences; a sentence is a list of tokens (strings) :return: tuple; contains the center and context words, and the corresponding labels """ sampling_table = make_sampling_table(self.vocab_size) center_words, context_words, labels = [], [], [] for doc in docs: tokens = [token for sent in doc for token in sent] pairs, labels_ = skipgrams(tokens, self.vocab_size, window_size=self.window_size, sampling_table=sampling_table) try: center, context = zip(*pairs) except ValueError: continue center_words += center context_words += context labels += labels_ return center_words, context_words, labels def w2v_model(self, learning_rate): """ Generates the neural architecture for the word2vec skip-gram model :return: keras.models.Model(); the word2vec model """ # Add the input and embedding layers input_center = Input((1, )) input_context = Input((1, )) self.embeddings = Embedding(self.vocab_size, self.embedding_size, input_length=1, name="Embeddings") # Get the center and context embeddings center = self.embeddings(input_center) center = Reshape((self.embedding_size, 1))(center) context = self.embeddings(input_context) context = Reshape((self.embedding_size, 1))(context) # Calculate the linear activations # dot_product = Concatenate([center, context], mode="dot", dot_axes=1) dot_product = dot([center, context], axes=1, normalize=False) dot_product = Reshape((1, ))(dot_product) # Sigmoid activations output = Dense(1, activation="sigmoid")(dot_product) # Define the model model = Model(input=[input_center, input_context], output=output) optimizer = RMSprop(lr=learning_rate, rho=0.9, epsilon=None, decay=0.0) model.compile(loss="binary_crossentropy", optimizer=optimizer) return model def train(self, docs, num_batches=2000, learning_rate=0.001, verbose=True): """ Optimizes the model on the training data :param docs: list; a sequence of documents; each document is a list of sentences; a sentence is a list of tokens (strings) :param num_batches: int; the number of (center, context) pairs to use in training :param verbose: Boolean; if true, prints the loss druing training """ # Get the data and the model center_words, context_words, labels = self.get_skips(docs) self.model = self.w2v_model(learning_rate) # Randomly sample pair/label loss = [] for batch in range(num_batches): idx = np.random.randint(0, len(center_words) - 1) center_word = np.array([center_words[idx]]) context_word = np.array([context_words[idx]]) label = np.array([labels[idx]]) loss += [ self.model.train_on_batch([center_word, context_word], label) ] # Print the loss every 1000 batches if len(loss) >= 1000 and verbose: print(batch, sum(loss) / 1000) loss = [] def get_embedding_array(self): """ Gets the word embeddings :return: array; the trained word embeddings """ return self.embeddings.get_weights()[0]
class WordCNNModel(KerasModel): def __init__(self, dataset, filter_sizes, num_filters_per_size, layers=[], conv_activation='linear', layer_activation='relu', trainable_embeddings=False): super().__init__(dataset) assert len(filter_sizes) > 0 assert num_filters_per_size > 0 assert len(dataset.get_input_shape()) == 1 assert len(dataset.get_output_shape()) == 1 self._dataset = dataset self._filter_sizes = filter_sizes self._num_filters_per_size = num_filters_per_size self._conv_activation = conv_activation self._layer_activation = layer_activation inputs = Input(shape=dataset.get_input_shape()) self._emb_layer = Embedding( input_dim=dataset.word_embedding_model.get_vocab_size(), output_dim=dataset.word_embedding_model.get_embeddings_size(), weights=[dataset.word_embedding_model.get_embeddings()], trainable=trainable_embeddings) emb = self._emb_layer(inputs) filters = [] self._conv = {} for filter_size in filter_sizes: layer = Conv1D(num_filters_per_size, filter_size, padding='valid', activation=conv_activation) self._conv[filter_size] = layer layer = OneMaxPooling1D(axis=1, keepdims=False)(layer(emb)) filters.append(layer) if len(filters) >= 2: result = concatenate(filters, axis=1) else: result = filters[0] self._layers = list() for l in layers: layer = Dense(l, activation=layer_activation) self._layers.append(layer) result = layer(result) self._output_layer = Dense(dataset.get_output_shape()[0], activation='sigmoid' if dataset.multilabel or dataset.binary else 'softmax') output = self._output_layer(result) self._model = Model(inputs=inputs, outputs=output) def export(self, fn, label_names): with gzip.GzipFile(fn, "w") as f: json_str = json.dumps( { 'w2v': { 'vocab': self._dataset.word_embedding_model.get_vocab(), 'emb': self._emb_layer.get_weights()[0].tolist(), 'sentence_length': self._dataset.X.shape[1] }, 'label_names': self._dataset.label_names.tolist(), 'layers': [{ 'W': l.get_weights()[0].tolist(), 'b': l.get_weights()[1].tolist() } for l in self._layers], 'filters_W': { filter_size: f.get_weights()[0].tolist() for (filter_size, f) in self._conv.items() }, 'filters_b': { filter_size: f.get_weights()[1].tolist() for (filter_size, f) in self._conv.items() }, 'filter_sizes': self._filter_sizes, 'num_filters_per_size': self._num_filters_per_size, 'conv_activation': self._conv_activation, 'layer_activation': self._layer_activation, 'output_layer': { 'W': self._output_layer.get_weights()[0].tolist(), 'b': self._output_layer.get_weights()[1].tolist() }, }, indent=2) f.write(json_str.encode('utf-8'))
# In[372]:
len(labels)

# In[340]:
loss

# In[341]:
model.summary()

# In[342]:
weights = embedding.get_weights()

# In[374]:
weights[0].shape  # the embedding matrix starts from index 0, hence 3600 rows

# In[365]:
weights[0][1:].shape

# In[358]:
id2word

# In[425]:
w1 = embedding(input_w1)
w1 = Reshape((embedding_size, 1))(w1)
w2 = embedding(input_w2)
w2 = Reshape((embedding_size, 1))(w2)
w3 = embedding(input_w3)
w3 = Reshape((embedding_size, 1))(w3)
context_docid = concatenate([w1, w2, w3, docid])
context_docid = Conv1D(32, 4, padding="same")(context_docid)
context_docid = Flatten()(context_docid)
output = Dense(2, activation='softmax')(context_docid)
model = Model(input=[input_w1, input_w2, input_w3, input_docid], output=output)
model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
model.summary()
model.fit_generator(batch_generator(contexts, targets, batch_size),
                    steps_per_epoch=batch_size,
                    epochs=epochs)
save_embedding(embedding_filename + '.txt', embedding.get_weights()[0], total_docs)
tsne_plot(embedding.get_weights()[0], total_docs, labels,
          figure_name=figure_filename, max_docs=1000)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.fit_generator(generateData(batch_size=batch_size),
                    steps_per_epoch=len(decoder_input_data) // batch_size,
                    epochs=3)
# model.fit([encoder_input_data, decoder_input_data], decoder_target_one_hot,
#           batch_size=32,
#           epochs=10,
#           validation_split=0.2)

network_config = {
    'vocab_size': len(vocab),
    'thought_vector_size': THOUGHT_VECTOR_SIZE,
    'sequence_length': encoder_input_data.shape[1],
    'weights': {
        'encoder_embedding': encoder_embedding_layer.get_weights(),
        'encoder_gru': encoder_gru_layer.get_weights(),
        'decoder_embedding': decoder_embedding_layer.get_weights(),
        'decoder_gru': decoder_gru_layer.get_weights(),
        'decoder_dense': decoder_dense_layer.get_weights()
    }
}

with open('network_config.pickle', 'wb') as file:
    pickle.dump(network_config, file)

print('saved network config to "{}". Vocab size: {}. Thought vector size: {}. Sequence length: {}.'
      .format('network_config.pickle',
              network_config['vocab_size'],
              network_config['thought_vector_size'],
              network_config['sequence_length']))
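# --- Hedged restore sketch (not from the source) ---
# The pickle written above stores raw get_weights() lists, so the layers can be rebuilt later
# and re-populated with set_weights(). Shown here only for the encoder embedding; the GRU and
# dense layers would follow the same build-then-set pattern with matching layer configurations.
import pickle
from keras.layers import Embedding

with open('network_config.pickle', 'rb') as file:
    config = pickle.load(file)

emb_weights = config['weights']['encoder_embedding']           # list with one (vocab, dim) array
restored_embedding = Embedding(config['vocab_size'], emb_weights[0].shape[1],
                               input_length=config['sequence_length'])
restored_embedding.build((None, config['sequence_length']))    # create variables before set_weights
restored_embedding.set_weights(emb_weights)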
def __init__(self, num_feats, data, train=False, load_original=False, masking=True): if data == 'imdbcnn': num_words = 20002 maxlen = 400 embedding_dims = 50 hidden_dims = 250 weights_name = "original.h5" emb_name = 'embedding_1' batch_size = 40 self.num_classes = 2 num_epoch = 5 elif data == 'yahoolstm': num_words = 20001 maxlen = 400 embedding_dims = 300 hidden_dims = 250 weights_name = "original-0-7.hdf5" emb_name = 'embedding' self.num_classes = 10 batch_size = 1000 num_epoch = 1 Mean = Lambda(lambda x: K.sum(x, axis=1) / float(num_feats), output_shape=lambda x: [x[0], x[2]]) X_ph = Input(shape=(maxlen, ), dtype='int32') logits_T = construct_gumbel_selector(X_ph, num_words, embedding_dims, hidden_dims, maxlen, 1, network_type='cnn') tau = 0.5 sc_layer = Sample_Concrete(tau, num_feats, maxlen, masking) T = sc_layer(logits_T) if train: if not load_original: filters = 250 kernel_size = 3 print('transfer constucted') emb_layer = Embedding(num_words, embedding_dims, input_length=maxlen, trainable=False) emb2 = emb_layer(X_ph) selected_emb = Multiply()([emb2, T]) net = Dropout(0.2, trainable=False)(selected_emb) net = Conv1D(filters, kernel_size, padding='valid', activation='relu', strides=1, trainable=False)(net) net = Dense(hidden_dims, trainable=False)(net) net = GlobalMaxPooling1D()(net) net = Dense(hidden_dims, trainable=False)(net) net = Dropout(0.2, trainable=False)(net) net = Activation('relu', trainable=False)(net) net = Dense(self.num_classes, trainable=False)(net) preds = Activation('softmax', trainable=False)(net) model = Model(inputs=X_ph, outputs=preds) else: print('original constucted') emb_layer = Embedding(num_words, embedding_dims, input_length=maxlen, trainable=False) emb2 = emb_layer(X_ph) selected_emb = Multiply()([emb2, T]) preds = construct_original_network(selected_emb, data, trainable=False) model = Model(inputs=X_ph, outputs=preds) model.compile( loss=negative_xentropy, optimizer='RMSprop', #optimizer, metrics=['acc']) if load_original: print('Loading original models...') model.load_weights('{}/models/{}'.format(data, weights_name), by_name=True) else: model.load_weights('{}/models/transfer.hdf5'.format(data), by_name=True) if data == 'imdbcnn': emb_weights = emb_layer.get_weights() emb_weights[0][0] = np.zeros(50) emb_layer.set_weights(emb_weights) from load_data import Data dataset = Data(data, True) label_train = np.argmax(dataset.pred_train, axis=1) label_val = np.argmax(dataset.pred_val, axis=1) label_val = np.eye(self.num_classes)[label_val] label_train = np.argmax(dataset.pred_train, axis=1) filepath = "{}/models/L2X-{}-{}-mask.hdf5".format( data, num_feats, 'original' if load_original else 'transfer') checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='min') callbacks_list = [checkpoint] model.fit(dataset.x_train, label_train, validation_data=(dataset.x_val, label_val), callbacks=callbacks_list, epochs=num_epoch, batch_size=batch_size) else: pred_model = Model(X_ph, logits_T) pred_model.compile(loss=negative_xentropy, optimizer='RMSprop', metrics=['acc']) weights_name = "{}/models/L2X-{}-{}-mask.hdf5".format( data, num_feats, 'original' if load_original else 'transfer') pred_model.load_weights(weights_name, by_name=True) self.pred_model = pred_model
def __init__(self, data, train = False): self.data = data if data in ['imdbcnn']: filters = 250 hidden_dims = 250 self.embedding_dims = 50 self.maxlen = 400 self.num_classes = 2 self.num_words = 20002 self.type = 'word' if not train: K.set_learning_phase(0) X_ph = Input(shape=(self.maxlen,), dtype='int32') emb_layer = Embedding(self.num_words, self.embedding_dims, input_length=self.maxlen, name = 'embedding_1') emb_out = emb_layer(X_ph) if train: preds = construct_original_network(emb_out, data) else: emb_ph = Input(shape=(self.maxlen,self.embedding_dims), dtype='float32') preds = construct_original_network(emb_ph, data) if not train: model1 = Model(X_ph, emb_out) model2 = Model(emb_ph, preds) pred_out = model2(model1(X_ph)) pred_model = Model(X_ph, pred_out) pred_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) self.pred_model = pred_model grads = [] for c in range(self.num_classes): grads.append(tf.gradients(preds[:,c], emb_ph)) grads = tf.concat(grads, axis = 0) # [num_classes, batchsize, maxlen, embedding_dims] approxs = grads * tf.expand_dims(emb_ph, 0) # [num_classes, batchsize, maxlen, embedding_dims] self.sess = K.get_session() self.grads = grads self.approxs = approxs self.input_ph = X_ph self.emb_out = emb_out self.emb_ph = emb_ph weights_name = 'original.h5'#[i for i in os.listdir('imdblstm/models/') if i.startswith('original')][0] model1.load_weights('{}/models/{}'.format(data, weights_name), by_name=True) model2.load_weights('{}/models/{}'.format(data, weights_name), by_name=True) print('Model constructed.') # For validating the data. emb_weights = emb_layer.get_weights() emb_weights[0][0] = np.zeros(50) emb_layer.set_weights(emb_weights) else: pred_model = Model(X_ph, preds) pred_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) self.pred_model = pred_model from load_data import Data dataset = Data(self.data) self.train(dataset) print('Training is done.')
model = Sequential()
embedding = Embedding(vocab_size, embedding_size, input_length=max_len,
                      weights=[embedding_matrix])
model.add(embedding)
model.add(Flatten())
model.add(Dropout(0.2))
model.add(Dense(100, activation='sigmoid'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.summary()
model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
model.fit(data, labels, epochs=100, verbose=1, batch_size=32, shuffle=True,
          validation_data=(test_data, test_labels))
save_embedding('glove-embedding_labeled.txt', embedding.get_weights()[0], vocab)
tsne_plot(embedding, vocab, figure_name='glove-embedding_labeled', max_words=200,
          pos=['ADJ', 'VERB', 'NOUN'])
class Doubler: def __init__(self, embedding_node_dim: int, embedding_doc_dim: int, bow_feature: dict, vocabulary: set): self._name = 'doubler_' self._batch_negative = None self._node_embedding = None self._relation_embedding_node = None self._relation_embedding_doc = None self._doc_embedding = None self._graph_properties = None self._embedding_node_dim = embedding_node_dim self._embedding_doc_dim = embedding_doc_dim self._bow_feature = { str(key): value for key, value in bow_feature.items() } self._vocabulary = vocabulary def build_model(self, model_params: namedtuple, graph_properties: namedtuple, input_layer_positive: keras.layers.Input, input_layer_negative: keras.layers.Input, input_layer_relation: keras.layers.Input): self._graph_properties = graph_properties # Input Documents input_layer_doc_positive = Input(shape=(len(self._vocabulary), ), name='input_document_positive') input_layer_doc_negative = Input(shape=( model_params.num_negative + 1, len(self._vocabulary), ), name='input_document_negative') # node, relation, and document embeddings self._node_embedding = Embedding(graph_properties.num_vertices, self._embedding_node_dim, name=self._name + 'node_embedding') self._doc_embedding = Dense(self._embedding_doc_dim, name=self._name + 'document_embedding') self._relation_embedding_node = Embedding( graph_properties.num_relations, self._embedding_node_dim, name=self._name + 'relation_embedding') if self._embedding_node_dim == self._embedding_doc_dim: self._relation_embedding_doc = self._relation_embedding_node else: self._relation_embedding_doc = Embedding( graph_properties.num_relations, self._embedding_doc_dim, name=self._name + 'relation_embedding_2') # connect node, relation, and doc input and corresponding embedding layer embedding_node_layer_positive = self._node_embedding( input_layer_positive) embedding_node_layer_negative = self._node_embedding( input_layer_negative) embedding_doc_layer_positive = self._doc_embedding( input_layer_doc_positive) embedding_doc_layer_negative = self._doc_embedding( input_layer_doc_negative) embedding_layer_relation_node = self._relation_embedding_node( input_layer_relation) # we have to create a separate relation embedding layer for the documents in case that the # target dimension of the node and document embeddings differs embedding_layer_relation_doc = embedding_layer_relation_node if self._embedding_node_dim == self._embedding_doc_dim \ else self._relation_embedding_doc(input_layer_relation) # compute the center of the negative node embeddings mean_layer_node = custom_layers.Mean(name=self._name + 'avg_node_layer_negative') avg_node_layer_negative = mean_layer_node( embedding_node_layer_negative) # compute the distance between the positive node and the avg. negative node embedding shape = embedding_node_layer_positive.get_shape().as_list() tmp = Reshape(((shape[1] * shape[2]), ))( embedding_node_layer_positive) # shape[1] is always 1 diff_layer_node = custom_layers.L2Diff(name=self._name + 'node_L2_diff') node_dis = diff_layer_node([avg_node_layer_negative, tmp]) # compute the center of the negative doc embeddings mean_layer_doc = custom_layers.Mean(name=self._name + 'avg_doc_layer_negative') avg_doc_layer_negative = mean_layer_doc(embedding_doc_layer_negative) # compute the distance between the positive doc and the avg. 
negative doc embedding diff_layer_doc = custom_layers.L2Diff(name=self._name + 'doc_L2_diff') doc_dis = diff_layer_doc( [avg_doc_layer_negative, embedding_doc_layer_positive]) # compute L2_Offset l2_offset_layer = custom_layers.L2Off(name=self._name + 'L2_offset')([node_dis, doc_dis]) # create node score layer embedding_layer_node_joint = Multiply( name=self._name + 'embedding_layer_node_joint')( [embedding_layer_relation_node, embedding_node_layer_positive]) output_layer_node_score = Dot(axes=2, name=self._name + 'node_score')( [embedding_layer_node_joint, embedding_node_layer_negative]) output_layer_node_score = Reshape( (model_params.num_negative + 1, ))(output_layer_node_score) # create doc score layer embedding_layer_doc_joint = Multiply(name=self._name + 'embedding_layer_doc_joint')([ embedding_layer_relation_doc, embedding_doc_layer_positive ]) output_layer_doc_score = Dot(axes=2, name=self._name + 'document_score')([ embedding_layer_doc_joint, embedding_doc_layer_negative ]) output_layer_doc_score = Reshape( (model_params.num_negative + 1, ))(output_layer_doc_score) # create final score/predicate layer output_layer_score = Add(name=self._name + 'score')( [output_layer_node_score, output_layer_doc_score]) return [input_layer_doc_positive, input_layer_doc_negative], output_layer_score, l2_offset_layer def predict_node(self, node_idx: int, relation_idx: int): vertex_emb_matrix = self._node_embedding.get_weights()[0] relation_emb_node_matrix = self._relation_embedding_node.get_weights( )[0] scores_emb_node = np.dot((vertex_emb_matrix[node_idx] * relation_emb_node_matrix[relation_idx]), vertex_emb_matrix.T) nodes_candidate = np.zeros( (self._graph_properties.num_vertices, len(self._vocabulary)), dtype=np.int8) for node_idx_candidate, node_candidate in enumerate( self._graph_properties.vertices): tail_bow = self._bow_feature[ node_candidate] if node_candidate in self._bow_feature else np.zeros( len(self._vocabulary)) nodes_candidate[node_idx_candidate, :] = tail_bow doc_emb_matrix = np.dot(nodes_candidate, self._doc_embedding.get_weights() [0]) + self._doc_embedding.get_weights()[1] relation_emb_doc_matrix = self._relation_embedding_doc.get_weights()[0] scores_emb_doc = np.dot( (doc_emb_matrix[node_idx] * relation_emb_doc_matrix[relation_idx]), doc_emb_matrix.T) return scores_emb_node, scores_emb_doc def predict_nodes(self, nodes_idx: list, relations_idx: list): scores_node = list() scores_doc = list() for idx, node_idx in enumerate(nodes_idx): score_node, score_doc = self.predict_node(node_idx, relations_idx[idx]) scores_node.append(score_node) scores_doc.append(score_doc) return np.array(scores_node), np.array(scores_doc) def init_batch_triples(self, model_params: namedtuple, batch_idx: int, num_batches: int, triples_batch: list): batch_size = ( 2 * len(triples_batch) ) if batch_idx == num_batches - 1 else 2 * model_params.batch_size self._batch_negative = np.zeros( (batch_size, model_params.num_negative + 1, len(self._vocabulary)), dtype=np.int8) def generate_training_data(self, nodes_train_idx: list, nodes_train_idx_candidate: list): batch_positive = np.zeros( (len(nodes_train_idx), len(self._vocabulary)), dtype=np.int8) for idx, node_train_idx in enumerate(nodes_train_idx): node_train = self._graph_properties.index_vertex[node_train_idx[0]] if node_train not in self._bow_feature: continue batch_positive[idx, ] = self._bow_feature[node_train] tmp_array = np.zeros( (self._batch_negative.shape[1], self._batch_negative.shape[2])) nodes_train_idx_neg = nodes_train_idx_candidate[idx] for 
idx2, node_train_idx_neg in enumerate(nodes_train_idx_neg): node_train_neg = self._graph_properties.index_vertex[ node_train_idx_neg] if node_train_neg not in self._bow_feature: continue node_train_doc_neg = self._bow_feature[node_train_neg] tmp_array[idx2, ] = node_train_doc_neg self._batch_negative[idx, ] = tmp_array return { 'input_document_positive': batch_positive, 'input_document_negative': self._batch_negative }
def __init__(self, data, num_feats, max_words, method, train=False, load_original=False, masking=False): self.k = num_feats self.maxlen = 400 self.max_words = max_words if data == 'imdbcnn': self.num_words = 20002 embedding_dims = 50 maxlen = 400 hidden_dims = 250 weights_name = "original.h5" emb_name = 'embedding_1' num_classes = 2 num_epoch = 5 elif data in ['yahoolstm']: self.num_words = 20001 embedding_dims = 300 maxlen = 400 hidden_dims = 250 weights_name = "original-0-7.hdf5" emb_name = 'embedding' num_classes = 10 num_epoch = 1 X_ph = Input(shape=(maxlen, ), dtype='int32') weights_extractor_ph = Input(shape=(max_words, ), dtype='int32') Selected_ph = Input(shape=(maxlen, ), dtype='float32') logits_T = construct_gumbel_selector(X_ph, self.num_words, embedding_dims, hidden_dims, maxlen, max_words, network_type='cnn') tau = 0.5 T = Sample_Concrete(tau)(logits_T) batch_size = 40 emb2_layer = Embedding(self.num_words, embedding_dims, input_length=maxlen, name=emb_name, trainable=False) embedding_weights = emb2_layer(weights_extractor_ph) X_emb = emb2_layer(X_ph) Xnew_emb = MakeChange()([X_emb, T, Selected_ph, embedding_weights]) preds = construct_original_network(Xnew_emb, data, trainable=False) if train: model = Model(inputs=[X_ph, Selected_ph, weights_extractor_ph], outputs=preds) model.compile(loss=negative_xentropy, optimizer='RMSprop', metrics=['acc']) if load_original: print('Loading original models...') model.load_weights('{}/models/{}'.format(data, weights_name), by_name=True) if data == 'imdbcnn': emb_weights = emb2_layer.get_weights() emb_weights[0][0] = np.zeros(50) emb2_layer.set_weights(emb_weights) dataset = Data(data, True) if method == 'L2X': scores_train = np.load( '{}/results/scores-train-{}-{}-original{}-mask{}.npy'. format(data, method, num_feats, load_original, masking)) scores_val = np.load( '{}/results/scores-val-{}-{}-original{}-mask{}.npy'.format( data, method, num_feats, load_original, masking)) label_train = np.argmax(dataset.pred_train, axis=1) label_train = np.eye(num_classes)[label_train] training_x = dataset.x_train filepath = "{}/models/gumbel-change-{}-{}-original{}-mask{}.hdf5".format( data, num_feats, max_words, load_original, masking) checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='min') callbacks_list = [checkpoint] print("data loaded") selected_train_index = np.argsort( scores_train, axis=-1)[:, -self.k:] # indices of largest k score. selected_train = np.zeros(scores_train.shape) selected_train[ np.expand_dims(np.arange(len(scores_train)), axis=-1), selected_train_index] = 1.0 selected_val_index = np.argsort( scores_val, axis=-1)[:, -self.k:] # indices of largest k score. 
selected_val = np.zeros(scores_val.shape) selected_val[np.expand_dims(np.arange(len(scores_val)), axis=-1), selected_val_index] = 1.0 weights_extractor_value = np.tile( [list(range(0, max_words - 1)) + [self.num_words - 1]], [len(scores_train), 1]) weights_extractor_value_val = np.tile( [list(range(0, max_words - 1)) + [self.num_words - 1]], [len(scores_val), 1]) label_val = np.argmax(dataset.pred_val, axis=1) label_val = np.eye(num_classes)[label_val] model.fit([training_x, selected_train, weights_extractor_value], label_train, validation_data=([ dataset.x_val, selected_val, weights_extractor_value_val ], label_val), callbacks=callbacks_list, epochs=num_epoch, batch_size=batch_size) label_train = np.argmax(dataset.pred_train, axis=1) label_val = np.argmax(dataset.pred_val, axis=1) else: pred_model = Model([X_ph, Selected_ph, weights_extractor_ph], [T]) pred_model.compile(loss=negative_xentropy, optimizer='RMSprop', metrics=['acc']) weights_name = "{}/models/gumbel-change-{}-{}-original{}-mask{}.hdf5".format( data, num_feats, max_words, load_original, masking) pred_model.load_weights(weights_name, by_name=True) self.pred_model = pred_model
          model)  # activation='linear' (they are the same)
crf = CRF()  # CRF layer { SHOULD I SET -> number_labels+1 (+1 -> PAD) }
out = crf(model)  # output
model = Model(inputs=inpt, outputs=out)

# set optimizer
# decay=learning_rate / epochs
opt = SGD(learning_rate=0.0, momentum=0.9, clipvalue=5.0)  # clipvalue (Gradient Clipping): clip the gradient to [-5, 5]
# opt = SGD(learning_rate=0.05, decay=0.01, momentum=0.9, clipvalue=5.0)

# compile Bi-LSTM-CRF
model.compile(optimizer=opt, loss=crf.loss, metrics=[crf.accuracy])
# model.compile(optimizer=opt, loss=crf.loss, metrics=[crf.viterbi_accuracy])

print('BEFORE TRAINING', model.get_weights())

# ======================================================================================================================
# Data Generators
# ======================================================================================================================

# ceil of the scalar x is the smallest integer i such that i >= x
test_steps = np.ceil(test_data_size / batch_size)  # number of validation and testing batches (same size)
print('test_steps', test_steps)

# (number of batches) - 1, because batches start from 0
# test_batch_generator = batch_generator(x_test_filename, '', batch_size, test_steps - 1)  # testing batch generator
test_generator = DataGenerator(x_test_filename, '',
for ix in range(n_commit_split):
    z_p = model.predict(x=[anDataValid[idxValidSplit[ix]],
                           anDataConstValid[idxValidSplit[ix]]])
    mse = (z_p * z_p).mean()
    print(f"Valid set MSE = {mse:.4f}")

"""c"""
"""c"""
"""c"""

z_emb = model_emb.predict(x=anDataValid[idxValidSplit[0]])
w = emb_obj.get_weights()[0]
w[0]
anDataValid
anDataConstValid

# Unit model
encoder_inputs = Input(shape=(sentenceLength,), name="Encoder_input")
target_inputs = Input(shape=(sentenceLength,), name="target_input")
emb_obj = Embedding(emb.shape[0], emb.shape[1], weights=[emb], trainable=False)
x = emb_obj(encoder_inputs)
x = Flatten()(x)
        output = validationModel.predict_on_batch([internalArr1, internalArr2])
        sim[i] = output
        return sim

simCallback = SimCallback()

for count in range(epochs):
    idx = np.random.randint(0, len(labels) - 1)
    wordTargetArray[0, ] = wordTarget[idx]
    wordContextArray[0, ] = wordContext[idx]
    labelsArray[0, ] = labels[idx]
    loss = model.train_on_batch([wordTargetArray, wordContextArray], labelsArray)
    if count % 100 == 0:  # 10
        print("Iteration {}, loss={}".format(count, loss))
    if count % 10000 == 0:  # 100
        simCallback.runSim()

zerosRow = np.array([0] * vectorDim)
zerosRow.shape = (1, vectorDim)
embeddingMatrix = embedding.get_weights()[0]
embeddingMatrix = np.concatenate((zerosRow, embeddingMatrix), axis=0)
np.savetxt(COMPUTE_DATA_PATH + 'embedding_matrix.txt', embeddingMatrix, fmt="%.5f")
# np.savetxt('embeddingMatrix.txt', embeddingMatrix)
np.save(COMPUTE_DATA_PATH + 'inverse_dictionary.npy', inverseDict)
np.save(COMPUTE_DATA_PATH + 'dictionary.npy', dictionary)
def build(self): question, answer = self._get_inputs() # add embedding layers embedding = Embedding(self.config['n_words'], self.model_params.get('n_embed_dims', 141)) question_embedding = embedding(question) a_embedding = Embedding(self.config['n_words'], self.model_params.get('n_embed_dims', 141)) answer_embedding = embedding(answer) a_embedding.set_weights(embedding.get_weights()) # dropout dropout = Dropout(0.5) question_dropout = dropout(question_embedding) answer_dropout = dropout(answer_embedding) # rnn forward_lstm = LSTM(self.config.get('n_lstm_dims', 141), consume_less='mem', return_sequences=True) backward_lstm = LSTM(self.config.get('n_lstm_dims', 141), consume_less='mem', return_sequences=True) question_lstm = merge( [forward_lstm(question_dropout), backward_lstm(question_dropout)], mode='concat', concat_axis=-1) # dropout question_dropout = dropout(question_lstm) # maxpooling maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2])) question_pool = maxpool(question_dropout) # activation activation = Activation('tanh') question_output = activation(question_pool) question_model = Model(input=[question], output=[question_output]) # attentional rnn forward_lstm = AttentionLSTM(self.config.get('n_lstm_dims', 141), question_output, consume_less='mem', return_sequences=True) backward_lstm = AttentionLSTM(self.config.get('n_lstm_dims', 141), question_output, consume_less='mem', return_sequences=True) answer_lstm = merge( [forward_lstm(answer_dropout), backward_lstm(answer_dropout)], mode='concat', concat_axis=-1) # dropout answer_dropout = dropout(answer_lstm) # maxpooling maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2])) answer_pool = maxpool(answer_dropout) # activation activation = Activation('tanh') answer_output = activation(answer_pool) answer_model = Model(input=[question, answer], output=[answer_output]) return question_model, answer_model
def __init__(self, data, train = False): self.data = data print('Loading TextModel...') if data == 'imdbcnn': filters = 250 hidden_dims = 250 self.embedding_dims = 50 self.maxlen = 400 self.num_classes = 2 self.num_words = 20002 self.type = 'word' if not train: K.set_learning_phase(0) X_ph = Input(shape=(self.maxlen,), dtype='int32') emb_layer = Embedding( self.num_words, self.embedding_dims, input_length=self.maxlen, name = 'embedding_1' ) emb_out = emb_layer(X_ph) if train: preds = construct_original_network(emb_out, data) else: emb_ph = Input( shape=(self.maxlen, self.embedding_dims), dtype='float32' ) preds = construct_original_network(emb_ph, data) if not train: model1 = Model(X_ph, emb_out) model2 = Model(emb_ph, preds) pred_out = model2(model1(X_ph)) pred_model = Model(X_ph, pred_out) pred_model.compile( loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'] ) self.pred_model = pred_model grads = [] for c in range(self.num_classes): grads.append(tf.gradients(preds[:,c], emb_ph)) grads = tf.concat(grads, axis = 0) # [num_classes, batchsize, maxlen, embedding_dims] approxs = grads * tf.expand_dims(emb_ph, 0) # [num_classes, batchsize, maxlen, embedding_dims] self.sess = K.get_session() self.grads = grads self.approxs = approxs self.input_ph = X_ph self.emb_out = emb_out self.emb_ph = emb_ph weights_name = 'original.h5' model1.load_weights('{}/models/{}'.format(data, weights_name), by_name=True) model2.load_weights('{}/models/{}'.format(data, weights_name), by_name=True) self.pred_model.load_weights('{}/models/{}'.format(data, weights_name), by_name=True) print('Model constructed.', weights_name) # For validating the data. emb_weights = emb_layer.get_weights() emb_weights[0][0] = np.zeros(50) self.emb_weights = emb_weights[0] emb_layer.set_weights(emb_weights) else: pred_model = Model(X_ph, preds) pred_model.compile( loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) self.pred_model = pred_model from load_data import Data dataset = Data(self.data, train = True) self.train(dataset) print('Training is done.') elif data == 'agccnn': from agccnn.data_helpers import create_vocab_set, construct_batch_generator, find_words_positions filter_kernels = [7, 7, 3, 3, 3, 3] dense_outputs = 1024 self.charlen = 1014 self.maxlen = 1014 nb_filter = 256 self.num_classes = 4 self.vocab, self.reverse_vocab, self.vocab_size, self.vocab_check = create_vocab_set() self.embedding_dims = self.vocab_size self.type = 'char' K.set_learning_phase(1 if train else 0) #Define what the input shape looks like inputs = Input(shape=(self.charlen, self.vocab_size), name='input', dtype='float32') conv = Conv1D(filters = nb_filter, kernel_size= filter_kernels[0], padding = 'valid', activation = 'relu', input_shape=(self.charlen, self.vocab_size))(inputs) conv = MaxPooling1D(pool_size=3)(conv) conv1 = Conv1D(filters = nb_filter, kernel_size= filter_kernels[1], padding = 'valid', activation = 'relu')(conv) conv1 = MaxPooling1D(pool_size=3)(conv1) conv2 = Conv1D(filters = nb_filter, kernel_size= filter_kernels[2], padding = 'valid', activation = 'relu')(conv1) conv3 = Conv1D(filters = nb_filter, kernel_size= filter_kernels[3], padding = 'valid', activation = 'relu')(conv2) conv4 = Conv1D(filters = nb_filter, kernel_size= filter_kernels[4], padding = 'valid', activation = 'relu')(conv3) conv5 = Conv1D(filters = nb_filter, kernel_size= filter_kernels[5], padding = 'valid', activation = 'relu')(conv4) conv5 = MaxPooling1D(pool_size=3)(conv5) conv5 = Flatten()(conv5) #Two dense layers with 
dropout of .5 z = Dropout(0.5)(Dense(dense_outputs, activation='relu')(conv5)) z = Dropout(0.5)(Dense(dense_outputs, activation='relu')(z)) #Output dense layer with softmax activation pred = Dense(self.num_classes, activation='softmax', name='output')(z) grads = [] for c in range(self.num_classes): grads.append(tf.gradients(pred[:,c], inputs)) grads = tf.concat(grads, axis = 0) # [num_classes, batchsize, self.charlen, embedding_dims] approxs = grads * tf.expand_dims(inputs, 0) # [num_classes, batchsize, self.charlen, embedding_dims] model = Model(inputs, pred) model.compile( loss='categorical_crossentropy', optimizer="sgd", metrics=['accuracy'] ) model.load_weights( 'agccnn/params/crepe_model_weights-15.h5', by_name=True ) self.sess = K.get_session() self.grads = grads self.approxs = approxs self.input_ph = inputs self.model = model from nltk.tokenize.moses import MosesDetokenizer from nltk import word_tokenize detokenizer = MosesDetokenizer() self.tokenize = word_tokenize self.detokenize = detokenizer.detokenize self.construct_batch_generator = construct_batch_generator self.find_words_positions = lambda sent: find_words_positions( sent, word_tokenize(sent), self.charlen, self.vocab, self.vocab_size, self.vocab_check ) self.find_chars_positions = lambda sent: find_words_positions( sent, list(sent.lower().replace(' ', '')), self.charlen, self.vocab, self.vocab_size, self.vocab_check, True ) elif data == 'yahoolstm': self.maxlen = 400 self.num_classes = 10 self.num_words = 20000 self.batch_size = 40 self.embedding_dims = 300 if not train: K.set_learning_phase(0) X_ph = Input(shape=(self.maxlen,), dtype='int32') emb_layer = Embedding( input_dim=self.num_words + 1, output_dim= self.embedding_dims, input_length=self.maxlen, name = "embedding", trainable=True) emb = emb_layer(X_ph) if train: preds = construct_original_network(emb, data) else: emb_ph = Input(shape=(self.maxlen,self.embedding_dims), dtype='float32') preds = construct_original_network(emb_ph, data) if train: model = Model(X_ph, preds) model.compile( loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'] ) else: model1 = Model(X_ph, emb) model2 = Model(emb_ph, preds) pred_out = model2(model1(X_ph)) model = Model(X_ph, pred_out) model.compile( loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'] ) # Construct gradients. grads = [] for c in range(self.num_classes): grads.append(tf.gradients(preds[:,c], emb_ph)) grads = tf.concat(grads, axis = 0) # [num_classes, batchsize, maxlen, embedding_dims] approxs = grads * tf.expand_dims(emb_ph, 0) # [num_classes, batchsize, maxlen, embedding_dims] prev_epoch = 0; prev_itr = 7 model1.load_weights( 'yahoolstm/models/original-{}-{}.hdf5'.format(prev_epoch, prev_itr), by_name = True ) model2.load_weights( 'yahoolstm/models/original-{}-{}.hdf5'.format(prev_epoch, prev_itr), by_name = True ) emb_weights = emb_layer.get_weights() self.emb_weights = emb_weights self.emb_out = emb self.emb_ph = emb_ph self.sess = K.get_session() self.grads = grads self.approxs = approxs self.input_ph = X_ph self.pred_model = model self.type = 'word' if train: from load_data import Data print('Loading data...') dataset = Data(data, train = True) print('Training...') self.train(dataset)