def define_model(input_shape, emb_matrix, vocab_len, emb_dim, rnn_units, dropout=0.5):
    """Assemble an uncompiled two-class sequence classifier.

    The network is: int32 index input -> frozen embedding -> LSTM (last
    output only) -> dropout -> Dense(2) -> softmax.

    Args:
        input_shape: Shape passed to ``Input`` for the index tensor.
        emb_matrix: Weight matrix loaded into the embedding layer.
        vocab_len: ``input_dim`` of the embedding layer.
        emb_dim: ``output_dim`` of the embedding layer.
        rnn_units: Number of units of the LSTM layer.
        dropout: Rate handed to the ``Dropout`` layer.

    Returns:
        A Keras ``Model`` mapping the index input to the softmax output.
    """
    token_ids = Input(input_shape, dtype="int32")

    # Frozen lookup table: build first so the weights exist, then overwrite them.
    lookup = Embedding(input_dim=vocab_len, output_dim=emb_dim, trainable=False)
    lookup.build((None,))
    lookup.set_weights([emb_matrix])
    embedded = lookup(token_ids)

    # Only the final LSTM output is kept (return_sequences=False).
    encoded = LSTM(units=rnn_units, return_sequences=False)(embedded)
    regularised = Dropout(dropout)(encoded)
    logits = Dense(2)(regularised)
    probabilities = Activation("softmax")(logits)

    return Model(inputs=token_ids, outputs=probabilities)
class BiLstmTuner(HyperModel):
    """Hypermodel for a two-layer bidirectional-LSTM binary classifier.

    The tuned hyperparameters are the dropout rate (``dp``), the unit counts
    of both LSTM layers (``lstm1``, ``lstm2``) and the Adam learning rate
    (``lr``). The model emits a single logit per sample.
    """

    def __init__(self, embedding, train_embedding, sen_len):
        """Create the shared embedding layer.

        Args:
            embedding: 2-D weight matrix; its shape gives (vocabulary size,
                embedding dimension).
            train_embedding: Whether the embedding weights are trainable.
            sen_len: Length of the index sequences fed to the model.
        """
        # Initialise the HyperModel base class; the original skipped this,
        # leaving the base-class state unset.
        super().__init__()

        vocab_dim, embed_dim = embedding.shape
        self.embedding_layer = Embedding(vocab_dim, embed_dim, trainable=train_embedding)
        self.embedding_layer.build((None,))
        self.embedding_layer.set_weights([embedding])
        self.sen_len = sen_len
        # NOTE(review): this single layer instance is reused by every model
        # that build() returns, so with train_embedding=True updated weights
        # presumably carry over between builds - confirm this is intended.

    def build(self, hp):
        """Build and compile one candidate model.

        Args:
            hp: Hyperparameter container providing ``Float`` and ``Int``.

        Returns:
            A compiled ``tf.keras.Model`` (Adam, binary cross-entropy on
            logits, accuracy metric).
        """
        sentence_indices = Input(shape=(self.sen_len,), dtype='int32')
        # The same rate is used for the input dropout and for both LSTMs.
        dp_rate = hp.Float('dp', min_value=0.5, max_value=0.8)

        x = self.embedding_layer(sentence_indices)
        x = Dropout(dp_rate)(x)
        x = Bidirectional(
            LSTM(
                units=hp.Int('lstm1', min_value=64, max_value=128, step=32),
                return_sequences=True,
                dropout=dp_rate,
            )
        )(x)
        x = LayerNormalization(axis=1)(x)
        x = Bidirectional(
            LSTM(
                units=hp.Int('lstm2', min_value=64, max_value=128, step=32),
                dropout=dp_rate,
            )
        )(x)
        x = LayerNormalization(axis=1)(x)
        # Single raw logit; the loss below is configured with from_logits=True.
        outputs = Dense(1)(x)

        model = tf.keras.Model(inputs=sentence_indices, outputs=outputs)
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=hp.Float('lr', 1e-6, 1e-3)),
            loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
            metrics=['accuracy'],
        )
        return model
def pretrained_embedding_layer(gensim_model, word_to_index: dict, trainable: bool = False, mask_zero: bool = True):
    """Return a Keras ``Embedding`` layer preloaded with vectors from ``gensim_model``.

    Args:
        gensim_model: Object exposing ``vector_size`` and word lookup via
            ``gensim_model[word]``.
        word_to_index: Mapping from each word to its row in the weight matrix.
        trainable: Forwarded to ``Embedding(trainable=...)``.
        mask_zero: Forwarded to ``Embedding(mask_zero=...)``.

    Returns:
        The built embedding layer with its weights set.
    """
    # One extra row beyond the vocabulary size; rows never assigned stay zero.
    n_rows = len(word_to_index) + 1
    n_cols = gensim_model.vector_size

    weights = np.zeros((n_rows, n_cols))
    for token, row in word_to_index.items():
        weights[row, :] = gensim_model[token]

    layer = Embedding(n_rows, n_cols, trainable=trainable, mask_zero=mask_zero)
    # The layer must be built before its weights can be replaced.
    layer.build((None,))
    layer.set_weights([weights])
    return layer
def buildModel(embedding, train_embedding, sen_len, hidden_dim1, hidden_dim2, dp_rate, lr):
    """Build and compile a two-layer bidirectional-LSTM binary classifier.

    Args:
        embedding: 2-D weight matrix, shape (vocabulary size, embedding dim).
        train_embedding: Whether the embedding weights are trainable.
        sen_len: Length of the int32 index sequences fed to the model.
        hidden_dim1: Units of the first (sequence-returning) LSTM.
        hidden_dim2: Units of the second LSTM.
        dp_rate: Dropout rate for the input dropout and both LSTMs.
        lr: Adam learning rate.

    Returns:
        A compiled ``tf.keras.Model`` producing one logit per sample.
    """
    n_tokens, n_features = embedding.shape
    lookup = Embedding(n_tokens, n_features, trainable=train_embedding)
    lookup.build((None,))
    lookup.set_weights([embedding])

    token_ids = Input(shape=(sen_len,), dtype='int32')

    hidden = lookup(token_ids)
    hidden = Dropout(dp_rate)(hidden)

    first_rnn = Bidirectional(LSTM(hidden_dim1, return_sequences=True, dropout=dp_rate))
    hidden = first_rnn(hidden)
    hidden = LayerNormalization(axis=1)(hidden)

    second_rnn = Bidirectional(LSTM(hidden_dim2, dropout=dp_rate))
    hidden = second_rnn(hidden)
    hidden = LayerNormalization(axis=1)(hidden)

    # Raw logit output; the loss is configured with from_logits=True.
    logit = Dense(1)(hidden)

    classifier = tf.keras.Model(inputs=token_ids, outputs=logit)
    classifier.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        metrics=['accuracy'],
    )
    return classifier
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """Create a frozen Keras ``Embedding`` layer loaded with pretrained word vectors.

    Arguments:
        word_to_vec_map -- dictionary mapping words to their embedding vectors.
        word_to_index -- dictionary mapping words to their vocabulary indices.

    Returns:
        embedding_layer -- the built, non-trainable Keras layer with its
        weights set to the pretrained vectors.
    """
    vocab_len = len(word_to_index) + 1  # vocabulary size plus one extra row

    # Read the embedding dimension from a word that is actually in the
    # vocabulary. The original always probed 'cucumber', which raises KeyError
    # for any embedding that lacks that word; it is kept only as the fallback
    # for an empty vocabulary.
    probe_word = next(iter(word_to_index), 'cucumber')
    emd_dim = word_to_vec_map[probe_word].shape[0]

    # Initialise the embedding matrix; rows that are never assigned stay zero.
    emb_matrix = np.zeros(shape=(vocab_len, emd_dim))

    # Row "index" holds the vector of the word whose vocabulary index is "index".
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]

    # Define the (non-trainable) Keras embedding layer.
    embedding_layer = Embedding(input_dim=vocab_len, output_dim=emd_dim, trainable=False)

    # The layer must be built before its weights can be replaced.
    embedding_layer.build(input_shape=(None,))
    embedding_layer.set_weights(weights=[emb_matrix])

    return embedding_layer
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """Create a frozen Keras ``Embedding`` layer loaded with pretrained word vectors.

    Arguments:
        word_to_vec_map -- dictionary mapping words to their vector representation.
        word_to_index -- dictionary mapping words to their indices in the vocabulary.

    Returns:
        embedding_layer -- pretrained, non-trainable Keras layer instance.
    """
    # Read the embedding dimension from a word that is actually in the
    # vocabulary. The original always probed "cucumber", which raises KeyError
    # for any embedding that lacks that word; it is kept only as the fallback
    # for an empty vocabulary.
    probe_word = next(iter(word_to_index), "cucumber")
    emb_dim = word_to_vec_map[probe_word].shape[0]
    vocab_size = len(word_to_index) + 1  # one extra row beyond the vocabulary

    embedding_matrix = np.zeros((vocab_size, emb_dim))
    for word, index in word_to_index.items():
        embedding_matrix[index, :] = word_to_vec_map[word]

    # The weights are loaded once, through set_weights() below. The original
    # also passed weights=[...] to the constructor, which was redundant and is
    # not accepted by every Keras version.
    embedding_layer = Embedding(input_dim=vocab_size,
                                output_dim=emb_dim,
                                trainable=False)

    # The layer must be built before its weights can be replaced.
    embedding_layer.build((None,))
    embedding_layer.set_weights([embedding_matrix])

    return embedding_layer
class DETR(tf.keras.Model):
    """DETR module that performs object detection.

    Holds a backbone, a transformer, a learned query embedding and the
    class / bounding-box prediction heads.
    """

    def __init__(self, backbone: tf.keras.Model, transformer: tf.keras.Model, num_classes: int, num_queries: int, aux_loss: bool = False, **kwargs):
        """Create the sub-layers of the detector.

        Args:
            backbone: Model called on the input samples; in ``call`` its result
                is unpacked into ``(features, pos)``.
            transformer: Model exposing ``d_model``, which is used as the
                hidden size of every layer created here.
            num_classes: Number of classes; the class head has
                ``num_classes + 1`` outputs (presumably one extra "no object"
                class - confirm).
            num_queries: Number of rows of the query embedding.
            aux_loss: Stored on the instance; not used in the visible code.
            **kwargs: Forwarded to ``tf.keras.Model.__init__``.
        """
        super(DETR, self).__init__(**kwargs)
        self.num_queries = num_queries
        self.transformer = transformer
        hidden_dim = transformer.d_model
        self.class_embed = Dense(num_classes + 1, name='class_embed')
        self.bbox_embed = MLP(hidden_dim, 4, 3, name='bbox_embed')
        self.query_embed = Embedding(num_queries, hidden_dim, name='query_embed')
        # Build eagerly so that query_embed.weights[0] exists when call() reads it.
        self.query_embed.build((num_queries, hidden_dim))
        self.input_proj = Conv2D(hidden_dim, 1, name='input_proj')
        self.backbone = backbone
        self.aux_loss = aux_loss

    def call(self, samples: Dict):
        """Run the backbone and the transformer on ``samples``.

        NOTE(review): the visible body ends after computing ``hs`` and returns
        nothing; the remainder of this method appears to be missing from this
        view.
        """
        features, pos = self.backbone(samples)
        # Only the last backbone feature level is used.
        src, mask = features[-1][1]['img'], features[-1][1]['mask']
        assert mask is not None
        # The raw embedding matrix (not a lookup result) is passed as the queries.
        hs = self.transformer(self.input_proj(src), mask, self.query_embed.weights[0], pos[-1][1])
def create_embedding_layer(self):
    """
    Build a non-trainable Keras embedding layer from the stored word vectors.

    :return: an embedding layer whose weights are the pre-trained word vectors
    """
    # One extra row beyond the vocabulary size; rows never assigned stay zero.
    n_rows = len(self.vocabulary) + 1
    n_cols = self.get_size()
    weights = np.zeros((n_rows, n_cols), dtype=FLOAT_TYPE)

    # Row i receives the vector of the word stored under index i.
    for row, token in self.index_to_word.items():
        weights[row, :] = self.word_to_vector[token]

    layer = Embedding(n_rows, n_cols, trainable=False)
    # The layer must be built before its weights can be replaced.
    layer.build((None,))
    layer.set_weights([weights])
    return layer
def pretrained_embedding(embeddings):
    """Wrap a weight matrix in a frozen Keras ``Embedding`` layer.

    Args:
        embeddings: 2-D array; axis 0 is the vocabulary, axis 1 the vector
            dimension.

    Returns:
        A built, non-trainable ``Embedding`` layer holding ``embeddings``.
    """
    matrix_shape = embeddings.shape
    layer = Embedding(
        input_dim=matrix_shape[0],
        output_dim=matrix_shape[1],
        trainable=False,
    )
    # Weights can only be assigned once the layer has been built.
    layer.build((None,))
    layer.set_weights([embeddings])
    return layer
def pretrained_embed_layer(word_to_vec_map, word_to_index):
    """Create a frozen Keras ``Embedding`` layer loaded with pretrained word vectors.

    Args:
        word_to_vec_map: Mapping from word to its embedding vector.
        word_to_index: Mapping from word to its row in the weight matrix.

    Returns:
        The built, non-trainable embedding layer.
    """
    vocab_len = len(word_to_index) + 1  # one extra row beyond the vocabulary

    # Read the embedding dimension from a word that is actually in the
    # vocabulary. The original always probed "cucumber", which raises KeyError
    # for any embedding that lacks that word; it is kept only as the fallback
    # for an empty vocabulary.
    probe_word = next(iter(word_to_index), "cucumber")
    emb_dim = word_to_vec_map[probe_word].shape[0]

    emb_matrix = np.zeros((vocab_len, emb_dim))
    for word, idx in word_to_index.items():
        emb_matrix[idx, :] = word_to_vec_map[word]

    embedding_layer = Embedding(input_dim=vocab_len, trainable=False, output_dim=emb_dim)
    # The layer must be built before its weights can be replaced.
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer
def pretrained_embedding(embeddings):
    """Return a non-trainable ``Embedding`` layer initialised with ``embeddings``.

    Args:
        embeddings: 2-D weight matrix of shape (vocabulary size, vector size).

    Returns:
        The built layer with its weights set to ``embeddings``.
    """
    n_tokens = embeddings.shape[0]
    n_features = embeddings.shape[1]

    frozen = Embedding(n_tokens, n_features, trainable=False)
    frozen.build((None,))  # weights must exist before they can be overwritten
    frozen.set_weights([embeddings])
    return frozen
def tensorflow_embedding_layer(word_to_vec_map, word_to_index):
    """Create a frozen Keras ``Embedding`` layer loaded with pretrained word vectors.

    Args:
        word_to_vec_map: Mapping from word to its embedding vector.
        word_to_index: Mapping from word to its row in the weight matrix.

    Returns:
        The built, non-trainable embedding layer.
    """
    vocab_len = len(word_to_index) + 1  # one extra row beyond the vocabulary

    # Read the embedding dimension from a word that is actually in the
    # vocabulary. The original always probed 'happy', which raises KeyError
    # for any embedding that lacks that word; it is kept only as the fallback
    # for an empty vocabulary.
    probe_word = next(iter(word_to_index), 'happy')
    emb_dim = word_to_vec_map[probe_word].shape[0]

    emb_matrix = np.zeros((vocab_len, emb_dim))  # unassigned rows stay zero

    # Row "index" holds the vector of the word whose vocabulary index is "index".
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]

    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)

    # The layer must be built before its weights can be replaced.
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer
def _build_embed(self):
    """
    Build the two embedding layers of the sequence model.

    One layer is created for the word embeddings and one for the case
    embeddings. When a tokenizer carries an embedding matrix (pre-trained
    embeddings), the layer takes its sizes from that matrix and is loaded
    with it. Otherwise the sizes come from the tokenizer vocabulary size and
    ``self.params``, and no weights are set.

    Returns:
        Tuple ``(word_embed_layer, case_embed_layer)``.
    """
    word_emb = self.tok_x.emb_matrix
    case_emb = self.tok_c.emb_matrix
    trainable = self.params['train_emb']
    mask_zero = self.params['mask_zero']

    # The params fallbacks are read only when no matrix is available, so a
    # missing 'w_embed_dim' / 'cs_embed_dim' is harmless with pre-trained weights.
    if word_emb is not None:
        word_vocab_size, word_embed_dim = word_emb.shape
    else:
        word_vocab_size = self.tok_x.vocab_size
        word_embed_dim = self.params['w_embed_dim']

    if case_emb is not None:
        case_vocab_size, case_embed_dim = case_emb.shape
    else:
        case_vocab_size = self.tok_c.vocab_size
        case_embed_dim = self.params['cs_embed_dim']

    def make_layer(vocab_size, embed_dim, pretrained):
        # Create one Embedding layer and preload it when a matrix is given.
        layer = Embedding(input_dim=vocab_size,
                          output_dim=embed_dim,
                          trainable=trainable,
                          mask_zero=mask_zero)
        if pretrained is not None:
            # Pass a real shape tuple; the original call `build(None, )`
            # handed over a bare None instead of (None,).
            layer.build((None,))
            layer.set_weights([pretrained])
        return layer

    word_embed_layer = make_layer(word_vocab_size, word_embed_dim, word_emb)
    case_embed_layer = make_layer(case_vocab_size, case_embed_dim, case_emb)
    return word_embed_layer, case_embed_layer
def pretrained_embedding_layer(word_to_vec_map, word_to_index, trainable=False):
    """
    Creates a Keras Embedding() layer and loads in pre-trained word vectors.

    Arguments:
    word_to_vec_map -- dictionary mapping words to their vector representation.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary
    trainable -- whether the embedding weights may be updated during training
                 (default False, i.e. the pretrained vectors stay frozen)

    Returns:
    embedding_layer -- pretrained layer Keras instance
    """
    vocab_len = len(word_to_index) + 1  # one extra row beyond the vocabulary

    # Read the embedding dimension from a word that is actually in the
    # vocabulary. The original always probed "cucumber", which raises KeyError
    # for any embedding that lacks that word; it is kept only as the fallback
    # for an empty vocabulary.
    probe_word = next(iter(word_to_index), "cucumber")
    emb_dim = word_to_vec_map[probe_word].shape[0]

    # Embedding matrix of shape (vocab_len, emb_dim); unassigned rows stay zero.
    emb_matrix = np.zeros(shape=(vocab_len, emb_dim))

    # Row "index" holds the vector of the word whose vocabulary index is "index".
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]

    # Trainability is decided by the caller through the `trainable` argument.
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=trainable)

    # The layer must be built before its weights can be replaced.
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer
def create_embedding_layer(word_to_index, glove_file, mark_start, mark_end, num_words):
    """
    Create a frozen Keras Embedding() layer loaded with pre-trained word vectors.

    @params:
    :word_to_index -- dictionary mapping each word to its index
    :glove_file -- forwarded to create_word_vec_map()
    :mark_start -- forwarded to create_word_vec_map()
    :mark_end -- forwarded to create_word_vec_map()
    :num_words -- number of words in the vocabulary; the weight matrix has
                  num_words + 1 rows

    @return:
    :decoder_embedding -- pretrained, non-trainable Keras layer named
                          'decoder_embedding'
    """
    # Create the word -> vector map
    word_to_vec_map = create_word_vec_map(glove_file, mark_start, mark_end)

    vocabulary_length = num_words + 1  # one extra row beyond the vocabulary

    # The 'unk' vector defines the dimensionality and is the fallback for
    # words missing from the map; look it up once.
    unk_vector = word_to_vec_map['unk']
    embedding_dimensions = unk_vector.shape[0]

    embedding_matrix = np.zeros((vocabulary_length, embedding_dimensions))

    for word, index in word_to_index.items():
        if index >= vocabulary_length:
            # word_to_index may list more words than num_words keeps (e.g. a
            # tokenizer's full word index); such rows do not exist in the
            # matrix and previously raised IndexError.
            continue
        try:
            vector = word_to_vec_map[word]
        except KeyError:
            vector = unk_vector
        embedding_matrix[index, :] = vector

    # The embeddings must not be updated, hence trainable=False.
    decoder_embedding = Embedding(input_dim=vocabulary_length,
                                  output_dim=embedding_dimensions,
                                  trainable=False,
                                  name='decoder_embedding')
    decoder_embedding.build((None,))
    decoder_embedding.set_weights([embedding_matrix])  # the layer is now pretrained

    return decoder_embedding
def make_embedding_layer(vocab_len, wordtoix, embedding_dim=100, glove=True,
                         glove_path='/content/drive/My Drive/Data Exploration Project/glove.6B.50d.txt'):
    """Create a frozen, zero-masking Keras ``Embedding`` layer.

    Args:
        vocab_len: Number of rows of the embedding matrix.
        wordtoix: Mapping from word to its row index.
        embedding_dim: Number of columns of the embedding matrix. It must
            match the vector size stored in ``glove_path``.
        glove: When falsy the layer is loaded with an all-zero matrix,
            otherwise with the vectors read from ``glove_path``.
        glove_path: Text file with one word followed by its coefficients per
            line. Defaults to the path that was previously hard-coded.
            NOTE(review): that default names a 50d file while
            ``embedding_dim`` defaults to 100 - confirm which is intended.

    Returns:
        The built embedding layer (``mask_zero=True``, ``trainable=False``).

    Raises:
        ValueError: If a loaded vector does not have ``embedding_dim`` entries.
    """
    # Words that are not found in the GloVe file keep an all-zero row.
    embedding_matrix = np.zeros((vocab_len, embedding_dim))

    if not glove:
        print('Just a zero matrix loaded')
    else:
        print('Loading glove...')
        embeddings_index = {}
        # The context manager closes the file even if parsing raises.
        with open(glove_path, encoding="utf-8") as f:
            for line in f:
                values = line.split()
                if not values:
                    # Skip blank lines instead of failing on values[0].
                    continue
                embeddings_index[values[0]] = np.asarray(values[1:], dtype='float32')
        print("GloVe ", embedding_dim, ' loded!')

        for word, i in wordtoix.items():
            embedding_vector = embeddings_index.get(word)
            if embedding_vector is None:
                continue
            if embedding_vector.shape[0] != embedding_dim:
                # Explicit message instead of NumPy's broadcast error.
                raise ValueError(
                    "vector for %r has %d entries but embedding_dim is %d"
                    % (word, embedding_vector.shape[0], embedding_dim))
            embedding_matrix[i] = embedding_vector

    # Index 0 is used for padding (mask_zero=True); the vocabulary is limited,
    # so the embedding is not trained.
    embedding_layer = Embedding(vocab_len, embedding_dim, mask_zero=True, trainable=False)
    embedding_layer.build((None,))
    embedding_layer.set_weights([embedding_matrix])
    return embedding_layer
"""Defining input layers * input_context for the encoder * input_target for the decoder """ input_context = Input(shape=(maxLen, ), dtype='int32', name='input_context') input_target = Input(shape=(maxLen, ), dtype='int32', name='input_target') """Training model, includees and embeding layer. Each LSTM layer has size 300, there are three LSTM layers. each layer is composed of an encoder and adecoder where the encoder is fed data and passes it to its corresponing decoder. Finally dense layer, and dropout are applied.""" embed_layer = Embedding(input_dim=vocab_size, output_dim=50, trainable=True, mask_zero=True) embed_layer.build((None, )) embed_layer.set_weights([embedding_matrix], ) input_ctx_embed = embed_layer(input_context) encoder_lstm, h1, c1 = LSTM(LAYER_SIZE, return_state=True, return_sequences=True)(input_ctx_embed) encoder_lstm2, h2, c2 = LSTM(LAYER_SIZE, return_state=True, return_sequences=True)(encoder_lstm) encoder_lstm2, h3, c3 = LSTM(LAYER_SIZE, return_state=True, return_sequences=True)(encoder_lstm2) encoder_states = [h1, c1, h3, c3] input_tar_embed = embed_layer(input_target) final1, context_h1, context_c1 = LSTM(LAYER_SIZE, return_state=True,
# Script fragment: settings plus the start of an LSTM encoder/decoder graph.
# NOTE(review): the fragment is cut off in the middle of the decoder LSTM call.
epochs = 1
learning_rate = 0.01
path = "saves/keras_LSTM.h5"
load = True
encoder_inputs = Input(shape=(max_qc_len, ), dtype='int32')
# Embedding matrix: one row per vocabulary index; rows never assigned stay zero.
emb_matrix = np.zeros((vocab_len, emb_dim))
for word, index in word_to_index.items():
    # Row 0 is left as zeros (the layer below is created with mask_zero=True).
    if index != 0:
        emb_matrix[index, :] = word_to_vec_map[word]
# Frozen embedding layer shared by the encoder and the decoder inputs.
embedding_layer = Embedding(vocab_len, emb_dim, trainable=False, mask_zero=True)
embedding_layer.build((None, ))
embedding_layer.set_weights([emb_matrix])
encoder_embeddings = embedding_layer(encoder_inputs)
# With return_state=True the LSTM call yields (output, state_h, state_c).
encoder = LSTM(state_dim, return_state=True)(encoder_embeddings)
encoder_outputs, state_h, state_c = encoder
encoder_states = [state_h, state_c]
# NOTE(review): unlike encoder_inputs, no dtype is given for this input.
decoder_inputs = Input(shape=(max_ans_len, ))
decoder_embeddings = embedding_layer(decoder_inputs)
decoder_lstm = LSTM(state_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embeddings,
class NLPModel:
    """
    The class representing the CNN model that learns the embedding of a
    specific personality trait.
    """

    def __init__(
        self,
        train_inputs,
        train_outputs,
        weights=None,
        voc_dim=60000,
        features_number=200,
        window_size=5,
        filters_number=100,
        hidden_units=50,
        batch_size=32,
        sentence_length=None,
        train_zeros=False,
    ):
        """
        Create, build and compile the model.

        The model can learn an embedding from scratch or tune a given one.

        Parameters
        ----------
        train_inputs: numpy.array
            The encoded reviews of the training set.
        train_outputs: numpy.array
            The review targets, with length train_size.
        weights: list, default: None
            Initial embedding weights with shape
            (voc_dim, embedding_feature_number); weights[i] must be the
            representation of the term with index=i. When given, the model
            tunes this embedding, and voc_dim / features_number are taken
            from its shape.
        voc_dim: int, default: 60000
            Vocabulary size when learning the embedding from scratch.
        features_number: int, default: 200
            Embedding feature count when learning from scratch.
        window_size: int, default: 5
            The window dimension of the convolution.
        filters_number: int, default: 100
            The number of convolution filters.
        hidden_units: int, default: 50
            The number of units in the hidden layer.
        batch_size: int, default: 32
            The training batch size.
        sentence_length: int, default: None
            The maximum length of a sentence. If None it is set to the length
            of the longest training sentence + 20.
        train_zeros: bool, default: False
            True to train the representation of padding tokens (tokens added
            so that all reviews have the same length).

        Attributes
        ----------
        self.model: Keras model
            The model to be trained (Sequential when train_zeros is True,
            functional otherwise).
        self.train_inputs: numpy.array
            The padded, encoded reviews of the training set.
        self.train_outputs: numpy.array
            The training targets reshaped to (train_size, 1, 1, 1).
        self.embedding_layer: Embedding
            The model's embedding layer.
        self.conv_layer: Conv2D
            The model's convolutional layer.
        self.pool_layer: MaxPooling2D
            The model's max pool layer.
        self.hidden_layer: Dense
            The model's hidden layer before the output layer.
        self.output_layer: Dense
            The model's output layer.
        """
        self.voc_dim = voc_dim
        self.features_number = features_number
        self.window_size = window_size
        self.filters_number = filters_number
        self.hidden_units = hidden_units
        self.batch_size = batch_size
        self.train_zeros = train_zeros

        if sentence_length is not None:
            self.sentence_length = sentence_length
        else:
            self.sentence_length = self._maxLength(train_inputs)

        self.train_inputs = self._createInputs(train_inputs)
        assert train_outputs is not None
        self.train_outputs = self._createOutputs(train_outputs, train_outputs.shape[0])

        if weights is not None:
            # Tuning mode: the matrix dictates vocabulary and feature sizes.
            self.weights = np.asarray(weights)
            self.voc_dim = self.weights.shape[0]
            self.features_number = self.weights.shape[1]
        else:
            # From-scratch mode: random initial weights in {0.00, ..., 0.99}.
            self.weights = np.random.randint(low=0, high=100, size=(self.voc_dim, self.features_number))
            self.weights = self.weights / 100

        self._initializeModel()

    def _initializeModel(self):
        """Create the Keras model and compile it."""
        self._createModel()
        self._compileModel()

    def _maxLength(self, inputs):
        """Return the length of the longest item in ``inputs`` plus 20."""
        return max((len(d) for d in inputs), default=0) + 20

    def _createOutputs(self, x, number):
        """Return ``x`` as an array reshaped to (number, 1, 1, 1)."""
        return np.asarray(x).reshape(number, 1, 1, 1)

    def _createInputs(self, inp):
        """Pad or truncate every sequence (at the end) to ``sentence_length`` with 0."""
        return pad_sequences(inp, maxlen=self.sentence_length, padding="post", truncating="post", value=0)

    def _createModel(self):
        """Build the model variant selected by ``train_zeros``."""
        if self.train_zeros:
            self._createModel_train_zeros()
        else:
            self._createModel_no_train_zeros()

    def _createEmbeddingLayer(self):
        """Create ``self.embedding_layer`` and load ``self.weights`` into it."""
        self.embedding_layer = Embedding(input_dim=self.voc_dim, output_dim=self.features_number, name="emb")
        # The layer must be built before its weights can be replaced.
        self.embedding_layer.build((None,))
        self.embedding_layer.set_weights([self.weights])

    def _createHeadLayers(self):
        """Create the conv, pool, hidden and output layers shared by both variants."""
        self.conv_layer = Conv2D(
            filters=self.filters_number,
            kernel_size=(self.window_size, self.features_number),
            strides=1,
            padding="valid",
            name="conv",
        )
        self.pool_layer = MaxPooling2D(pool_size=(self.sentence_length - self.window_size + 1, 1), name="pool")
        self.hidden_layer = Dense(
            self.hidden_units,
            input_dim=self.filters_number,
            activation=tf.nn.relu,
            name="dense",
        )
        self.output_layer = Dense(1, activation="linear", name="output")
        return (self.conv_layer, self.pool_layer, self.hidden_layer, self.output_layer)

    def _createModel_train_zeros(self):
        """Build the Sequential variant, in which padding embeddings are used as-is."""
        self.model = Sequential()
        self._createEmbeddingLayer()
        self.model.add(self.embedding_layer)
        # Add a trailing axis so the tensor is 4-D, as required by Conv2D.
        self.model.add(Lambda(lambda t: t[..., None]))
        for layer in self._createHeadLayers():
            self.model.add(layer)

    def _createModel_no_train_zeros(self):
        """Build the functional variant, which multiplies the embeddings by a
        non-zero mask (see ``_not_equal``) before the convolution."""
        self.input_layer = Input(shape=(self.sentence_length,), name="input")
        self._createEmbeddingLayer()
        self.layers_inputs = self.embedding_layer(self.input_layer)

        self.lambda_not_equal = Lambda(self._not_equal, name="lambda_not_equal")
        self.layers = self.lambda_not_equal(self.layers_inputs)

        # Add a trailing axis so the tensor is 4-D, as required by Conv2D.
        self.lambda_layer = Lambda(lambda t: t[..., None], name="lambda_shape")
        self.layers = self.lambda_layer(self.layers)

        for layer in self._createHeadLayers():
            self.layers = layer(self.layers)

        self.model = tf.keras.models.Model(inputs=self.input_layer, outputs=self.layers)

    def _not_equal(self, x):
        """Multiply ``x`` element-wise by a float mask that is 1 where ``x != 0``."""
        zeros = tf.constant(0, shape=(self.features_number,), dtype=np.float32)
        not_equal = tf.dtypes.cast(M.not_equal(x, zeros), dtype=np.float32)
        return x * not_equal

    def _compileModel(self):
        """Compile with adagrad / MSE and print every layer's name and output shape."""
        self.model.compile(optimizer="adagrad", loss="mse", metrics=["mse"])
        for layer in self.model.layers:
            print(layer.name, end=" ")
            print(layer.output_shape)

    def _fit_predict(self, x, y, root=None, epochs_number=10):
        """Train one epoch at a time and evaluate on (x, y) after each epoch.

        After every epoch the embedding weights, the test predictions and the
        test MSE are appended to ``self.weights``, ``self.predictions`` and
        ``self.mse``. When ``root`` is given, the MSE and weight histories are
        re-pickled into it after each epoch.
        """
        x = self._createInputs(x)
        # The reshaped targets are not used; the call is kept because it fails
        # early when y does not hold exactly one target per test row.
        self._createOutputs(y, x.shape[0])

        self.predictions = []
        self.mse = []
        # NOTE: this replaces the initial weight matrix with the per-epoch history.
        self.weights = []

        for i in range(0, epochs_number):
            print("\n________\nEPOCH ", i + 1, "/", epochs_number)
            self.model.fit(
                x=self.train_inputs,
                y=self.train_outputs,
                epochs=1,
                batch_size=self.batch_size,
            )
            self.weights.append(self.embedding_layer.get_weights())

            pred = self.model.predict(x)
            pred = pred.reshape(pred.shape[0])
            self.predictions.append(pred)

            mse = sklearn.metrics.mean_squared_error(y, pred)
            self.mse.append(mse)
            print("\nTEST RESULTS:\nMSE\n", mse)

            if root is not None:
                with open(os.path.join(root, "mse.pickle"), "wb") as f:
                    pickle.dump(self.mse, f)
                with open(os.path.join(root, "weights.pickle"), "wb") as f:
                    pickle.dump(self.weights, f)

    def _fit_predict_train_zeros(self, x, y, root=None, epochs_number=10):
        """Fit/evaluate loop for the ``train_zeros`` variant (see ``_fit_predict``)."""
        self._fit_predict(x, y, root, epochs_number)

    def _fit_predict_no_train_zeros(self, x, y, root=None, epochs_number=10):
        """Fit/evaluate loop for the masked variant (see ``_fit_predict``)."""
        self._fit_predict(x, y, root, epochs_number)

    def fit_predict(self, test_inputs, test_outputs, root_path=None, epochs_number=10):
        """
        Fit the model on the training set and, at the end of each epoch,
        evaluate the MSE on the test set. Store performances and embedding
        weights in the given path.

        Parameters
        ----------
        test_inputs: numpy.array
            The encoded reviews of the test set.
        test_outputs: numpy.array
            The review targets, with length test_size.
        root_path: path, default: None
            The directory in which weights and metrics are stored.
        epochs_number: int, default: 10
            The number of training epochs.

        Attributes
        ----------
        self.predictions: list
            The model's predictions on the test set after each epoch.
        self.mse: list
            The MSE on the test set after each training epoch.
        self.weights: list
            The embedding layer's weights after each training epoch.
        """
        if root_path is not None:
            create_dir(root_path)
        if self.train_zeros:
            self._fit_predict_train_zeros(test_inputs, test_outputs, root_path, epochs_number)
        else:
            self._fit_predict_no_train_zeros(test_inputs, test_outputs, root_path, epochs_number)
# NOTE(review): the first statements below are the tail of a function whose
# `def` line is outside this view (presumably embedding_matrix_creater, which
# is called right after it - confirm).
    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in embedding index will be all-zeros.
            embedding_matrix[i] = embedding_vector
    return embedding_matrix


embedding_matrix = embedding_matrix_creater(50, word_index=wordtoix)
embedding_matrix.shape
# Trainable embedding layer preloaded with embedding_matrix; the Encoder
# below stores this same module-level instance.
embed = Embedding(vocab_len, 50, input_length=13, trainable=True)
embed.build((None,))
embed.set_weights([embedding_matrix])
batch_size = 32


class Encoder(tf.keras.Model):
    """Encoder holding the shared embedding, a bidirectional GRU and dropout.

    NOTE(review): only ``__init__`` is visible here; the class may continue
    beyond this view.
    """

    def __init__(self, vocab_size, embedding_dim, enc_units):
        """Create the sub-layers.

        ``vocab_size`` and ``embedding_dim`` are not used in the visible code;
        the embedding is the module-level ``embed`` layer and the batch size
        is the module-level ``batch_size``.
        """
        super(Encoder, self).__init__()
        self.batch_sz = batch_size
        self.enc_units = enc_units
        self.embeddings = embed
        self.Bidirectional = Bidirectional(GRU(enc_units, return_sequences=True, return_state=True, recurrent_initializer='glorot_uniform', name='gru_2'), name='bidirectional_encoder2')
        self.dropout = Dropout(0.2)
class BatchTreeEncoder(tf.Module):
    """Recursive encoder for a batch (forest) of statement trees.

    Every node token is embedded and projected by a dense layer. The encoding
    of each child is added to its parent, and the encodings computed at every
    recursion step are max-pooled per batch slot.
    """

    def __init__(self, vocab_size, embedding_dim, encode_dim, batch_size, pretrained_weight=None):
        """Create the embedding and projection layers.

        :param vocab_size: number of rows of the embedding
        :param embedding_dim: size of each embedding vector
        :param encode_dim: output size of the dense projection ``W_c``
        :param batch_size: initial batch size (overwritten on every call)
        :param pretrained_weight: optional embedding matrix to load
        """
        super(BatchTreeEncoder, self).__init__()
        self.embedding = Embedding(vocab_size, embedding_dim)
        self.embedding_dim = embedding_dim
        self.encode_dim = encode_dim
        self.W_c = Dense(encode_dim, input_shape=(embedding_dim,))
        self.activation = tf.keras.activations.relu
        self.batch_size = batch_size
        self.node_list = []
        self.batch_node = []
        # pretrained embedding
        if pretrained_weight is not None:
            self.embedding.build((vocab_size, embedding_dim))
            self.embedding.set_weights([pretrained_weight])

    def traverse_mul(self, node, batch_index):
        """
        Encode the root of every tree in ``node`` and recursively add the
        encodings of its descendants.

        :param node: the forest input; each item is ``[token, child, child, ...]``
        :param batch_index: for every tree in ``node``, the batch slot it belongs to
        :return: tensor of shape (len(node), encode_dim) with the encoding of
                 each root summed with those of its descendants, or None when
                 ``node`` is empty
        """
        size = len(node)
        if not size:
            return None

        # Placeholder that receives the embeddings of the current nodes.
        batch_current = tf.zeros([size, self.embedding_dim], tf.float32)

        index, current_node = [], []
        # children[j] collects the trees stored for child slot j, and
        # children_index[j] records which parent (row) each of them belongs to.
        children_index, children = [], []
        for i, tree in enumerate(node):
            index.append(i)
            current_node.append(tree[0])
            for j, child in enumerate(tree[1:]):
                # A token index of -1 marks a child that is not a valid node.
                if child[0] == -1:
                    continue
                if len(children_index) <= j:
                    children_index.append([i])
                    children.append([child])
                else:
                    children_index[j].append(i)
                    children[j].append(child)

        index = tf.expand_dims(index, axis=-1)
        # Embed the current tokens and project them to encode_dim. A plain
        # tensor is enough for the lookup indices; the original allocated a
        # throw-away tf.Variable on every call.
        token_ids = tf.convert_to_tensor(current_node)
        batch_current = self.W_c(
            tf.tensor_scatter_nd_update(batch_current, index, self.embedding(token_ids)))

        for c in range(len(children)):
            zeros = tf.zeros([size, self.encode_dim], tf.float32)
            batch_children_index = [batch_index[i] for i in children_index[c]]
            # Recurse to obtain the encodings of the children in this slot.
            tree = self.traverse_mul(children[c], batch_children_index)
            if tree is not None:
                # Add each child's encoding to the row of its parent. The
                # original wrapped these indices in
                # tf.Variable(indices, tf.float32), which passed the dtype
                # into the `trainable` positional slot; the integer indices
                # are used directly instead.
                children_index_instance = tf.expand_dims(children_index[c], axis=-1)
                batch_current += tf.tensor_scatter_nd_update(zeros, children_index_instance, tree)

        # Record this step's encodings at their batch slots (other slots are zero).
        b_in = tf.expand_dims(tf.convert_to_tensor(batch_index), axis=-1)
        self.node_list.append(
            tf.tensor_scatter_nd_update(self.batch_node, b_in, batch_current))
        return batch_current

    def __call__(self, inputs, bs):
        """Encode a batch of ``bs`` trees.

        :param inputs: the forest, one tree per batch item
        :param bs: batch size for this call
        :return: tensor of shape (bs, encode_dim)
        """
        self.batch_size = bs
        self.node_list = []
        self.batch_node = tf.zeros((self.batch_size, self.encode_dim), tf.float32)
        self.traverse_mul(inputs, list(range(self.batch_size)))
        # traverse_mul appended one (batch_size, encode_dim) tensor per
        # recursion step; stack them along a new leading axis.
        self.node_list = tf.stack(self.node_list)
        # Element-wise maximum over all recorded steps for every batch slot.
        return tf.reduce_max(self.node_list, axis=0)
class LWAN(Model):
    """
    Label-Wise Attention Model.

    Documents are embedded with frozen pretrained vectors, encoded with either
    a bidirectional GRU or a 1-D CNN (selected by ``Config['model_encoder']``),
    and passed to a label-wise attention layer.
    """

    def __init__(self, n_classes, emb_weights):
        """
        Set up configuration & layers

        Parameters
        ----------
        n_classes: int
            number of output classes (also number of attention heads)
        emb_weights: numpy ndarray, shape = [n_words, embedding_dim]
        """
        super(LWAN, self).__init__(name='LWAN')
        self.n_classes = n_classes

        # Embedding parameters
        self.emb_weights = tf.convert_to_tensor(emb_weights, dtype=tf.float32)
        self.input_dim = len(emb_weights)
        self.emb_dim = emb_weights.shape[1]
        self.emb_layer = Embedding(self.input_dim, self.emb_dim, trainable=False)
        # The layer must be built before its weights can be replaced.
        self.emb_layer.build((None, self.input_dim))
        self.emb_layer.set_weights([self.emb_weights])

        # CNN parameters
        self.conv1d = Conv1D(filters=Config['cnns']['hidden_units_size'],
                             kernel_size=3,
                             strides=1,
                             padding="same")
        self.activation = Activation('tanh')
        # NOTE: this dropout is shared by both encoders and always takes its
        # rate from the 'cnns' configuration section.
        self.dropout = SpatialDropout1D(Config['cnns']['spatial_dropout'])

        # BiGRU parameters
        self.mask = MaskSeq()
        self.bigru = Bidirectional(
            GRU(units=Config['grus']['hidden_units_size'],
                return_sequences=True,
                activation="tanh",
                recurrent_dropout=0,
                unroll=False,
                use_bias=True,
                reset_after=True,
                recurrent_activation='sigmoid'))

        # Label-Wise Attention Layer
        self.lwan = LabelWiseAttention(n_classes=self.n_classes,
                                       name='labelwise_attention',
                                       return_attention=False)

    def call(self, inputs):
        """
        Call Labelwise Attention Model

        Parameters
        ----------
        inputs: numpy array, shape = [n_samples, max_words]
            array of documents, row = integer word ids padded to max words/doc

        Returns
        -------
        x_outer: output from LabelWiseAttention layer,
            shape = [n_samples, n_labels]

        Raises
        ------
        ValueError
            if Config['model_encoder'] is neither 'grus' nor 'cnns'
        """
        # Input Embedding
        x = self.emb_layer(inputs)

        # Document Encoding
        x_inner = self.dropout(x)
        encoder = Config['model_encoder']
        if encoder == 'grus':
            # The same BiGRU layer instance is applied at every iteration.
            for i in range(Config['grus']['n_hidden_layers']):
                x_bigru = self.bigru(x_inner)
                x_bigru = self.mask([x_bigru, x])
                if i == 0:
                    x_inner = self.dropout(x_bigru)
                else:
                    # Residual connection from the previous iteration's output.
                    x_inner = add([x_bigru, x_inner])
                    x_inner = self.dropout(x_inner)
            encoding_output = self.mask([x_inner, x])
        elif encoder == 'cnns':
            convs = self.conv1d(x_inner)
            convs = self.activation(convs)
            convs = self.dropout(convs)
            encoding_output = self.mask([convs, x])
        else:
            # Previously an unknown encoder name left encoding_output unbound
            # and failed below with an opaque UnboundLocalError.
            raise ValueError(
                "Config['model_encoder'] must be 'grus' or 'cnns', got %r" % (encoder,))

        # Labelwise Attention
        return self.lwan(encoding_output)