def add_embedding(self):
    if self.config.pre_trained:
        # e.g. embedding.50 for a 50-dim embedding file
        embed_dic = helper.readEmbedding(self.config.embed_path + str(self.config.embed_size))
        embed_matrix = helper.mkEmbedMatrix(embed_dic, self.vocab.word_to_index)
        self.embedding = tf.Variable(embed_matrix, name='Embedding')
    else:
        self.embedding = tf.get_variable(
            'Embedding', [len(self.vocab), self.config.embed_size], trainable=True)
def add_embedding(self): """Add embedding layer. that maps from vocabulary to vectors. Returns: inputs: shape(b_sz, tstp, emb_sz), fetched input """ if self.config.pre_trained: embed_dic = helper.readEmbedding(self.config.embed_path+str(self.config.embed_size)) #embedding.50 for 50 dim embedding embed_matrix = helper.mkEmbedMatrix(embed_dic, self.vocab.word_to_index) self.embedding = tf.Variable(embed_matrix, 'Embedding') else: self.embedding = tf.get_variable( 'Embedding', [len(self.vocab), self.config.embed_size], trainable=True) inputs = tf.nn.embedding_lookup(self.embedding, self.ph_input) # shape(b_sz, tstp, emb_sz) return inputs
def add_embedding(self): """Add embedding layer. that maps from vocabulary to vectors. inputs: a list of tensors each of which have a size of [batch_size, embed_size] """ if self.config.pre_trained: embed = helper.readEmbedding(self.config.embed_path + str(self.config.embed_size)) embed_matrix, valid_mask = helper.mkEmbedMatrix( embed, self.vocab.word_to_index) embedding = tf.Variable(embed_matrix, 'Embedding') embedding = entry_stop_gradients(embedding, tf.expand_dims(valid_mask, 1)) else: embedding = tf.get_variable( 'Embedding', [len(self.vocab), self.config.embed_size], trainable=True) return embedding
def add_embedding(self): """Add embedding layer. that maps from vocabulary to vectors. Returns: inputs: shape(b_sz, tstp, emb_sz), fetched input """ if self.config.pre_trained: embed_dic = helper.readEmbedding(self.config.embed_path + str( self.config.embed_size)) #embedding.50 for 50 dim embedding embed_matrix = helper.mkEmbedMatrix(embed_dic, self.vocab.word_to_index) self.embedding = tf.Variable(embed_matrix, 'Embedding') else: self.embedding = tf.get_variable( 'Embedding', [len(self.vocab), self.config.embed_size], trainable=True) inputs = tf.nn.embedding_lookup( self.embedding, self.ph_input) # shape(b_sz, tstp, emb_sz) return inputs
def load_data(self, data_path):
    self.vocab = helper.Vocab()
    tag2id, id2tag = helper.load_tag(data_path + 'class.txt')
    self.id2tag = id2tag

    val_data = helper.load_data(filePath=data_path + file_names['val_data'])
    test_data = helper.load_data(filePath=data_path + file_names['test_data'])
    train_data = helper.load_data(filePath=data_path + file_names['train_data'])

    # Split each set into labels (mapped through tag2id) and raw examples.
    self.val_data_y, val_data = helper.mkDataSet(val_data, tag2id)
    self.test_data_y, test_data = helper.mkDataSet(test_data, tag2id)
    self.train_data_y, train_data = helper.mkDataSet(train_data, tag2id)

    # Reuse a cached vocabulary if one exists; otherwise build it from all
    # three splits, cap its size, and save it under the same file name.
    if os.path.exists(data_path + 'vocab.txt'):
        self.vocab.load_vocab_from_file(data_path + 'vocab.txt')
    else:
        words = helper.flatten([val_data, test_data, train_data])
        self.vocab.construct(words)
        self.vocab.limit_vocab_length(self.config.vocab_size)
        # was '.vocab.txt', which the existence check above would never find
        self.vocab.save_vocab(data_path + 'vocab.txt')

    # Encode tokens to ids and pad/truncate every example to num_steps.
    self.val_data_len, self.val_data_x = helper.encodeNpad(
        val_data, self.vocab, self.config.num_steps)
    self.test_data_len, self.test_data_x = helper.encodeNpad(
        test_data, self.vocab, self.config.num_steps)
    self.train_data_len, self.train_data_x = helper.encodeNpad(
        train_data, self.vocab, self.config.num_steps)

    if self.config.pre_trained:
        embed = helper.readEmbedding(
            data_path + 'embed/H' + str(self.config.embed_size) + '.utf8')
        self.embed_matrix = helper.mkEmbedMatrix(embed, self.vocab.word_to_index)
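# `helper.encodeNpad` is likewise not shown. A minimal sketch consistent with
# the (lengths, ids) return order used above, assuming each example is a list
# of tokens, `vocab.encode` maps token -> id, and id 0 is the padding id (all
# assumptions beyond the call signature itself):
def encodeNpad(data, vocab, num_steps, pad_id=0):
    lengths, encoded = [], []
    for tokens in data:
        ids = [vocab.encode(w) for w in tokens][:num_steps]  # truncate long examples
        lengths.append(len(ids))
        encoded.append(ids + [pad_id] * (num_steps - len(ids)))  # right-pad short ones
    return lengths, encoded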