Example #1
    def add_embedding(self):
        if self.config.pre_trained:
            embed_dic = helper.readEmbedding(self.config.embed_path + str(self.config.embed_size))  # embedding.50 for 50-dim embedding
            embed_matrix = helper.mkEmbedMatrix(embed_dic, self.vocab.word_to_index)
            self.embedding = tf.Variable(embed_matrix, name='Embedding')
        else:
            self.embedding = tf.get_variable(
                'Embedding', [len(self.vocab), self.config.embed_size], trainable=True)
Example #2
    def add_embedding(self):
        """Add embedding layer. that maps from vocabulary to vectors.

        Returns:
            inputs: shape(b_sz, tstp, emb_sz), fetched input
        """
        if self.config.pre_trained:
            embed_dic = helper.readEmbedding(self.config.embed_path + str(self.config.embed_size))  # embedding.50 for 50-dim embedding
            embed_matrix = helper.mkEmbedMatrix(embed_dic, self.vocab.word_to_index)
            self.embedding = tf.Variable(embed_matrix, name='Embedding')
        else:
            self.embedding = tf.get_variable(
              'Embedding',
              [len(self.vocab), self.config.embed_size], trainable=True)
        inputs = tf.nn.embedding_lookup(self.embedding, self.ph_input)  # shape(b_sz, tstp, emb_sz)
        return inputs
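The last two lines above are the core of the layer: tf.nn.embedding_lookup simply gathers rows of the embedding matrix by id. A minimal numpy sketch of the same gather (toy sizes, not the project's data):

    import numpy as np

    # Toy embedding matrix: vocab of 5 words, emb_sz = 3.
    embed_matrix = np.arange(15, dtype=np.float32).reshape(5, 3)

    # A batch of 2 sentences, 4 time steps each (b_sz=2, tstp=4).
    ids = np.array([[0, 2, 2, 4],
                    [1, 3, 0, 0]])

    # embedding_lookup(embed_matrix, ids)[i, t] == embed_matrix[ids[i, t]]
    inputs = embed_matrix[ids]
    print(inputs.shape)  # (2, 4, 3) -> shape(b_sz, tstp, emb_sz)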
Example #3
    def add_embedding(self):
        """Add embedding layer. that maps from vocabulary to vectors.
        inputs: a list of tensors each of which have a size of [batch_size, embed_size]
        """

        if self.config.pre_trained:
            embed = helper.readEmbedding(self.config.embed_path +
                                         str(self.config.embed_size))
            embed_matrix, valid_mask = helper.mkEmbedMatrix(
                embed, self.vocab.word_to_index)
            embedding = tf.Variable(embed_matrix, name='Embedding')
            embedding = entry_stop_gradients(embedding,
                                             tf.expand_dims(valid_mask, 1))
        else:
            embedding = tf.get_variable(
                'Embedding', [len(self.vocab), self.config.embed_size],
                trainable=True)
        return embedding
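entry_stop_gradients is a project helper whose source is not shown in these examples. A common mask-based sketch of the pattern (assuming mask is broadcastable to the variable and is 1 where gradients should flow) is:

    import tensorflow as tf

    def entry_stop_gradients(target, mask):
        # Forward value is unchanged (the two terms sum back to target),
        # but backprop is blocked through the entries where mask == 0.
        mask = tf.cast(mask, target.dtype)
        return tf.stop_gradient((1.0 - mask) * target) + mask * target

In Example #3 the mask comes from valid_mask, i.e. from which vocabulary rows were actually found in the pre-trained file, so only one subset of rows keeps receiving gradient updates.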
Example #4
    def add_embedding(self):
        """Add an embedding layer that maps vocabulary indices to vectors.

        Returns:
            inputs: shape(b_sz, tstp, emb_sz), fetched input
        """
        if self.config.pre_trained:
            embed_dic = helper.readEmbedding(self.config.embed_path + str(
                self.config.embed_size))  # embedding.50 for 50-dim embedding
            embed_matrix = helper.mkEmbedMatrix(embed_dic,
                                                self.vocab.word_to_index)
            self.embedding = tf.Variable(embed_matrix, name='Embedding')
        else:
            self.embedding = tf.get_variable(
                'Embedding', [len(self.vocab), self.config.embed_size],
                trainable=True)
        inputs = tf.nn.embedding_lookup(
            self.embedding, self.ph_input)  # shape(b_sz, tstp, emb_sz)
        return inputs
Example #5
    def load_data(self, data_path):
        """Load the train/val/test splits, build or load the vocabulary,
        and encode every split into fixed-length id sequences."""
        self.vocab = helper.Vocab()
        tag2id, id2tag = helper.load_tag(data_path + 'class.txt')
        self.id2tag = id2tag

        val_data = helper.load_data(filePath=data_path +
                                    file_names['val_data'])
        test_data = helper.load_data(filePath=data_path +
                                     file_names['test_data'])
        train_data = helper.load_data(filePath=data_path +
                                      file_names['train_data'])

        self.val_data_y, val_data = helper.mkDataSet(val_data, tag2id)
        self.test_data_y, test_data = helper.mkDataSet(test_data, tag2id)
        self.train_data_y, train_data = helper.mkDataSet(train_data, tag2id)

        if os.path.exists(data_path + 'vocab.txt'):
            self.vocab.load_vocab_from_file(data_path + 'vocab.txt')
        else:
            words = helper.flatten([val_data, test_data, train_data])
            self.vocab.construct(words)
            self.vocab.limit_vocab_length(self.config.vocab_size)
            self.vocab.save_vocab(data_path + 'vocab.txt')  # same name as the existence check above

        self.val_data_len, self.val_data_x = helper.encodeNpad(
            val_data, self.vocab, self.config.num_steps)
        self.test_data_len, self.test_data_x = helper.encodeNpad(
            test_data, self.vocab, self.config.num_steps)
        self.train_data_len, self.train_data_x = helper.encodeNpad(
            train_data, self.vocab, self.config.num_steps)
        if self.config.pre_trained:
            embed = helper.readEmbedding(data_path + 'embed/H' +
                                         str(self.config.embed_size) + '.utf8')
            self.embed_matrix = helper.mkEmbedMatrix(embed,
                                                     self.vocab.word_to_index)
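helper.mkEmbedMatrix is likewise not shown. A plausible sketch of such a builder (the body, the random-init scale, and the valid_mask return are assumptions inferred from how Examples #3 and #5 call it; note that Examples #2/#4 unpack one value while #3 unpacks two):

    import numpy as np

    def mkEmbedMatrix(embed_dic, word_to_index):
        # Rows for words found in embed_dic are copied from the file; the
        # rest keep their random init. valid_mask marks file-backed rows,
        # which Example #3 feeds to entry_stop_gradients.
        embed_size = len(next(iter(embed_dic.values())))
        matrix = np.random.uniform(
            -0.05, 0.05, (len(word_to_index), embed_size)).astype(np.float32)
        valid_mask = np.zeros(len(word_to_index), dtype=np.float32)
        for word, idx in word_to_index.items():
            if word in embed_dic:
                matrix[idx] = embed_dic[word]
                valid_mask[idx] = 1.0
        return matrix, valid_mask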