Example #1
    def __init__(self, word2idx, target2idx, embedding_dim, batch_size, n_hidden, learning_rate, n_class, max_sentence_len, l2_reg, word_embedding, target_embedding, dim_z, pri_prob_y, decoder_type, grad_clip, n_hidden_ae, position_enc, bidirection_enc, position_dec, bidirection_dec, classifier_type):
        super(SemiTABSA, self).__init__()

        self.embedding_dim = embedding_dim
        self.batch_size = batch_size
        self.n_hidden = n_hidden
        self.learning_rate = learning_rate
        self.n_class = n_class
        self.max_sentence_len = max_sentence_len
        self.l2_reg = l2_reg
        self.word2idx = word2idx
        self.target2idx = target2idx
        self.dim_z = dim_z
        self.decoder_type = decoder_type
        self.classifier_type = classifier_type
        self.grad_clip = grad_clip
        self.n_hidden_ae = n_hidden_ae
        self.pri_prob_y = tf.Variable(pri_prob_y, trainable=False)
        self.position_enc = position_enc
        self.bidirection_enc = bidirection_enc
        self.position_dec = position_dec
        self.bidirection_dec = bidirection_dec
        if word_embedding is None:
            logger.info('No embedding is given, initialized randomly')
            wemb_init = np.random.randn(len(word2idx), embedding_dim) * 1e-2
            self.word_embedding = tf.get_variable('word_embedding', [len(word2idx), embedding_dim], initializer=tf.constant_initializer(wemb_init))
        elif isinstance(word_embedding, np.ndarray):
            logger.info('Numerical embedding is given with shape {}'.format(str(word_embedding.shape)))
            self.word_embedding = tf.constant(word_embedding, name='embedding')
            #self.word_embedding = tf.get_variable('word_embedding', [len(word2idx), embedding_dim], initializer=tf.constant_initializer(word_embedding))
        elif isinstance(word_embedding, tf.Tensor):
            logger.info('Import tensor as the embedding: {}'.format(word_embedding.name))
            self.word_embedding = word_embedding
        else:
            raise Exception('Embedding type {} is not supported'.format(type(word_embedding)))

        if target_embedding is None:
            logger.info('No embedding is given, initialized randomly')
            wemb_init = np.random.randn(len(target2idx), embedding_dim) * 1e-2
            self.target_embedding = tf.get_variable('target_embedding', [len(target2idx), embedding_dim], initializer=tf.constant_initializer(wemb_init))
        elif isinstance(target_embedding, np.ndarray):
            logger.info('Numerical embedding is given with shape {}'.format(str(target_embedding.shape)))
            self.target_embedding = tf.constant(target_embedding, name='embedding')
#            self.target_embedding = tf.get_variable('target_embedding', [len(target2idx), embedding_dim], initializer=tf.constant_initializer(target_embedding))
        elif isinstance(target_embedding, tf.Tensor):
            logger.info('Import tensor as the embedding: {}'.format(target_embedding.name))
            self.target_embedding = target_embedding
        else:
            raise Exception('Embedding type {} is not supported'.format(type(target_embedding)))

        #TODO: Take the network graph building codes to a new module. 
        #self.classifier = self.create_classifier(self.classifier_type)
        with tf.variable_scope('classifier'):
            if self.classifier_type == "TC":
                self.classifier = TCClassifier(word2idx=word2idx, 
                        embedding_dim=embedding_dim, 
                        n_hidden=n_hidden, 
                        learning_rate=learning_rate, 
                        n_class=n_class, 
                        max_sentence_len=max_sentence_len, 
                        l2_reg=l2_reg, 
                        embedding=self.word_embedding,
                        grad_clip=self.grad_clip,
                        )
            elif self.classifier_type == "TD":
                pass
            elif self.classifier_type == "MEM":
                # TODO: move these hyper-parameters to Config.py
                # append a zero row used as the padding embedding (pad_idx == len(word2idx))
                word_embedding = np.vstack((word_embedding, np.zeros([1, self.embedding_dim])))

                self.classifier = MEMClassifier(nwords=len(word2idx)+1,
                        word2idx=word2idx,
                        target2idx=target2idx,
                        init_hid=0.1,
                        init_std=0.01,
                        init_lr=0.01,
                        batch_size=self.batch_size,
                        nhop=3,
                        edim=self.embedding_dim,
                        mem_size=79,
                        lindim=300,
                        max_grad_norm=100,
                        pad_idx=len(word2idx),
                        pre_trained_context_wt=word_embedding,
                        pre_trained_target_wt=target_embedding)

            elif self.classifier_type == "IAN":
                self.classifier = IANClassifier(word2idx=word2idx,
                        embedding_dim=self.embedding_dim,
                        n_hidden=self.n_hidden,
                        learning_rate=self.learning_rate,
                        n_class=self.n_class,
                        max_sentence_len=self.max_sentence_len,
                        l2_reg=self.l2_reg,
                        embedding=word_embedding,
                        grad_clip=self.grad_clip)

        with tf.variable_scope('encoder'):
            self.encoder = TCEncoder(word2idx=word2idx, 
                    embedding_dim=embedding_dim, 
                    n_hidden=n_hidden_ae, 
                    learning_rate=learning_rate, 
                    n_class=n_class, 
                    max_sentence_len=max_sentence_len, 
                    l2_reg=l2_reg, 
                    embedding=self.word_embedding,
                    dim_z=dim_z,
                    grad_clip=self.grad_clip,
                    position=self.position_enc,
                    bidirection=self.bidirection_enc,
                    )
        with tf.variable_scope('decoder'):
            self.decoder = TCDecoder(word2idx=word2idx, 
                    embedding_dim=embedding_dim, 
                    n_hidden=n_hidden_ae, 
                    learning_rate=learning_rate, 
                    n_class=n_class, 
                    max_sentence_len=max_sentence_len, 
                    l2_reg=l2_reg, 
                    embedding=self.word_embedding,
                    dim_z=dim_z,
                    decoder_type=self.decoder_type,
                    grad_clip=self.grad_clip,
                    position=self.position_dec,
                    bidirection=self.bidirection_dec,
                    )

        self.klw = tf.placeholder(tf.float32, [], 'klw')
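Note: the constructor above repeats the same three-way branch (random initialization, pretrained numpy matrix, existing tensor) for both the word and the target embeddings. A minimal sketch of how that branch could be factored into a helper is shown below; the helper name build_embedding and its exact signature are assumptions for illustration, not part of the original code.

import numpy as np
import tensorflow as tf

def build_embedding(name, vocab_size, embedding_dim, pretrained=None, trainable=True):
    """Return an embedding tensor: randomly initialized when `pretrained` is None,
    loaded from a numpy matrix, or reused if it is already a tf.Tensor/Variable."""
    if pretrained is None:
        init = np.random.randn(vocab_size, embedding_dim) * 1e-2
        return tf.get_variable(name, [vocab_size, embedding_dim],
                               initializer=tf.constant_initializer(init),
                               trainable=trainable)
    if isinstance(pretrained, np.ndarray):
        return tf.get_variable(name, [vocab_size, embedding_dim],
                               initializer=tf.constant_initializer(pretrained),
                               trainable=trainable)
    if isinstance(pretrained, (tf.Tensor, tf.Variable)):
        return pretrained
    raise TypeError('Embedding type {} is not supported'.format(type(pretrained)))

# hypothetical usage inside __init__:
# self.word_embedding = build_embedding('word_embedding', len(word2idx), embedding_dim, word_embedding)
# self.target_embedding = build_embedding('target_embedding', len(target2idx), embedding_dim, target_embedding)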
Example #2
    def __init__(self, word2idx, embedding_dim, batch_size, n_hidden, learning_rate, n_class, max_sentence_len, l2_reg, embedding, dim_z, pri_prob_y, decoder_type, grad_clip, n_hidden_ae, position_enc, bidirection_enc, position_dec, bidirection_dec, classifier_type, sharefc):
        super(SemiTABSA, self).__init__()

        self.embedding_dim = embedding_dim
        self.batch_size = batch_size
        self.n_hidden = n_hidden
        self.learning_rate = learning_rate
        self.n_class = n_class
        self.max_sentence_len = max_sentence_len
        self.l2_reg = l2_reg
        self.word2idx = word2idx
        self.dim_z = dim_z
        self.decoder_type = decoder_type
        self.classifier_type = classifier_type
        self.grad_clip = grad_clip
        self.n_hidden_ae = n_hidden_ae
        self.pri_prob_y = tf.Variable(pri_prob_y, trainable=False)
        self.position_enc = position_enc
        self.bidirection_enc = bidirection_enc
        self.position_dec = position_dec
        self.bidirection_dec = bidirection_dec
        self.sharefc = sharefc

        if embedding is None:
            logger.info('No embedding is given, initialized randomly')
            wemb_init = np.random.randn(len(word2idx), embedding_dim) * 1e-2
            self.embedding = tf.get_variable('embedding', [len(word2idx), embedding_dim], initializer=tf.constant_initializer(wemb_init))
        elif isinstance(embedding, np.ndarray):
            logger.info('Numerical embedding is given with shape {}'.format(str(embedding.shape)))
            #self.embedding = tf.constant(embedding, name='embedding')
            self.embedding = tf.get_variable('embedding', [len(word2idx), embedding_dim], initializer=tf.constant_initializer(embedding), trainable=False)
        elif isinstance(embedding, tf.Tensor) or isinstance(embedding, tf.Variable):
            logger.info('Import tensor as the embedding: {}'.format(embedding.name))
            self.embedding = embedding
        else:
            raise Exception('Embedding type {} is not supported'.format(type(embedding)))

        #TODO: Take the network graph building codes to a new module. 
        #self.classifier = self.create_classifier(self.classifier_type)
        with tf.variable_scope('classifier'):
            if self.classifier_type == "TC":
                self.classifier = TCClassifier(word2idx=word2idx, 
                        embedding_dim=embedding_dim, 
                        n_hidden=n_hidden, 
                        learning_rate=learning_rate, 
                        n_class=n_class, 
                        max_sentence_len=max_sentence_len, 
                        l2_reg=l2_reg, 
                        embedding=self.embedding,
                        grad_clip=self.grad_clip,
                        )
            elif self.classifier_type == "TD":
                pass
            elif self.classifier_type == "MEM":
                #TODO: Add hyper-params Config.py
                self.classifier = MEMClassifier()
        
        with tf.variable_scope('encoder'):
            self.encoder = TCEncoder(word2idx=word2idx, 
                    embedding_dim=embedding_dim, 
                    n_hidden=n_hidden_ae, 
                    learning_rate=learning_rate, 
                    n_class=n_class, 
                    max_sentence_len=max_sentence_len, 
                    l2_reg=l2_reg, 
                    embedding=self.embedding,
                    dim_z=dim_z,
                    grad_clip=self.grad_clip,
                    position=self.position_enc,
                    bidirection=self.bidirection_enc,
                    )
        
        with tf.variable_scope('decoder'):
            self.decoder = TCDecoder(word2idx=word2idx, 
                    embedding_dim=embedding_dim, 
                    n_hidden=n_hidden_ae, 
                    learning_rate=learning_rate, 
                    n_class=n_class, 
                    max_sentence_len=max_sentence_len, 
                    l2_reg=l2_reg, 
                    embedding=self.embedding,
                    dim_z=dim_z,
                    decoder_type=self.decoder_type,
                    grad_clip=self.grad_clip,
                    position=self.position_dec,
                    bidirection=self.bidirection_dec,
                    sharefc=self.sharefc,
                    )

        self.klw = tf.placeholder(tf.float32, [], 'klw')
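Note: both variants expose the weight of the KL term through the klw placeholder, and the training loops in the later examples feed it as a small constant (1e-4). A common alternative, shown here only as a hedged sketch and not something taken from the original code, is to anneal the weight from 0 toward 1 over the first training steps:

def kl_weight(step, warmup_steps=10000, max_weight=1.0):
    """Linear KL annealing: 0 at step 0, max_weight after warmup_steps."""
    return min(max_weight, max_weight * float(step) / warmup_steps)

# hypothetical usage in the training loop:
# feed_dict.update({self.klw: kl_weight(step)})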
Example #3
class SemiTABSA(BaseModel):
    def __init__(self, word2idx, target2idx, embedding_dim, batch_size, n_hidden, learning_rate, n_class, max_sentence_len, l2_reg, word_embedding, target_embedding, dim_z, pri_prob_y, decoder_type, grad_clip, n_hidden_ae, position_enc, bidirection_enc, position_dec, bidirection_dec, classifier_type):
        super(SemiTABSA, self).__init__()

        self.embedding_dim = embedding_dim
        self.batch_size = batch_size
        self.n_hidden = n_hidden
        self.learning_rate = learning_rate
        self.n_class = n_class
        self.max_sentence_len = max_sentence_len
        self.l2_reg = l2_reg
        self.word2idx = word2idx
        self.target2idx = target2idx
        self.dim_z = dim_z
        self.decoder_type = decoder_type
        self.classifier_type = classifier_type
        self.grad_clip = grad_clip
        self.n_hidden_ae = n_hidden_ae
        self.pri_prob_y = tf.Variable(pri_prob_y, trainable=False)
        self.position_enc = position_enc
        self.bidirection_enc = bidirection_enc
        self.position_dec = position_dec
        self.bidirection_dec = bidirection_dec
        if word_embedding is None:
            logger.info('No embedding is given, initialized randomly')
            wemb_init = np.random.randn(len(word2idx), embedding_dim) * 1e-2
            self.word_embedding = tf.get_variable('word_embedding', [len(word2idx), embedding_dim], initializer=tf.constant_initializer(wemb_init))
        elif isinstance(word_embedding, np.ndarray):
            logger.info('Numerical embedding is given with shape {}'.format(str(word_embedding.shape)))
            self.word_embedding = tf.constant(word_embedding, name='embedding')
            #self.word_embedding = tf.get_variable('word_embedding', [len(word2idx), embedding_dim], initializer=tf.constant_initializer(word_embedding))
        elif isinstance(word_embedding, tf.Tensor):
            logger.info('Import tensor as the embedding: {}'.format(word_embedding.name))
            self.word_embedding = word_embedding
        else:
            raise Exception('Embedding type {} is not supported'.format(type(word_embedding)))

        if target_embedding is None:
            logger.info('No embedding is given, initialized randomly')
            wemb_init = np.random.randn(len(target2idx), embedding_dim) * 1e-2
            self.target_embedding = tf.get_variable('target_embedding', [len(target2idx), embedding_dim], initializer=tf.constant_initializer(wemb_init))
        elif isinstance(target_embedding, np.ndarray):
            logger.info('Numerical embedding is given with shape {}'.format(str(target_embedding.shape)))
            self.target_embedding = tf.constant(target_embedding, name='embedding')
#            self.target_embedding = tf.get_variable('target_embedding', [len(target2idx), embedding_dim], initializer=tf.constant_initializer(target_embedding))
        elif isinstance(target_embedding, tf.Tensor):
            logger.info('Import tensor as the embedding: {}'.format(target_embedding.name))
            self.target_embedding = target_embedding
        else:
            raise Exception('Embedding type {} is not supported'.format(type(target_embedding)))

        #TODO: Take the network graph building codes to a new module. 
        #self.classifier = self.create_classifier(self.classifier_type)
        with tf.variable_scope('classifier'):
            if self.classifier_type == "TC":
                self.classifier = TCClassifier(word2idx=word2idx, 
                        embedding_dim=embedding_dim, 
                        n_hidden=n_hidden, 
                        learning_rate=learning_rate, 
                        n_class=n_class, 
                        max_sentence_len=max_sentence_len, 
                        l2_reg=l2_reg, 
                        embedding=self.word_embedding,
                        grad_clip=self.grad_clip,
                        )
            elif self.classifier_type == "TD":
                pass
            elif self.classifier_type == "MEM":
                # TODO: move these hyper-parameters to Config.py
                # append a zero row used as the padding embedding (pad_idx == len(word2idx))
                word_embedding = np.vstack((word_embedding, np.zeros([1, self.embedding_dim])))

                self.classifier = MEMClassifier(nwords=len(word2idx)+1,
                        word2idx=word2idx,
                        target2idx=target2idx,
                        init_hid=0.1,
                        init_std=0.01,
                        init_lr=0.01,
                        batch_size=self.batch_size,
                        nhop=3,
                        edim=self.embedding_dim,
                        mem_size=79,
                        lindim=300,
                        max_grad_norm=100,
                        pad_idx=len(word2idx),
                        pre_trained_context_wt=word_embedding,
                        pre_trained_target_wt=target_embedding)

            elif self.classifier_type == "IAN":
                self.classifier = IANClassifier(word2idx=word2idx,
                        embedding_dim=self.embedding_dim,
                        n_hidden=self.n_hidden,
                        learning_rate=self.learning_rate,
                        n_class=self.n_class,
                        max_sentence_len=self.max_sentence_len,
                        l2_reg=self.l2_reg,
                        embedding=word_embedding,
                        grad_clip=self.grad_clip)

        with tf.variable_scope('encoder'):
            self.encoder = TCEncoder(word2idx=word2idx, 
                    embedding_dim=embedding_dim, 
                    n_hidden=n_hidden_ae, 
                    learning_rate=learning_rate, 
                    n_class=n_class, 
                    max_sentence_len=max_sentence_len, 
                    l2_reg=l2_reg, 
                    embedding=self.word_embedding,
                    dim_z=dim_z,
                    grad_clip=self.grad_clip,
                    position=self.position_enc,
                    bidirection=self.bidirection_enc,
                    )
        with tf.variable_scope('decoder'):
            self.decoder = TCDecoder(word2idx=word2idx, 
                    embedding_dim=embedding_dim, 
                    n_hidden=n_hidden_ae, 
                    learning_rate=learning_rate, 
                    n_class=n_class, 
                    max_sentence_len=max_sentence_len, 
                    l2_reg=l2_reg, 
                    embedding=self.word_embedding,
                    dim_z=dim_z,
                    decoder_type=self.decoder_type,
                    grad_clip=self.grad_clip,
                    position=self.position_dec,
                    bidirection=self.bidirection_dec,
                    )

        self.klw = tf.placeholder(tf.float32, [], 'klw')
    
    def run(self, sess, train_data_l, train_data_u, test_data, n_iter, keep_rate, save_dir, batch_size, alpha, FLAGS):
        self.init_global_step()
        with tf.name_scope('labeled'):
            with tf.variable_scope('classifier'):
                self.classifier_xa_l = self.classifier.create_placeholders('xa')
                self.classifier_y_l = self.classifier.create_placeholders('y')
                self.classifier_hyper_l = self.classifier.create_placeholders('hyper')
                logits_l = self.classifier.forward(self.classifier_xa_l, self.classifier_hyper_l)
                classifier_loss_l, classifier_acc_l, pri_loss_l = self.classifier.get_loss(logits_l, self.classifier_y_l, self.pri_prob_y)

            with tf.variable_scope('encoder'):
                self.encoder_xa_l = self.encoder.create_placeholders('xa')
                self.encoder_y_l = self.encoder.create_placeholders('y')
                self.encoder_hyper_l = self.encoder.create_placeholders('hyper')
                z_pst, z_pri, encoder_loss_l = self.encoder.forward(self.encoder_xa_l, self.encoder_y_l, self.encoder_hyper_l)

            with tf.variable_scope('decoder'):
                self.decoder_xa_l = self.decoder.create_placeholders('xa') #x is included since x is generated sequentially
                self.decoder_y_l = self.decoder.create_placeholders('y')
                self.decoder_hyper_l = self.decoder.create_placeholders('hyper')
                decoder_loss_l, ppl_fw_l, ppl_bw_l, ppl_l = self.decoder.forward(self.decoder_xa_l, self.decoder_y_l, z_pst, self.decoder_hyper_l)
            elbo_l = encoder_loss_l * self.klw + decoder_loss_l - pri_loss_l

        self.loss_l = elbo_l
        self.loss_c = classifier_loss_l
        
        with tf.name_scope('unlabeled'):
            with tf.variable_scope('classifier', reuse=True):
                self.classifier_xa_u = self.classifier.create_placeholders('xa')
                self.classifier_hyper_u = self.classifier.create_placeholders('hyper')
                logits_u = self.classifier.forward(self.classifier_xa_u, self.classifier_hyper_u)
                predict_u = tf.nn.softmax(logits_u)
                classifier_entropy_u = tf.losses.softmax_cross_entropy(predict_u, predict_u)

            encoder_loss_u, decoder_loss_u = [], []
            elbo_u = []
            self.encoder_xa_u = self.encoder.create_placeholders('xa')
            self.encoder_hyper_u = self.encoder.create_placeholders('hyper')
            self.decoder_xa_u = self.decoder.create_placeholders('xa')
            self.decoder_hyper_u = self.decoder.create_placeholders('hyper')
            batch_size = tf.shape(list(self.encoder_xa_u.values())[0])[0]
            for idx in range(self.n_class):
                with tf.variable_scope('encoder', reuse=True):
                    _label = tf.gather(tf.eye(self.n_class), idx)
                    _label = tf.tile(_label[None, :], [batch_size, 1])
                    _z_pst, _, _encoder_loss = self.encoder.forward(self.encoder_xa_u, {'y':_label}, self.encoder_hyper_u)
                    encoder_loss_u.append(_encoder_loss * self.klw)
                    _pri_loss_u = tf.log(tf.gather(self.pri_prob_y, idx))
            
                with tf.variable_scope('decoder', reuse=True):
                    _decoder_loss, _, _, _ = self.decoder.forward(self.decoder_xa_u, {'y':_label}, _z_pst, self.decoder_hyper_u)
                    decoder_loss_u.append(_decoder_loss)

                _elbo_u = _encoder_loss * self.klw + _decoder_loss# - _pri_loss_u
                elbo_u.append(_elbo_u)

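        # Unlabeled objective: weight each class-conditional ELBO loss by the classifier's
        # predicted probability for that class, then add the entropy-style term computed above.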
        self.loss_u = tf.add_n([elbo_u[idx] * predict_u[:, idx] for idx in range(self.n_class)]) + classifier_entropy_u
        self.loss = tf.reduce_mean(self.loss_l + classifier_loss_l * alpha + self.loss_u)
        #self.loss = tf.reduce_mean(classifier_loss_l) 
        decoder_loss_l = tf.reduce_mean(decoder_loss_l)

        with tf.name_scope('train'):
            optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss, global_step=self.global_step) 
            #optimizer = self.training_op(self.loss, tf.trainable_variables(), self.grad_clip, 20, self.learning_rate)

        
        summary_kl = tf.summary.scalar('kl', tf.reduce_mean(encoder_loss_l))
        summary_loss = tf.summary.scalar('loss', self.loss)
        summary_loss_l = tf.summary.scalar('loss_l', tf.reduce_mean(self.loss_l))
        summary_loss_u = tf.summary.scalar('loss_u', tf.reduce_mean(self.loss_u))
        summary_acc = tf.summary.scalar('acc', classifier_acc_l)
        summary_ppl_fw = tf.summary.scalar('ppl_fw', ppl_fw_l)
        summary_ppl_bw = tf.summary.scalar('ppl_bw', ppl_bw_l)
        summary_ppl = tf.summary.scalar('ppl', ppl_l)
        train_summary_op = tf.summary.merge_all()

        test_acc = tf.placeholder(tf.float32, [])
        test_ppl = tf.placeholder(tf.float32, [])
        summary_acc_test = tf.summary.scalar('test_acc', test_acc)
        summary_ppl_test = tf.summary.scalar('test_ppl', test_ppl)
        test_summary_op = tf.summary.merge([summary_acc_test, summary_ppl_test])

        logger = ExpLogger('semi_tabsa', save_dir)
        logger.write_args(FLAGS)
        logger.write_variables(tf.trainable_variables())
        logger.file_copy(['*.py', 'encoder/*.py', 'decoder/*.py', 'classifier/*.py'])

        train_summary_writer = tf.summary.FileWriter(save_dir + '/train', sess.graph)
        test_summary_writer = tf.summary.FileWriter(save_dir + '/test', sess.graph)
        validate_summary_writer = tf.summary.FileWriter(save_dir + '/validate', sess.graph)

        sess.run(tf.global_variables_initializer())
    
        def get_batch(dataset):
            """ to get batch from an iterator, whenever the ending is reached. """
            while True:
                try:
                    batch = dataset.next()
                    break
                except:
                    pass
            return batch
        
        def get_feed_dict_help(plhs, data_dict, keep_rate, is_training):
            plh_dict = {}
            for plh in plhs: plh_dict.update(plh)
            data_dict.update({'keep_rate': keep_rate})
            data_dict.update({'is_training': is_training})
            feed_dict = self.get_feed_dict(plh_dict, data_dict)
            return feed_dict

        max_acc = 0.
        for i in range(n_iter):
            #for train, _ in self.get_batch_data(train_data, keep_rate):
            for samples, in train_data_l:
                feed_dict_clf_l = get_feed_dict_help(plhs=[self.classifier_xa_l, self.classifier_y_l, self.classifier_hyper_l],
                        data_dict=self.classifier.prepare_data(samples),
                        keep_rate=keep_rate,
                        is_training=True)
                
                feed_dict_enc_l = get_feed_dict_help(plhs=[self.encoder_xa_l, self.encoder_y_l, self.encoder_hyper_l],
                        data_dict=self.encoder.prepare_data(samples),
                        keep_rate=keep_rate,
                        is_training=True)
 
                feed_dict_dec_l = get_feed_dict_help(plhs=[self.decoder_xa_l, self.decoder_y_l, self.decoder_hyper_l],
                        data_dict=self.decoder.prepare_data(samples),
                        keep_rate=keep_rate,
                        is_training=True)
                
                samples, = get_batch(train_data_u)
                feed_dict_clf_u = get_feed_dict_help(plhs=[self.classifier_xa_u, self.classifier_hyper_u],
                        data_dict=self.classifier.prepare_data(samples),
                        keep_rate=keep_rate,
                        is_training=True)
                
                feed_dict_enc_u = get_feed_dict_help(plhs=[self.encoder_xa_u, self.encoder_hyper_u],
                        data_dict=self.encoder.prepare_data(samples),
                        keep_rate=keep_rate,
                        is_training=True)
 
                feed_dict_dec_u = get_feed_dict_help(plhs=[self.decoder_xa_u, self.decoder_hyper_u],
                        data_dict=self.decoder.prepare_data(samples),
                        keep_rate=keep_rate,
                        is_training=True)

                feed_dict = {}
                feed_dict.update(feed_dict_clf_l)
                feed_dict.update(feed_dict_enc_l)
                feed_dict.update(feed_dict_dec_l)
                feed_dict.update(feed_dict_clf_u)
                feed_dict.update(feed_dict_enc_u)
                feed_dict.update(feed_dict_dec_u)
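                # KL weight fed as a small constant here (no annealing schedule)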
                feed_dict.update({self.klw: 0.0001})

                _, _acc, _loss, _ppl, _step, summary = sess.run([optimizer, classifier_acc_l, decoder_loss_l, ppl_l, self.global_step, train_summary_op], feed_dict=feed_dict)
                #_, _acc, _step, summary = sess.run([optimizer, classifier_acc_l, self.global_step, train_summary_op], feed_dict=feed_dict)
                train_summary_writer.add_summary(summary, _step)
                #if np.random.rand() < 1/4:
                #    print(_acc, _loss, _ppl, _step)
                
                acc, ppl, loss, cnt = 0., 0., 0., 0
                for samples, in test_data:
                    feed_dict_clf_l = get_feed_dict_help(plhs=[self.classifier_xa_l, self.classifier_y_l, self.classifier_hyper_l],
                            data_dict=self.classifier.prepare_data(samples),
                            keep_rate=1.0,
                            is_training=False)
                    
                    feed_dict_enc_l = get_feed_dict_help(plhs=[self.encoder_xa_l, self.encoder_y_l, self.encoder_hyper_l],
                            data_dict=self.encoder.prepare_data(samples),
                            keep_rate=1.0,
                            is_training=False)
     
                    feed_dict_dec_l = get_feed_dict_help(plhs=[self.decoder_xa_l, self.decoder_y_l, self.decoder_hyper_l],
                            data_dict=self.decoder.prepare_data(samples),
                            keep_rate=1.0,
                            is_training=False)
    
                    feed_dict = {}
                    feed_dict.update(feed_dict_clf_l)
                    feed_dict.update(feed_dict_enc_l)
                    feed_dict.update(feed_dict_dec_l)
                    feed_dict.update({self.klw: 0})
    
                    _acc, _loss, _ppl, _step = sess.run([classifier_acc_l, decoder_loss_l, ppl_l, self.global_step], feed_dict=feed_dict)
                    acc += _acc * len(samples)
                    ppl += _ppl * len(samples)
                    loss += _loss * len(samples)
                    cnt += len(samples)
                #print(cnt)
                #print(acc)
                summary, _step = sess.run([test_summary_op, self.global_step], feed_dict={test_acc: acc/cnt, test_ppl: ppl/cnt})
                test_summary_writer.add_summary(summary, _step)
                logger.info('Iter {}: mini-batch loss={:.6f}, test acc={:.6f}'.format(_step, loss / cnt, acc / cnt))
                #print(save_dir)
                _dir="unlabel10k" 
                if acc / cnt > max_acc:
                    max_acc = acc / cnt
     
        logger.info('Optimization Finished! Max acc={}'.format(max_acc))

        logger.info('Learning_rate={}, iter_num={}, hidden_num={}, l2={}'.format(
            self.learning_rate,
            n_iter,
            self.n_hidden,
            self.l2_reg
        ))
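Note: the objective assembled in run() adds three pieces: the labeled ELBO (loss_l), the classification loss scaled by alpha, and the unlabeled term obtained by marginalizing the per-class ELBO over the classifier's predictive distribution. The numpy sketch below mirrors that combination with toy shapes; the names and the per-example entropy are assumptions for illustration and are not taken verbatim from the graph above.

import numpy as np

def total_loss(elbo_l, clf_loss_l, elbo_u_per_class, q_y_u, alpha):
    """elbo_l, clf_loss_l: [batch] labeled losses; elbo_u_per_class: [n_class, batch];
    q_y_u: [batch, n_class] classifier probabilities; alpha: scalar weight.
    Assumes labeled and unlabeled batches have the same size, as the original graph does."""
    entropy_u = -np.sum(q_y_u * np.log(q_y_u + 1e-8), axis=1)        # [batch]
    loss_u = np.sum(q_y_u.T * elbo_u_per_class, axis=0) + entropy_u  # expected ELBO loss + entropy term
    return np.mean(elbo_l + alpha * clf_loss_l + loss_u)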
Example #4
class SemiTABSA(BaseModel):
    def __init__(self, word2idx, embedding_dim, batch_size, n_hidden, learning_rate, n_class, max_sentence_len, l2_reg, embedding, dim_z, pri_prob_y, decoder_type, grad_clip, n_hidden_ae, position_enc, bidirection_enc, position_dec, bidirection_dec, classifier_type, sharefc):
        super(SemiTABSA, self).__init__()

        self.embedding_dim = embedding_dim
        self.batch_size = batch_size
        self.n_hidden = n_hidden
        self.learning_rate = learning_rate
        self.n_class = n_class
        self.max_sentence_len = max_sentence_len
        self.l2_reg = l2_reg
        self.word2idx = word2idx
        self.dim_z = dim_z
        self.decoder_type = decoder_type
        self.classifier_type = classifier_type
        self.grad_clip = grad_clip
        self.n_hidden_ae = n_hidden_ae
        self.pri_prob_y = tf.Variable(pri_prob_y, trainable=False)
        self.position_enc = position_enc
        self.bidirection_enc = bidirection_enc
        self.position_dec = position_dec
        self.bidirection_dec = bidirection_dec
        self.sharefc = sharefc

        if embedding is None:
            logger.info('No embedding is given, initialized randomly')
            wemb_init = np.random.randn(len(word2idx), embedding_dim) * 1e-2
            self.embedding = tf.get_variable('embedding', [len(word2idx), embedding_dim], initializer=tf.constant_initializer(wemb_init))
        elif isinstance(embedding, np.ndarray):
            logger.info('Numerical embedding is given with shape {}'.format(str(embedding.shape)))
            #self.embedding = tf.constant(embedding, name='embedding')
            self.embedding = tf.get_variable('embedding', [len(word2idx), embedding_dim], initializer=tf.constant_initializer(embedding), trainable=False)
        elif isinstance(embedding, tf.Tensor) or isinstance(embedding, tf.Variable):
            logger.info('Import tensor as the embedding: {}'.format(embedding.name))
            self.embedding = embedding
        else:
            raise Exception('Embedding type {} is not supported'.format(type(embedding)))

        #TODO: Take the network graph building codes to a new module. 
        #self.classifier = self.create_classifier(self.classifier_type)
        with tf.variable_scope('classifier'):
            if self.classifier_type == "TC":
                self.classifier = TCClassifier(word2idx=word2idx, 
                        embedding_dim=embedding_dim, 
                        n_hidden=n_hidden, 
                        learning_rate=learning_rate, 
                        n_class=n_class, 
                        max_sentence_len=max_sentence_len, 
                        l2_reg=l2_reg, 
                        embedding=self.embedding,
                        grad_clip=self.grad_clip,
                        )
            elif self.classifier_type == "TD":
                pass
            elif self.classifier_type == "MEM":
                #TODO: Add hyper-params Config.py
                self.classifier = MEMClassifier()
        
        with tf.variable_scope('encoder'):
            self.encoder = TCEncoder(word2idx=word2idx, 
                    embedding_dim=embedding_dim, 
                    n_hidden=n_hidden_ae, 
                    learning_rate=learning_rate, 
                    n_class=n_class, 
                    max_sentence_len=max_sentence_len, 
                    l2_reg=l2_reg, 
                    embedding=self.embedding,
                    dim_z=dim_z,
                    grad_clip=self.grad_clip,
                    position=self.position_enc,
                    bidirection=self.bidirection_enc,
                    )
        
        with tf.variable_scope('decoder'):
            self.decoder = TCDecoder(word2idx=word2idx, 
                    embedding_dim=embedding_dim, 
                    n_hidden=n_hidden_ae, 
                    learning_rate=learning_rate, 
                    n_class=n_class, 
                    max_sentence_len=max_sentence_len, 
                    l2_reg=l2_reg, 
                    embedding=self.embedding,
                    dim_z=dim_z,
                    decoder_type=self.decoder_type,
                    grad_clip=self.grad_clip,
                    position=self.position_dec,
                    bidirection=self.bidirection_dec,
                    sharefc=self.sharefc,
                    )

        self.klw = tf.placeholder(tf.float32, [], 'klw')
    
    def run(self, sess, train_data_l, train_data_u, test_data, n_iter, keep_rate, save_dir, batch_size, alpha, FLAGS):
        self.init_global_step()
        with tf.name_scope('labeled'):
            with tf.variable_scope('classifier'):
                self.classifier_xa_l = self.classifier.create_placeholders('xa')
                self.classifier_y_l = self.classifier.create_placeholders('y')
                self.classifier_hyper_l = self.classifier.create_placeholders('hyper')
                logits_l = self.classifier.forward(self.classifier_xa_l, self.classifier_hyper_l)
                classifier_loss_l, classifier_acc_l, pri_loss_l = self.classifier.get_loss(logits_l, self.classifier_y_l, self.pri_prob_y)
                pred_l = tf.argmax(logits_l, axis=1)

            with tf.variable_scope('encoder'):
                self.encoder_xa_l = self.encoder.create_placeholders('xa')
                self.encoder_y_l = self.encoder.create_placeholders('y')
                self.encoder_hyper_l = self.encoder.create_placeholders('hyper')
                z_pst, z_pri, encoder_loss_l = self.encoder.forward(self.encoder_xa_l, self.encoder_y_l, self.encoder_hyper_l)

            with tf.variable_scope('decoder'):
                self.decoder_xa_l = self.decoder.create_placeholders('xa') #x is included since x is generated sequentially
                self.decoder_y_l = self.decoder.create_placeholders('y')
                self.decoder_hyper_l = self.decoder.create_placeholders('hyper')
                decoder_loss_l, ppl_fw_l, ppl_bw_l, ppl_l = self.decoder.forward(self.decoder_xa_l, self.decoder_y_l, z_pst, self.decoder_hyper_l)
            elbo_l = encoder_loss_l * self.klw + decoder_loss_l - pri_loss_l

        self.loss_l = elbo_l
        self.loss_c = classifier_loss_l
        
        with tf.name_scope('unlabeled'):
            with tf.variable_scope('classifier', reuse=True):
                self.classifier_xa_u = self.classifier.create_placeholders('xa')
                self.classifier_hyper_u = self.classifier.create_placeholders('hyper')
                logits_u = self.classifier.forward(self.classifier_xa_u, self.classifier_hyper_u)
                predict_u = tf.nn.softmax(logits_u)
                classifier_entropy_u = tf.losses.softmax_cross_entropy(predict_u, predict_u)

            encoder_loss_u, decoder_loss_u = [], []
            elbo_u = []
            self.encoder_xa_u = self.encoder.create_placeholders('xa')
            self.encoder_hyper_u = self.encoder.create_placeholders('hyper')
            self.decoder_xa_u = self.decoder.create_placeholders('xa')
            self.decoder_hyper_u = self.decoder.create_placeholders('hyper')
            batch_size = tf.shape(list(self.encoder_xa_u.values())[0])[0]
            for idx in range(self.n_class):
                with tf.variable_scope('encoder', reuse=True):
                    _label = tf.gather(tf.eye(self.n_class), idx)
                    _label = tf.tile(_label[None, :], [batch_size, 1])
                    _z_pst, _, _encoder_loss = self.encoder.forward(self.encoder_xa_u, {'y':_label}, self.encoder_hyper_u)
                    encoder_loss_u.append(_encoder_loss * self.klw)
                    _pri_loss_u = tf.log(tf.gather(self.pri_prob_y, idx))
            
                with tf.variable_scope('decoder', reuse=True):
                    _decoder_loss, _, _, _ = self.decoder.forward(self.decoder_xa_u, {'y':_label}, _z_pst, self.decoder_hyper_u)
                    decoder_loss_u.append(_decoder_loss)

                _elbo_u = _encoder_loss * self.klw + _decoder_loss# - _pri_loss_u
                elbo_u.append(_elbo_u)

        self.loss_u = tf.add_n([elbo_u[idx] * predict_u[:, idx] for idx in range(self.n_class)]) + classifier_entropy_u
        self.loss = tf.reduce_mean(self.loss_l + classifier_loss_l * alpha + self.loss_u)
        self.loss += sum(tf.losses.get_regularization_losses())

        batch_size_l = tf.shape(decoder_loss_l)[0]
        decoder_loss_l = tf.reduce_mean(decoder_loss_l)

        with tf.name_scope('train'):
            #optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(cost, global_step=self.global_step)
            optimizer = self.training_op(self.loss, tf.trainable_variables(), self.grad_clip, 20, self.learning_rate, opt='Adam')
        
        summary_kl = tf.summary.scalar('kl', tf.reduce_mean(encoder_loss_l))
        summary_loss = tf.summary.scalar('loss', self.loss)
        summary_loss_l = tf.summary.scalar('loss_l', tf.reduce_mean(self.loss_l))
        summary_loss_u = tf.summary.scalar('loss_u', tf.reduce_mean(self.loss_u))
        summary_acc = tf.summary.scalar('acc', classifier_acc_l)
        summary_ppl_fw = tf.summary.scalar('ppl_fw', ppl_fw_l)
        summary_ppl_bw = tf.summary.scalar('ppl_bw', ppl_bw_l)
        summary_ppl = tf.summary.scalar('ppl', ppl_l)
        train_summary_op = tf.summary.merge_all()

        test_acc = tf.placeholder(tf.float32, [])
        test_ppl = tf.placeholder(tf.float32, [])
        summary_acc_test = tf.summary.scalar('test_acc', test_acc)
        summary_ppl_test = tf.summary.scalar('test_ppl', test_ppl)
        test_summary_op = tf.summary.merge([summary_acc_test, summary_ppl_test])

        logger = ExpLogger('semi_tabsa', save_dir)
        logger.write_args(FLAGS)
        logger.write_variables(tf.trainable_variables())
        logger.file_copy(['semi_tabsa.py', 'encoder/*.py', 'decoder/*.py', 'classifier/*.py'])

        summary_writer = tf.summary.FileWriter(save_dir + '/', sess.graph)
        #test_summary_writer = tf.summary.FileWriter(save_dir + '/', sess.graph)
        #validate_summary_writer = tf.summary.FileWriter(save_dir + '/validate', sess.graph)

        sess.run(tf.global_variables_initializer())

        def get_batch(dataset):
            """ to get batch from an iterator, whenever the ending is reached. """
            while True:
                try:
                    batch = dataset.next()
                    break
                except:
                    pass
            return batch
        
        def get_feed_dict_help(plhs, data_dict, keep_rate, is_training):
            plh_dict = {}
            for plh in plhs: plh_dict.update(plh)
            data_dict.update({'keep_rate': keep_rate})
            data_dict.update({'is_training': is_training})
            feed_dict = self.get_feed_dict(plh_dict, data_dict)
            return feed_dict

        max_acc, max_f1 = 0., 0.
        for i in range(n_iter):
            #for train, _ in self.get_batch_data(train_data, keep_rate):
            for samples, in train_data_l:
                feed_dict_clf_l = get_feed_dict_help(plhs=[self.classifier_xa_l, self.classifier_y_l, self.classifier_hyper_l],
                        data_dict=self.classifier.prepare_data(samples),
                        keep_rate=keep_rate,
                        is_training=True)
                
                feed_dict_enc_l = get_feed_dict_help(plhs=[self.encoder_xa_l, self.encoder_y_l, self.encoder_hyper_l],
                        data_dict=self.encoder.prepare_data(samples),
                        keep_rate=keep_rate,
                        is_training=True)
 
                feed_dict_dec_l = get_feed_dict_help(plhs=[self.decoder_xa_l, self.decoder_y_l, self.decoder_hyper_l],
                        data_dict=self.decoder.prepare_data(samples),
                        keep_rate=keep_rate,
                        is_training=True)
                
                samples, = get_batch(train_data_u)
                feed_dict_clf_u = get_feed_dict_help(plhs=[self.classifier_xa_u, self.classifier_hyper_u],
                        data_dict=self.classifier.prepare_data(samples),
                        keep_rate=keep_rate,
                        is_training=True)
                
                feed_dict_enc_u = get_feed_dict_help(plhs=[self.encoder_xa_u, self.encoder_hyper_u],
                        data_dict=self.encoder.prepare_data(samples),
                        keep_rate=keep_rate,
                        is_training=True)
 
                feed_dict_dec_u = get_feed_dict_help(plhs=[self.decoder_xa_u, self.decoder_hyper_u],
                        data_dict=self.decoder.prepare_data(samples),
                        keep_rate=keep_rate,
                        is_training=True)

                feed_dict = {}
                feed_dict.update(feed_dict_clf_l)
                feed_dict.update(feed_dict_enc_l)
                feed_dict.update(feed_dict_dec_l)
                feed_dict.update(feed_dict_clf_u)
                feed_dict.update(feed_dict_enc_u)
                feed_dict.update(feed_dict_dec_u)
                feed_dict.update({self.klw: 0.0001})

                _, _acc, _loss, _ppl, _step, summary = sess.run([optimizer, classifier_acc_l, decoder_loss_l, ppl_l, self.global_step, train_summary_op], feed_dict=feed_dict)
                summary_writer.add_summary(summary, _step)
                if np.random.rand() < 1/4:
                    print(_acc, _loss, _ppl, _step)
            
            truth, pred, acc, ppl, loss, cnt = [], [], 0., 0., 0., 0
            idx2y = {0:'positive', 1:'negative', 2:'neutral'}
            for samples, in test_data:
                feed_dict_clf_l = get_feed_dict_help(plhs=[self.classifier_xa_l, self.classifier_y_l, self.classifier_hyper_l],
                        data_dict=self.classifier.prepare_data(samples),
                        keep_rate=1.0,
                        is_training=False)
                
                feed_dict_enc_l = get_feed_dict_help(plhs=[self.encoder_xa_l, self.encoder_y_l, self.encoder_hyper_l],
                        data_dict=self.encoder.prepare_data(samples),
                        keep_rate=1.0,
                        is_training=False)
 
                feed_dict_dec_l = get_feed_dict_help(plhs=[self.decoder_xa_l, self.decoder_y_l, self.decoder_hyper_l],
                        data_dict=self.decoder.prepare_data(samples),
                        keep_rate=1.0,
                        is_training=False)

                feed_dict = {}
                feed_dict.update(feed_dict_clf_l)
                feed_dict.update(feed_dict_enc_l)
                feed_dict.update(feed_dict_dec_l)
                feed_dict.update({self.klw: 0})

                num, _pred, _acc, _loss, _ppl, _step = sess.run([batch_size_l, pred_l, classifier_acc_l, decoder_loss_l, ppl_l, self.global_step], feed_dict=feed_dict)
                pred.extend([idx2y[int(_)] for _ in _pred])
                truth.extend([sample['polarity'] for sample in samples])
                acc += _acc * num
                ppl += _ppl * num
                loss += _loss * num
                cnt += num
            #print(cnt)
            #print(acc)
            f1 = f1_score(truth, pred, average='macro') 
            summary, _step = sess.run([test_summary_op, self.global_step], feed_dict={test_acc: acc/cnt, test_ppl: ppl/cnt})
            summary_writer.add_summary(summary, _step)
            logger.info('Iter {}: mini-batch loss={:.6f}, test acc={:.6f}, f1={:.6f}'.format(_step, loss / cnt, acc / cnt, f1))
            print(save_dir)
            if acc / cnt > max_acc:
                max_acc = acc / cnt
                max_f1 = f1
                with open(os.path.join(save_dir, 'pred'), 'w') as f:
                    idx2y = {0:'positive', 1:'negative', 2:'neutral'}
                    for samples, in test_data:
                        feed_dict = get_feed_dict_help(plhs=[self.classifier_xa_l, self.classifier_y_l, self.classifier_hyper_l],
                            data_dict=self.classifier.prepare_data(samples),
                            keep_rate=1.0,
                            is_training=False)

                        _pred, = sess.run([pred_l], feed_dict=feed_dict)
                        for idx, sample in enumerate(samples):
                            f.write(idx2y[_pred[idx]])
                            f.write('\t')
                            f.write(sample['polarity'])
                            f.write('\t')
                            f.write(' '.join([t + ' ' + str(b) for (t, b) in zip(sample['tokens'], sample['tags'])]))
                            f.write('\n')


        logger.info('Optimization Finished! Max acc={} f1={}'.format(max_acc, max_f1))

        logger.info('Learning_rate={}, iter_num={}, hidden_num={}, l2={}'.format(
            self.learning_rate,
            n_iter,
            self.n_hidden,
            self.l2_reg
        ))
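Note: Example #4 builds its optimizer through self.training_op(...), which lives in the BaseModel base class and is not shown in these snippets. A typical TF 1.x helper of this kind clips gradients by global norm before applying an Adam update; the sketch below is an assumed implementation in that spirit, and the original's exact signature and behavior may differ.

import tensorflow as tf

def training_op(loss, var_list, grad_clip, max_norm, learning_rate, opt='Adam', global_step=None):
    """Hypothetical gradient-clipping train op in the spirit of BaseModel.training_op."""
    optimizer = (tf.train.AdamOptimizer(learning_rate) if opt == 'Adam'
                 else tf.train.GradientDescentOptimizer(learning_rate))
    grads = tf.gradients(loss, var_list)                 # d(loss)/d(var) for every trainable variable
    if grad_clip:                                        # clip by global norm when requested
        grads, _ = tf.clip_by_global_norm(grads, max_norm)
    return optimizer.apply_gradients(zip(grads, var_list), global_step=global_step)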