Example 1
    # Note: this excerpt begins mid-function; `x`, `aspect_words`, `aspect_word`,
    # `lines`, `word_to_id` and `sentence_len` come from the omitted part above.
    aspect_words.append(aspect_to_id.get(aspect_word, 0))
    # Leftover debug code, disabled here because `y_test` is never defined in this excerpt:
    # sxl = change_y_to_onehot(y_test)
    # Map each word of the sentence to its id, skipping out-of-vocabulary words.
    words = lines[0].lower().split()
    ids = []
    for word in words:
        if word in word_to_id:
            ids.append(word_to_id[word])
    # ids = list(map(lambda word: word_to_id.get(word, 0), words))
    # Pad the id sequence with zeros up to the fixed sentence length.
    x.append(ids + [0] * (sentence_len - len(ids)))
    x = np.asarray(x, dtype=np.int32)
    # print(sxl)
    return x, np.asarray(aspect_words)


a, b = load_inputs_twitter_at(x_raw, word_id_mapping, aspect_id_mapping, 80)
print('input:', a)
print('aspect:', b)

#=================================================
checkpoint_file = tf.train.latest_checkpoint(
    'E:/caffe/AI/deep learning/tensorflow/attention-based latm for aspect-level sentiment classification/models/logs/1531470805_-d1-1.0d2-1.0b-25r-0.01l2-0.001sen-80dim-300h-300c-3'
)
#checkpoint_file = tf.train.latest_checkpoint('')
graph = tf.Graph()
with graph.as_default():
    session_conf = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
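        # The original snippet stops after importing the meta graph. A minimal
        # sketch of the usual continuation follows: restore the trained weights
        # and run the prepared inputs through the restored graph. The tensor
        # names below are assumptions, not taken from the original code, and
        # must match the names used when the model graph was built; if the graph
        # has dropout keep-prob placeholders, feed them with 1.0 as well.
        saver.restore(sess, checkpoint_file)
        input_x = graph.get_tensor_by_name('x:0')                  # assumed placeholder name
        input_aspect = graph.get_tensor_by_name('aspect_id:0')     # assumed placeholder name
        prediction = graph.get_tensor_by_name('predict/ArgMax:0')  # assumed output name
        print('prediction:', sess.run(prediction,
                                      feed_dict={input_x: a, input_aspect: b}))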
Example 2
    def run(self):
        # Look up the word and aspect embeddings, then build the AE- or AT-LSTM
        # prediction graph depending on FLAGS.method.
        inputs = tf.nn.embedding_lookup(self.word_embedding, self.x)
        aspect = tf.nn.embedding_lookup(self.aspect_embedding, self.aspect_id)
        if FLAGS.method == 'AE':
            prob = self.AE(inputs, aspect, FLAGS.t)
        elif FLAGS.method == 'AT':
            prob = self.AT(inputs, aspect, FLAGS.t)
        else:
            raise ValueError('Unknown method: {}'.format(FLAGS.method))

        with tf.name_scope('loss'):
            reg_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            # Cross-entropy between the one-hot labels and the predicted
            # distribution, plus the collected L2 regularization terms.
            # cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(prob, self.y))
            cost = -tf.reduce_mean(
                tf.cast(self.y, tf.float32) * tf.log(prob)) + sum(reg_loss)

        with tf.name_scope('train'):
            global_step = tf.Variable(0,
                                      name="tr_global_step",
                                      trainable=False)
            optimizer = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate).minimize(
                    cost, global_step=global_step)
            # optimizer = tf.train.AdagradOptimizer(learning_rate=self.learning_rate).minimize(cost, global_step=global_step)

        with tf.name_scope('predict'):
            correct_pred = tf.equal(tf.argmax(prob, 1), tf.argmax(self.y, 1))
            true_y = tf.argmax(self.y, 1)
            pred_y = tf.argmax(prob, 1)
            accuracy = tf.reduce_sum(tf.cast(correct_pred, tf.int32))
            _acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

        with tf.Session() as sess:
            title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
                FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
                FLAGS.learning_rate, FLAGS.l2_reg, FLAGS.max_sentence_len,
                FLAGS.embedding_dim, FLAGS.n_hidden, FLAGS.n_class)
            summary_loss = tf.summary.scalar('loss' + title, cost)
            summary_acc = tf.summary.scalar('acc' + title, _acc)
            train_summary_op = tf.summary.merge([summary_loss, summary_acc])
            validate_summary_op = tf.summary.merge([summary_loss, summary_acc])
            test_summary_op = tf.summary.merge([summary_loss, summary_acc])
            import time
            timestamp = str(int(time.time()))
            _dir = 'logs/' + str(timestamp) + '_' + title
            train_summary_writer = tf.summary.FileWriter(
                _dir + '/train', sess.graph)
            test_summary_writer = tf.summary.FileWriter(
                _dir + '/test', sess.graph)
            validate_summary_writer = tf.summary.FileWriter(
                _dir + '/validate', sess.graph)

            saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)

            init = tf.global_variables_initializer()
            sess.run(init)

            # saver.restore(sess, 'models/logs/1481529975__r0.005_b2000_l0.05self.softmax/-1072')

            save_dir = 'models/' + _dir + '/'
            import os
            if not os.path.exists(save_dir):
                os.makedirs(save_dir)

            tr_x, tr_sen_len, tr_target_word, tr_y = load_inputs_twitter_at(
                FLAGS.train_file_path, self.word_id_mapping,
                self.aspect_id_mapping, self.max_sentence_len, self.type_)
            te_x, te_sen_len, te_target_word, te_y = load_inputs_twitter_at(
                FLAGS.test_file_path, self.word_id_mapping,
                self.aspect_id_mapping, self.max_sentence_len, self.type_)

            max_acc = 0.
            max_alpha = None
            max_ty, max_py = None, None
            for i in range(self.n_iter):
                for train, _ in self.get_batch_data(tr_x, tr_sen_len, tr_y,
                                                    tr_target_word,
                                                    self.batch_size,
                                                    FLAGS.keep_prob1,
                                                    FLAGS.keep_prob2):
                    _, step, summary = sess.run(
                        [optimizer, global_step, train_summary_op],
                        feed_dict=train)
                    train_summary_writer.add_summary(summary, step)

                acc, loss, cnt = 0., 0., 0
                flag = True
                summary, step = None, None
                alpha = None
                ty, py = None, None
                for test, num in self.get_batch_data(te_x, te_sen_len, te_y,
                                                     te_target_word, 2000, 1.0,
                                                     1.0, False):
                    _loss, _acc, _summary, _step, alpha, ty, py = sess.run(
                        [
                            cost, accuracy, validate_summary_op, global_step,
                            self.alpha, true_y, pred_y
                        ],
                        feed_dict=test)
                    acc += _acc
                    loss += _loss * num
                    cnt += num
                    if flag:
                        # Record the summary of the first test batch only;
                        # alpha, ty and py simply keep the last batch's values.
                        summary = _summary
                        step = _step
                        flag = False
                print('all samples={}, correct prediction={}'.format(cnt, acc))
                test_summary_writer.add_summary(summary, step)
                saver.save(sess, save_dir, global_step=step)
                print(
                    'Iter {}: mini-batch loss={:.6f}, test acc={:.6f}'.format(
                        i, loss / cnt, acc / cnt))
                if acc / cnt > max_acc:
                    max_acc = acc / cnt
                    max_alpha = alpha
                    max_ty = ty
                    max_py = py

            print('Optimization Finished! Max acc={}'.format(max_acc))
            # Dump true labels, predicted labels and attention weights for the
            # best epoch, one test sample per line.
            with open('weight.txt', 'w') as fp:
                for y1, y2, ws in zip(max_ty, max_py, max_alpha):
                    fp.write(
                        str(y1) + ' ' + str(y2) + ' ' +
                        ' '.join([str(w) for w in ws]) + '\n')

            print(
                'Learning_rate={}, iter_num={}, batch_size={}, hidden_num={}, l2={}'
                .format(self.learning_rate, self.n_iter, self.batch_size,
                        self.n_hidden, self.l2_reg))
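
The batching helper `get_batch_data` used above is not shown in this example. Below is a minimal sketch of what such a generator could look like, assuming the loaded inputs are NumPy arrays, `numpy` is imported as `np`, and the model exposes placeholders `self.x`, `self.sen_len`, `self.y`, `self.aspect_id`, `self.keep_prob1` and `self.keep_prob2`; the actual implementation in the source repository may differ.

    def get_batch_data(self, x, sen_len, y, target_words, batch_size,
                       keep_prob1, keep_prob2, is_shuffle=True):
        # Hypothetical sketch: yield (feed_dict, actual_batch_size) pairs that
        # cover the whole data set once. Placeholder attributes are assumptions.
        indices = np.arange(len(y))
        if is_shuffle:
            np.random.shuffle(indices)
        for start in range(0, len(indices), batch_size):
            idx = indices[start:start + batch_size]
            feed_dict = {
                self.x: x[idx],
                self.sen_len: sen_len[idx],
                self.y: y[idx],
                self.aspect_id: target_words[idx],
                self.keep_prob1: keep_prob1,
                self.keep_prob2: keep_prob2,
            }
            yield feed_dict, len(idx)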