# Beispiel #1 (Example 1)
def evaluate():
    """Run evaluation on the test data.

    Writes one TSV row per test example to ``FLAGS.output_dir/evaluate.tsv``
    (unique id, true category, item text, top-10 scores, top-10 predicted
    categories, top-1 hit flag, top-3 hit flag), then writes the aggregate
    top-1 / top-3 accuracy to ``FLAGS.output_dir/test_accuracy.tsv``.
    """
    data = InputData(test_data_path=FLAGS.test_data, train=False)
    input_ph = tf.placeholder(tf.int32, [None, data.max_len])
    training_ph = tf.placeholder(tf.bool, [])
    with tf.Session() as sess:
        output = convolution(input_ph, training_ph, data.num_chars,
                             data.num_category)
        # Top-10 predicted scores and their category indices per example.
        values, indices = tf.nn.top_k(output, k=10)
        saver = tf.train.Saver()
        load_checkpoint(sess, saver)
        evaluate_path = FLAGS.output_dir + '/evaluate.tsv'
        with open(evaluate_path, 'w') as f:
            writer = csv.writer(f, delimiter='\t')
            for test_labels, test_texts, unique_ids, item_names in data.next_batch_evaluation_data(
            ):
                values_, indices_ = sess.run([values, indices],
                                             feed_dict={
                                                 input_ph: test_texts,
                                                 training_ph: False
                                             })
                for (value, index, test_label, unique_id,
                     item_name) in zip(values_, indices_, test_labels,
                                       unique_ids, item_names):
                    true_category = np.argmax(test_label)
                    # Columns: id, true category, text, 10 scores,
                    # 10 predicted categories, top-1 hit, top-3 hit.
                    row = ([unique_id,
                            data.category_dict[true_category],
                            data.chars_to_unknown(item_name)] +
                           list(value) +
                           [data.category_dict[x] for x in index] +
                           [index[0] == true_category,
                            true_category in index[0:3]])
                    writer.writerow(row)

        # Re-read the per-example rows to compute aggregate accuracy.
        # Columns 23 and 24 are the top-1 / top-3 hit flags written above
        # (booleans serialized by csv as the strings 'True'/'False').
        with open(evaluate_path, 'r') as f:
            reader = csv.reader(f, delimiter='\t')
            accuracy_count = [(line[23], line[24]) for line in reader]
        num_records = len(accuracy_count)
        accuracy_top1 = sum(
            1 for x in accuracy_count if x[0] == 'True') / num_records
        accuracy_top3 = sum(
            1 for x in accuracy_count if x[1] == 'True') / num_records

        with open(FLAGS.output_dir + '/test_accuracy.tsv', 'w') as f:
            writer = csv.writer(f, delimiter='\t')
            writer.writerow([accuracy_top1, accuracy_top3])
# Beispiel #2 (Example 2)
def train():
    """Run the training loop.

    Builds the convolution graph and trains it for ``FLAGS.max_step`` steps
    on mini-batches from ``FLAGS.train_data``. Every 100 steps it prints
    training loss / top-1 / top-3 accuracy on the current batch; every 1000
    steps it evaluates on the full validation set, saves a checkpoint, and
    writes TensorBoard summaries (including validation metrics via
    ``summary_ph``).
    """
    with tf.Graph().as_default():
        print('Loading data...')
        data = InputData(train_data_path=FLAGS.train_data,
                         validation_data_path=FLAGS.validation_data,
                         batch_size=FLAGS.batch_size)
        input_ph = tf.placeholder(tf.int32, [None, data.max_len])
        training_ph = tf.placeholder(tf.bool, [])
        label_ph = tf.placeholder(tf.float32, [None, data.num_category])
        # Placeholder for the three validation summary scalars
        # (loss, top-1 accuracy, top-3 accuracy).
        summary_ph = tf.placeholder(tf.float32, [3])
        # Grow GPU memory on demand instead of grabbing it all up front.
        config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))

        with tf.Session(config=config) as sess:
            convolution_op = convolution(input_ph, training_ph, data.num_chars,
                                         data.num_category)
            loss_op = loss(convolution_op, label_ph)
            train_op = minimize(loss_op)
            accuracy_op = accuracy(convolution_op, label_ph)
            top_3_accuracy_op = top_3_accuracy(convolution_op, label_ph)
            summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
            write_validation_summary_op = write_validation_summary(summary_ph)

            # Histogram summaries for every trainable variable.
            for vals in tf.trainable_variables():
                tf.summary.histogram(vals.name, vals)

            summary_op = tf.summary.merge_all()
            saver = tf.train.Saver()
            load_checkpoint(sess, saver)
            for i in range(FLAGS.max_step):

                label_, text_ = data.next_batch()
                _ = sess.run(train_op,
                             feed_dict={
                                 input_ph: text_,
                                 label_ph: label_,
                                 training_ph: True
                             })
                if i % 100 == 0:
                    # Re-run on the same batch in inference mode
                    # (training_ph=False) to report metrics.
                    loss_, accuracy_, top_3_accuracy_ = sess.run(
                        [loss_op, accuracy_op, top_3_accuracy_op],
                        feed_dict={
                            input_ph: text_,
                            label_ph: label_,
                            training_ph: False
                        })
                    print(
                        'global step: %04d, train loss: %01.7f, train accuracy_top_1 %01.5f train accuracy_top_3 %01.5f'
                        % (i, loss_, accuracy_, top_3_accuracy_))

                if i % 1000 == 0:
                    # Full pass over the validation data; metrics are
                    # averaged over batches.
                    validation_loss = []
                    validation_top_3_accuracy = []
                    validation_top_1_accuracy = []
                    for validation_label, validation_text in data.next_batch_evaluation_data(
                    ):
                        loss_, top_3_accuracy_, accuracy_ = sess.run(
                            [loss_op, top_3_accuracy_op, accuracy_op],
                            feed_dict={
                                input_ph: validation_text,
                                label_ph: validation_label,
                                training_ph: False
                            })
                        validation_loss.append(loss_)
                        validation_top_1_accuracy.append(accuracy_)
                        validation_top_3_accuracy.append(top_3_accuracy_)
                    loss_ = sum(validation_loss) / len(validation_loss)
                    accuracy_ = sum(validation_top_1_accuracy) / len(
                        validation_top_1_accuracy)
                    top_3_accuracy_ = sum(validation_top_3_accuracy) / len(
                        validation_top_3_accuracy)
                    print(
                        'Validation loss: %s validation accuracy_top_1: %01.5f validation accuracy_top_3: %01.5f'
                        % (loss_, accuracy_, top_3_accuracy_))
                    saver.save(sess, FLAGS.checkpoint_dir, global_step=i)
                    # Write summaries: merged graph summaries on the last
                    # training batch, plus the averaged validation scalars.
                    _, summary_str = sess.run(
                        [write_validation_summary_op, summary_op],
                        feed_dict={
                            input_ph: text_,
                            label_ph: label_,
                            training_ph: False,
                            summary_ph: [loss_, accuracy_, top_3_accuracy_]
                        })
                    summary_writer.add_summary(summary_str, i)
                    ru = resource.getrusage(resource.RUSAGE_SELF)
                    print('Max memory usage(byte): ' + str(ru.ru_maxrss))
            # Flush and close the event file so no summaries are lost.
            summary_writer.close()