Example #1
    def build_graph(self):
        graph = TextCnn(self.mode)
        output = graph.build(self.input_data)

        self._build_prediction(output)
        if self.mode != tf.estimator.ModeKeys.PREDICT:
            self._build_loss(output)
            self._build_optimizer()
            self._build_metric()
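# The same TextCnn graph can also be driven by a standalone Estimator
# model_fn, as in the function below.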
def model_fn(features, labels, mode, params):

    word_embeddings = tf.contrib.layers.embed_sequence(
        features['instances'],
        vocab_size=N_WORDS,
        embed_dim=config.model['embed_dim'])
    text_cnn = TextCnn(mode=mode)
    word_embeddings = tf.expand_dims(word_embeddings, -1)
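    # expand_dims appends a channels axis, giving the 4-D
    # [batch, seq_len, embed_dim, 1] layout that conv2d-based
    # TextCNN layers expect.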
    output = text_cnn.build(word_embeddings)
    if mode == tf.estimator.ModeKeys.PREDICT:
        probabilities = tf.nn.softmax(output)
        predicted_indices = tf.argmax(probabilities, axis=1)
        predictions = {
            'class': tf.gather(TARGET_LABELS, predicted_indices),
            'probabilities': probabilities  # softmax scores, not raw logits
        }
        export_outputs = {
            'prediction': tf.estimator.export.PredictOutput(predictions)
        }
        return tf.estimator.EstimatorSpec(mode,
                                          predictions=predictions,
                                          export_outputs=export_outputs)
    # One-hot encode the integer labels as floats, which is what
    # tf.losses.softmax_cross_entropy expects.
    labels_one_hot = tf.one_hot(labels, depth=len(TARGET_LABELS))
    loss = tf.losses.softmax_cross_entropy(labels_one_hot,
                                           output,
                                           scope="loss")
    tf.summary.scalar('loss', loss)

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(config.train['learning_rate'])
        train_op = optimizer.minimize(loss=loss,
                                      global_step=tf.train.get_global_step())

        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op)
    if mode == tf.estimator.ModeKeys.EVAL:
        probabilities = tf.nn.softmax(output)
        predicted_indices = tf.argmax(probabilities, axis=1)

        eval_metric_ops = {
            'accuracy': tf.metrics.accuracy(labels, predicted_indices),
            'auroc': tf.metrics.auc(labels_one_hot, probabilities)
        }

        return tf.estimator.EstimatorSpec(mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)
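A model_fn with this signature plugs directly into tf.estimator.Estimator. A minimal sketch of the wiring, where model_dir and the input functions are assumptions for illustration, not part of the example above:

    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        model_dir='./output/estimator',  # hypothetical directory
        params={})
    # estimator.train(input_fn=train_input_fn, max_steps=1000)
    # estimator.evaluate(input_fn=eval_input_fn)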
Example #3
    'embed_size': 128,
    'kernel_size': [3, 4, 5],
    'n_filters': 50,
    'top_k': 1,
    'lr': 1e-3
}
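# Standard TextCNN hyperparameters: kernel_size lists the parallel
# convolution widths, n_filters the number of feature maps per width,
# and top_k is presumably the k used for k-max pooling.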
if __name__ == '__main__':

    # =============================to pd===================================#

    ckpt_dir = "./output/ckpt"
    word_id = pickle.load(open("./word_id.pkl", 'rb'))

    config['vocab_size'] = len(word_id)
    with tf.Session() as sess:
        model = TextCnn(**config)
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print(ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)

        constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def,
                                                                      ['acc/pred', "fc/logits/BiasAdd"])

        # Write the serialized PB graph to disk
        with tf.gfile.FastGFile("./output/pd/" + 'txt_clf.pb', mode='wb') as f:
            f.write(constant_graph.SerializeToString())
    #
    # =============================predict===================================#

    sess = tf.Session()
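The predict section is cut off here. A minimal sketch of loading the frozen txt_clf.pb back for inference, assuming the 'acc/pred' output node frozen above; the input tensor name 'input_x:0' is hypothetical and should be read off the actual graph:

    with tf.gfile.FastGFile("./output/pd/txt_clf.pb", 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Session() as sess:
        tf.import_graph_def(graph_def, name='')
        pred = sess.graph.get_tensor_by_name('acc/pred:0')
        # result = sess.run(pred, feed_dict={'input_x:0': batch_ids})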
Example #4
    train_x, test_x = list(data['comm'][:train_size]), list(
        data['comm'][train_size:])
    train_y, test_y = list(data['label'][:train_size]), list(
        data['label'][train_size:])
    config['vocab_size'] = len(word_id)

    summary_train_dir = "./output/summary/train"
    summary_dev_dir = "./output/summary/dev"
    ckpt_dir = "./output/ckpt/txt_clf"

    writer_summary_train = tf.summary.FileWriter(summary_train_dir)
    writer_summary_dev = tf.summary.FileWriter(summary_dev_dir)

    epoch = 14000
    with tf.Session() as sess:
        model = TextCnn(**config)
        # Initialize variables first, then restore, so that checkpointed
        # weights are not clobbered by the initializer.
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        if ckpt and ckpt.model_checkpoint_path:
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        writer_summary_train.add_graph(sess.graph)
        f1_max = 0
        f1_count = 0
        for i in range(epoch):
            for item in generate_batch(train_x, train_y, word_id=word_id):
                global_step, loss_train, acc_train, summary_train = model.train(
                    sess, batch_x=item[0], batch_y=item[1])

                writer_summary_train.add_summary(summary_train, global_step)

                if global_step % 100 == 0:
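                    # The example is truncated here. A hypothetical
                    # continuation (model.evaluate and test_x_ids are
                    # assumptions, not part of the original) would log
                    # dev metrics and track the best F1 seen so far:
                    # loss_dev, acc_dev, f1_dev, summary_dev = \
                    #     model.evaluate(sess, batch_x=test_x_ids,
                    #                    batch_y=test_y)
                    # writer_summary_dev.add_summary(summary_dev, global_step)
                    # if f1_dev > f1_max:
                    #     f1_max = f1_dev
                    #     model.saver.save(sess, ckpt_dir,
                    #                      global_step=global_step)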