Example #1
                    et_number_of_bins=cfg["features"]["binned"],
                    et_embedding_dimension=FLAGS.et_embedding_dimension,
                    with_eeg=cfg["features"]["eeg"],
                    eeg_features_size=eeg.shape[2],
                    use_normalization_layer=True)

                cnn = TextCNN(
                    sequence_length=x_train.shape[1],
                    num_classes=y_train.shape[1],
                    vocab_size=len(vocab_processor.vocabulary_),
                    embedding_size=embedding_dimension,
                    filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                    num_filters=FLAGS.num_filters,
                    l2_reg_lambda=FLAGS.l2_reg_lambda,
                    weights=class_weights_for_cross_entropy,
                    rel_pos_embedding_size=FLAGS.rel_pos_embedding_size,
                    rel_pos_cardinality=relative_positions_cardinality,
                    pos_tags_embedding_size=FLAGS.pos_tags_embedding_size,
                    pos_tags_cardinality=pos_tags_cardinality,
                    with_eye_tracking=cfg["features"]["gaze"],
                    et_features_size=et.shape[2],
                    et_number_of_bins=cfg["features"]["binned"],
                    et_embedding_dimension=FLAGS.et_embedding_dimension,
                    with_eeg=cfg["features"]["eeg"],
                    eeg_features_size=eeg.shape[2])

                # Define Training procedure
                global_steps = [
                    tf.Variable(0, name="global_step_1", trainable=False),
                    tf.Variable(0, name="global_step_2", trainable=False)
                ]
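
The snippet above cuts off after creating two global_step counters, presumably one per training phase. The wiring that usually follows mirrors the single-optimizer pattern in the later examples; the following is only a sketch (both ops are shown against cnn.loss purely for illustration, since the original's actual losses are not visible here):

# Sketch only: one optimizer/train_op per global_step, following the pattern
# used in the other examples in this listing.
optimizers = [tf.train.AdamOptimizer(1e-3), tf.train.AdamOptimizer(1e-3)]
train_ops = []
for optimizer, step in zip(optimizers, global_steps):
    grads_and_vars = optimizer.compute_gradients(cnn.loss)
    train_ops.append(optimizer.apply_gradients(grads_and_vars, global_step=step))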
Example #2
y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

# Training
# =======================================================

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(sequence_length=x_train.shape[1],
                      num_classes=y_train.shape[1],
                      embedding_size=FLAGS.embedding_dim,
                      filter_sizes=list(map(int,
                                            FLAGS.filter_sizes.split(","))),
                      num_filters=FLAGS.num_filters,
                      l2_reg_lambda=FLAGS.l2_reg_lambda)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
Example #3
# print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) # 打印切分的比例

# Training
# ==================================================

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(
            sequence_length=x.shape[1],
            num_classes=y.shape[1],
            vocab_size=len(vocab_processor.vocabulary_),  # number of words in the vocabulary
            embedding_size=FLAGS.embedding_dim,
            filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),  # the filter_sizes flag defined above, e.g. "3,4,5" split on ","
            num_filters=FLAGS.num_filters,
            l2_reg_lambda=FLAGS.l2_reg_lambda,  # L2 regularization term
            trainable=not FLAGS.static)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step",
                                  trainable=False)  # step counter, a variable initialized to 0
        optimizer = tf.train.AdamOptimizer(1e-3)  # use the Adam optimizer
        grads_and_vars = optimizer.compute_gradients(
            cnn.loss
        )  # minimize the loss computed by the CNN; returns a list of (gradient, variable) pairs
        train_op = optimizer.apply_gradients(
            grads_and_vars,
            global_step=global_step)  # apply the updates; global_step is incremented once per update
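
Example #3 stops right after building train_op. The training loop that typically follows (see Example #4) runs one step per batch; below is a minimal sketch, where x_batch and y_batch stand for one batch from data_helpers.batch_iter and FLAGS.dropout_keep_prob is the usual dropout flag assumed by these scripts:

# Minimal training-step sketch (cf. Example #4); x_batch/y_batch are one batch of data.
feed_dict = {
    cnn.input_x: x_batch,
    cnn.input_y: y_batch,
    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
}
_, step, loss, accuracy = sess.run(
    [train_op, global_step, cnn.loss, cnn.accuracy], feed_dict)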
Example #4
def train(x_train, y_train, vocab_processor, x_dev, y_dev):
    # Training
    # ==================================================

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=len(vocab_processor.vocabulary_),
                          embedding_size=FLAGS.embedding_dim,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # History of dev accuracy/steps (dev_step below appends to these)
            accuracies, steps = [], []

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

                accuracies.append(accuracy)
                steps.append(step)

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size,
                                              FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
Example #5
import data_helpers
import os
import time
import tensorflow as tf
import datetime
# NOTE: this snippet also assumes a project-local `CNN` module (providing CNN.FLAGS)
# and the `TextCNN` model class are importable; their imports are not shown here.

with tf.Graph().as_default():
    start_time = time.time()
    session_conf = tf.ConfigProto(
        allow_soft_placement=CNN.FLAGS.allow_soft_placement,
        log_device_placement=CNN.FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(filter_sizes=list(
            map(int, CNN.FLAGS.filter_sizes.split(","))),
                      num_filters=CNN.FLAGS.num_filters,
                      vec_shape=(4, CNN.FLAGS.embedding_size),
                      l2_reg_lambda=CNN.FLAGS.l2_reg_lambda)
        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                # tf.histogram_summary was removed in TF 1.0; tf.summary.histogram is the replacement
                grad_hist_summary = tf.summary.histogram(
                    "{}/grad/hist".format(v.name), g)
Example #6
print("Vocabulary Size: {:d}".format(len(vocabulary)))
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

# Training
# ==================================================

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(sequence_length=x_train.shape[1],
                      num_classes=2,
                      vocab_size=len(vocabulary),
                      embedding_size=FLAGS.embedding_dim,
                      filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                      num_filters=FLAGS.num_filters,
                      l2_reg_lambda=FLAGS.l2_reg_lambda)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-4)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
Example #7
def train_cnn():
    """Step 0: load sentences, labels, and training parameters"""
    dataset = './dataset'
    x_raw, y_raw, df, labels = data_helper.load_data_and_labels(dataset)

    parameter_file = "./parameters.json"
    params = json.loads(open(parameter_file).read())
    """Step 1: pad each sentence to the same length and map each word to an id"""
    max_document_length = max([len(x.split(' ')) for x in x_raw])
    logging.info(
        'The maximum length of all sentences: {}'.format(max_document_length))
    vocab_processor = learn.preprocessing.VocabularyProcessor(
        max_document_length)
    x = np.array(list(vocab_processor.fit_transform(x_raw)))
    y = np.array(y_raw)
    """Step 2: split the original dataset into train and test sets"""
    x_, x_test, y_, y_test = train_test_split(x,
                                              y,
                                              test_size=0.1,
                                              random_state=42)
    """Step 3: shuffle the train set and split the train set into train and dev sets"""
    shuffle_indices = np.random.permutation(np.arange(len(y_)))
    x_shuffled = x_[shuffle_indices]
    y_shuffled = y_[shuffle_indices]
    x_train, x_dev, y_train, y_dev = train_test_split(x_shuffled,
                                                      y_shuffled,
                                                      test_size=0.1)
    """Step 4: save the labels into labels.json since predict.py needs it"""
    with open('./labels.json', 'w') as outfile:
        json.dump(labels, outfile, indent=4)

    timestamp = str(int(time.time()))
    model_folder_name = timestamp
    if not os.path.exists("trained_model_" + model_folder_name):
        os.mkdir("trained_model_" + model_folder_name)
    out_dir = os.path.abspath(
        os.path.join(os.path.curdir, "trained_model_" + model_folder_name))
    os.rename("./labels.json",
              "./trained_model_" + model_folder_name + "/labels.json")
    """input category name and save category.json"""
    cateName = input("카테고리 대분류 명을 입력해 주세요(한글입력) : ")
    cateJson = {'name': cateName}
    with open('./category.json', 'w', encoding='utf-8') as cateFile:
        json.dump(cateJson, cateFile, indent=4, ensure_ascii=False)
    os.rename("./category.json",
              "./trained_model_" + model_folder_name + "/category.json")
    cateFile.close()

    logging.info('x_train: {}, x_dev: {}, x_test: {}'.format(
        len(x_train), len(x_dev), len(x_test)))
    logging.info('y_train: {}, y_dev: {}, y_test: {}'.format(
        len(y_train), len(y_dev), len(y_test)))
    """Step 5: build a graph and cnn object"""
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=len(vocab_processor.vocabulary_),
                          embedding_size=params['embedding_dim'],
                          filter_sizes=list(
                              map(int, params['filter_sizes'].split(","))),
                          num_filters=params['num_filters'],
                          l2_reg_lambda=params['l2_reg_lambda'])

            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables())

            # One training step: train the model with one batch
            def train_step(x_batch, y_batch):
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: params['dropout_keep_prob']
                }
                _, step, loss, acc = sess.run(
                    [train_op, global_step, cnn.loss, cnn.accuracy], feed_dict)

            # One evaluation step: evaluate the model with one batch
            def dev_step(x_batch, y_batch):
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, loss, acc, num_correct = sess.run(
                    [global_step, cnn.loss, cnn.accuracy, cnn.num_correct],
                    feed_dict)
                return num_correct

            # Save the word_to_id map since predict.py needs it
            vocab_processor.save(os.path.join(out_dir, "vocab.pickle"))
            sess.run(tf.global_variables_initializer())

            # Training starts here
            train_batches = data_helper.batch_iter(list(zip(x_train, y_train)),
                                                   params['batch_size'],
                                                   params['num_epochs'])
            best_accuracy, best_at_step = 0, 0
            """Step 6: train the cnn model with x_train and y_train (batch by batch)"""
            for train_batch in train_batches:
                x_train_batch, y_train_batch = zip(*train_batch)
                train_step(x_train_batch, y_train_batch)
                current_step = tf.train.global_step(sess, global_step)
                """Step 6.1: evaluate the model with x_dev and y_dev (batch by batch)"""
                if current_step % params['evaluate_every'] == 0:
                    dev_batches = data_helper.batch_iter(
                        list(zip(x_dev, y_dev)), params['batch_size'], 1)
                    total_dev_correct = 0
                    for dev_batch in dev_batches:
                        x_dev_batch, y_dev_batch = zip(*dev_batch)
                        num_dev_correct = dev_step(x_dev_batch, y_dev_batch)
                        total_dev_correct += num_dev_correct

                    dev_accuracy = float(total_dev_correct) / len(y_dev)
                    logging.critical(
                        'Accuracy on dev set: {}'.format(dev_accuracy))
                    """Step 6.2: save the model if it is the best based on accuracy on dev set"""
                    if dev_accuracy >= best_accuracy:
                        best_accuracy, best_at_step = dev_accuracy, current_step
                        path = saver.save(sess,
                                          checkpoint_prefix,
                                          global_step=current_step)
                        logging.critical('Saved model at {} at step {}'.format(
                            path, best_at_step))
                        logging.critical(
                            'Best accuracy is {} at step {}'.format(
                                best_accuracy, best_at_step))
                        checkpointFile = open(
                            './trained_model_' + model_folder_name +
                            '/checkpoints/checkpoint', 'w')
                        checkpointFile.write(
                            "model_checkpoint_path: \"model-" +
                            str(best_at_step) + "\"")
                        checkpointFile.close()
            """Step 7: predict x_test (batch by batch)"""
            test_batches = data_helper.batch_iter(list(zip(x_test, y_test)),
                                                  params['batch_size'], 1)
            total_test_correct = 0
            for test_batch in test_batches:
                x_test_batch, y_test_batch = zip(*test_batch)
                num_test_correct = dev_step(x_test_batch, y_test_batch)
                total_test_correct += num_test_correct
            test_accuracy = float(total_test_correct) / len(y_test)
            logging.critical(
                'Accuracy on test set is {} based on the best model {}'.format(
                    test_accuracy, path))
            logging.critical('The training is complete')
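
The function above saves labels.json, vocab.pickle, and checkpoints for a separate predict.py. Restoring them typically follows the pattern shown in the testing half of Example #11; the following is a sketch that assumes the same tensor names (input_x, dropout_keep_prob, output/predictions), with x_raw standing for a list of raw sentences:

# Sketch of a predict-time restore (cf. the testing section of Example #11).
vocab_processor = learn.preprocessing.VocabularyProcessor.restore(
    os.path.join(out_dir, "vocab.pickle"))
x_test = np.array(list(vocab_processor.transform(x_raw)))
checkpoint_file = tf.train.latest_checkpoint(os.path.join(out_dir, "checkpoints"))

graph = tf.Graph()
with graph.as_default():
    sess = tf.Session()
    with sess.as_default():
        saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
        saver.restore(sess, checkpoint_file)
        input_x = graph.get_operation_by_name("input_x").outputs[0]
        dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
        predictions = graph.get_operation_by_name("output/predictions").outputs[0]
        preds = sess.run(predictions, {input_x: x_test, dropout_keep_prob: 1.0})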
def train_cnn():
    """Training CNN model."""

    # Load sentences, labels, and training parameters
    logger.info('✔︎ Loading data...')

    logger.info('✔︎ Training data processing...')
    train_data = data_helpers.load_data_and_labels(FLAGS.training_data_file,
                                                   FLAGS.embedding_dim)

    logger.info('✔︎ Validation data processing...')
    validation_data = data_helpers.load_data_and_labels(
        FLAGS.validation_data_file, FLAGS.embedding_dim)

    logger.info('Recommended padding sequence length is: {}'.format(
        FLAGS.pad_seq_len))

    logger.info('✔︎ Training data padding...')
    x_train_front, x_train_behind, y_train = data_helpers.pad_data(
        train_data, FLAGS.pad_seq_len)

    logger.info('✔︎ Validation data padding...')
    x_validation_front, x_validation_behind, y_validation = data_helpers.pad_data(
        validation_data, FLAGS.pad_seq_len)

    # Build vocabulary
    VOCAB_SIZE = data_helpers.load_vocab_size(FLAGS.embedding_dim)
    pretrained_word2vec_matrix = data_helpers.load_word2vec_matrix(
        VOCAB_SIZE, FLAGS.embedding_dim)

    # Build a graph and cnn object
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=FLAGS.pad_seq_len,
                          num_classes=y_train.shape[1],
                          vocab_size=VOCAB_SIZE,
                          embedding_size=FLAGS.embedding_dim,
                          embedding_type=FLAGS.embedding_type,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda,
                          pretrained_embedding=pretrained_word2vec_matrix)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step,
                                                 name="train_op")

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            logger.info("✔︎ Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Validation summaries
            validation_summary_op = tf.summary.merge(
                [loss_summary, acc_summary])
            validation_summary_dir = os.path.join(out_dir, "summaries",
                                                  "validation")
            validation_summary_writer = tf.summary.FileWriter(
                validation_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())

            def train_step(x_batch_front, x_batch_behind, y_batch):
                """A single training step"""
                feed_dict = {
                    cnn.input_x_front: x_batch_front,
                    cnn.input_x_behind: x_batch_behind,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                logger.info("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def validation_step(x_batch_front,
                                x_batch_behind,
                                y_batch,
                                writer=None):
                """Evaluates model on a validation set"""
                feed_dict = {
                    cnn.input_x_front: x_batch_front,
                    cnn.input_x_behind: x_batch_behind,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, scores, predictions, num_correct, \
                loss, accuracy, recall, precision, f1, auc, topKPreds, = sess.run(
                    [global_step, validation_summary_op, cnn.scores, cnn.predictions, cnn.num_correct,
                     cnn.loss, cnn.accuracy, cnn.recall, cnn.precision, cnn.F1, cnn.AUC, cnn.topKPreds], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                logger.info(
                    "{}: step {}, loss {:g}, acc {:g}, "
                    "recall {:g}, precision {:g}, f1 {:g}, AUC {}".format(
                        time_str, step, loss, accuracy, recall, precision, f1,
                        auc))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = data_helpers.batch_iter(
                list(zip(x_train_front, x_train_behind, y_train)),
                FLAGS.batch_size, FLAGS.num_epochs)

            # Training loop. For each batch...
            for batch in batches:
                x_batch_front, x_batch_behind, y_batch = zip(*batch)
                train_step(x_batch_front, x_batch_behind, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    logger.info("\nEvaluation:")
                    validation_step(x_validation_front,
                                    x_validation_behind,
                                    y_validation,
                                    writer=validation_summary_writer)
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    logger.info(
                        "✔︎ Saved model checkpoint to {}\n".format(path))

    logger.info("✔︎ Done.")
def train_cnn():
    """Step 0: load sentences, labels, and training parameters"""
    train_file = 'Data/iseardataset.csv'
    x_raw, y_raw, df, labels, embedding_mat = data_helper.load_data_and_labels(
        train_file)

    parameter_file = './parameters.json'
    params = json.loads(open(parameter_file).read())
    """Step 1: pad each sentence to the same length and map each word to an id"""
    max_document_length = max([len(x.split(' ')) for x in x_raw])
    logging.info(
        'The maximum length of all sentences: {}'.format(max_document_length))
    vocab_processor = learn.preprocessing.VocabularyProcessor(
        max_document_length)
    x = np.array(list(vocab_processor.fit_transform(x_raw)))
    y = np.array(y_raw)

    # print x.shape
    """Step 2: split the original dataset into train and test sets"""
    x_, x_test, y_, y_test = train_test_split(x,
                                              y,
                                              test_size=0.2,
                                              random_state=42)
    """Step 3: shuffle the train set and split the train set into train and dev sets"""
    shuffle_indices = np.random.permutation(np.arange(len(y_)))
    x_shuffled = x_[shuffle_indices]
    y_shuffled = y_[shuffle_indices]
    x_train, x_dev, y_train, y_dev = train_test_split(x_shuffled,
                                                      y_shuffled,
                                                      test_size=0.2)
    """Step 4: save the labels into labels.json since predict.py needs it"""
    with open('./labels.json', 'w') as outfile:
        json.dump(labels, outfile, indent=4)

    logging.info('x_train: {}, x_dev: {}, x_test: {}'.format(
        len(x_train), len(x_dev), len(x_test)))
    logging.info('y_train: {}, y_dev: {}, y_test: {}'.format(
        len(y_train), len(y_dev), len(y_test)))
    """Step 5: build a graph and cnn object"""
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=9000,
                          embedding_size=params['embedding_dim'],
                          filter_sizes=list(
                              map(int, params['filter_sizes'].split(","))),
                          num_filters=params['num_filters'],
                          embedding_mat=embedding_mat,
                          l2_reg_lambda=params['l2_reg_lambda'])

            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "trained_model_" + timestamp))

            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.all_variables())

            # One training step: train the model with one batch
            def train_step(x_batch, y_batch):
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: params['dropout_keep_prob']
                }
                _, step, loss, acc = sess.run(
                    [train_op, global_step, cnn.loss, cnn.accuracy], feed_dict)

            # One evaluation step: evaluate the model with one batch
            def dev_step(x_batch, y_batch):
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, loss, acc, num_correct = sess.run(
                    [global_step, cnn.loss, cnn.accuracy, cnn.num_correct],
                    feed_dict)
                return num_correct

            # Save the word_to_id map since predict.py needs it
            vocab_processor.save(os.path.join(out_dir, "vocab.pickle"))
            sess.run(tf.initialize_all_variables())

            print "Loading Embeddings !"
            initW = data_helper.load_embedding_vectors(
                vocab_processor.vocabulary_)
            sess.run(cnn.W.assign(initW))

            print "Loaded Embeddings !"

            # Training starts here
            train_batches = data_helper.batch_iter(list(zip(x_train, y_train)),
                                                   params['batch_size'],
                                                   params['num_epochs'])
            best_accuracy, best_at_step = 0, 0
            """Step 6: train the cnn model with x_train and y_train (batch by batch)"""
            for train_batch in train_batches:
                if len(train_batch) == 0:
                    continue
                x_train_batch, y_train_batch = zip(*train_batch)
                train_step(x_train_batch, y_train_batch)
                current_step = tf.train.global_step(sess, global_step)
                """Step 6.1: evaluate the model with x_dev and y_dev (batch by batch)"""
                if current_step % params['evaluate_every'] == 0:
                    dev_batches = data_helper.batch_iter(
                        list(zip(x_dev, y_dev)), params['batch_size'], 1)
                    total_dev_correct = 0
                    for dev_batch in dev_batches:
                        if len(dev_batch) == 0:
                            continue
                        x_dev_batch, y_dev_batch = zip(*dev_batch)
                        num_dev_correct = dev_step(x_dev_batch, y_dev_batch)
                        total_dev_correct += num_dev_correct

                    dev_accuracy = float(total_dev_correct) / len(y_dev)
                    logging.critical(
                        'Accuracy on dev set: {}'.format(dev_accuracy))
                    """Step 6.2: save the model if it is the best based on accuracy of the dev set"""
                    if dev_accuracy >= best_accuracy:
                        best_accuracy, best_at_step = dev_accuracy, current_step
                        path = saver.save(sess,
                                          checkpoint_prefix,
                                          global_step=current_step)
                        logging.critical('Saved model {} at step {}'.format(
                            path, best_at_step))
                        logging.critical('Best accuracy {} at step {}'.format(
                            best_accuracy, best_at_step))
            """Step 7: predict x_test (batch by batch)"""
            test_batches = data_helper.batch_iter(list(zip(x_test, y_test)),
                                                  params['batch_size'], 1)
            total_test_correct = 0
            for test_batch in test_batches:
                if len(test_batch) == 0:
                    continue
                print "Non Zero Length"
                x_test_batch, y_test_batch = zip(*test_batch)
                num_test_correct = dev_step(x_test_batch, y_test_batch)
                total_test_correct += num_test_correct

            test_accuracy = float(total_test_correct) / len(y_test)

            train_batches = data_helper.batch_iter(list(zip(x_train, y_train)),
                                                   params['batch_size'], 1)

            total_train_correct = 0
            for train_batch in train_batches:
                if len(train_batch) == 0:
                    continue
                print "Non Zero Length"
                x_train_batch, y_train_batch = zip(*train_batch)
                num_test_correct = dev_step(x_train_batch, y_train_batch)
                total_train_correct += num_test_correct

            train_accuracy = float(total_train_correct) / len(y_train)

        print 'Accuracy on test set is {} based on the best model'.format(
            test_accuracy)
        print 'Accuracy on train set is {} based on the best model'.format(
            train_accuracy)
        # logging.critical('Accuracy on test set is {} based on the best model {}'.format(test_accuracy, path))
        logging.critical('The training is complete')
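
data_helper.load_embedding_vectors used above is project-specific and not shown. A hypothetical sketch of such a helper builds a (vocab_size, embedding_dim) matrix, randomly initialized and then overwritten with pre-trained vectors where available; the signature and the dimension default here are assumptions:

def load_embedding_vectors(vocabulary, embedding_dim=300):
    # Hypothetical sketch of the unseen helper: random init in [-0.25, 0.25];
    # in a real implementation, rows for words found in a pre-trained model
    # (word2vec/GloVe) would be overwritten, e.g. initW[vocabulary.get(word)] = vector.
    initW = np.random.uniform(-0.25, 0.25,
                              (len(vocabulary), embedding_dim)).astype(np.float32)
    return initW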
Example #10
def cnn_train(training_r, training_w):
    assert os.path.exists(training_r) and os.path.exists(training_w)
    VOCAB_DIR = os.path.join(os.path.dirname(training_r), "..")
    MODEL_DIR = os.path.join(os.path.dirname(training_r), "..", "runs")
    print "Loading data..."
    x_train_r, x_train_w, y_train_r, y_train_w = data_helpers.load_data_and_labels(
        training_r, training_w)
    # Restore dictionary
    print "Restoring vocab..."
    vocab_path = os.path.join(VOCAB_DIR, "vocab")
    try:
        assert os.path.exists(vocab_path)
        vocab_processor = learn.preprocessing.VocabularyProcessor.restore(
            vocab_path)
        print "vocab_lenght:", len(vocab_processor.vocabulary_)
    except Exception as e:
        print "Failed to restore vocab"
        traceback.print_exc()

    print "PreProcessing data..."
    # Map words to int vector
    x_train_r = np.array(list(vocab_processor.transform(x_train_r)))
    y_train_r = np.array(y_train_r)
    x_train_w = np.array(list(vocab_processor.transform(x_train_w)))
    y_train_w = np.array(y_train_w)
    print "Positive/Negative {:d}/{:d}".format(len(y_train_r), len(y_train_w))

    # Add priority for right data
    y_train_r = y_train_r * config_.right_weight  # multiply the weight of the right data
    # Training data
    x_train = np.array(list(x_train_r) + list(x_train_w))
    y_train = np.array(list(y_train_r) + list(y_train_w))

    # Training
    # ==================================================
    print "Start training..."
    start_train = time.time()
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=config_.allow_soft_placement,
            log_device_placement=config_.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=x_train_r.shape[1],
                          num_classes=y_train_r.shape[1],
                          vocab_size=max(config_.max_vocab_keep,
                                         len(vocab_processor.vocabulary_)),
                          embedding_size=config_.embedding_dim,
                          filter_sizes=list(
                              map(int, config_.filter_sizes.split(","))),
                          num_filters=config_.num_filters,
                          l2_reg_lambda=config_.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)
            tf.add_to_collection('train_op', train_op)

            # Output directory for models and summaries
            out_dir = os.path.abspath(MODEL_DIR)
            #if os.path.exists(out_dir):
            #    gfile.DeleteRecursively(out_dir)
            print "Writing to {}\n".format(out_dir)

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=config_.num_checkpoints)
            # Write vocabulary
            #vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: config_.dropout_keep_prob,
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)
                time_str = datetime.now().isoformat()
                print "{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy)
                train_summary_writer.add_summary(summaries, step)

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              config_.batch_size,
                                              config_.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % config_.checkpoint_every == 0:
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    print "Saved model checkpoint to {}\n".format(path)
    end_train = time.time()
    print 'Train costs', end_train - start_train
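
cnn_train takes the paths of the "right" and "wrong" training files and asserts that both exist; a minimal call looks like this (the paths are placeholders):

if __name__ == "__main__":
    cnn_train("data/training_right.txt", "data/training_wrong.txt")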
Example #11
def execute(dataset, trainingSection):

    # Set up parameters

    trainDataDir = '/Users/SamZhang/Documents/Capstone/dataset/' + dataset + '/train'

    # Data loading params
    tf.flags.DEFINE_float("dev_sample_percentage", .2, "Percentage of the training data to use for validation")
    tf.flags.DEFINE_string("positive_data_file", trainDataDir + "/spam/" + str(trainingSection) + '/' + dataset + "_train.spam", "Data source for the positive data.")
    tf.flags.DEFINE_string("negative_data_file", trainDataDir + "/ham/" + str(trainingSection) + '/' + dataset + "_train.ham", "Data source for the negative data.")

    # Model Hyperparameters
    tf.flags.DEFINE_integer("embedding_dim", 128, "Dimensionality of character embedding (default: 128)")
    tf.flags.DEFINE_string("filter_sizes", "3,4,5", "Comma-separated filter sizes (default: '3,4,5')")
    tf.flags.DEFINE_integer("num_filters", 256, "Number of filters per filter size (default: 128)")
    tf.flags.DEFINE_float("dropout_keep_prob", 0.5, "Dropout keep probability (default: 0.5)")
    tf.flags.DEFINE_float("l2_reg_lambda", 0.1, "L2 regularization lambda (default: 0.0)")

    # Training parameters
    tf.flags.DEFINE_integer("batch_size", 128, "Batch Size (default: 64)")
    tf.flags.DEFINE_integer("num_epochs", 30, "Number of training epochs (default: 200)")
    tf.flags.DEFINE_integer("evaluate_every", 30, "Evaluate model on dev set after this many steps (default: 100)")
    tf.flags.DEFINE_integer("checkpoint_every", 30, "Save model after this many steps (default: 100)")
    tf.flags.DEFINE_integer("num_checkpoints", 5, "Number of checkpoints to store (default: 5)")
    # Misc Parameters
    tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
    tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")


    # In[4]:


    FLAGS = tf.flags.FLAGS
    FLAGS._parse_flags()
    print("\nParameters:")
    for attr, value in sorted(FLAGS.__flags.items()):
        print("{}={}".format(attr.upper(), value))
    print("")


    # # Process data

    # In[5]:


    def processData():
        # Data Preparation
        # ==================================================

        # Load data
        print("Loading data...")
        x_text, y = data_helpers.load_data_and_labels(FLAGS.positive_data_file, FLAGS.negative_data_file)
        print(x_text[0])

        # Build vocabulary
        max_document_length = max([len(x.split(" ")) for x in x_text])
        vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
        x = np.array(list(vocab_processor.fit_transform(x_text)))

        # Randomly shuffle data
        np.random.seed(10)
        shuffle_indices = np.random.permutation(np.arange(len(y)))
        x_shuffled = x[shuffle_indices]
        y_shuffled = y[shuffle_indices]

        # Split train/test set
        # TODO: This is very crude, should use cross-validation
        dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
        x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
        y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]

        del x, y, x_shuffled, y_shuffled

        print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
        print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))
        
        return x_train, y_train, x_dev, y_dev, vocab_processor


    x_train, y_train, x_dev, y_dev, vocab_processor = processData()

    # Building CNN

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
          allow_soft_placement=FLAGS.allow_soft_placement,
          log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(
                sequence_length=x_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_size=FLAGS.embedding_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", "cnnmodel", dataset, str(trainingSection)))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                feed_dict = {
                  cnn.input_x: x_batch,
                  cnn.input_y: y_batch,
                  cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                  cnn.input_x: x_batch,
                  cnn.input_y: y_batch,
                  cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = data_helpers.batch_iter(
                list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))


    # Testing model

    from sklearn import metrics
    import csv

    testDataDir = "/Users/SamZhang/Documents/Capstone/dataset/" + dataset + "/test"


    tf.flags.DEFINE_string("positive_test_data_file", testDataDir + "/spam/" + str(trainingSection) + '/' + dataset + "_test.spam", "Data source for the positive data.")
    tf.flags.DEFINE_string("negative_test_data_file", testDataDir + "/ham/" + str(trainingSection) + '/' + dataset + "_test.ham", "Data source for the negative data.")

    # Eval Parameters
    tf.flags.DEFINE_string("checkpoint_dir", "/Users/SamZhang/Documents/Capstone/Models/runs/cnnmodel/" + dataset + "/" + str(trainingSection) + "/checkpoints/", "Checkpoint directory from training run")


    FLAGS = tf.flags.FLAGS
    FLAGS._parse_flags()
    print("\nParameters:")
    for attr, value in sorted(FLAGS.__flags.items()):
        print("{}={}".format(attr.upper(), value))
    print("")


    x_raw, y_test = data_helpers.load_data_and_labels(FLAGS.positive_test_data_file, FLAGS.negative_test_data_file)
    y_test = np.argmax(y_test, axis=1) #ham = 0, spam = 1
    print(x_raw[0], y_test[0])


    vocab_path = os.path.join(FLAGS.checkpoint_dir, "..", "vocab")
    vocab_processor = learn.preprocessing.VocabularyProcessor.restore(vocab_path)
    x_test = np.array(list(vocab_processor.transform(x_raw)))


    checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)


    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
          allow_soft_placement=FLAGS.allow_soft_placement,
          log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            # input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]

            # Generate batches for one epoch
            batches = data_helpers.batch_iter(list(x_test), FLAGS.batch_size, 1, shuffle=False)

            # Collect the predictions here
            all_predictions = []

            for x_test_batch in batches:
                batch_predictions = sess.run(predictions, {input_x: x_test_batch, dropout_keep_prob: 1.0})
                all_predictions = np.concatenate([all_predictions, batch_predictions])

    # Print accuracy if y_test is defined
    if y_test is not None:
        correct_predictions = float(sum(all_predictions == y_test))
        print("Total number of test examples: {}".format(len(y_test)))
        print("Accuracy: {:g}".format(correct_predictions/float(len(y_test))))


    # In[16]:


    # Save the evaluation to a csv
    title = np.column_stack(('text', 'prediction', 'label'))
    predictions_human_readable = np.column_stack((np.array(x_raw), all_predictions, y_test))
    out_path = os.path.join(FLAGS.checkpoint_dir, "..", "prediction.csv")
    print("Saving evaluation to {0}".format(out_path))

    with open(out_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerows(title)
        writer.writerows(predictions_human_readable)


    # # Evaluation and Noise Reduction

    # In[17]:


    import ES_interface as esi

    esi.metric(dataset + '_' + str(trainingSection), out_path)
def main():
    tf.flags.DEFINE_float(
        "dev_sample_percentage", .2,
        "Percentage of the training data to use for validation")
    tf.flags.DEFINE_string("positive_data_file",
                           "./data/rt-polaritydata/rt-polarity.pos",
                           "Data source for the positive data.")
    tf.flags.DEFINE_string("negative_data_file",
                           "./data/rt-polaritydata/rt-polarity.neg",
                           "Data source for the negative data.")

    # Model Hyperparameters
    tf.flags.DEFINE_integer(
        "embedding_dim", 128,
        "Dimensionality of character embedding (default: 128)")
    tf.flags.DEFINE_string("filter_sizes", "3,4,5",
                           "Comma-separated filter sizes (default: '3,4,5')")
    tf.flags.DEFINE_integer(
        "num_filters", 128, "Number of filters per filter size (default: 128)")
    tf.flags.DEFINE_float("dropout_keep_prob", 0.5,
                          "Dropout keep probability (default: 0.5)")
    tf.flags.DEFINE_float("l2_reg_lambda", 0.0,
                          "L2 regularization lambda (default: 0.0)")

    # Training parameters
    tf.flags.DEFINE_integer("batch_size", 64, "Batch Size (default: 64)")
    tf.flags.DEFINE_integer("num_epochs", 20,
                            "Number of training epochs (default: 200)")
    tf.flags.DEFINE_integer(
        "evaluate_every", 100,
        "Evaluate model on dev set after this many steps (default: 100)")
    tf.flags.DEFINE_integer("checkpoint_every", 100,
                            "Save model after this many steps (default: 100)")
    # Misc Parameters
    tf.flags.DEFINE_boolean("allow_soft_placement", True,
                            "Allow device soft device placement")
    tf.flags.DEFINE_boolean("log_device_placement", False,
                            "Log placement of ops on devices")

    FLAGS = tf.flags.FLAGS
    FLAGS._parse_flags()
    user_reviews = []
    user_ratings = []
    with open('ratings.txt', 'rb') as fp:
        user_ratings = np.array(pickle.load(fp))

    with open('reviews.txt', 'rb') as fp:
        user_reviews = np.array(pickle.load(fp))

    user_ratings = format_rating(user_ratings)

    x_text = user_reviews
    y = user_ratings
    # Build vocabulary
    max_document_length = max([len(x.split(" ")) for x in x_text])
    vocab_processor = learn.preprocessing.VocabularyProcessor(
        max_document_length)
    x = np.array(list(vocab_processor.fit_transform(x_text)))

    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]

    # Split train/test set
    # TODO: This is very crude, should use cross-validation
    #dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
    val_sample_index = int(FLAGS.dev_sample_percentage *
                           float(len(x_shuffled)))
    accuracy_list = []
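    # Hedged alternative sketch (not used below): the same contiguous 5-fold split
    # could be produced with sklearn.model_selection.KFold, e.g.
    #
    #     from sklearn.model_selection import KFold
    #     for train_idx, val_idx in KFold(n_splits=5).split(x_shuffled):
    #         x_train, x_val = x_shuffled[train_idx], x_shuffled[val_idx]
    #         y_train, y_val = y_shuffled[train_idx], y_shuffled[val_idx]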
    for i in range(5):
        #x_train, x_val = x_shuffled[:i*val_sample_index]+x_shuffled[i*val_sample_index+val_sample_index:], x_shuffled[dev_sample_index:]
        #y_train, y_val = y_shuffled[i*val_sample_index:i*val_sample_index+val_sample_index], y_shuffled[i*val_sample_index:i*val_sample_index+val_sample_index]

        y_val = y_shuffled[i * val_sample_index:i * val_sample_index +
                           val_sample_index]
        y_train = np.array(
            list(y_shuffled[:i * val_sample_index]) +
            list(y_shuffled[i * val_sample_index + val_sample_index:]))

        x_val = x_shuffled[i * val_sample_index:i * val_sample_index +
                           val_sample_index]
        x_train = np.array(
            list(x_shuffled[:i * val_sample_index]) +
            list(x_shuffled[i * val_sample_index + val_sample_index:]))

        print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
        print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_val)))

        # Training
        # ==================================================

        with tf.Graph().as_default():
            session_conf = tf.ConfigProto(
                allow_soft_placement=FLAGS.allow_soft_placement,
                log_device_placement=FLAGS.log_device_placement)
            sess = tf.Session(config=session_conf)
            with sess.as_default():
                cnn = TextCNN(sequence_length=x_train.shape[1],
                              num_classes=y_train.shape[1],
                              vocab_size=len(vocab_processor.vocabulary_),
                              embedding_size=FLAGS.embedding_dim,
                              filter_sizes=list(
                                  map(int, FLAGS.filter_sizes.split(","))),
                              num_filters=FLAGS.num_filters,
                              l2_reg_lambda=FLAGS.l2_reg_lambda)

                # Define Training procedure
                global_step = tf.Variable(0,
                                          name="global_step",
                                          trainable=False)
                optimizer = tf.train.AdamOptimizer(1e-3)
                grads_and_vars = optimizer.compute_gradients(cnn.loss)
                train_op = optimizer.apply_gradients(grads_and_vars,
                                                     global_step=global_step)

                # Keep track of gradient values and sparsity (optional)
                grad_summaries = []
                for g, v in grads_and_vars:
                    if g is not None:
                        grad_hist_summary = tf.summary.histogram(
                            "{}/grad/hist".format(v.name), g)
                        sparsity_summary = tf.summary.scalar(
                            "{}/grad/sparsity".format(v.name),
                            tf.nn.zero_fraction(g))
                        grad_summaries.append(grad_hist_summary)
                        grad_summaries.append(sparsity_summary)
                grad_summaries_merged = tf.summary.merge(grad_summaries)

                # Output directory for models and summaries
                timestamp = str(int(time.time()))
                out_dir = os.path.abspath(
                    os.path.join(os.path.curdir, "runs", timestamp))
                print("Writing to {}\n".format(out_dir))

                # Summaries for loss and accuracy
                loss_summary = tf.summary.scalar("loss", cnn.loss)
                acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

                # Train Summaries
                train_summary_op = tf.summary.merge(
                    [loss_summary, acc_summary, grad_summaries_merged])
                train_summary_dir = os.path.join(out_dir, "summaries", "train")
                train_summary_writer = tf.summary.FileWriter(
                    train_summary_dir, sess.graph)

                # Dev summaries
                dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
                dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
                dev_summary_writer = tf.summary.FileWriter(
                    dev_summary_dir, sess.graph)

                # Write vocabulary
                vocab_processor.save(os.path.join(out_dir, "vocab"))

                # Initialize all variables
                sess.run(tf.global_variables_initializer())

                def train_step(x_batch, y_batch):
                    """
                    A single training step
                    """
                    feed_dict = {
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                    }
                    _, step, summaries, loss, accuracy = sess.run([
                        train_op, global_step, train_summary_op, cnn.loss,
                        cnn.accuracy
                    ], feed_dict)
                    time_str = datetime.datetime.now().isoformat()
                    print((i + 1), "{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss, accuracy))
                    train_summary_writer.add_summary(summaries, step)

                def dev_step(x_batch, y_batch, writer=None):
                    """
                    Evaluates model on a dev set
                    """
                    feed_dict = {
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: 1.0
                    }
                    step, summaries, loss, accuracy = sess.run(
                        [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                        feed_dict)
                    time_str = datetime.datetime.now().isoformat()
                    print((i + 1), "{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss, accuracy))
                    if writer:
                        writer.add_summary(summaries, step)
                    return accuracy

                # Generate batches
                batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                                  FLAGS.batch_size,
                                                  FLAGS.num_epochs)
                # Training loop. For each batch...
                for batch in batches:
                    x_batch, y_batch = zip(*batch)
                    train_step(x_batch, y_batch)
                    current_step = tf.train.global_step(sess, global_step)
                    if current_step % FLAGS.evaluate_every == 0:
                        print("\nEvaluation:")
                        dev_step(x_val, y_val, writer=dev_summary_writer)
                        print("")
                # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
                #checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
                checkpoint_dir = "checkpoints_" + str(i) + "/"
                if not os.path.exists(checkpoint_dir):
                    os.makedirs(checkpoint_dir)
                saver = tf.train.Saver()
                checkpoint_prefix = os.path.join(checkpoint_dir, "model")
                saver.save(sess, checkpoint_prefix, global_step=current_step)

                accuracy_list.append(
                    dev_step(x_val, y_val, writer=dev_summary_writer))
    print("After cross validation ")
    print("Accuracy=", np.mean(accuracy_list))
Example #13
def train():
    with tf.device('/cpu:0'):
        x_text, pos1, pos2, y = data_helpers.load_hw_data_and_labels(
            FLAGS.train_dir)

    # Build vocabulary
    # Example: x_text[3] = "A misty <e1>ridge</e1> uprises from the <e2>surge</e2>."
    # ['a misty ridge uprises from the surge <UNK> <UNK> ... <UNK>']
    # =>
    # [27 39 40 41 42  1 43  0  0 ... 0]
    # dimension = FLAGS.max_sentence_length
    text_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
        FLAGS.max_sentence_length)
    text_vec = np.array(list(text_vocab_processor.fit_transform(x_text)))
    print("Text Vocabulary Size: {:d}".format(
        len(text_vocab_processor.vocabulary_)))

    # Example: pos1[3] = [-2 -1  0  1  2   3   4 999 999 999 ... 999]
    # [95 96 97 98 99 100 101 999 999 999 ... 999]
    # =>
    # [11 12 13 14 15  16  21  17  17  17 ...  17]
    # dimension = MAX_SENTENCE_LENGTH
    pos_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
        FLAGS.max_sentence_length)
    pos_vocab_processor.fit(pos1 + pos2)
    pos1_vec = np.array(list(pos_vocab_processor.transform(pos1)))
    pos2_vec = np.array(list(pos_vocab_processor.transform(pos2)))
    print("Position Vocabulary Size: {:d}".format(
        len(pos_vocab_processor.vocabulary_)))

    x = np.array([list(i) for i in zip(text_vec, pos1_vec, pos2_vec)])

    print("x = {0}".format(x.shape))
    print("y = {0}".format(y.shape))
    print("")

    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]

    # Split train/test set
    # TODO: This is very crude, should use cross-validation
    dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
    x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[
        dev_sample_index:]
    x_dev = np.array(x_dev).transpose((1, 0, 2))
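    # transpose((1, 0, 2)) turns the (num_examples, 3, seq_len) stack into three
    # (num_examples, seq_len) arrays: x_dev[0] = text, x_dev[1] = pos1, x_dev[2] = pos2.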
    y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[
        dev_sample_index:]
    print("Train/Dev split: {:d}/{:d}\n".format(len(y_train), len(y_dev)))

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(
                sequence_length=x_train.shape[2],
                num_classes=y_train.shape[1],
                text_vocab_size=len(text_vocab_processor.vocabulary_),
                text_embedding_size=FLAGS.text_embedding_dim,
                pos_vocab_size=len(pos_vocab_processor.vocabulary_),
                pos_embedding_size=FLAGS.position_embedding_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(
                cnn.loss, global_step=global_step)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            text_vocab_processor.save(os.path.join(out_dir, "text_vocab"))
            pos_vocab_processor.save(os.path.join(out_dir, "position_vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Pre-trained word2vec
            if FLAGS.word2vec:
                # initial matrix with random uniform
                initW = np.random.uniform(
                    -0.25, 0.25, (len(text_vocab_processor.vocabulary_),
                                  FLAGS.text_embedding_dim))
                # load any vectors from the word2vec
                print("Load word2vec file {0}".format(FLAGS.word2vec))
                with open(FLAGS.word2vec, "rb") as f:
                    header = f.readline()

                    print('header :', header)
                    vocab_size, layer1_size = map(int, header.split())
                    binary_len = np.dtype('float32').itemsize * layer1_size
                    for line in range(vocab_size):
                        word = []
                        while True:
                            ch = f.read(1).decode('latin-1')

                            # print('ch :', ch)  # very verbose: would fire once per byte of every vocab entry
                            if ch == ' ':
                                word = ''.join(word)
                                break
                            if ch != '\n':
                                word.append(ch)
                        idx = text_vocab_processor.vocabulary_.get(word)
                        if idx != 0:
                            initW[idx] = np.frombuffer(f.read(binary_len),
                                                       dtype='float32')
                        else:
                            f.read(binary_len)

                print('initW[0] :', initW[0], initW[1])
                sess.run(cnn.W_text.assign(initW))
                print("Success to load pre-trained word2vec model!\n")

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size,
                                              FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                x_batch = np.array(x_batch).transpose((1, 0, 2))

                # Train
                feed_dict = {
                    cnn.input_text: x_batch[0],
                    cnn.input_pos1: x_batch[1],
                    cnn.input_pos2: x_batch[2],
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)
                train_summary_writer.add_summary(summaries, step)

                # Training log display
                if step % FLAGS.display_every == 0:
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss, accuracy))

                # Evaluation
                if step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    feed_dict = {
                        cnn.input_text: x_dev[0],
                        cnn.input_pos1: x_dev[1],
                        cnn.input_pos2: x_dev[2],
                        cnn.input_y: y_dev,
                        cnn.dropout_keep_prob: 1.0
                    }
                    summaries, loss, accuracy, predictions = sess.run([
                        dev_summary_op, cnn.loss, cnn.accuracy, cnn.predictions
                    ], feed_dict)
                    dev_summary_writer.add_summary(summaries, step)

                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss, accuracy))
                    print(
                        "(2*9+1)-Way Macro-Average F1 Score (excluding Other): {:g}\n"
                        .format(
                            f1_score(np.argmax(y_dev, axis=1),
                                     predictions,
                                     labels=np.array(range(1, 19)),
                                     average="macro")))

                # Model checkpoint
                if step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=step)
                    print("Saved model checkpoint to {}\n".format(path))
Example #14
def main(_):
    # Load data
    print("Loading data...")
    x_, y = data_helpers.load_data_and_labels(FLAGS.train_file)
    train_int_to_vab, train_to_int = data_helpers.cret_dict(x_)
    # Save the corresponding word-to-index mapping
    print(train_int_to_vab)

    # Store all characters in a file so they can be loaded at test time
    pickle.dump(train_int_to_vab, open('./vocab_index.pkl', 'wb'))
    print(train_int_to_vab)

    train_ids = [[
        train_to_int.get(term, train_to_int['<UNK>']) for term in line
    ] for line in x_]
    x_ = data_helpers.pad_sentences(train_ids, 20)
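    # pad_sentences (a project helper) is assumed here to pad or truncate every id
    # sequence to a fixed length of 20, so all inputs share one sequence_length.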
    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x_[shuffle_indices]
    y = np.array(y)
    y_shuffled = y[shuffle_indices]

    # Split train/test set
    # TODO: This is very crude, should use cross-validation
    dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
    x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[
        dev_sample_index:]
    y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[
        dev_sample_index:]
    print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))
    y_train = np_utils.to_categorical(y_train)
    y_dev = np_utils.to_categorical(y_dev)

    # Training
    # ==================================================

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=len(train_int_to_vab),
                          embedding_size=FLAGS.embedding_dim,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size,
                                              FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
Example #15
def training():
    """Train the model.
    """
    x_train, y_train, x_test, y_test, vocab_size = data_preparation()

    print('---------------------------------------------')
    print(x_train.shape)
    print(y_train.shape)
    print(x_test.shape)
    print(y_test.shape)
    print('---------------------------------------------')

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=vocab_size,
                          embedding_size=FLAGS.embedding_dim,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(','))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        '{}/grad/hist'.format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        '{}/grad/sparsity'.format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, FLAGS.train, TIMESTAMP))
            print('Writing to {}\n'.format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar('loss', cnn.loss)
            acc_summary = tf.summary.scalar('accuracy', cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, 'summaries', 'train')
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, 'summaries', 'dev')
            dev_summary_writer = tf.summary.FileWriter(
                dev_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists
            # so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, 'checkpoints'))
            checkpoint_prefix = os.path.join(checkpoint_dir, 'model')
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables())

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                '''
                A single training step
                '''
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print('%s: step %8d, loss %2.6f, acc %2.6f\r' %
                      (time_str, step, loss, accuracy),
                      end='')
                train_summary_writer.add_summary(summaries, step)

            def validate_step(x, y, writer=None):
                '''
                Evaluates model on a dev set
                '''
                size = len(y)
                if size < FLAGS.batch_size:
                    raise ValueError(
                        "batch size for evals larger than dataset: %d" % size)

                accuracies = []
                for begin in range(0, size, FLAGS.batch_size):
                    end = begin + FLAGS.batch_size
                    if end < size:
                        feed_dict = {
                            cnn.input_x: x[begin:end, :],
                            cnn.input_y: y[begin:end, :],
                            cnn.dropout_keep_prob: 1.0
                        }
                    else:
                        feed_dict = {
                            cnn.input_x: x[-FLAGS.batch_size:, :],
                            cnn.input_y: y[-FLAGS.batch_size:, :],
                            cnn.dropout_keep_prob: 1.0
                        }

                    summaries, step, accuracy = sess.run(
                        [dev_summary_op, global_step, cnn.accuracy], feed_dict)

                    accuracies.append(accuracy)
                    if writer:
                        writer.add_summary(summaries, step)

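                # Note: when len(y) is not a multiple of FLAGS.batch_size, the last
                # window re-uses the final FLAGS.batch_size rows, so the mean below is
                # taken over slightly overlapping batches.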
                accuracy = sum(accuracies) / len(accuracies)
                time_str = datetime.datetime.now().isoformat()
                print('%s: step %8d, acc %2.6f' % (time_str, step, accuracy))

            def class_accuracy_stat(predictions, labels, all_data=False):

                predictions = predictions.tolist()
                labels = labels.tolist()

                with open(
                        os.path.join(FLAGS.train, TIMESTAMP,
                                     'label_to_int.json'), 'r') as f:
                    label_to_int = json.load(f)

                int_to_label = {
                    label_to_int[key]: key
                    for key in label_to_int.keys()
                }

                labels_stat = {}

                for i in range(len(labels)):
                    label = int_to_label[labels[i]]
                    if label not in labels_stat:
                        labels_stat[label] = {'num': 0, 'predictions': 0}
                    labels_stat[label]['num'] += 1
                    if labels[i] == predictions[i]:
                        labels_stat[label]['predictions'] += 1

                for label in labels_stat:
                    labels_stat[label]['accuracy'] = float(
                        labels_stat[label]
                        ['predictions']) / labels_stat[label]['num']

                if all_data:
                    with open(
                            os.path.join(FLAGS.train, TIMESTAMP,
                                         'all_labels_stat.json'), 'w+') as f:
                        json.dump(labels_stat, f)
                else:
                    with open(
                            os.path.join(FLAGS.train, TIMESTAMP,
                                         'test_labels_stat.json'), 'w+') as f:
                        json.dump(labels_stat, f)

            def test_step(x, y, writer=None, all_data=False):
                '''
                Evaluates model on a dev set
                '''
                size = len(y)

                accuracies = []
                all_predictions = []
                for begin in range(0, size, FLAGS.batch_size):
                    end = begin + FLAGS.batch_size
                    if end < size:
                        feed_dict = {
                            cnn.input_x: x[begin:end, :],
                            cnn.input_y: y[begin:end, :],
                            cnn.dropout_keep_prob: 1.0
                        }
                    else:
                        feed_dict = {
                            cnn.input_x: x[-FLAGS.batch_size:, :],
                            cnn.input_y: y[-FLAGS.batch_size:, :],
                            cnn.dropout_keep_prob: 1.0
                        }

                    summaries, step, accuracy, batch_predictions = sess.run([
                        dev_summary_op, global_step, cnn.accuracy,
                        cnn.predictions
                    ], feed_dict)

                    all_predictions = np.concatenate(
                        [all_predictions, batch_predictions])
                    accuracies.append(accuracy)
                    if writer:
                        writer.add_summary(summaries, step)

                class_accuracy_stat(all_predictions, np.argmax(y, axis=1),
                                    all_data)
                accuracy = sum(accuracies) / len(accuracies)
                time_str = datetime.datetime.now().isoformat()
                print('%s: step %8d, acc %2.6f' % (time_str, step, accuracy))

            # Generate batches
            batches = _batch_iter(x_train, y_train, FLAGS.batch_size,
                                  FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = batch
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print('\nValidation:')
                    validate_step(x_test, y_test, writer=dev_summary_writer)
                    print('')
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    print('Saved model checkpoint to {}\n'.format(path))

            print('\nTest:')
            test_step(x_test, y_test, writer=dev_summary_writer)
            test_step(np.concatenate((x_train, x_test), axis=0),
                      np.concatenate((y_train, y_test), axis=0),
                      writer=dev_summary_writer,
                      all_data=True)
            print('')
def train_cnn():
    """Training CNN model."""

    # Load sentences, labels, and training parameters
    logger.info("✔︎ Loading data...")

    logger.info("✔︎ Training data processing...")
    train_data = dh.load_data_and_labels(FLAGS.training_data_file,
                                         FLAGS.num_classes,
                                         FLAGS.embedding_dim,
                                         data_aug_flag=False)

    logger.info("✔︎ Validation data processing...")
    val_data = dh.load_data_and_labels(FLAGS.validation_data_file,
                                       FLAGS.num_classes,
                                       FLAGS.embedding_dim,
                                       data_aug_flag=False)

    logger.info("Recommended padding Sequence length is: {0}".format(
        FLAGS.pad_seq_len))

    logger.info("✔︎ Training data padding...")
    x_train, y_train = dh.pad_data(train_data, FLAGS.pad_seq_len)

    logger.info("✔︎ Validation data padding...")
    x_val, y_val = dh.pad_data(val_data, FLAGS.pad_seq_len)

    # Build vocabulary
    VOCAB_SIZE = dh.load_vocab_size(FLAGS.embedding_dim)
    pretrained_word2vec_matrix = dh.load_word2vec_matrix(
        VOCAB_SIZE, FLAGS.embedding_dim)

    # Build a graph and cnn object
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=FLAGS.pad_seq_len,
                          num_classes=FLAGS.num_classes,
                          vocab_size=VOCAB_SIZE,
                          fc_hidden_size=FLAGS.fc_hidden_size,
                          embedding_size=FLAGS.embedding_dim,
                          embedding_type=FLAGS.embedding_type,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(','))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda,
                          pretrained_embedding=pretrained_word2vec_matrix)

            # Define training procedure
            with tf.control_dependencies(
                    tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                learning_rate = tf.train.exponential_decay(
                    learning_rate=FLAGS.learning_rate,
                    global_step=cnn.global_step,
                    decay_steps=FLAGS.decay_steps,
                    decay_rate=FLAGS.decay_rate,
                    staircase=True)
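                # With staircase=True the learning rate decays in discrete steps:
                #   lr = FLAGS.learning_rate * FLAGS.decay_rate ** (global_step // FLAGS.decay_steps)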
                optimizer = tf.train.AdamOptimizer(learning_rate)
                grads, vars = zip(*optimizer.compute_gradients(cnn.loss))
                grads, _ = tf.clip_by_global_norm(grads,
                                                  clip_norm=FLAGS.norm_ratio)
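                # clip_by_global_norm rescales every gradient by
                # clip_norm / max(global_norm, clip_norm), so the combined global
                # norm never exceeds FLAGS.norm_ratio (directions are preserved).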
                train_op = optimizer.apply_gradients(
                    zip(grads, vars),
                    global_step=cnn.global_step,
                    name="train_op")

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in zip(grads, vars):
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{0}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{0}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            if FLAGS.train_or_restore == 'R':
                MODEL = input(
                    "☛ Please input the checkpoint model you want to restore, "
                    "it should look like 1490175368: "
                )  # The model you want to restore

                while not (MODEL.isdigit() and len(MODEL) == 10):
                    MODEL = input(
                        "✘ The format of your input is invalid, please re-enter: "
                    )
                logger.info(
                    "✔︎ The format of your input is valid, now loading the next step..."
                )
                out_dir = os.path.abspath(
                    os.path.join(os.path.curdir, "runs", MODEL))
                logger.info("✔︎ Writing to {0}\n".format(out_dir))
            else:
                timestamp = str(int(time.time()))
                out_dir = os.path.abspath(
                    os.path.join(os.path.curdir, "runs", timestamp))
                logger.info("✔︎ Writing to {0}\n".format(out_dir))

            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            best_checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "bestcheckpoints"))

            # Summaries for loss
            loss_summary = tf.summary.scalar("loss", cnn.loss)

            # Train summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Validation summaries
            validation_summary_op = tf.summary.merge([loss_summary])
            validation_summary_dir = os.path.join(out_dir, "summaries",
                                                  "validation")
            validation_summary_writer = tf.summary.FileWriter(
                validation_summary_dir, sess.graph)

            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)
            best_saver = cm.BestCheckpointSaver(save_dir=best_checkpoint_dir,
                                                num_to_keep=3,
                                                maximize=True)

            if FLAGS.train_or_restore == 'R':
                # Load cnn model
                logger.info("✔︎ Loading model...")
                checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
                logger.info(checkpoint_file)

                # Load the saved meta graph and restore variables
                saver = tf.train.import_meta_graph(
                    "{0}.meta".format(checkpoint_file))
                saver.restore(sess, checkpoint_file)
            else:
                if not os.path.exists(checkpoint_dir):
                    os.makedirs(checkpoint_dir)
                sess.run(tf.global_variables_initializer())
                sess.run(tf.local_variables_initializer())

                # Embedding visualization config
                config = projector.ProjectorConfig()
                embedding_conf = config.embeddings.add()
                embedding_conf.tensor_name = "embedding"
                embedding_conf.metadata_path = FLAGS.metadata_file

                projector.visualize_embeddings(train_summary_writer, config)
                projector.visualize_embeddings(validation_summary_writer,
                                               config)

                # Save the embedding visualization
                saver.save(
                    sess, os.path.join(out_dir, "embedding", "embedding.ckpt"))

            current_step = sess.run(cnn.global_step)

            def train_step(x_batch, y_batch):
                """A single training step"""
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
                    cnn.is_training: True
                }
                _, step, summaries, loss = sess.run(
                    [train_op, cnn.global_step, train_summary_op, cnn.loss],
                    feed_dict)
                logger.info("step {0}: loss {1:g}".format(step, loss))
                train_summary_writer.add_summary(summaries, step)

            def validation_step(x_val, y_val, writer=None):
                """Evaluates model on a validation set"""
                batches_validation = dh.batch_iter(list(zip(x_val, y_val)),
                                                   FLAGS.batch_size, 1)

                # Predict classes by threshold or topk ('ts': threshold; 'tk': topk)
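                # Hedged reading of the dh helpers (not their exact code): 'ts' keeps every
                # label whose score >= FLAGS.threshold, while 'tk' keeps the top_num
                # highest-scoring labels per example as a multi-hot vector.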
                eval_counter, eval_loss = 0, 0.0

                eval_pre_tk = [0.0] * FLAGS.top_num
                eval_rec_tk = [0.0] * FLAGS.top_num
                eval_F_tk = [0.0] * FLAGS.top_num

                true_onehot_labels = []
                predicted_onehot_scores = []
                predicted_onehot_labels_ts = []
                predicted_onehot_labels_tk = [[] for _ in range(FLAGS.top_num)]

                for batch_validation in batches_validation:
                    x_batch_val, y_batch_val = zip(*batch_validation)
                    feed_dict = {
                        cnn.input_x: x_batch_val,
                        cnn.input_y: y_batch_val,
                        cnn.dropout_keep_prob: 1.0,
                        cnn.is_training: False
                    }
                    step, summaries, scores, cur_loss = sess.run([
                        cnn.global_step, validation_summary_op, cnn.scores,
                        cnn.loss
                    ], feed_dict)

                    # Prepare for calculating metrics
                    for i in y_batch_val:
                        true_onehot_labels.append(i)
                    for j in scores:
                        predicted_onehot_scores.append(j)

                    # Predict by threshold
                    batch_predicted_onehot_labels_ts = \
                        dh.get_onehot_label_threshold(scores=scores, threshold=FLAGS.threshold)

                    for k in batch_predicted_onehot_labels_ts:
                        predicted_onehot_labels_ts.append(k)

                    # Predict by topK
                    for top_num in range(FLAGS.top_num):
                        batch_predicted_onehot_labels_tk = dh.get_onehot_label_topk(
                            scores=scores, top_num=top_num + 1)

                        for i in batch_predicted_onehot_labels_tk:
                            predicted_onehot_labels_tk[top_num].append(i)

                    eval_loss = eval_loss + cur_loss
                    eval_counter = eval_counter + 1

                    if writer:
                        writer.add_summary(summaries, step)

                eval_loss = float(eval_loss / eval_counter)

                # Calculate Precision & Recall & F1 (threshold & topK)
                eval_pre_ts = precision_score(
                    y_true=np.array(true_onehot_labels),
                    y_pred=np.array(predicted_onehot_labels_ts),
                    average='micro')
                eval_rec_ts = recall_score(
                    y_true=np.array(true_onehot_labels),
                    y_pred=np.array(predicted_onehot_labels_ts),
                    average='micro')
                eval_F_ts = f1_score(
                    y_true=np.array(true_onehot_labels),
                    y_pred=np.array(predicted_onehot_labels_ts),
                    average='micro')

                for top_num in range(FLAGS.top_num):
                    eval_pre_tk[top_num] = precision_score(
                        y_true=np.array(true_onehot_labels),
                        y_pred=np.array(predicted_onehot_labels_tk[top_num]),
                        average='micro')
                    eval_rec_tk[top_num] = recall_score(
                        y_true=np.array(true_onehot_labels),
                        y_pred=np.array(predicted_onehot_labels_tk[top_num]),
                        average='micro')
                    eval_F_tk[top_num] = f1_score(
                        y_true=np.array(true_onehot_labels),
                        y_pred=np.array(predicted_onehot_labels_tk[top_num]),
                        average='micro')

                # Calculate the average AUC
                eval_auc = roc_auc_score(
                    y_true=np.array(true_onehot_labels),
                    y_score=np.array(predicted_onehot_scores),
                    average='micro')
                # Calculate the average PR
                eval_prc = average_precision_score(
                    y_true=np.array(true_onehot_labels),
                    y_score=np.array(predicted_onehot_scores),
                    average='micro')

                return eval_loss, eval_auc, eval_prc, eval_rec_ts, eval_pre_ts, eval_F_ts, \
                       eval_rec_tk, eval_pre_tk, eval_F_tk

            # Generate batches
            batches_train = dh.batch_iter(list(zip(x_train, y_train)),
                                          FLAGS.batch_size, FLAGS.num_epochs)

            num_batches_per_epoch = int(
                (len(x_train) - 1) / FLAGS.batch_size) + 1

            # Training loop. For each batch...
            for batch_train in batches_train:
                x_batch_train, y_batch_train = zip(*batch_train)
                train_step(x_batch_train, y_batch_train)
                current_step = tf.train.global_step(sess, cnn.global_step)

                if current_step % FLAGS.evaluate_every == 0:
                    logger.info("\nEvaluation:")
                    eval_loss, eval_auc, eval_prc, \
                    eval_rec_ts, eval_pre_ts, eval_F_ts, eval_rec_tk, eval_pre_tk, eval_F_tk = \
                        validation_step(x_val, y_val, writer=validation_summary_writer)

                    logger.info(
                        "All Validation set: Loss {0:g} | AUC {1:g} | AUPRC {2:g}"
                        .format(eval_loss, eval_auc, eval_prc))

                    # Predict by threshold
                    logger.info(
                        "☛ Predict by threshold: Precision {0:g}, Recall {1:g}, F {2:g}"
                        .format(eval_pre_ts, eval_rec_ts, eval_F_ts))

                    # Predict by topK
                    logger.info("☛ Predict by topK:")
                    for top_num in range(FLAGS.top_num):
                        logger.info(
                            "Top{0}: Precision {1:g}, Recall {2:g}, F {3:g}".
                            format(top_num + 1, eval_pre_tk[top_num],
                                   eval_rec_tk[top_num], eval_F_tk[top_num]))
                    best_saver.handle(eval_prc, sess, current_step)
                if current_step % FLAGS.checkpoint_every == 0:
                    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    logger.info(
                        "✔︎ Saved model checkpoint to {0}\n".format(path))
                if current_step % num_batches_per_epoch == 0:
                    current_epoch = current_step // num_batches_per_epoch
                    logger.info(
                        "✔︎ Epoch {0} has finished!".format(current_epoch))

    logger.info("✔︎ Done.")
Example #17
def train_with_dev(x_train, x_dev, y_train, y_dev, vocab_processor):
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            print('modeling...')
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=len(vocab_processor.vocabulary_),
                          embedding_size=FLAGS.embedding_dim,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)
            print("cnn model finished!")
            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time())) + "summaries"
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "model", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # create saver for model
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)
            # initialize model
            ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
            if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
                print("Reading model parameters from %s" %
                      ckpt.model_checkpoint_path)
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print("Initialize model with fresh parameters.")
                sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                if step % 100 == 0:
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss, accuracy))
                    train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size,
                                              FLAGS.num_epochs)
            # Training loop. For each batch...
            debug_step = -1  # 100
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    checkpoint_path = os.path.join(FLAGS.model_dir,
                                                   "textcnn.ckpt")
                    path = saver.save(sess,
                                      checkpoint_path,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
                if current_step == debug_step:
                    print('Training stopped early: debug_step reached')
                    break
예제 #18
0
def train_cnn(training_config_file=None):

    params = json.loads(open(training_config_file).read())

    # Data Preparation
    # ==================================================

    # Load data
    print("Loading data...")
    # x_text, y = data_helpers.load_data_and_labels(params["positive_data_file"], params["negative_data_file"])
    x_text, y = data_helpers.load_text_and_path_label(params["file_level_train_file"], params["file_level_label"])

    # Build vocabulary
    max_document_length = max([len(x.split(" ")) for x in x_text])
    vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
    x = np.array(list(vocab_processor.fit_transform(x_text)))

    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]

    # Split train/test set
    # TODO: This is very crude, should use cross-validation
    dev_sample_index = -1 * int(params["dev_sample_percentage"] * float(len(y)))
    x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
    y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]

    del x, y, x_shuffled, y_shuffled

    print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
    print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))


    # Training
    # ==================================================

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
          allow_soft_placement=params["allow_soft_placement"],
          log_device_placement=params["log_device_placement"])
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(
                sequence_length=x_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_size=params["embedding_dim"],
                filter_sizes=list(map(int, params["filter_sizes"].split(","))),
                num_filters=params["num_filters"],
                l2_reg_lambda=params["l2_reg_lambda"])

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=params["num_checkpoints"])

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                feed_dict = {
                  cnn.input_x: x_batch,
                  cnn.input_y: y_batch,
                  cnn.dropout_keep_prob: params["dropout_keep_prob"]
                }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                  cnn.input_x: x_batch,
                  cnn.input_y: y_batch,
                  cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = data_helpers.batch_iter(
                list(zip(x_train, y_train)), params["batch_size"], params["num_epochs"])
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % params["evaluate_every"] == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    print("")
                if current_step % params["checkpoint_every"] == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
예제 #19
0
#print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_test)))

#Training
#===============================
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement = allow_soft_placement,
        log_device_placement = log_device_placement
    )
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(
            sequence_size = x_train.shape[1],
            num_classes = y_train.shape[1],
            vocab_size = len(vocab.vocabulary_),
            embedding_size = embedding_dim,
            filter_sizes = filter_sizes,
            num_filters = num_filters,
            l2_reg_lambda = l2_reg_lambda
        )
        #Define training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        #print(global_step)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
예제 #20
0
def train(x_train, y_train, vocab_processor, x_dev, y_dev):
    # Training
    # ==================================================

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=len(vocab_processor.vocabulary_),
                          embedding_size=FLAGS.embedding_dim,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            # max_to_keep: keep only the num_checkpoints most recent checkpoints
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab.txt"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, loss, accuracy = sess.run(
                    [train_op, global_step, cnn.loss, cnn.accuracy], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))

            # train_summary_writer.add_summary(summaries, step)
            #if step == 801:
            #    return 0
            #else:
            #   return 1

            def dev_step(x_batch, y_batch):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, loss, accuracy = sess.run(
                    [global_step, cnn.loss, cnn.accuracy], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size,
                                              FLAGS.num_epochs,
                                              shuffle=True)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                # train_step returns None, so the "== 0" early-exit check left over
                # from the commented-out debug code above could never trigger.
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev, y_dev)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)

                    print("Saved model checkpoint to {}\n".format(path))
예제 #21
0
print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

# Training
# ==================================================

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(sequence_length=x_train.shape[1],
                      num_classes=y_train.shape[1],
                      vocab_size=len(vocab_processor.vocabulary_),
                      embedding_size=embedding_dimension,
                      filter_sizes=list(map(int,
                                            FLAGS.filter_sizes.split(","))),
                      num_filters=FLAGS.num_filters,
                      l2_reg_lambda=FLAGS.l2_reg_lambda)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)

        # optimizer = tf.train.AdamOptimizer(1e-3) # default
        # optimizer = tf.train.RMSPropOptimizer(0.001)
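        # Adadelta's update is largely scale-free, so it is typically run with a
        # learning rate of 1.0 (unlike the 1e-3 used with Adam above).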
        optimizer = tf.train.AdadeltaOptimizer(1)

        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)
예제 #22
0
def main(unused_argv):

    if FLAGS.job_name is None or FLAGS.job_name == '':
        raise ValueError('Must specify an explicit job_name !')
    else:
        print ('job_name : ' + FLAGS.job_name)
    if FLAGS.task_index is None or FLAGS.task_index == '':
        raise ValueError('Must specify an explicit task_index!')
    else:
        print ('task_index : ' + str(FLAGS.task_index))

    ps_spec = FLAGS.ps_hosts.split(',')
    worker_spec = FLAGS.worker_hosts.split(',')

    num_worker = len(worker_spec)
    print ("Number of worker = " + str(num_worker))
    print ("ps_spec = ")
    print(*ps_spec)
    print ("worker_spec = ")
    print(*worker_spec)
    cluster = tf.train.ClusterSpec({'ps': ps_spec, 'worker': worker_spec})
    print ("After defining Cluster")
    print ("Job name = " + FLAGS.job_name)
    print ("task index = " + str(FLAGS.task_index))
    # try:
    server = tf.train.Server(cluster, job_name=FLAGS.job_name, task_index=FLAGS.task_index)

    print ("After defining server")
    if FLAGS.job_name == 'ps':
        print("Parameter Server is executed")
        server.join()
    elif FLAGS.job_name == "worker":
        print("Parameter Server is executed")
        with tf.device(tf.train.replica_device_setter(
        worker_device="/job:worker/task:%d"% FLAGS.task_index,
        cluster=cluster)):
            is_chief = (FLAGS.task_index == 0)
            # Data Preparation
            # ==================================================
            
            # Load data
            print("Loading data...")
            
            x_text, y_label = data_helpers.load_data_and_labels(FLAGS.data_file)
            
            # Build vocabulary
            max_document_length = max([len(x.split(" ")) for x in x_text])
            
            vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
            x = np.array(list(vocab_processor.fit_transform(x_text)))
            y = np.array(y_label)
            
            # Randomly shuffle data
            np.random.seed(10)
            shuffle_indices = np.random.permutation(np.arange(len(y)))
            print(type(x),type(y))
            
            x_shuffled = x[shuffle_indices]
            y_shuffled = y[shuffle_indices]
            
            # Split train/test set
            # TODO: This is very crude, should use cross-validation
            dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
            x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
            y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
            print (y_train.shape)
            print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
            print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))
            
            # Training
            # ==================================================
            # Track the best evaluation accuracy so far; it is stashed on the tf
            # module as a quick-and-dirty global. ifsave is initialized here so the
            # checkpoint branch below never sees it undefined.
            tf.MaxAcc = 0.1
            ifsave = False
            
            def copymax(path):
                shutil.copy(path, "{}.backup".format(path))
            
            
            cnn = TextCNN(
                sequence_length=x_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_size=FLAGS.embedding_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda)
            
            # Define Training procedure
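            # get_or_create_global_step registers the step variable in the
            # GLOBAL_STEP collection, unlike the plain tf.Variable used in the
            # non-distributed examples.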
            global_step = tf.train.get_or_create_global_step()
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # Keep track of gradient values and sparsity (optional)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = FLAGS.out_dir
            print("Writing to {}\n".format(out_dir))
            
            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            MaxAcc_prefi = os.path.join(checkpoint_dir, "MAXACCmodel")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            
            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))
            
            # Initialize all variables
            session_conf = tf.ConfigProto(
              allow_soft_placement=FLAGS.allow_soft_placement,
              log_device_placement=FLAGS.log_device_placement)

            init_op = tf.global_variables_initializer()
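            # tf.train.Supervisor coordinates initialization, checkpointing and
            # session creation for the chief/worker setup (later TF 1.x releases
            # deprecate it in favor of tf.train.MonitoredTrainingSession).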
            sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
                                     logdir=out_dir,
                                     init_op=init_op,
                                     saver=saver,
                                     global_step=global_step
                                     )
            sess = sv.prepare_or_wait_for_session(server.target, config=session_conf)
            
            # Generate batches
            batches = data_helpers.batch_iter(
                list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)

                _, current_step, loss, accuracy = sess.run(
                    [train_op, global_step, cnn.loss, cnn.accuracy],
                    feed_dict={cnn.input_x: x_batch,
                               cnn.input_y: y_batch,
                               cnn.dropout_keep_prob: FLAGS.dropout_keep_prob})
                time_str = datetime.datetime.now().isoformat()
                if current_step % 10 == 0:
                    print("{}: step {}, loss {:g}, acc {:g}".format(time_str, current_step, loss, accuracy))

                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")

                    loss, accuracy = sess.run(
                        [cnn.loss, cnn.accuracy],
                        feed_dict={cnn.input_x: x_batch,
                                   cnn.input_y: y_batch,
                                   cnn.dropout_keep_prob: 1.0})
                    time_str = datetime.datetime.now().isoformat()
                    result = "{}: step {}, loss {:g}, acc {:g}".format(time_str, current_step, loss, accuracy)
                    print(result)

                    with open(os.path.join(out_dir, "result"), 'a+') as f:
                        f.write("{}\n".format(result))

                    if tf.MaxAcc < accuracy:
                        tf.MaxAcc = accuracy
                        ifsave = True
                    else:
                        ifsave = False
                    print("Max acc: {}".format(tf.MaxAcc))

                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
                    if ifsave:
                        path = saver.save(sess, MaxAcc_prefi, None)
                        copymax("{}.data-00000-of-00001".format(path))
                        copymax("{}.index".format(path))
                        copymax("{}.meta".format(path))
예제 #23
0
def make_graph(x_train, y_train, vocab_processor, x_dev, y_dev):

    cnn = TextCNN(sequence_length=x_train.shape[1],
                  num_classes=y_train.shape[1],
                  vocab_size=len(vocab_processor.vocabulary_),
                  embedding_size=68,
                  filter_sizes=[3, 4, 5],
                  num_filters=63,
                  l2_reg_lambda=0.0)

    with tf.Session() as sess:
        graph = tf.get_default_graph()
        print(tf.trainable_variables())

        #sess.run(tf.global_variables_initializer())
        #print(sess.run(tf.get_default_graph().get_tensor_by_name('output/W:0')).shape)
        restore_variables = {
            v.name.split(':')[0]: v
            for v in tf.trainable_variables()
            if (v.name.find('conv-maxpool') != -1) or (
                v.name.find('embedding') != -1)
        }
        #restore_variables = {v.name.split(':')[0]:v for v in tf.trainable_variables() if (v.name.find('conv-maxpool')!=-1) or (v.name.find('embedding')!=-1)}
        print(restore_variables)
        saver = tf.train.Saver(restore_variables, max_to_keep=5)

        saver.restore(
            sess,
            tf.train.latest_checkpoint(
                './pruned_data_by_variance_filters_corporate_round_2/checkpoints/'
            ))
        # print([n.name for n in tf.get_default_graph().as_graph_def().node])

        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)
        tf.add_to_collection("optimizer", train_op)
        sess.run(tf.variables_initializer(optimizer.variables()))

        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.summary.histogram(
                    "{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.summary.scalar(
                    "{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        grad_summaries_merged = tf.summary.merge(grad_summaries)

        # Output directory for models and summaries
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(
            os.path.join(os.path.curdir, "pruned_by_variance", timestamp))
        print("Writing to {}\n".format(out_dir))

        # Summaries for loss and accuracy
        loss_summary = tf.summary.scalar("loss", cnn.loss)
        acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

        # Train Summaries
        train_summary_op = tf.summary.merge(
            [loss_summary, acc_summary, grad_summaries_merged])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(train_summary_dir,
                                                     sess.graph)

        # Dev summaries
        dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
        dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
        dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")

        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
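        # Note: this Saver only tracks restore_variables (the conv-maxpool and
        # embedding weights), so newly created variables such as the output layer
        # and global_step are not written to these checkpoints.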
        saver = tf.train.Saver(restore_variables, max_to_keep=5)

        # Initialize only the variables that were not restored; running
        # tf.global_variables_initializer() here would overwrite the restored
        # conv-maxpool/embedding weights.
        restored_names = {v.name for v in restore_variables.values()}
        sess.run(tf.variables_initializer(
            [v for v in tf.global_variables() if v.name not in restored_names]))

        def train_step(x_batch, y_batch):
            feed_dict = {
                cnn.input_x: x_batch,
                cnn.input_y: y_batch,
                cnn.dropout_keep_prob: 0.25
            }
            _, step, summaries, loss, accuracy = sess.run([
                train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy
            ], feed_dict)
            time_str = datetime.datetime.now().isoformat()

            print("{}: step {}, loss {:g}, acc {:g}".format(
                time_str, step, loss, accuracy))
            train_summary_writer.add_summary(summaries, step)

        def dev_step(x_batch, y_batch, writer=None):

            feed_dict = {
                cnn.input_x: x_batch,
                cnn.input_y: y_batch,
                cnn.dropout_keep_prob: 1.0
            }
            step, summaries, loss, accuracy = sess.run(
                [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()

            print("{}: step {}, loss {:g}, acc {:g},".format(
                time_str, step, loss, accuracy))
            if writer:
                writer.add_summary(summaries, step)

        # Generate batches
        batches = data_helpers.batch_iter(list(zip(x_train, y_train)), 64, 10)
        # Training loop. For each batch...
        for batch in batches:
            x_batch, y_batch = zip(*batch)
            train_step(x_batch, y_batch)
            current_step = tf.train.global_step(sess, global_step)
            if current_step % 100 == 0:
                print("\nEvaluation:")
                dev_step(x_dev, y_dev, writer=dev_summary_writer)
                print("")

                path = saver.save(sess,
                                  checkpoint_prefix,
                                  global_step=current_step)
                print("Saved model checkpoint to {}\n".format(path))
print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

# Training
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=options.allow_soft_placement,
        log_device_placement=options.log_device_placement)
    session_conf.gpu_options.allow_growth = True
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(sequence_length=x_train.shape[1],
                      num_classes=y_train.shape[1],
                      vocab_size=len(vocab_processor.vocabulary_),
                      embedding_size=EMBEDDING_DIM,
                      filter_sizes=list(map(int, FILTER_SIZE.split(","))),
                      num_filters=NUM_FILTERS,
                      l2_reg_lambda=L2_REG)

        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        timestamp = str(int(time.time()))
        out_dir = options.checkpoint_dir
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        print("Writing to {}\n".format(out_dir))
예제 #25
0
def train_cnn():
    # Data Preparation
    # ==================================================
    if FLAGS.init_embedding_path is not None:
        embedding = np.load(FLAGS.init_embedding_path)
        print("Using pre-trained word embedding which shape is {}\n".format(
            embedding.shape))
        FLAGS.vocab_size = embedding.shape[0]
        FLAGS.embedding_size = embedding.shape[1]
    if FLAGS.init_model_path is not None:
        assert os.path.isdir(
            FLAGS.init_model_path), "init_model_path must be a directory\n"
        ckpt = tf.train.get_checkpoint_state(FLAGS.init_model_path)
        assert ckpt, "No checkpoint found in {}\n".format(
            FLAGS.init_model_path)
        assert ckpt.model_checkpoint_path, "No model_checkpoint_path found in checkpoint\n"

    # Create root directory
    timestamp = str(int(time.time()))
    root_dir = os.path.join(os.path.curdir, 'runs', 'textcnn',
                            'trained_result_' + timestamp)
    os.makedirs(root_dir)

    # Load data
    print("Loading data...\n")
    x_data = np.loadtxt(FLAGS.x_data_file)
    print(x_data.shape)
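    # Reshape each sample to a flat 600-dim vector; the intermediate
    # (20480, 30, 20) reshape is immediately overwritten by the second reshape.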
    x_data = x_data.reshape(20480, 30, 20)
    x_data = x_data.reshape(20480, 600)
    y_data = np.loadtxt(FLAGS.y_data_file)
    print(y_data.shape)
    print("data load finished")

    # Split dataset
    x_train, x_test, y_train, y_test = train_test_split(
        x_data,
        y_data,
        test_size=FLAGS.test_size,
        stratify=y_data,
        random_state=0)
    x_val, x_test, y_val, y_test = train_test_split(x_test,
                                                    y_test,
                                                    test_size=0.5,
                                                    random_state=0)
    # Training
    # ==================================================
    with tf.Graph().as_default():
        tf_config = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        tf_config.gpu_options.allow_growth = FLAGS.gpu_allow_growth

        with tf.Session(config=tf_config).as_default() as sess:
            cnn = TextCNN(vocab_size=FLAGS.vocab_size,
                          embedding_size=FLAGS.embedding_size,
                          sequence_length=FLAGS.sequence_length,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          num_classes=FLAGS.num_classes,
                          learning_rate=FLAGS.learning_rate,
                          grad_clip=FLAGS.grad_clip,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Output directory for models and summaries
            out_dir = os.path.abspath(root_dir)
            print("Writing to {}...\n".format(out_dir))

            # Summaries for loss and accuracy
            tf.summary.scalar("loss", cnn.loss)
            tf.summary.scalar("accuracy", cnn.accuracy)
            merged_summary = tf.summary.merge_all()

            # Summaries dictionary
            train_summary_dir = os.path.join(out_dir, 'summaries', 'train')
            val_summary_dir = os.path.join(out_dir, 'summaries', 'val')
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)
            val_summary_writer = tf.summary.FileWriter(val_summary_dir,
                                                       sess.graph)

            # Checkpoint directory (TensorFlow will not create it automatically)
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, 'checkpoints'))
            checkpoint_prefix = os.path.join(checkpoint_dir, 'model.ckpt')
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
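            # max_to_keep=1: only the most recently saved (i.e. best-so-far)
            # checkpoint is kept on disk.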
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Using pre-trained word embedding
            # if FLAGS.init_embedding_path is not None:
            #     sess.run(cnn.embedding.assign(embedding))
            #     del embedding

            # Continue training from saved model
            if FLAGS.init_model_path is not None:
                saver.restore(sess, ckpt.model_checkpoint_path)

            # Training start
            print("Start training...\n")
            best_at_step = 0
            best_val_accuracy = 0

            #****************************************
            cnn_feature = []
            for epoch in range(FLAGS.num_epochs):
                # Generate train batches
                train_batches = data_utils.batch_iter(
                    list(zip(x_train, y_train)), FLAGS.batch_size)
                start = time.time()
                for batch in train_batches:
                    # Training model on x_batch and y_batch
                    x_batch, y_batch = zip(*batch)
                    feed_dict = {
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.keep_prob: FLAGS.dropout_keep_prob,
                        cnn.is_training: True
                    }
                    _, global_step, train_summaries, train_loss, train_accuracy = sess.run(
                        [
                            cnn.train_op, cnn.global_step, merged_summary,
                            cnn.loss, cnn.accuracy
                        ],
                        feed_dict=feed_dict)

                    # Evaluates model on val set
                    if global_step % FLAGS.evaluate_every == 0:
                        end = time.time()
                        train_summary_writer.add_summary(
                            train_summaries, global_step)
                        feed_dict = {
                            cnn.input_x: x_val,
                            cnn.input_y: y_val,
                            # disable dropout when evaluating
                            cnn.keep_prob: 1.0,
                            cnn.is_training: False
                        }
                        val_summaries, val_loss, val_accuracy = sess.run(
                            [merged_summary, cnn.loss, cnn.accuracy],
                            feed_dict=feed_dict)
                        val_summary_writer.add_summary(val_summaries,
                                                       global_step)
                        print(
                            "Epoch: {}, global step: {}, training speed: {:.3f}sec/batch"
                            .format(epoch, global_step,
                                    (end - start) / FLAGS.evaluate_every))
                        print(
                            "train loss: {:.3f}, train accuracy: {:.3f}, val loss: {:.3f}, val accuracy: {:.3f}\n"
                            .format(train_loss, train_accuracy, val_loss,
                                    val_accuracy))
                        # If improved, save the model
                        if val_accuracy > best_val_accuracy:
                            print(
                                "Get a best val accuracy at step {}, model saving...\n"
                                .format(global_step))
                            saver.save(sess,
                                       checkpoint_prefix,
                                       global_step=global_step)
                            best_val_accuracy = val_accuracy
                            best_at_step = global_step
                        start = time.time()

            # Rename the checkpoint
            best_model_prefix = checkpoint_prefix + '-' + str(best_at_step)
            os.rename(best_model_prefix + '.index',
                      os.path.join(checkpoint_dir, 'best_model.index'))
            os.rename(best_model_prefix + '.meta',
                      os.path.join(checkpoint_dir, 'best_model.meta'))
            os.rename(
                best_model_prefix + '.data-00000-of-00001',
                os.path.join(checkpoint_dir, 'best_model.data-00000-of-00001'))

            # Testing on test set
            print(
                "\nTraining complete, testing the best model on test set...\n")
            saver.restore(sess, os.path.join(checkpoint_dir, 'best_model'))
            feed_dict = {
                cnn.input_x: x_test,
                cnn.input_y: y_test,
                # disable dropout when testing
                cnn.keep_prob: 1.0,
                cnn.is_training: False
            }
            # y_logits, test_accuracy = sess.run([cnn.logits, cnn.accuracy], feed_dict=feed_dict)
            y_pred, test_accuracy = sess.run([cnn.predictions, cnn.accuracy],
                                             feed_dict=feed_dict)
            print("Testing Accuracy: {:.3f}\n".format(test_accuracy))
            y_test_original = np.argmax(y_test, 1)
            print(y_test_original.shape)
            # label_transformer = joblib.load(os.path.join(out_dir, 'label_transformer.pkl'))
            # y_test_original = label_transformer.inverse_transform(y_test)
            # y_logits_original = label_transformer.inverse_transform(y_logits)
            # print("Precision, Recall and F1-Score:\n\n", classification_report(y_test_original, y_logits_original))
            print(
                "Precision, Recall and F1-Score:\n\n",
                classification_report(y_test_original,
                                      y_pred,
                                      target_names=[
                                          '体育', '娱乐', '家居', '彩票', '房产', '教育',
                                          '时尚', '时政', '星座', '游戏', '社会'
                                      ]))
예제 #26
0
def main(is_test=False):
    Config._use_lemma = FLAGS.use_lemma
    tf.reset_default_graph()

    with tf.Graph().as_default():
        # The graph-level random seed must be set inside the new graph's context;
        # calling tf.set_random_seed before `with tf.Graph().as_default()` would
        # only seed the old default graph.
        tf.set_random_seed(12)
        data = load_data()
        weight = data.weights
        vocab_size = data.get_vocab_size()
        train_iterator_initializer = data._train_iterator_initializer
        validation_iterator_initializer = data._validation_iterator_initializer
        test_iterator_initializer = data._test_iterator_initializer
        handle = data.handle
        next_element = data.next_element

        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():

            cnn = TextCNN(batch_size=FLAGS.batch_size,
                          learning_rate=FLAGS.learning_rate,
                          embedding_size=FLAGS.embedding_size,
                          vocab_size=vocab_size,
                          sequence_length=FLAGS.max_length,
                          num_filters=FLAGS.num_filters,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          weight=weight,
                          labes_num=FLAGS.labels_num,
                          output_dimension=FLAGS.output_dimension,
                          next_element=next_element)

            writer = tf.summary.FileWriter('graphs/text_cnn/learning_rate' +
                                           str(cnn._learning_rate))
            if not os.path.exists(FLAGS.checkpoint_path):
                os.makedirs(FLAGS.checkpoint_path)
            checkpoints_prefix = os.path.join(FLAGS.checkpoint_path,
                                              "text_cnn")
            saver = tf.train.Saver()
            sess.run(tf.global_variables_initializer())

            train_handle = sess.run(data._train_iterator.string_handle())
            validation_handle = sess.run(
                data._validation_iterator.string_handle())
            test_handle = sess.run(data._test_iterator.string_handle())

            # Restore the model from an existing checkpoint if one is available
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            if ckpt and ckpt.model_checkpoint_path:
                print("Restoring model from {}".format(ckpt.model_checkpoint_path))
                saver.restore(sess, ckpt.model_checkpoint_path)

            def test_step(test_handle):
                feed_dict = {cnn._keep_prob: 1.0, handle: test_handle}
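                # Note: `cnn.predict - 2` adds a new subtraction op to the graph on
                # every call; building it once outside the loop would avoid graph growth.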
                res = sess.run(cnn.predict - 2, feed_dict=feed_dict)
                data.feed_output(res)

            def test():
                global_step = sess.run(cnn.global_step)
                sess.run(test_iterator_initializer)
                while True:
                    try:
                        test_step(test_handle)
                    except tf.errors.OutOfRangeError:
                        data.persist(
                            filename="result_{}.csv".format(global_step))
                        print("测试结果已经保存, result_{}.csv".format(global_step))
                        break

            if is_test:
                print("正在进行测试")
                test()
                print("测试结束")
                return

            def train_step():
                feed_dict = {cnn._keep_prob: 1.0, handle: train_handle}
                _, loss, global_step, summary_op, actual_batch_size = sess.run(
                    [
                        cnn._train_total, cnn._total_loss, cnn.global_step,
                        cnn._summary_op, cnn._actual_batch_size
                    ],
                    feed_dict=feed_dict)
                writer.add_summary(summary_op, global_step=global_step)
                return loss, global_step, actual_batch_size

            def validation_step():
                feed_dict = {cnn._keep_prob: 1.0, handle: validation_handle}

                loss, actual_batch_size, lab, res = sess.run(
                    [
                        cnn._total_loss, cnn._actual_batch_size,
                        tf.argmax(cnn._label, axis=2) - 2, cnn.predict - 2
                    ],
                    feed_dict=feed_dict)
                return loss, actual_batch_size, lab, res

            def train(total_loss, batches):
                while True:
                    try:
                        t1 = time.time()
                        loss, step, actual_batch_size = train_step()
                        total_loss += loss
                        batches += 1
                        delta_t = time.time() - t1
                        print(
                            "training: step {}, loss {}, average loss {}, took {} seconds"
                            .format(step, loss, total_loss / batches, delta_t))
                        if step > FLAGS.step_bypass_validation and step % FLAGS.step_validation == 0:
                            saver.save(sess,
                                       save_path=checkpoints_prefix,
                                       global_step=step)
                            average_f1 = validation()
                            if average_f1 > 0.68:
                                test()

                        if step % FLAGS.step_validation == 0:
                            saver.save(sess,
                                       save_path=checkpoints_prefix,
                                       global_step=step)

                    except tf.errors.OutOfRangeError:
                        break
                return total_loss, batches

            def validation():
                f1 = 0
                samples = 0
                total_validation_loss = 0
                total_time = 0
                sess.run(validation_iterator_initializer)
                iteration = 0
                while True:
                    try:
                        t1 = time.time()
                        loss, actual_batch_size, lab, res = validation_step()
                        f1 += np.sum(
                            list(
                                map(
                                    lambda x: f1_score(
                                        x[0], x[1], average="macro"),
                                    zip(lab.tolist(), res.tolist()))))
                        samples += actual_batch_size
                        iteration += 1
                        total_validation_loss += loss
                        delta_t = time.time() - t1
                        total_time += delta_t
                        print("当前f1为:{}, loss 为{}, 花费{}秒".format(
                            f1 / samples, loss, delta_t))

                    except tf.errors.OutOfRangeError:
                        print("平均f1为:{}, 平均loss为{}, 总耗时{}秒".format(
                            f1 / samples, total_validation_loss / iteration,
                            total_time))
                        break
                return f1 / samples

            total_loss = 0
            batches = 0
            for i in range(FLAGS.num_epochs):
                sess.run(train_iterator_initializer)
                print("第{}个epoch".format(i))
                total_loss, batches = train(total_loss, batches)
예제 #27
0
        print(len(x_train))
        print(x_train)
        print(x_train.shape)
        print(x_train.shape[0])
        print(x_train.shape[1])
        print(y_train)
        print(y_train.shape)
        print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
        # Instantiate the TextCNN class imported from text_cnn.py
        cnn = TextCNN(
            # x_train.shape[0] is the number of samples;
            # x_train.shape[1] is the padded sentence length (max_document_length,
            # i.e. the number of words per sentence)
            sequence_length=x_train.shape[1],
            num_classes=y_train.shape[1],  # number of classes (0/1), i.e. 2 here
            index=x_train,
            embedding_size=FLAGS.embedding_dim,  # embedding dimension, 128
            filter_sizes=list(map(int,
                                  FLAGS.filter_sizes.split(","))),  # convolution filter sizes (one conv layer per size)
            num_filters=FLAGS.num_filters,  # number of filters per size, 128
            l2_reg_lambda=FLAGS.l2_reg_lambda)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step",
                                  trainable=False)  # global step counter
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)
예제 #28
0
batch_size = 32
embedding_dims = 50
epochs = 10

print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

print('Pad sequences (samples x time)...')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

print('Build model...')
model = TextCNN(maxlen, max_features, embedding_dims).get_model()
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

print('Train...')
early_stopping = EarlyStopping(monitor='val_acc', patience=3, mode='max')
model.fit(x_train,
          y_train,
          batch_size=batch_size,
          epochs=epochs,
          callbacks=[early_stopping],
          validation_data=(x_test, y_test))

print('Test...')
result = model.predict(x_test)
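# Illustrative follow-up (not part of the original example): convert the sigmoid
# probabilities returned by model.predict into hard 0/1 labels and report a
# simple test accuracy; `result` and `y_test` come from the snippet above.
import numpy as np

y_pred = (result > 0.5).astype('int32').ravel()
test_accuracy = np.mean(y_pred == np.asarray(y_test).ravel())
print('Test accuracy: {:.3f}'.format(test_accuracy))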
예제 #29
0
dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
x_train, x_dev = x[:dev_sample_index], x[dev_sample_index:]
y_train, y_dev = y[:dev_sample_index], y[dev_sample_index:]

print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))
print("x_train shape" + str(x_train.shape))
print("y_train shape" + str(y_train.shape))

#Training
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(sequence_length=x_train.shape[1],
                      num_classes=y_train.shape[1])

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.cross_entropy)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.summary.histogram(
                    "{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.summary.scalar(
예제 #30
0
    # Configure Training
    # ====================================================================
    with tf.Graph().as_default():
        # set session conf
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            ### 1. Make CNN model
            cnn = TextCNN(max_sentence_length=x_train.shape[1],
                          num_hidden_nodes=list(
                              map(int, FLAGS.num_hidden_nodes.split(","))),
                          num_classes=y_train.shape[1],
                          vocab_size=len(vocab_processor.vocabulary_),
                          word_vector_length=FLAGS.word_vector_length,
                          filter_heights=list(
                              map(int, FLAGS.filter_heights.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            ### 2. Define Training procedure
            """
            Set the optimizer and compute gradients.
            """
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            # grads_and_vars holds (gradient, variable) pairs for every
            # trainable variable (trainable == True).
            train_op = optimizer.apply_gradients(grads_and_vars,