def train(x_train, y_train, vocab_processor, x_dev, y_dev):
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=allow_soft_placement,
            log_device_placement=log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=len(vocab_processor.vocabulary_),
                          embedding_size=embedding_dim,
                          filter_sizes=list(map(int, filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):

                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.drop_out_keep: dropout_keep_prob
                }
                _, step, loss, accuracy = sess.run(
                    [train_op, global_step, cnn.loss, cnn.accuracy], feed_dict)

                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))

            def dev_step(x_batch, y_batch):

                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.drop_out_keep: 1.0
                }
                step, loss, accuracy = sess.run(
                    [global_step, cnn.loss, cnn.accuracy], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))

            # mini_batch
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              batch_size, num_epochs)

            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)

                # Evaluate on the dev set every 100 steps
                if current_step % 100 == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev, y_dev)
                    print("")
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))
print("Sequnence Length: {:d}".format(sequence_length))

# Training
# ==================================================

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(sequence_length=sequence_length,
                      num_classes=2,
                      vocab_size=len(vocabulary),
                      embedding_size=FLAGS.embedding_dim,
                      filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                      num_filters=list(map(int, FLAGS.num_filters.split(","))),
                      l2_reg_lambda=FLAGS.l2_reg_lambda)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-4)
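        # aggregation_method=2 corresponds to tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N in TF 1.x,
        # which can lower peak memory when many gradients are summed.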
        grads_and_vars = optimizer.compute_gradients(cnn.loss,
                                                     aggregation_method=2)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
Example #3
# Training
# ==================================================

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement,
        # Cap this process at 80% of GPU memory so the device can be shared.
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.8))
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(
            sequence_length=FLAGS.max_sequence_length,  # number of words in each (padded) sentence
            num_classes=y_train_shuffled.shape[1],
            vocab_size=len(vocabulary),
            embedding_size=embedding_dim,
            filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
            num_filters=FLAGS.num_filters,
            l2_reg_lambda=FLAGS.l2_reg_lambda
        )
        if FLAGS.use_pretrain:
            cnn.assign_embedding(sess, pretrained_embedding)
        # Define Training procedure

        global_step = tf.Variable(0, name="global_step", trainable=False)  # records the global step
        optimizer = tf.train.AdamOptimizer(1e-3)

        grads_and_vars = optimizer.compute_gradients(cnn.loss)  # compute gradients of the loss
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step, name="train_op")

        # Output directory for models and summaries
# TODO: This is very crude, should use cross-validation
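# dev_sample_index is negative, so the last 10% of the shuffled data becomes the dev set.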
dev_sample_index = -1 * int(0.1 * float(len(y)))
x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(sequence_length=x_train.shape[1],
                      num_classes=y_train.shape[1],
                      vocab_size=len(vocab_processor.vocabulary_),
                      embedding_size=embedding_dimension,
                      filter_sizes=list(
                          map(int, params['filter_sizes'].split(","))),
                      num_filters=params['num_filters'],
                      l2_reg_lambda=params['l2_reg_lambda'])

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
Example #5
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))
print("Sequnence Length: {:d}".format(sequence_length))

# Training
# ==================================================

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(sequence_length=x_train.shape[1],
                      num_classes=y_train.shape[1],
                      word_size=FLAGS.word2vec_size,
                      filter_sizes=list(map(int,
                                            FLAGS.filter_sizes.split(","))),
                      num_filters=FLAGS.num_filters,
                      l2_reg_lambda=FLAGS.l2_reg_lambda)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
def train(x_train, y_train, vocab_processor, x_dev, y_dev):
    # Training
    # ==================================================
    ste = []  # global steps at which the dev set was evaluated
    lo = []  # dev loss recorded at each of those steps
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=len(vocab_processor.vocabulary_),
                          embedding_size=FLAGS.embedding_dim,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)
                return loss

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size,
                                              FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_loss = dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    ste.append(current_step)
                    lo.append(dev_loss)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
    plt.plot(ste, lo)
    plt.xlabel("step")
    plt.ylabel("loss")
    plt.title('Training loss')
    plt.savefig('loss.png')
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

# Training
# ==================================================

with tf.Graph().as_default():
	session_conf = tf.ConfigProto(
	  allow_soft_placement=FLAGS.allow_soft_placement,
	  log_device_placement=FLAGS.log_device_placement)
	sess = tf.Session(config=session_conf)
	with sess.as_default():
		cnn = TextCNN(
			title_length=FLAGS.title_length,
			sequence_length=FLAGS.window_length,
			num_classes=y_train.shape[1],
			dictionary=dictionary,
			embedding_size=FLAGS.embedding_dim,
			num_convlayers=FLAGS.num_convlayers,
			filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
			num_filters=FLAGS.num_filters,
			l2_reg_lambda=FLAGS.l2_reg_lambda)

		# Define Training procedure
		global_step = tf.Variable(0, name="global_step", trainable=False)
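		# The learning rate is a placeholder, so it can be supplied (and adjusted) via feed_dict at every step.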
		learning_rate = tf.placeholder(tf.float32, shape=[])
		optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
		grads_and_vars = optimizer.compute_gradients(cnn.loss)
		# update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
		# with tf.control_dependencies(update_ops):
		train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

		# Keep track of gradient values and sparsity (optional)
Example #8
# ==================================================

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)

    with sess.as_default():
        cnn = TextCNN(
            input_length=x_train.shape[1],
            transitions_number=y_train.shape[1],
            words_vocab_size=data['words_vocab_size'],
            tags_vocab_size=data['tags_vocab_size'],
            labels_vocab_size=data['labels_vocab_size'],
            embedding_size=FLAGS.embedding_dim,
            input_cnt_words=data['input_cnt_words'],
            input_cnt_tags=data['input_cnt_tags'],
            input_cnt_labels=data['input_cnt_labels'],
            l2_reg_lambda=FLAGS.l2_reg_lambda,
            hidden_layer_size=64,
        )

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
Example #9
def train(x_train, y_train, vocab_processor, x_dev, y_dev, is_baseline,
          checkpoint_root):
    # Training
    # ==================================================

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            with tf.device('/device:GPU:0'):
                cnn = TextCNN(sequence_length=x_train.shape[1],
                              num_classes=y_train.shape[1],
                              vocab_size=len(vocab_processor.vocabulary_),
                              embedding_size=FLAGS.embedding_dim,
                              filter_sizes=list(
                                  map(int, FLAGS.filter_sizes.split(","))),
                              num_filters=FLAGS.num_filters,
                              l2_reg_lambda=FLAGS.l2_reg_lambda)

                # Define Training procedure
                global_step = tf.Variable(0,
                                          name="global_step",
                                          trainable=False)
                # learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step,
                #     1000, 0.96, staircase=True)
                optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)  # 1e-3
                grads_and_vars = optimizer.compute_gradients(cnn.loss)
                train_op = optimizer.apply_gradients(grads_and_vars,
                                                     global_step=global_step)

                # Keep track of gradient values and sparsity (optional)
                grad_summaries = []
                for g, v in grads_and_vars:
                    if g is not None:
                        grad_hist_summary = tf.summary.histogram(
                            "{}/grad/hist".format(v.name), g)
                        sparsity_summary = tf.summary.scalar(
                            "{}/grad/sparsity".format(v.name),
                            tf.nn.zero_fraction(g))
                        grad_summaries.append(grad_hist_summary)
                        grad_summaries.append(sparsity_summary)
                grad_summaries_merged = tf.summary.merge(grad_summaries)

                # Output directory for models and summaries
                # timestamp = str(int(time.time()))
                out_dir = os.path.abspath(
                    os.path.join(os.path.curdir, "runs",
                                 checkpoint_root))  #timestamp))
                print("Writing to {}\n".format(out_dir))

                # Summaries for loss and accuracy
                loss_summary = tf.summary.scalar("loss", cnn.loss)
                acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

                # Train Summaries
                train_summary_op = tf.summary.merge(
                    [loss_summary, acc_summary, grad_summaries_merged])
                train_summary_dir = os.path.join(out_dir, "summaries", "train")
                train_summary_writer = tf.summary.FileWriter(
                    train_summary_dir, sess.graph)

                # Dev summaries
                dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
                dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
                dev_summary_writer = tf.summary.FileWriter(
                    dev_summary_dir, sess.graph)

                # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
                checkpoint_dir = os.path.abspath(
                    os.path.join(out_dir, "checkpoints"))
                checkpoint_prefix = os.path.join(checkpoint_dir, "model")
                if not os.path.exists(checkpoint_dir):
                    os.makedirs(checkpoint_dir)
                saver = tf.train.Saver(tf.global_variables(),
                                       max_to_keep=FLAGS.num_checkpoints)

                # Write vocabulary
                vocab_processor.save(os.path.join(out_dir, "vocab"))
                extract_dict(vocab_processor, out_dir)

                # Initialize all variables
                sess.run(tf.global_variables_initializer())

                def train_step(x_batch, y_batch):
                    """
                A single training step
                """
                    feed_dict = {
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                    }
                    _, step, summaries, loss, accuracy = sess.run([
                        train_op, global_step, train_summary_op, cnn.loss,
                        cnn.accuracy
                    ], feed_dict)
                    # L2 norm constraint (too slow) https://github.com/dennybritz/cnn-text-classification-tf/issues/88
                    # sess.run(cnn.output_W.assign(tf.clip_by_norm(cnn.output_W, 1.0)))
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss, accuracy))
                    train_summary_writer.add_summary(summaries, step)

                def dev_step(x_batch, y_batch, writer=None):
                    """
                Evaluates model on a dev set
                """
                    feed_dict = {
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: 1.0
                    }
                    step, summaries, loss, accuracy = sess.run(
                        [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                        feed_dict)
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss, accuracy))
                    if writer:
                        writer.add_summary(summaries, step)
                    return loss, accuracy

                # Generate batches
                batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                                  FLAGS.batch_size,
                                                  FLAGS.num_epochs)
                # Training loop. For each batch...
                best_metric_perf = 0.0  # dev_accuracy
                cur_metric_perf = 0.0
                for batch in batches:
                    x_batch, y_batch = zip(*batch)
                    train_step(x_batch, y_batch)
                    current_step = tf.train.global_step(sess, global_step)
                    if current_step % FLAGS.evaluate_every == 0:
                        print("\nEvaluation:")
                        _, cur_metric_perf = dev_step(
                            x_dev, y_dev, writer=dev_summary_writer)
                        print("")
                    if cur_metric_perf > best_metric_perf:
                        best_metric_perf = cur_metric_perf
                    else:
                        continue
                    if current_step % FLAGS.checkpoint_every == 0:
                        path = saver.save(sess,
                                          checkpoint_prefix,
                                          global_step=current_step)
                        print("Saved model checkpoint to {}\n".format(path))
Example #10
def train(x_train, y_train, vocab_processor, x_dev, y_dev, x_real_len_train, x_real_len_dev, sorted_label, max_document_length):
    # Training
    # ==================================================

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
          allow_soft_placement=FLAGS.allow_soft_placement,
          log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            if FLAGS.model_type == "cnnrnn":
                obj = TextCNNRNN(
                    sequence_length=max_document_length,
                    num_classes=y_train.shape[1],
                    vocab_size=len(vocab_processor.vocabulary_),
                    hidden_unit=FLAGS.hidden_unit, 
                    embedding_size=FLAGS.embedding_dim,
                    filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                    num_filters=FLAGS.num_filters,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)
            elif FLAGS.model_type == "rnncnn":
                obj = TextRNNCNN(
                    sequence_length=max_document_length,
                    num_classes=y_train.shape[1],
                    vocab_size=len(vocab_processor.vocabulary_),
                    hidden_unit=FLAGS.hidden_unit, 
                    embedding_size=FLAGS.embedding_dim,
                    filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                    num_filters=FLAGS.num_filters,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)
            elif FLAGS.model_type == "rnnandcnn":
                obj = TextRNNandCNN(
                    sequence_length=max_document_length,
                    num_classes=y_train.shape[1],
                    vocab_size=len(vocab_processor.vocabulary_),
                    hidden_unit=FLAGS.hidden_unit, 
                    embedding_size=FLAGS.embedding_dim,
                    filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                    num_filters=FLAGS.num_filters,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)
            elif FLAGS.model_type == "rnn":
                obj = TextRNN(
                    sequence_length=max_document_length,
                    num_classes=y_train.shape[1],
                    vocab_size=len(vocab_processor.vocabulary_),
                    hidden_unit=FLAGS.hidden_unit, 
                    embedding_size=FLAGS.embedding_dim,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)
            else:
                obj = TextCNN(
                    sequence_length=max_document_length,
                    num_classes=y_train.shape[1],
                    vocab_size=len(vocab_processor.vocabulary_),
                    embedding_size=FLAGS.embedding_dim,
                    filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                    num_filters=FLAGS.num_filters,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
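            # Run ops registered in UPDATE_OPS (e.g. batch-norm moving-average updates) before each gradient step.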
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                grads_and_vars = optimizer.compute_gradients(obj.loss)
                train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", FLAGS.model_version))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", obj.loss)
            acc_summary = tf.summary.scalar("accuracy", obj.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)
            # Initialize all variables
            sess.run(tf.global_variables_initializer())
            # Restore variables from the pre-trained domain model
            domain_out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs_base", FLAGS.domain_model_version))
            domain_checkpoint_dir = os.path.abspath(os.path.join(domain_out_dir, "checkpoints"))
            # The last fully-connected layer is not restored because each task has a different number of classes
            domain_not_restore_var = [u'output']
            domain_restore_var = [v for v in tf.global_variables() if v.name.split('/')[0] not in domain_not_restore_var]
            restored_saver = tf.train.Saver(domain_restore_var)
            ckpt = tf.train.get_checkpoint_state(domain_checkpoint_dir)
            if ckpt:
                print ("Reading model parameters from %s" % ckpt.model_checkpoint_path)
                restored_saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print("Writing to {}\n".format(out_dir))

            # Save train params since eval.py needs them
            trained_dir = os.path.abspath(os.path.join(out_dir, "trained_results"))
            if not os.path.exists(trained_dir):
                os.makedirs(trained_dir)
            with open(trained_dir + '/sorted_label.json', 'w') as outfile:
                json.dump(sorted_label, outfile, indent=4, ensure_ascii=False)
            with open(trained_dir + '/train_params.json', 'w') as outfile:
                json.dump({"max_document_length":max_document_length}, outfile, indent=4, ensure_ascii=False)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            def train_step(x_batch, y_batch, x_real_len_batch):
                """
                A single training step
                """
                if FLAGS.model_type == "cnn":
                    feed_dict = {
                        obj.input_x: x_batch,
                        obj.input_y: y_batch,
                        obj.dropout_keep_prob: FLAGS.dropout_keep_prob,
                        obj.is_training: True
                    }
                else:
                    feed_dict = {
                        obj.input_x: x_batch,
                        obj.input_y: y_batch,
                        obj.dropout_keep_prob: FLAGS.dropout_keep_prob,
                        obj.real_len: x_real_len_batch
                    }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, obj.loss, obj.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)
            
            def overfit(dev_acc, eva_num=3):
                """Early-stopping check: True if the dev metric has not improved over the last few evaluations."""
                n = len(dev_acc)
                if n < eva_num:
                    return False
                for i in range(n - eva_num + 1, n):
                    if dev_acc[i] > dev_acc[i - 1]:
                        return False
                return True

            def dev_step(x_batch, y_batch, x_real_len_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                dev_batches = data_helpers.batch_iter(list(zip(x_batch, y_batch, x_real_len_batch)), FLAGS.batch_size, 1, shuffle=False)
                all_pred = []
                correct_total_num = 0
                for batch in dev_batches:
                    x_dev_batch, y_dev_batch, x_real_len_dev_batch = zip(*batch)
                    if FLAGS.model_type == "cnn":
                        feed_dict = {
                            obj.input_x: x_dev_batch,
                            obj.input_y: y_dev_batch,
                            obj.dropout_keep_prob: 1.0,
                            obj.is_training: False
                        }
                    else:
                        feed_dict = {
                            obj.input_x: x_dev_batch,
                            obj.input_y: y_dev_batch,
                            obj.dropout_keep_prob: 1.0,
                            obj.real_len: x_real_len_dev_batch
                        }

                    step, summaries, pred, correct_pred_num = sess.run(
                        [global_step, dev_summary_op, obj.predictions, obj.correct_pred_num],
                        feed_dict)
                    all_pred = np.concatenate([all_pred, pred])
                    correct_total_num += correct_pred_num
                    if writer:
                        writer.add_summary(summaries, step)
                dev_acc = 1.0 * correct_total_num / len(y_batch)
                print("right_sample {}, dev_sample {}, dev_acc {:g}".format(correct_total_num, len(y_batch), dev_acc))
                return dev_acc

            # Generate batches
            batches = data_helpers.batch_iter(
                list(zip(x_train, y_train, x_real_len_train)), FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            dev_acc = []
            for batch in batches:
                x_batch, y_batch, x_real_len_batch = zip(*batch)
                train_step(x_batch, y_batch, x_real_len_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:", current_step)
                    cur_acc = dev_step(x_dev, y_dev, x_real_len_dev, writer=dev_summary_writer)
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
                    dev_acc.append(cur_acc)
                    if overfit(dev_acc):
                        print("current accuracy drop and stop train..\n")
                        sys.exit(0)
                    print("")
Example #11
def train_model(allow_save_model=True, print_intermediate_results=True, d_lossweight=None, pname=None):
    if d_lossweight is None:
        print("The End!!")
    a = 0  # count of non-SATD training examples (filled in below)
    b = 0  # count of SATD training examples (filled in below)
    print("\nParameters:")
    for attr, value in sorted(FLAGS.__flags.items()):
        print("{}={}".format(attr.upper(), value))
    print("")

    # Data Preparation
    # ==================================================

    # Load data
    print("Loading data...")

    # load each source project

    source_text = np.array([])
    source_y0 = np.array([])


    source_file_path = "./within_project/" + pname + '/train/'
    source_files = list()

    for class_name in class_names:
        source_files.append(source_file_path + class_name)
    tmp_text, tmp_y = data_helpers.load_data_and_labels(source_files)
    print(pname + ": " + str(len(tmp_text)) + " sentences")
    source_text = np.concatenate([source_text, tmp_text], 0)
    if len(source_y0) == 0:
        source_y0 = np.array(tmp_y)
    else:
        source_y0 = np.concatenate([source_y0, tmp_y], 0)


    # Build
    max_document_length = min(100, max(
        [len(x.split(" ")) for x in source_text]))  # cap documents at 100 tokens
    vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
    source_x0 = np.array(list(vocab_processor.fit_transform(source_text)))
  #  target_x = np.array(list(vocab_processor.fit_transform(target_text)))
    # =========================================================================print vocab============================================
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(source_y0)))
    x_shuffled = source_x0[shuffle_indices]
    y_shuffled = source_y0[shuffle_indices]

    # Split train/test set
    # TODO: This is very crude, should use cross-validation
    dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(source_y0)))
    source_x, target_x = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
    source_y, target_y = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
    print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
    print("Train/Dev split: {:d}/{:d}".format(len(source_y), len(target_y)))
    #=========================================================================print vocab============================================

#====================================================================================================================================
    pro=[]
    num=[]
    if print_intermediate_results:
        print('data distribution in source dataset')
        pro, num=sa.print_data_distribution(source_y, class_names)
        if pro[0] == "nonSATD":
            a = num[0]
        if pro[1] == "SATD":
            b = num[1]
        print('data distribution in target dataset')
        sa.print_data_distribution(target_y, class_names)

        print("Max Document Length: {:d}".format(max_document_length))
        print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
        print("Train/Test size: {:d}/{:d}".format(len(source_y), len(target_y)))

    # Training
    # ==================================================

    min_loss = 100000000

    max_f1 = 0.0
    predictions_at_min_loss = None
    steps_per_epoch = int(len(source_y) / FLAGS.batch_size) + 1

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = True
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(

                a0=a,
                b0=b,
                d_lossweight=a/b,
                sequence_length=max_document_length,
                num_classes=source_y.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_size=FLAGS.embedding_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)


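            # Linearly decay the learning rate from 2e-3 to 1e-4 over the full run (power=1 gives a linear schedule).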
            learning_rate = tf.train.polynomial_decay(2*1e-3, global_step,
                                                      steps_per_epoch * FLAGS.num_epochs, 1e-4,
                                                      power=1)

            optimizer = tf.train.AdamOptimizer(learning_rate)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)



            if allow_save_model:
                print("!!!!!!os.path")

                # Keep track of gradient values and sparsity (optional)
                grad_summaries = []
                for g, v in grads_and_vars:
                    if g is not None:
                        grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                        sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                        grad_summaries.append(grad_hist_summary)
                        grad_summaries.append(sparsity_summary)
                grad_summaries_merged = tf.summary.merge(grad_summaries)

                # Output directory for models and summaries
                timestamp = str(int(time.time()))
                out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs-RQ1",pname ))
                print("Writing to {}\n".format(out_dir))


                # Summaries for loss ,f1, auc
                loss_summary = tf.summary.scalar("loss", cnn.loss)
                acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)
                precision_summary = tf.summary.scalar("precision", cnn.precision)
                recall_summary = tf.summary.scalar("recall",cnn.recall)
                f1_summary = tf.summary.scalar("f1", cnn.f1)
                auc_summary = tf.summary.scalar("auc", cnn.auc)


                # Train Summaries
                train_summary_op = tf.summary.merge([loss_summary, acc_summary,precision_summary,recall_summary,f1_summary,auc_summary, grad_summaries_merged])
                train_summary_dir = os.path.join(out_dir, 'summaries','train')
                train_summary_writer = tf.summary.FileWriter(train_summary_dir)
                train_summary_writer.add_graph(sess.graph)

                # Dev summaries
                dev_summary_op = tf.summary.merge([loss_summary, acc_summary, auc_summary,precision_summary,recall_summary,f1_summary,grad_summaries_merged])
                dev_summary_dir = os.path.join(out_dir, 'summaries', 'dev')
                dev_summary_writer = tf.summary.FileWriter(dev_summary_dir)
                dev_summary_writer.add_graph(sess.graph)



                # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
                checkpoint_dir_name = "checkpoint-"+pname
                checkpoint_dir = os.path.abspath(os.path.join(out_dir, checkpoint_dir_name))
            #    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
                if not os.path.exists(checkpoint_dir):
                    os.makedirs(checkpoint_dir)

                saver = tf.train.Saver(tf.global_variables())
                vocab_dir_name=os.path.join(checkpoint_dir,"vocab")
                # Write vocabulary
                vocab_processor.save(vocab_dir_name)

            # Initialize all variables
            sess.run(tf.global_variables_initializer(), feed_dict={cnn.phase_train: True})
            sess.run(tf.local_variables_initializer())# this is for version r0.12




            def train_step(x_batch, y_batch):
                """A single training step."""
                # Reset local variables (streaming metric counters) before each step.
                sess.run(tf.local_variables_initializer())

                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
                    cnn.phase_train: True
                }
                _, step, summaries, loss, mean_loss, l2_loss, accuracy,precision, recall, f1, auc = sess.run(
                    [train_op,global_step, train_summary_op,cnn.loss, cnn.mean_loss, cnn.l2_loss, cnn.accuracy,cnn.precision, cnn.recall,cnn.f1,cnn.auc],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                train_summary_writer.add_summary(summaries, step)
                return accuracy, precision, recall, f1, auc

            def dev_step(x_batch, y_batch, writer=None):
                """Evaluates the model on a dev set."""
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0,
                    cnn.phase_train: False
                }
                summaries, step,loss, mean_loss, l2_loss,accuracy, precision, recall, f1, auc,   predictions = sess.run(
                    [ dev_summary_op,global_step,cnn.loss, cnn.mean_loss, cnn.l2_loss, cnn.accuracy,cnn.precision, cnn.recall,cnn.f1,cnn.auc,cnn.predictions],
                    feed_dict)

                time_str = datetime.datetime.now().isoformat()

                if print_intermediate_results:
                    print("{}: epoch {}, step {}, loss {:g}, acc {:g}, percision {:g}, recall{:g}, f1{:g}, auc {:g}, mean_loss {}, l2_loss {}".format(
                        time_str,  step/steps_per_epoch, step, loss, accuracy, precision, recall, f1, auc, mean_loss, l2_loss))
                    tp, fp, fn, tn,precision, recall, f1, auc2= sa.calculate_IR_metrics(y_batch, predictions,
                                                                                class_names,None)

                    print("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^")
                    print(precision[1],recall[1],f1[1], auc2[1])
                if writer is not None:
                    print("devWrite!!!!")
                    writer.add_summary(summaries, step)

                return accuracy, precision, recall, f1, auc, loss, predictions


            # Generate batches
            batches = data_helpers.batch_iter(
                list(zip(source_x, source_y)), FLAGS.batch_size, FLAGS.num_epochs)

            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_accuracy,train_precision,train_recall,train_f1,train_auc = train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                current_epoch = current_step/steps_per_epoch
                if current_step % steps_per_epoch == 0 and current_epoch % FLAGS.evaluate_every == 0:
                    if print_intermediate_results:

                        print("Current train accuracy: %s" % (train_accuracy))
                        print("Current train precision: %s" % (train_precision))
                        print("Current train recall: %s" % (train_recall))
                        print("Current train f1: %s" % (train_f1))
                        print("Current train auc: %s" % (train_auc))

                        print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
                    fold_accuracy, precision, recall, f1, auc, loss, predictions = dev_step(target_x, target_y, writer=dev_summary_writer)
                    tp, fp, fn, tn, precision, recall, f1, auc2= sa.calculate_IR_metrics( target_y, predictions,
                                                                            class_names,None)
                    for i in range(len(class_names)):
                        print(class_names[i], precision[i], recall[i], f1[i], auc2[i])
                    if loss < min_loss:
                        if f1[1]> max_f1:
                            min_loss = loss
                            max_f1=f1[1]
                            predictions_at_min_loss = predictions
                            if allow_save_model:
                                save_path = saver.save(sess, checkpoint_dir, global_step=current_step)
                                if print_intermediate_results:
                                    print("Model saved in file: %s" % save_path)

            # Final result
            output_file = open(source_file_path + 'fp_sentences1', 'a',encoding='utf-8')
            print('Final result:')
            fold_accuracy, precision, recall, f1, auc, loss, predictions = dev_step(target_x, target_y)
            print("ACC: %s" % (fold_accuracy))
            print( precision, recall, f1, auc)


            tp, fp, fn, tn, precision, recall, f1 , auc2= sa.calculate_IR_metrics( target_y, predictions, class_names, output_file)
            for i in range(len(class_names)):
                print(class_names[i], precision[i], recall[i], f1[i], auc2[i])


    return min_loss, predictions_at_min_loss, target_y
def main(unused_argv):

    if FLAGS.job_name is None or FLAGS.job_name == '':
        raise ValueError('Must specify an explicit job_name !')
    else:
        print('job_name : ' + FLAGS.job_name)
    if FLAGS.task_index is None or FLAGS.task_index == '':
        raise ValueError('Must specify an explicit task_index!')
    else:
        print('task_index : ' + str(FLAGS.task_index))

    ps_spec = FLAGS.ps_hosts.split(',')
    worker_spec = FLAGS.worker_hosts.split(',')

    num_worker = len(worker_spec)
    print("Number of worker = " + str(num_worker))
    print("ps_spec = ")
    print(*ps_spec)
    print("worker_spec = ")
    print(*worker_spec)
    cluster = tf.train.ClusterSpec({'ps': ps_spec, 'worker': worker_spec})
    print("After defining Cluster")
    print("Job name = " + FLAGS.job_name)
    print("task index = " + str(FLAGS.task_index))
    # try:
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)

    print("After defining server")
    if FLAGS.job_name == 'ps':
        print("Parameter Server is executed")
        server.join()
    elif FLAGS.job_name == "worker":
        print("Parameter Server is executed")
        with tf.device(
                tf.train.replica_device_setter(
                    worker_device="/job:worker/task:%d" % FLAGS.task_index,
                    cluster=cluster)):
            is_chief = (FLAGS.task_index == 0)
            # Data Preparation
            # ==================================================

            # Load data
            print("Loading data...")

            x_text, y_label = data_helpers.load_data_and_labels(
                FLAGS.data_file)

            # Build vocabulary
            max_document_length = max([len(x.split(" ")) for x in x_text])

            vocab_processor = learn.preprocessing.VocabularyProcessor(
                max_document_length)
            x = np.array(list(vocab_processor.fit_transform(x_text)))
            y = np.array(y_label)

            # Randomly shuffle data
            np.random.seed(10)
            shuffle_indices = np.random.permutation(np.arange(len(y)))
            print(type(x), type(y))

            x_shuffled = x[shuffle_indices]
            y_shuffled = y[shuffle_indices]

            # Split train/test set
            # TODO: This is very crude, should use cross-validation
            dev_sample_index = -1 * int(
                FLAGS.dev_sample_percentage * float(len(y)))
            x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[
                dev_sample_index:]
            y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[
                dev_sample_index:]
            print(y_train.shape)
            print("Vocabulary Size: {:d}".format(
                len(vocab_processor.vocabulary_)))
            print("Train/Dev split: {:d}/{:d}".format(len(y_train),
                                                      len(y_dev)))

            # Training
            # ==================================================
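            # Hack: stash the best dev accuracy seen so far as an attribute on the tf module.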
            tf.MaxAcc = 0.1

            def copymax(path):
                shutil.copy(path, "{}.backup".format(path))

            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=len(vocab_processor.vocabulary_),
                          embedding_size=FLAGS.embedding_dim,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
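            # With replica_device_setter above, this variable is placed on a parameter server and shared by all workers.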
            global_step = tf.train.get_or_create_global_step()
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = FLAGS.out_dir
            print("Writing to {}\n".format(out_dir))

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            MaxAcc_prefi = os.path.join(checkpoint_dir, "MAXACCmodel")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            session_conf = tf.ConfigProto(
                allow_soft_placement=FLAGS.allow_soft_placement,
                log_device_placement=FLAGS.log_device_placement)

            init_op = tf.global_variables_initializer()
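            # The chief worker (task_index 0) runs init_op and owns checkpointing;
            # non-chief workers wait for the shared session to become ready.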
            sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
                                     logdir=out_dir,
                                     init_op=init_op,
                                     saver=saver,
                                     global_step=global_step)
            sess = sv.prepare_or_wait_for_session(server.target,
                                                  config=session_conf)

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size,
                                              FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)

                _, current_step, loss, accuracy = sess.run(
                    [train_op, global_step, cnn.loss, cnn.accuracy],
                    feed_dict={
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                    })
                time_str = datetime.datetime.now().isoformat()
                if current_step % 100 == 0:
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, current_step, loss, accuracy))

                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")

                    loss, accuracy = sess.run(
                        [cnn.loss, cnn.accuracy],
                        feed_dict={
                            cnn.input_x: x_batch,
                            cnn.input_y: y_batch,
                            cnn.dropout_keep_prob: 1.0
                        })
                    time_str = datetime.datetime.now().isoformat()
                    result = "{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, current_step, loss, accuracy)
                    print(result)

                    with open(os.path.join(out_dir, "result"), 'a+') as f:
                        f.write("{}\n".format(result))

                    if max_acc < accuracy:
                        max_acc = accuracy
                        ifsave = True
                    else:
                        ifsave = False
                    print("Max acc: {}".format(max_acc))

                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
                    if ifsave:
                        path = saver.save(sess, MaxAcc_prefix, None)
                        copymax("{}.data-00000-of-00001".format(path))
                        copymax("{}.index".format(path))
                        copymax("{}.meta".format(path))
Example #13
def train():
    with tf.device('/cpu:0'):
        x_text, pos1, pos2, y = data_helpers.load_data_and_labels(
            FLAGS.train_dir)

    # Build vocabulary
    # Example: x_text[3] = "A misty <e1>ridge</e1> uprises from the <e2>surge</e2>."
    # ['a misty ridge uprises from the surge <UNK> <UNK> ... <UNK>']
    # =>
    # [27 39 40 41 42  1 43  0  0 ... 0]
    # dimension = FLAGS.max_sentence_length
    text_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
        FLAGS.max_sentence_length)
    text_vec = np.array(list(text_vocab_processor.fit_transform(x_text)))
    print("Text Vocabulary Size: {:d}".format(
        len(text_vocab_processor.vocabulary_)))

    # Example: pos1[3] = [-2 -1  0  1  2   3   4 999 999 999 ... 999]
    # [95 96 97 98 99 100 101 999 999 999 ... 999]
    # =>
    # [11 12 13 14 15  16  21  17  17  17 ...  17]
    # dimension = MAX_SENTENCE_LENGTH
    pos_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
        FLAGS.max_sentence_length)
    pos_vocab_processor.fit(pos1 + pos2)
    pos1_vec = np.array(list(pos_vocab_processor.transform(pos1)))
    pos2_vec = np.array(list(pos_vocab_processor.transform(pos2)))
    print("Position Vocabulary Size: {:d}".format(
        len(pos_vocab_processor.vocabulary_)))

    x = np.array([list(i) for i in zip(text_vec, pos1_vec, pos2_vec)])

    print("x = {0}".format(x.shape))
    print("y = {0}".format(y.shape))
    print("")

    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]

    # Split train/test set
    # TODO: This is very crude, should use cross-validation
    dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
    x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[
        dev_sample_index:]
    x_dev = np.array(x_dev).transpose((1, 0, 2))
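    # The transpose above turns x_dev from (num_dev, 3, max_sentence_length) into
    # (3, num_dev, max_sentence_length), so x_dev[0], x_dev[1], x_dev[2] are the text,
    # pos1 and pos2 matrices fed at evaluation time.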
    y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[
        dev_sample_index:]
    print("Train/Dev split: {:d}/{:d}\n".format(len(y_train), len(y_dev)))

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(
                sequence_length=x_train.shape[2],
                num_classes=y_train.shape[1],
                text_vocab_size=len(text_vocab_processor.vocabulary_),
                text_embedding_size=FLAGS.text_embedding_dim,
                pos_vocab_size=len(pos_vocab_processor.vocabulary_),
                pos_embedding_size=FLAGS.position_embedding_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(
                cnn.loss, global_step=global_step)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            text_vocab_processor.save(os.path.join(out_dir, "text_vocab"))
            pos_vocab_processor.save(os.path.join(out_dir, "position_vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Pre-trained word2vec
            if FLAGS.word2vec:
                # initial matrix with random uniform
                initW = np.random.uniform(
                    -0.25, 0.25, (len(text_vocab_processor.vocabulary_),
                                  FLAGS.text_embedding_dim))
                # load any vectors from the word2vec
                print("Load word2vec file {0}".format(FLAGS.word2vec))
                with open(FLAGS.word2vec, "rb") as f:
                    header = f.readline()
                    vocab_size, layer1_size = map(int, header.split())
                    binary_len = np.dtype('float32').itemsize * layer1_size
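                    # Binary word2vec layout: a text header "vocab_size dim\n", then for each
                    # entry a space-terminated word followed by layer1_size float32 values.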
                    for line in range(vocab_size):
                        word = []
                        while True:
                            ch = f.read(1).decode('latin-1')
                            if ch == ' ':
                                word = ''.join(word)
                                break
                            if ch != '\n':
                                word.append(ch)
                        idx = text_vocab_processor.vocabulary_.get(word)
                        if idx != 0:
                            initW[idx] = np.fromstring(f.read(binary_len),
                                                       dtype='float32')
                        else:
                            f.read(binary_len)
                sess.run(cnn.W_text.assign(initW))
                print("Success to load pre-trained word2vec model!\n")

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size,
                                              FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                x_batch = np.array(x_batch).transpose((1, 0, 2))

                # Train
                feed_dict = {
                    cnn.input_text: x_batch[0],
                    cnn.input_pos1: x_batch[1],
                    cnn.input_pos2: x_batch[2],
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)
                train_summary_writer.add_summary(summaries, step)

                # Training log display
                if step % FLAGS.display_every == 0:
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss, accuracy))

                # Evaluation
                if step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    feed_dict = {
                        cnn.input_text: x_dev[0],
                        cnn.input_pos1: x_dev[1],
                        cnn.input_pos2: x_dev[2],
                        cnn.input_y: y_dev,
                        cnn.dropout_keep_prob: 1.0
                    }
                    summaries, loss, accuracy, predictions = sess.run([
                        dev_summary_op, cnn.loss, cnn.accuracy, cnn.predictions
                    ], feed_dict)
                    dev_summary_writer.add_summary(summaries, step)

                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss, accuracy))
                    print(
                        "(2*9+1)-Way Macro-Average F1 Score (excluding Other): {:g}\n"
                        .format(
                            f1_score(np.argmax(y_dev, axis=1),
                                     predictions,
                                     labels=np.array(range(1, 19)),
                                     average="macro")))

                # Model checkpoint
                if step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=step)
                    print("Saved model checkpoint to {}\n".format(path))
Example #14
def train(x_train, y_train, vocab_processor, x_dev, y_dev):
    # Training
    # Define the training procedure
    # ==================================================

    with tf.Graph().as_default():  # set this Graph() as the default graph; see function.md
        session_conf = tf.ConfigProto(
            # allow TensorFlow to fall back to another device when the requested one does not exist; see function.md
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():  # Session runs TensorFlow operations; it can also be used as a context manager with `with`
            cnn = TextCNN(  # instantiate and build the TextCNN model; see function.md
                # number of words per sentence (all sentences padded to the same length)
                sequence_length=x_train.shape[1],
                # number of classes, taken from the labels
                num_classes=y_train.shape[1],
                # total number of words in the vocabulary used for training
                vocab_size=len(vocab_processor.vocabulary_),
                # dimensionality of the embedding layer output
                embedding_size=FLAGS.embedding_dim,
                # filter heights; each filter size yields a convolution output of a different length
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                # number of filters per filter size
                num_filters=FLAGS.num_filters,
                # L2 regularization strength
                l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define the training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)  # counts training steps
            optimizer = tf.train.AdamOptimizer(1e-3)  # use Adam as the optimizer
            grads_and_vars = optimizer.compute_gradients(cnn.loss)  # gradients of the loss w.r.t. the trainable variables
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
            # train_op applies the gradient updates to the parameters; each run of train_op is one training step

            # Keep track of gradient values and sparsity (optional)
            # Track and visualize the training and evaluation process
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            # Checkpoints store the model parameters
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            # Checkpoint path prefix: out_dir/checkpoints/model
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Define a single training step
            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                # Data is fed to the network through the placeholder nodes; every placeholder must receive a value, otherwise TensorFlow raises an error
                feed_dict = {
                  cnn.input_x: x_batch,
                  cnn.input_y: y_batch,
                  cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                # session.run() executes train_op and returns the values of the requested tensors; note that train_op itself has no return value, it only updates the network parameters
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)
            
            # A similar function evaluates loss and accuracy on an arbitrary dataset, e.g. the dev set or the whole training set. It is essentially the same as train_step, but without the training op and with dropout disabled.
            def dev_step(x_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                  cnn.input_x: x_batch,
                  cnn.input_y: y_batch,
                  cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = data_helpers.batch_iter(
                list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
# split it
np.random.seed(12345)

ind = np.random.permutation(x.shape[0])
shuffled_x = x[ind, :]
shuffled_y = y[ind, :]

idx = int(FLAGS.dev_sample_percentage * x.shape[0])
train_x, train_y = shuffled_x[idx:, :], shuffled_y[idx:, :]
dev_x, dev_y = shuffled_x[:idx, :], shuffled_y[:idx, :]


with tf.Session() as sess:
    
    cnn = TextCNN(max_doc_len, len(mb.classes_), FLAGS.embedding_dim, len(text_processor.vocabulary_),
                  list(map(int, FLAGS.filter_sizes.split(','))),
                  FLAGS.num_filters)
    # train operation
    global_step = tf.Variable(0, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer(1e-5)
    grads_and_vars = optimizer.compute_gradients(cnn.loss)
    train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

    # IO direction stuff
    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))

    
    # summary writer
    train_summary_dir = os.path.join(out_dir, "summary/train")
    train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)
Example #16
from text_cnn import TextCNN
from text_cnn import TextNN
from text_cnn import plot_accuracy
# from main import get_data

import numpy as np
NUM_FILTERS = 3
FILTER_SIZES = [2, 3, 5]
MAX_WORD = 40
EMBEDDING_LENGTH = 300
NUM_CLASSES = 2

model = TextCNN(NUM_FILTERS, FILTER_SIZES, MAX_WORD, EMBEDDING_LENGTH,
                NUM_CLASSES)
nn = TextNN(MAX_WORD, EMBEDDING_LENGTH, NUM_CLASSES)
# pos_path = "./review_polarity/txt_sentoken/pos/*.txt"
# neg_path = "./review_polarity/txt_sentoken/neg/*.txt"
# pos_data_train, pos_data_test = get_data(pos_path)
# neg_data_train, neg_data_test = get_data(neg_path)


def read_data_from_csv(path):
    res = []
    with open(path, 'r') as f:
        review = []
        for line in f:
            nums = line.split(',')
            newWord = [float(num) for num in nums]
            review.append(newWord)
            if len(review) == 40:
                res.append(review)
                review = []
Example #17
def train():
    with tf.device('/cpu:0'):
        x_text, y, pos1, pos2 = data_helpers.load_data_and_labels(
            FLAGS.train_path)

    # Build vocabulary
    # Example: x_text[3] = "A misty <e1>ridge</e1> uprises from the <e2>surge</e2>."
    # ['a misty ridge uprises from the surge <UNK> <UNK> ... <UNK>']
    # =>
    # [27 39 40 41 42  1 43  0  0 ... 0]
    # dimension = FLAGS.max_sentence_length
    text_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
        FLAGS.max_sentence_length)
    x = np.array(list(text_vocab_processor.fit_transform(x_text)))
    print("Text Vocabulary Size: {:d}".format(
        len(text_vocab_processor.vocabulary_)))
    print("x = {0}".format(x.shape))
    print("y = {0}".format(y.shape))
    print("")

    # Example: pos1[3] = [-2 -1  0  1  2   3   4 999 999 999 ... 999]
    # [95 96 97 98 99 100 101 999 999 999 ... 999]
    # =>
    # [11 12 13 14 15  16  21  17  17  17 ...  17]
    # dimension = MAX_SENTENCE_LENGTH
    pos_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
        FLAGS.max_sentence_length)
    pos_vocab_processor.fit(pos1 + pos2)
    p1 = np.array(list(pos_vocab_processor.transform(pos1)))
    p2 = np.array(list(pos_vocab_processor.transform(pos2)))
    print("Position Vocabulary Size: {:d}".format(
        len(pos_vocab_processor.vocabulary_)))
    print("position_1 = {0}".format(p1.shape))
    print("position_2 = {0}".format(p2.shape))
    print("")

    # Randomly shuffle data to split into train and test(dev)
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    p1_shuffled = p1[shuffle_indices]
    p2_shuffled = p2[shuffle_indices]
    y_shuffled = y[shuffle_indices]

    # Split train/test set
    # TODO: This is very crude, should use cross-validation
    dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
    x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[
        dev_sample_index:]
    p1_train, p1_dev = p1_shuffled[:dev_sample_index], p1_shuffled[
        dev_sample_index:]
    p2_train, p2_dev = p2_shuffled[:dev_sample_index], p2_shuffled[
        dev_sample_index:]
    y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[
        dev_sample_index:]
    print("Train/Dev split: {:d}/{:d}\n".format(len(y_train), len(y_dev)))

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(
                sequence_length=x_train.shape[1],
                num_classes=y_train.shape[1],
                text_vocab_size=len(text_vocab_processor.vocabulary_),
                text_embedding_size=FLAGS.text_embedding_dim,
                pos_vocab_size=len(pos_vocab_processor.vocabulary_),
                pos_embedding_size=FLAGS.pos_embedding_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdadeltaOptimizer(FLAGS.learning_rate,
                                                   FLAGS.decay_rate, 1e-6)
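            # Compute the gradients and clip each element to [-1.0, 1.0] before applying them,
            # to guard against exploding gradients.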
            gvs = optimizer.compute_gradients(cnn.loss)
            capped_gvs = [(tf.clip_by_value(grad, -1.0, 1.0), var)
                          for grad, var in gvs]
            train_op = optimizer.apply_gradients(capped_gvs,
                                                 global_step=global_step)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            text_vocab_processor.save(os.path.join(out_dir, "text_vocab"))
            pos_vocab_processor.save(os.path.join(out_dir, "pos_vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Pre-trained word2vec
            if FLAGS.embedding_path:
                pretrain_W = utils.load_word2vec(FLAGS.embedding_path,
                                                 FLAGS.text_embedding_dim,
                                                 text_vocab_processor)
                sess.run(cnn.W_text.assign(pretrain_W))
                print("Success to load pre-trained word2vec model!\n")

            # Generate batches
            batches = data_helpers.batch_iter(
                list(zip(x_train, p1_train, p2_train, y_train)),
                FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            best_f1 = 0.0  # For save checkpoint(model)
            for batch in batches:
                x_batch, p1_batch, p2_batch, y_batch = zip(*batch)
                # Train
                feed_dict = {
                    cnn.input_text: x_batch,
                    cnn.input_p1: p1_batch,
                    cnn.input_p2: p2_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)
                train_summary_writer.add_summary(summaries, step)

                # Training log display
                if step % FLAGS.display_every == 0:
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss, accuracy))

                # Evaluation
                if step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    feed_dict = {
                        cnn.input_text: x_dev,
                        cnn.input_p1: p1_dev,
                        cnn.input_p2: p2_dev,
                        cnn.input_y: y_dev,
                        cnn.dropout_keep_prob: 1.0
                    }
                    summaries, loss, accuracy, predictions = sess.run([
                        dev_summary_op, cnn.loss, cnn.accuracy, cnn.predictions
                    ], feed_dict)
                    dev_summary_writer.add_summary(summaries, step)

                    time_str = datetime.datetime.now().isoformat()
                    f1 = f1_score(np.argmax(y_dev, axis=1),
                                  predictions,
                                  labels=np.array(range(1, 19)),
                                  average="macro")
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss, accuracy))
                    print(
                        "[UNOFFICIAL] (2*9+1)-Way Macro-Average F1 Score (excluding Other): {:g}\n"
                        .format(f1))

                    # Model checkpoint
                    if best_f1 < f1:
                        best_f1 = f1
                        path = saver.save(sess,
                                          checkpoint_prefix +
                                          "-{:.3g}".format(best_f1),
                                          global_step=step)
                        print("Saved model checkpoint to {}\n".format(path))
def train_cnn():
    """Training CNN model."""

    # Load sentences, labels, and training parameters
    logger.info('✔︎ Loading data...')

    logger.info('✔︎ Training data processing...')
    train_data = dh.load_data_and_labels(FLAGS.training_data_file,
                                         FLAGS.embedding_dim)

    logger.info('✔︎ Validation data processing...')
    validation_data = dh.load_data_and_labels(FLAGS.validation_data_file,
                                              FLAGS.embedding_dim)

    logger.info('Recommended padding sequence length is: {0}'.format(
        FLAGS.pad_seq_len))

    logger.info('✔︎ Training data padding...')
    x_train_front, x_train_behind, y_train = dh.pad_data(
        train_data, FLAGS.pad_seq_len)

    logger.info('✔︎ Validation data padding...')
    x_validation_front, x_validation_behind, y_validation = dh.pad_data(
        validation_data, FLAGS.pad_seq_len)

    # Build vocabulary
    VOCAB_SIZE = dh.load_vocab_size(FLAGS.embedding_dim)
    pretrained_word2vec_matrix = dh.load_word2vec_matrix(
        VOCAB_SIZE, FLAGS.embedding_dim)

    # Build a graph and cnn object
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=FLAGS.pad_seq_len,
                          num_classes=y_train.shape[1],
                          vocab_size=VOCAB_SIZE,
                          fc_hidden_size=FLAGS.fc_hidden_size,
                          embedding_size=FLAGS.embedding_dim,
                          embedding_type=FLAGS.embedding_type,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda,
                          pretrained_embedding=pretrained_word2vec_matrix)

            # Define training procedure
            with tf.control_dependencies(
                    tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
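                # With staircase=True the learning rate decays in discrete steps:
                # FLAGS.learning_rate * FLAGS.decay_rate ** (global_step // decay_steps).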
                learning_rate = tf.train.exponential_decay(
                    learning_rate=FLAGS.learning_rate,
                    global_step=cnn.global_step,
                    decay_steps=FLAGS.decay_steps,
                    decay_rate=FLAGS.decay_rate,
                    staircase=True)
                optimizer = tf.train.AdamOptimizer(learning_rate)
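                # Compute gradients, then rescale them jointly so their global L2 norm is at
                # most FLAGS.norm_ratio (gradient clipping) before applying them.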
                grads, vars = zip(*optimizer.compute_gradients(cnn.loss))
                grads, _ = tf.clip_by_global_norm(grads,
                                                  clip_norm=FLAGS.norm_ratio)
                train_op = optimizer.apply_gradients(
                    zip(grads, vars),
                    global_step=cnn.global_step,
                    name="train_op")

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in zip(grads, vars):
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{0}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{0}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            if FLAGS.train_or_restore == 'R':
                MODEL = input(
                    "☛ Please input the checkpoints model you want to restore, "
                    "it should be like(1490175368): "
                )  # The model you want to restore

                while not (MODEL.isdigit() and len(MODEL) == 10):
                    MODEL = input(
                        '✘ The format of your input is invalid, please re-enter: '
                    )
                logger.info(
                    '✔︎ Your input is valid, proceeding to the next step...'
                )

                checkpoint_dir = 'runs/' + MODEL + '/checkpoints/'

                out_dir = os.path.abspath(
                    os.path.join(os.path.curdir, "runs", MODEL))
                logger.info("✔︎ Writing to {0}\n".format(out_dir))
            else:
                timestamp = str(int(time.time()))
                out_dir = os.path.abspath(
                    os.path.join(os.path.curdir, "runs", timestamp))
                logger.info("✔︎ Writing to {0}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Validation summaries
            validation_summary_op = tf.summary.merge(
                [loss_summary, acc_summary])
            validation_summary_dir = os.path.join(out_dir, "summaries",
                                                  "validation")
            validation_summary_writer = tf.summary.FileWriter(
                validation_summary_dir, sess.graph)

            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            if FLAGS.train_or_restore == 'R':
                # Load cnn model
                logger.info("✔ Loading model...")
                checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
                logger.info(checkpoint_file)

                # Load the saved meta graph and restore variables
                saver = tf.train.import_meta_graph(
                    "{0}.meta".format(checkpoint_file))
                saver.restore(sess, checkpoint_file)
            else:
                checkpoint_dir = os.path.abspath(
                    os.path.join(out_dir, "checkpoints"))
                if not os.path.exists(checkpoint_dir):
                    os.makedirs(checkpoint_dir)
                sess.run(tf.global_variables_initializer())
                sess.run(tf.local_variables_initializer())

                # Embedding visualization config
                config = projector.ProjectorConfig()
                embedding_conf = config.embeddings.add()
                embedding_conf.tensor_name = 'embedding'
                embedding_conf.metadata_path = FLAGS.metadata_file

                projector.visualize_embeddings(train_summary_writer, config)
                projector.visualize_embeddings(validation_summary_writer,
                                               config)

                # Save the embedding visualization
                saver.save(
                    sess, os.path.join(out_dir, 'embedding', 'embedding.ckpt'))

            current_step = sess.run(cnn.global_step)

            def train_step(x_batch_front, x_batch_behind, y_batch):
                """A single training step"""
                feed_dict = {
                    cnn.input_x_front: x_batch_front,
                    cnn.input_x_behind: x_batch_behind,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
                    cnn.is_training: True
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, cnn.global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)
                logger.info("step {0}: loss {1:g}, acc {2:g}".format(
                    step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def validation_step(x_batch_front,
                                x_batch_behind,
                                y_batch,
                                writer=None):
                """Evaluates model on a validation set"""
                feed_dict = {
                    cnn.input_x_front: x_batch_front,
                    cnn.input_x_behind: x_batch_behind,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0,
                    cnn.is_training: False
                }
                step, summaries, loss, accuracy, recall, precision, f1, auc = sess.run(
                    [
                        cnn.global_step, validation_summary_op, cnn.loss,
                        cnn.accuracy, cnn.recall, cnn.precision, cnn.F1,
                        cnn.AUC
                    ], feed_dict)
                logger.info(
                    "step {0}: loss {1:g}, acc {2:g}, recall {3:g}, precision {4:g}, f1 {5:g}, AUC {6}"
                    .format(step, loss, accuracy, recall, precision, f1, auc))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = dh.batch_iter(
                list(zip(x_train_front, x_train_behind, y_train)),
                FLAGS.batch_size, FLAGS.num_epochs)

            num_batches_per_epoch = int(
                (len(x_train_front) - 1) / FLAGS.batch_size) + 1
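            # Ceiling division: the number of mini-batches needed to cover the training set once per epoch.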

            # Training loop. For each batch...
            for batch in batches:
                x_batch_front, x_batch_behind, y_batch = zip(*batch)
                train_step(x_batch_front, x_batch_behind, y_batch)
                current_step = tf.train.global_step(sess, cnn.global_step)

                if current_step % FLAGS.evaluate_every == 0:
                    logger.info("\nEvaluation:")
                    validation_step(x_validation_front,
                                    x_validation_behind,
                                    y_validation,
                                    writer=validation_summary_writer)
                if current_step % FLAGS.checkpoint_every == 0:
                    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    logger.info(
                        "✔︎ Saved model checkpoint to {0}\n".format(path))
                if current_step % num_batches_per_epoch == 0:
                    current_epoch = current_step // num_batches_per_epoch
                    logger.info(
                        "✔︎ Epoch {0} has finished!".format(current_epoch))

    logger.info("✔︎ Done.")
X_dev, Y_dev, Xsymbol_dev = dataprocessor.FormatVecForCNN(dev_featureVec, dev_labelVec, dev_symbolVec)


dataprocessor.dumpPlain("data/train.feature.vid.txt", "data/train.feature.idv.txt", "data/train.label.vid.txt",
                        "data/train.label.idv.txt", "data/train.symbol.vid.txt", "data/train.symbol.idv.txt")
dataprocessor.dump("data/vocab_all.pkl")

print(np.array(X_train).shape)
print(np.array(Y_train).shape)
print(np.array(Xsymbol_train).shape)

with tf.Session() as sess:
    cnn = TextCNN(
        sequence_length=FLAGS.sequence_length,
        num_classes=FLAGS.num_classes,
        dim=FLAGS.dim,
        filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
        num_filters=FLAGS.num_filters,
        l2_reg_lambda=FLAGS.l2_reg_lambda)
    sinfo = sess.run(cnn.shapeinfo)
    print("sinfo")
    print(sinfo)
    global_step = tf.Variable(0, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer(1e-3)
    grads_and_vars = optimizer.compute_gradients(cnn.loss)
    train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

    grad_summaries = []
    for g, v in grads_and_vars:
        if g is not None:
            grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
      allow_soft_placement=FLAGS.allow_soft_placement,
      log_device_placement=FLAGS.log_device_placement,
      intra_op_parallelism_threads=2,
      inter_op_parallelism_threads=2)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(
            sequence_length=text_x.shape[1],
            vocab_size=vocab_size,
            embedding_size=FLAGS.embedding_dim,
            filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
            num_filters=FLAGS.num_filters,
            num_ratings=ratings.shape[1],
            num_locations=locations.shape[1],
            num_genders=genders.shape[1],
            num_ages=ages.shape[1],
            hidden_size=300,
            l2_reg_lambda=FLAGS.l2_reg_lambda
            )

        # Define Training procedure
        learning_rate = tf.placeholder(tf.float32, shape=[], name="learning_rate")
        adv_lambda = tf.placeholder(tf.float32, shape=[], name="adversarial_lambda")

        global_step = tf.Variable(0, name="global_step", trainable=False)
        all_var_list = tf.trainable_variables()

        optimizer_n = tf.train.AdamOptimizer(
Example #21
gc.collect()

# savepath = '/Users/bong/works/tfen/w2v_cnn/runs/backup/checkpoints/model-2900'

savepath = '/Users/bong/works/tfen/w2v_cnn/runs/1464326614/checkpoints/model-6700'

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(sequence_length=x_test.shape[1],
                      num_classes=3,
                      embedding_size=FLAGS.embedding_dim,
                      filter_sizes=list(map(int,
                                            FLAGS.filter_sizes.split(","))),
                      num_filters=FLAGS.num_filters,
                      l2_reg_lambda=FLAGS.l2_reg_lambda)

        saver = tf.train.Saver(tf.global_variables())
        saver.restore(sess, savepath)

        def test_step(x_batch, y_batch, writer=None):
            """
            Evaluates model on a test set
            """
            feed_dict = {
                cnn.input_x: x_batch,
                cnn.input_y: y_batch,
                cnn.dropout_keep_prob: 1.0
Example #22
# print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))
# print x_train.shape, y_train.shape

x_train, y_train = x_shuffled, y_shuffled

# Training
# ==================================================
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(sequence_length=x_train.shape[1],
                      num_classes=y_train.shape[1],
                      vocab_size=len(vocab_processor.vocabulary_),
                      embedding_size=embedding_dim,
                      filter_sizes=filter_sizes,
                      num_filters=num_filters,
                      l2_reg_lambda=l2_reg_lambda)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        def train_step(x_batch, y_batch):
Example #23
def train_cnn():
    """Step 0: load sentences, labels, and training parameters"""
    train_file = sys.argv[1]
    x_raw, y_raw, df, labels = data_helper.load_data_and_labels(train_file)

    parameter_file = sys.argv[2]
    params = json.loads(open(parameter_file).read())
    """Step 1: pad each sentence to the same length and map each word to an id"""
    max_document_length = max([len(x.split(' ')) for x in x_raw])
    logging.info(
        'The maximum length of all sentences: {}'.format(max_document_length))
    vocab_processor = learn.preprocessing.VocabularyProcessor(
        max_document_length)
    x = np.array(list(vocab_processor.fit_transform(x_raw)))
    y = np.array(y_raw)
    '''
	dev_file = sys.argv[3]
	x_raw1, y_raw1, df, labels = data_helper.load_data_and_labels(dev_file)
	"""Step 1: pad each sentence to the same length and map each word to an id"""
	max_document_length = max([len(x.split(' ')) for x in x_raw1])
	logging.info('The maximum length of all sentences: {}'.format(max_document_length))
	vocab_processor1 = learn.preprocessing.VocabularyProcessor(max_document_length)
	x_dev = np.array(list(vocab_processor1.fit_transform(x_raw1)))
	y_dev = np.array(y_raw1)
	'''
    """Step 2: split the original dataset into train and test sets"""
    #x_, x_test, y_, y_test = train_test_split(x, y, test_size=0.13756, random_state=42)
    x_train, x_dev, y_train, y_dev = train_test_split(x,
                                                      y,
                                                      test_size=0.1557971014)
    """Step 3: shuffle the train set and split the train set into train and dev sets"""
    #shuffle_indices = np.random.permutation(np.arange(len(y_)))
    #x_shuffled = x_[shuffle_indices]
    #y_shuffled = y_[shuffle_indices]
    #x_train, x_dev, y_train, y_dev = train_test_split(x_shuffled, y_shuffled, test_size=0.1)
    """Step 4: save the labels into labels.json since predict.py needs it"""
    with open('./labels.json', 'w') as outfile:
        json.dump(labels, outfile, indent=4)

    #logging.info('x_train: {}, x_dev: {}, x_test: {}'.format(len(x_train), len(x_dev), len(x_test)))
    #logging.info('y_train: {}, y_dev: {}, y_test: {}'.format(len(y_train), len(y_dev), len(y_test)))
    logging.info('x_train: {}, x_dev: {}'.format(len(x_train), len(x_dev)))
    logging.info('y_train: {}, y_dev: {}'.format(len(y_train), len(y_dev)))
    """Step 5: build a graph and cnn object"""
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=len(vocab_processor.vocabulary_),
                          embedding_size=params['embedding_dim'],
                          filter_sizes=list(
                              map(int, params['filter_sizes'].split(","))),
                          num_filters=params['num_filters'],
                          l2_reg_lambda=params['l2_reg_lambda'])

            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "trained_model_" + timestamp))

            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables())

            # One training step: train the model with one batch
            def train_step(x_batch, y_batch):
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: params['dropout_keep_prob']
                }
                _, step, loss, acc = sess.run(
                    [train_op, global_step, cnn.loss, cnn.accuracy], feed_dict)

            # One evaluation step: evaluate the model with one batch
            def dev_step(x_batch, y_batch):
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, loss, acc, num_correct = sess.run(
                    [global_step, cnn.loss, cnn.accuracy, cnn.num_correct],
                    feed_dict)
                return num_correct

            # Save the word_to_id map since predict.py needs it
            vocab_processor.save(os.path.join(out_dir, "vocab.pickle"))
            sess.run(tf.global_variables_initializer())

            # Training starts here
            train_batches = data_helper.batch_iter(list(zip(x_train, y_train)),
                                                   params['batch_size'],
                                                   params['num_epochs'])
            best_accuracy, best_at_step = 0, 0
            """Step 6: train the cnn model with x_train and y_train (batch by batch)"""
            for train_batch in train_batches:
                x_train_batch, y_train_batch = zip(*train_batch)
                train_step(x_train_batch, y_train_batch)
                current_step = tf.train.global_step(sess, global_step)
                """Step 6.1: evaluate the model with x_dev and y_dev (batch by batch)"""
                if current_step % params['evaluate_every'] == 0:
                    dev_batches = data_helper.batch_iter(
                        list(zip(x_dev, y_dev)), params['batch_size'], 1)
                    total_dev_correct = 0
                    for dev_batch in dev_batches:
                        x_dev_batch, y_dev_batch = zip(*dev_batch)
                        num_dev_correct = dev_step(x_dev_batch, y_dev_batch)
                        total_dev_correct += num_dev_correct

                    dev_accuracy = float(total_dev_correct) / len(y_dev)
                    logging.critical(
                        'Accuracy on dev set: {}'.format(dev_accuracy))
                    """Step 6.2: save the model if it is the best based on accuracy of the dev set"""
                    if dev_accuracy >= best_accuracy:
                        best_accuracy, best_at_step = dev_accuracy, current_step
                        path = saver.save(sess,
                                          checkpoint_prefix,
                                          global_step=current_step)
                        logging.critical('Saved model {} at step {}'.format(
                            path, best_at_step))
                        logging.critical('Best accuracy {} at step {}'.format(
                            best_accuracy, best_at_step))
            '''

			"""Step 7: predict x_test (batch by batch)"""
			test_batches = data_helper.batch_iter(list(zip(x_test, y_test)), params['batch_size'], 1)
			total_test_correct = 0
			for test_batch in test_batches:
				x_test_batch, y_test_batch = zip(*test_batch)
				num_test_correct = dev_step(x_test_batch, y_test_batch)
				total_test_correct += num_test_correct

			test_accuracy = float(total_test_correct) / len(y_test)
			logging.critical('Accuracy on test set is {} based on the best model {}'.format(test_accuracy, path))
			'''
            logging.critical('The training is complete')
Example #24
def train(x_train, y_train, vocab_processor, x_dev, y_dev):
    # Training
    # ==================================================

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)

        # TODO: consider adding gpu_options.allow_growth = True to session_conf;
        # see how the earlier GPU program did it, e.g.:
        # session_conf = tf.ConfigProto()
        # session_conf.allow_soft_placement = True
        # session_conf.log_device_placement = True
        # session_conf.gpu_options.allow_growth = True
        """
        Parameter notes:
        sequence_length: sentence length
        num_classes: number of classes in the classification task
        vocab_size: number of words in the vocabulary
        embedding_size: dimensionality of the word vectors
        filter_sizes: filter sizes, as a list
        num_filters: number of filters per size
        l2_reg_lambda: L2 regularization weight
        """
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=len(vocab_processor.vocabulary_),
                          embedding_size=FLAGS.embedding_dim,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """
                A single training step
                Parameters:
                x_batch: a batch of sentences (word-id sequences)
                y_batch: the corresponding batch of labels
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches: batch_size = 64; num_epochs = 1 (originally 200)
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size,
                                              FLAGS.num_epochs)
            # Training loop. For each batch... (each batch pairs a chunk of sentences
            # with their corresponding labels)
            # batches is a generator: data_helpers.batch_iter uses yield, so batches
            # are produced lazily, on demand (a minimal sketch of such a generator
            # follows this example)
            for batch in batches:
                x_batch, y_batch = zip(*batch)  # unpack the batch into inputs and labels
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(
                    sess, global_step)  # read back the current global_step value
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
def train(x_train, y_train, vocab_processor, x_dev, y_dev, x_real_len_train,
          x_real_len_dev, sorted_label):
    # Training
    # ==================================================

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            if FLAGS.model_type == "cnnrnn":
                obj = TextCNNRNN(
                    sequence_length=FLAGS.max_document_length,
                    num_classes=[tmp_y.shape[1] for tmp_y in y_train],
                    vocab_size=len(vocab_processor.vocabulary_),
                    hidden_unit=FLAGS.hidden_unit,
                    embedding_size=FLAGS.embedding_dim,
                    filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                    num_filters=FLAGS.num_filters,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)
            elif FLAGS.model_type == "rnncnn":
                obj = TextRNNCNN(
                    sequence_length=FLAGS.max_document_length,
                    num_classes=[tmp_y.shape[1] for tmp_y in y_train],
                    vocab_size=len(vocab_processor.vocabulary_),
                    hidden_unit=FLAGS.hidden_unit,
                    embedding_size=FLAGS.embedding_dim,
                    filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                    num_filters=FLAGS.num_filters,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)
            elif FLAGS.model_type == "rnnandcnn":
                obj = TextRNNandCNN(
                    sequence_length=FLAGS.max_document_length,
                    num_classes=[tmp_y.shape[1] for tmp_y in y_train],
                    vocab_size=len(vocab_processor.vocabulary_),
                    hidden_unit=FLAGS.hidden_unit,
                    embedding_size=FLAGS.embedding_dim,
                    filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                    num_filters=FLAGS.num_filters,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)
            elif FLAGS.model_type == "rnn":
                obj = TextRNN(
                    sequence_length=FLAGS.max_document_length,
                    num_classes=[tmp_y.shape[1] for tmp_y in y_train],
                    vocab_size=len(vocab_processor.vocabulary_),
                    hidden_unit=FLAGS.hidden_unit,
                    embedding_size=FLAGS.embedding_dim,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)
            else:
                obj = TextCNN(
                    sequence_length=FLAGS.max_document_length,
                    num_classes=[tmp_y.shape[1] for tmp_y in y_train],
                    vocab_size=len(vocab_processor.vocabulary_),
                    task_num=FLAGS.task_num,
                    embedding_size=FLAGS.embedding_dim,
                    filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                    num_filters=FLAGS.num_filters,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                grads_and_vars = optimizer.compute_gradients(obj.loss)
                train_op = optimizer.apply_gradients(grads_and_vars,
                                                     global_step=global_step)
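            # Computing and applying the gradients under the UPDATE_OPS dependency
            # above ensures that any ops collected there (e.g. batch-norm
            # moving-average updates, if the model uses batch normalization) run on
            # every training step.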

            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", FLAGS.model_version))
            print("Writing to {}\n".format(out_dir))
            # Checkpoint directory. TensorFlow assumes this directory already exists, so we need to create it ourselves
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Save train params since eval.py needs them
            trained_dir = os.path.abspath(
                os.path.join(out_dir, "trained_results"))
            if not os.path.exists(trained_dir):
                os.makedirs(trained_dir)

            cPickle.dump(sorted_label, open(trained_dir + "/sorted_label",
                                            "w"))

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch, x_real_len_batch):
                """
                A single training step
                """
                if FLAGS.model_type == "cnn":
                    feed_dict = {
                        obj.input_x: x_batch,
                        obj.dropout_keep_prob: FLAGS.dropout_keep_prob,
                        obj.is_training: True
                    }
                else:
                    feed_dict = {
                        obj.input_x: x_batch,
                        obj.dropout_keep_prob: FLAGS.dropout_keep_prob,
                        obj.real_len: x_real_len_batch
                    }
                for i in range(FLAGS.task_num):
                    feed_dict[obj.input_y[i]] = y_batch[i]
                _, step, loss, accuracy = sess.run(
                    [train_op, global_step, obj.loss, obj.accuracy], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                for i in range(FLAGS.task_num):
                    print("{}: step {}, loss {:g}, acc_{:d} {:g}".format(
                        time_str, step, loss, i, accuracy[i]))

            def overfit(dev_loss, eva_num=3):
                """
                Return True when the last `eva_num` dev readings never improve
                (non-increasing); used as a simple early-stopping check.
                """
                n = len(dev_loss)
                if n < eva_num:
                    return False
                for i in xrange(n - eva_num + 1, n):
                    if dev_loss[i] > dev_loss[i - 1]:
                        return False
                return True
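
            # Illustration with hypothetical dev accuracies:
            #   overfit([0.80, 0.82, 0.81, 0.81]) -> True  (last three readings never
            #   improve, so training stops)
            #   overfit([0.80, 0.81, 0.83])       -> False (still improving)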

            def dev_step(x_test, y_test, x_real_len_test):
                """
                Evaluates model on a dev set
                """
                all_pred = [[] for i in range(FLAGS.task_num)]
                all_act = [[] for i in range(FLAGS.task_num)]
                zip_list = []
                for i in range(FLAGS.task_num):
                    zip_list.append(
                        list(zip(x_test, y_test[i], x_real_len_test)))
                batches, total_batch_num = data_helpers.multi_task_batch_iter(
                    zip_list, FLAGS.batch_size, 1, shuffle=False)
                for i in xrange(total_batch_num):
                    y_batch = []
                    for j in range(FLAGS.task_num):
                        tmp_batch = batches[j].next()
                        x_batch, y_tmp_batch, x_real_len_batch = zip(
                            *tmp_batch)
                        y_batch.append(y_tmp_batch)
                    if FLAGS.model_type == "cnn":
                        feed_dict = {
                            obj.input_x: x_batch,
                            obj.dropout_keep_prob: 1.0,
                            obj.is_training: False
                        }
                    else:
                        feed_dict = {
                            obj.input_x: x_batch,
                            obj.dropout_keep_prob: 1.0,
                            obj.real_len: x_real_len_batch
                        }
                    for j in range(FLAGS.task_num):
                        feed_dict[obj.input_y[j]] = y_batch[j]
                    step, pred = sess.run([global_step, obj.predictions],
                                          feed_dict)
                    for j in range(FLAGS.task_num):
                        all_pred[j] = np.concatenate([all_pred[j], pred[j]])
                        all_act[j] = np.concatenate(
                            [all_act[j],
                             np.argmax(y_batch[j], axis=1)])
                err_cnt = 0
                for i in range(len(x_test)):
                    for j in range(FLAGS.task_num):
                        if all_pred[j][i] != all_act[j][i]:
                            err_cnt += 1
                            break

                dev_acc = 1.0 * (len(x_test) - err_cnt) / len(x_test)
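                # dev_acc is the joint accuracy: a sample counts as correct only when
                # every task's prediction matches its label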
                print("dev_sample {}, dev_acc {:g}".format(
                    len(x_test), dev_acc))
                return dev_acc

            # Generate batches
            zip_list = []
            for i in range(FLAGS.task_num):
                zip_list.append(
                    list(zip(x_train, y_train[i], x_real_len_train)))
            batches, total_batch_num = data_helpers.multi_task_batch_iter(
                zip_list, FLAGS.batch_size, FLAGS.num_epochs)

            # Training loop. For each batch...
            dev_acc = []
            for i in xrange(total_batch_num):
                y_batch = []
                for j in range(FLAGS.task_num):
                    tmp_batch = batches[j].next()
                    x_batch, tmp_y_batch, x_real_len_batch = zip(*tmp_batch)
                    y_batch.append(tmp_y_batch)
                train_step(x_batch, y_batch, x_real_len_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:", current_step)
                    cur_acc = dev_step(x_dev, y_dev, x_real_len_dev)
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
                    dev_acc.append(cur_acc)
                    if overfit(dev_acc):
                        print("current accuracy drop and stop train..\n")
                        sys.exit(0)
                    print("")
Example #26
    def architecture(self):

        #######  DEFINITIONS #######

        multiplier = len(FLAGS.filter_sizes_cnn1.split(","))

        # create trainable parameters (weights and biases)
        weights = {
            'fc1':
            tf.Variable(tf.random_normal([(multiplier * FLAGS.num_filters),
                                          FLAGS.num_classes]),
                        name="fc1-weights"),
            'att2-W-char':
            tf.Variable(tf.random_normal([
                multiplier * FLAGS.num_filters, multiplier * FLAGS.num_filters
            ]),
                        name='att2-weights-W-char'),
            'att2-v-char':
            tf.Variable(tf.random_normal([multiplier * FLAGS.num_filters]),
                        name='att2-weigths-v-char'),
        }

        bias = {
            'fc1':
            tf.Variable(tf.random_normal([FLAGS.num_classes]),
                        name="fc1-bias-noreg"),
            'att2-W-char':
            tf.Variable(tf.random_normal([multiplier * FLAGS.num_filters]),
                        name="att2-char-bias-noreg")
        }

        # cnn initialization
        cnn = TextCNN(sequence_length=FLAGS.sequence_length,
                      num_classes=FLAGS.num_classes,
                      embedding_size=FLAGS.embedding_dim,
                      filter_sizes=list(
                          map(int, FLAGS.filter_sizes_cnn1.split(","))),
                      num_filters=FLAGS.num_filters,
                      vocab_size=self.char_embeddings.shape[0],
                      l2_reg_lambda=FLAGS.l2_reg_lambda)
        del self.char_embeddings

        #ARCHITECTURE

        # forward pass

        cnn_output = cnn.h_pool_flat

        # attention on char - user level
        att_context_vector_char = tf.tanh(
            tf.matmul(cnn_output, weights["att2-W-char"]) +
            bias["att2-W-char"])
        attentions_char = tf.nn.softmax(tf.matmul(
            att_context_vector_char, tf.expand_dims(weights["att2-v-char"],
                                                    -1)),
                                        axis=0)
        attention_output_char = tf.reduce_sum(cnn_output * attentions_char, 0)
        attention_output_char = tf.reshape(attention_output_char,
                                           [1, multiplier * FLAGS.num_filters])
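
        # The two steps above implement additive attention over the CNN outputs:
        # u_i = tanh(h_i W + b), alpha = softmax(u . v) across the rows, and the
        # final representation is sum_i alpha_i * h_i, reshaped to one row vector.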

        # output layer: fully connected + softmax
        logits = tf.matmul(attention_output_char, weights['fc1']) + bias['fc1']
        prediction = tf.nn.softmax(logits)

        if self.mode != "Test":
            loss_op = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                           labels=self.Y))

            # add L2 regularization
            l2 = self.tf_ideal_l2_reg_parameter * sum(
                tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables()
                if not ("noreg" in tf_var.name or "Bias" in tf_var.name))
            loss_op += l2

            # optimizer
            optimizer = tf.train.AdamOptimizer(
                learning_rate=FLAGS.learning_rate)
            train_op = optimizer.minimize(loss_op)

            # calculate training accuracy for checking correctness
            correct_pred = tf.equal(tf.argmax(prediction, 1),
                                    tf.argmax(self.Y, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

            return accuracy, train_op, loss_op, prediction, cnn
        else:
            return None, None, None, prediction, cnn
Example #27
def train(w2v_model):
    # Training
    # ==================================================
    x_train, x_dev, y_train, y_dev, vocab_size = load_data(w2v_model)
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(w2v_model,
                          sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=vocab_size,
                          embedding_size=FLAGS.embedding_dim,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint Save
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)

                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size,
                                              FLAGS.num_epochs)

            def dev_test():
                batches_dev = data_helpers.batch_iter(list(zip(x_dev, y_dev)),
                                                      FLAGS.batch_size, 1)
                for batch_dev in batches_dev:
                    x_batch_dev, y_batch_dev = zip(*batch_dev)
                    dev_step(x_batch_dev,
                             y_batch_dev,
                             writer=dev_summary_writer)

            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_test()

                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
Example #28

# Training
# ==================================================

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(
                    sequence_length=x_train.shape[1],
                    num_classes=6,
                    vocab_size=len(vocabulary),
                    # embedding_size=FLAGS.embedding_dim,
                    embedding_size=400,
                    filter_sizes=map(int, FLAGS.filter_sizes.split(",")),
                    num_filters=FLAGS.num_filters,
                    l2_reg_lambda=FLAGS.l2_reg_lambda,
                    embedding_maxtrix=Word2V)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-4)
        #optimizer = tf.train.GradientDescentOptimizer(0.1)
        #optimizer = tf.train.MomentumOptimizer(0.01, 0.9)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
Example #29
print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

# Training
# ==================================================

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(sequence_length=x_train.shape[1],
                      num_classes=y_train.shape[1],
                      vocab_size=len(vocab_processor.vocabulary_),
                      embedding_size=FLAGS.embedding_dim,
                      filter_sizes=list(map(int,
                                            FLAGS.filter_sizes.split(","))),
                      num_filters=FLAGS.num_filters,
                      l2_reg_lambda=FLAGS.l2_reg_lambda)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(.5)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
Example #30
with tf.Graph().as_default():
    # A Graph contains operations and tensors.
    # Session is the environment you are executing graph operations in, and it contains state about Variables and queues.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options))
    with sess.as_default():
        cnn = TextCNN(
            sequence_length=x.shape[1],
            # the length of the sentence
            num_classes=FLAGS.num_classes,
            # how many classes as output
            vocab_size=len(vocabulary),
            # total vocabulary
            embedding_size=FLAGS.embedding_dim,
            # vector length
            filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
            # map: apply a function to every item of an iterable and collect the results
            # k filters (or kernels) of size n x n x q, where n is smaller than the
            # dimension of the image and q can either equal the number of channels r
            # or be smaller, and may vary for each kernel.
            # Each feature map is then subsampled, typically with mean or max pooling
            # over p x p contiguous regions, where p ranges from 2 for small images
            # (e.g. MNIST) to usually not more than 5 for larger inputs.
            num_filters=FLAGS.num_filters,
            # how many filters in one layer
            l2_reg_lambda=FLAGS.l2_reg_lambda)
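
        # Worked example (assuming embedding_dim=128 and a filter size of 3): each
        # filter spans 3 x 128 x 1, a valid convolution over a length-L sentence
        # yields L - 3 + 1 values, max-pooling keeps one value per filter, and the
        # num_filters outputs from each filter size are concatenated.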

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        # let TensorFlow handle the counting of training steps for us
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)