def train(x_train, y_train, vocab_processor, x_dev, y_dev):
    # Training
    rnn = TextRNN(
        sequence_length=config.timesteps,
        num_classes=config.num_classes,
        vocab_size=len(vocab_processor.vocabulary_),
        embedding_size=config.embedding_dim,
        num_hidden=config.num_hidden,
        l2_reg_lambda=config.l2_reg_lambda,
        keep_prob=config.dropout_keep_prob,
        attention_size=config.attention_size
    )
    # Define training procedure
    optimizer = tf.train.AdamOptimizer(config.learning_rate)
    train_op = optimizer.minimize(rnn.loss)

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        # Initialize all variables
        sess.run(init)
        # Generate batches
        batches = data_helpers.batch_iter(
            list(zip(x_train, y_train)), config.batch_size, config.training_steps)

        eval_min_loss = float('inf')
        early_stop_steps = 0
        # Train loop. For each batch ...
        for epoch, batch in enumerate(batches, 1):
            x_batch, y_batch = zip(*batch)
            feed_dict_train = {
                rnn.input_x: x_batch,
                rnn.input_y: y_batch,
            }
            _, loss_, accuracy_ = sess.run([train_op, rnn.loss, rnn.accuracy], feed_dict_train)
            if epoch % 50 == 0:
                print("epoch:{} loss {:g}, acc {:g}".format(epoch, loss_, accuracy_))

            if epoch % 100 == 0:
                print("\nEvaluation:")
                feed_dict_eval = {
                    rnn.input_x: x_dev,
                    rnn.input_y: y_dev,
                }
                loss_, accuracy_ = sess.run([rnn.loss, rnn.accuracy], feed_dict_eval)
                time_str = datetime.datetime.now().isoformat()
                print("{}: epoch {}, loss {:g}, acc {:g}\n".format(time_str, epoch, loss_, accuracy_))
                if loss_ < eval_min_loss:
                    eval_min_loss = loss_
                    early_stop_steps = 0  # reset the patience counter on improvement
                else:
                    early_stop_steps += 1

                if early_stop_steps == config.early_stop_steps:
                    print('eval loss shows no improvement, early stopping!')
                    break
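# The training loops in these examples call a data_helpers.batch_iter helper that the
# snippets themselves do not show. A minimal sketch of such a helper is below, assuming
# it reshuffles the data once per epoch and yields mini-batches; the exact signature and
# behaviour in each repository may differ.
import numpy as np

def batch_iter(data, batch_size, num_epochs, shuffle=True):
    """Yield mini-batches over `data` for `num_epochs` passes."""
    data = np.array(data, dtype=object)
    data_size = len(data)
    num_batches_per_epoch = int((data_size - 1) / batch_size) + 1
    for _ in range(num_epochs):
        # Optionally reshuffle at the start of every epoch
        order = np.random.permutation(data_size) if shuffle else np.arange(data_size)
        for batch_num in range(num_batches_per_epoch):
            start = batch_num * batch_size
            end = min(start + batch_size, data_size)
            yield data[order[start:end]]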
def train():
    train_data, DATA_SIZE, VOCAB_SIZE = dh.load_model()

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)

        with sess.as_default():
            rnn = TextRNN(
                avg_seq_len=FLAGS.avg_seq_len,
                vocab_size=VOCAB_SIZE,
                hidden_size=FLAGS.hidden_size,  # assumed flag; the value was missing here
                num_hidden_layers=FLAGS.num_hidden_layers,
                embedding_size=FLAGS.embedding_size,  # assumed flag; the value was missing here
                l2_reg_lambda=FLAGS.l2_reg_lambda,
            )
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)

        # Generate batches
        batches_train = dh.batch_iter(train_data, FLAGS.batch_size, FLAGS.num_epochs)  # assumed helper signature; the original line was incomplete

        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = Model.Model(is_training=True, config=config)

        sess.run(tf.global_variables_initializer())

        model_saver = tf.train.Saver(tf.global_variables())

        for i in range(FLAGS.num_epochs):
            logging.info("Training Epoch: %d ..." % (i+1))
            train_perplexity = run_epoch(sess, m, train_data, m.train_op)
            logging.info("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))

            if (i+1) % config.save_freq == 0:
                print('model saving ...')
                model_saver.save(sess, config.model_path + '-%d' % (i+1))
                print('Done!')
Example #3
CORPUS_DIR = '../data/'

epochs = 1
EMBEDDING_SIZE = 768
batch_size = 64

print('Loading data...')
# Load data and labels
data = np.load("../npy/data.npy")
labels = np.load("../npy/labels.npy")
embeddings = np.load("../npy/embeddings.npy")
num_words = len(embeddings)  # avoid re-loading the embeddings file

print('Build model...')
model = TextRNN(embedding_matrix=embeddings, maxlen=data.shape[1], max_features=num_words,
                embedding_dims=EMBEDDING_SIZE, class_num=labels.shape[1]).get_model()

model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])

print('Train...')
model.fit(data, labels, batch_size=batch_size, epochs=epochs, verbose=1)

flatten_layer = K.function([model.get_layer("input").input, K.learning_phase()], [model.get_layer("lstm").output])
flatten_layer_vec = flatten_layer([data, 0])[0]
print(flatten_layer_vec)
with open("rnn.txt", "w", encoding="utf-8") as f:
    for i, j in enumerate(flatten_layer_vec):
        for k in j:
            f.write(str(k)+",")
        f.write(str(list(np.nonzero(labels[i]))[0][0]))
        f.write("\n")
Example #4
# Parameters
learning_rate = 0.01
training_iters = 1000
batch_size = 64
display_step = 10
keep = 0.5

# Network Parameters
n_input = 1
n_steps = 56  # Sentence length
n_hidden = 256  # hidden layer num of features
n_classes = 2
embedding_size = 128

# Load model
myModel = TextRNN(learning_rate, n_input, n_steps, n_hidden, n_classes,
                  embedding_size, vocab_size, True, final_embeddings)

# Initializing the variables
init = tf.global_variables_initializer()

# Launch the graph
with tf.Session() as sess:

    #Writing Directory information
    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(
        os.path.join(os.path.curdir, "rnn_runs", timestamp))
    print("Writing to {}\n".format(out_dir))
    checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
Example #5
lengths_train, lengths_dev = lengths_shuffled[:-1000], lengths_shuffled[-1000:]
print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

# Training
# ==================================================

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        rnn = TextRNN(max_seq_length=x_train.shape[1],
                      num_classes=FLAGS.num_classes,
                      vocab_size=len(vocab_processor.vocabulary_),
                      embedding_size=FLAGS.embedding_dim,
                      num_lstm_layers=FLAGS.num_lstm_layers,
                      l2_reg_lambda=FLAGS.l2_reg_lambda)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(rnn.losses)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.histogram_summary(
                    "{}/grad/hist".format(v.name), g)
Example #6
print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) # 打印切分的比例

# Training
# ==================================================

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
      allow_soft_placement=FLAGS.allow_soft_placement,
      log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        rnn = TextRNN(
            sequence_length=x.shape[1],
            num_classes=y.shape[1],
            vocab_size=len(vocab_processor.vocabulary_), # number of words in the vocabulary
            embedding_size=FLAGS.embedding_dim,
            l2_reg_lambda=FLAGS.l2_reg_lambda,
            hidden_dim=FLAGS.hidden_dim,
            num_layers=FLAGS.num_layers)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False) # define the global step as a variable initialized to 0
        optimizer = tf.train.AdamOptimizer(1e-3) # use the Adam optimizer
        grads_and_vars = optimizer.compute_gradients(rnn.loss) # compute gradients of the loss; returns a list of (gradient, variable) pairs
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # apply the gradient update; global_step is incremented once per update

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        # log gradient summaries at every step so they can be inspected in TensorBoard
        for g, v in grads_and_vars:
            if g is not None:
Example #7
print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

# Training
# ==================================================

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    print("squence length is %d" % x_train.shape[1])
    with sess.as_default():
        rnn = TextRNN(sequence_length=x_train.shape[1],
                      num_classes=2,
                      vocab_size=len(vocab_processor.vocabulary_),
                      embedding_size=FLAGS.embedding_dim,
                      cell_size=FLAGS.cell_size)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-4)
        grads_and_vars = optimizer.compute_gradients(rnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.histogram_summary(
                    "{}/grad/hist".format(v.name), g)
Example #8
def train(x_train, y_train, vocab_processor, x_dev, y_dev):
    # Training
    # ==================================================

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            rnn = TextRNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=len(vocab_processor.vocabulary_),
                          embedding_size=FLAGS.embedding_dim,
                          num_hidden=FLAGS.num_hidden,
                          batch_size=FLAGS.batch_size,
                          init_state=FLAGS.init_state,
                          cell_type=FLAGS.cell_type)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(rnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", rnn.loss)
            acc_summary = tf.summary.scalar("accuracy", rnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                feed_dict = {
                    rnn.input_x: x_batch,
                    rnn.input_y: y_batch,
                    rnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, rnn.loss,
                    rnn.accuracy
                ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    rnn.input_x: x_batch,
                    rnn.input_y: y_batch,
                    rnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, rnn.loss, rnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            def my_debugging():
                # Generate batches
                batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                                  FLAGS.batch_size,
                                                  FLAGS.num_epochs)
                # Training loop. For each batch...
                for batch in batches:
                    x_batch, y_batch = zip(*batch)
                    feed_dict = {
                        rnn.input_x: x_batch,
                        rnn.input_y: y_batch,
                        rnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                    }
                    # print(np.shape(x_batch))
                    _input_x, _embedded_words, _outputs, _scores, _input_y = sess.run(
                        [
                            rnn.input_x, rnn.embedded_words, rnn.outputs,
                            rnn.scores, rnn.input_y
                        ], feed_dict)
                    # print(np.shape(_input_x), '_input_x: ', _input_x)
                    # print(np.shape(_embedded_words), '_embedd_words: ', _embedded_words)
                    # print(np.shape(_outputs), '_outputs', _outputs)
                    # print(np.shape(_scores), '_scores', _scores)
                    # print(np.shape(_input_y), '_input_y', _input_y)

                    print('----- print shape -----')
                    print(np.shape(x_batch), 'x_batch')
                    print(np.shape(_input_x), '_input_x')
                    print(np.shape(_embedded_words), '_embedded_words')
                    print(np.shape(_outputs), '_outputs')
                    print(np.shape(_scores), '_scores')
                    print(np.shape(_input_y), '_input_y')
                    return 0

            def do_train():
                # Generate batches
                batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                                  FLAGS.batch_size,
                                                  FLAGS.num_epochs)

                ## Use Initial State
                if FLAGS.init_state is True:
                    dev_batches = data_helpers.batch_iter(
                        list(zip(x_dev, y_dev)), FLAGS.batch_size,
                        FLAGS.num_epochs)

                # Training loop. For each batch...
                for batch in batches:
                    if FLAGS.init_state is True:
                        if len(batch) != FLAGS.batch_size:
                            continue

                    x_batch, y_batch = zip(*batch)
                    train_step(x_batch, y_batch)
                    current_step = tf.train.global_step(sess, global_step)
                    if current_step % FLAGS.evaluate_every == 0:
                        print("\nEvaluation:")
                        ## Use Initial State
                        if FLAGS.init_state is True:
                            for dev_batch in dev_batches:
                                if len(dev_batch) != FLAGS.batch_size:
                                    continue
                                x_dev_batch, y_dev_batch = zip(*dev_batch)
                                dev_step(x_dev_batch,
                                         y_dev_batch,
                                         writer=dev_summary_writer)
                        else:
                            ## Do Not Use Initial State
                            dev_step(x_dev, y_dev, writer=dev_summary_writer)
                        print("")
                    if current_step % FLAGS.checkpoint_every == 0:
                        path = saver.save(sess,
                                          checkpoint_prefix,
                                          global_step=current_step)
                        print("Saved model checkpoint to {}\n".format(path))

            # run training; my_debugging() above can be enabled instead to inspect tensor shapes
            # my_debugging()
            do_train()
Example #9
 elif FLAGS.using_nn_type == 'textcnn':
     nn = TextCNN(model_type=FLAGS.model_type,
                  sequence_length=x_train.shape[1],
                  num_classes=y_train.shape[1],
                  vocab_size=len(vocab_processor.vocabulary_),
                  embedding_size=embedding_dimension,
                  filter_sizes=list(
                      map(int, FLAGS.filter_sizes.split(","))),
                  num_filters=FLAGS.num_filters,
                  l2_reg_lambda=FLAGS.l2_reg_lambda)
 elif FLAGS.using_nn_type == 'textrnn':
     nn = TextRNN(
         model_type=FLAGS.model_type,
         sequence_length=x_train.shape[1],
         num_classes=y_train.shape[1],
         vocab_size=len(vocab_processor.vocabulary_),
         embedding_size=embedding_dimension,
         rnn_size=FLAGS.rnn_size,
         num_layers=FLAGS.num_rnn_layers,
         # batch_size=FLAGS.batch_size,
         l2_reg_lambda=FLAGS.l2_reg_lambda)
 elif FLAGS.using_nn_type == 'textbirnn':
     nn = TextBiRNN(
         model_type=FLAGS.model_type,
         sequence_length=x_train.shape[1],
         num_classes=y_train.shape[1],
         vocab_size=len(vocab_processor.vocabulary_),
         embedding_size=embedding_dimension,
         rnn_size=FLAGS.rnn_size,
         num_layers=FLAGS.num_rnn_layers,
         # batch_size=FLAGS.batch_size,
         l2_reg_lambda=FLAGS.l2_reg_lambda)
Example #10
def train_rnn():
    """Training RNN model."""
    # Print parameters used for the model
    dh.tab_printer(args, logger)

    # Load sentences, labels, and training parameters
    logger.info("Loading data...")
    logger.info("Data processing...")
    train_data = dh.load_data_and_labels(args.train_file,
                                         args.num_classes,
                                         args.word2vec_file,
                                         data_aug_flag=False)
    val_data = dh.load_data_and_labels(args.validation_file,
                                       args.num_classes,
                                       args.word2vec_file,
                                       data_aug_flag=False)

    logger.info("Data padding...")
    x_train, y_train = dh.pad_data(train_data, args.pad_seq_len)
    x_val, y_val = dh.pad_data(val_data, args.pad_seq_len)

    # Build vocabulary
    VOCAB_SIZE, EMBEDDING_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix(
        args.word2vec_file)

    # Build a graph and rnn object
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=args.allow_soft_placement,
            log_device_placement=args.log_device_placement)
        session_conf.gpu_options.allow_growth = args.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            rnn = TextRNN(sequence_length=args.pad_seq_len,
                          vocab_size=VOCAB_SIZE,
                          embedding_type=args.embedding_type,
                          embedding_size=EMBEDDING_SIZE,
                          lstm_hidden_size=args.lstm_dim,
                          fc_hidden_size=args.fc_dim,
                          num_classes=args.num_classes,
                          l2_reg_lambda=args.l2_lambda,
                          pretrained_embedding=pretrained_word2vec_matrix)

            # Define training procedure
            with tf.control_dependencies(
                    tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                learning_rate = tf.train.exponential_decay(
                    learning_rate=args.learning_rate,
                    global_step=rnn.global_step,
                    decay_steps=args.decay_steps,
                    decay_rate=args.decay_rate,
                    staircase=True)
                optimizer = tf.train.AdamOptimizer(learning_rate)
                grads, vars = zip(*optimizer.compute_gradients(rnn.loss))
                grads, _ = tf.clip_by_global_norm(grads,
                                                  clip_norm=args.norm_ratio)
                train_op = optimizer.apply_gradients(
                    zip(grads, vars),
                    global_step=rnn.global_step,
                    name="train_op")

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in zip(grads, vars):
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{0}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{0}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            out_dir = dh.get_out_dir(OPTION, logger)
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            best_checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "bestcheckpoints"))

            # Summaries for loss
            loss_summary = tf.summary.scalar("loss", rnn.loss)

            # Train summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Validation summaries
            validation_summary_op = tf.summary.merge([loss_summary])
            validation_summary_dir = os.path.join(out_dir, "summaries",
                                                  "validation")
            validation_summary_writer = tf.summary.FileWriter(
                validation_summary_dir, sess.graph)

            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=args.num_checkpoints)
            best_saver = cm.BestCheckpointSaver(save_dir=best_checkpoint_dir,
                                                num_to_keep=3,
                                                maximize=True)

            if OPTION == 'R':
                # Load rnn model
                logger.info("Loading model...")
                checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
                logger.info(checkpoint_file)

                # Load the saved meta graph and restore variables
                saver = tf.train.import_meta_graph(
                    "{0}.meta".format(checkpoint_file))
                saver.restore(sess, checkpoint_file)
            if OPTION == 'T':
                if not os.path.exists(checkpoint_dir):
                    os.makedirs(checkpoint_dir)
                sess.run(tf.global_variables_initializer())
                sess.run(tf.local_variables_initializer())

                # Embedding visualization config
                config = projector.ProjectorConfig()
                embedding_conf = config.embeddings.add()
                embedding_conf.tensor_name = "embedding"
                embedding_conf.metadata_path = args.metadata_file

                projector.visualize_embeddings(train_summary_writer, config)
                projector.visualize_embeddings(validation_summary_writer,
                                               config)

                # Save the embedding visualization
                saver.save(
                    sess, os.path.join(out_dir, "embedding", "embedding.ckpt"))

            current_step = sess.run(rnn.global_step)

            def train_step(x_batch, y_batch):
                """A single training step"""
                feed_dict = {
                    rnn.input_x: x_batch,
                    rnn.input_y: y_batch,
                    rnn.dropout_keep_prob: args.dropout_rate,
                    rnn.is_training: True
                }
                _, step, summaries, loss = sess.run(
                    [train_op, rnn.global_step, train_summary_op, rnn.loss],
                    feed_dict)
                logger.info("step {0}: loss {1:g}".format(step, loss))
                train_summary_writer.add_summary(summaries, step)

            def validation_step(x_val, y_val, writer=None):
                """Evaluates model on a validation set"""
                batches_validation = dh.batch_iter(list(zip(x_val, y_val)),
                                                   args.batch_size, 1)

                # Predict classes by threshold or topk ('ts': threshold; 'tk': topk)
                eval_counter, eval_loss = 0, 0.0

                eval_pre_tk = [0.0] * args.topK
                eval_rec_tk = [0.0] * args.topK
                eval_F1_tk = [0.0] * args.topK

                true_onehot_labels = []
                predicted_onehot_scores = []
                predicted_onehot_labels_ts = []
                predicted_onehot_labels_tk = [[] for _ in range(args.topK)]

                for batch_validation in batches_validation:
                    x_batch_val, y_batch_val = zip(*batch_validation)
                    feed_dict = {
                        rnn.input_x: x_batch_val,
                        rnn.input_y: y_batch_val,
                        rnn.dropout_keep_prob: 1.0,
                        rnn.is_training: False
                    }
                    step, summaries, scores, cur_loss = sess.run([
                        rnn.global_step, validation_summary_op, rnn.scores,
                        rnn.loss
                    ], feed_dict)

                    # Prepare for calculating metrics
                    for i in y_batch_val:
                        true_onehot_labels.append(i)
                    for j in scores:
                        predicted_onehot_scores.append(j)

                    # Predict by threshold
                    batch_predicted_onehot_labels_ts = \
                        dh.get_onehot_label_threshold(scores=scores, threshold=args.threshold)

                    for k in batch_predicted_onehot_labels_ts:
                        predicted_onehot_labels_ts.append(k)

                    # Predict by topK
                    for top_num in range(args.topK):
                        batch_predicted_onehot_labels_tk = dh.get_onehot_label_topk(
                            scores=scores, top_num=top_num + 1)

                        for i in batch_predicted_onehot_labels_tk:
                            predicted_onehot_labels_tk[top_num].append(i)

                    eval_loss = eval_loss + cur_loss
                    eval_counter = eval_counter + 1

                    if writer:
                        writer.add_summary(summaries, step)

                eval_loss = float(eval_loss / eval_counter)

                # Calculate Precision & Recall & F1
                eval_pre_ts = precision_score(
                    y_true=np.array(true_onehot_labels),
                    y_pred=np.array(predicted_onehot_labels_ts),
                    average='micro')
                eval_rec_ts = recall_score(
                    y_true=np.array(true_onehot_labels),
                    y_pred=np.array(predicted_onehot_labels_ts),
                    average='micro')
                eval_F1_ts = f1_score(
                    y_true=np.array(true_onehot_labels),
                    y_pred=np.array(predicted_onehot_labels_ts),
                    average='micro')

                for top_num in range(args.topK):
                    eval_pre_tk[top_num] = precision_score(
                        y_true=np.array(true_onehot_labels),
                        y_pred=np.array(predicted_onehot_labels_tk[top_num]),
                        average='micro')
                    eval_rec_tk[top_num] = recall_score(
                        y_true=np.array(true_onehot_labels),
                        y_pred=np.array(predicted_onehot_labels_tk[top_num]),
                        average='micro')
                    eval_F1_tk[top_num] = f1_score(
                        y_true=np.array(true_onehot_labels),
                        y_pred=np.array(predicted_onehot_labels_tk[top_num]),
                        average='micro')

                # Calculate the average AUC
                eval_auc = roc_auc_score(
                    y_true=np.array(true_onehot_labels),
                    y_score=np.array(predicted_onehot_scores),
                    average='micro')
                # Calculate the average PR
                eval_prc = average_precision_score(
                    y_true=np.array(true_onehot_labels),
                    y_score=np.array(predicted_onehot_scores),
                    average='micro')

                return eval_loss, eval_auc, eval_prc, eval_pre_ts, eval_rec_ts, eval_F1_ts, \
                       eval_pre_tk, eval_rec_tk, eval_F1_tk

            # Generate batches
            batches_train = dh.batch_iter(list(zip(x_train, y_train)),
                                          args.batch_size, args.epochs)

            num_batches_per_epoch = int(
                (len(x_train) - 1) / args.batch_size) + 1

            # Training loop. For each batch...
            for batch_train in batches_train:
                x_batch_train, y_batch_train = zip(*batch_train)
                train_step(x_batch_train, y_batch_train)
                current_step = tf.train.global_step(sess, rnn.global_step)

                if current_step % args.evaluate_steps == 0:
                    logger.info("\nEvaluation:")
                    eval_loss, eval_auc, eval_prc, \
                    eval_pre_ts, eval_rec_ts, eval_F1_ts, eval_pre_tk, eval_rec_tk, eval_F1_tk = \
                        validation_step(x_val, y_val, writer=validation_summary_writer)

                    logger.info(
                        "All Validation set: Loss {0:g} | AUC {1:g} | AUPRC {2:g}"
                        .format(eval_loss, eval_auc, eval_prc))

                    # Predict by threshold
                    logger.info(
                        "Predict by threshold: Precision {0:g}, Recall {1:g}, F1 {2:g}"
                        .format(eval_pre_ts, eval_rec_ts, eval_F1_ts))

                    # Predict by topK
                    logger.info("Predict by topK:")
                    for top_num in range(args.topK):
                        logger.info(
                            "Top{0}: Precision {1:g}, Recall {2:g}, F1 {3:g}".
                            format(top_num + 1, eval_pre_tk[top_num],
                                   eval_rec_tk[top_num], eval_F1_tk[top_num]))
                    best_saver.handle(eval_prc, sess, current_step)
                if current_step % args.checkpoint_steps == 0:
                    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    logger.info("Saved model checkpoint to {0}\n".format(path))
                if current_step % num_batches_per_epoch == 0:
                    current_epoch = current_step // num_batches_per_epoch
                    logger.info(
                        "Epoch {0} has finished!".format(current_epoch))

    logger.info("All Done.")
Example #11
def train_rnn():
    """Training RNN model."""

    # Load sentences, labels, and training parameters
    logger.info('✔︎ Loading data...')

    logger.info('✔︎ Training data processing...')
    train_data = data_helpers.load_data_and_labels(FLAGS.training_data_file,
                                                   FLAGS.num_classes,
                                                   FLAGS.embedding_dim)

    logger.info('✔︎ Validation data processing...')
    validation_data = \
        data_helpers.load_data_and_labels(FLAGS.validation_data_file, FLAGS.num_classes, FLAGS.embedding_dim)

    logger.info('Recommended padding sequence length is: {}'.format(
        FLAGS.pad_seq_len))

    logger.info('✔︎ Training data padding...')
    x_train, y_train = data_helpers.pad_data(train_data, FLAGS.pad_seq_len)

    logger.info('✔︎ Validation data padding...')
    x_validation, y_validation = data_helpers.pad_data(validation_data,
                                                       FLAGS.pad_seq_len)

    y_validation_bind = validation_data.labels_bind

    # Build vocabulary
    VOCAB_SIZE = data_helpers.load_vocab_size(FLAGS.embedding_dim)
    pretrained_word2vec_matrix = data_helpers.load_word2vec_matrix(
        VOCAB_SIZE, FLAGS.embedding_dim)

    # Build a graph and rnn object
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            rnn = TextRNN(sequence_length=FLAGS.pad_seq_len,
                          num_classes=FLAGS.num_classes,
                          vocab_size=VOCAB_SIZE,
                          hidden_size=FLAGS.hidden_size,
                          fc_hidden_size=FLAGS.fc_hidden_size,
                          embedding_size=FLAGS.embedding_dim,
                          embedding_type=FLAGS.embedding_type,
                          l2_reg_lambda=FLAGS.l2_reg_lambda,
                          pretrained_embedding=pretrained_word2vec_matrix)

            # Define Training procedure
            # learning_rate = tf.train.exponential_decay(learning_rate=FLAGS.learning_rate, global_step=cnn.global_step,
            #                                            decay_steps=FLAGS.decay_steps, decay_rate=FLAGS.decay_rate,
            #                                            staircase=True)
            optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
            grads_and_vars = optimizer.compute_gradients(rnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=rnn.global_step,
                                                 name="train_op")

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            if FLAGS.train_or_restore == 'R':
                MODEL = input(
                    "☛ Please input the checkpoints model you want to restore, "
                    "it should be like(1490175368): "
                )  # The model you want to restore

                while not (MODEL.isdigit() and len(MODEL) == 10):
                    MODEL = input(
                        '✘ The format of your input is illegal, please re-input: '
                    )
                logger.info(
                    '✔︎ The format of your input is legal, now loading to next step...'
                )

                checkpoint_dir = 'runs/' + MODEL + '/checkpoints/'

                out_dir = os.path.abspath(
                    os.path.join(os.path.curdir, "runs", MODEL))
                logger.info("✔︎ Writing to {}\n".format(out_dir))
            else:
                timestamp = str(int(time.time()))
                out_dir = os.path.abspath(
                    os.path.join(os.path.curdir, "runs", timestamp))
                logger.info("✔︎ Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", rnn.loss)
            # acc_summary = tf.summary.scalar("accuracy", rnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Validation summaries
            validation_summary_op = tf.summary.merge([loss_summary])
            validation_summary_dir = os.path.join(out_dir, "summaries",
                                                  "validation")
            validation_summary_writer = tf.summary.FileWriter(
                validation_summary_dir, sess.graph)

            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            if FLAGS.train_or_restore == 'R':
                # Load rnn model
                logger.info("✔ Loading model...")
                checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
                logger.info(checkpoint_file)

                # Load the saved meta graph and restore variables
                saver = tf.train.import_meta_graph(
                    "{}.meta".format(checkpoint_file))
                saver.restore(sess, checkpoint_file)
            else:
                checkpoint_dir = os.path.abspath(
                    os.path.join(out_dir, "checkpoints"))
                if not os.path.exists(checkpoint_dir):
                    os.makedirs(checkpoint_dir)
                sess.run(tf.global_variables_initializer())
                sess.run(tf.local_variables_initializer())

            current_step = sess.run(rnn.global_step)

            def train_step(x_batch, y_batch):
                """A single training step"""
                feed_dict = {
                    rnn.input_x: x_batch,
                    rnn.input_y: y_batch,
                    rnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
                    rnn.is_training: True
                }
                _, step, summaries, loss = sess.run(
                    [train_op, rnn.global_step, train_summary_op, rnn.loss],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                logger.info("{}: step {}, loss {:g}".format(
                    time_str, step, loss))
                train_summary_writer.add_summary(summaries, step)

            def validation_step(x_validation,
                                y_validation,
                                y_validation_bind,
                                writer=None):
                """Evaluates model on a validation set"""
                batches_validation = data_helpers.batch_iter(
                    list(zip(x_validation, y_validation, y_validation_bind)),
                    FLAGS.batch_size, 1)  # one pass over the validation set per evaluation
                eval_loss, eval_rec, eval_acc, eval_counter = 0.0, 0.0, 0.0, 0
                for batch_validation in batches_validation:
                    x_batch_validation, y_batch_validation, y_batch_validation_bind = zip(
                        *batch_validation)
                    feed_dict = {
                        rnn.input_x: x_batch_validation,
                        rnn.input_y: y_batch_validation,
                        rnn.dropout_keep_prob: 1.0,
                        rnn.is_training: False
                    }
                    step, summaries, logits, cur_loss = sess.run([
                        rnn.global_step, validation_summary_op, rnn.logits,
                        rnn.loss
                    ], feed_dict)

                    if FLAGS.use_classbind_or_not == 'Y':
                        predicted_labels = data_helpers.get_label_using_logits_and_classbind(
                            logits,
                            y_batch_validation_bind,
                            top_number=FLAGS.top_num)
                    if FLAGS.use_classbind_or_not == 'N':
                        predicted_labels = data_helpers.get_label_using_logits(
                            logits, top_number=FLAGS.top_num)

                    cur_rec, cur_acc = 0.0, 0.0
                    for index, predicted_label in enumerate(predicted_labels):
                        rec_inc, acc_inc = data_helpers.cal_rec_and_acc(
                            predicted_label, y_batch_validation[index])
                        cur_rec, cur_acc = cur_rec + rec_inc, cur_acc + acc_inc

                    cur_rec = cur_rec / len(y_batch_validation)
                    cur_acc = cur_acc / len(y_batch_validation)

                    eval_loss, eval_rec, eval_acc, eval_counter = eval_loss + cur_loss, eval_rec + cur_rec, \
                                                                  eval_acc + cur_acc, eval_counter + 1
                    logger.info("✔︎ validation batch {} finished.".format(
                        eval_counter))

                    if writer:
                        writer.add_summary(summaries, step)

                eval_loss = float(eval_loss / eval_counter)
                eval_rec = float(eval_rec / eval_counter)
                eval_acc = float(eval_acc / eval_counter)

                return eval_loss, eval_rec, eval_acc

            # Generate batches
            batches_train = data_helpers.batch_iter(
                list(zip(x_train, y_train)), FLAGS.batch_size,
                FLAGS.num_epochs)

            # Training loop. For each batch...
            for batch_train in batches_train:
                x_batch_train, y_batch_train = zip(*batch_train)
                train_step(x_batch_train, y_batch_train)
                current_step = tf.train.global_step(sess, rnn.global_step)

                if current_step % FLAGS.evaluate_every == 0:
                    logger.info("\nEvaluation:")
                    eval_loss, eval_rec, eval_acc = validation_step(
                        x_validation,
                        y_validation,
                        y_validation_bind,
                        writer=validation_summary_writer)
                    time_str = datetime.datetime.now().isoformat()
                    logger.info(
                        "{}: step {}, loss {:g}, rec {:g}, acc {:g}".format(
                            time_str, current_step, eval_loss, eval_rec,
                            eval_acc))

                if current_step % FLAGS.checkpoint_every == 0:
                    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    logger.info(
                        "✔︎ Saved model checkpoint to {}\n".format(path))

    logger.info("✔︎ Done.")
Example #12
import tensorflow as tf
from text_rnn import TextRNN

sess = tf.Session()
new_saver = tf.train.import_meta_graph(
    './10000examples_lr0.001_epochs100/rnn.meta')
new_saver.restore(
    sess, tf.train.latest_checkpoint('./10000examples_lr0.001_epochs100/'))
all_vars = tf.trainable_variables()
for v in all_vars:
    print(v)
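# After restoring the meta graph, individual tensors can be looked up by name and run
# directly. The tensor names below ("input_x", "dropout_keep_prob", "output/predictions")
# are assumptions for illustration; they must match whatever names the saved TextRNN
# graph actually used.
graph = tf.get_default_graph()
input_x = graph.get_tensor_by_name("input_x:0")
dropout_keep_prob = graph.get_tensor_by_name("dropout_keep_prob:0")
predictions = graph.get_tensor_by_name("output/predictions:0")
# preds = sess.run(predictions, {input_x: x_batch, dropout_keep_prob: 1.0})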

rnn = TextRNN(x_train.shape[1],
              y_train.shape[1],
              100,
              len(vocab_processor.vocabulary_),
              200,
              l2_reg=0.0)
Example #13
def main(_):
    # FLAGS._parse_flags()
    print("\nParameters:")
    for attr, value in sorted(FLAGS.__flags.items()):
        print("{}={}".format(attr.upper(), value))
    print("")

    # Data Preparation
    # ==================================================

    # Load data
    print("Loading data...")
    x_text, y = data_helpers.load_data_and_labels(FLAGS.train_file, FLAGS.num_class)

    # Build vocabulary
    max_document_length = 64

    vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
    x = np.array(list(vocab_processor.fit_transform(x_text)))

    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]

    # Split train/test set
    # TODO: This is very crude, should use cross-validation
    dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
    x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
    y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]

    del x, y, x_shuffled, y_shuffled

    print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
    print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

    # Training
    # ==================================================

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            rnn = TextRNN(
                sequence_length=x_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_dim=FLAGS.embedding_dim,
                hidden_size=256,
                multi_layer=1)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(0.01)
            grads_and_vars = optimizer.compute_gradients(rnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", rnn.loss)
            acc_summary = tf.summary.scalar("accuracy", rnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                feed_dict = {
                    rnn.input_x: x_batch,
                    rnn.input_y: y_batch,
                    rnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, rnn.loss, rnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    rnn.input_x: x_batch,
                    rnn.input_y: y_batch,
                    rnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, rnn.loss, rnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = data_helpers.batch_iter(
                list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
Example #14
def train_rnn():
    # Data Preparation
    # ==================================================

    if FLAGS.init_embedding_path is not None:
        embedding = np.load(FLAGS.init_embedding_path)
        print("Using pre-trained word embedding which shape is {}\n".format(embedding.shape))
        FLAGS.vocab_size = embedding.shape[0]
        FLAGS.embedding_size = embedding.shape[1]

    if FLAGS.init_model_path is not None:
        assert os.path.isdir(FLAGS.init_model_path), "init_model_path must be a directory\n"
        ckpt = tf.train.get_checkpoint_state(FLAGS.init_model_path)
        assert ckpt, "No checkpoint found in {}\n".format(FLAGS.init_model_path)
        assert ckpt.model_checkpoint_path, "No model_checkpoint_path found in checkpoint\n"

    # Create root directory
    timestamp = str(int(time.time()))
    root_dir = os.path.join(os.path.curdir, 'runs', 'textrnn', 'trained_result_' + timestamp)
    os.makedirs(root_dir)

    # Load data
    # print("Loading data...\n")
    # x, y = data_helpers.load_data(FLAGS.data_file, FLAGS.sequence_length, FLAGS.vocab_size, root_dir=root_dir)
    # FLAGS.num_classes = len(y[0])
    print("Loading data...\n")
    x_data = np.loadtxt(FLAGS.x_data_file)
    y_data = np.loadtxt(FLAGS.y_data_file)
    print("data load finished")

    # Split dataset
    # x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=FLAGS.test_size, stratify=y_data, random_state=0)
    # x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, test_size=0.5, random_state=0)

    # Training
    # ==================================================
    with tf.Graph().as_default():
        tf_config = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        tf_config.gpu_options.allow_growth = FLAGS.gpu_allow_growth

        with tf.Session(config=tf_config).as_default() as sess:
            rnn = TextRNN(
                vocab_size=FLAGS.vocab_size,
                embedding_size=FLAGS.embedding_size,
                sequence_length=FLAGS.sequence_length,
                rnn_size=FLAGS.rnn_size,
                num_layers=FLAGS.num_layers,
                attention_size=FLAGS.attention_size,
                num_classes=FLAGS.num_classes,
                learning_rate=FLAGS.learning_rate,
                grad_clip=FLAGS.grad_clip)

            # Output directory for models and summaries
            out_dir = os.path.abspath(root_dir)
            print("Writing to {}...\n".format(out_dir))

            # Summaries for loss and accuracy
            tf.summary.scalar("loss", rnn.loss)
            tf.summary.scalar("accuracy", rnn.accuracy)
            merged_summary = tf.summary.merge_all()

            # Summaries dictionary
            train_summary_dir = os.path.join(out_dir, 'summaries', 'train')
            val_summary_dir = os.path.join(out_dir, 'summaries', 'val')
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)
            val_summary_writer = tf.summary.FileWriter(val_summary_dir, sess.graph)

            # Checkpoint directory, will not create itself
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, 'checkpoints'))
            checkpoint_prefix = os.path.join(checkpoint_dir, 'model.ckpt')
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Using pre-trained word embedding
            if FLAGS.init_embedding_path is not None:
                sess.run(rnn.embedding.assign(embedding))
                del embedding

            # Continue training from saved model
            if FLAGS.init_model_path is not None:
                saver.restore(sess, ckpt.model_checkpoint_path)

            # Training start
            print("Start training...\n")
            best_at_step = 0
            best_val_accuracy = 0
            train_batches = data_utils.batch_iter(list(zip(x_data, y_data)), FLAGS.batch_size)
            start = time.time()
            rnn_feature_temp = []
            for batch in train_batches:
                # Training model on x_batch and y_batch
                x_batch, y_batch = zip(*batch)
                # seq_len_train = data_helpers.real_len(x_batch)
                seq_len_train = data_utils.real_len(x_batch)
                feed_dict = {rnn.input_x: x_batch, rnn.input_y: y_batch, rnn.seq_len: seq_len_train, rnn.keep_prob: FLAGS.dropout_keep_prob}
                attention_output, _, global_step, train_summaries, train_loss, train_accuracy = sess.run([rnn.attention_output,rnn.train_op, rnn.global_step,
                        merged_summary, rnn.loss, rnn.accuracy], feed_dict=feed_dict)
                rnn_feature_temp.append(attention_output.tolist())
            print(rnn_feature_temp[0:2])
            print(len(rnn_feature_temp))
            # NOTE: the output path and the (20480, 200) shape are hard-coded in the original snippet
            np.savetxt("../data/word_data/word_dim/word_rnn_attention_embeddings_600_dim256.txt", np.array(rnn_feature_temp).reshape(20480, 200))
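# Not part of the original: `data_utils.real_len` is called above but not shown in this
# snippet. The helper below is only a guess at what it likely computes, assuming the
# sequences are padded with token id 0 (name and behaviour are hypothetical):
def real_len_sketch(batch):
    """Hypothetical helper: number of non-padding tokens in each padded sequence."""
    return [int(np.count_nonzero(row)) for row in np.asarray(batch)]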
Example #15
print("# Loading training data")
training_data_raw = open(config['TRAINING_DATA_LOCATION'],'r',encoding='latin-1').readlines()
random.shuffle(training_data_raw)
num_examples = config['NUM_EXAMPLES']
training_data_raw = training_data_raw[:num_examples]

print("# Processing training data")
x_train, y_train, vocab_processor = util.load_training_data(training_data_raw)

print(" Loading and Processing testing data")
testing_data_raw = open(config['TESTING_DATA_LOCATION'],'r',encoding='latin-1').readlines()
x_test, y_test = util.load_testing_data(testing_data_raw, vocab_processor)

print("# Creating RNN")
rnn = TextRNN(x_train.shape[1], y_train.shape[1], config['HIDDEN_LAYER_SIZE'], 
        len(vocab_processor.vocabulary_), config['WORD_VECTOR_DIM'], l2_reg=0.0)
optimizer = tf.train.AdamOptimizer(config['LEARNING_RATE'])
minimizer = optimizer.minimize(rnn.loss)

print("# Initializing Tensorflow")
init_op = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_op)
saver = tf.train.Saver()

print("# Training")
batch_size = config['BATCH_SIZE']
no_of_batches = int(len(training_data_raw)/batch_size)
epoch = config['NUM_EPOCHS']

losses = []
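# The original example is truncated here. A minimal sketch of the loop it appears to be
# setting up, assuming the TextRNN exposes input_x/input_y placeholders (those names are
# not confirmed by this snippet; rnn.loss is used above):
for e in range(epoch):
    for b in range(no_of_batches):
        x_batch = x_train[b * batch_size:(b + 1) * batch_size]
        y_batch = y_train[b * batch_size:(b + 1) * batch_size]
        _, batch_loss = sess.run([minimizer, rnn.loss],
                                 {rnn.input_x: x_batch, rnn.input_y: y_batch})
        losses.append(batch_loss)
    print("# Epoch {} finished, last batch loss {:g}".format(e + 1, batch_loss))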
Example #16
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))


# Training
# ==================================================

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
      allow_soft_placement=FLAGS.allow_soft_placement,
      log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextRNN(
            sequence_length=x_train.shape[1],
            num_classes=y_train.shape[1],
            vocab_size=len(vocab_processor.vocabulary_),
            embedding_size=FLAGS.embedding_dim,
            rnn_size=FLAGS.rnn_size,
            batch_size=FLAGS.batch_size,
            l2_reg_lambda=FLAGS.l2_reg_lambda)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)

        # Gradient clipping, commonly used with RNNs to prevent exploding gradients
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cnn.loss, tvars), FLAGS.grad_clip)
        grads_and_vars = tuple(zip(grads, tvars))
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # grads_and_vars = optimizer.compute_gradients(cnn.loss)
Example #17
def train_rnn():
    """Training RNN model."""

    # Load sentences, labels, and training parameters
    logger.info("✔︎ Loading data...")

    logger.info("✔︎ Training data processing...")
    train_data = dh.load_data_and_labels(FLAGS.training_data_file,
                                         FLAGS.embedding_dim)

    logger.info("✔︎ Validation data processing...")
    validation_data = dh.load_data_and_labels(FLAGS.validation_data_file,
                                              FLAGS.embedding_dim)

    logger.info("Recommended padding Sequence length is: {0}".format(
        FLAGS.pad_seq_len))

    logger.info("✔︎ Training data padding...")
    x_train_front, x_train_behind, y_train = dh.pad_data(
        train_data, FLAGS.pad_seq_len)

    logger.info("✔︎ Validation data padding...")
    x_validation_front, x_validation_behind, y_validation = dh.pad_data(
        validation_data, FLAGS.pad_seq_len)

    # Build vocabulary
    VOCAB_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix(
        FLAGS.embedding_dim)

    # Build a graph and rnn object
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            rnn = TextRNN(sequence_length=FLAGS.pad_seq_len,
                          num_classes=y_train.shape[1],
                          vocab_size=VOCAB_SIZE,
                          lstm_hidden_size=FLAGS.lstm_hidden_size,
                          fc_hidden_size=FLAGS.fc_hidden_size,
                          embedding_size=FLAGS.embedding_dim,
                          embedding_type=FLAGS.embedding_type,
                          l2_reg_lambda=FLAGS.l2_reg_lambda,
                          pretrained_embedding=pretrained_word2vec_matrix)

            # Define training procedure
            with tf.control_dependencies(
                    tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                learning_rate = tf.train.exponential_decay(
                    learning_rate=FLAGS.learning_rate,
                    global_step=rnn.global_step,
                    decay_steps=FLAGS.decay_steps,
                    decay_rate=FLAGS.decay_rate,
                    staircase=True)
                optimizer = tf.train.AdamOptimizer(learning_rate)
                grads, vars = zip(*optimizer.compute_gradients(rnn.loss))
                grads, _ = tf.clip_by_global_norm(grads,
                                                  clip_norm=FLAGS.norm_ratio)
                train_op = optimizer.apply_gradients(
                    zip(grads, vars),
                    global_step=rnn.global_step,
                    name="train_op")

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in zip(grads, vars):
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{0}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{0}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            if FLAGS.train_or_restore == 'R':
                MODEL = input(
                    "☛ Please input the checkpoints model you want to restore, "
                    "it should look like 1490175368: "
                )  # The model you want to restore

                while not (MODEL.isdigit() and len(MODEL) == 10):
                    MODEL = input(
                        "✘ The format of your input is invalid, please re-input: "
                    )
                logger.info(
                    "✔︎ The format of your input is valid, continuing to the next step..."
                )
                out_dir = os.path.abspath(
                    os.path.join(os.path.curdir, "runs", MODEL))
                logger.info("✔︎ Writing to {0}\n".format(out_dir))
            else:
                timestamp = str(int(time.time()))
                out_dir = os.path.abspath(
                    os.path.join(os.path.curdir, "runs", timestamp))
                logger.info("✔︎ Writing to {0}\n".format(out_dir))

            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            best_checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "bestcheckpoints"))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", rnn.loss)
            acc_summary = tf.summary.scalar("accuracy", rnn.accuracy)

            # Train summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Validation summaries
            validation_summary_op = tf.summary.merge(
                [loss_summary, acc_summary])
            validation_summary_dir = os.path.join(out_dir, "summaries",
                                                  "validation")
            validation_summary_writer = tf.summary.FileWriter(
                validation_summary_dir, sess.graph)

            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)
            best_saver = cm.BestCheckpointSaver(save_dir=best_checkpoint_dir,
                                                num_to_keep=3,
                                                maximize=True)

            if FLAGS.train_or_restore == 'R':
                # Load rnn model
                logger.info("✔︎ Loading model...")
                checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
                logger.info(checkpoint_file)

                # Load the saved meta graph and restore variables
                saver = tf.train.import_meta_graph(
                    "{0}.meta".format(checkpoint_file))
                saver.restore(sess, checkpoint_file)
            else:
                if not os.path.exists(checkpoint_dir):
                    os.makedirs(checkpoint_dir)
                sess.run(tf.global_variables_initializer())
                sess.run(tf.local_variables_initializer())

                # Embedding visualization config
                config = projector.ProjectorConfig()
                embedding_conf = config.embeddings.add()
                embedding_conf.tensor_name = "embedding"
                embedding_conf.metadata_path = FLAGS.metadata_file

                projector.visualize_embeddings(train_summary_writer, config)
                projector.visualize_embeddings(validation_summary_writer,
                                               config)

                # Save the embedding visualization
                saver.save(
                    sess, os.path.join(out_dir, "embedding", "embedding.ckpt"))

            current_step = sess.run(rnn.global_step)

            def train_step(x_batch_front, x_batch_behind, y_batch):
                """A single training step"""
                feed_dict = {
                    rnn.input_x_front: x_batch_front,
                    rnn.input_x_behind: x_batch_behind,
                    rnn.input_y: y_batch,
                    rnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
                    rnn.is_training: True
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, rnn.global_step, train_summary_op, rnn.loss,
                    rnn.accuracy
                ], feed_dict)
                logger.info("step {0}: loss {1:g}, acc {2:g}".format(
                    step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def validation_step(x_batch_front,
                                x_batch_behind,
                                y_batch,
                                writer=None):
                """Evaluates model on a validation set"""
                feed_dict = {
                    rnn.input_x_front: x_batch_front,
                    rnn.input_x_behind: x_batch_behind,
                    rnn.input_y: y_batch,
                    rnn.dropout_keep_prob: 1.0,
                    rnn.is_training: False
                }
                step, summaries, loss, accuracy, recall, precision, f1, auc = sess.run(
                    [
                        rnn.global_step, validation_summary_op, rnn.loss,
                        rnn.accuracy, rnn.recall, rnn.precision, rnn.F1,
                        rnn.AUC
                    ], feed_dict)
                logger.info(
                    "step {0}: loss {1:g}, acc {2:g}, recall {3:g}, precision {4:g}, f1 {5:g}, AUC {6}"
                    .format(step, loss, accuracy, recall, precision, f1, auc))
                if writer:
                    writer.add_summary(summaries, step)

                return accuracy

            # Generate batches
            batches = dh.batch_iter(
                list(zip(x_train_front, x_train_behind, y_train)),
                FLAGS.batch_size, FLAGS.num_epochs)

            num_batches_per_epoch = int(
                (len(x_train_front) - 1) / FLAGS.batch_size) + 1

            # Training loop. For each batch...
            for batch in batches:
                x_batch_front, x_batch_behind, y_batch = zip(*batch)
                train_step(x_batch_front, x_batch_behind, y_batch)
                current_step = tf.train.global_step(sess, rnn.global_step)

                if current_step % FLAGS.evaluate_every == 0:
                    logger.info("\nEvaluation:")
                    accuracy = validation_step(
                        x_validation_front,
                        x_validation_behind,
                        y_validation,
                        writer=validation_summary_writer)
                    best_saver.handle(accuracy, sess, current_step)
                if current_step % FLAGS.checkpoint_every == 0:
                    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    logger.info(
                        "✔︎ Saved model checkpoint to {0}\n".format(path))
                if current_step % num_batches_per_epoch == 0:
                    current_epoch = current_step // num_batches_per_epoch
                    logger.info(
                        "✔︎ Epoch {0} has finished!".format(current_epoch))

    logger.info("✔︎ Done.")
def main(_):
    # Load data
    print("Loading data...")

    x_, y = data_helpers.build_train_data(FLAGS.label_file, FLAGS.train_file)
    train_int_to_vocab, train_to_int = data_helpers.cret_dict(x_)
    # Save the word-to-index mapping

    # Persist the full vocabulary so it can be reloaded at test time
    pickle.dump(train_int_to_vocab, open('./vocab_index.pkl', 'wb'))

    train_ids = [[
        train_to_int.get(term, train_to_int['<UNK>']) for term in line
    ] for line in x_]
    x_ = data_helpers.pad_sentences(train_ids, 20)
    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x_[shuffle_indices]
    y = np.array(y)
    y_shuffled = y[shuffle_indices]
    folds_list = data_helpers.cross_validation_split_for_smp(
        x_shuffled, y_shuffled)
    for i in range(10):
        if not os.path.exists(os.path.join('save_model', str(i))):
            os.makedirs(os.path.join('save_model', str(i)))

    for i in range(10):
        best_acc = 0.0
        print(i)
        print('##################')
        x_train, y_train, x_dev, y_dev = folds_list[i]

        y_train = np_utils.to_categorical(y_train)
        y_dev = np_utils.to_categorical(y_dev)

        # ==================================================

        with tf.Graph().as_default():
            session_conf = tf.ConfigProto(
                allow_soft_placement=FLAGS.allow_soft_placement,
                log_device_placement=FLAGS.log_device_placement)
            sess = tf.Session(config=session_conf)
            with sess.as_default():
                rnn = TextRNN(sequence_length=x_train.shape[1],
                              num_classes=y_train.shape[1],
                              vocab_size=len(train_int_to_vocab),
                              batch_size=FLAGS.batch_size,
                              embedding_size=FLAGS.embedding_dim,
                              hidden_size=FLAGS.hidden_size,
                              num_layers=FLAGS.num_layers
                              #word_embedding_matrix=embeding_matric
                              )

                # Define Training procedure
                global_step = tf.Variable(0,
                                          name="global_step",
                                          trainable=False)
                optimizer = tf.train.AdamOptimizer(1e-3)
                grads_and_vars = optimizer.compute_gradients(rnn.loss)
                train_op = optimizer.apply_gradients(grads_and_vars,
                                                     global_step=global_step)

                saver = tf.train.Saver(tf.global_variables(),
                                       max_to_keep=FLAGS.num_checkpoints)

                # Initialize all variables
                sess.run(tf.global_variables_initializer())

                def train_step(x_batch, y_batch):
                    """
                    A single training step
                    """
                    feed_dict = {
                        rnn.input_x: x_batch,
                        rnn.input_y: y_batch,
                        rnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                    }
                    _, step, loss, accuracy = sess.run(
                        [train_op, global_step, rnn.loss, rnn.accuracy],
                        feed_dict)
                    return step, loss, accuracy

                def dev_step(x_batch, y_batch):
                    """
                    Evaluates model on a dev set
                    """
                    feed_dict = {
                        rnn.input_x: x_batch,
                        rnn.input_y: y_batch,
                        rnn.dropout_keep_prob: 1.0
                    }
                    step, loss, accuracy = sess.run(
                        [global_step, rnn.loss, rnn.accuracy], feed_dict)
                    time_str = datetime.datetime.now().isoformat()
                    print('dev')
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss, accuracy))
                    return accuracy

                def save_best_model(sess, path):
                    path = saver.save(sess, path)

                for epoch in range(FLAGS.num_epochs):
                    print('epoch', epoch)
                    # Generate batches
                    for batch_i, (x_batch, y_batch) in enumerate(
                            data_helpers.get_batches(y_train, x_train,
                                                     FLAGS.batch_size)):

                        step, train_loss, train_accuracy = train_step(
                            x_batch, y_batch)
                        #print('step',step)
                        if batch_i % FLAGS.evaluate_every == 0:
                            time_str = datetime.datetime.now().isoformat()
                            print("{}: step {}, loss {:g}, acc {:g}".format(
                                time_str, step, train_loss, train_accuracy))

                        #=====================
                    accuracy = dev_step(x_dev, y_dev)
                    if accuracy > best_acc:
                        best_acc = accuracy
                        print('save_model/' + str(i) + '/best_model.ckpt')
                        save_best_model(
                            sess, 'save_model/' + str(i) + '/best_model.ckpt')
Example #19
max_features = 5000  # assumed value; the original snippet uses max_features without defining it
maxlen = 400
batch_size = 32
embedding_dims = 50
epochs = 10

print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

print('Pad sequences (samples x time)...')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

print('Build model...')
model = TextRNN(maxlen, max_features, embedding_dims).get_model()
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

print('Train...')
early_stopping = EarlyStopping(monitor='val_acc', patience=3, mode='max')
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          callbacks=[early_stopping],
          validation_data=(x_test, y_test))

print('Test...')
result = model.predict(x_test)
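# Not part of the original snippet: a minimal sketch of turning the sigmoid outputs in
# `result` into hard labels and reporting accuracy (assumes a single-unit sigmoid output
# and 0/1 labels, as in the Keras IMDB dataset):
predicted_labels = (result.reshape(-1) > 0.5).astype(int)
print('Test accuracy: {:.4f}'.format((predicted_labels == y_test).mean()))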
Example #20
del x

vocabsize = len(vocab_processor.vocabulary_)
print("Vocabulary Size: {:d}".format(vocabsize))

# Training
# ==================================================

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        rnn = TextRNN(sequence_length=x_train.shape[1],
                      num_classes=y_train.shape[1],
                      vocab_size=vocabsize,
                      embedding_size=FLAGS.embedding_dim)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(rnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.summary.histogram(
                    "{}/grad/hist".format(v.name), g)
train_data, train_label, test_data, test_label = data_helpers.data_processing(
    FLAGS.positive_data_file, FLAGS.negative_data_file)

print("training...")
# Training
# =======================================================
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        # Instantiate the model (the original comment referred to a convolution/pooling network)
        rnn = TextRNN(sequence_length=len(train_data[0]),
                      num_classes=len(train_label[0]),
                      embedding_size=FLAGS.embedding_dim,
                      l2_reg_lambda=FLAGS.l2_reg_lambda,
                      hidden_dim=FLAGS.hidden_dim,
                      num_layers=FLAGS.num_layers)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(rnn.loss)  # compute gradients
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)  # apply the parameter updates

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.summary.histogram(
Example #22
def evaluate():
    if FLAGS.checkpoint_dir is None or not os.path.exists(
            FLAGS.checkpoint_dir):
        raise IOError("checkpoint_dir not found")

    if FLAGS.model_type is None or FLAGS.model_type not in ['CNN', 'RNN']:
        raise ValueError("model_type must be CNN or RNN")

    root_dir = os.path.join(FLAGS.checkpoint_dir, '..') + '/'

    # Create result directory
    eval_dir = os.path.join(root_dir, 'eval')
    if not os.path.exists(eval_dir):
        os.mkdir(eval_dir)

    # Load parameters
    print("Loading parameters...\n")
    params = json.loads(open(root_dir + 'parameters.json').read())

    # Load data
    print("Loading data...\n")
    x_eval, y_eval = data_helpers.load_data(FLAGS.eval_data,
                                            params['sequence_length'],
                                            root_dir=root_dir,
                                            has_label=FLAGS.has_label,
                                            is_training=False)

    # Evaluating
    # ==================================================
    with tf.Graph().as_default():
        tf_config = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        tf_config.gpu_options.allow_growth = FLAGS.gpu_allow_growth

        with tf.Session(config=tf_config).as_default() as sess:
            # Model initialization
            if FLAGS.model_type == 'CNN':
                model = TextCNN(vocab_size=params['vocab_size'],
                                embedding_size=params['embedding_size'],
                                sequence_length=params['sequence_length'],
                                filter_sizes=list(
                                    map(int,
                                        params['filter_sizes'].split(","))),
                                num_filters=params['num_filters'],
                                num_classes=params['num_classes'],
                                learning_rate=params['learning_rate'],
                                grad_clip=params['grad_clip'],
                                l2_reg_lambda=params['l2_reg_lambda'])
                feed_dict = {model.keep_prob: 1.0, model.is_training: False}

            elif FLAGS.model_type == 'RNN':
                model = TextRNN(vocab_size=params['vocab_size'],
                                embedding_size=params['embedding_size'],
                                sequence_length=params['sequence_length'],
                                rnn_size=params['rnn_size'],
                                num_layers=params['num_layers'],
                                attention_size=params['attention_size'],
                                num_classes=params['num_classes'],
                                learning_rate=params['learning_rate'],
                                grad_clip=params['grad_clip'])
                feed_dict = {model.keep_prob: 1.0}

            saver = tf.train.Saver(tf.global_variables())
            sess.run(tf.global_variables_initializer())

            # Restore all variables from checkpoint
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                best_model_path = os.path.join(
                    '/'.join(ckpt.model_checkpoint_path.split("/")[:-1]),
                    'best_model')
                saver.restore(sess, best_model_path)

            # Evaluate the model
            print("Start evaluating...\n")
            y_logits = []
            start = time.time()
            data_size = len(x_eval)
            # Generate eval batches
            eval_batches = data_helpers.batch_iter(x_eval,
                                                   FLAGS.batch_size,
                                                   shuffle=False)
            for x_batch in eval_batches:
                feed_dict[model.input_x] = x_batch
                if FLAGS.model_type == 'RNN':
                    feed_dict[model.seq_len] = data_helpers.real_len(x_batch)
                batch_predictions = sess.run(model.logits, feed_dict=feed_dict)
                y_logits.extend(batch_predictions)
            print(
                "Mission complete, total number of eval examples: {}, evaluating speed: {:.0f} examples/sec\n"
                .format(data_size, data_size / (time.time() - start)))
            label_transformer = joblib.load(
                os.path.join(root_dir, 'label_transformer.pkl'))
            y_logits_original = label_transformer.inverse_transform(
                np.array(y_logits))

            # Print accuracy if eval examples have label
            if FLAGS.has_label:
                df = pd.DataFrame([
                    line.strip().split("\t") for line in open(
                        FLAGS.eval_data, 'r', encoding='UTF-8').readlines()
                    if len(line.strip().split("\t")) == 2
                ],
                                  columns=['content', 'real_label'])
                y_eval_original = label_transformer.inverse_transform(y_eval)
                eval_accuracy = sum(
                    y_logits_original == y_eval_original) / data_size
                print("Evaluating Accuracy: {:.3f}\n".format(eval_accuracy))
                print(
                    "Precision, Recall and F1-Score:\n\n",
                    classification_report(y_eval_original, y_logits_original))
            else:
                df = pd.DataFrame([
                    line.strip() for line in open(
                        FLAGS.eval_data, 'r', encoding='UTF-8').readlines()
                    if line.strip()
                ],
                                  columns=['content'])

            # Save prediction result
            timestamp = str(int(time.time()))
            save_path = os.path.abspath(
                os.path.join(eval_dir,
                             'predicted_result_' + timestamp + '.csv'))
            df['predicted_label'] = y_logits_original
            print("Writing prediction result to {}...\n".format(save_path))
            df.to_csv(save_path,
                      header=True,
                      index=False,
                      sep='\t',
                      encoding='utf-8')
Example #23
# Training
# ==================================================
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    session_conf.gpu_options.allow_growth = True
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        rnn = TextRNN(
            embedding_mat=word2vec_helpers.wordvector.astype(np.float32),
            non_static=FLAGS.non_static,
            GRU=FLAGS.GRU,
            sequence_length=max_document_length,
            num_classes=y_train.shape[1],
            hidden_layer_size=FLAGS.hidden_layer_size,
            vocab_size=word2vec_helpers.vocab_size,
            embedding_size=FLAGS.embedding_dim,
            attention_size=FLAGS.attention_size,
            l2_reg_lambda=FLAGS.l2_reg_lambda,
        )

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(rnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
Example #24
def train_rnn():
    """Training RNN model."""

    # Load sentences, labels, and training parameters
    logger.info('✔︎ Loading data...')

    logger.info('✔︎ Training data processing...')
    train_data = dh.load_data_and_labels(FLAGS.training_data_file, FLAGS.num_classes, FLAGS.embedding_dim)

    logger.info('✔︎ Validation data processing...')
    validation_data = \
        dh.load_data_and_labels(FLAGS.validation_data_file, FLAGS.num_classes, FLAGS.embedding_dim)

    logger.info('Recommended padding sequence length is: {0}'.format(FLAGS.pad_seq_len))

    logger.info('✔︎ Training data padding...')
    x_train, y_train = dh.pad_data(train_data, FLAGS.pad_seq_len)

    logger.info('✔︎ Validation data padding...')
    x_validation, y_validation = dh.pad_data(validation_data, FLAGS.pad_seq_len)

    # Build vocabulary
    VOCAB_SIZE = dh.load_vocab_size(FLAGS.embedding_dim)
    pretrained_word2vec_matrix = dh.load_word2vec_matrix(VOCAB_SIZE, FLAGS.embedding_dim)

    # Build a graph and rnn object
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            rnn = TextRNN(
                sequence_length=FLAGS.pad_seq_len,
                num_classes=FLAGS.num_classes,
                vocab_size=VOCAB_SIZE,
                lstm_hidden_size=FLAGS.lstm_hidden_size,
                fc_hidden_size=FLAGS.fc_hidden_size,
                embedding_size=FLAGS.embedding_dim,
                embedding_type=FLAGS.embedding_type,
                l2_reg_lambda=FLAGS.l2_reg_lambda,
                pretrained_embedding=pretrained_word2vec_matrix)

            # Define training procedure
            with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                learning_rate = tf.train.exponential_decay(learning_rate=FLAGS.learning_rate,
                                                           global_step=rnn.global_step, decay_steps=FLAGS.decay_steps,
                                                           decay_rate=FLAGS.decay_rate, staircase=True)
                optimizer = tf.train.AdamOptimizer(learning_rate)
                grads, vars = zip(*optimizer.compute_gradients(rnn.loss))
                grads, _ = tf.clip_by_global_norm(grads, clip_norm=FLAGS.norm_ratio)
                train_op = optimizer.apply_gradients(zip(grads, vars), global_step=rnn.global_step, name="train_op")

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in zip(grads, vars):
                if g is not None:
                    grad_hist_summary = tf.summary.histogram("{0}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar("{0}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            if FLAGS.train_or_restore == 'R':
                MODEL = input("☛ Please input the checkpoints model you want to restore, "
                              "it should be like(1490175368): ")  # The model you want to restore

                while not (MODEL.isdigit() and len(MODEL) == 10):
                    MODEL = input('✘ The format of your input is invalid, please re-input: ')
                logger.info('✔︎ The format of your input is valid, continuing to the next step...')

                checkpoint_dir = 'runs/' + MODEL + '/checkpoints/'

                out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", MODEL))
                logger.info("✔︎ Writing to {0}\n".format(out_dir))
            else:
                timestamp = str(int(time.time()))
                out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
                logger.info("✔︎ Writing to {0}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", rnn.loss)

            # Train summaries
            train_summary_op = tf.summary.merge([loss_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Validation summaries
            validation_summary_op = tf.summary.merge([loss_summary])
            validation_summary_dir = os.path.join(out_dir, "summaries", "validation")
            validation_summary_writer = tf.summary.FileWriter(validation_summary_dir, sess.graph)

            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            if FLAGS.train_or_restore == 'R':
                # Load rnn model
                logger.info("✔ Loading model...")
                checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
                logger.info(checkpoint_file)

                # Load the saved meta graph and restore variables
                saver = tf.train.import_meta_graph("{0}.meta".format(checkpoint_file))
                saver.restore(sess, checkpoint_file)
            else:
                checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
                if not os.path.exists(checkpoint_dir):
                    os.makedirs(checkpoint_dir)
                sess.run(tf.global_variables_initializer())
                sess.run(tf.local_variables_initializer())

                # Embedding visualization config
                config = projector.ProjectorConfig()
                embedding_conf = config.embeddings.add()
                embedding_conf.tensor_name = 'embedding'
                embedding_conf.metadata_path = FLAGS.metadata_file

                projector.visualize_embeddings(train_summary_writer, config)
                projector.visualize_embeddings(validation_summary_writer, config)

                # Save the embedding visualization
                saver.save(sess, os.path.join(out_dir, 'embedding', 'embedding.ckpt'))

            current_step = sess.run(rnn.global_step)

            def train_step(x_batch, y_batch):
                """A single training step"""
                feed_dict = {
                    rnn.input_x: x_batch,
                    rnn.input_y: y_batch,
                    rnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
                    rnn.is_training: True
                }
                _, step, summaries, loss = sess.run(
                    [train_op, rnn.global_step, train_summary_op, rnn.loss], feed_dict)
                logger.info("step {0}: loss {1:g}".format(step, loss))
                train_summary_writer.add_summary(summaries, step)

            def validation_step(x_validation, y_validation, writer=None):
                """Evaluates model on a validation set"""
                batches_validation = dh.batch_iter(
                    list(zip(x_validation, y_validation)), FLAGS.batch_size, 1)

                # Predict classes by threshold or topk ('ts': threshold; 'tk': topk)
                eval_counter, eval_loss, eval_rec_ts, eval_acc_ts, eval_F_ts = 0, 0.0, 0.0, 0.0, 0.0
                eval_rec_tk = [0.0] * FLAGS.top_num
                eval_acc_tk = [0.0] * FLAGS.top_num
                eval_F_tk = [0.0] * FLAGS.top_num

                for batch_validation in batches_validation:
                    x_batch_validation, y_batch_validation = zip(*batch_validation)
                    feed_dict = {
                        rnn.input_x: x_batch_validation,
                        rnn.input_y: y_batch_validation,
                        rnn.dropout_keep_prob: 1.0,
                        rnn.is_training: False
                    }
                    step, summaries, scores, cur_loss = sess.run(
                        [rnn.global_step, validation_summary_op, rnn.scores, rnn.loss], feed_dict)

                    # Predict by threshold
                    predicted_labels_threshold, predicted_values_threshold = \
                        dh.get_label_using_scores_by_threshold(scores=scores, threshold=FLAGS.threshold)

                    cur_rec_ts, cur_acc_ts, cur_F_ts = 0.0, 0.0, 0.0

                    for index, predicted_label_threshold in enumerate(predicted_labels_threshold):
                        rec_inc_ts, acc_inc_ts = dh.cal_metric(predicted_label_threshold, y_batch_validation[index])
                        cur_rec_ts, cur_acc_ts = cur_rec_ts + rec_inc_ts, cur_acc_ts + acc_inc_ts

                    cur_rec_ts = cur_rec_ts / len(y_batch_validation)
                    cur_acc_ts = cur_acc_ts / len(y_batch_validation)

                    cur_F_ts = dh.cal_F(cur_rec_ts, cur_acc_ts)

                    eval_rec_ts, eval_acc_ts = eval_rec_ts + cur_rec_ts, eval_acc_ts + cur_acc_ts

                    # Predict by topK
                    topK_predicted_labels = []
                    for top_num in range(FLAGS.top_num):
                        predicted_labels_topk, predicted_values_topk = \
                            dh.get_label_using_scores_by_topk(scores=scores, top_num=top_num+1)
                        topK_predicted_labels.append(predicted_labels_topk)

                    cur_rec_tk = [0.0] * FLAGS.top_num
                    cur_acc_tk = [0.0] * FLAGS.top_num
                    cur_F_tk = [0.0] * FLAGS.top_num

                    for top_num, predicted_labels_topK in enumerate(topK_predicted_labels):
                        for index, predicted_label_topK in enumerate(predicted_labels_topK):
                            rec_inc_tk, acc_inc_tk = dh.cal_metric(predicted_label_topK, y_batch_validation[index])
                            cur_rec_tk[top_num], cur_acc_tk[top_num] = \
                                cur_rec_tk[top_num] + rec_inc_tk, cur_acc_tk[top_num] + acc_inc_tk

                        cur_rec_tk[top_num] = cur_rec_tk[top_num] / len(y_batch_validation)
                        cur_acc_tk[top_num] = cur_acc_tk[top_num] / len(y_batch_validation)

                        cur_F_tk[top_num] = dh.cal_F(cur_rec_tk[top_num], cur_acc_tk[top_num])

                        eval_rec_tk[top_num], eval_acc_tk[top_num] = \
                            eval_rec_tk[top_num] + cur_rec_tk[top_num], eval_acc_tk[top_num] + cur_acc_tk[top_num]

                    eval_loss = eval_loss + cur_loss
                    eval_counter = eval_counter + 1

                    logger.info("✔︎ validation batch {0}: loss {1:g}".format(eval_counter, cur_loss))
                    logger.info("︎☛ Predict by threshold: recall {0:g}, accuracy {1:g}, F {2:g}"
                                .format(cur_rec_ts, cur_acc_ts, cur_F_ts))

                    logger.info("︎☛ Predict by topK:")
                    for top_num in range(FLAGS.top_num):
                        logger.info("Top{0}: recall {1:g}, accuracy {2:g}, F {3:g}"
                                    .format(top_num + 1, cur_rec_tk[top_num], cur_acc_tk[top_num], cur_F_tk[top_num]))

                    if writer:
                        writer.add_summary(summaries, step)

                eval_loss = float(eval_loss / eval_counter)
                eval_rec_ts = float(eval_rec_ts / eval_counter)
                eval_acc_ts = float(eval_acc_ts / eval_counter)
                eval_F_ts = dh.cal_F(eval_rec_ts, eval_acc_ts)

                for top_num in range(FLAGS.top_num):
                    eval_rec_tk[top_num] = float(eval_rec_tk[top_num] / eval_counter)
                    eval_acc_tk[top_num] = float(eval_acc_tk[top_num] / eval_counter)
                    eval_F_tk[top_num] = dh.cal_F(eval_rec_tk[top_num], eval_acc_tk[top_num])

                return eval_loss, eval_rec_ts, eval_acc_ts, eval_F_ts, eval_rec_tk, eval_acc_tk, eval_F_tk

            # Generate batches
            batches_train = dh.batch_iter(
                list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs)

            num_batches_per_epoch = int((len(x_train) - 1) / FLAGS.batch_size) + 1

            # Training loop. For each batch...
            for batch_train in batches_train:
                x_batch_train, y_batch_train = zip(*batch_train)
                train_step(x_batch_train, y_batch_train)
                current_step = tf.train.global_step(sess, rnn.global_step)

                if current_step % FLAGS.evaluate_every == 0:
                    logger.info("\nEvaluation:")
                    eval_loss, eval_rec_ts, eval_acc_ts, eval_F_ts, eval_rec_tk, eval_acc_tk, eval_F_tk = \
                        validation_step(x_validation, y_validation, writer=validation_summary_writer)

                    logger.info("All Validation set: Loss {0:g}".format(eval_loss))

                    # Predict by threshold
                    logger.info("︎☛ Predict by threshold: Recall {0:g}, Accuracy {1:g}, F {2:g}"
                                .format(eval_rec_ts, eval_acc_ts, eval_F_ts))

                    # Predict by topK
                    logger.info("︎☛ Predict by topK:")
                    for top_num in range(FLAGS.top_num):
                        logger.info("Top{0}: Recall {1:g}, Accuracy {2:g}, F {3:g}"
                                    .format(top_num+1, eval_rec_tk[top_num], eval_acc_tk[top_num], eval_F_tk[top_num]))
                if current_step % FLAGS.checkpoint_every == 0:
                    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    logger.info("✔︎ Saved model checkpoint to {0}\n".format(path))
                if current_step % num_batches_per_epoch == 0:
                    current_epoch = current_step // num_batches_per_epoch
                    logger.info("✔︎ Epoch {0} has finished!".format(current_epoch))

    logger.info("✔︎ Done.")
def train(x_train, y_train, vocab_processor, x_dev, y_dev, x_real_len_train,
          x_real_len_dev, sorted_label):
    # Training
    # ==================================================

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            if FLAGS.model_type == "cnnrnn":
                obj = TextCNNRNN(sequence_length=FLAGS.max_document_length,
                                 num_classes=y_train.shape[1],
                                 vocab_size=len(vocab_processor.vocabulary_),
                                 hidden_unit=FLAGS.hidden_unit,
                                 embedding_size=FLAGS.embedding_dim,
                                 filter_sizes=list(
                                     map(int, FLAGS.filter_sizes.split(","))),
                                 num_filters=FLAGS.num_filters,
                                 l2_reg_lambda=FLAGS.l2_reg_lambda)
            elif FLAGS.model_type == "rnncnn":
                obj = TextRNNCNN(sequence_length=FLAGS.max_document_length,
                                 num_classes=y_train.shape[1],
                                 vocab_size=len(vocab_processor.vocabulary_),
                                 hidden_unit=FLAGS.hidden_unit,
                                 embedding_size=FLAGS.embedding_dim,
                                 filter_sizes=list(
                                     map(int, FLAGS.filter_sizes.split(","))),
                                 num_filters=FLAGS.num_filters,
                                 l2_reg_lambda=FLAGS.l2_reg_lambda)
            elif FLAGS.model_type == "rnnandcnn":
                obj = TextRNNandCNN(
                    sequence_length=FLAGS.max_document_length,
                    num_classes=y_train.shape[1],
                    vocab_size=len(vocab_processor.vocabulary_),
                    hidden_unit=FLAGS.hidden_unit,
                    embedding_size=FLAGS.embedding_dim,
                    filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                    num_filters=FLAGS.num_filters,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)
            elif FLAGS.model_type == "rnn":
                obj = TextRNN(sequence_length=FLAGS.max_document_length,
                              num_classes=y_train.shape[1],
                              vocab_size=len(vocab_processor.vocabulary_),
                              hidden_unit=FLAGS.hidden_unit,
                              embedding_size=FLAGS.embedding_dim,
                              l2_reg_lambda=FLAGS.l2_reg_lambda)
            elif FLAGS.model_type == "dan":
                obj = TextDAN(sequence_length=FLAGS.max_document_length,
                              num_classes=y_train.shape[1],
                              vocab_size=len(vocab_processor.vocabulary_),
                              embedding_size=FLAGS.embedding_dim,
                              filter_sizes=list(
                                  map(int, FLAGS.filter_sizes.split(","))),
                              num_filters=FLAGS.num_filters,
                              l2_reg_lambda=FLAGS.l2_reg_lambda)
            elif FLAGS.model_type == "attn_cnn":
                obj = TextAttnCNN(sequence_length=FLAGS.max_document_length,
                                  num_classes=y_train.shape[1],
                                  vocab_size=len(vocab_processor.vocabulary_),
                                  embedding_size=FLAGS.embedding_dim,
                                  num_heads=FLAGS.num_heads,
                                  filter_sizes=list(
                                      map(int, FLAGS.filter_sizes.split(","))),
                                  num_filters=FLAGS.num_filters,
                                  l2_reg_lambda=FLAGS.l2_reg_lambda)
            elif FLAGS.model_type == "dpcnn":
                obj = TextDPCNN(sequence_length=FLAGS.max_document_length,
                                num_classes=y_train.shape[1],
                                vocab_size=len(vocab_processor.vocabulary_),
                                embedding_size=FLAGS.embedding_dim,
                                filter_sizes=list(
                                    map(int, FLAGS.filter_sizes.split(","))),
                                num_filters=FLAGS.num_filters,
                                num_blocks=FLAGS.num_blocks,
                                l2_reg_lambda=FLAGS.l2_reg_lambda)
            else:
                obj = TextCNN(sequence_length=FLAGS.max_document_length,
                              num_classes=y_train.shape[1],
                              vocab_size=len(vocab_processor.vocabulary_),
                              embedding_size=FLAGS.embedding_dim,
                              filter_sizes=list(
                                  map(int, FLAGS.filter_sizes.split(","))),
                              num_filters=FLAGS.num_filters,
                              l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                grads_and_vars = optimizer.compute_gradients(obj.loss)
                train_op = optimizer.apply_gradients(grads_and_vars,
                                                     global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", FLAGS.model_version))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", obj.loss)
            acc_summary = tf.summary.scalar("accuracy", obj.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Save train params since eval.py needs them
            trained_dir = os.path.abspath(
                os.path.join(out_dir, "trained_results"))
            if not os.path.exists(trained_dir):
                os.makedirs(trained_dir)
            with open(trained_dir + '/sorted_label.json', 'w') as outfile:
                json.dump(sorted_label, outfile, indent=4, ensure_ascii=False)
            with open(trained_dir + '/train_params.json', 'w') as outfile:
                json.dump({"max_document_length": FLAGS.max_document_length},
                          outfile,
                          indent=4,
                          ensure_ascii=False)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch, x_real_len_batch):
                """
                A single training step
                """
                if FLAGS.model_type == "cnn" or FLAGS.model_type == "dan" or FLAGS.model_type == "attn_cnn" or FLAGS.model_type == "dpcnn":
                    feed_dict = {
                        obj.input_x: x_batch,
                        obj.input_y: y_batch,
                        obj.dropout_keep_prob: FLAGS.dropout_keep_prob,
                        obj.is_training: True
                    }
                else:
                    feed_dict = {
                        obj.input_x: x_batch,
                        obj.input_y: y_batch,
                        obj.dropout_keep_prob: FLAGS.dropout_keep_prob,
                        obj.real_len: x_real_len_batch
                    }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, obj.loss,
                    obj.accuracy
                ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def overfit(dev_loss, eva_num=3):
                """Return True if the tracked dev metric did not increase at any point
                over the last `eva_num` evaluations (used to stop when dev accuracy
                stops improving)."""
                n = len(dev_loss)
                if n < eva_num:
                    return False
                for i in range(n - eva_num + 1, n):
                    if dev_loss[i] > dev_loss[i - 1]:
                        return False
                return True

            def dev_step(x_batch, y_batch, x_real_len_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                dev_batches = data_helpers.batch_iter(list(
                    zip(x_batch, y_batch, x_real_len_batch)),
                                                      FLAGS.batch_size,
                                                      1,
                                                      shuffle=False)
                all_pred = []
                correct_total_num = 0
                for batch in dev_batches:
                    x_dev_batch, y_dev_batch, x_real_len_dev_batch = zip(
                        *batch)
                    if FLAGS.model_type == "cnn" or FLAGS.model_type == "dan" or FLAGS.model_type == "attn_cnn" or FLAGS.model_type == "dpcnn":
                        feed_dict = {
                            obj.input_x: x_dev_batch,
                            obj.input_y: y_dev_batch,
                            obj.dropout_keep_prob: 1.0,
                            obj.is_training: False
                        }
                    else:
                        feed_dict = {
                            obj.input_x: x_dev_batch,
                            obj.input_y: y_dev_batch,
                            obj.dropout_keep_prob: 1.0,
                            obj.real_len: x_real_len_dev_batch
                        }

                    step, summaries, pred, correct_pred_num = sess.run([
                        global_step, dev_summary_op, obj.predictions,
                        obj.correct_pred_num
                    ], feed_dict)
                    all_pred = np.concatenate([all_pred, pred])
                    correct_total_num += correct_pred_num
                    if writer:
                        writer.add_summary(summaries, step)
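                # Accuracy is computed from the summed correct-prediction counts
                # rather than by averaging per-batch accuracies, so a smaller
                # final batch does not skew the result.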
                dev_acc = 1.0 * correct_total_num / len(y_batch)
                print("right_sample {}, dev_sample {}, dev_acc {:g}".format(
                    correct_total_num, len(y_batch), dev_acc))
                return dev_acc

            # Generate batches
            batches = data_helpers.batch_iter(
                list(zip(x_train, y_train, x_real_len_train)),
                FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            dev_acc = []
            for batch in batches:
                x_batch, y_batch, x_real_len_batch = zip(*batch)
                train_step(x_batch, y_batch, x_real_len_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:", current_step)
                    cur_acc = dev_step(x_dev,
                                       y_dev,
                                       x_real_len_dev,
                                       writer=dev_summary_writer)
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
                    dev_acc.append(cur_acc)
                    if overfit(dev_acc):
                        print("current accuracy drop and stop train..\n")
                        sys.exit(0)
                    print("")
Example #26
def train_rnn():
    # Data Preparation
    # ==================================================

    if FLAGS.init_embedding_path is not None:
        embedding = np.load(FLAGS.init_embedding_path)
        print("Using pre-trained word embedding which shape is {}\n".format(
            embedding.shape))
        FLAGS.vocab_size = embedding.shape[0]
        FLAGS.embedding_size = embedding.shape[1]

    if FLAGS.init_model_path is not None:
        assert os.path.isdir(
            FLAGS.init_model_path), "init_model_path must be a directory\n"
        ckpt = tf.train.get_checkpoint_state(FLAGS.init_model_path)
        assert ckpt, "No checkpoint found in {}\n".format(
            FLAGS.init_model_path)
        assert ckpt.model_checkpoint_path, "No model_checkpoint_path found in checkpoint\n"

    # Create root directory
    timestamp = str(int(time.time()))
    root_dir = os.path.join(os.path.curdir, 'runs', 'textrnn',
                            'trained_result_' + timestamp)
    os.makedirs(root_dir)

    # Load data
    print("Loading data...\n")
    x, y = data_helpers.load_data(FLAGS.data_file,
                                  FLAGS.sequence_length,
                                  FLAGS.vocab_size,
                                  root_dir=root_dir)
    FLAGS.num_classes = len(y[0])

    # Split dataset
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=FLAGS.test_size, stratify=y, random_state=0)
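    # Split the held-out portion in half: the validation and test sets each
    # receive test_size / 2 of the full dataset.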
    x_val, x_test, y_val, y_test = train_test_split(x_test,
                                                    y_test,
                                                    test_size=0.5,
                                                    random_state=0)

    # Training
    # ==================================================
    with tf.Graph().as_default():
        tf_config = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        tf_config.gpu_options.allow_growth = FLAGS.gpu_allow_growth

        with tf.Session(config=tf_config).as_default() as sess:
            rnn = TextRNN(vocab_size=FLAGS.vocab_size,
                          embedding_size=FLAGS.embedding_size,
                          sequence_length=FLAGS.sequence_length,
                          rnn_size=FLAGS.rnn_size,
                          num_layers=FLAGS.num_layers,
                          attention_size=FLAGS.attention_size,
                          num_classes=FLAGS.num_classes,
                          learning_rate=FLAGS.learning_rate,
                          grad_clip=FLAGS.grad_clip)

            # Output directory for models and summaries
            out_dir = os.path.abspath(root_dir)
            print("Writing to {}...\n".format(out_dir))

            # Summaries for loss and accuracy
            tf.summary.scalar("loss", rnn.loss)
            tf.summary.scalar("accuracy", rnn.accuracy)
            merged_summary = tf.summary.merge_all()

            # Summaries dictionary
            train_summary_dir = os.path.join(out_dir, 'summaries', 'train')
            val_summary_dir = os.path.join(out_dir, 'summaries', 'val')
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)
            val_summary_writer = tf.summary.FileWriter(val_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. TensorFlow will not create it, so we do.
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, 'checkpoints'))
            checkpoint_prefix = os.path.join(checkpoint_dir, 'model.ckpt')
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
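            # max_to_keep=1 keeps only the last checkpoint written; since a
            # checkpoint is saved only when val accuracy improves, the surviving
            # files always belong to the best model (renamed to best_model below).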

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Using pre-trained word embedding
            if FLAGS.init_embedding_path is not None:
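                # Copy the pre-trained matrix into the graph's embedding
                # variable, then drop the NumPy copy to free memory.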
                sess.run(rnn.embedding.assign(embedding))
                del embedding

            # Continue training from saved model
            if FLAGS.init_model_path is not None:
                saver.restore(sess, ckpt.model_checkpoint_path)

            # Training start
            print("Start training...\n")
            best_at_step = 0
            best_val_accuracy = 0
            for epoch in range(FLAGS.num_epochs):
                # Generate train batches
                train_batches = data_helpers.batch_iter(
                    list(zip(x_train, y_train)), FLAGS.batch_size)
                start = time.time()
                for batch in train_batches:
                    # Training model on x_batch and y_batch
                    x_batch, y_batch = zip(*batch)
                    seq_len_train = data_helpers.real_len(x_batch)
                    feed_dict = {
                        rnn.input_x: x_batch,
                        rnn.input_y: y_batch,
                        rnn.seq_len: seq_len_train,
                        rnn.keep_prob: FLAGS.dropout_keep_prob
                    }
                    _, global_step, train_summaries, train_loss, train_accuracy = sess.run(
                        [
                            rnn.train_op, rnn.global_step, merged_summary,
                            rnn.loss, rnn.accuracy
                        ],
                        feed_dict=feed_dict)

                    # Evaluates model on val set
                    if global_step % FLAGS.evaluate_every == 0:
                        end = time.time()
                        train_summary_writer.add_summary(
                            train_summaries, global_step)
                        seq_len_val = data_helpers.real_len(x_val)
                        feed_dict = {
                            rnn.input_x: x_val,
                            rnn.input_y: y_val,
                            rnn.seq_len: seq_len_val,
                            rnn.keep_prob: 1.0
                        }
                        val_summaries, val_loss, val_accuracy = sess.run(
                            [merged_summary, rnn.loss, rnn.accuracy],
                            feed_dict=feed_dict)
                        val_summary_writer.add_summary(val_summaries,
                                                       global_step)
                        print(
                            "Epoch: {}, global step: {}, training speed: {:.3f} sec/batch"
                            .format(epoch, global_step,
                                    (end - start) / FLAGS.evaluate_every))
                        print(
                            "train loss: {:.3f}, train accuracy: {:.3f}, val loss: {:.3f}, val accuracy: {:.3f}\n"
                            .format(train_loss, train_accuracy, val_loss,
                                    val_accuracy))
                        # If improved, save the model
                        if val_accuracy > best_val_accuracy:
                            print(
                                "New best val accuracy at step {}, saving model...\n"
                                .format(global_step))
                            saver.save(sess,
                                       checkpoint_prefix,
                                       global_step=global_step)
                            best_val_accuracy = val_accuracy
                            best_at_step = global_step
                        start = time.time()

            # Rename the checkpoint
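            # A TF checkpoint is three files sharing one prefix (.index, .meta
            # and .data-00000-of-00001); renaming all three to the best_model
            # prefix lets saver.restore() load it by that name below.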
            best_model_prefix = checkpoint_prefix + '-' + str(best_at_step)
            os.rename(best_model_prefix + '.index',
                      os.path.join(checkpoint_dir, 'best_model.index'))
            os.rename(best_model_prefix + '.meta',
                      os.path.join(checkpoint_dir, 'best_model.meta'))
            os.rename(
                best_model_prefix + '.data-00000-of-00001',
                os.path.join(checkpoint_dir, 'best_model.data-00000-of-00001'))

            # Testing on test set
            print(
                "\nTraining complete, testing the best model on test set...\n")
            saver.restore(sess, os.path.join(checkpoint_dir, 'best_model'))
            seq_len_test = data_helpers.real_len(x_test)
            feed_dict = {
                rnn.input_x: x_test,
                rnn.input_y: y_test,
                rnn.seq_len: seq_len_test,
                rnn.keep_prob: 1.0
            }
            y_logits, test_accuracy = sess.run([rnn.logits, rnn.accuracy],
                                               feed_dict=feed_dict)
            print("Testing Accuracy: {:.3f}\n".format(test_accuracy))
            label_transformer = joblib.load(
                os.path.join(out_dir, 'label_transformer.pkl'))
            y_test_original = label_transformer.inverse_transform(y_test)
            y_logits_original = label_transformer.inverse_transform(y_logits)
            print("Precision, Recall and F1-Score:\n\n",
                  classification_report(y_test_original, y_logits_original))

            # Save parameters
            print("Parameters saving...\n")
            params = {}
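            # Note: on newer TF/absl versions FLAGS.__flags maps names to Flag
            # objects rather than raw values; FLAGS.flag_values_dict() is the
            # portable way to get plain values if json.dump fails here.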
            for param, value in FLAGS.__flags.items():
                params[param] = value
            with open(os.path.join(out_dir, 'parameters.json'),
                      'w') as outfile:
                json.dump(params,
                          outfile,
                          indent=4,
                          sort_keys=True,
                          ensure_ascii=False)

            # Save word embedding
            print("Word embedding saving...\n")
            np.save(os.path.join(out_dir, 'embedding.npy'),
                    sess.run(rnn.embedding))
Example #27
                            lambda x: int(x)
                            if x != "inf" else max_document_length,
                            FLAGS.filter_sizes_char.split(","))),
                )
            elif FLAGS.architecture == 7:
                neural_net = TextRNN(
                    sequence_length=max_document_length,
                    num_classes=y_train.shape[1],
                    vocab_size=len(vocab_processor.vocabulary_),
                    embedding_size=FLAGS.embedding_dim,
                    embedding_dim_char=FLAGS.embedding_dim_char,
                    num_filters_char=FLAGS.num_filters_char,
                    num_neurons_fc=FLAGS.num_neurons_fc,
                    num_neurons_fc_2=FLAGS.num_neurons_fc_2,
                    margin=FLAGS.margin,
                    rnn_num_layers=FLAGS.rnn_num_layers,
                    loss_function=FLAGS.loss_function,
                    rnn_hidden_size=FLAGS.rnn_hidden_size,
                    max_token_length=max_token_length,
                    char_vocab_size=len(char_vocabulary),
                    filter_sizes_char=list(
                        map(
                            lambda x: int(x)
                            if x != "inf" else max_document_length,
                            FLAGS.filter_sizes_char.split(","))),
                )

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            # optimizer = tf.train.AdagradOptimizer(1e-3)
            optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
Example #28
            sess1 = tf.Session(config=session_conf)
            sess2 = tf.Session(config=session_conf)

            with sess1.as_default(), sess2.as_default():
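                # Both sessions stay open inside this block; the innermost
                # as_default() (sess2) is what tf.get_default_session() returns.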

                tf.set_random_seed(seed_value + number_of_network)
                rnn = TextRNN(
                    sequence_length=x_train.shape[1],
                    num_classes=y_train.shape[1],
                    vocab_size=len(vocab_processor.vocabulary_),
                    embedding_size=embedding_dimension,
                    l2_reg_lambda=FLAGS.l2_reg_lambda,
                    weights=class_weights_for_cross_entropy,
                    rel_pos_embedding_size=FLAGS.rel_pos_embedding_size,
                    rel_pos_cardinality=relative_positions_cardinality,
                    lstm_units=FLAGS.lstm_units,
                    pos_tags_embedding_size=FLAGS.pos_tags_embedding_size,
                    pos_tags_cardinality=pos_tags_cardinality,
                    with_eye_tracking=cfg["features"]["gaze"],
                    et_features_size=et.shape[2],
                    et_number_of_bins=cfg["features"]["binned"],
                    et_embedding_dimension=FLAGS.et_embedding_dimension,
                    with_eeg=cfg["features"]["eeg"],
                    eeg_features_size=eeg.shape[2],
                    use_normalization_layer=True)

                cnn = TextCNN(
                    sequence_length=x_train.shape[1],
                    num_classes=y_train.shape[1],
                    vocab_size=len(vocab_processor.vocabulary_),
                    embedding_size=embedding_dimension,