        # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):  # create the directory if it does not exist
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

        # Write vocabulary
        vocab_processor.save(os.path.join(checkpoint_dir, "vocab"))

        # Initialize all variables
        sess.run(tf.global_variables_initializer())
        if not FLAGS.random:
            vocabulary = vocab_processor.vocabulary_
            initW = None
            initW = data_helpers.load_embedding_vectors_word2vec(vocabulary, "./data/GoogleNews-vectors-negative300.bin", True)
            print("word2vec file has been loaded")
            # initW = data_helpers.load_embedding_vectors_glove(vocabulary,
            #                                                   "./data/glove.6B.300d.txt", FLAGS.embedding_dim)
            # print("glove file has been loaded")
            sess.run(rnn.W.assign(initW))

        def train_step(x_batch, y_batch):
            """
            A single training step
            """
            feed_dict = {  # feed data into the placeholders
              rnn.input_x: x_batch,
              rnn.input_y: y_batch,
              rnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
            }
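# The examples in this listing all initialize the embedding matrix from pretrained
# vectors via data_helpers.load_embedding_vectors_word2vec before training starts.
# As a rough, illustrative sketch of what such a helper does (assuming gensim is
# available and that a plain word->id dict is passed in; the real data_helpers
# parses the binary file itself and may differ):
import numpy as np
from gensim.models import KeyedVectors


def load_word2vec_init_matrix(word_to_id, path, embedding_dim, binary=True):
    """Build a [vocab_size, embedding_dim] matrix suitable for W.assign(initW)."""
    kv = KeyedVectors.load_word2vec_format(path, binary=binary)
    # Words missing from the pretrained model keep a small random init, as in Kim (2014).
    init_w = np.random.uniform(-0.25, 0.25,
                               (len(word_to_id), embedding_dim)).astype(np.float32)
    for word, idx in word_to_id.items():
        if word in kv:
            init_w[idx] = kv[word]
    return init_w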
Example 2
        # Write vocabulary
        vocab_processor.save(os.path.join(out_dir, "vocab"))

        # Initialize all variables
        # from here to line 203 added
        sess.run(tf.global_variables_initializer())
        if FLAGS.enable_word_embeddings and cfg['word_embeddings'][
                'default'] is not None:
            vocabulary = vocab_processor.vocabulary_
            initW = None
            if embedding_name == 'word2vec':
                # load embedding vectors from the word2vec
                print("Load word2vec file {}".format(
                    cfg['word_embeddings']['word2vec']['path']))
                initW = data_helpers.load_embedding_vectors_word2vec(
                    vocabulary, cfg['word_embeddings']['word2vec']['path'],
                    cfg['word_embeddings']['word2vec']['binary'])
                print("word2vec file has been loaded")
            elif embedding_name == 'glove':
                # load embedding vectors from the glove
                print("Load glove file {}".format(
                    cfg['word_embeddings']['glove']['path']))
                initW = data_helpers.load_embedding_vectors_glove(
                    vocabulary, cfg['word_embeddings']['glove']['path'],
                    embedding_dimension)
                print("glove file has been loaded\n")
            sess.run(cnn.W.assign(initW))

        # the learning rate was added as an additional parameter
        def train_step(x_batch, y_batch, learning_rate):
            """
Example 3
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.global_variables(),
                               max_to_keep=FLAGS.num_checkpoints)

        # Write vocabulary
        vocab_processor.save(os.path.join(out_dir, "vocab"))

        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        # Load pre-trained word2vec embeddings if an embedding path was provided
        if FLAGS.embedding_path:
            print('Loading word2vec embeddings...')
            vocabulary = vocab_processor.vocabulary_
            initW = data_helpers.load_embedding_vectors_word2vec(
                vocabulary, FLAGS.embedding_path, True)
            sess.run(cnn.W.assign(initW))
        else:
            warnings.warn(
                'Pre-trained word vectors are not being used for this session.'
            )

        def train_step(x_batch, y_batch):
            """
            A single training step
            """
            feed_dict = {
                cnn.input_x: x_batch,
                cnn.input_y: y_batch,
                cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
            }
                      num_filters=num_filters,
                      l2_reg_lambda=l2_reg_lambda)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        initW = None
        # load embedding vectors from the word2vec
        initW = data_helpers.load_embedding_vectors_word2vec(
            vocabulary, word2vec_model_path, word2vec_binary)
        print("word2vec file has been loaded")
        sess.run(cnn.W.assign(initW))

        def train_step(x_batch, y_batch):

            feed_dict = {
                cnn.input_x: x_batch,
                cnn.input_y: y_batch,
                cnn.dropout_keep_prob: dropout_keep_prob
            }
            _, step, loss, accuracy = sess.run(
                [train_op, global_step, cnn.loss, cnn.accuracy], feed_dict)
            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, acc {:g}".format(
                time_str, step, loss, accuracy))
Example 5
def train(x_train, y_train, vocab_processor, x_dev, y_dev):
    # Training
    # ==================================================

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(
                sequence_length=x_train.shape[1],  #56
                num_classes=y_train.shape[1],  #2
                vocab_size=len(vocab_processor.vocabulary_),  #18765?  18758 √
                embedding_size=FLAGS.embedding_dim,  #128
                filter_sizes=list(map(
                    int, FLAGS.filter_sizes.split(","))),  #[3,4,5]
                num_filters=FLAGS.num_filters,  #128
                l2_reg_lambda=FLAGS.l2_reg_lambda)  #0

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
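            # (tf.nn.zero_fraction reports the share of zero entries in each gradient,
            # which TensorBoard then shows as a rough sparsity indicator over time)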
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            vocabulary = vocab_processor.vocabulary_
            initW = None
            initW = data_helpers.load_embedding_vectors_word2vec(
                vocabulary, './GoogleNews-vectors-negative300.bin', True)
            print("word2vec file has been loaded")
            sess.run(cnn.W.assign(initW))
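            # the assign above replaces the random embedding initialization from
            # global_variables_initializer() with the pretrained word2vec vectors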

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob  #0.5
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size,
                                              FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                #print (batch[0])
                x_batch, y_batch = zip(
                    *batch)  #x_batch = 64 * 56  y_batch = 64 * 2
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:  # default: every 100 steps
                    print("\nEvaluation:")
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:  # default: every 100 steps
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
Example 6
    def train(self):
        # Training
        # ==================================================
        if self.x_train is None:
            self.preprocess()

        with tf.Graph().as_default():
            session_conf = tf.ConfigProto(
                allow_soft_placement=FLAGS.allow_soft_placement,
                log_device_placement=FLAGS.log_device_placement)
            sess = tf.Session(config=session_conf)
            with sess.as_default():
                cnn = TextCNN(sequence_length=self.x_train.shape[1],
                              num_classes=self.y_train.shape[1],
                              vocab_size=len(self.vocab_processor.vocabulary_),
                              embedding_size=self.embedding_dimension,
                              filter_sizes=list(
                                  map(int, FLAGS.filter_sizes.split(","))),
                              num_filters=FLAGS.num_filters,
                              l2_reg_lambda=FLAGS.l2_reg_lambda)

                # Define Training procedure
                print("Define Training procedure")
                global_step = tf.Variable(0,
                                          name="global_step",
                                          trainable=False)
                optimizer = tf.train.GradientDescentOptimizer(
                    cnn.learning_rate)
                grads_and_vars = optimizer.compute_gradients(cnn.loss)
                train_op = optimizer.apply_gradients(grads_and_vars,
                                                     global_step=global_step)

                # Keep track of gradient values and sparsity (optional)
                print("Keep track of gradient values and sparsity (optional)")
                grad_summaries = []
                for g, v in grads_and_vars:
                    if g is not None:
                        grad_hist_summary = tf.summary.histogram(
                            "{}/grad/hist".format(v.name), g)
                        sparsity_summary = tf.summary.scalar(
                            "{}/grad/sparsity".format(v.name),
                            tf.nn.zero_fraction(g))
                        grad_summaries.append(grad_hist_summary)
                        grad_summaries.append(sparsity_summary)
                grad_summaries_merged = tf.summary.merge(grad_summaries)

                # Output directory for models and summaries
                print("Output directory for models and summaries")
                out_dir = os.path.abspath(
                    os.path.join(os.path.curdir, "runs", self.dataset_name,
                                 str(self.embedding_name)))
                print("Writing to {}\n".format(out_dir))

                # Summaries for loss and accuracy
                loss_summary = tf.summary.scalar("loss", cnn.loss)
                acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

                # Train Summaries
                train_summary_op = tf.summary.merge(
                    [loss_summary, acc_summary, grad_summaries_merged])
                train_summary_dir = os.path.join(out_dir, "summaries", "train")
                train_summary_writer = tf.summary.FileWriter(
                    train_summary_dir, sess.graph)

                # Dev summaries
                dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
                dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
                dev_summary_writer = tf.summary.FileWriter(
                    dev_summary_dir, sess.graph)

                # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
                checkpoint_dir = os.path.abspath(
                    os.path.join(out_dir, "checkpoints"))
                checkpoint_prefix = os.path.join(checkpoint_dir, "model")
                if not os.path.exists(checkpoint_dir):
                    os.makedirs(checkpoint_dir)
                saver = tf.train.Saver(tf.global_variables(),
                                       max_to_keep=FLAGS.num_checkpoints)

                # Write vocabulary
                print("Write vocabulary")
                self.vocab_processor.save(os.path.join(out_dir, "vocab"))

                # Initialize all variables
                print("Initialize all variables")
                sess.run(tf.global_variables_initializer())
                if self.embedding_name is not None and self.cfg is not None:
                    vocabulary = self.vocab_processor.vocabulary_
                    initW = None
                    if self.embedding_name == 'word2vec':
                        # load embedding vectors from the word2vec
                        print("Load word2vec file {}".format(
                            self.cfg['word_embeddings']['word2vec']['path']))
                        initW = data_helpers.load_embedding_vectors_word2vec(
                            vocabulary,
                            self.cfg['word_embeddings']['word2vec']['path'],
                            self.cfg['word_embeddings']['word2vec']['binary'])
                        print("word2vec file has been loaded")
                    elif self.embedding_name == 'glove':
                        # load embedding vectors from the glove
                        print("Load glove file {}".format(
                            self.cfg['word_embeddings']['glove']['path']))
                        initW = data_helpers.load_embedding_vectors_glove(
                            vocabulary,
                            self.cfg['word_embeddings']['glove']['path'],
                            self.embedding_dimension)
                        print("glove file has been loaded\n")

                    if initW is not None:
                        sess.run(cnn.W.assign(initW))
                    else:
                        print(
                            "HIGH ALERT - cnn.W not assigned. initW is None\n")

                def train_step(x_batch, y_batch, learning_rate):
                    """
                    A single training step
                    """
                    feed_dict = {
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
                        cnn.learning_rate: learning_rate
                    }
                    _, step, summaries, loss, accuracy = sess.run([
                        train_op, global_step, train_summary_op, cnn.loss,
                        cnn.accuracy
                    ], feed_dict)
                    time_str = datetime.datetime.now().isoformat()
                    if step % 50 == 0:
                        print(
                            "{}: step {}, loss {:g}, acc {:g}, learning_rate {:g}"
                            .format(time_str, step, loss, accuracy,
                                    learning_rate))
                    train_summary_writer.add_summary(summaries, step)

                def dev_step(x_batch, y_batch, writer=None):
                    """
                    Evaluates model on a dev set
                    """
                    feed_dict = {
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: 1.0
                    }
                    step, summaries, loss, accuracy = sess.run(
                        [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                        feed_dict)
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss, accuracy))
                    if writer:
                        writer.add_summary(summaries, step)

                def get_learning_rate(decay_speed, counter):
                    # # It uses dynamic learning rate with a high value at the beginning to speed up the training
                    # max_learning_rate = 0.005
                    # min_learning_rate = 0.0001
                    # learning_rate = min_learning_rate + (max_learning_rate - min_learning_rate) * 0.25 * math.exp(
                    #     -counter / decay_speed)
                    # # print("decay speed: {}. counter: {}. learning_rate: {}".format(decay_speed, counter, learning_rate))
                    # return learning_rate
                    return 0.0005

                # Generate batches
                print("Generate batches")
                batches = data_helpers.batch_iter(
                    list(zip(self.x_train, self.y_train)), FLAGS.batch_size,
                    FLAGS.num_epochs)

                decay_speed = FLAGS.decay_coefficient * len(
                    self.y_train) / FLAGS.batch_size

                # Training loop. For each batch...
                print("Training loop. For each batch...")
                counter = 0
                for batch in batches:
                    learning_rate = get_learning_rate(decay_speed, counter)
                    counter += 1
                    x_batch, y_batch = zip(*batch)
                    train_step(x_batch, y_batch, learning_rate)
                    current_step = tf.train.global_step(sess, global_step)
                    if current_step % FLAGS.evaluate_every == 0:
                        print("\nEvaluation:")
                        dev_step(self.x_eval,
                                 self.y_eval,
                                 writer=dev_summary_writer)
                        print()
                    if current_step % FLAGS.checkpoint_every == 0:
                        path = saver.save(sess,
                                          checkpoint_prefix,
                                          global_step=current_step)
                        print("\tSaved model checkpoint to {}\n".format(path))

                print("End training. counter: {}. batch size: {}\n".format(
                    counter, FLAGS.batch_size))
def main():
    import time
    start_time = time.time()

    FLAGS = flagClass()

    with open("config.yml", 'r') as ymlfile:
        cfg = yaml.safe_load(ymlfile)  # safe_load: plain config data, no arbitrary Python objects
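    # A config.yml consistent with the keys read below might look roughly like
    # this (illustrative sketch; paths and dataset entries are assumptions):
    #
    #   word_embeddings:
    #     default: word2vec
    #     word2vec:
    #       path: ./data/GoogleNews-vectors-negative300.bin
    #       binary: True
    #       dimension: 300
    #     glove:
    #       path: ./data/glove.6B.300d.txt
    #       dimension: 300
    #   datasets:
    #     default: mrpolarity
    #     mrpolarity:
    #       positive_data_file:
    #         path: ./data/rt-polarity.pos
    #       negative_data_file:
    #         path: ./data/rt-polarity.neg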

    dataset_name = cfg["datasets"]["default"]
    if FLAGS.enable_word_embeddings and cfg['word_embeddings'][
            'default'] is not None:
        embedding_name = cfg['word_embeddings']['default']
        embedding_dimension = cfg['word_embeddings'][embedding_name][
            'dimension']
    else:
        embedding_dimension = FLAGS.embedding_dim

    # Data Preparation
    # ==================================================

    # Load data

    print("Loading data...")
    datasets = None
    if dataset_name == "mrpolarity":
        datasets = data_helpers.get_datasets_mrpolarity(
            cfg["datasets"][dataset_name]["positive_data_file"]["path"],
            cfg["datasets"][dataset_name]["negative_data_file"]["path"])
    elif dataset_name == 'spamham':
        datasets = data_helpers.get_datasets_mrpolarity(
            cfg["datasets"][dataset_name]["spam_file"]["path"],
            cfg["datasets"][dataset_name]["ham_file"]["path"])
    elif dataset_name == "20newsgroup":
        datasets = data_helpers.get_datasets_20newsgroup(
            subset="train",
            categories=cfg["datasets"][dataset_name]["categories"],
            shuffle=cfg["datasets"][dataset_name]["shuffle"],
            random_state=cfg["datasets"][dataset_name]["random_state"])
    elif dataset_name == "dbpedia":
        datasets = data_helpers.get_datasets_dbpedia(
            cfg["datasets"][dataset_name]["train_file"]["path"],
            cfg["datasets"][dataset_name]["train_file"]["limit"])
    elif dataset_name == "email":
        datasets = data_helpers.get_datasets_email(
            container_path=cfg["datasets"][dataset_name]["container_path"],
            categories=cfg["datasets"][dataset_name]["categories"],
            shuffle=cfg["datasets"][dataset_name]["shuffle"],
            random_state=cfg["datasets"][dataset_name]["random_state"])
    elif dataset_name == "localdata":
        datasets = data_helpers.get_datasets_localdata(
            container_path=cfg["datasets"][dataset_name]["container_path"],
            categories=cfg["datasets"][dataset_name]["categories"],
            shuffle=cfg["datasets"][dataset_name]["shuffle"],
            random_state=cfg["datasets"][dataset_name]["random_state"])
    x_text, y = data_helpers.load_data_labels(datasets)

    # Build vocabulary

    # To limit memory usage, you can cut the input text off at the first 40 words.
    # Other research has shown that the first 40 words of a text (IMDB dataset?)
    # are representative of its content for classification purposes.
    # Comment out one of the two lines below.

    # max_document_length = max([len(x.split(" ")) for x in x_text])
    max_document_length = 40  # read up to 40 words from each sentence
    vocab_processor = learn.preprocessing.VocabularyProcessor(
        max_document_length)
    x = np.array(list(vocab_processor.fit_transform(x_text)))
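    # Roughly, fit_transform maps each sentence to a fixed-length array of word ids,
    # assigning ids in order of first appearance, padding with 0 and truncating at
    # max_document_length; e.g. with a length of 3:
    #   ["i love it", "love it"] -> [[1, 2, 3], [2, 3, 0]]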

    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]

    # Split train/test set
    # TODO: This is very crude, should use cross-validation
    dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
    x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[
        dev_sample_index:]
    y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[
        dev_sample_index:]
    print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
    print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))
    print('Sequence_length={}'.format(x_train.shape[1]))

    # Training
    # ==================================================

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=len(vocab_processor.vocabulary_),
                          embedding_size=embedding_dimension,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(cnn.learning_rate)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())
            if FLAGS.enable_word_embeddings and cfg['word_embeddings'][
                    'default'] is not None:
                vocabulary = vocab_processor.vocabulary_
                initW = None
                if embedding_name == 'word2vec':
                    # load embedding vectors from the word2vec
                    print("Load word2vec file {}".format(
                        cfg['word_embeddings']['word2vec']['path']))
                    initW = data_helpers.load_embedding_vectors_word2vec(
                        vocabulary, cfg['word_embeddings']['word2vec']['path'],
                        cfg['word_embeddings']['word2vec']['binary'])
                    print("word2vec file has been loaded")
                elif embedding_name == 'glove':
                    # load embedding vectors from the glove
                    print("Load glove file {}".format(
                        cfg['word_embeddings']['glove']['path']))
                    initW = data_helpers.load_embedding_vectors_glove(
                        vocabulary, cfg['word_embeddings']['glove']['path'],
                        embedding_dimension)
                    print("glove file has been loaded\n")
                sess.run(cnn.W.assign(initW))

            def train_step(x_batch, y_batch, learning_rate):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
                    cnn.learning_rate: learning_rate
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}, learning_rate {:g}".
                      format(time_str, step, loss, accuracy, learning_rate))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy, gr = sess.run([
                    global_step, dev_summary_op, cnn.loss, cnn.accuracy,
                    cnn.grad
                ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}, gr {}".format(
                    time_str, step, loss, accuracy, gr))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size,
                                              FLAGS.num_epochs)
            print("Number of epochs: {}".format(FLAGS.num_epochs))
            num_batches_per_epoch = int(
                (len(list(zip(x_train, y_train))) - 1) / FLAGS.batch_size) + 1
            print("Batches per epoch: {}".format(num_batches_per_epoch))
            print("Batch size: {}".format(FLAGS.batch_size))
            # Use a dynamic learning rate that starts high to speed up training
            max_learning_rate = 0.005
            min_learning_rate = 0.0001
            decay_speed = FLAGS.decay_coefficient * len(
                y_train) / FLAGS.batch_size
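            # For example, with decay_coefficient = 2.5, batch_size = 64 and roughly
            # 9,600 training examples (illustrative numbers only), decay_speed is ~375:
            # the rate starts at max_learning_rate = 0.005 for counter = 0 and decays
            # towards min_learning_rate, reaching about 0.0001 + 0.0049 * e**-1 ~ 0.0019
            # after one decay_speed's worth of batches.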
            # Training loop. For each batch...
            counter = 0
            for batch in batches:
                learning_rate = min_learning_rate + (
                    max_learning_rate - min_learning_rate) * math.exp(
                        -counter / decay_speed)
                counter += 1
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch, learning_rate)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
    print("runtime was " + str(time.time() - start_time))
Example 8
def train(x_train, y_train, vocab_processor, x_test, y_test, x_valid, y_valid,
          report_df, current_fold, df_valid):
    # Training
    # ==================================================

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=len(vocab_processor.vocabulary_),
                          embedding_size=FLAGS.embedding_dim,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            ## code by Sven
            vocabulary = vocab_processor.vocabulary_
            initW = None
            # load embedding vectors from the word2vec
            print("Load word2vec file {}".format(FLAGS.embedding_path))
            initW = data_helpers.load_embedding_vectors_word2vec(
                vocabulary, FLAGS.embedding_path, FLAGS.embedding_bin)
            print("word2vec file has been loaded")
            sess.run(cnn.W.assign(initW))

            ## end of change

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)

                # compute f1 score
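                # y_batch is assumed to be one-hot with the positive class in column 1,
                # so labels and predictions are plain 0/1 vectors for sklearn's f1_score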
                labels = np.array(y_batch)[:, 1]
                prediction = cnn.predictions.eval(feed_dict)
                f1 = f1_score(labels, prediction)

                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}  f1 {:g}".format(
                    time_str, step, loss, accuracy, f1))
                if writer:
                    writer.add_summary(summaries, step)
                return accuracy, f1

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size,
                                              FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nTest evaluation:")
                    dev_step(x_test, y_test, writer=dev_summary_writer)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))

            print("\nEnd test evaluation:")
            acc_score_in_cat, f1_score_in_cat = dev_step(
                x_test, y_test, writer=dev_summary_writer)
            print("\nEnd val evaluation:")
            acc_score_out_of_cat, f1_score_out_of_cat = dev_step(
                x_valid, y_valid, writer=dev_summary_writer)

            cat_dict = {}
            for category in df_valid.category.unique():
                mask = df_valid['category'] == category
                category_df = df_valid[mask]
                category_array = np.array(category_df['text'].values.tolist())
                category_label = np.array(
                    category_df['labels'].values.tolist())
                print(category_array.shape)
                print(x_valid.shape)
                print(category_label.shape)
                print(y_valid.shape)
                cat_acc, cat_f1 = dev_step(category_array, category_label)
                cat_dict[category + "-f1"] = cat_f1
                cat_dict[category + "-acc"] = cat_acc

            name = 'fold-' + str(current_fold)
            report_df.append([
                name, acc_score_in_cat, f1_score_in_cat, acc_score_out_of_cat,
                f1_score_out_of_cat
            ] + list(cat_dict.values()))

            return list(cat_dict.keys())
Example 9
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.global_variables(),
                               max_to_keep=FLAGS.num_checkpoints)

        # Write vocabulary
        vocab_processor.save(os.path.join(out_dir, "vocab"))

        # Initialize all variables
        sess.run(tf.global_variables_initializer())
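        # model_type appears to follow the CNN variants from Kim (2014): "rand" learns
        # the embedding matrix from scratch, the other modes initialize W from word2vec,
        # and "multichannel" copies the same vectors into a second embedding channel
        # (in the paper, one channel is kept static during training)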

        if FLAGS.model_type != "rand":
            if FLAGS.word2vec is not None:
                print("Load word2vec file {}\n".format(FLAGS.word2vec))
                initW = data_helpers.load_embedding_vectors_word2vec(
                    vocab_processor.vocabulary_, FLAGS.word2vec, True)
                print("The word2vec file {} is loaded!\n".format(
                    FLAGS.word2vec))

                sess.run(cnn.W.assign(initW))

                if FLAGS.model_type == "multichannel":
                    sess.run(cnn.W_MultiChannel.assign(initW))

        def train_step(x_batch, y_batch):
            """
            A single training step
            """
            feed_dict = {
                cnn.input_x: x_batch,
                cnn.input_y: y_batch,