Example #1
                .format(time_str, step, loss, accuracy, neg_r, neg_p, f1_neg,
                        f1_pos, avg_f1))
            if writer:
                writer.add_summary(summaries, step)

            # if avg_f1 > 56:
            #     return True
            #
            # else:
            #     return False

            return avg_f1

        # Generate batches
        batches = cnn_data_helpers.batch_iter(
            list(zip(x_train, y_train, x_lex_train)), FLAGS.batch_size,
            FLAGS.num_epochs)
        # Training loop. For each batch...
        for batch in batches:
            x_batch, y_batch, x_batch_lex = zip(*batch)
            train_step(x_batch, y_batch, x_batch_lex)
            current_step = tf.train.global_step(sess, global_step)
            if current_step % FLAGS.evaluate_every == 0:
                print("\nEvaluation:")
                curr_af1_dev = dev_step(x_dev,
                                        y_dev,
                                        x_lex_dev,
                                        writer=dev_summary_writer)
                # path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                # print("Saved model checkpoint to {}\n".format(path))
                print("")
Example #2
def run_train(w2vdim, lexdim, lexnumfilters, sample_test=True):
    if sample_test:
        print('======================================[sample test]======================================')

    max_len = 60


    with Timer("lex"):
        norm_model, raw_model = load_lexicon_unigram(lexdim)

    with Timer("w2v"):
        w2vmodel = load_w2v(w2vdim, sample_test=sample_test)

    unigram_lexicon_model = norm_model
    # unigram_lexicon_model = raw_model

    if sample_test:
        x_train, y_train, x_lex_train = cnn_data_helpers.load_data('trn_sample', w2vmodel, unigram_lexicon_model, max_len)
        x_dev, y_dev, x_lex_dev = cnn_data_helpers.load_data('dev_sample', w2vmodel, unigram_lexicon_model, max_len)
        x_test, y_test, x_lex_test = cnn_data_helpers.load_data('tst_sample', w2vmodel, unigram_lexicon_model, max_len)

    else:
        x_train, y_train, x_lex_train = cnn_data_helpers.load_data('trn', w2vmodel, unigram_lexicon_model, max_len)
        x_dev, y_dev, x_lex_dev = cnn_data_helpers.load_data('dev', w2vmodel, unigram_lexicon_model, max_len)
        x_test, y_test, x_lex_test = cnn_data_helpers.load_data('tst', w2vmodel, unigram_lexicon_model, max_len)


    # x_train, y_train = cnn_data_helpers.load_data('trn',w2vmodel , max_len)
    # x_dev, y_dev = cnn_data_helpers.load_data('dev', w2vmodel, max_len)
    # x_test, y_test  = cnn_data_helpers.load_data('tst', w2vmodel, max_len)
    del(w2vmodel)
    del(norm_model)
    del(raw_model)
    gc.collect()

    print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))


    # Training
    # ==================================================
    if not FLAGS.random_seed:
        tf.set_random_seed(FLAGS.seed_number)

    with tf.Graph().as_default():
        max_af1_dev = 0
        index_at_max_af1_dev = 0
        af1_tst_at_max_af1_dev = 0

        session_conf = tf.ConfigProto(
          allow_soft_placement=FLAGS.allow_soft_placement,
          log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            if not FLAGS.random_seed:
                tf.set_random_seed(FLAGS.seed_number)
            cnn = TextCNNAttentionSimpleUT(
                sequence_length=x_train.shape[1],
                num_classes=3,
                embedding_size=w2vdim,
                embedding_size_lex=lexdim,
                num_filters_lex=lexnumfilters,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.histogram_summary("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.scalar_summary("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.merge_summary(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.scalar_summary("loss", cnn.loss)
            acc_summary = tf.scalar_summary("accuracy", cnn.accuracy)
            f1_summary = tf.scalar_summary("avg_f1", cnn.avg_f1)

            # Train Summaries
            train_summary_op = tf.merge_summary([loss_summary, acc_summary, f1_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.train.SummaryWriter(train_summary_dir, sess.graph_def)

            # Dev summaries
            dev_summary_op = tf.merge_summary([loss_summary, acc_summary, f1_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.train.SummaryWriter(dev_summary_dir, sess.graph_def)

            # Test summaries
            test_summary_op = tf.merge_summary([loss_summary, acc_summary, f1_summary])
            test_summary_dir = os.path.join(out_dir, "summaries", "test")
            test_summary_writer = tf.train.SummaryWriter(test_summary_dir, sess.graph_def)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.all_variables())

            # Initialize all variables
            sess.run(tf.initialize_all_variables())

            def train_step(x_batch, y_batch, x_batch_lex):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    # lexicon
                    cnn.input_x_lexicon: x_batch_lex,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1, hh, hhl = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy,
                     cnn.neg_r, cnn.neg_p, cnn.f1_neg, cnn.f1_pos, cnn.avg_f1, cnn.h_lex_list[0], cnn.h_list[0]],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                # print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                print("{}: step {}, loss {:g}, acc {:g}, neg_r {:g} neg_p {:g} f1_neg {:g}, f1_pos {:g}, f1 {:g}".
                      format(time_str, step, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, x_batch_lex, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    # lexicon
                    cnn.input_x_lexicon: x_batch_lex,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1 = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy,
                     cnn.neg_r, cnn.neg_p, cnn.f1_neg, cnn.f1_pos, cnn.avg_f1],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}, neg_r {:g} neg_p {:g} f1_neg {:g}, f1_pos {:g}, f1 {:g}".
                      format(time_str, step, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1))
                if writer:
                    writer.add_summary(summaries, step)

                return avg_f1
                # if avg_f1>56:
                #     return True
                #
                # else:
                #     return False

            def test_step(x_batch, y_batch, x_batch_lex, writer=None):
                """
                Evaluates model on a test set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    # lexicon
                    cnn.input_x_lexicon: x_batch_lex,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1 = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy,
                     cnn.neg_r, cnn.neg_p, cnn.f1_neg, cnn.f1_pos, cnn.avg_f1],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}, neg_r {:g} neg_p {:g} f1_neg {:g}, f1_pos {:g}, f1 {:g}".
                      format(time_str, step, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1))
                if writer:
                    writer.add_summary(summaries, step)

                # if avg_f1 > 56:
                #     return True
                #
                # else:
                #     return False

                return avg_f1

            # Generate batches
            batches = cnn_data_helpers.batch_iter(
                list(zip(x_train, y_train, x_lex_train)), FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch, x_batch_lex = zip(*batch)
                train_step(x_batch, y_batch, x_batch_lex)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    curr_af1_dev = dev_step(x_dev, y_dev, x_lex_dev, writer=dev_summary_writer)
                    # path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    # print("Saved model checkpoint to {}\n".format(path))
                    print("")

                    print("\nTest:")
                    curr_af1_tst = test_step(x_test, y_test, x_lex_test, writer=test_summary_writer)
                    # path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    # print("Saved model checkpoint to {}\n".format(path))
                    print("")

                    if curr_af1_dev > max_af1_dev:
                        max_af1_dev = curr_af1_dev
                        index_at_max_af1_dev = current_step
                        af1_tst_at_max_af1_dev = curr_af1_tst

                    print('\nStatus:\n[%d] Max f1 for dev (%f), Max f1 for tst (%f)\n' % (
                        index_at_max_af1_dev, max_af1_dev, af1_tst_at_max_af1_dev))
                    sys.stdout.flush()
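
The run_train above pulls most of its hyper-parameters from a FLAGS object defined elsewhere in the project. A minimal sketch of what the call site might look like, using the flag names referenced in the excerpt but placeholder default values (the project's real settings are not shown here):

import tensorflow as tf

# Hypothetical flag definitions; names mirror the FLAGS used above,
# all default values are placeholders.
tf.app.flags.DEFINE_string("filter_sizes", "2,3,4", "Comma-separated convolution filter sizes")
tf.app.flags.DEFINE_integer("num_filters", 64, "Number of filters per filter size")
tf.app.flags.DEFINE_float("dropout_keep_prob", 0.5, "Dropout keep probability")
tf.app.flags.DEFINE_float("l2_reg_lambda", 0.0, "L2 regularization lambda")
tf.app.flags.DEFINE_integer("batch_size", 64, "Batch size")
tf.app.flags.DEFINE_integer("num_epochs", 25, "Number of training epochs")
tf.app.flags.DEFINE_integer("evaluate_every", 100, "Evaluate on dev/test every this many steps")
tf.app.flags.DEFINE_boolean("random_seed", True, "If False, fix the graph seed to seed_number")
tf.app.flags.DEFINE_integer("seed_number", 42, "Seed used when random_seed is False")
tf.app.flags.DEFINE_boolean("allow_soft_placement", True, "Allow soft device placement")
tf.app.flags.DEFINE_boolean("log_device_placement", False, "Log op/device placement")
FLAGS = tf.app.flags.FLAGS

if __name__ == '__main__':
    # Dimensions are illustrative only: e.g. a 400-d word2vec model and a
    # 15-d unigram lexicon with 9 lexicon filters.
    run_train(w2vdim=400, lexdim=15, lexnumfilters=9, sample_test=True)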
Example #3
File: cnn_train.py  Project: wk0/cnntweets
                 cnn.neg_r, cnn.neg_p, cnn.f1_neg, cnn.f1_pos, cnn.avg_f1],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, acc {:g}, neg_r {:g} neg_p {:g} f1_neg {:g}, f1_pos {:g}, f1 {:g}".
                  format(time_str, step, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1))
            if writer:
                writer.add_summary(summaries, step)

            if avg_f1 > 56:
                return True

            else:
                return False

        # Generate batches
        batches = cnn_data_helpers.batch_iter(
            list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs)
        # Training loop. For each batch...
        for batch in batches:
            x_batch, y_batch = zip(*batch)
            train_step(x_batch, y_batch)
            current_step = tf.train.global_step(sess, global_step)
            if current_step % FLAGS.evaluate_every == 0:
                print("\nEvaluation:")
                if dev_step(x_dev, y_dev, writer=dev_summary_writer) is True:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
                print("")

            # if current_step % FLAGS.test_every == 0:
            #     print("\nTest:")
            #     if test_step(x_test, y_test, writer=test_summary_writer) is True:
Example #4
def run_train(w2vdim, w2vnumfilters, lexdim, lexnumfilters, randomseed,
              withlexicon):
    max_len = 60

    with Timer("lex"):
        norm_model, raw_model = load_lexicon_unigram(lexdim)

    with Timer("w2v"):
        w2vmodel = load_w2v(w2vdim)

    unigram_lexicon_model = norm_model
    # unigram_lexicon_model = raw_model

    x_train, y_train, x_lex_train = cnn_data_helpers.load_data(
        'trn', w2vmodel, unigram_lexicon_model, max_len)
    x_dev, y_dev, x_lex_dev = cnn_data_helpers.load_data(
        'dev', w2vmodel, unigram_lexicon_model, max_len)
    x_test, y_test, x_lex_test = cnn_data_helpers.load_data(
        'tst', w2vmodel, unigram_lexicon_model, max_len)

    del (w2vmodel)
    del (norm_model)
    del (raw_model)
    gc.collect()

    print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

    # Training
    # ==================================================
    if randomseed > 0:
        tf.set_random_seed(randomseed)
    with tf.Graph().as_default():
        max_af1_dev = 0
        index_at_max_af1_dev = 0
        af1_tst_at_max_af1_dev = 0

        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            if randomseed > 0:
                tf.set_random_seed(randomseed)
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=3,
                          embedding_size=w2vdim,
                          embedding_size_lex=lexdim,
                          lex_filter_size=lexnumfilters,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=w2vnumfilters,
                          with_lexicon=withlexicon,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.histogram_summary(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.scalar_summary(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.merge_summary(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.scalar_summary("loss", cnn.loss)
            acc_summary = tf.scalar_summary("accuracy", cnn.accuracy)
            f1_summary = tf.scalar_summary("avg_f1", cnn.avg_f1)

            # Train Summaries
            train_summary_op = tf.merge_summary(
                [loss_summary, acc_summary, f1_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.train.SummaryWriter(
                train_summary_dir, sess.graph_def)

            # Dev summaries
            dev_summary_op = tf.merge_summary(
                [loss_summary, acc_summary, f1_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.train.SummaryWriter(
                dev_summary_dir, sess.graph_def)

            # Test summaries
            test_summary_op = tf.merge_summary(
                [loss_summary, acc_summary, f1_summary])
            test_summary_dir = os.path.join(out_dir, "summaries", "test")
            test_summary_writer = tf.train.SummaryWriter(
                test_summary_dir, sess.graph_def)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.all_variables())

            # Initialize all variables
            sess.run(tf.initialize_all_variables())

            def train_step(x_batch, y_batch, x_batch_lex):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    # lexicon
                    cnn.input_x_lexicon: x_batch_lex,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1 = sess.run(
                    [
                        train_op, global_step, train_summary_op, cnn.loss,
                        cnn.accuracy, cnn.neg_r, cnn.neg_p, cnn.f1_neg,
                        cnn.f1_pos, cnn.avg_f1
                    ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                # print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                #print("{}: step {}, loss {:g}, acc {:g}, neg_r {:g} neg_p {:g} f1_neg {:g}, f1_pos {:g}, f1 {:g}".
                #      format(time_str, step, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, x_batch_lex, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    # lexicon
                    cnn.input_x_lexicon: x_batch_lex,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1 = sess.run(
                    [
                        global_step, dev_summary_op, cnn.loss, cnn.accuracy,
                        cnn.neg_r, cnn.neg_p, cnn.f1_neg, cnn.f1_pos,
                        cnn.avg_f1
                    ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print(
                    "{} : {} step {}, loss {:g}, acc {:g}, neg_r {:g} neg_p {:g} f1_neg {:g}, f1_pos {:g}, f1 {:g}"
                    .format("DEV", time_str, step, loss, accuracy, neg_r,
                            neg_p, f1_neg, f1_pos, avg_f1))
                if writer:
                    writer.add_summary(summaries, step)

                return avg_f1

            def test_step(x_batch, y_batch, x_batch_lex, writer=None):
                """
                Evaluates model on a test set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    # lexicon
                    cnn.input_x_lexicon: x_batch_lex,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1 = sess.run(
                    [
                        global_step, dev_summary_op, cnn.loss, cnn.accuracy,
                        cnn.neg_r, cnn.neg_p, cnn.f1_neg, cnn.f1_pos,
                        cnn.avg_f1
                    ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print(
                    "{} : {} step {}, loss {:g}, acc {:g}, neg_r {:g} neg_p {:g} f1_neg {:g}, f1_pos {:g}, f1 {:g}"
                    .format("TEST", time_str, step, loss, accuracy, neg_r,
                            neg_p, f1_neg, f1_pos, avg_f1))
                if writer:
                    writer.add_summary(summaries, step)

                return avg_f1

            # Generate batches
            batches = cnn_data_helpers.batch_iter(
                list(zip(x_train, y_train, x_lex_train)), FLAGS.batch_size,
                FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch, x_batch_lex = zip(*batch)
                train_step(x_batch, y_batch, x_batch_lex)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("Evaluation:")
                    curr_af1_dev = dev_step(x_dev,
                                            y_dev,
                                            x_lex_dev,
                                            writer=dev_summary_writer)
                    # path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    # print("Saved model checkpoint to {}\n".format(path))

                    curr_af1_tst = test_step(x_test,
                                             y_test,
                                             x_lex_test,
                                             writer=test_summary_writer)
                    # path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    # print("Saved model checkpoint to {}\n".format(path))

                    if curr_af1_dev > max_af1_dev:
                        max_af1_dev = curr_af1_dev
                        index_at_max_af1_dev = current_step
                        af1_tst_at_max_af1_dev = curr_af1_tst

                    print('Status: [%d] Max f1 for dev (%f), Max f1 for tst (%f)\n' % (
                        index_at_max_af1_dev, max_af1_dev,
                        af1_tst_at_max_af1_dev))
                    sys.stdout.flush()
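
Both full examples are written against the TensorFlow 0.x summary and initialization APIs, which were renamed in TensorFlow 1.0. A small self-contained sketch of the corresponding 1.x calls (the mapping is standard; the snippet below is illustrative, not a port of this project):

import tensorflow as tf

# 0.x call used above              -> 1.x equivalent
#   tf.scalar_summary              -> tf.summary.scalar
#   tf.histogram_summary           -> tf.summary.histogram
#   tf.merge_summary               -> tf.summary.merge
#   tf.train.SummaryWriter         -> tf.summary.FileWriter
#   tf.all_variables               -> tf.global_variables
#   tf.initialize_all_variables    -> tf.global_variables_initializer

with tf.Graph().as_default(), tf.Session() as sess:
    x = tf.Variable(0.0, name="x")
    loss = tf.square(x - 1.0)
    loss_summary = tf.summary.scalar("loss", loss)                # was tf.scalar_summary
    summary_op = tf.summary.merge([loss_summary])                 # was tf.merge_summary
    writer = tf.summary.FileWriter("/tmp/summaries", sess.graph)  # was tf.train.SummaryWriter
    sess.run(tf.global_variables_initializer())                   # was tf.initialize_all_variables
    summary, _ = sess.run([summary_op, loss])
    writer.add_summary(summary, global_step=0)
    writer.close()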