Example #1
def train(args: Dict):
    """ Train the NMT Model.
    @param args (Dict): args from cmd line
    :param args:
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('use device: %s' % device)

    train_data_src = read_corpus(args['--train-src'], source='src')
    train_data_tgt = read_corpus(args['--train-tgt'], source='tgt')

    dev_data_src = read_corpus(args['--dev-src'], source='src')
    dev_data_tgt = read_corpus(args['--dev-tgt'], source='tgt')

    train_data = list(zip(train_data_src, train_data_tgt))
    dev_data = list(zip(dev_data_src, dev_data_tgt))

    train_batch_size = int(args['--batch-size'])
    N = int(args['--N'])
    d_model = int(args['--d_model'])
    d_ff = int(args['--d_ff'])
    h = int(args['--h'])
    dropout = float(args['--dropout'])

    valid_niter = int(args['--valid-niter'])
    log_every = int(args['--log-every'])
    model_save_path = args['--save-to']
    lr = float(args['--lr'])

    vocab = Vocab.load(args['--vocab'])
    vocab_mask = torch.ones(len(vocab.tgt))
    vocab_mask[vocab.tgt['<pad>']] = 0

    model = make_model(len(vocab.src), len(vocab.tgt), N, d_model, d_ff, h, dropout)
    model = model.to(device)

    optimizer = NoamOpt(model.src_embed[0].d_model, 1, 400,
                        torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.98), eps=1e-9))

    num_trial = 0
    train_iter = patience = cum_loss = report_loss = cum_tgt_words = report_tgt_words = 0
    cum_examples = report_examples = epoch = valid_num = 0
    hist_valid_scores = []
    train_time = begin_time = time.time()
    print('begin Maximum Likelihood Training')

    while True:
        epoch += 1
        for src_sents, tgt_sents in batch_iter(train_data, batch_size=train_batch_size, shuffle=True):
            train_iter += 1
            optimizer.zero_grad()
            batch_size = len(src_sents)

            example_losses = - model(src_sents, tgt_sents) #(batch_size,)
            batch_loss = example_losses.sum()
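
Example #1 depends on a batch_iter helper that is not shown above. Below is a minimal sketch of what such a generator could look like for a list of (src_sents, tgt_sents) pairs, assuming it only needs to shuffle, chunk, and length-sort each batch; the implementation is an assumption, not part of the original example.

import math
import random

def batch_iter(data, batch_size, shuffle=False):
    """Yield (src_sents, tgt_sents) batches from a list of sentence pairs.

    Hypothetical stand-in for the helper used in Example #1.
    """
    batch_num = math.ceil(len(data) / batch_size)
    index_array = list(range(len(data)))
    if shuffle:
        random.shuffle(index_array)
    for i in range(batch_num):
        indices = index_array[i * batch_size:(i + 1) * batch_size]
        examples = sorted((data[idx] for idx in indices),
                          key=lambda e: len(e[0]), reverse=True)
        src_sents = [e[0] for e in examples]
        tgt_sents = [e[1] for e in examples]
        yield src_sents, tgt_sents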
Example #2
def evaluate(sess, x_, y_):
    """评估在某一数据上的准确率和损失"""
    data_len = len(x_)
    batch_eval = batch_iter(x_, y_, 128)
    total_loss = 0.0
    total_acc = 0.0
    for x_batch, y_batch in batch_eval:
        batch_len = len(x_batch)
        feed_dict = feed_data(x_batch, y_batch, 1.0)
        loss, acc = sess.run([model.loss, model.acc], feed_dict=feed_dict)
        total_loss += loss * batch_len
        total_acc += acc * batch_len

    return total_loss / data_len, total_acc / data_len
Example #3
def evaluate(sess, x_, y_):
    """Evaluate the model on a dataset; return loss and accuracy."""
    data_len = len(x_)
    batch_eval = batch_iter(x_, y_, 128)
    total_loss = 0.0
    total_acc = 0.0
    for x_batch, y_batch in batch_eval:
        batch_len = len(x_batch)
        feed_dict = {
            model.input_x: x_batch,
            model.input_y: y_batch,
            model.dropout_keep_prob: 1.0
        }
        loss, acc = sess.run([model.loss, model.acc], feed_dict=feed_dict)
        total_loss += loss * batch_len
        total_acc += acc * batch_len

    return total_loss / data_len, total_acc / data_len
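
The two evaluate variants above differ only in how they build the feed dictionary: the first delegates to a feed_data helper that is not shown, while the second spells the dictionary out inline. A plausible reconstruction of that helper, assuming the same three placeholders and the module-level model object, is sketched below.

def feed_data(x_batch, y_batch, keep_prob):
    """Hypothetical helper mirroring the inline feed dict in the second variant."""
    return {
        model.input_x: x_batch,
        model.input_y: y_batch,
        model.dropout_keep_prob: keep_prob,
    }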
Example #4
def train(x_train, y_train, vocab_processor, x_dev, y_dev):
    # Training
    # ==================================================

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=len(vocab_processor.vocabulary_),
                          embedding_size=FLAGS.embedding_dim,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                #print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = batch_iter(list(zip(x_train, y_train)), FLAGS.batch_size,
                                 FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
Example #5
File: main.py Project: zjgtan/demo
    model = TextCNN(len(word_dict), max_len, 2)

    val_accs = []

    for train_index, test_index in kFold.split(sentences):
        sentences_train, sentences_test = sentences[train_index], sentences[
            test_index]
        labels_train, labels_test = labels[train_index], labels[test_index]

        session = tf.Session()
        session.run(tf.global_variables_initializer())

        best_acc = 0
        for epoch in range(num_epoch):
            for batch_idx, (sentences_batch, labels_batch) in enumerate(
                    batch_iter(sentences_train, labels_train, batch_size)):
                feed_dict = model.feed_dict(sentences_batch, labels_batch)
                loss_train, _ = session.run([model.loss, model.optim],
                                            feed_dict=feed_dict)

                if batch_idx % 50 == 0:
                    print "Epoch: %d, Batch: %d, Train Loss: %f" % (
                        epoch, batch_idx, loss_train)

            feed_dict = model.feed_dict(sentences_test, labels_test)
            acc_test, = session.run([model.acc], feed_dict=feed_dict)
            if best_acc < acc_test:
                best_acc = acc_test

        val_accs.append(best_acc)
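
Example #5 relies on a kFold splitter and a two-array batch_iter(sentences, labels, batch_size) defined elsewhere in its project. A minimal sketch of how they might be set up is given below, assuming scikit-learn's KFold and NumPy arrays for sentences and labels; the names and the 5-fold choice are assumptions.

import numpy as np
from sklearn.model_selection import KFold

kFold = KFold(n_splits=5, shuffle=True, random_state=42)

def batch_iter(x, y, batch_size):
    """Yield aligned (x, y) mini-batches in a random order; hypothetical helper."""
    indices = np.random.permutation(len(x))
    for start in range(0, len(x), batch_size):
        batch_idx = indices[start:start + batch_size]
        yield x[batch_idx], y[batch_idx]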
Example #6
def train(x_text, x_train, x_unlabeled, x_dev, vocab_processor, y_train,
          y_unlabeled, y_dev, x_text_italian, x_train_italian,
          x_unlabeled_italian, x_dev_italian, vocab_processor_italian,
          y_train_italian, y_unlabeled_italian, y_dev_italian,
          x_english_to_italian, x_italian_to_english):
    # Training
    # ==================================================

    if SVM or NB:
        from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
        from sklearn.pipeline import Pipeline
        from sklearn.linear_model import SGDClassifier
        from sklearn.feature_extraction.text import TfidfVectorizer
        from sklearn.naive_bayes import BernoulliNB
        from NLTKPreprocessor import NLTKPreprocessor
        from NLTKPreprocessor import identity

        if SVM:
            text_clf = Pipeline([
                ('vect', NLTKPreprocessor('English')),
                ('tfidf',
                 TfidfVectorizer(tokenizer=identity,
                                 preprocessor=None,
                                 lowercase=False)),
                ('clf-svm',
                 SGDClassifier(loss='modified_huber',
                               penalty='l2',
                               alpha=1e-3,
                               random_state=42)),
            ])
            text_clf_italian = Pipeline([
                ('vect', NLTKPreprocessor('Italian')),
                ('tfidf',
                 TfidfVectorizer(tokenizer=identity,
                                 preprocessor=None,
                                 lowercase=False)),
                ('clf-svm',
                 SGDClassifier(loss='modified_huber',
                               penalty='l2',
                               alpha=1e-3,
                               random_state=42)),
            ])
        elif NB:
            text_clf = Pipeline([
                ('vect', NLTKPreprocessor('English')),
                ('tfidf',
                 TfidfVectorizer(tokenizer=identity,
                                 preprocessor=None,
                                 lowercase=False)),
                ('clf-svm', BernoulliNB()),
            ])
            text_clf_italian = Pipeline([
                ('vect', NLTKPreprocessor('Italian')),
                ('tfidf',
                 TfidfVectorizer(tokenizer=identity,
                                 preprocessor=None,
                                 lowercase=False)),
                ('clf-svm', BernoulliNB()),
            ])
        labels = np.argmax(y_train, axis=1) + 1
        labels_italian = np.argmax(y_train_italian, axis=1) + 1

        for i in range(FLAGS.num_epochs):
            _ = text_clf.fit(x_train, np.ravel(labels))
            _ = text_clf_italian.fit(x_train_italian, np.ravel(labels_italian))
            predicted_svm = text_clf.predict(x_dev)
            predicted_svm_italian = text_clf_italian.predict(x_dev_italian)
            # SELF-TRAINING
            predicted_svm_unlabeled = text_clf.predict_proba(x_unlabeled)
            predicted_svm_unlabeled_italian = text_clf.predict_proba(
                x_unlabeled_italian)
            maxs = np.max(predicted_svm_unlabeled, axis=1)
            maxs_italian = np.max(predicted_svm_unlabeled_italian, axis=1)

            next_italian = []
            next = []
            next_y = []
            next_y_italian = []
            next_unlabeled = []
            next_unlabeled_italian = []

            next_english_to_italian = []
            next_italian_to_english = []

            for i in range(len(maxs)):
                if maxs[i] > confidence:
                    next.append(x_unlabeled[i])
                    next_y.append(np.argmax(predicted_svm_unlabeled[i]) + 1)
                    # CO-TRAINING
                    next_italian.append(x_english_to_italian[i])
                    next_y_italian.append(
                        np.argmax(predicted_svm_unlabeled[i]) + 1)

                else:
                    next_unlabeled.append(x_unlabeled[i])
                    # CO-TRAINING
                    next_english_to_italian.append(x_english_to_italian[i])

            for i in range(len(maxs_italian)):
                if maxs_italian[i] > confidence:
                    next_italian.append(x_unlabeled_italian[i])
                    next_y_italian.append(
                        np.argmax(predicted_svm_unlabeled_italian[i]) + 1)
                    # CO-TRAINING
                    next.append(x_italian_to_english[i])
                    next_y.append(
                        np.argmax(predicted_svm_unlabeled_italian[i]) + 1)
                else:
                    next_unlabeled_italian.append(x_unlabeled_italian[i])
                    # CO-TRAINING
                    next_italian_to_english.append(x_italian_to_english[i])

            x_train += next
            x_train_italian += next_italian
            labels = np.append(labels, next_y)
            labels_italian = np.append(labels_italian, next_y_italian)
            x_unlabeled = next_unlabeled
            x_unlabeled_italian = next_unlabeled_italian
            # CO-TRAINING
            x_italian_to_english = next_italian_to_english
            x_english_to_italian = next_english_to_italian

            # END OF SELF-TRAINING
            labels_dev = np.argmax(y_dev, axis=1) + 1
            labels_dev_italian = np.argmax(y_dev_italian, axis=1) + 1

            matrix = confusion_matrix(labels_dev, predicted_svm)
            matrix_italian = confusion_matrix(labels_dev_italian,
                                              predicted_svm_italian)
            # predicted_svm == np.ravel(labels_dev))
            print("English evaluation: ")
            print(matrix)
            print("accuracy {}, precision {}, recall {}, f1_score {}".format(
                accuracy_score(labels_dev, predicted_svm),
                precision_score(labels_dev, predicted_svm),
                recall_score(labels_dev, predicted_svm),
                f1_score(labels_dev, predicted_svm)))
            print("Italian evaluation: ")
            print(matrix_italian)
            print("accuracy {}, precision {}, recall {}, f1_score {}".format(
                accuracy_score(labels_dev_italian, predicted_svm_italian),
                precision_score(labels_dev_italian, predicted_svm_italian),
                recall_score(labels_dev_italian, predicted_svm_italian),
                f1_score(labels_dev_italian, predicted_svm_italian)))
            print()

    if CNN:

        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            if PRETRAINEDEMBEDDING and (main_pre_trained_embeddings.Embedding
                                        == 'ELMo'):
                import bert_tokenization
                seq = max([len(x.split(" ")) for x in x_train])
                seq2 = max([len(x.split(" ")) for x in x_train_italian])
            elif PRETRAINEDEMBEDDING and (main_pre_trained_embeddings.Embedding
                                          == 'Bert'):
                seq = max([len(x) for x in x_train])
                seq2 = max([len(x) for x in x_train_italian])
            else:
                seq = x_train.shape[1]
                seq2 = x_train_italian.shape[1]

            cnn = TextCNN(sequence_length=seq,
                          num_classes=y_train.shape[1],
                          vocab_size=len(vocab_processor.vocabulary_),
                          embedding_size=FLAGS.embedding_dim,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          text=x_text,
                          l2_reg_lambda=FLAGS.l2_reg_lambda,
                          binary=BINARY,
                          italian=False)

            cnn_italian = TextCNN(
                sequence_length=seq2,
                num_classes=y_train_italian.shape[1],
                vocab_size=len(vocab_processor_italian.vocabulary_),
                embedding_size=FLAGS.embedding_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                text=x_text_italian,
                l2_reg_lambda=FLAGS.l2_reg_lambda,
                binary=BINARY,
                italian=True)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            grads_and_vars_italian = optimizer.compute_gradients(
                cnn_italian.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)
            train_op_italian = optimizer.apply_gradients(
                grads_and_vars_italian, global_step=global_step)
            if PRETRAINEDEMBEDDING and (
                    main_pre_trained_embeddings.Embedding == "fastText"
                    or main_pre_trained_embeddings.Embedding == "GloVe"
                    or main_pre_trained_embeddings.Embedding == "Bert"):
                embedding_init = cnn.W.assign(cnn.embedding_placeholder)
                embedding_init_italian = cnn_italian.W.assign(
                    cnn_italian.embedding_placeholder)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)
            precision_summary = tf.summary.scalar("precision", cnn.precision)
            recall_summary = tf.summary.scalar("recall", cnn.recall)

            conf_summary = tf.summary.tensor_summary("confusion",
                                                     cnn.confusion)

            loss_summary_italian = tf.summary.scalar("loss", cnn_italian.loss)
            acc_summary_italian = tf.summary.scalar("accuracy",
                                                    cnn_italian.accuracy)
            precision_summary_italian = tf.summary.scalar(
                "precision", cnn_italian.precision)
            recall_summary_italian = tf.summary.scalar("recall",
                                                       cnn_italian.recall)

            conf_summary_italian = tf.summary.tensor_summary(
                "confusion", cnn_italian.confusion)

            # Train Summaries
            train_summary_op = tf.summary.merge([
                loss_summary, acc_summary, grad_summaries_merged, conf_summary,
                loss_summary_italian, acc_summary_italian, conf_summary_italian
            ])  # , conf_summary
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            if PRETRAINEDEMBEDDING and (
                    main_pre_trained_embeddings.Embedding == "fastText"
                    or main_pre_trained_embeddings.Embedding == "GloVe"
                    or main_pre_trained_embeddings.Embedding == "Bert"):
                sess.run(
                    [
                        tf.global_variables_initializer(),
                        tf.local_variables_initializer(), embedding_init,
                        embedding_init_italian
                    ],
                    feed_dict={cnn.embedding_placeholder: cnn.weights}
                )  #, x_init], feed_dict={cnn.embedding_placeholder: cnn.weights, cnn.x_placeholder: cnn.x})
                sess.run(
                    [
                        tf.global_variables_initializer(),
                        tf.local_variables_initializer(), embedding_init,
                        embedding_init_italian
                    ],
                    feed_dict={
                        cnn_italian.embedding_placeholder: cnn_italian.weights
                    }
                )  #, x_init], feed_dict={cnn.embedding_placeholder: cnn.weights, cnn.x_placeholder: cnn.x})

            else:  #if not PRETRAINEDEMBEDDING:
                sess.run(tf.global_variables_initializer())
            #else:
            #sess.run([tf.global_variables_initializer(), main_pre_trained_embeddings.embeddings], feed_dict={cnn.train_x: cnn.train_x})

            def train_step(cnn, x_batch, y_batch, x_unlabeled, y_unlabeled, x,
                           y):
                """
                A single training step
                """
                # x_batch = tf.cast(x_batch, tf.float32)
                # a = result + x_batch
                a = []
                for i in x:
                    a.append(i)
                for i in x_batch:
                    a.append(i)
                b = []
                for i in y:
                    b.append(i)
                for i in y_batch:
                    b.append(i)

                if PRETRAINEDEMBEDDING and (
                        main_pre_trained_embeddings.Embedding == "ELMo"
                        or main_pre_trained_embeddings.Embedding == "Bert"):
                    feed_dict = {
                        cnn.unlabeled_training: x_unlabeled,
                        cnn.unlabeled_labels: y_unlabeled,
                        cnn.n: len(x_batch),
                        cnn.input_x: a,
                        cnn.x_text: x_batch,
                        cnn.input_y: b,
                        cnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
                    }
                else:

                    feed_dict = {
                        cnn.unlabeled_training: x_unlabeled,
                        cnn.y_unlabeled: y_unlabeled,
                        cnn.n: len(x_batch),
                        cnn.input_x: a,
                        cnn.input_y: b,
                        cnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
                    }

                if TRAIN and COTRAIN:
                    _, step, summaries, loss, accuracy, precision, recall, confusion, cross, next, next_y, maxs, scores, labels, y_predictions, result, accuracy_unlabeled, confusion_unlabeled, scores_unlabeled = sess.run(
                        [
                            train_op, global_step, train_summary_op, cnn.loss,
                            cnn.accuracy, cnn.precision, cnn.recall,
                            cnn.confusion, cnn.cross, cnn.next, cnn.next_y,
                            cnn.maxs, cnn.scores_unlabeled, cnn.predictions,
                            cnn.predict, cnn.results, cnn.accuracy_unlabeled,
                            cnn.confusion_unlabeled, cnn.scores_unlabeled
                        ], feed_dict)
                    # print(sess.run(cnn.results, {cnn.x_unlabeled: x_unlabeled}))
                    # print(cnn.scores_unlabeled)
                    # print(scores)
                    print("Number of tweets above confidence threshold: ",
                          len(cross))
                    print(confusion_unlabeled)
                    result = result[1:]
                else:
                    y_predictions = []
                    result = []
                    next = x_unlabeled
                    next_y = y_unlabeled
                    _, step, summaries, loss, accuracy, precision, recall, confusion = sess.run(
                        [
                            train_op, global_step, train_summary_op, cnn.loss,
                            cnn.accuracy, cnn.precision, cnn.recall,
                            cnn.confusion
                        ], feed_dict)
                    #                 print(confusion)

                time_str = datetime.datetime.now().isoformat()
                print(
                    "{}: step {}, loss {:g}, accuracy {:g}, precision {}, recall {}"
                    .format(time_str, step, loss, accuracy, precision, recall))
                train_summary_writer.add_summary(summaries, step)
                b = []
                for i in y:
                    b.append(i)
                for i in y_predictions:
                    b.append(i)
                a = []
                for i in x:
                    a.append(i)
                for i in result:
                    a.append(i)

                return next, next_y, a, b

            def dev_step(cnn, x_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                if PRETRAINEDEMBEDDING and (
                        main_pre_trained_embeddings.Embedding == "ELMo"
                        or main_pre_trained_embeddings.Embedding == "Bert"):
                    feed_dict = {
                        cnn.unlabeled_training: x_unlabeled,
                        cnn.n: len(x_batch),
                        cnn.x_text: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: 1.0,
                    }
                else:
                    feed_dict = {
                        cnn.unlabeled_training: x_unlabeled,
                        cnn.n: len(x_batch),
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: 1.0,
                    }
                step, summaries, loss, accuracy, precision, recall, confusion = sess.run(  #, confusion
                    [
                        global_step, dev_summary_op, cnn.loss_dev,
                        cnn.accuracy_dev, cnn.precision_dev, cnn.recall_dev,
                        cnn.confusion_dev
                    ],  #, cnn.confusion],
                    feed_dict)
                print(confusion)

                time_str = datetime.datetime.now().isoformat()
                print(
                    "{}: step {}, loss {:g}, accuracy {:g}, precision {}, recall {}"
                    .format(time_str, step, loss, accuracy, precision, recall))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches

            if PRETRAINEDEMBEDDING and (main_pre_trained_embeddings.Embedding
                                        == "ELMo"):
                x_ = [[np.array(i)] for i in range(len(x_train))]
                batches = preprocess.batch_iter(list(zip(x_, y_train)),
                                                FLAGS.batch_size,
                                                FLAGS.num_epochs)
                # TODO ITALIAN PART
            else:
                batches = preprocess.batch_iter(list(zip(x_train, y_train)),
                                                FLAGS.batch_size,
                                                FLAGS.num_epochs)
                batches_italian = preprocess.batch_iter(
                    list(zip(x_train_italian, y_train_italian)),
                    FLAGS.batch_size, FLAGS.num_epochs)

            # Training loop. For each batch...

            unlabeled_training = []
            for i in x_unlabeled:
                unlabeled_training.append([])
                for j in i:
                    unlabeled_training[-1].append(j)

            unlabeled_training_italian = []
            for i in x_unlabeled_italian:
                unlabeled_training_italian.append([])
                for j in i:
                    unlabeled_training_italian[-1].append(j)

            result = ()
            result_italian = ()
            y_prediction = ()
            y_prediction_italian = ()

            for batch, batch_italian in zip(batches, batches_italian):
                x_batch, y_batch = zip(*batch)
                x_batch_italian, y_batch_italian = zip(*batch_italian)

                current_step = tf.train.global_step(sess, global_step) + 1
                if current_step % FLAGS.cotrain_every == 0 and current_step > start_up:
                    COTRAIN = True
                else:
                    COTRAIN = False

                if PRETRAINEDEMBEDDING and (
                        main_pre_trained_embeddings.Embedding == "GloVe" or
                        main_pre_trained_embeddings.Embedding == "fastText"):
                    max_document_length = max(
                        [len(x.split(" ")) for x in x_text])
                    max_document_length_italian = max(
                        [len(x.split(" ")) for x in x_text_italian])
                    x = []
                    for counter, j in enumerate(x_batch):
                        x.append([])
                        x[counter] = []
                        for i in j.split():
                            index = cnn.word2idx.get(
                                i,
                                len(main_pre_trained_embeddings.word2idx) - 1)
                            # print(i,index)
                            x[counter].append(index)
                        while len(x[counter]) < max_document_length:
                            x[counter].append(
                                len(main_pre_trained_embeddings.word2idx) - 1)

                    x = np.array(x)

                    x_italian = []
                    for counter, j in enumerate(x_text_italian):
                        x_italian.append([])
                        x_italian[counter] = []
                        for i in j.split():
                            index = cnn_italian.word2idx.get(
                                i,
                                len(main_pre_trained_embeddings.word2idx) - 1)
                            # print(i,index)
                            x_italian[counter].append(index)
                        while len(x_italian[counter]
                                  ) < max_document_length_italian:
                            x_italian[counter].append(
                                len(main_pre_trained_embeddings.word2idx) - 1)

                    x_batch = np.array(x)
                    x_batch_italian = np.array(x_italian)

                if PRETRAINEDEMBEDDING and main_pre_trained_embeddings.Embedding == "ELMo":
                    x_t = []
                    for i in x_batch:
                        x_t.append(x_train[i[0]])
                    for i in range(len(x_t)):
                        while len(x_t[i].split()) < seq:
                            x_t[i] += ' unk '

                    #  TODO: Italian Part

                    unlabeled_training, y_unlabeled, result, y_prediction = train_step(
                        cnn, x_t, y_batch, unlabeled_training, y_unlabeled,
                        result, y_prediction)
                    unlabeled_training, y_unlabeled, result, y_prediction = train_step(
                        cnn_italian, x_t, y_batch, unlabeled_training,
                        y_unlabeled, result, y_prediction)
                else:
                    unlabeled_training, y_unlabeled, result, y_prediction = train_step(
                        cnn, x_batch, y_batch, unlabeled_training, y_unlabeled,
                        result, y_prediction)
                    unlabeled_training_italian, y_unlabeled_italian, result_italian, y_prediction_italian = train_step(
                        cnn_italian, x_batch_italian, y_batch_italian,
                        unlabeled_training_italian, y_unlabeled_italian,
                        result_italian, y_prediction_italian)

                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")

                    if PRETRAINEDEMBEDDING and main_pre_trained_embeddings.Embedding == "ELMo":
                        for i in range(len(x_dev)):
                            while len(x_dev[i].split()) < seq:
                                x_dev[i] += ' unk '
                    dev_step(cnn, x_dev, y_dev, writer=dev_summary_writer)
                    dev_step(cnn_italian,
                             x_dev,
                             y_dev,
                             writer=dev_summary_writer)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
                if current_step == FLAGS.num_epochs:
                    break
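
Example #6 reads a number of module-level switches and thresholds (SVM, NB, CNN, BINARY, PRETRAINEDEMBEDDING, TRAIN, confidence, start_up) that its project defines elsewhere. The values below are only plausible defaults written out to make the control flow easier to follow; they are not the original configuration.

SVM = False                  # train a scikit-learn SGDClassifier baseline
NB = False                   # train a Bernoulli naive-Bayes baseline
CNN = True                   # train the TextCNN models
BINARY = True                # binary rather than multi-class classification
PRETRAINEDEMBEDDING = False  # use pre-trained embeddings (GloVe/fastText/ELMo/Bert)
TRAIN = True                 # enable the co-training branch inside train_step
confidence = 0.9             # self-/co-training confidence threshold
start_up = 100               # warm-up steps before co-training is enabled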
Example #7
def train(model):
    """Train the model: load data, run training, save the best checkpoint, and report results."""

    # Configure TensorBoard summaries
    print('Configuring TensorBoard and Saver ...')
    if not os.path.exists(tensorboard_dir):
        os.mkdir(tensorboard_dir)
    tf.summary.scalar('loss', model.loss)
    tf.summary.scalar('accuracy', model.acc)
    merged_summary = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(tensorboard_dir)

    # Configure the checkpoint saver
    saver = tf.train.Saver()
    if not os.path.exists(saver_dir):
        os.mkdir(saver_dir)

    # Load training data and validation data
    print('Loading training data and validation data ...')
    start_time = time.time()
    x_train, y_train = data_process(train_dir, config.max_length)
    x_valid, y_valid = data_process(valid_dir, config.max_length)
    time_dif = get_time_dif(start_time)
    print('Loading data ok!')
    print('Time usage: %f' % time_dif)

    # Create session
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    summary_writer.add_graph(session.graph)

    # Some variables about training
    total_batch = 0
    best_val_acc = 0.
    last_improved = 0
    early_stop_batch = 1000

    print('Training and evaluating ...')
    start_time = time.time()
    is_early_stop = False
    for epoch in range(config.num_epochs):
        print('Epoch:', epoch + 1)
        batch_train = batch_iter(x_train, y_train, config.batch_size)

        for x_batch, y_batch in batch_train:
            feed_dict = {
                model.input_x: x_batch,
                model.input_y: y_batch,
                model.dropout_keep_prob: config.dropout_keep_prob
            }

            # Write summaries to TensorBoard every save_per_batch batches
            if total_batch % config.save_per_batch == 0:
                graph = session.run(merged_summary, feed_dict=feed_dict)
                summary_writer.add_summary(graph, total_batch)

            # Print result
            if total_batch % config.print_per_batch == 0:
                feed_dict[model.dropout_keep_prob] = 1.0
                loss_train, acc_train = session.run([model.loss, model.acc],
                                                    feed_dict=feed_dict)
                print()
                loss_val, acc_val = evaluate(session, x_valid, y_valid)

                # save best model by acc
                if acc_val > best_val_acc:
                    best_val_acc = acc_val
                    last_improved = total_batch
                    saver.save(sess=session, save_path=saver_dir)
                    improved_str = '//improved'
                else:
                    improved_str = ''

                time_dif = get_time_dif(start_time)
                msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \
                      + ' Val Loss: {3:>6.2}, Val Acc: {4:>7.2%}, Time: {5} {6}'
                print(
                    msg.format(total_batch, loss_train, acc_train, loss_val,
                               acc_val, time_dif, improved_str))

            # Run one optimization step
            session.run(model.optim, feed_dict=feed_dict)
            total_batch += 1

            # Early stop
            if total_batch - last_improved > early_stop_batch:
                print("No optimization for a long time, auto-stopping...")
                is_early_stop = True
                break
        if is_early_stop:
            break
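
Example #7 (and Example #10 below) call a get_time_dif helper for elapsed-time reporting; since Example #7 formats the result with %f, the sketch below assumes it returns a plain seconds count (the original helper is not shown).

import time

def get_time_dif(start_time):
    """Hypothetical helper: seconds elapsed since start_time."""
    return time.time() - start_time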
Example #8
        saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
        saver.restore(sess, checkpoint_file)

        # Get the placeholders from the graph by name
        input_x = graph.get_operation_by_name("input_x").outputs[0]
        # input_y = graph.get_operation_by_name("input_y").outputs[0]
        dropout_keep_prob = graph.get_operation_by_name(
            "dropout_keep_prob").outputs[0]

        # Tensors we want to evaluate
        predictions = graph.get_operation_by_name(
            "output/predictions").outputs[0]

        # Generate batches for one epoch
        batches = preprocess.batch_iter(list(x_test),
                                        FLAGS.batch_size,
                                        1,
                                        shuffle=False)

        # Collect the predictions here
        all_predictions = []

        for x_test_batch in batches:
            batch_predictions = sess.run(predictions, {
                input_x: x_test_batch,
                dropout_keep_prob: 1.0
            })
            all_predictions = np.concatenate(
                [all_predictions, batch_predictions])

# Print accuracy if y_test is defined
if y_test is not None:
Example #9
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        # Load the saved meta graph and restore variables
        saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
        saver.restore(sess, checkpoint_file)

        # Get the placeholders from the graph by name
        input_x = graph.get_operation_by_name("input_x").outputs[0]
        # input_y = graph.get_operation_by_name("input_y").outputs[0]
        dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

        # Tensors we want to evaluate
        predictions = graph.get_operation_by_name("output/predictions").outputs[0]

        # Generate batches for one epoch
        batches = batch_iter(list(x_test), FLAGS.batch_size, 1, shuffle=False)

        # Collect the predictions here
        all_predictions = []

        for x_test_batch in batches:
            batch_predictions = sess.run(predictions, {input_x: x_test_batch, dropout_keep_prob: 1.0})
            all_predictions = np.concatenate([all_predictions, batch_predictions])

# Print accuracy if y_test is defined
if y_test is not None:
    correct_predictions = float(sum(all_predictions == y_test))
    print("Total number of test examples: {}".format(len(y_test)))
    print("Accuracy: {:g}".format(correct_predictions/float(len(y_test))))

# Save the evaluation to a csv
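
Example #9 stops right before the CSV write announced by its final comment. A hedged sketch of how that step is commonly finished is shown below; x_raw (the raw test texts) and FLAGS.checkpoint_dir are assumptions, not variables defined in the snippet.

import csv
import os
import numpy as np

predictions_human_readable = np.column_stack((np.array(x_raw), all_predictions))
out_path = os.path.join(FLAGS.checkpoint_dir, "..", "prediction.csv")
print("Saving evaluation to {0}".format(out_path))
with open(out_path, "w", newline="") as f:
    csv.writer(f).writerows(predictions_human_readable)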
Example #10
def train():
    print("Configuring TensorBoard and Saver...")
    # Configure TensorBoard; when retraining, delete the tensorboard folder first, otherwise the graphs will be overwritten
    tensorboard_dir = 'tensorboard/textcnn'
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    tf.summary.scalar("loss", model.loss)
    tf.summary.scalar("accuracy", model.acc)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)

    # Configure the Saver
    saver = tf.train.Saver()
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    print("Loading training and validation data...")
    # Load the training and validation sets
    start_time = time.time()
    x_train, y_train = process_file(train_dir, word_to_id, cat_to_id,
                                    config.seq_length)
    x_val, y_val = process_file(val_dir, word_to_id, cat_to_id,
                                config.seq_length)
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)

    # Create a session
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    writer.add_graph(session.graph)

    print('Training and evaluating...')
    start_time = time.time()
    total_batch = 0  # total number of batches processed
    best_acc_val = 0.0  # best validation accuracy so far
    last_improved = 0  # batch at which the last improvement occurred
    require_improvement = 1000  # stop training early if no improvement for more than 1000 batches

    flag = False
    for epoch in range(config.num_epochs):
        print('Epoch:', epoch + 1)
        batch_train = batch_iter(x_train, y_train, config.batch_size)
        for x_batch, y_batch in batch_train:
            feed_dict = feed_data(x_batch, y_batch, config.dropout_keep_prob)

            if total_batch % config.save_per_batch == 0:
                # Every save_per_batch batches, write the training results to TensorBoard scalars
                s = session.run(merged_summary, feed_dict=feed_dict)
                writer.add_summary(s, total_batch)

            if total_batch % config.print_per_batch == 0:
                # Every print_per_batch batches, report performance on the training and validation sets
                feed_dict[model.keep_prob] = 1.0
                loss_train, acc_train = session.run([model.loss, model.acc],
                                                    feed_dict=feed_dict)
                loss_val, acc_val = evaluate(session, x_val, y_val)  # todo

                if acc_val > best_acc_val:
                    # Save the best result
                    best_acc_val = acc_val
                    last_improved = total_batch
                    saver.save(sess=session, save_path=save_path)
                    improved_str = '*'
                else:
                    improved_str = ''

                time_dif = get_time_dif(start_time)
                msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \
                      + ' Val Loss: {3:>6.2}, Val Acc: {4:>7.2%}, Time: {5} {6}'
                print(
                    msg.format(total_batch, loss_train, acc_train, loss_val,
                               acc_val, time_dif, improved_str))

            session.run(model.optim, feed_dict=feed_dict)  # run the optimization step
            total_batch += 1

            if total_batch - last_improved > require_improvement:
                # Validation accuracy has not improved for a long time; stop training early
                print("No optimization for a long time, auto-stopping...")
                flag = True
                break  # break out of the inner loop
        if flag:  # same as above, stop the outer loop too
            break
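
Example #10 assumes several module-level globals (config, model, word_to_id, cat_to_id, save_dir, save_path, train_dir, val_dir). A sketch of how a driver script might define them before calling train() is given below; the class and helper names (TCNNConfig, read_category, read_vocab, TextCNN) are assumptions about the surrounding project.

import os

if __name__ == '__main__':
    config = TCNNConfig()                                  # hyperparameter container (assumed class)
    categories, cat_to_id = read_category()                # label-to-id mapping (assumed helper)
    words, word_to_id = read_vocab('data/vocab.txt')       # vocabulary lookup (assumed helper and path)
    config.vocab_size = len(words)
    model = TextCNN(config)                                # the model object used inside train()
    save_dir = 'checkpoints/textcnn'
    save_path = os.path.join(save_dir, 'best_validation')  # path used by saver.save above
    train()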