Example #1
def main():
    start = time.time()
    query = sys.argv[1]
    glove = utils.load_glove()
    quest = utils.init_babi_deploy(
        os.path.join(os.path.dirname(os.path.realpath(__file__)),
                     'data', 'corpus', 'babi.txt'), query)

    dmn = dmn_basic.DMN_basic(babi_train_raw=quest,
                              babi_test_raw=[],
                              word2vec=glove,
                              word_vector_size=50,
                              dim=40,
                              mode='deploy',
                              answer_module='feedforward',
                              input_mask_mode="sentence",
                              memory_hops=5,
                              l2=0,
                              normalize_attention=False,
                              answer_vec='index',
                              debug=False)

    dmn.load_state(
        'states/dmn_basic.mh5.n40.bs10.babi1.epoch2.test1.20454.state')

    prediction = dmn.step_deploy()

    prediction = prediction[0][0]
    for ind in prediction.argsort()[::-1]:
        if ind < dmn.answer_size:
            print(dmn.ivocab[ind])
            break
    print('Time taken:', time.time() - start)
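All of the examples on this page rely on some project-specific load_glove helper. A minimal sketch of such a loader, assuming the standard GloVe text format (one token followed by its float components per line) and a hypothetical file path; the real utils.load_glove in each project may differ:

import numpy as np

def load_glove_sketch(path='data/glove/glove.6B.50d.txt'):
    # Hypothetical loader returning a word -> vector dict.
    word2vec = {}
    with open(path, encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip().split(' ')
            # First field is the token, the rest are its vector components.
            word2vec[parts[0]] = np.asarray(parts[1:], dtype=np.float32)
    return word2vec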
Example #2
def init_config(task_id,
                restore=None,
                strong_supervision=None,
                l2_loss=None,
                num_runs=None):
    global config, word2vec
    if config.word2vec_init:
        if not word2vec:
            word2vec = utils.load_glove()
    else:
        word2vec = {}
    # config.strong_supervision = True
    config.l2 = l2_loss if l2_loss is not None else 0.001
    config.strong_supervision = strong_supervision if strong_supervision is not None else False
    num_runs = num_runs if num_runs is not None else '1'
    if task_id is not None:
        if ',' in task_id:
            tn = get_task_num(task_id.split(','), num_runs.split(','))
            loop_model(tn, restore)
        elif '-' in task_id:
            st_en = task_id.split('-')
            if len(st_en) < 2:
                raise ValueError(
                    "task id should be of the form x,y,z,t or x-y or x")
            st = st_en[0]
            en = st_en[-1]
            tn = get_task_num(np.arange(int(st), int(en)), num_runs.split(','))
            loop_model(tn, restore)
        else:
            config.task_id = task_id
            run_model(config, word2vec, int(num_runs.split(',')[0]), restore)
Example #3
def get_data(vocabs=""):
    print("==> Load Word Embedding")
    word_embedding = utils.load_glove(use_index=True)

    validation_data = []
    training_data = []
    if not vocabs:
        non_words = utils.load_file(p.non_word, False)
        for w in non_words:
            w_ = w.replace('\n', '').split(' ')
            validation_data.append(int(w_[-1]))
        training_data = utils.sub(range(len(word_embedding)), validation_data)
    else:
        vocabs_set = utils.load_file(vocabs)
        print("vc", len(vocabs_set))
        training_data = [w for _, w in vocabs_set.items()]
        tm = range(len(word_embedding))
        validation_data = list(utils.sub(set(tm), set(training_data)))
        num_batches = int(math.ceil(len(training_data) * 1.0 / p.compression_batch_size))
        length = num_batches * p.compression_batch_size - len(training_data)
        print('before', 'vd', len(validation_data), 'td', len(training_data))
        if length:
            add_on = np.random.choice(validation_data, length)
            training_data += add_on.tolist()
            validation_data = utils.sub(set(validation_data), set(add_on))
        print('vd', len(validation_data), 'td', len(training_data))
    # utils.save_file(p.glove_path, training_data)
    return word_embedding, training_data, validation_data
Example #4
def build_word_mappings(x_train, nlp, glove_dir):
    """Generate word to count, word to index, and word to vector mappings."""
    # Map each token to the # of times it appears in the corpus.
    tokens = [
        item for t in nlp(' '.join(x_train.values),
                          disable=['parser', 'tagger', 'ner'])
        for item in [t.text.strip()] if item
    ]
    w2count = dict(filter(lambda x: x[1] > 4, Counter(tokens).items()))
    save_pickle(tokens, 'tokens')
    save_pickle(w2count, 'w2count')

    # Construct w2idx dict and i2w list.
    w2idx = {
        k: i
        for i, (k, v) in enumerate(
            sorted(w2count.items(), key=lambda x: x[1], reverse=True), 2)
    }
    w2idx['<PAD>'] = 0
    w2idx['<UNK>'] = 1
    i2w = [k for k, v in sorted(w2idx.items(), key=lambda x: x[1])]
    save_pickle(w2idx, 'w2idx')
    save_pickle(i2w, 'i2w')

    # Load word vectors and filter to include words in our vocab.
    w2vec = load_glove(300, glove_dir)
    w2vec = {k: v for k, v in w2vec.items() if k in w2idx}
    save_pickle(w2vec, 'w2vec')
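A natural next step, not shown in the example above, is turning the filtered w2vec dict and the w2idx mapping into an embedding matrix. A sketch under the assumption that rows for missing words stay randomly initialised and the <PAD> row is zeroed:

import numpy as np

def build_embedding_matrix(w2idx, w2vec, dim=300):
    # Hypothetical helper: row i holds the GloVe vector of the word with index i.
    matrix = np.random.normal(scale=0.1, size=(len(w2idx), dim)).astype(np.float32)
    matrix[w2idx['<PAD>']] = 0.0  # keep the padding row at zero
    for word, idx in w2idx.items():
        if word in w2vec:
            matrix[idx] = w2vec[word]
    return matrix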
Example #5
def build_embedding():
    vocabs = utils.load_file(vocabs_path, use_pickle=False)
    word_embedding = utils.load_glove()
    embedding = []
    for w in vocabs:
        w = w.replace('\n', '')
        if w in word_embedding:
            embedding.append(word_embedding[w])
    utils.save_file(embedding_path, embedding)
Example #6
    def build_word_vocabulary(self, text_keys, word_count_threshold=0):
        """
        borrowed this implementation from @karpathy's neuraltalk.
        """
        print("Building word vocabulary starts.\n")
        all_sentences = []
        for k in text_keys:
            all_sentences.extend(self.raw_train[k])

        word_counts = {}
        for sentence in all_sentences:
            for w in self.line_to_words(sentence, eos=False, downcase=True):
                word_counts[w] = word_counts.get(w, 0) + 1

        # vocab = [w for w in word_counts if word_counts[w] >= word_count_threshold]
        vocab = [
            w for w in word_counts if word_counts[w] >= word_count_threshold
            and w not in self.word2idx.keys()
        ]
        print(
            "Vocabulary Size %d (<pad> <unk> <eos> excluded) using word_count_threshold %d.\n"
            % (len(vocab), word_count_threshold))

        # build index and vocabularies
        for idx, w in enumerate(vocab):
            self.word2idx[w] = idx + self.offset
            self.idx2word[idx + self.offset] = w

        print("word2idx size: %d, idx2word size: %d.\n" %
              (len(self.word2idx), len(self.idx2word)))
        # Make glove embedding.
        print("Loading glove embedding at path : %s.\n" %
              self.glove_embedding_path)
        glove_full = load_glove(self.glove_embedding_path)
        print("Glove Loaded, building word2idx, idx2word mapping.\n")

        glove_matrix = np.zeros([len(self.idx2word), self.embedding_dim])
        glove_keys = glove_full.keys()
        for i in tqdm(range(len(self.idx2word))):
            w = self.idx2word[i]
            w_embed = glove_full[w] if w in glove_keys else np.random.randn(
                self.embedding_dim) * 0.4
            glove_matrix[i, :] = w_embed
        self.vocab_embedding = glove_matrix
        print("vocab embedding size is :", glove_matrix.shape)

        print("Saving cache files at ./cache.\n")
        if not os.path.exists("./cache"):
            os.makedirs("./cache")
        pickle.dump(self.word2idx, open(self.word2idx_path, 'wb'))
        pickle.dump(self.idx2word, open(self.idx2word_path, 'wb'))
        pickle.dump(glove_matrix, open(self.vocab_embedding_path, 'wb'))

        print("Building  vocabulary done.\n")
Example #7
def main():
    glove = load_glove()
    vector = []
    with open("data/corpus/cricket.txt", 'r') as f:
        for line in f:
            line = line.strip()
            # print(line)
            l = get_word_vecs(line, glove)
            measure = centroid(l)
            vector.append((line, measure))
    query = sys.argv[1]
    query_measure = centroid(get_word_vecs(query, glove))
    print(get_most_relevant(vector, query_measure))
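The centroid and get_most_relevant helpers are not shown in this snippet; plausible versions, assuming the "measure" is simply the mean word vector and relevance is cosine similarity, might look like this:

import numpy as np

def centroid(vectors):
    # Mean of a (non-empty) list of word vectors.
    return np.mean(np.asarray(vectors), axis=0)

def get_most_relevant(vector, query_measure):
    # Pick the (line, measure) pair whose measure is most cosine-similar to the query.
    def cosine(a, b):
        return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-8))
    return max(vector, key=lambda pair: cosine(pair[1], query_measure))[0]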
Example #8
    def _init_embeddings(self):
        self.user_matrix = tf.get_variable(name='user_matrix',
                                           shape=[self.total_users, self.F],
                                           initializer=self.weight_initializer,
                                           dtype=tf.float32)

        self.item_matrix = tf.get_variable(name='item_matrix',
                                           shape=[self.total_items, self.F],
                                           initializer=self.weight_initializer,
                                           dtype=tf.float32)

        self.word_matrix = tf.get_variable(name='word_matrix',
                                           shape=[self.V, self.W],
                                           initializer=tf.constant_initializer(
                                               load_glove(self.V, self.W)),
                                           dtype=tf.float32)
Example #9
def mi_mlps_ptb(args):
    # load data
    s_train, p_train = load_data('penn_treebank_dataset', 'train')
    s_dev, p_dev = load_data('penn_treebank_dataset', 'dev')
    s_test, p_test = load_data('penn_treebank_dataset', 'test')
    sentences = s_train + s_dev + s_test
    parsed = p_train + p_dev + p_test
    doc_id, sen_id, global_graph = construct_graph(parsed)
    s_train, p_train, s_dev, p_dev, s_test, p_test = [], [], [], [], [], []

    # load embeddings
    graph_emb = graph_embeddings(args, global_graph, doc_id, sen_id)
    bert_emb = load_glove(args, sentences)
    # bert_emb = load_elmo(args, sentences)

    # bert_emb_paths = bert_embeddings(args, sentences)
    # bert_emb = np.load(bert_emb_paths[0], allow_pickle=True)


    # initialize mi
    mir, mig, mib = [], [], []
    for l in range(args.bert_layers_num): mib.append([])
    for s in range(len(sentences)):
        mir.append(0.)
        mig.append(0.)
        for l in range(args.bert_layers_num):
            mib[l].append(0.)

    if args.baselines:
        print('3.1 start to calculate baselines of MI...')
        # calculate MI baselines
        for r in range(args.repeat):
            tmp_mir = mine_probe(args, graph_emb, bert_emb, len(sentences), 'lower')
            tmp_mig = mine_probe(args, graph_emb, bert_emb, len(sentences), 'upper')
            # get sum value
            mir = [mir[s]+tmp_mir[s] for s in range(len(tmp_mir))]
            mig = [mig[s]+tmp_mig[s] for s in range(len(tmp_mig))]

    print('3.2 start to calculate BERT hidden states of MI...')
    for r in range(args.repeat):
        tmp_mib = mine_probe(args, graph_emb, bert_emb, len(sentences),
                             args.bert_layers_num - 1)
        mib[-1] = [mib[-1][s]+tmp_mib[s] for s in range(len(tmp_mib))]
    mib_layers = sum(mib[-1]) / (len(mib[-1]) * args.repeat)
    print('MI(G, Glove): {} |'.format(mib_layers))
Example #10
def init_config(task_id):
    global config, word2vec, model
    if config.word2vec_init:
        if not word2vec:
            word2vec = utils.load_glove()
    else:
        word2vec = {}
    config.batch_size = 10
    config.strong_supervision = False
    # config.train_mode = False
    config.task_id = task_id
    if config.reset:
        tf.reset_default_graph()
    if model is None:
        model = Model(config, word2vec)
    else:
        model.config = config
        model.init_global()
    config.reset = True
    main(model)
Example #11
def train():
    with tf.device('/cpu:0'):
        x_text, y = data_helpers.load_data_and_labels(FLAGS.train_path)

    # Build vocabulary
    # Example: x_text[3] = "A misty <e1>ridge</e1> uprises from the <e2>surge</e2>."
    # ['a misty ridge uprises from the surge <UNK> <UNK> ... <UNK>']
    # =>
    # [27 39 40 41 42  1 43  0  0 ... 0]
    # dimension = FLAGS.max_sentence_length
    vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
        FLAGS.max_sentence_length)
    x = np.array(list(vocab_processor.fit_transform(x_text)))
    print("Text Vocabulary Size: {:d}".format(len(
        vocab_processor.vocabulary_)))
    print("x = {0}".format(x.shape))
    print("y = {0}".format(y.shape))
    print("")

    # Randomly shuffle data to split into train and test(dev)
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]

    # Split train/test set
    # TODO: This is very crude, should use cross-validation
    dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
    x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[
        dev_sample_index:]
    y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[
        dev_sample_index:]
    print("Train/Dev split: {:d}/{:d}\n".format(len(y_train), len(y_dev)))

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            model = AttLSTM(sequence_length=x_train.shape[1],
                            num_classes=y_train.shape[1],
                            vocab_size=len(vocab_processor.vocabulary_),
                            embedding_size=FLAGS.embedding_dim,
                            hidden_size=FLAGS.hidden_size,
                            l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdadeltaOptimizer(FLAGS.learning_rate,
                                                   FLAGS.decay_rate, 1e-6)
            gvs = optimizer.compute_gradients(model.loss)
            capped_gvs = [(tf.clip_by_value(grad, -1.0, 1.0), var)
                          for grad, var in gvs]
            train_op = optimizer.apply_gradients(capped_gvs,
                                                 global_step=global_step)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", model.loss)
            acc_summary = tf.summary.scalar("accuracy", model.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Pre-trained word embeddings (GloVe)
            if FLAGS.embedding_path:
                pretrain_W = utils.load_glove(FLAGS.embedding_path,
                                              FLAGS.embedding_dim,
                                              vocab_processor)
                sess.run(model.W_text.assign(pretrain_W))
                print("Successfully loaded pre-trained word embeddings!\n")

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size,
                                              FLAGS.num_epochs)
            # Training loop. For each batch...
            best_f1 = 0.0  # best F1 so far; used to decide when to save a checkpoint
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                # Train
                feed_dict = {
                    model.input_text: x_batch,
                    model.input_y: y_batch,
                    model.emb_dropout_keep_prob: FLAGS.emb_dropout_keep_prob,
                    model.rnn_dropout_keep_prob: FLAGS.rnn_dropout_keep_prob,
                    model.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, model.loss,
                    model.accuracy
                ], feed_dict)
                train_summary_writer.add_summary(summaries, step)

                # Training log display
                if step % FLAGS.display_every == 0:
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss, accuracy))

                # Evaluation
                if step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    feed_dict = {
                        model.input_text: x_dev,
                        model.input_y: y_dev,
                        model.emb_dropout_keep_prob: 1.0,
                        model.rnn_dropout_keep_prob: 1.0,
                        model.dropout_keep_prob: 1.0
                    }
                    summaries, loss, accuracy, predictions = sess.run([
                        dev_summary_op, model.loss, model.accuracy,
                        model.predictions
                    ], feed_dict)
                    dev_summary_writer.add_summary(summaries, step)

                    time_str = datetime.datetime.now().isoformat()
                    f1 = f1_score(np.argmax(y_dev, axis=1),
                                  predictions,
                                  labels=np.array(range(1, 19)),
                                  average="macro")
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss, accuracy))
                    print(
                        "[UNOFFICIAL] (2*9+1)-Way Macro-Average F1 Score (excluding Other): {:g}\n"
                        .format(f1))

                    # Model checkpoint
                    if best_f1 < f1:
                        best_f1 = f1
                        path = saver.save(sess,
                                          checkpoint_prefix +
                                          "-{:.3g}".format(best_f1),
                                          global_step=step)
                        print("Saved model checkpoint to {}\n".format(path))
Example #12
##necessary imports..
import tensorflow as tf
from utils import spacy_cleaner
from utils import load_glove
from utils import build_vocab
from utils import check_coverage
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

##loading the dataset..
train = pd.read_csv("sentiment_analysis/train.csv")
test = pd.read_csv("sentiment_analysis/test.csv")

##Loading the glove vectors...
embedding_index = load_glove('sentiment_analysis/glove.6B.100d.txt')

##building vocab
train['clean_text'] = [spacy_cleaner(t) for t in train.tweet]
sentences = train['clean_text'].map(lambda z: z.split())

vocab_step1 = build_vocab(sentences)

##checking the coverage..
oov = check_coverage(vocab_step1, embedding_index)

##Inspection...
print(oov[:20])
##this shows the top-20 out-of-vocabulary words, which are to be modified..
##the modification is done to make full use of the word embeddings
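build_vocab and check_coverage come from the project's utils module and are not shown; rough equivalents, assuming the usual word-count and embedding-coverage pattern, could be:

def build_vocab_sketch(sentences):
    # Hypothetical: word -> number of occurrences over the tokenised tweets.
    vocab = {}
    for sentence in sentences:
        for word in sentence:
            vocab[word] = vocab.get(word, 0) + 1
    return vocab

def check_coverage_sketch(vocab, embedding_index):
    # Hypothetical: report coverage and return OOV words sorted by frequency.
    covered, missed = 0, 0
    oov = {}
    for word, count in vocab.items():
        if word in embedding_index:
            covered += count
        else:
            missed += count
            oov[word] = count
    print('embeddings cover {:.2%} of all text'.format(covered / (covered + missed)))
    return sorted(oov.items(), key=lambda kv: kv[1], reverse=True)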
Example #13
def train():
    vocab = read_vocab(FLAGS.vocab_data)
    glove = load_glove("data/glove.6B.{}d.txt".format(FLAGS.emb_size),
                       FLAGS.emb_size, vocab)
    train = Dataset(filepath=FLAGS.train_data)
    valid = Dataset(filepath=FLAGS.valid_data)

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            dmn = DyMemNet(hid_size=FLAGS.hid_size,
                           vocab_size=len(vocab),
                           emb_size=FLAGS.emb_size,
                           num_classes=FLAGS.num_classes,
                           num_hops=FLAGS.num_hops,
                           pretrained_embs=glove,
                           l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
            grads_and_vars = optimizer.compute_gradients(dmn.loss)
            capped_grads_and_vars = [
                (tf.clip_by_norm(grad, FLAGS.max_grad_norm), var)
                for grad, var in grads_and_vars
            ]
            train_op = optimizer.apply_gradients(capped_grads_and_vars,
                                                 global_step=global_step)

            acc, acc_op = tf.metrics.accuracy(labels=dmn.labels,
                                              predictions=dmn.predictions,
                                              name="metrics/acc")
            metrics_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="metrics")
            metrics_init_op = tf.variables_initializer(var_list=metrics_vars)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", dmn.loss)
            acc_summary = tf.summary.scalar("accuracy", dmn.accuracy)

            # Train summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Valid summaries
            valid_step = 0
            valid_summary_op = tf.summary.merge([loss_summary, acc_summary])
            valid_summary_dir = os.path.join(out_dir, "summaries", "valid")
            valid_summary_writer = tf.summary.FileWriter(
                valid_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # initialize all variables
            best_valid_acc = 0.0
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())

            # training and validating loop
            for epoch in range(FLAGS.num_epochs):
                print('-' * 100)
                print('\n{}> epoch: {}\n'.format(
                    datetime.datetime.now().isoformat(), epoch))
                sess.run(metrics_init_op)
                # Training process
                for batch in train.bacth_iter(FLAGS.batch_size,
                                              desc="Training",
                                              shuffle=True):
                    labels, contexts, queries = zip(*batch)
                    contexts, num_sents = normalize(contexts)
                    feed_dict = {
                        dmn.context_placeholder: contexts,
                        dmn.query_placeholder: queries,
                        dmn.num_sents: num_sents,
                        dmn.labels: labels,
                        dmn.dropout_keep_prob: FLAGS.dropout_keep_prob
                    }
                    _, step, summaries, loss, accuracy, _ = sess.run([
                        train_op, global_step, train_summary_op, dmn.loss,
                        dmn.accuracy, acc_op
                    ], feed_dict)
                    train_summary_writer.add_summary(summaries, step)

                print("\ntraining accuracy = {:.2f}\n".format(
                    sess.run(acc) * 100))

                sess.run(metrics_init_op)
                # Validating process
                for batch in valid.bacth_iter(FLAGS.batch_size,
                                              desc="Validating",
                                              shuffle=False):
                    valid_step += 1
                    labels, contexts, queries = zip(*batch)
                    contexts, num_sents = normalize(contexts)
                    feed_dict = {
                        dmn.context_placeholder: contexts,
                        dmn.query_placeholder: queries,
                        dmn.num_sents: num_sents,
                        dmn.labels: labels,
                        dmn.dropout_keep_prob: 1.0
                    }
                    summaries, loss, accuracy, _ = sess.run(
                        [valid_summary_op, dmn.loss, dmn.accuracy, acc_op],
                        feed_dict)
                    valid_summary_writer.add_summary(summaries,
                                                     global_step=valid_step)

                valid_acc = sess.run(acc) * 100
                print("\nvalidating accuracy = {:.2f}\n".format(valid_acc))
                print("previous best validating accuracy = {:.2f}\n".format(
                    best_valid_acc))

                # model checkpoint
                if valid_acc > best_valid_acc:
                    best_valid_acc = valid_acc
                    path = saver.save(sess, checkpoint_prefix)
                    print("saved model checkpoint to {}\n".format(path))

            print("{} optimization finished!\n".format(
                datetime.datetime.now()))
            print("best validating accuracy = {:.2f}\n".format(best_valid_acc))
Example #14
def main():
    print("Loading wordvecs...")
    if utils.exists("glove", "glove.840B.300d.txt", "gutenberg"):
        words, wordvecs = utils.load_glove("glove", "glove.840B.300d.txt", "gutenberg")
    else:
        words, wordvecs = utils.load_glove("glove", "glove.840B.300d.txt", "gutenberg",
                                           set(map(clean_word, gutenberg.words())))

    wordvecs_norm = wordvecs / np.linalg.norm(wordvecs, axis=1).reshape(-1, 1)

    print("Loading corpus...")
    # Convert corpus into normed wordvecs, replacing any words not in vocab with zero vector
    sentences = [[wordvecs_norm[words[clean_word(word)]] if clean_word(word) in words.keys() else np.zeros(WORD_DIM)
                  for word in sentence]
                 for sentence in gutenberg.sents()]

    print("Processing corpus...")
    # Pad sentences shorter than SEQUENCE_LENGTH with zero vectors and truncate sentences longer than SEQUENCE_LENGTH
    s_train = list(map(pad_or_truncate, sentences))

    np.random.shuffle(s_train)

    # Truncate to multiple of BATCH_SIZE
    s_train = s_train[:int(len(s_train) / BATCH_SIZE) * BATCH_SIZE]

    s_train_idxs = np.arange(len(s_train))

    print("Generating graph...")
    network = NlpGan(learning_rate=LEARNING_RATE, d_dim_state=D_DIM_STATE, g_dim_state=G_DIM_STATE,
                     dim_in=WORD_DIM, sequence_length=SEQUENCE_LENGTH)

    plotter = Plotter([2, 1], "Loss", "Accuracy")
    plotter.plot(0, 0, 0, 0)
    plotter.plot(0, 0, 0, 1)
    plotter.plot(0, 0, 1, 0)
    plotter.plot(0, 1, 1, 0)

    #d_vars = [var for var in tf.trainable_variables() if 'discriminator' in var.name]
    saver = tf.train.Saver()

    with tf.Session() as sess:
        #eval(sess, network, words, wordvecs_norm, saver)

        sess.run(tf.global_variables_initializer())
        #resume(sess, saver, plotter, "GAN_9_SEQUENCELENGTH_10", 59)

        d_loss, g_loss = 0.0, 0.0
        for epoch in range(0, 10000000):
            print("Epoch %d" % epoch)

            np.random.shuffle(s_train_idxs)
            for batch in range(int(len(s_train_idxs) / BATCH_SIZE)):
                # select next random batch of sentences
                s_batch_real = [s_train[x] for x in
                                s_train_idxs[batch * BATCH_SIZE:(batch + 1) * BATCH_SIZE]]  # shape (BATCH_SIZE, SEQUENCE_LENGTH, WORD_DIM)

                # reshape to (SEQUENCE_LENGTH, BATCH_SIZE, WORD_DIM) while preserving sentence order
                s_batch_real = np.array(s_batch_real).swapaxes(0, 1)

                if d_loss - g_loss > MAX_LOSS_DIFF and False:
                    output_dict = sess.run(
                        network.get_fetch_dict('d_loss', 'd_train', 'g_loss'),
                        network.get_feed_dict(inputs=s_batch_real, input_dropout=D_KEEP_PROB)
                    )
                elif g_loss - d_loss > MAX_LOSS_DIFF and False:
                    output_dict = sess.run(
                        network.get_fetch_dict('d_loss', 'g_loss', 'g_train'),
                        network.get_feed_dict(inputs=s_batch_real, input_dropout=D_KEEP_PROB)
                    )
                else:
                    output_dict = sess.run(
                        network.get_fetch_dict('d_loss', 'd_train', 'g_loss', 'g_train'),
                        network.get_feed_dict(inputs=s_batch_real, input_dropout=D_KEEP_PROB,
                                              instance_variance=INSTANCE_VARIANCE)
                    )

                d_loss, g_loss = output_dict['d_loss'], output_dict['g_loss']

                if batch % 10 == 0:
                    print("Finished training batch %d / %d" % (batch, int(len(s_train) / BATCH_SIZE)))
                    print("Discriminator Loss: %f" % output_dict['d_loss'])
                    print("Generator Loss: %f" % output_dict['g_loss'])
                    plotter.plot(epoch + (batch / int(len(s_train) / BATCH_SIZE)), d_loss, 0, 0)
                    plotter.plot(epoch + (batch / int(len(s_train) / BATCH_SIZE)), g_loss, 0, 1)

                if batch % 100 == 0:
                    eval = sess.run(
                        network.get_fetch_dict('g_outputs', 'd_accuracy'),
                        network.get_feed_dict(inputs=s_batch_real, input_dropout=1.0,
                                              instance_variance=INSTANCE_VARIANCE)
                    )
                    # reshape g_outputs to (BATCH_SIZE, SEQUENCE_LENGTH, WORD_DIM) while preserving sentence order
                    generated = eval['g_outputs'].swapaxes(0, 1)
                    for sentence in generated[:3]:
                        for wordvec in sentence:
                            norm = np.linalg.norm(wordvec)
                            word, similarity = nearest_neighbor(words, wordvecs_norm, wordvec / norm)
                            print("{}({:4.2f})".format(word, similarity))
                        print('\n---------')
                    print("Total Accuracy: %f" % eval['d_accuracy'])
                    plotter.plot(epoch + (batch / int(len(s_train) / BATCH_SIZE)), eval['d_accuracy'], 1, 0)

            saver.save(sess, './checkpoints/{}.ckpt'.format(SAVE_NAME),
                       global_step=epoch)
            plotter.save(SAVE_NAME)
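pad_or_truncate is only referenced above; a sketch consistent with the comment in the example (pad with zero vectors up to SEQUENCE_LENGTH, cut off anything longer), where the default length and dim stand in for the module-level SEQUENCE_LENGTH and WORD_DIM constants:

import numpy as np

def pad_or_truncate_sketch(sentence, length=10, dim=300):
    # Truncate to `length` word vectors, then pad with zero vectors if shorter.
    padded = list(sentence[:length])
    padded += [np.zeros(dim)] * (length - len(padded))
    return padded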
Example #15
def main(restore=False):
    word_embedding = None

    word_embedding_file = 'squad/word_embedding.pkl'
    if u.check_file(word_embedding_file):
        word_embedding = utils.load_file(word_embedding_file)
    else:
        print("==> Load vectors ...")
        vocabs = None
        ft_vc = 'squad/vocabs_fine_tuned.pkl'
        if u.check_file(ft_vc):
            vocabs = u.load_file(ft_vc)
        else:
            raise ValueError("Please check vocabs fine-tuned file")
        word2vec = utils.load_glove()
        word_embedding = np.zeros((len(vocabs), p.embed_size))
        for index, v in enumerate(vocabs):
            if v in word2vec:
                word_embedding[index] = word2vec[v]
            else:
                word_embedding[index] = pr.create_vector(
                    v, word2vec, p.embed_size)
        del word2vec
        utils.save_file('squad/word_embedding.pkl', word_embedding)
        print("==> Done vectors ")

    # init word embedding
    contexts, contexts_len, questions, questions_len, answers, answers_len, start, end = utils.load_file(
        'squad/doc_train_idx.pkl')

    data_len = int(np.floor(0.9 * len(contexts)))

    train = contexts[:data_len], contexts_len[:data_len], questions[:data_len], \
            questions_len[:data_len], answers[:data_len], answers_len[:data_len], \
            start[:data_len], end[:data_len]
    dev = contexts[data_len:], contexts_len[data_len:], questions[data_len:], \
            questions_len[data_len:], answers[data_len:], answers_len[data_len:], \
            start[data_len:], end[data_len:]

    config = Config()
    config.strong_supervision = True
    model = ModelSquad(config)

    model.set_data(train, dev, word_embedding,
                   np.shape(questions)[1],
                   np.shape(contexts)[1],
                   np.shape(answers)[1], len(word_embedding))
    model.set_encoding()
    model.init_ops()

    # tf.reset_default_graph()
    print('Start training DMN on squad')
    # model.init_data_node()
    best_overall_val_loss = float('inf')
    # create model
    tconfig = tf.ConfigProto(allow_soft_placement=True)

    with tf.device('/%s' % p.device):
        print('==> initializing variables')
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

    with tf.Session(config=tconfig) as session:

        sum_dir = 'summaries/train_squad/' + time.strftime("%Y-%m-%d %H %M")
        if not utils.check_file(sum_dir):
            os.makedirs(sum_dir)
        train_writer = tf.summary.FileWriter(sum_dir, session.graph)

        session.run(init)

        if restore:
            print('==> restoring weights')
            saver.restore(session, 'weights/squad.weights')

        print('==> starting test')
        start = time.time()

        valid_loss, valid_accuracy = model.run_epoch(session, model.valid)
        print('Validation loss: {}'.format(valid_loss))
        print('Validation accuracy: {}'.format(valid_accuracy))

        print('Total time: {}'.format(time.time() - start))
Example #16
def _define_global(glove_file):
    global glove
    glove = load_glove(glove_file, verbose=1)
Example #17
def make_data():
    """data pre-processing"""
    global SD

    # load data
    print('loading data: Multi-Domain Sentiment Dataset v2')
    texts, s_labels, d_labels = load_mdsd(domains=DOMAINS)

    # build vocabulary for words
    print('building vocabulary')
    texts_tokens = []
    lens = []
    for text in texts:
        words = word_tokenize(text)
        for idx, word in enumerate(words):
            if word.isdigit():
                words[idx] = '<NUM>'  # replace number token with <NUM>
        texts_tokens.append(words)
        lens.append(len(words))
    maxlen = int(np.percentile(lens, 95))
    print('maxlen:', maxlen)
    counter = Counter()
    for words in texts_tokens:
        counter.update(words)
    word2index = {'<PAD>': 0, '<UNK>': 1}
    for idx, word_count in enumerate(counter.most_common(SD.max_words)):
        if word_count[1] >= SD.min_count:  # min_count
            # indices start from 2; 0 is reserved for <PAD>, 1 for <UNK>
            word2index[word_count[0]] = idx + 2
    n_words = len(word2index)
    print('n_words:', n_words)

    # data encode
    print('data encoding')
    seqs = []
    for words in texts_tokens:
        seqs.append([word2index.get(word, 1) for word in words])
    seqs_padded = pad_sequences(seqs,
                                maxlen=maxlen,
                                padding='post',
                                truncating='post')
    s_labels = np.asarray(s_labels, dtype=int)
    d_labels = np.asarray(d_labels, dtype=int)

    # domain & train/val/test split
    print('labeled data: domain & train/val/test splitting')
    X_train, ys_train, yd_train = [], [], []
    X_val, ys_val, yd_val = [], [], []
    X_test_byd, ys_test_byd, yd_test_byd = {}, {}, {}
    for d_id, d_name in enumerate(DOMAINS):
        print(d_name, 'splitting')
        seqs_padded_ofd = seqs_padded[(d_labels == d_id) & (s_labels != -1)]
        slabels_ofd = s_labels[(d_labels == d_id) & (s_labels != -1)]
        print(' * all:', seqs_padded_ofd.shape, slabels_ofd.shape)
        (X_train_ofd, X_val_ofd,
         X_test_ofd), (y_train_ofd, y_val_ofd,
                       y_test_ofd) = _tvt_split(seqs_padded_ofd, slabels_ofd)
        # train data (add this domain)
        X_train.extend(X_train_ofd)
        ys_train.extend(y_train_ofd)
        yd_train.extend([d_id] * len(X_train_ofd))
        # val data
        X_val.extend(X_val_ofd)
        ys_val.extend(y_val_ofd)
        yd_val.extend([d_id] * len(X_val_ofd))
        # test data
        X_test_byd[d_id] = X_test_ofd
        ys_test_byd[d_id] = to_categorical(y_test_ofd, num_classes=2)
        yd_test_byd[d_id] = to_categorical([d_id] * len(X_test_ofd),
                                           num_classes=len(DOMAINS))
    X_train = np.asarray(X_train, dtype='int')
    ys_train = to_categorical(ys_train, num_classes=2)
    yd_train = to_categorical(yd_train, num_classes=len(DOMAINS))
    X_val = np.asarray(X_val, dtype='int')
    ys_val = to_categorical(ys_val, num_classes=2)
    yd_val = to_categorical(yd_val, num_classes=len(DOMAINS))
    # combine test data from different domains
    X_test = np.concatenate([X_test_byd[idx] for idx in range(len(DOMAINS))])
    ys_test = np.concatenate([ys_test_byd[idx] for idx in range(len(DOMAINS))])
    yd_test = np.concatenate([yd_test_byd[idx] for idx in range(len(DOMAINS))])

    # shuffle train data
    indices = list(range(len(X_train)))
    np.random.shuffle(indices)
    X_train = X_train[indices]
    ys_train = ys_train[indices]
    yd_train = yd_train[indices]
    print('combined labeled data:')
    print('  - train:', X_train.shape, ys_train.shape, yd_train.shape)
    print('  - val:', X_val.shape, ys_val.shape, yd_val.shape)
    print('  - test:', X_test.shape, ys_test.shape, yd_test.shape)
    for d_id, d_name in enumerate(DOMAINS):
        print('  - test for {}:'.format(d_name[:3]), X_test_byd[d_id].shape,
              ys_test_byd[d_id].shape, yd_test_byd[d_id].shape)

    # embeddings
    print('loading word embeddings from glove')
    embeddings = load_glove(embedding_dim=SD.embed_dim,
                            desired=word2index.keys(),
                            corpus_size=SD.glove_corpus)
    print('processing embedding matrix')
    embedding_mat = get_embedding_mat(embeddings,
                                      word2index,
                                      SD.embed_dim,
                                      idx_from=2)
    SD.wv_weights = [embedding_mat]

    # inject data into SharedData for other functions
    SD.maxlen = maxlen
    SD.n_words = n_words
    SD.word2index = word2index
    SD.X_train, SD.ys_train, SD.yd_train = X_train, ys_train, yd_train
    SD.X_val, SD.ys_val, SD.yd_val = X_val, ys_val, yd_val
    SD.X_test, SD.ys_test, SD.yd_test = X_test, ys_test, yd_test
    SD.X_test_byd, SD.ys_test_byd, SD.yd_test_byd = X_test_byd, ys_test_byd, yd_test_byd
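get_embedding_mat is project-specific; one plausible reading, assuming embeddings is a word-to-vector dict and idx_from marks where real words start (indices below it, i.e. <PAD> and <UNK>, keep their default rows):

import numpy as np

def get_embedding_mat_sketch(embeddings, word2index, embed_dim, idx_from=2):
    # Hypothetical: align GloVe vectors to the word2index built above.
    mat = np.random.uniform(-0.05, 0.05, size=(len(word2index), embed_dim))
    mat[0] = 0.0  # <PAD>
    for word, idx in word2index.items():
        if idx >= idx_from and word in embeddings:
            mat[idx] = embeddings[word]
    return mat.astype('float32')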
Example #18
def mi_bert_amr(args, uncontext=False):
    # load data & embeddings
    s_train = load_data('amr_dataset', 'train')
    s_dev = load_data('amr_dataset', 'dev')
    s_test = load_data('amr_dataset', 'test')
    amr_s = s_train + s_dev + s_test
    print(amr_s[45672], amr_s[599])
    graph_emb, bert_emb_paths = get_embeddings(args, amr_s)
    # bert_emb_paths = load_elmos(args, amr_s, dataset='amr')
    s_num = len(graph_emb)
    if uncontext:
        bert_emb = load_glove(args, amr_s, dataset='amr')
        # bert_emb = load_elmo(args, amr_s, dataset='amr')
    else:
        bert_emb = np.load(bert_emb_paths[0], allow_pickle=True)

    print('2.1 start to calculate baselines of MI...')
    # initialize mi
    mir, mig, mib = [], [], []
    for l in range(args.bert_layers_num): mib.append([])

    if args.baselines:
        print('3.1 start to calculate baselines of MI...')
        # calculate MI baselines
        for r in range(args.repeat):
            tmp_mir = mine_probe(args, graph_emb, bert_emb, s_num, 'lower')
            tmp_mig = mine_probe(args, graph_emb, bert_emb, s_num, 'upper')
            # get sum value
            if len(mir) == 0: 
                mir = tmp_mir
            else:
                mir = [mir[s]+tmp_mir[s] for s in range(len(tmp_mir))]
            if len(mig) == 0:
                mig = tmp_mig
            else:
                mig = [mig[s]+tmp_mig[s] for s in range(len(tmp_mig))]

    print('2.2 start to calculate BERT hidden states of MI...')
    # calculate MI of BERT
    if uncontext:
        for r in range(args.repeat):
            tmp_mib = mine_probe(args, graph_emb, bert_emb, s_num, args.bert_layers_num-1)
            if len(mib[-1]) == 0: 
                mib[-1] = tmp_mib
            else:
                mib[-1] = [mib[-1][s]+tmp_mib[s] for s in range(len(tmp_mib))]
        mib_layers = sum(mib[-1]) / (len(mib[-1]) * args.repeat)
        print('MI(G, Glove): {} |'.format(mib_layers))
    else:
        for l in range(args.bert_layers_num):
            bert_emb = np.load(bert_emb_paths[l], allow_pickle=True)
            for r in range(args.repeat):
                tmp_mib = mine_probe(args, graph_emb, bert_emb, s_num, l)
                if len(mib[l]) == 0:
                    mib[l] = tmp_mib
                else:
                    mib[l] = [mib[l][s]+tmp_mib[s] for s in range(len(tmp_mib))]

        # compute average values for all results
        mir = [mi/args.repeat for mi in mir]
        mig = [mi/args.repeat for mi in mig]
        for l in range(args.bert_layers_num):
            mib[l] = [mi/args.repeat for mi in mib[l]]

        # print general results
        results = {'lower:': mir, 'upper': mig, 'bert': mib}
        print('\n', results, '\n')
        mib_layers = [sum(mib[l])/len(mib[l]) for l in range(len(mib)) if len(mib[l])]
        print('MI(G, R): {} | MI(G, G): {}| MI(G, BERT): {} |'.format(sum(
                                                                    mir)/len(mir),
                                                                    sum(mig)/len(mig),
                                                                    mib_layers))

    return
Example #19
def train():
    word_dict = load_vocab(FLAGS.vocab_data)
    glove = load_glove("../glove.6B.{}d.txt".format(FLAGS.embedding_size),
                       FLAGS.embedding_size, word_dict)
    train = Dataset(filepath=FLAGS.train_data,
                    num_class=FLAGS.num_class,
                    sequence_length=FLAGS.sequence_length)
    valid = Dataset(filepath=FLAGS.valid_data,
                    num_class=FLAGS.num_class,
                    sequence_length=FLAGS.sequence_length)

    with tf.Graph().as_default():
        session_conf = tf.compat.v1.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.compat.v1.Session(config=session_conf)
        with sess.as_default():
            rcnn = TextRCNN(vocab_size=len(word_dict),
                            embedding_size=FLAGS.embedding_size,
                            sequence_length=FLAGS.sequence_length,
                            num_class=FLAGS.num_class,
                            cell_type=FLAGS.cell_type,
                            hidden_size=FLAGS.hidden_size,
                            pretrained_embeddings=glove,
                            l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define training procedure
            global_step = tf.compat.v1.Variable(0,
                                                name="global_step",
                                                trainable=False)
            train_op = tf.compat.v1.train.AdamOptimizer(
                FLAGS.learning_rate).minimize(rcnn.loss,
                                              global_step=global_step)
            acc, acc_op = tf.compat.v1.metrics.accuracy(
                labels=rcnn.labels,
                predictions=rcnn.predictions,
                name="metrics/acc")
            metrics_vars = tf.compat.v1.get_collection(
                tf.compat.v1.GraphKeys.LOCAL_VARIABLES, scope="metrics")
            metrics_init_op = tf.compat.v1.variables_initializer(
                var_list=metrics_vars)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.compat.v1.summary.scalar("loss", rcnn.loss)
            acc_summary = tf.compat.v1.summary.scalar("accuracy",
                                                      rcnn.accuracy)

            # Train summaries
            train_summary_op = tf.compat.v1.summary.merge(
                [loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.compat.v1.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Valid summaries
            valid_step = 0
            valid_summary_op = tf.compat.v1.summary.merge(
                [loss_summary, acc_summary])
            valid_summary_dir = os.path.join(out_dir, "summaries", "valid")
            valid_summary_writer = tf.compat.v1.summary.FileWriter(
                valid_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.compat.v1.train.Saver(tf.compat.v1.global_variables(),
                                             max_to_keep=FLAGS.num_checkpoints)

            # initialize all variables
            best_valid_acc = 0.0
            sess.run(tf.compat.v1.global_variables_initializer())
            sess.run(tf.compat.v1.local_variables_initializer())

            # training and validating loop
            for epoch in range(FLAGS.num_epoch):
                print('-' * 100)
                print('\n{}> epoch: {}\n'.format(
                    datetime.datetime.now().isoformat(), epoch))
                sess.run(metrics_init_op)
                # Training process
                for batch in train.bacth_iter(FLAGS.batch_size,
                                              desc="Training",
                                              shuffle=True):
                    labels, docs = zip(*batch)
                    padded_docs, _, _ = vectorize(docs, FLAGS.sequence_length)
                    feed_dict = {
                        rcnn.inputs: padded_docs,
                        rcnn.labels: labels,
                        rcnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                    }
                    _, step, summaries, loss, accuracy, _ = sess.run([
                        train_op, global_step, train_summary_op, rcnn.loss,
                        rcnn.accuracy, acc_op
                    ], feed_dict)
                    train_summary_writer.add_summary(summaries, step)

                print("\ntraining accuracy = {:.2f}\n".format(
                    sess.run(acc) * 100))

                sess.run(metrics_init_op)
                # Validating process
                for batch in valid.bacth_iter(FLAGS.batch_size,
                                              desc="Validating",
                                              shuffle=False):
                    valid_step += 1
                    labels, docs = zip(*batch)
                    padded_docs, _, _ = vectorize(docs, FLAGS.sequence_length)
                    feed_dict = {
                        rcnn.inputs: padded_docs,
                        rcnn.labels: labels,
                        rcnn.dropout_keep_prob: 1.0
                    }
                    summaries, loss, accuracy, _ = sess.run(
                        [valid_summary_op, rcnn.loss, rcnn.accuracy, acc_op],
                        feed_dict)
                    valid_summary_writer.add_summary(summaries,
                                                     global_step=valid_step)

                valid_acc = sess.run(acc) * 100
                print("\nvalidating accuracy = {:.2f}\n".format(valid_acc))

                # model checkpoint
                if valid_acc > best_valid_acc:
                    best_valid_acc = valid_acc
                    print("current best validating accuracy = {:.2f}\n".format(
                        best_valid_acc))
                    path = saver.save(sess, checkpoint_prefix)
                    print("saved model checkpoint to {}\n".format(path))

            print("{} optimization finished!\n".format(
                datetime.datetime.now()))
            print("best validating accuracy = {:.2f}\n".format(best_valid_acc))
Example #20
def mi_bert_ptb(args, npeet=False, uncontext=False):
    # load data
    s_train, p_train = load_data('penn_treebank_dataset', 'train')
    s_dev, p_dev = load_data('penn_treebank_dataset', 'dev')
    s_test, p_test = load_data('penn_treebank_dataset', 'test')
    sentences = s_train + s_dev + s_test
    parsed = p_train + p_dev + p_test
    doc_id, sen_id, global_graph = construct_graph(parsed)
    s_train, p_train, s_dev, p_dev, s_test, p_test = [], [], [], [], [], []

    # load embeddings
    graph_emb = graph_embeddings(args, global_graph, doc_id, sen_id)
    if uncontext:
        bert_emb = load_glove(args, sentences)
        # bert_emb = load_elmo(args, sentences)
    else:
        bert_emb_paths = bert_embeddings(args, sentences)
        # bert_emb_paths = load_elmos(args, sentences)
        bert_emb = np.load(bert_emb_paths[0], allow_pickle=True)

    # initialize mi
    mir, mig, mib = [], [], []
    for l in range(args.bert_layers_num): mib.append([])
    for s in range(len(sentences)):
        mir.append(0.)
        mig.append(0.)
        for l in range(args.bert_layers_num):
            mib[l].append(0.)

    if args.baselines:
        print('3.1 start to calculate baselines of MI...')
        # calculate MI baselines
        for r in range(args.repeat):
            tmp_mir = mine_probe(args, graph_emb, bert_emb, len(sentences), 'lower')
            tmp_mig = mine_probe(args, graph_emb, bert_emb, len(sentences), 'upper')
            # get sum value
            mir = [mir[s]+tmp_mir[s] for s in range(len(tmp_mir))]
            mig = [mig[s]+tmp_mig[s] for s in range(len(tmp_mig))]

    print('3.2 start to calculate BERT hidden states of MI...')
    if uncontext:
        for r in range(args.repeat):
            tmp_mib = mine_probe(args, graph_emb, bert_emb, len(sentences),
                                 args.bert_layers_num - 1)
            mib[-1] = [mib[-1][s]+tmp_mib[s] for s in range(len(tmp_mib))]
        mib_layers = sum(mib[-1]) / (len(mib[-1]) * args.repeat)
        print('MI(G, Glove): {} |'.format(mib_layers))
    else:
        # calculate MI of BERT
        for l in range(args.bert_layers_num):
            bert_emb = np.load(bert_emb_paths[l], allow_pickle=True)
            for r in range(args.repeat):
                tmp_mib = mine_probe(args, graph_emb, bert_emb, len(sentences), l)
                mib[l] = [mib[l][s]+tmp_mib[s] for s in range(len(tmp_mib))]
        # compute average values for all results
        mir = [mi/args.repeat for mi in mir]
        mig = [mi/args.repeat for mi in mig]
        for l in range(args.bert_layers_num):
            mib[l] = [mi/args.repeat for mi in mib[l]]
        mib_layers = [sum(mib[l])/len(mib[l]) for l in range(len(mib))]

        # print general results
        results = {'lower:': mir, 'upper': mig, 'bert': mib}
        # print('\n', results, '\n')
        
        print('MI(G, R): {} | MI(G, G): {}| MI(G, BERT): {} |'.format(sum(
                                                                    mir)/len(mir),
                                                                    sum(mig)/len(mig),
                                                                    mib_layers))

    return
Example #21
def main(_):
    vocab = read_vocab('data/yelp-2015-w2i.pkl')
    glove_embs = load_glove('glove.6B.{}d.txt'.format(FLAGS.emb_size),
                            FLAGS.emb_size, vocab)
    data_reader = DataReader(train_file='data/yelp-2015-train.pkl',
                             dev_file='data/yelp-2015-dev.pkl',
                             test_file='data/yelp-2015-test.pkl')

    config = tf.ConfigProto(allow_soft_placement=FLAGS.allow_soft_placement)
    with tf.Session(config=config) as sess:
        model = Model(cell_dim=FLAGS.cell_dim,
                      att_dim=FLAGS.att_dim,
                      vocab_size=len(vocab),
                      emb_size=FLAGS.emb_size,
                      num_classes=FLAGS.num_classes,
                      dropout_rate=FLAGS.dropout_rate,
                      pretrained_embs=glove_embs)

        loss = loss_fn(model.labels, model.logits)
        train_op, global_step = train_fn(loss)
        batch_acc, total_acc, acc_update, metrics_init = eval_fn(
            model.labels, model.logits)
        summary_op = tf.summary.merge_all()
        sess.run(tf.global_variables_initializer())

        train_writer.add_graph(sess.graph)
        saver = tf.train.Saver(max_to_keep=FLAGS.num_checkpoints)

        print('\n{}> Start training'.format(datetime.now()))

        epoch = 0
        valid_step = 0
        test_step = 0
        train_test_prop = len(data_reader.train_data) / len(
            data_reader.test_data)
        test_batch_size = int(FLAGS.batch_size / train_test_prop)
        best_acc = float('-inf')

        while epoch < FLAGS.num_epochs:
            epoch += 1
            print('\n{}> Epoch: {}'.format(datetime.now(), epoch))

            sess.run(metrics_init)
            for batch_docs, batch_labels in data_reader.read_train_set(
                    FLAGS.batch_size, shuffle=True):
                _step, _, _loss, _acc, _ = sess.run(
                    [global_step, train_op, loss, batch_acc, acc_update],
                    feed_dict=model.get_feed_dict(batch_docs,
                                                  batch_labels,
                                                  training=True))
                if _step % FLAGS.display_step == 0:
                    _summary = sess.run(summary_op,
                                        feed_dict=model.get_feed_dict(
                                            batch_docs, batch_labels))
                    train_writer.add_summary(_summary, global_step=_step)
            print('Training accuracy = {:.2f}'.format(
                sess.run(total_acc) * 100))

            sess.run(metrics_init)
            for batch_docs, batch_labels in data_reader.read_valid_set(
                    test_batch_size):
                _loss, _acc, _ = sess.run([loss, batch_acc, acc_update],
                                          feed_dict=model.get_feed_dict(
                                              batch_docs, batch_labels))
                valid_step += 1
                if valid_step % FLAGS.display_step == 0:
                    _summary = sess.run(summary_op,
                                        feed_dict=model.get_feed_dict(
                                            batch_docs, batch_labels))
                    valid_writer.add_summary(_summary, global_step=valid_step)
            print('Validation accuracy = {:.2f}'.format(
                sess.run(total_acc) * 100))

            sess.run(metrics_init)
            for batch_docs, batch_labels in data_reader.read_test_set(
                    test_batch_size):
                _loss, _acc, _ = sess.run([loss, batch_acc, acc_update],
                                          feed_dict=model.get_feed_dict(
                                              batch_docs, batch_labels))
                test_step += 1
                if test_step % FLAGS.display_step == 0:
                    _summary = sess.run(summary_op,
                                        feed_dict=model.get_feed_dict(
                                            batch_docs, batch_labels))
                    test_writer.add_summary(_summary, global_step=test_step)
            test_acc = sess.run(total_acc) * 100
            print('Testing accuracy = {:.2f}'.format(test_acc))

            if test_acc > best_acc:
                best_acc = test_acc
                saver.save(sess, FLAGS.checkpoint_dir)
            print('Best testing accuracy = {:.2f}'.format(best_acc))

    print("{} Optimization Finished!".format(datetime.now()))
    print('Best testing accuracy = {:.2f}'.format(best_acc))
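A note on the helper used by these HAN examples: load_glove(path, emb_size, vocab) is expected to return a (len(vocab), emb_size) matrix aligned with the word-to-index vocabulary. A minimal sketch under that assumption (the actual utility in the original repositories may differ, e.g. in how out-of-vocabulary words are initialized):

import numpy as np

def load_glove_sketch(glove_path, emb_size, vocab):
    # Illustrative sketch, not the original load_glove: fill a matrix from a
    # standard GloVe text file (one token followed by emb_size floats per line);
    # words missing from the file keep a small random initialization.
    emb = np.random.uniform(-0.25, 0.25, (len(vocab), emb_size)).astype(np.float32)
    with open(glove_path, encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip().split(' ')
            word, values = parts[0], parts[1:]
            if word in vocab and len(values) == emb_size:
                emb[vocab[word]] = np.asarray(values, dtype=np.float32)
    return emb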
Beispiel #22
0
def main(restore=False):
    word_embedding = None

    word_embedding_file = 'squad/word_embedding.pkl'
    if u.check_file(word_embedding_file):
        word_embedding = utils.load_file(word_embedding_file)
    else:
        print("==> Load vectors ...")
        vocabs = None
        ft_vc = 'squad/vocabs_fine_tuned.pkl'
        if u.check_file(ft_vc):
            vocabs = u.load_file(ft_vc)
        else:
            raise ValueError("Please check vocabs fine-tuned file")
        word2vec = utils.load_glove()
        word_embedding = np.zeros((len(vocabs), p.embed_size))
        for index, v in enumerate(vocabs):
            if v in word2vec:
                word_embedding[index] = word2vec[v]
            else:
                word_embedding[index] = pr.create_vector(
                    v, word2vec, p.embed_size)
        del word2vec
        utils.save_file('squad/word_embedding.pkl', word_embedding)
        print("==> Done vectors ")

    # init word embedding
    contexts, contexts_len, questions, questions_len, answers, answers_len, start, end = utils.load_file(
        'squad/doc_train_idx%s.pkl' % p.doc_suffix)

    data_len = int(np.floor(0.9 * len(contexts)))

    train = contexts[:data_len], contexts_len[:data_len], questions[:data_len], \
            questions_len[:data_len], start[:data_len], end[:data_len]
    dev = contexts[data_len:], contexts_len[data_len:], questions[data_len:], \
            questions_len[data_len:], start[data_len:], end[data_len:]

    config = Config()
    # config.strong_supervision = True
    model = SquadSkim(config)

    model.set_data(train, dev, word_embedding,
                   np.shape(contexts)[1],
                   np.shape(questions)[1])
    # model.set_encoding()
    model.init_ops()

    # tf.reset_default_graph()
    print('Start training DMN on squad')
    # model.init_data_node()
    best_overall_val_loss = float('inf')
    # create model
    tconfig = tf.ConfigProto(allow_soft_placement=True)

    with tf.device('/%s' % p.device):
        print('==> initializing variables')
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

    with tf.Session(config=tconfig) as session:

        sum_dir = 'summaries/train_squad/' + time.strftime("%Y-%m-%d %H %M")
        if not utils.check_file(sum_dir):
            os.makedirs(sum_dir)
        train_writer = tf.summary.FileWriter(sum_dir, session.graph)

        session.run(init)

        best_val_epoch = 0
        prev_epoch_loss = float('inf')
        best_val_loss = float('inf')
        best_val_accuracy = 0.0

        if restore:
            print('==> restoring weights')
            saver.restore(session, 'weights/squad.weights')

        print('==> starting training')
        for epoch in range(config.max_epochs):
            print('Epoch {}'.format(epoch))
            start = time.time()

            train_loss, train_accuracy = model.run_epoch(
                session,
                model.train,
                epoch,
                train_writer,
                train_op=model.train_step,
                train=True)
            valid_loss, valid_accuracy = model.run_epoch(session, model.valid)
            print('Training loss: {}'.format(train_loss))
            print('Validation loss: {}'.format(valid_loss))
            print('Training accuracy: {}'.format(train_accuracy))
            print('Validation accuracy: {}'.format(valid_accuracy))

            if valid_loss < best_val_loss:
                best_val_loss = valid_loss
                best_val_epoch = epoch
                if best_val_loss < best_overall_val_loss:
                    print('Saving weights')
                    best_overall_val_loss = best_val_loss
                    saver.save(session, 'weights/squad.weights')
            # anneal
            if train_loss > prev_epoch_loss * model.config.anneal_threshold:
                model.config.lr /= model.config.anneal_by
                print('annealed lr to %f' % model.config.lr)

            if best_val_accuracy < valid_accuracy:
                best_val_accuracy = valid_accuracy

            prev_epoch_loss = train_loss

            if epoch - best_val_epoch > config.early_stopping:
                break
            print('Total time: {}'.format(time.time() - start))

        print('Best validation accuracy:', best_val_accuracy)
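The SQuAD example above leans on small file utilities (u.check_file, utils.load_file, utils.save_file). A minimal sketch of what such pickle-backed helpers typically look like, assuming that interface (the originals may add logging or extra arguments):

import os
import pickle

def check_file(path):
    # Illustrative sketch, not the original utility: report whether a path exists.
    return os.path.exists(path)

def load_file(path, use_pickle=True):
    # Illustrative sketch: unpickle an object, or return raw lines when
    # use_pickle is False.
    if use_pickle:
        with open(path, 'rb') as f:
            return pickle.load(f)
    with open(path, encoding='utf-8') as f:
        return f.readlines()

def save_file(path, obj):
    # Illustrative sketch: pickle an object to disk.
    with open(path, 'wb') as f:
        pickle.dump(obj, f)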
Beispiel #23
0
def main(_):

    # load the word_to_index encoded vocabulary
    vocab = read_vocab(FLAGS.vocab)

    # create embedding matrix of size (vocab,emb_size)
    glove_embs = load_glove(FLAGS.embedding_file, FLAGS.emb_size, vocab)

    print('input embeddings shape: ', glove_embs.shape)

    # read data
    data_reader = DataReader(train_file=FLAGS.train_data_file,
                             dev_file=FLAGS.dev_data_file,
                             test_file=FLAGS.test_data_file,
                             num_classes=FLAGS.num_classes)

    config = tf.ConfigProto(allow_soft_placement=FLAGS.allow_soft_placement)

    tf.reset_default_graph()

    sess = tf.Session(config=config)

    model = Model(cell_dim=FLAGS.cell_dim,
                  att_dim=FLAGS.att_dim,
                  vocab_size=len(vocab),
                  emb_size=FLAGS.emb_size,
                  num_classes=FLAGS.num_classes,
                  dropout_rate=FLAGS.dropout_rate,
                  pretrained_embs=glove_embs)

    # calculate loss
    loss = loss_fn(model.labels, model.logits)
    total_loss, loss_update = tf.metrics.mean(loss, name='metrics/loss')

    # calculates gradients
    train_op, global_step = train_fn(loss)

    # calculates metrics and merges all
    batch_acc, total_acc, acc_update, metrics_init = eval_fn(
        model.labels, model.logits)
    summary_op = tf.summary.merge_all()

    summary_total = tf.summary.merge([
        tf.summary.scalar('total_batch_accuracy', total_acc),
        tf.summary.scalar("total_batch_loss", total_loss)
    ])

    sess.run(tf.global_variables_initializer())

    # The graph described by sess.graph will be displayed by TensorBoard
    train_writer.add_graph(sess.graph)

    # save all variables
    saver = tf.train.Saver(max_to_keep=FLAGS.num_checkpoints)

    print('\n{}> Start training'.format(datetime.now()))

    epoch = 0
    valid_step = 0
    test_step = 0
    train_test_prop = len(data_reader.train_data) / len(data_reader.test_data)

    test_batch_size = int(FLAGS.batch_size / train_test_prop)
    best_acc = float('-inf')

    while epoch < FLAGS.num_epochs:
        epoch += 1
        print('\n{}> Epoch: {}'.format(datetime.now(), epoch))

        # re-initialize the streaming metric variables before this epoch's evaluation passes
        sess.run(metrics_init)

        # each data point/doc in batch contains a list of sentences, encoded with index
        for batch_docs, batch_labels in data_reader.read_train_set(
                FLAGS.batch_size, shuffle=True):

            # do a batch
            _step, _, _loss, _acc = sess.run(
                [global_step, train_op, loss, batch_acc],
                feed_dict=model.get_feed_dict(batch_docs,
                                              batch_labels,
                                              training=True))

            # every display_step steps, evaluate the summaries and metric updates;
            # training defaults to False here, which disables dropout
            if _step % FLAGS.display_step == 0:
                _summary, _loss_save, _acc_save, _, _ = sess.run(
                    [summary_op, loss, batch_acc, acc_update, loss_update],
                    feed_dict=model.get_feed_dict(batch_docs, batch_labels))
                train_writer.add_summary(_summary, global_step=_step)
        last_step_epoch = _step

        # evaluate avg batch metrics
        total_acc_train, total_loss_train, summary_total_train = sess.run(
            [total_acc, total_loss, summary_total])
        train_writer.add_summary(summary_total_train,
                                 global_step=last_step_epoch)
        mlflow.log_metrics(
            {
                'avg_batch_accuracy': total_acc_train,
                'avg_batch_loss': total_loss_train
            },
            step=last_step_epoch)
        print('Avg training accuracy = {:.2f}'.format(total_acc_train))
        print('Avg training loss = {:.2f}'.format(total_loss_train))

        # re-initialize the streaming metric variables before the validation pass
        sess.run(metrics_init)

        # for each epoch calculate metrics for valid set
        for batch_docs, batch_labels in data_reader.read_valid_set(
                test_batch_size):
            _loss, _acc, _, _ = sess.run(
                [loss, batch_acc, acc_update, loss_update],
                feed_dict=model.get_feed_dict(batch_docs, batch_labels))

        total_acc_valid, total_loss_valid, summary_total_valid = sess.run(
            [total_acc, total_loss, summary_total])
        valid_writer.add_summary(summary_total_valid,
                                 global_step=last_step_epoch)
        mlflow.log_metrics(
            {
                'avg_valid_accuracy': total_acc_valid,
                'avg_valid_loss': total_loss_valid
            },
            step=last_step_epoch)
        print('Avg validation accuracy = {:.2f}'.format(total_acc_valid))
        print('Avg validation loss = {:.2f}'.format(total_loss_valid))

        # re-initialize the streaming metric variables before the test pass
        sess.run(metrics_init)

        # for each epoch calculate metrics for test set
        for batch_docs, batch_labels in data_reader.read_test_set(
                test_batch_size):
            _loss, _acc, _, _ = sess.run(
                [loss, batch_acc, acc_update, loss_update],
                feed_dict=model.get_feed_dict(batch_docs, batch_labels))

        total_acc_test, total_loss_test, summary_total_test = sess.run(
            [total_acc, total_loss, summary_total])
        test_writer.add_summary(summary_total_test,
                                global_step=last_step_epoch)
        mlflow.log_metrics(
            {
                'avg_test_accuracy': total_acc_test,
                'avg_test_loss': total_loss_test
            },
            step=last_step_epoch)
        print('Avg test accuracy = {:.2f}'.format(total_acc_test))
        print('Avg test loss = {:.2f}'.format(total_loss_test))

        # keep track of best test accuracy, if epoch improved, save all variables
        if total_acc_test > best_acc:
            best_acc = total_acc_test
            saver.save(sess, FLAGS.checkpoint_dir)
        print('Best testing accuracy = {:.2f}'.format(best_acc))

    print("{} Optimization Finished!".format(datetime.now()))
    print('Best testing accuracy = {:.2f}'.format(best_acc))
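The streaming metrics above (batch_acc, total_acc, acc_update, metrics_init) follow the TF 1.x tf.metrics pattern: running totals live in local variables that are re-initialized before each evaluation pass. A minimal sketch of an eval_fn with that contract, assuming integer class labels (the original eval_fn may differ in details):

import tensorflow as tf

def eval_fn_sketch(labels, logits):
    # Illustrative sketch, not the original eval_fn.
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    # Per-batch accuracy, usable directly in a training fetch.
    batch_acc = tf.reduce_mean(
        tf.cast(tf.equal(predictions, tf.cast(labels, tf.int32)), tf.float32))
    # Streaming accuracy accumulated across batches via acc_update.
    total_acc, acc_update = tf.metrics.accuracy(labels, predictions,
                                                name='metrics/acc')
    # Resetting these local variables restarts the running average.
    metric_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                    scope='metrics')
    metrics_init = tf.variables_initializer(metric_vars)
    tf.summary.scalar('batch_accuracy', batch_acc)
    return batch_acc, total_acc, acc_update, metrics_init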
Beispiel #24
0
maxlen = 50

tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(texts)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(texts)
sequences = pad_sequences(sequences, maxlen=maxlen)

tokenizer_tag = Tokenizer()
tokenizer_tag.fit_on_texts(tags)
tags = tokenizer_tag.texts_to_sequences(tags)
tags = np.array(list((map(lambda x: x[0], tags))))
tags = to_categorical(tags)

# load embedding
emb_matrix = load_glove(word_index)

# Get test/problem/treatment matrix: info_matrix
# info_matrix: (m,3,maxlen), which uses one-hot to indicate the entity property of the token
targets = ['test_info', 'problem_info', 'treatment_info']
info_matrix = np.zeros((sequences.shape[0], 3, maxlen))

for i, target in enumerate(targets):
    for k, j in train_df[target].str.extract(r'(\d+)\|(\d+)').iterrows():
        if not pd.isnull(j[0]):
            info_matrix[k, i, int(j[0]) - 1:int(j[1])] = 1
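# For example (illustrative values, not from the original data): a span string
# '3|5' denotes 1-based tokens 3..5 inclusive, so the assignment above becomes
# info_matrix[k, i, 2:5] = 1, flagging 0-based token positions 2, 3 and 4.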

# Shuffle the data
np.random.seed(2019)
index = np.random.permutation(len(sequences))
Beispiel #25
0
train_dataset = entity_linking_v3(train_part, t)
valid_dataset = entity_linking_v3(valid_part, t)

batch_size = 1

# Prepare the embedding data
embedding_file = 'embedding/miniembedding_baike_link.npy'
#embedding_file = 'embedding/miniembedding_engineer_qq_att.npy'

if os.path.exists(embedding_file):
    embedding_matrix = np.load(embedding_file)
else:
    #embedding = '/home/zhukaihua/Desktop/nlp/embedding/baike'
    embedding = '/home/zhu/Desktop/word_embedding/sgns.baidubaike.bigram-char'
    #embedding = '/home/zhukaihua/Desktop/nlp/embedding/Tencent_AILab_ChineseEmbedding.txt'
    embedding_matrix = load_glove(embedding, t.num_words + 100, t)
    np.save(embedding_file, embedding_matrix)

train_batch_size = 1
valid_batch_size = 1

model = EntityLink_v3(vocab_size=embedding_matrix.shape[0],
                      encoder_size=128,
                      dropout=0.5,
                      init_embedding=embedding_matrix)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

train_dataloader = DataLoader(train_dataset,
                              collate_fn=collate_fn_linking_v3,
Beispiel #26
0
from model_bi_attention import SentenceSelector
import options

import pdb

options = options.CoqaOptions()

torch.cuda.set_device(0)
device = torch.device('cuda:{}'.format(options.gpu))

print("Reading data pickles")

train_data = utils.unpickler(options.data_pkl_path, options.train_pkl_name)
dev_data = utils.unpickler(options.data_pkl_path, options.dev_pkl_name)

glove = utils.load_glove(options.data_pkl_path, options.glove_store)

# pdb.set_trace()

print("Building model")

model = SentenceSelector(options, glove, device)

model.to(device)

print("===============================")
print("Model:")
print(model)
print("===============================")

criterion = nn.CrossEntropyLoss()
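With the model and criterion in place, the training loop (not shown in this snippet) typically amounts to a standard PyTorch step. A minimal sketch, assuming SentenceSelector maps a batch of inputs to (batch, num_classes) logits; the real batch layout for CoQA sentence selection is not visible here:

def train_step_sketch(model, optimizer, criterion, inputs, labels):
    # Illustrative sketch, not the original training loop: one optimization step.
    model.train()
    optimizer.zero_grad()
    logits = model(inputs)            # assumed shape: (batch, num_classes)
    loss = criterion(logits, labels)
    loss.backward()
    optimizer.step()
    return loss.item()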
Beispiel #27
0
def train():
    with tf.device('/cpu:0'):
        train_text, train_y, train_pos1, train_pos2, train_x_text_clean, train_sentence_len = data_helpers.load_data_and_labels(
            FLAGS.train_path)
    with tf.device('/cpu:0'):
        test_text, test_y, test_pos1, test_pos2, test_x_text_clean, test_sentence_len = data_helpers.load_data_and_labels(
            FLAGS.test_path)
    # Build vocabulary
    # Example: x_text[3] = "A misty <e1>ridge</e1> uprises from the <e2>surge</e2>."
    # ['a misty ridge uprises from the surge <UNK> <UNK> ... <UNK>']
    # =>
    # [27 39 40 41 42  1 43  0  0 ... 0]
    # dimension = FLAGS.max_sentence_length
    # print("text:",x_text)
    vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
        FLAGS.max_sentence_length)
    vocab_processor.fit(train_text + test_text)
    train_x = np.array(list(vocab_processor.transform(train_text)))
    test_x = np.array(list(vocab_processor.transform(test_text)))
    train_text = np.array(train_text)
    print("train_text", train_text[0:2])
    test_text = np.array(test_text)
    print("\nText Vocabulary Size: {:d}".format(
        len(vocab_processor.vocabulary_)))
    print("train_x = {0}".format(train_x.shape))  # (8000,90)
    print("train_y = {0}".format(train_y.shape))  # (8000,19)
    print("test_x = {0}".format(test_x.shape))  # (2717, 90)
    print("test_y = {0}".format(test_y.shape))  # (2717,19)

    # Example: pos1[3] = [-2 -1  0  1  2   3   4 999 999 999 ... 999]
    # [95 96 97 98 99 100 101 999 999 999 ... 999]
    # =>
    # [11 12 13 14 15  16  21  17  17  17 ...  17]
    # dimension = MAX_SENTENCE_LENGTH
    pos_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
        FLAGS.max_sentence_length)
    pos_vocab_processor.fit(train_pos1 + train_pos2 + test_pos1 + test_pos2)
    train_p1 = np.array(list(pos_vocab_processor.transform(train_pos1)))
    train_p2 = np.array(list(pos_vocab_processor.transform(train_pos2)))
    test_p1 = np.array(list(pos_vocab_processor.transform(test_pos1)))
    test_p2 = np.array(list(pos_vocab_processor.transform(test_pos2)))
    print("\nPosition Vocabulary Size: {:d}".format(
        len(pos_vocab_processor.vocabulary_)))
    print("train_p1 = {0}".format(train_p1.shape))  # (8000, 90)
    print("test_p1 = {0}".format(test_p1.shape))  # (2717, 90)
    print("")

    # Randomly shuffle data to split into train and test(dev)
    # np.random.seed(10)
    #
    # shuffle_indices = np.random.permutation(np.arange(len(y))) #len(y)=8000
    # x_shuffled = x[shuffle_indices]
    # p1_shuffled = p1[shuffle_indices]
    # p2_shuffled = p2[shuffle_indices]
    # y_shuffled = y[shuffle_indices]
    # print(x_shuffled, p1_shuffled,p2_shuffled,y_shuffled)

    # Split train/test set
    # TODO: This is very crude, should use cross-validation
    # dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y))) #x_train=7200, x_dev =800
    # x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
    # p1_train, p1_dev = p1_shuffled[:dev_sample_index], p1_shuffled[dev_sample_index:]
    # p2_train, p2_dev = p2_shuffled[:dev_sample_index], p2_shuffled[dev_sample_index:]
    # y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
    # print("Train/Dev split: {:d}/{:d}\n".format(len(y_train), len(y_dev)))
    # print(x_train)
    # print(np.array(x_train))
    # print(x_dev)
    # print(np.array(x_dev))

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(
                sequence_length=FLAGS.max_sentence_length,  #90
                num_classes=train_y.shape[1],  #19
                text_vocab_size=len(vocab_processor.vocabulary_),  #19151
                text_embedding_size=FLAGS.text_embedding_size,  #300
                pos_vocab_size=len(pos_vocab_processor.vocabulary_),  #162
                pos_embedding_size=FLAGS.pos_embedding_dim,  #50
                filter_sizes=list(map(
                    int, FLAGS.filter_sizes.split(","))),  #2,3,4,5
                num_filters=FLAGS.num_filters,  #128
                l2_reg_lambda=FLAGS.l2_reg_lambda,  #1e-5
                use_elmo=(FLAGS.embeddings == 'elmo'))

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdadeltaOptimizer(FLAGS.learning_rate,
                                                   FLAGS.decay_rate, 1e-6)
            gvs = optimizer.compute_gradients(cnn.loss)
            capped_gvs = [(tf.clip_by_value(grad, -1.0, 1.0), var)
                          for grad, var in gvs]
            train_op = optimizer.apply_gradients(capped_gvs,
                                                 global_step=global_step)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("\nWriting to {}\n".format(out_dir))

            # Logger
            logger = Logger(out_dir)

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))
            pos_vocab_processor.save(os.path.join(out_dir, "pos_vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            if FLAGS.embeddings == "word2vec":
                pretrain_W = utils.load_word2vec(
                    'resource/GoogleNews-vectors-negative300.bin',
                    FLAGS.embedding_size, vocab_processor)
                sess.run(cnn.W_text.assign(pretrain_W))
                print("Success to load pre-trained word2vec model!\n")
            elif FLAGS.embeddings == "glove100":
                pretrain_W = utils.load_glove('resource/glove.6B.100d.txt',
                                              FLAGS.embedding_size,
                                              vocab_processor)
                sess.run(cnn.W_text.assign(pretrain_W))
                print("Success to load pre-trained glove100 model!\n")
            elif FLAGS.embeddings == "glove300":
                pretrain_W = utils.load_glove('resource/glove.840B.300d.txt',
                                              FLAGS.embedding_size,
                                              vocab_processor)
                sess.run(cnn.W_text.assign(pretrain_W))
                print("Success to load pre-trained glove300 model!\n")

            # Generate batches
            train_batches = data_helpers.batch_iter(
                list(zip(train_x, train_y, train_text, train_p1, train_p2)),
                FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            best_f1 = 0.0  # For save checkpoint(model)
            for train_batch in train_batches:
                train_bx, train_by, train_btxt, train_bp1, train_bp2 = zip(
                    *train_batch)
                # print("train_bxt",list(train_btxt)[:2])
                # print(np.array(train_be1).shape) #(20, )
                # print(train_be1)
                feed_dict = {
                    cnn.input_text: train_bx,
                    cnn.input_y: train_by,
                    cnn.input_x_text: list(train_btxt),
                    cnn.input_p1: train_bp1,
                    cnn.input_p2: train_bp2,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)
                train_summary_writer.add_summary(summaries, step)

                # Training log display
                if step % FLAGS.display_every == 0:
                    logger.logging_train(step, loss, accuracy)

                # Evaluation
                if step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    # Generate batches
                    test_batches = data_helpers.batch_iter(list(
                        zip(test_x, test_y, test_text, test_p1, test_p2)),
                                                           FLAGS.batch_size,
                                                           1,
                                                           shuffle=False)
                    # Training loop. For each batch...
                    losses = 0.0
                    accuracy = 0.0
                    predictions = []
                    iter_cnt = 0
                    for test_batch in test_batches:
                        test_bx, test_by, test_btxt, test_bp1, test_bp2 = zip(
                            *test_batch)
                        feed_dict = {
                            cnn.input_text: test_bx,
                            cnn.input_y: test_by,
                            cnn.input_x_text: list(test_btxt),
                            cnn.input_p1: test_bp1,
                            cnn.input_p2: test_bp2,
                            cnn.dropout_keep_prob: 1.0
                        }
                        loss, acc, pred = sess.run(
                            [cnn.loss, cnn.accuracy, cnn.predictions],
                            feed_dict)
                        losses += loss
                        accuracy += acc
                        predictions += pred.tolist()
                        iter_cnt += 1
                    losses /= iter_cnt
                    accuracy /= iter_cnt
                    predictions = np.array(predictions, dtype='int')

                    logger.logging_eval(step, losses, accuracy, predictions)

                    # Model checkpoint
                    if best_f1 < logger.best_f1:
                        best_f1 = logger.best_f1
                        path = saver.save(sess,
                                          checkpoint_prefix +
                                          "-{:.3g}".format(best_f1),
                                          global_step=step)
                        print("Saved model checkpoint to {}\n".format(path))
Beispiel #28
0
if __name__ == '__main__':
    DATA_ROOT = os.path.join(os.path.dirname(__file__), os.environ["data_dir"])
    EMBEDDING_PATH = os.path.join(os.path.dirname(__file__),
                                  os.environ["glove_dir"])

    train_path = os.path.join(DATA_ROOT, 'train.txt')
    valid_path = os.path.join(DATA_ROOT, 'valid.txt')
    test_path = os.path.join(DATA_ROOT, 'test.txt')

    print('Loading data...')
    x_train, y_train = load_data_and_labels(train_path)
    x_valid, y_valid = load_data_and_labels(valid_path)
    x_test, y_test = load_data_and_labels(test_path)
    print(len(x_train), 'train sequences')
    print(len(x_valid), 'valid sequences')
    print(len(x_test), 'test sequences')

    embeddings = load_glove(EMBEDDING_PATH)

    # Use pre-trained word embeddings
    model = Sequence(cell_type=os.environ['cell_type'],
                     embeddings=embeddings,
                     initial_vocab=embeddings.keys())
    # print(model.trainable_weights)

    model.fit(x_train, y_train, x_valid, y_valid, epochs=30)

    print('Testing the model...')
    print(model.score(x_test, y_test))
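The sequence-labelling example above reads its train/valid/test splits with load_data_and_labels. A minimal sketch assuming CoNLL-style input, i.e. one 'token<TAB>tag' pair per line with blank lines separating sentences (the original loader may use a different column separator):

def load_data_and_labels_sketch(path):
    # Illustrative sketch, not the original loader: returns parallel lists of
    # token sequences and tag sequences.
    sents, labels = [], []
    words, tags = [], []
    with open(path, encoding='utf-8') as f:
        for line in f:
            line = line.rstrip('\n')
            if line:
                token, tag = line.split('\t')
                words.append(token)
                tags.append(tag)
            elif words:
                sents.append(words)
                labels.append(tags)
                words, tags = [], []
    if words:
        sents.append(words)
        labels.append(tags)
    return sents, labels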
Beispiel #29
0
def _define_global(glove_file):
    global glove6b300d
    glove6b300d = load_glove(glove_file, verbose=0)
Beispiel #30
0
def train():
    with tf.device('/cpu:0'):
        train_text, train_y, train_e1, train_e2, train_pos1, train_pos2, train_rw, train_rw_pos, train_rw_cate = data_helpers.load_data_and_labels(
            FLAGS.train_path)
    with tf.device('/cpu:0'):
        test_text, test_y, test_e1, test_e2, test_pos1, test_pos2, test_rw, test_rw_pos, test_rw_cate = data_helpers.load_data_and_labels(
            FLAGS.test_path)

    #words = data_helpers.relation_words([train_between_e, test_between_e])
    #train_relation_words_between_entity = data_helpers.relation_words_between_entity(train_between_e, words)
    #test_relation_words_between_entity = data_helpers.relation_words_between_entity(test_between_e, words)

    # Build vocabulary
    # Example: x_text[3] = "A misty <e1>ridge</e1> uprises from the <e2>surge</e2>."
    # ['a misty ridge uprises from the surge <UNK> <UNK> ... <UNK>']
    # =>
    # [27 39 40 41 42  1 43  0  0 ... 0]
    # dimension = MAX_SENTENCE_LENGTH
    vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
        FLAGS.max_sentence_length)
    vocab_processor.fit(train_text + test_text)
    train_x = np.array(list(vocab_processor.transform(train_text)))
    test_x = np.array(list(vocab_processor.transform(test_text)))
    train_text = np.array(train_text)
    test_text = np.array(test_text)
    print("\nText Vocabulary Size: {:d}".format(
        len(vocab_processor.vocabulary_)))
    print("train_x = {0}".format(train_x.shape))
    print("train_y = {0}".format(train_y.shape))
    print("test_x = {0}".format(test_x.shape))
    print("test_y = {0}".format(test_y.shape))

    vocab_processor2 = tf.contrib.learn.preprocessing.VocabularyProcessor(6)
    vocab_processor2.fit(train_rw + test_rw)
    train_rw_x = np.array(list(vocab_processor2.transform(train_rw)))
    test_rw_x = np.array(list(vocab_processor2.transform(test_rw)))
    train_rw_text = np.array(train_rw)
    test_rw_text = np.array(test_rw)

    vocab_processor2pos = tf.contrib.learn.preprocessing.VocabularyProcessor(6)
    vocab_processor2pos.fit(train_rw_pos + test_rw_pos)
    train_rw_pos_x = np.array(list(
        vocab_processor2pos.transform(train_rw_pos)))
    test_rw_pos_x = np.array(list(vocab_processor2pos.transform(test_rw_pos)))
    train_rw_pos_text = np.array(train_rw_pos)
    test_rw_pos_text = np.array(test_rw_pos)

    # Example: pos1[3] = [-2 -1  0  1  2   3   4 999 999 999 ... 999]
    # [95 96 97 98 99 100 101 999 999 999 ... 999]
    # =>
    # [11 12 13 14 15  16  21  17  17  17 ...  17]
    # dimension = MAX_SENTENCE_LENGTH
    pos_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
        FLAGS.max_sentence_length)
    pos_vocab_processor.fit(train_pos1 + train_pos2 + test_pos1 + test_pos2)
    train_p1 = np.array(list(pos_vocab_processor.transform(train_pos1)))
    train_p2 = np.array(list(pos_vocab_processor.transform(train_pos2)))
    test_p1 = np.array(list(pos_vocab_processor.transform(test_pos1)))
    test_p2 = np.array(list(pos_vocab_processor.transform(test_pos2)))
    print("\nPosition Vocabulary Size: {:d}".format(
        len(pos_vocab_processor.vocabulary_)))
    print("train_p1 = {0}".format(train_p1.shape))
    print("test_p1 = {0}".format(test_p1.shape))
    print("")

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            model = EntityAttentionLSTM(
                sequence_length=train_x.shape[1],
                rw_length=6,
                num_classes=train_y.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                rw_vocab_size=len(vocab_processor2.vocabulary_),
                rw_pos_vocab_size=len(vocab_processor2pos.vocabulary_),
                embedding_size=FLAGS.embedding_size,
                pos_vocab_size=len(pos_vocab_processor.vocabulary_),
                pos_embedding_size=FLAGS.pos_embedding_size,
                hidden_size=FLAGS.hidden_size,
                num_heads=FLAGS.num_heads,
                attention_size=FLAGS.attention_size,
                use_elmo=(FLAGS.embeddings == 'elmo'),
                l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdadeltaOptimizer(FLAGS.learning_rate,
                                                   FLAGS.decay_rate, 1e-6)
            gvs = optimizer.compute_gradients(model.loss)
            capped_gvs = [(tf.clip_by_value(grad, -1.0, 1.0), var)
                          for grad, var in gvs]
            train_op = optimizer.apply_gradients(capped_gvs,
                                                 global_step=global_step)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("\nWriting to {}\n".format(out_dir))

            # Logger
            logger = Logger(out_dir)

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", model.loss)
            acc_summary = tf.summary.scalar("accuracy", model.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))
            vocab_processor2.save(os.path.join(out_dir, "rw_vocab"))
            vocab_processor2pos.save(os.path.join(out_dir, "rw_pos_vocab"))
            pos_vocab_processor.save(os.path.join(out_dir, "pos_vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            if FLAGS.embeddings == "word2vec":
                pretrain_W = utils.load_word2vec(
                    'resource/GoogleNews-vectors-negative300.bin',
                    FLAGS.embedding_size, vocab_processor)
                sess.run(model.W_text.assign(pretrain_W))
                print("Success to load pre-trained word2vec model!\n")
            elif FLAGS.embeddings == "glove100":
                pretrain_W = utils.load_glove('resource/glove.6B.100d.txt',
                                              FLAGS.embedding_size,
                                              vocab_processor)
                sess.run(model.W_text.assign(pretrain_W))
                print("Success to load pre-trained glove100 model!\n")
            elif FLAGS.embeddings == "glove300":
                pretrain_W = utils.load_glove('resource/glove.840B.300d.txt',
                                              FLAGS.embedding_size,
                                              vocab_processor)
                pretrain_rw_W = utils.load_glove(
                    'resource/glove.840B.300d.txt', FLAGS.embedding_size,
                    vocab_processor2)
                sess.run(model.W_text.assign(pretrain_W))
                sess.run(model.W_rw_text.assign(pretrain_rw_W))
                print("Success to load pre-trained glove300 model!\n")

            # Generate batches
            train_batches = data_helpers.batch_iter(
                list(
                    zip(train_x, train_y, train_text, train_e1, train_e2,
                        train_p1, train_p2, train_rw_x, train_rw_text,
                        train_rw_pos_x, train_rw_pos_text, train_rw_cate)),
                FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            best_f1 = 0.0  # For save checkpoint(model)
            for train_batch in train_batches:
                train_bx, train_by, train_btxt, train_be1, train_be2, train_bp1, train_bp2, train_brw_x, train_brw_text, train_brw_pos_x, train_brw_pos_text, train_brw_cate = zip(
                    *train_batch)
                feed_dict = {
                    model.input_x: train_bx,
                    model.input_y: train_by,
                    model.input_text: train_btxt,
                    model.input_e1: train_be1,
                    model.input_e2: train_be2,
                    model.input_p1: train_bp1,
                    model.input_p2: train_bp2,
                    model.input_rw_x: train_brw_x,  ########
                    model.input_rw_text: train_brw_text,  ##########
                    model.input_rw_pos_x: train_brw_pos_x,  #######
                    model.input_rw_pos_text: train_brw_pos_text,  #######
                    model.input_rw_cate: train_brw_cate,  ###########
                    model.emb_dropout_keep_prob: FLAGS.emb_dropout_keep_prob,
                    model.rnn_dropout_keep_prob: FLAGS.rnn_dropout_keep_prob,
                    model.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, model.loss,
                    model.accuracy
                ], feed_dict)
                train_summary_writer.add_summary(summaries, step)

                # Training log display
                if step % FLAGS.display_every == 0:
                    logger.logging_train(step, loss, accuracy)

                # Evaluation
                if step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    # Generate batches
                    test_batches = data_helpers.batch_iter(list(
                        zip(test_x, test_y, test_text, test_e1, test_e2,
                            test_p1, test_p2, test_rw_x, test_rw_text,
                            test_rw_pos_x, test_rw_pos_text, test_rw_cate)),
                                                           FLAGS.batch_size,
                                                           1,
                                                           shuffle=False)
                    # Training loop. For each batch...
                    losses = 0.0
                    accuracy = 0.0
                    predictions = []
                    iter_cnt = 0
                    for test_batch in test_batches:
                        test_bx, test_by, test_btxt, test_be1, test_be2, test_bp1, test_bp2, test_brw_x, test_brw_text, test_brw_pos_x, test_brw_pos_text, test_brw_cate = zip(
                            *test_batch)
                        feed_dict = {
                            model.input_x: test_bx,
                            model.input_y: test_by,
                            model.input_text: test_btxt,
                            model.input_e1: test_be1,
                            model.input_e2: test_be2,
                            model.input_p1: test_bp1,
                            model.input_p2: test_bp2,
                            model.input_rw_x: test_brw_x,  ########
                            model.input_rw_text: test_brw_text,  ##########
                            model.input_rw_pos_x: test_brw_pos_x,  #######
                            model.input_rw_pos_text:
                            test_brw_pos_text,  #######
                            model.input_rw_cate: test_brw_cate,  #########
                            model.emb_dropout_keep_prob: 1.0,
                            model.rnn_dropout_keep_prob: 1.0,
                            model.dropout_keep_prob: 1.0
                        }
                        loss, acc, pred = sess.run(
                            [model.loss, model.accuracy, model.predictions],
                            feed_dict)
                        losses += loss
                        accuracy += acc
                        predictions += pred.tolist()
                        iter_cnt += 1
                    losses /= iter_cnt
                    accuracy /= iter_cnt
                    predictions = np.array(predictions, dtype='int')

                    logger.logging_eval(step, losses, accuracy, predictions)

                    # Model checkpoint
                    if best_f1 < logger.best_f1:
                        best_f1 = logger.best_f1
                        path = saver.save(sess,
                                          checkpoint_prefix +
                                          "-{:.3g}".format(best_f1),
                                          global_step=step)
                        print("Saved model checkpoint to {}\n".format(path))
Beispiel #31
0
parser.add_argument('--save_every', type=int, default=1, help='save state every x epoch')
parser.add_argument('--prefix', type=str, default="", help='optional prefix of network name')
parser.add_argument('--no-shuffle', dest='shuffle', action='store_false')
parser.add_argument('--babi_test_id', type=int, default=-1, help='babi_id of test set')
parser.set_defaults(shuffle=True)
args = parser.parse_args()

assert args.word_vector_size in [50, 100, 200, 300]

network_name = args.prefix + '%s.mh%d.n%d.bs%d%s.babi%s' % (args.network, args.memory_hops, args.dim, 
    args.batch_size, ".na" if args.normalize_attention else "", args.babi_id)


babi_train_raw, babi_test_raw = utils.get_babi_raw(args.babi_id, args.babi_test_id)

word2vec = utils.load_glove(args.word_vector_size)

args_dict = dict(args._get_kwargs())
args_dict['babi_train_raw'] = babi_train_raw
args_dict['babi_test_raw'] = babi_test_raw
args_dict['word2vec'] = word2vec
    

# init class
if args.network == 'dmn_batch':
    import dmn_batch
    dmn = dmn_batch.DMN_batch(**args_dict)

elif args.network == 'dmn_basic':
    import dmn_basic
    if (args.batch_size != 1):
Beispiel #32
0
def main(_):
    vocab = read_vocab('data/ICLR_Review_all_with_decision-w2i.pkl')
    glove_embs = load_glove('glove.6B.{}d.txt'.format(FLAGS.emb_size),
                            FLAGS.emb_size, vocab)
    data_reader = DataReader(
        train_file='data/ICLR_Review_all_with_decision-train.pkl',
        dev_file='data/ICLR_Review_all_with_decision-dev.pkl',
        test_file='data/ICLR_Review_all_with_decision-test.pkl')

    config = tf.ConfigProto(allow_soft_placement=FLAGS.allow_soft_placement)
    with tf.Session(config=config) as sess:
        model = Model(cell_dim=FLAGS.cell_dim,
                      att_dim=FLAGS.att_dim,
                      vocab_size=len(vocab),
                      emb_size=FLAGS.emb_size,
                      num_classes=FLAGS.num_classes,
                      dropout_rate=FLAGS.dropout_rate,
                      pretrained_embs=glove_embs)

        loss = loss_fn(model.labels, model.logits)
        train_op, global_step = train_fn(loss)
        batch_acc, total_acc, acc_update, metrics_init, predictions = eval_fn(
            model.labels, model.logits)
        summary_op = tf.summary.merge_all()
        sess.run(tf.global_variables_initializer())

        train_writer.add_graph(sess.graph)
        saver = tf.train.Saver(max_to_keep=FLAGS.num_checkpoints)

        print('\n{}> Start training'.format(datetime.now()))
        result_save_folder = str(datetime.now())
        output_folder = os.path.join('.', 'output')
        create_folder_if_not_exists(output_folder)

        stats_graph_folder = os.path.join(
            output_folder, result_save_folder)  # Folder where to save graphs
        create_folder_if_not_exists(stats_graph_folder)

        epoch = 0
        valid_step = 0
        test_step = 0
        train_test_prop = len(data_reader.train_data) / len(
            data_reader.test_data)
        test_batch_size = int(FLAGS.batch_size / train_test_prop)
        best_acc = float('-inf')

        while epoch < FLAGS.num_epochs:
            epoch += 1
            print('\n{}> Epoch: {}'.format(datetime.now(), epoch))

            sess.run(metrics_init)
            all_labels = []
            all_y_pred = []
            for batch_docs, batch_labels in data_reader.read_train_set(
                    FLAGS.batch_size, shuffle=True):
                _step, _, _loss, _acc, _, y_pred_batch = sess.run(
                    [
                        global_step, train_op, loss, batch_acc, acc_update,
                        predictions
                    ],
                    feed_dict=model.get_feed_dict(batch_docs,
                                                  batch_labels,
                                                  training=True))
                all_labels += batch_labels
                #y_pred_batch_array = y_pred_batch.eval(session=sess)
                y_pred_batch_list = y_pred_batch.tolist()
                all_y_pred += y_pred_batch_list
                if _step % FLAGS.display_step == 0:
                    _summary = sess.run(summary_op,
                                        feed_dict=model.get_feed_dict(
                                            batch_docs, batch_labels))
                    train_writer.add_summary(_summary, global_step=_step)
            print('Training accuracy = {:.2f}'.format(
                sess.run(total_acc) * 100))
            save_results(all_labels, all_y_pred, stats_graph_folder, 'train',
                         epoch)

            sess.run(metrics_init)
            all_valid_labels = []
            all_valid_y_pred = []
            for batch_docs, batch_labels in data_reader.read_valid_set(
                    test_batch_size):
                _loss, _acc, _, valid_y_pred_batch = sess.run(
                    [loss, batch_acc, acc_update, predictions],
                    feed_dict=model.get_feed_dict(batch_docs, batch_labels))
                all_valid_labels += batch_labels
                valid_y_pred_batch_list = valid_y_pred_batch.tolist()
                all_valid_y_pred += valid_y_pred_batch_list

                valid_step += 1
                if valid_step % FLAGS.display_step == 0:
                    _summary = sess.run(summary_op,
                                        feed_dict=model.get_feed_dict(
                                            batch_docs, batch_labels))
                    valid_writer.add_summary(_summary, global_step=valid_step)
            print('Validation accuracy = {:.2f}'.format(
                sess.run(total_acc) * 100))
            #save_optimized_presicion(all_valid_labels, all_valid_y_pred, stats_graph_folder, 'valid', epoch)
            #save_distance_measure(all_valid_labels, all_valid_y_pred, stats_graph_folder, 'valid', epoch)
            save_results(all_valid_labels, all_valid_y_pred,
                         stats_graph_folder, 'valid', epoch)

            sess.run(metrics_init)
            all_test_labels = []
            all_test_y_pred = []
            for batch_docs, batch_labels in data_reader.read_test_set(
                    test_batch_size):
                _loss, _acc, _, test_y_pred_batch = sess.run(
                    [loss, batch_acc, acc_update, predictions],
                    feed_dict=model.get_feed_dict(batch_docs, batch_labels))
                all_test_labels += batch_labels
                test_y_pred_batch_list = test_y_pred_batch.tolist()
                all_test_y_pred += test_y_pred_batch_list

                test_step += 1
                if test_step % FLAGS.display_step == 0:
                    _summary = sess.run(summary_op,
                                        feed_dict=model.get_feed_dict(
                                            batch_docs, batch_labels))
                    test_writer.add_summary(_summary, global_step=test_step)
            test_acc = sess.run(total_acc) * 100
            print('Testing accuracy = {:.2f}'.format(test_acc))
            #save_optimized_presicion(all_test_labels, all_test_y_pred, stats_graph_folder, 'test', epoch)
            #save_distance_measure(all_test_labels, all_test_y_pred, stats_graph_folder, 'test', epoch)
            save_results(all_test_labels, all_test_y_pred, stats_graph_folder,
                         'test', epoch)

            if test_acc > best_acc:
                best_acc = test_acc
                saver.save(sess, FLAGS.checkpoint_dir)
            print('Best testing accuracy = {:.2f}'.format(best_acc))

    print("{} Optimization Finished!".format(datetime.now()))
    print('Best testing accuracy = {:.2f}'.format(best_acc))