Example #1
0
def infer(args):
    """Run word-sense-disambiguation inference.

    Builds a prototype ("sense vector") for every sense by averaging the
    LSTM context vectors of the labelled training examples, then labels
    each test example with the candidate sense whose prototype has the
    highest cosine similarity, and reports accuracy.

    Args:
        args: namespace with at least save_dir, test_file, sense_file,
              batch_size, seq_length and data_set_size.
    """
    start = time.time()

    # Load saved training configuration and vocabulary
    # ====================================
    with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
        saved_args = pickle.load(f)
        print('restored args:\n', json.dumps(vars(saved_args), indent=4, separators=(',', ':')))

    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'rb') as f:
        _, vocab = pickle.load(f)
    data_loader = TextLoader(args.test_file, args.sense_file, args.batch_size,
                             args.seq_length, args.data_set_size, shuffle=True)

    # sense name -> sense index
    with open('wsd_senses_idx.p', 'rb') as f:
        sense_idx = pickle.load(f)
    # ambiguous word -> list of candidate senses
    with open('verbs_sense.p', 'rb') as f:
        words_sense = pickle.load(f)

    # Predict
    # ===================================
    with tf.Graph().as_default():
        with tf.Session() as sess:
            # Restore the trained graph and look up the tensors we need.
            # NOTE(review): the original imported the meta graph from a
            # hardcoded './save2/model.ckpt-65.meta' but restored weights
            # from './save/'; both now come from args.save_dir -- confirm
            # this matches the directory used by training.
            checkpoint = tf.train.latest_checkpoint(args.save_dir)
            saver = tf.train.import_meta_graph(checkpoint + '.meta')
            saver.restore(sess, checkpoint)
            graph = tf.get_default_graph()
            graph_x = graph.get_tensor_by_name("x:0")
            graph_y = graph.get_tensor_by_name("y:0")
            graph_context_layer = graph.get_tensor_by_name("cont_layer:0")

            # Fetch the held-out evaluation batch once.
            data_loader.reset_batch_pointer()
            x_batch_test, y_batch_test, unk_count_test, n_sent_test, senss_test = \
                data_loader.next_batch_test(collect_sense=True)
            feed_dict_test = {graph_x: x_batch_test, graph_y: y_batch_test}
            data_loader.reset_batch_pointer()

            # Collect one context vector per labelled training example.
            xs = []
            ys = []
            senses = []
            for _ in range(data_loader.num_batches):
                x_batch, y_batch, unk_count, n_sent, senss = \
                    data_loader.next_batch(collect_sense=True, shuffle=False)
                word_vecs = sess.run(graph_context_layer,
                                     {graph_x: x_batch, graph_y: y_batch})
                for j in range(len(y_batch)):
                    # -1 marks sentences that have no target verb
                    if y_batch[j] != -1:
                        xs.append(word_vecs[j])
                        ys.append(y_batch[j])
                        senses.append(senss[j])

            # Average each sense's context vectors into a single prototype.
            sense_vects = {}
            for vec, sense in zip(xs, senses):
                sense_vects.setdefault(sense, []).append(vec)

            sense_train_counts = {}
            for key in list(sense_vects.keys()):
                sense_train_counts[key] = len(sense_vects[key])
                sense_vects[key] = np.mean(sense_vects[key], axis=0)

            # Context vectors for the test examples. (The original looped
            # num_batches times over the SAME feed dict, duplicating every
            # test example; one pass is sufficient.)
            xs_test = []
            ys_test = []
            senses_test = []
            word_vecs = sess.run(graph_context_layer, feed_dict_test)
            for j in range(len(y_batch_test)):
                if y_batch_test[j] != -1:
                    xs_test.append(word_vecs[j])
                    ys_test.append(y_batch_test[j])
                    senses_test.append(senss_test[j])

            # Pick, for each test example, the candidate sense whose
            # prototype is most cosine-similar to its context vector, and
            # score against the gold sense. (The original left max_cos and
            # corr unused and exited after printing one similarity.)
            n_tests = len(ys_test)
            corr = 0
            scored = 0
            for i in range(n_tests):
                ambig_word = data_loader.verbs_idx[ys_test[i]]
                ambig_word = data_loader.words[ambig_word]
                correct_sense = senses_test[i]
                if ambig_word not in words_sense:
                    continue
                max_cos = 0
                best_sense = None
                for sense in words_sense[ambig_word]:
                    if sense in sense_idx:
                        sen = sense_idx[sense]
                        # Only senses seen in training have a prototype.
                        if sen in sense_vects:
                            cos = cosine_similarity(sense_vects[sen], xs_test[i])
                            if cos > max_cos:
                                max_cos = cos
                                best_sense = sen
                if best_sense is not None:
                    scored += 1
                    if best_sense == correct_sense:
                        corr += 1

    if scored > 0:
        print("accuracy: {:.4f} ({}/{} scored, {} test examples)".format(
            corr / scored, corr, scored, n_tests))
    else:
        print("no test example could be scored")
    print("Total run time: {}s".format(time.time() - start))
Example #2
0
def train(args):
    """Train the BasicLSTM word-sense model.

    Loads batches with TextLoader, optionally resumes from a checkpoint in
    args.init_from (after verifying config/vocab compatibility), trains
    with Adam + global-norm gradient clipping, evaluates on a dev batch
    after each epoch, checkpoints to args.save_dir whenever dev accuracy
    improves, early-stops after args.stop_count non-improving epochs, and
    finally reports accuracy on a held-out test batch.

    Args:
        args: hyperparameter/path namespace (data_dir, sense_file,
              batch_size, seq_length, data_set_size, save_dir, log_dir,
              learning_rate, grad_clip, keep_prob, num_epochs, stop_count,
              init_from, rnn_size, num_layers, ...).
              Mutated: vocab_size and verb_size are filled in.
    """
    # Data Preparation
    # ====================================
    data_loader = TextLoader(args.data_dir, args.sense_file, args.batch_size,
                             args.seq_length, args.data_set_size)
    args.vocab_size = data_loader.vocab_size
    args.verb_size = len(data_loader.verbs)
    print(args.verb_size)
    print("Number of sentences: {}".format(data_loader.num_data))
    print("Vocabulary size: {}".format(args.vocab_size))

    # Check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "words_vocab.pkl")
        ), "words_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = pickle.load(f)
        need_be_same = ["rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'words_vocab.pkl'), 'rb') as f:
            saved_words, saved_vocab = pickle.load(f)
        assert saved_words == data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    # Persist config and vocabulary so infer() can restore them later.
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.words, data_loader.vocab), f)

    # Training
    # ====================================
    with tf.Graph().as_default():
        with tf.Session() as sess:
            model = BasicLSTM(args)

            # Define training procedure: Adam with global-norm clipping.
            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer = tf.train.AdamOptimizer(args.learning_rate)
            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(model.cost, tvars),
                                              args.grad_clip)
            train_op = optimizer.apply_gradients(zip(grads, tvars),
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity
            grad_summaries = []
            for g, v in zip(grads, tvars):
                if g is not None:
                    grad_summaries.append(
                        tf.summary.histogram("{}/grad/hist".format(v.name), g))
                    grad_summaries.append(
                        tf.summary.scalar("{}/grad/sparsity".format(v.name),
                                          tf.nn.zero_fraction(g)))

            # Loss summary (picked up by merge_all below).
            tf.summary.scalar("loss", model.cost)

            # Train summaries
            merged = tf.summary.merge_all()
            if not os.path.exists(args.log_dir):
                os.makedirs(args.log_dir)
            train_writer = tf.summary.FileWriter(args.log_dir, sess.graph)

            saver = tf.train.Saver(tf.global_variables())

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Restore model when resuming
            if args.init_from is not None:
                saver.restore(sess, ckpt.model_checkpoint_path)

            # Start training
            print("Start training")

            # Create dev and test sets. Evaluation feeds keep_prob = 1.0 so
            # dropout is disabled; the original fed args.keep_prob, leaving
            # dropout active at evaluation time.
            data_loader.reset_batch_pointer()
            x_batch_dev, y_batch_dev, unk_count_dev, n_sent_dev, _ = data_loader.next_batch_test(
            )
            feed_dict_dev = {
                model.x: x_batch_dev,
                model.y: y_batch_dev,
                model.keep_prob: 1.0
            }
            data_loader.reset_batch_pointer()
            x_batch_test, y_batch_test, unk_count_test, n_sent_test, _ = data_loader.next_batch_test(
                set_to_choose=1)
            feed_dict_test = {
                model.x: x_batch_test,
                model.y: y_batch_test,
                model.keep_prob: 1.0
            }
            data_loader.reset_batch_pointer()

            steps = 0  # count of consecutive non-improving epochs (early stopping)
            prev_equal = 0.  # best dev accuracy seen so far
            for epoch in range(args.num_epochs):
                data_loader.reset_batch_pointer()
                state = sess.run(model.initial_state)
                for i in range(data_loader.num_batches):
                    start = time.time()
                    x_batch, y_batch, unk_count, n_sent, _ = data_loader.next_batch(
                    )
                    feed_dict = {
                        model.x: x_batch,
                        model.y: y_batch,
                        model.keep_prob: args.keep_prob
                    }
                    _, step, summary, loss, equal = sess.run([
                        train_op, global_step, merged, model.cost, model.equal
                    ], feed_dict)

                    print(
                        "training step {}, epoch {}, batch {}/{}, loss: {:.4f}, accuracy: {:.4f}, avg unk count: {}, avg sent: {}, time/batch: {:.3f}"
                        .format(step, epoch, i, data_loader.num_batches, loss,
                                np.mean(equal),
                                int(unk_count / args.batch_size), n_sent,
                                time.time() - start))

                # Start dev evaluation. NOTE(review): every iteration runs
                # the same feed_dict_dev; with dropout disabled the runs are
                # identical, so the loop only preserves the original's
                # per-batch logging shape.
                print("Start dev")
                data_loader.reset_batch_pointer()
                accur = []
                for i in range(data_loader.num_batches_test):
                    start = time.time()
                    step, summary, loss, equal = sess.run(
                        [global_step, merged, model.cost, model.equal],
                        feed_dict_dev)

                    # Fixed: report the dev batch's own unk count / sentence
                    # stats (the original printed the test batch's values).
                    print(
                        "dev step {}, epoch {}, batch {}/{}, loss: {:.4f}, accuracy: {:.4f}, avg unk count: {}, avg sent: {}, time/batch: {:.3f}"
                        .format(step, epoch, i, data_loader.num_batches_test,
                                loss, np.mean(equal),
                                int(unk_count_dev / args.batch_size),
                                n_sent_dev,
                                time.time() - start))

                    accur.append(np.mean(equal))

                # Checkpoint on dev improvement; otherwise count toward
                # early stopping.
                eval_acc = np.mean(accur)
                if eval_acc > prev_equal:
                    prev_equal = eval_acc
                    train_writer.add_summary(summary, step)
                    current_step = tf.train.global_step(sess, global_step)
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    path = saver.save(sess,
                                      checkpoint_path,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}".format(path))
                elif steps > args.stop_count:
                    print("early stopping")
                    break
                else:
                    steps += 1

            # Final evaluation on the held-out test batch.
            print("Start test")
            data_loader.reset_batch_pointer()
            accur = []
            for i in range(data_loader.num_batches_test):
                start = time.time()
                step, summary, loss, equal = sess.run(
                    [global_step, merged, model.cost, model.equal],
                    feed_dict_test)

                print(
                    "test step {}, epoch {}, batch {}/{}, loss: {:.4f}, accuracy: {:.4f}, avg unk count: {}, avg sent: {}, time/batch: {:.3f}"
                    .format(step, epoch, i, data_loader.num_batches_test, loss,
                            np.mean(equal),
                            int(unk_count_test / args.batch_size), n_sent_test,
                            time.time() - start))
                accur.append(np.mean(equal))

            print('avg test: {:.4f}'.format(np.mean(accur)))
            train_writer.close()