Example #1
def train():
    config = lstm.config()

    w2d, d2w = get_word_to_id()
    if len(w2d) == 0:
        w2d, d2w = gen_word_to_id(FLAGS.caption_path, FLAGS.image_path)

    # NOTE: ImageCaption below needs `word` and `target`; unpack them here,
    # mirroring the data.get_data() signature used in the other examples.
    f, image, word, target = data_producer.get_data(FLAGS.caption_path,
                                                    FLAGS.image_path,
                                                    batch_size=config.batch_size)

    config.vocab_size = len(w2d)
    print("vocab size:", config.vocab_size)
    epoch_size = 100
    image_caption = ImageCaption(image, word, target, config)

    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
    config_proto = tf.ConfigProto(allow_soft_placement=True)
    with sv.managed_session(config=config_proto) as sess:
        print("log save:", FLAGS.log_path)
        summary_writer = tf.summary.FileWriter(FLAGS.log_path, sess.graph)

        for i in range(config.max_max_epoch):
            x_lr_decay = config.lr_decay**max(i + 1 - config.max_epoch, 0.0)
            print("lr:", x_lr_decay)
            image_caption.lstm.assign_lr(sess,
                                         config.learning_rate * x_lr_decay)
            p = image_caption.run_epoch(sess, x_lr_decay, epoch_size,
                                        summary_writer, sv)
            print("step %d per %f" % (i, p))
Example #2
def train():
    config = lstm.config()
    config.batch_size = 2
    config.hidden_size = 512

    f, image, label, word, target, w2d, d2w = data.get_data(
        FLAGS.caption_path,
        FLAGS.image_path,
        max_len=config.num_steps + 1,
        batch_size=config.batch_size)
    epoch_size = 10000
    config.vocab_size = len(w2d)
    print("vb size:", len(w2d))
    image_caption = ImageCaption(image, word, target, config)

    config_proto = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config_proto) as sess:
        sv = load_session(sess, FLAGS.save_path)
        threads = tf.train.start_queue_runners(sess)

        summary_writer = tf.summary.FileWriter(FLAGS.log_path, sess.graph)

        for i in range(config.max_max_epoch):
            x_lr_decay = config.lr_decay**max(i + 1 - config.max_epoch, 0.0)
            print("lr:", x_lr_decay)
            image_caption.lstm.assign_lr(sess,
                                         config.learning_rate * x_lr_decay)
            p = image_caption.run_epoch(sess, x_lr_decay, epoch_size,
                                        summary_writer, sv)
            print("step %d per %f" % (i, p))
Example #3
def predict_one(img, sess, image_caption, candidats):
    config = lstm.config()
    img = img.reshape((1, 224, 224, 3))

    for i in range(20):
        seq = [s.words for s in candidats]
        seq_len = [len(seq[0])] * len(seq)
        feed_dict = {
            image_caption.images:
            np.array([img] * len(seq)).reshape(-1, 224, 224, 3),
            image_caption.word:
            np.array(seq),
            image_caption.lstm.seqlen:
            np.array(seq_len)
        }
        outputs = sess.run(image_caption.lstm.logits, feed_dict=feed_dict)
        new_list = []
        for s, s_out in zip(candidats, outputs):
            predict = s_out[-1]
            sort_idx = predict.argsort()[::-1]
            for j in range(len(sort_idx)):
                w = sort_idx[j]
                score = np.log(predict[w])
                assert not math.isnan(score), "nan log-prob for %f" % predict[w]
                new_list.append(Sentence(s.words + [w], s.score + score))

        new_list = sorted(new_list,
                          key=lambda sentence: sentence.score,
                          reverse=True)
        candidats = new_list[0:20]
        for l in candidats:
            print("sort list:", l.words, l.score)
        if candidats[0].words[-1] == 0:  # assumed end-of-sequence id
            return candidats
    # no candidate terminated within 20 steps; return the current beam
    return candidats
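
The Sentence candidate type used by this beam search is not defined in these snippets. From its usage (Sentence([w2d['<start>']]), s.words, s.score, and a two-argument constructor) a minimal compatible stand-in would be:

class Sentence(object):
    # Inferred from usage: a token-id sequence plus its accumulated
    # log-probability; the seed candidate starts at score 0.0.
    def __init__(self, words, score=0.0):
        self.words = words
        self.score = score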
Example #4
File: predict.py Project: cjcchen/ML
def predict():
    config = lstm.config()

    img = utils.load_image("./test_data/tiger.jpeg")
    img = img.reshape((1, 224, 224, 3))
    w2d, d2w = data.get_word_to_id()
    print "read w2d size:", len(w2d)
    if len(w2d) == 0:
        f, image, label, word, target, w2d, d2w = data.get_data(
            FLAGS.caption_path,
            FLAGS.image_path,
            max_len=config.num_steps + 1,
            batch_size=config.batch_size)
        print "reload read w2d size:", len(w2d)
    config.vocab_size = len(w2d)
    #config.vocab_size = 24553

    images = tf.placeholder("float", [None, 224, 224, 3], name="image")
    word = tf.placeholder(tf.int32, [None, None], name="word_seq")
    image_caption = ImageCaption(images, word, None, config, is_training=False)
    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
    config_proto = tf.ConfigProto(allow_soft_placement=True)
    with sv.managed_session(config=config_proto) as sess:
        seq = [w2d['<start>']]
        for i in range(50):
            seq_len = np.array(len(seq)).reshape([-1])
            feed_dict = {
                images: img,
                word: np.array(seq).reshape([1, -1]),
                image_caption.lstm.seqlen: seq_len
            }

            output = sess.run(image_caption.lstm.logits, feed_dict=feed_dict)
            print(output.shape)
            idx = np.argmax(output[-1])
            seq.append(idx)
            print(seq)
            print([d2w[s] for s in seq])
            if idx == 2:  # assumed end-of-sentence token id
                break
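
The loop above decodes greedily: each step feeds the whole prefix back in and appends the argmax id. A self-contained toy version of that inner rule (the vocabulary and logits are made up for illustration):

import numpy as np

d2w = {0: '<pad>', 1: '<start>', 2: '<end>', 3: 'a', 4: 'tiger'}  # toy vocab
seq = [1]                                        # start token
logits = np.array([[0.1, 0.0, 0.2, 0.3, 3.0]])   # toy logits for the last step
idx = int(np.argmax(logits[-1]))                 # greedy pick: highest logit
seq.append(idx)
print([d2w[s] for s in seq])                     # ['<start>', 'tiger']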
Example #5
def evaluate():
    config = lstm.config()
    config.hidden_size = 512
    config.batch_size = 1

    w2d, d2w = data.get_word_to_id()
    print "read w2d size:", len(w2d)
    config.vocab_size = len(w2d)

    print(FLAGS.caption_path, FLAGS.image_path, config.num_steps + 1, config.batch_size)
    f, image, label, word, target, _, _ = data.get_data(
        FLAGS.caption_path,
        FLAGS.image_path,
        max_len=config.num_steps + 1,
        batch_size=config.batch_size,
        mode='test')

    images = tf.placeholder("float", [None, 224, 224, 3], name="image")
    word = tf.placeholder(tf.int32, [None, None], name="word_seq")
    image_caption = ImageCaption(images, word, None, config, is_training=False)

    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
    config_proto = tf.ConfigProto(allow_soft_placement=True)
    with sv.managed_session(config=config_proto) as sess:
        for _ in range(5):
            # don't reuse the loop variable: unpack the fetched image into img_
            f_, img_, l = sess.run([f, image, label])

            candidats = [Sentence([w2d['<start>']])]

            res = predict_one(img_, sess, image_caption, candidats)

            print(f_)
            for r in res:
                print(r.words)
                print([d2w[p] for p in r.words])
            print(l)
            print([d2w[p] for p in l[0]])
            hy = [r.words for r in res]
            print("score:", score.score(l[0], hy))
Example #6
def predict():
    config = lstm.config()

    img = utils.load_image("./test_data/tiger.jpeg")
Example #7
def predict():
    config = lstm.config()
    config.hidden_size = 512
    config.batch_size = 2

    img = utils.load_image(
        "/home/tusimple/junechen/ml_data/data/train2014/COCO_train2014_000000160629.jpg"
    )
    #img = utils.load_image("./test_data/tiger.jpeg")
    img = img.reshape((1, 224, 224, 3))
    w2d, d2w = data.get_word_to_id()
    print "read w2d size:", len(w2d)
    if len(w2d) == 0:
        f, image, label, word, target, w2d, d2w = data.get_data(
            FLAGS.caption_path,
            FLAGS.image_path,
            max_len=config.num_steps + 1,
            batch_size=config.batch_size)
        print "reload read w2d size:", len(w2d)
    config.vocab_size = len(w2d)
    #config.vocab_size = 24553

    images = tf.placeholder("float", [None, 224, 224, 3], name="image")
    word = tf.placeholder(tf.int32, [None, None], name="word_seq")
    image_caption = ImageCaption(images, word, None, config, is_training=False)
    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
    config_proto = tf.ConfigProto(allow_soft_placement=True)
    with sv.managed_session(config=config_proto) as sess:
        candidats = [Sentence([w2d['<start>']])]
        for i in range(20):
            seq = [s.words for s in candidats]
            print("run seq:", np.array(seq).shape)
            seq_len = [len(seq[0])] * len(seq)
            feed_dict = {
                images: np.array([img] * len(seq)).reshape(-1, 224, 224, 3),
                word: np.array(seq),
                image_caption.lstm.seqlen: np.array(seq_len)
            }
            print(feed_dict[images].shape)
            print(feed_dict[word].shape)
            print(feed_dict[image_caption.lstm.seqlen].shape)

            outputs = sess.run(image_caption.lstm.logits, feed_dict=feed_dict)
            print(np.array(outputs).shape)
            new_list = []
            for s, s_out in zip(candidats, outputs):
                predict = s_out[-1]
                sort_idx = predict.argsort()[::-1]
                for j in range(len(sort_idx)):
                    w = sort_idx[j]
                    score = np.log(predict[w])
                    assert not math.isnan(score), "nan log-prob for %f" % predict[w]
                    new_list.append(Sentence(s.words + [w], s.score + score))
            new_list = sorted(new_list,
                              key=lambda sentence: sentence.score,
                              reverse=True)
            candidats = new_list[0:20]
            for l in candidats:
                print("sort list:", l.words, l.score)
                print([d2w[p] for p in l.words])
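
One design note on the beam step above: predict.argsort()[::-1] sorts the entire vocabulary distribution for every candidate, even though only the top 20 extensions of each candidate can survive the beam cut. np.argpartition finds an unordered top-k in linear time instead; a standalone sketch on a toy distribution:

import numpy as np

beam_width = 20
predict = np.random.rand(5000)                               # toy scores over a vocabulary
top_k = np.argpartition(predict, -beam_width)[-beam_width:]  # unordered top-k ids
top_k = top_k[np.argsort(predict[top_k])[::-1]]              # order by score, best first
print(top_k[:5], np.log(predict[top_k[:5]]))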