예제 #1
0
def main(_):
    """Decode and score the validation set for a series of checkpoints.

    ``FLAGS.eval_stat`` holds three whitespace-separated integers that are
    used as ``range(start, stop, step)`` over checkpoint iteration numbers.
    For each iteration the ``model_<iter>.ckpt`` checkpoint is loaded
    (re-checking once an hour until its ``.meta`` file exists), the top
    decoded sentence for every annotation in the ':'-separated
    ``FLAGS.anno_files_path`` files is collected, the collection is written
    as JSON and handed to ``CommonUtiler.coco_val_eval``.

    Args:
        _: Unused positional argument.
    """
    # Load model configuration
    utiler = CommonUtiler()
    config = utiler.load_config(
        os.path.join('./model_conf', FLAGS.model_name + '.py'))

    # One decoder instance is reused; each checkpoint is loaded into it.
    decoder = mRNNDecoder(
        config,
        FLAGS.model_name,
        FLAGS.vocab_path,
        gpu_memory_fraction=FLAGS.gpu_memory_fraction)

    first_iter, end_iter, stride = [
        int(token) for token in FLAGS.eval_stat.split()]
    for ckpt_iter in range(first_iter, end_iter, stride):
        ckpt_path = os.path.join(FLAGS.model_root, FLAGS.model_name,
                                 'variables', 'model_%d.ckpt' % ckpt_iter)

        # Block until the checkpoint's .meta file shows up on disk.
        while True:
            if os.path.exists(ckpt_path + ".meta"):
                break
            logger.warning('Cannot load model file, sleep 1 hour to retry')
            time.sleep(3600)

        decoder.load_model(ckpt_path)

        captions = []
        for anno_path in FLAGS.anno_files_path.split(':'):
            for anno in np.load(anno_path).tolist():
                # Feature file shares the image's base name, with '.txt'.
                feat_path = os.path.join(
                    FLAGS.vf_dir, anno['file_path'],
                    anno['file_name'].split('.')[0] + '.txt')
                candidates = decoder.decode(np.loadtxt(feat_path),
                                            FLAGS.beam_size)

                # Only the first returned sentence is kept for scoring.
                captions.append({
                    'image_id': anno['id'],
                    'caption': ' '.join(candidates[0]['words'])
                })

                num_decode = len(captions)
                if num_decode % 100 == 0:
                    logger.info('%d images are decoded' % num_decode)

        out_dir = os.path.join(FLAGS.model_root, FLAGS.model_name,
                               'decode_val_result')
        pred_path = os.path.join(out_dir, 'generated_%d.json' % ckpt_iter)
        result_path = os.path.join(out_dir, 'result_%d.txt' % ckpt_iter)
        utiler.create_dir_if_not_exists(os.path.dirname(pred_path))
        with open(pred_path, 'w') as fout:
            json.dump(captions, fout)
        utiler.coco_val_eval(pred_path, result_path)
예제 #2
0
def main(_):
    """Train the mRNN model, one graph replica per sequence-length bucket.

    Builds an ``mRNNModel`` for every entry of ``config.buckets`` (all
    sharing the variables of the "mRNNmodel" scope), either restores the
    weights from ``FLAGS.pre_trained_model_path`` or initializes them,
    runs ``config.num_epoch`` epochs via ``run_epoch`` and finally saves a
    checkpoint named after the number of iterations done.

    Args:
        _: Unused positional argument.
    """
    # Load model configuration
    cu = CommonUtiler()
    config_path = os.path.join('./model_conf', FLAGS.model_name + '.py')
    config = cu.load_config(config_path)

    # Start model training
    with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(
            intra_op_parallelism_threads=FLAGS.ses_threads)) as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        # The last bucket must match the configured maximum step count.
        assert len(config.buckets) >= 1
        assert config.buckets[-1] == config.max_num_steps
        models = []
        # The first model creates the variables (and owns the saver) ...
        with tf.variable_scope("mRNNmodel",
                               reuse=None,
                               initializer=initializer):
            m = mRNNModel(is_training=True,
                          num_steps=config.buckets[0],
                          config=config,
                          model_name=FLAGS.model_name,
                          flag_with_saver=True,
                          model_root=FLAGS.model_root)
            models.append(m)

        # ... and the models for the remaining buckets reuse them.
        with tf.variable_scope("mRNNmodel", reuse=True):
            for bucket in config.buckets[1:]:
                m = mRNNModel(is_training=True,
                              num_steps=bucket,
                              config=config,
                              model_name=FLAGS.model_name,
                              model_root=FLAGS.model_root)
                models.append(m)

        # Mirror log records into log.txt inside the model directory.
        hdlr = logging.FileHandler(os.path.join(m.model_dir, 'log.txt'))
        hdlr.setLevel(logging.INFO)
        hdlr.setFormatter(logging.Formatter(formatter_log))
        logger.addHandler(hdlr)

        if FLAGS.pre_trained_model_path:
            models[0].saver.restore(session, FLAGS.pre_trained_model_path)
            logger.info('Continue to train from %s',
                        FLAGS.pre_trained_model_path)
        else:
            # global_variables_initializer() only builds the init op; it
            # has to be run in the session for the variables to be set.
            session.run(tf.global_variables_initializer())

        iters_done = 0
        data_provider = mRNNCocoBucketDataProvider(
            FLAGS.anno_files_path.split(':'), FLAGS.vocab_path,
            config.vocab_size, FLAGS.vf_dir, config.vf_size)
        for i in range(config.num_epoch):
            train_cost, iters_done = run_epoch(session,
                                               iters_done,
                                               config,
                                               models,
                                               data_provider,
                                               verbose=True)
            logger.info("Train cost for epoch %d is %.3f", i, train_cost)

        # Save final copy of the model
        models[0].saver.save(
            session, os.path.join(m.variable_dir,
                                  'model_%d.ckpt' % iters_done))
예제 #3
0
# Default figure size for plots drawn through pylab.
pylab.rcParams['figure.figsize'] = (6.0, 4.0)

#sys.path.append('./py_lib/')
from py_lib.common_utils import CommonUtiler
from py_lib.tf_mrnn_decoder import mRNNDecoder
from py_lib.vision import ImageFeatureExtractor

# Set up paths to the caption model checkpoint, its configuration and
# vocabulary, and the image model used for feature extraction.
mrnn_model_path = './trained_models/coco_caption/mrnn_GRU_570K.ckpt'
mrnn_config_path = './model_conf/mrnn_GRU_conf.py'
mrnn_vocab_path = './trained_models/coco_caption/mscoco_mc3_vocab'
img_model_path = './external/tf_cnn_models/inception_v3.pb'

# Initialize the feature extractor and the sentence decoder.
cu = CommonUtiler()
config = cu.load_config(mrnn_config_path)
ife = ImageFeatureExtractor(img_model_path)
decoder = mRNNDecoder(config, 'demo', mrnn_vocab_path)

demo_image_path = 'demo_image.jpg'
beam_size = 3
# Extract the visual features for the demo image (read from its file path).
visual_features = ife.extract_features(demo_image_path, flag_from_file=True)
# Load the trained caption model, then generate sentences with beam search.
decoder.load_model(mrnn_model_path)
sentences = decoder.decode(visual_features, beam_size)

# Print the generated sentences.
print('Top generated sentences and their log-likelihood:')
for (ind_s, sentence) in enumerate(sentences):
    print('  %d (%.2f): %s' %