Code Example #1
def main(unused_args):
  # Load model configuration
  cu = CommonUtiler()
  config_path = os.path.join('./model_conf', FLAGS.model_name + '.py')
  config = cu.load_config(config_path)

  # Start model training
  with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(
      intra_op_parallelism_threads=FLAGS.ses_threads)) as session:
    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)
    assert len(config.buckets) >= 1
    assert config.buckets[-1] == config.max_num_steps
    models = []
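    # The first variable_scope call (reuse=None) creates the shared mRNN
    # variables; the reuse=True scope below builds one extra model per
    # remaining bucket on top of the same weights.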
    with tf.variable_scope("mRNNmodel", reuse=None, initializer=initializer):
      m = mRNNModel(is_training=True,
          num_steps=config.buckets[0], 
          config=config,
          model_name=FLAGS.model_name,
          flag_with_saver=True,
          model_root=FLAGS.model_root)
      models.append(m)
      
    with tf.variable_scope("mRNNmodel", reuse=True):
      for bucket in config.buckets[1:]:
        m = mRNNModel(is_training=True, 
            num_steps=bucket, 
            config=config,
            model_name=FLAGS.model_name,
            model_root=FLAGS.model_root)
        models.append(m)
        
    hdlr = logging.FileHandler(os.path.join(m.model_dir, 'log.txt'))
    hdlr.setLevel(logging.INFO)
    hdlr.setFormatter(logging.Formatter(formatter_log))
    logger.addHandler(hdlr)
    
    if FLAGS.pre_trained_model_path:
      models[0].saver.restore(session, FLAGS.pre_trained_model_path)
      logger.info('Continue to train from %s', FLAGS.pre_trained_model_path)
    else:
      tf.initialize_all_variables().run()

    iters_done = 0
    data_provider = mRNNCocoBucketDataProvider(FLAGS.anno_files_path.split(':'),
        FLAGS.vocab_path, config.vocab_size, FLAGS.vf_dir, config.vf_size)
    for i in range(config.num_epoch):
      train_cost, iters_done = run_epoch(session, iters_done, config, models, 
          data_provider, verbose=True)
      logger.info("Train cost for epoch %d is %.3f" % (i, train_cost))
    
    # Save final copy of the model
    models[0].saver.save(session, os.path.join(m.variable_dir, 
        'model_%d.ckpt' % iters_done))
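
The buckets above group captions by length so each batch runs a graph sized to the smallest fitting bucket. A minimal sketch of that selection logic, assuming run_epoch maps each caption to the model whose bucket first fits it (the helper name is illustrative, not part of the project's API):

def select_bucket(caption_len, buckets):
  # Index of the smallest bucket that fits the caption; this is also
  # the index into the `models` list built above.
  for ind, bucket in enumerate(buckets):
    if caption_len <= bucket:
      return ind
  return None  # longer than config.max_num_steps

assert select_bucket(7, [10, 20, 30]) == 0
assert select_bucket(25, [10, 20, 30]) == 2
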
Code Example #2
def main(unused_args):
    # Load model configuration
    cu = CommonUtiler()
    config_path = os.path.join('./model_conf', FLAGS.model_name + '.py')
    config = cu.load_config(config_path)

    # Evaluate trained models on val
    decoder = mRNNDecoder(config,
                          FLAGS.model_name,
                          FLAGS.vocab_path,
                          gpu_memory_fraction=FLAGS.gpu_memory_fraction)
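    # FLAGS.eval_stat holds space-separated xrange() arguments
    # (start, stop[, step]) naming the checkpoint iterations to evaluate.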
    for i in xrange(*[int(x) for x in FLAGS.eval_stat.split()]):
        model_path = os.path.join(FLAGS.model_root, FLAGS.model_name,
                                  'variables', 'model_%d.ckpt' % i)
        while not os.path.exists(model_path):
            logger.warn('Model file not found yet; sleeping 1 hour before retrying')
            time.sleep(3600)

        decoder.load_model(model_path)

        num_decode = 0
        pred_sentences = []
        for anno_file_path in FLAGS.anno_files_path.split(':'):
            annos = np.load(anno_file_path).tolist()
            for anno in annos:
                feat_path = os.path.join(
                    FLAGS.vf_dir, anno['file_path'],
                    anno['file_name'].split('.')[0] + '.txt')
                visual_features = np.loadtxt(feat_path)
                sentences = decoder.decode(visual_features, FLAGS.beam_size)

                sentence_coco = {}
                sentence_coco['image_id'] = anno['id']
                sentence_coco['caption'] = ' '.join(sentences[0]['words'])
                pred_sentences.append(sentence_coco)
                num_decode += 1

                if num_decode % 100 == 0:
                    logger.info('%d images are decoded' % num_decode)

        pred_path = os.path.join(FLAGS.model_root, FLAGS.model_name,
                                 'decode_val_result', 'generated_%d.json' % i)
        result_path = os.path.join(FLAGS.model_root, FLAGS.model_name,
                                   'decode_val_result', 'result_%d.txt' % i)
        cu.create_dir_if_not_exists(os.path.dirname(pred_path))
        with open(pred_path, 'w') as fout:
            json.dump(pred_sentences, fout)
        cu.coco_val_eval(pred_path, result_path)
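
Each entry appended above follows the MS COCO results format: one object per image holding an image_id and a single caption string. A minimal sketch of the file that json.dump writes (the id below is made up for illustration):

import json

pred_sentences = [
    {'image_id': 12345, 'caption': 'a man riding a bike down a street'},
]
print(json.dumps(pred_sentences))
# [{"image_id": 12345, "caption": "a man riding a bike down a street"}]
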
Code Example #3
File: demo.py Project: Sxq2004123/TF-mRNN
sys.path.append('./py_lib/')
from common_utils import CommonUtiler
from tf_mrnn_decoder import mRNNDecoder
from vision import ImageFeatureExtractor

# In[2]:

# set up paths
mrnn_model_path = './trained_models/coco_caption/mrnn_GRU_570K.ckpt'
mrnn_config_path = './model_conf/mrnn_GRU_conf.py'
mrnn_vocab_path = './trained_models/coco_caption/mscoco_mc3_vocab'
img_model_path = './external/tf_cnn_models/inception_v3.pb'

# initialize feature extractor and sentence decoder
cu = CommonUtiler()
config = cu.load_config(mrnn_config_path)
ife = ImageFeatureExtractor(img_model_path)
decoder = mRNNDecoder(config, 'demo', mrnn_vocab_path)

# In[3]:

demo_image_path = 'demo_image.jpg'
beam_size = 3
# extract visual feature for the image
visual_features = ife.extract_features(demo_image_path, flag_from_file=True)
# generate sentences
decoder.load_model(mrnn_model_path)
sentences = decoder.decode(visual_features, beam_size)

# In[4]:
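
The contents of cell In[4] are not shown here. A minimal sketch of inspecting the beam output, assuming only that each decoded entry is a dict with a 'words' key, as the decoding loop in Code Example #2 confirms:

for ind, sentence in enumerate(sentences):
    print('%d: %s' % (ind, ' '.join(sentence['words'])))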