コード例 #1
0
ファイル: anna.py プロジェクト: noandrea/image_captioning
class AI(object):
    """Caption images with a ``CaptionGenerator`` configured for the test phase.

    ``load()`` must be called once before ``image_caption()``: it creates the
    session (``self.sess``) and the model (``self.model``) that
    ``image_caption()`` reads.
    """

    def __init__(self, model_path):
        """
        Initialize the AI

        :param model_path: path to the trained model file, (eg: models/289999.npy)
        """
        self.config = Config()
        self.config.phase = 'test'
        self.config.beam_size = 3
        self.model_path = model_path

    def load(self):
        """Create the session, build the model and load weights from ``model_path``.

        The default graph is finalized at the end, so it cannot be modified
        after this call.
        """
        self.sess = tf.Session()
        # testing phase
        self.model = CaptionGenerator(self.config)
        # TODO:load the right model
        self.model.load(self.sess, self.model_path)
        tf.get_default_graph().finalize()

    def image_caption(self, image_path):
        """Caption a single image.

        :param image_path: path of the image, forwarded to ``prepare_test_data``
        :return: the ``(caption, score)`` pair produced by ``self.model.caption``
        """
        data, vocabulary = prepare_test_data(self.config, image_path=image_path)
        caption, score = self.model.caption(self.sess, data, vocabulary)
        # Fixed the "wiht" typo in the log message.
        print(f"caption '{caption}' with score {score} for image {image_path} ")
        return caption, score
コード例 #2
0
ファイル: anna.py プロジェクト: noandrea/image_captioning
 def load(self):
     """Open a session, build the caption model and load its weights.

     The session is stored on ``self.sess`` and the model on ``self.model``;
     the default graph is finalized once the weights are loaded.
     """
     session = tf.Session()
     self.sess = session
     # testing phase
     generator = CaptionGenerator(self.config)
     self.model = generator
     # TODO:load the right model
     generator.load(session, self.model_path)
     tf.get_default_graph().finalize()
コード例 #3
0
def reply(request):
    """Caption the base64-encoded image posted in the ``image_contents`` field.

    The decoded bytes are written to ``ANDROID_IMAGE``, a test-phase
    ``CaptionGenerator`` is built and loaded from ``MODEL_FILE`` in a fresh
    session, and the first caption returned by ``test_for_android`` is sent
    back as the response body.

    Returns a 405 response for non-POST requests and a 400 response when the
    ``image_contents`` field is missing. Previously both cases either returned
    ``None`` or crashed with ``AttributeError``.
    """
    if request.method != "POST":
        return HttpResponse(status=405)

    image_base64_str = request.POST.get("image_contents")
    if image_base64_str is None:
        return HttpResponse(status=400)

    # The client may send the payload still percent-encoded; restore the three
    # base64 characters that percent-encoding replaces.
    image_base64_str = image_base64_str.replace('%2B', '+').replace(
        '%3D', '=').replace('%2F', '/')
    image_data = base64.b64decode(image_base64_str)

    with open(ANDROID_IMAGE, "wb") as f:
        f.write(image_data)

    config = Config()
    config.train_cnn = False
    config.phase = 'test'
    config.beam_size = 1
    with tf.Session() as sess:
        # testing phase for android app
        data, vocabulary = prepare_test_data(config)
        model = CaptionGenerator(config)
        model.load(sess, MODEL_FILE)
        tf.get_default_graph().finalize()
        captions = test_for_android(model, sess, data, vocabulary)
    return HttpResponse(str(captions[0]))
コード例 #4
0
    def __init__(self,
                 weight_file,
                 beam_size=5,
                 save_to='test.png',
                 mean_file='ilsvrc_2012_mean.npy'):
        """Build a test-phase caption model and load its weights.

        :param weight_file: model file passed to ``self.model.load``
        :param beam_size: beam width stored on the config (previously this
            argument was ignored and the width was always 5)
        :param save_to: not used by this constructor
        :param mean_file: not used by this constructor; a hard-coded mean is
            used instead
        """
        # Hard-coded mean values; presumably per-channel in BGR order given
        # ``self.bgr`` below - TODO confirm.
        self.mean = np.array([104.00698793, 116.66876762, 122.67891434])
        self.scale_shape = np.array([224, 224], np.int32)
        self.crop_shape = np.array([224, 224], np.int32)
        self.bgr = True

        config = Config()
        config.phase = 'test'
        config.train_cnn = False
        config.beam_size = beam_size
        config.batch_size = 1
        self.vocabulary = prepare_test_data(config)
        self.config = config

        self.sess = tf.Session()
        # Entering the session without a ``with`` block keeps it installed as
        # the default session after the constructor returns.
        self.sess.__enter__()
        self.model = CaptionGenerator(config)
        self.sess.run(tf.global_variables_initializer())
        self.model.load(self.sess, weight_file)
コード例 #5
0
def main(argv=None):
    """Run the caption model's test pass and return what it produces.

    ``phase``, ``train_cnn`` and ``beam_size`` are copied from ``FLAGS`` onto a
    fresh ``Config``; the model is loaded from ``FLAGS.model_file`` inside a
    new session and the default graph is finalized before testing.

    :return: the two values returned by ``model.test``, as a tuple
    """
    settings = Config()
    for option in ('phase', 'train_cnn', 'beam_size'):
        setattr(settings, option, getattr(FLAGS, option))

    with tf.Session() as session:
        # testing phase
        test_data, vocab = prepare_test_data(settings)
        generator = CaptionGenerator(settings)
        generator.load(session, FLAGS.model_file)
        tf.get_default_graph().finalize()
        captions, image_results = generator.test(session, test_data, vocab)
        return captions, image_results
コード例 #6
0
    def __init__(self):
        """Set up an empty cache, switch directory and load the caption model.

        The working directory is changed to ``./show_attend_and_tell`` before
        the config and model are created. ``sess`` is not defined here and
        must already exist in the enclosing scope.
        """
        self.cache = {}
        os.chdir('./show_attend_and_tell')

        settings = Config()
        self.config = settings
        for option in ('phase', 'train_cnn', 'beam_size'):
            setattr(settings, option, getattr(FLAGS, option))

        # testing phase
        generator = CaptionGenerator(settings)
        self.model = generator
        generator.load(sess, FLAGS.model_file)
        tf.get_default_graph().finalize()
コード例 #7
0
ファイル: api.py プロジェクト: R0bk/image_captioning
class show_and_tell_model():
    """Caption images given by URL and cache the captions per image.

    Images are downloaded into ``./test/images/``, the model's test pass is
    run, and captions are read back from ``./test/results.csv`` into
    ``self.cache``. Cache keys are produced by ``_cache_key``.
    """

    def __init__(self):
        """Switch to ``./show_attend_and_tell`` and load the caption model.

        ``sess`` is not defined in this class and must already exist in the
        enclosing module scope.
        """
        self.cache = {}
        os.chdir('./show_attend_and_tell')
        self.config = Config()
        self.config.phase = FLAGS.phase
        self.config.train_cnn = FLAGS.train_cnn
        self.config.beam_size = FLAGS.beam_size

        # testing phase
        self.model = CaptionGenerator(self.config)
        self.model.load(sess, FLAGS.model_file)
        tf.get_default_graph().finalize()

    @staticmethod
    def _cache_key(path_or_url):
        """Return the cache key for an image URL or a result-file image path.

        The key is the last ``/``-separated component, cut at the first
        ``.jpg``. Using one function for lookups and for storing results keeps
        both sides in agreement (a URL ending in ``.jpg`` used to be looked up
        under a key that was never stored).
        """
        return path_or_url.split('/')[-1].split('.jpg')[0]

    def run_model(self):
        """Run the model's test pass over the prepared test data."""
        data, vocabulary = prepare_test_data(self.config)
        self.model.test(sess, data, vocabulary)

    def process_list(self, img_list):
        """Download and caption every image in ``img_list`` that is not cached.

        Does nothing when every image is already cached (or the list is empty),
        so the model is not run on an empty image directory and no result file
        is expected.
        """
        pending = [img for img in img_list
                   if self._cache_key(img) not in self.cache]
        if not pending:
            return
        for img in pending:
            self.download_image(img)
        self.run_model()
        self.update_cache()
        self.clear_results()

    def download_image(self, url):
        """Fetch ``url`` and store the body under ``./test/images/`` as ``<basename>.jpg``."""
        img_data = requests.get(url).content
        with open('./test/images/' + url.split('/')[-1] + '.jpg', 'wb') as handler:
            handler.write(img_data)

    def get_result(self, url):
        """Return the cached caption for ``url``, or ``None`` when there is none.

        ``url`` may be a cache key or a full image URL; an exact match is
        tried first so existing callers passing keys are unaffected.
        """
        if url in self.cache:
            return self.cache[url]
        return self.cache.get(self._cache_key(url), None)

    def update_cache(self):
        """Read ``./test/results.csv`` and store each row's caption in the cache.

        Column 2 of a row is used to derive the key and column 1 is stored as
        the value.
        """
        with open('./test/results.csv') as csvf:
            for result in csv.reader(csvf):
                self.cache[self._cache_key(result[2])] = result[1]
        print(self.cache)

    def clear_results(self):
        """Delete every downloaded image and the results file."""
        for file in os.listdir('./test/images/'):
            os.remove('./test/images/' + file)
        os.remove('./test/results.csv')
コード例 #8
0
def cpp_caption():
    """Caption the images found under ``../buffer/`` and return the test output.

    A test-phase config with beam size 3 is built, the model is loaded from
    ``FLAGS.model_file`` and the default graph is finalized before testing.
    ``sess`` is not defined here and must already exist in the enclosing
    scope.

    :return: whatever ``model.test`` returns
    """
    settings = Config()
    overrides = (
        ('test_image_dir', '../buffer/'),
        ('train_cnn', False),
        ('phase', 'test'),
        ('beam_size', 3),
    )
    for option, value in overrides:
        setattr(settings, option, value)

    test_data, vocab = prepare_test_data(settings)
    generator = CaptionGenerator(settings)
    generator.load(sess, FLAGS.model_file)
    tf.get_default_graph().finalize()
    return generator.test(sess, test_data, vocab)
コード例 #9
0
ファイル: main.py プロジェクト: paulchou0309/obj
def main(argv):
    """Build the model and print one batch pulled from its ``images`` tensor.

    ``phase``, ``train_cnn`` and ``beam_size`` are copied from ``FLAGS``. Only
    local variables are initialized before the queue runners are started.
    """
    config = Config()
    config.phase = FLAGS.phase
    config.train_cnn = FLAGS.train_cnn
    config.beam_size = FLAGS.beam_size

    model = CaptionGenerator(config)

    with tf.Session() as sess:
        sess.run(tf.local_variables_initializer())

        # Start populating the filename queue.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        try:
            # Retrieve a single instance:
            example = sess.run(model.images)
            print(example, type(example), example.shape)
        finally:
            # Always stop and join the queue-runner threads, even when the
            # fetch above raises, so they are not left running.
            coord.request_stop()
            coord.join(threads)
コード例 #10
0
def main():
    """Train the caption model on the 'train' split found under ``./data``.

    The 'val' split is loaded as well and handed to the solver together with
    the training data; all hyper-parameters are fixed in this function.
    """
    # load train dataset
    train_data = load_coco_data(data_path='./data', split='train')
    vocab_index = train_data['word_to_idx']
    # load val dataset to print out bleu scores every epoch
    validation_data = load_coco_data(data_path='./data', split='val')

    model_options = dict(
        dim_feature=[196, 512],
        dim_embed=512,
        dim_hidden=1024,
        n_time_step=16,
        prev2out=True,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=True,
    )
    generator = CaptionGenerator(vocab_index, **model_options)

    solver_options = dict(
        n_epochs=20,
        batch_size=128,
        update_rule='adam',
        learning_rate=0.001,
        print_every=1000,
        save_every=1,
        image_path='./image/',
        pretrained_model=None,
        model_path='model/lstm/',
        test_model='model/lstm/model-10',
        print_bleu=True,
        log_path='log/',
    )
    trainer = CaptioningSolver(generator, train_data, validation_data,
                               **solver_options)
    trainer.train()
コード例 #11
0
ファイル: export.py プロジェクト: zisang0210/flickr8k-im2txt
def export_graph(model_folder, model_name, config):
    """Write the test-phase model to ``model_name`` as a graph with constant weights.

    ``config`` is switched to the test phase (no CNN training, beam size 3),
    the model is built in a fresh graph, and a fixed set of its tensors is
    re-exposed through ``tf.identity`` under stable names. Those names are the
    output nodes kept when variables are converted to constants.

    :param model_folder: passed to ``model.load`` as the source of the weights
    :param model_name: path of the serialized graph file to write
    :param config: model configuration; modified in place
    """
    config.phase = 'test'
    config.train_cnn = False
    config.beam_size = 3

    # Model attributes to export; each identity op is named after its
    # attribute. (Input tensors can't be renamed with tf.identity().)
    exported_names = (
        'initial_memory',
        'initial_output',
        # results
        'conv_feats',
        'alpha',
        'memory',
        'output',
        'probs',
    )

    export_graph_ = tf.Graph()
    with export_graph_.as_default():
        generator = CaptionGenerator(config)
        exported = {}
        for tensor_name in exported_names:
            exported[tensor_name] = tf.identity(
                getattr(generator, tensor_name), name=tensor_name)

    with tf.Session(graph=export_graph_) as session:
        generator.load(session, model_folder)
        frozen_graph_def = tf.graph_util.convert_variables_to_constants(
            session, export_graph_.as_graph_def(), list(exported.keys()))

        with tf.gfile.GFile(model_name, "wb") as out_file:
            out_file.write(frozen_graph_def.SerializeToString())
コード例 #12
0
ファイル: train.py プロジェクト: Hillyess/im2txt
def main(argv):
    """Build the caption model in "train" mode and run the slim training loop.

    Input pattern, optimizer, attention mechanism and training directory come
    from ``FLAGS``. The training directory is created when it does not exist.
    """
    settings = Config()
    settings.input_file_pattern = FLAGS.input_file_pattern
    settings.optimizer = FLAGS.optimizer
    settings.attention_mechanism = FLAGS.attention
    settings.save_dir = FLAGS.train_dir

    # Create training directory.
    output_dir = settings.save_dir
    if not tf.gfile.IsDirectory(output_dir):
        tf.logging.info("Creating training directory: %s", output_dir)
        tf.gfile.MakeDirs(output_dir)

    # Build the TensorFlow graph.
    graph = tf.Graph()
    with graph.as_default():
        # Build the model.
        caption_model = CaptionGenerator(settings, mode="train")
        caption_model.build()

        # Set up the Saver for saving and restoring model checkpoints.
        checkpoint_saver = tf.train.Saver(
            max_to_keep=settings.max_checkpoints_to_keep)

    session_options = tf.ConfigProto()
    session_options.gpu_options.allow_growth = True

    # Run training.
    tf.contrib.slim.learning.train(
        caption_model.opt_op,
        output_dir,
        log_every_n_steps=settings.log_every_n_steps,
        graph=graph,
        global_step=caption_model.global_step,
        number_of_steps=FLAGS.number_of_steps,
        summary_op=caption_model.summary,
        save_summaries_secs=60,
        save_interval_secs=600,
        init_fn=None,
        saver=checkpoint_saver,
        session_config=session_options)
コード例 #13
0
def run():
  """Evaluate the model forever, starting a new run every ``FLAGS.eval_interval_secs``.

  The eval directory is created when missing. The model is built once in
  "eval" mode inside its own graph, which is finalized before the loop; each
  iteration calls ``run_once`` with the model, vocabulary, saver and a summary
  writer for the eval directory. This function never returns.
  """
  # Create the evaluation directory if it doesn't exist.
  output_dir = FLAGS.eval_dir
  if not tf.gfile.IsDirectory(output_dir):
    tf.logging.info("Creating eval directory: %s", output_dir)
    tf.gfile.MakeDirs(output_dir)

  # build vocabulary file
  vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

  graph = tf.Graph()
  with graph.as_default():
    settings = Config()
    settings.input_file_pattern = FLAGS.input_file_pattern
    settings.beam_size = FLAGS.beam_size

    # Build the model for evaluation.
    caption_model = CaptionGenerator(settings, mode="eval")
    caption_model.build()

    # Saver restores the model variables; the writer targets the eval dir.
    checkpoint_saver = tf.train.Saver()
    writer = tf.summary.FileWriter(output_dir)

    graph.finalize()

    # Run a new evaluation run every eval_interval_secs.
    while True:
      started_at = time.time()
      timestamp = time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime())
      tf.logging.info("Starting evaluation at " + timestamp)
      run_once(caption_model, vocab, checkpoint_saver, writer)
      # Sleep only for what is left of the interval after this run.
      remaining = started_at + FLAGS.eval_interval_secs - time.time()
      if remaining > 0:
        time.sleep(remaining)
コード例 #14
0
ファイル: main.py プロジェクト: shubham1172/VQA
def main(argv):
    """Run the caption model's test pass on the image named by ``flags.test_image``.

    The model is loaded from ``./data/289999.npy`` with beam size 3, and the
    default graph is finalized before testing.
    """
    settings = Config()
    settings.test_file_name = flags.test_image
    settings.phase = 'test'
    settings.beam_size = 3

    with tf.Session() as session:
        test_data, vocab = prepare_test_data(settings)
        generator = CaptionGenerator(settings)
        generator.load(session, './data/289999.npy')
        tf.get_default_graph().finalize()
        generator.test(session, test_data, vocab)
コード例 #15
0
def main(args):
    """Generate and print a caption for ``args.image`` with trained weights.

    Reads ``embed_size``, ``hidden_size``, ``num_layers``, ``encoder_path``,
    ``decoder_path`` and ``image`` from ``args``. The training dataset is
    loaded only to obtain the vocabulary. ``device`` is not defined here and
    must exist in the enclosing module scope.
    """
    # Image preprocessing
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    image_dir = "data/"
    json_path = image_dir + "annotations/captions_train2014.json"
    root_dir = image_dir + "train2014"

    dataset = CocoDataset(json_path=json_path,
                          root_dir=root_dir,
                          transform=transform)

    data_loader = get_data_loader(dataset, batch_size=32)

    # Build models. Both run in eval mode for inference (batchnorm uses moving
    # mean/variance); previously only the encoder was switched.
    encoder = FeatureExtractor(args.embed_size).eval()
    decoder = CaptionGenerator(args.embed_size, args.hidden_size,
                               len(dataset.vocabulary),
                               args.num_layers).eval()
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Load the trained model parameters. map_location lets checkpoints saved
    # on another device (e.g. a GPU) load on the current one.
    encoder.load_state_dict(
        torch.load(args.encoder_path, map_location=device))
    decoder.load_state_dict(
        torch.load(args.decoder_path, map_location=device))

    # Prepare an image
    image = load_image(args.image, transform)
    image_tensor = image.to(device)

    # Generate a caption from the image; no gradients are needed here.
    with torch.no_grad():
        feature = encoder(image_tensor)
        sampled_ids = decoder.sample(feature)
    # (1, max_seq_length) -> (max_seq_length)
    sampled_ids = sampled_ids[0].cpu().numpy()

    # Convert word_ids to words, stopping after the end marker.
    sampled_caption = []
    for word_id in sampled_ids:
        word = data_loader.dataset.id_to_word[word_id]
        sampled_caption.append(word)
        if word == '<end>':
            break
    sentence = ' '.join(sampled_caption)

    # Print out the image and the generated caption
    print(sentence)
    image = Image.open(args.image)
    plt.imshow(np.asarray(image))
コード例 #16
0
ファイル: main.py プロジェクト: frelam/image_captioning
def main(argv):
    config = Config()
    config.phase = FLAGS.phase
    config.train_cnn = FLAGS.train_cnn
    config.beam_size = FLAGS.beam_size
    config2 = tf.ConfigProto()
    config2.gpu_options.allow_growth = True
    with tf.Session(config=config2) as sess:
        if FLAGS.phase == 'train':
            # training phase
            data = prepare_train_data(config)
            model = CaptionGenerator(config)
            sess.run(tf.global_variables_initializer())
            if FLAGS.load:
                model.load(sess, FLAGS.model_file)
            if FLAGS.load_cnn:
                model.load_cnn(sess, FLAGS.cnn_model_file)
            tf.get_default_graph().finalize()
            model.train(sess, data)
        '''
コード例 #17
0
def main(argv):
    """Train, evaluate or test the caption model inside a monitored session.

    ``FLAGS.phase`` selects the action: 'train', 'eval', or anything else for
    testing. Checkpoints are written to ``config.checkpoint_dir`` every
    ``config.save_checkpoint_steps`` steps. Loading a previous model is not
    done in this function (marked WIP in the original).
    """
    settings = Config()
    settings.phase = FLAGS.phase
    settings.train_cnn = FLAGS.train_cnn
    settings.beam_size = FLAGS.beam_size

    ckpt_dir = settings.checkpoint_dir
    # Read but not used below.
    ckpt_secs = settings.save_checkpoint_secs
    ckpt_steps = settings.save_checkpoint_steps

    step_counter = tf.train.get_or_create_global_step()
    # Creates an increment op in the graph; it is not run in this function.
    increment_step = tf.assign_add(step_counter, 1)

    generator = CaptionGenerator(settings)

    session_scope = tf.train.MonitoredTrainingSession(
        checkpoint_dir=ckpt_dir,
        save_checkpoint_steps=ckpt_steps,
    )
    with session_scope as session:
        if FLAGS.phase == 'train':
            # training phase
            train_data = prepare_train_data(settings)
            generator.train(session, train_data)
        elif FLAGS.phase == 'eval':
            # evaluation phase
            coco, eval_data, vocab = prepare_eval_data(settings)
            tf.get_default_graph().finalize()
            generator.eval(session, coco, eval_data, vocab)
        else:
            # testing phase
            test_data, vocab = prepare_test_data(settings)
            tf.get_default_graph().finalize()
            generator.test(session, test_data, vocab)
コード例 #18
0
# coding=utf-8
import os
import numpy as np
import tensorflow as tf
import cv2
from model import CaptionGenerator
import random
import re
from PIL import Image, ImageEnhance
import threading

with tf.Graph().as_default():
    with tf.Session() as sess:

        batchSize = 1
        generator = CaptionGenerator(batchSize, dropout=False)
        _, caption = generator.build_sampler()

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, 'step/model.ckpt-200')

        files = os.listdir('3')
        intFiles = []
        for file in files:
            intFiles.append(int(file.split('.')[0]))
        intFiles.sort()
        resultFile = open('result.txt', 'w')

        for f in intFiles:
            #print(f)
コード例 #19
0
def main_handler(event, context):
    """Caption the pending photos and store remarks and tags in the database.

    Steps: fetch pending photos from ``mysql``; download the model files and
    photos to ``/tmp`` (PNG files go to a temp directory and are passed to
    ``PNG_JPG``); run the caption model; for each caption try to translate it
    with ``getChinese``, derive tags and write everything back through
    ``mysql``.

    ``config``, ``mysql`` and ``cosClient`` are not defined here and must
    exist in the enclosing module scope. Returns ``None``; does nothing when
    there are no pending photos.
    """
    photoList = mysql.getPhotoInforPrediction()
    if not photoList:
        return

    json_data = photoList[0]
    photo_dict = photoList[1]

    for eve_file in ["289999.json", "289999.npy", "289999.txt"]:
        cosClient.download2disk("others/" + eve_file, '/tmp/' + eve_file)

    temp_path = "/tmp/images/"
    # exist_ok avoids the check-then-create race of the previous code.
    os.makedirs(temp_path + "temp/", exist_ok=True)
    os.makedirs(temp_path + "prediction/", exist_ok=True)

    print("Save File To System")

    for eve_pic_infor in json_data:
        if eve_pic_infor["type"] == "png":
            cosClient.download2disk(
                eve_pic_infor["pic"],
                '/tmp/images/temp/' + eve_pic_infor["name"])
            PngPath = temp_path + "temp/%s" % (eve_pic_infor["name"])
            PNG_JPG(PngPath, os.path.splitext(PngPath)[0] + ".jpg")
        else:
            cosClient.download2disk(
                eve_pic_infor["pic"],
                "/tmp/images/prediction/" + eve_pic_infor["name"])

    tf.reset_default_graph()
    with tf.Session() as sess:
        data, vocabulary = prepare_test_data(config)
        model = CaptionGenerator(config)
        model.load(sess, "/tmp/289999.npy")
        tf.get_default_graph().finalize()
        file_list, caption = model.prediction(sess, data, vocabulary)
        caption_data = [{
            "file": file_name,
            "caption": caption[i]
        } for i, file_name in enumerate(file_list)]

    # Best effort: the first unexpected failure is printed and ends the loop,
    # as before.
    try:
        for entry in caption_data:
            english = entry['caption']
            try:
                chinese = json.loads(getChinese(english))["TargetText"]
                time.sleep(0.2)
            except Exception:
                # Translation is optional; fall back to the English caption.
                # (Was a bare ``except:``, which also swallowed
                # KeyboardInterrupt and SystemExit.)
                chinese = None
            remark = chinese if chinese else english
            tags = FromJieba(chinese, "textrank", 3) if chinese else "-"
            filename = entry['file'].split("/")[-1]
            mysql.saveToPhotoDB(tags, remark, photo_dict[filename])
            # Separate loop name: the old code reused the outer loop variable.
            for tag_name in tags:
                mysql.saveToTagsDB(tag_name)
                tag = mysql.getTags(tag_name)
                if tag:
                    mysql.saveToPhotoTagsDB(tag, photo_dict[filename])

    except Exception as e:
        print(e)
コード例 #20
0
def main(argv):
    """Train, evaluate or test the caption model according to ``FLAGS.phase``.

    * ``'train'`` with ``FLAGS.distributed``: starts a TensorFlow server for
      this task; a "ps" job blocks in ``server.join()``, a "worker" job builds
      the model and trains it inside a ``MonitoredTrainingSession``.
    * ``'train'`` otherwise: single-process training, optionally loading a
      model and/or CNN weights first.
    * ``'eval'``: loads ``FLAGS.model_file`` and evaluates.
    * anything else: loads ``FLAGS.model_file`` and runs the test pass.

    All ``print`` calls use the single-argument call form, which behaves the
    same under Python 2 and Python 3.
    """
    start_time = time.time()
    config = Config()
    config.phase = FLAGS.phase
    config.train_cnn = FLAGS.train_cnn
    config.beam_size = FLAGS.beam_size
    config.distributed = FLAGS.distributed
    config.test_image_dir = os.path.join(FLAGS.node_root, 'images')
    config.test_result_dir = os.path.join(FLAGS.node_root, 'results')
    # NOTE(review): 'results.cvs' looks like a typo for 'results.csv'; left
    # unchanged because other code may rely on this exact path.
    config.test_result_file = os.path.join(FLAGS.node_root, 'results.cvs')
    config.replicas = len(FLAGS.worker_hosts.split(","))
    # An empty task index flag means task 0.
    if FLAGS.task_index == '':
        config.task_index = 0
    else:
        config.task_index = int(FLAGS.task_index)

    if FLAGS.phase == 'train':
        # training phase

        if FLAGS.distributed:
            config.train_image_dir = FLAGS.input_path

            ps_hosts = FLAGS.ps_hosts.split(",")
            worker_hosts = FLAGS.worker_hosts.split(",")

            # Create a cluster from the parameter server and worker hosts.
            cluster = tf.train.ClusterSpec({
                "ps": ps_hosts,
                "worker": worker_hosts
            })

            # Create and start a server for the local task.
            server = tf.train.Server(cluster,
                                     job_name=FLAGS.job_name,
                                     task_index=config.task_index)

            # Bound up front so the timing line below cannot hit an unbound
            # name when job_name is neither "ps" nor "worker".
            begin = time.time()

            if FLAGS.job_name == "ps":
                server.join()
            elif FLAGS.job_name == "worker":
                with tf.device(
                        tf.train.replica_device_setter(
                            worker_device="/job:worker/task:%d" %
                            config.task_index,
                            cluster=cluster)):

                    model = CaptionGenerator(config)
                    data = prepare_train_data(config)

                    # No explicit init op is created here: the monitored
                    # session below takes care of initialization.
                    print("Variables Initialized ...")

                begin = time.time()
                # The StopAtStepHook handles stopping after running given steps.
                hooks = [tf.train.StopAtStepHook(num_steps=1200000)]

                # The MonitoredTrainingSession takes care of session initialization,
                # restoring from a checkpoint, saving to a checkpoint, and closing when done
                # or an error occurs.
                with tf.train.MonitoredTrainingSession(
                        master=server.target,
                        is_chief=(config.task_index == 0),
                        checkpoint_dir=
                        "/home/mauro.emc/image_captioning/models",
                        hooks=hooks) as mon_sess:

                    if not os.path.exists(config.summary_dir):
                        os.mkdir(config.summary_dir)

                    train_writer = tf.summary.FileWriter(
                        config.summary_dir, mon_sess.graph)

                    print("Start the model training")

                    model.train(mon_sess, data, train_writer,
                                config.task_index)

                    train_writer.close()
                    print("Model stopped train")

            print("Train completed")
            print("Total Time in secs: " + str(time.time() - begin))

        else:
            with tf.Session() as sess:
                data = prepare_train_data(config)
                model = CaptionGenerator(config)
                sess.run(tf.global_variables_initializer())
                if FLAGS.load:
                    model.load(sess, FLAGS.model_file)
                if FLAGS.load_cnn:
                    model.load_cnn(sess, FLAGS.cnn_model_file)
                tf.get_default_graph().finalize()
                model.train(sess, data)

    elif FLAGS.phase == 'eval':
        with tf.Session() as sess:
            # evaluation phase
            coco, data, vocabulary = prepare_eval_data(config)
            model = CaptionGenerator(config)
            model.load(sess, FLAGS.model_file)
            tf.get_default_graph().finalize()
            model.eval(sess, coco, data, vocabulary)

    else:
        with tf.Session() as sess:
            # testing phase
            data, vocabulary = prepare_test_data(config)
            model = CaptionGenerator(config)
            model.load(sess, FLAGS.model_file)
            tf.get_default_graph().finalize()
            model.test(sess, data, vocabulary)
    print('Total time in seconds :   ' + str(time.time() - start_time))
コード例 #21
0
ファイル: train.py プロジェクト: rdbadra/ImageCaptioning
def main(num_epochs=10, embedding_dim=256, data_dir="data/"):
    """ Function to train the model.

    Each epoch trains on the training loader, saves encoder/decoder
    checkpoints under ``models/`` every 1000 steps, then evaluates loss and
    BLEU on the validation loader and prints the per-epoch averages.

    Args:
        num_epochs: int
            Number of full dataset iterations to train the model.
        embedding_dim: int
            Output of the CNN model and input of the LSTM embedding size.
        data_dir: str
            Path to the folder of the data.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"WORKING WITH: {device}")

    # Checkpoints are written here; create it up front so the first
    # torch.save cannot fail on a missing directory.
    checkpoint_dir = "models"
    os.makedirs(checkpoint_dir, exist_ok=True)

    # Upper bound on the number of validation batches evaluated per epoch.
    max_valid_batches = 80000

    # Define the paths for train and validation
    train_json_path = data_dir + "annotations/captions_train2014.json"
    train_root_dir = data_dir + "train2014"
    valid_json_path = data_dir + "annotations/captions_val2014.json"
    valid_root_dir = data_dir + "val2014"

    transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    train_dataset = CocoDataset(json_path=train_json_path,
                                root_dir=train_root_dir,
                                transform=transform)

    train_coco_dataset = get_data_loader(train_dataset, batch_size=128)

    valid_dataset = CocoDataset(json_path=valid_json_path,
                                root_dir=valid_root_dir,
                                transform=transform)

    valid_coco_dataset = get_data_loader(valid_dataset, batch_size=1)

    vocab_size = len(train_dataset.vocabulary)
    encoder = FeatureExtractor(embedding_dim).to(device)
    decoder = CaptionGenerator(embedding_dim, 512, vocab_size, 1).to(device)

    criterion = nn.CrossEntropyLoss()
    # Only the decoder and the encoder's linear/bn layers are optimized.
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = optim.Adam(params, lr=0.01)

    print(f"TRAIN DATASET: {len(train_coco_dataset)}")
    print(f"VALID DATASET: {len(valid_coco_dataset)}")

    for epoch in range(num_epochs):
        encoder.train()
        decoder.train()
        train_loss = 0.0
        valid_loss = 0.0
        for i, (images, captions,
                descriptions) in enumerate(train_coco_dataset):
            images = images.to(device)
            captions = captions.to(device)

            features = encoder(images)
            outputs = decoder(features, captions)

            loss = criterion(outputs.view(-1, vocab_size),
                             captions.view(-1))

            encoder.zero_grad()
            decoder.zero_grad()

            loss.backward()
            optimizer.step()

            train_loss += loss.item()

            # Save the model checkpoints
            if (i + 1) % 1000 == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(checkpoint_dir,
                                 'decoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(checkpoint_dir,
                                 'encoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))

        encoder.eval()
        decoder.eval()
        bleu = 0.0
        num_valid_batches = 0
        # Validation needs no gradients; skipping them saves memory and time.
        with torch.no_grad():
            for i, (images, captions,
                    descriptions) in enumerate(valid_coco_dataset):
                if i >= max_valid_batches:
                    break
                images = images.to(device)
                captions = captions.to(device)
                features = encoder(images)
                outputs = decoder(features, captions)
                loss = criterion(outputs.view(-1, vocab_size),
                                 captions.view(-1))
                valid_loss += loss.item()
                bleu += calculate_bleu(decoder, features, descriptions,
                                       train_coco_dataset)
                num_valid_batches += 1

        # Average over the batches actually evaluated (previously a fixed
        # 80000, which is wrong whenever the validation set is smaller).
        valid_denominator = max(num_valid_batches, 1)
        print(
            "Epoch: {}, Train Loss: {:.4f}, Valid Loss: {:.4f}, BLEU: {:.4f}".
            format(epoch, train_loss / len(train_coco_dataset),
                   valid_loss / valid_denominator, bleu / valid_denominator))
コード例 #22
0
def main(argv):
    """Run the caption generator in the phase selected by ``FLAGS.phase``.

    ``'train'`` trains the model, either inside a distributed TensorFlow
    cluster (when ``FLAGS.distributed`` is set) or in a single local session.
    ``'eval'`` evaluates a saved model, and any other phase value runs the
    test routine on a saved model.  The elapsed wall-clock time is printed
    at the end.

    :param argv: command-line arguments passed by the launcher (unused here).
    """
    start_time = time.time()
    config = Config()
    config.phase = FLAGS.phase
    config.train_cnn = FLAGS.train_cnn
    config.beam_size = FLAGS.beam_size
    config.distributed = FLAGS.distributed
    config.test_image_dir = os.path.join(FLAGS.node_root, 'images')
    config.test_result_dir = os.path.join(FLAGS.node_root, 'results')
    # NOTE(review): '.cvs' looks like a typo for '.csv'; kept verbatim because
    # the consumers of this path are not visible from here.
    config.test_result_file = os.path.join(FLAGS.node_root, 'results.cvs')
    config.replicas = len(FLAGS.worker_hosts.split(","))
    config.task_index = FLAGS.task_index

    if FLAGS.phase == 'train':
        # training phase

        if FLAGS.distributed:
            config.train_image_dir = FLAGS.input_path
            print(config.train_image_dir)

            ps_hosts = FLAGS.ps_hosts.split(",")
            worker_hosts = FLAGS.worker_hosts.split(",")

            # Create a cluster from the parameter server and worker hosts.
            cluster = tf.train.ClusterSpec({
                "ps": ps_hosts,
                "worker": worker_hosts
            })

            # Create and start a server for the local task.
            server = tf.train.Server(cluster,
                                     job_name=FLAGS.job_name,
                                     task_index=FLAGS.task_index)

            if FLAGS.job_name == "ps":
                # Parameter servers only host variables; block here.
                server.join()
            elif FLAGS.job_name == "worker":
                with tf.device(
                        tf.train.replica_device_setter(
                            worker_device="/job:worker/task:%d" %
                            FLAGS.task_index,
                            cluster=cluster)):

                    tf.reset_default_graph()

                    global_step = tf.get_variable(
                        'global_step', [],
                        initializer=tf.constant_initializer(0),
                        trainable=False,
                        dtype=tf.int32)

                    data = prepare_train_data(config)
                    model = CaptionGenerator(config)

                    # Same initializer the rest of this function uses; the
                    # older tf.initialize_all_variables() is deprecated.
                    init_op = tf.global_variables_initializer()

                is_chief = (FLAGS.task_index == 0)
                # Create a "supervisor", which oversees the training process.
                sv = tf.train.Supervisor(
                    is_chief=is_chief,
                    logdir="/home/mauro.emc/image_captioning/tmp/logs",
                    init_op=init_op,
                    global_step=global_step,
                    save_model_secs=600)
                with sv.prepare_or_wait_for_session(server.target) as sess:
                    if is_chief:
                        # NOTE(review): chief_queue_runner and init_token_op
                        # are not defined in this function; unless they exist
                        # at module level the chief worker raises NameError
                        # here.  Confirm where they are meant to come from.
                        sv.start_queue_runners(sess, [chief_queue_runner])
                        # Insert initial tokens to the queue.
                        sess.run(init_token_op)
                    sess.run(tf.global_variables_initializer())
                    model.train(sess, data)
                sv.stop()
        else:
            with tf.Session() as sess:
                data = prepare_train_data(config)
                model = CaptionGenerator(config)
                sess.run(tf.global_variables_initializer())
                if FLAGS.load:
                    model.load(sess, FLAGS.model_file)
                if FLAGS.load_cnn:
                    model.load_cnn(sess, FLAGS.cnn_model_file)
                tf.get_default_graph().finalize()
                model.train(sess, data)

    elif FLAGS.phase == 'eval':
        with tf.Session() as sess:
            # evaluation phase
            coco, data, vocabulary = prepare_eval_data(config)
            model = CaptionGenerator(config)
            model.load(sess, FLAGS.model_file)
            tf.get_default_graph().finalize()
            model.eval(sess, coco, data, vocabulary)

    else:
        with tf.Session() as sess:
            # testing phase
            data, vocabulary = prepare_test_data(config)
            model = CaptionGenerator(config)
            model.load(sess, FLAGS.model_file)
            tf.get_default_graph().finalize()
            model.test(sess, data, vocabulary)
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print('Total time in seconds :   ' + str(time.time() - start_time))
コード例 #23
0
def main(argv):
    """Run train / eval / test inside a ClusterOne-managed monitored session.

    The distributed environment helper supplies the device and session
    target; ``tf.train.MonitoredTrainingSession`` takes care of checkpoints.
    ``FLAGS.phase`` selects the branch: ``'train'``, ``'eval'`` (which forces
    a batch size of 1), or anything else for the test routine.

    :param argv: command-line arguments passed by the launcher (unused here).
    """
    flags = tf.app.flags
    FLAGS = flags.FLAGS

    config = Config()
    config.phase = FLAGS.phase
    config.train_cnn = FLAGS.train_cnn
    config.beam_size = FLAGS.beam_size

    # Cluster One setting
    dist_env = distributed_env(
        config.root_path_to_local_data,
        config.path_to_local_logs,
        config.cloud_path_to_data,
        config.local_repo,
        config.cloud_user_repo,
        flags)
    dist_env.get_env()

    tf.reset_default_graph()
    # Node environment: where to place ops and which master to connect to.
    device, target = dist_env.device_and_target()

    monitored = tf.train.MonitoredTrainingSession(
        master=target,
        is_chief=(FLAGS.task_index == 0),
        checkpoint_dir=FLAGS.log_dir)

    with monitored as sess:
        phase = FLAGS.phase

        if phase == 'train':
            train_set = prepare_train_data(config)
            with tf.device(device):
                generator = CaptionGenerator(config)
            sess.run(tf.global_variables_initializer())
            if FLAGS.load:
                generator.load(sess, FLAGS.model_file)
            if FLAGS.load_cnn:
                generator.load_cnn(sess, FLAGS.cnn_model_file)
            tf.get_default_graph().finalize()
            generator.train(sess, train_set)

        elif phase == 'eval':
            config.batch_size = 1
            coco_api, eval_set, vocab = prepare_eval_data(config)
            with tf.device(device):
                generator = CaptionGenerator(config)
                generator.load(sess, FLAGS.model_file)
                tf.get_default_graph().finalize()
            generator.eval(sess, coco_api, eval_set, vocab)

        else:
            test_set, vocab = prepare_test_data(config)
            with tf.device(device):
                generator = CaptionGenerator(config)
                generator.load(sess, FLAGS.model_file)
                tf.get_default_graph().finalize()
            generator.test(sess, test_set, vocab)
コード例 #24
0
ファイル: server.py プロジェクト: patil-suraj/image_caption

def setup_model():
    """Download the model weights unless ``models/model.npy`` already exists."""
    weights_path = 'models/model.npy'
    if os.path.exists(weights_path):
        return
    download_file_from_google_drive(weights_path)


# setup_model()
# Module-level inference configuration: test phase, beam width 3, CNN frozen.
config = Config()
config.beam_size = 3
config.phase = 'test'
config.train_cnn = False

# The session and model are built once, at import time.
# NOTE(review): model.load() is called without a weights path here, unlike
# the other entry points; presumably it falls back to a default - confirm.
sess = tf.Session()
model = CaptionGenerator(config)
model.load(sess)
# Finalize the default graph so no further ops can be added to it.
tf.get_default_graph().finalize()


@app.route('/')
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page


@app.route('/analyze', methods=['POST'])
def analyze():
    """Store the uploaded file under ``./test/images`` and prepare test data.

    The upload is read from the ``file`` field of the POST request.

    NOTE(review): this handler ends after ``prepare_test_data`` without
    returning a response; the remainder appears to be missing from this
    snippet.
    """
    f = request.files['file']
    # The filename is client-controlled: keep only its final path component
    # so a name such as '../../x' cannot escape the upload directory.
    filename = os.path.basename(f.filename)
    f.save(os.path.join('./test/images', filename))

    data, vocabulary = prepare_test_data(config)
コード例 #25
0
ファイル: main.py プロジェクト: zisang0210/flickr8k-im2txt
def main(argv):
    """Fetch Flickr8k, run the phase in ``FLAGS.phase``, then clean up.

    Shell commands list ``/tinysrc`` and run the dataset download script
    before the TensorFlow session starts; afterwards the downloaded dataset
    directories under ``/output`` are removed.  ``FLAGS.phase`` selects
    ``'train'``, ``'eval'``, or (any other value) the test routine.

    :param argv: command-line arguments passed by the launcher (unused here).
    """
    for setup_cmd in ("ls /tinysrc", "python tinysrc/download_flickr8k.py"):
        os.system(setup_cmd)

    config = Config()
    config.phase = FLAGS.phase
    config.train_cnn = FLAGS.train_cnn
    config.joint_train = FLAGS.joint_train
    config.beam_size = FLAGS.beam_size
    config.attention_mechanism = FLAGS.attention
    config.faster_rcnn_frozen = FLAGS.faster_rcnn_frozen

    with tf.Session() as sess:
        phase = FLAGS.phase

        if phase == 'train':
            train_set = prepare_train_data(config)
            generator = CaptionGenerator(config)
            sess.run(tf.global_variables_initializer())
            if FLAGS.load:
                generator.load(sess, FLAGS.model_file)
            if FLAGS.load_cnn:
                generator.load_faster_rcnn_feature_extractor(
                    sess, FLAGS.faster_rcnn_ckpt)
            tf.get_default_graph().finalize()
            generator.train(sess, train_set)

        elif phase == 'eval':
            coco_api, eval_set, vocab = prepare_eval_data(config)
            generator = CaptionGenerator(config)
            generator.load(sess, FLAGS.model_file)
            tf.get_default_graph().finalize()
            generator.eval(sess, coco_api, eval_set, vocab)

        else:
            test_set, vocab = prepare_test_data(config)
            generator = CaptionGenerator(config)
            generator.load(sess, FLAGS.model_file)
            tf.get_default_graph().finalize()
            generator.test(sess, test_set, vocab)

    for cleanup_cmd in ("rm -rf /output/Flickr8k_Dataset/",
                        "rm -rf /output/Flickr8k_text/"):
        os.system(cleanup_cmd)
コード例 #26
0
class DeepRNNInference(object):
    """Caption one image at a time with a pre-trained ``CaptionGenerator``.

    Construction builds the model in a new TensorFlow session and loads the
    weights; ``predict`` is the entry point for a request payload and
    returns the generated captions.

    Diagnostic messages go through the standard ``logging`` module at DEBUG
    level.  Earlier revisions posted them to a chat webhook by shelling out
    to ``curl`` on every call, which embedded a secret URL in the source and
    put blocking network I/O on the inference path.
    """

    @classmethod
    def from_path(cls, model_dir):
        """Build an instance from the ``289999.npy`` weights in *model_dir*.

        :param model_dir: directory that contains the weights file.
        :return: a new instance of ``cls`` using default settings.
        """
        return cls(model_dir + '/289999.npy')

    def __init__(self,
                 weight_file,
                 beam_size=5,
                 save_to='test.png',
                 mean_file='ilsvrc_2012_mean.npy'):
        """Create the session, build the model and load its weights.

        :param weight_file: path of the weights file handed to ``model.load``.
        :param beam_size: beam width used by ``beam_search``.
        :param save_to: accepted for interface compatibility; not used.
        :param mean_file: accepted for interface compatibility; not used
            (the per-channel mean is hard-coded below).
        """
        self.mean = np.array([104.00698793, 116.66876762, 122.67891434])
        self.scale_shape = np.array([224, 224], np.int32)
        self.crop_shape = np.array([224, 224], np.int32)
        self.bgr = True
        config = Config()
        config.phase = 'test'
        config.train_cnn = False
        # Honour the constructor argument (it used to be ignored in favour
        # of a hard-coded 5, which is still the default).
        config.beam_size = beam_size
        config.batch_size = 1
        # NOTE(review): other callers unpack prepare_test_data() as
        # (data, vocabulary); here the result is used directly as the
        # vocabulary - confirm this project's helper returns only that.
        self.vocabulary = prepare_test_data(config)
        self.config = config

        self.sess = tf.Session()
        # Entered manually and never exited, so the session remains the
        # default one for the lifetime of this object.
        self.sess.__enter__()
        self.model = CaptionGenerator(config)
        self.sess.run(tf.global_variables_initializer())
        self.model.load(self.sess, weight_file)

    @staticmethod
    def _debug(message, *args):
        """Log a diagnostic message at DEBUG level."""
        # Imported locally so the class does not rely on a module-level
        # ``import logging`` being present.
        import logging
        logging.getLogger(__name__).debug(message, *args)

    def preprocess(self, image):
        """Reorder channels, resize, crop and mean-centre one image.

        :param image: 3-D image array; the last axis is reversed when
            ``self.bgr`` is set (presumably RGB <-> BGR - confirm).
        :return: the cropped image with ``self.mean`` subtracted.
        """
        if self.bgr:
            # Reverse the last axis by swapping it to the front and back.
            temp = image.swapaxes(0, 2)
            temp = temp[::-1]
            image = temp.swapaxes(0, 2)
        self._debug("preprocess: input shape %s", image.shape)

        image = cv2.resize(image, (self.scale_shape[0], self.scale_shape[1]))
        # Centre crop; with equal scale and crop shapes the offset is zero.
        offset = (self.scale_shape - self.crop_shape) / 2
        offset = offset.astype(np.int32)

        image = image[offset[0]:offset[0] + self.crop_shape[0],
                      offset[1]:offset[1] + self.crop_shape[1]]
        self._debug("preprocess: cropped image type %s", type(image))

        return image - self.mean

    def beam_search(self, image):
        """Use beam search to generate the captions for a batch of images.

        :param image: a single raw image; it is preprocessed and wrapped
            into a batch of one.
        :return: one list per batch element holding the caption candidates
            extracted (sorted) from the completed beams, or from the partial
            beams when no caption was completed.
        """
        # Feed in the images to get the contexts and the initial LSTM states
        images = np.array([self.preprocess(image)], np.float32)
        self._debug("beam_search: batch type %s", type(images))

        contexts, initial_memory, initial_output = self.sess.run(
            [
                self.model.conv_feats, self.model.initial_memory,
                self.model.initial_output
            ],
            feed_dict={self.model.images: images})

        partial_caption_data = []
        complete_caption_data = []
        for k in range(self.config.batch_size):
            initial_beam = CaptionData(sentence=[],
                                       memory=initial_memory[k],
                                       output=initial_output[k],
                                       score=1.0)
            partial_caption_data.append(TopN(self.config.beam_size))
            partial_caption_data[-1].push(initial_beam)
            complete_caption_data.append(TopN(self.config.beam_size))

        # Run beam search
        for idx in range(self.config.max_caption_length):
            partial_caption_data_lists = []
            for k in range(self.config.batch_size):
                data = partial_caption_data[k].extract()
                partial_caption_data_lists.append(data)
                partial_caption_data[k].reset()

            # Only the single initial beam exists on the first step.
            num_steps = 1 if idx == 0 else self.config.beam_size
            for b in range(num_steps):
                if idx == 0:
                    last_word = np.zeros((self.config.batch_size), np.int32)
                else:
                    last_word = np.array([
                        pcl[b].sentence[-1]
                        for pcl in partial_caption_data_lists
                    ], np.int32)

                last_memory = np.array(
                    [pcl[b].memory for pcl in partial_caption_data_lists],
                    np.float32)
                last_output = np.array(
                    [pcl[b].output for pcl in partial_caption_data_lists],
                    np.float32)

                memory, output, scores = self.sess.run(
                    [self.model.memory, self.model.output, self.model.probs],
                    feed_dict={
                        self.model.contexts: contexts,
                        self.model.last_word: last_word,
                        self.model.last_memory: last_memory,
                        self.model.last_output: last_output
                    })

                # Find the beam_size most probable next words
                for k in range(self.config.batch_size):
                    caption_data = partial_caption_data_lists[k][b]
                    words_and_scores = list(enumerate(scores[k]))
                    words_and_scores.sort(key=lambda x: -x[1])
                    words_and_scores = words_and_scores[0:self.config.
                                                        beam_size + 1]

                    # Append each of these words to the current partial caption
                    for w, s in words_and_scores:
                        sentence = caption_data.sentence + [w]
                        score = caption_data.score * s
                        beam = CaptionData(sentence, memory[k], output[k],
                                           score)
                        # A full stop ends the caption.
                        if self.vocabulary.words[w] == '.':
                            complete_caption_data[k].push(beam)
                        else:
                            partial_caption_data[k].push(beam)

        results = []
        for k in range(self.config.batch_size):
            if complete_caption_data[k].size() == 0:
                complete_caption_data[k] = partial_caption_data[k]
            results.append(complete_caption_data[k].extract(sort=True))

        return results

    def predict(self, instances, **kwargs):
        """Caption the first instance of a prediction request.

        :param instances: request payload; the image is read from
            ``instances[0]['instances'][0]['values']``.
        :param kwargs: ignored.
        :return: the list of captions from ``perform_inference``.
        """
        values = instances[0]['instances'][0]['values']
        captions = self.perform_inference(values)
        self._debug("predict: instances type %s",
                    type(instances[0]['instances']))
        return captions

    def perform_inference(self, image):
        """Run beam search on *image* and decode the top caption per item.

        :param image: image data convertible to an ``int32`` numpy array.
        :return: list with one caption per batch element.
        """
        image = np.array(image, np.int32)
        self._debug("perform_inference: image shape type %s",
                    type(image.shape))

        caption_data = self.beam_search(image)
        self._debug("perform_inference: image type %s", type(image))

        captions = []
        for k in tqdm(range(self.config.batch_size), desc='path'):
            # Index 0 is the first candidate of the sorted beam output.
            word_idxs = caption_data[k][0].sentence
            captions.append(self.vocabulary.get_sentence(word_idxs))
        return captions
コード例 #27
0
def main(argv):
    """Dispatch on ``FLAGS.phase``: train, eval, CNN-only check, or test.

    ``'test_loaded_cnn'`` loads only the CNN weights, classifies the image
    at ``FLAGS.image_file`` and prints the five highest-probability classes.
    Any phase other than the three named ones runs the test routine.

    :param argv: command-line arguments passed by the launcher (unused here).
    """
    config = Config()
    config.phase = FLAGS.phase
    config.train_cnn = FLAGS.train_cnn
    config.beam_size = FLAGS.beam_size
    config.trainable_variable = FLAGS.train_cnn

    with tf.Session() as sess:
        phase = FLAGS.phase

        if phase == 'train':
            train_set = prepare_train_data(config)
            generator = CaptionGenerator(config)
            sess.run(tf.global_variables_initializer())
            if FLAGS.load:
                generator.load(sess, FLAGS.model_file)
            # Optionally restore the pre-trained CNN weights as well.
            if FLAGS.load_cnn:
                generator.load_cnn(sess, FLAGS.cnn_model_file)
            tf.get_default_graph().finalize()
            generator.train(sess, train_set)

        elif phase == 'eval':
            coco_api, eval_set, vocab = prepare_eval_data(config)
            generator = CaptionGenerator(config)
            generator.load(sess, FLAGS.model_file)
            tf.get_default_graph().finalize()
            generator.eval(sess, coco_api, eval_set, vocab)

        elif phase == 'test_loaded_cnn':
            # Exercise the CNN alone on a single 224x224 RGB image.
            generator = CaptionGenerator(config)
            sess.run(tf.global_variables_initializer())
            image_input = tf.placeholder(tf.float32, [None, 224, 224, 3])
            class_probs = generator.test_cnn(image_input)
            generator.load_cnn(sess, FLAGS.cnn_model_file)

            picture = imread(FLAGS.image_file, mode='RGB')
            picture = imresize(picture, (224, 224))

            batch_probs = sess.run(class_probs,
                                   feed_dict={image_input: [picture]})
            prob = batch_probs[0]
            # Indices of the five largest probabilities, best first.
            top_five = np.argsort(prob)[::-1][0:5]
            for p in top_five:
                print(class_names[p], prob[p])

        else:
            test_set, vocab = prepare_test_data(config)
            generator = CaptionGenerator(config)
            generator.load(sess, FLAGS.model_file)
            tf.get_default_graph().finalize()
            generator.test(sess, test_set, vocab)
コード例 #28
0
def main(argv):
    """Train, evaluate or test the caption model according to ``FLAGS.phase``.

    ``'train'`` and ``'eval'`` select their respective routines; every other
    value runs the test routine.  All work happens inside one ``tf.Session``.

    :param argv: command-line arguments passed by the launcher (unused here).
    """
    config = Config()
    config.phase = FLAGS.phase
    config.train_cnn = FLAGS.train_cnn
    config.beam_size = FLAGS.beam_size

    with tf.Session() as sess:
        if FLAGS.phase == 'train':
            train_set = prepare_train_data(config)
            generator = CaptionGenerator(config)
            sess.run(tf.global_variables_initializer())
            if FLAGS.load:
                generator.load(sess, FLAGS.model_file)
            if FLAGS.load_cnn:
                generator.load_cnn(sess, FLAGS.cnn_model_file)
            tf.get_default_graph().finalize()
            generator.train(sess, train_set)
            return

        if FLAGS.phase == 'eval':
            coco_api, eval_set, vocab = prepare_eval_data(config)
            generator = CaptionGenerator(config)
            generator.load(sess, FLAGS.model_file)
            tf.get_default_graph().finalize()
            generator.eval(sess, coco_api, eval_set, vocab)
            return

        # Fallback for every other phase value: run the test routine.
        test_set, vocab = prepare_test_data(config)
        generator = CaptionGenerator(config)
        generator.load(sess, FLAGS.model_file)
        tf.get_default_graph().finalize()
        generator.test(sess, test_set, vocab)
コード例 #29
0
def main(argv):
    """Run training, COCO evaluation, a new-dataset evaluation, or testing.

    Besides ``'train'`` and ``'eval'``, four ``test_new_data*`` phases
    evaluate a saved model on additional datasets; each reads its caption
    file, image location, result directory and result file from ``Config``
    attributes that share a per-dataset suffix.  Any other phase value runs
    the test routine.

    :param argv: command-line arguments passed by the launcher (unused here).
    """
    config = Config()
    config.phase = FLAGS.phase
    config.train_cnn = FLAGS.train_cnn
    config.beam_size = FLAGS.beam_size

    # Phase name -> suffix of the Config attributes for that dataset
    # (eval_caption_file_*, eval_image_*, eval_result_dir_*,
    # eval_result_file_*).
    new_data_suffixes = {
        'test_new_data': 'unsplash',
        'test_new_data_vizwiz': 'vizwiz_train',
        'test_new_data_insta': 'insta',
        'test_new_data_google_top_n': 'topN',
    }

    with tf.Session() as sess:
        phase = FLAGS.phase

        if phase == 'train':
            train_set = prepare_train_data(config)
            generator = CaptionGenerator(config)
            sess.run(tf.global_variables_initializer())
            if FLAGS.load:
                generator.load(sess, FLAGS.model_file)
            if FLAGS.load_cnn:
                generator.load_cnn(sess, FLAGS.cnn_model_file)
            tf.get_default_graph().finalize()
            generator.train(sess, train_set)

        elif phase == 'eval':
            coco_api, eval_set, vocab = prepare_eval_data(config)
            generator = CaptionGenerator(config)
            generator.load(sess, FLAGS.model_file)
            tf.get_default_graph().finalize()
            generator.eval(sess, coco_api, eval_set, vocab)

        elif phase in new_data_suffixes:
            suffix = new_data_suffixes[phase]
            coco_api, eval_set, vocab = prepare_eval_new_data(
                getattr(config, 'eval_caption_file_' + suffix),
                getattr(config, 'eval_image_' + suffix),
                config)
            generator = CaptionGenerator(config)
            generator.load(sess, FLAGS.model_file)
            tf.get_default_graph().finalize()
            generator.eval_new_data(
                sess, coco_api, eval_set, vocab,
                getattr(config, 'eval_result_dir_' + suffix),
                getattr(config, 'eval_result_file_' + suffix))

        else:
            test_set, vocab = prepare_test_data(config)
            generator = CaptionGenerator(config)
            generator.load(sess, FLAGS.model_file)
            tf.get_default_graph().finalize()
            generator.test(sess, test_set, vocab)
コード例 #30
0
import tensorflow as tf
from model import CaptionGenerator

# Build the caption model with its constructor defaults (no config passed).
net = CaptionGenerator()

# Open a TensorFlow session and hand it to the model's training routine.
sess = tf.Session()
net.train(sess, )