def main(_):
    
    # load dictionary 
    with open(FLAGS.dict_file, 'r') as f:
        data = json.load(f)
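    # JSON object keys are always strings, so convert idx_to_word keys back to int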
    data['idx_to_word'] = {int(k): v for k, v in data['idx_to_word'].items()}

    # extract all features 
    features, all_image_names = extract_features(FLAGS.test_dir)
    
    # Build the TensorFlow graph and run inference
    g = tf.Graph()
    with g.as_default():
        num_of_images = len(os.listdir(FLAGS.test_dir))
        print("Inferencing on {} images".format(num_of_images))
        
        # Build the model.
        model = build_model(model_config, mode, inference_batch=1)
        
        # Initialize the beam search caption generator
        generator = CaptionGenerator(model, data['word_to_idx'],
                                     max_caption_length=model_config.padded_length - 1)
        
        # run inference
        init = tf.global_variables_initializer()
        with tf.Session() as sess:
        
            sess.run(init)
        
            model['saver'].restore(sess, FLAGS.saved_sess)
              
            print("Model restored! Last step run: ", sess.run(model['global_step']))
            
            # predictions 
            final_preds = run_inference(sess, features, generator, 1.0)
            captions_pred = [unpack.reshape(-1, 1) for unpack in final_preds]
            #captions_pred = np.concatenate(captions_pred, 1)
            captions_deco = []
            for cap in captions_pred:
                dec = decode_captions(cap.reshape(-1, 1), data['idx_to_word'])
                dec = ' '.join(dec)
                captions_deco.append(dec)
            
            # save the images with captions written on them
            if not os.path.exists(FLAGS.results_dir):
                os.makedirs(FLAGS.results_dir)
            for j in range(len(captions_deco)):
                this_image_name = all_image_names['file_name'].values[j]
                img_name = os.path.join(FLAGS.results_dir, this_image_name)
                img = imread(os.path.join(FLAGS.test_dir, this_image_name))
                write_text_on_image(img, img_name, captions_deco[j])
    print("\ndone.")
Example 2
def main(_):
    # load data
    data = load_coco_data(FLAGS.data_dir)

    # optionally force padded_length to the caption length minus one
    # model_config.padded_length = len(data['train_captions'][0]) - 1

    # Build the TensorFlow graph and train it
    g = tf.Graph()
    with g.as_default():

        # Build the model. If FLAGS.glove_vocab is empty, the model is built
        # without pretrained word vectors; otherwise it is initialized with
        # GloVe vectors.
        if FLAGS.glove_vocab == '':
            model = build_model(model_config, mode=mode)
        else:
            glove_vocab = np.load(FLAGS.glove_vocab)
            model = build_model(model_config,
                                mode=mode,
                                glove_vocab=glove_vocab)

        # Set up the learning rate.
        learning_rate_decay_fn = None
        learning_rate = tf.constant(training_config.initial_learning_rate)
        if training_config.learning_rate_decay_factor > 0:
            num_batches_per_epoch = (training_config.num_examples_per_epoch /
                                     model_config.batch_size)
            decay_steps = int(num_batches_per_epoch *
                              training_config.num_epochs_per_decay)

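            # tf.train.exponential_decay computes
            #     learning_rate * decay_rate ** (global_step / decay_steps),
            # where the division is floored to an integer when staircase=True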
            def _learning_rate_decay_fn(learning_rate, global_step):
                return tf.train.exponential_decay(
                    learning_rate,
                    global_step,
                    decay_steps=decay_steps,
                    decay_rate=training_config.learning_rate_decay_factor,
                    staircase=True)

            learning_rate_decay_fn = _learning_rate_decay_fn

        # Set up the training ops.
        train_op = tf.contrib.layers.optimize_loss(
            loss=model['total_loss'],
            global_step=model['global_step'],
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn)

        # initialize all variables
        init = tf.global_variables_initializer()

        with tf.Session() as sess:
            sess.run(init)

            num_epochs = training_config.total_num_epochs

            num_train = data['train_captions'].shape[0]
            iterations_per_epoch = max(num_train / model_config.batch_size, 1)
            num_iterations = int(num_epochs * iterations_per_epoch)

            # Set up some variables for book-keeping
            epoch = 0
            best_val_acc = 0
            best_params = {}
            loss_history = []
            train_acc_history = []
            val_acc_history = []

            print("\n\nTotal training iter: ", num_iterations, "\n\n")
            time_now = datetime.now()
            for t in range(num_iterations):

                # run one training step
                total_loss_value = _step(sess, data, train_op, model,
                                         model_config.lstm_dropout_keep_prob)

                loss_history.append(total_loss_value)

                # Print out training loss
                if FLAGS.print_every > 0 and t % FLAGS.print_every == 0:
                    print(
                        '(Iteration %d / %d) loss: %f, and time elapsed: %.2f minutes'
                        % (t + 1, num_iterations, float(loss_history[-1]),
                           (datetime.now() - time_now).seconds / 60.0))

                # Print out some image sample results
                if FLAGS.sample_every > 0 and (t + 1) % FLAGS.sample_every == 0:
                    temp_dir = os.path.join(FLAGS.sample_dir,
                                            'temp_dir_{}'.format(t + 1))
                    if not os.path.exists(temp_dir):
                        os.makedirs(temp_dir)
                    captions_pred, urls = _run_validation(
                        sess, data, model_config.batch_size, model,
                        1.0)  # the output is size (32, 16)
                    captions_pred = [
                        unpack.reshape(-1, 1) for unpack in captions_pred
                    ]
                    captions_pred = np.concatenate(captions_pred, 1)

                    captions_deco = decode_captions(captions_pred,
                                                    data['idx_to_word'])

                    for j in range(len(captions_deco)):
                        img_name = os.path.join(temp_dir,
                                                'image_{}.jpg'.format(j))
                        img = image_from_url(urls[j])
                        write_text_on_image(img, img_name, captions_deco[j])

                # save the model continuously to avoid interruption
                if (FLAGS.saveModel_every > 0
                        and (t + 1) % FLAGS.saveModel_every == 0):
                    if not os.path.exists(FLAGS.savedSession_dir):
                        os.makedirs(FLAGS.savedSession_dir)
                    checkpoint_name = (savedModelName[:-5] +
                                       '_checkpoint{}.ckpt'.format(t + 1))
                    save_path = model['saver'].save(
                        sess,
                        os.path.join(FLAGS.savedSession_dir, checkpoint_name))

            if not os.path.exists(FLAGS.savedSession_dir):
                os.makedirs(FLAGS.savedSession_dir)
            save_path = model['saver'].save(
                sess, os.path.join(FLAGS.savedSession_dir, savedModelName))
            print("done. Model saved at: ",
                  os.path.join(FLAGS.savedSession_dir, savedModelName))
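
# _step is not shown in this listing. A minimal sketch, assuming the model
# dict exposes its input placeholders under the (hypothetical) keys below and
# that a minibatch-sampling helper like the one in train_model is available;
# only train_op and model['total_loss'] are confirmed by the code above:
def _step_sketch(sess, data, train_op, model, keep_prob):
    # sample a random minibatch of captions and features (hypothetical helper)
    captions_in, captions_out, mask, features = sample_minibatch(data)
    feed_dict = {model['image_feature']: features,   # hypothetical keys
                 model['input_seqs']: captions_in,
                 model['target_seqs']: captions_out,
                 model['input_mask']: mask,
                 model['keep_prob']: keep_prob}
    _, total_loss = sess.run([train_op, model['total_loss']],
                             feed_dict=feed_dict)
    return total_loss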
Example 3
with g.as_default():
    # Build the model.
    model = build_model(model_config,
                        mode,
                        inference_batch=BATCH_SIZE_INFERENCE)

    # run inference
    init = tf.global_variables_initializer()
    with tf.Session() as sess:

        sess.run(init)

        model['saver'].restore(sess, directory + "savedSession/model0.ckpt")

        print("Model restured! Last step run: ",
              sess.run(model['global_step']))

        for i in range(TOTAL_INFERENCE_STEP):
            captions_pred, urls = _step_test(
                sess, data, BATCH_SIZE_INFERENCE, model,
                1.0)  # the output is size (32, 16)
            captions_pred = [unpack.reshape(-1, 1) for unpack in captions_pred]
            captions_pred = np.concatenate(captions_pred, 1)

            captions_deco = decode_captions(captions_pred, data['idx_to_word'])

            for j in range(len(captions_deco)):
                img_name = directory + 'image_' + str(j) + '.jpg'
                img = image_from_url(urls[j])
                write_text_on_image(img, img_name, captions_deco[j])
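
# write_text_on_image is also defined elsewhere. A minimal sketch using
# Pillow, assuming img is an HxWx3 uint8 array; the exact font and placement
# in the original are unknown:
def write_text_on_image_sketch(img, img_name, caption):
    from PIL import Image, ImageDraw
    pil_img = Image.fromarray(np.asarray(img, dtype=np.uint8))
    draw = ImageDraw.Draw(pil_img)
    draw.text((10, 10), caption, fill=(255, 255, 255))  # default bitmap font
    pil_img.save(img_name)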
def train_model(model, config, data):

    #g = tf.Graph()
    #with g.as_default():
    # define the optimizer
    num_batches = config.total_instances / config.batch_size
    decay_steps = int(num_batches * config.num_epochs_per_decay)
    learning_rate = tf.constant(config.initial_learning_rate)

    learning_rate_decay_fn = None

    def _decay_fn(learning_rate, global_step):
        return tf.train.exponential_decay(learning_rate,
                                          global_step,
                                          decay_steps=decay_steps,
                                          decay_rate=0.5,
                                          staircase=True)

    learning_rate_decay_fn = _decay_fn
    train_op = tf.contrib.layers.optimize_loss(
        loss=model.total_loss,
        global_step=model.global_step,
        learning_rate=learning_rate,
        optimizer='SGD',
        clip_gradients=config.clip_gradients,
        learning_rate_decay_fn=learning_rate_decay_fn)

    # set up the saver and variable initializer
    saver = tf.train.Saver()
    init = tf.global_variables_initializer()

    # allow GPU memory growth to avoid BLAS memory allocation failures
    config_ = tf.ConfigProto()
    config_.gpu_options.allow_growth = True

    with tf.Session(config=config_) as sess:
        sess.run(init)
        # if a checkpoint exists, restore it
        ckpt = tf.train.get_checkpoint_state(
            os.path.dirname('checkpoints/checkpoint'))
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print("successfully restored the checkpoint")

        rand_int = np.random.randint(1, 100)
        caption_in, caption_out, mask, image_features, urls = minibatch(
            data, rand_int, config.batch_size, config.total_instances)

        if not os.path.exists('test_caption'):
            os.makedirs('test_caption')
        captions_pred = _run_validation(
            sess, caption_in, image_features, config.batch_size, model,
            config.input_len)  # the output is size (32, 16)
        captions_pred = [unpack.reshape(-1, 1) for unpack in captions_pred]
        captions_pred = np.concatenate(captions_pred, 1)

        captions_deco = decode_captions(captions_pred, data['idx_to_word'])

        for j in range(len(captions_deco)):
            img_name = os.path.join('test_caption', 'image_{}.jpg'.format(j))
            img = image_from_url(urls[j])
            write_text_on_image(img, img_name, captions_deco[j])
        print("saved predicted images into ./test_caption folder")
        # full training loop (commented out below)
        #         total_runs = int((config.total_instances/config.batch_size)*config.num_epochs)
        #         initial_step = model.global_step.eval()

        ### initialize summary writer
        #         tf.summary.scalar("learing_rate", learning_rate)
        #         a = tf.summary.merge_all()
        #         writer = tf.summary.FileWriter('./graphs/singlelayer_lstm', sess.graph)

        #         time_now = datetime.now()
        #         for t in range(total_runs):

        #             caption_in, caption_out, mask, image_features, urls = minibatch(data,t,config.batch_size, config.total_instances)

        #             # feed data
        #             feed_dict = {model.image_feature: image_features, model.caption_in: caption_in,
        #                         model.caption_out: caption_out, model.caption_mask: mask}
        #             merge_op, _, total_loss, b = sess.run([model.summary_op, train_op, model.total_loss, a],
        #                                            feed_dict = feed_dict)

        #             writer.add_summary(merge_op, global_step=t)
        #             writer.add_summary(b, global_step=t)

        #             # print loss info
        #             if(t+1) % 20 == 0:
        #                 print('(Iteration %d / %d) loss: %f, and time elapsed: %.2f minutes' % (
        #                     t + 1, total_runs, float(total_loss), (datetime.now() - time_now).seconds/60.0))

        #             #print image
        #             if(t+1)%100 == 0:
        #                 if not os.path.exists('test_caption'):
        #                     os.makedirs('test_caption')
        #                 captions_pred = _run_validation(sess, caption_in, image_features, 1, model, config.input_len) # the output is size (32, 16)
        #                 captions_pred = [unpack.reshape(-1, 1) for unpack in captions_pred]
        #                 captions_pred = np.concatenate(captions_pred, 1)

        #                 captions_deco = decode_captions(captions_pred, data['idx_to_word'])

        #                 for j in range(len(captions_deco)):
        #                     img_name = os.path.join('test_caption', 'image_{}.jpg'.format(j))
        #                     img = image_from_url(urls[j])
        #                     write_text_on_image(img, img_name, captions_deco[j])

        #             #save model
        #             if(t+1)%50 == 0 or t == (total_runs-1):
        #                 if not os.path.exists('checkpoints/singlelayer_lstm'):
        #                     os.makedirs('checkpoints/singlelayer_lstm')
        #                 saver.save(sess, 'checkpoints/singlelayer_lstm', t)

        # visualize the embedding matrix with the TensorBoard projector
        final_embed_matrix = sess.run(model.embed_map)

        # it has to be a tf.Variable; constants don't work here, and
        # model.embed_map can't be reused directly
        embedding_var = tf.Variable(final_embed_matrix[:1000],
                                    name='embedding')
        sess.run(embedding_var.initializer)

        # use a distinct name so the function's config argument is not shadowed
        projector_config = projector.ProjectorConfig()
        summary_writer = tf.summary.FileWriter('processed')

        # add embedding to the config file
        embedding = projector_config.embeddings.add()
        embedding.tensor_name = embedding_var.name

        # link this tensor to its metadata file, in this case the first 1000 words of the vocab
        #         metadata_path = './processed/metadata.tsv'
        #         if not os.path.exists(metadata_path):
        #             f = open(metadata_path, "w")
        #             f.close()
        embedding.metadata_path = os.path.join('processed', 'metadata.tsv')

        # saves a configuration file that TensorBoard will read during startup.
        projector.visualize_embeddings(summary_writer, projector_config)
        saver_embed = tf.train.Saver([embedding_var])
        saver_embed.save(sess, 'processed/model3.ckpt', 1)
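
# TensorBoard expects the metadata file referenced above to exist, with one
# label per embedding row. A minimal sketch that writes labels for the 1000
# rows saved above, assuming data['idx_to_word'] maps indices 0..999 to words:
def write_projector_metadata_sketch(idx_to_word, path='processed/metadata.tsv'):
    if not os.path.exists(os.path.dirname(path)):
        os.makedirs(os.path.dirname(path))
    with open(path, 'w') as f:
        for i in range(1000):
            f.write(idx_to_word.get(i, '<UNK>') + '\n')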