def train_advanced_model(weight=None, batch_size=32, epochs=10):
    cg = caption_generator.CaptionGenerator()
    model = cg.create_advanced_model()
    if weight is not None:
        model.load_weights(weight)

    counter = 0
    file_name = 'weights-improvement-{epoch:02d}.hdf5'
    checkpoint = ModelCheckpoint(file_name, monitor='loss', verbose=1,
                                 save_best_only=True, mode='min')
    callbacks_list = [checkpoint]
    model.fit_generator(cg.data_generator(batch_size=batch_size),
                        steps_per_epoch=cg.total_samples / batch_size,
                        epochs=epochs, verbose=2, callbacks=callbacks_list)
    try:
        model.save('Models/WholeModel.h5', overwrite=True)
        model.save_weights('Models/Weights.h5', overwrite=True)
    except Exception:
        print "Error in saving model."
    print "Training complete...\n"
def train_model(weight=None, batch_size=32, epochs=10):
    cg = caption_generator.CaptionGenerator()
    model = cg.create_model()
    if weight is not None:
        model.load_weights(weight)

    counter = 0
    file_name = ('weights-improvement-epoch-{epoch:02d}'
                 '-val_acc-{val_acc:.5f}-val_loss-{val_loss:.5f}.hdf5')
    checkpoint = ModelCheckpoint(file_name, monitor='val_acc', verbose=1,
                                 save_best_only=True, mode='max')
    callbacks_list = [checkpoint]
    model.fit_generator(
        cg.data_generator(path='Flickr8k_text/flickr_8k_train_dataset.txt',
                          batch_size=batch_size),
        steps_per_epoch=cg.total_samples / batch_size,
        epochs=epochs, verbose=2, callbacks=callbacks_list,
        validation_data=cg.data_generator(
            path='Flickr8k_text/flickr_8k_dev_dataset.txt',
            batch_size=batch_size),
        validation_steps=cg.total_samples_dev / batch_size)
    try:
        model.save('Models/WholeModel.h5', overwrite=True)
        model.save_weights('Models/Weights.h5', overwrite=True)
    except Exception:
        print "Error in saving model."
    print "Training complete...\n"
def train_model(weight=None, batch_size=32, epochs=25):
    # Total samples : 64146
    cg = caption_generator.CaptionGenerator()
    model = cg.create_model()
    if weight is not None:
        model.load_weights(weight)

    counter = 0
    file_name = 'weights-improvement-{epoch:02d}.hdf5'
    checkpoint = ModelCheckpoint(file_name, monitor='loss', verbose=1,
                                 save_best_only=True, mode='min')
    callbacks_list = [checkpoint]
    model.fit_generator(cg.data_generator(batch_size=batch_size),
                        steps_per_epoch=300, epochs=epochs, verbose=2,
                        callbacks=callbacks_list)
    try:
        model.save('C:/Users/pc/Desktop/automatic_image_caption_generation/Models/WholeModel.h5',
                   overwrite=True)
        model.save_weights('C:/Users/pc/Desktop/automatic_image_caption_generation/Models/Weights.h5',
                           overwrite=True)
    except Exception:
        print "Error in saving model."
    print "Training complete...\n"
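# A hedged usage sketch for the train_model/train_advanced_model variants
# above: resume from an existing checkpoint if one is present, otherwise train
# from scratch. The checkpoint filename here is illustrative, not taken from
# the original scripts.
import os

if __name__ == '__main__':
    resume_from = 'weights-improvement-10.hdf5'  # hypothetical checkpoint name
    if os.path.exists(resume_from):
        train_model(weight=resume_from, batch_size=32, epochs=10)
    else:
        train_model(batch_size=32, epochs=10)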
def _assertExpectedCaptions(self,
                            expected_captions,
                            beam_size=3,
                            max_caption_length=20,
                            length_normalization_factor=0):
    """Tests that beam search generates the expected captions.

    Args:
      expected_captions: A sequence of pairs (sentence, probability), where
        sentence is a list of integer ids and probability is a float in [0, 1].
      beam_size: Parameter passed to beam_search().
      max_caption_length: Parameter passed to beam_search().
      length_normalization_factor: Parameter passed to beam_search().
    """
    expected_sentences = [c[0] for c in expected_captions]
    expected_probabilities = [c[1] for c in expected_captions]

    # Generate captions.
    generator = caption_generator.CaptionGenerator(
        model=FakeModel(),
        vocab=FakeVocab(),
        beam_size=beam_size,
        max_caption_length=max_caption_length,
        length_normalization_factor=length_normalization_factor)
    actual_captions = generator.beam_search(sess=None, encoded_image=None)

    actual_sentences = [c.sentence for c in actual_captions]
    actual_probabilities = [math.exp(c.logprob) for c in actual_captions]
    self.assertEqual(expected_sentences, actual_sentences)
    self.assertAllClose(expected_probabilities, actual_probabilities)
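# A hedged sketch of how a test case might call the helper above. The token
# ids and probability are placeholders; in a real test they would be derived
# from FakeModel's known softmax output rather than invented here.
def testBeamSizeOne(self):
    # With beam_size=1 only the single most likely caption should be returned.
    self._assertExpectedCaptions([([0, 1, 2], 0.12)], beam_size=1)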
def train_model(weight=None, batch_size=32, epochs=10):
    cg = caption_generator.CaptionGenerator()
    model = cg.create_model()
    if weight is not None:
        model.load_weights(weight)

    file_name = 'weights-improvement-{epoch:02d}.hdf5'
    checkpoint = ModelCheckpoint(file_name, monitor='loss', verbose=1,
                                 save_best_only=True, mode='min')
    callbacks_list = [checkpoint]

    optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-06)
    parallel_model = multi_gpu_model(model, gpus=2)
    parallel_model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                           metrics=['accuracy'])
    parallel_model.fit_generator(cg.data_generator(batch_size=batch_size),
                                 steps_per_epoch=cg.total_samples / batch_size,
                                 epochs=epochs, verbose=2,
                                 callbacks=callbacks_list)
    try:
        parallel_model.save(os.path.join(CUR_DIR, '../Models/WholeModel.h5'),
                            overwrite=True)
        parallel_model.save_weights(os.path.join(CUR_DIR, '../Models/Weights.h5'),
                                    overwrite=True)
    except Exception:
        print("Error in saving model.")
    print("Training complete...\n")
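# Note (an assumption based on the Keras multi_gpu_model documentation, not on
# the original script): weights are usually saved from the template `model`
# rather than from the parallel wrapper, so they can be reloaded later on a
# single GPU or CPU. A minimal variant of the save step above:
try:
    model.save(os.path.join(CUR_DIR, '../Models/WholeModel.h5'), overwrite=True)
    model.save_weights(os.path.join(CUR_DIR, '../Models/Weights.h5'), overwrite=True)
except Exception:
    print("Error in saving model.")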
def train_model(weight=None, batch_size=32, epochs=10):
    cg = caption_generator.CaptionGenerator()
    model = cg.create_model()
    if weight is not None:
        model.load_weights(weight)

    counter = 0
    file_name = DATA_PATH + 'weights-checkpoint.hdf5'

    # define callbacks
    checkpoint = ModelCheckpoint(file_name, monitor='loss', verbose=1,
                                 save_best_only=True, mode='min')
    tensor_board = TensorBoard(log_dir='./logs', write_graph=False,
                               write_images=False)
    reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=8,
                                  verbose=1, min_lr=1e-6)
    early_stopping = EarlyStopping(monitor='loss', min_delta=0.01, patience=16,
                                   verbose=1)
    callbacks_list = [checkpoint, tensor_board, reduce_lr, early_stopping]

    hist = model.fit_generator(cg.data_generator(batch_size=batch_size),
                               steps_per_epoch=cg.total_samples / batch_size,
                               epochs=epochs, verbose=2,
                               callbacks=callbacks_list)
    try:
        model.save(DATA_PATH + 'final_model.h5', overwrite=True)
        model.save_weights(DATA_PATH + 'final_weights.h5', overwrite=True)
    except Exception:
        print "Error in saving model."
    print "Training complete...\n"
    return hist
def train_model(weight=None, batch_size=256, epochs=10):
    cg = caption_generator.CaptionGenerator()
    model = cg.create_model()
    if weight is not None:
        model.load_weights(weight)

    counter = 0
    file_name = DATA_PATH + 'weights-checkpoint.h5'

    # define callbacks
    checkpoint = ModelCheckpoint(file_name, monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='min')
    tensor_board = TensorBoard(log_dir='./logs', write_graph=True)
    hist_lr = LR_hist()
    reduce_lr = LearningRateScheduler(step_decay)
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.1,
                                   patience=16, verbose=1)
    callbacks_list = [checkpoint, tensor_board, hist_lr, reduce_lr,
                      early_stopping]

    hist = model.fit_generator(
        cg.data_generator_train(batch_size=batch_size),
        steps_per_epoch=cg.total_samples / batch_size,
        epochs=epochs, verbose=2, callbacks=callbacks_list,
        validation_data=cg.data_generator_val(batch_size=batch_size),
        validation_steps=cg.total_samples / (batch_size * 13.0))

    model.save(DATA_PATH + 'final_model.h5', overwrite=True)
    model.save_weights(DATA_PATH + 'final_weights.h5', overwrite=True)

    hist_file = DATA_PATH + '/hist_model.dat'
    with open(hist_file, 'w') as f:
        pickle.dump(hist.history, f)

    print "training complete...\n"
    return model, hist, hist_lr
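# `step_decay` and `LR_hist` are referenced above but not defined in this
# excerpt. A minimal sketch of what they might look like (illustrative
# assumptions, not the original definitions): a step-wise halving schedule and
# a callback that records the learning rate at the end of each epoch.
import numpy as np
from keras import backend as K
from keras.callbacks import Callback


def step_decay(epoch):
    # Halve the base learning rate every 10 epochs.
    initial_lr, drop, epochs_per_drop = 0.001, 0.5, 10
    return initial_lr * (drop ** np.floor(epoch / float(epochs_per_drop)))


class LR_hist(Callback):
    def on_train_begin(self, logs=None):
        self.lrs = []

    def on_epoch_end(self, epoch, logs=None):
        # Record the optimizer's current learning rate.
        self.lrs.append(float(K.get_value(self.model.optimizer.lr)))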
def main(_):
    model = show_and_tell_model.ShowAndTellModel(FLAGS.model_path)
    vocabi = vocab.Vocabulary(FLAGS.vocab_file)
    filenames = _load_filenames()

    generator = caption_generator.CaptionGenerator(model, vocabi)

    for filename in filenames:
        with tf.gfile.GFile(filename, "rb") as f:
            image = f.read()
        captions = generator.beam_search(image)
        print("Captions for image %s:" % os.path.basename(filename))
        for i, caption in enumerate(captions):
            # Ignore begin and end tokens <S> and </S>.
            sentence = [vocabi.id_to_token(w) for w in caption.sentence[1:-1]]
            sentence = " ".join(sentence)
            print("  %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
def main(_): # Build the inference graph. g = tf.Graph() with g.as_default(): model = inference_wrapper.InferenceWrapper() restore_fn = model.build_graph_from_config(configuration.ModelConfig(), FLAGS.checkpoint_path) g.finalize() # Create the vocabulary. vocab = vocabulary.Vocabulary(FLAGS.vocab_file) filenames = [] for file_pattern in FLAGS.input_files.split(","): filenames.extend(tf.gfile.Glob(file_pattern)) tf.logging.info("Running caption generation on %d files matching %s", len(filenames), FLAGS.input_files) with tf.Session(graph=g) as sess: # Load the model from checkpoint. restore_fn(sess) # Prepare the caption generator. Here we are implicitly using the default # beam search parameters. See caption_generator.py for a description of the # available beam search parameters. generator = caption_generator.CaptionGenerator(model, vocab) images = [] for filename in filenames: with tf.gfile.GFile(filename, "rb") as f: image = f.read() images.append(image) captions = generator.beam_search(sess, images, vocab) for i, image in enumerate(images): print("Captions for image %s:" % os.path.basename(filenames[i])) for j, caption in enumerate(captions[i]): # Ignore begin and end words. sentence = [ vocab.id_to_word(w) for w in caption.sentence[1:-1] ] sentence = " ".join(sentence) print(" %d) %s (p=%f)" % (j, sentence, math.exp(caption.logprob)))
def img_captions(file_inputs):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   file_inputs[0])
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(file_inputs[1])

    filenames = []
    for file_pattern in file_inputs[2].split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), file_inputs[2])

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the
        # default beam search parameters. See caption_generator.py for a
        # description of the available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        caption_list = list()
        prob_list = list()
        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions, probs = generator.beam_search(sess, image)
            prob_list.append('[' + ", ".join(map(str, probs)) + ']')
            loc_cap_list = list()
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence).split('<S>')[0]
                loc_cap_list.append([sentence, math.exp(caption.logprob)])
            caption_list.append(loc_cap_list)
    return prob_list, caption_list
def start_testing_images(model_path):
    # Load model
    detection_graph = load_frozen_model(model_path)
    vocab = vocabulary.Vocabulary()
    generator = caption_generator.CaptionGenerator(None, vocab)

    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            # print([n.name for n in tf.get_default_graph().as_graph_def().node])
            # image_feed = tf.placeholder(dtype=tf.string, shape=[], name="image_feed")
            # input_feed = tf.placeholder(dtype=tf.int64,
            #                             shape=[None],  # batch_size
            #                             name="input_feed")
            # for op in detection_graph.get_operations():
            #     print(op.name)
            for image_path in TEST_IMAGE_PATHS:
                start = timeit.default_timer()
                image = tf.gfile.GFile(image_path, 'rb').read()
                img = Image.open(image_path)
                npimg = load_image_into_numpy_array(img)
                print(type(image))
                stop = timeit.default_timer()
                print("Time to encode image: ", stop - start)

                # Actual detection
                start = timeit.default_timer()
                captions = generator.beam_search(sess, image, img)
                stop = timeit.default_timer()
                print("Time to generate captions: ", stop - start)

                # Caption printing
                start = timeit.default_timer()
                for i, caption in enumerate(captions):
                    sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                    sentence = " ".join(sentence)
                    print("  %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
                stop = timeit.default_timer()
                print("Time for caption -> sentence: ", stop - start)
def train_model(weight=None, batch_size=256, epochs=10):
    from keras import backend as K

    cg = caption_generator.CaptionGenerator()
    model = cg.create_model()
    model.summary()
    if weight is not None:
        model.load_weights(weight)

    # Staged training schedule: (stage counter, learning rate, epochs).
    # The first stage keeps the learning rate the model was compiled with;
    # later stages lower it before continuing training.
    stages = [(0, None, 5), (1, 0.008, 5), (2, 0.006, 5),
              (4, 0.004, 5), (5, 0.002, 5), (6, 0.001, 45)]
    for counter, lr, stage_epochs in stages:
        if lr is not None:
            # Update the optimizer's learning rate in place.
            K.set_value(model.optimizer.lr, lr)
        # Tag checkpoints with the stage counter so stages do not overwrite
        # each other.
        file_name = 'weights-improvement-{epoch:02d}-%02d.hdf5' % counter
        checkpoint = ModelCheckpoint(file_name, monitor='loss', verbose=1,
                                     save_best_only=True, mode='min')
        model.fit_generator(cg.data_generator(batch_size=batch_size),
                            steps_per_epoch=cg.total_samples / batch_size,
                            epochs=stage_epochs, verbose=1,
                            callbacks=[checkpoint])

    try:
        model.save('/home/manish.singhal/Image-Captioning-master/caption_generator/Models/WholeModel.h5',
                   overwrite=True)
        model.save_weights('/home/manish.singhal/Image-Captioning-master/caption_generator/Models/Weights.h5',
                           overwrite=True)
    except Exception:
        print("Error in saving model.")
    print("Training complete...\n")
encoded_images = file['valid_set']
valid_list_file = "data/valid_list.txt"

train_step = conf.train_step
checkpoint_steps = conf.original_train_steps + (train_step - 1) * conf.interval_train_steps
check_point_path = "train_log/{}.ckpt".format(checkpoint_steps)

model = inference_wrapper.InferenceWrapper()
restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                           check_point_path)
sess = tf.InteractiveSession()
restore_fn(sess)

generator = caption_generator.CaptionGenerator(
    model, vocab, beam_size=1, use_ngram=conf.use_ngram_gen_result)

valid_list_file = open(valid_list_file, 'r')
valid_image_list = []
for line in valid_list_file.readlines():
    valid_image_list.append(line.strip().split()[0])

# output three optional sentences for each image, ranking by probability in decreasing order
# with open('/home/chengcheng/dataset/image_caption/inference/3/valid_caption_{}.txt'.format(checkpoint_steps), 'w') as f:
result_list = []
for index in range(1000):
    captions = generator.beam_search(sess, encoded_images[index])
    # if encoded_images[index] != valid_image_list[index]:
    #     print(encoded_images[index], valid_image_list[index])
    if index % 100 == 0:
import cPickle as pickle

import caption_generator
import numpy as np
from keras.preprocessing import sequence
import nltk

cg = caption_generator.CaptionGenerator()


def process_caption(caption):
    caption_split = caption.split()
    processed_caption = caption_split[1:]
    try:
        end_index = processed_caption.index('<end>')
        processed_caption = processed_caption[:end_index]
    except ValueError:
        pass
    return " ".join([word for word in processed_caption])


def get_best_caption(captions):
    captions.sort(key=lambda l: l[1])
    best_caption = captions[-1][0]
    return " ".join([cg.index_word[index] for index in best_caption])


def get_all_captions(captions):
    final_captions = []
    captions.sort(key=lambda l: l[1])
    for caption in captions:
        text_caption = " ".join([cg.index_word[index] for index in caption[0]])
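# A hedged usage sketch for the helpers above, assuming `captions` is a list of
# (word-index sequence, score) pairs as produced elsewhere in this module. The
# sample indices and scores are placeholders, not real model output, and
# resolving them requires the vocabulary that `cg` builds from the dataset.
if __name__ == '__main__':
    sample_captions = [([1, 5, 9], -2.3), ([1, 7, 2, 4], -1.1)]
    print(process_caption("<start> a dog runs <end>"))   # -> "a dog runs"
    print(get_best_caption(sample_captions))             # caption with the highest score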
import math

vocab = vocabulary.Vocabulary("data/dic.txt")
file = h5py.File("/home/chengcheng/dataset/image_caption/feat.hdf5", 'r')
encoded_images = file['valid_set']
valid_list_file = "/home/chengcheng/ImageCaption/data/valid_list.txt"
check_point_steps = 800000

model = inference_wrapper.InferenceWrapper()
restore_fn = model.build_graph_from_config(
    configuration.ModelConfig(), "train_log/{}.ckpt".format(check_point_steps))
sess = tf.InteractiveSession()
restore_fn(sess)

generator = caption_generator.CaptionGenerator(model, vocab)

valid_list_file = open(valid_list_file, 'r')
valid_image_list = []
for line in valid_list_file.readlines():
    valid_image_list.append(line.strip().split()[0])

# output three optional sentences for each image, ranking by probability in decreasing order
with open('infer_result/6000_0_valid_caption_{}.txt'.format(check_point_steps), 'w') as f:
    for index in range(1000):
        captions = generator.beam_search(sess, encoded_images[index])
        f.write(valid_image_list[index])
        # print("Captions for image {}".format(valid_image_list[index]))
        for i, caption in enumerate(captions):
            # Ignore begin and end words.
            sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
def main(_): # Build the inference graph. g = tf.Graph() with g.as_default(): model = inference_wrapper.InferenceWrapper() restore_fn = model.build_graph_from_config(configuration.ModelConfig(), FLAGS.checkpoint_path) g.finalize() # Create the vocabulary. vocab = vocabulary.Vocabulary(FLAGS.vocab_file) filenames = [] for file_pattern in FLAGS.input_files.split(","): filenames.extend(tf.gfile.Glob(file_pattern)) tf.logging.info("Running caption generation on %d files matching %s", len(filenames), FLAGS.input_files) with tf.Session(graph=g) as sess: # Load the model from checkpoint. restore_fn(sess) # Prepare the caption generator. Here we are implicitly using the default # beam search parameters. See caption_generator.py for a description of the # available beam search parameters. generator = caption_generator.CaptionGenerator(model, vocab) ################## results = [] results0 = [] results1 = [] results2 = [] count = 1 for filename in filenames: with tf.gfile.GFile(filename, "r") as f: image = f.read() captions = generator.beam_search(sess, image) image_name_full = os.path.basename(filename) print("Captions for %d/30000 image %s:" % (count, image_name_full)) count = count + 1 b = re.compile(r'.jpg') image_name = b.sub('', image_name_full) for i, caption in enumerate(captions): # Ignore begin and end words. sentence = [ vocab.id_to_word(w) for w in caption.sentence[1:-1] ] sentence = "".join(sentence) if i == 0: results0.append({ "image_id": image_name, "caption": sentence, }) if i == 1: results1.append({ "image_id": image_name, "caption": sentence, }) if i == 2: results2.append({ "image_id": image_name, "caption": sentence, }) results.append({ "image_id": image_name, "caption": sentence, }) print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob))) print("the length of results is:", len(results)) print("the length of results is:", len(results0)) print("the length of results is:", len(results1)) print("the length of results is:", len(results2)) # outfile = "/media/han/6f586f18-792a-40fd-ada6-59702fb5dabc/wen/im2txt-aic/eval/data/val_results.json" outfile0 = "/media/han/6f586f18-792a-40fd-ada6-59702fb5dabc/wen/im2txt-aic/eval/data/val_results0.json" outfile1 = "/media/han/6f586f18-792a-40fd-ada6-59702fb5dabc/wen/im2txt-aic/eval/data/val_results1.json" outfile2 = "/media/han/6f586f18-792a-40fd-ada6-59702fb5dabc/wen/im2txt-aic/eval/data/val_results2.json" with io.open(outfile, 'w', encoding='utf-8') as fd: fd.write( unicode( json.dumps(results, ensure_ascii=False, sort_keys=True, indent=2, separators=(',', ': ')))) with io.open(outfile0, 'w', encoding='utf-8') as fd0: fd0.write( unicode( json.dumps(results0, ensure_ascii=False, sort_keys=True, indent=2, separators=(',', ': ')))) with io.open(outfile1, 'w', encoding='utf-8') as fd1: fd1.write( unicode( json.dumps(results1, ensure_ascii=False, sort_keys=True, indent=2, separators=(',', ': ')))) with io.open(outfile2, 'w', encoding='utf-8') as fd2: fd2.write( unicode( json.dumps(results2, ensure_ascii=False, sort_keys=True, indent=2, separators=(',', ': '))))
def caption_image():
    """API to caption images."""
    image_format = "not jpeg"
    st = current_time()

    # get beam_size
    beam_size = int(request.args.get("beam_size", "3"))
    # get max_caption_length
    max_caption_length = int(request.args.get("max_caption_length", "20"))

    # get image_data
    if request.method == 'POST':
        image_data = request.get_data()
    else:
        url = request.args.get("url")
        c_type, image_data = get_remote_file(url)
        if not image_data:
            return Response(status=400,
                            response=jsonify(error="Could not HTTP GET %s" % url))
        # use c_type to find whether image_format is jpeg or not
        if 'image/jpeg' in c_type:
            image_format = "jpeg"

    # if jpeg, don't convert
    if image_format == "jpeg":
        jpg_image = image_data
    # if not jpeg
    else:
        # open the image from raw bytes
        image = Image.open(BytesIO(image_data))
        # convert the image to RGB; converting to jpeg can fail otherwise
        rgb_image = image.convert("RGB")
        # convert the RGB image to jpeg
        image_bytes = BytesIO()
        rgb_image.save(image_bytes, format="jpeg", quality=95)
        jpg_image = image_bytes.getvalue()
        image_bytes.close()

    read_time = current_time() - st
    # restart counter
    st = current_time()

    generator = caption_generator.CaptionGenerator(
        app.model, app.vocab, beam_size=beam_size,
        max_caption_length=max_caption_length)
    captions = generator.beam_search(app.sess, jpg_image)
    captioning_time = current_time() - st
    app.logger.info("Captioning time : %d" % captioning_time)

    array_captions = []
    for caption in captions:
        sentence = [app.vocab.id_to_word(w) for w in caption.sentence[1:-1]]
        sentence = " ".join(sentence)
        array_captions.append({
            'sentence': sentence,
            'confidence': math.exp(caption.logprob)
        })

    response = {
        'beam_size': beam_size,
        'max_caption_length': max_caption_length,
        'captions': array_captions,
        'time': {
            'read': read_time,
            'captioning': captioning_time,
            'units': 'ms'
        }
    }
    return Response(response=json.dumps(response), status=200,
                    mimetype="application/json")
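# A hedged client sketch for the endpoint above. The route path and host are
# assumptions (the @app.route decorator is not part of this excerpt); adjust
# them to wherever caption_image() is actually registered.
import requests

with open("example.jpg", "rb") as f:
    resp = requests.post("http://localhost:5000/caption?beam_size=3",
                         data=f.read(),
                         headers={"Content-Type": "image/jpeg"})
# The response JSON carries 'captions', each with a 'sentence' and 'confidence'.
print(resp.json()["captions"])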
def main(_):
    # Convert jpg image(s) into image representations using AlexNet.
    filenames = [os.path.join(image_dir, f) for f in [
        'overly-attached-girlfriend.jpg', 'high-expectations-asian-father.jpg',
        'foul-bachelor-frog.jpg', 'stoner-stanley.jpg', 'y-u-no.jpg',
        'willy-wonka.jpg', 'futurama-fry.jpg', 'success-kid.jpg',
        'one-does-not-simply.jpg', 'bad-luck-brian.jpg',
        'first-world-problems.jpg', 'philosoraptor.jpg',
        'what-if-i-told-you.jpg', 'TutorPP.jpg']]
    print(filenames)
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    # Mean of the ImageNet dataset in BGR.
    imagenet_mean = np.array([104., 117., 124.], dtype=np.float32)

    # Placeholders for the input image and dropout rate.
    x_Alex = tf.placeholder(tf.float32, [1, 227, 227, 3])
    keep_prob_Alex = tf.placeholder(tf.float32)

    # Create the model with the default config (no skip layers and 1000 units
    # in the last layer).
    modelAlex = AlexNet(x_Alex, keep_prob_Alex, 1000, [], ['fc7', 'fc8'], 512)  # maybe need to put fc8 in skip_layers

    # Define the activation of fc6 as the image embedding.
    score = modelAlex.fc6

    meme_embeddings = []
    with tf.Session() as sess:
        # Initialize all variables.
        sess.run(tf.global_variables_initializer())

        # Load the pretrained weights into the model.
        modelAlex.load_initial_weights(sess)

        for i, meme in enumerate(filenames):
            img = Image.open(meme)
            try:
                # Use img.thumbnail for square images, img.resize for non-square.
                img.thumbnail((227, 227), Image.ANTIALIAS)
                # img = img.resize((227, 227))
                assert np.shape(img) == (227, 227, 3)
            except AssertionError:
                img = img.resize((227, 227))
                print('sizing error')

            # Subtract the ImageNet mean.
            img = img - imagenet_mean  # should probably change this

            # Reshape as needed to feed into the model.
            img = img.reshape((1, 227, 227, 3))
            meme_vector = sess.run(score, feed_dict={x_Alex: img,
                                                     keep_prob_Alex: 1})  # [1, 4096]
            meme_vector = np.reshape(meme_vector, [4096])
            assert np.shape(meme_vector) == (4096,)

            # Now we have numpy embeddings to feed for inference.
            meme_embeddings.append(meme_vector)

    with open('Captions.txt', 'r') as f:
        data_captions = f.readlines()
    data_captions = [s.lower() for s in data_captions]

    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    # filenames = []
    # for file_pattern in FLAGS.input_files.split(","):
    #     filenames.extend(tf.gfile.Glob(file_pattern))
    # tf.logging.info("Running caption generation on %d files matching %s",
    #                 len(filenames), FLAGS.input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the
        # default beam search parameters. See caption_generator.py for a
        # description of the available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        num_in_data_total = 0
        num_captions = 0
        for i, meme in enumerate(meme_embeddings):
            # with tf.gfile.GFile(filename, "rb") as f:
            #     image = f.read()
            captions = generator.beam_search(sess, meme)
            print("Captions for image %s:" % os.path.basename(filenames[i]))
            num_in_data = 0
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                in_data = 0
                if b_any(sentence in capt for capt in data_captions):
                    in_data = 1
                    num_in_data += 1
                    num_in_data_total += 1
                    num_captions += 1
                else:
                    num_captions += 1
                print("  %d) %s (p=%f) [in data = %d]" %
                      (i, sentence, math.exp(caption.logprob), in_data))
            print("number of captions in data = %d" % (num_in_data))
            print("(total number of captions in data = %d) percent in data = %f" %
                  (num_in_data_total, (num_in_data_total / num_captions)))
train_vector_file = "data/train_vector.txt"

train_step = conf.train_step
checkpoint_steps = conf.original_train_steps + (train_step - 1) * conf.interval_train_steps
checkpoint_path = "train_log/{}.ckpt".format(checkpoint_steps)

model = inference_wrapper.InferenceWrapper()
restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                           checkpoint_path)
sess = tf.InteractiveSession()
restore_fn(sess)

generator = caption_generator.CaptionGenerator(
    model, vocab, beam_size=conf.beam_size, use_ngram=conf.use_ngram_gen_label)

train_list_file = open(train_list_file, 'r')
train_image_list = []
for line in train_list_file.readlines():
    train_image_list.append(line.strip().split()[0])

# output three optional sentences for each image, ranking by probability in decreasing order
# with open('/home/chengcheng/dataset/image_caption/inference/3/train_caption_{}.txt'.format(check_point_steps), 'w') as f:
caption_vector_path = "train_log/{}_infer_train_vector.txt".format(checkpoint_steps)
label_file = open(caption_vector_path, 'w')

index = -1
for line in open(train_vector_file):