def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    stream = Stream(FLAGS.stream_config)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        while True:
            image = stream.next_frame()
            captions = generator.beam_search(sess, image)
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
def run():
    """Runs evaluation in a loop, and logs summaries to TensorBoard."""
    # Create the evaluation directory if it doesn't exist.
    eval_dir = FLAGS.eval_dir
    if not tf.gfile.IsDirectory(eval_dir):
        tf.logging.info("Creating eval directory: %s", eval_dir)
        tf.gfile.MakeDirs(eval_dir)

    g = tf.Graph()
    with g.as_default():
        # Build the model for evaluation.
        model_config = configuration.ModelConfig()
        model_config.input_file_pattern = FLAGS.input_file_pattern
        model = show_and_tell_model.ShowAndTellModel(model_config, mode="eval")
        model.build()

        # Create the summary operation and the summary writer.
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(eval_dir)

        g.finalize()

        # Run a new evaluation run every eval_interval_secs.
        while True:
            start = time.time()
            tf.logging.info("Starting evaluation at " +
                            time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime()))
            run_once(model, summary_writer, summary_op)
            time_to_next_eval = start + FLAGS.eval_interval_secs - time.time()
            if time_to_next_eval > 0:
                time.sleep(time_to_next_eval)
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    """
    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)
    """

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        old_id = getLastId()
        while True:
            try:
                new_id = getLastId()
                if new_id > old_id:
                    print(new_id)
                    old_id = new_id
                    cur.execute("SELECT image FROM images ORDER BY id DESC LIMIT 1")
                    data = cur.fetchall()
                    file_like = BytesIO(data[0]['image'])
                    image = file_like.getvalue()
                    # img = PIL.Image.open(file_like)
                    # image = tf.image.decode_jpeg(str(list(img.getdata())))
                    captions = generator.beam_search(sess, image)
                    # print("Captions for image %s:" % os.path.basename(filename))
                    for i, caption in enumerate(captions):
                        # Ignore begin and end words.
                        sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                        sentence = " ".join(sentence)
                        # print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
                        cur.execute(
                            "INSERT INTO img2txt(id_image,caption,caption_p) VALUES (%s,%s,%s)",
                            (old_id, sentence, math.exp(caption.logprob)))
                        db.commit()
            except KeyboardInterrupt:
                db.close()
                break
def load_im2txt(model_file, vocabulary_file):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(
            configuration.ModelConfig(),
            model_file,
        )
    g.finalize()

    global caption_generator
    global session
    global vocabulary

    # Create the vocabulary.
    vocabulary = im2txt_vocab.Vocabulary(vocabulary_file)

    # Create session.
    config_proto = tf.ConfigProto()
    config_proto.gpu_options.allow_growth = True
    sess = tf.Session(graph=g, config=config_proto)

    # Load the model from checkpoint.
    restore_fn(sess)

    # Prepare the caption generator. Here we are implicitly using the default
    # beam search parameters. See caption_generator.py for a description of the
    # available beam search parameters.
    caption_generator = im2txt_generator.CaptionGenerator(model, vocabulary, beam_size=2)
    session = sess
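# load_im2txt above only stashes the generator, session, and vocabulary in
# module-level globals, so a companion helper is needed to actually produce
# captions. A minimal sketch, assuming those same globals, the module-level
# math import used by the other snippets, and images arriving as encoded JPEG
# bytes; the name caption_image_bytes is illustrative, not part of the original.
def caption_image_bytes(image_bytes):
    """Runs beam search on encoded image bytes using the globals set by load_im2txt."""
    results = []
    for caption in caption_generator.beam_search(session, image_bytes):
        # Ignore begin and end words.
        sentence = " ".join(vocabulary.id_to_word(w) for w in caption.sentence[1:-1])
        results.append((sentence, math.exp(caption.logprob)))
    return results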
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   CHECKPOINT_PATH)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(VOCAB_FILE)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        # `filenames` is expected to be defined at module level (like
        # CHECKPOINT_PATH and VOCAB_FILE), since it is not built in this function.
        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % os.path.basename(filename))
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
def Init(self):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        self.model = inference_wrapper.InferenceWrapper()
        restore_fn = self.model.build_graph_from_config(
            configuration.ModelConfig(), FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    self.vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    self.sess = tf.Session(graph=g)
    # Load the model from checkpoint.
    restore_fn(self.sess)

    # Prepare the caption generator. Here we are implicitly using the default
    # beam search parameters. See caption_generator.py for a description of the
    # available beam search parameters.
    self.generator = caption_generator.CaptionGenerator(self.model, self.vocab)

    # for filename in filenames:
    #     with tf.gfile.GFile(filename, "r") as f:
    #         image = f.read()
    #     captions = generator.beam_search(sess, image)
    #     print("Captions for image %s:" % os.path.basename(filename))
    #     for i, caption in enumerate(captions):
    #         # Ignore begin and end words.
    #         sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
    #         sentence = " ".join(sentence)
    #         print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))

    print("loaded\n")
def init():
    print("caption_generator :: init :: Start")
    global sess, vocab, model, g, generator, is_initialized
    if not is_initialized:
        is_initialized = True

        # Build the inference graph.
        g = tf.Graph()
        with g.as_default():
            model = inference_wrapper.InferenceWrapper()
            restore_fn = model.build_graph_from_config(
                configuration.ModelConfig(), FLAGS.checkpoint_path)
        g.finalize()

        # Create the vocabulary.
        vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

        # Load the model from checkpoint.
        sess = tf.Session(graph=g)
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)
    print("caption_generator :: init :: End")
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    """
    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)
    """

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        video = cv2.VideoCapture(0)
        while True:
            try:
                _, img = video.read()
                # The model's "image_feed" placeholder expects encoded JPEG bytes.
                # Feeding the raw frame buffer (e.g. img.tobytes() or tf.stack(img))
                # fails with:
                #   ERROR: Invalid JPEG data, size 921600
                #   [[Node: decode/DecodeJpeg = DecodeJpeg[...](_recv_image_feed_0)]]
                # so encode the frame to JPEG first.
                image = cv2.imencode(".jpg", img)[1].tobytes()

                captions = generator.beam_search(sess, image)
                for i, caption in enumerate(captions):
                    # Ignore begin and end words.
                    sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                    sentence = " ".join(sentence)
                    print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))

                cv2.imshow('Image', img)
                cv2.waitKey(15)
            except KeyboardInterrupt:
                break
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)
        translator = Translator()

        # ---
        DEVICE_ID = 0
        capture = cv2.VideoCapture(DEVICE_ID)
        if capture.isOpened() is False:
            raise IOError("Failed to open video device %d" % DEVICE_ID)
        cv2.namedWindow("Capture", cv2.WINDOW_AUTOSIZE)
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_size = 1
        font_color = (0, 255, 0)
        # ---

        while True:
            ret, img = capture.read()
            if threading.activeCount() == 1:
                th = CaptionThread(sess, generator, translator, vocab, img)
                th.start()
            for i, txt in enumerate(text):
                cv2.putText(img, txt, (10, 30 * (i + 1)), font, font_size,
                            font_color)  # No Japanese font available?
            cv2.imshow("Capture", img)
            key = cv2.waitKey(1)
            if key == 27:  # ESC key
                break
            if ret == False:
                continue

        cv2.destroyAllWindows()
        capture.release()
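# Both this snippet and the later webcam snippet assume a CaptionThread helper that
# is not shown. A minimal sketch of what such a thread might look like, assuming it
# runs beam search on a JPEG-encoded copy of the frame and publishes the resulting
# lines through the module-level `text` list read by the display loop; every name
# below is an assumption, not the original implementation.
text = []  # caption lines read by the display loop


class CaptionThread(threading.Thread):
    """Runs beam search on one frame in the background and updates `text`."""

    def __init__(self, sess, generator, translator, vocab, frame):
        super(CaptionThread, self).__init__()
        self._sess = sess
        self._generator = generator
        self._translator = translator  # could translate `sentence` below; unused in this sketch
        self._vocab = vocab
        self._frame = frame

    def run(self):
        global text
        # The model expects encoded JPEG bytes, not a raw frame buffer.
        image = cv2.imencode(".jpg", self._frame)[1].tobytes()
        lines = []
        for caption in self._generator.beam_search(self._sess, image):
            # Ignore begin and end words.
            sentence = " ".join(self._vocab.id_to_word(w)
                                for w in caption.sentence[1:-1])
            lines.append("%s (p=%.3f)" % (sentence, math.exp(caption.logprob)))
        text = lines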
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    """
    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)
    """

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        cur.execute("SELECT DISTINCT image_id,image_url FROM annotations")
        urls = cur.fetchall()
        for url in urls:  # iterate over the array of URLs
            try:
                response = requests.get(url['image_url'])
                file_like = BytesIO(response.content)
                image = file_like.getvalue()
                # img = PIL.Image.open(file_like)
                # image = tf.image.decode_jpeg(str(list(img.getdata())))
                captions = generator.beam_search(sess, image)
                # print("Captions for image %s:" % os.path.basename(filename))
                # Only keep the top-scoring caption.
                caption = captions[0]
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                # print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
                # cur.execute("INSERT INTO img2txt(id_image,caption,caption_p) VALUES (%s,%s,%s)",
                #             (old_id, sentence, math.exp(caption.logprob)))
                # db.commit()
                query = "UPDATE annotations SET image_caption=%s WHERE image_id=%s"
                cur.execute(query, (sentence, url['image_id']))
                print(sentence)
            except KeyboardInterrupt:
                db.close()
                break
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    with open('/home/mtian/im2txt/im2txt/test.txt', 'r') as f:
        filenames = f.readlines()

    # Temporary buffer collecting one JSON record per image.
    aa = []

    # for file_pattern in FLAGS.input_files.split(","):
    #     filenames.extend(tf.gfile.Glob(file_pattern))
    # tf.logging.info("Running caption generation on %d files matching %s",
    #                 len(filenames), FLAGS.input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        '''
        with tf.gfile.GFile(filename, "r") as f:
            image = f.read()
        '''
        for filename1 in filenames:
            filename = filename1.strip()
            # Pre-extracted visual features saved as .npy, keyed by image filename.
            image = np.load("/home/mtian/visiual_feature/0.125_val/" + filename[:-3] + "npy")
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % filename)
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                # Keep only the top caption for the JSON output.
                if i == 0:
                    captiondict = {"image_id": int(filename[-10:-4]), "caption": sentence}
                    aa.append(captiondict)
                print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))

    # Write all collected captions as JSON.
    with open('0.125_var_1.json', 'w') as json_file:
        json.dump(aa, json_file)
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    document = Document()

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)

            str1 = "Captions for image %s:" % os.path.basename(filename)
            print(str1)

            paragraph = document.add_paragraph("inferent:%s" % filename)
            paragraph.paragraph_format.page_break_before = True
            document.add_picture(filename, width=Inches(3))
            document.add_paragraph(str1)

            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                str1 = " %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob))
                print(str1)
                document.add_paragraph(str1)

    output_dir = "%s/Captions.docx" % FLAGS.output_dir
    document.save(output_dir)
def caption_image(data_folder, output_folder):
    id_folder = join(data_folder.split("m")[0], "instances.jsonl")
    images = []
    ids_dict = {}
    for file in os.listdir(data_folder):
        images.append(join(data_folder, file))

    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(vocab_file)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        for filename in tqdm(images):
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)

            # Just take the first caption and display it.
            caption = captions[0]
            sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
            sentence = " ".join(sentence)

            with open(id_folder, 'r', encoding='utf8') as instances:
                for instance in instances:
                    instance = loads(instance)
                    photo_name = re.split("[_']", str(instance["postMedia"]))
                    if len(photo_name) == 7:
                        if photo_name[2] == filename.split("_")[1]:
                            ids_dict[sentence] = instance["id"]
                        if photo_name[5] == filename.split("_")[1]:
                            ids_dict[sentence] = instance["id"]
                    elif len(photo_name) == 4:
                        if photo_name[2] == filename.split("_")[1]:
                            ids_dict[sentence] = instance["id"]
                    else:
                        continue

    with open(output_folder, 'w', encoding="utf8") as output_file:
        output_file.write(str(ids_dict))
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    filenames = []
    data = {}
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        for filename in filenames:
            # Read the raw image bytes ("rb" so this also works under Python 3).
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % os.path.basename(filename))

            key = os.path.basename(filename)
            caption = captions[0]
            sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
            sentence = " ".join(sentence)

            # Strip the leading "a " and capitalize the first content word.
            sentence = sentence[2].upper() + sentence[3:]
            # Also drop leading "Bunch of"/"Group of" and, sometimes, "Close up of".
            if sentence.startswith("Bunch of") or sentence.startswith("Group of"):
                sentence = sentence[9].upper() + sentence[10:]
            if sentence.startswith("Close up of") and random.random() > 0.5:
                sentence = sentence[12].upper() + sentence[13:]
            if sentence.endswith("."):
                sentence = sentence[:-1]

            data[key] = sentence
            print(" %d) %s (p=%f)" % (0, sentence, math.exp(caption.logprob)))

    with open('data.json', 'w') as outfile:
        json.dump(data, outfile)
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        # ---
        DEVICE_ID = 0
        capture = cv2.VideoCapture(DEVICE_ID)
        if capture.isOpened() is False:
            raise IOError("Failed to open video device %d" % DEVICE_ID)
        cv2.namedWindow("Capture", cv2.WINDOW_NORMAL)
        # ---

        os.system("clear")
        print("\nplease wait...\n")

        while True:
            ret, img = capture.read()
            if threading.activeCount() == 1:
                th = CaptionThread(sess, generator, vocab, img)
                th.start()
            cv2.imshow("Capture", img)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            if ret == False:
                continue

        capture.release()
        cv2.destroyAllWindows()
def __init__(self, checkpoint_path, vocab_file):
    self.vocab_file = vocab_file
    self.checkpoint_path = checkpoint_path

    g = tf.Graph()
    with g.as_default():
        self.model = inference_wrapper.InferenceWrapper()
        restore_fn = self.model.build_graph_from_config(configuration.ModelConfig(),
                                                        self.checkpoint_path)
    g.finalize()

    self.vocab = vocabulary.Vocabulary(self.vocab_file)
    self.sess = tf.Session(graph=g)
    restore_fn(self.sess)
    tf.logging.info('Tensorflow Session initialized for im2txt module.')
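# The constructor above builds the graph and session but no caption method is shown.
# A minimal sketch of such a method on the same class, following the beam-search
# pattern used by the other snippets here; the method name, beam_size argument, and
# return format are assumptions.
def caption(self, image_path, beam_size=3):
    """Returns (sentence, probability) pairs for one image file."""
    generator = caption_generator.CaptionGenerator(self.model, self.vocab,
                                                   beam_size=beam_size)
    with tf.gfile.GFile(image_path, "rb") as f:
        image = f.read()
    results = []
    for c in generator.beam_search(self.sess, image):
        # Ignore begin and end words.
        sentence = " ".join(self.vocab.id_to_word(w) for w in c.sentence[1:-1])
        results.append((sentence, math.exp(c.logprob)))
    return results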
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    # filenames = []
    # for file_pattern in FLAGS.input_files.split(","):
    #     filenames.extend(tf.gfile.Glob(file_pattern))

    # FLAGS.input_files is treated as a directory here; glob every file in it.
    image_names = os.listdir(FLAGS.input_files)
    filenames = []
    for file_p in image_names:
        file_pattern = FLAGS.input_files + file_p
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        fw = open(FLAGS.output_file, 'w')
        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % os.path.basename(filename))
            fw.write("Captions for image %s:\n" % os.path.basename(filename))
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
                fw.write(" %d) %s (p=%f)\n" % (i, sentence, math.exp(caption.logprob)))
        fw.close()
def run_caption(checkpoint_path, filenames, heuristic_amount):
    g = tf.Graph()
    with g.as_default():
        # Build the model for evaluation.
        model_config = configuration.ModelConfig()
        model = show_and_tell_model.ShowAndTellModel(model_config, mode="inference")
        model.build()

        # Create the Saver to restore model Variables.
        saver = tf.train.Saver()
        g.finalize()

    def _restore_fn(sess):
        tf.logging.info("Loading model from checkpoint: %s", checkpoint_path)
        saver.restore(sess, checkpoint_path)
        tf.logging.info("Successfully loaded checkpoint: %s",
                        os.path.basename(checkpoint_path))

    restore_fn = _restore_fn

    # Create the vocabulary.
    vocab = glove.load(model_config.config)[0]

    run_results = []
    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the list of image bytes for evaluation.
        images = []
        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                images.append(f.read())

        captions = [
            sess.run(model.final_seqs,
                     feed_dict={
                         "image_feed:0": img,
                         model.heuristic_temperature: heuristic_amount
                     }) for img in images
        ]

        for i, caption in enumerate(captions):
            run_results.append({"filename": filenames[i], "captions": []})
            for j in range(caption.shape[1]):
                # Ignore begin and end words.
                c = caption[0, j, :].tolist()
                sentence = [vocab.id_to_word(w) for w in c[:-1]]
                sentence = " ".join(sentence)
                run_results[i]["captions"].append(sentence)

    return run_results
def gen_caption(path):
    with urllib.request.urlopen(path) as url:
        with open('temp/temp.jpg', 'wb') as f:
            f.write(url.read())
    input_files = 'temp/temp.jpg'

    # Only print serious log messages.
    tf.logging.set_verbosity(tf.logging.FATAL)

    # Load our pretrained model.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(vocab_file)

    filenames = []
    for file_pattern in input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        captionlist = []
        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % os.path.basename(filename))
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
                captionlist.append(sentence)

    return {"captions": captionlist}
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Re-export the restored model: a checkpoint plus a text GraphDef.
        saver = tf.train.Saver()
        saver.save(sess, FLAGS.output_path + "/im2txt-model")
        tf.train.write_graph(sess.graph_def, FLAGS.output_path, "graph.pbtxt")
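# A minimal sketch of how the files exported above could be loaded back in a fresh
# process. The directory is a placeholder for the value passed as FLAGS.output_path,
# and the tensor name is an assumption to verify against graph.pbtxt ("image_feed:0"
# is the input placeholder name used by the im2txt inference graph elsewhere in this
# file).
output_path = "/path/to/output"  # placeholder

with tf.Session() as sess:
    # import_meta_graph rebuilds the saved graph and returns a Saver for its variables.
    saver = tf.train.import_meta_graph(output_path + "/im2txt-model.meta")
    saver.restore(sess, output_path + "/im2txt-model")
    # Tensors can now be looked up by name, e.g. the image input placeholder.
    image_feed = sess.graph.get_tensor_by_name("image_feed:0")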
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   check_point_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(vocab_file)

    filenames = []
    for file_pattern in input_files.split(","):
        filenames.extend(tf.gfile.Glob("Flicker8k_Dataset/" + file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)
        start_time = time.time()

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        print("=======================About to iterate==================================")
        # print(filenames)
        for filename in filenames:
            print("=========current:============= ", filename)
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % os.path.basename(filename))

            s = []
            p = []
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                s.append(sentence)
                p.append(math.exp(caption.logprob))

            # Keep the highest-probability caption and draw it onto the image.
            main_s = s[p.index(max(p))]
            i = 0
            print(" %d) %s (p=%f)" % (i, main_s, max(p)))
            write_caption(main_s, filename, border=(0, 0, 0, 100))

        end_time = time.time()
        print("time elapsed {0:.1f} sec".format(end_time - start_time))
def __init__(self, checkpoint_path, vocab_file):
    # Build the inference graph.
    self.g = tf.Graph()
    with self.g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   checkpoint_path)
    self.g.finalize()

    self.vocab = vocabulary.Vocabulary(vocab_file)
    self.sess = tf.Session(graph=self.g)
    restore_fn(self.sess)
    self.generator = caption_generator.CaptionGenerator(model, self.vocab)
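# A minimal usage sketch for the wrapper initialized above, assuming the class is
# called Captioner (the class name is not shown) and that the paths below are
# placeholders; the image is passed as encoded JPEG bytes, as in the other snippets.
captioner = Captioner("/path/to/model.ckpt-2000000", "/path/to/word_counts.txt")
with tf.gfile.GFile("example.jpg", "rb") as f:
    image = f.read()
for caption in captioner.generator.beam_search(captioner.sess, image):
    # Ignore begin and end words.
    sentence = " ".join(captioner.vocab.id_to_word(w) for w in caption.sentence[1:-1])
    print("%s (p=%f)" % (sentence, math.exp(caption.logprob)))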
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    """
    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)
    """

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        old_id = getLastId()
        img_path = "C:/Users/win8/Documents/PF/image2text/imagenes/"
        for index in range(1, 7):
            path = img_path + str(index) + ".jpg"
            img = cv2.imread(path)
            # Read the raw JPEG bytes ("rb" so this also works under Python 3).
            with tf.gfile.GFile(path, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            # print("Captions for image %s:" % os.path.basename(filename))
            caption = captions[0]
            # Ignore begin and end words.
            sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
            sentence = " ".join(sentence)
            cv2.imshow('Image', img)
            # print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
            # Note the single-element parameter tuple: (sentence,) rather than (sentence).
            cur.execute("INSERT INTO audiotext(audiotextcol) VALUES (%s)", (sentence,))
            db.commit()
            cv2.waitKey(9500)
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    filenames = glob.glob(FLAGS.input_files)
    # for file_pattern in FLAGS.input_files.split(","):
    #     filenames.extend(tf.gfile.Glob(file_pattern))
    # tf.logging.info("Running caption generation on %d files matching %s",
    #                 len(filenames), FLAGS.input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        beam_size = int(FLAGS.beam_size)
        generator = caption_generator.CaptionGenerator(model, vocab, beam_size=beam_size)

        caption_dicts = []
        for i, filename in enumerate(filenames):
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()

            if FLAGS.use_nn:
                captions = generator.consensus_NN(sess, image, FLAGS.caption_path,
                                                  FLAGS.train_data_dir, FLAGS.pickle_file)
            else:
                captions = generator.beam_search(sess, image)

            image_id = int(filename.split('_')[-1].split('.')[0])

            if FLAGS.use_nn:
                sentence = captions
            else:
                sentence = [vocab.id_to_word(w) for w in captions[0].sentence[1:-1]]
            if sentence[-1] == '.':
                sentence = sentence[:-1]
            sentence = " ".join(sentence)
            sentence += '.'

            caption_dict = {'caption': sentence, 'image_id': image_id}
            caption_dicts.append(caption_dict)
            if i % 10 == 0:
                sys.stdout.write('\n%d/%d: (img %d) %s' % (i, len(filenames), image_id, sentence))

    with open(FLAGS.dump_file, 'w') as outfile:
        json.dump(caption_dicts, outfile)
def predict_images(filenames, vocab, n_sentences=2):
    """Uses the latest model checkpoint to predict (infer) captions for part of the
    training images.

    filenames: list of image filenames to infer
    n_sentences: number of sentences generated for each image, max=3

    return: list of captions predicted by the most recent ckpt, each caption a
    string, e.g.:
        predict_seqs = [["I", "wish", "to", "get", "rid", "of", "acne"], [******]]
    The real captions to be used in ImageMetadata are different:
        captions = [[u"<S>", "I", "wish", "to", "get", "rid", "of", "acne", ".", "</S>"]]
    """
    print('Using ckpt {0} to infer'.format(ckpt_foler))
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   ckpt_foler)
    g.finalize()

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)
        generator = caption_generator.CaptionGenerator(model, vocab)

        predict_seqs = []
        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            if len(captions) < n_sentences:
                n_sentences = len(captions)
            for i in range(n_sentences):
                # Ignore begin and end words. sentence is a list of words.
                sentence = [vocab.id_to_word(w) for w in captions[i].sentence[1:-1]]
                sentence = " ".join(sentence)
                predict_seqs.append(sentence)

        global_step = model.global_stepp.eval()
        print('global step is {0} :D'.format(global_step))

    global current_folder
    current_folder = google_file_folder + str(global_step) + '/'
    utils.createfolder(current_folder)
    savingname = current_folder + 'pred_seqs.pkl'
    utils.save(predict_seqs, savingname,
               'Predicted seqs are saved to %s :D' % savingname)
    print('total number of pred_seqs: %d' % len(predict_seqs))
    return savingname
def load(self):
    print("loading image captioning")
    self.imcap_graph = tf.Graph()
    with self.imcap_graph.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   'models/im2txt/')
    self.imcap_graph.finalize()
    # TODO: this graph could benefit from being frozen. Compression + speed enhancements.

    self.vocab = vocabulary.Vocabulary('models/im2txt/word_counts.txt')
    self.sess = tf.Session(graph=self.imcap_graph)
    restore_fn(self.sess)
    self.generator = caption_generator.CaptionGenerator(model, self.vocab, 4, 17, 1.5)
    return self
def predict(self, sess, image_raw_feed, input_feed, mask_feed):
    tf.logging.info("Building model.")
    start_vars = set(x.name for x in tf.global_variables())
    self.build_model(configuration.ModelConfig(), image_raw_feed, input_feed, mask_feed)
    end_vars = tf.global_variables()
    restore_vars = [x for x in end_vars if x.name not in start_vars]
    saver = tf.train.Saver(var_list=restore_vars)
    restore_fn = self._create_restore_fn(FLAGS.checkpoint_path, saver)
    restore_fn(sess)

    sum_log_probs = sess.graph.get_tensor_by_name("batch_loss:0")
    logits = self.model.logits
    softmax = sess.graph.get_tensor_by_name("softmax:0")
    # return sum_log_probs, logits, softmax
    return sum_log_probs, softmax, logits
def __init__(self, model_path, vocab_path):
    self.model_path = model_path
    self.vocab_path = vocab_path

    self.g = tf.Graph()
    with self.g.as_default():
        self.model = inference_wrapper.InferenceWrapper()
        self.restore_fn = self.model.build_graph_from_config(
            configuration.ModelConfig(), model_path)
    self.g.finalize()

    self.vocab = vocabulary.Vocabulary(vocab_path)
    self.generator = caption_generator.CaptionGenerator(self.model, self.vocab)
    self.sess = tf.Session(graph=self.g)
    self.restore_fn(self.sess)
    return
def ImageCaption():
    # Build the inference graph.
    caption_string = {}
    cwd = os.getcwd()
    checkpoint_path = '{0}/im2txt/model/model.ckpt-3000000'.format(cwd)
    VOCAB_FILE = '{0}/im2txt/data/word_counts.txt'.format(cwd)
    # IMAGE_FILE = '{0}/im2txt/data/COCO_val2014_000000224477.jpg'.format(cwd)
    IMAGE_FILE = '{0}/im2txt/data/Pic.jpg'.format(cwd)

    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(VOCAB_FILE)

    filenames = []
    for file_pattern in IMAGE_FILE.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), IMAGE_FILE)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        image_data = tf.gfile.FastGFile("./current_frame.jpg", 'rb').read()
        captions = generator.beam_search(sess, image_data)
        print("Captions for this frame :")
        for i, caption in enumerate(captions):
            sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
            sentence = " ".join(sentence)
            caption_string['{}'.format(i)] = " %d) %s (p=%f)" % (
                i, sentence, math.exp(caption.logprob))
            print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))

    return caption_string
def __init__(self, sess, checkpoint_path, vocab_file_path):
    self.sess = sess
    config = yaml.load(open('config.yaml', 'r'), Loader=yaml.FullLoader)
    self.preprocess_options = config['preprocess']

    # Build the inference graph.
    g = tf.get_default_graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   checkpoint_path)

    # Create the vocabulary.
    self.vocab = vocabulary.Vocabulary(vocab_file_path)

    restore_fn(sess)
    self.generator = caption_generator.CaptionGenerator(model, self.vocab)