def main(_):
    """Poll the `images` table for new rows and caption each new image.

    Runs forever, comparing the last-seen image id against the newest id in
    the database; when a new image appears, its raw bytes are captioned and
    every beam-search candidate is inserted into `img2txt`. Ctrl-C exits
    cleanly and closes the DB connection.
    """
    import time  # local import: only needed for the polling interval

    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the
        # default beam search parameters. See caption_generator.py for a
        # description of the available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        old_id = getLastId()
        while True:
            try:
                new_id = getLastId()
                if new_id > old_id:
                    print(new_id)
                    old_id = new_id
                    cur.execute(
                        "SELECT image FROM images ORDER BY id DESC LIMIT 1")
                    data = cur.fetchall()
                    # The `image` column holds the encoded image bytes, which
                    # is exactly what beam_search feeds to the decode op.
                    image = BytesIO(data[0]['image']).getvalue()
                    captions = generator.beam_search(sess, image)
                    for caption in captions:
                        # Ignore begin and end words.
                        words = [vocab.id_to_word(w)
                                 for w in caption.sentence[1:-1]]
                        sentence = " ".join(words)
                        cur.execute(
                            "INSERT INTO img2txt(id_image,caption,caption_p) VALUES (%s,%s,%s)",
                            (old_id, sentence, math.exp(caption.logprob)))
                    # FIX: commit once per image instead of once per caption row.
                    db.commit()
                else:
                    # FIX: the original loop busy-waited, re-querying the DB as
                    # fast as possible and pegging a CPU core. Poll politely.
                    time.sleep(0.5)
            except KeyboardInterrupt:
                db.close()
                break
def init():
    """Build the inference graph, session and caption generator once.

    All artifacts are stored in module-level globals; repeated calls are
    no-ops thanks to the ``is_initialized`` guard.
    """
    print("caption_generator :: init :: Start")
    global sess, vocab, model, g, generator, is_initialized
    if not is_initialized:
        is_initialized = True

        # Construct the inference graph and freeze it.
        g = tf.Graph()
        with g.as_default():
            model = inference_wrapper.InferenceWrapper()
            restore_fn = model.build_graph_from_config(
                configuration.ModelConfig(), FLAGS.checkpoint_path)
        g.finalize()

        # Vocabulary used to map word ids back to strings.
        vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

        # Restore the checkpoint into a long-lived session.
        sess = tf.Session(graph=g)
        restore_fn(sess)

        # Caption generator with default beam-search parameters
        # (see caption_generator.py for the tunable knobs).
        generator = caption_generator.CaptionGenerator(model, vocab)
    print("caption_generator :: init :: End")
def main(_):
    """Caption frames captured live from the default webcam, forever.

    Each frame is JPEG-encoded and run through beam search; all candidate
    captions are printed and the frame is shown in a window. Ctrl-C exits.
    """
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the
        # default beam search parameters. See caption_generator.py for a
        # description of the available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        video = cv2.VideoCapture(0)
        while True:
            try:
                ret, img = video.read()
                # FIX: skip bad frames before using them (the original only
                # checked `ret` implicitly via the pasted error trace).
                if not ret:
                    continue
                # FIX: beam_search feeds its input to a DecodeJpeg op, so it
                # needs *encoded* JPEG bytes — not a raw numpy array wrapped
                # in tf.stack (which caused the "Invalid JPEG data" error the
                # original left in a comment). Encode the frame first.
                image = cv2.imencode(".jpg", img)[1].tobytes()

                captions = generator.beam_search(sess, image)
                for i, caption in enumerate(captions):
                    # Ignore begin and end words.
                    sentence = [vocab.id_to_word(w)
                                for w in caption.sentence[1:-1]]
                    sentence = " ".join(sentence)
                    print(" %d) %s (p=%f)" %
                          (i, sentence, math.exp(caption.logprob)))
                cv2.imshow('Image', img)
                cv2.waitKey(15)
            except KeyboardInterrupt:
                break
def main(_):
    """Caption frames pulled from a configured stream, one at a time.

    Prints every beam-search candidate with its probability. Runs until the
    process is killed (the stream loop has no exit condition).
    """
    # Assemble and freeze the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Word-id <-> string mapping.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    # Frame source configured via flags.
    stream = Stream(FLAGS.stream_config)

    with tf.Session(graph=g) as sess:
        # Restore weights from the checkpoint.
        restore_fn(sess)

        # Default beam-search parameters; see caption_generator.py.
        generator = caption_generator.CaptionGenerator(model, vocab)

        while True:
            frame = stream.next_frame()
            for idx, cand in enumerate(generator.beam_search(sess, frame)):
                # Drop the <S> / </S> sentinel words at both ends.
                words = [vocab.id_to_word(w) for w in cand.sentence[1:-1]]
                print(" %d) %s (p=%f)" %
                      (idx, " ".join(words), math.exp(cand.logprob)))
def main(_):
    """Caption every distinct image URL in `annotations` and store the result.

    For each (image_id, image_url) row, downloads the image, takes the top
    beam-search caption, and writes it back into the row's `image_caption`
    column. Ctrl-C exits cleanly and closes the DB connection.
    """
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the
        # default beam search parameters. See caption_generator.py for a
        # description of the available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        cur.execute("SELECT DISTINCT image_id,image_url FROM annotations")
        urls = cur.fetchall()
        for url in urls:
            try:
                response = requests.get(url['image_url'])
                image = BytesIO(response.content).getvalue()

                captions = generator.beam_search(sess, image)
                # Only the single best caption is stored.
                caption = captions[0]
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w)
                            for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)

                query = (
                    "UPDATE annotations SET image_caption=%s WHERE image_id=%s"
                )
                cur.execute(query, (sentence, url['image_id']))
                # FIX: the UPDATE was never committed in the original, so all
                # generated captions were silently lost on disconnect.
                db.commit()
                print(sentence)
            except requests.exceptions.RequestException as e:
                # FIX: a single unreachable/bad URL used to crash the whole
                # run; log it and keep going.
                print("skipping %s: %s" % (url['image_url'], e))
            except KeyboardInterrupt:
                db.close()
                break
def main(_):
    """Live webcam captioning with background caption threads and overlay.

    A CaptionThread is launched per frame (at most one at a time); the
    module-level `text` lines it produces are drawn onto the frame. ESC exits.
    """
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the
        # default beam search parameters. See caption_generator.py for a
        # description of the available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)
        translator = Translator()

        DEVICE_ID = 0
        capture = cv2.VideoCapture(DEVICE_ID)
        if capture.isOpened() is False:
            # FIX: `raise ("IO Error")` raises a plain string, which is a
            # TypeError in Python 3. Raise a real exception instead.
            raise IOError("cannot open capture device %d" % DEVICE_ID)
        cv2.namedWindow("Capture", cv2.WINDOW_AUTOSIZE)
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_size = 1
        font_color = (0, 255, 0)

        while True:
            ret, img = capture.read()
            # FIX: check `ret` BEFORE using `img` — the original drew on and
            # displayed `img` first and only tested `ret` afterwards.
            if ret == False:
                continue
            # Only one caption thread at a time (main thread + 1 worker).
            if (threading.activeCount() == 1):
                th = CaptionThread(sess, generator, translator, vocab, img)
                th.start()
            for i, txt in enumerate(text):
                # NOTE(review): cv2.putText cannot render Japanese glyphs with
                # Hershey fonts (original comment) — non-ASCII text may show
                # as '?' characters.
                cv2.putText(img, txt, (10, 30 * (i + 1)), font, font_size,
                            font_color)
            cv2.imshow("Capture", img)
            key = cv2.waitKey(1)
            if key == 27:  # ESC key
                break

        cv2.destroyAllWindows()
        # FIX: the original called `cap.release()` — a NameError, since the
        # capture object is named `capture`.
        capture.release()
def main(_):
    """Caption precomputed visual features listed in a test file.

    Reads one filename per line from test.txt, loads the matching .npy
    feature array, captions it, and dumps the top-1 captions (keyed by the
    numeric image id embedded in the filename) to a JSON file.
    """
    # Build and freeze the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    with open('/home/mtian/im2txt/im2txt/test.txt', 'r') as f:
        filenames = f.readlines()

    # Accumulates {"image_id": ..., "caption": ...} records for the JSON dump.
    results = []

    with tf.Session(graph=g) as sess:
        # Restore weights from the checkpoint.
        restore_fn(sess)

        # Default beam-search parameters; see caption_generator.py.
        generator = caption_generator.CaptionGenerator(model, vocab)

        for raw_name in filenames:
            name = raw_name.strip()
            # Swap the ".jpg"-style suffix for "npy" and load the features.
            feats = np.load(
                "/home/mtian/visiual_feature/0.125_val/" + name[:-3] + "npy")
            candidates = generator.beam_search(sess, feats)
            print("Captions for image %s:" % name)
            for rank, cand in enumerate(candidates):
                # Strip the begin/end sentinel words.
                words = [vocab.id_to_word(w) for w in cand.sentence[1:-1]]
                caption_text = " ".join(words)
                if rank == 0:
                    # Filename encodes a zero-padded numeric image id.
                    results.append({"image_id": int(name[-10:-4]),
                                    "caption": caption_text})
                print(" %d) %s (p=%f)" %
                      (rank, caption_text, math.exp(cand.logprob)))

    with open('0.125_var_1.json', 'w') as json_file:
        json.dump(results, json_file)
def main(_):
    """Caption a set of image files and collect results into a Word document.

    For every file matching the input patterns, prints all beam-search
    candidates and appends the picture plus its captions to a .docx saved
    under FLAGS.output_dir.
    """
    # Assemble and freeze the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Word-id <-> string mapping.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    # Expand the comma-separated glob patterns into concrete filenames.
    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    document = Document()

    with tf.Session(graph=g) as sess:
        # Restore weights from the checkpoint.
        restore_fn(sess)

        # Default beam-search parameters; see caption_generator.py.
        generator = caption_generator.CaptionGenerator(model, vocab)

        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            candidates = generator.beam_search(sess, image)

            header = ("Captions for image %s:" % os.path.basename(filename))
            print(header)

            # Each image starts a new page: caption header + the picture.
            paragraph = document.add_paragraph("inferent:%s" % filename)
            paragraph.paragraph_format.page_break_before = True
            document.add_picture(filename, width=Inches(3))
            document.add_paragraph(header)

            for rank, cand in enumerate(candidates):
                # Strip the begin/end sentinel words.
                words = [vocab.id_to_word(w) for w in cand.sentence[1:-1]]
                line = (" %d) %s (p=%f)" %
                        (rank, " ".join(words), math.exp(cand.logprob)))
                print(line)
                document.add_paragraph(line)

    output_dir = ("%s/Captions.docx" % FLAGS.output_dir)
    document.save(output_dir)
def caption_image(data_folder, output_folder):
    """Caption every image in *data_folder* and map captions to instance ids.

    Matches each image's filename token against the `postMedia` field of the
    records in the sibling instances.jsonl, then writes the resulting
    {caption: instance_id} dict (as its str() repr) to *output_folder*.
    """
    id_folder = join(data_folder.split("m")[0], "instances.jsonl")
    images = []
    ids_dict = {}
    for file in os.listdir(data_folder):
        images.append(join(data_folder, file))

    # FIX: the original re-opened and re-parsed the entire instances.jsonl
    # for EVERY image (O(images * instances) file I/O and JSON parsing).
    # Parse it exactly once up front.
    with open(id_folder, 'r', encoding='utf8') as instances_file:
        instances = [loads(line) for line in instances_file]

    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(vocab_file)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the
        # default beam search parameters. See caption_generator.py for a
        # description of the available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        for filename in tqdm(images):
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)

            # Just take the first caption and display it
            caption = captions[0]
            sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
            sentence = " ".join(sentence)

            # Token used to match this image against instance media names.
            token = filename.split("_")[1]
            for instance in instances:
                photo_name = re.split("[_']", str(instance["postMedia"]))
                if len(photo_name) == 7:
                    # Two media entries: either slot may match.
                    if photo_name[2] == token:
                        ids_dict[sentence] = instance["id"]
                    if photo_name[5] == token:
                        ids_dict[sentence] = instance["id"]
                elif len(photo_name) == 4:
                    if photo_name[2] == token:
                        ids_dict[sentence] = instance["id"]
                else:
                    continue

    with open(output_folder, 'w', encoding="utf8") as output_file:
        output_file.write(str(ids_dict))
def main(_):
    """Caption matched image files and dump cleaned top-1 captions to JSON.

    Applies small cosmetic rewrites to each best caption (drops leading
    article, "Bunch of"/"Group of" prefixes, sometimes "Close up of", and a
    trailing period) and writes {basename: caption} to data.json.
    """
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    filenames = []
    data = {}
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the
        # default beam search parameters. See caption_generator.py for a
        # description of the available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        for filename in filenames:
            # FIX: images must be read in BINARY mode; mode "r" decodes the
            # JPEG bytes as text in Python 3 and corrupts the payload.
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % os.path.basename(filename))
            key = os.path.basename(filename)
            caption = captions[0]
            sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
            sentence = " ".join(sentence)
            # Drop the leading "a " article and capitalize (indices 2/3 skip
            # "a" + space).
            sentence = sentence[2].upper() + sentence[3:]
            if (sentence.startswith("Bunch of")
                    or sentence.startswith("Group of")):
                sentence = sentence[9].upper() + sentence[10:]
            # Randomly drop "Close up of" half the time for variety.
            if (sentence.startswith("Close up of") and random.random() > 0.5):
                sentence = sentence[12].upper() + sentence[13:]
            if (sentence.endswith(".")):
                sentence = sentence[:-1]
            data[key] = sentence
            print(" %d) %s (p=%f)" % (0, sentence, math.exp(caption.logprob)))

    with open('data.json', 'w') as outfile:
        json.dump(data, outfile)
def main(_):
    """Live webcam captioning: one background CaptionThread at a time.

    Shows each frame in a window; press 'q' to exit.
    """
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the
        # default beam search parameters. See caption_generator.py for a
        # description of the available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        DEVICE_ID = 0
        capture = cv2.VideoCapture(DEVICE_ID)
        if capture.isOpened() is False:
            # FIX: `raise ("IO Error")` raises a plain string, which is a
            # TypeError in Python 3.
            raise IOError("cannot open capture device %d" % DEVICE_ID)
        cv2.namedWindow("Capture", cv2.WINDOW_NORMAL)

        os.system("clear")
        print("\nplease wait...\n")

        while True:
            ret, img = capture.read()
            # FIX: validate the frame BEFORE using it — the original showed
            # `img` first and only checked `ret` afterwards.
            if ret == False:
                continue
            # At most one caption worker alongside the main thread.
            if (threading.activeCount() == 1):
                th = CaptionThread(sess, generator, vocab, img)
                th.start()
            cv2.imshow("Capture", img)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        capture.release()
        cv2.destroyAllWindows()
def __init__(self, checkpoint_path, vocab_file):
    """Build the im2txt inference graph and restore it into a session.

    Args:
        checkpoint_path: Path to the model checkpoint (file or directory).
        vocab_file: Text file containing the vocabulary.
    """
    self.vocab_file = vocab_file
    self.checkpoint_path = checkpoint_path

    # Construct and freeze the inference graph.
    graph = tf.Graph()
    with graph.as_default():
        self.model = inference_wrapper.InferenceWrapper()
        restore_fn = self.model.build_graph_from_config(
            configuration.ModelConfig(), self.checkpoint_path)
    graph.finalize()

    # Vocabulary for decoding word ids.
    self.vocab = vocabulary.Vocabulary(self.vocab_file)

    # Long-lived session with the checkpoint weights restored.
    self.sess = tf.Session(graph=graph)
    restore_fn(self.sess)
    tf.logging.info('Tensorflow Session initialized for im2txt module.')
def gen_caption(path):
    """Download the image at *path* (a URL) and return its captions.

    The image is saved to temp/temp.jpg, captioned with beam search, and all
    candidate sentences are returned as {"captions": [...]}.
    """
    # Fetch the remote image into a local temp file.
    with urllib.request.urlopen(path) as url:
        with open('temp/temp.jpg', 'wb') as f:
            f.write(url.read())
    input_files = 'temp/temp.jpg'

    # only print serious log messages
    tf.logging.set_verbosity(tf.logging.FATAL)

    # Build and freeze the pretrained inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(vocab_file)

    # Expand patterns (here: just the temp file) into filenames.
    filenames = []
    for file_pattern in input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), input_files)

    with tf.Session(graph=g) as sess:
        # Restore weights from the checkpoint.
        restore_fn(sess)

        # Default beam-search parameters; see caption_generator.py.
        generator = caption_generator.CaptionGenerator(model, vocab)

        captionlist = []
        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            for rank, cand in enumerate(generator.beam_search(sess, image)):
                if rank == 0:
                    print("Captions for image %s:" %
                          os.path.basename(filename))
                # Strip begin/end sentinel words.
                words = [vocab.id_to_word(w) for w in cand.sentence[1:-1]]
                sentence = " ".join(words)
                print(" %d) %s (p=%f)" %
                      (rank, sentence, math.exp(cand.logprob)))
                captionlist.append(sentence)
    return {"captions": captionlist}
def main(_):
    """Caption every image in the FLAGS.input_files directory.

    Prints all beam-search candidates per image and mirrors the output to
    FLAGS.output_file.
    """
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    # FLAGS.input_files is treated as a directory prefix; every entry in it
    # is expanded through Glob into a concrete path.
    image_names = os.listdir(FLAGS.input_files)
    filenames = []
    for file_p in image_names:
        file_pattern = FLAGS.input_files + file_p
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the
        # default beam search parameters. See caption_generator.py for a
        # description of the available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        # FIX: the original opened the output file and never closed it,
        # leaking the handle and risking unflushed output on exit.
        with open(FLAGS.output_file, 'w') as fw:
            for filename in filenames:
                with tf.gfile.GFile(filename, "rb") as f:
                    image = f.read()
                captions = generator.beam_search(sess, image)
                print("Captions for image %s:" % os.path.basename(filename))
                fw.write("Captions for image %s:\n" %
                         os.path.basename(filename))
                for i, caption in enumerate(captions):
                    # Ignore begin and end words.
                    sentence = [vocab.id_to_word(w)
                                for w in caption.sentence[1:-1]]
                    sentence = " ".join(sentence)
                    print(" %d) %s (p=%f)" %
                          (i, sentence, math.exp(caption.logprob)))
                    fw.write(" %d) %s (p=%f)\n" %
                             (i, sentence, math.exp(caption.logprob)))
def main(_):
    """Caption Flickr8k images and burn the best caption onto each image.

    Picks the highest-probability beam-search candidate per image, prints it,
    and writes it onto the image via write_caption(). Also reports elapsed
    wall-clock time.
    """
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   check_point_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(vocab_file)

    filenames = []
    for file_pattern in input_files.split(","):
        filenames.extend(tf.gfile.Glob("Flicker8k_Dataset/" + file_pattern))
    # FIX: the original format string had two placeholders (%d and %s) but
    # passed only one argument, which raises at runtime.
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)
        start_time = time.time()

        # Prepare the caption generator. Here we are implicitly using the
        # default beam search parameters. See caption_generator.py for a
        # description of the available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        print("=======================About to iterate"
              "==================================")
        for filename in filenames:
            print("=========current:============= ", filename)
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % os.path.basename(filename))
            s = []
            p = []
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w)
                            for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                s.append(sentence)
                p.append(math.exp(caption.logprob))
            # Keep only the most probable candidate.
            main_s = s[p.index(max(p))]
            i = 0
            print(" %d) %s (p=%f)" % (i, main_s, max(p)))
            write_caption(main_s, filename, border=(0, 0, 0, 100))

        end_time = time.time()
        print("time elapsed {0:.1f} sec".format(end_time - start_time))
def __init__(self, checkpoint_path, vocab_file):
    """Set up graph, vocabulary, restored session and caption generator.

    Args:
        checkpoint_path: Model checkpoint file or directory.
        vocab_file: Text file containing the vocabulary.
    """
    # Build the inference graph.
    self.g = tf.Graph()
    with self.g.as_default():
        wrapped = inference_wrapper.InferenceWrapper()
        restore = wrapped.build_graph_from_config(
            configuration.ModelConfig(), checkpoint_path)
    self.g.finalize()

    # Vocabulary for id -> word decoding.
    self.vocab = vocabulary.Vocabulary(vocab_file)

    # Restore checkpoint weights into a persistent session.
    self.sess = tf.Session(graph=self.g)
    restore(self.sess)

    # Beam-search caption generator with default parameters.
    self.generator = caption_generator.CaptionGenerator(wrapped, self.vocab)
def main(_):
    """Caption COCO-style images and dump {caption, image_id} dicts to JSON.

    Supports either plain beam search or consensus nearest-neighbor
    captioning (FLAGS.use_nn). The final period is normalized so every
    sentence ends with exactly one '.'.
    """
    # Assemble and freeze the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Word-id <-> string mapping.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    filenames = glob.glob(FLAGS.input_files)

    with tf.Session(graph=g) as sess:
        # Restore weights from the checkpoint.
        restore_fn(sess)

        # Beam width is configurable via flags.
        beam_size = int(FLAGS.beam_size)
        generator = caption_generator.CaptionGenerator(model, vocab,
                                                       beam_size=beam_size)

        caption_dicts = []
        for i, filename in enumerate(filenames):
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()

            if FLAGS.use_nn:
                captions = generator.consensus_NN(sess, image,
                                                  FLAGS.caption_path,
                                                  FLAGS.train_data_dir,
                                                  FLAGS.pickle_file)
            else:
                captions = generator.beam_search(sess, image)

            # COCO filenames end in ..._<image_id>.jpg.
            image_id = int(filename.split('_')[-1].split('.')[0])

            if FLAGS.use_nn:
                sentence = captions
            else:
                # Strip begin/end sentinels from the top candidate.
                sentence = [vocab.id_to_word(w)
                            for w in captions[0].sentence[1:-1]]
            # Normalize: drop any trailing '.' token, then append one period.
            if sentence[-1] == '.':
                sentence = sentence[:-1]
            sentence = " ".join(sentence)
            sentence += '.'

            caption_dicts.append({'caption': sentence, 'image_id': image_id})

            if i % 10 == 0:
                sys.stdout.write('\n%d/%d: (img %d) %s' %
                                 (i, len(filenames), image_id, sentence))

    with open(FLAGS.dump_file, 'w') as outfile:
        json.dump(caption_dicts, outfile)
def main(_):
    """Caption six local images and insert each best caption into `audiotext`.

    Iterates imagenes/1.jpg .. imagenes/6.jpg, shows each image while its
    caption is stored in the database.
    """
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the
        # default beam search parameters. See caption_generator.py for a
        # description of the available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        old_id = getLastId()
        img_path = "C:/Users/win8/Documents/PF/image2text/imagenes/"
        for index in range(1, 7):
            path = img_path + str(index) + ".jpg"
            img = cv2.imread(path)
            # FIX: JPEGs must be read in BINARY mode; mode "r" decodes the
            # bytes as text in Python 3 and corrupts the image payload.
            with tf.gfile.GFile(path, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            caption = captions[0]
            # Ignore begin and end words.
            sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
            sentence = " ".join(sentence)
            cv2.imshow('Image', img)
            # FIX: DB parameters must be a sequence; `(sentence)` is just a
            # parenthesized string, not a 1-tuple — add the trailing comma.
            cur.execute("INSERT INTO audiotext(audiotextcol) VALUES (%s)",
                        (sentence,))
            db.commit()
            cv2.waitKey(9500)
def load(self):
    """Load the image-captioning graph, vocabulary and session.

    Returns:
        self, so the call can be chained.
    """
    print("loading image captioning")

    # Build and freeze the captioning graph.
    self.imcap_graph = tf.Graph()
    with self.imcap_graph.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   'models/im2txt/')
    self.imcap_graph.finalize()
    # TODO: this graph could benefit from being frozen. Compression + speed
    # enhancements

    # Vocabulary for id -> word decoding.
    self.vocab = vocabulary.Vocabulary('models/im2txt/word_counts.txt')

    # Restore checkpoint weights into a persistent session.
    self.sess = tf.Session(graph=self.imcap_graph)
    restore_fn(self.sess)

    # Custom beam-search settings: beam_size=4, max_caption_length=17,
    # length_normalization_factor=1.5 (positional args to the generator).
    self.generator = caption_generator.CaptionGenerator(
        model, self.vocab, 4, 17, 1.5)
    return self
def __init__(self, model_path, vocab_path):
    """Construct the captioning graph and restore its checkpoint.

    Args:
        model_path: Path to the model checkpoint.
        vocab_path: Path to the vocabulary file.
    """
    self.model_path = model_path
    self.vocab_path = vocab_path

    # Build and freeze the inference graph.
    self.g = tf.Graph()
    with self.g.as_default():
        self.model = inference_wrapper.InferenceWrapper()
        self.restore_fn = self.model.build_graph_from_config(
            configuration.ModelConfig(), model_path)
    self.g.finalize()

    # Vocabulary and beam-search generator.
    self.vocab = vocabulary.Vocabulary(vocab_path)
    self.generator = caption_generator.CaptionGenerator(
        self.model, self.vocab)

    # Persistent session with restored weights.
    self.sess = tf.Session(graph=self.g)
    self.restore_fn(self.sess)
    return
def __init__(self, sess, checkpoint_path, vocab_file_path):
    """Attach a caption generator to an existing session.

    Unlike the other constructors in this project, this one builds the
    inference ops inside the caller-provided default graph and restores the
    checkpoint into the caller's session.

    Args:
        sess: An already-created tf.Session to restore into.
        checkpoint_path: Path to the model checkpoint.
        vocab_file_path: Path to the vocabulary file.
    """
    self.sess = sess

    # Preprocessing options come from the project-wide YAML config.
    config = yaml.load(open('config.yaml', 'r'), Loader=yaml.FullLoader)
    self.preprocess_options = config['preprocess']

    # Build the inference ops in the (caller's) default graph.
    g = tf.get_default_graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(
            configuration.ModelConfig(), checkpoint_path)

    # Vocabulary for id -> word decoding.
    self.vocab = vocabulary.Vocabulary(vocab_file_path)

    # Restore checkpoint weights into the provided session.
    restore_fn(sess)

    # Default beam-search parameters; see caption_generator.py.
    self.generator = caption_generator.CaptionGenerator(model, self.vocab)
def __init__(self):
    """Build the captioning model on GPU 0 with a capped memory fraction."""
    # Build and freeze the inference graph.
    graph = tf.Graph()
    with graph.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   CHECKPOINT_DIR)
    graph.finalize()

    # Create the vocabulary.
    self.vocab = vocabulary.Vocabulary(VOCAB_FILE)

    # Limit this process to 25% of GPU 0's memory so it can share the card.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.25,
                                visible_device_list="0")
    session_config = tf.ConfigProto(gpu_options=gpu_options)
    self.sess = tf.Session(graph=graph, config=session_config)
    restore_fn(self.sess)

    # Beam-search generator with default parameters.
    self.generator = caption_generator.CaptionGenerator(model, self.vocab)
def ImageCaption():
    """Caption the current video frame (./current_frame.jpg).

    Returns:
        Dict mapping the beam rank (as a string) to a formatted
        " i) sentence (p=prob)" line for every candidate caption.
    """
    caption_string = {}
    cwd = os.getcwd()

    # Model artifacts live under the im2txt subtree of the working directory.
    checkpoint_path = '{0}/im2txt/model/model.ckpt-3000000'.format(cwd)
    VOCAB_FILE = '{0}/im2txt/data/word_counts.txt'.format(cwd)
    IMAGE_FILE = '{0}/im2txt/data/Pic.jpg'.format(cwd)

    # Build and freeze the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(VOCAB_FILE)

    # Expand patterns into filenames (kept for logging; the actual input is
    # current_frame.jpg below).
    filenames = []
    for file_pattern in IMAGE_FILE.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), IMAGE_FILE)

    with tf.Session(graph=g) as sess:
        # Restore weights from the checkpoint.
        restore_fn(sess)

        # Default beam-search parameters; see caption_generator.py.
        generator = caption_generator.CaptionGenerator(model, vocab)

        image_data = tf.gfile.FastGFile("./current_frame.jpg", 'rb').read()
        captions = generator.beam_search(sess, image_data)
        print("Captions for this frame :")
        for i, caption in enumerate(captions):
            words = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
            sentence = " ".join(words)
            caption_string['{}'.format(i)] = " %d) %s (p=%f)" % (
                i, sentence, math.exp(caption.logprob))
            print(" %d) %s (p=%f)" %
                  (i, sentence, math.exp(caption.logprob)))
    return caption_string
def inference(self, input_dir, files_list=None):
    """Generate a caption for each selected image in *input_dir*.

    Results are stored in ``self.outputs`` as {basename: caption}.

    Args:
        input_dir: Directory containing the images.
        files_list: Optional list of filenames to caption; when omitted (or
            empty), every ``.jpg`` in *input_dir* is captioned.
    """
    # Reset outputs.
    self.outputs = {}

    # Build the model.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(
            configuration.ModelConfig(), self.ckpt_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(self.vocab_file)

    # Select the files to caption.
    # FIX: the original used a mutable default argument (files_list=[]),
    # a classic Python pitfall; use None as the sentinel instead.
    if not files_list:
        # No explicit list given: caption every .jpg in the directory.
        files_list = os.listdir(input_dir)
        files_list = [f for f in files_list if f.endswith(".jpg")]

    # Resolve full image paths.
    filenames = []
    for filename in files_list:
        full_name = os.path.join(input_dir, filename)
        filenames.append(full_name)

    with tf.Session(graph=g) as sess:
        # Restore parameters from the checkpoint.
        restore_fn(sess)

        # Create the caption generator.
        generator = caption_generator.CaptionGenerator(model, vocab)

        # Generate a caption for each image.
        for filename in filenames:
            # Read the image bytes.
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            # Run beam search.
            captions = generator.beam_search(sess, image)
            # Strip the begin/end sentinel tokens from the top candidate.
            sentence = [
                vocab.id_to_word(w) for w in captions[0].sentence[1:-1]
            ]
            # Record the result in the output dict.
            self.outputs[os.path.basename(filename)] = "".join(sentence)
def __init__(self):
    """Load the 5M-iteration checkpoint and prepare a caption generator."""
    graph = tf.Graph()
    with graph.as_default():
        print(os.getcwd())
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(
            configuration.ModelConfig(),
            "./checkpoints/5M_iterations/model.ckpt-5000000")
    graph.finalize()

    # Create the vocabulary.
    self.vocab = vocabulary.Vocabulary("./vocab/word_counts.txt")

    # Load the model from checkpoint into a persistent session.
    self.sess = tf.Session(graph=graph)
    restore_fn(self.sess)

    # Beam-search generator with default parameters.
    self.generator = caption_generator.CaptionGenerator(model, self.vocab)
def __init__(self, job_dir=FLAGS.job_dir):
    """Build the key-conditioned initial-state graph and restore weights.

    Args:
        job_dir: Directory containing the checkpoint to restore
            (defaults to FLAGS.job_dir).
    """
    # Placeholder for a variable-length sequence of key ids; the model
    # expects a batch dimension, so both the keys and their length are
    # expanded to batch size 1.
    key_inp = tf.placeholder(tf.int32, [None])
    seq_len = tf.shape(key_inp)[0]
    batched_key = tf.expand_dims(key_inp, axis=0)
    batched_len = tf.expand_dims(seq_len, axis=0)

    # Initial decoder state computed by the model tower.
    initial_state_op = _tower_fn(batched_key, batched_len)

    # Vocabulary for id -> word decoding.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    self.saver = tf.train.Saver()
    self.key_inp = key_inp
    self.init_state = initial_state_op
    self.vocab = vocab

    # Let TF grow GPU memory on demand instead of grabbing it all up front.
    config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
    self.sess = tf.Session(config=config)

    # restore_fn is presumably defined elsewhere on this class — it restores
    # the checkpoint from job_dir into self.sess.
    self.restore_fn(job_dir)
    self.tf = tf
def main(_):
    """Caption the bundled data/pic.jpg and print every beam candidate."""
    # Model artifacts are located relative to the working directory.
    cwd = os.getcwd()
    checkpoint_path = '{0}/model'.format(cwd)
    VOCAB_FILE = '{0}/data/word_counts.txt'.format(cwd)
    IMAGE_FILE = '{0}/data/pic.jpg'.format(cwd)

    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(VOCAB_FILE)

    filenames = []
    for file_pattern in IMAGE_FILE.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), IMAGE_FILE)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the
        # default beam search parameters. See caption_generator.py for a
        # description of the available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        for filename in filenames:
            # FIX: JPEGs must be read in BINARY mode; mode "r" decodes the
            # bytes as text in Python 3 and corrupts the image payload.
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % os.path.basename(filename))
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w)
                            for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                print(" %d) %s (p=%f)" %
                      (i, sentence, math.exp(caption.logprob)))
def init():
    """Lazily build the im2txt inference graph, restore the checkpoint and
    create the beam-search caption generator.

    Idempotent: the heavy setup runs only on the first call; subsequent
    calls just print the Start/End markers. All state is published through
    module globals (sess, vocab, model, g, generator).
    """
    print("caption_generator :: init :: Start")
    global sess, vocab, model, g, generator, is_initialized
    if not is_initialized:
        is_initialized = True

        # Register the model flags on first use.
        FLAGS = tf.flags.FLAGS
        tf.flags.DEFINE_string(
            "checkpoint_path", "../ML_Models/ShowAndTell/checkpoints/",
            "Model checkpoint file or directory containing a "
            "model checkpoint file.")
        tf.flags.DEFINE_string(
            "vocab_file",
            "../ML_Models/ShowAndTell/checkpoints/word_counts.txt",
            "Text file containing the vocabulary.")
        tf.logging.set_verbosity(tf.logging.INFO)

        # Assemble the inference graph, then freeze it.
        g = tf.Graph()
        with g.as_default():
            model = inference_wrapper.InferenceWrapper()
            restore_checkpoint = model.build_graph_from_config(
                configuration.ModelConfig(), FLAGS.checkpoint_path)
        g.finalize()

        # Vocabulary for mapping word ids back to text.
        vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

        # Restore the trained weights into a long-lived session.
        sess = tf.Session(graph=g)
        restore_checkpoint(sess)

        # Beam search with the default parameters (see caption_generator.py).
        generator = caption_generator.CaptionGenerator(model, vocab)
    print("caption_generator :: init :: End")
def main(_):
    """Build the im2txt inference graph, publish the session/generator via
    module globals, then serve gRPC caption requests until interrupted.

    NOTE(review): the server loop runs *inside* the ``tf.Session`` context
    so the session stays open for the lifetime of the server; the Im2Txt
    servicer presumably reads the ``session``/``generator``/``vocab``
    globals set here — verify against the servicer implementation.
    """
    print("hello")
    tf.logging.info("hello")

    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    global vocab
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    with tf.Session(graph=g) as sess:
        global session
        global generator

        # Load the model from checkpoint.
        restore_fn(sess)
        # Hand the live session to the servicer via a module global.
        session = sess

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        tf.logging.info("server starting server on port %i" % _PORT)
        print("server starting server on port %i" % _PORT)
        server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
        im2txt_pb2_grpc.add_Im2TxtServicer_to_server(Im2Txt(), server)
        server.add_insecure_port('[::]:%i' % _PORT)
        server.start()
        try:
            # grpc serves on background threads; keep the main thread alive.
            while True:
                time.sleep(_ONE_DAY_IN_SECONDS)
        except KeyboardInterrupt:
            # Ctrl-C: stop the server immediately (grace period of 0).
            server.stop(0)
def im2txt():
    """Generate captions for every image matching FLAGS.input_files.

    Builds the inference graph, restores the checkpoint named by
    FLAGS.checkpoint_path, then prints each image's beam-search captions
    together with their probabilities.
    """
    # Assemble the inference graph and freeze it.
    graph = tf.Graph()
    with graph.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_model = model.build_graph_from_config(
            configuration.ModelConfig(), FLAGS.checkpoint_path)
    graph.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    # Expand the comma-separated glob patterns into concrete file names.
    filenames = []
    for pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    with tf.Session(graph=graph) as sess:
        # Restore the trained weights into the session.
        restore_model(sess)

        # Beam search with the default parameters (see caption_generator.py).
        generator = caption_generator.CaptionGenerator(model, vocab)

        for filename in filenames:
            # Image bytes are binary — must open in "rb", not "r"
            # (edit by Ethan on 2018-08-31, changed from r to rb).
            with tf.gfile.GFile(filename, "rb") as f:
                print(filename)
                image = f.read()
            captions = generator.beam_search(sess, image)
            print("图像 %s 标题是:" % os.path.basename(filename))
            for i, caption in enumerate(captions):
                # Drop the begin/end sentinel words before joining.
                words = (vocab.id_to_word(w) for w in caption.sentence[1:-1])
                sentence = " ".join(words)
                print(" %d) %s (概率=%f)" % (i, sentence, math.exp(caption.logprob)))