def main(_):
    # load the dictionary
    data = {}
    with open(FLAGS.dict_file, 'r') as f:
        dict_data = json.load(f)
        for k, v in dict_data.items():
            data[k] = v
    # JSON stores dict keys as strings; convert them back to ints
    data['idx_to_word'] = {int(k): v for k, v in data['idx_to_word'].items()}

    # extract all features
    features, all_image_names = extract_features(FLAGS.test_dir)

    # Build the TensorFlow graph and run inference
    g = tf.Graph()
    with g.as_default():
        num_of_images = len(os.listdir(FLAGS.test_dir))
        print("Running inference on {} images".format(num_of_images))

        # Build the model.
        model = build_model(model_config, mode, inference_batch=1)

        # Initialize the beam search caption generator
        generator = CaptionGenerator(
            model,
            data['word_to_idx'],
            max_caption_length=model_config.padded_length - 1)

        # run inference
        init = tf.global_variables_initializer()

        with tf.Session() as sess:
            sess.run(init)
            model['saver'].restore(sess, FLAGS.saved_sess)
            print("Model restored! Last step run: ",
                  sess.run(model['global_step']))

            # predictions
            final_preds = run_inference(sess, features, generator, 1.0)
            captions_pred = [unpack.reshape(-1, 1) for unpack in final_preds]
            # captions_pred = np.concatenate(captions_pred, 1)
            captions_deco = []
            for cap in captions_pred:
                dec = decode_captions(cap.reshape(-1, 1), data['idx_to_word'])
                dec = ' '.join(dec)
                captions_deco.append(dec)

            # save the images with the captions written on them
            if not os.path.exists(FLAGS.results_dir):
                os.makedirs(FLAGS.results_dir)

            for j in range(len(captions_deco)):
                this_image_name = all_image_names['file_name'].values[j]
                img_name = os.path.join(FLAGS.results_dir, this_image_name)
                img = imread(os.path.join(FLAGS.test_dir, this_image_name))
                write_text_on_image(img, img_name, captions_deco[j])

    print("\ndone.")
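# For reference, a minimal sketch of the `decode_captions` helper used above,
# following the CS231n-style convention of '<NULL>' and '<END>' special tokens.
# This is an assumption about the helper's behavior, not this repo's exact code.
def decode_captions_sketch(captions, idx_to_word):
    """Turn an (N, T) array of word indices into N caption strings."""
    decoded = []
    N, T = captions.shape
    for i in range(N):
        words = []
        for t in range(T):
            word = idx_to_word[captions[i, t]]
            if word != '<NULL>':   # skip padding tokens
                words.append(word)
            if word == '<END>':    # stop at the end-of-caption token
                break
        decoded.append(' '.join(words))
    return decoded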
def main(_):
    # load data
    data = load_coco_data(FLAGS.data_dir)

    # optionally force padded_length to match the data (caption length - 1):
    # model_config.padded_length = len(data['train_captions'][0]) - 1

    # Build the TensorFlow graph and train it
    g = tf.Graph()
    with g.as_default():
        # Build the model. If FLAGS.glove_vocab is empty, the word embeddings
        # are randomly initialized; otherwise they are initialized with the
        # GloVe vectors.
        if FLAGS.glove_vocab == '':
            model = build_model(model_config, mode=mode)
        else:
            glove_vocab = np.load(FLAGS.glove_vocab)
            model = build_model(model_config, mode=mode,
                                glove_vocab=glove_vocab)

        # Set up the learning rate.
        learning_rate_decay_fn = None
        learning_rate = tf.constant(training_config.initial_learning_rate)
        if training_config.learning_rate_decay_factor > 0:
            num_batches_per_epoch = (training_config.num_examples_per_epoch /
                                     model_config.batch_size)
            decay_steps = int(num_batches_per_epoch *
                              training_config.num_epochs_per_decay)

            def _learning_rate_decay_fn(learning_rate, global_step):
                return tf.train.exponential_decay(
                    learning_rate,
                    global_step,
                    decay_steps=decay_steps,
                    decay_rate=training_config.learning_rate_decay_factor,
                    staircase=True)

            learning_rate_decay_fn = _learning_rate_decay_fn

        # Set up the training ops.
        train_op = tf.contrib.layers.optimize_loss(
            loss=model['total_loss'],
            global_step=model['global_step'],
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn)

        # initialize all variables
        init = tf.global_variables_initializer()

        with tf.Session() as sess:
            sess.run(init)
            num_epochs = training_config.total_num_epochs

            num_train = data['train_captions'].shape[0]
            iterations_per_epoch = max(num_train // model_config.batch_size, 1)
            num_iterations = int(num_epochs * iterations_per_epoch)

            # Set up some variables for book-keeping
            epoch = 0
            best_val_acc = 0
            best_params = {}
            loss_history = []
            train_acc_history = []
            val_acc_history = []

            print("\n\nTotal training iter: ", num_iterations, "\n\n")
            time_now = datetime.now()
            for t in range(num_iterations):
                # run one training step
                total_loss_value = _step(sess, data, train_op, model,
                                         model_config.lstm_dropout_keep_prob)
                loss_history.append(total_loss_value)

                # Print out the training loss
                if FLAGS.print_every > 0 and t % FLAGS.print_every == 0:
                    print('(Iteration %d / %d) loss: %f, and time elapsed: %.2f minutes' % (
                        t + 1, num_iterations, float(loss_history[-1]),
                        (datetime.now() - time_now).seconds / 60.0))

                # Print out some sample image results
                if FLAGS.sample_every > 0 and (t + 1) % FLAGS.sample_every == 0:
                    temp_dir = os.path.join(FLAGS.sample_dir,
                                            'temp_dir_{}'.format(t + 1))
                    if not os.path.exists(temp_dir):
                        os.makedirs(temp_dir)
                    captions_pred, urls = _run_validation(
                        sess, data, model_config.batch_size, model, 1.0)
                    # the output is of size (batch_size, padded_length),
                    # e.g. (32, 16)
                    captions_pred = [unpack.reshape(-1, 1)
                                     for unpack in captions_pred]
                    captions_pred = np.concatenate(captions_pred, 1)

                    captions_deco = decode_captions(captions_pred,
                                                    data['idx_to_word'])

                    for j in range(len(captions_deco)):
                        img_name = os.path.join(temp_dir,
                                                'image_{}.jpg'.format(j))
                        img = image_from_url(urls[j])
                        write_text_on_image(img, img_name, captions_deco[j])

                # save the model periodically so an interruption loses
                # little work
                if FLAGS.saveModel_every > 0 and (t + 1) % FLAGS.saveModel_every == 0:
                    if not os.path.exists(FLAGS.savedSession_dir):
                        os.makedirs(FLAGS.savedSession_dir)
                    # strip the '.ckpt' extension before adding the step suffix
                    checkpoint_name = savedModelName[:-5] + \
                        '_checkpoint{}.ckpt'.format(t + 1)
                    save_path = model['saver'].save(
                        sess,
                        os.path.join(FLAGS.savedSession_dir, checkpoint_name))

            # save the final model
            if not os.path.exists(FLAGS.savedSession_dir):
                os.makedirs(FLAGS.savedSession_dir)
            save_path = model['saver'].save(
                sess, os.path.join(FLAGS.savedSession_dir, savedModelName))
            print("done. Model saved at: ",
                  os.path.join(FLAGS.savedSession_dir, savedModelName))
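# A minimal sketch of what the `_step` helper above is assumed to do: sample a
# training minibatch, feed it through the graph, and run one optimizer update.
# The placeholder names ('image_feature', 'input_seqs', 'target_seqs',
# 'keep_prob') and the `sample_coco_minibatch` sampler are assumptions for
# illustration, not this repo's exact API.
def _step_sketch(sess, data, train_op, model, keep_prob):
    captions, image_features, urls = sample_coco_minibatch(
        data, batch_size=model_config.batch_size, split='train')
    # the input sequence is the caption without its last token; the target
    # sequence is the caption shifted left by one
    captions_in = captions[:, :-1]
    captions_out = captions[:, 1:]
    feed_dict = {model['image_feature']: image_features,
                 model['input_seqs']: captions_in,
                 model['target_seqs']: captions_out,
                 model['keep_prob']: keep_prob}
    total_loss, _ = sess.run([model['total_loss'], train_op],
                             feed_dict=feed_dict)
    return total_loss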
with g.as_default():
    # Build the model.
    model = build_model(model_config, mode,
                        inference_batch=BATCH_SIZE_INFERENCE)

    # run inference
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        model['saver'].restore(sess, directory + "savedSession/model0.ckpt")
        print("Model restored! Last step run: ",
              sess.run(model['global_step']))

        for i in range(TOTAL_INFERENCE_STEP):
            captions_pred, urls = _step_test(
                sess, data, BATCH_SIZE_INFERENCE, model, 1.0)
            # the output is of size (batch_size, padded_length), e.g. (32, 16)
            captions_pred = [unpack.reshape(-1, 1) for unpack in captions_pred]
            captions_pred = np.concatenate(captions_pred, 1)
            captions_deco = decode_captions(captions_pred, data['idx_to_word'])

            for j in range(len(captions_deco)):
                img_name = directory + 'image_' + str(j) + '.jpg'
                img = image_from_url(urls[j])
                write_text_on_image(img, img_name, captions_deco[j])
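# One plausible implementation of the `write_text_on_image` helper used above,
# sketched with PIL; the repo's actual version may use matplotlib or OpenCV
# instead.
from PIL import Image, ImageDraw

def write_text_on_image_sketch(img, img_name, caption):
    """Draw `caption` onto the numpy image `img` and save it to `img_name`."""
    pil_img = Image.fromarray(img)
    draw = ImageDraw.Draw(pil_img)
    # default bitmap font; ImageFont.truetype(...) could load a nicer one
    draw.text((10, 10), caption, fill=(255, 255, 255))
    pil_img.save(img_name)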
def train_model(model, config, data):
    # g = tf.Graph()
    # with g.as_default():

    ############### define the optimizer ###############
    num_batches = config.total_instances / config.batch_size
    decay_steps = int(num_batches * config.num_epochs_per_decay)
    learning_rate = tf.constant(config.initial_learning_rate)
    learning_rate_decay_fn = None

    def _decay_fn(learning_rate, global_step):
        return tf.train.exponential_decay(learning_rate,
                                          global_step,
                                          decay_steps=decay_steps,
                                          decay_rate=0.5,
                                          staircase=True)

    learning_rate_decay_fn = _decay_fn

    train_op = tf.contrib.layers.optimize_loss(
        loss=model.total_loss,
        global_step=model.global_step,
        learning_rate=learning_rate,
        optimizer='SGD',
        clip_gradients=config.clip_gradients,
        learning_rate_decay_fn=learning_rate_decay_fn)
    ####################################################

    saver = tf.train.Saver()
    init = tf.global_variables_initializer()

    # allow GPU memory growth to avoid BLAS memory-allocation failures
    config_ = tf.ConfigProto()
    config_.gpu_options.allow_growth = True

    with tf.Session(config=config_) as sess:
        sess.run(init)

        # if a checkpoint exists, restore it
        ckpt = tf.train.get_checkpoint_state(
            os.path.dirname('checkpoints/checkpoint'))
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print("successfully restored the checkpoint")

        # run validation on a random minibatch and save the captioned images
        rand_int = np.random.randint(1, 100)
        caption_in, caption_out, mask, image_features, urls = minibatch(
            data, rand_int, config.batch_size, config.total_instances)

        if not os.path.exists('test_caption'):
            os.makedirs('test_caption')
        captions_pred = _run_validation(
            sess, caption_in, image_features, config.batch_size, model,
            config.input_len)
        # the output is of size (batch_size, padded_length), e.g. (32, 16)
        captions_pred = [unpack.reshape(-1, 1) for unpack in captions_pred]
        captions_pred = np.concatenate(captions_pred, 1)
        captions_deco = decode_captions(captions_pred, data['idx_to_word'])

        for j in range(len(captions_deco)):
            img_name = os.path.join('test_caption', 'image_{}.jpg'.format(j))
            img = image_from_url(urls[j])
            write_text_on_image(img, img_name, captions_deco[j])
        print("saved predicted images into ./test_caption folder")

        # training loop (currently disabled); 100 epochs
        # total_runs = int((config.total_instances / config.batch_size) * config.num_epochs)
        # initial_step = model.global_step.eval()

        ### initialize the summary writer
        # tf.summary.scalar("learning_rate", learning_rate)
        # a = tf.summary.merge_all()
        # writer = tf.summary.FileWriter('./graphs/singlelayer_lstm', sess.graph)
        # time_now = datetime.now()

        # for t in range(total_runs):
        #     caption_in, caption_out, mask, image_features, urls = minibatch(
        #         data, t, config.batch_size, config.total_instances)
        #     # feed the data
        #     feed_dict = {model.image_feature: image_features,
        #                  model.caption_in: caption_in,
        #                  model.caption_out: caption_out,
        #                  model.caption_mask: mask}
        #     merge_op, _, total_loss, b = sess.run(
        #         [model.summary_op, train_op, model.total_loss, a],
        #         feed_dict=feed_dict)
        #     writer.add_summary(merge_op, global_step=t)
        #     writer.add_summary(b, global_step=t)

        #     # print loss info
        #     if (t + 1) % 20 == 0:
        #         print('(Iteration %d / %d) loss: %f, and time elapsed: %.2f minutes' % (
        #             t + 1, total_runs, float(total_loss),
        #             (datetime.now() - time_now).seconds / 60.0))

        #     # print sample images
        #     if (t + 1) % 100 == 0:
        #         if not os.path.exists('test_caption'):
        #             os.makedirs('test_caption')
        #         captions_pred = _run_validation(sess, caption_in, image_features,
        #                                         1, model, config.input_len)
        #         captions_pred = [unpack.reshape(-1, 1) for unpack in captions_pred]
        #         captions_pred = np.concatenate(captions_pred, 1)
        #         captions_deco = decode_captions(captions_pred, data['idx_to_word'])
        #         for j in range(len(captions_deco)):
        #             img_name = os.path.join('test_caption', 'image_{}.jpg'.format(j))
        #             img = image_from_url(urls[j])
        #             write_text_on_image(img, img_name, captions_deco[j])

        #     # save the model
        #     if (t + 1) % 50 == 0 or t == (total_runs - 1):
        #         if not os.path.exists('checkpoints/singlelayer_lstm'):
        #             os.makedirs('checkpoints/singlelayer_lstm')
        #         saver.save(sess, 'checkpoints/singlelayer_lstm', t)

        # visualize the embedding matrix in the TensorBoard projector
        final_embed_matrix = sess.run(model.embed_map)
        # it has to be a tf.Variable; constants don't work here, and
        # model.embed_matrix cannot be reused
        embedding_var = tf.Variable(final_embed_matrix[:1000], name='embedding')
        sess.run(embedding_var.initializer)

        # note: this shadows the `config` argument of train_model
        config = projector.ProjectorConfig()
        summary_writer = tf.summary.FileWriter('processed')

        # add the embedding to the config file
        embedding = config.embeddings.add()
        embedding.tensor_name = embedding_var.name

        # link this tensor to its metadata file, in this case the first
        # 1000 words of the vocabulary
        # metadata_path = './processed/metadata.tsv'
        # if not os.path.exists(metadata_path):
        #     f = open(metadata_path, "w")
        #     f.close()
        embedding.metadata_path = os.path.join('processed', 'metadata.tsv')

        # save a configuration file that TensorBoard will read during startup
        projector.visualize_embeddings(summary_writer, config)
        saver_embed = tf.train.Saver([embedding_var])
        saver_embed.save(sess, 'processed/model3.ckpt', 1)
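# For the projector labels to appear, 'processed/metadata.tsv' must contain
# one vocabulary word per line, in index order. A minimal sketch that writes
# it, assuming data['idx_to_word'] maps int -> str as elsewhere in this repo:
def write_projector_metadata(idx_to_word, path='processed/metadata.tsv',
                             num_words=1000):
    with open(path, 'w') as f:
        for i in range(num_words):
            f.write(idx_to_word[i] + '\n')

# The saved embeddings can then be inspected with:
#     tensorboard --logdir processed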