def main(_):
  top_k = 4  # Print the top-k accuracy.
  true_pred = np.zeros(top_k)

  # Load pre-computed image features.
  with open(FLAGS.feature_file, "rb") as f:
    test_data = pkl.load(f)
  # list() so the keys can be indexed under Python 3 as well.
  test_ids = list(test_data.keys())
  test_feat = np.zeros(
      (len(test_ids), len(test_data[test_ids[0]]["image_feat"])))
  test_rnn_feat = np.zeros(
      (len(test_ids), len(test_data[test_ids[0]]["image_rnn_feat"])))
  for i, test_id in enumerate(test_ids):
    # Image feature in the visual-semantic embedding space.
    test_feat[i] = test_data[test_id]["image_feat"]
    # Image feature in the RNN space.
    test_rnn_feat[i] = test_data[test_id]["image_rnn_feat"]

  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model_config = configuration.ModelConfig()
    model_config.rnn_type = FLAGS.rnn_type
    model = polyvore_model.PolyvoreModel(model_config, mode="inference")
    model.build()
    saver = tf.train.Saver()
  g.finalize()

  with tf.Session(graph=g) as sess:
    saver.restore(sess, FLAGS.checkpoint_path)
    questions = json.load(open(FLAGS.json_file))
    all_pred = []
    set_ids = []
    all_scores = []
    for question in questions:
      score, pred = run_question_inference(sess, question, test_ids,
                                           test_feat, test_rnn_feat,
                                           model_config.num_lstm_units)
      if pred != []:
        all_pred.append(pred)
        all_scores.append(score)
        set_ids.append(question["question"][0].split("_")[0])
        # Candidate 0 is the correct answer; count hits at each cutoff.
        for i in range(top_k):
          if 0 in pred[:i + 1]:
            true_pred[i] += 1

    # Print all top-k accuracies.
    for i in range(top_k):
      print("Top %d Accuracy: " % (i + 1))
      print("%d correct answers in %d valid questions." %
            (true_pred[i], len(all_pred)))
      print("Accuracy: %f" % (true_pred[i] / len(all_pred)))

    s = np.empty((len(all_scores),), dtype=object)
    for i in range(len(all_scores)):
      s[i] = all_scores[i]

    with open(FLAGS.result_file, "wb") as f:
      pkl.dump({"set_ids": set_ids, "pred": all_pred, "score": s}, f)
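# The evaluation above counts a fill-in-the-blank question as correct at rank
# k when the ground-truth answer (candidate index 0 by convention) appears in
# the first k ranked candidates. A minimal, self-contained illustration of
# that bookkeeping with made-up ranked predictions:
import numpy as np

top_k = 4
dummy_preds = [[0, 2, 1, 3], [3, 0, 2, 1], [2, 3, 1, 0]]  # hypothetical ranks
true_pred = np.zeros(top_k)
for pred in dummy_preds:
  for i in range(top_k):
    if 0 in pred[:i + 1]:
      true_pred[i] += 1
for i in range(top_k):
  # Prints 0.333333, 0.666667, 0.666667, 1.000000.
  print("Top %d Accuracy: %f" % (i + 1, true_pred[i] / len(dummy_preds)))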
def main(_):
  if os.path.isfile(FLAGS.feature_file):
    print("Feature file already exists.")
    return

  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model_config = configuration.ModelConfig()
    model_config.rnn_type = FLAGS.rnn_type
    model = polyvore_model.PolyvoreModel(model_config, mode="inference")
    model.build()
    saver = tf.train.Saver()
  g.finalize()

  sess = tf.Session(graph=g)
  saver.restore(sess, FLAGS.checkpoint_path)

  test_json = json.load(open(FLAGS.json_file))

  # Save image ids and features in a dictionary.
  test_features = dict()
  k = 0
  for image_set in test_json:
    set_id = image_set["set_id"]
    k += 1
    print(str(k) + " : " + set_id)
    for image in image_set["items"]:
      filename = os.path.join(FLAGS.image_dir, set_id,
                              str(image["index"]) + ".jpg")
      # Read the raw JPEG bytes ("rb" so this also works under Python 3).
      with tf.gfile.GFile(filename, "rb") as f:
        image_feed = f.read()

      [feat, rnn_feat] = sess.run(
          [model.image_embeddings, model.rnn_image_embeddings],
          feed_dict={"image_feed:0": image_feed})

      image_name = set_id + "_" + str(image["index"])
      test_features[image_name] = dict()
      test_features[image_name]["image_feat"] = np.squeeze(feat)
      test_features[image_name]["image_rnn_feat"] = np.squeeze(rnn_feat)

  with open(FLAGS.feature_file, "wb") as f:
    pkl.dump(test_features, f)
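# The pickle written above maps "<set_id>_<image_index>" to a dict holding the
# two feature vectors consumed by the evaluation scripts. A quick sanity check
# of such a file (the path is hypothetical; the printed shapes are whatever
# was saved, not fixed values):
import pickle as pkl

with open("test_features.pkl", "rb") as f:  # hypothetical feature file path
  feats = pkl.load(f)
some_id = next(iter(feats))
print(some_id)  # e.g. "<set_id>_<image_index>"
print(feats[some_id]["image_feat"].shape)      # joint-embedding-space feature
print(feats[some_id]["image_rnn_feat"].shape)  # RNN-space feature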
def main(unused_argv):
  assert FLAGS.input_file_pattern, "--input_file_pattern is required"
  assert FLAGS.train_dir, "--train_dir is required"

  model_config = configuration.ModelConfig()
  model_config.input_file_pattern = FLAGS.input_file_pattern
  model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file
  training_config = configuration.TrainingConfig()

  # Create training directory.
  train_dir = FLAGS.train_dir
  if not tf.gfile.IsDirectory(train_dir):
    tf.logging.info("Creating training directory: %s", train_dir)
    tf.gfile.MakeDirs(train_dir)

  # Build the TensorFlow graph.
  g = tf.Graph()
  with g.as_default():
    # Build the model.
    model = polyvore_model.PolyvoreModel(
        model_config, mode="train", train_inception=FLAGS.train_inception)
    model.build()

    learning_rate = tf.constant(training_config.initial_learning_rate)
    learning_rate_decay_fn = None
    if training_config.learning_rate_decay_factor > 0:
      num_batches_per_epoch = (training_config.num_examples_per_epoch /
                               model_config.batch_size)
      decay_steps = int(num_batches_per_epoch *
                        training_config.num_epochs_per_decay)

      def _learning_rate_decay_fn(learning_rate, global_step):
        return tf.train.exponential_decay(
            learning_rate,
            global_step,
            decay_steps=decay_steps,
            decay_rate=training_config.learning_rate_decay_factor,
            staircase=True)

      learning_rate_decay_fn = _learning_rate_decay_fn

    # Set up the training ops.
    train_op = tf.contrib.layers.optimize_loss(
        loss=model.total_loss,
        global_step=model.global_step,
        learning_rate=learning_rate,
        optimizer=training_config.optimizer,
        clip_gradients=training_config.clip_gradients,
        learning_rate_decay_fn=learning_rate_decay_fn)

    # Set up the Saver for saving and restoring model checkpoints.
    saver = tf.train.Saver(
        max_to_keep=training_config.max_checkpoints_to_keep)

  # Run training.
  tf.contrib.slim.learning.train(
      train_op,
      train_dir,
      log_every_n_steps=FLAGS.log_every_n_steps,
      graph=g,
      global_step=model.global_step,
      number_of_steps=FLAGS.number_of_steps,
      init_fn=model.init_fn,
      saver=saver)
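# The schedule configured above is plain staircase exponential decay: the
# learning rate is multiplied by learning_rate_decay_factor once every
# decay_steps training steps. A NumPy sketch of the same rule, with made-up
# config values (the real ones live in configuration.TrainingConfig):
import numpy as np

initial_lr = 0.2                # hypothetical initial_learning_rate
decay_factor = 0.5              # hypothetical learning_rate_decay_factor
num_examples_per_epoch = 10000  # hypothetical
batch_size = 10                 # hypothetical
num_epochs_per_decay = 2.0      # hypothetical

decay_steps = int(num_examples_per_epoch / batch_size * num_epochs_per_decay)

def lr_at(step):
  # Mirrors tf.train.exponential_decay(..., staircase=True).
  return initial_lr * decay_factor ** np.floor(step / decay_steps)

for step in [0, 1999, 2000, 4000, 6000]:
  print(step, lr_at(step))  # 0.2, 0.2, 0.1, 0.05, 0.025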
def main(_):
  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model_config = configuration.ModelConfig()
    model_config.rnn_type = FLAGS.rnn_type
    model = polyvore_model.PolyvoreModel(model_config, mode="inference")
    model.build()
    saver = tf.train.Saver()

  # Load pre-computed image features.
  with open(FLAGS.feature_file, "rb") as f:
    test_data = pkl.load(f)
  test_ids = list(test_data.keys())
  # test_feat has one extra all-zero row that represents the END of the
  # RNN prediction.
  test_feat = np.zeros(
      (len(test_ids) + 1, len(test_data[test_ids[0]]["image_rnn_feat"])))
  for i, test_id in enumerate(test_ids):
    # Image feature in the RNN space.
    test_feat[i] = test_data[test_id]["image_rnn_feat"]

  g.finalize()
  with tf.Session(graph=g) as sess:
    saver.restore(sess, FLAGS.checkpoint_path)

    all_f_scores = []
    all_b_scores = []
    all_scores = []
    all_labels = []
    testset = open(FLAGS.label_file).read().splitlines()
    k = 0
    for test_outfit in testset:
      k += 1
      if k % 100 == 0:
        print("Finished %d outfits." % k)
      image_seqs = []
      for test_image in test_outfit.split()[1:]:
        image_seqs.append(test_ids.index(test_image))

      [f_score, b_score] = run_compatibility_inference(
          sess, image_seqs, test_feat, model_config.num_lstm_units, model)

      all_f_scores.append(f_score)
      all_b_scores.append(b_score)
      all_scores.append(f_score + b_score)
      all_labels.append(int(test_outfit[0]))

    # Calculate AUC over all outfits.
    fpr, tpr, thresholds = metrics.roc_curve(all_labels, all_scores,
                                             pos_label=1)
    print("Compatibility AUC: %f for %d outfits" %
          (metrics.auc(fpr, tpr), len(all_labels)))

    with open(FLAGS.result_file, "wb") as f:
      pkl.dump(
          {
              "all_labels": all_labels,
              "all_f_scores": all_f_scores,
              "all_b_scores": all_b_scores
          }, f)
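# The parsing above implies the label file format: each line starts with a
# single-character 0/1 compatibility label, followed by the outfit's image
# ids ("<set_id>_<image_index>") separated by whitespace. Exercising the same
# parsing on a made-up line:
line = "1 123_1 123_2 123_3"  # hypothetical outfit line
label = int(line[0])          # 1 -> compatible outfit
images = line.split()[1:]     # ["123_1", "123_2", "123_3"]
print(label, images)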
def main(_):
  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model_config = configuration.ModelConfig()
    model = polyvore_model.PolyvoreModel(model_config, mode="inference")
    model.build()
    saver = tf.train.Saver()
  g.finalize()

  with tf.Session(graph=g) as sess:
    saver.restore(sess, FLAGS.checkpoint_path)

    # Load pre-computed image features.
    with open(FLAGS.feature_file, "rb") as f:
      test_data = pkl.load(f)
    test_ids = list(test_data.keys())
    # One extra all-zero row marks the END of the RNN prediction.
    test_feat = np.zeros(
        (len(test_ids) + 1, len(test_data[test_ids[0]]["image_rnn_feat"])))
    test_emb = np.zeros(
        (len(test_ids), len(test_data[test_ids[0]]["image_feat"])))
    for i, test_id in enumerate(test_ids):
      # Image feature in the RNN space.
      test_feat[i] = test_data[test_id]["image_rnn_feat"]
      # Image feature in the joint embedding space.
      test_emb[i] = test_data[test_id]["image_feat"]
    test_emb = norm_row(test_emb)

    # Load queries from the JSON file.
    queries = json.load(open(FLAGS.query_file))

    # Get the word embedding map.
    [word_emb] = sess.run([model.embedding_map])

    # Read word names; each line starts with the word itself.
    words = open(FLAGS.word_dict_file).read().splitlines()
    for i, w in enumerate(words):
      words[i] = w.split()[0]

    # Only run the first query for the demo.
    for q in queries[:1]:
      set_name = q["image_query"]
      print(set_name)
      # Run the Bi-LSTM model on the image query.
      rnn_sets = run_set_inference(sess, set_name, test_ids, test_feat,
                                   model_config.num_lstm_units)
      print(rnn_sets)

      # Rerank the LSTM prediction by similarity to the text query.
      word_query = str(q["text_query"])
      print(word_query)
      if word_query != "":
        # Get the indices of the query images.
        test_idx = []
        for name in set_name:
          try:
            test_idx.append(test_ids.index(name))
          except ValueError:
            print("Image %s not found in the feature file." % name)
            return

        # Calculate the embedding of the word query; indices are offset by 1
        # (index 0 is presumably reserved, e.g. for padding).
        word_query = [i + 1 for i in range(len(words))
                      if words[i] in word_query.split()]
        print(word_query)
        query_emb = norm_row(np.sum(word_emb[word_query], axis=0))

        # Replace every predicted item that is not part of the query with a
        # nearest neighbor that also accounts for the text query.
        for i, j in enumerate(rnn_sets):
          if j not in test_idx:
            rnn_sets[i] = nn_search(j, test_emb, query_emb)
        print(rnn_sets)

      # Collect the image names of the final set.
      image_set = []
      for i in rnn_sets:
        image_set.append(test_ids[i])

      # Copy the result images into the result directory.
      for i, image in enumerate(image_set):
        name = image.split("_")
        os.system("cp %s/%s/%s.jpg %s/%d_%s.jpg" %
                  (FLAGS.image_dir, name[0], name[1],
                   FLAGS.result_dir, i, image))
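# norm_row is used above but not defined in this snippet; from how it is
# applied (unit-normalizing the embedding matrix and the summed word vector
# before similarity comparisons), it is presumably row-wise L2 normalization.
# A sketch under that assumption:
import numpy as np

def norm_row(x):
  # L2-normalize each row of a matrix, or a single 1-D vector; the small
  # epsilon guards against division by zero for all-zero rows.
  if x.ndim == 1:
    return x / (np.linalg.norm(x) + 1e-10)
  return x / (np.linalg.norm(x, axis=1, keepdims=True) + 1e-10)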