def dump(config):
    """Write the distance heuristic and the heuristic-ordered word list.

    The neighbor id and distance tables are read from
    ``config.distances_dir``. Each row of the distance table is summed to
    give one heuristic value per row; the values, and the words ordered by
    ascending value, are then written under ``config.heuristic_dir``.

    Args:
        config: an instance of HeuristicConfiguration
    """
    # All four file names carry the same "<embedding>d.<length>w.<radius>k" tag.
    tag = (str(config.embedding) + "d",
           str(config.length) + "w",
           str(config.radius) + "k")
    ids_name = os.path.join(
        config.distances_dir, "neighbor.%s.%s.%s.ids.txt" % tag)
    distances_name = os.path.join(
        config.distances_dir, "neighbor.%s.%s.%s.distances.txt" % tag)
    heuristic_name = os.path.join(
        config.heuristic_dir, "heuristic.%s.%s.%s.txt" % tag)
    words_name = os.path.join(
        config.heuristic_dir, "heuristic.%s.%s.%s.sorted.names.txt" % tag)

    # Only the vocabulary half of the loaded pair is needed here.
    vocab, _ = glove.load(config)

    print("Loading %s and %s." % (ids_name, distances_name))
    # The id table is read (so a missing file still fails here) but is not
    # used in the computation below.
    _neighbor_ids = np.loadtxt(ids_name, dtype=np.int32)
    neighbor_distances = np.loadtxt(distances_name, dtype=np.float32)

    # One value per row: the sum of that row's neighbor distances.
    heuristic = np.sum(neighbor_distances, axis=1)

    # Word indices ordered by ascending heuristic value, one word per line.
    ordered_ids = sorted(range(config.length), key=heuristic.__getitem__)
    lines = [vocab.id_to_word(word_id) + "\n" for word_id in ordered_ids]

    np.savetxt(heuristic_name, heuristic)
    with open(words_name, "w") as out_file:
        out_file.writelines(lines)
    print("Saved %s and %s." % (heuristic_name, words_name))
def load_glove(vocab_size=100000, embedding_size=300,
               filedir="/home/ubuntu/research/data/glove/embeddings/"):
    """Load the GloVe vocabulary and embeddings with the standard tokens.

    Args:
        vocab_size: number of words to load (passed as ``length``).
        embedding_size: dimensionality of the embeddings (passed as
            ``embedding``).
        filedir: directory containing the embedding files. Defaults to the
            path that was previously hard-coded, so existing callers are
            unaffected.

    Returns:
        Whatever ``glove.load`` returns for the built configuration
        (presumably a ``(vocab, embeddings)`` pair; confirm against the
        glove package).
    """
    # The config params for loading the vocab and embedding
    # See: https://github.com/brandontrabucco/glove/tree/8f11a9b3ab927a15a947683ca7a1fcbc5d9c8ba1
    config = glove.configuration.Configuration(
        embedding=embedding_size,
        filedir=filedir,
        length=vocab_size,
        start_word="<S>",
        end_word="</S>",
        unk_word="<UNK>")
    return glove.load(config)
def run_caption(checkpoint_path, filenames, heuristic_amount):
    """Caption each image file with a restored ShowAndTellModel.

    Args:
        checkpoint_path: checkpoint to restore the model variables from.
        filenames: sequence of image file paths to caption.
        heuristic_amount: value fed to ``model.heuristic_temperature`` for
            every image.

    Returns:
        A list with one dict per file, in input order, of the form
        ``{"filename": <path>, "captions": [<sentence>, ...]}``.
    """
    graph = tf.Graph()
    with graph.as_default():
        # Build the model for evaluation.
        model_config = configuration.ModelConfig()
        model = show_and_tell_model.ShowAndTellModel(
            model_config, mode="inference")
        model.build()
        # Create the Saver to restore model Variables.
        saver = tf.train.Saver()
    graph.finalize()

    # Create the vocabulary.
    vocab = glove.load(model_config.config)[0]

    run_results = []
    with tf.Session(graph=graph) as sess:
        # Load the model from checkpoint.
        tf.logging.info("Loading model from checkpoint: %s", checkpoint_path)
        saver.restore(sess, checkpoint_path)
        tf.logging.info("Successfully loaded checkpoint: %s",
                        os.path.basename(checkpoint_path))

        # Read every image up front, then run the model on each in turn.
        images = []
        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as image_file:
                images.append(image_file.read())

        captions = []
        for image_bytes in images:
            feed = {
                "image_feed:0": image_bytes,
                model.heuristic_temperature: heuristic_amount
            }
            captions.append(sess.run(model.final_seqs, feed_dict=feed))

        for filename, caption in zip(filenames, captions):
            sentences = []
            for j in range(caption.shape[1]):
                # The last id of each sequence is dropped before decoding.
                word_ids = caption[0, j, :].tolist()[:-1]
                sentences.append(
                    " ".join(vocab.id_to_word(w) for w in word_ids))
            run_results.append({"filename": filename, "captions": sentences})
    return run_results
def main(unused_argv):
    """Build the train/val/test datasets with negative examples and shard them.

    Checks that every shard count is compatible with FLAGS.num_threads,
    creates the output directory if needed, loads the vocabulary, loads the
    three dataset splits, augments each split with negative examples, and
    hands each split to ``_process_dataset``.

    Args:
        unused_argv: ignored.
    """

    def _is_valid_num_shards(num_shards):
        """Returns True if num_shards is compatible with FLAGS.num_threads."""
        if num_shards < FLAGS.num_threads:
            return True
        return num_shards % FLAGS.num_threads == 0

    assert _is_valid_num_shards(FLAGS.train_shards), (
        "Please make the FLAGS.num_threads commensurate with FLAGS.train_shards"
    )
    assert _is_valid_num_shards(FLAGS.val_shards), (
        "Please make the FLAGS.num_threads commensurate with FLAGS.val_shards")
    assert _is_valid_num_shards(FLAGS.test_shards), (
        "Please make the FLAGS.num_threads commensurate with FLAGS.test_shards"
    )

    if not tf.gfile.IsDirectory(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)

    # Create vocabulary from the glove embeddings.
    glove_config = glove.configuration.Configuration(
        embedding=300,
        filedir=FLAGS.embeddings_dir,
        length=70000,
        start_word="<S>",
        end_word="</S>",
        unk_word="<UNK>")
    vocab = glove.load(glove_config)[0]

    # Load image metadata from caption files.
    train_dataset, val_dataset, test_dataset = _load_and_process_metadata(
        FLAGS.all_dir, vocab)

    # Add negative examples to every split (train, then val, then test).
    vocab_size = len(vocab.reverse_vocab)
    train_dataset, val_dataset, test_dataset = [
        _add_negative_examples(split, vocab_size, FLAGS.partition_size,
                               FLAGS.num_repeats)
        for split in (train_dataset, val_dataset, test_dataset)
    ]

    # Process the splits in the same fixed order.
    jobs = (
        ("train", train_dataset, FLAGS.train_shards),
        ("val", val_dataset, FLAGS.val_shards),
        ("test", test_dataset, FLAGS.test_shards),
    )
    for split_name, split_data, shard_count in jobs:
        _process_dataset(split_name, split_data, vocab, shard_count)
def main(unused_argv):
    """Redistribute the MSCOCO captions into train/val/test and shard them.

    Checks that every shard count is compatible with FLAGS.num_threads,
    creates the output directory if needed, loads the MSCOCO train and val
    metadata, re-splits it, loads the vocabulary, and hands each split to
    ``_process_dataset``.

    Args:
        unused_argv: ignored.
    """

    def _is_valid_num_shards(num_shards):
        """Returns True if num_shards is compatible with FLAGS.num_threads."""
        if num_shards < FLAGS.num_threads:
            return True
        return num_shards % FLAGS.num_threads == 0

    assert _is_valid_num_shards(FLAGS.train_shards), (
        "Please make the FLAGS.num_threads commensurate with FLAGS.train_shards")
    assert _is_valid_num_shards(FLAGS.val_shards), (
        "Please make the FLAGS.num_threads commensurate with FLAGS.val_shards")
    assert _is_valid_num_shards(FLAGS.test_shards), (
        "Please make the FLAGS.num_threads commensurate with FLAGS.test_shards")

    if not tf.gfile.IsDirectory(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)

    # Load image metadata from caption files.
    mscoco_train_dataset = _load_and_process_metadata(
        FLAGS.train_captions_file, FLAGS.train_image_dir)
    mscoco_val_dataset = _load_and_process_metadata(
        FLAGS.val_captions_file, FLAGS.val_image_dir)

    # Redistribute the MSCOCO data as follows:
    #   train_dataset = 100% of mscoco_train_dataset + 85% of mscoco_val_dataset.
    #   val_dataset = 5% of mscoco_val_dataset (for validation during training).
    #   test_dataset = 10% of mscoco_val_dataset (for final evaluation).
    num_val = len(mscoco_val_dataset)
    train_cutoff = int(0.85 * num_val)
    val_cutoff = int(0.90 * num_val)
    train_dataset = mscoco_train_dataset + mscoco_val_dataset[:train_cutoff]
    val_dataset = mscoco_val_dataset[train_cutoff:val_cutoff]
    test_dataset = mscoco_val_dataset[val_cutoff:]

    # Create vocabulary from the glove embeddings.
    glove_config = glove.configuration.Configuration(
        embedding=300,
        filedir=FLAGS.embeddings_dir,
        length=70000,
        start_word="<S>",
        end_word="</S>",
        unk_word="<UNK>")
    vocab = glove.load(glove_config)[0]

    jobs = (
        ("train", train_dataset, FLAGS.train_shards),
        ("val", val_dataset, FLAGS.val_shards),
        ("test", test_dataset, FLAGS.test_shards),
    )
    for split_name, split_data, shard_count in jobs:
        _process_dataset(split_name, split_data, vocab, shard_count)
def dump(config):
    """Loads word embeddings and calculates neighbors.

    The vocabulary index range ``[0, config.length)`` is split into
    contiguous chunks, one per worker thread. Every thread runs
    ``_compute_neighbors`` with the shared output arrays, and once all
    threads have joined the arrays are saved as text files under
    ``config.distances_dir``.

    Args:
        config: an instance of NeighborConfiguration
    """
    _, embeddings = glove.load(config)

    # Never launch more threads than there are words to process.
    num_threads = min(config.length, config.distances_threads)

    # `np.int` was removed in NumPy 1.24; it was an alias for the builtin
    # `int`, so `astype(int)` yields the same dtype on every NumPy version.
    spacing = np.linspace(0, config.length, num_threads + 1).astype(int)
    ranges = [[start, stop] for start, stop in zip(spacing[:-1], spacing[1:])]

    # Output buffers shared by all workers.
    neighbor_ids = np.zeros([config.length, config.radius]).astype(int)
    neighbor_distances = np.zeros([config.length, config.radius])

    # Report the number of threads actually started (one per range).
    print("Launching %d threads for calculating neighbors." % len(ranges))
    threads = []
    for thread_index in range(len(ranges)):
        args = (thread_index, ranges, embeddings, neighbor_ids,
                neighbor_distances)
        t = threading.Thread(target=_compute_neighbors, args=args)
        t.start()
        threads.append(t)
    for t in threads:
        t.join()

    # Both files share the "<embedding>d.<length>w.<radius>k" tag.
    tag = (str(config.embedding) + "d",
           str(config.length) + "w",
           str(config.radius) + "k")
    ids_name = os.path.join(
        config.distances_dir, "neighbor.%s.%s.%s.ids.txt" % tag)
    distances_name = os.path.join(
        config.distances_dir, "neighbor.%s.%s.%s.distances.txt" % tag)

    np.savetxt(ids_name, neighbor_ids)
    np.savetxt(distances_name, neighbor_distances)
    print("Finished saving %s and %s." % (ids_name, distances_name))
def evaluate_model(sess, model, global_step, summary_writer, summary_op):
    """Computes precision and recall over the evaluation dataset.

    Each batch's ground-truth attributes and predicted probabilities are
    flattened and thresholded at 0.5 to accumulate a confusion matrix.
    Precision and recall are derived from the totals and written through
    ``summary_writer``.

    Args:
        sess: Session object.
        model: Instance of AttrigramModel; the model to evaluate.
        global_step: Integer; global step of the model checkpoint. It is
            replaced by the value fetched from the model once at least one
            batch has been evaluated.
        summary_writer: Instance of FileWriter.
        summary_op: Op for generating model summaries.
    """
    # Log model summaries on a single batch.
    summary_str = sess.run(summary_op)
    summary_writer.add_summary(summary_str, global_step)

    start_time = time.time()
    # NOTE(review): the vocabulary is not used below; the load is kept in
    # case it has side effects. Confirm and drop if it does not.
    vocab = glove.load(model.config.config)[0]

    # Number of batches needed to cover the evaluation set.
    num_eval_batches = int(
        math.ceil(FLAGS.num_eval_examples / model.config.batch_size))

    # Confusion-matrix counts, accumulated over all batches.
    tp = 0
    fp = 0
    tn = 0
    fn = 0
    thd = 0.5
    for i in range(num_eval_batches):
        (global_step, _, deepfashion_attributes,
         attribute_probabilities) = sess.run([
             model.global_step, model.deepfashion_words,
             model.deepfashion_attributes, model.attribute_probabilities
         ])
        for a, b in zip(deepfashion_attributes.flatten(),
                        attribute_probabilities.flatten()):
            if a >= thd and b >= thd:
                tp += 1
            elif a < thd and b >= thd:
                fp += 1
            elif a < thd and b < thd:
                tn += 1
            else:
                fn += 1
        if not i % 100:
            tf.logging.info("Computed losses for %d of %d batches.", i + 1,
                            num_eval_batches)

    # Computed once from the final totals, so both values are defined even
    # when there are zero batches. The 1e-3 term avoids division by zero.
    precision = tp / (tp + fp + 1e-3)
    recall = tp / (tp + fn + 1e-3)

    eval_time = time.time() - start_time
    tf.logging.info("Precision = %d / %d = %f (%.2g sec)", tp, tp + fp,
                    precision, eval_time)
    tf.logging.info("Recall = %d / %d = %f (%.2g sec)", tp, tp + fn, recall,
                    eval_time)

    # Log precision and recall to the FileWriter.
    summary = tf.Summary()
    value = summary.value.add()
    value.simple_value = precision
    value.tag = "Precision"
    value = summary.value.add()
    value.simple_value = recall
    value.tag = "Recall"
    summary_writer.add_summary(summary, global_step)

    # Write the Events file to the eval directory.
    summary_writer.flush()
    tf.logging.info("Finished processing evaluation at global step %d.",
                    global_step)
def evaluate_model(sess, model, global_step, summary_writer, summary_op):
    """Generates styled captions for the evaluation set and logs the metrics.

    For every batch the model's initial states are assigned,
    ``model.descend_style`` is run ``FLAGS.style_iterations`` times, and the
    resulting ``model.style_seqs`` are decoded. A comparison of ground
    truth, original and styled sentences is written to a JSON file in
    ``FLAGS.eval_dir``. The styled caption of the first occurrence of each
    image id is passed to ``coco_get_metrics``, and the returned metrics are
    written through ``summary_writer``.

    Args:
        sess: Session object.
        model: Instance of ShowAndTellModel; the model to evaluate.
        global_step: Integer; global step of the model checkpoint. It is
            replaced by the value fetched from the model once at least one
            batch has been evaluated.
        summary_writer: Instance of FileWriter.
        summary_op: Op for generating model summaries.
    """
    # Log model summaries on a single batch.
    summary_str = sess.run(summary_op)
    summary_writer.add_summary(summary_str, global_step)

    # Timestamp used to name this run's output files.
    time_now = int(time.time())
    vocab = glove.load(model.config.config)[0]

    # Cap the number of batches at FLAGS.max_eval_batches.
    batch_size = model.config.batch_size
    num_eval_batches = min(
        FLAGS.max_eval_batches,
        int(math.ceil(FLAGS.num_eval_examples / batch_size)))
    total_examples = batch_size * num_eval_batches

    unique_image_ids = set()
    json_dump = []
    comparison_dump = []

    for i in range(num_eval_batches):
        tf.logging.info("Starting beam_search on batch %d", i)
        # model.images is still fetched so the set of evaluated ops is
        # unchanged, but its value is not needed here.
        (global_step, _, image_ids, target_seqs, final_seqs, _) = sess.run([
            model.global_step, model.images, model.image_ids,
            model.target_seqs, model.final_seqs, model.assign_initial_states
        ])
        for _ in range(FLAGS.style_iterations):
            sess.run(model.descend_style)
        style_seqs = sess.run(model.style_seqs)
        tf.logging.info("Finishing beam_search on batch %d", i)

        # For each element of the batch write to file
        for b in range(batch_size):
            single_image_id = image_ids[b]
            # Decode each sequence once and reuse it for the dumps and logs.
            truth_sentence = ids_to_sentence(target_seqs[b, :], vocab)
            original_sentence = ids_to_sentence(final_seqs[b, 0, :], vocab)
            styled_sentence = ids_to_sentence(style_seqs[b, 0, :], vocab)

            comparison_dump.append({
                "ground_truth": truth_sentence,
                "original": original_sentence,
                "styled": styled_sentence
            })

            if single_image_id in unique_image_ids:
                continue

            # Caption to dump and update image ids
            example_index = i * batch_size + b
            json_dump.append({
                "image_id": int(np.sum(single_image_id)),
                "caption": styled_sentence
            })
            tf.logging.info("Ground Truth %d of %d: %s", example_index,
                            total_examples, truth_sentence)
            tf.logging.info("Original %d of %d: %s", example_index,
                            total_examples, original_sentence)
            tf.logging.info("Styled %d of %d: %s", example_index,
                            total_examples, styled_sentence)
            unique_image_ids.add(single_image_id)

    # Output a comparison file between generated and ground truth.
    comparison_path = os.path.join(
        FLAGS.eval_dir, "style.comparison." + str(time_now) + ".json")
    with open(comparison_path, "w") as f:
        json.dump(comparison_dump, f)

    # Evaluate the performance
    metrics = coco_get_metrics(time_now, global_step, json_dump)

    # Log every returned metric to the FileWriter.
    summary = tf.Summary()
    for name, val in metrics.items():
        value = summary.value.add()
        value.simple_value = val
        value.tag = name
    summary_writer.add_summary(summary, global_step)

    # Write the Events file to the eval directory.
    summary_writer.flush()
    tf.logging.info("Finished processing evaluation at global step %d.",
                    global_step)
if __name__ == "__main__":
    # Command line: a single optional distance threshold.
    parser = argparse.ArgumentParser("synonyms")
    parser.add_argument("--threshold", type=float, default=5.632423353947836)
    args = parser.parse_args()
    threshold = args.threshold

    # Load the vocabulary and embeddings.
    config = glove.configuration.Configuration(
        embedding=300,
        filedir="./embeddings/",
        length=70000,
        start_word="<S>",
        end_word="</S>",
        unk_word="<UNK>")
    vocab, embeddings = glove.load(config)

    with open("captions.json", "r") as f:
        captions = json.load(f)

    vocab_ids = list(range(70000))

    # Collect the distinct word ids used by the ground-truth captions and by
    # the model captions. Text is lower-cased and split on whitespace.
    dataset_ids = set()
    model_ids = set()
    for e in captions:
        dataset_ids.update(
            vocab.word_to_id(w)
            for w in e["ground_truth"].strip().lower().split())
        model_ids.update(
            vocab.word_to_id(w)
            for w in e["caption"].strip().lower().split())
# for i in range(src_vocabsize): # word = list(SRC.vocab.stoi.keys())[i] # if word in src_word2vec.wv.index2word: # src_embed_mtrx[SRC.vocab.stoi[word]] = torch.tensor(src_word2vec.wv[word].copy()).to(device) # # for i in range(trg_vocabsize): # word = list(TRG.vocab.stoi.keys())[i] # if word in trg_word2vec.wv.index2word: # trg_embed_mtrx[TRG.vocab.stoi[word]] = torch.tensor(trg_word2vec.wv[word].copy()).to(device) ''' for glove ''' glove = Glove() src_glove = glove.load('src_glove.model') trg_glove = glove.load('trg_glove.model') for word in list(SRC.vocab.stoi.keys()): if word in src_glove.dictionary: src_embed_mtrx[SRC.vocab.stoi[word]] = torch.tensor(src_glove.word_vectors[src_glove.dictionary[word]].copy()).to(device) for word in list(TRG.vocab.stoi.keys()): if word in trg_glove.dictionary: trg_embed_mtrx[SRC.vocab.stoi[word]] = torch.tensor(trg_glove.word_vectors[trg_glove.dictionary[word]].copy()).to(device) print("pretrained word embeddings loaded") sys.stdout.flush() ''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' positional encoding
def run_attributes(checkpoint_path, filenames, num_divisions):
    """Score every vocabulary word against each image with an AttrigramModel.

    The vocabulary is split into ``num_divisions`` equal id ranges; for each
    image the model is run once per range and the outputs are concatenated.

    Args:
        checkpoint_path: checkpoint to restore the model variables from.
        filenames: sequence of image file paths.
        num_divisions: number of equal partitions of the vocabulary; must
            divide the vocabulary size exactly.

    Returns:
        A list with one dict per file, in input order, with keys
        ``"filename"``, ``"words"`` (the ten highest-scoring words joined by
        ", ", best first), ``"topk"`` (their ids) and ``"probabilities"``
        (the full concatenated list of model outputs).
    """
    graph = tf.Graph()
    with graph.as_default():
        # Build the model for evaluation.
        model_config = configuration.ModelConfig()
        model = attrigram_model.AttrigramModel(model_config, mode="inference")
        model.build()
        # Create the Saver to restore model Variables.
        saver = tf.train.Saver()
    graph.finalize()

    # Create the vocabulary.
    vocab = glove.load(model_config.config)[0]
    vocab_size = len(vocab.reverse_vocab)
    assert vocab_size % num_divisions == 0, "Vocabulary must be evenly divisible."
    partition_size = vocab_size // num_divisions

    with tf.Session(graph=graph) as sess:
        # Load the model from checkpoint.
        tf.logging.info("Loading model from checkpoint: %s", checkpoint_path)
        saver.restore(sess, checkpoint_path)
        tf.logging.info("Successfully loaded checkpoint: %s",
                        os.path.basename(checkpoint_path))

        topk = []
        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as image_file:
                image_bytes = image_file.read()

            # Query one contiguous id range at a time and concatenate.
            probabilities = []
            for division in range(num_divisions):
                first_id = division * partition_size
                word_ids = list(range(first_id, first_id + partition_size))
                scores = sess.run(
                    model.attribute_probabilities,
                    feed_dict={
                        "image_feed:0": image_bytes,
                        "word_feed:0": word_ids
                    })
                probabilities.extend(scores.tolist())

            # Ids of the ten largest entries, highest first.
            best_ids = np.argsort(probabilities)[-10:][::-1]
            topk.append([best_ids, probabilities])

        run_results = []
        for filename, (best_ids, probabilities) in zip(filenames, topk):
            words = ", ".join(vocab.id_to_word(w) for w in best_ids)
            run_results.append({
                "filename": filename,
                "words": words,
                "topk": best_ids,
                "probabilities": probabilities
            })
    return run_results
def run_caption(checkpoint_path, attrigram_checkpoint_path, filenames,
                divisions, style_iterations=10):
    """Caption images and record the caption after each style-descent step.

    For every image the attribute probabilities from
    ``run_attributes.run_attributes`` are fed to a ShowAndTellModel built
    with ``use_style = True``. The initial states are assigned, the starting
    ``model.style_seqs`` are recorded, and ``model.descend_style`` is then
    run ``style_iterations`` times with the sequences recorded after every
    step.

    Args:
        checkpoint_path: checkpoint to restore the captioning model from.
        attrigram_checkpoint_path: checkpoint passed to ``run_attributes``.
        filenames: sequence of image file paths.
        divisions: number of vocabulary partitions passed to
            ``run_attributes``.
        style_iterations: number of ``descend_style`` steps per image.
            Defaults to 10, the value that was previously hard-coded.

    Returns:
        A list with one dict per file, in input order, of the form
        ``{"filename": <path>, "captions": [<sentence>, ...]}``, holding the
        decoded sentences of the initial sequences followed by those of
        each descent step.
    """
    g = tf.Graph()
    with g.as_default():
        # Build the model for evaluation.
        model_config = configuration.ModelConfig()
        model = show_and_tell_model.ShowAndTellModel(
            model_config, mode="inference")
        model.use_style = True
        model.build()
        # Create the Saver to restore model Variables.
        saver = tf.train.Saver(var_list=model.model_variables)
    g.finalize()

    # Create the vocabulary.
    vocab = glove.load(model_config.config)[0]

    run_results = []
    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        tf.logging.info("Loading model from checkpoint: %s", checkpoint_path)
        saver.restore(sess, checkpoint_path)
        tf.logging.info("Successfully loaded checkpoint: %s",
                        os.path.basename(checkpoint_path))

        # Run the attribute probabilities
        ps = run_attributes.run_attributes(attrigram_checkpoint_path,
                                           filenames, divisions)

        # Prepare the list of image bytes for evaluation.
        images = []
        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                images.append(f.read())

        captions = []
        for img, p in zip(images, ps):
            input_feed = {
                "image_feed:0": img,
                model.attribute_probabilities: [p["probabilities"]]
            }
            sess.run(model.assign_initial_states, feed_dict=input_feed)
            # The original caption
            img_progress = [sess.run(model.style_seqs, feed_dict=input_feed)]
            # Perform style transfer
            for _ in range(style_iterations):
                sess.run(model.descend_style, feed_dict=input_feed)
                img_progress.append(
                    sess.run(model.style_seqs, feed_dict=input_feed))
            captions.append(img_progress)

        for filename, img_progress in zip(filenames, captions):
            sentences = []
            for flow in img_progress:
                for j in range(flow.shape[0]):
                    # The last id of each sequence is dropped before decoding.
                    c = flow[j, 0, :].tolist()
                    sentences.append(
                        " ".join(vocab.id_to_word(w) for w in c[:-1]))
            run_results.append({"filename": filename, "captions": sentences})
    return run_results