def main(unused):
    """Entry point: parse CLI options, build the vocabulary, and run.

    Args:
        unused: Ignored; present for compatibility with app-runner entry
            points (e.g. tf.app.run style) that pass argv to main.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("embedding_path")
    parser.add_argument("vocabulary_dir")
    parser.add_argument("--training_dir")
    parser.add_argument("--validation_dir")
    parser.add_argument("--decode_dir")
    parser.add_argument("--decode_out_dir")
    parser.add_argument("--mode", choices=["train", "validate", "decode"],
                        default="train")
    parser.add_argument("--logdir")
    parser.add_argument("--batch_size", type=int, default=30)
    parser.add_argument("--validation_interval", type=int, default=20000)
    parser.add_argument("--beam_width", type=int, default=5)
    parser.add_argument("--max_output_length", type=int, default=32)
    parser.add_argument("--target_vocabulary_size", type=int, default=20000)
    parser.add_argument("--synthetic", action="store_true")
    parser.add_argument("--allow_gpu_growth", action="store_true")
    parser.add_argument("--collect_run_metadata", action="store_true")
    parser.add_argument("--log_weight_images", action="store_true")
    options = parser.parse_args()

    if options.mode == "decode":
        # Batching not supported in decoding
        options.batch_size = 1

    embedding_words, word_dict, word_embedding_dim = load_word_embeddings(
        options.embedding_path)

    # Cap on words taken from the document vocabulary file, and on extra
    # words pulled in from the pretrained embedding vocabulary.
    max_document_vocabulary_words = 150000
    max_embedding_vocabulary_words = 100000

    vocabulary = Vocabulary()
    summary_vocabulary_path = path.join(options.vocabulary_dir,
                                        "summary_vocabulary.txt")
    # Fill up to target_vocabulary_size from the summary vocabulary first.
    vocabulary.add_from_file(
        summary_vocabulary_path,
        options.target_vocabulary_size - len(vocabulary.words))
    document_vocabulary_path = path.join(options.vocabulary_dir,
                                         "document_vocabulary.txt")
    # Add the most common words from vocabulary
    vocabulary.add_from_file(document_vocabulary_path,
                             max_document_vocabulary_words)
    # Add additional common words from loaded embeddings
    # note that embedding_words contains 2D numpy arrays
    vocabulary.add_words(embedding_words[:max_embedding_vocabulary_words])

    run(options, word_dict, word_embedding_dim, vocabulary)
def _make_parser():
    """Return the argparse parser holding every command-line option."""
    parser = argparse.ArgumentParser()
    parser.add_argument('embedding_path')
    parser.add_argument('vocabulary_dir')
    parser.add_argument('--training_dir')
    parser.add_argument('--validation_dir')
    parser.add_argument('--decode_dir')
    parser.add_argument('--decode_out_dir')
    parser.add_argument('--mode', choices=['train', 'validate', 'decode'],
                        default='train')
    parser.add_argument('--logdir')
    parser.add_argument('--batch_size', type=int, default=30)
    parser.add_argument('--validation_interval', type=int, default=20000)
    parser.add_argument('--beam_width', type=int, default=5)
    parser.add_argument('--max_output_length', type=int, default=32)
    parser.add_argument('--target_vocabulary_size', type=int, default=20000)
    parser.add_argument('--synthetic', action='store_true')
    parser.add_argument('--allow_gpu_growth', action='store_true')
    parser.add_argument('--collect_run_metadata', action='store_true')
    parser.add_argument('--log_weight_images', action='store_true')
    return parser


def main(unused):
    """Parse command-line options, assemble the vocabulary, and start a run.

    Args:
        unused: Ignored; kept so app-runner entry points can pass argv.
    """
    options = _make_parser().parse_args()

    if options.mode == 'decode':
        # Batching not supported in decoding
        options.batch_size = 1

    embedding_words, word_dict, word_embedding_dim = load_word_embeddings(
        options.embedding_path)

    vocabulary = Vocabulary()
    # Fill up to target_vocabulary_size from the summary vocabulary first.
    vocabulary.add_from_file(
        path.join(options.vocabulary_dir, 'summary_vocabulary.txt'),
        options.target_vocabulary_size - len(vocabulary.words))
    # Add the most common words from vocabulary
    vocabulary.add_from_file(
        path.join(options.vocabulary_dir, 'document_vocabulary.txt'),
        150000)
    # Add additional common words from loaded embeddings
    vocabulary.add_words(embedding_words[:100000])

    run(options, word_dict, word_embedding_dim, vocabulary)