def func(save_dir=r'../outputs', vocab=r'../outputs/vocabulary.txt',
         train=r'../outputs/train-data.npz', valid=r'../outputs/valid-data.npz',
         bidirectional=1, batch_size=32, num_epochs=1, learning_rate=0.001,
         dropout_keep=1.0, interval=1000, lstm_units=500, embedding_size=100,
         in_embeddings=None, in_train_embeddings=False):
    """Train a conditioned seq2seq model on pre-tokenized binary data.

    Loads the vocabulary and train/validation sets, builds (or loads)
    word embeddings, constructs a ``seq.seq2seqModel`` and runs its
    ``train`` loop, checkpointing into ``save_dir``.

    Args:
        save_dir: directory where the model/checkpoints are saved.
        vocab: path to the vocabulary text file.
        train: path to the training set (.npz produced by the same pipeline).
        valid: path to the validation set (.npz).
        bidirectional: passed through to the model (truthy enables a
            bidirectional encoder — presumably; confirm in ``seq``).
        batch_size: minibatch size for training.
        num_epochs: number of training epochs.
        learning_rate: optimizer learning rate.
        dropout_keep: dropout keep probability (1.0 disables dropout).
        interval: report interval (in steps) passed to ``model.train``.
        lstm_units: LSTM hidden size.
        embedding_size: embedding dimensionality when creating new embeddings.
        in_embeddings: optional path/array of pretrained embeddings; if None,
            random embeddings are generated by ``load_or_create_embeddings``.
        in_train_embeddings: whether to fine-tune supplied pretrained
            embeddings. Ignored (forced True) when no pretrained embeddings
            are given, since random embeddings must be trained.
    """
    logging.basicConfig(level=logging.INFO)

    wd = utils.WordDictionary(vocab)
    embeddings = load_or_create_embeddings(in_embeddings, wd.vocabulary_size,
                                           embedding_size)

    logging.info('Reading training data')
    train_data = utils.load_binary_data(train)
    logging.info('Reading validation data')
    valid_data = utils.load_binary_data(valid)
    logging.info('Creating model')

    # Random embeddings must always be trained; pretrained ones are
    # fine-tuned only if explicitly requested.
    train_embeddings = in_train_embeddings if in_embeddings else True

    model = seq.seq2seqModel(lstm_units, embeddings, wd.eos_index,
                             train_embeddings=train_embeddings,
                             bidirectional=bidirectional,
                             condition=True)

    # Context manager ensures the session is closed even if training fails
    # (the original leaked the session).
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        show_parameter_count(model.get_trainable_variables())
        logging.info('Initialized the model and all variables. '
                     'Starting training.')
        model.train(sess, save_dir, train_data, valid_data, batch_size,
                    num_epochs, learning_rate, dropout_keep, 5.0,
                    report_interval=interval)
default=2) parser.add_argument('--embeddings', help='Numpy embeddings file. If not supplied, ' 'random embeddings are generated.') parser.add_argument('data', help='data directory name') args = parser.parse_args() logging.basicConfig(level=logging.INFO) path = args.data + '/vocabulary.txt' wd = utils.WordDictionary(path) embeddings = load_or_create_embeddings(args.embeddings, wd.vocabulary_size, args.embedding_size) logging.info('Reading training data') path = args.data + '/train-data.npz' train_data = utils.load_binary_data(path) logging.info('Reading validation data') path = args.data + '/valid-data.npz' valid_data = utils.load_binary_data(path) logging.info('Creating model') model = autoencoder.TextAutoencoder(args.lstm_units, embeddings, wd.eos_index, args.num_gpus) config = tf.ConfigProto(allow_soft_placement=True) config.gpu_options.allow_growth = True config.gpu_options.allocator_type = 'BFC' sess = tf.InteractiveSession(graph=model.g, config=config) sess.run(tf.global_variables_initializer()) model.g.finalize() show_parameter_count(model.get_trainable_variables()) logging.info('Initialized the model and all variables. Starting training.')
help='Numpy embeddings file. If not supplied, ' 'random embeddings are generated.') parser.add_argument('vocab', help='Vocabulary file') parser.add_argument('train', help='Training set') parser.add_argument('valid', help='Validation set') args = parser.parse_args() logging.basicConfig(level=logging.INFO) sess = tf.Session() wd = utils.WordDictionary(args.vocab) embeddings = load_or_create_embeddings(args.embeddings, wd.vocabulary_size, args.embedding_size) logging.info('Reading training data') train_data = utils.load_binary_data(args.train) logging.info('Reading validation data') valid_data = utils.load_binary_data(args.valid) logging.info('Creating model') train_embeddings = args.train_embeddings if args.embeddings else True model = autoencoder.TextAutoencoder(args.lstm_units, embeddings, wd.eos_index, train_embeddings=train_embeddings, bidirectional=args.bidirectional) sess.run(tf.global_variables_initializer()) show_parameter_count(model.get_trainable_variables()) logging.info('Initialized the model and all variables. Starting training.') model.train(sess,