def testBuildDataset(self):
  # See all the available problems
  self.assertTrue(len(problems.available()) > 10)

  # Retrieve a problem by name
  problem = problems.problem("translate_ende_wmt8k")

  # Access train and dev datasets through Problem
  train_dataset = problem.dataset(MODES.TRAIN)
  dev_dataset = problem.dataset(MODES.EVAL)

  # Access vocab size and other info (e.g. the data encoders used to
  # encode/decode data for the feature, used below) through feature_info.
  feature_info = problem.feature_info
  self.assertTrue(feature_info["inputs"].vocab_size > 0)
  self.assertTrue(feature_info["targets"].vocab_size > 0)

  train_example = train_dataset.make_one_shot_iterator().get_next()
  dev_example = dev_dataset.make_one_shot_iterator().get_next()

  with tf.Session() as sess:
    train_ex_val, _ = sess.run([train_example, dev_example])
    _ = feature_info["inputs"].encoder.decode(train_ex_val["inputs"])
    _ = feature_info["targets"].encoder.decode(train_ex_val["targets"])
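# Optional helper sketch (hypothetical, not part of the original test): run
# the same fetch but print the decoded strings to eyeball the tokenization,
# instead of discarding them as the assertions above do.
def print_decoded_example(feature_info, train_example, dev_example):
  with tf.Session() as sess:
    train_ex_val, _ = sess.run([train_example, dev_example])
    print("inputs:  %s" % feature_info["inputs"].encoder.decode(train_ex_val["inputs"]))
    print("targets: %s" % feature_info["targets"].encoder.decode(train_ex_val["targets"]))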
tf.gfile.MakeDirs(data_dir)
tf.gfile.MakeDirs(tmp_dir)
tf.gfile.MakeDirs(train_dir)
tf.gfile.MakeDirs(checkpoint_dir)

gs_data_dir = "./tensor2tensor-data"
gs_ckpt_dir = "./tensor2tensor-checkpoints/"
tf.gfile.MakeDirs(gs_data_dir)
tf.gfile.MakeDirs(gs_ckpt_dir)

"""# Download MNIST and inspect it"""

# A Problem is a dataset together with some fixed pre-processing.
# It could be a translation dataset with a specific tokenization,
# or an image dataset with a specific resolution.
#
# There are many problems available in Tensor2Tensor
problems.available()

# Fetch the MNIST problem
mnist_problem = problems.problem("image_mnist")

# The generate_data method of a problem will download data and process it into
# a standard format ready for training and evaluation.
mnist_problem.generate_data(data_dir, tmp_dir)

# Now let's see the training MNIST data as Tensors.
mnist_example = tfe.Iterator(mnist_problem.dataset(Modes.TRAIN, data_dir)).next()
image = mnist_example["inputs"]
label = mnist_example["targets"]

plt.imshow(image.numpy()[:, :, 0].astype(np.float32), cmap=plt.get_cmap('gray'))
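# A small follow-up sketch (hypothetical, reusing the eager iterator pattern
# above): print the labels of a few training examples as a sanity check.
for count, ex in enumerate(tfe.Iterator(mnist_problem.dataset(Modes.TRAIN, data_dir))):
  print("Label:", ex["targets"].numpy())
  if count >= 2:
    break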
# In[ ]:

PROBLEM = "translate_enfr_wmt32k"  # We chose an English-to-French translation problem with a 32,768-token vocabulary
MODEL = "transformer"  # Our model
HPARAMS = "transformer_big"  # Default hyperparameters for the model.
# If you have only one GPU, use transformer_big_single_gpu.


# In[ ]:

# Show all problems and models

from tensor2tensor.utils import registry
from tensor2tensor import problems

problems.available()  # Show all problems
registry.list_models()  # Show all registered models

# or

# Command line (the registry help lists the registered problems, models, and
# hparams sets):
# get_ipython().system('t2t-trainer --registry_help')


# # 2. Data generation
#
# Generate the data (download the dataset and generate the training files).
#
# ---
#
# You can choose between the command line or code (see the sketch below).
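# In[ ]:

# Sketch of the "code" option mentioned above; the command-line equivalent
# uses the t2t-datagen script. DATA_DIR and TMP_DIR are assumed to be set up
# as in the directory cells of this notebook.
t2t_problem = problems.problem(PROBLEM)
t2t_problem.generate_data(DATA_DIR, TMP_DIR)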
TRAIN_DIR = os.path.expanduser("/data/t2t/train")  # This folder contains the model
EXPORT_DIR = os.path.expanduser("/data/t2t/export")  # This folder contains the exported model for production
TRANSLATIONS_DIR = os.path.expanduser("/data/t2t/translation")  # This folder contains all translated sequences
EVENT_DIR = os.path.expanduser("/data/t2t/event")  # Event files, used to test the BLEU score
USR_DIR = os.path.expanduser("/data/t2t/user")  # This folder contains the data that we want to add

tf.gfile.MakeDirs(DATA_DIR)
tf.gfile.MakeDirs(TMP_DIR)
tf.gfile.MakeDirs(TRAIN_DIR)
tf.gfile.MakeDirs(EXPORT_DIR)
tf.gfile.MakeDirs(TRANSLATIONS_DIR)
tf.gfile.MakeDirs(EVENT_DIR)
tf.gfile.MakeDirs(USR_DIR)

PROBLEM = "translate_enzh_wmt8k"  # We chose an English-to-Chinese translation problem with an 8k vocabulary
MODEL = "transformer"  # Our model
HPARAMS = "transformer_big"  # Default hyperparameters for the model.
# If you have only one GPU, use transformer_big_single_gpu.

from tensor2tensor.utils import registry
from tensor2tensor import problems

print(problems.available())  # Show all problems

t2t_problem = problems.problem(PROBLEM)
t2t_problem.generate_data(DATA_DIR, TMP_DIR)
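# Roughly equivalent command line (flag names assumed from the t2t-datagen
# script), with $DATA_DIR/$TMP_DIR standing in for the paths above:
#   t2t-datagen --data_dir=$DATA_DIR --tmp_dir=$TMP_DIR --problem=translate_enzh_wmt8k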
import collections
import os

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from tensor2tensor import models
from tensor2tensor import problems
from tensor2tensor.layers import common_layers
from tensor2tensor.utils import metrics
from tensor2tensor.utils import registry
from tensor2tensor.utils import t2t_model
from tensor2tensor.utils import trainer_lib

from invertible_UT_model import *

print(problems.available())

# Enable TF Eager execution
tfe = tf.contrib.eager
tfe.enable_eager_execution()

# Flags for TensorFlow 1.5+
FLAGS = tf.flags.FLAGS

# Other setup
Modes = tf.estimator.ModeKeys

# Setup some directories
data_dir = os.path.expanduser("~/t2t/data/translate_ende_wmt32k")
tmp_dir = os.path.expanduser("~/t2t/tmp")
train_dir = os.path.expanduser("~/t2t/train/UT_invertible_test")
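# Create the directories if they do not already exist (mirrors the
# tf.gfile.MakeDirs calls used in the other setup snippets).
tf.gfile.MakeDirs(data_dir)
tf.gfile.MakeDirs(tmp_dir)
tf.gfile.MakeDirs(train_dir)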