def testBuildDataset(self):
        # See all the available problems
        self.assertTrue(len(problems.available()) > 10)

        # Retrieve a problem by name
        problem = problems.problem("translate_ende_wmt8k")

        # Access train and dev datasets through Problem
        train_dataset = problem.dataset(MODES.TRAIN)
        dev_dataset = problem.dataset(MODES.EVAL)

        # Access vocab size and other info (e.g. the data encoders used to
        # encode/decode data for the feature, used below) through feature_info.
        feature_info = problem.feature_info
        self.assertTrue(feature_info["inputs"].vocab_size > 0)
        self.assertTrue(feature_info["targets"].vocab_size > 0)

        train_example = train_dataset.make_one_shot_iterator().get_next()
        dev_example = dev_dataset.make_one_shot_iterator().get_next()

        with tf.Session() as sess:
            train_ex_val, _ = sess.run([train_example, dev_example])
            _ = feature_info["inputs"].encoder.decode(train_ex_val["inputs"])
            _ = feature_info["targets"].encoder.decode(train_ex_val["targets"])
Example No. 2
  def testBuildDataset(self):
    # See all the available problems
    self.assertTrue(len(problems.available()) > 10)

    # Retrieve a problem by name
    problem = problems.problem("translate_ende_wmt8k")

    # Access train and dev datasets through Problem
    train_dataset = problem.dataset(MODES.TRAIN)
    dev_dataset = problem.dataset(MODES.EVAL)

    # Access vocab size and other info (e.g. the data encoders used to
    # encode/decode data for the feature, used below) through feature_info.
    feature_info = problem.feature_info
    self.assertTrue(feature_info["inputs"].vocab_size > 0)
    self.assertTrue(feature_info["targets"].vocab_size > 0)

    train_example = train_dataset.make_one_shot_iterator().get_next()
    dev_example = dev_dataset.make_one_shot_iterator().get_next()

    with tf.Session() as sess:
      train_ex_val, _ = sess.run([train_example, dev_example])
      _ = feature_info["inputs"].encoder.decode(train_ex_val["inputs"])
      _ = feature_info["targets"].encoder.decode(train_ex_val["targets"])
Example No. 3
tf.gfile.MakeDirs(data_dir)
tf.gfile.MakeDirs(tmp_dir)
tf.gfile.MakeDirs(train_dir)
tf.gfile.MakeDirs(checkpoint_dir)
gs_data_dir = "./tensor2tensor-data"
gs_ckpt_dir = "./tensor2tensor-checkpoints/"
tf.gfile.MakeDirs(gs_data_dir)
tf.gfile.MakeDirs(gs_ckpt_dir)
"""# Download MNIST and inspect it"""

# A Problem is a dataset together with some fixed pre-processing.
# It could be a translation dataset with a specific tokenization,
# or an image dataset with a specific resolution.
#
# There are many problems available in Tensor2Tensor
problems.available()
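# For instance (a small sketch, not in the original notebook), the MNIST problems
# can be located by filtering the returned names:
print([name for name in problems.available() if "mnist" in name])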

# Fetch the MNIST problem
mnist_problem = problems.problem("image_mnist")
# The generate_data method of a problem will download data and process it into
# a standard format ready for training and evaluation.
mnist_problem.generate_data(data_dir, tmp_dir)

# Now let's see the training MNIST data as Tensors.
mnist_example = tfe.Iterator(mnist_problem.dataset(Modes.TRAIN,
                                                   data_dir)).next()
image = mnist_example["inputs"]
label = mnist_example["targets"]

plt.imshow(image.numpy()[:, :, 0].astype(np.float32),
           cmap=plt.get_cmap('gray'))
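# The label tensor can be inspected the same way (a small addition, not in the
# original notebook cell):
print("Label:", label.numpy())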
Example No. 4
# In[ ]:

PROBLEM = "translate_enfr_wmt32k"  # We chose a problem translation English to French with 32.768 vocabulary
MODEL = "transformer"  # Our model
HPARAMS = "transformer_big"  # Hyperparameters for the model by default
# If you have a one gpu, use transformer_big_single_gpu

# In[ ]:

# Show all problems and models

from tensor2tensor.utils import registry
from tensor2tensor import problems

print(problems.available())  # Show all problems
print(registry.list_models())  # Show all registered models

# Or from the command line:
#
# get_ipython().system('t2t-trainer --registry_help  # Show the registered models, hparams sets, and problems')
# get_ipython().system('t2t-trainer --problems_help  # Show all problems')

# # 2. Data generation
#
# Generate the data (download the dataset and preprocess it into the standard
# format ready for training and evaluation).
#
# ---
#
# You can choose between the command line or code; a sketch of both paths follows below.
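#
# A minimal sketch of both paths, assuming DATA_DIR and TMP_DIR are defined and
# created (as in the next example):

# In[ ]:

# Command line:
# get_ipython().system('t2t-datagen --data_dir=' + DATA_DIR + ' --tmp_dir=' + TMP_DIR + ' --problem=' + PROBLEM)

# Code:
t2t_problem = problems.problem(PROBLEM)
t2t_problem.generate_data(DATA_DIR, TMP_DIR)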
Example No. 5
TRAIN_DIR = os.path.expanduser("/data/t2t/train")  # This folder contains the trained model
EXPORT_DIR = os.path.expanduser("/data/t2t/export")  # This folder contains the exported model for production
TRANSLATIONS_DIR = os.path.expanduser("/data/t2t/translation")  # This folder contains all translated sequences
EVENT_DIR = os.path.expanduser("/data/t2t/event")  # Event files used to test the BLEU score
USR_DIR = os.path.expanduser("/data/t2t/user")  # This folder contains the custom data we want to add

tf.gfile.MakeDirs(DATA_DIR)
tf.gfile.MakeDirs(TMP_DIR)
tf.gfile.MakeDirs(TRAIN_DIR)
tf.gfile.MakeDirs(EXPORT_DIR)
tf.gfile.MakeDirs(TRANSLATIONS_DIR)
tf.gfile.MakeDirs(EVENT_DIR)
tf.gfile.MakeDirs(USR_DIR)



PROBLEM = "translate_enzh_wmt8k" # We chose a problem translation English to French with 32.768 vocabulary
MODEL = "transformer" # Our model
HPARAMS = "transformer_big" # Hyperparameters for the model by default
                            # If you have a one gpu, use transformer_big_single_gpu


from tensor2tensor.utils import registry
from tensor2tensor import problems

print(problems.available())  # Show all problems

t2t_problem = problems.problem(PROBLEM)
t2t_problem.generate_data(DATA_DIR, TMP_DIR)
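# A minimal sketch (not in the original snippet) of how the generated data and the
# names defined above are typically consumed next:
from tensor2tensor.utils import trainer_lib
from tensor2tensor.utils import registry
import tensorflow as tf

hparams = trainer_lib.create_hparams(HPARAMS, data_dir=DATA_DIR, problem_name=PROBLEM)
model = registry.model(MODEL)(hparams, tf.estimator.ModeKeys.TRAIN)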
import matplotlib.pyplot as plt
import numpy as np
import os
import collections

import tensorflow as tf

from tensor2tensor import models
from tensor2tensor import problems
from tensor2tensor.layers import common_layers
from tensor2tensor.utils import trainer_lib
from tensor2tensor.utils import t2t_model
from tensor2tensor.utils import registry
from tensor2tensor.utils import metrics

from invertible_UT_model import *

print(problems.available())

# Enable TF Eager execution
tfe = tf.contrib.eager
tfe.enable_eager_execution()

# Flags for TensorFlow 1.5+
FLAGS = tf.flags.FLAGS

# Other setup
Modes = tf.estimator.ModeKeys

# Setup some directories
data_dir = os.path.expanduser("~/t2t/data/translate_ende_wmt32k")
tmp_dir = os.path.expanduser("~/t2t/tmp")
train_dir = os.path.expanduser("~/t2t/train/UT_invertible_test")
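# Not in the original snippet: the directories are typically created before use,
# mirroring the tf.gfile.MakeDirs calls in the earlier examples.
tf.gfile.MakeDirs(data_dir)
tf.gfile.MakeDirs(tmp_dir)
tf.gfile.MakeDirs(train_dir)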