# See the License for the specific language governing permissions and # limitations under the License. """English-French WMT Translate dataset.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow_datasets.public_api as tfds from tensorflow_datasets.translate import wmt TRANSLATE_DATASETS = { "wmt10_giga_fren_enfr": wmt.TranslateData(url="http://www.statmt.org/wmt10/training-giga-fren.tar", language_to_file={ "en": "giga-fren.release2.fixed.en.gz", "fr": "giga-fren.release2.fixed.fr.gz", }), "wmt13_commoncrawl_enfr": wmt.TranslateData( url="http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", language_to_file={ "en": "commoncrawl.fr-en.en", "fr": "commoncrawl.fr-en.fr", }), "wmt13_europarl_enfr": wmt.TranslateData( url="http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", language_to_file={ "en": "training/europarl-v7.fr-en.en", "fr": "training/europarl-v7.fr-en.fr",
# See the License for the specific language governing permissions and # limitations under the License. """English-German WMT translation dataset.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow_datasets.public_api as tfds from tensorflow_datasets.translate import wmt TRANSLATE_DATASETS = { "wmt13_commoncrawl_ende": wmt.TranslateData( url="http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", language_to_file={ "en": "commoncrawl.de-en.en", "de": "commoncrawl.de-en.de", }), "wmt13_europarl_ende": wmt.TranslateData( url="http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", language_to_file={ "en": "training/europarl-v7.de-en.en", "de": "training/europarl-v7.de-en.de", }), "wmt17_newstest13": wmt.TranslateData( url="http://data.statmt.org/wmt17/translation-task/dev.tgz", language_to_file={ "en": "dev/newstest2013.en", "de": "dev/newstest2013.de",