def get_dict(dict_size, reverse=True):
    """
    Load the source and target dictionaries from the WMT14 training archive.

    The dictionaries are read by ``__read_to_dict__`` in the form
    ``{'a': '001', 'b': '002', ...}``. With ``reverse`` set (the default)
    keys and values are swapped, giving ``{'001': 'a', '002': 'b', ...}``.

    :param dict_size: Size argument forwarded to ``__read_to_dict__``.
    :param reverse: Whether to swap keys and values in both dictionaries.
    :return: A ``(src_dict, trg_dict)`` pair.
    :rtype: tuple
    """
    archive_path = download(URL_TRAIN, 'wmt14', MD5_TRAIN)
    source_vocab, target_vocab = __read_to_dict__(archive_path, dict_size)

    if not reverse:
        return source_vocab, target_vocab

    # Flip each mapping so that former values become the lookup keys.
    flipped_source = {value: key for key, value in source_vocab.items()}
    flipped_target = {value: key for key, value in target_vocab.items()}
    return flipped_source, flipped_target
def test(dict_size):
    """
    Build the WMT14 test set reader creator.

    Each sample produced by the reader is a source language word ID
    sequence, a target language word ID sequence and a next word ID
    sequence.

    :param dict_size: Dictionary size forwarded to ``reader_creator``.
    :return: Test reader creator
    :rtype: callable
    """
    # The test files live inside the same archive as the training data.
    archive_path = download(URL_TRAIN, 'wmt14', MD5_TRAIN)
    return reader_creator(archive_path, 'test/test', dict_size)
def test(dict_size):
    """
    Build the WMT14 test set reader creator.

    Each sample produced by the reader is a source language word ID
    sequence, a target language word ID sequence and a next word ID
    sequence.

    :param dict_size: Dictionary size forwarded to ``reader_creator``.
    :return: Test reader creator
    :rtype: callable
    """
    # The test files live inside the same archive as the training data.
    wmt14_archive = download(URL_TRAIN, 'wmt14', MD5_TRAIN)
    member_prefix = 'test/test'
    return reader_creator(wmt14_archive, member_prefix, dict_size)
def fetch():
    """
    Download the WMT14 training archive and the model archive.

    Both files are requested through ``download`` under the ``'wmt14'``
    name, training archive first. Nothing is returned.
    """
    archives = (
        (URL_TRAIN, MD5_TRAIN),
        (URL_MODEL, MD5_MODEL),
    )
    for url, checksum in archives:
        download(url, 'wmt14', checksum)
def model():
    """
    Download the WMT14 model archive and load its parameters.

    The downloaded file is opened with ``gzip.open`` and handed to
    ``Parameters.from_tar``.

    :return: The object returned by ``Parameters.from_tar``.
    """
    archive_path = download(URL_MODEL, 'wmt14', MD5_MODEL)
    # Returning from inside the ``with`` still closes the gzip stream.
    with gzip.open(archive_path, 'r') as archive:
        return Parameters.from_tar(archive)
def gen(dict_size):
    """
    Build the reader creator for the ``gen/gen`` part of the WMT14 archive.

    :param dict_size: Dictionary size forwarded to ``reader_creator``.
    :return: Whatever ``reader_creator`` returns for the ``gen/gen`` files.
    """
    archive_path = download(URL_TRAIN, 'wmt14', MD5_TRAIN)
    return reader_creator(archive_path, 'gen/gen', dict_size)
def gen(dict_size):
    """
    Build the reader creator for the ``gen/gen`` part of the WMT14 archive.

    :param dict_size: Dictionary size forwarded to ``reader_creator``.
    :return: Whatever ``reader_creator`` returns for the ``gen/gen`` files.
    """
    wmt14_archive = download(URL_TRAIN, 'wmt14', MD5_TRAIN)
    member_prefix = 'gen/gen'
    return reader_creator(wmt14_archive, member_prefix, dict_size)
def val():
    """
    Build the validation reader: 1449 images in HWC order.

    :return: Whatever ``reader_creator`` returns for the ``'val'`` split.
    """
    voc_archive = download(VOC_URL, CACHE_DIR, VOC_MD5)
    return reader_creator(voc_archive, 'val')
def test():
    """
    Build the test reader: 1464 images in HWC order.

    :return: Whatever ``reader_creator`` returns for the ``'train'`` split.
    """
    voc_archive = download(VOC_URL, CACHE_DIR, VOC_MD5)
    # NOTE(review): the test reader is built from the 'train' split name;
    # presumably intentional given the image count above -- confirm.
    return reader_creator(voc_archive, 'train')
def train():
    """
    Build the training reader: 2913 images in HWC order.

    :return: Whatever ``reader_creator`` returns for the ``'trainval'`` split.
    """
    voc_archive = download(VOC_URL, CACHE_DIR, VOC_MD5)
    return reader_creator(voc_archive, 'trainval')
def train(dict_size):
    """
    Build the reader creator for the ``train/train`` part of the WMT14 archive.

    :param dict_size: Dictionary size forwarded to ``reader_creator``.
    :return: Whatever ``reader_creator`` returns for the ``train/train`` files.
    """
    archive_path = download(URL_TRAIN, 'wmt14', MD5_TRAIN)
    return reader_creator(archive_path, 'train/train', dict_size)
def fetch():
    """
    Download the WMT14 training archive.

    The file is requested through ``download`` under the ``'wmt14'`` name.
    Nothing is returned.
    """
    cache_name = 'wmt14'
    download(URL_TRAIN, cache_name, MD5_TRAIN)
def test(dict_size):
    """
    Build the reader creator for the ``test/test`` part of the WMT14 archive.

    :param dict_size: Dictionary size forwarded to ``reader_creator``.
    :return: Whatever ``reader_creator`` returns for the ``test/test`` files.
    """
    archive_path = download(URL_TRAIN, 'wmt14', MD5_TRAIN)
    return reader_creator(archive_path, 'test/test', dict_size)