def make_dataset(path, impl, fix_lua_indexing=False, dictionary=None):
    if impl == 'raw' and IndexedRawTextDataset.exists(path):
        assert dictionary is not None
        return IndexedRawTextDataset(path, dictionary)
    elif impl == 'lazy' and IndexedDataset.exists(path):
        return IndexedDataset(path, fix_lua_indexing=fix_lua_indexing)
    elif impl == 'cached' and IndexedDataset.exists(path):
        return IndexedCachedDataset(path, fix_lua_indexing=fix_lua_indexing)
    elif impl == 'mmap' and MMapIndexedDataset.exists(path):
        return MMapIndexedDataset(path)
    elif impl == 'fasta' and FastaDataset.exists(path):
        from fairseq.data.fasta_dataset import EncodedFastaDataset
        return EncodedFastaDataset(path, dictionary)
    return None
예제 #2
0
def make_dataset(path, impl, fix_lua_indexing=False, dictionary=None):
    if impl == "raw" and IndexedRawTextDataset.exists(path):
        assert dictionary is not None
        return IndexedRawTextDataset(path, dictionary)
    elif impl == "lazy" and IndexedDataset.exists(path):
        return IndexedDataset(path, fix_lua_indexing=fix_lua_indexing)
    elif impl == "cached" and IndexedDataset.exists(path):
        return IndexedCachedDataset(path, fix_lua_indexing=fix_lua_indexing)
    elif impl == "mmap" and MMapIndexedDataset.exists(path):
        return MMapIndexedDataset(path)
    elif impl == "fasta" and FastaDataset.exists(path):
        from fairseq.data.fasta_dataset import EncodedFastaDataset

        return EncodedFastaDataset(path, dictionary)
    elif impl == "huffman" and HuffmanMMapIndexedDataset.exists(path):
        return HuffmanMMapIndexedDataset(path)
    return None