def make_dataset(path, impl, fix_lua_indexing=False, dictionary=None): if impl == 'raw' and IndexedRawTextDataset.exists(path): assert dictionary is not None return IndexedRawTextDataset(path, dictionary) elif impl == 'lazy' and IndexedDataset.exists(path): return IndexedDataset(path, fix_lua_indexing=fix_lua_indexing) elif impl == 'cached' and IndexedDataset.exists(path): return IndexedCachedDataset(path, fix_lua_indexing=fix_lua_indexing) elif impl == 'mmap' and MMapIndexedDataset.exists(path): return MMapIndexedDataset(path) elif impl == 'fasta' and FastaDataset.exists(path): from fairseq.data.fasta_dataset import EncodedFastaDataset return EncodedFastaDataset(path, dictionary) return None
def make_dataset(path, impl, fix_lua_indexing=False, dictionary=None): if impl == "raw" and IndexedRawTextDataset.exists(path): assert dictionary is not None return IndexedRawTextDataset(path, dictionary) elif impl == "lazy" and IndexedDataset.exists(path): return IndexedDataset(path, fix_lua_indexing=fix_lua_indexing) elif impl == "cached" and IndexedDataset.exists(path): return IndexedCachedDataset(path, fix_lua_indexing=fix_lua_indexing) elif impl == "mmap" and MMapIndexedDataset.exists(path): return MMapIndexedDataset(path) elif impl == "fasta" and FastaDataset.exists(path): from fairseq.data.fasta_dataset import EncodedFastaDataset return EncodedFastaDataset(path, dictionary) elif impl == "huffman" and HuffmanMMapIndexedDataset.exists(path): return HuffmanMMapIndexedDataset(path) return None