def make_dataset(path, impl, fix_lua_indexing=False, dictionary=None): if impl == 'raw' and IndexedRawTextDataset.exists(path): assert dictionary is not None return IndexedRawTextDataset(path, dictionary) elif impl == 'lazy' and IndexedDataset.exists(path): return IndexedDataset(path, fix_lua_indexing=fix_lua_indexing) elif impl == 'cached' and IndexedDataset.exists(path): return IndexedCachedDataset(path, fix_lua_indexing=fix_lua_indexing) elif impl == 'mmap' and MMapIndexedDataset.exists(path): return MMapIndexedDataset(path) elif impl == 'fasta' and FastaDataset.exists(path): from fairseq.data.fasta_dataset import EncodedFastaDataset return EncodedFastaDataset(path, dictionary) return None
def infer_dataset_impl(path): if IndexedRawTextDataset.exists(path): return "raw" elif IndexedDataset.exists(path): with open(index_file_path(path), "rb") as f: magic = f.read(8) if magic == IndexedDataset._HDR_MAGIC: return "cached" elif magic == MMapIndexedDataset.Index._HDR_MAGIC[:8]: return "mmap" else: return None elif FastaDataset.exists(path): return "fasta" else: return None
def infer_dataset_impl(path): if IndexedRawTextDataset.exists(path): return 'raw' elif IndexedDataset.exists(path): with open(index_file_path(path), 'rb') as f: magic = f.read(8) if magic == IndexedDataset._HDR_MAGIC: return 'cached' elif magic == MMapIndexedDataset.Index._HDR_MAGIC[:8]: return 'mmap' else: return None elif FastaDataset.exists(path): return 'fasta' else: return None
def make_dataset(path, impl, fix_lua_indexing=False, dictionary=None): if impl == "raw" and IndexedRawTextDataset.exists(path): assert dictionary is not None return IndexedRawTextDataset(path, dictionary) elif impl == "lazy" and IndexedDataset.exists(path): return IndexedDataset(path, fix_lua_indexing=fix_lua_indexing) elif impl == "cached" and IndexedDataset.exists(path): return IndexedCachedDataset(path, fix_lua_indexing=fix_lua_indexing) elif impl == "mmap" and MMapIndexedDataset.exists(path): return MMapIndexedDataset(path) elif impl == "fasta" and FastaDataset.exists(path): from fairseq.data.fasta_dataset import EncodedFastaDataset return EncodedFastaDataset(path, dictionary) elif impl == "huffman" and HuffmanMMapIndexedDataset.exists(path): return HuffmanMMapIndexedDataset(path) return None