def make_dataset(path, impl, fix_lua_indexing=False, dictionary=None):
    if impl == 'raw' and IndexedRawTextDataset.exists(path):
        assert dictionary is not None
        return IndexedRawTextDataset(path, dictionary)
    elif impl == 'lazy' and IndexedDataset.exists(path):
        return IndexedDataset(path, fix_lua_indexing=fix_lua_indexing)
    elif impl == 'cached' and IndexedDataset.exists(path):
        return IndexedCachedDataset(path, fix_lua_indexing=fix_lua_indexing)
    elif impl == 'mmap' and MMapIndexedDataset.exists(path):
        return MMapIndexedDataset(path)
    elif impl == 'fasta' and FastaDataset.exists(path):
        from fairseq.data.fasta_dataset import EncodedFastaDataset
        return EncodedFastaDataset(path, dictionary)
    return None
Example #2
0
def infer_dataset_impl(path):
    if IndexedRawTextDataset.exists(path):
        return "raw"
    elif IndexedDataset.exists(path):
        with open(index_file_path(path), "rb") as f:
            magic = f.read(8)
            if magic == IndexedDataset._HDR_MAGIC:
                return "cached"
            elif magic == MMapIndexedDataset.Index._HDR_MAGIC[:8]:
                return "mmap"
            else:
                return None
    elif FastaDataset.exists(path):
        return "fasta"
    else:
        return None
def infer_dataset_impl(path):
    if IndexedRawTextDataset.exists(path):
        return 'raw'
    elif IndexedDataset.exists(path):
        with open(index_file_path(path), 'rb') as f:
            magic = f.read(8)
            if magic == IndexedDataset._HDR_MAGIC:
                return 'cached'
            elif magic == MMapIndexedDataset.Index._HDR_MAGIC[:8]:
                return 'mmap'
            else:
                return None
    elif FastaDataset.exists(path):
        return 'fasta'
    else:
        return None
Example #4
0
def make_dataset(path, impl, fix_lua_indexing=False, dictionary=None):
    if impl == "raw" and IndexedRawTextDataset.exists(path):
        assert dictionary is not None
        return IndexedRawTextDataset(path, dictionary)
    elif impl == "lazy" and IndexedDataset.exists(path):
        return IndexedDataset(path, fix_lua_indexing=fix_lua_indexing)
    elif impl == "cached" and IndexedDataset.exists(path):
        return IndexedCachedDataset(path, fix_lua_indexing=fix_lua_indexing)
    elif impl == "mmap" and MMapIndexedDataset.exists(path):
        return MMapIndexedDataset(path)
    elif impl == "fasta" and FastaDataset.exists(path):
        from fairseq.data.fasta_dataset import EncodedFastaDataset

        return EncodedFastaDataset(path, dictionary)
    elif impl == "huffman" and HuffmanMMapIndexedDataset.exists(path):
        return HuffmanMMapIndexedDataset(path)
    return None