Exemple #1
0
def _load_dataset(path, impl, dict):
    if impl == 'raw':
        raise NotImplementedError("No such {} dataset implementation.".format(impl))
    elif impl == 'mmap':
        dataset = indexed_dataset.MMapIndexedDataset(path=path)
    else:
        raise NotImplementedError("No such {} dataset implementation.".format(impl))
    return dataset
def _load_dataset(path, impl, dict):
    if impl == 'mmap':
        # mmap dataset has been numberized, no need for dict
        src_dataset = indexed_dataset.MMapIndexedDataset(path=path)
    else:
        raise NotImplementedError(
            "No such {} dataset implementation.".format(impl))
    return src_dataset
Exemple #3
0
def _load_dataset(paths, impl, dict=None):
    if impl == 'raw':
        raise NotImplementedError(impl)
    elif impl == 'mmap':
        # mmap dataset has been numberized, no need for dict
        dataset = [indexed_dataset.MMapIndexedDataset(path=path) for path in paths]
    else:
        raise NotImplementedError("No such {} dataset implementation.".format(impl))
    return dataset
Exemple #4
0
def load_lang_dataset_denoising(path, impl, dict):
    """Build the dataset at ``path`` using the requested implementation.

    * ``'raw'``  -> ``IndexedRawTextDataset(path=path, dictionary=dict)``
    * ``'mmap'`` -> ``indexed_dataset.MMapIndexedDataset(path=path)``

    Any other ``impl`` raises ``NotImplementedError``.
    """
    if impl == 'raw':
        return IndexedRawTextDataset(path=path, dictionary=dict)
    if impl == 'mmap':
        # The mmap dataset is already numberized, so `dict` is not needed.
        return indexed_dataset.MMapIndexedDataset(path=path)

    message = "No such {} dataset implementation.".format(impl)
    raise NotImplementedError(message)
def _load_dataset(paths, impl, dict=None, sample_portion=None):
    if impl == 'raw':
        raise NotImplementedError(impl)
    elif impl == 'mmap':
        # mmap dataset has been numberized, no need for dict
        prev_paths, cur_path = paths[:-1], paths[-1]
        dataset = [indexed_dataset.MMapIndexedDataset(path=cur_path)]
        if sample_portion is not None and len(prev_paths) > 0:
            sample_size_per_task = int(
                len(dataset[0]) * sample_portion // len(prev_paths))
            for p_path in prev_paths:
                p_dataset = indexed_dataset.MMapIndexedDataset(path=p_path)
                dataset.append(
                    SliceDataset(p_dataset, end=sample_size_per_task))
        else:
            for p_path in prev_paths:
                p_dataset = indexed_dataset.MMapIndexedDataset(path=p_path)
                dataset.append(p_dataset)
    else:
        raise NotImplementedError(
            "No such {} dataset implementation.".format(impl))
    return dataset
Exemple #6
0
def _load_dataset(path, impl, dict):
    if impl == 'raw':
        raise NotImplementedError("No such {} dataset implementation.".format(impl))
    elif impl == 'mmap':
        if str.endswith(path, 'bin_ast'):
            from dgl.data.utils import load_graphs
            src_dataset, _ = load_graphs(f'{path}.mmap')
            src_dataset = GraphDataset(src_dataset)
        else:

            src_dataset = indexed_dataset.MMapIndexedDataset(path=path)
    else:
        raise NotImplementedError("No such {} dataset implementation.".format(impl))
    return src_dataset
Exemple #7
0
def load_mmap_dataset(dataset):
    """Return an ``indexed_dataset.MMapIndexedDataset`` built from ``dataset``.

    ``dataset`` is forwarded unchanged as the constructor's single positional
    argument (presumably the dataset path -- confirm against callers).
    """
    mmap_dataset = indexed_dataset.MMapIndexedDataset(dataset)
    return mmap_dataset