Example no. 1
import torch.utils.data as td  # PyTorch DataLoader utilities

# `dataset`, `utils`, and the MODES constant are project-local and assumed
# to be importable alongside this snippet.
def create_dataloader(args, vocabs=None, val=False):
    argvalpfx = "val_" if val else ""
    paths = [getattr(args, f"{argvalpfx}{mode}_path") for mode in MODES]
    if vocabs is None:
        vocabs = [getattr(args, f"{mode}_vocab") for mode in MODES]
        vocabs = [utils.load_pkl(v) if v is not None else None for v in vocabs]
    dset = dataset.TextSequenceDataset(
        paths=paths,
        feats=["string", "tensor"],
        vocabs=vocabs,
        vocab_limit=args.vocab_limit,
        pad_eos=args.eos,
        pad_bos=args.bos,
        unk=args.unk,
    )
    # `vocabs` was rebound to a list above, so the original `if vocabs is None`
    # check could never fire; instead, fall back to the vocabularies built by
    # the dataset whenever any vocab file was missing.
    if any(v is None for v in vocabs):
        vocabs = dset.vocabs
    collator = dataset.TextSequenceBatchCollator(
        pad_idxs=[len(v) for v in vocabs]
    )
    return td.DataLoader(
        dataset=dset,
        batch_size=args.batch_size,
        shuffle=False if val else args.shuffle,
        num_workers=args.data_workers,
        collate_fn=collator,
        pin_memory=args.pin_memory
    )
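Usage note: the sketch below shows how create_dataloader above might be driven from an argparse namespace. It is a hedged illustration, not code from the source project: the attribute names and paths are assumptions inferred from what the function reads, and it presumes MODES contains a single "word" entry plus the project-local dataset and utils modules and real data files.

import argparse

# Hypothetical driver (assumed names, not from the original project).
args = argparse.Namespace(
    word_path="data/train.txt",       # assumed: training text, one sequence per line
    val_word_path="data/valid.txt",   # assumed: validation text
    word_vocab=None,                  # None => let TextSequenceDataset build the vocab
    vocab_limit=10000,
    bos=True, eos=True, unk=True,
    batch_size=32,
    shuffle=True,
    data_workers=2,
    pin_memory=False,
)
train_loader = create_dataloader(args)          # shuffled training loader
val_loader = create_dataloader(args, val=True)  # deterministic validation loader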
Example no. 2
def prepare_dataset(args, vocab):
    dset = dataset.TextSequenceDataset(paths=[args.word_path],
                                       feats=["string", "tensor"],
                                       vocabs=[vocab],
                                       pad_eos=args.eos,
                                       pad_bos=args.bos,
                                       unk=args.unk)
    return dset
Example no. 3
def prepare_dataset(args, vocab_sents, vocab_labels, vocab_intents):
    dset = dataset.TextSequenceDataset(
        paths=[args.word_path, args.label_path, args.intent_path],
        feats=["string", "tensor"],
        vocabs=[vocab_sents, vocab_labels, vocab_intents],
        pad_eos=args.eos,
        pad_bos=args.bos,
        unk=args.unk)
    return dset
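Usage note: Example no. 3 pairs each input file with its own vocabulary, which fits a slot-filling / intent-detection setup (word, slot-label, and intent sequences). Below is a hedged call sketch; the file paths, pickle names, and the use of utils.load_pkl for loading vocabularies are assumptions carried over from Example no. 1.

import argparse
import utils  # project-local helper assumed available, as in Example no. 1

args = argparse.Namespace(
    word_path="data/train.words",      # assumed: token sequences, one utterance per line
    label_path="data/train.slots",     # assumed: aligned slot-label sequences
    intent_path="data/train.intents",  # assumed: one intent label per utterance
    bos=True, eos=True, unk=True,
)
# vocabularies assumed to be pickled elsewhere in the project
vocab_sents = utils.load_pkl("vocabs/words.pkl")
vocab_labels = utils.load_pkl("vocabs/slots.pkl")
vocab_intents = utils.load_pkl("vocabs/intents.pkl")
dset = prepare_dataset(args, vocab_sents, vocab_labels, vocab_intents)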
Example no. 4
def prepare_dataset(args, vocab):
    dset = dataset.TextSequenceDataset(
        path=args.data_path,
        feats=["string", "tensor"],
        vocab=vocab,
        pad_eos=args.eos,
        pad_bos=args.bos,
        unk=args.unk
    )
    return dset
Example no. 5
def create_dataloader(args):
    vocab = None
    if args.vocab is not None:
        vocab = utils.load_pkl(args.vocab)
    dset = dataset.TextSequenceDataset(
        path=args.data_path,
        feats=["string", "tensor"],
        vocab=vocab,
        vocab_limit=args.vocab_limit,
        pad_eos=args.eos,
        pad_bos=args.bos,
        unk=args.unk,
    )
    if vocab is None:
        vocab = dset.vocab
    collator = dataset.TextSequenceBatchCollator(pad_idx=len(vocab))
    return td.DataLoader(dataset=dset,
                         batch_size=args.batch_size,
                         shuffle=args.shuffle,
                         num_workers=args.data_workers,
                         collate_fn=collator,
                         pin_memory=args.pin_memory)
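Note: Examples no. 4 and 5 use a single-sequence variant of the same API (path/vocab/pad_idx rather than paths/vocabs/pad_idxs). The sketch below shows how the loader returned by Example no. 5 might be consumed; the attribute names are assumptions read off the function body, and the layout of each batch depends on TextSequenceBatchCollator, which is not shown here, so the loop only inspects it.

import argparse

# Hypothetical driver for Example no. 5 (assumed names and paths).
args = argparse.Namespace(
    data_path="data/train.txt",  # assumed path to the text file
    vocab=None,                  # None => build the vocabulary from the data
    vocab_limit=10000,
    bos=True, eos=True, unk=True,
    batch_size=32,
    shuffle=True,
    data_workers=2,
    pin_memory=False,
)
loader = create_dataloader(args)
for batch in loader:
    # batch layout is defined by TextSequenceBatchCollator (not shown above);
    # inspect it before relying on its structure
    print(type(batch))
    break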