def create_dataloader(args, vocabs=None, val=False):
    """Build a DataLoader over the train (or validation) text-sequence data.

    Args:
        args: parsed CLI namespace; read attributes are `{mode}_path` /
            `val_{mode}_path` and `{mode}_vocab` for each mode in MODES,
            plus vocab_limit, eos, bos, unk, batch_size, shuffle,
            data_workers, pin_memory.
        vocabs: optional pre-loaded vocab objects, one per mode. When None,
            vocabs are loaded from the pickle paths in `args.{mode}_vocab`
            (entries may be None, in which case the dataset builds them).
        val: when True, read `val_{mode}_path` attributes and disable
            shuffling.

    Returns:
        A torch.utils.data.DataLoader yielding collated batches.
    """
    argvalpfx = "val_" if val else ""
    paths = [getattr(args, f"{argvalpfx}{mode}_path") for mode in MODES]
    if vocabs is None:
        vocabs = [getattr(args, f"{mode}_vocab") for mode in MODES]
        vocabs = [utils.load_pkl(v) if v is not None else None for v in vocabs]
    dset = dataset.TextSequenceDataset(
        paths=paths,
        feats=["string", "tensor"],
        vocabs=vocabs,
        vocab_limit=args.vocab_limit,
        pad_eos=args.eos,
        pad_bos=args.bos,
        unk=args.unk,
    )
    # BUGFIX: the original checked `if vocabs is None` here, but `vocabs`
    # is always a list by this point (possibly containing None entries when
    # no vocab pickle path was configured), so the fallback to the
    # dataset-built vocabs never fired and `len(None)` crashed below.
    # Fall back per element to the vocab the dataset constructed.
    vocabs = [v if v is not None else dv for v, dv in zip(vocabs, dset.vocabs)]
    # Pad index is one past the last vocab id (len(v)), per mode.
    collator = dataset.TextSequenceBatchCollator(
        pad_idxs=[len(v) for v in vocabs]
    )
    return td.DataLoader(
        dataset=dset,
        batch_size=args.batch_size,
        # Validation data is never shuffled; training follows args.shuffle.
        shuffle=False if val else args.shuffle,
        num_workers=args.data_workers,
        collate_fn=collator,
        pin_memory=args.pin_memory,
    )
def prepare_dataset(args, vocab):
    """Construct a word-level TextSequenceDataset from `args.word_path`.

    Args:
        args: parsed CLI namespace providing word_path, eos, bos, unk.
        vocab: vocabulary object used to index the word sequences.

    Returns:
        A dataset.TextSequenceDataset over the single word-sequence file.
    """
    return dataset.TextSequenceDataset(
        paths=[args.word_path],
        feats=["string", "tensor"],
        vocabs=[vocab],
        pad_eos=args.eos,
        pad_bos=args.bos,
        unk=args.unk,
    )
def prepare_dataset(args, vocab_sents, vocab_labels, vocab_intents):
    """Construct a three-stream dataset: sentences, slot labels, intents.

    Args:
        args: parsed CLI namespace providing word_path, label_path,
            intent_path, eos, bos, unk.
        vocab_sents: vocabulary for the sentence stream.
        vocab_labels: vocabulary for the label stream.
        vocab_intents: vocabulary for the intent stream.

    Returns:
        A dataset.TextSequenceDataset aligned across the three files.
    """
    file_paths = [args.word_path, args.label_path, args.intent_path]
    stream_vocabs = [vocab_sents, vocab_labels, vocab_intents]
    return dataset.TextSequenceDataset(
        paths=file_paths,
        feats=["string", "tensor"],
        vocabs=stream_vocabs,
        pad_eos=args.eos,
        pad_bos=args.bos,
        unk=args.unk,
    )
def prepare_dataset(args, vocab):
    """Construct a TextSequenceDataset from `args.data_path`.

    NOTE: unlike the multi-stream variants, this constructor takes
    singular `path=` / `vocab=` keyword arguments.

    Args:
        args: parsed CLI namespace providing data_path, eos, bos, unk.
        vocab: vocabulary object used to index the sequences.

    Returns:
        A dataset.TextSequenceDataset over the single data file.
    """
    return dataset.TextSequenceDataset(
        path=args.data_path,
        feats=["string", "tensor"],
        vocab=vocab,
        pad_eos=args.eos,
        pad_bos=args.bos,
        unk=args.unk,
    )
def create_dataloader(args):
    """Build a DataLoader over the text-sequence data at `args.data_path`.

    Loads the vocab pickle at `args.vocab` when one is configured;
    otherwise the vocab built by the dataset itself is used. The collator
    pads with index `len(vocab)` (one past the last vocab id).

    Args:
        args: parsed CLI namespace; read attributes are vocab, data_path,
            vocab_limit, eos, bos, unk, batch_size, shuffle, data_workers,
            pin_memory.

    Returns:
        A torch.utils.data.DataLoader yielding collated batches.
    """
    vocab = utils.load_pkl(args.vocab) if args.vocab is not None else None
    dset = dataset.TextSequenceDataset(
        path=args.data_path,
        feats=["string", "tensor"],
        vocab=vocab,
        vocab_limit=args.vocab_limit,
        pad_eos=args.eos,
        pad_bos=args.bos,
        unk=args.unk,
    )
    if vocab is None:
        # No pickle supplied: adopt the vocab the dataset constructed.
        vocab = dset.vocab
    collator = dataset.TextSequenceBatchCollator(pad_idx=len(vocab))
    return td.DataLoader(
        dataset=dset,
        batch_size=args.batch_size,
        shuffle=args.shuffle,
        num_workers=args.data_workers,
        collate_fn=collator,
        pin_memory=args.pin_memory,
    )