def dset(ps, kind):
    assert ps.dset.startswith('mnist')
    p = pth.Path(ps.dir_data) / ps.dset / kind
    if not p.exists():
        # materialize the split once and cache it as records
        vs = tuple(reader(ps, kind))
        R.dump(p / ps.dset, lambda: recorder(vs))
    ds = R.dataset(p / ps.dset)
    return ds, feats
def dset(ps, kind):
    assert ps.dset == 'enwik8'
    p = pth.Path(ps.dir_data) / ps.dset
    pv = p / ps.vocab_path
    p = p / kind
    if not p.exists():
        # tokenize the raw split once and cache it as records
        tokenizer = encoder.tokenizer_for(ps)
        tp = F.Topic(ps.dset, tokenizer(reader(ps, kind)))
        R.dump(p / ps.dset, lambda: recorder(tp))
        if kind == 'train' and not pv.exists():
            # the vocab is derived from the training split only
            R.dump(pv, lambda: [tokenizer.vocab.record()])
    ds = R.dataset(p / ps.dset)
    return ds, feats
def dset(ps, kind):
    assert ps.dset == 'squad'
    p = pth.Path(ps.dir_data) / ps.dset
    pv = p / ps.vocab_path
    p = p / kind
    if not p.exists():
        # tokenize the raw split once and dump every registered subset
        tokenizer = encoder.tokenizer_for(ps)
        ts = F.Topics(tokenizer(reader(ps, kind)))
        for n in registry['all']:
            # bind n as a default so each lambda keeps its own subset name
            R.dump(p / n, lambda n=n: registry[n](ts))
        if kind == 'train' and not pv.exists():
            # the vocab is derived from the training split only
            R.dump(pv, lambda: [tokenizer.vocab.record()])
    ds = R.dataset(p / ps.dset_subset)
    return ds, feats[ps.dset_subset]
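All three variants share the same call shape: they take a params object ps and a split name kind, build and cache the records on first use, and return a dataset together with its feature spec. The sketch below shows one possible call, assuming ps is a plain namespace; the SimpleNamespace choice and every attribute value are hypothetical stand-ins for however the surrounding code actually builds its params, and only the attributes read by the squad variant above are set.

from types import SimpleNamespace

# Hypothetical params; only the attributes the squad dset() reads are set here.
ps = SimpleNamespace(
    dir_data='/tmp/data',        # root directory for cached records (assumed layout)
    dset='squad',                # selects the squad variant of dset()
    dset_subset='train',         # which recorded subset to load (hypothetical name)
    vocab_path='vocab.records',  # file the tokenizer vocab is dumped to (hypothetical)
)

# First call tokenizes and dumps the records; later calls reload the cache.
ds, fs = dset(ps, 'train')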