def train_valid_datasets_provider():
    """Build the training and validation ``RaceDataset`` pair.

    Returns:
        A ``(train_dataset, valid_dataset)`` tuple. The first is built from
        ``args.train_data`` under the name ``'training'``, the second from
        ``args.valid_data`` under the name ``'validation'``. Both share the
        same tokenizer and ``args.seq_length``.
    """
    args = get_args()
    tokenizer = get_tokenizer()

    def _make_split(split_name, data_paths):
        # Both splits differ only in their name and data source.
        return RaceDataset(split_name, data_paths, tokenizer, args.seq_length)

    train_dataset = _make_split('training', args.train_data)
    valid_dataset = _make_split('validation', args.valid_data)
    return train_dataset, valid_dataset

def single_dataset_provider(datapath):
    """Build a ``RaceDataset`` for a single data path.

    The dataset name is derived from ``datapath``: the text after the last
    occurrence of ``'RACE'`` is taken, leading and trailing ``'/'`` are
    stripped, and any remaining ``'/'`` become ``'-'``. For example,
    ``'data/RACE/test/high'`` yields the name ``'test-high'``. If ``'RACE'``
    does not occur in ``datapath``, the whole path is used as the starting
    point for the name.

    Args:
        datapath: Path string identifying the data to load.

    Returns:
        A ``RaceDataset`` constructed from ``[datapath]`` with the derived
        name, the tokenizer, and ``args.seq_length``.
    """
    # Resolve the arguments and tokenizer here, the same way
    # train_valid_datasets_provider does, so this function does not depend on
    # `args` / `tokenizer` having been bound by some surrounding scope.
    # NOTE(review): assumes get_args()/get_tokenizer() return the same objects
    # any enclosing scope would have supplied -- confirm against the caller.
    args = get_args()
    tokenizer = get_tokenizer()

    name = datapath.split('RACE')[-1].strip('/').replace('/', '-')
    return RaceDataset(name, [datapath], tokenizer, args.seq_length)