def load_and_cache_examples(args, tokenizer, evaluate=False):
    """Load the train or eval split as a block-sized text dataset.

    TextDataset_Split and TextDataset_2Tokenizers are assumed to be
    defined or imported elsewhere in this module.
    """
    file_path = args.eval_data_file if evaluate else args.train_data_file
    if isinstance(tokenizer, list):
        # A list of tokenizers means the encoder and decoder use different
        # vocabularies, so each example must be tokenized twice.
        dataset = TextDataset_2Tokenizers(tokenizer, args, file_path=file_path,
                                          block_size=args.block_size)
    else:
        dataset = TextDataset_Split(tokenizer, args, file_path=file_path,
                                    block_size=args.block_size)
    return dataset
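# Minimal usage sketch, not part of the original code. Assumptions: `args`
# only needs the three fields the function reads; the data file paths below
# are hypothetical; the tokenizer is a Hugging Face GPT2Tokenizer, standing
# in for whatever tokenizer the surrounding project actually uses; and the
# dataset classes referenced above exist in scope.
import argparse

from transformers import GPT2Tokenizer

args = argparse.Namespace(
    train_data_file="data/train.txt",  # hypothetical paths
    eval_data_file="data/valid.txt",
    block_size=512,
)
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# Single tokenizer -> TextDataset_Split; a [encoder_tok, decoder_tok] list
# would select TextDataset_2Tokenizers instead.
train_dataset = load_and_cache_examples(args, tokenizer)
eval_dataset = load_and_cache_examples(args, tokenizer, evaluate=True)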