def get_cc_shard(self, shard: int) -> process_wet_file.CCShardReader:
    """Build the ``CCShardReader`` for one shard of ``self.dump``.

    Args:
        shard: Forwarded to the reader as its ``shard`` argument.

    Returns:
        A ``process_wet_file.CCShardReader`` constructed from ``self.dump``
        and the ``num_shards``, ``num_segments_per_shard`` and ``min_len``
        settings stored on ``self``.
    """
    reader_cls = process_wet_file.CCShardReader
    dump = self.dump
    # Everything except the dump is handed over by keyword.
    reader_options = {
        "shard": shard,
        "num_shards": self.num_shards,
        "num_segments_per_shard": self.num_segments_per_shard,
        "min_len": self.min_len,
    }
    return reader_cls(dump, **reader_options)
def get_cc_shard(self, shard: int) -> process_wet_file.CCShardReader:
    """Build the ``CCShardReader`` for one shard, with an optional disk cache.

    When ``self.cache_dir`` is set, the directory
    ``self.cache_dir / self.dump`` is created if it does not exist yet and
    is passed to the reader as ``cache_dir``. Otherwise the reader receives
    ``cache_dir=None``.

    Args:
        shard: Forwarded to the reader as its ``shard`` argument.

    Returns:
        A ``process_wet_file.CCShardReader`` constructed from ``self.dump``
        and the ``num_shards``, ``num_segments_per_shard`` and ``min_len``
        settings stored on ``self``.

    Raises:
        OSError: If a cache directory cannot be created (for example the
            path exists but is not a directory, or permissions are missing).
    """
    dump_cache: Optional[Path] = None
    if self.cache_dir:
        # parents=True: a nested cache location (e.g. "data/cache/wet") must
        # not fail just because its parent directories are not there yet.
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        dump_cache = self.cache_dir / self.dump
        dump_cache.mkdir(exist_ok=True)
    return process_wet_file.CCShardReader(
        self.dump,
        shard=shard,
        num_shards=self.num_shards,
        num_segments_per_shard=self.num_segments_per_shard,
        min_len=self.min_len,
        cache_dir=dump_cache,
    )