def shard_names(spec):
    """Expand a possibly sharded file spec into a list of file names.

    A spec of the form ``name@N`` yields the N names returned by
    ``base.filename_for_shard(name, N, i)`` for ``i`` in ``0 .. N-1``.
    A spec without an ``@`` is returned unchanged as a one-element list.

    Raises:
      ValueError: If the text after the first ``@`` is not an integer.
    """
    if '@' not in spec:
        return [spec]

    # Only the first '@' separates the name from the shard count.
    prefix, _, count_text = spec.partition('@')
    total = int(count_text)

    names = []
    for index in range(total):
        names.append(base.filename_for_shard(prefix, total, index))
    return names
def secondary_store(self, base_path, mode='r', compression=None):
    """Return a store that mimics the sharding of the input store.

    The returned store can be used as a secondary output/input: it is opened
    on the file for this object's own shard, i.e. the name returned by
    ``base.filename_for_shard(base_path, self.shards, self.task_id)``.

    Args:
      base_path: Base file name, optionally suffixed with ``@N``. When the
        suffix is present, N must be equal to ``self.shards``.
      mode: Forwarded to ``store.SingleStore``.
      compression: Forwarded to ``store.SingleStore``.

    Returns:
      A ``store.SingleStore`` opened on this shard's file.

    Raises:
      ValueError: If the ``@N`` suffix is not an integer or differs from
        ``self.shards``.
    """
    if '@' in base_path:
        # Split on the first '@' only, like the other sharding helpers in
        # this file do.
        base_path, sharding = base_path.split('@', 1)
        requested = int(sharding)
        if requested != self.shards:
            # ValueError exists on both Python 2 and 3, and on Python 2 it is
            # a subclass of StandardError, so existing handlers still match.
            raise ValueError(
                'secondary_store sharding should be the same as input '
                'sharding. (%s vs %s)' % (requested, self.shards))

    shard_path = base.filename_for_shard(base_path, self.shards, self.task_id)
    return store.SingleStore(shard_path, mode=mode, compression=compression)
def __init__(self, fname, shards=None, mode='r', DataStoreType=SingleStore,
             compression=None, buffering=-1):
    """Open every shard of a (possibly sharded) store.

    The shard count is resolved in this order:
      1. the explicit ``shards`` argument (``fname`` is then used verbatim
         as the base name; an ``@N`` suffix is not stripped);
      2. an ``@N`` suffix on ``fname``;
      3. if ``fname`` does not exist on disk, the second four-character
         field of the first file matching ``<fname>-????-????.dst``.
    If none of these yields a count, ``fname`` is opened as one unsharded
    store.

    Args:
      fname: Base file name, optionally suffixed with ``@N``.
      shards: Number of shards, or None to detect it as described above.
      mode: Forwarded to ``DataStoreType``.
      DataStoreType: Callable used to open each shard.
      compression: Forwarded to ``DataStoreType``.
      buffering: Forwarded to ``DataStoreType``.

    Raises:
      ValueError: If the shard count taken from the ``@N`` suffix or from a
        matching file name is not an integer.
    """
    self._shards = []

    if shards is None:
        if '@' in fname:
            basename, shards = fname.split('@', 1)
            shards = int(shards)
        else:
            basename = fname
            if not os.path.exists(basename):
                if basename.endswith('-'):
                    # A trailing '-' could come from shell expansion of a
                    # shard file name; drop it before looking for shards.
                    basename = basename[:-1]
                existing_file = glob.glob(basename + '-????-????.dst')
                if existing_file:
                    # NOTE(review): assumes shard files are named
                    # '<basename>-<4 chars>-<4 chars>.dst' with the second
                    # field holding the total shard count -- confirm against
                    # base.filename_for_shard.
                    start = len(basename) + 6
                    shards = int(existing_file[0][start:start + 4])
    else:
        basename = fname

    # Forward the caller's buffering value; it used to be accepted but
    # silently replaced by a hard-coded -1.
    if shards is None:
        self._shards = [
            DataStoreType(basename, mode=mode, compression=compression,
                          buffering=buffering)
        ]
    else:
        self._shards = [
            DataStoreType(base.filename_for_shard(basename, shards, shard),
                          mode=mode, compression=compression,
                          buffering=buffering)
            for shard in range(shards)
        ]