def _to_cbor(args):
    '''Copy the items of every input chunk file into one CBOR chunk
    written to stdout.

    Each path in ``args.input_path`` is opened as a ``Chunk`` in
    ``'rb'`` mode and its items are added, in order, to a single
    ``CborChunk`` wrapping ``sys.stdout``.  When ``args.limit`` is not
    ``None``, copying stops as soon as that many items have been
    written; a limit of zero (or less) writes nothing.  On success a
    one-line summary with the item count is written to stderr.

    Both the input and the output chunk are closed even if reading or
    writing raises.

    :param args: object providing ``input_path`` (iterable of file
      paths) and ``limit`` (int or ``None``)
    '''
    limit = args.limit
    count = 0
    ochunk = CborChunk(file_obj=sys.stdout, mode='wb',
                       write_wrapper=to_primitives)
    try:
        for fpath in args.input_path:
            # Test the limit before opening the next file so that a
            # limit of zero emits no items, and no file is opened once
            # the limit has already been reached.
            if limit is not None and count >= limit:
                break
            ichunk = Chunk(path=fpath, mode='rb')
            try:
                for si in ichunk:
                    ochunk.add(si)
                    count += 1
                    if limit is not None and count >= limit:
                        break
            finally:
                # Release the input chunk even when add() or the
                # iteration itself fails part-way through.
                ichunk.close()
    finally:
        ochunk.close()
    sys.stderr.write('wrote {0} items\n'.format(count))
class ChunkRoller(object):
    '''Write a stream of items into `chunk_dir` as a series of chunk
    files holding at most `chunk_max` items each.

    Items go into a temporary chunk file inside `chunk_dir`.  When the
    open chunk reaches `chunk_max` items, or when `close` is called,
    the temporary file is renamed to ``<count>-<md5>.<ext>.xz`` where
    ``<ext>`` is ``sc`` if `message` is `StreamItem` and ``cbor``
    otherwise.
    '''

    def __init__(self, chunk_dir, chunk_max=500, message=StreamItem):
        '''
        :param chunk_dir: directory that receives both the temporary
          file and the rolled chunk files
        :param chunk_max: number of items after which the open chunk
          is rolled
        :param message: `StreamItem` selects `Chunk` as the writer;
          any other value selects `CborChunk`
        '''
        self.chunk_dir = chunk_dir
        self.chunk_max = chunk_max
        # A single temporary path is chosen per roller and reused for
        # every chunk it writes.
        self.t_path = os.path.join(
            chunk_dir, 'tmp-%d.sc.xz' % random.randint(0, 10**8))
        self.o_chunk = None
        self.message = message

    def add(self, si_or_fc):
        '''Put `si_or_fc` into the currently open chunk, creating the
        chunk first if necessary.  If this item brings the chunk up to
        `chunk_max` items, the chunk is closed after adding.
        '''
        if self.o_chunk is None:
            # Remove any file already sitting at the temporary path
            # before a fresh chunk is opened there.
            if os.path.exists(self.t_path):
                os.remove(self.t_path)
            if self.message == StreamItem:
                self.o_chunk = Chunk(self.t_path, mode='wb')
            else:
                # NOTE(review): this logs type(self.message), not
                # self.message itself; if `message` is a class the log
                # shows its metaclass -- confirm that is intended.
                logger.info('Assuming CborChunk for message=%r',
                            type(self.message))
                self.o_chunk = CborChunk(self.t_path, mode='wb')
        self.o_chunk.add(si_or_fc)
        n_items = len(self.o_chunk)
        logger.debug('added %d-th item to chunk', n_items)
        # ">=" rather than "==": identical for chunk_max >= 1, but a
        # chunk_max below one now rolls after every item instead of
        # never rolling at all.
        if n_items >= self.chunk_max:
            self.close()

    def close(self):
        '''Close the open chunk, if there is one, and rename its
        temporary file to the final ``<count>-<md5>.<ext>.xz`` name
        inside `chunk_dir`.  Does nothing when no chunk is open.
        '''
        # Truthiness test, so None is skipped.  NOTE(review): a chunk
        # object that evaluates false (presumably one with zero items)
        # is skipped as well and left open -- confirm intended.
        if not self.o_chunk:
            return
        self.o_chunk.close()
        if self.message == StreamItem:
            extension = 'sc'
        else:
            # Logger.warn is a deprecated alias of Logger.warning.
            logger.warning('assuming file extension ".cbor"')
            extension = 'cbor'
        o_name = '%d-%s.%s.xz' % (
            len(self.o_chunk), self.o_chunk.md5_hexdigest, extension)
        o_path = os.path.join(self.chunk_dir, o_name)
        os.rename(self.t_path, o_path)
        # Reset so that the next add() opens a new temporary chunk.
        self.o_chunk = None
        logger.info('rolled chunk to %s', o_path)
def add(self, si_or_fc):
    '''Put `si_or_fc` into the currently open chunk, creating the
    chunk first if necessary.  If this item brings the chunk up to
    `chunk_max` items, the chunk is closed after adding.
    '''
    if self.o_chunk is None:
        # Remove any file already sitting at the temporary path before
        # a fresh chunk is opened there.
        if os.path.exists(self.t_path):
            os.remove(self.t_path)
        if self.message == StreamItem:
            self.o_chunk = Chunk(self.t_path, mode='wb')
        else:
            # NOTE(review): this logs type(self.message), not
            # self.message itself -- confirm that is intended.
            logger.info('Assuming CborChunk for message=%r',
                        type(self.message))
            self.o_chunk = CborChunk(self.t_path, mode='wb')
    self.o_chunk.add(si_or_fc)
    n_items = len(self.o_chunk)
    logger.debug('added %d-th item to chunk', n_items)
    # ">=" rather than "==": identical for chunk_max >= 1, but a
    # chunk_max below one now rolls after every item instead of never
    # rolling at all.
    if n_items >= self.chunk_max:
        self.close()