Exemple #1
0
def _to_cbor(args):
    '''Copy items from the chunk files in `args.input_path` to stdout
    through a `CborChunk`.

    Each input path is opened as a `Chunk` in 'rb' mode and every item
    is added to a `CborChunk` wrapping `sys.stdout` (with
    `to_primitives` as its write wrapper).  Copying stops once
    `args.limit` items have been written; a limit of `None` means no
    limit.  The number of items written is reported on stderr.

    :param args: object with an `input_path` iterable of file paths
      and a `limit` attribute (`None` or an integer)
    '''
    limit = args.limit
    count = 0
    ochunk = CborChunk(file_obj=sys.stdout, mode='wb', write_wrapper=to_primitives)
    try:
        for fpath in args.input_path:
            # Checked before opening a file so that a limit <= 0
            # writes nothing, and so that no further file is opened
            # once the limit was reached inside the previous one.
            if limit is not None and count >= limit:
                break
            ichunk = Chunk(path=fpath, mode='rb')
            try:
                for si in ichunk:
                    ochunk.add(si)
                    count += 1
                    # Stop right after the limit-th item, without
                    # pulling another item from the input chunk.
                    if limit is not None and count >= limit:
                        break
            finally:
                # Release the input chunk even if reading or writing fails
                ichunk.close()
    finally:
        # Always close the output chunk, including on error
        ochunk.close()
    sys.stderr.write('wrote {0} items\n'.format(count))
Exemple #2
0
class ChunkRoller(object):
    '''Writes items into a temporary chunk file in `chunk_dir` and
    "rolls" it (renames it to its final name) whenever the number of
    items in the open chunk equals `chunk_max`, or when `close` is
    called.

    Rolled files are named
    ``<item count>-<md5 hexdigest>.<extension>.xz``.
    '''

    def __init__(self, chunk_dir, chunk_max=500, message=StreamItem):
        '''
        :param chunk_dir: directory holding both the temporary file
          and the rolled chunk files
        :param chunk_max: item count at which the open chunk is rolled
        :param message: `StreamItem` selects a `Chunk` writer and the
          "sc" extension; any other value selects a `CborChunk` writer
          and the "cbor" extension
        '''
        self.chunk_dir = chunk_dir
        self.chunk_max = chunk_max
        # The temporary name always ends in ".sc.xz"; the file is
        # renamed with the proper extension in `close`.
        self.t_path = os.path.join(chunk_dir, 'tmp-%d.sc.xz' % random.randint(0, 10**8))
        self.o_chunk = None
        self.message = message

    def add(self, si_or_fc):
        '''Put `si_or_fc` into the currently open chunk, creating the
        chunk first if necessary.  If this item brings the chunk to
        `chunk_max` items, the chunk is closed after adding.

        '''
        if self.o_chunk is None:
            # Clear any leftover temporary file before reopening it
            if os.path.exists(self.t_path):
                os.remove(self.t_path)
            if self.message == StreamItem:
                self.o_chunk = Chunk(self.t_path, mode='wb')
            else:
                # Log the message itself: `type()` of a class is just
                # `type`, which does not identify the message kind.
                logger.info('Assuming CborChunk for message=%r', self.message)
                self.o_chunk = CborChunk(self.t_path, mode='wb')

        self.o_chunk.add(si_or_fc)
        n_items = len(self.o_chunk)
        logger.debug('added %d-th item to chunk', n_items)
        if n_items == self.chunk_max:
            self.close()

    def close(self):
        '''Close the open chunk, if there is one, and rename its
        temporary file to
        ``<item count>-<md5 hexdigest>.<extension>.xz`` inside
        `chunk_dir`.  Does nothing when no chunk is open.
        '''
        # NOTE(review): this is a truthiness test, not `is None`; if
        # the chunk class defines `__len__`, an open-but-empty chunk is
        # skipped here as well -- confirm that is intended.
        if not self.o_chunk:
            return

        self.o_chunk.close()
        if self.message == StreamItem:
            extension = 'sc'
        else:
            # `Logger.warn` is a deprecated alias of `Logger.warning`
            logger.warning('assuming file extension ".cbor"')
            extension = 'cbor'
        o_path = os.path.join(
            self.chunk_dir,
            '%d-%s.%s.xz' % (len(self.o_chunk), self.o_chunk.md5_hexdigest, extension)
        )
        os.rename(self.t_path, o_path)
        # Reset so the next `add` starts a fresh chunk
        self.o_chunk = None
        logger.info('rolled chunk to %s', o_path)
Exemple #3
0
    def add(self, si_or_fc):
        '''Put `si_or_fc` into the currently open chunk, creating the
        chunk first if necessary.  If this item brings the chunk to
        `chunk_max` items, the chunk is closed after adding.

        '''
        if self.o_chunk is None:
            # Clear any leftover temporary file before reopening it
            if os.path.exists(self.t_path):
                os.remove(self.t_path)
            if self.message == StreamItem:
                self.o_chunk = Chunk(self.t_path, mode='wb')
            else:
                # Log the message itself: `type()` of a class is just
                # `type`, which does not identify the message kind.
                logger.info('Assuming CborChunk for message=%r', self.message)
                self.o_chunk = CborChunk(self.t_path, mode='wb')

        self.o_chunk.add(si_or_fc)
        n_items = len(self.o_chunk)
        logger.debug('added %d-th item to chunk', n_items)
        if n_items == self.chunk_max:
            self.close()
Exemple #4
0
def _to_cbor(args):
    '''Copy items from the chunk files in `args.input_path` to stdout
    through a `CborChunk`.

    Each input path is opened as a `Chunk` in 'rb' mode and every item
    is added to a `CborChunk` wrapping `sys.stdout` (with
    `to_primitives` as its write wrapper).  Copying stops once
    `args.limit` items have been written; a limit of `None` means no
    limit.  The number of items written is reported on stderr.

    :param args: object with an `input_path` iterable of file paths
      and a `limit` attribute (`None` or an integer)
    '''
    limit = args.limit
    count = 0
    ochunk = CborChunk(file_obj=sys.stdout, mode='wb', write_wrapper=to_primitives)
    try:
        for fpath in args.input_path:
            # Checked before opening a file so that a limit <= 0
            # writes nothing, and so that no further file is opened
            # once the limit was reached inside the previous one.
            if limit is not None and count >= limit:
                break
            ichunk = Chunk(path=fpath, mode='rb')
            try:
                for si in ichunk:
                    ochunk.add(si)
                    count += 1
                    # Stop right after the limit-th item, without
                    # pulling another item from the input chunk.
                    if limit is not None and count >= limit:
                        break
            finally:
                # Release the input chunk even if reading or writing fails
                ichunk.close()
    finally:
        # Always close the output chunk, including on error
        ochunk.close()
    sys.stderr.write('wrote {0} items\n'.format(count))