def _copy(args):
    '''
    Copy items from every chunk file in `args.input_path` into a
    single chunk written to stdout, then report the number of items
    written on stderr.

    `args.limit` caps the total number of items copied across all
    input files; `None` means no cap.  A limit of zero or less copies
    nothing.
    '''
    limit = args.limit
    count = 0
    ochunk = Chunk(file_obj=sys.stdout, mode='wb')
    try:
        for fpath in args.input_path:
            # Checked before opening the next input so that a limit
            # reached on the previous file (or a limit <= 0) never
            # opens another file or emits an extra item.
            if limit is not None and count >= limit:
                break
            ichunk = Chunk(path=fpath, mode='rb', message=message_class)
            try:
                for si in ichunk:
                    ochunk.add(si)
                    count += 1
                    if limit is not None and count >= limit:
                        break
            finally:
                # Close the input even if reading or writing raised.
                ichunk.close()
    finally:
        # Always close the output chunk, including on error.
        ochunk.close()
    sys.stderr.write('wrote {0} items\n'.format(count))
def _find(fpaths, stream_id, dump_binary_stream_item=False):
    '''
    Scan the chunk files listed in `fpaths` for the first item whose
    stream_id equals `stream_id`, and exit the process as soon as one
    is found.

    If `dump_binary_stream_item` is true, the matching item is written
    to stdout as a chunk.  Otherwise its body.raw is printed to
    stdout; if the item has no body, or its body has no raw content,
    the process exits with an explanatory message instead.

    Returns normally (without exiting) when no item matches.
    '''
    sys.stderr.write('hunting for %r\n' % stream_id)
    for chunk_path in fpaths:
        reader = Chunk(path=chunk_path, mode='rb', message=message_class)
        for item in reader:
            is_match = item.stream_id == stream_id
            if not is_match:
                continue

            if dump_binary_stream_item:
                writer = Chunk(file_obj=sys.stdout, mode='wb')
                writer.add(item)
                writer.close()
                sys.exit()

            if not item.body:
                sys.exit('Found %s without si.body' % stream_id)
            if not item.body.raw:
                sys.exit('Found %s without si.body.raw' % stream_id)

            # A single parenthesised argument prints the same under the
            # print statement as the unparenthesised form.
            print(item.body.raw)
            sys.exit()