예제 #1
0
파일: dump.py 프로젝트: joy-xu/streamcorpus
def _copy(args):
    '''
    Copy stream items from the chunk files listed in args.input_path
    into a single output Chunk written to stdout, then report the
    number of items copied on stderr.

    :param args: object providing ``input_path`` (an iterable of chunk
      file paths, read in order) and ``limit`` (the maximum number of
      items to copy, or None to copy everything).

    Input chunks are opened with ``message_class``, which is resolved
    from the enclosing module scope.
    '''
    limit = args.limit
    count = 0
    ochunk = Chunk(file_obj=sys.stdout, mode='wb')
    # Evaluate the limit before touching any input: a limit of zero (or
    # less) must copy nothing, whereas a check made only after each add
    # would always let the first item through.
    done = (limit is not None) and (count >= limit)
    for fpath in args.input_path:
        if done:
            break
        ichunk = Chunk(path=fpath, mode='rb', message=message_class)
        try:
            for si in ichunk:
                count += 1
                ochunk.add(si)
                if (limit is not None) and (count >= limit):
                    # remember the reason so the outer loop stops too
                    done = True
                    break
        finally:
            # release the input chunk even if reading or adding fails
            ichunk.close()
    ochunk.close()
    sys.stderr.write('wrote {0} items\n'.format(count))
예제 #2
0
파일: dump.py 프로젝트: joy-xu/streamcorpus
def _find(fpaths, stream_id, dump_binary_stream_item=False):
    '''
    Scan the chunk files in fpaths for the first stream item whose
    stream_id equals the requested stream_id, emit it on stdout, and
    exit the process.

    :param fpaths: iterable of chunk file paths, searched in order.
    :param stream_id: the stream_id to look for.
    :param dump_binary_stream_item: if True, write the whole matching
      item to stdout as a one-item Chunk; otherwise print only its
      body.raw.

    On the first match the function calls sys.exit(): with no argument
    after output was written, or with an explanatory message when the
    matching item lacks body or body.raw.  If no item matches in any
    file, the function returns without writing to stdout.

    Input chunks are opened with ``message_class``, which is read from
    the enclosing module scope.
    '''
    sys.stderr.write('hunting for %r\n' % stream_id)
    for fpath in fpaths:
        i_chunk = Chunk(path=fpath, mode='rb', message=message_class)
        try:
            for si in i_chunk:
                if si.stream_id != stream_id:
                    continue
                if dump_binary_stream_item:
                    o_chunk = Chunk(file_obj=sys.stdout, mode='wb')
                    o_chunk.add(si)
                    o_chunk.close()
                    sys.exit()
                elif si.body and si.body.raw:
                    # Parenthesised single argument: same output as the
                    # Python 2 print statement, and valid syntax under
                    # the print() function as well.
                    print(si.body.raw)
                    sys.exit()
                elif si.body:
                    sys.exit('Found %s without si.body.raw' % stream_id)
                else:
                    sys.exit('Found %s without si.body' % stream_id)
        finally:
            # Close every input chunk, whether it was exhausted without
            # a match or abandoned because sys.exit() raised SystemExit.
            i_chunk.close()