Ejemplo n.º 1
0
def main(argv):
    """Dump the input archives, then pack the HTML output into ``dump.zip``.

    Args:
        argv: Full argument vector. ``argv[0]`` is skipped; the remainder is
            parsed by the module-level ``parser``.

    Returns:
        Always 0.
    """
    (options, input_files) = parser.parse_args(args=argv[1:])

    if len(input_files) < 1:
        # No file names on the command line: read a WARC stream from stdin.
        dump_archive(
            WarcRecord.open_archive(file_handle=sys.stdin, gzip=None),
            name="-",
            offsets=False,
        )
    else:
        for name in input_files:
            fh = ArchiveRecord.open_archive(name, gzip="auto")
            try:
                dump_archive(fh, name)
            finally:
                # Release the archive handle even when dumping fails.
                fh.close()

    # NOTE(review): "html/", "fulltext.html" and "index.html" are presumably
    # produced by dump_archive -- confirm; this function only reads them.
    # The with-block guarantees the zip is finalized/closed on any error.
    with zipfile.ZipFile("dump.zip", "w") as tf:
        for dirname, subdirs, files in os.walk("html"):
            for filename in files:
                tf.write(os.path.join(dirname, filename))
        tf.write("fulltext.html")
        tf.write("index.html")

    return 0
Ejemplo n.º 2
0
def main(argv):
    """Write every record of the input archives to standard output.

    Args:
        argv: Full argument vector. ``argv[0]`` is skipped; the remainder is
            parsed by the module-level ``parser``.

    Returns:
        Always 0.
    """
    (options, input_files) = parser.parse_args(args=argv[1:])

    out = sys.stdout
    if len(input_files) < 1:
        # No file names on the command line: read a WARC stream from stdin.
        fh = WarcRecord.open_archive(file_handle=sys.stdin, gzip=None)

        for record in fh:
            record.write_to(out, gzip=options.gzip)
    else:
        for name in input_files:
            fh = WarcRecord.open_archive(name, gzip="auto")
            try:
                for record in fh:
                    record.write_to(out, gzip=options.gzip)
            finally:
                # Release the archive handle even if reading or writing a
                # record fails part-way through.
                fh.close()

    return 0
Ejemplo n.º 3
0
def main(argv):
    """Pass each input archive to ``dump_archive``.

    Args:
        argv: Full argument vector. ``argv[0]`` is skipped; the remainder is
            parsed by the module-level ``parser``.

    Returns:
        Always 0.
    """
    (options, input_files) = parser.parse_args(args=argv[1:])

    if len(input_files) < 1:
        # No file names on the command line: read a WARC stream from stdin.
        dump_archive(
            WarcRecord.open_archive(file_handle=sys.stdin, gzip=None),
            name="-",
            offsets=False,
        )
    else:
        for name in input_files:
            fh = ArchiveRecord.open_archive(name, gzip="auto")
            try:
                dump_archive(fh, name)
            finally:
                # Release the archive handle even when dumping fails.
                fh.close()

    return 0
Ejemplo n.º 4
0
def main(argv):
    """Dump one record, read either from stdin or from a named archive.

    Args:
        argv: Full argument vector. ``argv[0]`` is skipped; the remainder is
            parsed by the module-level ``parser``. The first positional
            argument, if any, is the archive file name; the second, if any,
            is an integer offset to seek to before dumping.

    Returns:
        Always 0.
    """
    (options, args) = parser.parse_args(args=argv[1:])

    out = sys.stdout
    if len(args) >= 1:
        # File mode: seek to the requested offset (default 0), then dump.
        filename = args[0]
        offset = int(args[1]) if len(args) > 1 else 0

        archive = ArchiveRecord.open_archive(filename=filename, gzip="auto")
        with closing(archive) as fh:
            fh.seek(offset)
            dump_record(fh)
        return 0

    # Stdin mode: dump the first record of the stream.
    stream = WarcRecord.open_archive(file_handle=sys.stdin, gzip=None)
    with closing(stream) as fh:
        dump_record(fh)
    return 0