def main(argv):
    """Dump the given archives, then bundle the HTML output into dump.zip.

    With no input files, an archive is opened on stdin via
    ``WarcRecord.open_archive`` and dumped under the name "-" with
    ``offsets=False``. Otherwise each named file is opened via
    ``ArchiveRecord.open_archive`` with ``gzip="auto"`` and dumped in turn.

    Afterwards every file under the "html" directory tree, plus
    "fulltext.html" and "index.html", is added to "dump.zip". All of these
    paths are relative to the current working directory.

    Args:
        argv: full argument vector; argv[0] is skipped before parsing.

    Returns:
        0 on completion.
    """
    (options, input_files) = parser.parse_args(args=argv[1:])

    if not input_files:
        dump_archive(
            WarcRecord.open_archive(file_handle=sys.stdin, gzip=None),
            name="-",
            offsets=False
        )
    else:
        for name in input_files:
            fh = ArchiveRecord.open_archive(name, gzip="auto")
            try:
                dump_archive(fh, name)
            finally:
                # Release the archive handle even when dumping fails.
                fh.close()

    # The context manager guarantees the zip's central directory is written
    # and the file handle released, even if one of the writes raises.
    with zipfile.ZipFile("dump.zip", "w") as tf:
        for dirname, subdirs, files in os.walk("html"):
            for filename in files:
                tf.write(os.path.join(dirname, filename))
        tf.write("fulltext.html")
        tf.write("index.html")

    return 0
def main(argv):
    """Write every record of the input archives to stdout.

    With no input files, records are read from an archive opened on stdin.
    Otherwise each named file is opened with ``gzip="auto"`` and its records
    are written out in order. Each record is written with
    ``record.write_to(out, gzip=options.gzip)``.

    Args:
        argv: full argument vector; argv[0] is skipped before parsing.

    Returns:
        0 on completion.
    """
    (options, input_files) = parser.parse_args(args=argv[1:])
    out = sys.stdout

    if not input_files:
        fh = WarcRecord.open_archive(file_handle=sys.stdin, gzip=None)
        for record in fh:
            record.write_to(out, gzip=options.gzip)
    else:
        for name in input_files:
            fh = WarcRecord.open_archive(name, gzip="auto")
            try:
                for record in fh:
                    record.write_to(out, gzip=options.gzip)
            finally:
                # Release the archive handle even when a record fails to
                # parse or write part-way through the file.
                fh.close()

    return 0
def main(argv):
    """Dump the archives named on the command line, or stdin if none.

    With no input files, an archive is opened on stdin via
    ``WarcRecord.open_archive`` and dumped under the name "-" with
    ``offsets=False``. Otherwise each named file is opened via
    ``ArchiveRecord.open_archive`` with ``gzip="auto"`` and passed to
    ``dump_archive`` together with its name.

    Args:
        argv: full argument vector; argv[0] is skipped before parsing.

    Returns:
        0 on completion.
    """
    (options, input_files) = parser.parse_args(args=argv[1:])

    if not input_files:
        dump_archive(
            WarcRecord.open_archive(file_handle=sys.stdin, gzip=None),
            name="-",
            offsets=False
        )
        return 0

    for name in input_files:
        fh = ArchiveRecord.open_archive(name, gzip="auto")
        try:
            dump_archive(fh, name)
        finally:
            # Release the archive handle even when dumping fails.
            fh.close()

    return 0
def main(argv):
    """Dump a single record from stdin, or from a file at an optional offset.

    Positional arguments (after option parsing):
        args[0]: archive filename (optional; stdin is used when absent).
        args[1]: integer offset to seek to before dumping (optional,
            defaults to 0).

    Args:
        argv: full argument vector; argv[0] is skipped before parsing.

    Returns:
        0 on completion.

    Raises:
        ValueError: if the offset argument is not a valid integer.
    """
    (options, args) = parser.parse_args(args=argv[1:])

    if not args:
        # No filename given: dump the first record on stdin.
        with closing(WarcRecord.open_archive(file_handle=sys.stdin, gzip=None)) as fh:
            dump_record(fh)
        return 0

    # Dump a record from the named file, starting at the optional offset.
    filename = args[0]
    offset = int(args[1]) if len(args) > 1 else 0
    with closing(ArchiveRecord.open_archive(filename=filename, gzip="auto")) as fh:
        fh.seek(offset)
        dump_record(fh)

    return 0