def test():
    """Time several ways of assembling byte fragments and writing them out.

    Each variant pulls its fragments from ``make_fragments(FRAGMENT_SIZE,
    NUMBER_FRAGS)`` and sends the result to ``/dev/null`` inside a
    ``timethis`` block.  Those names are expected to be defined at module
    level elsewhere in this file.
    """
    # Imported up front so the import cost is not charged to a timed block.
    from io import BytesIO, FileIO

    # Byte concatenation is left disabled: it is super slow.
    # with timethis("Byte concatenation +="):
    #     msg = b""
    #     for chunk in make_fragments(FRAGMENT_SIZE, NUMBER_FRAGS):
    #         msg += chunk

    # Try .join()
    with timethis("Joining a list of fragments"):
        msgparts = []
        for chunk in make_fragments(FRAGMENT_SIZE, NUMBER_FRAGS):
            msgparts.append(chunk)
        msg = b"".join(msgparts)
        with open('/dev/null', 'wb') as sink:
            sink.write(msg)

    # Try bytearray.extend
    with timethis("Extending a bytearray"):
        msg = bytearray()
        for chunk in make_fragments(FRAGMENT_SIZE, NUMBER_FRAGS):
            msg.extend(chunk)
        with open('/dev/null', 'wb') as sink:
            sink.write(msg)

    with timethis("BytesIO"):
        msg = BytesIO()
        for chunk in make_fragments(FRAGMENT_SIZE, NUMBER_FRAGS):
            msg.write(chunk)
        with open('/dev/null', 'wb') as sink:
            # getvalue() returns the whole buffer.  read() would return b""
            # here because the stream position sits at the end after writing.
            sink.write(msg.getvalue())

    with timethis("FileIO"):
        # The context manager closes the raw file inside the timed region.
        with FileIO('/dev/null', 'w') as msg:
            for chunk in make_fragments(FRAGMENT_SIZE, NUMBER_FRAGS):
                msg.write(chunk)

    with timethis("open, wb"):
        # Closing here also flushes the buffered writer inside the timed region.
        with open('/dev/null', 'wb') as msg:
            for chunk in make_fragments(FRAGMENT_SIZE, NUMBER_FRAGS):
                msg.write(chunk)
# iterlines.py
#
# Iterate over the lines of a file using native open()

from timethis import timethis

with timethis("Iterate over lines"):
    # The context manager closes the file deterministically instead of
    # relying on garbage collection.
    with open("access-log") as f:
        for line in f:
            pass
# itercodecs.py
#
# Iterate over the lines of a file using codecs.open()

import codecs

from timethis import timethis

with timethis("Iterate over lines (codecs,latin-1)"):
    # Stream readers returned by codecs.open() support the with statement.
    with codecs.open("access-log", encoding="latin-1") as f:
        for line in f:
            pass

with timethis("Iterate over lines (native open)"):
    with open("access-log") as f:
        for line in f:
            pass
# structpack.py
#
# Compare extending a bytearray with packing a bytearray in place

import struct
from random import random

from timethis import timethis

# Create a million random (x,y) points
points = [(random(), random()) for _ in range(1000000)]

# Pack a bytearray and write it all at once
with timethis("Packing a bytearray by extending"):
    out = bytearray()
    out.extend(struct.pack("I", len(points)))
    for p in points:
        out.extend(struct.pack("ff", *p))

# Pack a byte array in place
with timethis("Packing a bytearray in place"):
    # Derive both sizes from the format strings so the buffer layout always
    # matches what struct actually writes (no hard-coded header width).
    headersize = struct.calcsize("I")
    recordsize = struct.calcsize("ff")
    out = bytearray(headersize + len(points) * recordsize)
    struct.pack_into("I", out, 0, len(points))
    for n, p in enumerate(points):
        struct.pack_into("ff", out, headersize + n * recordsize, *p)
# iterfile.py
#
# Iterate over the lines of a file using native open()

from timethis import timethis

with timethis("Iterate over lines"):
    # The context manager closes the file deterministically instead of
    # relying on garbage collection.
    with open("access-log") as f:
        for line in f:
            pass
def timetest():
    """Run ``timethis`` on the statement ``'mz_cluster()'`` and print the result.

    The statement is passed together with this module's globals,
    ``repeat=3`` and ``number=1``; the returned value is printed with
    ``%f`` formatting.
    """
    from timethis import timethis

    elapsed = timethis('mz_cluster()', globals(), repeat=3, number=1)
    report = 'Time: %f s' % elapsed
    print(report)
# iterbin.py
#
# Iterate over the lines of a file using binary mode

from timethis import timethis

with timethis("Iterate over lines (binary mode)"):
    with open("access-log", "rb") as f:
        for line in f:
            pass

with timethis("Iterate over lines (unbuffered binary mode)"):
    # buffering=0 is only permitted in binary mode; it yields a raw file object.
    with open("access-log", "rb", buffering=0) as f:
        for line in f:
            pass
from timethis import timethis FRAGMENT_SIZE = 256 NUMBER_FRAGS = 10000 # A generator that creates byte fragments for us def make_fragments(size, count): frag = b"x" * size while count > 0: yield frag count -= 1 # Try byte concatenation with timethis("Byte concatenation +="): msg = b"" for chunk in make_fragments(FRAGMENT_SIZE, NUMBER_FRAGS): msg += chunk # Try .join() with timethis("Joining a list of fragments"): msgparts = [] for chunk in make_fragments(FRAGMENT_SIZE, NUMBER_FRAGS): msgparts.append(chunk) msg = b"".join(msgparts) # Try bytearray.extend with timethis("Extending a bytearray"): msg = bytearray() for chunk in make_fragments(FRAGMENT_SIZE, NUMBER_FRAGS):
# Time writing one large payload to a file in text mode versus binary mode.

import os

from timethis import timethis

text = 'x'*100000000
binary = b'x'*100000000

for n in range(10):
    with timethis("Writing text"):
        # Closing inside the timed block ensures the data is flushed before
        # the timer stops and before the file is reopened below.
        with open("big.txt", "w") as f:
            f.write(text)
    with timethis("Writing binary"):
        with open("big.txt", "wb") as f:
            f.write(binary)

# Clean up the scratch file once all rounds are finished.
os.remove('big.txt')
# iterenc.py
#
# Iterate over the lines of a file using three different encodings

import codecs

from timethis import timethis

with timethis("Iterate over lines (UTF-8)"):
    with open("access-log", encoding='utf-8') as f:
        for line in f:
            pass

with timethis("Iterate over lines (ASCII)"):
    with open("access-log", encoding='ascii') as f:
        for line in f:
            pass

with timethis("Iterate over lines (Latin-1)"):
    with open("access-log", encoding='latin-1') as f:
        for line in f:
            pass
# find404.py
#
# Find set of all URLs with a 404 error

from timethis import timethis

with timethis("Find 404 urls - text"):
    error_404_urls = set()
    with open("access-log") as f:
        for line in f:
            fields = line.split()
            # Blank or short lines lack the fields indexed below; skip
            # them rather than raising IndexError.
            if len(fields) >= 4 and fields[-2] == '404':
                error_404_urls.add(fields[-4])

for name in error_404_urls:
    print(name)

with timethis("Find 404 urls - binary"):
    error_404_urls = set()
    with open("access-log", "rb") as f:
        for line in f:
            fields = line.split()
            if len(fields) >= 4 and fields[-2] == b'404':
                error_404_urls.add(fields[-4])
    # Decode only the matching URLs, once, after the scan is finished.
    error_404_urls = {n.decode('latin-1') for n in error_404_urls}

for name in error_404_urls:
    print(name)
# itercodecs.py
#
# Iterate over the lines of a file using codecs.open()

import codecs

from timethis import timethis

with timethis("Iterate over lines (codecs,latin-1)"):
    # Stream readers returned by codecs.open() support the with statement.
    with codecs.open("access-log", encoding="latin-1") as f:
        for line in f:
            pass

with timethis("Iterate over lines (native open)"):
    with open("access-log") as f:
        for line in f:
            pass
# find404.py
#
# Find set of all URLs with a 404 error

from timethis import timethis

with timethis("Find 404 urls - text"):
    error_404_urls = set()
    with open("access-log") as f:
        for line in f:
            fields = line.split()
            # Blank or short lines lack the fields indexed below; skip
            # them rather than raising IndexError.
            if len(fields) >= 4 and fields[-2] == '404':
                error_404_urls.add(fields[-4])

for name in error_404_urls:
    print(name)

with timethis("Find 404 urls - binary"):
    error_404_urls = set()
    with open("access-log", "rb") as f:
        for line in f:
            fields = line.split()
            if len(fields) >= 4 and fields[-2] == b'404':
                error_404_urls.add(fields[-4])
    # Decode only the matching URLs, once, after the scan is finished.
    error_404_urls = {n.decode('latin-1') for n in error_404_urls}

for name in error_404_urls:
    print(name)
# readall.py
#
# Read a text file all at once

from timethis import timethis

with timethis("Read a text file"):
    # The context manager closes the file as soon as the read completes.
    with open("big.txt") as f:
        data = f.read()

with timethis("Read a binary file"):
    with open("big.txt", "rb") as f:
        data = f.read()
# iterbin.py
#
# Iterate over the lines of a file using binary mode

from timethis import timethis

with timethis("Iterate over lines (binary mode)"):
    with open("access-log", "rb") as f:
        for line in f:
            pass

with timethis("Iterate over lines (unbuffered binary mode)"):
    # buffering=0 is only permitted in binary mode; it yields a raw file object.
    with open("access-log", "rb", buffering=0) as f:
        for line in f:
            pass
# iterenc.py
#
# Iterate over the lines of a file using three different encodings

import codecs

from timethis import timethis

with timethis("Iterate over lines (UTF-8)"):
    with open("access-log", encoding='utf-8') as f:
        for line in f:
            pass

with timethis("Iterate over lines (ASCII)"):
    with open("access-log", encoding='ascii') as f:
        for line in f:
            pass

with timethis("Iterate over lines (Latin-1)"):
    with open("access-log", encoding='latin-1') as f:
        for line in f:
            pass
# Time writelines() on a list of text lines versus a list of binary lines.

import os

from timethis import timethis

# Load the same log twice, as str lines and as bytes lines, closing each
# handle as soon as the read completes.
with open("access-log") as f:
    lines = f.readlines()
with open("access-log", "rb") as f:
    binlines = f.readlines()

for n in range(10):
    with timethis("Write using writelines()"):
        # Closing inside the timed block ensures the data is flushed before
        # the timer stops and before the file is reopened below.
        with open("hugelog.txt", "wt") as f:
            f.writelines(lines)
    with timethis("Write binary using writelines()"):
        with open("hugelog.txt", "wb") as f:
            f.writelines(binlines)

# Clean up the scratch file once all rounds are finished.
os.remove("hugelog.txt")