Example #1
0
def testPerformance():
    from StringIO import StringIO
    from time import time as now
    src = u"(Цой|punk|Ленин)Жив!" * 20000
    u0 = src.encode("UTF-8")
    #--------------------------
    tStart = now()
    for c in src:
        symbolToUTF8(ord(c))
    tEncode = now() - tStart
    print "Encoding", (len(src) / tEncode), "symbols/sec"
    #--------------------------
    # Test native encorder
    tStart = now()
    src.encode("UTF-8")
    tEncode = now() - tStart
    print "Encoding (native)", (len(src) / tEncode), "symbols/sec"
    #--------------------------
    tStart = now()
    s_read = StringIO(u0)
    while True:
        repr = readSymbol(s_read.read)
        if repr == None:
            break
    tDecode = now() - tStart
    print "Decoding", (len(src) / tDecode), "symbols/sec"
    #--------------------------
    tStart = now()
    s_read = StringIO(u0)
    readString(s_read, len(src))
    tDecode = now() - tStart
    print "Decoding(readString)", (len(src) / tDecode), "symbols/sec"
    #--------------------------
    #    tStart = now()
    #    s_read = StringIO(u0)
    #    readString2(s_read, len(src))
    #    tDecode = now() - tStart
    #    print "Decoding(readString2)",  (len(src) / tDecode), "symbols/sec"
    #--------------------------
    # Test native decorders
    reader = codecs.lookup("UTF-8").streamreader(StringIO(u0), 'strict')
    tStart = now()
    chunk = 4096
    while True:
        # :( If read size is > 1 this may read beyond string and break parser
        # With read size == 1 it is even slower than hand-written procedure (for non ascii chars)
        readSize = 1
        repr = reader.read(readSize, chunk)
        if len(repr) == 0:
            break
    tDecode = now() - tStart
    print "Decoding(native)", (len(src) / tDecode), "symbols/sec"
Example #2
0
def testPerformance():
    from StringIO import StringIO
    from time import time as now
    src = u"(Цой|punk|Ленин)Жив!" * 20000
    u0 = src.encode("UTF-8")
    #--------------------------
    tStart = now()
    for c in src:
        symbolToUTF8(ord(c))
    tEncode = now() - tStart
    print "Encoding",  (len(src) / tEncode), "symbols/sec"
    #--------------------------
    # Test native encorder
    tStart = now()
    src.encode("UTF-8")    
    tEncode = now() - tStart
    print "Encoding (native)",  (len(src) / tEncode), "symbols/sec"
    #--------------------------
    tStart = now()
    s_read = StringIO(u0)
    while True:
        repr = readSymbol(s_read.read)
        if repr == None:
            break 
    tDecode = now() - tStart       
    print "Decoding",  (len(src) / tDecode), "symbols/sec"    
    #--------------------------
    tStart = now()
    s_read = StringIO(u0)
    readString(s_read, len(src))     
    tDecode = now() - tStart       
    print "Decoding(readString)",  (len(src) / tDecode), "symbols/sec"
    #--------------------------
#    tStart = now()
#    s_read = StringIO(u0)
#    readString2(s_read, len(src))     
#    tDecode = now() - tStart       
#    print "Decoding(readString2)",  (len(src) / tDecode), "symbols/sec"    
    #--------------------------
    # Test native decorders
    reader = codecs.lookup("UTF-8").streamreader(StringIO(u0), 'strict')
    tStart = now()
    chunk = 4096
    while True:        
        # :( If read size is > 1 this may read beyond string and break parser
        # With read size == 1 it is even slower than hand-written procedure (for non ascii chars)
        readSize = 1 
        repr = reader.read(readSize, chunk)
        if len(repr) == 0:
            break 
    tDecode = now() - tStart       
    print "Decoding(native)",  (len(src) / tDecode), "symbols/sec"
Example #3
0
def test():
    from StringIO import StringIO
    src = u"""
    В этом нет ничего нового,
    Ибо вообще ничего нового нет.
        Николай Рерих        
    ÀùúûüýþÿĀāĂ㥹ĆćĈĉ
    $¢£¤¥₣₤₧₪₫€
    """
    #u0 = src.encode("UTF-8")
    u1 = ""
    for c in src:
        u1 += "".join(symbolToUTF8(ord(c)))
    assert u1.decode("UTF-8") == src
    s = []
    s_read = StringIO(u1)
    while True:
        repr = readSymbol(s_read.read)
        if repr == None:
            break
        s.append(unichr(repr))

    s = u"".join(s)
    assert s == src
Example #4
0
def test():    
    from StringIO import StringIO
    src = u"""
    В этом нет ничего нового,
    Ибо вообще ничего нового нет.
        Николай Рерих        
    ÀùúûüýþÿĀāĂ㥹ĆćĈĉ
    $¢£¤¥₣₤₧₪₫€
    """    
    #u0 = src.encode("UTF-8")
    u1 = ""
    for c in src:
        u1 += "".join(symbolToUTF8(ord(c)))
    assert u1.decode("UTF-8") == src
    s = []
    s_read = StringIO(u1)
    while True:
        repr = readSymbol(s_read.read)
        if repr == None:
            break
        s.append(unichr(repr))
        
    s = u"".join(s)
    assert s == src