Example #1
def perfect_trial(key):
    """Depict steps in the computation of perfect hash for given key."""
    from ch03.perfect.generated_dictionary import G, S1, S2, hash_f
    hk1 = hash_f(key, S1)
    print('hash_f(\'{}\', S1)={}'.format(key, hk1))

    # Expand the computation: dot product of S1 with the character codes of key, mod len(G).
    total = 0
    for idx, ch in enumerate(key):
        print(comma(S1[idx]), '*', ord(ch), end='')
        total += S1[idx] * ord(ch)
        if idx < len(key)-1: print(' + ', end='')
    print(' % {0} = {1} % {0} = {2}'.format(comma(len(G)), comma(total), comma(total % len(G))))
    print()

    hk2 = hash_f(key, S2)
    print('hash_f(\'{}\', S2)={}'.format(key, hk2))
    # Repeat the expansion with the second seed list, S2.
    total = 0
    for idx, ch in enumerate(key):
        print(comma(S2[idx]), '*', ord(ch), end='')
        total += S2[idx] * ord(ch)
        if idx < len(key)-1: print(' + ', end='')
    print(' % {0} = {1} % {0} = {2}'.format(comma(len(G)), comma(total), comma(total % len(G))))

    # Look up the two intermediate entries in G.
    print('G[{}] = {}'.format(comma(hk1), G[hk1]))
    print('G[{}] = {}'.format(comma(hk2), G[hk2]))
    print()
    # Insert the key into the perfect hashtable and report the slot it occupies.
    from ch03.hashtable_open_perfect import Hashtable
    ht1 = Hashtable()
    ht1.put(key, key)
    for idx, val in enumerate(ht1.table):
        if val:
            print(val, 'at index position', idx)
            return idx
    return None
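
perfect_trial() expands the two intermediate hashes hash_f(key, S1) and hash_f(key, S2) and looks up their G entries. Below is a minimal, self-contained sketch of the common two-level combination (G[h1] + G[h2]) % len(G) used by generated perfect hash functions; the tiny G, S1, S2 values and the perfect_hash() helper are illustrative assumptions, not the tables from ch03.perfect.generated_dictionary.

G = [0, 3, 1, 4, 0, 2, 5, 0]          # illustrative intermediate table
S1 = [2, 5, 7, 11, 13]                # illustrative seed lists; must be at least
S2 = [3, 6, 8, 12, 17]                # as long as the longest key

def hash_f(key, S):
    # Dot product of seed values with character codes, reduced mod len(G).
    return sum(S[i] * ord(ch) for i, ch in enumerate(key)) % len(G)

def perfect_hash(key):
    # Final slot: combine the two intermediate G entries, mod len(G).
    return (G[hash_f(key, S1)] + G[hash_f(key, S2)]) % len(G)

print(perfect_hash('cat'))            # prints the slot for 'cat' in this toy setup
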
Example #2
def count_unused():
    """Count unused (zero) entries in G."""
    from ch03.perfect.generated_dictionary import G
    count = 0
    for val in G:
        if val == 0:
            count += 1
    print('From G, which has', comma(len(G)), 'entries,', comma(count),
          'of them are zero ({:.2f}%)'.format(100 * count / len(G)))
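
The same density check works on any list; a minimal sketch with a made-up G, using a generator expression instead of an explicit loop:

G = [0, 3, 0, 7, 1, 0, 4, 0]          # illustrative only, not the generated table
unused = sum(1 for val in G if val == 0)
print('{} of {} entries are zero ({:.2f}%)'.format(unused, len(G), 100 * unused / len(G)))
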
Example #3
def time_results_linked(output=True, decimals=3):
    """Average time to insert a key into a hashtable_linked table of growing size."""

    sizes = [8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576]
    tbl = DataTable([8] + [8] * len(sizes),
                    ['N'] + [comma(sz) for sz in sizes],
                    output=output,
                    decimals=decimals)
    # Add M words into a linked-list hashtable of size N.
    # Start with 32 words and double up to 16,384.
    words = english_words()
    for num_to_add in [32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384]:
        all_words = words[:num_to_add]

        line = [num_to_add]
        for size in sizes:
            time1 = min(
                timeit.repeat(stmt='''
table = Hashtable({})
for word in words:
    table.put(word, 99)'''.format(size),
                              setup='''
from ch03.hashtable_linked import Hashtable
words={}'''.format(all_words),
                              repeat=1,
                              number=100))
            line.append(1000000 * time1 / size)
        tbl.row(line)
    return tbl
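
The timing pattern itself (a stmt string filled in with str.format(), a setup string that rebuilds the word list, and min() over timeit.repeat()) can be tried in isolation. A minimal sketch that substitutes Python's built-in dict for ch03.hashtable_linked.Hashtable so it runs without the book's modules:

import timeit

# dict stands in for the book's Hashtable so the snippet is self-contained.
words = ['apple', 'banana', 'cherry', 'date']
best = min(timeit.repeat(stmt='''
table = dict()
for word in words:
    table[word] = 99''',
                         setup='words = {}'.format(words),
                         repeat=3, number=100))
print('best of 3 runs of 100 insertion loops: {:.6f} seconds'.format(best))
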
Example #4
def combined_sorted(lo=8, hi=12, output=True):
    """Generate results for different sorting trials."""
    tbl = DataTable([8] * (hi - lo + 1),
                    ['N'] + [comma(2**k) for k in range(lo, hi)],
                    output=output)

    for n in [2**k for k in range(lo, hi)]:
        row = [n]
        for m in [2**k for k in range(lo, hi)]:
            row.append(run_merge_trial(m, n))
        tbl.row(row)

    # Diagonal values reflect 2*M*log(M), so halve the fitted coefficient.
    # Build the model using ONLY the first five values.
    x = [2**k for k in range(lo, min(lo + 5, hi))]
    y = [tbl.entry(r, comma(r)) for r in x]
    if numpy_error:
        a = 0    # numpy/scipy unavailable: skip model fitting and prediction
    else:
        import numpy as np
        from scipy.optimize import curve_fit
        from scipy.stats import pearsonr

        (coeffs, _) = curve_fit(n_log_n_model, np.array(x), np.array(y))
        a = coeffs[0] / 2

        y_fit = [n_log_n_model(r, a) for r in x]

        print()
        print(pearsonr(y, y_fit))
        print()
        print('Prediction')
        model = DataTable([8] * (hi - lo + 1),
                          ['N'] + [comma(2**k) for k in range(lo, hi)],
                          output=output)
        for n in [2**k for k in range(lo, hi)]:
            row = [n]
            for m in [2**k for k in range(lo, hi)]:
                row.append(n_log_n_model(n, a) + n_log_n_model(m, a))
            model.row(row)
    return tbl
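
The model-fitting step can be exercised on its own. A minimal sketch, assuming numpy and scipy are installed; n_log_n_model here is a local stand-in for the book's helper of the same name, and the timing values are made up for illustration:

import numpy as np
from scipy.optimize import curve_fit
from scipy.stats import pearsonr

def n_log_n_model(n, a):
    # Local stand-in for the book's n_log_n_model helper.
    return a * n * np.log2(n)

x = np.array([256, 512, 1024, 2048, 4096])
y = np.array([0.0021, 0.0046, 0.0101, 0.0219, 0.0473])   # made-up timings

(coeffs, _) = curve_fit(n_log_n_model, x, y)
a = coeffs[0] / 2                     # halved, as in combined_sorted above
y_fit = [n_log_n_model(r, a) for r in x]
print('fitted coefficient:', coeffs[0])
print(pearsonr(y, y_fit))             # correlation between data and model
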
Example #5
def time_results_open_addressing(num_rows=0, output=True, decimals=3):
    """Average time to insert a key in growing hashtable_open (in microseconds)."""
    sizes = [8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576]
    headers = [comma(s) for s in sizes]
    headers.insert(0,'N')
    tbl = DataTable([8,8,8,8,8,8,8,8,10], headers, output=output, decimals=decimals)

    # Add M words into an open-addressing hashtable of size N.
    # Start with 32 words and double up to 32,768.
    for num_to_add in [32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768]:
        all_words = english_words()[:num_to_add]

        line = [len(all_words)]
        for size in sizes:
            try:
                tbl.format(comma(size), '.3f')
                timing = min(timeit.repeat(stmt='''
table = Hashtable({})
for word in all_words:
    table.put(word, 99)'''.format(size), setup='''
from ch03.hashtable_open import Hashtable
from resources.english import english_words
all_words=english_words()[:{}]'''.format(num_to_add),repeat=1,number=100))
                timing = (100000.0 * timing) / size
            except RuntimeError:
                timing = SKIP    # table of this size filled before all words could be inserted

            line.append(timing)
        num_rows -= 1
        tbl.row(line)

        # Provide an effective way to terminate early for testing.
        if num_rows == 0:
            break

    return tbl
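
The except RuntimeError branch matters because a fixed-size open-addressing table can fill up once the number of words approaches the table size. A toy sketch of that failure mode (a stand-in, not the actual ch03.hashtable_open.Hashtable):

class ToyOpenTable:
    """Fixed-size open addressing with linear probing; raises when full."""
    def __init__(self, size):
        self.size = size
        self.table = [None] * size

    def put(self, key, value):
        idx = hash(key) % self.size
        for _ in range(self.size):
            if self.table[idx] is None or self.table[idx][0] == key:
                self.table[idx] = (key, value)
                return
            idx = (idx + 1) % self.size   # probe the next bucket
        raise RuntimeError('Table is full.')

table = ToyOpenTable(4)
for word in ['a', 'b', 'c', 'd', 'e']:
    try:
        table.put(word, 99)
    except RuntimeError:
        print('table of size 4 filled up when adding', repr(word))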