Beispiel #1
0
def perfect_trial(key):
    """Print each step of the perfect-hash computation for ``key``.

    For both salt tables (S1 first, then S2) the value returned by ``hash_f``
    is printed, followed by the term-by-term weighted sum of the key's
    character codes reduced modulo ``len(G)``. The two entries of ``G``
    selected by those hash values are printed next. Finally ``key`` is stored
    in a fresh perfect Hashtable and the occupied slot is reported.

    Returns the index of the first truthy slot in that table, or None when no
    slot is truthy.
    """
    from ch03.perfect.generated_dictionary import G, S1, S2, hash_f

    def show_weighted_sum(salts):
        """Print ``salts[i] * ord(key[i])`` for each character, then the sum mod len(G)."""
        final_pos = len(key) - 1
        running = 0
        for pos, letter in enumerate(key):
            weight = salts[pos]
            print(comma(weight), '*', ord(letter), end='')
            running += weight * ord(letter)
            if pos != final_pos:
                # Terms are joined by ' + ' everywhere except after the last one.
                print(' + ', end='')
        modulus = len(G)
        print(' % {0} = {1} % {0} = {2}'.format(comma(modulus), comma(running), comma(running % modulus)))

    first_hash = hash_f(key, S1)
    print('hash_f(\'{}\', S1)={}'.format(key, first_hash))
    show_weighted_sum(S1)
    print()

    second_hash = hash_f(key, S2)
    print('hash_f(\'{}\', S2)={}'.format(key, second_hash))
    show_weighted_sum(S2)

    # Show the G entries that the two hash values select.
    for position in (first_hash, second_hash):
        print('G[{}] = {}'.format(comma(position), G[position]))
    print()

    from ch03.hashtable_open_perfect import Hashtable
    table = Hashtable()
    table.put(key, key)

    # Only one key was stored, so report the first slot holding a truthy entry.
    occupied = next(((slot, entry) for slot, entry in enumerate(table.table) if entry), None)
    if occupied is None:
        return None
    slot, entry = occupied
    print(entry, 'at index position', slot)
    return slot
Beispiel #2
0
def sample_separate_chaining_hashtable():
    """Print the state of a linked-list Hashtable after each of five insertions.

    The keys 20, 15, 5, 26 and 19 are stored, in that order, under the values
    'e0' through 'e4' in a Hashtable constructed with argument 7. After every
    insertion one line is printed: the ``key % M`` arithmetic followed by the
    table as rendered by ``readable_linked_list_table``.
    """
    from ch03.hashtable_linked import Hashtable
    table = Hashtable(7)
    for position, key in enumerate([20, 15, 5, 26, 19]):
        table.put(key, 'e{}'.format(position))
        arithmetic = '        {:2d} % {:2d} = {:2d}'.format(key, table.M, key % table.M)
        print(arithmetic, readable_linked_list_table(table))
Beispiel #3
0
def sample_hashtable():
    """Print the state of an open-addressing Hashtable after each of five insertions.

    The keys 20, 15, 5, 26 and 19 are stored, in that order, under the values
    'e0' through 'e4' in a Hashtable constructed with argument 7. After every
    insertion one line is printed: the table as rendered by ``readable_table``
    followed by the ``key % M`` arithmetic.
    """
    from ch03.hashtable_open import Hashtable
    table = Hashtable(7)
    for position, key in enumerate([20, 15, 5, 26, 19]):
        table.put(key, 'e{}'.format(position))
        rendering = readable_table(table)
        arithmetic = '        {:2d} % {:2d} = {:2d}'.format(key, table.M, key % table.M)
        print(rendering, arithmetic)
Beispiel #4
0
def prime_number_difference(words, output=True, decimals=2):
    """Identify sensitivity of M to being prime or not.

    Every word is converted to a key with ``base26``. For each table size M
    from 428880 through 428980 (inclusive), all keys are inserted into a
    linked-list Hashtable and an open-addressing Hashtable of that size, and
    one row holding the results of ``stats_linked_lists`` and
    ``stats_open_addressing`` is added to the result table.

    When ``output`` is true, the largest linked-list maximum seen so far is
    printed and a follow-up search tries sizes ``worst_m, worst_m + 13, ...``
    (below ``worst_m + 10000``), printing each size whose maximum exceeds the
    current worst. This search rebuilds a table for every candidate size, so
    it performs roughly 770 * len(words) insertions.

    Args:
        words: iterable of words to hash.
        output: forwarded to DataTable; also enables the follow-up search and
            its printed progress.
        decimals: forwarded to DataTable.

    Returns:
        The populated DataTable.
    """

    from ch03.hashtable_linked import Hashtable as Linked_Hashtable, stats_linked_lists
    from ch03.hashtable_open import Hashtable as Open_Hashtable, stats_open_addressing
    from ch03.base26 import base26

    # these are prime numbers between 428880 and 428980
    lo = 428880
    primes = [428899, 428951, 428957, 428977]
    hi = 428980

    keys = [base26(w) for w in words]
    tbl = DataTable([12, 6, 8, 8, 8, 8],
                    ['M', 'Prime', 'Avg. LL', 'Max LL', 'Avg. OA', 'Max OA'],
                    output=output,
                    decimals=decimals)
    tbl.format('Prime', 's')
    tbl.format('Max LL', 'd')
    tbl.format('Max OA', 'd')

    # Track the table size that produced the largest linked-list maximum.
    worst = 0
    worst_m = 0
    for m in range(lo, hi + 1):
        is_p = 'Prime' if m in primes else ''
        ht_linked = Linked_Hashtable(m)
        ht_open = Open_Hashtable(m)

        for k in keys:
            ht_linked.put(k, 1)
            ht_open.put(k, 1)

        (avg_length_linked, max_length_linked) = stats_linked_lists(ht_linked)
        if max_length_linked > worst:
            worst_m = m
            worst = max_length_linked
        (avg_length_open, max_length_open) = stats_open_addressing(ht_open)
        tbl.row([
            m, is_p, avg_length_linked, max_length_linked, avg_length_open,
            max_length_open
        ])

    # Now try to find any more that exceed this maximum amount
    if output:
        print('Worst was {} for M={}'.format(worst, worst_m))
        for m in range(worst_m, worst_m + 10000, 13):
            ht_linked = Linked_Hashtable(m)

            # The table has to hold the keys before it is measured; measuring
            # a freshly created (empty) table could never exceed `worst`.
            for k in keys:
                ht_linked.put(k, 1)

            (_, max_length_linked) = stats_linked_lists(ht_linked, False)
            if max_length_linked > worst:
                worst_m = m
                worst = max_length_linked
                print('Worst of {} for M={}'.format(worst, worst_m))
        print('Done')

    return tbl
Beispiel #5
0
def iteration_order(output=True):
    """Tabulate the order in which three hashtable types iterate the same keys.

    Each word of a fixed sentence is stored (as both key and value) in an
    open-addressing table, a linked-list table (both constructed with
    argument 13) and a perfect-hash table. The tables are then iterated in
    lockstep and the first element of each yielded item is written to one
    column per table type.

    ``output`` is forwarded to DataTable. Returns the populated DataTable.
    """

    sentence = 'a rose by any other name would smell as sweet'
    from ch03.hashtable_open import Hashtable as Open_Hashtable
    from ch03.hashtable_linked import Hashtable as Linked_Hashtable
    from ch03.hashtable_open_perfect import Hashtable as Perfect_Hashtable

    # Column headers and tables are kept in the same order throughout.
    headers = ['Open Addressing', 'Separate Chaining', 'Perfect Hash']
    tables = (Open_Hashtable(13), Linked_Hashtable(13), Perfect_Hashtable())

    for word in sentence.split():
        for table in tables:
            table.put(word, word)

    tbl = DataTable([8,8,8], headers, output=output)
    for header in headers:
        tbl.format(header, 's')

    # zip stops as soon as any one of the tables runs out of items.
    for items in zip(*tables):
        tbl.row([item[0] for item in items])
    return tbl
Beispiel #6
0
def count_collisions(num_rows=0, output=True, decimals=1):
    """Build a table of chain/probe statistics for shrinking table sizes M.

    All words from ``english_words()`` (N of them) are stored in a linked-list
    Hashtable and an open-addressing Hashtable. The first row uses M = 20*N.
    Further rows start at M = 2*N and shrink M to 95% per step while M > N,
    then to 60% per step, for as long as M > N/16. Once M is no larger than N
    the open-addressing table is left unfilled and its two columns show SKIP.

    Args:
        num_rows: when positive, stop after that many rows of the shrinking
            loop; with the default of 0 the counter never reaches zero, so
            every row is generated.
        output: forwarded to DataTable.
        decimals: forwarded to DataTable.

    Returns:
        The populated DataTable.
    """
    all_words = english_words()
    N = len(all_words)

    from ch03.hashtable_linked import Hashtable as HL
    from ch03.hashtable_linked import stats_linked_lists
    from ch03.hashtable_open import Hashtable as OHL
    from ch03.hashtable_open import stats_open_addressing

    tbl = DataTable([10,8,8,8,8], ['M', 'Avg LL', 'Max LL', 'Avg OA', 'Max OA'],
                    output=output, decimals=decimals)
    for column in ('Max LL', 'Max OA'):
        tbl.format(column, 'd')

    # Baseline row: a table twenty times larger than the number of words.
    M = 20*N
    linked = HL(M)
    opened = OHL(M)
    for word in all_words:
        linked.put(word, 1)
        opened.put(word, 1)
    linked_avg, linked_max = stats_linked_lists(linked)
    open_avg, open_max = stats_open_addressing(opened)
    tbl.row([M, linked_avg, linked_max, open_avg, open_max])

    M = 2*N
    while M > N/16:
        # Open addressing is only filled while the table is larger than N;
        # otherwise, the insertions will fail.
        fits_open = M > N

        linked = HL(M)
        opened = OHL(M)
        for word in all_words:
            linked.put(word, 1)
            if fits_open:
                opened.put(word, 1)
        linked_avg, linked_max = stats_linked_lists(linked)

        if fits_open:
            open_avg, open_max = stats_open_addressing(opened)
        else:
            # From here on the open-addressing columns hold the SKIP marker.
            for column in ('Avg OA', 'Max OA'):
                tbl.format(column, 's')
            open_avg, open_max = SKIP, SKIP

        num_rows -= 1
        tbl.row([M, linked_avg, linked_max, open_avg, open_max])

        # Shrink by 5% while above N; once at or below N, go down at 60% clip.
        M = (M * 95) // 100 if fits_open else (M * 6) // 10

        # To allow for testing, simple way to break out after a number of rows are generated.
        if num_rows == 0:
            break
    return tbl