def perfect_trial(key):
    """Trace, step by step, how the perfect hash of ``key`` is computed.

    For each of the seed tables S1 and S2 this prints the value returned by
    ``hash_f`` followed by the expanded sum ``seed[i] * ord(key[i])`` reduced
    modulo ``len(G)``. It then prints the two ``G`` entries selected by those
    hashes, stores ``key`` in a fresh perfect-hash ``Hashtable`` and reports
    where it landed.

    Returns the index of the first truthy slot in the table, or ``None`` if
    no slot is truthy.
    """
    from ch03.perfect.generated_dictionary import G, S1, S2, hash_f

    def show_steps(label, seeds):
        """Print the hash of key under seeds plus its expansion; return the hash."""
        hashed = hash_f(key, seeds)
        print("hash_f('{}', {})={}".format(key, label, hashed))

        running = 0
        for pos, letter in enumerate(key):
            code = ord(letter)
            print(comma(seeds[pos]), '*', code, end='')
            running += seeds[pos] * code
            # Separator between terms, but not after the final one.
            if pos < len(key) - 1:
                print(' + ', end='')
        print(' % {0} = {1} % {0} = {2}'.format(
            comma(len(G)), comma(running), comma(running % len(G))))
        return hashed

    hk1 = show_steps('S1', S1)
    print()
    hk2 = show_steps('S2', S2)

    for hashed in (hk1, hk2):
        print('G[{}] = {}'.format(comma(hashed), G[hashed]))
    print()

    from ch03.hashtable_open_perfect import Hashtable
    table = Hashtable()
    table.put(key, key)

    # Only one key was inserted, so the first truthy slot is where it lives.
    for slot, occupant in enumerate(table.table):
        if not occupant:
            continue
        print(occupant, 'at index position', slot)
        return slot
    return None
def count_unused():
    """Report how many entries of G are equal to zero, with their percentage."""
    zeros = sum(1 for entry in G if entry == 0)
    print('From G which has', comma(len(G)), 'entries', comma(zeros),
          'of them are zero ({:.2f}%)'.format(100 * zeros / len(G)))
def time_results_linked(output=True, decimals=3):
    """Time inserting English words into ch03.hashtable_linked tables of growing size.

    One row is produced per word count N (32 .. 16,384) and one column per
    table size (8,192 .. 1,048,576). Each cell holds
    ``1000000 * t / size`` where ``t`` is the total time in seconds for 100
    executions of "create a Hashtable(size) and put the first N words".

    ``output`` and ``decimals`` are forwarded to ``DataTable``.
    Returns the populated ``DataTable``.
    """
    sizes = [8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576]
    word_counts = [32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384]

    # Code handed to timeit; '{}' is filled with the table size / word list.
    stmt_template = (
        '\n'
        'table = Hashtable({})\n'
        'for word in words:\n'
        '    table.put(word, 99)'
    )
    setup_template = (
        '\n'
        'from ch03.hashtable_linked import Hashtable\n'
        'words={}'
    )

    tbl = DataTable([8] + [8] * len(sizes),
                    ['N'] + [comma(sz) for sz in sizes],
                    output=output, decimals=decimals)

    words = english_words()
    for num_to_add in word_counts:
        # The word list is embedded into the setup code as a list literal.
        setup_code = setup_template.format(words[:num_to_add])

        line = [num_to_add]
        for size in sizes:
            elapsed = min(timeit.repeat(stmt=stmt_template.format(size),
                                        setup=setup_code,
                                        repeat=1,
                                        number=100))
            line.append(1000000 * elapsed / size)
        tbl.row(line)
    return tbl
def combined_sorted(lo=8, hi=12, output=True):
    """Generate results for different sorting trials and fit an N*log(N) model.

    Builds a table whose rows and columns are both the powers of two
    ``2**lo .. 2**(hi-1)``; the cell for row N, column M holds
    ``run_merge_trial(M, N)``. The diagonal entries of (at most) the first
    five sizes are then fitted with ``n_log_n_model`` and a second table of
    predicted values ``model(N) + model(M)`` is produced.

    Parameters:
        lo, hi: exponent range; sizes are 2**k for k in range(lo, hi).
        output: forwarded to ``DataTable`` for both tables.

    Returns the table of measured results (not the prediction table).
    """
    powers = [2**k for k in range(lo, hi)]
    headers = ['N'] + [comma(p) for p in powers]

    tbl = DataTable([8] * (hi - lo + 1), list(headers), output=output)
    for n in powers:
        tbl.row([n] + [run_merge_trial(m, n) for m in powers])

    # Diagonal values are for 2*M*log(M) so divide in HALF for accurate one
    # build model ONLY for first five values
    x = powers[:5]
    y = [tbl.entry(r, comma(r)) for r in x]

    if numpy_error:
        # No fit possible; a = 0 keeps the prediction step below runnable.
        a = 0
    else:
        import numpy as np
        from scipy.optimize import curve_fit
        # Public location of pearsonr; the scipy.stats.stats namespace is
        # deprecated and scheduled for removal.
        from scipy.stats import pearsonr

        (coeffs, _) = curve_fit(n_log_n_model, np.array(x), np.array(y))
        a = coeffs[0] / 2
        y_fit = [n_log_n_model(r, a) for r in x]

        print()
        print(pearsonr(y, y_fit))
        print()

    print('Prediction')
    model = DataTable([8] * (hi - lo + 1), list(headers), output=output)
    for n in powers:
        base = n_log_n_model(n, a)
        model.row([n] + [base + n_log_n_model(m, a) for m in powers])
    return tbl
def time_results_open_addressing(num_rows=0, output=True, decimals=3):
    """Time inserting English words into ch03.hashtable_open tables of growing size.

    One row is produced per word count N (32 .. 32,768) and one column per
    table size (8,192 .. 1,048,576). Each cell holds
    ``100000.0 * t / size`` where ``t`` is the total time in seconds for 100
    executions of "create a Hashtable(size) and put the first N words".
    A cell is ``SKIP`` when timing raises ``RuntimeError``.

    Parameters:
        num_rows: stop after this many rows. With the default of 0 (or any
            negative value) every row is produced.
        output, decimals: forwarded to ``DataTable``.

    Returns the populated ``DataTable``.
    """
    # NOTE(review): the scale factor here is 100000.0 while
    # time_results_linked uses 1000000 -- confirm the intended unit.
    sizes = [8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576]
    word_counts = [32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768]

    # Code handed to timeit; '{}' is filled with the table size / word count.
    stmt_template = (
        '\n'
        'table = Hashtable({})\n'
        'for word in all_words:\n'
        '    table.put(word, 99)'
    )
    setup_template = (
        '\n'
        'from ch03.hashtable_open import Hashtable\n'
        'from resources.english import english_words\n'
        'all_words=english_words()[:{}]'
    )

    headers = ['N'] + [comma(s) for s in sizes]
    # Eight-wide columns, with a wider final column for the largest size.
    tbl = DataTable([8] * len(sizes) + [10], headers,
                    output=output, decimals=decimals)

    # Column formats do not depend on the row, so set them once up front.
    # NOTE(review): '.3f' is hard-coded and does not follow ``decimals``.
    for label in headers[1:]:
        tbl.format(label, '.3f')

    # Load the word list once; each row only needs a prefix of it.
    words = english_words()

    # Add M words into a table of size N, for every (M, N) combination.
    for num_to_add in word_counts:
        setup_code = setup_template.format(num_to_add)

        line = [len(words[:num_to_add])]
        for size in sizes:
            try:
                elapsed = min(timeit.repeat(stmt=stmt_template.format(size),
                                            setup=setup_code,
                                            repeat=1,
                                            number=100))
            except RuntimeError:
                # Presumably raised when the table cannot hold all the
                # words -- TODO confirm against ch03.hashtable_open.
                timing = SKIP
            else:
                timing = (100000.0 * elapsed) / size
            line.append(timing)

        num_rows -= 1
        tbl.row(line)

        # Provide effective way to terminate early for testing.
        if num_rows == 0:
            break

    return tbl