def search_trials():
    """
    For randomly constructed NxN mazes, compute efficiency of searching
    strategies on 512 random mazes, as N grows from 4x4 to 128x128.
    """
    import random
    from ch07.maze import to_networkx, distance_to

    tbl = DataTable([8,8,8,8], ['N', 'BFS', 'DFS', 'GS'], decimals=2)
    for N in [4, 8, 16, 32, 64, 128]:
        num_bfs = 0
        num_dfs = 0
        num_gs = 0
        for i in range(512):
            random.seed(i)
            m = Maze(N,N)
            G = to_networkx(m)

            num_bfs += annotated_bfs_search(G, m.start(), m.end())
            num_dfs += annotated_dfs_search(G, m.start(), m.end())
            num_gs += annotated_guided_search(G, m.start(), m.end(), distance_to)

        tbl.row([N, num_bfs/512, num_dfs/512, num_gs/512])

    tbl = DataTable([8,8,8,8], ['N', 'BFS', 'DFS', 'GS'], decimals=2)
    for N in [4, 8, 16, 32, 64, 128]:
        m = maze_to_defeat_guided_search(N)
        G = to_networkx(m)

        num_bfs = annotated_bfs_search(G, m.start(), m.end())
        num_dfs = annotated_dfs_search(G, m.start(), m.end())
        num_gs = annotated_guided_search(G, m.start(), m.end(), distance_to)
        tbl.row([N, num_bfs, num_dfs, num_gs])

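# The guided search above uses ch07.maze.distance_to as its heuristic; that function
# is not shown here. As an illustration only (the repository's implementation may
# differ), such a heuristic estimates the remaining distance from a cell to the
# target, for example the straight-line distance between (row, col) positions.
def euclidean_distance_to_sketch(from_cell, to_cell):
    """Hypothetical heuristic sketch: Euclidean distance between two maze cells."""
    from math import sqrt
    return sqrt((from_cell[0] - to_cell[0])**2 + (from_cell[1] - to_cell[1])**2)
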
def debug_state(title, G, node_from, dist_to, output=True):
    """Useful to show state of all pairs shortest path."""
    from algs.table import DataTable

    print('debug :', title)
    labels = list(G.nodes())

    tbl = DataTable([6] + [6]*len(labels), ['.'] + labels, output=output)
    tbl.format('.','s')
    for field in labels:
        tbl.format(field, 's')
    for u in labels:
        row = [u]
        for v in labels:
            if node_from[u][v]:
                row.append(node_from[u][v])
            else:
                row.append('.')
        tbl.row(row)
    print()

    tbl_dist_to = DataTable([6] + [6]*len(labels), ['.'] + labels, output=output, decimals=1)
    tbl_dist_to.format('.','s')
    for u in labels:
        row = [u]
        for v in labels:
            if u == v:
                row.append(0)
            else:
                row.append(dist_to[u][v])
        tbl_dist_to.row(row)
    print()

    return (tbl, tbl_dist_to)

def trial_multiple_rotations(output=True, num_attempts=10000):
    """Some trial and error went into these ranges."""
    from ch05.challenge import fib

    tbl = DataTable([6, 6, 6, 6], ['NumRot', 'Height', 'N', 'Random Tree'], output=output)
    tbl.format('Random Tree', 's')
    tbl.format('NumRot', 'd')
    tbl.format('Height', 'd')
    tbl.format('N', 'd')

    for extra in range(3):
        (structure, _) = find_multiple_rotations(extra, lo=4, hi=40,
                                                 num_attempts=num_attempts, output=False)
        n = recreate_tree(structure)

        def count_nodes(n):
            if n is None:
                return 0
            return 1 + count_nodes(n.left) + count_nodes(n.right)

        tbl.row([extra + 1, n.height, count_nodes(n), structure])

    # Now use Fibonacci Trees to accomplish the same result.
    if output:
        print()
    tbl = DataTable([6, 6, 6, 13], ['NumRot', 'Height', 'N', 'Fib AVL Trees'], output=output)
    tbl.format('Fib AVL Trees', 's')
    tbl.format('NumRot', 'd')
    tbl.format('Height', 'd')
    tbl.format('N', 'd')

    for n in range(6, 14, 2):
        root = fibonacci_avl(n)
        root.compute_height()
        check_avl_property(root)   # double-check
        structure = tree_structure(root)

        bt = ObservableBinaryTree()
        height = root.height
        bt.root = root
        count = count_nodes(root)

        num_rotations = rotations[0]
        to_delete = fib(n + 1) - 1
        bt.remove(to_delete)
        check_avl_property(bt.root)
        num_rotations = rotations[0] - num_rotations
        tbl.row([num_rotations, height, count, structure])
    return tbl

def count_hash_incremental_move(output=True, decimals=4):
    """
    For all English words, starting with a hashtable of size 1,023 and a load
    factor of 0.75, count how many times the hash code (i.e., %) is invoked.
    """
    from ch03.book import CountableHash
    from ch03.hashtable_linked import DynamicHashtable

    print('Each emitted row contains an operation more costly than any before...')
    ht_dynamic = DynamicHashtable(1023)
    tbl = DataTable([20, 10, 10], ['Word', 'N', 'cost'], output=output, decimals=decimals)
    tbl.format('Word', 's')
    tbl.format('N', ',d')
    max_cost = 0
    now = time.time()
    for w in english_words():
        before = time.time()
        ht_dynamic.put(CountableHash(w), w)
        cost = time.time() - before
        if cost > max_cost:
            max_cost = cost
            tbl.row([w, ht_dynamic.N, cost])
    total_normal = time.time() - now
    print('Normal:{}'.format(total_normal))

    for delta in [512, 256, 128, 64, 32, 16, 8, 4]:
        ht = DynamicHashtableIncrementalResizing(1023, delta=delta)
        tbl = DataTable([20, 10, 10], ['Word', 'N', 'cost'], output=output, decimals=decimals)
        tbl.format('Word', 's')
        tbl.format('N', ',d')
        max_cost = 0
        now = time.time()
        for w in english_words():
            before = time.time()
            ht.put(CountableHash(w), w)
            cost = time.time() - before
            if cost > max_cost:
                max_cost = cost
                tbl.row([w, ht.N, cost])
        total_delta = time.time() - now
        print('delta={}, Normal:{}'.format(delta, total_delta))

def actual_table(output=True):
    """Produce sample table to use for curve fitting."""
    # Sample data
    xvals = [100, 1000, 10000]
    yvals = [0.063, 0.565, 5.946]

    # Coefficients are returned as first argument
    if numpy_error:
        a, b = 0, 0
    else:
        import numpy as np
        from scipy.optimize import curve_fit

        [(a, b), _] = curve_fit(linear_model, np.array(xvals), np.array(yvals))
        if output:
            print('Linear = {}*N + {}'.format(a, b))

        [(qa, qb), _] = curve_fit(quadratic_model, np.array(xvals), np.array(yvals))
        if output:
            print('Quadratic = {}*N*N + {}*N'.format(qa, qb))

        [(na), _] = curve_fit(n_log_n_model, np.array(xvals), np.array(yvals))
        if output:
            print('N Log N = {}*N*log N'.format(na))

    tbl = DataTable([8, 8, 8], ['N', 'Actual', 'Model'], output=output)
    tbl.row([100, 0.063, linear_model(100, a, b)])
    tbl.row([1000, 0.565, linear_model(1000, a, b)])
    tbl.row([10000, 5.946, linear_model(10000, a, b)])

    print(tbl.pearsonr('Actual', 'Model'))
    return tbl

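# The curve fitting above relies on model functions (linear_model, quadratic_model,
# n_log_n_model) defined elsewhere in this repository; they are not shown here.
# Based solely on the printed formulas above, they presumably look something like
# the following sketches (names suffixed with _sketch to mark them as assumptions).
def linear_model_sketch(n, a, b):
    """Hypothetical: a*N + b."""
    return a*n + b

def quadratic_model_sketch(n, a, b):
    """Hypothetical: a*N*N + b*N."""
    return a*n*n + b*n

def n_log_n_model_sketch(n, a):
    """Hypothetical: a*N*log2(N)."""
    import math
    return a*n*math.log2(n)
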
def trial_merge_sort_python_style(max_k=15, output=True, decimals=3):
    """Empirical trial for merge sort using slicing."""
    tbl = DataTable([8, 8, 8], ['N', 'merge', 'mergeSlice'],
                    output=output, decimals=decimals)

    for n in [2**k for k in range(8, max_k)]:
        m_slice = 1000 * min(timeit.repeat(stmt='slice_merge_sort(A)', setup='''
import random
from ch05.challenge import slice_merge_sort
A=list(range({}))
random.shuffle(A)'''.format(n), repeat=10, number=10))

        m_merge = 1000 * min(timeit.repeat(stmt='merge_sort(A)', setup='''
import random
from ch05.merge import merge_sort
A=list(range({}))
random.shuffle(A)'''.format(n), repeat=10, number=10))

        tbl.row([n, m_merge, m_slice])
    return tbl

def insertion_sort_bas(max_k=18, output=True, decimals=3):
    """Generate Table for Insertion Sort."""
    # Evaluate prototype execution
    x = []
    y = []
    for n in [2**k for k in range(8, 12)]:
        m_insert_bas = 1000 * min(timeit.repeat(stmt='insertion_sort_bas(A)', setup='''
import random
from ch05.sorting import insertion_sort_bas
A=list(range({}))
random.shuffle(A)'''.format(n), repeat=10, number=10))
        x.append(n)
        y.append(m_insert_bas)

    # Coefficients are returned as first argument
    if numpy_error:
        log_coeffs = quadratic_coeffs = [0, 0]
    else:
        import numpy as np
        from scipy.optimize import curve_fit

        [log_coeffs, _] = curve_fit(n_log_n_model, np.array(x), np.array(y))
        [quadratic_coeffs, _] = curve_fit(quadratic_model, np.array(x), np.array(y))

    if output:
        print('Quadratic = {}*N*N + {}*N'.format(quadratic_coeffs[0], quadratic_coeffs[1]))
        print('Log = {:.12f}*N*log2(N)'.format(log_coeffs[0]))
        print()

    tbl = DataTable([12, 10, 10, 10], ['N', 'Time', 'Quad', 'Log'],
                    output=output, decimals=decimals)
    for n, p in zip(x, y):
        tbl.row([n, p,
                 quadratic_model(n, quadratic_coeffs[0], quadratic_coeffs[1]),
                 n_log_n_model(n, log_coeffs[0])])

    for n in [2**k for k in range(12, max_k)]:
        m_insert_bas = 1000 * min(timeit.repeat(stmt='insertion_sort_bas(A)', setup='''
import random
from ch05.sorting import insertion_sort_bas
A=list(range({}))
random.shuffle(A)'''.format(n), repeat=10, number=10))
        tbl.row([n, m_insert_bas,
                 quadratic_model(n, quadratic_coeffs[0], quadratic_coeffs[1]),
                 n_log_n_model(n, log_coeffs[0])])
    return tbl

def worst_heights(max_n=40, output=True):
    """
    Generate random AVL trees of n Nodes to find which ones have greatest height.
    Purely speculative and not definitive exploration of potential trees.
    """
    from ch06.balanced import BinaryTree

    tbl = DataTable([8, 8, 8], ['N', 'WorstHeight', 'NumberFound'], output=output)
    tbl.format('WorstHeight', 'd')
    tbl.format('NumberFound', ',d')
    table_max_height = -1
    for n in range(1, max_n):
        number_found = 0
        max_height = -1
        for _ in range(10001):
            avl = BinaryTree()
            for _ in range(n):
                avl.insert(random.random())

            if avl.root.height > max_height:
                max_height = avl.root.height
                number_found = 0
            elif avl.root.height == max_height:
                number_found += 1

        if max_height > table_max_height:
            tbl.row([n, max_height, number_found])
            table_max_height = max_height
    return tbl

def table_trials(max_k=15, output=True, decimals=3):
    """Compare Merge Sort against built-in Python sort up to, but not including, 2**max_k."""
    tbl = DataTable([8, 10, 10], ['N', 'MergeSort', 'Built-In Sort'],
                    output=output, decimals=decimals)

    for n in [2**k for k in range(8, max_k)]:
        msort = 1000 * min(timeit.repeat(stmt='merge_sort(x)', setup='''
import random
from ch05.merge import merge_sort
x=list(range({}))
random.shuffle(x)'''.format(n), repeat=20, number=15)) / 15

        builtin = 1000 * min(timeit.repeat(stmt='x.sort()', setup='''
import random
x=list(range({}))
random.shuffle(x)'''.format(n), repeat=20, number=15)) / 15

        tbl.row([n, msort, builtin])
    return tbl

def run_access_trials(max_trials=100000, output=True, decimals=5):
    """Generate performance table for up to max_trials number of runs."""
    tbl = DataTable([10, 10, 10], ['Dict', 'Raw', 'BAS'], output=output, decimals=decimals)
    tbl.format('Dict', 'f')

    m1 = min(timeit.repeat(stmt='days_in_month[s_data[2]]',
                           setup='from ch03.months import s_data, days_in_month',
                           repeat=10, number=max_trials))
    m2 = min(timeit.repeat(stmt='days_mixed(s_data[2])',
                           setup='from ch03.months import s_data, days_mixed',
                           repeat=10, number=max_trials))
    m3 = min(timeit.repeat(stmt='days_bas(s_data[2])',
                           setup='from ch03.months import s_data, days_bas',
                           repeat=10, number=max_trials))
    tbl.row([m1, m2, m3])
    return tbl

def time_results_linked(output=True, decimals=3):
    """Average time to insert keys into linked-list hashtables of different sizes."""
    sizes = [8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576]
    tbl = DataTable([8] + [8] * len(sizes), ['N'] + [comma(sz) for sz in sizes],
                    output=output, decimals=decimals)

    # Start with the first 32 English words and work up to 16,384 words,
    # inserting each batch into hashtables of the given sizes.
    words = english_words()
    for num_to_add in [32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384]:
        all_words = words[:num_to_add]

        line = [num_to_add]
        for size in sizes:
            time1 = min(timeit.repeat(stmt='''
table = Hashtable({})
for word in words:
    table.put(word, 99)'''.format(size), setup='''
from ch03.hashtable_linked import Hashtable
words={}'''.format(all_words), repeat=1, number=100))
            line.append(1000000 * time1 / size)
        tbl.row(line)
    return tbl

def algorithms_x_y():
    """Generate table for estimates of time for three computers and two algorithms."""
    def alg_x(n):
        """Number of operations for algorithm X."""
        return 5 * n

    def alg_y(n):
        """Number of operations for algorithm Y."""
        return 2020 * math.log(n) / math.log(2)

    tbl = DataTable([15, 15, 8, 8, 8, 8, 8],
                    ['N', 'X', 'Y', 'X_slow', 'X_fast', 'Y_fast', 'X_fastest'],
                    decimals=1)
    tbl.format('X', ',d')
    tbl.format('Y', ',d')
    for n in [2**k for k in range(2, 24)]:
        tbl.row([n, alg_x(n), int(alg_y(n)),
                 alg_x(n) / 1500, alg_x(n) / 3000,
                 alg_y(n) / 1500, alg_x(n) / (250 * 3000)])
    return tbl

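# For intuition about the two operation counts above: X needs 5*N operations while
# Y needs 2020*log2(N), so Y eventually wins as N grows. The following hypothetical
# helper (not part of the original table code; it reuses the module-level math
# import) finds the first power of two where X becomes the more expensive choice.
def crossover_sketch():
    """Hypothetical helper: smallest power of two where 5*N exceeds 2020*log2(N)."""
    n = 2
    while 2020 * math.log2(n) >= 5 * n:
        n *= 2
    return n   # returns 8192 with these constants; the exact crossover is just under N=5,000
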
def another_fragment_counting(max_k=20, output=True):
    """Generate table for counts of fragments for N up to (but not including) 2**max_k."""
    if numpy_error:
        a = 0, 0
    else:
        import numpy as np
        from scipy.optimize import curve_fit

        def log_log_model(n, a):
            """Formula for A*Log_2(Log_2(N)) with single coefficient."""
            logn = np.log2(n)
            return a * np.log2(logn)

        # Train Model
        trials = [2**k for k in range(5, 15)]
        nvals = []
        yvals = []
        for N in trials:
            nvals.append(N)
            yvals.append(f4(N))

        [a, _] = curve_fit(log_log_model, np.array(nvals), np.array(yvals))
        if output:
            print('LOG_LOG_MODEL = {}*log(log(N))'.format(a))

    trials = [2**k for k in range(5, max_k)]
    tbl = DataTable([8, 8, 8], ['N', 'F4', 'Model'], output=output)
    tbl.format('F4', 'd')
    for N in trials:
        tbl.row([N, f4(N), a[0] * math.log2(math.log2(N))])
    return tbl

def run_median_less_than_trial(max_k=20, output=True):
    """Use RecordedItem to count # of times less-than is invoked, for N up to (but not including) 2**max_k."""
    tbl = DataTable([10, 15, 15], ['N', 'median_count', 'sort_median_count'], output=output)
    tbl.format('median_count', ',d')
    tbl.format('sort_median_count', ',d')
    trials = [2**k + 1 for k in range(8, max_k)]
    for n in trials:
        A = list([RecordedItem(i) for i in range(n)])
        random.shuffle(A)

        # sorted() produces an external sorted copy, so the shuffled list can be reused below
        RecordedItem.clear()
        med2 = sorted(A)[n // 2]
        sort_lt = RecordedItem.report()[1]

        RecordedItem.clear()
        med1 = linear_median(A)
        lin_lt = RecordedItem.report()[1]

        assert med1 == med2

        tbl.row([n, lin_lt, sort_lt])
    return tbl

def performance_different_approaches(output=True):
    """Produce results on # less-than for different algorithms and data sets."""
    headers = ['Algorithm', 'Ascending', 'Descending', 'Alternating']
    n = 524288

    tbl = DataTable([15, 10, 10, 10], headers, output=output)
    for hdr in headers:
        tbl.format(hdr, ',d')
    tbl.format('Algorithm', 's')

    # Ascending / Descending / Weave
    from ch01.largest_two import largest_two, sorting_two, double_two, mutable_two, tournament_two
    funcs = [largest_two, sorting_two, double_two, mutable_two, tournament_two]
    algs = ['largest_two', 'sorting_two', 'double_two', 'mutable_two', 'tournament_two']

    for label, func in zip(algs, funcs):
        RecordedItem.clear()
        func([RecordedItem(i) for i in range(n)])
        up_count = sum(RecordedItem.report())

        RecordedItem.clear()
        func([RecordedItem(i) for i in range(n, 0, -1)])
        down_count = sum(RecordedItem.report())

        RecordedItem.clear()
        up_down = zip(range(0, n, 2), range(n - 1, 0, -2))
        func([RecordedItem(i) for i in itertools.chain(*up_down)])
        weave_count = sum(RecordedItem.report())

        tbl.row([label, up_count, down_count, weave_count])
    return tbl

def visualize_results_floyd_warshall(DG, output=True):
    """Output the node_from and dist_to arrays for Floyd-Warshall after completion."""
    from ch07.all_pairs_sp import all_pairs_path_to

    (dist_to, node_from) = floyd_warshall(DG)
    if output:
        output_node_from_floyd_warshall(DG, node_from)
        print()
        output_dist_to_floyd_warshall(DG, dist_to)
        print()

    tbl_path = DataTable([20] * DG.number_of_nodes(), list(DG.nodes()), output=output)
    for n in DG.nodes():
        tbl_path.format(n, 's')
    for n in DG.nodes():
        row = []
        for v in DG.nodes():
            if n == v:
                row.append(SKIP)
            else:
                if node_from[n][v]:
                    nodes = all_pairs_path_to(node_from, n, v)
                    row.append(' -> '.join(nodes))
                else:
                    row.append(SKIP)
        tbl_path.row(row)
    if output:
        print()

def just_compare_sort_tournament_two(max_k=25, output=True, decimals=2):
    """Very large data sets to investigate whether a crossover occurs (it does not)."""
    tbl = DataTable([15, 10, 15], ['N', 'sorting_two', 'tournament_two'],
                    output=output, decimals=decimals)
    trials = [2**k for k in range(10, max_k)]
    num = 5
    for n in trials:
        m_tt = timeit.timeit(stmt='random.shuffle(x)\ntournament_two(x)', setup='''
import random
from ch01.largest_two import tournament_two
x=list(range({}))'''.format(n), number=num)

        m_st = timeit.timeit(stmt='random.shuffle(x)\nsorting_two(x)', setup='''
import random
from ch01.largest_two import sorting_two
x=list(range({}))'''.format(n), number=num)

        tbl.row([n, m_st, m_tt])

    if output:
        print()
        for header in tbl.labels[1:]:
            print(header, tbl.best_model(header))
    return tbl

def run_init_trial(output=True):
    """First Table in chapter 1."""
    n = 100
    tbl = DataTable([12, 12, 12], ['N', 'Ascending', 'Descending'], output=output, decimals=3)

    while n <= 1000000:
        # Ascending list of 1 up to and including N
        m_up = 1000 * min(timeit.repeat(stmt='native_largest(up)', setup='''
from ch01.largest import native_largest
up = list(range(1,{}+1))'''.format(n), repeat=10, number=50)) / 50

        # Descending list of N down to but not including 0
        m_down = 1000 * min(timeit.repeat(stmt='native_largest(down)', setup='''
from ch01.largest import native_largest
down = list(range({}, 0, -1))'''.format(n), repeat=10, number=50)) / 50

        tbl.row([n, m_up, m_down])
        n *= 10
    return tbl

def run_range_analysis(output=True):
    """Confirm O(log N) algorithm to find range of duplicates."""
    tbl = DataTable([8, 8, 8], ['N', 'O(N)', 'O(log N)'], decimals=7, output=output)

    commands = '''
from random import random
tgt = random()
alist = [tgt] * {0}
for _ in range({0}-{1}):
    alist.append(random())
alist = sorted(alist)
'''
    for n in [2**k for k in range(10, 20)]:
        custom = commands.format(n, n // 16)

        best_times = min(timeit.repeat(stmt='best_range(alist, tgt)', setup='''
from ch02.challenge import best_range
{}'''.format(custom), repeat=40, number=50)) / 50

        worst_times = min(timeit.repeat(stmt='worst_range(alist, tgt)', setup='''
from ch02.challenge import worst_range
{}'''.format(custom), repeat=40, number=50)) / 50

        tbl.row([n, worst_times, best_times])

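# best_range above is imported from ch02.challenge and is not shown here. Purely as
# an illustration of the O(log N) idea being timed (the repository's implementation
# may differ), the range of duplicates in a sorted list can be found with two
# binary searches from the standard library.
def best_range_sketch(alist, target):
    """Hypothetical sketch: return (lo, hi) slice bounds of target within sorted alist."""
    from bisect import bisect_left, bisect_right
    lo = bisect_left(alist, target)    # first index whose value is >= target
    hi = bisect_right(alist, target)   # first index whose value is > target
    return (lo, hi)                    # target occupies alist[lo:hi]
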
def compare_time(words, output=True, decimals=4):
    """Generate table of performance differences with linked hashtable and perfect hashing."""
    tbl = DataTable([8, 8, 8], ['N', 'Linked', 'Perfect'], output=output, decimals=decimals)

    t_perfect = min(timeit.repeat(stmt='''
ht = HL()
for w in words:
    ht.put(w,w)''', setup='''
from ch03.hashtable_open_perfect import Hashtable as HL
words={}'''.format(words), repeat=3, number=5)) / 5

    t_linked = min(timeit.repeat(stmt='''
ht = HL(len(words))
for w in words:
    ht.put(w,w)''', setup='''
from ch03.hashtable_linked import Hashtable as HL
words={}'''.format(words), repeat=3, number=5)) / 5

    tbl.row([len(words), t_linked, t_perfect])
    return tbl

def run_median_trial():
    """Generate table for Median Trial."""
    tbl = DataTable([10, 15, 15], ['N', 'median_time', 'sort_median'])

    trials = [2**k + 1 for k in range(8, 20)]
    for n in trials:
        t_med = 1000 * min(timeit.repeat(stmt='assert(linear_median(a) == {}//2)'.format(n), setup='''
import random
from ch01.challenge import linear_median
a = list(range({}))
random.shuffle(a)
'''.format(n), repeat=10, number=5)) / 5

        t_sort = 1000 * min(timeit.repeat(stmt='assert(sorted(a)[{0}//2] == {0}//2)'.format(n), setup='''
import random
from ch01.challenge import linear_median
a = list(range({}))
random.shuffle(a)
'''.format(n), repeat=10, number=5)) / 5

        tbl.row([n, t_med, t_sort])
    return tbl

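# linear_median above is imported from ch01.challenge and is not reproduced here.
# For orientation only, an average-case linear-time selection can be sketched with
# random partitioning; this is a hypothetical illustration, not the repository's code.
def quickselect_sketch(A, k):
    """Hypothetical sketch: k-th smallest element of A (average-case linear time)."""
    import random
    pivot = random.choice(A)
    below = [x for x in A if x < pivot]
    equal = [x for x in A if x == pivot]
    above = [x for x in A if x > pivot]
    if k < len(below):
        return quickselect_sketch(below, k)
    if k < len(below) + len(equal):
        return pivot
    return quickselect_sketch(above, k - len(below) - len(equal))

# Example: quickselect_sketch(list_of_values, len(list_of_values)//2) returns the median.
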
def time_results_open(words, output=True, decimals=4):
    """Average time to insert keys into open-addressing hashtables of different sizes."""
    sizes = [8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576]
    widths = [8] + [10] * len(sizes)
    headers = ['N'] + sizes
    tbl = DataTable(widths, headers, output=output, decimals=decimals)

    # Start with the first 32 words and work up to 16,384 words, inserting each
    # batch into tables of the given sizes (skipping tables too small for the batch).
    for num_to_add in [32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384]:
        all_words = words[:num_to_add]

        arow = [num_to_add]
        for size in sizes:
            if num_to_add < size:
                m1 = min(timeit.repeat(stmt='''
table = Hashtable({})
for word in words:
    table.put(word, 99)'''.format(size), setup='''
from ch03.hashtable_open import Hashtable
words={}'''.format(all_words), repeat=1, number=100))
                arow.append((100000.0 * m1) / size)
            else:
                arow.append(SKIP)
        tbl.row(arow)
    return tbl

def combined_sorted(lo=8, hi=12, output=True):
    """Generate results for different sorting trials."""
    tbl = DataTable([8] * (hi - lo + 1),
                    ['N'] + [comma(2**k) for k in range(lo, hi)], output=output)

    for n in [2**k for k in range(lo, hi)]:
        row = [n]
        for m in [2**k for k in range(lo, hi)]:
            row.append(run_merge_trial(m, n))
        tbl.row(row)

    # Diagonal values are for 2*M*log(M) so divide in HALF for accurate one.
    # Build model ONLY for first five values.
    x = [2**k for k in range(lo, min(lo + 5, hi))]
    y = [tbl.entry(r, comma(r)) for r in [2**k for k in range(lo, min(lo + 5, hi))]]

    if numpy_error:
        a = 0
    else:
        import numpy as np
        from scipy.optimize import curve_fit
        from scipy.stats.stats import pearsonr

        (coeffs, _) = curve_fit(n_log_n_model, np.array(x), np.array(y))
        a = coeffs[0] / 2

        y_fit = [n_log_n_model(r, a) for r in [2**k for k in range(lo, min(lo + 5, hi))]]
        print()
        print(pearsonr(y, y_fit))

    print()
    print('Prediction')
    model = DataTable([8] * (hi - lo + 1),
                      ['N'] + [comma(2**k) for k in range(lo, hi)], output=output)
    for n in [2**k for k in range(lo, hi)]:
        row = [n]
        for m in [2**k for k in range(lo, hi)]:
            row.append(n_log_n_model(n, a) + n_log_n_model(m, a))
        model.row(row)
    return tbl

def prime_number_difference(words, output=True, decimals=2):
    """Identify sensitivity of M to being prime or not."""
    from ch03.hashtable_linked import Hashtable as Linked_Hashtable, stats_linked_lists
    from ch03.hashtable_open import Hashtable as Open_Hashtable, stats_open_addressing
    from ch03.base26 import base26

    # these are prime numbers between 428880 and 428980
    lo = 428880
    primes = [428899, 428951, 428957, 428977]
    hi = 428980

    keys = [base26(w) for w in words]
    tbl = DataTable([12, 6, 8, 8, 8, 8],
                    ['M', 'Prime', 'Avg. LL', 'Max LL', 'Avg. OA', 'Max OA'],
                    output=output, decimals=decimals)
    tbl.format('Prime', 's')
    tbl.format('Max LL', 'd')
    tbl.format('Max OA', 'd')
    worst = 0
    worst_m = 0
    for m in range(lo, hi + 1):
        is_p = 'Prime' if m in primes else ''
        ht_linked = Linked_Hashtable(m)
        ht_open = Open_Hashtable(m)

        for k in keys:
            ht_linked.put(k, 1)
            ht_open.put(k, 1)

        (avg_length_linked, max_length_linked) = stats_linked_lists(ht_linked)
        if max_length_linked > worst:
            worst_m = m
            worst = max_length_linked
        (avg_length_open, max_length_open) = stats_open_addressing(ht_open)
        tbl.row([m, is_p, avg_length_linked, max_length_linked,
                 avg_length_open, max_length_open])

    # Now try to find any more that exceed this maximum amount
    if output:
        print('Worst was {} for M={}'.format(worst, worst_m))
        for m in range(worst_m, worst_m + 10000, 13):
            ht_linked = Linked_Hashtable(m)
            for k in keys:          # populate the table before measuring chain lengths
                ht_linked.put(k, 1)
            (avg_length_linked, max_length_linked) = stats_linked_lists(ht_linked, False)
            if max_length_linked > worst:
                worst_m = m
                worst = max_length_linked
                print('Worst of {} for M={}'.format(worst, worst_m))
        print('Done')
    return tbl

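# base26 (imported above from ch03.base26) converts a word into a single integer
# key before hashing. Its exact definition is not shown in this file; a plausible
# sketch treats each lowercase letter as a digit in base 26, roughly like this.
def base26_sketch(word):
    """Hypothetical sketch: encode a lowercase word as a base-26 integer."""
    value = 0
    for ch in word.lower():
        value = 26 * value + (ord(ch) - ord('a'))
    return value
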
def count_collisions(num_rows=0, output=True, decimals=1):
    """Generate table counting collisions."""
    all_words = english_words()
    N = len(all_words)
    from ch03.hashtable_linked import Hashtable as HL
    from ch03.hashtable_linked import stats_linked_lists
    from ch03.hashtable_open import Hashtable as OHL
    from ch03.hashtable_open import stats_open_addressing

    tbl = DataTable([10,8,8,8,8], ['M', 'Avg LL', 'Max LL', 'Avg OA', 'Max OA'],
                    output=output, decimals=decimals)
    tbl.format('Max LL', 'd')
    tbl.format('Max OA', 'd')

    M = 20*N
    hl = HL(M)
    ohl = OHL(M)
    for w in all_words:
        hl.put(w, 1)
        ohl.put(w, 1)
    avg_size_linked = stats_linked_lists(hl)
    avg_size_open = stats_open_addressing(ohl)
    tbl.row([M, avg_size_linked[0], avg_size_linked[1],
             avg_size_open[0], avg_size_open[1]])

    M = 2*N
    while M > N/16:
        hl = HL(M)
        ohl = OHL(M)
        for w in all_words:
            hl.put(w, 1)
            if M > N:   # otherwise, will fail...
                ohl.put(w, 1)
        avg_size_linked = stats_linked_lists(hl)
        if N < M:
            avg_size_open = stats_open_addressing(ohl)
        else:
            tbl.format('Avg OA', 's')
            tbl.format('Max OA', 's')
            avg_size_open = [SKIP, SKIP]

        num_rows -= 1
        tbl.row([M, avg_size_linked[0], avg_size_linked[1],
                 avg_size_open[0], avg_size_open[1]])

        # Shrink M slowly (95%) while above N, then at a 60% clip once below N.
        if M > N:
            M = (M * 95) // 100
        else:
            M = (M * 6) // 10

        # To allow for testing, simple way to break out after a number of rows are generated.
        if num_rows == 0:
            break
    return tbl

def table_compare_graph_structures(max_k=15, output=True):
    """
    Compare Matrix implementation vs. Adjacency list implementation vs. NetworkX
    for N up to but not including 2**max_k.
    """
    tbl = DataTable([8, 10, 10, 10],
                    ['N', 'NetworkX', 'Adjacency List', 'Adjacency Matrix'],
                    output=output)

    for N in [2**k for k in range(8, max_k)]:
        undirect_mtime = 1000 * min(timeit.repeat(stmt='''
total=0
for w in G[0]:
    total += w''', setup='''
from ch07.replacement import UndirectedGraph
G = UndirectedGraph()
G.add_nodes_from(list(range({0})))
for o in range(10):
    G.add_edge(0, {0}-o-1)'''.format(N), repeat=20, number=20))

        # Time the same neighbor traversal using networkx directly
        networkx_mtime = 1000 * min(timeit.repeat(stmt='''
total=0
for w in G[0]:
    total += w''', setup='''
import networkx as nx
G = nx.Graph()
G.add_nodes_from(list(range({0})))
for o in range(10):
    G.add_edge(0, {0}-o-1)'''.format(N), repeat=20, number=20))

        matrix_mtime = 1000 * min(timeit.repeat(stmt='''
total=0
for w in G[0]:
    total += w''', setup='''
from ch07.replacement import MatrixUndirectedGraph
G = MatrixUndirectedGraph()
G.add_nodes_from(list(range({0})))
for o in range(10):
    G.add_edge(0, {0}-o-1)'''.format(N), repeat=20, number=20))

        tbl.row([N, networkx_mtime, undirect_mtime, matrix_mtime])
    return tbl

def generate_hash():
    """Results are different each time since Python salts hash values."""
    s = 'a rose by any other name would smell as sweet'
    tbl = DataTable([8,20,20], ['key', 'hash(key)', 'hash(key) % 15'])
    tbl.format('key', 's')
    tbl.format('hash(key)', 'd')
    tbl.format('hash(key) % 15', 'd')
    for w in s.split():
        tbl.row([w, hash(w), hash(w) % 15])
    return tbl

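# As the docstring above notes, hash() of a string changes from run to run because
# Python salts string hashes. If reproducible rows are ever needed, one option
# (a suggestion, not part of the original code) is to fix the PYTHONHASHSEED
# environment variable before launching the interpreter, e.g.:
#
#     PYTHONHASHSEED=0 python -c "print(hash('rose'))"
#
# With the same seed, the printed hash value is identical across runs.
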
def generate_heap_table(max_k=18, output=True, decimals=3):
    """
    Generate table comparing heapq and PriorityQueue behaviors.
    """
    # Enqueue table first
    add_tbl = DataTable([8,8,8], ['N','heapq','PriorityQueue'], output=output, decimals=decimals)
    for n in [2**k for k in range(10, max_k)]:
        add_tbl.row([n, heap_add(n, 1000), pq_add(n, 1000)])

    remove_tbl = DataTable([8,8,8], ['N','heapq','PriorityQueue'], output=output, decimals=decimals)
    for n in [2**k for k in range(10, max_k)]:
        remove_tbl.row([n, heap_remove(n, 1000), pq_remove(n, 1000)])

    return (add_tbl, remove_tbl)

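# heap_add, pq_add, heap_remove, and pq_remove are timing helpers defined elsewhere
# in this module and are not shown here. For orientation only, a hypothetical
# heap_add might time pushing n items onto a heapq-backed list, along these lines
# (names and the exact quantity measured are assumptions, not the original helpers):
def heap_add_sketch(n, number):
    """Hypothetical sketch: milliseconds to push n ascending integers with heapq."""
    import timeit
    return 1000 * min(timeit.repeat(stmt='''
h = []
for i in range({}):
    heapq.heappush(h, i)'''.format(n), setup='import heapq', repeat=3, number=number)) / number
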
def generate_stack_table(max_k=18, output=True, decimals=3):
    """
    Generate table showing different stack behaviors.
    """
    # Push table first
    push_tbl = DataTable([8,8,8,8], ['N','list','Dequeue', 'LifoQueue'], output=output, decimals=decimals)
    for n in [2**k for k in range(10, max_k)]:
        push_tbl.row([n, list_push(n, 1000), dequeue_push(n, 1000), queue_push(n, 1000)])

    pop_tbl = DataTable([8,8,8,8], ['N','list','Dequeue', 'LifoQueue'], output=output, decimals=decimals)
    for n in [2**k for k in range(10, max_k)]:
        pop_tbl.row([n, list_pop(n, 1000), dequeue_pop(n, 1000), queue_pop(n, 1000)])

    return (push_tbl, pop_tbl)

def exercise_triangle_number_probing(output=True, decimals=4):
    """Compare triangle number probing with M=powers of 2."""
    tbl = DataTable([20, 8], ['Type', 'Time to Search'], output=output, decimals=decimals)
    tbl.format('Type', 's')

    timing_oa = min(timeit.repeat(stmt='''
for w in words:
    ht.get(w)''', setup='''
from ch03.hashtable_open import Hashtable
from resources.english import english_words
words = english_words()
ht = Hashtable(524288)
for w in words[:160564]:
    ht.put(w,w)''', repeat=7, number=5)) / 5
    tbl.row(['Open Addressing', timing_oa])

    timing_sc = min(timeit.repeat(stmt='''
for w in words:
    ht.get(w)''', setup='''
from ch03.hashtable_linked import Hashtable
from resources.english import english_words
words = english_words()
ht = Hashtable(524288)
for w in words[:160564]:
    ht.put(w,w)''', repeat=7, number=5)) / 5
    tbl.row(['Separate Chaining', timing_sc])

    timing_tn = min(timeit.repeat(stmt='''
for w in words:
    ht.get(w)''', setup='''
from ch03.challenge import HashtableTriangleNumbers
from resources.english import english_words
words = english_words()
ht = HashtableTriangleNumbers(524288)
for w in words[:160564]:
    ht.put(w,w)''', repeat=7, number=5)) / 5
    tbl.row(['Triangle Probing', timing_tn])

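# HashtableTriangleNumbers comes from ch03.challenge and is not reproduced here.
# As background only: with a table size M that is a power of two, triangle-number
# probing advances from the hash bucket by 1, then 2, then 3, ... (i.e., cumulative
# triangle numbers k*(k+1)/2), which visits every bucket. A rough sketch of that
# probe sequence (not the repository's implementation):
def triangle_probe_sequence_sketch(hc, M, limit=10):
    """Hypothetical sketch: first `limit` probe indexes for hash code hc in a table of size M."""
    probes = []
    delta = 1
    index = hc % M
    for _ in range(limit):
        probes.append(index)
        index = (index + delta) % M   # add 1, then 2, then 3, ...
        delta += 1
    return probes
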