def insertion_sort_bas(max_k=18, output=True, decimals=3):
    """
    Tabulate Insertion Sort timings next to two fitted models.

    Timings for N = 2**8 .. 2**11 are used to fit ``quadratic_model`` and
    ``n_log_n_model``; the table then lists measured time and both model
    predictions for every N = 2**k with 8 <= k < max_k. Each timing is 1000
    times the smallest total reported by ``timeit.repeat`` (10 repeats of 10
    executions). If ``numpy_error`` is set, no fit is done and all
    coefficients are zero.

    Returns the populated DataTable.
    """
    setup_template = '''
import random
from ch05.sorting import insertion_sort_bas
A=list(range({}))
random.shuffle(A)'''

    def measure(size):
        """Best observed timing for sorting a freshly shuffled list of `size` values."""
        return 1000 * min(
            timeit.repeat(stmt='insertion_sort_bas(A)',
                          setup=setup_template.format(size),
                          repeat=10,
                          number=10))

    # Training data for the prototype models.
    train_sizes = [2**k for k in range(8, 12)]
    train_times = [measure(size) for size in train_sizes]

    # curve_fit returns the coefficients as the first element of its result.
    if numpy_error:
        log_coeffs = quadratic_coeffs = [0, 0]
    else:
        import numpy as np
        from scipy.optimize import curve_fit

        log_coeffs, _ = curve_fit(n_log_n_model, np.array(train_sizes), np.array(train_times))
        quadratic_coeffs, _ = curve_fit(quadratic_model, np.array(train_sizes), np.array(train_times))

    if output:
        print('Quadratic = {}*N*N + {}*N'.format(quadratic_coeffs[0], quadratic_coeffs[1]))
        print('Log = {:.12f}*N*log2(N)'.format(log_coeffs[0]))
        print()

    def predictions(size):
        """Quadratic and N log N model estimates for `size`."""
        return [
            quadratic_model(size, quadratic_coeffs[0], quadratic_coeffs[1]),
            n_log_n_model(size, log_coeffs[0]),
        ]

    tbl = DataTable([12, 10, 10, 10], ['N', 'Time', 'Quad', 'Log'],
                    output=output, decimals=decimals)

    # Rows for the sizes the models were trained on.
    for size, elapsed in zip(train_sizes, train_times):
        tbl.row([size, elapsed] + predictions(size))

    # Rows for larger sizes, measured now and compared against the models.
    for size in (2**k for k in range(12, max_k)):
        elapsed = measure(size)
        tbl.row([size, elapsed] + predictions(size))

    return tbl
def trial_merge_sort_python_style(max_k=15, output=True, decimals=3):
    """
    Time merge sort against its slicing-based variant.

    For each N = 2**k with 8 <= k < max_k, a shuffled list of N integers is
    sorted by ``ch05.challenge.slice_merge_sort`` and ``ch05.merge.merge_sort``.
    Each value is 1000 times the smallest total from ``timeit.repeat``
    (10 repeats of 10 executions). Returns the populated DataTable.
    """
    setup_template = '''
import random
from {} import {}
A=list(range({}))
random.shuffle(A)'''

    def measure(module, func, size):
        """Best observed timing for `func` (imported from `module`) on `size` values."""
        return 1000 * min(
            timeit.repeat(stmt='{}(A)'.format(func),
                          setup=setup_template.format(module, func, size),
                          repeat=10,
                          number=10))

    tbl = DataTable([8, 8, 8], ['N', 'merge', 'mergeSlice'],
                    output=output, decimals=decimals)

    for size in (2**k for k in range(8, max_k)):
        # The slicing variant is timed first, then the regular merge sort.
        slice_time = measure('ch05.challenge', 'slice_merge_sort', size)
        merge_time = measure('ch05.merge', 'merge_sort', size)
        tbl.row([size, merge_time, slice_time])

    return tbl
def worst_heights(max_n=40, output=True):
    """
    Probe random AVL trees of n nodes looking for the tallest ones.

    For each n in 1 .. max_n-1, 10001 trees are built by inserting n random
    floats, and the greatest root height seen is recorded. A row is emitted
    only when that height exceeds every height reported for smaller n.

    This is an empirical exploration, not a proof of worst-case height.
    Note that 'NumberFound' counts the trees that *tied* the record after it
    was set; the tree that first reached the record height is not included.

    Returns the populated DataTable.
    """
    from ch06.balanced import BinaryTree

    tbl = DataTable([8, 8, 8], ['N', 'WorstHeight', 'NumberFound'], output=output)
    tbl.format('WorstHeight', 'd')
    tbl.format('NumberFound', ',d')

    reported_height = -1
    for n in range(1, max_n):
        ties = 0
        tallest = -1
        for _trial in range(10001):
            tree = BinaryTree()
            for _key in range(n):
                tree.insert(random.random())

            height = tree.root.height
            if height > tallest:
                # New record for this n: restart the tie counter.
                tallest, ties = height, 0
            elif height == tallest:
                ties += 1

        if tallest > reported_height:
            tbl.row([n, tallest, ties])
            reported_height = tallest

    return tbl
def another_fragment_counting(max_k=20, output=True):
    """
    Tabulate f4(N) against a fitted A*log2(log2(N)) model.

    The single coefficient is fitted on N = 2**5 .. 2**14. The table covers
    N = 2**k for 5 <= k < max_k (i.e. up to, but not including, 2**max_k).
    When ``numpy_error`` is set no fit is performed and the coefficient is 0.

    Returns the populated DataTable.
    """
    if numpy_error:
        a = 0, 0
    else:
        import numpy as np
        from scipy.optimize import curve_fit

        def log_log_model(n, coeff):
            """Model coeff*log2(log2(n)) with a single coefficient."""
            return coeff * np.log2(np.log2(n))

        # Train the model on a fixed range of sizes.
        training_sizes = [2**k for k in range(5, 15)]
        observed = [f4(size) for size in training_sizes]

        a, _ = curve_fit(log_log_model, np.array(training_sizes), np.array(observed))
        if output:
            print('LOG_LOG_MODEL = {}*log(log(N))'.format(a))

    tbl = DataTable([8, 8, 8], ['N', 'F4', 'Model'], output=output)
    tbl.format('F4', 'd')
    for size in (2**k for k in range(5, max_k)):
        tbl.row([size, f4(size), a[0] * math.log2(math.log2(size))])
    return tbl
def table_trials(max_k=15, output=True, decimals=3):
    """
    Compare Merge Sort with Python's built-in ``list.sort``.

    Covers N = 2**k for 8 <= k < max_k (up to, but not including, 2**max_k).
    Each value is the best of 20 repeats of 15 executions, divided by 15 and
    multiplied by 1000. Returns the populated DataTable.
    """
    executions = 15

    def measure(stmt, setup):
        """Best per-execution timing (scaled by 1000) for `stmt`."""
        return 1000 * min(
            timeit.repeat(stmt=stmt, setup=setup, repeat=20, number=executions)) / executions

    merge_setup = '''
import random
from ch05.merge import merge_sort
x=list(range({}))
random.shuffle(x)'''
    builtin_setup = '''
import random
x=list(range({}))
random.shuffle(x)'''

    tbl = DataTable([8, 10, 10], ['N', 'MergeSort', 'Built-In Sort'],
                    output=output, decimals=decimals)

    for size in (2**k for k in range(8, max_k)):
        merge_time = measure('merge_sort(x)', merge_setup.format(size))
        builtin_time = measure('x.sort()', builtin_setup.format(size))
        tbl.row([size, merge_time, builtin_time])

    return tbl
def run_access_trials(max_trials=100000, output=True, decimals=5):
    """
    Time three ways of looking up a month's day count from ``ch03.months``.

    Each approach is executed ``max_trials`` times per repeat, with 10 repeats;
    the smallest total is reported. The single row holds, in order, the
    dictionary lookup, ``days_mixed`` and ``days_bas`` timings.

    Returns the populated DataTable.
    """
    tbl = DataTable([10, 10, 10], ['Dict', 'Raw', 'BAS'],
                    output=output, decimals=decimals)
    tbl.format('Dict', 'f')

    # (statement, setup) pairs in column order.
    candidates = [
        ('days_in_month[s_data[2]]', 'from ch03.months import s_data, days_in_month'),
        ('days_mixed(s_data[2])', 'from ch03.months import s_data, days_mixed'),
        ('days_bas(s_data[2])', 'from ch03.months import s_data, days_bas'),
    ]

    timings = []
    for stmt, setup in candidates:
        timings.append(
            min(timeit.repeat(stmt=stmt, setup=setup, repeat=10, number=max_trials)))

    tbl.row(timings)
    return tbl
def time_results_linked(output=True, decimals=3):
    """
    Time filling a ``ch03.hashtable_linked.Hashtable`` for varying sizes.

    Each row is a number of English words to insert (32 .. 16384); each column
    is a table size (8192 .. 1048576). A cell is the time for 100 executions
    of "create the table and put every word", scaled as 1000000 * time / size.

    Returns the populated DataTable.
    """
    sizes = [2**k for k in range(13, 21)]       # 8192 .. 1048576
    word_counts = [2**k for k in range(5, 15)]  # 32 .. 16384

    tbl = DataTable([8] + [8] * len(sizes),
                    ['N'] + [comma(sz) for sz in sizes],
                    output=output, decimals=decimals)

    fill_stmt = '''
table = Hashtable({})
for word in words:
    table.put(word, 99)'''
    fill_setup = '''
from ch03.hashtable_linked import Hashtable
words={}'''

    words = english_words()
    for num_to_add in word_counts:
        # The word list is embedded into the setup code as a literal.
        setup = fill_setup.format(words[:num_to_add])

        line = [num_to_add]
        for size in sizes:
            elapsed = min(
                timeit.repeat(stmt=fill_stmt.format(size),
                              setup=setup,
                              repeat=1,
                              number=100))
            line.append(1000000 * elapsed / size)
        tbl.row(line)

    return tbl
def algorithms_x_y():
    """
    Generate table of operation counts and time estimates for algorithms X and Y.

    For N = 2**2 .. 2**23 the table lists the operation counts of X (5*N) and
    Y (2020*log2(N)), followed by those counts divided by fixed rates:
    X/1500, X/3000, Y/1500 and X/(250*3000).

    Returns the populated DataTable.
    """
    def alg_x(n):
        """Number of operations for algorithm X."""
        return 5 * n

    def alg_y(n):
        """Number of operations for algorithm Y."""
        # math.log2 is exact for powers of two, whereas log(n)/log(2) can land
        # just below an integer and make the int() truncation below off by one.
        return 2020 * math.log2(n)

    tbl = DataTable([15, 15, 8, 8, 8, 8, 8],
                    ['N', 'X', 'Y', 'X_slow', 'X_fast', 'Y_fast', 'X_fastest'],
                    decimals=1)
    tbl.format('X', ',d')
    tbl.format('Y', ',d')

    for n in [2**k for k in range(2, 24)]:
        ops_x = alg_x(n)
        ops_y = alg_y(n)
        tbl.row([
            n,
            ops_x,
            int(ops_y),
            ops_x / 1500,
            ops_x / 3000,
            ops_y / 1500,
            ops_x / (250 * 3000),
        ])
    return tbl
def search_trials():
    """
    Compare BFS, DFS and Guided Search on mazes of growing size.

    First table: for each N in 4 .. 128, build 512 NxN mazes (seeding
    ``random`` with 0 .. 511) and report the average value returned by each
    annotated search. Second table: the same three searches on the single maze
    produced by ``maze_to_defeat_guided_search(N)``.

    Nothing is returned; the tables are only emitted by DataTable.
    """
    import random
    from ch07.maze import to_networkx, distance_to

    def run_searches(maze):
        """Return (bfs, dfs, guided) results for searching `maze` start to end."""
        graph = to_networkx(maze)
        bfs = annotated_bfs_search(graph, maze.start(), maze.end())
        dfs = annotated_dfs_search(graph, maze.start(), maze.end())
        guided = annotated_guided_search(graph, maze.start(), maze.end(), distance_to)
        return bfs, dfs, guided

    # Averages over 512 randomly generated mazes per size.
    tbl = DataTable([8, 8, 8, 8], ['N', 'BFS', 'DS', 'GS'], decimals=2)
    for N in [4, 8, 16, 32, 64, 128]:
        total_bfs = total_dfs = total_gs = 0
        for seed in range(512):
            random.seed(seed)
            bfs, dfs, guided = run_searches(Maze(N, N))
            total_bfs += bfs
            total_dfs += dfs
            total_gs += guided
        tbl.row([N, total_bfs / 512, total_dfs / 512, total_gs / 512])

    # A maze constructed specifically to work against Guided Search.
    tbl = DataTable([8, 8, 8, 8], ['N', 'BFS', 'DS', 'GS'], decimals=2)
    for N in [4, 8, 16, 32, 64, 128]:
        bfs, dfs, guided = run_searches(maze_to_defeat_guided_search(N))
        tbl.row([N, bfs, dfs, guided])
def performance_different_approaches(output=True):
    """
    Count comparisons made by each largest-two algorithm on three data sets.

    Every algorithm from ``ch01.largest_two`` is run on 524,288 RecordedItem
    values in ascending order, in descending order, and in an alternating
    low/high order. Each cell is the sum of ``RecordedItem.report()`` after
    that run. Returns the populated DataTable.
    """
    headers = ['Algorithm', 'Ascending', 'Descending', 'Alternating']
    n = 524288

    tbl = DataTable([15, 10, 10, 10], headers, output=output)
    for hdr in headers:
        tbl.format(hdr, ',d')
    tbl.format('Algorithm', 's')

    from ch01.largest_two import largest_two, sorting_two, double_two, mutable_two, tournament_two

    def recorded_total(func, values):
        """Run `func` on RecordedItem wrappers of `values`; return the summed report."""
        RecordedItem.clear()
        func([RecordedItem(v) for v in values])
        return sum(RecordedItem.report())

    contenders = [
        ('largest_two', largest_two),
        ('sorting_two', sorting_two),
        ('double_two', double_two),
        ('mutable_two', mutable_two),
        ('tournament_two', tournament_two),
    ]

    for label, func in contenders:
        up_count = recorded_total(func, range(n))
        down_count = recorded_total(func, range(n, 0, -1))

        # Interleave evens counting up with odds counting down.
        weave = itertools.chain(*zip(range(0, n, 2), range(n - 1, 0, -2)))
        weave_count = recorded_total(func, weave)

        tbl.row([label, up_count, down_count, weave_count])

    return tbl
def time_results_open(words, output=True, decimals=4):
    """
    Time filling a ``ch03.hashtable_open.Hashtable`` for varying sizes.

    Each row is a number of words to insert (the first 32 .. 16384 of
    ``words``); each column is a table size (8192 .. 1048576). A cell is the
    time for 100 executions of "create the table and put every word", scaled
    as 100000.0 * time / size. Cells where the word count is not smaller than
    the table size hold SKIP instead.

    Returns the populated DataTable.
    """
    sizes = [2**k for k in range(13, 21)]       # 8192 .. 1048576
    word_counts = [2**k for k in range(5, 15)]  # 32 .. 16384

    tbl = DataTable([8] + [10] * len(sizes), ['N'] + sizes,
                    output=output, decimals=decimals)

    fill_stmt = '''
table = Hashtable({})
for word in words:
    table.put(word, 99)'''
    fill_setup = '''
from ch03.hashtable_open import Hashtable
words={}'''

    for num_to_add in word_counts:
        subset = words[:num_to_add]
        arow = [num_to_add]
        for size in sizes:
            if num_to_add >= size:
                # Only tables strictly larger than the word count are timed.
                arow.append(SKIP)
                continue

            elapsed = min(
                timeit.repeat(stmt=fill_stmt.format(size),
                              setup=fill_setup.format(subset),
                              repeat=1,
                              number=100))
            arow.append((100000.0 * elapsed) / size)
        tbl.row(arow)

    return tbl
def just_compare_sort_tournament_two(max_k=25, output=True, decimals=2):
    """
    Time sorting_two against tournament_two on very large inputs.

    Intended to check whether a crossover occurs (per the original author, it
    does not). For N = 2**k with 10 <= k < max_k, each algorithm is timed over
    5 executions of "shuffle x, then run the algorithm". When ``output`` is
    true, the table's best model for each timing column is printed afterwards.

    Returns the populated DataTable.
    """
    executions = 5
    setup_template = '''
import random
from ch01.largest_two import {}
x=list(range({}))'''

    def measure(func, size):
        """Total time for `executions` shuffle-and-run rounds of `func`."""
        return timeit.timeit(stmt='random.shuffle(x)\n{}(x)'.format(func),
                             setup=setup_template.format(func, size),
                             number=executions)

    tbl = DataTable([15, 10, 15], ['N', 'sorting_two', 'tournament_two'],
                    output=output, decimals=decimals)

    for size in (2**k for k in range(10, max_k)):
        # tournament_two is timed first, then sorting_two.
        tournament_time = measure('tournament_two', size)
        sorting_time = measure('sorting_two', size)
        tbl.row([size, sorting_time, tournament_time])

    if output:
        print()
        for header in tbl.labels[1:]:
            print(header, tbl.best_model(header))

    return tbl
def run_init_trial(output=True):
    """
    Generate the first table of chapter 1: ``native_largest`` timings.

    For N = 100, 1000, ..., 1000000 the function is timed on an ascending and
    a descending list of N integers. Each value is the best of 10 repeats of
    50 executions, divided by 50 and multiplied by 1000.

    Returns the populated DataTable.
    """
    executions = 50

    def measure(stmt, setup):
        """Best per-execution timing (scaled by 1000) for `stmt`."""
        return 1000 * min(
            timeit.repeat(stmt=stmt, setup=setup, repeat=10, number=executions)) / executions

    ascending_setup = '''
from ch01.largest import native_largest
up = list(range(1,{}+1))'''
    descending_setup = '''
from ch01.largest import native_largest
down = list(range({}, 0, -1))'''

    tbl = DataTable([12, 12, 12], ['N', 'Ascending', 'Descending'],
                    output=output, decimals=3)

    for n in (10**k for k in range(2, 7)):
        # Values 1 up to and including N.
        m_up = measure('native_largest(up)', ascending_setup.format(n))

        # Values N down to, but not including, 0.
        m_down = measure('native_largest(down)', descending_setup.format(n))

        tbl.row([n, m_up, m_down])

    return tbl
def compare_time(words, output=True, decimals=4):
    """
    Time inserting ``words`` into a linked hashtable and a perfect-hash table.

    Each table type is created and filled with every word; the reported value
    is the best of 3 repeats of 5 executions, divided by 5. The perfect-hash
    table is constructed without arguments, the linked one with
    ``len(words)``. Returns a DataTable with a single row.
    """
    executions = 5

    def measure(module, constructor_call):
        """Best per-execution time to build and fill a table from `module`."""
        stmt = '''
ht = {}
for w in words:
    ht.put(w,w)'''.format(constructor_call)
        setup = '''
from {} import Hashtable as HL
words={}'''.format(module, words)
        return min(
            timeit.repeat(stmt=stmt, setup=setup, repeat=3, number=executions)) / executions

    tbl = DataTable([8, 8, 8], ['N', 'Linked', 'Perfect'],
                    output=output, decimals=decimals)

    # The perfect-hash table is timed first, then the linked table.
    t_perfect = measure('ch03.hashtable_open_perfect', 'HL()')
    t_linked = measure('ch03.hashtable_linked', 'HL(len(words))')

    tbl.row([len(words), t_linked, t_perfect])
    return tbl
def run_median_trial():
    """
    Time ``linear_median`` against taking the median from ``sorted``.

    For N = 2**k + 1 with 8 <= k < 20, a shuffled ``list(range(N))`` is
    processed by both approaches; the timed statements also assert that the
    result equals N//2. Each value is the best of 10 repeats of 5 executions,
    divided by 5 and multiplied by 1000.

    Returns the populated DataTable.
    """
    executions = 5

    # Both timings share the same setup (including the linear_median import).
    setup_template = '''
import random
from ch01.challenge import linear_median
a = list(range({}))
random.shuffle(a)
'''

    def measure(stmt, size):
        """Best per-execution timing (scaled by 1000) for `stmt` on `size` values."""
        return 1000 * min(
            timeit.repeat(stmt=stmt,
                          setup=setup_template.format(size),
                          repeat=10,
                          number=executions)) / executions

    tbl = DataTable([10, 15, 15], ['N', 'median_time', 'sort_median'])

    for n in (2**k + 1 for k in range(8, 20)):
        t_med = measure('assert(linear_median(a) == {}//2)'.format(n), n)
        t_sort = measure('assert(sorted(a)[{0}//2] == {0}//2)'.format(n), n)
        tbl.row([n, t_med, t_sort])

    return tbl
def run_median_less_than_trial(max_k=20, output=True):
    """
    Count comparisons used to find a median by sorting and by linear_median.

    For N = 2**k + 1 with 8 <= k < max_k, a shuffled list of N RecordedItem
    values is processed by both approaches, and the second entry of
    ``RecordedItem.report()`` is recorded after each (presumably the number
    of less-than invocations -- confirm against RecordedItem).

    Returns the populated DataTable.
    """
    tbl = DataTable([10, 15, 15], ['N', 'median_count', 'sort_median_count'],
                    output=output)
    tbl.format('median_count', ',d')
    tbl.format('sort_median_count', ',d')

    for n in (2**k + 1 for k in range(8, max_k)):
        items = [RecordedItem(i) for i in range(n)]
        random.shuffle(items)

        # sorted() builds a new list, so the shuffled input can be reused below.
        RecordedItem.clear()
        median_by_sort = sorted(items)[n // 2]
        sort_lt = RecordedItem.report()[1]

        RecordedItem.clear()
        median_linear = linear_median(items)
        lin_lt = RecordedItem.report()[1]

        assert median_linear == median_by_sort

        tbl.row([n, lin_lt, sort_lt])

    return tbl
def debug_state(title, G, node_from, dist_to, output=True):
    """
    Show the current state of an all-pairs shortest path computation.

    Builds one table from ``node_from`` (falsy entries shown as '.') and one
    from ``dist_to`` (diagonal entries shown as 0), each with a row and a
    column per node of ``G``. The title line and blank separator lines are
    printed regardless of ``output``.

    Returns a tuple ``(node_from_table, dist_to_table)``.
    """
    from algs.table import DataTable

    print('debug :', title)
    labels = list(G.nodes())
    column_count = len(labels) + 1

    tbl = DataTable([6] * column_count, ['.'] + labels, output=output)
    tbl.format('.', 's')
    for field in labels:
        tbl.format(field, 's')

    for u in labels:
        tbl.row([u] + [node_from[u][v] if node_from[u][v] else '.' for v in labels])
    print()

    tbl_dist_to = DataTable([6] * column_count, ['.'] + labels,
                            output=output, decimals=1)
    tbl_dist_to.format('.', 's')

    for u in labels:
        tbl_dist_to.row([u] + [0 if u == v else dist_to[u][v] for v in labels])
    print()

    return (tbl, tbl_dist_to)
def run_range_analysis(output=True):
    """
    Confirm O(log N) algorithm to find range of duplicates.

    For N = 2**10 .. 2**19, a sorted list is built from N copies of a random
    target value plus N - N//16 further random values, and
    ``ch02.challenge.best_range`` and ``worst_range`` are timed on it. Each
    value is the best of 40 repeats of 50 executions, divided by 50.

    Returns the populated DataTable, so results are available to callers
    even when ``output`` is False.
    """
    tbl = DataTable([8, 8, 8], ['N', 'O(N)', 'O(log N)'], decimals=7, output=output)

    # Setup code shared by both timings: {0} is N, {1} is N//16.
    commands = '''
from random import random
tgt = random()
alist = [tgt] * {0}
for _ in range({0}-{1}):
    alist.append(random())
alist = sorted(alist)
'''

    for n in [2**k for k in range(10, 20)]:
        custom = commands.format(n, n // 16)

        best_times = min(
            timeit.repeat(stmt='best_range(alist, tgt)',
                          setup='''
from ch02.challenge import best_range
{}'''.format(custom),
                          repeat=40,
                          number=50)) / 50

        worst_times = min(
            timeit.repeat(stmt='worst_range(alist, tgt)',
                          setup='''
from ch02.challenge import worst_range
{}'''.format(custom),
                          repeat=40,
                          number=50)) / 50

        tbl.row([n, worst_times, best_times])

    # Previously the table was built but never handed back to the caller.
    return tbl
def visualize_results_floyd_warshall(DG, output=True):
    """
    Run Floyd-Warshall on ``DG`` and show its results.

    When ``output`` is true, the node_from and dist_to tables are emitted
    first. A further table then lists, for every ordered pair of distinct
    nodes with a truthy ``node_from`` entry, the recovered path joined by
    ' -> '; all other cells hold SKIP.

    Nothing is returned.
    """
    from ch07.all_pairs_sp import all_pairs_path_to

    (dist_to, node_from) = floyd_warshall(DG)

    if output:
        output_node_from_floyd_warshall(DG, node_from)
        print()
        output_dist_to_floyd_warshall(DG, dist_to)
        print()

    tbl_path = DataTable([20] * DG.number_of_nodes(), list(DG.nodes()), output=output)
    for node in DG.nodes():
        tbl_path.format(node, 's')

    for src in DG.nodes():
        row = []
        for dst in DG.nodes():
            if src != dst and node_from[src][dst]:
                row.append(' -> '.join(all_pairs_path_to(node_from, src, dst)))
            else:
                # Either the diagonal or no recorded predecessor.
                row.append(SKIP)
        tbl_path.row(row)

    if output:
        print()
def prime_number_difference(words, output=True, decimals=2):
    """
    Identify sensitivity of M to being prime or not.

    Every table size M in 428880 .. 428980 is used to build a linked and an
    open-addressing hashtable keyed by ``base26(word)``; the average and
    maximum statistics reported by ``stats_linked_lists`` and
    ``stats_open_addressing`` are tabulated, with prime sizes flagged.

    When ``output`` is true, a follow-up search tries every 13th size from the
    worst M found up to 10000 beyond it, printing any size whose linked-list
    maximum is even worse.

    Returns the populated DataTable.
    """
    from ch03.hashtable_linked import Hashtable as Linked_Hashtable, stats_linked_lists
    from ch03.hashtable_open import Hashtable as Open_Hashtable, stats_open_addressing
    from ch03.base26 import base26

    # these are prime numbers between 428880 and 428980
    lo = 428880
    primes = [428899, 428951, 428957, 428977]
    hi = 428980

    keys = [base26(w) for w in words]
    tbl = DataTable([12, 6, 8, 8, 8, 8],
                    ['M', 'Prime', 'Avg. LL', 'Max LL', 'Avg. OA', 'Max OA'],
                    output=output, decimals=decimals)
    tbl.format('Prime', 's')
    tbl.format('Max LL', 'd')
    tbl.format('Max OA', 'd')

    worst = 0
    worst_m = 0
    for m in range(lo, hi + 1):
        is_p = 'Prime' if m in primes else ''
        ht_linked = Linked_Hashtable(m)
        ht_open = Open_Hashtable(m)

        for k in keys:
            ht_linked.put(k, 1)
            ht_open.put(k, 1)

        (avg_length_linked, max_length_linked) = stats_linked_lists(ht_linked)
        if max_length_linked > worst:
            worst_m = m
            worst = max_length_linked
        (avg_length_open, max_length_open) = stats_open_addressing(ht_open)
        tbl.row([
            m, is_p, avg_length_linked, max_length_linked,
            avg_length_open, max_length_open
        ])

    # Now try to find any more that exceed this maximum amount
    if output:
        print('Worst was {} for M={}'.format(worst, worst_m))
        for m in range(worst_m, worst_m + 10000, 13):
            ht_linked = Linked_Hashtable(m)
            # The table must actually hold the keys; measuring a freshly
            # created, empty table could never exceed the current worst.
            for k in keys:
                ht_linked.put(k, 1)

            (_, max_length_linked) = stats_linked_lists(ht_linked, False)
            if max_length_linked > worst:
                worst_m = m
                worst = max_length_linked
                print('Worst of {} for M={}'.format(worst, worst_m))
        print('Done')

    return tbl
def count_collisions(num_rows=0, output=True, decimals=1):
    """
    Tabulate chain/probe statistics for hashtables of shrinking size M.

    All English words are inserted into a linked and an open-addressing
    hashtable. The first row uses M = 20*N (N being the number of words);
    M then starts at 2*N and shrinks -- by 5% while above N, to 60% once at
    or below N -- until it is no larger than N/16. Open addressing is only
    exercised while M > N; afterwards its columns show SKIP.

    ``num_rows`` limits how many rows are produced after the first (useful
    for testing); with the default of 0 the countdown never reaches zero and
    the whole range is processed.

    Returns the populated DataTable.
    """
    all_words = english_words()
    N = len(all_words)

    from ch03.hashtable_linked import Hashtable as HL
    from ch03.hashtable_linked import stats_linked_lists
    from ch03.hashtable_open import Hashtable as OHL
    from ch03.hashtable_open import stats_open_addressing

    tbl = DataTable([10, 8, 8, 8, 8], ['M', 'Avg LL', 'Max LL', 'Avg OA', 'Max OA'],
                    output=output, decimals=decimals)
    tbl.format('Max LL', 'd')
    tbl.format('Max OA', 'd')

    # Baseline row: a very sparsely filled table.
    M = 20 * N
    linked_table = HL(M)
    open_table = OHL(M)
    for word in all_words:
        linked_table.put(word, 1)
        open_table.put(word, 1)
    linked_stats = stats_linked_lists(linked_table)
    open_stats = stats_open_addressing(open_table)
    tbl.row([M, linked_stats[0], linked_stats[1], open_stats[0], open_stats[1]])

    M = 2 * N
    while M > N / 16:
        linked_table = HL(M)
        open_table = OHL(M)
        for word in all_words:
            linked_table.put(word, 1)
            if M > N:
                # Open addressing cannot hold more entries than it has slots.
                open_table.put(word, 1)

        linked_stats = stats_linked_lists(linked_table)
        if M > N:
            open_stats = stats_open_addressing(open_table)
        else:
            tbl.format('Avg OA', 's')
            tbl.format('Max OA', 's')
            open_stats = [SKIP, SKIP]

        num_rows -= 1
        tbl.row([M, linked_stats[0], linked_stats[1], open_stats[0], open_stats[1]])

        # Shrink gently while above N; once at or below it, drop to 60% each step.
        M = (M * 95) // 100 if M > N else (M * 6) // 10

        # To allow for testing, stop once the requested number of rows is generated.
        if num_rows == 0:
            break

    return tbl
def generate_hash():
    """
    Tabulate ``hash()`` and ``hash() % 15`` for each word of a fixed phrase.

    Results differ between interpreter runs because Python salts string
    hash values. Returns the populated DataTable.
    """
    phrase = 'a rose by any other name would smell as sweet'

    tbl = DataTable([8, 20, 20], ['key', 'hash(key)', 'hash(key) % 15'])
    for column, spec in (('key', 's'), ('hash(key)', 'd'), ('hash(key) % 15', 'd')):
        tbl.format(column, spec)

    for word in phrase.split():
        code = hash(word)
        tbl.row([word, code, code % 15])
    return tbl
def table_compare_graph_structures(max_k=15, output=True):
    """
    Compare neighbor iteration across three graph representations.

    For N = 2**k with 8 <= k < max_k (up to, but not including, 2**max_k),
    a graph of N nodes is built with ten edges from node 0, and the time to
    iterate over ``G[0]`` summing the yielded values is measured for
    NetworkX, the adjacency-list ``UndirectedGraph`` and the
    ``MatrixUndirectedGraph``. Each value is 1000 times the smallest total
    from ``timeit.repeat`` (20 repeats of 20 executions).

    Returns the populated DataTable.
    """
    tbl = DataTable([8, 10, 10, 10],
                    ['N', 'NetworkX', 'Adjacency List', 'Adjacency Matrix'],
                    output=output)

    iterate_stmt = '''
total=0
for w in G[0]:
    total += w'''

    # {0} is the import line, {1} the graph constructor, {2} the node count.
    setup_template = '''
{0}
G = {1}()
G.add_nodes_from(list(range({2})))
for o in range(10):
    G.add_edge(0, {2}-o-1)'''

    def measure(import_line, constructor, size):
        """Best observed timing for iterating over the neighbors of node 0."""
        return 1000 * min(
            timeit.repeat(stmt=iterate_stmt,
                          setup=setup_template.format(import_line, constructor, size),
                          repeat=20,
                          number=20))

    for N in [2**k for k in range(8, max_k)]:
        undirect_mtime = measure('from ch07.replacement import UndirectedGraph',
                                 'UndirectedGraph', N)

        # This column must time a real NetworkX graph; it previously repeated
        # the UndirectedGraph setup, so two columns measured the same class.
        networkx_mtime = measure('import networkx as nx', 'nx.Graph', N)

        matrix_mtime = measure('from ch07.replacement import MatrixUndirectedGraph',
                               'MatrixUndirectedGraph', N)

        tbl.row([N, networkx_mtime, undirect_mtime, matrix_mtime])

    return tbl
def trial_multiple_rotations(output=True, num_attempts=10000):
    """
    Show trees whose modification triggers several rotations.

    The first table reports, for 1 to 3 rotations, a tree structure found by
    ``find_multiple_rotations`` (the search ranges came from trial and error).
    The second table removes the value ``fib(n + 1) - 1`` from Fibonacci AVL
    trees for n = 6, 8, 10, 12 and reports how much ``rotations[0]`` grew
    during the removal.

    Only the second table is returned.
    """
    from ch05.challenge import fib

    def count_nodes(node):
        """Number of nodes in the subtree rooted at `node`."""
        if node is None:
            return 0
        return 1 + count_nodes(node.left) + count_nodes(node.right)

    tbl = DataTable([6, 6, 6, 6], ['NumRot', 'Height', 'N', 'Random Tree'], output=output)
    tbl.format('Random Tree', 's')
    for column in ('NumRot', 'Height', 'N'):
        tbl.format(column, 'd')

    for extra in range(3):
        structure, _ = find_multiple_rotations(extra, lo=4, hi=40,
                                               num_attempts=num_attempts, output=False)
        tree = recreate_tree(structure)
        tbl.row([extra + 1, tree.height, count_nodes(tree), structure])

    # Now use Fibonacci Trees to accomplish the same result.
    if output:
        print()

    tbl = DataTable([6, 6, 6, 13], ['NumRot', 'Height', 'N', 'Fib AVL Trees'], output=output)
    tbl.format('Fib AVL Trees', 's')
    for column in ('NumRot', 'Height', 'N'):
        tbl.format(column, 'd')

    for n in range(6, 14, 2):
        root = fibonacci_avl(n)
        root.compute_height()
        check_avl_property(root)  # double-check
        structure = tree_structure(root)

        # Capture the tree's shape before the removal changes it.
        bt = ObservableBinaryTree()
        height = root.height
        bt.root = root
        count = count_nodes(root)

        rotations_before = rotations[0]
        bt.remove(fib(n + 1) - 1)
        check_avl_property(bt.root)
        num_rotations = rotations[0] - rotations_before

        tbl.row([num_rotations, height, count, structure])

    return tbl
def count_hash_incremental_move(output=True, decimals=4):
    """
    Time individual ``put`` calls while loading all English words.

    Every word is wrapped in ``CountableHash`` and inserted, first into a
    ``DynamicHashtable(1023)`` and then into a
    ``DynamicHashtableIncrementalResizing(1023, delta=...)`` for each delta in
    512 down to 4. For each table a row is emitted whenever a single ``put``
    takes longer (wall-clock) than any earlier one, and the total elapsed time
    is printed. The summary lines are printed regardless of ``output``.

    Nothing is returned.
    """
    from ch03.book import CountableHash
    from ch03.hashtable_linked import DynamicHashtable

    print(
        'Each emitted row contains an operation more costly than any before...'
    )

    def timed_load(table):
        """Insert every English word into `table`; return total elapsed seconds."""
        tbl = DataTable([20, 10, 10], ['Word', 'N', 'cost'],
                        output=output, decimals=decimals)
        tbl.format('Word', 's')
        tbl.format('N', ',d')

        costliest = 0
        started = time.time()
        for word in english_words():
            before = time.time()
            table.put(CountableHash(word), word)
            cost = time.time() - before
            if cost > costliest:
                costliest = cost
                tbl.row([word, table.N, cost])
        return time.time() - started

    total_normal = timed_load(DynamicHashtable(1023))
    print('Normal:{}'.format(total_normal))

    for delta in [512, 256, 128, 64, 32, 16, 8, 4]:
        incremental = DynamicHashtableIncrementalResizing(1023, delta=delta)
        total_delta = timed_load(incremental)
        print('delta={}, Normal:{}'.format(delta, total_delta))
def output_dist_to_floyd_warshall(DG, dist_to, output=True):
    """
    Build a table of ``dist_to[u][v]`` with one row and column per node of ``DG``.

    Returns the populated DataTable.
    """
    nodes = list(DG.nodes())
    tbl_dt = DataTable([8] * DG.number_of_nodes(), nodes, output=output, decimals=1)

    # Only the first column is reformatted, since it would have been N in tbl.
    tbl_dt.format(nodes[0], 'f')

    for src in DG.nodes():
        tbl_dt.row([dist_to[src][dst] for dst in DG.nodes()])
    return tbl_dt
def average_performance(max_n=65536, output=True, decimals=1):
    """
    Generate table of average performance for different PQ implementations.

    Sizes N double from 256 up to ``max_n``. The heap and ordered linked-list
    implementations are measured for every size; the remaining (slower)
    implementations only up to 16384, so larger rows carry three values.
    Each value is ``1000000 * run_trials(module, N, T) / (T * N)`` with T = 3.

    When ``output`` is true, the table's best model for every column is
    printed afterwards. Returns the populated DataTable.
    """
    T = 3
    base = 256
    cutoff = 16384
    high = max_n

    def per_operation(module, size):
        """Scaled average cost reported by run_trials for `module` at `size`."""
        return 1000000 * run_trials(module, size, T) / (T * size)

    heap = {}
    order_ll = {}
    N = base
    while N <= high:
        order_ll[N] = per_operation('ch04.ordered_list', N)
        heap[N] = per_operation('ch04.heap', N)
        N *= 2

    order_ar = {}
    array = {}
    linked = {}
    builtin = {}
    # Rows are only produced for N <= high, so never time the slow
    # implementations beyond that (this matters when max_n < cutoff).
    slow_limit = min(cutoff, high)
    N = base
    while N <= slow_limit:
        order_ar[N] = per_operation('ch04.ordered', N)
        linked[N] = per_operation('ch04.linked', N)
        array[N] = per_operation('ch04.array', N)
        builtin[N] = per_operation('ch04.builtin', N)
        N *= 2

    tbl = DataTable([8, 8, 8, 8, 8, 8, 8],
                    ['N', 'Heap', 'OrderL', 'Linked', 'OrderA', 'Built-in', 'Array'],
                    output=output, decimals=decimals)
    N = base
    while N <= high:
        if N <= cutoff:
            tbl.row([N, heap[N], order_ll[N], linked[N], order_ar[N], builtin[N], array[N]])
        else:
            # Past the cutoff only the two fast implementations were measured.
            tbl.row([N, heap[N], order_ll[N]])
        N *= 2

    if output:
        print()
        print('Heap', tbl.best_model('Heap'))
        print('OrderL', tbl.best_model('OrderL'))
        print('Linked', tbl.best_model('Linked'))
        print('OrderA', tbl.best_model('OrderA'))
        print('Built-in', tbl.best_model('Built-in'))
        print('Array', tbl.best_model('Array'))
    return tbl
def run_largest_alternate(output=True, decimals=3):
    """
    Compare ``largest`` and ``alternate`` on ascending data.

    For N = 8, 16, ..., 2048 the table lists the summed
    ``RecordedItem.report()`` counts for each function, followed by their
    timings (best of 10 repeats of 50 executions, divided by 50 and
    multiplied by 1000). When ``output`` is true, a linear model for
    ``largest`` and a quadratic model for ``alternate`` are printed.

    Returns the populated DataTable.
    """
    executions = 50

    def measure(func_name, values):
        """Best per-execution timing (scaled by 1000) of `func_name` on a literal list."""
        return 1000 * min(
            timeit.repeat(stmt='{}({})'.format(func_name, values),
                          setup='from ch01.largest import {}'.format(func_name),
                          repeat=10,
                          number=executions)) / executions

    def recorded_counts(func, size):
        """Run `func` on `size` ascending RecordedItems; return the report."""
        items = [RecordedItem(i) for i in range(size)]
        RecordedItem.clear()
        func(items)
        counts = RecordedItem.report()
        RecordedItem.clear()
        return counts

    tbl = DataTable([8, 10, 15, 10, 10],
                    ['N', '#Less', '#LessA', 'largest', 'alternate'],
                    output=output, decimals=decimals)
    tbl.format('#Less', ',d')
    tbl.format('#LessA', ',d')

    for n in (2**k for k in range(3, 12)):
        ascending = list(range(n))
        largest_up = measure('largest', ascending)
        alternate_up = measure('alternate', ascending)

        largest_counts = recorded_counts(largest, n)
        alternate_counts = recorded_counts(alternate, n)

        tbl.row([
            n,
            sum(largest_counts),
            sum(alternate_counts),
            largest_up,
            alternate_up,
        ])

    if output:
        print()
        print('largest', tbl.best_model('largest', Model.LINEAR))
        print('Alternate', tbl.best_model('alternate', Model.QUADRATIC))

    return tbl
def timing_trial(output=True, decimals=3):
    """
    Look for a crossover between sorting_two() and the tournament variants.

    For N = 2**10 .. 2**23, each function from ``ch01.largest_two`` is timed
    once on a shuffled ``list(range(N))`` (shuffled after seeding ``random``
    with N). ``tournament_two_object`` is skipped for N above 1,048,576.
    Per the original author, tournament_two() is always slower than
    sorting_two() because of its high memory demands.

    Returns the populated DataTable.
    """
    setup_template = '''
import random
from ch01.largest_two import {1}
random.seed({0})
x=list(range({0}))
random.shuffle(x)'''

    def measure(func_name, size):
        """Time a single execution of `func_name` on `size` shuffled values."""
        return timeit.timeit(stmt='{}(x)'.format(func_name),
                             setup=setup_template.format(size, func_name),
                             number=1)

    tbl = DataTable([8, 8, 8, 8, 8, 8],
                    ['N', 'Sorting', 'Tournament', 'Tourn. Object',
                     'Tourn. Linked', 'Tourn. Losers'],
                    output=output, decimals=decimals)

    for n in (2**k for k in range(10, 24)):
        sorting_time = measure('sorting_two', n)
        tournament_time = measure('tournament_two', n)

        # The object-based variant is not attempted on the largest inputs.
        if n <= 1048576:
            object_time = measure('tournament_two_object', n)
        else:
            object_time = SKIP

        losers_time = measure('tournament_two_losers', n)
        linked_time = measure('tournament_two_linked', n)

        tbl.row([n, sorting_time, tournament_time, object_time, linked_time, losers_time])

    return tbl
def dag_trials(output=True):
    """
    Confirm DAG single-source shortest path is O(E+N).

    For n = 4, 8, 16, 32, 64, ``mesh_graph(n)`` is built and both
    ``dijkstra_sp`` and ``topological_sp`` are timed from node 1. Each value
    is the best of 20 repeats of 15 executions, divided by 15 and multiplied
    by 1000. Rows are labelled with n*n.

    Returns the populated DataTable, so results are available to callers
    even when ``output`` is False.
    """
    tbl = DataTable([8, 10, 10], ['N', 'Dijkstra', 'Topologic'], output=output)

    for n in [2**k for k in range(2, 7)]:
        dijkstra = 1000 * min(
            timeit.repeat(stmt='dijkstra_sp(dg,1)',
                          setup='''
from ch07.challenge import mesh_graph
from ch07.single_source_sp import dijkstra_sp
dg=mesh_graph({})'''.format(n),
                          repeat=20,
                          number=15)) / 15

        topologic = 1000 * min(
            timeit.repeat(stmt='topological_sp(dg,1)',
                          setup='''
from ch07.challenge import mesh_graph, topological_sp
dg=mesh_graph({})'''.format(n),
                          repeat=20,
                          number=15)) / 15

        tbl.row([n * n, dijkstra, topologic])

    # Previously the table was built but never handed back to the caller.
    return tbl