def just_compare_sort_tournament_two(max_k=25, output=True, decimals=2):
    """
    Time sorting_two() against tournament_two() on very large lists.

    Problem sizes are 2**10 up to (but not including) 2**max_k. Each reported
    value is the total time for five rounds, where every round reshuffles the
    list and then invokes the algorithm. When `output` is true, the best
    model the table reports for each timing column is printed afterwards.
    Returns the populated DataTable.
    """
    rounds = 5
    setup_template = '''
import random
from ch01.largest_two import {0}
x=list(range({1}))'''

    def elapsed(algorithm, size):
        """Total seconds for `rounds` shuffle-then-call rounds of `algorithm`."""
        return timeit.timeit(stmt='random.shuffle(x)\n{}(x)'.format(algorithm),
                             setup=setup_template.format(algorithm, size),
                             number=rounds)

    tbl = DataTable([15, 10, 15], ['N', 'sorting_two', 'tournament_two'],
                    output=output, decimals=decimals)

    for size in [2**k for k in range(10, max_k)]:
        # Tournament is timed first, but sorting occupies the first data column.
        tournament_time = elapsed('tournament_two', size)
        sorting_time = elapsed('sorting_two', size)
        tbl.row([size, sorting_time, tournament_time])

    if output:
        print()
        for column in tbl.labels[1:]:
            print(column, tbl.best_model(column))
    return tbl
def trial_merge_sort_python_style(max_k=15, output=True, decimals=3):
    """
    Compare merge_sort() with the slicing-based slice_merge_sort().

    Sizes run over 2**8 up to (but not including) 2**max_k. Each cell is
    1000 times the fastest of ten repetitions, where a repetition is the
    total time of ten consecutive calls on the same list. Returns the
    populated DataTable.
    """
    setup_template = '''
import random
from {0} import {1}
A=list(range({2}))
random.shuffle(A)'''

    def fastest(module, func, size):
        """1000 * the smallest ten-call total observed for `func`."""
        totals = timeit.repeat(stmt='{}(A)'.format(func),
                               setup=setup_template.format(module, func, size),
                               repeat=10, number=10)
        return 1000 * min(totals)

    tbl = DataTable([8, 8, 8], ['N', 'merge', 'mergeSlice'],
                    output=output, decimals=decimals)

    for k in range(8, max_k):
        size = 2**k
        slice_time = fastest('ch05.challenge', 'slice_merge_sort', size)
        merge_time = fastest('ch05.merge', 'merge_sort', size)
        tbl.row([size, merge_time, slice_time])
    return tbl
def run_range_analysis(output=True):
    """
    Confirm O(log N) algorithm to find range of duplicates.

    For n = 2**10 .. 2**19 the timed snippets build a sorted list holding
    n copies of a random target plus n - n//16 further random values, then
    time worst_range() and best_range() looking for that target. Each cell
    is the fastest of 40 repetitions, expressed as seconds per call
    (50 calls per repetition).

    Returns the populated DataTable, matching the other table generators
    in this file.
    """
    tbl = DataTable([8, 8, 8], ['N', 'O(N)', 'O(log N)'], decimals=7, output=output)

    # Shared fixture code appended to both setup snippets.
    commands = '''
from random import random
tgt = random()
alist = [tgt] * {0}
for _ in range({0}-{1}):
    alist.append(random())
alist = sorted(alist)
'''
    calls = 50
    for n in [2**k for k in range(10, 20)]:
        custom = commands.format(n, n // 16)

        best_times = min(
            timeit.repeat(stmt='best_range(alist, tgt)', setup='''
from ch02.challenge import best_range
{}'''.format(custom), repeat=40, number=calls)) / calls

        worst_times = min(
            timeit.repeat(stmt='worst_range(alist, tgt)', setup='''
from ch02.challenge import worst_range
{}'''.format(custom), repeat=40, number=calls)) / calls

        # Column order is N, O(N) [worst_range], O(log N) [best_range].
        tbl.row([n, worst_times, best_times])

    # Previously the table was built but never handed back to the caller.
    return tbl
def run_median_trial():
    """
    Build the Median Trial table: linear_median() versus sorting.

    Sizes are 2**k + 1 for k in 8..19. Both timed statements assert that
    the computed median of a shuffled range(n) equals n//2. Each cell is
    1000 times the fastest of ten repetitions, divided by the five calls
    per repetition. Returns the populated DataTable.
    """
    calls = 5
    # Both measurements share the same fixture, including the import.
    setup_template = '''
import random
from ch01.challenge import linear_median
a = list(range({}))
random.shuffle(a)
'''

    def fastest(statement, size):
        """1000 * best repetition / calls for `statement` at this size."""
        totals = timeit.repeat(stmt=statement, setup=setup_template.format(size),
                               repeat=10, number=calls)
        return 1000 * min(totals) / calls

    tbl = DataTable([10, 15, 15], ['N', 'median_time', 'sort_median'])

    for k in range(8, 20):
        size = 2**k + 1
        median_time = fastest('assert(linear_median(a) == {}//2)'.format(size), size)
        sort_time = fastest('assert(sorted(a)[{0}//2] == {0}//2)'.format(size), size)
        tbl.row([size, median_time, sort_time])
    return tbl
def compare_time(words, output=True, decimals=4):
    """
    Time filling a linked hashtable versus a perfect-hashing hashtable.

    `words` is embedded (via str.format) into the timed snippets' setup.
    Each measurement is the fastest of three repetitions divided by the
    five fills per repetition. A single row [len(words), linked, perfect]
    is added. Returns the populated DataTable.
    """
    fills = 5

    def fastest(statement, module):
        """Best per-fill time for `statement` using Hashtable from `module`."""
        setup = '''
from {0} import Hashtable as HL
words={1}'''.format(module, words)
        return min(timeit.repeat(stmt=statement, setup=setup,
                                 repeat=3, number=fills)) / fills

    tbl = DataTable([8, 8, 8], ['N', 'Linked', 'Perfect'],
                    output=output, decimals=decimals)

    # The perfect-hash table is constructed without a size argument.
    perfect_time = fastest('''
ht = HL()
for w in words:
    ht.put(w,w)''', 'ch03.hashtable_open_perfect')

    # The linked table is sized to the number of words.
    linked_time = fastest('''
ht = HL(len(words))
for w in words:
    ht.put(w,w)''', 'ch03.hashtable_linked')

    tbl.row([len(words), linked_time, perfect_time])
    return tbl
def table_trials(max_k=15, output=True, decimals=3):
    """
    Compare merge_sort() with list.sort() for sizes 2**8 up to, but not
    including, 2**max_k.

    Each cell is 1000 times the fastest of twenty repetitions, divided by
    the fifteen calls per repetition. Returns the populated DataTable.
    """
    calls = 15

    def fastest(statement, setup):
        """1000 * best repetition / calls."""
        totals = timeit.repeat(stmt=statement, setup=setup, repeat=20, number=calls)
        return 1000 * min(totals) / calls

    tbl = DataTable([8, 10, 10], ['N', 'MergeSort', 'Built-In Sort'],
                    output=output, decimals=decimals)

    for k in range(8, max_k):
        size = 2**k
        merge_time = fastest('merge_sort(x)', '''
import random
from ch05.merge import merge_sort
x=list(range({}))
random.shuffle(x)'''.format(size))
        builtin_time = fastest('x.sort()', '''
import random
x=list(range({}))
random.shuffle(x)'''.format(size))
        tbl.row([size, merge_time, builtin_time])
    return tbl
def time_results_linked(output=True, decimals=3):
    """
    Time filling ch03.hashtable_linked tables of growing size.

    One row per word count (32 .. 16,384, doubling) and one column per
    table size (8,192 .. 1,048,576, doubling). Each cell times 100 fills of
    a fresh table with the first `num_to_add` English words and reports
    1000000 * elapsed / table size. Returns the populated DataTable.
    """
    sizes = [2**k for k in range(13, 21)]       # 8,192 .. 1,048,576
    word_counts = [2**k for k in range(5, 15)]  # 32 .. 16,384

    tbl = DataTable([8] + [8] * len(sizes), ['N'] + [comma(sz) for sz in sizes],
                    output=output, decimals=decimals)

    words = english_words()
    for num_to_add in word_counts:
        # Add the first num_to_add words into a table of each size in turn.
        setup = '''
from ch03.hashtable_linked import Hashtable
words={}'''.format(words[:num_to_add])

        line = [num_to_add]
        for size in sizes:
            fill = '''
table = Hashtable({})
for word in words:
    table.put(word, 99)'''.format(size)
            # A single repetition of 100 fills.
            elapsed = timeit.timeit(stmt=fill, setup=setup, number=100)
            line.append(1000000 * elapsed / size)
        tbl.row(line)
    return tbl
def run_init_trial(output=True):
    """
    First table in chapter 1: native_largest() on ascending and
    descending lists.

    Sizes are 100, 1,000, ... 1,000,000. Each cell is 1000 times the
    fastest of ten repetitions, divided by the fifty calls per repetition.
    Returns the populated DataTable.
    """
    calls = 50

    def fastest(name, expression):
        """Best per-call time (x1000) of native_largest on list `name`."""
        setup = '''
from ch01.largest import native_largest
{0} = {1}'''.format(name, expression)
        totals = timeit.repeat(stmt='native_largest({})'.format(name), setup=setup,
                               repeat=10, number=calls)
        return 1000 * min(totals) / calls

    tbl = DataTable([12, 12, 12], ['N', 'Ascending', 'Descending'],
                    output=output, decimals=3)

    for size in [10**e for e in range(2, 7)]:
        # Ascending values 1 .. N inclusive.
        ascending = fastest('up', 'list(range(1,{}+1))'.format(size))
        # Descending values N .. 1 inclusive.
        descending = fastest('down', 'list(range({}, 0, -1))'.format(size))
        tbl.row([size, ascending, descending])
    return tbl
def time_results_open(words, output=True, decimals=4):
    """
    Time filling ch03.hashtable_open tables of growing size.

    One row per word count (32 .. 16,384, doubling) and one column per
    table size (8,192 .. 1,048,576, doubling). Where the word count is
    smaller than the table size, the cell times 100 fills of a fresh table
    with the first `num_to_add` entries of `words` and reports
    100000.0 * elapsed / table size; otherwise the cell holds SKIP.
    Returns the populated DataTable.
    """
    sizes = [2**k for k in range(13, 21)]       # 8,192 .. 1,048,576
    word_counts = [2**k for k in range(5, 15)]  # 32 .. 16,384

    tbl = DataTable([8] + [10] * len(sizes), ['N'] + sizes,
                    output=output, decimals=decimals)

    for num_to_add in word_counts:
        subset = words[:num_to_add]
        arow = [num_to_add]
        for size in sizes:
            if not num_to_add < size:
                # No measurement is taken unless the table is larger than the word count.
                arow.append(SKIP)
                continue

            timings = timeit.repeat(stmt='''
table = Hashtable({})
for word in words:
    table.put(word, 99)'''.format(size), setup='''
from ch03.hashtable_open import Hashtable
words={}'''.format(subset), repeat=1, number=100)
            arow.append((100000.0 * min(timings)) / size)
        tbl.row(arow)
    return tbl
def count_collisions(num_rows=0, output=True, decimals=1):
    """
    Tabulate chain/probe statistics as the table size M shrinks.

    A first row is produced for M = 20*N (N = number of English words).
    M then starts at 2*N and shrinks while it stays above N/16: by 5% per
    step while M > N, and to 60% per step afterwards. The open-addressing
    table is only filled and measured while M > N; from then on its two
    columns are switched to string format and hold SKIP.

    `num_rows` allows an early exit for testing: the loop stops once that
    many rows have been produced by the shrinking phase. With the default
    of 0 the counter only goes negative, so the loop runs to completion.
    Returns the populated DataTable.
    """
    all_words = english_words()
    N = len(all_words)

    from ch03.hashtable_linked import Hashtable as HL
    from ch03.hashtable_linked import stats_linked_lists
    from ch03.hashtable_open import Hashtable as OHL
    from ch03.hashtable_open import stats_open_addressing

    tbl = DataTable([10,8,8,8,8], ['M', 'Avg LL', 'Max LL', 'Avg OA', 'Max OA'],
                    output=output, decimals=decimals)
    for column in ('Max LL', 'Max OA'):
        tbl.format(column, 'd')

    # Generously sized baseline row.
    M = 20*N
    linked, opened = HL(M), OHL(M)
    for word in all_words:
        linked.put(word, 1)
        opened.put(word, 1)
    linked_stats = stats_linked_lists(linked)
    open_stats = stats_open_addressing(opened)
    tbl.row([M, linked_stats[0], linked_stats[1], open_stats[0], open_stats[1]])

    M = 2*N
    while M > N/16:
        linked, opened = HL(M), OHL(M)
        room_for_open = M > N   # otherwise, open addressing will fail...
        for word in all_words:
            linked.put(word, 1)
            if room_for_open:
                opened.put(word, 1)

        linked_stats = stats_linked_lists(linked)
        if room_for_open:
            open_stats = stats_open_addressing(opened)
        else:
            tbl.format('Avg OA', 's')
            tbl.format('Max OA', 's')
            open_stats = [SKIP, SKIP]

        num_rows -= 1
        tbl.row([M, linked_stats[0], linked_stats[1], open_stats[0], open_stats[1]])

        # Shrink slowly above N, then drop to 60% per step below it.
        M = (M * 95) // 100 if room_for_open else (M * 6) // 10

        # To allow for testing, simple way to break out after a number of rows.
        if num_rows == 0:
            break
    return tbl
def actual_table(output=True):
    """
    Produce sample table to use for curve fitting.

    Fits linear, quadratic and N log N models to three sample points. When
    numpy/scipy are unavailable (module-level `numpy_error` is true) the
    linear coefficients fall back to 0 and no fitting is attempted. The
    returned DataTable holds each sample next to the linear model's value.

    All diagnostic printing, including the final Pearson correlation, is
    suppressed when `output` is false.
    """
    # Sample data
    xvals = [100, 1000, 10000]
    yvals = [0.063, 0.565, 5.946]

    # Coefficients are returned as first argument
    if numpy_error:
        a, b = 0, 0
    else:
        import numpy as np
        from scipy.optimize import curve_fit

        [(a, b), _] = curve_fit(linear_model, np.array(xvals), np.array(yvals))
        if output:
            print('Linear = {}*N + {}'.format(a, b))

        [(qa, qb), _] = curve_fit(quadratic_model, np.array(xvals), np.array(yvals))
        if output:
            print('Quadratic = {}*N*N + {}*N'.format(qa, qb))

        # Unpack the single coefficient; `(na)` without a trailing comma bound
        # the whole coefficient array, which was then printed as an array.
        [(na,), _] = curve_fit(n_log_n_model, np.array(xvals), np.array(yvals))
        if output:
            print('N Log N = {}*N*log N'.format(na))

    tbl = DataTable([8, 8, 8], ['N', 'Actual', 'Model'], output=output)
    for x, y in zip(xvals, yvals):
        tbl.row([x, y, linear_model(x, a, b)])

    # Honor the output flag like every other print in this function.
    if output:
        print(tbl.pearsonr('Actual', 'Model'))
    return tbl
def table_compare_graph_structures(max_k=15, output=True):
    """
    Compare Matrix implementation vs. Adjacency list implementation vs. NetworkX
    for graphs of 2**8 up to (but not including) 2**max_k nodes.

    Every measurement builds a graph on N nodes, adds ten edges from node 0
    to the ten highest-numbered nodes, and times iterating over G[0]. Each
    cell is 1000 times the fastest of twenty repetitions (twenty iterations
    per repetition). Returns the populated DataTable.

    The NetworkX column uses networkx.Graph. Previously its setup was a
    copy of the adjacency-list setup, so the column duplicated that
    measurement. If networkx cannot be imported the snippet falls back to
    ch07.replacement.UndirectedGraph, which is what was timed before.
    """
    iterate_neighbors = '''
total=0
for w in G[0]:
    total += w'''

    # Edges are added the same way for every implementation under test.
    populate = '''
G.add_nodes_from(list(range({0})))
for o in range(10):
    G.add_edge(0, {0}-o-1)'''

    adjacency_list_graph = '''
from ch07.replacement import UndirectedGraph
G = UndirectedGraph()'''

    networkx_graph = '''
try:
    import networkx as nx
    G = nx.Graph()
except ImportError:
    from ch07.replacement import UndirectedGraph
    G = UndirectedGraph()'''

    matrix_graph = '''
from ch07.replacement import MatrixUndirectedGraph
G = MatrixUndirectedGraph()'''

    def fastest(construct, num_nodes):
        """1000 * best repetition for iterating the neighbors of node 0."""
        setup = construct + populate.format(num_nodes)
        return 1000 * min(timeit.repeat(stmt=iterate_neighbors, setup=setup,
                                        repeat=20, number=20))

    tbl = DataTable([8, 10, 10, 10],
                    ['N', 'NetworkX', 'Adjacency List', 'Adjacency Matrix'],
                    output=output)

    for N in [2**k for k in range(8, max_k)]:
        undirect_mtime = fastest(adjacency_list_graph, N)
        networkx_mtime = fastest(networkx_graph, N)
        matrix_mtime = fastest(matrix_graph, N)
        tbl.row([N, networkx_mtime, undirect_mtime, matrix_mtime])
    return tbl
def generate_hash():
    """
    Tabulate hash(word) and hash(word) % 15 for each word of a sample phrase.

    Results are different each time since Python salts hash values.
    Returns the populated DataTable.
    """
    phrase = 'a rose by any other name would smell as sweet'

    tbl = DataTable([8,20,20], ['key', 'hash(key)', 'hash(key) % 15'])
    for column, spec in (('key', 's'), ('hash(key)', 'd'), ('hash(key) % 15', 'd')):
        tbl.format(column, spec)

    for word in phrase.split():
        code = hash(word)
        tbl.row([word, code, code % 15])
    return tbl
def exercise_triangle_number_probing(output=True, decimals=4):
    """
    Compare triangle number probing with M=powers of 2.

    Three hashtable implementations (open addressing, separate chaining and
    triangle-number probing) are each created with size 524288 and filled
    with the first 160564 English words. The timed statement then calls
    get() for every English word. Each cell is the fastest of seven
    repetitions divided by the five passes per repetition.

    Returns the populated DataTable, matching the other table generators
    in this file (previously nothing was returned).
    """
    passes = 5
    search_all = '''
for w in words:
    ht.get(w)'''
    setup_template = '''
from {module} import {cls}
from resources.english import english_words
words = english_words()
ht = {cls}(524288)
for w in words[:160564]:
    ht.put(w,w)'''

    # (row label, module, hashtable class) in the order rows are reported.
    candidates = [
        ('Open Addressing', 'ch03.hashtable_open', 'Hashtable'),
        ('Separate Chaining', 'ch03.hashtable_linked', 'Hashtable'),
        ('Triangle Probing', 'ch03.challenge', 'HashtableTriangleNumbers'),
    ]

    tbl = DataTable([20, 8], ['Type', 'Time to Search'], output=output, decimals=decimals)
    tbl.format('Type', 's')

    for label, module, cls in candidates:
        timing = min(
            timeit.repeat(stmt=search_all,
                          setup=setup_template.format(module=module, cls=cls),
                          repeat=7, number=passes)) / passes
        tbl.row([label, timing])

    return tbl
def search_trials():
    """
    Report the values returned by the annotated BFS, DFS and guided searches
    on square mazes whose side N grows from 4 to 128.

    The first table averages each search's result over 512 random NxN mazes
    (the random module is seeded with 0..511, one seed per maze). The second
    table reports single results on the maze produced by
    maze_to_defeat_guided_search(N). Nothing is returned.
    """
    import random
    from ch07.maze import to_networkx, distance_to

    sides = [4, 8, 16, 32, 64, 128]
    num_mazes = 512

    def measure(maze):
        """Return [bfs, dfs, guided] results for one maze."""
        graph = to_networkx(maze)
        bfs = annotated_bfs_search(graph, maze.start(), maze.end())
        dfs = annotated_dfs_search(graph, maze.start(), maze.end())
        guided = annotated_guided_search(graph, maze.start(), maze.end(), distance_to)
        return [bfs, dfs, guided]

    # Averages over randomly generated mazes.
    tbl = DataTable([8,8,8,8],['N', 'BFS', 'DS', 'GS'], decimals=2)
    for N in sides:
        totals = [0, 0, 0]
        for seed in range(num_mazes):
            random.seed(seed)
            results = measure(Maze(N,N))
            for idx, value in enumerate(results):
                totals[idx] += value
        tbl.row([N] + [total/num_mazes for total in totals])

    # Single maze per size built to defeat guided search.
    tbl = DataTable([8,8,8,8],['N', 'BFS', 'DS', 'GS'], decimals=2)
    for N in sides:
        tbl.row([N] + measure(maze_to_defeat_guided_search(N)))
def worst_heights(max_n=40, output=True):
    """
    Generate random AVL trees of n Nodes to find which ones have greatest height.
    Purely speculative and not definitive exploration of potential trees.

    For each n in 1 .. max_n-1, 10001 trees are built by inserting n random
    floats. A row [n, worst height, number of trees with that height] is
    added only when the worst height for n exceeds every height reported
    so far. Returns the populated DataTable.
    """
    from ch06.balanced import BinaryTree

    tbl = DataTable([8, 8, 8], ['N', 'WorstHeight', 'NumberFound'], output=output)
    tbl.format('WorstHeight', 'd')
    tbl.format('NumberFound', ',d')

    table_max_height = -1
    for n in range(1, max_n):
        number_found = 0
        max_height = -1
        for _trial in range(10001):
            avl = BinaryTree()
            for _insert in range(n):
                avl.insert(random.random())

            height = avl.root.height
            if height > max_height:
                max_height = height
                # The tree that sets the new record is itself the first one
                # found at this height (it used to be left out of the count).
                number_found = 1
            elif height == max_height:
                number_found += 1

        if max_height > table_max_height:
            tbl.row([n, max_height, number_found])
            table_max_height = max_height
    return tbl
def performance_different_approaches(output=True):
    """
    Count comparisons made by five largest-two algorithms on three inputs.

    Each algorithm is run on 524,288 RecordedItem values in ascending,
    descending and alternating (low, high, low, high, ...) order. The
    figure recorded for each run is the sum of RecordedItem.report().
    Returns the populated DataTable.
    """
    from ch01.largest_two import largest_two, sorting_two, double_two, mutable_two, tournament_two

    n = 524288
    headers = ['Algorithm', 'Ascending', 'Descending', 'Alternating']

    tbl = DataTable([15, 10, 10, 10], headers, output=output)
    for hdr in headers:
        tbl.format(hdr, ',d')
    tbl.format('Algorithm', 's')   # first column holds names, not counts

    def tally(func, values):
        """Run `func` on fresh RecordedItems and return the summed report."""
        RecordedItem.clear()
        func([RecordedItem(v) for v in values])
        return sum(RecordedItem.report())

    contenders = [
        ('largest_two', largest_two),
        ('sorting_two', sorting_two),
        ('double_two', double_two),
        ('mutable_two', mutable_two),
        ('tournament_two', tournament_two),
    ]

    # Ascending / Descending / Weave
    for label, func in contenders:
        up_count = tally(func, range(n))
        down_count = tally(func, range(n, 0, -1))
        pairs = zip(range(0, n, 2), range(n - 1, 0, -2))
        weave_count = tally(func, itertools.chain.from_iterable(pairs))
        tbl.row([label, up_count, down_count, weave_count])
    return tbl
def run_median_less_than_trial(max_k=20, output=True):
    """
    Use RecordedItem to compare the counts reported while finding a median
    by sorting and by linear_median().

    Sizes are 2**k + 1 for k from 8 up to (but not including) max_k. The
    value tabulated is element [1] of RecordedItem.report() after each
    approach runs on the same shuffled list. Both medians are asserted
    equal. Returns the populated DataTable.
    """
    tbl = DataTable([10, 15, 15], ['N', 'median_count', 'sort_median_count'],
                    output=output)
    for column in ('median_count', 'sort_median_count'):
        tbl.format(column, ',d')

    for k in range(8, max_k):
        size = 2**k + 1
        items = [RecordedItem(i) for i in range(size)]
        random.shuffle(items)

        # sorted() builds a new list, so `items` can be reused afterwards.
        RecordedItem.clear()
        by_sorting = sorted(items)[size // 2]
        sort_count = RecordedItem.report()[1]

        RecordedItem.clear()
        by_selection = linear_median(items)
        linear_count = RecordedItem.report()[1]

        assert by_selection == by_sorting
        tbl.row([size, linear_count, sort_count])
    return tbl
def run_largest_alternate(output=True, decimals=3):
    """
    Generate the table comparing largest() with alternate() on ascending data.

    Sizes double from 8 to 2048. For each size the table holds the summed
    RecordedItem.report() counts for both functions plus their timings:
    1000 times the fastest of ten repetitions, divided by the fifty calls
    per repetition. The ascending list is embedded literally in each timed
    statement. When `output` is true the linear model for 'largest' and
    the quadratic model for 'alternate' are printed. Returns the DataTable.
    """
    calls = 50

    def fastest(name, values):
        """Best per-call time (x1000) of ch01.largest.<name> on `values`."""
        totals = timeit.repeat(stmt='{}({})'.format(name, values),
                               setup='from ch01.largest import {}'.format(name),
                               repeat=10, number=calls)
        return 1000 * min(totals) / calls

    def tally(func, size):
        """Counts reported while `func` scans `size` ascending RecordedItems."""
        items = [RecordedItem(i) for i in range(size)]
        RecordedItem.clear()
        func(items)
        counts = RecordedItem.report()
        RecordedItem.clear()
        return counts

    tbl = DataTable([8, 10, 15, 10, 10],
                    ['N', '#Less', '#LessA', 'largest', 'alternate'],
                    output=output, decimals=decimals)
    for column in ('#Less', '#LessA'):
        tbl.format(column, ',d')

    for size in [2**k for k in range(3, 12)]:   # 8 .. 2048
        ascending = list(range(size))
        largest_up = fastest('largest', ascending)
        alternate_up = fastest('alternate', ascending)

        largest_counts = tally(largest, size)
        alternate_counts = tally(alternate, size)

        tbl.row([size, sum(largest_counts), sum(alternate_counts),
                 largest_up, alternate_up])

    if output:
        print()
        print('largest', tbl.best_model('largest', Model.LINEAR))
        print('Alternate', tbl.best_model('alternate', Model.QUADRATIC))
    return tbl
def timing_trial(output=True, decimals=3):
    """
    Seek possible crossover between tournament_two() and sorting_two().

    Every algorithm is timed once (a single call) on a shuffled list of n
    values for n = 2**10 .. 2**23, with the random module seeded with n
    before shuffling. tournament_two_object() is not run for n above
    1,048,576; its cell holds SKIP instead. Returns the populated DataTable.
    """
    setup_template = '''
import random
from ch01.largest_two import {fn}
random.seed({n})
x=list(range({n}))
random.shuffle(x)'''

    def single_run(fn, n):
        """Seconds for one call of ch01.largest_two.<fn> on the seeded shuffle."""
        return timeit.timeit(stmt='{}(x)'.format(fn),
                             setup=setup_template.format(fn=fn, n=n),
                             number=1)

    tbl = DataTable([8,8,8,8,8,8],
                    ['N', 'Sorting', 'Tournament', 'Tourn. Object', 'Tourn. Linked', 'Tourn. Losers'],
                    output=output, decimals=decimals)

    for k in range(10, 24):
        n = 2 ** k
        sorting = single_run('sorting_two', n)
        tournament = single_run('tournament_two', n)

        if n <= 1048576:
            with_objects = single_run('tournament_two_object', n)
        else:
            with_objects = SKIP

        with_losers = single_run('tournament_two_losers', n)
        with_links = single_run('tournament_two_linked', n)

        # Linked precedes Losers in the header even though it is timed last.
        tbl.row([n, sorting, tournament, with_objects, with_links, with_losers])
    return tbl
def dag_trials(output=True):
    """
    Confirm DAG single-source shortest path is O(E+N).

    For n = 4, 8, 16, 32, 64 a graph is built with mesh_graph(n), and
    dijkstra_sp() and topological_sp() are timed from source 1. Each cell
    is 1000 times the fastest of twenty repetitions, divided by the fifteen
    calls per repetition. Rows are labeled with n*n.

    Returns the populated DataTable, matching the other table generators
    in this file (previously nothing was returned).
    """
    calls = 15
    tbl = DataTable([8,10,10],['N', 'Dijkstra', 'Topologic'], output=output)

    for n in [2**k for k in range(2,7)]:
        dijkstra = 1000*min(timeit.repeat(stmt='dijkstra_sp(dg,1)', setup='''
from ch07.challenge import mesh_graph
from ch07.single_source_sp import dijkstra_sp
dg=mesh_graph({})'''.format(n), repeat=20, number=calls))/calls

        topologic = 1000*min(timeit.repeat(stmt='topological_sp(dg,1)', setup='''
from ch07.challenge import mesh_graph, topological_sp
dg=mesh_graph({})'''.format(n), repeat=20, number=calls))/calls

        tbl.row([n*n, dijkstra, topologic])

    return tbl
def performance_bas(max_k=22, output=True, decimals=3):
    """
    Generate performance tables for binary array search up to (but not
    including) 2**max_k.

    A log model is first fitted to total search times for sizes
    2**5 .. 2**11 (skipped, with a zero coefficient, when the module-level
    `numpy_error` flag is set). The table then lists, for sizes 2**5 up to
    2**max_k, the measured total time of 50,000 searches next to the
    model's prediction. Returns the populated DataTable.
    """
    num = 50000

    def total_search_time(size, spread):
        """Seconds for `num` searches of random targets in 0 .. size*spread."""
        return timeit.timeit(
            stmt='binary_array_search(x, random.randint(0,{}*{}))'.format(size, spread),
            setup='''
import random
from ch02.bas import binary_array_search
x=sorted(random.sample(range({0}*4), {0}))'''.format(size),
            number=num)

    # Train on the seven sizes 2**5 .. 2**11; targets span the sampled range.
    xvals = [2**k for k in range(5, 12)]
    yvals = [total_search_time(size, 4) for size in xvals]

    if numpy_error:
        log_coeff = [0]
    else:
        import numpy as np
        from scipy.optimize import curve_fit
        [log_coeff, _] = curve_fit(log_model, np.array(xvals), np.array(yvals))
        if output:
            print('Log N = {:.12f}*log2(N)'.format(log_coeff[0]))

    tbl = DataTable([15, 10, 10], ['N', 'T(N)', 'Model'],
                    output=output, decimals=decimals)
    for k in range(5, max_k):
        size = 2**k
        # NOTE(review): targets here span 0 .. size*2 while training used
        # 0 .. size*4 -- confirm whether the difference is intentional.
        tbl.row([size, total_search_time(size, 2), log_model(size, log_coeff[0])])
    return tbl
def prime_number_difference(words, output=True, decimals=2):
    """
    Identify sensitivity of M to being prime or not.

    Every word is converted with base26() and the resulting keys are put
    into a linked and an open-addressing hashtable for each table size M in
    428880 .. 428980. One row per M records whether M is prime plus the
    average/maximum statistics of both tables. Returns the populated
    DataTable.

    When `output` is true, a follow-up search prints any M (stepping by 13
    over the next 10,000 sizes from the worst M seen) whose linked table
    has a longer maximum chain. The search now fills each candidate table
    with the keys; previously it inspected empty tables and so could never
    report anything.
    """
    from ch03.hashtable_linked import Hashtable as Linked_Hashtable, stats_linked_lists
    from ch03.hashtable_open import Hashtable as Open_Hashtable, stats_open_addressing
    from ch03.base26 import base26

    # these are prime numbers between 428880 and 428980
    lo = 428880
    primes = [428899, 428951, 428957, 428977]
    hi = 428980

    keys = [base26(w) for w in words]
    tbl = DataTable([12, 6, 8, 8, 8, 8],
                    ['M', 'Prime', 'Avg. LL', 'Max LL', 'Avg. OA', 'Max OA'],
                    output=output, decimals=decimals)
    tbl.format('Prime', 's')
    tbl.format('Max LL', 'd')
    tbl.format('Max OA', 'd')

    worst = 0
    worst_m = 0
    for m in range(lo, hi + 1):
        is_p = 'Prime' if m in primes else ''
        ht_linked = Linked_Hashtable(m)
        ht_open = Open_Hashtable(m)

        for k in keys:
            ht_linked.put(k, 1)
            ht_open.put(k, 1)

        (avg_length_linked, max_length_linked) = stats_linked_lists(ht_linked)
        if max_length_linked > worst:
            worst_m = m
            worst = max_length_linked
        (avg_length_open, max_length_open) = stats_open_addressing(ht_open)
        tbl.row([m, is_p, avg_length_linked, max_length_linked,
                 avg_length_open, max_length_open])

    # Now try to find any more that exceed this maximum amount. This only
    # produces printed diagnostics, so skip the work entirely when silent.
    if output:
        print('Worst was {} for M={}'.format(worst, worst_m))
        for m in range(worst_m, worst_m + 10000, 13):
            ht_linked = Linked_Hashtable(m)
            # The candidate table must hold the keys before it is measured.
            for k in keys:
                ht_linked.put(k, 1)

            # NOTE(review): the meaning of the second positional argument
            # (False) is defined in ch03.hashtable_linked -- not visible here.
            (avg_length_linked, max_length_linked) = stats_linked_lists(ht_linked, False)
            if max_length_linked > worst:
                worst_m = m
                worst = max_length_linked
                print('Worst of {} for M={}'.format(worst, worst_m))
        print('Done')

    return tbl
def run_max_sort_worst_case(max_k=14, output=True, decimals=4):
    """
    Generate table for max sort up to (but not including 2**max_k).

    A quadratic model is first fitted to total times for sizes
    2**5 .. 2**11 (skipped, with zero coefficients, when the module-level
    `numpy_error` flag is set). The table then lists, for sizes 2**5 up to
    2**max_k, the measured total time of ten max_sort() calls next to the
    model's prediction. Returns the populated DataTable.
    """
    def ten_sorts(size):
        """Total seconds for ten max_sort() calls on one shuffled list."""
        return timeit.timeit(stmt='max_sort(x)', setup='''
from ch02.challenge import max_sort
import random
x=list(range({},0,-1))
random.shuffle(x)'''.format(size), number=10)

    xvals = [2**k for k in range(5, 12)]
    yvals = [ten_sorts(size) for size in xvals]

    if numpy_error:
        quadratic_coeff = [0, 0]
    else:
        import numpy as np
        from scipy.optimize import curve_fit
        [quadratic_coeff, _] = curve_fit(quadratic_model, np.array(xvals), np.array(yvals))
        if output:
            print('Quadratic N = {:.12f}*N*N + {:.12f}*N'.format(
                quadratic_coeff[0], quadratic_coeff[1]))

    tbl = DataTable([8, 8, 8], ['N', 'MaxSort', 'Model'],
                    output=output, decimals=decimals)
    for k in range(5, max_k):
        size = 2**k
        measured = ten_sorts(size)
        predicted = quadratic_model(size, quadratic_coeff[0], quadratic_coeff[1])
        tbl.row([size, measured, predicted])
    return tbl
def generate_list_table(max_k=21, output=True, decimals=3):
    """
    Tabulate prepend, remove, append and tree trial results for sizes
    2**10 up to (but not including) 2**max_k.

    Each cell is whatever the corresponding run_trials_* helper returns
    when called with the size and 1000. Returns the populated DataTable.
    """
    # Helpers in the same order as the table's data columns.
    trial_runners = (run_trials_prepend, run_trials_remove,
                     run_trials_append, run_trials_tree)

    tbl = DataTable([8, 8, 8, 8, 8], ['N', 'Prepend', 'Remove', 'Append', 'Tree'],
                    output=output, decimals=decimals)

    for k in range(10, max_k):
        size = 2**k
        tbl.row([size] + [runner(size, 1000) for runner in trial_runners])
    return tbl
def incremental_multiplication(output=True):
    """
    Compute results for multiplying large numbers, checking powers of 2
    (8 .. 2048) rather than every size, which would take several hours.

    For each size, 1000 pairs are created with create_random_pair(n) and
    each repetition multiplies all of them once, advancing an index on
    every call. The row records the smallest and largest total time over
    twenty repetitions. Returns the populated DataTable.
    """
    num = 1000
    tbl = DataTable([8, 8, 8], ['N', 'Min Mult', 'Max Mult'], decimals=5, output=output)

    for k in range(3, 12):
        n = 2**k
        # Setup runs afresh for every repetition, resetting idx to -1.
        setup = '''
from ch02.mult import create_random_pair, mult_pair
idx = -1
pairs = [create_random_pair({}) for _ in range({})]'''.format(n, num)
        totals = timeit.repeat(stmt='idx += 1\nmult_pair(pairs[idx])',
                               setup=setup, repeat=20, number=num)
        tbl.row([n, min(totals), max(totals)])
    return tbl
def compare_avl_pq_with_heap_pq(max_k=16, output=True, decimals=2):
    """
    Time the ch04.heap PQ against the ch06.pq PQ.

    For sizes 2**10 up to (but not including) 2**max_k, each timed run
    seeds the random module with 11, enqueues n random values (using the
    value as its own priority) and dequeues until the queue is empty. The
    heap PQ is constructed with the size n; the ch06.pq PQ takes no
    argument. Each cell is the fastest of 25 repetitions divided by the
    ten runs per repetition. Returns the populated DataTable.
    """
    repeat = 25
    num = 10
    workload = '''
random.seed(11)
pq = PQ({ctor_arg})
for _ in range({n}):
    r = random.random()
    pq.enqueue(r,r)
while pq:
    pq.dequeue()'''

    def fastest(module, ctor_arg, n):
        """Best per-run time for the PQ class imported from `module`."""
        setup = '''
from {} import PQ
import random'''.format(module)
        totals = timeit.repeat(stmt=workload.format(ctor_arg=ctor_arg, n=n),
                               setup=setup, repeat=repeat, number=num)
        return min(totals) / num

    tbl = DataTable([8, 10, 10], ['N', 'Heap-pq', 'AVL-pq'],
                    output=output, decimals=decimals)

    for k in range(10, max_k):
        n = 2**k
        heap_time = fastest('ch04.heap', n, n)
        avl_time = fastest('ch06.pq', '', n)
        tbl.row([n, heap_time, avl_time])
    return tbl
def combined_sorted(lo=8, hi=12, output=True):
    """
    Generate results for different sorting trials.

    Builds a square table whose cell (n, m) is run_merge_trial(m, n) for
    n, m in 2**lo .. 2**(hi-1). An N log N model is then fitted to the
    diagonal entries of (at most) the first five sizes, and a second
    'Prediction' table is generated from that model. Fitting is skipped,
    with a zero coefficient, when the module-level `numpy_error` flag is
    set. Returns the table of measured values.

    Diagnostic printing (Pearson correlation, 'Prediction' heading) is
    suppressed when `output` is false. `pearsonr` is imported from the
    public `scipy.stats` namespace rather than the deprecated private
    `scipy.stats.stats` module.
    """
    sizes = [2**k for k in range(lo, hi)]
    widths = [8] * (hi - lo + 1)
    headers = ['N'] + [comma(size) for size in sizes]

    tbl = DataTable(widths, headers, output=output)
    for n in sizes:
        tbl.row([n] + [run_merge_trial(m, n) for m in sizes])

    # Diagonal values are for 2*M*log(M) so divide in HALF for accurate one
    # build model ONLY for first five values
    x = sizes[:5]
    y = [tbl.entry(r, comma(r)) for r in x]

    if numpy_error:
        a = 0
    else:
        import numpy as np
        from scipy.optimize import curve_fit
        from scipy.stats import pearsonr

        (coeffs, _) = curve_fit(n_log_n_model, np.array(x), np.array(y))
        a = coeffs[0] / 2

        if output:
            y_fit = [n_log_n_model(r, a) for r in x]
            print()
            print(pearsonr(y, y_fit))
            print()

    if output:
        print('Prediction')
    model = DataTable(widths, headers, output=output)
    for n in sizes:
        model.row([n] + [n_log_n_model(n, a) + n_log_n_model(m, a) for m in sizes])
    return tbl
def trial_factorial_heap(max_n=32768, output=True, decimals=2):
    """
    Generate trial using factorial heap compared with regular heap for
    sizes doubling from 256 up to but not including max_n.

    Each cell is 1000000 * run_trials(module, N, 3) / (3 * N).
    Returns the populated DataTable.
    """
    factor = 3

    def scaled(module, size):
        """run_trials() result scaled by 1000000 / (factor * size)."""
        return 1000000 * run_trials(module, size, factor) / (factor * size)

    tbl = DataTable([10, 8, 8], ['N', 'Heap', 'FactHeap'],
                    output=output, decimals=decimals)

    size = 256
    while size < max_n:
        regular = scaled('ch04.heap', size)
        factorial = scaled('ch04.factorial_heap', size)
        tbl.row([size, regular, factorial])
        size *= 2
    return tbl
def bad_timing(words, size=50000, output=True):
    """
    Compare linked-list statistics for plain keys versus ValueBadHash keys.

    Two linked hashtables of `size` are filled from `words`: one keyed by
    the words themselves, the other by ValueBadHash(word). A 'Good' and a
    'Bad' row record the first two values of stats_linked_lists() for each
    table. Returns the populated DataTable.
    """
    from ch03.hashtable_linked import Hashtable, stats_linked_lists

    tbl = DataTable([8, 10, 10], ['Type', 'Avg. Len', 'Max Len'], output=output)
    tbl.format('Type', 's')
    tbl.format('Max Len', 'd')

    tables = {'Good': Hashtable(size), 'Bad': Hashtable(size)}
    for word in words:
        tables['Good'].put(word, True)
        tables['Bad'].put(ValueBadHash(word), True)

    for label in ('Good', 'Bad'):
        stats = stats_linked_lists(tables[label])
        tbl.row([label, stats[0], stats[1]])
    return tbl