def insertion_sort_bas(max_k=18, output=True, decimals=3):
    """
    Build the timing table for the basic Insertion Sort.

    Timings for N = 2**8 .. 2**11 are collected first and used to fit an
    N*log2(N) model and a quadratic model. When ``numpy_error`` is set the
    fit is skipped and all coefficients are zero. Those rows, followed by
    freshly timed rows for N = 2**12 up to (but not including) 2**max_k,
    are added to the table next to both model predictions.

    Each reported time is 1000 times the smallest of ten ``timeit`` repeats,
    where every repeat executes the statement ten times.

    When `output` is true the fitted coefficients are printed; `output` and
    `decimals` are also handed to the DataTable. Returns the DataTable.
    """
    def best_time(size):
        """Time sorting a shuffled list of `size` values; keep the fastest repeat."""
        runs = timeit.repeat(stmt='insertion_sort_bas(A)', setup='''
import random
from ch05.sorting import insertion_sort_bas
A=list(range({}))
random.shuffle(A)'''.format(size), repeat=10, number=10)
        return 1000 * min(runs)

    def table_row(size, elapsed):
        """Pair a measurement with what each fitted model predicts for `size`."""
        return [
            size,
            elapsed,
            quadratic_model(size, quadratic_coeffs[0], quadratic_coeffs[1]),
            n_log_n_model(size, log_coeffs[0]),
        ]

    # Sample the small problem sizes that drive the curve fitting.
    sizes = [2**k for k in range(8, 12)]
    timings = [best_time(size) for size in sizes]

    # curve_fit hands back the fitted coefficients as its first result.
    if numpy_error:
        log_coeffs = [0, 0]
        quadratic_coeffs = [0, 0]
    else:
        import numpy as np
        from scipy.optimize import curve_fit
        xs = np.array(sizes)
        ys = np.array(timings)
        log_coeffs = curve_fit(n_log_n_model, xs, ys)[0]
        quadratic_coeffs = curve_fit(quadratic_model, xs, ys)[0]

    if output:
        quad_a = quadratic_coeffs[0]
        quad_b = quadratic_coeffs[1]
        print('Quadratic = {}*N*N + {}*N'.format(quad_a, quad_b))
        print('Log = {:.12f}*N*log2(N)'.format(log_coeffs[0]))
        print()

    tbl = DataTable([12, 10, 10, 10], ['N', 'Time', 'Quad', 'Log'],
                    output=output, decimals=decimals)

    # Rows that were used to build the models.
    for size, elapsed in zip(sizes, timings):
        tbl.row(table_row(size, elapsed))

    # Larger problem sizes, timed now and compared against the models.
    for k in range(12, max_k):
        size = 2**k
        tbl.row(table_row(size, best_time(size)))

    return tbl
def run_max_sort_worst_case(max_k=14, output=True, decimals=4):
    """
    Generate table for Max Sort up to (but not including) 2**max_k.

    A quadratic model is fit to timings for N = 2**5 .. 2**11. When
    ``numpy_error`` is set the fit is skipped and both coefficients are zero.
    Every table row, starting again at N = 2**5, is then timed afresh and
    shown next to the model's prediction.

    Each timing is the total reported by ``timeit`` for ten executions of
    ``max_sort`` on a list that is created in descending order and then
    shuffled.

    When `output` is true the fitted coefficients are printed; `output` and
    `decimals` are also handed to the DataTable. Returns the DataTable.
    """
    def time_max_sort(size):
        """Total time for ten max_sort calls on `size` shuffled values."""
        return timeit.timeit(stmt='max_sort(x)', setup='''
from ch02.challenge import max_sort
import random
x=list(range({},0,-1))
random.shuffle(x)'''.format(size), number=10)

    # Measurements used only to derive the model.
    sizes = [2**k for k in range(5, 12)]
    timings = [time_max_sort(size) for size in sizes]

    if numpy_error:
        quadratic_coeff = [0, 0]
    else:
        import numpy as np
        from scipy.optimize import curve_fit
        quadratic_coeff = curve_fit(quadratic_model,
                                    np.array(sizes), np.array(timings))[0]

    if output:
        message = 'Quadratic N = {:.12f}*N*N + {:.12f}*N'
        print(message.format(quadratic_coeff[0], quadratic_coeff[1]))

    tbl = DataTable([8, 8, 8], ['N', 'MaxSort', 'Model'],
                    output=output, decimals=decimals)

    for k in range(5, max_k):
        size = 2**k
        elapsed = time_max_sort(size)
        predicted = quadratic_model(size, quadratic_coeff[0], quadratic_coeff[1])
        tbl.row([size, elapsed, predicted])

    return tbl
def modeling_insertion_selection(output=True, decimals=1):
    """
    Generate table of average compare and swap counts for Selection Sort
    and Insertion Sort.

    For each N, 100 trials are run; every trial sorts one freshly shuffled
    list with the counting Selection Sort and another freshly shuffled list
    with the counting Insertion Sort. Averages for N = 2**4 .. 2**7 are used
    to fit the models (linear for Selection Sort swaps, quadratic for the
    other three series). When ``numpy_error`` is set the fit is skipped and
    all coefficients are zero. The table then covers N = 2**4 .. 2**9 with
    new trials, each average shown next to its model's prediction.

    When `output` is true the fitted coefficients are printed; `output` and
    `decimals` are also handed to the DataTable. Returns the DataTable.
    """
    from ch05.sorting import selection_sort_counting, insertion_sort_counting

    trials = 100

    def average_counts(size):
        """Return average (comp SS, swap SS, comp IS, swap IS) over all trials."""
        comp_ss = swap_ss = comp_is = swap_is = 0
        for _ in range(trials):
            # Selection Sort on its own shuffled input.
            values = list(range(size))
            random.shuffle(values)
            (swaps, compares) = selection_sort_counting(values)
            swap_ss += swaps
            comp_ss += compares

            # Insertion Sort on a different shuffled input.
            values = list(range(size))
            random.shuffle(values)
            (swaps, compares) = insertion_sort_counting(values)
            swap_is += swaps
            comp_is += compares
        return (comp_ss/trials, swap_ss/trials, comp_is/trials, swap_is/trials)

    # Averages that feed the curve fitting.
    sizes = []
    avg_comp_ss = []
    avg_swap_ss = []
    avg_comp_is = []
    avg_swap_is = []
    for k in range(4, 8):
        size = 2**k
        (c_ss, s_ss, c_is, s_is) = average_counts(size)
        sizes.append(size)
        avg_comp_ss.append(c_ss)
        avg_swap_ss.append(s_ss)
        avg_comp_is.append(c_is)
        avg_swap_is.append(s_is)

    if numpy_error:
        quadratic_comp_ss = [0, 0]
        linear_swap_ss = [0, 0]
        quadratic_comp_is = [0, 0]
        quadratic_swap_is = [0, 0]
    else:
        import numpy as np
        from scipy.optimize import curve_fit
        xs = np.array(sizes)
        quadratic_comp_ss = curve_fit(quadratic_model, xs, np.array(avg_comp_ss))[0]
        linear_swap_ss = curve_fit(linear_model, xs, np.array(avg_swap_ss))[0]
        quadratic_comp_is = curve_fit(quadratic_model, xs, np.array(avg_comp_is))[0]
        quadratic_swap_is = curve_fit(quadratic_model, xs, np.array(avg_swap_is))[0]

    if output:
        quadratic_text = '{}*N*N + {}*N'
        print('Swap SS Linear = {:f}*N + {:f}'.format(linear_swap_ss[0], linear_swap_ss[1]))
        print('Comp SS Quadratic = ' + quadratic_text.format(quadratic_comp_ss[0], quadratic_comp_ss[1]))
        print('Swap IS Quadratic = ' + quadratic_text.format(quadratic_swap_is[0], quadratic_swap_is[1]))
        print('Comp IS Quadratic = ' + quadratic_text.format(quadratic_comp_is[0], quadratic_comp_is[1]))
        print()

    tbl = DataTable([12, 10, 10, 10, 10, 10, 10, 10, 10],
                    ['N', 'AvgCompSS', 'MCSS', 'AvgSwapSS', 'MSSS',
                     'AvgCompIS', 'MCIS', 'AvgSwapIS', 'MSIS'],
                    output=output, decimals=decimals)

    # Each table row comes from a new batch of trials.
    for k in range(4, 10):
        size = 2**k
        (c_ss, s_ss, c_is, s_is) = average_counts(size)
        tbl.row([
            size,
            c_ss,
            quadratic_model(size, quadratic_comp_ss[0], quadratic_comp_ss[1]),
            s_ss,
            linear_model(size, linear_swap_ss[0], linear_swap_ss[1]),
            c_is,
            quadratic_model(size, quadratic_comp_is[0], quadratic_comp_is[1]),
            s_is,
            quadratic_model(size, quadratic_swap_is[0], quadratic_swap_is[1]),
        ])

    return tbl
def timing_selection_insertion(min_k=8, max_k=16, output=True, decimals=3):
    """
    Compare running times of Selection Sort and Insertion Sort.

    Because Insertion Sort is so sensitive to its inputs, we take average
    time over all of its runs.

    Models are first built from five rows, N = 2**min_k up to (but not
    including) 2**(min_k+5), and the table then presents information up to
    (but not including) 2**max_k. Takes hours to run. In the book, the
    table goes up to max_k=18.

    For the five model rows only the Insertion Sort average is recorded, so
    the 'MinIS', 'TimeIS' and 'MaxIs' columns all show that average. Later
    rows show the true minimum, average and maximum of their runs.

    When `output` is true a progress message and the fitted coefficients are
    printed; `output` and `decimals` are also handed to the DataTable.
    Returns the DataTable.
    """
    # Number of independent Insertion Sort runs averaged per problem size.
    # The divisor of every average is derived from the measured list itself,
    # so these counts cannot drift out of step with the averaging.
    model_runs = 100
    table_runs = 5

    def time_selection_sort(n):
        """Time a single Selection Sort of n shuffled values."""
        return timeit.timeit(stmt='selection_sort(A)', setup='''
import random
from ch05.sorting import selection_sort
A=list(range({}))
random.shuffle(A)'''.format(n), number=1)

    def time_insertion_sort(n, runs):
        """Return `runs` timings, each a single Insertion Sort of n freshly shuffled values."""
        return timeit.repeat(stmt='insertion_sort(A)', setup='''
import random
from ch05.sorting import insertion_sort
A=list(range({}))
random.shuffle(A)'''.format(n), repeat=runs, number=1)

    if output:
        print('Building models for Insertion Sort. This may take awhile...')

    # Build models from 5 data points
    x = []
    y_is = []
    y_ss = []
    for n in [2**k for k in range(min_k, min_k+5)]:
        # Not much need to repeat since Selection Sort behaves the same
        # every time, so it is timed with a single run.
        t_ss = time_selection_sort(n)

        # Insertion Sort is highly dependent upon its input, so execute
        # far more repetitions, and take average. This is the only time
        # in the book where I alter my approach for measuring performance
        # since it could happen that a given data set has long runs of
        # ascending data, which would significantly reduce the execution
        # time. Instead, I total all runs and provide an average.
        model_times = time_insertion_sort(n, model_runs)
        t_is = sum(model_times)/len(model_times)

        x.append(n)
        y_ss.append(t_ss)
        y_is.append(t_is)

    # Coefficients are returned as first argument
    if numpy_error:
        quadratic_ss = quadratic_is = [0, 0]
    else:
        import numpy as np
        from scipy.optimize import curve_fit
        [quadratic_ss, _] = curve_fit(quadratic_model, np.array(x), np.array(y_ss))
        [quadratic_is, _] = curve_fit(quadratic_model, np.array(x), np.array(y_is))

    if output:
        print('Quadratic SS = {}*N*N + {}*N'.format(quadratic_ss[0], quadratic_ss[1]))
        print('Quadratic IS = {}*N*N + {}*N'.format(quadratic_is[0], quadratic_is[1]))
        print()

    tbl = DataTable([12, 10, 10, 10, 10, 10, 10],
                    ['N', 'TimeSS', 'ModelSS', 'MinIS', 'TimeIS', 'MaxIs', 'ModelIS'],
                    output=output, decimals=decimals)

    # Model rows: min and max were not recorded, so the average fills all
    # three Insertion Sort columns.
    for n, t_ss, t_is in zip(x, y_ss, y_is):
        tbl.row([n,
                 t_ss,
                 quadratic_model(n, quadratic_ss[0], quadratic_ss[1]),
                 t_is,
                 t_is,
                 t_is,
                 quadratic_model(n, quadratic_is[0], quadratic_is[1])])

    for n in [2**k for k in range(min_k+5, max_k)]:
        # Selection Sort's time barely varies, so just run once
        t_ss = time_selection_sort(n)

        # Once again, take average for Insertion Sort, this time over
        # fewer runs. But also compute min and max for graphing
        all_times = time_insertion_sort(n, table_runs)
        t_is = sum(all_times)/len(all_times)
        t_min = min(all_times)
        t_max = max(all_times)

        tbl.row([n,
                 t_ss,
                 quadratic_model(n, quadratic_ss[0], quadratic_ss[1]),
                 t_min,
                 t_is,
                 t_max,
                 quadratic_model(n, quadratic_is[0], quadratic_is[1])])

    return tbl
def prototype_table(output=True, decimals=3):
    """
    Generate table of results for prototype application.

    The prototype application is simply a request to sort the N values.

    Timings for N = 100, 1000 and 10000 are used to fit N*log2(N), linear
    and quadratic models. When ``numpy_error`` is set the fit is skipped and
    all coefficients are zero. Those rows, followed by freshly timed rows
    for N = 100000, 1000000 and 10000000, are added to the table next to
    the three model predictions.

    Each reported time is 1000 times the smallest of 100 ``timeit`` repeats,
    where every repeat executes ``x.sort()`` 100 times.

    When `output` is true the fitted coefficients are printed before the
    table, and the table's Pearson results and best model after it.
    `output` and `decimals` are also handed to the DataTable.
    Returns the DataTable.
    """
    def time_sort(size):
        """Time sorting a shuffled list of `size` values; keep the fastest repeat."""
        runs = timeit.repeat(stmt='x.sort()', setup='''
import random
x=list(range({}))
random.shuffle(x)'''.format(size), repeat=100, number=100)
        return 1000 * min(runs)

    def quad_model(n, a, b):
        """Quadratic used only for fitting: attempts to PREVENT a negative coefficient."""
        return 1e10 if a < 0 else a * n * n + b * n

    def table_row(size, elapsed):
        """Pair a measurement with what each fitted model predicts for `size`."""
        return [
            size,
            elapsed,
            linear_model(size, linear_coeffs[0], linear_coeffs[1]),
            quadratic_model(size, quadratic_coeffs[0], quadratic_coeffs[1]),
            n_log_n_model(size, nlog_n_coeffs[0]),
        ]

    # Measurements that drive the curve fitting.
    nvals = [100, 1000, 10000]
    yvals = [time_sort(size) for size in nvals]

    # curve_fit hands back the fitted coefficients as its first result.
    if numpy_error:
        nlog_n_coeffs = [0, 0]
        linear_coeffs = [0, 0]
        quadratic_coeffs = [0, 0]
    else:
        import numpy as np
        from scipy.optimize import curve_fit
        xs = np.array(nvals)
        ys = np.array(yvals)
        nlog_n_coeffs = curve_fit(n_log_n_model, xs, ys)[0]
        linear_coeffs = curve_fit(linear_model, xs, ys)[0]
        quadratic_coeffs = curve_fit(quad_model, xs, ys)[0]

    if output:
        print('Linear = {:f}*N + {:f}'.format(linear_coeffs[0], linear_coeffs[1]))
        print('Quadratic = {}*N*N + {}*N'.format(quadratic_coeffs[0], quadratic_coeffs[1]))
        print('N Log N = {:.12f}*N*log2(N)'.format(nlog_n_coeffs[0]))
        print()

    tbl = DataTable([12, 10, 10, 10, 10],
                    ['N', 'Time', 'Linear', 'Quad', 'NLogN'],
                    output=output, decimals=decimals)

    # Rows that were used to build the models.
    for size, elapsed in zip(nvals, yvals):
        tbl.row(table_row(size, elapsed))

    # Larger problem sizes, timed now and compared against the models.
    for size in (100000, 1000000, 10000000):
        tbl.row(table_row(size, time_sort(size)))

    if output:
        for column in ('Linear', 'Quad', 'NLogN'):
            print(column, tbl.pearsonr('Time', column))
        print(tbl.best_model('Time'))

    return tbl
def large_multiplication(output=True, decimals=4):
    """
    Compute results for multiplying large numbers.

    Timings for N = 2**8 .. 2**12 are used to fit four models: linear,
    quadratic, a*N^k, and a*N^k + b*N, where k = log 3 in base 2. When
    ``numpy_error`` is set the fit is skipped and all coefficients are zero.
    Those rows, followed by freshly timed rows for N = 2**13 .. 2**18, are
    added to the table next to the four model predictions.

    Each timing is the total reported by ``timeit`` for 1000 executions of
    ``mult_pair`` on the pair built by ``create_pair(N)``.

    When `output` is true the Karatsuba and TKN coefficients are printed;
    `output` and `decimals` are also handed to the DataTable.
    Returns the DataTable.
    """
    num = 1000
    exponent = math.log2(3)

    def time_mult(size):
        """Total time for `num` multiplications of the pair created for `size`."""
        return timeit.timeit(stmt='mult_pair(x)', setup='''
from ch02.mult import create_pair, mult_pair
x=create_pair({})'''.format(size), number=num)

    def karatsuba(n, a):
        """Models a*N^k where k = log 3 in base 2."""
        return a * (n**exponent)

    def tkn(n, a, b):
        """Models a*N^k +b*n where k = log 3 in base 2."""
        return a * (n**exponent) + b * n

    def table_row(size, elapsed):
        """Pair a measurement with what each fitted model predicts for `size`."""
        return [
            size,
            elapsed,
            linear_model(size, linear_coeffs[0], linear_coeffs[1]),
            quadratic_model(size, quadratic_coeffs[0], quadratic_coeffs[1]),
            karatsuba(size, karatsuba_coeffs[0]),
            tkn(size, tkn_coeffs[0], tkn_coeffs[1]),
        ]

    # Measurements that drive the curve fitting.
    sizes = [2**k for k in range(8, 13)]
    timings = [time_mult(size) for size in sizes]

    # curve_fit hands back the fitted coefficients as its first result.
    if numpy_error:
        linear_coeffs = [0, 0]
        quadratic_coeffs = [0, 0]
        karatsuba_coeffs = [0, 0]
        tkn_coeffs = [0, 0]
    else:
        import numpy as np
        from scipy.optimize import curve_fit
        xs = np.array(sizes)
        ys = np.array(timings)
        linear_coeffs = curve_fit(linear_model, xs, ys)[0]
        quadratic_coeffs = curve_fit(quadratic_model, xs, ys)[0]
        karatsuba_coeffs = curve_fit(karatsuba, xs, ys)[0]
        tkn_coeffs = curve_fit(tkn, xs, ys)[0]

    if output:
        print('Karatsuba={}*N^1.585'.format(karatsuba_coeffs[0]))
        print('TK={}*N^1.585+{}*N'.format(tkn_coeffs[0], tkn_coeffs[1]))
        print()

    tbl = DataTable([8, 12, 12, 12, 12, 12],
                    ['N', 'Time', 'Linear', 'Quad', 'Karatsuba', 'TKN'],
                    output=output, decimals=decimals)

    # Rows that were used to build the models.
    for size, elapsed in zip(sizes, timings):
        tbl.row(table_row(size, elapsed))

    # Larger problem sizes, timed now and compared against the models.
    for k in range(13, 19):
        size = 2**k
        tbl.row(table_row(size, time_mult(size)))

    return tbl