def average_performance(max_n=65536, output=True, decimals=1):
    """
    Generate table of average performance for different PQ implementations.

    Problem sizes start at 256 and double up to ``max_n``. The 'ch04.heap'
    and 'ch04.ordered_list' modules are measured at every size; the
    'ch04.ordered', 'ch04.linked', 'ch04.array' and 'ch04.builtin' modules
    are measured only up to 16,384 (and never beyond ``max_n``), so rows for
    larger sizes hold just the N, Heap and OrderL columns.

    Every reported value is ``1000000 * run_trials(module, N, T) / (T * N)``
    with T = 3 -- presumably microseconds per operation if ``run_trials``
    reports seconds (confirm against ``run_trials``).

    :param max_n: largest problem size to include in the table.
    :param output: forwarded to ``DataTable``; when true the best model for
        each column is also printed.
    :param decimals: forwarded to ``DataTable``.
    :return: the populated ``DataTable``.
    """
    T = 3
    base = 256
    cutoff = 16384
    high = max_n

    # Rows are only emitted for N <= high, so benchmarking the slow
    # implementations past max_n would be wasted work.
    slow_high = min(cutoff, high)

    def scaled(module, size):
        """Return the run_trials() result for module, scaled per trial and per item."""
        return 1000000*run_trials(module, size, T)/(T*size)

    heap = {}
    order_ll = {}
    N = base
    while N <= high:
        order_ll[N] = scaled('ch04.ordered_list', N)
        heap[N] = scaled('ch04.heap', N)
        N *= 2

    order_ar = {}
    linked = {}
    array = {}
    builtin = {}
    N = base
    while N <= slow_high:
        order_ar[N] = scaled('ch04.ordered', N)
        linked[N] = scaled('ch04.linked', N)
        array[N] = scaled('ch04.array', N)
        builtin[N] = scaled('ch04.builtin', N)
        N *= 2

    tbl = DataTable([8, 8, 8, 8, 8, 8, 8],
                    ['N', 'Heap', 'OrderL', 'Linked', 'OrderA', 'Built-in', 'Array'],
                    output=output, decimals=decimals)
    N = base
    while N <= high:
        if N <= cutoff:
            tbl.row([N, heap[N], order_ll[N], linked[N], order_ar[N], builtin[N], array[N]])
        else:
            # Beyond the cutoff only the two fastest implementations were timed.
            tbl.row([N, heap[N], order_ll[N]])
        N *= 2

    if output:
        print()
        print('Heap', tbl.best_model('Heap'))
        print('OrderL', tbl.best_model('OrderL'))
        print('Linked', tbl.best_model('Linked'))
        print('OrderA', tbl.best_model('OrderA'))
        print('Built-in', tbl.best_model('Built-in'))
        print('Array', tbl.best_model('Array'))
    return tbl
def run_largest_alternate(output=True, decimals=3):
    """
    Generate tables for largest and alternate.

    For N = 8, 16, ..., 2048 an ascending list of N integers is used to:

    * time ``largest`` and ``alternate`` with ``timeit`` (best of 10
      repetitions of 50 executions each, scaled by 1000 and divided by 50,
      i.e. milliseconds per call); and
    * run each function once over fresh ``RecordedItem`` wrappers and sum
      what ``RecordedItem.report()`` returns (shown in the '#Less' and
      '#LessA' columns -- presumably less-than comparison counts; confirm
      against ``RecordedItem``).

    :param output: forwarded to ``DataTable``; when true the fitted models
        for both timing columns are also printed.
    :param decimals: forwarded to ``DataTable``.
    :return: the populated ``DataTable``.
    """
    tbl = DataTable([8, 10, 15, 10, 10],
                    ['N', '#Less', '#LessA', 'largest', 'alternate'],
                    output=output, decimals=decimals)
    for column in ('#Less', '#LessA'):
        tbl.format(column, ',d')

    def recorded_counts(func, size):
        """Run func over size fresh RecordedItem values and return the report."""
        items = [RecordedItem(i) for i in range(size)]
        # Reset the recorder after building the items and before the run.
        RecordedItem.clear()
        func(items)
        counts = RecordedItem.report()
        RecordedItem.clear()
        return counts

    # 2**3 .. 2**11 is 8, 16, ..., 2048
    for exponent in range(3, 12):
        n = 2 ** exponent
        ascending = list(range(n))

        largest_runs = timeit.repeat(stmt='largest({})'.format(ascending),
                                     setup='from ch01.largest import largest',
                                     repeat=10, number=50)
        largest_up = 1000 * min(largest_runs) / 50

        alternate_runs = timeit.repeat(stmt='alternate({})'.format(ascending),
                                       setup='from ch01.largest import alternate',
                                       repeat=10, number=50)
        alternate_up = 1000 * min(alternate_runs) / 50

        largest_counts = recorded_counts(largest, n)
        alternate_counts = recorded_counts(alternate, n)

        tbl.row([n, sum(largest_counts), sum(alternate_counts),
                 largest_up, alternate_up])

    if output:
        print()
        print('largest', tbl.best_model('largest', Model.LINEAR))
        print('Alternate', tbl.best_model('alternate', Model.QUADRATIC))
    return tbl
def just_compare_sort_tournament_two(max_k=25, output=True, decimals=2):
    """
    Very large data sets to investigate whether crossover occurs (no it does not).

    For every N = 2**k with 10 <= k < max_k, time five executions of
    shuffling a list of N integers and then calling ``tournament_two`` on
    it, and likewise for ``sorting_two``. Each row holds the total
    ``timeit.timeit`` result for the five executions (the shuffle is part
    of the timed statement).

    :param max_k: exclusive upper bound on the exponent k.
    :param output: forwarded to ``DataTable``; when true the best model for
        each timing column is also printed.
    :param decimals: forwarded to ``DataTable``.
    :return: the populated ``DataTable``.
    """
    tbl = DataTable([15, 10, 15], ['N', 'sorting_two', 'tournament_two'],
                    output=output, decimals=decimals)

    # Setup code must start at column 0, hence the unindented literals.
    tournament_setup = '''
import random
from ch01.largest_two import tournament_two
x=list(range({}))'''
    sorting_setup = '''
import random
from ch01.largest_two import sorting_two
x=list(range({}))'''

    executions = 5
    for k in range(10, max_k):
        size = 2**k
        tournament_time = timeit.timeit(stmt='random.shuffle(x)\ntournament_two(x)',
                                        setup=tournament_setup.format(size),
                                        number=executions)
        sorting_time = timeit.timeit(stmt='random.shuffle(x)\nsorting_two(x)',
                                     setup=sorting_setup.format(size),
                                     number=executions)
        tbl.row([size, sorting_time, tournament_time])

    if output:
        print()
        # Skip the leading 'N' label; only timing columns are modelled.
        for header in tbl.labels[1:]:
            print(header, tbl.best_model(header))
    return tbl
def average_performance(max_n=32768, output=True, decimals=2):
    """
    Generate table of average performance for different PQ implementations.

    Compares 'ch06.pq' (BinaryTree column, measured with ``run_trials_pq``)
    against 'ch04.heap' (Heap column, measured with ``run_trials_pq_n``) for
    N = 128, 256, ... up to ``max_n``. Each value is the helper's result
    multiplied by 1,000,000 and divided by ``T * N`` with T = 3.

    Sample output:

           N        Heap    BinaryTree
         128        2.38          5.15
         256        2.80          5.75
         512        3.22          6.63
       1,024        3.51          7.60
       2,048        3.89          8.42
       4,096        4.35          9.21
       8,192        4.75         10.24
      16,384        5.23         11.38
      32,768        5.96         12.90

    Heap [(<Model.LOG: 1>, 0.9946069479866522, 0.19032820110187337, 0.3676270723813611)]
    BinaryTree [(<Model.LOG: 1>, 0.9945273325485424, 0.48542129185563554, 0.7892486887139494)]

    While both offer O(Log N) performance, heap is more efficient
    (a little more than twice as efficient).

    :param max_n: largest problem size to include in the table.
    :param output: forwarded to ``DataTable``; when true the best model for
        each column is also printed.
    :param decimals: forwarded to ``DataTable``.
    :return: the populated ``DataTable``.
    """
    num_trials = 3

    def per_item(total, size):
        """Scale a raw trial total by 1,000,000 per trial and per item."""
        return 1000000 * total / (num_trials * size)

    tbl = DataTable([8, 8, 8], ['N', 'Heap', 'BinaryTree'],
                    output=output, decimals=decimals)

    size = 128
    while size <= max_n:
        tree_cost = per_item(run_trials_pq('ch06.pq', size, num_trials), size)
        heap_cost = per_item(run_trials_pq_n('ch04.heap', size, num_trials), size)
        tbl.row([size, heap_cost, tree_cost])
        size *= 2

    if output:
        for label in ('Heap', 'BinaryTree'):
            print(label, tbl.best_model(label))
    return tbl
def test_table(self):
    """
    Exercise DataTable both silently and while printing to the console.

    Checks entry lookup on a silent table, column extraction on a printing
    table and, unless ``numpy_error`` is set, that the 'Another' column
    (where the value equals N) is modelled as linear with model[3]
    approximately 1.
    """
    def build(**options):
        """Create a table with rows (n, n, sqrt(n)) for n = 2..9."""
        table = DataTable([8, 8, 8], ['N', 'Another', 'SquareRoot'], **options)
        table.format('Another', 'd')
        for n in range(2, 10):
            table.row([n, n, n**0.5])
        return table

    silent = build(output=False, decimals=4)
    self.assertEqual(silent.entry(3, 'Another'), 3)

    print('Testing that Table is print to console')
    printed = build(decimals=4)
    self.assertEqual(list(range(2, 10)), printed.column('Another'))

    # best_model() is always called; only its checks are skipped when
    # numpy_error is set.
    model = printed.best_model('Another')[0]
    if not numpy_error:
        self.assertEqual(model[0], Model.LINEAR)
        self.assertAlmostEqual(model[3], 1.0000, places=5)
def prototype_table(output=True, decimals=3):
    """
    Generate table of results for prototype application.

    The prototype application is simply a request to sort the N values.

    Timings for N = 100, 1,000 and 10,000 are used to fit linear, quadratic
    and N log N models with ``scipy.optimize.curve_fit`` (all coefficients
    fall back to 0 when ``numpy_error`` is set). The table then compares
    measured time with each model's prediction for those sizes and for
    N = 100,000, 1,000,000 and 10,000,000.

    :param output: forwarded to ``DataTable``; when true the fitted
        formulas, the ``pearsonr`` result of each model column against
        'Time' and the best model for 'Time' are also printed.
    :param decimals: forwarded to ``DataTable``.
    :return: the populated ``DataTable``.
    """
    def timed_sort(size):
        """Return 1000 * the best of 100 repeats of 100 x.sort() calls."""
        runs = timeit.repeat(stmt='x.sort()', setup='''
import random
x=list(range({}))
random.shuffle(x)'''.format(size), repeat=100, number=100)
        return 1000 * min(runs)

    def quad_model(n, a, b):
        """Quadratic model used only for fitting; penalizes a negative a."""
        if a < 0:              # attempt to PREVENT negative coefficient.
            return 1e10
        return a * n * n + b * n

    nvals = [100, 1000, 10000]
    yvals = [timed_sort(n) for n in nvals]

    # Coefficients are returned as first argument
    if numpy_error:
        nlog_n_coeffs = linear_coeffs = quadratic_coeffs = [0, 0]
    else:
        import numpy as np
        from scipy.optimize import curve_fit

        nlog_n_coeffs = curve_fit(n_log_n_model, np.array(nvals), np.array(yvals))[0]
        linear_coeffs = curve_fit(linear_model, np.array(nvals), np.array(yvals))[0]
        quadratic_coeffs = curve_fit(quad_model, np.array(nvals), np.array(yvals))[0]

    if output:
        print('Linear = {:f}*N + {:f}'.format(linear_coeffs[0], linear_coeffs[1]))
        print('Quadratic = {}*N*N + {}*N'.format(quadratic_coeffs[0], quadratic_coeffs[1]))
        print('N Log N = {:.12f}*N*log2(N)'.format(nlog_n_coeffs[0]))
        print()

    tbl = DataTable([12, 10, 10, 10, 10], ['N', 'Time', 'Linear', 'Quad', 'NLogN'],
                    output=output, decimals=decimals)

    def add_row(size, measured):
        """Append measured time alongside the three model predictions."""
        tbl.row([
            size,
            measured,
            linear_model(size, linear_coeffs[0], linear_coeffs[1]),
            quadratic_model(size, quadratic_coeffs[0], quadratic_coeffs[1]),
            n_log_n_model(size, nlog_n_coeffs[0]),
        ])

    # Sizes the models were fitted on.
    for size, measured in zip(nvals, yvals):
        add_row(size, measured)

    # Larger sizes: timed here and compared with the models' predictions.
    for size in [100000, 1000000, 10000000]:
        add_row(size, timed_sort(size))

    if output:
        for label in ('Linear', 'Quad', 'NLogN'):
            print(label, tbl.pearsonr('Time', label))
        print(tbl.best_model('Time'))
    return tbl