예제 #1
0
def insertion_sort_bas(max_k=18, output=True, decimals=3):
    """
    Generate Table for Insertion Sort.

    Times ``insertion_sort_bas`` (imported from ``ch05.sorting`` inside the
    timeit setup) on shuffled lists of 2**8 .. 2**11 values, fits
    ``n_log_n_model`` and ``quadratic_model`` to those timings and then
    reports measured time next to both model predictions for every size
    from 2**8 up to, but not including, 2**max_k.

    If the module-level ``numpy_error`` flag is set no fitting is attempted
    and all coefficients are zero. Returns the populated DataTable.
    """
    def best_time(size):
        """Fastest of 10 repetitions (10 executions each), scaled by 1000."""
        return 1000 * min(
            timeit.repeat(stmt='insertion_sort_bas(A)',
                          setup='''
import random
from ch05.sorting import insertion_sort_bas
A=list(range({}))
random.shuffle(A)'''.format(size),
                          repeat=10,
                          number=10))

    # Training data: small problem instances only.
    train_sizes = [2**k for k in range(8, 12)]
    train_times = [best_time(size) for size in train_sizes]

    if numpy_error:
        log_coeffs = quadratic_coeffs = [0, 0]
    else:
        import numpy as np
        from scipy.optimize import curve_fit
        # curve_fit returns the fitted coefficients as its first result.
        log_coeffs, _ = curve_fit(n_log_n_model, np.array(train_sizes),
                                  np.array(train_times))
        quadratic_coeffs, _ = curve_fit(quadratic_model,
                                        np.array(train_sizes),
                                        np.array(train_times))

    if output:
        print('Quadratic = {}*N*N + {}*N'.format(quadratic_coeffs[0],
                                                 quadratic_coeffs[1]))
        print('Log       = {:.12f}*N*log2(N)'.format(log_coeffs[0]))
        print()

    tbl = DataTable([12, 10, 10, 10], ['N', 'Time', 'Quad', 'Log'],
                    output=output,
                    decimals=decimals)

    def emit(size, measured):
        """Add one row: size, measured time, then both model predictions."""
        tbl.row([
            size, measured,
            quadratic_model(size, quadratic_coeffs[0], quadratic_coeffs[1]),
            n_log_n_model(size, log_coeffs[0])
        ])

    # Rows for the sizes the models were trained on ...
    for size, measured in zip(train_sizes, train_times):
        emit(size, measured)

    # ... followed by freshly timed larger sizes.
    for size in [2**k for k in range(12, max_k)]:
        emit(size, best_time(size))
    return tbl
예제 #2
0
def trial_merge_sort_python_style(max_k=15, output=True, decimals=3):
    """
    Empirical trial for merge sort using slicing.

    For each N = 2**k with 8 <= k < max_k, times ``slice_merge_sort`` from
    ``ch05.challenge`` and ``merge_sort`` from ``ch05.merge`` on a shuffled
    list of N values, and records one row per N. Returns the DataTable.
    """
    def fastest(statement, preparation):
        """Fastest of 10 repetitions (10 executions each), scaled by 1000."""
        return 1000 * min(
            timeit.repeat(stmt=statement,
                          setup=preparation,
                          repeat=10,
                          number=10))

    tbl = DataTable([8, 8, 8], ['N', 'merge', 'mergeSlice'],
                    output=output,
                    decimals=decimals)

    for size in (2**k for k in range(8, max_k)):
        # The slicing variant is timed first, as before.
        slice_time = fastest('slice_merge_sort(A)', '''
import random
from ch05.challenge import slice_merge_sort
A=list(range({}))
random.shuffle(A)'''.format(size))

        merge_time = fastest('merge_sort(A)', '''
import random
from ch05.merge import merge_sort
A=list(range({}))
random.shuffle(A)'''.format(size))

        tbl.row([size, merge_time, slice_time])
    return tbl
예제 #3
0
def worst_heights(max_n=40, output=True):
    """
    Generate random AVL trees of n Nodes to find which ones have greatest height.
    Purely speculative and not definitive exploration of potential trees.

    For each n in 1 .. max_n-1, builds 10,001 trees by inserting n random
    values and tracks the greatest root height seen together with how many
    of those trees reached it. A row is only added when that height exceeds
    the height of every row already in the table.

    Returns the populated DataTable.
    """
    from ch06.balanced import BinaryTree
    tbl = DataTable([8, 8, 8], ['N', 'WorstHeight', 'NumberFound'],
                    output=output)
    tbl.format('WorstHeight', 'd')
    tbl.format('NumberFound', ',d')
    table_max_height = -1
    for n in range(1, max_n):
        number_found = 0
        max_height = -1
        for _trial in range(10001):
            avl = BinaryTree()
            for _ in range(n):
                avl.insert(random.random())
            height = avl.root.height
            if height > max_height:
                max_height = height
                # The tree that sets a new record is itself the first tree
                # found at that height, so the tally restarts at one.
                number_found = 1
            elif height == max_height:
                number_found += 1

        if max_height > table_max_height:
            tbl.row([n, max_height, number_found])
            table_max_height = max_height
    return tbl
예제 #4
0
def another_fragment_counting(max_k=20, output=True):
    """
    Generate table for counts of fragments up to (but not including) 2**max_k.

    Unless the module-level ``numpy_error`` flag is set, a single-coefficient
    A*log2(log2(N)) model is first fitted to ``f4(N)`` for N = 2**5 .. 2**14;
    otherwise the coefficient is zero. Each row holds N, ``f4(N)`` and the
    model's value for N. Returns the populated DataTable.
    """
    if numpy_error:
        a = 0, 0
    else:
        import numpy as np
        from scipy.optimize import curve_fit

        def log_log_model(n, coeff):
            """Formula for A*Log_2(Log_2(N)) with single coefficient."""
            return coeff * np.log2(np.log2(n))

        # Train Model
        training = [2**k for k in range(5, 15)]
        observed = [f4(size) for size in training]

        a, _ = curve_fit(log_log_model, np.array(training),
                         np.array(observed))
        if output:
            print('LOG_LOG_MODEL = {}*log(log(N))'.format(a))

    tbl = DataTable([8, 8, 8], ['N', 'F4', 'Model'], output=output)
    tbl.format('F4', 'd')
    for size in (2**k for k in range(5, max_k)):
        tbl.row([size, f4(size), a[0] * math.log2(math.log2(size))])
    return tbl
예제 #5
0
def table_trials(max_k=15, output=True, decimals=3):
    """
    Compare Merge Sort against built in Python sort up to, but not including 2**max_k.

    Each row holds N (starting at 2**8), then the per-execution time of
    ``merge_sort`` from ``ch05.merge`` and of ``list.sort()`` on a shuffled
    list of N values. Returns the populated DataTable.
    """
    executions = 15

    def per_call_ms(statement, preparation):
        """Fastest of 20 repetitions, scaled by 1000 and averaged per execution."""
        return 1000 * min(
            timeit.repeat(stmt=statement,
                          setup=preparation,
                          repeat=20,
                          number=executions)) / executions

    tbl = DataTable([8, 10, 10], ['N', 'MergeSort', 'Built-In Sort'],
                    output=output,
                    decimals=decimals)

    for size in (2**k for k in range(8, max_k)):
        merge_time = per_call_ms('merge_sort(x)', '''
import random
from ch05.merge import merge_sort
x=list(range({}))
random.shuffle(x)'''.format(size))

        builtin_time = per_call_ms('x.sort()', '''
import random
x=list(range({}))
random.shuffle(x)'''.format(size))

        tbl.row([size, merge_time, builtin_time])
    return tbl
예제 #6
0
def run_access_trials(max_trials=100000, output=True, decimals=5):
    """
    Generate performance table for up to max_trials number of runs.

    Three ways of looking up a value from ``ch03.months`` are timed
    (dictionary indexing, ``days_mixed`` and ``days_bas``); each is executed
    ``max_trials`` times per repetition and the fastest of 10 repetitions is
    reported. The table receives a single row. Returns the DataTable.
    """
    tbl = DataTable([10, 10, 10], ['Dict', 'Raw', 'BAS'],
                    output=output,
                    decimals=decimals)
    tbl.format('Dict', 'f')

    # (statement, setup) pairs in column order.
    experiments = [
        ('days_in_month[s_data[2]]',
         'from ch03.months import s_data, days_in_month'),
        ('days_mixed(s_data[2])',
         'from ch03.months import s_data, days_mixed'),
        ('days_bas(s_data[2])',
         'from ch03.months import s_data, days_bas'),
    ]

    timings = [
        min(timeit.repeat(stmt=statement,
                          setup=preparation,
                          repeat=10,
                          number=max_trials))
        for statement, preparation in experiments
    ]
    tbl.row(timings)
    return tbl
예제 #7
0
def time_results_linked(output=True, decimals=3):
    """
    Time filling a ``ch03.hashtable_linked`` Hashtable for growing table sizes.

    One row per number of words added (32 .. 16,384) and one column per
    table size (8,192 .. 1,048,576). Each cell is the time of 100 executions
    of "create the table and put every word", scaled by 1,000,000 and
    divided by the table size. Returns the populated DataTable.
    """
    sizes = [2**k for k in range(13, 21)]          # 8,192 .. 1,048,576
    word_counts = [2**k for k in range(5, 15)]     # 32 .. 16,384

    tbl = DataTable([8] + [8] * len(sizes),
                    ['N'] + [comma(sz) for sz in sizes],
                    output=output,
                    decimals=decimals)

    words = english_words()
    for num_to_add in word_counts:
        # The selected prefix of words is embedded literally into the setup.
        subset = words[:num_to_add]

        def scaled_time(size):
            """Time 100 fill operations for one table size and scale it."""
            elapsed = min(
                timeit.repeat(stmt='''
table = Hashtable({})
for word in words:
    table.put(word, 99)'''.format(size),
                              setup='''
from ch03.hashtable_linked import Hashtable
words={}'''.format(subset),
                              repeat=1,
                              number=100))
            return 1000000 * elapsed / size

        tbl.row([num_to_add] + [scaled_time(size) for size in sizes])
    return tbl
예제 #8
0
def algorithms_x_y():
    """
    Generate table for estimates of time for three computers and two algorithms.

    For N = 2**2 .. 2**23 each row lists the operation counts of algorithms
    X (5*N) and Y (2020*log2(N)) followed by those counts divided by the
    rates 1500, 3000 and 250*3000. Returns the populated DataTable.
    """
    def alg_x(n):
        """Number of operations for algorithm X."""
        return 5 * n

    def alg_y(n):
        """Number of operations for algorithm Y."""
        # math.log2 is exact for powers of two, whereas log(n)/log(2) can
        # land just below the true integer and then be truncated by int().
        return 2020 * math.log2(n)

    tbl = DataTable([15, 15, 8, 8, 8, 8, 8],
                    ['N', 'X', 'Y', 'X_slow', 'X_fast', 'Y_fast', 'X_fastest'],
                    decimals=1)
    tbl.format('X', ',d')
    tbl.format('Y', ',d')
    for n in [2**k for k in range(2, 24)]:
        x_ops = alg_x(n)
        y_ops = alg_y(n)
        tbl.row([
            n,
            x_ops,
            int(y_ops),
            x_ops / 1500,
            x_ops / 3000,
            y_ops / 1500,
            x_ops / (250 * 3000)
        ])
    return tbl
예제 #9
0
def search_trials():
    """
    For randomly constructed NxN mazes, compute efficiency of searching strategies
    on 512 random mazes, as N grows from 4x4 to 128x128.

    A second table repeats the comparison on the single maze produced by
    ``maze_to_defeat_guided_search(N)`` for each N.

    Returns a tuple ``(tbl_random, tbl_defeat)`` holding both DataTables;
    previously the first table was discarded and nothing was returned.
    """
    import random
    from ch07.maze import to_networkx, distance_to

    sizes = [4, 8, 16, 32, 64, 128]
    num_mazes = 512

    # Averages over seeded random mazes.
    tbl_random = DataTable([8,8,8,8],['N', 'BFS', 'DS', 'GS'], decimals=2)
    for N in sizes:
        num_bfs = 0
        num_dfs = 0
        num_gs = 0
        for i in range(num_mazes):
            # Seeding with the trial index makes every maze reproducible.
            random.seed(i)
            m = Maze(N,N)
            G = to_networkx(m)

            num_bfs += annotated_bfs_search(G, m.start(), m.end())
            num_dfs += annotated_dfs_search(G, m.start(), m.end())
            num_gs += annotated_guided_search(G, m.start(), m.end(), distance_to)

        tbl_random.row([N, num_bfs/num_mazes, num_dfs/num_mazes, num_gs/num_mazes])

    # Single hand-crafted maze per size.
    tbl_defeat = DataTable([8,8,8,8],['N', 'BFS', 'DS', 'GS'], decimals=2)
    for N in sizes:
        m = maze_to_defeat_guided_search(N)
        G = to_networkx(m)

        num_bfs = annotated_bfs_search(G, m.start(), m.end())
        num_dfs = annotated_dfs_search(G, m.start(), m.end())
        num_gs = annotated_guided_search(G, m.start(), m.end(), distance_to)

        tbl_defeat.row([N, num_bfs, num_dfs, num_gs])

    return (tbl_random, tbl_defeat)
예제 #10
0
def performance_different_approaches(output=True):
    """
    Produce results on # less-than for different algorithms and data sets.

    Each of the five ``ch01.largest_two`` algorithms is run on 524,288
    ``RecordedItem`` values in ascending, descending and alternating order;
    the summed ``RecordedItem.report()`` counts form one row per algorithm.
    Returns the populated DataTable.
    """
    from ch01.largest_two import largest_two, sorting_two, double_two, mutable_two, tournament_two

    n = 524288
    headers = ['Algorithm', 'Ascending', 'Descending', 'Alternating']

    tbl = DataTable([15, 10, 10, 10], headers, output=output)
    for hdr in headers:
        tbl.format(hdr, ',d')
    tbl.format('Algorithm', 's')

    def comparisons(func, values):
        """Run func over fresh RecordedItems and return the summed report."""
        RecordedItem.clear()
        func([RecordedItem(i) for i in values])
        return sum(RecordedItem.report())

    contenders = [
        ('largest_two', largest_two),
        ('sorting_two', sorting_two),
        ('double_two', double_two),
        ('mutable_two', mutable_two),
        ('tournament_two', tournament_two),
    ]

    for label, func in contenders:
        up_count = comparisons(func, range(n))
        down_count = comparisons(func, range(n, 0, -1))

        # Weave: alternate values taken from the low end and the high end.
        pairs = zip(range(0, n, 2), range(n - 1, 0, -2))
        weave_count = comparisons(func, itertools.chain.from_iterable(pairs))

        tbl.row([label, up_count, down_count, weave_count])
    return tbl
예제 #11
0
def time_results_open(words, output=True, decimals=4):
    """
    Time filling a ``ch03.hashtable_open`` Hashtable for growing table sizes.

    One row per number of words added (32 .. 16,384, taken from the front of
    ``words``) and one column per table size (8,192 .. 1,048,576). A cell is
    only timed when fewer words than the table size are added; otherwise it
    holds SKIP. Timed cells are the time of 100 fill operations, scaled by
    100,000 and divided by the table size. Returns the populated DataTable.
    """
    sizes = [2**k for k in range(13, 21)]          # 8,192 .. 1,048,576
    tbl = DataTable([8] + [10] * len(sizes), ['N'] + sizes,
                    output=output, decimals=decimals)

    for num_to_add in (2**k for k in range(5, 15)):    # 32 .. 16,384
        subset = words[:num_to_add]

        arow = [num_to_add]
        for size in sizes:
            if num_to_add >= size:
                arow.append(SKIP)
                continue

            elapsed = min(
                timeit.repeat(stmt='''
table = Hashtable({})
for word in words:
    table.put(word, 99)'''.format(size),
                              setup='''
from ch03.hashtable_open import Hashtable
words={}'''.format(subset),
                              repeat=1,
                              number=100))
            arow.append((100000.0 * elapsed) / size)
        tbl.row(arow)
    return tbl
예제 #12
0
def just_compare_sort_tournament_two(max_k=25, output=True, decimals=2):
    """
    Very large data sets to investigate whether crossover occurs (no it does not).

    For N = 2**10 up to, but not including, 2**max_k, times five executions
    of shuffle-then-``tournament_two`` and of shuffle-then-``sorting_two``
    (both from ``ch01.largest_two``). When ``output`` is true, the best
    model for each timing column is printed afterwards.
    Returns the populated DataTable.
    """
    executions = 5
    tbl = DataTable([15, 10, 15], ['N', 'sorting_two', 'tournament_two'],
                    output=output,
                    decimals=decimals)

    def total_time(statement, preparation):
        """Total time for ``executions`` runs of statement."""
        return timeit.timeit(stmt=statement,
                             setup=preparation,
                             number=executions)

    for size in (2**k for k in range(10, max_k)):
        # Tournament is measured before sorting, as in the original trial.
        tournament_time = total_time('random.shuffle(x)\ntournament_two(x)', '''
import random
from ch01.largest_two import tournament_two
x=list(range({}))'''.format(size))

        sorting_time = total_time('random.shuffle(x)\nsorting_two(x)', '''
import random
from ch01.largest_two import sorting_two
x=list(range({}))'''.format(size))

        tbl.row([size, sorting_time, tournament_time])

    if output:
        print()
        for column in tbl.labels[1:]:
            print(column, tbl.best_model(column))
    return tbl
예제 #13
0
def run_init_trial(output=True):
    """
    First Table in chapter 1.

    For N = 100, 1,000, ... 1,000,000, times ``native_largest`` from
    ``ch01.largest`` on the values 1..N in ascending order and in descending
    order. Each figure is the fastest of 10 repetitions of 50 executions,
    scaled by 1000 and divided by 50. Returns the populated DataTable.
    """
    executions = 50

    def per_call_ms(statement, preparation):
        """Fastest repetition, scaled by 1000 and averaged per execution."""
        return 1000 * min(
            timeit.repeat(stmt=statement,
                          setup=preparation,
                          repeat=10,
                          number=executions)) / executions

    tbl = DataTable([12, 12, 12], ['N', 'Ascending', 'Descending'],
                    output=output,
                    decimals=3)

    for size in (10**k for k in range(2, 7)):
        # 1 up to and including N
        ascending_time = per_call_ms('native_largest(up)', '''
from ch01.largest import native_largest
up = list(range(1,{}+1))'''.format(size))

        # N down to but not including 0
        descending_time = per_call_ms('native_largest(down)', '''
from ch01.largest import native_largest
down = list(range({}, 0, -1))'''.format(size))

        tbl.row([size, ascending_time, descending_time])
    return tbl
예제 #14
0
def compare_time(words, output=True, decimals=4):
    """
    Generate table of performance differences with linked hashtable and perfect hashing.

    Times putting every entry of ``words`` into a Hashtable from
    ``ch03.hashtable_open_perfect`` and into one from
    ``ch03.hashtable_linked`` (sized ``len(words)``). Each figure is the
    fastest of 3 repetitions of 5 executions, divided by 5. The table gets a
    single row: number of words, linked time, perfect time.
    Returns the populated DataTable.
    """
    executions = 5

    def per_call(statement, preparation):
        """Fastest of 3 repetitions, averaged per execution."""
        return min(
            timeit.repeat(stmt=statement,
                          setup=preparation,
                          repeat=3,
                          number=executions)) / executions

    tbl = DataTable([8, 8, 8], ['N', 'Linked', 'Perfect'],
                    output=output,
                    decimals=decimals)

    # Perfect hashing is measured first, then the linked-list table.
    perfect_time = per_call('''
ht = HL()
for w in words:
    ht.put(w,w)''', '''
from ch03.hashtable_open_perfect import Hashtable as HL
words={}'''.format(words))

    linked_time = per_call('''
ht = HL(len(words))
for w in words:
    ht.put(w,w)''', '''
from ch03.hashtable_linked import Hashtable as HL
words={}'''.format(words))

    tbl.row([len(words), linked_time, perfect_time])
    return tbl
예제 #15
0
def run_median_trial():
    """
    Generate table for Median Trial.

    For N = 2**k + 1 with 8 <= k < 20, times finding the median of a
    shuffled ``range(N)`` with ``linear_median`` from ``ch01.challenge``
    and by indexing ``sorted(a)``. Both timed statements assert that the
    result equals N//2. Each figure is the fastest of 10 repetitions of 5
    executions, scaled by 1000 and divided by 5.
    Returns the populated DataTable.
    """
    executions = 5
    tbl = DataTable([10, 15, 15], ['N', 'median_time', 'sort_median'])

    for size in (2**k + 1 for k in range(8, 20)):
        # Both measurements share the same preparation code.
        preparation = '''
import random
from ch01.challenge import linear_median
a = list(range({}))
random.shuffle(a)
'''.format(size)

        def per_call_ms(statement):
            """Fastest repetition, scaled by 1000 and averaged per execution."""
            return 1000 * min(
                timeit.repeat(stmt=statement,
                              setup=preparation,
                              repeat=10,
                              number=executions)) / executions

        linear_time = per_call_ms(
            'assert(linear_median(a) == {}//2)'.format(size))
        sorted_time = per_call_ms(
            'assert(sorted(a)[{0}//2] == {0}//2)'.format(size))

        tbl.row([size, linear_time, sorted_time])

    return tbl
예제 #16
0
def run_median_less_than_trial(max_k=20, output=True):
    """
    Use RecordedItem to count # of times Less-than invoked up to (but not including) max_k=20.

    For N = 2**k + 1 with 8 <= k < max_k, a shuffled list of N RecordedItem
    values has its median located via ``sorted`` and via ``linear_median``;
    element [1] of ``RecordedItem.report()`` is recorded after each. The two
    medians are asserted to be equal. Returns the populated DataTable.
    """
    tbl = DataTable([10, 15, 15], ['N', 'median_count', 'sort_median_count'],
                    output=output)
    for column in ('median_count', 'sort_median_count'):
        tbl.format(column, ',d')

    for size in (2**k + 1 for k in range(8, max_k)):
        items = [RecordedItem(i) for i in range(size)]
        random.shuffle(items)

        # sorted() builds a new list, so the shuffled input can be reused.
        RecordedItem.clear()
        by_sorting = sorted(items)[size // 2]
        sort_lt = RecordedItem.report()[1]

        RecordedItem.clear()
        by_linear = linear_median(items)
        lin_lt = RecordedItem.report()[1]

        assert by_linear == by_sorting

        tbl.row([size, lin_lt, sort_lt])

    return tbl
예제 #17
0
def debug_state(title, G, node_from, dist_to, output=True):
    """
    Useful to show state of all pairs shortest path.

    Builds two tables indexed by the nodes of ``G``: one holding
    ``node_from[u][v]`` (or '.' where that entry is falsy) and one holding
    ``dist_to[u][v]`` (with 0 on the diagonal). The title line and the blank
    separator lines are printed regardless of ``output``.

    Returns the tuple ``(node_from_table, dist_to_table)``.
    """
    from algs.table import DataTable

    print('debug :', title)
    labels = list(G.nodes())
    widths = [6] + [6]*len(labels)
    headers = ['.'] + labels

    # Predecessor table: every column is rendered as a string.
    tbl = DataTable(widths, headers, output=output)
    tbl.format('.','s')
    for field in labels:
        tbl.format(field, 's')
    for u in labels:
        tbl.row([u] + [node_from[u][v] or '.' for v in labels])
    print()

    # Distance table: only the label column is rendered as a string.
    tbl_dist_to = DataTable(widths, headers, output=output, decimals=1)
    tbl_dist_to.format('.','s')
    for u in labels:
        tbl_dist_to.row([u] + [0 if u == v else dist_to[u][v] for v in labels])
    print()
    return (tbl, tbl_dist_to)
예제 #18
0
def run_range_analysis(output=True):
    """
    Confirm O(log N) algorithm to find range of duplicates.

    For N = 2**10 .. 2**19, times ``best_range`` and ``worst_range`` from
    ``ch02.challenge`` on a sorted list built from N copies of a random
    target plus N - N//16 further random values. Each figure is the fastest
    of 40 repetitions of 50 executions, divided by 50.

    Returns the populated DataTable (the table used to be built and then
    dropped, unlike the other table generators in this file).
    """
    tbl = DataTable([8, 8, 8], ['N', 'O(N)', 'O(log N)'],
                    decimals=7,
                    output=output)

    # Setup fragment shared by both measurements; {0} is N and {1} is N//16.
    commands = '''
from random import random
tgt = random()
alist = [tgt] * {0}
for _ in range({0}-{1}):
    alist.append(random())
alist = sorted(alist)
'''
    for n in [2**k for k in range(10, 20)]:
        custom = commands.format(n, n // 16)
        best_times = min(
            timeit.repeat(stmt='best_range(alist, tgt)',
                          setup='''
from ch02.challenge import best_range
{}'''.format(custom),
                          repeat=40,
                          number=50)) / 50
        worst_times = min(
            timeit.repeat(stmt='worst_range(alist, tgt)',
                          setup='''
from ch02.challenge import worst_range
{}'''.format(custom),
                          repeat=40,
                          number=50)) / 50

        tbl.row([n, worst_times, best_times])
    return tbl
예제 #19
0
def visualize_results_floyd_warshall(DG, output=True):
    """
    Output the node_from and dist_to arrays for floyd-warshall after completion.

    Runs ``floyd_warshall(DG)``; when ``output`` is true the node_from and
    dist_to tables are emitted first. A further table is then built whose
    cell (n, v) is the path from n to v joined by ' -> ', or SKIP when
    n == v or ``node_from[n][v]`` is falsy.

    Returns the path DataTable (it used to be built and then dropped).
    """
    from ch07.all_pairs_sp import all_pairs_path_to

    (dist_to, node_from) = floyd_warshall(DG)

    if output:
        output_node_from_floyd_warshall(DG, node_from)
        print()
        output_dist_to_floyd_warshall(DG, dist_to)
        print()

    tbl_path = DataTable([20] * DG.number_of_nodes(),
                         list(DG.nodes()),
                         output=output)
    for n in DG.nodes():
        tbl_path.format(n, 's')
    for n in DG.nodes():
        row = []
        for v in DG.nodes():
            if n != v and node_from[n][v]:
                nodes = all_pairs_path_to(node_from, n, v)
                row.append(' -> '.join(nodes))
            else:
                # Either the diagonal or no recorded predecessor.
                row.append(SKIP)
        tbl_path.row(row)

    if output:
        print()
    return tbl_path
예제 #20
0
def prime_number_difference(words, output=True, decimals=2):
    """
    Identify sensitivity of M to being prime or not.

    For every table size M in 428880 .. 428980 the base26 keys of ``words``
    are stored in a linked-list hashtable and an open-addressing hashtable
    of that size, and the statistics reported by ``stats_linked_lists`` and
    ``stats_open_addressing`` are recorded, with prime values of M flagged.

    When ``output`` is true, a follow-up search over M = worst_m,
    worst_m + 13, ... (below worst_m + 10000) reports any size whose longest
    linked list exceeds the worst seen so far.

    Returns the populated DataTable.
    """

    from ch03.hashtable_linked import Hashtable as Linked_Hashtable, stats_linked_lists
    from ch03.hashtable_open import Hashtable as Open_Hashtable, stats_open_addressing
    from ch03.base26 import base26

    # these are prime numbers between 428880 and 428980
    lo = 428880
    primes = [428899, 428951, 428957, 428977]
    hi = 428980

    keys = [base26(w) for w in words]
    tbl = DataTable([12, 6, 8, 8, 8, 8],
                    ['M', 'Prime', 'Avg. LL', 'Max LL', 'Avg. OA', 'Max OA'],
                    output=output,
                    decimals=decimals)
    tbl.format('Prime', 's')
    tbl.format('Max LL', 'd')
    tbl.format('Max OA', 'd')
    worst = 0
    worst_m = 0
    for m in range(lo, hi + 1):
        is_p = 'Prime' if m in primes else ''
        ht_linked = Linked_Hashtable(m)
        ht_open = Open_Hashtable(m)

        for k in keys:
            ht_linked.put(k, 1)
            ht_open.put(k, 1)

        (avg_length_linked, max_length_linked) = stats_linked_lists(ht_linked)
        if max_length_linked > worst:
            worst_m = m
            worst = max_length_linked
        (avg_length_open, max_length_open) = stats_open_addressing(ht_open)
        tbl.row([
            m, is_p, avg_length_linked, max_length_linked, avg_length_open,
            max_length_open
        ])

    # Now try to find any more that exceed this maximum amount
    if output:
        print('Worst was {} for M={}'.format(worst, worst_m))
        for m in range(worst_m, worst_m + 10000, 13):
            ht_linked = Linked_Hashtable(m)
            # The table has to hold the keys before its chains are measured;
            # without this the search inspected empty tables and could never
            # find a longer chain.
            for k in keys:
                ht_linked.put(k, 1)

            (avg_length_linked,
             max_length_linked) = stats_linked_lists(ht_linked, False)
            if max_length_linked > worst:
                worst_m = m
                worst = max_length_linked
                print('Worst of {} for M={}'.format(worst, worst_m))
        print('Done')

    return tbl
예제 #21
0
def count_collisions(num_rows=0, output=True, decimals=1):
    """
    Generate table counting collisions.

    All English words are stored in a linked-list hashtable and an
    open-addressing hashtable of size M, first for M = 20*N (N being the
    number of words) and then for M shrinking from 2*N while M > N/16. The
    open-addressing table is only filled and measured while M > N; below
    that its two columns hold SKIP.

    ``num_rows`` allows an early exit for testing: the shrinking loop stops
    after that many rows. With the default of 0 the counter never reaches
    zero, so the loop runs to completion.

    Returns the populated DataTable.
    """
    all_words = english_words()
    N = len(all_words)

    from ch03.hashtable_linked import Hashtable as HL
    from ch03.hashtable_linked import stats_linked_lists
    from ch03.hashtable_open import Hashtable as OHL
    from ch03.hashtable_open import stats_open_addressing

    tbl = DataTable([10,8,8,8,8], ['M', 'Avg LL', 'Max LL', 'Avg OA', 'Max OA'],
                    output=output, decimals=decimals)
    tbl.format('Max LL', 'd')
    tbl.format('Max OA', 'd')

    # Baseline row with a very sparsely filled table.
    M = 20*N
    linked_table = HL(M)
    open_table = OHL(M)
    for word in all_words:
        linked_table.put(word, 1)
        open_table.put(word, 1)
    linked_stats = stats_linked_lists(linked_table)
    open_stats = stats_open_addressing(open_table)
    tbl.row([M, linked_stats[0], linked_stats[1], open_stats[0], open_stats[1]])

    M = 2*N
    while M > N/16:
        # Open addressing cannot hold N words unless M exceeds N.
        open_fits = M > N

        linked_table = HL(M)
        open_table = OHL(M)
        for word in all_words:
            linked_table.put(word, 1)
            if open_fits:
                open_table.put(word, 1)
        linked_stats = stats_linked_lists(linked_table)

        if open_fits:
            open_stats = stats_open_addressing(open_table)
        else:
            tbl.format('Avg OA', 's')
            tbl.format('Max OA', 's')
            open_stats = [SKIP, SKIP]

        num_rows -= 1
        tbl.row([M, linked_stats[0], linked_stats[1], open_stats[0], open_stats[1]])

        # Shrink to 95% while M exceeds N, then to 60% per step.
        M = (M * 95) // 100 if open_fits else (M * 6) // 10

        # To allow for testing, simple way to break out after a number of rows are generated.
        if num_rows == 0:
            break
    return tbl
예제 #22
0
def generate_hash():
    """
    Results are different each time since Python salts hash values.

    Adds one row per word of a fixed sentence: the word, its ``hash()``
    value and that value modulo 15. Returns the populated DataTable.
    """
    columns = ['key', 'hash(key)', 'hash(key) % 15']
    tbl = DataTable([8,20,20], columns)
    for column, spec in zip(columns, ['s', 'd', 'd']):
        tbl.format(column, spec)

    sentence = 'a rose by any other name would smell as sweet'
    for word in sentence.split():
        code = hash(word)
        tbl.row([word, code, code % 15])
    return tbl
예제 #23
0
def table_compare_graph_structures(max_k=15, output=True):
    """
    Compare Matrix implementation vs. Adjacency list implementation vs. NetworkX up to
    but not including max_k=15.

    For N = 2**k with 8 <= k < max_k, a graph of N nodes is built in which
    node 0 has ten edges, and the time to iterate over ``G[0]`` and sum its
    members is measured for each implementation (fastest of 20 repetitions
    of 20 executions, scaled by 1000).

    The 'NetworkX' column is measured on ``networkx.Graph``; it previously
    re-timed ``ch07.replacement.UndirectedGraph``, duplicating the
    'Adjacency List' column.

    Returns the populated DataTable.
    """
    def fastest(setup):
        """Time the neighbor-summing loop against the graph built by setup."""
        return 1000 * min(
            timeit.repeat(stmt='''
total=0
for w in G[0]:
    total += w''',
                          setup=setup,
                          repeat=20,
                          number=20))

    tbl = DataTable([8, 10, 10, 10],
                    ['N', 'NetworkX', 'Adjacency List', 'Adjacency Matrix'],
                    output=output)
    for N in [2**k for k in range(8, max_k)]:
        undirect_mtime = fastest('''
from ch07.replacement import UndirectedGraph
G = UndirectedGraph()
G.add_nodes_from(list(range({0})))
for o in range(10):
    G.add_edge(0, {0}-o-1)'''.format(N))

        # Build a genuine NetworkX graph for the NetworkX column.
        networkx_mtime = fastest('''
import networkx as nx
G = nx.Graph()
G.add_nodes_from(list(range({0})))
for o in range(10):
    G.add_edge(0, {0}-o-1)'''.format(N))

        matrix_mtime = fastest('''
from ch07.replacement import MatrixUndirectedGraph
G = MatrixUndirectedGraph()
G.add_nodes_from(list(range({0})))
for o in range(10):
    G.add_edge(0, {0}-o-1)'''.format(N))

        tbl.row([N, networkx_mtime, undirect_mtime, matrix_mtime])
    return tbl
예제 #24
0
def trial_multiple_rotations(output=True, num_attempts=10000):
    """
    Some trial and error went into these ranges.

    The first table lists, for extra = 0, 1, 2, the tree structure returned
    by ``find_multiple_rotations`` together with the height and node count
    of the tree recreated from it. The second table does the same for the
    trees from ``fibonacci_avl(n)``, n = 6, 8, 10, 12, reporting how much
    ``rotations[0]`` grew while removing the value ``fib(n + 1) - 1``.

    Only the second table is returned.
    """
    from ch05.challenge import fib

    def count_nodes(node):
        """Number of nodes in the subtree rooted at node."""
        if node is None:
            return 0
        return 1 + count_nodes(node.left) + count_nodes(node.right)

    def int_columns(table):
        """Render the three numeric columns as integers."""
        for column in ('NumRot', 'Height', 'N'):
            table.format(column, 'd')

    tbl = DataTable([6, 6, 6, 6], ['NumRot', 'Height', 'N', 'Random Tree'],
                    output=output)
    tbl.format('Random Tree', 's')
    int_columns(tbl)

    for extra in range(3):
        (structure, _) = find_multiple_rotations(extra,
                                                 lo=4,
                                                 hi=40,
                                                 num_attempts=num_attempts,
                                                 output=False)
        rebuilt = recreate_tree(structure)
        tbl.row([extra + 1, rebuilt.height, count_nodes(rebuilt), structure])

    # Now use Fibonacci Trees to accomplish the same result.
    if output:
        print()
    tbl = DataTable([6, 6, 6, 13], ['NumRot', 'Height', 'N', 'Fib AVL Trees'],
                    output=output)
    tbl.format('Fib AVL Trees', 's')
    int_columns(tbl)

    for n in range(6, 14, 2):
        root = fibonacci_avl(n)
        root.compute_height()
        check_avl_property(root)  # double-check
        structure = tree_structure(root)
        bt = ObservableBinaryTree()
        height = root.height
        bt.root = root
        count = count_nodes(root)

        # rotations[0] is a running total, so report the difference.
        rotations_before = rotations[0]
        bt.remove(fib(n + 1) - 1)
        check_avl_property(bt.root)
        num_rotations = rotations[0] - rotations_before

        tbl.row([num_rotations, height, count, structure])

    return tbl
예제 #25
0
def count_hash_incremental_move(output=True, decimals=4):
    """
    Time each insertion of all English words into dynamically resizing hashtables.

    Every word is wrapped in ``CountableHash`` and put into a
    ``DynamicHashtable`` created with an argument of 1023; a row is emitted
    whenever a single ``put`` takes longer than every ``put`` before it.
    The same experiment is repeated with
    ``DynamicHashtableIncrementalResizing`` for delta = 512, 256, ... 4,
    using a fresh table for each delta.

    Total elapsed times are printed regardless of ``output``.
    Nothing is returned.
    """
    from ch03.book import CountableHash
    from ch03.hashtable_linked import DynamicHashtable

    def new_table():
        """Create the three-column table used by every experiment."""
        table = DataTable([20, 10, 10], ['Word', 'N', 'cost'],
                          output=output,
                          decimals=decimals)
        table.format('Word', 's')
        table.format('N', ',d')
        return table

    def timed_inserts(hashtable, table):
        """Insert every word, recording each new costliest put; return total time."""
        costliest = 0
        started = time.time()
        for word in english_words():
            before = time.time()
            hashtable.put(CountableHash(word), word)
            cost = time.time() - before
            if cost > costliest:
                costliest = cost
                table.row([word, hashtable.N, cost])
        return time.time() - started

    print(
        'Each emitted row contains an operation more costly than any before...'
    )
    ht_dynamic = DynamicHashtable(1023)
    total_normal = timed_inserts(ht_dynamic, new_table())
    print('Normal:{}'.format(total_normal))

    for delta in [512, 256, 128, 64, 32, 16, 8, 4]:
        ht = DynamicHashtableIncrementalResizing(1023, delta=delta)
        total_delta = timed_inserts(ht, new_table())
        print('delta={}, Normal:{}'.format(delta, total_delta))
예제 #26
0
def output_dist_to_floyd_warshall(DG, dist_to, output=True):
    """
    Create data table for dist_to.

    The table has one column and one row per node of ``DG``; the cell for
    row n and column v holds ``dist_to[n][v]``. Returns the DataTable.
    """
    nodes = list(DG.nodes())
    tbl_dt = DataTable([8] * DG.number_of_nodes(),
                       nodes,
                       output=output,
                       decimals=1)
    # only first one, since this would have been N in tbl
    tbl_dt.format(nodes[0], 'f')

    for source in DG.nodes():
        tbl_dt.row([dist_to[source][target] for target in DG.nodes()])
    return tbl_dt
예제 #27
0
def average_performance(max_n=65536, output=True, decimals=1):
    """
    Generate table of average performance for different PQ implementations.

    Sizes double from 256. ``ch04.ordered_list`` and ``ch04.heap`` are
    measured up to ``max_n``; ``ch04.ordered``, ``ch04.linked``,
    ``ch04.array`` and ``ch04.builtin`` only up to 16,384. Each figure is
    ``run_trials(module, N, T)`` scaled by 1,000,000 and divided by T*N,
    with T = 3. Rows for sizes above 16,384 contain only the first three
    columns. When ``output`` is true the best model of every column is
    printed. Returns the populated DataTable.
    """
    T = 3
    base = 256
    cutoff = 16384
    high = max_n

    def doubling(limit):
        """Yield base, 2*base, 4*base, ... while not exceeding limit."""
        size = base
        while size <= limit:
            yield size
            size *= 2

    def per_operation(module, size):
        """Scaled result of run_trials for one module and problem size."""
        return 1000000*run_trials(module, size, T)/(T*size)

    # Implementations measured over the full range.
    order_ll = {}
    heap = {}
    for N in doubling(high):
        order_ll[N] = per_operation('ch04.ordered_list', N)
        heap[N] = per_operation('ch04.heap', N)

    # Implementations only measured up to the cutoff.
    order_ar = {}
    linked = {}
    array = {}
    builtin = {}
    for N in doubling(cutoff):
        order_ar[N] = per_operation('ch04.ordered', N)
        linked[N] = per_operation('ch04.linked', N)
        array[N] = per_operation('ch04.array', N)
        builtin[N] = per_operation('ch04.builtin', N)

    columns = ['Heap','OrderL','Linked','OrderA','Built-in','Array']
    tbl = DataTable([8,8,8,8,8,8,8],
                    ['N'] + columns,
                    output=output, decimals=decimals)
    for N in doubling(high):
        if N > cutoff:
            tbl.row([N, heap[N], order_ll[N]])
        else:
            tbl.row([N, heap[N], order_ll[N], linked[N], order_ar[N], builtin[N], array[N]])

    if output:
        print()
        for column in columns:
            print(column, tbl.best_model(column))
    return tbl
예제 #28
0
def run_largest_alternate(output=True, decimals=3):
    """
    Generate tables for largest and alternate on ascending input.

    For N = 8, 16, ..., 2048 each function is timed on ``list(range(N))``
    (best of 10 repeats of 50 calls, reported as 1000 * seconds per call) and
    then run once on RecordedItem values so that ``RecordedItem.report()`` can
    be summed into the '#Less' / '#LessA' columns.

    Args:
        output: passed to DataTable; when true, the best LINEAR model for
            'largest' and best QUADRATIC model for 'alternate' are printed.
        decimals: passed to DataTable.

    Returns:
        The populated DataTable.
    """
    def recorded_counts(algorithm, size):
        """Run algorithm on RecordedItem(0..size-1) and return the report."""
        items = [RecordedItem(i) for i in range(size)]
        # Reset after building the items so only the algorithm's own
        # activity is included in the report.
        RecordedItem.clear()
        algorithm(items)
        tally = RecordedItem.report()
        RecordedItem.clear()
        return tally

    columns = ['N', '#Less', '#LessA', 'largest', 'alternate']
    tbl = DataTable([8, 10, 15, 10, 10],
                    columns,
                    output=output,
                    decimals=decimals)
    for count_column in ('#Less', '#LessA'):
        tbl.format(count_column, ',d')

    for n in [2**k for k in range(3, 12)]:
        ascending = list(range(n))

        # The list is embedded into the timed statement as a literal.
        largest_runs = timeit.repeat(stmt='largest({})'.format(ascending),
                                     setup='from ch01.largest import largest',
                                     repeat=10,
                                     number=50)
        largest_up = 1000 * min(largest_runs) / 50

        alternate_runs = timeit.repeat(stmt='alternate({})'.format(ascending),
                                       setup='from ch01.largest import alternate',
                                       repeat=10,
                                       number=50)
        alternate_up = 1000 * min(alternate_runs) / 50

        largest_counts = recorded_counts(largest, n)
        alternate_counts = recorded_counts(alternate, n)

        tbl.row([
            n,
            sum(largest_counts),
            sum(alternate_counts),
            largest_up,
            alternate_up,
        ])

    if output:
        print()
        print('largest', tbl.best_model('largest', Model.LINEAR))
        print('Alternate', tbl.best_model('alternate', Model.QUADRATIC))
    return tbl
예제 #29
0
def timing_trial(output=True, decimals=3):
    """
    Time sorting_two() against the tournament_two() variants.

    For N = 2**10 through 2**23, each function from ch01.largest_two is run
    exactly once (number=1) on a shuffled ``list(range(N))``. The shuffle is
    seeded with N, so every function receives the same permutation for a
    given N. tournament_two_object() is not run once N exceeds 1,048,576;
    SKIP is placed in its column instead.

    The original author's note: because of its high memory demands,
    tournament_two() is always slower than sorting_two(), so no crossover is
    expected.

    Args:
        output: passed to DataTable.
        decimals: passed to DataTable.

    Returns:
        The populated DataTable; timing cells hold the raw timeit.timeit()
        results.
    """
    def single_run(stmt, setup_template, size):
        """Time one execution of stmt after formatting size into the setup."""
        return timeit.timeit(stmt=stmt,
                             setup=setup_template.format(size),
                             number=1)

    headings = ['N', 'Sorting', 'Tournament', 'Tourn. Object', 'Tourn. Linked', 'Tourn. Losers']
    tbl = DataTable([8] * len(headings), headings, output=output, decimals=decimals)

    for n in (2 ** k for k in range(10, 24)):
        sorting_secs = single_run('sorting_two(x)', '''
import random
from ch01.largest_two import sorting_two
random.seed({0})
x=list(range({0}))
random.shuffle(x)''', n)

        tournament_secs = single_run('tournament_two(x)', '''
import random
from ch01.largest_two import tournament_two
random.seed({0})
x=list(range({0}))
random.shuffle(x)''', n)

        # The object-based variant is only measured up to 2**20 elements.
        if n <= 1048576:
            object_secs = single_run('tournament_two_object(x)', '''
import random
from ch01.largest_two import tournament_two_object
random.seed({0})
x=list(range({0}))
random.shuffle(x)''', n)
        else:
            object_secs = SKIP

        losers_secs = single_run('tournament_two_losers(x)', '''
import random
from ch01.largest_two import tournament_two_losers
random.seed({0})
x=list(range({0}))
random.shuffle(x)''', n)

        linked_secs = single_run('tournament_two_linked(x)', '''
import random
from ch01.largest_two import tournament_two_linked
random.seed({0})
x=list(range({0}))
random.shuffle(x)''', n)

        # Column order puts 'Tourn. Linked' before 'Tourn. Losers', although
        # the losers variant is timed first.
        tbl.row([n, sorting_secs, tournament_secs, object_secs, linked_secs, losers_secs])
    return tbl
예제 #30
0
def dag_trials(output=True):
    """Confirm DAG single-source shortest path is O(E+N)."""
    tbl = DataTable([8,10,10],['N', 'Dijkstra', 'Topologic'], output=output)

    for n in [2**k for k in range(2,7)]:
        dijkstra = 1000*min(timeit.repeat(stmt='dijkstra_sp(dg,1)', setup='''
from ch07.challenge import mesh_graph
from ch07.single_source_sp import dijkstra_sp
dg=mesh_graph({})'''.format(n), repeat=20, number=15))/15

        topologic = 1000*min(timeit.repeat(stmt='topological_sp(dg,1)', setup='''
from ch07.challenge import mesh_graph, topological_sp
dg=mesh_graph({})'''.format(n), repeat=20, number=15))/15

        tbl.row([n*n, dijkstra, topologic])