Code example #1
def just_compare_sort_tournament_two(max_k=25, output=True, decimals=2):
    """Very large data sets to investigate whether crossover occurs (no it does not)."""
    tbl = DataTable([15, 10, 15], ['N', 'sorting_two', 'tournament_two'],
                    output=output,
                    decimals=decimals)

    trials = [2**k for k in range(10, max_k)]
    num = 5
    for n in trials:
        m_tt = timeit.timeit(stmt='random.shuffle(x)\ntournament_two(x)',
                             setup='''
import random
from ch01.largest_two import tournament_two
x=list(range({}))'''.format(n),
                             number=num)

        m_st = timeit.timeit(stmt='random.shuffle(x)\nsorting_two(x)',
                             setup='''
import random
from ch01.largest_two import sorting_two
x=list(range({}))'''.format(n),
                             number=num)

        tbl.row([n, m_st, m_tt])

    if output:
        print()
        for header in tbl.labels[1:]:
            print(header, tbl.best_model(header))
    return tbl
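A note on the timing pattern used above: random.shuffle(x) is placed inside stmt, so both algorithms pay an identical shuffling cost on every run, and timeit.timeit() returns the total time for all number=num runs. A minimal, self-contained sketch of that pattern using only the standard library (illustration only, not part of the book's code):

import timeit

# The shuffle is part of the statement so each run sees a fresh permutation;
# timeit.timeit() reports the TOTAL for `number` runs, so divide for per-run time.
n, num = 2**16, 5
total = timeit.timeit(stmt='random.shuffle(x)\nmax(x)',
                      setup='import random\nx = list(range({}))'.format(n),
                      number=num)
print('per run (including shuffle): {:.6f} sec'.format(total / num))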
Code example #2
def trial_merge_sort_python_style(max_k=15, output=True, decimals=3):
    """Empirical trial for merge sort using slicing."""
    tbl = DataTable([8, 8, 8], ['N', 'merge', 'mergeSlice'],
                    output=output,
                    decimals=decimals)
    for n in [2**k for k in range(8, max_k)]:
        m_slice = 1000 * min(
            timeit.repeat(stmt='slice_merge_sort(A)',
                          setup='''
import random
from ch05.challenge import slice_merge_sort
A=list(range({}))
random.shuffle(A)'''.format(n),
                          repeat=10,
                          number=10))

        m_merge = 1000 * min(
            timeit.repeat(stmt='merge_sort(A)',
                          setup='''
import random
from ch05.merge import merge_sort
A=list(range({}))
random.shuffle(A)'''.format(n),
                          repeat=10,
                          number=10))

        tbl.row([n, m_merge, m_slice])
    return tbl
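slice_merge_sort from ch05.challenge is not reproduced here. A hedged sketch of what a slicing-based ("Python-style") merge sort presumably looks like, to show why it is worth benchmarking against the regular merge sort; the book's version may differ in detail:

def slice_merge_sort_sketch(A):
    """Hypothetical slicing-based merge sort (a sketch, not the book's code).

    Each recursive call copies its half with a slice, which is the cost this
    variant is being benchmarked for.
    """
    if len(A) <= 1:
        return
    mid = len(A) // 2
    left, right = A[:mid], A[mid:]          # slicing copies both halves
    slice_merge_sort_sketch(left)
    slice_merge_sort_sketch(right)

    i = j = 0
    for k in range(len(A)):                 # merge back into A
        if i < len(left) and (j >= len(right) or left[i] <= right[j]):
            A[k] = left[i]
            i += 1
        else:
            A[k] = right[j]
            j += 1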
Code example #3
def run_range_analysis(output=True):
    """Confirm O(log N) algorithm to find range of duplicates."""
    tbl = DataTable([8, 8, 8], ['N', 'O(N)', 'O(log N)'],
                    decimals=7,
                    output=output)

    commands = '''
from random import random
tgt = random()
alist = [tgt] * {0}
for _ in range({0}-{1}):
    alist.append(random())
alist = sorted(alist)
'''
    for n in [2**k for k in range(10, 20)]:
        custom = commands.format(n, n // 16)
        best_times = min(
            timeit.repeat(stmt='best_range(alist, tgt)',
                          setup='''
from ch02.challenge import best_range
{}'''.format(custom),
                          repeat=40,
                          number=50)) / 50
        worst_times = min(
            timeit.repeat(stmt='worst_range(alist, tgt)',
                          setup='''
from ch02.challenge import worst_range
{}'''.format(custom),
                          repeat=40,
                          number=50)) / 50

        tbl.row([n, worst_times, best_times])
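best_range and worst_range from ch02.challenge are not shown here. A sketch of the O(log N) idea using the standard bisect module, alongside the O(N) scan it is compared against; this is an assumption about what the two functions compute, not the book's implementation:

from bisect import bisect_left, bisect_right

def best_range_sketch(alist, tgt):
    """O(log N): two binary searches bound the run of duplicates in a sorted list.

    Returns the (lo, hi) index range of tgt, or None if tgt is absent.
    """
    lo = bisect_left(alist, tgt)
    hi = bisect_right(alist, tgt) - 1
    return (lo, hi) if lo <= hi else None

def worst_range_sketch(alist, tgt):
    """O(N) comparison point: linear scan for first and last occurrence."""
    lo = hi = None
    for i, v in enumerate(alist):
        if v == tgt:
            if lo is None:
                lo = i
            hi = i
    return None if lo is None else (lo, hi)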
Code example #4
def run_median_trial():
    """Generate table for Median Trial."""
    tbl = DataTable([10, 15, 15], ['N', 'median_time', 'sort_median'])

    trials = [2**k + 1 for k in range(8, 20)]
    for n in trials:
        t_med = 1000 * min(
            timeit.repeat(stmt='assert(linear_median(a) == {}//2)'.format(n),
                          setup='''
import random
from ch01.challenge import linear_median
a = list(range({}))
random.shuffle(a)
'''.format(n),
                          repeat=10,
                          number=5)) / 5

        t_sort = 1000 * min(
            timeit.repeat(stmt='assert(sorted(a)[{0}//2] == {0}//2)'.format(n),
                          setup='''
import random
from ch01.challenge import linear_median
a = list(range({}))
random.shuffle(a)
'''.format(n),
                          repeat=10,
                          number=5)) / 5

        tbl.row([n, t_med, t_sort])

    return tbl
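linear_median from ch01.challenge is not reproduced here. A hedged sketch of an expected-linear-time median via randomized quickselect, consistent with the asserts above (for a shuffled range(n) with odd n, the median equals n // 2); the book's implementation may differ:

import random

def linear_median_sketch(A):
    """Expected O(N) median via randomized quickselect (a sketch only)."""
    def select(items, k):
        # Partition around a random pivot and recurse into the part holding index k.
        pivot = items[random.randrange(len(items))]
        lt = [x for x in items if x < pivot]
        eq = [x for x in items if x == pivot]
        gt = [x for x in items if x > pivot]
        if k < len(lt):
            return select(lt, k)
        if k < len(lt) + len(eq):
            return pivot
        return select(gt, k - len(lt) - len(eq))
    return select(list(A), len(A) // 2)

# Usage: for a shuffled range(n) with odd n, the median equals n // 2.
vals = list(range(101))
random.shuffle(vals)
assert linear_median_sketch(vals) == 50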
Code example #5
def compare_time(words, output=True, decimals=4):
    """Generate table of performance differences with linked hashtable and perfect hashing."""
    tbl = DataTable([8, 8, 8], ['N', 'Linked', 'Perfect'],
                    output=output,
                    decimals=decimals)

    t_perfect = min(
        timeit.repeat(stmt='''
ht = HL()
for w in words:
    ht.put(w,w)''',
                      setup='''
from ch03.hashtable_open_perfect import Hashtable as HL
words={}'''.format(words),
                      repeat=3,
                      number=5)) / 5

    t_linked = min(
        timeit.repeat(stmt='''
ht = HL(len(words))
for w in words:
    ht.put(w,w)''',
                      setup='''
from ch03.hashtable_linked import Hashtable as HL
words={}'''.format(words),
                      repeat=3,
                      number=5)) / 5

    tbl.row([len(words), t_linked, t_perfect])
    return tbl
Code example #6
def table_trials(max_k=15, output=True, decimals=3):
    """Compare Merge Sort against built in Python sort up to, but not including 2**max_k."""
    tbl = DataTable([8, 10, 10], ['N', 'MergeSort', 'Built-In Sort'],
                    output=output,
                    decimals=decimals)

    for n in [2**k for k in range(8, max_k)]:
        msort = 1000 * min(
            timeit.repeat(stmt='merge_sort(x)',
                          setup='''
import random
from ch05.merge import merge_sort
x=list(range({}))
random.shuffle(x)'''.format(n),
                          repeat=20,
                          number=15)) / 15

        builtin = 1000 * min(
            timeit.repeat(stmt='x.sort()',
                          setup='''
import random
x=list(range({}))
random.shuffle(x)'''.format(n),
                          repeat=20,
                          number=15)) / 15

        tbl.row([n, msort, builtin])
    return tbl
Code example #7
def time_results_linked(output=True, decimals=3):
    """Average time to find a key in growing hashtable_open."""

    sizes = [8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576]
    tbl = DataTable([8] + [8] * len(sizes),
                    ['N'] + [comma(sz) for sz in sizes],
                    output=output,
                    decimals=decimals)
    # Add the first num_to_add words (starting at 32 and doubling up to 16,384)
    # into hashtables of each size in sizes.
    words = english_words()
    for num_to_add in [32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384]:
        all_words = words[:num_to_add]

        line = [num_to_add]
        for size in sizes:
            time1 = min(
                timeit.repeat(stmt='''
table = Hashtable({})
for word in words:
    table.put(word, 99)'''.format(size),
                              setup='''
from ch03.hashtable_linked import Hashtable
words={}'''.format(all_words),
                              repeat=1,
                              number=100))
            line.append(1000000 * time1 / size)
        tbl.row(line)
    return tbl
Code example #8
def run_init_trial(output=True):
    """First Table in chapter 1."""
    n = 100
    tbl = DataTable([12, 12, 12], ['N', 'Ascending', 'Descending'],
                    output=output,
                    decimals=3)

    while n <= 1000000:
        # Ascending: 1 up to and including N
        m_up = 1000 * min(
            timeit.repeat(stmt='native_largest(up)',
                          setup='''
from ch01.largest import native_largest
up = list(range(1,{}+1))'''.format(n),
                          repeat=10,
                          number=50)) / 50

        # N down to but not including 0
        m_down = 1000 * min(
            timeit.repeat(stmt='native_largest(down)',
                          setup='''
from ch01.largest import native_largest
down = list(range({}, 0, -1))'''.format(n),
                          repeat=10,
                          number=50)) / 50

        tbl.row([n, m_up, m_down])
        n *= 10
    return tbl
Code example #9
def time_results_open(words, output=True, decimals=4):
    """Average time to find a key in growing hashtable_open."""
    sizes = [8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576]
    widths = [8] + [10] * len(sizes)
    headers = ['N'] + sizes
    tbl = DataTable(widths, headers, output=output, decimals=decimals)

    # Add the first num_to_add words (starting at 32 and doubling up to 16,384)
    # into hashtables of each size in sizes; skip cases where the words would not fit.
    for num_to_add in [32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384]:
        all_words = words[:num_to_add]

        arow = [num_to_add]
        for size in sizes:
            if num_to_add < size:
                m1 = min(
                    timeit.repeat(stmt='''
table = Hashtable({})
for word in words:
    table.put(word, 99)'''.format(size),
                                  setup='''
from ch03.hashtable_open import Hashtable
words={}'''.format(all_words),
                                  repeat=1,
                                  number=100))
                arow.append((100000.0 * m1) / size)
            else:
                arow.append(SKIP)
        tbl.row(arow)
    return tbl
Code example #10
def count_collisions(num_rows=0, output=True, decimals=1):
    """Generate table counting collisions."""
    all_words = english_words()
    N = len(all_words)

    from ch03.hashtable_linked import Hashtable as HL
    from ch03.hashtable_linked import stats_linked_lists
    from ch03.hashtable_open import Hashtable as OHL
    from ch03.hashtable_open import stats_open_addressing

    tbl = DataTable([10,8,8,8,8], ['M', 'Avg LL', 'Max LL', 'Avg OA', 'Max OA'],
                    output=output, decimals=decimals)
    tbl.format('Max LL', 'd')
    tbl.format('Max OA', 'd')

    M = 20*N
    hl = HL(M)
    ohl = OHL(M)
    for w in all_words:
        hl.put(w, 1)
        ohl.put(w, 1)
    avg_size_linked = stats_linked_lists(hl)
    avg_size_open = stats_open_addressing(ohl)
    tbl.row([M, avg_size_linked[0], avg_size_linked[1], avg_size_open[0], avg_size_open[1]])

    M = 2*N
    while M > N/16:
        hl = HL(M)
        ohl = OHL(M)
        for w in all_words:
            hl.put(w, 1)
            if M > N:               # open addressing overflows once M <= N
                ohl.put(w, 1)
        avg_size_linked = stats_linked_lists(hl)

        if N < M:
            avg_size_open = stats_open_addressing(ohl)
        else:
            tbl.format('Avg OA', 's')
            tbl.format('Max OA', 's')
            avg_size_open = [SKIP, SKIP]

        num_rows -= 1
        tbl.row([M, avg_size_linked[0], avg_size_linked[1], avg_size_open[0], avg_size_open[1]])

        # Shrink M by 5% per step while above N; once below N, shrink by 40% per step.
        if M > N:
            M = (M * 95) // 100
        else:
            M = (M * 6) // 10

        # To allow for testing, simple way to break out after a number of rows are generated.
        if num_rows == 0:
            break
    return tbl
Code example #11
File: book.py  Project: heineman/LearningAlgorithms
def actual_table(output=True):
    """Produce sample table to use for curve fitting."""
    # Sample data
    xvals = [100, 1000, 10000]
    yvals = [0.063, 0.565, 5.946]

    # Coefficients are returned as first argument
    if numpy_error:
        a, b = 0, 0
    else:
        import numpy as np
        from scipy.optimize import curve_fit
        [(a, b), _] = curve_fit(linear_model, np.array(xvals), np.array(yvals))
        if output:
            print('Linear = {}*N + {}'.format(a, b))

        [(qa, qb), _] = curve_fit(quadratic_model, np.array(xvals),
                                  np.array(yvals))
        if output:
            print('Quadratic = {}*N*N + {}*N'.format(qa, qb))

        [(na), _] = curve_fit(n_log_n_model, np.array(xvals), np.array(yvals))
        if output:
            print('N Log N = {}*N*log N'.format(na))

    tbl = DataTable([8, 8, 8], ['N', 'Actual', 'Model'], output=output)

    tbl.row([100, 0.063, linear_model(100, a, b)])
    tbl.row([1000, 0.565, linear_model(1000, a, b)])
    tbl.row([10000, 5.946, linear_model(10000, a, b)])

    print(tbl.pearsonr('Actual', 'Model'))
    return tbl
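linear_model, quadratic_model, and n_log_n_model are defined elsewhere in the book's code; curve_fit only requires callables of the form f(x, *coefficients). A hedged sketch of what they presumably look like, based on how the coefficients are unpacked and printed above (log_model, used later for binary array search, is included as well; the book's own definitions may differ):

import numpy as np

# Presumed shapes of the model functions passed to scipy.optimize.curve_fit.
def linear_model(n, a, b):
    return a * n + b

def quadratic_model(n, a, b):
    return a * n * n + b * n

def n_log_n_model(n, a):
    return a * n * np.log2(n)

def log_model(n, a):
    return a * np.log2(n)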
Code example #12
File: book.py  Project: heineman/LearningAlgorithms
def table_compare_graph_structures(max_k=15, output=True):
    """
    Compare Matrix implementation vs. Adjacency list implementation vs. NetworkX up to
    but not including max_k=15.
    """

    tbl = DataTable([8, 10, 10, 10],
                    ['N', 'NetworkX', 'Adjacency List', 'Adjacency Matrix'],
                    output=output)
    for N in [2**k for k in range(8, max_k)]:
        undirect_mtime = 1000 * min(
            timeit.repeat(stmt='''
total=0
for w in G[0]:
    total += w''',
                          setup='''
from ch07.replacement import UndirectedGraph
G = UndirectedGraph()
G.add_nodes_from(list(range({0})))
for o in range(10):
    G.add_edge(0, {0}-o-1)'''.format(N),
                          repeat=20,
                          number=20))

        # Time the same traversal on a native networkx Graph.
        networkx_mtime = 1000 * min(
            timeit.repeat(stmt='''
total=0
for w in G[0]:
    total += w''',
                          setup='''
import networkx as nx
G = nx.Graph()
G.add_nodes_from(list(range({0})))
for o in range(10):
    G.add_edge(0, {0}-o-1)'''.format(N),
                          repeat=20,
                          number=20))

        matrix_mtime = 1000 * min(
            timeit.repeat(stmt='''
total=0
for w in G[0]:
    total += w''',
                          setup='''
from ch07.replacement import MatrixUndirectedGraph
G = MatrixUndirectedGraph()
G.add_nodes_from(list(range({0})))
for o in range(10):
    G.add_edge(0, {0}-o-1)'''.format(N),
                          repeat=20,
                          number=20))

        tbl.row([N, networkx_mtime, undirect_mtime, matrix_mtime])
    return tbl
Code example #13
def generate_hash():
    """Results are different each time since Python salts hash values."""

    s = 'a rose by any other name would smell as sweet'
    tbl = DataTable([8,20,20], ['key', 'hash(key)', 'hash(key) % 15'])
    tbl.format('key', 's')
    tbl.format('hash(key)', 'd')
    tbl.format('hash(key) % 15', 'd')
    for w in s.split():
        tbl.row([w, hash(w), hash(w) % 15])
    return tbl
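Because CPython salts string hashes per process (the point of the docstring above), the hash(key) column changes on every run; launching the interpreter with PYTHONHASHSEED=0 disables the salt. Alternatively, a deterministic digest can stand in for hash(); a small illustrative sketch (not what the book's generate_hash() does):

import hashlib

def stable_bucket(key, m=15):
    """Deterministic stand-in for hash(key) % m, reproducible across runs."""
    digest = hashlib.sha256(key.encode('utf-8')).hexdigest()
    return int(digest, 16) % m

for w in 'a rose by any other name would smell as sweet'.split():
    print(w, stable_bucket(w))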
Code example #14
def exercise_triangle_number_probing(output=True, decimals=4):
    """Compare triangle number probing with M=powers of 2."""

    tbl = DataTable([20, 8], ['Type', 'Time to Search'],
                    output=output,
                    decimals=decimals)
    tbl.format('Type', 's')
    timing_oa = min(
        timeit.repeat(stmt='''
for w in words:
    ht.get(w)''',
                      setup='''
from ch03.hashtable_open import Hashtable
from resources.english import english_words
words = english_words()
ht = Hashtable(524288)
for w in words[:160564]:
    ht.put(w,w)''',
                      repeat=7,
                      number=5)) / 5
    tbl.row(['Open Addressing', timing_oa])

    timing_sc = min(
        timeit.repeat(stmt='''
for w in words:
    ht.get(w)''',
                      setup='''
from ch03.hashtable_linked import Hashtable
from resources.english import english_words
words = english_words()
ht = Hashtable(524288)
for w in words[:160564]:
    ht.put(w,w)''',
                      repeat=7,
                      number=5)) / 5
    tbl.row(['Separate Chaining', timing_sc])

    timing_tn = min(
        timeit.repeat(stmt='''
for w in words:
    ht.get(w)''',
                      setup='''
from ch03.challenge import HashtableTriangleNumbers
from resources.english import english_words
words = english_words()
ht = HashtableTriangleNumbers(524288)
for w in words[:160564]:
    ht.put(w,w)''',
                      repeat=7,
                      number=5)) / 5
    tbl.row(['Triangle Probing', timing_tn])
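HashtableTriangleNumbers is not shown here. The idea behind triangle-number probing is that the i-th probe is offset from the home slot by the i-th triangular number i*(i+1)/2; when M is a power of two (as with 524288 above), that sequence visits every slot. A sketch of the probe sequence only, assuming nothing else about the class internals:

def triangle_probes(key, M):
    """Yield the slot sequence for triangle-number probing in a table of size M.

    Offsets grow by 1, 2, 3, ..., so the i-th probe sits i*(i+1)//2 past the
    home slot. When M is a power of two the first M probes visit every slot.
    Sketch only; ch03.challenge.HashtableTriangleNumbers may differ.
    """
    slot = hash(key) % M
    delta = 1
    for _ in range(M):
        yield slot
        slot = (slot + delta) % M
        delta += 1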
Code example #15
def search_trials():
    """
    For randomly constructed NxN mazes, compute efficiency of searching strategies
    on 512 random mazes, as N grows from 4x4 to 128x128
    """
    import random
    from ch07.maze import to_networkx, distance_to

    tbl = DataTable([8,8,8,8],['N', 'BFS', 'DFS', 'GS'], decimals=2)
    for N in [4, 8, 16, 32, 64, 128]:
        num_bfs = 0
        num_dfs = 0
        num_gs = 0
        for i in range(512):
            random.seed(i)
            m = Maze(N,N)
            G = to_networkx(m)

            num_bfs += annotated_bfs_search(G, m.start(), m.end())
            num_dfs += annotated_dfs_search(G, m.start(), m.end())
            num_gs += annotated_guided_search(G, m.start(), m.end(), distance_to)

        tbl.row([N, num_bfs/512, num_dfs/512, num_gs/512])

    tbl = DataTable([8,8,8,8],['N', 'BFS', 'DFS', 'GS'], decimals=2)
    for N in [4, 8, 16, 32, 64, 128]:
        m = maze_to_defeat_guided_search(N)
        G = to_networkx(m)

        num_bfs = annotated_bfs_search(G, m.start(), m.end())
        num_dfs = annotated_dfs_search(G, m.start(), m.end())
        num_gs = annotated_guided_search(G, m.start(), m.end(), distance_to)

        tbl.row([N, num_bfs, num_dfs, num_gs])
Code example #16
def worst_heights(max_n=40, output=True):
    """
    Generate random AVL trees of n Nodes to find which ones have greatest height.
    Purely speculative and not definitive exploration of potential trees.
    """
    from ch06.balanced import BinaryTree
    tbl = DataTable([8, 8, 8], ['N', 'WorstHeight', 'NumberFound'],
                    output=output)
    tbl.format('WorstHeight', 'd')
    tbl.format('NumberFound', ',d')
    table_max_height = -1
    for n in range(1, max_n):
        number_found = 0
        max_height = -1
        for _ in range(10001):
            avl = BinaryTree()
            for _ in range(n):
                avl.insert(random.random())
            if avl.root.height > max_height:
                max_height = avl.root.height
                number_found = 0
            elif avl.root.height == max_height:
                number_found += 1

        if max_height > table_max_height:
            tbl.row([n, max_height, number_found])
            table_max_height = max_height
    return tbl
Code example #17
def performance_different_approaches(output=True):
    """Produce results on # less-than for different algorithms and data sets."""
    headers = ['Algorithm', 'Ascending', 'Descending', 'Alternating']
    n = 524288

    tbl = DataTable([15, 10, 10, 10], headers, output=output)
    for hdr in headers:
        tbl.format(hdr, ',d')
    tbl.format('Algorithm', 's')

    # Ascending / Descending / Weave
    from ch01.largest_two import largest_two, sorting_two, double_two, mutable_two, tournament_two
    funcs = [largest_two, sorting_two, double_two, mutable_two, tournament_two]
    algs = [
        'largest_two', 'sorting_two', 'double_two', 'mutable_two',
        'tournament_two'
    ]

    for label, func in zip(algs, funcs):
        RecordedItem.clear()
        func([RecordedItem(i) for i in range(n)])
        up_count = sum(RecordedItem.report())

        RecordedItem.clear()
        func([RecordedItem(i) for i in range(n, 0, -1)])
        down_count = sum(RecordedItem.report())

        RecordedItem.clear()
        up_down = zip(range(0, n, 2), range(n - 1, 0, -2))
        func([RecordedItem(i) for i in itertools.chain(*up_down)])
        weave_count = sum(RecordedItem.report())

        tbl.row([label, up_count, down_count, weave_count])
    return tbl
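RecordedItem is not reproduced here; it wraps a value and counts comparison operations in class-level counters so the tables can report how many less-than calls each algorithm makes. A minimal sketch of such a wrapper, matching how clear()/report() are used above (an assumption; the book's version may track more operators):

class RecordedItemSketch:
    """Value wrapper that counts comparisons at the class level (a sketch)."""
    counts = [0, 0]          # [eq-count, lt-count]; report()[1] is assumed to be less-than

    def __init__(self, value):
        self.value = value

    @classmethod
    def clear(cls):
        cls.counts = [0, 0]

    @classmethod
    def report(cls):
        return list(cls.counts)

    def __eq__(self, other):
        RecordedItemSketch.counts[0] += 1
        return self.value == other.value

    def __lt__(self, other):
        RecordedItemSketch.counts[1] += 1
        return self.value < other.value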
Code example #18
def run_median_less_than_trial(max_k=20, output=True):
    """Use RecordedItem to count # of times Less-than invoked up to (but not including) max_k=20."""
    tbl = DataTable([10, 15, 15], ['N', 'median_count', 'sort_median_count'],
                    output=output)
    tbl.format('median_count', ',d')
    tbl.format('sort_median_count', ',d')

    trials = [2**k + 1 for k in range(8, max_k)]
    for n in trials:
        A = list([RecordedItem(i) for i in range(n)])
        random.shuffle(A)

        # sorted() returns a new list, so A remains shuffled for linear_median() below
        RecordedItem.clear()
        med2 = sorted(A)[n // 2]
        sort_lt = RecordedItem.report()[1]

        RecordedItem.clear()
        med1 = linear_median(A)
        lin_lt = RecordedItem.report()[1]

        assert med1 == med2

        tbl.row([n, lin_lt, sort_lt])

    return tbl
Code example #19
def run_largest_alternate(output=True, decimals=3):
    """Generate tables for largest and alternate."""
    n = 8
    tbl = DataTable([8, 10, 15, 10, 10],
                    ['N', '#Less', '#LessA', 'largest', 'alternate'],
                    output=output,
                    decimals=decimals)
    tbl.format('#Less', ',d')
    tbl.format('#LessA', ',d')

    while n <= 2048:
        ascending = list(range(n))

        largest_up = 1000 * min(
            timeit.repeat(stmt='largest({})'.format(ascending),
                          setup='from ch01.largest import largest',
                          repeat=10,
                          number=50)) / 50
        alternate_up = 1000 * min(
            timeit.repeat(stmt='alternate({})'.format(ascending),
                          setup='from ch01.largest import alternate',
                          repeat=10,
                          number=50)) / 50

        up_count = [RecordedItem(i) for i in range(n)]
        RecordedItem.clear()
        largest(up_count)
        largest_counts = RecordedItem.report()
        RecordedItem.clear()

        up_count = [RecordedItem(i) for i in range(n)]
        RecordedItem.clear()
        alternate(up_count)
        alternate_counts = RecordedItem.report()
        RecordedItem.clear()

        tbl.row([
            n,
            sum(largest_counts),
            sum(alternate_counts), largest_up, alternate_up
        ])

        n *= 2

    if output:
        print()
        print('largest', tbl.best_model('largest', Model.LINEAR))
        print('Alternate', tbl.best_model('alternate', Model.QUADRATIC))
    return tbl
Code example #20
File: timing.py  Project: heineman/LearningAlgorithms
def timing_trial(output=True, decimals=3):
    """
    Seek possible crossover between tournament_two() and sorting_two().
    Because of the high memory demands, tournament_two() is always slower than
    sorting_two().
    """
    tbl = DataTable([8, 8, 8, 8, 8, 8],
                    ['N', 'Sorting', 'Tournament', 'Tourn. Object', 'Tourn. Linked', 'Tourn. Losers'],
                    output=output,
                    decimals=decimals)

    for n in [2 ** k for k in range(10, 24)]:
        st_time = timeit.timeit(stmt='sorting_two(x)', setup='''
import random
from ch01.largest_two import sorting_two
random.seed({0})
x=list(range({0}))
random.shuffle(x)'''.format(n), number=1)

        tt_time = timeit.timeit(stmt='tournament_two(x)', setup='''
import random
from ch01.largest_two import tournament_two
random.seed({0})
x=list(range({0}))
random.shuffle(x)'''.format(n), number=1)

        if n > 1048576:
            tto_time = SKIP
        else:
            tto_time = timeit.timeit(stmt='tournament_two_object(x)', setup='''
import random
from ch01.largest_two import tournament_two_object
random.seed({0})
x=list(range({0}))
random.shuffle(x)'''.format(n), number=1)

        ttl_time = timeit.timeit(stmt='tournament_two_losers(x)', setup='''
import random
from ch01.largest_two import tournament_two_losers
random.seed({0})
x=list(range({0}))
random.shuffle(x)'''.format(n), number=1)

        ttll_time = timeit.timeit(stmt='tournament_two_linked(x)', setup='''
import random
from ch01.largest_two import tournament_two_linked
random.seed({0})
x=list(range({0}))
random.shuffle(x)'''.format(n), number=1)

        tbl.row([n, st_time, tt_time, tto_time, ttll_time, ttl_time])
    return tbl
Code example #21
def dag_trials(output=True):
    """Confirm DAG single-source shortest path is O(E+N)."""
    tbl = DataTable([8,10,10],['N', 'Dijkstra', 'Topologic'], output=output)

    for n in [2**k for k in range(2,7)]:
        dijkstra = 1000*min(timeit.repeat(stmt='dijkstra_sp(dg,1)', setup='''
from ch07.challenge import mesh_graph
from ch07.single_source_sp import dijkstra_sp
dg=mesh_graph({})'''.format(n), repeat=20, number=15))/15

        topologic = 1000*min(timeit.repeat(stmt='topological_sp(dg,1)', setup='''
from ch07.challenge import mesh_graph, topological_sp
dg=mesh_graph({})'''.format(n), repeat=20, number=15))/15

        tbl.row([n*n, dijkstra, topologic])
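topological_sp is not shown here. Single-source shortest paths on a DAG can be computed in O(N+E) by relaxing edges in topological order, which is what the trial checks against Dijkstra's algorithm. A hedged sketch using networkx, assuming a weighted nx.DiGraph; the book's mesh_graph and graph API may differ:

import networkx as nx

def topological_sp_sketch(dg, src):
    """Shortest-path distances from src over a DAG by relaxing edges in
    topological order: O(N + E). Sketch only."""
    dist = {v: float('inf') for v in dg.nodes()}
    dist[src] = 0
    for u in nx.topological_sort(dg):
        for v in dg.successors(u):
            w = dg[u][v].get('weight', 1)
            if dist[u] + w < dist[v]:
                dist[v] = dist[u] + w
    return dist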
Code example #22
def performance_bas(max_k=22, output=True, decimals=3):
    """
    Generate performance tables for binary array search up to (but not including)
    2**max_k.
    """
    # Fit the log model on small problem sizes (2**5 through 2**11)...
    trials = [2**k for k in range(5, 12)]
    xvals = []
    yvals = []
    num = 50000
    for n in trials:
        search_time = timeit.timeit(
            stmt='binary_array_search(x, random.randint(0,{}*4))'.format(n),
            setup='''
import random
from ch02.bas import binary_array_search        
x=sorted(random.sample(range({0}*4), {0}))'''.format(n),
            number=num)
        xvals.append(n)
        yvals.append(search_time)

    if numpy_error:
        log_coeff = [0]
    else:
        import numpy as np
        from scipy.optimize import curve_fit
        [log_coeff, _] = curve_fit(log_model, np.array(xvals), np.array(yvals))
        if output:
            print('Log N   = {:.12f}*log2(N)'.format(log_coeff[0]))

    tbl = DataTable([15, 10, 10], ['N', 'T(N)', 'Model'],
                    output=output,
                    decimals=decimals)
    trials = [2**k for k in range(5, max_k)]
    for n in trials:
        search_time = timeit.timeit(
            stmt='binary_array_search(x, random.randint(0,{}*2))'.format(n),
            setup='''
import random
from ch02.bas import binary_array_search        
x=sorted(random.sample(range({0}*4), {0}))'''.format(n),
            number=num)

        tbl.row([n, search_time, log_model(n, log_coeff[0])])

    return tbl
Code example #23
def prime_number_difference(words, output=True, decimals=2):
    """Identify sensitivity of M to being prime or not."""

    from ch03.hashtable_linked import Hashtable as Linked_Hashtable, stats_linked_lists
    from ch03.hashtable_open import Hashtable as Open_Hashtable, stats_open_addressing
    from ch03.base26 import base26

    # these are prime numbers between 428880 and 428980
    lo = 428880
    primes = [428899, 428951, 428957, 428977]
    hi = 428980

    keys = [base26(w) for w in words]
    tbl = DataTable([12, 6, 8, 8, 8, 8],
                    ['M', 'Prime', 'Avg. LL', 'Max LL', 'Avg. OA', 'Max OA'],
                    output=output,
                    decimals=decimals)
    tbl.format('Prime', 's')
    tbl.format('Max LL', 'd')
    tbl.format('Max OA', 'd')
    worst = 0
    worst_m = 0
    for m in range(lo, hi + 1):
        is_p = 'Prime' if m in primes else ''
        ht_linked = Linked_Hashtable(m)
        ht_open = Open_Hashtable(m)

        for k in keys:
            ht_linked.put(k, 1)
            ht_open.put(k, 1)

        (avg_length_linked, max_length_linked) = stats_linked_lists(ht_linked)
        if max_length_linked > worst:
            worst_m = m
            worst = max_length_linked
        (avg_length_open, max_length_open) = stats_open_addressing(ht_open)
        tbl.row([
            m, is_p, avg_length_linked, max_length_linked, avg_length_open,
            max_length_open
        ])

    # Now try to find any more that exceed this maximum amount
    if output:
        print('Worst was {} for M={}'.format(worst, worst_m))
        for m in range(worst_m, worst_m + 10000, 13):
            ht_linked = Linked_Hashtable(m)
            for k in keys:
                ht_linked.put(k, 1)

            (avg_length_linked,
             max_length_linked) = stats_linked_lists(ht_linked, False)
            if max_length_linked > worst:
                worst_m = m
                worst = max_length_linked
                print('Worst of {} for M={}'.format(worst, worst_m))
        print('Done')

    return tbl
Code example #24
def run_max_sort_worst_case(max_k=14, output=True, decimals=4):
    """Generate table for max sort up to (but not including 2**max_k)."""
    xvals = []
    yvals = []
    for n in [2**k for k in range(5, 12)]:
        sort_time = timeit.timeit(stmt='max_sort(x)',
                                  setup='''
from ch02.challenge import max_sort
import random
x=list(range({},0,-1))
random.shuffle(x)'''.format(n),
                                  number=10)
        xvals.append(n)
        yvals.append(sort_time)

    if numpy_error:
        quadratic_coeff = [0, 0]
    else:
        import numpy as np
        from scipy.optimize import curve_fit
        [quadratic_coeff, _] = curve_fit(quadratic_model, np.array(xvals),
                                         np.array(yvals))
        if output:
            print('Quadratic N  = {:.12f}*N*N + {:.12f}*N'.format(
                quadratic_coeff[0], quadratic_coeff[1]))

    tbl = DataTable([8, 8, 8], ['N', 'MaxSort', 'Model'],
                    output=output,
                    decimals=decimals)

    for n in [2**k for k in range(5, max_k)]:
        sort_time = timeit.timeit(stmt='max_sort(x)',
                                  setup='''
from ch02.challenge import max_sort
import random
x=list(range({},0,-1))
random.shuffle(x)'''.format(n),
                                  number=10)
        tbl.row([
            n, sort_time,
            quadratic_model(n, quadratic_coeff[0], quadratic_coeff[1])
        ])

    return tbl
Code example #25
def generate_list_table(max_k=21, output=True, decimals=3):
    """
    Generate table showing O(N) behavior of Python 'list' structure on insert for
    lists up to (but not including) 2**max_k
    """
    tbl = DataTable([8, 8, 8, 8, 8],
                    ['N', 'Prepend', 'Remove', 'Append', 'Tree'],
                    output=output,
                    decimals=decimals)

    for n in [2**k for k in range(10, max_k)]:
        tbl.row([
            n,
            run_trials_prepend(n, 1000),
            run_trials_remove(n, 1000),
            run_trials_append(n, 1000),
            run_trials_tree(n, 1000)
        ])
    return tbl
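The run_trials_* helpers are not shown here; each presumably times its operation on a list (or tree) of n elements and returns an average. A hedged sketch of one of them, to make the measurement concrete (names and details are assumptions):

import timeit

def run_trials_prepend_sketch(n, num):
    """Sketch of what run_trials_prepend(n, num) presumably measures: the
    average time to insert at index 0 of a Python list holding n elements.
    num is assumed small relative to n, so the list length stays roughly n."""
    return min(timeit.repeat(
        stmt='alist.insert(0, -1)',
        setup='alist = list(range({}))'.format(n),
        repeat=5, number=num)) / num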
Code example #26
def incremental_multiplication(output=True):
    """
    Compute results for multiplying large numbers.
    This takes several hours to run if you increment by 1. Instead, check powers of 2.
    """
    num = 1000
    tbl = DataTable([8, 8, 8], ['N', 'Min Mult', 'Max Mult'],
                    decimals=5,
                    output=output)
    for n in [2**k for k in range(3, 12)]:
        all_times = timeit.repeat(stmt='idx += 1\nmult_pair(pairs[idx])',
                                  setup='''
from ch02.mult import create_random_pair, mult_pair
idx = -1 
pairs = [create_random_pair({}) for _ in range({})]'''.format(n, num),
                                  repeat=20,
                                  number=num)
        tbl.row([n, min(all_times), max(all_times)])
    return tbl
Code example #27
def compare_avl_pq_with_heap_pq(max_k=16, output=True, decimals=2):
    """Generate times for comparing values."""
    tbl = DataTable([8, 10, 10], ['N', 'Heap-pq', 'AVL-pq'],
                    output=output,
                    decimals=decimals)
    repeat = 25
    num = 10

    for n in [2**k for k in range(10, max_k)]:
        t_heap_pq = min(
            timeit.repeat(stmt='''
random.seed(11)
pq = PQ({0})
for _ in range({0}):
    r = random.random()
    pq.enqueue(r,r)
while pq:
    pq.dequeue()'''.format(n),
                          setup='''
from ch04.heap import PQ
import random''',
                          repeat=repeat,
                          number=num)) / num

        t_avl_pq = min(
            timeit.repeat(stmt='''
random.seed(11)
pq = PQ()
for _ in range({0}):
    r = random.random()
    pq.enqueue(r,r)
while pq:
    pq.dequeue()'''.format(n),
                          setup='''
from ch06.pq import PQ
import random''',
                          repeat=repeat,
                          number=num)) / num

        tbl.row([n, t_heap_pq, t_avl_pq])

    return tbl
Code example #28
def combined_sorted(lo=8, hi=12, output=True):
    """Generate results for different sorting trials."""
    tbl = DataTable([8] * (hi - lo + 1),
                    ['N'] + [comma(2**k) for k in range(lo, hi)],
                    output=output)

    for n in [2**k for k in range(lo, hi)]:
        row = [n]
        for m in [2**k for k in range(lo, hi)]:
            row.append(run_merge_trial(m, n))
        tbl.row(row)

    # Diagonal entries time 2*M*log(M) work, so halve the fitted coefficient.
    # Build the model from ONLY the first five values.
    x = [2**k for k in range(lo, min(lo + 5, hi))]
    y = [
        tbl.entry(r, comma(r))
        for r in [2**k for k in range(lo, min(lo + 5, hi))]
    ]
    if numpy_error:
        a = 0
    else:
        import numpy as np
        from scipy.optimize import curve_fit
        from scipy.stats import pearsonr

        (coeffs, _) = curve_fit(n_log_n_model, np.array(x), np.array(y))
        a = coeffs[0] / 2

        y_fit = [
            n_log_n_model(r, a)
            for r in [2**k for k in range(lo, min(lo + 5, hi))]
        ]

        print()
        print(pearsonr(y, y_fit))
        print()
        print('Prediction')
        model = DataTable([8] * (hi - lo + 1),
                          ['N'] + [comma(2**k) for k in range(lo, hi)],
                          output=output)
        for n in [2**k for k in range(lo, hi)]:
            row = [n]
            for m in [2**k for k in range(lo, hi)]:
                row.append(n_log_n_model(n, a) + n_log_n_model(m, a))
            model.row(row)
    return tbl
Code example #29
File: timing.py  Project: heineman/LearningAlgorithms
def trial_factorial_heap(max_n=32768, output=True, decimals=2):
    """
    Generate trial using factorial heap compared with regular heap up to but not including max_n
    """
    factor = 3
    base = 256
    high = max_n

    tbl = DataTable([10, 8, 8], ['N', 'Heap', 'FactHeap'],
                    output=output,
                    decimals=decimals)
    N = base
    while N < high:
        heap = 1000000 * run_trials('ch04.heap', N, factor) / (factor * N)
        fheap = 1000000 * run_trials('ch04.factorial_heap', N,
                                     factor) / (factor * N)
        tbl.row([N, heap, fheap])

        N *= 2
    return tbl
Code example #30
def bad_timing(words, size=50000, output=True):
    """Statistics on hashtables."""
    from ch03.hashtable_linked import Hashtable, stats_linked_lists

    tbl = DataTable([8, 10, 10], ['Type', 'Avg. Len', 'Max Len'],
                    output=output)
    tbl.format('Type', 's')
    tbl.format('Max Len', 'd')
    good_ht = Hashtable(size)
    bad_ht = Hashtable(size)

    for w in words:
        good_ht.put(w, True)
        bad_ht.put(ValueBadHash(w), True)

    good = stats_linked_lists(good_ht)
    tbl.row(['Good', good[0], good[1]])
    bad = stats_linked_lists(bad_ht)
    tbl.row(['Bad', bad[0], bad[1]])
    return tbl
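ValueBadHash is not reproduced here; the point of the trial is that a hash function with very few distinct outputs forces long chains. A minimal sketch of such a deliberately bad wrapper (an assumption; the book's version may collapse keys differently):

class ValueBadHashSketch:
    """Wrapper whose __hash__ collapses keys into only a handful of values,
    so nearly every insertion collides (a sketch of the idea behind ValueBadHash)."""
    def __init__(self, v):
        self.v = v

    def __hash__(self):
        return hash(self.v) % 4     # only four possible hash codes

    def __eq__(self, other):
        return isinstance(other, ValueBadHashSketch) and self.v == other.v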