def search_trials():
    """
    For randomly constructed NxN mazes, compute efficiency of searching
    strategies on 512 random mazes, as N grows from 4x4 to 128x128.
    """
    import random
    from ch07.maze import to_networkx, distance_to

    tbl = DataTable([8,8,8,8], ['N', 'BFS', 'DFS', 'GS'], decimals=2)
    for N in [4, 8, 16, 32, 64, 128]:
        num_bfs = 0
        num_dfs = 0
        num_gs = 0
        for i in range(512):
            random.seed(i)
            m = Maze(N,N)
            G = to_networkx(m)

            num_bfs += annotated_bfs_search(G, m.start(), m.end())
            num_dfs += annotated_dfs_search(G, m.start(), m.end())
            num_gs += annotated_guided_search(G, m.start(), m.end(), distance_to)

        tbl.row([N, num_bfs/512, num_dfs/512, num_gs/512])

    tbl = DataTable([8,8,8,8], ['N', 'BFS', 'DFS', 'GS'], decimals=2)
    for N in [4, 8, 16, 32, 64, 128]:
        m = maze_to_defeat_guided_search(N)
        G = to_networkx(m)

        num_bfs = annotated_bfs_search(G, m.start(), m.end())
        num_dfs = annotated_dfs_search(G, m.start(), m.end())
        num_gs = annotated_guided_search(G, m.start(), m.end(), distance_to)
        tbl.row([N, num_bfs, num_dfs, num_gs])

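# The guided search above uses ch07.maze.distance_to as its heuristic; that function
# is not shown here. As an illustration only (the repository's implementation may
# differ), such a heuristic estimates the remaining distance from a cell to the
# target, for example the straight-line distance between (row, col) positions.
def euclidean_distance_to_sketch(from_cell, to_cell):
    """Hypothetical heuristic sketch: Euclidean distance between two maze cells."""
    from math import sqrt
    return sqrt((from_cell[0] - to_cell[0])**2 + (from_cell[1] - to_cell[1])**2)
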
def debug_state(title, G, node_from, dist_to, output=True):
    """Useful to show state of all pairs shortest path."""
    from algs.table import DataTable

    print('debug :', title)
    labels = list(G.nodes())

    tbl = DataTable([6] + [6]*len(labels), ['.'] + labels, output=output)
    tbl.format('.','s')
    for field in labels:
        tbl.format(field, 's')
    for u in labels:
        row = [u]
        for v in labels:
            if node_from[u][v]:
                row.append(node_from[u][v])
            else:
                row.append('.')
        tbl.row(row)
    print()

    tbl_dist_to = DataTable([6] + [6]*len(labels), ['.'] + labels, output=output, decimals=1)
    tbl_dist_to.format('.','s')
    for u in labels:
        row = [u]
        for v in labels:
            if u == v:
                row.append(0)
            else:
                row.append(dist_to[u][v])
        tbl_dist_to.row(row)
    print()

    return (tbl, tbl_dist_to)

def trial_multiple_rotations(output=True, num_attempts=10000):
    """Some trial and error went into these ranges."""
    from ch05.challenge import fib

    tbl = DataTable([6, 6, 6, 6], ['NumRot', 'Height', 'N', 'Random Tree'], output=output)
    tbl.format('Random Tree', 's')
    tbl.format('NumRot', 'd')
    tbl.format('Height', 'd')
    tbl.format('N', 'd')

    for extra in range(3):
        (structure, _) = find_multiple_rotations(extra, lo=4, hi=40,
                                                 num_attempts=num_attempts, output=False)
        n = recreate_tree(structure)

        def count_nodes(n):
            if n is None:
                return 0
            return 1 + count_nodes(n.left) + count_nodes(n.right)

        tbl.row([extra + 1, n.height, count_nodes(n), structure])

    # Now use Fibonacci Trees to accomplish the same result.
    if output:
        print()
    tbl = DataTable([6, 6, 6, 13], ['NumRot', 'Height', 'N', 'Fib AVL Trees'], output=output)
    tbl.format('Fib AVL Trees', 's')
    tbl.format('NumRot', 'd')
    tbl.format('Height', 'd')
    tbl.format('N', 'd')

    for n in range(6, 14, 2):
        root = fibonacci_avl(n)
        root.compute_height()
        check_avl_property(root)   # double-check
        structure = tree_structure(root)

        bt = ObservableBinaryTree()
        height = root.height
        bt.root = root
        count = count_nodes(root)

        num_rotations = rotations[0]
        to_delete = fib(n + 1) - 1
        bt.remove(to_delete)
        check_avl_property(bt.root)
        num_rotations = rotations[0] - num_rotations
        tbl.row([num_rotations, height, count, structure])
    return tbl

def count_hash_incremental_move(output=True, decimals=4):
    """
    For all English words, starting with a hashtable of size 1,023 and a load
    factor of 0.75, count how many times the hash code (i.e., %) is invoked.
    """
    from ch03.book import CountableHash
    from ch03.hashtable_linked import DynamicHashtable

    print('Each emitted row contains an operation more costly than any before...')
    ht_dynamic = DynamicHashtable(1023)
    tbl = DataTable([20, 10, 10], ['Word', 'N', 'cost'], output=output, decimals=decimals)
    tbl.format('Word', 's')
    tbl.format('N', ',d')
    max_cost = 0
    now = time.time()
    for w in english_words():
        before = time.time()
        ht_dynamic.put(CountableHash(w), w)
        cost = time.time() - before
        if cost > max_cost:
            max_cost = cost
            tbl.row([w, ht_dynamic.N, cost])
    total_normal = time.time() - now
    print('Normal:{}'.format(total_normal))

    for delta in [512, 256, 128, 64, 32, 16, 8, 4]:
        ht = DynamicHashtableIncrementalResizing(1023, delta=delta)
        tbl = DataTable([20, 10, 10], ['Word', 'N', 'cost'], output=output, decimals=decimals)
        tbl.format('Word', 's')
        tbl.format('N', ',d')
        max_cost = 0
        now = time.time()
        for w in english_words():
            before = time.time()
            ht.put(CountableHash(w), w)
            cost = time.time() - before
            if cost > max_cost:
                max_cost = cost
                tbl.row([w, ht.N, cost])
        total_delta = time.time() - now
        print('delta={}, Normal:{}'.format(delta, total_delta))

def actual_table(output=True):
    """Produce sample table to use for curve fitting."""
    # Sample data
    xvals = [100, 1000, 10000]
    yvals = [0.063, 0.565, 5.946]

    # Coefficients are returned as first argument
    if numpy_error:
        a, b = 0, 0
    else:
        import numpy as np
        from scipy.optimize import curve_fit

        [(a, b), _] = curve_fit(linear_model, np.array(xvals), np.array(yvals))
        if output:
            print('Linear = {}*N + {}'.format(a, b))

        [(qa, qb), _] = curve_fit(quadratic_model, np.array(xvals), np.array(yvals))
        if output:
            print('Quadratic = {}*N*N + {}*N'.format(qa, qb))

        [(na), _] = curve_fit(n_log_n_model, np.array(xvals), np.array(yvals))
        if output:
            print('N Log N = {}*N*log N'.format(na))

    tbl = DataTable([8, 8, 8], ['N', 'Actual', 'Model'], output=output)
    tbl.row([100, 0.063, linear_model(100, a, b)])
    tbl.row([1000, 0.565, linear_model(1000, a, b)])
    tbl.row([10000, 5.946, linear_model(10000, a, b)])

    print(tbl.pearsonr('Actual', 'Model'))
    return tbl

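# The curve fitting above relies on model functions (linear_model, quadratic_model,
# n_log_n_model) defined elsewhere in this repository; they are not shown here.
# Based solely on the printed formulas above, they presumably look something like
# the following sketches (names suffixed with _sketch to mark them as assumptions).
def linear_model_sketch(n, a, b):
    """Hypothetical: a*N + b."""
    return a*n + b

def quadratic_model_sketch(n, a, b):
    """Hypothetical: a*N*N + b*N."""
    return a*n*n + b*n

def n_log_n_model_sketch(n, a):
    """Hypothetical: a*N*log2(N)."""
    import math
    return a*n*math.log2(n)
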
def trial_merge_sort_python_style(max_k=15, output=True, decimals=3):
    """Empirical trial for merge sort using slicing."""
    tbl = DataTable([8, 8, 8], ['N', 'merge', 'mergeSlice'],
                    output=output, decimals=decimals)

    for n in [2**k for k in range(8, max_k)]:
        m_slice = 1000 * min(timeit.repeat(stmt='slice_merge_sort(A)', setup='''
import random
from ch05.challenge import slice_merge_sort
A=list(range({}))
random.shuffle(A)'''.format(n), repeat=10, number=10))

        m_merge = 1000 * min(timeit.repeat(stmt='merge_sort(A)', setup='''
import random
from ch05.merge import merge_sort
A=list(range({}))
random.shuffle(A)'''.format(n), repeat=10, number=10))

        tbl.row([n, m_merge, m_slice])
    return tbl

def insertion_sort_bas(max_k=18, output=True, decimals=3):
    """Generate Table for Insertion Sort."""
    # Evaluate prototype execution
    x = []
    y = []
    for n in [2**k for k in range(8, 12)]:
        m_insert_bas = 1000 * min(timeit.repeat(stmt='insertion_sort_bas(A)', setup='''
import random
from ch05.sorting import insertion_sort_bas
A=list(range({}))
random.shuffle(A)'''.format(n), repeat=10, number=10))
        x.append(n)
        y.append(m_insert_bas)

    # Coefficients are returned as first argument
    if numpy_error:
        log_coeffs = quadratic_coeffs = [0, 0]
    else:
        import numpy as np
        from scipy.optimize import curve_fit

        [log_coeffs, _] = curve_fit(n_log_n_model, np.array(x), np.array(y))
        [quadratic_coeffs, _] = curve_fit(quadratic_model, np.array(x), np.array(y))

    if output:
        print('Quadratic = {}*N*N + {}*N'.format(quadratic_coeffs[0], quadratic_coeffs[1]))
        print('Log = {:.12f}*N*log2(N)'.format(log_coeffs[0]))
        print()

    tbl = DataTable([12, 10, 10, 10], ['N', 'Time', 'Quad', 'Log'],
                    output=output, decimals=decimals)
    for n, p in zip(x, y):
        tbl.row([n, p,
                 quadratic_model(n, quadratic_coeffs[0], quadratic_coeffs[1]),
                 n_log_n_model(n, log_coeffs[0])])

    for n in [2**k for k in range(12, max_k)]:
        m_insert_bas = 1000 * min(timeit.repeat(stmt='insertion_sort_bas(A)', setup='''
import random
from ch05.sorting import insertion_sort_bas
A=list(range({}))
random.shuffle(A)'''.format(n), repeat=10, number=10))
        tbl.row([n, m_insert_bas,
                 quadratic_model(n, quadratic_coeffs[0], quadratic_coeffs[1]),
                 n_log_n_model(n, log_coeffs[0])])
    return tbl

def worst_heights(max_n=40, output=True):
    """
    Generate random AVL trees of n Nodes to find which ones have greatest height.
    Purely speculative and not definitive exploration of potential trees.
    """
    from ch06.balanced import BinaryTree

    tbl = DataTable([8, 8, 8], ['N', 'WorstHeight', 'NumberFound'], output=output)
    tbl.format('WorstHeight', 'd')
    tbl.format('NumberFound', ',d')
    table_max_height = -1
    for n in range(1, max_n):
        number_found = 0
        max_height = -1
        for _ in range(10001):
            avl = BinaryTree()
            for _ in range(n):
                avl.insert(random.random())

            if avl.root.height > max_height:
                max_height = avl.root.height
                number_found = 0
            elif avl.root.height == max_height:
                number_found += 1

        if max_height > table_max_height:
            tbl.row([n, max_height, number_found])
            table_max_height = max_height
    return tbl

def table_trials(max_k=15, output=True, decimals=3):
    """Compare Merge Sort against built-in Python sort up to, but not including, 2**max_k."""
    tbl = DataTable([8, 10, 10], ['N', 'MergeSort', 'Built-In Sort'],
                    output=output, decimals=decimals)

    for n in [2**k for k in range(8, max_k)]:
        msort = 1000 * min(timeit.repeat(stmt='merge_sort(x)', setup='''
import random
from ch05.merge import merge_sort
x=list(range({}))
random.shuffle(x)'''.format(n), repeat=20, number=15)) / 15

        builtin = 1000 * min(timeit.repeat(stmt='x.sort()', setup='''
import random
x=list(range({}))
random.shuffle(x)'''.format(n), repeat=20, number=15)) / 15

        tbl.row([n, msort, builtin])
    return tbl

def run_access_trials(max_trials=100000, output=True, decimals=5):
    """Generate performance table for up to max_trials number of runs."""
    tbl = DataTable([10, 10, 10], ['Dict', 'Raw', 'BAS'], output=output, decimals=decimals)
    tbl.format('Dict', 'f')

    m1 = min(timeit.repeat(stmt='days_in_month[s_data[2]]',
                           setup='from ch03.months import s_data, days_in_month',
                           repeat=10, number=max_trials))
    m2 = min(timeit.repeat(stmt='days_mixed(s_data[2])',
                           setup='from ch03.months import s_data, days_mixed',
                           repeat=10, number=max_trials))
    m3 = min(timeit.repeat(stmt='days_bas(s_data[2])',
                           setup='from ch03.months import s_data, days_bas',
                           repeat=10, number=max_trials))
    tbl.row([m1, m2, m3])
    return tbl

def time_results_linked(output=True, decimals=3):
    """Average time to insert keys into linked-list hashtables of different sizes."""
    sizes = [8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576]
    tbl = DataTable([8] + [8] * len(sizes), ['N'] + [comma(sz) for sz in sizes],
                    output=output, decimals=decimals)

    # Start with the first 32 English words and work up to 16,384 words,
    # inserting each batch into hashtables of the given sizes.
    words = english_words()
    for num_to_add in [32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384]:
        all_words = words[:num_to_add]

        line = [num_to_add]
        for size in sizes:
            time1 = min(timeit.repeat(stmt='''
table = Hashtable({})
for word in words:
    table.put(word, 99)'''.format(size), setup='''
from ch03.hashtable_linked import Hashtable
words={}'''.format(all_words), repeat=1, number=100))
            line.append(1000000 * time1 / size)
        tbl.row(line)
    return tbl

def algorithms_x_y():
    """Generate table for estimates of time for three computers and two algorithms."""
    def alg_x(n):
        """Number of operations for algorithm X."""
        return 5 * n

    def alg_y(n):
        """Number of operations for algorithm Y."""
        return 2020 * math.log(n) / math.log(2)

    tbl = DataTable([15, 15, 8, 8, 8, 8, 8],
                    ['N', 'X', 'Y', 'X_slow', 'X_fast', 'Y_fast', 'X_fastest'],
                    decimals=1)
    tbl.format('X', ',d')
    tbl.format('Y', ',d')
    for n in [2**k for k in range(2, 24)]:
        tbl.row([n, alg_x(n), int(alg_y(n)),
                 alg_x(n) / 1500, alg_x(n) / 3000,
                 alg_y(n) / 1500, alg_x(n) / (250 * 3000)])
    return tbl

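# For intuition about the two operation counts above: X needs 5*N operations while
# Y needs 2020*log2(N), so Y eventually wins as N grows. The following hypothetical
# helper (not part of the original table code; it reuses the module-level math
# import) finds the first power of two where X becomes the more expensive choice.
def crossover_sketch():
    """Hypothetical helper: smallest power of two where 5*N exceeds 2020*log2(N)."""
    n = 2
    while 2020 * math.log2(n) >= 5 * n:
        n *= 2
    return n   # returns 8192 with these constants; the exact crossover is just under N=5,000
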
def another_fragment_counting(max_k=20, output=True):
    """Generate table for counts of fragments for N up to (but not including) 2**max_k."""
    if numpy_error:
        a = 0, 0
    else:
        import numpy as np
        from scipy.optimize import curve_fit

        def log_log_model(n, a):
            """Formula for A*Log_2(Log_2(N)) with single coefficient."""
            logn = np.log2(n)
            return a * np.log2(logn)

        # Train Model
        trials = [2**k for k in range(5, 15)]
        nvals = []
        yvals = []
        for N in trials:
            nvals.append(N)
            yvals.append(f4(N))

        [a, _] = curve_fit(log_log_model, np.array(nvals), np.array(yvals))
        if output:
            print('LOG_LOG_MODEL = {}*log(log(N))'.format(a))

    trials = [2**k for k in range(5, max_k)]
    tbl = DataTable([8, 8, 8], ['N', 'F4', 'Model'], output=output)
    tbl.format('F4', 'd')
    for N in trials:
        tbl.row([N, f4(N), a[0] * math.log2(math.log2(N))])
    return tbl

def run_median_less_than_trial(max_k=20, output=True):
    """Use RecordedItem to count # of times less-than is invoked, for N up to (but not including) 2**max_k."""
    tbl = DataTable([10, 15, 15], ['N', 'median_count', 'sort_median_count'], output=output)
    tbl.format('median_count', ',d')
    tbl.format('sort_median_count', ',d')
    trials = [2**k + 1 for k in range(8, max_k)]
    for n in trials:
        A = list([RecordedItem(i) for i in range(n)])
        random.shuffle(A)

        # sorted() produces an external sorted copy, so the shuffled list can be reused below
        RecordedItem.clear()
        med2 = sorted(A)[n // 2]
        sort_lt = RecordedItem.report()[1]

        RecordedItem.clear()
        med1 = linear_median(A)
        lin_lt = RecordedItem.report()[1]

        assert med1 == med2

        tbl.row([n, lin_lt, sort_lt])
    return tbl

def performance_different_approaches(output=True):
    """Produce results on # less-than for different algorithms and data sets."""
    headers = ['Algorithm', 'Ascending', 'Descending', 'Alternating']
    n = 524288

    tbl = DataTable([15, 10, 10, 10], headers, output=output)
    for hdr in headers:
        tbl.format(hdr, ',d')
    tbl.format('Algorithm', 's')

    # Ascending / Descending / Weave
    from ch01.largest_two import largest_two, sorting_two, double_two, mutable_two, tournament_two
    funcs = [largest_two, sorting_two, double_two, mutable_two, tournament_two]
    algs = ['largest_two', 'sorting_two', 'double_two', 'mutable_two', 'tournament_two']

    for label, func in zip(algs, funcs):
        RecordedItem.clear()
        func([RecordedItem(i) for i in range(n)])
        up_count = sum(RecordedItem.report())

        RecordedItem.clear()
        func([RecordedItem(i) for i in range(n, 0, -1)])
        down_count = sum(RecordedItem.report())

        RecordedItem.clear()
        up_down = zip(range(0, n, 2), range(n - 1, 0, -2))
        func([RecordedItem(i) for i in itertools.chain(*up_down)])
        weave_count = sum(RecordedItem.report())

        tbl.row([label, up_count, down_count, weave_count])
    return tbl

def visualize_results_floyd_warshall(DG, output=True):
    """Output the node_from and dist_to arrays for Floyd-Warshall after completion."""
    from ch07.all_pairs_sp import all_pairs_path_to

    (dist_to, node_from) = floyd_warshall(DG)
    if output:
        output_node_from_floyd_warshall(DG, node_from)
        print()
        output_dist_to_floyd_warshall(DG, dist_to)
        print()

    tbl_path = DataTable([20] * DG.number_of_nodes(), list(DG.nodes()), output=output)
    for n in DG.nodes():
        tbl_path.format(n, 's')
    for n in DG.nodes():
        row = []
        for v in DG.nodes():
            if n == v:
                row.append(SKIP)
            else:
                if node_from[n][v]:
                    nodes = all_pairs_path_to(node_from, n, v)
                    row.append(' -> '.join(nodes))
                else:
                    row.append(SKIP)
        tbl_path.row(row)
    if output:
        print()

def just_compare_sort_tournament_two(max_k=25, output=True, decimals=2):
    """Very large data sets to investigate whether a crossover occurs (it does not)."""
    tbl = DataTable([15, 10, 15], ['N', 'sorting_two', 'tournament_two'],
                    output=output, decimals=decimals)
    trials = [2**k for k in range(10, max_k)]
    num = 5
    for n in trials:
        m_tt = timeit.timeit(stmt='random.shuffle(x)\ntournament_two(x)', setup='''
import random
from ch01.largest_two import tournament_two
x=list(range({}))'''.format(n), number=num)

        m_st = timeit.timeit(stmt='random.shuffle(x)\nsorting_two(x)', setup='''
import random
from ch01.largest_two import sorting_two
x=list(range({}))'''.format(n), number=num)

        tbl.row([n, m_st, m_tt])

    if output:
        print()
        for header in tbl.labels[1:]:
            print(header, tbl.best_model(header))
    return tbl

def run_init_trial(output=True):
    """First Table in chapter 1."""
    n = 100
    tbl = DataTable([12, 12, 12], ['N', 'Ascending', 'Descending'], output=output, decimals=3)

    while n <= 1000000:
        # Ascending list of 1 up to and including N
        m_up = 1000 * min(timeit.repeat(stmt='native_largest(up)', setup='''
from ch01.largest import native_largest
up = list(range(1,{}+1))'''.format(n), repeat=10, number=50)) / 50

        # Descending list of N down to but not including 0
        m_down = 1000 * min(timeit.repeat(stmt='native_largest(down)', setup='''
from ch01.largest import native_largest
down = list(range({}, 0, -1))'''.format(n), repeat=10, number=50)) / 50

        tbl.row([n, m_up, m_down])
        n *= 10
    return tbl

def run_range_analysis(output=True):
    """Confirm O(log N) algorithm to find range of duplicates."""
    tbl = DataTable([8, 8, 8], ['N', 'O(N)', 'O(log N)'], decimals=7, output=output)

    commands = '''
from random import random
tgt = random()
alist = [tgt] * {0}
for _ in range({0}-{1}):
    alist.append(random())
alist = sorted(alist)
'''
    for n in [2**k for k in range(10, 20)]:
        custom = commands.format(n, n // 16)

        best_times = min(timeit.repeat(stmt='best_range(alist, tgt)', setup='''
from ch02.challenge import best_range
{}'''.format(custom), repeat=40, number=50)) / 50

        worst_times = min(timeit.repeat(stmt='worst_range(alist, tgt)', setup='''
from ch02.challenge import worst_range
{}'''.format(custom), repeat=40, number=50)) / 50

        tbl.row([n, worst_times, best_times])

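# best_range above is imported from ch02.challenge and is not shown here. Purely as
# an illustration of the O(log N) idea being timed (the repository's implementation
# may differ), the range of duplicates in a sorted list can be found with two
# binary searches from the standard library.
def best_range_sketch(alist, target):
    """Hypothetical sketch: return (lo, hi) slice bounds of target within sorted alist."""
    from bisect import bisect_left, bisect_right
    lo = bisect_left(alist, target)    # first index whose value is >= target
    hi = bisect_right(alist, target)   # first index whose value is > target
    return (lo, hi)                    # target occupies alist[lo:hi]
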
def compare_time(words, output=True, decimals=4):
    """Generate table of performance differences with linked hashtable and perfect hashing."""
    tbl = DataTable([8, 8, 8], ['N', 'Linked', 'Perfect'], output=output, decimals=decimals)

    t_perfect = min(timeit.repeat(stmt='''
ht = HL()
for w in words:
    ht.put(w,w)''', setup='''
from ch03.hashtable_open_perfect import Hashtable as HL
words={}'''.format(words), repeat=3, number=5)) / 5

    t_linked = min(timeit.repeat(stmt='''
ht = HL(len(words))
for w in words:
    ht.put(w,w)''', setup='''
from ch03.hashtable_linked import Hashtable as HL
words={}'''.format(words), repeat=3, number=5)) / 5

    tbl.row([len(words), t_linked, t_perfect])
    return tbl

def run_median_trial():
    """Generate table for Median Trial."""
    tbl = DataTable([10, 15, 15], ['N', 'median_time', 'sort_median'])

    trials = [2**k + 1 for k in range(8, 20)]
    for n in trials:
        t_med = 1000 * min(timeit.repeat(stmt='assert(linear_median(a) == {}//2)'.format(n), setup='''
import random
from ch01.challenge import linear_median
a = list(range({}))
random.shuffle(a)
'''.format(n), repeat=10, number=5)) / 5

        t_sort = 1000 * min(timeit.repeat(stmt='assert(sorted(a)[{0}//2] == {0}//2)'.format(n), setup='''
import random
from ch01.challenge import linear_median
a = list(range({}))
random.shuffle(a)
'''.format(n), repeat=10, number=5)) / 5

        tbl.row([n, t_med, t_sort])
    return tbl

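# linear_median above is imported from ch01.challenge and is not reproduced here.
# For orientation only, an average-case linear-time selection can be sketched with
# random partitioning; this is a hypothetical illustration, not the repository's code.
def quickselect_sketch(A, k):
    """Hypothetical sketch: k-th smallest element of A (average-case linear time)."""
    import random
    pivot = random.choice(A)
    below = [x for x in A if x < pivot]
    equal = [x for x in A if x == pivot]
    above = [x for x in A if x > pivot]
    if k < len(below):
        return quickselect_sketch(below, k)
    if k < len(below) + len(equal):
        return pivot
    return quickselect_sketch(above, k - len(below) - len(equal))

# Example: quickselect_sketch(list_of_values, len(list_of_values)//2) returns the median.
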
def time_results_open(words, output=True, decimals=4):
    """Average time to insert keys into open-addressing hashtables of different sizes."""
    sizes = [8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576]
    widths = [8] + [10] * len(sizes)
    headers = ['N'] + sizes
    tbl = DataTable(widths, headers, output=output, decimals=decimals)

    # Start with the first 32 words and work up to 16,384 words, inserting each
    # batch into tables of the given sizes (skipping tables too small for the batch).
    for num_to_add in [32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384]:
        all_words = words[:num_to_add]

        arow = [num_to_add]
        for size in sizes:
            if num_to_add < size:
                m1 = min(timeit.repeat(stmt='''
table = Hashtable({})
for word in words:
    table.put(word, 99)'''.format(size), setup='''
from ch03.hashtable_open import Hashtable
words={}'''.format(all_words), repeat=1, number=100))
                arow.append((100000.0 * m1) / size)
            else:
                arow.append(SKIP)
        tbl.row(arow)
    return tbl

def combined_sorted(lo=8, hi=12, output=True):
    """Generate results for different sorting trials."""
    tbl = DataTable([8] * (hi - lo + 1),
                    ['N'] + [comma(2**k) for k in range(lo, hi)], output=output)

    for n in [2**k for k in range(lo, hi)]:
        row = [n]
        for m in [2**k for k in range(lo, hi)]:
            row.append(run_merge_trial(m, n))
        tbl.row(row)

    # Diagonal values are for 2*M*log(M) so divide in HALF for accurate one.
    # Build model ONLY for first five values.
    x = [2**k for k in range(lo, min(lo + 5, hi))]
    y = [tbl.entry(r, comma(r)) for r in [2**k for k in range(lo, min(lo + 5, hi))]]

    if numpy_error:
        a = 0
    else:
        import numpy as np
        from scipy.optimize import curve_fit
        from scipy.stats.stats import pearsonr

        (coeffs, _) = curve_fit(n_log_n_model, np.array(x), np.array(y))
        a = coeffs[0] / 2

        y_fit = [n_log_n_model(r, a) for r in [2**k for k in range(lo, min(lo + 5, hi))]]
        print()
        print(pearsonr(y, y_fit))

    print()
    print('Prediction')
    model = DataTable([8] * (hi - lo + 1),
                      ['N'] + [comma(2**k) for k in range(lo, hi)], output=output)
    for n in [2**k for k in range(lo, hi)]:
        row = [n]
        for m in [2**k for k in range(lo, hi)]:
            row.append(n_log_n_model(n, a) + n_log_n_model(m, a))
        model.row(row)
    return tbl

def prime_number_difference(words, output=True, decimals=2):
    """Identify sensitivity of M to being prime or not."""
    from ch03.hashtable_linked import Hashtable as Linked_Hashtable, stats_linked_lists
    from ch03.hashtable_open import Hashtable as Open_Hashtable, stats_open_addressing
    from ch03.base26 import base26

    # these are prime numbers between 428880 and 428980
    lo = 428880
    primes = [428899, 428951, 428957, 428977]
    hi = 428980

    keys = [base26(w) for w in words]
    tbl = DataTable([12, 6, 8, 8, 8, 8],
                    ['M', 'Prime', 'Avg. LL', 'Max LL', 'Avg. OA', 'Max OA'],
                    output=output, decimals=decimals)
    tbl.format('Prime', 's')
    tbl.format('Max LL', 'd')
    tbl.format('Max OA', 'd')
    worst = 0
    worst_m = 0
    for m in range(lo, hi + 1):
        is_p = 'Prime' if m in primes else ''
        ht_linked = Linked_Hashtable(m)
        ht_open = Open_Hashtable(m)

        for k in keys:
            ht_linked.put(k, 1)
            ht_open.put(k, 1)

        (avg_length_linked, max_length_linked) = stats_linked_lists(ht_linked)
        if max_length_linked > worst:
            worst_m = m
            worst = max_length_linked
        (avg_length_open, max_length_open) = stats_open_addressing(ht_open)
        tbl.row([m, is_p, avg_length_linked, max_length_linked,
                 avg_length_open, max_length_open])

    # Now try to find any more that exceed this maximum amount
    if output:
        print('Worst was {} for M={}'.format(worst, worst_m))
        for m in range(worst_m, worst_m + 10000, 13):
            ht_linked = Linked_Hashtable(m)
            for k in keys:          # populate the table before measuring chain lengths
                ht_linked.put(k, 1)
            (avg_length_linked, max_length_linked) = stats_linked_lists(ht_linked, False)
            if max_length_linked > worst:
                worst_m = m
                worst = max_length_linked
                print('Worst of {} for M={}'.format(worst, worst_m))
        print('Done')
    return tbl

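# base26 (imported above from ch03.base26) converts a word into a single integer
# key before hashing. Its exact definition is not shown in this file; a plausible
# sketch treats each lowercase letter as a digit in base 26, roughly like this.
def base26_sketch(word):
    """Hypothetical sketch: encode a lowercase word as a base-26 integer."""
    value = 0
    for ch in word.lower():
        value = 26 * value + (ord(ch) - ord('a'))
    return value
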
def count_collisions(num_rows=0, output=True, decimals=1):
    """Generate table counting collisions."""
    all_words = english_words()
    N = len(all_words)
    from ch03.hashtable_linked import Hashtable as HL
    from ch03.hashtable_linked import stats_linked_lists
    from ch03.hashtable_open import Hashtable as OHL
    from ch03.hashtable_open import stats_open_addressing

    tbl = DataTable([10,8,8,8,8], ['M', 'Avg LL', 'Max LL', 'Avg OA', 'Max OA'],
                    output=output, decimals=decimals)
    tbl.format('Max LL', 'd')
    tbl.format('Max OA', 'd')

    M = 20*N
    hl = HL(M)
    ohl = OHL(M)
    for w in all_words:
        hl.put(w, 1)
        ohl.put(w, 1)
    avg_size_linked = stats_linked_lists(hl)
    avg_size_open = stats_open_addressing(ohl)
    tbl.row([M, avg_size_linked[0], avg_size_linked[1],
             avg_size_open[0], avg_size_open[1]])

    M = 2*N
    while M > N/16:
        hl = HL(M)
        ohl = OHL(M)
        for w in all_words:
            hl.put(w, 1)
            if M > N:   # otherwise, will fail...
                ohl.put(w, 1)
        avg_size_linked = stats_linked_lists(hl)
        if N < M:
            avg_size_open = stats_open_addressing(ohl)
        else:
            tbl.format('Avg OA', 's')
            tbl.format('Max OA', 's')
            avg_size_open = [SKIP, SKIP]

        num_rows -= 1
        tbl.row([M, avg_size_linked[0], avg_size_linked[1],
                 avg_size_open[0], avg_size_open[1]])

        # Shrink M slowly (95%) while above N, then at a 60% clip once below N.
        if M > N:
            M = (M * 95) // 100
        else:
            M = (M * 6) // 10

        # To allow for testing, simple way to break out after a number of rows are generated.
        if num_rows == 0:
            break
    return tbl

def table_compare_graph_structures(max_k=15, output=True):
    """
    Compare Matrix implementation vs. Adjacency list implementation vs. NetworkX
    for N up to but not including 2**max_k.
    """
    tbl = DataTable([8, 10, 10, 10],
                    ['N', 'NetworkX', 'Adjacency List', 'Adjacency Matrix'],
                    output=output)

    for N in [2**k for k in range(8, max_k)]:
        undirect_mtime = 1000 * min(timeit.repeat(stmt='''
total=0
for w in G[0]:
    total += w''', setup='''
from ch07.replacement import UndirectedGraph
G = UndirectedGraph()
G.add_nodes_from(list(range({0})))
for o in range(10):
    G.add_edge(0, {0}-o-1)'''.format(N), repeat=20, number=20))

        # Time the same neighbor traversal using networkx directly
        networkx_mtime = 1000 * min(timeit.repeat(stmt='''
total=0
for w in G[0]:
    total += w''', setup='''
import networkx as nx
G = nx.Graph()
G.add_nodes_from(list(range({0})))
for o in range(10):
    G.add_edge(0, {0}-o-1)'''.format(N), repeat=20, number=20))

        matrix_mtime = 1000 * min(timeit.repeat(stmt='''
total=0
for w in G[0]:
    total += w''', setup='''
from ch07.replacement import MatrixUndirectedGraph
G = MatrixUndirectedGraph()
G.add_nodes_from(list(range({0})))
for o in range(10):
    G.add_edge(0, {0}-o-1)'''.format(N), repeat=20, number=20))

        tbl.row([N, networkx_mtime, undirect_mtime, matrix_mtime])
    return tbl

def generate_hash():
    """Results are different each time since Python salts hash values."""
    s = 'a rose by any other name would smell as sweet'
    tbl = DataTable([8,20,20], ['key', 'hash(key)', 'hash(key) % 15'])
    tbl.format('key', 's')
    tbl.format('hash(key)', 'd')
    tbl.format('hash(key) % 15', 'd')
    for w in s.split():
        tbl.row([w, hash(w), hash(w) % 15])
    return tbl

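# As the docstring above notes, hash() of a string changes from run to run because
# Python salts string hashes. If reproducible rows are ever needed, one option
# (a suggestion, not part of the original code) is to fix the PYTHONHASHSEED
# environment variable before launching the interpreter, e.g.:
#
#     PYTHONHASHSEED=0 python -c "print(hash('rose'))"
#
# With the same seed, the printed hash value is identical across runs.
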
def generate_heap_table(max_k=18, output=True, decimals=3):
    """
    Generate table comparing heapq and PriorityQueue behaviors.
    """
    # Enqueue table first
    add_tbl = DataTable([8,8,8], ['N','heapq','PriorityQueue'], output=output, decimals=decimals)
    for n in [2**k for k in range(10, max_k)]:
        add_tbl.row([n, heap_add(n, 1000), pq_add(n, 1000)])

    remove_tbl = DataTable([8,8,8], ['N','heapq','PriorityQueue'], output=output, decimals=decimals)
    for n in [2**k for k in range(10, max_k)]:
        remove_tbl.row([n, heap_remove(n, 1000), pq_remove(n, 1000)])

    return (add_tbl, remove_tbl)

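# heap_add, pq_add, heap_remove, and pq_remove are timing helpers defined elsewhere
# in this module and are not shown here. For orientation only, a hypothetical
# heap_add might time pushing n items onto a heapq-backed list, along these lines
# (names and the exact quantity measured are assumptions, not the original helpers):
def heap_add_sketch(n, number):
    """Hypothetical sketch: milliseconds to push n ascending integers with heapq."""
    import timeit
    return 1000 * min(timeit.repeat(stmt='''
h = []
for i in range({}):
    heapq.heappush(h, i)'''.format(n), setup='import heapq', repeat=3, number=number)) / number
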
def generate_stack_table(max_k=18, output=True, decimals=3):
    """
    Generate table showing different stack behaviors.
    """
    # Push table first
    push_tbl = DataTable([8,8,8,8], ['N','list','Dequeue', 'LifoQueue'], output=output, decimals=decimals)
    for n in [2**k for k in range(10, max_k)]:
        push_tbl.row([n, list_push(n, 1000), dequeue_push(n, 1000), queue_push(n, 1000)])

    pop_tbl = DataTable([8,8,8,8], ['N','list','Dequeue', 'LifoQueue'], output=output, decimals=decimals)
    for n in [2**k for k in range(10, max_k)]:
        pop_tbl.row([n, list_pop(n, 1000), dequeue_pop(n, 1000), queue_pop(n, 1000)])

    return (push_tbl, pop_tbl)

def exercise_triangle_number_probing(output=True, decimals=4):
    """Compare triangle number probing with M=powers of 2."""
    tbl = DataTable([20, 8], ['Type', 'Time to Search'], output=output, decimals=decimals)
    tbl.format('Type', 's')

    timing_oa = min(timeit.repeat(stmt='''
for w in words:
    ht.get(w)''', setup='''
from ch03.hashtable_open import Hashtable
from resources.english import english_words
words = english_words()
ht = Hashtable(524288)
for w in words[:160564]:
    ht.put(w,w)''', repeat=7, number=5)) / 5
    tbl.row(['Open Addressing', timing_oa])

    timing_sc = min(timeit.repeat(stmt='''
for w in words:
    ht.get(w)''', setup='''
from ch03.hashtable_linked import Hashtable
from resources.english import english_words
words = english_words()
ht = Hashtable(524288)
for w in words[:160564]:
    ht.put(w,w)''', repeat=7, number=5)) / 5
    tbl.row(['Separate Chaining', timing_sc])

    timing_tn = min(timeit.repeat(stmt='''
for w in words:
    ht.get(w)''', setup='''
from ch03.challenge import HashtableTriangleNumbers
from resources.english import english_words
words = english_words()
ht = HashtableTriangleNumbers(524288)
for w in words[:160564]:
    ht.put(w,w)''', repeat=7, number=5)) / 5
    tbl.row(['Triangle Probing', timing_tn])

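# HashtableTriangleNumbers comes from ch03.challenge and is not reproduced here.
# As background only: with a table size M that is a power of two, triangle-number
# probing advances from the hash bucket by 1, then 2, then 3, ... (i.e., cumulative
# triangle numbers k*(k+1)/2), which visits every bucket. A rough sketch of that
# probe sequence (not the repository's implementation):
def triangle_probe_sequence_sketch(hc, M, limit=10):
    """Hypothetical sketch: first `limit` probe indexes for hash code hc in a table of size M."""
    probes = []
    delta = 1
    index = hc % M
    for _ in range(limit):
        probes.append(index)
        index = (index + delta) % M   # add 1, then 2, then 3, ...
        delta += 1
    return probes
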