コード例 #1
0
ファイル: book.py プロジェクト: heineman/LearningAlgorithms
def average_performance(max_n=65536, output=True, decimals=1):
    """
    Generate table of average performance for different PQ implementations.

    Every reported value is ``1000000 * run_trials(module, N, T) / (T * N)``
    with T = 3, i.e. the result of run_trials() normalized by the number of
    trials and by N (presumably microseconds per operation, if run_trials()
    returns seconds -- confirm against run_trials()).

    N starts at 256 and doubles until it exceeds ``max_n``. The 'Heap' and
    'OrderL' columns are measured for every N. The 'Linked', 'OrderA',
    'Built-in' and 'Array' columns are only measured while N <= 16384; rows
    beyond that cutoff contain just N, 'Heap' and 'OrderL'.

    Parameters:
        max_n:    largest N to include in the table.
        output:   passed to DataTable; also controls whether the best
                  model for each column is printed after the table.
        decimals: passed to DataTable.

    Returns:
        The populated DataTable.
    """
    T = 3
    base = 256
    cutoff = 16384
    high = max_n

    def per_item(module, n):
        """Run T trials of the given module at size n and normalize by T*n."""
        return 1000000*run_trials(module, n, T)/(T*n)

    # These two implementations are timed across the full range of N.
    heap = {}
    order_ll = {}
    N = base
    while N <= high:
        order_ll[N] = per_item('ch04.ordered_list', N)
        heap[N]     = per_item('ch04.heap', N)
        N *= 2

    # The remaining implementations are only timed up to the cutoff. Never go
    # past max_n either: sizes above it are not reported in the table, so
    # benchmarking them would be wasted work.
    order_ar = {}
    linked = {}
    array = {}
    builtin = {}
    limit = min(cutoff, high)
    N = base
    while N <= limit:
        order_ar[N]  = per_item('ch04.ordered', N)
        linked[N]    = per_item('ch04.linked', N)
        array[N]     = per_item('ch04.array', N)
        builtin[N]   = per_item('ch04.builtin', N)
        N *= 2

    columns = ['Heap', 'OrderL', 'Linked', 'OrderA', 'Built-in', 'Array']
    tbl = DataTable([8,8,8,8,8,8,8], ['N'] + columns,
                    output=output, decimals=decimals)
    N = base
    while N <= high:
        if N <= cutoff:
            tbl.row([N, heap[N], order_ll[N], linked[N], order_ar[N], builtin[N], array[N]])
        else:
            # Past the cutoff only the first two timings exist.
            tbl.row([N, heap[N], order_ll[N]])
        N *= 2

    if output:
        print()
        for column in columns:
            print(column, tbl.best_model(column))
    return tbl
コード例 #2
0
def run_largest_alternate(output=True, decimals=3):
    """
    Generate table comparing largest() and alternate() on ascending input.

    For N = 8, 16, ..., 2048 each row holds:
      * N
      * '#Less' / '#LessA': sum of RecordedItem.report() after running
        largest() / alternate() on a list of N RecordedItem values in
        ascending order (presumably counts of less-than comparisons --
        confirm against RecordedItem).
      * 'largest' / 'alternate': 1000 * (best of 10 timeit repetitions of 50
        calls each) / 50, on the ascending list of plain integers.

    When ``output`` is true the best model for each timing column is printed,
    and the populated DataTable is always returned.
    """
    tbl = DataTable([8, 10, 15, 10, 10],
                    ['N', '#Less', '#LessA', 'largest', 'alternate'],
                    output=output,
                    decimals=decimals)
    for count_column in ('#Less', '#LessA'):
        tbl.format(count_column, ',d')

    def best_time(stmt_template, setup, values):
        """Best-of-10 timing for 50 calls, scaled by 1000 and averaged per call."""
        timings = timeit.repeat(stmt=stmt_template.format(values),
                                setup=setup,
                                repeat=10,
                                number=50)
        return 1000 * min(timings) / 50

    def recorded_counts(function, size):
        """Run function on fresh ascending RecordedItem values and return the report."""
        items = [RecordedItem(i) for i in range(size)]
        # Reset counters after building the items and before the measured call.
        RecordedItem.clear()
        function(items)
        report = RecordedItem.report()
        RecordedItem.clear()
        return report

    # Sizes 8, 16, ..., 2048.
    for n in [2 ** k for k in range(3, 12)]:
        ascending = list(range(n))

        largest_up = best_time('largest({})',
                               'from ch01.largest import largest',
                               ascending)
        alternate_up = best_time('alternate({})',
                                 'from ch01.largest import alternate',
                                 ascending)

        largest_counts = recorded_counts(largest, n)
        alternate_counts = recorded_counts(alternate, n)

        tbl.row([n, sum(largest_counts), sum(alternate_counts),
                 largest_up, alternate_up])

    if output:
        print()
        print('largest', tbl.best_model('largest', Model.LINEAR))
        print('Alternate', tbl.best_model('alternate', Model.QUADRATIC))
    return tbl
コード例 #3
0
def just_compare_sort_tournament_two(max_k=25, output=True, decimals=2):
    """
    Time sorting_two against tournament_two on very large data sets.

    Used to investigate whether a crossover occurs (no, it does not).

    For each N = 2**k with k in range(10, max_k), a list of N integers is
    created once, and then shuffled and processed five times by each
    function; the total time for those five runs (shuffle included) is
    recorded. Returns the DataTable, printing the best model for each timing
    column when ``output`` is true.
    """
    tbl = DataTable([15, 10, 15], ['N', 'sorting_two', 'tournament_two'],
                    output=output,
                    decimals=decimals)

    repetitions = 5

    def timed(function_name, size):
        """Total time for `repetitions` shuffle-then-call runs of the named function."""
        return timeit.timeit(
            stmt='random.shuffle(x)\n{}(x)'.format(function_name),
            setup='\nimport random\nfrom ch01.largest_two import {}\nx=list(range({}))'.format(
                function_name, size),
            number=repetitions)

    for k in range(10, max_k):
        n = 2**k
        # tournament_two is measured first, although it is reported last.
        tournament_time = timed('tournament_two', n)
        sorting_time = timed('sorting_two', n)
        tbl.row([n, sorting_time, tournament_time])

    if output:
        print()
        for header in tbl.labels[1:]:
            print(header, tbl.best_model(header))
    return tbl
コード例 #4
0
def average_performance(max_n=32768, output=True, decimals=2):
    """
    Compare average performance of heap and binary-tree priority queues.

    Starting at N = 128 and doubling while N <= max_n, each row reports
    1000000 * (trial result) / (3 * N), where the trial result comes from
    run_trials_pq_n('ch04.heap', ...) for 'Heap' and from
    run_trials_pq('ch06.pq', ...) for 'BinaryTree', each with three trials.

    Sample output:

         N        Heap    BinaryTree
         128        2.38        5.15
         256        2.80        5.75
         512        3.22        6.63
       1,024        3.51        7.60
       2,048        3.89        8.42
       4,096        4.35        9.21
       8,192        4.75       10.24
      16,384        5.23       11.38
      32,768        5.96       12.90
    Heap [(<Model.LOG: 1>, 0.9946069479866522, 0.19032820110187337, 0.3676270723813611)]
    BinaryTree [(<Model.LOG: 1>, 0.9945273325485424, 0.48542129185563554, 0.7892486887139494)]

    While both offer O(Log N) performance, heap is more efficient (a little
    more than twice as efficient).

    Returns the populated DataTable; the best models are printed only when
    ``output`` is true.
    """
    num_trials = 3
    columns = ['N', 'Heap', 'BinaryTree']
    tbl = DataTable([8, 8, 8], columns,
                    output=output,
                    decimals=decimals)

    size = 128
    while not size > max_n:
        # The binary tree is timed before the heap for each size.
        tree_time = 1000000 * run_trials_pq('ch06.pq', size, num_trials) / (num_trials * size)
        heap_time = 1000000 * run_trials_pq_n('ch04.heap', size, num_trials) / (num_trials * size)
        tbl.row([size, heap_time, tree_time])
        size *= 2

    if output:
        for column in columns[1:]:
            print(column, tbl.best_model(column))

    return tbl
コード例 #5
0
ファイル: test.py プロジェクト: heineman/LearningAlgorithms
    def test_table(self):
        """
        Exercise DataTable: entry(), column() and best_model().

        Two identical tables are filled with rows (n, n, sqrt(n)) for n in
        2..9: one with output suppressed, one using DataTable's default
        output setting. The model checks are skipped when numpy_error is set.
        """
        def populate(table):
            """Format the 'Another' column as integers and add rows for n = 2..9."""
            table.format('Another', 'd')
            for n in range(2, 10):
                table.row([n, n, n**0.5])
            return table

        quiet = populate(DataTable([8, 8, 8], ['N', 'Another', 'SquareRoot'],
                                   output=False,
                                   decimals=4))
        self.assertEqual(quiet.entry(3, 'Another'), 3)

        print('Testing that Table is print to console')
        tbl = populate(DataTable([8, 8, 8], ['N', 'Another', 'SquareRoot'],
                                 decimals=4))

        self.assertEqual(list(range(2, 10)), tbl.column('Another'))

        # best_model() is still invoked when numpy is unavailable; only the
        # assertions on its result are skipped.
        model = tbl.best_model('Another')[0]
        if not numpy_error:
            self.assertEqual(model[0], Model.LINEAR)
            self.assertAlmostEqual(model[3], 1.0000, places=5)
コード例 #6
0
ファイル: book.py プロジェクト: heineman/LearningAlgorithms
def prototype_table(output=True, decimals=3):
    """
    Generate table of results for prototype application.

    The prototype application is simply a request to sort the N values.

    Timings for N = 100, 1000 and 10000 are used to fit linear, quadratic and
    N log N models with scipy's curve_fit (all coefficients are [0, 0] when
    numpy_error is set). The table then lists, for those sizes and for
    N = 100000, 1000000 and 10000000, the measured time next to each model's
    prediction.

    Each measurement is 1000 * the fastest of 100 repetitions, where one
    repetition is the total time of 100 consecutive ``x.sort()`` calls on a
    list that is shuffled once in the repetition's setup.

    Returns the populated DataTable; coefficients, Pearson correlations and
    the best model are printed only when ``output`` is true.
    """
    def measure_sort(size):
        """Best-of-100 timing (scaled by 1000) of 100 sorts of a list of `size` values."""
        return 1000 * min(
            timeit.repeat(stmt='x.sort()',
                          setup='\nimport random\nx=list(range({}))\nrandom.shuffle(x)'.format(size),
                          repeat=100,
                          number=100))

    def quad_model(n, a, b):
        if a < 0:  # attempt to PREVENT negative coefficient.
            return 1e10
        return a * n * n + b * n

    nvals = [100, 1000, 10000]
    yvals = [measure_sort(n) for n in nvals]

    # Coefficients are returned as first argument
    if not numpy_error:
        import numpy as np
        from scipy.optimize import curve_fit
        xs = np.array(nvals)
        ys = np.array(yvals)
        nlog_n_coeffs, _ = curve_fit(n_log_n_model, xs, ys)
        linear_coeffs, _ = curve_fit(linear_model, xs, ys)
        quadratic_coeffs, _ = curve_fit(quad_model, xs, ys)
    else:
        nlog_n_coeffs = linear_coeffs = quadratic_coeffs = [0, 0]

    if output:
        print('Linear    = {:f}*N + {:f}'.format(linear_coeffs[0],
                                                 linear_coeffs[1]))
        print('Quadratic = {}*N*N + {}*N'.format(quadratic_coeffs[0],
                                                 quadratic_coeffs[1]))
        print('N Log N   = {:.12f}*N*log2(N)'.format(nlog_n_coeffs[0]))
        print()

    tbl = DataTable([12, 10, 10, 10, 10],
                    ['N', 'Time', 'Linear', 'Quad', 'NLogN'],
                    output=output,
                    decimals=decimals)

    def add_row(size, measured):
        """Append one row: size, measured time and the three model predictions."""
        tbl.row([
            size, measured,
            linear_model(size, linear_coeffs[0], linear_coeffs[1]),
            quadratic_model(size, quadratic_coeffs[0], quadratic_coeffs[1]),
            n_log_n_model(size, nlog_n_coeffs[0])
        ])

    # Rows for the sizes the models were fitted on.
    for size, measured in zip(nvals, yvals):
        add_row(size, measured)

    # Rows for larger sizes, measured only now, to compare against predictions.
    for size in [100000, 1000000, 10000000]:
        add_row(size, measure_sort(size))

    if output:
        for column in ('Linear', 'Quad', 'NLogN'):
            print(column, tbl.pearsonr('Time', column))
        print(tbl.best_model('Time'))
    return tbl