def insertion_sort_bas(max_k=18, output=True, decimals=3):
    """
    Build a timing table for the ``insertion_sort_bas`` sorting routine.

    Problem sizes 2**8 .. 2**11 are timed first and used to fit a quadratic
    model and an N log N model. Those measurements, followed by fresh
    measurements for 2**12 up to (but not including) 2**max_k, are written
    to the returned table next to the predictions of both models.
    """
    def best_time(size):
        """Return 1000 * the fastest of ten repetitions (10 sorts each)."""
        timings = timeit.repeat(stmt='insertion_sort_bas(A)',
                                setup='''
import random
from ch05.sorting import insertion_sort_bas
A=list(range({}))
random.shuffle(A)'''.format(size),
                                repeat=10,
                                number=10)
        return 1000 * min(timings)

    # Measurements that feed the curve fitting.
    sizes = [2**k for k in range(8, 12)]
    times = [best_time(size) for size in sizes]

    # curve_fit hands back the fitted coefficients as its first result.
    if not numpy_error:
        import numpy as np
        from scipy.optimize import curve_fit
        log_coeffs = curve_fit(n_log_n_model, np.array(sizes),
                               np.array(times))[0]
        quadratic_coeffs = curve_fit(quadratic_model, np.array(sizes),
                                     np.array(times))[0]
    else:
        # Without numpy/scipy there is nothing to fit; use zeroed models.
        log_coeffs = quadratic_coeffs = [0, 0]

    quad_a = quadratic_coeffs[0]
    quad_b = quadratic_coeffs[1]
    log_a = log_coeffs[0]

    if output:
        print('Quadratic = {}*N*N + {}*N'.format(quad_a, quad_b))
        print('Log       = {:.12f}*N*log2(N)'.format(log_a))
        print()

    tbl = DataTable([12, 10, 10, 10], ['N', 'Time', 'Quad', 'Log'],
                    output=output,
                    decimals=decimals)

    def add_row(size, elapsed):
        """Append one measurement together with both model predictions."""
        tbl.row([
            size, elapsed,
            quadratic_model(size, quad_a, quad_b),
            n_log_n_model(size, log_a)
        ])

    # First the rows the models were fitted on ...
    for size, elapsed in zip(sizes, times):
        add_row(size, elapsed)

    # ... then larger sizes, timed on the spot.
    for size in [2**k for k in range(12, max_k)]:
        add_row(size, best_time(size))
    return tbl
def run_max_sort_worst_case(max_k=14, output=True, decimals=4):
    """
    Build a timing table for max_sort up to (but not including) 2**max_k.

    A quadratic model is fitted to timings of sizes 2**5 .. 2**11; every
    size from 2**5 upward is then timed again and reported next to the
    model's prediction.
    """
    setup_template = '''
from ch02.challenge import max_sort
import random
x=list(range({},0,-1))
random.shuffle(x)'''

    def time_max_sort(size):
        """Return the total time of 10 calls to max_sort for this size."""
        return timeit.timeit(stmt='max_sort(x)',
                             setup=setup_template.format(size),
                             number=10)

    # Measurements used only for fitting the model.
    fit_sizes = [2**k for k in range(5, 12)]
    fit_times = [time_max_sort(size) for size in fit_sizes]

    if not numpy_error:
        import numpy as np
        from scipy.optimize import curve_fit
        quadratic_coeff = curve_fit(quadratic_model, np.array(fit_sizes),
                                    np.array(fit_times))[0]
        # The model is only announced when it was actually fitted.
        if output:
            print('Quadratic N  = {:.12f}*N*N + {:.12f}*N'.format(
                quadratic_coeff[0], quadratic_coeff[1]))
    else:
        quadratic_coeff = [0, 0]

    tbl = DataTable([8, 8, 8], ['N', 'MaxSort', 'Model'],
                    output=output,
                    decimals=decimals)

    # Table rows come from a fresh round of timings, not the fitting data.
    for size in [2**k for k in range(5, max_k)]:
        elapsed = time_max_sort(size)
        predicted = quadratic_model(size, quadratic_coeff[0],
                                    quadratic_coeff[1])
        tbl.row([size, elapsed, predicted])

    return tbl
Exemple #3
0
def modeling_insertion_selection(output=True, decimals=1):
    """
    Build a table of average compare/swap counts for two counting sorts.

    For each size, 100 shuffled lists are sorted by both
    selection_sort_counting and insertion_sort_counting and the reported
    counts are averaged. Models are fitted to sizes 2**4 .. 2**7; the table
    then covers 2**4 .. 2**9 using freshly gathered averages, each shown
    next to its model's prediction.
    """
    from ch05.sorting import selection_sort_counting, insertion_sort_counting
    trials = 100

    def average_counts(size):
        """Return average (comp_ss, swap_ss, comp_is, swap_is) over trials."""
        comp_ss = swap_ss = comp_is = swap_is = 0
        for _ in range(trials):
            # Selection Sort gets its own freshly shuffled list ...
            values = list(range(size))
            random.shuffle(values)
            swaps, compares = selection_sort_counting(values)
            swap_ss += swaps
            comp_ss += compares

            # ... and so does Insertion Sort.
            values = list(range(size))
            random.shuffle(values)
            swaps, compares = insertion_sort_counting(values)
            swap_is += swaps
            comp_is += compares
        return (comp_ss / trials, swap_ss / trials,
                comp_is / trials, swap_is / trials)

    # Gather the data points the models are fitted to.
    x = []
    y_comp_ss = []
    y_swap_ss = []
    y_comp_is = []
    y_swap_is = []
    for size in [2**k for k in range(4, 8)]:
        avg_comp_ss, avg_swap_ss, avg_comp_is, avg_swap_is = average_counts(size)
        x.append(size)
        y_comp_ss.append(avg_comp_ss)
        y_swap_ss.append(avg_swap_ss)
        y_comp_is.append(avg_comp_is)
        y_swap_is.append(avg_swap_is)

    if not numpy_error:
        import numpy as np
        from scipy.optimize import curve_fit
        quadratic_comp_ss = curve_fit(quadratic_model, np.array(x), np.array(y_comp_ss))[0]
        linear_swap_ss = curve_fit(linear_model, np.array(x), np.array(y_swap_ss))[0]
        quadratic_comp_is = curve_fit(quadratic_model, np.array(x), np.array(y_comp_is))[0]
        quadratic_swap_is = curve_fit(quadratic_model, np.array(x), np.array(y_swap_is))[0]
    else:
        # No fitting possible: every model degenerates to zero coefficients.
        quadratic_comp_ss = linear_swap_ss = quadratic_comp_is = quadratic_swap_is = [0,0]

    if output:
        print('Swap SS Linear    = {:f}*N + {:f}'.format(linear_swap_ss[0], linear_swap_ss[1]))
        print('Comp SS Quadratic = {}*N*N + {}*N'.format(quadratic_comp_ss[0], quadratic_comp_ss[1]))

        print('Swap IS Quadratic = {}*N*N + {}*N'.format(quadratic_swap_is[0], quadratic_swap_is[1]))
        print('Comp IS Quadratic = {}*N*N + {}*N'.format(quadratic_comp_is[0], quadratic_comp_is[1]))
        print()

    tbl = DataTable([12,10,10,10,10,10,10,10,10],
            ['N','AvgCompSS','MCSS', 'AvgSwapSS', 'MSSS', 'AvgCompIS', 'MCIS', 'AvgSwapIS', 'MSIS'],
            output=output, decimals=decimals)

    # Each measured average is immediately followed by its model's value.
    for size in [2**k for k in range(4, 10)]:
        avg_comp_ss, avg_swap_ss, avg_comp_is, avg_swap_is = average_counts(size)
        tbl.row([size,
                 avg_comp_ss,
                 quadratic_model(size, quadratic_comp_ss[0], quadratic_comp_ss[1]),
                 avg_swap_ss,
                 linear_model(size, linear_swap_ss[0], linear_swap_ss[1]),
                 avg_comp_is,
                 quadratic_model(size, quadratic_comp_is[0], quadratic_comp_is[1]),
                 avg_swap_is,
                 quadratic_model(size, quadratic_swap_is[0], quadratic_swap_is[1]),
                 ])
    return tbl
Exemple #4
0
def timing_selection_insertion(min_k=8, max_k=16, output=True, decimals=3):
    """
    Time Selection Sort and Insertion Sort against fitted quadratic models.

    Because Insertion Sort is so sensitive to its inputs, its reported time
    is an average over several runs rather than a single measurement.
    Models are first fitted using the five sizes 2**min_k .. 2**(min_k+4);
    the table then continues with sizes up to (but not including) 2**max_k.

    Takes hours to run. In the book, the table goes up to max_k=18.

    min_k is the exponent of the smallest problem size and max_k the
    exclusive upper bound on the exponent. output and decimals are passed
    on to the DataTable; output also enables the progress and model
    messages. Returns the populated DataTable.
    """
    model_points = 5      # number of sizes the models are fitted to
    model_repeats = 100   # Insertion Sort runs averaged per modeling size
    table_repeats = 5     # Insertion Sort runs per remaining size

    selection_setup = '''
import random
from ch05.sorting import selection_sort
A=list(range({}))
random.shuffle(A)'''
    insertion_setup = '''
import random
from ch05.sorting import insertion_sort
A=list(range({}))
random.shuffle(A)'''

    if output:
        print('Building models for Insertion Sort. This may take awhile...')

    # Collect the data points used to build both models.
    x = []
    y_is = []
    y_ss = []
    for n in [2**k for k in range(min_k, min_k + model_points)]:
        # Selection Sort behaves the same on every input of a given size,
        # so a single timed run is enough.
        t_ss = timeit.timeit(stmt='selection_sort(A)',
                             setup=selection_setup.format(n), number=1)

        # Insertion Sort is highly dependent upon its input (a data set
        # with long ascending runs sorts much faster), so time many
        # single-sort runs and report their average instead of the minimum.
        is_runs = timeit.repeat(stmt='insertion_sort(A)',
                                setup=insertion_setup.format(n),
                                repeat=model_repeats, number=1)
        t_is = sum(is_runs) / len(is_runs)

        x.append(n)
        y_ss.append(t_ss)
        y_is.append(t_is)

    # Coefficients are returned as first argument
    if numpy_error:
        quadratic_ss = quadratic_is = [0, 0]
    else:
        import numpy as np
        from scipy.optimize import curve_fit
        [quadratic_ss, _] = curve_fit(quadratic_model, np.array(x), np.array(y_ss))
        [quadratic_is, _] = curve_fit(quadratic_model, np.array(x), np.array(y_is))

    if output:
        print('Quadratic SS = {}*N*N + {}*N'.format(quadratic_ss[0], quadratic_ss[1]))
        print('Quadratic IS = {}*N*N + {}*N'.format(quadratic_is[0], quadratic_is[1]))
        print()

    tbl = DataTable([12,10,10,10,10,10,10],
                    ['N','TimeSS','ModelSS','MinIS', 'TimeIS', 'MaxIs', 'ModelIS'],
                    output=output, decimals=decimals)

    # Only the average was kept for the modeling rows, so the MinIS and
    # MaxIs columns repeat it there.
    for n, t_ss, t_is in zip(x, y_ss, y_is):
        tbl.row([n, t_ss, quadratic_model(n, quadratic_ss[0], quadratic_ss[1]),
                 t_is, t_is, t_is, quadratic_model(n, quadratic_is[0], quadratic_is[1])])

    for n in [2**k for k in range(min_k + model_points, max_k)]:
        # Selection Sort timing is consistent, so just run once.
        t_ss = timeit.timeit(stmt='selection_sort(A)',
                             setup=selection_setup.format(n), number=1)

        # Average Insertion Sort again, this time over fewer runs, and
        # keep the fastest and slowest run as well for graphing.
        all_times = timeit.repeat(stmt='insertion_sort(A)',
                                  setup=insertion_setup.format(n),
                                  repeat=table_repeats, number=1)
        t_is = sum(all_times) / len(all_times)
        t_min = min(all_times)
        t_max = max(all_times)

        tbl.row([n, t_ss, quadratic_model(n, quadratic_ss[0], quadratic_ss[1]),
                 t_min, t_is, t_max, quadratic_model(n, quadratic_is[0], quadratic_is[1])])
    return tbl
Exemple #5
0
def prototype_table(output=True, decimals=3):
    """
    Build the table of results for the prototype application.

    The prototype application simply sorts N shuffled values with
    ``list.sort``. Linear, quadratic and N log N models are fitted to the
    timings for N = 100, 1000 and 10000, and then compared with timings of
    larger inputs.
    """
    setup_template = '''
import random
x=list(range({}))
random.shuffle(x)'''

    def best_sort_time(size):
        """Return 1000 * the fastest of 100 repetitions (100 sorts each)."""
        timings = timeit.repeat(stmt='x.sort()',
                                setup=setup_template.format(size),
                                repeat=100,
                                number=100)
        return 1000 * min(timings)

    def quad_model(n, a, b):
        # A huge value steers the fit away from a negative N*N coefficient.
        return 1e10 if a < 0 else a * n * n + b * n

    nvals = [100, 1000, 10000]
    yvals = [best_sort_time(size) for size in nvals]

    # curve_fit hands back the fitted coefficients as its first result.
    if not numpy_error:
        import numpy as np
        from scipy.optimize import curve_fit
        nlog_n_coeffs = curve_fit(n_log_n_model, np.array(nvals),
                                  np.array(yvals))[0]
        linear_coeffs = curve_fit(linear_model, np.array(nvals),
                                  np.array(yvals))[0]
        # The guarded variant is used for fitting only; table rows below
        # are evaluated with the plain quadratic_model.
        quadratic_coeffs = curve_fit(quad_model, np.array(nvals),
                                     np.array(yvals))[0]
    else:
        nlog_n_coeffs = linear_coeffs = quadratic_coeffs = [0, 0]

    if output:
        print('Linear    = {:f}*N + {:f}'.format(linear_coeffs[0],
                                                 linear_coeffs[1]))
        print('Quadratic = {}*N*N + {}*N'.format(quadratic_coeffs[0],
                                                 quadratic_coeffs[1]))
        print('N Log N   = {:.12f}*N*log2(N)'.format(nlog_n_coeffs[0]))
        print()

    tbl = DataTable([12, 10, 10, 10, 10],
                    ['N', 'Time', 'Linear', 'Quad', 'NLogN'],
                    output=output,
                    decimals=decimals)

    def add_row(size, elapsed):
        """Append a measurement followed by the three model predictions."""
        tbl.row([
            size, elapsed,
            linear_model(size, linear_coeffs[0], linear_coeffs[1]),
            quadratic_model(size, quadratic_coeffs[0], quadratic_coeffs[1]),
            n_log_n_model(size, nlog_n_coeffs[0])
        ])

    # Rows the models were fitted on.
    for size, elapsed in zip(nvals, yvals):
        add_row(size, elapsed)

    # Larger inputs, timed on the spot.
    for size in [100000, 1000000, 10000000]:
        add_row(size, best_sort_time(size))

    if output:
        for label in ('Linear', 'Quad', 'NLogN'):
            print(label, tbl.pearsonr('Time', label))
        print(tbl.best_model('Time'))
    return tbl
Exemple #6
0
def large_multiplication(output=True, decimals=4):
    """
    Build a timing table for multiplying pairs of large numbers.

    Sizes 2**8 .. 2**12 are timed and used to fit four models: linear,
    quadratic, a*N^log2(3) and a*N^log2(3) + b*N. The table lists those
    rows and then freshly timed rows for sizes 2**13 .. 2**18, each next to
    the four predictions.
    """
    num = 1000
    log2_3 = math.log2(3)

    def time_multiply(size):
        """Return the total time of ``num`` calls to mult_pair."""
        return timeit.timeit(stmt='mult_pair(x)',
                             setup='''
from ch02.mult import create_pair, mult_pair 
x=create_pair({})'''.format(size),
                             number=num)

    def karatsuba(n, a):
        """Models a*N^k where k = log 3 in base 2."""
        return a * n**log2_3

    def tkn(n, a, b):
        """Models a*N^k +b*n where k = log 3 in base 2."""
        return a * n**log2_3 + b * n

    # Measurements the models are fitted to.
    sizes = [2**k for k in range(8, 13)]
    times = [time_multiply(size) for size in sizes]

    # curve_fit hands back the fitted coefficients as its first result.
    if not numpy_error:
        import numpy as np
        from scipy.optimize import curve_fit
        linear_coeffs = curve_fit(linear_model, np.array(sizes),
                                  np.array(times))[0]
        quadratic_coeffs = curve_fit(quadratic_model, np.array(sizes),
                                     np.array(times))[0]
        karatsuba_coeffs = curve_fit(karatsuba, np.array(sizes),
                                     np.array(times))[0]
        tkn_coeffs = curve_fit(tkn, np.array(sizes), np.array(times))[0]
        # The fitted models are only announced when fitting took place.
        if output:
            print('Karatsuba={}*N^1.585'.format(karatsuba_coeffs[0]))
            print('TK={}*N^1.585+{}*N'.format(tkn_coeffs[0], tkn_coeffs[1]))
            print()
    else:
        linear_coeffs = quadratic_coeffs = karatsuba_coeffs = tkn_coeffs = [
            0, 0
        ]

    tbl = DataTable([8, 12, 12, 12, 12, 12],
                    ['N', 'Time', 'Linear', 'Quad', 'Karatsuba', 'TKN'],
                    output=output,
                    decimals=decimals)

    def add_row(size, elapsed):
        """Append a measurement followed by the four model predictions."""
        tbl.row([
            size, elapsed,
            linear_model(size, linear_coeffs[0], linear_coeffs[1]),
            quadratic_model(size, quadratic_coeffs[0], quadratic_coeffs[1]),
            karatsuba(size, karatsuba_coeffs[0]),
            tkn(size, tkn_coeffs[0], tkn_coeffs[1])
        ])

    # Rows the models were fitted on.
    for size, elapsed in zip(sizes, times):
        add_row(size, elapsed)

    # Larger sizes, timed on the spot.
    for size in [2**k for k in range(13, 19)]:
        add_row(size, time_multiply(size))
    return tbl