Exemple #1
0
import numpy as np
import pandas as pd
import os
import string
import itertools

from utils.profiler import ExponentialRange

# Resolve the output location relative to this script rather than the current
# working directory, so the files land in the same place however it is run.
src_dir = os.path.dirname(os.path.abspath(__file__))
data_dir = os.path.join(src_dir, '..', 'data')
target_dir = os.path.join(data_dir, 'big_numeric_csv_files')

# DataFrame.to_csv does not create missing parent directories, so make sure
# the target exists before the first file is written.
os.makedirs(target_dir, exist_ok=True)

# Max size in rows as a power of ten
# NOTE(review): the arguments look like (min exponent, max exponent, exponent
# step) -- confirm against utils.profiler.ExponentialRange.
exp_range = ExponentialRange(0, 7, 1 / 4)
num_cols = 10
col_names = list(string.ascii_uppercase[:num_cols])

# Generate the largest table once; every file below is a row-prefix of it.
_data = np.random.random((exp_range.max, num_cols))
data = pd.DataFrame(_data, columns=col_names)

# Two-letter codes ('AA', 'AB', ...) in product order, capped at
# exp_range.max entries. islice avoids materialising all 26 * 26 pairs just to
# slice them afterwards.
_letters = string.ascii_uppercase
_file_codes = list(
    itertools.islice(itertools.product(_letters, repeat=2), exp_range.max)
)
file_codes = [''.join(code) for code in _file_codes]

# One CSV per size yielded by the range: the j-th file gets the j-th code and
# the first `num_rows` rows of the shared table. Indexing (rather than zip)
# keeps the IndexError if the range ever yields more sizes than there are codes.
for j, num_rows in enumerate(exp_range.iterator()):
    code = file_codes[j]
    filename = f'file_{code}_rows_{num_rows}.csv'
    filepath = os.path.join(target_dir, filename)
    data.iloc[:num_rows].to_csv(filepath, index=False)
Exemple #2
0
@time_this(lambda x: len(x))
def np_fast_cusum(values: np.ndarray) -> np.ndarray:
    """
    Return the running (cumulative) sum of ``values``.

    The work is delegated entirely to the array's own ``cumsum`` method, so
    no Python-level loop runs here. The original author describes this as
    linear-time and backed by compiled code.
    """
    running_totals = values.cumsum()
    return running_totals


if __name__ == '__main__':

    exp_range = ExponentialRange(0, 8, 1 / 4)
    values = random_numeric_list(exp_range.max)

    with timed_report():
        for i in exp_range.iterator(4):
            slow_cusum(values[:i])

        for i in exp_range.iterator(4):
            slow_cusum_expanded(values[:i])

        for i in exp_range.iterator():
            python_fast_cusum(values[:i])

        for i in exp_range.iterator():
            pandas_fast_cusum(pd.Series(values[:i]))

        for i in exp_range.iterator():
            numba_fast_cusum(np.array(values[:i]))

        for i in exp_range.iterator():
Exemple #3
0
# Timed wrapper: the decorator is given a callable that reports the length of
# the first positional argument for each call.
@time_this(lambda *args, **kwargs: len(args[0]))
def numba_fast_moving_avg(values: np.ndarray, m: int = 20) -> np.ndarray:
    """Forward ``values`` and ``m`` to ``_numba_fast_moving_avg`` unchanged."""
    averaged = _numba_fast_moving_avg(values, m=m)
    return averaged


if __name__ == '__main__':

    exp_range = ExponentialRange(2, 7, 1 / 4)
    values = random_numeric_list(exp_range.max)
    series_values = pd.Series(values)
    np_values = np.array(values)

    # (implementation, input container, positional args for exp_range.iterator)
    # Each implementation receives the container type it is written for; the
    # first two pass an explicit argument to the iterator, the rest use its
    # default.
    benchmarks = (
        (slow_moving_avg, values, (5,)),
        (fast_moving_avg, values, (7,)),
        (np_fast_moving_avg, np_values, ()),
        (pd_fast_moving_avg, series_values, ()),
    )

    with timed_report():
        for moving_avg, samples, iterator_args in benchmarks:
            for size in exp_range.iterator(*iterator_args):
                moving_avg(samples[:size], m=100)
                # Collect garbage after every call, as the original loops did.
                gc.collect()