"""Generate numeric CSV files with exponentially growing row counts.

One random table is built up front; for every size ``i`` yielded by the
exponential range, the first ``i`` rows are written to
``file_<code>_rows_<i>.csv`` inside ``../data/big_numeric_csv_files``
(relative to this file), where ``<code>`` is a two-letter label assigned in
iteration order.
"""
import itertools
import os
import string

import numpy as np
import pandas as pd

from utils.profiler import ExponentialRange

src_dir = os.path.dirname(os.path.abspath(__file__))
data_dir = os.path.join(src_dir, '..', 'data')
target_dir = os.path.join(data_dir, 'big_numeric_csv_files')

# Max size in rows as a power of ten
exp_range = ExponentialRange(0, 7, 1 / 4)

num_cols = 10
col_names = list(string.ascii_uppercase[:num_cols])

# Build the largest table once; every file written below is a prefix of it.
_data = np.random.random((exp_range.max, num_cols))
data = pd.DataFrame(_data, columns=col_names)

# Two-letter codes ('AA', 'AB', ...) label the files in generation order.
_letters = string.ascii_uppercase
_file_codes = itertools.product(_letters, repeat=2)
_file_codes = list(_file_codes)[:exp_range.max]
file_codes = [''.join(code) for code in _file_codes]

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before the first write.
os.makedirs(target_dir, exist_ok=True)

for j, i in enumerate(exp_range.iterator()):
    code = file_codes[j]
    filename = f'file_{code}_rows_{i}.csv'
    filepath = os.path.join(target_dir, filename)
    data.iloc[:i].to_csv(filepath, index=False)
# The callable handed to time_this maps the call's argument to its length;
# presumably time_this records that as the input size for the timing report
# -- confirm against utils.profiler.
@time_this(lambda x: len(x))
def np_fast_cusum(values: np.ndarray) -> np.ndarray:
    """
    Return the cumulative sum of ``values`` by calling ``values.cumsum()``.

    This is O(n) and optimized with C code
    """
    return values.cumsum()


if __name__ == '__main__':
    # Benchmark driver: each cumulative-sum variant is called on growing
    # prefixes ``values[:i]`` of one shared input.
    exp_range = ExponentialRange(0, 8, 1 / 4)
    values = random_numeric_list(exp_range.max)

    with timed_report():
        # NOTE(review): the slow variants pass 4 to iterator(); presumably
        # this caps their largest input size -- confirm against
        # ExponentialRange.iterator.
        for i in exp_range.iterator(4):
            slow_cusum(values[:i])

        for i in exp_range.iterator(4):
            slow_cusum_expanded(values[:i])

        for i in exp_range.iterator():
            python_fast_cusum(values[:i])

        # The pandas and numba variants receive the prefix converted to a
        # Series / ndarray at the call site.
        for i in exp_range.iterator():
            pandas_fast_cusum(pd.Series(values[:i]))

        for i in exp_range.iterator():
            numba_fast_cusum(np.array(values[:i]))

        # NOTE(review): the body of this final loop is cut off in the
        # reviewed excerpt; it is left exactly as found.
        for i in exp_range.iterator():
# Register time-able version of function @time_this(lambda *args, **kwargs: len(args[0])) def numba_fast_moving_avg(values: np.ndarray, m: int = 20) -> np.ndarray: return _numba_fast_moving_avg(values, m=m) if __name__ == '__main__': exp_range = ExponentialRange(2, 7, 1 / 4) values = random_numeric_list(exp_range.max) series_values = pd.Series(values) np_values = np.array(values) with timed_report(): for i in exp_range.iterator(5): slow_moving_avg(values[:i], m=100) gc.collect() for i in exp_range.iterator(7): fast_moving_avg(values[:i], m=100) gc.collect() for i in exp_range.iterator(): np_fast_moving_avg(np_values[:i], m=100) gc.collect() for i in exp_range.iterator(): pd_fast_moving_avg(series_values[:i], m=100) gc.collect()