from time import process_time

import matplotlib.pyplot as plt
from fitter import Fitter
from scipy.stats import kstest

from collatz.v1.misc import get_fullpath, get_data, logmsg

CLOS_FILE = get_fullpath('data/collatz-clos2-100.csv')


def get_miss(stop_file):
    """Return the 'miss' column of the data loaded from ``stop_file``.

    Only the 'miss' column is requested from ``get_data``.
    """
    return get_data(stop_file, cols=['miss'])['miss']


def fit_exp(miss):
    """Fit an exponential distribution to ``miss`` and return its parameters.

    A ``Fitter`` restricted to the 'expon' distribution is run on the data,
    the fitted parameters are logged, the fitter summary is produced and the
    matplotlib figure is shown.

    :param miss: observations to fit.
    :return: the fitted parameters stored under 'expon' by the fitter.
    """
    fitter = Fitter(miss, distributions=['expon'], timeout=600)
    fitter.fit()
    fitted = fitter.fitted_param['expon']
    logmsg('fitted params exp = %s', str(fitted))
    fitter.summary()
    plt.show()
    return fitted


def stat_test(x, params):
    """Run a Kolmogorov-Smirnov test of ``x`` against 'expon' with ``params``.

    The ``kstest`` call is bracketed by ``process_time()`` readings so its
    CPU time can be measured.

    NOTE(review): the visible source ends right after the second timing
    call; nothing is returned or logged in the part shown here -- confirm
    against the complete file.
    """
    start = process_time()
    _, pvalue = kstest(rvs=x, cdf='expon', args=params, mode='asymp')
    end = process_time()
from time import process_time

import matplotlib.pyplot as plt
from fitter import Fitter
from scipy.stats import kstest

from collatz.v1.misc import get_fullpath, get_data, logmsg

CLOS_FILE = get_fullpath('data/collatz-clos1-824559.csv')


def get_miss(stop_file):
    """Return the 'miss' column of the data loaded from ``stop_file``.

    Only the 'miss' column is requested from ``get_data``.
    """
    return get_data(stop_file, cols=['miss'])['miss']


def fit_exp(miss):
    """Fit an exponential distribution to ``miss`` and return its parameters.

    A ``Fitter`` restricted to the 'expon' distribution is run on the data,
    the fitted parameters are logged, the fitter summary is produced and the
    matplotlib figure is shown.

    :param miss: observations to fit.
    :return: the fitted parameters stored under 'expon' by the fitter.
    """
    fitter = Fitter(miss, distributions=['expon'], timeout=600)
    fitter.fit()
    fitted = fitter.fitted_param['expon']
    logmsg('fitted params exp = %s', str(fitted))
    fitter.summary()
    plt.show()
    return fitted


def stat_test(x, params):
    """Run a Kolmogorov-Smirnov test of ``x`` against 'expon' with ``params``.

    The ``kstest`` call is bracketed by ``process_time()`` readings so its
    CPU time can be measured.

    NOTE(review): the visible source ends right after the second timing
    call; nothing is returned or logged in the part shown here -- confirm
    against the complete file.
    """
    start = process_time()
    _, pvalue = kstest(rvs=x, cdf='expon', args=params, mode='asymp')
    end = process_time()
import sys
import warnings

import matplotlib.pyplot as plt
from fitter import Fitter

from collatz.v1.misc import get_fullpath, get_data, logmsg

CLOS_FILE = get_fullpath('data/collatz-stop.csv')


def get_col(stop_file, col):
    """Return column ``col`` of the data loaded from ``stop_file``.

    Only that single column is requested from ``get_data``.
    """
    return get_data(stop_file, cols=[col])[col]


def fit_all(miss):
    """Fit the candidate distributions to ``miss`` and report the results.

    Prints the fitter's error table sorted by 'sumsquare_error', logs the
    best fit, produces the fitter summary and shows the matplotlib figure.

    :param miss: observations to fit.
    """
    # Candidate sets tried earlier, kept for reference:
    #   get_distributions()
    #   ['genpareto', 'betaprima', 'lomax', 'f', 'ncf']
    #   ['genpareto', 'lomax', 'f', 'ncf']
    #   ['genpareto', 'lomax']
    #   ['expon', 'lomax']
    candidates = ['expon']
    fitter = Fitter(miss, timeout=600, distributions=candidates)
    fitter.fit()
    ranking = fitter.df_errors.sort_values('sumsquare_error')
    print(ranking)
    logmsg('best fit = %s', str(fitter.get_best()))
    fitter.summary()
    plt.show()
# computes collatz stop function up to certain limit
import pandas as pd

from collatz.v1.misc import get_fullpath, logmsg

MAX_N = 100
STOP_FILE = get_fullpath('data/collatz-stop-%d.csv' % MAX_N)
COLS = ['n', 'stop', 'miss']

# stopping times already computed, keyed by the starting number
cache = dict()


def collatz_stop(n):
    """Compute the Collatz stopping time of ``n`` using the module cache.

    The trajectory n -> n // 2 (even) / 3 * n + 1 (odd) is followed until it
    reaches 1 or hits a number whose stopping time is already in ``cache``;
    in the latter case the cached value is added to the steps taken so far.
    Only the starting number is stored in ``cache``, not the intermediate
    values visited along the way.

    :param n: starting number, must be >= 1.
    :return: tuple ``(stop, miss)`` where ``stop`` is the stopping time and
        ``miss`` is the number of visited values that were not in ``cache``.
    :raises ValueError: if ``n`` is smaller than 1; such values never reach 1
        (0 maps to itself, negatives cycle), so the loop would not terminate.
    """
    if n < 1:
        raise ValueError('n must be a positive integer, got %r' % (n,))

    orig_n = n
    stop = 0
    miss = 0
    while n != 1:
        if n in cache:
            # the rest of the trajectory is already known
            stop = stop + cache[n]
            break
        miss += 1
        if n % 2 == 0:
            n = n // 2
        else:
            n = 3 * n + 1
        stop += 1
    cache[orig_n] = stop
    return stop, miss
# computes collatz closures with method 1
# NOTE(review): the output file name below says 'clos2' -- confirm which
# method this script actually implements.
import pandas as pd
from sortedcontainers import SortedSet

from collatz.v1.misc import get_fullpath, logmsg

START_N = 100
CLOS_FILE = get_fullpath('data/collatz-clos2-%d.csv' % START_N)
COLS = ['n', 'stop', 'miss']


def collatz_stop(n, cache, pend):
    """Follow the Collatz trajectory of ``n``, recording cache misses.

    The trajectory is followed until it reaches 1 or hits a number present
    in ``cache``; in that case the first element of the cached entry is
    added to the steps taken so far. Every visited value that is not in
    ``cache`` is counted as a miss and added to ``pend``. The pair
    ``(stop, miss)`` is stored in ``cache`` under the starting number.

    :param n: starting number.
    :param cache: mapping from number to ``(stop, miss)``; updated in place.
    :param pend: collection with an ``add`` method; receives every missed
        value.

    NOTE(review): the visible source ends after the cache update; no return
    statement is shown here -- confirm against the complete file.
    """
    orig_n = n
    stop = 0
    miss = 0
    while n != 1:
        if n in cache:
            # remaining steps are already known for this value
            stop += cache[n][0]
            break
        miss += 1
        pend.add(n)
        n = n // 2 if n % 2 == 0 else 3 * n + 1
        stop += 1
    cache[orig_n] = stop, miss
# TODO: fitting stopping time to a dist seems harder, better see miss seq
import matplotlib.pyplot as plt
from fitter import Fitter
from scipy.stats import normaltest

from collatz.v1.misc import get_fullpath, get_data, logmsg

STOP_FILE = get_fullpath('data/collatz-stop-100.csv')

# 'stop' column of the stop-function data
stop = get_data(STOP_FILE)['stop']

# histogram of the stopping times, normalised to a density
plt.hist(stop, density=True, bins=100)
plt.show()

# normality test on the stopping times; only the p-value is reported
_, pvalue = normaltest(stop)
logmsg('Normality test p-value = %.20f', pvalue)

# Fit on at most 300000 observations. The size is capped at len(stop)
# because sampling more rows than exist (without replacement) raises
# ValueError -- assumes `stop` is a pandas Series; confirm against get_data.
sample_size = min(len(stop), 300000)
f = Fitter(stop.sample(sample_size), timeout=60)
f.fit()
f.summary()