コード例 #1
0
from time import process_time

import matplotlib.pyplot as plt
from fitter import Fitter
from scipy.stats import kstest

from collatz.v1.misc import get_fullpath, get_data, logmsg

# Location of the CSV data file, resolved through the project helper
# get_fullpath.
# NOTE(review): presumably the "closure" data set this script analyses; its
# use is not visible in this excerpt — confirm.
CLOS_FILE = get_fullpath('data/collatz-clos2-100.csv')


def get_miss(stop_file):
    """Return the 'miss' column of the data file ``stop_file``.

    The file is loaded through ``get_data`` restricted to the single column
    ``'miss'``; that column is then selected from the result and returned.
    """
    return get_data(stop_file, cols=['miss'])['miss']


def fit_exp(miss):
    """Fit the 'expon' distribution to ``miss`` and return its parameters.

    A ``Fitter`` limited to the single distribution ``'expon'`` (created
    with ``timeout=600``) is fitted to the data. The fitted parameters are
    logged, the Fitter summary is produced and the matplotlib figure is
    shown before the parameters are returned.
    """
    fitter = Fitter(miss, distributions=['expon'], timeout=600)
    fitter.fit()

    # Fitter stores the fitted parameters per distribution name.
    expon_params = fitter.fitted_param['expon']
    logmsg('fitted params exp = %s', str(expon_params))

    fitter.summary()
    plt.show()
    return expon_params


def stat_test(x, params):
    """Run a Kolmogorov-Smirnov test of ``x`` against the 'expon' distribution.

    ``params`` is forwarded to ``kstest`` as the distribution arguments.

    NOTE(review): this definition appears to be cut off in the visible
    excerpt; ``start``, ``end`` and ``pvalue`` are computed but not used in
    the lines shown. Presumably they are logged or returned further down —
    confirm against the full file.
    """
    # process_time() readings taken before and after the test bracket the
    # CPU time spent inside kstest.
    start = process_time()
    # The test statistic is discarded; only the p-value is kept.
    # mode='asymp' requests the asymptotic p-value computation.
    # NOTE(review): newer SciPy releases name this parameter `method` —
    # confirm against the installed SciPy version.
    _, pvalue = kstest(rvs=x, cdf='expon', args=params, mode='asymp')
    end = process_time()
コード例 #2
0
from time import process_time

import matplotlib.pyplot as plt
from fitter import Fitter
from scipy.stats import kstest

from collatz.v1.misc import get_fullpath, get_data, logmsg

# Location of the CSV data file, resolved through the project helper
# get_fullpath.
# NOTE(review): presumably the "closure" data set this script analyses; its
# use is not visible in this excerpt — confirm.
CLOS_FILE = get_fullpath('data/collatz-clos1-824559.csv')


def get_miss(stop_file):
    """Return the 'miss' column of the data file ``stop_file``.

    The file is loaded through ``get_data`` restricted to the single column
    ``'miss'``; that column is then selected from the result and returned.
    """
    return get_data(stop_file, cols=['miss'])['miss']


def fit_exp(miss):
    """Fit the 'expon' distribution to ``miss`` and return its parameters.

    A ``Fitter`` limited to the single distribution ``'expon'`` (created
    with ``timeout=600``) is fitted to the data. The fitted parameters are
    logged, the Fitter summary is produced and the matplotlib figure is
    shown before the parameters are returned.
    """
    fitter = Fitter(miss, distributions=['expon'], timeout=600)
    fitter.fit()

    # Fitter stores the fitted parameters per distribution name.
    expon_params = fitter.fitted_param['expon']
    logmsg('fitted params exp = %s', str(expon_params))

    fitter.summary()
    plt.show()
    return expon_params


def stat_test(x, params):
    """Run a Kolmogorov-Smirnov test of ``x`` against the 'expon' distribution.

    ``params`` is forwarded to ``kstest`` as the distribution arguments.

    NOTE(review): this definition appears to be cut off in the visible
    excerpt; ``start``, ``end`` and ``pvalue`` are computed but not used in
    the lines shown. Presumably they are logged or returned further down —
    confirm against the full file.
    """
    # process_time() readings taken before and after the test bracket the
    # CPU time spent inside kstest.
    start = process_time()
    # The test statistic is discarded; only the p-value is kept.
    # mode='asymp' requests the asymptotic p-value computation.
    # NOTE(review): newer SciPy releases name this parameter `method` —
    # confirm against the installed SciPy version.
    _, pvalue = kstest(rvs=x, cdf='expon', args=params, mode='asymp')
    end = process_time()
コード例 #3
0
import sys
import warnings

import matplotlib.pyplot as plt
from fitter import Fitter

from collatz.v1.misc import get_fullpath, get_data, logmsg

# Location of the CSV data file, resolved through the project helper
# get_fullpath.
# NOTE(review): the constant is named CLOS_FILE but points at a "stop" file;
# the name looks carried over from a sibling script — confirm which is meant.
CLOS_FILE = get_fullpath('data/collatz-stop.csv')


def get_col(stop_file, col):
    """Return column ``col`` of the data file ``stop_file``.

    The file is loaded through ``get_data`` restricted to that single
    column, which is then selected from the result and returned.
    """
    return get_data(stop_file, cols=[col])[col]


def fit_all(miss):
    """Fit the candidate distributions to ``miss`` and report the outcome.

    Only ``'expon'`` is currently in the candidate list. After fitting, the
    Fitter error table is printed sorted by ``'sumsquare_error'``, the best
    fit is logged, the Fitter summary is produced and the matplotlib figure
    is shown. Nothing is returned.
    """
    # Alternative candidate sets kept for reference:
    #   get_distributions()
    #   ['genpareto', 'betaprima', 'lomax', 'f', 'ncf']
    #   ['genpareto', 'lomax', 'f', 'ncf']
    #   ['genpareto', 'lomax']
    #   ['expon', 'lomax']
    candidates = ['expon']

    fitter = Fitter(miss, timeout=600, distributions=candidates)
    fitter.fit()

    ranked_errors = fitter.df_errors.sort_values('sumsquare_error')
    print(ranked_errors)

    best_fit = fitter.get_best()
    logmsg('best fit = %s', str(best_fit))

    fitter.summary()
    plt.show()

コード例 #4
0
# computes collatz stop function up to certain limit

import pandas as pd

from collatz.v1.misc import get_fullpath, logmsg

# Limit value for this script; in the visible lines it is only used to build
# the data file name below.
MAX_N = 100

# Path of the CSV file for this limit, resolved through the project helper
# get_fullpath (the limit is embedded in the file name).
STOP_FILE = get_fullpath('data/collatz-stop-%d.csv' % MAX_N)
# NOTE(review): presumably the CSV column names (start value, stopping time,
# cache-miss count) — confirm against the code that writes the file.
COLS = ['n', 'stop', 'miss']
# Memo of finished computations: maps a start value to its stopping time.
cache = {}


def collatz_stop(n):
    """Return ``(stop, miss)`` for the Collatz trajectory starting at ``n``.

    ``stop`` is the number of Collatz steps (``n // 2`` for even ``n``,
    ``3 * n + 1`` for odd ``n``) needed to reach 1. ``miss`` is the number
    of trajectory values that were looked up in the module-level ``cache``
    and not found before the walk either reached 1 or hit a cached value.

    Only the result for the start value itself is stored in ``cache``;
    intermediate trajectory values are not memoized, so ``miss`` reflects
    the cache contents built up by earlier calls.

    Raises:
        ValueError: if ``n`` is smaller than 1. Such values can never reach
            1 (0 maps to itself, negative values stay negative), so the
            loop below would otherwise never terminate.
    """
    if n < 1:
        raise ValueError('collatz_stop requires n >= 1, got %r' % (n,))

    orig_n = n
    stop = 0
    miss = 0
    while n != 1:
        if n in cache:
            # The remaining distance to 1 is already known: add it and stop.
            stop += cache[n]
            break
        miss += 1
        if n % 2 == 0:
            n //= 2
        else:
            n = 3 * n + 1
        stop += 1
    cache[orig_n] = stop
    return stop, miss
コード例 #5
0
# computes collatz closures with method 1

import pandas as pd
from sortedcontainers import SortedSet

from collatz.v1.misc import get_fullpath, logmsg

# Start value for this script; in the visible lines it is only used to build
# the data file name below.
START_N = 100

# Path of the CSV file for this start value, resolved through the project
# helper get_fullpath.
# NOTE(review): the file header comment says "method 1" while the file name
# says "clos2" — confirm which method this script implements.
CLOS_FILE = get_fullpath('data/collatz-clos2-%d.csv' % START_N)
# NOTE(review): presumably the CSV column names (start value, stopping time,
# cache-miss count) — confirm against the code that writes the file.
COLS = ['n', 'stop', 'miss']


def collatz_stop(n, cache, pend):
    """Walk the Collatz trajectory of ``n`` and record the result in ``cache``.

    Args:
        n: start value; must be at least 1.
        cache: mapping from a start value to its ``(stop, miss)`` tuple. It
            is read for every visited value and updated for ``n`` itself.
        pend: collection supporting ``add``; every visited value that is not
            found in ``cache`` is added to it.

    The walk applies ``n // 2`` to even values and ``3 * n + 1`` to odd
    values until it reaches 1 or meets a value already in ``cache``, whose
    stored stopping time is then added. ``stop`` counts the steps to 1 and
    ``miss`` counts the visited values that were not cached. The pair is
    stored as ``cache[n] = (stop, miss)``.

    Raises:
        ValueError: if ``n`` is smaller than 1. Such values can never reach
            1 (0 maps to itself, negative values stay negative), so the
            loop below would otherwise never terminate.
    """
    if n < 1:
        raise ValueError('collatz_stop requires n >= 1, got %r' % (n,))

    orig_n = n
    stop = 0
    miss = 0
    while n != 1:
        if n in cache:
            # Cached entries are (stop, miss) tuples; only stop is reused.
            stop += cache[n][0]
            break
        miss += 1
        pend.add(n)
        if n % 2 == 0:
            n //= 2
        else:
            n = 3 * n + 1
        stop += 1
    cache[orig_n] = stop, miss
コード例 #6
0
# TODO: fitting stopping time to a dist seems harder, better see miss seq

import matplotlib.pyplot as plt
from fitter import Fitter
from scipy.stats import normaltest

from collatz.v1.misc import get_fullpath, get_data, logmsg

# Load the 'stop' column of the data file resolved by get_fullpath.
STOP_FILE = get_fullpath('data/collatz-stop-100.csv')
stop = get_data(STOP_FILE)['stop']

# Density-normalised histogram of the values, 100 bins.
plt.hist(stop, density=True, bins=100)
plt.show()

# scipy.stats.normaltest: only the p-value is kept and logged.
_, pvalue = normaltest(stop)
logmsg('Normality test p-value = %.20f', pvalue)

# Fit Fitter's default distribution set on a random subsample of at most
# 300000 values. The size is clamped to the population because sampling
# without replacement raises ValueError when more rows are requested than
# exist.
# NOTE(review): assumes `stop` is a pandas Series (it exposes .sample) —
# confirm against get_data.
sample_size = min(len(stop), 300000)
f = Fitter(stop.sample(sample_size), timeout=60)
f.fit()
# NOTE(review): unlike the sibling scripts, no plt.show() follows summary();
# confirm whether the summary plot is expected to be displayed here.
f.summary()