def test_teststat(self):
        with warnings.catch_warnings(record=True) as w:
            kpss_stat, pval, lags, crits = kpss(self.x, 'c', 3)
        assert_almost_equal(kpss_stat, 5.0169, DECIMAL_3)

        with warnings.catch_warnings(record=True) as w:
            kpss_stat, pval, lags, crits = kpss(self.x, 'ct', 3)
        assert_almost_equal(kpss_stat, 1.1828, DECIMAL_3)
    def test_pval(self):
        with warnings.catch_warnings(record=True) as w:
            kpss_stat, pval, lags, crits = kpss(self.x, 'c', 3)
        assert_equal(pval, 0.01)

        with warnings.catch_warnings(record=True) as w:
            kpss_stat, pval, lags, crits = kpss(self.x, 'ct', 3)
        assert_equal(pval, 0.01)
    def test_fail_unclear_hypothesis(self):
        # these should be fine,
        kpss(self.x, 'c')
        kpss(self.x, 'C')
        kpss(self.x, 'ct')
        kpss(self.x, 'CT')

        assert_raises(ValueError, kpss, self.x, "unclear hypothesis")
    def test_store(self):
        with warnings.catch_warnings(record=True) as w:
            kpss_stat, pval, crit, store = kpss(self.x, 'c', 3, True)

        # assert attributes, and make sure they're correct
        assert_equal(store.nobs, len(self.x))
        assert_equal(store.lags, 3)
    def test_fail_unclear_hypothesis(self):
        # these should be fine,
        with warnings.catch_warnings(record=True) as w:
            kpss(self.x, 'c')
            kpss(self.x, 'C')
            kpss(self.x, 'ct')
            kpss(self.x, 'CT')

        assert_raises(ValueError, kpss, self.x, "unclear hypothesis")
 def test_lags(self):
     # real GDP from macrodata data set
     with warnings.catch_warnings(record=True):
         lags = kpss(self.x, 'c', lags='auto')[2]
     assert_equal(lags, 9)
     # sunspot activity from the sunspots data set
     with warnings.catch_warnings(record=True):
         lags = kpss(sunspots.load().data['SUNACTIVITY'], 'c',
                     lags='auto')[2]
     assert_equal(lags, 7)
     # volumes from nile data set
     with warnings.catch_warnings(record=True):
         lags = kpss(nile.load().data['volume'], 'c', lags='auto')[2]
     assert_equal(lags, 5)
     # log-coinsurance from randhie data set
     with warnings.catch_warnings(record=True):
         lags = kpss(randhie.load().data['lncoins'], 'ct', lags='auto')[2]
     assert_equal(lags, 75)
     # in-vehicle time from modechoice data set
     with warnings.catch_warnings(record=True):
         lags = kpss(modechoice.load().data['invt'], 'ct', lags='auto')[2]
     assert_equal(lags, 18)
Example #7
 def test_lags(self):
     with warnings.catch_warnings(record=True) as w:
         kpss_stat, pval, lags, crits = kpss(self.x, 'c')
     assert_equal(lags,
                  int(np.ceil(12. * np.power(len(self.x) / 100., 1 / 4.))))
Example #8
# Step 1: generate an ARMA process
# (imports added so the snippet runs standalone)
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.tsa.stattools as stools
import statsmodels.graphics.tsaplots as tplot

np.random.seed(100)

arparams = np.array([0.6, -0.8])
maparams = np.array([.75])

ar = np.r_[1, -arparams]  # add zero-lag and negate
ma = np.r_[1, maparams]  # add zero-lag
y = sm.tsa.arma_generate_sample(ar, ma, 100)

#model = sm.tsa.ARMA(y, (2, 1)).fit(trend='nc', disp=0)

# Descriptive TSA statistics
stools.adfuller(y)
stools.kpss(y)

# Plot ACF and PACF
tplot.plot_acf(y)
tplot.plot_pacf(y)

# Fit ARMA model
tsmodel = sm.tsa.ARMA(y, (2, 1)).fit(trend='nc', disp=0)

residuals = tsmodel.resid

# q_stat expects autocorrelations (from lag 1 onward), not the raw residuals
acf_resid = stools.acf(residuals, nlags=10, fft=False)
stools.q_stat(acf_resid[1:], nobs=len(residuals))

fig = plt.figure()
qq_ax = fig.add_subplot()
sm.qqplot(y, line='s', ax=qq_ax)
plt.show()
    def test_fail_nonvector_input(self):
        with warnings.catch_warnings(record=True) as w:
            kpss(self.x)  # should be fine

        x = np.random.rand(20, 2)
        assert_raises(ValueError, kpss, x)
 def test_unknown_lags(self):
     # an unknown lags value should raise a ValueError
     with pytest.raises(ValueError):
         kpss(self.x, 'c', lags='unknown')
Example #11
def __construct_list_order_difference__(serie):
    """
    __Description__:
      The function runs two tests to assess the stationarity of the TS:
        * augmented Dickey-Fuller test
        * Kwiatkowski-Phillips-Schmidt-Shin test
      By default the function relies on the 5% critical values, but this can
      be changed by modifying the `seuil` variables (str: '1%', '5%', '10%')
      or via alpha (set a value for the p-value; default: 0.05).

      For the ADF test, the p-value relates to the likelihood of the
      associated null hypothesis H0:
      H0 = there is a unit root ... What we care about is the alternative
      hypothesis = the series is stationary or trend-stationary.

      For the KPSS test, the p-value relates to the likelihood of the
      associated null hypothesis H0:
      H0 = the series is stationary around a trend; the alternative
      hypothesis is that there is a unit root.

    __Return__:
      lst_d : [list] [0]/[1]/[-1], the candidate values for d

    __Remarks__:
      To understand the test results:
      https://www.statsmodels.org/stable/examples/notebooks/generated/
      stationarity_detrending_adf_kpss.html?highlight=stationarity

      The function follows the logic below:
      %%%%%%%%%%%%%%%%%%%(from the page referenced above)%%%%%%%%%%%%%%%%%%%%%%
        Case 1: Both tests conclude that the series is not stationary
                -> The series is not stationary
        Case 2: Both tests conclude that the series is stationary
                -> The series is stationary
        Case 3: KPSS indicates stationarity and ADF indicates non-stationarity
                -> The series is trend stationary.
                Trend needs to be removed to make series strict stationary.
                The detrended series is checked for stationarity.
        Case 4: KPSS indicates non-stationarity and ADF indicates stationarity
                -> The series is difference stationary.
                Differencing is to be used to make series stationary.
                The differenced series is checked for stationarity.
      %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

      The p-value is a probability score on which we can decide whether or
      not to reject H0. If p is below a predefined alpha criterion
      (typically 0.05), we reject H0.

      The (ADF/KPSS) test statistic is a quantity computed from a formula.
      To reject H0, the statistic must be more extreme than the critical
      value under consideration (below it for ADF, above it for KPSS), and
      this is reflected in a small p-value.
    """
    reg_values_adf = ['nc', 'c', 'ct', 'ctt']
    reg_values_kpss = ['c', 'ct']

    adf_results = []
    kpss_results = []

    ADF_seuil = '1%'
    KPSS_seuil = '10%'
    alpha_ADF = 1e-4
    alpha_KPSS = 1e-4  # NB: the KPSS p-value is only reported within [0.01, 0.1]

    for reg_val in reg_values_adf:
        adf_result = adfuller(serie, regression=reg_val)
        print("ADF__:", adf_result)
        adf_results.append([adf_result[0], adf_result[1], adf_result[4]])
    for reg_val in reg_values_kpss:
        kpss_result = kpss(serie, regression=reg_val, nlags='auto')
        print("KPSS__:", kpss_result)
        kpss_results.append([kpss_result[0], kpss_result[1], kpss_result[3]])

    stat_adf = False
    stat_kpss = False
    p_val_adf = False
    p_val_kpss = False

    for adf_elem in adf_results:
        if adf_elem[0] < adf_elem[2][ADF_seuil]:
            stat_adf = True
        if adf_elem[1] < alpha_ADF:
            p_val_adf = True
    for kpss_elem in kpss_results:
        if kpss_elem[0] < kpss_elem[2][KPSS_seuil]:
            stat_kpss = True
        if kpss_elem[1] > alpha_KPSS:
            p_val_kpss = True

    if stat_adf and stat_kpss and p_val_adf and p_val_kpss:
        print("[ADF] + [KPSS]: the TS is stationary.")
        return ([0])
    if not (stat_adf and p_val_adf) and not (stat_kpss and p_val_kpss):
        print("[ADF] + [KPSS]: the TS is not stationary.")
        return ([-1])
    if (stat_adf and p_val_adf) and (not (stat_kpss and p_val_kpss)):
        print(
            "[ADF]: TS stationary\n[KPSS]: TS non-stationary\n  --> Stationary after differencing"
        )
        return ([1])
    if (not (stat_adf and p_val_adf)) and (stat_kpss and p_val_kpss):
        print(
            "[KPSS]: TS stationary\n[ADF]: TS non-stationary\n  --> Stationary around a trend."
        )
        return ([1])
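
A minimal usage sketch of the function above (illustrative, not from the original source; it assumes a statsmodels version that still accepts regression='nc' for adfuller, as the loop above does):

import numpy as np
from statsmodels.tsa.stattools import adfuller, kpss

serie = np.cumsum(np.random.randn(200))  # a unit-root (non-stationary) series
lst_d = __construct_list_order_difference__(serie)
print(lst_d)  # one of [0], [1] or [-1], depending on the test outcomes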
    def test_teststat(self):
        kpss_stat, pval, lags, crits = kpss(self.x, 'c', 3)
        assert_almost_equal(kpss_stat, 5.0169, DECIMAL_3)

        kpss_stat, pval, lags, crits = kpss(self.x, 'ct', 3)
        assert_almost_equal(kpss_stat, 1.1828, DECIMAL_3)
Example #13
 def test_unknown_lags(self):
     # an unknown lags value should raise a ValueError
     with pytest.raises(ValueError):
         kpss(self.x, 'c', nlags='unknown')
    def test_fail_nonvector_input(self):
        kpss(self.x)  # should be fine

        x = np.random.rand(20, 2)
        assert_raises(ValueError, kpss, x)
 def test_legacy_lags(self):
     # Test legacy lags are the same
     with warnings.catch_warnings(record=True):
         lags = kpss(self.x, 'c', lags='legacy')[2]
     assert_equal(lags, 15)
Example #16
split = round(s1.shape[0]/2)

X1 = s1[0:int(split)]
X2 = s1[int(split):]

mean1, mean2 = X1.mean(), X2.mean()
var1, var2 = X1.var(), X2.var()
print('mean1=%f, mean2=%f' % (mean1, mean2))
print('variance1=%f, variance2=%f' % (var1, var2))

# to check the stationarity
result = adfuller(s2['replace'], autolag="AIC")
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')

kpsstest = kpss(p1.Sales, regression='c')
kpss_output = pd.Series(kpsstest[0:3], index=['Test Statistic', 'p-value', 'Lags Used'])
kpss_output

# This is used to find the value of d
s1 = s1-s1.shift(1)

# To check the stationarity
result = adfuller(s1.dropna(), autolag='AIC')
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')

kpsstest = kpss(s1.dropna(), regression='c')
kpss_output = pd.Series(kpsstest[0:3], index=['Test Statistic', 'p-value', 'Lags Used'])
kpss_output
def unitroot(
    other_args: List[str],
    residuals: List[float],
):
    """Unit root test / stationarity (ADF, KPSS)

    Parameters
    ----------
    other_args : str
        Command line arguments to be processed with argparse
    residuals : List[float]
        Residuals data
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="arch",
        description="""
            Unit root test / stationarity (ADF, KPSS)
        """,
    )
    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if not ns_parser:
            return

        # The Augmented Dickey-Fuller test
        # Used to test for a unit root in a univariate process in the presence of serial correlation.
        # regression {'c', 'ct', 'ctt', 'nc'}: constant and trend order to include in the regression
        # Note: 'ct' - the data is stationary around a trend
        result = adfuller(residuals, regression="c")
        print("Augmented Dickey Fuller Test")
        print("ADF Statistic: %.4f" % result[0])
        print("p-value: %.4f" % result[1])
        print("Used lags: %d" % result[2])
        print("Num obs: %d" % result[3])
        print("Critical Values:")
        d = OrderedDict(sorted(result[4].items(), key=lambda t: t[1]))
        for key, value in d.items():
            print(f"\t{key}: {value:.3f}")
        print("")

        # Kwiatkowski-Phillips-Schmidt-Shin test
        # Test for level or trend stationarity
        # regression {'c', 'ct'} where:
        # 'c'  : the data is stationary around a constant (default).
        # 'ct' : the data is stationary around a trend.
        # nlags {None, 'auto', 'legacy'}
        # see: https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.kpss.html
        print("Kwiatkowski-Phillips-Schmidt-Shin Test")
        result = kpss(residuals, regression="c", nlags="auto")
        print("KPSS Statistic: %.4f" % result[0])
        print("Critical Values:")
        d = OrderedDict(
            sorted(result[3].items(), key=lambda t: t[1], reverse=True))
        for key, value in d.items():
            print(f"\t{key}: {value:.3f}")
        print("")

    except Exception as e:
        print(e, "\n")
        return
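
A hypothetical usage sketch (the residuals are illustrative, and it assumes the module-level helpers used above, in particular parse_known_args_and_warn, accept an empty argument list):

import numpy as np

residuals = list(np.random.randn(250))
unitroot(other_args=[], residuals=residuals)  # prints the ADF and KPSS summaries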
Example #18
                        )
                        st.write(f"ADF Statistic: {result[0]}")
                        st.write(f"p-value: {result[1]}")
                        st.write(f"n-lags: {result[2]}")
                        st.write(f"observations: {result[3]}")
                        st.write("Critical Values:")
                        for key, value in result[4].items():
                            st.write(f"{key}: {value}")
                        if result[1] <= 0.05:
                            st.info("Hypothesis Rejected")
                        else:
                            st.warning("Test Inconclusive")
                with test_2:
                    with st.beta_expander("KPSS Test (Stationary Test)"):
                        regression = st.radio("regression", ["c", "ct"])
                        result = kpss(df.values, regression=regression)

                        st.write(
                            "<p style='color:green;'>H<sub>0</sub>: Stationary</p>",
                            unsafe_allow_html=True,
                        )
                        st.write(f"KPSS Statistic: {result[0]}")
                        st.write(f"p-value: {result[1]}")
                        st.write(f"n-lags: {result[2]}")
                        st.write("Critical Values:")
                        for key, value in result[3].items():
                            st.write(f"{key}: {value}")
                        if result[1] <= 0.05:
                            st.info("Hypothesis Rejected")
                        else:
                            st.warning("Test Inconclusive")
Example #19
                with open(path + "/" + folder + "/" + filename, "r") as infile:
                    array = []
                    for line in infile:
                        array.append(int(line))

                file.write(
                    "minimum: " + str(min(array)) + "\n"
                )  # compute all the metrics: min,max,mean,variance,std.dev,kpss,bds,hurst exponent
                file.write("maximum: " + str(max(array)) + "\n")
                file.write("mean: " + str(numpy.mean(array)) + "\n")
                file.write("variance: " + str(numpy.var(array)) + "\n")
                file.write("standard deviation: " +
                           str(statistics.stdev(array)) + "\n")

                kpss_stat, p_value, lags, crit = stat.kpss(
                    array
                )  # https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.kpss.html
                file.write("KPSS: " + str(kpss_stat) + "\n")

                start = 0
                end = 200
                max_bds = 0
                # sliding window with fixed size (200 elements)
                while end <= len(array):
                    bds_stat, pvalue = stat.bds(
                        array[start:end]
                    )  # https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.bds.html
                    if bds_stat > max_bds:
                        max_bds = bds_stat
                    start += 1
                    end += 1
    def test_pval(self):
        kpss_stat, pval, lags, crits = kpss(self.x, 'c', 3)
        assert_equal(pval, 0.01)

        kpss_stat, pval, lags, crits = kpss(self.x, 'ct', 3)
        assert_equal(pval, 0.01)
Example #21
 def test_legacy_lags(self):
     # Test legacy lags are the same
     with pytest.warns(InterpolationWarning):
         res = kpss(self.x, 'c', nlags='legacy')
     assert_equal(res[2], 15)
    def test_store(self):
        kpss_stat, pval, crit, store = kpss(self.x, 'c', 3, True)

        # assert attributes, and make sure they're correct
        assert_equal(store.nobs, len(self.x))
        assert_equal(store.lags, 3)
Example #23
 def test_deprecation(self):
     with pytest.warns(FutureWarning):
         kpss(self.x, 'c')
 def test_lags(self):
     kpss_stat, pval, lags, crits = kpss(self.x, 'c')
     assert_equal(lags, int(np.ceil(12. * np.power(len(self.x) / 100., 1 / 4.))))
Example #25
# xTrain = np.c_[xTrain, condData]
# condData = nTrain['conductivity'].values[y.size/2-50: y.size-50]
# xTest = np.c_[xTest, condData]

# y1 = sklearn.preprocessing.normalize([nTrain['nitrateMg'].values], norm='l1').ravel()
# y2 = sklearn.preprocessing.normalize([ nTrain['conductivity'].values], norm='l1').ravel()
# plt.plot(nTrain['index'].values, y1, label='nitrate')
# plt.plot(nTrain['index'].values, y2, label='conductivity')
# plt.plot(nTrain['index'].values, y2-y1, label='residuals')
# plt.legend()
# print(y1.ravel())
# print(adfuller(y2-y1))
a, conductivity = remove_missing_values(index, conductivity)
print(conductivity[0:10])
print(adfuller(nitrateMg))
print(kpss(nitrateMg))

# RANSAC_regresssion(xTrain, xTest, X1, yTrain, X2, yTest, "with conductivity")

# Evaluating whether each method makes sense
# if no relationship, do not include
# if high correlation then include
# if dtw distance is lower than correlation then include some history
# dwtD = get_dtw_d(nTrain['nitrateMg'], nTrain['conductivity'])
# pcc = pearsonr(nTrain['nitrateMg'], nTrain['conductivity'])
# print("Conductivity and nitrate")
# print("DWT distance normalised: " + str(dwtD/2))
# print("Preason cc: " + str(pcc[0]))

# print("-----")
# dwtD = get_dtw_d(nTrain['nitrateMg'], nTrain['n'])
Example #26
from statsmodels.tsa.stattools import adfuller, kpss
# df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/a10.csv', parse_dates=['date'])
# print('df_input_values:',df_input.values)
# # ADF Test
for classification in ['PA','PB','PC','PD','PE','PF','PG']:
    result = adfuller(df_input[classification], autolag='AIC')
    print(f'ADF Statistic {classification}: {result[0]}')
    print(f'p-value: {result[1]}')
    print('Critical Values:')
    for key, value in result[4].items():
        print(f'  {key}, {value}')

# # KPSS Test
for classification in ['PA','PB','PC','PD','PE','PF','PG']:
    result = kpss(df_input[classification], regression='c')
    print(f'\nKPSS Statistic {classification}: {result[0]}')
    print('p-value: %f' % result[1])
    print('Critical Values:')
    for key, value in result[3].items():
        print(f'   {key}, {value}')

# Estimating and eliminating trend
for classification in ['PA','PB','PC','PD','PE','PF','PG']:
    ts_log=np.log(df_input[classification])
    print('datatype of ts_log:',type(ts_log))
    print('ts_log: ',ts_log)
    plt.plot(ts_log)
    plt.show(block=False)

# print(df.head())
 def test_legacy_lags(self):
     # Test legacy lags are the same
     with warnings.catch_warnings(record=True):
         lags = kpss(self.x, 'c', lags='legacy')[2]
     assert_equal(lags, 15)
Example #28
def main():
    _simulations = load.structured()
    _simulations = filtering.by_time_points_amount(_simulations, _time_points=TIME_POINTS)
    _simulations = filtering.by_categories(
        _simulations,
        _is_single_cell=True,
        _is_heterogeneity=False,
        _is_low_connectivity=False,
        _is_causality=False,
        _is_dominant_passive=False,
        _is_fibrin=False
    )
    print('Total simulations:', len(_simulations))

    _fiber_densities = compute_simulations_fiber_densities(_simulations)

    _kpss_y_arrays = [[] for _i in DERIVATIVES]
    _adf_y_arrays = [[] for _i in DERIVATIVES]
    for _simulation in tqdm(_simulations, desc='Simulations loop'):
        _cell_fiber_densities = \
            [_fiber_densities[(_simulation, _direction)] for _direction in ['left', 'right', 'up', 'down']]
        _cell_fiber_densities = np.mean(_cell_fiber_densities, axis=0)
        for _derivative_index, _derivative in enumerate(DERIVATIVES):
            _cell_fiber_densities_derivative = compute_lib.derivative(_cell_fiber_densities, _n=_derivative)
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', category=InterpolationWarning)
                _, _kpss_p_value, _, _ = kpss(_cell_fiber_densities_derivative, nlags='legacy')
                _kpss_y_arrays[_derivative_index].append(_kpss_p_value)
                _, _adf_p_value, _, _, _, _ = adfuller(_cell_fiber_densities_derivative)
                _adf_y_arrays[_derivative_index].append(_adf_p_value)

    print('Total cells:', len(_kpss_y_arrays[0]))

    # print results
    print('KPSS:')
    for _derivative_index, _derivative in enumerate(DERIVATIVES):
        _stationary_count = len([_value for _value in _kpss_y_arrays[_derivative_index] if _value > 0.05])
        print('Derivative:', _derivative, 'Stationary:',
              str(_stationary_count / len(_kpss_y_arrays[_derivative_index]) * 100) + '%')
    print('ADF:')
    for _derivative_index, _derivative in enumerate(DERIVATIVES):
        _stationary_count = len([_value for _value in _adf_y_arrays[_derivative_index] if _value < 0.05])
        print('Derivative:', _derivative, 'Stationary:',
              str(_stationary_count / len(_adf_y_arrays[_derivative_index]) * 100) + '%')

    # plot
    _colors_array = config.colors(3)
    for _test_name, _y_title, _y_tickvals, _p_value_line, _y_arrays in \
            zip(
                ['kpss', 'adf'],
                ['KPSS test p-value', 'ADF test p-value'],
                [[0.05, 0.1], [0.05, 1]],
                [0.05, 0.05],
                [_kpss_y_arrays, _adf_y_arrays]
            ):
        _fig = go.Figure(
            data=[
                go.Box(
                    y=_y,
                    name=_derivative,
                    boxpoints='all',
                    jitter=1,
                    pointpos=0,
                    line={
                        'width': 1
                    },
                    fillcolor='white',
                    marker={
                        'size': 10,
                        'color': _color
                    },
                    opacity=0.7,
                    showlegend=False
                ) for _y, _derivative, _color in zip(_y_arrays, DERIVATIVES_TEXT, _colors_array)
            ],
            layout={
                'xaxis': {
                    'title': 'Fiber density derivative',
                    'zeroline': False
                },
                'yaxis': {
                    'title': _y_title,
                    'zeroline': False,
                    'tickmode': 'array',
                    'tickvals': _y_tickvals
                },
                'shapes': [
                    {
                        'type': 'line',
                        'x0': DERIVATIVES[0] - 0.75,
                        'y0': _p_value_line,
                        'x1': DERIVATIVES[-1] + 0.75,
                        'y1': _p_value_line,
                        'line': {
                            'color': 'red',
                            'width': 2,
                            'dash': 'dash'
                        }
                    }
                ]
            }
        )

        save.to_html(
            _fig=_fig,
            _path=os.path.join(paths.PLOTS, save.get_module_name()),
            _filename='plot_' + _test_name
        )
 def test_deprecation(self):
     with pytest.deprecated_call():
         kpss(self.x, 'c', lags=None)
Example #30
def main(_band=None,
         _high_temporal_resolution=True,
         _tuples_to_mark=None,
         _tuples_to_plot=None,
         _plots=None):
    if _plots is None:
        _plots = ['whiteness', 'granger']

    _experiments = all_experiments()
    _experiments = filtering.by_categories(
        _experiments=_experiments,
        _is_single_cell=False,
        _is_high_temporal_resolution=_high_temporal_resolution,
        _is_bleb=False,
        _is_dead_dead=False,
        _is_live_dead=False,
        _is_bead=False,
        _is_metastasis=False)

    _tuples = load.experiments_groups_as_tuples(_experiments)
    _tuples = filtering.by_time_frames_amount(_tuples,
                                              _time_frames=MINIMUM_TIME_FRAMES)
    _tuples = filtering.by_pair_distance_range(
        _tuples, _distance_range=PAIR_DISTANCE_RANGE)
    _tuples = filtering.by_real_pairs(_tuples)
    _tuples = filtering.by_band(_tuples, _band=_band)
    print('Total tuples:', len(_tuples))

    _arguments = []
    for _tuple in _tuples:
        _experiment, _series_id, _group = _tuple
        _latest_time_frame = compute.latest_time_frame_before_overlapping(
            _experiment, _series_id, _group, OFFSET_X)
        for _cell_id in ['left_cell', 'right_cell']:
            _arguments.append({
                'experiment': _experiment,
                'series_id': _series_id,
                'group': _group,
                'length_x': QUANTIFICATION_WINDOW_LENGTH_IN_CELL_DIAMETER,
                'length_y': QUANTIFICATION_WINDOW_HEIGHT_IN_CELL_DIAMETER,
                'length_z': QUANTIFICATION_WINDOW_WIDTH_IN_CELL_DIAMETER,
                'offset_x': OFFSET_X,
                'offset_y': OFFSET_Y,
                'offset_z': OFFSET_Z,
                'cell_id': _cell_id,
                'direction': 'inside',
                'time_points': _latest_time_frame
            })

    _windows_dictionary, _windows_to_compute = compute.windows(
        _arguments, _keys=['experiment', 'series_id', 'group', 'cell_id'])
    _fiber_densities = compute.fiber_densities(_windows_to_compute,
                                               _subtract_border=True)

    _experiments_fiber_densities = {
        _key:
        [_fiber_densities[_tuple] for _tuple in _windows_dictionary[_key]]
        for _key in _windows_dictionary
    }

    _n_pairs = 0
    _n_pairs_with_band = 0
    _whiteness_p_values = []
    _n_passed_whiteness_with_band = 0
    _granger_causality_p_values = []
    _n_passed_granger_causality_with_band = 0
    _correlations = []
    _time_lag_correlations = []
    _end_fiber_densities = []
    for _tuple in _tuples:
        _experiment, _series_id, _group = _tuple

        _left_cell_fiber_densities = \
            _experiments_fiber_densities[(_experiment, _series_id, _group, 'left_cell')]
        _right_cell_fiber_densities = \
            _experiments_fiber_densities[(_experiment, _series_id, _group, 'right_cell')]

        _properties = load.group_properties(_experiment, _series_id, _group)
        _left_cell_fiber_densities = compute.remove_blacklist(
            _experiment, _series_id, _properties['cells_ids']['left_cell'],
            _left_cell_fiber_densities)
        _right_cell_fiber_densities = compute.remove_blacklist(
            _experiment, _series_id, _properties['cells_ids']['right_cell'],
            _right_cell_fiber_densities)

        _left_cell_fiber_densities_filtered, _right_cell_fiber_densities_filtered = \
            compute.longest_same_indices_shared_in_borders_sub_array(
                _left_cell_fiber_densities, _right_cell_fiber_densities)

        # ignore small arrays
        if len(_left_cell_fiber_densities_filtered) < MINIMUM_TIME_FRAMES:
            continue

        _n_pairs += 1
        if _properties['band']:
            _n_pairs_with_band += 1

        _start_time_frame = 0
        for _left in _left_cell_fiber_densities:
            if _left[0] == _left_cell_fiber_densities_filtered[0]:
                break
            _start_time_frame += 1

        # stationarity test
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', category=InterpolationWarning)

            # find the derivative order at which both series are stationary
            for _derivative in range(10):
                _left_cell_fiber_densities_derivative = \
                    compute_lib.derivative(_left_cell_fiber_densities_filtered, _n=_derivative)
                _right_cell_fiber_densities_derivative = \
                    compute_lib.derivative(_right_cell_fiber_densities_filtered, _n=_derivative)

                if ADF_TEST:
                    _, _left_cell_adf_p_value, _, _, _, _ = adfuller(
                        _left_cell_fiber_densities_derivative)
                    _, _right_cell_adf_p_value, _, _, _, _ = adfuller(
                        _right_cell_fiber_densities_derivative)
                    if _left_cell_adf_p_value > 0.05 or _right_cell_adf_p_value > 0.05:
                        continue

                if KPSS_TEST:
                    _, _left_cell_kpss_p_value, _, _ = kpss(
                        _left_cell_fiber_densities_derivative, nlags='legacy')
                    _, _right_cell_kpss_p_value, _, _ = kpss(
                        _right_cell_fiber_densities_derivative, nlags='legacy')
                    if _left_cell_kpss_p_value < 0.05 or _right_cell_kpss_p_value < 0.05:
                        continue

                # stationary
                break

        # causality
        try:
            _x = pd.DataFrame(data=[[_left_value, _right_value]
                                    for _left_value, _right_value in zip(
                                        _left_cell_fiber_densities_derivative,
                                        _right_cell_fiber_densities_derivative)
                                    ],
                              columns=['left', 'right'])

            # var model to retrieve lag
            _var_model = VAR(_x)
            _lag_order_results = _var_model.select_order()
            _estimators_lags = [
                _lag_order_results.aic, _lag_order_results.bic,
                _lag_order_results.fpe, _lag_order_results.hqic
            ]
            _min_estimator_lag = min(_estimators_lags)

            # found a lag
            if 0 < _min_estimator_lag <= MAXIMUM_LAG:
                _var_model_results = _var_model.fit(maxlags=_min_estimator_lag,
                                                    ic=None)

                _whiteness = _var_model_results.test_whiteness(
                    nlags=_min_estimator_lag + 1)
                _whiteness_p_values.append(_whiteness.pvalue)

                if _tuples_to_mark is not None and _tuple in _tuples_to_mark and _whiteness.pvalue > 0.05:
                    print(_tuple, 'marked whiteness p-value:',
                          _whiteness.pvalue)

                # no autocorrelation in the residuals
                if _whiteness.pvalue > 0.05:
                    if _properties['band']:
                        _n_passed_whiteness_with_band += 1

                    # time lag = 0
                    _correlation = compute_lib.correlation(
                        _left_cell_fiber_densities_derivative,
                        _right_cell_fiber_densities_derivative)

                    # if _correlation < 0.5:
                    #     continue

                    # granger causality
                    for _caused, _causing in zip(['left', 'right'],
                                                 ['right', 'left']):
                        _granger = _var_model_results.test_causality(
                            caused=_caused, causing=_causing)
                        _granger_causality_p_values.append(_granger.pvalue)

                        # time lag = 0
                        _correlations.append(_correlation)

                        # time lag = min estimator
                        if _causing == 'left':
                            _left_fiber_densities_time_lag = \
                                _left_cell_fiber_densities_derivative[:-_min_estimator_lag]
                            _right_fiber_densities_time_lag = \
                                _right_cell_fiber_densities_derivative[_min_estimator_lag:]
                        else:
                            _left_fiber_densities_time_lag = \
                                _left_cell_fiber_densities_derivative[_min_estimator_lag:]
                            _right_fiber_densities_time_lag = \
                                _right_cell_fiber_densities_derivative[:-_min_estimator_lag]
                        _time_lag_correlation = compute_lib.correlation(
                            _left_fiber_densities_time_lag,
                            _right_fiber_densities_time_lag)
                        _time_lag_correlations.append(_time_lag_correlation)

                        # end fiber density
                        _time_frame = compute.density_time_frame(_experiment)
                        if len(_left_cell_fiber_densities_filtered
                               ) > _time_frame:
                            _end_fiber_density = \
                                (_left_cell_fiber_densities_filtered[_time_frame] +
                                 _right_cell_fiber_densities_filtered[_time_frame]) / 2
                        else:
                            _end_fiber_density = \
                                (_left_cell_fiber_densities_filtered[-1] +
                                 _right_cell_fiber_densities_filtered[-1]) / 2
                        _normalization = load.normalization_series_file_data(
                            _experiment, _series_id)
                        _normalized_fiber_density = compute_lib.z_score(
                            _end_fiber_density, _normalization['average'],
                            _normalization['std'])
                        _end_fiber_densities.append(_normalized_fiber_density)

                        # marking
                        if _tuples_to_mark is not None and _tuple in _tuples_to_mark and _granger.pvalue < 0.05:
                            print(_tuple, 'causing:', _causing,
                                  'marked granger p-value:', _granger.pvalue)

                        if _granger.pvalue < 0.05:
                            if _properties['band']:
                                _n_passed_granger_causality_with_band += 1

                            _normality = _var_model_results.test_normality()
                            _inst_granger = _var_model_results.test_inst_causality(
                                causing=_causing)

                            print(
                                _tuple,
                                _causing.capitalize() + ' causes ' + _caused +
                                '!',
                                'time-points: ' +
                                str(len(
                                    _left_cell_fiber_densities_derivative)),
                                'stationary derivative: ' + str(_derivative),
                                'band:' + str(_properties['band']),
                                'p-value: ' + str(round(_granger.pvalue, 4)),
                                'lag: ' + str(_min_estimator_lag),
                                'normality p-value: ' +
                                str(round(_normality.pvalue, 4)),
                                'inst p-value: ' +
                                str(round(_inst_granger.pvalue, 4)),
                                sep='\t')

                            # lag = 0
                            print('Time lag = 0 correlation:', _correlation)

                            # rest of lags
                            for _lag in range(1, _min_estimator_lag + 1):
                                if _causing == 'left':
                                    _left_fiber_densities_time_lag = _left_cell_fiber_densities_derivative[:
                                                                                                           -_lag]
                                    _right_fiber_densities_time_lag = _right_cell_fiber_densities_derivative[
                                        _lag:]
                                else:
                                    _left_fiber_densities_time_lag = _left_cell_fiber_densities_derivative[
                                        _lag:]
                                    _right_fiber_densities_time_lag = _right_cell_fiber_densities_derivative[:
                                                                                                             -_lag]

                                _correlation = compute_lib.correlation(
                                    _left_fiber_densities_time_lag,
                                    _right_fiber_densities_time_lag)
                                print(
                                    'Time lag = ' + str(_lag) +
                                    ' correlation:', _correlation)

                            # plots
                            if _tuples_to_plot is not None and _tuple in _tuples_to_plot:
                                _y_arrays = [
                                    _left_cell_fiber_densities_derivative,
                                    _right_cell_fiber_densities_derivative
                                ]
                                _names_array = ['Left cell', 'Right cell']
                                _colors_array = config.colors(2)
                                _temporal_resolution = compute.temporal_resolution_in_minutes(
                                    _experiment)
                                _fig = go.Figure(data=[
                                    go.Scatter(x=np.arange(
                                        start=_start_time_frame,
                                        stop=_start_time_frame +
                                        len(_left_cell_fiber_densities_derivative
                                            ),
                                        step=1) * _temporal_resolution,
                                               y=_y,
                                               name=_name,
                                               mode='lines',
                                               line={
                                                   'color': _color,
                                                   'width': 1
                                               })
                                    for _y, _name, _color in zip(
                                        _y_arrays, _names_array, _colors_array)
                                ],
                                                 layout={
                                                     'xaxis': {
                                                         'title':
                                                         'Time (minutes)',
                                                         'zeroline': False
                                                     },
                                                     'yaxis': {
                                                         'title':
                                                         'Fiber density (z-score)'
                                                         + '\'' * _derivative,
                                                         'zeroline':
                                                         False
                                                     },
                                                     'legend': {
                                                         'xanchor': 'left',
                                                         'x': 0.1,
                                                         'yanchor': 'top',
                                                         'bordercolor':
                                                         'black',
                                                         'borderwidth': 2,
                                                         'bgcolor': 'white'
                                                     },
                                                 })

                                _experiment, _series_id, _group = _tuple
                                save.to_html(
                                    _fig=_fig,
                                    _path=os.path.join(paths.PLOTS,
                                                       save.get_module_name()),
                                    _filename='plot_' + _experiment + '_' +
                                    str(_series_id) + '_' + _group)

                                # residuals
                                _y_arrays = \
                                    [_var_model_results.resid.values[:, 0], _var_model_results.resid.values[:, 1]]
                                _fig = go.Figure(data=[
                                    go.Scatter(x=np.arange(
                                        start=_start_time_frame,
                                        stop=_start_time_frame + len(_y),
                                        step=1) * _temporal_resolution,
                                               y=_y,
                                               name=_name,
                                               mode='lines',
                                               line={
                                                   'color': _color,
                                                   'width': 1
                                               })
                                    for _y, _name, _color in zip(
                                        _y_arrays, _names_array, _colors_array)
                                ],
                                                 layout={
                                                     'xaxis': {
                                                         'title':
                                                         'Time (minutes)',
                                                         'zeroline': False
                                                     },
                                                     'yaxis': {
                                                         'title': 'Residual',
                                                         'zeroline': False
                                                     },
                                                     'legend': {
                                                         'xanchor': 'left',
                                                         'x': 0.1,
                                                         'yanchor': 'top',
                                                         'bordercolor':
                                                         'black',
                                                         'borderwidth': 2,
                                                         'bgcolor': 'white'
                                                     },
                                                 })

                                _experiment, _series_id, _group = _tuple
                                save.to_html(
                                    _fig=_fig,
                                    _path=os.path.join(paths.PLOTS,
                                                       save.get_module_name()),
                                    _filename='plot_residuals_' + _experiment +
                                    '_' + str(_series_id) + '_' + _group)

        # not enough time points
        except ValueError:
            continue

    print('Total pairs:', _n_pairs)
    print('Total pairs with band:', _n_pairs_with_band)
    print('Total pairs passed whiteness:',
          (np.array(_whiteness_p_values) > 0.05).sum())
    print('Total pairs passed whiteness with band:',
          _n_passed_whiteness_with_band)
    print('Total cells passed granger causality:',
          (np.array(_granger_causality_p_values) < 0.05).sum())
    print('Total cells passed granger causality with band:',
          _n_passed_granger_causality_with_band)

    # p-value correction
    print('Corrections of GC p-value < 0.05:')
    _granger_causality_p_values_corrected = multipletests(
        pvals=_granger_causality_p_values, method='fdr_bh')
    for _p_value, _p_value_corrected in zip(
            _granger_causality_p_values,
            _granger_causality_p_values_corrected[1]):
        if _p_value < 0.05:
            print('Original GC p-value:', _p_value, 'corrected:',
                  _p_value_corrected)

    # plots
    for _test_name, _y_title, _y_array in \
            zip(
                ['whiteness', 'granger'],
                ['Whiteness p-value', 'Granger causality p-value'],
                [_whiteness_p_values, _granger_causality_p_values]
            ):
        if _test_name in _plots:
            _fig = go.Figure(data=go.Box(y=_y_array,
                                         boxpoints='all',
                                         jitter=1,
                                         pointpos=0,
                                         line={'width': 1},
                                         fillcolor='white',
                                         marker={
                                             'size': 10,
                                             'color': '#ea8500'
                                         },
                                         opacity=0.7,
                                         showlegend=False),
                             layout={
                                 'xaxis': {
                                     'zeroline': False
                                 },
                                 'yaxis': {
                                     'title': _y_title,
                                     'zeroline': False,
                                     'range': [-0.1, 1.1],
                                     'tickmode': 'array',
                                     'tickvals': [0.05, 1]
                                 },
                                 'shapes': [{
                                     'type': 'line',
                                     'x0': -0.75,
                                     'y0': 0.05,
                                     'x1': 0.75,
                                     'y1': 0.05,
                                     'line': {
                                         'color': 'red',
                                         'width': 2,
                                         'dash': 'dash'
                                     }
                                 }]
                             })

            save.to_html(_fig=_fig,
                         _path=os.path.join(paths.PLOTS,
                                            save.get_module_name()),
                         _filename='plot_' + _test_name)

    # granger versus correlation
    print(
        'GC vs. correlation pearson correlation:',
        compute_lib.correlation(_granger_causality_p_values,
                                _correlations,
                                _with_p_value=True))
    _fig = go.Figure(data=go.Scatter(x=_granger_causality_p_values,
                                     y=_correlations,
                                     mode='markers',
                                     marker={
                                         'size': 10,
                                         'color': '#ea8500'
                                     },
                                     showlegend=False),
                     layout={
                         'xaxis': {
                             'title': 'Granger causality p-value',
                             'zeroline': False,
                         },
                         'yaxis': {
                             'title': 'Inner correlation',
                             'zeroline': False,
                         }
                     })

    save.to_html(_fig=_fig,
                 _path=os.path.join(paths.PLOTS, save.get_module_name()),
                 _filename='plot_gc_vs_correlation')

    # granger versus time lag correlation
    print(
        'GC vs. time lag correlation pearson correlation:',
        compute_lib.correlation(_granger_causality_p_values,
                                _time_lag_correlations,
                                _with_p_value=True))
    _fig = go.Figure(data=go.Scatter(x=_granger_causality_p_values,
                                     y=_time_lag_correlations,
                                     mode='markers',
                                     marker={
                                         'size': 10,
                                         'color': '#ea8500'
                                     },
                                     showlegend=False),
                     layout={
                         'xaxis': {
                             'title': 'Granger causality p-value',
                             'zeroline': False,
                         },
                         'yaxis': {
                             'title': 'GC lag inner correlation',
                             'zeroline': False,
                         }
                     })

    save.to_html(_fig=_fig,
                 _path=os.path.join(paths.PLOTS, save.get_module_name()),
                 _filename='plot_gc_vs_time_lag_correlation')

    # granger versus end fiber density
    print(
        'GC vs. end fiber density pearson correlation:',
        compute_lib.correlation(_granger_causality_p_values,
                                _end_fiber_densities,
                                _with_p_value=True))
    _fig = go.Figure(data=go.Scatter(x=_granger_causality_p_values,
                                     y=_end_fiber_densities,
                                     mode='markers',
                                     marker={
                                         'size': 10,
                                         'color': '#ea8500'
                                     },
                                     showlegend=False),
                     layout={
                         'xaxis': {
                             'title': 'Granger causality p-value',
                             'zeroline': False,
                         },
                         'yaxis': {
                             'title': 'End fiber density (z-score)',
                             'zeroline': False,
                         }
                     })

    save.to_html(_fig=_fig,
                 _path=os.path.join(paths.PLOTS, save.get_module_name()),
                 _filename='plot_gc_vs_end_density')
Example #31
df1 = df1.set_index(df1['application_date'])
df2 = df2.set_index(df2['application_date'])

# Grouping the data as per the requirement:
df1 = df1.groupby('segment').resample('D').case_count.sum()
df2 = df2.groupby('segment').resample('D').case_count.sum()

# Checking if the given series is stationary for segment 1:
df1 = pd.DataFrame(df1)
rol_mean = df1['case_count'].rolling(30).mean()
X = df1['case_count'].values
x_adf = adfuller(
    X
)  # From ADF test (test statistic < critical value: reject H0). Series is stationary
x_kpss = kpss(
    X
)  # From KPSS test (test statistic > critical value: reject H0). Series is non-stationary
#print(x_adf)
#print(x_kpss)

X_1 = df1.diff(1).dropna()
rol_mean1 = X_1['case_count'].rolling(30).mean()
x_1_adf = adfuller(X_1['case_count'].values)
x_1_kpss = kpss(X_1['case_count'].values)
#print(x_1_adf)
#print(x_1_kpss)

# Plotting to check for stationarity:
fig, ax = plt.subplots(2, 2)
ax[0, 0].plot(df1['case_count'].tolist())
ax[0, 0].plot(rol_mean.tolist(), color='red')
 def test_deprecation(self):
     with pytest.deprecated_call():
         kpss(self.x, 'c', lags=None)
Example #33
plt.show()

# Coefficient values for lag > 5 are not statistically significant and their impact on the model is minimal, except for a few spikes at 8, 11, 22 and beyond.

# <a id="subsection-four"></a>
# # KPSS Test
#
# The KPSS test, short for Kwiatkowski-Phillips-Schmidt-Shin, is a unit root test that checks the stationarity of a given series around a deterministic trend.
#
# Here, the null hypothesis is that the series is **stationary**.
#
# That is, if the p-value is below the significance level (say 0.05), the series is non-stationary, and vice versa.

# In[31]:

stats, p, lags, critical_values = kpss(series, 'ct')

# In[32]:

print(f'Test Statistic : {stats}')
print(f'p-value : {p}')
print(f'Critical Values : {critical_values}')

if p < 0.05:
    print('Series is not Stationary')
else:
    print('Series is Stationary')

# # Interpreting KPSS test results
#
# The output of the KPSS test contains 4 things:
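#
# 1. The KPSS test statistic,
# 2. the p-value (statsmodels only reports it within the interval [0.01, 0.1]),
# 3. the number of lags used, and
# 4. the dictionary of critical values,
# i.e. exactly the four values unpacked above as `stats, p, lags, critical_values`.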
Example #34
    def test_fail_nonvector_input(self):
        with warnings.catch_warnings(record=True) as w:
            kpss(self.x)  # should be fine

        x = np.random.rand(20, 2)
        assert_raises(ValueError, kpss, x)
 def test_lags(self):
     with warnings.catch_warnings(record=True) as w:
         kpss_stat, pval, lags, crits = kpss(self.x, 'c')
     assert_equal(lags, int(np.ceil(12. * np.power(len(self.x) / 100., 1 / 4.))))
Example #36
def simple_auto_stationarize(
    df,
    verbosity=None,
    alpha=None,
    multitest=None,
    get_conclusions=False,
    get_actions=False,
):
    """Auto-stationarize the given time-series dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
        A dataframe composed solely of numeric columns.
    verbosity : int, logging.Logger, optional
        If an int is given, it is interpreted as the logging level to use. See
        https://docs.python.org/3/library/logging.html#levels for details. If a
        logging.Logger object is given, it is used for printing instead, with
        appropriate logging levels. If no value is provided, the default
        logging.Logger behaviour is used.
    alpha : int, optional
        Family-wise error rate (FWER) or false discovery rate (FDR), depending
        on the method used for multiple hypothesis testing error control. If no
        value is provided, a default value of 0.05 (5%) is used.
    multitest : str, optional
        The multiple hypothesis testing error control method to use. If no value
        is provided, the Benjamini–Yekutieli procedure is used. See
        `the documentation of statsmodels' multipletests method for supported values <https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html>`.
    get_conclusions : bool, defaults to False
        If set to true, a conclusions dict is returned.
    get_actions : bool, defaults to False
        If set to true, an actions dict is returned.

    Returns
    -------
    results : pandas.DataFrame or dict
        By default, only the transformed dataframe is returned. However, if
        get_conclusions or get_actions are set to True, a dict is returned
        instead, with the following mappings:
        - `postdf` - Maps to the transformed dataframe.
        - `conclusions` - Maps to a dict mapping each column name to the
          arrived conclusion regarding its stationarity.
        - `actions` - Maps to a dict mapping each column name to the
          transformations performed on it to stationarize it.
    """  # noqa: E501
    if verbosity is not None:
        prev_verbosity = set_verbosity_level(verbosity)
    if alpha is None:
        alpha = DEF_ALPHA

    logger = get_logger()
    logger.info("Starting to auto-stationarize a dataframe!")
    logger.info("Starting to check input data validity...")
    logger.info(f"Data shape (time, variables) is {df.shape}.")
    # the first axis - rows - is expected to represent the time dimension,
    # while the second axis - columns - is expected to represent variables;
    # thus, the first expected to be much longer than the second
    logger.info(
        "Checking current data orientation (rows=time, columns=variables)...")
    if df.shape[1] >= df.shape[0]:
        logger.warning(
            ("stationarizer's input dataframe has more columns than rows! "
             "Columns are expected to represent variables, while rows "
             "represent time steps, and thus the input dataframe is "
             "expected to have more rows than columns. Either the input "
             "data is inverted, or the data has far more variables than "
             "samples."))
    else:
        logger.info("Data orientation is valid.")
    # assert all columns are numeric
    all_cols_numeric = all([np.issubdtype(x, np.number) for x in df.dtypes])
    if not all_cols_numeric:
        err = ValueError(
            "All columns of stationarizer's input dataframe must be numeric!")
        logger.exception(err)
        raise err

    # util var
    n = len(df.columns)

    # testing for unit root
    logger.info(("Checking for the presence of a unit root in the input time "
                 "series using the Augmented Dicky-Fuller test"))
    logger.info(
        ("Reminder:\n "
         "Null Hypothesis: The series has a unit root (value of a=1); "
         "meaning, it is NOT stationary.\n"
         "Alternate Hypothesis: The series has no unit root; it is either "
         "stationary or non-stationary of a different model than unit root."))
    adf_results = []
    for colname in df.columns:
        srs = df[colname]
        result = adfuller(srs, regression="ct")
        logger.info(
            (f"{colname}: test statistic={result[0]}, p-val={result[1]}."))
        adf_results.append(result)

    # testing for trend stationarity
    logger.info((
        "Testing for trend stationarity of input series using the KPSS test."))
    logger.info(("Reminder:\n"
                 "Null Hypothesis (H0): The series is trend-stationarity.\n"
                 "Alternative Hypothesis (H1): The series has a unit root."))
    kpss_results = []
    for colname in df.columns:
        srs = df[colname]
        result = kpss(srs, regression="ct")
        logger.info(
            (f"{colname}: test statistic={result[0]}, p-val={result[1]}."))
        kpss_results.append(result)

    # Controlling FDR
    logger.info(
        ("Controlling the False Discovery Rate (FDR) using the Benjamini-"
         f"Yekutieli procedure with α={alpha}."))
    adf_pvals = [x[1] for x in adf_results]
    kpss_pvals = [x[1] for x in kpss_results]
    pvals = adf_pvals + kpss_pvals
    by_res = multipletests(pvals=pvals,
                           alpha=alpha,
                           method=multitest if multitest else "fdr_by",
                           is_sorted=False)
    reject = by_res[0]
    corrected_pvals = by_res[1]
    adf_rejections = reject[:n]
    kpss_rejections = reject[n:]
    adf_corrected_pvals = corrected_pvals[:n]  # noqa: F841
    kpss_corrected_pvals = corrected_pvals[n:]  # noqa: F841
    conclusion_counts = {}

    def dict_inc(dicti, key):
        try:
            dicti[key] += 1
        except KeyError:
            dicti[key] = 1

    # interpret results
    logger.info("Interpreting test results after FDR control...")
    conclusions = {}
    actions = {}
    for i, colname in enumerate(df.columns):
        conclusion = conclude_adf_and_kpss_results(
            adf_reject=adf_rejections[i], kpss_reject=kpss_rejections[i])
        dict_inc(conclusion_counts, conclusion)
        trans = CONCLUSION_TO_TRANSFORMATIONS[conclusion]
        conclusions[colname] = conclusion
        actions[colname] = trans
        logger.info((f"--{colname}--\n "
                     f"ADF corrected p-val: {adf_corrected_pvals[i]}, "
                     f"H0 rejected: {adf_rejections[i]}.\n"
                     f"KPSS corrected p-val: {kpss_corrected_pvals[i]}, "
                     f"H0 rejected: {kpss_rejections[i]}.\n"
                     f"Conclusion: {conclusion}\n Transformations: {trans}."))

    # making non-stationary series stationary!
    logger.info((f"Pre-transformation shape: {df.shape}, "
                 f"#NA: {df.isna().sum().sum()}"))
    post_cols = {}
    logger.info("Applying transformations...")
    for colname in df.columns:
        srs = df[colname]
        if Transformation.DETREND in actions[colname]:
            logger.info(f"Detrending {colname} (len={len(srs)}).")
            srs = detrend(srs, order=1, axis=0)
            logger.debug(f"# NaN after detrending: {np.isnan(srs).sum()}")
        if Transformation.DIFFRENTIATE in actions[colname]:
            logger.info(f"Diffrentiating {colname} (len={len(srs)}).")
            srs = diff(srs, k_diff=1)
            logger.debug(f"# NaN after diffrencing: {np.isnan(srs).sum()}")
        post_cols[colname] = srs
        logger.info(f"{colname} transformed (len={len(post_cols[colname])}).")

    # equalizing lengths
    min_len = min([len(post_cols[x]) for x in post_cols])
    logger.info(f"Min length to trim to: {min_len}")
    trimmed_cols = {}
    for colname in df.columns:
        col = post_cols[colname][:min_len].values
        trimmed_cols[colname] = col
        logger.debug(
            f"#NA trimmed {colname} (len={len(col)}): {np.isnan(col).sum()}")
    postdf = pd.DataFrame.from_dict(trimmed_cols)
    # postdf = postdf[:min_len]
    logger.debug(f"trimmed df shape: {postdf.shape}")
    postdf.index = df.index.copy()[:min_len]
    # postdf = df.copy()
    # postdf = postdf.iloc[:min_len]
    # for colname in df.columns:
    #     postdf[colname] = post_cols[colname]
    logger.info(f"Post trimming shape: {postdf.shape}")

    # checking for NaNs
    nan_count = postdf.isna().sum().sum()
    if nan_count > 0:
        nan_rows = postdf[postdf.isna().any(axis=1)]
        logger.debug(f"Post trimming NaN count: {nan_count}")
        logger.debug(f"Rows with Nan values:\n {nan_rows}")

    for k in conclusion_counts:
        count = conclusion_counts[k]
        ratio = 100 * (count / len(df.columns))
        logger.info(f"{count} series ({ratio}%) found with conclusion: {k}.")

    if verbosity is not None:
        set_verbosity_level(prev_verbosity)

    if not get_actions and not get_conclusions:
        return postdf
    results = {"postdf": postdf}
    if get_conclusions:
        results["conclusions"] = conclusions
    if get_actions:
        results["actions"] = actions
    return results
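
A minimal usage sketch (the dataframe is illustrative; simple_auto_stationarize and its Transformation/conclusion helpers come from the surrounding module):

import numpy as np
import pandas as pd

df = pd.DataFrame({
    'walk': np.cumsum(np.random.randn(300)),  # unit-root series
    'noise': np.random.randn(300),            # already stationary
})
res = simple_auto_stationarize(df, alpha=0.05, get_conclusions=True, get_actions=True)
print(res['conclusions'])  # per-column stationarity conclusion
print(res['actions'])      # transformations applied per column
print(res['postdf'].shape)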