def get_VAR_noise_matrix(signals, olag):
    import numpy as np
    from statsmodels.tools.tools import add_constant
    from statsmodels.regression.linear_model import OLS
    from statsmodels.tsa.tsatools import lagmat2ds

    T = signals.shape[0]
    num_signals = signals.shape[1]

    # Now we can compute the VAR model with the given order :
    VAR_resid = np.zeros((T - olag, num_signals))
    VAR_model = {}

    for k in range(0, num_signals):
        # Rotate the columns of the *original* array so signal k becomes the
        # regressand (rotating in place would accumulate the permutations) :
        rotated = np.concatenate((signals[:, k:], signals[:, 0:k]), axis=1)

        data = lagmat2ds(rotated, olag, trim='both', dropex=1)
        datajoint = add_constant(data[:, 1:], prepend=False)
        OLS_ = OLS(data[:, 0], datajoint).fit()
        VAR_resid[:, k] = OLS_.resid
        VAR_model[k] = OLS_

    # Computing the noise covariance matrix of the full model :
    VAR_noise_matrix = np.cov(VAR_resid.T)

    return VAR_noise_matrix, VAR_resid, VAR_model
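
# Usage sketch (not part of the original source): with white-noise signals,
# the estimated noise covariance should be close to the 3x3 identity.
if __name__ == "__main__":
    import numpy as np

    rng = np.random.RandomState(42)
    demo = rng.standard_normal((200, 3))
    noise_cov, resid, models = get_VAR_noise_matrix(demo, olag=2)
    print(noise_cov.round(2))  # roughly the identity matrix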
Example #2
def grangercausalitytests(x, maxlag, addconst=True, verbose=True):
    """four tests for granger non causality of 2 timeseries

    all four tests give similar results
    `params_ftest` and `ssr_ftest` are equivalent based on F test which is
    identical to lmtest:grangertest in R

    Parameters
    ----------
    x : array, 2d, (nobs,2)
        data for test whether the time series in the second column Granger
        causes the time series in the first column
    maxlag : integer
        the Granger causality test results are calculated for all lags up to
        maxlag
    verbose : bool
        print results if true

    Returns
    -------
    results : dictionary
        all test results, dictionary keys are the number of lags. For each
        lag the values are a tuple, with the first element a dictionary with
        teststatistic, pvalues, degrees of freedom, the second element are
        the OLS estimation results for the restricted model, the unrestricted
        model and the restriction (contrast) matrix for the parameter f_test.

    Notes
    -----
    TODO: convert to class and attach results properly

    The null hypothesis for grangercausalitytests is that the time series in
    the second column, x2, does NOT Granger cause the time series in the first
    column, x1. Granger causality means that past values of x2 have a
    statistically significant effect on the current value of x1, taking past
    values of x1 into account as regressors. We reject the null hypothesis
    that x2 does not Granger cause x1 if the pvalues are below a desired size
    of the test.

    The null hypothesis for all four tests is that the coefficients
    corresponding to past values of the second time series are zero.

    'params_ftest', 'ssr_ftest' are based on F distribution

    'ssr_chi2test', 'lrtest' are based on chi-square distribution

    References
    ----------
    http://en.wikipedia.org/wiki/Granger_causality
    Greene: Econometric Analysis

    """
    import numpy as np
    from scipy import stats
    from statsmodels.tsa.tsatools import lagmat2ds
    from statsmodels.tools.tools import add_constant
    from statsmodels.regression.linear_model import OLS

    x = np.asarray(x)

    if x.shape[0] <= 3 * maxlag + int(addconst):
        raise ValueError("Insufficient observations. Maximum allowable "
                         "lag is {0}".format(int((x.shape[0] - int(addconst)) /
                                                 3) - 1))

    resli = {}

    for mlg in range(1, maxlag + 1):
        result = {}
        if verbose:
            print('\nGranger Causality')
            print('number of lags (no zero)', mlg)
        mxlg = mlg

        # create lagmat of both time series
        dta = lagmat2ds(x, mxlg, trim='both', dropex=1)

        #add constant
        if addconst:
            dtaown = add_constant(dta[:, 1:(mxlg + 1)], prepend=False)
            dtajoint = add_constant(dta[:, 1:], prepend=False)
        else:
            raise NotImplementedError('Not Implemented')
            #dtaown = dta[:, 1:mxlg]
            #dtajoint = dta[:, 1:]

        # Run ols on both models without and with lags of second variable
        res2down = OLS(dta[:, 0], dtaown).fit()
        res2djoint = OLS(dta[:, 0], dtajoint).fit()

        #print results
        #for ssr based tests see:
        #http://support.sas.com/rnd/app/examples/ets/granger/index.htm
        #the other tests are made-up

        # Granger Causality test using ssr (F statistic)
        fgc1 = ((res2down.ssr - res2djoint.ssr) /
                res2djoint.ssr / mxlg * res2djoint.df_resid)
        if verbose:
            print('ssr based F test:         F=%-8.4f, p=%-8.4f, df_denom=%d,'
                   ' df_num=%d' % (fgc1,
                                    stats.f.sf(fgc1, mxlg,
                                               res2djoint.df_resid),
                                    res2djoint.df_resid, mxlg))
        result['ssr_ftest'] = (fgc1,
                               stats.f.sf(fgc1, mxlg, res2djoint.df_resid),
                               res2djoint.df_resid, mxlg)

        # Granger Causality test using ssr (chi2 statistic)
        fgc2 = res2down.nobs * (res2down.ssr - res2djoint.ssr) / res2djoint.ssr
        if verbose:
            print('ssr based chi2 test:   chi2=%-8.4f, p=%-8.4f, '
                   'df=%d' % (fgc2, stats.chi2.sf(fgc2, mxlg), mxlg))
        result['ssr_chi2test'] = (fgc2, stats.chi2.sf(fgc2, mxlg), mxlg)

        #likelihood ratio test pvalue:
        lr = -2 * (res2down.llf - res2djoint.llf)
        if verbose:
            print('likelihood ratio test: chi2=%-8.4f, p=%-8.4f, df=%d' %
                   (lr, stats.chi2.sf(lr, mxlg), mxlg))
        result['lrtest'] = (lr, stats.chi2.sf(lr, mxlg), mxlg)

        # F test that all lag coefficients of exog are zero
        rconstr = np.column_stack((np.zeros((mxlg, mxlg)),
                                   np.eye(mxlg, mxlg),
                                   np.zeros((mxlg, 1))))
        ftres = res2djoint.f_test(rconstr)
        if verbose:
            print('parameter F test:         F=%-8.4f, p=%-8.4f, df_denom=%d,'
                   ' df_num=%d' % (ftres.fvalue, ftres.pvalue, ftres.df_denom,
                                    ftres.df_num))
        result['params_ftest'] = (np.squeeze(ftres.fvalue)[()],
                                  np.squeeze(ftres.pvalue)[()],
                                  ftres.df_denom, ftres.df_num)

        resli[mxlg] = (result, [res2down, res2djoint, rconstr])

    return resli
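
# Usage sketch (not part of the original function): with synthetic data in
# which x2 drives x1 by construction, all four tests should reject the null
# at lag 1.
if __name__ == "__main__":
    import numpy as np

    rng = np.random.RandomState(0)
    n = 500
    x2 = rng.standard_normal(n)
    x1 = np.zeros(n)
    for t in range(1, n):
        x1[t] = 0.5 * x1[t - 1] + 0.8 * x2[t - 1] + 0.1 * rng.standard_normal()
    res = grangercausalitytests(np.column_stack([x1, x2]), maxlag=2,
                                verbose=False)
    print(res[1][0]['ssr_ftest'])  # (F, p, df_denom, df_num); p should be tiny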
Example #5
def hacked_gct(x, maxlag, addconst=True, verbose=True):
    # Stripped-down grangercausalitytests: only the parameter F-test is kept,
    # so the restricted model (and scipy.stats) is not needed.
    import numpy as np
    from statsmodels.tsa.tsatools import lagmat2ds
    from statsmodels.tools.tools import add_constant
    from statsmodels.regression.linear_model import OLS
    #from scipy import stats

    x = np.asarray(x)

    if x.shape[0] <= 3 * maxlag + int(addconst):
        raise ValueError(
            "Insufficient observations. Maximum allowable "
            "lag is {0}".format(int((x.shape[0] - int(addconst)) / 3) - 1))

    resli = {}

    for mlg in range(1, maxlag + 1):
        result = {}
        if verbose:
            print('\nGranger Causality')
            print('number of lags (no zero)', mlg)
        mxlg = mlg

        # create lagmat of both time series
        dta = lagmat2ds(x, mxlg, trim='both', dropex=1)

        #add constant
        if addconst:
            '''dtaown = add_constant(dta[:, 1:(mxlg + 1)], prepend=False)'''
            dtajoint = add_constant(dta[:, 1:], prepend=False)
        else:
            raise NotImplementedError('Not Implemented')
            #dtaown = dta[:, 1:mxlg]
            #dtajoint = dta[:, 1:]

        # Run ols on both models without and with lags of second variable
        '''res2down = OLS(dta[:, 0], dtaown).fit()'''
        res2down = 'skipped'
        res2djoint = OLS(dta[:, 0], dtajoint).fit()

        #print results
        #for ssr based tests see:
        #http://support.sas.com/rnd/app/examples/ets/granger/index.htm
        #the other tests are made-up
        '''
        # Granger Causality test using ssr (F statistic)
        fgc1 = ((res2down.ssr - res2djoint.ssr) /
                res2djoint.ssr / mxlg * res2djoint.df_resid)
        if verbose:
            print('ssr based F test:         F=%-8.4f, p=%-8.4f, df_denom=%d,'
                   ' df_num=%d' % (fgc1,
                                    stats.f.sf(fgc1, mxlg,
                                               res2djoint.df_resid),
                                    res2djoint.df_resid, mxlg))
        result['ssr_ftest'] = (fgc1,
                               stats.f.sf(fgc1, mxlg, res2djoint.df_resid),
                               res2djoint.df_resid, mxlg)

        # Granger Causality test using ssr (ch2 statistic)
        fgc2 = res2down.nobs * (res2down.ssr - res2djoint.ssr) / res2djoint.ssr
        if verbose:
            print('ssr based chi2 test:   chi2=%-8.4f, p=%-8.4f, '
                   'df=%d' % (fgc2, stats.chi2.sf(fgc2, mxlg), mxlg))
        result['ssr_chi2test'] = (fgc2, stats.chi2.sf(fgc2, mxlg), mxlg)

        #likelihood ratio test pvalue:
        lr = -2 * (res2down.llf - res2djoint.llf)
        if verbose:
            print('likelihood ratio test: chi2=%-8.4f, p=%-8.4f, df=%d' %
                   (lr, stats.chi2.sf(lr, mxlg), mxlg))
        result['lrtest'] = (lr, stats.chi2.sf(lr, mxlg), mxlg)
        '''
        # F test that all lag coefficients of exog are zero
        rconstr = np.column_stack((np.zeros(
            (mxlg, mxlg)), np.eye(mxlg, mxlg), np.zeros((mxlg, 1))))
        ftres = res2djoint.f_test(rconstr)
        if verbose:
            print('parameter F test:         F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' %
                  (ftres.fvalue, ftres.pvalue, ftres.df_denom, ftres.df_num))
        result['params_ftest'] = (np.squeeze(ftres.fvalue)[()],
                                  np.squeeze(ftres.pvalue)[()], ftres.df_denom,
                                  ftres.df_num)

        resli[mxlg] = (result, [res2down, res2djoint, rconstr])

    return resli
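
# Only the parameter F-test is computed above, and it needs just the
# unrestricted fit (res2djoint), which is why the restricted model is
# 'skipped'. A usage sketch on synthetic noise (not from the original source):
if __name__ == "__main__":
    import numpy as np

    rng = np.random.RandomState(0)
    xy = rng.standard_normal((300, 2))
    out = hacked_gct(xy, maxlag=2, verbose=False)
    print(out[1][0]['params_ftest'])  # (F, p, df_denom, df_num)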
Example #6
    def compute(self, *signals):
        """
		This method computes the ConditionalGrangerCausality. At the end of the computation, a graph is made to show the links between the signals.

		:param signals:
			list of signals, one per person.
		:type signals: list[pd.DataFrame]

		:returns: dict
			-- matrix of links between the signals.

		"""

        ' Raise error if parameters are not in the correct type '
        for i in range(0, len(signals)):
            if not isinstance(signals[i], pd.DataFrame):
                raise TypeError("Requires signal " + str(i + 1) +
                                " to be a pd.DataFrame.")

        ' Raise error if DataFrames have not the same size '

        for i in range(0, len(signals)):
            if len(signals[0]) != len(signals[i]):
                raise ValueError(
                    "All the signals must have the same size. Signal " +
                    str(i + 1) + " does not have the same size as signal 1")

        # Saving the size of signals (they are all assumed to have the same size)
        T = len(signals[0])

        # Converting DataFrames to arrays :
        SIGNALS = np.zeros((T, len(signals)))

        for i in range(0, len(signals)):
            SIGNALS[:, i] = np.array(signals[i]).reshape(T)

        # Creating Matrix to save the links between the signals :
        M_direct = np.zeros((len(signals), len(signals)))

        # Testing for direct links between signals :
        print("Results of pairwise analysis:\n")
        for i in range(0, len(signals)):
            for j in range(0, len(signals)):
                if i != j:
                    gc = GC.GrangerCausality(max_lag=self._max_lag,
                                             criterion=self._criterion,
                                             plot=False)
                    gc_res = gc.compute(signals[i], signals[j])
                    if gc_res['ratio'] > 0 and gc_res['p_value'] < 0.01:
                        print("signal", j + 1, "->", i + 1, "detected")
                        M_direct[i, j] = 1

        # Computing the FULL VAR model :

        # First we have to determine the optimal order according to the given criterion
        olag_AR = np.zeros((len(signals), 1))

        # For each order, computing VAR :
        for k in range(0, len(signals)):

            # Permuting columns to compute VAR :
            SIGNALS_V = np.concatenate((SIGNALS[:, k:], SIGNALS[:, 0:k]),
                                       axis=1)

            criterion_value = np.zeros((self._max_lag, 1))

            # Testing each order :
            for lag in range(1, self._max_lag + 1):

                data = lagmat2ds(SIGNALS_V, lag, trim='both', dropex=1)
                datajoint = add_constant(data[:, 1:], prepend=False)
                OLS_ = OLS(data[:, 0], datajoint).fit()

                # Saving AIC or BIC temporary values :
                if self._criterion == 'bic':
                    criterion_value[lag - 1] = OLS_.bic
                elif self._criterion == 'aic':
                    criterion_value[lag - 1] = OLS_.aic

            olag_AR[k] = criterion_value.argmin() + 1

        # The optimal order is chosen as the rounded-up mean of the orders estimated from all models
        olag = int(np.ceil(np.mean(olag_AR)))

        # Now we can compute the VAR model with the computed order :
        VAR_resid = np.zeros((T - olag, len(signals)))

        for k in range(0, len(signals)):
            # Permuting columns to compute VAR :
            SIGNALS_P = np.concatenate((SIGNALS[:, k:], SIGNALS[:, 0:k]),
                                       axis=1)

            data = lagmat2ds(SIGNALS_P, olag, trim='both', dropex=1)
            datajoint = add_constant(data[:, 1:], prepend=False)
            OLS_ = OLS(data[:, 0], datajoint).fit()
            VAR_resid[:, k] = OLS_.resid

        # Computing the noise covariance matrix of the full model :
        VAR_noise_matrix = np.cov(VAR_resid.T)

        M_final = np.zeros((len(signals), len(signals)))

        # Testing for mediated links between signals :
        print("\n")
        for i in range(0, len(signals)):
            for j in range(0, len(signals)):
                if M_direct[i, j] == 1:
                    # We have detected a "direct link"; test against the other
                    # signals to know if it is in fact a mediated link:
                    for k in range(0, len(signals)):
                        if (k != j) and (k != i):
                            # All signals except i and j, shape (T, n - 2):
                            SIGNALS_M = np.delete(SIGNALS, [i, j], 1)
                            S = np.concatenate(
                                (SIGNALS[:, i].reshape(T, 1), SIGNALS_M),
                                axis=1)
                            data = lagmat2ds(S, olag, trim='both', dropex=1)
                            datajoint = add_constant(data[:, 1:],
                                                     prepend=False)
                            OLS_ = OLS(data[:, 0], datajoint).fit()
                            var_noise = np.var(OLS_.resid)
                            ratio = np.log(var_noise) - np.log(
                                VAR_noise_matrix[i, i])
                            if ratio < 0.01:
                                print("signal", j + 1, "->", i + 1,
                                      "is mediated by signal", k + 1)
                                M_direct[i, j] = 0
                                M_final[i, k] = 1
                                M_final[k, j] = 1
                                break
                            else:
                                M_final[i, j] = 1

        results = dict()
        results['link_matrix'] = M_final

        if self._plot:
            plt.ion()
            self.plot_result(results)

        return results
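
# Hypothetical usage sketch (the class name and constructor below are
# assumptions, mirroring the GC.GrangerCausality(max_lag=..., criterion=...,
# plot=...) call used inside compute()):
#
#   cgc = ConditionalGrangerCausality(max_lag=10, criterion='bic', plot=False)
#   res = cgc.compute(df1, df2, df3)   # one single-column pd.DataFrame each
#   print(res['link_matrix'])          # (n, n) 0/1 matrix of retained links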
		
Example #8
        # (Fragment: `olag_AR = np.zeros((len(signals), 1))` is defined just
        # before this loop in the full method shown above.)
        # For each order, computing VAR :
        for k in range(0, len(signals)):

            # Permuting columns (from the original array, so the rotations do
            # not accumulate) to compute VAR :
            SIGNALS_V = np.concatenate((SIGNALS[:, k:], SIGNALS[:, 0:k]),
                                       axis=1)

            criterion_value = np.zeros((self._max_lag, 1))

            # Testing each order :
            for lag in range(1, self._max_lag + 1):

                data = lagmat2ds(SIGNALS_V, lag, trim='both', dropex=1)
                datajoint = add_constant(data[:, 1:], prepend=False)
                OLS_ = OLS(data[:, 0], datajoint).fit()

                # Saving AIC or BIC temporary values :
                if self._criterion == 'bic':
                    criterion_value[lag - 1] = OLS_.bic
                elif self._criterion == 'aic':
                    criterion_value[lag - 1] = OLS_.aic

            olag_AR[k] = criterion_value.argmin() + 1

        # The optimal order is chosen as the rounded-up mean of the orders estimated from all models
        olag = int(np.ceil(np.mean(olag_AR)))

        # Now we can compute the VAR model with the computed order :
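
# Self-contained sketch of the order selection performed in the fragment
# above (an illustration, not the original API; `max_lag` stands in for
# self._max_lag, and the 'bic' criterion is hard-coded):
def select_order_bic(SIGNALS, max_lag):
    import numpy as np
    from statsmodels.tsa.tsatools import lagmat2ds
    from statsmodels.tools.tools import add_constant
    from statsmodels.regression.linear_model import OLS

    n = SIGNALS.shape[1]
    olag_AR = np.zeros(n)
    for k in range(n):
        # Rotate the columns so signal k becomes the regressand:
        rotated = np.concatenate((SIGNALS[:, k:], SIGNALS[:, :k]), axis=1)
        crit = np.zeros(max_lag)
        for lag in range(1, max_lag + 1):
            data = lagmat2ds(rotated, lag, trim='both', dropex=1)
            fit = OLS(data[:, 0],
                      add_constant(data[:, 1:], prepend=False)).fit()
            crit[lag - 1] = fit.bic
        olag_AR[k] = crit.argmin() + 1
    # Rounded-up mean of the per-signal optimal orders:
    return int(np.ceil(np.mean(olag_AR)))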
Example #11
def grangercausalitytests_mod(x, maxlag, addconst=True, verbose=True):

    import numpy as np
    from scipy import stats
    from statsmodels.tsa.tsatools import lagmat2ds
    from statsmodels.tools.tools import add_constant
    from statsmodels.regression.linear_model import OLS
    from warnings import warn

    x = np.asarray(x)

    if x.shape[0] <= 3 * maxlag + int(addconst):
        warn("Insufficient observations. Maximum allowable lag is {0}."
             "The maximum lag will be set to "
             "this number".format(int((x.shape[0] - int(addconst)) / 3) - 1))
        maxlag = int((x.shape[0] - int(addconst)) / 3) - 1
#    print(x.shape[0])
#    print(int((x.shape[0] - int(addconst)) /  3) - 1)
#    print(maxlag)

    resli = {}

    for mlg in range(1, maxlag + 1):

        result = {}
        if verbose:
            print('\nGranger Causality')
            print('number of lags (no zero)', mlg)
        mxlg = mlg

        # create lagmat of both time series
        # create lagmat of both time series; lagmat2ds orders each variable's
        # columns as lag 0, lag 1, ..., mxlg, so the un-lagged x2 is the
        # column at index mxlg + 1
        dta = lagmat2ds(x, mxlg, trim='both')
        dta = np.delete(dta, mxlg + 1, axis=1)  # removal of the not-lagged x2

        #add constant
        if addconst:
            dtaown = add_constant(dta[:, 1:(mxlg + 1)], prepend=False)
            dtajoint = add_constant(dta[:, 1:], prepend=False)
        else:
            raise NotImplementedError('Not Implemented')
            #dtaown = dta[:, 1:mxlg]
            #dtajoint = dta[:, 1:]

        # Run ols on both models without and with lags of second variable
        res2down = OLS(dta[:, 0], dtaown).fit()
        res2djoint = OLS(dta[:, 0], dtajoint).fit()

        #print results
        #for ssr based tests see:
        #http://support.sas.com/rnd/app/examples/ets/granger/index.htm
        #the other tests are made-up

        # Granger Causality test using ssr (F statistic)
        fgc1 = ((res2down.ssr - res2djoint.ssr) / res2djoint.ssr / mxlg *
                res2djoint.df_resid)
        if verbose:
            print('ssr based F test:         F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' %
                  (fgc1, stats.f.sf(fgc1, mxlg, res2djoint.df_resid),
                   res2djoint.df_resid, mxlg))
        result['ssr_ftest'] = (fgc1, stats.f.sf(fgc1, mxlg,
                                                res2djoint.df_resid),
                               res2djoint.df_resid, mxlg)

        # Granger Causality test using ssr (chi2 statistic)
        fgc2 = res2down.nobs * (res2down.ssr - res2djoint.ssr) / res2djoint.ssr
        if verbose:
            print('ssr based chi2 test:   chi2=%-8.4f, p=%-8.4f, '
                  'df=%d' % (fgc2, stats.chi2.sf(fgc2, mxlg), mxlg))
        result['ssr_chi2test'] = (fgc2, stats.chi2.sf(fgc2, mxlg), mxlg)

        #likelihood ratio test pvalue:
        lr = -2 * (res2down.llf - res2djoint.llf)
        if verbose:
            print('likelihood ratio test: chi2=%-8.4f, p=%-8.4f, df=%d' %
                  (lr, stats.chi2.sf(lr, mxlg), mxlg))
        result['lrtest'] = (lr, stats.chi2.sf(lr, mxlg), mxlg)

        # F test that all lag coefficients of exog are zero
        rconstr = np.column_stack((np.zeros(
            (mxlg, mxlg)), np.eye(mxlg, mxlg), np.zeros((mxlg, 1))))
        ftres = res2djoint.f_test(rconstr)
        if verbose:
            print('parameter F test:         F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' %
                  (ftres.fvalue, ftres.pvalue, ftres.df_denom, ftres.df_num))
        result['params_ftest'] = (np.squeeze(ftres.fvalue)[()],
                                  np.squeeze(ftres.pvalue)[()], ftres.df_denom,
                                  ftres.df_num)

        resli[mxlg] = (result, [res2down, res2djoint, rconstr])
    return resli
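
# Column-order check behind the np.delete above (an illustrative sketch, not
# from the original source): lagmat2ds lays out each variable as lag 0, lag 1,
# ..., maxlag, so with maxlag=2 the un-lagged x2 is column 3 (= maxlag + 1).
if __name__ == "__main__":
    import numpy as np
    from statsmodels.tsa.tsatools import lagmat2ds

    x_demo = np.column_stack([np.arange(10.0), np.arange(10.0) * 10])
    d = lagmat2ds(x_demo, 2, trim='both')
    print(d[0])  # [x1_t, x1_{t-1}, x1_{t-2}, x2_t, x2_{t-1}, x2_{t-2}]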
Example #12
    pickle.dump(results_ARIMAX1, f)
with open('results_Granger1', 'wb') as f:
    pickle.dump(results_Granger1, f)

plot_hist(results_NN1, lags)
plot_hist(results_LSTM1, lags)
plot_hist(results_GRU1, lags)

#%% AR models performance on test set
from scipy import stats
from statsmodels.tsa.tsatools import lagmat2ds
from statsmodels.tools.tools import add_constant
#results_Granger1 = grangercausalitytests(data[:7000,:],lags,verbose=False)
for l in lags:
    mdl1 = results_Granger1[l][1][0]
    mdl2 = results_Granger1[l][1][1]
    data_gr = lagmat2ds(data[7000:, :], l, trim="both", dropex=1)
    dtaown = add_constant(data_gr[:, 1:(l + 1)], prepend=False)
    dtajoint = add_constant(data_gr[:, 1:], prepend=False)
    x_pred1 = mdl1.predict(dtaown)
    x_pred2 = mdl2.predict(dtajoint)
    error1 = x_pred1 - data[7000 + l:, 0]
    error2 = x_pred2 - data[7000 + l:, 0]
    rss_x1 = sum(error1**2)
    rss_x2 = sum(error2**2)
    RSS1['Granger'][l] = rss_x1
    RSS2['Granger'][l] = rss_x2
    print('RSS1 = %0.2f' % rss_x1)
    print('RSS2 = %0.2f' % rss_x2)
    S, p_value = stats.wilcoxon(np.abs(error1),
                                np.abs(error2),
                                alternative='greater')
Example #14
        # Converting DataFrames to arrays :
        signal_to_predict = np.array(x).reshape(len(x))
        helping_signal = np.array(y).reshape(len(y))

        # Concatenate the two signals in a (nobs,2) array
        X = np.array([signal_to_predict, helping_signal]).T

        # Arrays that will contain BIC or AIC values according to the given criterion :
        C_r = np.zeros((self._max_lag, 1))
        C_u = np.zeros((self._max_lag, 1))

        # Computing OLS models for both 'restricted' and 'unrestricted' models, for each lag between 1 and 'max_lag'
        for lag in range(1, self._max_lag + 1):

            # Adapting data :
            data = lagmat2ds(X, lag, trim='both', dropex=1)
            dataown = add_constant(data[:, 1:(lag + 1)], prepend=False)
            datajoint = add_constant(data[:, 1:], prepend=False)

            # OLS models :
            OLS_restricted = OLS(data[:, 0], dataown).fit()
            OLS_unrestricted = OLS(data[:, 0], datajoint).fit()

            # Saving AIC or BIC values :
            if self._criterion == 'bic':
                C_r[lag - 1] = OLS_restricted.bic
                C_u[lag - 1] = OLS_unrestricted.bic
            elif self._criterion == 'aic':
                C_r[lag - 1] = OLS_restricted.aic
                C_u[lag - 1] = OLS_unrestricted.aic
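        # (The fuller version in the final example continues from here:
        #  olag = min(C_r.argmin() + 1, C_u.argmin() + 1), then both models
        #  are refit with that optimal lag and an F-test is performed.)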
Example #15
# Console exploration of grangercausalitytests results (cleaned session
# transcript; `res`, `dff`, `dff_e` and `mxlg` come from the surrounding
# session):
ssrEig = reg[0].ssr
ssrBeid = reg[1].ssr
print(np.log(ssrEig / ssrBeid))
test, reg = res[1]
reg[0].llf
reg[1].llf
reg[1].nobs
reg[0].ssr
reg[1].ssr
reg[0].params
reg[1].params
x = np.vstack([dff_e[3], dff[43]]).T
x.shape
from statsmodels.tsa.tsatools import lagmat, lagmat2ds, add_trend
dta = lagmat2ds(x, 1, trim='both', dropex=1)
dta.shape
dta[:3, :]
x[:3]
pred1 = reg[0].predict(reg[0].params, dta[:, 1])
reg[0].params
reg[0].exog.shape
reg[0].exog
from statsmodels.tools.tools import add_constant, Bunch
dtaown = add_constant(dta[:, 1:(mxlg + 1)], prepend=False)
dtajoint = add_constant(dta[:, 1:], prepend=False)
dtaown[:3]
Example #16
def grangercausalitytests(x, mxlg, autolag=None, alpha=0.0001, max_iter=1e5, addconst=True, verbose=True):
    """Lasso-based variant of the Granger non-causality setup

    Instead of the four classical tests, a single Lasso regression is fit on
    all lagged variables; the lags of the other series that keep non-zero
    coefficients are taken as evidence against the null hypothesis that the
    coefficients on past values of those series are zero.

    Parameters
    ----------
    x : array, 2d
        data for test whether the time series in the second column Granger
        causes the time series in the first column
    mxlg : integer
        the number of lags used to build the lag matrix
    autolag : str or None
        if 'aic', the lag which minimizes the information criterion would be
        used (unused in this version)
    alpha : float
        Lasso regularization strength
    max_iter : int
        maximum number of Lasso iterations
    addconst : bool
        include a constant in the model if true (False is not implemented)
    verbose : bool
        print results if true

    Returns
    -------
    (rmse, non_zero_vars, best_vars) : tuple
        a windowed aggregate of absolute prediction errors (named `rmse`
        here), a dict mapping each variable block to the lag of its largest
        surviving coefficient, and a dict with those coefficient values.

    References
    ----------
    http://en.wikipedia.org/wiki/Granger_causality
    Greene: Econometric Analysis

    """
    import numpy as np
    from statsmodels.tsa.tsatools import lagmat2ds
    from statsmodels.tools.tools import add_constant
    from sklearn.linear_model import Lasso

    x = np.asarray(x)

    if x.shape[0] <= 3 * mxlg + int(addconst):
        raise ValueError("Insufficient observations. Maximum allowable "
                         "lag is {0}".format(int((x.shape[0] - int(addconst)) /
                                                 3) - 1))
    result = {}
    if verbose:
        print('\nGranger Causality')
        print('number of lags (no zero)', mxlg)

    # create lagmat of both time series
    dta = lagmat2ds(x, mxlg, trim='both', dropex=1)

    #add constant
    if addconst:
        dtajoint = add_constant(dta[:, 1:], prepend=False)
    else:
        raise NotImplementedError('Not Implemented')

    # Run Lasso on all lagged variables; drop the appended constant column
    # (add_constant with prepend=False puts it last) since Lasso fits its own
    # intercept. Note: Lasso's `normalize` argument was removed in
    # scikit-learn 1.2; standardize the design matrix manually on newer
    # versions.
    lassoreg = Lasso(alpha=alpha, normalize=True, max_iter=int(max_iter))
    lassoreg.fit(dtajoint[:, :-1], dta[:, 0])
    pred = lassoreg.predict(dtajoint[:, :-1])
    actual = dta[:, 0]
    errors = [abs(i - j) for i, j in zip(actual, pred)]
    step_size = 10
    window_size = 30
    avg_errors = []
    # Aggregate absolute errors over sliding windows (integer division keeps
    # this working under Python 3):
    for i in range(len(actual) // step_size):
        err = 0
        for j in range(i * step_size, (i * step_size) + window_size):
            if j >= len(actual):
                break
            err += errors[j]
        avg_errors += [err]
    rmse = np.mean(avg_errors)
    result = lassoreg.coef_
    non_zeros = [(i, c) for i, c in enumerate(result) if c != 0]
    non_zero_vars = {}
    best_vars = {}
    # For each variable block, keep the lag with the largest surviving
    # coefficient:
    for (i, c) in non_zeros:
        k = (i + 1) // mxlg
        if k not in non_zero_vars or abs(c) > abs(best_vars[k]):
            non_zero_vars[k] = (i + 1) % mxlg
            best_vars[k] = c
    return (rmse, non_zero_vars, best_vars)
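
# Usage sketch (synthetic data; assumes scikit-learn < 1.2, where
# Lasso(normalize=True) still exists). With x2 driving x1 by construction,
# at least one x2 lag should keep a non-zero coefficient.
if __name__ == "__main__":
    import numpy as np

    rng = np.random.RandomState(1)
    n = 400
    x2 = rng.standard_normal(n)
    x1 = np.zeros(n)
    for t in range(1, n):
        x1[t] = 0.4 * x1[t - 1] + 0.7 * x2[t - 1] + 0.1 * rng.standard_normal()
    rmse, non_zero_vars, best_vars = grangercausalitytests(
        np.column_stack([x1, x2]), mxlg=3, alpha=0.001, verbose=False)
    print(non_zero_vars, best_vars)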
llf_arima_esti = ar1_model.llf

print(ar1_model.summary())

mu_arima_results = [mu_arima_esti, mu_arima_CI]
phi_arima_results = [phi_arima_esti, phi_arima_CI]
pd.DataFrame({'mu': mu_arima_results, 'phi': phi_arima_results},
             index=['ML estimation', '95% confidence interval'])

#%% OLS estimation of the AR(1) parameters
Y_endog = Y[1:]

Ylag = np.transpose(np.matrix(lagmat2ds(
    x=Y, maxlag0=1)[1:, 1]))  # exclude the first missing point
# convert into matrix to match the datatype of mu_aux in order for concatenation
mu_aux = np.transpose(np.matrix(np.ones(len(Ylag))))

exogen = np.array(np.concatenate((mu_aux, Ylag), axis=1))

OLS_reg = sm.OLS(endog=Y_endog, exog=exogen)

results = OLS_reg.fit()

print(results.summary())

mu_OLS = results.params[0]
phi_OLS = results.params[1]
mu_OLS_CI = results.conf_int()[0]
phi_OLS_CI = results.conf_int()[1]
Example #19
    def compute(self, *signals):
            """
            It computes restricted AR and unrestricted AR models, and evaluates whether the first signal (first parameter) could be forecasted
            by the others. F-value and p-value are computed, the interpretation of the results is up to the user.

            :param signals:
                list of signals, one per person.
            :type signals: list[pd.DataFrame]

            :returns: dict
                -- F-values and P-values.
            """
            ' Raise error if parameters are not in the correct type '
            for i in range(len(signals)):
                if not isinstance(signals[i], pd.DataFrame):
                    raise TypeError("Requires signal " + str(i + 1) +
                                    " to be a pd.DataFrame.")

            ' Raise error if DataFrames have not the same size or same indexes '
            for i in range(0, len(signals)):
                if len(signals[0]) != len(signals[i]):
                    raise ValueError(
                        "All the signals must have the same size. Signal " +
                        str(i + 1) + " does not have the same size as first signal.")
                if signals[0].index.tolist() != signals[i].index.tolist():
                    raise ValueError(
                        "All the signals must have the same time indexes. Signal " +
                        str(i + 1) + " does not have the same time index as first signal.")


            # Saving the size of signals (they are all assumed to have the same size)
            T = len(signals[0])

            # Converting DataFrames to arrays :
            SIGNALS = np.zeros((T,len(signals)))

            for i in range(0,len(signals)):
                SIGNALS[:,i] = np.array(signals[i]).reshape(T)

            # Arrays that will contain BIC or AIC values according to the given criterion :
            C_r = np.zeros((self._max_lag,1))
            C_u = np.zeros((self._max_lag,1))

            # Computing OLS models for both 'restricted' and 'unrestricted' models, for each lag between 1 and 'max_lag'
            for lag in range(1, self._max_lag+1):
                # Adapting data :
                data = lagmat2ds(SIGNALS,lag,trim ='both', dropex = 1)
                dataown = add_constant(data[:, 1:(lag + 1)], prepend=False)
                datajoint = add_constant(data[:, 1:], prepend=False)

                # OLS models :
                OLS_restricted = OLS(data[:, 0], dataown).fit()
                OLS_unrestricted = OLS(data[:, 0], datajoint).fit()

                # Saving AIC or BIC values :
                if self._criterion == 'bic':
                    C_r[lag-1] = OLS_restricted.bic
                    C_u[lag-1] = OLS_unrestricted.bic
                elif self._criterion == 'aic':
                    C_r[lag-1] = OLS_restricted.aic
                    C_u[lag-1] = OLS_unrestricted.aic

            # Determine the optimal 'lag' according to 'bic' or 'aic' criterion :
            olag_r = C_r.argmin()+1
            olag_u = C_u.argmin()+1
            olag = min(olag_r,olag_u)

            # Computing OLS models with the optimal 'lag'
            data = lagmat2ds(SIGNALS,olag,trim ='both', dropex = 1)
            dataown = add_constant(data[:, 1:(olag + 1)], prepend=False)
            datajoint = add_constant(data[:, 1:], prepend=False)
            OLS_restricted = OLS(data[:, 0], dataown).fit()
            OLS_unrestricted = OLS(data[:, 0], datajoint).fit()

            # Checking divisions by zero
            if OLS_unrestricted.ssr == 0:
                raise ValueError("OLS_unrestricted.ssr can't be equal to zero because it's used as a divisor.")

            if olag == 0:
                raise ValueError("olag can't be equal to zero because it's used as a divisor.")

            # Doing the F-test:
            F_value = ((OLS_restricted.ssr - OLS_unrestricted.ssr) /
                       OLS_unrestricted.ssr / olag * OLS_unrestricted.df_resid)
            p_value = stats.f.sf(F_value, olag, OLS_unrestricted.df_resid)

            # Computing predicted signal with restricted model (the fitted
            # constant term is not added back in these predictions) :
            predicted_signal_restricted = np.zeros(T)
            predicted_signal_restricted[0:olag] = np.copy(SIGNALS[0:olag, 0])

            for i in range(olag, T):
                predicted_signal_restricted[i] = np.dot(
                    SIGNALS[(i - 1) - np.array(range(0, olag)), 0],
                    OLS_restricted.params[0:olag])

            # Computing predicted signal with unrestricted model :
            predicted_signal_unrestricted = np.zeros(T)
            predicted_signal_unrestricted[0:olag] = np.copy(SIGNALS[0:olag, 0])

            for i in range(olag, T):
                for k in range(0, len(signals)):
                    predicted_signal_unrestricted[i] += np.dot(
                        SIGNALS[(i - 1) - np.array(range(0, olag)), k],
                        OLS_unrestricted.params[k * olag:(k + 1) * olag])

            results = dict()
            results['F_value'] = F_value
            results['p_value'] = p_value
            results['optimal_lag'] = olag
            results['predicted_signal_restricted'] = predicted_signal_restricted
            results['predicted_signal_unrestricted'] = predicted_signal_unrestricted

            if self._plot:
                plt.ion()
                self.plot_result(results)

            return results
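
# Hypothetical usage sketch (class name and constructor are assumptions in
# the same style as the examples above):
#
#   gc = GrangerCausality(max_lag=10, criterion='bic', plot=False)
#   res = gc.compute(df_target, df_helper)   # single-column pd.DataFrames
#   print(res['F_value'], res['p_value'], res['optimal_lag'])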