def predict_ARMA(df_lrt, time_index, days_to_forecast, window=30):
    """Forecast every asset with an ARMA model and standardise the move.

    For each column of ``df_lrt`` an ARMA(1, 1) model is fitted on the
    ``window`` rows preceding ``time_index``. If that fit raises, an
    ARMA(0, 0) model is fitted instead.

    Args:
        df_lrt: DataFrame with one column per asset.
        time_index: Positional row index; rows
            ``[time_index - window, time_index)`` form the estimation sample.
        days_to_forecast: Number of steps to forecast. Only the last step
            is used.
        window: Number of trailing rows used for fitting. Defaults to 30,
            the value that used to be hard-coded.

    Returns:
        The last-step forecast minus the last value in the window, divided
        by the column's standard deviation over the window (one entry per
        asset, indexed like ``df.std(axis=0)``).
    """
    import numpy as np
    import statsmodels.tsa.api as smt

    # Clamp at 0: a negative start would be interpreted by ``iloc`` as an
    # offset from the end of the frame and silently select the wrong rows.
    start = max(time_index - window, 0)
    df = df_lrt.iloc[start:time_index, :]
    prediction = np.empty(len(df.columns))

    for pos, asset in enumerate(df.columns):
        series = df[asset]
        try:
            model_fit = smt.ARMA(series, (1, 1)).fit()
        except Exception:
            # ARMA(1, 1) could not be estimated; fall back to the
            # constant-only ARMA(0, 0) model.
            model_fit = smt.ARMA(series, (0, 0)).fit()
        prediction[pos] = model_fit.forecast(steps=days_to_forecast)[0][-1]
    # TODO: could be improved by selecting the AR and MA orders automatically.

    return (prediction - df.iloc[-1, :].values) / df.std(axis=0)
Beispiel #2
0
 def _gdp_vol_estimate(self, variables):
     """
     Estimate the conditional volatility of GDP growth.

     An ARMA model of order (1, 0) is fitted to ``variables['gdp']``. The
     absolute values of its residuals are fitted with a second order-(1, 0)
     model, and that model's fitted values are returned.
     """
     ar1_order = (1, 0)
     gdp_fit = tsa.ARMA(variables['gdp'], order=ar1_order).fit(disp=False)
     abs_resid = np.abs(gdp_fit.resid)
     abs_resid_fit = tsa.ARMA(abs_resid, order=ar1_order).fit(disp=False)
     return abs_resid_fit.fittedvalues
 def analyse_ts_arma(self, data):
     """Search ARMA(p, q) orders on the logged close series and report the best.

     Every order with p and q in 0..4 is fitted with ``method='mle'`` and
     ``trend='nc'``; the fit with the lowest BIC is kept. Its summary, order
     and AR/MA coefficients are logged, and its residuals are handed to
     ``self.g.tsplot`` with ``saveas='ts_arma<p><q>_residuals.png'``.

     If no order can be fitted, a message is logged and the method returns
     without plotting.
     """
     ts = self.__logged_data(data).Close
     best_ic = np.inf
     best_order = None
     best_mdl = None
     rng = range(5)      # candidate orders 0..4 for both p and q
     for i in rng:
         for j in rng:
             try:
                 tmp_mdl = smt.ARMA(ts, order=(i, j)).fit(
                         method='mle', trend='nc'
                         )
                 tmp_ic = tmp_mdl.bic    # using bic here
             except Exception:
                 # This order could not be estimated; try the next one.
                 # ``Exception`` (not a bare except) keeps Ctrl-C working.
                 continue
             logn('ic={}, order=({}, {})'.format(tmp_ic, i, j))
             if tmp_ic < best_ic:
                 best_ic = tmp_ic
                 best_order = (i, j)
                 best_mdl = tmp_mdl
     if best_mdl is None:
         # Nothing converged: there is no summary or residual to report.
         logn('no ARMA model could be fitted')
         return
     logn(best_mdl.summary())
     logn('using BIC', '='*20, sep='\n')
     logn('ic: {:6.5f} | estimated order: {}'.format(best_ic, best_order))
     logn('estimated alphas = {}'.format(best_mdl.arparams))
     logn('estimated betas = {}'.format(best_mdl.maparams))
     self.g.tsplot(best_mdl.resid,
                   lags=self.__n_lags,
                   saveas='ts_arma{}{}_residuals.png'.format(
                           best_order[0], best_order[1]
                           )
                   )
Beispiel #4
0
 def learn_model_params(self, train_values):
     """Fit an ARMA model of order (0, window_size) and keep its parameters.

     The model, the fit result and the fitted parameter vector are stored on
     the instance's private ``__model``, ``__model_fit`` and ``__params``
     attributes.
     """
     window = self.__window_size
     self.__model = smt.ARMA(train_values, order=(0, window))
     fit_options = dict(maxlag=window, method='mle', trend='c', disp=-1)
     self.__model_fit = self.__model.fit(**fit_options)
     self.__params = self.__model_fit.params
def create_arma(_df, store_items, filepath, exceptional):
    """Fit an ARMA(1, 1) model per (store, item) pair and pickle each result.

    For every pair the matching rows of ``_df`` are selected, sorted by
    ``date2`` and re-indexed with consecutive days starting at 2012-01-01.
    The model is fitted on the ``log1p`` column and the fit result is
    written to ``filepath + '<store>_<item>.pkl'``.

    Args:
        _df: DataFrame with ``store_nbr``, ``item_nbr``, ``date2`` and
            ``log1p`` columns.
        store_items: Iterable of ``(store_nbr, item_nbr)`` pairs.
        filepath: Prefix prepended to every pickle file name.
        exceptional: List that is appended to in place with ``[sno, ino]``
            for each pair whose model could not be built or fitted.

    Returns:
        List with the number of rows of each pair whose model was
        constructed. An entry is added before fitting, so pairs whose fit
        fails are still counted.
    """
    extra_arr = []
    # Loop-invariant start of the synthetic daily index.
    start = datetime.datetime.strptime("2012-01-01", "%Y-%m-%d")
    for sno, ino in store_items:
        df = _df[(_df.store_nbr == sno) & (_df.item_nbr == ino)].copy()
        df = df.set_index('date2', drop=False)
        df = df.sort_index()
        date_list = [
            start + relativedelta(days=x) for x in range(len(df.index))
        ]

        df['index'] = date_list
        df.set_index(['index'], inplace=True)
        df.index.name = None
        try:
            arma = tsa.ARMA(df.log1p, order=(1, 1))
            extra_arr.append(len(date_list))
            results = arma.fit()
            print(sno, ino, len(date_list))
        except Exception:
            # Best effort: remember the failing pair and carry on.
            exceptional.append([sno, ino])
        else:
            out_name = filepath + str(sno) + '_' + str(ino) + '.pkl'
            # ``with`` closes the file even if pickling raises.
            with open(out_name, 'wb') as out:
                pickle.dump(results, out)

    return extra_arr
Beispiel #6
0
def best_arma(df):
    '''Find the lowest-AIC ARMA(p, q) model for every column of ``df``.

    For each column all orders with p and q in 1..4 are fitted with
    ``method='mle'`` and ``trend='c'``. Orders that fail to fit are skipped.

    Returns a 1-D object array holding two entries per column, in column
    order: the tuple ``(column, p, q)`` of the best model followed by the
    fitted model itself. Both entries are ``None`` when no order could be
    fitted for that column.
    '''
    a = []  # flat list: best order, best model, best order, best model, ...
    rng = range(1, 5)  # candidate values for both p and q

    for i in df:
        best_aic = np.inf  # smallest AIC wins
        best_order = None
        best_mdl = None

        for j in rng:
            for k in rng:
                try:
                    tmp_mdl = smt.ARMA(df[i], order=(j, k)).fit(
                        method='mle', trend='c')
                    tmp_aic = tmp_mdl.aic
                except Exception:
                    # This order could not be estimated; try the next one.
                    continue
                print(tmp_aic)
                if tmp_aic < best_aic:
                    best_aic = tmp_aic
                    best_order = (i, j, k)
                    best_mdl = tmp_mdl
        a += [best_order, best_mdl]

    # Fill an object array element by element: np.array() on a list that
    # mixes tuples and model objects is ragged and raises on current NumPy.
    result = np.empty(len(a), dtype=object)
    for pos, item in enumerate(a):
        result[pos] = item
    return result
def delay_ARMA_model(filename, lag, delay):
    """Grid-search the delay and ARMA order with the lowest AIC and print them.

    The two series come from ``get_input_output(filename)``: element 0 is
    used as the endogenous (output) series and element 1 as the exogenous
    (input) series. For every delay ``d`` in ``range(delay)`` the output is
    shifted by dropping its first ``d`` values and the input by dropping its
    last ``d`` values; every order ``(i, j)`` with ``i, j`` in ``range(lag)``
    is then fitted with ``method='mle'`` and ``trend='nc'``.

    Combinations that fail to fit are skipped. Nothing is returned; the best
    AIC, order and delay are printed.
    """
    value = get_input_output(filename)
    output_data = value[0]
    input_data = value[1]

    best_aic = np.inf
    best_order = None
    best_delay = None

    for d in range(delay):
        output_tmp = np.array(output_data[d:])
        # Use an explicit end index: ``input_data[:-d]`` is an EMPTY slice
        # for d == 0, which made the zero-delay case impossible to fit.
        input_tmp = np.array(input_data[:len(input_data) - d])
        for i in range(lag):
            for j in range(lag):
                try:
                    tmp_mdl = smt.ARMA(output_tmp,
                                       order=(i, j),
                                       exog=input_tmp).fit(
                                           method='mle', trend='nc')
                    tmp_aic = tmp_mdl.aic
                except Exception:
                    # This combination could not be estimated; skip it.
                    continue
                if tmp_aic < best_aic:
                    best_aic = tmp_aic
                    best_order = (i, j)
                    best_delay = d

    print('aic: {:6.5f} | order: {}'.format(best_aic, best_order))
    print('delay', best_delay)
Beispiel #8
0
    def fit_ARMA(self, y, order_ar, order_ma, maxLag=30):
        '''Fit an ARMA model of a given order to a demeaned signal.

        This function does not search for the best order.

        Arguments:
        ----------
        y         : array-like signal; it is not modified
        order_ar  : order of the autoregressive (AR) part
        order_ma  : order of the moving-average (MA) part
        maxLag    : forwarded to ``fit`` as ``maxlag``

        Return:
        ----------
        mdl       : the fitted model object (its summary is also printed)
        '''
        # Demean with a mean computed ONCE. The previous element-wise loop
        # re-evaluated np.mean(y) after every in-place write, so each sample
        # was shifted by a different amount and the caller's array was
        # overwritten.
        y_mean = np.mean(y)
        if y_mean != 0:
            y = np.subtract(y, y_mean)

        # NOTE(review): random regressors handed to ``fit`` - looks like a
        # placeholder rather than real exogenous data; confirm intent.
        u = np.random.randn(len(y), 2)

        mdl = smt.ARMA(y, order=(order_ar, order_ma)).fit(maxlag=maxLag,
                                                          method='mle',
                                                          trend='nc',
                                                          exog=u)
        print(mdl.summary())
        return mdl
Beispiel #9
0
    def bestfit_ARMA(self, y):
        '''Find the ARMA(p, q) order with the lowest Akaike Information
        Criterion (AIC) for p and q in 0..4.

        The signal must be causal, stationary and invertible. Orders that
        fail to fit are skipped.

        Returns the best fitted model, or None when no order could be
        fitted. The best AIC/order and, if available, the model summary are
        printed.
        '''
        best_aic = np.inf
        best_order = None
        best_mdl = None
        # NOTE(review): random regressors handed to ``fit`` - looks like a
        # placeholder rather than real exogenous data; confirm intent.
        u = np.random.randn(len(y), 2)

        rng = range(5)
        for i in rng:
            for j in rng:
                try:
                    tmp_mdl = smt.ARMA(y, order=(i, j)).fit(method='mle',
                                                            trend='nc',
                                                            exog=u)
                    tmp_aic = tmp_mdl.aic
                except Exception:
                    # This order could not be estimated; try the next one.
                    continue
                if tmp_aic < best_aic:
                    best_aic = tmp_aic
                    best_order = (i, j)
                    best_mdl = tmp_mdl

        print('aic: {:6.5f} | order: {}'.format(best_aic, best_order))
        # print() as a function: the Python 2 statement form that stood here
        # is a syntax error under Python 3.
        if best_mdl is not None:
            print(best_mdl.summary())
        return best_mdl
Beispiel #10
0
    def fit_ARMAX(self, y, order_ar, order_ma, maxLag=30):
        '''Fit an ARMA model of a given order to a demeaned signal.

        NB: NEEDS FIXING - the regressors are random numbers passed to
        ``fit``; no real exogenous input is accepted yet.

        Arguments:
        ---------
        y         : array-like signal; it is not modified
        order_ar  : order of the autoregressive (AR) part
        order_ma  : order of the moving-average (MA) part
        maxLag    : forwarded to ``fit`` as ``maxlag``

        Return:
        ---------
        mdl       : the fitted model object (its summary is also printed)
        '''
        # Demean with a mean computed ONCE. The previous element-wise loop
        # re-evaluated np.mean(y) after every in-place write, so each sample
        # was shifted by a different amount and the caller's array was
        # overwritten.
        y_mean = np.mean(y)
        if y_mean != 0:
            y = np.subtract(y, y_mean)

        # NOTE(review): placeholder random regressors; see docstring.
        u = np.random.randn(len(y), 2)

        mdl = smt.ARMA(y, order=(order_ar, order_ma)).fit(maxlag=maxLag,
                                                          method='mle',
                                                          trend='nc',
                                                          exog=u)
        print(mdl.summary())
        return mdl
Beispiel #11
0
def ARMA_Predict(df, order, symbols, t=30):
    """Return the mean ``t``-step ARMA forecast for every symbol.

    ``order[k]`` supplies the (p, q) pair used for ``symbols[k]``; each model
    is fitted on ``df[symbol]`` with ``method='mle'`` and ``trend='c'``.
    The result is a list with one averaged forecast per symbol.
    """
    predict = []
    for idx, symbol in enumerate(symbols):
        fitted = smt.ARMA(
            df[symbol], order=(order[idx][0], order[idx][1])
        ).fit(method='mle', trend='c')
        forecast_path = fitted.forecast(steps=t)[0].reshape((t, 1))
        predict.append(np.mean(forecast_path))

    return predict
Beispiel #12
0
def ARMA_fit(endogenous, order, exog=None, trend='c'):
    '''
    Build a ``tsa.ARMA`` model and return the result of fitting it.

    Example: ARMA_fit(y, [3, 0, 1])
    Print the outcome with results.summary(), or inspect results.params,
    results.resid or results.pvalues.
    :param endogenous: passed to ``tsa.ARMA`` as the first argument
    :param order: passed to ``tsa.ARMA`` as the second argument
    :param exog: passed to ``tsa.ARMA`` as ``exog``
    :param trend: passed to ``fit`` as ``trend``
    :return: the object returned by ``fit``
    '''
    model = tsa.ARMA(endogenous, order, exog=exog)
    results = model.fit(trend=trend)
    return results
Beispiel #13
0
def ma_model(code='399001'):
    """Fit an order-(0, 3) ARMA model to a stock series and plot its residuals.

    The series comes from ``get_stock_data(code)``. The model summary is
    printed and the residuals, re-indexed like the input series, are passed
    to ``ts_plot``.
    """
    max_lag = 30
    Y = get_stock_data(code)

    ma3 = smt.ARMA(Y.values, order=(0, 3))
    result = ma3.fit(maxlag=max_lag, method='mle', trend='nc')
    print(result.summary())

    # The title suffix is Chinese for "MA fit residuals".
    resid = pd.Series(result.resid, index=Y.index)
    ts_plot(resid, lags=max_lag, title=code + 'MA拟合残差')
Beispiel #14
0
 def _getStartingVals(self):
     """Set ``self._startingValues`` for the optimiser.

     Without data, every one of the ``self._pnum`` parameters starts at 0.5.
     With data, an ARMA model of order ``(self._order['AR'],
     self._order['MA'])`` is fitted (with a constant only when
     ``self._include_constant`` is true) and its parameters are used; if the
     fit raises ``ValueError`` the starting values are set to ``None``.
     """
     if self._data is None:
         self._startingValues = np.zeros((self._pnum, )) + 0.5
         return

     c = 'c' if self._include_constant else 'nc'
     arma_order = (self._order['AR'], self._order['MA'])
     try:
         model = sm.ARMA(self._data.values, arma_order).fit(trend=c)
         self._startingValues = model.params
     except ValueError:
         self._startingValues = None
 def fit_arma_model_and_estimate_order(
         self, data, order=(0, 1), maxlag=None,
         method='mle', trend='nc', burnin=0
         ):
     """Fit an ARMA model of the given order and return its coefficients.

     ``maxlag`` falls back to the instance's ``__n_lags`` when it is None.
     Progress and the model summary are written through ``log``/``logn``.

     Returns the tuple ``(arparams, k_ar, maparams, k_ma)`` read from the
     fitted model.
     """
     lag_limit = self.__n_lags if maxlag is None else maxlag
     log('Fitting & estimating the ARMA model to the given data...')
     arma = smt.ARMA(data, order=order)
     mdl = arma.fit(maxlag=lag_limit,
                    method=method,
                    trend=trend,
                    burnin=burnin)
     logn('[Done]')
     logn(mdl.summary())
     return mdl.arparams, mdl.k_ar, mdl.maparams, mdl.k_ma
Beispiel #16
0
    def _gdp_vol_estimate(self, variables):
        """
        Estimate the long-run volatility of GDP growth.

        Raw volatility is the absolute residual of an order-(1, 0) ARMA
        model fitted to ``variables['gdp']``. It is smoothed with
        ``hpfilter`` and the trend component is returned.

        Returns:
        --------

        trend:
            The trend component that ``hpfilter`` extracts from the
            absolute residuals
        """
        ar1_fit = tsa.ARMA(variables['gdp'], order=(1, 0)).fit(disp=False)
        abs_vol = np.abs(ar1_fit.resid)
        # hpfilter returns (cycle, trend); only the trend is needed
        _cycle, trend = sm_filters.tsa.filters.hpfilter(abs_vol)
        return trend
 def analyse_arma_p_q_best_ic(self, p=1, q=1):
     """Simulate an ARMA(p, q) sample and check that a BIC search recovers it.

     A sample is drawn with ``self.get_sample_data`` (n=5000, 2000 burn-in)
     and plotted. Every order with p and q in 0..4 is then fitted with
     ``method='mle'`` and ``trend='nc'`` and the lowest-BIC model is kept.
     The true and estimated orders/coefficients are logged and the
     residuals of the best model are plotted.

     If no order can be fitted, a message is logged and the method returns
     after the first plot.
     """
     n = 5000
     burns = 2000
     a, b, rts = self.get_sample_data(
             m=SerialCorrelation.ModelType.arma, p=p, q=q, n=n, b=burns
             )
     self.g.tsplot(rts,
                   lags=self.__n_lags,
                   saveas='arma{}{}.png'.format(p, q)
                   )
     # pick best order by minimum ic - aic or bic
     # smallest ic value wins
     best_ic = np.inf
     best_order = None
     best_mdl = None
     rng = range(5)
     for i in rng:
         for j in rng:
             try:
                 tmp_mdl = smt.ARMA(rts, order=(i, j)).fit(
                         method='mle', trend='nc'
                         )
                 tmp_ic = tmp_mdl.bic    # using bic here
             except Exception:
                 # This order could not be estimated; try the next one.
                 # ``Exception`` (not a bare except) keeps Ctrl-C working.
                 continue
             if tmp_ic < best_ic:
                 best_ic = tmp_ic
                 best_order = (i, j)
                 best_mdl = tmp_mdl
     if best_mdl is None:
         # Nothing converged: there is no summary or residual to report.
         logn('no ARMA model could be fitted')
         return
     logn(best_mdl.summary())
     logn('using BIC', '='*20, sep='\n')
     logn('true order: ({}, {})'.format(p, q))
     logn('true alphas = {}'.format(a))
     logn('true betas = {}'.format(b))
     logn('ic: {:6.5f} | estimated order: {}'.format(best_ic, best_order))
     logn('estimated alphas = {}'.format(best_mdl.arparams))
     logn('estimated betas = {}'.format(best_mdl.maparams))
     # analysing the model residuals with the estimated information
     # the residuals should be a white noise process with no serial
     # correlation for any lag, if this is the case then we can say
     # that the best model has been fit to explain the data
     self.g.tsplot(best_mdl.resid,
                   lags=self.__n_lags,
                   saveas='arma{}{}_residuals.png'.format(
                           best_order[0], best_order[1]
                           )
                   )
Beispiel #18
0
 def getbestar(self, data, symbol):
     """Select an AR lag order, fit an order-(0, 3) ARMA model, then forecast or plot.

     The lag order chosen by ``smt.AR(...).select_order`` (AIC, at most 30
     lags, no constant) is shown in ``label_dikiful_2`` and passed to the
     ARMA fit as ``maxlag``. When ``checkBox_forecast`` is checked the model
     is handed to ``self.forecast`` together with the integer value of
     ``sdiffspinBox_2``; otherwise its residuals are plotted with
     ``self.tsplot``.
     """
     Y = data[symbol]
     # The previous version also ran a full AR fit here whose result was
     # never used; only the order selection is needed.
     best_order = smt.AR(Y).select_order(maxlag=30,
                                         ic='aic',
                                         trend='nc')
     self.label_dikiful_2.setText(
         'best estimated lag order = {}'.format(best_order))
     best_mdl = smt.ARMA(Y, order=(0, 3)).fit(maxlag=best_order,
                                              method='mle',
                                              trend='nc')
     if self.checkBox_forecast.isChecked():
         self.forecast(data, symbol, best_mdl,
                       int(self.sdiffspinBox_2.text()))
     else:
         self.tsplot(best_mdl.resid, symbol)
Beispiel #19
0
def MA():
    """Simulate 1000 samples of an MA(1) process (beta = 0.6), plot them and
    print the summary of an order-(0, 1) ARMA fit."""
    n_samples = int(1000)
    max_lag = 30

    # a pure MA process: the single AR coefficient is zero
    ar_coefs = np.array([0.])
    ma_coefs = np.array([0.6])

    # lag polynomials: leading 1 for lag zero, AR coefficients negated
    ar_poly = np.r_[1, -ar_coefs]
    ma_poly = np.r_[1, ma_coefs]

    sample = smt.arma_generate_sample(ar=ar_poly, ma=ma_poly,
                                      nsample=n_samples)
    tsplot(sample, lags=max_lag)

    fitted = smt.ARMA(sample, order=(0, 1)).fit(maxlag=max_lag,
                                                method='mle',
                                                trend='nc')
    print(fitted.summary())
Beispiel #20
0
    def _transform_arma(self, variables, order):
        """
        Fit an ARMA model to ``variables['vol']`` and return its fitted values.

        Parameters:
        -----------
        variables:
            Container indexed by column name; only ``variables['vol']`` is
            read.

        order: tuple(int, int)
            The p and q terms for the ARMA(p, q) process

        Returns:
        --------
        The ``fittedvalues`` attribute of the fitted model
        """
        arma_model = tsa.ARMA(variables['vol'], order=order)
        fit_result = arma_model.fit(disp=False)
        return fit_result.fittedvalues
def _get_best_arma(TS):
    """Return the lowest-AIC ARMA(p, q) fit of ``TS`` for p and q in 0..4.

    Orders that fail to fit are skipped. The best AIC and order are also
    written through ``p``.

    Returns:
        Tuple ``(best_aic, best_order, best_mdl)``; ``(inf, None, None)``
        when no order could be fitted.
    """
    best_aic = np.inf
    best_order = None
    best_mdl = None

    for i in range(5):
        for j in range(5):
            try:
                # NOTE(review): ``freq`` is not defined in this function;
                # presumably a module-level name - confirm, otherwise every
                # fit fails here and is silently skipped.
                tmp_mdl = smt.ARMA(TS, order=(i, j)).fit(method='mle',
                                                         trend='nc',
                                                         freq=freq)
                tmp_aic = tmp_mdl.aic
            except Exception:
                # This order could not be estimated; try the next one.
                # ``Exception`` (not a bare except) keeps Ctrl-C working.
                continue
            if tmp_aic < best_aic:
                best_aic = tmp_aic
                best_order = (i, j)
                best_mdl = tmp_mdl
    p('aic: {:6.5f} | order: {}'.format(best_aic, best_order))
    return best_aic, best_order, best_mdl
Beispiel #22
0
def arima_pred(df, arima_inds):
    """Flag rows whose in-sample ARMA prediction is an upper outlier.

    For every column of ``df`` a model is fitted on the non-NaN values and
    in-sample predictions are produced for exactly those rows. A row is
    flagged when its prediction exceeds the column's mean prediction by more
    than three standard deviations.

    Args:
        df: DataFrame of series, one per column; NaN marks missing values.
        arima_inds: DataFrame updated in place; flagged cells are set to 1.
            Assumed to share index and columns with ``df``.

    Returns:
        ``arima_inds`` (the same object that was passed in).
    """
    for col in df.columns:
        observed = df[col].notna()
        arr = df[col].dropna().values

        # NOTE(review): a three-element order is handed to ARMA; presumably
        # ARIMA(0, 1, 0) was intended - confirm.
        arma = tsa.ARMA(arr, order=(0, 1, 0))
        results = arma.fit()
        res = results.predict(0, len(arr) - 1)

        # Write each prediction back to the row it was computed for. The old
        # loop zipped predictions against the FULL index, so every NaN row
        # consumed a prediction and shifted all later ones.
        ts = df[col].astype(float)
        ts.loc[observed] = res

        threshold = ts.mean() + 3 * ts.std()  # could be changed to 2*sigma + mu
        outliers = ts.index[ts > threshold]
        if len(outliers):
            # A single .loc on the frame replaces chained indexing, which
            # may write to a temporary copy.
            arima_inds.loc[outliers, col] = 1

    return arima_inds
def evaluate_ARMA_models(y, AR_lim, MA_lim):
    """Fit ARMA(i, j) over a grid and return the orders ranked by AIC.

    Orders with ``i`` in 1..AR_lim and ``j`` in 0..MA_lim are fitted with
    ``method='mle'`` and ``trend='nc'``. Fits that raise ``ValueError`` or an
    "SVD did not converge" ``LinAlgError`` are reported and skipped; any
    other ``LinAlgError`` is re-raised.

    Returns:
        Tuple of ``(i, j)`` orders sorted by ascending AIC. The tuple is
        empty when no model converged (this case used to raise a
        ``ValueError`` while unpacking an empty ``zip``).
    """
    scored = []  # (aic, (i, j)) for every convergent fit
    for i in range(1, AR_lim + 1):
        for j in range(MA_lim + 1):
            try:
                aic = smt.ARMA(y, order=(i, j)).fit(method='mle',
                                                    trend='nc',
                                                    disp=-1).aic
            except ValueError:
                print(i, j, ' not convergent.')
            except np.linalg.LinAlgError as e:
                if "SVD did not converge" in str(e):
                    print(i, j, ' not convergent.')
                else:
                    raise
            else:
                scored.append((aic, (i, j)))
    print('Number of convergent models = ', len(scored))
    scored.sort()
    for aic, order in scored[:5]:
        print(aic, order)
    return tuple(order for _, order in scored)
Beispiel #24
0
def MA3():
    """Download prices, compute log returns and fit an order-(0, 3) ARMA model.

    Adjusted close prices for SPY, TLT and MSFT (2008-01-01 to 2017-01-01)
    are read through ``pandas_datareader`` from the 'yahoo' source, turned
    into log returns, and the SPY returns are fitted with ``method='mle'``
    and ``trend='nc'``. The model summary is printed and the residuals are
    plotted with the nested ``tsplot`` helper.

    Requires an IPython session: ``get_ipython`` is called unconditionally.
    """

    import os
    import sys

    import pandas as pd
    import pandas_datareader.data as web
    import numpy as np

    import statsmodels.formula.api as smf
    import statsmodels.tsa.api as smt
    import statsmodels.api as sm
    import scipy.stats as scs
    from arch import arch_model

    import matplotlib.pyplot as plt
    import matplotlib as mpl
    # IPython-only call: enables inline plotting in a notebook
    get_ipython().magic('matplotlib inline')
    p = print

    end = '2017-01-01'
    start = '2008-01-01'
    # fetch the 'Adj Close' column for one symbol
    get_px = lambda x: web.DataReader(x, 'yahoo', start=start, end=end)[
        'Adj Close']

    symbols = ['SPY', 'TLT', 'MSFT']
    # raw adjusted close prices
    data = pd.DataFrame({sym: get_px(sym) for sym in symbols})
    # log returns
    lrets = np.log(data / data.shift(1)).dropna()

    def tsplot(y, lags=None, figsize=(10, 8), style='bmh'):
        """Draw the series, its ACF and PACF, a QQ plot and a probability
        plot on one 3x2 figure. Non-Series input is wrapped in a Series."""
        if not isinstance(y, pd.Series):
            y = pd.Series(y)
        with plt.style.context(style):
            fig = plt.figure(figsize=figsize)
            #mpl.rcParams['font.family'] = 'Ubuntu Mono'
            # 3 rows x 2 columns; the series itself spans the whole top row
            layout = (3, 2)
            ts_ax = plt.subplot2grid(layout, (0, 0), colspan=2)
            acf_ax = plt.subplot2grid(layout, (1, 0))
            pacf_ax = plt.subplot2grid(layout, (1, 1))
            qq_ax = plt.subplot2grid(layout, (2, 0))
            pp_ax = plt.subplot2grid(layout, (2, 1))

            y.plot(ax=ts_ax)
            ts_ax.set_title('Time Series Analysis Plots')
            smt.graphics.plot_acf(y, lags=lags, ax=acf_ax, alpha=0.5)
            smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax, alpha=0.5)
            sm.qqplot(y, line='s', ax=qq_ax)
            qq_ax.set_title('QQ Plot')
            scs.probplot(y, sparams=(y.mean(), y.std()), plot=pp_ax)

            plt.tight_layout()
        return

    # Fit MA(3) to SPY returns

    max_lag = 30
    Y = lrets.SPY
    mdl = smt.ARMA(Y, order=(0, 3)).fit(maxlag=max_lag,
                                        method='mle',
                                        trend='nc')
    p(mdl.summary())
    # plot the residuals of the fitted model
    _ = tsplot(mdl.resid, lags=max_lag)
Beispiel #25
0
# Plot the closing-price series
ibm_df.Close_Price.plot()

# Plot ACF and PACF
ibm_df = ibm_df.dropna()
plot_acf(ibm_df.Close_Price, lags=50)
plot_pacf(ibm_df.Close_Price, lags=50)

# QQ plot and probability plot
sm.qqplot(ibm_df['Close_Price'], line='s')

# Optimize ARMA parameters: record the AIC of every order with
# AR in 1..2 and MA in 0..2 as [ar, ma, aic] rows
aicVal = []
for ari in range(1, 3):
    for maj in range(0, 3):
        arma_obj = smtsa.ARMA(ibm_df.Close_Price.tolist(),
                              order=(ari, maj)).fit(maxlag=30,
                                                    method='mle',
                                                    trend='nc')
        aicVal.append([ari, maj, arma_obj.aic])

# Final model: the order (1, 0) is hard-coded here; aicVal is not consulted
arma_obj_fin = smtsa.ARMA(ibm_df.Close_Price.tolist(),
                          order=(1, 0)).fit(maxlag=30,
                                            method='mle',
                                            trend='nc')
# store the model's in-sample predictions next to the observed prices
ibm_df['ARMA'] = arma_obj_fin.predict()
arma_obj_fin.summary()

# Plot the curves: observed prices (solid blue) against the model's
# predictions (dashed red), both from the second row onwards
f, axarr = plt.subplots(1, sharex=True)
f.set_size_inches(5.5, 5.5)
ibm_df['Close_Price'].iloc[1:].plot(color='b', linestyle='-', ax=axarr)
ibm_df['ARMA'].iloc[1:].plot(color='r', linestyle='--', ax=axarr)
Beispiel #26
0
# Generate MA(1) dataset
# (ar = [1, 0]: no autoregressive term; ma = [1, 0.7]: one MA coefficient;
#  n is defined outside this snippet)
ar = np.r_[1, -0]
ma = np.r_[1, 0.7]
ma1_data = smtsa.arma_generate_sample(ar=ar, ma=ma, nsample=n)
plotds(ma1_data)

# Generate MA(2) dataset
ar = np.r_[1, -0]
ma = np.r_[1, 0.6, 0.7]
ma2_data = smtsa.arma_generate_sample(ar=ar, ma=ma, nsample=n)
plotds(ma2_data)

# Generate MA(3) dataset
ar = np.r_[1, -0]
ma = np.r_[1, 0.6, 0.7, 0.5]
ma3_data = smtsa.arma_generate_sample(ar=ar, ma=ma, nsample=n)
plotds(ma3_data)

# Build MA(1) model on the simulated MA(1) data
ma1 = smtsa.ARMA(ma1_data.tolist(), order=(0, 1)).fit(maxlag=30,
                                                      method="mle",
                                                      trend="nc")
ma1.summary()

# Build MA(3) model on the simulated MA(3) data
ma3 = smtsa.ARMA(ma3_data.tolist(), order=(0, 3)).fit(maxlag=30,
                                                      method="mle",
                                                      trend="nc")
ma3.summary()
Beispiel #27
0
# Compare mean and variance of the first 125 rows with the remaining rows
mean1, mean2 = djia_df.iloc[:125].Close.mean(), djia_df.iloc[125:].Close.mean()
var1, var2 = djia_df.iloc[:125].Close.var(), djia_df.iloc[125:].Close.var()
print('mean1=%f, mean2=%f' % (mean1, mean2))
print('variance1=%f, variance2=%f' % (var1, var2))

# ADF Test on the raw closing values
from statsmodels.tsa.stattools import adfuller
adf_result = adfuller(djia_df.Close.tolist())
print('ADF Statistic: %f' % adf_result[0])
print('p-value: %f' % adf_result[1])

# QQ plot and probability plot
sm.qqplot(djia_df['Close'], line='s')

# Optimize ARMA parameters (Will return a non-stationary error)
arma_obj = smtsa.ARMA(djia_df['Close'].tolist(),
                      order=(1, 1)).fit(maxlag=30, method='mle', trend='nc')

# Plot the original time series and its first-order differences
first_order_diff = djia_df['Close'].diff(1).dropna()
fig, ax = plt.subplots(2, sharex=True)
fig.set_size_inches(5.5, 5.5)
djia_df['Close'].plot(ax=ax[0], color='b')
ax[0].set_title('Close values of DJIA during Jan 2016-Dec 2016')
first_order_diff.plot(ax=ax[1], color='r')
ax[1].set_title('First-order differences of DJIA during Jan 2016-Dec 2016')

# plot signal, then repeat the ADF test on the differenced series
plotds(first_order_diff, nlag=50)
adf_result = adfuller(first_order_diff)
print('ADF Statistic: %f' % adf_result[0])
print('p-value: %f' % adf_result[1])
# Partial autocorrelation of the 'Preco' column
# (presumably Portuguese for "price" - confirm)
plot_pacf(dado_ar3['Preco']);

# %% [markdown]
# ## Fitting the AR models

# %%
# split: first 500 rows for fitting, next 100 rows held out
dado_ar1_p1 = dado_ar1[0:500]
dado_ar1_p2 = dado_ar1[500:600]


# %%
import statsmodels.tsa.api as smtsa


# %%
# ARMA of order (1, 0) fitted on the first part
modelo_ar1 = smtsa.ARMA(dado_ar1_p1['Preco'], order=(1, 0)).fit()


# %%
# predictions for positions 500..599, i.e. the held-out rows
modelo_previsto1 = modelo_ar1.predict(start=500,end=599)


# %%
# fitting part, held-out part and predictions (red dots) on one figure
plt.figure(figsize=(12,8))
plt.plot(dado_ar1_p1['Data'],dado_ar1_p1['Preco'])
plt.plot(dado_ar1_p2['Data'],dado_ar1_p2['Preco'])
plt.plot(dado_ar1_p2['Data'], modelo_previsto1,'r.')


# %%
# same 500-row fitting split for the second data set
dado_ar3_p1 = dado_ar3[0:500]
Beispiel #29
0
def main(job_no, sex, chroms, rank, dir):
    """Fit and save an ARMA model of regression residuals for each chromosome.

    For every chromosome the table ``<dir>/recomb_table_all_sexes_ch<chrom>.csv``
    is read, missing data are corrected with the 'LOCF' option, variant
    rates are regressed on the standardised rate column for ``sex``, and
    ARMA models are evaluated on the residuals. The model at position
    ``rank`` of the AIC-ordered list is re-fitted, printed and saved to
    ``<dir>/ARMA_model_ch<chrom>_<job_no>.pklz``.

    Args:
        job_no: String appended to the log and output file names.
        sex: Suffix selecting the ``stdrate_<sex>`` column.
        chroms: Comma-separated chromosome names, or None for 1..22.
        rank: Position in the ranked model list (best=0); None is treated
            as 0.
        dir: Working directory for input, output and log files; created if
            missing.
    """
    start_time = time()
    # exist_ok avoids the race between a separate existence check and the
    # directory creation.
    os.makedirs(dir, exist_ok=True)
    LOGGER.log_file_path = dir + "/" + str(
        os.path.basename(__file__)) + job_no + ".log"  # change
    LOGGER.log_args()
    LOGGER.log_message(get_file_hexdigest(__file__),
                       label="Hex digest of script.".ljust(17))
    try:
        LOGGER.log_message(str(os.environ['CONDA_DEFAULT_ENV']),
                           label="Conda environment.".ljust(17))
    except KeyError:
        # not running inside a conda environment; nothing to log
        pass
    # log name and version of every scientific module in use
    for module in (np, pd, scipy, statsmodels, sklearn):
        LOGGER.log_message('Name = ' + module.__name__ + ', version = ' +
                           module.__version__,
                           label="Imported module ")
    pd.set_option('display.max_columns', None)
    if chroms is None:
        chroms = np.arange(1, 23).astype(str).tolist()
    else:
        chroms = chroms.split(',')
    if rank is None:
        # ``orders[None]`` below would raise TypeError; default to the best
        # model.
        rank = 0
    if rank:
        LOGGER.log_message("%1d" % rank,
                           label="Rank of model to select (best=0).".ljust(30))
    for chrom in chroms:
        csv_name = dir + '/recomb_table_all_sexes_ch' + chrom + '.csv'
        # Opening the file verifies that it is readable before it is logged;
        # ``with`` closes it even if logging raises.
        with open(csv_name, 'r') as infile:
            LOGGER.input_file(infile.name)
        data_table = pd.read_csv(csv_name, sep=',', index_col=0)
        data_table = recombination.correct_missing_data(
            data_table, 'LOCF', sex)
        std_col = 'stdrate_' + sex
        std_rates = data_table[std_col].values
        variants_profiled = data_table.iloc[:, np.arange(5, 17)]
        variant_counts = variants_profiled.sum(axis=1)
        var_rates = variant_counts / 10000
        print('\n\nChromosome number   = ' + chrom)
        print('Avge. mutation rate = ', np.mean(var_rates))
        xvals = std_rates.reshape(-1, 1)
        lmodel = LinearRegression()
        lmodel.fit(xvals, var_rates)
        residuals = var_rates - lmodel.predict(xvals)
        sys.stdout.flush()
        print('Slope, intercept, mean of residuals = ',
              '%.8f' % lmodel.coef_[0], '%.8f' % lmodel.intercept_,
              '%.12f' % np.mean(residuals))
        orders = recombination.evaluate_ARMA_models(residuals, 10, 4)
        best_order = orders[rank]
        best_mdl = smt.ARMA(residuals, order=best_order).fit(method='mle',
                                                             trend='nc',
                                                             disp=0)
        print(best_mdl.summary())
        outfile_name = dir + '/ARMA_model_ch' + chrom + '_' + job_no + '.pklz'
        recombination.save_model_details(best_mdl, outfile_name)
        # Re-open the saved file so a missing output is detected before it
        # is logged.
        with open(outfile_name, 'r') as outfile:
            LOGGER.output_file(outfile.name)

    duration = time() - start_time
    LOGGER.log_message("%.2f" % (duration / 60.),
                       label="run duration (minutes)".ljust(30))
Beispiel #30
0
#
# ## Let's use a systematic approach to finding the order of AR and MA processes.

# In[23]:

# pick best order by aic
# smallest aic value wins
best_aic = np.inf
best_order = None
best_mdl = None

rng = range(5)  # candidate orders 0..4 for both p and q
for i in rng:
    for j in rng:
        try:
            tmp_mdl = smt.ARMA(arma22, order=(i, j)).fit(method='mle',
                                                         trend='nc')
            tmp_aic = tmp_mdl.aic
        except Exception:
            # This order could not be estimated; try the next one.
            # ``Exception`` (not a bare except) keeps Ctrl-C working.
            continue
        if tmp_aic < best_aic:
            best_aic = tmp_aic
            best_order = (i, j)
            best_mdl = tmp_mdl

print('aic: {:6.5f} | order: {}'.format(best_aic, best_order))

# ## We've correctly identified the order of the simulated process as ARMA(2,2).
#
# ### Lets use it for the sales time-series.
#