예제 #1
0
def seasonal_adjustment(dataset):
    """Seasonally adjust every ``float64`` column of a dataset with X-13 ARIMA.

    Each ``float64`` column is passed to ``x13_arima_analysis`` and replaced
    by the ``seasadj`` attribute of the result. Columns of any other dtype
    are left untouched.

    The adjustment is best-effort: if the analysis of a column fails, that
    column keeps its original values and a ``RuntimeWarning`` naming the
    column is emitted, so failures are no longer invisible.

    Note that ``dataset`` is modified in place; the returned object is the
    same one that was passed in.

    returns: the dataset with its float64 columns seasonally adjusted
    """
    import warnings

    # Snapshot the column labels first: the loop assigns into ``dataset``.
    float_columns = list(dataset.columns[dataset.dtypes == 'float64'])
    for column in float_columns:
        try:
            res = x13_arima_analysis(dataset[column])
        except Exception as exc:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            warnings.warn(
                f"seasonal adjustment skipped for column {column!r}: {exc}",
                RuntimeWarning,
                stacklevel=2,
            )
        else:
            dataset[column] = res.seasadj
    return dataset
예제 #2
0
파일: x13.py 프로젝트: wanhanwan/Packages
def seas_adj(series,
             holidayfile='chn',
             remove_spring=False,
             qoq=False,
             before=10,
             after=7):
    '''
    Seasonally adjust a series (X13-ARIMA-SEATS).

    1. series: the series to adjust; its name and index are carried over to
       the result
    2. holidayfile: path of the parameter file describing moving holidays.
       ``None`` runs the analysis without a holiday file; ``'chn'`` uses the
       file ``chn_{before}_{after}.dat`` under ``curr_path``, creating it
       from ``get_spring_val(before, after)`` when it does not exist yet;
       any other value is passed through unchanged
    3. remove_spring: whether to merge the January-February information
       (applies ``merge_spring_element`` to the adjusted series)
    4. qoq: whether to turn the adjusted data into month-over-month changes
       (first difference of the natural log)
    5. before: number of days before the holiday treated as abnormal
    6. after: number of days after the holiday treated as abnormal

    Returns the adjusted series with NaNs dropped, reindexed onto
    ``series.index`` with forward fill.
    '''
    if holidayfile is None:
        analysis = x13_arima_analysis(series)
    else:
        if holidayfile == 'chn':
            name = f'chn_{before}_{after}.dat'
            holiday_path = curr_path / name
            if not os.path.isfile(holiday_path):
                # Build the holiday regressor file: one "year month val" row
                # per period, from 1970 onwards, space separated, no header.
                spring = DataFrame(get_spring_val(before, after),
                                   columns=['val'])
                spring['year'] = spring.index.year
                spring['month'] = spring.index.month
                spring = spring[['year', 'month', 'val']]['1970':]
                spring.to_csv(holiday_path, index=False, header=False,
                              sep=' ')
            holidayfile = holiday_path
        analysis = x13_arima_analysis(series, holidayfile=holidayfile)

    adjusted = analysis.seasadj
    if remove_spring:
        adjusted = merge_spring_element(adjusted)
    if qoq:
        adjusted = np.log(adjusted).diff()
    adjusted.name = series.name

    cleaned = adjusted.dropna()
    return cleaned.reindex(series.index, method='ffill')
예제 #3
0
    def deseason(self, dframe, method='stl', doplot=False):
        """ Compute and remove seasonal effects in the data.

        Parameters
        ----------
        dframe: pandas.DataFrame
            Pandas DataFrame with aggregations applied. Its columns are
            overwritten in place with the adjusted values.
        method: str
            Method for removing seasonal variations in the data
            (case-insensitive). Acceptable values include:
            * `stl` : (Default) Use `statsmodels.tsa.seasonal.STL` method
            * `x13` : Use US Census Bureau X-13ARIMA-SEATS software (see note 2)
            * `None`: Return the raw aggregated data
        doplot: bool
            If True, call ``plot()`` on the decomposition results of every
            column.

        Returns
        -------
        Pandas.DataFrame with seasonal effects removed as best as possible.
        This is the same object as `dframe`.

        Raises
        ------
        ValueError
            If `method` is a string other than `stl` or `x13`. Previously
            such a value was silently ignored and the data came back
            unadjusted.

        Notes
        -----
        1. It's best to supply as much data as possible to this method
        2. When using `method='x13'` the data must be aggregated either monthly
           (`agg='M'`) or quarterly (`agg='Q'`). This method also requires
           installing the X-13ARIMA-SEATS software and the `statsmodels` python
           module.
        3. With `method='x13'` each column is replaced by the `trend`
           attribute of the X-13 results; with `method='stl'` the fitted
           `seasonal` component is subtracted from the column.
        """
        # Do nothing if method is None
        if method is None:
            return dframe

        # Normalise once instead of on every loop iteration, and fail loudly
        # on a typo rather than returning unadjusted data.
        method_key = method.lower()
        if method_key not in ('x13', 'stl'):
            raise ValueError(
                f"Unsupported deseason method {method!r}; "
                "expected 'stl', 'x13' or None")

        # Remove seasonal effects in the data, one column at a time
        for col in dframe.columns:
            if method_key == 'x13':
                # Interface to the US Census Bureau seasonal adjustment software
                results = x13_arima_analysis(dframe[col], trading=False)
                dframe[col] = results.trend
            else:
                # Interface to 'statsmodels.tsa.seasonal.STL'
                results = STL(dframe[col], robust=False, seasonal=3).fit()
                dframe[col] = dframe[col] - results.seasonal

            if doplot:
                results.plot()

        return dframe
예제 #4
0
def test_x13_arima_plot(dataset):
    """Smoke test: run the X-13 analysis on *dataset* and plot the result.

    There are no assertions; the test passes as long as neither the analysis
    nor the ``plot()`` call raises.
    """
    # NOTE(review): ``dataset`` is presumably a pytest fixture - confirm.
    x13_arima_analysis(dataset).plot()
예제 #5
0
# Manipulate data: index the loans by closing date and aggregate per month.

df.dropna(inplace=True)
# Parse the whole column in one vectorised call instead of per element.
df['Status_ClosedDate'] = pd.to_datetime(df['Status_ClosedDate'])
df.set_index('Status_ClosedDate', inplace=True)
df2 = df.resample('MS').sum()
# .copy() so the columns added below go onto an independent frame rather
# than onto a slice of the resampled data.
df2 = df2.loc['2013-01-01T00:00:00.000000000':].copy()
df2.plot()

# X13 seasonal decomposition

from statsmodels.tsa.x13 import x13_arima_analysis

output = x13_arima_analysis(df2['Loan_LoanWith'])

df2['trend'] = output.trend
df2['seasadj'] = output.seasadj
df2['irregular'] = output.irregular
# Seasonal component recovered as "original minus seasonally adjusted".
df2['seasonal'] = df2['Loan_LoanWith'] - df2['seasadj']
# Seasonally adjusted series with the irregular component removed as well.
df2['seasadj_irr'] = df2['seasadj'] - df2['irregular']
df2['seasadj_log'] = np.log(df2['seasadj_irr'])  # log-series

# ``legend`` is a boolean switch; the text shown in the legend comes from
# ``label``. Passing the text via ``legend=`` only enabled the legend and
# showed the column name instead of the intended label.
df2['seasonal'].plot(label='seasonal', legend=True)
df2['trend'].plot(label='trend', legend=True)
df2['seasadj'].plot(label='seasadj', legend=True)
df2['irregular'].plot(label='irregular', legend=True)
df2['seasadj_irr'].plot(label='fully adjusted', legend=True)
# Log of the fully adjusted series; no differencing is applied here.
df2['seasadj_log'].plot()
예제 #6
0
# Spacing between consecutive throughput samples, expressed in days
# (1 / 720 of a day, i.e. two minutes).
_SAMPLE_SPACING_DAYS = 0.001388888888888889
# One hour expressed in days (1 / 24); spacing of the minor x-axis ticks.
_HOUR_IN_DAYS = 0.041666666666666664


def _indexed_finite(values, x_scale):
    """Return ``(x_scale * position, value)`` pairs for the non-NaN values."""
    # The string comparison is kept from the original code: it drops NaNs
    # without assuming a particular numeric type for the values.
    return tuple(
        (x_scale * position, value)
        for position, value in enumerate(values)
        if str(value) != 'nan'
    )


def _print_range(title, values):
    """Print the minimum and maximum of *values* under the given title."""
    print(f'{title}: [ {min(values)!s} , {max(values)!s} ] ')


def _draw_panel(position, xs, ys, label, x_end):
    """Draw one labelled series plus red start/end markers in a subplot."""
    import matplotlib.ticker as ticker

    axis = plt.subplot(position)
    # Major tick every day, minor tick every hour.
    axis.xaxis.set_major_locator(ticker.MultipleLocator(1))
    axis.xaxis.set_minor_locator(ticker.MultipleLocator(_HOUR_IN_DAYS))

    plt.plot(xs, ys, label=label)

    # Vertical red lines at the first and last sample time, spanning the
    # value range of this panel's series.
    low, high = min(ys), max(ys)
    plt.plot((0, 0), (low, high), 'r-')
    plt.plot((x_end, x_end), (low, high), 'r-')

    plt.ylabel('MB/s')
    plt.legend()


def plot_tsa():
    """Decompose the January throughput series and plot its components.

    Loads the throughput samples from
    ``two_minutes_spaced_january_throughput_data.p``, runs
    ``decompose(..., period=30)`` on them, prints the value range of the
    original series and of the trend / seasonal / residual components, and
    shows a four-row figure (throughput, trend, seasonal, residual) with time
    in days on the x axis.

    Takes no arguments and returns nothing; its effects are the printed
    ranges, a change to matplotlib's global font settings and the figure
    window.
    """
    import matplotlib

    # NOTE(review): pickle can execute arbitrary code while loading; only use
    # this with a trusted data file.
    # The ``with`` block closes the file handle, which was previously leaked.
    with open('two_minutes_spaced_january_throughput_data.p', 'rb') as src:
        thp_list = pickle.load(src)

    # NOTE: x13_arima_analysis was tried here as an alternative but does not
    # work for this data because it wants quarterly or monthly data.
    result = decompose(thp_list, period=30)

    components = (
        ('Trend', _indexed_finite(result.trend, _SAMPLE_SPACING_DAYS)),
        ('Seasonal', _indexed_finite(result.seasonal, _SAMPLE_SPACING_DAYS)),
        ('Residual', _indexed_finite(result.resid, _SAMPLE_SPACING_DAYS)),
    )

    # NOTE(review): 'normal' is passed as the font family exactly as before;
    # confirm it resolves to an installed font.
    font = {'family': 'normal', 'weight': 'bold', 'size': 22}
    matplotlib.rc('font', **font)

    sample_days = tuple(
        _SAMPLE_SPACING_DAYS * position for position in range(len(thp_list)))

    # (title used when printing, legend label, x values, y values)
    panels = [('Original', 'Throughput', sample_days, thp_list)]
    for name, points in components:
        panels.append((
            name,
            name,
            tuple(point[0] for point in points),
            tuple(point[1] for point in points),
        ))

    for title, _, _, ys in panels:
        _print_range(title, ys)

    # Time of the last sample; marks the right-hand red line in every panel.
    x_end = sample_days[-1]

    for row, (_, label, xs, ys) in enumerate(panels, start=1):
        _draw_panel(410 + row, xs, ys, label, x_end)

    # Only the bottom panel (the current axes after the loop) gets an x label.
    plt.xlabel('Time in Days')

    plt.show()
예제 #7
0
# Outer-join the S&P 500 frame with every macro / interest-rate series so
# that no date present in any of them is lost.
data = sp500.join(
    [M1, CP, IP, PP, AAA, BAA, CD6, CD3, CD1, T120, T60, T12, T6, T3, T1],
    how='outer',
)
data.columns = [
    'SP', 'DIV', 'R', 'DY',
    'M1', 'CP', 'IP', 'PP',
    'AAA', 'BAA',
    'CD6', 'CD3', 'CD1',
    'T120', 'T60', 'T12', 'T6', 'T3', 'T1',
]

# In[403]:

# Restrict the sample to February 1976 through December 1999.
data = data.truncate(before='1976-02-01', after='1999-12-01')

# Seasonally adjust every column except DIV, T1, SP and DY, plotting each
# adjusted series as it is produced.
x12path = '/Users/andrewpalmer/Downloads/x13assrc_V1.1_B39/x13as'
for column in data.columns.drop(['DIV', 'T1', 'SP', 'DY']):
    data[column] = x13.x13_arima_analysis(
        data[column], x12path=x12path).seasadj
    data[column].plot(title=column)
    plt.show()

# In[406]:

# Create spreads and other calculated features
data['T1H'] = data['T1'] / 12
# R minus next period's T1H, then lagged by one period.
data['ER'] = (data['R'] - data['T1H'].shift(-1)).shift(1)

# Spreads of T120 over T1, T3, T6 and T12, and of T3 over T1.
data['TE1'] = data['T120'].sub(data['T1'])
data['TE2'] = data['T120'].sub(data['T3'])
data['TE3'] = data['T120'].sub(data['T6'])
data['TE4'] = data['T120'].sub(data['T12'])
data['TE5'] = data['T3'].sub(data['T1'])