def seasonal_adjustment(dataset):
    """Seasonally adjust every float64 column of ``dataset`` in place.

    Runs the US Census Bureau X-13ARIMA-SEATS software on each float
    column via ``x13_arima_analysis``. Columns that X-13 cannot handle
    (wrong frequency, too short, etc.) are left unchanged.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Time-indexed frame; only float64 columns are adjusted.

    Returns
    -------
    pandas.DataFrame
        The same frame, with adjustable columns replaced by their
        seasonally adjusted series.
    """
    # select_dtypes is the idiomatic way to pick float64 columns
    # (the original filtered via dataset.columns[dataset.dtypes == ...]).
    for column in dataset.select_dtypes(include=['float64']).columns:
        try:
            res = x13_arima_analysis(dataset[column])
        except Exception:
            # Best-effort: keep the original column when X-13 fails,
            # but never use a bare ``except`` — that would also swallow
            # KeyboardInterrupt/SystemExit and hide real bugs.
            continue
        dataset[column] = res.seasadj
    return dataset
def seas_adj(series, holidayfile='chn', remove_spring=False, qoq=False, before=10, after=7):
    """Seasonally adjust a monthly series with X-13ARIMA-SEATS.

    Parameters
    ----------
    series : pandas.Series
        Time-indexed series to adjust.
    holidayfile : str or path-like or None, default 'chn'
        Moving-holiday regressor information. ``None`` disables holiday
        adjustment; ``'chn'`` builds (and caches on disk) a Chinese
        Spring Festival regressor file from ``get_spring_val``; any
        other value is passed straight to ``x13_arima_analysis`` as a
        file path.
    remove_spring : bool, default False
        If True, merge the January/February observations via
        ``merge_spring_element`` after adjustment.
    qoq : bool, default False
        If True, return the log month-over-month difference of the
        adjusted series instead of its level.
    before : int, default 10
        Days before the holiday treated as abnormal (Spring Festival).
    after : int, default 7
        Days after the holiday treated as abnormal (Spring Festival).

    Returns
    -------
    pandas.Series
        Seasonally adjusted series, NaNs dropped and forward-filled
        back onto the original index; keeps ``series.name``.
    """
    if holidayfile is None:
        d = x13_arima_analysis(series).seasadj
    elif holidayfile == 'chn':
        # Build the Spring Festival regressor file once and cache it;
        # the cache key encodes the (before, after) window.
        name = f'chn_{before}_{after}.dat'
        if not os.path.isfile(curr_path / name):
            val = get_spring_val(before, after)
            val = DataFrame(val, columns=['val'])
            val['year'] = val.index.year
            val['month'] = val.index.month
            # X-13 expects whitespace-separated "year month value" rows,
            # no header/index; data from 1970 onwards.
            val = val[['year', 'month', 'val']]['1970':]
            val.to_csv(curr_path / name, index=False, header=False, sep=' ')
        d = x13_arima_analysis(series, holidayfile=curr_path / name).seasadj
    else:
        d = x13_arima_analysis(series, holidayfile=holidayfile).seasadj
    if remove_spring:
        d = merge_spring_element(d)
    if qoq:
        # Log-difference approximates the month-over-month growth rate.
        d = np.log(d).diff()
    d.name = series.name
    return d.dropna().reindex(series.index, method='ffill')
def deseason(self, dframe, method='stl', doplot=False):
    """
    Compute and remove seasonal effects in the data.

    Parameters
    ----------
    dframe: pandas.DataFrame
        Pandas DataFrame with aggregations applied
    method: str
        Method for removing seasonal variations in the data.
        Acceptable values include:

        * `stl` : (Default) Use `statsmodels.tsa.seasonal.STL` method
        * `x13` : Use US Census Bureau X-13ARIMA-SEATS software
          (see note 2); replaces each column with the X-13 trend
        * `None`: Return the raw aggregated data

    Returns
    -------
    Pandas.DataFrame with seasonal affects removed as best as possible
    (columns are overwritten in place).

    Notes
    -----
    1. It's best to supply as much data as possible to this method
    2. When using `method='x13'` the data must be aggregated either
       monthly (`agg='M'`) or quarterly (`agg='Q'`). This method also
       requires installing the X-13ARIMA-SEATS software and the
       `statsmodels` python module.
    """
    # No-op passthrough when deseasoning is disabled.
    if method is None:
        return dframe

    # Normalise once; the method choice is constant across columns.
    chosen = method.lower()
    for name in dframe.columns:
        if chosen == 'x13':
            # US Census Bureau seasonal adjustment software.
            fit = x13_arima_analysis(dframe[name], trading=False)
            dframe[name] = fit.trend
            if doplot:
                fit.plot()
        elif chosen == 'stl':
            # statsmodels STL: subtract the estimated seasonal component.
            fit = STL(dframe[name], robust=False, seasonal=3).fit()
            dframe[name] = dframe[name] - fit.seasonal
            if doplot:
                fit.plot()
    return dframe
def test_x13_arima_plot(dataset):
    """Smoke test: an X-13 analysis result exposes a working plot()."""
    analysis = x13_arima_analysis(dataset)
    analysis.plot()
# manipulate data: monthly loan sums indexed by close date
df.dropna(inplace=True)
# pd.to_datetime is vectorized; no need for apply(lambda x: ...) per row.
df['Status_ClosedDate'] = pd.to_datetime(df['Status_ClosedDate'])
df.set_index('Status_ClosedDate', inplace=True)
df2 = df.resample('MS').sum()
df2 = df2.loc['2013-01-01T00:00:00.000000000':]
df2.plot()

# X13 seasonal decomposition
from statsmodels.tsa.x13 import x13_arima_analysis

output = x13_arima_analysis(df2['Loan_LoanWith'])
df2['trend'] = output.trend
df2['seasadj'] = output.seasadj
df2['irregular'] = output.irregular
# Seasonal component = original series minus its seasonally adjusted form.
df2['seasonal'] = df2['Loan_LoanWith'] - df2['seasadj']
df2['seasadj_irr'] = df2['seasadj'] - df2['irregular']
# np.log broadcasts over the whole Series; apply() was redundant.
df2['seasadj_log'] = np.log(df2['seasadj_irr'])  # log-series

# Series.plot's ``legend`` parameter is a boolean flag; the text belongs
# in ``label=`` (the original passed strings to legend=, which only
# worked by truthiness and dropped the intended 'fully adjusted' text).
df2['seasonal'].plot(label='seasonal', legend=True)
df2['trend'].plot(label='trend', legend=True)
df2['seasadj'].plot(label='seasadj', legend=True)
df2['irregular'].plot(label='irregular', legend=True)
df2['seasadj_irr'].plot(label='fully adjusted', legend=True)
df2['seasadj_log'].plot()
# 1st difference model in order to eliminate trend
def plot_tsa():
    """Decompose the January throughput series and plot its components.

    Loads a pickled list of throughput samples spaced two minutes apart,
    decomposes it (``decompose`` with a 30-sample period), prints the
    value range of each component, and shows a four-panel matplotlib
    figure (original / trend / seasonal / residual). Side effects only.
    """
    thp_list = pickle.load(
        open('two_minutes_spaced_january_throughput_data.p', 'rb'))
    # result = seasonal_decompose(
    #     thp_list,
    #     model='additive',
    #     freq=4260,
    # )
    result = decompose(thp_list, period=30)
    if False:  # disabled X-13 attempt, kept for reference
        '''
        Nu merge ca vrea quarterly sau monthly data.
        '''
        # (Romanian: "Doesn't work because it wants quarterly or monthly data.")
        t = 1578936840000
        thp_dict = dict()
        for val in thp_list:
            thp_dict[pd.Timestamp(t)] = val
            t += 120000  # two minutes, in milliseconds
        result = x13_arima_analysis(
            pd.Series(thp_dict, name="Thp"),
            x12path='/home/mircea/Downloads/x13asall_V1.1_B39/x13as')
    # Drop NaN samples while remembering each sample's original index:
    # yields tuples of (index, value) pairs.
    preproc_for_plot_func = lambda arr: tuple(
        filter(
            lambda e: str(e[1]) != 'nan',
            enumerate(arr)
        )
    )
    # Project a sequence of pairs onto position ``ind`` (0 = x, 1 = y).
    a_func = lambda arr, ind: tuple(map(lambda p: p[ind], arr))
    trend_iterable = preproc_for_plot_func(result.trend)
    seasonal_iterable = preproc_for_plot_func(result.seasonal)
    resid_iterable = preproc_for_plot_func(result.resid)
    if False:  # disabled: single-axes overview plot
        plt.plot(range(len(thp_list)), thp_list, label='Original')
        plt.plot(a_func(trend_iterable, 0),
                 a_func(trend_iterable, 1),
                 label='Trend')
        plt.plot(a_func(seasonal_iterable, 0),
                 a_func(seasonal_iterable, 1),
                 label='Seasonal')
        plt.plot(a_func(resid_iterable, 0),
                 a_func(resid_iterable, 1),
                 label='Residual')
        plt.legend()
        plt.show()
    if True:
        import matplotlib
        font = {'family': 'normal', 'weight': 'bold', 'size': 22}
        matplotlib.rc('font', **font)
        import matplotlib.ticker as ticker
        # Rescale sample indices to days: 0.00138888... == 1/720, and
        # 720 two-minute samples make one day.
        b_func = lambda arr: tuple(
            map(
                lambda p: (
                    0.001388888888888889 * p[0],
                    p[1],
                ),
                arr
            )
        )
        trend_iterable = b_func(trend_iterable)
        seasonal_iterable = b_func(seasonal_iterable)
        resid_iterable = b_func(resid_iterable)
        # Reduce the value column of a pair sequence with f (min/max below).
        c_func = lambda arr, f: f(map(lambda e: e[1], arr))
        print('Original: [ ' + str(min(thp_list)) + ' , ' +
              str(max(thp_list)) + ' ] ')
        print('Trend: [ ' + str(c_func(trend_iterable, min)) + ' , ' +
              str(c_func(trend_iterable, max)) + ' ] ')
        print('Seasonal: [ ' + str(c_func(seasonal_iterable, min)) + ' , ' +
              str(c_func(seasonal_iterable, max)) + ' ] ')
        print('Residual: [ ' + str(c_func(resid_iterable, min)) + ' , ' +
              str(c_func(resid_iterable, max)) + ' ] ')
        if True:
            # Right edge of the x axis, in days.
            max_a = max(
                map(lambda e: 0.001388888888888889 * e, range(len(thp_list))))
            ax = plt.subplot(411)
            # Major ticks every day, minor ticks every hour (1/24 day).
            ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
            ax.xaxis.set_minor_locator(
                ticker.MultipleLocator(0.041666666666666664))
            plt.plot(tuple(
                map(lambda e: 0.001388888888888889 * e, range(len(thp_list)))),
                thp_list,
                label='Throughput')
            # Red vertical markers at both ends of the time range.
            plt.plot((0, 0), (min(thp_list), max(thp_list)), 'r-')
            plt.plot((max_a, max_a), (min(thp_list), max(thp_list)), 'r-')
            plt.plot()
            plt.ylabel('MB/s')
            plt.legend()
            ax = plt.subplot(412)
            ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
            ax.xaxis.set_minor_locator(
                ticker.MultipleLocator(0.041666666666666664))
            plt.plot(a_func(trend_iterable, 0),
                     a_func(trend_iterable, 1),
                     label='Trend')
            plt.plot(
                (0, 0),
                (min(a_func(trend_iterable, 1)), max(a_func(trend_iterable, 1))),
                'r-')
            plt.plot(
                (max_a, max_a),
                (min(a_func(trend_iterable, 1)), max(a_func(trend_iterable, 1))),
                'r-')
            plt.ylabel('MB/s')
            plt.legend()
            ax = plt.subplot(413)
            ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
            ax.xaxis.set_minor_locator(
                ticker.MultipleLocator(0.041666666666666664))
            plt.plot(a_func(seasonal_iterable, 0),
                     a_func(seasonal_iterable, 1),
                     label='Seasonal')
            plt.plot((0, 0), (min(a_func(
                seasonal_iterable, 1)), max(a_func(seasonal_iterable, 1))),
                'r-')
            plt.plot((max_a, max_a), (min(a_func(
                seasonal_iterable, 1)), max(a_func(seasonal_iterable, 1))),
                'r-')
            plt.ylabel('MB/s')
            plt.legend()
            ax = plt.subplot(414)
            ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
            ax.xaxis.set_minor_locator(
                ticker.MultipleLocator(0.041666666666666664))
            plt.plot(a_func(resid_iterable, 0),
                     a_func(resid_iterable, 1),
                     label='Residual')
            plt.plot(
                (0, 0),
                (min(a_func(resid_iterable, 1)), max(a_func(resid_iterable, 1))),
                'r-')
            plt.plot(
                (max_a, max_a),
                (min(a_func(resid_iterable, 1)), max(a_func(resid_iterable, 1))),
                'r-')
            time = 0
            if False:  # disabled: one red marker per day across the panel
                while time - 1 < max_a:
                    plt.plot((time, time), (min(a_func(
                        resid_iterable, 1)), max(a_func(resid_iterable, 1))),
                        'r-')
                    time += 1
            plt.xlabel('Time in Days')
            plt.ylabel('MB/s')
            plt.legend()
            plt.show()
# Assemble the market/macro panel: outer-join every series onto the S&P frame.
data = sp500.join(
    [M1, CP, IP, PP, AAA, BAA, CD6, CD3, CD1, T120, T60, T12, T6, T3, T1],
    how='outer')
data.columns = [
    'SP', 'DIV', 'R', 'DY', 'M1', 'CP', 'IP', 'PP', 'AAA', 'BAA', 'CD6',
    'CD3', 'CD1', 'T120', 'T60', 'T12', 'T6', 'T3', 'T1'
]

# In[403]:

# Restrict the sample window.
data = data.truncate('1976-02-01', '1999-12-01')

# Seasonally adjust desired columns (everything except the excluded four).
x12path = '/Users/andrewpalmer/Downloads/x13assrc_V1.1_B39/x13as'
excluded = ('DIV', 'T1', 'SP', 'DY')
for column in [c for c in data.columns if c not in excluded]:
    data[column] = x13.x13_arima_analysis(data[column], x12path=x12path).seasadj
    data[column].plot(title=column)
    plt.show()

# In[406]:

# Create spreads and other calculated features.
data['T1H'] = data['T1'] / 12  # monthly risk-free rate from the annual T1
# Excess return: R minus next period's T1H, then lagged one period.
data['ER'] = (data['R'] - data['T1H'].shift(-1)).shift(1)
# Term spreads of the 10-year yield over shorter maturities.
for spread, short_rate in (('TE1', 'T1'), ('TE2', 'T3'),
                           ('TE3', 'T6'), ('TE4', 'T12')):
    data[spread] = data['T120'] - data[short_rate]
data['TE5'] = data['T3'] - data['T1']