Terence Lim
License: MIT
"""
import os
import numpy as np
import scipy.stats
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import seaborn as sns
from finds.alfred import Alfred
from settings import settings

imgdir = os.path.join(settings['images'], 'ts')
alf = Alfred(api_key=settings['fred']['api_key'])

# Proportion of failures likelihood ratio test
def kupiecLR(s, n, var):
    """Kupiec LR test (s violations in n trials) of VaR"""
    p = 1 - var                        # e.g. var95 => p = 0.05
    num = np.log(1 - p)*(n - s) + np.log(p)*s         # null likelihood
    den = np.log(1 - (s/n))*(n - s) + np.log(s/n)*s   # unrestricted likelihood
    lr = -2 * (num - den)              # LR statistic is chi-squared, df=1
    return {'lr': lr, 'pvalue': 1 - scipy.stats.chi2.cdf(lr, df=1)}

def pof(X, pred, var=0.95):
    """Kupiec proportion of failures VaR test"""
    Z = X / pred
    z = scipy.stats.norm.ppf(1 - var)
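    # Plausible completion of this truncated function (a sketch): count
    # violations where the standardized outcome falls below the lower-tail
    # normal quantile, then apply the Kupiec LR test defined above.
    s = int((Z < z).sum())             # number of VaR violations
    n = len(Z)                         # number of trials
    return kupiecLR(s, n, var)

# Usage sketch with fake data (illustrative only):
#   pof(X=np.random.default_rng(0).standard_normal(500) * 0.01, pred=0.01)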
import os
import pandas as pd
import sklearn.feature_extraction
import sklearn.decomposition
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator, AutoMinorLocator
from finds.unstructured import Unstructured
from finds.readers import fetch_fomc
from finds.database import MongoDB
from settings import settings

mongodb = MongoDB(**settings['mongodb'])
fomc = Unstructured(mongodb, 'FOMC')
logdir = os.path.join(settings['images'], 'fomc')

# Retrieve recession dates from FRED, for shading plots
from finds.alfred import Alfred
alf = Alfred(api_key=settings['fred']['api_key'])
usrec = alf('USREC')
usrec.index = pd.DatetimeIndex(usrec.index.astype(str), freq='infer')
g = usrec.astype(bool) | usrec.shift(-1, fill_value=0).astype(bool)
g = (g != g.shift(fill_value=0)).cumsum()[g].to_frame()
g = g.reset_index().groupby('USREC')['date'].agg(['first', 'last'])
vspans = [(v[0], v[1]) for k, v in g.iterrows()]    # recession date spans

# Update FOMC Minutes
dates = fomc['minutes'].distinct('date')  # dates of minutes already stored
catalog = fetch_fomc()        # check FOMC site for new dates, and retrieve
print(f"FOMC: {len(catalog)} dates {min(catalog.keys())}-{max(catalog.keys())}")
docs = {d: fetch_fomc(url) for d, url in catalog.items() if d not in dates}
print('NEW:', ", ".join([f"{k}: {len(v)} chars" for k, v in docs.items()]))
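# A minimal sketch (illustrative, not part of the original script) of how the
# vspans recession spans above are typically consumed: shade each
# (first, last) date pair on a matplotlib axis with axvspan.
fig, ax = plt.subplots(figsize=(9, 3))
ax.plot(usrec.index, usrec.values, color='C0')        # recession indicator
for first, last in vspans:
    ax.axvspan(first, last, alpha=0.3, color='grey')  # shade recession span
ax.set_title('USREC with NBER recession periods shaded')
plt.show()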
License: MIT
"""
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
from pandas.api.types import is_list_like, is_numeric_dtype
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error
import time
import os
from finds.alfred import Alfred, fred_qd
from settings import settings

ECHO = False
imgdir = os.path.join(settings['images'], 'regression')
alf = Alfred(api_key=settings['fred']['api_key'], echo=ECHO)

# Get FRED-QD data
qdf, qt = fred_qd(202004)    # from vintage April 2020
df = qdf    # to keep only factor series: qdf[list(qt.index[qt['factors'] == 1])]
t = qt['transform']
beg = 19620701
end = 20191231               # ignore 2020

# Apply the suggested tcode transformation to every series
freq = 'Q'
transformed = []
for col in df.columns:
    transformed.append(alf.transform(df[col], tcode=t[col], freq=freq))
data = pd.concat(transformed, axis=1).iloc[2:]  # drop rows lost to differencing
c = list(data.columns)
data = data.loc[(data.index >= beg) & (data.index <= end)]
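# For reference, a sketch of the standard FRED-MD/QD tcode conventions that
# alf.transform is assumed to implement (this helper is illustrative, not
# part of the finds library): 1=level, 2=first difference, 3=second
# difference, 4=log, 5=log difference, 6=second log difference,
# 7=difference of percent change.
def apply_tcode(x: pd.Series, tcode: int) -> pd.Series:
    """Apply a FRED-MD/QD transformation code to a series"""
    if tcode == 1:
        return x
    elif tcode == 2:
        return x.diff()
    elif tcode == 3:
        return x.diff().diff()
    elif tcode == 4:
        return np.log(x)
    elif tcode == 5:
        return np.log(x).diff()
    elif tcode == 6:
        return np.log(x).diff().diff()
    elif tcode == 7:
        return x.pct_change().diff()
    raise ValueError(f"unknown tcode: {tcode}")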
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
from copy import deepcopy
import os
import re
import time
from datetime import datetime
from finds.alfred import fred_md, Alfred
from finds.solve import lm, is_inlier
from settings import settings

imgdir = os.path.join(settings['images'], 'ts')

# Load and pre-process time series from FRED
alf = Alfred(api_key=settings['fred']['api_key'])
usrec = alf('USREC', freq='m')   # to indicate recession periods in the plots
usrec.index = pd.DatetimeIndex(usrec.index.astype(str), freq='infer')
g = usrec.astype(bool) | usrec.shift(-1, fill_value=0).astype(bool)
g = (g != g.shift(fill_value=0)).cumsum()[g].to_frame()
g = g.reset_index().groupby('USREC')['date'].agg(['first', 'last'])
vspans = [(v[0], v[1]) for k, v in g.iterrows()]   # recession date spans

# Retrieve FRED-MD series
mdf, mt = fred_md(202104)        # from vintage April 2021
beg = 19600301
end = 20201231

# Apply tcode transformations, DatetimeIndex, and sample beg:end
df = mdf
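# A plausible continuation (sketch): transform each column with its suggested
# tcode (assuming alf.transform behaves as in the FRED-QD script above),
# restrict to the beg:end sample, and convert to a month-end DatetimeIndex.
data = pd.concat([alf.transform(df[col], tcode=mt['transform'][col], freq='m')
                  for col in df.columns], axis=1).iloc[2:]
data = data[(data.index >= beg) & (data.index <= end)]
data.index = pd.DatetimeIndex(data.index.astype(str), freq='infer')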
Terence Lim
License: MIT
"""
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
from finds.alfred import Alfred, fred_md
import time
from datetime import datetime
import os
from settings import settings

ECHO = True
imgdir = os.path.join(settings['images'], 'ts')
alf = Alfred(api_key=settings['fred']['api_key'], echo=ECHO)
# or cache locally with: Alfred(..., savefile=settings['scratch'] + 'fred.md')

# Popular FRED series: scrape the top two pages
r = {}
for page in [1, 2]:
    popular = Alfred.popular(page)
    for s in popular:
        t = alf.series(s)          # calls the 'series' FRED api
        r.update({s: {} if t.empty else t.iloc[-1][['title', 'popularity']]})
DataFrame.from_dict(r, orient='index')

# Traversing the categories tree
node = 0
while True:
    node = alf.get_category(node)
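    # Plausible continuation of this loop (a sketch): get_category, as used
    # in the other scripts, is assumed to return a dict with 'id', 'name',
    # and a 'children' list; print them and prompt for the next node.
    print(node['id'], node['name'])
    for child in node.get('children', []):
        print('   ', child['id'], child['name'])
    choice = input('child category id to descend into (blank to stop): ')
    if not choice:
        break
    node = int(choice)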
Terence Lim
License: MIT
"""
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import time
from datetime import datetime
import os
import re
from finds.alfred import Alfred
from finds.busday import to_monthend
from settings import settings

imgdir = os.path.join(settings['images'], 'ts')
alf = Alfred(api_key=settings['fred']['api_key'])

# Term Structure of Interest Rates
# list monthly Constant Maturity Treasury series, excluding inflation-indexed
c = alf.get_category(115)          # Fed H.15 Selected Interest Rates
print(c['id'], c['name'])
t = Series({s['id']: s['title'] for s in c['series']
            if s['frequency'] == 'Monthly' and 'Inflation' not in s['title']})
print(t.to_latex())

# retrieve CMT yields, and infer maturity (in months) from the title
b = pd.concat([alf(s, freq='M') for s in t.index], axis=1, join='inner')
b.columns = [
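    # Hypothetical completion of this truncated list (a sketch): parse a
    # maturity like "3-Month" or "10-Year" from each title with an
    # illustrative regex, and express it in months.
    int(m.group(1)) * (12 if m.group(2) == 'Year' else 1)
    for m in [re.search(r'(\d+)-(Year|Month)', title) for title in t]]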
Terence Lim
License: MIT
"""
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import time
import os
import seaborn as sns
from finds.alfred import Alfred
from settings import settings

imgdir = os.path.join(settings['images'], 'ts')
alf = Alfred(api_key=settings['fred']['api_key'],
             savefile=settings['scratch'] + 'fred.md')

# Seasonality
import scipy.signal
series_id = 'ND000334Q'            # real GDP
df = alf(series_id, log=1, diff=1, freq='Q').dropna()  # quarterly log growth
x = df.values.flatten()
fig, axes = plt.subplots(1, 2, num=1, clear=True, figsize=(10, 5))
axes[0].plot(pd.DatetimeIndex(df.index.astype(str), freq='infer'),
             x.cumsum(), marker=None)
axes[0].set_title(" ".join(alf.header(
    series_id, ['id', 'title', 'seasonal_adjustment']).to_list()),
    fontsize=10)

# estimate the power spectral density with Welch's method
freq, power = scipy.signal.welch(x - x.mean(), nperseg=4 * (len(x) // 4))
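# Plausible continuation (a sketch): plot the Welch periodogram in the right
# panel; for quarterly log-differences, residual seasonality appears as a
# peak near frequency 0.25 cycles per quarter (a one-year cycle).
axes[1].semilogy(freq, power)
axes[1].axvline(0.25, ls=':', color='grey')   # annual cycle in quarterly data
axes[1].set_xlabel('frequency (cycles per quarter)')
axes[1].set_ylabel('power spectral density')
axes[1].set_title('Welch periodogram', fontsize=10)
plt.tight_layout()
plt.show()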
import os
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from hmmlearn import hmm
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
from finds.alfred import fred_md, fred_qd, Alfred
from finds.learning import hmm_summary
from settings import settings

imgdir = os.path.join(settings['images'], 'rnn')

# Load and pre-process time series from FRED
alf = Alfred(api_key=settings['fred']['api_key'])

# to indicate recession periods in the plots
usrec = alf('USREC', freq='m')
usrec.index = pd.DatetimeIndex(usrec.index.astype(str), freq='infer')
g = usrec.astype(bool) | usrec.shift(-1, fill_value=0).astype(bool)
g = (g != g.shift(fill_value=0)).cumsum()[g].to_frame()
g = g.reset_index().groupby('USREC')['date'].agg(['first', 'last'])
vspans = [(v[0], v[1]) for k, v in g.iterrows()]   # recession date spans

# Retrieve FRED-MD series and apply tcode transformations
beg = 19600301
end = 20200131
df, t = fred_md(202004)            # from vintage April 2020
data = []
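# A plausible continuation (sketch): transform each series by its suggested
# tcode, standardize, and fit a two-state Gaussian HMM whose hidden regimes
# often align with recessions (hyperparameters here are illustrative).
for col in df.columns:
    data.append(alf.transform(df[col], tcode=t['transform'][col], freq='m'))
X = StandardScaler().fit_transform(
    pd.concat(data, axis=1).iloc[2:].dropna(axis=1).values)
markov = hmm.GaussianHMM(n_components=2, covariance_type='diag',
                         n_iter=100, random_state=0).fit(X)
states = markov.predict(X)         # hidden regime label for each month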
- risk components of bond index returns and interest rate indicators from FRED

Terence Lim
License: MIT
"""
import os
import re
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import seaborn as sns
from finds.alfred import Alfred, marginalR2
from settings import settings

imgdir = os.path.join(settings['images'], 'ts')
alf = Alfred(api_key=settings['fred']['api_key'])

# Bond Return Components and Interest Rate Risk Indicators
# get the Merrill Lynch bond index series
c = alf.get_category(32413)
print(c['id'], c['name'])
t = Series({s['id']: s['title'] for s in c['series']})
print(t)
bonds = [alf(s, start=19961231) for s in t.index]
b = pd.concat(bonds, axis=1)

# Show the blocks of dates for which data are available
v = b.notna().sum(axis=1).rename('count')
v = pd.concat([v, (v != v.shift()).cumsum().rename('notna')], axis=1)
g = v.reset_index().groupby(['notna', 'count'])['date'].agg(['first', 'last'])
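# Plausible next step (a sketch): display the contiguous availability blocks,
# then keep only the dates where every index is populated so later return
# decompositions run on a balanced panel.
print(g.sort_values('first'))                  # blocks of available data
full = v.index[v['count'] == len(b.columns)]   # dates with all series present
balanced = b.loc[full].sort_index()            # balanced panel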
Terence Lim
License: MIT
"""
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import time
import os
import seaborn as sns
from finds.alfred import Alfred
from settings import settings

imgdir = os.path.join(settings['images'], 'ts')
alf = Alfred(api_key=settings['fred']['api_key'],
             savefile=settings['scratch'] + 'fred.md')

## Retrieve GDP quarterly series from FRED
s = 'GDPC1'
df = alf(s, log=1, freq='Q')                # log real GDP
df.index = pd.DatetimeIndex(df.index.astype(str))
data = df.to_frame().diff().dropna()        # quarterly log growth

## Run linear regression on one lag of itself
import statsmodels.formula.api as smf
model = smf.ols(formula='GDPC1 ~ GDPC1.shift(1)', data=data).fit()
print(model.summary())

# Convenience method to plot residuals and identify outliers
def plot_fitted(fitted,
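                # hypothetical continuation: the original signature is
                # truncated after `fitted,`; the remaining parameters and
                # body below are an illustrative sketch
                outliers=3, ax=None, title=''):
    """Plot residuals of a fitted OLS results object, marking outliers"""
    ax = ax or plt.gca()
    resid = fitted.resid                      # residuals, indexed by date
    z = resid / resid.std()                   # standardized residuals
    ax.plot(resid.index, resid, color='C0')
    out = z.abs().nlargest(outliers).index    # largest absolute outliers
    ax.scatter(out, resid[out], color='C1')
    ax.set_title(title or 'Residuals with largest outliers flagged')
    return out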
plt.show()

# Plot sentiment, change, and cosine distributions in the universe by year
"""
- visualize with economy-wide profitability (CP Corporate Profits from FRED)

Align year as caldate, e.g. for caldate year 2019:
- filings from April 2019 to Mar 2020 (i.e. year = filing year - 1 if month <= 3)
- universe is as of year end prior to filing date
- economic time series is average annual value ending Dec 2019
- concurrent return year is Jan 2019 to Dec 2019
- next return year (i.e. lagged filings) is April 2020 to Mar 2021
"""
from finds.alfred import Alfred
al = Alfred(api_key=settings['fred']['api_key'])
series_id = 'CP'               # Corporate Profits
# alternatives: series_id = 'UNRATE' or series_id = 'WILL5000IND'
econ = al(series_id)
econ = econ.to_frame().assign(year=econ.index // 10000).groupby('year').mean()

for i, sent in enumerate(['mdasent', 'mdachg', 'mdacos']):
    g = data[data['currlen'].gt(500)].dropna().groupby('year')
    iq1, iq2, iq3 = [g[sent].quantile(p) for p in [.25, .5, .75]]
    y = iq2.index.astype(int)
    fig, ax = plt.subplots(1, 1, clear=True, num=1 + i, figsize=(9, 3))
    ax.plot(y, iq2, ls='-', color='C1')
    ax.fill_between(y, iq1, iq3, alpha=0.2, color='C1')
    ax.set_title(f"{sent.upper()} by Fiscal Year of 10-K Filing")
    ax.set_xlabel("Fiscal Year (previous year if month filed <= March)")
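    # plausible continuation of the loop (a sketch, not the original code):
    # overlay the economy-wide series on a secondary y-axis for comparison
    bx = ax.twinx()
    bx.plot(econ.index.astype(int), econ[series_id], ls='--', color='C0')
    bx.set_ylabel(f"{series_id} (annual average)", color='C0')
    plt.tight_layout()
    plt.show()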
from datetime import datetime
import os
import random
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import statsmodels.api as sm
from statsmodels.tsa.api import VAR
from finds.alfred import Alfred, fred_md, fred_qd
from finds.alfred import pcaEM, BaiNg, marginalR2
from finds.solve import integration_order, is_inlier
from finds.display import plot_bands
from settings import settings

imgdir = os.path.join(settings['images'], 'ts')
alf = Alfred(api_key=settings['fred']['api_key'])

# Transformation Codes, and Stationarity
qdf, qt = fred_qd(202004)          # from vintage April 2020
mdf, mt = fred_md(201505)          # from vintage May 2015
print('Number of time series and suggested transformations, by tcode:')
tcodes = pd.concat([Series(alf.tcode_[i], name=i).to_frame().T
                    for i in range(1, 8)], axis=0).fillna(False)
tcodes = tcodes.join(qt['transform'].value_counts().rename('fred-qd'))\
               .join(mt['transform'].value_counts().rename('fred-md'))\
               .fillna(0).astype({'fred-qd': int, 'fred-md': int})

# Estimate and Compare Integration Order
out = {}
for label, df, t in [['md', mdf, mt], ['qd', qdf, qt]]:
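    # plausible loop body (a sketch): compare each series' suggested order
    # of integration (implied by its tcode) with an ADF-based estimate;
    # this illustrative version uses statsmodels directly rather than the
    # finds.solve.integration_order helper, whose signature is not shown
    from statsmodels.tsa.stattools import adfuller
    implied = t['transform'].map({1: 0, 2: 1, 3: 2, 4: 0, 5: 1, 6: 2, 7: 1})
    estimated = {}
    for col in df.columns:
        x = np.log(df[col]) if t['transform'][col] in [4, 5, 6] else df[col]
        x = x.dropna()
        order = 0
        while order < 2 and adfuller(x)[1] > 0.05:  # cannot reject unit root
            x = x.diff().dropna()
            order += 1
        estimated[col] = order
    out[label] = DataFrame({'implied': implied,
                            'estimated': Series(estimated)})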