Terence Lim
License: MIT
"""
import os
import numpy as np
import scipy
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import seaborn as sns 
from finds.alfred import Alfred

from settings import settings
imgdir = os.path.join(settings['images'], 'ts')
alf = Alfred(api_key=settings['fred']['api_key'])

# proportion of failures likelihood test
def kupiecLR(s, n, var):
    """Kupiec proportion-of-failures likelihood ratio test of a VaR level.

    Compares the observed violation rate s/n with the rate 1 - var implied
    by the VaR confidence level, using a binomial likelihood ratio.

    Args:
        s: number of VaR violations observed
        n: number of trials (observations); must be positive
        var: VaR confidence level, e.g. 0.95 for 95% VaR

    Returns:
        dict with keys 'lr' (likelihood ratio statistic) and 'pvalue'
        (upper-tail probability of a chi-square with 1 degree of freedom)
    """
    # local imports: a bare `import scipy` does not guarantee that the
    # stats/special submodules are loaded
    from scipy.special import xlogy, xlog1py
    from scipy.stats import chi2

    p = 1 - var        # expected violation rate, e.g. 0.05 when var is 0.95
    t = n - s          # number of non-violations
    phat = s / n       # observed violation rate

    # xlogy/xlog1py return 0 when the count is 0, so s == 0 and s == n give
    # a finite statistic rather than nan from 0 * log(0)
    num = xlog1py(t, -p) + xlogy(s, p)
    den = xlog1py(t, -phat) + xlogy(s, phat)
    lr = -2 * (num - den)

    # survival function keeps precision in the far tail (vs. 1 - cdf)
    return {'lr': lr, 'pvalue': chi2.sf(lr, df=1)}

def pof(X, pred, var=0.95):
    """Kupiec proportion of failures VaR test"""
    Z = X / pred
    z = scipy.stats.norm.ppf(1 - var)
import os
import sklearn.feature_extraction, sklearn.decomposition
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
from matplotlib.ticker import (MultipleLocator, AutoMinorLocator)
from finds.unstructured import Unstructured
from finds.readers import fetch_fomc
from finds.database import MongoDB
from settings import settings
mongodb = MongoDB(**settings['mongodb'])
fomc = Unstructured(mongodb, 'FOMC')
logdir = os.path.join(settings['images'], 'fomc')

# retrieve recessions dates for plotting
from finds.alfred import Alfred
alf = Alfred(api_key=settings['fred']['api_key'])
usrec = alf('USREC')
# re-parse the index through its string form into a DatetimeIndex
usrec.index = pd.DatetimeIndex(usrec.index.astype(str), freq='infer')
# flag every period that is nonzero or immediately precedes a nonzero one
g = usrec.astype(bool) | usrec.shift(-1, fill_value=0).astype(bool)
# number each run of consecutive flagged periods, keeping flagged rows only
g = (g != g.shift(fill_value=0)).cumsum()[g].to_frame()
# first and last date of each run
g = g.reset_index().groupby('USREC')['date'].agg(['first', 'last'])
# pair the columns by label: integer keys on a label-indexed row (v[0], v[1])
# depend on a positional fallback that pandas has deprecated
vspans = list(zip(g['first'], g['last']))

# Update FOMC Minutes
# Distinct 'date' values already present under fomc['minutes']; used below to
# skip dates that were retrieved before.
dates = fomc['minutes'].distinct('date')  # check dates stored
# Called without arguments, fetch_fomc() is used here as a catalog whose items
# are (date, url) pairs.
catalog = fetch_fomc()  # check for new dates in fomc site, and retrieve
print(
    f"FOMC: {len(catalog)} dates {min(catalog.keys())}-{max(catalog.keys())}")
# Fetch content only for catalog dates that are not stored yet.
docs = {d: fetch_fomc(url) for d, url in catalog.items() if d not in dates}
# Report each newly fetched date together with the length of what was fetched.
print('NEW:', ", ".join([f"{k}: {len(v)} chars " for k, v in docs.items()]))
License: MIT
"""
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
from pandas.api.types import is_list_like, is_numeric_dtype
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error    
import time
import os
from finds.alfred import Alfred, fred_qd
from settings import settings
ECHO = False
imgdir = os.path.join(settings['images'], 'regression')
alf = Alfred(api_key=settings['fred']['api_key'], echo=ECHO)

# Get FRED-QD data
qdf, qt = fred_qd(202004)
df, t = qdf, qt['transform']      # all columns, with their transform codes
beg, end = 19620701, 20191231     # sample window; ignore 2020
freq = 'Q'

# apply each column's transformation code, one series at a time
transformed = []
for col in df.columns:
    transformed += [alf.transform(df[col], tcode=t[col], freq=freq)]

# combine side by side and drop the first two rows
data = pd.concat(transformed, axis=1)
data = data.iloc[2:]
c = data.columns.tolist()

# keep only rows whose index lies within [beg, end]
data = data.loc[(beg <= data.index) & (data.index <= end)]
Beispiel #4
0
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
from copy import deepcopy
import os
import re
import time
from datetime import datetime
from finds.alfred import fred_md, Alfred
from finds.solve import lm, is_inlier
from settings import settings
imgdir = os.path.join(settings['images'], 'ts')

# Load and pre-process time series from FRED
alf = Alfred(api_key=settings['fred']['api_key'])
usrec = alf('USREC', freq='m')  # to indicate recession periods in the plots
# re-parse the index through its string form into a DatetimeIndex
usrec.index = pd.DatetimeIndex(usrec.index.astype(str), freq='infer')

# flag every period that is nonzero or immediately precedes a nonzero one
g = usrec.astype(bool) | usrec.shift(-1, fill_value=0).astype(bool)
# number each run of consecutive flagged periods, keeping flagged rows only
g = (g != g.shift(fill_value=0)).cumsum()[g].to_frame()
# first and last date of each run
g = g.reset_index().groupby('USREC')['date'].agg(['first', 'last'])
# pair the columns by label: integer keys on a label-indexed row (v[0], v[1])
# depend on a positional fallback that pandas has deprecated
vspans = list(zip(g['first'], g['last']))

# Retrieve FRED-MD series
mdf, mt = fred_md(202104)  # vintage argument 202104, i.e. April 2021
# sample window bounds (presumably YYYYMMDD integers -- confirm against index)
beg = 19600301
end = 20201231  # 20191231

# Apply tcode transformations, DatetimeIndex, and sample beg:end
df = mdf
Beispiel #5
0
Terence Lim
License: MIT
"""
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
from finds.alfred import Alfred, fred_md
import time
from datetime import datetime
import os
from settings import settings

ECHO = True
imgdir = os.path.join(settings['images'], 'ts')
alf = Alfred(api_key=settings['fred']['api_key'], echo=ECHO)
#savefile=settings['scratch'] + 'fred.md')

# Popular FRED series: top two pages
r = dict()
for page in (1, 2):  # first two pages of the popular-series listing
    popular = Alfred.popular(page)
    for s in popular:
        t = alf.series(s)  # 'series' FRED api call for this id
        if t.empty:
            # nothing returned: keep the id with an empty record
            r[s] = {}
        else:
            # latest row, title and popularity fields only
            r[s] = t.iloc[-1][['title', 'popularity']]
DataFrame.from_dict(r, orient='index')

# Traversing categories tree
node = 0
while True:
    node = alf.get_category(node)
Terence Lim
License: MIT
"""
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import time
from datetime import datetime
import os
import re
from finds.alfred import Alfred
from finds.busday import to_monthend
from settings import settings
imgdir = os.path.join(settings['images'], 'ts')
alf = Alfred(api_key=settings['fred']['api_key'])

# Term Structure of Interest Rates
# list of monthly Constant Maturity Treasury, excluding inflation-indexed
# Category record for id 115; read below via its 'id', 'name' and 'series' keys
c = alf.get_category(115)  # Fed H.15 Selected Interest Rates
print(c['id'], c['name'])
# Map series id -> title, keeping only series whose frequency is 'Monthly'
# and whose title does not contain 'Inflation'
t = Series({
    s['id']: s['title']
    for s in c['series']
    if s['frequency'] == 'Monthly' and 'Inflation' not in s['title']
})
# Print the selected ids and titles as a LaTeX table
print(t.to_latex())

# retrieve CMT yields, and infer maturity from label
b = pd.concat([alf(s, freq='M') for s in t.index], axis=1, join='inner')
b.columns = [
Beispiel #7
0
Terence Lim
License: MIT
"""
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import time
import os
import seaborn as sns
from finds.alfred import Alfred

from settings import settings
imgdir = os.path.join(settings['images'], 'ts')
alf = Alfred(api_key=settings['fred']['api_key'],
             savefile=settings['scratch'] + 'fred.md')

# Seasonality
import scipy.signal
series_id = 'ND000334Q'  # real gdp
# Request the series with log=1, diff=1 at quarterly frequency, dropping
# missing rows
df = alf(series_id, log=1, diff=1, freq='Q').dropna()
# Flat 1-D array of the values
x = df.values.flatten()  #y.iloc[:-240].copy()
fig, axes = plt.subplots(1, 2, num=1, clear=True, figsize=(10, 5))
# Left panel: cumulative sum of the values against the index parsed as dates
axes[0].plot(pd.DatetimeIndex(df.index.astype(str), freq='infer'),
             x.cumsum(),
             marker=None)
# Title is built from the id, title and seasonal_adjustment header fields
axes[0].set_title(" ".join(
    alf.header(series_id, ['id', 'title', 'seasonal_adjustment']).to_list()),
                  fontsize=10)

# Welch spectral estimate of the demeaned series; the segment length is the
# largest multiple of 4 that does not exceed len(x)
freq, power = scipy.signal.welch(x - x.mean(), nperseg=4 * (len(x) // 4))
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from hmmlearn import hmm
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
from finds.alfred import fred_md, fred_qd, Alfred
from finds.learning import hmm_summary
from settings import settings
imgdir = os.path.join(settings['images'], 'rnn')

# Load and pre-process time series from FRED
alf = Alfred(api_key=settings['fred']['api_key'])

# to indicate recession periods in the plots
usrec = alf('USREC', freq='m')
# re-parse the index through its string form into a DatetimeIndex
usrec.index = pd.DatetimeIndex(usrec.index.astype(str), freq='infer')
# flag every period that is nonzero or immediately precedes a nonzero one
g = usrec.astype(bool) | usrec.shift(-1, fill_value=0).astype(bool)
# number each run of consecutive flagged periods, keeping flagged rows only
g = (g != g.shift(fill_value=0)).cumsum()[g].to_frame()
# first and last date of each run
g = g.reset_index().groupby('USREC')['date'].agg(['first', 'last'])
# pair the columns by label: integer keys on a label-indexed row (v[0], v[1])
# depend on a positional fallback that pandas has deprecated
vspans = list(zip(g['first'], g['last']))

# Retrieve FRED-MD series and apply tcode transformations
beg = 19600301
end = 20200131

df, t = fred_md(202004)  # from vintage April 2020
data = []
- risk components of bond index returns and interest rate indicators from FRED

Terence Lim
License: MIT
"""
import os
import re
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import seaborn as sns
from finds.alfred import Alfred, marginalR2
from settings import settings
imgdir = os.path.join(settings['images'], 'ts')
alf = Alfred(api_key=settings['fred']['api_key'])

# Bond Return Components and Interest Rate Risk Indicators
# get Merrill Lynch bond indexes
# Category record for id 32413; read below via its 'id', 'name' and 'series' keys
c = alf.get_category(32413)
print(c['id'], c['name'])
# Map series id -> title for every series listed in the category
t = Series({s['id']: s['title'] for s in c['series']})
t

# Retrieve each series with start=19961231, then combine them column-wise
bonds = [alf(s, start=19961231) for s in t.index]
b = pd.concat(bonds, axis=1)

# Show blocks of data available
# 'count': number of non-missing series in each row
v = b.notna().sum(axis=1).rename('count')
# 'notna': run id that increments whenever 'count' changes from the prior row
v = pd.concat([v, (v != v.shift()).cumsum().rename('notna')], axis=1)
# first and last date of each run of constant 'count'
g = v.reset_index().groupby(['notna', 'count'])['date'].agg(['first','last'])
Beispiel #10
0
Terence Lim
License: MIT
"""
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import time
import os
import seaborn as sns
from finds.alfred import Alfred

from settings import settings
imgdir = os.path.join(settings['images'], 'ts')
alf = Alfred(api_key=settings['fred']['api_key'],
             savefile=settings['scratch'] + 'fred.md')

## Retrieve GDP quarterly series from FRED
s = 'GDPC1'
# Request the series with log=1 at quarterly frequency
df = alf(s, log=1, freq='Q')
# Re-parse the index through its string form into a DatetimeIndex
df.index = pd.DatetimeIndex(df.index.astype(str))
# First differences as a one-column frame, without the leading missing row
data = df.to_frame().diff().dropna()

## Run Linear Regression (lagged)
import statsmodels.formula.api as smf
# Regress the differenced series on its own one-period lag; the shift is
# evaluated inside the formula
model = smf.ols(formula='GDPC1 ~ GDPC1.shift(1)', data=data).fit()
print(model.summary())


# Convenience method to plot residuals and identify outliers
def plot_fitted(fitted,
Beispiel #11
0
plt.show()

# Plot of sentiment, change, cosine distribution in univ by year
"""
- visualize with economy-wide profitability (CP Corporate Profits from FRED)
Align year as caldate, e.g. for caldate year 2019:
- filings from April 2019 to Mar 2020 (i.e. year = filing year-1 if month<=3)
- universe is as of year end prior to filing date
- economic time series is average annual value ending Dec 2019
- concurrent return year is Jan 2019 to Dec 2019
- next return year (i.e. lagged filings) is April 2020-Mar 2021
"""
#data = data.dropna(subset=['date'])

from finds.alfred import Alfred
al = Alfred(api_key=settings['fred']['api_key'])
series_id = 'CP'  # Corporate Profits
#series_id = 'UNRATE'
#series_id = 'WILL5000IND'
econ = al(series_id)
# Average the series by year, where year = index // 10000
# NOTE(review): assumes the index holds YYYYMMDD integers -- confirm
econ = econ.to_frame().assign(year=econ.index // 10000).groupby('year').mean()

for i, sent in enumerate(['mdasent', 'mdachg', 'mdacos']):
    g = data[data['currlen'].gt(500)].dropna().groupby('year')
    iq1, iq2, iq3 = [g[sent].quantile(p) for p in [.25, .5, .75]]
    y = iq2.index.astype(int)
    fig, ax = plt.subplots(1, 1, clear=True, num=1 + i, figsize=(9, 3))
    ax.plot(y, iq2, ls='-', color='C1')
    ax.fill_between(y, iq1, iq3, alpha=0.2, color='C1')
    ax.set_title(f"{sent.upper()} by Fiscal Year of 10-K Filing")
    ax.set_xlabel("Fiscal Year (previous year if month filed<=March)")
from datetime import datetime
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import statsmodels.api as sm
from statsmodels.tsa.api import VAR
from finds.alfred import Alfred, fred_md, fred_qd
from finds.alfred import pcaEM, BaiNg, marginalR2
from finds.solve import integration_order, is_inlier
from finds.display import plot_bands

from settings import settings
imgdir = os.path.join(settings['images'], 'ts')
alf = Alfred(api_key=settings['fred']['api_key'])

# Transformation Codes, and Stationarity
# FRED-QD is loaded with vintage argument 202004, FRED-MD with 201505
qdf, qt = fred_qd(202004) # 202004      # from vintage April 2020
mdf, mt = fred_md(201505) # 201505

print('Number of time series and suggested transformations, by tcode:')
# One row per tcode 1..7 built from alf.tcode_[i]; entries missing for a
# given tcode are filled with False
tcodes = pd.concat([Series(alf.tcode_[i], name=i).to_frame().T
                    for i in range(1,8)], axis=0).fillna(False)
# Append, per tcode, how many FRED-QD and FRED-MD series use it
# (0 when a tcode does not occur in that dataset)
tcodes = tcodes.join(qt['transform'].value_counts().rename('fred-qd'))\
               .join(mt['transform'].value_counts().rename('fred-md'))\
               .fillna(0).astype({'fred-qd': int, 'fred-md': int})

# Estimate and Compare Integration Order
out = {}
for label, df, t in [['md', mdf, mt], ['qd', qdf, qt]]: