class TestFred(unittest.TestCase):
    """Live-API checks for the ``Fred`` client (series data and series metadata)."""

    def setUp(self):
        # Fred() is constructed without an explicit api_key.
        self.fred = Fred()

    def testGetSeries(self):
        """Fetch a short SP500 window and the PAYEMS metadata, then check error paths."""
        s = self.fred.get_series('SP500',
                                 observation_start='9/2/2014',
                                 observation_end='9/5/2014')
        # `.ix` was removed from pandas; `.loc` is the label-based replacement.
        self.assertEqual(s.loc['9/2/2014'], 2002.28)
        self.assertEqual(len(s), 4)

        info = self.fred.get_series_info('PAYEMS')
        self.assertEqual(info['title'], 'All Employees: Total nonfarm')

        # invalid series id
        self.assertRaises(ValueError, self.fred.get_series, 'invalid')
        self.assertRaises(ValueError, self.fred.get_series_info, 'invalid')

        # invalid parameter
        with self.assertRaises(ValueError):
            self.fred.get_series('SP500',
                                 observation_start='invalid-datetime-str')

    def tearDown(self):
        # Nothing to clean up.
        return
class TestFred(unittest.TestCase):
    """Live-API checks for the ``Fred`` client (series, metadata and release search)."""

    def setUp(self):
        # Fred() is constructed without an explicit api_key.
        self.fred = Fred()

    def testGetSeries(self):
        """Fetch a short SP500 window and the PAYEMS metadata, then check error paths."""
        s = self.fred.get_series('SP500',
                                 observation_start='9/2/2014',
                                 observation_end='9/5/2014')
        # `.ix` was removed from pandas; `.loc` is the label-based replacement.
        self.assertEqual(s.loc['9/2/2014'], 2002.28)
        self.assertEqual(len(s), 4)

        info = self.fred.get_series_info('PAYEMS')
        self.assertEqual(info['title'], 'All Employees: Total nonfarm')

        # invalid series id
        self.assertRaises(ValueError, self.fred.get_series, 'invalid')
        self.assertRaises(ValueError, self.fred.get_series_info, 'invalid')

        # invalid parameter
        with self.assertRaises(ValueError):
            self.fred.get_series('SP500',
                                 observation_start='invalid-datetime-str')

    def testSearch(self):
        """Search release 175 and check three expected series ids and their start date."""
        personal_income_series = self.fred.search_by_release(
            175, limit=3, order_by='popularity', sort_order='desc')
        series_ids = ['PCPI06037', 'PCPI06075', 'PCPI24510']
        for series_id in series_ids:
            self.assertIn(series_id, personal_income_series.index)
            # `.ix` was removed from pandas; `.loc` does the same label lookup.
            self.assertEqual(
                personal_income_series.loc[series_id, 'observation_start'],
                datetime(1969, 1, 1))

    def tearDown(self):
        # Nothing to clean up.
        return
def Get_from_FRED(id=id, path=path):
    """Download every series in *id* from FRED and write data/info CSV files.

    Each id is fetched with ``fred.get_series`` and ``fred.get_series_info``.
    Ids that raise ``IOError`` are retried on the next pass until every id has
    succeeded (there is no retry limit, so a permanently failing id loops
    forever, as before).

    Four files are written under *path*: ``fromFRED_data<file_date>.csv`` and
    ``fromFRED_info<file_date>.csv`` plus undated copies ``fromFRED_data.csv``
    and ``fromFRED_info.csv``.

    Args:
        id: iterable of FRED series ids (defaults to the module-level ``id``).
        path: output directory prefix, concatenated directly with file names.
    """
    data = []
    info = []
    pending = list(id)
    while pending:
        # Collect failures separately instead of removing from the list being
        # iterated: removing during iteration skipped the following element.
        failed = []
        for i in pending:
            print(i, 'trying...')
            try:
                series = fred.get_series(i)
                series_info = fred.get_series_info(i)
            except IOError:
                print('did not secceed!')
                failed.append(i)
            else:
                print('secceed.')
                data.append(series)
                info.append(series_info)
        pending = failed

    # One column per series, labelled by the id reported in its metadata.
    df_info = pd.DataFrame(info).T
    df_info.columns = list(df_info.loc['id'])
    df_US = pd.DataFrame(data).T
    df_US.columns = df_info.columns
    df_US.index.name = 'date'

    # Dated snapshot plus an undated "latest" copy of each table.
    df_US.to_csv(path + 'fromFRED_data' + file_date + '.csv')
    df_info.to_csv(path + 'fromFRED_info' + file_date + '.csv')
    df_US.to_csv(path + 'fromFRED_data.csv')
    df_info.to_csv(path + 'fromFRED_info.csv')
    return
class TestFred(unittest.TestCase):
    """Live-API checks for the ``Fred`` client (series, metadata and release search)."""

    def setUp(self):
        # Fred() is constructed without an explicit api_key.
        self.fred = Fred()

    def testGetSeries(self):
        """Fetch a short SP500 window and the PAYEMS metadata, then check error paths."""
        s = self.fred.get_series("SP500",
                                 observation_start="9/2/2014",
                                 observation_end="9/5/2014")
        # `.ix` was removed from pandas; `.loc` is the label-based replacement.
        self.assertEqual(s.loc["9/2/2014"], 2002.28)
        self.assertEqual(len(s), 4)

        info = self.fred.get_series_info("PAYEMS")
        self.assertEqual(info["title"], "All Employees: Total nonfarm")

        # invalid series id
        self.assertRaises(ValueError, self.fred.get_series, "invalid")
        self.assertRaises(ValueError, self.fred.get_series_info, "invalid")

        # invalid parameter
        with self.assertRaises(ValueError):
            self.fred.get_series("SP500",
                                 observation_start="invalid-datetime-str")

    def testSearch(self):
        """Search release 175 and check three expected series ids and their start date."""
        personal_income_series = self.fred.search_by_release(
            175, limit=3, order_by="popularity", sort_order="desc")
        series_ids = ["PCPI06037", "PCPI06075", "PCPI34039"]
        for series_id in series_ids:
            self.assertIn(series_id, personal_income_series.index)
            # `.ix` was removed from pandas; `.loc` does the same label lookup.
            self.assertEqual(
                personal_income_series.loc[series_id, "observation_start"],
                datetime(1969, 1, 1))

    def tearDown(self):
        # Nothing to clean up.
        return
class datapoint(object):
    """A symbol tagged with its data source; FRED symbols also load metadata."""

    def __init__(self, symbol, source):
        """Store *symbol*/*source*; when source is 'fred', fetch the series info."""
        self.sym = symbol
        self.source = source
        is_fred_source = self.source == 'fred'
        if not is_fred_source:
            # Non-FRED sources carry no client and no metadata attributes.
            return
        self.fred = Fred(fredKey)
        self.fred_info(symbol)

    def fred_info(self, symbol):
        """Copy title, short frequency and short units (lower-cased) from FRED."""
        series_meta = self.fred.get_series_info(symbol)
        self.title = series_meta.title
        self.frequency = series_meta.frequency_short.lower()
        self.units = series_meta.units_short.lower()
class FredClass:
    """Thin wrapper around a ``Fred`` client that returns tidy DataFrames."""

    def __init__(self, FRED_API):
        """Create the underlying client with *FRED_API* as its api_key."""
        self.fred = Fred(api_key=FRED_API)

    def search(self, search_text):
        """Search FRED and return the hits sorted by integer popularity, descending.

        The result is also stored on ``self.search_df``.
        """
        self.search_df = self.fred.search(search_text)
        self.search_df['popularity'] = self.search_df['popularity'].astype(int)
        self.search_df = self.search_df.sort_values(by=['popularity'],
                                                    ascending=False)
        return self.search_df

    def fetch(self, metrics, start_date):
        """Fetch the latest release of each metric as long-format rows.

        Args:
            metrics: iterable of FRED series ids.
            start_date: rows with ``activity_date`` before this are dropped;
                it is compared against ``datetime.date`` values.

        Returns:
            DataFrame with columns ``metric``, ``activity_date``, ``value``
            (also stored on ``self.metrics_df``). Metrics whose fetch raises
            ``ValueError`` are skipped; if nothing could be fetched an empty
            frame with those columns is returned.
        """
        output_columns = ['metric', 'activity_date', 'value']
        frames = []
        for metric in metrics:
            print(f'Fetch metric: {metric}')
            try:
                metrics_df_sub = self.fred.get_series_latest_release(
                    metric).to_frame()
                # TODO: append first then reset_index?
                metrics_df_sub.reset_index(drop=False, inplace=True)
                metrics_df_sub.columns = ['activity_date', 'value']
                metrics_df_sub[
                    'activity_date'] = metrics_df_sub.activity_date.dt.date
                metrics_df_sub['metric'] = metric
                print(metrics_df_sub.dtypes)
            except ValueError:
                print(f'Skip metric: {metric}')
                continue
            frames.append(metrics_df_sub)

        if not frames:
            # Nothing fetched: the filter below would fail on a column-less frame.
            self.metrics_df = pd.DataFrame(columns=output_columns)
            return self.metrics_df

        # DataFrame.append was removed in pandas 2.0; concat keeps the same rows
        # and per-metric indices.
        self.metrics_df = pd.concat(frames)
        self.metrics_df = self.metrics_df.loc[
            self.metrics_df.activity_date >= start_date]
        self.metrics_df = self.metrics_df[output_columns]
        return self.metrics_df

    def get_metrics_info(self, metrics):
        """Return one metadata row per metric, tagged with source and deprecation flag.

        The FRED ``id`` column is renamed to ``metric``; ``source`` is set to
        ``'FRED'`` and ``deprecated`` to ``False`` for every row.
        """
        info_columns = [
            'id', 'title', 'frequency', 'units', 'seasonal_adjustment',
            'popularity', 'notes'
        ]
        rows = [self.fred.get_series_info(metric) for metric in metrics]
        if rows:
            # Build the frame from the row list (DataFrame.append is gone in
            # pandas 2.0); reset_index mirrors the former ignore_index=True.
            info_df = pd.DataFrame(rows).reset_index(drop=True)
            info_df = info_df[info_columns].copy()
        else:
            info_df = pd.DataFrame(columns=info_columns)
        info_df = info_df.rename(columns={'id': 'metric'})
        info_df['source'] = 'FRED'
        info_df['deprecated'] = False
        return info_df
def getFredAPI():
    """Build a summary dict for each watched index from its last two FRED values.

    Returns:
        list of dicts with keys ``title`` (series id), ``value`` (latest value
        as a string), ``indexGap`` (latest minus previous, passed through
        ``converterPrefix``), ``gapPercent`` (unsigned percent change with a
        trailing '%') and ``lastUpdateDate`` (the series' ``observation_end``).
    """
    fred = Fred(api_key=fredToken)
    # 'NASDAQCOM' updates too slowly, so it is not published for now.
    watchList = ['SP500', 'DJIA']
    resultList = []
    for watch in watchList:
        result = dict()
        watchLastInfo = fred.get_series_info(watch)
        # Drop NaN observations first so the gap is computed between the two
        # most recent real values rather than against a missing one.
        watchLastIndex = fred.get_series(watch).dropna().tail(2)
        # Positional access via .iloc: plain [0]/[1] on a date-indexed Series
        # is deprecated positional indexing.
        previousValue = watchLastIndex.iloc[0]
        latestValue = watchLastIndex.iloc[1]
        lastUpdateDate = watchLastInfo.observation_end
        indexGap = round(latestValue - previousValue, 2)
        percent = round(indexGap / previousValue * 100, 2)
        result['title'] = watch
        result['value'] = str(latestValue)
        result['indexGap'] = converterPrefix(indexGap)
        # The sign is stripped here; direction is conveyed by 'indexGap'.
        converterPercent = str(percent).replace('+', '').replace('-', '')
        result['gapPercent'] = converterPercent + '%'
        result['lastUpdateDate'] = lastUpdateDate
        resultList.append(result)
    return resultList
def fred_get(time_series, api_key=None):
    '''Fetch a FRED series as a date-keyed dict together with its units.

    Args:
        time_series: FRED series id.
        api_key: FRED API key. When omitted, the key previously embedded in
            this function is used so existing callers keep working.
            NOTE(review): that key should move to configuration; in
            production the key would be specific to the server.

    Returns:
        (datapoints, units) where ``datapoints`` maps ``datetime.date`` to the
        observed value (NaN observations are skipped) and ``units`` is the
        'units' field of the series info, e.g. 'in millions of dollars'.
    '''
    if api_key is None:
        api_key = 'c210579b3c6567d016211fdd76cb465a'
    fred = Fred(api_key=api_key)
    fred_time_series = fred.get_series(time_series)
    units = fred.get_series_info(time_series)['units']
    # Strip the time component from each timestamp and skip empty values
    # (usually from holidays).
    datapoints = {
        obs_date.date(): value
        for obs_date, value in fred_time_series.items()
        if not isnan(value)
    }
    return (datapoints, units)
class TestFred(unittest.TestCase):
    """Live-API checks for the ``Fred`` client (series, metadata and release search)."""

    def setUp(self):
        # Fred() is constructed without an explicit api_key.
        self.fred = Fred()

    def testGetSeries(self):
        """Fetch a short SP500 window and the PAYEMS metadata, then check error paths."""
        s = self.fred.get_series('SP500',
                                 observation_start='9/2/2014',
                                 observation_end='9/5/2014')
        # `.ix` was removed from pandas; `.loc` is the label-based replacement.
        self.assertEqual(s.loc['9/2/2014'], 2002.28)
        self.assertEqual(len(s), 4)

        info = self.fred.get_series_info('PAYEMS')
        self.assertEqual(info['title'], 'All Employees: Total nonfarm')

        # invalid series id
        self.assertRaises(ValueError, self.fred.get_series, 'invalid')
        self.assertRaises(ValueError, self.fred.get_series_info, 'invalid')

        # invalid parameter
        with self.assertRaises(ValueError):
            self.fred.get_series('SP500',
                                 observation_start='invalid-datetime-str')

    def testSearch(self):
        """Search release 175 and check three expected series ids and their start date."""
        personal_income_series = self.fred.search_by_release(
            175, limit=3, order_by='popularity', sort_order='desc')
        series_ids = ['PCPI06037', 'PCPI06075', 'PCPI34039']
        for series_id in series_ids:
            self.assertIn(series_id, personal_income_series.index)
            # `.ix` was removed from pandas; `.loc` does the same label lookup.
            self.assertEqual(
                personal_income_series.loc[series_id, 'observation_start'],
                datetime(1969, 1, 1))

    def tearDown(self):
        # Nothing to clean up.
        return
fred = Fred(api_key="""key here""")

# These are the keys that correspond to certain macro economic indicators and
# data points from the FRED Database.
series = [
    'SP500', 'GDP', 'A067RL1A156NBEA', 'CPIAUCSL', 'A191RL1Q225SBEA', 'DGS10',
    'IC4WSA', 'UNRATE', 'DEXUSEU', 'BAMLH0A0HYM2', 'MEHOINUSA672N', 'M2V',
    'GFDEGDQ188S', 'FEDFUNDS', 'NAPM', 'DCOILWTICO', 'M2', 'CIVPART',
    'PSAVERT', 'USD3MTD156N', 'T10Y2Y', 'HOUST', 'DGS30', 'MORTG', 'DEXCHUS',
    'BUSLOANS', 'UEMPMEAN', 'EXPGSCA', 'NETEXP', 'A067RP1A027NBEA', 'FYFSD'
]

# Strips the data down to the title, frequency of reporting, units, and the
# latest values. Single-argument print(...) calls produce the same output on
# Python 2 and Python 3, whereas print statements are a SyntaxError on 3.
for t in series:
    data = fred.get_series(t)
    info = fred.get_series_info(t)
    print(info['title'])
    print(info['frequency'])
    print(info['units'])
    print(" ")
    print("LATEST VALUES:")
    print(data.tail())
    print(" ")

# Saves a PDF graph in the folder where code is stored.
# NOTE(review): only the set-up of this loop is visible here; the plotting
# itself is not part of this excerpt.
for i in series:
    info = fred.get_series_info(i)
    title = info['title']
    df = {}
@author: justin.malinchak """ import os #os.environ["FRED_API_KEY"] = "63ef8588c3a78e956fb156c8a1603152" #print os.environ['FRED_API_KEY'] from fredapi import Fred fred = Fred() import pandas as pd pd.options.display.max_colwidth = 60 #%matplotlib inline import matplotlib.pyplot as plt #from IPython.core.pylabtools import figsize #figsize(20, 5) s = fred.get_series('DSPIC96', observation_start='1960-01-01', observation_end='2015-07-01') print s.tail() print '------------------------' info = fred.get_series_info('DSPIC96') print len(info) for k,vinfo in info.iteritems(): print k,vinfo df_gdp = fred.get_series_as_of_date('GDP', '2/1/2015') print df_gdp df_search = fred.search('income').T print df_search
class FredApi:
    """FRED data access plus eurodollar intraday-correlation research helpers.

    Wraps a ``Fred`` client for category searches and series downloads, and
    contains analysis routines that read eurodollar futures data through
    ``QuandlDataObject`` and plot with seaborn / matplotlib / ``ExtendBokeh``.

    Every ``observation_end`` parameter defaults to ``None``, meaning "today"
    resolved at call time.
    """

    def __init__(self):
        self.logger = get_logger()
        self.source = 'fred'
        self.api_key = SecureKeysAccess.get_vendor_api_key_static(
            vendor=str.upper(self.source))
        self.fred_pwd = OSMuxImpl.get_proper_path('/workspace/data/fred/')
        self.seaborn_plots_pwd = OSMuxImpl.get_proper_path(
            '/workspace/data/seaborn/plots/')
        self.fred = Fred(api_key=self.api_key)

    @staticmethod
    def _resolve_observation_end(observation_end):
        """Return *observation_end*, or today's date as YYYY-MM-DD when it is None."""
        # Resolved per call: a `pd.datetime.now()` default argument is frozen at
        # import time, and `pd.datetime` no longer exists in pandas 2.0.
        if observation_end is None:
            return pd.Timestamp.now().strftime('%Y-%m-%d')
        return observation_end

    @staticmethod
    def _stack_held_positions(pos_ind_values, holding_window):
        """Expand each position indicator into a row holding it for *holding_window* steps.

        Row ``i`` is zero before index ``i``, equals ``pos_ind_values[i]`` for
        up to *holding_window* entries starting at ``i`` (truncated at the end
        of the data), and zero afterwards. Summing the returned 2-D array over
        axis 0 gives the total position held at each step.
        """
        n_obs = len(pos_ind_values)
        held_runs = [
            np.repeat(pos_ind, np.min([holding_window, n_obs - item_idx]))
            for item_idx, pos_ind in enumerate(pos_ind_values)
        ]
        padded_rows = [
            np.append(
                np.append(
                    np.repeat(
                        0, np.min([item_idx,
                                   len(held_runs) - len(run)])),
                    np.array(run)),
                np.repeat(
                    0, np.max([len(held_runs) - (item_idx + len(run)), 0])))
            for item_idx, run in enumerate(held_runs)
        ]
        return np.array(padded_rows)

    def search_fred_by_category(self,
                                category_id,
                                limit=20,
                                order_by='popularity',
                                sort_order='desc'):
        """Return the DataFrame of series in a FRED category.

        Example category: https://fred.stlouisfed.org/categories/32413
        (the BAML Total Return Bond Index category).
        """
        df_category_series = self.fred.search_by_category(
            category_id, limit=limit, order_by=order_by, sort_order=sort_order)
        return (df_category_series)

    def get_all_series_in_category(self,
                                   category_id,
                                   limit=20,
                                   observation_start='2010-01-01',
                                   observation_end=None):
        """Download the most popular series of a category as columns of one frame.

        Rows containing any NaN are dropped. ``observation_end=None`` means today.
        """
        observation_end = self._resolve_observation_end(observation_end)
        df_category_series = self.search_fred_by_category(
            category_id=category_id,
            limit=limit,
            order_by='popularity',
            sort_order='desc')
        return_series = df_category_series.id.apply(self.get_data,
                                                    args=(observation_start,
                                                          observation_end))
        transposed = return_series.T
        transposed.dropna(inplace=True)
        return transposed

    def category_series_to_csv(self, category_id, path_to_file):
        """Write the category's series listing (not the data) to *path_to_file*."""
        df_category_series = self.search_fred_by_category(
            category_id=category_id, order_by='popularity', sort_order='desc')
        df_category_series.to_csv(path_to_file)

    def get_multiple_categories_series(self,
                                       category_id_list,
                                       limit_list=None,
                                       observation_start='2010-01-01',
                                       observation_end=None):
        """Download series for several categories and inner-join them on the index.

        Args:
            category_id_list: FRED category ids.
            limit_list: per-category series limits, paired positionally with
                *category_id_list*. ``None`` means 20 for every category (the
                former two-element default silently dropped any category
                beyond the second).
            observation_start: first observation date.
            observation_end: last observation date; ``None`` means today.
        """
        observation_end = self._resolve_observation_end(observation_end)
        if limit_list is None:
            limit_list = [20] * len(category_id_list)
        df_category_series_list = [
            self.search_fred_by_category(category_id=cat_id,
                                         limit=limit,
                                         order_by='popularity',
                                         sort_order='desc')
            for cat_id, limit in zip(category_id_list, limit_list)
        ]
        joined_dataframe = pd.DataFrame()
        for df_category_series in df_category_series_list:
            return_series = df_category_series.id.apply(
                self.get_data, args=(observation_start, observation_end))
            transposed = return_series.T
            transposed.dropna(inplace=True)
            if joined_dataframe.empty is True:
                joined_dataframe = transposed
            else:
                joined_dataframe = joined_dataframe.join(transposed,
                                                         how='inner')
        return joined_dataframe

    def get_data(self,
                 series_id,
                 observation_start='2010-01-01',
                 observation_end=None):
        """Return one FRED series, named after the series title from its metadata."""
        observation_end = self._resolve_observation_end(observation_end)
        data = self.fred.get_series(series_id,
                                    observation_start=observation_start,
                                    observation_end=observation_end)
        series_meta_info = self.fred.get_series_info(series_id=series_id)
        data.name = series_meta_info['title']
        return data

    def correlation_analysis(self,
                             px_df,
                             corr_heatmap_save_filename='corr_heatmap.png',
                             pairplot_save_filename='pairplot.png'):
        """Save a correlation heatmap and a pairplot of percentage returns of *px_df*.

        Both images are written under ``self.seaborn_plots_pwd`` and the
        figures are then shown.
        """
        rets_df = px_df.pct_change().dropna()
        corr_matrix = rets_df.corr()
        # use seaborn for heatmap of correlation
        heatmap_plot = sns.heatmap(data=corr_matrix)
        heatmap_plot.get_figure().savefig(self.seaborn_plots_pwd +
                                          corr_heatmap_save_filename)
        pairplot = sns.pairplot(data=rets_df)
        pairplot.fig.savefig(self.seaborn_plots_pwd + pairplot_save_filename)
        plt.show()

    def interest_rates_autocorrelation(self,
                                       ir_class='EURODOLLARS',
                                       contract='ED4_WHITE',
                                       observation_start='2014-06-01',
                                       observation_end=None,
                                       which_lag=1):
        """Log and plot the autocorrelation of open/settle/last price changes.

        Each daily row is expanded into three timestamped prices (see
        ``create_open_settle_last_ed_ts``); the differenced series is passed
        to ``plot_acf`` and its lag-*which_lag* autocorrelation is logged.
        """
        observation_end = self._resolve_observation_end(observation_end)
        # for example, if you wanted to look at the EURODOLLAR, you would do 100 - ED(px).
        qdo_eurodollar = QuandlDataObject(ir_class, contract, '.csv')
        ed_df = qdo_eurodollar.get_df()
        ed_df = ed_df[observation_start:observation_end]
        # if nothing traded on a certain day, just drop the row - e.g. Dec 5th, 2018 - market
        # closed for the funeral of George Bush the first.
        ed_df = ed_df[ed_df.Volume > 0.0]
        # settle price is the 2 pm CST print.
        # last price is the 4 pm CST close print.
        # open price is the 5 pm CST open print.
        result_series = ed_df.apply(self.create_open_settle_last_ed_ts, axis=1)
        # Each row holds only its own three timestamps; after transposing and
        # zero-filling, the row sum collapses the frame into one price series.
        result_series = result_series.T.fillna(0).apply(lambda x: sum(x),
                                                        axis=1)
        result_series.name = 'px'
        price_diff = result_series.diff().dropna()
        price_diff.name = 'px_one_lag_diff'
        autocorr_one_lag = price_diff.autocorr(lag=which_lag)
        plot_acf(price_diff, lags=10)
        plt.show()
        self.logger.info(
            "FredApi.interest_rates_autocorrelation(): %s lag autocorr is %4.6f",
            str(which_lag), autocorr_one_lag)
        # the highest autocorrelations are for Open-settle pairs, Settle - Last pairs, and
        # last - open pairs. And the autocorrelation is negative, which means there is some
        # reversion.

    def rolling_eurodollar_session_corr_pos_ind(self, row, correl_filter):
        """Map one row's lagged correlations to three position indicators.

        Args:
            row: mapping with ``os_sl_lagged_corr_series``,
                ``os_snxto_lagged_corr_series`` and
                ``ol_lnxto_lagged_corr_series``.
            correl_filter: dict keyed ``'os_sl'``, ``'os_snxto'``,
                ``'ol_lnxto'``; each value is
                ``[(op_a, threshold_a), (op_b, threshold_b), combiner]``.
                The combiner entry is not used here.

        Returns:
            ``(os_sl, os_snxto, ol_lnxto)`` where each entry is -1 when the
            pair's first condition holds, 1 when only its second holds, else 0.
            ``(0, 0, 0)`` when either of the first two correlations is NaN.
        """
        if (np.isnan(row['os_sl_lagged_corr_series'])
                or np.isnan(row['os_snxto_lagged_corr_series'])):
            return (0, 0, 0)

        def pair_conditions(filter_key, corr_column):
            # Each pair is tested against its OWN thresholds; previously the
            # os_snxto and ol_lnxto pairs read the 'os_sl' thresholds.
            (op_a, threshold_a), (op_b, threshold_b) = correl_filter[
                filter_key][:2]
            corr_value = row[corr_column]
            return op_a(corr_value, threshold_a), op_b(corr_value, threshold_b)

        os_sl_a_cond, os_sl_b_cond = pair_conditions(
            'os_sl', 'os_sl_lagged_corr_series')
        os_snxto_a_cond, os_snxto_b_cond = pair_conditions(
            'os_snxto', 'os_snxto_lagged_corr_series')
        ol_lnxto_a_cond, ol_lnxto_b_cond = pair_conditions(
            'ol_lnxto', 'ol_lnxto_lagged_corr_series')

        os_sl_pos_ind = 0
        os_snxto_pos_ind = 0
        ol_lnxto_pos_ind = 0
        # based on corr(settle-open, last-settle)
        if (os_sl_a_cond):
            os_sl_pos_ind = -1
        elif (os_sl_b_cond):
            os_sl_pos_ind = 1
        # based on corr(open-settle, settle-nextOpen)
        if (os_snxto_a_cond):
            os_snxto_pos_ind = -1
        elif (os_snxto_b_cond):
            os_snxto_pos_ind = 1
        # based on corr(open-last, last-nextOpen)
        if (ol_lnxto_a_cond):
            ol_lnxto_pos_ind = -1
        elif (ol_lnxto_b_cond):
            ol_lnxto_pos_ind = 1
        return (os_sl_pos_ind, os_snxto_pos_ind, ol_lnxto_pos_ind)

    def rolling_eurodollar_os_sl_corr(self,
                                      ir_class="EURODOLLARS",
                                      contract='ED4_WHITE',
                                      observation_start='2014-06-01',
                                      observation_end=None,
                                      which_lag=1,
                                      rolling_window_size=60,
                                      rolling_pnl_window_size=90,
                                      execution_slippage=-0.0025,
                                      min_input_vol=0.0,
                                      pos_correl_filter_val=0.2,
                                      neg_correl_filter_val=-0.2):
        """Backtest three session-pair strategies driven by rolling correlations.

        Pairs analysed: open->settle vs settle->last ('os_sl'), open->settle vs
        settle->next open ('os_snxto'), open->last vs last->next open
        ('ol_lnxto'). For each pair the method computes the rolling
        correlation, a per-row position indicator from the lagged correlation,
        the total position obtained by holding every indicator for
        ``rolling_pnl_window_size`` rows, and the slippage-adjusted P&L.

        Side effects: shows six matplotlib plots, writes ``ed_df`` to a CSV
        file, and renders Bokeh scatter/line plots to an HTML file. Returns
        nothing.
        """
        observation_end = self._resolve_observation_end(observation_end)
        qdo_eurodollar = QuandlDataObject(ir_class, contract, '.csv')
        ed_df = qdo_eurodollar.get_df()
        ed_df = ed_df[observation_start:observation_end]
        # if nothing traded on a certain day, just drop the row - e.g. Dec 5th, 2018 - market
        # closed for the funeral of George Bush the first.
        # .copy() so the column assignments below act on an independent frame.
        ed_df = ed_df[ed_df.Volume > 0.0].copy()
        ed_df['OpenSettleDelta'] = ed_df.Settle - ed_df.Open
        ed_df['OpenLastDelta'] = ed_df.Last - ed_df.Open
        ed_df['SettleLastDelta'] = ed_df.Last - ed_df.Settle
        ed_df['SettleNextOpenDelta'] = ed_df.Open.shift(
            periods=-which_lag) - ed_df.Settle
        ed_df['LastNextOpenDelta'] = ed_df.Open.shift(
            periods=-which_lag) - ed_df.Last
        # Deltas are scaled by 1e6 and compared as integers against the scaled
        # minimum move (one tick = 5000 on that scale).
        conditions = [
            (pd.to_numeric(ed_df.OpenSettleDelta.mul(1000000.0),
                           downcast='integer') > int(
                               min_input_vol * 1000000.0)),
            (pd.to_numeric(ed_df.OpenSettleDelta.mul(1000000.0),
                           downcast='integer') < int(
                               -min_input_vol * 1000000.0))
        ]
        ol_delta_conditions = [
            (pd.to_numeric(ed_df.OpenLastDelta.mul(1000000.0),
                           downcast='integer') > int(
                               min_input_vol * 1000000.0)),
            (pd.to_numeric(ed_df.OpenLastDelta.mul(1000000.0),
                           downcast='integer') < int(
                               -min_input_vol * 1000000.0))
        ]
        ## the below, 1.0 or -1.0 multiples, tells us whether we expect reversion in next period,
        ## or autocorrelation. (-1.0,1.0) = reversion, (1.0, -1.0) = autocorrelation
        choices_settle_last = [
            ed_df.SettleLastDelta.mul(1.0),
            ed_df.SettleLastDelta.mul(-1.0)
        ]
        choices_settle_nextopen = [
            ed_df.SettleNextOpenDelta.mul(1.0),
            ed_df.SettleNextOpenDelta.mul(-1.0)
        ]
        choices_last_nextopen = [
            ed_df.LastNextOpenDelta.mul(1.0),
            ed_df.LastNextOpenDelta.mul(-1.0)
        ]
        ed_df['SettleLastTradeSelect'] = np.select(conditions,
                                                   choices_settle_last,
                                                   default=0.0)
        ed_df['SettleNextOpenTradeSelect'] = np.select(conditions,
                                                       choices_settle_nextopen,
                                                       default=0.0)
        ed_df['LastNextOpenTradeSelect'] = np.select(ol_delta_conditions,
                                                     choices_last_nextopen,
                                                     default=0.0)
        ed_df['os_sl_corr_series'] = ed_df.OpenSettleDelta.rolling(
            rolling_window_size).corr(ed_df.SettleLastDelta)
        ed_df['os_snxto_corr_series'] = ed_df.OpenSettleDelta.rolling(
            rolling_window_size).corr(ed_df.SettleNextOpenDelta)
        ed_df['ol_lnxto_corr_series'] = ed_df.OpenLastDelta.rolling(
            rolling_window_size).corr(ed_df.LastNextOpenDelta)
        # Rolling P&L in units of 0.005, and the same series shifted so that
        # each row sees the window that STARTS at it.
        ed_df['rolling_reversion_trade_pnl'] = ed_df.SettleLastTradeSelect.\
            rolling(rolling_pnl_window_size).sum().div(0.005)
        ed_df['fwd_looking_rolling_reversion_trade_pnl'] = \
            ed_df.rolling_reversion_trade_pnl.\
            shift(-1*rolling_pnl_window_size+1)
        ed_df['rolling_reversion_settleNextOpen_trade_pnl'] = \
            ed_df.SettleNextOpenTradeSelect.\
            rolling(rolling_pnl_window_size).sum().div(0.005)
        ed_df['fwd_looking_rolling_reversion_settleNextOpen_trade_pnl'] = \
            ed_df.rolling_reversion_settleNextOpen_trade_pnl.\
            shift(-1*rolling_pnl_window_size+1)
        ed_df['rolling_reversion_lastNextOpen_trade_pnl'] = \
            ed_df.LastNextOpenTradeSelect.\
            rolling(rolling_pnl_window_size).sum().div(0.005)
        ed_df['fwd_looking_rolling_reversion_lastNextOpen_trade_pnl'] = \
            ed_df.rolling_reversion_lastNextOpen_trade_pnl.\
            shift(-1*rolling_pnl_window_size+1)
        # Lag by one row so a signal only uses correlation known beforehand.
        ed_df['os_sl_lagged_corr_series'] = ed_df.os_sl_corr_series.shift(
            periods=1)
        ed_df['os_snxto_lagged_corr_series'] = \
            ed_df.os_snxto_corr_series.shift(periods=1)
        ed_df['ol_lnxto_lagged_corr_series'] = \
            ed_df.ol_lnxto_corr_series.shift(periods=1)
        # create filter conditions for correlation
        correl_filter = {}
        correl_filter['os_sl'] = [(operator.gt, pos_correl_filter_val),
                                  (operator.lt, neg_correl_filter_val),
                                  operator.or_]
        correl_filter['os_snxto'] = [(operator.gt, pos_correl_filter_val),
                                     (operator.lt, neg_correl_filter_val),
                                     operator.or_]
        correl_filter['ol_lnxto'] = [(operator.gt, pos_correl_filter_val),
                                     (operator.lt, neg_correl_filter_val),
                                     operator.or_]
        pos_ind_series = ed_df.apply(
            self.rolling_eurodollar_session_corr_pos_ind,
            args=(correl_filter, ),
            axis=1)
        ed_df['os_sl_pos_ind'] = pos_ind_series.apply(
            lambda pos_ind_tuple: pos_ind_tuple[0])
        ed_df['os_snxto_pos_ind'] = pos_ind_series.apply(
            lambda pos_ind_tuple: pos_ind_tuple[1])
        ed_df['ol_lnxto_pos_ind'] = pos_ind_series.apply(
            lambda pos_ind_tuple: pos_ind_tuple[2])
        # One row per signal, held for rolling_pnl_window_size steps.
        final_np_array = self._stack_held_positions(
            ed_df.os_sl_pos_ind.values, rolling_pnl_window_size)
        final_np_os_snxto_array = self._stack_held_positions(
            ed_df.os_snxto_pos_ind.values, rolling_pnl_window_size)
        final_np_ol_lnxto_array = self._stack_held_positions(
            ed_df.ol_lnxto_pos_ind.values, rolling_pnl_window_size)
        self.logger.info(
            "FredAPI:rolling_eurodollar_os_sl_corr(): final_np_array list dimensions are %s",
            final_np_array.shape)
        os_sl_total_pos_ind = np.sum(final_np_array, axis=0)
        os_snxto_total_pos_ind = np.sum(final_np_os_snxto_array, axis=0)
        ol_lnxto_total_pos_ind = np.sum(final_np_ol_lnxto_array, axis=0)
        ed_df['os_sl_total_pos_ind'] = pd.Series(os_sl_total_pos_ind,
                                                 index=ed_df.index)
        ed_df['os_snxto_total_pos_ind'] = pd.Series(os_snxto_total_pos_ind,
                                                    index=ed_df.index)
        ed_df['ol_lnxto_total_pos_ind'] = pd.Series(ol_lnxto_total_pos_ind,
                                                    index=ed_df.index)
        # P&L = move * total position + |total position| * slippage
        # (slippage is negative by default, i.e. a cost).
        ed_df['FinalSettleLastTradeSelect'] = \
            ed_df['SettleLastTradeSelect'].\
            mul(ed_df['os_sl_total_pos_ind']).\
            add(ed_df['os_sl_total_pos_ind'].abs().mul(execution_slippage))
        ed_df['FinalSettleNextOpenTradeSelect'] = \
            ed_df['SettleNextOpenTradeSelect'].\
            mul(ed_df['os_snxto_total_pos_ind']).\
            add(ed_df['os_snxto_total_pos_ind'].abs().mul(execution_slippage))
        ed_df['FinalLastNextOpenTradeSelect'] = \
            ed_df['LastNextOpenTradeSelect'].\
            mul(ed_df['ol_lnxto_total_pos_ind']).\
            add(ed_df['ol_lnxto_total_pos_ind'].abs().mul(execution_slippage))
        ed_df.os_sl_total_pos_ind.plot(
            title='Open-Settle/Settle-Last Total Pos Ind')
        plt.show()
        ed_df.os_snxto_total_pos_ind.plot(
            title='Open-Settle/Settle-NextOpen Total Pos Ind')
        plt.show()
        ed_df.ol_lnxto_total_pos_ind.plot(
            title='Open-Last/Last-NextOpen Total Pos Ind')
        plt.show()
        ed_df.FinalSettleLastTradeSelect.cumsum().plot(
            title='Settle-to-Last Cumm Pnl')
        plt.show()
        ed_df.FinalSettleNextOpenTradeSelect.cumsum().plot(
            title='Settle-to-NextOpen Cumm Pnl')
        plt.show()
        ed_df.FinalLastNextOpenTradeSelect.cumsum().plot(
            title='Last-to-NextOpen Cumm Pnl')
        plt.show()
        # NOTE(review): user-specific absolute path; other outputs in this
        # class go through OSMuxImpl.get_proper_path -- confirm and align.
        ed_df.to_csv('/Users/traderghazy/workspace/data/ed_df.csv')
        data = ed_df[[
            'os_sl_lagged_corr_series', 'os_snxto_lagged_corr_series',
            'ol_lnxto_lagged_corr_series', 'SettleLastTradeSelect',
            'SettleNextOpenTradeSelect', 'LastNextOpenTradeSelect',
            'rolling_reversion_trade_pnl',
            'fwd_looking_rolling_reversion_trade_pnl',
            'rolling_reversion_settleNextOpen_trade_pnl',
            'fwd_looking_rolling_reversion_settleNextOpen_trade_pnl',
            'rolling_reversion_lastNextOpen_trade_pnl',
            'fwd_looking_rolling_reversion_lastNextOpen_trade_pnl'
        ]].dropna()
        # the correl_filter is the conditions for filtering the correlations.
        # Make sure the last item in this list is either operator.and_ or
        # operator.or_... this will tell the filter how to combine the
        # conditions.
        p_scat_1, p_scat_2, p_scat_3, p_correl_line = \
            ExtendBokeh.bokeh_ed_ir_rolling_ticks_correl(
                data,
                title=[
                    'OS-SL Rolling Cum. Sum vs. Correl',
                    'OS-SL Rolling Fwd Cum. Sum vs. Correl',
                    'OS-SL Point Value vs. Correl',
                    'OS-SL Correlation vs. Datetime'
                ],
                subtitle=['', '', '', ''],
                diff_types_to_correlate='os_sl',
                type_list=[
                    'rolling_reversion_trade_pnl',
                    'fwd_looking_rolling_reversion_trade_pnl',
                    'SettleLastTradeSelect', 'os_sl_lagged_corr_series'
                ],
                rolling_window_size=rolling_window_size,
                correl_filter=correl_filter)
        p_scat_4, p_scat_5, p_scat_6, p_os_snxto_correl_line = \
            ExtendBokeh.bokeh_ed_ir_rolling_ticks_correl(
                data,
                title=[
                    'OS-SNXTO Rolling Cum. Sum vs. Correl',
                    'OS-SNXTO Rolling Fwd Cum. Sum vs. Correl',
                    'OS-SNXTO Point Value vs. Correl',
                    'OS-SNXTO Correlation vs. Datetime'
                ],
                subtitle=['', '', '', ''],
                diff_types_to_correlate='os_snxto',
                type_list=[
                    'rolling_reversion_settleNextOpen_trade_pnl',
                    'fwd_looking_rolling_reversion_settleNextOpen_trade_pnl',
                    'SettleNextOpenTradeSelect', 'os_snxto_lagged_corr_series'
                ],
                rolling_window_size=rolling_window_size,
                correl_filter=correl_filter)
        p_scat_7, p_scat_8, p_scat_9, p_ol_lnxto_correl_line = \
            ExtendBokeh.bokeh_ed_ir_rolling_ticks_correl(
                data,
                title=[
                    'OL-LNXTO Rolling Cum. Sum vs. Correl',
                    'OL-LNXTO Rolling Fwd Cum. Sum vs. Correl',
                    'OL-LNXTO Point Value vs. Correl',
                    'OL-LNXTO Correlation vs. Datetime'
                ],
                subtitle=['', '', '', ''],
                diff_types_to_correlate='ol_lnxto',
                type_list=[
                    'rolling_reversion_lastNextOpen_trade_pnl',
                    'fwd_looking_rolling_reversion_lastNextOpen_trade_pnl',
                    'LastNextOpenTradeSelect', 'ol_lnxto_lagged_corr_series'
                ],
                rolling_window_size=rolling_window_size,
                correl_filter=correl_filter)
        the_plots = [
            p_scat_1, p_scat_2, p_scat_3, p_correl_line, p_scat_4, p_scat_5,
            p_scat_6, p_os_snxto_correl_line, p_scat_7, p_scat_8, p_scat_9,
            p_ol_lnxto_correl_line
        ]
        html_output_file_path = OSMuxImpl.get_proper_path(
            '/workspace/data/bokeh/html/')
        html_output_file_title = ir_class + '_' + contract + ".scatter.html"
        html_output_file = html_output_file_path + html_output_file_title
        ExtendBokeh.show_hist_plots(the_plots, html_output_file,
                                    html_output_file_title)
        # ok, so next steps:
        # 1. correlation (rolling) is a stationary time series.
        # 2. We can use ARIMA to anticipate the next values. We have the seaborn
        #    histograms, we can see the normality present.
        # 3. We can plot the acf and show some more stuff for the purpose of
        #    presentation.
        # 4. Once we can predict the next correlation value, we can then use the
        #    fact that correlation is a predictor for next returns
        # what else? How about this...
        # 1. upon receiving a signal based on correlation value, track
        #    the rolling pnl at each subsequent correlation
        # 2. create a histogram by binning the subsequent correlation values,
        #    and plotting returns.
        # 3. the idea is that we want to see when rolling cumm pnl is maxed out
        #    based on where correlation goes after the signal is triggered.

    def prepare_post_signal_analysis_data(self, df):
        """Placeholder: ignores *df* and returns 1."""
        return 1

    def intraday_ir_correlation(self,
                                ir_class='EURODOLLARS',
                                contract='ED4_WHITE',
                                observation_start='2014-06-01',
                                observation_end=None,
                                which_lag=1):
        """Log and return full-sample correlations between session price moves.

        Returns:
            ``(os_sl_corr, sl_lnxto_corr, os_lnxto_corr)``: open->settle vs
            settle->last, settle->last vs last->next open, and open->settle vs
            last->next open.
        """
        observation_end = self._resolve_observation_end(observation_end)
        qdo_eurodollar = QuandlDataObject(ir_class, contract, '.csv')
        ed_df = qdo_eurodollar.get_df()
        ed_df = ed_df[observation_start:observation_end]
        # .copy() so the column assignments below act on an independent frame.
        ed_df = ed_df[ed_df.Volume > 0.0].copy()
        ed_df['OpenSettleDelta'] = ed_df.Settle - ed_df.Open
        ed_df['SettleLastDelta'] = ed_df.Settle - ed_df.Last
        ed_df['LastNextOpenDelta'] = ed_df.Last - ed_df.Open.shift(
            periods=-which_lag)
        os_sl_corr = np.corrcoef(ed_df.OpenSettleDelta,
                                 ed_df.SettleLastDelta)[0][1]
        # The shifted column has which_lag trailing NaNs, so the other operand
        # is trimmed by the same number of rows to keep the lengths equal.
        sl_lnxto_corr = np.corrcoef(
            ed_df.SettleLastDelta[0:len(ed_df.SettleLastDelta) -
                                  which_lag].values,
            ed_df.LastNextOpenDelta.dropna().values)[0][1]
        os_lnxto_corr = np.corrcoef(
            ed_df.OpenSettleDelta[0:len(ed_df.OpenSettleDelta) - which_lag],
            ed_df.LastNextOpenDelta.dropna().values)[0][1]
        self.logger.info(
            "FredApi.intraday_ir_correlation(): the correlation between open-settle delta price "
            "and settle-last delta price is %4.6f", os_sl_corr)
        self.logger.info(
            "FredApi.intraday_ir_correlation(): the correlation between settle-last delta price "
            "and last-nextOpen is %4.6f", sl_lnxto_corr)
        self.logger.info(
            "FredApi.intraday_ir_correlation(): the correlation between open-settle delta price "
            "and last-nextOpen is %4.6f", os_lnxto_corr)
        return (os_sl_corr, sl_lnxto_corr, os_lnxto_corr)

    def create_open_settle_last_ed_ts(self, row):
        """Turn one daily row into a 3-point Series keyed by intraday timestamps.

        ``row.name`` supplies the date; Open is stamped at hour 0, Settle at
        hour 14 and Last at hour 16.
        """
        open_price = row['Open']
        settle_price = row['Settle']
        last_price = row['Last']
        settle_price_datetime = row.name.replace(hour=14)
        open_price_datetime = row.name.replace(hour=0)
        last_price_datetime = row.name.replace(hour=16)
        px_dict = {
            open_price_datetime: open_price,
            settle_price_datetime: settle_price,
            last_price_datetime: last_price
        }
        return pd.Series(px_dict)

    def regress_returns(self, x_series_id, y_series_id):
        """Log the correlation and OLS fit of y returns on x returns for two FRED series."""
        x_px_series = self.get_data(series_id=x_series_id)
        y_px_series = self.get_data(series_id=y_series_id)
        df = pd.concat([x_px_series, y_px_series], axis=1,
                       join='inner').dropna()
        df_daily_rets = df.pct_change().dropna()
        # Compute correlation of x and y
        x_rets = df_daily_rets.iloc[:, 0]
        y_rets = df_daily_rets.iloc[:, 1]
        correlation = x_rets.corr(y_rets)
        self.logger.info("The correlation between x and y is %4.2f",
                         correlation)
        # Convert the Series x to a DataFrame and name the column x
        df_x = pd.DataFrame(x_rets)
        # Add a constant to the DataFrame x
        df_x = sm.add_constant(df_x, 1)
        # Fit the regression of y on x
        result = sm.OLS(y_rets, df_x).fit()
        # Print out the results and look at the relationship between R-squared and the correlation above
        self.logger.info("FredApi.regress_returns():Regression Results: %s",
                         result.summary())
def getFredData(name):
    """Print the metadata and the last two observations of FRED series *name*."""
    client = Fred(api_key=fredToken)
    series_info = client.get_series_info(name)
    last_two_values = client.get_series(name).tail(2)
    # Metadata first, then the two most recent rows.
    for item in (series_info, last_two_values):
        print(item)
search_results.info() # In[46]: #create a dataframe to save series agg = pd.DataFrame(index=[], columns=[]) # The first series s = 'CNP16OV' # In[47]: #Retrieve the series information about CNP16OV series_info = fred.get_series_info(s) series_info # In[48]: #save the time for column use series_name = series_info.title series_name # In[49]: #Retrieve the data for CNP16OV get_ipython().magic('pinfo fred.get_series')
fred = Fred(api_key="""key here""")

# These are the keys that correspond to certain macro economic indicators and
# data points from the FRED Database.
series = [
    'SP500', 'GDP', 'A067RL1A156NBEA', 'CPIAUCSL', 'A191RL1Q225SBEA', 'DGS10',
    'IC4WSA', 'UNRATE', 'DEXUSEU', 'BAMLH0A0HYM2', 'MEHOINUSA672N', 'M2V',
    'GFDEGDQ188S', 'FEDFUNDS', 'NAPM', 'DCOILWTICO', 'M2', 'CIVPART',
    'PSAVERT', 'USD3MTD156N', 'T10Y2Y', 'HOUST', 'DGS30', 'MORTG', 'DEXCHUS',
    'BUSLOANS', 'UEMPMEAN', 'EXPGSCA', 'NETEXP', 'A067RP1A027NBEA', 'FYFSD'
]

# Strips the data down to the title, frequency of reporting, units, and the
# latest values. Single-argument print(...) calls produce the same output on
# Python 2 and Python 3, whereas print statements are a SyntaxError on 3.
for t in series:
    data = fred.get_series(t)
    info = fred.get_series_info(t)
    print(info['title'])
    print(info['frequency'])
    print(info['units'])
    print(" ")
    print("LATEST VALUES:")
    print(data.tail())
    print(" ")

# Saves a PDF graph in the folder where code is stored.
# NOTE(review): only the set-up of this loop is visible here; the plotting
# itself is not part of this excerpt.
for i in series:
    info = fred.get_series_info(i)
    title = info['title']
# stock market volume # GDP Weekly # unemployment # interest rates # bank data (repo rates) # housing market data # commodity data # China / US Daily FX rates # Helpful to get data from financial sector # data we have and can use # initial unemployment claims seasonally adjusted - icsa - weekly # continued claims - ccsa - weekly # Crude oil prices - DCOILBRENTEU - daily # High yield index option adjusted spread - BAMLH0A0HYM2 - daily # Gold fixing Price - GOLDAMGBD228NLBM - daily # Corporate bond yields AAA - DAAA - daily # Overnight AA financial commercial paper interest rate - RIFSPPFAAD01NB - daily # Euro high yield index-option adjusted - BAMLHE00EHYIOAS - daily # China/US FX rates - DEXCHUS - Daily # US/EURO EXchange rates - DEXUSEU - Daily # 10 year - 3 month interest rate - T10Y3M - Daily # private sector financial EM data - BAMLEMFSFCRPITRIV - DAILY # Trade weighted dollar to foreign goods and currencies - DTWEXAFEGS - DAILY, ONLY GOES BACK TO 2015 df = fred.get_series_info("BAMLHE00EHYIOAS") df["frequency_short"] fred.search('housing') print(df)