def performInitialSetup(DBFilePath=None, forceDBCreation=False, logFilePath=None, recordLog=False, quietShell=False):
    # Establish Global Variables
    global db_cursor
    global db_connection

    # Initialize Log
    if quietShell and not recordLog:
        recordLevel = log.INFO
    else:
        recordLevel = None
    EM_util.initializeLog(recordLog=recordLog, logFilePath=logFilePath, recordLevel=recordLevel)
    log.info('Log Initialized.')

    # Connect to FRED
    log.info('Connecting to FRED.')
    fred.key(EM_util.FREDAPIKey)

    # Create Database (resolve the default path before logging it)
    if DBFilePath is None:
        DBFilePath = EM_util.defaultDB
    log.info('Connecting to Database: \n%s', DBFilePath)
    if not isfile(DBFilePath):
        log.info('Database not found. Creating new database...')
        EM_DBMake.doOneTimeDBCreation(force=forceDBCreation, DBFilePath=DBFilePath)

    # Store Database Connection
    db_connection = sq.connect(DBFilePath)
    db_cursor = db_connection.cursor()
    log.info('Database opened successfully')
def fred_series_mult_pd(symbol_list, api_key):
    """Use the fred API to retrieve time series data.

    Args:
        symbol_list: list of strings representing FRED series symbols
        api_key: developer API key from https://fred.stlouisfed.org/

    Returns:
        merged pandas DataFrame with date ('dt') and numeric value (<symbol>) columns

    Dependencies: pandas, fred, functools.reduce
    """
    # Use API key
    fred.key(api_key)

    # Inner-join a list of DataFrames on a shared column
    def inner_join_pandas_list(pandas_list, join_on):
        return reduce(lambda a, b: pd.merge(a, b, on=join_on), pandas_list)

    # Query each series in symbol_list, append to df_list
    df_list = []
    for symbol in symbol_list:
        # Import series, convert to DataFrame, drop query timestamps,
        # rename columns, format dates
        series_df = pd.DataFrame(fred.observations(symbol)['observations'])\
            .drop(['realtime_start', 'realtime_end'], axis=1)\
            .rename(columns={'date': 'dt', 'value': symbol.lower()})
        series_df['dt'] = [x.date() for x in pd.to_datetime(series_df['dt'])]
        df_list.append(series_df)

    # Merge the list of DataFrames
    return inner_join_pandas_list(df_list, 'dt')
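A minimal usage sketch for fred_series_mult_pd above; the key and the series IDs here are illustrative placeholders, not values from the original source:

import fred
import pandas as pd
from functools import reduce

# Hypothetical call: merge two FRED series on their shared 'dt' column.
df = fred_series_mult_pd(["GDPC1", "UNRATE"], api_key="your-key-here")
print(df.head())  # columns: dt, gdpc1, unrate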
def get_series_notes(series_term: str) -> pd.DataFrame:
    """Get Series notes. [Source: FRED]

    Parameters
    ----------
    series_term : str
        Search for this series term

    Returns
    -------
    pd.DataFrame
        DataFrame of matched series
    """
    fred.key(cfg.API_FRED_KEY)
    d_series = fred.search(series_term)

    df_fred = pd.DataFrame()
    if "error_message" in d_series:
        if "api_key" in d_series["error_message"]:
            console.print("[red]Invalid API Key[/red]\n")
        else:
            console.print(d_series["error_message"])
    elif "seriess" in d_series and d_series["seriess"]:
        df_fred = pd.DataFrame(d_series["seriess"])
        df_fred["notes"] = df_fred["notes"].fillna("No description provided.")
    else:
        console.print("No matches found.\n")

    return df_fred


def get_series_ids(series_term: str, num: int) -> Tuple[List[str], List[str]]:
    """Get Series IDs. [Source: FRED]

    Parameters
    ----------
    series_term : str
        Search for this series term
    num : int
        Maximum number of series IDs to output

    Returns
    -------
    List[str]
        List of series IDs
    List[str]
        List of series titles
    """
    fred.key(cfg.API_FRED_KEY)
    d_series = fred.search(series_term)

    if "seriess" not in d_series or not d_series["seriess"]:
        return [], []

    df_series = pd.DataFrame(d_series["seriess"])
    df_series = df_series.sort_values(by=["popularity"], ascending=False).head(num)
    return df_series["id"].values, df_series["title"].values


def __init__(self, obsStart, obsEnd, *args):
    self.tags = []
    self.obsStart = obsStart
    self.obsEnd = obsEnd
    fred.key('4cfe54d7bef3609a5f3eae6e5d87f790')
    for tag in args:
        self.tags.append(tag)
def get_hour_value(self):
    fred.key(settings.FRED_KEY)
    last_observation = fred.observations(settings.FRED_SERIES)['observations'][-1]
    h = last_observation['value']
    try:
        return float(h)
    except (TypeError, ValueError):
        # Observation value was missing or non-numeric; fall back to the default.
        return settings.DEFAULT_HOUR_VALUE_IN_USD
def test_fred_series_vintage_dates(self):
    fred.key('123abc')
    fred.vintage('AAA', sort='desc')
    expected = 'https://api.stlouisfed.org/fred/series/vintagedates'
    params = {
        'api_key': '123abc',
        'series_id': 'AAA',
        'sort_order': 'desc',
        'file_type': 'json',
    }
    self.get.assert_called_with(expected, params=params)
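The unit tests in this section all assert against self.get, a mock installed by the test fixture. A minimal sketch of that harness, assuming the fred client performs its HTTP calls through requests.get; the patch target 'fred.core.requests.get' is an assumption about the package layout, not confirmed by the source:

import unittest
from unittest import mock


class FredApiTestCase(unittest.TestCase):
    def setUp(self):
        # Replace the HTTP getter so no real network request is made;
        # each test then inspects the URL and params it was called with.
        # NOTE: 'fred.core.requests.get' is an assumed patch target.
        patcher = mock.patch('fred.core.requests.get')
        self.get = patcher.start()
        self.addCleanup(patcher.stop)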
def test1():
    fred.key('f412b494756f8cbd24c8310e01d14630')
    pp = pprint.PrettyPrinter(indent=2)

    # Data for real PCE vs. average hourly earnings.
    # get_fred returns a list of dictionaries.
    pce_list = get_fred("PCEC96")
    pp.pprint(pce_list)


def test_fred_specific_release(self):
    fred.key('my_key')
    fred.release('123')
    expected = 'http://api.stlouisfed.org/fred/release'
    params = {
        'api_key': 'my_key',
        'release_id': '123',
        'file_type': 'json',
    }
    self.get.assert_called_with(expected, params=params)


def test_fred_category_children(self):
    fred.key('abc123')
    fred.children()
    expected = 'http://api.stlouisfed.org/fred/category/children'
    params = {
        'api_key': 'abc123',
        'category_id': None,
        'file_type': 'json',
    }
    self.get.assert_called_with(expected, params=params)


def test_fred_releases_dates_with_start_and_end_keywords(self):
    fred.key('github')
    fred.dates(start='2012-01-01', end='2012-03-16')
    expected = 'http://api.stlouisfed.org/fred/releases/dates'
    params = {
        'api_key': 'github',
        'realtime_start': '2012-01-01',
        'realtime_end': '2012-03-16',
    }
    self.get.assert_called_with(expected, params=params)


def test_fred_series_search(self):
    fred.key('123')
    fred.search('money stock')
    expected = 'http://api.stlouisfed.org/fred/series/search'
    params = {
        'api_key': '123',
        'search_text': 'money stock',
        'file_type': 'json',
    }
    self.get.assert_called_with(expected, params=params)


def test_fred_category_series_function(self):
    fred.key('my_fred_key')
    fred.category_series(123)
    expected = 'http://api.stlouisfed.org/fred/category/series'
    params = {
        'api_key': 'my_fred_key',
        'category_id': 123,
        'file_type': 'json',
    }
    self.get.assert_called_with(expected, params=params)
def thirdAPI():
    # Main loop (Python 2 snippet: raw_input and print-statement syntax)
    fred.key('3b7e7d31bcc6d28556c82c290eb3572e')
    obs, more = {}, True
    if raw_input('Feeling lucky? (Y/N): ').upper() == 'Y':
        obs, more = lucky(), False
    while more:
        obs.update(getObs(searchTitle()))
        if raw_input('Search again (Y/N): ').upper() == 'N':
            break
    if obs != {}:
        printCSV(obs)
    else:
        print 'No data recorded -> good bye :)'
def set_hour_rate(self, h=None):
    if h:
        self.hour = h
        print("Hour value of work (self.hour) was set to %s usd from FRED API. Retrieving currency rates.." % self.hour)
        self.set_currenc_rates()
        print("Done.")
    else:
        fred.key('0c9a5ec8dd8c63ab8cbec6514a8f5b37')
        last_observation = fred.observations(settings.FRED_SERIES)['observations'][-1]
        h = last_observation['value']
        try:
            self.hour = float(h)
            print("Hour value of work (self.hour) was set to %s usd from FRED API." % self.hour)
        except (TypeError, ValueError):
            self.hour = 25.
            print("Failed to retrieve rates from FRED API. Assuming 1h = 25usd.")


def set_hour_rate(self, h=None):
    # Python 2 variant of the method above.
    if h:
        self.hour = h
        print "Hour value of work (self.hour) was set to %s usd from FRED API. Retrieving currency rates.." % self.hour
        self.set_currenc_rates()
        print "Done."
    else:
        fred.key('0c9a5ec8dd8c63ab8cbec6514a8f5b37')
        last_observation = fred.observations(settings.FRED_SERIES)['observations'][-1]
        h = last_observation['value']
        try:
            self.hour = float(h)
            print "Hour value of work (self.hour) was set to %s usd from FRED API." % self.hour
        except (TypeError, ValueError):
            self.hour = 25.
            print "Failed to retrieve rates from FRED API. Assuming 1h = 25usd."
def get_series_notes(series_term: str) -> pd.DataFrame:
    """Get Series notes. [Source: FRED]

    Parameters
    ----------
    series_term : str
        Search for this series term

    Returns
    -------
    pd.DataFrame
        DataFrame of matched series
    """
    fred.key(cfg.API_FRED_KEY)
    d_series = fred.search(series_term)

    if "seriess" not in d_series:
        return pd.DataFrame()
    if not d_series["seriess"]:
        return pd.DataFrame()

    df_fred = pd.DataFrame(d_series["seriess"])
    df_fred["notes"] = df_fred["notes"].fillna("No description provided.")
    return df_fred
def set_hour_rate(self, h=None):
    if h:
        self.hour = h
        print("Hour value of work (self.hour) was set to %s usd from FRED API. Retrieving currency rates.." % self.hour)
        self.set_currenc_rates()
        print("Done.")
    else:
        if not settings.FRED_KEY:
            print("Set settings.FRED_KEY. Get one at https://fred.stlouisfed.org")
        fred.key(settings.FRED_KEY)
        last_observation = fred.observations(settings.FRED_SERIES)['observations'][-1]
        h = last_observation['value']
        try:
            self.hour = float(h)
            print("Hour value of work (self.hour) was set to %s usd from FRED API." % self.hour)
        except (TypeError, ValueError):
            self.hour = 28.18
            print("Failed to retrieve rates from FRED API. Assuming 1h = 28.18 usd.")
def get_series_ids(series_term: str, num: int) -> Tuple[List[str], List[str]]:
    """Get Series IDs. [Source: FRED]

    Parameters
    ----------
    series_term : str
        Search for this series term
    num : int
        Maximum number of series IDs to output

    Returns
    -------
    List[str]
        List of series IDs
    List[str]
        List of series titles
    """
    fred.key(cfg.API_FRED_KEY)
    d_series = fred.search(series_term)

    # Cover invalid API keys and empty search terms
    if "error_message" in d_series:
        if "api_key" in d_series["error_message"]:
            console.print("[red]Invalid API Key[/red]\n")
        else:
            console.print(d_series["error_message"])
        return [], []

    if "seriess" not in d_series or not d_series["seriess"]:
        return [], []

    df_series = pd.DataFrame(d_series["seriess"])
    df_series = df_series.sort_values(by=["popularity"], ascending=False).head(num)
    return df_series["id"].values, df_series["title"].values
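A hypothetical usage sketch for get_series_ids above; the search term and count are illustrative, and a valid key must be present in cfg.API_FRED_KEY:

# Fetch the five most popular series matching "unemployment".
series_ids, series_titles = get_series_ids("unemployment", num=5)
for series_id, title in zip(series_ids, series_titles):
    print(f"{series_id}: {title}")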
def get_series_notes(series_term: str, num: int) -> str:
    """Get Series notes. [Source: FRED]

    Parameters
    ----------
    series_term : str
        Search for this series term
    num : int
        Maximum number of series notes to display

    Returns
    -------
    notes : str
        Series notes output
    """
    fred.key(cfg.API_FRED_KEY)
    d_series = fred.search(series_term)

    if "seriess" not in d_series:
        return "No Series found using this term!\n"

    df_fred = pd.DataFrame(d_series["seriess"])
    if df_fred.empty:
        return "No Series found using this term!\n"

    df_fred = df_fred.sort_values(by=["popularity"], ascending=False).head(num)

    notes = ""
    for _, series in df_fred.iterrows():
        if series["notes"]:
            notes += series["id"] + "\n"
            notes += "-" * len(series["id"]) + "\n"
            notes += series["notes"] + "\n\n"

    if not notes:
        return "Series notes not found!\n"
    return notes


def test_fred_releases_dates(self):
    fred.key('123')
    fred.dates()
    expected = 'https://api.stlouisfed.org/fred/releases/dates'
    params = {'api_key': '123', 'file_type': 'json'}
    self.get.assert_called_with(expected, params=params)


import fred
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import date, datetime

FRED_API_KEY = "055ba538c874e5974ee22d786f27fdda"
FRED_SERIES = ["INDPRO", "PAYEMS", "HOUST", "RRSFS", "NAPM"]
FRED_START = date(1990, 1, 1)
# Set to False to output graphs to a file (uses the current working directory).
# All numerical analysis is printed to the console.
DISPLAY = False

fred.key(FRED_API_KEY)


def get_fred_series(series):
    def filter(o):
        return {'date': datetime.strptime(o['date'], '%Y-%m-%d').date(),
                series: o['value']}
    return pd.DataFrame(map(filter, fred.observations(series)['observations']),
                        dtype='float64').set_index('date').dropna()


fred_data = get_fred_series(FRED_SERIES[0])  # Build an initial DataFrame
for s in FRED_SERIES[1:]:
    fred_data = fred_data.join(get_fred_series(s))


def test_fred_specific_release(self):
    fred.key('my_key')
    fred.release('123')
    expected = 'https://api.stlouisfed.org/fred/release'
    params = {'api_key': 'my_key', 'release_id': '123', 'file_type': 'json'}
    self.get.assert_called_with(expected, params=params)


def test_fred_series_observations(self):
    fred.key('ohai')
    fred.observations("AAA")
    expected = 'http://api.stlouisfed.org/fred/series/observations'
    params = {'api_key': 'ohai', 'series_id': 'AAA', 'file_type': 'json'}
    self.get.assert_called_with(expected, params=params)


def test_fred_series_updates(self):
    fred.key('ALL THE FRED API!')
    fred.updates()
    expected = 'http://api.stlouisfed.org/fred/series/updates'
    params = {'api_key': 'ALL THE FRED API!', 'file_type': 'json'}
    self.get.assert_called_with(expected, params=params)


def __init__(self):
    '''Constructor'''
    fred.key('e732114c863993dcb376dac62f9e5473')


def test_fred_releases(self):
    fred.key('123')
    fred.releases()
    expected = 'http://api.stlouisfed.org/fred/releases'
    params = {'api_key': '123'}
    self.get.assert_called_with(expected, params=params)
def main_routine():
    fred.key('f412b494756f8cbd24c8310e01d14630')

    ########## data for real PCE vs average hourly earnings ##########
    ## get_fred returns a list of dictionaries
    pce_list = get_fred("PCEC96")
    avghr_list = get_fred("AHETPI")
    pcedef_list = get_fred("PCEPI")

    ## get the index for the last date in the series
    ## pce_list[-1] is the final dictionary in the series
    end_pce = pce_list[-1]
    ## retrieve the date value of the dictionary
    enddate_pce = end_pce['date']
    start_date_pce = "1999-01-01"
    pce_dates = make_index(start_date_pce, enddate_pce)

    ## find the position of the 1995-1-1 entry (entries prior to 1995-1-1 are zero)
    start_index = (i for i in range(len(pce_list)) if (pce_list[i])['date'] == start_date_pce).next()

    ## create a dictionary of PCE date:values
    ## Real PCE values start as of 1995-1-1, as per start_index
    pce_d = make_dict(pce_list[start_index:])

    ## calculate the trailing 3 month averages, starting from index 2
    ## (need 3 values to make a 3 month average)
    pce_3mo_d = dict((date, make_avg(date, pce_d)) for date in pce_dates[2:])

    ## calculate the YoY change
    pce_yoy_d = make_yoy_dict(pce_dates[14:], pce_3mo_d)

    ########## avg hourly earnings #########
    ## process avg hourly earnings
    start_avghr, end_avghr = avghr_list[0], avghr_list[-1]
    start_date_avghr = start_avghr['date']
    enddate_avghr = end_avghr['date']
    avghr_dates = make_index(start_date_avghr, enddate_avghr)

    ## the pce deflator series is used in the govt rate vs. pce deflator.
    ## I'm more certain that the fedfunds rate will already be available
    fedfund_list = get_fred("FEDFUNDS")
    discrt_list = get_fred("MDISCRT")
    pcedef_govrt_end = (fedfund_list[-1])['date']

    ## these are the dates to be used for the plotting.
    ## start from the max of the 14th entry of the pce deflator series
    ## or the discrt start. We need at least 14 data points
    ## to generate the 3 month average and the YoY calculation
    pcedef_start = datetime.datetime.strptime((pcedef_list[14])['date'], "%Y-%m-%d")
    discrt_start = datetime.datetime.strptime((discrt_list[0])['date'], "%Y-%m-%d")
    pcedef_govrt_start = max(pcedef_start, discrt_start).strftime("%Y-%m-%d")

    ## generate the dates for the pce deflator series
    pcedef_govrt_dates = make_index(pcedef_govrt_start, pcedef_govrt_end)

    ## create a dictionary for avg hr date:value
    avghr_d = make_dict(avghr_list)
    ## create a dictionary for pce deflator date:value
    pcedef_d = make_dict(pcedef_list)

    ## if the current month for the pce deflator hasn't been reported, calculate an estimated value
    if pcedef_govrt_end not in pcedef_d or avghr_dates[-1] not in pcedef_d:
        prior1 = (pcedef_list[-1])['date']
        prior2 = add_months(datetime.datetime.strptime((pcedef_list[-1])['date'], "%Y-%m-%d"), -1).strftime("%Y-%m-%d")
        nextMonth = add_months(datetime.datetime.strptime((pcedef_list[-1])['date'], "%Y-%m-%d"), 1).strftime("%Y-%m-%d")
        pcedef_d[nextMonth] = 2 * pcedef_d[prior1] - pcedef_d[prior2]

    ## create the deflated average hourly earnings
    def_avghr_d = dict((key, 100 * avghr_d[key] / pcedef_d[key]) for key in avghr_d)
    ## create the trailing 3 months
    avghr_3mo_d = dict((date, make_avg(date, def_avghr_d)) for date in avghr_dates[2:])
    ## calculate avg hourly earnings YoY change
    avghr_yoy_d = make_yoy_dict(avghr_dates[14:], avghr_3mo_d)

    ########## data for real PCE vs discount rate/fed funds rates ##########
    ## get the federal funds rate data;
    ## switch from discount rate to federal funds rate as of 1-1-2000
    switch_date = datetime.datetime(2000, 01, 01)
    govrt_d = dict((each['date'], float(each['value'])) for each in fedfund_list
                   if datetime.datetime.strptime(each['date'], "%Y-%m-%d") >= switch_date)
    for obs in discrt_list:
        if datetime.datetime.strptime(obs['date'], "%Y-%m-%d") < switch_date:
            govrt_d[obs['date']] = float(obs['value'])

    ## create an index of dates. Starts from index 12 since the calculation looks back 12 months
    govrt_yoy_dates = make_index((discrt_list[0])['date'], (fedfund_list[-1])['date'])[12:]
    ## make dictionary of the YoY change
    govrt_yoy_d = dict((date, govrt_d[date] - govrt_d[add_months(datetime.datetime.strptime(date, "%Y-%m-%d"), -12).strftime("%Y-%m-%d")])
                       for date in govrt_yoy_dates)

    ########## data for the effect of inflation on real average hourly wages #########
    ## calculate the pce deflator trailing 3 month average
    if datetime.datetime.strptime(pcedef_govrt_dates[-1], "%Y-%m-%d") > datetime.datetime.strptime((pcedef_list[-1])['date'], "%Y-%m-%d"):
        suffix = [pcedef_govrt_dates[-1]]
    elif datetime.datetime.strptime(avghr_dates[-1], "%Y-%m-%d") > datetime.datetime.strptime((pcedef_list[-1])['date'], "%Y-%m-%d"):
        suffix = [avghr_dates[-1]]
    else:
        suffix = []
    pcedef_dates = [obs['date'] for obs in pcedef_list] + suffix
    pcedef_3mo_d = dict((date, make_avg(date, pcedef_d)) for date in pcedef_dates[2:])
    ## calculate the pce deflator YoY
    pcedef_yoy_d = make_yoy_dict(pcedef_dates[14:], pcedef_3mo_d)

    ## create the trailing 3 months (nominal)
    navghr_3mo_d = dict((date, make_avg(date, avghr_d)) for date in avghr_dates[2:])
    ## calculate avg hourly earnings (nominal) YoY change
    navghr_yoy_d = make_yoy_dict(avghr_dates[14:], navghr_3mo_d)

    ########### data for discount rate/fed funds rate vs pce deflator ##########
    ########### certain date calculations on line 401 ##########
    ########### all calculations done above as other series use same data ###########

    ########## data for the unemployment rate ##########
    unrate_list = get_fred("UNRATE")
    ## make dictionary from unemployment rate observations
    unrate_d = make_dict(unrate_list)

    ########## data for employment ##########
    emply_list = get_fred("CE16OV")
    ## get employment dates
    emply_dates = [obs['date'] for obs in emply_list]
    ## make dictionary from employment observations
    emply_d = make_dict(emply_list)
    ## calculate the 3 mo average
    emply_3mo_d = dict((date, make_avg(date, emply_d)) for date in emply_dates[2:])
    ## calculate the YoY value
    emply_yoy_d = make_yoy_dict(emply_dates[14:], emply_3mo_d)

    ########## data for domestic debt and 10 year treasury #########
    dmdebt_list = get_fred("TCMDODNS")
    trsy_10yr_list = get_fred("GS10")
    dmdebt_start = '1952-01-01'
    start_index = (dmdebt_list.index(obs) for obs in dmdebt_list if obs['date'] == dmdebt_start).next()
    dmdebt_dates = make_index(dmdebt_start, (dmdebt_list[-1])['date'], 3)
    ## make dictionary
    dmdebt_d = dict((obs['date'], float(obs['value'])) for obs in dmdebt_list[start_index:])
    ## calculate YoY change
    dmdebt_yoy_d = dict((date, 100 * (dmdebt_d[date] /
                         dmdebt_d[add_months(datetime.datetime.strptime(date, "%Y-%m-%d"), -12).strftime("%Y-%m-%d")] - 1))
                        for date in dmdebt_dates[4:])
    ## make dictionary
    trsy_10yr_d = dict((obs['date'], float(obs['value'])) for obs in trsy_10yr_list)

    ########## data for domestic debt and prime rate ##########
    prime_list = get_fred("MPRIME")
    ## make dictionary
    prime_d = dict((obs['date'], float(obs['value'])) for obs in prime_list)

    write_pce_avghr(pce_yoy_d, avghr_yoy_d, pce_dates)
    write_pce_govrt(pce_yoy_d, govrt_yoy_d, pce_dates)
    write_avghr(avghr_yoy_d, navghr_yoy_d, pcedef_yoy_d, avghr_dates)
    write_dscrt(pcedef_yoy_d, govrt_d, pcedef_govrt_dates)
    write_unemployment_rate(pce_yoy_d, unrate_d, pce_dates)
    write_employment(pce_yoy_d, emply_yoy_d, pce_dates)
    write_domestic_debt(dmdebt_yoy_d, trsy_10yr_d, dmdebt_dates)
    write_prime(dmdebt_yoy_d, prime_d, dmdebt_dates)
def test_fred_series(self):
    fred.key('abc')
    fred.series()
    expected = 'http://api.stlouisfed.org/fred/series'
    params = {'api_key': 'abc'}
    self.get.assert_called_with(expected, params=params)


def test_fred_series_updates(self):
    fred.key('ALL THE FRED API!')
    fred.updates()
    expected = 'https://api.stlouisfed.org/fred/series/updates'
    params = {'api_key': 'ALL THE FRED API!', 'file_type': 'json'}
    self.get.assert_called_with(expected, params=params)


def test_fred_source(self):
    fred.key('123')
    fred.source(25)
    expected = 'http://api.stlouisfed.org/fred/source'
    params = {'api_key': '123', 'source_id': 25, 'file_type': 'json'}
    self.get.assert_called_with(expected, params=params)


def test_fred_series_search(self):
    fred.key('123')
    fred.search('money stock')
    expected = 'https://api.stlouisfed.org/fred/series/search'
    params = {'api_key': '123', 'search_text': 'money stock', 'file_type': 'json'}
    self.get.assert_called_with(expected, params=params)


def test_fred_series_observations(self):
    fred.key('ohai')
    fred.observations("AAA")
    expected = 'https://api.stlouisfed.org/fred/series/observations'
    params = {'api_key': 'ohai', 'series_id': 'AAA', 'file_type': 'json'}
    self.get.assert_called_with(expected, params=params)


def test_fred_series_release(self):
    fred.key('abc')
    fred.series(releases=True)
    expected = 'https://api.stlouisfed.org/fred/series/release'
    params = {'api_key': 'abc', 'file_type': 'json'}
    self.get.assert_called_with(expected, params=params)
import os
import math
from datetime import datetime, timedelta

import fred

fred_api_key = os.environ.get('FRED_API_KEY')
fred.key(fred_api_key)

days_in_a_year = 365.25


def get_series_data(series, years=5):
    today = datetime.today().strftime('%Y-%m-%d')
    five_years_ago = (datetime.today()
                      - timedelta(days=years * days_in_a_year)).strftime('%Y-%m-%d')
    api_call_result = fred.observations(series,
                                        observation_start=five_years_ago,
                                        observation_end=today)
    series_data = _unpack_series_data(series, api_call_result)
    return series_data


def get_series_metadata(series):
    api_call_result = fred.series(series)
    metadata = api_call_result['seriess'][0]
    metadata_dict = {
        'Table Info': ['Title:', 'Units:', 'Frequency:'],
""" import fred import pandas as pd import numpy as np import matplotlib.pyplot as plt from datetime import date, datetime FRED_API_KEY="055ba538c874e5974ee22d786f27fdda" FRED_SERIES=["INDPRO", "PAYEMS", "HOUST", "RRSFS", "NAPM"] FRED_START=date(1990,1,1) #set to false to output graphs to a file, uses current working directory. # All numerical analysis is printed to the console DISPLAY=False fred.key(FRED_API_KEY) def get_fred_series(series): def filter(o): return {'date': datetime.strptime(o['date'],'%Y-%m-%d').date(), series: o['value']} return pd.DataFrame(map(filter,fred.observations(series)['observations']), dtype='float64').set_index('date').dropna() fred_data=get_fred_series(FRED_SERIES[0]) # Build an initial DataFrame for s in FRED_SERIES[1:]: fred_data=fred_data.join(get_fred_series(s)) fred_data=np.log(fred_data).diff(12)[FRED_START:].dropna()
def refresh(self, bulk=False, skip_to=None, fn=None, no_download=False, ids=None, force=False, **kwargs):
    """
    Reads the associated API and saves data to tables.
    """
    if skip_to:
        skip_to = int(skip_to)
    tmp_debug = settings.DEBUG
    settings.DEBUG = False
    django.db.transaction.enter_transaction_management()
    django.db.transaction.managed(True)
    try:
        if bulk:
            local_fn = self.download_bulk_data(fn=fn, no_download=no_download)
            dsfile, _ = DataSourceFile.objects.get_or_create(name=local_fn)
            if dsfile.complete:
                return
            # Process CSV.
            print 'Reading file...'
            sys.stdout.flush()
            source = zipfile.ZipFile(local_fn, 'r')
            if dsfile.total_lines_complete:
                total = dsfile.total_lines
                if not skip_to:
                    skip_to = dsfile.total_lines_complete
            else:
                total = len(source.open(s.BULK_INDEX_FN, 'r').readlines())
            DataSourceFile.objects.filter(id=dsfile.id).update(
                complete=False,
                total_lines=total,
                total_lines_complete=0,
                percent=0,
            )
            django.db.transaction.commit()
            # Skip the index header before the series rows begin.
            line_iter = iter(source.open(s.BULK_INDEX_FN, 'r'))
            offset = 0
            while 1:
                try:
                    line = line_iter.next()
                    offset += 1
                    #print 'line:', line.strip()
                    if line.lower().startswith('series '):
                        line_iter.next()
                        offset += 1
                        break
                except StopIteration:
                    break
            total -= offset
            i = 0
            just_skipped = False
            data = csv.DictReader(line_iter, delimiter=';')
            for row in data:
                i += 1
                if skip_to and i < skip_to:
                    if not just_skipped:
                        print
                        print '\rSkipping from %s to %s...' % (i, skip_to),
                        sys.stdout.flush()
                        just_skipped = True
                    continue
                elif just_skipped:
                    just_skipped = False
                    print
                DataSourceFile.objects.filter(id=dsfile.id).update(
                    downloaded=True,
                    complete=False,
                    total_lines=total,
                    total_lines_complete=i,
                    percent=i / float(total) * 100,
                )
                if not i % 10:
                    django.db.transaction.commit()
                row = dict(
                    ((k or '').strip().lower().replace(' ', '_'), (v or '').strip())
                    for k, v in row.iteritems()
                )
                if not row.get('file'):
                    continue
                print '\rLoading %s %.02f%% (%i of %i)...' % (row.get('file'), i / float(total) * 100, i, total),
                sys.stdout.flush()
                row['id'] = row['file'].split('\\')[-1].split('.')[0]
                section_fn = row['file']  # FRED2_csv_2/data/4/4BIGEURORECP.csv
                del row['file']
                if row['last_updated']:
                    row['last_updated'] = dateutil.parser.parse(row['last_updated'])
                    row['last_updated'] = date(row['last_updated'].year, row['last_updated'].month, row['last_updated'].day)
                #print row
                series, _ = Series.objects.get_or_create(id=row['id'], defaults=row)
                series.last_updated = row['last_updated']
                series_min_date = series.min_date
                series_max_date = series.max_date
                prior_series_dates = set(series.data.all().values_list('date', flat=True))
                if series.max_date and series.last_updated > (series.max_date - timedelta(days=s.LAST_UPDATE_DAYS)):
                    continue
                elif not section_fn.endswith('.csv'):
                    continue
                section_fn = 'FRED2_csv_2/data/' + section_fn.replace('\\', '/')
                #print 'section_fn:', section_fn
                lines = source.open(section_fn, 'r').readlines()
                #last_data = None
                last_data_date = None
                last_data_value = None
                total2 = len(source.open(section_fn, 'r').readlines())
                i2 = 0
                if s.EXPAND_DATA_TO_DAYS:
                    print
                series_data_pending = []
                for row in csv.DictReader(source.open(section_fn, 'r')):
                    i2 += 1
                    if s.EXPAND_DATA_TO_DAYS:
                        print '\r\tLine %.02f%% (%i of %i)' % (i2 / float(total2) * 100, i2, total2),
                        sys.stdout.flush()
                    row['date'] = dateutil.parser.parse(row['DATE'])
                    row['date'] = date(row['date'].year, row['date'].month, row['date'].day)
                    # series_min_date = min(series_min_date or row['date'], row['date'])
                    # series_max_date = max(series_max_date or row['date'], row['date'])
                    del row['DATE']
                    try:
                        row['value'] = float(row['VALUE'])
                    except ValueError:
                        print
                        print 'Invalid value: "%s"' % (row['VALUE'],)
                        sys.stdout.flush()
                        continue
                    del row['VALUE']
                    #print row
                    if s.EXPAND_DATA_TO_DAYS and last_data_date:
                        intermediate_days = (row['date'] - last_data_date).days
                        #print 'Expanding data to %i intermediate days...' % (intermediate_days,)
                        #sys.stdout.flush()
                        #Data.objects.bulk_create([
                        series_data_pending.extend([
                            Data(series=series,
                                 date=last_data_date + timedelta(days=_days),
                                 value=last_data_value)
                            for _days in xrange(1, intermediate_days)
                            if (last_data_date + timedelta(days=_days)) not in prior_series_dates
                        ])
                    #data, _ = Data.objects.get_or_create(series=series, date=row['date'], defaults=row)
                    if row['date'] not in prior_series_dates:
                        data = Data(series=series, date=row['date'], value=row['value'])
                        series_data_pending.append(data)
                        #data.save()
                    last_data_date = row['date']
                    last_data_value = row['value']
                if series_data_pending:
                    Data.objects.bulk_create(series_data_pending)
                # print '\r\tLine %.02f%% (%i of %i)' % (100, i2, total2),
                # print
                series.last_refreshed = date.today()
                series.save()
            # Cleanup.
            django.db.transaction.commit()
            Series.objects.update()
            Data.objects.update()
            gc.collect()
            DataSourceFile.objects.filter(id=dsfile.id).update(
                complete=True,
                downloaded=True,
                total_lines=total,
                total_lines_complete=total,
                percent=100,
            )
        else:
            #TODO: use API to download data for each series_id individually, e.g.
            # http://api.stlouisfed.org/fred/series/observations?series_id=DEXUSEU&api_key=<api_key>
            #TODO: check for revised values using output_type?
            # http://api.stlouisfed.org/docs/fred/series_observations.html#output_type
            if force:
                if ids:
                    q = Series.objects.all()
                else:
                    q = Series.objects.get_loadable()
            else:
                q = Series.objects.get_stale(days=30)
            if ids:
                q = q.filter(id__in=ids)
            fred.key(s.API_KEY)
            i = 0
            total = q.count()
            print '%i series found.' % (total,)
            for series in q.iterator():
                i += 1
                print '\rImporting %i of %i' % (i, total),
                sys.stdout.flush()
                observation_start = None
                if series.max_date:
                    observation_start = series.max_date - timedelta(days=7)
                try:
                    series_info = fred.series(series.id)['seriess'][0]
                except KeyError:
                    print>>sys.stderr, 'Series %s is missing seriess: %s' % (series.id, fred.series(series.id),)
                    continue
                except Exception as e:
                    print>>sys.stderr, 'Error on %s: %s' % (series.id, e,)
                    continue
                #print 'series_info:', series_info
                last_updated = series_info['last_updated'].strip()
                series.last_updated = dateutil.parser.parse(last_updated) if last_updated else None
                series.popularity = series_info['popularity']
                series.save()
                try:
                    series_data = fred.observations(series.id, observation_start=observation_start)
                except ValueError as e:
                    print>>sys.stderr, e
                    continue
                for data in series_data['observations']:
                    #print series, data['date'], data['value']
                    try:
                        value = float(data['value'])
                    except (ValueError, TypeError) as e:
                        print>>sys.stderr, e
                        continue
                    dt = date(*map(int, data['date'].split('-')))
                    data, created = Data.objects.get_or_create(series=series, date=dt, defaults=dict(value=value))
                    if not created:
                        data.value = value
                        data.save()
                series = Series.objects.get(id=series.id)
                if series.last_updated:
                    most_recent_past_date = series.data.filter(date__lte=date.today()).aggregate(Max('date'))['date__max']
                    threshold = series.last_updated - timedelta(days=series.days)
                    # print
                    # print 'most_recent_past_date:', most_recent_past_date
                    # print 'last_updated:', series.last_updated
                    # print 'threshold:', threshold
                    if most_recent_past_date:
                        if series.frequency == c.QUARTERLY and most_recent_past_date.day == 1:
                            #TODO: Is this a safe assumption? Might not matter for series without future data.
                            series.date_is_start = True
                        elif most_recent_past_date >= threshold:
                            series.date_is_start = False
                        else:
                            series.date_is_start = True
                series.last_refreshed = date.today()
                series.save()
                if force:
                    series.data.all().update(start_date_inclusive=None, end_date_inclusive=None)
                missing_dates = series.data.filter(Q(start_date_inclusive__isnull=True) | Q(end_date_inclusive__isnull=True))
                print 'Updating %i date ranges.' % (missing_dates.count(),)
                for _ in missing_dates.iterator():
                    _.set_date_range()
                    _.save()
                django.db.transaction.commit()
            print
    finally:
        #print "Committing..."
        settings.DEBUG = tmp_debug
        django.db.transaction.commit()
        django.db.transaction.leave_transaction_management()
def test_fred_sources_accidentally_passed_source_id(self):
    fred.key('123')
    fred.sources(123)
    expected = 'http://api.stlouisfed.org/fred/source'
    params = {'api_key': '123', 'source_id': 123}
    self.get.assert_called_with(expected, params=params)


def test_fred_releases_with_id_calls_release(self):
    fred.key('abc')
    fred.releases('123')
    expected = 'http://api.stlouisfed.org/fred/release'
    params = {'api_key': 'abc', 'release_id': '123'}
    self.get.assert_called_with(expected, params=params)


def test_fred_category_children(self):
    fred.key('abc123')
    fred.children()
    expected = 'https://api.stlouisfed.org/fred/category/children'
    params = {'api_key': 'abc123', 'category_id': None, 'file_type': 'json'}
    self.get.assert_called_with(expected, params=params)


def test_fred_series_release(self):
    fred.key('abc')
    fred.series(releases=True)
    expected = 'http://api.stlouisfed.org/fred/series/release'
    params = {'api_key': 'abc', 'file_type': 'json'}
    self.get.assert_called_with(expected, params=params)


def test_fred_category_series_function(self):
    fred.key('my_fred_key')
    fred.category_series(123)
    expected = 'https://api.stlouisfed.org/fred/category/series'
    params = {'api_key': 'my_fred_key', 'category_id': 123, 'file_type': 'json'}
    self.get.assert_called_with(expected, params=params)


def test_fred_category_series_function(self):
    fred.key('my_fred_key')
    fred.category_series()
    expected = 'http://api.stlouisfed.org/fred/category/series'
    params = {'api_key': 'my_fred_key'}
    self.get.assert_called_with(expected, params=params)


def test_fred_releases_with_id_calls_release(self):
    fred.key('abc')
    fred.releases('123')
    expected = 'https://api.stlouisfed.org/fred/release'
    params = {'api_key': 'abc', 'release_id': '123', 'file_type': 'json'}
    self.get.assert_called_with(expected, params=params)


def test_fred_sources(self):
    fred.key('moar fred')
    fred.sources()
    expected = 'http://api.stlouisfed.org/fred/sources'
    params = {'api_key': 'moar fred', 'file_type': 'json'}
    self.get.assert_called_with(expected, params=params)
def download_ids(api_key: str, sleep_time: int = 60, rate_limit: int = 100) -> None:
    """Download all time series listed in the provided JSON metadata files."""
    fred.key(api_key)
    #num_requests = 0
    tot_downloaded = 0
    num_files_written = 0
    list_json = []
    request_time_stamps = []
    #curr_dir = f"dir{tot_downloaded // cfg.source.files_per_folder :04d}/"
    dirs = glob.glob(cfg.source.path.FRED.meta + "/*")
    for d in dirs:
        file_names = glob.glob(d + "/*")
        for fname in file_names:
            with open(fname, "r") as fp:
                ids_meta = json.load(fp)
            for id_meta in ids_meta:
                try:
                    observations = fred.observations(id_meta["id"])
                    request_time_stamps.append(time.time())
                    #num_requests += 1
                    ts = {
                        "id": id_meta["id"],
                        "source": id_meta["source"],
                        "node_id": id_meta["node_id"],
                        "category_name": id_meta["category_name"],
                        "parent_id": id_meta["parent_id"],
                        "frequency": id_meta["frequency"],
                        "observations": [{"date": obs["date"], "value": obs["value"]}
                                         for obs in observations["observations"]],
                    }
                    tot_downloaded += 1
                    list_json.append(ts)
                    if len(list_json) > cfg.source.samples_per_json:
                        # Flush the accumulated series to a new raw JSON file.
                        filename = f"raw_{num_files_written:>06}.json"
                        if num_files_written % cfg.source.files_per_folder == 0:
                            curr_dir = f"dir{num_files_written // cfg.source.files_per_folder :04d}/"
                            os.makedirs(os.path.join(cfg.source.path.FRED.raw, curr_dir), exist_ok=True)
                        with open(os.path.join(*[cfg.source.path.FRED.raw, curr_dir, filename]), "w") as fp:
                            json.dump(list_json, fp, sort_keys=True, indent=4, separators=(",", ": "))
                        with open(os.path.join(cfg.source.path.FRED.meta, "ids_downloaded.txt"), "a") as fp:
                            for j in list_json:
                                fp.write(j["id"])
                                fp.write("\n")
                        num_files_written += 1
                        list_json = []
                    if tot_downloaded % 10000 == 0:
                        logger.info(f"Downloaded {tot_downloaded} time series.")
                except Exception as e:
                    logger.info(f"Failed to download id {id_meta['id']} from fname {fname}.")
                    logger.warning(e)
                # Sliding-window rate limiting over the last `rate_limit` requests.
                if len(request_time_stamps) > rate_limit:
                    first = request_time_stamps.pop(0)
                    if time.time() - first < sleep_time:
                        #logger.info(f"Sleeping for {request_time_stamps[0]-first}.")
                        time.sleep(request_time_stamps[0] - first)
        logger.info(f"Written files in directory {d} and currently have {tot_downloaded:>6} time series saved")
    # Write any remaining series after the loop finishes.
    filename = f"raw_{num_files_written:>06}.json"
    if num_files_written % cfg.source.files_per_folder == 0:
        curr_dir = f"dir{num_files_written // cfg.source.files_per_folder :04d}/"
        os.makedirs(os.path.join(cfg.source.path.FRED.raw, curr_dir), exist_ok=True)
    with open(os.path.join(*[cfg.source.path.FRED.raw, curr_dir, filename]), "w") as fp:
        json.dump(list_json, fp, sort_keys=True, indent=4, separators=(",", ": "))
def test_fred_category_series(self):
    fred.key('123abc')
    fred.category(series=True)
    expected = 'http://api.stlouisfed.org/fred/category/series'
    params = {'api_key': '123abc', 'file_type': 'json'}
    self.get.assert_called_with(expected, params=params)


def test_fred_sources(self):
    fred.key('moar fred')
    fred.sources()
    expected = 'https://api.stlouisfed.org/fred/sources'
    params = {'api_key': 'moar fred', 'file_type': 'json'}
    self.get.assert_called_with(expected, params=params)


def test_fred_sources_accidentally_passed_source_id(self):
    fred.key('123')
    fred.sources(123)
    expected = 'https://api.stlouisfed.org/fred/source'
    params = {'api_key': '123', 'source_id': 123, 'file_type': 'json'}
    self.get.assert_called_with(expected, params=params)


def test_fred_source(self):
    fred.key('123')
    fred.source(25)
    expected = 'https://api.stlouisfed.org/fred/source'
    params = {'api_key': '123', 'source_id': 25, 'file_type': 'json'}
    self.get.assert_called_with(expected, params=params)


def test_fred_releases_dates(self):
    fred.key('123')
    fred.dates()
    expected = 'http://api.stlouisfed.org/fred/releases/dates'
    params = {'api_key': '123', 'file_type': 'json'}
    self.get.assert_called_with(expected, params=params)


def test_fred_category_series(self):
    fred.key('123abc')
    fred.category(series=True)
    expected = 'https://api.stlouisfed.org/fred/category/series'
    params = {'api_key': '123abc', 'file_type': 'json'}
    self.get.assert_called_with(expected, params=params)
    # (fragment: the opening of this merge helper was truncated in the source)
    second_series = d.keys()[1]
    ret = pd.DataFrame.merge(d[first_series], d[second_series], on="month", how=inner_or_outer)
    # merge remaining series
    for s in d.keys()[2:]:
        ret = pd.DataFrame.merge(ret, d[s], on="month", how=inner_or_outer)
    return ret


def stripPunct(s):
    return ''.join(c for c in s if c not in set(string.punctuation))


if __name__ == "__main__":
    # FRED api key
    api_key = "your-key-here"
    fred.key(api_key)

    # get series observed on a monthly or more-than-monthly
    # (not quarterly, annually, bi-annually) basis
    series = []
    with open("../POSSIBLE_SERIES.txt", "r+") as f:
        for line in f:
            series.append(line.strip())

    # inner join yields a nice, full dataset
    merged = seriesTable(series, "inner")

    # write the full data frame to file; first, clear the file
    open('../series_monthly_inner.txt', 'w').close()
    merged.to_csv(r'../series_monthly_inner.txt', header=True, index=None, sep='\t', mode='a')
def test_fred_series_observations(self):
    fred.key('ohai')
    fred.observations()
    expected = 'http://api.stlouisfed.org/fred/series/observations'
    params = {'api_key': 'ohai'}
    self.get.assert_called_with(expected, params=params)