def weekly_fred_data(self):
    """Build a weekly feature table from FRED series plus ETF volumes.

    For every series id in ``self.fred_strings`` the observations from
    ``self.start_date`` onward are downloaded, gaps are filled, and the
    result is reduced to weekly values by ``self.weekly_stats``. The weekly
    columns are joined side by side on the "Week" index, and the weekly
    volume columns from ``self.weekly_spy_volume()``,
    ``self.weekly_bond_volume()`` and ``self.weekly_gold_volume()`` are
    added as ``SPY_Volume``, ``SHY_Volume`` and ``GDX_Volume``.

    Returns:
        pandas.DataFrame indexed by week with one column per FRED series
        and the three volume columns.
    """
    # May have to update to scan for the data frequency and then change it
    # based on that.
    # NOTE(review): the API key is hard-coded; consider loading it from
    # configuration or the environment instead.
    fred = Fred(api_key='81fb59583aa03d2ce139e065d694c299')

    input_df = pd.DataFrame({})
    for series_id in self.fred_strings:
        raw = pd.DataFrame(
            fred.get_series(series_id, observation_start=self.start_date))
        # Carry the previous observation forward; a gap at the very start
        # has no predecessor, so it takes the next available value instead.
        # (The previous element-wise loop wrapped to the *last* row for a
        # leading gap and relied on chained assignment.)
        raw = raw.ffill().bfill()

        # weekly_stats is expected to return (weekly values, week starts)
        # -- inferred from how the result is unpacked here.
        weekly_values, week_starts = self.weekly_stats(raw)
        weekly_df = pd.DataFrame({
            "Week": week_starts,
            series_id: weekly_values
        }).set_index("Week")
        input_df = pd.concat([input_df, weekly_df], axis=1)

    # NOTE(review): earlier revisions also built unused, SPY-date-filtered
    # copies of the volume frames; that code had no effect on the result
    # (and raised KeyError whenever a FRED week was missing from SPY), so it
    # was dropped. Column assignment below aligns on the week index, leaving
    # NaN where a volume frame has no matching week.
    input_df["SPY_Volume"] = self.weekly_spy_volume()["Weekly_Volume"]
    input_df["SHY_Volume"] = self.weekly_bond_volume()["Weekly_Volume"]
    input_df["GDX_Volume"] = self.weekly_gold_volume()["Weekly_Volume"]
    return input_df
def main():
    """Join all price sources and write price and change tables to CSV.

    Reads the Bloomberg, GSS, GSW, exchange-rate and interest-rate frames,
    joins them column-wise, upper-cases the column names, converts the
    Eurodollar quotes to implied rates, and writes:

    * ``/app/output/returns.csv`` -- period-over-period changes
    * ``/app/output/prices.csv``  -- the joined inputs
    """
    # Read in all the data, join, and standardize the column names
    dfbloom = bloom('/app/bloomberg/')
    dfgss = gss()
    dfgsw = gsw()
    # NOTE(review): the API key is hard-coded; consider moving it to
    # configuration.
    fred = Fred('5240bbe3851ef2d1aaffd0877d6048dd')
    dfxrates = exrates(fred)
    dfirates = fredirates(fred)
    df = pd.concat([dfbloom, dfgss, dfgsw, dfxrates, dfirates],
                   join='outer',
                   axis=1)
    df.columns = [col.upper() for col in df.columns]

    # Convert Eurodollar quotes to implied interest rate
    eurodollar_cols = ["ED1", "ED2", "ED3"]
    for col in eurodollar_cols:
        df.loc[:, col] = 100 - df[col]

    # These vars will have growth rates as X_2-X_1.
    # The source frames keep their original column case, so the names are
    # upper-cased here to match the renamed columns of ``df``.
    rates = list(eurodollar_cols)
    for source in (dfgss, dfgsw, dfirates):
        rates.extend(col.upper() for col in source.columns)

    # These vars will have growth rates as (X_2-X_1)/X_1.
    # Built in ``df`` column order so the CSV layout is stable across runs.
    rate_names = set(rates)
    levels = [col for col in df.columns if col not in rate_names]

    # Calculate growth rates for each group and combine
    print('Calculating Growth Rates')
    dflevels = df[levels].pct_change(periods=1, fill_method=None)
    # Rates change by simple difference, as stated above (this previously
    # used pct_change, identical to the levels group).
    dfrates = df[rates].diff(periods=1)
    dfchange = pd.concat([dflevels, dfrates], join='outer', axis=1)
    dfchange.to_csv('/app/output/returns.csv')
    df.to_csv('/app/output/prices.csv')
def currency_data(api_key):
    """Append the USD/EUR exchange rate to the joined closes and save it.

    Downloads FRED series ``DEXUSEU``, back-fills missing observations,
    keeps only the observations whose date (``YYYY-MM-DD``) also appears in
    the ``Periods`` column of the first 1478 rows of ``Joined_Closes.csv``,
    joins them positionally onto those rows as column ``USvsEURO`` and
    writes the result to ``Indicators_Joined_.csv``.

    Args:
        api_key: FRED API key.
    """
    fred = Fred(api_key=api_key)
    usd_eur = pd.DataFrame(fred.get_series('DEXUSEU'))
    usd_eur.columns = ['USvsEURO']
    # The previous call passed method="bbfill", which pandas rejects as an
    # invalid fill method; back-fill is what the name was aiming for.
    usd_eur = usd_eur.bfill()

    JC = pd.read_csv('Joined_Closes.csv').iloc[:1478]

    # A set makes each date membership test O(1).
    jc_dates = {period[0:10] for period in JC['Periods']}
    keep = [str(stamp)[0:10] in jc_dates for stamp in usd_eur.index]

    # Re-index from zero so the join below pairs rows by position.
    matched = usd_eur.loc[keep, ['USvsEURO']].reset_index(drop=True)

    JC = JC.join(matched)
    JC.drop('Unnamed: 0', axis=1, inplace=True)
    JC.to_csv('Indicators_Joined_.csv')
def get_labor_data(states, itd=True):
    """Collect per-state labor market figures from FRED (St. Louis Fed).

    Args:
        states: passed through to ``states_data`` for each series prefix.
        itd: when true, initial claims are summed from
            ``CRISIS_START_DATE`` onward; otherwise the latest 4-row
            rolling sum is used.

    Returns:
        A 4-tuple of (raw initial-claims frame, merged per-state frame with
        columns ``state``/``ur``/``ic``/``cc``, date of the last
        initial-claims row, 52-period percent change of initial claims).
    """

    def _as_state_frame(values, column):
        # One row per state; the former index becomes the "state" column.
        frame = values.to_frame(name=column).reset_index()
        frame.rename(columns={"index": "state"}, inplace=True)
        return frame

    client = Fred(api_key=FRED_API_KEY)

    # Unemployment rate: latest row of the period-over-period change.
    unemployment = states_data("UR", states, START_DATE, client)
    ur_frame = _as_state_frame(unemployment.diff().iloc[-1, ], "ur")

    # Initial claims: crisis-to-date total or trailing 4-row total.
    initial_claims = states_data("ICLAIMS", states, START_DATE, client)
    if itd:
        claims_total = initial_claims.loc[CRISIS_START_DATE:, :].sum(axis=0)
    else:
        claims_total = initial_claims.rolling(window=4).sum().iloc[-1, :]
    ic_frame = _as_state_frame(claims_total, "ic")
    merged = pd.merge(ur_frame, ic_frame, on="state")

    # Continued claims: latest row only.
    continued_claims = states_data("CCLAIMS", states, START_DATE, client)
    cc_frame = _as_state_frame(continued_claims.iloc[-1], "cc")
    merged = pd.merge(merged, cc_frame, on="state")

    yearly_change = initial_claims.pct_change(periods=52)
    last_observation = initial_claims.index[-1].date()
    return initial_claims, merged, last_observation, yearly_change
def get_popular_fred(num2get=100, api_key_file='fred_api.key'):
    """Collect popular FRED series by walking category ids upward from 0.

    Each category is searched ordered by popularity and series with a
    popularity of 10 or less are discarded. The walk stops once
    ``num2get`` categories have been fetched successfully or the category
    id exceeds 2000.

    Args:
        num2get: number of successfully searched categories to collect.
        api_key_file: path of the file holding the FRED API key.

    Returns:
        pandas.DataFrame of the collected series sorted by ascending
        popularity; an empty frame when nothing was collected.
    """
    max_category_id = 2000
    fred = Fred(api_key_file=api_key_file)
    cid = 0
    num_fetched = 0
    res = pd.DataFrame()
    while num_fetched < num2get:
        try:
            df = fred.search_by_category(category_id=cid,
                                         order_by='popularity')
            if df.shape[0] > 0:
                df['popularity'] = df['popularity'].apply(float)
                df = df[df.popularity > 10]
        except Exception as exc:
            # Best effort: a category that cannot be searched is reported
            # and skipped without counting towards num2get.
            msg = " searching for cid = %d" % (cid, )
            msg += str(exc)
            print(msg)
        else:
            res = (df if res.shape[0] == 0 else pd.concat([res, df]))
            num_fetched += 1
        cid += 1
        msg = "CID= %d" % (cid, )
        print(msg)
        if cid > max_category_id:
            break

    # Nothing collected (e.g. num2get <= 0 or every search failed) leaves a
    # frame without a 'popularity' column; sorting it would raise KeyError.
    if 'popularity' in res.columns:
        res.sort_values(by='popularity', inplace=True)
    return res
def freddata(keyid, api):
    """Fetch one FRED series as a two-column frame.

    Args:
        keyid: FRED series id.
        api: FRED API key.

    Returns:
        pandas.DataFrame with columns ``date`` (the former index) and
        ``value``.
    """
    client = Fred(api_key=api)
    observations = client.get_series(keyid)
    # Moving the index into a column yields exactly two columns.
    table = observations.reset_index()
    table.columns = ['date', 'value']
    return table
def get_state_current_population(state, api_key=None):
    """Look up a state's most recent population figure on FRED.

    The series ``<STATE>POP`` is read, its last observation is multiplied
    by 1000 (the series is stated in thousands) and truncated to an int.

    Args:
        state (str): State abbreviation, upper or lower case.
        api_key (str): FRED API key; passed straight to ``Fred``. A free
            key is available at http://research.stlouisfed.org/fred2/.
            ``None`` may be passed when the environment variable
            'FRED_API_KEY' holds the key.

    Returns:
        (dict): Population value, its formatted form, the series code, the
        series URL and the observation date (``YYYY-MM-DD``).
    """
    client = Fred(api_key=api_key)
    series_code = f'{state.upper()}POP'
    observations = client.get_series(series_code)

    headcount = int(observations.iloc[-1] * 1000)
    observed_on = observations.index[-1].isoformat()[:10]

    summary = {}
    summary['population'] = headcount
    summary['population_formatted'] = f'{headcount:,}'
    summary['population_source_code'] = series_code
    summary['population_source'] = f'https://fred.stlouisfed.org/series/{series_code}'
    summary['population_at'] = observed_on
    return summary
def main():
    """Download every FRED series listed in a CSV file to its own CSV.

    Command-line options:
        -input          CSV whose first column is the series id and second
                        column a description; the first row is a header.
        -output_prefix  string prepended to ``<series id>.csv``.
        -apikey         FRED API key.

    A series that cannot be downloaded or written is reported and skipped.

    Returns:
        0, always.
    """
    parser = argparse.ArgumentParser(description='scrap fred')
    parser.add_argument('-input',
                        type=str,
                        default='data_tickers/fred_stats.csv',
                        help='input csv file list all tickers to scrap')
    parser.add_argument('-output_prefix',
                        type=str,
                        default='../stock_data/raw_fred/',
                        help='prefix of the output file')
    parser.add_argument('-apikey', type=str, help='Fred API key')
    args = parser.parse_args()

    # scrap the data
    fred = Fred(api_key=args.apikey)
    # newline='' is what the csv module asks for when reading files.
    with open(args.input, newline='') as csvfile:
        fredreader = csv.reader(csvfile, delimiter=',')
        next(fredreader, None)  # skip the header; tolerate an empty file
        for row in fredreader:
            if not row:
                continue  # blank line
            series_id = row[0]
            description = row[1] if len(row) > 1 else ''
            filename = args.output_prefix + series_id + '.csv'
            print('Getting', series_id, '-', description)
            try:
                fred.get_series(series_id).to_csv(filename)
            except Exception as exc:
                # Keep going with the remaining series, but say why this
                # one failed. Ctrl-C is no longer swallowed here.
                print('failed:', exc)
    return 0
def dataIngest(fred_key, s3_out_bucket, s3_out_prefix):
    """Fetch each series listed in ``metadata.csv`` and upload it to S3.

    ``metadata.csv`` has a header row followed by rows of
    (series, latest date fetched, run counter). For every row the series
    is fetched via ``fetchData``, written to
    ``s3://<bucket>/<prefix>/<series>/<series>_<counter + 1>.csv`` and the
    row is rewritten with the last index value of the fetched data and the
    incremented counter. ``metadata.csv`` is overwritten once all rows
    have been processed.

    Args:
        fred_key: FRED API key.
        s3_out_bucket: destination bucket name.
        s3_out_prefix: key prefix; one trailing "/" is removed.
    """
    fred = Fred(api_key=fred_key)

    # endswith() also copes with an empty prefix, where indexing [-1]
    # would raise IndexError.
    if s3_out_prefix.endswith("/"):
        s3_out_prefix = s3_out_prefix[:-1]

    new_rows_list = [['Series', 'latest_date_fetched', 'IsRun']]
    with open("metadata.csv", "r") as file:
        reader = csv.reader(file)
        next(reader, None)  # skip header
        fs = s3fs.S3FileSystem(anon=False)
        for row in reader:
            series_name = row[0]
            run_count = int(row[2])
            fred_data = fetchData(series_name, row[1], run_count, fred)
            new_flag = run_count + 1
            new_rows_list.append(
                [series_name, fred_data.tail(1).index[0], new_flag])

            s3_out_train = "s3://{}/{}/{}/{}".format(
                s3_out_bucket, s3_out_prefix, series_name,
                series_name + "_" + str(new_flag) + ".csv")
            with fs.open(s3_out_train, "wb") as f:
                fred_data.to_csv(f, sep=',', index=False)
            print(s3_out_train)

    # Written only after every row succeeded, so a failure part-way leaves
    # the previous metadata intact. The context manager closes the handle
    # even if writing raises.
    with open('metadata.csv', 'w') as file:
        writer = csv.writer(file, lineterminator='\n')
        writer.writerows(new_rows_list)
def update_fred():
    """Download a fixed list of FRED series and append new rows to ``fred``.

    Every series is stored in long form with columns ``date``, ``value``
    and ``field`` (the series id). When the ``fred`` table does not exist
    it is created from the full download; otherwise only rows newer than
    the latest stored date of the same ``field`` are appended, together
    with all rows of any series the table does not contain yet.
    """
    con = connect_to_default(DATABASE)
    fred = Fred(api_key=APIKEY)
    fred_series_list = [
        'GDPA', 'BASE', 'DEXUSEU', 'DGS10', 'BAMLH0A0HYM2', 'SP500', 'AAA',
        'BAA', 'CIVPART', 'CPIAUCSL', 'CURRCIR', 'EXUSEU', 'FEDFUNDS',
        'HOUST', 'INDPRO', 'MORTG', 'PAYEMS', 'PSAVERT', 'TB3MS', 'UMCSENT',
        'UNRATE', 'GDP', 'GDPC1', 'GDPDEF', 'M2V', 'PCECC96', 'GFDEBTN',
        'STLFSI', 'M1', 'M2',
    ]

    frames = []
    # dict.fromkeys drops repeated ids while keeping order; 'PAYEMS' used to
    # be listed twice, which stored each of its rows twice.
    for ser in dict.fromkeys(fred_series_list):
        frame = pd.DataFrame(fred.get_series(ser), columns=['value'])
        frame.index.name = 'date'
        frame.reset_index(inplace=True)
        frame['field'] = ser
        frames.append(frame)
    df = pd.concat(frames, ignore_index=True)

    if 'fred' not in con.table_names():
        df.to_sql('fred', con, index=False)
        return

    current_list = pd.read_sql(
        "SELECT field, MAX(date) as 'max_date' from fred group by field",
        con)
    # NOTE(review): some backends return MAX(date) as text; coerce it so the
    # comparison below is date-to-date. A no-op if it is already datetime.
    current_list['max_date'] = pd.to_datetime(current_list['max_date'])

    # Left join: a series with no stored rows gets a missing max_date and is
    # inserted in full (an inner join silently discarded such series).
    dfc = df.merge(current_list, on=['field'], how='left')
    is_new = dfc['max_date'].isna() | (dfc['date'] > dfc['max_date'])
    dfc = dfc[is_new].drop('max_date', axis=1)
    if not dfc.empty:
        dfc.to_sql('fred', con, index=False, if_exists='append')
def get_licensees_by_county_ok(licensees, counties):
    """Compute licensees per capita for each Oklahoma county.

    For every county the matching rows of ``licensees`` are counted, the
    county's latest population is read from FRED (last observation of the
    county's ``population_source_code`` series, multiplied by 1000), and
    the two are divided.

    Args:
        licensees: frame with a ``county`` column.
        counties: iterable of dicts carrying at least ``name`` and
            ``population_source_code``.

    Returns:
        dict keyed by county name; each value is the county dict extended
        with the population and licensee statistics.
    """
    settings = dotenv_values('../../../.env')
    client = Fred(api_key=settings['FRED_API_KEY'])

    results = {}
    for county in counties:
        name = county['name']
        series_code = county['population_source_code']
        in_county = licensees.loc[licensees.county == name]

        population_series = client.get_series(series_code)
        headcount = int(population_series.iloc[-1] * 1000)
        per_capita = len(in_county) / headcount
        observed_on = population_series.index[-1].isoformat()[:10]

        # Statistics are layered over the county's own fields.
        results[name] = {
            **county,
            'population': f'{headcount:,}',
            'population_source_code': series_code,
            'population_source': f'https://fred.stlouisfed.org/series/{series_code}',
            'licensees_per_capita': per_capita,
            'population_at': observed_on,
            'total_licensees': len(in_county),
        }
    return results
def fred_1r_ir_today(end=None):
    """Return the latest 1-year Treasury rate (FRED ``DGS1``) as a fraction.

    Args:
        end: last observation date to request; defaults to the current
            time at the moment of the call.

    Returns:
        The most recent available ``DGS1`` value divided by 100, or the
        fixed fallback ``1.2 / 100`` when ``USE_API`` is false.
    """
    if not USE_API:
        return 1.2 / 100

    # Resolve "now" per call. As a default argument it was evaluated once at
    # import time, so a long-running process kept asking for a stale date.
    if end is None:
        end = datetime.now()

    # NOTE(review): the API key is hard-coded; consider moving it to
    # configuration.
    fred = Fred(api_key='3de60b3b483033f57252b59f497000cf')
    s = fred.get_series('DGS1', observation_end=end)
    # Skip trailing missing observations and index by position explicitly
    # (s[-1] relied on deprecated positional fallback on a date index).
    return s.dropna().iloc[-1] / 100
def _risk_free_calc(self, date):
    """Interpolate a risk-free yield for ``date`` from the Treasury curve.

    The latest values of seven constant-maturity Treasury series (1 month
    to 5 years) are read from FRED, a cubic spline is fitted through them
    against maturity in days, and the spline is evaluated at the number of
    whole days between now and ``date``.

    Args:
        date: target datetime.

    Returns:
        The interpolated yield, in the units of the FRED series, as
        returned by ``interpolate.splev``.

    Raises:
        Whatever ``Fred`` raises when the client cannot be created (for
        instance when no API key has been set).
    """
    try:
        fred = Fred(api_key=self._fred_api_key)
    except Exception:
        print(
            "YOU NEED TO SET THE FRED API KEY USING THE SET_FRED_API_KEY METHOD"
        )
        # Stop here: continuing would only fail later with an unrelated
        # "fred is not defined" error.
        raise

    # Obtain the yield curve CMT data from FRED (series id, maturity in days)
    curve_points = (
        ('DGS1MO', 30.4167),
        ('DGS3MO', 91.2501),
        ('DGS6MO', 182.5),
        ('DGS1', 365),
        ('DGS2', 730),
        ('DGS3', 1095),
        ('DGS5', 1825),
    )
    days_array = [maturity for _, maturity in curve_points]
    yield_array = [
        fred.get_series_latest_release(series_id).iloc[-1]
        for series_id, _ in curve_points
    ]

    # Cubic spline interpolation to derive the CMT rates for the near and
    # next term maturities.
    tck = interpolate.splrep(days_array, yield_array, s=0)

    # Calculate days from date
    days = (date - dt.datetime.now()).days

    # Back out interpolated yield
    return interpolate.splev(days, tck)
def dataIngest(fred_key, s3_out_bucket, s3_out_prefix):
    """Fetch each series listed in ``metadata.csv`` and refresh the metadata.

    ``metadata.csv`` has a header row followed by one row per series. For
    every row the series is fetched via ``fetchData`` and the row is
    rewritten as (series, last index value of the fetched data, True).
    The S3 destination path for each series is printed.
    ``metadata.csv`` is overwritten once all rows have been processed.

    NOTE(review): nothing in this function uploads the fetched data; the
    S3 path is only printed. Confirm whether the upload is intentionally
    done elsewhere.

    Args:
        fred_key: FRED API key.
        s3_out_bucket: destination bucket name.
        s3_out_prefix: key prefix; one trailing "/" is removed.
    """
    fred = Fred(api_key=fred_key)

    # endswith() also copes with an empty prefix, where indexing [-1]
    # would raise IndexError.
    if s3_out_prefix.endswith("/"):
        s3_out_prefix = s3_out_prefix[:-1]

    new_rows_list = [['Series', 'latest_date_fetched', 'IsRun']]
    with open("metadata.csv", "r") as file:
        reader = csv.reader(file)
        next(reader, None)  # skip header
        for row in reader:
            series_name = row[0]
            fred_data = fetchData(series_name, "", False, fred)
            new_rows_list.append(
                [series_name, fred_data.tail(1).index[0], True])
            s3_out_train = "s3://{}/{}/{}/{}".format(
                s3_out_bucket, s3_out_prefix, series_name,
                series_name + ".csv")
            print(s3_out_train)

    # The context manager closes the handle even if writing raises.
    with open('metadata.csv', 'w') as file:
        writer = csv.writer(file, lineterminator='\n')
        writer.writerows(new_rows_list)
def get_series_data(series_id: str,
                    start: str = None,
                    end: str = None) -> pd.DataFrame:
    """Get Series data. [Source: FRED]

    Parameters
    ----------
    series_id : str
        Series ID to get data from
    start : str
        Start date to get data from, format yyyy-mm-dd
    end : str
        End date to get data to, format yyyy-mm-dd

    Returns
    ----------
    pd.DataFrame
        Series data; an empty DataFrame when the request fails.
        NOTE(review): on success this is whatever ``Fred.get_series``
        returns, which appears to be a Series rather than a DataFrame --
        confirm what callers expect.
    """
    df = pd.DataFrame()
    try:
        fredapi_client = Fred(cfg.API_FRED_KEY)
        df = fredapi_client.get_series(series_id, start, end)
    # Series does not exist & invalid api keys.
    # fredapi reports failed requests as ValueError (it converts the
    # underlying HTTP error), so both are handled the same way here.
    except (HTTPError, ValueError) as e:
        console.print(e)

    return df
def get_libor_rates_curve(self):
    """Build the rates curve from the configured FRED series.

    Returns:
        dict mapping each value of ``self.__series_ids`` to the result of
        ``self._get_cc_rate`` for the corresponding key.
    """
    # Open one FRED session and reuse it for every series.
    fred_session = Fred(api_key=self.__api_key)

    rates_by_label = {}
    for series_key in self.__series_ids:
        label = self.__series_ids[series_key]
        rates_by_label[label] = self._get_cc_rate(fred_session, series_key)
    return rates_by_label
def __init__(self,
             database: str = None,
             key: str = None,
             var_list: list = None):
    """Store the request settings and open a FRED client.

    Args:
        database: name of the database to query.
        key: API key; its form depends on the database in use.
        var_list: variables for a simple FRED request.
    """
    self.database, self.var_list = database, var_list
    self.key = key
    # The client is created with the key that was just stored.
    self.fred = Fred(api_key=self.key)
def __init__(self, api_code, start_date, series_name=None):
    """Load one FRED series from ``start_date`` onward into ``self.df``.

    The API key is read from ``ignore_folder/fred_api_key.txt`` with all
    newline characters removed. ``self.df`` ends up with a single column
    named ``self.series_name``.

    Args:
        api_code: forwarded to the parent initializer; the series is then
            fetched using ``self.api_code``.
        start_date: first index label to keep.
        series_name: forwarded to the parent initializer.
    """
    super().__init__(api_code, start_date, series_name=series_name)

    with open('ignore_folder/fred_api_key.txt', 'r') as key_file:
        raw_key = key_file.read()
    api_key = ''.join(raw_key.split('\n'))

    self.fred = Fred(api_key=api_key)
    observations = self.fred.get_series(self.api_code)
    full_history = pd.DataFrame(observations)
    self.df = full_history.loc[start_date::]
    self.df.columns = [self.series_name]
def get_state_current_population(state):
    """Return a state's latest population from FRED as an absolute number.

    The FRED series ``<STATE>POP`` is stated in thousands, so its last
    observation is multiplied by 1000. The API key is read from
    ``FRED_API_KEY`` in ``../../../.env``.

    Args:
        state: state abbreviation, upper or lower case.
    """
    env = dotenv_values('../../../.env')
    client = Fred(api_key=env['FRED_API_KEY'])
    series_code = f'{state.upper()}POP'
    observations = client.get_series(series_code)
    latest_in_thousands = observations.iloc[-1]
    return latest_in_thousands * 1000
def get_yield_curve(date: Optional[datetime]) -> Tuple[pd.DataFrame, str]:
    """Gets yield curve data from FRED

    Parameters
    ----------
    date: Optional[datetime]
        Date to get curve for. If None, the last row of the data
        requested from seven days ago onward is used.

    Returns
    -------
    pd.DataFrame:
        Dataframe of yields ("Rate") and maturities in years ("Maturity");
        empty when ``date`` is given but has no row in the data.
    str
        Date for which the yield curve is obtained.
        NOTE(review): only the empty-result path returns a formatted
        string; the other paths return the index value or ``date`` itself.
    """
    client = Fred(cfg.API_FRED_KEY)
    series_by_label = {
        "1Month": "DGS1MO",
        "3Month": "DGS3MO",
        "6Month": "DGS6MO",
        "1Year": "DGS1",
        "2Year": "DGS2",
        "3Year": "DGS3",
        "5Year": "DGS5",
        "7Year": "DGS7",
        "10Year": "DGS10",
        "20Year": "DGS20",
        "30Year": "DGS30",
    }
    # Maturities in years, in the same order as the labels above.
    maturities = [1 / 12, 1 / 4, 1 / 2, 1, 2, 3, 5, 7, 10, 20, 30]

    want_latest = date is None
    anchor = datetime.now() - timedelta(days=7) if want_latest else date
    first_day = anchor.strftime("%Y-%m-%d")

    # One column per maturity, joined on the observation date.
    curve = pd.DataFrame()
    for label, fred_id in series_by_label.items():
        column = pd.DataFrame(client.get_series(fred_id, first_day),
                              columns=[label])
        curve = pd.concat([curve, column], axis=1)

    if want_latest:
        curve_date = curve.index[-1]
        rates = pd.DataFrame(curve.iloc[-1, :].values, columns=["Rate"])
    else:
        curve_date = date
        on_date = curve[curve.index == date]
        if on_date.empty:
            return pd.DataFrame(), date.strftime("%Y-%m-%d")
        rates = pd.DataFrame(on_date.values.T, columns=["Rate"])

    rates["Maturity"] = maturities
    return rates, curve_date
def getCorporatesFred(self, trim_start, trim_end):
    """Build per-rating corporate curves as OIS plus FRED credit spreads.

    If ``self.corporates`` already holds data reaching ``trim_end`` it is
    returned unchanged. Otherwise OIS data is loaded for the requested
    window, one spread series per entry of ``self.ratings`` is downloaded
    from FRED, scaled by 1e-2, aligned to ``self.datesAll`` and gap-filled,
    and then added to every OIS column.

    Side effects: updates ``self.trim_start``, ``self.trim_end``,
    ``self.OIS``, ``self.datesAll``, ``self.OISData``, ``self.corpSpreads``
    and ``self.corporates``, and appends to ``self.tenors``.

    NOTE(review): this relies on ``pd.Panel``, which recent pandas
    releases no longer provide -- confirm the pinned pandas version.

    Args:
        trim_start: first date of the requested window.
        trim_end: last date of the requested window.

    Returns:
        ``self.corporates``: a ``pd.Panel`` keyed by rating plus ``'OIS'``.
    """
    fred = Fred(api_key=FRED_API_KEY)
    # With no cached data this makes the check below compare trim_end
    # against trim_start.
    curr_trim_end = trim_start
    if (self.corporates.size != 0):
        self.trim_start = self.corporates['OIS'].index.min().date()
        curr_trim_end = self.corporates['OIS'].index.max().date()
    # Cached data already covers the requested end date: reuse it.
    if trim_end <= curr_trim_end:
        self.trim_end = curr_trim_end
        return self.corporates
    self.trim_start = trim_start
    self.trim_end = trim_end
    self.OIS = OIS(trim_start=trim_start, trim_end=trim_end)
    self.datesAll = self.OIS.datesAll
    self.datesAll.columns = [x.upper() for x in self.datesAll.columns]
    self.datesAll.index = self.datesAll.DATE
    self.OISData = self.OIS.getOIS()
    # One tenor entry per OIS column.
    # NOTE(review): self.tenors is appended to on every non-cached call and
    # never reset here -- confirm that is intended.
    for i in np.arange(len(self.OISData.columns)):
        freq = self.OISData.columns[i]
        self.tenors.append(self.myScheduler.extractDelay(freq=freq))
    for rating in self.ratings.keys():
        # FRED series id for this rating.
        index = self.ratings[rating]
        try:
            # Scale by 1e-2 (presumably percent to fraction -- confirm).
            corpSpreads = 1e-2 * (fred.get_series(
                index,
                observation_start=trim_start,
                observation_end=trim_end).to_frame())
            # Plain dates so the index matches self.datesAll in the merge.
            corpSpreads.index = [x.date() for x in corpSpreads.index[:]]
            # Left join keeps every date in datesAll; missing spreads are
            # filled forward, then backward.
            corpSpreads = pd.merge(left=self.datesAll,
                                   right=corpSpreads,
                                   left_index=True,
                                   right_index=True,
                                   how="left")
            corpSpreads = corpSpreads.fillna(method='ffill').fillna(
                method='bfill')
            corpSpreads = corpSpreads.drop("DATE", axis=1)
            # Fill again across columns (via transpose) for remaining gaps.
            self.corpSpreads[rating] = corpSpreads.T.fillna(
                method='ffill').fillna(method='bfill').T
        except Exception as e:
            # Best effort: a rating whose series fails is reported and left
            # out of self.corpSpreads.
            print(e)
            print(index, " not found")
    self.corpSpreads = pd.Panel.from_dict(self.corpSpreads)
    self.corporates = {}
    self.OISData.drop('DATE', axis=1, inplace=True)
    ntenors = np.shape(self.OISData)[1]
    for rating in self.ratings:
        try:
            # Repeat the spread column for every tenor so it can be added
            # element-wise to the OIS values.
            tiledCorps = np.tile(self.corpSpreads[rating][0],
                                 ntenors).reshape(np.shape(self.OISData))
            self.corporates[rating] = pd.DataFrame(
                data=(tiledCorps + self.OISData.values),
                index=self.OISData.index,
                columns=self.OISData.columns)
        except:
            # NOTE(review): bare except hides the reason for the failure
            # (including a rating skipped in the download loop above).
            print("Error in addition of Corp Spreads")
    self.corporates['OIS'] = self.OISData
    self.corporates = pd.Panel(self.corporates)
    return self.corporates
def get_fred_data(fredkey: str,
                  SeriesNameDict: dict = {'SeriesID': 'SeriesName'}):
    """Import data from the Federal Reserve (FRED) in long format.

    Args:
        fredkey: an API key from FRED.
        SeriesNameDict: pairs of FRED series IDs and series names. The ID
            must match a FRED ID; the name can be anything. When left at
            its default (or passed as None) a built-in sample of daily
            series is returned.

    Returns:
        pandas.DataFrame with columns ``date``, ``value``, ``series_id``
        and ``series_name``, one row per observation.

    Raises:
        ImportError: when the fredapi package is not available.
    """
    if not _has_fred:
        raise ImportError("Package fredapi is required")

    fred = Fred(api_key=fredkey)

    # The default dict is only a "use the samples" marker and is never
    # mutated, so sharing it between calls is harmless.
    if SeriesNameDict is None or SeriesNameDict == {'SeriesID': 'SeriesName'}:
        SeriesNameDict = {
            'T10Y2Y':
            '10 Year Treasury Constant Maturity Minus 2 Year Treasury Constant Maturity',
            'DGS10': '10 Year Treasury Constant Maturity Rate',
            'DCOILWTICO': 'Crude Oil West Texas Intermediate Cushing Oklahoma',
            'SP500': 'S&P 500',
            'DEXUSEU': 'US Euro Foreign Exchange Rate',
            'DEXCHUS': 'China US Foreign Exchange Rate',
            'DEXCAUS': 'Canadian to US Dollar Exchange Rate Daily',
            'VIXCLS':
            'CBOE Volatility Index: VIX',  # this is a more irregular series
            'T10YIE': '10 Year Breakeven Inflation Rate',
            'USEPUINDXD':
            'Economic Policy Uncertainty Index for United States',  # also very irregular
        }

    columns = ['date', 'value', 'series_id', 'series_name']
    frames = []
    for series, series_name in SeriesNameDict.items():
        data = fred.get_series(series)
        frames.append(
            pd.DataFrame({
                'date': data.index,
                'value': data.values,
                'series_id': series,
                'series_name': series_name,
            }))

    # Concatenate once: growing a frame inside the loop copies all earlier
    # rows each time, and starting from an empty typed frame depends on
    # dtype handling that pandas has deprecated.
    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, axis=0, ignore_index=True)
def DailyRiskFreeRates():
    """Download FRED series ``DGS10`` as a fractional daily rate table.

    The values are divided by 100, stored in column ``RiskFree`` under an
    index named ``Date``, written to ``myPath + 'TBillRate.csv'`` and
    returned.

    NOTE(review): the API key is embedded in the source; consider moving
    it to configuration.
    """
    client = Fred(api_key='5e8edcc00dd0a9e40375540f32696316')
    quotes = client.get_series('DGS10')

    rate_table = quotes.to_frame().reset_index()
    rate_table.columns = ['Date', 'RiskFree']
    rate_table = rate_table.set_index('Date')

    # Percent to fraction.
    rate_table['RiskFree'] = rate_table['RiskFree'] / 100

    rate_table.to_csv(myPath + 'TBillRate.csv')
    return rate_table
def __init__(self):
    """Set up logging, the FRED credentials, data paths and the client.

    The API key is looked up for the upper-cased source name ('FRED');
    the data and plot directories are resolved through
    ``OSMuxImpl.get_proper_path``.
    """
    self.logger = get_logger()
    self.source = 'fred'

    vendor_name = str.upper(self.source)
    self.api_key = SecureKeysAccess.get_vendor_api_key_static(
        vendor=vendor_name)

    fred_dir = '/workspace/data/fred/'
    plots_dir = '/workspace/data/seaborn/plots/'
    self.fred_pwd = OSMuxImpl.get_proper_path(fred_dir)
    self.seaborn_plots_pwd = OSMuxImpl.get_proper_path(plots_dir)

    self.fred = Fred(api_key=self.api_key)
def calc_equity_alloc(start_dt: str = '', end_dt: str = '') -> pd.Series:
    """Compute the equity share of total equity-plus-credit liabilities.

    Seven FRED series are fetched for the window. The two equity-liability
    series are summed and divided by 1000, and that amount is divided by
    itself plus the sum of the five credit-liability series. The result is
    passed through ``make_qtrly(..., 'first')``.

    Args:
        start_dt: first observation date; '1920-01-01' when empty.
        end_dt: last observation date; today (``%Y-%m-%d``) when empty.
    """
    start_dt = start_dt or '1920-01-01'
    end_dt = end_dt or dt.today().strftime('%Y-%m-%d')

    client = Fred(api_key=os.environ['TOKEN_FRED'])

    def _fetch(series_code):
        # Every series is requested over the same window.
        return client.get_series(series_code,
                                 observation_start=start_dt,
                                 observation_end=end_dt)

    corporate_equity = _fetch('NCBEILQ027S')
    corporate_credit = _fetch('BCNSDODNS')
    household_credit = _fetch('CMDEBT')
    federal_credit = _fetch('FGSDODNS')
    state_local_credit = _fetch('SLGSDODNS')
    financial_equity = _fetch('FBCELLQ027S')
    foreign_credit = _fetch('DODFFSWCMI')

    # Divide nonfinancial and financial business equity liabilities by all
    # credit instrument liabilities in the economy (plus the equity itself).
    equity_total = (corporate_equity + financial_equity) / 1000
    credit_total = (corporate_credit + household_credit + federal_credit +
                    state_local_credit + foreign_credit)
    allocation = pd.Series(equity_total / (equity_total + credit_total))

    return make_qtrly(allocation, 'first')
def junk_bond_demand(API):
    """Score junk-bond demand from the junk vs. investment-grade spread.

    The last 30 days of FRED series ``BAMLH0A0HYM2EY`` (junk bonds) and
    ``DAAA`` (investment-grade bonds) are fetched, their difference is
    taken date by date, missing values are dropped and the remaining
    spreads are passed to ``CalculateIndex``.

    Args:
        API: FRED API key.

    Returns:
        Whatever ``CalculateIndex`` returns for the list of spreads.
    """
    seconds_in_30_days = 2592000
    now = time.time()
    date_end = time.strftime('%Y.%m.%d', time.localtime(now))  # today
    date_start = time.strftime(
        '%Y.%m.%d', time.localtime(now - seconds_in_30_days))  # a month ago

    fred = Fred(api_key=API)
    junkbond = fred.get_series('BAMLH0A0HYM2EY', date_start, date_end)
    investbond = fred.get_series('DAAA', date_start, date_end)

    # Subtracting the Series aligns them on date, so a day present in only
    # one series yields NaN and is dropped instead of being paired with a
    # different day (the old code paired values by position).
    spread = (junkbond - investbond).dropna().tolist()

    #index = (spread[-1] - min(spread))/(max(spread)-min(spread))
    return CalculateIndex(spread)
def scrape_macro(series_id: dict):
    """Download several FRED series with missing values removed.

    :param series_id: dictionary of FRED ID (key) and Series name (value) pairs
    :return: list of pandas Series, one per entry and in the dictionary's
        order, each named with the entry's value
    """
    client = Fred(api_key=config.FRED_API_KEY)
    collected = []
    for fred_code, label in series_id.items():
        observations = client.get_series(fred_code).dropna()
        collected.append(pd.Series(data=observations, name=label))
    return collected
def retrieve_macro_data() -> pd.DataFrame:
    """
    Retrieves macro data using FRED api from 01 Jan 2006 to 28 Feb 2021.

    Each indicator becomes one column named by its FRED id. The combined
    table is resampled to month end (last value of each month), gaps are
    interpolated, and the index is returned as a ``date`` column.

    :return: DataFrame representing macro data.
    """
    print('\n==========Collecting macro data==========')

    INDICATORS = {
        # 'DFEDTARL': 'Federal Funds Target Range - lower bound',
        # 'DFEDTARU': 'Federal Funds Target Range - upper bound',
        'GDPC1': 'Real GDP',
        'CPIAUCSL': 'Consumer Price Index (CPI)',
        'UNRATE': 'Unemployment Rate',
        'PAYEMS': 'Total NonFarm payrolls (Employment)',
        'RRSFS': 'Real Retail and Food Services Sales',
        'GFDEBTN': 'Federal Debt',
        'VIXCLS': 'CBOE Volatility Index (VIX)',
        'DFF': 'Effective Federal Funds Rate',
    }

    # ISO dates: the earlier '1/1/2006' and '28/2/2021' depended on the
    # parser guessing day/month order.
    START_DATE = '2006-01-01'
    END_DATE = '2021-02-28'
    # NOTE(review): the API key is hard-coded; consider moving it to
    # configuration.
    API_KEY = '2fb48248f0ce3781c82ffca58fecfb36'

    fred = Fred(api_key=API_KEY)

    columns = []
    for indicator, name in INDICATORS.items():
        print(f'Indicator: {indicator}, Name: {name}')
        data = fred.get_series(indicator,
                               observation_start=START_DATE,
                               observation_end=END_DATE).rename(indicator)
        columns.append(pd.DataFrame(data))

    # Join all indicators on their dates in one pass.
    full_data = pd.concat(columns, axis=1) if columns else pd.DataFrame()
    print("==========Done==========\n")

    # resample daily data to monthly
    full_data = full_data \
        .resample('M') \
        .last() \
        .interpolate() \
        .rename_axis('date') \
        .reset_index()

    return full_data
def findData(self):
    """Build a table of monthly manufacturing capacity-utilization series.

    Searches FRED category 3 for monthly series, keeps those whose title
    starts with 'Capacity Utilization: Manufacturing', extracts a NAICS
    code from each title, downloads each series from 1997-01-01 to
    2019-12-01 and returns one row per series.

    Returns:
        pandas.DataFrame indexed (and sorted) by NAICS code with columns
        ``Industry``, ``Series ID`` and one column per observation date.
    """
    # Gets the series under Industrial Production & Capacity Utilization
    # NOTE(review): the API key is hard-coded; consider moving it to
    # configuration.
    fred = Fred(api_key='e0a47670a791268b5b30cdf7cc217c4c')
    series = fred.search_by_category(3,
                                     order_by='title',
                                     filter=('frequency', 'Monthly'))

    # Keeps only the Manufacturing series. Removes unnecessary title info.
    # copy() so the title edits below act on an independent frame rather
    # than on a slice of the search result.
    is_manufacturing = series['title'].str.startswith(
        'Capacity Utilization: Manufacturing')
    series = series[is_manufacturing].copy()
    for prefix in (
            'Capacity Utilization: Manufacturing: Durable Goods: ',
            'Capacity Utilization: Manufacturing: Non-Durable Goods: ',
    ):
        series['title'] = series['title'].str.replace(prefix, '', regex=False)

    # Gets the NAICS codes and series IDs for each series
    naics_code = series['title'].str.extract(
        r'\= (.{3})')  # Some have pt. before ). Need to fix
    naics_code = naics_code.rename(columns={0: 'NAICS Code'})
    naics_code['NAICS Code'] = pd.to_numeric(naics_code['NAICS Code'],
                                             downcast='integer')
    # Strip the parenthesised part. regex=True is explicit because recent
    # pandas treats the pattern as literal text by default.
    series['title'] = series['title'].str.replace(r'\(([^)]+)\)', '',
                                                  regex=True)
    series_id = series.index.tolist()

    # Makes a DataFrame with NAICS code, series ID, and title
    dataset = pd.DataFrame(series.iloc[:, 3])
    dataset = naics_code.merge(dataset, left_index=True, right_index=True)

    # Gets data for each series from 1997 through 2019
    data = {}
    halfway = len(series) // 2
    for count, fred_id in enumerate(series.index, start=1):
        data[fred_id] = fred.get_series(fred_id,
                                        observation_start='1997-01-01',
                                        observation_end='2019-12-01')
        # Pause once, halfway through the downloads. Integer division makes
        # this fire for odd counts too.
        if count == halfway:
            time.sleep(10)
    data = pd.DataFrame(data)

    # Adds data to dataset and organizes by NAICS
    data_id = data.transpose()
    dataset['Series ID'] = series_id
    dataset = dataset.merge(data_id, left_index=True, right_index=True)
    dataset.index = naics_code['NAICS Code'].tolist()
    dataset = dataset.drop('NAICS Code', axis=1)
    dataset = dataset.sort_index()
    dataset = dataset.rename(columns={'title': 'Industry'})

    return dataset
def request(self, data):
    """Fetch the most recent observation of a FRED series.

    :param data: mapping carrying the FRED ``series_id`` to request
    :return: the last observation of the series, or None when the series
        has no observations
    """
    current_app.logger.debug("Cache missed")
    fred = Fred(api_key=self.api_key)
    series = fred.get_series(data.get('series_id'))
    if series.empty:
        return None
    # Positional access to the final row. This replaces
    # ``last('1D').get(0)``, which relied on deprecated pandas behaviour
    # (NOTE(review): the two differ only for series with several
    # observations inside the final day).
    return series.iloc[-1]