def get_BEA_data(path):
    """
    Fetches a set of ITA (International Transactions Accounts) indicators from
    the BEA API and writes them, concatenated column-wise, to a CSV at `path`.
    """
    USER_ID = "8766E2A1-3059-4FB9-9CCF-CE8BE40CE9A9"
    indicators = [
        'ExpGdsServIncRec', 'ImpGdsServIncPay', 'ImpGdsServ', 'SecIncPay',
        'FinAssetsExclFinDeriv', 'FinLiabsExclFinDeriv', 'StatDisc',
        'BalCurrAcct', 'NetLendBorrFinAcct']
    dataframes = []
    for item in indicators:
        dataframes.append(pybea.get_data(USER_ID, DataSetName='ITA',
                                         Indicator=item, AreaOrCountry='All',
                                         Frequency='QSA', ResultFormat='JSON'))
    table = pd.concat(dataframes, axis=1)
    table.to_csv(path)
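# Example usage (a sketch, not part of the original script): pull the ITA
# indicators and reload the result for a quick sanity check. The helper name
# and output filename are mine; pandas is assumed to be imported as pd at the
# top of this file, as elsewhere.
def example_fetch_ita(path='ita_quarterly_indicators.csv'):
    get_BEA_data(path)
    table = pd.read_csv(path)
    print(table.shape)
    return table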
def update_all_nipa():
    """
    Updates all NIPA data (in the NIPA_DATA directory) for the year 2000
    (the default; can be changed below).

    Parameters
    ----------
    None

    Returns
    -------
    dict
        Tables that failed to update, keyed by table name.
    """
    failed_dict = {}
    mb_remaining = 100
    requests_remaining = 100
    nipa_table_ids = pybea.get_parameter_values(UserID, 'NIPA',
                                                ParameterName='TableName',
                                                ResultFormat='JSON')
    tablenames = nipa_table_ids['TableName'].values
    for x in tablenames:
        temp = pybea.get_data(UserID, 'NIPA', TableName=x, Frequency='A',
                              Year='2000')
        # Compute how many megabytes each request is, so we can back off
        # before hitting the BEA API's per-minute data and request limits.
        size = sys.getsizeof(temp) / 1000000
        mb_remaining -= size
        requests_remaining -= 1
        temp.to_csv('../NIPA_DATA/{0}.csv'.format(x))
        time.sleep(1)
        if mb_remaining < 5:
            time.sleep(30)
            mb_remaining = 100
        if requests_remaining < 2:
            time.sleep(45)
            requests_remaining = 100
        if pybea.JSON_ERROR:
            failed_dict[x] = pybea.JSON_ERROR
        time.sleep(.75)
    return failed_dict
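# Example usage (a sketch, assuming the ../NIPA_DATA directory exists and
# UserID holds a valid BEA API key): run the full sweep and report anything
# that failed. The helper name is mine.
def example_refresh_all_nipa():
    failed = update_all_nipa()
    if failed:
        print('Tables that could not be updated:', sorted(failed))
    return failed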
def update_all_fa(year, frequency):
    """
    Updates all FixedAssets data (in the FA_DATA directory) for a user-defined
    year and frequency.

    Parameters
    ----------
    year : int
        Year to get data for.
    frequency : str
        'A' for annual, 'Q' for quarterly, 'M' for monthly.

    Returns
    -------
    dict
        Tables that failed to update, keyed by table name.
    """
    failed_dict = {}
    mb_remaining = 100
    requests_remaining = 100
    fa_table_ids = pybea.get_parameter_values(UserID, 'FixedAssets',
                                              ParameterName='TableName',
                                              ResultFormat='JSON')
    tablenames = fa_table_ids['TableName'].values
    for x in tablenames:
        temp = pybea.get_data(UserID, 'FixedAssets', TableName=x,
                              Frequency=frequency, Year=year)
        # Compute how many megabytes each request is
        # print('This request was ', sys.getsizeof(temp) / 1000000, 'megabytes')
        size = sys.getsizeof(temp) / 1000000
        mb_remaining -= size
        requests_remaining -= 1
        # print('You have ', mb_remaining, 'more megabytes before throttling and ',
        #       requests_remaining, 'request/s remaining before throttling.')
        temp.to_csv('../FA_DATA/{0}.csv'.format(x))
        time.sleep(1)
        if mb_remaining < 5:
            time.sleep(30)
            mb_remaining = 100
        if requests_remaining < 2:
            time.sleep(45)
            requests_remaining = 100
        if pybea.JSON_ERROR:
            failed_dict[x] = pybea.JSON_ERROR
        time.sleep(.75)
    return failed_dict
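# Example usage (a sketch, assuming ../FA_DATA exists): refresh every
# FixedAssets table for a single annual observation. The helper name and the
# default year are illustrative.
def example_refresh_all_fa(year=2017):
    return update_all_fa(year, 'A')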
def update_nipa(tablenames, frequency, year):
    """
    Updates NIPA data (in the NIPA_DATA directory) for the specified list of
    tables and parameters.

    Parameters
    ----------
    tablenames : list
        Tables to be updated.
    frequency : str
        'A' for annual, 'Q' for quarterly, 'M' for monthly.
    year : str or int
        Year to get data for.

    Returns
    -------
    dict
        Tables that failed to update, keyed by table name.
    """
    failed_dict = {}
    mb_remaining = 100
    requests_remaining = 100
    for x in tablenames:
        temp = pybea.get_data(UserID, 'NIPA', TableName=x,
                              Frequency=frequency, Year=year)
        # Compute how many megabytes each request is
        size = sys.getsizeof(temp) / 1000000
        mb_remaining -= size
        requests_remaining -= 1
        temp.to_csv('../NIPA_DATA/{0}.csv'.format(x))
        time.sleep(1)
        if mb_remaining < 5:
            time.sleep(30)
            mb_remaining = 100
        if requests_remaining < 2:
            time.sleep(45)
            requests_remaining = 100
        if pybea.JSON_ERROR:
            failed_dict[x] = pybea.JSON_ERROR
        time.sleep(.75)
    return failed_dict
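# Example usage (a sketch): refresh a hand-picked subset of NIPA tables rather
# than the whole dataset. The table IDs below also appear in main() further
# down in this file; the helper name and the year are illustrative.
def example_refresh_selected_nipa():
    return update_nipa(['T20200A', 'T20600'], frequency='A', year='2017')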
def update_fa(tablenames, frequency, year):
    """
    Updates FixedAssets data (in the FA_DATA directory) for the specified list
    of tables and parameters.

    Parameters
    ----------
    tablenames : list
        Tables to be updated.
    frequency : str
        'A' for annual, 'Q' for quarterly, 'M' for monthly.
    year : str or int
        Year to get data for.

    Returns
    -------
    dict
        Tables that failed to update, keyed by table name.
    """
    failed_dict = {}
    mb_remaining = 100
    requests_remaining = 100
    for x in tablenames:
        print(x)
        temp = pybea.get_data(UserID, 'FixedAssets', TableName=x,
                              Frequency=frequency, Year=year)
        size = sys.getsizeof(temp) / 1000000
        mb_remaining -= size
        requests_remaining -= 1
        print('You have ', mb_remaining, 'more megabytes before throttling and ',
              requests_remaining, 'request/s remaining before throttling.')
        temp.to_csv('../FA_DATA/{0}.csv'.format(x))
        time.sleep(1)
        if mb_remaining < 5:
            time.sleep(30)
            mb_remaining = 100
        if requests_remaining < 2:
            time.sleep(45)
            requests_remaining = 100
        if pybea.JSON_ERROR:
            failed_dict[x] = pybea.JSON_ERROR
        time.sleep(.75)
    return failed_dict
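# Example usage (a sketch): refresh a single FixedAssets table. FAAt101 is the
# table queried in main() below; the helper name and the year are illustrative.
def example_refresh_one_fa():
    return update_fa(['FAAt101'], frequency='A', year='2017')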
def main():
    """
    Tests all get_parameter_list, get_parameter_values, and get_data API calls.
    """
    dataset_list = pybea.get_data_set_list(UserID)
    print(dataset_list, '\n')
    print(pybea.get_parameter_list(UserID, 'NIPA'))
    print(pybea.get_parameter_values(UserID, 'NIPA', ParameterName='TableName',
                                     ResultFormat='JSON'))

    # See the documentation (https://apps.bea.gov/api/_pdf/bea_web_service_api_user_guide.pdf)
    # for the required params of each dataset.

    # NIPA Okay
    NIPA_example = pybea.get_data(UserID, 'NIPA', TableName='T31101',
                                  Frequency='Q', Year='2015')
    # Other tables of interest: T20600, T20700A, T20700B, T20801, T20803,
    # T20804, T20805, T20806, T20807
    NIPA_example.to_csv('test.csv')
    print('This is NIPA example: \n', NIPA_example)
    NIPA_example = pybea.get_data(UserID, 'NIPA', 'JSON', TableName='T20200A',
                                  Frequency='A, Q, M', Year='2017,2018,2019,2020')
    NIPA_example = pybea.get_data(UserID, 'NIPA', 'JSON', TableName='T20200A',
                                  Frequency='Q', Year='1950')

    # NIUnderlyingDetail Okay
    print('Get param list: ', pybea.get_parameter_list(UserID, 'NIUnderlyingDetail'))
    print('Get param values: ',
          pybea.get_parameter_values(UserID, 'NIUnderlyingDetail', 'TableName'))
    print('This is NIUnderlyingDetail: ',
          pybea.get_data(UserID=UserID, DataSetName='NIUnderlyingDetail',
                         TableName='U001BC', Frequency='A', Year='ALL'))

    # MNE Okay
    print('Get param list: ', pybea.get_parameter_list(UserID, 'MNE'))
    print('Get param values: ', pybea.get_parameter_values(UserID, 'MNE', 'OwnershipLevel'))
    print('This is MNE: ',
          pybea.get_data(UserID=UserID, DataSetName='MNE',
                         DirectionOfInvestment='Outward',
                         Classification='COUNTRY', Year='ALL'))

    # FixedAssets Okay
    print(pybea.get_parameter_list(UserID, 'FixedAssets'))
    print(pybea.get_parameter_values(UserID, 'FixedAssets', 'TableName'))
    print(pybea.get_data(UserID, 'FixedAssets', TableName='FAAt101', Year='ALL'))

    # ITA Okay
    print('Get param list: ', pybea.get_parameter_list(UserID, 'ITA'))
    print('Get param values: ', pybea.get_parameter_values(UserID, 'ITA', 'Indicator'))
    print('This is ITA: ',
          pybea.get_data(UserID=UserID, DataSetName='ITA', Year='ALL',
                         Indicator='BalCapAcct'))

    # IIP Okay
    print('Get param list: ', pybea.get_parameter_list(UserID, 'IIP'))
    print('Get param values: ', pybea.get_parameter_values(UserID, 'IIP', 'TypeOfInvestment'))
    print('This is IIP: ',
          pybea.get_data(UserID=UserID, DataSetName='IIP',
                         TypeOfInvestment='ALL', Year=2015))

    # InputOutput Okay
    print('Get param list: ', pybea.get_parameter_list(UserID, 'InputOutput'))
    print('Get param values: ', pybea.get_parameter_values(UserID, 'InputOutput', 'TableID'))
    print('This is InputOutput: ',
          pybea.get_data(UserID=UserID, DataSetName='InputOutput',
                         TableID='56', Year=2015))

    # IntlServTrade
    print('Get param list: ', pybea.get_parameter_list(UserID, 'IntlServTrade'))
    print('Get param values: ',
          pybea.get_parameter_values(UserID, 'IntlServTrade', 'AreaOrCountry'))
    print('This is IntlServTrade: ',
          pybea.get_data(UserID=UserID, DataSetName='IntlServTrade',
                         TradeDirection='All', AreaOrCountry='UnitedKingdom',
                         TypeOfService='WasteTreatAndDePol', Year=2018))

    # GDPbyIndustry Okay
    print('Get param list: ', pybea.get_parameter_list(UserID, 'GDPbyIndustry'))
    print('Get param values: ',
          pybea.get_parameter_values(UserID, 'GDPbyIndustry', 'TableID'))
    print('Get data: \n',
          pybea.get_data(UserID=UserID, DataSetName='GDPbyIndustry',
                         TableID='ALL', Frequency='A', Year='2016',
                         Industry='ALL'))
    print('Get data: \n',
          pybea.get_data(UserID, 'GDPbyIndustry', TableID=15, Frequency='A',
                         Year=2015, Industry='ALL'))

    # Regional Okay
    print('Get param list: ', pybea.get_parameter_list(UserID, 'Regional'))
    print('Get param values: ', pybea.get_parameter_values(UserID, 'Regional', 'LineCode'))
    print('This is Regional: ',
          pybea.get_data(UserID=UserID, DataSetName='Regional',
                         TableName='SQINC7H', GeoFIPS='00000', LineCode=100))

    # UnderlyingGDPbyIndustry Okay
    print('Get param list: ', pybea.get_parameter_list(UserID, 'UnderlyingGDPbyIndustry'))
    print('Get param values: ',
          pybea.get_parameter_values(UserID, 'UnderlyingGDPbyIndustry', 'Industry'))
    print('This is UnderlyingGDPbyIndustry: ',
          pybea.get_data(UserID=UserID, DataSetName='UnderlyingGDPbyIndustry',
                         TableID='213', Frequency='A', Industry='113FF',
                         Year='ALL'))
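# Standard entry point (a sketch, not in the original file): run the API
# smoke tests in main() only when this file is executed directly, not when it
# is imported.
if __name__ == '__main__':
    main()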
def update_all_fa_tag():
    """
    Updates all FixedAssets data for every available year, aggregates it into
    one .csv file, and writes it to the FA_ALL directory.

    Returns
    -------
    dict
        Tables that failed to update, keyed by table name.
    """
    failed_dict = {}
    mb_remaining = 100
    requests_remaining = 100
    fa_table_ids = pybea.get_parameter_values(UserID, 'FixedAssets',
                                              ParameterName='TableName',
                                              ResultFormat='JSON')
    tablenames = fa_table_ids['TableName'].values
    table_name_col = []
    series_code_col = []
    period_col = []
    data_val_col = []
    line_description_col = []
    for x in tablenames:
        temp = pybea.get_data(UserID, 'FixedAssets', TableName=x, Year='ALL')
        # Compute how many megabytes each request is
        size = sys.getsizeof(temp) / 1000000
        mb_remaining -= size
        requests_remaining -= 1
        table_name = temp['TableName']
        series_code = temp['SeriesCode']
        period = temp['TimePeriod']
        data_val = temp['DataValue']
        line_description = temp['LineDescription']
        table_name_col.extend(table_name)
        series_code_col.extend(series_code)
        period_col.extend(period)
        data_val_col.extend(data_val)
        line_description_col.extend(line_description)
        time.sleep(1)
        if mb_remaining < 5:
            time.sleep(55)
            mb_remaining = 100
            requests_remaining = 100
        if requests_remaining < 2:
            time.sleep(45)
            mb_remaining = 100
            requests_remaining = 100
        if pybea.JSON_ERROR:
            failed_dict[x] = pybea.JSON_ERROR
        time.sleep(1)
    aggregate_fa = pd.DataFrame()
    aggregate_fa['line_number'] = table_name_col
    aggregate_fa['line_name_short'] = line_description_col
    aggregate_fa['series_code'] = series_code_col
    aggregate_fa['year'] = period_col
    aggregate_fa['value'] = data_val_col
    aggregate_fa.to_csv('../FA_ALL/aggregate_fa.csv', index=False)
    aggregate_fa.to_csv('aggregate_fa.csv', index=False)
    return failed_dict
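# Example of working with the aggregated output (a sketch): reload the
# aggregate_fa.csv written above and pull out a single series. The helper name
# is mine; the series code argument is whichever code you are after.
def example_read_aggregate_fa(series_code):
    fa = pd.read_csv('aggregate_fa.csv')
    return fa[fa['series_code'] == series_code]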
def update_all_nipa_tag(frequency):
    """
    Generates one .csv file (in NIPA_ALL) containing all the NIPA data at a
    given frequency for all available years. The TAG model uses annual data
    for the NIPA dataset.

    Parameters
    ----------
    frequency : str
        'A', 'Q', or 'M'.

    Returns
    -------
    None
    """
    mb_remaining = 100
    requests_remaining = 100
    failures_remaining = 30
    nipa_table_ids = pybea.get_parameter_values(UserID, 'NIPA',
                                                ParameterName='TableName',
                                                ResultFormat='JSON')
    tablenames = nipa_table_ids['TableName'].values
    series_code_col = []
    period_col = []
    data_val_col = []
    table_name = []
    size = .5
    for x in tablenames:
        try:
            data = pybea.get_data(UserID, 'NIPA', TableName=x,
                                  Frequency=frequency, Year='ALL')
            series_code = data['SeriesCode']
            period = data['TimePeriod']
            data_val = data['DataValue']
            series_code_col.extend(series_code)
            period_col.extend(period)
            data_val_col.extend(data_val)
            size = (sys.getsizeof(data) / 1000000)
            table_name.append(x)
        except KeyError:
            # A KeyError here typically means the table isn't available at the
            # requested frequency. Back off before the error limit is reached.
            failures_remaining -= 1
            if failures_remaining < 3:
                time.sleep(60)
                failures_remaining = 30
        mb_remaining -= size
        requests_remaining -= 1
        if mb_remaining < 5 or requests_remaining < 3:
            time.sleep(60)
            mb_remaining = 100
            requests_remaining = 100
    aggregate_nipa = pd.DataFrame()
    aggregate_nipa['%SeriesCode'] = series_code_col
    aggregate_nipa['Period'] = period_col
    aggregate_nipa['Value'] = data_val_col
    aggregate_nipa.to_csv('../NIPA_ALL/aggregate_nipa_{0}.csv'.format(frequency),
                          index=False)
    aggregate_nipa.to_csv('aggregate_nipa_{0}.csv'.format(frequency), index=False)
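# Example usage (a sketch): build the annual aggregate that the TAG model
# consumes and reload it. The helper name is mine; the filename matches what
# update_all_nipa_tag() writes to the working directory.
def example_build_annual_nipa_aggregate():
    update_all_nipa_tag('A')
    return pd.read_csv('aggregate_nipa_A.csv')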
def drop_unused_cols(df):
    # Rescale DataValue to levels using the reported unit multiplier, then
    # drop the columns we no longer need.
    df.loc[:, 'DataValue'] = df['DataValue'] * 10**df['UNIT_MULT']
    return df.drop(['NoteRef', 'UNIT_MULT'], axis=1)


# we are interested in the following variables...
key_codes = [
    'POP_MI',     # Total MSA population
    'GDP_MP',     # Nominal GDP
    'RGDP_MP',    # Real GDP
    'PCRGDP_MP',  # Per capita real GDP
    'TPI_MI',     # Total personal income
    'PCPI_MI',    # Per capita personal income
    'DIR_MI',     # Dividends, interest, and rent
    'PCTR_MI',    # Personal current transfer receipts
    'WS_MI',      # Wage and salary disbursements
    'SUPP_MI',    # Supplements to wages and salaries
    'PROP_MI',    # Proprietors' income
]

# ...in the following years
years = ['2000', '2005', '2010']

# fetch the data from the BEA data API...
raw_dataframe = pybea.get_data(UserID, DataSetName='RegionalData',
                               KeyCodes=key_codes, GeoFips='MSA', Year=years)

# ...clean it and then save a copy to disk!
dataframe = drop_unused_cols(raw_dataframe)
dataframe.to_csv('../data/bea/raw_bea_metro_data.csv')
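# Worked example of the UNIT_MULT rescaling in drop_unused_cols (a sketch with
# made-up numbers): a DataValue of 12.3 reported with UNIT_MULT=6 is
# 12.3 * 10**6 = 12,300,000 once expressed in levels.
def example_unit_mult_scaling():
    demo = pd.DataFrame({'DataValue': [12.3, 4.5],
                         'UNIT_MULT': [6, 3],
                         'NoteRef': ['', '']})
    # Returns DataValue == [12300000.0, 4500.0] with the helper columns dropped
    return drop_unused_cols(demo)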