def get_regdata_table_info():
    """Write a summary of every RegionalData key code to a text file.

    Pulls the ``KeyCode`` parameter values of the ``RegionalData`` dataset,
    then for each key code pulls its ``Year`` parameter values and records
    the returned columns plus the number of distinct ``TimePeriod`` entries
    and the first and last of them in
    ``meta_data/table_info_RegionalData.txt``.

    Key codes whose year pull returns ``None`` are skipped.  If the key-code
    pull itself returns ``None`` a message is printed and the (empty) report
    is left behind.
    """
    args_dict = {'method': 'GetParameterValuesFiltered',
                 'dataset': 'RegionalData',
                 'targparam': 'KeyCode'}
    # ``with`` closes the report even if one of the pulls below raises.
    with open('meta_data/table_info_RegionalData.txt', 'w+') as f:
        df = bea.api_pull_to_df(args_dict)
        if df is None:
            print('BAD DF! no KeyCode list for RegionalData')
            return

        for key, des in zip(df['KeyCode'], df['Description']):
            args_dict = {'method': 'GetParameterValuesFiltered',
                         'dataset': 'RegionalData',
                         'keycode': key,
                         'targparam': 'Year'}
            df2 = bea.api_pull_to_df(args_dict)
            if df2 is None:
                continue

            f.write('For keycode \"%s\": %s\n' % (key, des))
            f.write(' cols: %s\n' % (df2.columns,))
            # unique() already yields an indexable array, so no ``.values``
            # fallback (and no bare ``except``) is needed.
            years = df2['TimePeriod'].unique()
            if len(years):
                f.write(' year entries: %d from %s to %s\n' % (
                    len(years), years[0], years[-1]))
            else:
                # Nothing to index into; report the empty result explicitly.
                f.write(' year entries: 0\n')
            f.write('\n')
def get_reginc_data(table):
    """Download every line of a RegionalIncome table at MSA level.

    The line codes of ``table`` are looked up first; each line is then
    pulled for all years with ``geo='MSA'``, tagged with ``line_id`` and
    ``line_desc`` columns, and written to its own CSV under
    ``data/reginc_seperate/``.  All successful pulls are combined into one
    frame that is written to ``data/reginc_<table>.csv``.

    Args:
        table: RegionalIncome table name passed to the API.

    Returns:
        The combined DataFrame, or ``None`` when the line-code lookup fails
        or no line could be pulled (nothing is written in that case).
    """
    import pandas as pd  # local import: only needed to combine the pulls

    args_dict = {'method': 'GetParameterValuesFiltered',
                 'dataset': 'RegionalIncome',
                 'targparam': 'LineCode',
                 'table': table,
                 'verbose': 1}
    line_df = bea.api_pull_to_df(args_dict)
    if line_df is None:
        return None

    frames = []
    for line, desc in zip(line_df['Key'], line_df['Desc']):
        args_dict = {'method': 'GetData',
                     'dataset': 'RegionalIncome',
                     'table': table,
                     'year': 'ALL',
                     'geo': 'MSA',
                     'line': line,
                     'verbose': 1}
        tmp_df = bea.api_pull_to_df(args_dict)
        if tmp_df is None:
            print('BAD TMP DF! %s' % desc)
            continue

        # Progress output: first row before and after tagging.
        print(tmp_df.head(1))
        tmp_df['line_id'] = line
        tmp_df['line_desc'] = desc
        frames.append(tmp_df)
        print(tmp_df.head(1))
        print('')

        outfile_tmp = 'data/reginc_seperate/reginc_t%s_l%s.csv' % (
            table, line)
        tmp_df.to_csv(outfile_tmp)

    if not frames:
        # Every pull failed; there is nothing to combine or save.
        return None

    # One concat instead of repeated DataFrame.append, which re-copies the
    # accumulated frame on every iteration.
    df = pd.concat(frames, ignore_index=True)
    outfile = 'data/reginc_%s.csv' % table
    df.to_csv(outfile)
    return df
def get_regprod_rpcgdp():
    """Download the RegionalProduct ``PCRGDP_MAN`` component for all MSAs.

    The industry ids available for the component are looked up first; each
    industry is then pulled for all years with ``geo='MSA'`` and tagged with
    ``ind_id`` and ``ind_desc`` columns.  The successful pulls are combined
    and written to ``data/regdata_pcrgdp.csv``.

    Nothing is written when the industry lookup fails or no industry could
    be pulled.
    """
    import pandas as pd  # local import: only needed to combine the pulls

    # NOTE(review): 'PCRGDP_MAN' presumably is per-capita real GDP; confirm
    # against the API's component list.
    args_dict = {'method': 'GetParameterValuesFiltered',
                 'dataset': 'RegionalProduct',
                 'targparam': 'IndustryId',
                 'component': 'PCRGDP_MAN',
                 'verbose': 1}
    ind_df = bea.api_pull_to_df(args_dict)
    if ind_df is None:
        return

    frames = []
    for industry, desc in zip(ind_df['Key'], ind_df['Desc']):
        # Pull this industry's series for every MSA and year.
        args_dict = {'method': 'GetData',
                     'dataset': 'RegionalProduct',
                     'component': 'PCRGDP_MAN',
                     'year': 'ALL',
                     'geo': 'MSA',
                     'industry': industry,
                     'verbose': 1}
        tmp_df = bea.api_pull_to_df(args_dict)
        if tmp_df is None:
            print('BAD TMP DF! %s' % desc)
            continue

        # Progress output: first row before and after tagging.
        print(tmp_df.head(1))
        tmp_df['ind_id'] = industry
        tmp_df['ind_desc'] = desc
        frames.append(tmp_df)
        print(tmp_df.head(1))
        print('')

    if not frames:
        # Every pull failed; there is nothing to combine or save.
        return

    # One concat instead of repeated DataFrame.append, which re-copies the
    # accumulated frame on every iteration.
    df = pd.concat(frames, ignore_index=True)
    outfile = 'data/regdata_pcrgdp.csv'
    df.to_csv(outfile)
def get_regdata_pcpi():
    """Fetch the RegionalData ``PCPI_MI`` key code for MSAs and save it.

    Requests all years at ``geo='MSA'`` (personal income per capita, per the
    original author's note).  When the pull yields a frame it is printed and
    written to ``data/regdata_pcpimsa.csv``; a ``None`` result is ignored.
    """
    request = {'method': 'GetData',
               'dataset': 'RegionalData',
               'keycode': 'PCPI_MI',
               'year': 'ALL',
               'geo': 'MSA'}
    pcpi_frame = bea.api_pull_to_df(request)
    if pcpi_frame is None:
        return

    print(pcpi_frame)
    pcpi_frame.to_csv('data/regdata_pcpimsa.csv')
def get_parameter_lists(dataset):
    """Write the parameter list of ``dataset`` to ``meta_data/params_list.txt``.

    The report has two sections: a summary with one
    ``ParameterName``/``ParameterDescription`` pair per line, followed by
    the full printed frame returned by the API.

    Args:
        dataset: name of the dataset whose parameters are listed.

    If the pull returns ``None`` a message is printed and the (empty) report
    is left behind.
    """
    # args dict valid args:
    # method, dataset, table, line, year, geo
    args_dict = {'method': 'GetParameterList',
                 'dataset': dataset}
    # ``with`` closes the report even if the pull or formatting raises.
    with open('meta_data/params_list.txt', 'w+') as f:
        df = bea.api_pull_to_df(args_dict)
        if df is None:
            print('BAD DF! no parameter list for %s' % dataset)
            return

        f.write('\"%s\" Dataset Parameter List Summary:\n' % dataset)
        for name, des in zip(df['ParameterName'],
                             df['ParameterDescription']):
            f.write('%12s %s\n' % (name, des))
        f.write('\n')

        f.write('\"%s\" Dataset Parameter List full:\n' % dataset)
        # Wrap in a tuple so the frame is formatted as one value.
        f.write('%s\n' % (df,))
        f.write('\n')
        f.write('\n')
def get_reginc_table_info():
    """Probe every RegionalIncome table/line for MSA data and log the result.

    For each table name the line codes are looked up, and every line is
    pulled for all years with ``geo='MSA'``.  For each successful pull:

    * ``meta_data/RegionalIncome/line_lists.txt`` gets the line code and
      its description;
    * ``meta_data/RegionalIncome/full_table_info.txt`` gets the table, the
      line, the returned columns and the range of ``TimePeriod`` entries.

    A progress line with running success/failure counts is rewritten on
    stdout while the probe runs.  Tables whose line lookup returns ``None``
    are skipped; if the table lookup itself returns ``None`` the function
    returns after printing it.
    """
    # ``with`` closes both reports even if one of the pulls below raises.
    with open('meta_data/RegionalIncome/full_table_info.txt', 'w+') as f, \
            open('meta_data/RegionalIncome/line_lists.txt', 'w+') as f_temp:
        args_dict = {'method': 'GetParameterValuesFiltered',
                     'dataset': 'RegionalIncome',
                     'targparam': 'TableName'}
        df = bea.api_pull_to_df(args_dict)
        print(df)
        if df is None:
            return

        count1max = len(df)
        msa_good_count = 0
        msa_bad_count = 0
        for count1, (key, des) in enumerate(zip(df['Key'], df['Desc'])):
            args_dict = {'method': 'GetParameterValuesFiltered',
                         'dataset': 'RegionalIncome',
                         'table': key,
                         'targparam': 'LineCode'}
            df2 = bea.api_pull_to_df(args_dict)
            print(df2)
            if df2 is None:
                continue

            count2max = len(df2)
            for count2, (key2, des2) in enumerate(
                    zip(df2['Key'], df2['Desc'])):
                # '\r' rewrites the same console line on every update.
                sys.stdout.write(
                    '\rTotalProgress: %d/%d SubProgress %d/%d' % (
                        count1, count1max, count2, count2max) +
                    ' msa compatible:%d noncompatible:%d ' % (
                        msa_good_count, msa_bad_count))
                sys.stdout.flush()

                args_dict = {'method': 'GetData',
                             'dataset': 'RegionalIncome',
                             'table': key,
                             'line': key2,
                             'geo': 'MSA',
                             'year': 'ALL'}
                df3 = bea.api_pull_to_df(args_dict)
                if df3 is None:
                    msa_bad_count += 1
                    continue

                msa_good_count += 1
                f_temp.write('%12s %s\n' % (key2, des2))
                f.write('For table \"%s\": %s\n' % (key, des))
                f.write(' line %s: %s\n' % (key2, des2))
                f.write(' cols: %s\n' % (df3.columns,))
                # unique() already yields an indexable array, so no
                # ``.values`` fallback (and no bare ``except``) is needed.
                years = df3['TimePeriod'].unique()
                if len(years):
                    f.write(' year entries: %d from %s to %s\n' % (
                        len(years), years[0], years[-1]))
                else:
                    # Nothing to index into; report the empty result.
                    f.write(' year entries: 0\n')
                f.write('\n')
def get_regprod_table_info():
    """Probe every RegionalProduct component/industry for MSA data.

    For each component the industry ids are looked up, and every industry
    is pulled for all years with ``geo='MSA'``.  Each successful pull is
    logged to ``meta_data/table_info_RegionalProduct.txt`` with the
    component, the industry, the returned columns and the range of
    ``TimePeriod`` entries.

    A progress line with running success/failure counts is rewritten on
    stdout while the probe runs.  A component is abandoned early once it
    has accumulated more than 5 failed pulls with fewer than 6 successful
    ones.  Components whose industry lookup returns ``None`` are skipped;
    if the component lookup itself returns ``None`` a message is printed
    and the (empty) report is left behind.
    """
    # ``with`` closes the report even if one of the pulls below raises.
    with open('meta_data/table_info_RegionalProduct.txt', 'w+') as f:
        args_dict = {'method': 'GetParameterValuesFiltered',
                     'dataset': 'RegionalProduct',
                     'targparam': 'Component'}
        df = bea.api_pull_to_df(args_dict)
        if df is None:
            print('BAD DF! no Component list for RegionalProduct')
            return

        count1max = len(df)
        msa_good_count = 0
        msa_bad_count = 0
        for count1, (key, des) in enumerate(zip(df['Key'], df['Desc'])):
            args_dict = {'method': 'GetParameterValuesFiltered',
                         'dataset': 'RegionalProduct',
                         'component': key,
                         'targparam': 'IndustryId'}
            df2 = bea.api_pull_to_df(args_dict)
            if df2 is None:
                continue

            count2max = len(df2)
            local_bad_count = 0
            local_good_count = 0
            for count2, (key2, des2) in enumerate(
                    zip(df2['Key'], df2['Desc'])):
                # '\r' rewrites the same console line on every update.
                sys.stdout.write(
                    '\rTotalProgress: %d/%d SubProgress %d/%d' % (
                        count1, count1max, count2, count2max) +
                    ' msa compatible:%d noncompatible:%d ' % (
                        msa_good_count, msa_bad_count))
                sys.stdout.flush()

                args_dict = {'method': 'GetData',
                             'dataset': 'RegionalProduct',
                             'component': key,
                             'industry': key2,
                             'geo': 'MSA',
                             'year': 'ALL'}
                df3 = bea.api_pull_to_df(args_dict)
                if df3 is None:
                    # Stop probing a component that keeps failing; the
                    # pull that triggers the break is not counted.
                    if local_bad_count > 5 and local_good_count < 6:
                        break
                    msa_bad_count += 1
                    local_bad_count += 1
                    continue

                msa_good_count += 1
                local_good_count += 1
                f.write('For component \"%s\": %s\n' % (key, des))
                f.write(' industry %s: %s\n' % (key2, des2))
                f.write(' cols: %s\n' % (df3.columns,))
                # unique() already yields an indexable array, so no
                # ``.values`` fallback (and no bare ``except``) is needed.
                years = df3['TimePeriod'].unique()
                if len(years):
                    f.write(' year entries: %d from %s to %s\n' % (
                        len(years), years[0], years[-1]))
                else:
                    # Nothing to index into; report the empty result.
                    f.write(' year entries: 0\n')
                f.write('\n')