def __init__(self):
    """
    Start with an empty per-year cache and fill the ISO code -> country
    name lookup from the country list returned by fts_queries.
    """
    self.year_cache = {}
    self.country_iso_code_to_name = {}

    iso_to_name = self.country_iso_code_to_name
    country_frame = fts_queries.fetch_countries_json_as_dataframe()
    # the frame's index value is not needed, only two columns of each row
    for _, country in country_frame.iterrows():
        iso_to_name[country['iso_code_A']] = country['name']
def confirm_country_for_every_appeal(year=2013):
    """
    Confirm that every appeal has a country entry
    (this would have failed on legit countries up until recently)

    year: the year whose appeals are checked; defaults to 2013, the value
    that used to be hard-coded here.

    Nothing is returned. One line is printed for every appeal whose country
    does not match exactly one entry of the countries list by name, so a
    name that occurs more than once in the countries list is reported too.
    """
    countries = fts_queries.fetch_countries_json_as_dataframe()
    appeals = fts_queries.fetch_appeals_json_for_year_as_dataframe(year)

    # Count every country name once up front instead of comparing the whole
    # name column against each appeal. Missing names are dropped by
    # value_counts, so a missing appeal country never matches. A countries
    # frame without a 'name' column (e.g. an empty frame) matches nothing.
    if 'name' in countries:
        matches_by_name = countries['name'].value_counts().to_dict()
    else:
        matches_by_name = {}

    for appeal_id, appeal_row in appeals.iterrows():
        appeal_country = appeal_row['country']
        if matches_by_name.get(appeal_country, 0) != 1:
            # a single pre-formatted argument prints the same text whether
            # print is the Python 2 statement or the print function
            print("No country entry found for appeal %s which has country %s"
                  % (appeal_id, appeal_country))
def confirm_country_for_every_emergency(year=2013):
    """
    Confirm that every emergency has a country entry

    year: the year whose emergencies are checked; defaults to 2013, the
    value that used to be hard-coded here.

    Nothing is returned. One line is printed for every emergency whose
    country does not match exactly one entry of the countries list by name,
    so a name that occurs more than once in the countries list is reported
    too.
    """
    countries = fts_queries.fetch_countries_json_as_dataframe()
    emergencies = fts_queries.fetch_emergencies_json_for_year_as_dataframe(year)

    # Count every country name once up front instead of comparing the whole
    # name column against each emergency. Missing names are dropped by
    # value_counts, so a missing emergency country never matches. A countries
    # frame without a 'name' column (e.g. an empty frame) matches nothing.
    if 'name' in countries:
        matches_by_name = countries['name'].value_counts().to_dict()
    else:
        matches_by_name = {}

    for emergency_id, emergency_row in emergencies.iterrows():
        emergency_country = emergency_row['country']
        if matches_by_name.get(emergency_country, 0) != 1:
            # a single pre-formatted argument prints the same text whether
            # print is the Python 2 statement or the print function
            print("No country entry found for emergency %s which has country %s"
                  % (emergency_id, emergency_country))
for year, pooled_funding in amount_by_donor_year.sum(level=1).iteritems(): country_funding = COUNTRY_FUNDING_CACHE.get_total_country_funding_for_year(country, year) add_row_to_values('FY620', country, year, pooled_funding) add_row_to_values('FY630', country, year, country_funding) add_row_to_values('FY640', country, year, pooled_funding/country_funding) def populate_data_for_regions(region_list): # cache organizations as it's an expensive call organizations = get_organizations_indexed_by_name() for region in region_list: print "Populating indicators for region", region populate_appeals_level_data(region) populate_organization_level_data(region, organizations) populate_pooled_fund_data(region) if __name__ == "__main__": # regions_of_interest = ['COL', 'KEN', 'YEM'] # regions_of_interest = ['SSD'] # useful for testing CHF regions_of_interest = fts_queries.fetch_countries_json_as_dataframe().iso_code_A populate_data_for_regions(regions_of_interest) # print get_values_as_dataframe() # print get_values_joined_with_indicators() write_values_as_scraperwiki_style_csv('/tmp') write_values_as_scraperwiki_style_sql('/home/')
def produce_countries_csv(output_dir):
    """
    Fetch the country list and write it to the path that build_csv_path
    gives for 'countries' in output_dir.
    """
    country_frame = fts_queries.fetch_countries_json_as_dataframe()
    csv_path = build_csv_path(output_dir, 'countries')
    write_dataframe_to_csv(country_frame, csv_path)
# NOTE(review): the statements down to write_dataframe_to_csv are the tail of
# a definition that begins before this excerpt; they are kept exactly as found.
if list_of_non_empty_contributions:
    contributions_master_frame = pd.concat(list_of_non_empty_contributions)
else:
    # we have a choice, missing file or empty file... here I go with empty file
    contributions_master_frame = pd.DataFrame()
write_dataframe_to_csv(contributions_master_frame, build_csv_path(output_dir, 'contributions', country=country))


def produce_csvs_for_country(base_output_dir, country):
    """
    Write the emergencies, appeals, projects and contributions CSVs for one
    country into <base_output_dir>/fts/per_country/<country>, creating that
    directory when it does not exist yet.
    """
    country_dir = os.path.join(base_output_dir, 'fts', 'per_country', country)
    if not os.path.exists(country_dir):
        os.makedirs(country_dir)

    # same order as the individual calls this loop replaces
    producers = (
        produce_emergencies_csv_for_country,
        produce_appeals_csv_for_country,
        produce_projects_csv_for_country,
        produce_contributions_csv_for_country,
    )
    for produce_csv in producers:
        produce_csv(country_dir, country)


if __name__ == "__main__":
    # Writes the global CSVs and then the per-country CSVs below '/tmp/'.
    # The starter countries for HDX were ['COL', 'KEN', 'YEM']; every
    # country code in the fetched country list is processed here.
    tmp_output_dir = '/tmp/'
    country_codes = fts_queries.fetch_countries_json_as_dataframe().iso_code_A

    produce_global_csvs(tmp_output_dir)
    for country_code in country_codes:
        produce_csvs_for_country(tmp_output_dir, country_code)
add_row_to_values('FY620', country, year, pooled_funding) add_row_to_values('FY630', country, year, country_funding) def populate_data_for_regions(region_list): """ Populate the various FTS data tuples for a list of regions """ # cache organizations as it's an expensive call organizations = get_organizations_indexed_by_name() for region in region_list: print "Populating indicators for region", region populate_appeals_level_data(region) populate_organization_level_data(region, organizations) populate_pooled_fund_data(region) if __name__ == "__main__": # regions_of_interest = ['COL', 'KEN', 'YEM'] # regions_of_interest = ['SSD'] # useful for testing CHF # regions_of_interest = ['AFG'] # useful for testing spotty data regions_of_interest = fts_queries.fetch_countries_json_as_dataframe( ).iso_code_A populate_data_for_regions(regions_of_interest) write_values_as_scraperwiki_style_csv('/tmp') write_values_as_scraperwiki_style_sql('/home/')