def produce_contributions_csv_for_country(output_dir, country):
    """
    Write one CSV holding the contributions of every emergency for a country.

    Fetches the country's emergencies, fetches the contributions for each
    emergency id, drops the empty frames, concatenates the rest and writes the
    result to build_csv_path(output_dir, 'contributions', country=country).

    When there is nothing to concatenate, a warning is logged and no file is
    written.
    """
    # local import so this function does not rely on the module's import block
    import logging

    # first get all emergencies for this country
    # (could eliminate this duplicative call, but it's not expensive)
    emergencies = fts_queries.fetch_emergencies_json_for_country_as_dataframe(country)

    # then get all contributions corresponding to those emergencies
    list_of_contributions = [
        fts_queries.fetch_contributions_json_for_emergency_as_dataframe(emergency_id)
        for emergency_id in emergencies.index
    ]

    # materialise as a list so the emptiness check below is reliable even if
    # the helper hands back a lazy iterable
    list_of_non_empty_contributions = list(
        filter_out_empty_dataframes(list_of_contributions))

    # pd.concat raises ValueError when given no objects, which happens for a
    # country with no emergencies or with only empty contribution frames
    if not list_of_non_empty_contributions:
        logging.getLogger(__name__).warning(
            'No contributions found for country %s; skipping contributions CSV',
            country)
        return

    # concatenate into one big frame and export it
    contributions_master_frame = pd.concat(list_of_non_empty_contributions)
    write_dataframe_to_csv(
        contributions_master_frame,
        build_csv_path(output_dir, 'contributions', country=country))
def populate_pooled_fund_data(country):
    """
    Populate data on pooled funds (CERF, ERF, CHF) for one country.

    Gathers the contributions of every emergency fetched for ``country``,
    drops pledges and donors outside POOLED_FUNDS, sums the remaining amounts
    by (donor, year) and records, through add_row_to_values:

    * per donor and year: the amount, the amount as a fraction of that donor's
      global allocation for the year, and as a fraction of the country's total
      funding for the year (FY240/FY360/FY370 for CERF, FY380/FY500/FY510 for
      ERF, FY520/FY540/FY550 for CHF)
    * per year: pooled funding across the funds (FY620), total country funding
      (FY630) and their ratio (FY640)

    Returns without recording anything when no qualifying contributions exist.
    A fraction whose denominator is not positive is recorded as 0.

    Raises Exception if a summed donor is not one of CERF, ERF or CHF.
    """
    # indicator codes per donor:
    # (amount, share of global allocation, share of country funding)
    indicator_codes_by_donor = {
        DONOR_CERF: ('FY240', 'FY360', 'FY370'),
        DONOR_ERF: ('FY380', 'FY500', 'FY510'),
        DONOR_CHF: ('FY520', 'FY540', 'FY550'),
    }

    def fraction(numerator, denominator):
        # a zero denominator would otherwise raise ZeroDivisionError or yield
        # inf/nan depending on the operand types; "0" is recorded instead
        return numerator / denominator if denominator > 0 else 0

    def is_pooled_fund(donor):
        return donor in POOLED_FUNDS

    emergencies = fts_queries.fetch_emergencies_json_for_country_as_dataframe(country)

    contribution_dataframes_by_emergency = []
    # only the emergency ids (the frame's index) are needed
    for emergency_id in emergencies.index:
        contributions = fts_queries.fetch_contributions_json_for_emergency_as_dataframe(
            emergency_id)
        if contributions.empty:
            continue

        # note that is_allocation field is much cleaner and _almost_ gives the same answer,
        # but found 1 instance of contribution that did not have this field set
        # and yet looked like it should

        # exclude pledges
        contributions = contributions[contributions.status != FUNDING_STATUS_PLEDGE]
        # exclude non-CERF/ERF/CHF
        contributions = contributions[contributions.donor.apply(is_pooled_fund)]

        if contributions.empty:
            continue  # if not excluded, can mess up concat

        contribution_dataframes_by_emergency.append(contributions)

    if not contribution_dataframes_by_emergency:
        return

    contributions_overall = pd.concat(contribution_dataframes_by_emergency)

    # sum amount by donor-year
    amount_by_donor_year = contributions_overall.groupby(['donor', 'year']).amount.sum()

    # NOTE: Series.iteritems() and sum(level=...) are kept for the pandas
    # version this file targets; both were removed in pandas 2.0
    # (use .items() and .groupby(level=1).sum() when upgrading)
    for (donor, year), amount in amount_by_donor_year.iteritems():
        global_allocations = POOLED_FUND_CACHE.get_pooled_global_allocation_for_year(year)
        country_funding = COUNTRY_FUNDING_CACHE.get_total_country_funding_for_year(country, year)

        codes = indicator_codes_by_donor.get(donor)
        if codes is None:
            raise Exception('Unexpected donor:' + donor)
        amount_code, global_share_code, country_share_code = codes

        # note that 'global_allocations' is close to FTS report numbers but not
        # always exactly the same, so FY360, FY500, FY540 are perhaps slightly off
        add_row_to_values(amount_code, country, year, amount)
        add_row_to_values(global_share_code, country, year,
                          fraction(amount, global_allocations[donor]))
        add_row_to_values(country_share_code, country, year,
                          fraction(amount, country_funding))

    # collapse funding across the pooled funds for each year,
    # compare to total country funding
    for year, pooled_funding in amount_by_donor_year.sum(level=1).iteritems():
        country_funding = COUNTRY_FUNDING_CACHE.get_total_country_funding_for_year(country, year)

        add_row_to_values('FY620', country, year, pooled_funding)
        add_row_to_values('FY630', country, year, country_funding)
        add_row_to_values('FY640', country, year,
                          fraction(pooled_funding, country_funding))
def populate_pooled_fund_data(country):
    """
    Populate data on pooled funds (CERF, ERF, CHF) for one country.

    Collects the non-pledge contributions made by POOLED_FUNDS donors across
    all emergencies fetched for ``country`` and sums them by (donor, year).
    Then, for every year from YEAR_START to YEAR_END inclusive, records through
    add_row_to_values:

    * per fund: the amount (0. when the fund has no entry for that year), its
      fraction of the fund's global allocation, and its fraction of the
      country's total funding (FY240/FY360/FY370 for CERF, FY380/FY500/FY510
      for ERF, FY520/FY540/FY550 for CHF)
    * the pooled amount across the three funds (FY620) and the total country
      funding (FY630)

    A fraction whose denominator is not positive is recorded as 0.
    """
    emergencies = fts_queries.fetch_emergencies_json_for_country_as_dataframe(country)

    contribution_dataframes_by_emergency = []
    # only the emergency ids (the frame's index) are needed
    for emergency_id in emergencies.index:
        contributions = fts_queries.fetch_contributions_json_for_emergency_as_dataframe(
            emergency_id)
        if contributions.empty:
            continue

        # note that is_allocation field is much cleaner and _almost_ gives the same answer,
        # but found 1 instance of contribution that did not have this field set
        # and yet looked like it should

        # exclude pledges
        contributions = contributions[contributions.status != FUNDING_STATUS_PLEDGE]
        # exclude non-CERF/ERF/CHF
        contributions = contributions[
            contributions.donor.apply(lambda donor: donor in POOLED_FUNDS)]

        if contributions.empty:
            continue  # if not excluded, can mess up concat

        contribution_dataframes_by_emergency.append(contributions)

    if contribution_dataframes_by_emergency:
        contributions_overall = pd.concat(contribution_dataframes_by_emergency)
        # sum amount by donor-year
        amount_by_donor_year = contributions_overall.groupby(['donor', 'year']).amount.sum()
    else:
        # empty Series; explicit dtype avoids pandas' warning about the
        # changing default dtype of an empty Series
        amount_by_donor_year = pd.Series(dtype=float)

    # (donor, amount code, share-of-global-allocation code, share-of-country-funding code)
    donor_indicators = (
        (DONOR_CERF, 'FY240', 'FY360', 'FY370'),
        (DONOR_ERF, 'FY380', 'FY500', 'FY510'),
        (DONOR_CHF, 'FY520', 'FY540', 'FY550'),
    )

    for year in range(YEAR_START, YEAR_END + 1):
        # note that 'global_allocations' is close to FTS report numbers but not always exactly the same
        # - email sent to Sean Foo about this 2014-04-21
        # so FY360, FY500, FY540 are perhaps slightly off
        global_allocations = POOLED_FUND_CACHE.get_pooled_global_allocation_for_year(year)
        country_funding = COUNTRY_FUNDING_CACHE.get_total_country_funding_for_year(country, year)

        # note the divisions can have divide by 0, "0" is used as fraction instead
        # would maybe make more sense to use nan, but that will just show up as "empty" in exported CSV
        # probably a better option would be to just create indicator for country funding and global allocation,
        # but global allocation is problematic as it's not "per-region"
        pooled_funding = 0.
        for donor, amount_code, global_share_code, country_share_code in donor_indicators:
            donor_global_allocations = global_allocations[donor]

            donor_year = (donor, year)
            amount = 0.
            if donor_year in amount_by_donor_year:
                amount = amount_by_donor_year[donor_year]

            add_row_to_values(amount_code, country, year, amount)
            add_row_to_values(
                global_share_code, country, year,
                amount / donor_global_allocations if donor_global_allocations > 0 else 0)
            add_row_to_values(
                country_share_code, country, year,
                amount / country_funding if country_funding > 0 else 0)

            pooled_funding += amount

        add_row_to_values('FY620', country, year, pooled_funding)
        add_row_to_values('FY630', country, year, country_funding)
def produce_emergencies_csv_for_country(output_dir, country):
    """
    Fetch the emergencies for a country and export them as a CSV.

    The destination is whatever build_csv_path returns for
    (output_dir, 'emergencies', country=country).
    """
    frame = fts_queries.fetch_emergencies_json_for_country_as_dataframe(country)
    destination = build_csv_path(output_dir, 'emergencies', country=country)
    write_dataframe_to_csv(frame, destination)
def populate_pooled_fund_data(country):
    """
    Populate data on pooled funds (CERF, ERF, CHF)

    Sums the non-pledge contributions of POOLED_FUNDS donors by (donor, year)
    across all emergencies fetched for ``country``. For each year from
    YEAR_START to YEAR_END inclusive it then records, via add_row_to_values:

    * CERF: amount FY240, share of global CERF allocation FY360,
      share of country funding FY370
    * ERF: amount FY380, share of global ERF allocation FY500,
      share of country funding FY510
    * CHF: amount FY520, share of global CHF allocation FY540,
      share of country funding FY550
    * pooled amount across the three funds FY620, total country funding FY630

    A fund with no entry for a year counts as 0.; a share whose denominator is
    not positive is recorded as 0.
    """
    def share(part, whole):
        # note the divisions can have divide by 0, "0" is used as fraction instead
        # would maybe make more sense to use nan, but that will just show up as "empty" in exported CSV
        # probably a better option would be to just create indicator for country funding and global allocation,
        # but global allocation is problematic as it's not "per-region"
        return part / whole if whole > 0 else 0

    def is_pooled_fund(donor):
        return donor in POOLED_FUNDS

    emergencies = fts_queries.fetch_emergencies_json_for_country_as_dataframe(
        country)

    contribution_dataframes_by_emergency = []
    # the per-row data is never used, so walk the index (emergency ids) only
    for emergency_id in emergencies.index:
        contributions = fts_queries.fetch_contributions_json_for_emergency_as_dataframe(
            emergency_id)
        if contributions.empty:
            continue

        # note that is_allocation field on contributions is much cleaner and _almost_ gives the same answer,
        # but found 1 instance of contribution that did not have this field set
        # and yet looked like it should

        # exclude pledges
        contributions = contributions[
            contributions.status != FUNDING_STATUS_PLEDGE]
        # exclude non-CERF/ERF/CHF
        contributions = contributions[contributions.donor.apply(is_pooled_fund)]

        if contributions.empty:
            continue  # if not excluded, can mess up concat

        contribution_dataframes_by_emergency.append(contributions)

    if contribution_dataframes_by_emergency:
        # combine the data across emergencies
        contributions_overall = pd.concat(contribution_dataframes_by_emergency)
        # sum amount by donor-year
        amount_by_donor_year = contributions_overall.groupby(
            ['donor', 'year']).amount.sum()
    else:
        # empty Series; dtype given explicitly so pandas does not warn about
        # the default dtype of an empty Series
        amount_by_donor_year = pd.Series(dtype=float)

    def amount_for(donor, year):
        # 0. when the donor has no summed contributions for that year
        key = (donor, year)
        if key in amount_by_donor_year:
            return amount_by_donor_year[key]
        return 0.

    # (donor, amount code, share-of-global-allocation code, share-of-country-funding code)
    indicators = [
        (DONOR_CERF, 'FY240', 'FY360', 'FY370'),
        (DONOR_ERF, 'FY380', 'FY500', 'FY510'),
        (DONOR_CHF, 'FY520', 'FY540', 'FY550'),
    ]

    for year in range(YEAR_START, YEAR_END + 1):
        # note that 'global_allocations' is close to FTS report numbers but not always exactly the same
        # see the notes on get_pooled_global_allocation_for_year
        # so FY360, FY500, FY540 are perhaps slightly off
        global_allocations = POOLED_FUND_CACHE.get_pooled_global_allocation_for_year(
            year)
        # fetched once per year and reused for both the shares and FY630
        country_funding = COUNTRY_FUNDING_CACHE.get_total_country_funding_for_year(
            country, year)

        pooled_funding = 0.
        for donor, amount_code, global_share_code, country_share_code in indicators:
            amount = amount_for(donor, year)

            add_row_to_values(amount_code, country, year, amount)
            add_row_to_values(
                global_share_code, country, year,
                share(amount, global_allocations[donor]))
            add_row_to_values(
                country_share_code, country, year,
                share(amount, country_funding))

            pooled_funding += amount

        add_row_to_values('FY620', country, year, pooled_funding)
        add_row_to_values('FY630', country, year, country_funding)