def test_ocha_feed_file_working(self):
    """Check live-OCHA-feed fallback behaviour using a file with Uzbekistan removed."""
    # Local countries file with the Uzbekistan row deleted, so lookups for it fail.
    countries = hxl.data(script_dir_plus_file('Countries_UZB_Deleted.csv', TestCountry), allow_local=True)
    Country.set_countriesdata(countries)
    # Uzbekistan is absent from the local data; other countries still resolve.
    assert Country.get_iso3_country_code('UZBEKISTAN') is None
    assert Country.get_iso3_country_code('south sudan') == 'SSD'
    # Restore the default OCHA url and clear the cached data so the next lookup
    # re-fetches: the live feed does contain Uzbekistan.
    Country.set_ocha_url()
    Country._countriesdata = None
    assert Country.get_iso3_country_code('UZBEKISTAN', use_live=True) == 'UZB'
    # With a broken OCHA url and the cache cleared, lookups still work —
    # presumably via an internal fallback data source (verify in Country).
    Country.set_ocha_url('NOTEXIST')
    Country._countriesdata = None
    assert Country.get_iso3_from_iso2('AF') == 'AFG'
def get_access(configuration, admininfo, downloader, scrapers=None):
    """Scrape humanitarian access data and build global/regional/country outputs.

    Reads a ranking CSV plus one xlsx sheet per access type, scores each
    country per sheet, counts top-3 constraint texts globally and per region,
    and returns three (headers, value-dicts, sources) groups:
    global top-3 stats, regional top-3 stats, and per-country severity values.

    Args:
        configuration: dict-like holding the 'access_constraints' section.
        admininfo: object exposing regions, countryiso3s and
            iso3_to_region_and_hrp (iso3 -> list of region/HRP keys).
        downloader: passed through to read_tabular/read_hdx.
        scrapers: optional list of scraper-name fragments; if given and none
            matches this function's name, empty results are returned.
    """
    name = inspect.currentframe().f_code.co_name
    if scrapers and not any(scraper in name for scraper in scrapers):
        # Scraper not selected for this run: same 9-tuple shape, all empty.
        return list(), list(), list(), list(), list(), list(), list(), list(), list()
    access_configuration = configuration['access_constraints']
    ranking_url = access_configuration['ranking_url']
    headers, rows = read_tabular(downloader, {'url': ranking_url, 'headers': 1, 'format': 'csv'})
    sheets = access_configuration['sheets']
    # Per sheet: countryiso -> list of that country's top-3 ranked constraints.
    constraint_rankings = {x: dict() for x in sheets}
    # Denominators for the percentage columns, per region plus 'global'.
    nocountries_per_region = {'global': 0}
    # Per region: sheet -> {text -> occurrence count} for top-3 selection.
    top3counts = {'global': dict()}
    for region in admininfo.regions:
        nocountries_per_region[region] = 0
        top3counts[region] = dict()
    for row in rows:
        countryiso = row['iso3']
        nocountries_per_region['global'] += 1
        for region in admininfo.iso3_to_region_and_hrp.get(countryiso, list()):
            nocountries_per_region[region] += 1
        for sheet in sheets:
            # Ranking columns are named '<sheet>_1'..'<sheet>_3'; skip sheets
            # that have no ranking columns in this CSV.
            if '%s_1' % sheet not in row:
                continue
            type_ranking = constraint_rankings.get(sheet, dict())
            for i in range(1, 4):
                constraint = row['%s_%d' % (sheet, i)]
                dict_of_lists_add(type_ranking, countryiso, constraint)
            constraint_rankings[sheet] = type_ranking
    data = dict()
    # Shared dict mutated per sheet; read_hdx presumably also fills in
    # date/source/source_url used in the return below — not visible here.
    datasetinfo = {'dataset': access_configuration['dataset'], 'headers': 1, 'format': 'xlsx'}
    for sheet, sheetinfo in sheets.items():
        datasetinfo['sheet'] = sheetinfo['sheetname']
        headers, rows = read_hdx(downloader, datasetinfo)
        datasheet = data.get(sheet, dict())
        for row in rows:
            countryiso = Country.get_iso3_country_code(row[sheetinfo['isocol']])
            if countryiso not in admininfo.countryiso3s:
                continue
            countrydata = datasheet.get(countryiso, dict())
            score = countrydata.get('score', 0)
            newscore = row[sheetinfo['scorecol']]
            textcol = sheetinfo.get('textcol')
            if textcol:
                # Sheet has a free-text column: scores accumulate (weighted).
                text = row[textcol]
                dict_of_lists_add(countrydata, 'text', (newscore, text))
                for region, top3countsregion in top3counts.items():
                    # Only count a country towards regions it belongs to.
                    if region != 'global' and region not in admininfo.iso3_to_region_and_hrp.get(countryiso, list()):
                        continue
                    top3countssheet = top3countsregion.get(sheet, dict())
                    if sheet == 'impact':
                        # Impact: any non-zero score counts the text.
                        if newscore != 0:
                            top3countssheet[text] = top3countssheet.get(text, 0) + 1
                    else:
                        # Constraints: only the maximum score (3) counts.
                        if newscore == 3:
                            top3countssheet[text] = top3countssheet.get(text, 0) + 1
                    top3countsregion[sheet] = top3countssheet
                weights = sheetinfo.get('weights')
                if weights:
                    weight = weights.get(text)
                    if weight:
                        newscore *= weight
                score += newscore
            else:
                # No text column (e.g. yes/no mitigation sheet): the score IS
                # the text, and it replaces rather than accumulates.
                dict_of_lists_add(countrydata, 'text', (newscore, newscore))
                for region, top3countsregion in top3counts.items():
                    if region != 'global' and region not in admininfo.iso3_to_region_and_hrp.get(countryiso, list()):
                        continue
                    top3countssheet = top3countsregion.get(sheet, dict())
                    if newscore == 'yes':
                        top3countssheet[sheet] = top3countssheet.get(sheet, 0) + 1
                    top3countsregion[sheet] = top3countssheet
                score = newscore
            countrydata['score'] = score
            datasheet[countryiso] = countrydata
        data[sheet] = datasheet
    # 7 output columns each for global and regional top-3 stats (desc + pct
    # per sheet, pct only for mitigation).
    gvaluedicts = [dict() for _ in range(7)]
    rvaluedicts = [dict() for _ in range(7)]
    for region, top3countsregion in top3counts.items():
        if region == 'global':
            valuedicts = gvaluedicts
        else:
            valuedicts = rvaluedicts
        for i, (sheet, top3countssheet) in enumerate(top3countsregion.items()):
            # Most frequent texts first; keep the top 3.
            sortedcounts = sorted(top3countssheet, key=top3countssheet.get, reverse=True)
            texts = list()
            pcts = list()
            for text in sortedcounts[:3]:
                texts.append(text)
                pcts.append(get_fraction_str(top3countssheet[text], nocountries_per_region[region]))
            if sheet == 'mitigation':
                # Mitigation publishes only the top percentage, no text.
                valuedicts[i * 2][region] = pcts[0]
            else:
                valuedicts[i * 2][region] = '|'.join(texts)
            valuedicts[i * 2 + 1][region] = '|'.join(pcts)
    # Per-country columns: [severity score, severity category, then one text
    # column per sheet starting at index 2].
    valuedicts = [dict() for _ in range(6)]
    severityscore = valuedicts[0]
    for i, sheet in enumerate(data):
        datasheet = data[sheet]
        for countryiso in datasheet:
            countrydata = datasheet[countryiso]
            # Highest (score, text) pair first; collect all texts tied at the top.
            ranked = sorted(countrydata['text'], reverse=True)
            top_value = ranked[0][0]
            texts = list()
            for value, text in countrydata['text']:
                if value == top_value:
                    # Only publish texts that appear in the country's ranked
                    # constraints (mitigation has no ranking filter).
                    if sheet == 'mitigation' or text in constraint_rankings[sheet][countryiso]:
                        texts.append(text)
            valuedicts[i + 2][countryiso] = '|'.join(texts)
            if 'constraints' in sheet:
                # Severity score sums across all constraint sheets.
                score = severityscore.get(countryiso, 0)
                score += countrydata['score']
                severityscore[countryiso] = score
    ranges = access_configuration['category']
    severitycategory = valuedicts[1]
    for countryiso in severityscore:
        score = severityscore.get(countryiso)
        if score is None:
            severitycategory[countryiso] = None
            continue
        severitycategory[countryiso] = process_range(ranges, score)
    logger.info('Processed access')
    grheaders = ['Access Constraints Into', 'Access Constraints Into Pct', 'Access Constraints Within', 'Access Constraints Within Pct', 'Access Impact', 'Access Impact Pct', 'Mitigation Pct']
    headers = ['Access Severity Score', 'Access Severity Category', 'Access Constraints Into', 'Access Constraints Within', 'Access Impact', 'Mitigation']
    grhxltags = ['#access+constraints+into+desc', '#access+constraints+into+pct', '#access+constraints+within+desc', '#access+constraints+within+pct', '#access+impact+desc', '#access+impact+pct', '#access+mitigation+pct']
    hxltags = ['#severity+access+num+score', '#severity+access+category+num', '#access+constraints+into+desc', '#access+constraints+within+desc', '#access+impact+desc', '#access+mitigation+desc']
    # Three groups of (headers+tags, value dicts, per-tag source tuples).
    return [grheaders, grhxltags], gvaluedicts, \
        [(hxltag, datasetinfo['date'], datasetinfo['source'], datasetinfo['source_url']) for hxltag in grhxltags], \
        [grheaders, grhxltags], rvaluedicts, \
        [(hxltag, datasetinfo['date'], datasetinfo['source'], datasetinfo['source_url']) for hxltag in grhxltags], \
        [headers, hxltags], valuedicts, \
        [(hxltag, datasetinfo['date'], datasetinfo['source'], datasetinfo['source_url']) for hxltag in hxltags]
def test_get_iso3_country_code(self):
    """Exercise exact and fuzzy ISO3 lookups against the offline country data."""
    # Exact lookups: iso3/iso2 codes and various official/abbreviated names.
    assert Country.get_iso3_country_code('jpn', use_live=False) == 'JPN'
    assert Country.get_iso3_country_code('Dem. Rep. of the Congo', use_live=False) == 'COD'
    assert Country.get_iso3_country_code('Russian Fed.', use_live=False) == 'RUS'
    assert Country.get_iso3_country_code('Micronesia (Federated States of)', use_live=False) == 'FSM'
    assert Country.get_iso3_country_code('Iran (Islamic Rep. of)', use_live=False) == 'IRN'
    assert Country.get_iso3_country_code('United Rep. of Tanzania', use_live=False) == 'TZA'
    assert Country.get_iso3_country_code('Syrian Arab Rep.', use_live=False) == 'SYR'
    assert Country.get_iso3_country_code('Central African Rep.', use_live=False) == 'CAF'
    assert Country.get_iso3_country_code('Rep. of Korea', use_live=False) == 'KOR'
    assert Country.get_iso3_country_code('St. Pierre and Miquelon', use_live=False) == 'SPM'
    assert Country.get_iso3_country_code('Christmas Isl.', use_live=False) == 'CXR'
    assert Country.get_iso3_country_code('Cayman Isl.', use_live=False) == 'CYM'
    assert Country.get_iso3_country_code('jp', use_live=False) == 'JPN'
    assert Country.get_iso3_country_code('Taiwan (Province of China)', use_live=False) == 'TWN'
    # Fuzzy lookups return (iso3, exact_match_flag).
    assert Country.get_iso3_country_code_fuzzy('jpn', use_live=False) == ('JPN', True)
    assert Country.get_iso3_country_code_fuzzy('ZWE', use_live=False) == ('ZWE', True)
    assert Country.get_iso3_country_code_fuzzy('Vut', use_live=False) == ('VUT', True)
    # Unknown names: None (or (None, False)) unless an exception class is given.
    assert Country.get_iso3_country_code('abc', use_live=False) is None
    with pytest.raises(LocationError):
        Country.get_iso3_country_code('abc', use_live=False, exception=LocationError)
    assert Country.get_iso3_country_code_fuzzy('abc', use_live=False) == (None, False)
    with pytest.raises(LocationError):
        Country.get_iso3_country_code_fuzzy('abc', use_live=False, exception=LocationError)
    # Fuzzy matches on partial/alternate names report exact=False.
    assert Country.get_iso3_country_code_fuzzy('United Kingdom', use_live=False) == ('GBR', False)
    assert Country.get_iso3_country_code_fuzzy('United Kingdom of Great Britain and Northern Ireland', use_live=False) == ('GBR', True)
    assert Country.get_iso3_country_code_fuzzy('united states', use_live=False) == ('USA', False)
    assert Country.get_iso3_country_code_fuzzy('united states of america', use_live=False) == ('USA', True)
    assert Country.get_iso3_country_code('UZBEKISTAN', use_live=False) == 'UZB'
    assert Country.get_iso3_country_code_fuzzy('UZBEKISTAN', use_live=False) == ('UZB', True)
    # Partial names fail exact lookup but succeed fuzzily.
    assert Country.get_iso3_country_code('Sierra', use_live=False) is None
    assert Country.get_iso3_country_code_fuzzy('Sierra', use_live=False) == ('SLE', False)
    assert Country.get_iso3_country_code('Venezuela', use_live=False) is None
    assert Country.get_iso3_country_code_fuzzy('Venezuela', use_live=False) == ('VEN', False)
    assert Country.get_iso3_country_code_fuzzy('Heard Isl.', use_live=False) == ('HMD', False)
    assert Country.get_iso3_country_code_fuzzy('Falkland Isl.', use_live=False) == ('FLK', False)
    assert Country.get_iso3_country_code_fuzzy('Czech Republic', use_live=False) == ('CZE', False)
    assert Country.get_iso3_country_code_fuzzy('Czech Rep.', use_live=False) == ('CZE', False)
    assert Country.get_iso3_country_code_fuzzy('Islamic Rep. of Iran', use_live=False) == ('IRN', False)
    assert Country.get_iso3_country_code_fuzzy('Dem. Congo', use_live=False) == ('COD', False)
    assert Country.get_iso3_country_code_fuzzy('Congo, Republic of', use_live=False) == ('COG', False)
    assert Country.get_iso3_country_code_fuzzy('Republic of the Congo', use_live=False) == ('COG', False)
    assert Country.get_iso3_country_code_fuzzy('Vietnam', use_live=False) == ('VNM', False)
    assert Country.get_iso3_country_code_fuzzy('South Korea', use_live=False) == ('KOR', False)
    assert Country.get_iso3_country_code_fuzzy('Korea Republic', use_live=False) == ('KOR', False)
    assert Country.get_iso3_country_code_fuzzy('Dem. Republic Korea', use_live=False) == ('PRK', False)
    assert Country.get_iso3_country_code_fuzzy('North Korea', use_live=False) == ('PRK', False)
    assert Country.get_iso3_country_code_fuzzy('Serbia and Kosovo: S/RES/1244 (1999)', use_live=False) == ('SRB', False)
    assert Country.get_iso3_country_code_fuzzy('U.S. Virgin Islands', use_live=False) == ('VIR', True)
    assert Country.get_iso3_country_code_fuzzy('U.K. Virgin Islands', use_live=False) == ('VGB', False)
    assert Country.get_iso3_country_code_fuzzy('Taiwan', use_live=False) == ('TWN', False)
    # Any exception class supplied by the caller is raised on failure.
    with pytest.raises(ValueError):
        Country.get_iso3_country_code('abc', use_live=False, exception=ValueError)
    with pytest.raises(ValueError):
        Country.get_iso3_country_code_fuzzy('abc', use_live=False, exception=ValueError)
def get_country_url_string(country_list):
    """Build the iso query-string fragment for a list of country names.

    Each name is resolved to its ISO3 code and then to its numeric M49 code;
    the codes are joined with the ':OR:iso=' separator. NOTE(review): the
    first code carries no 'iso=' prefix — presumably the caller prepends it;
    verify at the call site.
    """
    m49_codes = []
    for country in country_list:
        iso3 = Country.get_iso3_country_code(country)
        m49_codes.append(str(Country.get_m49_from_iso3(iso3)))
    return ':OR:iso='.join(m49_codes)
def test_country_conversion(self):
    """Spot-check that well-known country names resolve to their ISO3 codes."""
    expected_codes = {"Afghanistan": "AFG", "China": "CHN"}
    for country_name, iso3 in expected_codes.items():
        assert Country.get_iso3_country_code(country_name) == iso3
def get_camp_non_camp_populations(noncamp_types, camp_types, camp_overrides, datasets, downloader):
    """Split UNHCR camp population rows into camp / non-camp / excluded groups.

    Finds the most recent dataset whose title mentions 'displacement',
    downloads its first resource (sheet 'Tab15') and classifies each row's
    accommodation type against the comma-separated camp_types and
    noncamp_types. camp_overrides supplies per-camp overrides for
    'Accommodation Type', 'Country' and 'Population'.

    Returns:
        (all_camps_per_country, unhcr_non_camp, unhcr_camp, unhcr_camp_excluded)

    Raises:
        ValueError: if no matching UNHCR dataset is found.
    """
    noncamp_types = noncamp_types.split(',')
    camp_types = camp_types.split(',')
    # Pick the displacement dataset with the latest dataset date.
    dataset_unhcr = None
    latest_date = None
    for dataset in datasets:
        if 'displacement' in dataset['title'].lower():
            date = dataset.get_dataset_date_as_datetime()
            if latest_date is None or date > latest_date:
                dataset_unhcr = dataset
                latest_date = date
    if dataset_unhcr is None:
        raise ValueError('No UNHCR dataset found!')
    url = dataset_unhcr.get_resources()[0]['url']
    country_ind = 0  # assume first column contains country
    iso3 = None
    row = None
    prev_row = None
    all_camps_per_country = dict()
    unhcr_non_camp = dict()
    unhcr_camp = dict()
    unhcr_camp_excluded = dict()
    rowiter = downloader.get_tabular_rows(url, sheet='Tab15')
    # Skip preamble rows until the first row whose country column resolves to
    # an ISO3 code; the row just before it (prev_row) holds the headers.
    for row in rowiter:
        country = row[country_ind]
        iso3 = Country.get_iso3_country_code(country)
        if iso3 is not None:
            break
        prev_row = row
    # Locate the accommodation, location and population columns by inspecting
    # the header row; the population column is detected as the first value in
    # the current data row that parses as an int.
    accommodation_ind = None
    location_ind = None
    population_ind = None
    population = None
    for i, text in enumerate(prev_row):
        header = text.lower()
        value = row[i]
        if 'accommodation' in header:
            accommodation_ind = i
        elif 'location' in header and len(value) > 1:
            location_ind = i
        else:
            try:
                population = int(value)
                population_ind = i
                break
            except ValueError:
                pass
    campname = row[location_ind]

    def get_accommodation_type(name):
        # Prefer the override; otherwise read the current row. NOTE: closes
        # over `row`, which is rebound by the loop below on each iteration.
        accom_type = camp_overrides['Accommodation Type'].get(name)
        if accom_type is None:
            accom_type = row[accommodation_ind]
        else:
            logger.info('Overriding accommodation type to %s for %s' % (accom_type, name))
        return accom_type.lower()

    accommodation_type = get_accommodation_type(campname)

    def match_camp_types(name, accom_type, pop, iso):
        # Classify one camp row into the four result dicts (closed over above).
        if check_name_dispersed(name):
            # Dispersed populations are treated as the first non-camp type.
            accom_type = noncamp_types[0]
        found_camp_type = None
        for camp_type in camp_types:
            if camp_type in accom_type:
                found_camp_type = camp_type
                unhcr_camp[name] = pop, iso, found_camp_type
                break
        # Non-camp match takes precedence for found_camp_type if both hit.
        for noncamp_type in noncamp_types:
            if noncamp_type in accom_type:
                found_camp_type = noncamp_type
                append_value(unhcr_non_camp, iso, found_camp_type, name, pop)
                break
        if found_camp_type is None:
            # Unrecognised accommodation type: record as excluded.
            append_value(unhcr_camp_excluded, iso, accom_type, name, pop)
            append_value(all_camps_per_country, iso, accom_type, name, pop)
        else:
            append_value(all_camps_per_country, iso, found_camp_type, name, pop)

    # Process the first data row (consumed by the header-detection loop above).
    match_camp_types(campname, accommodation_type, population, iso3)
    # Continue with the remaining rows of the same iterator.
    for row in rowiter:
        country = row[country_ind]
        if not country:
            continue
        if 'NOTES' in country.upper():
            # Footer section reached: stop reading data.
            break
        iso3, match = Country.get_iso3_country_code_fuzzy(country)
        if iso3 is None:
            logger.warning('Country %s could not be matched to ISO3 code!' % country)
            continue
        else:
            if match is False:
                logger.info('Matched %s to ISO3: %s!' % (country, iso3))
        campname = row[location_ind]
        accommodation_type = get_accommodation_type(campname)
        population = int(row[population_ind])
        match_camp_types(campname, accommodation_type, population, iso3)
    # Add override-only camps that were not already seen in the sheet.
    for campname in sorted(camp_overrides['Population']):
        if campname in unhcr_camp:
            continue
        iso3 = camp_overrides['Country'][campname]
        accommodation_type = camp_overrides['Accommodation Type'][campname].lower()
        population = camp_overrides['Population'][campname]
        logger.info('Adding camp from override: %s (%s, %s): %d' % (campname, iso3, accommodation_type, population))
        match_camp_types(campname, accommodation_type, population, iso3)
    return all_camps_per_country, unhcr_non_camp, unhcr_camp, unhcr_camp_excluded
def update_fts(base_url, downloader, country_list, resource_updates):
    """Build the FTS funding CSV: plan requirements vs funding per country.

    For each country, downloads plan requirements and funding totals from the
    FTS API, merges them on plan id, computes a percentFunded column, applies
    HXL tags and writes the combined table to the configured path.

    Args:
        base_url: FTS API base url (endpoint paths are appended to it).
        downloader: object whose download(url) returns a response with .json().
        country_list: iterable of country names resolvable by Country.
        resource_updates: dict holding the output path at ['fts']['path'].
    """
    requirements_url = '%splan/country/' % base_url
    funding_url = '%sfts/flow?groupby=plan&countryISO3=' % base_url
    columns_to_keep = [
        'country', 'id', 'name', 'code', 'startDate', 'endDate', 'year',
        'revisedRequirements', 'totalFunding'
    ]
    hxl_names = {
        "country": "#country+name",
        "id": "#x_appeal+id",
        "name": "#x_appeal+name",
        "code": "#x_appeal+code",
        "revisedRequirements": "#x_requirement+x_usd+x_current",
        "endDate": "#date+end",
        "totalFunding": "#x_funding+x_usd",
        "startDate": "#date+start",
        "year": "#date+year",
        "percentFunded": "#x_requirement+x_met+x_percent"
    }
    frames = []
    for country in country_list:
        iso3 = Country.get_iso3_country_code(country)
        r = downloader.download('%s%s' % (requirements_url, iso3))
        data = r.json()['data']
        dfreq_norm = json_normalize(data)
        # BUGFIX: fillna returns a new Series; the original call discarded the
        # result, leaving missing ids as NaN ('nan' after astype(str) below).
        dfreq_norm['id'] = dfreq_norm['id'].fillna('missing')
        dfreq_loc = json_normalize(data, 'locations')
        dfreq_loc.rename(columns={'name': 'country'}, inplace=True)
        del dfreq_loc['id']
        dfreq_norm_loc = dfreq_norm.join(dfreq_loc)
        dfreq_year = json_normalize(data, 'years')
        del dfreq_year['id']
        dfreq = dfreq_norm_loc.join(dfreq_year)
        # ids are parsed as floats (e.g. '123.0'); strip the trailing '.0' so
        # they match the string ids from the funding endpoint for the merge.
        dfreq['id'] = dfreq.id.astype(str).str.replace('\\.0', '')
        r = downloader.download('%s%s' % (funding_url, iso3))
        data = r.json()['data']['report3']['fundingTotals']['objects'][0]['objectsBreakdown']
        dffund = json_normalize(data)
        df = dfreq.merge(dffund, on='id')
        df.totalFunding += df.onBoundaryFunding
        df.rename(columns={'name_x': 'name'}, inplace=True)
        # drop unwanted columns
        df = drop_columns(df, columns_to_keep)
        frames.append(df)
    # Single concat instead of DataFrame.append in the loop: append is
    # deprecated (removed in pandas 2.0) and quadratic when repeated.
    combined = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    # trim date strings to YYYY-MM-DD
    combined.startDate = combined.startDate.str[:10]
    combined.endDate = combined.endDate.str[:10]
    # add column for % funded
    combined['percentFunded'] = (pd.to_numeric(combined.totalFunding) /
                                 pd.to_numeric(combined.revisedRequirements)) * 100
    # sort
    combined.sort_values(['country', 'endDate'], ascending=[True, False],
                         inplace=True)
    # add HXL tags
    combined = hxlate(combined, hxl_names)
    # convert floats to string and trim (formatters don't work on columns with
    # mixed types). NOTE(review): str.contains('.') is a regex where '.'
    # matches any character, so every non-empty value passes the filter; if a
    # literal-dot filter was intended, regex=False would be needed — left
    # unchanged here to preserve behavior.
    combined['percentFunded'] = combined['percentFunded'].astype(str)
    combined['percentFunded'] = \
        combined['percentFunded'].loc[combined['percentFunded'].str.contains('.')].str.split('.').str[0]
    combined.to_csv(resource_updates['fts']['path'], encoding='utf-8',
                    index=False, date_format='%Y-%m-%d')