def extend_columns(level, rows, adms, admininfo, *args):
    """Append one output row per admin unit to ``rows`` and return the columns used.

    Args:
        level: One of 'global', 'regional', 'national' or 'subnational';
            selects the identifying cells that start each row.
        rows: List that receives the newly built rows (mutated in place).
        adms: Admin unit identifiers to build rows for. ``None`` means a
            single row keyed by 'global'.
        admininfo: Object exposing ``iso3_to_region_and_hrp`` (read for the
            national level) and ``pcode_to_iso3`` / ``pcode_to_name`` (read
            for the subnational level). Not touched for other levels.
        *args: Iterables of column mappings (objects with ``.get(adm)``).
            Falsy arguments are skipped.

    Returns:
        The flattened list of column mappings taken from ``args``.

    Raises:
        ValueError: If a row has to be built for an unrecognised ``level``.
    """
    columns = [column for arg in args if arg for column in arg]
    if adms is None:
        adms = ['global']
    for adm in adms:
        if level == 'global':
            row = []
        elif level == 'regional':
            row = [adm]
        elif level == 'national':
            row = [
                adm,
                Country.get_country_name_from_iso3(adm),
                '|'.join(sorted(admininfo.iso3_to_region_and_hrp[adm])),
            ]
        elif level == 'subnational':
            countryiso3 = admininfo.pcode_to_iso3[adm]
            countryname = Country.get_country_name_from_iso3(countryiso3)
            adm1_name = admininfo.pcode_to_name[adm]
            row = [countryiso3, countryname, adm, adm1_name]
        else:
            # Without this branch ``row`` would be unbound (or silently reuse
            # the previous iteration's list) for a misspelt level.
            raise ValueError('Invalid level')
        # One value cell per column, in column order; missing values are None.
        row.extend(column.get(adm) for column in columns)
        rows.append(row)
    return columns
def test_validlocations(self, project_config_yaml):
    """Check Locations lookups with explicit, globally registered and default location lists.

    The St. Helena checks pass ``locations=`` explicitly and are run both
    before and after the Zambia/Paraguay list is registered through
    ``Locations.set_validlocations``. The final section resets the registered
    list to ``None`` after ``Configuration.setup(MyConfiguration())``.
    """
    Country.countriesdata(use_live=False)

    def check_explicit_st_helena():
        # Lookups against a list handed in per call.
        st_helena = [{'name': 'shn', 'title': 'St. Helena'}]
        assert Locations.get_HDX_code_from_location('sh', locations=st_helena) is None
        assert Locations.get_HDX_code_from_location_partial('sh', locations=st_helena) == (None, False)
        assert Locations.get_location_from_HDX_code('shn', locations=st_helena) == 'St. Helena'

    check_explicit_st_helena()

    # Lookups against a list registered on the Locations class.
    registered = [
        {'name': 'zmb', 'title': 'Zambia'},
        {'name': 'pry', 'title': 'Paraguay'},
    ]
    Locations.set_validlocations(registered)
    assert Locations.validlocations() == registered
    assert Locations.get_HDX_code_from_location_partial('NOT') == (None, False)
    assert Locations.get_location_from_HDX_code('pr') is None
    assert Locations.get_HDX_code_from_location('zmb') == 'ZMB'
    assert Locations.get_HDX_code_from_location_partial('zmb') == ('ZMB', True)
    assert Locations.get_HDX_code_from_location('Z') is None
    assert Locations.get_HDX_code_from_location_partial('Z') == ('ZMB', False)
    assert Locations.get_HDX_code_from_location_partial('Zambia') == ('ZMB', True)
    assert Locations.get_HDX_code_from_location_partial('ZAM') == ('ZMB', False)
    assert Locations.get_location_from_HDX_code('zmb', locations=registered) == 'Zambia'

    check_explicit_st_helena()

    # With the registered list cleared, lookups come from whatever
    # MyConfiguration supplies (defined elsewhere in the test module).
    Configuration.setup(MyConfiguration())
    Locations.set_validlocations(None)
    assert Locations.get_HDX_code_from_location('zaf') == 'ZAF'
    assert Locations.get_HDX_code_from_location_partial('zaf') == ('ZAF', True)
    assert Locations.get_location_from_HDX_code('zaf') == 'South Africa'
def configuration(self):
    """Set up global HDX state for tests: configuration, locations, countries and tags.

    Creates a read-only Configuration from the project YAML, registers three
    valid locations, loads country data with ``use_live=False`` and installs a
    hand-written approved vocabulary on ``Vocabulary``.
    """
    config_path = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(hdx_read_only=True,
                          user_agent='test',
                          project_config_yaml=config_path)
    location_pairs = (
        ('afg', 'Afghanistan'),
        ('tza', 'Tanzania'),
        ('world', 'World'),
    )
    Locations.set_validlocations(
        [{'name': code, 'title': title} for code, title in location_pairs])
    Country.countriesdata(use_live=False)
    # NOTE(review): presumably marks the tag mapping as already loaded so it
    # is not fetched remotely - confirm against Vocabulary.
    Vocabulary._tags_dict = True
    approved_tags = (
        'hxl',
        'violence and conflict',
        'displacement',
        'internally displaced persons - idp',
    )
    Vocabulary._approved_vocabulary = {
        'tags': [{'name': tag} for tag in approved_tags],
        'id': '4e61d464-4943-4e97-973a-84673c1aaa87',
        'name': 'approved',
    }
def configuration(self):
    """Set up global HDX state for tests and return the active configuration.

    Creates a Configuration with a dummy API key from the project YAML,
    registers Bangladesh as the only valid location, loads country data with
    ``use_live=False`` and installs a hand-written approved vocabulary.

    Returns:
        The result of ``Configuration.read()``.
    """
    config_path = join("tests", "config", "project_configuration.yml")
    Configuration._create(user_agent="test",
                          hdx_key="12345",
                          project_config_yaml=config_path)
    bangladesh = {"name": "bgd", "title": "Bangladesh"}
    Locations.set_validlocations([bangladesh])
    Country.countriesdata(use_live=False)
    # NOTE(review): presumably marks the tag mapping as already loaded so it
    # is not fetched remotely - confirm against Vocabulary.
    Vocabulary._tags_dict = True
    approved_tags = ("hxl", "refugees", "asylum", "population")
    Vocabulary._approved_vocabulary = {
        "tags": [{"name": tag} for tag in approved_tags],
        "id": "4e61d464-4943-4e97-973a-84673c1aaa87",
        "name": "approved",
    }
    return Configuration.read()
def calculate_regional_average(cls, val_type, datadict, iso3):
    """Return an average for ``iso3`` from the first region level that yields one.

    Entries 3, 2 and 1 of ``cls.region_levels`` are tried in that order. For
    each, the country's region code is read from its country info; if the
    code is present, the average over the countries in that region is
    computed with ``cls.calculate_average``. The first truthy average wins.

    Args:
        val_type: Label used only in the warning messages.
        datadict: Data handed through to ``cls.calculate_average``.
        iso3: ISO3 code of the country needing a substitute value.

    Returns:
        Tuple ``(average, region code)``. The region code is an ``int`` for a
        regional average and the string '001' for the global fallback.
    """
    countryinfo = Country.get_country_info_from_iso3(iso3)
    for level in (3, 2, 1):
        region_level = cls.region_levels[level]
        code_tag = Column.parse(
            '#region+code+%s' % region_level).get_display_tag(sort_attributes=True)
        regioncode = countryinfo[code_tag]
        if not regioncode:
            # Country has no region at this level; try the next one.
            continue
        regioncode = int(regioncode)
        name_tag = Column.parse(
            '#region+%s+name+preferred' % region_level).get_display_tag(sort_attributes=True)
        regionname = countryinfo[name_tag]
        countries_in_region = Country.get_countries_in_region(regioncode)
        avg = cls.calculate_average(datadict, countries_in_region)
        if avg:
            logger.warning('%s: %s - Using %s (%s) average' %
                           (iso3, val_type, regionname, region_level))
            return avg, regioncode
    logger.warning('%s: %s - Using global average' % (iso3, val_type))
    return cls.calculate_average(datadict), '001'
def get_countriesdata(download_url, downloader):
    """Read the tabular download and group its rows by ISO3 country code.

    Each row's 'country' value is resolved to an ISO3 code through
    ``Country.get_iso3_country_code_fuzzy`` (raising ``ValueError`` when it
    cannot be resolved); the result is cached per country name. Every row
    gains an 'iso3' key and is added to the per-country list.

    Args:
        download_url: Location passed to ``downloader.get_tabular_rows``.
        downloader: Object providing ``get_tabular_rows``.

    Returns:
        Tuple ``(countries, headers, countriesdata)``: one dict per distinct
        country name (keys 'iso3', 'countryname', 'origname'), the header
        list with 'iso3', 'start_year' and 'end_year' inserted, and a dict
        mapping ISO3 code to its rows.
    """
    headers, iterator = downloader.get_tabular_rows(download_url,
                                                    headers=1,
                                                    dict_form=True)
    iso3_by_name = {}
    rows_by_iso3 = {}
    countries = []
    for row in iterator:
        original_name = row['country']
        countryiso = iso3_by_name.get(original_name)
        if countryiso is None:
            # First time this name is seen: resolve it and record the country.
            countryiso, _ = Country.get_iso3_country_code_fuzzy(
                original_name, exception=ValueError)
            iso3_by_name[original_name] = countryiso
            country_entry = {
                'iso3': countryiso,
                'countryname': Country.get_country_name_from_iso3(countryiso),
                'origname': original_name,
            }
            countries.append(country_entry)
        row['iso3'] = countryiso
        dict_of_lists_add(rows_by_iso3, countryiso, row)
    # Insert the highest position first so the later inserts at index 3 do
    # not change where 'iso3' was meant to land relative to the originals.
    headers.insert(30, 'iso3')
    for extra_header in ('end_year', 'start_year'):
        headers.insert(3, extra_header)
    return countries, headers, rows_by_iso3
def configuration(self):
    """Set up global HDX state for tests: configuration, locations, countries and tags.

    Creates a Configuration with a dummy API key from the project YAML,
    registers Afghanistan and Cameroon as valid locations, loads country data
    with ``use_live=False`` and installs a hand-written approved vocabulary.
    """
    config_path = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(user_agent='test',
                          hdx_key='12345',
                          project_config_yaml=config_path)
    location_pairs = (('afg', 'Afghanistan'), ('cmr', 'Cameroon'))
    Locations.set_validlocations(
        [{'name': code, 'title': title} for code, title in location_pairs])
    Country.countriesdata(use_live=False)
    # NOTE(review): presumably marks the tag mapping as already loaded so it
    # is not fetched remotely - confirm against Vocabulary.
    Vocabulary._tags_dict = True
    approved_tags = (
        'hxl',
        'violence and conflict',
        'protests',
        'security incidents',
    )
    Vocabulary._approved_vocabulary = {
        'tags': [{'name': tag} for tag in approved_tags],
        'id': '4e61d464-4943-4e97-973a-84673c1aaa87',
        'name': 'approved',
    }
def test_get_iso3_from_iso2(self):
    """Check ISO2 to ISO3 conversion for a known code, an unknown code and the exception path."""
    convert = Country.get_iso3_from_iso2

    known = convert('jp', use_live=False)
    assert known == 'JPN'

    unknown = convert('ab', use_live=False)
    assert unknown is None

    # With an exception class supplied, the unknown code must raise it.
    with pytest.raises(LocationError):
        convert('ab', use_live=False, exception=LocationError)
def configuration(self):
    """Set up global HDX state for tests: configuration, locations, countries and tags.

    Creates a read-only Configuration from the project YAML, registers
    Argentina as the only valid location, loads country data with
    ``use_live=False`` and installs a hand-written approved vocabulary.
    """
    config_path = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(hdx_read_only=True,
                          user_agent='test',
                          project_config_yaml=config_path)
    # Locations used in tests.
    argentina = {'name': 'arg', 'title': 'Argentina'}
    Locations.set_validlocations([argentina])
    Country.countriesdata(use_live=False)
    # NOTE(review): presumably marks the tag mapping as already loaded so it
    # is not fetched remotely - confirm against Vocabulary.
    Vocabulary._tags_dict = True
    approved_tags = (
        'sustainable development',
        'demographics',
        'socioeconomics',
        'education',
    )
    Vocabulary._approved_vocabulary = {
        'tags': [{'name': tag} for tag in approved_tags],
        'id': '4e61d464-4943-4e97-973a-84673c1aaa87',
        'name': 'approved',
    }
def configuration(self):
    """Set up global HDX state for tests and return the active configuration.

    Creates a read-only Configuration from the project YAML, registers
    Afghanistan as the only valid location, loads country data with
    ``use_live=False`` and installs a hand-written approved vocabulary.

    Returns:
        The result of ``Configuration.read()``.
    """
    config_path = join("tests", "config", "project_configuration.yml")
    Configuration._create(hdx_read_only=True,
                          user_agent="test",
                          project_config_yaml=config_path)
    # Locations used in tests.
    afghanistan = {"name": "afg", "title": "Afghanistan"}
    Locations.set_validlocations([afghanistan])
    Country.countriesdata(use_live=False)
    # NOTE(review): presumably marks the tag mapping as already loaded so it
    # is not fetched remotely - confirm against Vocabulary.
    Vocabulary._tags_dict = True
    approved_tags = ("hxl", "food security", "indicators")
    Vocabulary._approved_vocabulary = {
        "tags": [{"name": tag} for tag in approved_tags],
        "id": "4e61d464-4943-4e97-973a-84673c1aaa87",
        "name": "approved",
    }
    return Configuration.read()
def configuration(self):
    """Set up global HDX state for tests: configuration, locations, countries and tags.

    Creates a read-only Configuration from the project YAML, registers Guinea
    as the only valid location, loads country data with ``use_live=False``
    and installs a hand-written approved vocabulary.
    """
    config_path = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(hdx_read_only=True,
                          user_agent='test',
                          project_config_yaml=config_path)
    # Locations used in tests.
    guinea = {'name': 'gin', 'title': 'Guinea'}
    Locations.set_validlocations([guinea])
    Country.countriesdata(use_live=False)
    # NOTE(review): presumably marks the tag mapping as already loaded so it
    # is not fetched remotely - confirm against Vocabulary.
    Vocabulary._tags_dict = True
    approved_tags = ('hxl', 'food security', 'indicators')
    Vocabulary._approved_vocabulary = {
        'tags': [{'name': tag} for tag in approved_tags],
        'id': '4e61d464-4943-4e97-973a-84673c1aaa87',
        'name': 'approved',
    }
def convert_pcode_length(self, countryiso3, adm1_pcode, scrapername):
    """Try to rewrite an admin-1 p-code to the length used by its country.

    A p-code already in ``self.pcodes`` is returned unchanged. Otherwise the
    expected length is read from ``self.pcode_lengths``; only inputs of
    length 4 to 6 that differ from that length are converted, by swapping the
    ISO2/ISO3 country prefix and/or zero-padding the numeric suffix.

    Args:
        countryiso3: ISO3 code used to look up the expected p-code length.
        adm1_pcode: The p-code to convert.
        scrapername: Recorded alongside a successful conversion.

    Returns:
        The matching p-code from ``self.pcodes``, or ``None`` if no
        conversion produced a known p-code. A successful conversion is also
        added to ``self.matches``.
    """
    if adm1_pcode in self.pcodes:
        return adm1_pcode
    pcode_length = len(adm1_pcode)
    country_pcodelength = self.pcode_lengths.get(countryiso3)
    if not country_pcodelength:
        return None
    if pcode_length == country_pcodelength or not 4 <= pcode_length <= 6:
        return None

    if country_pcodelength == 4:
        # ISO3 prefix -> ISO2 prefix, keep the two-character suffix.
        iso2 = Country.get_iso2_from_iso3(adm1_pcode[:3])
        candidate = '%s%s' % (iso2, adm1_pcode[-2:])
    elif country_pcodelength == 5:
        if pcode_length == 4:
            # Same two-character prefix, suffix zero-padded to three.
            candidate = '%s0%s' % (adm1_pcode[:2], adm1_pcode[-2:])
        else:
            iso2 = Country.get_iso2_from_iso3(adm1_pcode[:3])
            candidate = '%s%s' % (iso2, adm1_pcode[-3:])
    elif country_pcodelength == 6:
        # ISO2 prefix -> ISO3 prefix; pad the suffix when it came from length 4.
        iso3 = Country.get_iso3_from_iso2(adm1_pcode[:2])
        if pcode_length == 4:
            candidate = '%s0%s' % (iso3, adm1_pcode[-2:])
        else:
            candidate = '%s%s' % (iso3, adm1_pcode[-3:])
    else:
        candidate = None

    if candidate not in self.pcodes:
        return None
    match_record = (scrapername, countryiso3, adm1_pcode,
                    self.pcode_to_name[candidate], 'pcode length conversion')
    self.matches.add(match_record)
    return candidate
def configuration(self):
    """Set up global HDX state for tests: configuration, locations, countries and tags.

    Creates a read-only Configuration from the project YAML, registers
    Afghanistan as the only valid location, loads country data with
    ``use_live=False`` and installs a hand-written approved vocabulary.
    """
    config_path = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(hdx_read_only=True,
                          user_agent='test',
                          project_config_yaml=config_path)
    # Locations used in tests.
    afghanistan = {'name': 'afg', 'title': 'Afghanistan'}
    Locations.set_validlocations([afghanistan])
    Country.countriesdata(use_live=False)
    # NOTE(review): presumably marks the tag mapping as already loaded so it
    # is not fetched remotely - confirm against Vocabulary.
    Vocabulary._tags_dict = True
    approved_tags = ('hxl', 'commodities', 'prices', 'markets')
    Vocabulary._approved_vocabulary = {
        'tags': [{'name': tag} for tag in approved_tags],
        'id': '4e61d464-4943-4e97-973a-84673c1aaa87',
        'name': 'approved',
    }
def configuration(self):
    """Set up global HDX state for tests and return the active configuration.

    Creates a read-only Configuration from the project YAML, registers
    Afghanistan as the only valid location, loads country data with
    ``use_live=False`` and installs a hand-written approved vocabulary.

    Returns:
        The result of ``Configuration.read()``.
    """
    config_path = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(hdx_read_only=True,
                          user_agent='test',
                          project_config_yaml=config_path)
    afghanistan = {'name': 'afg', 'title': 'Afghanistan'}
    Locations.set_validlocations([afghanistan])
    Country.countriesdata(use_live=False)
    # NOTE(review): presumably marks the tag mapping as already loaded so it
    # is not fetched remotely - confirm against Vocabulary.
    Vocabulary._tags_dict = True
    approved_tags = (
        'hxl',
        'indicators',
        'health',
        'education',
        'socioeconomic',
        'demographics',
        'development',
    )
    Vocabulary._approved_vocabulary = {
        'tags': [{'name': tag} for tag in approved_tags],
        'id': '4e61d464-4943-4e97-973a-84673c1aaa87',
        'name': 'approved',
    }
    return Configuration.read()
def get_iso2(s, use_live=False):
    """Resolve a country string to the '#country+code+v_iso2' value of its country info.

    Args:
        s: Country name or code to resolve.
        use_live: Passed positionally as the second argument of
            ``Country.get_iso3_country_code_fuzzy``.

    Returns:
        The '#country+code+v_iso2' entry of the matched country's info when
        ``Country`` is truthy and a match is found; ``None`` (implicitly)
        when ``Country`` is truthy but nothing matches; ``s.upper()`` when
        ``Country`` is falsy.
    """
    # NOTE(review): the source formatting was lost; the ``else`` below is
    # assumed to pair with ``if Country:`` (fallback when the Country helper
    # is unavailable) - confirm against the original file.
    if Country:
        # ``fuzzy`` is unpacked but not used.
        iso3, fuzzy = Country.get_iso3_country_code_fuzzy(s, use_live)
        if iso3:
            return Country.get_country_info_from_iso3(
                iso3)["#country+code+v_iso2"]
    else:
        return s.upper()
def test_get_iso3_from_m49(self):
    """Check M49 to ISO3 conversion for known codes, an unknown code and the exception path."""
    convert = Country.get_iso3_from_m49

    for m49, expected_iso3 in ((4, 'AFG'), (882, 'WSM')):
        assert convert(m49, use_live=False) == expected_iso3

    assert convert(9999, use_live=False) is None

    # With an exception class supplied, the unknown code must raise it.
    with pytest.raises(LocationError):
        convert(9999, use_live=False, exception=LocationError)
def configuration(self):
    """Set up global HDX state for tests and return the active configuration.

    Creates a Configuration for the 'feature' HDX site with a dummy API key
    from the project YAML, registers Afghanistan and Cameroon as valid
    locations, loads country data with ``use_live=False`` and installs a
    hand-written approved vocabulary.

    Returns:
        The result of ``Configuration.read()``.
    """
    config_path = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(hdx_site='feature',
                          user_agent='test',
                          hdx_key='12345',
                          project_config_yaml=config_path)
    location_pairs = (('afg', 'Afghanistan'), ('cmr', 'Cameroon'))
    Locations.set_validlocations(
        [{'name': code, 'title': title} for code, title in location_pairs])
    Country.countriesdata(use_live=False)
    # NOTE(review): presumably marks the tag mapping as already loaded so it
    # is not fetched remotely - confirm against Vocabulary.
    Vocabulary._tags_dict = True
    approved_tags = ('hxl', 'health', 'demographics')
    Vocabulary._approved_vocabulary = {
        'tags': [{'name': tag} for tag in approved_tags],
        'id': '4e61d464-4943-4e97-973a-84673c1aaa87',
        'name': 'approved',
    }
    return Configuration.read()
def test_expand_countryname_abbrevs(self):
    """Check abbreviation expansion for a plain code and a name with two abbreviations."""
    expand = Country.expand_countryname_abbrevs

    # No abbreviation present: the input is only upper-cased.
    assert expand('jpn') == ['JPN']

    expected_expansions = [
        'HAHA DEMOCRATIC FED. REPUBLIC',
        'HAHA DEMOCRATIC FEDERATION REPUBLIC',
        'HAHA DEMOCRATIC FEDERAL REPUBLIC',
        'HAHA DEMOCRATIC FEDERATED REPUBLIC',
    ]
    assert expand('Haha Dem. Fed. Republic') == expected_expansions
def test_get_country_name_from_m49(self):
    """Check M49 to country name lookup for known codes, an unknown code and the exception path."""
    name_from_m49 = Country.get_country_name_from_m49

    for m49, expected_name in ((4, 'Afghanistan'), (882, 'Samoa')):
        assert name_from_m49(m49, use_live=False) == expected_name

    assert name_from_m49(9999, use_live=False) is None

    # With an exception class supplied, the unknown code must raise it.
    with pytest.raises(LocationError):
        name_from_m49(9999, use_live=False, exception=LocationError)
def get_countries(countries_url, downloader):
    """Build the list of countries from the 'ISO Code' column of an xlsx table.

    Rows with a falsy 'ISO Code' are skipped. For the rest, the value is
    treated as an M49 code and converted to ISO3 and a country name through
    ``Country``.

    Args:
        countries_url: Location passed to ``downloader.get_tabular_rows``.
        downloader: Object providing ``get_tabular_rows``.

    Returns:
        List of dicts with keys 'm49', 'iso3' and 'countryname'.
    """
    _, iterator = downloader.get_tabular_rows(countries_url,
                                              headers=1,
                                              dict_form=True,
                                              format='xlsx')
    countries = []
    for row in iterator:
        m49 = row['ISO Code']
        if m49:
            iso3 = Country.get_iso3_from_m49(m49)
            countries.append({
                'm49': m49,
                'iso3': iso3,
                'countryname': Country.get_country_name_from_iso3(iso3),
            })
    return countries
def test_get_country_name_from_iso2(self):
    """Check ISO2 to country name lookup, including case handling, misses and the exception path."""
    name_from_iso2 = Country.get_country_name_from_iso2

    def check(cases):
        for iso2, expected_name in cases:
            actual = name_from_iso2(iso2)
            if expected_name is None:
                assert actual is None
            else:
                assert actual == expected_name

    check((
        ('jp', 'Japan'),
        ('ab', None),
        ('Pl', 'Poland'),
        ('SG', 'Singapore'),
        ('SGP', None),  # a three-letter code is not an ISO2 code
    ))

    # With an exception class supplied, the miss must raise it.
    with pytest.raises(LocationError):
        name_from_iso2('SGP', exception=LocationError)

    check((
        ('VE', 'Venezuela (Bolivarian Republic of)'),
        ('TW', 'Taiwan (Province of China)'),
        ('PS', 'oPt'),
    ))
def test_ocha_feed_file_working(self):
    """Check country lookups against injected data, the default feed URL and a bad URL.

    The statements mutate class-level state on ``Country`` and must run in
    this order.
    """
    # Inject country data read from a local CSV (per the file name, one with
    # Uzbekistan removed), so that lookup must miss.
    countries = hxl.data(script_dir_plus_file('Countries_UZB_Deleted.csv',
                                              TestCountry),
                         allow_local=True)
    Country.set_countriesdata(countries)
    assert Country.get_iso3_country_code('UZBEKISTAN') is None
    assert Country.get_iso3_country_code('south sudan') == 'SSD'
    # Restore the default OCHA URL and clear the cached data.
    # NOTE(review): presumably clearing _countriesdata forces a reload on the
    # next lookup - confirm against Country.
    Country.set_ocha_url()
    Country._countriesdata = None
    assert Country.get_iso3_country_code('UZBEKISTAN', use_live=True) == 'UZB'
    # Point at a URL that does not exist; the lookup below is still expected
    # to succeed (presumably through a bundled fallback - confirm).
    Country.set_ocha_url('NOTEXIST')
    Country._countriesdata = None
    assert Country.get_iso3_from_iso2('AF') == 'AFG'
def configuration(self):
    """Set up global HDX state for tests: configuration, locations and countries.

    Creates a read-only Configuration from the project YAML, registers
    Argentina as the only valid location and loads country data with
    ``use_live=False``.
    """
    config_path = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(hdx_read_only=True,
                          user_agent='test',
                          project_config_yaml=config_path)
    # Locations used in tests.
    argentina = {'name': 'arg', 'title': 'Argentina'}
    Locations.set_validlocations([argentina])
    Country.countriesdata(use_live=False)
def get_countriesdata(countries_url, downloader, country_correspondence):
    """Download a list of countries and provide mapping if necessary.

    A list of dictionaries is returned, each containing the following keys:
    iso3 - ISO 3 country code
    name - country name
    code - WFP country code
    wfp_countries - a list of dictionaries describing WFP countries that are
        part of the "ISO 3" country.

    Note: The data source (WFP countries) may contain countries that do not
    have their own ISO 3 code. Such WFP countries can be mapped to an ISO 3
    country using the country_correspondence attribute in the
    project_configuration.yml. All WFP countries mapped to the same ISO 3
    country will be listed in wfp_countries. Each ISO 3 country will appear
    at most once in the output.

    WFP countries that resolve to no ISO 3 code, either directly or through
    country_correspondence, are skipped and reported in one warning.

    Args:
        countries_url: Location passed to ``downloader.get_tabular_rows``.
        downloader: Object providing ``get_tabular_rows``.
        country_correspondence: Mapping from WFP country name to a name that
            resolves to an ISO 3 country.

    Returns:
        The country dictionaries sorted by (name, iso3).
    """
    countries = {}
    unknown = []

    for row in downloader.get_tabular_rows(countries_url,
                                           dict_rows=False,
                                           headers=1,
                                           format='csv'):
        name = row[0]
        sub_name = name
        code = row[1]

        iso3, _ = Country.get_iso3_country_code_fuzzy(name)
        if iso3 is None:
            name = country_correspondence.get(sub_name)
            if name is not None:
                iso3, _ = Country.get_iso3_country_code_fuzzy(name)
            if iso3 is None:
                # No mapping, or the mapped name did not resolve either.
                # Previously the latter created a bogus entry keyed by None.
                unknown.append(sub_name)
                continue

        country = countries.setdefault(
            iso3, dict(name=name, iso3=iso3, wfp_countries=[]))
        wfp_countries = country["wfp_countries"]
        wfp_countries.append(dict(name=sub_name, code=code))
        # Prefer the code of the WFP country carrying the current resolved
        # name; otherwise fall back to the first WFP country recorded.
        country["code"] = next(
            (x for x in wfp_countries if x["name"] == name),
            wfp_countries[0])["code"]

    if unknown:
        logger.warning(
            "Some countries were not recognized and are ignored:\n" +
            ",\n".join(unknown))

    return sorted(countries.values(), key=lambda c: (c["name"], c["iso3"]))
def get_countriesdata(countries_url, downloader):
    """Build the list of countries from the 'ISO Country Number' column of an xlsx table.

    Rows with a falsy 'ISO Country Number' are skipped. For the rest, the
    value is treated as an M49 code and converted to ISO3 and a country name
    through ``Country``.

    Args:
        countries_url: Location passed to ``downloader.get_tabular_rows``.
        downloader: Object providing ``get_tabular_rows``.

    Returns:
        List of dicts with keys 'm49', 'iso3' and 'countryname'.
    """
    table_rows = downloader.get_tabular_rows(countries_url,
                                             dict_rows=True,
                                             headers=1,
                                             format='xlsx')
    countries = []
    for row in table_rows:
        m49 = row['ISO Country Number']
        if m49:
            iso3 = Country.get_iso3_from_m49(m49)
            countries.append({
                'm49': m49,
                'iso3': iso3,
                'countryname': Country.get_country_name_from_iso3(iso3),
            })
    return countries
def forwards(apps, schema_editor):
    """Fill ``regionname`` and ``region_code`` on every ``core.Country`` row.

    For each row the country info is looked up once from its ``country_code``
    (via ``Country2.get_country_info_from_iso2``) and the first available of
    these is stored:

    1. sub-region name and code,
    2. main region name and code,
    3. the country's own preferred name and ISO2 code.

    Args:
        apps: Migration app registry; used to obtain the ``core.Country`` model.
        schema_editor: Unused; present to match the signature this function
            is called with.
    """
    Country = apps.get_model('core.Country')
    # (name key, code key) pairs in order of preference.
    region_keys = (
        ('#region+name+preferred+sub', '#region+code+sub'),
        ('#region+main+name+preferred', '#region+code+main'),
    )
    fallback_keys = ('#country+name+preferred', '#country+code+v_iso2')

    for processed, cnt in enumerate(Country.objects.all(), start=1):
        # Single lookup per row instead of repeating it for every field.
        info = Country2.get_country_info_from_iso2(cnt.country_code)
        for name_key, code_key in region_keys:
            if info[name_key]:
                break
        else:
            name_key, code_key = fallback_keys
        cnt.regionname = info[name_key]
        cnt.region_code = info[code_key]
        cnt.save()
        if processed % 300 == 0:
            print(processed)  # to keep track of progress
def configuration(self):
    """Set up global HDX state for tests: configuration, locations and countries.

    Creates a read-only Configuration from the project YAML, registers
    Afghanistan and the State of Palestine as valid locations and loads
    country data with ``use_live=False``.
    """
    config_path = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(hdx_read_only=True,
                          user_agent='test',
                          project_config_yaml=config_path)
    location_pairs = (('afg', 'Afghanistan'), ('pse', 'State of Palestine'))
    Locations.set_validlocations(
        [{'name': code, 'title': title} for code, title in location_pairs])
    Country.countriesdata(use_live=False)
def extend_columns(level, rows, adms, hrp_countries, region, adminone, headers, *args):
    """Add column values for each admin unit to ``rows``, reusing matching rows.

    For every admin unit the identifying cells are built according to
    ``level``. If a row in ``rows[2:]`` already starts with those cells, the
    new values are appended to that row; otherwise a new row is created,
    padded with ``None`` for every HXL tag in ``rows[1]`` (beyond the
    identifying cells) that is not in ``headers[1]``, and appended to ``rows``.

    Args:
        level: One of 'global', 'regional', 'national' or 'subnational'.
        rows: Output rows (mutated in place). ``rows[1]`` is read as the list
            of HXL tags; data rows start at index 2.
        adms: Admin unit identifiers; ``None`` means a single 'global' entry.
        hrp_countries: Container tested for membership to fill the 'Y'/'N'
            cell at national level.
        region: Object exposing ``iso3_to_region_and_hrp`` (national level).
        adminone: Object exposing ``pcode_to_iso3`` and ``pcode_to_name``
            (subnational level).
        headers: Sequence whose element 1 holds the HXL tags being added.
        *args: Iterables of column mappings (objects with ``.get(adm)``).
            Falsy arguments are skipped.

    Returns:
        The flattened list of column mappings taken from ``args``.

    Raises:
        ValueError: If a row has to be built for an unrecognised ``level``,
            or if 'GHO' is missing from a country's regions at national level.
    """
    columns = [column for group in args if group for column in group]
    if adms is None:
        adms = ['global']

    for adm in adms:
        if level == 'global':
            row = []
        elif level == 'regional':
            row = [adm]
        elif level == 'national':
            ishrp = 'Y' if adm in hrp_countries else 'N'
            regions = sorted(region.iso3_to_region_and_hrp[adm])
            regions.remove('GHO')
            countryname = Country.get_country_name_from_iso3(adm)
            row = [adm, countryname, ishrp, '|'.join(regions)]
        elif level == 'subnational':
            countryiso3 = adminone.pcode_to_iso3[adm]
            countryname = Country.get_country_name_from_iso3(countryiso3)
            row = [countryiso3, countryname, adm, adminone.pcode_to_name[adm]]
        else:
            raise ValueError('Invalid level')

        # Look for an existing data row whose leading cells equal this key.
        is_new_row = True
        for candidate in rows[2:]:
            differs = any(candidate[pos] != cell for pos, cell in enumerate(row))
            if not differs:
                row = candidate
                is_new_row = False
                break

        if is_new_row:
            # Pad for columns already present in the sheet that this call
            # does not provide; computed before the row is extended.
            padding = [
                None for hxltag in rows[1][len(row):]
                if hxltag not in headers[1]
            ]
            row.extend(padding)

        row.extend(column.get(adm) for column in columns)

        if is_new_row:
            rows.append(row)
    return columns
def get_slumratios(url, downloader):
    """Return the most recent slum ratio per country as a fraction.

    Columns whose header parses as an integer are treated as year columns.
    For each data row the 'CountryCode' value is converted from M49 to ISO3;
    rows whose code cannot be converted are skipped. Reading stops at the
    first empty row.

    The value stored is taken from the newest year column that holds a
    non-blank value. Previously every non-blank year overwrote the result
    while iterating newest-first, so the oldest value was the one kept.

    Args:
        url: Location passed to ``downloader.get_tabular_stream``.
        downloader: Object providing ``get_tabular_stream``.

    Returns:
        Dict mapping ISO3 code to the ratio (source value divided by 100).
    """
    stream = downloader.get_tabular_stream(url,
                                           headers=1,
                                           format='csv',
                                           compression='zip')
    years = set()
    for header in stream.headers:
        try:
            int(header)
        except ValueError:
            continue
        years.add(header)
    # Order numerically (not lexically), newest first.
    years = sorted(years, key=int, reverse=True)

    slumratios = {}
    for row in stream.iter(keyed=True):
        if not row:
            break
        iso3 = Country.get_iso3_from_m49(int(row['CountryCode']))
        if iso3 is None:
            continue
        for year in years:
            value = row.get(year)
            if value and value != ' ':
                slumratios[iso3] = float(value) / 100.0
                # Newest year with data found; older years must not
                # overwrite it.
                break
    return slumratios
def get_WHO_data(config, country_iso3, hxlize=False, smooth_data=False, n_days_smoothing=14):
    """Download the WHO dataset from HDX and return the rows for one country.

    The CSV resource is downloaded into ``config.INPUT_DIR/config.WHO_DIR``
    and moved to ``config.WHO_FILENAME`` there, replacing any file left by an
    earlier run.

    Args:
        config: Object exposing ``INPUT_DIR``, ``WHO_DIR``, ``WHO_FILENAME``
            and ``WHO_HDX_ADDRESS``.
        country_iso3: ISO3 code of the country; converted to ISO2 to filter
            the 'Country_code' column.
        hxlize: If true, rename the columns using ``HXL_DICT``.
        smooth_data: If true, sort by 'Date_reported' and replace the case
            and death columns by their rolling mean.
        n_days_smoothing: Rolling window size used when ``smooth_data`` is set.

    Returns:
        A pandas DataFrame with the WHO rows of the requested country.
    """
    # Download the file and move it to a local directory
    logger.info('Downloading WHO data from HDX')
    WHO_dir = os.path.join(config.INPUT_DIR, config.WHO_DIR)
    Path(WHO_dir).mkdir(parents=True, exist_ok=True)
    downloaded = query_api(config.WHO_HDX_ADDRESS,
                           WHO_dir,
                           resource_format='CSV')
    download_filename = list(downloaded.values())[0]
    final_filepath = os.path.join(WHO_dir, config.WHO_FILENAME)
    # os.replace overwrites an existing destination on every platform,
    # whereas os.rename raises FileExistsError on Windows when rerun.
    os.replace(os.path.join(WHO_dir, download_filename), final_filepath)

    # Get the data for the country based on ISO3
    logger.info(f'Returning WHO data for {country_iso3}')
    df_WHO = pd.read_csv(final_filepath)
    country_iso2 = Country.get_iso2_from_iso3(country_iso3)
    df_WHO = df_WHO.loc[df_WHO['Country_code'] == country_iso2]

    # In some cases the WHO data might include sudden increases in numbers,
    # for example due to changes in reporting. Sudden bumps might distort the
    # outcome of the model, so in those cases the data is smoothed before it
    # is used as input.
    if smooth_data:
        df_WHO = df_WHO.sort_values(by='Date_reported', ascending=True)
        smoothed_columns = [
            'New_cases', 'Cumulative_cases', 'New_deaths', 'Cumulative_deaths'
        ]
        for column_name in smoothed_columns:
            # With pandas' default min_periods the first window-1 rows
            # become NaN.
            df_WHO[column_name] = df_WHO[column_name].rolling(
                window=n_days_smoothing).mean()

    if hxlize:
        df_WHO = df_WHO.rename(columns=HXL_DICT)
    return df_WHO
def test_get_country_info_from_iso3(self):
    """Check the full country info dictionary returned for ISO3 code 'bih'."""
    expected_info = {
        '#country+alt+i_ar+name+v_unterm': 'البوسنة والهرسك',
        '#country+alt+i_en+name+v_unterm': 'Bonaire, Saint Eustatius and Saba',
        '#country+alt+i_es+name+v_unterm': 'Bosnia y Herzegovina',
        '#country+alt+i_fr+name+v_unterm': 'Bosnie-Herzégovine (la)',
        '#country+alt+i_ru+name+v_unterm': 'Босния и Герцеговина',
        '#country+alt+i_zh+name+v_unterm': '波斯尼亚和黑塞哥维那',
        '#country+alt+name+v_fts': '',
        '#country+alt+name+v_hrinfo_country': '',
        '#country+alt+name+v_iso': '',
        '#country+alt+name+v_m49': '',
        '#country+alt+name+v_reliefweb': '',
        '#country+alt+name+v_unterm': '',
        '#country+code+num+v_m49': '70',
        '#country+code+v_fts': '28',
        '#country+code+v_hrinfo_country': '208',
        '#country+code+v_iso2': 'BA',
        '#country+code+v_iso3': 'BIH',
        '#country+code+v_reliefweb': '40',
        '#country+name+preferred': 'Bosnia and Herzegovina',
        '#country+name+short+v_reliefweb': '',
        '#country+regex': 'herzegovina|bosnia',
        '#geo+admin_level': '0',
        '#geo+lat': '44.16506495',
        '#geo+lon': '17.79105724',
        '#meta+id': '28',
        '#region+code+intermediate': '',
        '#region+code+main': '150',
        '#region+code+sub': '39',
        '#region+intermediate+name+preferred': '',
        '#region+main+name+preferred': 'Europe',
        '#region+name+preferred+sub': 'Southern Europe',
    }
    actual_info = Country.get_country_info_from_iso3('bih', use_live=False)
    assert actual_info == expected_info
def get_countries(countries_url, downloader):
    """Read the country table and return known countries plus the code mapping.

    Rows whose stripped 'ISO3 Code' is empty or parses as an integer are
    ignored. The remaining rows populate a mapping from stripped
    'Country Code' to ``(ISO3 code, country name)``. Countries are then
    listed in sorted order, keeping only those whose ISO3 code resolves to a
    name through ``Country.get_country_name_from_iso3``.

    Args:
        countries_url: Location passed to ``downloader.get_tabular_rows``.
        downloader: Object providing ``get_tabular_rows``.

    Returns:
        Tuple ``(countries, countrymapping)``: a list of dicts with keys
        'iso3', 'countryname' and 'origname', and the mapping described above.
    """
    _, iterator = downloader.get_tabular_rows(countries_url,
                                              headers=1,
                                              dict_form=True,
                                              format="csv")
    countrymapping = {}
    for row in iterator:
        countryiso = row["ISO3 Code"].strip()
        if not countryiso:
            continue
        try:
            int(countryiso)
        except ValueError:
            is_numeric = False
        else:
            is_numeric = True
        if is_numeric:
            # Purely numeric codes are not ISO3 country codes.
            continue
        source_code = row["Country Code"].strip()
        countrymapping[source_code] = (countryiso, row["Country"].strip())

    countries = []
    for countryiso, origname in sorted(countrymapping.values()):
        resolved_name = Country.get_country_name_from_iso3(countryiso)
        if not resolved_name:
            continue
        countries.append({
            "iso3": countryiso,
            "countryname": resolved_name,
            "origname": origname,
        })
    return countries, countrymapping
def configuration(self):
    """Set up global HDX state for tests: configuration, locations and countries.

    Creates a Configuration with a dummy API key from the project YAML,
    registers Afghanistan and Cameroon as valid locations and loads country
    data with ``use_live=False``.
    """
    config_path = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(user_agent='test',
                          hdx_key='12345',
                          project_config_yaml=config_path)
    location_pairs = (('afg', 'Afghanistan'), ('cmr', 'Cameroon'))
    Locations.set_validlocations(
        [{'name': code, 'title': title} for code, title in location_pairs])
    Country.countriesdata(use_live=False)