def test_obsolete():
    """Check the size of ``CountryConverter.data`` per ``include_obsolete`` setting."""
    # (constructor keyword arguments, expected number of rows in ``data``)
    cases = (
        ({}, 250),
        ({"include_obsolete": False}, 250),
        ({"include_obsolete": True}, 256),
    )
    for kwargs, expected_rows in cases:
        converter = coco.CountryConverter(**kwargs)
        assert len(converter.data) == expected_rows
def test_additional_country_file():
    """Compare conversions with and without ``additional_data=custom_data``."""
    plain = coco.CountryConverter()
    extended = coco.CountryConverter(additional_data=custom_data)

    congo_plain = plain.convert("Congo")
    assert congo_plain == "COG"

    # With the additional data loaded, the same name resolves differently.
    congo_extended = extended.convert("Congo")
    assert congo_extended == "COD"

    wirtland_short = extended.convert("wirtland", to="name_short")
    assert wirtland_short == "Wirtland"
def test_additional_country_file():
    """Compare conversions with and without ``additional_data=custom_data``."""
    plain = coco.CountryConverter()
    extended = coco.CountryConverter(additional_data=custom_data)

    congo_plain = plain.convert('Congo')
    assert congo_plain == 'COG'

    # With the additional data loaded, the same name resolves differently.
    congo_extended = extended.convert('Congo')
    assert congo_extended == 'COD'

    wirtland_short = extended.convert('wirtland', to='name_short')
    assert wirtland_short == 'Wirtland'
def test_parser():
    """Check that ``_parse_arg`` leaves ``src`` and ``to`` as ``None``.

    ``sys.argv`` is replaced for each case and restored afterwards, so the
    patched command line does not leak into code that runs after this test.
    """
    original_argv = sys.argv
    try:
        for argv in (["AT", "US"], ["EXIO1"]):
            sys.argv = argv
            args = _parse_arg(coco.CountryConverter().valid_class)
            assert args.src is None
            assert args.to is None
    finally:
        sys.argv = original_argv
def test_parser():
    """Check that ``_parse_arg`` leaves ``src`` and ``to`` as ``None``.

    ``sys.argv`` is replaced for each case and restored afterwards, so the
    patched command line does not leak into code that runs after this test.
    """
    original_argv = sys.argv
    try:
        for argv in (['AT', 'US'], ['EXIO1']):
            sys.argv = argv
            args = _parse_arg(coco.CountryConverter().valid_class)
            assert args.src is None
            assert args.to is None
    finally:
        sys.argv = original_argv
def test_get_correspondence_dict_numeric_replace():
    """Numeric replacement test of get_correspondence_dict method.

    Builds the EXIO1 -> OECD correspondence once with
    ``replace_numeric=True`` and once with ``replace_numeric=False`` and
    checks the entries for ``JP``, ``ZA`` and ``WW`` in each result.
    """
    classA = "EXIO1"
    classB = "OECD"
    cc = coco.CountryConverter()

    corr_str = cc.get_correspondence_dict(
        classA=classA, classB=classB, replace_numeric=True
    )
    assert isinstance(corr_str, dict)
    assert len(corr_str) == 44
    assert corr_str["JP"] == ["OECD"]
    assert corr_str["ZA"] == [None]
    assert None in corr_str["WW"]
    assert "OECD" in corr_str["WW"]
    assert len(corr_str["WW"]) == 2

    corr_num = cc.get_correspondence_dict(
        classA=classA, classB=classB, replace_numeric=False
    )
    assert isinstance(corr_num, dict)
    assert len(corr_num) == 44
    assert corr_num["JP"] == [1964]
    assert pd.isna(corr_num["ZA"])
    assert 2010 in corr_num["WW"]
    assert 1961 in corr_num["WW"]
    assert len(corr_num["WW"]) == 4
def add_country(update, context):
    """Add the country named in the incoming message to the chat's list.

    The message text is validated by converting it to ISO2 and passing the
    result to ``Country``; a ``ValueError`` there is reported to the user.
    Accepted names are stored per chat id in the module-level
    ``COLLECTING_DATA`` mapping, and the chat's current list is sent back.

    Args:
        update: Incoming update; ``update.message`` provides ``chat_id``,
            ``text`` and ``reply_text``.
        context: Callback context (unused here).

    Returns:
        ``ConversationHandler.END`` in every case.
    """
    converter = coco.CountryConverter()
    chat_id = update.message.chat_id
    message = update.message.text

    # 'usa' is special-cased to a spelling that is then handed to the converter.
    if message.lower() == 'usa':
        message = 'United states'
    try:
        Country(converter.convert(message, to='ISO2'))
    except ValueError:
        update.message.reply_text('Such a name does not exist')
        return ConversationHandler.END

    # setdefault creates the chat's list on first use.
    countries = COLLECTING_DATA.setdefault(chat_id, [])
    if message in countries:
        # NOTE(review): the summary below is still sent in this case and
        # starts with 'You have added a country!' -- confirm this is intended.
        update.message.reply_text('You have already '
                                  'selected this country!')
    else:
        countries.append(message)

    text = ('You have added a country!\nYour current list of countries:\n'
            + ', '.join(countries) + '.')
    update.message.reply_text(text)
    return ConversationHandler.END
def coco(name):
    """Normalise a country name via country_converter's ``name_short``.

    Intended mapping, per the original author:
    'U.S.','United States of America' -> 'United States', etc.

    Args:
        name: Name string to normalise.

    Returns:
        Whatever ``CountryConverter.convert(..., to='name_short',
        not_found=None)`` yields for the cleaned name; if that is a list
        (multiple countries identified), only its first element.
    """
    # Remove a leading "the" from names. Note that even for non-countries
    # this is desirable for readability.
    name = re.sub(r'^(the\s)', '', name, flags=re.IGNORECASE)

    # Remove ending "'s" from names, which is present for some
    name = re.sub(r'''('s)$''', '', name)

    # Custom replacements that country_converter can't seem to handle
    custom_dic = {
        'US': 'United States',
        'UK': 'United Kingdom',
        'EU': 'European Union',
        'E.U.': 'European Union'
    }
    name = custom_dic.get(name, name)

    # Silence the warnings given by the country_converter module during the
    # lookup. The previous state is restored even if convert() raises, so a
    # failure cannot leave the logger permanently disabled.
    was_disabled = logger.disabled
    logger.disabled = True
    try:
        country = country_converter.CountryConverter().convert(
            name, to='name_short', not_found=None)
    finally:
        logger.disabled = was_disabled

    # If multiple countries are identified, a list of them is returned;
    # we'll just take the first country identified in this case.
    if isinstance(country, list):
        country = country[0]

    return country
def test_alternative_names(get_regex_test_data):
    """Run every test name through the regex matcher and check the result.

    Each row of ``get_regex_test_data.data`` supplies ``name_test`` (the
    input) and ``name_short`` (the expected output). Rows whose expected
    value equals the not-found marker only get the length check.
    """
    converter = coco.CountryConverter(include_obsolete=True)
    not_found_id = "XXX"

    for row_label, record in get_regex_test_data.data.iterrows():
        candidate = record.name_test
        expected = record.name_short
        matched = converter.convert(
            candidate,
            src="regex",
            to="name_short",
            not_found=not_found_id,
            enforce_list=False,
        )

        # Length heuristic from the original test; the message implies it is
        # meant to catch a list of several matches.
        assert len(matched) > 2, (
            "File {0} - row {1}: Name {2} matched several "
            "regular expressions: {3}".format(
                get_regex_test_data.data_name,
                row_label,
                candidate,
                " ,".join(matched),
            )
        )

        if expected == not_found_id:
            continue

        assert matched != not_found_id, (
            "File {0} - row {1}: Name {2} did not match any "
            "regular expression".format(
                get_regex_test_data.data_name, row_label, candidate
            )
        )
        assert matched == expected, (
            "File {0} - row {1}: Name {2} did match the "
            "wrong regular expression: {3}".format(
                get_regex_test_data.data_name, row_label, candidate, matched
            )
        )
def patchCountries(df, patches=None):
    """Replace ``df.country`` with country_converter ``name_short`` values.

    Args:
        df: Object whose ``country`` attribute holds the names to convert;
            it is modified in place.
        patches: Ignored. Kept only so existing callers keep working; the
            manual name-patching branch that used it was unreachable
            (guarded by ``if False``) and has been removed.

    Returns:
        The same ``df`` object that was passed in.
    """
    cc: coco.CountryConverter = coco.CountryConverter()
    countries = list(df.country)
    df.country = cc.convert(
        countries, to='name_short', not_found=None, src='regex')
    return df
def test_get_correspondance_dict_numeric_replace():
    """Numeric replacement test of get_correspondance_dict method.

    Builds the EXIO1 -> OECD correspondence once with
    ``replace_numeric=True`` and once with ``replace_numeric=False`` and
    checks the entries for ``JP``, ``ZA`` and ``WW`` in each result.
    """
    classA = 'EXIO1'
    classB = 'OECD'
    cc = coco.CountryConverter()

    corr_str = cc.get_correspondance_dict(
        classA=classA, classB=classB, replace_numeric=True)
    assert isinstance(corr_str, dict)
    assert len(corr_str) == 44
    assert corr_str['JP'] == ['OECD']
    assert corr_str['ZA'] == [None]
    assert None in corr_str['WW']
    assert 'OECD' in corr_str['WW']
    assert len(corr_str['WW']) == 2

    corr_num = cc.get_correspondance_dict(
        classA=classA, classB=classB, replace_numeric=False)
    assert isinstance(corr_num, dict)
    assert len(corr_num) == 44
    assert corr_num['JP'] == [1964]
    # pd.isna replaces pd.np.isnan: the ``pandas.np`` alias no longer exists
    # in current pandas releases.
    assert pd.isna(corr_num['ZA'])
    assert 2010 in corr_num['WW']
    assert 1961 in corr_num['WW']
    assert len(corr_num['WW']) == 4
def test_alternative_names(get_regex_test_data):
    """Run every test name through the regex matcher and check the result.

    Each row of ``get_regex_test_data.data`` supplies ``name_test`` (the
    input) and ``name_short`` (the expected output). Rows whose expected
    value equals the not-found marker only get the length check.
    """
    converter = coco.CountryConverter(include_obsolete=True)
    not_found_id = 'XXX'

    for row_label, record in get_regex_test_data.data.iterrows():
        candidate = record.name_test
        expected = record.name_short
        matched = converter.convert(
            candidate,
            src='regex',
            to='name_short',
            not_found=not_found_id,
            enforce_list=False,
        )

        # Length heuristic from the original test; the message implies it is
        # meant to catch a list of several matches.
        assert len(matched) > 2, (
            'File {0} - row {1}: Name {2} matched several '
            'regular expressions: {3}'.format(
                get_regex_test_data.data_name,
                row_label,
                candidate,
                ' ,'.join(matched),
            )
        )

        if expected == not_found_id:
            continue

        assert matched != not_found_id, (
            'File {0} - row {1}: Name {2} did not match any '
            'regular expression'.format(
                get_regex_test_data.data_name, row_label, candidate
            )
        )
        assert matched == expected, (
            'File {0} - row {1}: Name {2} did match the '
            'wrong regular expression: {3}'.format(
                get_regex_test_data.data_name, row_label, candidate, matched
            )
        )
def test_EU_output():
    """Check the size and Croatia membership of the EU28 and EU27 tables.

    The size checks compare ``len(table)`` with the expected count. The
    previous form ``len(table == 28)`` took the length of an element-wise
    comparison result, so it never checked the member count.
    """
    cc = coco.CountryConverter()
    croatia_iso2 = cc.convert("Croatia", to="ISO2")

    EU28 = cc.EU28as("ISO2")
    assert len(EU28) == 28
    assert croatia_iso2 in EU28.ISO2.values

    EU27 = cc.EU27as("ISO2")
    assert len(EU27) == 27
    assert croatia_iso2 not in EU27.ISO2.values
def test_EU_output():
    """Check the size and Croatia membership of the EU28 and EU27 tables.

    The size checks compare ``len(table)`` with the expected count. The
    previous form ``len(table == 28)`` took the length of an element-wise
    comparison result, so it never checked the member count.
    """
    cc = coco.CountryConverter()
    croatia_iso2 = cc.convert('Croatia', to='ISO2')

    EU28 = cc.EU28as('ISO2')
    assert len(EU28) == 28
    assert croatia_iso2 in EU28.ISO2.values

    EU27 = cc.EU27as('ISO2')
    assert len(EU27) == 27
    assert croatia_iso2 not in EU27.ISO2.values
def test_MESSAGE_output():
    """Check the ``MESSAGE`` attribute and the ISO3 table from ``MESSAGEas``."""
    converter = coco.CountryConverter()
    regions = converter.MESSAGE
    by_iso3 = converter.MESSAGEas(to="ISO3").set_index("original")

    assert len(regions) == 12
    for region in ("PAO", "SAS"):
        assert region in regions.values
    assert by_iso3.loc["AUT", "aggregated"] == "WEU"
def test_MESSAGE_output():
    """Check the ``MESSAGE`` attribute and the ISO3 table from ``MESSAGEas``."""
    converter = coco.CountryConverter()
    regions = converter.MESSAGE
    by_iso3 = converter.MESSAGEas(to='ISO3').set_index('original')

    assert len(regions) == 12
    for region in ('PAO', 'SAS'):
        assert region in regions.values
    assert by_iso3.loc['AUT', 'aggregated'] == 'WEU'
def test_BRIC_output():
    """Check the ``BRIC`` attribute and both ``BRICas`` outputs."""
    converter = coco.CountryConverter()
    members = converter.BRIC
    as_iso2 = converter.BRICas(to='ISO2')
    as_default = converter.BRICas(to=None)

    # All three representations must hold exactly four entries.
    for table in (members, as_iso2, as_default):
        assert len(table) == 4

    assert 'Brazil' in members.values
    assert 'Brazil' in as_default.values
    assert 'CN' in as_iso2.values
def test_APEC_output():
    """Check the ``APEC`` attribute and both ``APECas`` outputs."""
    converter = coco.CountryConverter()
    members = converter.APEC
    as_iso2 = converter.APECas(to='ISO2')
    as_default = converter.APECas(to=None)

    # All three representations must hold exactly 21 entries.
    for table in (members, as_iso2, as_default):
        assert len(table) == 21

    assert 'Taiwan' in members.values
    assert 'Russia' in as_default.values
    assert 'RU' in as_iso2.values
def test_BASIC_output():
    """Check the ``BASIC`` attribute and both ``BASICas`` outputs."""
    converter = coco.CountryConverter()
    members = converter.BASIC
    as_iso2 = converter.BASICas(to='ISO2')
    as_default = converter.BASICas(to=None)

    # All three representations must hold exactly four entries.
    for table in (members, as_iso2, as_default):
        assert len(table) == 4

    assert 'Brazil' in members.values
    assert 'Brazil' in as_default.values
    assert 'IN' in as_iso2.values
def test_CIS_output():
    """Check the ``CIS`` attribute and both ``CISas`` outputs."""
    converter = coco.CountryConverter()
    members = converter.CIS
    as_iso2 = converter.CISas(to='ISO2')
    as_default = converter.CISas(to=None)

    # All three representations must hold exactly eight entries.
    for table in (members, as_iso2, as_default):
        assert len(table) == 8

    assert 'Belarus' in members.values
    assert 'Armenia' in as_default.values
    assert 'RU' in as_iso2.values
def shorter_names(column):
    """Map every entry of a country column to its ``name_short`` form.

    Each element is passed individually to ``CountryConverter.convert``;
    the mapped column is returned.
    """
    converter = coco.CountryConverter()

    def _to_short(value):
        # One conversion call per element, using a single shared converter.
        return converter.convert(names=value, to='name_short')

    return column.map(_to_short)
def test_Cecilia_output():
    """Check ``Cecilia2050`` and the ISO3 table from ``Cecilia2050as``."""
    converter = coco.CountryConverter()
    regions = converter.Cecilia2050
    by_iso3 = converter.Cecilia2050as(to='ISO3').set_index('original')

    assert len(regions) == 4
    for region in ('RoW', 'EU'):
        assert region in by_iso3.values

    # (ISO3 code, expected aggregated region)
    for iso3, region in (('AUT', 'EU'), ('AFG', 'RoW')):
        assert by_iso3.loc[iso3, 'aggregated'] == region
def test_WIOD_output():
    """Check the ``WIOD`` attribute and the ISO2 table from ``WIODas``."""
    converter = coco.CountryConverter()
    regions = converter.WIOD
    by_iso2 = converter.WIODas(to="ISO2").set_index("original")

    assert len(regions) == 41
    for region in ("RoW", "NLD"):
        assert region in regions.values

    # (ISO2 code, expected aggregated region)
    for iso2, region in (("AF", "RoW"), ("AT", "AUT")):
        assert by_iso2.loc[iso2, "aggregated"] == region
def test_Cecilia_output():
    """Check ``Cecilia2050`` and the ISO3 table from ``Cecilia2050as``."""
    converter = coco.CountryConverter()
    regions = converter.Cecilia2050
    by_iso3 = converter.Cecilia2050as(to="ISO3").set_index("original")

    assert len(regions) == 4
    for region in ("RoW", "EU"):
        assert region in by_iso3.values

    # (ISO3 code, expected aggregated region)
    for iso3, region in (("AUT", "EU"), ("AFG", "RoW")):
        assert by_iso3.loc[iso3, "aggregated"] == region
def test_CIS_output():
    """Check the ``CIS`` attribute and both ``CISas`` outputs."""
    converter = coco.CountryConverter()
    members = converter.CIS
    as_iso2 = converter.CISas(to="ISO2")
    as_default = converter.CISas(to=None)

    # All three representations must hold exactly eight entries.
    for table in (members, as_iso2, as_default):
        assert len(table) == 8

    assert "Belarus" in members.values
    assert "Armenia" in as_default.values
    assert "RU" in as_iso2.values
def test_BASIC_output():
    """Check the ``BASIC`` attribute and both ``BASICas`` outputs."""
    converter = coco.CountryConverter()
    members = converter.BASIC
    as_iso2 = converter.BASICas(to="ISO2")
    as_default = converter.BASICas(to=None)

    # All three representations must hold exactly four entries.
    for table in (members, as_iso2, as_default):
        assert len(table) == 4

    assert "Brazil" in members.values
    assert "Brazil" in as_default.values
    assert "IN" in as_iso2.values
def test_APEC_output():
    """Check the ``APEC`` attribute and both ``APECas`` outputs."""
    converter = coco.CountryConverter()
    members = converter.APEC
    as_iso2 = converter.APECas(to="ISO2")
    as_default = converter.APECas(to=None)

    # All three representations must hold exactly 21 entries.
    for table in (members, as_iso2, as_default):
        assert len(table) == 21

    assert "Taiwan" in members.values
    assert "Russia" in as_default.values
    assert "RU" in as_iso2.values
def test_Eora_output():
    """Check the ``Eora`` attribute and the ISO2 table from ``Eoraas``."""
    converter = coco.CountryConverter()
    regions = converter.Eora
    by_iso2 = converter.Eoraas(to="ISO2").set_index("original")

    assert len(regions) == 238
    for code in ("AUT", "AFG"):
        assert code in regions.values

    # (ISO2 code, expected aggregated Eora code)
    for iso2, eora_code in (("AF", "AFG"), ("AT", "AUT")):
        assert by_iso2.loc[iso2, "aggregated"] == eora_code
def test_BRIC_output():
    """Check the ``BRIC`` attribute and both ``BRICas`` outputs."""
    converter = coco.CountryConverter()
    members = converter.BRIC
    as_iso2 = converter.BRICas(to="ISO2")
    as_default = converter.BRICas(to=None)

    # All three representations must hold exactly four entries.
    for table in (members, as_iso2, as_default):
        assert len(table) == 4

    assert "Brazil" in members.values
    assert "Brazil" in as_default.values
    assert "CN" in as_iso2.values
def test_Eora_output():
    """Check the ``Eora`` attribute and the ISO2 table from ``Eoraas``."""
    converter = coco.CountryConverter()
    regions = converter.Eora
    by_iso2 = converter.Eoraas(to='ISO2').set_index('original')

    assert len(regions) == 238
    for code in ('AUT', 'AFG'):
        assert code in regions.values

    # (ISO2 code, expected aggregated Eora code)
    for iso2, eora_code in (('AF', 'AFG'), ('AT', 'AUT')):
        assert by_iso2.loc[iso2, 'aggregated'] == eora_code