def test_obsolete():
    """Check the size of the converter data for each ``include_obsolete`` mode."""
    # Default construction and include_obsolete=False must give the same
    # number of rows.
    default_converter = coco.CountryConverter()
    assert len(default_converter.data) == 250

    current_only = coco.CountryConverter(include_obsolete=False)
    assert len(current_only.data) == 250

    # Requesting the obsolete entries yields the larger table.
    with_obsolete = coco.CountryConverter(include_obsolete=True)
    assert len(with_obsolete.data) == 256
def test_additional_country_file():
    """Compare a plain converter with one loaded with ``custom_data``."""
    plain = coco.CountryConverter()
    extended = coco.CountryConverter(additional_data=custom_data)

    # The same input resolves differently once the additional data is loaded.
    congo_plain = plain.convert("Congo")
    assert congo_plain == "COG"
    congo_extended = extended.convert("Congo")
    assert congo_extended == "COD"

    # An entry expected only with the additional data.
    wirtland = extended.convert("wirtland", to="name_short")
    assert wirtland == "Wirtland"
def test_additional_country_file():
    """Verify that ``additional_data`` changes the conversion results."""
    basic = coco.CountryConverter()
    with_custom = coco.CountryConverter(additional_data=custom_data)

    # (converter, convert keyword arguments, input name, expected output)
    expectations = (
        (basic, {}, 'Congo', 'COG'),
        (with_custom, {}, 'Congo', 'COD'),
        (with_custom, {'to': 'name_short'}, 'wirtland', 'Wirtland'),
    )
    for converter, options, given, expected in expectations:
        assert converter.convert(given, **options) == expected
def test_parser():
    """Check that ``_parse_arg`` leaves ``src`` and ``to`` as ``None``.

    ``sys.argv`` is replaced with a fake command line for each scenario and
    restored afterwards, so the fake arguments do not leak into other tests.
    """
    original_argv = sys.argv
    try:
        for fake_argv in (["AT", "US"], ["EXIO1"]):
            sys.argv = fake_argv
            args = _parse_arg(coco.CountryConverter().valid_class)
            # Neither option is given on the fake command line.
            assert args.src is None
            assert args.to is None
    finally:
        sys.argv = original_argv
def test_parser():
    """Check that ``_parse_arg`` leaves ``src`` and ``to`` as ``None``.

    The fake ``sys.argv`` values are undone in a ``finally`` clause so that
    later tests see the real command line again.
    """
    saved_argv = sys.argv
    try:
        sys.argv = ['AT', 'US']
        args = _parse_arg(coco.CountryConverter().valid_class)
        assert args.src is None
        assert args.to is None

        sys.argv = ['EXIO1']
        args = _parse_arg(coco.CountryConverter().valid_class)
        assert args.src is None
        assert args.to is None
    finally:
        sys.argv = saved_argv
def test_get_correspondence_dict_numeric_replace():
    """Numeric replacement test of the ``get_correspondence_dict`` method.

    The EXIO1 -> OECD correspondence is requested twice: once with
    ``replace_numeric=True`` and once with ``replace_numeric=False``.
    """
    classA = "EXIO1"
    classB = "OECD"
    cc = coco.CountryConverter()

    # With replacement, the values hold the classification name or None.
    corr_str = cc.get_correspondence_dict(classA=classA,
                                          classB=classB,
                                          replace_numeric=True)
    assert isinstance(corr_str, dict)
    assert len(corr_str) == 44
    assert corr_str["JP"] == ["OECD"]
    assert corr_str["ZA"] == [None]
    assert None in corr_str["WW"]
    assert "OECD" in corr_str["WW"]
    assert len(corr_str["WW"]) == 2

    # Without replacement, the numeric entries (or NaN) are kept.
    corr_num = cc.get_correspondence_dict(classA=classA,
                                          classB=classB,
                                          replace_numeric=False)
    assert isinstance(corr_num, dict)
    assert len(corr_num) == 44
    assert corr_num["JP"] == [1964]
    assert pd.isna(corr_num["ZA"])
    assert 2010 in corr_num["WW"]
    assert 1961 in corr_num["WW"]
    assert len(corr_num["WW"]) == 4
Exemple #7
0
def add_country(update, context):
    """Add the country named in the incoming message to the chat's list.

    The message text is validated by converting it to an ISO2 code and
    passing that to ``Country``; a ``ValueError`` from that step makes the
    handler reply with an error and stop. Otherwise the text is stored in
    ``COLLECTING_DATA`` under the chat id and the current list is sent back.

    Returns:
        ``ConversationHandler.END`` in every case.
    """
    global COLLECTING_DATA
    converter = coco.CountryConverter()
    chat_id = update.message.chat_id
    message = update.message.text

    try:
        # 'usa' is special-cased before it is handed to the converter.
        if message.lower() == 'usa':
            message = 'United states'
        Country(converter.convert(message, to='ISO2'))
    except ValueError:
        update.message.reply_text('Such a name does not exist')
        return ConversationHandler.END

    try:
        selected = COLLECTING_DATA[chat_id]
        if message in selected:
            update.message.reply_text('You have already '
                                      'selected this country!')
        else:
            selected.append(message)
    except KeyError:
        # First country for this chat: start a new list.
        COLLECTING_DATA[chat_id] = [message]

    header = 'You have added a country!\nYour current list of countries:\n'
    text = header + ', '.join(COLLECTING_DATA[chat_id]) + '.'

    update.message.reply_text(text)
    return ConversationHandler.END
def coco(name):
    """Normalise a country name to its short form.

    'U.S.','United States of America' -> 'United States', etc.

    Args:
        name: Name to normalise. A leading "the" and a trailing "'s" are
            stripped, and a few abbreviations are expanded, before the
            lookup.

    Returns:
        The result of ``CountryConverter.convert`` with ``to='name_short'``
        and ``not_found=None``. If the converter returns a list, only its
        first element is returned.
    """
    # Remove a leading "the" from names. Note that even for non-countries
    # this is desirable for readability.
    name = re.sub(r'^(the\s)', '', name, flags=re.IGNORECASE)
    # Remove ending "'s" from names, which is present for some
    name = re.sub(r'''('s)$''', '', name)

    # Custom replacements that country_converter can't seem to handle
    custom_dic = {
        'US': 'United States',
        'UK': 'United Kingdom',
        'EU': 'European Union',
        'E.U.': 'European Union'
    }
    name = custom_dic.get(name, name)

    # Silence the warnings given by the country_converter module. The
    # previous logger state is restored even if the conversion raises, so a
    # failure cannot leave logging switched off.
    was_disabled = logger.disabled
    logger.disabled = True
    try:
        country = country_converter.CountryConverter().convert(
            name, to='name_short', not_found=None)
    finally:
        logger.disabled = was_disabled

    # If multiple countries are identified, a list of them is returned
    # we'll just take the first country identified in this case
    if isinstance(country, list):
        country = country[0]

    return country
def test_alternative_names(get_regex_test_data):
    """Check every test name against the regular expressions.

    For each row of the fixture data, ``name_test`` is converted with
    ``src="regex"`` and must resolve to exactly the ``name_short`` given in
    the same row.
    """
    converter = coco.CountryConverter(include_obsolete=True)
    not_found_id = "XXX"
    data_name = get_regex_test_data.data_name
    for row_id, row in get_regex_test_data.data.iterrows():
        name_test = row.name_test
        name_short = row.name_short
        name_result = converter.convert(
            name_test,
            src="regex",
            to="name_short",
            not_found=not_found_id,
            enforce_list=False,
        )
        # A single match (or the not-found marker) is a plain string; any
        # other type is treated as several matches. Checking the type
        # instead of ``len(...) > 2`` also catches three or more matches.
        assert isinstance(name_result, str), (
            "File {0} - row {1}: Name {2} matched several "
            "regular expressions: {3}".format(data_name,
                                              row_id, name_test,
                                              " ,".join(name_result)))
        if name_short != not_found_id:
            assert name_result != not_found_id, (
                "File {0} - row {1}: Name {2} did not match any "
                "regular expression".format(data_name,
                                            row_id, name_test))
        assert name_result == name_short, (
            "File {0} - row {1}: Name {2} did match the "
            "wrong regular expression: {3}".format(
                data_name, row_id, name_test, name_result))
def patchCountries(df, patches=None):
    """Replace the ``country`` values of *df* with short country names.

    Args:
        df: Object with a ``country`` attribute that can be listed and
            reassigned (presumably a pandas DataFrame; confirm with
            callers).
        patches: Ignored. Kept only so existing callers keep working; the
            manual name-mapping branch that used it was permanently
            disabled (``if False``) and has been removed.

    Returns:
        The same *df* object, with ``country`` overwritten by the result of
        ``CountryConverter.convert(..., to='name_short', not_found=None,
        src='regex')``.
    """
    cc: coco.CountryConverter = coco.CountryConverter()

    countries = list(df.country)
    df.country = cc.convert(countries, to='name_short', not_found=None,
                            src='regex')
    return df
def test_get_correspondance_dict_numeric_replace():
    """Numeric replacement test of the ``get_correspondance_dict`` method.

    The EXIO1 -> OECD correspondence is requested with and without
    ``replace_numeric``. NaN detection uses ``pd.isna`` because the
    ``pd.np`` alias no longer exists in current pandas releases.
    """
    classA = 'EXIO1'
    classB = 'OECD'
    cc = coco.CountryConverter()

    # With replacement, the values hold the classification name or None.
    corr_str = cc.get_correspondance_dict(classA=classA,
                                          classB=classB,
                                          replace_numeric=True)
    assert isinstance(corr_str, dict)
    assert len(corr_str) == 44
    assert corr_str['JP'] == ['OECD']
    assert corr_str['ZA'] == [None]
    assert None in corr_str['WW']
    assert 'OECD' in corr_str['WW']
    assert len(corr_str['WW']) == 2

    # Without replacement, the numeric entries (or NaN) are kept.
    corr_num = cc.get_correspondance_dict(classA=classA,
                                          classB=classB,
                                          replace_numeric=False)
    assert isinstance(corr_num, dict)
    assert len(corr_num) == 44
    assert corr_num['JP'] == [1964]
    assert pd.isna(corr_num['ZA'])
    assert 2010 in corr_num['WW']
    assert 1961 in corr_num['WW']
    assert len(corr_num['WW']) == 4
def test_alternative_names(get_regex_test_data):
    """Check every test name against the regular expressions.

    Each ``name_test`` from the fixture data is converted with
    ``src='regex'`` and must resolve to exactly the ``name_short`` given in
    the same row.
    """
    converter = coco.CountryConverter(include_obsolete=True)
    not_found_id = 'XXX'
    for row_id, row in get_regex_test_data.data.iterrows():
        name_test = row.name_test
        name_short = row.name_short
        name_result = converter.convert(name_test,
                                        src='regex',
                                        to='name_short',
                                        not_found=not_found_id,
                                        enforce_list=False)
        # A single match (or the not-found marker) is a plain string; any
        # other type is treated as several matches. Checking the type
        # instead of ``len(...) > 2`` also catches three or more matches.
        assert isinstance(name_result, str), (
            'File {0} - row {1}: Name {2} matched several '
            'regular expressions: {3}'.format(get_regex_test_data.data_name,
                                              row_id, name_test,
                                              ' ,'.join(name_result)))
        if name_short != not_found_id:
            assert name_result != not_found_id, (
                'File {0} - row {1}: Name {2} did not match any '
                'regular expression'.format(get_regex_test_data.data_name,
                                            row_id, name_test))
        assert name_result == name_short, (
            'File {0} - row {1}: Name {2} did match the '
            'wrong regular expression: {3}'.format(
                get_regex_test_data.data_name, row_id, name_test,
                name_result))
def test_EU_output():
    """Check the EU28 and EU27 membership tables in ISO2 form.

    The length checks compare ``len(table)`` with the expected member count.
    The earlier ``len(table == n)`` form only measured an element-wise
    comparison result and never verified the count.
    """
    cc = coco.CountryConverter()
    croatia = cc.convert("Croatia", to="ISO2")

    EU28 = cc.EU28as("ISO2")
    assert len(EU28) == 28
    assert croatia in EU28.ISO2.values

    # Croatia is expected in EU28 but not in EU27.
    EU27 = cc.EU27as("ISO2")
    assert len(EU27) == 27
    assert croatia not in EU27.ISO2.values
def test_EU_output():
    """Check the EU28 and EU27 membership tables in ISO2 form.

    The member counts are asserted with ``len(table) == n``. The misplaced
    parenthesis in ``len(table == n)`` made the earlier assertions pass
    regardless of the count.
    """
    cc = coco.CountryConverter()

    EU28 = cc.EU28as('ISO2')
    assert len(EU28) == 28
    assert cc.convert('Croatia', to='ISO2') in EU28.ISO2.values

    # Croatia is expected in EU28 but not in EU27.
    EU27 = cc.EU27as('ISO2')
    assert len(EU27) == 27
    assert cc.convert('Croatia', to='ISO2') not in EU27.ISO2.values
def test_MESSAGE_output():
    """Check the MESSAGE classification and its ISO3 correspondence table."""
    converter = coco.CountryConverter()
    regions = converter.MESSAGE
    by_iso3 = converter.MESSAGEas(to="ISO3").set_index("original")

    assert len(regions) == 12
    for region in ("PAO", "SAS"):
        assert region in regions.values

    # Austria (ISO3 code AUT) must be aggregated into WEU.
    austria_region = by_iso3.loc["AUT", "aggregated"]
    assert austria_region == "WEU"
Exemple #16
0
def test_MESSAGE_output():
    """Verify the MESSAGE region list and the ISO3 lookup built from it."""
    converter = coco.CountryConverter()
    message_regions = converter.MESSAGE
    lookup = converter.MESSAGEas(to='ISO3')
    lookup = lookup.set_index('original')

    assert len(message_regions) == 12
    region_values = message_regions.values
    assert 'PAO' in region_values
    assert 'SAS' in region_values
    # AUT must map to the WEU aggregate.
    assert lookup.loc['AUT', 'aggregated'] == 'WEU'
Exemple #17
0
def test_BRIC_output():
    """Check the BRIC group in its default, ISO2 and ``to=None`` forms."""
    converter = coco.CountryConverter()
    members = converter.BRIC
    members_iso2 = converter.BRICas(to='ISO2')
    members_plain = converter.BRICas(to=None)

    # All three representations must list exactly four members.
    assert len(members) == 4
    assert len(members_iso2) == 4
    assert len(members_plain) == 4

    assert 'Brazil' in members.values
    assert 'Brazil' in members_plain.values
    assert 'CN' in members_iso2.values
Exemple #18
0
def test_APEC_output():
    """Check the APEC group in its default, ISO2 and ``to=None`` forms."""
    converter = coco.CountryConverter()
    members = converter.APEC
    members_iso2 = converter.APECas(to='ISO2')
    members_plain = converter.APECas(to=None)

    # All three representations must list exactly 21 members.
    assert len(members) == 21
    assert len(members_iso2) == 21
    assert len(members_plain) == 21

    assert 'Taiwan' in members.values
    assert 'Russia' in members_plain.values
    assert 'RU' in members_iso2.values
Exemple #19
0
def test_BASIC_output():
    """Check the BASIC group in its default, ISO2 and ``to=None`` forms."""
    converter = coco.CountryConverter()
    members = converter.BASIC
    members_iso2 = converter.BASICas(to='ISO2')
    members_plain = converter.BASICas(to=None)

    # All three representations must list exactly four members.
    assert len(members) == 4
    assert len(members_iso2) == 4
    assert len(members_plain) == 4

    assert 'Brazil' in members.values
    assert 'Brazil' in members_plain.values
    assert 'IN' in members_iso2.values
Exemple #20
0
def test_CIS_output():
    """Check the CIS group in its default, ISO2 and ``to=None`` forms."""
    converter = coco.CountryConverter()
    members = converter.CIS
    members_iso2 = converter.CISas(to='ISO2')
    members_plain = converter.CISas(to=None)

    # All three representations must list exactly eight members.
    assert len(members) == 8
    assert len(members_iso2) == 8
    assert len(members_plain) == 8

    assert 'Belarus' in members.values
    assert 'Armenia' in members_plain.values
    assert 'RU' in members_iso2.values
Exemple #21
0
def shorter_names(column):
    """Convert every country name in *column* to its short name.

    Args:
        column: Object providing ``map`` (presumably a pandas Series of
            country names; confirm with callers).

    Returns:
        The result of ``column.map`` with each value passed through
        ``CountryConverter.convert(..., to='name_short')``.
    """
    converter = coco.CountryConverter()

    def to_short_name(country):
        # One conversion per element, all sharing a single converter.
        return converter.convert(names=country, to='name_short')

    return column.map(to_short_name)
Exemple #22
0
def test_Cecilia_output():
    """Check the Cecilia2050 classification and its ISO3 lookup table."""
    converter = coco.CountryConverter()
    regions = converter.Cecilia2050
    lookup = converter.Cecilia2050as(to='ISO3').set_index('original')

    assert len(regions) == 4
    # Both aggregate labels must appear in the lookup table values.
    lookup_values = lookup.values
    assert 'RoW' in lookup_values
    assert 'EU' in lookup_values

    assert lookup.loc['AUT', 'aggregated'] == 'EU'
    assert lookup.loc['AFG', 'aggregated'] == 'RoW'
def test_WIOD_output():
    """Check the WIOD classification and its ISO2 lookup table."""
    converter = coco.CountryConverter()
    regions = converter.WIOD
    lookup = converter.WIODas(to="ISO2").set_index("original")

    assert len(regions) == 41
    for label in ("RoW", "NLD"):
        assert label in regions.values

    # Afghanistan (AF) falls into the rest-of-world aggregate, Austria (AT)
    # keeps its own WIOD code.
    afghanistan = lookup.loc["AF", "aggregated"]
    assert afghanistan == "RoW"
    austria = lookup.loc["AT", "aggregated"]
    assert austria == "AUT"
def test_Cecilia_output():
    """Verify the Cecilia2050 regions and the ISO3 correspondence table."""
    converter = coco.CountryConverter()
    cecilia_regions = converter.Cecilia2050
    by_iso3 = converter.Cecilia2050as(to="ISO3")
    by_iso3 = by_iso3.set_index("original")

    assert len(cecilia_regions) == 4
    # Both aggregate labels must appear in the correspondence table values.
    for label in ("RoW", "EU"):
        assert label in by_iso3.values

    austria = by_iso3.loc["AUT", "aggregated"]
    assert austria == "EU"
    afghanistan = by_iso3.loc["AFG", "aggregated"]
    assert afghanistan == "RoW"
def test_CIS_output():
    """Verify size and sample members of the CIS group representations."""
    converter = coco.CountryConverter()
    cis = converter.CIS
    cis_iso2 = converter.CISas(to="ISO2")
    cis_plain = converter.CISas(to=None)

    # Every representation must contain eight entries.
    for table in (cis, cis_iso2, cis_plain):
        assert len(table) == 8

    assert "Belarus" in cis.values
    assert "Armenia" in cis_plain.values
    assert "RU" in cis_iso2.values
def test_BASIC_output():
    """Verify size and sample members of the BASIC group representations."""
    converter = coco.CountryConverter()
    basic = converter.BASIC
    basic_iso2 = converter.BASICas(to="ISO2")
    basic_plain = converter.BASICas(to=None)

    # Every representation must contain four entries.
    for table in (basic, basic_iso2, basic_plain):
        assert len(table) == 4

    assert "Brazil" in basic.values
    assert "Brazil" in basic_plain.values
    assert "IN" in basic_iso2.values
def test_APEC_output():
    """Verify size and sample members of the APEC group representations."""
    converter = coco.CountryConverter()
    apec = converter.APEC
    apec_iso2 = converter.APECas(to="ISO2")
    apec_plain = converter.APECas(to=None)

    # Every representation must contain 21 entries.
    for table in (apec, apec_iso2, apec_plain):
        assert len(table) == 21

    assert "Taiwan" in apec.values
    assert "Russia" in apec_plain.values
    assert "RU" in apec_iso2.values
def test_Eora_output():
    """Check the Eora classification and its ISO2 lookup table."""
    converter = coco.CountryConverter()
    regions = converter.Eora
    lookup = converter.Eoraas(to="ISO2").set_index("original")

    assert len(regions) == 238
    for code in ("AUT", "AFG"):
        assert code in regions.values

    # ISO2 codes must map onto the matching Eora codes.
    afghanistan = lookup.loc["AF", "aggregated"]
    assert afghanistan == "AFG"
    austria = lookup.loc["AT", "aggregated"]
    assert austria == "AUT"
def test_BRIC_output():
    """Verify size and sample members of the BRIC group representations."""
    converter = coco.CountryConverter()
    bric = converter.BRIC
    bric_iso2 = converter.BRICas(to="ISO2")
    bric_plain = converter.BRICas(to=None)

    # Every representation must contain four entries.
    for table in (bric, bric_iso2, bric_plain):
        assert len(table) == 4

    assert "Brazil" in bric.values
    assert "Brazil" in bric_plain.values
    assert "CN" in bric_iso2.values
Exemple #30
0
def test_Eora_output():
    """Verify the Eora region list and the ISO2 correspondence table."""
    converter = coco.CountryConverter()
    eora_regions = converter.Eora
    by_iso2 = converter.Eoraas(to='ISO2')
    by_iso2 = by_iso2.set_index('original')

    assert len(eora_regions) == 238
    region_values = eora_regions.values
    assert 'AUT' in region_values
    assert 'AFG' in region_values

    # ISO2 codes must map onto the matching Eora codes.
    assert by_iso2.loc['AF', 'aggregated'] == 'AFG'
    assert by_iso2.loc['AT', 'aggregated'] == 'AUT'