예제 #1
0
def test_suggest():
    '''
    Exercise sc.suggest() with its default arguments and with the
    threshold, die, n, and fulloutput options.

    Returns:
        res5b (dict): the result of the fulloutput=True call
    '''
    sc.heading('test_suggest()')
    string = 'foo'

    # Candidate lists to match the string against
    ex1 = ['Foo', 'Bar']
    ex2 = ['FOO', 'Foo']
    ex3 = ['Foo', 'boo']
    ex4 = ['asldfkj', 'aosidufasodiu']
    ex5 = ['foo', 'fou', 'fol', 'fal', 'fil']

    # Run the different call variants
    res1 = sc.suggest(string, ex1)
    res2 = sc.suggest(string, ex2)
    res3 = sc.suggest(string, ex3)
    res4 = sc.suggest(string, ex4, threshold=4)
    with pytest.raises(Exception):
        sc.suggest(string, ex1, threshold=4, die=True)
    res5a = sc.suggest(string, ex5, n=3)
    res5b = sc.suggest(string, ex5, fulloutput=True)

    # Check the results
    assert res1 == 'Foo'
    assert res2 == 'Foo'
    assert res3 == 'Foo'
    assert res4 is None  # Identity, not equality, is the correct check for None
    assert res5a == ['foo', 'fou', 'fol']
    assert res5b == {
        'foo': 0.0,
        'fou': 1.0,
        'fol': 1.0,
        'fal': 2.0,
        'fil': 2.0,
    }

    # Show the results
    for res in (res1, res2, res3, res4, res5a, res5b):
        print(res)

    return res5b
예제 #2
0
 def __setitem__(self, key, value):
     '''
     Assign a value to an existing entry of self.pars.

     Only keys already present in self.pars may be set; any other key raises
     a KeyError whose message either proposes the closest match found by
     sc.suggest() or, failing that, lists every available key.
     '''
     # Known key: store the value and finish
     if key in self.pars:
         self.pars[key] = value
         return

     # Unknown key: build the most helpful message we can, then fail
     suggestion = sc.suggest(key, self.pars.keys())
     if not suggestion:
         all_keys = '\n'.join(self.pars.keys())
         errormsg = f'Key {key} not found; available keys:\n{all_keys}'
     else:
         errormsg = f'Key {key} not found; did you mean "{suggestion}"?'
     raise KeyError(errormsg)
예제 #3
0
def map_entries(json, location, which):
    '''
    Find a match between the JSON file and the provided location(s).

    Matching is case-insensitive; names that are not found directly are
    looked up in the alias table returned by get_country_aliases().

    Args:
        json (list or dict): the data being loaded
        location (list or str): the list of locations to pull from; if None, all available locations are used
        which (str): either 'age' for age data or 'household' for household size distributions

    Returns:
        entries (dict): the matching data entry for each location, keyed by the location name as supplied

    Raises:
        ValueError: if a location matches neither an available country nor an alias
    '''

    # The data have slightly different formats: list of dicts or just a dict
    if which == 'age':
        countries = [entry["country"].lower() for entry in json]  # Pull out available countries
        records = json  # Already addressable by position
    else:
        countries = [key.lower() for key in json.keys()]
        records = list(json.values())  # Materialize once rather than once per location

    # Set parameters
    if location is None:
        location = countries
    else:
        location = sc.promotetolist(location)

    # Define a mapping for common mistakes
    mapping = get_country_aliases()
    mapping = {key.lower(): val.lower() for key, val in mapping.items()}

    entries = {}
    for loc in location:
        lloc = loc.lower()
        if lloc not in countries and lloc in mapping:
            lloc = mapping[lloc]

        # Only the index lookup can raise the ValueError handled here
        try:
            ind = countries.index(lloc)
        except ValueError as E:
            suggestions = sc.suggest(loc, countries, n=4)
            if suggestions:
                errormsg = f'Location "{loc}" not recognized, did you mean {suggestions}? ({str(E)})'
            else:
                errormsg = f'Location "{loc}" not recognized ({str(E)})'
            raise ValueError(errormsg) from E

        entries[loc] = records[ind]

    return entries
예제 #4
0
def get_age_distribution(location=None):
    '''
    Load age distribution for a given country or countries.

    Matching is case-insensitive, and a table of common alternative names
    (e.g. 'USA', 'UK') is applied before lookup.

    Args:
        location (str or list): name of the country or countries to load the age distribution for;
            if None, all available countries are loaded

    Returns:
        age_data (array or dict): for a single location, a Numpy array with one row per age
            bracket of the form [min age, max age, fraction of population]; otherwise a dict
            of such arrays keyed by the lowercased (and alias-resolved) country name

    Raises:
        ValueError: if a location does not match any available country
    '''

    # Load the raw data
    json = cad.get_country_age_distributions()
    countries = [entry["country"].lower()
                 for entry in json]  # Pull out available countries

    # Set parameters
    max_age = 99  # Upper bound used for open-ended brackets such as "80+"
    if location is None:
        location = countries
    else:
        location = sc.promotetolist(location)

    # Define a mapping for common mistakes
    mapping = {
        'Bolivia': 'Bolivia (Plurinational State of)',
        'Burkina': 'Burkina Faso',
        # NOTE(review): 'Cabo Verdeo' looks like a typo for 'Cabo Verde' -- confirm against the data file
        'Cape Verde': 'Cabo Verdeo',
        'Hong Kong': 'China, Hong Kong Special Administrative Region',
        'Macao': 'China, Macao Special Administrative Region',
        "Cote d'Ivore": 'Côte d’Ivoire',
        'DRC': 'Democratic Republic of the Congo',
        'Iran': 'Iran (Islamic Republic of)',
        'Laos': "Lao People's Democratic Republic",
        'Micronesia': 'Micronesia (Federated States of)',
        'Korea': 'Republic of Korea',
        'South Korea': 'Republic of Korea',
        'Moldova': 'Republic of Moldova',
        'Russia': 'Russian Federation',
        'Palestine': 'State of Palestine',
        'Syria': 'Syrian Arab Republic',
        'Taiwan': 'Taiwan Province of China',
        'Macedonia': 'The former Yugoslav Republic of Macedonia',
        'UK': 'United Kingdom of Great Britain and Northern Ireland',
        'United Kingdom':
        'United Kingdom of Great Britain and Northern Ireland',
        'Tanzania': 'United Republic of Tanzania',
        'USA': 'United States of America',
        'United States': 'United States of America',
        'Venezuela': 'Venezuela (Bolivarian Republic of)',
        'Vietnam': 'Viet Nam',
    }
    mapping = {key.lower(): val.lower()
               for key, val in mapping.items()}  # Convert to lowercase

    result = {}
    for loc in location:
        loc = loc.lower()
        loc = mapping.get(loc, loc)  # Swap in the official name if this is a known alias

        # Only the index lookup can raise the ValueError handled here
        try:
            ind = countries.index(loc)
        except ValueError as E:
            suggestions = sc.suggest(loc, countries, n=4)
            if suggestions:
                errormsg = f'Location "{loc}" not recognized, did you mean {suggestions}?'
            else:  # Avoid the misleading "did you mean None?"
                errormsg = f'Location "{loc}" not recognized'
            raise ValueError(errormsg) from E

        age_distribution = json[ind]["ageDistribution"]
        total_pop = sum(age_distribution.values())
        local_pop = []

        # Convert each bracket label ("0-4", "80+", ...) to [min age, max age, fraction]
        for age, age_pop in age_distribution.items():
            if age.endswith('+'):
                val = [int(age[:-1]), max_age, age_pop / total_pop]
            else:
                ages = age.split('-')
                val = [int(ages[0]), int(ages[1]), age_pop / total_pop]
            local_pop.append(val)
        result[loc] = np.array(local_pop)

    # A single location returns the bare array rather than a one-entry dict
    if len(location) == 1:
        result = next(iter(result.values()))

    return result