Exemple #1
0
def update_caches():
    '''Refresh the module-level metadata caches for the current language.

    Must be called before any fetch from an economy endpoint. If the cache for
    `w.lang` is already populated the function returns immediately. Otherwise
    it reloads the region/income/lending translations, and either builds the
    full classification tables (first call) or just refreshes the localized
    capital city names (later calls for another language).
    '''
    global _localized_metadata, _iso2Codes, _class_data, _aggs

    if _localized_metadata.get(w.lang):
        # translations for this language are already cached
        return

    # translated names for everything except capital cities
    translations = {}
    for endpoint in ['region', 'incomelevel', 'lendingtype']:
        for row in w.fetch(endpoint):
            # rows come in two shapes: code/name or id/value
            if 'name' in row:
                label, key = row['name'].strip(), row['code']
            else:
                label, key = row['value'].strip(), row['id']

            translations[key] = label
            _iso2Codes[row['id']] = row['iso2code']

    _localized_metadata[w.lang] = translations

    country_url = 'country/all'

    if type(_class_data) is dict:
        # classification data already exists: only the city names need updating
        for row in w.fetch(country_url):
            city = row['capitalCity'].strip() or _empty_meta_value
            _localized_metadata[w.lang]['capitalCity:' + row['id']] = city
        return

    # first run: build codes, classifications and city translations in one pass
    _class_data = {}
    _aggs = set()

    for row in w.fetch(country_url):
        economy = row['id']
        _iso2Codes[economy] = row['iso2Code']
        _localized_metadata[w.lang]['capitalCity:' + economy] = (
            row['capitalCity'].strip() or _empty_meta_value)

        # a region id of 'NA' marks the row as an aggregate
        is_aggregate = row['region']['id'] == 'NA'
        meta = {'aggregate': is_aggregate}

        for coord in ['longitude', 'latitude']:
            meta[coord] = float(row[coord]) if len(row[coord]) else None

        for dim in ['region', 'adminregion', 'lendingType', 'incomeLevel']:
            if is_aggregate:
                meta[dim] = _empty_meta_value
            else:
                meta[dim] = row[dim]['id'] or _empty_meta_value

        _class_data[economy] = meta
        if is_aggregate:
            _aggs.add(economy)
            _aggs.add(row['iso2Code'])

    # placeholder entry (all values None) to match unrecognized economy codes
    template = _class_data['USA']
    _class_data['___'] = {field: None for field in template.keys()}
Exemple #2
0
def list(id='all', q=None, group=None):
    '''Iterate over regions, optionally filtered by name

    Arguments:
        id:         a region identifier or list-like of identifiers

        q:          search string (matched against the region name)

        group:      subgroup to return. Can be one of: 'admin', 'geo', 'allincomelevels', 'demodividend', 'smallstates', 'other'
                    NB: technically possible to pass 'lending' but the returned values generally aren't useful

    Returns:
        a generator object

    Example:
        regions = {row['code']: row['name'] for row in wbgapi.region.list()}

    Notes:
        The region list is global to the entire API and is not specific to the current database.

    '''

    # only send the 'type' parameter when a subgroup was requested
    params = {}
    if group:
        params['type'] = group

    pattern, _ = utils.qget(q)
    url = 'region/' + w.queryParam(id)

    yield from (row for row in w.fetch(url, params)
                if utils.qmatch(pattern, row['name']))
Exemple #3
0
    def normalize(i):
        '''Expand a list of indicator specs into fully populated dicts.

        Arguments:
            i:      None, or an iterable whose elements are either indicator
                    id strings or dicts containing at least an 'id' key

        Returns:
            a list of dicts, each with 'id', 'name', 'source', 'multiplier'
            and 'precision' keys. Dicts passed in are updated in place and
            returned as-is; None yields an empty list.

        Notes:
            'name' and 'precision' are looked up from the API only when the
            caller did not supply them, so fully specified indicators cost no
            network request.
        '''

        ilist = []
        if i is None:
            return ilist

        for elem in i:
            if isinstance(elem, str):
                elem = {'id': elem}

            attrs = None
            if 'name' not in elem or 'precision' not in elem:
                # this is an atypical way to fetch attributes but only the
                # old-style endpoints give us precision
                attrs = list(wb.fetch('country/{}/indicator/{}'.format('USA', elem['id']), {'mrv': 1}))[0]

            if 'name' not in elem:
                elem['name'] = attrs['indicator']['value']

            if 'source' not in elem:
                elem['source'] = 2 # default to WDI

            if 'multiplier' not in elem:
                elem['multiplier'] = 1

            if 'precision' not in elem:
                elem['precision'] = attrs['decimal']

            ilist.append(elem)

        return ilist
Exemple #4
0
def coder_report(economies, full=True):
    '''Build a tabular report comparing original names with WBG names.

    Arguments:
        economies:  a dict-like object mapping an original name to an economy
                    code (or a falsy value when the name has no code)

        full:       if False, omit rows whose original name already equals the
                    WBG name (case-insensitive)

    Returns:
        a list of 3-element lists: [original name, WBG name, code]. The first
        element is the header row ['ORIGINAL NAME', 'WBG NAME', 'ISO_CODE'].
        The WBG name is '' when the code is falsy or not found.
    '''

    # English names of all non-aggregate economies, keyed by id
    wb_names = {
        row['id']: row['name']
        for row in w.fetch('country/all', lang='en')
        if row['region']['id'] != 'NA'
    }

    output = [['ORIGINAL NAME', 'WBG NAME', 'ISO_CODE']]
    for name, code in economies.items():
        if code:
            wb_name = wb_names.get(code, '')
            if not full and wb_name.lower() == name.lower():
                # nothing noteworthy about an exact match in summary mode
                continue
        else:
            wb_name = ''

        output.append([name, wb_name, code])

    return output
Exemple #5
0
def list(id='all', q=None):
    '''Iterate over databases, optionally filtered by name

    Arguments:
        id:     a database identifier or list-like

        q:      search string (matched against the source name)

    Returns:
        a generator object

    Example:
        for elem in wbgapi.source.list():
            print(elem['id'], elem['name'], elem['lastupdated'])
    '''

    source_ids = w.queryParam(id)
    if not source_ids:
        # this function is a generator, so this simply ends the iteration
        return []

    pattern, _ = utils.qget(q)

    rows = w.fetch(_sourceurl(source_ids), {'databid': 'y'})
    for row in rows:
        if not utils.qmatch(pattern, row['name']):
            continue

        yield row
Exemple #6
0
def list(id='all'):
    '''Iterate over lending groups

    Arguments:
        id:         a lending group identifier or list-like of identifiers

    Returns:
        a generator object

    Example:
        lendingGroups = {row['id']: row['value'] for row in wbgapi.lending.list()}
    '''

    url = 'lendingtype/' + w.queryParam(id)
    yield from w.fetch(url)
Exemple #7
0
def concepts(db=None):
    '''Return the concepts (dimensions) of a database, cached per database.

    The three primary dimensions are exposed under the standard names
    'economy', 'time' and 'series' even when the database uses alternate names.

    Arguments:
        db:     the database ID (e.g., 2=WDI). Default to the global database

    Returns:
        a dictionary of concepts: keys are URL friendly

    Example:
        for k,v in wbgapi.source.concepts(2).items():
            print(k, v['key'], v['value'])
    '''

    global _concepts

    db = int(w.db if db is None else db)

    cached = _concepts.get(db)
    if cached is not None:
        return cached

    # alternate (URL-quoted, lower-cased) names for the primary dimensions.
    # "receiving countries" currently arrives with a trailing space; the
    # trimmed form is listed too in case that gets quietly fixed someday
    aliases = {
        'country': 'economy',
        'admin%20region': 'economy',
        'states': 'economy',
        'provinces': 'economy',
        'receiving%20countries%20': 'economy',
        'receiving%20countries': 'economy',
        'year': 'time',
        'indicator': 'series',
    }

    result = {}
    for row in w.fetch('sources/{}/concepts'.format(db), concepts=True):
        key = urllib.parse.quote(row['id']).lower()
        name = aliases.get(key, key)

        # neutralize special characters
        name = re.sub(r'[\-\.,:!]', '_', name)
        result[name] = {'key': key, 'value': row['value']}

    _concepts[db] = result
    return result
Exemple #8
0
def members(id):
    '''Collect the series identifiers that belong to a topic

    Arguments:
        id:     a topic identifier

    Returns:
        a set object of series identifiers

    '''

    url = 'topic/{}/indicator'.format(w.queryParam(id))
    return {row['id'] for row in w.fetch(url, {'source': w.db})}
def list(id='all'):
    '''Fetch the databases matching the given identifier(s)

    Arguments:
        id:     a database identifier or list-like

    Returns:
        the iterable produced by the fetch, or an empty list if `id`
        resolves to an empty query parameter

    Example:
        for elem in wbgapi.source.list():
            print(elem['id'], elem['name'], elem['lastupdated'])
    '''

    source_ids = w.queryParam(id)
    if source_ids:
        return w.fetch(_sourceurl(source_ids), {'databid': 'y'})

    return []
def list(id='all', group=None):
    '''Iterate over regions

    Arguments:
        id:         a region identifier or list-like of identifiers

        group:      subgroup to return. Can be one of: 'admin', 'geo', 'allincomelevels', 'demodividend', 'smallstates', 'other'
                    NB: technically possible to pass 'lending' but the returned values generally aren't useful

    Returns:
        a generator object

    Example:
        regions = {row['code']: row['name'] for row in wbgapi.region.list()}

    '''

    # only send the 'type' parameter when a subgroup was requested
    params = {}
    if group:
        params['type'] = group

    yield from w.fetch('region/' + w.queryParam(id), params)
def members(id, param='region'):
    '''Collect the economy identifiers that belong to a region

    Arguments:
        id:     a region identifier

        param:  used internally (name of the query parameter that carries `id`)

    Returns:
        a set object of economy identifiers

    Notes:
        the returned members may not match the economies in the current database since we access the universal region lists from the API
    '''

    query = {param: id}
    return {row['id'] for row in w.fetch('country', query)}
def concepts(db=None):
    '''Return the concepts (dimensions) of a database, cached per database.

    Arguments:
        db:     the database ID (e.g., 2=WDI). Default to the global database

    Returns:
        a dictionary of concepts: keys are URL friendly

    Example:
        for k,v in wbgapi.source.concepts(2).items():
            print(k, v['key'], v['value'])
    '''

    global _concepts

    db = int(w.db if db is None else db)

    cached = _concepts.get(db)
    if cached is not None:
        return cached

    # alternate (URL-quoted, lower-cased) names mapped to the standard ones
    aliases = {
        'country': 'economy',
        'economy': 'economy',
        'admin%20region': 'economy',
        'states': 'economy',
        'provinces': 'economy',
        'time': 'time',
        'year': 'time',
    }

    result = {}
    for row in w.fetch('sources/{}/concepts'.format(db), concepts=True):
        key = urllib.parse.quote(row['id']).lower()
        result[aliases.get(key, key)] = {'key': key, 'value': row['value']}

    _concepts[db] = result
    return result
Exemple #13
0
def list(id='all', q=None):
    '''Iterate over lending groups, optionally filtered by name

    Arguments:
        id:         a lending group identifier or list-like of identifiers

        q:          search string (matched against the lending group name)

    Returns:
        a generator object

    Example:
        lendingGroups = {row['id']: row['value'] for row in wbgapi.lending.list()}

    Notes:
        The lending group list is global to the entire API and is not specific to the current database.

    '''

    pattern, _ = utils.qget(q)
    url = 'lendingtype/' + w.queryParam(id)

    yield from (row for row in w.fetch(url)
                if utils.qmatch(pattern, row['value']))
Exemple #14
0
def list(id='all', q=None):
    '''Iterate over topics, optionally filtered by name

    Arguments:
        id:         a topic identifier or list-like of identifiers

        q:          search string (matched against the topic name)

    Returns:
        a generator object

    Example:
        topics = {row['value']: row['id'] for row in wbgapi.topic.list()}

    Notes:
        The topic list is global to the entire API and is not specific to the current database.

    '''

    pattern, _ = utils.qget(q)
    url = 'topic/' + w.queryParam(id)

    yield from (row for row in w.fetch(url)
                if utils.qmatch(pattern, row['value']))
Exemple #15
0
# Script-level setup: parse command-line options out of `config` and
# pre-load the lookup tables used by the processing code that follows.

# database ids from the comma-separated --archived-dbs option (kept as strings)
archiveDBs = [i for i in config['--archived-dbs'].split(',')]

# --since is a comma-separated list of integers
# (presumably years, judging by the variable names - confirm)
yearKeys = [int(i) for i in config['--since'].split(',')]
# one zero-initialized entry per --since value
# (presumably a running tally filled in later - confirm)
_yearBreaks = {}
for i in yearKeys:
    _yearBreaks[i] = 0

# sanity checks
# verbose output is switched off when more than one INDICATOR is given
if len(config['INDICATOR']) > 1:
    config['--verbose'] = False

# get populations
# maps countryiso3code -> value of SP.POP.TOTL; rows with an empty
# countryiso3code are skipped.
# NOTE(review): MRNEV=1 presumably requests the most recent non-empty value -
# confirm against the World Bank API documentation
_pops = {}
for row in wbgapi.fetch(
        'https://api.worldbank.org/v2/en/country/all/indicator/SP.POP.TOTL',
    {'MRNEV': 1}):
    if row['countryiso3code']:
        _pops[row['countryiso3code']] = row['value']

# Then fetch the country list
# containers and counters populated by the country loop below
_countries = {}
countOfSmallCountries = 0
countOfRichCountries = 0
country_meta = {}

for elem in wbgapi.fetch('https://api.worldbank.org/v2/en/country'):
    if config['--bssi'] and elem['id'] not in bssi_countries:
        continue

    if config['--income'] and (elem['incomeLevel']['id']
                               == config['--income']) != incomeFlag:
Exemple #16
0
def coder(name, debug=None):
    '''Return the country code for a given country name, based on common spellings and conventions.
    This function is intended to make it easier to convert country names to ISO3 codes.

    This feature is English-only and still in development. You can extend the matching algorithm
    by editing the `lookup-data.yaml` file.

    Arguments:

        name:       a country name as a string, or an iterable object of name strings

        debug:      a list of ISO codes for which to print debug output

    Returns:
        If `name` is a string then the function returns the corresponding ISO3 code, or None if the code
        can't be ascertained.

        If `name` is an iterable object, the function returns a dict of country names (passed as arguments)
        and corresponding ISO3 codes. Country names that cannot be ascertained have a value of None

    Notes:
        The lookup table is built on first use (from the API country list plus `lookup-data.yaml`)
        and cached in the module. The cache is only stored once it has been built completely, so a
        failure during the build is retried on the next call rather than leaving a partial table.

    Examples:
        print(wbgapi.economy.coder('Eswatini')) # prints 'SWZ'

        print(wbgapi.economy.coder('Swaziland')) # prints 'SWZ'

        print(wbgapi.economy.coder(['Canada', 'Toronto']))   # prints {'Canada': 'CAN', 'Toronto': None}
    '''
    global _lookup_data

    def prepare(s, clean=False, magicRegex=False):
        '''Normalize `s` for matching.

        clean:      strip parenthetical text, apostrophes and punctuation.
                    Should be False if the string is regex-capable.
        magicRegex: turn the string into a regex that tolerates common
                    variations ('and' or '&', 'st' or 'saint', any whitespace)
        '''

        s = s.lower()
        if clean:
            # this next trick strips the container parentheses from "... (US|UK)"
            # and leaves the inner part. Needed for the Virgin Islands, and it
            # must happen before parenthetical text is removed entirely
            s = re.sub(r'\((u\.?s\.?|u\.?k\.?)\)',
                       lambda t: t.group(1).replace('.', ''), s)

            s = re.sub(r'\s*\(.*\)', '', s)  # remove parenthetical text
            s = s.replace("'", '')  # remove apostrophes
            s = re.sub(r'[^a-z0-9&]', ' ',
                       s)  # convert remaining superfluous chars to spaces

        s = s.strip()

        if magicRegex:
            # converts 'and' to (and|&), 'st' to (st|saint)
            s = re.sub(r'\band\b', r'(and|\&)', s)
            s = re.sub(r'\bst\b', r'(st|saint)', s)
            s = re.sub(r'\s+', r'\\s+', s)

        return s

    if _lookup_data is None:
        # build into a local list; the global is assigned only on success
        lookup = []

        yaml_path = os.path.join(os.path.dirname(__file__), 'lookup-data.yaml')
        with open(yaml_path, 'r') as fh:
            # an empty yaml file loads as None: treat it as "no user data"
            user_data = yaml.safe_load(fh) or {}

        for row in w.fetch('country/all', lang='en'):
            if row['region']['id'] == 'NA':
                continue  # ignore aggregates

            obj = user_data.get(row['id'], {})
            # convert ordinary arrays to objects - for most cases this simplifies the yaml
            if type(obj) is list:
                obj = {'patterns': obj}

            try:
                order = obj.get('order', 10)
            except Exception:
                # show the malformed yaml entry before propagating the error
                print(obj)
                raise

            # each entry is (pattern, id, is_regex, order)
            lookup.append((row['id'].lower(), row['id'], False, order))
            lookup.append(('\\b{}\\b'.format(
                prepare(row['name'], clean=True,
                        magicRegex=True)), row['id'], True, order))
            for row2 in obj.get('patterns', []):
                if row2[0:1] == ':':
                    # treat as an exact case-insensitive string match
                    lookup.append(
                        (row2[1:].lower(), row['id'], False, order))
                else:
                    # treat as a regex string which can match on any word boundary
                    lookup.append(('\\b{}\\b'.format(
                        prepare(row2, clean=False,
                                magicRegex=True)), row['id'], True, order))

        # stable sort: lower 'order' values are tried first
        lookup.sort(key=lambda x: x[3])
        _lookup_data = lookup

    if isinstance(name, str):
        name = [name]
        is_list = False
    else:
        is_list = True

    results = w.Coder({k: None for k in name})
    for t in name:
        t2 = prepare(t, clean=True, magicRegex=False)
        for pattern, id, mode, order in _lookup_data:
            if debug and id in debug:
                print('{}: matching "{}"/{} against "{}"'.format(
                    id, pattern, mode, t2))

            # regex entries search anywhere in the name; others must be equal
            matched = re.search(pattern, t2) if mode else pattern == t2
            if matched:
                results[t] = id
                break

    if is_list:
        return results

    return results.get(name[0])
Exemple #17
0
def coder(name, summary=False, debug=None):
    '''Return the country code for a given country name, based on common spellings and conventions.
    This function is intended to make it easier to convert country names to ISO3 codes.

    This feature is English-only and still in development. You can extend the matching algorithm
    by editing the `lookup-data.yaml` file.

    Arguments:

        name:       a country name as a string, or an iterable object of name strings

        summary:    just return anomalies (names that couldn't be matched or that don't match the WBG name).

        debug:      a list of ISO codes for which to print debug output

    Returns:
        If `name` is a string then the function returns the corresponding ISO3 code, or None if the code
        can't be ascertained.

        If `name` is a pandas Series, the function returns a pandas Series with the same index.

        If `name` is any other iterable object, the function returns a Coder object. Coder is a dict subclass
        with some sugar to produce a nice command line (or jupyter notebook) report. Country names that
        cannot be coded have a value of None.

        Note that if summary is True then the function ALWAYS returns a Coder object.

    Notes:
        The lookup tables are built on first use (from the API country list plus `lookup-data.yaml`)
        and cached in the module. The caches are only stored once they have been built completely, so a
        failure during the build is retried on the next call rather than leaving partial tables.

    Examples:
        print(wbgapi.economy.coder('Eswatini')) # prints 'SWZ'

        print(wbgapi.economy.coder('Swaziland')) # prints 'SWZ'

        print(wbgapi.economy.coder(['Canada', 'Toronto']))   # prints {'Canada': 'CAN', 'Toronto': None}
    '''
    global _lookup_data, _coder_names

    def prepare(s, clean=False, magicRegex=False):
        '''Normalize `s` for matching.

        clean:      strip parenthetical text, apostrophes and punctuation.
                    Should be False if the string is regex-capable.
        magicRegex: turn the string into a regex that tolerates common
                    variations ('and' or '&', 'st' or 'saint', any whitespace)
        '''

        s = s.lower()
        if clean:
            # this next trick strips the container parentheses from "... (US|UK)"
            # and leaves the inner part. Needed for the Virgin Islands, and it
            # must happen before parenthetical text is removed entirely
            s = re.sub(r'\((u\.?s\.?|u\.?k\.?)\)',
                       lambda t: t.group(1).replace('.', ''), s)

            s = re.sub(r'\s*\(.*\)', '', s)  # remove parenthetical text
            s = s.replace("'", '')  # remove apostrophes
            s = re.sub(r'[^a-z0-9&]', ' ',
                       s)  # convert remaining superfluous chars to spaces

        s = s.strip()

        if magicRegex:
            # converts 'and' to (and|&), 'st' to (st|saint)
            s = re.sub(r'\band\b', r'(and|\&)', s)
            s = re.sub(r'\bst\b', r'(st|saint)', s)
            s = re.sub(r'\s+', r'\\s+', s)

        return s

    if _lookup_data is None:
        # build into locals; the globals are assigned only on success
        lookup = []
        names = {}

        yaml_path = os.path.join(os.path.dirname(__file__), 'lookup-data.yaml')
        with open(yaml_path, 'r') as fh:
            # an empty yaml file loads as None: treat it as "no user data"
            user_data = yaml.safe_load(fh) or {}

        for row in w.fetch('country/all', lang='en'):
            if row['region']['id'] == 'NA':
                continue  # ignore aggregates

            names[row['id']] = row['name']

            obj = user_data.get(row['id'], {})
            # convert ordinary arrays to objects - for most cases this simplifies the yaml
            if type(obj) is list:
                obj = {'patterns': obj}

            try:
                order = obj.get('order', 10)
            except Exception:
                # show the malformed yaml entry before propagating the error
                print(obj)
                raise

            # each entry is (pattern, id, mode, order) where mode is
            # 0 = exact string, 1 = regex match, 2 = regex exclusion
            lookup.append((row['id'].lower(), row['id'], 0, order))
            lookup.append(('\\b{}\\b'.format(
                prepare(row['name'], clean=True,
                        magicRegex=True)), row['id'], 1, order))
            for row2 in obj.get('patterns', []):
                if row2[0:1] == ':':
                    # treat as an exact case-insensitive string match
                    lookup.append(
                        (row2[1:].lower(), row['id'], 0, order))
                elif row2[0:1] == '~':
                    # treat as regex string, but EXCLUDE this pattern
                    lookup.append(('\\b{}\\b'.format(
                        prepare(row2[1:], clean=False,
                                magicRegex=True)), row['id'], 2, order))
                else:
                    # treat as a regex string which can match on any word boundary
                    lookup.append(('\\b{}\\b'.format(
                        prepare(row2, clean=False,
                                magicRegex=True)), row['id'], 1, order))

        # stable sort: lower 'order' values are tried first
        lookup.sort(key=lambda x: x[3])

        # _lookup_data doubles as the "already built" flag, so set it last
        _coder_names = names
        _lookup_data = lookup

    if isinstance(name, str):
        name = [name]
        is_list = False
    else:
        is_list = True

    # the comparison with False is kept deliberately: only an explicit
    # False/0 selects Series output, exactly as before
    if summary == False and pd is not None and type(
            name) is pd.core.series.Series:
        results = pd.Series(index=name.index, dtype=object, name='iso3')
    else:
        results = w.Coder({k: None for k in name})

    is_coder = type(results) is w.Coder

    for n, t in enumerate(name):
        excludes = set()
        t2 = prepare(t, clean=True, magicRegex=False)
        for pattern, id, mode, order in _lookup_data:
            if debug and id in debug:
                print('{}: matching "{}"/{} against "{}"'.format(
                    id, pattern, mode, t2))

            if id in excludes:
                if debug and id in debug:
                    print('{}: excluded'.format(id))
                continue

            if mode == 2:
                if re.search(pattern, t2):
                    # all further patterns for this id will be ignored
                    excludes.add(id)
                continue

            if mode == 1:
                matched = re.search(pattern, t2)
            elif mode == 0:
                matched = pattern == t2
            else:
                matched = False

            if matched:
                if is_coder:
                    results[t] = id
                else:
                    # Series results are filled by position, not by label
                    results.iloc[n] = id
                break

    if is_list or summary:
        if summary and is_coder:
            # keep only anomalies: names with no code, or whose spelling
            # differs from the WBG name for the matched code
            results = w.Coder({
                k: v
                for k, v in results.items()
                if not v or k.lower() != _coder_names.get(v, '').lower()
            })

        return results

    return results.get(name[0])